def test_copies_labels_if_present(self):
    """Labels passed to ``as_normalized`` end up on the result; ``None`` stays ``None``."""
    # Labeled case: the labels handed in must be visible on the result.
    with_labels = data_utils.as_normalized(
        SPECTRA, COORDINATES, labels=DATASET.labels
    )
    npt.assert_equal(with_labels.labels, [8])

    # Unlabeled case: an explicit None must not be replaced by anything.
    without_labels = data_utils.as_normalized(
        SPECTRA, COORDINATES, labels=None
    )
    self.assertIsNone(without_labels.labels)
def regenerate_dataset(dataset_path: str, analysis_root: str):
    """Regenerate file with transformed dataset.

    Loads ``result.pkl`` from ``analysis_root``, combines it with the
    coordinates and labels reported by ``get_metadata`` for the same
    directory, and writes the resulting dataset to ``dataset_path``.
    """
    # The stored result is loaded before the metadata is queried.
    transformed = joblib.load(os.path.join(analysis_root, 'result.pkl'))
    details = get_metadata(analysis_root)
    regenerated = data_utils.as_normalized(
        transformed, details.coordinates, details.labels
    )
    data_utils.dumps_txt(dataset_path, regenerated)
def tSNE(self, analysis_name: str, dataset_name: str, **kwargs):
    """Fit ``TSNE`` on a dataset's spectra and store every artifact of the run.

    Inside the directory yielded by ``open_analysis`` this writes, in order:
    the configuration (``options``), the fitted model (``model.pkl``), the
    embedding (``result.pkl`` and ``result.csv``) and the embedding as a
    normalized dataset (``data.txt``). Progress is reported through the
    callable yielded by ``status_notifier``.

    ``kwargs`` are forwarded to ``TSNE`` unchanged; ``verbose=True`` is
    always added, so passing ``verbose`` in ``kwargs`` raises ``TypeError``.
    """
    # Triple handed to open_analysis: (dataset, algorithm name, analysis name).
    analysis_key = (dataset_name, tSNE.__name__, analysis_name)
    # The estimator is built before any context is entered, so invalid
    # options fail before open_analysis is called.
    model = TSNE(**kwargs, verbose=True)

    with status_notifier(self) as notify:
        with open_analysis(*analysis_key) as workdir:
            notify('PRESERVING CONFIGURATION')
            dump_configuration(os.path.join(workdir, 'options'), kwargs)

            notify('LOADING DATA')
            source = load_dataset(dataset_name)

            notify('RUNNING T-SNE')
            embedding = model.fit_transform(source.spectra)

            notify('PRESERVING RESULTS')
            model_stem = os.path.join(workdir, 'model')
            joblib.dump(model, model_stem + '.pkl')
            result_stem = os.path.join(workdir, 'result')
            joblib.dump(embedding, result_stem + '.pkl')
            np.savetxt(result_stem + '.csv', embedding)

            # Re-wrap the embedding with the source coordinates and labels
            # so it can be written out as a dataset.
            as_dataset = data_utils.as_normalized(
                embedding, source.coordinates, source.labels
            )
            data_utils.dumps_txt(os.path.join(workdir, 'data.txt'), as_dataset)
def test_creates_artificial_mzs(self):
    """When no m/z values are supplied, the result's ``mz`` equals ``[0, 1]``."""
    expected_mz = [0, 1]
    normalized = data_utils.as_normalized(SPECTRA, COORDINATES)
    npt.assert_equal(normalized.mz, expected_mz)
def test_throws_on_sizes_mismatch(self):
    """``as_normalized`` raises ``ValueError`` for spectra that do not fit COORDINATES.

    Two inputs are checked: a two-row column array and an empty array
    (presumably both disagree in size with COORDINATES — see the fixture).
    """
    rejected_inputs = (
        np.array([[1], [2]]),
        np.array([]),
    )
    for bad_spectra in rejected_inputs:
        with self.assertRaises(ValueError):
            data_utils.as_normalized(bad_spectra, COORDINATES)