def test_copies_labels_if_present(self):
     # When DATASET.labels is passed, the result is expected to expose [8].
     labeled = data_utils.as_normalized(
         SPECTRA, COORDINATES, labels=DATASET.labels)
     npt.assert_equal(labeled.labels, [8])
     # When labels=None is passed, the labels attribute must stay None.
     no_labels = data_utils.as_normalized(
         SPECTRA, COORDINATES, labels=None)
     self.assertIsNone(no_labels.labels)
Exemple #2
0
def regenerate_dataset(dataset_path: str, analysis_root: str):
    """Rewrite the text dataset file from a stored analysis result.

    Loads ``result.pkl`` from ``analysis_root`` with joblib, combines it
    with the coordinates and labels of the metadata returned by
    ``get_metadata(analysis_root)`` through ``data_utils.as_normalized``,
    and writes the outcome to ``dataset_path`` with ``data_utils.dumps_txt``.

    Args:
        dataset_path: Destination passed to ``data_utils.dumps_txt``.
        analysis_root: Directory holding ``result.pkl``; also handed to
            ``get_metadata``.
    """
    transformed = joblib.load(os.path.join(analysis_root, 'result.pkl'))
    meta = get_metadata(analysis_root)
    normalized = data_utils.as_normalized(
        transformed, meta.coordinates, meta.labels)
    data_utils.dumps_txt(dataset_path, normalized)
Exemple #3
0
def tSNE(self, analysis_name: str, dataset_name: str, **kwargs):
    """Fit a TSNE model on a dataset's spectra and store the artifacts.

    Inside the directory yielded by ``open_analysis`` this writes, in order:
    ``options`` (via ``dump_configuration``), ``model.pkl``, ``result.pkl``,
    ``result.csv`` and ``data.txt`` (the result passed through
    ``data_utils.as_normalized`` together with the dataset's coordinates
    and labels). Progress is reported through the callable obtained from
    ``status_notifier(self)``.

    Args:
        analysis_name: Name of the analysis, forwarded to ``open_analysis``.
        dataset_name: Name given to ``load_dataset`` and ``open_analysis``.
        **kwargs: Options forwarded to ``TSNE`` and saved as configuration.
            ``verbose=True`` is always added, so passing ``verbose`` here
            raises ``TypeError`` (duplicate keyword argument).
    """
    # dataset name, algorithm name and analysis name together locate the run
    analysis_key = (dataset_name, tSNE.__name__, analysis_name)
    embedder = TSNE(**kwargs, verbose=True)

    with status_notifier(self) as notify, \
            open_analysis(*analysis_key) as workdir:
        notify('PRESERVING CONFIGURATION')
        dump_configuration(os.path.join(workdir, 'options'), kwargs)

        notify('LOADING DATA')
        dataset = load_dataset(dataset_name)

        notify('RUNNING T-SNE')
        embedding = embedder.fit_transform(dataset.spectra)

        notify('PRESERVING RESULTS')
        # fitted model first, then the raw embedding as both pickle and CSV
        joblib.dump(embedder, os.path.join(workdir, 'model.pkl'))
        result_stem = os.path.join(workdir, 'result')
        joblib.dump(embedding, result_stem + '.pkl')
        np.savetxt(result_stem + '.csv', embedding)
        # finally the embedding in the project's normalized text format
        as_dataset = data_utils.as_normalized(
            embedding, dataset.coordinates, dataset.labels)
        data_utils.dumps_txt(os.path.join(workdir, 'data.txt'), as_dataset)
 def test_creates_artificial_mzs(self):
     # No m/z values are passed; the resulting mz is expected to be [0, 1].
     normalized = data_utils.as_normalized(SPECTRA, COORDINATES)
     expected_mz = [0, 1]
     npt.assert_equal(normalized.mz, expected_mz)
 def test_throws_on_sizes_mismatch(self):
     # Spectra arrays that as_normalized is expected to reject with
     # ValueError when paired with COORDINATES (a two-row array and an
     # empty one).
     rejected_spectra = (np.array([[1], [2]]), np.array([]))
     for spectra in rejected_spectra:
         with self.assertRaises(ValueError):
             data_utils.as_normalized(spectra, COORDINATES)