def it_saves_and_loads_array_results():
    """Save a result that holds an ArrayResult, then reload it from its folder.

    Checks that the pickle written by save() stays small (the array data is
    kept in its own "arr.arr" file) and that load_from_folder() still works
    after the working directory has been restored.
    """
    n_rows, n_cols = 100, 87
    float64_n_bytes = 8

    with tmp.tmp_folder() as folder:
        with local.cwd(folder):
            shape = (n_rows, n_cols)
            arr = ArrayResult("arr.arr", dtype=np.float64, shape=shape, mode="w+")
            r = np.random.uniform(size=shape)
            arr[:] = r

            res1 = ComplexPropertyResult(foo=3, arr=arr)
            res1.save()

            # The important part is that the pickle doesn't include the array!
            pickle_n_bytes = local.path(ComplexPropertyResult.filename).stat().st_size
            assert pickle_n_bytes < 200

            # The array file holds exactly one float64 per element.
            arr_n_bytes = local.path("arr.arr").stat().st_size
            assert arr_n_bytes == n_rows * n_cols * float64_n_bytes

        # The cwd context has exited, so we should be back in a different
        # folder, but load_from_folder() should be able to deal with that.
        assert local.cwd != folder

        res2 = ComplexPropertyResult.load_from_folder(folder)
        assert res2.foo == 3
        assert np.all(res2.arr == r)
def field_chcy_ims(self, field_i):
    """Return the image array for field `field_i`, creating its ArrayResult lazily.

    The ArrayResult is built at most once per field and kept in
    `self._cache_field_chcy_ims`; every call returns that entry's `.arr()`.

    Arguments:
        field_i: key of the field; also passed to `self._field_ims_filename`.

    Returns:
        Whatever `ArrayResult.arr()` returns for the field. The ArrayResult
        is declared with shape (n_channels, n_cycles, dim, dim).
    """
    cache = self._cache_field_chcy_ims

    if field_i in cache:
        return cache[field_i].arr()

    # First request for this field. No explicit mode is passed, so
    # ArrayResult's own default mode applies.
    ims_shape = (self.n_channels, self.n_cycles, self.dim, self.dim)
    cache[field_i] = ArrayResult(
        self._field_ims_filename(field_i),
        dtype=np.dtype(self.dtype),
        shape=ims_shape,
    )
    return cache[field_i].arr()
def _make_arrays(name, n_peps, n_samples):
    """Allocate the writable dyemat, radmat and recall arrays for one named set.

    Arguments:
        name: prefix for the three filenames ("<name>_dyemat", "<name>_radmat",
            "<name>_recall").
        n_peps: size of the first axis of all three arrays.
        n_samples: size of the second axis of dyemat and radmat.

    Returns:
        Tuple (dyemat, radmat, recall) of ArrayResult objects opened "w+".
        dyemat is uint8 and radmat is float32, both shaped
        (n_peps, n_samples, n_channels, n_cycles); recall is float32 with
        shape (n_peps,). n_channels and n_cycles are read from `sim_params`,
        which comes from the enclosing scope.
    """

    def _alloc(suffix, shape, dtype):
        # Every array here is freshly created, hence the "w+" mode.
        return ArrayResult(f"{name}_{suffix}", shape=shape, dtype=dtype, mode="w+")

    per_sample_shape = (
        n_peps,
        n_samples,
        sim_params.n_channels,
        sim_params.n_cycles,
    )

    dyemat = _alloc("dyemat", per_sample_shape, np.uint8)
    radmat = _alloc("radmat", per_sample_shape, np.float32)
    recall = _alloc("recall", (n_peps,), np.float32)
    return dyemat, radmat, recall
def it_returns_an_open_array_without_overwrite():
    """arr() must keep returning the same open array object after a write.

    Also checks that, once flushed, the backing file holds one byte per
    uint8 element.
    """
    n_rows, n_cols = 10, 5

    with tmp.tmp_folder(chdir=True):
        ar = ArrayResult("test1", shape=(n_rows, n_cols), dtype=np.uint8, mode="w+")
        first_handle = ar.arr()

        values = np.arange(n_rows * n_cols).astype(np.uint8)
        ar[:] = values.reshape((n_rows, n_cols))

        # Asking again after the write must not produce a new array object.
        second_handle = ar.arr()
        assert second_handle is first_handle

        ar.flush()
        on_disk_n_bytes = local.path("test1").stat().st_size
        assert on_disk_n_bytes == n_rows * n_cols
def _score_nn_result(
    result,
    set_name,
    shape,
    prep_result,
    sim_result,
    progress,
    pipeline,
    first_phase,
    n_phases,
):
    """Attach true_pep_iz and the per-peptide PR curves to an nn() result.

    Mutates `result` in place by setting `true_pep_iz`, `peps_pr` and
    `peps_pr_abund`.

    Arguments:
        result: the object returned by nn(); must already carry
            `pred_pep_iz` and `scores`.
        set_name: "test" or "train"; used as the prefix of the
            "<set_name>_true_pep_iz" array file.
        shape: 4-tuple shape of the un-flattened radmat; only the first two
            axes are used here.
        prep_result, sim_result: passed through to CallBag.
        progress: progress callback passed to the PR-curve calls.
        pipeline: optional pipeline whose phase is advanced; may be None.
        first_phase: phase index reported before the plain PR curve; the
            abundance-adjusted curve reports `first_phase + 1`.
        n_phases: total number of phases reported to the pipeline.
    """
    n_axis0, n_axis1 = shape[0], shape[1]

    result.true_pep_iz = ArrayResult(
        filename=f"{set_name}_true_pep_iz",
        shape=(n_axis0 * n_axis1,),
        dtype=IndexType,
        mode="w+",
    )
    # Each axis-0 index is repeated once per axis-1 entry: [0, 0, ..., 1, 1, ...].
    result.true_pep_iz[:] = np.repeat(np.arange(n_axis0).astype(IndexType), n_axis1)

    check.t(result.true_pep_iz, ArrayResult)
    check.t(result.pred_pep_iz, ArrayResult)

    call_bag = CallBag(
        true_pep_iz=result.true_pep_iz.arr(),
        pred_pep_iz=result.pred_pep_iz.arr(),
        scores=result.scores.arr(),
        prep_result=prep_result,
        sim_result=sim_result,
    )

    if pipeline is not None:
        pipeline.set_phase(first_phase, n_phases)
    result.peps_pr = call_bag.pr_curve_by_pep(progress=progress)

    # If there is abundance information, compute the abundance-adjusted PR.
    # This call returns None if there is no abundance info available.
    if pipeline is not None:
        pipeline.set_phase(first_phase + 1, n_phases)
    result.peps_pr_abund = call_bag.pr_curve_by_pep_with_abundance(progress=progress)


def test_nn(test_nn_params, prep_result, sim_result, progress=None, pipeline=None):
    """Classify the simulated test set with nn() and compute per-peptide PR curves.

    When `test_nn_params.include_training_set` is true, the training set is
    classified as well so that over-fitting can be detected.

    Arguments:
        test_nn_params: parameters forwarded to nn(); `include_training_set`
            is read here.
        prep_result: supplies `peps__no_decoys()` and is handed to CallBag.
        sim_result: supplies `test_radmat` / `test_dyemat` (asserted to be
            4-dimensional) and, for the optional training pass,
            `train_radmat`, `train_dyemat` and `train_true_pep_iz`.
        progress: optional progress callback forwarded to nn() and CallBag.
        pipeline: optional object whose `set_phase(i, n_phases)` is called as
            work advances (3 phases, or 6 with the training set).

    Returns:
        TestNNResult built from `params` plus every key of the test result
        prefixed with "test_" and every key of the train result prefixed
        with "train_" (all None when the training set is not included).
    """
    n_phases = 6 if test_nn_params.include_training_set else 3
    if pipeline is not None:
        pipeline.set_phase(0, n_phases)

    shape = sim_result.test_radmat.shape
    assert len(shape) == 4

    # Merge the first two axes so that each sample becomes one row.
    flat_shape = (shape[0] * shape[1], shape[2], shape[3])
    test_radmat = sim_result.test_radmat.reshape(flat_shape)
    test_dyemat = sim_result.test_dyemat.reshape(flat_shape)

    test_result = nn(
        test_nn_params,
        sim_result,
        radmat=test_radmat,
        true_dyemat=test_dyemat,
        progress=progress,
    )
    _score_nn_result(
        test_result,
        "test",
        shape,
        prep_result,
        sim_result,
        progress,
        pipeline,
        first_phase=1,
        n_phases=n_phases,
    )

    if test_nn_params.include_training_set:
        # Permit testing for over-fitting by classifying on the train data
        if pipeline is not None:
            pipeline.set_phase(3, n_phases)

        real_pep_iz = prep_result.peps__no_decoys().pep_i.values
        keep_rows = np.isin(sim_result.train_true_pep_iz, real_pep_iz)
        train_radmat = sim_result.train_radmat[keep_rows]
        train_dyemat = sim_result.train_dyemat[keep_rows]

        assert train_radmat.shape == shape

        # NOTE: nn() receives the whole params object, exactly as in the
        # test-set call above, not just its `use_gmm` field.
        train_result = nn(
            test_nn_params,
            sim_result,
            radmat=train_radmat,
            true_dyemat=train_dyemat,
            progress=progress,
        )
        _score_nn_result(
            train_result,
            "train",
            shape,
            prep_result,
            sim_result,
            progress,
            pipeline,
            first_phase=4,
            n_phases=n_phases,
        )
    else:
        # Keep the result schema stable: same keys as the test result, all None.
        train_result = {k: None for k in test_result.keys()}

    def rename(d, prefix):
        return {f"{prefix}{k}": v for k, v in d.items()}

    return TestNNResult(
        params=test_nn_params,
        **rename(test_result, "test_"),
        **rename(train_result, "train_"),
    )
def allocate_field(self, field_i, shape, dtype):
    """Create a writable ("w+") ArrayResult for the images of field `field_i`.

    Arguments:
        field_i: field key; `self._field_ims_filename(field_i)` gives the
            filename.
        shape: shape of the array to allocate.
        dtype: dtype of the array to allocate.

    Returns:
        The newly constructed ArrayResult.
    """
    # ArrayResult takes dtype before shape positionally, the reverse of this
    # method's own parameter order.
    return ArrayResult(self._field_ims_filename(field_i), dtype, shape, mode="w+")