def test_no_derived_attributes(self, prj_data, exclude_derived_attributes):
    """ Without derived attributes, the data source key is not mapped to a path. """
    # Here we're disinterested in parameterization w.r.t. data source key,
    # so make it constant.
    src_key = self.SOURCE_KEYS[0]
    # Explicitly-passed object needs to at least be an AttMap.
    if exclude_derived_attributes:
        prj_data.pop("derived_attributes")
    sample_data = {
        SAMPLE_NAME_COLNAME: "arbitrary_sample", "prj": prj_data,
        DATA_SOURCE_COLNAME: src_key}
    sample_data = AttMap(sample_data)
    s = Sample(sample_data)
    assert not hasattr(s, src_key)
    assert src_key not in s
    # Create the samples and make the calls under test.
    s = Sample(sample_data)
    s.set_file_paths()
    # Check results.
    putative_new_attr = self.DATA_SOURCES[src_key]
    if exclude_derived_attributes:
        # The value to which the source key maps won't have been added.
        assert not hasattr(s, putative_new_attr)
        assert putative_new_attr not in s
    else:
        # The value to which the source key maps will have been added.
        assert putative_new_attr == getattr(s, DATA_SOURCE_COLNAME)
        assert putative_new_attr == s[DATA_SOURCE_COLNAME]

def test_make_sample_dirs(paths, preexists, tmpdir):
    """ Guaranteeing existence of a Sample instance's folders is safe and valid. """
    # Derive full paths and assure nonexistence before (optional) creation.
    fullpaths = []
    for p in paths:
        fullpath = tmpdir.join(p).strpath
        assert not os.path.exists(fullpath)
        if preexists:
            os.makedirs(fullpath)
        fullpaths.append(fullpath)
    # Make the sample and assign the full paths.
    s = Sample({SAMPLE_NAME_COLNAME: "placeholder"})
    s.paths = fullpaths
    # Base the test's initial condition on the parameterization.
    if preexists:
        def precheck(flags):
            return all(flags)
    else:
        def precheck(flags):
            return not any(flags)
    assert precheck([os.path.exists(p) for p in s.paths])
    # The creation call should succeed whether or not the folders already exist.
    s.make_sample_dirs()
    assert all([os.path.exists(p) for p in s.paths])

def test_requires_sample_name(self, has_name, data_type):
    """ Construction of sample requires data with sample name. """
    data = {}
    sample_name = "test-sample"
    if has_name:
        data[SAMPLE_NAME_COLNAME] = sample_name
        sample = Sample(data_type(data))
        assert sample_name == getattr(sample, SAMPLE_NAME_COLNAME)
    else:
        with pytest.raises(ValueError):
            Sample(data_type(data))

def test_peppy_and_snakemake_names(
        self, fetch, name_attr, data, expect_result, exp_err):
    """ Original peppy naming of sample name is favored; exception iff values differ. """
    if isinstance(expect_result, type) and issubclass(expect_result, Exception):
        with pytest.raises(expect_result):
            Sample(data)
    else:
        s = Sample(data)
        assert expect_result == fetch(s, name_attr)
        with pytest.raises(exp_err):
            fetch(s, SNAKEMAKE_SAMPLE_COL)

def test_prefers_explicit_project_context(self, prj_data):
    """ Explicit project data overrides any pre-stored project data. """
    prj_data_modified = AttMap(copy.deepcopy(prj_data))
    new_src = "src3"
    new_src_val = "newpath"
    assert new_src not in prj_data[DATA_SOURCES_SECTION]
    prj_data_modified[DATA_SOURCES_SECTION][new_src] = new_src_val
    sample_data = AttMap(
        {SAMPLE_NAME_COLNAME: "random-sample", "prj": prj_data,
         DATA_SOURCE_COLNAME: new_src})
    s = Sample(sample_data)
    s.set_file_paths(prj_data_modified)
    assert new_src_val == getattr(s, DATA_SOURCE_COLNAME)

def test_accuracy_and_allows_empty_data_sources(
        self, colname, src_key, prj_data, data_type, include_data_sources):
    """ Locator is accurate and does not require data source map. """
    sample_data = data_type(
        {SAMPLE_NAME_COLNAME: "random-sample", "prj": prj_data,
         colname: src_key})
    s = Sample(sample_data)
    data_sources = s.prj.data_sources if include_data_sources else None
    path = s.locate_data_source(
        data_sources, column_name=colname, source_key=src_key)
    if include_data_sources:
        assert self.PATH_BY_KEY[src_key] == path
    else:
        assert path is None

def test_input_files(files, test_type, tmpdir):
    """ Test for access to Sample input files. """
    file_text = " ".join(files)
    sample_data = {SAMPLE_NAME_COLNAME: "test-sample",
                   DATA_SOURCE_COLNAME: file_text}
    s = Sample(sample_data)
    assert file_text == s.data_source
    assert files == s.input_file_paths
    if test_type == "to_disk":
        path_sample_file = tmpdir.join("test-sample.yaml").strpath
        s.to_yaml(path_sample_file)
        with open(path_sample_file) as sf:
            reloaded_sample_data = yaml.load(sf, SafeLoader)
        s_reloaded = Sample(reloaded_sample_data)
        assert files == s_reloaded.input_file_paths

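# Illustrative note (inferred from the assertions above, not a documented
# guarantee): the test relies on Sample splitting the whitespace-joined
# data_source value back into the original list when exposing
# input_file_paths. A minimal sketch of that assumption:
#
#     s = Sample({SAMPLE_NAME_COLNAME: "x", DATA_SOURCE_COLNAME: "a.bam b.bam"})
#     # expected under the assumption above: s.input_file_paths == ["a.bam", "b.bam"]
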
def test_only_snakemake_name(self, fetch, name_attr, exp_err):
    """ Snakemake --> peppy <--> sample --> sample_name. """
    name = "testsample"
    s = Sample({SNAKEMAKE_SAMPLE_COL: name})
    with pytest.raises(exp_err):
        fetch(s, SNAKEMAKE_SAMPLE_COL)
    assert name == fetch(s, name_attr)

def test_project_prj_ref_as_arg(self, proj_type, fetch, tmpdir):
    """ Project is converted to PathExAttMap of sample-independent data. """
    proj_data = {METADATA_KEY: {OUTDIR_KEY: tmpdir.strpath}}
    prj = _get_prj(
        tmpdir.join("minimal_config.yaml").strpath, proj_data, proj_type)
    assert isinstance(prj, Project)
    s = Sample({SAMPLE_NAME_COLNAME: "testsample"}, prj=prj)
    self._assert_prj_dat(proj_data, s, fetch)

def test_pickle_roundtrip(self):
    """ Test whether a pickle roundtrip produces a comparable object. """
    s = Sample({SAMPLE_NAME_COLNAME: "testsample"})
    _buffer = tempfile.TemporaryFile()
    pickle.dump(s, _buffer)
    _buffer.seek(0)
    new_s = pickle.load(_buffer)
    assert s == new_s

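# Equivalent in-memory sketch (an illustration, not part of the original
# suite): pickle.dumps/pickle.loads exercise the same roundtrip without a
# temporary file.
#
#     s = Sample({SAMPLE_NAME_COLNAME: "testsample"})
#     assert s == pickle.loads(pickle.dumps(s))
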
def samples():
    """ Create collection of Samples, useful for mocking a Project.

    :return Iterable[Sample]: collection of bare bones Sample objects, with
        only name and protocol defined
    """
    return [Sample({SAMPLE_NAME_COLNAME: sn, "protocol": p})
            for sn, p in PROTOCOL_BY_SAMPLE.items()]

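# PROTOCOL_BY_SAMPLE is assumed (from the iteration above) to be a plain
# mapping of sample name to protocol, e.g. with hypothetical values:
#
#     PROTOCOL_BY_SAMPLE = {"sample-A": "ATAC-seq", "sample-B": "ChIP-seq"}
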
def test_accuracy_and_allows_empty_data_sources(
        self, colname, src_key, prj_data, data_type, include_data_sources):
    """ Locator is accurate and does not require data source map. """
    sample_data = data_type(
        {SAMPLE_NAME_COLNAME: "random-sample", "prj": prj_data,
         colname: src_key})
    s = Sample(sample_data)
    assert isinstance(s.prj, AttMap)
    data_sources = s.prj.data_sources if include_data_sources else None
    path = s.locate_data_source(
        data_sources, column_name=colname, source_key=src_key)
    if include_data_sources:
        assert self.PATH_BY_KEY[src_key] == path
    else:
        assert path is None

def test_equivalence_between_implicit_and_explicit_prj(
        self, prj_data, data_src_attr, src_key, explicit):
    """ Passing Sample's project is equivalent to its inference. """
    # Explicitly-passed object needs to at least be an AttMap.
    sample_data = AttMap(
        {SAMPLE_NAME_COLNAME: "arbitrary_sample", "prj": prj_data,
         data_src_attr: src_key, "derived_attributes": [data_src_attr]})
    # Create the samples and make the calls under test.
    s = Sample(sample_data)
    if explicit:
        s.set_file_paths(sample_data.prj)
    else:
        s.set_file_paths()
    # Check results.
    expected = self.DATA_SOURCES[src_key]
    observed = getattr(s, data_src_attr)
    assert expected == observed

def test_exception_type_matches_access_mode(data_type, accessor):
    """ Exception for attribute access failure reflects access mode. """
    data = {SAMPLE_NAME_COLNAME: "placeholder"}
    sample = Sample(data_type(data))
    if accessor == "attr":
        with pytest.raises(AttributeError):
            sample.undefined_attribute
    elif accessor == "item":
        with pytest.raises(KeyError):
            sample["not-set"]
    else:
        # Personal safeguard against unexpected behavior
        pytest.fail(
            "Unknown access mode for exception type test: {}".format(accessor))

def test_input_files(files, test_type, tmpdir):
    """ Test for access to Sample input files. """
    file_text = " ".join(files)
    sample_data = {SAMPLE_NAME_COLNAME: "test-sample",
                   DATA_SOURCE_COLNAME: file_text}
    s = Sample(sample_data)
    assert file_text == s.data_source
    assert files == s.input_file_paths
    if test_type == "to_disk":
        path_sample_file = tmpdir.join("test-sample.yaml").strpath
        s.to_yaml(path_sample_file)
        print("Sample items: {}".format(s.items()))
        with open(path_sample_file) as sf:
            reloaded_sample_data = yaml.load(sf, SafeLoader)
        print("reloaded keys: {}".format(list(reloaded_sample_data.keys())))
        try:
            s_reloaded = Sample(reloaded_sample_data)
        except Exception:
            with open(path_sample_file) as sf:
                print("LINES (below):\n{}".format("".join(sf.readlines())))
            raise
        assert files == s_reloaded.input_file_paths

def basic_sample(self):
    """ Provide test cases with a simple Sample instance. """
    return Sample({SAMPLE_NAME_COLNAME: "arbitrarily_named_sample"})

        with open(tmpdir.join("pipe-iface-conf.yaml").strpath, 'w') as f:
            yaml.safe_dump(bundled_piface, f)
    else:
        pipe_iface_config = bundled_piface
    pi = PipelineInterface(pipe_iface_config)
    assert bundled_piface == pi.pipe_iface_config
    assert pi.pipe_iface_file == (pipe_iface_config if from_file else None)


@pytest.mark.parametrize(
    argnames="funcname_and_kwargs",
    argvalues=[
        ("choose_resource_package", {"file_size": 4}),
        ("get_arg_string",
         {"sample": Sample({"sample_name": "arbitrary-sample-name"})}),
        ("get_attribute", {"attribute_key": "irrelevant-attr-name"}),
        ("get_pipeline_name", {})])
@pytest.mark.parametrize(argnames="use_resources", argvalues=[False, True])
def test_unconfigured_pipeline_exception(
        funcname_and_kwargs, use_resources, pi_with_resources):
    """ Each public function throws same exception given unmapped pipeline. """
    pi = pi_with_resources
    if not use_resources:
        for pipeline in pi.pipelines:
            try:
                del pipeline["resources"][DEFAULT_COMPUTE_RESOURCES_NAME]
            except KeyError:
                # Already no default resource package.
                pass

def test_sheet_attr_order(self, proj):
    """ The sample's sheet attributes are ordered. """
    s = Sample(getattr(proj, NAME_TABLE_ATTR).iloc[0])
    d = s.get_sheet_dict()
    assert SAMPLE_NAME_COLNAME == list(d)[0]

def test_non_project_prj_ref_as_arg(self, fetch, prj_ref_val, expect):
    """ Project reference must be null, or an attmap bounded above by PathExAttMap. """
    s = Sample({SAMPLE_NAME_COLNAME: "testsample"}, prj=prj_ref_val)
    assert expect == fetch(s)

def test_non_project_prj_ref(self, fetch, prj_ref_val, expect):
    """ Project reference is null, or a PathExAttMap. """
    s = Sample({SAMPLE_NAME_COLNAME: "testsample", PRJ_REF: prj_ref_val})
    assert expect == fetch(s)

def test_no_prj_ref(self, has_ref, get_ref):
    """ Construction of a Sample without project ref --> null value """
    s = Sample({SAMPLE_NAME_COLNAME: "test-sample"})
    assert has_ref(s)
    assert get_ref(s) is None

# variables = ['sample_name', 'cell_line', 'condition', 'timepoint', 'knockout_clone', 'replicate', 'experiment_name']
# analysis.annotate_with_sample_metadata(attributes=variables, quant_matrix="coverage_annotated")

analysis.expression = (
    analysis.coverage_annotated
    .loc[~(analysis.coverage_annotated.sum(axis=1) == 0)]
    .dropna()
    .drop_duplicates())
analysis.expression = analysis.expression.rename(
    columns={"HAP1_WT": "HAP1_WT-WT", "C8": "HAP1_MTHFD1KO-C8",
             "D3": "HAP1_MTHFD1KO-D3"})

# Add just minimal sample info (name, cell line, perturbation, batch)
samples = list()
for i, col in enumerate(analysis.expression.columns):
    samples.append(
        Sample(pd.Series(
            [col, col.split("_")[0], "_".join(col.split("_")[1:]),
             "b1" if "-" in col else "b2"],
            index=['sample_name', 'cell_line', 'perturbation', 'batch'])))
analysis._samples = samples
analysis.expression.columns = pd.MultiIndex.from_arrays(
    [[s.name for s in analysis._samples],
     [s.cell_line for s in analysis._samples],
     [s.perturbation for s in analysis._samples],
     [s.batch for s in analysis._samples]],
    names=['sample_name', "cell_line", "perturbation", "batch"])
analysis.to_pickle()

# Unsupervised analysis
unsupervised_analysis(
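# Note on the sample-building loop above (an inference from this script, not
# a documented guarantee): a Sample constructed from a pandas Series is
# expected to expose each index entry as an attribute, which the MultiIndex
# construction relies on. A minimal sketch with hypothetical values:
#
#     s = Sample(pd.Series(
#         ["HAP1_WT-WT", "HAP1", "WT-WT", "b1"],
#         index=['sample_name', 'cell_line', 'perturbation', 'batch']))
#     # expected under the assumption above: s.cell_line == "HAP1"
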
def test_only_peppy_name(self, fetch, name_attr):
    """ name and sample_name access Sample's name and work with varied syntax. """
    name = "testsample"
    s = Sample({SAMPLE_NAME_COLNAME: name})
    assert name == fetch(s, name_attr)

def sample(self):
    return Sample({SAMPLE_NAME_COLNAME: "basic_sample"})