예제 #1
0
    def test_no_derived_attributes(self, prj_data, exclude_derived_attributes):
        """ Data source maps to its project value only if derived attributes are present. """

        # The particular data source key is irrelevant to this test,
        # so just fix it to the first one available.
        key = self.SOURCE_KEYS[0]

        if exclude_derived_attributes:
            prj_data.pop("derived_attributes")

        # Explicitly-passed object needs to at least be an AttMap.
        raw = AttMap({
            SAMPLE_NAME_COLNAME: "arbitrary_sample",
            "prj": prj_data,
            DATA_SOURCE_COLNAME: key,
        })
        s = Sample(raw)

        # Before paths are set, the source key is neither an attribute
        # nor an item of the sample.
        assert not hasattr(s, key)
        assert key not in s

        # Build the sample afresh and make the call under test.
        s = Sample(raw)
        s.set_file_paths()

        # Check results.
        mapped_value = self.DATA_SOURCES[key]
        if not exclude_derived_attributes:
            # The data source attribute now holds the mapped value.
            assert mapped_value == getattr(s, DATA_SOURCE_COLNAME)
            assert mapped_value == s[DATA_SOURCE_COLNAME]
            return
        # The value to which the source key maps won't have been added.
        assert not hasattr(s, mapped_value)
        assert mapped_value not in s
예제 #2
0
def test_make_sample_dirs(paths, preexists, tmpdir):
    """ Every Sample folder exists after creation call, preexisting or not. """

    # Resolve each path under the temp folder; none may exist beforehand.
    folders = []
    for rel in paths:
        folder = tmpdir.join(rel).strpath
        assert not os.path.exists(folder)
        folders.append(folder)
        if preexists:
            os.makedirs(folder)

    # Make the sample and hand it the folders.
    s = Sample({SAMPLE_NAME_COLNAME: "placeholder"})
    s.paths = folders

    # Initial condition depends on the parameterization: either all of the
    # folders exist already, or none of them does.
    existence = [os.path.exists(p) for p in s.paths]
    assert all(existence) if preexists else not any(existence)

    # Whatever the starting state, each folder must exist after the call.
    s.make_sample_dirs()
    assert all(os.path.exists(p) for p in s.paths)
예제 #3
0
def test_make_sample_dirs(paths, preexists, tmpdir):
    """ Folder creation for a Sample is safe to call and leaves all folders in place. """

    # Derive full paths, asserting nonexistence before optional creation.
    targets = []
    for relpath in paths:
        target = tmpdir.join(relpath).strpath
        assert not os.path.exists(target)
        if preexists:
            os.makedirs(target)
        targets.append(target)

    # Make the sample and assign its paths.
    s = Sample({SAMPLE_NAME_COLNAME: "placeholder"})
    s.paths = targets

    # Pick the precondition check from the parameterization.
    prechecks = {
        True: lambda flags: all(flags),
        False: lambda flags: not any(flags),
    }
    precheck = prechecks[bool(preexists)]
    assert precheck([os.path.exists(p) for p in s.paths])

    # After the creation call, every folder must exist.
    s.make_sample_dirs()
    present = [os.path.exists(p) for p in s.paths]
    assert all(present)
예제 #4
0
 def test_requires_sample_name(self, has_name, data_type):
     """ A Sample can only be built from data that includes a sample name. """
     if not has_name:
         # Nameless data must be rejected at construction.
         with pytest.raises(ValueError):
             Sample(data_type({}))
         return
     sample_name = "test-sample"
     named = Sample(data_type({SAMPLE_NAME_COLNAME: sample_name}))
     assert sample_name == getattr(named, SAMPLE_NAME_COLNAME)
예제 #5
0
 def test_peppy_and_snakemake_names(self, fetch, name_attr, data,
                                    expect_result, exp_err):
     """ Construction either fails as expected, or yields the expected name. """
     expects_failure = isinstance(expect_result, type) and \
         issubclass(expect_result, Exception)
     if expects_failure:
         # The expectation is an exception type: construction must raise it.
         with pytest.raises(expect_result):
             Sample(data)
         return
     s = Sample(data)
     assert expect_result == fetch(s, name_attr)
     # Fetching by the Snakemake column name must raise the expected error.
     with pytest.raises(exp_err):
         fetch(s, SNAKEMAKE_SAMPLE_COL)
예제 #6
0
 def test_prefers_explicit_project_context(self, prj_data):
     """ Project data given to set_file_paths wins over the Sample's stored project. """
     extra_key, extra_path = "src3", "newpath"
     modified = AttMap(copy.deepcopy(prj_data))
     # The extra source must exist only in the modified copy.
     assert extra_key not in prj_data[DATA_SOURCES_SECTION]
     modified[DATA_SOURCES_SECTION][extra_key] = extra_path
     raw = {
         SAMPLE_NAME_COLNAME: "random-sample",
         "prj": prj_data,
         DATA_SOURCE_COLNAME: extra_key,
     }
     s = Sample(AttMap(raw))
     s.set_file_paths(modified)
     assert extra_path == getattr(s, DATA_SOURCE_COLNAME)
예제 #7
0
 def test_accuracy_and_allows_empty_data_sources(
         self, colname, src_key, prj_data, data_type, include_data_sources):
     """ Source lookup gives the mapped path, or null when no source map is given. """
     raw = {SAMPLE_NAME_COLNAME: "random-sample",
            "prj": prj_data, colname: src_key}
     s = Sample(data_type(raw))
     if include_data_sources:
         data_sources = s.prj.data_sources
     else:
         data_sources = None
     path = s.locate_data_source(
         data_sources, column_name=colname, source_key=src_key)
     if not include_data_sources:
         assert path is None
         return
     assert self.PATH_BY_KEY[src_key] == path
예제 #8
0
def test_input_files(files, test_type, tmpdir):
    """ Sample exposes its input file paths, also after a YAML roundtrip. """
    joined = " ".join(files)
    s = Sample({SAMPLE_NAME_COLNAME: "test-sample",
                DATA_SOURCE_COLNAME: joined})
    assert joined == s.data_source
    assert files == s.input_file_paths
    if test_type != "to_disk":
        return
    # Write the sample to disk, read it back, and check the paths again.
    yaml_path = tmpdir.join("test-sample.yaml").strpath
    s.to_yaml(yaml_path)
    with open(yaml_path) as handle:
        restored_data = yaml.load(handle, SafeLoader)
    restored = Sample(restored_data)
    assert files == restored.input_file_paths
예제 #9
0
 def test_prefers_explicit_project_context(self, prj_data):
     """ An explicitly passed project takes precedence over the stored one. """
     new_src = "src3"
     new_src_val = "newpath"
     explicit_prj = AttMap(copy.deepcopy(prj_data))
     # Only the explicit copy is given the new data source.
     assert new_src not in prj_data[DATA_SOURCES_SECTION]
     explicit_prj[DATA_SOURCES_SECTION][new_src] = new_src_val
     s = Sample(AttMap({SAMPLE_NAME_COLNAME: "random-sample",
                        "prj": prj_data,
                        DATA_SOURCE_COLNAME: new_src}))
     s.set_file_paths(explicit_prj)
     observed = getattr(s, DATA_SOURCE_COLNAME)
     assert new_src_val == observed
예제 #10
0
    def test_only_snakemake_name(self, fetch, name_attr, exp_err):
        """ Name given under the Snakemake key is fetched via the name attribute only. """
        expected = "testsample"
        data = {SNAKEMAKE_SAMPLE_COL: expected}
        s = Sample(data)
        # Fetching by the Snakemake key itself must raise the expected error.
        with pytest.raises(exp_err):
            fetch(s, SNAKEMAKE_SAMPLE_COL)
        observed = fetch(s, name_attr)
        assert expected == observed
예제 #11
0
 def test_project_prj_ref_as_arg(self, proj_type, fetch, tmpdir):
     """ A Project passed as the prj argument yields the expected project data. """
     conf_path = tmpdir.join("minimal_config.yaml").strpath
     proj_data = {METADATA_KEY: {OUTDIR_KEY: tmpdir.strpath}}
     prj = _get_prj(conf_path, proj_data, proj_type)
     assert isinstance(prj, Project)
     sample_data = {SAMPLE_NAME_COLNAME: "testsample"}
     s = Sample(sample_data, prj=prj)
     self._assert_prj_dat(proj_data, s, fetch)
예제 #12
0
 def test_pickle_roundtrip(self):
     """ A Sample restored from its pickle compares equal to the original. """
     s = Sample({SAMPLE_NAME_COLNAME: "testsample"})
     # The context manager closes the temporary file even when dumping
     # or loading fails, so the handle is never leaked.
     with tempfile.TemporaryFile() as _buffer:
         pickle.dump(s, _buffer)
         # Rewind so that loading starts at the beginning of the dump.
         _buffer.seek(0)
         new_s = pickle.load(_buffer)
     assert s == new_s
예제 #13
0
def samples():
    """
    Build bare-bones Samples, e.g. for use in mocking a Project.

    :return Iterable[Sample]: one Sample per entry of the sample-to-protocol
        mapping, each defining just a name and a protocol
    """
    collection = []
    for name, protocol in PROTOCOL_BY_SAMPLE.items():
        collection.append(
            Sample({SAMPLE_NAME_COLNAME: name, "protocol": protocol}))
    return collection
예제 #14
0
 def test_accuracy_and_allows_empty_data_sources(self, colname, src_key,
                                                 prj_data, data_type,
                                                 include_data_sources):
     """ Lookup is accurate with a source map, and null without one. """
     raw = {
         SAMPLE_NAME_COLNAME: "random-sample",
         "prj": prj_data,
         colname: src_key,
     }
     s = Sample(data_type(raw))
     assert isinstance(s.prj, AttMap)
     data_sources = None
     if include_data_sources:
         data_sources = s.prj.data_sources
     path = s.locate_data_source(
         data_sources, column_name=colname, source_key=src_key)
     if not include_data_sources:
         assert path is None
     else:
         assert self.PATH_BY_KEY[src_key] == path
예제 #15
0
 def test_equivalence_between_implicit_and_explicit_prj(
         self, prj_data, data_src_attr, src_key, explicit):
     """ Setting file paths gives the same result with or without explicit project. """

     # Explicitly-passed object needs to at least be an AttMap.
     raw = AttMap({
         SAMPLE_NAME_COLNAME: "arbitrary_sample",
         "prj": prj_data,
         data_src_attr: src_key,
         "derived_attributes": [data_src_attr],
     })

     # Create the sample, then pass the project only in the explicit case.
     s = Sample(raw)
     prj_args = (raw.prj,) if explicit else ()
     s.set_file_paths(*prj_args)

     # Either way, the source attribute must hold the mapped value.
     assert self.DATA_SOURCES[src_key] == getattr(s, data_src_attr)
예제 #16
0
    def test_no_derived_attributes(self, prj_data, exclude_derived_attributes):
        """ Without derived attributes, setting file paths adds no mapped value. """

        # Parameterization over the data source key is of no interest here,
        # so use a constant one.
        src_key = self.SOURCE_KEYS[0]

        if exclude_derived_attributes:
            prj_data.pop("derived_attributes")

        # Explicitly-passed object needs to at least be an AttMap.
        sample_data = AttMap({SAMPLE_NAME_COLNAME: "arbitrary_sample",
                              "prj": prj_data,
                              DATA_SOURCE_COLNAME: src_key})

        # A freshly built sample has no attribute or item named by the key.
        fresh = Sample(sample_data)
        assert not hasattr(fresh, src_key)
        assert src_key not in fresh

        # Create the sample anew and make the call under test.
        s = Sample(sample_data)
        s.set_file_paths()

        # Check results.
        target = self.DATA_SOURCES[src_key]
        if exclude_derived_attributes:
            # The value to which the source key maps won't have been added.
            assert not hasattr(s, target)
            assert target not in s
        else:
            # The data source attribute and item both hold the mapped value.
            for observed in (getattr(s, DATA_SOURCE_COLNAME),
                             s[DATA_SOURCE_COLNAME]):
                assert target == observed
예제 #17
0
def test_exception_type_matches_access_mode(data_type, accessor):
    """ Failed attribute access and failed item access raise different errors. """
    sample = Sample(data_type({SAMPLE_NAME_COLNAME: "placeholder"}))
    if accessor not in ("attr", "item"):
        # Guard against a parameterization this test does not know about.
        pytest.fail(
            "Unknown access mode for exception type test: {}".format(accessor))
    if accessor == "item":
        with pytest.raises(KeyError):
            sample["not-set"]
    else:
        with pytest.raises(AttributeError):
            sample.undefined_attribute
예제 #18
0
    def test_equivalence_between_implicit_and_explicit_prj(
            self, prj_data, data_src_attr, src_key, explicit):
        """ Explicit and implicit project give the same data source value. """

        # Explicitly-passed object needs to at least be an AttMap.
        fields = {SAMPLE_NAME_COLNAME: "arbitrary_sample",
                  "prj": prj_data,
                  data_src_attr: src_key,
                  "derived_attributes": [data_src_attr]}
        sample_data = AttMap(fields)

        # Create the sample and make the call under test.
        s = Sample(sample_data)
        if not explicit:
            s.set_file_paths()
        else:
            s.set_file_paths(sample_data.prj)

        # Check results.
        observed = getattr(s, data_src_attr)
        assert self.DATA_SOURCES[src_key] == observed
예제 #19
0
def test_input_files(files, test_type, tmpdir):
    """ Input file paths are accessible on a Sample, before and after reload. """
    joined = " ".join(files)
    s = Sample({SAMPLE_NAME_COLNAME: "test-sample",
                DATA_SOURCE_COLNAME: joined})
    assert joined == s.data_source
    assert files == s.input_file_paths
    if test_type != "to_disk":
        return
    yaml_path = tmpdir.join("test-sample.yaml").strpath
    s.to_yaml(yaml_path)
    print("Sample items: {}".format(s.items()))
    with open(yaml_path) as handle:
        restored_data = yaml.load(handle, SafeLoader)
    print("reloaded keys: {}".format(list(restored_data.keys())))
    try:
        restored = Sample(restored_data)
    except Exception:
        # Show the raw file content to aid diagnosis, then propagate.
        with open(yaml_path) as handle:
            print("LINES (below):\n{}".format("".join(handle.readlines())))
        raise
    assert files == restored.input_file_paths
예제 #20
0
 def basic_sample(self):
     """ Supply a minimal Sample, defined by name alone, to test cases. """
     data = {SAMPLE_NAME_COLNAME: "arbitrarily_named_sample"}
     return Sample(data)
예제 #21
0
        with open(tmpdir.join("pipe-iface-conf.yaml").strpath, 'w') as f:
            yaml.safe_dump(bundled_piface, f)
    else:
        pipe_iface_config = bundled_piface
    pi = PipelineInterface(pipe_iface_config)
    assert bundled_piface == pi.pipe_iface_config
    assert pi.pipe_iface_file == (pipe_iface_config if from_file else None)


# Each case pairs the name of a function with the keyword arguments for it.
# NOTE(review): presumably these name PipelineInterface methods -- the call
# itself is not visible in this excerpt; confirm.
@pytest.mark.parametrize(
    argnames="funcname_and_kwargs",
    argvalues=[("choose_resource_package", {
        "file_size": 4
    }),
               ("get_arg_string", {
                   "sample": Sample({"sample_name": "arbitrary-sample-name"})
               }), ("get_attribute", {
                   "attribute_key": "irrelevant-attr-name"
               }), ("get_pipeline_name", {})])
@pytest.mark.parametrize(argnames="use_resources", argvalues=[False, True])
def test_unconfigured_pipeline_exception(funcname_and_kwargs, use_resources,
                                         pi_with_resources):
    """ Each public function throws same exception given unmapped pipeline. """
    pi = pi_with_resources
    if not use_resources:
        # Strip the default resource package from every pipeline.
        for pipeline in pi.pipelines:
            try:
                del pipeline["resources"][DEFAULT_COMPUTE_RESOURCES_NAME]
            except KeyError:
                # Already no default resource package.
                pass
    # NOTE(review): no call or assertion on the function under test is
    # visible past this point; the excerpt looks truncated -- confirm.
 def test_sheet_attr_order(self, proj):
     """ Sample name is the first key of the sample's sheet dict. """
     first_row = getattr(proj, NAME_TABLE_ATTR).iloc[0]
     sheet = Sample(first_row).get_sheet_dict()
     keys = list(sheet)
     assert SAMPLE_NAME_COLNAME == keys[0]
예제 #23
0
 def test_non_project_prj_ref_as_arg(self, fetch, prj_ref_val, expect):
     """ A non-Project value given as prj argument yields the expected reference. """
     data = {SAMPLE_NAME_COLNAME: "testsample"}
     s = Sample(data, prj=prj_ref_val)
     observed = fetch(s)
     assert expect == observed
예제 #24
0
 def test_non_project_prj_ref(self, fetch, prj_ref_val, expect):
     """ A non-Project value stored in the data yields the expected reference. """
     data = {SAMPLE_NAME_COLNAME: "testsample", PRJ_REF: prj_ref_val}
     s = Sample(data)
     observed = fetch(s)
     assert expect == observed
예제 #25
0
 def test_no_prj_ref(self, has_ref, get_ref):
     """ A Sample built without a project still has a reference, and it's null. """
     data = {SAMPLE_NAME_COLNAME: "test-sample"}
     bare = Sample(data)
     assert has_ref(bare)
     assert get_ref(bare) is None
예제 #26
0
# variables = ['sample_name', 'cell_line', 'condition', 'timepoint', 'knockout_clone', 'replicate', 'experiment_name']
# analysis.annotate_with_sample_metadata(attributes=variables, quant_matrix="coverage_annotated")

# Keep only rows whose sum across columns is nonzero, then drop rows with
# missing values and duplicated rows.
# NOTE(review): presumably `coverage_annotated` is a pandas DataFrame -- confirm.
analysis.expression = analysis.coverage_annotated.loc[~(analysis.coverage_annotated.sum(axis=1) == 0)].dropna().drop_duplicates()


# Rename the three listed columns; each of the new names contains a "-".
analysis.expression = analysis.expression.rename(
    columns={"HAP1_WT": "HAP1_WT-WT", "C8": "HAP1_MTHFD1KO-C8", "D3": "HAP1_MTHFD1KO-D3"})

# Build one minimal Sample per column. sample_name is the column name,
# cell_line is the text before the first "_", perturbation is the rest of
# the name, and batch is "b1" if the name contains "-" and "b2" otherwise.
samples = list()
for i, col in enumerate(analysis.expression.columns):
    samples.append(
        Sample(
            pd.Series([col, col.split("_")[0], "_".join(col.split("_")[1:]), "b1" if "-" in col else "b2"],
                      index=['sample_name', 'cell_line', 'perturbation', 'batch'])))

analysis._samples = samples
# Replace the flat column labels with a four-level MultiIndex built from
# the attributes of the Samples just created.
analysis.expression.columns = pd.MultiIndex.from_arrays(
    [
        [s.name for s in analysis._samples],
        [s.cell_line for s in analysis._samples],
        [s.perturbation for s in analysis._samples],
        [s.batch for s in analysis._samples]],
    names=['sample_name', "cell_line", "perturbation", "batch"])
# NOTE(review): presumably serializes the analysis object to disk -- confirm.
analysis.to_pickle()


# Unsupervised analysis
unsupervised_analysis(
 def test_sheet_attr_order(self, proj):
     """ The sheet dict of a Sample leads with the sample name. """
     table = getattr(proj, NAME_TABLE_ATTR)
     s = Sample(table.iloc[0])
     ordered_keys = list(s.get_sheet_dict())
     assert SAMPLE_NAME_COLNAME == ordered_keys[0]
예제 #28
0
 def test_only_peppy_name(self, fetch, name_attr):
     """ Sample's name is fetched through each name attribute and access syntax. """
     expected = "testsample"
     data = {SAMPLE_NAME_COLNAME: expected}
     s = Sample(data)
     observed = fetch(s, name_attr)
     assert expected == observed
예제 #29
0
 def sample(self):
     """ Provide a Sample defined by name alone. """
     data = {SAMPLE_NAME_COLNAME: "basic_sample"}
     return Sample(data)