def test_single_sample(
            self, tmpdir, path_proj_conf_file, which_sample_index):
        """ One annotated Sample gives a one-row sheet and a one-Sample Project. """

        # Select the data row for the sample under test.
        sample_values = DATA[which_sample_index]

        # Annotations file: the header line, then the single sample's line.
        annotations = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE)
        with open(annotations, 'w') as handle:
            for fields in (COLUMNS, map(str, sample_values)):
                handle.write("{}\n".format(",".join(fields)))

        # Construct the Project and have it build the sheet.
        project = Project(path_proj_conf_file)
        built = project.build_sheet()

        # Exactly one row in a DataFrame, and one sample on the Project.
        assert isinstance(built, pd.DataFrame)
        assert 1 == len(built)
        assert 1 == project.num_samples

        # Each written column must be readable back from the Sample with
        # its original value; other attributes on the Sample are ignored.
        only_sample = list(project.samples)[0]
        for name, expected in zip(COLUMNS, sample_values):
            observed = getattr(only_sample, name)
            try:
                assert expected == observed
            except AssertionError as mismatch:
                # Give the value a second chance after integer conversion;
                # if that also differs, surface the first failure.
                if not (expected == int(observed)):
                    raise mismatch
 def test_subproject_activation_updates_sample_annotations_path(conf_file):
     """ After subproject activation, the annotations file name is the child's. """
     with mock.patch("peppy.project.Project.parse_sample_sheet"):
         project = Project(conf_file)
         project.activate_subproject(_SP_NAME)
     # Only the final path component is compared, not its folder.
     annotations_path = project[METADATA_KEY][NAME_TABLE_ATTR]
     assert _CHILD_ANNS == os.path.basename(annotations_path)
    def test_single_sample(
            self, tmpdir, path_proj_conf_file, which_sample_index):
        """ One annotated Sample gives a one-row sheet and a one-Sample Project. """

        # Select the data row for the sample under test.
        sample_values = DATA[which_sample_index]

        # Annotations file: the header line, then the single sample's line.
        annotations = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE)
        with open(annotations, 'w') as handle:
            for fields in (COLUMNS, map(str, sample_values)):
                handle.write("{}\n".format(",".join(fields)))

        # Construct the Project and have it build the sheet.
        project = Project(path_proj_conf_file)
        built = project.build_sheet()

        # Exactly one row in a DataFrame, and one sample on the Project.
        assert isinstance(built, pd.DataFrame)
        assert 1 == len(built)
        assert 1 == project.num_samples

        # Each written column must be readable back from the Sample with
        # its original value; other attributes on the Sample are ignored.
        only_sample = list(project.samples)[0]
        for name, expected in zip(COLUMNS, sample_values):
            observed = getattr(only_sample, name)
            try:
                assert expected == observed
            except AssertionError as mismatch:
                # Give the value a second chance after integer conversion;
                # if that also differs, surface the first failure.
                if not (expected == int(observed)):
                    raise mismatch
 def test_no_samples(self, protocols, delimiter, path_empty_project):
     """ A sheet built from the empty project is itself empty. """
     # Echo the config path so a failing run shows which file was used.
     print("Test config file: {}".format(path_empty_project))
     empty_project = Project(path_empty_project)
     # Whatever protocol(s) are requested, no rows are expected.
     assert empty_project.build_sheet(*protocols).empty
 def test_subproject_activation_updates_sample_annotations_path(conf_file):
     """ After subproject activation, the annotations file name is the child's. """
     with mock.patch(SHEET_PARSE_FUNCPATH):
         project = Project(conf_file)
         project.activate_subproject(_SP_NAME)
     # Only the final path component is compared, not its folder.
     annotations_path = project[METADATA_KEY][NAME_TABLE_ATTR]
     assert _CHILD_ANNS == os.path.basename(annotations_path)
    def test_multiple_samples(
            self, protocols, path_anns_file, path_proj_conf_file):
        """ A multi-Sample Project keeps all Samples; its sheet honors protocols. """

        project = Project(path_proj_conf_file)

        # The Project holds every sample, whatever is requested of the sheet.
        assert len(SAMPLE_NAMES) == sum(1 for _ in project.samples)

        # With protocol(s) requested, count one expected row per match
        # between a requested protocol and an entry of PROTOCOLS.
        if protocols:
            exp_num_samples = sum(
                1 for requested in protocols
                for known in PROTOCOLS if known == requested)
        else:
            exp_num_samples = len(SAMPLE_NAMES)
        sheet = project.build_sheet(*protocols)
        assert exp_num_samples == len(sheet)

        def as_expected(sd):
            # With protocols given, the row's protocol must be one of them;
            # with none given, the check is non-membership in an empty set.
            is_member = sd.protocol in set(protocols)
            return is_member if protocols else not is_member

        for _, row in sheet.iterrows():
            assert as_expected(row)
 def test_relative_path_metadata_stasis(self, conf_file, eq_attr):
     """ The attribute reads the same on the main Project and the activated subproject. """
     with mock.patch(SHEET_PARSE_FUNCPATH):
         main_project = Project(conf_file)
     # Capture the value before activation so later changes cannot affect it.
     expected = getattr(main_project, eq_attr)
     with mock.patch(SHEET_PARSE_FUNCPATH):
         subproject = main_project.activate_subproject(_SP_NAME)
     observed = getattr(subproject, eq_attr)
     assert expected == observed
Example #8
0
 def test_amendments_activation_interactive(self, example_pep_cfg_path):
     """
     Check that the "newLib" amendment can be activated on an existing Project
     """
     project = Project(cfg=example_pep_cfg_path)
     project.activate_amendments("newLib")
     # After activation every sample is expected to carry protocol "ABCD".
     protocols = [sample["protocol"] for sample in project.samples]
     assert all(protocol == "ABCD" for protocol in protocols)
     assert project.amendments is not None
Example #9
0
 def test_str_repr_correctness(self, example_pep_cfg_path, defer):
     """
     Check that the Project's string form mentions its config path,
     its sample count and its name
     """
     project = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer)
     rendered = project.__str__()
     assert example_pep_cfg_path in rendered
     sample_count_text = "{} samples".format(str(len(project.samples)))
     assert sample_count_text in rendered
     assert project.name in rendered
Example #10
0
def _get_pair_to_post_init_test(cfg_path):
    """
    Build two Projects from one config: one constructed directly, and one
    constructed with deferred sample creation followed by an explicit
    ``create_samples()`` call.

    :param cfg_path: path to the project config file
    :type cfg_path: str
    :return: list of two project objects to compare
    :rtype: list[peppy.Project]
    """
    direct = Project(cfg=cfg_path)
    deferred = Project(cfg=cfg_path, defer_samples_creation=True)
    # Create the samples that construction was told to postpone.
    deferred.create_samples()
    return [direct, deferred]
Example #11
0
 def test_derive(self, example_pep_cfg_path):
     """
     Check that every sample has "file_path" and lists it among its
     "_derived_cols_done"
     """
     project = Project(cfg=example_pep_cfg_path)
     has_attr = ["file_path" in sample for sample in project.samples]
     assert all(has_attr)
     marked_done = [
         "file_path" in sample["_derived_cols_done"]
         for sample in project.samples
     ]
     assert all(marked_done)
 def test_old_encodings(
         delimiter, tmpdir, main_table_file,
         subann_table_file, anns_key, subs_key):
     """ Tables are reachable by the given keys, each access warning of deprecation. """
     # Write both tables for the delimiter under test.
     anns_data, subs_data = LINES_BY_DELIM[delimiter]
     anns_file = _write(main_table_file, anns_data)
     subs_file = _write(subann_table_file, subs_data)

     # Assemble the metadata section and dump the config file.
     metadata = {
         anns_key: anns_file,
         subs_key: subs_file,
         OUTDIR_KEY: tmpdir.strpath
     }
     conf_file = tmpdir.join("conf.yaml").strpath
     with open(conf_file, 'w') as cfg:
         yaml.dump({METADATA_KEY: metadata}, cfg)
     prj = Project(conf_file)

     def fetch(key):
         # Every read through the key must raise a DeprecationWarning.
         with pytest.warns(DeprecationWarning):
             return getattr(prj, key)

     # Read each table twice; repeated access must warn again.
     anns1 = fetch(anns_key)
     anns2 = fetch(anns_key)
     subs1 = fetch(subs_key)
     subs2 = fetch(subs_key)

     # The two reads of each table must agree with one another.
     assert anns1.equals(anns2)
     assert subs1.equals(subs2)
Example #13
0
 def test_minimal_configuration_name_inference(
         self, tmpdir, minimal_project_conf_path, env_config_filepath):
     """ Project name equals the last component of the temp folder path. """
     project = Project(minimal_project_conf_path)
     expected_name = os.path.basename(tmpdir.strpath)
     assert expected_name == project.name
Example #14
0
 def test_missing_sample_name(self, example_pep_cfg_noname_path):
     """
     Check that constructing a Project from the "noname" config raises
     InvalidSampleTableFileException
     """
     expected_error = InvalidSampleTableFileException
     with pytest.raises(expected_error):
         Project(cfg=example_pep_cfg_noname_path)
Example #15
0
 def test_sheet_dict_excludes_private_attrs(self, example_pep_cfg_path):
     """
     Check that each sample's sheet dict has exactly as many entries as
     the Project's sample table has columns
     """
     project = Project(cfg=example_pep_cfg_path)
     for sample in project.samples:
         n_entries = len(sample.get_sheet_dict())
         n_columns = len(project.sample_table.columns)
         assert n_entries == n_columns
def project(request, tmpdir, env_config_filepath):
    """ Build a Project from a freshly written annotations table and config. """

    # Annotations table: a sample-name column plus a "data" column, with
    # one row per requested sample.
    annotations_filename = "anns-fill.tsv"
    anns_path = tmpdir.join(annotations_filename).strpath
    num_samples = request.getfixturevalue("num_samples")
    table_columns = OrderedDict()
    table_columns[SAMPLE_NAME_COLNAME] = [
        "sample{}".format(i) for i in range(num_samples)]
    table_columns["data"] = range(num_samples)
    table = pd.DataFrame(table_columns)
    with open(anns_path, 'w') as anns_file:
        table.to_csv(anns_file, sep="\t", index=False)

    # Path entries go into a dedicated "paths" section when the requesting
    # class's hook fixture is truthy, and into the metadata section otherwise.
    config_data = {METADATA_KEY: {NAME_TABLE_ATTR: annotations_filename}}
    use_paths_section = request.getfixturevalue(
        request.cls.CONFIG_DATA_PATHS_HOOK)
    if use_paths_section:
        paths_dest = config_data["paths"] = {}
    else:
        paths_dest = config_data[METADATA_KEY]

    # Results and submission folders are stored as given; every other path
    # is placed beneath the temp folder.
    stored_as_given = [RESULTS_FOLDER_KEY, SUBMISSION_FOLDER_KEY]
    for path_name, path in PATH_BY_TYPE.items():
        if path_name in stored_as_given:
            paths_dest[path_name] = path
        else:
            paths_dest[path_name] = os.path.join(tmpdir.strpath, path)

    # Persist the config and hand back a Project built from it.
    conf_path = tmpdir.join("proj-conf.yaml").strpath
    with open(conf_path, 'w') as conf_file:
        yaml.safe_dump(config_data, conf_file)

    return Project(conf_path)
Example #17
0
 def test_missing_sample_name_derive(self, example_pep_cfg_noname_path):
     """
     Check that a Project built from the "noname" config is created without
     error and that its sample table has the sample name column
     """
     project = Project(cfg=example_pep_cfg_noname_path)
     table_columns = project.sample_table.columns
     assert SAMPLE_NAME_ATTR in table_columns
Example #18
0
    def test_suggests_implied_attributes(
            self, recwarn, tmpdir, path_sample_anns,
            project_config_data, ideally_implied_mappings):
        """ Each mapping put directly in the config draws one DeprecationWarning. """

        # Work on a copy so the fixture's config data is left untouched.
        conf_data = copy.deepcopy(project_config_data)
        conf_data.update(ideally_implied_mappings)

        # Persist the config; the target must not exist beforehand.
        conf_file = tmpdir.join("proj_conf.yaml").strpath
        assert not os.path.isfile(conf_file), \
            "Test project temp config file already exists: {}".format(conf_file)
        with open(conf_file, 'w') as cf:
            yaml.safe_dump(conf_data, cf)

        # Start from an empty recorder, make every warning visible, then
        # construct the Project to trigger the warning(s).
        assert 0 == len(recwarn)
        warnings.simplefilter('always')
        Project(conf_file)

        # Keep only the text of the deprecation warnings.
        remaining = []
        for caught in recwarn:
            if isinstance(caught.message, DeprecationWarning):
                remaining.append(str(caught.message))
        assert len(ideally_implied_mappings) == len(remaining)

        # Every section must match exactly one message naming both it and
        # the implications declaration; a matched message is then removed
        # so it cannot be counted for another section.
        for section in ideally_implied_mappings:
            hits = [
                text for text in remaining
                if section in text and IMPLICATIONS_DECLARATION in text
            ]
            assert 1 == len(hits)
            remaining.remove(hits[0])
Example #19
0
 def test_no_sample_subannotation_in_config(
         self, tmpdir, spec_type, lazy, proj_conf_data, path_sample_anns):
     """ Project's subannotation attribute is None when the config gives none. """
     metadata = proj_conf_data[METADATA_KEY]

     # Precondition: the fixture data must start out with the key present.
     try:
         assert SAMPLE_SUBANNOTATIONS_KEY in metadata
     except AssertionError:
         # Dump context to ease diagnosis, then let the failure propagate.
         diagnostics = (
             "Project metadata section lacks '{}'".format(
                 SAMPLE_SUBANNOTATIONS_KEY),
             "All config data: {}".format(proj_conf_data),
             "Config metadata section: {}".format(metadata),
         )
         for text in diagnostics:
             print(text)
         raise

     # Express "no subannotation" in the requested way.
     if spec_type == "missing":
         del metadata[SAMPLE_SUBANNOTATIONS_KEY]
     elif spec_type == "as_null":
         metadata[SAMPLE_SUBANNOTATIONS_KEY] = None
     else:
         raise ValueError(
             "Unknown way to specify no merge table: {}".format(spec_type))

     # Write the adjusted config and build the Project from it.
     path_config_file = os.path.join(tmpdir.strpath, "project_config.yaml")
     with open(path_config_file, 'w') as conf_file:
         yaml.safe_dump(proj_conf_data, conf_file)
     project = Project(path_config_file, defer_sample_construction=lazy)
     assert getattr(project, SAMPLE_SUBANNOTATIONS_KEY) is None
Example #20
0
 def test_sample_name_availability(self, path_project_conf,
                                   path_sample_anns, lazy):
     """ Sample names always available on Project.

     The expected names are the first comma-separated field of every
     non-blank line after the header of the annotations file.
     """
     with open(path_sample_anns, 'r') as anns_file:
         next(anns_file, None)  # Skip the header; tolerate an empty file.
         # Lines read from a file keep their newline, so a plain truthiness
         # test never filters anything: test the stripped line to drop blank
         # ones, and trim the newline so a single-column row yields a clean
         # name.
         expected_sample_names = [
             line.rstrip("\n").split(",")[0]
             for line in anns_file if line.strip()
         ]
     p = Project(path_project_conf, defer_sample_construction=lazy)
     assert expected_sample_names == list(p.sample_names)
Example #21
0
 def test_missing_amendment_raises_error(self, example_pep_cfg_path, defer):
     """
     Check that requesting the amendment "nieznany" at construction raises
     MissingAmendmentError
     """
     project_kwargs = dict(
         cfg=example_pep_cfg_path,
         amendments="nieznany",
         defer_samples_creation=defer,
     )
     with pytest.raises(MissingAmendmentError):
         Project(**project_kwargs)
Example #22
0
 def test_sample_updates_regenerate_df(self, example_pep_cfg_path):
     """
     Check that the sample table read after a Sample is updated differs
     from the one read before
     """
     project = Project(cfg=example_pep_cfg_path)
     table_before = project.sample_table
     first_sample = project.samples[0]
     first_sample.update({"witam": "i_o_zdrowie_pytam"})
     table_after = project.sample_table
     assert not table_after.equals(table_before)
Example #23
0
 def test_amendments(self, example_pep_cfg_path, defer):
     """
     Check that requesting the "newLib" amendment at construction leaves
     every sample with protocol "ABCD"
     """
     project = Project(
         cfg=example_pep_cfg_path,
         amendments="newLib",
         defer_samples_creation=defer,
     )
     protocols = [sample["protocol"] for sample in project.samples]
     assert all(protocol == "ABCD" for protocol in protocols)
Example #24
0
 def test_old_format_support(self, example_pep_cfg_path, defer):
     """
     Verify that old format (without implications and subprojects)
     is still supported

     The DATA environment variable is set to "data" only while the test
     runs; its previous state is restored afterwards so that the change
     does not leak into other tests.
     """
     unset = object()  # Sentinel: tells "was not set" apart from any value.
     previous = os.environ.get("DATA", unset)
     os.environ["DATA"] = "data"
     try:
         p = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer)
         assert all(["read1" in s for s in p.samples])
     finally:
         # Restore the environment whether the assertions passed or not.
         if previous is unset:
             os.environ.pop("DATA", None)
         else:
             os.environ["DATA"] = previous
Example #25
0
 def test_subsample_table_works_when_no_sample_mods(
         self, example_pep_cfg_path):
     """
     Check that at least one sample of the Project has a "file" value
     other than "multi"
     """
     project = Project(cfg=example_pep_cfg_path)
     file_values = [sample["file"] for sample in project.samples]
     assert any(value != "multi" for value in file_values)
Example #26
0
 def test_missing_sample_name_custom_index(
         self, example_pep_cfg_noname_path):
     """
     Check that the "noname" config builds without error when the sample
     table index is set to "id", and that "id" is then the sample name column
     """
     custom_index = "id"
     project = Project(
         cfg=example_pep_cfg_noname_path, sample_table_index=custom_index)
     assert project.sample_name_colname == "id"
Example #27
0
 def test_no_description(self, example_pep_cfg_path, defer):
     """
     Check that the Project is created and has a "description" entry
     whose value is None
     """
     project = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer)
     assert isinstance(project, Project)
     # The key must be present, and the attribute must read as None.
     assert "description" in project
     assert project.description is None
Example #28
0
def _make_flags(cfg, type, count):
    """
    Touch a "<type>.flag" file in the results folder of each of the first
    ``count`` samples of the Project built from ``cfg``.

    :param cfg: value passed to the Project constructor
    :param type: flag name; the file created is named type + ".flag"
    :param count: number of leading samples that receive a flag file
    """
    project = Project(cfg)
    results_root = os.path.join(
        project[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY], "results_pipeline")
    flag_name = type + ".flag"
    for sample in project.samples[:count]:
        sample_folder = os.path.join(results_root, sample[SAMPLE_NAME_ATTR])
        if not os.path.exists(sample_folder):
            os.makedirs(sample_folder)
        # Append mode creates the file if absent and leaves content intact.
        with open(os.path.join(sample_folder, flag_name), 'a'):
            pass
Example #29
0
    def test_pickle_roundtrip(self, minimal_project_conf_path):
        """ Test whether pickle roundtrip produces a comparable object """
        prj = Project(minimal_project_conf_path)

        # The context manager guarantees the temporary file is closed even
        # when dumping or loading raises, rather than leaking the handle.
        with tempfile.TemporaryFile() as _buffer:
            pickle.dump(prj, _buffer)
            _buffer.seek(0)  # Rewind so that load reads from the start.
            new_prj = pickle.load(_buffer)
        assert prj == new_prj
def test_samples_are_generic(path_anns_file, path_proj_conf_file):
    """ Every Sample held by the Project is exactly of type Sample. """
    # The annotations filepath fixture also writes that file, so it is
    # requested here even though its return value is not used locally.
    project = Project(path_proj_conf_file)
    assert len(SAMPLE_NAMES) == project.num_samples
    held = list(project.samples)
    assert project.num_samples == len(held)
    # Exact type identity is required; a subclass instance would fail.
    for sample in held:
        assert Sample is type(sample)
Example #31
0
 def test_str_repr_correctness(self, example_pep_cfg_path):
     """
     Check that each sample's string form mentions the config path and
     the sample's own name
     """
     project = Project(cfg=example_pep_cfg_path)
     for sample in project.samples:
         rendered = sample.__str__(max_attr=100)
         assert example_pep_cfg_path in rendered
         name_text = "Sample '{}'".format(sample.sample_name)
         assert name_text in rendered
 def prj(self, tmpdir, prj_data, sample_lines):
     """ Write config and annotations into the temp folder; return the Project. """
     conf = tmpdir.join(randomize_filename()).strpath
     anns = tmpdir.join(self._ANNS_NAME).strpath
     with open(conf, 'w') as handle:
         yaml.dump(prj_data, handle)
     self._write_lines(anns, sample_lines)
     # Both files must exist on disk before the Project is built.
     required = (
         (conf, "Missing proj conf: {}"),
         (anns, "Missing annotations: {}"),
     )
     for path, template in required:
         assert os.path.isfile(path), template.format(path)
     return Project(conf)
    def test_multiple_samples(
            self, protocols, path_anns_file, path_proj_conf_file):
        """ A multi-Sample Project keeps all Samples; its sheet honors protocols. """

        project = Project(path_proj_conf_file)

        # The Project holds every sample, whatever is requested of the sheet.
        assert len(SAMPLE_NAMES) == sum(1 for _ in project.samples)

        # With protocol(s) requested, count one expected row per match
        # between a requested protocol and an entry of PROTOCOLS.
        if protocols:
            exp_num_samples = sum(
                1 for requested in protocols
                for known in PROTOCOLS if known == requested)
        else:
            exp_num_samples = len(SAMPLE_NAMES)
        sheet = project.build_sheet(*protocols)
        assert exp_num_samples == len(sheet)

        def as_expected(sd):
            # With protocols given, the row's protocol must be one of them;
            # with none given, the check is non-membership in an empty set.
            is_member = sd.protocol in set(protocols)
            return is_member if protocols else not is_member

        for _, row in sheet.iterrows():
            assert as_expected(row)
 def test_no_samples(self, protocols, delimiter, path_empty_project):
     """ A sheet built from the empty project is itself empty. """
     empty_project = Project(path_empty_project)
     # Whatever protocol(s) are requested, no rows are expected.
     assert empty_project.build_sheet(*protocols).empty