def test_single_sample(
        self, tmpdir, path_proj_conf_file, which_sample_index):
    """
    A lone Sample yields a valid Project and a single-row sheet.

    A header line and one data line are written to the annotations file,
    the Project's sheet is built, and the sample's core attribute values
    are compared with what was written.
    """

    # Data values for the sample under test.
    values = DATA[which_sample_index]

    # Write the annotations file: column names, then the one data row.
    anns_path = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE)
    with open(anns_path, 'w') as anns_file:
        header_fields = COLUMNS
        anns_file.write("{}\n".format(",".join(header_fields)))
        data_fields = [str(v) for v in values]
        anns_file.write("{}\n".format(",".join(data_fields)))

    # Create the Project and build its sheet.
    prj = Project(path_proj_conf_file)
    sheet = prj.build_sheet()

    # Exactly one row / one sample is expected.
    assert isinstance(sheet, pd.DataFrame)
    assert 1 == len(sheet)
    assert 1 == prj.num_samples

    # The Project may add attributes to the sample, but each originally
    # written value should still be there.
    only_sample = list(prj.samples)[0]
    for column, expected in zip(COLUMNS, values):
        observed = getattr(only_sample, column)
        try:
            assert expected == observed
        except AssertionError as direct_mismatch:
            # Retry after integer conversion of the observed value; if
            # that comparison fails too, report the original mismatch.
            try:
                assert expected == int(observed)
            except AssertionError:
                raise direct_mismatch
def test_subproject_activation_updates_sample_annotations_path(conf_file):
    """ Activating the subproject swaps in its sample annotations file. """
    # Sample sheet parsing is patched out; only the config pointer matters.
    with mock.patch("peppy.project.Project.parse_sample_sheet"):
        prj = Project(conf_file)
        prj.activate_subproject(_SP_NAME)
    anns_pointer = prj[METADATA_KEY][NAME_TABLE_ATTR]
    observed_name = os.path.split(anns_pointer)[1]
    assert _CHILD_ANNS == observed_name
def test_single_sample(self, tmpdir, path_proj_conf_file, which_sample_index):
    """
    One Sample alone is enough for a valid Project and sheet.

    The annotations file is written with the column names and a single
    row; the built sheet must then have one row and the sample must
    carry the written values.
    """

    # Select the values for the parameterized sample index.
    values = DATA[which_sample_index]

    # Header line first, then the single row of stringified values.
    anns_path = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE)
    with open(anns_path, 'w') as out:
        out.write("{}\n".format(",".join(COLUMNS)))
        row_text = ",".join([str(val) for val in values])
        out.write("{}\n".format(row_text))

    # Build the sheet from a fresh Project.
    project = Project(path_proj_conf_file)
    sheet = project.build_sheet()

    # Single-row DataFrame, single sample.
    assert isinstance(sheet, pd.DataFrame)
    assert 1 == len(sheet)
    assert 1 == project.num_samples

    # Additional attributes may exist on the sample; the core values
    # written above must be unchanged.
    the_sample = list(project.samples)[0]
    for name, want in zip(COLUMNS, values):
        got = getattr(the_sample, name)
        try:
            assert want == got
        except AssertionError as first_failure:
            # Second chance: compare against the int-converted value,
            # surfacing the first failure if this one fails as well.
            try:
                assert want == int(got)
            except AssertionError:
                raise first_failure
def test_no_samples(self, protocols, delimiter, path_empty_project):
    """ Building a sheet works, and gives an empty one, with no Samples. """
    print("Test config file: {}".format(path_empty_project))
    empty_prj = Project(path_empty_project)
    # Whatever protocols are requested, there is nothing to put in the sheet.
    assert empty_prj.build_sheet(*protocols).empty
def test_subproject_activation_updates_sample_annotations_path(conf_file):
    """ The subproject's annotations file pointer replaces the original. """
    # Patch out sample sheet parsing while creating and activating.
    with mock.patch(SHEET_PARSE_FUNCPATH):
        project = Project(conf_file)
        project.activate_subproject(_SP_NAME)
    table_path = project[METADATA_KEY][NAME_TABLE_ATTR]
    _, table_filename = os.path.split(table_path)
    assert _CHILD_ANNS == table_filename
def test_multiple_samples(self, protocols, path_anns_file, path_proj_conf_file):
    """
    A Project with several Samples builds a correctly filtered sheet.

    Without protocols the sheet has one row per sample name; with
    protocols it has one row per PROTOCOLS entry equal to a requested
    protocol, and each row's protocol is among those requested.
    """
    prj = Project(path_proj_conf_file)

    # The Project itself always holds every sample.
    assert len(SAMPLE_NAMES) == sum(1 for _ in prj.samples)

    # Expected sheet size depends on whether protocols were requested.
    if protocols:
        exp_num_samples = sum(
            sum(1 for known in PROTOCOLS if known == requested)
            for requested in protocols)
    else:
        exp_num_samples = len(SAMPLE_NAMES)

    sheet = prj.build_sheet(*protocols)
    assert exp_num_samples == len(sheet)

    # With protocols, each row must belong to one of them; without, the
    # membership check is against an empty set and so must be False.
    requested_set = set(protocols)
    membership_expected = bool(protocols)
    for _, row in sheet.iterrows():
        assert (row.protocol in requested_set) == membership_expected
def test_relative_path_metadata_stasis(self, conf_file, eq_attr):
    """ A subproject that leaves a metadata path alone preserves it. """
    # Sheet parsing is patched for both construction and activation.
    with mock.patch(SHEET_PARSE_FUNCPATH):
        parent = Project(conf_file)
    path_before = getattr(parent, eq_attr)
    with mock.patch(SHEET_PARSE_FUNCPATH):
        activated = parent.activate_subproject(_SP_NAME)
    path_after = getattr(activated, eq_attr)
    assert path_before == path_after
def test_amendments_activation_interactive(self, example_pep_cfg_path):
    """
    Check that an amendment can be activated on an existing Project.

    After activating "newLib", every sample's protocol must be "ABCD"
    and the Project's amendments must not be None.
    """
    prj = Project(cfg=example_pep_cfg_path)
    prj.activate_amendments("newLib")
    protocol_ok = [smp["protocol"] == "ABCD" for smp in prj.samples]
    assert all(protocol_ok)
    assert prj.amendments is not None
def test_str_repr_correctness(self, example_pep_cfg_path, defer):
    """
    Check the content of the Project's string representation.

    It must mention the config path, the sample count followed by
    " samples", and the project name.
    """
    prj = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer)
    text = str(prj)
    assert example_pep_cfg_path in text
    count_phrase = "{} samples".format(len(prj.samples))
    assert count_phrase in text
    assert prj.name in text
def _get_pair_to_post_init_test(cfg_path):
    """
    Build two Projects from one config: eager and deferred-then-created.

    :param cfg_path: path to the project config file
    :type cfg_path: str
    :return: list of two project objects to compare; the first is
        created normally, the second with deferred sample creation
        followed by an explicit create_samples() call
    :rtype: list[peppy.Project]
    """
    eager = Project(cfg=cfg_path)
    deferred = Project(cfg=cfg_path, defer_samples_creation=True)
    deferred.create_samples()
    return [eager, deferred]
def test_derive(self, example_pep_cfg_path):
    """
    Check that the declared attribute derivation was carried out.

    Every sample must contain "file_path" and list it under
    "_derived_cols_done".
    """
    prj = Project(cfg=example_pep_cfg_path)
    has_attr = ["file_path" in smp for smp in prj.samples]
    assert all(has_attr)
    marked_derived = [
        "file_path" in smp["_derived_cols_done"] for smp in prj.samples
    ]
    assert all(marked_derived)
def test_old_encodings(
        delimiter, tmpdir, main_table_file, subann_table_file,
        anns_key, subs_key):
    """
    Table attributes under the given keys work but warn of deprecation.

    Each key is read twice from the Project; every read must emit a
    DeprecationWarning, and the two values read per key must be equal.
    """

    # Write both tables and assemble the config that points to them.
    anns_data, subs_data = LINES_BY_DELIM[delimiter]
    anns_file = _write(main_table_file, anns_data)
    subs_file = _write(subann_table_file, subs_data)
    conf_file = tmpdir.join("conf.yaml").strpath
    metadata = {
        anns_key: anns_file,
        subs_key: subs_file,
        OUTDIR_KEY: tmpdir.strpath
    }
    conf_data = {METADATA_KEY: metadata}

    # Persist the config and create the Project from it.
    with open(conf_file, 'w') as cfg:
        yaml.dump(conf_data, cfg)
    prj = Project(conf_file)

    def fetch_expecting_deprecation(key):
        """ Read the attribute, requiring a DeprecationWarning. """
        with pytest.warns(DeprecationWarning):
            return getattr(prj, key)

    anns_first = fetch_expecting_deprecation(anns_key)
    anns_second = fetch_expecting_deprecation(anns_key)
    subs_first = fetch_expecting_deprecation(subs_key)
    subs_second = fetch_expecting_deprecation(subs_key)

    # Repeated reads should agree with one another.
    assert anns_first.equals(anns_second)
    assert subs_first.equals(subs_second)
def test_minimal_configuration_name_inference(
        self, tmpdir, minimal_project_conf_path, env_config_filepath):
    """ Project name is taken from the final component of the temp folder. """
    prj = Project(minimal_project_conf_path)
    folder_name = os.path.basename(tmpdir.strpath)
    assert folder_name == prj.name
def test_missing_sample_name(self, example_pep_cfg_noname_path):
    """
    Check that creating a Project from the no-name config raises
    InvalidSampleTableFileException
    """
    expected_error = InvalidSampleTableFileException
    with pytest.raises(expected_error):
        Project(cfg=example_pep_cfg_noname_path)
def test_sheet_dict_excludes_private_attrs(self, example_pep_cfg_path):
    """
    Check that each Sample's sheet dict has as many entries as the
    Project's sample table has columns
    """
    prj = Project(cfg=example_pep_cfg_path)
    for smp in prj.samples:
        sheet_dict = smp.get_sheet_dict()
        table_columns = prj.sample_table.columns
        assert len(sheet_dict) == len(table_columns)
def project(request, tmpdir, env_config_filepath):
    """
    Create a basic Project for the requesting test case.

    A tab-separated annotations file with sample names and a "data"
    column is written to the temp folder, together with a config file
    whose path entries go either in a "paths" section or in the metadata
    section, depending on the requesting class's hook fixture.
    """

    # Annotations: one generated name and one integer per sample.
    annotations_filename = "anns-fill.tsv"
    anns_path = tmpdir.join(annotations_filename).strpath
    num_samples = request.getfixturevalue("num_samples")
    columns = OrderedDict()
    columns[SAMPLE_NAME_COLNAME] = \
        ["sample{}".format(i) for i in range(num_samples)]
    columns["data"] = range(num_samples)
    table = pd.DataFrame(columns)
    with open(anns_path, 'w') as anns_file:
        table.to_csv(anns_file, sep="\t", index=False)

    # Config data starts with just the pointer to the annotations.
    config_data = {METADATA_KEY: {NAME_TABLE_ATTR: annotations_filename}}

    # Choose the section that receives the path entries.
    use_paths_section = \
        request.getfixturevalue(request.cls.CONFIG_DATA_PATHS_HOOK)
    if use_paths_section:
        paths_dest = {}
        config_data["paths"] = paths_dest
    else:
        paths_dest = config_data[METADATA_KEY]

    # Results and submission folders are stored as given; every other
    # path is placed under the temp folder.
    kept_verbatim = [RESULTS_FOLDER_KEY, SUBMISSION_FOLDER_KEY]
    for path_name, path in PATH_BY_TYPE.items():
        if path_name in kept_verbatim:
            paths_dest[path_name] = path
        else:
            paths_dest[path_name] = os.path.join(tmpdir.strpath, path)

    # Write the config and hand back the Project built from it.
    conf_path = tmpdir.join("proj-conf.yaml").strpath
    with open(conf_path, 'w') as conf_file:
        yaml.safe_dump(config_data, conf_file)
    return Project(conf_path)
def test_missing_sample_name_derive(self, example_pep_cfg_noname_path):
    """
    Check that a Project built from the no-name config still ends up
    with the sample name attribute among its sample table columns
    """
    prj = Project(cfg=example_pep_cfg_noname_path)
    table_columns = prj.sample_table.columns
    assert SAMPLE_NAME_ATTR in table_columns
def test_suggests_implied_attributes(
        self, recwarn, tmpdir, path_sample_anns,
        project_config_data, ideally_implied_mappings):
    """
    Mappings placed directly in the project config are deprecated.

    Each key of the parameterized mappings, once merged into the config,
    must yield exactly one DeprecationWarning whose message contains
    both that key and the implications declaration text.
    """

    # Merge the parameterized mappings into a private copy of the config.
    merged_conf = copy.deepcopy(project_config_data)
    merged_conf.update(ideally_implied_mappings)

    # Write the config file, which must not exist yet.
    conf_file = tmpdir.join("proj_conf.yaml").strpath
    assert not os.path.isfile(conf_file), \
        "Test project temp config file already exists: {}".format(conf_file)
    with open(conf_file, 'w') as handle:
        yaml.safe_dump(merged_conf, handle)

    # Start from a clean slate and make sure no warning is filtered out.
    assert 0 == len(recwarn)
    warnings.simplefilter('always')
    Project(conf_file)  # Creation is what should emit the warning(s).

    # Collect the text of each recorded deprecation warning.
    deprecation_msgs = []
    for record in recwarn:
        if isinstance(record.message, DeprecationWarning):
            deprecation_msgs.append(str(record.message))

    # One warning per mapping key, no more and no fewer.
    assert len(ideally_implied_mappings) == len(deprecation_msgs)
    for section in ideally_implied_mappings:
        # Exactly one remaining message may match this key; drop it from
        # the pool so it cannot match another key.
        hits = [
            msg for msg in deprecation_msgs
            if section in msg and IMPLICATIONS_DECLARATION in msg
        ]
        assert 1 == len(hits)
        deprecation_msgs.remove(hits[0])
def test_no_sample_subannotation_in_config(
        self, tmpdir, spec_type, lazy, proj_conf_data, path_sample_anns):
    """
    Project's subannotation attribute is None if the config has none.

    The subannotations entry in the config's metadata section is either
    set to None ("as_null") or deleted ("missing") before the config is
    written and the Project is created.

    :raise ValueError: if spec_type is neither "as_null" nor "missing"
    """
    metadata = proj_conf_data[METADATA_KEY]

    # The entry has to be present initially; if not, print context
    # before letting the assertion failure propagate.
    try:
        assert SAMPLE_SUBANNOTATIONS_KEY in metadata
    except AssertionError:
        print("Project metadata section lacks '{}'".format(
            SAMPLE_SUBANNOTATIONS_KEY))
        print("All config data: {}".format(proj_conf_data))
        print("Config metadata section: {}".format(metadata))
        raise

    # Remove the subannotations specification in the requested manner.
    if spec_type == "missing":
        del metadata[SAMPLE_SUBANNOTATIONS_KEY]
    elif spec_type == "as_null":
        metadata[SAMPLE_SUBANNOTATIONS_KEY] = None
    else:
        raise ValueError(
            "Unknown way to specify no merge table: {}".format(spec_type))

    # Write the modified config and create the Project from it.
    path_config_file = os.path.join(tmpdir.strpath, "project_config.yaml")
    with open(path_config_file, 'w') as out:
        yaml.safe_dump(proj_conf_data, out)
    prj = Project(path_config_file, defer_sample_construction=lazy)

    observed = getattr(prj, SAMPLE_SUBANNOTATIONS_KEY)
    assert observed is None
def test_sample_name_availability(self, path_project_conf, path_sample_anns, lazy):
    """
    Project exposes sample names matching the annotations file.

    Expected names are the first comma-separated field of every line
    after the header line.
    """
    with open(path_sample_anns, 'r') as anns_file:
        data_lines = anns_file.readlines()[1:]
        expected_sample_names = []
        for line in data_lines:
            if line:
                expected_sample_names.append(line.split(",")[0])
    prj = Project(path_project_conf, defer_sample_construction=lazy)
    observed_sample_names = list(prj.sample_names)
    assert expected_sample_names == observed_sample_names
def test_missing_amendment_raises_error(self, example_pep_cfg_path, defer):
    """
    Check that requesting the amendment "nieznany" at Project creation
    raises MissingAmendmentError
    """
    creation_kwargs = dict(
        cfg=example_pep_cfg_path,
        amendments="nieznany",
        defer_samples_creation=defer)
    with pytest.raises(MissingAmendmentError):
        Project(**creation_kwargs)
def test_sample_updates_regenerate_df(self, example_pep_cfg_path):
    """
    Check that updating a Sample changes the Project's sample_table

    The table is fetched, the first sample is given a new attribute,
    and the table fetched afterwards must not equal the earlier one.
    """
    prj = Project(cfg=example_pep_cfg_path)
    table_before = prj.sample_table
    first_sample = prj.samples[0]
    first_sample.update({"witam": "i_o_zdrowie_pytam"})
    table_after = prj.sample_table
    assert not table_after.equals(table_before)
def test_amendments(self, example_pep_cfg_path, defer):
    """
    Check that an amendment requested at instantiation is active

    With "newLib" requested, every sample's protocol must be "ABCD".
    """
    prj = Project(
        cfg=example_pep_cfg_path,
        amendments="newLib",
        defer_samples_creation=defer)
    protocol_ok = [smp["protocol"] == "ABCD" for smp in prj.samples]
    assert all(protocol_ok)
def test_old_format_support(self, example_pep_cfg_path, defer):
    """
    Check that the old config format (without implications and
    subprojects) is still supported

    Every sample of the resulting Project must contain "read1".
    """
    # NOTE(review): the DATA environment variable is set here and not
    # restored afterwards -- confirm no other test depends on its value.
    os.environ["DATA"] = "data"
    prj = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer)
    has_read1 = ["read1" in smp for smp in prj.samples]
    assert all(has_read1)
def test_subsample_table_works_when_no_sample_mods(self, example_pep_cfg_path):
    """
    Check that subsample table functionality does not depend on sample
    modifiers

    At least one sample's "file" value must differ from "multi".
    """
    prj = Project(cfg=example_pep_cfg_path)
    file_not_multi = [smp["file"] != "multi" for smp in prj.samples]
    assert any(file_not_multi)
def test_missing_sample_name_custom_index(self, example_pep_cfg_noname_path):
    """
    Check that the no-name config is accepted when "id" is passed as
    the sample table index, and that it becomes the sample name column
    """
    custom_index = "id"
    prj = Project(cfg=example_pep_cfg_noname_path,
                  sample_table_index=custom_index)
    assert prj.sample_name_colname == "id"
def test_no_description(self, example_pep_cfg_path, defer):
    """
    Check that a Project is created from this config and that its
    "description" is present with value None
    """
    prj = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer)
    assert isinstance(prj, Project)
    # Presence is checked first, then the value itself.
    assert "description" in prj
    assert prj.description is None
def _make_flags(cfg, type, count):
    """
    Create flag files for the first samples of a project.

    For each of the first ``count`` samples, a "<type>.flag" file is
    touched in that sample's folder under "results_pipeline" in the
    project's output folder; the sample folder is created if needed.

    :param cfg: argument passed to Project to create the project
    :param type: flag name; ".flag" is appended to form the filename
    :param count: upper bound of the slice taken from the samples
    """
    prj = Project(cfg)
    out_dir = prj[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY]
    for smp in prj.samples[:count]:
        sample_dir = os.path.join(
            out_dir, "results_pipeline", smp[SAMPLE_NAME_ATTR])
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)
        flag_path = os.path.join(sample_dir, type + ".flag")
        # Append mode creates the file without truncating an existing one.
        with open(flag_path, 'a'):
            pass
def test_pickle_roundtrip(self, minimal_project_conf_path):
    """
    Test whether pickle roundtrip produces a comparable object.

    The Project is dumped to a temporary file, loaded back from it, and
    compared with the original for equality.
    """
    prj = Project(minimal_project_conf_path)
    # The context manager closes the temporary file even if dumping or
    # loading raises; the handle was previously never closed.
    with tempfile.TemporaryFile() as _buffer:
        pickle.dump(prj, _buffer)
        _buffer.seek(0)  # Rewind so that loading reads from the start.
        new_prj = pickle.load(_buffer)
    assert prj == new_prj
def test_samples_are_generic(path_anns_file, path_proj_conf_file):
    """ Project's samples are exactly of type Sample, one per name. """
    # The annotations filepath fixture also writes that file, so it is
    # requested here even though its value is not used.
    prj = Project(path_proj_conf_file)
    assert len(SAMPLE_NAMES) == prj.num_samples
    samples = list(prj.samples)
    assert prj.num_samples == len(samples)
    exact_type_flags = [Sample is type(smp) for smp in samples]
    assert all(exact_type_flags)
def test_str_repr_correctness(self, example_pep_cfg_path):
    """
    Check the content of each Sample's string representation

    With max_attr=100, the text must contain the config path and
    "Sample '<sample_name>'".
    """
    prj = Project(cfg=example_pep_cfg_path)
    for smp in prj.samples:
        text = smp.__str__(max_attr=100)
        assert example_pep_cfg_path in text
        name_phrase = "Sample '{}'".format(smp.sample_name)
        assert name_phrase in text
def prj(self, tmpdir, prj_data, sample_lines):
    """
    Create a Project from freshly written config and annotations files.

    The config data is dumped as YAML to a randomly named file in the
    temp folder and the sample lines are written to the class's
    annotations filename; both files must exist before the Project is
    created from the config.
    """
    conf = tmpdir.join(randomize_filename()).strpath
    anns = tmpdir.join(self._ANNS_NAME).strpath
    with open(conf, 'w') as conf_handle:
        yaml.dump(prj_data, conf_handle)
    self._write_lines(anns, sample_lines)

    # Both files must be present before the Project is created.
    required_files = (
        ("Missing proj conf: {}", conf),
        ("Missing annotations: {}", anns),
    )
    for message, path in required_files:
        assert os.path.isfile(path), message.format(path)
    return Project(conf)
def test_multiple_samples(
        self, protocols, path_anns_file, path_proj_conf_file):
    """
    Project handles multiple Samples, and its sheet filters by protocol.

    The Project always holds all samples; the sheet holds all of them
    when no protocol is given, and otherwise only rows whose protocol is
    among those requested.
    """
    project = Project(path_proj_conf_file)

    # Sample count on the Project does not depend on the protocols.
    assert len(SAMPLE_NAMES) == sum(1 for _ in project.samples)

    # Expected sheet size: everything, or the per-protocol match counts.
    if not protocols:
        exp_num_samples = len(SAMPLE_NAMES)
    else:
        exp_num_samples = sum(
            sum(1 for candidate in PROTOCOLS if candidate == wanted)
            for wanted in protocols)

    sheet = project.build_sheet(*protocols)
    assert exp_num_samples == len(sheet)

    # Rows must be in the requested set when protocols were given; with
    # no protocols the set is empty, so membership must be False.
    wanted_set = set(protocols)
    should_be_member = bool(protocols)
    for _, sample_data in sheet.iterrows():
        assert (sample_data.protocol in wanted_set) == should_be_member
def test_no_samples(self, protocols, delimiter, path_empty_project):
    """ A Project without Samples builds an empty sheet. """
    project = Project(path_empty_project)
    # The protocols requested make no difference: the sheet is empty.
    built = project.build_sheet(*protocols)
    assert built.empty