def test_annotate_pheno_with_no_pheno(self):
    """Annotating without a pheno table yields only a SAMPLE column.

    The expected frame holds the single column ``SAMPLE`` with the values
    "A", "B" and "C" (presumably the sample names of ``small.gct`` --
    confirm against the fixture).
    """
    exprs = parse_gct("test_data/ds/dummy/small.gct")
    actual = annotate_ds_pheno_data(exprs)

    expected = DataFrame({'SAMPLE': ["A", "B", "C"]})
    assert_frame_equal(actual, expected)
def test_annotate_pheno_mismatching_counts(self):
    """A two-row pheno table for ``small.gct`` raises ROGERUsageError.

    NOTE(review): the fixture presumably carries three samples, so the
    two pheno rows cannot be matched one-to-one -- confirm against the
    fixture file.
    """
    exprs = parse_gct("test_data/ds/dummy/small.gct")
    two_row_pheno = DataFrame({
        'CellType': ["Microglia", "Macrophage"],
        'Donor': ["Donor A", "Donor A"],
    })

    with pytest.raises(ROGERUsageError):
        annotate_ds_pheno_data(exprs, two_row_pheno)
def test_check_matrix_fail_on_noh_integer_data(self):
    """A design matrix holding a string cell raises ROGERUsageError.

    The matrix is indexed by "A", "B", "C"; the only irregularity is the
    string "A" in the second row of ``Group2``.
    """
    exprs = parse_gct("test_data/ds/dummy/small.gct")
    bad_matrix = DataFrame(
        {
            'Group1': [1, 1, 1],
            'Group2': [0, "A", 0],  # the offending non-integer cell
            'Group3': [0, 0, 1],
        },
        index=["A", "B", "C"],
    )

    with pytest.raises(ROGERUsageError):
        check_design_matrix(exprs.columns, bad_matrix)
def test_check_matrix_fail_on_row_count_mismatch(self):
    """A two-row design matrix for ``small.gct`` raises ROGERUsageError.

    NOTE(review): the fixture presumably has three sample columns, which
    is what makes two rows a mismatch -- confirm against the fixture.
    """
    exprs = parse_gct("test_data/ds/dummy/small.gct")
    short_matrix = DataFrame({
        'Group1': [1, 1],
        'Group2': [0, 1],
        'Group3': [0, 0],
    })

    with pytest.raises(ROGERUsageError):
        check_design_matrix(exprs.columns, short_matrix)
def test_annotate_pheno_witn_simple_pheno(self):
    """A three-row pheno table comes back with a leading SAMPLE column.

    The expected frame is the input pheno table (``CellType``, ``Donor``)
    preceded by a ``SAMPLE`` column holding "A", "B" and "C".
    """
    cell_types = ["Microglia", "Macrophage", "Macrophage"]
    donors = ["Donor A", "Donor A", "Donor A"]

    exprs = parse_gct("test_data/ds/dummy/small.gct")
    # Fresh list copies keep the input and expected frames independent.
    pheno = DataFrame({'CellType': list(cell_types), 'Donor': list(donors)})
    actual = annotate_ds_pheno_data(exprs, pheno)

    expected = DataFrame({
        'SAMPLE': ["A", "B", "C"],
        'CellType': list(cell_types),
        'Donor': list(donors),
    })
    assert_frame_equal(actual, expected)
def test_check_matrix(self):
    """check_design_matrix accepts a well-formed three-row design matrix.

    The same integer columns are checked twice: once with the default
    index and once indexed by "A", "B", "C". The test passes when neither
    call raises.
    """
    exprs = parse_gct("test_data/ds/dummy/small.gct")
    groups = {
        'Group1': [1, 1, 1],
        'Group2': [0, 1, 0],
        'Group3': [0, 0, 1],
    }

    # index=None gives the default index, as a bare DataFrame() would.
    for row_labels in (None, ["A", "B", "C"]):
        design_matrix = DataFrame(groups, index=row_labels)
        check_design_matrix(exprs.columns, design_matrix)
def test_annotate_entrezgene(self, sqlite_in_memory):
    """Annotate the RNA-seq example GCT using "entrezgene" identifiers.

    Registers the human species in the session obtained from the
    ``sqlite_in_memory`` fixture, annotates the read-count matrix and
    checks that the annotation version mentions "Human genes" and that
    the feature table equals the stored reference table.
    """
    session = sqlite_in_memory.session()
    geneanno = roger.persistence.geneanno
    geneanno.add_species(session,
                         geneanno.human_dataset,
                         geneanno.human_tax_id)

    counts = parse_gct(
        file_path="test_data/ds/rnaseq-example-readCounts.gct")
    feature_data, annotation_version = roger.logic.geneanno.annotate(
        session, counts, geneanno.human_tax_id, "entrezgene")

    assert "Human genes" in annotation_version
    reference = read_df("test_data/ds/rnaseq-example-rogerFeatureAnno.txt")
    assert_frame_equal(reference, feature_data)
def test_annotate_chip_data(self, sqlite_in_memory):
    """Annotate the microarray example GCT using "affy_mouse430_2" IDs.

    Registers both the human and the mouse species in the session
    obtained from the ``sqlite_in_memory`` fixture, annotates the signal
    matrix against the mouse taxon and checks that the annotation version
    mentions "Mouse genes" and that the feature table equals the stored
    reference table.
    """
    session = sqlite_in_memory.session()
    geneanno = roger.persistence.geneanno
    geneanno.add_species(session,
                         geneanno.human_dataset,
                         geneanno.human_tax_id)
    geneanno.add_species(session, mouse_dataset, mouse_tax_id)

    signals = parse_gct(file_path="test_data/ds/ma-example-signals.gct")
    feature_data, annotation_version = roger.logic.geneanno.annotate(
        session, signals, mouse_tax_id, "affy_mouse430_2")

    assert "Mouse genes" in annotation_version
    reference = read_df("test_data/ds/ma-example-rogerFeatureAnno.txt")
    assert_frame_equal(reference, feature_data)
def create_ds(session,
              ds_type: Type[DataSet],
              exprs_file,
              tax_id,
              symbol_type,
              pheno_file=None,
              name=None,
              normalization_method=None,
              description=None,
              xref=None):
    """Validate the inputs and assemble the properties of a new data set.

    Args:
        session: Database session used to list the known species, to look
            for an existing data set of the same name, and for feature
            annotation.
        ds_type: DataSet class forwarded unchanged to the result.
        exprs_file: Path of the GCT expression file to parse.
        tax_id: Taxon id; must match a ``TaxID`` entry of ``list_species``.
        symbol_type: Identifier type handed to ``annotate``.
        pheno_file: Optional pheno table read with ``read_df``. When
            omitted, an empty DataFrame is annotated instead.
        name: Data set name, resolved through ``get_or_guess_name``
            together with ``exprs_file`` (presumably derived from the
            file when not given -- confirm in that helper).
        normalization_method: Forwarded unchanged to the result.
        description: Forwarded unchanged to the result.
        xref: Forwarded unchanged to the result.

    Returns:
        A ``DataSetProperties`` bundling the parsed expression data, the
        annotated pheno data, the feature annotation with its version,
        and the pass-through arguments.

    Raises:
        ROGERUsageError: If ``tax_id`` is unknown or a data set with the
            resolved name already exists.
    """
    name = get_or_guess_name(name, exprs_file)

    # Validate cheaply before parsing or annotating anything.
    known_species = list_species(session)
    matching_species = known_species[known_species.TaxID == tax_id]
    if matching_species.empty:
        raise ROGERUsageError('Unknown taxon id: %s' % tax_id)

    same_name_query = session.query(DataSet).filter(DataSet.Name == name)
    existing_ds = same_name_query.one_or_none()
    if existing_ds is not None:
        raise ROGERUsageError("Data set with name '%s' already exists" % name)

    exprs_data = parse_gct(file_path=exprs_file)
    annotation_data, annotation_version = annotate(
        session, exprs_data, tax_id, symbol_type)

    # With no pheno file, annotation starts from an empty table.
    if pheno_file is not None:
        pheno_data = read_df(pheno_file)
    else:
        pheno_data = pd.DataFrame()
    annotated_pheno_data = annotate_ds_pheno_data(exprs_data, pheno_data)

    return DataSetProperties(
        ds_type,
        tax_id,
        exprs_file,
        pheno_file,
        exprs_data,
        annotated_pheno_data,
        annotation_data,
        annotation_version,
        name,
        normalization_method,
        description,
        xref,
    )