Exemple #1
0
    def test_annotate_pheno_with_no_pheno(self):
        """Annotating without pheno data yields a frame holding only SAMPLE."""
        gct_frame = parse_gct("test_data/ds/dummy/small.gct")
        result = annotate_ds_pheno_data(gct_frame)

        # With no pheno table supplied, SAMPLE is the only expected column.
        samples_only = DataFrame()
        samples_only['SAMPLE'] = ["A", "B", "C"]

        assert_frame_equal(result, samples_only)
Exemple #2
0
    def test_annotate_pheno_mismatching_counts(self):
        """A two-row pheno table must be rejected with ROGERUsageError."""
        gct_frame = parse_gct("test_data/ds/dummy/small.gct")

        # Deliberately only two pheno rows; per the test name this does not
        # match the number of samples in the expression file.
        too_short_pheno = DataFrame()
        for column, values in (
                ('CellType', ["Microglia", "Macrophage"]),
                ('Donor', ["Donor A", "Donor A"]),
        ):
            too_short_pheno[column] = values

        with pytest.raises(ROGERUsageError):
            annotate_ds_pheno_data(gct_frame, too_short_pheno)
Exemple #3
0
    def test_check_matrix_fail_on_noh_integer_data(self):
        """A design matrix containing a string cell must be rejected."""
        gct_frame = parse_gct("test_data/ds/dummy/small.gct")

        bad_matrix = DataFrame(index=["A", "B", "C"])
        for group, flags in (
                ('Group1', [1, 1, 1]),
                ('Group2', [0, "A", 0]),  # the "A" is the offending entry
                ('Group3', [0, 0, 1]),
        ):
            bad_matrix[group] = flags

        with pytest.raises(ROGERUsageError):
            check_design_matrix(gct_frame.columns, bad_matrix)
Exemple #4
0
    def test_check_matrix_fail_on_row_count_mismatch(self):
        """A design matrix with only two rows must be rejected."""
        gct_frame = parse_gct("test_data/ds/dummy/small.gct")

        # Two rows per group; per the test name this is a row-count mismatch
        # against the columns of the expression data.
        short_matrix = DataFrame()
        for group, flags in (
                ('Group1', [1, 1]),
                ('Group2', [0, 1]),
                ('Group3', [0, 0]),
        ):
            short_matrix[group] = flags

        with pytest.raises(ROGERUsageError):
            check_design_matrix(gct_frame.columns, short_matrix)
Exemple #5
0
    def test_annotate_pheno_witn_simple_pheno(self):
        """Pheno columns are kept and a leading SAMPLE column is expected."""
        pheno_columns = (
            ('CellType', ["Microglia", "Macrophage", "Macrophage"]),
            ('Donor', ["Donor A", "Donor A", "Donor A"]),
        )

        # Input: just the pheno columns.
        pheno_input = DataFrame()
        for column, values in pheno_columns:
            pheno_input[column] = values

        # Expected output: SAMPLE first, followed by the same pheno columns.
        expected = DataFrame()
        expected['SAMPLE'] = ["A", "B", "C"]
        for column, values in pheno_columns:
            expected[column] = values

        gct_frame = parse_gct("test_data/ds/dummy/small.gct")
        result = annotate_ds_pheno_data(gct_frame, pheno_input)
        assert_frame_equal(result, expected)
Exemple #6
0
    def test_check_matrix(self):
        """Valid design matrices pass, with a default or a labelled index."""
        gct_frame = parse_gct("test_data/ds/dummy/small.gct")

        group_columns = (
            ('Group1', [1, 1, 1]),
            ('Group2', [0, 1, 0]),
            ('Group3', [0, 0, 1]),
        )

        # First run uses the default index, second run uses explicit row
        # labels; both are expected to pass without raising.
        for row_labels in (None, ["A", "B", "C"]):
            matrix = DataFrame(index=row_labels)
            for group, flags in group_columns:
                matrix[group] = flags

            check_design_matrix(gct_frame.columns, matrix)
Exemple #7
0
    def test_annotate_entrezgene(self, sqlite_in_memory):
        """Annotate an RNA-seq GCT file by "entrezgene" for the human species.

        Checks the returned annotation version string and compares the
        feature data against the stored reference table.
        """
        db_session = sqlite_in_memory.session()

        # The human species has to be registered before annotating.
        roger.persistence.geneanno.add_species(
            db_session,
            roger.persistence.geneanno.human_dataset,
            roger.persistence.geneanno.human_tax_id)

        read_counts = parse_gct(
            file_path="test_data/ds/rnaseq-example-readCounts.gct")
        features, version = roger.logic.geneanno.annotate(
            db_session,
            read_counts,
            roger.persistence.geneanno.human_tax_id,
            "entrezgene")

        assert "Human genes" in version

        reference = read_df("test_data/ds/rnaseq-example-rogerFeatureAnno.txt")
        assert_frame_equal(reference, features)
Exemple #8
0
    def test_annotate_chip_data(self, sqlite_in_memory):
        """Annotate a microarray GCT file by "affy_mouse430_2" for mouse.

        Checks the returned annotation version string and compares the
        feature data against the stored reference table.
        """
        db_session = sqlite_in_memory.session()

        # Register human first, then mouse, before annotating.
        species_to_add = (
            (roger.persistence.geneanno.human_dataset,
             roger.persistence.geneanno.human_tax_id),
            (mouse_dataset, mouse_tax_id),
        )
        for dataset, tax_id in species_to_add:
            roger.persistence.geneanno.add_species(db_session, dataset, tax_id)

        signals = parse_gct(file_path="test_data/ds/ma-example-signals.gct")
        features, version = roger.logic.geneanno.annotate(
            db_session, signals, mouse_tax_id, "affy_mouse430_2")

        assert "Mouse genes" in version

        reference = read_df("test_data/ds/ma-example-rogerFeatureAnno.txt")
        assert_frame_equal(reference, features)
Exemple #9
0
def create_ds(session,
              ds_type: Type[DataSet],
              exprs_file,
              tax_id,
              symbol_type,
              pheno_file=None,
              name=None,
              normalization_method=None,
              description=None,
              xref=None):
    """Validate the inputs for a new data set and bundle its loaded data.

    The name is resolved through ``get_or_guess_name(name, exprs_file)``.
    After validation the expression file is parsed, its features are
    annotated, and the pheno data (an empty frame when ``pheno_file`` is
    ``None``) is annotated against the expression data.

    :param session: database session used for the species / name lookups
        and passed on to ``annotate``
    :param ds_type: data set class, forwarded unchanged to the result
    :param exprs_file: path of the GCT expression file
    :param tax_id: taxon id; must appear in ``list_species(session)``
    :param symbol_type: feature symbol type handed to ``annotate``
    :param pheno_file: optional pheno data file, read with ``read_df``
    :param name: optional data set name
    :param normalization_method: forwarded unchanged to the result
    :param description: forwarded unchanged to the result
    :param xref: forwarded unchanged to the result
    :return: a ``DataSetProperties`` built from the arguments and loaded data
    :raises ROGERUsageError: if ``tax_id`` is unknown or a data set with the
        resolved name already exists
    """
    name = get_or_guess_name(name, exprs_file)

    # Input checking: the taxon id must belong to a known species ...
    known_species = list_species(session)
    matching_species = known_species[known_species.TaxID == tax_id]
    if matching_species.empty:
        raise ROGERUsageError('Unknown taxon id: %s' % tax_id)

    # ... and the data set name must not be taken yet.
    same_name_query = session.query(DataSet).filter(DataSet.Name == name)
    if same_name_query.one_or_none() is not None:
        raise ROGERUsageError("Data set with name '%s' already exists" % name)

    exprs_data = parse_gct(file_path=exprs_file)
    annotation_data, annotation_version = annotate(
        session, exprs_data, tax_id, symbol_type)

    # Without a pheno file an empty frame is annotated instead.
    if pheno_file is None:
        pheno_data = pd.DataFrame()
    else:
        pheno_data = read_df(pheno_file)
    annotated_pheno_data = annotate_ds_pheno_data(exprs_data, pheno_data)

    return DataSetProperties(
        ds_type, tax_id, exprs_file, pheno_file,
        exprs_data, annotated_pheno_data, annotation_data,
        annotation_version, name, normalization_method,
        description, xref)