예제 #1
0
 def it_raises_if_both_seq_and_uniprot_ac():
     """A CSV supplying both Seq and UniprotAC columns must raise ValueError."""
     csv_text = """
     Name, Seq, UniprotAC
     P1, A, P100
     """
     with zest.raises(ValueError) as raised:
         helpers.protein_csv_df(csv_text)
     # The error text must name the conflicting columns.
     message = str(raised.exception)
     assert "both a Seq and a UniprotAC" in message
예제 #2
0
 def it_raises_if_no_name_and_no_uniprot_ac():
     """A CSV with neither a Name nor a UniprotAC column must raise ValueError."""
     csv_text = """
     Seq, Abundance
     ABC, 10
     """
     with zest.raises(ValueError) as raised:
         helpers.protein_csv_df(csv_text)
     # The error text must point at the missing Name column.
     message = str(raised.exception)
     assert "missing a Name column" in message
예제 #3
0
 def it_raises_if_no_seq_nor_uniprot_ac():
     """A CSV with neither a Seq nor a UniprotAC column must raise ValueError."""
     csv_text = """
     Name, Abundance
     A, 10
     """
     with zest.raises(ValueError) as raised:
         helpers.protein_csv_df(csv_text)
     # The error text must say that a sequence source is missing.
     message = str(raised.exception)
     assert "missing either a Seq or a UniprotAC" in message
예제 #4
0
 def it_raises_on_duplicate_seqs():
     """Two rows sharing the same Seq value must raise ValueError."""
     csv_text = """
     Name, Seq
     P1, ABC
     P2, ABC
     """
     with zest.raises(ValueError) as raised:
         helpers.protein_csv_df(csv_text)
     # The error text must mention the duplicated sequences.
     message = str(raised.exception)
     assert "duplicate seqs" in message
예제 #5
0
 def it_warns_on_no_seq_from_uniprot():
     """An empty result from the mocked UniProt lookup must produce one warning."""
     csv_text = """
     UniprotAC
     P1
     """
     # Both helpers are mocked for the duration of the parse; the warning mock
     # is entered first and exited last, exactly as with nested `with` blocks.
     with zest.mock(helpers._protein_csv_warning) as m_warn, zest.mock(
         helpers._uniprot_lookup
     ) as m_lookup:
         m_lookup.returns([])
         helpers.protein_csv_df(csv_text)
     assert m_warn.called_once()
예제 #6
0
 def it_sorts_by_name_if_no_abundance():
     """Without an Abundance column the rows must come back ordered by name."""
     csv_text = """
     Name, Seq
     P2, DEF
     P1, ABC
     """
     df = helpers.protein_csv_df(csv_text)
     # Expected (id, seqstr) per row index; the input lists P2 before P1.
     expected_rows = [("P1", "ABC"), ("P2", "DEF")]
     for row, (name, seq) in enumerate(expected_rows):
         assert df.loc[row, "seqstr"] == seq and df.loc[row, "id"] == name
예제 #7
0
 def it_nans_missing_abundances():
     """A CSV without an Abundance column must yield NaN in the abundance field.

     The UniProt lookup is mocked to return a single record, so the sequence
     comes from the mock while the id is expected to be the UniprotAC value.
     """
     csv_text = """
     UniprotAC
     P1
     """
     with zest.mock(helpers._uniprot_lookup) as m_lookup:
         # The key was previously spelled "id:" (stray colon); it is now "id" so
         # this record has the same shape as the fixture used by the
         # multiple-sequence warning test.
         m_lookup.returns([{"id": "foo", "seqstr": "ABC"}])
         df = helpers.protein_csv_df(csv_text)
     # Separate asserts so a failure pinpoints the offending column.
     assert df.loc[0, "id"] == "P1"
     assert df.loc[0, "seqstr"] == "ABC"
     assert np.isnan(df.loc[0, "abundance"])
예제 #8
0
 def it_imports_ptm():
     """PTM column values must be carried through verbatim into ptm_locs."""
     csv_text = """
     Name, Seq, PTM
     P1, ABC, 3
     P2, DEF, 1;2
     P3, GHI, 
     """
     df = helpers.protein_csv_df(csv_text)
     # Expected ptm_locs per row index; the last row has an empty PTM cell.
     expected_ptm_locs = ["3", "1;2", ""]
     for row, ptm_locs in enumerate(expected_ptm_locs):
         assert df.loc[row, "ptm_locs"] == ptm_locs
예제 #9
0
 def it_reverse_sorts_by_abundance():
     """With an Abundance column the most abundant protein must come first."""
     csv_text = """
     Name, Seq, Abundance
     P1, ABC, 10
     P2, DEF, 100
     """
     df = helpers.protein_csv_df(csv_text)
     # Expected (seqstr, id, abundance) per row index, highest abundance first.
     expected_rows = [("DEF", "P2", 100.0), ("ABC", "P1", 10.0)]
     for row, (seq, name, abundance) in enumerate(expected_rows):
         assert (df.loc[row, "seqstr"] == seq and df.loc[row, "id"] == name
                 and df.loc[row, "abundance"] == abundance)
예제 #10
0
 def protein_csv(self, sources):
     """
     Include protein(s) given in CSV format. Each source must have a header row with
     'Name' and 'Seq' columns, plus optional 'Abundance', 'UniprotAC', and 'PTM'
     columns. A source may be a local file path or a URL.
     """
     for raw_source in sources:
         # Pass the source through the cache helper, then parse what it returns.
         cached_source = helpers.cache_source(
             self.cache_folder,
             raw_source,
             self.local_sources_tmp_folder,
         )
         parsed = helpers.protein_csv_df(cached_source)
         # Append one dict per parsed row to the accumulated protein list.
         self.derived_vals.protein += parsed.to_dict("records")
예제 #11
0
파일: gen_main.py 프로젝트: erisyon/plaster
 def protein_csv(self, sources):
     """
     Include protein(s) given in CSV format. Each source must have a header row with
     'Name' and 'Seq' columns, plus optional 'Abundance', 'UniprotAC', and 'PTM'
     columns. A source may be a local file path or a URL.
     """
     for raw_source in sources:
         # Pass the source through the cache helper, then parse what it returns.
         cached_source = helpers.cache_source(
             self.cache_folder,
             raw_source,
             self.local_sources_tmp_folder,
         )
         parsed = helpers.protein_csv_df(cached_source)
         # NOTE: to dump the parsed sequences while debugging, temporarily add
         # parsed.to_csv("_pro_df.csv") here. This should become its own tool.
         # Append one dict per parsed row to the accumulated protein list.
         self.derived_vals.protein += parsed.to_dict("records")
예제 #12
0
    def it_accepts_name_and_sequence():
        """A minimal Name/Seq CSV must parse into id/seqstr columns with defaults."""
        csv_text = """
        Name, Seq
        P1, ABC
        P2, DEF
        """
        parsed = helpers.protein_csv_df(csv_text)
        # Expected (id, seqstr) per row index.
        expected_rows = [("P1", "ABC"), ("P2", "DEF")]
        for row, (name, seq) in enumerate(expected_rows):
            assert parsed.loc[row, "seqstr"] == seq and parsed.loc[row, "id"] == name

        def it_defaults_ptm_locs_to_empty_str():
            # No PTM column was supplied, so ptm_locs should be the empty string.
            assert parsed.loc[0, "ptm_locs"] == ""

        def it_defaults_abundance_to_nan():
            # No Abundance column was supplied, so abundance should be NaN.
            assert np.isnan(parsed.loc[0, "abundance"])

        # Run the nested tests defined above.
        zest()
예제 #13
0
 def it_warns_on_more_than_one_seq_from_uniprot():
     """Multiple lookup results must produce one warning and a single output row.

     The surviving row is expected to carry seqstr "123456", the longer of the
     two mocked entries.
     """
     csv_text = """
     UniprotAC
     P1
     """
     lookup_hits = [
         {"id": "foo", "seqstr": "123"},
         {"id": "bar", "seqstr": "123456"},
     ]
     # Both helpers are mocked for the duration of the parse; the warning mock
     # is entered first and exited last, exactly as with nested `with` blocks.
     with zest.mock(helpers._protein_csv_warning) as m_warn, zest.mock(
         helpers._uniprot_lookup
     ) as m_lookup:
         m_lookup.returns(lookup_hits)
         df = helpers.protein_csv_df(csv_text)
         assert len(df) == 1
         assert df.loc[0, "seqstr"] == "123456"
     assert m_warn.called_once()
예제 #14
0
    def it_lookups_uniprot():
        """A CSV keyed by UniprotAC must take its sequence from the UniProt lookup.

        The lookup is mocked to return a single record. The nested tests check
        that the UniprotAC value is used as the id and that Abundance is imported.
        """
        csv_text = """
        UniprotAC, Abundance
        P1, 10
        """
        with zest.mock(helpers._uniprot_lookup) as m_lookup:
            # The key was previously spelled "id:" (stray colon); it is now "id"
            # so this record has the same shape as the fixture used by the
            # multiple-sequence warning test.
            m_lookup.returns([{"id": "foo", "seqstr": "ABC"}])
            df = helpers.protein_csv_df(csv_text)
        assert df.loc[0, "seqstr"] == "ABC"

        def it_uses_uniprot_ac_as_name():
            # The id must be the accession from the CSV, not the mocked "foo".
            assert df.loc[0, "id"] == "P1"

        def it_imports_abundance():
            assert df.loc[0, "abundance"] == 10.0

        # Run the nested tests defined above.
        zest()