Python get_protein_id Examples

Programming Language: Python

Namespace/Package Name: deepbgc.util

Method/Function: get_protein_id

Examples at hotexamples.com: 3

Python get_protein_id - 3 examples found. These are the top-rated real-world Python examples of deepbgc.util.get_protein_id, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: pfam.py Project: youngDouble/DeepBGC

 def _write_proteins(self, proteins, protein_path):
     """Write the translated sequences of the given features as FASTA.

     Every feature in ``proteins`` is extracted from ``self.record.seq`` and
     translated; the resulting record is identified by
     ``util.get_protein_id(feature)`` and carries an empty description.

     :param proteins: iterable of features to translate
     :param protein_path: destination passed to ``SeqIO.write``
     """
     fasta_records = [
         SeqRecord(
             feature.extract(self.record.seq).translate(),
             util.get_protein_id(feature),
             description='',
         )
         for feature in proteins
     ]
     SeqIO.write(fasta_records, protein_path, 'fasta')

Example #2

Show file

File: pfam.py Project: chenhu77/deepbgc

 def _write_proteins(self, proteins, protein_path):
     """Write one protein sequence per feature to a FASTA file.

     The first value of a feature's ``translation`` qualifier is used when it
     is present and non-empty; otherwise the feature is extracted from
     ``self.record.seq`` and translated. Each record is identified by
     ``util.get_protein_id(feature)`` and carries an empty description.

     :param proteins: iterable of features to write
     :param protein_path: destination passed to ``SeqIO.write``
     """
     fasta_records = []
     for feature in proteins:
         annotated = feature.qualifiers.get('translation', [None])[0]
         # Fall back to translating the extracted sequence only when the
         # feature has no usable 'translation' qualifier.
         protein_seq = (
             Seq(annotated)
             if annotated
             else feature.extract(self.record.seq).translate()
         )
         fasta_records.append(
             SeqRecord(protein_seq, util.get_protein_id(feature), description='')
         )
     SeqIO.write(fasta_records, protein_path, 'fasta')

Example #3

Show file

File: test_integration_prepare.py Project: youngDouble/DeepBGC

def test_integration_prepare_default(tmpdir):
    """Run ``prepare`` on BGC0000015.fa and check its GenBank and TSV outputs.

    The GenBank file must hold two records with sorted features and the
    expected numbers of protein and Pfam features; the TSV file must hold the
    matching number of rows and distinct protein IDs per sequence.
    """
    workdir = str(tmpdir)
    gbk_path = os.path.join(workdir, 'outfile.gbk')
    tsv_path = os.path.join(workdir, 'outfile.tsv')
    run([
        'prepare',
        '--output-gbk', gbk_path,
        '--output-tsv', tsv_path,
        get_test_file('BGC0000015.fa'),
    ])

    # --- GenBank output ---
    gbk_records = list(SeqIO.parse(gbk_path, 'genbank'))
    assert len(gbk_records) == 2
    first_record, second_record = gbk_records

    assert_sorted_features(first_record)
    first_proteins = util.get_protein_features(first_record)
    first_pfams = util.get_pfam_features(first_record)
    assert len(first_proteins) == 18
    print([util.get_protein_id(f) for f in first_proteins])
    assert len(first_pfams) == 111

    assert_sorted_features(second_record)
    second_proteins = util.get_protein_features(second_record)
    second_pfams = util.get_pfam_features(second_record)
    assert len(second_proteins) == 27
    assert len(second_pfams) == 36

    # --- TSV output, grouped by sequence ---
    domain_table = pd.read_csv(tsv_path, sep='\t')
    rows_by_sequence = domain_table.groupby('sequence_id')
    assert len(rows_by_sequence) == 2

    first_rows = rows_by_sequence.get_group('BGC0000015.1')
    first_protein_ids = first_rows['protein_id'].unique()
    print(first_protein_ids)
    # Proteins without any Pfam domain have no rows in the TSV, so fewer
    # distinct protein IDs appear here than protein features above.
    assert len(first_protein_ids) == 17
    assert len(first_rows) == 111

    second_rows = rows_by_sequence.get_group('BGC0000015.2')
    # Same here: proteins lacking Pfam domains are absent from the table.
    assert len(second_rows['protein_id'].unique()) == 11
    assert len(second_rows) == 36