Example #1
0
 def _write_proteins(self, proteins, protein_path):
     """Write the translated sequence of every given feature as FASTA.

     For each feature, its sequence is extracted from ``self.record.seq``
     and translated. The resulting record is identified by
     ``util.get_protein_id(feature)`` and has an empty description.

     :param proteins: iterable of features to translate
     :param protein_path: destination handed to ``SeqIO.write``
     """
     def as_fasta_record(feature):
         # Translate first, then resolve the ID, one feature at a time.
         protein_seq = feature.extract(self.record.seq).translate()
         return SeqRecord(protein_seq,
                          util.get_protein_id(feature),
                          description='')

     # Build the full list up front so nothing is written if a feature fails.
     fasta_records = [as_fasta_record(feature) for feature in proteins]
     SeqIO.write(fasta_records, protein_path, 'fasta')
Example #2
0
 def _write_proteins(self, proteins, protein_path):
     """Write one FASTA record per given feature to ``protein_path``.

     The first value of a feature's ``translation`` qualifier is used when
     it is present and non-empty. Otherwise the feature's sequence is
     extracted from ``self.record.seq`` and translated. Each record is
     identified by ``util.get_protein_id(feature)`` and has an empty
     description.

     :param proteins: iterable of features to write
     :param protein_path: destination handed to ``SeqIO.write``
     """
     def as_fasta_record(feature):
         # Qualifier values are indexed as sequences; [None] is the fallback
         # so a missing qualifier yields a falsy first element.
         annotated = feature.qualifiers.get('translation', [None])[0]
         if annotated:
             protein_seq = Seq(annotated)
         else:
             protein_seq = feature.extract(self.record.seq).translate()
         return SeqRecord(protein_seq,
                          util.get_protein_id(feature),
                          description='')

     # Build the full list up front so nothing is written if a feature fails.
     fasta_records = [as_fasta_record(feature) for feature in proteins]
     SeqIO.write(fasta_records, protein_path, 'fasta')
def test_integration_prepare_default(tmpdir):
    """Run ``prepare`` on BGC0000015.fa and check the GenBank and TSV outputs.

    Both outputs are expected to contain two sequences; the protein and Pfam
    counts for each sequence are asserted below.
    """
    workdir = str(tmpdir)
    gbk_path = os.path.join(workdir, 'outfile.gbk')
    tsv_path = os.path.join(workdir, 'outfile.tsv')
    cli_args = [
        'prepare',
        '--output-gbk', gbk_path,
        '--output-tsv', tsv_path,
        get_test_file('BGC0000015.fa'),
    ]
    run(cli_args)

    # GenBank output: two records with sorted features.
    gbk_records = list(SeqIO.parse(gbk_path, 'genbank'))
    assert len(gbk_records) == 2

    first_gbk = gbk_records[0]
    assert_sorted_features(first_gbk)
    first_proteins = util.get_protein_features(first_gbk)
    first_pfams = util.get_pfam_features(first_gbk)
    assert len(first_proteins) == 18
    print([util.get_protein_id(f) for f in first_proteins])
    assert len(first_pfams) == 111

    second_gbk = gbk_records[1]
    assert_sorted_features(second_gbk)
    second_proteins = util.get_protein_features(second_gbk)
    second_pfams = util.get_pfam_features(second_gbk)
    assert len(second_proteins) == 27
    assert len(second_pfams) == 36

    # TSV output: one row per Pfam domain, grouped by sequence.
    domain_table = pd.read_csv(tsv_path, sep='\t')
    by_sequence = domain_table.groupby('sequence_id')
    assert len(by_sequence) == 2

    first_rows = by_sequence.get_group('BGC0000015.1')
    first_protein_ids = first_rows['protein_id'].unique()
    print(first_protein_ids)
    # Proteins without any Pfam domain have no rows, hence fewer IDs here
    # than protein features in the GenBank record.
    assert len(first_protein_ids) == 17
    assert len(first_rows) == 111

    second_rows = by_sequence.get_group('BGC0000015.2')
    second_protein_ids = second_rows['protein_id'].unique()
    # Proteins without any Pfam domain have no rows, hence fewer IDs here
    # than protein features in the GenBank record.
    assert len(second_protein_ids) == 11
    assert len(second_rows) == 36