def _write_proteins(self, proteins, protein_path):
    """Write the translation of every protein feature to a FASTA file.

    Each feature is extracted from ``self.record.seq`` and translated. The
    resulting record is identified by ``util.get_protein_id(feature)`` and
    is given an empty description.

    :param proteins: iterable of protein features to export
    :param protein_path: destination passed to ``SeqIO.write``
    """
    fasta_records = [
        SeqRecord(
            feature.extract(self.record.seq).translate(),
            util.get_protein_id(feature),
            description='',
        )
        for feature in proteins
    ]
    SeqIO.write(fasta_records, protein_path, 'fasta')
def _write_proteins(self, proteins, protein_path):
    """Write one protein sequence per feature to a FASTA file.

    When a feature carries a non-empty ``translation`` qualifier, its first
    value is used as the protein sequence. Otherwise the feature is extracted
    from ``self.record.seq`` and translated. Every record is identified by
    ``util.get_protein_id(feature)`` and is given an empty description.

    :param proteins: iterable of protein features to export
    :param protein_path: destination passed to ``SeqIO.write``
    """
    records = []
    for feature in proteins:
        # The qualifier value is indexed like a list. A missing key, a None
        # value or an empty list all mean "no stored translation" and must
        # fall through to the translate() branch instead of raising.
        translations = feature.qualifiers.get('translation')
        translation_str = translations[0] if translations else None
        if translation_str:
            translation = Seq(translation_str)
        else:
            translation = feature.extract(self.record.seq).translate()
        records.append(
            SeqRecord(translation, util.get_protein_id(feature), description=''))
    SeqIO.write(records, protein_path, 'fasta')
def test_integration_prepare_default(tmpdir):
    """Run ``prepare`` on BGC0000015.fa and check both output files.

    The GenBank output is expected to contain two records with sorted
    features and fixed numbers of protein and Pfam features. The TSV output
    is expected to contain the same number of Pfam rows per sequence.
    """
    workdir = str(tmpdir)
    gbk_path = os.path.join(workdir, 'outfile.gbk')
    tsv_path = os.path.join(workdir, 'outfile.tsv')
    command = [
        'prepare',
        '--output-gbk', gbk_path,
        '--output-tsv', tsv_path,
        get_test_file('BGC0000015.fa'),
    ]
    run(command)

    # --- GenBank output ---
    gbk_records = list(SeqIO.parse(gbk_path, 'genbank'))
    assert len(gbk_records) == 2

    first = gbk_records[0]
    assert_sorted_features(first)
    first_proteins = util.get_protein_features(first)
    first_pfams = util.get_pfam_features(first)
    assert len(first_proteins) == 18
    print([util.get_protein_id(f) for f in first_proteins])
    assert len(first_pfams) == 111

    second = gbk_records[1]
    assert_sorted_features(second)
    second_proteins = util.get_protein_features(second)
    second_pfams = util.get_pfam_features(second)
    assert len(second_proteins) == 27
    assert len(second_pfams) == 36

    # --- TSV output ---
    domain_table = pd.read_csv(tsv_path, sep='\t')
    by_sequence = domain_table.groupby('sequence_id')
    assert len(by_sequence) == 2

    first_rows = by_sequence.get_group('BGC0000015.1')
    first_protein_ids = first_rows['protein_id'].unique()
    print(first_protein_ids)
    # some of the proteins do not have any Pfam domains so they are not present
    assert len(first_protein_ids) == 17
    assert len(first_rows) == 111

    second_rows = by_sequence.get_group('BGC0000015.2')
    # some of the proteins do not have any Pfam domains so they are not present
    assert len(second_rows['protein_id'].unique()) == 11
    assert len(second_rows) == 36