# NOTE(review): the statements above the entry-point guard are the tail of a
# routine whose header lies before this chunk (`args`, `gen`, `server` and `db`
# are bound there).  The layout below was reconstructed from a
# whitespace-collapsed line; re-indent it under that routine when restoring
# the file.

# Collect the sequence names: one per input line, trailing whitespace removed.
with open(args.input) as fp:
    for line in fp:
        gen.append(line.rstrip())

# With --remove the taxon id written below is None; otherwise it is whatever
# add_new_taxonomy returns for the supplied new taxons / --taxid.
if args.remove:
    taxon_id = None
else:
    taxon_id = add_new_taxonomy(server, args.new_taxons, args.taxid)

# Point every named bioentry at the chosen taxon, then commit once at the end.
for rec in gen:
    server.adaptor.execute(
        'update bioentry set taxon_id = %s where bioentry_id = %s',
        (taxon_id, db.adaptor.fetch_seqid_by_display_id(db.dbid, rec)))
server.commit()

if __name__ == "__main__":
    # Command-line entry point: build the parser on top of the shared
    # connection options, prompt for a password if none was given, then run.
    parser = standard_options()
    parser.add_argument(
        '-D', '--database-name',
        help='namespace of the database that you want to add into',
        dest='database_name',
        default=None)
    parser.add_argument(
        '-f', '--fasta',
        help='fasta file to add into the database')
    parser.add_argument(
        '-G', '--genbank',
        help='genbank file to add into the database')
    parser.add_argument(
        '-i', '--input',
        help='file containing sequence names, one per line')
    parser.add_argument(
        '--remove',
        action='store_true',
        default=False,
        help='remove the taxonomy ID from the sequences')
    parser.add_argument(
        '-T', '--taxid',
        help='supply a ncbi taxonomy id that will be applied to all sequences '
             'in the file, or if new_taxons are supplied on the command line '
             'this taxonomy ID will be used as the parent taxonomy for the '
             'novel lineages',
        default=None)
    parser.add_argument(
        'new_taxons',
        nargs="*",
        help='specify novel taxonomies not currently in the NCBI database. '
             'each taxon specified on the command line should take the form '
             'of <taxon_name>:<taxon_rank>. Check the taxon table in the '
             'database for the appropriate values for the taxon_rank. e.g. '
             'ANME-2ab:family ANME-2b:genus ANME-hr1:species')
    args = parser.parse_args()

    # Ask interactively rather than requiring the password on the command line.
    if args.password is None:
        args.password = getpass(
            "Please enter the password for user " + args.user
            + " on database " + args.database)
    main(args)
get_seqfeature_ids_for_bioseqs( server, [x[0] for x in dbids.keys()]), type=types) else: for (dbid, dbname), taxid in dbids.items(): db = server[dbname] try: dbrec = db[dbid] SeqIO.write(dbrec, sys.stdout, args.output_format) except KeyError: pass if __name__ == "__main__": parser = standard_options( description= "This script will extract from the database all of the sequences associated with a particular taxonomy. The input is either an NCBI taxonomy ID or the complete taxonomic name." ) parser.add_argument('-o', '--output_format', help='output format of the selected sequences', choices=['fasta', 'gb', 'feat-prot', 'feat-nucl'], default='fasta') parser.add_argument( 'taxid', help= 'supply a ncbi taxonomy id that will be extracted. If an integer is supplied it will be interpreted as an NCBI taxonomy id; otherwise it will be interpreted as part of a taxonomy name (e.g. Proteobacteria)', default=None) parser.add_argument( '-s', '--split_species', help=
# NOTE(review): the statements above the entry-point guard are the tail of a
# routine whose header lies before this chunk (`args` and `server` are bound
# there).  The layout below was reconstructed from a whitespace-collapsed
# line; re-indent it under that routine when restoring the file.

# Look up the features whose qualifier matches the requested value.
seqfeature_ids = get_seqfeature_ids_from_qv(
    server,
    args.qualifier,
    args.value,
    args.database_name,
    fuzzy=args.fuzzy)

# An explicit --feature-type list wins; otherwise the output format picks the
# feature types to extract.
if args.feature_type is not None:
    types = args.feature_type
elif args.output_format == 'feat-prot':
    types = ['CDS']
elif args.output_format == 'feat-nucl':
    types = ['CDS', 'rRNA', 'tRNA']

# feat-prot asks for translated output; feat-nucl does not.
if args.output_format == 'feat-prot':
    extract_feature_sql(server, seqfeature_ids, type=types, translate=True)
elif args.output_format == 'feat-nucl':
    extract_feature_sql(server, seqfeature_ids, type=types)

if __name__ == "__main__":
    # Command-line entry point: extend the shared connection options, prompt
    # for a password when none was supplied, then hand over to main().
    parser = standard_options()
    parser.add_argument(
        '-D', '--database-name',
        help='limit the extracted sequences from this namespace',
        dest='database_name')
    parser.add_argument(
        '-o', '--output_format',
        help='output format of the selected sequences',
        choices=['feat-prot', 'feat-nucl'],
        default='feat-prot')
    parser.add_argument(
        '-t', '--feature-type',
        help='restrict the results to feature type e.g. rRNA, tRNA, CDS. '
             'This option can be specified multiple times for multiple types',
        default=None,
        action='append')
    parser.add_argument(
        '-f', '--fuzzy',
        help='the value can be a partial match',
        default=False,
        action='store_true')
    parser.add_argument(
        'qualifier',
        help='name of the qualifier',
        default=None)
    parser.add_argument(
        'value',
        help='value to match on')
    args = parser.parse_args()

    if args.password is None:
        args.password = getpass(
            "Please enter the password for user " + args.user
            + " on database " + args.database)
    main(args)
else: if args.output_format == 'feat-prot': extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbids.keys()]),type=types, translate=True ) elif args.output_format == 'feat-nucl': extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbids.keys()]), type=types) elif args.output_format == 'csv': print_feature_qv_csv(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbids.keys()])) else: for (dbid, dbname), taxid in dbids.items(): db = server[dbname] try: dbrec = db[dbid] SeqIO.write(dbrec, sys.stdout, args.output_format) except KeyError: pass if __name__ == "__main__": parser = standard_options(description="This script will extract from the database all of the sequences associated with a particular taxonomy. The input is either an NCBI taxonomy ID or the complete taxonomic name.") parser.add_argument('-o', '--output_format', help='output format of the selected sequences. Choices: fasta - fasta file of the contigs; gb - genbank file of the sequences; feat-prot - fasta file containing the translated coding sequences; feat-nucl - fasta file containing the untranslated coding sequences, tRNAs and rRNAs; csv - csv file of annotations for the features', choices=['fasta', 'gb', 'feat-prot', 'feat-nucl', 'csv'], default='fasta') parser.add_argument('taxid', help='supply a ncbi taxonomy id that will be extracted. If an integer is supplied it will be interpreted as an NCBI taxonomy id; otherwise it will be interpreted as part of a taxonomy name (e.g. Proteobacteria)', default=None) parser.add_argument('-s', '--split_species', help='when there are multiple species to be returned, split them into separate files, based on their name, instead of printing to stdout', default=False, action='store_true') parser.add_argument('-t', '--feature-type', help='restrict the results to feature type e.g. rRNA, tRNA, CDS. 
This option can be specified multiple times for multiple types', default=None, action='append') args = parser.parse_args() if args.password is None: args.password = getpass("Please enter the password for user " + \ args.user + " on database " + args.database) main(args)