コード例 #1
0
        with open(args.input) as fp:
            for line in fp:
                gen.append(line.rstrip())

    if args.remove:
        taxon_id = None
    else:
        taxon_id = add_new_taxonomy(server, args.new_taxons, args.taxid)

    for rec in gen:
        server.adaptor.execute('update bioentry set taxon_id = %s where bioentry_id = %s',(taxon_id, db.adaptor.fetch_seqid_by_display_id(db.dbid, rec)))
    server.commit()


if __name__ == "__main__":
    # Script entry point: extend the shared option parser with the options
    # specific to assigning (or removing) a taxonomy ID, then hand the parsed
    # arguments to main().
    parser = standard_options()

    parser.add_argument(
        '-D', '--database-name',
        dest='database_name',
        default=None,
        help='namespace of the database that you want to add into',
    )
    parser.add_argument(
        '-f', '--fasta',
        help='fasta file to add into the database',
    )
    parser.add_argument(
        '-G', '--genbank',
        help='genbank file to add into the database',
    )
    parser.add_argument(
        '-i', '--input',
        help='file containing sequence names, one per line',
    )
    parser.add_argument(
        '--remove',
        action='store_true',
        default=False,
        help='remove the taxonomy ID from the sequences',
    )
    parser.add_argument(
        '-T', '--taxid',
        default=None,
        help='supply a ncbi taxonomy id that will be applied to all sequences in the file, or if new_taxons are supplied on the command line this taxonomy ID will be used as the parent taxonomy for the novel lineages',
    )

    # Zero or more positional <taxon_name>:<taxon_rank> pairs.
    # NOTE(review): the help text below spells "currently" as "currenly";
    # it is left untouched here so the --help output stays unchanged.
    parser.add_argument(
        'new_taxons',
        nargs="*",
        help='specify novel taxonomies not currenly in the NCBI database. each taxon specified on the command line should take the form of <taxon_name>:<taxon_rank>. Check the taxon table in the database for the appropriate values for the taxon_rank. e.g. ANME-2ab:family ANME-2b:genus ANME-hr1:species',
    )

    args = parser.parse_args()

    # No password given: ask for one via getpass instead.
    # (password, user and database are presumably defined by
    # standard_options() -- verify there.)
    if args.password is None:
        args.password = getpass(
            "Please enter the password for user " + args.user
            + " on database " + args.database
        )

    main(args)
コード例 #2
0
                                get_seqfeature_ids_for_bioseqs(
                                    server, [x[0] for x in dbids.keys()]),
                                type=types)
        else:
            for (dbid, dbname), taxid in dbids.items():
                db = server[dbname]
                try:
                    dbrec = db[dbid]
                    SeqIO.write(dbrec, sys.stdout, args.output_format)
                except KeyError:
                    pass


if __name__ == "__main__":
    parser = standard_options(
        description=
        "This script will extract from the database all of the sequences associated with a particular taxonomy. The input is either an NCBI taxonomy ID or the complete taxonomic name."
    )
    parser.add_argument('-o',
                        '--output_format',
                        help='output format of the selected sequences',
                        choices=['fasta', 'gb', 'feat-prot', 'feat-nucl'],
                        default='fasta')
    parser.add_argument(
        'taxid',
        help=
        'supply a ncbi taxonomy id that will be extracted. If an integer is supplied it will be interpreted as an NCBI taxonomy id; otherwise it will be interpreted as part of a taxonomy name (e.g. Proteobacteria)',
        default=None)
    parser.add_argument(
        '-s',
        '--split_species',
        help=
コード例 #3
0
    seqfeature_ids = get_seqfeature_ids_from_qv(server, args.qualifier, args.value, args.database_name, fuzzy=args.fuzzy)

    if args.feature_type is not None:
        types = args.feature_type
    elif args.output_format == 'feat-prot':
        types = ['CDS']
    elif args.output_format == 'feat-nucl':
        types = ['CDS', 'rRNA', 'tRNA']

    if args.output_format == 'feat-prot':
        extract_feature_sql(server, seqfeature_ids, type=types, translate=True )
    elif args.output_format == 'feat-nucl':
        extract_feature_sql(server, seqfeature_ids, type=types)


if __name__ == "__main__":
    # Script entry point: extend the shared option parser with the options
    # for selecting features by qualifier/value, then run main().
    parser = standard_options()

    parser.add_argument(
        '-D', '--database-name',
        dest='database_name',
        help='limit the extracted sequences from this namespace',
    )
    parser.add_argument(
        '-o', '--output_format',
        choices=['feat-prot', 'feat-nucl'],
        default='feat-prot',
        help='output format of the selected sequences',
    )
    # action='append' collects every -t occurrence into a list; the value
    # stays None when the flag is never given.
    parser.add_argument(
        '-t', '--feature-type',
        action='append',
        default=None,
        help='restrict the results to feature type e.g. rRNA, tRNA, CDS. This option can be specified multiple times for multiple types',
    )
    parser.add_argument(
        '-f', '--fuzzy',
        action='store_true',
        default=False,
        help='the value can be a partial match',
    )

    # Two positionals: the qualifier name, then the value to match.
    parser.add_argument(
        'qualifier',
        default=None,
        help='name of the qualifier',
    )
    parser.add_argument(
        'value',
        help='value to match on',
    )

    args = parser.parse_args()

    # No password given: ask for one via getpass instead.
    # (password, user and database are presumably defined by
    # standard_options() -- verify there.)
    if args.password is None:
        args.password = getpass(
            "Please enter the password for user " + args.user
            + " on database " + args.database
        )
    main(args)

    else:
        if args.output_format == 'feat-prot':
            extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbids.keys()]),type=types, translate=True )
        elif args.output_format == 'feat-nucl':
            extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbids.keys()]), type=types)
        elif args.output_format == 'csv':
            print_feature_qv_csv(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbids.keys()]))
        else:
            for (dbid, dbname), taxid in dbids.items():
                db = server[dbname]
                try:
                    dbrec = db[dbid]
                    SeqIO.write(dbrec, sys.stdout, args.output_format)
                except KeyError:
                    pass


if __name__ == "__main__":
    # Script entry point: extend the shared option parser with the options
    # for extracting sequences by taxonomy, then run main().
    parser = standard_options(
        description="This script will extract from the database all of the sequences associated with a particular taxonomy. The input is either an NCBI taxonomy ID or the complete taxonomic name.",
    )

    parser.add_argument(
        '-o', '--output_format',
        choices=['fasta', 'gb', 'feat-prot', 'feat-nucl', 'csv'],
        default='fasta',
        help='output format of the selected sequences. Choices: fasta - fasta file of the contigs; gb - genbank file of the sequences; feat-prot - fasta file containing the translated coding sequences; feat-nucl - fasta file containing the untranslated coding sequences, tRNAs and rRNAs; csv - csv file of annotations for the features',
    )
    # Positional: an NCBI taxonomy ID or (part of) a taxonomy name.
    parser.add_argument(
        'taxid',
        default=None,
        help='supply a ncbi taxonomy id that will be extracted. If an integer is supplied it will be interpreted as an NCBI taxonomy id; otherwise it will be interpreted as part of a taxonomy name (e.g. Proteobacteria)',
    )
    parser.add_argument(
        '-s', '--split_species',
        action='store_true',
        default=False,
        help='when there are multiple species to be returned, split them into separate files, based on their name, instead of printing to stdout',
    )
    # action='append' collects every -t occurrence into a list; the value
    # stays None when the flag is never given.
    parser.add_argument(
        '-t', '--feature-type',
        action='append',
        default=None,
        help='restrict the results to feature type e.g. rRNA, tRNA, CDS. This option can be specified multiple times for multiple types',
    )

    args = parser.parse_args()

    # No password given: ask for one via getpass instead.
    # (password, user and database are presumably defined by
    # standard_options() -- verify there.)
    if args.password is None:
        args.password = getpass(
            "Please enter the password for user " + args.user
            + " on database " + args.database
        )
    main(args)