Python extract_feature_sql Beispiele, common.extract_feature_sql Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: extract_sequences_using_qv.py Projekt: ichobits/BioSQL-Extensions

def main(args):
    """Extract features selected by a qualifier/value pair.

    Connects to the BioSQL server described by ``args``, resolves the
    seqfeature ids matching ``args.qualifier`` / ``args.value`` inside
    ``args.database_name`` and passes them to ``extract_feature_sql``.

    ``args.output_format`` selects the call:
      * ``'feat-prot'`` -- restricted to ``CDS`` features, ``translate=True``
      * ``'feat-nucl'`` -- called with the helper's default arguments
    Any other format results in no extraction call.
    """
    connection = dict(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )
    server = BioSeqDatabase.open_database(**connection)

    feature_ids = get_seqfeature_ids_from_qv(
        server, args.qualifier, args.value, args.database_name
    )

    fmt = args.output_format
    if fmt == 'feat-nucl':
        extract_feature_sql(server, feature_ids)
    elif fmt == 'feat-prot':
        extract_feature_sql(server, feature_ids, type=['CDS'], translate=True)

Beispiel #2

0

Datei anzeigen

Datei: extract_sequences_using_qv.py Projekt: rumanubhardwaj/BioSQL-Extensions

def main(args):
    """Extract features selected by a qualifier/value pair, filtered by type.

    Connects to the BioSQL server described by ``args`` and resolves the
    matching seqfeature ids (``args.fuzzy`` is forwarded to the lookup).

    The feature types passed to ``extract_feature_sql`` come from
    ``args.feature_type`` when it is given; otherwise they default to
    ``['CDS']`` for ``'feat-prot'`` and ``['CDS', 'rRNA', 'tRNA']`` for
    ``'feat-nucl'``. Any other output format results in no extraction call.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )

    feature_ids = get_seqfeature_ids_from_qv(
        server,
        args.qualifier,
        args.value,
        args.database_name,
        fuzzy=args.fuzzy,
    )

    requested_types = args.feature_type
    fmt = args.output_format

    # Per-format fallback used only when no explicit feature type was given.
    fallback_types = {
        'feat-prot': ['CDS'],
        'feat-nucl': ['CDS', 'rRNA', 'tRNA'],
    }
    if fmt not in fallback_types:
        return

    if requested_types is not None:
        types = requested_types
    else:
        types = fallback_types[fmt]

    if fmt == 'feat-prot':
        extract_feature_sql(server, feature_ids, type=types, translate=True)
    else:
        extract_feature_sql(server, feature_ids, type=types)

Beispiel #3

0

Datei anzeigen

Datei: dump_biodatabase.py Projekt: ctSkennerton/BioSQL-Extensions

def main(args):
    """Dump one biodatabase as FASTA records or as extracted features.

    With ``args.output_format == 'fasta'`` every record of
    ``args.database_name`` is written to stdout through ``SeqIO.write``.
    Otherwise the seqfeature ids of the database are collected and handed
    to ``extract_feature_sql`` (``CDS`` + ``translate=True`` for
    ``'feat-prot'``, helper defaults for ``'feat-nucl'``); other formats
    trigger no extraction call.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )
    fmt = args.output_format

    # Whole-record dump: handled first so the feature path stays flat.
    if fmt == 'fasta':
        from Bio import SeqIO
        for record in server[args.database_name].values():
            SeqIO.write(record, sys.stdout, fmt)
        return

    feature_ids = get_seqfeature_for_db(server, args.database_name)

    if fmt == 'feat-nucl':
        extract_feature_sql(server, feature_ids)
    elif fmt == 'feat-prot':
        extract_feature_sql(server, feature_ids, type=['CDS'], translate=True)

Beispiel #4

0

Datei anzeigen

def main(args):
    """Extract the features whose seqfeature ids are listed in a file.

    ``args.infile`` is read as text with one integer seqfeature id per
    line. Blank (whitespace-only) lines are ignored, so a trailing empty
    line no longer aborts the run; any other non-integer line still
    raises ``ValueError``.

    ``args.output_format`` selects the call to ``extract_feature_sql``:
    ``'feat-prot'`` restricts to ``CDS`` with ``translate=True``,
    ``'feat-nucl'`` uses the helper's defaults, and any other value
    results in no extraction call.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)

    with open(args.infile) as fp:
        # Skip blank lines instead of letting int('') raise ValueError.
        seqfeature_ids = [int(line.strip()) for line in fp if line.strip()]

    if args.output_format == 'feat-prot':
        extract_feature_sql(server,
                            seqfeature_ids,
                            type=['CDS'],
                            translate=True)
    elif args.output_format == 'feat-nucl':
        extract_feature_sql(server, seqfeature_ids)

Beispiel #5

0

Datei anzeigen

def main(args):
    """Dump one biodatabase as FASTA records or as extracted features.

    ``'fasta'`` writes each record of ``args.database_name`` to stdout via
    ``SeqIO.write``. For every other format the seqfeature ids of the
    database are looked up first; ``'feat-prot'`` then extracts ``CDS``
    features with ``translate=True``, ``'feat-nucl'`` extracts with the
    helper's defaults, and any remaining format does nothing further.
    """
    connection = {
        'driver': args.driver,
        'db': args.database,
        'user': args.user,
        'host': args.host,
        'passwd': args.password,
    }
    server = BioSeqDatabase.open_database(**connection)
    fmt = args.output_format

    if fmt == 'fasta':
        from Bio import SeqIO
        database = server[args.database_name]
        for record in database.values():
            SeqIO.write(record, sys.stdout, fmt)
        return

    feature_ids = get_seqfeature_for_db(server, args.database_name)

    # Extra keyword arguments for extract_feature_sql, keyed by format.
    extraction_options = {
        'feat-prot': {'type': ['CDS'], 'translate': True},
        'feat-nucl': {},
    }
    if fmt in extraction_options:
        extract_feature_sql(server, feature_ids, **extraction_options[fmt])

Beispiel #6

0

Datei anzeigen

def main(args):
    """Export records or features for every bioentry found under a taxon.

    ``args.taxid`` is treated as an NCBI taxon ID when it parses as an
    integer, otherwise as a taxon name matched with SQL ``LIKE``.

    With ``args.split_species`` set, one output file is written per taxon.
    The file is named after the taxon's scientific name (spaces replaced
    by underscores) with the extension ``.gb`` for ``'gb'``, ``.faa`` for
    ``'feat-prot'`` and ``.fna`` for anything else. Formats containing
    ``'feat'`` go through ``extract_feature``; all others through
    ``SeqIO.write``.

    Without ``args.split_species`` the output is not split into files:
    ``'feat-prot'`` and ``'feat-nucl'`` are delegated to
    ``extract_feature_sql``, and any other format is written record by
    record to stdout with ``SeqIO.write``.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )

    # Both lookups share this head; only the condition on the starting taxon
    # differs. The trailing "right_value = left_value + 1" condition keeps
    # taxa whose interval contains no other node (presumably the leaves of
    # the nested-set taxonomy -- confirm against the BioSQL schema).
    lookup_head = (
        "SELECT bioentry_id, taxon_id, biodatabase.name FROM bioentry JOIN "
        "biodatabase USING(biodatabase_id) WHERE taxon_id IN "
        "(SELECT DISTINCT include.taxon_id FROM taxon "
        "INNER JOIN taxon as include ON (include.left_value "
        "BETWEEN taxon.left_value AND taxon.right_value) "
    )

    try:
        ncbi_tax = int(args.taxid)
    except ValueError:
        ncbi_tax = None

    if ncbi_tax is not None:
        print("interpreting as an NCBI taxon ID...", file=sys.stderr)
        taxon_id_lookup_sql = lookup_head + (
            "WHERE taxon.ncbi_taxon_id  = %s "
            "AND include.right_value = include.left_value + 1)"
        )
        rows = server.adaptor.execute_and_fetchall(taxon_id_lookup_sql, (ncbi_tax,))
    else:
        print("interpreting as a taxon name...", file=sys.stderr)
        taxon_name_lookup_sql = lookup_head + (
            "WHERE taxon.taxon_id IN (SELECT taxon_id FROM taxon_name "
            "WHERE name like %s) "
            "AND include.right_value = include.left_value + 1)"
        )
        rows = server.adaptor.execute_and_fetchall(taxon_name_lookup_sql, (args.taxid,))

    # (bioentry_id, biodatabase name) -> taxon_id
    dbids = {(row[0], row[2]): row[1] for row in rows}
    fmt = args.output_format

    if args.split_species:
        if fmt == 'gb':
            extension = '.gb'
        elif fmt == 'feat-prot':
            extension = '.faa'
        else:
            extension = '.fna'

        files = {}
        taxid_to_dbids = {}
        for entry_key, taxon_id in dbids.items():
            # Resolve the file name once per taxon rather than once per
            # bioentry; the query result is the same for every entry.
            if taxon_id not in files:
                tname = server.adaptor.execute_and_fetch_col0(
                    "SELECT name from taxon_name where taxon_id = %s and name_class = %s",
                    (taxon_id, 'scientific name'))[0]
                files[taxon_id] = tname.replace(' ', '_') + extension
            taxid_to_dbids.setdefault(taxon_id, []).append(entry_key)

        # Every record is fetched before any output file is opened, so a
        # failing lookup happens before files are created or truncated.
        outdata = {}
        for taxid, dbid_list in taxid_to_dbids.items():
            for dbid, dbname in dbid_list:
                db = server[dbname]
                outdata.setdefault(taxid, []).append(db[dbid])

        for taxid, dbrecs in outdata.items():
            with open(files[taxid], 'w') as fp:
                if 'feat' in fmt:
                    for dbrec in dbrecs:
                        extract_feature(dbrec, fmt, fp)
                else:
                    SeqIO.write(dbrecs, fp, fmt)

    elif fmt == 'feat-prot':
        bioentry_ids = [dbid for dbid, _ in dbids]
        extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, bioentry_ids), type=['CDS'], translate=True)
    elif fmt == 'feat-nucl':
        bioentry_ids = [dbid for dbid, _ in dbids]
        extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, bioentry_ids))
    else:
        for dbid, dbname in dbids:
            db = server[dbname]
            try:
                dbrec = db[dbid]
                SeqIO.write(dbrec, sys.stdout, fmt)
            except KeyError:
                # Best effort: entries raising KeyError are skipped silently.
                pass

Beispiel #7

0

Datei anzeigen

Datei: extract_sequences_from_taxonomy.py Projekt: ctSkennerton/BioSQL-Extensions

def main(args):
    """Export records, features or a CSV for every bioentry under a taxon.

    ``args.taxid`` is treated as an NCBI taxon ID when it parses as an
    integer, otherwise as a taxon name matched with SQL ``LIKE``. If the
    lookup returns no rows, an explanatory message is printed to stderr
    and the process exits with status 1.

    Feature types for the non-split feature formats come from
    ``args.feature_type`` when given, else ``['CDS']`` for ``'feat-prot'``
    and ``['CDS', 'rRNA', 'tRNA']`` for ``'feat-nucl'``.

    With ``args.split_species`` set, one output file is written per taxon.
    The file is named after the taxon's scientific name (spaces replaced
    by underscores) with the extension ``.gb``, ``.faa``, ``.csv`` or
    ``.fna`` depending on the format. ``'csv'`` goes through
    ``print_feature_qv_csv``, formats containing ``'feat'`` through
    ``extract_feature``, and all others through ``SeqIO.write``.

    Without ``args.split_species`` the output is not split into files:
    ``'feat-prot'`` and ``'feat-nucl'`` are delegated to
    ``extract_feature_sql``, ``'csv'`` to ``print_feature_qv_csv``, and any
    other format is written record by record to stdout with ``SeqIO.write``.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )

    # Both lookups share this head; only the condition on the starting taxon
    # differs. The trailing "right_value = left_value + 1" condition keeps
    # taxa whose interval contains no other node (presumably the leaves of
    # the nested-set taxonomy -- confirm against the BioSQL schema).
    lookup_head = (
        "SELECT bioentry_id, taxon_id, biodatabase.name FROM bioentry JOIN "
        "biodatabase USING(biodatabase_id) WHERE taxon_id IN "
        "(SELECT DISTINCT include.taxon_id FROM taxon "
        "INNER JOIN taxon as include ON (include.left_value "
        "BETWEEN taxon.left_value AND taxon.right_value) "
    )

    try:
        ncbi_tax = int(args.taxid)
    except ValueError:
        ncbi_tax = None

    if ncbi_tax is not None:
        print("interpreting as an NCBI taxon ID...", file=sys.stderr)
        taxon_id_lookup_sql = lookup_head + (
            "WHERE taxon.ncbi_taxon_id  = %s "
            "AND include.right_value = include.left_value + 1)"
        )
        rows = server.adaptor.execute_and_fetchall(taxon_id_lookup_sql, (ncbi_tax,))
    else:
        print("interpreting as a taxon name...", file=sys.stderr)
        taxon_name_lookup_sql = lookup_head + (
            "WHERE taxon.taxon_id IN (SELECT taxon_id FROM taxon_name "
            "WHERE name like %s) "
            "AND include.right_value = include.left_value + 1)"
        )
        rows = server.adaptor.execute_and_fetchall(taxon_name_lookup_sql, (args.taxid,))

    fmt = args.output_format

    # `types` is only read by the non-split feat-prot / feat-nucl branches
    # below, which are exactly the formats that always receive a value here.
    if args.feature_type is not None:
        types = args.feature_type
    elif fmt == 'feat-prot':
        types = ['CDS']
    elif fmt == 'feat-nucl':
        types = ['CDS', 'rRNA', 'tRNA']

    if len(rows) == 0:
        print("\nThere does not appear to be any sequences associated with\n"
                "the taxonomy provided. If you used a taxonomy name, make sure\n"
                "it is spelled correctly. And remember that it must be the complete name\n"
                "for a particular rank, for example 'Deltaproteo' will match nothing\n"
                "it has to be 'Deltaproteobacteria'.\n"
                "Don't forget to add 'Candidatus ' to the begining of some names\n"
                "or the strain designation for a species. If you used an NCBI taxonomy ID, make\n"
                "sure that it is correct by double checking on the NCBI taxonomy website.", file=sys.stderr)
        sys.exit(1)

    # (bioentry_id, biodatabase name) -> taxon_id
    dbids = {(row[0], row[2]): row[1] for row in rows}

    if args.split_species:
        if fmt == 'gb':
            extension = '.gb'
        elif fmt == 'feat-prot':
            extension = '.faa'
        elif fmt == 'csv':
            extension = '.csv'
        else:
            extension = '.fna'

        files = {}
        taxid_to_dbids = {}
        for entry_key, taxon_id in dbids.items():
            # Resolve the file name once per taxon rather than once per
            # bioentry; the query result is the same for every entry.
            if taxon_id not in files:
                tname = server.adaptor.execute_and_fetch_col0(
                    "SELECT name from taxon_name where taxon_id = %s and name_class = %s",
                    (taxon_id, 'scientific name'))[0]
                files[taxon_id] = tname.replace(' ', '_') + extension
            taxid_to_dbids.setdefault(taxon_id, []).append(entry_key)

        # CSV is written straight away per taxon; for every other format the
        # records are all fetched before any output file is opened.
        outdata = {}
        for taxid, dbid_list in taxid_to_dbids.items():
            if fmt == 'csv':
                with open(files[taxid], 'w') as fp:
                    print_feature_qv_csv(server, get_seqfeature_ids_for_bioseqs(server, [x[0] for x in dbid_list]), fp)
            else:
                for dbid, dbname in dbid_list:
                    db = server[dbname]
                    outdata.setdefault(taxid, []).append(db[dbid])

        # `outdata` is only populated for non-CSV formats, so no CSV check
        # is needed when writing the collected records.
        for taxid, dbrecs in outdata.items():
            with open(files[taxid], 'w') as fp:
                if 'feat' in fmt:
                    for dbrec in dbrecs:
                        extract_feature(dbrec, fmt, fp)
                else:
                    SeqIO.write(dbrecs, fp, fmt)

    elif fmt == 'feat-prot':
        bioentry_ids = [dbid for dbid, _ in dbids]
        extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, bioentry_ids), type=types, translate=True)
    elif fmt == 'feat-nucl':
        bioentry_ids = [dbid for dbid, _ in dbids]
        extract_feature_sql(server, get_seqfeature_ids_for_bioseqs(server, bioentry_ids), type=types)
    elif fmt == 'csv':
        bioentry_ids = [dbid for dbid, _ in dbids]
        print_feature_qv_csv(server, get_seqfeature_ids_for_bioseqs(server, bioentry_ids))
    else:
        for dbid, dbname in dbids:
            db = server[dbname]
            try:
                dbrec = db[dbid]
                SeqIO.write(dbrec, sys.stdout, fmt)
            except KeyError:
                # Best effort: entries raising KeyError are skipped silently.
                pass