Example #1
0
def libs(args):
    """
    %prog libs libfile

    Get list of lib_ids to be run by pull(). The SQL commands:

    select library.lib_id, library.name from library join bac on
        library.bac_id=bac.id where bac.lib_name="Medicago";
    select seq_name from sequence where seq_name like 'MBE%'
        and trash is null;
    """
    p = OptionParser(libs.__doc__)
    p.set_db_opts(dbname="track", credentials=None)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    libfile, = args

    sqlcmd = "select library.lib_id, library.name, bac.gb# from library join bac on " + \
             "library.bac_id=bac.id where bac.lib_name='Medicago'"
    cur = connect(opts.dbname)
    results = fetchall(cur, sqlcmd)

    fw = open(libfile, "w")
    for lib_id, name, gb in results:
        name = name.translate(None, "\n")
        if not gb:
            gb = "None"

        print >> fw, "|".join((lib_id, name, gb))
    fw.close()
Example #2
0
File: db.py Project: radaniba/jcvi
def libs(args):
    """
    %prog libs libfile

    Get list of lib_ids to be run by pull(). The SQL commands:

    select library.lib_id, library.name from library join bac on
        library.bac_id=bac.id where bac.lib_name="Medicago";
    select seq_name from sequence where seq_name like 'MBE%'
        and trash is null;
    """
    p = OptionParser(libs.__doc__)
    p.set_db_opts(dbname="track", credentials=None)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    libfile, = args

    sqlcmd = "select library.lib_id, library.name, bac.gb# from library join bac on " + \
             "library.bac_id=bac.id where bac.lib_name='Medicago'"
    cur = connect(opts.dbname)
    results = fetchall(cur, sqlcmd)

    fw = open(libfile, "w")
    for lib_id, name, gb in results:
        name = name.translate(None, "\n")
        if not gb:
            gb = "None"

        print >> fw, "|".join((lib_id, name, gb))
    fw.close()
Example #3
0
def pull(args):
    """
    %prog pull libfile

    Pull the sequences using the first column in the libfile.
    """
    p = OptionParser(pull.__doc__)
    p.set_db_opts(dbname="mtg2", credentials=None)
    p.add_option(
        "--frag",
        default=False,
        action="store_true",
        help="The command to pull sequences from db",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (libfile, ) = args

    dbname = opts.dbname
    frag = opts.frag
    fp = open(libfile)
    hostname, username, password = get_profile()

    for row in fp:
        lib_id, name = row.split("|", 1)
        sqlfile = lib_id + ".sql"

        if not op.exists(sqlfile):
            fw = open(sqlfile, "w")
            print(
                "select seq_name from sequence where seq_name like" +
                " '{0}%' and trash is null".format(lib_id),
                file=fw,
            )
            fw.close()

        if frag:
            cmd = "pullfrag -D {0} -n {1}.sql -o {1} -q -S {2}".format(
                dbname, lib_id, hostname)
            cmd += " -U {0} -P {1}".format(username, password)
        else:
            cmd = "pullseq -D {0} -n {1}.sql -o {1} -q".format(dbname, lib_id)
        sh(cmd)
Example #4
0
File: db.py Project: rrane/jcvi
def pull(args):
    """
    %prog pull libfile

    Pull the sequences using the first column in the libfile.
    """
    p = OptionParser(pull.__doc__)
    p.set_db_opts(dbname="mtg2", credentials=None)
    p.add_option(
        "--frag", default=False, action="store_true", help="The command to pull sequences from db [default: %default]"
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    libfile, = args

    dbname = opts.dbname
    frag = opts.frag
    fp = open(libfile)
    hostname, username, password = get_profile()

    for row in fp:
        lib_id, name = row.split("|", 1)
        sqlfile = lib_id + ".sql"

        if not op.exists(sqlfile):
            fw = open(sqlfile, "w")
            print >> fw, "select seq_name from sequence where seq_name like" + " '{0}%' and trash is null".format(
                lib_id
            )
            fw.close()

        if frag:
            cmd = "pullfrag -D {0} -n {1}.sql -o {1} -q -S {2}".format(dbname, lib_id, hostname)
            cmd += " -U {0} -P {1}".format(username, password)
        else:
            cmd = "pullseq -D {0} -n {1}.sql -o {1} -q".format(dbname, lib_id)
        sh(cmd)
Example #5
0
def query(args):
    """
    %prog query "SELECT feat_name FROM asm_feature WHERE feat_type = \\"{0}\\" AND end5 <= \\"{1}\\" AND end3 >= \\"{2}\\"" ::: datafile1 ....

    Script takes the data from tab-delimited datafile(s) and replaces the placeholders
    in the query which is then executed. Depending upon the type of query, results are
    either printed out (when running `select`) or not (when running `insert`, `update`
    or `delete`)

    If the query contains quotes around field values, then these need to be escaped with \\
    """
    p = OptionParser(query.__doc__)
    p.set_db_opts()
    p.add_option(
        "--dryrun",
        default=False,
        action="store_true",
        help="Don't commit to database. Just print queries [default: %default]"
    )
    p.set_sep(help="Specify output field separator")
    p.set_verbose(help="Print out all the queries")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    fieldsep = opts.sep

    sep = ":::"
    files = None
    if sep in args:
        sepidx = args.index(sep)
        files = args[sepidx + 1:]
        args = args[:sepidx]
        if not files:
            files = [""]

    qrys = []
    qry = " ".join(args)
    if ";" in qry:
        for q in qry.split(";"):
            if len(q.strip()) > 0:
                qrys.append(q)
    else:
        qrys.append(qry)

    queries = set()
    if files:
        for datafile in files:
            datafile = datafile.strip()
            fp = must_open(datafile)
            for row in fp:
                for qry in qrys:
                    qry = qry.strip()
                    m = re.findall(r"\{\d+\}", qry)
                    if m:
                        mi = [int(x.strip("{}")) for x in m]
                        atoms = row.strip().split("\t")
                        assert max(mi) <= len(atoms), \
                                "Number of columns in `datafile`({0})".format(len(atoms)) + \
                                " != number of `placeholders`({0})".format(len(m))
                        natoms = [atoms[x] for x in mi]
                        for idx, (match, atom) in enumerate(zip(m, natoms)):
                            qry = qry.replace(match, atom)
                    queries.add(qry)
    else:
        for qry in qrys:
            if re.search(r"\{\d+\}", qry):
                logging.error(
                    "Query `{0}` contains placeholders, no datafile(s) specified"
                    .format(qry))
                sys.exit()
            queries.add(qry)

    if not opts.dryrun:
        fw = must_open(opts.outfile, "w")
        dbh, cur = connect(opts.dbname, connector=opts.dbconn, hostname=opts.hostname, \
                username=opts.username, password=opts.password, port=opts.port)
    cflag = None
    for qry in queries:
        if opts.dryrun or opts.verbose:
            print qry
        if not opts.dryrun:
            if to_commit(qry):
                execute(cur, qry)
                cflag = True
            else:
                results = fetchall(cur, qry, connector=opts.dbconn)
                for result in results:
                    print >> fw, fieldsep.join([str(x) for x in result])
    if not opts.dryrun and cflag:
        commit(dbh)
Example #6
0
File: db.py Project: radaniba/jcvi
def query(args):
    """
    %prog query "SELECT feat_name FROM asm_feature WHERE feat_type = \\"{0}\\" AND end5 <= \\"{1}\\" AND end3 >= \\"{2}\\"" ::: datafile1 ....

    Script takes the data from tab-delimited datafile(s) and replaces the placeholders
    in the query which is then executed. Depending upon the type of query, results are
    either printed out (when running `select`) or not (when running `insert`, `update`
    or `delete`)

    If the query contains quotes around field values, then these need to be escaped with \\
    """
    p = OptionParser(query.__doc__)
    p.set_db_opts()
    p.add_option("--dryrun", default=False, action="store_true",
                 help="Don't commit to database. Just print queries [default: %default]")
    p.set_sep(help="Specify output field separator")
    p.set_verbose(help="Print out all the queries")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    fieldsep = opts.sep

    sep = ":::"
    files = None
    if sep in args:
        sepidx = args.index(sep)
        files = args[sepidx + 1:]
        args = args[:sepidx]
        if not files:
            files = [""]

    qrys = []
    qry = " ".join(args)
    if ";" in qry:
        for q in qry.split(";"):
            if len(q.strip()) > 0:
                qrys.append(q)
    else:
        qrys.append(qry)

    queries = set()
    if files:
        for datafile in files:
            datafile = datafile.strip()
            fp = must_open(datafile)
            for row in fp:
                for qry in qrys:
                    qry = qry.strip()
                    m = re.findall(r"\{\d+\}", qry)
                    if m:
                        mi = [int(x.strip("{}")) for x in m]
                        atoms = row.strip().split("\t")
                        assert max(mi) <= len(atoms), \
                                "Number of columns in `datafile`({0})".format(len(atoms)) + \
                                " != number of `placeholders`({0})".format(len(m))
                        natoms = [atoms[x] for x in mi]
                        for idx, (match, atom) in enumerate(zip(m, natoms)):
                            qry = qry.replace(match, atom)
                    queries.add(qry)
    else:
        for qry in qrys:
            if re.search(r"\{\d+\}", qry):
                logging.error("Query `{0}` contains placeholders, no datafile(s) specified".format(qry))
                sys.exit()
            queries.add(qry)

    if not opts.dryrun:
        fw = must_open(opts.outfile, "w")
        dbh, cur = connect(opts.dbname, connector=opts.dbconn, hostname=opts.hostname, \
                username=opts.username, password=opts.password, port=opts.port)
    cflag = None
    for qry in queries:
        if opts.dryrun or opts.verbose:
            print qry
        if not opts.dryrun:
            if to_commit(qry):
                execute(cur, qry)
                cflag = True
            else:
                results = fetchall(cur, qry)
                for result in results:
                    print >> fw, fieldsep.join([str(x) for x in result])
    if not opts.dryrun and cflag:
        commit(dbh)