Ejemplo n.º 1
0
def get_region(c, args):
    """
    Print the variants on a chromosome whose start or end lies in a region.

    ``args.region`` must contain a ``chrom:start-end`` token (for example
    ``chr1:100-200``).  A variant is reported when its ``start`` OR its
    ``end`` falls between the requested start and end (inclusive, per SQL
    ``BETWEEN``); a variant that begins before the region and finishes
    after it matches neither test and is therefore not reported.

    Arguments:
        c    -- database cursor; it is executed, its ``description`` is
                read, and it is iterated for the result rows.
        args -- parsed options; ``region``, ``use_header`` and
                ``separator`` are read.

    Exits through ``sys.exit`` with an error message when no
    ``chrom:start-end`` token can be found in ``args.region``.
    """
    # re.search returns the same leftmost match that findall()[0] would.
    region_match = re.search(r"(\S+):(\d+)-(\d+)", args.region)
    if region_match is None:
        sys.exit("Malformed region (--reg) string")

    chrom, start, end = region_match.groups()
    # The pattern guarantees digits only, so int() cannot fail here.
    start = int(start)
    end = int(end)

    # Bind the user-supplied values instead of pasting them into the SQL
    # text: chrom is any run of non-space characters, so it may contain
    # quotes.  Uses qmark placeholders -- assumes `c` is a sqlite3 cursor
    # (TODO confirm against the caller).
    query = ("SELECT * "
             "FROM variants v "
             "WHERE v.chrom = ? "
             "AND ((v.start BETWEEN ? AND ?) "
             "OR (v.end BETWEEN ? AND ?)) "
             "ORDER BY chrom, start")
    c.execute(query, (chrom, start, end, start, end))

    # build a list of all the column indices that are NOT
    # gt_* columns.  These will be the columns reported
    (col_names, non_gt_idxs) = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(col_names))
    for row in c:
        # NULL values are reported as "."
        print(args.separator.join(
            str(row[i]) if row[i] is not None else "." for i in non_gt_idxs))
Ejemplo n.º 2
0
def get_genotypes(conn, metadata, args):
    """For each variant, report each sample's genotype
       on a separate line.

    Arguments:
        conn     -- connection used to execute the variants query.
        metadata -- schema metadata; passed to the sample-index helper and
                    used to look up the "variants" table for the header.
        args     -- parsed options; ``use_header`` and ``separator`` are
                    read.

    Every output line holds the selected variant columns (all but the
    trailing ``gts`` blob), then the sample name and that sample's
    genotype, all joined by ``args.separator``.
    """
    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    res = conn.execute(sql.text(query))

    # Names of the variants-table columns that are NOT gt_* columns.
    # NOTE(review): these come from the full table metadata while the rows
    # below carry only the columns selected above -- confirm they line up.
    col_names, _ = \
        util.get_col_names_and_indices(metadata.tables["variants"], ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    if args.use_header:
        print(args.separator.join(col_names))
    for row in res:
        gts = Z.unpack_genotype_blob(row['gts'])
        # The variant columns are identical for every sample of this row,
        # so build them once instead of once per genotype.
        # xrange(len(row)-1) to avoid printing v.gts
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            print(args.separator.join(
                (variant_fields, idx_to_sample[idx], gt)))
Ejemplo n.º 3
0
def get_genotypes(c, args):
    """For each variant, report each sample's genotype
       on a separate line.

    Arguments:
        c    -- database cursor; passed to the sample-index helper,
                executed, and iterated for the result rows.
        args -- parsed options; ``use_header`` and ``separator`` are read.

    Every output line holds the selected variant columns (all but the
    trailing ``gts`` blob), then the sample name and that sample's
    genotype, all joined by ``args.separator``.
    """
    # Helper name is kept exactly as this file already spells it.
    idx_to_sample = util.map_indicies_to_samples(c)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"

    c.execute(query)

    # Names of the result columns that are NOT gt_* columns.
    col_names, _ = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    if args.use_header:
        print(args.separator.join(col_names))
    for row in c:
        # SECURITY NOTE: the genotype blob is unpickled, which can execute
        # arbitrary code -- only open databases from a trusted source.
        gts = np.array(cPickle.loads(zlib.decompress(row['gts'])))
        # The variant columns are identical for every sample of this row,
        # so build them once instead of once per genotype.
        # xrange(len(row)-1) to avoid printing v.gts
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            # Join everything with the separator; two print statements
            # chained by a trailing comma would put a space between the
            # variant columns and the sample name instead.
            print(args.separator.join(
                [variant_fields, idx_to_sample[idx], gt]))
Ejemplo n.º 4
0
def get_genotypes(c, args):
    """For each variant, report each sample's genotype
       on a separate line.

    Arguments:
        c    -- database cursor; passed to the sample-index helper,
                executed, and iterated for the result rows.
        args -- parsed options; ``use_header`` and ``separator`` are read.

    Every output line holds the selected variant columns (all but the
    trailing ``gts`` blob), then the sample name and that sample's
    genotype, all joined by ``args.separator``.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    c.execute(query)

    # Names of the result columns that are NOT gt_* columns.
    col_names, _ = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    if args.use_header:
        print(args.separator.join(col_names))
    for row in c:
        # SECURITY NOTE: the genotype blob is unpickled, which can execute
        # arbitrary code -- only open databases from a trusted source.
        gts = np.array(cPickle.loads(zlib.decompress(row['gts'])))
        # Built once per variant: these columns do not change per sample.
        # xrange(len(row)-1) to avoid printing v.gts
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            # A single join keeps the separator consistent across the whole
            # line; a trailing-comma print would insert a space between the
            # variant columns and the sample name.
            print(args.separator.join(
                [variant_fields, idx_to_sample[idx], gt]))
Ejemplo n.º 5
0
def get_samples(c, args):
    """
    Dump every row of the samples table to standard output.

    The column names are printed first when ``args.use_header`` is set.
    Each row is written with its fields joined by ``args.separator``;
    fields that are None are shown as ".".
    """
    c.execute("SELECT * FROM samples")

    header, _col_idxs = util.get_col_names_and_indices(c.description)
    if args.use_header:
        print(args.separator.join(header))

    for record in c:
        fields = []
        for pos in xrange(len(record)):
            value = record[pos]
            if value is None:
                fields.append(".")
            else:
                fields.append(str(value))
        print(args.separator.join(fields))
Ejemplo n.º 6
0
def get_samples(c, args):
    """
    Write the full contents of the samples table, one row per line.

    When ``args.use_header`` is true a line of column names comes first.
    Fields are joined with ``args.separator`` and None is rendered as ".".
    """
    sample_query = "SELECT * FROM samples"
    c.execute(sample_query)

    names, _unused_idxs = util.get_col_names_and_indices(c.description)
    if args.use_header:
        print(args.separator.join(names))

    for sample in c:
        rendered = ["." if sample[k] is None else str(sample[k])
                    for k in xrange(len(sample))]
        print(args.separator.join(rendered))
Ejemplo n.º 7
0
def get_samples(conn, metadata, args):
    """
    Dump every row of the samples table to standard output.

    The header names are taken from ``metadata.tables["samples"]`` and are
    printed first when ``args.use_header`` is set.  Each row is written
    with its fields joined by ``args.separator``; fields that are None are
    shown as ".".
    """
    rows = conn.execute("SELECT * FROM samples")

    samples_table = metadata.tables["samples"]
    header, _col_idxs = util.get_col_names_and_indices(samples_table)
    if args.use_header:
        print(args.separator.join(header))

    for record in rows:
        fields = []
        for pos in xrange(len(record)):
            value = record[pos]
            if value is None:
                fields.append(".")
            else:
                fields.append(str(value))
        print(args.separator.join(fields))
Ejemplo n.º 8
0
def get_samples(conn, metadata, args):
    """
    Write the full contents of the samples table, one row per line.

    When ``args.use_header`` is true a line of column names (looked up
    from ``metadata.tables["samples"]``) comes first.  Fields are joined
    with ``args.separator`` and None is rendered as ".".
    """
    sample_query = "SELECT * FROM samples"
    result = conn.execute(sample_query)

    names, _unused_idxs = \
        util.get_col_names_and_indices(metadata.tables["samples"])
    if args.use_header:
        print(args.separator.join(names))

    for sample in result:
        rendered = ["." if sample[k] is None else str(sample[k])
                    for k in xrange(len(sample))]
        print(args.separator.join(rendered))
Ejemplo n.º 9
0
def get_variants(conn, metadata, args):
    """
    Print the variants table ordered by chromosome and start, leaving
    out the genotype (gt_*) columns.

    A header line is printed first when ``args.use_header`` is set.
    Fields are joined with ``args.separator``.
    """
    def _as_text(value):
        # None -> ".", unicode -> UTF-8 encoded bytes, anything else -> str()
        if value is None:
            return '.'
        if type(value) is unicode:
            return value.encode('utf-8')
        return str(value)

    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    result = conn.execute(query)

    # Names and positions of the columns to report (gt_* ones excluded).
    variants_table = metadata.tables["variants"]
    header, keep_idxs = \
        util.get_col_names_and_indices(variants_table, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(header))
    for record in result:
        print(args.separator.join(_as_text(record[k]) for k in keep_idxs))
Ejemplo n.º 10
0
def get_variants(conn, metadata, args):
    """
    Report each variant (sorted by chrom, start) on its own line,
    omitting the genotype vector columns.

    The column names are written first if ``args.use_header`` is true;
    ``args.separator`` delimits the fields.
    """
    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    rows = conn.execute(query)

    # Only the non gt_* columns are reported.
    names, reported = util.get_col_names_and_indices(
        metadata.tables["variants"], ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(names))

    for variant in rows:
        cells = []
        for pos in reported:
            if variant[pos] is None:
                # missing value placeholder
                cells.append('.')
            elif type(variant[pos]) is unicode:
                cells.append(variant[pos].encode('utf-8'))
            else:
                cells.append(str(variant[pos]))
        print(args.separator.join(cells))
Ejemplo n.º 11
0
def get_variants(c, args):
    """
    Print the variants table ordered by chromosome and start, leaving
    out the genotype (gt_*) columns.

    A header line is printed first when ``args.use_header`` is set.
    Fields are joined with ``args.separator`` and None is shown as ".".
    """
    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    c.execute(query)

    # Names and positions of the columns to report (gt_* ones excluded).
    header, keep_idxs = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(header))

    for record in c:
        fields = []
        for k in keep_idxs:
            if record[k] is None:
                fields.append(".")
            else:
                fields.append(str(record[k]))
        print(args.separator.join(fields))
Ejemplo n.º 12
0
def get_variants(c, args):
    """
    Report each variant (sorted by chrom, start) on its own line,
    omitting the genotype vector columns.

    The column names are written first if ``args.use_header`` is true;
    ``args.separator`` delimits the fields and None becomes ".".
    """
    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    c.execute(query)

    # Only the non gt_* columns are reported.
    names, reported = util.get_col_names_and_indices(
        c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(names))

    for variant in c:
        cells = ["." if variant[pos] is None else str(variant[pos])
                 for pos in reported]
        print(args.separator.join(cells))
Ejemplo n.º 13
0
def get_genotypes(conn, metadata, args):
    """For each variant, report each sample's genotype
       on a separate line.

    Arguments:
        conn     -- connection used to execute the variants query.
        metadata -- schema metadata; passed to the sample-index helper and
                    used to look up the "variants" table for the header.
        args     -- parsed options; ``use_header`` and ``separator`` are
                    read.

    Every output line holds the selected variant columns (all but the
    trailing ``gts`` blob), then the sample name and that sample's
    genotype, all joined by ``args.separator``.
    """
    import zlib

    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    res = conn.execute(sql.text(query))

    # Names of the variants-table columns that are NOT gt_* columns.
    # NOTE(review): these come from the full table metadata while the rows
    # below carry only the columns selected above -- confirm they line up.
    col_names, _ = \
        util.get_col_names_and_indices(metadata.tables["variants"], ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    if args.use_header:
        print(args.separator.join(col_names))

    unpack = Z.unpack_genotype_blob

    for row in res:
        try:
            gts = unpack(row['gts'])
        except zlib.error:
            # The blob could not be handled by the current unpacker: switch
            # to the snappy unpacker for this row and every later row.
            unpack = Z.snappy_unpack_blob
            gts = unpack(row['gts'])

        # The variant columns are identical for every sample of this row,
        # so build them once instead of once per genotype.
        # xrange(len(row)-1) to avoid printing v.gts
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            print(args.separator.join(
                (variant_fields, idx_to_sample[idx], gt)))
Ejemplo n.º 14
0
def get_gene(c, args):
    """
    Report all variants in a specific gene.

    Arguments:
        c    -- database cursor; it is executed, its ``description`` is
                read, and it is iterated for the result rows.
        args -- parsed options; ``gene``, ``use_header`` and ``separator``
                are read.

    Rows are ordered by chrom and start.  The gt_* columns are left out,
    fields are joined with ``args.separator`` and None is shown as ".".
    """
    # Bind the gene name instead of pasting it into the SQL text, so a
    # quote in the user-supplied value cannot alter the statement.  Uses a
    # qmark placeholder -- assumes `c` is a sqlite3 cursor (TODO confirm
    # against the caller).
    query = ("SELECT * "
             "FROM variants v "
             "WHERE v.gene = ? "
             "ORDER BY chrom, start")
    c.execute(query, (args.gene,))

    # build a list of all the column indices that are NOT
    # gt_* columns.  These will be the columns reported
    (col_names, non_gt_idxs) = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(col_names))

    for row in c:
        print(args.separator.join(
            str(row[i]) if row[i] is not None else "." for i in non_gt_idxs))