Ejemplo n.º 1
0
def merge2IntervalTree(mergeTree):
    """
    Converts a per-chromosome mapping of merged regions into a ChromTree

    mergeTree must provide iteritems() yielding (chrom, tree) pairs, where
    each tree's getregions() yields 3-tuples (beg, end, ns). Every region is
    inserted into a new ChromTree under its chromosome with the tuple
    (beg, end) passed as the fourth argument; the third tuple element is
    ignored. Returns the new ChromTree.
    """
    convTree = ChromTree()
    # iteritems() already hands us the per-chromosome tree, so there is no
    # need to index mergeTree a second time.
    for chrom, regions in mergeTree.iteritems():
        for beg, end, _ in regions.getregions():
            convTree.insert(chrom, beg, end, (beg, end))
    return convTree
Ejemplo n.º 2
0
def merge2IntervalTree(mergeTree):
    """
    Converts a per-chromosome mapping of merged regions into a ChromTree

    mergeTree must provide iteritems() yielding (chrom, tree) pairs, where
    each tree's getregions() yields 3-tuples (beg, end, ns). Every region is
    inserted into a new ChromTree under its chromosome with the tuple
    (beg, end) passed as the fourth argument; the third tuple element is
    ignored. Returns the new ChromTree.
    """
    convTree = ChromTree()
    # iteritems() already hands us the per-chromosome tree, so there is no
    # need to index mergeTree a second time.
    for chrom, regions in mergeTree.iteritems():
        for beg, end, _ in regions.getregions():
            convTree.insert(chrom, beg, end, (beg, end))
    return convTree
Ejemplo n.º 3
0
def intervalTree(fin):
    """
    Reads gzipped bed file as interval tree

    fin is the path of a gzip-compressed, tab-separated file. The first
    three fields of each line are taken as chromosome, start and end; start
    and end are converted with int(). Lines whose first field is exactly
    "Chrom" are treated as a header and skipped. Each remaining line is
    inserted into a new ChromTree with the tuple (start, end) passed as the
    fourth argument.

    Prints the number of lines read (header lines included) and returns
    the populated ChromTree.
    """
    i = 0
    tree = ChromTree()
    # The context manager closes the gzip handle even when a line fails to
    # parse; the bare gzip.open() in a for-loop left it open.
    with gzip.open(fin, 'r') as handle:
        for line in handle:
            i += 1
            chrom, start, end = line.rstrip().split("\t")[:3]
            if chrom == "Chrom":
                continue
            start, end = int(start), int(end)
            tree.insert(chrom, start, end, (start, end))
    # Single parenthesised argument: prints the same text under the
    # Python 2 print statement and is also valid as a function call.
    print("Read {i} lines from {fin}".format(i=i, fin=fin))
    return tree
Ejemplo n.º 4
0
def intervalTree(fin):
    """
    Reads gzipped bed file as interval tree

    fin is the path of a gzip-compressed, tab-separated file. The first
    three fields of each line are taken as chromosome, start and end; start
    and end are converted with int(). Lines whose first field is exactly
    "Chrom" are treated as a header and skipped. Each remaining line is
    inserted into a new ChromTree with the tuple (start, end) passed as the
    fourth argument.

    Prints the number of lines read (header lines included) and returns
    the populated ChromTree.
    """
    i = 0
    tree = ChromTree()
    # The context manager closes the gzip handle even when a line fails to
    # parse; the bare gzip.open() in a for-loop left it open.
    with gzip.open(fin, 'r') as handle:
        for line in handle:
            i += 1
            chrom, start, end = line.rstrip().split("\t")[:3]
            if chrom == "Chrom":
                continue
            start, end = int(start), int(end)
            tree.insert(chrom, start, end, (start, end))
    # Single parenthesised argument: prints the same text under the
    # Python 2 print statement and is also valid as a function call.
    print("Read {i} lines from {fin}".format(i=i, fin=fin))
    return tree
Ejemplo n.º 5
0
def main(args):
    """
    Build a SNP-by-annotation 0/1 matrix from BED files and write it out.

    Reads ``args.bfile + '.bim'`` as a headerless tab-separated table with
    columns CHR, SNP, CM, BP, A1, A2. For every file in ``args.annot_bed``
    a fresh ChromTree is filled with that file's intervals (columns 0, 1, 2
    of the headerless BED table), each widened by ``args.extend_bp + 1`` on
    both sides. A SNP gets a 1 in that annotation's column when
    ``trees.find(CHR, BP, BP)`` yields at least one item, otherwise 0.

    Attributes read from ``args``: bfile, annot_bed, annot_name, extend_bp,
    chr, out, only_annot. When ``args.chr`` is set it is replaced by the
    value returned from ``ChromTree._format_chrom`` and only BED rows whose
    first column equals it are used.

    Output is written to ``args.out + '.annot'``. Unless ``args.only_annot``
    is true, the CHR, BP, SNP and CM columns are prepended and the file is
    gzip-compressed with a ``.gz`` suffix appended to its name.
    """
    snps = pd.read_csv(args.bfile + '.bim',
                       header=None,
                       delimiter='\t',
                       names=['CHR', 'SNP', 'CM', 'BP', 'A1', 'A2'])
    n_snp = len(snps)
    n_annot = len(args.annot_bed)
    annot_matrix = np.zeros((n_snp, n_annot), dtype=np.int8)
    annot_names = []
    # NOTE(review): the extra +1 presumably compensates for how ChromTree
    # treats interval end points -- confirm against ChromTree.find.
    extend = args.extend_bp + 1

    # Normalise the chromosome filter once rather than on every pass through
    # the loop, so the formatter is never applied to its own output.
    if args.chr is not None:
        args.chr = ChromTree()._format_chrom(args.chr)

    for i, bed_path in enumerate(args.annot_bed):
        trees = ChromTree()
        bed = pd.read_csv(bed_path, header=None, delimiter='\t')
        if args.annot_name is None:
            # Drops the last four characters of the file name, i.e. assumes
            # a four-character extension such as ".bed".
            annot_name = os.path.basename(bed_path)[:-4]
        else:
            annot_name = args.annot_name[i]
        annot_names.append(annot_name)
        if args.chr is not None:
            bed = bed[bed[0] == args.chr]

        # Plain loops instead of DataFrame.apply / nested comprehensions:
        # the calls are made purely for their side effects.
        for chrom, start, end in zip(bed[0], bed[1], bed[2]):
            trees.insert(chrom, start - extend, end + extend, i)

        # snps keeps the default 0..n-1 index from read_csv, so the
        # enumerate position is the row of annot_matrix to update.
        for j, (chrom, pos) in enumerate(zip(snps['CHR'], snps['BP'])):
            for _ in trees.find(chrom, pos, pos):
                # One hit is enough to flag the SNP for this annotation.
                annot_matrix[j, i] = 1
                break

    out_fname_annot = args.out + '.annot'
    comp = None
    if not args.only_annot:
        annot_df = pd.concat(
            [snps[['CHR', 'BP', 'SNP', 'CM']],
             pd.DataFrame(annot_matrix)],
            axis=1)
        annot_df.columns = ['CHR', 'BP', 'SNP', 'CM'] + annot_names
        out_fname_annot += '.gz'
        comp = 'gzip'
    else:
        annot_df = pd.DataFrame(annot_matrix)
        annot_df.columns = annot_names
    annot_df.to_csv(out_fname_annot,
                    sep="\t",
                    header=True,
                    index=False,
                    float_format='%.4f',
                    compression=comp)