Beispiel #1
0
def structureFromBed(path, chrom=None, start=None, end=None, offset=0):
	"""Build a Structure from an intrachromosomal BED file.

	Every non-blank line is split on whitespace; fields 1 and 4 (zero-based)
	are parsed as the integer positions of the two loci of a contact. A Point
	is stored for both loci of each contact whose positions lie in
	[start, end] and whose absolute indices differ.

	Args:
		path: path of the BED file to read.
		chrom: chromosome object; when None it is obtained from
			chromFromBed(path).
		start: lowest position to include; defaults to chrom.minPos.
		end: highest position to include; defaults to chrom.maxPos.
		offset: passed through to the Structure constructor.

	Returns:
		The populated Structure, after set_rel_indices() has been called.
	"""
	if chrom is None:
		chrom = chromFromBed(path)
	if start is None:
		start = chrom.minPos
	if end is None:
		end = chrom.maxPos

	structure = Structure([], [], chrom, offset)

	# One slot per bin between start and end; a slot stays 0 until a locus
	# falling into that bin is seen in the file.
	numSlots = int((end - start)/chrom.res) + 1
	structure.points = np.zeros(numSlots, dtype=object)
	tracker = Tracker("Identifying loci", structure.chrom.size)

	# The with-statement closes the file; no explicit close() is needed.
	with open(path) as listFile:
		for line in listFile:
			fields = line.split()
			if fields:	# blank lines carry no contact; skip them instead of crashing
				pos1 = int(fields[1])
				pos2 = int(fields[4])
				if start <= pos1 <= end and start <= pos2 <= end:
					abs_index1 = structure.chrom.getAbsoluteIndex(pos1)
					abs_index2 = structure.chrom.getAbsoluteIndex(pos2)
					if abs_index1 != abs_index2:	# non-self-interacting
						structure.points[int((pos1 - start)/chrom.res)] = Point((0,0,0), structure.chrom, abs_index1, 0)
						structure.points[int((pos2 - start)/chrom.res)] = Point((0,0,0), structure.chrom, abs_index2, 0)
			# Advance once per line read, including skipped ones.
			tracker.increment()

	structure.set_rel_indices()

	return structure
Beispiel #2
0
def structureFromBed(path,
                     chrom=None,
                     start=None,
                     end=None,
                     offset=0,
                     tads=None):
    """Build a Structure from an intrachromosomal BED file.

    Every non-blank line is split on whitespace; fields 1 and 4 (zero-based)
    are parsed as the integer positions of the two loci of a contact. A Point
    is stored for both loci of each contact whose positions lie in
    [start, end] and whose point numbers differ.

    Args:
        path: path of the BED file to read.
        chrom: chromosome object; when None it is obtained from
            chromFromBed(path).
        start: lowest position to include; defaults to chrom.minPos.
        end: highest position to include; defaults to chrom.maxPos.
        offset: passed through to the Structure constructor.
        tads: accepted for interface compatibility; not used by this
            function (no TAD-based filtering is performed).

    Returns:
        The populated Structure, after indexPoints() has been called.
    """
    if chrom is None:
        chrom = chromFromBed(path)
    if start is None:
        start = chrom.minPos
    if end is None:
        end = chrom.maxPos

    structure = Structure([], [], chrom, offset)

    # int(...) keeps the array size and the slot indices below integral even
    # when "/" is true division; NumPy rejects float sizes and float indices.
    numSlots = int((end - start) / chrom.res) + 1
    # One slot per bin; a slot stays 0 until a locus in that bin is seen.
    structure.points = np.zeros(numSlots, dtype=object)
    tracker = Tracker("Identifying loci", structure.chrom.size)

    # The with-statement closes the file; no explicit close() is needed.
    with open(path) as listFile:
        for line in listFile:
            fields = line.split()
            if fields:  # blank lines carry no contact; skip them
                pos1 = int(fields[1])
                pos2 = int(fields[4])
                if start <= pos1 <= end and start <= pos2 <= end:
                    pointNum1 = structure.chrom.getPointNum(pos1)
                    pointNum2 = structure.chrom.getPointNum(pos2)
                    if pointNum1 != pointNum2:  # non-self-interacting
                        slot1 = int((pos1 - start) / chrom.res)
                        slot2 = int((pos2 - start) / chrom.res)
                        structure.points[slot1] = Point(
                            (0, 0, 0), pointNum1, structure.chrom, 0)
                        structure.points[slot2] = Point(
                            (0, 0, 0), pointNum2, structure.chrom, 0)
            # Advance once per line read, including skipped ones.
            tracker.increment()

    structure.indexPoints()

    return structure
Beispiel #3
0
def clusterFromBed(path, chrom, tads):
    """Build a Cluster from an intrachromosomal BED file.

    Every non-blank line is split on whitespace; fields 1 and 4 (zero-based)
    are parsed as the integer positions of the two loci of a contact. A locus
    receives a Point when it takes part in at least one contact whose two
    point numbers differ and map to the same TAD number.

    Args:
        path: path of the BED file to read.
        chrom: chromosome object; when None it is obtained from
            intraChromFromBed(path, None).
        tads: iterable of (first, last) pairs, or None. Each pair contributes
            range(first, last) entries to a flat TAD-number list that is
            indexed by point number; when None every locus gets TAD number 0.

    Returns:
        The populated Cluster, after indexPoints() has been called.
    """
    if chrom is None:
        chrom = intraChromFromBed(path, None)

    cluster = Cluster([], [], chrom, 0)
    numPoints = cluster.chrom.getLength()

    # TAD number for every locus (TADs are numbered from 1)
    if tads is None:
        tadNums = np.zeros(numPoints)
    else:
        tadNums = []
        for tadNum, tad in enumerate(tads, 1):
            for _ in range(tad[0], tad[1]):
                tadNums.append(tadNum)
    # Point numbers beyond the end of tadNums are clamped to its last entry.
    maxIndex = len(tadNums) - 1

    # True where a locus should be added. Builtin bool/object are used as
    # dtypes because the np.bool / np.object aliases were removed from NumPy.
    points_to_add = np.zeros(numPoints, dtype=bool)
    tracker = Tracker("Identifying loci", cluster.chrom.size)

    # find which loci should be added
    with open(path) as listFile:
        for line in listFile:
            fields = line.split()
            if fields:  # blank lines carry no contact; skip them
                pointNum1 = cluster.chrom.getPointNum(int(fields[1]))
                pointNum2 = cluster.chrom.getPointNum(int(fields[4]))
                if pointNum1 is not None and pointNum2 is not None:
                    tadNum1 = tadNums[min(pointNum1, maxIndex)]
                    tadNum2 = tadNums[min(pointNum2, maxIndex)]
                    # non-self-interacting and within the same TAD
                    if pointNum1 != pointNum2 and tadNum1 == tadNum2:
                        points_to_add[pointNum1] = True
                        points_to_add[pointNum2] = True
            # Advance once per line read, including skipped ones.
            tracker.increment()

    # create points; slots of loci that were not selected stay 0
    points = np.zeros(numPoints, dtype=object)
    for pointNum in np.where(points_to_add)[0]:
        points[pointNum] = Point((0, 0, 0), pointNum, cluster.chrom, None)
    cluster.points = points
    cluster.indexPoints()

    return cluster
Beispiel #4
0
def matFromDixon(path, chrom):
    """Create a contact matrix from Dixon TSV data.

    Every non-blank line is split on whitespace; fields 0 and 1 are parsed as
    the integer positions of a contact. Contacts whose two positions are equal,
    or whose positions fall outside [chrom.minPos, chrom.maxPos], are ignored.

    Args:
        path: path of the TSV file to read.
        chrom: chromosome object providing getLength(), size, minPos, maxPos
            and getAbsoluteIndex().

    Returns:
        A square NumPy array with chrom.getLength() rows and columns. Each
        accepted contact adds 1 to the entry [row, col] with row >= col, so
        only the lower triangle (and diagonal) is filled.
    """
    numBins = chrom.getLength()
    mat = np.zeros((numBins, numBins))
    tracker = Tracker("Reading " + path, chrom.size)

    # The with-statement closes the file; no explicit close() is needed.
    with open(path) as infile:
        for line in infile:
            fields = line.split()
            if fields:  # blank lines carry no contact; skip them
                pos1 = int(fields[0])
                pos2 = int(fields[1])
                if pos1 != pos2:
                    if (chrom.minPos <= pos1 <= chrom.maxPos
                            and chrom.minPos <= pos2 <= chrom.maxPos):
                        bin1 = chrom.getAbsoluteIndex(pos1)
                        bin2 = chrom.getAbsoluteIndex(pos2)
                        # Order the pair so the larger bin is always the row;
                        # otherwise the same contact could be counted in
                        # either triangle depending on field order.
                        if bin1 > bin2:
                            row, col = bin1, bin2
                        else:
                            row, col = bin2, bin1
                        mat[row, col] += 1
            # Advance once per line read, including skipped ones.
            tracker.increment()
    return mat
Beispiel #5
0
import numpy as np

# Command-line arguments: input HDF5 path and output BED path.
in_path = sys.argv[1]
out_path = sys.argv[2]

# Copy every dataset we need into memory so the HDF5 file can be closed
# before conversion starts. The file is only read, so open it read-only;
# the with-statement closes it even if a dataset is missing.
with h5py.File(in_path, "r") as f:
    counts = np.array(f["pixels"]["count"])
    bin_ids1 = np.array(f["pixels"]["bin1_id"])
    bin_ids2 = np.array(f["pixels"]["bin2_id"])
    chroms = np.array(f["bins"]["chrom"])
    starts = np.array(f["bins"]["start"])
    ends = np.array(f["bins"]["end"])

tracker = Tracker("Converting to BED", len(counts))

# Parenthesized single-argument form prints the same text on Python 2 and 3.
print("Begin converting to BED")
with open(out_path, "w") as out_file:
    for count, bin_id1, bin_id2 in zip(counts, bin_ids1, bin_ids2):
        if count != 0:  # pixels with a zero count produce no output line
            chrom1 = str(chroms[bin_id1] + 1)  # switch to 1-indexed
            chrom2 = str(chroms[bin_id2] + 1)
            fields = (
                "chr" + chrom1,
                str(starts[bin_id1]),
                str(ends[bin_id1]),
                "chr" + chrom2,
                str(starts[bin_id2]),
                str(ends[bin_id2]),
                str(count),
            )
            out_file.write("\t".join(fields))
            out_file.write("\n")
        # The tracker was sized with len(counts), so advance it once per pixel.
        tracker.increment()
Beispiel #6
0
"""Convert fixedStep wig to binned bed."""

import sys

sys.path.append("..")
from tools import Tracker

# Command-line arguments: path of the input wig file, the bin size, and an
# integer that is handed to Tracker below.
wig = sys.argv[1]
bin_size = int(sys.argv[2])
file_size = int(sys.argv[3])

# Everything before the first "." of the given path.
# NOTE(review): a path with an earlier "." (e.g. "./x.wig") yields a
# truncated or empty prefix -- confirm callers pass bare file names.
prefix = wig.split(".")[0]

# file_size is Tracker's second argument (presumably the total number of
# steps to report progress against -- TODO confirm against tools.Tracker).
tracker = Tracker("Converting {}".format(wig), file_size)

# Running accumulators, both starting at zero.
tot = 0
count = 0

with open(wig) as in_file:
    with open("{}_{}kb.bed".format(prefix, bin_size / 1000), "w") as out_file:
        for line in in_file:
            line = line.strip().split()
            if line[0] == "fixedStep":  #header
                chrom = line[1].split("=")[1]
                curr_pos = int(line[2].split("=")[1])
                step = int(line[3].split("=")[1])
                span = int(line[4].split("=")[1])
            else:
                tot += float(line[0])
                count += span
                if curr_pos % bin_size == 0: