Ejemplo n.º 1
0
def interMDS(names, prefix, inter_res, intra_res, full, args):
    """Infer structures for several chromosomes in one shared frame.

    All chromosomes are first inferred together at low resolution from the
    interchromosomal matrix. Each chromosome is then inferred on its own at
    high resolution and rotated/translated onto its low-resolution version.

    Args:
        names: chromosome names; each one is substituted into the BED path
            "<prefix>_<name>_<intra res string>.bed".
        prefix: leading part of the BED paths (also given to get_inter_mat).
        inter_res: resolution assigned to the low-resolution chrom
            parameters.
        intra_res: resolution used to build the BED file names.
        full: if truthy, mm.fullMDS is used for the high-resolution step;
            otherwise mm.partitionedMDS.
        args: indexable parameters. args[3] and args[4] are forwarded to
            infer_structures and mm.fullMDS; the whole object is forwarded
            to mm.partitionedMDS.

    Returns:
        List of high-resolution structures, one per entry of names, in order.
    """
    inter_res_string = tools.get_res_string(inter_res)
    intra_res_string = tools.get_res_string(intra_res)

    # One intrachromosomal BED path per chromosome; used by both passes below.
    bed_paths = ["{}_{}_{}.bed".format(prefix, name, intra_res_string) for name in names]

    # Low-res structures: read each intra file with the chrom parameters
    # coarsened to inter_res and the bounds snapped outward to that grid.
    low_structures = []
    for bed_path in bed_paths:
        chrom = dt.chromFromBed(bed_path)
        chrom.res = inter_res
        chrom.minPos = int(np.floor(float(chrom.minPos)/chrom.res)) * chrom.res
        chrom.maxPos = int(np.ceil(float(chrom.maxPos)/chrom.res)) * chrom.res
        low_structures.append(dt.structureFromBed(bed_path, chrom))

    # offsets[k] = number of points in all chromosomes before chromosome k.
    num_chroms = len(names)
    offsets = np.zeros(num_chroms, dtype=int)
    points_so_far = 0
    for idx, structure in enumerate(low_structures[:-1], 1):
        points_so_far += len(structure.getPoints())
        offsets[idx] = points_so_far

    inter_mat = get_inter_mat(prefix, inter_res_string, intra_res_string, low_structures, offsets)

    # Joint low-resolution MDS over all chromosomes.
    infer_structures(inter_mat, low_structures, offsets, args[3], args[4])

    # Per-chromosome high-resolution MDS, aligned to the low-res result.
    high_structures = []
    translations = []
    for true_low, bed_path in zip(low_structures, bed_paths):
        if full:
            high_structure = mm.fullMDS(bed_path, False, args[4], args[3])
        else:
            high_structure = mm.partitionedMDS(bed_path, args)
        high_structures.append(high_structure)

        res_ratio = true_low.chrom.res/high_structure.chrom.res
        inferred_low = dt.highToLow(high_structure, res_ratio)

        # Bring the down-sampled copy to the same radius of gyration as the
        # true low-res structure before estimating the alignment.
        true_rg = la.radius_of_gyration(true_low)
        inferred_rg = la.radius_of_gyration(inferred_low)
        rescaling_factor = true_rg/inferred_rg
        rescaled_coords = [rescaling_factor * coord for coord in inferred_low.getCoords()]
        for idx, point in enumerate(inferred_low.getPoints()):
            point.pos = rescaled_coords[idx]

        rotation, translation = la.getTransformation(inferred_low, true_low)
        # Rotate now; the translation is applied later, once it can be rescaled.
        high_structure.transform(rotation, None)
        translations.append(translation)

    # Apply the translations, scaled by the mean high/low size ratio.
    low_rgs = np.array([la.radius_of_gyration(s) for s in low_structures])
    high_rgs = np.array([la.radius_of_gyration(s) for s in high_structures])
    scaling_factor = np.mean(high_rgs/low_rgs)
    for high_structure, translation in zip(high_structures, translations):
        high_structure.transform(None, scaling_factor*translation)

    return high_structures
Ejemplo n.º 2
0
def interMDS(names, inter_prefix, intra_prefix, inter_res, intra_res, intra_low_res, args):
    """Infer clusters for several chromosomes in one shared frame.

    All chromosomes are first inferred together at low resolution from the
    interchromosomal matrix. Each chromosome is then inferred on its own at
    high resolution and rotated/translated onto its low-resolution version.

    Args:
        names: chromosome names substituted into the BED file paths.
        inter_prefix: prefix forwarded to get_inter_mat.
        intra_prefix: leading part of every intrachromosomal BED path.
        inter_res: resolution used for the low-resolution BED file names.
        intra_res: resolution used for the high-resolution BED file names.
        intra_low_res: if None, mm.fullMDS is used for the high-resolution
            step; otherwise mm.partitionedMDS is used with a second BED file
            at this resolution.
        args: indexable parameters. args[4] is forwarded to mm.infer_clusters
            and mm.fullMDS; the whole object is forwarded to
            mm.partitionedMDS.

    Returns:
        List of high-resolution clusters, one per entry of names, in order.
    """
    inter_res_string = tools.get_res_string(inter_res)
    intra_res_string = tools.get_res_string(intra_res)
    intra_low_res_string = None if intra_low_res is None else tools.get_res_string(intra_low_res)

    # Low-res clusters are read from the intra files at the inter resolution.
    low_clusters = []
    for name in names:
        coarse_path = "{}_{}_{}.bed".format(intra_prefix, name, inter_res_string)
        low_clusters.append(dt.clusterFromBed(coarse_path, None, None))

    # offsets[k] = number of points in all chromosomes before chromosome k.
    num_chroms = len(names)
    offsets = np.zeros(num_chroms, dtype=np.int32)
    points_so_far = 0
    for idx, cluster in enumerate(low_clusters[:-1], 1):
        points_so_far += len(cluster.getPoints())
        offsets[idx] = points_so_far

    inter_mat = get_inter_mat(intra_prefix, inter_prefix, inter_res, low_clusters, offsets)

    # Joint low-resolution MDS over all chromosomes.
    mm.infer_clusters(inter_mat, low_clusters, offsets, args[4])

    # Per-chromosome high-resolution MDS, aligned to the low-res result.
    use_full_mds = intra_low_res_string is None
    high_clusters = []
    translations = []
    for true_low, name in zip(low_clusters, names):
        path = "{}_{}_{}.bed".format(intra_prefix, name, intra_res_string)
        if use_full_mds:
            high_cluster = mm.fullMDS(path, False, args[4])
        else:
            low_path = "{}_{}_{}.bed".format(intra_prefix, name, intra_low_res_string)
            high_cluster = mm.partitionedMDS(path, low_path, args)
        high_clusters.append(high_cluster)

        res_ratio = true_low.chrom.res/high_cluster.chrom.res
        inferred_low = dt.highToLow(high_cluster, res_ratio)

        # Bring the down-sampled copy to the same radius of gyration as the
        # true low-res cluster before estimating the alignment.
        true_rg = la.radius_of_gyration(true_low)
        inferred_rg = la.radius_of_gyration(inferred_low)
        rescaling_factor = true_rg/inferred_rg
        rescaled_coords = [rescaling_factor * coord for coord in inferred_low.getCoords()]
        for idx, point in enumerate(inferred_low.getPoints()):
            point.pos = rescaled_coords[idx]

        rotation, translation = la.getTransformation(inferred_low, true_low)
        # Rotate now; the translation is applied later, once it can be rescaled.
        high_cluster.transform(rotation, None)
        translations.append(translation)

    # Apply the translations, scaled by the mean high/low size ratio.
    low_rgs = np.array([la.radius_of_gyration(c) for c in low_clusters])
    high_rgs = np.array([la.radius_of_gyration(c) for c in high_clusters])
    scaling_factor = np.mean(high_rgs/low_rgs)
    for high_cluster, translation in zip(high_clusters, translations):
        high_cluster.transform(None, scaling_factor*translation)

    return high_clusters
Ejemplo n.º 3
0
 def rescale(self):
     """Rescale radius of gyration of structure to 1

     Every point's coordinates are divided by the structure's current
     radius of gyration (as computed by la.radius_of_gyration). Entries of
     self.points that compare equal to 0 are skipped.
     """
     # float() guarantees true division even if coordinates are integers
     # under Python 2; floor division here would truncate every coordinate
     # to a whole number instead of normalising the structure.
     rg = float(la.radius_of_gyration(self))
     for i, point in enumerate(self.points):
         if point != 0:
             x, y, z = point.pos
             self.points[i].pos = (x / rg, y / rg, z / rg)
Ejemplo n.º 4
0
def transform(trueLow, highSubstructure, res_ratio):
    """Rotate and translate a high-resolution substructure onto trueLow.

    A low-resolution approximation of highSubstructure is built, scaled to
    the radius of gyration of trueLow, and used to estimate the
    transformation, which is then applied to highSubstructure in place.

    Args:
        trueLow: low-resolution structure used as the alignment target.
        highSubstructure: high-resolution structure; modified in place.
        res_ratio: ratio forwarded to dt.highToLow.
    """
    # Low-resolution approximation of the high-resolution substructure.
    approx_low = dt.highToLow(highSubstructure, res_ratio)

    # Scale the approximation so both structures have the same size.
    target_rg = la.radius_of_gyration(trueLow)
    approx_rg = la.radius_of_gyration(approx_low)
    scaling_factor = target_rg / approx_rg
    for idx, pt in enumerate(approx_low.points):
        if pt != 0:
            px, py, pz = pt.pos
            approx_low.points[idx].pos = (
                px * scaling_factor,
                py * scaling_factor,
                pz * scaling_factor,
            )

    # Transformation taking the approximation onto the true low structure.
    rotation, translation = la.getTransformation(approx_low, trueLow)
    # The translation was estimated at the scaled size; undo that scaling
    # before applying it to the unscaled high-resolution substructure.
    translation /= scaling_factor

    highSubstructure.transform(rotation, translation)
Ejemplo n.º 5
0
def partitionedMDS(path, args):
    """Partitions structure into substructures and performs MDS

    A low-resolution structure is inferred for the whole BED file, the
    domains found at low resolution define high-resolution substructures,
    and each substructure is inferred separately (in parallel) and then
    rotated/translated onto its low-resolution counterpart.

    Args:
        path: BED file path; read for both the low- and high-resolution data.
        args: indexable parameters:
            args[0] domain smoothing parameter (passed to tad.getDomains)
            args[1] minimum size fraction (passed to tad.getDomains)
            args[2] not used by this function
            args[3] requested number of threads
            args[4] alpha used for the low-resolution inference
            args[5] factor by which the chrom resolution is multiplied to
                    obtain the low-resolution structure
            args[6] alpha used for each high-resolution substructure

    Returns:
        The high-resolution structure with its substructures set.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    num_threads = args[3]
    alpha = args[4]
    res_ratio = args[5]
    alpha2 = args[6]

    #create low-res structure
    low_chrom = dt.chromFromBed(path)
    low_chrom.res *= res_ratio
    lowstructure = dt.structureFromBed(path, low_chrom)  #low global structure

    #get TADs
    low_contactMat = dt.matFromBed(path, lowstructure)
    low_tad_indices = tad.getDomains(
        low_contactMat, lowstructure, domainSmoothingParameter, minSizeFraction
    )  #low substructures, defined on relative indices not absolute indices
    tad.substructuresFromTads(lowstructure, low_tad_indices)

    #create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(lowstructure.chrom.minPos,
                                   lowstructure.chrom.maxPos, res,
                                   lowstructure.chrom.name, size)

    highstructure = dt.Structure([], [], highChrom, 0)
    high_substructures = []

    low_gen_coords = lowstructure.getGenCoords()
    offset = 0  #initialize
    for td in low_tad_indices:
        start_gen_coord = low_gen_coords[td[0]]
        end_gen_coord = low_gen_coords[td[1]]
        high_substructure = dt.structureFromBed(path, highChrom,
                                                start_gen_coord, end_gen_coord,
                                                offset)
        high_substructures.append(high_substructure)
        #next substructure starts at the last index of this one
        offset += len(high_substructure.points) - 1

    highstructure.setstructures(high_substructures)

    infer_structure(low_contactMat, lowstructure, alpha, num_threads)
    #single-argument parenthesized print behaves the same on Python 2 and 3
    print("Low-resolution MDS complete")

    highSubstructures = pymp.shared.list(highstructure.structures)
    lowSubstructures = pymp.shared.list(lowstructure.structures)

    numSubstructures = len(highstructure.structures)
    num_threads = min(
        (num_threads, mp.cpu_count(), numSubstructures)
    )  #don't exceed number of requested threads, available threads, or structures
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure = highSubstructures[substructurenum]
            if len(highSubstructure.getPoints()) > 0:  #skip empty
                trueLow = lowSubstructures[substructurenum]

                #perform MDS individually
                structure_contactMat = dt.matFromBed(
                    path,
                    highSubstructure)  #contact matrix for this structure only
                infer_structure(structure_contactMat, highSubstructure, alpha2,
                                num_threads)

                #approximate as low resolution
                inferredLow = dt.highToLow(highSubstructure, res_ratio)

                #rescale
                scaling_factor = la.radius_of_gyration(
                    trueLow) / la.radius_of_gyration(inferredLow)
                for i, point in enumerate(inferredLow.points):
                    if point != 0:
                        x, y, z = point.pos
                        inferredLow.points[i].pos = (x * scaling_factor,
                                                     y * scaling_factor,
                                                     z * scaling_factor)

                #recover the transformation for inferred from true low structure
                r, t = la.getTransformation(inferredLow, trueLow)
                t /= scaling_factor

                #transform high structure
                highSubstructure.transform(r, t)
                #write the result back into the shared list
                highSubstructures[substructurenum] = highSubstructure

                print("MDS performed on structure {} of {}".format(
                    substructurenum + 1, numSubstructures))

    highstructure.setstructures(highSubstructures)

    return highstructure
Ejemplo n.º 6
0
def partitionedMDS(path, lowpath, args):
    """Partitions cluster into subclusters and performs MDS

    A low-resolution cluster is inferred from lowpath, the domains found at
    low resolution define compatible high-resolution subclusters, and each
    subcluster is inferred separately (in parallel) and then
    rotated/translated onto its low-resolution counterpart.

    Args:
        path: BED file path for the high-resolution data.
        lowpath: BED file path for the low-resolution data.
        args: indexable parameters:
            args[0] domain smoothing parameter (passed to tad.getDomains)
            args[1] minimum size fraction (passed to tad.getDomains)
            args[2] not used by this function
            args[3] requested number of threads
            args[4] alpha used for both low- and high-resolution inference

    Returns:
        The high-resolution cluster with its subclusters set.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    num_threads = args[3]
    alpha = args[4]

    #create low-res cluster
    lowCluster = dt.clusterFromBed(lowpath, None, None)

    #get TADs
    low_contactMat = dt.matFromBed(lowpath, lowCluster)
    lowTads = tad.getDomains(low_contactMat, lowCluster,
                             domainSmoothingParameter,
                             minSizeFraction)  #low subclusters

    #create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(lowCluster.chrom.minPos,
                                   lowCluster.chrom.maxPos, res,
                                   lowCluster.chrom.name, size)

    #create high-res cluster
    #NOTE(review): with integer resolutions this is floor division on
    #Python 2 but true division on Python 3 - confirm which is intended
    resRatio = lowCluster.chrom.res / highChrom.res
    highTads = lowTads * resRatio
    highCluster = dt.clusterFromBed(path, highChrom, highTads)

    #create compatible subclusters
    tad.subclustersFromTads(highCluster, lowCluster, lowTads)

    infer_cluster(low_contactMat, lowCluster, alpha)
    #single-argument parenthesized print behaves the same on Python 2 and 3
    print("Low-resolution MDS complete")

    highSubclusters = pymp.shared.list(highCluster.clusters)
    lowSubclusters = pymp.shared.list(lowCluster.clusters)

    numSubclusters = len(highCluster.clusters)
    num_threads = min(
        (num_threads, mp.cpu_count(), numSubclusters)
    )  #don't exceed number of requested threads, available threads, or clusters
    with pymp.Parallel(num_threads) as p:
        for subclusternum in p.range(numSubclusters):
            highSubcluster = highSubclusters[subclusternum]
            trueLow = lowSubclusters[subclusternum]

            #perform MDS individually
            cluster_contactMat = dt.matFromBed(
                path, highSubcluster)  #contact matrix for this cluster only
            infer_cluster(cluster_contactMat, highSubcluster, alpha)

            #approximate as low resolution
            inferredLow = dt.highToLow(highSubcluster, resRatio)

            #rescale
            scaling_factor = la.radius_of_gyration(
                trueLow) / la.radius_of_gyration(inferredLow)
            for i, point in enumerate(inferredLow.points):
                if point != 0:
                    x, y, z = point.pos
                    inferredLow.points[i].pos = (x * scaling_factor,
                                                 y * scaling_factor,
                                                 z * scaling_factor)

            #recover the transformation for inferred from true low cluster
            r, t = la.getTransformation(inferredLow, trueLow)
            t *= resRatio**(2. / 3)  #rescale

            #transform high cluster
            highSubcluster.transform(r, t)
            #write the result back into the shared list
            highSubclusters[subclusternum] = highSubcluster

            print("MDS performed on cluster {} of {}".format(
                subclusternum + 1, numSubclusters))

    highCluster.setClusters(highSubclusters)

    return highCluster