def partitionedMDS(path, args):
    """Partition a structure into substructures and perform MDS on each.

    A low-resolution structure is loaded from ``path`` and split into domains
    with ``tad.getDomains``.  For every domain a high-resolution substructure
    is loaded, inferred on its own inside a ``pymp.Parallel`` region, rescaled
    and then transformed onto the corresponding low-resolution substructure.

    Args:
        path: Input path handed to the ``dt.*FromBed`` loaders (used for both
            the low- and the high-resolution data).
        args: Indexable sequence read by position:
            ``[0]`` domain smoothing parameter and ``[1]`` minimum size
            fraction (both forwarded to ``tad.getDomains``), ``[2]`` is not
            used by this function, ``[3]`` requested number of threads,
            ``[4]`` alpha for the low-resolution inference, ``[5]``
            low/high resolution ratio, ``[6]`` alpha for the per-substructure
            high-resolution inference.

    Returns:
        The high-resolution structure whose substructures have been inferred
        and transformed.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    # args[2] (maxmemory) is part of the positional layout but unused here.
    num_threads = args[3]
    alpha = args[4]
    res_ratio = args[5]
    alpha2 = args[6]

    # create low-res structure
    low_chrom = dt.chromFromBed(path)
    low_chrom.res *= res_ratio
    lowstructure = dt.structureFromBed(path, low_chrom)  # low global structure

    # get TADs
    low_contactMat = dt.matFromBed(path, lowstructure)
    # low substructures, defined on relative indices not absolute indices
    low_tad_indices = tad.getDomains(
        low_contactMat, lowstructure, domainSmoothingParameter, minSizeFraction)
    tad.substructuresFromTads(lowstructure, low_tad_indices)

    # create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(
        lowstructure.chrom.minPos, lowstructure.chrom.maxPos, res,
        lowstructure.chrom.name, size)

    highstructure = dt.Structure([], [], highChrom, 0)
    high_substructures = []

    low_gen_coords = lowstructure.getGenCoords()
    offset = 0  # running offset passed to each high-res substructure load
    for td in low_tad_indices:
        start_gen_coord = low_gen_coords[td[0]]
        end_gen_coord = low_gen_coords[td[1]]
        high_substructure = dt.structureFromBed(
            path, highChrom, start_gen_coord, end_gen_coord, offset)
        high_substructures.append(high_substructure)
        # Advance by one less than the number of points loaded
        # (presumably because neighbouring domains share a boundary point
        # -- TODO confirm).
        offset += len(high_substructure.points) - 1

    highstructure.setstructures(high_substructures)

    infer_structure(low_contactMat, lowstructure, alpha, num_threads)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print("Low-resolution MDS complete")

    highSubstructures = pymp.shared.list(highstructure.structures)
    lowSubstructures = pymp.shared.list(lowstructure.structures)

    numSubstructures = len(highstructure.structures)
    # don't exceed number of requested threads, available threads, or structures
    num_threads = min((num_threads, mp.cpu_count(), numSubstructures))
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure = highSubstructures[substructurenum]
            if len(highSubstructure.getPoints()) > 0:  # skip empty
                trueLow = lowSubstructures[substructurenum]

                # perform MDS individually
                # contact matrix for this structure only
                structure_contactMat = dt.matFromBed(path, highSubstructure)
                infer_structure(
                    structure_contactMat, highSubstructure, alpha2, num_threads)

                # approximate as low resolution
                inferredLow = dt.highToLow(highSubstructure, res_ratio)

                # rescale so the inferred low-res copy has the same radius of
                # gyration as the true low-res substructure
                scaling_factor = (la.radius_of_gyration(trueLow)
                                  / la.radius_of_gyration(inferredLow))
                for i, point in enumerate(inferredLow.points):
                    # entries equal to 0 are skipped (presumably placeholders
                    # for missing points -- TODO confirm)
                    if point != 0:
                        x, y, z = point.pos
                        inferredLow.points[i].pos = (
                            x * scaling_factor,
                            y * scaling_factor,
                            z * scaling_factor)

                # recover the transformation for inferred from true low structure
                r, t = la.getTransformation(inferredLow, trueLow)
                # the translation was computed in the rescaled frame; undo the
                # scaling before applying it to the unscaled high-res structure
                t /= scaling_factor

                # transform high structure
                highSubstructure.transform(r, t)
                highSubstructures[substructurenum] = highSubstructure

                print("MDS performed on structure {} of {}".format(
                    substructurenum + 1, numSubstructures))

    highstructure.setstructures(highSubstructures)

    return highstructure
def create_high_res_structure(path, lowstructure):
    """Return an empty structure for the data at ``path``.

    The size and resolution come from ``dt.basicParamsFromBed(path)``; the
    position bounds and the name are copied from ``lowstructure.chrom``.
    The returned ``dt.Structure`` is created with two empty lists and a
    final argument of 0.
    """
    bed_size, bed_res = dt.basicParamsFromBed(path)
    source_chrom = lowstructure.chrom
    high_chrom = dt.ChromParameters(
        source_chrom.minPos,
        source_chrom.maxPos,
        bed_res,
        source_chrom.name,
        bed_size,
    )
    return dt.Structure([], [], high_chrom, 0)
def _partition_bounds(n, midpoint, num_partitions):
    """Return an array of (start, end) index pairs: half of the partitions
    cover [0, midpoint] and the other half cover [midpoint, n - 1]."""
    half = int(num_partitions / 2)
    size1 = int(np.ceil(float(midpoint) / half))
    size2 = int(np.ceil(float(n - midpoint) / half))

    bounds = [
        (i * size1, min((i + 1) * size1, midpoint))
        for i in range(half)
    ]
    bounds.extend(
        (midpoint + i * size2, min(midpoint + (i + 1) * size2, n - 1))
        for i in range(half)
    )
    return np.array(bounds)


def partitioned_mds(path1, path2, prefix="", centromere=0, num_partitions=4,
                    maxmemory=32000000, num_threads=3, alpha=4, res_ratio=10,
                    penalty=0.05, weight=0.05, alpha2=2.5):
    """Partition two structures into matching substructures and perform joint MDS.

    Low-resolution structures are built for both inputs and made compatible,
    then split into ``num_partitions`` index ranges (half on each side of the
    midpoint).  High-resolution substructures are loaded for every range,
    jointly inferred inside a ``pymp.Parallel`` region and transformed onto
    their low-resolution counterparts.

    Args:
        path1: Input path for the first dataset (handed to the ``dt`` loaders).
        path2: Input path for the second dataset.
        prefix: Accepted for interface compatibility; not used by this function.
        centromere: If 0, the midpoint is ``int(n / 2)`` where ``n`` is the
            number of low-resolution points; otherwise it is
            ``lowstructure1.chrom.getAbsoluteIndex(centromere)``.
        num_partitions: Total number of partitions; must be even.
        maxmemory: Accepted for interface compatibility; not used by this
            function.
        num_threads: Requested number of threads; capped by the CPU count and
            by the number of substructures.
        alpha: Alpha passed to the low-resolution ``infer_structures`` call.
        res_ratio: Ratio forwarded to ``create_low_res_structure`` and
            ``transform``.
        penalty: Forwarded to both ``infer_structures`` calls.
        weight: Forwarded to both ``infer_structures`` calls.
        alpha2: Alpha passed to the per-substructure high-resolution
            ``infer_structures`` call.  Defaults to 2.5, the value that was
            previously hard-coded.

    Returns:
        Tuple ``(highstructure1, highstructure2)`` of the two high-resolution
        structures.

    Raises:
        AssertionError: If ``num_partitions`` is odd.
    """
    # Check parity before any file loading so a bad argument fails immediately.
    assert num_partitions % 2 == 0, "num_partitions must be even"

    # create low-res structures
    lowstructure1 = create_low_res_structure(path1, res_ratio)
    lowstructure2 = create_low_res_structure(path2, res_ratio)
    dt.make_compatible((lowstructure1, lowstructure2))

    # get partitions
    n = len(lowstructure1.getPoints())
    if centromere == 0:
        midpoint = int(n / 2)
    else:
        midpoint = lowstructure1.chrom.getAbsoluteIndex(centromere)

    # low substructures, defined on absolute indices not relative indices
    lowpartitions = _partition_bounds(n, midpoint, num_partitions)

    low_contactMat1 = dt.matFromBed(path1, lowstructure1)
    low_contactMat2 = dt.matFromBed(path2, lowstructure2)

    tad.substructuresFromAbsoluteTads(lowstructure1, lowpartitions)
    tad.substructuresFromAbsoluteTads(lowstructure2, lowpartitions)

    # create high-res chroms
    size1, res1 = dt.basicParamsFromBed(path1)
    highChrom1 = dt.ChromParameters(
        lowstructure1.chrom.minPos, lowstructure1.chrom.maxPos, res1,
        lowstructure1.chrom.name, size1)
    size2, res2 = dt.basicParamsFromBed(path2)
    highChrom2 = dt.ChromParameters(
        lowstructure2.chrom.minPos, lowstructure2.chrom.maxPos, res2,
        lowstructure2.chrom.name, size2)

    # initialize high-res substructures
    high_substructures1 = []
    high_substructures2 = []
    # Genomic coordinates are taken from the first structure only; both were
    # passed through dt.make_compatible above.
    low_gen_coords = lowstructure1.getGenCoords()
    offset1 = 0
    offset2 = 0
    for partition in lowpartitions:
        start_gen_coord = low_gen_coords[partition[0]]
        end_gen_coord = low_gen_coords[partition[1]]
        high_substructure1 = dt.structureFromBed(
            path1, highChrom1, start_gen_coord, end_gen_coord, offset1)
        high_substructure2 = dt.structureFromBed(
            path2, highChrom2, start_gen_coord, end_gen_coord, offset2)
        high_substructures1.append(high_substructure1)
        high_substructures2.append(high_substructure2)
        # advance each offset by one less than the number of points loaded
        offset1 += len(high_substructure1.points) - 1
        offset2 += len(high_substructure2.points) - 1

    for high_substructure1, high_substructure2 in zip(high_substructures1,
                                                      high_substructures2):
        dt.make_points_compatible((high_substructure1, high_substructure2))

    highstructure1 = dt.Structure([], high_substructures1, highChrom1, 0)
    highstructure2 = dt.Structure([], high_substructures2, highChrom2, 0)

    infer_structures(low_contactMat1, lowstructure1, low_contactMat2,
                     lowstructure2, alpha, penalty, num_threads, weight)
    print("Low-resolution MDS complete")

    highSubstructures1 = pymp.shared.list(highstructure1.structures)
    highSubstructures2 = pymp.shared.list(highstructure2.structures)
    lowSubstructures1 = pymp.shared.list(lowstructure1.structures)
    lowSubstructures2 = pymp.shared.list(lowstructure2.structures)

    numSubstructures = len(highstructure1.structures)
    # don't exceed number of requested threads, available threads, or structures
    num_threads = min((num_threads, mp.cpu_count(), numSubstructures))
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure1 = highSubstructures1[substructurenum]
            highSubstructure2 = highSubstructures2[substructurenum]
            trueLow1 = lowSubstructures1[substructurenum]
            trueLow2 = lowSubstructures2[substructurenum]

            # joint MDS
            # contact matrices for this structure only
            structure_contactMat1 = dt.matFromBed(path1, highSubstructure1)
            structure_contactMat2 = dt.matFromBed(path2, highSubstructure2)

            infer_structures(structure_contactMat1, highSubstructure1,
                             structure_contactMat2, highSubstructure2,
                             alpha2, penalty, num_threads, weight)

            transform(trueLow1, highSubstructure1, res_ratio)
            transform(trueLow2, highSubstructure2, res_ratio)

            # write back so the shared lists hold the updated substructures
            highSubstructures1[substructurenum] = highSubstructure1
            highSubstructures2[substructurenum] = highSubstructure2

            print("MDS performed on structure {} of {}".format(
                substructurenum + 1, numSubstructures))

    highstructure1.setstructures(highSubstructures1)
    highstructure2.setstructures(highSubstructures2)

    highstructure1.set_rel_indices()
    highstructure2.set_rel_indices()

    return highstructure1, highstructure2
def partitionedMDS(path, lowpath, args):
    """Partition a cluster into subclusters and perform MDS on each.

    A low-resolution cluster is loaded from ``lowpath`` and split into domains
    with ``tad.getDomains``.  A high-resolution cluster is loaded from
    ``path`` using the domain boundaries scaled by the resolution ratio, each
    high-resolution subcluster is inferred on its own inside a
    ``pymp.Parallel`` region, and it is then transformed onto the
    corresponding low-resolution subcluster.

    Args:
        path: Input path for the high-resolution data (handed to the ``dt``
            loaders).
        lowpath: Input path for the low-resolution data.
        args: Indexable sequence read by position:
            ``[0]`` domain smoothing parameter and ``[1]`` minimum size
            fraction (both forwarded to ``tad.getDomains``), ``[2]`` is not
            used by this function, ``[3]`` requested number of threads.

    Returns:
        The high-resolution cluster whose subclusters have been inferred and
        transformed.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    # args[2] (maxmemory) is part of the positional layout but unused here.
    num_threads = args[3]

    # create low-res cluster
    lowCluster = dt.clusterFromBed(lowpath, None, None)

    # get TADs
    low_contactMat = dt.matFromBed(lowpath, lowCluster)
    # low subclusters
    lowTads = tad.getDomains(
        low_contactMat, lowCluster, domainSmoothingParameter, minSizeFraction)

    # create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(
        lowCluster.chrom.minPos, lowCluster.chrom.maxPos, res,
        lowCluster.chrom.name, size)

    # create high-res cluster
    # NOTE(review): with integer resolutions this `/` truncates on Python 2
    # but yields a float on Python 3 -- confirm which is intended before
    # running under Python 3.
    resRatio = lowCluster.chrom.res / highChrom.res
    highTads = lowTads * resRatio
    highCluster = dt.clusterFromBed(path, highChrom, highTads)

    # create compatible subclusters
    tad.subclustersFromTads(highCluster, lowCluster, lowTads)

    infer_cluster(low_contactMat, lowCluster, False)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print("Low-resolution MDS complete")

    highSubclusters = pymp.shared.list(highCluster.clusters)
    lowSubclusters = pymp.shared.list(lowCluster.clusters)

    numSubclusters = len(highCluster.clusters)
    # don't exceed number of requested threads, available threads, or clusters
    num_threads = min((num_threads, mp.cpu_count(), numSubclusters))
    with pymp.Parallel(num_threads) as p:
        for subclusternum in p.range(numSubclusters):
            highSubcluster = highSubclusters[subclusternum]
            trueLow = lowSubclusters[subclusternum]

            # perform MDS individually
            # contact matrix for this cluster only
            cluster_contactMat = dt.matFromBed(path, highSubcluster)
            infer_cluster(cluster_contactMat, highSubcluster, False)

            # approximate as low resolution
            inferredLow = dt.highToLow(highSubcluster, resRatio)

            # recover the transformation for inferred from true low cluster
            r, t, reflect = la.getTransformation(inferredLow, trueLow)
            t *= resRatio ** (2. / 3)  # rescale

            # transform high cluster
            highSubcluster.transform(r, t, reflect)
            # write back so the shared list holds the transformed subcluster
            highSubclusters[subclusternum] = highSubcluster

            print("MDS performed on cluster {} of {}".format(
                subclusternum + 1, numSubclusters))

    highCluster.setClusters(highSubclusters)

    return highCluster