コード例 #1
0
ファイル: minimds.py プロジェクト: RuiqinZheng/miniMDS
def partitionedMDS(path, args):
    """Partition a structure into substructures and perform MDS on each.

    A low-resolution structure is inferred for the whole chromosome first.
    Domains are then called on the low-resolution contact matrix, a
    high-resolution substructure is loaded and inferred for every domain,
    and each high-resolution substructure is transformed with the
    transformation recovered by aligning its low-resolution approximation
    to the matching low-resolution substructure.

    Args:
        path: path of the BED file handed to the ``dt`` loaders.
        args: indexable sequence of parameters:
            args[0] -- domain smoothing parameter for ``tad.getDomains``
            args[1] -- minimum size fraction for ``tad.getDomains``
            args[2] -- maxmemory; not used by this function
            args[3] -- requested number of threads
            args[4] -- alpha passed to the low-resolution inference
            args[5] -- factor by which the chromosome resolution is
                       multiplied to obtain the low resolution
            args[6] -- alpha passed to the per-substructure inference

    Returns:
        The high-resolution structure holding the transformed substructures.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    # args[2] (maxmemory) is intentionally not read: nothing below uses it.
    num_threads = args[3]
    alpha = args[4]
    res_ratio = args[5]
    alpha2 = args[6]

    # create low-res structure
    low_chrom = dt.chromFromBed(path)
    low_chrom.res *= res_ratio
    lowstructure = dt.structureFromBed(path, low_chrom)  # low global structure

    # get TADs
    low_contactMat = dt.matFromBed(path, lowstructure)
    # low substructures, defined on relative indices not absolute indices
    low_tad_indices = tad.getDomains(
        low_contactMat, lowstructure, domainSmoothingParameter, minSizeFraction
    )
    tad.substructuresFromTads(lowstructure, low_tad_indices)

    # create high-res chrom: same extent and name as the low-res chrom, but
    # with the size and resolution read from the BED file
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(lowstructure.chrom.minPos,
                                   lowstructure.chrom.maxPos, res,
                                   lowstructure.chrom.name, size)

    highstructure = dt.Structure([], [], highChrom, 0)
    high_substructures = []

    low_gen_coords = lowstructure.getGenCoords()
    offset = 0  # running offset handed to each substructure loader
    for td in low_tad_indices:
        start_gen_coord = low_gen_coords[td[0]]
        end_gen_coord = low_gen_coords[td[1]]
        high_substructure = dt.structureFromBed(path, highChrom,
                                                start_gen_coord, end_gen_coord,
                                                offset)
        high_substructures.append(high_substructure)
        # advance by one less than the number of points
        # NOTE(review): presumably because consecutive domains share a
        # boundary point -- confirm against dt.structureFromBed
        offset += len(high_substructure.points) - 1

    highstructure.setstructures(high_substructures)

    infer_structure(low_contactMat, lowstructure, alpha, num_threads)
    # single parenthesised argument: prints identically on Python 2 and 3
    print("Low-resolution MDS complete")

    highSubstructures = pymp.shared.list(highstructure.structures)
    lowSubstructures = pymp.shared.list(lowstructure.structures)

    numSubstructures = len(highstructure.structures)
    # don't exceed number of requested threads, available threads, or structures
    num_threads = min((num_threads, mp.cpu_count(), numSubstructures))
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure = highSubstructures[substructurenum]
            if len(highSubstructure.getPoints()) > 0:  # skip empty
                trueLow = lowSubstructures[substructurenum]

                # perform MDS individually, on the contact matrix for this
                # structure only
                structure_contactMat = dt.matFromBed(path, highSubstructure)
                infer_structure(structure_contactMat, highSubstructure, alpha2,
                                num_threads)

                # approximate as low resolution
                inferredLow = dt.highToLow(highSubstructure, res_ratio)

                # rescale the approximation so its radius of gyration matches
                # the true low-resolution substructure
                scaling_factor = la.radius_of_gyration(
                    trueLow) / la.radius_of_gyration(inferredLow)
                for i, point in enumerate(inferredLow.points):
                    if point != 0:  # entries equal to 0 are skipped
                        x, y, z = point.pos
                        inferredLow.points[i].pos = (x * scaling_factor,
                                                     y * scaling_factor,
                                                     z * scaling_factor)

                # recover the transformation for inferred from true low structure
                r, t = la.getTransformation(inferredLow, trueLow)
                # t was computed on the rescaled approximation; undo the
                # scaling before applying it to the unscaled high structure
                t /= scaling_factor

                # transform high structure and write it back to the shared list
                highSubstructure.transform(r, t)
                highSubstructures[substructurenum] = highSubstructure

                print("MDS performed on structure {} of {}".format(
                    substructurenum + 1, numSubstructures))

    highstructure.setstructures(highSubstructures)

    return highstructure
コード例 #2
0
ファイル: fig1.py プロジェクト: Nanguage/miniMDS
    infile.close()
    return mat


def plotDixon(mat):
    """Draw the heat map of ``mat`` with a fixed, hard-coded list of domains.

    The domains are consecutive [start, end] index pairs and the output
    path handed to ``hm.heatMapFromMat`` is "Fig1A".
    """
    # each domain starts where the previous one ends
    boundaries = [0, 8, 38, 52, 78, 97, 115, 127]
    domains = [[start, end]
               for start, end in zip(boundaries[:-1], boundaries[1:])]
    hm.heatMapFromMat(mat, 100, domains, "Fig1A")


def plotMovingAverage(mat):
    """Draw the heat map of ``mat`` with domains called by ``tad.getDomains``.

    Domains are called with a smoothing factor of 5 and a final argument
    of 0; the output path handed to ``hm.heatMapFromMat`` is "Fig1B".
    """
    called_domains = tad.getDomains(mat, 5, 0)
    hm.heatMapFromMat(mat, 100, called_domains, "Fig1B")


# Script section: build the chromosome parameters, load the contact matrix
# and draw both figure panels.
minPos = 49000000  #from Dixon
maxPos = 54066692  #from Dixon
res = 40000  #from Dixon
# NOTE(review): name is "chr22" while path points to a chr6 file, and size
# is smaller than maxPos -- confirm these values are intended.
name = "chr22"
size = 30949158
path = "mESC_chr6.tsv"

chrom = dt.ChromParameters(minPos, maxPos, res, name, size)

# load the matrix once and reuse it for both plots
mat = matFromDixon(path, chrom)
plotDixon(mat)
plotMovingAverage(mat)
コード例 #3
0
def create_high_res_structure(path, lowstructure):
    """Return an empty structure built on high-resolution chrom parameters.

    The chrom parameters reuse ``lowstructure``'s minPos, maxPos and name,
    and take the size and resolution that ``dt.basicParamsFromBed`` reads
    from ``path``. The structure is created with empty point and
    substructure lists and a final argument of 0.
    """
    size, res = dt.basicParamsFromBed(path)
    low_chrom = lowstructure.chrom
    high_chrom = dt.ChromParameters(
        low_chrom.minPos, low_chrom.maxPos, res, low_chrom.name, size)
    no_points, no_substructures = [], []
    return dt.Structure(no_points, no_substructures, high_chrom, 0)
コード例 #4
0
def partitioned_mds(path1,
                    path2,
                    prefix="",
                    centromere=0,
                    num_partitions=4,
                    maxmemory=32000000,
                    num_threads=3,
                    alpha=4,
                    res_ratio=10,
                    penalty=0.05,
                    weight=0.05,
                    alpha2=2.5):
    """Partition two structures into substructures and perform joint MDS.

    Both inputs are loaded at low resolution and made compatible, then
    split at a midpoint into ``num_partitions`` index ranges (half on each
    side of the midpoint). The low-resolution structures are inferred
    jointly, every pair of high-resolution substructures is inferred
    jointly, and each high-resolution substructure is passed to
    ``transform`` together with its low-resolution counterpart.

    Args:
        path1: BED file of the first dataset.
        path2: BED file of the second dataset.
        prefix: not used by this function.
        centromere: if 0, the midpoint is the middle point index; otherwise
            it is ``chrom.getAbsoluteIndex(centromere)`` of the first
            low-resolution structure.
        num_partitions: total number of partitions; must be even.
        maxmemory: not used by this function.
        num_threads: requested number of threads; capped by the CPU count
            and by the number of substructures for the parallel section.
        alpha: alpha passed to the low-resolution joint inference.
        res_ratio: passed to ``create_low_res_structure`` and ``transform``.
        penalty: passed to ``infer_structures``.
        weight: passed to ``infer_structures``.
        alpha2: alpha passed to the per-substructure joint inference
            (previously hard-coded to 2.5).

    Returns:
        Tuple ``(highstructure1, highstructure2)``.

    Raises:
        AssertionError: if ``num_partitions`` is odd.
    """
    # Validate before any file is read so bad input fails immediately.
    assert num_partitions % 2 == 0, "num_partitions must be even"

    # create low-res structures
    lowstructure1 = create_low_res_structure(path1, res_ratio)
    lowstructure2 = create_low_res_structure(path2, res_ratio)
    dt.make_compatible((lowstructure1, lowstructure2))

    # get partitions
    n = len(lowstructure1.getPoints())
    if centromere == 0:
        midpoint = int(n / 2)
    else:
        midpoint = lowstructure1.chrom.getAbsoluteIndex(centromere)

    # half of the partitions go on each side of the midpoint
    partitions_per_half = num_partitions // 2
    partition_size1 = int(np.ceil(float(midpoint) / partitions_per_half))
    partition_size2 = int(np.ceil(float(n - midpoint) / partitions_per_half))

    # low substructures, defined on absolute indices not relative indices
    lowpartitions = []

    # ranges before the midpoint, clipped to the midpoint
    for i in range(partitions_per_half):
        lowpartitions.append(
            (i * partition_size1, min((i + 1) * partition_size1, midpoint)))

    # ranges from the midpoint on, clipped to the last index
    for i in range(partitions_per_half):
        lowpartitions.append((midpoint + i * partition_size2,
                              min(midpoint + (i + 1) * partition_size2,
                                  n - 1)))

    lowpartitions = np.array(lowpartitions)

    low_contactMat1 = dt.matFromBed(path1, lowstructure1)
    low_contactMat2 = dt.matFromBed(path2, lowstructure2)

    tad.substructuresFromAbsoluteTads(lowstructure1, lowpartitions)
    tad.substructuresFromAbsoluteTads(lowstructure2, lowpartitions)

    # create high-res chroms: extent and name from the low-res chrom, size
    # and resolution read from the BED file
    size1, res1 = dt.basicParamsFromBed(path1)
    highChrom1 = dt.ChromParameters(lowstructure1.chrom.minPos,
                                    lowstructure1.chrom.maxPos, res1,
                                    lowstructure1.chrom.name, size1)
    size2, res2 = dt.basicParamsFromBed(path2)
    highChrom2 = dt.ChromParameters(lowstructure2.chrom.minPos,
                                    lowstructure2.chrom.maxPos, res2,
                                    lowstructure2.chrom.name, size2)

    # initialize high-res substructures
    high_substructures1 = []
    high_substructures2 = []
    # genomic coordinates of the first structure are used for both datasets
    low_gen_coords = lowstructure1.getGenCoords()
    offset1 = 0  # running offsets handed to the substructure loader
    offset2 = 0
    for partition in lowpartitions:
        start_gen_coord = low_gen_coords[partition[0]]
        end_gen_coord = low_gen_coords[partition[1]]
        high_substructure1 = dt.structureFromBed(path1, highChrom1,
                                                 start_gen_coord,
                                                 end_gen_coord, offset1)
        high_substructure2 = dt.structureFromBed(path2, highChrom2,
                                                 start_gen_coord,
                                                 end_gen_coord, offset2)
        high_substructures1.append(high_substructure1)
        high_substructures2.append(high_substructure2)
        # advance by one less than the number of points
        offset1 += (len(high_substructure1.points) - 1)
        offset2 += (len(high_substructure2.points) - 1)

    for high_substructure1, high_substructure2 in zip(high_substructures1,
                                                      high_substructures2):
        dt.make_points_compatible((high_substructure1, high_substructure2))

    highstructure1 = dt.Structure([], high_substructures1, highChrom1, 0)
    highstructure2 = dt.Structure([], high_substructures2, highChrom2, 0)

    infer_structures(low_contactMat1, lowstructure1, low_contactMat2,
                     lowstructure2, alpha, penalty, num_threads, weight)
    print("Low-resolution MDS complete")

    highSubstructures1 = pymp.shared.list(highstructure1.structures)
    highSubstructures2 = pymp.shared.list(highstructure2.structures)
    lowSubstructures1 = pymp.shared.list(lowstructure1.structures)
    lowSubstructures2 = pymp.shared.list(lowstructure2.structures)

    numSubstructures = len(highstructure1.structures)
    # don't exceed number of requested threads, available threads, or structures
    num_threads = min((num_threads, mp.cpu_count(), numSubstructures))
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure1 = highSubstructures1[substructurenum]
            highSubstructure2 = highSubstructures2[substructurenum]
            trueLow1 = lowSubstructures1[substructurenum]
            trueLow2 = lowSubstructures2[substructurenum]

            # joint MDS on the contact matrices for this pair only
            structure_contactMat1 = dt.matFromBed(path1, highSubstructure1)
            structure_contactMat2 = dt.matFromBed(path2, highSubstructure2)

            infer_structures(structure_contactMat1, highSubstructure1,
                             structure_contactMat2, highSubstructure2, alpha2,
                             penalty, num_threads, weight)

            transform(trueLow1, highSubstructure1, res_ratio)
            transform(trueLow2, highSubstructure2, res_ratio)

            # write the results back to the shared lists
            highSubstructures1[substructurenum] = highSubstructure1
            highSubstructures2[substructurenum] = highSubstructure2

            print("MDS performed on structure {} of {}".format(
                substructurenum + 1, numSubstructures))

    highstructure1.setstructures(highSubstructures1)
    highstructure2.setstructures(highSubstructures2)

    highstructure1.set_rel_indices()
    highstructure2.set_rel_indices()

    return highstructure1, highstructure2
コード例 #5
0
ファイル: minimds.py プロジェクト: grmwld/miniMDS
def partitionedMDS(path, lowpath, args):
    """Partition a cluster into subclusters and perform MDS on each.

    The low-resolution cluster is loaded from ``lowpath`` and inferred as a
    whole. Domains called on its contact matrix define the subclusters;
    each high-resolution subcluster loaded from ``path`` is inferred on its
    own and then transformed with the transformation recovered by aligning
    its low-resolution approximation to the matching low-resolution
    subcluster.

    Args:
        path: BED file with the high-resolution data.
        lowpath: BED file with the low-resolution data.
        args: indexable sequence of parameters:
            args[0] -- domain smoothing parameter for ``tad.getDomains``
            args[1] -- minimum size fraction for ``tad.getDomains``
            args[2] -- maxmemory; not used by this function
            args[3] -- requested number of threads

    Returns:
        The high-resolution cluster holding the transformed subclusters.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    # args[2] (maxmemory) is intentionally not read: nothing below uses it.
    num_threads = args[3]

    # create low-res cluster
    lowCluster = dt.clusterFromBed(lowpath, None, None)

    # get TADs (low subclusters)
    low_contactMat = dt.matFromBed(lowpath, lowCluster)
    lowTads = tad.getDomains(low_contactMat, lowCluster,
                             domainSmoothingParameter,
                             minSizeFraction)

    # create high-res chrom: extent and name from the low-res chrom, size
    # and resolution read from the high-resolution BED file
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(lowCluster.chrom.minPos,
                                   lowCluster.chrom.maxPos, res,
                                   lowCluster.chrom.name, size)

    # create high-res cluster
    # NOTE(review): with integer resolutions "/" floors on Python 2 but is
    # true division on Python 3 -- confirm the intended semantics before
    # running this under Python 3.
    resRatio = lowCluster.chrom.res / highChrom.res
    highTads = lowTads * resRatio
    highCluster = dt.clusterFromBed(path, highChrom, highTads)

    # create compatible subclusters
    tad.subclustersFromTads(highCluster, lowCluster, lowTads)

    infer_cluster(low_contactMat, lowCluster, False)
    # single parenthesised argument: prints identically on Python 2 and 3
    print("Low-resolution MDS complete")

    highSubclusters = pymp.shared.list(highCluster.clusters)
    lowSubclusters = pymp.shared.list(lowCluster.clusters)

    numSubclusters = len(highCluster.clusters)
    # don't exceed number of requested threads, available threads, or clusters
    num_threads = min((num_threads, mp.cpu_count(), numSubclusters))
    with pymp.Parallel(num_threads) as p:
        for subclusternum in p.range(numSubclusters):
            highSubcluster = highSubclusters[subclusternum]
            trueLow = lowSubclusters[subclusternum]

            # perform MDS individually, on the contact matrix for this
            # cluster only
            cluster_contactMat = dt.matFromBed(path, highSubcluster)
            infer_cluster(cluster_contactMat, highSubcluster, False)

            # approximate as low resolution
            inferredLow = dt.highToLow(highSubcluster, resRatio)

            # recover the transformation for inferred from true low cluster
            r, t, reflect = la.getTransformation(inferredLow, trueLow)
            t *= resRatio**(2. / 3)  # rescale

            # transform high cluster and write it back to the shared list
            highSubcluster.transform(r, t, reflect)
            highSubclusters[subclusternum] = highSubcluster

            print("MDS performed on cluster {} of {}".format(
                subclusternum + 1, numSubclusters))

    highCluster.setClusters(highSubclusters)

    return highCluster