Ejemplo n.º 1
0
def partitioned_mds(path1,
                    path2,
                    prefix="",
                    centromere=0,
                    num_partitions=4,
                    maxmemory=32000000,
                    num_threads=3,
                    alpha=4,
                    res_ratio=10,
                    penalty=0.05,
                    weight=0.05,
                    alpha2=2.5):
    """Partition two structures into substructures and perform joint MDS.

    Low-resolution structures are built for both inputs and inferred jointly
    with ``infer_structures``. The points are then split at a midpoint into
    ``num_partitions`` contiguous partitions (half on each side), a pair of
    high-resolution substructures is inferred for every partition, and each
    high-resolution substructure is passed to ``transform`` together with its
    low-resolution counterpart.

    Args:
        path1: Path to the first BED file.
        path2: Path to the second BED file.
        prefix: Not used by this function; kept for interface compatibility.
        centromere: If 0, the split point is the middle of the low-resolution
            points. Otherwise the value is passed to
            ``lowstructure1.chrom.getAbsoluteIndex`` to obtain the split index
            (presumably a genomic coordinate -- confirm against ``dt``).
        num_partitions: Total number of partitions; must be even.
        maxmemory: Not used by this function; kept for interface
            compatibility.
        num_threads: Requested worker count; capped by the CPU count and the
            number of substructures before the parallel stage.
        alpha: Passed to ``infer_structures`` for the low-resolution stage.
        res_ratio: Passed to ``create_low_res_structure`` and ``transform``.
        penalty: Passed to ``infer_structures`` in both stages.
        weight: Passed to ``infer_structures`` in both stages.
        alpha2: Passed to ``infer_structures`` for the high-resolution stage.
            Defaults to 2.5, the value that was previously hard-coded.

    Returns:
        Tuple ``(highstructure1, highstructure2)`` of ``dt.Structure`` objects
        holding the inferred high-resolution substructures.

    Raises:
        AssertionError: If ``num_partitions`` is odd.
    """
    # create low-res structures
    lowstructure1 = create_low_res_structure(path1, res_ratio)
    lowstructure2 = create_low_res_structure(path2, res_ratio)
    dt.make_compatible((lowstructure1, lowstructure2))

    # get partitions
    n = len(lowstructure1.getPoints())
    if centromere == 0:
        midpoint = int(n / 2)
    else:
        midpoint = lowstructure1.chrom.getAbsoluteIndex(centromere)

    assert num_partitions % 2 == 0, "num_partitions must be even"

    partitions_per_side = int(num_partitions / 2)
    partition_size1 = int(np.ceil(float(midpoint) / (num_partitions / 2)))
    partition_size2 = int(np.ceil(float(n - midpoint) / (num_partitions / 2)))

    # low substructures, defined on absolute indices not relative indices.
    # The end of one partition equals the start of the next; the first half
    # is capped at midpoint and the second half at the last index (n - 1).
    lowpartitions = [(i * partition_size1,
                      min((i + 1) * partition_size1, midpoint))
                     for i in range(partitions_per_side)]
    lowpartitions += [(midpoint + i * partition_size2,
                       min(midpoint + (i + 1) * partition_size2, n - 1))
                      for i in range(partitions_per_side)]
    lowpartitions = np.array(lowpartitions)

    low_contactMat1 = dt.matFromBed(path1, lowstructure1)
    low_contactMat2 = dt.matFromBed(path2, lowstructure2)

    tad.substructuresFromAbsoluteTads(lowstructure1, lowpartitions)
    tad.substructuresFromAbsoluteTads(lowstructure2, lowpartitions)

    # create high-res chroms: same bounds and name as the low-res chroms, but
    # with the size and resolution reported for each BED file
    size1, res1 = dt.basicParamsFromBed(path1)
    highChrom1 = dt.ChromParameters(lowstructure1.chrom.minPos,
                                    lowstructure1.chrom.maxPos, res1,
                                    lowstructure1.chrom.name, size1)
    size2, res2 = dt.basicParamsFromBed(path2)
    highChrom2 = dt.ChromParameters(lowstructure2.chrom.minPos,
                                    lowstructure2.chrom.maxPos, res2,
                                    lowstructure2.chrom.name, size2)

    # initialize high-res substructures
    high_substructures1 = []
    high_substructures2 = []
    # genomic coordinates of the first structure bound the partitions of both
    low_gen_coords = lowstructure1.getGenCoords()
    offset1 = 0
    offset2 = 0
    for partition in lowpartitions:
        start_gen_coord = low_gen_coords[partition[0]]
        end_gen_coord = low_gen_coords[partition[1]]
        high_substructure1 = dt.structureFromBed(path1, highChrom1,
                                                 start_gen_coord,
                                                 end_gen_coord, offset1)
        high_substructure2 = dt.structureFromBed(path2, highChrom2,
                                                 start_gen_coord,
                                                 end_gen_coord, offset2)
        high_substructures1.append(high_substructure1)
        high_substructures2.append(high_substructure2)
        # advance by one less than the point count (presumably because
        # adjacent partitions share their boundary point -- confirm)
        offset1 += len(high_substructure1.points) - 1
        offset2 += len(high_substructure2.points) - 1

    for high_substructure1, high_substructure2 in zip(high_substructures1,
                                                      high_substructures2):
        dt.make_points_compatible((high_substructure1, high_substructure2))

    highstructure1 = dt.Structure([], high_substructures1, highChrom1, 0)
    highstructure2 = dt.Structure([], high_substructures2, highChrom2, 0)

    # low-resolution joint MDS on the whole structures
    infer_structures(low_contactMat1, lowstructure1, low_contactMat2,
                     lowstructure2, alpha, penalty, num_threads, weight)
    print("Low-resolution MDS complete")

    # shared lists so results written inside the parallel region are visible
    # after it ends
    highSubstructures1 = pymp.shared.list(highstructure1.structures)
    highSubstructures2 = pymp.shared.list(highstructure2.structures)
    lowSubstructures1 = pymp.shared.list(lowstructure1.structures)
    lowSubstructures2 = pymp.shared.list(lowstructure2.structures)

    numSubstructures = len(highstructure1.structures)
    # don't exceed number of requested threads, available threads, or
    # structures
    num_threads = min((num_threads, mp.cpu_count(), numSubstructures))
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure1 = highSubstructures1[substructurenum]
            highSubstructure2 = highSubstructures2[substructurenum]
            trueLow1 = lowSubstructures1[substructurenum]
            trueLow2 = lowSubstructures2[substructurenum]

            # joint MDS on contact matrices for this substructure pair only
            structure_contactMat1 = dt.matFromBed(path1, highSubstructure1)
            structure_contactMat2 = dt.matFromBed(path2, highSubstructure2)

            infer_structures(structure_contactMat1, highSubstructure1,
                             structure_contactMat2, highSubstructure2, alpha2,
                             penalty, num_threads, weight)

            transform(trueLow1, highSubstructure1, res_ratio)
            transform(trueLow2, highSubstructure2, res_ratio)

            # write back so the updated objects land in the shared lists
            highSubstructures1[substructurenum] = highSubstructure1
            highSubstructures2[substructurenum] = highSubstructure2

            print("MDS performed on structure {} of {}".format(
                substructurenum + 1, numSubstructures))

    highstructure1.setstructures(highSubstructures1)
    highstructure2.setstructures(highSubstructures2)

    highstructure1.set_rel_indices()
    highstructure2.set_rel_indices()

    return highstructure1, highstructure2
Ejemplo n.º 2
0
def partitionedMDS(path, args):
    """Partition a structure into TAD-based substructures and perform MDS.

    A low-resolution structure is built from the BED file and inferred with
    ``infer_structure``. Domains found by ``tad.getDomains`` define the
    substructures; each one is inferred at high resolution, then moved with
    the transformation recovered between its low-resolution approximation and
    the matching low-resolution substructure.

    Args:
        path: Path to the BED file.
        args: Indexable sequence laid out as
            ``[domainSmoothingParameter, minSizeFraction, maxmemory,
            num_threads, alpha, res_ratio, alpha2]``. ``args[2]``
            (maxmemory) is not used by this function. ``alpha`` is passed to
            the low-resolution inference and ``alpha2`` to the per-substructure
            high-resolution inference.

    Returns:
        The high-resolution ``dt.Structure`` with its substructures set.
    """
    domainSmoothingParameter = args[0]
    minSizeFraction = args[1]
    # args[2] (maxmemory) is intentionally not read: nothing below uses it
    num_threads = args[3]
    alpha = args[4]
    res_ratio = args[5]
    alpha2 = args[6]

    # create low-res structure: same chrom with its resolution multiplied by
    # res_ratio
    low_chrom = dt.chromFromBed(path)
    low_chrom.res *= res_ratio
    lowstructure = dt.structureFromBed(path, low_chrom)  # low global structure

    # get TADs
    low_contactMat = dt.matFromBed(path, lowstructure)
    # low substructures, defined on relative indices not absolute indices
    low_tad_indices = tad.getDomains(low_contactMat, lowstructure,
                                     domainSmoothingParameter, minSizeFraction)
    tad.substructuresFromTads(lowstructure, low_tad_indices)

    # create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    highChrom = dt.ChromParameters(lowstructure.chrom.minPos,
                                   lowstructure.chrom.maxPos, res,
                                   lowstructure.chrom.name, size)

    highstructure = dt.Structure([], [], highChrom, 0)
    high_substructures = []

    low_gen_coords = lowstructure.getGenCoords()
    offset = 0
    for td in low_tad_indices:
        start_gen_coord = low_gen_coords[td[0]]
        end_gen_coord = low_gen_coords[td[1]]
        high_substructure = dt.structureFromBed(path, highChrom,
                                                start_gen_coord, end_gen_coord,
                                                offset)
        high_substructures.append(high_substructure)
        # advance by one less than the number of points in this substructure
        offset += len(high_substructure.points) - 1

    highstructure.setstructures(high_substructures)

    infer_structure(low_contactMat, lowstructure, alpha, num_threads)
    # single-argument print(...) emits the same text on Python 2 and 3
    print("Low-resolution MDS complete")

    highSubstructures = pymp.shared.list(highstructure.structures)
    lowSubstructures = pymp.shared.list(lowstructure.structures)

    numSubstructures = len(highstructure.structures)
    # don't exceed number of requested threads, available threads, or
    # structures
    num_threads = min((num_threads, mp.cpu_count(), numSubstructures))
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure = highSubstructures[substructurenum]
            if len(highSubstructure.getPoints()) == 0:  # skip empty
                continue

            trueLow = lowSubstructures[substructurenum]

            # perform MDS individually, on the contact matrix for this
            # structure only
            structure_contactMat = dt.matFromBed(path, highSubstructure)
            infer_structure(structure_contactMat, highSubstructure, alpha2,
                            num_threads)

            # approximate as low resolution
            inferredLow = dt.highToLow(highSubstructure, res_ratio)

            # rescale inferredLow so its radius of gyration matches trueLow's
            scaling_factor = la.radius_of_gyration(
                trueLow) / la.radius_of_gyration(inferredLow)
            for i, point in enumerate(inferredLow.points):
                if point != 0:  # entries equal to 0 are left untouched
                    x, y, z = point.pos
                    inferredLow.points[i].pos = (x * scaling_factor,
                                                 y * scaling_factor,
                                                 z * scaling_factor)

            # recover the transformation for inferred from true low structure
            r, t = la.getTransformation(inferredLow, trueLow)
            # undo the rescaling on the translation before applying it to the
            # (unscaled) high-resolution structure
            t /= scaling_factor

            # transform high structure and write it back to the shared list
            highSubstructure.transform(r, t)
            highSubstructures[substructurenum] = highSubstructure

            print("MDS performed on structure {} of {}".format(
                substructurenum + 1, numSubstructures))

    highstructure.setstructures(highSubstructures)

    return highstructure
Ejemplo n.º 3
0
def create_high_res_structure(path, lowstructure):
    """Return an empty high-resolution structure matching ``lowstructure``.

    The new chrom reuses the minimum position, maximum position and name of
    ``lowstructure.chrom``; its size and resolution are the values reported by
    ``dt.basicParamsFromBed`` for the BED file at ``path``.

    Args:
        path: Path to the BED file.
        lowstructure: Structure whose ``chrom`` supplies the bounds and name.

    Returns:
        A ``dt.Structure`` built with no points and no substructures.
    """
    bed_size, bed_res = dt.basicParamsFromBed(path)
    source_chrom = lowstructure.chrom
    chrom_params = dt.ChromParameters(
        source_chrom.minPos,
        source_chrom.maxPos,
        bed_res,
        source_chrom.name,
        bed_size,
    )
    return dt.Structure([], [], chrom_params, 0)