Beispiel #1
0
def distsFromCoords(coords):
    """Build a square matrix of pairwise distances between coordinates.

    Only the strictly lower triangle (row index > column index) is filled,
    one la.calcDistance call per pair; the diagonal and the upper triangle
    keep the zeros the matrix was initialised with.
    """
    size = len(coords)
    lower = np.zeros((size, size))
    # Same visiting order as a nested "for row / for col < row" loop.
    index_pairs = ((row, col) for row in range(size) for col in range(row))
    for row, col in index_pairs:
        lower[row, col] = la.calcDistance(coords[row], coords[col])
    return lower
Beispiel #2
0
 def distMat(self):
     """Return the pairwise distance matrix of this cluster's points.

     Points come from self.getPoints(); for every pair with row index >
     column index the distance between their .pos attributes is stored.
     The diagonal and upper triangle are left at zero.
     """
     pts = self.getPoints()
     size = len(pts)
     result = np.zeros((size, size))
     # Same visiting order as a nested "for row / for col < row" loop.
     index_pairs = ((row, col) for row in range(size) for col in range(row))
     for row, col in index_pairs:
         result[row, col] = la.calcDistance(pts[row].pos, pts[col].pos)
     return result
Beispiel #3
0
def distsFromCoords(coords):
	"""Create a distance matrix from 3D coords, rejecting duplicate points.

	Only the strictly lower triangle (i > j) is filled with
	la.calcDistance values; the diagonal and upper triangle stay zero.

	If any pair of coordinates is at distance zero, an error message and
	the two offending coordinates are printed and the process exits with a
	non-zero status (raises SystemExit).
	"""
	n = len(coords)
	distMat = np.zeros((n, n))
	for i in range(n):
		for j in range(i):
			distMat[i, j] = la.calcDistance(coords[i], coords[j])
			if distMat[i, j] == 0:
				# Single-argument print() calls behave the same on
				# Python 2 and Python 3; the old print statements were a
				# syntax error on Python 3.
				print("Error. Duplicate coordinates.")
				print(coords[i])
				print(coords[j])
				# Exit status 1: this is a failure and must not be
				# reported to the shell as success (previously 0).
				sys.exit(1)
	return distMat
Beispiel #4
0
def error(dists, coords):
    """Return the root-mean-square error between target and embedded distances.

    For every index pair (i, j) with i > j, the distance between coords[i]
    and coords[j] (via la.calcDistance) is compared against dists[i, j].

    Raises AssertionError when dists and coords differ in length, and
    ZeroDivisionError when there are fewer than two points (no pairs to
    average over).
    """
    num_points = len(dists)
    assert num_points == len(coords)

    squared_error_total = 0
    num_pairs = 0
    index_pairs = ((a, b) for a in range(num_points) for b in range(a))
    for row, col in index_pairs:
        residual = la.calcDistance(coords[row], coords[col]) - dists[row, col]
        squared_error_total += residual**2
        num_pairs += 1

    mean_squared_error = squared_error_total / num_pairs
    return mean_squared_error**(1. / 2)
Beispiel #5
0
def calculateRadius(structures):
	"""Return a to-scale radius averaged over several structures.

	For each structure, the mean distance between consecutive coordinates
	is divided by the expected physical distance between neighboring loci,
	kl * sqrt(res / bpPerKL). The mean of those ratios scales half of
	chromatinDiameter. kl, bpPerKL and chromatinDiameter are read from the
	enclosing scope.
	"""
	scale_factors = np.zeros(len(structures))
	for idx, struct in enumerate(structures):
		points = struct.getCoords()
		num_points = len(points)
		path_length = 0
		for k in range(1, num_points):
			path_length += la.calcDistance(points[k-1], points[k])
		mean_step = path_length/(num_points-1)		#average distance between neighboring loci
		expected_step = kl * (struct.chrom.res/bpPerKL)**(1./2)		#physical distance between neighboring loci (nm)
		scale_factors[idx] = mean_step/expected_step
	mean_factor = np.mean(scale_factors)
	return chromatinDiameter/2 * mean_factor
Beispiel #6
0
def calculateRadius(coords, res):
	"""Return a to-scale radius for one set of coordinates.

	The mean distance between consecutive coordinates is divided by the
	expected physical distance between neighboring loci at resolution res,
	and the ratio scales half of the chromatin diameter. Raises
	ZeroDivisionError when coords has fewer than two entries.
	"""
	#constants from Rippe (2001)
	kuhn_length = 289	#Kuhn length (nm)
	bp_per_kuhn_length = 30000.	#base pairs per Kuhn length
	chromatin_diameter = 30	#diameter of heterochromatin (nm)

	steps = range(1, len(coords))	#one step per pair of neighboring loci
	path_length = 0
	for k in steps:
		path_length += la.calcDistance(coords[k-1], coords[k])
	mean_step = path_length/len(steps)		#average distance between neighboring loci
	expected_step = kuhn_length * (res/bp_per_kuhn_length)**(1./2)		#physical distance between neighboring loci (nm)
	scale = mean_step/expected_step
	return chromatin_diameter/2 * scale
Beispiel #7
0
def rmsd(cluster1, cluster2):
    """Return the root mean square distance between two clusters.

    Only point numbers reported by both clusters' getPointNums() are
    compared. Each shared number is looked up in the respective cluster's
    points list at index (num - offset), and the distance between the two
    .pos values is taken with la.calcDistance.

    Raises:
        AssertionError: if the clusters' chrom.res differ, or their
            chrom.minPos / res values differ.
        ZeroDivisionError: if the clusters share no point numbers.
    """
    assert cluster1.chrom.res == cluster2.chrom.res
    res = cluster1.chrom.res
    assert cluster1.chrom.minPos / res == cluster2.chrom.minPos / res  #indexing must be same

    # Fetch cluster2's point numbers once and use a set, instead of calling
    # getPointNums() and scanning its result for every point of cluster1.
    nums_in_cluster2 = set(cluster2.getPointNums())
    intersection = [
        num for num in cluster1.getPointNums() if num in nums_in_cluster2
    ]

    dist_sum = 0
    for num in intersection:
        point1 = cluster1.points[num - cluster1.offset]
        point2 = cluster2.points[num - cluster2.offset]
        dist_sum += la.calcDistance(point1.pos, point2.pos)**2
    msd = dist_sum / len(intersection)  #mean square distance
    return msd**(1. / 2)  #root mean square distance
Beispiel #8
0
def calculateRadius(clusters):
    """Return a to-scale radius averaged over several clusters.

    For each cluster, the mean distance between consecutive coordinates is
    divided by the expected physical distance between neighboring loci,
    kl * sqrt(res / bpPerKL). The mean of those ratios scales half of
    chromatinDiameter. kl, bpPerKL and chromatinDiameter are read from the
    enclosing scope.
    """
    scale_factors = np.zeros(len(clusters))
    for idx, cluster in enumerate(clusters):
        points = cluster.getCoords()
        steps = range(1, len(points))  #one step per pair of neighboring loci
        path_length = 0
        for k in steps:
            path_length += la.calcDistance(points[k - 1], points[k])
        mean_step = path_length / len(steps)  #average distance between neighboring loci
        #physical distance between neighboring loci (nm)
        expected_step = kl * (cluster.chrom.res / bpPerKL)**(1. / 2)
        scale_factors[idx] = mean_step / expected_step
    mean_factor = np.mean(scale_factors)
    return chromatinDiameter / 2 * mean_factor
Beispiel #9
0
        # Progress output: print the current value of i (the enclosing loop
        # headers are outside this excerpt).
        print(i)
        # Run the multimds script on the two input paths with --full and the
        # current p passed as -P (presumably the joint MDS penalty -- confirm).
        os.system("python ../multimds.py -P {} --full {} {}".format(
            p, path1, path2))

        # Load the two structure files named after the basenames of
        # prefix1 and prefix2.
        structure1 = dt.structure_from_file("{}_structure.tsv".format(
            os.path.basename(prefix1)))
        structure2 = dt.structure_from_file("{}_structure.tsv".format(
            os.path.basename(prefix2)))

        # Only when p is zero: compute a transformation between the two
        # structures and apply it to structure1.
        # NOTE(review): presumably aligns structure1 onto structure2 because
        # the structures are not jointly constrained at p == 0 -- confirm.
        if p == 0:
            r, t = la.getTransformation(structure1, structure2)
            structure1.transform(r, t)

        # Per-point distance between the paired coordinates of the two
        # structures; zip() stops at the shorter coordinate list.
        all_changes.append(
            np.array([
                la.calcDistance(coord1, coord2) for coord1, coord2 in zip(
                    structure1.getCoords(), structure2.getCoords())
            ]))

    # Squared Pearson correlation between every pair (i > j) of the distance
    # arrays collected above. n is defined outside this excerpt.
    # NOTE(review): this rebinds i and p, which are also used above; safe
    # only if the enclosing loops reassign them on each iteration -- confirm.
    r_sq = []
    for i in range(n):
        for j in range(i):
            r, p = st.pearsonr(all_changes[i], all_changes[j])
            r_sq.append(r**2)

    all_r_sq.append(r_sq)

# One list of r^2 values per outer-loop iteration.
ys = all_r_sq

#start with a frameless plot (extra room on the left)
plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False)
Beispiel #10
0
from matplotlib import pyplot as plt
import numpy as np
import compartment_analysis as ca
from scipy import stats as st

# Command-line arguments: two cell type names and the resolution in kb.
cell_type1 = sys.argv[1]
cell_type2 = sys.argv[2]
res_kb = int(sys.argv[3])

# File name patterns, filled with (cell type, resolution in kb).
_STRUCTURE_TEMPLATE = "{}_21_{}kb_structure.tsv"
_BED_TEMPLATE = "hic_data/{}_21_{}kb.bed"

# Load both structures.
struct1 = dt.structure_from_file(_STRUCTURE_TEMPLATE.format(cell_type1, res_kb))
struct2 = dt.structure_from_file(_STRUCTURE_TEMPLATE.format(cell_type2, res_kb))
gen_coords = np.array(struct1.getGenCoords())

# Distance between paired coordinates of the two structures; zip() stops at
# the shorter coordinate list.
_coord_pairs = zip(struct1.getCoords(), struct2.getCoords())
dists = np.array([la.calcDistance(first, second) for first, second in _coord_pairs])

# Compartment values for each cell type, computed from its BED file.
mat1 = dt.matFromBed(_BED_TEMPLATE.format(cell_type1, res_kb), struct1)
comps1 = ca.get_compartments(mat1, struct1)
mat2 = dt.matFromBed(_BED_TEMPLATE.format(cell_type2, res_kb), struct2)
comps2 = ca.get_compartments(mat2, struct2)

# Flip the sign of the first compartment vector when the two are
# negatively correlated, so that both use the same sign convention.
r, p = st.pearsonr(comps1, comps2)
comps1 = -comps1 if r < 0 else comps1

comp_diffs = np.abs(comps1 - comps2)
Beispiel #11
0
def _build_parser():
    """Build the command-line parser for joint 3D reconstruction."""
    parser = argparse.ArgumentParser(
        description=
        "Jointly reconstruct 3D coordinates from two normalized intrachromosomal Hi-C BED files."
    )
    parser.add_argument("path1",
                        help="path to first intrachromosomal Hi-C BED file")
    parser.add_argument("path2",
                        help="path to second intrachromosomal Hi-C BED file")
    parser.add_argument("--partitioned",
                        action="store_true",
                        help="use partitioned MDS (default: full MDS)")
    parser.add_argument("-l",
                        type=int,
                        help="low resolution/high resolution",
                        default=10)
    parser.add_argument("-o", help="output file prefix")
    # type=int so that a user-supplied value is numeric like the default;
    # without it argparse would hand over a string.
    parser.add_argument("-r",
                        type=int,
                        default=32000000,
                        help="maximum RAM to use (in kb)")
    parser.add_argument("-n", type=int, default=3, help="number of threads")
    parser.add_argument(
        "-a",
        type=float,
        default=4,
        help=
        "alpha factor for converting contact frequencies to physical distances"
    )
    parser.add_argument("-P",
                        type=float,
                        default=0.05,
                        help="joint MDS penalty")
    parser.add_argument("-m",
                        type=int,
                        default=0,
                        help="midpoint (usually centromere) for partitioning")
    parser.add_argument("-N", type=int, default=2, help="number of partitions")
    parser.add_argument("-w",
                        type=float,
                        default=0.05,
                        help="weight of distance decay prior")
    return parser


def main():
    """Command-line entry point: jointly reconstruct two structures.

    Parses the command line, runs partitionedMDS (with --partitioned) or
    fullMDS on the two input BED files, and then:

    * writes each structure to "<prefix><input basename>_structure.tsv",
    * saves the per-point distances between the two structures to
      "<prefix><name1>_<name2>_relocalization.bed",
    * prints the fractional compartment change.

    With --partitioned, the numeric parameters are first checked with
    tools.args_are_valid; if that check fails the process exits with
    status 1.
    """
    args = _build_parser().parse_args()

    if args.partitioned:
        # TODO: cleanup
        params = (args.m, args.N, args.r, args.n, args.a, args.l, args.P,
                  args.w)
        names = ("Midpoint", "Number of partitions", "Maximum memory",
                 "Number of threads", "Alpha", "Resolution ratio", "Penalty",
                 "Weight")
        intervals = ((None, None), (1, None), (0, None), (0, None), (1, None),
                     (1, None), (0, None), (0, 1))
        if not tools.args_are_valid(params, names, intervals):
            # Invalid arguments are a failure; do not report success (was 0).
            sys.exit(1)

        structure1, structure2 = partitionedMDS(args.path1, args.path2, params)

    else:
        structure1, structure2 = fullMDS(args.path1, args.path2, args.a,
                                         args.P, args.n, args.w)

    # A missing or empty -o means no prefix.
    prefix = args.o or ""

    prefix1 = os.path.splitext(os.path.basename(args.path1))[0]
    structure1.write("{}{}_structure.tsv".format(prefix, prefix1))
    prefix2 = os.path.splitext(os.path.basename(args.path2))[0]
    structure2.write("{}{}_structure.tsv".format(prefix, prefix2))

    # Distance between paired coordinates of the two structures; zip()
    # stops at the shorter coordinate array.
    coords1 = np.array(structure1.getCoords())
    coords2 = np.array(structure2.getCoords())
    dists = [
        la.calcDistance(coord1, coord2)
        for coord1, coord2 in zip(coords1, coords2)
    ]
    np.savetxt("{}{}_{}_relocalization.bed".format(prefix, prefix1, prefix2),
               dists)

    print("Fractional compartment change: ")
    print(
        calculate_compartment_fraction(structure1, structure2, args.path1,
                                       args.path2))
Beispiel #12
0
# Command-line arguments (sys.argv[1] is not read in this part of the script).
chrom_num = sys.argv[2]
gene_loc = int(sys.argv[3])
prefix1 = sys.argv[4]
prefix2 = sys.argv[5]
res_kb = 32

# Per-strain maxima, used below for the axis limits.
max_dists = []
max_gencoords = []

# Command and file name patterns; every field is filled with
# (prefix, chromosome name, resolution in kb).
_MDS_COMMAND = "python ~/git/multimds/multimds.py --full -P 0.1 -w 0 {}_{}_{}kb.bed {}_{}_{}kb.bed"
_STRUCTURE_TEMPLATE = "{}_{}_{}kb_structure.tsv"

plt.subplot2grid((10,10), (0,0), 9, 10, frameon=False)
for strain in ("Scer", "Suva"):
	chrom_name = "{}_{}".format(strain, chrom_num)
	# Run multimds on the pair of BED files for this strain's chromosome.
	os.system(_MDS_COMMAND.format(prefix1, chrom_name, res_kb, prefix2, chrom_name, res_kb))
	struct1 = dt.structure_from_file(_STRUCTURE_TEMPLATE.format(prefix1, chrom_name, res_kb))
	struct2 = dt.structure_from_file(_STRUCTURE_TEMPLATE.format(prefix2, chrom_name, res_kb))
	# Distance between paired coordinates of the two structures.
	dists = [la.calcDistance(first, second) for first, second in zip(struct1.getCoords(), struct2.getCoords())]
	max_dists.append(max(dists))
	max_gencoords.append(max(struct1.getGenCoords()))
	plt.plot(struct1.getGenCoords(), dists, label=strain, lw=4)

# Axis interval sizes and limits, padded by a fraction of one interval.
x_int_size = 200000
ys = dists	# distances of the last strain processed in the loop above
y_int_size = 0.01
x_start = -x_int_size/4.
x_end = max(max_gencoords) + x_int_size/5.
y_start = -y_int_size/5.
y_end = max(max_dists) + y_int_size/5.

# Title and axis labels.
plt.title("chr{}".format(chrom_num), fontsize=14)
plt.xlabel("Genomic coordinate", fontsize=14)
plt.ylabel("Relocalization", fontsize=14)