Esempio n. 1
0
def main():
    """Plot chr14 interaction distributions for a digestion timecourse.

    Command line:
        -i  seven timecourse HDF5 files, ordered Mock -> Overnight digestion.

    The chr14 cis matrix of every file is extracted with
    ``mf.get_cis_matrix``.  The position and value of the largest entry of
    the third matrix are printed, a boxplot of the upper-triangle values
    (diagonal included) of every matrix is drawn with ``my_boxplot`` and the
    mean interaction by distance is plotted with
    ``plot_mean_interaction_distance``.

    Exits with an error message and a non-zero status when ``check_shape``
    rejects the list of matrices.
    """
    parser = argparse.ArgumentParser(
        description='create boxplots of interactions for timecourse' +
        ' at different distances')
    parser.add_argument(
        '-i',
        help='timecourse hdf5 files in order Mock -> Overnight digestion',
        nargs=7)
    args = parser.parse_args()

    # Timecourse file objects, kept in command-line order
    timecourse_files = []
    try:
        for path in args.i:
            timecourse_files.append(h5py.File(path, 'r'))

        # chr14 cis matrices, same order as the input files
        chr14_list = [
            mf.get_cis_matrix(handle, 'chr14') for handle in timecourse_files
        ]

        if not check_shape(chr14_list):
            # A string argument is written to stderr and gives exit status 1
            sys.exit('ERROR unequal dimensions for cis matrices')

        # Diagnostic: location and value of the maximum of the third matrix
        z = chr14_list[2]
        max_idx = np.unravel_index(np.argmax(z), z.shape)
        print(max_idx)
        print(z[max_idx])

        # Boxplots for all interactions (upper triangle incl. diagonal)
        data = []
        for c in chr14_list:
            upper = np.triu_indices(c.shape[0])
            data.append(c[upper])
        my_boxplot(data)

        # Mean interaction plots by distance
        plot_mean_interaction_distance(chr14_list)
    finally:
        # Files stay open until plotting is done in case the matrices are
        # backed by them
        for handle in timecourse_files:
            handle.close()
Esempio n. 2
0
def main():
    """Write a copy of a Hi-C HDF5 file whose cis blocks hold z-scores.

    Command line:
        -i  input HDF5 Hi-C file; its name must end in ``.hdf5``.

    The input is copied to ``<prefix>_zScore.hdf5``.  For every entry of the
    ``chrs`` dataset the cis matrix (``mf.get_cis_matrix``) is converted with
    ``mf.z_score`` and stored in the block of ``interactions`` given by the
    matching ``chr_bin_range`` row (both bounds inclusive).  The
    ``interactions`` dataset is then recreated as gzip-compressed float64
    with the chunk shape of the original dataset.

    Exits with an error message and a non-zero status when the input file
    name does not end in ``.hdf5``.
    """
    parser = argparse.ArgumentParser(
        description='Return hdf5 with z-score cis matrices for input hdf5')
    parser.add_argument('-i',
                        help='input hdf5 Hi-C file',
                        type=str,
                        required=True)
    args = parser.parse_args()

    extension = '.hdf5'
    if not args.i.endswith(extension):
        # A string argument is written to stderr and gives exit status 1
        sys.exit('ERROR: wrong file extension')

    # Generate z-score hdf5 next to the input
    zscore_file = args.i[:-len(extension)] + '_zScore.hdf5'
    shutil.copy(args.i, zscore_file)

    # The context manager closes the file even if a step below fails
    with h5py.File(zscore_file, 'r+') as f:
        obs = f['interactions'][:]
        blocksize = f['interactions'].chunks
        chroms = f['chrs'][:]
        bin_ranges = f['chr_bin_range'][:]

        # Loop through chromosomes
        for chrom in chroms:
            z = mf.z_score(mf.get_cis_matrix(f, chrom))
            # Replace observed cis matrix with z-score matrix
            chr_idx = np.where(chroms == chrom)[0][0]
            first_bin, last_bin = bin_ranges[chr_idx][0], bin_ranges[chr_idx][1]
            obs[first_bin:last_bin + 1, first_bin:last_bin + 1] = z

        # Recreate the dataset so it is stored as float64
        del f['interactions']
        f.create_dataset("interactions",
                         data=obs,
                         dtype='float64',
                         compression='gzip',
                         chunks=blocksize)
def main():
    """Plot mean chr14 interactions by distance for A-A, B-B and A-B pairs.

    Command line:
        -i  seven timecourse HDF5 files, ordered Mock -> Overnight digestion.
        -c  compartment (eigenvector bedGraph) file.

    The chr14 cis matrix of every timecourse file is extracted with
    ``mf.get_cis_matrix``; the chr14 compartment values are read from the
    compartment file with ``get_chrom_eigen``.  One plot is produced by
    ``plot_mean_interaction_distance_AB`` for each of 'AA', 'BB' and 'AB'.

    Exits with an error message and a non-zero status when ``check_shape``
    rejects the list of matrices.
    """
    parser = argparse.ArgumentParser(
        description='create mean interactions vs timecourse and color' +
        ' by distance plots')
    parser.add_argument(
        '-i',
        help='timecourse hdf5 files in order Mock -> Overnight digestion',
        nargs=7)
    parser.add_argument(
        '-c',
        help=
        'compartment file (ex. HBHiC-K562-MN-Dp-1__hg19__genome__C-500000-raw_scaleBy_2.72.balanced_scaleBy_51.45__all.zScore.eigen1.sorted.bedGraph)',
        type=str,
        required=True)
    args = parser.parse_args()

    # Timecourse file objects, kept in command-line order
    timecourse_files = []
    try:
        for path in args.i:
            timecourse_files.append(h5py.File(path, 'r'))

        # chr14 cis matrices, same order as the input files
        chr14_list = [
            mf.get_cis_matrix(handle, 'chr14') for handle in timecourse_files
        ]

        if not check_shape(chr14_list):
            # A string argument is written to stderr and gives exit status 1
            sys.exit('ERROR unequal dimensions for cis matrices')

        # Compartment values for chr14; the file is closed once parsed
        with open(args.c, 'r') as compartment_file:
            eigen = get_chrom_eigen(compartment_file, 'chr14')

        # Make mean interaction plots by distance for A-A, B-B, A-B
        for pair_type in ('AA', 'BB', 'AB'):
            plot_mean_interaction_distance_AB(chr14_list, eigen, pair_type)
    finally:
        # Files stay open until plotting is done in case the matrices are
        # backed by them
        for handle in timecourse_files:
            handle.close()
Esempio n. 4
0
def main():
    """Build a chr14 half-life matrix restricted to a 6 Mb window.

    Command line:
        -i  seven timecourse HDF5 files, ordered Mock -> Overnight digestion.

    The first input (mock) is copied to 'half_life_chr14_6Mb.hdf5' and the
    chr14 rows of its 'interactions' dataset are overwritten in place:

    * a row that is entirely NaN or sums to zero is set to NaN;
    * a row whose bin `dist` upstream or `dist - 1` downstream has a
      different value in column 0 of 'bin_positions' is set to NaN;
    * otherwise, for every chr14 column j with i - dist <= j < i + dist the
      half-life returned by get_half_life is written to [i, j] and [j, i]
      (NaN if any timepoint is NaN or the mock value is 0); chr14 columns
      outside that window are set to NaN.

    Exits (via sys.exit() with no status argument) after printing an error
    when check_shape rejects the list of matrices.
    """

    parser = argparse.ArgumentParser(
        description='Create half-life matrix from timecourse')
    parser.add_argument(
        '-i',
        help='timecourse hdf5 files in order Mock -> Overnight digestion',
        nargs=7)
    args = parser.parse_args()

    # Create copy of mock and use that to write over
    # with half life data
    shutil.copy(args.i[0], 'half_life_chr14_6Mb.hdf5')
    f = h5py.File('half_life_chr14_6Mb.hdf5', 'r+')

    # List of timecourse file objects in order
    # NOTE(review): these read-only handles are never closed in this function.
    f_obj_list = []
    for a in args.i:
        f_obj_list.append(h5py.File(a, 'r'))

    # List of chromosome 14 matrices for timecourse data
    # in order
    chr14_list = []
    for o in f_obj_list:
        chr14_list.append(mf.get_cis_matrix(o, 'chr14'))

    if not check_shape(chr14_list):
        print 'ERROR unequal dimensions for cis matrices'
        sys.exit()

    # Six time values for seven input files; presumably get_delta_intxns
    # returns one value per non-mock timepoint -- TODO confirm.
    time = np.array([5, 60, 120, 180, 240, 960], dtype=float)

    # Disabled single-pixel experiments kept for reference:
    # # 1 MB distance
    # interactions = []
    # for h in chr14_list:
    # 	interactions.append(h[500, 510])
    # # delta interactions
    # delta_intxns = get_delta_intxns(interactions)
    # mid = get_intxns_half(delta_intxns)
    # hl, params = get_half_life(delta_intxns, time, mid)
    # x = np.linspace(-10, 1000, 100, dtype=float)
    # y = exp_decay(x, *params)
    # scatter_plot(delta_intxns, time, x, y, 'test_1MB_distance_delta.png', hl, mid)

    # # 400kb distance
    # interactions = []
    # for h in chr14_list:
    # 	interactions.append(h[500,  504])
    # # delta interactions
    # delta_intxns = get_delta_intxns(interactions)
    # mid = get_intxns_half(delta_intxns)
    # hl, params = get_half_life(delta_intxns, time, mid)
    # x = np.linspace(-10, 1000, 100, dtype=float)
    # y = exp_decay(x, *params)
    # scatter_plot(delta_intxns, time, x, y, 'test_400kb_distance_delta.png', hl, mid)

    chrom = 'chr14'
    # Assumes column 2 of the first 'bin_positions' row equals the bin size
    # in bp -- TODO confirm against the file layout.
    binSize = f['bin_positions'][0, 2]
    # Half of the 6 Mb window, in bins.
    # NOTE(review): `dist` is used as an array index offset below, so this
    # relies on `/` performing integer division (Python 2 with integer
    # operands) -- confirm before porting.
    dist = (6000000 / binSize) / 2
    #obs = f['interactions'][:]
    bin_positions = f['bin_positions'][:]
    # NOTE(review): num_bins is not used anywhere below.
    num_bins = len(bin_positions)
    chr_idx, = np.where(f['chrs'][:] == chrom)
    chr_idx = chr_idx[0]
    # Genome-wide first and last bin index of chr14; both are used as
    # inclusive bounds by the loops below.
    bins = f['chr_bin_range'][chr_idx]
    for i in range(bins[0], bins[1] + 1):
        #for i in range(bins[0] + 1250, bins[1] + 1):
        print 'on row: ' + str(i)
        # 6Mb window
        # Check if row is all nan
        if np.all(np.isnan(f['interactions'][i])) or np.nansum(
                f['interactions'][i]) == 0:
            f['interactions'][i] = np.nan
        # Check if at start of chromosome (upstream range reaches trans)
        # NOTE(review): if i - dist is negative the index wraps around to the
        # end of bin_positions -- confirm this cannot happen for chr14.
        elif bin_positions[i, 0] != bin_positions[i - dist, 0]:
            f['interactions'][i] = np.nan
        # Check if at end of chromosome (downstream range reaches trans)
        # NOTE(review): i + (dist - 1) past the last bin would raise
        # IndexError -- confirm chr14 is never followed by too few bins.
        elif bin_positions[i, 0] != bin_positions[i + (dist - 1), 0]:
            f['interactions'][i] = np.nan
        else:
            for j in range(bins[0], bins[1] + 1):
                # Check if in 6Mb window
                if j >= i - dist and j < i + dist:
                    # Timecourse values of this pixel; the cis matrices are
                    # indexed relative to the first chr14 bin.
                    interactions = []
                    for h in chr14_list:
                        interactions.append(h[i - bins[0], j - bins[0]])
                    # No half-life if a timepoint is missing or the mock
                    # (first file) value is zero.
                    if np.any(np.isnan(interactions)) or (interactions[0]
                                                          == 0):
                        f['interactions'][i, j] = np.nan
                    else:
                        delta_intxns = get_delta_intxns(interactions)
                        mid = get_intxns_half(delta_intxns)
                        hl, params = get_half_life(delta_intxns, time, mid)
                        # The mirrored write can later be overwritten when
                        # row j is processed, and can itself overwrite a row
                        # that was already set to NaN, so the final content
                        # depends on iteration order.
                        f['interactions'][i, j] = hl
                        f['interactions'][j, i] = hl
                else:
                    f['interactions'][i, j] = np.nan
    f.close()
def main():
    """Build a chr14 half-life matrix from trans-corrected timecourse data.

    Command line:
        -i  seven timecourse HDF5 files, ordered Mock -> Overnight digestion.
        -t  whitespace-separated text file; the first field of each line is
            an input file name and the second its median trans interaction.

    Steps:
        1. Read the median trans values and draw ``trans_barplot``.
        2. Extract the chr14 cis matrix of every input and correct the list
           with ``subtract_trans``.
        3. Draw four diagnostic scatter plots (quadratic fit plus the result
           of ``get_half_life``) for pixels (80, 90), (80, 180), (80, 85)
           and (80, 120).
        4. Copy the first input to 'half_life_chr14_correct.hdf5' and replace
           the chr14 cis block of its 'interactions' dataset with the first
           value returned by ``get_half_life`` for every pixel.

    Exits with an error message and a non-zero status when ``check_shape``
    rejects the list of matrices.
    """
    parser = argparse.ArgumentParser(description='Create half-life matrix from timecourse')
    parser.add_argument('-i', help= 'timecourse hdf5 files in order Mock -> Overnight digestion', nargs=7)
    parser.add_argument('-t', help= 'median trans interaction file (eg. median_trans.txt)', type=str, required=True)
    args = parser.parse_args()

    output_name = 'half_life_chr14_correct.hdf5'

    # Get trans interaction data, keyed by input file name
    trans_dict = {}
    with open(args.t, 'r') as t:
        for line in t:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline)
                continue
            trans_dict[fields[0]] = float(fields[1])

    # trans graph
    trans_barplot(args.i, trans_dict)

    # Timecourse file objects, kept in command-line order
    timecourse_files = []
    try:
        for path in args.i:
            timecourse_files.append(h5py.File(path, 'r'))

        # chr14 cis matrices, same order as the input files
        chr14_list = [
            mf.get_cis_matrix(handle, 'chr14') for handle in timecourse_files
        ]

        if not check_shape(chr14_list):
            # A string argument is written to stderr and gives exit status 1
            sys.exit('ERROR unequal dimensions for cis matrices')

        median_trans = np.array([trans_dict[path] for path in args.i])
        chr14_list = subtract_trans(chr14_list, median_trans)

        time = np.array([0, 5, 60, 120, 180, 240, 960])
        xp = np.linspace(-10, 1000, 100)

        # Diagnostic pixels in row 80: (column, output png, echo values?)
        test_pixels = (
            (90, 'test_10_correct.png', False),
            (180, 'test_100_correct.png', True),
            (85, 'test_5_correct.png', True),
            (120, 'test_40_correct.png', False),
        )
        for col, png_name, echo in test_pixels:
            interactions = [h[80, col] for h in chr14_list]
            # Quadratic polyfit drawn next to the half-life estimate
            p = np.poly1d(np.polyfit(time, interactions, 2))
            hl, pm = get_half_life(interactions, time)
            scatter_plot(interactions, time, xp, p, png_name, hl, pm)
            if echo:
                print(interactions)

        # Half-life of the last diagnostic pixel
        print('*****hl*****')
        print(hl)
        print('************')

        # Create copy of mock and use that to write over
        # with half life data
        shutil.copy(args.i[0], output_name)
        with h5py.File(output_name, 'r+') as f:
            chrom = 'chr14'
            chr_idx = np.where(f['chrs'][:] == chrom)[0][0]
            bins = f['chr_bin_range'][chr_idx]
            first_bin = int(bins[0])
            n_bins = int(bins[1]) - first_bin + 1
            print(bins)
            print(n_bins)

            print(f['interactions'][first_bin + 80, first_bin + 90])
            for i in range(n_bins):
                print('on row: ' + str(i))
                row = np.empty(n_bins, dtype=float)
                for j in range(n_bins):
                    interactions = [h[i, j] for h in chr14_list]
                    row[j] = get_half_life(interactions, time)[0]
                # One HDF5 write per row instead of one per pixel
                f['interactions'][first_bin + i,
                                  first_bin:first_bin + n_bins] = row
            print(f['interactions'][:])
            print(f['interactions'][first_bin + 80, first_bin + 90])
    finally:
        # Files stay open until the end in case the matrices are backed by
        # them
        for handle in timecourse_files:
            handle.close()
def main():
    """Build a chr14 slope matrix from z-scored timecourse data.

    Command line:
        -i  seven timecourse HDF5 files, ordered Mock -> Overnight digestion.

    Steps:
        1. Extract the chr14 cis matrix of every input.
        2. Compute expected (``mf.expected``), observed - expected and
           z-score (``mf.z_score``) matrices and dump each set with
           ``mf.numpy_matrix_2_dekker`` under 'expected/', 'obs-exp/' and
           'zscore/' (input name with its last five characters, i.e.
           '.hdf5', removed).
        3. Draw four diagnostic slope plots for pixels (80, 90), (80, 180),
           (80, 85) and (80, 120) of the z-score matrices.
        4. Copy the first input to 'slope_chr14_zscore.hdf5' and replace the
           chr14 cis block of its 'interactions' dataset with the value of
           ``get_slope`` for every pixel; magnitudes below 1e-15 are stored
           as 0.0.

    Exits with an error message and a non-zero status when ``check_shape``
    rejects the list of matrices.
    """
    parser = argparse.ArgumentParser(
        description='Create slope matrix from timecourse z-scores')
    parser.add_argument(
        '-i',
        help='timecourse hdf5 files in order Mock -> Overnight digestion',
        nargs=7)
    args = parser.parse_args()

    output_name = 'slope_chr14_zscore.hdf5'
    time = np.array([0, 5, 60, 120, 180, 240, 960])
    # Third argument of get_slope / scatter_plot_slope; presumably the time
    # at which the slope is evaluated -- TODO confirm.
    slope_arg = 150

    def write_matrices(matrices, label):
        # Dump one matrix per input file to
        # '<label>/<input name without .hdf5>_<label>.matrix.gz'
        for path, matrix in zip(args.i, matrices):
            colnames = ['x' + str(k) for k in range(matrix.shape[0])]
            rownames = list(colnames)
            mf.numpy_matrix_2_dekker(
                matrix, colnames, rownames,
                label + '/' + path[:-5] + '_' + label + '.matrix.gz')

    # Timecourse file objects, kept in command-line order
    timecourse_files = []
    try:
        for path in args.i:
            timecourse_files.append(h5py.File(path, 'r'))

        # chr14 cis matrices, same order as the input files
        chr14_list = [
            mf.get_cis_matrix(handle, 'chr14') for handle in timecourse_files
        ]

        if not check_shape(chr14_list):
            # A string argument is written to stderr and gives exit status 1
            sys.exit('ERROR unequal dimensions for cis matrices')

        # Expected matrices
        chr14_expected = [mf.expected(c) for c in chr14_list]
        write_matrices(chr14_expected, 'expected')

        # Observed - expected matrices
        chr14_obs_exp = [
            obs - exp for obs, exp in zip(chr14_list, chr14_expected)
        ]
        write_matrices(chr14_obs_exp, 'obs-exp')

        # z-score matrices
        chr14_zscore = [mf.z_score(c) for c in chr14_list]
        write_matrices(chr14_zscore, 'zscore')

        # Diagnostic pixels in row 80: (column, output png, echo values?)
        xp = np.linspace(-10, 1000, 100)
        test_pixels = (
            (90, 'test_10_slope.png', False),
            (180, 'test_100_slope.png', True),
            (85, 'test_5_slope.png', True),
            (120, 'test_40_slope.png', False),
        )
        for col, png_name, echo in test_pixels:
            interactions = [h[80, col] for h in chr14_zscore]
            s = get_slope(interactions, time, slope_arg)
            # Quadratic polyfit drawn next to the slope estimate
            p = np.poly1d(np.polyfit(time, interactions, 2))
            scatter_plot_slope(interactions, time, xp, p, s, slope_arg,
                               png_name)
            if echo:
                print(interactions)

        # Create copy of mock and use that to write over
        # with slope data
        shutil.copy(args.i[0], output_name)
        with h5py.File(output_name, 'r+') as f:
            chrom = 'chr14'
            chr_idx = np.where(f['chrs'][:] == chrom)[0][0]
            bins = f['chr_bin_range'][chr_idx]
            first_bin = int(bins[0])
            n_bins = int(bins[1]) - first_bin + 1
            print(bins)
            print(n_bins)

            print(f['interactions'][first_bin + 80, first_bin + 90])
            for i in range(n_bins):
                print('on row: ' + str(i))
                row = np.empty(n_bins, dtype=float)
                for j in range(n_bins):
                    interactions = [h[i, j] for h in chr14_zscore]
                    s = get_slope(interactions, time, slope_arg)
                    # Flush numerical noise around zero to an exact 0.0
                    if abs(s) < 1e-15:
                        s = 0.0
                    row[j] = s
                # One HDF5 write per row instead of one per pixel
                f['interactions'][first_bin + i,
                                  first_bin:first_bin + n_bins] = row
            print(f['interactions'][:])
            print(f['interactions'][first_bin + 80, first_bin + 90])
    finally:
        # Files stay open until the end in case the matrices are backed by
        # them
        for handle in timecourse_files:
            handle.close()