def main():
    """Load every requested sample into one chromosome object and save it.

    Reads the options returned by ``getArgs()``:

    * ``i`` -- iterable of sample specs, each a ``"label,path"`` string
    * ``o`` -- output prefix; the chromosome is saved to ``<o>.tdb``
    * ``c`` -- chromosome name
    * ``p`` -- value forwarded to ``getHiCData`` as its last argument
      (presumably a CPU count -- confirm against ``getHiCData``)
    * ``r`` -- resolution forwarded to ``getHiCData``
    * ``s`` -- species
    * ``b`` -- genome build (passed to ``Chromosome`` as ``assembly``)
    """
    args = getArgs()
    samples = args.i
    output = args.o
    # Named ``chrom_name`` rather than ``chr`` so the builtin is not shadowed.
    chrom_name = args.c
    ncpu = args.p
    resolution = args.r
    species = args.s
    gbuild = args.b

    # Initiate a chromosome object that will store all Hi-C data and analysis.
    my_chrom = Chromosome(
        name=chrom_name,          # chromosome name
        centromere_search=True,   # whether to search for the centromere
        species=species,
        assembly=gbuild,          # genome build
    )

    for sample in samples:
        # Split on the first comma only, so a path that itself contains a
        # comma is kept intact instead of raising on unpacking.
        label, path = sample.split(",", 1)
        print(label)
        print(path)
        getHiCData(my_chrom, output, label, path, resolution, ncpu)

    my_chrom.save_chromosome(output + ".tdb", force=True)
def test_05_save_load(self):
    """Save a two-experiment chromosome to ``lolo`` and load it back.

    The chromosome is built from the externally defined ``exp1`` and
    ``exp2``. Nothing is asserted: the test passes as long as saving and
    loading do not raise. ``lolo`` and ``lolo_hic`` are removed afterwards.
    """
    tads = [exp1, exp2]
    names = ['exp1', 'exp2']
    resolutions = [20000, 20000]
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=tads,
                          experiment_names=names,
                          experiment_resolutions=resolutions)
    test_chr.save_chromosome('lolo', force=True)

    # Rebinding the name replaces the in-memory object with the loaded copy.
    test_chr = load_chromosome('lolo')

    for cleanup_cmd in ('rm -f lolo', 'rm -f lolo_hic'):
        system(cleanup_cmd)
def test_05_save_load(self):
    """Check that a chromosome survives a save/load round trip.

    A chromosome built from the externally defined ``exp1`` and ``exp2`` is
    saved to ``lolo`` and loaded back; the string forms of both objects'
    ``__dict__`` must be equal. When ``CHKTIME`` is set, the elapsed time is
    printed with the test label ``5``.
    """
    started = time() if CHKTIME else None

    original = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp1, exp2],
                          experiment_names=['exp1', 'exp2'],
                          experiment_resolutions=[20000, 20000],
                          silent=True)
    original.save_chromosome('lolo', force=True)
    reloaded = load_chromosome('lolo')

    # Remove the files written by save_chromosome before asserting.
    system('rm -f lolo')
    system('rm -f lolo_hic')

    before = str(original.__dict__)
    after = str(reloaded.__dict__)
    self.assertEqual(before, after)

    if CHKTIME:
        print(' '.join(('5', str(time() - started))))
def test_05_save_load(self):
    """Round-trip a chromosome through ``save_chromosome``/``load_chromosome``.

    The saved and loaded objects are compared through the string form of
    their ``__dict__``. The files ``lolo`` and ``lolo_hic`` are deleted
    before the comparison. With ``CHKTIME`` set, the run time is printed
    after the label ``5``.
    """
    if CHKTIME:
        t_start = time()

    chrom_kwargs = {
        'name': 'Test Chromosome',
        'experiment_tads': [exp1, exp2],
        'experiment_names': ['exp1', 'exp2'],
        'experiment_resolutions': [20000, 20000],
        'silent': True,
    }
    saved = Chromosome(**chrom_kwargs)
    saved.save_chromosome('lolo', force=True)
    loaded = load_chromosome('lolo')

    for cleanup_cmd in ('rm -f lolo', 'rm -f lolo_hic'):
        system(cleanup_cmd)

    self.assertEqual(str(saved.__dict__), str(loaded.__dict__))

    if CHKTIME:
        elapsed = time() - t_start
        print(' '.join(('5', str(elapsed))))
def test_05_save_load(self):
    """Verify that saving and reloading a chromosome preserves its state.

    Skipped when ``ONLY`` is set and does not contain ``"05"``. The
    comparison is made on the string form of each object's ``__dict__``.
    With ``CHKTIME`` set, the elapsed time is printed after the label ``5``.
    """
    if ONLY and "05" not in ONLY:
        return

    clock_start = time() if CHKTIME else None

    written = Chromosome(name="Test Chromosome",
                         experiment_tads=[exp1, exp2],
                         experiment_names=["exp1", "exp2"],
                         experiment_resolutions=[20000, 20000],
                         silent=True)
    written.save_chromosome("lolo", force=True)
    read_back = load_chromosome("lolo")

    # Delete the on-disk artefacts before comparing the two objects.
    system("rm -f lolo")
    system("rm -f lolo_hic")

    expected_state = str(written.__dict__)
    actual_state = str(read_back.__dict__)
    self.assertEqual(expected_state, actual_state)

    if CHKTIME:
        print(" ".join(("5", str(time() - clock_start))))
def test_05_save_load(self):
    """Save a chromosome, load it again and check both copies match.

    Skipped when ``ONLY`` is set to anything other than ``"05"``. The two
    objects are compared through ``str(obj.__dict__)``. When ``CHKTIME`` is
    set, the elapsed time is printed after the label ``5``.
    """
    if ONLY:
        if ONLY != "05":
            return

    if CHKTIME:
        began = time()

    first_copy = Chromosome(
        name="Test Chromosome",
        experiment_tads=[exp1, exp2],
        experiment_names=["exp1", "exp2"],
        experiment_resolutions=[20000, 20000],
        silent=True,
    )
    first_copy.save_chromosome("lolo", force=True)
    second_copy = load_chromosome("lolo")

    for cleanup_cmd in ("rm -f lolo", "rm -f lolo_hic"):
        system(cleanup_cmd)

    self.assertEqual(str(first_copy.__dict__), str(second_copy.__dict__))

    if CHKTIME:
        print(" ".join(("5", str(time() - began))))
def process():
    """Call TADs on every input matrix and write borders, a plot and a .tdb.

    Reads the externally defined ``options`` (``outputDir``,
    ``outputFilename``, ``species``, ``assembly``, ``resolution``,
    ``threads``) and ``args`` (the list of matrix file paths).

    The output prefix is ``outputDir + outputFilename``, or, when no output
    file name was given, ``outputDir`` plus the base name of the first
    matrix. The two parts are concatenated as-is, so ``outputDir`` must
    already end with a path separator if it is meant to be a directory.

    For each matrix file ``<sample>.<chrom>[.matrix].<ext>`` this writes:

    * ``<prefix>.<chrom>.border``  -- TAD borders
    * ``<prefix>chr.<chrom>.pdf``  -- matrix plot with TADs painted
    * ``<prefix>chr.<chrom>.tdb``  -- saved chromosome object

    Raises:
        IndexError: if no output file name was given and ``args`` is empty.
    """
    if options.outputFilename != "":
        out_prefix = options.outputDir + options.outputFilename
    else:
        out_prefix = options.outputDir + os.path.basename(args[0])

    for matrix_path in args:
        # Strip the directory, the last extension and anything from
        # ".matrix" onwards; what is left is "<sample>.<chrom>".
        stem = os.path.splitext(os.path.basename(matrix_path))[0]
        stem = stem.split(".matrix")[0]
        # When the stem contains no ".", both values equal the whole stem.
        parts = stem.rsplit(".", 1)
        sample = parts[0]
        chrom_name = parts[-1]

        chrom = Chromosome(name=chrom_name, centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample, exp_type='Hi-C', identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample, n_cpus=options.threads, normalized=True,
                       verbose=False)

        exp.write_tad_borders(out_prefix + "." + chrom_name + ".border")
        chrom.visualize(exp.name, paint_tads=True,
                        savefig=out_prefix + "chr." + chrom_name + ".pdf")
        chrom.save_chromosome(out_prefix + "chr." + chrom_name + ".tdb",
                              force=True)
def _split_sample_and_chrom(matrix_path):
    """Return ``(sample, chrom)`` parsed from a matrix file name.

    The directory, the last extension and everything from ``".matrix"``
    onwards are dropped; the remainder is split on its last ``"."``. If the
    remainder has no ``"."``, both returned values are the whole remainder.
    """
    stem = os.path.splitext(os.path.basename(matrix_path))[0]
    stem = stem.split(".matrix")[0]
    pieces = stem.rsplit(".", 1)
    return pieces[0], pieces[-1]


def process():
    """Run TAD detection for each matrix in ``args`` and save the results.

    Uses the externally defined ``options`` and ``args``. The output prefix
    is the plain concatenation of ``options.outputDir`` with either
    ``options.outputFilename`` or, if that is empty, the base name of the
    first matrix file; no path separator is inserted.

    Per matrix, the Hi-C data is loaded and normalized, TADs are searched on
    the normalized data, and three files are produced: ``.border`` (TAD
    borders), ``.pdf`` (plot with TADs painted) and ``.tdb`` (saved
    chromosome).

    Raises:
        IndexError: if ``options.outputFilename`` is empty and ``args`` has
            no elements.
    """
    if options.outputFilename != "":
        name_part = options.outputFilename
    else:
        name_part = os.path.basename(args[0])
    prefix = options.outputDir + name_part

    # Iterate over the paths directly; no index is needed.
    for matrix_path in args:
        sample, chrom_name = _split_sample_and_chrom(matrix_path)

        chrom = Chromosome(name=chrom_name,
                           centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample,
                             exp_type='Hi-C',
                             identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample,
                       n_cpus=options.threads,
                       normalized=True,
                       verbose=False)

        # Note the differing infixes: "." for borders, "chr." for the rest.
        border_path = prefix + "." + chrom_name + ".border"
        figure_path = prefix + "chr." + chrom_name + ".pdf"
        tdb_path = prefix + "chr." + chrom_name + ".tdb"

        exp.write_tad_borders(border_path)
        chrom.visualize(exp.name, paint_tads=True, savefig=figure_path)
        chrom.save_chromosome(tdb_path, force=True)
def call_tads(matrix_filenames, chrom_name):
    """Call TADs for one chromosome and export them as TXT, BED and .tdb.

    Uses the externally defined ``txt_directory``, ``bed_directory``,
    ``tdb_directory``, ``matrix_resolution``, ``thread_number`` and
    ``filename_list`` (the BED path is appended to it).

    Args:
        matrix_filenames: non-empty sequence of contact-matrix paths for
            this chromosome. With more than one matrix, TADs are called in
            batch mode on all of them together.
        chrom_name: chromosome name; used for the ``Chromosome`` object, the
            BED track and, in normalised form, the output file names.

    Output files are named after ``chrom_id``: a single-digit chromosome
    number is zero-padded (``chr0N``); any other name is used unchanged.

    Raises:
        IndexError: if ``matrix_filenames`` is empty.
    """
    print('')
    print("Call TADs for chromosome " + chrom_name + '...')
    print("Contact matrices: ")
    for matrix_filename in matrix_filenames:
        print(matrix_filename)

    # A name with no digit, X or Y gives no match; fall back to the raw
    # name instead of failing on ``None.group``.
    chrom_match = search(r'\d+|X|Y', chrom_name)
    chrom_number = chrom_match.group(0) if chrom_match else ''
    if len(chrom_number) == 1 and chrom_number not in ('X', 'Y'):
        chrom_id = 'chr' + '0' + chrom_number
    else:
        chrom_id = chrom_name

    output_txt_filename = join(txt_directory, chrom_id + '_TADs.txt')
    print('Output TXT file: ' + output_txt_filename)
    output_bed_filename = join(bed_directory, chrom_id + '_TADs.bed')
    print('Output BED file: ' + output_bed_filename)
    filename_list.append(output_bed_filename)

    # Call TADs and write their borders in TADbit text format.
    # The 2D/1D TAD plots (chrom.visualize / chrom.tad_density_plot) are
    # currently disabled.
    chrom = Chromosome(name=chrom_name)
    if len(matrix_filenames) > 1:
        # Several matrices for one chromosome.
        experiment_names = []
        for matrix_index, matrix_filename in enumerate(matrix_filenames):
            experiment_name = (splitext(basename(matrix_filename))[0]
                               + '_' + str(matrix_index))
            experiment_names.append(experiment_name)
            chrom.add_experiment(experiment_name, hic_data=matrix_filename,
                                 resolution=matrix_resolution)
        # NOTE(review): the batch result is looked up under
        # "batch_<name>_<name>..."; confirm this matches TADbit's naming.
        result_name = '_'.join(['batch'] + experiment_names)
        chrom.find_tad(experiment_names, batch_mode=True,
                       n_cpus=thread_number)
    else:
        # Only one matrix for one chromosome.
        matrix_filename = matrix_filenames[0]
        result_name = splitext(basename(matrix_filename))[0]
        chrom.add_experiment(result_name, hic_data=matrix_filename,
                             resolution=matrix_resolution)
        chrom.find_tad(result_name, n_cpus=thread_number)
    chrom.experiments[result_name].write_tad_borders(
        savedata=output_txt_filename)

    # Convert the border text file into a BED track.
    with open(output_txt_filename, 'r') as src, \
            open(output_bed_filename, 'w') as dst:
        track_line = ('track name="' + chrom_name
                      + '_TADs" visibility=1 itemRgb="On"')
        dst.write(track_line + '\n')
        for i, line in enumerate(src):
            line_list = line.split()
            # Skip blank lines and the header line whose first token is '#'.
            if not line_list or line_list[0] == '#':
                continue
            tad_name = chrom_name + '.' + 'TAD' + '.' + str(i)
            # Coordinates in BED format are 0-based,
            # and a region is presented by [x,y) interval.
            start_pos = (int(line_list[1]) - 1) * matrix_resolution
            end_pos = int(line_list[2]) * matrix_resolution
            score = 0      # Just to fill in the field
            strand = '.'   # Just to fill in the field
            # Alternate colours by line index: odd is blue, even is red.
            color = '0,0,255' if i % 2 else '255,0,0'
            fields = [chrom_name, str(start_pos), str(end_pos), tad_name,
                      str(score), strand, str(start_pos), str(end_pos),
                      color]
            dst.write('\t'.join(fields) + '\n')

    chrom_filename = join(tdb_directory, chrom_id + '.tdb')
    chrom.save_chromosome(chrom_filename, force=True)
    print('Finish.')
def _chrom_file_id(chrom_name):
    """Return the identifier used in output file names for *chrom_name*.

    A single-digit chromosome number is zero-padded to ``chr0N``. Any other
    name, including one with no digit, X or Y, is returned unchanged.
    """
    match = search(r'\d+|X|Y', chrom_name)
    if match is not None:
        token = match.group(0)
        if len(token) == 1 and token not in ('X', 'Y'):
            return 'chr' + '0' + token
    return chrom_name


def _write_tads_bed(txt_filename, bed_filename, chrom_name, resolution):
    """Convert a TAD border text file into a BED track file.

    Columns 1 and 2 of each data line are read as start and end bin and
    multiplied by *resolution*. Blank lines and lines whose first token is
    ``#`` are skipped.
    """
    with open(txt_filename, 'r') as src, open(bed_filename, 'w') as dst:
        dst.write('track name="' + chrom_name
                  + '_TADs" visibility=1 itemRgb="On"' + '\n')
        for i, line in enumerate(src):
            columns = line.split()
            # An empty line has no first token to inspect; skip it too.
            if not columns or columns[0] == '#':
                continue
            # Coordinates in BED format are 0-based,
            # and a region is presented by [x,y) interval.
            start_pos = str((int(columns[1]) - 1) * resolution)
            end_pos = str(int(columns[2]) * resolution)
            # Odd line indices are painted blue, even ones red.
            if i % 2:
                color = '0,0,255'
            else:
                color = '255,0,0'
            bed_fields = (
                chrom_name,
                start_pos,
                end_pos,
                chrom_name + '.' + 'TAD' + '.' + str(i),
                str(0),    # score: just to fill in the field
                '.',       # strand: just to fill in the field
                start_pos,
                end_pos,
                color,
            )
            dst.write('\t'.join(bed_fields) + '\n')


def call_tads(matrix_filenames, chrom_name):
    """Detect TADs on one chromosome and write TXT, BED and .tdb outputs.

    Relies on the externally defined ``txt_directory``, ``bed_directory``,
    ``tdb_directory``, ``matrix_resolution``, ``thread_number`` and
    ``filename_list``; the BED path is appended to ``filename_list``.

    Args:
        matrix_filenames: non-empty sequence of contact-matrix file paths.
            Two or more matrices are analysed together in batch mode.
        chrom_name: name of the chromosome being processed.

    Raises:
        IndexError: if ``matrix_filenames`` is empty.
    """
    print('')
    print("Call TADs for chromosome " + chrom_name + '...')
    print("Contact matrices: ")
    for path in matrix_filenames:
        print(path)

    chrom_id = _chrom_file_id(chrom_name)
    output_txt_filename = join(txt_directory, chrom_id + '_TADs.txt')
    print('Output TXT file: ' + output_txt_filename)
    output_bed_filename = join(bed_directory, chrom_id + '_TADs.bed')
    print('Output BED file: ' + output_bed_filename)
    filename_list.append(output_bed_filename)

    # Call TADs and write their borders in TADbit text format and in BED
    # format. Plot generation (visualize / tad_density_plot) is disabled.
    chrom = Chromosome(name=chrom_name)
    if len(matrix_filenames) > 1:
        # Several matrices for one chromosome: suffix each experiment name
        # with its index.
        experiment_names = [
            splitext(basename(path))[0] + '_' + str(index)
            for index, path in enumerate(matrix_filenames)
        ]
        for experiment_name, path in zip(experiment_names, matrix_filenames):
            chrom.add_experiment(experiment_name, hic_data=path,
                                 resolution=matrix_resolution)
        chrom.find_tad(experiment_names, batch_mode=True,
                       n_cpus=thread_number)
        # NOTE(review): assumes the batch result is stored under
        # "batch_" plus the joined experiment names; verify with TADbit.
        tads_experiment = '_'.join(['batch'] + experiment_names)
    else:
        # Only one matrix for one chromosome.
        only_matrix = matrix_filenames[0]
        tads_experiment = splitext(basename(only_matrix))[0]
        chrom.add_experiment(tads_experiment, hic_data=only_matrix,
                             resolution=matrix_resolution)
        chrom.find_tad(tads_experiment, n_cpus=thread_number)

    chrom.experiments[tads_experiment].write_tad_borders(
        savedata=output_txt_filename)
    _write_tads_bed(output_txt_filename, output_bed_filename, chrom_name,
                    matrix_resolution)

    chrom_filename = join(tdb_directory, chrom_id + '.tdb')
    chrom.save_chromosome(chrom_filename, force=True)
    print('Finish.')