Example #1
0
def main():
    """Load every requested Hi-C sample into one chromosome and save it.

    Command-line options are read from ``getArgs()``:

    * ``i`` -- iterable of ``"label,path"`` strings, one per sample
    * ``o`` -- output prefix; the chromosome is saved to ``<o>.tdb``
    * ``c`` -- chromosome name
    * ``p`` -- value forwarded to ``getHiCData`` as the CPU count
    * ``r`` -- value forwarded to ``getHiCData`` as the resolution
    * ``s`` -- species
    * ``b`` -- genome build (assembly)
    """
    args = getArgs()
    samples = args.i
    output = args.o
    chrom_name = args.c  # not called ``chr`` so the builtin is not shadowed
    ncpu = args.p
    resolution = args.r
    species = args.s
    gbuild = args.b

    # Chromosome object that will store all Hi-C data and analysis.
    my_chrom = Chromosome(
        name=chrom_name,  # chromosome name
        centromere_search=True,  # whether to detect the centromere
        species=species,
        assembly=gbuild,  # genome build
    )
    for sample in samples:
        # Split on the first comma only, so a path that itself contains a
        # comma is kept intact instead of breaking the two-value unpacking.
        label, path = sample.split(",", 1)
        print(label)
        print(path)
        getHiCData(my_chrom, output, label, path, resolution, ncpu)

    my_chrom.save_chromosome(output + ".tdb", force=True)
Example #2
0
 def test_05_save_load(self):
     """Smoke-test that a chromosome can be saved to disk and loaded back.

     Nothing is asserted: the test passes as long as neither
     ``save_chromosome`` nor ``load_chromosome`` raises.
     """
     test_chr = Chromosome(name='Test Chromosome',
                           experiment_tads=[exp1, exp2],
                           experiment_names=['exp1', 'exp2'],
                           experiment_resolutions=[20000, 20000])
     try:
         test_chr.save_chromosome('lolo', force=True)
         load_chromosome('lolo')
     finally:
         # Clean up even when saving or loading fails, so a broken run
         # does not leave files behind for the next one.
         # NOTE(review): 'lolo_hic' is presumably a companion file written
         # by save_chromosome -- confirm against the library.
         system('rm -f lolo')
         system('rm -f lolo_hic')
Example #3
0
    def test_05_save_load(self):
        """Check that a chromosome survives a save/load round trip.

        The original and the reloaded object are compared through the
        string form of their ``__dict__``. When ``CHKTIME`` is truthy the
        elapsed time is printed, prefixed with the test number.
        """
        if CHKTIME:
            t0 = time()

        test_chr1 = Chromosome(name='Test Chromosome',
                               experiment_tads=[exp1, exp2],
                               experiment_names=['exp1', 'exp2'],
                               experiment_resolutions=[20000, 20000],
                               silent=True)
        try:
            test_chr1.save_chromosome('lolo', force=True)
            test_chr2 = load_chromosome('lolo')
        finally:
            # Remove the files even when saving or loading raises, so a
            # failed run does not leave them on disk.
            system('rm -f lolo')
            system('rm -f lolo_hic')
        self.assertEqual(str(test_chr1.__dict__), str(test_chr2.__dict__))
        if CHKTIME:
            # Single-argument form prints the same text on Python 2 and 3.
            print('5 %s' % (time() - t0))
Example #4
0
    def test_05_save_load(self):
        """Round-trip a chromosome through disk and compare both copies.

        Equality is checked on ``str(obj.__dict__)`` of the saved and the
        reloaded chromosome. With ``CHKTIME`` set, the run time is printed
        after the test number.
        """
        if CHKTIME:
            t0 = time()

        test_chr1 = Chromosome(name='Test Chromosome',
                               experiment_tads=[exp1, exp2],
                               experiment_names=['exp1', 'exp2'],
                               experiment_resolutions=[20000, 20000],
                               silent=True)
        try:
            test_chr1.save_chromosome('lolo', force=True)
            test_chr2 = load_chromosome('lolo')
        finally:
            # Cleanup runs on failure too; otherwise an exception in
            # save/load would leave the files behind.
            system('rm -f lolo')
            system('rm -f lolo_hic')
        self.assertEqual(str(test_chr1.__dict__), str(test_chr2.__dict__))
        if CHKTIME:
            # One formatted argument: identical output on Python 2 and 3.
            print('5 %s' % (time() - t0))
Example #5
0
    def test_05_save_load(self):
        """Check that a saved chromosome loads back unchanged.

        Skipped when ``ONLY`` is set and does not contain ``"05"``. The
        two objects are compared through ``str(obj.__dict__)``. With
        ``CHKTIME`` set, the elapsed time is printed after the test number.
        """
        if ONLY and "05" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr1 = Chromosome(name="Test Chromosome",
                               experiment_tads=[exp1, exp2],
                               experiment_names=["exp1", "exp2"],
                               experiment_resolutions=[20000, 20000],
                               silent=True)
        try:
            test_chr1.save_chromosome("lolo", force=True)
            test_chr2 = load_chromosome("lolo")
        finally:
            # Always delete the files, including when save/load raises.
            system("rm -f lolo")
            system("rm -f lolo_hic")
        self.assertEqual(str(test_chr1.__dict__), str(test_chr2.__dict__))
        if CHKTIME:
            # Single-argument form prints the same text on Python 2 and 3.
            print("5 %s" % (time() - t0))
Example #6
0
    def test_05_save_load(self):
        """Check that a saved chromosome loads back unchanged.

        Skipped when ``ONLY`` is set to anything other than ``"05"``. The
        two objects are compared through ``str(obj.__dict__)``. With
        ``CHKTIME`` set, the elapsed time is printed after the test number.
        """
        if ONLY and ONLY != "05":
            return
        if CHKTIME:
            t0 = time()

        test_chr1 = Chromosome(
            name="Test Chromosome",
            experiment_tads=[exp1, exp2],
            experiment_names=["exp1", "exp2"],
            experiment_resolutions=[20000, 20000],
            silent=True,
        )
        try:
            test_chr1.save_chromosome("lolo", force=True)
            test_chr2 = load_chromosome("lolo")
        finally:
            # Delete the files whether or not save/load succeeded, so a
            # failing run leaves nothing behind.
            system("rm -f lolo")
            system("rm -f lolo_hic")
        self.assertEqual(str(test_chr1.__dict__), str(test_chr2.__dict__))
        if CHKTIME:
            # Single-argument form prints the same text on Python 2 and 3.
            print("5 %s" % (time() - t0))
Example #7
0
def process():
    """Call TADs on every matrix file in ``args`` and write the results.

    Reads the module-level ``options`` and ``args``. The output prefix is
    ``options.outputDir`` followed by ``options.outputFilename`` or, when
    that is empty, by the base name of the first matrix file.

    For each matrix file a ``Chromosome`` is built, the Hi-C data is
    normalized, TADs are searched, and three files are written: the TAD
    borders (``.border``), a plot (``.pdf``) and the saved chromosome
    (``.tdb``).
    """
    if options.outputFilename != "":
        out_prefix = options.outputDir + options.outputFilename
    else:
        out_prefix = options.outputDir + os.path.basename(args[0])

    for matrix_path in args:
        # File names look like "<sample>.<chrom>.matrix[.ext]": drop the
        # extension and everything from ".matrix" on, then split off the
        # last dot-separated field as the chromosome name.
        stem = os.path.splitext(os.path.basename(matrix_path))[0]
        stem = stem.split(".matrix")[0]
        name_parts = stem.rsplit(".", 1)
        chrom_name = name_parts[-1]  # not ``chr``: keeps the builtin usable
        sample = name_parts[0]

        chrom = Chromosome(name=chrom_name,
                           centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample,
                             exp_type='Hi-C',
                             identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample,
                       n_cpus=options.threads,
                       normalized=True,
                       verbose=False)
        exp.write_tad_borders(out_prefix + "." + chrom_name + ".border")

        chrom.visualize(exp.name,
                        paint_tads=True,
                        savefig=out_prefix + "chr." + chrom_name + ".pdf")
        chrom.save_chromosome(out_prefix + "chr." + chrom_name + ".tdb",
                              force=True)
Example #8
0
def process():
    """Run TAD detection for each matrix file listed in ``args``.

    Uses the module-level ``options`` and ``args``. Output files share a
    prefix: ``options.outputDir`` plus ``options.outputFilename``, or plus
    the base name of the first matrix file when no file name was given.

    Per matrix file: build a ``Chromosome``, add the matrix as a Hi-C
    experiment, normalize it, search for TADs, then write the borders
    (``.border``), a plot (``.pdf``) and the chromosome itself (``.tdb``).
    """
    if options.outputFilename != "":
        out_prefix = options.outputDir + options.outputFilename
    else:
        out_prefix = options.outputDir + os.path.basename(args[0])

    for matrix_path in args:
        # "<sample>.<chrom>.matrix[.ext]" -> sample and chromosome name.
        stem = os.path.splitext(os.path.basename(matrix_path))[0]
        stem = stem.split(".matrix")[0]
        name_parts = stem.rsplit(".", 1)
        chrom_name = name_parts[-1]  # avoids shadowing the builtin ``chr``
        sample = name_parts[0]

        chrom = Chromosome(name=chrom_name, centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample, exp_type='Hi-C', identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample, n_cpus=options.threads, normalized=True,
                       verbose=False)
        exp.write_tad_borders(out_prefix + "." + chrom_name + ".border")

        chrom.visualize(exp.name, paint_tads=True,
                        savefig=out_prefix + "chr." + chrom_name + ".pdf")
        chrom.save_chromosome(out_prefix + "chr." + chrom_name + ".tdb",
                              force=True)
Example #9
0
def call_tads(matrix_filenames, chrom_name):
    """Call TADs for one chromosome and export them as TXT, BED and TDB.

    Relies on module-level settings: ``txt_directory``, ``bed_directory``
    and ``tdb_directory`` (output folders), ``matrix_resolution``,
    ``thread_number`` and ``filename_list`` (the BED file path is appended
    to it).

    :param matrix_filenames: contact-matrix files for this chromosome; with
       more than one, TADs are called in batch mode on all of them together
    :param chrom_name: chromosome name, used for the ``Chromosome`` object,
       the BED records and (possibly zero-padded) the output file names
    """
    print('')
    print("Call TADs for chromosome " + chrom_name + '...')
    print("Contact matrices: ")
    for matrix_filename in matrix_filenames:
        print(matrix_filename)

    # Single-digit chromosomes are zero-padded ("chr1" -> "chr01"); every
    # other name, including one with no digit/X/Y at all, is used as is.
    match = search(r'\d+|X|Y', chrom_name)
    chrom_number = match.group(0) if match else ''
    if len(chrom_number) == 1 and chrom_number not in ('X', 'Y'):
        chrom_id = 'chr0' + chrom_number
    else:
        chrom_id = chrom_name

    output_txt_filename = join(txt_directory, chrom_id + '_TADs.txt')
    print('Output TXT file: ' + output_txt_filename)
    output_bed_filename = join(bed_directory, chrom_id + '_TADs.bed')
    print('Output BED file: ' + output_bed_filename)
    filename_list.append(output_bed_filename)

    # Plot output (2D / 1D TAD PNG files) is currently disabled.

    # Call TADs and write their borders in TADbit text format
    chrom = Chromosome(name=chrom_name)
    if len(matrix_filenames) > 1:  # several matrices for one chromosome
        experiment_names = []
        for matrix_index, matrix_filename in enumerate(matrix_filenames):
            experiment_name = (splitext(basename(matrix_filename))[0]
                               + '_' + str(matrix_index))
            experiment_names.append(experiment_name)
            chrom.add_experiment(experiment_name, hic_data=matrix_filename,
                                 resolution=matrix_resolution)
        chrom.find_tad(experiment_names, batch_mode=True,
                       n_cpus=thread_number)
        # The batch result is looked up under "batch_<name1>_<name2>..."
        result_name = '_'.join(['batch'] + experiment_names)
    else:  # only one matrix for one chromosome
        matrix_filename = matrix_filenames[0]
        result_name = splitext(basename(matrix_filename))[0]
        chrom.add_experiment(result_name, hic_data=matrix_filename,
                             resolution=matrix_resolution)
        chrom.find_tad(result_name, n_cpus=thread_number)
    chrom.experiments[result_name].write_tad_borders(
        savedata=output_txt_filename)

    # Convert the border table to BED
    with open(output_txt_filename, 'r') as src, \
            open(output_bed_filename, 'w') as dst:
        dst.write('track name="' + chrom_name
                  + '_TADs" visibility=1 itemRgb="On"' + '\n')
        for i, line in enumerate(src):
            fields = line.split()
            # Skip blank lines and the header row (first field is "#").
            if not fields or fields[0] == '#':
                continue
            # Coordinates in BED format are 0-based,
            # and a region is presented by [x,y) interval.
            # NOTE(review): assumes columns 2 and 3 hold 1-based bin
            # indices -- confirm against the border file format.
            start_pos = str((int(fields[1]) - 1) * matrix_resolution)
            end_pos = str(int(fields[2]) * matrix_resolution)
            # Alternate blue / red on the parity of the input line number.
            color = '0,0,255' if i % 2 else '255,0,0'
            dst.write('\t'.join([
                chrom_name, start_pos, end_pos,
                chrom_name + '.TAD.' + str(i),
                '0',  # score: just to fill in the field
                '.',  # strand: just to fill in the field
                start_pos, end_pos, color,
            ]) + '\n')

    chrom_filename = join(tdb_directory, chrom_id + '.tdb')
    chrom.save_chromosome(chrom_filename, force=True)
    print('Finish.')
Example #10
0
def call_tads(matrix_filenames, chrom_name):
    """Detect TADs on one chromosome and write TXT, BED and TDB outputs.

    Module-level names used: ``txt_directory``, ``bed_directory``,
    ``tdb_directory`` (output folders), ``matrix_resolution``,
    ``thread_number`` and ``filename_list`` (receives the BED file path).

    :param matrix_filenames: contact-matrix files of this chromosome; when
       there are several, they are analysed together in batch mode
    :param chrom_name: chromosome name given to the ``Chromosome`` object
       and written in the BED records; output file names use it too, with
       single-digit numbers zero-padded
    """
    print('')
    print("Call TADs for chromosome " + chrom_name + '...')
    print("Contact matrices: ")
    for matrix_filename in matrix_filenames:
        print(matrix_filename)

    # "chr1" -> "chr01"; names with two or more digits, X, Y, or without
    # any recognisable chromosome number are kept unchanged.
    match = search(r'\d+|X|Y', chrom_name)
    chrom_number = match.group(0) if match else ''
    if len(chrom_number) == 1 and chrom_number not in ('X', 'Y'):
        chrom_id = 'chr0' + chrom_number
    else:
        chrom_id = chrom_name

    output_txt_filename = join(txt_directory, chrom_id + '_TADs.txt')
    print('Output TXT file: ' + output_txt_filename)
    output_bed_filename = join(bed_directory, chrom_id + '_TADs.bed')
    print('Output BED file: ' + output_bed_filename)
    filename_list.append(output_bed_filename)

    # Plot output (2D / 1D TAD PNG files) is currently disabled.

    # Call TADs and write their borders in TADbit text format
    chrom = Chromosome(name=chrom_name)
    if len(matrix_filenames) > 1:  # several matrices for one chromosome
        experiment_names = []
        for matrix_index, matrix_filename in enumerate(matrix_filenames):
            experiment_name = (splitext(basename(matrix_filename))[0]
                               + '_' + str(matrix_index))
            experiment_names.append(experiment_name)
            chrom.add_experiment(experiment_name, hic_data=matrix_filename,
                                 resolution=matrix_resolution)
        chrom.find_tad(experiment_names, batch_mode=True,
                       n_cpus=thread_number)
        # The batch result is looked up under "batch_<name1>_<name2>..."
        result_name = '_'.join(['batch'] + experiment_names)
    else:  # only one matrix for one chromosome
        matrix_filename = matrix_filenames[0]
        result_name = splitext(basename(matrix_filename))[0]
        chrom.add_experiment(result_name, hic_data=matrix_filename,
                             resolution=matrix_resolution)
        chrom.find_tad(result_name, n_cpus=thread_number)
    chrom.experiments[result_name].write_tad_borders(
        savedata=output_txt_filename)

    # Convert the border table to BED
    with open(output_txt_filename, 'r') as src, \
            open(output_bed_filename, 'w') as dst:
        dst.write('track name="' + chrom_name
                  + '_TADs" visibility=1 itemRgb="On"' + '\n')
        for i, line in enumerate(src):
            fields = line.split()
            # Blank lines and the "#" header row carry no TAD.
            if not fields or fields[0] == '#':
                continue
            # Coordinates in BED format are 0-based,
            # and a region is presented by [x,y) interval.
            # NOTE(review): assumes columns 2 and 3 hold 1-based bin
            # indices -- confirm against the border file format.
            start_pos = str((int(fields[1]) - 1) * matrix_resolution)
            end_pos = str(int(fields[2]) * matrix_resolution)
            # Blue on odd input line numbers, red on even ones.
            color = '0,0,255' if i % 2 else '255,0,0'
            dst.write('\t'.join([
                chrom_name, start_pos, end_pos,
                chrom_name + '.TAD.' + str(i),
                '0',  # score: just to fill in the field
                '.',  # strand: just to fill in the field
                start_pos, end_pos, color,
            ]) + '\n')

    chrom_filename = join(tdb_directory, chrom_id + '.tdb')
    chrom.save_chromosome(chrom_filename, force=True)
    print('Finish.')