Ejemplo n.º 1
0
    def test_19_matrix_manip(self):
        """
        Exercise Hi-C matrix writing, reading and comparison helpers.

        The data loaded from ``lala-map~`` is written with hic_map (whole
        genome, intra- and inter-chromosomal), read back with read_matrix
        and compared with the original. insert_sizes is then checked on the
        same reads file, and correlate_matrices / eig_correlate_matrices are
        checked on the chrT_A and chrT_B 20 kb matrices against reference
        values. Every ``lala*`` file is removed at the end.

        The test is skipped when ONLY is set and does not contain "19".
        """
        if ONLY and "19" not in ONLY:
            return
        if CHKTIME:
            t0 = time()
        hic_data1 = load_hic_data_from_reads("lala-map~", resolution=10000)
        hic_map(hic_data1, savedata="lala-map.tsv~", savefig="lala.pdf")
        hic_map(hic_data1,
                by_chrom="intra",
                savedata="lala-maps~",
                savefig="lalalo~")
        hic_map(hic_data1,
                by_chrom="inter",
                savedata="lala-maps~",
                savefig="lalala~")
        # slowest part of the whole test:
        hic_data2 = read_matrix("lala-map.tsv~", resolution=10000)
        self.assertEqual(hic_data1, hic_data2)

        # NOTE: the distance-vs-interactions check below is disabled in this
        # version of the test; it is kept for reference.
        # vals = plot_distance_vs_interactions(hic_data1)

        # self.assertEqual([round(i, 2) if str(i)!="nan" else 0.0 for i in
        #                   reduce(lambda x, y: x + y, vals)],
        #                  [-1.68, -2.08, 0.02, 2.76, -8.99, 0.0, 0.82, -6.8, 0.0])

        a, b = insert_sizes("lala-map~")
        self.assertEqual([int(a), int(b)], [43, 1033])

        hic_data1 = read_matrix(PATH + "/20Kb/chrT/chrT_A.tsv",
                                resolution=20000)
        hic_data2 = read_matrix(PATH + "/20Kb/chrT/chrT_B.tsv",
                                resolution=20000)

        # only the first element of the result is compared here
        corr = correlate_matrices(hic_data1, hic_data2)
        corr = [round(i, 3) for i in corr[0]]
        self.assertEqual(corr, [
            0.755, 0.729, 0.804, 0.761, 0.789, 0.776, 0.828, 0.757, 0.797,
            0.832
        ])

        ecorr = eig_correlate_matrices(hic_data1,
                                       hic_data2,
                                       savefig='lala3.pdf')

        # concatenate the rows of the result before rounding
        ecorr = [round(i, 3) for i in reduce(lambda x, y: x + y, ecorr)]
        self.assertEqual(ecorr, [
            0.997, 0.322, 0.442, 0.017, 0.243, 0.014, 0.321, 0.999, 0.01,
            0.006, 0.0, 0.007, 0.451, 0.012, 0.996, 0.031, 0.013, 0.004, 0.002,
            0.006, 0.029, 0.974, 0.076, 0.03, 0.219, 0.013, 0.031, 0.08, 0.974,
            0.018, 0.028, 0.004, 0.0, 0.028, 0.034, 0.89
        ])
        # removes every file starting with "lala" in the working directory,
        # including the input reads file used above
        system("rm -rf lala*")
        if CHKTIME:
            self.assertEqual(True, True)
            # a single pre-formatted argument prints the same text whether
            # print is the Python 2 statement or the Python 3 function
            print("19 %s" % (time() - t0))
Ejemplo n.º 2
0
    def test_19_matrix_manip(self):
        """
        Check Hi-C matrix round-tripping and the matrix comparison helpers.

        Steps: write the maps of ``lala-map~`` with hic_map, read the saved
        matrix back and compare it with the original, check the values from
        plot_distance_vs_interactions and insert_sizes, then compare the
        chrT_A and chrT_B matrices with correlate_matrices and
        eig_correlate_matrices. All ``lala*`` files are deleted afterwards.

        Skipped when ONLY is set to anything other than '19'.
        """
        if ONLY and ONLY != '19':
            return
        if CHKTIME:
            started = time()

        from_reads = load_hic_data_from_reads('lala-map~', resolution=10000)
        hic_map(from_reads, savedata='lala-map.tsv~', savefig='lala.pdf~')
        hic_map(from_reads, by_chrom='intra',
                savedata='lala-maps~', savefig='lalalo~')
        hic_map(from_reads, by_chrom='inter',
                savedata='lala-maps~', savefig='lalala~')

        # slowest part of the whole test: re-reading the saved matrix
        from_file = read_matrix('lala-map.tsv~', resolution=10000)
        self.assertEqual(from_reads, from_file)

        # concatenate the returned sequences, then round (NaN counts as 0.0)
        flat_vals = reduce(lambda acc, chunk: acc + chunk,
                           plot_distance_vs_interactions(from_reads))
        rounded_vals = [0.0 if str(val) == 'nan' else round(val, 2)
                        for val in flat_vals]
        self.assertEqual(
            rounded_vals,
            [-1.68, -2.08, 0.02, 2.76, -8.99, 0.0, 0.82, -6.8, 0.0])

        first_size, second_size = insert_sizes('lala-map~')
        self.assertEqual([int(first_size), int(second_size)], [43, 1033])

        matrix_a = read_matrix('20Kb/chrT/chrT_A.tsv', resolution=20000)
        matrix_b = read_matrix('20Kb/chrT/chrT_B.tsv', resolution=20000)

        # only the first element of the result is compared
        decay_corr = correlate_matrices(matrix_a, matrix_b)[0]
        self.assertEqual(
            [round(val, 3) for val in decay_corr],
            [0.755, 0.729, 0.804, 0.761, 0.789,
             0.776, 0.828, 0.757, 0.797, 0.832])

        eigen_rows = eig_correlate_matrices(matrix_a, matrix_b)
        eigen_flat = reduce(lambda acc, row: acc + row, eigen_rows)
        self.assertEqual(
            [round(val, 3) for val in eigen_flat],
            [0.997, 0.322, 0.442, 0.017, 0.243, 0.014,
             0.321, 0.999, 0.01, 0.006, 0.0, 0.007,
             0.451, 0.012, 0.996, 0.031, 0.013, 0.004,
             0.002, 0.006, 0.029, 0.974, 0.076, 0.03,
             0.219, 0.013, 0.031, 0.08, 0.974, 0.018,
             0.028, 0.004, 0.0, 0.028, 0.034, 0.89])

        # removes every file starting with "lala" in the working directory
        system('rm -rf lala*')
        if CHKTIME:
            self.assertEqual(True, True)
            print('19 %s' % (time() - started))
Ejemplo n.º 3
0
    def test_19_matrix_manip(self):
        """
        Check Hi-C matrix round-tripping and the matrix comparison helpers.

        Writes the maps of ``lala-map~`` with hic_map, reads the saved matrix
        back and compares it with the original, then checks
        plot_distance_vs_interactions, insert_sizes, correlate_matrices and
        eig_correlate_matrices against reference values. All ``lala*`` files
        are deleted afterwards.

        Skipped when ONLY is set to anything other than '19'.
        """
        if ONLY and ONLY != '19':
            return
        if CHKTIME:
            started = time()

        loaded = load_hic_data_from_reads('lala-map~', resolution=10000)
        hic_map(loaded, savedata='lala-map.tsv~', savefig='lala.pdf~')
        for scope, figure in (('intra', 'lalalo~'), ('inter', 'lalala~')):
            hic_map(loaded, by_chrom=scope, savedata='lala-maps~',
                    savefig=figure)

        # slowest part of the whole test: re-reading the saved matrix
        reread = read_matrix('lala-map.tsv~', resolution=10000)
        self.assertEqual(loaded, reread)

        # concatenate the returned sequences, then round (NaN counts as 0.0)
        decay_vals = reduce(lambda acc, chunk: acc + chunk,
                            plot_distance_vs_interactions(loaded))
        self.assertEqual(
            [0.0 if str(val) == 'nan' else round(val, 2)
             for val in decay_vals],
            [-1.74, 4.2, 0.52, 1.82, -0.44, 0.0, -0.5, 2.95, 0.0])

        size_one, size_two = insert_sizes('lala-map~')
        self.assertEqual([int(size_one), int(size_two)], [43, 1033])

        sample_a = read_matrix('20Kb/chrT/chrT_A.tsv', resolution=20000)
        sample_b = read_matrix('20Kb/chrT/chrT_B.tsv', resolution=20000)

        # only the first element of the result is compared
        per_distance = correlate_matrices(sample_a, sample_b)[0]
        self.assertEqual(
            [round(val, 3) for val in per_distance],
            [0.755, 0.729, 0.804, 0.761, 0.789,
             0.776, 0.828, 0.757, 0.797, 0.832])

        eigen_flat = reduce(lambda acc, row: acc + row,
                            eig_correlate_matrices(sample_a, sample_b))
        self.assertEqual(
            [round(val, 3) for val in eigen_flat],
            [0.997, 0.322, 0.442, 0.017, 0.243, 0.014,
             0.321, 0.999, 0.01, 0.006, 0.0, 0.007,
             0.451, 0.012, 0.996, 0.031, 0.013, 0.004,
             0.002, 0.006, 0.029, 0.974, 0.076, 0.03,
             0.219, 0.013, 0.031, 0.08, 0.974, 0.018,
             0.028, 0.004, 0.0, 0.028, 0.034, 0.89])

        # removes every file starting with "lala" in the working directory
        system('rm -rf lala*')
        if CHKTIME:
            self.assertEqual(True, True)
            print('19 %s' % (time() - started))
Ejemplo n.º 4
0
def run(opts):
    """
    Compare two Hi-C experiments and merge their BAM files.

    For each sample the reads (and optional biases) are taken either from
    ``opts.bam1`` / ``opts.bam2`` and ``opts.biases1`` / ``opts.biases2``,
    or, when no BAM is given, from the database of a previous job through
    load_parameters_fromdb (paths are then made relative to the sample's
    working directory).

    Unless ``opts.skip_comparison`` is set, both samples are loaded at
    ``opts.reso`` and compared: correlation between equidistant loci,
    correlation between eigenvectors and reproducibility score. Unless
    ``opts.skip_merge`` is set, the two BAM files are merged and indexed
    with samtools. Everything is finally recorded with save_to_db.

    :param opts: parsed command-line options of the tool

    :raises Exception: if the resolutions retrieved for the two samples
       differ
    """
    check_options(opts)
    samtools = which(opts.samtools)
    launch_time = time.localtime()

    param_hash = digest_parameters(opts)

    reso1 = reso2 = None
    if opts.bam1:
        mreads1 = path.realpath(opts.bam1)
        biases1 = opts.biases1
    else:
        biases1, mreads1, reso1 = load_parameters_fromdb(
            opts.workdir1, opts.jobid1, opts, opts.tmpdb1)
        mreads1 = path.join(opts.workdir1, mreads1)
        try:
            biases1 = path.join(opts.workdir1, biases1)
        except (AttributeError, TypeError):
            # path.join fails when no usable biases path was retrieved
            # (TypeError is what Python 3 raises)
            biases1 = None

    if opts.bam2:
        mreads2 = path.realpath(opts.bam2)
        biases2 = opts.biases2
    else:
        biases2, mreads2, reso2 = load_parameters_fromdb(
            opts.workdir2, opts.jobid2, opts, opts.tmpdb2)
        mreads2 = path.join(opts.workdir2, mreads2)
        try:
            biases2 = path.join(opts.workdir2, biases2)
        except (AttributeError, TypeError):
            # must reset the biases of the SECOND sample; resetting biases1
            # here would silently discard the first sample's biases
            biases2 = None

    filter_exclude = opts.filter

    if reso1 != reso2:
        raise Exception('ERROR: differing resolutions between experiments to '
                        'be merged')

    merge_dir = path.join(opts.workdir, '00_merge')
    mkdir(merge_dir)

    if not opts.skip_comparison:
        printime('  - loading first sample %s' % (mreads1))
        hic_data1 = load_hic_data_from_bam(mreads1, opts.reso, biases=biases1,
                                           tmpdir=merge_dir,
                                           ncpus=opts.cpus,
                                           filter_exclude=filter_exclude)

        printime('  - loading second sample %s' % (mreads2))
        hic_data2 = load_hic_data_from_bam(mreads2, opts.reso, biases=biases2,
                                           tmpdir=merge_dir,
                                           ncpus=opts.cpus,
                                           filter_exclude=filter_exclude)

        # valid-pair counts are only computed from the loaded data when at
        # least one of the two working directories is missing
        if opts.workdir1 and opts.workdir2:
            masked1 = {'valid-pairs': {'count': 0}}
            masked2 = {'valid-pairs': {'count': 0}}
        else:
            masked1 = {'valid-pairs': {'count': sum(hic_data1.values())}}
            masked2 = {'valid-pairs': {'count': sum(hic_data2.values())}}

        # output files are tagged with the resolution and the parameter hash
        tag = '%s_%s' % (opts.reso, param_hash)
        decay_corr_dat = path.join(merge_dir, 'decay_corr_dat_%s.txt' % tag)
        decay_corr_fig = path.join(merge_dir, 'decay_corr_dat_%s.png' % tag)
        eigen_corr_dat = path.join(merge_dir, 'eigen_corr_dat_%s.txt' % tag)
        eigen_corr_fig = path.join(merge_dir, 'eigen_corr_dat_%s.png' % tag)

        printime('  - comparing experiments')
        printime('    => correlation between equidistant loci')
        corr, _, scc, std, bads = correlate_matrices(
            hic_data1, hic_data2, normalized=opts.norm,
            remove_bad_columns=True, savefig=decay_corr_fig,
            savedata=decay_corr_dat, get_bads=True)
        print('         - correlation score (SCC): %.4f (+- %.7f)' % (scc, std))
        printime('    => correlation between eigenvectors')
        eig_corr = eig_correlate_matrices(hic_data1, hic_data2, normalized=opts.norm,
                                          remove_bad_columns=True, nvect=6,
                                          savefig=eigen_corr_fig,
                                          savedata=eigen_corr_dat)

        printime('    => reproducibility score')
        reprod = get_reproducibility(hic_data1, hic_data2, num_evec=20, normalized=opts.norm,
                                     verbose=False, remove_bad_columns=True)
        print('         - reproducibility score: %.4f' % (reprod))
        ncols = len(hic_data1)
    else:
        # placeholders recorded in the database when nothing is compared
        ncols = 0
        decay_corr_dat = 'None'
        decay_corr_fig = 'None'
        eigen_corr_dat = 'None'
        eigen_corr_fig = 'None'
        masked1 = {}
        masked2 = {}

        corr = eig_corr = scc = std = reprod = 0
        bads = {}

    # merge inputs
    filtered_dir = path.join(opts.workdir, '03_filtered_reads')
    mkdir(filtered_dir)

    if opts.skip_merge:
        outbam = ''
    else:
        outbam = path.join(filtered_dir, 'intersection_%s.bam' % (param_hash))
        printime('  - Mergeing experiments')
        system(samtools  + ' merge -@ %d %s %s %s' % (opts.cpus, outbam, mreads1, mreads2))
        printime('  - Indexing new BAM file')
        # the samtools version is parsed from the "Version" line that
        # samtools writes to stderr when called without arguments; the
        # thread option is only passed to "index" from version 1.3.1 on
        usage = Popen(samtools, stderr=PIPE,
                      universal_newlines=True).communicate()[1]
        version = LooseVersion([line.split()[1]
                                for line in usage.split('\n')
                                if 'Version' in line][0])
        if version >= LooseVersion('1.3.1'):
            system(samtools  + ' index -@ %d %s' % (opts.cpus, outbam))
        else:
            system(samtools  + ' index %s' % (outbam))

    finish_time = time.localtime()
    save_to_db (opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
                len(bads), ncols, scc, std, reprod,
                eigen_corr_dat, eigen_corr_fig, outbam, corr, eig_corr,
                biases1, biases2, masked1, masked2, launch_time, finish_time)
    printime('\nDone.')
Ejemplo n.º 5
0
def run(opts):
    """
    Compare two Hi-C experiments and merge their read files.

    For each sample the reads, bad columns and biases are taken either from
    ``opts.bed1`` / ``opts.bed2`` (with ``opts.bad_co*`` / ``opts.biases*``)
    or, when no BED-like file is given, from the database of a previous job
    through load_parameters_fromdb.

    Unless ``opts.skip_comparison`` is set, both samples are loaded at
    ``opts.reso`` (with bad columns and biases when ``opts.norm`` is set)
    and compared: correlation between equidistant loci and correlation
    between eigenvectors. The two read files are then merged with
    merge_2d_beds and everything is recorded with save_to_db.

    :param opts: parsed command-line options of the tool

    :raises Exception: if the resolutions retrieved for the two samples
       differ, or if normalization is requested but no biases are available
       for one of the samples
    """
    check_options(opts)
    launch_time = time.localtime()

    param_hash = digest_parameters(opts)

    reso1 = reso2 = None
    if opts.bed1:
        mreads1 = path.realpath(opts.bed1)
        bad_co1 = opts.bad_co1
        biases1 = opts.biases1
    else:
        bad_co1, biases1, mreads1, reso1 = load_parameters_fromdb(
            opts.workdir1, opts.jobid1, opts, opts.tmpdb1)
        mreads1 = path.join(opts.workdir1, mreads1)

    if opts.bed2:
        mreads2 = path.realpath(opts.bed2)
        bad_co2 = opts.bad_co2
        biases2 = opts.biases2
    else:
        bad_co2, biases2, mreads2, reso2 = load_parameters_fromdb(
            opts.workdir2, opts.jobid2, opts, opts.tmpdb2)
        mreads2 = path.join(opts.workdir2, mreads2)

    if reso1 != reso2:
        raise Exception('ERROR: differing resolutions between experiments to '
                        'be merged')

    merge_dir = path.join(opts.workdir, '00_merge')
    mkdir(merge_dir)

    # NOTE: every print below takes one pre-formatted string so that the
    # output is the same with the Python 2 statement and Python 3 function.
    if not opts.skip_comparison:
        print('Comparison')
        print(' - loading first sample %s' % (mreads1,))
        hic_data1 = load_hic_data_from_reads(mreads1, opts.reso)

        print(' - loading second sample %s' % (mreads2,))
        hic_data2 = load_hic_data_from_reads(mreads2, opts.reso)

        if opts.norm and biases1:
            bad_co1 = path.join(opts.workdir1, bad_co1)
            print(' - loading bad columns from first sample %s' % (bad_co1,))
            # one column index per line; files are closed as soon as parsed
            with open(bad_co1) as handle:
                hic_data1.bads = dict(
                    (int(line.strip()), True) for line in handle)
            biases1 = path.join(opts.workdir1, biases1)
            print(' - loading biases from first sample %s' % (biases1,))
            # two whitespace-separated fields per line: column and bias
            with open(biases1) as handle:
                hic_data1.bias = dict(
                    (int(fields[0]), float(fields[1]))
                    for fields in (line.split() for line in handle))
        elif opts.norm:
            raise Exception('ERROR: biases or filtered-columns not found')
        if opts.norm and biases2:
            bad_co2 = path.join(opts.workdir2, bad_co2)
            print(' - loading bad columns from second sample %s' % (bad_co2,))
            with open(bad_co2) as handle:
                hic_data2.bads = dict(
                    (int(line.strip()), True) for line in handle)
            biases2 = path.join(opts.workdir2, biases2)
            print(' - loading biases from second sample %s' % (biases2,))
            with open(biases2) as handle:
                hic_data2.bias = dict(
                    (int(fields[0]), float(fields[1]))
                    for fields in (line.split() for line in handle))
        elif opts.norm:
            raise Exception('ERROR: biases or filtered-columns not found')

        # output files are tagged with the resolution and the parameter hash
        tag = '%s_%s' % (opts.reso, param_hash)
        decay_corr_dat = path.join(merge_dir, 'decay_corr_dat_%s.txt' % tag)
        decay_corr_fig = path.join(merge_dir, 'decay_corr_dat_%s.png' % tag)
        eigen_corr_dat = path.join(merge_dir, 'eigen_corr_dat_%s.txt' % tag)
        eigen_corr_fig = path.join(merge_dir, 'eigen_corr_dat_%s.png' % tag)

        print('  => correlation between equidistant loci')
        corr, _, bads = correlate_matrices(hic_data1,
                                           hic_data2,
                                           normalized=opts.norm,
                                           remove_bad_columns=True,
                                           savefig=decay_corr_fig,
                                           savedata=decay_corr_dat,
                                           get_bads=True)
        print('  => correlation between eigenvectors')
        eig_corr = eig_correlate_matrices(hic_data1,
                                          hic_data2,
                                          normalized=opts.norm,
                                          remove_bad_columns=True,
                                          nvect=6,
                                          savefig=eigen_corr_fig,
                                          savedata=eigen_corr_dat)
        ncols = len(hic_data1)
    else:
        # placeholders recorded in the database when nothing is compared
        ncols = 0
        decay_corr_dat = 'None'
        decay_corr_fig = 'None'
        eigen_corr_dat = 'None'
        eigen_corr_fig = 'None'
        corr = eig_corr = 0
        bads = {}

    # merge inputs
    mkdir(path.join(opts.workdir, '03_filtered_reads'))
    outbed = path.join(opts.workdir, '03_filtered_reads',
                       'valid_r1-r2_intersection_%s.tsv' % (param_hash))

    print('\nMergeing...')
    nreads = merge_2d_beds(mreads1, mreads2, outbed)

    finish_time = time.localtime()
    save_to_db(opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
               len(bads), ncols, nreads, eigen_corr_dat,
               eigen_corr_fig, outbed, corr, eig_corr, biases1, bad_co1,
               biases2, bad_co2, launch_time, finish_time)
    print('\n\nDone.')
Ejemplo n.º 6
0
def run(opts):
    """
    Compare two Hi-C experiments and merge their BAM files.

    For each sample the reads (and optional biases) are taken either from
    ``opts.bam1`` / ``opts.bam2`` and ``opts.biases1`` / ``opts.biases2``,
    or, when no BAM is given, from the database of a previous job through
    load_parameters_fromdb (paths are then made relative to the sample's
    working directory).

    Unless ``opts.skip_comparison`` is set, both samples are loaded at
    ``opts.reso`` and compared: correlation between equidistant loci,
    correlation between eigenvectors and reproducibility score. The two
    BAM files are then merged and indexed with samtools, and everything is
    recorded with save_to_db.

    :param opts: parsed command-line options of the tool

    :raises Exception: if the resolutions retrieved for the two samples
       differ
    """
    check_options(opts)
    samtools = which(opts.samtools)
    launch_time = time.localtime()

    param_hash = digest_parameters(opts)

    reso1 = reso2 = None
    if opts.bam1:
        mreads1 = path.realpath(opts.bam1)
        biases1 = opts.biases1
    else:
        biases1, mreads1, reso1 = load_parameters_fromdb(
            opts.workdir1, opts.jobid1, opts, opts.tmpdb1)
        mreads1 = path.join(opts.workdir1, mreads1)
        try:
            biases1 = path.join(opts.workdir1, biases1)
        except (AttributeError, TypeError):
            # path.join fails when no usable biases path was retrieved
            # (Python 3 raises TypeError instead of AttributeError)
            biases1 = None

    if opts.bam2:
        mreads2 = path.realpath(opts.bam2)
        biases2 = opts.biases2
    else:
        biases2, mreads2, reso2 = load_parameters_fromdb(
            opts.workdir2, opts.jobid2, opts, opts.tmpdb2)
        mreads2 = path.join(opts.workdir2, mreads2)
        try:
            biases2 = path.join(opts.workdir2, biases2)
        except (AttributeError, TypeError):
            biases2 = None

    filter_exclude = opts.filter

    if reso1 != reso2:
        raise Exception('ERROR: differing resolutions between experiments to '
                        'be merged')

    merge_dir = path.join(opts.workdir, '00_merge')
    mkdir(merge_dir)

    if not opts.skip_comparison:
        printime('  - loading first sample %s' % (mreads1))
        hic_data1 = load_hic_data_from_bam(mreads1, opts.reso, biases=biases1,
                                           tmpdir=merge_dir,
                                           ncpus=opts.cpus,
                                           filter_exclude=filter_exclude)

        printime('  - loading second sample %s' % (mreads2))
        hic_data2 = load_hic_data_from_bam(mreads2, opts.reso, biases=biases2,
                                           tmpdir=merge_dir,
                                           ncpus=opts.cpus,
                                           filter_exclude=filter_exclude)

        # output files are tagged with the resolution and the parameter hash
        tag = '%s_%s' % (opts.reso, param_hash)
        decay_corr_dat = path.join(merge_dir, 'decay_corr_dat_%s.txt' % tag)
        decay_corr_fig = path.join(merge_dir, 'decay_corr_dat_%s.png' % tag)
        eigen_corr_dat = path.join(merge_dir, 'eigen_corr_dat_%s.txt' % tag)
        eigen_corr_fig = path.join(merge_dir, 'eigen_corr_dat_%s.png' % tag)

        printime('  - comparing experiments')
        printime('    => correlation between equidistant loci')
        corr, _, scc, std, bads = correlate_matrices(
            hic_data1, hic_data2, normalized=opts.norm,
            remove_bad_columns=True, savefig=decay_corr_fig,
            savedata=decay_corr_dat, get_bads=True)
        # parenthesised single argument: valid with the Python 2 print
        # statement and the Python 3 print function alike
        print('         - correlation score (SCC): %.4f (+- %.7f)' % (scc, std))
        printime('    => correlation between eigenvectors')
        eig_corr = eig_correlate_matrices(hic_data1, hic_data2, normalized=opts.norm,
                                          remove_bad_columns=True, nvect=6,
                                          savefig=eigen_corr_fig,
                                          savedata=eigen_corr_dat)

        printime('    => reproducibility score')
        reprod = get_reproducibility(hic_data1, hic_data2, num_evec=20, normalized=opts.norm,
                                     verbose=False, remove_bad_columns=True)
        print('         - reproducibility score: %.4f' % (reprod))
        ncols = len(hic_data1)
    else:
        # placeholders recorded in the database when nothing is compared
        ncols = 0
        decay_corr_dat = 'None'
        decay_corr_fig = 'None'
        eigen_corr_dat = 'None'
        eigen_corr_fig = 'None'

        # scc, std and reprod are passed to save_to_db below, so they need
        # a value here too; otherwise skipping the comparison ends in a
        # NameError
        corr = eig_corr = scc = std = reprod = 0
        bads = {}

    # merge inputs
    mkdir(path.join(opts.workdir, '03_filtered_reads'))
    outbam = path.join(opts.workdir, '03_filtered_reads',
                       'intersection_%s.bam' % (param_hash))

    printime('  - Mergeing experiments')
    system(samtools  + ' merge -@ %d %s %s %s' % (opts.cpus, outbam, mreads1, mreads2))
    printime('  - Indexing new BAM file')
    # the samtools version is parsed from the "Version" line that samtools
    # writes to stderr when called without arguments; the thread option is
    # only passed to "index" from version 1.3.1 on.
    # universal_newlines=True makes stderr text (not bytes) on Python 3.
    usage = Popen(samtools, stderr=PIPE,
                  universal_newlines=True).communicate()[1]
    version = LooseVersion([line.split()[1]
                            for line in usage.split('\n')
                            if 'Version' in line][0])
    if version >= LooseVersion('1.3.1'):
        system(samtools  + ' index -@ %d %s' % (opts.cpus, outbam))
    else:
        system(samtools  + ' index %s' % (outbam))

    finish_time = time.localtime()
    save_to_db (opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
                len(bads), ncols, scc, std, reprod,
                eigen_corr_dat, eigen_corr_fig, outbam, corr, eig_corr,
                biases1, biases2, launch_time, finish_time)
    printime('\nDone.')
Ejemplo n.º 7
0
def run(opts):
    """
    Compare two Hi-C experiments and merge their read files.

    For each sample the reads, bad columns and biases are taken either from
    ``opts.bed1`` / ``opts.bed2`` (with ``opts.bad_co*`` / ``opts.biases*``)
    or, when no BED-like file is given, from the database of a previous job
    through load_parameters_fromdb.

    Both samples are always loaded at ``opts.reso`` (with bad columns and
    biases when ``opts.norm`` is set). Unless ``opts.skip_comparison`` is
    set they are compared: correlation between equidistant loci and
    correlation between eigenvectors. The two read files are then merged
    with merge_2d_beds and everything is recorded with save_to_db.

    :param opts: parsed command-line options of the tool

    :raises Exception: if the resolutions retrieved for the two samples
       differ, or if normalization is requested but no biases are available
       for one of the samples
    """
    check_options(opts)
    launch_time = time.localtime()

    param_hash = digest_parameters(opts)

    reso1 = reso2 = None
    if opts.bed1:
        mreads1 = path.realpath(opts.bed1)
        bad_co1 = opts.bad_co1
        biases1 = opts.biases1
    else:
        bad_co1, biases1, mreads1, reso1 = load_parameters_fromdb(
                opts.workdir1, opts.jobid1, opts, opts.tmpdb1)
        mreads1 = path.join(opts.workdir1, mreads1)

    if opts.bed2:
        mreads2 = path.realpath(opts.bed2)
        bad_co2 = opts.bad_co2
        biases2 = opts.biases2
    else:
        bad_co2, biases2, mreads2, reso2 = load_parameters_fromdb(
                opts.workdir2, opts.jobid2, opts, opts.tmpdb2)
        mreads2 = path.join(opts.workdir2, mreads2)

    if reso1 != reso2:
        raise Exception('ERROR: differing resolutions between experiments to '
                        'be merged')

    # NOTE: every print below takes one pre-formatted string so that the
    # output is the same with the Python 2 statement and Python 3 function.
    print('loading first sample %s' % (mreads1,))
    hic_data1 = load_hic_data_from_reads(mreads1, opts.reso)

    print('loading second sample %s' % (mreads2,))
    hic_data2 = load_hic_data_from_reads(mreads2, opts.reso)

    if opts.norm and biases1:
        bad_co1 = path.join(opts.workdir1, bad_co1)
        print('loading bad columns from first sample %s' % (bad_co1,))
        # one column index per line; files are closed as soon as parsed
        with open(bad_co1) as handle:
            hic_data1.bads = dict((int(line.strip()), True) for line in handle)
        biases1 = path.join(opts.workdir1, biases1)
        print('loading biases from first sample %s' % (biases1,))
        # two whitespace-separated fields per line: column and bias
        with open(biases1) as handle:
            hic_data1.bias = dict(
                (int(fields[0]), float(fields[1]))
                for fields in (line.split() for line in handle))
    elif opts.norm:
        raise Exception('ERROR: biases or filtered-columns not found')
    if opts.norm and biases2:
        bad_co2 = path.join(opts.workdir2, bad_co2)
        print('loading bad columns from second sample %s' % (bad_co2,))
        with open(bad_co2) as handle:
            hic_data2.bads = dict((int(line.strip()), True) for line in handle)
        biases2 = path.join(opts.workdir2, biases2)
        print('loading biases from second sample %s' % (biases2,))
        with open(biases2) as handle:
            hic_data2.bias = dict(
                (int(fields[0]), float(fields[1]))
                for fields in (line.split() for line in handle))
    elif opts.norm:
        raise Exception('ERROR: biases or filtered-columns not found')

    merge_dir = path.join(opts.workdir, '00_merge')
    mkdir(merge_dir)

    if not opts.skip_comparison:
        # output files are tagged with the resolution and the parameter hash
        tag = '%s_%s' % (opts.reso, param_hash)
        decay_corr_dat = path.join(merge_dir, 'decay_corr_dat_%s.txt' % tag)
        decay_corr_fig = path.join(merge_dir, 'decay_corr_dat_%s.png' % tag)
        eigen_corr_dat = path.join(merge_dir, 'eigen_corr_dat_%s.txt' % tag)
        eigen_corr_fig = path.join(merge_dir, 'eigen_corr_dat_%s.png' % tag)

        print('correlation between equidistant loci')
        corr, _, bads = correlate_matrices(hic_data1, hic_data2, normalized=opts.norm,
                                           remove_bad_columns=True,
                                           savefig=decay_corr_fig,
                                           savedata=decay_corr_dat, get_bads=True)
        print('correlation between eigenvectors')
        eig_corr = eig_correlate_matrices(hic_data1, hic_data2, normalized=opts.norm,
                                          remove_bad_columns=True, nvect=6,
                                          savefig=eigen_corr_fig,
                                          savedata=eigen_corr_dat)
    else:
        # placeholders recorded in the database when nothing is compared
        decay_corr_dat = 'None'
        decay_corr_fig = 'None'
        eigen_corr_dat = 'None'
        eigen_corr_fig = 'None'
        corr = eig_corr = None
        bads = {}

    # merge inputs
    mkdir(path.join(opts.workdir, '03_filtered_reads'))
    outbed = path.join(opts.workdir, '03_filtered_reads', 'valid_r1-r2_intersection_%s.tsv' % (
        param_hash))

    nreads = merge_2d_beds(mreads1, mreads2, outbed)

    finish_time = time.localtime()
    save_to_db (opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
                len(bads), len(hic_data1), nreads,
                eigen_corr_dat, eigen_corr_fig, outbed, corr, eig_corr,
                biases1, bad_co1, biases2, bad_co2, launch_time, finish_time)