Exemple #1
0
def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               hist_path, median, max_f, mad, launch_time, finish_time):
    """
    Store the results of a filtering job in the 'trace.db' SQLite database
    found in the working directory.

    The INTERSECTION_OUTPUTs and FILTER_OUTPUTs tables are created when
    INTERSECTION_OUTPUTs is missing (the JOBs table is not created here).
    The job is inserted in JOBs, the files are registered with ``add_path``
    and one row is written for the intersection plus one per filter. Rows
    whose PATHid is already present are replaced only if ``opts.force`` is
    set. The content of the tables is finally printed with ``print_db``.

    Values are passed to SQLite as bound parameters, so quotes inside the
    digested parameters or the filter names cannot break the statements.

    :param opts: options object; ``workdir`` and ``force`` are read here and
       the whole object is passed to ``digest_parameters``
    :param count: total number of interactions (stored as an integer)
    :param multiples: dictionary of counts, stored as space-separated
       'key:count' text with sorted keys
    :param reads: path registered with type '2D_BED'
    :param mreads: path registered with type '2D_BED'; the intersection row
       and the 'valid-pairs' row are attached to it
    :param n_valid_pairs: count stored under the filter name 'valid-pairs'
    :param masked: dictionary whose values are dictionaries with the keys
       'fnam', 'name' and 'reads'
    :param hist_path: path registered with type 'FIGURE'
    :param median: median fragment length (stored as an integer)
    :param max_f: maximum fragment length (stored as an integer)
    :param mad: MAD of the fragment length (stored as an integer)
    :param launch_time: time structure accepted by ``time.strftime``
    :param finish_time: time structure accepted by ``time.strftime``
    """
    time_format = "%d/%m/%Y %H:%M:%S"
    insert_intersection = """
        insert into INTERSECTION_OUTPUTs
        (Id  , PATHid, Total_interactions, Multiple_interactions, Median_fragment_length, MAD_fragment_length, Max_fragment_length)
        values
        (NULL,      ?,                  ?,                     ?,                      ?,                   ?,                   ?)
        """
    insert_filter = """
        insert into FILTER_OUTPUTs
        (Id  , PATHid, Name, Count, JOBid)
        values
        (NULL,      ?,    ?,     ?,     ?)
        """

    def _insert_unique(cur, table, statement, values):
        # values[0] is the PATHid, on which both tables are declared unique.
        # An existing row is only replaced when opts.force is set.
        try:
            cur.execute(statement, values)
        except lite.IntegrityError:
            print('WARNING: already filtered')
            if opts.force:
                cur.execute('delete from %s where PATHid = ?' % table,
                            (values[0],))
                cur.execute(statement, values)

    con = lite.connect(path.join(opts.workdir, 'trace.db'))
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
        create table INTERSECTION_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Total_interactions int,
            Multiple_interactions text,
            Median_fragment_length,
            MAD_fragment_length,
            Max_fragment_length,
            unique (PATHid))""")
            cur.execute("""
        create table FILTER_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Name text,
            Count int,
            JOBid int,
            unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,          ?,           ?,           ?, 'Filter',             ?)
     """, (parameters, time.strftime(time_format, launch_time),
           time.strftime(time_format, finish_time), param_hash))
        except lite.IntegrityError:
            # presumably the same job is already recorded -- JOBs is
            # declared elsewhere, confirm its unique constraint there
            pass

        jobid = get_jobid(cur)

        add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
        add_path(cur, reads, '2D_BED', jobid, opts.workdir)
        add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)

        multiples_text = ' '.join(['%s:%d' % (k, multiples[k])
                                   for k in sorted(multiples)])
        # int() reproduces the truncation of the former '%d' formatting and
        # turns non-builtin integer types into values sqlite3 can bind
        _insert_unique(cur, 'INTERSECTION_OUTPUTs', insert_intersection,
                       (get_path_id(cur, mreads, opts.workdir), int(count),
                        multiples_text, int(median), int(mad), int(max_f)))

        for f in masked:
            add_path(cur, masked[f]['fnam'], 'FILTER', jobid, opts.workdir)
            # Count used to be written as a quoted literal: binding its
            # string form keeps the column-affinity conversion unchanged
            _insert_unique(cur, 'FILTER_OUTPUTs', insert_filter,
                           (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                            masked[f]['name'], str(masked[f]['reads']),
                            jobid))

        _insert_unique(cur, 'FILTER_OUTPUTs', insert_filter,
                       (get_path_id(cur, mreads, opts.workdir), 'valid-pairs',
                        str(n_valid_pairs), jobid))

        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
        print_db(cur, 'INTERSECTION_OUTPUTs')
        print_db(cur, 'FILTER_OUTPUTs')
Exemple #2
0
def save_to_db(opts, cmp_result, tad_result, reso, inputs,
               launch_time, finish_time):
    """
    Store the results of a segmentation job (TADs and compartments) in the
    'trace.db' SQLite database found in the working directory.

    When ``opts.tmp`` is set, the function waits for the '__lock_db' file of
    the working directory to disappear, creates it, works on a copy of the
    database located at ``opts.tmp`` and copies it back at the end. The lock
    is released even if an error interrupts the process.

    The SEGMENT_OUTPUTs table is created when missing (the JOBs table is not
    created here). One row is written per chromosome found in either
    ``cmp_result`` or ``tad_result``, and the PATHs, JOBs and
    SEGMENT_OUTPUTs tables are printed with ``print_db`` after each of them.

    :param opts: options object; ``workdir`` and, when present, ``tmp`` are
       read here and the whole object is passed to ``digest_parameters``
    :param cmp_result: dictionary, by chromosome, of dictionaries with the
       keys 'path' and 'num' (compartments)
    :param tad_result: dictionary, by chromosome, of dictionaries with the
       keys 'path' and 'num' (TADs)
    :param reso: resolution (stored as an integer)
    :param inputs: iterable of values stored as comma-separated text
    :param launch_time: time structure accepted by ``time.strftime``
    :param finish_time: time structure accepted by ``time.strftime``
    """
    time_format = "%d/%m/%Y %H:%M:%S"
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmp = 'tmp' in opts and opts.tmp
    if use_tmp:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock ('a' creates the file when missing; the former 'wa'
        # is not a valid mode in Python 3)
        open(lock_file, 'a').close()
        # tmp file
        dbfile = opts.tmp
    else:
        dbfile = trace_db
    try:
        if use_tmp:
            copyfile(trace_db, dbfile)
        con = lite.connect(dbfile)
        with con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='SEGMENT_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table SEGMENT_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Inputs text,
                    TADs int,
                    Compartments int,
                    Chromosome text,
                    Resolution int)""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                values
                (NULL,          ?,           ?,           ?, 'Segment',          ?)
                """, (parameters,
                      time.strftime(time_format, launch_time),
                      time.strftime(time_format, finish_time), param_hash))
            except lite.IntegrityError:
                # presumably the same job is already recorded -- JOBs is
                # declared elsewhere, confirm its unique constraint there
                pass
            jobid = get_jobid(cur)

            # Start from the result holding more chromosomes (compartments
            # on ties), then append the chromosomes only present in the
            # other result so that none is left out.
            if len(tad_result) > len(cmp_result):
                primary, secondary = tad_result, cmp_result
            else:
                primary, secondary = cmp_result, tad_result
            chromosomes = list(primary) + [crm for crm in secondary
                                           if crm not in primary]
            inputs_text = ','.join([str(i) for i in inputs])

            for crm in chromosomes:
                if crm in cmp_result:
                    add_path(cur, cmp_result[crm]['path'], 'COMPARTMENT',
                             jobid, opts.workdir)
                if crm in tad_result:
                    add_path(cur, tad_result[crm]['path'], 'TAD', jobid,
                             opts.workdir)
                # int() reproduces the truncation of the former '%d'
                # formatting
                cur.execute("""
                insert into SEGMENT_OUTPUTs
                (Id  , JOBid, Inputs, TADs, Compartments, Chromosome, Resolution)
                values
                (NULL,     ?,      ?,    ?,            ?,          ?,          ?)
                """, (jobid,
                      inputs_text,
                      int(tad_result[crm]['num']) if crm in tad_result else 0,
                      int(cmp_result[crm]['num']) if crm in cmp_result else 0,
                      crm,
                      int(reso)))
                print_db(cur, 'PATHs')
                print_db(cur, 'JOBs')
                print_db(cur, 'SEGMENT_OUTPUTs')
        if use_tmp:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        if use_tmp:
            # release lock, whatever happened above
            try:
                remove(lock_file)
            except OSError:
                pass
Exemple #3
0
def save_to_db(opts, counts, multis, f_names1, f_names2, out_file1, out_file2,
               launch_time, finish_time):
    """
    Store the results of a parsing job in the 'trace.db' SQLite database
    found in the working directory.

    When ``opts.tmpdb`` is set, the function waits for the '__lock_db' file
    of the working directory to disappear, creates it, works on a copy of
    the database located at ``opts.tmpdb`` and copies it back at the end.
    The lock file is removed when leaving, even if an error interrupts the
    process.

    MAPPED_OUTPUTs and PARSED_OUTPUTs are created when PARSED_OUTPUTs is
    missing; PATHs and JOBs are created when JOBs is missing. Rows already
    present only trigger a warning. The PATHs, MAPPED_OUTPUTs,
    PARSED_OUTPUTs and JOBs tables are finally printed with ``print_db``.

    :param opts: options object; ``workdir``, ``genome`` and, when present,
       ``tmpdb`` are read here and the whole object is passed to
       ``digest_parameters``
    :param counts: dictionary, by read-end index, of dictionaries of counts
       (stored as integers in Uniquely_mapped)
    :param multis: dictionary, by read-end index, of dictionaries stored as
       comma-separated 'key:value' text (falsy keys are skipped)
    :param f_names1: mapped files of the first read-end, in the iteration
       order of ``counts[0]``
    :param f_names2: mapped files of the second read-end
    :param out_file1: path registered with type 'BED' for the first read-end
    :param out_file2: same for the second read-end; skipped when empty
    :param launch_time: time structure accepted by ``time.strftime``
    :param finish_time: time structure accepted by ``time.strftime``
    """
    time_format = "%d/%m/%Y %H:%M:%S"
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmp = 'tmpdb' in opts and opts.tmpdb
    if use_tmp:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
        # tmp file
        dbfile = opts.tmpdb
    else:
        dbfile = trace_db
    try:
        if use_tmp:
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        con = lite.connect(dbfile)
        with con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='PARSED_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
            create table MAPPED_OUTPUTs
               (Id integer primary key,
                PATHid int,
                BEDid int,
                Uniquely_mapped int,
                unique (PATHid, BEDid))""")
                cur.execute("""
            create table PARSED_OUTPUTs
               (Id integer primary key,
                PATHid int,
                Total_interactions int,
                Multiples text,
                unique (PATHid))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='JOBs'""")
            if not cur.fetchall():
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
                cur.execute("""
                create table JOBs
                   (Id integer primary key,
                    Parameters text,
                    Launch_time text,
                    Finish_time text,
                    Type text,
                    Parameters_md5 text,
                    unique (Parameters_md5))""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute("""
        insert into JOBs
         (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
        values
         (NULL,          ?,           ?,           ?, 'Parse',              ?)
         """, (parameters, time.strftime(time_format, launch_time),
               time.strftime(time_format, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same Parameters_md5 is already recorded
                pass
            jobid = get_jobid(cur)
            add_path(cur, out_file1, 'BED', jobid, opts.workdir)
            for genome in opts.genome:
                add_path(cur, genome, 'FASTA', jobid, opts.workdir)
            if out_file2:
                add_path(cur, out_file2, 'BED', jobid, opts.workdir)
            fnames = f_names1, f_names2
            outfiles = out_file1, out_file2
            for count in counts:
                try:
                    sum_reads = 0
                    for i, item in enumerate(counts[count]):
                        add_path(cur, fnames[count][i], 'MAPPED_FASTQ', jobid,
                                 opts.workdir)
                        # int() reproduces the truncation of the former
                        # '%d' formatting
                        cur.execute("""
                        insert into MAPPED_OUTPUTs
                        (Id  , PATHid, BEDid, Uniquely_mapped)
                        values
                        (NULL,      ?,     ?,               ?)
                        """, (get_path_id(cur, fnames[count][i],
                                          opts.workdir),
                              get_path_id(cur, outfiles[count],
                                          opts.workdir),
                              int(counts[count][item])))
                        sum_reads += counts[count][item]
                except lite.IntegrityError:
                    # the total below then only covers the rows inserted
                    # before the duplicate was met
                    print('WARNING: already parsed (MAPPED_OUTPUTs)')
                try:
                    cur.execute("""
                    insert into PARSED_OUTPUTs
                    (Id  , PATHid, Total_interactions, Multiples)
                    values
                    (NULL,      ?,                  ?,         ?)
                    """, (get_path_id(cur, outfiles[count], opts.workdir),
                          int(sum_reads),
                          ','.join([
                              ':'.join(map(str, (n, multis[count][n])))
                              for n in multis[count] if n
                          ])))
                except lite.IntegrityError:
                    print('WARNING: already parsed (PARSED_OUTPUTs)')

            print_db(cur, 'PATHs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
            print_db(cur, 'JOBs')
        if use_tmp:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock, whatever happened above
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #4
0
def save_to_db(opts, counts, multis, f_names1, f_names2, out_file1, out_file2,
               launch_time, finish_time):
    """
    Store the results of a parsing job in the 'trace.db' SQLite database
    found in the working directory.

    MAPPED_OUTPUTs and PARSED_OUTPUTs are created when PARSED_OUTPUTs is
    missing (the JOBs table is not created here). Rows already present only
    trigger a warning. The MAPPED_INPUTs, PATHs, MAPPED_OUTPUTs,
    PARSED_OUTPUTs and JOBs tables are finally printed with ``print_db``.

    Values are passed to SQLite as bound parameters instead of being
    formatted into the statements.

    :param opts: options object; ``workdir`` and ``genome`` are read here
       and the whole object is passed to ``digest_parameters``
    :param counts: dictionary, by read-end index, of dictionaries of counts
       (stored as integers in Uniquely_mapped)
    :param multis: dictionary, by read-end index, of numbers (stored as
       integers in Multiples)
    :param f_names1: mapped files of the first read-end, in the iteration
       order of ``counts[0]``; their paths must already be registered
    :param f_names2: mapped files of the second read-end
    :param out_file1: path registered with type 'BED' for the first read-end
    :param out_file2: same for the second read-end; skipped when empty
    :param launch_time: time structure accepted by ``time.strftime``
    :param finish_time: time structure accepted by ``time.strftime``
    """
    time_format = "%d/%m/%Y %H:%M:%S"
    con = lite.connect(path.join(opts.workdir, 'trace.db'))
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='PARSED_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
        create table MAPPED_OUTPUTs
           (Id integer primary key,
            PATHid int,
            BEDid int,
            Uniquely_mapped int,
            unique (PATHid, BEDid))""")
            cur.execute("""
        create table PARSED_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Total_interactions int,
            Multiples int,
            unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,          ?,           ?,           ?, 'Parse',              ?)
     """, (parameters,
           time.strftime(time_format, launch_time),
           time.strftime(time_format, finish_time), param_hash))
        except lite.IntegrityError:
            # presumably the same job is already recorded -- JOBs is
            # declared elsewhere, confirm its unique constraint there
            pass
        jobid = get_jobid(cur)
        add_path(cur, out_file1, 'BED', jobid, opts.workdir)
        for genome in opts.genome:
            add_path(cur, genome, 'FASTA', jobid, opts.workdir)
        if out_file2:
            add_path(cur, out_file2, 'BED', jobid, opts.workdir)
        fnames = f_names1, f_names2
        outfiles = out_file1, out_file2
        for count in counts:
            try:
                sum_reads = 0
                for i, item in enumerate(counts[count]):
                    # int() reproduces the truncation of the former '%d'
                    # formatting
                    cur.execute("""
                    insert into MAPPED_OUTPUTs
                    (Id  , PATHid, BEDid, Uniquely_mapped)
                    values
                    (NULL,      ?,     ?,               ?)
                    """, (get_path_id(cur, fnames[count][i], opts.workdir),
                          get_path_id(cur, outfiles[count], opts.workdir),
                          int(counts[count][item])))
                    sum_reads += counts[count][item]
            except lite.IntegrityError:
                # the total below then only covers the rows inserted before
                # the duplicate was met
                print('WARNING: already parsed (MAPPED_OUTPUTs)')
            try:
                cur.execute("""
                insert into PARSED_OUTPUTs
                (Id  , PATHid, Total_interactions, Multiples)
                values
                (NULL,      ?,                  ?,         ?)
                """, (get_path_id(cur, outfiles[count], opts.workdir),
                      int(sum_reads), int(multis[count])))
            except lite.IntegrityError:
                print('WARNING: already parsed (PARSED_OUTPUTs)')
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
Exemple #5
0
def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               outbam, hist_path, median, max_f, mad, launch_time,
               finish_time):
    """
    Store the results of a filtering job in the 'trace.db' SQLite database
    found in the working directory.

    When ``opts.tmpdb`` is set, the function waits for the '__lock_db' file
    of the working directory to disappear, creates it, works on a copy of
    the database located at ``opts.tmpdb`` and copies it back at the end.
    The lock file is removed when leaving, even if an error interrupts the
    process.

    The INTERSECTION_OUTPUTs and FILTER_OUTPUTs tables are created when
    INTERSECTION_OUTPUTs is missing (the JOBs table is not created here).
    Rows whose PATHid is already present are replaced only if ``opts.force``
    is set; the same values are written in both cases.

    :param opts: options object; ``workdir``, ``force``, ``apply``,
       ``fast_fragment`` and, when present, ``tmpdb`` are read here and the
       whole object is passed to ``digest_parameters``
    :param count: number of interactions; each entry of ``multiples`` with
       key ``m`` and value ``n`` changes it by ``n * (m * (m + 1) // 2) - n``
       before it is stored
    :param multiples: dictionary of counts, also stored as space-separated
       'key:count' text with sorted keys
    :param reads: path registered with type '2D_BED'
    :param mreads: path registered with type '2D_BED'; the intersection row
       and the 'valid-pairs' row are attached to it
    :param n_valid_pairs: count stored under the filter name 'valid-pairs'
    :param masked: dictionary whose values are dictionaries with the keys
       'name' and 'reads' and, optionally, 'fnam' (entries for which the
       registration raises KeyError are skipped); the 1-based position of an
       entry is looked up in ``opts.apply`` to fill the Applied column
    :param outbam: path registered with type 'HIC_BAM' (and, with '.bai'
       appended, with type 'HIC_BAI')
    :param hist_path: path registered with type 'FIGURE'
    :param median: median fragment length (stored as an integer)
    :param max_f: maximum fragment length (stored as an integer)
    :param mad: MAD of the fragment length (stored as an integer)
    :param launch_time: time structure accepted by ``time.strftime``
    :param finish_time: time structure accepted by ``time.strftime``
    """
    time_format = "%d/%m/%Y %H:%M:%S"
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmp = 'tmpdb' in opts and opts.tmpdb
    insert_intersection = """
        insert into INTERSECTION_OUTPUTs
        (Id  , PATHid, Total_interactions, Multiple_interactions, Median_fragment_length, MAD_fragment_length, Max_fragment_length)
        values
        (NULL,      ?,                  ?,                     ?,                      ?,                   ?,                   ?)
        """
    insert_filter = """
        insert into FILTER_OUTPUTs
        (Id  , PATHid, Name, Count, Applied, JOBid)
        values
        (NULL,      ?,    ?,     ?,       ?,     ?)
        """

    def _insert_unique(cur, table, statement, values):
        # values[0] is the PATHid, on which both tables are declared unique.
        # An existing row is only replaced when opts.force is set.
        try:
            cur.execute(statement, values)
        except lite.IntegrityError:
            print('WARNING: already filtered')
            if opts.force:
                cur.execute('delete from %s where PATHid = ?' % table,
                            (values[0],))
                cur.execute(statement, values)

    if use_tmp:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
        # tmp file
        dbfile = opts.tmpdb
    else:
        dbfile = trace_db
    try:
        if use_tmp:
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        con = lite.connect(dbfile)
        with con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='INTERSECTION_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
            create table INTERSECTION_OUTPUTs
               (Id integer primary key,
                PATHid int,
                Total_interactions int,
                Multiple_interactions text,
                Median_fragment_length,
                MAD_fragment_length,
                Max_fragment_length,
                unique (PATHid))""")
                cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key,
                PATHid int,
                Name text,
                Count int,
                Applied text,
                JOBid int,
                unique (PATHid))""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute("""
        insert into JOBs
         (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
        values
         (NULL,          ?,           ?,           ?, 'Filter',             ?)
         """, (parameters, time.strftime(time_format, launch_time),
               time.strftime(time_format, finish_time), param_hash))
            except lite.IntegrityError:
                # presumably the same job is already recorded -- JOBs is
                # declared elsewhere, confirm its unique constraint there
                pass

            jobid = get_jobid(cur)

            add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
            add_path(cur, outbam, 'HIC_BAM', jobid, opts.workdir)
            add_path(cur, outbam + '.bai', 'HIC_BAI', jobid, opts.workdir)
            add_path(cur, reads, '2D_BED', jobid, opts.workdir)
            add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)

            real_count = count
            for mult in multiples:
                real_count = (real_count - multiples[mult] +
                              multiples[mult] * ((mult * (mult + 1)) // 2))
            multiples_text = ' '.join(['%s:%d' % (k, multiples[k])
                                       for k in sorted(multiples)])
            # The same corrected total is written whether or not the row
            # has to be replaced. int() reproduces the truncation of the
            # former '%d' formatting.
            _insert_unique(cur, 'INTERSECTION_OUTPUTs', insert_intersection,
                           (get_path_id(cur, mreads, opts.workdir),
                            int(real_count), multiples_text,
                            int(median), int(mad), int(max_f)))

            for nf, f in enumerate(masked, 1):
                try:
                    add_path(cur, masked[f]['fnam'], 'FILTER', jobid,
                             opts.workdir)
                except KeyError:
                    continue
                # Count used to be written as a quoted literal: binding its
                # string form keeps the column-affinity conversion unchanged
                _insert_unique(cur, 'FILTER_OUTPUTs', insert_filter,
                               (get_path_id(cur, masked[f]['fnam'],
                                            opts.workdir),
                                masked[f]['name'], str(masked[f]['reads']),
                                'True' if nf in opts.apply else 'False',
                                jobid))

            _insert_unique(cur, 'FILTER_OUTPUTs', insert_filter,
                           (get_path_id(cur, mreads, opts.workdir),
                            'valid-pairs', str(n_valid_pairs), '', jobid))

            print_db(cur, 'PATHs')
            if not opts.fast_fragment:
                print_db(cur, 'MAPPED_OUTPUTs')
                print_db(cur, 'PARSED_OUTPUTs')
            print_db(cur, 'JOBs')
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'FILTER_OUTPUTs')
        if use_tmp:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock, whatever happened above
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #6
0
def save_to_db(opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
               nbad_columns, ncolumns, scc, std, reprod,
               eigen_corr_dat, eigen_corr_fig, outbed, corr, eig_corr,
               biases1, biases2, masked1, masked2, launch_time, finish_time):
    """
    Store the results of a merge/comparison job in the 'trace.db' SQLite
    database found in the working directory.

    When ``opts.tmpdb`` is set, the function waits for the '__lock_db' file
    of the working directory to disappear, creates it, works on a copy of
    the database located at ``opts.tmpdb`` and copies it back at the end.
    The lock file is removed when leaving, even if an error interrupts the
    process.

    PATHs, JOBs, FILTER_OUTPUTs, MERGE_OUTPUTs and MERGE_STATs are created
    when MERGE_OUTPUTs is missing. The filter rows of the databases found in
    ``opts.workdir1`` and ``opts.workdir2`` are loaded into ``masked1`` and
    ``masked2`` (modified in place); for each filter other than
    'valid-pairs' the two masked files are concatenated into a new file
    under '03_filtered_reads' and the summed count is stored in
    FILTER_OUTPUTs.

    :param opts: options object; ``workdir``, ``workdir1``, ``workdir2``,
       ``skip_merge``, ``skip_comparison``, ``norm``, ``reso`` and, when
       present, ``tmpdb``, ``tmpdb1`` and ``tmpdb2`` are read here and the
       whole object is passed to ``digest_parameters``
    :param mreads1: path registered with type 'EXT_HIC_BAM'
    :param mreads2: path registered with type 'EXT_HIC_BAM'
    :param decay_corr_dat: path registered with type 'CORR'
    :param decay_corr_fig: path registered with type 'FIGURE'
    :param nbad_columns: stored in MERGE_STATs as N_filtered
    :param ncolumns: stored in MERGE_STATs as N_columns
    :param scc: stored in MERGE_STATs as scc
    :param std: stored in MERGE_STATs as std_scc
    :param reprod: stored in MERGE_STATs as reprod
    :param eigen_corr_dat: path registered with type 'CORR'
    :param eigen_corr_fig: path registered with type 'FIGURE'
    :param outbed: path registered with type 'HIC_BAM' unless
       ``opts.skip_merge`` is set
    :param corr: sequence of numbers; every second of the first ten is kept
       in the decay_corr summary
    :param eig_corr: sequence of sequences; the maximum of each of the first
       four is kept in the eigen_corr summary
    :param biases1: path registered with type 'BIASES' when ``opts.norm``
    :param biases2: path registered with type 'BIASES' when ``opts.norm``
    :param masked1: dictionary, by filter name, of dictionaries with the
       keys 'path' and 'count' for the first working directory
    :param masked2: same for the second working directory; it must hold
       every filter name found in ``masked1``
    :param launch_time: time structure accepted by ``time.strftime``
    :param finish_time: time structure accepted by ``time.strftime``
    """
    time_format = "%d/%m/%Y %H:%M:%S"
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmp = 'tmpdb' in opts and opts.tmpdb

    def _path_id(cur, target):
        # Id of *target* in PATHs, looked up by its path relative to the
        # working directory (IndexError if it was never registered)
        cur.execute("select id from paths where path = ?",
                    (path.relpath(target, opts.workdir),))
        return cur.fetchall()[0][0]

    def _load_filters(workdir, tmp_attr, masked):
        # Fill *masked* with the FILTER_OUTPUTs rows of the database found
        # in *workdir*. tmp_attr names the option holding the temporary
        # copy; it is only read when temporary databases are in use.
        if use_tmp:
            # tmp file
            other_db = getattr(opts, tmp_attr)
            try:  # to copy in case the reads were already mapped
                copyfile(path.join(workdir, 'trace.db'), other_db)
            except IOError:
                pass
        else:
            other_db = path.join(workdir, 'trace.db')
        tmpcon = lite.connect(other_db)
        with tmpcon:
            tmpcur = tmpcon.cursor()
            tmpcur.execute("select Name, PATHid, Count from filter_outputs")
            for name, pathid, count in tmpcur.fetchall():
                res = tmpcur.execute("select Path from PATHs where Id = ?",
                                     (pathid,))
                tmppath = res.fetchall()[0][0]
                masked[name] = {'path': tmppath, 'count': count}
        if use_tmp:
            remove(other_db)

    def _append_masked(out, workdir, rel_path):
        # Copy a masked-reads file into *out*, falling back on its '.gz'
        # version when the plain file is not found
        try:
            fh = magic_open(path.join(workdir, rel_path))
        except FileNotFoundError:
            fh = magic_open(path.join(workdir, rel_path + '.gz'))
        for line in fh:
            out.write(line)

    if use_tmp:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
        # tmp file
        dbfile = opts.tmpdb
    else:
        dbfile = trace_db
    try:
        if use_tmp:
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        con = lite.connect(dbfile)
        with con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='MERGE_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
                cur.execute("""
                create table JOBs
                   (Id integer primary key,
                    Parameters text,
                    Launch_time text,
                    Finish_time text,
                    Type text,
                    Parameters_md5 text,
                    unique (Parameters_md5))""")
                cur.execute("""
                create table FILTER_OUTPUTs
                   (Id integer primary key,
                    PATHid int,
                    Name text,
                    Count int,
                    JOBid int,
                    unique (PATHid))""")
                cur.execute("""
                create table MERGE_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Wrkd1Path int,
                    Wrkd2Path int,
                    Bed1Path int,
                    Bed2Path int,
                    MergePath int,
                    unique (JOBid))""")
                cur.execute("""
                create table MERGE_STATs
                   (Id integer primary key,
                    JOBid int,
                    Inputs text,
                    decay_corr text,
                    eigen_corr text,
                    reprod real,
                    scc real,
                    std_scc real,
                    N_columns int,
                    N_filtered int,
                    Resolution int,
                    bias1Path int,
                    bias2Path int,
                    unique (JOBid))""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type   , Parameters_md5)
                values
                (NULL,          ?,           ?,           ?, 'Merge',              ?)
                """, (parameters,
                      time.strftime(time_format, launch_time),
                      time.strftime(time_format, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same Parameters_md5 is already recorded
                pass

            jobid = get_jobid(cur)
            add_path(cur, decay_corr_dat, 'CORR', jobid, opts.workdir)
            add_path(cur, decay_corr_fig, 'FIGURE', jobid, opts.workdir)
            add_path(cur, eigen_corr_dat, 'CORR', jobid, opts.workdir)
            add_path(cur, eigen_corr_fig, 'FIGURE', jobid, opts.workdir)

            add_path(cur, opts.workdir, 'WORKDIR', jobid)
            add_path(cur, opts.workdir1, 'WORKDIR1', jobid, opts.workdir)
            add_path(cur, opts.workdir2, 'WORKDIR2', jobid, opts.workdir)
            add_path(cur, mreads1, 'EXT_HIC_BAM', jobid, opts.workdir)
            add_path(cur, mreads2, 'EXT_HIC_BAM', jobid, opts.workdir)
            if not opts.skip_merge:
                add_path(cur, outbed, 'HIC_BAM', jobid, opts.workdir)

            if opts.norm:
                add_path(cur, biases1, 'BIASES', jobid, opts.workdir)
                add_path(cur, biases2, 'BIASES', jobid, opts.workdir)

                biasid1 = get_path_id(cur, biases1, opts.workdir)
                biasid2 = get_path_id(cur, biases2, opts.workdir)
            else:
                biasid1 = 0
                biasid2 = 0

            bed1 = _path_id(cur, mreads1)
            w1path = _path_id(cur, opts.workdir1) if opts.workdir1 else 0
            bed2 = _path_id(cur, mreads2)
            w2path = _path_id(cur, opts.workdir2) if opts.workdir2 else 0
            if not opts.skip_merge:
                outbedid = _path_id(cur, outbed)
            if not opts.skip_comparison:
                decay_corr = '-'.join(['%.1f' % (v)
                                       for v in corr[:10:2]]).replace('0.', '.')
                eigen_corr = '-'.join(['%.2f' % (max(v))
                                       for v in eig_corr[:4]]).replace('0.', '.')
            else:
                decay_corr = eigen_corr = None

            # The two statements below only interpolate numbers and text
            # built from numbers; they are kept in their formatted form so
            # that the stored rounding (6 decimals for the reals) does not
            # change.
            if not opts.skip_merge:
                cur.execute("""
                insert into MERGE_OUTPUTs
                (Id  , JOBid, Wrkd1Path, Wrkd2Path, Bed1Path, Bed2Path, MergePath)
                values
                (NULL,    %d,        %d,        %d,       %d,       %d,        %d)
                """ % (jobid,    w1path,    w2path,     bed1,     bed2,  outbedid))

            if not opts.skip_comparison:
                cur.execute("""
                insert into MERGE_STATs
                (Id  , JOBid, N_columns,   N_filtered, Resolution, reprod, scc, std_scc, decay_corr, eigen_corr, bias1Path, bias2Path)
                values
                (NULL,    %d,        %d,           %d,         %d,     %f,  %f,      %f,       '%s',       '%s',        %d,        %d)
                """ % (jobid,  ncolumns, nbad_columns, opts.reso , reprod, scc,     std, decay_corr, eigen_corr,   biasid1,   biasid2))

            if opts.workdir1:
                _load_filters(opts.workdir1, 'tmpdb1', masked1)
            if opts.workdir2:
                _load_filters(opts.workdir2, 'tmpdb2', masked2)

            for f in masked1:
                if f != 'valid-pairs':
                    outmask = path.join(opts.workdir, '03_filtered_reads',
                                        'all_r1-r2_intersection_%s.tsv_%s.tsv' % (
                                            param_hash, f))
                    # the merged file is closed before being registered
                    with open(outmask, 'w') as out:
                        _append_masked(out, opts.workdir1, masked1[f]['path'])
                        _append_masked(out, opts.workdir2, masked2[f]['path'])
                    add_path(cur, outmask, 'FILTER', jobid, opts.workdir)
                else:
                    if opts.skip_merge:
                        outmask = 'NA'
                    else:
                        outmask = outbed
                try:
                    path_id = get_path_id(cur, outmask, opts.workdir)
                except IndexError:
                    # no such path registered (e.g. 'NA' when not merging)
                    path_id = -1
                # Count used to be written as a quoted literal: binding its
                # string form keeps the column-affinity conversion unchanged
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id  , PATHid, Name, Count, JOBid)
                values
                (NULL,      ?,    ?,     ?,     ?)
                """, (path_id, f,
                      str(masked1[f]['count'] + masked2[f]['count']), jobid))

            print_db(cur, 'PATHs')
            print_db(cur, 'JOBs')
            print_db(cur, 'MERGE_OUTPUTs')
            print_db(cur, 'MERGE_STATs')
            print_db(cur, 'FILTER_OUTPUTs')

        if use_tmp:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock, whatever happened above
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #7
0
def save_to_db(opts, bias_file, mreads, bad_col_image, nbad_columns, ncolumns,
               raw_cisprc, norm_cisprc, inter_vs_gcoord, a2, bam_filter,
               launch_time, finish_time):
    """
    Register a 'Normalize' job and its results in ``trace.db``.

    Creates the PATHs, JOBs and NORMALIZE_OUTPUTs tables when they are
    missing, inserts one JOBs row (silently skipped when a job with the same
    parameter hash already exists), records the file paths through
    ``add_path``, inserts one NORMALIZE_OUTPUTs row and prints the tables
    with ``print_db``.

    When ``opts.tmpdb`` is set the work is done on a copy of the database
    located at ``opts.tmpdb``, guarded by the ``__lock_db`` file in
    ``opts.workdir``; the copy replaces ``trace.db`` once the transaction
    has been committed. The lock file is removed even when an error occurs.

    :param opts: options namespace; reads ``workdir``, ``tmpdb`` (optional),
       ``bam``, ``mappability``, ``fasta``, ``reso``, ``normalization`` and
       ``factor``
    :param bias_file: path registered with type 'BIASES'
    :param mreads: path looked up in PATHs (relative to ``opts.workdir``);
       its id is stored in the ``Input`` column
    :param bad_col_image: path registered with type 'FIGURE'
    :param nbad_columns: stored as ``N_filtered``
    :param ncolumns: stored as ``N_columns``
    :param raw_cisprc: multiplied by 100 and stored as ``Cis_percentage_Raw``
    :param norm_cisprc: multiplied by 100 and stored as
       ``Cis_percentage_Norm``
    :param inter_vs_gcoord: path registered with type 'FIGURE'
    :param a2: stored as ``Slope_700kb_10Mb``; NaN is replaced by 0
    :param bam_filter: stored as ``BAM_filter``
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``

    :raises IndexError: if ``mreads`` is not found in the PATHs table
    """
    from contextlib import closing

    lock_path = path.join(opts.workdir, '__lock_db')
    final_db = path.join(opts.workdir, 'trace.db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock (NOTE: check-then-create is not atomic)
        while path.exists(lock_path):
            time.sleep(0.5)
        # close lock
        open(lock_path, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(final_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = final_db
        con = lite.connect(dbfile)
        # `with con` only commits/rolls back; `closing` releases the file
        # handle before the database is copied back.
        with closing(con), con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='JOBs'""")
            if not cur.fetchall():
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
                cur.execute("""
                create table JOBs
                   (Id integer primary key,
                    Parameters text,
                    Launch_time text,
                    Finish_time text,
                    Type text,
                    Parameters_md5 text,
                    unique (Parameters_md5))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='NORMALIZE_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table NORMALIZE_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Input int,
                    N_columns int,
                    N_filtered int,
                    BAM_filter int,
                    Cis_percentage_Raw real,
                    Cis_percentage_Norm real,
                    Slope_700kb_10Mb real,
                    Resolution int,
                    Normalization text,
                    Factor int,
                    unique (JOBid))""")
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            try:
                cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                values
                (NULL,       '%s',        '%s',        '%s', 'Normalize',           '%s')
                """ % (parameters,
                       time.strftime(time_fmt, launch_time),
                       time.strftime(time_fmt, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same parameter hash is already registered
                pass
            jobid = get_jobid(cur)
            add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
            add_path(cur, bad_col_image, 'FIGURE', jobid, opts.workdir)
            add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
            if opts.bam:
                add_path(cur, path.realpath(opts.bam), 'EXT_2D_BAM', jobid,
                         opts.workdir)
            if opts.mappability:
                add_path(cur, path.realpath(opts.mappability),
                         'EXT_MAPPABILITY', jobid, opts.workdir)
            if opts.fasta:
                add_path(cur, path.realpath(opts.fasta), 'EXT_FASTA', jobid,
                         opts.workdir)
            # get pathid of input
            cur.execute("select id from paths where path = '%s'" %
                        (path.relpath(mreads, opts.workdir)))
            input_bed = cur.fetchall()[0][0]

            a2 = 0 if isnan(a2) else a2
            insert_sql = """
            insert into NORMALIZE_OUTPUTs
            (Id  , JOBid,     Input, N_columns,   N_filtered, BAM_filter, Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,   Resolution,      Normalization,      Factor)
            values
            (NULL,    %d,        %d,        %d,           %d,         %d,                 %f,                  %f,               %f,           %d,               '%s',          %f)
            """ % (jobid, input_bed, ncolumns, nbad_columns, bam_filter,
                   100 * raw_cisprc, 100 * norm_cisprc, a2, opts.reso,
                   opts.normalization, opts.factor)
            # The statement is attempted twice (as before) and a warning is
            # printed if both attempts fail.
            for _ in range(2):
                try:
                    cur.execute(insert_sql)
                    break
                except lite.OperationalError:
                    continue
            else:
                print('WARNING: Normalized table not written!!!')

            print_db(cur, 'PATHs')
            print_db(cur, 'JOBs')
            try:
                # these tables may be absent; the first missing one stops
                # the listing
                for table in ('FILTER_OUTPUTs', 'INTERSECTION_OUTPUTs',
                              'MAPPED_INPUTs', 'MAPPED_OUTPUTs',
                              'PARSED_OUTPUTs'):
                    print_db(cur, table)
            except lite.OperationalError:
                pass
            print_db(cur, 'NORMALIZE_OUTPUTs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, final_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above
        try:
            remove(lock_path)
        except OSError:
            pass
Exemple #8
0
def save_to_db(opts, counts, multis, f_names1, f_names2, out_file1, out_file2,
               launch_time, finish_time):
    """
    Register a 'Parse' job and its results in ``trace.db``.

    Creates the MAPPED_OUTPUTs and PARSED_OUTPUTs tables when
    PARSED_OUTPUTs is missing, inserts one JOBs row (silently skipped when a
    job with the same parameter hash already exists), records the output and
    genome paths through ``add_path``, then for every key of ``counts``
    inserts one MAPPED_OUTPUTs row per item and one PARSED_OUTPUTs row.
    Duplicate rows only produce a warning.

    When ``opts.tmpdb`` is set the work is done on a copy of the database
    located at ``opts.tmpdb``, guarded by the ``__lock_db`` file in
    ``opts.workdir``; the copy replaces ``trace.db`` once the transaction
    has been committed. The lock file is removed even when an error occurs.

    :param opts: options namespace; reads ``workdir``, ``tmpdb`` (optional)
       and ``genome`` (iterable of paths registered with type 'FASTA')
    :param counts: mapping whose keys index ``(f_names1, f_names2)`` and
       ``(out_file1, out_file2)``; each value maps an item to a number
       stored as ``Uniquely_mapped``
    :param multis: mapping with the same keys as ``counts``; each value maps
       ``n`` to a number, serialized as ``n:value`` pairs (falsy ``n``
       skipped) in the ``Multiples`` column
    :param f_names1: sequence of input paths for the first key of ``counts``
    :param f_names2: sequence of input paths for the second key
    :param out_file1: path registered with type 'BED'
    :param out_file2: path registered with type 'BED' when truthy
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    lock_path = path.join(opts.workdir, '__lock_db')
    final_db = path.join(opts.workdir, 'trace.db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock (NOTE: check-then-create is not atomic)
        while path.exists(lock_path):
            time.sleep(0.5)
        # close lock
        open(lock_path, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(final_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = final_db
        con = lite.connect(dbfile)
        # `with con` only commits/rolls back; `closing` releases the file
        # handle before the database is copied back.
        with closing(con), con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='PARSED_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table MAPPED_OUTPUTs
                   (Id integer primary key,
                    PATHid int,
                    BEDid int,
                    Uniquely_mapped int,
                    unique (PATHid, BEDid))""")
                cur.execute("""
                create table PARSED_OUTPUTs
                   (Id integer primary key,
                    PATHid int,
                    Total_interactions int,
                    Multiples text,
                    unique (PATHid))""")
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            try:
                cur.execute("""
                insert into JOBs
                 (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
                values
                 (NULL,       '%s',        '%s',        '%s', 'Parse',           '%s')
                """ % (parameters,
                       time.strftime(time_fmt, launch_time),
                       time.strftime(time_fmt, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same parameter hash is already registered
                pass
            jobid = get_jobid(cur)
            add_path(cur, out_file1, 'BED', jobid, opts.workdir)
            for genome in opts.genome:
                add_path(cur, genome, 'FASTA', jobid, opts.workdir)
            if out_file2:
                add_path(cur, out_file2, 'BED', jobid, opts.workdir)
            fnames = f_names1, f_names2
            outfiles = out_file1, out_file2
            for count in counts:
                per_input = counts[count]
                # Computed up front so that a duplicate-row error in the loop
                # below cannot leave a partial total for PARSED_OUTPUTs.
                sum_reads = sum(per_input[item] for item in per_input)
                try:
                    for i, item in enumerate(per_input):
                        cur.execute("""
                        insert into MAPPED_OUTPUTs
                        (Id  , PATHid, BEDid, Uniquely_mapped)
                        values
                        (NULL,    %d,     %d,      %d)
                        """ % (get_path_id(cur, fnames[count][i], opts.workdir),
                               get_path_id(cur, outfiles[count], opts.workdir),
                               per_input[item]))
                except lite.IntegrityError:
                    print('WARNING: already parsed (MAPPED_OUTPUTs)')
                multiples = ','.join(
                    [':'.join(map(str, (n, multis[count][n])))
                     for n in multis[count] if n])
                try:
                    cur.execute("""
                    insert into PARSED_OUTPUTs
                    (Id  , PATHid, Total_interactions, Multiples)
                    values
                    (NULL,     %d,      %d,        '%s')
                    """ % (get_path_id(cur, outfiles[count], opts.workdir),
                           sum_reads, multiples))
                except lite.IntegrityError:
                    print('WARNING: already parsed (PARSED_OUTPUTs)')

            print_db(cur, 'MAPPED_INPUTs')
            print_db(cur, 'PATHs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
            print_db(cur, 'JOBs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, final_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above
        try:
            remove(lock_path)
        except OSError:
            pass
Exemple #9
0
def save_to_db(opts, cmp_result, tad_result, reso, inputs,
               richA_stats, firsts, param_hash,
               launch_time, finish_time):
    """
    Register a 'Segment' job and its per-chromosome results in ``trace.db``.

    Creates the PATHs, JOBs and SEGMENT_OUTPUTs tables when they are
    missing, inserts one JOBs row (silently skipped when a job with the same
    parameter hash already exists), and for every chromosome of the larger
    of ``cmp_result`` / ``tad_result`` records the result paths through
    ``add_path`` and inserts one SEGMENT_OUTPUTs row. If that insert fails
    with an OperationalError, the columns ``richA_corr``, ``EValue`` and
    ``EV_index`` are added to the table (when not yet present) and the
    insert is retried once.

    When ``opts.tmpdb`` is set the work is done on a copy of the database
    located at ``opts.tmpdb``, guarded by the ``__lock_db`` file in
    ``opts.workdir``; the copy replaces ``trace.db`` once the transaction
    has been committed. The lock file is removed even when an error occurs.

    :param opts: options namespace; reads ``workdir``, ``tmpdb`` (optional),
       ``savecorr``, ``rich_in_A`` and ``ev_index``
    :param cmp_result: per-chromosome dict with keys ``path_cmprt1``,
       ``path_cmprt2``, ``image_cmprt``, ``num`` and (if ``opts.savecorr``)
       ``path_cormat``
    :param tad_result: per-chromosome dict with keys ``path`` and ``num``
    :param reso: stored as ``Resolution``
    :param inputs: iterable joined with ',' into the ``Inputs`` column
    :param richA_stats: per-chromosome value stored as ``richA_corr``
       (NULL when absent or None)
    :param firsts: per-chromosome data; ``firsts[crm][0][index]`` is stored
       as ``EValue``
    :param param_hash: stored as ``Parameters_md5`` of the job
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``

    :raises IOError: in ``tmpdb`` mode, if ``trace.db`` cannot be copied
    """
    from contextlib import closing

    lock_path = path.join(opts.workdir, '__lock_db')
    final_db = path.join(opts.workdir, 'trace.db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock (NOTE: check-then-create is not atomic)
        while path.exists(lock_path):
            time.sleep(0.5)
        # close lock
        open(lock_path, 'a').close()
    try:
        if use_tmpdb:
            # tmp file; a failure here still propagates, but the lock is
            # now released by the `finally` below
            dbfile = opts.tmpdb
            copyfile(final_db, dbfile)
        else:
            dbfile = final_db
        con = lite.connect(dbfile)
        # `with con` only commits/rolls back; `closing` releases the file
        # handle before the database is copied back.
        with closing(con), con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='JOBs'""")
            if not cur.fetchall():
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
                cur.execute("""
                create table JOBs
                   (Id integer primary key,
                    Parameters text,
                    Launch_time text,
                    Finish_time text,
                    Type text,
                    Parameters_md5 text,
                    unique (Parameters_md5))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='SEGMENT_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table SEGMENT_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Inputs text,
                    TADs int,
                    Compartments int,
                    richA_corr real,
                    EV_index int,
                    EValue real,
                    Chromosome text,
                    Resolution int)""")
            parameters = digest_parameters(opts, get_md5=False,
                                           extra=['fasta'])
            try:
                cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                values
                (NULL,       '%s',        '%s',        '%s', 'Segment',       '%s')
                """ % (parameters,
                       time.strftime(time_fmt, launch_time),
                       time.strftime(time_fmt, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same parameter hash is already registered
                pass
            jobid = get_jobid(cur)
            inputs_str = ','.join([str(i) for i in inputs])
            chromosomes = max(cmp_result.keys(), tad_result.keys(), key=len)
            for ncrm, crm in enumerate(chromosomes):
                if crm in cmp_result:
                    add_path(cur, cmp_result[crm]['path_cmprt1'],
                             'COMPARTMENT', jobid, opts.workdir)
                    add_path(cur, cmp_result[crm]['path_cmprt2'],
                             'COMPARTMENT', jobid, opts.workdir)
                    add_path(cur, cmp_result[crm]['image_cmprt'], 'FIGURE',
                             jobid, opts.workdir)
                    if opts.savecorr:
                        add_path(cur, cmp_result[crm]['path_cormat'],
                                 'CROSS_CORR_MAT', jobid, opts.workdir)
                if crm in tad_result:
                    add_path(cur, tad_result[crm]['path'], 'TAD', jobid,
                             opts.workdir)
                if opts.rich_in_A:
                    add_path(cur, opts.rich_in_A, 'BED', jobid, opts.workdir)

                if crm in firsts:
                    evalue = firsts[crm][0][(opts.ev_index[ncrm] - 1)
                                            if opts.ev_index else 0]
                    eindex = opts.ev_index[ncrm] if opts.ev_index else 1
                else:
                    evalue = 'NULL'
                    eindex = 'NULL'
                richa = (richA_stats[crm] if crm in richA_stats
                         and richA_stats[crm] is not None else 'NULL')
                try:
                    cur.execute("""
                    insert into SEGMENT_OUTPUTs
                    (Id  , JOBid, Inputs, TADs, Compartments, richA_corr, EV_index, EValue, Chromosome, Resolution)
                    values
                    (NULL,    %d,   '%s',   %s,           %s,         %s,       %s,     %s,       '%s',         %d)
                    """ % (jobid,
                           inputs_str,
                           tad_result[crm]['num'] if crm in tad_result else 'NULL',
                           cmp_result[crm]['num'] if crm in cmp_result else 'NULL',
                           richa, eindex, evalue, crm, reso))
                except lite.OperationalError:  # TODO: remove this
                    print_exc()
                    # Add columns that may be missing; a column that is
                    # already there raises OperationalError, which is
                    # ignored. The former EV_index statement had a stray
                    # comma and could never succeed.
                    for column, ctype in (('richA_corr', 'real'),
                                          ('EValue', 'real'),
                                          ('EV_index', 'int')):
                        try:
                            cur.execute("alter table SEGMENT_OUTPUTs "
                                        "add column '%s' '%s'"
                                        % (column, ctype))
                        except lite.OperationalError:
                            pass
                    cur.execute("""
                    insert into SEGMENT_OUTPUTs
                    (Id  , JOBid, Inputs, TADs, Compartments, richA_corr, EV_index, EValue, Chromosome, Resolution)
                    values
                    (NULL,    %d,   '%s',   %d,           %d,         %s,       %s,     %s,       '%s',         %d)
                    """ % (jobid,
                           inputs_str,
                           tad_result[crm]['num'] if crm in tad_result else 0,
                           cmp_result[crm]['num'] if crm in cmp_result else 0,
                           richa, eindex, evalue, crm, reso))
            print_db(cur, 'PATHs')
            print_db(cur, 'JOBs')
            print_db(cur, 'SEGMENT_OUTPUTs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, final_db)
            remove(dbfile)
    finally:
        # release lock (only taken in tmpdb mode), also on failure
        if use_tmpdb:
            try:
                remove(lock_path)
            except OSError:
                pass
Exemple #10
0
def save_to_db(opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
               nbad_columns, ncolumns, scc, std, reprod,
               eigen_corr_dat, eigen_corr_fig, outbed, corr, eig_corr,
               biases1, biases2, launch_time, finish_time):
    """
    Register a 'Merge' job and its results in ``trace.db``.

    Creates the PATHs, JOBs, FILTER_OUTPUTs, MERGE_OUTPUTs and MERGE_STATs
    tables when MERGE_OUTPUTs is missing, inserts one JOBs row (silently
    skipped when a job with the same parameter hash already exists),
    records the paths through ``add_path``, inserts one MERGE_OUTPUTs row
    and, unless ``opts.skip_comparison``, one MERGE_STATs row.

    Then the FILTER_OUTPUTs tables of ``opts.workdir1`` and
    ``opts.workdir2`` are read; for every filter found in the first one
    (other than 'valid-pairs') the corresponding files of both working
    directories are concatenated into
    ``03_filtered_reads/all_r1-r2_intersection_<hash>.tsv_<filter>.tsv`` and
    a FILTER_OUTPUTs row holding the summed counts is inserted.

    When ``opts.tmpdb`` is set the work is done on a copy of the database
    located at ``opts.tmpdb``, guarded by the ``__lock_db`` file in
    ``opts.workdir``; the copy replaces ``trace.db`` once the transaction
    has been committed. The lock file is removed even when an error occurs.

    :param opts: options namespace; reads ``workdir``, ``workdir1``,
       ``workdir2``, ``tmpdb`` / ``tmpdb1`` / ``tmpdb2`` (optional),
       ``norm``, ``skip_comparison`` and ``reso``
    :param mreads1: path registered with type 'EXT_HIC_BAM'
    :param mreads2: path registered with type 'EXT_HIC_BAM'
    :param decay_corr_dat: path registered with type 'CORR'
    :param decay_corr_fig: path registered with type 'FIGURE'
    :param nbad_columns: stored as ``N_filtered``
    :param ncolumns: stored as ``N_columns``
    :param scc: stored as ``scc``
    :param std: stored as ``std_scc``
    :param reprod: stored as ``reprod``
    :param eigen_corr_dat: path registered with type 'CORR'
    :param eigen_corr_fig: path registered with type 'FIGURE'
    :param outbed: path registered with type 'HIC_BAM'
    :param corr: sequence of numbers; every second of the first ten is
       formatted into the ``decay_corr`` column
    :param eig_corr: sequence of sequences; the maximum of each of the
       first four is formatted into the ``eigen_corr`` column
    :param biases1: path registered with type 'BIASES' when ``opts.norm``
    :param biases2: path registered with type 'BIASES' when ``opts.norm``
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``

    :raises IndexError: if one of the looked-up paths is not in PATHs
    :raises KeyError: if a filter of ``workdir1`` is absent from
       ``workdir2``
    """
    from contextlib import closing

    lock_path = path.join(opts.workdir, '__lock_db')
    final_db = path.join(opts.workdir, 'trace.db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    def _load_filter_outputs(workdir, tmpdb_attr):
        """Read FILTER_OUTPUTs (name -> path, count) from workdir's trace.db."""
        masked = {'valid-pairs': {'count': 0}}
        if not workdir:
            return masked
        if use_tmpdb:
            # tmp file
            srcdb = getattr(opts, tmpdb_attr)
            try:  # to copy in case reads were already mapped for example
                copyfile(path.join(workdir, 'trace.db'), srcdb)
            except IOError:
                pass
        else:
            srcdb = path.join(workdir, 'trace.db')
        srccon = lite.connect(srcdb)
        # close the connection before the temporary copy is removed
        with closing(srccon), srccon:
            srccur = srccon.cursor()
            srccur.execute("select Name, PATHid, Count from filter_outputs")
            for name, pathid, count in srccur.fetchall():
                res = srccur.execute(
                    "select Path from PATHs where Id = %d" % (pathid))
                masked[name] = {'path': res.fetchall()[0][0], 'count': count}
        if use_tmpdb:
            remove(srcdb)
        return masked

    if use_tmpdb:
        # check lock (NOTE: check-then-create is not atomic)
        while path.exists(lock_path):
            time.sleep(0.5)
        # close lock
        open(lock_path, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(final_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = final_db
        con = lite.connect(dbfile)
        # `with con` only commits/rolls back; `closing` releases the file
        # handle before the database is copied back.
        with closing(con), con:
            cur = con.cursor()

            def _select_path_id(target):
                """Return the PATHs id of target (relative to the workdir)."""
                cur.execute("select id from paths where path = '%s'" % (
                    path.relpath(target, opts.workdir)))
                return cur.fetchall()[0][0]

            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='MERGE_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
                cur.execute("""
                create table JOBs
                   (Id integer primary key,
                    Parameters text,
                    Launch_time text,
                    Finish_time text,
                    Type text,
                    Parameters_md5 text,
                    unique (Parameters_md5))""")
                cur.execute("""
                create table FILTER_OUTPUTs
                   (Id integer primary key,
                    PATHid int,
                    Name text,
                    Count int,
                    JOBid int,
                    unique (PATHid))""")
                cur.execute("""
                create table MERGE_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Wrkd1Path int,
                    Wrkd2Path int,
                    Bed1Path int,
                    Bed2Path int,
                    MergePath int,
                    unique (JOBid))""")
                cur.execute("""
                create table MERGE_STATs
                   (Id integer primary key,
                    JOBid int,
                    Inputs text,
                    decay_corr text,
                    eigen_corr text,
                    reprod real,
                    scc real,
                    std_scc real,
                    N_columns int,
                    N_filtered int,
                    Resolution int,
                    bias1Path int,
                    bias2Path int,
                    unique (JOBid))""")
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            try:
                cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type   , Parameters_md5)
                values
                (NULL,       '%s',        '%s',        '%s', 'Merge',           '%s')
                """ % (parameters,
                       time.strftime(time_fmt, launch_time),
                       time.strftime(time_fmt, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same parameter hash is already registered
                pass

            jobid = get_jobid(cur)
            add_path(cur, decay_corr_dat, 'CORR', jobid, opts.workdir)
            add_path(cur, decay_corr_fig, 'FIGURE', jobid, opts.workdir)
            add_path(cur, eigen_corr_dat, 'CORR', jobid, opts.workdir)
            add_path(cur, eigen_corr_fig, 'FIGURE', jobid, opts.workdir)

            add_path(cur, opts.workdir, 'WORKDIR', jobid)
            add_path(cur, opts.workdir1, 'WORKDIR1', jobid, opts.workdir)
            add_path(cur, opts.workdir2, 'WORKDIR2', jobid, opts.workdir)
            add_path(cur, mreads1, 'EXT_HIC_BAM', jobid, opts.workdir)
            add_path(cur, mreads2, 'EXT_HIC_BAM', jobid, opts.workdir)
            add_path(cur, outbed, 'HIC_BAM', jobid, opts.workdir)

            if opts.norm:
                add_path(cur, biases1, 'BIASES', jobid, opts.workdir)
                add_path(cur, biases2, 'BIASES', jobid, opts.workdir)

                biasid1 = get_path_id(cur, biases1, opts.workdir)
                biasid2 = get_path_id(cur, biases2, opts.workdir)
            else:
                biasid1 = 0
                biasid2 = 0

            bed1 = _select_path_id(mreads1)
            w1path = _select_path_id(opts.workdir1) if opts.workdir1 else 0
            bed2 = _select_path_id(mreads2)
            w2path = _select_path_id(opts.workdir2) if opts.workdir2 else 0
            outbedid = _select_path_id(outbed)

            cur.execute("""
            insert into MERGE_OUTPUTs
            (Id  , JOBid, Wrkd1Path, Wrkd2Path, Bed1Path, Bed2Path, MergePath)
            values
            (NULL,    %d,        %d,        %d,       %d,       %d,        %d)
            """ % (jobid, w1path, w2path, bed1, bed2, outbedid))

            if not opts.skip_comparison:
                decay_corr = '-'.join(
                    ['%.1f' % (v) for v in corr[:10:2]]).replace('0.', '.')
                eigen_corr = '-'.join(
                    ['%.2f' % (max(v))
                     for v in eig_corr[:4]]).replace('0.', '.')
                cur.execute("""
                insert into MERGE_STATs
                (Id  , JOBid, N_columns,   N_filtered, Resolution, reprod, scc, std_scc, decay_corr, eigen_corr, bias1Path, bias2Path)
                values
                (NULL,    %d,        %d,           %d,         %d,     %f,  %f,      %f,       '%s',       '%s',        %d,        %d)
                """ % (jobid, ncolumns, nbad_columns, opts.reso, reprod, scc,
                       std, decay_corr, eigen_corr, biasid1, biasid2))

            masked1 = _load_filter_outputs(opts.workdir1, 'tmpdb1')
            masked2 = _load_filter_outputs(opts.workdir2, 'tmpdb2')

            for f in masked1:
                if f != 'valid-pairs':
                    outmask = path.join(
                        opts.workdir, '03_filtered_reads',
                        'all_r1-r2_intersection_%s.tsv_%s.tsv' % (
                            param_hash, f))
                    # context managers make sure the merged file is flushed
                    # and closed before it is registered
                    with open(outmask, 'w') as out:
                        for workdir, masked in ((opts.workdir1, masked1),
                                                (opts.workdir2, masked2)):
                            src = path.join(workdir, masked[f]['path'])
                            with open(src) as fh:
                                for line in fh:
                                    out.write(line)
                    add_path(cur, outmask, 'FILTER', jobid, opts.workdir)
                else:
                    outmask = outbed

                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id  , PATHid, Name, Count, JOBid)
                values
                (NULL,     %d, '%s',  '%s',    %d)
                """ % (get_path_id(cur, outmask, opts.workdir),
                       f, masked1[f]['count'] + masked2[f]['count'], jobid))

            print_db(cur, 'PATHs')
            print_db(cur, 'JOBs')
            print_db(cur, 'MERGE_OUTPUTs')
            print_db(cur, 'MERGE_STATs')
            print_db(cur, 'FILTER_OUTPUTs')

        if use_tmpdb:
            # copy back file
            copyfile(dbfile, final_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above
        try:
            remove(lock_path)
        except OSError:
            pass
Exemple #11
0
def save_to_db(opts, bias_file, mreads, bad_col_image,
               nbad_columns, ncolumns, raw_cisprc, norm_cisprc,
               inter_vs_gcoord, a2, bam_filter,
               launch_time, finish_time):
    """
    Register a 'Normalize' job and its results in ``trace.db``.

    Creates the PATHs, JOBs and NORMALIZE_OUTPUTs tables when they are
    missing, inserts one JOBs row (silently skipped when a job with the same
    parameter hash already exists), records the file paths through
    ``add_path``, inserts one NORMALIZE_OUTPUTs row and prints the tables
    with ``print_db``.

    When ``opts.tmpdb`` is set the work is done on a copy of the database
    located at ``opts.tmpdb``, guarded by the ``__lock_db`` file in
    ``opts.workdir``; the copy replaces ``trace.db`` once the transaction
    has been committed. The lock file is removed even when an error occurs.

    :param opts: options namespace; reads ``workdir``, ``tmpdb`` (optional),
       ``bam``, ``mappability``, ``fasta``, ``reso``, ``normalization`` and
       ``factor``
    :param bias_file: path registered with type 'BIASES'
    :param mreads: path looked up in PATHs (relative to ``opts.workdir``);
       its id is stored in the ``Input`` column
    :param bad_col_image: path registered with type 'FIGURE'
    :param nbad_columns: stored as ``N_filtered``
    :param ncolumns: stored as ``N_columns``
    :param raw_cisprc: multiplied by 100 and stored as ``Cis_percentage_Raw``
    :param norm_cisprc: multiplied by 100 and stored as
       ``Cis_percentage_Norm``
    :param inter_vs_gcoord: path registered with type 'FIGURE'
    :param a2: stored as ``Slope_700kb_10Mb``; NaN is replaced by 0
    :param bam_filter: stored as ``BAM_filter``
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``

    :raises IndexError: if ``mreads`` is not found in the PATHs table
    """
    from contextlib import closing

    lock_path = path.join(opts.workdir, '__lock_db')
    final_db = path.join(opts.workdir, 'trace.db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock (NOTE: check-then-create is not atomic)
        while path.exists(lock_path):
            time.sleep(0.5)
        # close lock
        open(lock_path, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(final_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = final_db
        con = lite.connect(dbfile)
        # `with con` only commits/rolls back; `closing` releases the file
        # handle before the database is copied back.
        with closing(con), con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='JOBs'""")
            if not cur.fetchall():
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
                cur.execute("""
                create table JOBs
                   (Id integer primary key,
                    Parameters text,
                    Launch_time text,
                    Finish_time text,
                    Type text,
                    Parameters_md5 text,
                    unique (Parameters_md5))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='NORMALIZE_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table NORMALIZE_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Input int,
                    N_columns int,
                    N_filtered int,
                    BAM_filter int,
                    Cis_percentage_Raw real,
                    Cis_percentage_Norm real,
                    Slope_700kb_10Mb real,
                    Resolution int,
                    Normalization text,
                    Factor int,
                    unique (JOBid))""")
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            try:
                cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                values
                (NULL,       '%s',        '%s',        '%s', 'Normalize',           '%s')
                """ % (parameters,
                       time.strftime(time_fmt, launch_time),
                       time.strftime(time_fmt, finish_time), param_hash))
            except lite.IntegrityError:
                # a job with the same parameter hash is already registered
                pass
            jobid = get_jobid(cur)
            add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
            add_path(cur, bad_col_image, 'FIGURE', jobid, opts.workdir)
            add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
            if opts.bam:
                add_path(cur, path.realpath(opts.bam), 'EXT_2D_BAM', jobid,
                         opts.workdir)
            if opts.mappability:
                add_path(cur, path.realpath(opts.mappability),
                         'EXT_MAPPABILITY', jobid, opts.workdir)
            if opts.fasta:
                add_path(cur, path.realpath(opts.fasta), 'EXT_FASTA', jobid,
                         opts.workdir)
            # get pathid of input
            cur.execute("select id from paths where path = '%s'" %
                        (path.relpath(mreads, opts.workdir)))
            input_bed = cur.fetchall()[0][0]

            a2 = 0 if isnan(a2) else a2
            insert_sql = """
            insert into NORMALIZE_OUTPUTs
            (Id  , JOBid,     Input, N_columns,   N_filtered, BAM_filter, Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,   Resolution,      Normalization,      Factor)
            values
            (NULL,    %d,        %d,        %d,           %d,         %d,                 %f,                  %f,               %f,           %d,               '%s',          %f)
            """ % (jobid, input_bed, ncolumns, nbad_columns, bam_filter,
                   100 * raw_cisprc, 100 * norm_cisprc, a2, opts.reso,
                   opts.normalization, opts.factor)
            # The statement is attempted twice (as before) and a warning is
            # printed if both attempts fail.
            for _ in range(2):
                try:
                    cur.execute(insert_sql)
                    break
                except lite.OperationalError:
                    continue
            else:
                print('WARNING: Normalized table not written!!!')

            print_db(cur, 'PATHs')
            print_db(cur, 'JOBs')
            try:
                # these tables may be absent; the first missing one stops
                # the listing
                for table in ('FILTER_OUTPUTs', 'INTERSECTION_OUTPUTs',
                              'MAPPED_INPUTs', 'MAPPED_OUTPUTs',
                              'PARSED_OUTPUTs'):
                    print_db(cur, table)
            except lite.OperationalError:
                pass
            print_db(cur, 'NORMALIZE_OUTPUTs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, final_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above
        try:
            remove(lock_path)
        except OSError:
            pass
Exemple #12
0
def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               outbam, hist_path, median, max_f, mad, launch_time, finish_time):
    """
    Record a 'Filter' job and its outputs in the 'trace.db' SQLite file of
    the working directory.

    Creates the INTERSECTION_OUTPUTs and FILTER_OUTPUTs tables when the
    former is missing, inserts one row in JOBs (silently skipped on an
    IntegrityError), registers the output paths, stores one
    INTERSECTION_OUTPUTs row, one FILTER_OUTPUTs row per entry of *masked*
    and one 'valid-pairs' FILTER_OUTPUTs row, then prints the tables.

    If ``opts.tmpdb`` is set, the database is edited as a temporary copy
    while the '__lock_db' file exists in the working directory; the copy
    replaces 'trace.db' on success. The lock file is removed in every case,
    also when an error interrupts the update.

    :param opts: options object; ``workdir``, ``force``, ``apply`` and
       (if present) ``tmpdb`` are read here, and the whole object is passed
       to ``digest_parameters``
    :param count: value stored as Total_interactions
    :param multiples: mapping stored as space separated 'key:value' pairs
       (keys sorted) in Multiple_interactions
    :param reads: path registered with type '2D_BED'
    :param mreads: path registered with type '2D_BED'; its path id keys the
       INTERSECTION_OUTPUTs row and the 'valid-pairs' row
    :param n_valid_pairs: value stored as Count of the 'valid-pairs' row
    :param masked: mapping of filters; each value is indexed with 'fnam',
       'name' and 'reads'. Entries for which registering 'fnam' raises
       KeyError are skipped
    :param outbam: path registered as 'HIC_BAM' (and ``outbam + '.bai'`` as
       'HIC_BAI')
    :param hist_path: path registered with type 'FIGURE'
    :param median: value stored as Median_fragment_length
    :param max_f: value stored as Max_fragment_length
    :param mad: value stored as MAD_fragment_length
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    def _insert_or_replace(cur, table, pathid, sql):
        # PATHid is declared unique in both output tables: inserting a file
        # twice raises IntegrityError, and the previous row is only replaced
        # when opts.force is set.
        try:
            cur.execute(sql)
        except lite.IntegrityError:
            print('WARNING: already filtered')
            if opts.force:
                cur.execute('delete from %s where PATHid = %d' % (
                    table, pathid))
                cur.execute(sql)

    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db
        # closing() closes the connection; the `con` context itself only
        # commits (or rolls back on error) the transaction
        with closing(lite.connect(dbfile)) as con, con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
        create table INTERSECTION_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Total_interactions int,
            Multiple_interactions text,
            Median_fragment_length,
            MAD_fragment_length,
            Max_fragment_length,
            unique (PATHid))""")
                cur.execute("""
        create table FILTER_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Name text,
            Count int,
            Applied text,
            JOBid int,
            unique (PATHid))""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True )
                cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,       '%s',        '%s',        '%s', 'Filter',           '%s')
     """ % (parameters,
            time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
            time.strftime("%d/%m/%Y %H:%M:%S", finish_time), param_hash))
            except lite.IntegrityError:
                # Parameters_md5 is unique: same job already recorded
                pass

            jobid = get_jobid(cur)

            add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
            add_path(cur, outbam, 'HIC_BAM', jobid, opts.workdir)
            add_path(cur, outbam + '.bai', 'HIC_BAI', jobid, opts.workdir)
            add_path(cur,  reads, '2D_BED', jobid, opts.workdir)
            add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)

            mreads_id = get_path_id(cur, mreads, opts.workdir)
            _insert_or_replace(cur, 'INTERSECTION_OUTPUTs', mreads_id, """
            insert into INTERSECTION_OUTPUTs
            (Id  , PATHid, Total_interactions, Multiple_interactions, Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values
            (NULL,    %d,                  %d,                  '%s',                     %d,                  %d,                  %d)
            """ % (mreads_id,
                   count, ' '.join(['%s:%d' % (k, multiples[k])
                                    for k in sorted(multiples)]),
                   median, mad, max_f))

            # nf is the 1-based position of the filter, matched against the
            # filters listed in opts.apply
            for nf, f in enumerate(masked, 1):
                try:
                    add_path(cur, masked[f]['fnam'], 'FILTER', jobid,
                             opts.workdir)
                except KeyError:
                    continue
                mask_id = get_path_id(cur, masked[f]['fnam'], opts.workdir)
                _insert_or_replace(cur, 'FILTER_OUTPUTs', mask_id, """
            insert into FILTER_OUTPUTs
                (Id  , PATHid, Name, Count, Applied, JOBid)
            values
                (NULL,     %d, '%s',  '%s',    '%s',    %d)
                """ % (mask_id,
                       masked[f]['name'], masked[f]['reads'],
                       'True' if nf in opts.apply else 'False', jobid))

            _insert_or_replace(cur, 'FILTER_OUTPUTs', mreads_id, """
        insert into FILTER_OUTPUTs
            (Id  , PATHid, Name, Count, Applied, JOBid)
        values
            (NULL,     %d, '%s',  '%s',    '%s',    %d)
            """ % (mreads_id,
                   'valid-pairs', n_valid_pairs, '', jobid))

            print_db(cur, 'MAPPED_INPUTs')
            print_db(cur, 'PATHs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
            print_db(cur, 'JOBs')
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'FILTER_OUTPUTs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #13
0
def save_to_db(opts, outdir, results, batch_job_hash, launch_time,
               finish_time):
    """
    Record an optimization/modelling job and its results in the 'trace.db'
    SQLite file of the working directory.

    Creates the PATHs, JOBs, MODELED_REGIONs and MODELs tables when missing,
    inserts one row in JOBs whose Type is 'OPTIM' or 'MODEL' (prefixed with
    'PRE_' when ``opts.job_list`` is set), registers *outdir*, inserts the
    modelled region (skipped on IntegrityError) and inserts or updates one
    MODELs row per entry of *results*.

    If ``opts.tmpdb`` is set, the database is edited as a temporary copy
    while the '__lock_db' file exists in the working directory; the copy
    replaces 'trace.db' on success. The lock file is removed in every case,
    also when an error interrupts the update.

    :param opts: options object; ``workdir``, ``job_list``, ``optimize``,
       ``reso``, ``beg``, ``end``, ``optimization_id`` and (if present)
       ``tmpdb`` are read here
    :param outdir: path registered with type 'DIR'
    :param results: iterable of ``(m, u, l, d, s)`` tuples that can also be
       indexed by such a tuple, yielding a mapping with 'nmodels', 'kept'
       and 'corr'. The tuple is stored as MaxDist, UpFreq, LowFreq, Cutoff
       and Scale respectively
    :param batch_job_hash: value stored as PARAM_md5 of the modelled region
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db

        # closing() closes the connection; the `con` context itself only
        # commits (or rolls back on error) the transaction
        with closing(lite.connect(dbfile)) as con, con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='JOBs'""")
            if not cur.fetchall():
                cur.execute("""
            create table PATHs
               (Id integer primary key,
                JOBid int, Path text, Type text,
                unique (Path))""")
                cur.execute("""
            create table JOBs
               (Id integer primary key,
                Parameters text,
                Launch_time text,
                Finish_time text,
                Type text,
                Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MODELED_REGIONs'""")
            if not cur.fetchall():
                cur.execute("""
        create table MODELED_REGIONs
           (Id integer primary key,
            PATHid int,
            PARAM_md5 text,
            RESO int,
            BEG int,
            END int,
            unique (PARAM_md5))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MODELs'""")
            if not cur.fetchall():
                cur.execute("""
        create table MODELs
           (Id integer primary key,
            REGIONid int,
            JOBid int,
            OPTPAR_md5 text,
            MaxDist int,
            UpFreq int,
            LowFreq int,
            Scale int,
            Cutoff int,
            Nmodels int,
            Kept int,
            Correlation int)""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                # In case optimization or modeling  is split in different
                # computers
                param_hash = digest_parameters(opts, get_md5=True)
                job_type = (('PRE_' if opts.job_list else '') +
                            ('OPTIM' if opts.optimize else 'MODEL'))
                cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,       '%s',        '%s',        '%s',    '%s',           '%s')
     """ % (parameters, time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
            time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
            job_type, param_hash))
            except lite.IntegrityError:
                # Parameters_md5 is unique: same job already recorded
                pass
            ##### STORE OPTIMIZATION RESULT
            jobid = get_jobid(cur)
            add_path(cur, outdir, 'DIR', jobid, opts.workdir)
            pathid = get_path_id(cur, outdir, opts.workdir)
            ### STORE GENERAL OPTIMIZATION INFO
            try:
                cur.execute("""
            insert into MODELED_REGIONs
            (Id  , PATHid, PARAM_md5, RESO, BEG, END)
            values
            (NULL,     %d,      "%s",   %d,  %d,  %d)
            """ % (pathid, batch_job_hash, opts.reso, opts.beg, opts.end))
            except lite.IntegrityError:
                # PARAM_md5 is unique: region already recorded
                pass
            ### STORE EACH OPTIMIZATION
            cur.execute("SELECT Id from MODELED_REGIONs where PARAM_md5='%s'" %
                        (batch_job_hash))
            optimid = cur.fetchall()[0][0]
            for m, u, l, d, s in results:
                res = results[(m, u, l, d, s)]
                # short hash identifying this combination of parameters
                optpar_md5 = md5('%s%s%s%s%s' % (m, u, l, d, s)).hexdigest()[:12]
                cur.execute(
                    ("SELECT Id from MODELs where "
                     "OPTPAR_md5='%s' and REGIONid='%s'") % (optpar_md5, optimid))
                if not cur.fetchall():
                    cur.execute("""
                insert into MODELs
                (Id  , REGIONid, JOBid, OPTPAR_md5, MaxDist, UpFreq, LowFreq, Cutoff, Scale, Nmodels, Kept, Correlation)
                values
                (NULL,             %d,    %d,      '%s',      %s,     %s,      %s,     %s,    %s,      %d,   %d,          %f)
                """ % ((optimid, jobid, optpar_md5, m, u, l, d, s,
                        res['nmodels'], res['kept'], res['corr'])))
                else:
                    # combination already stored for this region: refresh it
                    cur.execute(
                        ("update MODELs "
                         "set Nmodels = %d, Kept = %d, Correlation = %f "
                         "where "
                         "OPTPAR_md5='%s' and REGIONid='%s'") %
                        (res['nmodels'], res['kept'], res['corr'],
                         optpar_md5, optimid))

            ### MODELING
            # NOTE(review): `optimid` computed below is not used afterwards
            # in this function. In the first branch `len(optimid)` is the
            # number of columns of the first row (always 1), not the number
            # of rows, so the "more than 1 optimization" error is never
            # raised. Behaviour left unchanged on purpose: raising here
            # would roll back everything stored above.
            if not opts.optimization_id:
                cur.execute("SELECT Id from MODELED_REGIONs")
                optimid = cur.fetchall()[0]
                if len(optimid) > 1:
                    raise IndexError("ERROR: more than 1 optimization in folder "
                                     "choose with 'tadbit describe' and "
                                     "--optimization_id")
                optimid = optimid[0]
            else:
                cur.execute("SELECT Id from MODELED_REGIONs where Id=%d" %
                            (opts.optimization_id))
                optimid = cur.fetchall()[0][0]

        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #14
0
def save_to_db(opts, count, ncolumns, bias_file, nbad_columns, outbam,
               launch_time, finish_time):
    """
    Record an 'Import' job (and, when *bias_file* is given, a 'Normalize'
    job) in the 'trace.db' SQLite file of the working directory.

    Creates the PATHs, JOBs, INTERSECTION_OUTPUTs and FILTER_OUTPUTs tables
    when missing, registers *outbam* and its '.bai' index, and stores one
    INTERSECTION_OUTPUTs row (with '' and 1, 1, 1 as placeholder values for
    the multiple-interaction and fragment-length columns) plus one
    'valid-pairs' FILTER_OUTPUTs row. With a *bias_file*, it also creates
    NORMALIZE_OUTPUTs when missing, sets ``opts.normalization`` to
    'custom', registers the bias file as 'BIASES' and stores one
    NORMALIZE_OUTPUTs row with zeros for the values not available here.

    If ``opts.tmpdb`` is set, the database is edited as a temporary copy
    while the '__lock_db' file exists in the working directory; the copy
    replaces 'trace.db' on success. The lock file is removed in every case,
    also when an error interrupts the update.

    :param opts: options object; ``workdir``, ``reso`` and (if present)
       ``tmpdb`` are read here; ``normalization`` is overwritten when
       *bias_file* is given
    :param count: value stored as Total_interactions and as Count of the
       'valid-pairs' row
    :param ncolumns: value stored as N_columns
    :param bias_file: path of the bias file, or a false value to skip the
       normalization records
    :param nbad_columns: value stored as N_filtered
    :param outbam: path registered as 'HIC_BAM' (and ``outbam + '.bai'`` as
       'HIC_BAI')
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db
        # closing() closes the connection; the `con` context itself only
        # commits (or rolls back on error) the transaction
        with closing(lite.connect(dbfile)) as con, con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='JOBs'""")
            if not cur.fetchall():
                try:
                    cur.execute("""
                create table PATHs
                (Id integer primary key,
                JOBid int, Path text, Type text,
                unique (Path))""")
                except lite.OperationalError:
                    pass  # may happen when mapped files cleaned
                cur.execute("""
            create table JOBs
               (Id integer primary key,
                Parameters text,
                Launch_time text,
                Finish_time text,
                Type text,
                Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table INTERSECTION_OUTPUTs
                   (Id integer primary key,
                    PATHid int,
                    Total_interactions int,
                    Multiple_interactions text,
                    Median_fragment_length,
                    MAD_fragment_length,
                    Max_fragment_length,
                    unique (PATHid))""")
                cur.execute("""
                create table FILTER_OUTPUTs
                   (Id integer primary key,
                    PATHid int,
                    Name text,
                    Count int,
                    Applied text,
                    JOBid int,
                    unique (PATHid))""")
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,       '%s',        '%s',        '%s', 'Import',           '%s')
     """ % (parameters, time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
            time.strftime("%d/%m/%Y %H:%M:%S", finish_time), param_hash))
            except lite.IntegrityError:
                # Parameters_md5 is unique: same job already recorded
                pass

            jobid = get_jobid(cur)

            add_path(cur, outbam, 'HIC_BAM', jobid, opts.workdir)
            add_path(cur, outbam + '.bai', 'HIC_BAI', jobid, opts.workdir)
            try:
                cur.execute("""
            insert into INTERSECTION_OUTPUTs
            (Id  , PATHid, Total_interactions, Multiple_interactions, Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values
            (NULL,    %d,                  %d,                  '%s',                     %d,                  %d,                  %d)
            """ % (get_path_id(cur, outbam, opts.workdir), count, '', 1, 1, 1))
            except lite.IntegrityError:
                print('WARNING: already filtered')
            try:
                cur.execute("""
        insert into FILTER_OUTPUTs
            (Id  , PATHid, Name, Count, Applied, JOBid)
        values
            (NULL,     %d, '%s',  '%s',    '%s',    %d)
            """ % (get_path_id(cur, outbam,
                               opts.workdir), 'valid-pairs', count, '', jobid))
            except lite.IntegrityError:
                print('WARNING: already filtered')

            if bias_file:
                cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='NORMALIZE_OUTPUTs'""")
                if not cur.fetchall():
                    cur.execute("""
                create table NORMALIZE_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Input int,
                    N_columns int,
                    N_filtered int,
                    BAM_filter int,
                    Cis_percentage_Raw real,
                    Cis_percentage_Norm real,
                    Slope_700kb_10Mb real,
                    Resolution int,
                    Normalization text,
                    Factor int,
                    unique (JOBid))""")
                try:
                    # changes the digest of the parameters, so that the
                    # 'Normalize' job differs from the 'Import' one
                    opts.normalization = 'custom'
                    parameters = digest_parameters(opts, get_md5=False)
                    param_hash = digest_parameters(opts, get_md5=True)
                    cur.execute("""
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                values
                (NULL,       '%s',        '%s',        '%s', 'Normalize',           '%s')
                """ % (parameters,
                       time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                       time.strftime("%d/%m/%Y %H:%M:%S",
                                     finish_time), param_hash))
                except lite.IntegrityError:
                    pass
                jobid = get_jobid(cur)
                add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
                input_bed = get_path_id(cur, outbam, opts.workdir)
                try:
                    cur.execute("""
                insert into NORMALIZE_OUTPUTs
                (Id  , JOBid,     Input, N_columns,   N_filtered, BAM_filter, Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,   Resolution,      Normalization,      Factor)
                values
                (NULL,    %d,        %d,        %d,           %d,         %d,                 %f,                  %f,               %f,           %d,               '%s',          %f)
                """ % (jobid, input_bed, ncolumns, nbad_columns, 0, 0, 0, 0,
                       opts.reso, 'custom', 0))
                except lite.OperationalError:
                    print('WARNING: Normalized table not written!!!')
            print_db(cur, 'PATHs')
            print_db(cur, 'JOBs')
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'FILTER_OUTPUTs')
            if bias_file:
                print_db(cur, 'NORMALIZE_OUTPUTs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #15
0
def save_to_db(opts, counts, multis, f_names1, f_names2, out_file1, out_file2,
               launch_time, finish_time):
    """
    Record a 'Parse' job and its outputs in the 'trace.db' SQLite file of
    the working directory.

    Creates the MAPPED_OUTPUTs and PARSED_OUTPUTs tables when the latter is
    missing, inserts one row in JOBs (silently skipped on an
    IntegrityError), registers the output BED file(s) and the genome FASTA
    files, then stores, for each key of *counts*, one MAPPED_OUTPUTs row per
    input file and one PARSED_OUTPUTs row with the total, and prints the
    tables.

    :param opts: options object; ``workdir`` and ``genome`` are read here
    :param counts: mapping whose keys index ``(f_names1, f_names2)`` and
       ``(out_file1, out_file2)``; each value is a mapping whose values are
       stored as Uniquely_mapped, one per input file
    :param multis: indexed with the keys of *counts*; stored as Multiples
    :param f_names1: sequence of input paths for the first output file
    :param f_names2: sequence of input paths for the second output file
    :param out_file1: path registered with type 'BED'
    :param out_file2: path registered with type 'BED' when true
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    dbfile = path.join(opts.workdir, 'trace.db')
    # closing() closes the connection; the `con` context itself only
    # commits (or rolls back on error) the transaction
    with closing(lite.connect(dbfile)) as con, con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='PARSED_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
        create table MAPPED_OUTPUTs
           (Id integer primary key,
            PATHid int,
            BEDid int,
            Uniquely_mapped int,
            unique (PATHid, BEDid))""")
            cur.execute("""
        create table PARSED_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Total_interactions int,
            Multiples int,
            unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,       '%s',        '%s',        '%s', 'Parse',           '%s')
     """ % (parameters, time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
            time.strftime("%d/%m/%Y %H:%M:%S", finish_time), param_hash))
        except lite.IntegrityError:
            # Parameters_md5 is unique: same job already recorded
            pass
        jobid = get_jobid(cur)
        add_path(cur, out_file1, 'BED', jobid, opts.workdir)
        for genome in opts.genome:
            add_path(cur, genome, 'FASTA', jobid, opts.workdir)
        if out_file2:
            add_path(cur, out_file2, 'BED', jobid, opts.workdir)
        fnames = f_names1, f_names2
        outfiles = out_file1, out_file2
        for count in counts:
            # total computed before the inserts: a duplicated MAPPED_OUTPUTs
            # row interrupts the loop below and must not truncate the total
            # written to PARSED_OUTPUTs
            sum_reads = sum(counts[count][item] for item in counts[count])
            try:
                for i, item in enumerate(counts[count]):
                    cur.execute("""
                    insert into MAPPED_OUTPUTs
                    (Id  , PATHid, BEDid, Uniquely_mapped)
                    values
                    (NULL,    %d,     %d,      %d)
                    """ % (get_path_id(cur, fnames[count][i], opts.workdir),
                           get_path_id(cur, outfiles[count],
                                       opts.workdir), counts[count][item]))
            except lite.IntegrityError:
                print('WARNING: already parsed (MAPPED_OUTPUTs)')
            try:
                cur.execute("""
                insert into PARSED_OUTPUTs
                (Id  , PATHid, Total_interactions, Multiples)
                values
                (NULL,     %d,      %d,        %d)
                """ % (get_path_id(cur, outfiles[count],
                                   opts.workdir), sum_reads, multis[count]))
            except lite.IntegrityError:
                print('WARNING: already parsed (PARSED_OUTPUTs)')
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
Exemple #16
0
def save_to_db(opts, outfiles, launch_time, finish_time):
    """
    Record a 'Map' job and its outputs in the 'trace.db' SQLite file of the
    working directory (a little DB to keep track of processes and options).

    Creates the PATHs, JOBs and MAPPED_INPUTs tables when the latter is
    missing, inserts one row in JOBs (silently skipped on an
    IntegrityError), registers the working directory, the FASTQ, the index
    and every output file, stores one MAPPED_INPUTs row per output file
    (duplicates silently skipped) and prints the tables.

    :param opts: options object; ``workdir``, ``fastq``, ``index``,
       ``windows``, ``iterative``, ``read`` and ``renz`` are read here
    :param outfiles: sequence of ``(path, number of entries)`` pairs, one
       per mapping step
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    dbfile = path.join(opts.workdir, 'trace.db')
    # closing() closes the connection; the `con` context itself only
    # commits (or rolls back on error) the transaction
    with closing(lite.connect(dbfile)) as con, con:
        # check if table exists
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MAPPED_INPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key,
                JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key,
                Parameters text,
                Launch_time text,
                Finish_time text,
                Type text,
                Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""
            create table MAPPED_INPUTs
               (Id integer primary key,
                PATHid int,
                Entries int,
                Trim text,
                Frag text,
                Read int,
                Enzyme text,
                WRKDIRid int,
                MAPPED_OUTPUTid int,
                INDEXid int,
                unique (PATHid,Entries,Read,Enzyme,WRKDIRid,MAPPED_OUTPUTid,INDEXid))""")

        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
    values
     (NULL,       '%s',        '%s',        '%s', 'Map',           '%s')
     """ % (parameters,
            time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
            time.strftime("%d/%m/%Y %H:%M:%S", finish_time), param_hash))
        except lite.IntegrityError:
            # Parameters_md5 is unique: same job already recorded
            pass
        jobid = get_jobid(cur)
        add_path(cur, opts.workdir, 'WORKDIR', jobid)
        add_path(cur, opts.fastq  ,  'MAPPED_FASTQ' , jobid, opts.workdir)
        add_path(cur, opts.index  , 'INDEX'  , jobid, opts.workdir)
        last = len(outfiles) - 1
        for i, (out, num) in enumerate(outfiles):
            try:
                window = opts.windows[i]
            except IndexError:
                # fewer windows than output files: reuse the last one
                window = opts.windows[-1]
            except TypeError:
                # opts.windows can not be indexed (e.g. it is None)
                window = 'None'
            add_path(cur, out, 'SAM/MAP', jobid, opts.workdir)
            # without iterative mapping, only the last output file is
            # tagged 'frag', the others 'full'
            if opts.iterative:
                frag = 'none'
            elif i == last:
                frag = 'frag'
            else:
                frag = 'full'
            try:
                cur.execute("""
    insert into MAPPED_INPUTs
     (Id  , PATHid, Entries, Trim, Frag, Read, Enzyme, WRKDIRid, MAPPED_OUTPUTid, INDEXid)
    values
     (NULL,      %d,     %d, '%s', '%s',   %d,   '%s',       %d,    %d,      %d)
     """ % (get_path_id(cur, opts.fastq, opts.workdir), num, window, frag,
            opts.read, opts.renz, get_path_id(cur, opts.workdir),
            get_path_id(cur, out, opts.workdir),
            get_path_id(cur, opts.index, opts.workdir)))
            except lite.IntegrityError:
                pass
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs' )
        print_db(cur, 'JOBs'  )
Exemple #17
0
def save_to_db(opts, launch_time, finish_time, out_files, out_plots):
    """
    Record a 'Bin' job and its output files in the 'trace.db' SQLite file of
    the working directory.

    Inserts one row in JOBs (silently skipped on an IntegrityError; if the
    insert fails with an OperationalError the PATHs and JOBs tables are
    created and the insert is repeated), registers every matrix and figure
    and, unless ``opts.quiet`` is set, prints the JOBs and PATHs tables.

    If ``opts.tmpdb`` is set, the database is edited as a temporary copy
    while the '__lock_db' file exists in the working directory; the copy
    replaces 'trace.db' on success. The lock file is removed in every case,
    also when an error interrupts the update.

    :param opts: options object; ``workdir``, ``quiet`` and (if present)
       ``tmpdb`` are read here
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    :param out_files: mapping of name to path, registered with type
       ``name + '_MATRIX'``
    :param out_plots: mapping of name to path, registered with type
       ``name + '_FIGURE'``
    """
    from contextlib import closing

    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db
        # closing() closes the connection; the `con` context itself only
        # commits (or rolls back on error) the transaction
        with closing(lite.connect(dbfile)) as con, con:
            cur = con.cursor()
            parameters = digest_parameters(opts, get_md5=False, extra=['quiet'])
            param_hash = digest_parameters(opts, get_md5=True , extra=['quiet'])
            # built once: the same statement is run again after creating
            # the tables when they were missing
            insert_job = """
            insert into JOBs
            (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
            values
            (NULL,       '%s',        '%s',        '%s', 'Bin',           '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time), param_hash)
            try:
                cur.execute(insert_job)
            except lite.IntegrityError:
                # Parameters_md5 is unique: same job already recorded
                pass
            except lite.OperationalError:
                try:
                    cur.execute("""
                create table PATHs
                (Id integer primary key,
                JOBid int, Path text, Type text,
                unique (Path))""")
                except lite.OperationalError:
                    pass  # may happen when mapped files cleaned
                cur.execute("""
            create table JOBs
               (Id integer primary key,
                Parameters text,
                Launch_time text,
                Finish_time text,
                Type text,
                Parameters_md5 text,
                unique (Parameters_md5))""")
                cur.execute(insert_job)
            jobid = get_jobid(cur)
            for fnam in out_files:
                add_path(cur, out_files[fnam], fnam + '_MATRIX', jobid,
                         opts.workdir)
            for fnam in out_plots:
                add_path(cur, out_plots[fnam], fnam + '_FIGURE', jobid,
                         opts.workdir)
            if not opts.quiet:
                print_db(cur, 'JOBs')
                print_db(cur, 'PATHs')
        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #18
0
def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               hist_path, median, max_f, mad, launch_time, finish_time):
    """
    Record a 'Filter' job and its outputs in the 'trace.db' SQLite file of
    the working directory.

    Creates the INTERSECTION_OUTPUTs and FILTER_OUTPUTs tables when the
    former is missing, inserts one row in JOBs (silently skipped on an
    IntegrityError), registers the output paths, stores one
    INTERSECTION_OUTPUTs row, one FILTER_OUTPUTs row per entry of *masked*
    and one 'valid-pairs' FILTER_OUTPUTs row, then prints the tables.

    :param opts: options object; ``workdir`` and ``force`` are read here,
       and the whole object is passed to ``digest_parameters``
    :param count: value stored as Total_interactions
    :param multiples: mapping stored as space separated 'key:value' pairs
       (keys sorted) in Multiple_interactions
    :param reads: path registered with type '2D_BED'
    :param mreads: path registered with type '2D_BED'; its path id keys the
       INTERSECTION_OUTPUTs row and the 'valid-pairs' row
    :param n_valid_pairs: value stored as Count of the 'valid-pairs' row
    :param masked: mapping of filters; each value is indexed with 'fnam',
       'name' and 'reads'
    :param hist_path: path registered with type 'FIGURE'
    :param median: value stored as Median_fragment_length
    :param max_f: value stored as Max_fragment_length
    :param mad: value stored as MAD_fragment_length
    :param launch_time: time tuple formatted with ``time.strftime``
    :param finish_time: time tuple formatted with ``time.strftime``
    """
    from contextlib import closing

    def _insert_or_replace(cur, table, pathid, sql):
        # PATHid is declared unique in both output tables: inserting a file
        # twice raises IntegrityError, and the previous row is only replaced
        # when opts.force is set.
        try:
            cur.execute(sql)
        except lite.IntegrityError:
            print('WARNING: already filtered')
            if opts.force:
                cur.execute('delete from %s where PATHid = %d' % (
                    table, pathid))
                cur.execute(sql)

    dbfile = path.join(opts.workdir, 'trace.db')
    # closing() closes the connection; the `con` context itself only
    # commits (or rolls back on error) the transaction
    with closing(lite.connect(dbfile)) as con, con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
        create table INTERSECTION_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Total_interactions int,
            Multiple_interactions text,
            Median_fragment_length,
            MAD_fragment_length,
            Max_fragment_length,
            unique (PATHid))""")
            cur.execute("""
        create table FILTER_OUTPUTs
           (Id integer primary key,
            PATHid int,
            Name text,
            Count int,
            JOBid int,
            unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True )
            cur.execute("""
    insert into JOBs
     (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
    values
     (NULL,       '%s',        '%s',        '%s', 'Filter',           '%s')
     """ % (parameters,
            time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
            time.strftime("%d/%m/%Y %H:%M:%S", finish_time), param_hash))
        except lite.IntegrityError:
            # Parameters_md5 is unique: same job already recorded
            pass

        jobid = get_jobid(cur)

        add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
        add_path(cur,  reads, '2D_BED', jobid, opts.workdir)
        add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)

        mreads_id = get_path_id(cur, mreads, opts.workdir)
        _insert_or_replace(cur, 'INTERSECTION_OUTPUTs', mreads_id, """
            insert into INTERSECTION_OUTPUTs
            (Id  , PATHid, Total_interactions, Multiple_interactions, Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values
            (NULL,    %d,                  %d,                  '%s',                     %d,                  %d,                  %d)
            """ % (mreads_id,
                   count, ' '.join(['%s:%d' % (k, multiples[k])
                                    for k in sorted(multiples)]),
                   median, mad, max_f))

        for f in masked:
            add_path(cur, masked[f]['fnam'], 'FILTER', jobid, opts.workdir)
            mask_id = get_path_id(cur, masked[f]['fnam'], opts.workdir)
            _insert_or_replace(cur, 'FILTER_OUTPUTs', mask_id, """
            insert into FILTER_OUTPUTs
            (Id  , PATHid, Name, Count, JOBid)
            values
            (NULL,    %d,     '%s',      '%s', %d)
                """ % (mask_id,
                       masked[f]['name'], masked[f]['reads'], jobid))

        _insert_or_replace(cur, 'FILTER_OUTPUTs', mreads_id, """
        insert into FILTER_OUTPUTs
        (Id  , PATHid, Name, Count, JOBid)
        values
        (NULL,    %d,     '%s',      '%s', %d)
            """ % (mreads_id,
                   'valid-pairs', n_valid_pairs, jobid))

        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
        print_db(cur, 'INTERSECTION_OUTPUTs')
        print_db(cur, 'FILTER_OUTPUTs')
Exemple #19
0
def save_to_db(opts, cis_trans_N_D, cis_trans_N_d, cis_trans_n_D,
               cis_trans_n_d, a2, bad_columns_file, bias_file, inter_vs_gcoord,
               mreads, nbad_columns, ncolumns, intra_dir_nrm_fig,
               intra_dir_nrm_txt, inter_dir_nrm_fig, inter_dir_nrm_txt,
               genom_map_nrm_fig, genom_map_nrm_txt, intra_dir_raw_fig,
               intra_dir_raw_txt, inter_dir_raw_fig, inter_dir_raw_txt,
               genom_map_raw_fig, genom_map_raw_txt, pickle_path, launch_time,
               finish_time):
    """
    Record a 'Normalize' job and its outputs in the trace database of the
    working directory.

    One row is inserted in JOBs, the produced files are registered through
    add_path, and the summary values are stored in NORMALIZE_OUTPUTs (the
    table is created if it does not exist yet). The tracked tables are then
    printed with print_db.

    When ``opts.tmpdb`` is set, the function waits until the ``__lock_db``
    file disappears from the working directory, creates it, works on a copy
    of ``trace.db`` located at ``opts.tmpdb`` and copies the result back at
    the end. The lock file is removed even if writing fails, so that other
    processes waiting on it are not blocked forever.

    :param opts: options object; ``workdir``, ``reso``, ``factor`` and
       (optionally) ``tmpdb`` are read here, and the object is handed to
       digest_parameters
    :param cis_trans_N_D: value stored in column CisTrans_nrm_all
    :param cis_trans_N_d: value stored in column CisTrans_nrm_out
    :param cis_trans_n_D: value stored in column CisTrans_raw_all
    :param cis_trans_n_d: value stored in column CisTrans_raw_out
    :param a2: value stored in column Slope_700kb_10Mb
    :param bad_columns_file: path registered with type 'BAD_COLUMNS'
    :param bias_file: path registered with type 'BIASES'
    :param inter_vs_gcoord: path registered with type 'FIGURE'
    :param mreads: path registered with type '2D_BED'; its PATHs id is stored
       in column Input
    :param nbad_columns: value stored in column N_filtered
    :param ncolumns: value stored in column N_columns
    :param intra_dir_nrm_fig: optional path, registered as 'FIGURES'
    :param intra_dir_nrm_txt: optional path, registered as 'NRM_MATRICES'
    :param inter_dir_nrm_fig: optional path, registered as 'FIGURES'
    :param inter_dir_nrm_txt: optional path, registered as 'NRM_MATRICES'
    :param genom_map_nrm_fig: optional path, registered as 'FIGURE'
    :param genom_map_nrm_txt: optional path, registered as 'NRM_MATRIX'
    :param intra_dir_raw_fig: optional path, registered as 'FIGURES'
    :param intra_dir_raw_txt: optional path, registered as 'RAW_MATRICES'
    :param inter_dir_raw_fig: optional path, registered as 'FIGURES'
    :param inter_dir_raw_txt: optional path, registered as 'RAW_MATRICES'
    :param genom_map_raw_fig: optional path, registered as 'FIGURE'
    :param genom_map_raw_txt: optional path, registered as 'RAW_MATRIX'
    :param pickle_path: path registered with type 'PICKLE'
    :param launch_time: time tuple formatted with time.strftime
    :param finish_time: time tuple formatted with time.strftime
    """
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db

        con = lite.connect(dbfile)
        try:
            # "with con" commits on success and rolls back on error, it does
            # not close the connection (done in the finally clause below)
            with con:
                cur = con.cursor()
                cur.execute("""SELECT name FROM sqlite_master WHERE
                               type='table' AND name='NORMALIZE_OUTPUTs'""")
                if not cur.fetchall():
                    cur.execute("""
                    create table NORMALIZE_OUTPUTs
                       (Id integer primary key,
                        JOBid int,
                        Input int,
                        N_columns int,
                        N_filtered int,
                        CisTrans_nrm_all real,
                        CisTrans_nrm_out real,
                        CisTrans_raw_all real,
                        CisTrans_raw_out real,
                        Slope_700kb_10Mb real,
                        Resolution int,
                        Factor int,
                        unique (JOBid))""")
                # NOTE(review): values are interpolated into the SQL text, a
                # single quote in any of them would break the statement
                try:
                    parameters = digest_parameters(opts, get_md5=False)
                    param_hash = digest_parameters(opts, get_md5=True)
                    cur.execute(
                        """
                    insert into JOBs
                    (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                    values
                    (NULL,       '%s',        '%s',        '%s', 'Normalize',           '%s')
                    """ % (parameters,
                           time.strftime(time_fmt, launch_time),
                           time.strftime(time_fmt, finish_time), param_hash))
                except lite.IntegrityError:
                    # job already recorded (Parameters_md5 is unique in JOBs
                    # as created by the mapping step -- TODO confirm)
                    pass
                jobid = get_jobid(cur)
                add_path(cur, pickle_path, 'PICKLE', jobid, opts.workdir)
                add_path(cur, bad_columns_file, 'BAD_COLUMNS', jobid,
                         opts.workdir)
                add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
                add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
                add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
                # get pathid of input
                cur.execute("select id from paths where path = '%s'" %
                            (path.relpath(mreads, opts.workdir)))
                input_bed = cur.fetchall()[0][0]

                # optional outputs: each path is registered only if it was
                # itself provided (text outputs used to be gated on the
                # corresponding figure path)
                optional_outputs = (
                    (intra_dir_nrm_fig, 'FIGURES'),
                    (intra_dir_nrm_txt, 'NRM_MATRICES'),
                    (inter_dir_nrm_fig, 'FIGURES'),
                    (inter_dir_nrm_txt, 'NRM_MATRICES'),
                    (genom_map_nrm_fig, 'FIGURE'),
                    (genom_map_nrm_txt, 'NRM_MATRIX'),
                    (intra_dir_raw_fig, 'FIGURES'),
                    (intra_dir_raw_txt, 'RAW_MATRICES'),
                    (inter_dir_raw_fig, 'FIGURES'),
                    (inter_dir_raw_txt, 'RAW_MATRICES'),
                    (genom_map_raw_fig, 'FIGURE'),
                    (genom_map_raw_txt, 'RAW_MATRIX'),
                )
                for out_path, out_type in optional_outputs:
                    if out_path:
                        add_path(cur, out_path, out_type, jobid, opts.workdir)

                try:
                    cur.execute("""
                    insert into NORMALIZE_OUTPUTs
                    (Id  , JOBid,     Input, N_columns,   N_filtered, CisTrans_nrm_all,   CisTrans_nrm_out,   CisTrans_raw_all,   CisTrans_raw_out, Slope_700kb_10Mb,   Resolution,      Factor)
                    values
                    (NULL,    %d,        %d,        %d,           %d,               %f,                 %f,                 %f,                 %f,               %f,           %d,          %f)
                    """ % (jobid, input_bed, ncolumns, nbad_columns,
                           cis_trans_N_D, cis_trans_N_d, cis_trans_n_D,
                           cis_trans_n_d, a2, opts.reso, opts.factor))
                except lite.OperationalError:
                    # retry without the CisTrans_nrm_* columns
                    # NOTE(review): presumably for values that do not render
                    # as valid SQL numbers (e.g. nan) -- confirm
                    try:
                        cur.execute("""
                        insert into NORMALIZE_OUTPUTs
                        (Id  , JOBid,     Input, N_columns,   N_filtered,  CisTrans_raw_all,   CisTrans_raw_out, Slope_700kb_10Mb,   Resolution,      Factor)
                        values
                        (NULL,    %d,        %d,        %d,           %d,                %f,                 %f,               %f,           %d,          %f)
                        """ % (jobid, input_bed, ncolumns, nbad_columns,
                               cis_trans_n_D, cis_trans_n_d, a2, opts.reso,
                               opts.factor))
                    except lite.OperationalError:
                        print('WARNING: Normalized table not written!!!')

                print_db(cur, 'PATHs')
                print_db(cur, 'JOBs')
                try:
                    print_db(cur, 'INTERSECTION_OUTPUTs')
                    print_db(cur, 'MAPPED_INPUTs')
                    print_db(cur, 'MAPPED_OUTPUTs')
                    print_db(cur, 'PARSED_OUTPUTs')
                except lite.OperationalError:
                    pass
                print_db(cur, 'FILTER_OUTPUTs')
                print_db(cur, 'NORMALIZE_OUTPUTs')
        finally:
            con.close()

        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above; as before the lock
        # file is removed whether or not this call created it
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #20
0
def save_to_db(opts, dangling_ends, ligated, fig_path, outfiles, launch_time,
               finish_time):
    """
    write little DB to keep track of processes and options

    Records a 'Map' job in the trace database of the working directory:
    creates the PATHs, JOBs and MAPPED_INPUTs tables when MAPPED_INPUTs is
    missing, inserts the job, registers the paths involved through add_path
    and inserts one MAPPED_INPUTs row per output file.

    When ``opts.tmpdb`` is set, the function waits until the ``__lock_db``
    file disappears from the working directory, creates it, works on a copy
    of ``trace.db`` located at ``opts.tmpdb`` and copies the result back at
    the end. The lock file is removed even if writing fails, so that other
    processes waiting on it are not blocked forever.

    :param opts: options object; ``workdir``, ``fastq``, ``index``, ``read``,
       ``renz``, ``windows``, ``iterative`` and (optionally) ``tmpdb`` are
       read here, and the object is handed to digest_parameters
    :param dangling_ends: dictionary queried with each element of
       ``opts.renz``; values are formatted as percentages with 3 decimals
    :param ligated: dictionary whose keys are joined with '-' and whose
       values are formatted as percentages with 3 decimals
    :param fig_path: path registered with type 'FIGURE'
    :param outfiles: iterable of (path, number of entries) pairs, one per
       output file
    :param launch_time: time tuple formatted with time.strftime
    :param finish_time: time tuple formatted with time.strftime
    """
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db

        con = lite.connect(dbfile)
        try:
            # "with con" commits on success and rolls back on error, it does
            # not close the connection (done in the finally clause below)
            with con:
                # check if table exists
                cur = con.cursor()
                cur.execute("""SELECT name FROM sqlite_master WHERE
                               type='table' AND name='MAPPED_INPUTs'""")
                if not cur.fetchall():
                    try:
                        cur.execute("""
                        create table PATHs
                        (Id integer primary key,
                        JOBid int, Path text, Type text,
                        unique (Path))""")
                    except lite.OperationalError:
                        pass  # may happen when mapped files cleaned
                    cur.execute("""
                    create table JOBs
                       (Id integer primary key,
                        Parameters text,
                        Launch_time text,
                        Finish_time text,
                        Type text,
                        Parameters_md5 text,
                        unique (Parameters_md5))""")
                    cur.execute("""
                    create table MAPPED_INPUTs
                       (Id integer primary key,
                        PATHid int,
                        Entries int,
                        Trim text,
                        Frag text,
                        Read int,
                        Enzyme text,
                        Dangling_Ends text,
                        Ligation_Sites text,
                        WRKDIRid int,
                        MAPPED_OUTPUTid int,
                        INDEXid int,
                        unique (PATHid,Entries,Read,Enzyme,WRKDIRid,MAPPED_OUTPUTid,INDEXid))""")

                # NOTE(review): values are interpolated into the SQL text, a
                # single quote in any of them would break the statement
                try:
                    parameters = digest_parameters(opts, get_md5=False)
                    param_hash = digest_parameters(opts, get_md5=True)
                    cur.execute("""
            insert into JOBs
             (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
            values
             (NULL,       '%s',        '%s',        '%s', 'Map',           '%s')
             """ % (parameters, time.strftime(time_fmt, launch_time),
                    time.strftime(time_fmt, finish_time), param_hash))
                except lite.IntegrityError:
                    # a job with the same Parameters_md5 is already recorded
                    pass
                jobid = get_jobid(cur)
                add_path(cur, opts.workdir, 'WORKDIR', jobid)
                add_path(cur, opts.fastq, 'MAPPED_FASTQ', jobid, opts.workdir)
                add_path(cur, opts.index, 'INDEX', jobid, opts.workdir)
                add_path(cur, fig_path, 'FIGURE', jobid, opts.workdir)
                for i, (out, num) in enumerate(outfiles):
                    # window of this output; fall back on the last window
                    # when there are more outputs than windows, and on the
                    # string 'None' when opts.windows cannot be indexed
                    try:
                        window = opts.windows[i]
                    except IndexError:
                        window = opts.windows[-1]
                    except TypeError:
                        window = 'None'
                    add_path(cur, out,
                             '2D_BED' if opts.read == 0 else 'SAM/MAP',
                             jobid, opts.workdir)
                    if opts.iterative:
                        frag = 'none'
                    elif opts.read == 0:
                        frag = 'fast_frag'
                    elif i == len(outfiles) - 1:
                        frag = 'frag'
                    else:
                        frag = 'full'
                    enzymes = '-'.join(map(str, opts.renz))
                    dangling = ' '.join(
                        '%s:%.3f%%' % (r, dangling_ends.get(r, float('nan')))
                        for r in opts.renz)
                    ligation = ' '.join(
                        '%s:%.3f%%' % ('-'.join(r),
                                       ligated.get(r, float('nan')))
                        for r in ligated)
                    try:
                        cur.execute("""
            insert into MAPPED_INPUTs
             (Id  , PATHid, Entries, Trim, Frag, Read, Enzyme, Dangling_Ends, Ligation_Sites, WRKDIRid, MAPPED_OUTPUTid, INDEXid)
            values
             (NULL,      %d,     %d, '%s', '%s',   %d,   '%s',         '%s',          '%s',       %d,              %d,      %d)
             """ % (get_path_id(cur, opts.fastq, opts.workdir), num, window,
                    frag, opts.read, enzymes, dangling, ligation,
                    get_path_id(cur, opts.workdir),
                    get_path_id(cur, out, opts.workdir),
                    get_path_id(cur, opts.index, opts.workdir)))
                    except lite.IntegrityError:
                        # identical row already present (unique constraint)
                        pass
                print_db(cur, 'MAPPED_INPUTs')
                print_db(cur, 'PATHs')
                print_db(cur, 'JOBs')
        finally:
            con.close()

        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above; as before the lock
        # file is removed whether or not this call created it
        try:
            remove(lock_file)
        except OSError:
            pass
Exemple #21
0
def save_to_db(opts, cis_trans_N_D, cis_trans_N_d, cis_trans_n_D,
               cis_trans_n_d, a2, bad_columns_file, bias_file, inter_vs_gcoord,
               mreads, intra_dir_nrm_fig, intra_dir_nrm_txt, inter_dir_nrm_fig,
               inter_dir_nrm_txt, genom_map_nrm_fig, genom_map_nrm_txt,
               intra_dir_raw_fig, intra_dir_raw_txt, inter_dir_raw_fig,
               inter_dir_raw_txt, genom_map_raw_fig, genom_map_raw_txt,
               launch_time, finish_time):
    """
    Record a 'Normalize' job and its outputs in the ``trace.db`` database of
    the working directory.

    One row is inserted in JOBs, the produced files are registered through
    add_path, and the summary values are stored in NORMALIZE_OUTPUTs (the
    table is created if it does not exist yet). The tracked tables are then
    printed with print_db.

    :param opts: options object; ``workdir``, ``reso`` and ``factor`` are
       read here, and the object is handed to digest_parameters
    :param cis_trans_N_D: value stored in column CisTrans_nrm_all
    :param cis_trans_N_d: value stored in column CisTrans_nrm_out
    :param cis_trans_n_D: value stored in column CisTrans_raw_all
    :param cis_trans_n_d: value stored in column CisTrans_raw_out
    :param a2: value stored in column Slope_700kb_10Mb
    :param bad_columns_file: path registered with type 'BAD_COLUMNS'
    :param bias_file: path registered with type 'BIASES'
    :param inter_vs_gcoord: path registered with type 'FIGURE'
    :param mreads: path registered with type '2D_BED'; its PATHs id is stored
       in column Input
    :param intra_dir_nrm_fig: optional path, registered as 'FIGURES'
    :param intra_dir_nrm_txt: optional path, registered as 'NRM_MATRICES'
    :param inter_dir_nrm_fig: optional path, registered as 'FIGURES'
    :param inter_dir_nrm_txt: optional path, registered as 'NRM_MATRICES'
    :param genom_map_nrm_fig: optional path, registered as 'FIGURE'
    :param genom_map_nrm_txt: optional path, registered as 'NRM_MATRIX'
    :param intra_dir_raw_fig: optional path, registered as 'FIGURES'
    :param intra_dir_raw_txt: optional path, registered as 'RAW_MATRICES'
    :param inter_dir_raw_fig: optional path, registered as 'FIGURES'
    :param inter_dir_raw_txt: optional path, registered as 'RAW_MATRICES'
    :param genom_map_raw_fig: optional path, registered as 'FIGURE'
    :param genom_map_raw_txt: optional path, registered as 'RAW_MATRIX'
    :param launch_time: time tuple formatted with time.strftime
    :param finish_time: time tuple formatted with time.strftime
    """
    time_fmt = "%d/%m/%Y %H:%M:%S"
    con = lite.connect(path.join(opts.workdir, 'trace.db'))
    try:
        # "with con" commits on success and rolls back on error, it does not
        # close the connection (done in the finally clause below)
        with con:
            cur = con.cursor()
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='NORMALIZE_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table NORMALIZE_OUTPUTs
                   (Id integer primary key,
                    JOBid int,
                    Input int,
                    CisTrans_nrm_all real,
                    CisTrans_nrm_out real,
                    CisTrans_raw_all real,
                    CisTrans_raw_out real,
                    Slope_700kb_10Mb real,
                    Resolution int,
                    Factor int,
                    unique (JOBid))""")
            # NOTE(review): values are interpolated into the SQL text, a
            # single quote in any of them would break the statement
            try:
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute(
                    """
                insert into JOBs
                (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                values
                (NULL,       '%s',        '%s',        '%s', 'Normalize',           '%s')
                """ % (parameters,
                       time.strftime(time_fmt, launch_time),
                       time.strftime(time_fmt, finish_time), param_hash))
            except lite.IntegrityError:
                # job already recorded (Parameters_md5 is unique in JOBs as
                # created by the mapping step -- TODO confirm)
                pass
            jobid = get_jobid(cur)
            add_path(cur, bad_columns_file, 'BAD_COLUMNS', jobid, opts.workdir)
            add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
            add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
            add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
            # get pathid of input
            cur.execute("select id from paths where path = '%s'" %
                        (path.relpath(mreads, opts.workdir)))
            input_bed = cur.fetchall()[0][0]

            # optional outputs: each path is registered only if it was itself
            # provided (text outputs used to be gated on the corresponding
            # figure path)
            optional_outputs = (
                (intra_dir_nrm_fig, 'FIGURES'),
                (intra_dir_nrm_txt, 'NRM_MATRICES'),
                (inter_dir_nrm_fig, 'FIGURES'),
                (inter_dir_nrm_txt, 'NRM_MATRICES'),
                (genom_map_nrm_fig, 'FIGURE'),
                (genom_map_nrm_txt, 'NRM_MATRIX'),
                (intra_dir_raw_fig, 'FIGURES'),
                (intra_dir_raw_txt, 'RAW_MATRICES'),
                (inter_dir_raw_fig, 'FIGURES'),
                (inter_dir_raw_txt, 'RAW_MATRICES'),
                (genom_map_raw_fig, 'FIGURE'),
                (genom_map_raw_txt, 'RAW_MATRIX'),
            )
            for out_path, out_type in optional_outputs:
                if out_path:
                    add_path(cur, out_path, out_type, jobid, opts.workdir)

            cur.execute("""
            insert into NORMALIZE_OUTPUTs
            (Id  , JOBid,     Input, CisTrans_nrm_all,   CisTrans_nrm_out,   CisTrans_raw_all,   CisTrans_raw_out, Slope_700kb_10Mb,   Resolution,      Factor)
            values
            (NULL,    %d,        %d,               %f,                 %f,                 %f,                 %f,               %f,           %d,          %f)
            """ % (jobid, input_bed, cis_trans_N_D, cis_trans_N_d,
                   cis_trans_n_D, cis_trans_n_d, a2, opts.reso, opts.factor))
            print_db(cur, 'MAPPED_INPUTs')
            print_db(cur, 'PATHs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
            print_db(cur, 'JOBs')
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'FILTER_OUTPUTs')
            print_db(cur, 'NORMALIZE_OUTPUTs')
    finally:
        con.close()
Exemple #22
0
def save_to_db(opts, cis_trans_N_D, cis_trans_N_d, cis_trans_n_D, cis_trans_n_d,
               a2, bad_columns_file, bias_file, inter_vs_gcoord, mreads,
               nbad_columns, ncolumns,
               intra_dir_nrm_fig, intra_dir_nrm_txt,
               inter_dir_nrm_fig, inter_dir_nrm_txt,
               genom_map_nrm_fig, genom_map_nrm_txt,
               intra_dir_raw_fig, intra_dir_raw_txt,
               inter_dir_raw_fig, inter_dir_raw_txt,
               genom_map_raw_fig, genom_map_raw_txt,
               pickle_path, launch_time, finish_time):
    """
    Record a 'Normalize' job and its outputs in the trace database of the
    working directory.

    One row is inserted in JOBs, the produced files are registered through
    add_path, and the summary values are stored in NORMALIZE_OUTPUTs (the
    table is created if it does not exist yet). The tracked tables are then
    printed with print_db.

    When ``opts.tmpdb`` is set, the function waits until the ``__lock_db``
    file disappears from the working directory, creates it, works on a copy
    of ``trace.db`` located at ``opts.tmpdb`` and copies the result back at
    the end. The lock file is removed even if writing fails, so that other
    processes waiting on it are not blocked forever.

    :param opts: options object; ``workdir``, ``reso``, ``factor`` and
       (optionally) ``tmpdb`` are read here, and the object is handed to
       digest_parameters
    :param cis_trans_N_D: value stored in column CisTrans_nrm_all
    :param cis_trans_N_d: value stored in column CisTrans_nrm_out
    :param cis_trans_n_D: value stored in column CisTrans_raw_all
    :param cis_trans_n_d: value stored in column CisTrans_raw_out
    :param a2: value stored in column Slope_700kb_10Mb
    :param bad_columns_file: path registered with type 'BAD_COLUMNS'
    :param bias_file: path registered with type 'BIASES'
    :param inter_vs_gcoord: path registered with type 'FIGURE'
    :param mreads: path registered with type '2D_BED'; its PATHs id is stored
       in column Input
    :param nbad_columns: value stored in column N_filtered
    :param ncolumns: value stored in column N_columns
    :param intra_dir_nrm_fig: optional path, registered as 'FIGURES'
    :param intra_dir_nrm_txt: optional path, registered as 'NRM_MATRICES'
    :param inter_dir_nrm_fig: optional path, registered as 'FIGURES'
    :param inter_dir_nrm_txt: optional path, registered as 'NRM_MATRICES'
    :param genom_map_nrm_fig: optional path, registered as 'FIGURE'
    :param genom_map_nrm_txt: optional path, registered as 'NRM_MATRIX'
    :param intra_dir_raw_fig: optional path, registered as 'FIGURES'
    :param intra_dir_raw_txt: optional path, registered as 'RAW_MATRICES'
    :param inter_dir_raw_fig: optional path, registered as 'FIGURES'
    :param inter_dir_raw_txt: optional path, registered as 'RAW_MATRICES'
    :param genom_map_raw_fig: optional path, registered as 'FIGURE'
    :param genom_map_raw_txt: optional path, registered as 'RAW_MATRIX'
    :param pickle_path: path registered with type 'PICKLE'
    :param launch_time: time tuple formatted with time.strftime
    :param finish_time: time tuple formatted with time.strftime
    """
    trace_db = path.join(opts.workdir, 'trace.db')
    lock_file = path.join(opts.workdir, '__lock_db')
    use_tmpdb = 'tmpdb' in opts and opts.tmpdb
    time_fmt = "%d/%m/%Y %H:%M:%S"

    if use_tmpdb:
        # check lock
        while path.exists(lock_file):
            time.sleep(0.5)
        # close lock
        open(lock_file, 'a').close()
    try:
        if use_tmpdb:
            # tmp file
            dbfile = opts.tmpdb
            try:  # to copy in case read1 was already mapped for example
                copyfile(trace_db, dbfile)
            except IOError:
                pass
        else:
            dbfile = trace_db

        con = lite.connect(dbfile)
        try:
            # "with con" commits on success and rolls back on error, it does
            # not close the connection (done in the finally clause below)
            with con:
                cur = con.cursor()
                cur.execute("""SELECT name FROM sqlite_master WHERE
                               type='table' AND name='NORMALIZE_OUTPUTs'""")
                if not cur.fetchall():
                    cur.execute("""
                    create table NORMALIZE_OUTPUTs
                       (Id integer primary key,
                        JOBid int,
                        Input int,
                        N_columns int,
                        N_filtered int,
                        CisTrans_nrm_all real,
                        CisTrans_nrm_out real,
                        CisTrans_raw_all real,
                        CisTrans_raw_out real,
                        Slope_700kb_10Mb real,
                        Resolution int,
                        Factor int,
                        unique (JOBid))""")
                # NOTE(review): values are interpolated into the SQL text, a
                # single quote in any of them would break the statement
                try:
                    parameters = digest_parameters(opts, get_md5=False)
                    param_hash = digest_parameters(opts, get_md5=True)
                    cur.execute("""
                    insert into JOBs
                    (Id  , Parameters, Launch_time, Finish_time, Type , Parameters_md5)
                    values
                    (NULL,       '%s',        '%s',        '%s', 'Normalize',           '%s')
                    """ % (parameters,
                           time.strftime(time_fmt, launch_time),
                           time.strftime(time_fmt, finish_time), param_hash))
                except lite.IntegrityError:
                    # job already recorded (Parameters_md5 is unique in JOBs
                    # as created by the mapping step -- TODO confirm)
                    pass
                jobid = get_jobid(cur)
                add_path(cur, pickle_path, 'PICKLE', jobid, opts.workdir)
                add_path(cur, bad_columns_file, 'BAD_COLUMNS', jobid,
                         opts.workdir)
                add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
                add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
                add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
                # get pathid of input
                cur.execute("select id from paths where path = '%s'" %
                            (path.relpath(mreads, opts.workdir)))
                input_bed = cur.fetchall()[0][0]

                # optional outputs: each path is registered only if it was
                # itself provided (text outputs used to be gated on the
                # corresponding figure path)
                optional_outputs = (
                    (intra_dir_nrm_fig, 'FIGURES'),
                    (intra_dir_nrm_txt, 'NRM_MATRICES'),
                    (inter_dir_nrm_fig, 'FIGURES'),
                    (inter_dir_nrm_txt, 'NRM_MATRICES'),
                    (genom_map_nrm_fig, 'FIGURE'),
                    (genom_map_nrm_txt, 'NRM_MATRIX'),
                    (intra_dir_raw_fig, 'FIGURES'),
                    (intra_dir_raw_txt, 'RAW_MATRICES'),
                    (inter_dir_raw_fig, 'FIGURES'),
                    (inter_dir_raw_txt, 'RAW_MATRICES'),
                    (genom_map_raw_fig, 'FIGURE'),
                    (genom_map_raw_txt, 'RAW_MATRIX'),
                )
                for out_path, out_type in optional_outputs:
                    if out_path:
                        add_path(cur, out_path, out_type, jobid, opts.workdir)

                try:
                    cur.execute("""
                    insert into NORMALIZE_OUTPUTs
                    (Id  , JOBid,     Input, N_columns,   N_filtered, CisTrans_nrm_all,   CisTrans_nrm_out,   CisTrans_raw_all,   CisTrans_raw_out, Slope_700kb_10Mb,   Resolution,      Factor)
                    values
                    (NULL,    %d,        %d,        %d,           %d,               %f,                 %f,                 %f,                 %f,               %f,           %d,          %f)
                    """ % (jobid, input_bed, ncolumns, nbad_columns,
                           cis_trans_N_D, cis_trans_N_d, cis_trans_n_D,
                           cis_trans_n_d, a2, opts.reso, opts.factor))
                except lite.OperationalError:
                    # retry without the CisTrans_nrm_* columns
                    # NOTE(review): presumably for values that do not render
                    # as valid SQL numbers (e.g. nan) -- confirm
                    try:
                        cur.execute("""
                        insert into NORMALIZE_OUTPUTs
                        (Id  , JOBid,     Input, N_columns,   N_filtered,  CisTrans_raw_all,   CisTrans_raw_out, Slope_700kb_10Mb,   Resolution,      Factor)
                        values
                        (NULL,    %d,        %d,        %d,           %d,                %f,                 %f,               %f,           %d,          %f)
                        """ % (jobid, input_bed, ncolumns, nbad_columns,
                               cis_trans_n_D, cis_trans_n_d, a2, opts.reso,
                               opts.factor))
                    except lite.OperationalError:
                        print('WARNING: Normalized table not written!!!')

                print_db(cur, 'PATHs')
                print_db(cur, 'JOBs')
                try:
                    print_db(cur, 'INTERSECTION_OUTPUTs')
                    print_db(cur, 'MAPPED_INPUTs')
                    print_db(cur, 'MAPPED_OUTPUTs')
                    print_db(cur, 'PARSED_OUTPUTs')
                except lite.OperationalError:
                    pass
                print_db(cur, 'FILTER_OUTPUTs')
                print_db(cur, 'NORMALIZE_OUTPUTs')
        finally:
            con.close()

        if use_tmpdb:
            # copy back file
            copyfile(dbfile, trace_db)
            remove(dbfile)
    finally:
        # release lock, also when something failed above; as before the lock
        # file is removed whether or not this call created it
        try:
            remove(lock_file)
        except OSError:
            pass