def run(opts): check_options(opts) if "tmpdb" in opts and opts.tmpdb: dbfile = opts.tmpdb copyfile(path.join(opts.workdir, "trace.db"), dbfile) else: dbfile = path.join(opts.workdir, "trace.db") con = lite.connect(dbfile) if opts.tsv and path.exists(opts.tsv): remove(opts.tsv) with con: cur = con.cursor() cur.execute("SELECT name FROM sqlite_master WHERE type='table'") for table in cur.fetchall(): if table[0].lower() in ["jobs", "paths"] and opts.tsv: continue if table[0].lower() in opts.tables: print_db( cur, table[0], savedata=opts.tsv, append=True, no_print=["JOBid", "Id", "Input", "" if table[0] == "MAPPED_OUTPUTs" else "PATHid"] if opts.tsv else "", ) if "tmpdb" in opts and opts.tmpdb: copyfile(dbfile, path.join(opts.workdir, "trace.db")) remove(dbfile)
def run(opts):
    check_options(opts)
    if 'tmpdb' in opts and opts.tmpdb:
        dbfile = opts.tmpdb
        copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    if opts.output and path.exists(opts.output):
        remove(opts.output)
    with con:
        cur = con.cursor()
        cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        for table in cur.fetchall():
            if table[0].lower() in ['jobs', 'paths'] and opts.tsv:
                continue
            if table[0].lower() in opts.tables:
                print_db(cur, table[0], savedata=opts.output, append=True,
                         tsv=opts.tsv,
                         no_print=['JOBid', 'Id', 'Input',
                                   '' if table[0] == 'MAPPED_OUTPUTs' else 'PATHid']
                         if opts.tsv else '',
                         jobids=opts.jobids, columns=opts.select,
                         **dict(f.split(',') for f in opts.where))
    if 'tmpdb' in opts and opts.tmpdb:
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)

def run(opts):
    check_options(opts)
    if 'tmp' in opts and opts.tmp:
        dbfile = opts.tmp
        copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        for table in cur.fetchall():
            if table[0].lower() in opts.tables:
                print_db(cur, table[0])
    if 'tmp' in opts and opts.tmp:
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)

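# print_db comes from the package utilities and is called above with a growing
# set of keyword options (savedata, append, tsv, no_print, jobids, columns,
# plus free-form filters). For reading these functions in isolation, a minimal
# stand-in that only dumps one table is sketched below; it is a hypothetical
# simplification, not the real helper.
def print_db_sketch(cur, table):
    """Dump one table: header row from cursor metadata, then all rows."""
    # table names cannot be bound as parameters, hence the string formatting
    cur.execute("select * from %s" % table)
    print('\t'.join(col[0] for col in cur.description))
    for row in cur.fetchall():
        print('\t'.join(str(val) for val in row))
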
def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               hist_path, median, max_f, mad, launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table INTERSECTION_OUTPUTs
               (Id integer primary key, PATHid int, Total_interactions int,
                Multiple_interactions text, Median_fragment_length,
                MAD_fragment_length, Max_fragment_length, unique (PATHid))""")
            cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key, PATHid int, Name text, Count int,
                JOBid int, unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Filter', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
        add_path(cur, reads, '2D_BED', jobid, opts.workdir)
        add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)
        try:
            cur.execute("""
            insert into INTERSECTION_OUTPUTs
            (Id, PATHid, Total_interactions, Multiple_interactions,
             Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values (NULL, %d, %d, '%s', %d, %d, %d)
            """ % (get_path_id(cur, mreads, opts.workdir), count,
                   ' '.join(['%s:%d' % (k, multiples[k])
                             for k in sorted(multiples)]),
                   median, mad, max_f))
        except lite.IntegrityError:
            print 'WARNING: already filtered'
            if opts.force:
                cur.execute(
                    'delete from INTERSECTION_OUTPUTs where PATHid = %d' % (
                        get_path_id(cur, mreads, opts.workdir)))
                cur.execute("""
                insert into INTERSECTION_OUTPUTs
                (Id, PATHid, Total_interactions, Multiple_interactions,
                 Median_fragment_length, MAD_fragment_length, Max_fragment_length)
                values (NULL, %d, %d, '%s', %d, %d, %d)
                """ % (get_path_id(cur, mreads, opts.workdir), count,
                       ' '.join(['%s:%d' % (k, multiples[k])
                                 for k in sorted(multiples)]),
                       median, mad, max_f))
        for f in masked:
            add_path(cur, masked[f]['fnam'], 'FILTER', jobid, opts.workdir)
            try:
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id, PATHid, Name, Count, JOBid)
                values (NULL, %d, '%s', '%s', %d)
                """ % (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                       masked[f]['name'], masked[f]['reads'], jobid))
            except lite.IntegrityError:
                print 'WARNING: already filtered'
                if opts.force:
                    cur.execute(
                        'delete from FILTER_OUTPUTs where PATHid = %d' % (
                            get_path_id(cur, masked[f]['fnam'], opts.workdir)))
                    cur.execute("""
                    insert into FILTER_OUTPUTs
                    (Id, PATHid, Name, Count, JOBid)
                    values (NULL, %d, '%s', '%s', %d)
                    """ % (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                           masked[f]['name'], masked[f]['reads'], jobid))
        try:
            cur.execute("""
            insert into FILTER_OUTPUTs
            (Id, PATHid, Name, Count, JOBid)
            values (NULL, %d, '%s', '%s', %d)
            """ % (get_path_id(cur, mreads, opts.workdir),
                   'valid-pairs', n_valid_pairs, jobid))
        except lite.IntegrityError:
            print 'WARNING: already filtered'
            if opts.force:
                cur.execute(
                    'delete from FILTER_OUTPUTs where PATHid = %d' % (
                        get_path_id(cur, mreads, opts.workdir)))
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id, PATHid, Name, Count, JOBid)
                values (NULL, %d, '%s', '%s', %d)
                """ % (get_path_id(cur, mreads, opts.workdir),
                       'valid-pairs', n_valid_pairs, jobid))
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
        print_db(cur, 'INTERSECTION_OUTPUTs')
        print_db(cur, 'FILTER_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

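# The lock/copy/copy-back choreography around '__lock_db' recurs in every
# save_to_db variant below. A sketch of the same steps gathered into one
# context manager, reusing the module's path/time/copyfile/remove imports
# (hypothetical helper, not part of the original code; note the
# exists()-then-open() window is still racy, exactly as in the spin-wait
# above, and changes are only copied back when the body succeeds):
from contextlib import contextmanager

@contextmanager
def trace_db(workdir, tmpdb=None):
    """Yield the db file to work on; with tmpdb: lock, copy out, copy back."""
    lock = path.join(workdir, '__lock_db')
    dbfile = path.join(workdir, 'trace.db')
    if not tmpdb:
        yield dbfile
        return
    while path.exists(lock):   # wait for a concurrent writer
        time.sleep(0.5)
    open(lock, 'a').close()    # take the lock
    try:
        try:
            copyfile(dbfile, tmpdb)
        except IOError:
            pass               # first job: no trace.db yet
        yield tmpdb
        copyfile(tmpdb, dbfile)
    finally:
        for fname in (tmpdb, lock):
            try:
                remove(fname)
            except OSError:
                pass
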
def save_to_db(opts, bias_file, mreads, bad_col_image, nbad_columns, ncolumns,
               raw_cisprc, norm_cisprc, inter_vs_gcoord, a2, bam_filter,
               launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='JOBs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key, JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='NORMALIZE_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table NORMALIZE_OUTPUTs
               (Id integer primary key, JOBid int, Input int, N_columns int,
                N_filtered int, BAM_filter int, Cis_percentage_Raw real,
                Cis_percentage_Norm real, Slope_700kb_10Mb real,
                Resolution int, Normalization text, Factor int,
                unique (JOBid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Normalize', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
        add_path(cur, bad_col_image, 'FIGURE', jobid, opts.workdir)
        add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
        if opts.bam:
            add_path(cur, path.realpath(opts.bam), 'EXT_2D_BAM',
                     jobid, opts.workdir)
        if opts.mappability:
            add_path(cur, path.realpath(opts.mappability), 'EXT_MAPPABILITY',
                     jobid, opts.workdir)
        if opts.fasta:
            add_path(cur, path.realpath(opts.fasta), 'EXT_FASTA',
                     jobid, opts.workdir)
        # get pathid of input
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads, opts.workdir)))
        input_bed = cur.fetchall()[0][0]
        a2 = 0 if isnan(a2) else a2
        try:
            cur.execute("""
            insert into NORMALIZE_OUTPUTs
            (Id, JOBid, Input, N_columns, N_filtered, BAM_filter,
             Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,
             Resolution, Normalization, Factor)
            values (NULL, %d, %d, %d, %d, %d, %f, %f, %f, %d, '%s', %f)
            """ % (jobid, input_bed, ncolumns, nbad_columns, bam_filter,
                   100 * raw_cisprc, 100 * norm_cisprc, a2, opts.reso,
                   opts.normalization, opts.factor))
        except lite.OperationalError:
            try:  # retry once before giving up
                cur.execute("""
                insert into NORMALIZE_OUTPUTs
                (Id, JOBid, Input, N_columns, N_filtered, BAM_filter,
                 Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,
                 Resolution, Normalization, Factor)
                values (NULL, %d, %d, %d, %d, %d, %f, %f, %f, %d, '%s', %f)
                """ % (jobid, input_bed, ncolumns, nbad_columns, bam_filter,
                       100 * raw_cisprc, 100 * norm_cisprc, a2, opts.reso,
                       opts.normalization, opts.factor))
            except lite.OperationalError:
                print 'WARNING: Normalized table not written!!!'
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        try:
            print_db(cur, 'FILTER_OUTPUTs')
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'MAPPED_INPUTs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
        except lite.OperationalError:
            pass
        print_db(cur, 'NORMALIZE_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

def save_to_db(opts, cmp_result, tad_result, reso, inputs,
               launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='SEGMENT_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table SEGMENT_OUTPUTs
               (Id integer primary key, JOBid int, Inputs text, TADs int,
                Compartments int, Chromosome text, Resolution int)""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Segment', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        for crm in max(cmp_result.keys(), tad_result.keys(),
                       key=lambda x: len(x)):
            if crm in cmp_result:
                add_path(cur, cmp_result[crm]['path'], 'COMPARTMENT',
                         jobid, opts.workdir)
            if crm in tad_result:
                add_path(cur, tad_result[crm]['path'], 'TAD',
                         jobid, opts.workdir)
            if opts.rich_in_A:
                add_path(cur, opts.rich_in_A, 'BED', jobid, opts.workdir)
            cur.execute("""
            insert into SEGMENT_OUTPUTs
            (Id, JOBid, Inputs, TADs, Compartments, Chromosome, Resolution)
            values (NULL, %d, '%s', %d, %d, '%s', %d)
            """ % (jobid,
                   ','.join([str(i) for i in inputs]),
                   tad_result[crm]['num'] if crm in tad_result else 0,
                   cmp_result[crm]['num'] if crm in cmp_result else 0,
                   crm, reso))
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        print_db(cur, 'SEGMENT_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        remove(path.join(opts.workdir, '__lock_db'))

def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               hist_path, median, max_f, mad, launch_time, finish_time):
    con = lite.connect(path.join(opts.workdir, 'trace.db'))
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table INTERSECTION_OUTPUTs
               (Id integer primary key, PATHid int, Total_interactions int,
                Multiple_interactions text, Median_fragment_length,
                MAD_fragment_length, Max_fragment_length, unique (PATHid))""")
            cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key, PATHid int, Name text, Count int,
                JOBid int, unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Filter', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
        add_path(cur, reads, '2D_BED', jobid, opts.workdir)
        add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)
        try:
            cur.execute("""
            insert into INTERSECTION_OUTPUTs
            (Id, PATHid, Total_interactions, Multiple_interactions,
             Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values (NULL, %d, %d, '%s', %d, %d, %d)
            """ % (get_path_id(cur, mreads, opts.workdir), count,
                   ' '.join(['%s:%d' % (k, multiples[k])
                             for k in sorted(multiples)]),
                   median, mad, max_f))
        except lite.IntegrityError:
            print 'WARNING: already filtered'
            if opts.force:
                cur.execute(
                    'delete from INTERSECTION_OUTPUTs where PATHid = %d' % (
                        get_path_id(cur, mreads, opts.workdir)))
                cur.execute("""
                insert into INTERSECTION_OUTPUTs
                (Id, PATHid, Total_interactions, Multiple_interactions,
                 Median_fragment_length, MAD_fragment_length, Max_fragment_length)
                values (NULL, %d, %d, '%s', %d, %d, %d)
                """ % (get_path_id(cur, mreads, opts.workdir), count,
                       ' '.join(['%s:%d' % (k, multiples[k])
                                 for k in sorted(multiples)]),
                       median, mad, max_f))
        for f in masked:
            add_path(cur, masked[f]['fnam'], 'FILTER', jobid, opts.workdir)
            try:
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id, PATHid, Name, Count, JOBid)
                values (NULL, %d, '%s', '%s', %d)
                """ % (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                       masked[f]['name'], masked[f]['reads'], jobid))
            except lite.IntegrityError:
                print 'WARNING: already filtered'
                if opts.force:
                    cur.execute(
                        'delete from FILTER_OUTPUTs where PATHid = %d' % (
                            get_path_id(cur, masked[f]['fnam'], opts.workdir)))
                    cur.execute("""
                    insert into FILTER_OUTPUTs
                    (Id, PATHid, Name, Count, JOBid)
                    values (NULL, %d, '%s', '%s', %d)
                    """ % (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                           masked[f]['name'], masked[f]['reads'], jobid))
        try:
            cur.execute("""
            insert into FILTER_OUTPUTs
            (Id, PATHid, Name, Count, JOBid)
            values (NULL, %d, '%s', '%s', %d)
            """ % (get_path_id(cur, mreads, opts.workdir),
                   'valid-pairs', n_valid_pairs, jobid))
        except lite.IntegrityError:
            print 'WARNING: already filtered'
            if opts.force:
                cur.execute(
                    'delete from FILTER_OUTPUTs where PATHid = %d' % (
                        get_path_id(cur, mreads, opts.workdir)))
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id, PATHid, Name, Count, JOBid)
                values (NULL, %d, '%s', '%s', %d)
                """ % (get_path_id(cur, mreads, opts.workdir),
                       'valid-pairs', n_valid_pairs, jobid))
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
        print_db(cur, 'INTERSECTION_OUTPUTs')
        print_db(cur, 'FILTER_OUTPUTs')

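# The IntegrityError branches above implement "insert; on conflict, warn,
# then delete and re-insert when --force is set". SQLite can collapse the
# delete+insert into a single statement; a standalone sketch of that
# alternative (illustration only: the original keeps the explicit delete so
# the warning can be printed first):
import sqlite3

def force_insert_demo():
    con = sqlite3.connect(':memory:')
    con.execute("""create table FILTER_OUTPUTs
                   (PATHid int, Name text, unique (PATHid))""")
    con.execute("insert into FILTER_OUTPUTs values (?, ?)", (1, 'self-circle'))
    # same PATHid again: "insert or replace" overwrites instead of raising
    con.execute("insert or replace into FILTER_OUTPUTs values (?, ?)",
                (1, 'valid-pairs'))
    return con.execute("select * from FILTER_OUTPUTs").fetchall()
    # -> [(1, 'valid-pairs')]
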
def save_to_db(opts, bias_file, mreads, nbad_columns, ncolumns, raw_cisprc,
               norm_cisprc, inter_vs_gcoord, a2, bam_filter,
               launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='JOBs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key, JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='NORMALIZE_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table NORMALIZE_OUTPUTs
               (Id integer primary key, JOBid int, Input int, N_columns int,
                N_filtered int, BAM_filter int, Cis_percentage_Raw real,
                Cis_percentage_Norm real, Slope_700kb_10Mb real,
                Resolution int, Normalization text, Factor int,
                unique (JOBid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Normalize', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
        add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
        if opts.bam:
            add_path(cur, path.realpath(opts.bam), 'EXT_2D_BAM',
                     jobid, opts.workdir)
        if opts.mappability:
            add_path(cur, path.realpath(opts.mappability), 'EXT_MAPPABILITY',
                     jobid, opts.workdir)
        if opts.fasta:
            add_path(cur, path.realpath(opts.fasta), 'EXT_FASTA',
                     jobid, opts.workdir)
        # get pathid of input
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads, opts.workdir)))
        input_bed = cur.fetchall()[0][0]
        a2 = 0 if isnan(a2) else a2
        try:
            cur.execute("""
            insert into NORMALIZE_OUTPUTs
            (Id, JOBid, Input, N_columns, N_filtered, BAM_filter,
             Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,
             Resolution, Normalization, Factor)
            values (NULL, %d, %d, %d, %d, %d, %f, %f, %f, %d, '%s', %f)
            """ % (jobid, input_bed, ncolumns, nbad_columns, bam_filter,
                   100 * raw_cisprc, 100 * norm_cisprc, a2, opts.reso,
                   opts.normalization, opts.factor))
        except lite.OperationalError:
            try:  # retry once before giving up
                cur.execute("""
                insert into NORMALIZE_OUTPUTs
                (Id, JOBid, Input, N_columns, N_filtered, BAM_filter,
                 Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,
                 Resolution, Normalization, Factor)
                values (NULL, %d, %d, %d, %d, %d, %f, %f, %f, %d, '%s', %f)
                """ % (jobid, input_bed, ncolumns, nbad_columns, bam_filter,
                       100 * raw_cisprc, 100 * norm_cisprc, a2, opts.reso,
                       opts.normalization, opts.factor))
            except lite.OperationalError:
                print('WARNING: Normalized table not written!!!')
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        try:
            print_db(cur, 'FILTER_OUTPUTs')
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'MAPPED_INPUTs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
        except lite.OperationalError:
            pass
        print_db(cur, 'NORMALIZE_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

def save_to_db(opts, dangling_ends, ligated, fig_path, outfiles,
               launch_time, finish_time):
    """
    write little DB to keep track of processes and options
    """
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        # check if table exists
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MAPPED_INPUTs'""")
        if not cur.fetchall():
            try:
                cur.execute("""
                create table PATHs
                   (Id integer primary key, JOBid int, Path text, Type text,
                    unique (Path))""")
            except lite.OperationalError:
                pass  # may happen when mapped files were cleaned
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""
            create table MAPPED_INPUTs
               (Id integer primary key, PATHid int, Entries int, Trim text,
                Frag text, Read int, Enzyme text, Dangling_Ends text,
                Ligation_Sites text, WRKDIRid int, MAPPED_OUTPUTid int,
                INDEXid int,
                unique (PATHid,Entries,Read,Enzyme,WRKDIRid,MAPPED_OUTPUTid,INDEXid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Map', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, opts.workdir, 'WORKDIR', jobid)
        add_path(cur, opts.fastq, 'MAPPED_FASTQ', jobid, opts.workdir)
        add_path(cur, opts.index, 'INDEX', jobid, opts.workdir)
        add_path(cur, fig_path, 'FIGURE', jobid, opts.workdir)
        for i, (out, num) in enumerate(outfiles):
            try:
                window = opts.windows[i]
            except IndexError:
                window = opts.windows[-1]
            except TypeError:
                window = 'None'
            add_path(cur, out, '2D_BED' if opts.read == 0 else 'SAM/MAP',
                     jobid, opts.workdir)
            frag = ('none' if opts.iterative else
                    'fast_frag' if opts.read == 0 else
                    'frag' if i == len(outfiles) - 1 else 'full')
            try:
                cur.execute("""
                insert into MAPPED_INPUTs
                (Id, PATHid, Entries, Trim, Frag, Read, Enzyme, Dangling_Ends,
                 Ligation_Sites, WRKDIRid, MAPPED_OUTPUTid, INDEXid)
                values (NULL, %d, %d, '%s', '%s', %d, '%s', '%s', '%s', %d, %d, %d)
                """ % (get_path_id(cur, opts.fastq, opts.workdir), num,
                       window, frag, opts.read,
                       '-'.join(map(str, opts.renz)),
                       ' '.join('%s:%.3f%%' % (r, dangling_ends.get(r, float('nan')))
                                for r in opts.renz),
                       ' '.join('%s:%.3f%%' % ('-'.join(r), ligated.get(r, float('nan')))
                                for r in ligated),
                       get_path_id(cur, opts.workdir),
                       get_path_id(cur, out, opts.workdir),
                       get_path_id(cur, opts.index, opts.workdir)))
            except lite.IntegrityError:
                pass
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

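# Every save_to_db variant registers its job with the same idiom: try the
# insert, swallow the IntegrityError raised by unique (Parameters_md5) when
# an identical job was already traced, then read the job id back with
# get_jobid(). A standalone sketch of that dedup flow (job_dedup_demo is a
# hypothetical illustration; '8f14e45...' is a made-up hash, and max(Id)
# stands in for whatever get_jobid actually resolves):
import sqlite3

def job_dedup_demo():
    con = sqlite3.connect(':memory:')
    cur = con.cursor()
    cur.execute("""create table JOBs (Id integer primary key,
                   Parameters_md5 text, unique (Parameters_md5))""")
    for _ in range(2):  # the same parameter hash submitted twice
        try:
            cur.execute("insert into JOBs values (NULL, ?)", ('8f14e45...',))
        except sqlite3.IntegrityError:
            pass  # job already traced: keep the existing row
    cur.execute("select max(Id) from JOBs")
    return cur.fetchall()  # -> [(1,)]: one row, shared by both runs
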
def save_to_db(opts, count, multiples, reads, mreads, n_valid_pairs, masked,
               outbam, hist_path, median, max_f, mad,
               launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table INTERSECTION_OUTPUTs
               (Id integer primary key, PATHid int, Total_interactions int,
                Multiple_interactions text, Median_fragment_length,
                MAD_fragment_length, Max_fragment_length, unique (PATHid))""")
            cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key, PATHid int, Name text, Count int,
                Applied text, JOBid int, unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Filter', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
        add_path(cur, outbam, 'HIC_BAM', jobid, opts.workdir)
        add_path(cur, outbam + '.bai', 'HIC_BAI', jobid, opts.workdir)
        add_path(cur, reads, '2D_BED', jobid, opts.workdir)
        add_path(cur, hist_path, 'FIGURE', jobid, opts.workdir)
        try:
            cur.execute("""
            insert into INTERSECTION_OUTPUTs
            (Id, PATHid, Total_interactions, Multiple_interactions,
             Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values (NULL, %d, %d, '%s', %d, %d, %d)
            """ % (get_path_id(cur, mreads, opts.workdir), count,
                   ' '.join(['%s:%d' % (k, multiples[k])
                             for k in sorted(multiples)]),
                   median, mad, max_f))
        except lite.IntegrityError:
            print 'WARNING: already filtered'
            if opts.force:
                cur.execute(
                    'delete from INTERSECTION_OUTPUTs where PATHid = %d' % (
                        get_path_id(cur, mreads, opts.workdir)))
                cur.execute("""
                insert into INTERSECTION_OUTPUTs
                (Id, PATHid, Total_interactions, Multiple_interactions,
                 Median_fragment_length, MAD_fragment_length, Max_fragment_length)
                values (NULL, %d, %d, '%s', %d, %d, %d)
                """ % (get_path_id(cur, mreads, opts.workdir), count,
                       ' '.join(['%s:%d' % (k, multiples[k])
                                 for k in sorted(multiples)]),
                       median, mad, max_f))
        for nf, f in enumerate(masked, 1):
            try:
                add_path(cur, masked[f]['fnam'], 'FILTER', jobid, opts.workdir)
            except KeyError:
                continue
            try:
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id, PATHid, Name, Count, Applied, JOBid)
                values (NULL, %d, '%s', '%s', '%s', %d)
                """ % (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                       masked[f]['name'], masked[f]['reads'],
                       'True' if nf in opts.apply else 'False', jobid))
            except lite.IntegrityError:
                print 'WARNING: already filtered'
                if opts.force:
                    cur.execute(
                        'delete from FILTER_OUTPUTs where PATHid = %d' % (
                            get_path_id(cur, masked[f]['fnam'], opts.workdir)))
                    cur.execute("""
                    insert into FILTER_OUTPUTs
                    (Id, PATHid, Name, Count, Applied, JOBid)
                    values (NULL, %d, '%s', '%s', '%s', %d)
                    """ % (get_path_id(cur, masked[f]['fnam'], opts.workdir),
                           masked[f]['name'], masked[f]['reads'],
                           'True' if nf in opts.apply else 'False', jobid))
        try:
            cur.execute("""
            insert into FILTER_OUTPUTs
            (Id, PATHid, Name, Count, Applied, JOBid)
            values (NULL, %d, '%s', '%s', '%s', %d)
            """ % (get_path_id(cur, mreads, opts.workdir),
                   'valid-pairs', n_valid_pairs, '', jobid))
        except lite.IntegrityError:
            print 'WARNING: already filtered'
            if opts.force:
                cur.execute(
                    'delete from FILTER_OUTPUTs where PATHid = %d' % (
                        get_path_id(cur, mreads, opts.workdir)))
                cur.execute("""
                insert into FILTER_OUTPUTs
                (Id, PATHid, Name, Count, Applied, JOBid)
                values (NULL, %d, '%s', '%s', '%s', %d)
                """ % (get_path_id(cur, mreads, opts.workdir),
                       'valid-pairs', n_valid_pairs, '', jobid))
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
        print_db(cur, 'INTERSECTION_OUTPUTs')
        print_db(cur, 'FILTER_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

def save_to_db(opts, count, ncolumns, bias_file, nbad_columns, outbam,
               launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='JOBs'""")
        if not cur.fetchall():
            try:
                cur.execute("""
                create table PATHs
                   (Id integer primary key, JOBid int, Path text, Type text,
                    unique (Path))""")
            except lite.OperationalError:
                pass  # may happen when mapped files were cleaned
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='INTERSECTION_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table INTERSECTION_OUTPUTs
               (Id integer primary key, PATHid int, Total_interactions int,
                Multiple_interactions text, Median_fragment_length,
                MAD_fragment_length, Max_fragment_length, unique (PATHid))""")
            cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key, PATHid int, Name text, Count int,
                Applied text, JOBid int, unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Import', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, outbam, 'HIC_BAM', jobid, opts.workdir)
        add_path(cur, outbam + '.bai', 'HIC_BAI', jobid, opts.workdir)
        try:
            cur.execute("""
            insert into INTERSECTION_OUTPUTs
            (Id, PATHid, Total_interactions, Multiple_interactions,
             Median_fragment_length, MAD_fragment_length, Max_fragment_length)
            values (NULL, %d, %d, '%s', %d, %d, %d)
            """ % (get_path_id(cur, outbam, opts.workdir), count, '', 1, 1, 1))
        except lite.IntegrityError:
            print('WARNING: already filtered')
        try:
            cur.execute("""
            insert into FILTER_OUTPUTs
            (Id, PATHid, Name, Count, Applied, JOBid)
            values (NULL, %d, '%s', '%s', '%s', %d)
            """ % (get_path_id(cur, outbam, opts.workdir),
                   'valid-pairs', count, '', jobid))
        except lite.IntegrityError:
            print('WARNING: already filtered')
        if bias_file:
            cur.execute("""SELECT name FROM sqlite_master WHERE
                           type='table' AND name='NORMALIZE_OUTPUTs'""")
            if not cur.fetchall():
                cur.execute("""
                create table NORMALIZE_OUTPUTs
                   (Id integer primary key, JOBid int, Input int,
                    N_columns int, N_filtered int, BAM_filter int,
                    Cis_percentage_Raw real, Cis_percentage_Norm real,
                    Slope_700kb_10Mb real, Resolution int,
                    Normalization text, Factor int, unique (JOBid))""")
            try:
                opts.normalization = 'custom'
                parameters = digest_parameters(opts, get_md5=False)
                param_hash = digest_parameters(opts, get_md5=True)
                cur.execute("""
                insert into JOBs
                (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
                values (NULL, '%s', '%s', '%s', 'Normalize', '%s')
                """ % (parameters,
                       time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                       time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                       param_hash))
            except lite.IntegrityError:
                pass
            jobid = get_jobid(cur)
            add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
            input_bed = get_path_id(cur, outbam, opts.workdir)
            try:
                cur.execute("""
                insert into NORMALIZE_OUTPUTs
                (Id, JOBid, Input, N_columns, N_filtered, BAM_filter,
                 Cis_percentage_Raw, Cis_percentage_Norm, Slope_700kb_10Mb,
                 Resolution, Normalization, Factor)
                values (NULL, %d, %d, %d, %d, %d, %f, %f, %f, %d, '%s', %f)
                """ % (jobid, input_bed, ncolumns, nbad_columns,
                       0, 0, 0, 0, opts.reso, 'custom', 0))
            except lite.OperationalError:
                print('WARNING: Normalized table not written!!!')
        print_db(cur, 'PATHs')
        #print_db(cur, 'MAPPED_OUTPUTs')
        #print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
        print_db(cur, 'INTERSECTION_OUTPUTs')
        print_db(cur, 'FILTER_OUTPUTs')
        if bias_file:
            print_db(cur, 'NORMALIZE_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

def save_to_db(opts, counts, multis, f_names1, f_names2, out_file1, out_file2,
               launch_time, finish_time):
    con = lite.connect(path.join(opts.workdir, 'trace.db'))
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='PARSED_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table MAPPED_OUTPUTs
               (Id integer primary key, PATHid int, BEDid int,
                Uniquely_mapped int, unique (PATHid, BEDid))""")
            cur.execute("""
            create table PARSED_OUTPUTs
               (Id integer primary key, PATHid int, Total_interactions int,
                Multiples int, unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Parse', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, out_file1, 'BED', jobid, opts.workdir)
        for genome in opts.genome:
            add_path(cur, genome, 'FASTA', jobid, opts.workdir)
        if out_file2:
            add_path(cur, out_file2, 'BED', jobid, opts.workdir)
        fnames = f_names1, f_names2
        outfiles = out_file1, out_file2
        for count in counts:
            try:
                sum_reads = 0
                for i, item in enumerate(counts[count]):
                    cur.execute("""
                    insert into MAPPED_OUTPUTs
                    (Id, PATHid, BEDid, Uniquely_mapped)
                    values (NULL, %d, %d, %d)
                    """ % (get_path_id(cur, fnames[count][i], opts.workdir),
                           get_path_id(cur, outfiles[count], opts.workdir),
                           counts[count][item]))
                    sum_reads += counts[count][item]
            except lite.IntegrityError:
                print 'WARNING: already parsed (MAPPED_OUTPUTs)'
            try:
                cur.execute("""
                insert into PARSED_OUTPUTs
                (Id, PATHid, Total_interactions, Multiples)
                values (NULL, %d, %d, %d)
                """ % (get_path_id(cur, outfiles[count], opts.workdir),
                       sum_reads, multis[count]))
            except lite.IntegrityError:
                print 'WARNING: already parsed (PARSED_OUTPUTs)'
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')

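# All inserts above splice values into SQL with '%'-formatting: a value
# containing a quote (say, a path or enzyme name) breaks the statement, and
# the pattern is injection-prone. A sketch of the same JOBs insert with
# sqlite3 bound parameters (insert_job_sketch is a hypothetical helper;
# identifiers still cannot be bound, only values):
def insert_job_sketch(cur, parameters, launch_time, finish_time, param_hash):
    cur.execute("""insert into JOBs
                   (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
                   values (NULL, ?, ?, ?, 'Parse', ?)""",
                (parameters,
                 time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                 time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                 param_hash))
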
def save_to_db(opts, cis_trans_N_D, cis_trans_N_d, cis_trans_n_D,
               cis_trans_n_d, a2, bad_columns_file, bias_file,
               inter_vs_gcoord, mreads, nbad_columns, ncolumns,
               intra_dir_nrm_fig, intra_dir_nrm_txt,
               inter_dir_nrm_fig, inter_dir_nrm_txt,
               genom_map_nrm_fig, genom_map_nrm_txt,
               intra_dir_raw_fig, intra_dir_raw_txt,
               inter_dir_raw_fig, inter_dir_raw_txt,
               genom_map_raw_fig, genom_map_raw_txt,
               pickle_path, launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='NORMALIZE_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table NORMALIZE_OUTPUTs
               (Id integer primary key, JOBid int, Input int, N_columns int,
                N_filtered int, CisTrans_nrm_all real, CisTrans_nrm_out real,
                CisTrans_raw_all real, CisTrans_raw_out real,
                Slope_700kb_10Mb real, Resolution int, Factor int,
                unique (JOBid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Normalize', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, pickle_path, 'PICKLE', jobid, opts.workdir)
        add_path(cur, bad_columns_file, 'BAD_COLUMNS', jobid, opts.workdir)
        add_path(cur, bias_file, 'BIASES', jobid, opts.workdir)
        add_path(cur, inter_vs_gcoord, 'FIGURE', jobid, opts.workdir)
        add_path(cur, mreads, '2D_BED', jobid, opts.workdir)
        # get pathid of input
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads, opts.workdir)))
        input_bed = cur.fetchall()[0][0]
        if intra_dir_nrm_fig:
            add_path(cur, intra_dir_nrm_fig, 'FIGURES', jobid, opts.workdir)
        if intra_dir_nrm_txt:
            add_path(cur, intra_dir_nrm_txt, 'NRM_MATRICES', jobid, opts.workdir)
        if inter_dir_nrm_fig:
            add_path(cur, inter_dir_nrm_fig, 'FIGURES', jobid, opts.workdir)
        if inter_dir_nrm_txt:
            add_path(cur, inter_dir_nrm_txt, 'NRM_MATRICES', jobid, opts.workdir)
        if genom_map_nrm_fig:
            add_path(cur, genom_map_nrm_fig, 'FIGURE', jobid, opts.workdir)
        if genom_map_nrm_txt:
            add_path(cur, genom_map_nrm_txt, 'NRM_MATRIX', jobid, opts.workdir)
        if intra_dir_raw_fig:
            add_path(cur, intra_dir_raw_fig, 'FIGURES', jobid, opts.workdir)
        if intra_dir_raw_txt:
            add_path(cur, intra_dir_raw_txt, 'RAW_MATRICES', jobid, opts.workdir)
        if inter_dir_raw_fig:
            add_path(cur, inter_dir_raw_fig, 'FIGURES', jobid, opts.workdir)
        if inter_dir_raw_txt:
            add_path(cur, inter_dir_raw_txt, 'RAW_MATRICES', jobid, opts.workdir)
        if genom_map_raw_fig:
            add_path(cur, genom_map_raw_fig, 'FIGURE', jobid, opts.workdir)
        if genom_map_raw_txt:
            add_path(cur, genom_map_raw_txt, 'RAW_MATRIX', jobid, opts.workdir)
        try:
            cur.execute("""
            insert into NORMALIZE_OUTPUTs
            (Id, JOBid, Input, N_columns, N_filtered,
             CisTrans_nrm_all, CisTrans_nrm_out, CisTrans_raw_all,
             CisTrans_raw_out, Slope_700kb_10Mb, Resolution, Factor)
            values (NULL, %d, %d, %d, %d, %f, %f, %f, %f, %f, %d, %f)
            """ % (jobid, input_bed, ncolumns, nbad_columns,
                   cis_trans_N_D, cis_trans_N_d, cis_trans_n_D, cis_trans_n_d,
                   a2, opts.reso, opts.factor))
        except lite.OperationalError:
            try:  # older db schema without the normalized cis/trans columns
                cur.execute("""
                insert into NORMALIZE_OUTPUTs
                (Id, JOBid, Input, N_columns, N_filtered,
                 CisTrans_raw_all, CisTrans_raw_out, Slope_700kb_10Mb,
                 Resolution, Factor)
                values (NULL, %d, %d, %d, %d, %f, %f, %f, %d, %f)
                """ % (jobid, input_bed, ncolumns, nbad_columns,
                       cis_trans_n_D, cis_trans_n_d, a2, opts.reso,
                       opts.factor))
            except lite.OperationalError:
                print 'WARNING: Normalized table not written!!!'
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        try:
            print_db(cur, 'INTERSECTION_OUTPUTs')
            print_db(cur, 'MAPPED_INPUTs')
            print_db(cur, 'MAPPED_OUTPUTs')
            print_db(cur, 'PARSED_OUTPUTs')
        except lite.OperationalError:
            pass
        print_db(cur, 'FILTER_OUTPUTs')
        print_db(cur, 'NORMALIZE_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

def save_to_db(opts, counts, multis, f_names1, f_names2, out_file1, out_file2,
               launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='PARSED_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table MAPPED_OUTPUTs
               (Id integer primary key, PATHid int, BEDid int,
                Uniquely_mapped int, unique (PATHid, BEDid))""")
            cur.execute("""
            create table PARSED_OUTPUTs
               (Id integer primary key, PATHid int, Total_interactions int,
                Multiples text, unique (PATHid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Parse', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, out_file1, 'BED', jobid, opts.workdir)
        for genome in opts.genome:
            add_path(cur, genome, 'FASTA', jobid, opts.workdir)
        if out_file2:
            add_path(cur, out_file2, 'BED', jobid, opts.workdir)
        fnames = f_names1, f_names2
        outfiles = out_file1, out_file2
        for count in counts:
            try:
                sum_reads = 0
                for i, item in enumerate(counts[count]):
                    cur.execute("""
                    insert into MAPPED_OUTPUTs
                    (Id, PATHid, BEDid, Uniquely_mapped)
                    values (NULL, %d, %d, %d)
                    """ % (get_path_id(cur, fnames[count][i], opts.workdir),
                           get_path_id(cur, outfiles[count], opts.workdir),
                           counts[count][item]))
                    sum_reads += counts[count][item]
            except lite.IntegrityError:
                print 'WARNING: already parsed (MAPPED_OUTPUTs)'
            try:
                cur.execute("""
                insert into PARSED_OUTPUTs
                (Id, PATHid, Total_interactions, Multiples)
                values (NULL, %d, %d, '%s')
                """ % (get_path_id(cur, outfiles[count], opts.workdir),
                       sum_reads,
                       ','.join([':'.join(map(str, (n, multis[count][n])))
                                 for n in multis[count] if n])))
            except lite.IntegrityError:
                print 'WARNING: already parsed (PARSED_OUTPUTs)'
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'MAPPED_OUTPUTs')
        print_db(cur, 'PARSED_OUTPUTs')
        print_db(cur, 'JOBs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

def save_to_db(opts, cmp_result, tad_result, reso, inputs, richA_stats,
               firsts, param_hash, launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='JOBs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key, JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='SEGMENT_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table SEGMENT_OUTPUTs
               (Id integer primary key, JOBid int, Inputs text, TADs int,
                Compartments int, richA_corr real, EV_index int, EValue real,
                Chromosome text, Resolution int)""")
        try:
            parameters = digest_parameters(opts, get_md5=False,
                                           extra=['fasta'])
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Segment', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        for ncrm, crm in enumerate(max(cmp_result.keys(), tad_result.keys(),
                                       key=len)):
            if crm in cmp_result:
                add_path(cur, cmp_result[crm]['path_cmprt1'], 'COMPARTMENT',
                         jobid, opts.workdir)
                add_path(cur, cmp_result[crm]['path_cmprt2'], 'COMPARTMENT',
                         jobid, opts.workdir)
                add_path(cur, cmp_result[crm]['image_cmprt'], 'FIGURE',
                         jobid, opts.workdir)
                if opts.savecorr:
                    add_path(cur, cmp_result[crm]['path_cormat'],
                             'CROSS_CORR_MAT', jobid, opts.workdir)
            if crm in tad_result:
                add_path(cur, tad_result[crm]['path'], 'TAD',
                         jobid, opts.workdir)
            if opts.rich_in_A:
                add_path(cur, opts.rich_in_A, 'BED', jobid, opts.workdir)
            if crm in firsts:
                evalue = firsts[crm][0][(opts.ev_index[ncrm] - 1)
                                        if opts.ev_index else 0]
                eindex = opts.ev_index[ncrm] if opts.ev_index else 1
            else:
                evalue = 'NULL'
                eindex = 'NULL'
            try:
                cur.execute("""
                insert into SEGMENT_OUTPUTs
                (Id, JOBid, Inputs, TADs, Compartments, richA_corr,
                 EV_index, EValue, Chromosome, Resolution)
                values (NULL, %d, '%s', %s, %s, %s, %s, %s, '%s', %d)
                """ % (jobid,
                       ','.join([str(i) for i in inputs]),
                       tad_result[crm]['num'] if crm in tad_result else 'NULL',
                       cmp_result[crm]['num'] if crm in cmp_result else 'NULL',
                       (richA_stats[crm] if crm in richA_stats
                        and richA_stats[crm] is not None else 'NULL'),
                       eindex, evalue, crm, reso))
            except lite.OperationalError:  # TODO: remove this
                print_exc()
                # upgrade an old SEGMENT_OUTPUTs schema in place
                try:
                    cur.execute("alter table SEGMENT_OUTPUTs "
                                "add column 'richA_corr' 'real'")
                except lite.OperationalError:
                    pass
                try:
                    cur.execute("alter table SEGMENT_OUTPUTs "
                                "add column 'EValue' 'real'")
                except lite.OperationalError:
                    pass
                try:
                    cur.execute("alter table SEGMENT_OUTPUTs "
                                "add column 'EV_index' 'int'")
                except lite.OperationalError:
                    pass
                cur.execute("""
                insert into SEGMENT_OUTPUTs
                (Id, JOBid, Inputs, TADs, Compartments, richA_corr,
                 EV_index, EValue, Chromosome, Resolution)
                values (NULL, %d, '%s', %d, %d, %s, %s, %s, '%s', %d)
                """ % (jobid,
                       ','.join([str(i) for i in inputs]),
                       tad_result[crm]['num'] if crm in tad_result else 0,
                       cmp_result[crm]['num'] if crm in cmp_result else 0,
                       (richA_stats[crm] if crm in richA_stats
                        and richA_stats[crm] is not None else 'NULL'),
                       eindex, evalue, crm, reso))
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        print_db(cur, 'SEGMENT_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        remove(path.join(opts.workdir, '__lock_db'))

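# Note on the locking strategy: the '__lock_db' file serializes whole runs so
# trace.db is never written concurrently. When the database sits on a local
# filesystem, SQLite can arbitrate writers itself; a sketch of that
# alternative using a busy timeout plus WAL journaling (assumption: local
# disk; the manual lock plus --tmpdb copy stays the safer choice on network
# mounts, where SQLite file locking is unreliable):
def connect_trace_sketch(workdir):
    """Open trace.db, waiting up to 60 s for a competing writer to finish."""
    con = lite.connect(path.join(workdir, 'trace.db'), timeout=60)
    con.execute('PRAGMA journal_mode=WAL')  # readers no longer block the writer
    return con
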
def save_to_db(opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
               nbad_columns, ncolumns, scc, std, reprod,
               eigen_corr_dat, eigen_corr_fig, outbed, corr, eig_corr,
               biases1, biases2, masked1, masked2, launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MERGE_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key, JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key, PATHid int, Name text, Count int,
                JOBid int, unique (PATHid))""")
            cur.execute("""
            create table MERGE_OUTPUTs
               (Id integer primary key, JOBid int, Wrkd1Path int,
                Wrkd2Path int, Bed1Path int, Bed2Path int, MergePath int,
                unique (JOBid))""")
            cur.execute("""
            create table MERGE_STATs
               (Id integer primary key, JOBid int, Inputs text,
                decay_corr text, eigen_corr text, reprod real, scc real,
                std_scc real, N_columns int, N_filtered int, Resolution int,
                bias1Path int, bias2Path int, unique (JOBid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Merge', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, decay_corr_dat, 'CORR', jobid, opts.workdir)
        add_path(cur, decay_corr_fig, 'FIGURE', jobid, opts.workdir)
        add_path(cur, eigen_corr_dat, 'CORR', jobid, opts.workdir)
        add_path(cur, eigen_corr_fig, 'FIGURE', jobid, opts.workdir)
        add_path(cur, opts.workdir, 'WORKDIR', jobid)
        add_path(cur, opts.workdir1, 'WORKDIR1', jobid, opts.workdir)
        add_path(cur, opts.workdir2, 'WORKDIR2', jobid, opts.workdir)
        add_path(cur, mreads1, 'EXT_HIC_BAM', jobid, opts.workdir)
        add_path(cur, mreads2, 'EXT_HIC_BAM', jobid, opts.workdir)
        if not opts.skip_merge:
            add_path(cur, outbed, 'HIC_BAM', jobid, opts.workdir)
        if opts.norm:
            add_path(cur, biases1, 'BIASES', jobid, opts.workdir)
            add_path(cur, biases2, 'BIASES', jobid, opts.workdir)
            biasid1 = get_path_id(cur, biases1, opts.workdir)
            biasid2 = get_path_id(cur, biases2, opts.workdir)
        else:
            biasid1 = 0
            biasid2 = 0
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads1, opts.workdir)))
        bed1 = cur.fetchall()[0][0]
        if opts.workdir1:
            cur.execute("select id from paths where path = '%s'" % (
                path.relpath(opts.workdir1, opts.workdir)))
            w1path = cur.fetchall()[0][0]
        else:
            w1path = 0
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads2, opts.workdir)))
        bed2 = cur.fetchall()[0][0]
        if opts.workdir2:
            cur.execute("select id from paths where path = '%s'" % (
                path.relpath(opts.workdir2, opts.workdir)))
            w2path = cur.fetchall()[0][0]
        else:
            w2path = 0
        if not opts.skip_merge:
            cur.execute("select id from paths where path = '%s'" % (
                path.relpath(outbed, opts.workdir)))
            outbedid = cur.fetchall()[0][0]
        if not opts.skip_comparison:
            decay_corr = '-'.join(
                ['%.1f' % (v) for v in corr[:10:2]]).replace('0.', '.')
            eigen_corr = '-'.join(
                ['%.2f' % (max(v)) for v in eig_corr[:4]]).replace('0.', '.')
        else:
            decay_corr = eigen_corr = None
        if not opts.skip_merge:
            cur.execute("""
            insert into MERGE_OUTPUTs
            (Id, JOBid, Wrkd1Path, Wrkd2Path, Bed1Path, Bed2Path, MergePath)
            values (NULL, %d, %d, %d, %d, %d, %d)
            """ % (jobid, w1path, w2path, bed1, bed2, outbedid))
        if not opts.skip_comparison:
            cur.execute("""
            insert into MERGE_STATs
            (Id, JOBid, N_columns, N_filtered, Resolution, reprod, scc,
             std_scc, decay_corr, eigen_corr, bias1Path, bias2Path)
            values (NULL, %d, %d, %d, %d, %f, %f, %f, '%s', '%s', %d, %d)
            """ % (jobid, ncolumns, nbad_columns, opts.reso, reprod, scc,
                   std, decay_corr, eigen_corr, biasid1, biasid2))
        if opts.workdir1:
            if 'tmpdb' in opts and opts.tmpdb:
                # tmp file
                dbfile1 = opts.tmpdb1
                try:  # to copy in case read1 was already mapped for example
                    copyfile(path.join(opts.workdir1, 'trace.db'), dbfile1)
                except IOError:
                    pass
            else:
                dbfile1 = path.join(opts.workdir1, 'trace.db')
            tmpcon = lite.connect(dbfile1)
            with tmpcon:
                tmpcur = tmpcon.cursor()
                tmpcur.execute("select Name, PATHid, Count from filter_outputs")
                for name, pathid, count in tmpcur.fetchall():
                    res = tmpcur.execute(
                        "select Path from PATHs where Id = %d" % (pathid))
                    tmppath = res.fetchall()[0][0]
                    masked1[name] = {'path': tmppath, 'count': count}
            if 'tmpdb' in opts and opts.tmpdb:
                remove(dbfile1)
        if opts.workdir2:
            if 'tmpdb' in opts and opts.tmpdb:
                # tmp file
                dbfile2 = opts.tmpdb2
                try:  # to copy in case read2 was already mapped for example
                    copyfile(path.join(opts.workdir2, 'trace.db'), dbfile2)
                except IOError:
                    pass
            else:
                dbfile2 = path.join(opts.workdir2, 'trace.db')
            tmpcon = lite.connect(dbfile2)
            with tmpcon:
                tmpcur = tmpcon.cursor()
                tmpcur.execute("select Name, PATHid, Count from filter_outputs")
                for name, pathid, count in tmpcur.fetchall():
                    res = tmpcur.execute(
                        "select Path from PATHs where Id = %d" % (pathid))
                    tmppath = res.fetchall()[0][0]
                    masked2[name] = {'path': tmppath, 'count': count}
            if 'tmpdb' in opts and opts.tmpdb:
                remove(dbfile2)
        for f in masked1:
            if f != 'valid-pairs':
                outmask = path.join(
                    opts.workdir, '03_filtered_reads',
                    'all_r1-r2_intersection_%s.tsv_%s.tsv' % (
                        param_hash, f.replace(' ', '_')))
                out = open(outmask, 'w')
                try:
                    fh = magic_open(path.join(opts.workdir1,
                                              masked1[f]['path']))
                except FileNotFoundError:
                    fh = magic_open(path.join(opts.workdir1,
                                              masked1[f]['path'] + '.gz'))
                for line in fh:
                    out.write(line)
                try:
                    fh = magic_open(path.join(opts.workdir2,
                                              masked2[f]['path']))
                except FileNotFoundError:
                    fh = magic_open(path.join(opts.workdir2,
                                              masked2[f]['path'] + '.gz'))
                for line in fh:
                    out.write(line)
                add_path(cur, outmask, 'FILTER', jobid, opts.workdir)
            else:
                if opts.skip_merge:
                    outmask = 'NA'
                else:
                    outmask = outbed
            try:
                path_id = get_path_id(cur, outmask, opts.workdir)
            except IndexError:
                path_id = -1
            cur.execute("""
            insert into FILTER_OUTPUTs
            (Id, PATHid, Name, Count, JOBid)
            values (NULL, %d, '%s', '%s', %d)
            """ % (path_id, f,
                   masked1[f]['count'] + masked2[f]['count'], jobid))
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        print_db(cur, 'MERGE_OUTPUTs')
        print_db(cur, 'MERGE_STATs')
        print_db(cur, 'FILTER_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_db'))
        except OSError:
            pass

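# magic_open above comes from the package utilities and evidently yields
# lines from a file whether or not it is compressed (the caller retries with
# '.gz' appended when the plain path is gone). A minimal sketch of such a
# helper, detecting gzip by magic bytes rather than by extension (assumption:
# only gzip compression needs handling here):
import gzip

def magic_open_sketch(fname):
    with open(fname, 'rb') as handle:
        magic = handle.read(2)
    if magic == b'\x1f\x8b':  # gzip magic number
        return gzip.open(fname, 'rt')
    return open(fname, 'r')
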
def save_to_db(opts, outfiles, launch_time, finish_time):
    # write little DB to keep track of processes and options
    con = lite.connect(path.join(opts.workdir, 'trace.db'))
    with con:
        # check if table exists
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MAPPED_INPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key, JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key, Parameters text, Launch_time text,
                Finish_time text, Type text, Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""
            create table MAPPED_INPUTs
               (Id integer primary key, PATHid int, Entries int, Trim text,
                Frag text, Read int, Enzyme text, WRKDIRid int,
                MAPPED_OUTPUTid int, INDEXid int,
                unique (PATHid,Entries,Read,Enzyme,WRKDIRid,MAPPED_OUTPUTid,INDEXid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id, Parameters, Launch_time, Finish_time, Type, Parameters_md5)
            values (NULL, '%s', '%s', '%s', 'Map', '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, opts.workdir, 'WORKDIR', jobid)
        add_path(cur, opts.fastq, 'MAPPED_FASTQ', jobid, opts.workdir)
        add_path(cur, opts.index, 'INDEX', jobid, opts.workdir)
        for i, (out, num) in enumerate(outfiles):
            try:
                window = opts.windows[i]
            except IndexError:
                window = opts.windows[-1]
            except TypeError:
                window = 'None'
            add_path(cur, out, 'SAM/MAP', jobid, opts.workdir)
            frag = ('none' if opts.iterative else
                    'frag' if i == len(outfiles) - 1 else 'full')
            try:
                cur.execute("""
                insert into MAPPED_INPUTs
                (Id, PATHid, Entries, Trim, Frag, Read, Enzyme, WRKDIRid,
                 MAPPED_OUTPUTid, INDEXid)
                values (NULL, %d, %d, '%s', '%s', %d, '%s', %d, %d, %d)
                """ % (get_path_id(cur, opts.fastq, opts.workdir), num,
                       window, frag, opts.read, opts.renz,
                       get_path_id(cur, opts.workdir),
                       get_path_id(cur, out, opts.workdir),
                       get_path_id(cur, opts.index, opts.workdir)))
            except lite.IntegrityError:
                pass
        print_db(cur, 'MAPPED_INPUTs')
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')

def save_to_db(opts, mreads1, mreads2, decay_corr_dat, decay_corr_fig,
               nbad_columns, ncolumns, scc, std, reprod,
               eigen_corr_dat, eigen_corr_fig, outbed, corr, eig_corr,
               biases1, biases2, launch_time, finish_time):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='MERGE_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table PATHs
               (Id integer primary key,
                JOBid int, Path text, Type text,
                unique (Path))""")
            cur.execute("""
            create table JOBs
               (Id integer primary key,
                Parameters text,
                Launch_time text,
                Finish_time text,
                Type text,
                Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""
            create table FILTER_OUTPUTs
               (Id integer primary key,
                PATHid int,
                Name text,
                Count int,
                JOBid int,
                unique (PATHid))""")
            cur.execute("""
            create table MERGE_OUTPUTs
               (Id integer primary key,
                JOBid int,
                Wrkd1Path int,
                Wrkd2Path int,
                Bed1Path int,
                Bed2Path int,
                MergePath int,
                unique (JOBid))""")
            cur.execute("""
            create table MERGE_STATs
               (Id integer primary key,
                JOBid int,
                Inputs text,
                decay_corr text,
                eigen_corr text,
                reprod real,
                scc real,
                std_scc real,
                N_columns int,
                N_filtered int,
                Resolution int,
                bias1Path int,
                bias2Path int,
                unique (JOBid))""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id  , Parameters, Launch_time, Finish_time,    Type, Parameters_md5)
            values
            (NULL,       '%s',        '%s',        '%s', 'Merge',           '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        add_path(cur, decay_corr_dat, 'CORR'      , jobid, opts.workdir)
        add_path(cur, decay_corr_fig, 'FIGURE'    , jobid, opts.workdir)
        add_path(cur, eigen_corr_dat, 'CORR'      , jobid, opts.workdir)
        add_path(cur, eigen_corr_fig, 'FIGURE'    , jobid, opts.workdir)
        add_path(cur, opts.workdir  , 'WORKDIR'   , jobid)
        add_path(cur, opts.workdir1 , 'WORKDIR1'  , jobid, opts.workdir)
        add_path(cur, opts.workdir2 , 'WORKDIR2'  , jobid, opts.workdir)
        add_path(cur, mreads1       , 'EXT_HIC_BAM', jobid, opts.workdir)
        add_path(cur, mreads2       , 'EXT_HIC_BAM', jobid, opts.workdir)
        add_path(cur, outbed        , 'HIC_BAM'   , jobid, opts.workdir)
        if opts.norm:
            add_path(cur, biases1, 'BIASES', jobid, opts.workdir)
            add_path(cur, biases2, 'BIASES', jobid, opts.workdir)
            biasid1 = get_path_id(cur, biases1, opts.workdir)
            biasid2 = get_path_id(cur, biases2, opts.workdir)
        else:
            biasid1 = 0
            biasid2 = 0
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads1, opts.workdir)))
        bed1 = cur.fetchall()[0][0]
        if opts.workdir1:
            cur.execute("select id from paths where path = '%s'" % (
                path.relpath(opts.workdir1, opts.workdir)))
            w1path = cur.fetchall()[0][0]
        else:
            w1path = 0
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(mreads2, opts.workdir)))
        bed2 = cur.fetchall()[0][0]
        if opts.workdir2:
            cur.execute("select id from paths where path = '%s'" % (
                path.relpath(opts.workdir2, opts.workdir)))
            w2path = cur.fetchall()[0][0]
        else:
            w2path = 0
        cur.execute("select id from paths where path = '%s'" % (
            path.relpath(outbed, opts.workdir)))
        outbedid = cur.fetchall()[0][0]
        if not opts.skip_comparison:
            decay_corr = '-'.join(['%.1f' % (v)
                                   for v in corr[:10:2]]).replace('0.', '.')
            eigen_corr = '-'.join(['%.2f' % (max(v))
                                   for v in eig_corr[:4]]).replace('0.', '.')
        else:
            decay_corr = eigen_corr = None
        cur.execute("""
        insert into MERGE_OUTPUTs
        (Id  , JOBid, Wrkd1Path, Wrkd2Path, Bed1Path, Bed2Path, MergePath)
        values
        (NULL,    %d,        %d,        %d,       %d,       %d,        %d)
        """ % (jobid, w1path, w2path, bed1, bed2, outbedid))
        if not opts.skip_comparison:
            cur.execute("""
            insert into MERGE_STATs
            (Id  , JOBid, N_columns, N_filtered, Resolution, reprod, scc, std_scc, decay_corr, eigen_corr, bias1Path, bias2Path)
            values
            (NULL,    %d,        %d,         %d,         %d,     %f,  %f,      %f,       '%s',       '%s',        %d,        %d)
            """ % (jobid, ncolumns, nbad_columns, opts.reso, reprod, scc, std,
                   decay_corr, eigen_corr, biasid1, biasid2))
        masked1 = {'valid-pairs': {'count': 0}}
        if opts.workdir1:
            if 'tmpdb' in opts and opts.tmpdb:
                # tmp file
                dbfile1 = opts.tmpdb1
                try:  # to copy in case read1 was already mapped for example
                    copyfile(path.join(opts.workdir1, 'trace.db'), dbfile1)
                except IOError:
                    pass
            else:
                dbfile1 = path.join(opts.workdir1, 'trace.db')
            tmpcon = lite.connect(dbfile1)
            with tmpcon:
                tmpcur = tmpcon.cursor()
                tmpcur.execute("select Name, PATHid, Count from filter_outputs")
                for name, pathid, count in tmpcur.fetchall():
                    res = tmpcur.execute(
                        "select Path from PATHs where Id = %d" % (pathid))
                    tmppath = res.fetchall()[0][0]
                    masked1[name] = {'path': tmppath, 'count': count}
            if 'tmpdb' in opts and opts.tmpdb:
                remove(dbfile1)
        masked2 = {'valid-pairs': {'count': 0}}
        if opts.workdir2:
            if 'tmpdb' in opts and opts.tmpdb:
                # tmp file
                dbfile2 = opts.tmpdb2
                try:  # to copy in case read2 was already mapped for example
                    copyfile(path.join(opts.workdir2, 'trace.db'), dbfile2)
                except IOError:
                    pass
            else:
                dbfile2 = path.join(opts.workdir2, 'trace.db')
            tmpcon = lite.connect(dbfile2)
            with tmpcon:
                tmpcur = tmpcon.cursor()
                tmpcur.execute("select Name, PATHid, Count from filter_outputs")
                for name, pathid, count in tmpcur.fetchall():
                    res = tmpcur.execute(
                        "select Path from PATHs where Id = %d" % (pathid))
                    tmppath = res.fetchall()[0][0]
                    masked2[name] = {'path': tmppath, 'count': count}
            if 'tmpdb' in opts and opts.tmpdb:
                remove(dbfile2)
        for f in masked1:
            if f != 'valid-pairs':
                outmask = path.join(opts.workdir, '03_filtered_reads',
                                    'all_r1-r2_intersection_%s.tsv_%s.tsv' % (
                                        param_hash, f))
                out = open(outmask, 'w')
                for line in open(path.join(opts.workdir1, masked1[f]['path'])):
                    out.write(line)
                for line in open(path.join(opts.workdir2, masked2[f]['path'])):
                    out.write(line)
                out.close()  # flush the merged filter file before registering it
                add_path(cur, outmask, 'FILTER', jobid, opts.workdir)
            else:
                outmask = outbed
            cur.execute("""
            insert into FILTER_OUTPUTs
            (Id  , PATHid, Name, Count, JOBid)
            values
            (NULL,     %d, '%s',  '%s',    %d)
            """ % (get_path_id(cur, outmask, opts.workdir),
                   f, masked1[f]['count'] + masked2[f]['count'], jobid))
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        print_db(cur, 'MERGE_OUTPUTs')
        print_db(cur, 'MERGE_STATs')
        print_db(cur, 'FILTER_OUTPUTs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
    # release lock
    try:
        remove(path.join(opts.workdir, '__lock_db'))
    except OSError:
        pass
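# Worked example of the compact correlation strings stored in MERGE_STATs
# above: every other value of the first ten decay correlations at one
# decimal, and the best eigenvector correlation of the first four
# components at two decimals, with leading zeros stripped. The values
# below are made up; _demo_corr_strings is a hypothetical helper:
def _demo_corr_strings():
    corr = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0]
    eig_corr = [[0.95, 0.91], [0.82, 0.88], [0.60, 0.75], [0.40, 0.55]]
    decay_corr = '-'.join(['%.1f' % (v)
                           for v in corr[:10:2]]).replace('0.', '.')
    eigen_corr = '-'.join(['%.2f' % (max(v))
                           for v in eig_corr[:4]]).replace('0.', '.')
    return decay_corr, eigen_corr  # ('.9-.7-.5-.3-.1', '.95-.88-.75-.55')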
def save_to_db(opts, cmp_result, tad_result, reso, inputs,
               launch_time, finish_time):
    if 'tmp' in opts and opts.tmp:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmp
        copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        cur.execute("""SELECT name FROM sqlite_master WHERE
                       type='table' AND name='SEGMENT_OUTPUTs'""")
        if not cur.fetchall():
            cur.execute("""
            create table SEGMENT_OUTPUTs
               (Id integer primary key,
                JOBid int,
                Inputs text,
                TADs int,
                Compartments int,
                Chromosome text,
                Resolution int)""")
        try:
            parameters = digest_parameters(opts, get_md5=False)
            param_hash = digest_parameters(opts, get_md5=True)
            cur.execute("""
            insert into JOBs
            (Id  , Parameters, Launch_time, Finish_time,      Type, Parameters_md5)
            values
            (NULL,       '%s',        '%s',        '%s', 'Segment',           '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        jobid = get_jobid(cur)
        # iterate over whichever result covers more chromosomes
        for crm in max(cmp_result.keys(), tad_result.keys(), key=len):
            if crm in cmp_result:
                add_path(cur, cmp_result[crm]['path'], 'COMPARTMENT',
                         jobid, opts.workdir)
            if crm in tad_result:
                add_path(cur, tad_result[crm]['path'], 'TAD',
                         jobid, opts.workdir)
            cur.execute("""
            insert into SEGMENT_OUTPUTs
            (Id  , JOBid, Inputs, TADs, Compartments, Chromosome, Resolution)
            values
            (NULL,    %d,   '%s',   %d,           %d,       '%s',         %d)
            """ % (jobid,
                   ','.join([str(i) for i in inputs]),
                   tad_result[crm]['num'] if crm in tad_result else 0,
                   cmp_result[crm]['num'] if crm in cmp_result else 0,
                   crm, reso))
        print_db(cur, 'PATHs')
        print_db(cur, 'JOBs')
        print_db(cur, 'SEGMENT_OUTPUTs')
    if 'tmp' in opts and opts.tmp:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
        # release lock
        remove(path.join(opts.workdir, '__lock_db'))
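# Sketch of reading the segmentation bookkeeping back, e.g. to list TAD
# and compartment counts per chromosome at a given resolution.
# list_segmentations is a hypothetical helper, not part of the pipeline;
# it relies only on the SEGMENT_OUTPUTs table created above:
def list_segmentations(workdir, reso):
    con = lite.connect(path.join(workdir, 'trace.db'))
    with con:
        cur = con.cursor()
        cur.execute("select Chromosome, TADs, Compartments "
                    "from SEGMENT_OUTPUTs where Resolution = %d" % reso)
        for crm, ntads, ncomps in cur.fetchall():
            print('%-12s TADs: %-6s compartments: %s' % (crm, ntads, ncomps))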
def save_to_db(opts, launch_time, finish_time, out_files, out_plots):
    if 'tmpdb' in opts and opts.tmpdb:
        # check lock
        while path.exists(path.join(opts.workdir, '__lock_db')):
            time.sleep(0.5)
        # close lock
        open(path.join(opts.workdir, '__lock_db'), 'a').close()
        # tmp file
        dbfile = opts.tmpdb
        try:  # to copy in case read1 was already mapped for example
            copyfile(path.join(opts.workdir, 'trace.db'), dbfile)
        except IOError:
            pass
    else:
        dbfile = path.join(opts.workdir, 'trace.db')
    con = lite.connect(dbfile)
    with con:
        cur = con.cursor()
        try:
            parameters = digest_parameters(opts, get_md5=False, extra=['quiet'])
            param_hash = digest_parameters(opts, get_md5=True, extra=['quiet'])
            cur.execute("""
            insert into JOBs
            (Id  , Parameters, Launch_time, Finish_time,  Type, Parameters_md5)
            values
            (NULL,       '%s',        '%s',        '%s', 'Bin',           '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        except lite.IntegrityError:
            pass
        except lite.OperationalError:
            try:
                cur.execute("""
                create table PATHs
                   (Id integer primary key,
                    JOBid int, Path text, Type text,
                    unique (Path))""")
            except lite.OperationalError:
                pass  # may happen when mapped files were cleaned
            cur.execute("""
            create table JOBs
               (Id integer primary key,
                Parameters text,
                Launch_time text,
                Finish_time text,
                Type text,
                Parameters_md5 text,
                unique (Parameters_md5))""")
            cur.execute("""
            insert into JOBs
            (Id  , Parameters, Launch_time, Finish_time,  Type, Parameters_md5)
            values
            (NULL,       '%s',        '%s',        '%s', 'Bin',           '%s')
            """ % (parameters,
                   time.strftime("%d/%m/%Y %H:%M:%S", launch_time),
                   time.strftime("%d/%m/%Y %H:%M:%S", finish_time),
                   param_hash))
        jobid = get_jobid(cur)
        for fnam in out_files:
            add_path(cur, out_files[fnam], fnam + '_MATRIX', jobid, opts.workdir)
        for fnam in out_plots:
            add_path(cur, out_plots[fnam], fnam + '_FIGURE', jobid, opts.workdir)
        if not opts.quiet:
            print_db(cur, 'JOBs')
            print_db(cur, 'PATHs')
    if 'tmpdb' in opts and opts.tmpdb:
        # copy back file
        copyfile(dbfile, path.join(opts.workdir, 'trace.db'))
        remove(dbfile)
    # release lock
    try:
        remove(path.join(opts.workdir, '__lock_db'))
    except OSError:
        pass
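# The 'unique (Parameters_md5)' constraint on JOBs is what makes the
# inserts above idempotent: re-running a step with identical parameters
# raises lite.IntegrityError, which is caught and skipped. A minimal
# illustration with made-up values (_demo_idempotent_job_insert is a
# hypothetical helper):
def _demo_idempotent_job_insert(cur):
    for _ in range(2):  # the second attempt hits the unique constraint
        try:
            cur.execute("""
            insert into JOBs
            (Id  , Parameters, Launch_time, Finish_time,  Type, Parameters_md5)
            values
            (NULL, '-q', '01/01/2024 00:00:00', '01/01/2024 00:01:00',
             'Bin', 'abc123')""")
        except lite.IntegrityError:
            pass  # job already recorded, nothing to do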