def create_5_3_prime_align_tables(align_table_in, args, db, cursor):
    """Create the paired per-base alignment tables <base>_3prime and <base>_5prime.

    Both tables share one schema; 'combindex' is a composite index supporting
    combined (refid, refpos, cigarclass) queries.
    """
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'refid INT(3) DEFAULT NULL, INDEX (refid)',
        'alignnum INT(4) DEFAULT NULL, INDEX (alignnum)',
        'covcount INT(4) DEFAULT NULL, INDEX (covcount)',
        'alignstrand VARCHAR(1), INDEX (alignstrand)',
        'score INT(4), INDEX (score)',
        'seqpos INT(6) DEFAULT NULL, INDEX (seqpos)',
        'refpos INT(6) DEFAULT NULL, INDEX (refpos)',
        'seqbase VARCHAR(1) DEFAULT NULL, INDEX (seqbase)',
        'refbase VARCHAR(1) DEFAULT NULL, INDEX (refbase)',
        'seqbasequal INT(2) DEFAULT NULL, INDEX (seqbasequal)',
        'cigarclass VARCHAR(1) DEFAULT NULL, INDEX (cigarclass)',
        'index combindex (refid,refpos,cigarclass)',
    )
    ddl_body = ','.join(column_defs)
    # 3' table first, then 5', exactly as before.
    for table_name in (align_table_in + '_3prime', align_table_in + '_5prime'):
        sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (table_name, ddl_body)
        args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_align_table_sam(tablename, args, db, cursor):
    """Create a table holding SAM-format alignment records (one row per line).

    Columns mirror the standard SAM fields plus a few optional tags
    (n_m, m_d, a_s, x_s).
    """
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'qname VARCHAR(150)',
        'flag INT(4) ',
        'rname VARCHAR(150)',
        'pos INT(7)',
        'mapq INT(3)',
        'cigar TEXT',
        'rnext VARCHAR(100)',
        'pnext INT(4)',
        'tlen INT(4)',
        'seq LONGTEXT',
        'qual LONGTEXT',
        'n_m VARCHAR(10)',
        'm_d TEXT',
        'a_s VARCHAR(10)',
        'x_s VARCHAR(10)',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_caller_table_noindex(tablename, args, db, cursor):
    """Create an unindexed MyISAM table for per-event basecaller output.

    MyISAM (not InnoDB) is used here; no secondary indexes are declared,
    presumably to keep bulk event inserts fast.
    """
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL',
        'mean FLOAT(25,17) NOT NULL',
        'start FLOAT(25,17) NOT NULL',
        'stdv FLOAT(25,17) NOT NULL',
        'length FLOAT(25,17) NOT NULL',
        'model_state VARCHAR(10) NOT NULL',
        'model_level FLOAT(25,17) NOT NULL',
        'move INT(64) NOT NULL',
        'p_model_state FLOAT(25,17) NOT NULL',
        'mp_state VARCHAR(10) NOT NULL',
        'p_mp_state FLOAT(25,17) NOT NULL',
        'p_A FLOAT(25,17) NOT NULL',
        'p_C FLOAT(25,17) NOT NULL',
        'p_G FLOAT(25,17) NOT NULL',
        'p_T FLOAT(25,17) NOT NULL',
        'raw_index INT(64) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=MyISAM' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def mysql_load_from_hashes( args, db, cursor, tablename, data_hash, ): """ Load hash data into database """ cols = list() vals = list() for (_, entry) in data_hash.iteritems(): if isinstance(entry, basestring): vals.append("'%s'" % entry) else: vals.append(str(entry)) cols = ','.join(data_hash.keys()) values = ','.join(vals) sql = 'INSERT INTO %s (%s) VALUES (%s) ' % (tablename, cols, values) if args.verbose == "high": print sql args, db, cursor = cursor_execute(args, db, cursor, sql) ids = cursor.lastrowid return ids
def upload_model_data(
    tablename,
    model_name,
    model_location,
    hdf,
    args,
    db,
    cursor,
):
    """Bulk-insert a basecalling model table from the HDF5 file into MySQL.

    Each HDF row becomes one VALUES group prefixed with *model_name*.
    Rows of length 6 get a 0 inserted at position 1 -- presumably a missing
    'variant' column; TODO confirm against the model schema.
    """
    model_rows = hdf[model_location][()]
    value_groups = []
    for model_row in model_rows:
        cells = list(model_row)
        if len(cells) == 6:
            cells.insert(1, 0)
        group = "('" + str(model_name) + "'"
        for cell in cells:
            # Numbers go in bare, anything else gets single quotes.
            if isinstance(cell, (int, long, float, complex)):
                group += ',' + str(cell)
            else:
                group += ",'" + str(cell) + "'"
        group += ')'
        value_groups.append(group)
    stringvals = ','.join(value_groups)
    sql = \
        'INSERT INTO %s (model,kmer,variant,level_mean,level_stdv,sd_mean,sd_stdv,weight) VALUES %s;' \
        % (tablename, stringvals)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
    db.commit()
def create_mincontrol_barcode_control_table(tablename, args, db, cursor):
    """Create the minControl barcode job-queue table (barcode id + done flag)."""
    column_defs = (
        'job_index INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (job_index)',
        'barcodeid MEDIUMTEXT NOT NULL',
        'complete INT NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_model_list_table(tablename, args, db, cursor):
    """Create a table mapping each read (basename_id) to its template and
    complement model names."""
    column_defs = (
        'basename_id INT(7), PRIMARY KEY(basename_id)',
        'template_model VARCHAR(200), INDEX (template_model)',
        'complement_model VARCHAR(200), INDEX (complement_model)',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_2d_alignment_table(tablename, args, db, cursor):
    """Create a table of template/complement event indices per kmer for 2D
    basecalls."""
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'template INT(5) NOT NULL',
        'complement INT(5) NOT NULL',
        'kmer VARCHAR(10) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def modify_gru(args, db, cursor):
    """Ensure the shared Gru.minIONruns table carries the columns minup needs.

    Adds 'mt_ctrl_flag', 'watch_dir' and 'host_ip' when absent; a no-op when
    they already exist, so this is safe to call on every run.  (Previously
    three copy-pasted probe/ALTER stanzas; the SQL issued is unchanged.)
    """
    # (column_name, ADD-clause) pairs, applied in the original order.
    wanted_columns = (
        ('mt_ctrl_flag', 'mt_ctrl_flag INT(1) DEFAULT 0'),
        ('watch_dir', 'watch_dir TEXT(200)'),
        ('host_ip', 'host_ip TEXT(16)'),
    )
    for column_name, add_clause in wanted_columns:
        # Probe INFORMATION_SCHEMA first rather than ALTERing blindly.
        sql = \
            'SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA="Gru" AND TABLE_NAME="minIONruns" AND column_name="%s" ' \
            % column_name
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        if cursor.rowcount == 0:
            sql = 'ALTER TABLE Gru.minIONruns ADD %s' % add_clause
            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
def create_xml_table(tablename, args, db, cursor):
    """Create (if absent) a table storing raw ENA XML documents by type and id."""
    column_defs = (
        'xmlindex INT(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY(xmlindex)',
        'type VARCHAR(20) NOT NULL',
        'primary_id VARCHAR(30) NOT NULL',
        'filename VARCHAR(30) NOT NULL',
        'xml TEXT DEFAULT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_reference_table(tablename, args, db, cursor):
    """Create (if absent) the table describing reference sequences used for
    alignment (name, length, source file, total reference length)."""
    column_defs = (
        'refid INT(3) NOT NULL AUTO_INCREMENT, PRIMARY KEY(refid)',
        'refname VARCHAR(50), UNIQUE INDEX (refname)',
        'reflen INT(7), INDEX (reflen)',
        'reffile VARCHAR(100), INDEX (reffile)',
        'ref_total_len VARCHAR(100), INDEX (ref_total_len)',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_mincontrol_messages_table(tablename, args, db, cursor):
    """Create the minControl message-queue table (message, target, params,
    completion flag)."""
    column_defs = (
        'message_index INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (message_index)',
        'message MEDIUMTEXT NOT NULL',
        'target MEDIUMTEXT NOT NULL',
        'param1 MEDIUMTEXT',
        'param2 MEDIUMTEXT',
        'complete INT NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_ref_kmer_table(tablename, args, db, cursor):
    """Create a table of kmer counts/frequencies per reference sequence."""
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'kmer VARCHAR(10) NOT NULL, INDEX (kmer)',
        'refid INT(3) NOT NULL, INDEX (refid)',
        'count INT(7) NOT NULL',
        'total INT(7) NOT NULL',
        'freq float(13,10) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_comment_table_if_not_exists(tablename, args, db, cursor):
    """Create (if absent) the shared run-comments table, utf8 charset.

    The "already exists" warning MySQL raises on IF NOT EXISTS is filtered
    out so repeat runs stay quiet.
    """
    column_defs = (
        'comment_id INT(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY(comment_id)',
        'runindex INT(11) NOT NULL',
        'runname TEXT NOT NULL',
        'user_name TEXT NOT NULL',
        'date DATETIME NOT NULL',
        'comment TEXT NOT NULL',
        'name TEXT NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = \
        'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB DEFAULT CHARSET=utf8' \
        % (tablename, ddl_body)
    filterwarnings('ignore', "Table 'comments' already exists")
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_trackingid_table(tablename, args, db, cursor):
    """Create (if absent) the per-read tracking table.

    Columns mirror the HDF5 /UniqueGlobalKey/tracking_id attributes plus
    file bookkeeping (path, md5sum, pass flag, channel calibration values).
    """
    column_defs = (
        'basename_id INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(basename_id)',
        'basename VARCHAR(150) NOT NULL',
        'asic_id BIGINT(15) NOT NULL',
        'asic_id_17 BIGINT(15) ',
        'asic_id_eeprom INT(5) ',
        'asic_temp DOUBLE(4,1) NOT NULL',
        'device_id TEXT(8) NOT NULL',
        'exp_script_purpose VARCHAR(50) NOT NULL',
        'exp_script_name VARCHAR(150)',
        'exp_start_time INT(15) NOT NULL',
        'flow_cell_id VARCHAR(20) NOT NULL',
        'heatsink_temp FLOAT(10) NOT NULL',
        'hostname TEXT',
        'run_id TEXT(40) NOT NULL',
        'version_name VARCHAR(30) ',
        'file_path TEXT(300) NOT NULL',
        'channel_number int(7)',
        'digitisation float',
        'offset float',
        'range_val float',
        'sampling_rate float',
        'pass INT(1) NOT NULL',
        'md5sum TEXT(33) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_model_data_table(tablename, args, db, cursor):
    """Create the per-kmer basecalling model parameter table (level/sd
    mean & stdv plus weight, keyed by model name and kmer)."""
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'model VARCHAR(200) NOT NULL, INDEX (model)',
        'kmer VARCHAR(10) NOT NULL, INDEX (kmer)',
        'variant INT(10) NOT NULL',
        'level_mean FLOAT(25,17) NOT NULL',
        'level_stdv FLOAT(25,17) NOT NULL',
        'sd_mean FLOAT(25,17) NOT NULL',
        'sd_stdv FLOAT(25,17) NOT NULL',
        'weight FLOAT(25,17) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def load_ref_kmer_hash(args, db, cursor, tablename, kmers, refid):
    """Bulk-insert kmer statistics for one reference into *tablename*.

    kmers maps kmer string -> occurrence count; refid identifies the
    reference row.  All rows are sent as a single multi-row INSERT.
    """
    sql = "INSERT INTO %s (kmer, refid, count, total, freq) VALUES " % (
        tablename)
    # Total occurrences across all kmers, used as the 'total' column.
    totalkmercount = sum(kmers.itervalues())
    for kmer, count in kmers.iteritems():
        #n+=1
        # NOTE(review): 'freq' is computed as 1/(total*count), not the usual
        # count/total for a column named freq -- looks suspicious; confirm
        # intent against consumers of ref_sequence_kmer before changing.
        f = 1 / (totalkmercount * float(count))
        freq = "{:.10f}".format(f)
        #print f, freq, totalkmercount, count
        sql += "('%s',%s,%s,%s,%s)," % (kmer, refid, count, totalkmercount,
                                        freq)
    # Drop the trailing comma.  NOTE(review): if kmers is empty this instead
    # chops the space off "VALUES " and produces invalid SQL.
    sql = sql[:-1]
    #print sql
    if args.verbose == "high":
        print sql
        debug()
    args, db, cursor = cursor_execute(args, db, cursor, sql)
    db.commit()
def create_basic_read_info(tablename, args, db, cursor):
    """Create the per-read raw info table (abasic/hairpin detection fields,
    read metadata, and the family of time-window bucket columns)."""
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'abasic_event_index INT(1) NOT NULL',
        'abasic_found INT(1) NOT NULL',
        'abasic_peak_height FLOAT(25,17)',
        'duration INT(15)',
        'hairpin_event_index INT(10)',
        'hairpin_found INT(1)',
        'hairpin_peak_height FLOAT(25,17)',
        'hairpin_polyt_level FLOAT(25,17)',
        'median_before FLOAT(25,17)',
        'read_id VARCHAR(37)',
        'read_number int(10)',
        'scaling_used int(5)',
        'start_mux int(1)',
        'start_time int(20)',
        'sampling_rate float',
        # Indexed 1/5/10/15-minute window buckets (plus s/g/g_s variants).
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0, INDEX(align)',
        'pass INT(1) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_barcode_table(tablename, args, db, cursor):
    """Create (if absent) the per-read barcode assignment table."""
    column_defs = (
        'basename_id INT(10), PRIMARY KEY(basename_id)',
        'pos0_start INT(5) ',
        'score INT(6) ',
        'design VARCHAR(10) ',
        'pos1_end INT(5) ',
        'pos0_end INT(5) ',
        'pos1_start INT(5) ',
        'variant VARCHAR(8) ',
        'barcode_arrangement VARCHAR(12) ',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_pre_align_table(tablename, args, db, cursor):
    """Create a table of per-read pre-alignment summaries (reference hit,
    strand, and start/length of the aligned span on reference and query)."""
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7), INDEX (basename_id)',
        'refid INT(3) DEFAULT NULL, INDEX (refid)',
        'alignstrand VARCHAR(1) DEFAULT NULL, INDEX (alignstrand)',
        'r_start INT(7) DEFAULT NULL',
        'q_start INT(5) DEFAULT NULL',
        'r_align_len INT(7) DEFAULT NULL',
        'q_align_len INT(5) DEFAULT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def upload_2dalignment_data(
    basenameid,
    channel,
    alignment,
    args,
    db,
    cursor,
):
    """Bulk-insert 2D alignment tuples for one read into the per-channel
    caller_basecalled_2d_alignment_<channel> table.

    Each alignment entry supplies (template_index, complement_index, kmer).
    """
    # NOTE(review): a fresh cursor deliberately replaces the one passed in;
    # confirm callers do not rely on the original cursor object coming back.
    cursor = db.cursor()
    value_groups = ["(%d,%d,%d,'%s')" % (basenameid, entry[0], entry[1],
                    entry[2]) for entry in alignment]
    stringvals = ','.join(value_groups)
    sql = \
        'INSERT INTO caller_basecalled_2d_alignment_%s (basename_id,template,complement,kmer) VALUES %s;' \
        % (channel, stringvals)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
    db.commit()
bcs = list() for i in range(len(barcode_align_obj)): if barcode_align_obj[i].startswith('>'): bc = re.split('>| ', barcode_align_obj[i])[-1] b = "('%s',0)" % bc bcs.append(b) sql = \ 'INSERT INTO barcode_control (barcodeid,complete) VALUES %s' \ % ','.join(bcs) # print sql if args.verbose == "high": print sql debug() args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() dbcheckhash['barcode_info'][dbname] = True return dbname # --------------------------------------------------------------------------- def get_hdf_attribute(args, hdf, group, attr): try: hdf5object = hdf[group] value = hdf5object.attrs[attr] except: value = -1
def create_basecall_summary_info(tablename, args, db, cursor):
    """Create the per-read basecall summary table.

    Holds Metrichor summary statistics (C = complement strand, T = template,
    2 = 2D) plus the same indexed time-window bucket columns used elsewhere.
    """
    column_defs = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'abasic_dur float',
        'abasic_index int',
        'abasic_peak float',
        'duration_comp float',
        'duration_temp float',
        'end_index_comp int',
        'end_index_temp int',
        'hairpin_abasics int',
        'hairpin_dur float',
        'hairpin_events int',
        'hairpin_peak float',
        'median_level_comp float',
        'median_level_temp float',
        'median_sd_comp float',
        'median_sd_temp float',
        'num_comp int',
        'num_events int',
        'num_temp int',
        'pt_level float',
        'range_comp float',
        'range_temp float',
        'split_index int',
        'start_index_comp int',
        'start_index_temp int',
        'driftC float ',
        'mean_qscoreC float',
        'num_skipsC int',
        'num_staysC int',
        'scaleC float',
        'scale_sdC float',
        'sequence_lengthC int',
        'shiftC float',
        'strand_scoreC float',
        'varC float',
        'var_sdC float',
        'driftT float ',
        'mean_qscoreT float',
        'num_skipsT int',
        'num_staysT int',
        'scaleT float',
        'scale_sdT float',
        'sequence_lengthT int',
        'shiftT float',
        'strand_scoreT float',
        'varT float',
        'var_sdT float',
        'mean_qscore2 float',
        'sequence_length2 int',
        'exp_start_time INT(15) NOT NULL',
        'sampling_rate float',
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0, INDEX(align)',
        'pass INT(1) NOT NULL',
    )
    ddl_body = ','.join(column_defs)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, ddl_body)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def check_read(db, args, connection_pool, minup_version, comments, xml_file_dict, ref_fasta_hash, dbcheckhash, logfolder, filepath, hdf, cursor, oper, ip): global runindex if args.verbose == "high": print "Checking read ..." sys.stdout.flush() filename = os.path.basename(filepath) if args.verbose == "high": print time.strftime('%Y-%m-%d %H:%M:%S'), 'processing:', filename sys.stdout.flush() # Remove "_sequencing_run_" and/or "_mux_scan_" from filename_ # as used to ake dbname ... filename_ = filename filename_ = filename_.replace("_sequencing_run_", '_') filename_ = filename_.replace("_mux_scan_", '_') parts = filename_.split('_') strSep = '_' # Changing the number below enables the removal of the random four digit number from run names on restart dbname = strSep.join(parts[0:len(parts) - 5]) dbname = re.sub('[.!,; ]', '', dbname) if len(args.custom_name) > 0: dbname = args.minotourusername + '_' + args.custom_name + '_' \ + dbname else: dbname = args.minotourusername + '_' + dbname if len(dbname) > 64: dbname = dbname[:64] if dbname.endswith('_'): #ml dbname = dbname[:-1] #ml if args.verbose == "high": print "dbname is ", dbname print "Parts were ", parts debug() # --------------------------------------------------------------------------- if dbname in dbcheckhash[ 'dbname']: # so data from this run has been seen before in this instance of minup so switch to it! if dbcheckhash['dbname'][dbname] is False: if args.verbose == "high": print 'switching to database: ', dbname sys.stdout.flush() sql = 'USE %s' % dbname args, db, cursor = cursor_execute(args, db, cursor, sql) # --------------------------------------------------------------------------- try: runindex = dbcheckhash['runindex'][dbname] # MS .. 
except: print "checkRead(): line 112, dbcheckhash, key error: " \ + dbname sys.stdout.flush() #sys.exit() return () comment_string = 'minUp switched runname' start_time = time.strftime('%Y-%m-%d %H:%M:%S') sql = \ "INSERT INTO Gru.comments (runindex,runname,user_name,comment,name,date) VALUES (%s,'%s','%s','%s','%s','%s')" \ % ( runindex, dbname, args.minotourusername, comment_string, args.minotourusername, start_time, ) if args.verbose == "high": print sql debug() db.escape_string(sql) args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() # --------------------------------------------------------------------------- ks = dbcheckhash['dbname'].keys() n = len(ks) bar = mk_bar(n) bar.start() for i, e in enumerate(ks): bar.update(i) dbcheckhash['dbname'][e] = False bar.finish() dbcheckhash['dbname'][dbname] = True # --------------------------------------------------------------------------- if dbname not in dbcheckhash[ 'dbname']: # # so the db has not been seen before.. time to set up lots of things... dbcheckhash['barcoded'][dbname] = False dbcheckhash['barcode_info'][dbname] = False dbcheckhash['logfile'][dbname] = os.path.join(os.path.sep, logfolder, dbname + '.minup.log') if args.verbose == "high": print 'trying database: ', dbname sys.stdout.flush() sql = "SHOW DATABASES LIKE \'%s\'" % dbname # print sql args, db, cursor = cursor_execute(args, db, cursor, sql) if cursor.fetchone(): if args.verbose == "high": print 'database exists!' sys.stdout.flush() # # drop the existing database, if selected if args.drop_db is True: sql = 'DROP DATABASE %s' % dbname # print sql args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() if args.verbose == "high": print 'database dropped.' 
sys.stdout.flush() else: print >> sys.stderr, "=" * 80 print >> sys.stderr, \ 'WARNING: DATABASE \"%s\" already EXISTS.\nTo write over the data re-run the minUP command with option -d' % dbname print >> sys.stderr, "=" * 80 sys.stdout.flush() if args.batch_fasta == False: # MS next 6 lines ... print >> sys.stderr, \ 'not in batch mode so exiting ...' sys.stdout.flush() terminate_minup(args, dbcheckhash, oper, minup_version) #terminate_minup(args, dbcheckhash, oper, minup_version) #sys.exit() if args.drop_db is True: print 'Deleting exisiting run from Gru now ...' sys.stdout.flush() sql = \ 'DELETE FROM Gru.userrun WHERE runindex IN (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s")' \ % dbname # print sql args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() sql = "DELETE FROM Gru.minIONruns WHERE runname = \'%s\'" \ % dbname # print sql args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() print '.... Run deleted.' sys.stdout.flush() # --------------------------------------------------------------------------- # -------- This bit adds columns to Gru.minIONruns -------- modify_gru(args, db, cursor) # --------------------------------------------------------------------------- # -------- Create a new empty database #if args.verbose == "high": print 'Making new database: ', dbname print '=' * 80 sys.stdout.flush() dbF = open('dbname.txt', 'w') dbF.write(dbname + "\n") dbF.close() sql = 'CREATE DATABASE %s' % dbname #print sql args, db, cursor = cursor_execute(args, db, cursor, sql) sql = 'USE %s' % dbname args, db, cursor = cursor_execute(args, db, cursor, sql) # Create Tables .... 
create_general_table('config_general', args, db, cursor) create_trackingid_table('tracking_id', args, db, cursor) create_basecall_summary_info('basecall_summary', args, db, cursor) create_events_model_fastq_table('basecalled_template', args, db, cursor) create_events_model_fastq_table('basecalled_complement', args, db, cursor) create_basecalled2d_fastq_table('basecalled_2d', args, db, cursor) if args.pin is not False: create_mincontrol_interaction_table('interaction', args, db, cursor) create_mincontrol_messages_table('messages', args, db, cursor) create_mincontrol_barcode_control_table('barcode_control', args, db, cursor) #print "DONE" # --------------------------------------------------------------------------- ''' # DEPRECATIN TELEM MS 11.10.16 if args.telem is True: for i in xrange(0, 10): temptable = 'caller_basecalled_template_%d' % i comptable = 'caller_basecalled_complement_%d' % i twod_aligntable = 'caller_basecalled_2d_alignment_%d' \ % i create_caller_table_noindex(temptable, args, db, cursor) create_caller_table_noindex(comptable, args, db, cursor) create_2d_alignment_table(twod_aligntable, args, db, cursor) create_model_list_table('model_list', args, db, cursor) create_model_data_table('model_data', args, db, cursor) ''' # --------------------------------------------------------------------------- if args.preproc is True: create_pretrackingid_table('pre_tracking_id', args, db, cursor) # make another table create_pre_general_table('pre_config_general', args, db, cursor) # pre config general table # -------- Assign the correct reference fasta for this dbname if applicable if args.batch_fasta is not False: for refbasename in ref_fasta_hash.keys(): common_path = \ os.path.commonprefix((ref_fasta_hash[refbasename]['path' ], filepath)).rstrip('\\|\/|re|\\re|\/re') if common_path.endswith('downloads'): ref_fasta_hash[dbname] = ref_fasta_hash[refbasename] # del ref_fasta_hash[refbasename] if args.ref_fasta is not False: for refbasename in ref_fasta_hash.keys( 
): # there should only be one key ref_fasta_hash[dbname] = ref_fasta_hash[refbasename] # --------------------------------------------------------------------------- if dbname in ref_fasta_hash: # great, we assigned the reference fasta to this dbname create_reference_table('reference_seq_info', args, db, cursor) create_5_3_prime_align_tables('last_align_basecalled_template', args, db, cursor) create_5_3_prime_align_tables('last_align_basecalled_complement', args, db, cursor) create_5_3_prime_align_tables('last_align_basecalled_2d', args, db, cursor) if args.last_align is True: # create_align_table('last_align_basecalled_template', args, db, cursor) # create_align_table('last_align_basecalled_complement', args, db, cursor) # create_align_table('last_align_basecalled_2d', args, db, cursor) create_align_table_maf('last_align_maf_basecalled_template', args, db, cursor) create_align_table_maf('last_align_maf_basecalled_complement', args, db, cursor) create_align_table_maf('last_align_maf_basecalled_2d', args, db, cursor) if args.bwa_align is True: create_align_table_sam('align_sam_basecalled_template', args, db, cursor) create_align_table_sam('align_sam_basecalled_complement', args, db, cursor) create_align_table_sam('align_sam_basecalled_2d', args, db, cursor) # dbcheckhash["mafoutdict"][dbname]=open(dbname+"."+process+".align.maf","w") ''' # DEPRECATIN TELEM MS 11.10.16 if args.telem is True: create_ref_kmer_table('ref_sequence_kmer', args, db, cursor) ''' if args.prealign is True: create_pre_align_table('pre_align_template', args, db, cursor) create_pre_align_table('pre_align_complement', args, db, cursor) create_pre_align_table('pre_align_2d', args, db, cursor) create_align_table_raw('last_align_raw_template', args, db, cursor) create_align_table_raw('last_align_raw_complement', args, db, cursor) create_align_table_raw('last_align_raw_2d', args, db, cursor) for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys(): # print "refname", refname reference = 
ref_fasta_hash[dbname]['seq_file'][refname] reflen = ref_fasta_hash[dbname]['seq_len'][refname] reflength = ref_fasta_hash[dbname]['seq_file_len'][reference] refid = mysql_load_from_hashes( args, db, cursor, 'reference_seq_info', { 'refname': refname, 'reflen': reflen, 'reffile': reference, 'ref_total_len': reflength, }) ref_fasta_hash[dbname]['refid'][refname] = refid ''' # DEPRECATIN TELEM MS 11.10.16 if args.telem is True: kmers = ref_fasta_hash[dbname]['kmer'][refname] load_ref_kmer_hash(args, db, 'ref_sequence_kmer', kmers, refid, args, db, cursor) ''' # --------------------------------------------------------------------------- # -------- See if theres any ENA XML stuff to add. # -------- Need to do this now as it changes the "comment" # -------- in Gru.minionRuns entry # print "C", comment ena_flowcell_owner = None for xml_to_downloads_path in xml_file_dict.keys(): # xmlpath=xml_file_dict["study"][study_id]["path"] common_path = os.path.commonprefix( (xml_to_downloads_path, filepath)).rstrip('\\|\/|re') if common_path.endswith('downloads'): print 'found XML data for:', dbname sys.stdout.flush() create_xml_table('XML', args, db, cursor) # --------------------------------------------------------------------------- downloadsPath = xml_file_dict[xml_to_downloads_path] for study_id in \ downloadsPath['study'].keys(): ena_flowcell_owner = study_id study_xml = \ downloadsPath['study'][study_id]['xml'] study_file = \ downloadsPath['study'][study_id]['file'] study_title = \ downloadsPath['study'][study_id]['title'] study_abstract = \ downloadsPath['study'][study_id]['abstract'] exp_c = 'NA' samp_c = 'NA' run_c = 'NA' mysql_load_from_hashes( args, db, cursor, 'XML', { 'type': 'study', 'primary_id': study_id, 'filename': study_file, 'xml': study_xml, }) for exp_id in \ downloadsPath['experiment'].keys(): if study_id \ == downloadsPath['experiment'][exp_id]['study_id']: exp_c = exp_id exp_xml = \ downloadsPath['experiment'][exp_id]['xml'] exp_file = \ 
downloadsPath['experiment'][exp_id]['file'] sample_id = \ downloadsPath['experiment'][exp_id]['sample_id'] mysql_load_from_hashes( args, db, cursor, 'XML', { 'type': 'experiment', 'primary_id': exp_id, 'filename': exp_file, 'xml': exp_xml, }) if sample_id \ in downloadsPath['sample' ]: samp_c = sample_id sample_xml = \ downloadsPath['sample'][sample_id]['xml'] sample_file = \ downloadsPath['sample'][sample_id]['file'] mysql_load_from_hashes( args, db, cursor, 'XML', { 'type': 'sample', 'primary_id': sample_id, 'filename': sample_file, 'xml': sample_xml, }) for run_id in \ downloadsPath['run'].keys(): if exp_id \ == downloadsPath['run'][run_id]['exp_id']: run_c = run_id run_xml = \ downloadsPath['run'][run_id]['xml'] run_file = \ downloadsPath['run'][run_id]['file'] mysql_load_from_hashes( args, db, cursor, 'XML', { 'type': 'run', 'primary_id': run_id, 'filename': run_file, 'xml': run_xml, }) comments[dbname] = \ 'ENA data. Study:%s Title: %s Abstract: %s Experiment:%s Sample:%s Run:%s' \ % ( study_id, study_title, study_abstract, exp_c, samp_c, run_c, ) # --------------------------------------------------------------------------- # --------- Make entries in the Gru database # try and get the right basecall-configuration general file_type = check_read_type(args, filepath, hdf) #print "FILETYPE is", file_type basecalltype = getBasecalltype(args, file_type) # MS basecalldir = '' basecalldirconfig = '' basecallindexpos = '' #ML ''' try: if file_type == 2: basecalltype2="Basecall_2D" string2='' #ML for x in range (0,9): string2 = '/Analyses/Hairpin_Split_00%s/Configuration/general' % (x) #ML if (string2 in hdf): basecallindexpos=x #ml #print "BASECALLINDEXPOS",basecallindexpos basecalldirconfig=string2 #ML string='/Analyses/%s_00%s/Configuration/general' % (basecalltype, basecallindexpos) #print string if (string in hdf): # print "YES 1" basecalldir='/Analyses/%s_00%s/' % (basecalltype,basecallindexpos) #basecallindexpos=x #ml #break 
string='/Analyses/%s_00%s/Configuration/general' % (basecalltype2, basecallindexpos) #print string if (string2 in hdf): #print "YES 2" basecalldir='/Analyses/%s_00%s/' % (basecalltype2,basecallindexpos) #basecalldirconfig=string2 #ML #break except: print "checkReads(): error line 467." sys.exit() try: if file_type in [1,0]: basecalltype = 'Basecall_1D_CDNA' basecalltype2 = 'Basecall_2D' basecalldir = '' basecalldirconfig = '' basecallindexpos='' ''' try: # MS for x in range(0, 9): string = '/Analyses/%s_00%s/Configuration/general' \ % (basecalltype, x) if string in hdf: basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x) basecalldirconfig = string basecallindexpos = x break ''' string = '/Analyses/%s_00%s/Configuration/general' \ % (basecalltype2, x) if string in hdf: basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x) basecalldirconfig = string basecallindexpos=x break ''' # print "basecalldirconfig", basecalldirconfig # # get some data out of tacking_id and general except: print "checkReads(): error line 496." 
sys.stdout.flush() #sys.exit() #print basecalldirconfig #print basecalldir if len(basecalldirconfig) > 0: configdata = hdf[basecalldirconfig] if len(basecalldir) > 0: metrichor_info = hdf[basecalldir] # else: # ....configdata.attrs['workflow_name'] ="preanalysed" trackingid = hdf['/UniqueGlobalKey/tracking_id'] print trackingid.attrs['exp_start_time'] #print dateutil.parser.parse(trackingid.attrs['exp_start_time']) #print int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple())) #print datetime.datetime.fromtimestamp(int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))).strftime('%Y-%m-%d') expstarttimecode = \ datetime.datetime.fromtimestamp(int(testtime(trackingid.attrs['exp_start_time' ]))).strftime('%Y-%m-%d') flowcellid = trackingid.attrs['device_id'] if len(basecalldirconfig) > 0: basecalleralg = configdata.attrs['workflow_name'] else: basecalleralg = 'preanalysed' if len(basecalldir) > 0: #version = metrichor_info.attrs['chimaera version'] try: version = metrichor_info.attrs['chimaera version'] # MS except: version = metrichor_info.attrs['version'] # MS else: version = 'unknown' runnumber = args.run_num flowcellowner = 'NULL' username = args.minotourusername if args.flowcell_owner is not None: flowcellowner = args.flowcell_owner if ena_flowcell_owner is not None: flowcellowner = ena_flowcell_owner # # get info on the reference sequence, if used big_reference = 'NOREFERENCE' big_reflength = '0' if dbname in ref_fasta_hash: # so there's some reference data for this dbname big_reference = ref_fasta_hash[dbname]['big_name'] big_reflength = ref_fasta_hash[dbname]['big_len'] # # make entries into Gru for this new database comment = comments['default'] if dbname in comments: comment = comments[dbname] process = 'noalign' if args.last_align is True: process = 'LAST' if args.bwa_align is True: process = 'BWA' wdir = args.watchdir if wdir.endswith('\\'): # remove trailing slash for windows. 
wdir = wdir[:-1] sql = \ "INSERT INTO Gru.minIONruns (date,user_name,flowcellid,runname,activeflag,comment,FlowCellOwner,RunNumber,reference,reflength,basecalleralg,version,minup_version,process,mt_ctrl_flag,watch_dir,host_ip) VALUES ('%s','%s','%s','%s',%s,'%s','%s',%s,'%s',%s,'%s','%s','%s','%s',%s,'%s','%s')" \ % ( expstarttimecode, args.minotourusername, flowcellid, dbname, 1, comment, flowcellowner, runnumber, big_reference, big_reflength, basecalleralg, version, minup_version, process, 1, wdir, ip, ) #print sql if args.verbose == "high": print sql debug() #if args.verbose == "high": print '... Database created.' sys.stdout.flush() db.escape_string(sql) args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() runindex = cursor.lastrowid dbcheckhash['runindex'][dbname] = runindex #print "Runindex:",runindex # # add us">> ", view_users if args.verbose == "high": print "Adding users..." sys.stdout.flush() view_users = [username] if args.view_users: extra_names = args.view_users.split(',') # view_users = args.view_users + extra_names # MS view_users = view_users + extra_names # MS for user_name in view_users: sql = \ "SELECT user_id FROM Gru.users WHERE user_name =\'%s\'" \ % user_name # print sql args, db, cursor = cursor_execute(args, db, cursor, sql) if 0 < cursor.rowcount: sql = \ 'INSERT INTO Gru.userrun (user_id, runindex) VALUES ((SELECT user_id FROM Gru.users WHERE user_name =\'%s\') , (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s") )' \ % (user_name, dbname) if args.verbose == "high": print sql debug() # print sql args, db, cursor = cursor_execute(args, db, cursor, sql) db.commit() else: print 'The MinoTour username "%s" does not exist. 
Please create it or remove it from the input arguments' \ % user_name sys.stdout.flush() sys.exit() # # Create comment table if it doesn't exist create_comment_table_if_not_exists('Gru.comments', args, db, cursor) # # Add first comment to table start_time = time.strftime('%Y-%m-%d %H:%M:%S') comment_string = 'minUp version %s started' % minup_version mysql_load_from_hashes( args, db, cursor, 'Gru.comments', { 'runindex': runindex, 'runname': dbname, 'user_name': args.minotourusername, 'comment': comment_string, 'name': args.dbusername, 'date': start_time, }) # --------------------------------------------------------------------------- # --------- make log file and initinal entry with open(dbcheckhash['logfile'][dbname], 'w') as logfilehandle: logfilehandle.write('minup started at:\t%s%s' % (start_time, os.linesep)) logfilehandle.write('minup version:\t%s%s' % (minup_version, os.linesep)) logfilehandle.write('options:' + os.linesep) logfilehandle.write('minotour db host:\t%s%s' % (args.dbhost, os.linesep)) logfilehandle.write('minotour db user:\t%s%s' % (args.dbusername, os.linesep)) logfilehandle.write('minotour username:\t%s%s' % (args.minotourusername, os.linesep)) logfilehandle.write('minotour viewer usernames:\t%s%s' % (view_users, os.linesep)) logfilehandle.write('flowcell owner:\t%s%s' % (flowcellowner, os.linesep)) logfilehandle.write('run number:\t%s%s' % (args.run_num, os.linesep)) logfilehandle.write('watch directory:\t%s%s' % (args.watchdir, os.linesep)) ''' # DEPRECATIN TELEM MS 11.10.16 logfilehandle.write('upload telemetry:\t%s%s' % (args.telem, os.linesep)) ''' logfilehandle.write('Reference Sequences:' + os.linesep) if dbname in ref_fasta_hash: for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys(): logfilehandle.write( 'Fasta:\t%s\tlength:\t%d%s' % (ref_fasta_hash[dbname]['seq_file'][refname], ref_fasta_hash[dbname]['seq_len'][refname], os.linesep)) else: logfilehandle.write('No reference sequence set' + os.linesep) 
logfilehandle.write('comment:\t%s%s' % (comment, os.linesep)) logfilehandle.write('Errors:' + os.linesep) logfilehandle.close() #startMincontrol(args, dbname, cursor, dbcheckhash,\ # minup_version, oper) # # connection_pool for this db connection_pool[dbname] = list() ''' # DEPRECATIN LAST TELEM MS 11.10.16 if args.last_align is True \ or args.bwa_align is True \ or args.telem is True: ''' if args.bwa_align is True: try: db_a = MySQLdb.connect(host=args.dbhost, user=args.dbusername, passwd=args.dbpass, port=args.dbport, db=dbname) connection_pool[dbname].append(db_a) db_b = MySQLdb.connect(host=args.dbhost, user=args.dbusername, passwd=args.dbpass, port=args.dbport, db=dbname) connection_pool[dbname].append(db_b) db_c = MySQLdb.connect(host=args.dbhost, user=args.dbusername, passwd=args.dbpass, port=args.dbport, db=dbname) connection_pool[dbname].append(db_c) except Exception, err: err_string = 'Error bwa_align: %s ' % err print >> sys.stderr, \ "Can't setup MySQL connection pool: %s" % err sys.stdout.flush() with open(dbcheckhash['logfile'][dbname], 'a') as \ logfilehandle: logfilehandle.write(err_string + os.linesep) logfilehandle.close() sys.stdout.flush() sys.exit() # --------- this bit last to set the active database in this hash if dbcheckhash['dbname']: for e in dbcheckhash['dbname'].keys(): dbcheckhash['dbname'][e] = False dbcheckhash['dbname'][dbname] = True
def create_events_model_fastq_table(tablename, args, db, cursor):
    """Create the main per-read events/model/fastq table, a companion
    "<tablename>_1minwin_sum" rollup table, and six MySQL scheduled
    EVENTs that keep that rollup refreshed for one week.

    The events bucket reads by the g_1minwin window column and upsert
    summary rows (read counts, base counts, length stats, pass counts,
    distinct-channel counts).  NOTE(review): this relies on the MySQL
    event scheduler being enabled server-side — confirm deployment.
    All statements run through cursor_execute, which returns the
    (args, db, cursor) triple used for the next statement.
    """

    fields = (
        # 'basename VARCHAR(300), PRIMARY KEY',
        # Example read basename / attribute values kept from the
        # original author's notes:
        # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
        # PLSP57501_17062014lambda_3216_1_ch101_file10_strand.whatever
        # = 51.80799999999954
        # = 2347.2034000000003
        # = 1.0063618778594416
        # = 0.20855518951022478
        # = -0.10872176688437207
        # = 0.004143787533549812
        # = 0.9422581300419306
        # = 1.3286319210403454
        # = 1.0368718353240443
        # = 1403015537
        'basename_id INT(10) NOT NULL, PRIMARY KEY (basename_id)',
        'seqid VARCHAR(150) NOT NULL, UNIQUE INDEX (seqid)',
        'duration FLOAT(25,17)',
        'start_time FLOAT(25,17)',
        'scale FLOAT(25,17)',
        'shift FLOAT(25,17)',
        'gross_shift FLOAT(25,17) DEFAULT NULL',
        'drift FLOAT(25,17)',
        'scale_sd FLOAT(25,17)',
        'var_sd FLOAT(25,17)',
        'var FLOAT(25,17)',
        'seqlen INT NOT NULL',
        'sampling_rate float',
        # Time-window bucket columns; presumably minute buckets with
        # s*/g_* variants for start-time/global offsets — TODO confirm
        # against the code that populates them.
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0, INDEX(align)',
        'pass INT(1) NOT NULL',
        'exp_start_time INT(15) NOT NULL',
        'sequence MEDIUMTEXT DEFAULT NULL',
        'qual MEDIUMTEXT DEFAULT NULL',
        # Composite (align, window) indexes for per-window alignment
        # queries.
        'index 1minalign (align,1minwin)',
        'index 5minalign (align,5minwin)',
        'index 10minalign (align,10minwin)',
        'index 15minalign (align,15minwin)',
        'index s1minalign (align,s1minwin)',
        'index s5minalign (align,s5minwin)',
        'index s10minalign (align,s10minwin)',
        'index s15minalign (align,s15minwin)',
        'index g_1minalign (align,g_1minwin)',
        'index g_5minalign (align,g_5minwin)',
        'index g_10minalign (align,g_10minwin)',
        'index g_15minalign (align,g_15minwin)',
        'index g_s1minalign (align,g_s1minwin)',
        'index g_s5minalign (align,g_s5minwin)',
        'index g_s10minalign (align,g_s10minwin)',
        'index g_s15minalign (align,g_s15minwin)',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)
    # print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    #### Create a tmp table to hold trigger data.
    # One row per 1-minute window, upserted by the scheduled events
    # below.
    fields = (
        '1minwin INT NOT NULL',
        #'exp_start_time INT NOT NULL',
        'bases INT',
        'maxlen INT',
        'minlen INT',
        'average_length INT',
        'readcount INT',
        'passcount INT default 0',
        'cumuduration INT default 0 ',
        'cumulength INT default 0',
        'distchan INT default 0',
        'PRIMARY KEY (1minwin)',
        #,exp_start_time)',
    )
    colheaders = ','.join(fields)
    tablename2 = tablename + "_1minwin_sum"
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' % (tablename2, colheaders)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    ## Create a short running event
    # Every 15 s, refresh the summary stats for the 30 most recent
    # windows (LIMIT 30 after ORDER BY ... DESC).
    # NOTE(review): the SELECT aliases the last column "cumuseqlen"
    # while the target column is "cumulength"; harmless because
    # INSERT ... SELECT maps columns positionally, but consider
    # renaming for clarity.
    eventname = tablename2 + "_event"
    sql = ' create event %s on schedule every 15 second starts current_timestamp ends current_timestamp + interval 1 week do insert into %s(1minwin,readcount,bases,maxlen,minlen,average_length,cumuduration,cumulength) select g_1minwin as 1minwin,count(*) as readcount,sum(seqlen) as bases, max(seqlen) as maxlen,min(seqlen) as minlen , ROUND(AVG(seqlen)) as average_length, sum(duration) as cumuduration, sum(seqlen) as cumuseqlen FROM %s group by 1 order by 1 desc limit 30 ON DUPLICATE KEY UPDATE readcount=VALUES(readcount),bases=VALUES(bases), maxlen=VALUES(maxlen), minlen=VALUES(minlen),average_length=VALUES(average_length),cumuduration=VALUES(cumuduration),cumulength=VALUES(cumulength)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    ## Create a short running event
    # Every 15 s, refresh pass-read counts for the 30 most recent
    # windows.
    eventname = tablename2 + "_eventpass"
    sql = ' create event %s on schedule every 15 second starts current_timestamp ends current_timestamp + interval 1 week do insert into %s(1minwin,passcount) select g_1minwin as 1minwin,count(*) as passcount FROM %s where pass = 1 group by 1 order by 1 desc limit 30 ON DUPLICATE KEY UPDATE passcount=VALUES(passcount)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    ## Create a long running event
    # Every 20 min, refresh ALL windows (no LIMIT) to catch anything
    # the 15-second events missed.
    eventname = tablename2 + "_eventL"
    sql = ' create event %s on schedule every 20 minute starts current_timestamp ends current_timestamp + interval 1 week do insert into %s(1minwin,readcount,bases,maxlen,minlen,average_length,cumuduration,cumulength) select g_1minwin as 1minwin,count(*) as readcount,sum(seqlen) as bases, max(seqlen) as maxlen,min(seqlen) as minlen , ROUND(AVG(seqlen)) as average_length, sum(duration) as cumuduration, sum(seqlen) as cumulength FROM %s group by 1 order by 1 desc ON DUPLICATE KEY UPDATE readcount=VALUES(readcount),bases=VALUES(bases), maxlen=VALUES(maxlen), minlen=VALUES(minlen),average_length=VALUES(average_length),cumuduration=VALUES(cumuduration),cumulength=VALUES(cumulength)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    # Every 20 min, refresh pass-read counts for ALL windows.
    eventname = tablename2 + "_eventpassL"
    sql = ' create event %s on schedule every 20 minute starts current_timestamp ends current_timestamp + interval 1 week do insert into %s(1minwin,passcount) select g_1minwin as 1minwin,count(*) as passcount FROM %s where pass = 1 group by 1 order by 1 desc ON DUPLICATE KEY UPDATE passcount=VALUES(passcount)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    # Every 15 s, refresh distinct-channel counts for the 30 most
    # recent windows, sourced from the config_general table.
    eventname = tablename2 + "_conf"
    sql = ' create event %s on schedule every 15 second starts current_timestamp ends current_timestamp + interval 1 week do insert into %s(1minwin,distchan) select g_1minwin as 1minwin,count(distinct channel) as distchan from config_general group by 1 order by 1 desc limit 30 ON DUPLICATE KEY UPDATE distchan=VALUES(distchan)' % (
        eventname, tablename2)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    # Every 20 min, refresh distinct-channel counts for ALL windows.
    eventname = tablename2 + "_confL"
    sql = ' create event %s on schedule every 20 minute starts current_timestamp ends current_timestamp + interval 1 week do insert into %s(1minwin,distchan) select g_1minwin as 1minwin,count(distinct channel) as distchan from config_general group by 1 order by 1 desc ON DUPLICATE KEY UPDATE distchan=VALUES(distchan)' % (
        eventname, tablename2)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)
def create_pre_general_table(tablename, args, db, cursor):
    """Create the pre-basecall "general" metadata table if missing.

    One row per raw read (keyed by basename_id, with a unique
    basename); columns mirror the fast5 pre-tracking attributes plus
    the minute-window bucket columns.  The DDL is handed to
    cursor_execute, which returns the (args, db, cursor) triple.
    """

    # Example attribute values for one read, kept from the original
    # author's notes (basename
    # PLSP57501_17062014lambda_3216_1_ch101_file10_strand):
    #   abasic_event_index = 2, abasic_found = 1,
    #   abasic_peak_height = 124.29830200195312, duration = 199290,
    #   hairpin_event_index = 1382, hairpin_found = 1,
    #   hairpin_peak_height = 125.83346438759253,
    #   hairpin_polyt_level = 0.6979265001059975,
    #   median_before = 228.775431986964,
    #   read_id = ad4812d9-0f84-468b-a932-c2f91b6152ef,
    #   read_number = 5, scaling_used = 1, start_mux = 3,
    #   start_time = 2544301
    column_defs = [
        'basename_id INT(10) NOT NULL, PRIMARY KEY(basename_id)',
        'basename VARCHAR(150) NOT NULL, UNIQUE KEY(basename)',
        'abasic_event_index INT(10) DEFAULT NULL',
        'abasic_found INT(1) DEFAULT NULL',
        'abasic_peak_height varchar(40) DEFAULT NULL',
        'duration varchar(40) DEFAULT NULL',
        'hairpin_event_index INT(10) DEFAULT NULL',
        'hairpin_found INT(1) DEFAULT NULL',
        'hairpin_peak_height VARCHAR(40) DEFAULT NULL',
        'hairpin_polyt_level VARCHAR(40) DEFAULT NULL',
        'median_before VARCHAR(40) DEFAULT NULL',
        'channel INT(4) ',
        'read_id VARCHAR(40) DEFAULT NULL',
        'read_number INT(10) DEFAULT NULL',
        'scaling_used INT(1) DEFAULT NULL',
        'start_mux INT(1) DEFAULT NULL',
        'start_time varchar(20) DEFAULT NULL',
        'total_events INT(20) DEFAULT NULL',
        'sampling_rate float',
        # Minute-window bucket columns, each individually indexed.
        # These are calculated by the minup script from the events
        # table.
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'exp_start_time INT(15) NOT NULL',  # Maybe change to pre_tracking_id ?
    ]

    # Assemble and execute the CREATE TABLE statement.
    ddl = 'CREATE TABLE IF NOT EXISTS {0} ({1}) ENGINE=InnoDB'.format(
        tablename, ','.join(column_defs))
    args, db, cursor = cursor_execute(args, db, cursor, ddl)
def create_general_table(tablename, args, db, cursor):
    """Create the post-basecall "general" metadata table if missing.

    One row per basecalled read (keyed by basename_id, unique
    basename); columns mirror the Metrichor/basecaller configuration
    and tracking attributes plus the minute-window bucket columns.
    Execution is delegated to cursor_execute, which hands back the
    (args, db, cursor) triple.
    """

    # Example attribute values for one read, kept from the original
    # author's notes (basename
    # PLSP57501_17062014lambda_3216_1_ch101_file10_strand):
    #   workflow_script = basecall_2d_workflow.py"config_general",
    #   workflow_name = Basecall_2D_000, max_events = 100000,
    #   read_id = channel_101_read_10, model_path =
    #   /opt/metrichor/model, use_local = auto, min_events = 1000,
    #   input = /tmp/input/..._ch101_file10_strand.fast5,
    #   config = /opt/metrichor/config/basecall_2d.cfg,
    #   template_model = auto, channel = 101, version = 0.8.3,
    #   time_stamp = 2014-Jul-02 09:10:13,
    #   exp_start_time = 1403015537
    cols = [
        'basename_id INT(10) NOT NULL, PRIMARY KEY(basename_id)',
        'basename VARCHAR(150) NOT NULL, UNIQUE KEY(basename)',
        'read_type VARCHAR(50)',
        'local_folder VARCHAR(50) ',
        'workflow_script VARCHAR(50) DEFAULT NULL',
        'workflow_name VARCHAR(50) ',
        'read_id TEXT ',
        'use_local VARCHAR(10) ',
        'tag VARCHAR(50) ',
        'model_path VARCHAR(50) ',
        'complement_model TEXT(200) ',
        'max_events INT(10) ',
        'input VARCHAR(200) ',
        'min_events INT(4) ',
        'config VARCHAR(100) ',
        'template_model VARCHAR(200) ',
        'channel INT(4) ',
        'metrichor_version VARCHAR(10) ',
        'metrichor_time_stamp VARCHAR(20) ',
        'abasic_event_index INT(1)',
        'abasic_found INT(1)',
        'abasic_peak_height FLOAT(25,17)',
        'duration INT(15)',
        'sampling_rate FLOAT(25,17)',
        'hairpin_event_index INT(10)',
        'hairpin_found INT(1)',
        'hairpin_peak_height FLOAT(25,17)',
        'hairpin_polyt_level FLOAT(25,17)',
        'median_before FLOAT(25,17)',
        'read_name VARCHAR(37)',
        'read_number int(10)',
        'scaling_used int(5)',
        'start_mux int(1)',
        'start_time int(20)',
        'end_mux INT(1) DEFAULT NULL',
        'exp_start_time INT(15) NOT NULL',
        # Minute-window bucket columns, each individually indexed.
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0, INDEX(align)',
        'pass INT(1) ',
    ]

    # Build the DDL and run it.
    stmt = 'CREATE TABLE IF NOT EXISTS {0} ({1}) ENGINE=InnoDB'.format(
        tablename, ','.join(cols))
    args, db, cursor = cursor_execute(args, db, cursor, stmt)