Ejemplo n.º 1
0
def create_5_3_prime_align_tables(align_table_in, args, db, cursor):

    three_prime_table = align_table_in + '_3prime'
    five_prime_table = align_table_in + '_5prime'
    fields = (  # index
        # index
        # index
        # index
        # index
        # index
        # index for combined queries
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'refid INT(3) DEFAULT NULL, INDEX (refid)',
        'alignnum INT(4) DEFAULT NULL, INDEX (alignnum)',
        'covcount INT(4) DEFAULT NULL, INDEX (covcount)',
        'alignstrand VARCHAR(1), INDEX (alignstrand)',
        'score INT(4), INDEX (score)',
        'seqpos INT(6) DEFAULT NULL, INDEX (seqpos)',
        'refpos INT(6) DEFAULT NULL, INDEX (refpos)',
        'seqbase VARCHAR(1) DEFAULT NULL, INDEX (seqbase)',
        'refbase VARCHAR(1) DEFAULT NULL, INDEX (refbase)',
        'seqbasequal INT(2) DEFAULT NULL, INDEX (seqbasequal)',
        'cigarclass VARCHAR(1) DEFAULT NULL, INDEX (cigarclass)',
        'index combindex (refid,refpos,cigarclass)',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (three_prime_table,
                                                  colheaders)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (five_prime_table, colheaders)
    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 2
0
def create_align_table_sam(tablename, args, db, cursor):
    fields = (  # --------- now add extra columns as needed
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'qname VARCHAR(150)',
        'flag INT(4) ',
        'rname VARCHAR(150)',
        'pos INT(7)',
        'mapq INT(3)',
        'cigar TEXT',
        'rnext VARCHAR(100)',
        'pnext INT(4)',
        'tlen INT(4)',
        'seq LONGTEXT',
        'qual LONGTEXT',
        'n_m VARCHAR(10)',
        'm_d TEXT',
        'a_s VARCHAR(10)',
        'x_s VARCHAR(10)',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 3
0
def create_caller_table_noindex(tablename, args, db, cursor):
    fields = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL',
        'mean FLOAT(25,17) NOT NULL',
        'start FLOAT(25,17) NOT NULL',
        'stdv FLOAT(25,17) NOT NULL',
        'length FLOAT(25,17) NOT NULL',
        'model_state VARCHAR(10) NOT NULL',
        'model_level FLOAT(25,17) NOT NULL',
        'move INT(64) NOT NULL',
        'p_model_state FLOAT(25,17) NOT NULL',
        'mp_state VARCHAR(10) NOT NULL',
        'p_mp_state FLOAT(25,17) NOT NULL',
        'p_A FLOAT(25,17) NOT NULL',
        'p_C FLOAT(25,17) NOT NULL',
        'p_G FLOAT(25,17) NOT NULL',
        'p_T FLOAT(25,17) NOT NULL',
        'raw_index INT(64) NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=MyISAM' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 4
0
def mysql_load_from_hashes(
    args,
    db,
    cursor,
    tablename,
    data_hash,
):
    """ Load hash data into database """
    cols = list()
    vals = list()
    for (_, entry) in data_hash.iteritems():
        if isinstance(entry, basestring):
            vals.append("'%s'" % entry)
        else:
            vals.append(str(entry))
    cols = ','.join(data_hash.keys())
    values = ','.join(vals)
    sql = 'INSERT INTO %s (%s) VALUES (%s) ' % (tablename, cols, values)

    if args.verbose == "high":
        print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
    ids = cursor.lastrowid
    return ids
Ejemplo n.º 5
0
def upload_model_data(
    tablename,
    model_name,
    model_location,
    hdf,
    args,
    db,
    cursor,
):
    table = hdf[model_location][()]
    sqlarray = list()
    for r in table:
        i = list(r)
        if len(i) == 6:
            i.insert(1, 0)
        eventdeats = "('" + str(model_name) + "'"
        for j in i:
            if isinstance(j, (int, long, float, complex)):
                eventdeats += ',' + str(j)
            else:
                eventdeats += ",'" + str(j) + "'"
        eventdeats += ')'
        sqlarray.append(eventdeats)
    stringvals = ','.join(sqlarray)
    sql = \
        'INSERT INTO %s (model,kmer,variant,level_mean,level_stdv,sd_mean,sd_stdv,weight) VALUES %s;' \
        % (tablename, stringvals)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
    db.commit()
Ejemplo n.º 6
0
def create_mincontrol_barcode_control_table(tablename, args, db, cursor):
    fields = \
        ('job_index INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (job_index)'
         , 'barcodeid MEDIUMTEXT NOT NULL', 'complete INT NOT NULL')
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 7
0
def create_model_list_table(tablename, args, db, cursor):
    fields = ('basename_id INT(7), PRIMARY KEY(basename_id)',
              'template_model VARCHAR(200), INDEX (template_model)',
              'complement_model VARCHAR(200), INDEX (complement_model)'
              )  # 'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 8
0
def create_2d_alignment_table(tablename, args, db, cursor):
    fields = ('ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
              'basename_id INT(7) NOT NULL, INDEX (basename_id)',
              'template INT(5) NOT NULL', 'complement INT(5) NOT NULL',
              'kmer VARCHAR(10) NOT NULL')
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 9
0
def modify_gru(args, db, cursor):

    # -------- This bit adds columns to Gru.minIONruns ####
    # # Add column 'mt_ctrl_flag' to Gru.minIONruns table if it doesn't exist

    sql = \
        'SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA="Gru" AND TABLE_NAME="minIONruns" AND column_name="mt_ctrl_flag" '

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
    if cursor.rowcount == 0:

        # print "adding mt_ctrl_flag to Gru.minIONruns"

        sql = \
            'ALTER TABLE Gru.minIONruns ADD mt_ctrl_flag INT(1) DEFAULT 0'

        # print sql

        args, db, cursor = cursor_execute(args, db, cursor, sql)
        db.commit()

    # # Add column 'watch_dir' to Gru.minIONruns table if it doesn't exist

    sql = \
        'SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA="Gru" AND TABLE_NAME="minIONruns" AND column_name="watch_dir" '

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
    if cursor.rowcount == 0:

        # print "adding 'watch_dir' to Gru.minIONruns"

        sql = 'ALTER TABLE Gru.minIONruns ADD watch_dir TEXT(200)'

        # print sql

        args, db, cursor = cursor_execute(args, db, cursor, sql)
        db.commit()

    # # Add column 'host_ip' to Gru.minIONruns table if it doesn't exist

    sql = \
        'SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA="Gru" AND TABLE_NAME="minIONruns" AND column_name="host_ip" '

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
    if cursor.rowcount == 0:

        # print "adding mt_ctrl_flag to Gru.minIONruns"

        sql = 'ALTER TABLE Gru.minIONruns ADD host_ip TEXT(16)'

        # print sql

        args, db, cursor = cursor_execute(args, db, cursor, sql)
        db.commit()
Ejemplo n.º 10
0
def create_xml_table(tablename, args, db, cursor):
    fields = \
        ('xmlindex INT(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY(xmlindex)'
         , 'type VARCHAR(20) NOT NULL',
         'primary_id VARCHAR(30) NOT NULL',
         'filename VARCHAR(30) NOT NULL', 'xml TEXT DEFAULT NULL')
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 11
0
def create_reference_table(tablename, args, db, cursor):
    fields = ('refid INT(3) NOT NULL AUTO_INCREMENT, PRIMARY KEY(refid)',
              'refname VARCHAR(50), UNIQUE INDEX (refname)',
              'reflen INT(7), INDEX (reflen)',
              'reffile VARCHAR(100), INDEX (reffile)',
              'ref_total_len VARCHAR(100), INDEX (ref_total_len)'
              )  # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 12
0
def create_mincontrol_messages_table(tablename, args, db, cursor):
    fields = (
        'message_index INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (message_index)',
        'message MEDIUMTEXT NOT NULL',
        'target MEDIUMTEXT NOT NULL',
        'param1 MEDIUMTEXT',
        'param2 MEDIUMTEXT',
        'complete INT NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 13
0
def create_ref_kmer_table(tablename, args, db, cursor):
    fields = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'kmer VARCHAR(10) NOT NULL, INDEX (kmer)',
        'refid INT(3) NOT NULL, INDEX (refid)',
        'count INT(7) NOT NULL',
        'total INT(7) NOT NULL',
        'freq float(13,10) NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 14
0
def create_comment_table_if_not_exists(tablename, args, db, cursor):
    fields = (
        'comment_id INT(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY(comment_id)',
        'runindex INT(11) NOT NULL',
        'runname TEXT NOT NULL',
        'user_name TEXT NOT NULL',
        'date DATETIME NOT NULL',
        'comment TEXT NOT NULL',
        'name TEXT NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = \
        'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB DEFAULT CHARSET=utf8' \
        % (tablename, colheaders)
    filterwarnings('ignore', "Table 'comments' already exists")
    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 15
0
def create_trackingid_table(tablename, args, db, cursor):
    fields = (  # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
        # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
        #  = 48133
        #  = 48133
        # = 38.4
        # = MN02935
        # = sequencing_run
        # =./python/recipes/MAP_48Hr_Sequencing_Run_SQK_MAP006.py
        # = 1403015537
        #
        # = 35.625
        # = 9be694a4d40804eb6ea5761774723318ae3b3346
        # = 0.45.1.6 b201406111512
        'basename_id INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(basename_id)',
        'basename VARCHAR(150) NOT NULL',
        'asic_id BIGINT(15) NOT NULL',
        'asic_id_17 BIGINT(15) ',
        'asic_id_eeprom INT(5) ',
        'asic_temp DOUBLE(4,1) NOT NULL',
        'device_id TEXT(8) NOT NULL',
        'exp_script_purpose VARCHAR(50) NOT NULL',
        'exp_script_name VARCHAR(150)',
        'exp_start_time INT(15) NOT NULL',
        'flow_cell_id VARCHAR(20) NOT NULL',
        'heatsink_temp FLOAT(10) NOT NULL',
        'hostname TEXT',
        'run_id TEXT(40) NOT NULL',
        'version_name VARCHAR(30) ',
        'file_path TEXT(300) NOT NULL',
        'channel_number int(7)',
        'digitisation float',
        'offset float',
        'range_val float',
        'sampling_rate float',
        'pass INT(1) NOT NULL',
        'md5sum TEXT(33) NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 16
0
def create_model_data_table(tablename, args, db, cursor):
    fields = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'model VARCHAR(200) NOT NULL, INDEX (model)',
        'kmer VARCHAR(10) NOT NULL, INDEX (kmer)',
        'variant INT(10) NOT NULL',
        'level_mean FLOAT(25,17) NOT NULL',
        'level_stdv FLOAT(25,17) NOT NULL',
        'sd_mean FLOAT(25,17) NOT NULL',
        'sd_stdv FLOAT(25,17) NOT NULL',
        'weight FLOAT(25,17) NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 17
0
def load_ref_kmer_hash(args, db, cursor, tablename, kmers, refid):
    sql = "INSERT INTO %s (kmer, refid, count, total, freq) VALUES " % (
        tablename)
    totalkmercount = sum(kmers.itervalues())
    for kmer, count in kmers.iteritems():
        #n+=1
        f = 1 / (totalkmercount * float(count))
        freq = "{:.10f}".format(f)
        #print f, freq, totalkmercount, count
        sql += "('%s',%s,%s,%s,%s)," % (kmer, refid, count, totalkmercount,
                                        freq)
    sql = sql[:-1]
    #print sql
    if args.verbose == "high":
        print sql
        debug()
    args, db, cursor = cursor_execute(args, db, cursor, sql)
    db.commit()
Ejemplo n.º 18
0
def create_basic_read_info(tablename, args, db, cursor):
    fields = (
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'abasic_event_index INT(1) NOT NULL',
        'abasic_found INT(1) NOT NULL',
        'abasic_peak_height FLOAT(25,17)',
        'duration INT(15)',
        'hairpin_event_index INT(10)',
        'hairpin_found INT(1)',
        'hairpin_peak_height FLOAT(25,17)',
        'hairpin_polyt_level FLOAT(25,17)',
        'median_before FLOAT(25,17)',
        'read_id VARCHAR(37)',
        'read_number int(10)',
        'scaling_used int(5)',
        'start_mux int(1)',
        'start_time int(20)',
        'sampling_rate float',
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0,  INDEX(align)',
        'pass INT(1) NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 19
0
def create_barcode_table(tablename, args, db, cursor):
    fields = (
        'basename_id INT(10), PRIMARY KEY(basename_id)',
        'pos0_start INT(5) ',
        'score INT(6) ',
        'design VARCHAR(10) ',
        'pos1_end INT(5) ',
        'pos0_end INT(5) ',
        'pos1_start INT(5) ',
        'variant VARCHAR(8) ',
        'barcode_arrangement VARCHAR(12) ',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 20
0
def create_pre_align_table(tablename, args, db, cursor):
    fields = (  # index
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7), INDEX (basename_id)',
        'refid INT(3) DEFAULT NULL, INDEX (refid)',
        'alignstrand VARCHAR(1) DEFAULT NULL, INDEX (alignstrand)',
        'r_start INT(7) DEFAULT NULL',
        'q_start INT(5) DEFAULT NULL',
        'r_align_len INT(7) DEFAULT NULL',
        'q_align_len INT(5) DEFAULT NULL',
    )

    # 'index combindex (refid,refpos,cigarclass)') # index for combined queries

    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 21
0
def upload_2dalignment_data(
    basenameid,
    channel,
    alignment,
    args,
    db,
    cursor,
):
    cursor = db.cursor()
    sqlarray = list()
    for i in alignment:
        val_str = "(%d,%d,%d,'%s')" % (basenameid, i[0], i[1], i[2])
        sqlarray.append(val_str)
    stringvals = ','.join(sqlarray)
    sql = \
        'INSERT INTO caller_basecalled_2d_alignment_%s (basename_id,template,complement,kmer) VALUES %s;' \
        % (channel, stringvals)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
    db.commit()
Ejemplo n.º 22
0
            bcs = list()
            for i in range(len(barcode_align_obj)):
                if barcode_align_obj[i].startswith('>'):
                    bc = re.split('>| ', barcode_align_obj[i])[-1]
                    b = "('%s',0)" % bc
                    bcs.append(b)
            sql = \
                'INSERT INTO barcode_control (barcodeid,complete) VALUES %s' \
                % ','.join(bcs)

            # print sql
            if args.verbose == "high":
                print sql
                debug()

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
            dbcheckhash['barcode_info'][dbname] = True

    return dbname


# ---------------------------------------------------------------------------


def get_hdf_attribute(args, hdf, group, attr):
    try:
        hdf5object = hdf[group]
        value = hdf5object.attrs[attr]
    except:
        value = -1
Ejemplo n.º 23
0
def create_basecall_summary_info(tablename, args, db, cursor):
    fields = (  # = 1403015537
        'ID INT(10) NOT NULL AUTO_INCREMENT, PRIMARY KEY(ID)',
        'basename_id INT(7) NOT NULL, INDEX (basename_id)',
        'abasic_dur float',
        'abasic_index int',
        'abasic_peak float',
        'duration_comp float',
        'duration_temp float',
        'end_index_comp int',
        'end_index_temp int',
        'hairpin_abasics int',
        'hairpin_dur float',
        'hairpin_events int',
        'hairpin_peak float',
        'median_level_comp float',
        'median_level_temp float',
        'median_sd_comp float',
        'median_sd_temp float',
        'num_comp int',
        'num_events int',
        'num_temp int',
        'pt_level float',
        'range_comp float',
        'range_temp float',
        'split_index int',
        'start_index_comp int',
        'start_index_temp int',
        'driftC float ',
        'mean_qscoreC float',
        'num_skipsC int',
        'num_staysC int',
        'scaleC float',
        'scale_sdC float',
        'sequence_lengthC int',
        'shiftC float',
        'strand_scoreC float',
        'varC float',
        'var_sdC float',
        'driftT float ',
        'mean_qscoreT float',
        'num_skipsT int',
        'num_staysT int',
        'scaleT float',
        'scale_sdT float',
        'sequence_lengthT int',
        'shiftT float',
        'strand_scoreT float',
        'varT float',
        'var_sdT float',
        'mean_qscore2 float',
        'sequence_length2 int',
        'exp_start_time INT(15) NOT NULL',
        'sampling_rate float',
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0,  INDEX(align)',
        'pass INT(1) NOT NULL',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE %s (%s) ENGINE=InnoDB' % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 24
0
def check_read(db, args, connection_pool, minup_version, comments,
               xml_file_dict, ref_fasta_hash, dbcheckhash, logfolder, filepath,
               hdf, cursor, oper, ip):

    global runindex

    if args.verbose == "high":
        print "Checking read ..."
        sys.stdout.flush()

    filename = os.path.basename(filepath)

    if args.verbose == "high":
        print time.strftime('%Y-%m-%d %H:%M:%S'), 'processing:', filename
        sys.stdout.flush()

    # Remove "_sequencing_run_" and/or "_mux_scan_" from filename_
    # as used to ake dbname ...

    filename_ = filename
    filename_ = filename_.replace("_sequencing_run_", '_')
    filename_ = filename_.replace("_mux_scan_", '_')

    parts = filename_.split('_')
    strSep = '_'

    # Changing the number below enables the removal of the random four digit number from run names on restart

    dbname = strSep.join(parts[0:len(parts) - 5])
    dbname = re.sub('[.!,; ]', '', dbname)

    if len(args.custom_name) > 0:
        dbname = args.minotourusername + '_' + args.custom_name + '_' \
         + dbname
    else:
        dbname = args.minotourusername + '_' + dbname
    if len(dbname) > 64:
        dbname = dbname[:64]
    if dbname.endswith('_'):  #ml
        dbname = dbname[:-1]  #ml

    if args.verbose == "high":
        print "dbname is ", dbname
        print "Parts were ", parts
        debug()

    # ---------------------------------------------------------------------------

    if dbname in dbcheckhash[
            'dbname']:  # so data from this run has been seen before in this instance of minup so switch to it!
        if dbcheckhash['dbname'][dbname] is False:
            if args.verbose == "high":
                print 'switching to database: ', dbname
                sys.stdout.flush()
            sql = 'USE %s' % dbname
            args, db, cursor = cursor_execute(args, db, cursor, sql)

            # ---------------------------------------------------------------------------

            try:
                runindex = dbcheckhash['runindex'][dbname]  # MS ..
            except:
                print "checkRead(): line 112, dbcheckhash, key error: " \
                                + dbname
                sys.stdout.flush()
                #sys.exit()
                return ()

            comment_string = 'minUp switched runname'
            start_time = time.strftime('%Y-%m-%d %H:%M:%S')
            sql = \
                "INSERT INTO Gru.comments (runindex,runname,user_name,comment,name,date) VALUES (%s,'%s','%s','%s','%s','%s')" \
                % (
                runindex,
                dbname,
                args.minotourusername,
                comment_string,
                args.minotourusername,
                start_time,
                )

            if args.verbose == "high":
                print sql
                debug()

            db.escape_string(sql)
            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()

            # ---------------------------------------------------------------------------

            ks = dbcheckhash['dbname'].keys()
            n = len(ks)
            bar = mk_bar(n)
            bar.start()
            for i, e in enumerate(ks):
                bar.update(i)
                dbcheckhash['dbname'][e] = False
            bar.finish()
            dbcheckhash['dbname'][dbname] = True

    # ---------------------------------------------------------------------------

    if dbname not in dbcheckhash[
            'dbname']:  # # so the db has not been seen before.. time to set up lots of things...
        dbcheckhash['barcoded'][dbname] = False
        dbcheckhash['barcode_info'][dbname] = False
        dbcheckhash['logfile'][dbname] = os.path.join(os.path.sep, logfolder,
                                                      dbname + '.minup.log')
        if args.verbose == "high":
            print 'trying database: ', dbname
            sys.stdout.flush()
        sql = "SHOW DATABASES LIKE \'%s\'" % dbname

        # print sql

        args, db, cursor = cursor_execute(args, db, cursor, sql)
        if cursor.fetchone():
            if args.verbose == "high":
                print 'database exists!'
                sys.stdout.flush()

            # # drop the existing database, if selected

            if args.drop_db is True:
                sql = 'DROP DATABASE %s' % dbname

                # print sql

                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
                if args.verbose == "high":
                    print 'database dropped.'
                    sys.stdout.flush()
            else:
                print >> sys.stderr, "=" * 80
                print >> sys.stderr, \
                    'WARNING: DATABASE \"%s\" already EXISTS.\nTo write over the data re-run the minUP command with option -d' % dbname
                print >> sys.stderr, "=" * 80
                sys.stdout.flush()
                if args.batch_fasta == False:

                    # MS next 6 lines ...

                    print >> sys.stderr, \
                        'not in batch mode so exiting ...'
                    sys.stdout.flush()
                    terminate_minup(args, dbcheckhash, oper, minup_version)

            #terminate_minup(args, dbcheckhash, oper, minup_version)
            #sys.exit()

        if args.drop_db is True:
            print 'Deleting exisiting run from Gru now ...'
            sys.stdout.flush()
            sql = \
                'DELETE FROM Gru.userrun WHERE runindex IN (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s")' \
                % dbname

            # print sql

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
            sql = "DELETE FROM Gru.minIONruns WHERE runname = \'%s\'" \
                % dbname

            # print sql

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
            print '.... Run deleted.'
            sys.stdout.flush()

        # ---------------------------------------------------------------------------
        # -------- This bit adds columns to Gru.minIONruns --------

        modify_gru(args, db, cursor)

        # ---------------------------------------------------------------------------

        # -------- Create a new empty database

        #if args.verbose == "high":
        print 'Making new database: ', dbname
        print '=' * 80
        sys.stdout.flush()
        dbF = open('dbname.txt', 'w')
        dbF.write(dbname + "\n")
        dbF.close()

        sql = 'CREATE DATABASE %s' % dbname
        #print sql
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        sql = 'USE %s' % dbname
        args, db, cursor = cursor_execute(args, db, cursor, sql)

        # Create Tables ....
        create_general_table('config_general', args, db, cursor)
        create_trackingid_table('tracking_id', args, db, cursor)
        create_basecall_summary_info('basecall_summary', args, db, cursor)
        create_events_model_fastq_table('basecalled_template', args, db,
                                        cursor)
        create_events_model_fastq_table('basecalled_complement', args, db,
                                        cursor)
        create_basecalled2d_fastq_table('basecalled_2d', args, db, cursor)

        if args.pin is not False:
            create_mincontrol_interaction_table('interaction', args, db,
                                                cursor)
            create_mincontrol_messages_table('messages', args, db, cursor)
            create_mincontrol_barcode_control_table('barcode_control', args,
                                                    db, cursor)
            #print "DONE"

        # ---------------------------------------------------------------------------
        '''
        # DEPRECATIN TELEM MS 11.10.16
        if args.telem is True:
            for i in xrange(0, 10):
                temptable = 'caller_basecalled_template_%d' % i
                comptable = 'caller_basecalled_complement_%d' % i
                twod_aligntable = 'caller_basecalled_2d_alignment_%d' \
                    % i
                create_caller_table_noindex(temptable, args, db, cursor)
                create_caller_table_noindex(comptable, args, db, cursor)
                create_2d_alignment_table(twod_aligntable, args, db, cursor)
            create_model_list_table('model_list', args, db, cursor)
            create_model_data_table('model_data', args, db, cursor)
        '''

        # ---------------------------------------------------------------------------
        if args.preproc is True:
            create_pretrackingid_table('pre_tracking_id', args, db,
                                       cursor)  # make another table
            create_pre_general_table('pre_config_general', args, db,
                                     cursor)  # pre config general table

        # -------- Assign the correct reference fasta for this dbname if applicable

        if args.batch_fasta is not False:
            for refbasename in ref_fasta_hash.keys():
                common_path = \
                    os.path.commonprefix((ref_fasta_hash[refbasename]['path'
                        ], filepath)).rstrip('\\|\/|re|\\re|\/re')
                if common_path.endswith('downloads'):
                    ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

                    # del ref_fasta_hash[refbasename]

        if args.ref_fasta is not False:
            for refbasename in ref_fasta_hash.keys(
            ):  # there should only be one key
                ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

        # ---------------------------------------------------------------------------

        if dbname in ref_fasta_hash:  # great, we assigned the reference fasta to this dbname
            create_reference_table('reference_seq_info', args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_template',
                                          args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_complement',
                                          args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_2d', args, db,
                                          cursor)

            if args.last_align is True:

                # create_align_table('last_align_basecalled_template', args, db, cursor)
                # create_align_table('last_align_basecalled_complement', args, db, cursor)
                # create_align_table('last_align_basecalled_2d', args, db, cursor)

                create_align_table_maf('last_align_maf_basecalled_template',
                                       args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_complement',
                                       args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_2d', args,
                                       db, cursor)

            if args.bwa_align is True:
                create_align_table_sam('align_sam_basecalled_template', args,
                                       db, cursor)
                create_align_table_sam('align_sam_basecalled_complement', args,
                                       db, cursor)
                create_align_table_sam('align_sam_basecalled_2d', args, db,
                                       cursor)

            # dbcheckhash["mafoutdict"][dbname]=open(dbname+"."+process+".align.maf","w")
            '''
            # DEPRECATIN TELEM MS 11.10.16
            if args.telem is True:
                create_ref_kmer_table('ref_sequence_kmer', args, db, cursor)
            '''

            if args.prealign is True:
                create_pre_align_table('pre_align_template', args, db, cursor)
                create_pre_align_table('pre_align_complement', args, db,
                                       cursor)
                create_pre_align_table('pre_align_2d', args, db, cursor)
                create_align_table_raw('last_align_raw_template', args, db,
                                       cursor)
                create_align_table_raw('last_align_raw_complement', args, db,
                                       cursor)
                create_align_table_raw('last_align_raw_2d', args, db, cursor)

            for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():

                # print "refname", refname

                reference = ref_fasta_hash[dbname]['seq_file'][refname]
                reflen = ref_fasta_hash[dbname]['seq_len'][refname]
                reflength = ref_fasta_hash[dbname]['seq_file_len'][reference]
                refid = mysql_load_from_hashes(
                    args, db, cursor, 'reference_seq_info', {
                        'refname': refname,
                        'reflen': reflen,
                        'reffile': reference,
                        'ref_total_len': reflength,
                    })
                ref_fasta_hash[dbname]['refid'][refname] = refid
                '''
                # DEPRECATIN TELEM MS 11.10.16
                if args.telem is True:
                    kmers = ref_fasta_hash[dbname]['kmer'][refname]
                    load_ref_kmer_hash(args, db, 'ref_sequence_kmer', kmers,
                            refid, args, db, cursor)
                '''

        # ---------------------------------------------------------------------------
        # -------- See if theres any ENA XML stuff to add.
        # -------- Need to do this now as it changes the "comment"
        # -------- in Gru.minionRuns entry
        # print "C", comment

        ena_flowcell_owner = None
        for xml_to_downloads_path in xml_file_dict.keys():

            # xmlpath=xml_file_dict["study"][study_id]["path"]

            common_path = os.path.commonprefix(
                (xml_to_downloads_path, filepath)).rstrip('\\|\/|re')
            if common_path.endswith('downloads'):
                print 'found XML data for:', dbname
                sys.stdout.flush()
                create_xml_table('XML', args, db, cursor)

                # ---------------------------------------------------------------------------
                downloadsPath = xml_file_dict[xml_to_downloads_path]

                for study_id in \
                    downloadsPath['study'].keys():
                    ena_flowcell_owner = study_id
                    study_xml = \
                        downloadsPath['study'][study_id]['xml']
                    study_file = \
                        downloadsPath['study'][study_id]['file']
                    study_title = \
                        downloadsPath['study'][study_id]['title']
                    study_abstract = \
                        downloadsPath['study'][study_id]['abstract']
                    exp_c = 'NA'
                    samp_c = 'NA'
                    run_c = 'NA'
                    mysql_load_from_hashes(
                        args, db, cursor, 'XML', {
                            'type': 'study',
                            'primary_id': study_id,
                            'filename': study_file,
                            'xml': study_xml,
                        })
                    for exp_id in \
                        downloadsPath['experiment'].keys():
                        if study_id \
                            == downloadsPath['experiment'][exp_id]['study_id']:
                            exp_c = exp_id
                            exp_xml = \
                                downloadsPath['experiment'][exp_id]['xml']
                            exp_file = \
                                downloadsPath['experiment'][exp_id]['file']
                            sample_id = \
                                downloadsPath['experiment'][exp_id]['sample_id']
                            mysql_load_from_hashes(
                                args, db, cursor, 'XML', {
                                    'type': 'experiment',
                                    'primary_id': exp_id,
                                    'filename': exp_file,
                                    'xml': exp_xml,
                                })

                            if sample_id \
                                in downloadsPath['sample'
                                    ]:
                                samp_c = sample_id
                                sample_xml = \
                                    downloadsPath['sample'][sample_id]['xml']
                                sample_file = \
                                    downloadsPath['sample'][sample_id]['file']
                                mysql_load_from_hashes(
                                    args, db, cursor, 'XML', {
                                        'type': 'sample',
                                        'primary_id': sample_id,
                                        'filename': sample_file,
                                        'xml': sample_xml,
                                    })

                            for run_id in \
                                downloadsPath['run'].keys():
                                if exp_id \
                                    == downloadsPath['run'][run_id]['exp_id']:
                                    run_c = run_id
                                    run_xml = \
                                        downloadsPath['run'][run_id]['xml']
                                    run_file = \
                                        downloadsPath['run'][run_id]['file']
                                    mysql_load_from_hashes(
                                        args, db, cursor, 'XML', {
                                            'type': 'run',
                                            'primary_id': run_id,
                                            'filename': run_file,
                                            'xml': run_xml,
                                        })
                    comments[dbname] = \
                        'ENA data. Study:%s Title: %s Abstract: %s Experiment:%s Sample:%s Run:%s' \
                        % (
                        study_id,
                        study_title,
                        study_abstract,
                        exp_c,
                        samp_c,
                        run_c,
                        )

        # ---------------------------------------------------------------------------
        # --------- Make entries in the Gru database
        # try and get the right basecall-configuration general

        file_type = check_read_type(args, filepath, hdf)
        #print "FILETYPE is", file_type

        basecalltype = getBasecalltype(args, file_type)  # MS
        basecalldir = ''
        basecalldirconfig = ''
        basecallindexpos = ''  #ML
        '''
        try:
         if file_type == 2:
            basecalltype2="Basecall_2D"
            string2='' #ML
            for x in range (0,9):
                string2 = '/Analyses/Hairpin_Split_00%s/Configuration/general' % (x) #ML
                if (string2 in hdf):
                    basecallindexpos=x #ml
                    #print "BASECALLINDEXPOS",basecallindexpos
                    basecalldirconfig=string2 #ML

            string='/Analyses/%s_00%s/Configuration/general' % (basecalltype, basecallindexpos)
            #print string
            if (string in hdf):
            #    print "YES 1"
                basecalldir='/Analyses/%s_00%s/' % (basecalltype,basecallindexpos)
                #basecallindexpos=x #ml
                #break

            string='/Analyses/%s_00%s/Configuration/general' % (basecalltype2, basecallindexpos)
            #print string
            if (string2 in hdf):
                #print "YES 2"
                basecalldir='/Analyses/%s_00%s/' % (basecalltype2,basecallindexpos)
                #basecalldirconfig=string2 #ML
                #break
        except:
                print "checkReads(): error line 467."
                sys.exit()
        try:
          if file_type in [1,0]:
            basecalltype = 'Basecall_1D_CDNA'
            basecalltype2 = 'Basecall_2D'
            basecalldir = ''
            basecalldirconfig = ''
            basecallindexpos=''
        '''
        try:  # MS
            for x in range(0, 9):
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                    basecalldirconfig = string
                    basecallindexpos = x
                    break
                '''
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype2, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                    basecalldirconfig = string
                    basecallindexpos=x
                    break
                '''

        # print "basecalldirconfig", basecalldirconfig
        # # get some data out of tacking_id and general
        except:
            print "checkReads(): error line 496."
            sys.stdout.flush()
            #sys.exit()

        #print basecalldirconfig
        #print basecalldir
        if len(basecalldirconfig) > 0:
            configdata = hdf[basecalldirconfig]
            if len(basecalldir) > 0:
                metrichor_info = hdf[basecalldir]

        # else:
        # ....configdata.attrs['workflow_name'] ="preanalysed"

        trackingid = hdf['/UniqueGlobalKey/tracking_id']
        print trackingid.attrs['exp_start_time']
        #print dateutil.parser.parse(trackingid.attrs['exp_start_time'])
        #print int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))
        #print datetime.datetime.fromtimestamp(int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))).strftime('%Y-%m-%d')

        expstarttimecode = \
            datetime.datetime.fromtimestamp(int(testtime(trackingid.attrs['exp_start_time'
                ]))).strftime('%Y-%m-%d')
        flowcellid = trackingid.attrs['device_id']

        if len(basecalldirconfig) > 0:
            basecalleralg = configdata.attrs['workflow_name']
        else:
            basecalleralg = 'preanalysed'
        if len(basecalldir) > 0:
            #version = metrichor_info.attrs['chimaera version']
            try:
                version = metrichor_info.attrs['chimaera version']  # MS
            except:
                version = metrichor_info.attrs['version']  # MS
        else:
            version = 'unknown'
        runnumber = args.run_num
        flowcellowner = 'NULL'
        username = args.minotourusername
        if args.flowcell_owner is not None:
            flowcellowner = args.flowcell_owner
        if ena_flowcell_owner is not None:
            flowcellowner = ena_flowcell_owner

        # # get info on the reference sequence, if used

        big_reference = 'NOREFERENCE'
        big_reflength = '0'
        if dbname in ref_fasta_hash:  # so there's some reference data for this dbname
            big_reference = ref_fasta_hash[dbname]['big_name']
            big_reflength = ref_fasta_hash[dbname]['big_len']

        # # make entries into Gru for this new database

        comment = comments['default']
        if dbname in comments:
            comment = comments[dbname]

        process = 'noalign'
        if args.last_align is True:
            process = 'LAST'
        if args.bwa_align is True:
            process = 'BWA'

        wdir = args.watchdir
        if wdir.endswith('\\'):  # remove trailing slash for windows.
            wdir = wdir[:-1]
        sql = \
            "INSERT INTO Gru.minIONruns (date,user_name,flowcellid,runname,activeflag,comment,FlowCellOwner,RunNumber,reference,reflength,basecalleralg,version,minup_version,process,mt_ctrl_flag,watch_dir,host_ip) VALUES ('%s','%s','%s','%s',%s,'%s','%s',%s,'%s',%s,'%s','%s','%s','%s',%s,'%s','%s')" \
            % (
            expstarttimecode,
            args.minotourusername,
            flowcellid,
            dbname,
            1,
            comment,
            flowcellowner,
            runnumber,
            big_reference,
            big_reflength,
            basecalleralg,
            version,
            minup_version,
            process,
            1,
            wdir,
            ip,
            )

        #print sql
        if args.verbose == "high":
            print sql
            debug()

        #if args.verbose == "high":
        print '... Database created.'
        sys.stdout.flush()

        db.escape_string(sql)
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        db.commit()
        runindex = cursor.lastrowid
        dbcheckhash['runindex'][dbname] = runindex

        #print "Runindex:",runindex

        # # add us">> ", view_users

        if args.verbose == "high":
            print "Adding users..."
            sys.stdout.flush()

        view_users = [username]

        if args.view_users:
            extra_names = args.view_users.split(',')
            # view_users = args.view_users + extra_names # MS
            view_users = view_users + extra_names  # MS

        for user_name in view_users:
            sql = \
                "SELECT user_id FROM Gru.users WHERE user_name =\'%s\'" \
                % user_name

            # print sql

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            if 0 < cursor.rowcount:
                sql = \
                    'INSERT INTO Gru.userrun (user_id, runindex) VALUES ((SELECT user_id FROM Gru.users WHERE user_name =\'%s\') , (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s") )' \
                    % (user_name, dbname)

                if args.verbose == "high":
                    print sql
                    debug()
                # print sql

                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
            else:
                print 'The MinoTour username "%s" does not exist. Please create it or remove it from the input arguments' \
                    % user_name
                sys.stdout.flush()
                sys.exit()

        # # Create comment table if it doesn't exist

        create_comment_table_if_not_exists('Gru.comments', args, db, cursor)

        # # Add first comment to table

        start_time = time.strftime('%Y-%m-%d %H:%M:%S')
        comment_string = 'minUp version %s started' % minup_version
        mysql_load_from_hashes(
            args, db, cursor, 'Gru.comments', {
                'runindex': runindex,
                'runname': dbname,
                'user_name': args.minotourusername,
                'comment': comment_string,
                'name': args.dbusername,
                'date': start_time,
            })

        # ---------------------------------------------------------------------------
        # --------- make log file and initinal entry

        with open(dbcheckhash['logfile'][dbname], 'w') as logfilehandle:
            logfilehandle.write('minup started at:\t%s%s' %
                                (start_time, os.linesep))
            logfilehandle.write('minup version:\t%s%s' %
                                (minup_version, os.linesep))
            logfilehandle.write('options:' + os.linesep)
            logfilehandle.write('minotour db host:\t%s%s' %
                                (args.dbhost, os.linesep))
            logfilehandle.write('minotour db user:\t%s%s' %
                                (args.dbusername, os.linesep))
            logfilehandle.write('minotour username:\t%s%s' %
                                (args.minotourusername, os.linesep))
            logfilehandle.write('minotour viewer usernames:\t%s%s' %
                                (view_users, os.linesep))
            logfilehandle.write('flowcell owner:\t%s%s' %
                                (flowcellowner, os.linesep))
            logfilehandle.write('run number:\t%s%s' %
                                (args.run_num, os.linesep))
            logfilehandle.write('watch directory:\t%s%s' %
                                (args.watchdir, os.linesep))
            '''
            # DEPRECATIN TELEM MS 11.10.16
            logfilehandle.write('upload telemetry:\t%s%s'
                                % (args.telem, os.linesep))
            '''
            logfilehandle.write('Reference Sequences:' + os.linesep)
            if dbname in ref_fasta_hash:
                for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():
                    logfilehandle.write(
                        'Fasta:\t%s\tlength:\t%d%s' %
                        (ref_fasta_hash[dbname]['seq_file'][refname],
                         ref_fasta_hash[dbname]['seq_len'][refname],
                         os.linesep))
            else:
                logfilehandle.write('No reference sequence set' + os.linesep)

            logfilehandle.write('comment:\t%s%s' % (comment, os.linesep))
            logfilehandle.write('Errors:' + os.linesep)
            logfilehandle.close()

        #startMincontrol(args, dbname, cursor, dbcheckhash,\
        #             minup_version, oper)

        # # connection_pool for this db

        connection_pool[dbname] = list()
        '''
        # DEPRECATIN LAST TELEM MS 11.10.16
        if args.last_align is True \
            or args.bwa_align is True \
            or args.telem is True:
        '''
        if args.bwa_align is True:
            try:
                db_a = MySQLdb.connect(host=args.dbhost,
                                       user=args.dbusername,
                                       passwd=args.dbpass,
                                       port=args.dbport,
                                       db=dbname)
                connection_pool[dbname].append(db_a)
                db_b = MySQLdb.connect(host=args.dbhost,
                                       user=args.dbusername,
                                       passwd=args.dbpass,
                                       port=args.dbport,
                                       db=dbname)
                connection_pool[dbname].append(db_b)
                db_c = MySQLdb.connect(host=args.dbhost,
                                       user=args.dbusername,
                                       passwd=args.dbpass,
                                       port=args.dbport,
                                       db=dbname)
                connection_pool[dbname].append(db_c)
            except Exception, err:
                err_string = 'Error bwa_align: %s ' % err
                print >> sys.stderr, \
                    "Can't setup MySQL connection pool: %s" % err
                sys.stdout.flush()
                with open(dbcheckhash['logfile'][dbname], 'a') as \
                    logfilehandle:
                    logfilehandle.write(err_string + os.linesep)
                    logfilehandle.close()
                sys.stdout.flush()
                sys.exit()

        # --------- this bit last to set the active database in this hash

        if dbcheckhash['dbname']:
            for e in dbcheckhash['dbname'].keys():
                dbcheckhash['dbname'][e] = False
        dbcheckhash['dbname'][dbname] = True
Ejemplo n.º 25
0
def create_events_model_fastq_table(tablename, args, db, cursor):
    fields = (  # 'basename VARCHAR(300), PRIMARY KEY', # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
        # PLSP57501_17062014lambda_3216_1_ch101_file10_strand.whatever
        # = 51.80799999999954
        # = 2347.2034000000003
        # = 1.0063618778594416
        # = 0.20855518951022478
        # = -0.10872176688437207
        # = 0.004143787533549812
        # = 0.9422581300419306
        # = 1.3286319210403454
        # = 1.0368718353240443
        # = 1403015537
        'basename_id INT(10) NOT NULL, PRIMARY KEY (basename_id)',
        'seqid VARCHAR(150) NOT NULL, UNIQUE INDEX (seqid)',
        'duration FLOAT(25,17)',
        'start_time FLOAT(25,17)',
        'scale FLOAT(25,17)',
        'shift FLOAT(25,17)',
        'gross_shift FLOAT(25,17) DEFAULT NULL',
        'drift FLOAT(25,17)',
        'scale_sd FLOAT(25,17)',
        'var_sd FLOAT(25,17)',
        'var FLOAT(25,17)',
        'seqlen INT NOT NULL',
        'sampling_rate float',
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0,  INDEX(align)',
        'pass INT(1) NOT NULL',
        'exp_start_time INT(15) NOT NULL',
        'sequence MEDIUMTEXT DEFAULT NULL',
        'qual MEDIUMTEXT DEFAULT NULL',
        'index 1minalign (align,1minwin)',
        'index 5minalign (align,5minwin)',
        'index 10minalign (align,10minwin)',
        'index 15minalign (align,15minwin)',
        'index s1minalign (align,s1minwin)',
        'index s5minalign (align,s5minwin)',
        'index s10minalign (align,s10minwin)',
        'index s15minalign (align,s15minwin)',
        'index g_1minalign (align,g_1minwin)',
        'index g_5minalign (align,g_5minwin)',
        'index g_10minalign (align,g_10minwin)',
        'index g_15minalign (align,g_15minwin)',
        'index g_s1minalign (align,g_s1minwin)',
        'index g_s5minalign (align,g_s5minwin)',
        'index g_s10minalign (align,g_s10minwin)',
        'index g_s15minalign (align,g_s15minwin)',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)

    #### Create a tmp table to hold trigger data.
    fields = (
        '1minwin INT NOT NULL',
        #'exp_start_time INT NOT NULL',
        'bases INT',
        'maxlen INT',
        'minlen INT',
        'average_length INT',
        'readcount INT',
        'passcount INT default 0',
        'cumuduration INT default 0 ',
        'cumulength INT default 0',
        'distchan INT  default 0',
        'PRIMARY KEY (1minwin)',  #,exp_start_time)',
    )
    colheaders = ','.join(fields)
    tablename2 = tablename + "_1minwin_sum"
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' % (tablename2,
                                                                colheaders)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    ## Create a short running event
    eventname = tablename2 + "_event"
    sql = ' create event %s \
            on schedule every 15 second \
	    starts current_timestamp \
	    ends current_timestamp + interval 1 week\
            do \
            insert into \
            %s(1minwin,readcount,bases,maxlen,minlen,average_length,cumuduration,cumulength) \
            select g_1minwin as 1minwin,count(*) as readcount,sum(seqlen) as bases, max(seqlen) as maxlen,min(seqlen) as minlen , ROUND(AVG(seqlen)) as average_length, sum(duration) as cumuduration, sum(seqlen) as cumuseqlen FROM %s group by 1 order by 1 desc limit 30 \
            ON DUPLICATE KEY UPDATE  readcount=VALUES(readcount),bases=VALUES(bases), maxlen=VALUES(maxlen), minlen=VALUES(minlen),average_length=VALUES(average_length),cumuduration=VALUES(cumuduration),cumulength=VALUES(cumulength)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    ## Create a short running event
    eventname = tablename2 + "_eventpass"
    sql = ' create event %s \
            on schedule every 15 second \
	    starts current_timestamp \
	    ends current_timestamp + interval 1 week\
            do \
            insert into \
            %s(1minwin,passcount) \
            select g_1minwin as 1minwin,count(*) as passcount FROM %s where pass = 1 group by 1 order by 1 desc limit 30 \
            ON DUPLICATE KEY UPDATE passcount=VALUES(passcount)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    ## Create a long running event
    eventname = tablename2 + "_eventL"
    sql = ' create event %s \
            on schedule every 20 minute \
	    starts current_timestamp \
	    ends current_timestamp + interval 1 week\
            do \
            insert into \
            %s(1minwin,readcount,bases,maxlen,minlen,average_length,cumuduration,cumulength) \
            select g_1minwin as 1minwin,count(*) as readcount,sum(seqlen) as bases, max(seqlen) as maxlen,min(seqlen) as minlen , ROUND(AVG(seqlen)) as average_length, sum(duration) as cumuduration, sum(seqlen) as cumulength FROM %s group by 1 order by 1 desc \
            ON DUPLICATE KEY UPDATE  readcount=VALUES(readcount),bases=VALUES(bases), maxlen=VALUES(maxlen), minlen=VALUES(minlen),average_length=VALUES(average_length),cumuduration=VALUES(cumuduration),cumulength=VALUES(cumulength)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    eventname = tablename2 + "_eventpassL"
    sql = ' create event %s \
            on schedule every 20 minute \
	    starts current_timestamp \
	    ends current_timestamp + interval 1 week\
            do \
            insert into \
            %s(1minwin,passcount) \
            select g_1minwin as 1minwin,count(*) as passcount FROM %s where pass = 1 group by 1 order by 1 desc  \
            ON DUPLICATE KEY UPDATE  passcount=VALUES(passcount)' % (
        eventname, tablename2, tablename)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    eventname = tablename2 + "_conf"
    sql = ' create event %s \
            on schedule every 15 second \
            starts current_timestamp \
            ends current_timestamp + interval 1 week\
                do \
                insert into \
                %s(1minwin,distchan) \
                select g_1minwin as 1minwin,count(distinct channel) as distchan from config_general group by 1 order by 1 desc limit 30\
                ON DUPLICATE KEY UPDATE distchan=VALUES(distchan)' % (
        eventname, tablename2)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)

    eventname = tablename2 + "_confL"
    sql = ' create event %s \
            on schedule every 20 minute \
            starts current_timestamp \
            ends current_timestamp + interval 1 week\
                do \
                insert into \
                %s(1minwin,distchan) \
                select g_1minwin as 1minwin,count(distinct channel) as distchan from config_general group by 1 order by 1 desc \
                ON DUPLICATE KEY UPDATE distchan=VALUES(distchan)' % (
        eventname, tablename2)
    #print sql
    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 26
0
def create_pre_general_table(tablename, args, db, cursor):
    fields = (  # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
        # = 2
        # = 1
        # = 124.29830200195312
        # = 199290
        # = 1382
        # = 1
        # = 125.83346438759253
        # = 0.6979265001059975
        # = 228.775431986964
        # = ad4812d9-0f84-468b-a932-c2f91b6152ef
        # = 5
        # = 1
        # = 3
        # = 2544301
        # THIS IS CALCULATED BY THE MINUP SCRIPT FROM THE EVENTS TABLE
        # THIS IS CALCULATED BY THE MINUP SCRIPT FROM THE EVENTS TABLE
        'basename_id INT(10) NOT NULL, PRIMARY KEY(basename_id)',
        'basename VARCHAR(150) NOT NULL, UNIQUE KEY(basename)',
        'abasic_event_index INT(10) DEFAULT NULL',
        'abasic_found INT(1) DEFAULT NULL',
        'abasic_peak_height varchar(40) DEFAULT NULL',
        'duration varchar(40) DEFAULT NULL',
        'hairpin_event_index INT(10) DEFAULT NULL',
        'hairpin_found INT(1) DEFAULT NULL',
        'hairpin_peak_height VARCHAR(40) DEFAULT NULL',
        'hairpin_polyt_level VARCHAR(40) DEFAULT NULL',
        'median_before VARCHAR(40) DEFAULT NULL',
        'channel INT(4) ',
        'read_id VARCHAR(40) DEFAULT NULL',
        'read_number INT(10) DEFAULT NULL',
        'scaling_used INT(1) DEFAULT NULL',
        'start_mux INT(1) DEFAULT NULL',
        'start_time varchar(20) DEFAULT NULL',
        'total_events INT(20) DEFAULT NULL',
        'sampling_rate float',
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'exp_start_time INT(15) NOT NULL',  # Maybe change to pre_tracking_id ?
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)
Ejemplo n.º 27
0
def create_general_table(tablename, args, db, cursor):
    fields = (  # PLSP57501_17062014lambda_3216_1_ch101_file10_strand
        #  = basecall_2d_workflow.py"config_general"
        # = Basecall_2D_000
        #  = 10
        # = False
        # = channel_101_read_10
        # = /opt/metrichor/model
        #  = auto
        #  = 100000
        # = /tmp/input/PLSP57501_17062014lambda_3216_1_ch101_file10_strand.fast5
        # = 1000
        # = /opt/metrichor/config/basecall_2d.cfg
        #  = auto
        #  = 101
        # version = 0.8.3
        # time_stamp = 2014-Jul-02 09:10:13
        # = 1403015537
        'basename_id INT(10) NOT NULL, PRIMARY KEY(basename_id)',
        'basename VARCHAR(150) NOT NULL, UNIQUE KEY(basename)',
        'read_type VARCHAR(50)',
        'local_folder VARCHAR(50) ',
        'workflow_script VARCHAR(50) DEFAULT NULL',
        'workflow_name VARCHAR(50) ',
        'read_id TEXT ',
        'use_local VARCHAR(10) ',
        'tag VARCHAR(50) ',
        'model_path VARCHAR(50) ',
        'complement_model TEXT(200) ',
        'max_events INT(10) ',
        'input VARCHAR(200) ',
        'min_events INT(4) ',
        'config VARCHAR(100) ',
        'template_model VARCHAR(200) ',
        'channel INT(4) ',
        'metrichor_version VARCHAR(10) ',
        'metrichor_time_stamp VARCHAR(20) ',
        'abasic_event_index INT(1)',
        'abasic_found INT(1)',
        'abasic_peak_height FLOAT(25,17)',
        'duration INT(15)',
        'sampling_rate FLOAT(25,17)',
        'hairpin_event_index INT(10)',
        'hairpin_found INT(1)',
        'hairpin_peak_height FLOAT(25,17)',
        'hairpin_polyt_level FLOAT(25,17)',
        'median_before FLOAT(25,17)',
        'read_name VARCHAR(37)',
        'read_number int(10)',
        'scaling_used int(5)',
        'start_mux int(1)',
        'start_time int(20)',
        'end_mux INT(1) DEFAULT NULL',
        'exp_start_time INT(15) NOT NULL',
        '1minwin INT, INDEX(1minwin)',
        '5minwin INT, INDEX(5minwin)',
        '10minwin INT, INDEX(10minwin)',
        '15minwin INT, INDEX(15minwin)',
        's1minwin INT, INDEX(s1minwin)',
        's5minwin INT, INDEX(s5minwin)',
        's10minwin INT, INDEX(s10minwin)',
        's15minwin INT, INDEX(s15minwin)',
        'g_1minwin INT, INDEX(g_1minwin)',
        'g_5minwin INT, INDEX(g_5minwin)',
        'g_10minwin INT, INDEX(g_10minwin)',
        'g_15minwin INT, INDEX(g_15minwin)',
        'g_s1minwin INT, INDEX(g_s1minwin)',
        'g_s5minwin INT, INDEX(g_s5minwin)',
        'g_s10minwin INT, INDEX(g_s10minwin)',
        'g_s15minwin INT, INDEX(g_s15minwin)',
        'align INT DEFAULT 0,  INDEX(align)',
        'pass INT(1) ',
    )
    colheaders = ','.join(fields)
    sql = 'CREATE TABLE IF NOT EXISTS %s (%s) ENGINE=InnoDB' \
        % (tablename, colheaders)

    # print sql

    args, db, cursor = cursor_execute(args, db, cursor, sql)