Beispiel #1
0
def process_model_file(args, dbcheckhash, oper, minup_version, model_file):
    #print "Processing model file..."
    #sys.stdout.flush()

    model_kmers = dict()
    with open(model_file, 'rb') as csv_file:
        reader = csv.reader(csv_file, delimiter="\t")
        d = list(reader)
        #n = len(d)
        #bar = mk_bar(n)
        #bar.start()
        for r in range(0, len(d)):
            #    bar.update(r)
            #print r
            kmer = d[r][0]
            #print kmer
            mean = d[r][1]  # args.model_index]
            #print type(mean)
            try:
                if (float(mean) <= 5):
                    print "Looks like you have a poorly formatted model file. These aren't the means you are looking for.\n"
                    print "The value supplied for " + kmer + " was " + str(
                        mean)
                    terminate_minup(args, dbcheckhash, oper, minup_version)
            except Exception, err:
                print "Problem with means - but it isn't terminal - we assume this is the header line!"
            #if (args.verbose is True): print kmer, mean
            model_kmers[kmer] = mean
Beispiel #2
0
    def __init__(self, dbcheckhash, oper, db, args, xml_file_dict,
                 check_read_args, minup_version, bwaclassrunner):

        self.creates, xml_file_dict = \
                file_dict_of_folder(args, xml_file_dict, args.watchdir)

        self.processed = dict()
        self.running = True

        self.rawcount = dict()
        self.rawprocessed = dict()
        self.p = multiprocessing.Pool(args.procs)
        # self.p = multiprocessing.Pool(multiprocessing.cpu_count())
        self.kmerhashT = dict()
        self.kmerhashC = dict()
        self.args = args
        self.oper = oper
        self.db = db
        self.check_read_args = check_read_args
        self.xml_file_dict = xml_file_dict
        self.minup_version = minup_version
        self.hdf = ''
        self.bwaclassrunner = bwaclassrunner
        '''
        print "Sorting files by timestamps...."
        sys.stdout.flush()
        self.sortedFiles = sorted(self.creates.items(), key=lambda x: x[1])
        '''

        t = threading.Thread(target=self.processfiles)
        t.daemon = True

        try:
            t.start()
        except (KeyboardInterrupt, SystemExit):
            # MS -- Order here is critical ...
            print 'Ctrl-C entered -- exiting'

            t.clear()
            t.stop()

            self.p.close()
            self.p.terminate()
            terminate_minup(args, dbcheckhash, oper, self.minup_version)
            exit_gracefully(args, dbcheckhash, self.minup_version)
            sys.exit(1)

        if args.bwa_align is True and args.ref_fasta is not False:
            fasta_file = args.ref_fasta
            seqlen = get_seq_len(fasta_file)

            # print type(seqlen)

            if args.verbose == "high": print seqlen

            shortestSeq = np.min(seqlen.values())
            if args.verbose == "high": print shortestSeq
            if args.verbose == "high": print args.largerRef
            '''

            # DEPRECARTINE LARGE REF MS 11.10.16

            if not args.largerRef and shortestSeq > 10 ** 8:
                if args.verbose == "high": print "Length of references is >10^8: processing may be *EXTREMELY* slow. To overide rerun using the '-largerRef' option"  # MS
                terminate_minup(args, dbcheckhash, oper, self.minup_version)
            elif not args.largerRef and shortestSeq > 10 ** 7:

                if args.verbose == "high": print "Length of references is >10^7: processing may be *VERY* slow. To overide rerun using the '-largerRef' option"  # MS
                terminate_minup(args, dbcheckhash, oper, self.minup_version)
            else:

                if args.verbose == "high": print 'Length of references is <10^7: processing should be ok .... continuing .... '  # MS
            '''

            # model_file = "model.txt"
            # model_kmer_means=process_model_file(model_file)

            if args.preproc is True:  #  and args.prealign is True:
                model_file_template = \
                        'template.model'
                model_file_complement = \
                        'complement.model'
                model_kmer_means_template = \
                        process_model_file(args, dbcheckhash, oper, self.minup_version, model_file_template)
                model_kmer_means_complement = \
                        process_model_file(args, dbcheckhash, oper, self.minup_version, model_file_complement)

                # model_kmer_means = retrieve_model()
                # global kmerhash
                # kmerhash = process_ref_fasta_raw(fasta_file,model_kmer_means)

                self.kmerhashT = process_ref_fasta_raw(
                    fasta_file, model_kmer_means_template)
                self.kmerhashC = process_ref_fasta_raw(
                    fasta_file, model_kmer_means_complement)
Beispiel #3
0
def emergencyexit2(signum, frame):
    # Signal handler: shut down the filesystem observer first, then hand
    # off to the standard minup termination path.  Relies on the
    # module-level globals observer, args, dbcheckhash, OPER, MINUP_VERSION.
    print('stopping monitor....')
    observer.stop()
    terminate_minup(args, dbcheckhash, OPER, MINUP_VERSION)
Beispiel #4
0
    def processfiles(self):
        """Background worker loop: every 5 seconds print a status line,
        sort the cached fast5 files by creation time, and feed every file
        that is at least 20 s old (and not yet processed) to
        do_file_processing().

        Runs until self.running is set False.  Database/connection
        context is unpacked from self.check_read_args.
        """
        args = self.args
        db = self.db
        oper = self.oper
        xml_file_dict = self.xml_file_dict
        connection_pool, minup_version, \
                comments, ref_fasta_hash, dbcheckhash, \
                logfolder, cursor = self.check_read_args

        # analyser=RawAnalyser()

        everyten = 0
        customtimeout = 0

        # if args.timeout_true is not None:
        #               timeout=args.timeout_true

        # NOTE(review): presumably the address/handle of the minoTour
        # control channel, passed through to do_file_processing -- confirm.
        ip = startMincontrol(args, cursor, dbcheckhash,\
                     minup_version, oper)

        while self.running:
            ts = time.time()
            # Status header: cached vs processed counts (plus raw-read
            # counters when pre-processing raw signal).
            if args.preproc is True:
                print datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'
                        ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                    len(self.processed), 'RAW FILES:', \
                    len(self.rawcount), 'RAW WARPED:', \
                    len(self.rawprocessed)
            else:
                print datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'
                        ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                    len(self.processed)

            # MS UPDATE SummaryStatsTable ....

            # Custom-upload mode: terminate after 7 consecutive passes
            # (> ~30 s) with no files left in the cache.
            if args.customup is True:
                #print "In customup"
                if len(self.creates) > 0:
                    customtimeout = 0
                else:
                    customtimeout += 1
                if customtimeout > 6:
                    terminate_minup(args, dbcheckhash, oper,
                                    self.minup_version)
            '''
            ks = self.creates.keys()
            n = len(ks)
            bar = mk_bar(n)
            bar.start()
            bar.update(10*n/100)
            bar.update(25*n/100)


            bar.update(75*n/100)
            bar.update(100*n/100)
            bar.finish()
            sys.stdout.flush()
            '''
            '''
            if args.verbose is False and args.debug is False:

                #print "Processing files ..."
                #sys.stdout.flush()

                #n = len(sortedFiles)
                #bar = mk_bar(n)
                #bar.start()
                #i=0

                # ??? MS
            if len(self.creates.keys())==0:
                print "No files found."
                terminate_minup(args, dbcheckhash, oper, self.minup_version)
                exit_gracefully(args, dbcheckhash, self.minup_version)
                sys.exit()
            '''

            print "Sorting files by timestamps...."
            sys.stdout.flush()
            # Oldest first, so reads are handled in acquisition order.
            self.sortedFiles = sorted(self.creates.items(), key=lambda x: x[1])

            metadata_sql_list = []
            for (fast5file, createtime) in self.sortedFiles:
                '''
                #if args.verbose is False and args.debug is False:
                        #bar.update(i) # self.processed)
                        #i+=1
                '''
                if args.verbose in ["high", "low"]:
                    print "Processing: ", fast5file
                    print int(createtime), time.time()

                # tn=time.time()

                if int(createtime) + 20 < time.time():
                    # file created 20 sec ago, so should be complete ....
                    if fast5file not in self.processed.keys():

                        # minoTour Metadata Adding ....
                        minoTour_meta_file = args.watchdir + os.sep + "minoTour_meta.txt"
                        if args.verbose == "high":
                            print minoTour_meta_file
                        if os.path.isfile(minoTour_meta_file):
                            try:
                                add_metadata_to_hdf(args, minoTour_meta_file,
                                                    fast5file)
                            except:
                                print "Adding metadata failed."
                                pass
                        else:
                            # NOTE(review): bare string below is a no-op
                            # statement -- it was probably meant to be printed.
                            "No minoTour_meta.txt file."
                        sys.stdout.flush()

                        # Debug mode: catch per-file errors, dump them and
                        # exit; otherwise let them propagate to the caller.
                        if args.debug is True:
                            try:
                                self.do_file_processing(
                                    fast5file, db, connection_pool,
                                    minup_version, comments, ref_fasta_hash,
                                    dbcheckhash, logfolder, cursor,
                                    metadata_sql_list, ip)
                            except Exception, err:
                                #if self.hdf: # CI
                                #    self.hdf.close() # CI

                                # print "This is a pre basecalled file"

                                print "MyHandler(): except -- " + fast5file
                                err_string = \
                                    'Error with fast5 file: %s : %s' \
                                    % (fast5file, err)
                                #print >> sys.stderr, err_string
                                print err_string
                                print "X" * 80
                                debug()
                                sys.exit()

                                #moveFile(args, fast5file)
                                #if args.verbose == "high": sys.exit()

                                #return ()

                        else:
                            self.do_file_processing(
                                fast5file, db, connection_pool, minup_version,
                                comments, ref_fasta_hash, dbcheckhash,
                                logfolder, cursor, metadata_sql_list, ip)
                        # Repeat the status line roughly every 25 files,
                        # but not within 5 s of the pass header above.
                        everyten += 1
                        if everyten == 25:
                            tm = time.time()
                            if ts + 5 < tm:  # just to stop it printing two status messages one after the other.
                                if args.preproc is True:
                                    print datetime.datetime.fromtimestamp(tm).strftime('%Y-%m-%d %H:%M:%S'
                                    ), 'CACHED:', len(self.creates), 'PROCESSED:', \
        len(self.processed), 'RAW FILES:', len(self.rawcount), \
        'RAW WARPED:', len(self.rawprocessed)
                                else:
                                    print datetime.datetime.fromtimestamp(tm).strftime('%Y-%m-%d %H:%M:%S'
                                    ), 'CACHED:', len(self.creates), 'PROCESSED:', \
        len(self.processed)
                            everyten = 0
                    '''
                    if args.verbose is False and args.debug is False:
                        #bar.finish()
                        print "... finished processing files."
                        sys.stdout.flush()
                    '''
            time.sleep(5)
Beispiel #5
0
def check_read(db, args, connection_pool, minup_version, comments,
               xml_file_dict, ref_fasta_hash, dbcheckhash, logfolder, filepath,
               hdf, cursor, oper, ip):

    global runindex

    if args.verbose == "high":
        print "Checking read ..."
        sys.stdout.flush()

    filename = os.path.basename(filepath)

    if args.verbose == "high":
        print time.strftime('%Y-%m-%d %H:%M:%S'), 'processing:', filename
        sys.stdout.flush()

    # Remove "_sequencing_run_" and/or "_mux_scan_" from filename_
    # as used to ake dbname ...

    filename_ = filename
    filename_ = filename_.replace("_sequencing_run_", '_')
    filename_ = filename_.replace("_mux_scan_", '_')

    parts = filename_.split('_')
    strSep = '_'

    # Changing the number below enables the removal of the random four digit number from run names on restart

    dbname = strSep.join(parts[0:len(parts) - 5])
    dbname = re.sub('[.!,; ]', '', dbname)

    if len(args.custom_name) > 0:
        dbname = args.minotourusername + '_' + args.custom_name + '_' \
         + dbname
    else:
        dbname = args.minotourusername + '_' + dbname
    if len(dbname) > 64:
        dbname = dbname[:64]
    if dbname.endswith('_'):  #ml
        dbname = dbname[:-1]  #ml

    if args.verbose == "high":
        print "dbname is ", dbname
        print "Parts were ", parts
        debug()

    # ---------------------------------------------------------------------------

    if dbname in dbcheckhash[
            'dbname']:  # so data from this run has been seen before in this instance of minup so switch to it!
        if dbcheckhash['dbname'][dbname] is False:
            if args.verbose == "high":
                print 'switching to database: ', dbname
                sys.stdout.flush()
            sql = 'USE %s' % dbname
            args, db, cursor = cursor_execute(args, db, cursor, sql)

            # ---------------------------------------------------------------------------

            try:
                runindex = dbcheckhash['runindex'][dbname]  # MS ..
            except:
                print "checkRead(): line 112, dbcheckhash, key error: " \
                                + dbname
                sys.stdout.flush()
                #sys.exit()
                return ()

            comment_string = 'minUp switched runname'
            start_time = time.strftime('%Y-%m-%d %H:%M:%S')
            sql = \
                "INSERT INTO Gru.comments (runindex,runname,user_name,comment,name,date) VALUES (%s,'%s','%s','%s','%s','%s')" \
                % (
                runindex,
                dbname,
                args.minotourusername,
                comment_string,
                args.minotourusername,
                start_time,
                )

            if args.verbose == "high":
                print sql
                debug()

            db.escape_string(sql)
            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()

            # ---------------------------------------------------------------------------

            ks = dbcheckhash['dbname'].keys()
            n = len(ks)
            bar = mk_bar(n)
            bar.start()
            for i, e in enumerate(ks):
                bar.update(i)
                dbcheckhash['dbname'][e] = False
            bar.finish()
            dbcheckhash['dbname'][dbname] = True

    # ---------------------------------------------------------------------------

    if dbname not in dbcheckhash[
            'dbname']:  # # so the db has not been seen before.. time to set up lots of things...
        dbcheckhash['barcoded'][dbname] = False
        dbcheckhash['barcode_info'][dbname] = False
        dbcheckhash['logfile'][dbname] = os.path.join(os.path.sep, logfolder,
                                                      dbname + '.minup.log')
        if args.verbose == "high":
            print 'trying database: ', dbname
            sys.stdout.flush()
        sql = "SHOW DATABASES LIKE \'%s\'" % dbname

        # print sql

        args, db, cursor = cursor_execute(args, db, cursor, sql)
        if cursor.fetchone():
            if args.verbose == "high":
                print 'database exists!'
                sys.stdout.flush()

            # # drop the existing database, if selected

            if args.drop_db is True:
                sql = 'DROP DATABASE %s' % dbname

                # print sql

                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
                if args.verbose == "high":
                    print 'database dropped.'
                    sys.stdout.flush()
            else:
                print >> sys.stderr, "=" * 80
                print >> sys.stderr, \
                    'WARNING: DATABASE \"%s\" already EXISTS.\nTo write over the data re-run the minUP command with option -d' % dbname
                print >> sys.stderr, "=" * 80
                sys.stdout.flush()
                if args.batch_fasta == False:

                    # MS next 6 lines ...

                    print >> sys.stderr, \
                        'not in batch mode so exiting ...'
                    sys.stdout.flush()
                    terminate_minup(args, dbcheckhash, oper, minup_version)

            #terminate_minup(args, dbcheckhash, oper, minup_version)
            #sys.exit()

        if args.drop_db is True:
            print 'Deleting exisiting run from Gru now ...'
            sys.stdout.flush()
            sql = \
                'DELETE FROM Gru.userrun WHERE runindex IN (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s")' \
                % dbname

            # print sql

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
            sql = "DELETE FROM Gru.minIONruns WHERE runname = \'%s\'" \
                % dbname

            # print sql

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
            print '.... Run deleted.'
            sys.stdout.flush()

        # ---------------------------------------------------------------------------
        # -------- This bit adds columns to Gru.minIONruns --------

        modify_gru(args, db, cursor)

        # ---------------------------------------------------------------------------

        # -------- Create a new empty database

        #if args.verbose == "high":
        print 'Making new database: ', dbname
        print '=' * 80
        sys.stdout.flush()
        dbF = open('dbname.txt', 'w')
        dbF.write(dbname + "\n")
        dbF.close()

        sql = 'CREATE DATABASE %s' % dbname
        #print sql
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        sql = 'USE %s' % dbname
        args, db, cursor = cursor_execute(args, db, cursor, sql)

        # Create Tables ....
        create_general_table('config_general', args, db, cursor)
        create_trackingid_table('tracking_id', args, db, cursor)
        create_basecall_summary_info('basecall_summary', args, db, cursor)
        create_events_model_fastq_table('basecalled_template', args, db,
                                        cursor)
        create_events_model_fastq_table('basecalled_complement', args, db,
                                        cursor)
        create_basecalled2d_fastq_table('basecalled_2d', args, db, cursor)

        if args.pin is not False:
            create_mincontrol_interaction_table('interaction', args, db,
                                                cursor)
            create_mincontrol_messages_table('messages', args, db, cursor)
            create_mincontrol_barcode_control_table('barcode_control', args,
                                                    db, cursor)
            #print "DONE"

        # ---------------------------------------------------------------------------
        '''
        # DEPRECATIN TELEM MS 11.10.16
        if args.telem is True:
            for i in xrange(0, 10):
                temptable = 'caller_basecalled_template_%d' % i
                comptable = 'caller_basecalled_complement_%d' % i
                twod_aligntable = 'caller_basecalled_2d_alignment_%d' \
                    % i
                create_caller_table_noindex(temptable, args, db, cursor)
                create_caller_table_noindex(comptable, args, db, cursor)
                create_2d_alignment_table(twod_aligntable, args, db, cursor)
            create_model_list_table('model_list', args, db, cursor)
            create_model_data_table('model_data', args, db, cursor)
        '''

        # ---------------------------------------------------------------------------
        if args.preproc is True:
            create_pretrackingid_table('pre_tracking_id', args, db,
                                       cursor)  # make another table
            create_pre_general_table('pre_config_general', args, db,
                                     cursor)  # pre config general table

        # -------- Assign the correct reference fasta for this dbname if applicable

        if args.batch_fasta is not False:
            for refbasename in ref_fasta_hash.keys():
                common_path = \
                    os.path.commonprefix((ref_fasta_hash[refbasename]['path'
                        ], filepath)).rstrip('\\|\/|re|\\re|\/re')
                if common_path.endswith('downloads'):
                    ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

                    # del ref_fasta_hash[refbasename]

        if args.ref_fasta is not False:
            for refbasename in ref_fasta_hash.keys(
            ):  # there should only be one key
                ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

        # ---------------------------------------------------------------------------

        if dbname in ref_fasta_hash:  # great, we assigned the reference fasta to this dbname
            create_reference_table('reference_seq_info', args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_template',
                                          args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_complement',
                                          args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_2d', args, db,
                                          cursor)

            if args.last_align is True:

                # create_align_table('last_align_basecalled_template', args, db, cursor)
                # create_align_table('last_align_basecalled_complement', args, db, cursor)
                # create_align_table('last_align_basecalled_2d', args, db, cursor)

                create_align_table_maf('last_align_maf_basecalled_template',
                                       args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_complement',
                                       args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_2d', args,
                                       db, cursor)

            if args.bwa_align is True:
                create_align_table_sam('align_sam_basecalled_template', args,
                                       db, cursor)
                create_align_table_sam('align_sam_basecalled_complement', args,
                                       db, cursor)
                create_align_table_sam('align_sam_basecalled_2d', args, db,
                                       cursor)

            # dbcheckhash["mafoutdict"][dbname]=open(dbname+"."+process+".align.maf","w")
            '''
            # DEPRECATIN TELEM MS 11.10.16
            if args.telem is True:
                create_ref_kmer_table('ref_sequence_kmer', args, db, cursor)
            '''

            if args.prealign is True:
                create_pre_align_table('pre_align_template', args, db, cursor)
                create_pre_align_table('pre_align_complement', args, db,
                                       cursor)
                create_pre_align_table('pre_align_2d', args, db, cursor)
                create_align_table_raw('last_align_raw_template', args, db,
                                       cursor)
                create_align_table_raw('last_align_raw_complement', args, db,
                                       cursor)
                create_align_table_raw('last_align_raw_2d', args, db, cursor)

            for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():

                # print "refname", refname

                reference = ref_fasta_hash[dbname]['seq_file'][refname]
                reflen = ref_fasta_hash[dbname]['seq_len'][refname]
                reflength = ref_fasta_hash[dbname]['seq_file_len'][reference]
                refid = mysql_load_from_hashes(
                    args, db, cursor, 'reference_seq_info', {
                        'refname': refname,
                        'reflen': reflen,
                        'reffile': reference,
                        'ref_total_len': reflength,
                    })
                ref_fasta_hash[dbname]['refid'][refname] = refid
                '''
                # DEPRECATIN TELEM MS 11.10.16
                if args.telem is True:
                    kmers = ref_fasta_hash[dbname]['kmer'][refname]
                    load_ref_kmer_hash(args, db, 'ref_sequence_kmer', kmers,
                            refid, args, db, cursor)
                '''

        # ---------------------------------------------------------------------------
        # -------- See if theres any ENA XML stuff to add.
        # -------- Need to do this now as it changes the "comment"
        # -------- in Gru.minionRuns entry
        # print "C", comment

        ena_flowcell_owner = None
        for xml_to_downloads_path in xml_file_dict.keys():

            # xmlpath=xml_file_dict["study"][study_id]["path"]

            common_path = os.path.commonprefix(
                (xml_to_downloads_path, filepath)).rstrip('\\|\/|re')
            if common_path.endswith('downloads'):
                print 'found XML data for:', dbname
                sys.stdout.flush()
                create_xml_table('XML', args, db, cursor)

                # ---------------------------------------------------------------------------
                downloadsPath = xml_file_dict[xml_to_downloads_path]

                for study_id in \
                    downloadsPath['study'].keys():
                    ena_flowcell_owner = study_id
                    study_xml = \
                        downloadsPath['study'][study_id]['xml']
                    study_file = \
                        downloadsPath['study'][study_id]['file']
                    study_title = \
                        downloadsPath['study'][study_id]['title']
                    study_abstract = \
                        downloadsPath['study'][study_id]['abstract']
                    exp_c = 'NA'
                    samp_c = 'NA'
                    run_c = 'NA'
                    mysql_load_from_hashes(
                        args, db, cursor, 'XML', {
                            'type': 'study',
                            'primary_id': study_id,
                            'filename': study_file,
                            'xml': study_xml,
                        })
                    for exp_id in \
                        downloadsPath['experiment'].keys():
                        if study_id \
                            == downloadsPath['experiment'][exp_id]['study_id']:
                            exp_c = exp_id
                            exp_xml = \
                                downloadsPath['experiment'][exp_id]['xml']
                            exp_file = \
                                downloadsPath['experiment'][exp_id]['file']
                            sample_id = \
                                downloadsPath['experiment'][exp_id]['sample_id']
                            mysql_load_from_hashes(
                                args, db, cursor, 'XML', {
                                    'type': 'experiment',
                                    'primary_id': exp_id,
                                    'filename': exp_file,
                                    'xml': exp_xml,
                                })

                            if sample_id \
                                in downloadsPath['sample'
                                    ]:
                                samp_c = sample_id
                                sample_xml = \
                                    downloadsPath['sample'][sample_id]['xml']
                                sample_file = \
                                    downloadsPath['sample'][sample_id]['file']
                                mysql_load_from_hashes(
                                    args, db, cursor, 'XML', {
                                        'type': 'sample',
                                        'primary_id': sample_id,
                                        'filename': sample_file,
                                        'xml': sample_xml,
                                    })

                            for run_id in \
                                downloadsPath['run'].keys():
                                if exp_id \
                                    == downloadsPath['run'][run_id]['exp_id']:
                                    run_c = run_id
                                    run_xml = \
                                        downloadsPath['run'][run_id]['xml']
                                    run_file = \
                                        downloadsPath['run'][run_id]['file']
                                    mysql_load_from_hashes(
                                        args, db, cursor, 'XML', {
                                            'type': 'run',
                                            'primary_id': run_id,
                                            'filename': run_file,
                                            'xml': run_xml,
                                        })
                    comments[dbname] = \
                        'ENA data. Study:%s Title: %s Abstract: %s Experiment:%s Sample:%s Run:%s' \
                        % (
                        study_id,
                        study_title,
                        study_abstract,
                        exp_c,
                        samp_c,
                        run_c,
                        )

        # ---------------------------------------------------------------------------
        # --------- Make entries in the Gru database
        # try and get the right basecall-configuration general

        file_type = check_read_type(args, filepath, hdf)
        #print "FILETYPE is", file_type

        basecalltype = getBasecalltype(args, file_type)  # MS
        basecalldir = ''
        basecalldirconfig = ''
        basecallindexpos = ''  #ML
        '''
        try:
         if file_type == 2:
            basecalltype2="Basecall_2D"
            string2='' #ML
            for x in range (0,9):
                string2 = '/Analyses/Hairpin_Split_00%s/Configuration/general' % (x) #ML
                if (string2 in hdf):
                    basecallindexpos=x #ml
                    #print "BASECALLINDEXPOS",basecallindexpos
                    basecalldirconfig=string2 #ML

            string='/Analyses/%s_00%s/Configuration/general' % (basecalltype, basecallindexpos)
            #print string
            if (string in hdf):
            #    print "YES 1"
                basecalldir='/Analyses/%s_00%s/' % (basecalltype,basecallindexpos)
                #basecallindexpos=x #ml
                #break

            string='/Analyses/%s_00%s/Configuration/general' % (basecalltype2, basecallindexpos)
            #print string
            if (string2 in hdf):
                #print "YES 2"
                basecalldir='/Analyses/%s_00%s/' % (basecalltype2,basecallindexpos)
                #basecalldirconfig=string2 #ML
                #break
        except:
                print "checkReads(): error line 467."
                sys.exit()
        try:
          if file_type in [1,0]:
            basecalltype = 'Basecall_1D_CDNA'
            basecalltype2 = 'Basecall_2D'
            basecalldir = ''
            basecalldirconfig = ''
            basecallindexpos=''
        '''
        try:  # MS
            for x in range(0, 9):
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                    basecalldirconfig = string
                    basecallindexpos = x
                    break
                '''
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype2, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                    basecalldirconfig = string
                    basecallindexpos=x
                    break
                '''

        # print "basecalldirconfig", basecalldirconfig
        # # get some data out of tacking_id and general
        except:
            print "checkReads(): error line 496."
            sys.stdout.flush()
            #sys.exit()

        #print basecalldirconfig
        #print basecalldir
        if len(basecalldirconfig) > 0:
            configdata = hdf[basecalldirconfig]
            if len(basecalldir) > 0:
                metrichor_info = hdf[basecalldir]

        # else:
        # ....configdata.attrs['workflow_name'] ="preanalysed"

        trackingid = hdf['/UniqueGlobalKey/tracking_id']
        print trackingid.attrs['exp_start_time']
        #print dateutil.parser.parse(trackingid.attrs['exp_start_time'])
        #print int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))
        #print datetime.datetime.fromtimestamp(int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))).strftime('%Y-%m-%d')

        expstarttimecode = \
            datetime.datetime.fromtimestamp(int(testtime(trackingid.attrs['exp_start_time'
                ]))).strftime('%Y-%m-%d')
        flowcellid = trackingid.attrs['device_id']

        if len(basecalldirconfig) > 0:
            basecalleralg = configdata.attrs['workflow_name']
        else:
            basecalleralg = 'preanalysed'
        if len(basecalldir) > 0:
            #version = metrichor_info.attrs['chimaera version']
            try:
                version = metrichor_info.attrs['chimaera version']  # MS
            except:
                version = metrichor_info.attrs['version']  # MS
        else:
            version = 'unknown'
        runnumber = args.run_num
        flowcellowner = 'NULL'
        username = args.minotourusername
        if args.flowcell_owner is not None:
            flowcellowner = args.flowcell_owner
        if ena_flowcell_owner is not None:
            flowcellowner = ena_flowcell_owner

        # # get info on the reference sequence, if used

        big_reference = 'NOREFERENCE'
        big_reflength = '0'
        if dbname in ref_fasta_hash:  # so there's some reference data for this dbname
            big_reference = ref_fasta_hash[dbname]['big_name']
            big_reflength = ref_fasta_hash[dbname]['big_len']

        # # make entries into Gru for this new database

        comment = comments['default']
        if dbname in comments:
            comment = comments[dbname]

        process = 'noalign'
        if args.last_align is True:
            process = 'LAST'
        if args.bwa_align is True:
            process = 'BWA'

        wdir = args.watchdir
        if wdir.endswith('\\'):  # remove trailing slash for windows.
            wdir = wdir[:-1]
        sql = \
            "INSERT INTO Gru.minIONruns (date,user_name,flowcellid,runname,activeflag,comment,FlowCellOwner,RunNumber,reference,reflength,basecalleralg,version,minup_version,process,mt_ctrl_flag,watch_dir,host_ip) VALUES ('%s','%s','%s','%s',%s,'%s','%s',%s,'%s',%s,'%s','%s','%s','%s',%s,'%s','%s')" \
            % (
            expstarttimecode,
            args.minotourusername,
            flowcellid,
            dbname,
            1,
            comment,
            flowcellowner,
            runnumber,
            big_reference,
            big_reflength,
            basecalleralg,
            version,
            minup_version,
            process,
            1,
            wdir,
            ip,
            )

        #print sql
        if args.verbose == "high":
            print sql
            debug()

        #if args.verbose == "high":
        print '... Database created.'
        sys.stdout.flush()

        db.escape_string(sql)
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        db.commit()
        runindex = cursor.lastrowid
        dbcheckhash['runindex'][dbname] = runindex

        #print "Runindex:",runindex

        # # add the users who may view this run (view_users)

        if args.verbose == "high":
            print "Adding users..."
            sys.stdout.flush()

        view_users = [username]

        if args.view_users:
            extra_names = args.view_users.split(',')
            # view_users = args.view_users + extra_names # MS
            view_users = view_users + extra_names  # MS

        for user_name in view_users:
            sql = \
                "SELECT user_id FROM Gru.users WHERE user_name =\'%s\'" \
                % user_name

            # print sql

            args, db, cursor = cursor_execute(args, db, cursor, sql)
            if 0 < cursor.rowcount:
                sql = \
                    'INSERT INTO Gru.userrun (user_id, runindex) VALUES ((SELECT user_id FROM Gru.users WHERE user_name =\'%s\') , (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s") )' \
                    % (user_name, dbname)

                if args.verbose == "high":
                    print sql
                    debug()
                # print sql

                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
            else:
                print 'The MinoTour username "%s" does not exist. Please create it or remove it from the input arguments' \
                    % user_name
                sys.stdout.flush()
                sys.exit()

        # # Create comment table if it doesn't exist

        create_comment_table_if_not_exists('Gru.comments', args, db, cursor)

        # # Add first comment to table

        start_time = time.strftime('%Y-%m-%d %H:%M:%S')
        comment_string = 'minUp version %s started' % minup_version
        mysql_load_from_hashes(
            args, db, cursor, 'Gru.comments', {
                'runindex': runindex,
                'runname': dbname,
                'user_name': args.minotourusername,
                'comment': comment_string,
                'name': args.dbusername,
                'date': start_time,
            })

        # ---------------------------------------------------------------------------
        # --------- make log file and initial entry

        with open(dbcheckhash['logfile'][dbname], 'w') as logfilehandle:
            logfilehandle.write('minup started at:\t%s%s' %
                                (start_time, os.linesep))
            logfilehandle.write('minup version:\t%s%s' %
                                (minup_version, os.linesep))
            logfilehandle.write('options:' + os.linesep)
            logfilehandle.write('minotour db host:\t%s%s' %
                                (args.dbhost, os.linesep))
            logfilehandle.write('minotour db user:\t%s%s' %
                                (args.dbusername, os.linesep))
            logfilehandle.write('minotour username:\t%s%s' %
                                (args.minotourusername, os.linesep))
            logfilehandle.write('minotour viewer usernames:\t%s%s' %
                                (view_users, os.linesep))
            logfilehandle.write('flowcell owner:\t%s%s' %
                                (flowcellowner, os.linesep))
            logfilehandle.write('run number:\t%s%s' %
                                (args.run_num, os.linesep))
            logfilehandle.write('watch directory:\t%s%s' %
                                (args.watchdir, os.linesep))
            '''
            # DEPRECATIN TELEM MS 11.10.16
            logfilehandle.write('upload telemetry:\t%s%s'
                                % (args.telem, os.linesep))
            '''
            logfilehandle.write('Reference Sequences:' + os.linesep)
            if dbname in ref_fasta_hash:
                for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():
                    logfilehandle.write(
                        'Fasta:\t%s\tlength:\t%d%s' %
                        (ref_fasta_hash[dbname]['seq_file'][refname],
                         ref_fasta_hash[dbname]['seq_len'][refname],
                         os.linesep))
            else:
                logfilehandle.write('No reference sequence set' + os.linesep)

            logfilehandle.write('comment:\t%s%s' % (comment, os.linesep))
            logfilehandle.write('Errors:' + os.linesep)
            logfilehandle.close()

        #startMincontrol(args, dbname, cursor, dbcheckhash,\
        #             minup_version, oper)

        # # connection_pool for this db

        connection_pool[dbname] = list()
        '''
        # DEPRECATIN LAST TELEM MS 11.10.16
        if args.last_align is True \
            or args.bwa_align is True \
            or args.telem is True:
        '''
        if args.bwa_align is True:
            try:
                db_a = MySQLdb.connect(host=args.dbhost,
                                       user=args.dbusername,
                                       passwd=args.dbpass,
                                       port=args.dbport,
                                       db=dbname)
                connection_pool[dbname].append(db_a)
                db_b = MySQLdb.connect(host=args.dbhost,
                                       user=args.dbusername,
                                       passwd=args.dbpass,
                                       port=args.dbport,
                                       db=dbname)
                connection_pool[dbname].append(db_b)
                db_c = MySQLdb.connect(host=args.dbhost,
                                       user=args.dbusername,
                                       passwd=args.dbpass,
                                       port=args.dbport,
                                       db=dbname)
                connection_pool[dbname].append(db_c)
            except Exception, err:
                err_string = 'Error bwa_align: %s ' % err
                print >> sys.stderr, \
                    "Can't setup MySQL connection pool: %s" % err
                sys.stdout.flush()
                with open(dbcheckhash['logfile'][dbname], 'a') as \
                    logfilehandle:
                    logfilehandle.write(err_string + os.linesep)
                    logfilehandle.close()
                sys.stdout.flush()
                sys.exit()

        # --------- this bit last to set the active database in this hash

        if dbcheckhash['dbname']:
            for e in dbcheckhash['dbname'].keys():
                dbcheckhash['dbname'][e] = False
        dbcheckhash['dbname'][dbname] = True