Ejemplo n.º 1
0
def process_model_file(args, oper, model_file):
    """Read a tab-separated kmer model file and return {kmer: mean}.

    Each data row is expected to look like "<kmer>\t<mean>...".  A numeric
    mean <= 5 indicates a poorly formatted model file and aborts the
    program.  Rows whose mean column is not numeric (e.g. the header
    line) are warned about but still stored, matching the original
    behaviour.

    Parameters:
        args: parsed command-line options (unused here; kept for the
            existing call signature).
        oper: OS identifier string (unused here; kept for the existing
            call signature).
        model_file: path to the tab-separated model file.

    Returns:
        dict mapping kmer string -> mean (the raw string read from the
        file).  The original version built this dict but never returned
        it, even though callers assign the result.
    """
    model_kmers = dict()
    # Text mode works for csv.reader in both Python 2 and Python 3.
    with open(model_file, 'r') as csv_file:
        reader = csv.reader(csv_file, delimiter="\t")
        for row in reader:
            kmer = row[0]
            mean = row[1]
            try:
                if float(mean) <= 5:
                    print("Looks like you have a poorly formatted model file. These aren't the means you are looking for.\n")
                    print("The value supplied for " + kmer + " was " + str(mean))
                    # The original called terminateSubProcesses(args,
                    # dbcheckhash, oper, self.minup_version) here, but
                    # neither 'dbcheckhash' nor 'self' exists in this
                    # scope, so it could only ever raise NameError.
                    # Exit explicitly instead.
                    sys.exit(1)
            except ValueError:
                # Non-numeric mean column: assume this is the header line.
                print("Problem with means - but it isn't terminal - we assume this is the header line!")
            model_kmers[kmer] = mean
    return model_kmers
Ejemplo n.º 2
0
def check_read(
    db,
    args,
    connection_pool,
    minup_version,
    comments,
    xml_file_dict,
    ref_fasta_hash,
    dbcheckhash,
    logfolder,
    filepath,
    hdf,
    cursor,
    oper,
    ):
    """Create or switch to the per-run MySQL database for a read file.

    Derives the run database name from *filepath*'s filename.  If that
    database has already been seen in this minUp session, switch MySQL to
    it and record a "switched runname" comment.  Otherwise create and
    initialise a brand-new database: per-run tables, optional telemetry
    and alignment tables, reference-sequence rows, ENA XML metadata, the
    Gru.minIONruns registry row, per-user view permissions, an initial
    comment, a log file and (with -pin) a mincontrol subprocess.

    Parameters (only the ones read here are listed):
        db / cursor: open MySQL connection and cursor.
        args: parsed command-line options (drop_db, telem, preproc, ...).
        minup_version: version string recorded in Gru and the log file.
        comments: dict of default/per-run comment strings (mutated).
        xml_file_dict: ENA XML metadata keyed by downloads path.
        ref_fasta_hash: reference fasta metadata (mutated: dbname key).
        dbcheckhash: per-run bookkeeping hashes (mutated).
        logfolder: directory for the per-run minup log file.
        filepath / hdf: path to and open h5py handle of the read file.
        oper: 'linux' or 'windows' (selects the mincontrol command).

    Side effects: executes DDL/DML against MySQL, mutates the shared
    dicts above, sets the module-level 'runindex', writes a log file and
    may spawn a subprocess or call sys.exit().

    NOTE(review): Python 2 only (print statements, `except X, err`,
    `iterkeys`, `xrange`).  SQL is built by string interpolation rather
    than parameterised queries -- db.escape_string(sql) as used below
    does NOT sanitise the statement in place.
    """

    global runindex

    filename = os.path.basename(filepath)
    if args.verbose is True:
        print time.strftime('%Y-%m-%d %H:%M:%S'), 'processing:', \
            filename
    parts = filename.split('_')
    # NOTE(review): rebinds the builtin 'str'; it is only used as the
    # join separator on the next statement, but the builtin is shadowed
    # for the rest of this (very long) function.
    str = '_'

    # Changing the number below enables the removal of the random four digit number from run names on restart

    dbname = str.join(parts[0:len(parts) - 5])
    dbname = re.sub('[.!,; ]', '', dbname)
    if len(args.custom_name) > 0:
           dbname = args.minotourusername + '_' + args.custom_name + '_' \
            + dbname
    else:
        dbname = args.minotourusername + '_' + dbname
    # MySQL database names are limited to 64 characters.
    if len(dbname) > 64:
        dbname = dbname[:64]
    if dbname.endswith('_'): #ml
        dbname = dbname[:-1] #ml




    # print "dbname is ",dbname
    # print "Parts were " ,parts

    # ---------------------------------------------------------------------------

    if dbname in dbcheckhash['dbname']:  # so data from this run has been seen before in this instance of minup so switch to it!
        if dbcheckhash['dbname'][dbname] is False:
            if args.verbose is True:
                print 'switching to database: ', dbname
            sql = 'USE %s' % dbname
            cursor.execute(sql)

            # ---------------------------------------------------------------------------

            try: runindex = dbcheckhash['runindex'][dbname] # MS .. 
            except:  
                print "checkRead(): line 112, dbcheckhash, key error: " \
				+ dbname
                #sys.exit()
		return ()


            comment_string = 'minUp switched runname'
            start_time = time.strftime('%Y-%m-%d %H:%M:%S')
            sql = \
                "INSERT INTO Gru.comments (runindex,runname,user_name,comment,name,date) VALUES (%s,'%s','%s','%s','%s','%s')" \
                % (
                runindex,
                dbname,
                args.minotourusername,
                comment_string,
                args.minotourusername,
                start_time,
                )

            # print sql

            db.escape_string(sql)
            cursor.execute(sql)
            db.commit()

            # ---------------------------------------------------------------------------

            # Mark this run as the active one; all others become inactive.
            for e in dbcheckhash['dbname'].keys():
                dbcheckhash['dbname'][e] = False
            dbcheckhash['dbname'][dbname] = True


    # ---------------------------------------------------------------------------

    if dbname not in dbcheckhash['dbname']:  # # so the db has not been seen before.. time to set up lots of things...
        dbcheckhash['barcoded'][dbname] = False
        dbcheckhash['barcode_info'][dbname] = False
        dbcheckhash['logfile'][dbname] = os.path.join(os.path.sep,
                logfolder, dbname + '.minup.log')
        if args.verbose is True:
            print 'trying database: ', dbname
        sql = "SHOW DATABASES LIKE \'%s\'" % dbname

        # print sql

        cursor.execute(sql)
        if cursor.fetchone():
            if args.verbose is True:
                print 'database exists!'

            # # drop the existing database, if selected

            if args.drop_db is True:
                sql = 'DROP DATABASE %s' % dbname

                # print sql

                cursor.execute(sql)
                db.commit()
                if args.verbose is True:
                    print 'database dropped.'
            else:
                print >> sys.stderr, \
                    '%s run database already exists. To write over the data re-run the minUP command with option -d' \
                    % dbname
                if args.batch_fasta == False:

                                  # MS next 6 lines ...

                    print >> sys.stderr, \
                        'not in batch mode so exiting ...'
                    terminateSubProcesses(args, dbcheckhash, oper, minup_version)

        if args.drop_db is True:
            print 'deleting exisiting run from Gru now.'
            sql = \
                'DELETE FROM Gru.userrun WHERE runindex IN (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s")' \
                % dbname

            # print sql

            cursor.execute(sql)
            db.commit()
            sql = "DELETE FROM Gru.minIONruns WHERE runname = \'%s\'" \
                % dbname

            # print sql

            cursor.execute(sql)
            db.commit()

        # -------- mincontrol --------
        # # get the IP address of the host

        ip = '127.0.0.1'
        try:
            ip = socket.gethostbyname(socket.gethostname())
        except Exception, err:
            err_string = 'Error obtaining upload IP adress'
            #print >> sys.stderr, err_string
            print err_string

        # ---------------------------------------------------------------------------
        # -------- This bit adds columns to Gru.minIONruns --------

        modify_gru(cursor)

        # ---------------------------------------------------------------------------

        # -------- Create a new empty database

        if args.verbose is True:
            print 'making new database: ', dbname

        sql = 'CREATE DATABASE %s' % dbname
        cursor.execute(sql)
        sql = 'USE %s' % dbname
        cursor.execute(sql)

	# Create Tables ....
        create_general_table('config_general', cursor)  
        create_trackingid_table('tracking_id', cursor)  
        create_basecall_summary_info('basecall_summary', cursor)
        create_events_model_fastq_table('basecalled_template', cursor) 
        create_events_model_fastq_table('basecalled_complement', cursor) 
        create_basecalled2d_fastq_table('basecalled_2d', cursor) 

        # ---------------------------------------------------------------------------

        # Telemetry mode: pre-create ten numbered caller/alignment tables
        # plus the model tables.
        if args.telem is True:
            for i in xrange(0, 10):
                temptable = 'caller_basecalled_template_%d' % i
                comptable = 'caller_basecalled_complement_%d' % i
                twod_aligntable = 'caller_basecalled_2d_alignment_%d' \
                    % i
                create_caller_table_noindex(temptable, cursor)
                create_caller_table_noindex(comptable, cursor)
                create_2d_alignment_table(twod_aligntable, cursor)
            create_model_list_table('model_list', cursor)
            create_model_data_table('model_data', cursor)

        # ---------------------------------------------------------------------------
        # -------- Assign the correct reference fasta for this dbname if applicable

        if args.batch_fasta is not False:
            for refbasename in ref_fasta_hash.keys():
                common_path = \
                    os.path.commonprefix((ref_fasta_hash[refbasename]['path'
                        ], filepath)).rstrip('\\|\/|re|\\re|\/re')
                if common_path.endswith('downloads'):
                    ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

                    # del ref_fasta_hash[refbasename]

        if args.ref_fasta is not False:
            for refbasename in ref_fasta_hash.keys():  # there should only be one key
                ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

        # ---------------------------------------------------------------------------

        if dbname in ref_fasta_hash:  # great, we assigned the reference fasta to this dbname
            create_reference_table('reference_seq_info', cursor)
            create_5_3_prime_align_tables('last_align_basecalled_template'
                    , cursor)
            create_5_3_prime_align_tables('last_align_basecalled_complement'
                    , cursor)
            create_5_3_prime_align_tables('last_align_basecalled_2d',
                    cursor)

            if args.preproc is True:
                create_pretrackingid_table('pre_tracking_id', cursor)  # make another table
                create_pre_general_table('pre_config_general', cursor)  # pre config general table
                create_pre_align_table('pre_align_template', cursor)
                create_pre_align_table('pre_align_complement', cursor)
                create_pre_align_table('pre_align_2d', cursor)
                create_align_table_raw('last_align_raw_template',
                        cursor)
                create_align_table_raw('last_align_raw_complement',
                        cursor)
                create_align_table_raw('last_align_raw_2d', cursor)

            if args.last_align is True:

                # create_align_table('last_align_basecalled_template', cursor)
                # create_align_table('last_align_basecalled_complement', cursor)
                # create_align_table('last_align_basecalled_2d', cursor)

                create_align_table_maf('last_align_maf_basecalled_template'
                        , cursor)
                create_align_table_maf('last_align_maf_basecalled_complement'
                        , cursor)
                create_align_table_maf('last_align_maf_basecalled_2d',
                        cursor)

            if args.bwa_align is True:
                create_align_table_sam('align_sam_basecalled_template',
                        cursor)
                create_align_table_sam('align_sam_basecalled_complement'
                        , cursor)
                create_align_table_sam('align_sam_basecalled_2d',
                        cursor)

            # dbcheckhash["mafoutdict"][dbname]=open(dbname+"."+process+".align.maf","w")

            if args.telem is True:
                create_ref_kmer_table('ref_sequence_kmer', cursor)

            # Register every reference sequence and remember its row id.
            for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():

                # print "refname", refname

                reference = ref_fasta_hash[dbname]['seq_file'][refname]
                reflen = ref_fasta_hash[dbname]['seq_len'][refname]
                reflength = ref_fasta_hash[dbname]['seq_file_len'
                        ][reference]
                refid = mysql_load_from_hashes(db, cursor,
                        'reference_seq_info', {
                    'refname': refname,
                    'reflen': reflen,
                    'reffile': reference,
                    'ref_total_len': reflength,
                    })
                ref_fasta_hash[dbname]['refid'][refname] = refid
                if args.telem is True:
                    kmers = ref_fasta_hash[dbname]['kmer'][refname]
                    load_ref_kmer_hash(db, 'ref_sequence_kmer', kmers,
                            refid, cursor)

        # ---------------------------------------------------------------------------
        # -------- See if theres any ENA XML stuff to add. 
        # -------- Need to do this now as it changes the "comment" 
        # -------- in Gru.minionRuns entry
        # print "C", comment

        ena_flowcell_owner = None
        for xml_to_downloads_path in xml_file_dict.keys():

            # xmlpath=xml_file_dict["study"][study_id]["path"]

            common_path = os.path.commonprefix((xml_to_downloads_path,
                    filepath)).rstrip('\\|\/|re')
            if common_path.endswith('downloads'):
                print 'found XML data for:', dbname
                create_xml_table('XML', cursor)

                # ---------------------------------------------------------------------------
		downloadsPath = xml_file_dict[xml_to_downloads_path]

                # Walk study -> experiment -> sample/run and store each
                # XML document; the last study seen also sets the Gru
                # comment for this run.
                for study_id in \
                    downloadsPath['study'].keys():
                    ena_flowcell_owner = study_id
                    study_xml = \
                        downloadsPath['study'][study_id]['xml']
                    study_file = \
                        downloadsPath['study'][study_id]['file']
                    study_title = \
                        downloadsPath['study'][study_id]['title']
                    study_abstract = \
                        downloadsPath['study'][study_id]['abstract']
                    exp_c = 'NA'
                    samp_c = 'NA'
                    run_c = 'NA'
                    mysql_load_from_hashes(db, cursor, 'XML', {
                        'type': 'study',
                        'primary_id': study_id,
                        'filename': study_file,
                        'xml': study_xml,
                        })
                    for exp_id in \
                        downloadsPath['experiment'].keys():
                        if study_id \
                            == downloadsPath['experiment'][exp_id]['study_id']:
                            exp_c = exp_id
                            exp_xml = \
                                downloadsPath['experiment'][exp_id]['xml']
                            exp_file = \
                                downloadsPath['experiment'][exp_id]['file']
                            sample_id = \
                                downloadsPath['experiment'][exp_id]['sample_id']
                            mysql_load_from_hashes(db, cursor, 'XML', {
                                'type': 'experiment',
                                'primary_id': exp_id,
                                'filename': exp_file,
                                'xml': exp_xml,
                                })

                            if sample_id \
                                in downloadsPath['sample'
                                    ]:
                                samp_c = sample_id
                                sample_xml = \
                                    downloadsPath['sample'][sample_id]['xml']
                                sample_file = \
                                    downloadsPath['sample'][sample_id]['file']
                                mysql_load_from_hashes(db, cursor, 'XML'
                                        , {
                                    'type': 'sample',
                                    'primary_id': sample_id,
                                    'filename': sample_file,
                                    'xml': sample_xml,
                                    })

                            for run_id in \
                                downloadsPath['run'].keys():
                                if exp_id \
                                    == downloadsPath['run'][run_id]['exp_id']:
                                    run_c = run_id
                                    run_xml = \
				    	downloadsPath['run'][run_id]['xml']
                                    run_file = \
    					downloadsPath['run'][run_id]['file']
                                    mysql_load_from_hashes(db, cursor,
        'XML', {
                                        'type': 'run',
                                        'primary_id': run_id,
                                        'filename': run_file,
                                        'xml': run_xml,
                                        })
                    comments[dbname] = \
                        'ENA data. Study:%s Title: %s Abstract: %s Experiment:%s Sample:%s Run:%s' \
                        % (
                        study_id,
                        study_title,
                        study_abstract,
                        exp_c,
                        samp_c,
                        run_c,
                        )

        # ---------------------------------------------------------------------------
        # --------- Make entries in the Gru database
        # try and get the right basecall-configuration general
        file_type = check_read_type(filepath,hdf)
        #print "FILETYPE is", file_type

        # Locate the basecall analysis group inside the HDF5 file.  The
        # group name carries a two-digit index (e.g. Basecall_2D_000), so
        # candidate indices 0..8 are probed.
	try: 
         if file_type == 2:
            basecalltype="Basecall_1D" #ML
            basecalltype2="Basecall_2D"
            basecalldir=''
            basecalldirconfig=''
            basecallindexpos='' #ML
            string2='' #ML
            for x in range (0,9):
                string2 = '/Analyses/Hairpin_Split_00%s/Configuration/general' % (x) #ML
                if (string2 in hdf):
                    basecallindexpos=x #ml
                    #print "BASECALLINDEXPOS",basecallindexpos
                    basecalldirconfig=string2 #ML

            string='/Analyses/%s_00%s/Configuration/general' % (basecalltype, basecallindexpos)
            #print string
            if (string in hdf):
            #    print "YES 1"
                basecalldir='/Analyses/%s_00%s/' % (basecalltype,basecallindexpos)
                #basecallindexpos=x #ml
                #break

            string='/Analyses/%s_00%s/Configuration/general' % (basecalltype2, basecallindexpos)
            #print string
            # NOTE(review): this tests 'string2' although 'string' was
            # just rebuilt for basecalltype2 -- looks like a typo; confirm
            # intended behaviour before changing it.
            if (string2 in hdf):
                #print "YES 2"
                basecalldir='/Analyses/%s_00%s/' % (basecalltype2,basecallindexpos)
                #basecalldirconfig=string2 #ML
                #break
	except: 
		print "checkReads(): error line 467."
		sys.exit()
        try: 
          if file_type in [1,0]:
            basecalltype = 'Basecall_1D_CDNA'
            basecalltype2 = 'Basecall_2D'
            basecalldir = ''
            basecalldirconfig = ''
            basecallindexpos=''
            for x in range(0, 9):
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                    basecalldirconfig = string
                    basecallindexpos=x
                    break
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype2, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype2, x)
                    basecalldirconfig = string
                    basecallindexpos=x
                    break


        # print "basecalldirconfig", basecalldirconfig
        # # get some data out of tacking_id and general
	except: 
		print "checkReads(): error line 496."
		sys.exit()
        print basecalldirconfig
        print basecalldir
        if len(basecalldirconfig) > 0:
            configdata = hdf[basecalldirconfig]
            if len(basecalldir) > 0:
                metrichor_info = hdf[basecalldir]

        # else:
        # ....configdata.attrs['workflow_name'] ="preanalysed"

        trackingid = hdf['/UniqueGlobalKey/tracking_id']

        expstarttimecode = \
            datetime.datetime.fromtimestamp(int(trackingid.attrs['exp_start_time'
                ])).strftime('%Y-%m-%d')
        flowcellid = trackingid.attrs['device_id']

        if len(basecalldirconfig) > 0:
            basecalleralg = configdata.attrs['workflow_name']
        else:
            basecalleralg = 'preanalysed'
        if len(basecalldir) > 0:
            #version = metrichor_info.attrs['chimaera version']
            try: version = metrichor_info.attrs['chimaera version'] # MS
            except: version = metrichor_info.attrs['version'] # MS
        else:
            version = 'unknown'
        runnumber = args.run_num
        flowcellowner = 'NULL'
        username = args.minotourusername
        if args.flowcell_owner is not None:
            flowcellowner = args.flowcell_owner
        # ENA study id (if found above) wins over the -flowcell_owner flag.
        if ena_flowcell_owner is not None:
            flowcellowner = ena_flowcell_owner

        # # get info on the reference sequence, if used

        big_reference = 'NOREFERENCE'
        big_reflength = '0'
        if dbname in ref_fasta_hash:  # so there's some reference data for this dbname
            big_reference = ref_fasta_hash[dbname]['big_name']
            big_reflength = ref_fasta_hash[dbname]['big_len']

        # # make entries into Gru for this new database

        comment = comments['default']
        if dbname in comments:
            comment = comments[dbname]

        process = 'noalign'
        if args.last_align is True:
            process = 'LAST'
        if args.bwa_align is True:
            process = 'BWA'

        wdir = args.watchdir
        if wdir.endswith('\\'):  # remove trailing slash for windows.
            wdir = wdir[:-1]
        sql = \
            "INSERT INTO Gru.minIONruns (date,user_name,flowcellid,runname,activeflag,comment,FlowCellOwner,RunNumber,reference,reflength,basecalleralg,version,minup_version,process,mt_ctrl_flag,watch_dir,host_ip) VALUES ('%s','%s','%s','%s',%s,'%s','%s',%s,'%s',%s,'%s','%s','%s','%s',%s,'%s','%s')" \
            % (
            expstarttimecode,
            args.minotourusername,
            flowcellid,
            dbname,
            1,
            comment,
            flowcellowner,
            runnumber,
            big_reference,
            big_reflength,
            basecalleralg,
            version,
            minup_version,
            process,
            1,
            wdir,
            ip,
            )

        #print sql

	print 'OK'

        db.escape_string(sql)
        cursor.execute(sql)
        db.commit() 
        # Remember the auto-increment id of the new minIONruns row.
        runindex = cursor.lastrowid
        dbcheckhash['runindex'][dbname] = runindex 

        #print "Runindex:",runindex

        # # add us">> ", view_users

        if args.verbose is True:
            print "adding users."

        view_users=[username]
	
        if args.view_users:
            extra_names = args.view_users.split(',')
            # view_users = args.view_users + extra_names # MS
            view_users = view_users + extra_names # MS

        for user_name in view_users:
            sql = \
                "SELECT user_id FROM Gru.users WHERE user_name =\'%s\'" \
                % user_name

            # print sql

            cursor.execute(sql)
            if 0 < cursor.rowcount:
                sql = \
                    'INSERT INTO Gru.userrun (user_id, runindex) VALUES ((SELECT user_id FROM Gru.users WHERE user_name =\'%s\') , (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s") )' \
                    % (user_name, dbname)

                # print sql

                cursor.execute(sql)
                db.commit()
            else:
                print 'The MinoTour username "%s" does not exist. Please create it or remove it from the input arguments' \
                    % user_name
                sys.exit()

        # # Create comment table if it doesn't exist

        create_comment_table_if_not_exists('Gru.comments', cursor)

        # # Add first comment to table

        start_time = time.strftime('%Y-%m-%d %H:%M:%S')
        comment_string = 'minUp version %s started' % minup_version
        mysql_load_from_hashes(db, cursor, 'Gru.comments', {
            'runindex': runindex,
            'runname': dbname,
            'user_name': args.minotourusername,
            'comment': comment_string,
            'name': args.dbusername,
            'date': start_time,
            })

        # ---------------------------------------------------------------------------
        # --------- make log file and initinal entry

        with open(dbcheckhash['logfile'][dbname], 'w') as logfilehandle:
            logfilehandle.write('minup started at:\t%s%s'
                                % (start_time, os.linesep))
            logfilehandle.write('minup version:\t%s%s'
                                % (minup_version, os.linesep))
            logfilehandle.write('options:' + os.linesep)
            logfilehandle.write('minotour db host:\t%s%s'
                                % (args.dbhost, os.linesep))
            logfilehandle.write('minotour db user:\t%s%s'
                                % (args.dbusername, os.linesep))
            logfilehandle.write('minotour username:\t%s%s'
                                % (args.minotourusername, os.linesep))
            logfilehandle.write('minotour viewer usernames:\t%s%s'
                                % (view_users, os.linesep))
            logfilehandle.write('flowcell owner:\t%s%s'
                                % (flowcellowner, os.linesep))
            logfilehandle.write('run number:\t%s%s' % (args.run_num,
                                os.linesep))
            logfilehandle.write('watch directory:\t%s%s'
                                % (args.watchdir, os.linesep))
            logfilehandle.write('upload telemetry:\t%s%s'
                                % (args.telem, os.linesep))
            logfilehandle.write('Reference Sequences:' + os.linesep)
            if dbname in ref_fasta_hash:
                for refname in ref_fasta_hash[dbname]['seq_len'
                        ].iterkeys():
                    logfilehandle.write('Fasta:\t%s\tlength:\t%d%s'
                            % (ref_fasta_hash[dbname]['seq_file'
                            ][refname], ref_fasta_hash[dbname]['seq_len'
                            ][refname], os.linesep))
            else:
                logfilehandle.write('No reference sequence set'
                                    + os.linesep)

            logfilehandle.write('comment:\t%s%s' % (comment,
                                os.linesep))
            logfilehandle.write('Errors:' + os.linesep)
            # NOTE(review): redundant close() inside the with-block.
            logfilehandle.close()
        if args.pin is not False:
            if args.verbose is True:
                print 'starting mincontrol'
            control_ip = ip
            if args.ip_address is not False:
                control_ip = args.ip_address

            # print "IP", control_ip
            # else the IP is the address of this machine

            create_mincontrol_interaction_table('interaction', cursor)
            create_mincontrol_messages_table('messages', cursor)
            create_mincontrol_barcode_control_table('barcode_control',
                    cursor)

            # NOTE(review): 'is' compares identity, not equality -- these
            # string comparisons rely on CPython small-string interning
            # and should be '==' (left unchanged here).
            try:
                if oper is 'linux':
                    cmd = \
                        'python mincontrol.py -dbh %s -dbu %s -dbp %d -pw %s -db %s -pin %s -ip %s' \
                        % (
                        args.dbhost,
                        args.dbusername,
                        args.dbport,
                        args.dbpass,
                        dbname,
                        args.pin,
                        control_ip,
                        )

                    # print "CMD", cmd

                    subprocess.Popen(cmd, stdout=None, stderr=None,
                            stdin=None, shell=True)
                if oper is 'windows':
                    cmd = \
                        'mincontrol.exe -dbh %s -dbu %s -dbp %d -pw %s -db %s -pin %s -ip %s' \
                        % (
                        args.dbhost,
                        args.dbusername,
                        args.dbport,
                        args.dbpass,
                        dbname,
                        args.pin,
                        control_ip,
                        )

                    # print "CMD", cmd

                    subprocess.Popen(cmd, stdout=None, stderr=None,
                            stdin=None, shell=True)  # , creationflags=subprocess.CREATE_NEW_CONSOLE)
            except Exception, err:
                err_string = 'Error starting mincontrol: %s ' % err
                print >> sys.stderr, err_string
                with open(dbcheckhash['logfile'][dbname], 'a') as \
                    logfilehandle:
                    logfilehandle.write(err_string + os.linesep)
                    logfilehandle.close()
Ejemplo n.º 3
0
        comments['default'] = ' '.join(args.added_comment)  # MS
    if args.add_comment is True:
        comment = \
            raw_input('Type comment then press Enter to continue : ')
        comments['default'] = comment

    # MS -- Then, only import this if all is OK ....
    from MyHandler import MyHandler

    print 'monitor started.'
    try:
        check_read_args = connection_pool, minup_version, \
         comments, ref_fasta_hash, dbcheckhash, \
         logfolder, cursor
        observer = Observer()
        event_handler = MyHandler(dbcheckhash, oper, db, args, xml_file_dict,
                                  check_read_args, minup_version)
        observer.schedule(event_handler, path=args.watchdir, recursive=True)
        observer.start()
        while True:
            time.sleep(1)

    except (KeyboardInterrupt, SystemExit):
        print 'stopping monitor....'
        observer.stop()
        terminateSubProcesses(args, dbcheckhash, oper, minup_version)

        print "finished."
        observer.join()
        sys.exit(1)
Ejemplo n.º 4
0
    def __init__(self, dbcheckhash, oper, db, args, xml_file_dict, check_read_args, minup_version):
        """Watchdog event handler for the minUp watch directory.

        Scans the watch directory for existing files, stores the shared
        state needed by the processing loop, spawns a worker pool and a
        daemon thread running self.processfiles(), and -- when -preproc
        is enabled -- loads the template/complement kmer models and
        pre-computes reference kmer hashes for raw-signal alignment.

        NOTE(review): the KeyboardInterrupt handler below calls
        t.clear() and t.stop(), which are not threading.Thread methods
        and would raise AttributeError if that path were ever taken --
        confirm intent before relying on it.
        """

        # Initial directory scan: cache of files to process plus any ENA
        # XML metadata discovered alongside them.
        self.creates, xml_file_dict = \
		file_dict_of_folder(args, xml_file_dict, args.watchdir)


        self.processed = dict()
        self.running = True

        self.rawcount = dict()
        self.rawprocessed = dict()
        # Worker pool sized by the -procs command-line option.
        self.p = multiprocessing.Pool(args.procs)
        self.kmerhashT = dict()
        self.kmerhashC = dict()
	self.args = args
	self.oper = oper
	self.db = db
	self.check_read_args = check_read_args
	self.xml_file_dict = xml_file_dict
	self.minup_version = minup_version

        # Background daemon thread that drains self.creates.
        t = threading.Thread(target=self.processfiles)
        t.daemon = True
        
        try:
            t.start()
        except (KeyboardInterrupt, SystemExit):
	    # MS -- Order here is critical ...
            print 'Ctrl-C entered -- exiting'  

	    t.clear() 
            t.stop() 

            self.p.close()  
            self.p.terminate()  
            terminateSubProcesses(args, dbcheckhash, oper, self.minup_version)
            exitGracefully(args, dbcheckhash, self.minup_version)
	    sys.exit(1) 


        if args.ref_fasta is not False:
            fasta_file = args.ref_fasta
            seqlen = get_seq_len(fasta_file)

            # print type(seqlen)

            if args.verbose is True: print seqlen
            shortestSeq = np.min(seqlen.values())
            if args.verbose is True: print shortestSeq
            if args.verbose is True: print args.largerRef

            # Refuse very long references unless -largerRef overrides;
            # raw warping cost grows with reference length.
            if not args.largerRef and shortestSeq > 10 ** 8:
                if args.verbose is True: print "Length of references is >10^8: processing may be *EXTREMELY* slow. To overide rerun using the '-largerRef' option"  # MS
                terminateSubProcesses(args, dbcheckhash, oper, self.minup_version)
            elif not args.largerRef and shortestSeq > 10 ** 7:

                if args.verbose is True: print "Length of references is >10^7: processing may be *VERY* slow. To overide rerun using the '-largerRef' option"  # MS
                terminateSubProcesses(args, dbcheckhash, oper, self.minup_version)
            else:

                if args.verbose is True: print 'Length of references is <10^7: processing should be ok .... continuing .... '  # MS

                                                # model_file = "model.txt"
                                                # model_kmer_means=process_model_file(model_file)

	    if args.preproc is True: #  and args.prealign is True:
            	model_file_template = \
			'template.model'
            	model_file_complement = \
               	 	'complement.model'
           	model_kmer_means_template = \
                	process_model_file(args, oper, model_file_template)
            	model_kmer_means_complement = \
                	process_model_file(args, oper, model_file_complement)

                # model_kmer_means = retrieve_model()
                # global kmerhash
                # kmerhash = process_ref_fasta_raw(fasta_file,model_kmer_means)

            	self.kmerhashT = process_ref_fasta_raw(fasta_file,
                    model_kmer_means_template)
            	self.kmerhashC = process_ref_fasta_raw(fasta_file,
                    model_kmer_means_complement)
Ejemplo n.º 5
0
    def processfiles(self):
	args = self.args
	db = self.db
	oper = self.oper
	xml_file_dict = self.xml_file_dict
	connection_pool, minup_version, \
                comments, ref_fasta_hash, dbcheckhash, \
                logfolder, cursor = self.check_read_args


                                # analyser=RawAnalyser()

        everyten = 0
	customtimeout = 0

                                # if args.timeout_true is not None:
                                #               timeout=args.timeout_true

        while self.running:
            time.sleep(5)
            ts = time.time()
            if args.preproc is True:
                print datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'
                        ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                    len(self.processed), 'RAW FILES:', \
                    len(self.rawcount), 'RAW WARPED:', \
                    len(self.rawprocessed)
            else:
                print datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'
                        ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                    len(self.processed)

            if args.customup is True:
                #print "In customup"
                if len(self.creates) > 0:
                    customtimeout=0
                else:
                    customtimeout+=1
                if customtimeout > 6:
		    terminateSubProcesses(args, dbcheckhash, oper, self.minup_version)

            for (fast5file, createtime) in sorted(self.creates.items(), 
						key=lambda x: x[1]):

                # tn=time.time()

                if int(createtime) + 20 < time.time():  
		# file created 20 sec ago, so should be complete ....
                    if fast5file not in self.processed.keys():

                        try:
			  self.hdf = readFast5File(fast5file)
                          self.creates.pop(fast5file, None)
                          self.processed[fast5file] = time.time()
                          # starttime = time.time()


# ##             We want to check if this is a raw read or a basecalled read

                          self.file_type = check_read_type(fast5file,
                                    self.hdf)
		   	  #print str(("file_type: ", self.file_type) )

                            #print "Basecalled Read"
                            #print fast5file
                          if self.file_type > 0 :
                            self.db_name = check_read(
                                    db,
                                    args,
                                    connection_pool,
                                    minup_version,
                                    comments,
                                    xml_file_dict,
                                    ref_fasta_hash,
                                    dbcheckhash,
                                    logfolder,
                                    fast5file,
                                    self.hdf,
                                    cursor,
                                    oper,
                                    )
                            process_fast5(
				    oper,
                                    db,
                                    connection_pool,
                                    args,
                                    ref_fasta_hash,
                                    dbcheckhash,
                                    fast5file,
                                    self.hdf,
                                    self.db_name,
                                    cursor,
                                    )
                          else:
                            #print "Not Basecalled"
                            #print fast5file
                            self.db_name = check_read(
                                    db,
                                    args,
                                    connection_pool,
                                    minup_version,
                                    comments,
                                    xml_file_dict,
                                    ref_fasta_hash,
                                    dbcheckhash,
                                    logfolder,
                                    fast5file,
                                    self.hdf,
                                    cursor,
				    oper,
                                    )
                            self.rawcount[fast5file] = time.time()
                            rawbasename_id = process_fast5_raw(
                                    db,
                                    args,
                                    fast5file,
                                    self.hdf,
                                    self.db_name,
                                    cursor,
                                    )

                            # analyser.apply_async_with_callback(fast5file,rawbasename_id,self.db_name)

			    if args.prealign is True:
                        	print "prealigning", fast5file
                            	x = \
                                    self.apply_async_with_callback(fast5file,
                                        rawbasename_id, self.db_name)
                            	if args.verbose is True: print x  # x.get()
                        	print "prealign finished ", fast5file
			except Exception, err:
			    if self.hdf: # CI
                                self.hdf.close() # CI


                            # print "This is a pre basecalled file"

                            print "MyHandler(): except -- "+ fast5file 
                            err_string = \
                                'Error with fast5 file: %s : %s' \
                                % (fast5file, err)
                            #print >> sys.stderr, err_string
                            print err_string

			'''
                                                                                                #               if dbname is not None:
                                                                                                #                               if dbname in dbcheckhash["dbname"]:
                                                                                                #                                               with open(dbcheckhash["logfile"][dbname],"a") as logfilehandle:
                                                                                                #                                                               logfilehandle.write(err_string+os.linesep)
                                                                                                # s                                                              logfilehandle.close()
			'''

                        everyten += 1
                        if everyten == 10:
                            tm = time.time()
                            if ts + 5 < tm:  # just to stop it printing two status messages one after the other.
                                if args.preproc is True:
                                    print datetime.datetime.fromtimestamp(tm).strftime('%Y-%m-%d %H:%M:%S'
        ), 'CACHED:', len(self.creates), 'PROCESSED:', \
    len(self.processed), 'RAW FILES:', len(self.rawcount), \
    'RAW WARPED:', len(self.rawprocessed)
                                else:
                                    print datetime.datetime.fromtimestamp(tm).strftime('%Y-%m-%d %H:%M:%S'
        ), 'CACHED:', len(self.creates), 'PROCESSED:', \
    len(self.processed)
                            everyten = 0
Ejemplo n.º 6
0
            raw_input('Type comment then press Enter to continue : ')
        comments['default'] = comment


    # MS -- Then, only import this if all is OK ....
    from MyHandler import MyHandler

    print 'monitor started.'
    try:
	check_read_args = connection_pool, minup_version, \
		comments, ref_fasta_hash, dbcheckhash, \
		logfolder, cursor
        observer = Observer()
        event_handler = MyHandler(dbcheckhash, oper, db, args, xml_file_dict, check_read_args, minup_version)
        observer.schedule(event_handler, path=args.watchdir, recursive=True)
        observer.start()
        while True:
            time.sleep(1)

    except (KeyboardInterrupt, SystemExit):
        print 'stopping monitor....'
        observer.stop()
        terminateSubProcesses(args, dbcheckhash, oper, minup_version)
	
	print "finished."
        observer.join()
	sys.exit(1)



Ejemplo n.º 7
0
def check_read(
    db,
    args,
    connection_pool,
    minup_version,
    comments,
    xml_file_dict,
    ref_fasta_hash,
    dbcheckhash,
    logfolder,
    filepath,
    hdf,
    cursor,
    oper,
):

    global runindex

    filename = os.path.basename(filepath)
    if args.verbose is True:
        print time.strftime('%Y-%m-%d %H:%M:%S'), 'processing:', \
            filename
    parts = filename.split('_')
    str = '_'

    # Changing the number below enables the removal of the random four digit number from run names on restart

    dbname = str.join(parts[0:len(parts) - 5])
    dbname = re.sub('[.!,; ]', '', dbname)
    if len(args.custom_name) > 0:
        dbname = args.minotourusername + '_' + args.custom_name + '_' \
         + dbname
    else:
        dbname = args.minotourusername + '_' + dbname
    if len(dbname) > 64:
        dbname = dbname[:64]
    if dbname.endswith('_'):  #ml
        dbname = dbname[:-1]  #ml

    # print "dbname is ",dbname
    # print "Parts were " ,parts

    # ---------------------------------------------------------------------------

    if dbname in dbcheckhash[
            'dbname']:  # so data from this run has been seen before in this instance of minup so switch to it!
        if dbcheckhash['dbname'][dbname] is False:
            if args.verbose is True:
                print 'switching to database: ', dbname
            sql = 'USE %s' % dbname
            cursor.execute(sql)

            # ---------------------------------------------------------------------------

            try:
                runindex = dbcheckhash['runindex'][dbname]  # MS ..
            except:
                print "checkRead(): line 112, dbcheckhash, key error: " \
    + dbname
                #sys.exit()
                return ()

            comment_string = 'minUp switched runname'
            start_time = time.strftime('%Y-%m-%d %H:%M:%S')
            sql = \
                "INSERT INTO Gru.comments (runindex,runname,user_name,comment,name,date) VALUES (%s,'%s','%s','%s','%s','%s')" \
                % (
                runindex,
                dbname,
                args.minotourusername,
                comment_string,
                args.minotourusername,
                start_time,
                )

            # print sql

            db.escape_string(sql)
            cursor.execute(sql)
            db.commit()

            # ---------------------------------------------------------------------------

            for e in dbcheckhash['dbname'].keys():
                dbcheckhash['dbname'][e] = False
            dbcheckhash['dbname'][dbname] = True

    # ---------------------------------------------------------------------------

    if dbname not in dbcheckhash[
            'dbname']:  # # so the db has not been seen before.. time to set up lots of things...
        dbcheckhash['barcoded'][dbname] = False
        dbcheckhash['barcode_info'][dbname] = False
        dbcheckhash['logfile'][dbname] = os.path.join(os.path.sep, logfolder,
                                                      dbname + '.minup.log')
        if args.verbose is True:
            print 'trying database: ', dbname
        sql = "SHOW DATABASES LIKE \'%s\'" % dbname

        # print sql

        cursor.execute(sql)
        if cursor.fetchone():
            if args.verbose is True:
                print 'database exists!'

            # # drop the existing database, if selected

            if args.drop_db is True:
                sql = 'DROP DATABASE %s' % dbname

                # print sql

                cursor.execute(sql)
                db.commit()
                if args.verbose is True:
                    print 'database dropped.'
            else:
                print >> sys.stderr, \
                    '%s run database already exists. To write over the data re-run the minUP command with option -d' \
                    % dbname
                if args.batch_fasta == False:

                    # MS next 6 lines ...

                    print >> sys.stderr, \
                        'not in batch mode so exiting ...'
                    terminateSubProcesses(args, dbcheckhash, oper,
                                          minup_version)

        if args.drop_db is True:
            print 'deleting exisiting run from Gru now.'
            sql = \
                'DELETE FROM Gru.userrun WHERE runindex IN (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s")' \
                % dbname

            # print sql

            cursor.execute(sql)
            db.commit()
            sql = "DELETE FROM Gru.minIONruns WHERE runname = \'%s\'" \
                % dbname

            # print sql

            cursor.execute(sql)
            db.commit()

        # -------- mincontrol --------
        # # get the IP address of the host

        ip = '127.0.0.1'
        try:
            ip = socket.gethostbyname(socket.gethostname())
        except Exception, err:
            err_string = 'Error obtaining upload IP adress'
            #print >> sys.stderr, err_string
            print err_string

        # ---------------------------------------------------------------------------
        # -------- This bit adds columns to Gru.minIONruns --------

        modify_gru(cursor)

        # ---------------------------------------------------------------------------

        # -------- Create a new empty database

        if args.verbose is True:
            print 'making new database: ', dbname

        sql = 'CREATE DATABASE %s' % dbname
        cursor.execute(sql)
        sql = 'USE %s' % dbname
        cursor.execute(sql)

        # Create Tables ....
        create_general_table('config_general', cursor)
        create_trackingid_table('tracking_id', cursor)
        create_basecall_summary_info('basecall_summary', cursor)
        create_events_model_fastq_table('basecalled_template', cursor)
        create_events_model_fastq_table('basecalled_complement', cursor)
        create_basecalled2d_fastq_table('basecalled_2d', cursor)

        # ---------------------------------------------------------------------------

        if args.telem is True:
            for i in xrange(0, 10):
                temptable = 'caller_basecalled_template_%d' % i
                comptable = 'caller_basecalled_complement_%d' % i
                twod_aligntable = 'caller_basecalled_2d_alignment_%d' \
                    % i
                create_caller_table_noindex(temptable, cursor)
                create_caller_table_noindex(comptable, cursor)
                create_2d_alignment_table(twod_aligntable, cursor)
            create_model_list_table('model_list', cursor)
            create_model_data_table('model_data', cursor)

        # ---------------------------------------------------------------------------
        # -------- Assign the correct reference fasta for this dbname if applicable

        if args.batch_fasta is not False:
            for refbasename in ref_fasta_hash.keys():
                common_path = \
                    os.path.commonprefix((ref_fasta_hash[refbasename]['path'
                        ], filepath)).rstrip('\\|\/|re|\\re|\/re')
                if common_path.endswith('downloads'):
                    ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

                    # del ref_fasta_hash[refbasename]

        if args.ref_fasta is not False:
            for refbasename in ref_fasta_hash.keys(
            ):  # there should only be one key
                ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]

        # ---------------------------------------------------------------------------

        if dbname in ref_fasta_hash:  # great, we assigned the reference fasta to this dbname
            create_reference_table('reference_seq_info', cursor)
            create_5_3_prime_align_tables('last_align_basecalled_template',
                                          cursor)
            create_5_3_prime_align_tables('last_align_basecalled_complement',
                                          cursor)
            create_5_3_prime_align_tables('last_align_basecalled_2d', cursor)

            if args.preproc is True:
                create_pretrackingid_table('pre_tracking_id',
                                           cursor)  # make another table
                create_pre_general_table('pre_config_general',
                                         cursor)  # pre config general table
                create_pre_align_table('pre_align_template', cursor)
                create_pre_align_table('pre_align_complement', cursor)
                create_pre_align_table('pre_align_2d', cursor)
                create_align_table_raw('last_align_raw_template', cursor)
                create_align_table_raw('last_align_raw_complement', cursor)
                create_align_table_raw('last_align_raw_2d', cursor)

            if args.last_align is True:

                # create_align_table('last_align_basecalled_template', cursor)
                # create_align_table('last_align_basecalled_complement', cursor)
                # create_align_table('last_align_basecalled_2d', cursor)

                create_align_table_maf('last_align_maf_basecalled_template',
                                       cursor)
                create_align_table_maf('last_align_maf_basecalled_complement',
                                       cursor)
                create_align_table_maf('last_align_maf_basecalled_2d', cursor)

            if args.bwa_align is True:
                create_align_table_sam('align_sam_basecalled_template', cursor)
                create_align_table_sam('align_sam_basecalled_complement',
                                       cursor)
                create_align_table_sam('align_sam_basecalled_2d', cursor)

            # dbcheckhash["mafoutdict"][dbname]=open(dbname+"."+process+".align.maf","w")

            if args.telem is True:
                create_ref_kmer_table('ref_sequence_kmer', cursor)

            for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():

                # print "refname", refname

                reference = ref_fasta_hash[dbname]['seq_file'][refname]
                reflen = ref_fasta_hash[dbname]['seq_len'][refname]
                reflength = ref_fasta_hash[dbname]['seq_file_len'][reference]
                refid = mysql_load_from_hashes(
                    db, cursor, 'reference_seq_info', {
                        'refname': refname,
                        'reflen': reflen,
                        'reffile': reference,
                        'ref_total_len': reflength,
                    })
                ref_fasta_hash[dbname]['refid'][refname] = refid
                if args.telem is True:
                    kmers = ref_fasta_hash[dbname]['kmer'][refname]
                    load_ref_kmer_hash(db, 'ref_sequence_kmer', kmers, refid,
                                       cursor)

        # ---------------------------------------------------------------------------
        # -------- See if theres any ENA XML stuff to add.
        # -------- Need to do this now as it changes the "comment"
        # -------- in Gru.minionRuns entry
        # print "C", comment

        ena_flowcell_owner = None
        for xml_to_downloads_path in xml_file_dict.keys():

            # xmlpath=xml_file_dict["study"][study_id]["path"]

            common_path = os.path.commonprefix(
                (xml_to_downloads_path, filepath)).rstrip('\\|\/|re')
            if common_path.endswith('downloads'):
                print 'found XML data for:', dbname
                create_xml_table('XML', cursor)

                # ---------------------------------------------------------------------------
                downloadsPath = xml_file_dict[xml_to_downloads_path]

                for study_id in \
                    downloadsPath['study'].keys():
                    ena_flowcell_owner = study_id
                    study_xml = \
                        downloadsPath['study'][study_id]['xml']
                    study_file = \
                        downloadsPath['study'][study_id]['file']
                    study_title = \
                        downloadsPath['study'][study_id]['title']
                    study_abstract = \
                        downloadsPath['study'][study_id]['abstract']
                    exp_c = 'NA'
                    samp_c = 'NA'
                    run_c = 'NA'
                    mysql_load_from_hashes(
                        db, cursor, 'XML', {
                            'type': 'study',
                            'primary_id': study_id,
                            'filename': study_file,
                            'xml': study_xml,
                        })
                    for exp_id in \
                        downloadsPath['experiment'].keys():
                        if study_id \
                            == downloadsPath['experiment'][exp_id]['study_id']:
                            exp_c = exp_id
                            exp_xml = \
                                downloadsPath['experiment'][exp_id]['xml']
                            exp_file = \
                                downloadsPath['experiment'][exp_id]['file']
                            sample_id = \
                                downloadsPath['experiment'][exp_id]['sample_id']
                            mysql_load_from_hashes(
                                db, cursor, 'XML', {
                                    'type': 'experiment',
                                    'primary_id': exp_id,
                                    'filename': exp_file,
                                    'xml': exp_xml,
                                })

                            if sample_id \
                                in downloadsPath['sample'
                                    ]:
                                samp_c = sample_id
                                sample_xml = \
                                    downloadsPath['sample'][sample_id]['xml']
                                sample_file = \
                                    downloadsPath['sample'][sample_id]['file']
                                mysql_load_from_hashes(
                                    db, cursor, 'XML', {
                                        'type': 'sample',
                                        'primary_id': sample_id,
                                        'filename': sample_file,
                                        'xml': sample_xml,
                                    })

                            for run_id in \
                                downloadsPath['run'].keys():
                                if exp_id \
                                    == downloadsPath['run'][run_id]['exp_id']:
                                    run_c = run_id
                                    run_xml = \
         downloadsPath['run'][run_id]['xml']
                                    run_file = \
         downloadsPath['run'][run_id]['file']
                                    mysql_load_from_hashes(
                                        db, cursor, 'XML', {
                                            'type': 'run',
                                            'primary_id': run_id,
                                            'filename': run_file,
                                            'xml': run_xml,
                                        })
                    comments[dbname] = \
                        'ENA data. Study:%s Title: %s Abstract: %s Experiment:%s Sample:%s Run:%s' \
                        % (
                        study_id,
                        study_title,
                        study_abstract,
                        exp_c,
                        samp_c,
                        run_c,
                        )

        # ---------------------------------------------------------------------------
        # --------- Make entries in the Gru database
        # try and get the right basecall-configuration general
        file_type = check_read_type(filepath, hdf)
        #print "FILETYPE is", file_type

        try:
            if file_type == 2:
                basecalltype = "Basecall_1D"  #ML
                basecalltype2 = "Basecall_2D"
                basecalldir = ''
                basecalldirconfig = ''
                basecallindexpos = ''  #ML
                string2 = ''  #ML
                for x in range(0, 9):
                    string2 = '/Analyses/Hairpin_Split_00%s/Configuration/general' % (
                        x)  #ML
                    if (string2 in hdf):
                        basecallindexpos = x  #ml
                        #print "BASECALLINDEXPOS",basecallindexpos
                        basecalldirconfig = string2  #ML

                string = '/Analyses/%s_00%s/Configuration/general' % (
                    basecalltype, basecallindexpos)
                #print string
                if (string in hdf):
                    #    print "YES 1"
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype,
                                                          basecallindexpos)
                    #basecallindexpos=x #ml
                    #break

                string = '/Analyses/%s_00%s/Configuration/general' % (
                    basecalltype2, basecallindexpos)
                #print string
                if (string2 in hdf):
                    #print "YES 2"
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype2,
                                                          basecallindexpos)
                    #basecalldirconfig=string2 #ML
                    #break
        except:
            print "checkReads(): error line 467."
            sys.exit()
        try:
            if file_type in [1, 0]:
                basecalltype = 'Basecall_1D_CDNA'
                basecalltype2 = 'Basecall_2D'
                basecalldir = ''
                basecalldirconfig = ''
                basecallindexpos = ''
                for x in range(0, 9):
                    string = '/Analyses/%s_00%s/Configuration/general' \
                        % (basecalltype, x)
                    if string in hdf:
                        basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                        basecalldirconfig = string
                        basecallindexpos = x
                        break
                    string = '/Analyses/%s_00%s/Configuration/general' \
                        % (basecalltype2, x)
                    if string in hdf:
                        basecalldir = '/Analyses/%s_00%s/' % (basecalltype2, x)
                        basecalldirconfig = string
                        basecallindexpos = x
                        break

        # print "basecalldirconfig", basecalldirconfig
        # # get some data out of tacking_id and general
        except:
            print "checkReads(): error line 496."
            sys.exit()
        print basecalldirconfig
        print basecalldir
        if len(basecalldirconfig) > 0:
            configdata = hdf[basecalldirconfig]
            if len(basecalldir) > 0:
                metrichor_info = hdf[basecalldir]

        # else:
        # ....configdata.attrs['workflow_name'] ="preanalysed"

        trackingid = hdf['/UniqueGlobalKey/tracking_id']

        expstarttimecode = \
            datetime.datetime.fromtimestamp(int(trackingid.attrs['exp_start_time'
                ])).strftime('%Y-%m-%d')
        flowcellid = trackingid.attrs['device_id']

        if len(basecalldirconfig) > 0:
            basecalleralg = configdata.attrs['workflow_name']
        else:
            basecalleralg = 'preanalysed'
        if len(basecalldir) > 0:
            #version = metrichor_info.attrs['chimaera version']
            try:
                version = metrichor_info.attrs['chimaera version']  # MS
            except:
                version = metrichor_info.attrs['version']  # MS
        else:
            version = 'unknown'
        runnumber = args.run_num
        flowcellowner = 'NULL'
        username = args.minotourusername
        if args.flowcell_owner is not None:
            flowcellowner = args.flowcell_owner
        if ena_flowcell_owner is not None:
            flowcellowner = ena_flowcell_owner

        # # get info on the reference sequence, if used

        big_reference = 'NOREFERENCE'
        big_reflength = '0'
        if dbname in ref_fasta_hash:  # so there's some reference data for this dbname
            big_reference = ref_fasta_hash[dbname]['big_name']
            big_reflength = ref_fasta_hash[dbname]['big_len']

        # # make entries into Gru for this new database

        comment = comments['default']
        if dbname in comments:
            comment = comments[dbname]

        process = 'noalign'
        if args.last_align is True:
            process = 'LAST'
        if args.bwa_align is True:
            process = 'BWA'

        wdir = args.watchdir
        if wdir.endswith('\\'):  # remove trailing slash for windows.
            wdir = wdir[:-1]
        sql = \
            "INSERT INTO Gru.minIONruns (date,user_name,flowcellid,runname,activeflag,comment,FlowCellOwner,RunNumber,reference,reflength,basecalleralg,version,minup_version,process,mt_ctrl_flag,watch_dir,host_ip) VALUES ('%s','%s','%s','%s',%s,'%s','%s',%s,'%s',%s,'%s','%s','%s','%s',%s,'%s','%s')" \
            % (
            expstarttimecode,
            args.minotourusername,
            flowcellid,
            dbname,
            1,
            comment,
            flowcellowner,
            runnumber,
            big_reference,
            big_reflength,
            basecalleralg,
            version,
            minup_version,
            process,
            1,
            wdir,
            ip,
            )

        #print sql

        print 'OK'

        db.escape_string(sql)
        cursor.execute(sql)
        db.commit()
        runindex = cursor.lastrowid
        dbcheckhash['runindex'][dbname] = runindex

        #print "Runindex:",runindex

        # # Grant viewing rights on this run to the owner plus any extra
        # # usernames supplied via --view_users.

        if args.verbose is True:
            print "adding users."

        view_users = [username]

        if args.view_users:
            extra_names = args.view_users.split(',')
            # view_users = args.view_users + extra_names # MS
            view_users = view_users + extra_names  # MS

        for user_name in view_users:
            # NOTE(review): user_name and dbname are interpolated straight
            # into the SQL strings below; parameterised queries would be
            # safer against quoting/injection problems.
            sql = \
                "SELECT user_id FROM Gru.users WHERE user_name =\'%s\'" \
                % user_name

            # print sql

            cursor.execute(sql)
            if 0 < cursor.rowcount:
                # User exists: link them to this run via the userrun table.
                sql = \
                    'INSERT INTO Gru.userrun (user_id, runindex) VALUES ((SELECT user_id FROM Gru.users WHERE user_name =\'%s\') , (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s") )' \
                    % (user_name, dbname)

                # print sql

                cursor.execute(sql)
                db.commit()
            else:
                # Hard abort: every requested viewer account must exist.
                print 'The MinoTour username "%s" does not exist. Please create it or remove it from the input arguments' \
                    % user_name
                sys.exit()

        # # Create comment table if it doesn't exist

        create_comment_table_if_not_exists('Gru.comments', cursor)

        # # Add first comment to table: an automatic marker recording that
        # # this minUp version started processing the run.

        start_time = time.strftime('%Y-%m-%d %H:%M:%S')
        comment_string = 'minUp version %s started' % minup_version
        mysql_load_from_hashes(
            db, cursor, 'Gru.comments', {
                'runindex': runindex,
                'runname': dbname,
                'user_name': args.minotourusername,
                'comment': comment_string,
                'name': args.dbusername,
                'date': start_time,
            })

        # ---------------------------------------------------------------------------
        # --------- make log file and initial entry

        # Write a human-readable header recording this run's configuration.
        # (The original also called logfilehandle.close() at the end of the
        # 'with' block; that was redundant -- the context manager closes the
        # handle -- so it has been removed.)
        with open(dbcheckhash['logfile'][dbname], 'w') as logfilehandle:
            logfilehandle.write('minup started at:\t%s%s' %
                                (start_time, os.linesep))
            logfilehandle.write('minup version:\t%s%s' %
                                (minup_version, os.linesep))
            logfilehandle.write('options:' + os.linesep)
            logfilehandle.write('minotour db host:\t%s%s' %
                                (args.dbhost, os.linesep))
            logfilehandle.write('minotour db user:\t%s%s' %
                                (args.dbusername, os.linesep))
            logfilehandle.write('minotour username:\t%s%s' %
                                (args.minotourusername, os.linesep))
            logfilehandle.write('minotour viewer usernames:\t%s%s' %
                                (view_users, os.linesep))
            logfilehandle.write('flowcell owner:\t%s%s' %
                                (flowcellowner, os.linesep))
            logfilehandle.write('run number:\t%s%s' %
                                (args.run_num, os.linesep))
            logfilehandle.write('watch directory:\t%s%s' %
                                (args.watchdir, os.linesep))
            logfilehandle.write('upload telemetry:\t%s%s' %
                                (args.telem, os.linesep))
            logfilehandle.write('Reference Sequences:' + os.linesep)
            # List every reference fasta (file + length) if any were set.
            if dbname in ref_fasta_hash:
                for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():
                    logfilehandle.write(
                        'Fasta:\t%s\tlength:\t%d%s' %
                        (ref_fasta_hash[dbname]['seq_file'][refname],
                         ref_fasta_hash[dbname]['seq_len'][refname],
                         os.linesep))
            else:
                logfilehandle.write('No reference sequence set' + os.linesep)

            logfilehandle.write('comment:\t%s%s' % (comment, os.linesep))
            # "Errors:" section header; later failures are appended under it.
            logfilehandle.write('Errors:' + os.linesep)
        if args.pin is not False:
            if args.verbose is True:
                print 'starting mincontrol'
            control_ip = ip
            if args.ip_address is not False:
                control_ip = args.ip_address

            # print "IP", control_ip
            # else the IP is the address of this machine

            create_mincontrol_interaction_table('interaction', cursor)
            create_mincontrol_messages_table('messages', cursor)
            create_mincontrol_barcode_control_table('barcode_control', cursor)

            try:
                if oper is 'linux':
                    cmd = \
                        'python mincontrol.py -dbh %s -dbu %s -dbp %d -pw %s -db %s -pin %s -ip %s' \
                        % (
                        args.dbhost,
                        args.dbusername,
                        args.dbport,
                        args.dbpass,
                        dbname,
                        args.pin,
                        control_ip,
                        )

                    # print "CMD", cmd

                    subprocess.Popen(cmd,
                                     stdout=None,
                                     stderr=None,
                                     stdin=None,
                                     shell=True)
                if oper is 'windows':
                    cmd = \
                        'mincontrol.exe -dbh %s -dbu %s -dbp %d -pw %s -db %s -pin %s -ip %s' \
                        % (
                        args.dbhost,
                        args.dbusername,
                        args.dbport,
                        args.dbpass,
                        dbname,
                        args.pin,
                        control_ip,
                        )

                    # print "CMD", cmd

                    subprocess.Popen(
                        cmd, stdout=None, stderr=None, stdin=None, shell=True
                    )  # , creationflags=subprocess.CREATE_NEW_CONSOLE)
            except Exception, err:
                err_string = 'Error starting mincontrol: %s ' % err
                print >> sys.stderr, err_string
                with open(dbcheckhash['logfile'][dbname], 'a') as \
                    logfilehandle:
                    logfilehandle.write(err_string + os.linesep)
                    logfilehandle.close()