Example #1
def loadfile(dfile, linect, dformat):
    '''Load linect lines from labeled data in dfile according to dformat.
    Return a list of DataPoint instances.

    dfile   -- file name
                -- one data point per line
                -- values are separated by commas
                -- final value is the label

    dformat -- data format list
                -- tuples with a function to coerce the value and a function
                   which returns False if the value is invalid
                   eg:
                   >>> fmt = 48 * [(float, lambda x: 0 <= x <= 100   )] + \
                              6 * [(int,   lambda x: 0 < x           )] + \
                              1 * [(int,   lambda x: x == 1 or x == 0)]

    '''
    print 'Loading "{}"'.format(dfile)
    data = []
    with open(dfile, mode='rb') as fd:
        with pg.Progress(linect, 2, pg.bar('Lines', 32)) as pr:
            for line in fd:
                # clean the data & create the datapoint
                cleandata = [loadfeature(raw.strip(), fmt) \
                             for raw, fmt in zip(line.split(','), dformat)]
                data.append(DataPoint(cleandata[:-1], cleandata[-1]))
                # indicate progress
                try:
                    pr.next()
                except TypeError:
                    break
    return data
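A usage sketch for loadfile, assuming the 48+6+1 column format shown in the docstring; the file name and line count are placeholders, and pg, loadfeature, and DataPoint come from the surrounding module:

fmt = 48 * [(float, lambda x: 0 <= x <= 100)] + \
       6 * [(int,   lambda x: 0 < x)] + \
       1 * [(int,   lambda x: x == 1 or x == 0)]
# 'spam.data' and 4601 are illustrative values only
points = loadfile('spam.data', 4601, fmt)
print '{} data points loaded'.format(len(points))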
Example #2
def bulk_insert(db, label, data, into):
    label = f"Creating {len(data)} {label}"
    pbar = bar(label, len(data))

    while data:
        chunk = data[:1000]
        data = data[1000:]
        db.bulk_insert_mappings(into, chunk)
        db.commit()
        pbar.next(len(chunk))
    pbar.finish()
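The bar helper used by bulk_insert is not shown; judging from the calls, it takes a label and a total and exposes next(n) and finish(). A minimal sketch with that assumed interface (not the project's actual implementation):

import sys

class bar(object):
    """Assumed interface: bar(label, total), then next(n) per chunk and finish()."""
    def __init__(self, label, total):
        self.label = label
        self.total = max(total, 1)
        self.done = 0

    def next(self, n=1):
        self.done += n
        pct = 100.0 * self.done / self.total
        sys.stdout.write('\r%s: %d/%d (%.0f%%)' % (self.label, self.done, self.total, pct))
        sys.stdout.flush()

    def finish(self):
        sys.stdout.write('\n')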
Example #3
def run(X,y,C,kernel):
    n = len(X)
    y_pred = np.zeros(n,int)
    
    for i in range(n):
        all_except_i = list(range(i)) + list(range(i+1,n))
        X_train = X[all_except_i]
        y_train = y[all_except_i]
        alg = SVC(C=C, kernel=kernel)
        alg.fit(X_train, y_train)
        loner = X[i]
        loner = loner.reshape(1, -1)
        y_pred[i] = alg.predict(loner)
        #print (y_pred[i])

        #progress bar
        progress.bar(i+1,n,"Performing Leave One Out Cross Validation")

    err = np.mean(y!=y_pred)
    print ("LEAVE ONE OUT: err=", err)
    return y_pred
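For comparison, the same leave-one-out evaluation can be written with scikit-learn's built-in utilities; this sketch uses a synthetic dataset and skips the progress reporting:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import LeaveOneOut, cross_val_predict
from sklearn.svm import SVC

# small synthetic problem, purely for illustration
X, y = make_classification(n_samples=50, n_features=5, random_state=0)
y_pred = cross_val_predict(SVC(C=1.0, kernel='rbf'), X, y, cv=LeaveOneOut())
print("LEAVE ONE OUT: err=", np.mean(y != y_pred))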
Example #4
 def from_dataset(cls, dataset):
     stumps = []
     print 'Making distinct stumps and caching mistakes.'
     maxstumps = len(dataset[0].features) * (len(dataset) - 1 + 2)
     with pg.Progress(maxstumps, 2, pg.bar('Stumps', 32)) as p:
         # generate pairs of (index, feature vector)
         for i, fv in enumerate(it.izip(*(dp.features for dp in dataset))):
             for t in cls.thresholds(fv):
                 s = i, t, Stump.mistakes((i, t, []), dataset)
                 stumps.append(s)
                 p.next()
     return stumps
Example #5
 def from_dataset(cls, dataset):
     stumps = []
     print 'Making distinct stumps and caching mistakes.'
     maxstumps = len(dataset[0].features) * (len(dataset) - 1 + 2)
     with pg.Progress(maxstumps, 2, pg.bar('Stumps', 32)) as p:
         # generate pairs of (index, feature vector)
         for i, fv in enumerate(it.izip(*(dp.features for dp in dataset))):
             for t in cls.thresholds(fv):
                 s = i, t, Stump.mistakes((i, t, []), dataset)
                 stumps.append(s)
                 p.next()
     return stumps
Example #6
def main():
    """Main loop"""
    start_time = time.time()
    options, arg = interface()
    motd()
    print "Started: ", time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(start_time))
    conf = ConfigParser.ConfigParser()
    conf.read(options.conf)
    # build our configuration
    params = Parameters(conf)
    conn = MySQLdb.connect(user=params.user, passwd=params.pwd, db=params.db)
    cur = conn.cursor()
    # crank out a new table for the data
    createSeqTable(cur)
    conn.commit()
    seqcount = sequenceCount(conf.get("Input", "sequence"))
    sequence = QualityIO.PairedFastaQualIterator(
        open(conf.get("Input", "sequence"), "rU"), open(conf.get("Input", "qual"), "rU")
    )
    # pdb.set_trace()
    if conf.getboolean("Multiprocessing", "MULTIPROCESSING"):
        # get num processors
        n_procs = conf.get("Multiprocessing", "processors")
        if n_procs == "Auto":
            # we'll use x-1 cores (where x = avail. cores)
            n_procs = multiprocessing.cpu_count() - 1
        else:
            n_procs = int(n_procs)
        print "Multiprocessing.  Number of processors = ", n_procs
        # to test with fewer sequences
        # count = 0
        try:
            threads = []
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            while sequence:
                if len(threads) < n_procs:
                    p = multiprocessing.Process(target=linkerWorker, args=(sequence.next(), params))
                    p.start()
                    threads.append(p)
                    if (pb_inc + 1) % 1000 == 0:
                        pb.__call__(pb_inc + 1)
                    elif pb_inc + 1 == seqcount:
                        pb.__call__(pb_inc + 1)
                    pb_inc += 1
                else:
                    for t in threads:
                        if not t.is_alive():
                            threads.remove(t)
        except StopIteration:
            pass
    else:
        print "Not using multiprocessing"
        count = 0
        try:
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            # while count < 1000:
            while sequence:
                # count +=1
                linkerWorker(sequence.next(), params)
                if (pb_inc + 1) % 1000 == 0:
                    pb.__call__(pb_inc + 1)
                elif pb_inc + 1 == seqcount:
                    pb.__call__(pb_inc + 1)
                pb_inc += 1
        except StopIteration:
            pass
    print "\n"
    cur.close()
    conn.close()
    end_time = time.time()
    print "Ended: ", time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(end_time))
    print "\nTime for execution: ", (end_time - start_time) / 60, "minutes"
Example #7
    test_x = tests[index][0]
    test_y = tests[index][-1][CFGS["DATA"][y_tag]]

    # dimensionality reduction
    if dec_flag:
        decomodel,train_x_ = dimensionality_reduction(train_x, n_dim=6, method="TruncatedSVD")
        test_x_ = decomodel.transform(test_x)
    else:
        train_x_,test_x_ = train_x, test_x

    # sklearn model
    best_score = 0.0
    best_model = None
    fit_time   = 0.0
    pred_time  = 0.0
    for regressor in bar(regressors):
        with Timer() as t:
            model = regressor()
            model.fit(train_x_, train_y)
        fit_time = t.toc()
        y_true = test_y.to_numpy().astype(float)
        with Timer() as t:
            y_pred = model.predict(test_x_)
        pred_time = t.toc()
        accuracy = calc_accuracy(y_true, y_pred, display=True)

        result[y_tag]["info"][regressor] = {
            "r2_score":  accuracy["score"]["r2_score"],
            "fit_time":  fit_time,
            "pred_time": pred_time,
        }
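Timer here is a context manager whose toc() returns the elapsed time for the block; a minimal stand-in with that assumed behaviour (not the original helper):

import time

class Timer:
    """Assumed interface: use as a context manager, then read elapsed seconds via toc()."""
    def __enter__(self):
        self._start = time.perf_counter()
        self._end = None
        return self

    def __exit__(self, *exc):
        self._end = time.perf_counter()
        return False

    def toc(self):
        # elapsed seconds for the with-block (or time so far if still inside it)
        end = self._end if self._end is not None else time.perf_counter()
        return end - self._start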
Example #8
        value = ''
        for v in np.reshape(H_ba, (9)):
            value += str(v) + ','
        f_ba.write(idxn + value + '\n')

        value = ''
        for v in np.reshape(delta_AB, (8)):
            value += str(v) + ','
        fd_ab.write(idxn + value + '\n')

        value = ''
        for v in np.reshape(delta_BA, (8)):
            value += str(v) + ','
        fd_ba.write(idxn + value + '\n')

        save_file_name = str(idx).zfill(8) + '.png'
        A_save_path = os.path.join(Image_A_dir, save_file_name)
        B_save_path = os.path.join(Image_B_dir, save_file_name)
        cv2.imwrite(A_save_path, img_A)
        cv2.imwrite(B_save_path, img_B)

        bar(0, idx, num)

    f_ab.close()
    f_ba.close()
    fd_ab.close()
    fd_ba.close()
    fidxn.close()

#%%
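The three write blocks above build each comma-separated row by repeated string concatenation; an equivalent using str.join (a sketch with placeholder values standing in for idxn and the matrices):

import numpy as np

H_ba = np.eye(3)              # placeholder 3x3 homography, for illustration only
idxn = '00000000.png,'        # placeholder index prefix

# one join call replaces the value-by-value concatenation loop
row = ','.join(str(v) for v in H_ba.reshape(9)) + ','
line = idxn + row + '\n'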
Example #9
def main():
    '''Main loop'''
    start_time = time.time()
    options, arg = interface()
    motd()
    print 'Started: ', time.strftime("%a %b %d, %Y  %H:%M:%S",
                                     time.localtime(start_time))
    conf = ConfigParser.ConfigParser()
    conf.read(options.conf)
    # build our configuration
    params = Parameters(conf)
    conn = MySQLdb.connect(user=params.user, passwd=params.pwd, db=params.db)
    cur = conn.cursor()
    # crank out a new table for the data
    createSeqTable(cur)
    conn.commit()
    seqcount = sequenceCount(conf.get('Input', 'sequence'))
    sequence = QualityIO.PairedFastaQualIterator(
        open(conf.get('Input', 'sequence'), "rU"),
        open(conf.get('Input', 'qual'), "rU"))
    #pdb.set_trace()
    if conf.getboolean('Multiprocessing', 'MULTIPROCESSING'):
        # get num processors
        n_procs = conf.get('Multiprocessing', 'processors')
        if n_procs == 'Auto':
            # we'll use x-1 cores (where x = avail. cores)
            n_procs = multiprocessing.cpu_count() - 1
        else:
            n_procs = int(n_procs)
        print 'Multiprocessing.  Number of processors = ', n_procs
        # to test with fewer sequences
        #count = 0
        try:
            threads = []
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            while sequence:
                if len(threads) < n_procs:
                    p = multiprocessing.Process(target=linkerWorker,
                                                args=(
                                                    sequence.next(),
                                                    params,
                                                ))
                    p.start()
                    threads.append(p)
                    if (pb_inc + 1) % 1000 == 0:
                        pb.__call__(pb_inc + 1)
                    elif pb_inc + 1 == seqcount:
                        pb.__call__(pb_inc + 1)
                    pb_inc += 1
                else:
                    for t in threads:
                        if not t.is_alive():
                            threads.remove(t)
        except StopIteration:
            pass
    else:
        print 'Not using multiprocessing'
        count = 0
        try:
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            #while count < 1000:
            while sequence:
                #count +=1
                linkerWorker(sequence.next(), params)
                if (pb_inc + 1) % 1000 == 0:
                    pb.__call__(pb_inc + 1)
                elif pb_inc + 1 == seqcount:
                    pb.__call__(pb_inc + 1)
                pb_inc += 1
        except StopIteration:
            pass
    print '\n'
    cur.close()
    conn.close()
    end_time = time.time()
    print 'Ended: ', time.strftime("%a %b %d, %Y  %H:%M:%S",
                                   time.localtime(end_time))
    print '\nTime for execution: ', (end_time - start_time) / 60, 'minutes'
Example #10
        

def bar(rn, fill='.'):
    import time

    loading = '#' * rn            # for strings, * is the repeat operator
    rest = fill * (100 - rn)

    # each pass replaces one more dot with a hash
    print('\r[%s%s] loading at %d percent!' % (loading, rest, rn), end='')
    time.sleep(0.02)              # brief pause so the refresh is visible

if __name__ == '__main__':
    for rn in range(1, 101):
        bar(rn)
    print()
def get_commands(args):
    """Parse "--key value" / "-key value" pairs from an argv-style list."""
    if len(args) < 3:
        return
    ret = {}
    begin = 2
    while begin < len(args):
        key = str(args[begin]).lstrip('-')
        ret[key] = args[begin + 1]
        begin += 2       # advance past the key/value pair (the original loop never advanced)
    return ret
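A quick usage example for the parser above (argument values are illustrative):

# e.g. sys.argv == ['tool.py', 'run', '--input', 'data.csv', '-n', '5']
print(get_commands(['tool.py', 'run', '--input', 'data.csv', '-n', '5']))
# -> {'input': 'data.csv', 'n': '5'}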
from progress.bar import *
from progress.spinner import *
from progress.counter import *
class progress_types:
    __all__ = ['bar', 'charging_bar', 'filling_sqares_bar', 'filling_circles_bar', 'incremental_bar', 'pixel_bar',
               'shady_bar', 'spinner', 'pie_spinner', 'moon_spinner', 'line_spinner', 'pixel_spinner',
               'counter', 'countdown', 'stack', 'pie']
Example #11
def main():
    """Main loop"""
    start_time = time.time()
    options, arg = interface()
    motd()
    print "Started: ", time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(start_time))
    conf = ConfigParser.ConfigParser()
    conf.read(options.conf)
    conn = MySQLdb.connect(
        user=conf.get("Database", "USER"), passwd=conf.get("Database", "PASSWORD"), db=conf.get("Database", "DATABASE")
    )
    cur = conn.cursor()
    qualTrim = conf.getboolean("Steps", "TRIM")
    qual = conf.getint("Qual", "MIN_SCORE")
    linkerTrim = conf.getboolean("Steps", "LINKERTRIM")
    if qualTrim and not linkerTrim:
        createQualSeqTable(cur)
        conn.commit()
    elif qualTrim and linkerTrim:
        mid, reverse_mid = dict(conf.items("MID")), reverse(conf.items("MID"))
        linkers, reverse_linkers = dict(conf.items("Linker")), reverse(conf.items("Linker"))
        # TODO:  Add levenshtein distance script to automagically determine
        # distance
        reverse_mid[None] = None
        reverse_linkers[None] = None
        clust = conf.items("Clusters")
        # build tag library 1X
        tags = tagLibrary(mid, linkers, clust)
        all_tags, all_tags_regex = allPossibleTags(mid, linkers, clust)
        # crank out a new table for the data
        createSeqTable(cur)
        conn.commit()
    seqcount = sequenceCount(conf.get("Input", "sequence"))
    record = QualityIO.PairedFastaQualIterator(
        open(conf.get("Input", "sequence"), "rU"), open(conf.get("Input", "qual"), "rU")
    )
    # pdb.set_trace()
    if conf.getboolean("Multiprocessing", "MULTIPROCESSING"):
        # get num processors
        n_procs = conf.get("Multiprocessing", "processors")
        if n_procs == "Auto":
            # TODO:  change this?
            # we'll start 2X-1 threads (X = processors).
            n_procs = multiprocessing.cpu_count() - 1
        else:
            n_procs = int(n_procs)
        print "Multiprocessing.  Number of processors = ", n_procs
        # to test with fewer sequences
        # count = 0
        try:
            threads = []
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            while record:
                if len(threads) < n_procs:
                    if qualTrim and not linkerTrim:
                        p = multiprocessing.Process(target=qualOnlyWorker, args=(record.next(), qual, conf))
                    elif qualTrim and linkerTrim:
                        p = multiprocessing.Process(
                            target=linkerWorker,
                            args=(
                                record.next(),
                                qual,
                                tags,
                                all_tags,
                                all_tags_regex,
                                reverse_mid,
                                reverse_linkers,
                                conf,
                            ),
                        )
                    p.start()
                    threads.append(p)
                    if (pb_inc + 1) % 1000 == 0:
                        pb.__call__(pb_inc + 1)
                    elif pb_inc + 1 == seqcount:
                        pb.__call__(pb_inc + 1)
                    pb_inc += 1
                else:
                    for t in threads:
                        if not t.is_alive():
                            threads.remove(t)
        except StopIteration:
            pass
    else:
        print "Not using multiprocessing"
        count = 0
        try:
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            # while count < 1000:
            while record:
                # count +=1
                if qualTrim and not linkerTrim:
                    qualOnlyWorker(record.next(), qual, conf)
                elif qualTrim and linkerTrim:
                    linkerWorker(
                        record.next(), qual, tags, all_tags, all_tags_regex, reverse_mid, reverse_linkers, conf
                    )
                if (pb_inc + 1) % 1000 == 0:
                    pb.__call__(pb_inc + 1)
                elif pb_inc + 1 == seqcount:
                    pb.__call__(pb_inc + 1)
                pb_inc += 1
        except StopIteration:
            pass
    print "\n"
    cur.close()
    conn.close()
    end_time = time.time()
    print "Ended: ", time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(end_time))
    print "\nTime for execution: ", (end_time - start_time) / 60, "minutes"
Example #12
def main():
    start_time = time.time()
    print 'Started: ', time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(start_time))
    options, arg    = interface()
    conf            = ConfigParser.ConfigParser()
    conf.read(options.conf)
    have_sequence_table = conf.getboolean('MicrosatelliteParameters', 'HaveSequenceTable')
    db = (conf.get('Database','USER'), 
          conf.get('Database','PASSWORD'), 
          conf.get('Database','DATABASE')
          )
    m_processing = conf.getboolean('Multiprocessing', 'MULTIPROCESSING')
    num_procs = conf.get('Multiprocessing','processors')
    fasta_engine = conf.get('MicrosatelliteParameters', 'FastaEngine').lower()
    try:
        data_type = conf.get('Input', 'Type')
    except:
        data_type = 'fasta'
    # build our configuration
    conn = MySQLdb.connect(
        user    = db[0],
        passwd  = db[1],
        db      = db[2]
        )
    cur = conn.cursor()
    # get groups of 1000 sequences, package them, send them to multiprocessing
    # for primer design
    db_chunk = []
    # get count of total combined msats in the table
    cur.execute('''SELECT sequence.name, combined.sequence_id, combined.id, 
            combined.start, combined.end FROM sequence, combined 
            where combined.sequence_id = sequence.id''')
    sequence_ids = cur.fetchall()
    # split our msats into chunks or blocks of 1000 reads, one of which we
    # will sequentially pass to each processing core across the set of reads
    if len(sequence_ids) > 1000:
        for span in range(0,len(sequence_ids), 1000):
            db_chunk.append(sequence_ids[span:span + 1000])
    db_chunk_len = float(len(db_chunk))
    db_chunk = iter(db_chunk)
    data = Sequence(input = conf.get('Input','sequence'), engine = fasta_engine, data_type = data_type)
    # we only need a single instance of our primer design settings
    settings = p3wrapr.primer.Settings()
    settings.basic()
    # ensure that we get pretty different primers for each set of X designed
    # => offset by 10 bp.  this makes primer3 somewhat slower.
    settings.params['PRIMER_MIN_THREE_PRIME_DISTANCE'] = 10
    # override the default location for the mispriming library
    settings.params['PRIMER_MISPRIMING_LIBRARY'] = '/Users/bcf/Bin/misprime_lib_weight'
    # we only need a single instance of our tag settings
    tag_settings = p3wrapr.primer.Settings()
    tag_settings.reduced(PRIMER_PICK_ANYWAY=1)
    # create primers and tagged primers table
    create_primers_table(cur)
    create_tagged_primers_table(cur)
    if m_processing:
        if num_procs == 'Auto':
            num_procs = multiprocessing.cpu_count() - 2
        else:
            num_procs = int(num_procs)
        print 'Multiprocessing.  Number of processors = %s\n' % num_procs
        threads = []
        # access the data on sequence by sequence basis to avoid reading the 
        # entire table contents into memory        
        pb = progress.bar(0,db_chunk_len,60)
        pb_inc = 0
        try:
            while db_chunk:
                if len(threads) < num_procs:
                    container = get_sequence_for_chunk(data, db_chunk.next())
                    p = multiprocessing.Process(target=worker, args=(db, container, settings, tag_settings))
                    p.start()
                    threads.append(p)
                    if (pb_inc+1)%1 == 0:
                        pb.__call__(pb_inc+1)
                    elif pb_inc + 1 == db_chunk_len:
                        pb.__call__(pb_inc+1)
                    pb_inc += 1
                else:
                    for t in threads:
                        if not t.is_alive():
                            threads.remove(t)
        except StopIteration:
            pass
    # for single processing
    else:
        try:
            while db_chunk:
                if have_sequence_table:
                    pass
                else:
                    container = get_sequence_for_chunk(data, db_chunk.next())
                    worker(db, container, settings, tag_settings)
        except StopIteration:
            pass
    print '\n'
    cur.close()
    conn.close()
    end_time = time.time()
    print 'Ended: ', time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(end_time))
    print '\nTime for execution: ', (end_time - start_time)/60, 'minutes'
Example #13
               1 * [(int, lambda x: 0 <= x)] +\
               1 * [(int, lambda x: x == 1 or x == 0)]
 #
 TABLE_ = '+------------+------------+------------+------------+------------+------------+------------+'
 TABLEH = '| Test Fold  | Test Ct    | Train Ct   | Operat Pnt | FP Rate    | FN Rate    | Error Rate |'
 TABLE  = '| {: ^10} | {: >10} | {: >10} | {: <10.4f} | {: <10.8f} | {: <10.8f} | {: <10.8f} |'
 #
 FOLDCOUNT = 10
 ROCFOLD = 0
 folds = [[] for i in xrange(FOLDCOUNT)]
 k = 0 # kurrent fold
 #
 print
 print 'Loading "{}"'.format(DATAFILE)
 with open(DATAFILE, mode='rb') as fd:
     with pg.Progress(4601, timeout=2, callback=pg.bar('Loading', 32)) as pr:
         for line in fd:
             # clean the data & create the datapoint
             cleandata = [load(raw.strip(), fmt) for raw, fmt in zip(line.split(','), DATAFORMAT)]
             datapoint = {'features':cleandata[:-1], 'label':cleandata[-1]}
             # add to the current fold & switch to the next fold
             folds[k].append(datapoint)
             k = (k + 1) % FOLDCOUNT
             # indicate progress
             pr.next()
 #
 results = collections.defaultdict(list)
 for name, model in [(m.__name__, m.model) for m in [bernoulli, gaussian, histogram]]:
     print
     print '10-fold cross-validation for {} Naive Bayes'.format(name.capitalize())
     with pg.Progress(FOLDCOUNT, timeout=4, callback=pg.bar(name.capitalize(), 32)) as pr:
Example #14
def main():
    start_time = time.time()
    options, arg = interface()
    motd()
    print 'Started: ', time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(start_time))
    conf = ConfigParser.ConfigParser()
    conf.read(options.conf)
    # =============================================
    # = Setup additional configuration parameters =
    # =============================================
    db = (conf.get('Database','USER'), 
          conf.get('Database','PASSWORD'), 
          conf.get('Database','DATABASE')
          )
    have_sequence_table = conf.getboolean('MicrosatelliteParameters', 'HaveSequenceTable')
    fasta_engine = conf.get('MicrosatelliteParameters', 'FastaEngine').lower()
    try:
        data_type = conf.get('Input', 'Type')
    except:
        data_type = 'fasta'
    combine_loci = conf.getboolean('MicrosatelliteParameters', 'CombineLoci')
    combine_loci_dist = conf.getint('MicrosatelliteParameters', 'CombineLociDist')
    m_processing = conf.getboolean('Multiprocessing', 'MULTIPROCESSING')
    get_num_procs = conf.get('Multiprocessing','processors')
    
    conn = MySQLdb.connect(user     = db[0],
                           passwd   = db[1],
                           db       = db[2]
                           )
    cur = conn.cursor()
    # Drop old tables
    drop_old_tables(cur, have_sequence_table)
    if have_sequence_table:
        data = Sequence(engine = 'mysql', cursor = cur)
    else:
        # create a quasi-sequence table
        createSequenceTable(cur)
        # get out data
        data = Sequence(engine = fasta_engine, 
            input = conf.get('Input','sequence'), 
            data_type = data_type
            )
        cur.executemany('''INSERT INTO sequence (id, name) VALUES (%s, %s)''', data.db_values)
    # create our msat table
    createMaskTableWithForeign(cur)
    # create the combined msat table
    if combine_loci:
        createCombinedLociWithForeign(cur)
    conn.commit()
    scan_type = conf.get('MicrosatelliteParameters', 'ScanType')
    motifs = motifCollection(min_length = [10,6,4,4,4,4], scan_type = scan_type, \
                perfect = True)
    if m_processing:
        # get num processors
        n_procs = get_num_procs
        if n_procs == 'Auto':
            n_procs = multiprocessing.cpu_count() - 2
        else:
            n_procs = int(n_procs)
        print 'Multiprocessing.  Number of processors = %s\n' % n_procs
        # to test with fewer sequences
        #count = 0
        threads = []
        # access the data on sequence by sequence basis to avoid reading the 
        # entire table contents into memory        
        pb = progress.bar(0,data.readcount,60)
        pb_inc = 0
        try:
            while data:
                if len(threads) < n_procs:
                    # convert BLOB back to sequence record
                    if data.engine == 'mysql':
                        # convert BLOB back to sequence record
                        record = data.read.next()
                        iden = record[0]
                        record = cPickle.loads(record[1])
                    elif data.engine == 'pyfasta' or data.engine == 'biopython':
                        iden, chromo = data.read.next()
                        record = SequenceWrapper(iden, data.fasta[chromo])
                    elif data.engine == 'twobit':
                        iden, chromo = data.read.next()
                        record = SequenceWrapper(iden, data.fasta[chromo][:])
                    p = multiprocessing.Process(target=worker, args=(
                                    iden,
                                    record,
                                    motifs,
                                    db,
                                    have_sequence_table,
                                    combine_loci, 
                                    combine_loci_dist)
                                    )
                    p.start()
                    threads.append(p)
                    if (pb_inc+1)%1000 == 0:
                        pb.__call__(pb_inc+1)
                    elif pb_inc + 1 == data.readcount:
                        pb.__call__(pb_inc+1)
                    pb_inc += 1
                else:
                    for t in threads:
                        if not t.is_alive():
                            threads.remove(t)
        except StopIteration:
            pass
    else:
        print 'Not using multiprocessing\n'
        # access the data on sequence by sequence basis to avoid 
        # reading the entire table contents into memory
        pb = progress.bar(0,data.readcount,60)
        pb_inc = 0
        #pdb.set_trace()
        try:
            #pdb.set_trace()
            while data:
                if data.engine == 'mysql':
                    # convert BLOB back to sequence record
                    record = data.read.next()
                    iden = record[0]
                    record = cPickle.loads(record[1])
                elif data.engine == 'pyfasta':
                    iden, chromo = data.read.next()
                    record = SequenceWrapper(iden, data.fasta[chromo])
                elif data.engine == 'biopython':
                    iden, chromo = data.read.next()
                    record = data.fasta[chromo]
                elif data.engine == 'twobit':
                    iden, chromo = data.read.next()
                    record = SequenceWrapper(iden, data.fasta[chromo][:])
                #pdb.set_trace()
                worker(iden, record, motifs, db, have_sequence_table,
                        combine_loci, combine_loci_dist)
                row = cur.fetchone()
                if (pb_inc+1)%1000 == 0:
                    pb.__call__(pb_inc+1)
                elif pb_inc + 1 == data.readcount:
                    pb.__call__(pb_inc+1)
                pb_inc += 1
        except StopIteration:
            pass
    print '\n'
    cur.close()
    conn.close()
    end_time = time.time()
    print 'Ended: ', time.strftime("%a %b %d, %Y  %H:%M:%S", time.localtime(end_time))
    print '\nTime for execution: ', (end_time - start_time)/60, 'minutes'