Example #1
from PhysicsTools.PythonAnalysis.rootplot.core import report_progress
import ROOT
import multiprocessing as multi
from Queue import Empty

import os
os.chdir('..')  # return to the directory with the ROOT files

calls = []

calls.append("""
canvas, objects = plot('PileUp_2011_truth_finebin_64600microbarn.root', 'PileUp_2011_truth_finebin_68000microbarn.root', 'PileUp_2011_truth_finebin_71400microbarn.root', 'pileup', ext='root', xlabel='Number of interactions per crossing', title='CMS Preliminary 5.1 fb^{-1} at #sqrt{s} = 7 TeV', legend_entries='inelastic cross-section = 64600 #mub,inelastic cross-section = 68000 #mub,inelastic cross-section = 71400 #mub')
canvas.SaveAs('plots/pileup.root')
""")

queue = multi.JoinableQueue()
qglobals = multi.Manager().Namespace()
qglobals.nfinished = 0
qglobals.ntotal = len(calls)
for call in calls:
    queue.put(call)


def qfunc(queue, qglobals):
    while True:
        try:
            mycall = queue.get(timeout=5)
        except (Empty, IOError):
            break
        exec(mycall)
        ROOT.gROOT.GetListOfCanvases().Clear()
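# --- Hedged driver sketch, not part of the original snippet: the code above
# defines qfunc but never spawns it. One plausible way to run it is one worker
# per queued call; the workers exit on their own via the 5-second Empty timeout,
# since this qfunc never calls task_done().
if __name__ == '__main__':
    workers = [multi.Process(target=qfunc, args=(queue, qglobals))
               for _ in range(min(len(calls), multi.cpu_count()))]
    for w in workers:
        w.start()
    for w in workers:
        w.join()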
Example #2
def parallel_preprocess_gene(ensembl,out_dir,n_processes,readcount_min,readcount_max,resume):
    
    # Create output paths and locks.
    out_paths,locks = dict(),dict()
    for out_filetype in ['json','index','log','readcount']:
        out_paths[out_filetype] = os.path.join(out_dir,'data.%s' %out_filetype)
        locks[out_filetype] = multiprocessing.Lock()
                
    # Initialise the output files, or resume from an existing index.
    gene_ids_done = []
    if resume and os.path.exists(out_paths['index']):
        df_index = pd.read_csv(out_paths['index'],sep=',')
        gene_ids_done = list(df_index['idx'].unique())
    else:
        # with open(out_paths['json'],'w') as f:
        #     f.write('{\n')
        #     f.write('"genes":{')
        open(out_paths['json'],'w').close()
        with open(out_paths['index'],'w') as f:
            f.write('idx,start,end\n') # header
        with open(out_paths['readcount'],'w') as f:
            f.write('idx,n_reads\n') # header
        open(out_paths['log'],'w').close()

    # Create communication queues.
    task_queue = multiprocessing.JoinableQueue(maxsize=n_processes * 2)

    # Create and start consumers.
    consumers = [helper.Consumer(task_queue=task_queue,task_function=preprocess_gene,locks=locks) for i in range(n_processes)]
    for p in consumers:
        p.start()

    # Get all gene ids.
    gene_ids = set()
    tx_ensembl = dict()
    with h5py.File(os.path.join(out_dir,'eventalign.hdf5'),'r') as f:
        for tx_id in f.keys():
            tx_id,tx_version = tx_id.split('.') # Based on Ensembl
            tx_ensembl[tx_id] = tx_version
            try:
                g_id = ensembl.transcript_by_id(tx_id).gene_id 
            except ValueError:
                continue
            else:
                gene_ids = gene_ids.union([g_id])
    #

    # Load tasks into task_queue.
    gene_ids_processed = []
    with h5py.File(os.path.join(out_dir,'eventalign.hdf5'),'r') as f:
        for gene_id in gene_ids:
            if resume and (gene_id in gene_ids_done):
                continue
                
            # mapping a gene <-> transcripts
            tx_ids, t2g_mapping = t2g(gene_id,ensembl)
            #
            read_ids = []
            data_dict = dict()
            n_reads = 0
            for tx_id in tx_ids:
                
                if tx_id not in tx_ensembl:
                    continue
                tx_id += '.' + tx_ensembl[tx_id]
        
                if tx_id not in f: # no eventalign for tx_id
                    continue
                    
                # n_reads += len(f[tx_id])                
                for read_id in f[tx_id].keys():
                    if (n_reads < readcount_max) and (read_id not in read_ids):
                        data_dict[read_id] = f[tx_id][read_id]['events'][:]
                        read_ids += [read_id]
                        n_reads += 1
                    elif n_reads >= readcount_max:
                        break
                    
            if n_reads >= readcount_min:
                task_queue.put((gene_id,data_dict,t2g_mapping,out_paths)) # Blocked if necessary until a free slot is available. 
                gene_ids_processed += [gene_id]

    # Put the stop task into task_queue.
    task_queue = helper.end_queue(task_queue,n_processes)

    # Wait for all of the tasks to finish.
    task_queue.join()

    # Write the ending of the json file.
    # with open(out_paths['json'],'a+') as f:
    #     f.seek(0,2)  # end of file
    #     f.truncate(f.tell()-1) 
    #     f.write('\n}\n}\n')
    ###
    
    with open(out_paths['log'],'a+') as f:
        f.write('Total %d genes.\n' %len(gene_ids_processed))
        f.write(helper.decor_message('successfully finished'))
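# --- Hypothetical sketch of the helper.Consumer used above (the real class is
# not shown in this snippet; multiprocessing is assumed to be imported, as it is
# used throughout). Each consumer pulls task tuples until a None sentinel placed
# by helper.end_queue() arrives, runs task_function on them, and acknowledges
# every item so that task_queue.join() can return.
class Consumer(multiprocessing.Process):
    def __init__(self, task_queue, task_function, locks=None):
        super().__init__()
        self.task_queue = task_queue
        self.task_function = task_function
        self.locks = locks

    def run(self):
        while True:
            task = self.task_queue.get()
            if task is None:
                self.task_queue.task_done()
                break
            self.task_function(*task, locks=self.locks)
            self.task_queue.task_done()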
Example #3
def multi_process(func, data, num_process=None, verbose=True, **args):
    '''Function to use multiprocessing to process pandas Dataframe.

    This function applies a function on each row of the input DataFrame by
    multiprocessing.

    Args:
        func (function): The function to apply on each row of the input
            Dataframe. The func must accept pandas.Series as the first
            positional argument and return a pandas.Series.
        data (pandas.DataFrame): A DataFrame to be processed.
        num_process (int, optional): The number of processes to run in
            parallel. Defaults to be the number of CPUs of the computer.
        verbose (bool, optional): Set to False to disable verbose output.
        args (dict): Keyword arguments to pass to `func`.

    Returns:
        A pandas.DataFrame containing the results.
    '''
    # Check arguments value
    assert isinstance(data, pd.DataFrame), \
        'Input data must be a pandas.DataFrame instance'
    if num_process is None:
        num_process = multiprocessing.cpu_count()
    # Establish communication queues
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()
    error_queue = multiprocessing.Queue()
    start_time = time.time()
    # Enqueue tasks
    num_task = len(data)
    for i in range(num_task):
        tasks.put(data.iloc[i, :])
    # Add a poison pill for each consumer
    for i in range(num_process):
        tasks.put(None)

    logger.info('Create {} processes'.format(num_process))
    consumers = [
        Consumer(func, tasks, results, error_queue, **args)
        for i in range(num_process)
    ]
    for w in consumers:
        w.start()
    # Add a task tracking process
    task_tracker = TaskTracker(tasks, verbose)
    task_tracker.start()
    # Wait for all input data to be processed
    tasks.join()
    # If there is any error in any process, output the error messages
    num_error = error_queue.qsize()
    if num_error > 0:
        for i in range(num_error):
            logger.error(error_queue.get())
        raise RuntimeError('Multi process jobs failed')
    else:
        # Collect results
        result_table = []
        while num_task:
            result_table.append(results.get())
            num_task -= 1
        df_results = pd.DataFrame(result_table)
        logger.info("Jobs finished in {0:.2f}s".format(time.time() -
                                                       start_time))
        return df_results
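# --- Hypothetical usage sketch for multi_process (it assumes the Consumer and
# TaskTracker helpers referenced above are importable from this module; the row
# function and the 'value' column below are illustrative only).
def _scale_row(row, factor=1.0):
    row = row.copy()
    row['value'] = row['value'] * factor
    return row

if __name__ == '__main__':
    df = pd.DataFrame({'value': [1.0, 2.0, 3.0]})
    scaled = multi_process(_scale_row, df, num_process=2, factor=10.0)
    print(scaled)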
Example #4
    def create_queue(self):
        return multiprocessing.JoinableQueue()
Example #5
def main():
    style = os.path.dirname(os.path.abspath(__file__))+"/osm.xml"
    dir = "tiles"
    type = "png"
    scale = 22800000 
    minzoom = 1
    maxzoom = 6
    threads = 1
    context = 3
    
    parser = OptionParser()
    parser.add_option("-s", "--style", action="store", type="string", dest="style", 
                      help="path to the mapnik stylesheet xml, defaults to: "+style)
    
    parser.add_option("-d", "--dir", action="store", type="string", dest="dir", 
                      help="path to the destination folder, defaults to "+type)
    
    parser.add_option("-t", "--type", action="store", type="string", dest="type", 
                      help="file type to render (png, png256, jpg), defaults to "+type)
    
    parser.add_option("-z", "--minzoom", action="store", type="int", dest="minzoom", 
                      help="minimum zoom level to render, defaults to "+str(minzoom))
    
    parser.add_option("-Z", "--maxzoom", action="store", type="int", dest="maxzoom", 
                      help="maximum zoom level to render, defaults to "+str(maxzoom))
    
    parser.add_option("-T", "--threads", action="store", type="int", dest="threads", 
                      help="number of threads to launch, defaults to "+str(threads))

    parser.add_option("-i", "--only-interesting", action="store_true", dest="onlyinteresting", 
                      help="only render around interesting places (buildings, peaks, islands, ...)")

    parser.add_option("-c", "--only-interesting-context", action="store", type="int", dest="context",
                      help="when rendering tiles around interesting places, how many tiles around those places should be rendered?"+
                      "0 means that only the tile with the interesting feature will be rendered; "+
                      "1 means that the 8 surrounding tiles will be rendered for each zoom level, too; "+
                      "2 adds 24 extra tiles; 3 adds 48 extra tiles; 4 adds 80 extra tiles; "+
                      "defaults to "+str(context)+", which should fill the most screens")

    parser.add_option("-l", "--only-interesting-list", action="store", type="string", dest="listfile", 
                      help="write a GeoJSON-List of interesting places")

    parser.add_option("-D", "--db", action="store", type="string", dest="dsn", default="", 
                      help="database connection string used for finding interesting places")
    
    parser.add_option("-e", "--skip-existing", action="store_true", dest="skipexisting", 
                      help="skip existing tiles, only render missing")
    
    (options, args) = parser.parse_args()
    if options.style:
        style = options.style
    
    if options.dir:
        dir = options.dir
    
    if options.type:
        type = options.type
    
    if options.minzoom:
        minzoom = options.minzoom
    
    if options.maxzoom:
        maxzoom = options.maxzoom

    if options.threads:
        threads = options.threads

    if options.context != None:
        context = options.context
    
    queue = multiprocessing.JoinableQueue(32)
    lock = multiprocessing.Lock()

    renderers = {}
    print "Starting %u render-threads" % (threads)
    for i in range(threads):
        renderer = RenderThread(i, queue, style, scale, dir, type, lock)
        render_thread = multiprocessing.Process(target=renderer.run)
        render_thread.start()
        renderers[i] = render_thread

    if options.onlyinteresting:
        import psycopg2
        tileset = set()
        features = []
        con = psycopg2.connect(options.dsn)
        sql = """
        SELECT 'point' AS type, osm_id, name, ST_X(way), ST_Y(way), ST_X(ST_Transform(way, 3411)), ST_Y(ST_Transform(way, 3411)) FROM ant_point
            WHERE (place IS NOT NULL AND place IN ('hamlet', 'town', 'isolated_dwelling', 'cape', 'locality', 'island', 'islet'))
            OR building IS NOT NULL
            OR aeroway IS NOT NULL
            OR ("natural" IS NOT NULL AND "natural" IN ('volcano', 'ridge', 'cliff', 'cape', 'peak', 'valley', 'bay'))

        UNION  ALL

        SELECT 'line' AS type, osm_id, name, ST_X(ST_Centroid(way)), ST_Y(ST_Centroid(way)), ST_X(ST_Transform(ST_Centroid(way), 3411)), ST_Y(ST_Transform(ST_Centroid(way), 3411)) FROM ant_line
            WHERE (place IS NOT NULL AND place IN ('hamlet', 'town', 'isolated_dwelling', 'cape', 'locality', 'island', 'islet'))
            OR building IS NOT NULL
            OR aeroway IS NOT NULL

        UNION  ALL

        SELECT 'polygon' AS type, osm_id, name, ST_X(ST_Centroid(way)), ST_Y(ST_Centroid(way)), ST_X(ST_Transform(ST_Centroid(way), 3411)), ST_Y(ST_Transform(ST_Centroid(way), 3411)) FROM ant_polygon
            WHERE (name IS NOT NULL AND place IS NOT NULL AND place IN ('hamlet', 'town', 'isolated_dwelling', 'cape', 'locality', 'island', 'islet'))
            OR building IS NOT NULL
            OR aeroway IS NOT NULL;
        """;
        cur = con.cursor()
        cur.execute(sql)
        lock.acquire()
        print "found %u interesting nodes" % (cur.rowcount)
        lock.release()
        i = 0
        for record in cur:
            (obj_type, osm_id, name, lat, lng, xmeter, ymeter) = record
            lock.acquire()
            i += 1
            print "found interesting %s %u of %u: #%u (%s)" % (obj_type, i, cur.rowcount, osm_id, name)
            lock.release()
            if(options.listfile):
                features += ({
                    "type": "Feature",
                    "properties": {
                        "osm_id": osm_id,
                        "name": name
                    },
                    "geometry": {
                        "type": "Point",
                        "coordinates" : [ lat, lng ] 
                    }
                },)

            for z in range(minzoom, maxzoom+1):
                n = 2**z
                n2 = n/2
                tilesz = float(scale) / float(n)
                xoff = float(xmeter) / tilesz
                yoff = float(ymeter) / tilesz
                x = int(xoff + n2)
                y = int(n2 - yoff)
                for xctx in range(-context, context+1):
                    for yctx in range(-context, context+1):
                        absx = x+xctx
                        absy = y+yctx
                        t = (z, absx, absy)
                        if absx >= 0 and absx < n and absy >= 0 and absy < n and not t in tileset:
                            queue.put(t)
                            tileset.add(t)

        if(options.listfile):
            import json
            f = open(options.listfile, "w")
            f.write(json.dumps({
                "type": "FeatureCollection",
                "features": features
                }
            ))
            f.close()

    else:
        for z in range(minzoom, maxzoom+1):
            n = 2**z
            for x in range(0, n):
                for y in range(0, n):
                    if options.skipexisting and os.path.exists(dir + "/" + str(z) + "/" + str(x) + "/" + str(y) + "." + type):
                        continue
                    t = (z, x, y)
                    queue.put(t)

    # Signal render threads to exit by sending empty request to queue
    for i in range(threads):
        queue.put(None)

    # wait for pending rendering jobs to complete
    queue.join()
    for i in range(threads):
        renderers[i].join()
Example #6
    def spawn_core_test(self):
        """Spawn concurrent scale testing on all online cores."""
        def run_worker_process(_result_queue, affinity):
            """ Subclass instantiation & constructor for
            individual core.
            """
            _worker = psutil.Process()
            # assign affinity, pin to core
            _worker.cpu_affinity(affinity)
            # instantiate core_test
            cpu_freq_ctest = CpuFreqCoreTest(affinity[0], _worker.pid)
            # execute freq scaling
            cpu_freq_ctest.scale_all_freq()
            # get results
            res_freq_map = cpu_freq_ctest.__call__()
            # place in result_queue
            _result_queue.put(res_freq_map)

        def process_rqueue(queue_depth, _result_queue):
            """Get and process core_test result_queue."""
            # get queued core_test results
            for _ in range(queue_depth):
                # pipe results from core_test
                worker_queue = _result_queue.get()
                # append to chainmap object
                self.freq_chainmap = self.freq_chainmap.new_child(worker_queue)
                # signal processing complete
                _result_queue.task_done()
            logging.info('----------------------------')
            logging.info('* joining and closing queues')
            # nicely join and close queue
            try:
                _result_queue.join()
            finally:
                _result_queue.close()

        worker_list = []  # track spawned multiproc processes
        pid_list = []  # track spawned multiproc pids
        online_cores = self._get_cores('online')
        # delegate & spawn tests on other cores first
        # then run core 0 last (main() thread)
        online_cores.append(online_cores.pop(0))
        # create queue for piping results
        result_queue = multiprocessing.JoinableQueue()

        # assign affinity and spawn core_test
        for core in online_cores:
            affinity = [int(core)]
            affinity_dict = dict(affinity=affinity)
            worker = multiprocessing.Process(target=run_worker_process,
                                             args=(result_queue, ),
                                             kwargs=affinity_dict)
            # start core_test
            worker.start()
            worker_list.append(worker)
            # track and log active child pids
            pid_list.append(worker.pid)

        # get, process queues
        process_rqueue(len(worker_list), result_queue)

        # cleanup core_test pids
        logging.info('* joining worker processes:')
        for idx, worker in enumerate(worker_list):
            # join worker processes
            worker_return = worker.join()
            time.sleep(.1)
            if worker_return is None:
                logging.info('  - PID %s joined parent', pid_list[idx])
            else:
                # can cleanup in reset subroutine
                continue
        # update attribute for a 2nd pass terminate
        self.__proc_list = worker_list
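# --- Generic illustration (not from the original project) of the JoinableQueue
# contract relied on above: join() only returns once every put() has been
# matched by a task_done() call.
import multiprocessing
from queue import Empty

def _drain(q):
    while True:
        try:
            q.get(timeout=1)
        except Empty:
            break
        q.task_done()

if __name__ == '__main__':
    q = multiprocessing.JoinableQueue()
    for item in range(3):
        q.put(item)
    p = multiprocessing.Process(target=_drain, args=(q,))
    p.start()
    q.join()   # returns once all three items have been marked done
    p.join()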
Example #7
def main():
    """Launch the script that computes frequencies.

    - Read the file (or all files within a directory) and put each line in a
      queue
    - Spawn multiple processes (Worker)
    - Collect results from processes, merge them and write the results in a
      file.

    """
    parser = argparse.ArgumentParser(
        description="Script to compute unigrams and/or bigrams frequencies.")
    parser.add_argument("-f", "--file", help="source file to be processed")
    parser.add_argument("-d", "--directory", help="directory containing a set "
                        "of files to be processed")
    parser.add_argument("-t", "--type", help="whether computing 'unigrams' or "
                        "'bigrams'", required=True)
    parser.add_argument("-o", "--output", help="output file with results",
                        required=True)
    parser.add_argument("-v", "--verbose", action='store_true',
                        help="print debugging information")

    args = parser.parse_args()

    # Adjust logger verbosity.
    if args.verbose is True:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S')
    else:
        logging.basicConfig(level=logging.WARNING,
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S')

    logger = logging.getLogger()

    # Make sure that exactly one source has been specified.
    if args.file is None and args.directory is None:
        logger.critical("No source specified.")
        return -1
    if args.file is not None and args.directory is not None:
        logger.critical("Either specify a file or a directory.")
        return -1

    # Validate the type of computation requested.
    if not (args.type == "unigrams" or args.type == "bigrams"):
        logger.critical("Wrong type: please specify 'unigrams' or 'bigrams'")
        return -1

    # Create a list with valid files ready to be processed.
    if args.file is not None:
        if isfile(args.file):
            files = [args.file]
        else:
            logger.critical("Unable to find %s." % args.file)
            return -1
    else:
        if isdir(args.directory):
            files = [f for f in listdir(args.directory)
                     if isfile(join(args.directory, f))]
            if len(files) == 0:
                logger.critical("%s doesn't contain valid file(s)."
                                % args.directory)
                return -1
        else:
            logger.critical("%s is not a directory." % args.directory)
            return -1

    begin_time = time.time()

    workers = []
    # Limit the queue size to 100k items: reading can be much faster than
    # processing, which would otherwise fill up RAM.
    queue = multiprocessing.JoinableQueue(100000)
    results_queue = multiprocessing.Queue()

    # Spawn a process for every CPU.
    for _ in range(multiprocessing.cpu_count()):
        w = Worker(queue, results_queue, args.type)
        w.start()
        workers.append(w)

    for idx, filename in enumerate(files):
        logger.debug("Begin read %s." % filename)
        directory = args.directory or "."
        with codecs.open(join(directory, filename), 'r', 'utf8') as f:
            for line in f:
                queue.put(line)
        logger.debug("File %s successfully read." % filename)

    logger.debug("All files successfully read.")

    # Join the queue with the words to be processed. This is a synchronous
    # call, so main() will wait for workers to complete their work.
    queue.join()

    logger.debug("Every file has been processed. Merging...")

    # Merge the counters with the '+=' operator.
    counter = Counter()
    for _ in workers:
        counter += results_queue.get()

    # Clean process table by joining workers.
    for w in workers:
        w.join()

    logger.debug("Computing finished. Writing results...")

    with codecs.open(args.output, 'w', 'utf8') as out:
        # Write the header.
        out.write("%d %d\n" % (len(counter.values()), sum(counter.values())))

        # For each element, write the key and its value (space separated).
        for k, v in counter.most_common():
            out.write("%s %d\n" % (k, v))

    logger.debug("Done in %s seconds." % (time.time() - begin_time))
Example #8
def prepare(args, logger):
    """Main script function.

    :param args: the ArgumentParser-derived namespace.
    :param logger: a logging instance
    :type logger: logging.Logger
    """

    if hasattr(args.json_conf["reference"]["genome"], "close"):
        args.json_conf["reference"]["genome"].close()
        if hasattr(args.json_conf["reference"]["genome"], "filename"):
            args.json_conf["reference"]["genome"] = getattr(
                args.json_conf["reference"]["genome"], "filename")
        elif hasattr(args.json_conf["reference"]["genome"], "name"):
            args.json_conf["reference"]["genome"] = getattr(
                args.json_conf["reference"]["genome"], "name")
        else:
            logger.critical("Invalid FASTA file: %s",
                            args.json_conf["reference"]["genome"])
            raise AttributeError
    elif not isinstance(args.json_conf["reference"]["genome"], (str, bytes)):
        logger.critical("Invalid FASTA file: %s",
                        args.json_conf["reference"]["genome"])
        raise AttributeError

    if not os.path.exists(args.json_conf["reference"]["genome"]):
        logger.critical("Invalid FASTA file: %s",
                        args.json_conf["reference"]["genome"])
        raise AttributeError

    assert len(args.json_conf["prepare"]["files"]["gff"]) > 0
    assert len(args.json_conf["prepare"]["files"]["gff"]) == len(
        args.json_conf["prepare"]["files"]["labels"]), (
            args.json_conf["prepare"]["files"]["gff"],
            args.json_conf["prepare"]["files"]["labels"])

    if args.json_conf["prepare"]["strand_specific"] is True:
        args.json_conf["prepare"]["files"]["strand_specific_assemblies"] = [
            True
        ] * len(args.json_conf["prepare"]["files"]["gff"])
    else:
        args.json_conf["prepare"]["files"]["strand_specific_assemblies"] = [
            (member in args.json_conf["prepare"]["files"]
             ["strand_specific_assemblies"])
            for member in args.json_conf["prepare"]["files"]["gff"]
        ]

    args.json_conf["prepare"]["files"]["reference"] = [
        (member in args.json_conf["prepare"]["files"]["reference"]
         or label in args.json_conf["prepare"]["files"]["reference"])
        for member, label in zip(args.json_conf["prepare"]["files"]["gff"],
                                 args.json_conf["prepare"]["files"]["labels"])
    ]

    shelve_names = [
        path_join(args.json_conf["prepare"]["files"]["output_dir"],
                  "mikado_shelf_{}.db".format(str(_).zfill(5)))
        for _ in range(len(args.json_conf["prepare"]["files"]["gff"]))
    ]

    logger.propagate = False
    if args.json_conf["prepare"]["single"] is False and args.json_conf[
            "threads"] > 1:
        multiprocessing.set_start_method(
            args.json_conf["multiprocessing_method"], force=True)
        args.logging_queue = multiprocessing.JoinableQueue(-1)
        log_queue_handler = logging.handlers.QueueHandler(args.logging_queue)
        log_queue_handler.setLevel(logging.DEBUG)
        # logger.addHandler(log_queue_handler)
        args.tempdir = tempfile.TemporaryDirectory(
            dir=args.json_conf["prepare"]["files"]["output_dir"])
        args.listener = logging.handlers.QueueListener(args.logging_queue,
                                                       logger)
        args.listener.propagate = False
        args.listener.start()

    args.json_conf["prepare"]["files"]["out_fasta"] = open(
        path_join(args.json_conf["prepare"]["files"]["output_dir"],
                  args.json_conf["prepare"]["files"]["out_fasta"]), 'w')
    args.json_conf["prepare"]["files"]["out"] = open(
        path_join(args.json_conf["prepare"]["files"]["output_dir"],
                  args.json_conf["prepare"]["files"]["out"]), 'w')

    logger.info("Output dir: %s. Output GTF: %s. Output Fasta: %s",
                args.json_conf["prepare"]["files"]["output_dir"],
                args.json_conf["prepare"]["files"]["out"].name,
                args.json_conf["prepare"]["files"]["out_fasta"].name)
    logger.info("Loading reference file")
    args.json_conf["reference"]["genome"] = pysam.FastaFile(
        args.json_conf["reference"]["genome"])
    logger.info("Finished loading genome file")
    logger.info("Started loading exon lines")
    shelf_stacks = dict()
    try:
        load_exon_lines(
            args,
            shelve_names,
            logger,
            min_length=args.json_conf["prepare"]["minimum_cdna_length"],
            max_intron=args.json_conf["prepare"]["max_intron_length"],
        )

        logger.info("Finished loading exon lines")

        # Prepare the sorted data structure
        sorter = functools.partial(
            store_transcripts,
            logger=logger,
            seed=args.json_conf["seed"],
            keep_redundant=args.json_conf["prepare"]["keep_redundant"])

        shelve_source_scores = []
        for label in args.json_conf["prepare"]["files"]["labels"]:
            shelve_source_scores.append(
                args.json_conf["prepare"]["files"]["source_score"].get(
                    label, 0))

        try:
            for shelf, score, is_reference in zip(
                    shelve_names, shelve_source_scores,
                    args.json_conf["prepare"]["files"]["reference"]):
                assert isinstance(is_reference, bool)
                conn = sqlite3.connect(shelf)
                shelf_stacks[shelf] = {
                    "conn": conn,
                    "cursor": conn.cursor(),
                    "score": score,
                    "is_reference": is_reference
                }
            # shelf_stacks = dict((_, shelve.open(_, flag="r")) for _ in shelve_names)
        except Exception as exc:
            raise TypeError((shelve_names, exc))
        perform_check(sorter(shelf_stacks), shelf_stacks, args, logger)
    except Exception as exc:
        logger.exception(exc)
        __cleanup(args, shelve_names)
        logger.error("Mikado has encountered an error, exiting")
        # sys.exit(1)

    if args.json_conf["prepare"]["single"] is False and args.json_conf[
            "threads"] > 1:
        args.tempdir.cleanup()
        args.listener.enqueue_sentinel()

    logger.setLevel(logging.INFO)
    __cleanup(args, shelve_names)

    logger.addHandler(logging.StreamHandler())
    logger.info(
        """Mikado prepare has finished correctly. The output %s FASTA file can now be used for BLASTX \
and/or ORF calling before the next step in the pipeline, `mikado serialise`.""",
        args.json_conf["prepare"]["files"]["out_fasta"])
    logging.shutdown()
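# --- Generic illustration (not Mikado's own code) of the QueueHandler /
# QueueListener pairing used in prepare() above: worker processes push log
# records onto a multiprocessing queue and a single listener in the parent
# process emits them.
import logging
import logging.handlers
import multiprocessing

def _worker(log_queue):
    handler = logging.handlers.QueueHandler(log_queue)
    worker_logger = logging.getLogger("worker")
    worker_logger.addHandler(handler)
    worker_logger.setLevel(logging.INFO)
    worker_logger.info("hello from %s", multiprocessing.current_process().name)

if __name__ == '__main__':
    log_queue = multiprocessing.JoinableQueue(-1)
    listener = logging.handlers.QueueListener(log_queue, logging.StreamHandler())
    listener.start()
    procs = [multiprocessing.Process(target=_worker, args=(log_queue,))
             for _ in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    listener.stop()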
Example #9
def main():
    #--see if a restart flag was passed
    try:
        if sys.argv[1].upper() == 'R':
            restart = True
        else:
            restart = False
    except:
        restart = False
    if restart:
        print 'Using existing dir and files'

    #--a dict of data types that are of interest - these become nested folders
    use_dtypes = {'GW':['PSI','WELL'],'SW':['BOARD','FLOW','GATE','RPM','STG'],\
                  'RAIN':['RAIN'],'EVAP':['EVAP','ETP','ETPI']}

    #--create the directory structure
    if restart is False:
        for key, val in use_dtypes.iteritems():
            if os.path.exists(key):
                shutil.rmtree(key)
            os.mkdir(key)
            for v in val:
                os.mkdir(key + '\\' + v)
    else:
        for key, val in use_dtypes.iteritems():
            if not os.path.exists(key):
                os.mkdir(key)
                for v in val:
                    os.mkdir(key + '\\' + v)

    #--the time series listing CSV from dbhydro
    fname = 'ts_listing.csv'
    f = open(fname, 'r')
    header = f.readline().strip().split(',')

    #--some column indices
    idx = {}
    idx['dbkey'] = 0
    idx['station'] = 1
    idx['dtype'] = 3
    idx['freq'] = 4
    idx['stat'] = 5
    idx['sdate'] = 8
    idx['edate'] = 9
    idx['opnum'] = 12
    idx['basin'] = 17
    idx['struc'] = 18

    #--get a list of file names and dbkeys to retrieve
    #--build queue_args = [[dbkey,sdate,edate,fname]]
    dbkeys = []
    fnames = []
    queue_args = []
    for i, line in enumerate(f):
        raw = line.strip().split(',')
        dbkey = raw[idx['dbkey']].strip()
        station = raw[idx['station']].strip()
        freq = raw[idx['freq']].strip()
        stat = raw[idx['stat']].strip()
        sdate = raw[idx['sdate']].strip()
        edate = raw[idx['edate']].strip()
        dtype = raw[idx['dtype']].strip()
        opnum = raw[idx['opnum']].strip()
        struc = raw[idx['struc']].strip()

        #--fix the dbkey since Excel strips the leading '0's
        if len(dbkey) < 5:
            dbkey = '%05d' % int(dbkey)
            #print dbkey
            #break

        #--check if this is some data we want
        dir1, dir2 = None, None
        for key, val in use_dtypes.iteritems():
            if dtype in val:
                dir1 = key + '\\'
                dir2 = dtype + '\\'
                break

        #--if this isn't a dup and it is a data type we want and it has valid date ranges
        if dbkey not in dbkeys and dir1 != None and sdate != '' and edate != '':
            #--if opnum is null, make it 1
            if opnum == '':
                opnum = '1'

            dbkeys.append(dbkey)
            #--convert sdate and edate to dbhydro format
            s = datetime.strptime(sdate, '%d-%b-%Y')
            sdate2 = s.strftime('%Y%m%d')
            e = datetime.strptime(edate, '%d-%b-%Y')
            edate2 = e.strftime('%Y%m%d')
            #--build the output file name
            station_mod = station.replace('.', '_')
            station_mod = station_mod.replace(' ', '_')
            fname = dir1+dir2+station_mod+'.'+freq+'.'+stat+'.'+opnum+'.'+\
                    sdate2+'.'+edate2+'.'+struc+'.dat'

            if restart:
                #--check that this fname doesn't exist
                if os.path.exists(fname) == False and fname not in fnames:
                    queue_args.append([dbkey, sdate2, edate2, fname])
                    fnames.append(fname)
            #--if not restart
            else:
                if fname not in fnames:
                    queue_args.append([dbkey, sdate2, edate2, fname])
                    fnames.append(fname)

    print 'number of records to retrieve:', len(fnames)

    #--multiprocessing
    #--number of process to spawn - do my bidding!
    num_procs = 20

    #--create a queue for jobs and to track failed retrievals
    jobq = mp.JoinableQueue()
    failq = mp.Queue()

    #--create and start the process instances
    procs = []
    for i in range(num_procs):
        #--pass the worker function both queues and a PID
        p = mp.Process(target=worker, args=(jobq, failq, i + 1))
        p.daemon = True
        print 'starting process', p.name
        p.start()
        procs.append(p)

    #--add the args to the queue
    for qa in queue_args:
        jobq.put(qa)
        #break

    #--add the sentinels so processes know when to terminate
    for p in procs:
        jobq.put(None)

    #--block until all finish
    for p in procs:
        p.join()
        print p.name, 'Finished'

    #--process the failed retrievals
    failq.put_nowait(None)
    f_out = open('failed.dat', 'w')
    for args in iter(failq.get, None):
        f_out.write(args[0] + '\n')
    f_out.close()
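#--- Hypothetical sketch of the worker function used above (the real function,
#--- which downloads each record from DBHYDRO, is not shown). It pulls argument
#--- lists off jobq until a None sentinel arrives and records failures on failq;
#--- fetch_record() is a placeholder name, not a real helper.
def worker(jobq, failq, pid):
    while True:
        args = jobq.get()
        if args is None:
            jobq.task_done()
            break
        dbkey, sdate2, edate2, fname = args
        try:
            fetch_record(dbkey, sdate2, edate2, fname)  # placeholder download call
        except Exception:
            failq.put(args)
        finally:
            jobq.task_done()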
Example #10
def perform_check(keys, shelve_stacks, args, logger):
    """
    This is the most important method. After preparing the data structure,
    this function creates the real transcript instances and checks that
    they are correct when looking at the underlying genome sequence.
    This is also the point at which we start using multiprocessing, if
    so requested.
    :param keys: sorted list of [tid, sequence]
    :param shelve_stacks: dictionary containing the name and the handles of the shelf DBs
    :param args: the namespace
    :param logger: logger
    :return:
    """

    counter = 0

    # FASTA extraction *has* to be done at the main-process level; creating an
    # index in each process is too slow.

    if args.json_conf["prepare"]["single"] is True or args.json_conf[
            "threads"] == 1:

        # Use functools to pre-configure the function
        # with all necessary arguments aside for the lines
        partial_checker = functools.partial(
            create_transcript,
            canonical_splices=args.json_conf["prepare"]["canonical"],
            logger=logger,
            force_keep_cds=not args.json_conf["prepare"]["strip_cds"])

        for tid, chrom, key in keys:
            tid, shelf_name = tid
            try:
                tobj = json.loads(
                    next(shelve_stacks[shelf_name]["cursor"].execute(
                        "SELECT features FROM dump WHERE tid = ?",
                        (tid, )))[0])
            except sqlite3.ProgrammingError as exc:
                raise sqlite3.ProgrammingError("{}. Tids: {}".format(exc, tid))

            transcript_object = partial_checker(
                tobj,
                str(args.json_conf["reference"]["genome"].fetch(
                    chrom, key[0] - 1, key[1])),
                key[0],
                key[1],
                lenient=args.json_conf["prepare"]["lenient"],
                is_reference=tobj["is_reference"],
                strand_specific=tobj["strand_specific"])
            if transcript_object is None:
                continue
            counter += 1
            if counter >= 10**4 and counter % (10**4) == 0:
                logger.info("Retrieved %d transcript positions", counter)
            elif counter >= 10**3 and counter % (10**3) == 0:
                logger.debug("Retrieved %d transcript positions", counter)
            print(transcript_object.format("gtf"),
                  file=args.json_conf["prepare"]["files"]["out"])
            print(transcript_object.fasta,
                  file=args.json_conf["prepare"]["files"]["out_fasta"])
    else:
        # pylint: disable=no-member

        submission_queue = multiprocessing.JoinableQueue(-1)

        working_processes = [
            CheckingProcess(
                submission_queue,
                args.logging_queue,
                args.json_conf["reference"]["genome"].filename,
                _ + 1,
                os.path.basename(
                    args.json_conf["prepare"]["files"]["out_fasta"].name),
                os.path.basename(
                    args.json_conf["prepare"]["files"]["out"].name),
                args.tempdir.name,
                seed=args.json_conf["seed"],
                lenient=args.json_conf["prepare"]["lenient"],
                canonical_splices=args.json_conf["prepare"]["canonical"],
                force_keep_cds=not args.json_conf["prepare"]["strip_cds"],
                log_level=args.level) for _ in range(args.json_conf["threads"])
        ]

        [_.start() for _ in working_processes]

        for counter, keys in enumerate(keys):
            tid, chrom, (pos) = keys
            tid, shelf_name = tid
            tobj = json.loads(
                next(shelve_stacks[shelf_name]["cursor"].execute(
                    "SELECT features FROM dump WHERE tid = ?", (tid, )))[0])
            submission_queue.put((tobj, pos[0], pos[1], counter + 1))

        submission_queue.put(tuple(["EXIT"] * 4))

        [_.join() for _ in working_processes]

        partial_gtf = [
            os.path.join(
                args.tempdir.name, "{0}-{1}".format(
                    os.path.basename(
                        args.json_conf["prepare"]["files"]["out"].name),
                    _ + 1)) for _ in range(args.json_conf["threads"])
        ]
        merge_partial(partial_gtf, args.json_conf["prepare"]["files"]["out"])

        partial_fasta = [
            os.path.join(
                args.tempdir.name, "{0}-{1}".format(
                    os.path.basename(
                        args.json_conf["prepare"]["files"]["out_fasta"].name),
                    _ + 1)) for _ in range(args.json_conf["threads"])
        ]
        merge_partial(partial_fasta,
                      args.json_conf["prepare"]["files"]["out_fasta"])

    args.json_conf["prepare"]["files"]["out_fasta"].close()
    args.json_conf["prepare"]["files"]["out"].close()

    logger.setLevel(logging.INFO)
    # logger.info("Finished to analyse %d transcripts (%d retained)",
    #             len(exon_lines), counter)
    logger.setLevel(args.level)
    return
Example #11
def _load_exon_lines_multi(args,
                           shelve_names,
                           logger,
                           min_length,
                           strip_cds,
                           threads,
                           max_intron=3 * 10**5):
    logger.info("Starting to load lines from %d files (using %d processes)",
                len(args.json_conf["prepare"]["files"]["gff"]), threads)
    submission_queue = multiprocessing.JoinableQueue(-1)

    working_processes = []
    # working_processes = [ for _ in range(threads)]

    for num in range(threads):
        proc = AnnotationParser(submission_queue,
                                args.logging_queue,
                                num + 1,
                                log_level=args.level,
                                min_length=min_length,
                                max_intron=max_intron,
                                strip_cds=strip_cds,
                                seed=args.json_conf["seed"])
        proc.start()
        working_processes.append(proc)

    # [_.start() for _ in working_processes]
    for new_shelf, label, strand_specific, is_reference, gff_name in zip(
            shelve_names, args.json_conf["prepare"]["files"]["labels"],
            args.json_conf["prepare"]["files"]["strand_specific_assemblies"],
            args.json_conf["prepare"]["files"]["reference"],
            args.json_conf["prepare"]["files"]["gff"]):
        submission_queue.put(
            (label, gff_name, strand_specific, is_reference, new_shelf))

    submission_queue.put(("EXIT", "EXIT", "EXIT", "EXIT", "EXIT"))

    [_.join() for _ in working_processes]

    tid_counter = Counter()
    for shelf in shelve_names:
        conn = sqlite3.connect(
            "file:{}?mode=ro".format(shelf),
            uri=True,  # Necessary to use the Read-only mode from file string
            isolation_level="DEFERRED",
            timeout=60,
            check_same_thread=
            False  # Necessary for SQLite3 to function in multiprocessing
        )
        cursor = conn.cursor()
        tid_counter.update(
            [_[0] for _ in cursor.execute("SELECT tid FROM dump")])
        if tid_counter.most_common()[0][1] > 1:
            if set(args.json_conf["prepare"]["files"]["labels"]) == {""}:
                exception = exceptions.RedundantNames(
                    """Found redundant names during multiprocessed file analysis.\
Please repeat using distinct labels for your input files. Aborting. Redundant names:\n\
{}""".format("\n".join(tid_counter.most_common())))
            else:
                exception = exceptions.RedundantNames(
                    """Found redundant names during multiprocessed file analysis, even if \
unique labels were provided. Please try to repeat with a different and more unique set of labels. Aborting. Redundant names:\n\
{}""".format("\n".join([_[0] for _ in tid_counter.most_common() if _[1] > 1])))
            logger.exception(exception)
            raise exception

    del working_processes
    gc.collect()
Example #12
import multiprocessing as mp
import logging
import logging.handlers
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, Column, BLOB
from sqlalchemy.orm import sessionmaker
import tempfile
import os
from collections import defaultdict
import operator

__doc__ = """Script to try to translate the CDS from one coordinate system to another."""

transfer_base = declarative_base()

logging_queue = mp.JoinableQueue(-1)
log_queue_handler = logging.handlers.QueueHandler(logging_queue)
log_queue_handler.setLevel(logging.DEBUG)


class _Storer(transfer_base):

    __tablename__ = "storer"

    id = Column(Integer, primary_key=True)
    bed = Column(BLOB)
    gff3 = Column(BLOB)

    def __init__(self, id, bed, gff3):

        self.id, self.bed, self.gff3 = id, bed, gff3
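# --- Minimal usage sketch (not part of the original snippet): create the storer
# table in an in-memory SQLite database and stash one BED/GFF3 pair.
if __name__ == '__main__':
    engine = sqlalchemy.create_engine("sqlite://")
    transfer_base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add(_Storer(1, b"bed line", b"gff3 line"))
    session.commit()
    print(session.query(_Storer).count())  # -> 1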
Example #13
        for k in range(len(matrizB)):
            valor = valor + matrizA[i][k] * matrizB[k][j]
        queue.task_done()
        queue_resultados.put((i, j, valor))


if __name__ == '__main__':
    linhas, colunas = 400, 400

    print("{}: Gerando matrizes".format(time.strftime('%c')))
    matrizA = cria_matriz(linhas, colunas)
    matrizB = cria_matriz(linhas, colunas)
    matrizC = numpy.zeros(shape=(linhas, colunas))

    print("{}: Multiplicando matrizes".format(time.strftime('%c')))
    queue = multiprocessing.JoinableQueue()
    queue_resultados = multiprocessing.JoinableQueue()
    for i in range(2):
        worker = multiprocessing.Process(target=multiplica_linha_coluna,
                                         args=(
                                             queue,
                                             queue_resultados,
                                             matrizA,
                                             matrizB,
                                         ))
        worker.daemon = True
        worker.start()

    for i in range(len(matrizA)):
        for j in range(len(matrizA[0])):
            queue.put((i, j))
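    # --- Hedged completion sketch (the original snippet ends here): wait until
    # every (i, j) pair has been acknowledged, then collect the results into
    # matrizC from the result queue.
    queue.join()
    for _ in range(linhas * colunas):
        i, j, valor = queue_resultados.get()
        matrizC[i][j] = valor
    print("{}: Done".format(time.strftime('%c')))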
Example #14
def joinable_queue(res, w):
    def pcheck_joins2(q, resultq, weight_type='ROOK'):
        while True:
            work = q.get()
            if work == None:
                #print "Got the pill."
                q.task_done()
                break
            #Unpack the args from q
            potential_neighbors = work[0]
            shapes = work[1]
            polygon_ids = work[2]
            mdict = {}
            weight_type = weight_type.upper()
            #print "Process {} working on polygons {} - {}.".format(pid, polygon_ids[0], polygon_ids[-1])
            if weight_type == 'QUEEN':
                # check for a shared vertex
                vertCache = {}
                for polyId in polygon_ids:
                    iVerts = shapes[polyId].vertices
                    nbrs = potential_neighbors[polyId]
                    if polyId not in vertCache:
                        vertCache[polyId] = set(iVerts)
                    if polyId not in w:
                        w[polyId] = set()
                    for j in nbrs:
                        join = False
                        if j not in vertCache:
                            vertCache[j] = set(shapes[j].vertices)
                        common = vertCache[polyId].intersection(vertCache[j])
                        if len(common) > 0:
                            join = True
                        if join:
                            w[polyId].add(j)
                            if j not in w:
                                w[j] = set()
                            w[j].add(polyId)
                return w
            elif weight_type == 'ROOK':
                # check for a shared edge
                edgeCache = {}

                for polyId in polygon_ids:
                    if polyId not in edgeCache:
                        iEdges = {}
                        iVerts = shapes[polyId].vertices
                        nv = len(iVerts)
                        ne = nv - 1
                        for i in range(ne):
                            l = iVerts[i]
                            r = iVerts[i + 1]
                            iEdges[(l, r)] = []
                            iEdges[(r, l)] = []
                        edgeCache[polyId] = iEdges
                    nbrs = potential_neighbors[polyId]
                    if polyId not in mdict:
                        mdict[polyId] = []
                    for j in nbrs:
                        join = False
                        if j not in edgeCache:
                            jVerts = shapes[j].vertices
                            jEdges = {}
                            nv = len(jVerts)
                            ne = nv - 1
                            for e in range(ne):
                                l = jVerts[e]
                                r = jVerts[e + 1]
                                jEdges[(l, r)] = []
                                jEdges[(r, l)] = []
                            edgeCache[j] = jEdges
                        for edge in edgeCache[j]:
                            if edge in edgeCache[polyId]:
                                join = True
                                d = mdict[polyId]
                                d.append(j)
                                mdict[polyId] = d
                                if j not in mdict:
                                    mdict[j] = []
                                k = mdict[j]
                                k.append(polyId)
                                mdict[j] = k
                                break
            #Put the resultant dict back into the queue and alert that the work is done.
            resultq.put(mdict)
            q.task_done()
        return

    t6 = time.time()
    cores = mp.cpu_count()
    #print
    #print "Managed Queue"
    #cores = 2
    #Create a joinable queue from which to draw cells and a solution queue to get results
    ta = time.time()
    q = mp.JoinableQueue()
    resultq = mp.Queue()
    tb = time.time()
    #print "Made queues {}.".format(tb-ta)
    #Start up a number of child workers equal to the number of cores
    #This is a great way to manage a web service.
    jobs = [
        mp.Process(target=pcheck_joins2, args=(q, resultq))
        for x in range(cores)
    ]
    for job in jobs:
        job.start()
    tc = time.time()
    #print "Spawned processes {}".format(tc-tb)

    n = len(res['shapes'])
    starts = range(0, n, n / cores)
    ends = starts[1:]
    ends.append(n)
    offsets = [range(z[0], z[1]) for z in zip(starts, ends)]
    td = time.time()
    #print "Computing offsets {} ".format(td-tc)
    #Load the work into the queue
    #As the jobs are loaded, they start, so we avoid some of the packing overhead.
    for i in offsets:
        args = []
        args.append(res['potential_neighbors'])
        args.append(res['shapes'])
        args.append(i)
        #args.append(weight_type='Queen')
        q.put_nowait(args)
    te = time.time()
    #print "Putting work on queue: {}".format(te-td)
    #Load a poison pill into the queue to kill the children when work is done
    for i in range(cores):
        q.put_nowait(None)

    results = []
    for i in range(len(offsets)):
        results.append(resultq.get())
    t7 = time.time()
    #tf = time.time()
    #print "Getting work off queue, i.e. processing done {}".format(tf-te)

    ddict = defaultdict(set)
    for d in (results):
        for key, value in d.items():
            for v in value:
                ddict[key].add(v)
    tg = time.time()
    for job in jobs:
        job.join()
    #print "Joining results {}".format(tg-tf)
    t8 = time.time()
    for job in jobs:
        job.join()

    print "Joinable Queue Time: {0}".format(t8 - t6)
    print "Are the results the same? {0}".format(ddict == w)
Example #15
    apg_other.add_argument('--threads', type = int, metavar = 'N', help = 'number of threads (default: 2)', default = 2)
    apg_other.add_argument('--debug', type = int, help = 'print debug information; 0 = off, 1 = info, 2 = debug, 3 = details (default: 0)', default = 0)

    options = parser.parse_args()

    # check for required argument
    if options.bbox == None:
        parser.print_help()
        sys.exit()

    print ("Bounding Box: %s" % options.bbox)
    print ("Metasize: {}".format(options.metasize))
    print ("Zoom: {}-{}".format(options.zooms[0], options.zooms[1]))

    # setup queue to be used as a transfer pipeline from the render processes to the writer
    writerQueue = multiprocessing.JoinableQueue(options.metasize * options.metasize)

    # setup a lock for parts that only one process can execute (e.g. access the same file, print to screen)
    if MULTIPROCESSING:
      lock = multiprocessing.Lock()       # multiprocessing
    else:
      lock = threading.Lock()        # threading

    writer = WriterThread(options, writerQueue, lock)
    if MULTIPROCESSING:
      writer_thread = multiprocessing.Process(target = writer.loop) # multiprocessing
    else:
      writer_thread = threading.Thread(target = writer.loop)        # threading
    writer_thread.start()

    render_tiles(options.bbox, options.zooms, mapfile, options.metasize, writerQueue, lock, num_threads = options.threads, scale = 1.0, debug = options.debug)
Example #16
            horizontal_heatmap = np.reshape(horizontal_heatmap,
                                            (1, 32, 57, 28, 1))
            vertical_heatmap = np.reshape(vertical_heatmap, (1, 32, 37, 28, 1))
            model_input = ((horizontal_heatmap, vertical_heatmap), ())
            start = time.time()
            predictions = loaded_model.predict(model_input)
            end = time.time()
            print(end - start)
            nano_serv.send_data_queue.put(predictions)
            print('inference done')

        cross_process_signal.put('switch')


if __name__ == "__main__":

    cross_process_signal = mp.Queue()
    cross_process_data = mp.JoinableQueue()
    received_data_queue = Qthread.Queue(5)
    send_data_queue = Qthread.Queue(5)

    nano_serv = ns.Nano_Server(received_data_queue, send_data_queue)

    pid = os.fork()

    if (pid):
        time.sleep(30)
        red_blue_buffer(nano_serv, cross_process_signal, cross_process_data)
    else:
        inference_machine(nano_serv, cross_process_signal, cross_process_data)
Example #17
def render_tiles(bbox, zooms, mapfile, metasize, writer, lock, num_threads = NUM_THREADS, scale = 1, debug = 0):
    # setup queue to be used as a transfer pipeline to the render processes
    renderQueue = multiprocessing.JoinableQueue(32)

    print "Setting up maps. Please wait..."

    # Launch render processes
    renderers = {}
    for i in range(num_threads):
        renderer = RenderThread(writer, mapfile, renderQueue, lock, zooms[1])
        if MULTIPROCESSING:
          render_thread = multiprocessing.Process(target = renderer.loop)
        else:
          render_thread = threading.Thread(target = renderer.loop)
        render_thread.start()
        renderers[i] = render_thread

    # setup projection shortcuts
    gprj = GoogleProjection(zooms[1] + 1)
    LLtoPx = gprj.fromLLtoPixel

    # our map window to render
    ll0 = (bbox[0], bbox[3])
    ll1 = (bbox[2], bbox[1])

    # dimensions of map area for each zoom level ((left, top), (right, bottom))
    px = [[LLtoPx(ll0, z), LLtoPx(ll1, z)] for z in xrange(0, zooms[1] + 1)]

    # setup tile and metadata dictionaries (https://docs.python.org/2/tutorial/datastructures.html#dictionaries)
    tileData = {'sum': 0}   # holds information of all tiles
    metaData = {}           # holds information of all metatiles

    # iterate over all requested zoom levels
    for z in range(zooms[0], zooms[1] + 1):
      # setup nested dictionaries for this zoom level
      tileData[z] = {}
      metaData[z] = {}

      # compute how many tiles need to be rendered at current zoom level
      tileData[z]['cols'] = int(ceil((px[z][1][0] - px[z][0][0]) / TILE_SIZE))
      tileData[z]['rows'] = int(ceil((px[z][1][1] - px[z][0][1]) / TILE_SIZE))

      # number of tiles for this zoom level
      tileData[z]['sum'] = tileData[z]['cols'] * tileData[z]['rows']

      # update number of tiles overall
      tileData['sum'] += tileData[z]['sum']

      # determine optimal metatile size
      if tileData[z]['sum'] <= (metasize * metasize):
        # whole map at this zoom level fits into one metatile (does not need to be a square)
        metaData[z]['width'] = tileData[z]['cols']
        metaData[z]['height'] = tileData[z]['rows']
      else:
        if tileData[z]['cols'] <= tileData[z]['rows']:
          metaData[z]['width'] = min(metasize, tileData[z]['cols'])
          metaData[z]['height'] = int(floor(metasize * metasize / metaData[z]['width']))
        else:
          metaData[z]['height'] = min(metasize, tileData[z]['rows'])
          metaData[z]['width'] = int(floor(metasize * metasize / metaData[z]['height']))

      # amount of metatiles for this zoom level
      metaData[z]['sum'] = int(ceil(float(tileData[z]['sum']) / float(metaData[z]['width'] * metaData[z]['height'])))

      if debug >= 2:
        print "px at z=", z, ": ", px[z]
        print "tileData at z=", z, ": ", tileData[z]
        print "metaData at z=", z, ": ", metaData[z]
        print ""

    print "Tiles to render: ", tileData['sum'], "\n"

    # transfer tile count to writer thread
    item = (Command.sum, tileData['sum'], None, None, None)
    writer.put(item)

    # loop over tiles in every zoom level and render metatiles
    for z in range(zooms[0], zooms[1] + 1):

      # tiles are rendered from left to right beginning at the top left corner and ending at the bottom right corner
      for y in range(0, int(ceil(float(tileData[z]['rows']) / metaData[z]['height']))):

        # calculate height of current metatile (can be reduced at bottom/right border of map)
        # check if bottom edge of metatile exceeds overall number of tiles in this column
        if ((y + 1) * metaData[z]['height']) > tileData[z]['rows']:
          # yes, limit to max possible
          metaheight = min(metaData[z]['height'], max(0, tileData[z]['rows'] - y * metaData[z]['height']))
        else:
          # no, use full metatile height
          metaheight = metaData[z]['height']

        for x in range(0, int(ceil(float(tileData[z]['cols']) / metaData[z]['width']))):

          # calculate width of current metatile (can be reduced at bottom/right border of map)
          # check if right border of metatile exceeds overall tiles in this row
          if ((x + 1) * metaData[z]['width']) > tileData[z]['cols']:
            # yes, limit metatile dimensions to maximum possible
            metawidth = min(metaData[z]['width'], max(0, tileData[z]['cols'] - x * metaData[z]['width']))
          else:
            # no, use full width of metatile
            metawidth = metaData[z]['width']

          # calculate dimensions of current metatile in pixels
          left   = TILE_SIZE * (int(px[z][0][0] / TILE_SIZE) +  x * metaData[z]['width'])
          top    = TILE_SIZE * (int(px[z][0][1] / TILE_SIZE) +  y * metaData[z]['height'])
          right  = left + TILE_SIZE * metawidth
          bottom = top + TILE_SIZE * metaheight

          # create set of current metatile for the render queue
          metatile = (z, scale, (left, bottom), (right, top), metawidth, metaheight, debug)

          if debug >= 3:
            print "x=", x, " y=", y, " metawidth=", metawidth, "metaheight=", metaheight, " metatile=", metatile

          # add metatile to rendering queue
          renderQueue.put(metatile)

    # Signal render threads to exit by sending empty request to queue
    for i in range(num_threads):
        renderQueue.put(None)
    # wait for pending rendering jobs to complete
    renderQueue.join()
    for i in range(num_threads):
        renderers[i].join()
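
The loop above only fills renderQueue; the render worker processes it later joins are created elsewhere in the script. A minimal sketch of such a worker, assuming renderQueue is a multiprocessing.JoinableQueue and render_func is whatever routine actually draws a metatile, could look like this:

def render_worker(renderQueue, render_func):
    # consume metatiles until the None shutdown signal arrives
    while True:
        metatile = renderQueue.get()
        try:
            if metatile is None:
                break
            render_func(*metatile)
        finally:
            # exactly one task_done() per get(), so renderQueue.join() can return
            renderQueue.task_done()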
Example no. 18
0
def create_photometric_flatfield(
        filelist=None,
        input_hdus=None,
        strict_ota=False,
        smoothing=None,
        debug=False,
        return_interpolator=False,
        parallel=True,
        n_processes=-1,
):

    logger = logging.getLogger("PhotFlat")

    if (n_processes == 0):
        n_processes = multiprocessing.cpu_count()
    elif (n_processes < 0):
        n_processes = sitesetup.number_cpus

    if (smoothing is None):
        smoothing = 120.
    smoothing_pixels = smoothing / 0.11

    logger.info("Using PF smoothing length of %.1f arcsec" % (smoothing))

    pf = PhotFlatHandler(
        filelist=filelist,
        input_hdus=input_hdus
    )
    n_frames = len(filelist) if filelist is not None else 0
    n_hdus = len(input_hdus) if input_hdus is not None else 0
    logger.info("Computing photometric flatfield from %d disk-files and %d memory-files" % (
        n_frames, n_hdus)
    )
    # logger.info("Input files:\n-- %s" % ("\n-- ".join(filelist)))

    pf.read_catalogs()

    reference_pos = [4., -4.]
    # that's in arc-min relative to reference point from CRVAL1/2

    reference_zp = pf.get_reference_zeropoint(
        ra=reference_pos[0],
        dec=reference_pos[1],
        radius=3,
        relative_coords=True,
        max_error=0.05)
    logger.debug("Using reference ZP: %s" % (reference_zp))

    # reference_zp = {}
    # list_of_otas = []
    # list_of_extnames = []
    # for framename in pf.phot_frames:
    #     logger.info("Adding photometric data from file %s" % (framename))
    #     frame = pf.phot_frames[framename]
    #
    #     zps = frame.get_zeropoints(ra=reference_pos[0],
    #                                dec=reference_pos[1],
    #                                radius=3,
    #                                relative_coords=True, max_error=0.05)
    #     print zps
    #     reference_zp[framename] = numpy.median(zps)
    #
    #     # also collect a list of all available OTAs
    #     list_of_otas.extend(frame.get_ota_list())
    #     list_of_extnames.extend(frame.get_extname_list())
    #
    # print reference_zp

    unique_otas = pf.get_ota_set()
    unique_extnames = pf.get_extname_set()

        # set(list_of_otas)
    # print list_of_otas
    #
    # unique_extnames = set(list_of_extnames)
    # print unique_extnames

    #
    # Now extract the relative ZP differences for each of the sectors in each ota
    #
    sampling = 512
    otalist = [pyfits.PrimaryHDU()]
    running_sum = 0

    all_photflat = []
    all_photflat_err = []
    all_extnames = []
    if (parallel):

        logger.debug("Calculating photometric flatfield in parallel")
        # prepare jobs
        extname_queue = multiprocessing.JoinableQueue()
        for i, extname in enumerate(unique_extnames):
            extname_queue.put(extname)
        result_queue = multiprocessing.Queue()

        # start parallel execution in separate processes
        processes = []
        for i in range(n_processes):

            # start the process
            p = multiprocessing.Process(
                target=parallel_create_photometric_flatfields_worker,
                kwargs=dict(
                    input_queue=extname_queue,
                    result_queue=result_queue,
                    pf=pf,
                    reference_zp=reference_zp,
                    sampling=sampling,
                    smoothing=smoothing_pixels,
                )
            )
            # p.daemon = True
            p.start()
            processes.append(p)

            # also add a termination command to the job queue
            extname_queue.put(None)

        # Gather all results
        for _ in unique_extnames:
            (imghdu, photflat, photflat_err) = result_queue.get()
            otalist.append(imghdu)
            all_photflat.append(photflat)
            all_photflat_err.append(photflat_err)
            all_extnames.append(imghdu.name)

        logger.info("Received %d phot-flat extensions from parallel workers" % (len(otalist)-1))

    else:

        logger.debug("Using the serial approach towards the photometric flatfield")
        for i, extname in enumerate(unique_extnames):

            logger.info("Computing photometric flat-field for OTA %s (%2d of %2d)" % (extname, i+1, len(unique_otas)))

            imghdu, photflat, photflat_err = create_photometric_flatfield_single_ota(
                extname=extname,
                pf=pf,
                reference_zp=reference_zp,
                sampling=sampling,
                enlarge=enlarge,
            )
            otalist.append(imghdu)
            all_photflat.append(photflat)
            all_photflat_err.append(photflat_err)
            all_extnames.append(imghdu.name)

    logger.debug("Total sum of reference values: %d" % (running_sum))
    # break

    #
    # Calculate the mean and/or median level of the photflat across
    # the mean level
    #
    all_photflat = numpy.array(all_photflat)
    fluxcorr = numpy.power(10., 0.4*all_photflat)
    numpy.savetxt("photcorr", all_photflat.ravel())
    numpy.savetxt("flatcorr", fluxcorr.ravel())
    numpy.save("photcorr_npy", all_photflat)

    mean_level = numpy.nanmean(fluxcorr)
    mean_mag = numpy.nanmean(all_photflat)
    logger.info("Mean photometric flatfield level: %8.5f (delta-mag=%7.4f)" % (mean_level, mean_mag))


    import pickle
    fluxcorr /= mean_level
    pickle.dump((fluxcorr, all_extnames), open("photflat.pickle", "wb"))


    logger.debug("Correcting photometric flatfield mean level")
    for ota in otalist[1:]:
        ota.data /= mean_level
    logger.debug("Done correcting photometric flatfield mean level")

    hdulist = pyfits.HDUList(otalist)

    if (return_interpolator):
        return hdulist, (fluxcorr, all_extnames)

    return hdulist
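
The parallel branch above depends on parallel_create_photometric_flatfields_worker, which is not shown in this excerpt. Judging from how the queues are wired up, it is a consume-until-None loop around the same per-OTA computation used in the serial branch; a sketch under that assumption (the exact keyword arguments of create_photometric_flatfield_single_ota are an assumption here):

def parallel_create_photometric_flatfields_worker(input_queue, result_queue,
                                                  pf, reference_zp,
                                                  sampling, smoothing):
    while True:
        extname = input_queue.get()
        if extname is None:
            # shutdown sentinel queued by the parent process
            input_queue.task_done()
            break
        # assumed to mirror the serial branch above
        imghdu, photflat, photflat_err = create_photometric_flatfield_single_ota(
            extname=extname, pf=pf, reference_zp=reference_zp, sampling=sampling)
        result_queue.put((imghdu, photflat, photflat_err))
        input_queue.task_done()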
import os
import ast
import req_proxy
from bs4 import BeautifulSoup
from urlparse import urlparse
import multiprocessing
import logging
import time
from threading import Thread
import sys
from lxml import html

logging.basicConfig(level=logging.DEBUG, format='(%(threadName)-10s) %(message)s',)

num_fetch_threads = 100
enclosure_queue = multiprocessing.JoinableQueue()




class pl_to_info(object):
   
   def __init__(self, line):
       line = str(line).strip()
       self.line_list = ast.literal_eval(line)
       f = open("to_extract_downloads")
       self.direc = f.read().strip()
       f.close()


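The excerpt ends before the fetch workers are started. The usual pattern with a joinable queue and the threading module, which this setup appears to be building towards, is sketched below (the actual download logic is left as a comment):

def fetch_worker(worker_id, queue):
    # each worker blocks on the queue and acknowledges every item it takes
    while True:
        url = queue.get()
        logging.debug('worker %d fetching %s', worker_id, url)
        # ... fetch and parse the page here ...
        queue.task_done()


for i in range(num_fetch_threads):
    t = Thread(target=fetch_worker, args=(i, enclosure_queue))
    t.setDaemon(True)
    t.start()
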
Example no. 20
0
def prepare(mikado_config: MikadoConfiguration, logger):
    """Main script function.

    :param mikado_config: the MikadoConfiguration object for this run.
    :type mikado_config: MikadoConfiguration
    :param logger: a logging instance
    :type logger: logging.Logger
    """

    if not hasattr(mikado_config.reference, "genome"):
        raise InvalidConfiguration(
            "Invalid configuration; reference: {}".format(mikado_config))

    if hasattr(mikado_config.reference.genome, "close"):
        mikado_config.reference.genome.close()
        if hasattr(mikado_config.reference.genome, "filename"):
            mikado_config.reference.genome = getattr(
                mikado_config.reference.genome, "filename")
        elif hasattr(mikado_config.reference.genome, "name"):
            mikado_config.reference.genome = getattr(
                mikado_config.reference.genome, "name")
        else:
            logger.critical("Invalid FASTA file: %s",
                            mikado_config.reference.genome)
            raise AttributeError
    elif not isinstance(mikado_config.reference.genome, (str, bytes)):
        logger.critical("Invalid FASTA file: %s",
                        mikado_config.reference.genome)
        raise AttributeError

    if not os.path.exists(mikado_config.reference.genome):
        error = "Invalid FASTA file: {}".format(mikado_config.reference.genome)
        logger.critical(error)
        raise AttributeError(error)

    assert len(mikado_config.prepare.files.gff) > 0
    assert len(mikado_config.prepare.files.gff) == len(
        mikado_config.prepare.files.labels), (
            mikado_config.prepare.files.gff,
            mikado_config.prepare.files.labels)

    if mikado_config.prepare.strand_specific is True:
        mikado_config.prepare.files.strand_specific_assemblies = mikado_config.prepare.files.gff[:]

    ref_len = len(mikado_config.prepare.files.reference)
    file_len = len(mikado_config.prepare.files.gff)
    if ref_len == 0:
        mikado_config.prepare.files.reference = ([False] * file_len)
    elif (ref_len != file_len) or (mikado_config.prepare.files.reference[0]
                                   not in (True, False)):
        ref_set = set(mikado_config.prepare.files.reference)
        mikado_config.prepare.files.reference = [
            (_ in ref_set) for _ in mikado_config.prepare.files.gff
        ]

    if not mikado_config.prepare.files.exclude_redundant:
        mikado_config.prepare.files.exclude_redundant = (
            [getattr(mikado_config, "exclude_redundant", False)] *
            len(mikado_config.prepare.files.gff))

    shelve_names = [
        path_join(mikado_config.prepare.files.output_dir,
                  "mikado_shelf_{}.db".format(str(_).zfill(5)))
        for _ in range(len(mikado_config.prepare.files.gff))
    ]

    logger.propagate = False
    if mikado_config.prepare.single is False and mikado_config.threads > 1:
        multiprocessing.set_start_method(mikado_config.multiprocessing_method,
                                         force=True)

        mikado_config.logging_queue = multiprocessing.JoinableQueue(-1)
        log_queue_handler = logging.handlers.QueueHandler(
            mikado_config.logging_queue)
        log_queue_handler.setLevel(logging.DEBUG)
        # logger.addHandler(log_queue_handler)
        mikado_config.tempdir = tempfile.TemporaryDirectory(
            dir=mikado_config.prepare.files.output_dir)
        mikado_config.listener = logging.handlers.QueueListener(
            mikado_config.logging_queue, logger)
        mikado_config.listener.propagate = False
        mikado_config.listener.start()

    mikado_config.prepare.files.out_fasta = open(
        path_join(mikado_config.prepare.files.output_dir,
                  mikado_config.prepare.files.out_fasta), 'w')
    mikado_config.prepare.files.out = open(
        path_join(mikado_config.prepare.files.output_dir,
                  mikado_config.prepare.files.out), 'w')

    logger.info("Output dir: %s. Output GTF: %s. Output Fasta: %s",
                mikado_config.prepare.files.output_dir,
                mikado_config.prepare.files.out.name,
                mikado_config.prepare.files.out_fasta.name)
    logger.info("Loading reference file")
    mikado_config.reference.genome = pysam.FastaFile(
        mikado_config.reference.genome)
    logger.info("Finished loading genome file")
    logger.info("Started loading exon lines")
    errored = False
    try:
        # chrom, start, end, strand, tid, write_start, write_length, shelf
        rows = load_exon_lines(
            mikado_config,
            shelve_names,
            logger,
            min_length=mikado_config.prepare.minimum_cdna_length,
            max_intron=mikado_config.prepare.max_intron_length,
        )

        logger.info("Finished loading exon lines")

        shelve_source_scores = []
        for label in mikado_config.prepare.files.labels:
            shelve_source_scores.append(
                mikado_config.prepare.files.source_score.get(label, 0))

        shelve_table = []

        for shelf, score, is_reference, exclude_redundant in zip(
                shelve_names, shelve_source_scores,
                mikado_config.prepare.files.reference,
                mikado_config.prepare.files.exclude_redundant):
            assert isinstance(is_reference, bool), \
                (is_reference, mikado_config.prepare.files.reference)
            shelve_table.append(
                (shelf, score, is_reference, exclude_redundant))

        shelve_table = pd.DataFrame(
            shelve_table,
            columns=["shelf", "score", "is_reference", "exclude_redundant"])

        rows = rows.merge(shelve_table, on="shelf", how="left")
        random.seed(mikado_config.seed)

        shelves = dict((shelf_name, open(shelf_name, "rb"))
                       for shelf_name in shelve_table["shelf"].unique())

        def divide_by_chrom():
            # chrom, start, end, strand, tid, write_start, write_length, shelf
            transcripts = rows.groupby(["chrom"])
            columns = rows.columns[1:]
            for chrom in sorted(transcripts.groups.keys()):
                logger.debug("Starting with %s (%d positions)", chrom,
                             transcripts.size()[chrom])

                yield from _analyse_chrom(chrom,
                                          rows.loc[transcripts.groups[chrom],
                                                   columns],
                                          shelves,
                                          logger=logger)

        perform_check(divide_by_chrom(), shelve_names, mikado_config, logger)
    except Exception as exc:
        # TODO: Consider using stderr to signal errors here too?
        logger.exception(exc)
        __cleanup(mikado_config, shelve_names)
        errored = True
        logger.error("Mikado has encountered an error, exiting")
        # sys.exit(1)

    if mikado_config.prepare.single is False and mikado_config.threads > 1:
        mikado_config.tempdir.cleanup()
        mikado_config.listener.enqueue_sentinel()

    logger.setLevel(logging.INFO)
    __cleanup(mikado_config, shelve_names)

    logger.addHandler(logging.StreamHandler())
    if errored is False:
        logger.info(
            "Mikado prepare has finished correctly with seed %s. The output %s FASTA file can now be "
            "used for BLASTX and/or ORF calling before the next step in the pipeline, `mikado serialise`.",
            mikado_config.seed, mikado_config.prepare.files.out_fasta)
        logging.shutdown()
    else:
        logger.error("Mikado prepare has encountered a fatal error. Please check the logs and, if there is a bug,"\
                     "report it to https://github.com/EI-CoreBioinformatics/mikado/issues")
        logging.shutdown()
        exit(1)
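
The logging_queue created above follows the standard QueueHandler/QueueListener pattern: each worker process attaches a QueueHandler pointing at the shared queue, and the QueueListener started in prepare() forwards the records to the real handlers. A minimal sketch of the child-process side (an illustration of the pattern, not Mikado's actual worker code):

import logging
import logging.handlers


def setup_child_logger(logging_queue, name="mikado_prepare_worker"):
    # route every record from this process through the shared queue
    handler = logging.handlers.QueueHandler(logging_queue)
    logger = logging.getLogger(name)
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    return logger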
Example no. 21
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-wd',
                        dest='wd',
                        help='full path to working directory',
                        default=-1)
    parser.add_argument('-d',
                        dest='DIR',
                        help='full path to prep_TF directory')
    parser.add_argument('-s', dest='samples', help='samples file')
    parser.add_argument('-i', dest='ID', help='unique id of this sample')
    parser.add_argument('-eb',
                        dest='exeBWA',
                        help='full path to bwa executable',
                        default="bwa")
    parser.add_argument('-es',
                        dest='exeSAM',
                        help='full path to samtools executable',
                        default="samtools")
    parser.add_argument('-l1',
                        dest='level',
                        help='level of hierarchy to guide initial search')
    parser.add_argument('-l2',
                        dest='cLevel',
                        help='level of hierarchy to cluster')
    parser.add_argument('-q',
                        dest='qual',
                        help='map quality threshold',
                        type=int)
    parser.add_argument(
        '-exclude',
        dest='exclude',
        help='newline separated list of te families to exclude from analysis',
        default=-1)
    parser.add_argument('-sd',
                        dest='stdev',
                        help='insert size standard deviation override',
                        type=int,
                        default=-1)
    parser.add_argument('-cov',
                        dest='cov',
                        help='manual coverage override',
                        type=int,
                        default=-1)
    parser.add_argument("-t",
                        dest="nProc",
                        type=int,
                        default=1,
                        help="Specify number of processes")
    args = parser.parse_args()

    # identify current working directory
    if args.wd == -1:
        cwd = os.getcwd()
    else:
        cwd = os.path.abspath(args.wd)

    # import options
    prep_TF = os.path.abspath(args.DIR)
    prefix = os.path.abspath(args.DIR).split("/")[-1].replace(".prep_TF", "")
    exeSAM = args.exeSAM
    exeBWA = args.exeBWA
    level = args.level
    cLevel = args.cLevel
    qual = args.qual
    nProc = args.nProc

    # check dependencies for function
    check_dependency(exeSAM)
    check_dependency(exeBWA)

    # import hierarchy
    hierFILE = os.path.join(prep_TF, prefix + ".hier")
    hierarchy, label = {}, []
    ct = 0
    with open(hierFILE, 'r') as fIN:
        for line in fIN:
            if ct == 0:
                label = line.split()[1:]
            else:
                hierarchy[line.split()[0]] = line.split()[1:]
            ct += 1
    bam, pre = "", ""
    with open(os.path.abspath(args.samples), "r") as fIN:
        for line in fIN:
            if line.split()[1] == args.ID:
                pre = line.split()[1]
                bam = line.split()[0]
    if pre == "" or bam == "":
        print "Warning: prefix in samples file different from path in options"
        sys.exit()

    # identify the group-name of all TEs for the specified level of the hierarchy
    groups = []
    groupIndex = label.index(level)
    for ID in hierarchy:
        groups.append(hierarchy[ID][groupIndex])
    groups = sorted(set(groups))

    # import the TE annotation
    annotation = []
    with open(os.path.join(prep_TF, prefix + ".te.pseudo.bed"), 'r') as fIN:
        for line in fIN:
            arr = line.split()
            annotation.append([arr[0], int(arr[1]), int(arr[2]), arr[3]])

    # import the chromosome lengths
    chromosomes, lengths = [], []
    genomeSizeFILE = os.path.join(prep_TF, prefix + ".genomeSize.txt")
    with open(genomeSizeFILE, 'r') as fIN:
        for line in fIN:
            arr = line.split()
            chromosomes.append(arr[0])
            lengths.append(int(arr[2]))

    # run samtools stats
    statsOutFile = bam.replace(".bam", ".stats.txt")
    print "Calculating alignment statistics"
    cmd = "%s stats -t %s %s" % (exeSAM, genomeSizeFILE, bam)
    print "cmd:", cmd
    p = sp.Popen(shlex.split(cmd),
                 stdout=open(statsOutFile, 'w'),
                 stderr=sp.PIPE)
    perr = p.communicate()[1]
    if p.returncode != 0:
        print "samtools stats issued error: %s" % (perr)
        sys.exit(1)

    # calculate coverage
    covFILE = bam.replace(".bam", ".cov.txt")
    cmd = """%s depth -Q %s %s | awk '{sum+=$3; sumsq+=$3*$3} END {print "Average = ",sum/NR; print "Stdev = ",sqrt(sumsq/NR - (sum/NR)**2)}' > %s""" % (
        exeSAM, str(qual), bam, covFILE)
    print "cmd:", cmd
    os.system(cmd)

    # read samtools stats file
    with open(statsOutFile, 'r') as fIN:
        for line in fIN:
            if 'average length' in line:
                readLen = int(float(line.split()[-1]))
            if 'insert size average' in line:
                insz = int(float(line.split()[-1]))
            if 'insert size standard deviation' in line:
                sd = int(float(line.split()[-1]))

    if args.stdev == -1:
        print "Insert size standard deviation estimated as %s. Use the override option if you suspect this is incorrect!" % (
            sd)
        if sd > 100:
            print "!!! Warning: insert size standard deviation reported as", sd, "!!!"
            print "Please ensure this is correct and use the override option!"
            sys.exit()
    else:
        sd = args.stdev

    # read coverage file
    cov = args.cov
    with open(covFILE, "r") as fIN:
        for line in fIN:
            if line.startswith("Av"):
                cov = int(float(line.split()[-1]))
    if cov == -1:
        print "Warning: coverage could not be estimated, enter coverage manually"
        sys.exit()

    # read list of TE groups to exclude from analysis
    if args.exclude == -1:
        excludeList = []
    else:
        excludeList = []
        with open(args.exclude, "r") as fIN:
            for line in fIN:
                excludeList.append(line.split()[0])

    # define and create subdirectories
    bedDir = os.path.join(cwd, pre + ".bed_files")
    samDir = os.path.join(cwd, pre + ".sam_files")
    posDir = os.path.join(cwd, pre + ".te_positions")
    suppDir = os.path.join(cwd, pre + ".supplemental_alignments")
    outDir = os.path.join(cwd, "countPos")

    mkdir_if_not_exist(bedDir, posDir, samDir, suppDir, outDir)
    groups = [group for group in groups if group not in excludeList]
    #groups= ["doc3"] #debug single family
    print "Groups to search:", groups

    print "\nwriting TE bed files..."
    for group in groups:
        #print "group:",group
        wb.write_bed_portal(hierarchy, label, group, level, bedDir)
    print "writing TE bed files completed!"

    # reduce search-space 1
    print "reducing search space..."
    try:
        bedFILE = os.path.join(bedDir, "mega_complete.bed")
        bamFILE = os.path.join(samDir, "mega_complete.bam")
        cmd = "%s view -@ %s -L %s %s -b" % (exeSAM, str(nProc), bedFILE, bam)
        print "cmd:", cmd
        p = sp.Popen(shlex.split(cmd),
                     stdout=open(bamFILE, 'w'),
                     stderr=sp.PIPE)
        perr = p.communicate()[
            1]  # communicate returns a tuple (stdout, stderr)
        #print perr
        if p.returncode != 0:
            print "Error running samtools: p.returncode =", p.returncode
            sys.exit(1)
    except OSError:
        print "Cannot run samtools"
    print "search space succesfully reduced..."
    print "new reduced bam file:", bamFILE

    # run multiprocess 2
    print "clustering TE positions..."
    task_q = mp.JoinableQueue()
    params = [
        annotation, bamFILE, chromosomes, exeSAM, hierarchy, insz, label,
        lengths, level, cLevel, qual, readLen, sd, cov, bedDir, samDir, posDir,
        suppDir
    ]
    create_proc2(nProc, task_q, params)
    assign_task(groups, task_q, nProc)
    try:
        task_q.join()
    except KeyboardInterrupt:
        print "KeyboardInterrupt"
        sys.exit(0)
    else:
        print "\nclustering TE positions completed!"

    # combine bed files from all groups
    with open(os.path.join(bedDir, "mega_clustered.bed"), "w") as fOUT:
        for group in groups:
            with open(os.path.join(bedDir, "%s_clustered.bed" % (group)),
                      "r") as fIN:
                for line in fIN:
                    fOUT.write(line)

    # reduce search-space 2
    print "final reduction of search space..."
    try:
        bedFILE = os.path.join(bedDir, "mega_clustered.bed")
        bamFILE = os.path.join(samDir, "mega_clustered.bam")
        cmd = "%s view -@ %s -q %s -L %s %s -b" % (exeSAM, str(nProc),
                                                   str(qual), bedFILE, bam)
        print "cmd:", cmd
        p = sp.Popen(shlex.split(cmd),
                     stdout=open(bamFILE, 'w'),
                     stderr=sp.PIPE)
        perr = p.communicate()[
            1]  # communicate returns a tuple (stdout, stderr)
        #print perr
        if p.returncode != 0:
            print "Error running samtools: p.returncode =", p.returncode
            sys.exit(1)
    except OSError:
        print "Cannot run samtools"
    print "search space succesfully reduced..."
    print "new reduced bam file:", bamFILE

    # run multiprocess 3
    print "estimating TE breakpoints..."
    bamFILE = os.path.join(samDir, "mega_clustered.bam")
    task_q = mp.JoinableQueue()
    params = [
        annotation, bamFILE, chromosomes, exeSAM, hierarchy, insz, label,
        lengths, level, cLevel, qual, readLen, sd, cov, bedDir, samDir, posDir,
        suppDir
    ]
    create_proc3(nProc, task_q, params)
    assign_task(groups, task_q, nProc)
    try:
        task_q.join()
    except KeyboardInterrupt:
        print "KeyboardInterrupt"
        sys.exit(0)
    else:
        print "\nestimating TE breakpoints completed!"

    # concatenate position estimates
    catFile = os.path.join(outDir, pre + ".all_positions.txt")
    try:
        files = ""
        for file in glob.glob(os.path.join(posDir, "*.txt")):
            files += file + " "
        cmd = "cat %s" % (files)
        #print "cmd:", cmd  #p = sp.Popen(shlex.split(cmd), stdout=open(catFile, 'w'), stderr=sp.PIPE)
        p = sp.Popen(shlex.split(cmd),
                     stdout=open(catFile, 'w'),
                     stderr=sp.PIPE)
        perr = p.communicate()[
            1]  # communicate returns a tuple (stdout, stderr)
        #print perr
        if p.returncode != 0:
            print "error concatenating positions"
            sys.exit(1)
    except OSError:
        print "Cannot concatenate positions"
        sys.exit(1)

    # sort position estimates
    print "Sorting positions..."
    sortp.sort_portal(catFile)

    # remove temporary directories
    shutil.rmtree(bedDir)
    shutil.rmtree(samDir)
    shutil.rmtree(posDir)
    shutil.rmtree(suppDir)

    print "TEFLON DISCOVERY FINISHED!"
Example no. 22
0
    FIXME: dynamically fetch & update the RIPE managed tree
    """
    def __init__(self, lookup_queue, result_queue):
        multiprocessing.Process.__init__(self)
        self.lookup_queue = lookup_queue
        self.result_queue = result_queue
        self.tree = radix.Radix()
        self.prefixes = []
        self.dbname = "RIPE-AUTH"
        self.ready_event = multiprocessing.Event()
        self.lookup = RIPELookupWorker(self.tree, self.prefixes,
                                       self.lookup_queue, self.result_queue)
        self.lookup.setDaemon(True)
        self.lookup.start()

    def run(self):
        print "INFO: loaded the RIPE managed tree"
        self.ready_event.set()  # yay


if __name__ == "__main__":
    lookup_queue = multiprocessing.JoinableQueue()
    result_queue = multiprocessing.JoinableQueue()

    a = RIPEWorker(lookup_queue, result_queue)
    a.start()
    a.ready_event.wait()
    lookup_queue.put(("is_covered", "194.33.96.0/24"))
    lookup_queue.join()
    print result_queue.get()
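
RIPELookupWorker is not included in this excerpt. Given the ("is_covered", prefix) tuples pushed onto lookup_queue and the lookup_queue.join() in the main block, it is presumably a daemon thread that answers each request on result_queue and acknowledges it with task_done(); a sketch of that shape (the actual radix-tree lookup is omitted):

import threading


class RIPELookupWorker(threading.Thread):

    def __init__(self, tree, prefixes, lookup_queue, result_queue):
        threading.Thread.__init__(self)
        self.tree = tree
        self.prefixes = prefixes
        self.lookup_queue = lookup_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            command, prefix = self.lookup_queue.get()
            # ... query self.tree here and build the real answer ...
            self.result_queue.put((command, prefix))
            self.lookup_queue.task_done()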
Example no. 23
0
    def __loadCache(self, file):
        mp = False
        nan = 0
        processes = []
        single = False
        cache_file = None
        try:
            temp = RopperService.CACHE_FOLDER
            cache_file = temp + os.path.sep + self.__getCacheFileName(file)

            if not os.path.exists(cache_file):
                if not os.path.exists(cache_file + '_%d' % 1):
                    return
                else:
                    if isWindows():
                        raise RopperError('Cache has to be cleared.')
                    mp = True and multiprocessing.cpu_count() > 1
            else:
                single = True
            if self.__callbacks and hasattr(self.__callbacks, '__message__'):
                self.__callbacks.__message__('Load gadgets from cache')
            if self.__callbacks and hasattr(self.__callbacks,
                                            '__gadgetSearchProgress__'):
                self.__callbacks.__gadgetSearchProgress__(None, [], 0)
            if not mp:
                all_gadgets = []
                if single:
                    with open(cache_file, 'rb') as f:
                        data = f.read()
                        all_gadgets.extend(eval(decode(data, 'zip')))
                        if self.__callbacks and hasattr(
                                self.__callbacks, '__gadgetSearchProgress__'):
                            self.__callbacks.__gadgetSearchProgress__(
                                None, all_gadgets, 1.0)
                else:
                    for i in range(1, RopperService.CACHE_FILE_COUNT + 1):
                        if os.path.exists(cache_file + '_%d' % i):
                            with open(cache_file + '_%d' % i, 'rb') as f:
                                data = f.read()
                                all_gadgets.extend(eval(decode(data, 'zip')))
                                if self.__callbacks and hasattr(
                                        self.__callbacks,
                                        '__gadgetSearchProgress__'):
                                    self.__callbacks.__gadgetSearchProgress__(
                                        None, all_gadgets,
                                        float(i) /
                                        RopperService.CACHE_FILE_COUNT)
                return all_gadgets

            else:
                count = min(multiprocessing.cpu_count(),
                            RopperService.CACHE_FILE_COUNT)

                gqueue = multiprocessing.Queue()
                fqueue = multiprocessing.JoinableQueue()
                for i in range(1, RopperService.CACHE_FILE_COUNT + 1):
                    fqueue.put(cache_file + '_%d' % i)
                all_gadgets = []
                for i in range(count):
                    p = multiprocessing.Process(
                        target=self.__loadCachePerProcess,
                        args=(fqueue, gqueue))
                    p.start()
                    processes.append(p)

                for i in range(count):
                    fqueue.put(None)

                for i in range(RopperService.CACHE_FILE_COUNT):
                    gadgets = gqueue.get()
                    all_gadgets.extend(gadgets)
                    if self.__callbacks and hasattr(
                            self.__callbacks, '__gadgetSearchProgress__'):
                        self.__callbacks.__gadgetSearchProgress__(
                            None, all_gadgets,
                            float(i + 1) / RopperService.CACHE_FILE_COUNT)

                return sorted(all_gadgets, key=Gadget.simpleInstructionString)
        except KeyboardInterrupt:
            if mp:
                for p in processes:
                    if p and p.is_alive():
                        p.terminate()
        except BaseException as e:
            if mp:
                for p in processes:
                    if p and p.is_alive():
                        p.terminate()
            if cache_file:
                for i in range(1, RopperService.CACHE_FILE_COUNT + 1):
                    if os.path.exists(cache_file + '_%d' % i):
                        os.remove(cache_file + '_%d' % i)
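
__loadCachePerProcess is not part of this excerpt. Judging from how fqueue and gqueue are used above, it pops cache-file names until the None sentinel arrives and pushes each decoded gadget list onto gqueue; a sketch under those assumptions:

    def __loadCachePerProcess(self, fqueue, gqueue):
        while True:
            cache_path = fqueue.get()
            if cache_path is None:
                fqueue.task_done()
                break
            with open(cache_path, 'rb') as f:
                # same zip-decoding as the single-file branch above
                gadgets = eval(decode(f.read(), 'zip'))
            gqueue.put(gadgets)
            fqueue.task_done()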
Example no. 24
0
import multiprocessing as mp


def washer(dishes, output):
    for dish in dishes:
        print('Washing', dish, 'dish')
        output.put(dish)


def dryer(input):
    while True:
        dish = input.get()
        print('Drying', dish, 'dish')
        input.task_done()


dish_queue = mp.JoinableQueue()
dryer_proc = mp.Process(target=dryer, args=(dish_queue,))
dryer_proc.daemon = True
dryer_proc.start()

dishes = ['salad', 'bread', 'entree', 'dessert']
washer(dishes, dish_queue)
dish_queue.join()
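
A variant of the same dish-washing example that shuts the dryer down with a sentinel instead of relying on a daemon process (a sketch added for comparison, not part of the original):

def dryer_with_sentinel(input):
    while True:
        dish = input.get()
        if dish is None:        # sentinel: no more dishes coming
            input.task_done()
            break
        print('Drying', dish, 'dish')
        input.task_done()

With this version the main process would put(None) after the real dishes and could join the dryer process explicitly instead of letting it die as a daemon.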
Example no. 25
0
def run_cmh(args):
	''' run Cochran-Mantel-Haenszel test '''

	sz_utils.make_dirs_if_necessary(args.outp)
	allele_counts = {}
	pvals = {}
	tables = collections.defaultdict(list)
	ntests = 0
	tables, ntables_per_snp = sz_utils._count2table(args.table_file)
	ColorText().info("[poolseq_tk]: %d tables prepared\n" %(len(tables)), "stderr")

	task_q = mp.JoinableQueue()
	result_q = mp.Queue()
	create_procs(args.nproc,task_q, result_q, ntables_per_snp, args.outp)
	sz_utils._assign_tables(tables, task_q, args.nproc)

	# waiting for all tasks to be finished
	try:
		task_q.join()
	except KeyboardInterrupt:
		ColorText().info("[poolseq_tk]: Terminated unexpectedly by keyboard\n", "stderr")
		sys.exit()
	else:
		# merge results
		pvals, odds_ratios = {}, {}
		while args.nproc:
			file = result_q.get()
			with open(file, 'r') as fIN:
				for line in fIN:
					tmp_line = line.strip().split("\t")
					chr = tmp_line[0]
					pos = int(tmp_line[1])
					pval = float(tmp_line[2])
					odds_ratio = float(tmp_line[3])
					if (chr, pos) not in pvals:
						pvals[chr, pos] = pval
					if (chr, pos) not in odds_ratios:
						odds_ratios[chr, pos] = odds_ratio
			os.remove(file)
#			pvals_split, odds_ratios_split = result_q.get()
#			pvals.update(pvals_split)
#			odds_ratios.update(odds_ratios_split)
			args.nproc -= 1
		ColorText().info("[poolseq_tk]: Running CMH tests successfully\n", "stderr")

		# correcting raw p-values
		ColorText().info("[poolseq_tk]: multi-testing correction using %s method at %d%% level ..."
						 %(args.adj_method, args.adj_cutoff*100), "stderr")
		raw_pvals = [pvals[chr, pos] for chr, pos in sorted(pvals.iterkeys())]
		raw_pvals_vector = robjects.FloatVector(raw_pvals)
		padjust = robjects.r['p.adjust'](raw_pvals_vector, method=args.adj_method)
		ColorText().info(" [done]\n", "stderr")
		pcutoff = sz_utils.getFDR_BH(pvals, args.adj_cutoff)
		ColorText().info("[poolseq_tk]: p-value cutoff using Benjamini.Hochberg procedure %.5e"
						 %(pcutoff), "stderr")
		ColorText().info(" [done]\n", "stderr")

		# output p-values
		ColorText().info("[poolseq_tk]: output to files ...", "stderr")
		out_all = args.outp + ".cmh.all"
		out_fdr = args.outp + ".cmh.fdr%d" %(args.adj_cutoff*100)
		out_expect = args.outp + ".cmh.fdr%d.expect" %(args.adj_cutoff*100)
		sz_utils.make_dirs_if_necessary(out_all, out_fdr)
		with open(out_all, 'w') as fALL, \
			 open(out_fdr, 'w') as fFDR, \
			 open(out_expect, 'w') as fEXPECT:
			for i, k in enumerate(sorted(pvals.iterkeys())):
				chr = k[0]
				pos = k[1]
				raw_pval = pvals[chr, pos]
				log_pval = None
				if raw_pval == 0.0:
					log_pval = "Inf"
				elif raw_pval == "Nan":
					raw_pval = 1.0
					log_pval = 0.0
				else:
					log_pval = -1 * math.log10(raw_pval)
				odds_ratio = odds_ratios[k]
				if padjust[i] <= args.adj_cutoff:
					sz_utils._results_outputter(fFDR, pos, chr, "\t".join(tables[chr, pos][1:3]), tables[chr, pos][3:], raw_pval, log_pval, padjust[i], odds_ratio)
					if ((args.oddsr_direction == "greater" and odds_ratios[chr, pos] > 1) or
						(args.oddsr_direction == "less" and odds_ratios[chr, pos] < 1)):
						sz_utils._results_outputter(fEXPECT, pos, chr, "\t".join(tables[chr, pos][1:3]), tables[chr, pos][3:], raw_pval, log_pval, padjust[i], odds_ratio)
				sz_utils._results_outputter(fALL, pos, chr, "\t".join(tables[chr, pos][1:3]), tables[chr, pos][3:], raw_pval, log_pval, padjust[i], odds_ratio)
		ColorText().info(" [done]\n", "stderr")
		ColorText().info("[poolseq_tk]: Program finishes successfully\n", "stderr")
Example no. 26
0
def appSearchMP(dbfilenameFullPath, searchType, search_space, options):
    (outputFile, maxCores) = (options.outputFile, options.maxCores)
    known_bad_data = None
    # Start timer
    t0 = time.time()

    # If possible use the available indexes
    if searchType == 'LITERAL' and options.searchLiteral[0][0] not in [
            '=', '>', '<'
    ] and (search_space.lower() == 'filename'
           or search_space.lower() == 'filepath'):
        num_hits = namedtuple('hits', 'value')
        num_hits_suppressed = namedtuple('hits', 'value')
        (num_hits.value, num_hits_suppressed.value,
         results) = runIndexedSearch(dbfilenameFullPath, search_space, options)

    else:
        # Get total number of entries to search
        DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
        conn = DB.appConnectDB()
        entriesCount = DB.CountEntries()
        logger.debug("Total entries in search space: %d" % entriesCount)

        # Pre-load known_bad if required
        if searchType == 'KNOWNBAD':
            known_bad_data = LoadRegexBulkSearch(options.knownbad_file)

        # Establish communication queues
        tasks = multiprocessing.JoinableQueue()
        resultsProducers = multiprocessing.Queue()
        resultsConsumers = multiprocessing.Queue()
        hitHistogram_queue = multiprocessing.Queue()

        # Start producers/consumers
        num_consumers = 1
        num_producers = max(1, maxCores - 1)

        # Prep lock for progress update Producers
        progProducers = multiprocessing.Value('i', 0)
        # Prep lock for progress update Consumers
        progConsumers = multiprocessing.Value('i', 0)
        # Prep Consumers return values
        num_hits = multiprocessing.Value('i', 0)
        num_hits_suppressed = multiprocessing.Value('i', 0)

        logger.debug(
            'Using %d cores for searching / %d cores for dumping results' %
            (num_producers, num_consumers))

        # Queue tasks for Producers
        # Limit rowsPerJob to constrain memory use and ensure reasonable progress updates
        rowsPerJob = min((entriesCount / 8), 5000)
        logger.debug("RowsPerJob: %d" % rowsPerJob)
        num_tasks = 0
        for startingRowID in range(0, entriesCount - rowsPerJob, rowsPerJob):
            tasks.put(Task(startingRowID, rowsPerJob - 1))
            logger.debug(
                "Creating search job %d: [%d - %d]" %
                (num_tasks, startingRowID, startingRowID + rowsPerJob - 1))
            num_tasks += 1
        logger.debug("Creating search job %d: [%d - %d]" %
                     (num_tasks, num_tasks * (rowsPerJob),
                      ((num_tasks * rowsPerJob) +
                       (entriesCount - (num_tasks * (rowsPerJob) - 1)))))
        # Special consideration for the last one:
        tasks.put(
            Task(num_tasks * (rowsPerJob),
                 (entriesCount - ((num_tasks * rowsPerJob) - 1))))
        logger.debug("Number of tasks: %d" % num_tasks)

        # Add a poison pill for each producer
        for i in xrange(num_producers):
            tasks.put(None)

        # Start producer threads
        producers = [Producer(tasks, resultsProducers, dbfilenameFullPath, progProducers, num_consumers, \
                              searchType, search_space, options, num_hits, known_bad_data) for i in xrange(num_producers)]
        for producer in producers:
            producer.daemon = True  # Remove for debugging
            producer.start()

        # Start consumer threads
        consumers = [Consumer(resultsProducers, resultsConsumers, progConsumers, num_producers, outputFile, \
                              dbfilenameFullPath, searchType, search_space, options, num_hits, \
                              num_hits_suppressed, hitHistogram_queue, known_bad_data) for i in xrange(num_consumers)]
        for consumer in consumers:
            consumer.daemon = True  # Remove for debugging
            consumer.start()

        # Producer progress loop
        while (num_tasks > progProducers.value and progProducers.value >= 0):
            logger.debug("Producer num_tasks: %d - v.value: %d" %
                         (num_tasks, progProducers.value))
            update_progress(
                min(1,
                    float(progProducers.value) / float(num_tasks)),
                "Searching [%d]" %
                (num_hits.value - num_hits_suppressed.value))
            time.sleep(0.5)
        update_progress(
            1, "Searching [%d]" % (num_hits.value - num_hits_suppressed.value))

        # Wait for consumers dumping results to finish too
        while (num_hits.value > progConsumers.value
               and progConsumers.value >= 0):
            logger.debug("Consuming hit: %d / %d" %
                         (progConsumers.value, num_hits.value))
            update_progress(
                min(1,
                    float(progConsumers.value) / float(num_hits.value)),
                "Dumping results to disk [%d]" % progConsumers.value)
            time.sleep(0.5)

        # Make sure we dumped as many hits as we found
        assert (num_hits.value == progConsumers.value)
        update_progress(1,
                        "Dumping results to disk [%d]" % progConsumers.value)

        # Track Consumers deaths
        logger.debug("Waiting for consumer reverse-poison pills")
        while num_consumers > 0:
            tmp = resultsConsumers.get()
            # Check for reverse-poison pill
            if tmp is None:
                num_consumers -= 1
                logger.debug("Consumer finished!")
        logger.debug("All consumers accounted for")

        # Wait for consumer threads to finish
        logger.debug("Waiting for consumer threads to finish")
        for consumer in consumers:
            consumer.join()
        logger.debug("Consumer threads finished")

        # Print hit histogram:
        results = []
        results.append(('cyan', ("Hit histogram:", "", "")))
        while not hitHistogram_queue.empty():
            (name, regex, regex_hits) = hitHistogram_queue.get()
            results.append(('white', (name, regex, regex_hits)))
        if len(results) > 1:
            outputcolum(results)

    # Stop timer
    t1 = time.time()

    logger.info("Search hits: %d" % num_hits.value)
    logger.info("Suppresed duplicate hits: %d" % num_hits_suppressed.value)
    logger.info("Search time: %s" % (str(timedelta(seconds=(t1 - t0)))))

    if num_hits.value:
        logger.info("Head:")
        # Dump head of output file:
        num_lines = file_len(options.outputFile)
        from itertools import islice
        with open(options.outputFile) as myfile:
            head = list(islice(myfile, 5))
        for line in head:
            logger.info(line.strip('\n\r'))
        logger.info("(%d lines suppressed)" % max(0, (num_lines - 5)))

    return (num_hits.value, num_hits_suppressed.value, results)
Example no. 27
0
def export_interpolated_data(path,
                             X,
                             Y,
                             Z,
                             fesvar,
                             vdim=1,
                             complex=False,
                             nproc=1,
                             ncfile='data.nc',
                             curl=False,
                             return_mask=False):

    from netCDF4 import Dataset

    results = mp.JoinableQueue()
    workers = [None] * nproc

    for i in range(nproc):
        w = exporter_child(results,
                           i,
                           nproc,
                           path,
                           X,
                           Y,
                           Z,
                           fesvar,
                           vdim,
                           complex,
                           curl=curl)
        workers[i] = w
        time.sleep(0.1)
    for w in workers:
        w.daemon = True
        w.start()

    res = [results.get() for x in range(len(workers))]
    for x in range(len(workers)):
        results.task_done()

    size = len(X.flatten())
    if complex:
        ans = np.zeros((vdim, size), dtype=np.complex128)
    else:
        ans = np.zeros((vdim, size), dtype=np.float64)

    mask = np.zeros(len(X.flatten()), dtype=int) - 1
    for idx, mm, dd in res:
        if mm is None:
            print(dd)
            assert False, "Child Process Failed"
        else:
            if idx.size == 0: continue
            print("here", idx.shape, dd.shape)
            if vdim == 1:
                ans[idx] = dd
            else:
                ans[:, idx] = dd
            mask[idx] = mm

    ans = ans.reshape(-1, X.shape[0], X.shape[1], X.shape[2])
    mask = mask.reshape(X.shape[0], X.shape[1], X.shape[2])

    if ncfile != '':
        nc = Dataset(ncfile, "w", format='NETCDF4')
        nc.createDimension('vdim', vdim)
        nc.createDimension('dim_0', X.shape[0])
        nc.createDimension('dim_1', X.shape[1])
        nc.createDimension('dim_2', X.shape[2])
        if complex:
            a_real = nc.createVariable(fesvar + '_real', np.dtype('double'),
                                       ('vdim', 'dim_0', 'dim_1', 'dim_2'))
            a_real[:] = ans.real

            a_imag = nc.createVariable(fesvar + '_imag', np.dtype('double'),
                                       ('vdim', 'dim_0', 'dim_1', 'dim_2'))
            a_imag[:] = ans.imag
        else:
            a_real = nc.createVariable(fesvar, np.dtype('double'),
                                       ('vdim', 'dim_0', 'dim_1', 'dim_2'))
            a_real[:] = ans

        xx = nc.createVariable('X', np.dtype('double'),
                               ('dim_0', 'dim_1', 'dim_2'))
        yy = nc.createVariable('Y', np.dtype('double'),
                               ('dim_0', 'dim_1', 'dim_2'))
        zz = nc.createVariable('Z', np.dtype('double'),
                               ('dim_0', 'dim_1', 'dim_2'))
        rank = nc.createVariable('rank', np.dtype('double'),
                                 ('dim_0', 'dim_1', 'dim_2'))

        xx[:] = X
        yy[:] = Y
        zz[:] = Z
        rank[:] = mask

        nc.close()

    if return_mask:
        return ans, mask
    else:
        return ans
Example no. 28
0
        json_files = []
        for file_name in sorted(os.listdir(INPUT_JSON_DIR)):
            file_path = os.path.abspath(os.path.join(INPUT_JSON_DIR,
                                                     file_name))
            json_files.append(file_path)
    if args.input_avg_mq_json is not None:
        json_avg_mq_files = [args.input_avg_mq_json]
    else:
        json_avg_mq_files = []
        for file_name in sorted(os.listdir(INPUT_JSON_AVG_MQ_DIR)):
            file_path = os.path.abspath(
                os.path.join(INPUT_JSON_AVG_MQ_DIR, file_name))
            json_avg_mq_files.append(file_path)

    multiprocessing.set_start_method('spawn')
    queue1 = multiprocessing.JoinableQueue()
    queue2 = multiprocessing.JoinableQueue()
    num_files = len(newick_files)
    cpus = set_num_cpus(num_files, args.processes)
    # Set a timeout for get()s in the queue.
    timeout = 0.05

    for i, newick_file in enumerate(newick_files):
        json_file = json_files[i]
        json_avg_mq_file = json_avg_mq_files[i]
        queue1.put((newick_file, json_file, json_avg_mq_file))

    # Complete the preprocess_tables task.
    processes = [
        multiprocessing.Process(target=preprocess_tables,
                                args=(
Example no. 29
0
        self.a = a
        self.b = b

    def __call__(self):
        time.sleep(.1)  # pretend to take time to do the work
        return f'{self.a} * {self.b} = {self.a * self.b}'

    def __str__(self):
        return f'{self.a} * {self.b}'

    pass


if __name__ == "__main__":
    ### establishes communicate queues.
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    ### starts consumers
    num_consumers = multiprocessing.cpu_count() * 2
    logging.debug(f'creating {num_consumers} consumers')
    consumers = [Consumer(tasks, results) for _ in range(num_consumers)]
    for w in consumers:
        w.start()

    ### Enqueues jobs
    num_jobs = 10
    for i in range(num_jobs):
        tasks.put(Task(i, i))
    ### adds poison pill for each consumer
    for _ in range(num_consumers):
Example no. 30
0
def genCM(n_mc_numbers, processes=0):
    '''Uses multiple cores to try generate [n_mc_numbers] carmichael numbers
    using [processes] cores (processes = 0: auto detection of num cores)
    basic idea for multicore implementation from this site:
    http://www.doughellmann.com/PyMOTW/multiprocessing/communication.html#multiprocessing-queues
    '''

    # Establish communication queues
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start workers
    if processes == 0:
        if multiprocessing.cpu_count() == 1:  # single processor machine
            num_workers = 1
        else:
            num_workers = multiprocessing.cpu_count(
            ) - 1  # give one to the os...
    else:
        num_workers = processes

    print '\n---------------------------------------------------------------------'
    print 'Starting Carmichael Number generation'
    print '---------------------------------------------------------------------'
    print '\n                             ...creating %d workers\n' % num_workers
    workers = [Worker(tasks, results) for i in xrange(num_workers)]
    for w in workers:
        w.start()

    loop_cnt = 0
    taskcnt = 0
    aborted = False

    print 'Searching for', n_mc_numbers, 'Carmichael Numbers...'

    while results.qsize(
    ) < n_mc_numbers:  # check from time to time whether there's a result or empty queue
        if taskcnt > 20000:  # abort anyways after cnt checked numbers
            aborted = True  #implement handle for this later
            break
        if tasks.qsize(
        ) < num_workers * 2:  # fill up queue if it's running low
            for i in range(num_workers):
                tasks.put(Task_cm_check(
                    taskcnt, taskcnt +
                    1))  # each task is checking a number, with offset 1..
                if taskcnt % 1000 == 0:
                    print '   ... checking range: ', taskcnt + 1, '-', taskcnt + 1000
                taskcnt += 1
        loop_cnt += 1
        #sleep(0.1) # suspend loop, free up some cpu time for workers, don't do this here, workers are waiting for jobs else...

    # Add a poison pill for each Worker
    print '\nsending kill signal to processes....'
    for i in xrange(num_workers):
        tasks.put(None)
    print '                                    ...done'

    # calm down (let some old tasks finish running)
    print 'cleaning up processes and queues...'
    while tasks.qsize() > 0:
        sleep(0.5)
    print '                                    ...done'

    # Wait for all of the tasks to finish
    print 'waiting for processes to shutdown....'
    tasks.join()
    print '                                    ...done'

    print 'extracting results...'
    r = []
    while results.qsize():
        r.append(results.get(True, 0.1))  # throw away additional solutions
    print '                                    ...done'

    print '\n\n------------------------------------------------'
    print ' Final Result: '
    print '------------------------------------------------'
    for i, ele in enumerate(r):
        print 'CM Number #:', i + 1, 'is:', ele
    print '------------------------------------------------'
    print 'stats:'
    print '   spawned tasks:', taskcnt
    print '   loop counter :', loop_cnt
    print '------------------------------------------------'

    return True
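
Worker and Task_cm_check are not shown in this excerpt. Following the PyMOTW producer/consumer pattern the docstring points to, Worker is presumably a multiprocessing.Process subclass along these lines (a sketch, not the original code):

import multiprocessing


class Worker(multiprocessing.Process):

    def __init__(self, task_queue, result_queue):
        multiprocessing.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            task = self.task_queue.get()
            if task is None:            # poison pill: shut this worker down
                self.task_queue.task_done()
                break
            answer = task()             # Task_cm_check objects are assumed to be callable
            self.task_queue.task_done()
            if answer is not None:      # only report actual Carmichael numbers
                self.result_queue.put(answer)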