Example #1
def multi_validate_rows(rows, col_size):
    n_cores = 4
    print('N_CORES', n_cores)
 
    pool = Pool(n_cores)
    chunks = ((rows[i::n_cores], col_size) for i in range(n_cores))
    pool.imap(validate_rows, chunks)
    pool.close()
    pool.join()
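Example #1 never consumes the iterator returned by imap, so any exception raised inside validate_rows would never be re-raised in the parent. A minimal runnable sketch of the same fan-out (with a hypothetical validate_rows stub) that drains the results:

from multiprocessing import Pool

def validate_rows(args):
    # hypothetical stand-in: count rows whose length matches col_size
    rows, col_size = args
    return sum(1 for row in rows if len(row) == col_size)

def multi_validate_rows(rows, col_size, n_cores=4):
    chunks = ((rows[i::n_cores], col_size) for i in range(n_cores))
    with Pool(n_cores) as pool:
        # consuming the lazy iterator forces results (and any worker exceptions) to surface
        return list(pool.imap(validate_rows, chunks))

if __name__ == '__main__':
    print(multi_validate_rows([[1, 2], [3], [4, 5], [6, 7]], 2))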
Example #2
def repackage_revisions(revisions, revision_map, verify_run, staging_dir,
                        context, quit_event=None, progress_event=None):
  """Repackages all Chrome builds listed in revisions.

  This function calls 'repackage_single_revision' using a multiprocessing pool.
  """
  p = Pool(3)
  func = partial(repackage_single_revision, revision_map, verify_run,
                 staging_dir, context)
  p.imap(func, revisions)
  p.close()
  p.join()
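The functools.partial pattern used above (bind the fixed arguments, let imap supply the varying one as the last parameter) reduced to a minimal sketch with placeholder names:

from functools import partial
from multiprocessing import Pool

def scale(factor, offset, x):
    # the bound arguments come first; the item from the iterable arrives last
    return factor * x + offset

if __name__ == '__main__':
    func = partial(scale, 10, 1)
    with Pool(3) as pool:
        print(list(pool.imap(func, range(5))))  # [1, 11, 21, 31, 41]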
Example #3
def send(accounts, message):
    
    num = 0
    pool = Pool(processes=cpu_count()*2)

    for data in accounts :
        
        email, password = data['email'], data['pass']
        proxy, num = proxies[num], num + 1
        cookie = "cookies/" + str(data['email']) + "_cookie"
        
        pool.imap(do_send, [(email, password, message, proxy, cookie)])
        
    pool.close()
Example #4
def build_condensed_matrix(seqs, mode=2):
    result = np.array([], dtype=default_dtype)
    p = Pool(processes=cpu_count())
    if mode == 1:
        n = len(seqs)
        #chunksize = 500000
        chunksize = int(n * (n - 1) / 2 / cpu_count() / 2)
        result_one = p.imap(get_score, make_iter(seqs, mode=1), chunksize=chunksize)
        result = np.array(list(result_one), dtype=default_dtype)
    else:
        result_one_row = p.imap(get_scores_one_row, make_iter(seqs, mode=2), chunksize=100)
        result = np.concatenate(list(result_one_row))
    #p.close()
    #p.join()
    return result
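Example #4 tunes chunksize by hand. A small sketch with a hypothetical square task illustrating the trade-off: a larger chunksize means fewer pickling round-trips for cheap tasks, at the cost of coarser load balancing.

from multiprocessing import Pool, cpu_count

def square(x):
    return x * x

if __name__ == '__main__':
    items = range(100000)
    with Pool(cpu_count()) as pool:
        # each task message now carries 1000 items instead of one
        total = sum(pool.imap(square, items, chunksize=1000))
    print(total)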
def translate_concurrent(input_object, url, weights=None, num_processes=8):

    pool = Pool(processes=num_processes)
    text_args = [(line, weights, url) for line in input_object]

    for translated_line in pool.imap(translate_single_line, text_args):
        print translated_line
def main(data, total):
	global all_headlines
	global inv
	with open(INV_DOC_COUNTS) as inf:
		inv = json.load(inf)
	all_headlines = get_headlines(data)
	pool = Pool(1)
	counter = 0
	out_data = []
	for article, possible_headlines in pool.imap(assign_headline_tfidf_total, data[:3]):
		print counter, article["headline"], possible_headlines[0]
		counter += 1
		article["top_tfidf"] = possible_headlines
		out_data += [article]

	with open(RESULT_FILE,'w') as outf:
		json.dump(out_data, outf)

	num_correct = 0
	num_incorrect = 0
	incrt = []
	for article in out_data:
		if article["headline"] == article["tf_idf_prediction"][0][0]:
			num_correct += 1
		else:
			num_incorrect += 1
			incrt += [article]
	print "Num correct: %i" % num_correct
	print "Num incorrect: %i" % num_incorrect

	with open(OUT_INCORRECT,'w') as outf:
		json.dump(incrt, outf)
def main(opts):
    """The main loop of the module, do the renaming in parallel etc."""
    log = logging.getLogger("exif2timestream")
    setup_logs(opts)
    # beginneth the actual main loop
    start_time = time()
    cameras = parse_camera_config_csv(opts["-c"])
    n_images = 0
    for camera in cameras:
        msg = "Processing experiment {}, location {}\n".format(
            camera[FIELDS["expt"]],
            camera[FIELDS["location"]],
        )
        msg += "Images are coming from {}, being put in {}".format(
            camera[FIELDS["source"]],
            camera[FIELDS["destination"]],
        )
        print(msg)
        log.info(msg)
        for ext, images in find_image_files(camera).iteritems():
            images = sorted(images)
            n_cam_images = len(images)
            print("{0} {1} images from this camera".format(n_cam_images, ext))
            log.info("Have {0} {1} images from this camera".format(
                n_cam_images, ext))
            n_images += n_cam_images
            last_date = None
            subsec = 0
            count = 0
            # TODO: sort out the whole subsecond clusterfuck
            if "-1" in opts and opts["-1"]:
                log.info("Using 1 process (What is this? F*****g 1990?)")
                for image in images:
                    count += 1
                    print("Processed {: 5d} Images".format(count), end='\r')
                    process_image((image, camera, ext))
            else:
                from multiprocessing import Pool, cpu_count
                if "-t" in opts and opts["-t"] is not None:
                    try:
                        threads = int(opts["-t"])
                    except ValueError:
                        threads = cpu_count() - 1
                else:
                    threads = cpu_count() - 1
                # Ensure that we're using at least one thread
                threads = max(threads, 1)
                log.info("Using {0:d} processes".format(threads))
                # set the function's camera-wide arguments
                args = zip(images, cycle([camera]), cycle([ext]))
                pool = Pool(threads)
                for _ in pool.imap(process_image, args):
                    count += 1
                    print("Processed {: 5d} Images".format(count), end='\r')
                pool.close()
                pool.join()
            print("Processed {: 5d} Images. Finished this cam!".format(count))
    secs_taken = time() - start_time
    print("\nProcessed a total of {0} images in {1:.2f} seconds".format(
          n_images, secs_taken))
class SimStream(object):
  def __init__(self, target_dir, configs, super_seed, copy_op=move, num_workers=1):
    self.template = get_config_template()
    self.work_dir = tempfile.mkdtemp(prefix='craysim')

    self.seed_stream = seed_stream(super_seed)
    self.pool = Pool(num_workers, maxtasksperchild=1)

    config_stream = izip(
      configs, repeat(self.work_dir), self.seed_stream, repeat(self.template)
    )

    self.result_stream = self.pool.imap(sim_worker, config_stream, chunksize=1)
    self.copy_op = copy_op
    self.target_dir = target_dir

  def stream(self):
    for config, stdout, stderr, workspace, output_path in self.result_stream:
      if output_path is None:
        raise Exception(stdout + '\n\n' + stderr)

      try:
        yield self.copy_op(self.target_dir, config, stdout, stderr)
      except Exception as e:
        import traceback
        import warnings

        warnings.warn(str(config))
        traceback.print_exc()

  def clean(self):
    import shutil as sh
    sh.rmtree(self.work_dir)
Example #9
def main(out):
    out.write(('P4\n%d %d\n' % (size, size)).encode('ASCII'))

    pool = Pool()
    step = 2.0j / size
    for row in pool.imap(do_row, (step*y-(1.5+1j) for y in range(size))):
        out.write(row)
Example #10
def main():

    parser = ArgumentParser(description="Speed up your SHA. A different hash style.")
    parser.add_argument("-1", "--sha1", action="store_true")
    parser.add_argument("-2", "--sha224", action="store_true")
    parser.add_argument("-3", "--sha256", action="store_true")
    parser.add_argument("-4", "--sha384", action="store_true")
    parser.add_argument("-5", "--sha512", action="store_true")
    parser.add_argument("-f", "--file", type=str, help="The path to the file")

    if len(sys.argv) == 1:
        parser.print_help()
        return

    global args
    args = parser.parse_args()

    hashtree = ""

    big_file = open(args.file, "rb")
    pool = Pool(multiprocessing.cpu_count())

    for chunk_hash in pool.imap(hashing, chunks(big_file)):
        hashtree = hashtree + chunk_hash

    pool.terminate()

    if os.path.getsize(args.file) < 20971520:
        print(hashtree)
    else:
        print(str(hashing(hashtree)))
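A condensed, self-contained sketch of the same chunk-and-hash idea using hashlib directly (it hashes this script itself, so no external file is assumed); because imap preserves input order, the concatenated chunk digests are deterministic:

import hashlib
from multiprocessing import Pool

def sha256_hex(chunk):
    return hashlib.sha256(chunk).hexdigest()

def read_chunks(fh, size=1 << 20):
    # yield fixed-size blocks until EOF
    while True:
        block = fh.read(size)
        if not block:
            return
        yield block

if __name__ == '__main__':
    with open(__file__, 'rb') as fh, Pool() as pool:
        digests = list(pool.imap(sha256_hex, read_chunks(fh)))
    print(sha256_hex(''.join(digests).encode('ascii')))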
Example #11
def main_internal(args, name='mxsniff'):
    """
    Console script

    >>> main_internal(['*****@*****.**'])
    [email protected]: google-gmail
    """
    import argparse
    import json
    from multiprocessing import Pool

    parser = argparse.ArgumentParser(
        prog=name,
        description='Identify email service providers given an email address, URL or domain name',
        fromfile_prefix_chars='@')
    parser.add_argument('names', metavar='email_or_url', nargs='+',
        help="email or URL to look up; use @filename to load from a file")
    parser.add_argument('-v', '--verbose', action='store_true',
        help="show both provider name and mail server names")
    parser.add_argument('-i', '--ignore-errors', action='store_true',
        help="ignore DNS lookup errors and continue with next item")
    args = parser.parse_args(args)

    pool = Pool(processes=10)
    it = pool.imap(multiprocess_mxsniff, args.names, 10)
    try:
        for result in it:
            if args.verbose:
                print(json.dumps(result) + ',')
            else:
                print("{item}: {provider}".format(item=result['query'], provider=', '.join(result['match'])))
    except KeyboardInterrupt:
        pool.terminate()
Example #12
 def process(self):
     
     try:
         urls = redis_one.hkeys(self.sitemap_prefix)
         ofh = open('test_urls.txt', 'w+')
         urls.sort()
         ofh.write(('\n'.join(urls)).encode('utf8', 'ignore'))
         logger.error('total urls len %s' % len(urls))
         dict_res = defaultdict(int)
         i = 0
         while i <= len(urls):
             pool = Pool(processes=15)
             q = Queue()
             dict_subres = defaultdict(int)
             list_urls = [urls[i + j * 10000:i+(j+1)*10000] for j in range(15)]
             #list_dict_res = list(pool.map_async(parse_content, list_urls))
             for d in pool.imap(parse_content, list_urls):
                 for k, v in d.iteritems():
                     dict_res[k] += v
             logger.error('Parser %s %s' % (len(list_urls), len(dict_res)))
             i += 10000 * 15
         sorted_dict_res = sorted(dict_res.iteritems(), key = lambda s: s[1], reverse=True)
         ofh = open('./test_sitemap_keywords', 'w+')
         ofh.write('\n'.join(['%s\t%s' % (k,v) for (k,v) in sorted_dict_res if v>=3]).encode('utf8', 'ignore'))
         ofh.close()
     except:
         logger.error(traceback.format_exc())
Example #13
def y():
    pool = Pool(2)
    x, y = ({}, {})
    x = numpy.array([2,3])
    y = numpy.array([-1,3])
    for a in pool.imap(change, [conv_str(x), conv_str(y)]):
        print a
Example #14
def main():
    seq = stdin.read()
    ilen = len(seq)

    seq = sub('>.*\n|\n', '', seq)
    clen = len(seq)

    pool = Pool(initializer = init, initargs = (seq,))

    variants = (
          'agggtaaa|tttaccct',
          '[cgt]gggtaaa|tttaccc[acg]',
          'a[act]ggtaaa|tttacc[agt]t',
          'ag[act]gtaaa|tttac[agt]ct',
          'agg[act]taaa|ttta[agt]cct',
          'aggg[acg]aaa|ttt[cgt]ccct',
          'agggt[cgt]aa|tt[acg]accct',
          'agggta[cgt]a|t[acg]taccct',
          'agggtaa[cgt]|[acg]ttaccct')
    for f in zip(variants, pool.imap(var_find, variants)):
        print(f[0], f[1])

    subst = {
          'B' : '(c|g|t)', 'D' : '(a|g|t)',   'H' : '(a|c|t)', 'K' : '(g|t)',
          'M' : '(a|c)',   'N' : '(a|c|g|t)', 'R' : '(a|g)',   'S' : '(c|g)',
          'V' : '(a|c|g)', 'W' : '(a|t)',     'Y' : '(c|t)'}
    for f, r in list(subst.items()):
        seq = sub(f, r, seq)

    print()
    print(ilen)
    print(clen)
    print(len(seq))
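Example #14 hands the (potentially large) sequence to every worker exactly once via initializer/initargs instead of shipping it with each task. The same pattern stripped down to dummy data:

from multiprocessing import Pool
from re import findall

def init(arg):
    # runs once per worker process; stashes the shared data in a module global
    global seq
    seq = arg

def count(pattern):
    return pattern, len(findall(pattern, seq))

if __name__ == '__main__':
    data = 'agggtaaatttaccct' * 10
    with Pool(initializer=init, initargs=(data,)) as pool:
        for pattern, n in pool.imap(count, ['agggtaaa', 'tttaccct']):
            print(pattern, n)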
Example #15
def parmap(f,problems,leavefree=1,debug=False,verbose=False):
    global mypool
    problems = list(problems)
    njobs    = len(problems)

    if njobs==0:
        if verbose: print('NOTHING TO DO?')
        return []

    if not debug and (not 'mypool' in globals() or mypool is None):
        if verbose: print('NO POOL FOUND. RESTARTING.')
        mypool = Pool(cpu_count()-leavefree)

    enumerator = map(f,problems) if debug else mypool.imap(f,problems)
    results = {}
    sys.stdout.write('\n')
    for i,result in enumerator:
        sys.stdout.write('\rdone %0.1f%% '%((i+1)*100./njobs))
        sys.stdout.flush()
        if isinstance(result,tuple) and len(result)==1:
            result=result[0]
        results[i]=result
        if verbose and type(result) is RuntimeError:
            print('ERROR PROCESSING',problems[i])

    sys.stdout.write('\r            \r')
    return [results[i] if i in results else None \
        for i,k in enumerate(problems)]
Example #16
def parmap_dict(f,problems,leavefree=1,debug=False,verbose=False):
    global mypool
    problems = list(problems)
    njobs    = len(problems)

    if njobs==0:
        if verbose: print('NOTHING TO DO?')
        return []

    if not debug and (not 'mypool' in globals() or mypool is None):
        if verbose: print('NO POOL FOUND. RESTARTING.')
        mypool = Pool(cpu_count()-leavefree)

    enumerator = map(f,problems) if debug else mypool.imap(f,problems)
    results = {}
    sys.stdout.write('\n')
    for key,result in enumerator:
        if isinstance(result,tuple) and len(result)==1:
            result=result[0]
        results[key]=result
        if verbose and type(result) is RuntimeError:
            print('ERROR PROCESSING', key)

    sys.stdout.write('\r            \r')
    
    results = {key:results[key] for key in problems if key in results and not results[key] is None}
    return results
Example #17
def newest_snapshot(project_id, hosts=None, timeout=20):
    """
    Return most recent snapshot or empty string if none.

    If host is a single ip address, return newest snapshot on that host.

    If hosts is a list of ip addresses (or hostnames),
    returns a dictionary with keys the entries in hosts
    and the values the names of the newest snapshots.
    Hosts that don't respond within timeout seconds are
    ignored.
    """
    if not isinstance(hosts, list):
        return _newest_snapshot(project_id, hosts)

    pool = Pool(processes=len(hosts))
    start = time.time()
    x = pool.imap(mp_newest_snapshot, [(project_id, dest) for dest in hosts])
    result = []
    while True:
        try:
            t = timeout - (time.time() - start)
            if t > 0:
                result.append(x.next(t))
            else:
                raise TimeoutError
        except TimeoutError, mesg:
            log.info("timed out connecting to some destination -- %s", mesg)
            pool.terminate()
            break
        except StopIteration:
            break
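The per-item timeout idea from Example #17 as a small self-contained sketch: the iterator returned by imap has a next(timeout) method, so the remaining budget can be recomputed on every iteration (slow is a hypothetical task; leaving the with block terminates any stragglers):

import time
from multiprocessing import Pool, TimeoutError

def slow(x):
    time.sleep(x)
    return x

if __name__ == '__main__':
    with Pool(2) as pool:
        it = pool.imap(slow, [0.1, 0.2, 5])
        results, deadline = [], time.time() + 1.0
        while True:
            try:
                # never wait past the overall deadline for the next result
                results.append(it.next(max(deadline - time.time(), 0)))
            except TimeoutError:
                print('timed out, partial results:', results)
                break
            except StopIteration:
                break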
Example #18
def process(dataset, result):
  """
  For each unknown_author, calculate the distance between his AR
  and the IR of each known_author. Then find the true place of
  unknown_author. Save the true place and distance value into @result.
  """
  global AR_TYPE
  if AR_TYPE.startswith('fixed'):
    ar_authors = loader.get_fixed_authors()
  else:
    ar_authors = dataset.authors[0:40]

  tups = []
  for unknown in ar_authors:
    tups.append((unknown, dataset))

  pool = Pool(processes=NUMBER_OF_CORES)
  it = pool.imap(process_distance_unknown, tups)
  pool.close()
  pool.join()

  for unknown in ar_authors:
    distance_results = it.next()
    for distance_result in distance_results:
      [ar_size, position, distance] = distance_result
      result.add(ar_size, unknown, position, distance)
  return
def subsample(cache_dir, image_sets, ipython_profile):
    parameters = [(cache_dir, images) for images in image_sets]

    if ipython_profile:
        from IPython.parallel import Client, LoadBalancedView
        client = Client(profile='lsf')
        lview = client.load_balanced_view()
        generator = lview.imap(_compute_group_subsample, parameters)
    elif ipython_profile == False:
        generator = (_compute_group_subsample(p) for p in parameters)
    else:
        from multiprocessing import Pool
        lview = Pool()
        generator = lview.imap(_compute_group_subsample, parameters)
    progress = progressbar.ProgressBar(widgets=['Subsampling:',
                                                progressbar.Percentage(), ' ',
                                                progressbar.Bar(), ' ', 
                                                progressbar.Counter(), '/', 
                                                str(len(parameters)), ' ',
                                                progressbar.ETA()],
                                       maxval=len(parameters))
    results = list(generator)

    subsample = []
    for i, (p, r) in enumerate(zip(parameters, results)):
        if r is None:
            print >>sys.stderr, '#### There was an error, recomputing locally: %s' % parameters[i][1]
            results[i] = _compute_group_subsample(p) # just to see throw the exception
        subsample.extend(r)

    print "the subsampling set contains %d items" % len(subsample)
    return subsample
def run():
    H = np.random.rand(2,300)
    W = np.random.rand(3000, 300)

    t = time()
    C1 = cdist(H, W, "sqeuclidean")
    print time() - t

    print "done 1"

    t = time()
    k = cpu_count()
    N = H.shape[0]
    idxs = np.array_split(np.arange(N), k*10)
    jobs = [(H[ix], W, ix) for ix in idxs]

    p = Pool(k)
    C1p = np.empty((N, W.shape[0]))
#    for h, w, ix in jobs:
#        C1p[ix] = cdist(h, w, "sqeuclidean")
    t2 = time()
    for h0, ix in p.imap(f, jobs):
        C1p[ix] = h0
    print time() - t2
    p.close()
    print time() - t
    assert np.allclose(C1, C1p)
Example #21
    def find_words(self):
        """ Run all words through find_word using Pool.map """

        if not all((self.number, self.wordlist, self.combos)):
            raise ValueError('Must have a number, a wordlist, and combos!')

        # TODO: Reduce memory footprint and waste on this whole operation.
        def format_results(resultsets):
            """ format final results """
            if resultsets:
                resultsfmt = {}
                for resultset in resultsets:
                    if resultset:
                        resultsfmt.update(resultset)
                return resultsfmt
            return {}

        # setup a pool of processes/workers.
        pool = Pool(processes=self.processes)

        # map find_word to the wordlist, and format final results.
        rawresult = pool.imap(
            self.find_word,
            self.wordlist,
            chunksize=self.chunksize)
        results = format_results(rawresult)

        return results, self.totallen
def main():
	starttime = datetime.now()
	concatenate = False
	parser = argparse.ArgumentParser(description="This program will run \
KaKs_Calculator on a directory.")
	parser.add_argument("-i", help = "Path to input file.")
	parser.add_argument("-o", help = "Path to output file.")
	parser.add_argument("-m", default = "NG", help = "Method for calculating Ka/Ks.") 
	parser.add_argument("-t", type = int, default = 1, help = "Number of threads.")
	# Parse arguments and assign to variables
	args = parser.parse_args()
	indir = args.i
	if indir[-1] != "/":
		indir += "/"
	outdir = args.o
	if outdir[-1] != "/":
		outdir += "/"
	method = args.m
	cpu = args.t
	if cpu > MAXCPU:
		cpu = MAXCPU
	# Call Ka/Ks_Calculator in parallel.
	genes = glob(indir + "*.axt")
	l = int(len(genes))
	pool = Pool(processes = cpu)
	func = partial(calculateKaKs, indir, outdir, method)
	print("\tRunning KaKs_Caclulator with", str(cpu), "threads....")
	rcml = pool.imap(func, genes)
	pool.close()
	pool.join()
	# Compile output
	compileKsKs(outdir)
	print("\tKaKs_Calculator runtime: ", datetime.now() - starttime)
def main(formula_list):
    formulas = open(formula_list).read().split("\n")[:MAX_NUMBER]
    try:
        os.mkdir(IMAGE_DIR)
    except OSError as e:
        pass  # ignored: os.mkdir raises OSError if the directory already exists
    print("Turning formulas into images...")
    pool = Pool(THREADS)
    names = list(pool.imap(formula_to_image, formulas))

    zipped = list(zip(formulas, names))

    new_dataset_lines = []
    new_formulas = []
    ctr = 0
    for formula in zipped:
        if formula[1] is None:
            continue
        for rendering_setup in formula[1]:
            new_dataset_lines.append(str(ctr) + " " + " ".join(rendering_setup))
        new_formulas.append(formula[0])
        ctr += 1

    with open(NEW_FORMULA_FILE, "w") as f:
        f.write("\n".join(new_formulas))

    with open(DATASET_FILE, "w") as f:
        f.write("\n".join(new_dataset_lines))
Example #24
def extract_new_dataframes(dirs):
    pool = Pool(8)
    pbar = tqdm.tqdm(total=len(dirs))
    for job in pool.imap(extract_dataframe_subdir, dirs):
        pbar.update(1)
    pbar.close()
    pool.close()
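The tqdm-over-imap pattern used here (and again in Examples #30 and #39) reduced to a minimal sketch; total must be passed explicitly because the lazy iterator has no length:

from multiprocessing import Pool
from tqdm import tqdm  # assumed installed, as in the surrounding examples

def work(x):
    return x * x

if __name__ == '__main__':
    items = list(range(1000))
    with Pool(4) as pool:
        results = list(tqdm(pool.imap(work, items), total=len(items)))
    print(sum(results))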
Example #25
def process_fasta_file(fasta_file, out_file, num_processes):

    pool = Pool(processes = num_processes)

    outgen = pool.imap(SubmitELMServer, fasta_iter(fasta_file), chunksize=2*num_processes)

    with open(out_file, 'w') as handle:
        writer = csv.writer(handle, delimiter = '\t')
        writer.writerow(['Header', 'ELM', 'Start', 'End', 'Match'])
        for name, html in outgen:
            if html:
                try:
                    out = ReadData(html)
                except:
                    continue

                logging.warning('%s had %i matches' % (name, len(out)))
                for elm, pos in out:
                    try:
                        outrow = [name, elm] + extract_numbers(pos[0]) + [pos[1]]
                    except:
                        continue
                    writer.writerow(outrow)
            else:
                logging.warning('%s had no ELMs' % name)
Example #26
def calc_mv_classifier(clf, scorer, regions=None, processes=7, method='sequential'):
    import os.path as path
    from tempfile import mkdtemp

    n_regions = clf.data.shape[0]
    if regions is None:
        regions = range(0, n_regions)

    if processes != 1:
        from multiprocessing import Pool
        pool = Pool(processes=processes)
    else:
        pool = itertools

    pb = tools.ProgressBar(len(regions), start=True)

    filename = path.join(mkdtemp(), 'data.dat')
    data = np.memmap(filename, dtype='object', mode='w+', shape=clf.comp_dims)
    data[:] = clf.data[:]

    overall_results = []
    shared_args = (filename, clf.classifier, scorer, clf.comp_dims,
                   clf.feature_importances, np.array(clf.feature_names), method)
    for result in pool.imap(calc_mv_parallel_classifier,
                            itertools.izip(itertools.repeat(shared_args), regions)):
        pb.next()
        for row in result:
            overall_results.append(row)

    overall_results = pd.DataFrame(
        overall_results, columns=['score', 'num_features', 'region', 'feature'])
    overall_results.region += 1
    return overall_results
Example #27
    def fit_parallel(self, X, num_workers=4):
        import gc
        gc.collect()
        pool = Pool(num_workers, maxtasksperchild=2)

        share = min(int(2e5), math.ceil(len(X)/num_workers))
        # share = int(1e5)
        tagger = GrammarTagger()

        num_parts = math.ceil(len(X)/share)
        x_gen = (X[i*share:i*share+share] for i in range(num_parts))

        # delegate work to all available processes
        i  = 0
        for result in pool.imap(Processor(tagger, self.tagtype), x_gen):
            tag_results, base_struct_results = result
            for base_struct, count in base_struct_results.items():
                self.base_structures[base_struct] += count
                self.counter += count
            for tag, terminals in tag_results.items():
                for string, count in terminals.items():
                    self.tag_dicts[tag][string] += count
            i += 1
            log.info("Processed {}/{} result batches...".format(i, num_parts))


        log.info("Fitting completed.")
Example #28
def get_valid_fragments(G, stoich_rank):
    #reactions, complexes = bipartite.sets(G)
    complexes, reactions = bipartite.sets(G)

    complexes = list(complexes)
    reactions = list(reactions)

    if 'w1' not in complexes and 'w1' not in reactions:
        raise Exception('my hack to resolve this unexpected behavior shown by bipartite.sets assumes that reaction nodes are named \'w1\', \'w2\', ...')
    
    if 'w1' in complexes:
        complexes, reactions = reactions, complexes

    if not ('w1' in reactions and 's1' in complexes):
        raise Exception('Something went wrong generating the lists of complexes of reactions.')

    complex_perms = list(it.combinations(complexes,stoich_rank))
    reaction_perms = list(it.combinations_with_replacement(reactions,stoich_rank))
    fragments = list(it.product(complex_perms, reaction_perms))

    valid_fragments = []
    
    pool = Pool()
    chunksize = 100

    myval = functools.partial(validate_fragments, G, stoich_rank)
    
    fragment_list = pool.imap(myval, fragments, chunksize)
    valid_fragments = [f for f in fragment_list if f is not None]

    return get_unique_fragments(valid_fragments)
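Example #28 lets the worker return None for fragments that fail validation and filters afterwards. The same validate-and-filter shape in a tiny sketch (keep_even is a hypothetical check):

from multiprocessing import Pool

def keep_even(n):
    # return the item when it passes the check, None otherwise
    return n if n % 2 == 0 else None

if __name__ == '__main__':
    with Pool() as pool:
        kept = [r for r in pool.imap(keep_even, range(20), 5) if r is not None]
    print(kept)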
Example #29
def main():

    parser = ArgumentParser(description="Speed up your SHA. A different hash style.")
    parser.add_argument('-1', '--sha1', action='store_true')
    parser.add_argument('-2', '--sha224', action='store_true')
    parser.add_argument('-3', '--sha256', action='store_true')
    parser.add_argument('-4', '--sha384', action='store_true')
    parser.add_argument('-5', '--sha512', action='store_true')
    parser.add_argument('-f', '--file', type=str, help="The path to the file")

    if len(sys.argv) == 1:
        parser.print_help()
        return

    global args
    args = parser.parse_args()

    hashtree = ''

    big_file = open(args.file, 'rb')
    pool = Pool(multiprocessing.cpu_count())

    for chunk_hash in pool.imap(hashing, chunks(big_file)):
        hashtree += chunk_hash + ":hash"

    pool.terminate()

    print(str(hashing(hashtree.encode('ascii'))))
Example #30
def save_make_pseudo_data(
        pred_data_dir='/data/pneumo_log/val_1/2019_0815_1742/submission/snapshot_model_2/',
        zero_max=0.005,
        one_min=0.8,
        cpu_num=16,
        test_base_path='/data/pneumo/dicom-images-test/',
        test_data=True):
    '''
    save pseudo label as dictionary {'img':, 'mask'} under pred_data_dir+'/pseudo/'
    This can be applied to train data (fold) too. set test_data=False
    '''

    if test_data:
        save_path = pred_data_dir + '/pseudo/'
    else:
        save_path = pred_data_dir + '/pseudo_train_fold/'
    data_prep._make_dir(save_path)
    print('start to make pseudo label under {}'.format(save_path))
    pred_data_path_list = glob(pred_data_dir + '/*.npy')

    p = Pool(processes=cpu_num)
    job_args = [(pred_data_path, save_path, zero_max, one_min, test_base_path,
                 test_data) for pred_data_path in pred_data_path_list]
    list(tqdm(p.imap(_wrap_save_pseudo_label, job_args), total=len(job_args)))
Example #31
def parse_nl_data(path, outpath):
    Path(outpath).mkdir(parents=True, exist_ok=True)
    pool = Pool(cpu_count())
    total_files = sum(1 for _ in glob.glob("{}/*.json".format(path)))
    for part in range(total_files):
        with open('{}/split-{:03d}.json'.format(path, part),
                  'r',
                  encoding='utf-8') as f:
            data = [json.loads(line.strip()) for line in f]

        results = []
        with tqdm(total=len(data), desc='Processing') as pbar:
            for i, ex in enumerate(pool.imap(process_chunk, data, 1000)):
                pbar.update()
                tokens = ex.split()
                if len(tokens) > 10:
                    results.append(' '.join(tokens))

        if part == total_files - 1:
            with open('{}/test.description.txt'.format(outpath),
                      'w',
                      encoding='utf-8') as fw:
                fw.write('\n'.join(results[:10000]))
            with open('{}/valid.description.txt'.format(outpath),
                      'w',
                      encoding='utf-8') as fw:
                fw.write('\n'.join(results[10000:20000]))
            with open('{}/train.{}.description.txt'.format(outpath, part),
                      'w',
                      encoding='utf-8') as fw:
                fw.write('\n'.join(results[20000:]))
        else:
            with open('{}/train.{}.description.txt'.format(outpath, part),
                      'w',
                      encoding='utf-8') as fw:
                fw.write('\n'.join(results))
Example #32
def main(data_path: str = None):
    '''
    Download quarterly and base data from https://finance.yahoo.com

    Parameters
    ----------
    data_path:
        path to folder in which downloaded data will be stored.
        OR ``None`` (downloading path will be as ``yahoo_data_path`` from 
        `~/.ml_investment/config.json`
    '''
    if data_path is None:
        config = load_config()
        data_path = config['yahoo_data_path']

    global _data_path
    _data_path = data_path
    tickers = load_tickers()['base_us_stocks']
    os.makedirs('{}/quarterly'.format(data_path), exist_ok=True)
    os.makedirs('{}/base'.format(data_path), exist_ok=True)

    p = Pool(12)
    for _ in tqdm(p.imap(_single_ticker_download, tickers)):
        pass
Example #33
def cross_validate(answersets, labels, cfier_factory, num_rounds=num_rounds):
    """
    Perform num_rounds-fold cross-validation of the model, returning
    the list of test scores in each fold.
    """

    # Do not pass cv_data as parameters as that'll create a separate copy
    # for each sub-process, dramatically increasing memory usage;
    # 16GB RAM is not enough for 8-thread cross-validation on large2180.
    global _g_cv_data
    _g_cv_data = (answersets, labels, cfier_factory)

    processes = os.environ.get('ANSWERTRAIN_N_THREADS',
                               os.environ.get('YODAQA_N_THREADS', None))
    if processes is not None: processes = int(processes)
    pool = Pool(processes=processes)

    scores = []
    for res in pool.imap(cross_validate_one, range(num_rounds)):
        print('// (test) ' + test_msg(*res))
        scores.append(list(res))
    pool.close()

    return np.array(scores)
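Example #33 avoids pickling the training data for every task by stashing it in a module global before the pool is created. A minimal sketch of that pattern; it relies on the workers inheriting the global through fork, so a fork-capable platform (Linux) is assumed:

from multiprocessing import Pool, set_start_method

_cv_data = None  # filled in before the pool is created

def score_fold(fold):
    # workers read the inherited global instead of receiving a pickled copy per task
    answers, labels = _cv_data
    return fold, sum(labels) / len(labels)

if __name__ == '__main__':
    set_start_method('fork')  # assumption: fork is available (not on Windows)
    _cv_data = (list(range(10000)), [0, 1] * 5000)
    with Pool(4) as pool:
        for fold, score in pool.imap(score_fold, range(3)):
            print(fold, score)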
Example #34
 def _f(self, w):
     # it turned out that it doesn't pay off to evaluate the function
     # in separate processes, so we turn it off
     if False:  # self.multicore:
         likelihood = 0
         pool = Pool()
         try:
             for i, (f_, d_) in enumerate(
                     pool.imap(
                         with_tracing(_methodcaller('_f',
                                                    sideeffects=True)),
                         [(l, w) for l in self.learners])):
                 self.learners[i].__dict__ = d_
                 likelihood += f_
         except Exception as e:
             logger.error('Error in child process. Terminating pool...')
             pool.close()
             raise e
         finally:
             pool.terminate()
             pool.join()
         return likelihood
     else:
         return sum([l._f(w) for l in self.learners])
Example #35
    def calculate(self, data_loader, info_df: pd.DataFrame) -> pd.DataFrame:
        '''     
        Interface to calculate targets for dates and tickers in info_df
        based on data from data_loader
        
        Parameters
        ----------
        data_loader:
            class implements load_quarterly_data(tickers: List[str]) -> 
                                                 pd.DataFrame interface
        info_df:
            pd.DataFrame containing information of tickers and dates
            to calculate targets for. Should have columns: ["ticker", "date"].               
                      
        Returns
        -------
            pd.DataFrame with targets having 'y' column
        '''
        self._data_loader = data_loader
        grouped = info_df.groupby('ticker')['date'].apply(
            lambda x: x.tolist()).reset_index()
        params = [(ticker, dates) for ticker, dates in grouped.values]

        n_jobs = 10
        p = Pool(n_jobs)
        result = []
        for ticker_result in tqdm(p.imap(self._single_ticker_target, params)):
            result.append(ticker_result)

        result = pd.concat(result, axis=0)
        result = result.drop_duplicates(['ticker', 'date'])
        result = pd.merge(info_df, result, on=['ticker', 'date'], how='left')
        result = result.set_index(['ticker', 'date'])
        result = result.infer_objects()

        return result
  def create_image_thumbs(self):
    '''
    Create output thumbs in 32px, 64px, and 128px
    '''
    print(' * creating image thumbs')
    resize_args = []
    n_thumbs = len(self.image_files)
    for c, j in enumerate(self.image_files):
      sizes = []
      out_paths = []
      for i in sorted(self.sizes, key=int, reverse=True):
        out_dir = join(self.output_dir, 'thumbs', str(i) + 'px')
        out_path = join( out_dir, get_filename(j) + '.png' )
        if os.path.exists(out_path) and not self.rewrite_image_thumbs:
          continue
        sizes.append(i)
        out_paths.append(out_path)
      if len(sizes) > 0:
        resize_args.append([j, c, n_thumbs, sizes, out_paths])

    pool = Pool()
    for result in pool.imap(resize_thumb, resize_args):
      if result:
        self.errored_images.add( get_filename(result) )
    def remove_incomplete_flows(self):
        print('\nStarted removing incomplete flows.')
        pool = Pool(self.num_worker)

        num_cases = np.max(self.csv_file['Case_ID']) + 1
        print('\tFound number of cases.')
        case_ids = range(num_cases)
        cases = []
        for i in case_ids:
            cases.append(
                self.csv_file.loc[self.csv_file['Case_ID'] == i].values)
        print('\tSeparated cases.')

        sub_cases = []
        k, m = divmod(num_cases, self.num_worker)
        for i in range(self.num_worker):
            head = i * k
            tail = head + k

            if i == self.num_worker - 1:
                tail = num_cases

            sub_cases.append(cases[head:tail])

        processed = []

        print('\tBuilt subsets.')

        for chunk in pool.imap(Functions.fun_remove_incomplete, sub_cases):
            processed.append(np.concatenate(chunk, axis=0))

        processed = np.concatenate(processed, axis=0)
        self.csv_file.loc[:, :] = processed
        self.csv_file = self.csv_file[self.csv_file['Flags'] != 'Bad']

        print('Removed incomplete flows.')
Example #38
    def _preprocess_docs_odin(self, texts, vocabulary, keep_order):
        # ====== main processing ====== #
        def initializer(filters, preprocessors, lang, lemma, charlevel,
                        stopwords):
            globals()['__preprocessors'] = preprocessors
            globals()['__filters'] = filters
            globals()['__lang'] = lang
            globals()['__lemma'] = lemma
            globals()['__charlevel'] = charlevel
            globals()['__stopwords'] = stopwords

        # add the index for ordering
        nb_docs = 0
        pool = Pool(processes=self.nb_processors,
                    initializer=initializer,
                    initargs=(self.filters, self.preprocessors, self.language,
                              self.lemmatization, self.char_level,
                              self.stopwords))
        # return the tokenized documents as original order.
        if keep_order:
            it = pool.imap(func=_preprocess_func,
                           iterable=texts,
                           chunksize=self.batch_size)
        # don't care about the order, often used for fitting
        else:
            it = pool.imap_unordered(func=_preprocess_func,
                                     iterable=texts,
                                     chunksize=self.batch_size)
        # iterate over each return document
        for doc in it:
            nb_docs += 1
            if vocabulary is not None:
                doc = [token for token in doc if token in vocabulary]
            yield nb_docs, doc
        pool.close()
        pool.join()
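Example #38 switches between imap (results in submission order) and imap_unordered (results in completion order) depending on whether document order matters. A short sketch contrasting the two:

import random
import time
from multiprocessing import Pool

def jittered(x):
    time.sleep(random.random() / 50)  # simulate uneven task durations
    return x

if __name__ == '__main__':
    with Pool(4) as pool:
        print(list(pool.imap(jittered, range(8))))            # always [0, 1, ..., 7]
        print(list(pool.imap_unordered(jittered, range(8))))  # whatever finishes first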
Example #39
def main(args):

    import sys
    import os
    sys.path.append(
        os.path.normpath(
            os.path.join(os.path.dirname(__file__), '..', 'helpers')))
    # how to search for all ground truth
    searchFine = os.path.join(args.datadir, "gtFine", "*", "*",
                              "*_gt*_polygons.json")

    # search files
    filesFine = glob.glob(searchFine)
    filesFine.sort()

    files = filesFine

    if not files:
        tqdm.writeError("Did not find any files. Please consult the README.")

    # a bit verbose
    tqdm.write("Processing {} annotation files".format(len(files)))

    # iterate through files
    progress = 0
    tqdm.write("Progress: {:>3} %".format(progress * 100 / len(files)),
               end=' ')

    from multiprocessing import Pool
    import time

    pool = Pool(args.num_workers)
    # results = pool.map(process_pred_gt_pair, pairs)
    results = list(tqdm(pool.imap(process_folder, files), total=len(files)))
    pool.close()
    pool.join()
Example #40
    def eval_on_dev(self):

        t = time.time()
        print >> logs, "garbage collection...",
        gc.collect()
        print >> logs, "took %.1f seconds" % (time.time() - t)

        Parser.debuglevel = 0
        if FLAGS.multi != 1:
            ncpus = FLAGS.multi
            print >> logs, "using %d CPUs for eval... chunksize=%d" % (
                ncpus, len(self.devchunks[0]))
            tot = self.decoder.evalclass()

            pool = Pool(processes=ncpus)

            for sub in pool.imap(self.eval_worker, self.devchunks,
                                 chunksize=1):
                tot += sub
        else:
            tot = self.eval_worker(self.devlines)

        Parser.debuglevel = FLAGS.debuglevel  # restore
        return tot
def main(formula_list):
    formulas = open(formula_list).read().split("\n")[:MAX_NUMBER]
    try:
        os.mkdir(IMAGE_DIR)
    except OSError as e:
        pass  # ignored: os.mkdir raises OSError if the directory already exists
    print("Turning formulas into images...")

    # Running a thread pool masks debug output. Uncomment command below to run
    # formulas over images sequentially to see debug errors more clearly

    # names = [formula_to_image(formula) for formula in formulas]

    # Also remember to comment threaded version if you use sequential:
    pool = Pool(THREADS)
    names = list(pool.imap(formula_to_image, formulas))

    zipped = list(zip(formulas, names))

    new_dataset_lines = []
    new_formulas = []
    ctr = 0
    for formula in zipped:
        if formula[1] is None:
            continue
        for rendering_setup in formula[1]:
            new_dataset_lines.append(
                str(ctr) + " " + " ".join(rendering_setup))
        new_formulas.append(formula[0])
        ctr += 1

    with open(NEW_FORMULA_FILE, "w") as f:
        f.write("\n".join(new_formulas))

    with open(DATASET_FILE, "w") as f:
        f.write("\n".join(new_dataset_lines))
Example #42
def main():
    n_proc = 50
    #blensor_result_path = args.br_path
    root_path = 'data/scannet'
    gt_dir = "gtFine"
    img_dir = "leftImg8bit"
    imglists = "imglists"
    dirs = [gt_dir, img_dir, imglists]
    splits = [0.5, 0.3, 0.2]  # train/val/test
    splits = np.cumsum(splits)
    splits_dict = {0: "train", 1: "val", 2: "test"}

    check_mkdir(root_path)
    for i in range(len(dirs)):
        d = os.path.join(root_path, dirs[i])
        check_mkdir(d)
        for spl in splits_dict.values():
            check_mkdir(os.path.join(d, spl))
    ids_ = os.listdir(blensor_result_path)  # eg. scene0041_01

    l = multiprocessing.Lock()
    pool = Pool(n_proc, initializer=init_pool, initargs=(l, ))

    files = {}
    for spl in splits_dict.values():
        files[spl] = open("%s/%s/%s.lst" % (root_path, imglists, spl), 'w')

    len_ids = len(ids_)
    workers = {}
    for spl in splits_dict.values():
        workers['train'] = lambda i: worker(i, spl, img_dir, gt_dir, files)
    pool.imap(worker, ids_[:int(splits[0] * len_ids)])
    pool.imap(worker,
              ids_[int(splits[0] * len_ids):int((splits[0] + splits[1]) * len_ids)])
    pool.imap(worker, ids_[int((splits[0] + splits[1]) * len_ids):])
    pool.close()
    pool.join()
Example #43
    def represent(self, molecules):
        """
        provides coulomb matrix representation for input molecules.

        Parameters
        ----------
        molecules : chemml.chem.Molecule object or array
            If list, it must be a list of chemml.chem.Molecule objects, otherwise we raise a ValueError.
            In addition, all the molecule objects must provide the XYZ information. Please make sure the XYZ geometry has been
            stored or optimized in advance.

        Returns
        -------
        features : Pandas DataFrame
            A data frame with same number of rows as number of molecules will be returned.
            The exact shape of the dataframe depends on the type of CM as follows:
                - shape of Unsorted_Matrix (UM): (n_molecules, max_n_atoms**2)
                - shape of Unsorted_Triangular (UT): (n_molecules, max_n_atoms*(max_n_atoms+1)/2)
                - shape of eigenspectrums (E): (n_molecules, max_n_atoms)
                - shape of Sorted_Coulomb (SC): (n_molecules, max_n_atoms*(max_n_atoms+1)/2)
                - shape of Random_Coulomb (RC): (n_molecules, nPerm * max_n_atoms * (max_n_atoms+1)/2)
        """
        # check input molecules
        if isinstance(molecules, (list, np.ndarray)):
            molecules = np.array(molecules)
        elif isinstance(molecules, Molecule):
            molecules = np.array([molecules])
        else:
            msg = "The molecule must be a chemml.chem.Molecule object or a list of objects."
            raise ValueError(msg)

        if molecules.ndim > 1:
            msg = "The molecule must be a chemml.chem.Molecule object or a list of objects."
            raise ValueError(msg)

        self.n_molecules_ = molecules.shape[0]

        # max number of atoms based on the list of molecules
        if self.max_n_atoms_ == 'auto':
            try:
                self.max_n_atoms_ = max(
                    [m.xyz.atomic_numbers.shape[0] for m in molecules])
            except:
                msg = "The xyz representation of molecules is not available."
                raise ValueError(msg)

        # pool of processes
        if self.n_jobs == -1:
            self.n_jobs = cpu_count()
        pool = Pool(processes=self.n_jobs)

        # Create an iterator
        # http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i:i + n]

        # find size of each batch
        batch_size = int(len(molecules) / self.n_jobs)
        if batch_size == 0:
            batch_size = 1

        molecule_chunks = chunks(molecules, batch_size)

        # MAP: CM in parallel
        map_function = partial(self._represent)
        if self.verbose:
            print('featurizing molecules in batches of %i ...' % batch_size)
            pbar = Progbar(len(molecules), width=50)
            tensor_list = []
            for tensors in pool.imap(map_function, molecule_chunks):
                pbar.add(len(tensors[0]))
                tensor_list.append(tensors)
            print('Merging batch features ...    ', end='')
        else:
            tensor_list = pool.map(map_function, molecule_chunks)
        if self.verbose:
            print('[DONE]')

        # REDUCE: Concatenate the obtained tensors
        pool.close()
        pool.join()
        return pd.concat(tensor_list, axis=0, ignore_index=True)
Example #44
import subprocess
from multiprocessing import Pool


def runACO(runid):
    result = subprocess.check_output(['python3', 'RunACOExperiments.py'])
    return str(runid) + "," + result.decode('utf-8')


if __name__ == "__main__":

    runs = 25
    p = Pool(runs)
    outputs = p.imap(runACO, range(runs))
    for output in outputs:
        print(output)
    print("Loading model...")
    model = VGG16(weights=None, pooling=pool, include_top=False)
    model.load_weights(
        '../pretrained/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')

    n_items = Value('i', -1)  # Async number of items
    sparse_features = []
    # items_ids = []
    pool = Pool(2)
    bar = None
    X_batch = []
    try:
        # A threaded generator is useful both for parallel blocking reads and to
        # limit the number of items buffered by pool.imap (which may cause OOM)
        generator = ThreadedGenerator(generate_files(n_items), 50)
        for item_id, im in pool.imap(im_decode_resize, generator):
            if bar is None:
                bar = tqdm(total=n_items.value,
                           mininterval=bar_iterval,
                           unit_scale=True)

            # Replace None with empty image
            if im is None:
                im = empty_im

            X_batch.append(im)
            # items_ids.append(item_id)
            del im

            if len(X_batch) == batch_size:
                sparse_features.append(predict_batch(model, X_batch))
def rerank_by_m2():
    data_dir = ASSESS_DIR + "data/"
    k_best_dir = data_dir + "K-best/"
    system_file = k_best_dir + "conll14st.output.1.best100"

    reference_dir = data_dir + "references/"
    first_nucle = reference_dir + "NUCLEA.m2"
    combined_nucle = reference_dir + "NUCLE.m2"
    BN = reference_dir + "BN.m2"
    ALL = reference_dir + "ALL.m2"
    gold_files = [first_nucle, combined_nucle, BN, ALL]

    (path, dirs, files) = next(os.walk(reference_dir))
    for fl in files:
        if "subset" in fl:
            gold_files.append(path + fl)

    calculations_dir = "calculations_data/"
    output_file = "first_rank_results"
    for gold_file in gold_files:
        out_text_file = calculations_dir + \
            output_file + name_extension(gold_file)[0]
        out_res_file = calculations_dir + "prf_" + \
            output_file + name_extension(gold_file)[0]
        if os.path.isfile(out_text_file):
            print("file already found", out_text_file)
        else:
            print("processing " + gold_file)
            source_sentences, gold_edits = m2scorer.load_annotation(gold_file)

            # load system hypotheses
            fin = m2scorer.smart_open(system_file, 'r')
            system_sentences = [line.strip() for line in fin.readlines()]

            fin.close()

            # pack and parse RoRo's k-best
            packed_system_sentences = get_roro_packed(system_sentences)
            # candidate_num = 0
            # for sentence_num, (source, this_edits) in enumerate(zip(source_sentences, gold_edits)):
            #   curr_sentences = []
            #   # keep packing until reached another sentence, assumes k-best are consecutive
            #   while (candidate_num < len(system_sentences) and
            #         system_sentences[candidate_num].split()[0] == str(sentence_num)):
            #       sentence = re.sub("\|\d+-\d+\| ","",system_sentences[candidate_num].split("|||")[1][1:])
            #       candidate_num += 1
            #       curr_sentences.append(sentence)
            #   packed_system_sentences.append(curr_sentences)
            # print(len(packed_system_sentences), len(gold_edits),
            # len(source_sentences))

            # find top ranking
            pool = Pool(POOL_SIZE)
            assert (len(packed_system_sentences) == len(gold_edits)
                    and len(gold_edits) == len(source_sentences))
            results = pool.imap(
                M2SCORER_oracle,
                zip(source_sentences, gold_edits, packed_system_sentences))
            pool.close()
            pool.join()
            results = list(results)
            sentences = "\n".join(list(zip(*results))[0])
            results = list(zip(*results))[1]
            results = "\n".join([str(x) for x in results])

            print("writing to " + out_text_file)
            with codecs.open(out_text_file, "w+", "utf-8") as fl:
                fl.write(sentences)
            with open(out_res_file, "w+") as fl:
                fl.write(results)
from multiprocessing import Pool


def md5_file(filename):
    with open(filename, 'rb') as f:
        return (hashlib.md5(f.read()).hexdigest(), filename)


directories = [
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "the", "u", "v", "w", "x", "y"
]

try:
    base_directory = sys.argv[1]

    pool = Pool(8)
    with open("md5sums.txt", "w") as f:
        writer = csv.writer(f)
        for d in directories:
            print "Calculating hashes for the {} directory.".format(d)

            image_files = glob.iglob("{}/{}/*".format(base_directory, d))
            for hash_and_name in pool.imap(md5_file, image_files):
                writer.writerow(hash_and_name)

except IndexError:
    print "{0}: Syntax: {0} <album covers base directory>".format(sys.argv[0])
    sys.exit(0)
Example #48
 def _run(self):
     """
     verbose: whether to print results (or anything at all, in fact)
     details: (given that verbose is true) whether to output additional
              status information
     debug:   (given that verbose is true) if true, outputs debug
              information, in particular the distribution over possible
              worlds
     debugLevel: level of detail for debug mode
     """
     # check consistency with hard constraints:
     self._watch.tag('check hard constraints', verbose=self.verbose)
     hcgrounder = FastConjunctionGrounding(self.mrf, simplify=False, unsatfailure=True, 
                                           formulas=[f for f in self.mrf.formulas if f.weight == HARD], 
                                           **(self._params + {'multicore': False, 'verbose': False}))
     for gf in hcgrounder.itergroundings():
         if isinstance(gf, Logic.TrueFalse) and gf.truth() == .0:
             raise SatisfiabilityException('MLN is unsatisfiable due to hard constraint violation by evidence: {} ({})'.format(str(gf), str(self.mln.formula(gf.idx))))
     self._watch.finish('check hard constraints')
     # compute number of possible worlds
     worlds = 1
     for variable in self.mrf.variables:
         values = variable.valuecount(self.mrf.evidence)
         worlds *= values
     numerators = [0.0 for i in range(len(self.queries))]
     denominator = 0.
     # start summing
     logger.debug("Summing over %d possible worlds..." % worlds)
     if worlds > 500000 and self.verbose:
         print colorize('!!! %d WORLDS WILL BE ENUMERATED !!!' % worlds, (None, 'red', True), True)
     k = 0
     self._watch.tag('enumerating worlds', verbose=self.verbose)
     global global_enumAsk
     global_enumAsk = self
     bar = None
     if self.verbose:
         bar = ProgressBar(width=100, steps=worlds, color='green')
     if self.multicore:
         pool = Pool()
         logger.debug('Using multiprocessing on {} core(s)...'.format(pool._processes))
         try:
             for num, denum in pool.imap(with_tracing(eval_queries), self.mrf.worlds()):
                 denominator += denum
                 k += 1
                 for i, v in enumerate(num):
                     numerators[i] += v
                 if self.verbose: bar.inc()
         except Exception as e:
             logger.error('Error in child process. Terminating pool...')
             pool.close()
             raise e
         finally:
             pool.terminate()
             pool.join()
     else:  # do it single core
         for world in self.mrf.worlds():
             # compute exp. sum of weights for this world
             num, denom = eval_queries(world)
             denominator += denom
             for i, _ in enumerate(self.queries):
                 numerators[i] += num[i]
             k += 1
             if self.verbose:
                 bar.update(float(k) / worlds)
     logger.debug("%d worlds enumerated" % k)
     self._watch.finish('enumerating worlds')
     if 'grounding' in self.grounder.watch.tags:
         self._watch.tags['grounding'] = self.grounder.watch['grounding']
     if denominator == 0:
         raise SatisfiabilityException(
             'MLN is unsatisfiable. All probability masses returned 0.')
     # normalize answers
     dist = map(lambda x: float(x) / denominator, numerators)
     result = {}
     for q, p in zip(self.queries, dist):
         result[str(q)] = p
     return result
Example #49
    def represent(self, molecules):
        """
        provides bag of bonds representation for input molecules.

        Parameters
        ----------
        molecules : chemml.chem.Molecule object or array
            If list, it must be a list of chemml.chem.Molecule objects, otherwise we raise a ValueError.
            In addition, all the molecule objects must provide the XYZ information. Please make sure the XYZ geometry has been
            stored or optimized in advance.

        Returns
        -------
        features : pandas data frame, shape: (n_molecules, max_length_of_combinations)
            The bag of bond features.

        """
        if isinstance(molecules, (list, np.ndarray)):
            molecules = np.array(molecules)
        elif isinstance(molecules, Molecule):
            molecules = np.array([molecules])
        else:
            msg = "The input molecules must be a chemml.chem.Molecule object or a list of objects."
            raise ValueError(msg)

        if molecules.ndim > 1:
            msg = "The molecule must be a chemml.chem.Molecule object or a list of objects."
            raise ValueError(msg)

        # pool of processes
        if self.n_jobs == -1:
            self.n_jobs = cpu_count()
        pool = Pool(processes=self.n_jobs)

        # Create an iterator
        # http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i:i + n]

        # find size of each batch
        batch_size = int(len(molecules) / self.n_jobs)
        if batch_size == 0:
            batch_size = 1

        molecule_chunks = chunks(molecules, batch_size)

        # MAP: CM in parallel
        map_function = partial(self._represent)
        if self.verbose:
            print('featurizing molecules in batches of %i ...' % batch_size)
            pbar = Progbar(len(molecules), width=50)
            bbs_info = []
            for tensors in pool.imap(map_function, molecule_chunks):
                pbar.add(len(tensors[0]))
                bbs_info.append(tensors)
            print('Merging batch features ...    ', end='')
        else:
            bbs_info = pool.map(map_function, molecule_chunks)
        if self.verbose:
            print('[DONE]')

        # REDUCE: Concatenate the obtained tensors
        pool.close()
        pool.join()
        return self.concat_mol_features(bbs_info)
Example #50
            "noretrmedian": noretrmedian,
            "noretrmin": noretrmin,
            "noretrmax": noretrmax,
            "neverretrmedian": neverretrmedian,
            "neverretrmin": neverretrmin,
            "neverretrmax": neverretrmax,
        }
        for val in [
                noretrmedian, noretrmin, noretrmax, neverretrmedian,
                neverretrmin, neverretrmax
        ]:
            results[val] = {}

print "Calculating VICBF parameters..."
pool = Pool(maxtasksperchild=1)
resiter = pool.imap(find_params, results.keys())
pbar = ProgressBar(maxval=len(results.keys()))
pbar.start()
c = 0
for i in results.keys():
    n = resiter.next()
    c += 1
    pbar.update(c)
    results[i] = {
        "hash_functions": str(n[0]),
        "slots": str(n[1]),
        "probability": str(n[2]),
        "len_uncompressed": str(n[1] + 10),
        "len_compressed": str(int(round((n[1] + 10) * 0.52)))
    }
Example #51
def main():
    """
    Helper script to encode raw text with the GPT-2 BPE using multiple processes.
    The encoder.json and vocab.bpe files can be obtained here:
    - https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json
    - https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model-file",
        help='path to encoder.json',
    )
    parser.add_argument(
        "--inputs",
        nargs="+",
        default=['-'],
        help="input files to filter/encode",
    )
    parser.add_argument(
        "--outputs",
        nargs="+",
        default=['-'],
        help="path to save encoded outputs",
    )
    parser.add_argument(
        "--keep-empty",
        action="store_true",
        help="keep empty lines",
    )
    parser.add_argument("--max_len", type=int, default=510)
    parser.add_argument("--workers", type=int, default=20)
    args = parser.parse_args()

    assert len(args.inputs) == len(args.outputs), \
        "number of input and output paths should match"

    with contextlib.ExitStack() as stack:
        inputs = [
            stack.enter_context(open(input, "r", encoding="utf-8"))
            if input != "-" else sys.stdin
            for input in args.inputs
        ]
        outputs = [
            stack.enter_context(open(output, "w", encoding="utf-8"))
            if output != "-" else sys.stdout
            for output in args.outputs
        ]

        encoder = MultiprocessingEncoder(args)
        pool = Pool(args.workers, initializer=encoder.initializer)
        encoded_lines = pool.imap(encoder.encode_lines, zip(*inputs), 100)

        stats = Counter()
        for i, (filt, enc_lines) in enumerate(encoded_lines, start=1):
            if filt == "PASS":
                for enc_line, output_h in zip(enc_lines, outputs):
                    print(enc_line, file=output_h)
            else:
                stats["num_filtered_" + filt] += 1
            if i % 10000 == 0:
                print("processed {} lines".format(i), file=sys.stderr)

        for k, v in stats.most_common():
            print("[{}] filtered {} lines".format(k, v), file=sys.stderr)
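
MultiprocessingEncoder itself is not shown in this example. For orientation, here is a hypothetical minimal stand-in that honours the contract the loop above relies on: initializer() builds the (expensive) encoder once per worker, and encode_lines() receives one line per open input file and returns a status ("PASS" or a filter reason) plus the encoded lines. The hash-based tokenizer and the max_len filter are assumptions for illustration; the real class wraps the GPT-2 BPE.

class MultiprocessingEncoder(object):
    """Hypothetical sketch of the encoder wrapper used above."""

    def __init__(self, args):
        self.args = args

    def initializer(self):
        # Build the tokenizer once per worker process (a toy stand-in here).
        global bpe
        bpe = lambda text: [str(abs(hash(tok)) % 50257) for tok in text.split()]

    def encode_lines(self, lines):
        """Encode one line per input file; report a filter reason if any line is dropped."""
        enc_lines = []
        for line in lines:
            line = line.strip()
            if len(line) == 0 and not self.args.keep_empty:
                return ["EMPTY", None]
            ids = bpe(line)
            if len(ids) > self.args.max_len:          # assumption: --max_len acts as a filter
                return ["TOOLONG", None]
            enc_lines.append(" ".join(ids))
        return ["PASS", enc_lines]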
Exemple #52
0
class Simpletxt2Json():
    def __init__(self,
                 dst_version,
                 city,
                 sub_imageset_folds,
                 multi_processing=False,
                 num_processor=16):
        self.splitted_image_dir = './data/buildchange/{}/{}/images'.format(
            dst_version, city)
        self.splitted_label_dir = './data/buildchange/{}/{}/labels'.format(
            dst_version, city)
        self.json_dir = './data/buildchange/v2/{}/labels_json'.format(city)
        self.wrong_shp_file_dict = dict()
        for sub_fold in sub_imageset_folds[city]:
            wrong_file = './data/buildchange/v0/{}/{}/wrongShpFile.txt'.format(
                city, sub_fold)
            ori_filenames = self.read_wrong_file(wrong_file)
            self.wrong_shp_file_dict[sub_fold] = ori_filenames
        wwtool.mkdir_or_exist(self.json_dir)
        self.city = city
        self.multi_processing = multi_processing
        self.pool = Pool(num_processor)

    def read_wrong_file(self, wrong_file):
        ori_filenames = []
        with open(wrong_file, 'r') as f:
            lines = f.readlines()
            for line in lines:
                ori_filename = line.strip('\n').split('/')[-1].split('.csv')[0]
                ori_filenames.append(ori_filename)

        return ori_filenames

    def simpletxt_parse(self, label_file):
        """parse simpletxt style dataset label file
        
        Arguments:
            label_file {str} -- label file path
        
        Returns:
            dict, {'polygon': [...], 'label': class_name} -- objects' location and class
        """
        with open(label_file, 'r') as f:
            lines = f.readlines()

        objects = []
        for line in lines:
            object_struct = dict()
            line = line.rstrip().split(' ')
            # the last space-separated token is the class name; the rest are coordinates
            label = line[-1]
            polygon = [float(_) for _ in line[0:-1]]
            object_struct['polygon'] = polygon
            object_struct['label'] = label
            objects.append(object_struct)

        return objects

    def get_footprint(self, mask, coordinate, roof_polygons, roof_properties):
        # print(mask, coordinate, roof_polygon, roof_property)
        transform_matrix = [1, 0, 0, 1, coordinate[0], coordinate[1]]
        roi_mask = affinity.affine_transform(mask, transform_matrix)
        # print("move: ", mask, moved_mask, coordinate)
        for idx, roof_polygon in enumerate(roof_polygons):
            if roof_polygon.equals(roi_mask):
                xoffset = roof_properties[idx].to_dict()['xoffset']
                yoffset = roof_properties[idx].to_dict()['yoffset']
                break
            else:
                xoffset, yoffset = 0, 0

        transform_matrix = [1, 0, 0, 1, -coordinate[0], -coordinate[1]]
        split_mask = affinity.affine_transform(roi_mask, transform_matrix)
        transform_matrix = [1, 0, 0, 1, -xoffset, -yoffset]
        footprint_polygon = affinity.affine_transform(split_mask,
                                                      transform_matrix)

        return footprint_polygon, xoffset, yoffset

    def simpletxt2json(self, image_fn):
        # 1. open the ignore file and get the polygons
        base_name = wwtool.get_basename(image_fn)
        sub_fold = base_name.split("__")[0].split('_')[0]
        ori_image_fn = "_".join(base_name.split("__")[0].split('_')[1:])
        # if ori_image_fn in self.wrong_shp_file_dict[sub_fold]:
        #     print("Skip this wrong shape file")
        #     return
        coord_x, coord_y = base_name.split("__")[1].split(
            '_')  # top left corner
        coord_x, coord_y = int(coord_x), int(coord_y)
        print(
            f"splitted items: {self.city}, {sub_fold}, {ori_image_fn}, {(coord_x, coord_y)}"
        )

        ignore_file = './data/buildchange/{}/{}/{}/pixel_anno_v2/{}'.format(
            src_version, self.city, sub_fold, ori_image_fn + '.png')
        # print("ignore file name: ", ignore_file)
        roof_shp_file = './data/buildchange/{}/{}/{}/roof_shp_4326/{}'.format(
            src_version, self.city, sub_fold, ori_image_fn + '.shp')
        geo_info_file = './data/buildchange/{}/{}/{}/geo_info/{}'.format(
            src_version, self.city, sub_fold, ori_image_fn + '.png')

        objects = shp_parser(roof_shp_file, geo_info_file)
        roof_polygon_4326 = [obj['converted_polygon'] for obj in objects]
        roof_property = [obj['converted_property'] for obj in objects]

        pixel_anno = cv2.imread(ignore_file)
        if pixel_anno is None:
            return
        objects = mask_parser(pixel_anno[coord_y:coord_y + sub_img_h,
                                         coord_x:coord_x + sub_img_w, :],
                              category=255)
        if objects == []:
            return
        ignore_polygons = [obj['polygon'] for obj in objects]
        # print("ignore polygon: ", ignore_polygons)

        # 2. read the simpletxt file and convert to polygons
        objects = self.simpletxt_parse(
            os.path.join(self.splitted_label_dir, base_name + '.txt'))
        roof_polygons = [
            wwtool.mask2polygon(obj['polygon']) for obj in objects
        ]
        # print("roof polygon: ", roof_polygons)

        _, ignore_indexes = wwtool.cleaning_polygon_by_polygon(
            roof_polygons[:], ignore_polygons, show=False)
        ignore_list = len(roof_polygons) * [0]
        for ignore_index in ignore_indexes:
            ignore_list[ignore_index] = 1

        new_anno_objects = []
        for idx, roof_polygon in enumerate(roof_polygons):
            footprint_polygon, xoffset, yoffset = self.get_footprint(
                roof_polygon, [coord_x, coord_y], roof_polygon_4326,
                roof_property)
            object_struct = dict()
            ignore_flag = ignore_list[idx]
            object_struct['roof'] = wwtool.polygon2mask(roof_polygon)
            object_struct['footprint'] = wwtool.polygon2mask(footprint_polygon)
            object_struct['offset'] = [xoffset, yoffset]
            object_struct['ignore'] = ignore_flag
            new_anno_objects.append(object_struct)

        image_info = {
            "ori_filename": ori_image_fn + '.jpg',
            "subimage_filename": image_fn,
            "width": 1024,
            "height": 1024,
            "city": self.city,
            "sub_fold": sub_fold,
            "coordinate": [coord_x, coord_y]
        }

        json_data = {"image": image_info, "annotations": new_anno_objects}

        json_file = os.path.join(self.json_dir, f'{base_name}.json')
        with open(json_file, "w") as jsonfile:
            json.dump(json_data, jsonfile, indent=4)

    def core(self):
        if self.multi_processing:
            image_fn_list = os.listdir(self.splitted_image_dir)
            num_image = len(image_fn_list)
            worker = partial(self.simpletxt2json)
            # self.pool.map(worker, image_fn_list)
            ret = list(
                tqdm.tqdm(self.pool.imap(worker, image_fn_list),
                          total=num_image))
            self.pool.close()
            self.pool.join()
        else:
            image_fn_list = os.listdir(self.splitted_image_dir)
            progress_bar = mmcv.ProgressBar(len(image_fn_list))
            for _, image_fn in enumerate(image_fn_list):
                self.simpletxt2json(image_fn)
                progress_bar.update()

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['pool']
        return self_dict

    def __setstate__(self, state):
        self.__dict__.update(state)
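
The __getstate__/__setstate__ pair above is what makes self.pool.imap(worker, ...) possible: imap pickles the bound method, and with it the whole instance, for every task, and a multiprocessing.Pool cannot be pickled, so it is dropped from the state. A minimal illustration of the same idea (the Converter class below is made up for the example):

import os
from multiprocessing import Pool


class Converter(object):
    def __init__(self, num_processor=4):
        self.pool = Pool(num_processor)

    def convert_one(self, name):
        # Bound method: sending it to a worker pickles `self` (minus the pool).
        return name.upper(), os.getpid()

    def core(self, names):
        results = list(self.pool.imap(self.convert_one, names))
        self.pool.close()
        self.pool.join()
        return results

    def __getstate__(self):
        state = self.__dict__.copy()
        del state['pool']              # Pool objects are not picklable
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)


if __name__ == '__main__':
    print(Converter().core(['shanghai', 'xian']))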
Exemple #53
0
def main():
    parser = argparse.ArgumentParser(
        prog='PrePARE',
        description='Validate CMIP6 file for ESGF publication.')

    parser.add_argument(
        '-l',
        '--log',
        metavar='CWD',
        type=str,
        const='{}/logs'.format(os.getcwd()),
        nargs='?',
        help='Logfile directory (default is the "logs" folder in the working directory).\n'
        'If not set, standard output is used. Only available in multiprocessing mode.'
    )

    parser.add_argument('--variable',
                        help='Specify geophysical variable name.\n'
                        'If not set, the variable is deduced from the filename.')

    parser.add_argument(
        '--table-path',
        action=DIRECTORYAction,
        default=os.environ['CMIP6_CMOR_TABLES']
        if 'CMIP6_CMOR_TABLES' in list(os.environ.keys()) else './Tables',
        help='Specify the CMIP6 CMOR tables path (JSON file).\n'
        'If not submitted read the CMIP6_CMOR_TABLES environment variable if exists.\n'
        'If a directory is submitted table is deduced from filename (default is "./Tables").'
    )

    parser.add_argument(
        '--max-processes',
        metavar='4',
        type=processes_validator,
        default=4,
        help=
        'Maximum number of processes used to treat several files simultaneously.\n'
        'Set to 1 for sequential processing (default is 4). Set to "-1" to use\n'
        'all available resources as returned by "multiprocessing.cpu_count()".'
    )

    parser.add_argument(
        '--all',
        action='store_true',
        default=False,
        help=
        'Show all results. Default only shows error(s) (i.e., file(s) not compliant)'
    )

    parser.add_argument(
        '--ignore-dir',
        metavar="PYTHON_REGEX",
        type=str,
        default='^.*/\.[\w]*$',
        help='Filter directories NON-matching the regular expression.\n'
        'Default ignores paths with folder name(s) starting with "."')

    parser.add_argument('--include-file',
                        metavar='PYTHON_REGEX',
                        type=regex_validator,
                        action='append',
                        help='Filter files matching the regular expression.\n'
                        'Duplicate the flag to set several filters.\n'
                        'Default only includes netCDF files.')

    parser.add_argument(
        '--exclude-file',
        metavar='PYTHON_REGEX',
        type=regex_validator,
        action='append',
        help='Filter files NON-matching the regular expression.\n'
        'Duplicate the flag to set several filters.\n'
        'Default only excludes hidden files (names starting with ".").')

    parser.add_argument(
        'input',
        nargs='+',
        action=INPUTAction,
        help=
        'Input CMIP6 netCDF data to validate (ex: clisccp_cfMon_DcppC22_NICAM_gn_200001-200001.nc).\n'
        'If a directory is submitted, all netCDF files recursively found will be validated independently.'
    )

    # Check command-line error
    try:
        args = parser.parse_args()
    except argparse.ArgumentTypeError as errmsg:
        print(str(errmsg), file=sys.stderr)
        return 1
    except SystemExit:
        return 1
    # Get log
    logname = 'PrePARE-{}.log'.format(datetime.now().strftime("%Y%m%d-%H%M%S"))
    log = None
    if args.log:
        if not os.path.isdir(args.log):
            os.makedirs(args.log)
        log = os.path.join(args.log, logname)
    # Collects netCDF files for process
    sources = Collector(args.input)
    # Set scan filters
    file_filters = list()
    if args.include_file:
        file_filters.extend([(f, True) for f in args.include_file])
    else:
        # Default includes netCDF only
        file_filters.append(('^.*\.nc$', True))
    if args.exclude_file:
        # Default exclude hidden files
        file_filters.extend([(f, False) for f in args.exclude_file])
    else:
        file_filters.append(('^\..*$', False))
    # Init collector file filter
    for regex, inclusive in file_filters:
        sources.FileFilter.add(regex=regex, inclusive=inclusive)
    # Init collector dir filter
    sources.PathFilter.add(regex=args.ignore_dir, inclusive=False)
    nb_sources = len(sources)
    errors = 0
    # Init process context
    cctx = dict()
    cctx['table_path'] = args.table_path
    cctx['variable'] = args.variable
    cctx['all'] = args.all
    # Separate sequential process and multiprocessing
    if args.max_processes != 1:
        # Create pool of processes
        pool = Pool(processes=args.max_processes,
                    initializer=initializer,
                    initargs=(list(cctx.keys()), list(cctx.values())))
        # Run processes
        logfiles = list()
        progress = 0
        for logfile, rc in pool.imap(process, sources):
            progress += 1
            percentage = int(progress * 100 / nb_sources)
            msg = BCOLORS.OKGREEN + '\rCheck netCDF file(s): ' + BCOLORS.ENDC
            msg += '{}% | {}/{} files'.format(percentage, progress, nb_sources)
            sys.stdout.write(msg)
            sys.stdout.flush()
            logfiles.append(logfile)
            errors += rc
        sys.stdout.write('\r\033[K')
        sys.stdout.flush()
        # Print results from logfiles and remove them
        for logfile in set(logfiles):
            if not os.stat(logfile).st_size == 0:
                with open(logfile, 'r') as f:
                    if log:
                        with open(log, 'a+') as r:
                            r.write(f.read())
                    else:
                        sys.stdout.write(f.read())
                        sys.stdout.flush()
            os.remove(logfile)
        # Close pool of processes
        pool.close()
        pool.join()
    else:
        print('Checking data, please wait...')
        initializer(list(cctx.keys()), list(cctx.values()))
        for source in sources:
            errors += sequential_process(source)
    # Print results summary
    msg = BCOLORS.HEADER + '\nNumber of files scanned: {}'.format(
        nb_sources) + BCOLORS.ENDC
    if errors:
        msg += BCOLORS.FAIL
    else:
        msg += BCOLORS.OKGREEN
    msg += '\nNumber of files with error(s): {}'.format(errors) + BCOLORS.ENDC
    if log:
        with open(log, 'a+') as r:
            r.write(msg)
    print(msg)
    # Evaluate errors and exit with appropriate return code
    if errors != 0:
        if errors == nb_sources:
            # All files have error(s). Error code = -1
            sys.exit(-1)
        else:
            # Some files (at least one) has error(s). Error code = nb files with error(s)
            sys.exit(errors)
    else:
        # No errors. Error code = 0
        sys.exit(0)
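
The multiprocessing branch above passes the shared checking context to each worker once, through initializer/initargs, instead of re-pickling it with every task submitted to imap. A minimal sketch of that pattern with hypothetical names mirroring the snippet:

from multiprocessing import Pool


def initializer(keys, values):
    """Store the processing context as module globals inside each worker."""
    global cctx
    cctx = dict(zip(keys, values))


def process(path):
    # cctx is available here because initializer() already ran in this worker.
    return path + '.log', 0 if cctx.get('all') else 1


if __name__ == '__main__':
    ctx = {'table_path': './Tables', 'variable': None, 'all': True}
    with Pool(processes=2, initializer=initializer,
              initargs=(list(ctx.keys()), list(ctx.values()))) as pool:
        for logfile, rc in pool.imap(process, ['a.nc', 'b.nc']):
            print(logfile, rc)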
Exemple #54
0
    with open(args.rid_phase_map) as f:
        for row in f:
            row = row.strip().split()
            arid2phase[row[0]] = (row[1], row[2], row[3])  # ctg_id, phase_blk_id, phase_id

    exe_pool = Pool(args.n_core)

    file_list = open(args.fofn).read().split("\n")
    inputs = []
    for fn in file_list:
        if len(fn) != 0:
            inputs.append((db_fn, fn, max_diff, max_cov, min_cov, min_len))

    ignore_all = []
    for res in exe_pool.imap(filter_stage1, inputs):
        ignore_all.extend(res[1])

    inputs = []
    ignore_all = set(ignore_all)
    for fn in file_list:
        if len(fn) != 0:
            inputs.append(
                (db_fn, fn, max_diff, max_cov, min_cov, min_len, ignore_all))
    contained = set()
    for res in exe_pool.imap(filter_stage2, inputs):
        contained.update(res[1])
        #print res[0], len(res[1]), len(contained)

    #print "all", len(contained)
    inputs = []
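
In the two-stage filter above, each worker is expected to return a tuple whose second element is a collection of read identifiers; the parent only merges those collections. The real filter_stage1/filter_stage2 take database and overlap-threshold arguments, so the sketch below only imitates that return contract with made-up work:

from multiprocessing import Pool


def stage1(task):
    fn, threshold = task
    # Pretend we scanned `fn` and collected ids to ignore.
    return fn, {i for i in range(threshold)}


def stage2(task):
    fn, threshold, ignore = task
    # Pretend we found contained reads, excluding already-ignored ids.
    return fn, {i * 10 for i in range(threshold)} - ignore


if __name__ == '__main__':
    tasks = [('a.las', 3), ('b.las', 5)]
    with Pool(2) as pool:
        ignore_all = set()
        for _, ids in pool.imap(stage1, tasks):
            ignore_all.update(ids)
        contained = set()
        for _, ids in pool.imap(stage2, [(fn, t, ignore_all) for fn, t in tasks]):
            contained.update(ids)
    print(len(ignore_all), len(contained))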
Exemple #55
0
            if not os.path.isdir(os.path.split(path)[0]):
                os.makedirs(os.path.split(path)[0])
            imwrite(path, img_crop)

            tformed_landmarks.shape = -1
            name_landmark_str = ('%s' + ' %.1f' * n_landmark * 2) % (
                (name, ) + tuple(tformed_landmarks))
            succeed = True
            break
        except Exception:
            succeed = False
    if succeed:
        return name_landmark_str
    else:
        print('%s fails!' % img_names[i])


if __name__ == '__main__':
    pool = Pool(args.n_worker)
    name_landmark_strs = list(
        tqdm.tqdm(pool.imap(work, range(len(img_names))),
                  total=len(img_names)))
    pool.close()
    pool.join()

    landmarks_path = os.path.join(save_dir, 'landmark.txt')
    with open(landmarks_path, 'w') as f:
        for name_landmark_str in name_landmark_strs:
            if name_landmark_str:
                f.write(name_landmark_str + '\n')
Exemple #56
0
    def to_nx_OLD(
            self,
            graph_id,
            directed=False,
            parallel_processing=True,
            n_jobs=multiprocessing.cpu_count(),
            progress=True,
            chunksize=100,
    ):
        """Convert the graph specified by its graph_id to networkx graph"""

        if (
                graph_id in self.G_nx.keys()
        ):  # if self.G_nx[graph_id] already exists, just return it, otherwise evaluate it
            return self.G_nx[graph_id]
        else:
            print("Converting the EPGM graph {} to NetworkX graph...".format(
                graph_id))
            if not any([graph_id in g["id"] for g in self.G["graphs"]]):
                raise Exception(
                    "Graph with id {} does not exist".format(graph_id))

            # List relevant nodes and edges:
            print("...extracting relevant nodes...", end="")
            nodes = [
                v["id"] for v in self.G["vertices"]
                if graph_id in v["meta"]["graphs"]
            ]
            print(" ...{} nodes extracted...".format(len(nodes)))
            print("...extracting relevant edges...", end="")
            edges = [(e["source"], e["target"]) for e in self.G["edges"]
                     if graph_id in e["meta"]["graphs"]]
            print(" ...{} edges extracted...".format(len(edges)))
            # TODO: implement the case of weighted edges

            # create a graph as dict of lists in the format (node_id: [neighbour nodes])
            print("...building the graph as dict of lists...")
            print("...[parallel_processing: {}, n_jobs: {}, progress_bar: {}]".
                  format(parallel_processing, n_jobs, progress))

            if parallel_processing:  # parallel execution
                pool = Pool(processes=n_jobs)
                if progress:
                    n = len(nodes)
                    self.G_nx[graph_id] = []

                    # pbar = ProgressBar(
                    #     widgets=[
                    #         SimpleProgress(
                    #             format="%(value_s)s of %(max_value_s)s nodes processed (%(percentage)3d%%)"
                    #         )
                    #     ],
                    #     maxval=n,
                    # ).start()
                    # _ = [pool.apply_async(partial(node_neighbours, edges=edges), args=(v,),
                    #                       callback=self.G_nx[graph_id].append) for v in nodes]
                    # it seems that appending results using callback works much slower than either pool.map_async or pool.map
                    # while len(self.G_nx[graph_id]) != n:
                    #     pbar.update(len(self.G_nx[graph_id]))
                    #     sleep(1)

                    graph = pool.imap(partial(node_neighbours, edges=edges),
                                      nodes, chunksize)  # lazy map
                    # evaluate batches of imap, as the progress bar is being updated:
                    while len(self.G_nx[graph_id]) != n:
                        self.G_nx[graph_id].append(next(graph))
                        # pbar.update(len(self.G_nx[graph_id]))

                    # pbar.finish()

                    self.G_nx[graph_id] = dict(self.G_nx[graph_id])

                else:
                    self.G_nx[graph_id] = dict(
                        pool.map(partial(node_neighbours, edges=edges), nodes))

                pool.close()
                pool.join()

            else:  # sequential execution
                self.G_nx[graph_id] = {
                    v: [e[1] for e in edges if e[0] == v]
                    for v in nodes
                }  # this works ~2.5x faster (for cora dataset) than the above for loop

            print("...converting the graph to nx format...")
            self.G_nx[graph_id] = nx.from_dict_of_lists(self.G_nx[graph_id])

            if directed:
                self.G_nx[graph_id] = self.G_nx[graph_id].to_directed()
            else:
                self.G_nx[graph_id] = self.G_nx[graph_id].to_undirected()

            return self.G_nx[graph_id]
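
node_neighbours is referenced but not defined in this excerpt. Given that its results are fed to dict() and that it must be picklable for the pool (a module-level function, with edges bound via functools.partial), a definition consistent with the sequential branch above would be:

def node_neighbours(v, edges):
    """Return (v, [targets of edges whose source is v]); mirrors the sequential branch."""
    return v, [e[1] for e in edges if e[0] == v]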
Exemple #57
0
def multiprocess_get_flag(beg, end, n_processes):
    from multiprocessing import Pool
    pool = Pool(processes=n_processes)
    return ''.join(pool.imap(get_char, range(beg, end)))
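
Because imap (unlike imap_unordered) yields results in argument order, the ''.join(...) reassembles the flag characters in the right sequence. get_char is not shown; a throwaway stand-in to exercise the helper might look like this (the real get_char would do the expensive per-index computation that motivates the pool):

def get_char(i):
    # Hypothetical placeholder for the real per-character computation.
    return chr(ord('A') + i % 26)


if __name__ == '__main__':
    print(multiprocess_get_flag(0, 5, n_processes=2))   # ABCDE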
Exemple #58
0
from keras.datasets import cifar10
from multiprocessing import Pool, cpu_count
import cv2
import numpy as np
from tqdm import tqdm


def save_image(n):
    array = x_test[n]
    # array = array.transpose(1,2,0)
    array = cv2.cvtColor(array, cv2.COLOR_RGB2BGR)
    return cv2.imwrite("cifar10/image" + str(n) + ".png", array)


(x_train, y_train), (x_test, y_test) = cifar10.load_data()

pool = Pool(cpu_count())
images = list(
    tqdm(pool.imap(save_image, range(len(x_test))), total=len(x_test)))
pool.close()
pool.join()
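
Two caveats about the script above: cv2.imwrite silently returns False if the cifar10/ directory does not exist, and the workers read the module-level x_test, which is only shared for free on fork-based platforms (on spawn platforms the module, including load_data(), is re-imported in every worker). A slightly more defensive variant, assuming the same keras/OpenCV stack, is sketched below:

import os

import cv2
from keras.datasets import cifar10
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

(_, _), (x_test, _) = cifar10.load_data()


def save_image(n):
    bgr = cv2.cvtColor(x_test[n], cv2.COLOR_RGB2BGR)
    return cv2.imwrite(os.path.join("cifar10", "image%d.png" % n), bgr)


if __name__ == '__main__':
    os.makedirs("cifar10", exist_ok=True)      # imwrite will not create the folder
    with Pool(cpu_count()) as pool:
        ok = list(tqdm(pool.imap(save_image, range(len(x_test))), total=len(x_test)))
    assert all(ok), "some images failed to be written"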
Exemple #59
0
        default=1.5)
    parser.add_argument('--perfect', action='store', dest='perfect',
        type=bool, help='Do you want a perfect signal and sequence',
        default=False)
    parser.add_argument('--perflen', action='store', dest='perflen',
        type=int, help='repeat length for perfect mode',
        default=1)


    #---------- input list ---------------#
    arg = parser.parse_args()
    seq_list = get_seq_list(arg.input)
    id_list = get_id_list(arg.input)
    in_list = list(zip(seq_list, id_list))  # materialize so len() works below (zip is lazy in Python 3)

    #---------- load pore model ----------#
    kmer_poremodel=load_official_poremodel(arg.poremodel)

    #---------- partial function ---------#
    func = partial(sequence_to_true_signal,
                   kmer_poremodel=kmer_poremodel, perfect=arg.perfect, p_len=arg.perflen,
                   event_std=arg.event_std, filter_freq=arg.filter_freq, noise_std=arg.noise_std,
                   repeat_alpha=arg.alpha, sigroot=arg.output, aliroot=arg.alignment)

    #---------- multi process ------------#
    p = Pool(arg.threads)
    list(tqdm(p.imap(func, in_list),total=len(in_list)))
    p.close()
    p.join()
Exemple #60
0
    def update_vertex_positions_mt(self):
        num_procs = 16
        pool = Pool(processes=num_procs)
        self.blobj = self.get_blender_object().data.vertices

        # Note: the imap() results are never consumed, and each worker operates on a
        # pickled copy of the vertex it receives, so in-place updates made inside
        # update_one_vertex_no_matrix do not propagate back to this process.
        pool.imap(update_one_vertex_no_matrix, self._vertices, len(self._vertices) // 8)
        pool.close()
        pool.join()