def run():
    """Create the configured entities, then send follow/subscribe requests.

    For each of users, forums, threads and posts, ``settings[<entity>]``
    creation calls are dispatched on an 8-thread pool.  Afterwards
    ``settings['followers']`` follow requests and
    ``settings['subscribptions']`` subscribe requests are sent one by one.

    Request failures are reported and skipped (best effort) instead of being
    silently swallowed; KeyboardInterrupt/SystemExit are no longer trapped.
    """
    from multiprocessing.dummy import Pool as ThreadPool

    factories = [
        ('users', User),
        ('forums', Forum),
        ('threads', Thread),
        ('posts', Post),
    ]
    for entity, factory in factories:
        entities = [True] * int(settings[entity])
        ready_factory = factory().create
        pool = ThreadPool(8)
        # The imap result is intentionally not consumed: close()/join() still
        # waits for every task, and worker exceptions are dropped.
        pool.imap(ready_factory, entities)
        pool.close()
        pool.join()

    actions = [
        (int(settings['followers']), User().follow),
        # NOTE: the misspelled key is the one the settings mapping uses.
        (int(settings['subscribptions']), Thread().subscribe),
    ]
    for count, method in actions:
        for _ in range(count):
            url, args = method()
            print("Requesting %s with %s" % (url, args))
            try:
                args = json.loads(args)
                tools.Request(url, args, post=True).get_response()
            except Exception as exc:
                # Best effort: report the failure and keep going.
                print("Request to %s failed: %s" % (url, exc))
def run_query_simulations(states, engine='hoomd'):
    """Run all query simulations for a single iteration.

    :param states: sequence of states; each is passed to the worker and then
        to ``_post_query``.
    :param engine: ``'hoomd'`` or ``'lammps'`` (case-insensitive).
    :raises UnsupportedEngine: if ``engine`` is neither of the above.
    """
    # Gather hardware info.
    gpus = _get_gpu_info()
    if gpus is None:
        n_procs = cpu_count()
        gpus = []
        logging.info("Launching {n_procs} CPU threads...".format(**locals()))
    else:
        n_procs = len(gpus)
        logging.info("Launching {n_procs} GPU threads...".format(**locals()))

    # This must be an if/elif chain: with two independent ``if`` statements
    # the 'hoomd' case fell through to the final ``else`` and raised.
    engine_name = engine.lower()
    if engine_name == 'hoomd':
        worker = _hoomd_worker
    elif engine_name == 'lammps':
        worker = _lammps_worker
    else:
        raise UnsupportedEngine(engine)

    n_states = len(states)
    worker_args = zip(states, range(n_states), itertools.repeat(gpus))
    # imap needs an integer chunk size >= 1; force true division so the
    # ceiling is also correct under Python 2.
    chunk_size = max(1, int(ceil(n_states / float(n_procs))))

    pool = Pool(n_procs)
    pool.imap(worker, worker_args, chunk_size)
    pool.close()
    pool.join()
    for state in states:
        _post_query(state)
def download_img(folder, dataset_dir, class_name, images_list, threads):
    '''
    Download the images that are not yet present on disk.

    :param folder: train, validation or test
    :param dataset_dir: root directory of the dataset
    :param class_name: name of the class sub-directory
    :param images_list: list of the image ids to download
    :param threads: number of threads
    :return: None
    '''
    image_dir = folder
    download_dir = os.path.join(dataset_dir, image_dir, class_name)

    # Ids already on disk are the file names up to the first dot.
    downloaded_images = {f.split('.')[0] for f in os.listdir(download_dir)}
    images_list = list(set(images_list) - downloaded_images)

    if not images_list:
        # Nothing to do: return before a thread pool is ever created.
        print('[INFO] All images already downloaded.')
        return

    print("[INFO] Download of {} images in {}.".format(len(images_list), folder))
    commands = []
    for image in images_list:
        path = image_dir + '/' + str(image) + '.jpg ' + '"' + download_dir + '"'
        command = 'aws s3 --no-sign-request --only-show-errors cp s3://open-images-dataset/' + path
        commands.append(command)

    pool = ThreadPool(threads)
    try:
        # list() drains the iterator so tqdm advances as downloads finish.
        list(tqdm(pool.imap(os.system, commands), total=len(commands)))
        print('[INFO] Done!')
    finally:
        pool.close()
        pool.join()
def parallel_bulk(client, actions, thread_count=4, chunk_size=500,
        max_chunk_bytes=100 * 1024 * 1024,
        expand_action_callback=expand_action, **kwargs):
    """
    Parallel version of the bulk helper run in multiple threads at once.

    This is a generator: chunks are only sent while it is being iterated.

    :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use
    :arg actions: iterator containing the actions
    :arg thread_count: size of the threadpool to use for the bulk requests
    :arg chunk_size: number of docs in one chunk sent to es (default: 500)
    :arg max_chunk_bytes: the maximum size of the request in bytes (default:
        100MB)
    :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
        from the execution of the last chunk when some occur. By default we raise.
    :arg raise_on_exception: if ``False`` then don't propagate exceptions from
        call to ``bulk`` and just report the items that failed as failed.
    :arg expand_action_callback: callback executed on each action passed in,
        should return a tuple containing the action line and the data line
        (`None` if data line should be omitted).
    """
    # Avoid importing multiprocessing unless parallel_bulk is used
    # to avoid exceptions on restricted environments like App Engine
    from multiprocessing.dummy import Pool
    actions = map(expand_action_callback, actions)

    pool = Pool(thread_count)
    try:
        for result in pool.imap(
            lambda chunk: list(_process_bulk_chunk(client, chunk, **kwargs)),
            _chunk_actions(actions, chunk_size, max_chunk_bytes,
                           client.transport.serializer)
        ):
            for item in result:
                yield item
    finally:
        # Runs on exhaustion, on error, and when the caller abandons the
        # generator, so the worker threads are always released.
        pool.close()
        pool.join()
def put_from_manifest(
        s3_bucket, s3_connection_host, s3_ssenc, s3_base_path,
        aws_access_key_id, aws_secret_access_key, manifest,
        bufsize, reduced_redundancy, rate_limit, concurrency=None,
        incremental_backups=False):
    """
    Uploads files listed in a manifest to amazon S3
    to support larger than 5GB files multipart upload is used (chunks of 60MB)
    files are uploaded compressed with lzop, the .lzo suffix is appended

    Blank manifest lines are ignored.  When ``incremental_backups`` is true,
    each successfully uploaded file is deleted locally.  The function always
    ends by raising ``SystemExit``: status 0 if every upload succeeded,
    1 if ``upload_file`` returned ``None`` for at least one file.
    """
    import sys

    exit_code = 0
    bucket = get_bucket(
        s3_bucket, aws_access_key_id,
        aws_secret_access_key, s3_connection_host)
    # Read the manifest eagerly so the file handle is closed right away.
    with open(manifest, 'r') as manifest_fp:
        files = manifest_fp.read().splitlines()
    buffer_size = int(bufsize * MBFACTOR)

    upload_args = (
        (bucket, path, destination_path(s3_base_path, path), s3_ssenc,
         buffer_size, reduced_redundancy, rate_limit)
        for path in files if path)

    pool = Pool(concurrency)
    try:
        for uploaded in pool.imap(upload_file, upload_args):
            if uploaded is None:
                # Upload failed.
                exit_code = 1
            elif incremental_backups:
                # Delete files that were successfully uploaded.
                os.remove(uploaded)
    finally:
        pool.terminate()
    # sys.exit instead of the site-provided ``exit`` helper, which is not
    # guaranteed to exist (e.g. ``python -S`` or frozen builds).
    sys.exit(exit_code)
def render_one_category_model_views(shape_list, view_params):
    """Render views of every shape in ``shape_list`` with Blender.

    :param shape_list: iterable of
        ``(shape_synset, shape_md5, shape_file, view_num)`` tuples.
    :param view_params: indexable collection of 4-value view parameter rows;
        ``view_num`` rows are drawn at random (with replacement) per shape.
        The fourth value is clamped to at least 0.01.

    One temporary view file is written per shape, one Blender command is run
    per shape on a pool of ``g_syn_rendering_thread_num`` workers, and failed
    commands are reported.  The temporary directory is removed even when
    rendering raises.  An empty ``shape_list`` is a no-op.
    """
    tmp_dirname = tempfile.mkdtemp(dir=g_data_folder, prefix='tmp_view_')
    try:
        print('Generating rendering commands...')
        render_script = os.path.join(BASE_DIR, 'render_model_views.py')
        commands = []
        synsets = set()
        for shape_synset, shape_md5, shape_file, view_num in shape_list:
            synsets.add(shape_synset)
            # write tmp view file (text mode so str writes work on Py2 and Py3)
            tmp = tempfile.NamedTemporaryFile(mode='w', dir=tmp_dirname, delete=False)
            try:
                for _ in range(view_num):
                    params = view_params[random.randint(0, len(view_params) - 1)]
                    tmp.write('%f %f %f %f\n' % (params[0], params[1], params[2], max(0.01, params[3])))
            finally:
                tmp.close()
            command = '%s %s --background --python %s -- %s %s %s %s %s > /dev/null 2>&1' % (
                g_blender_executable_path, g_blank_blend_file_path, render_script,
                shape_file, shape_synset, shape_md5, tmp.name,
                os.path.join(g_syn_images_folder, shape_synset, shape_md5))
            commands.append(command)
        print('done (%d commands)!' % (len(commands)))
        if not commands:
            # Nothing to render; previously this path raised IndexError.
            return
        print(commands[0])

        print('Rendering, it takes long time...')
        report_step = 100
        # Create the output folder of every synset seen, not just the last one.
        for synset in sorted(synsets):
            synset_dir = os.path.join(g_syn_images_folder, synset)
            if not os.path.exists(synset_dir):
                os.mkdir(synset_dir)

        pool = Pool(g_syn_rendering_thread_num)
        try:
            for idx, return_code in enumerate(pool.imap(partial(call, shell=True), commands)):
                if idx % report_step == 0:
                    print('[%s] Rendering command %d of %d' % (datetime.datetime.now().time(), idx, len(shape_list)))
                if return_code != 0:
                    print('Rendering command %d of %d (\"%s\") failed' % (idx, len(shape_list), commands[idx]))
        finally:
            # Every result has been consumed on the success path, so this
            # only cuts work short when an error is propagating.
            pool.terminate()
            pool.join()
    finally:
        shutil.rmtree(tmp_dirname)
def find_process_files(root_dir):
    """Walk ``root_dir`` and process every file matching a rule in ``rules``.

    For each file whose name ends with one of the extensions in ``rules`` a
    job is queued; the target extension is ``.c`` unless the source contains
    a ``# distutils: language = c++`` line, in which case it is ``.cxx``.
    A ``.pyx`` file that has a sibling ``<name>.pyx.in`` is skipped.  Jobs are
    run through ``process`` on a pool, then the hash database is saved.
    """
    lock = Lock()
    hash_db = load_hashes(HASH_FILE)
    # Keep changed .pxi hashes in a separate dict until the end
    # because if we update hash_db and multiple files include the same
    # .pxi file the changes won't be detected.
    pxi_hashes = {}

    jobs = []
    for cur_dir, dirs, files in os.walk(root_dir):
        for filename in files:
            in_file = os.path.join(cur_dir, filename + ".in")
            if filename.endswith('.pyx') and os.path.isfile(in_file):
                continue
            for fromext, function in rules.items():
                if filename.endswith(fromext):
                    toext = ".c"
                    with open(os.path.join(cur_dir, filename), 'rb') as f:
                        data = f.read()
                    m = re.search(br"^\s*#\s*distutils:\s*language\s*=\s*c\+\+\s*$", data, re.I|re.M)
                    if m:
                        toext = ".cxx"
                    fromfile = filename
                    tofile = filename[:-len(fromext)] + toext
                    jobs.append((cur_dir, fromfile, tofile, function, hash_db, pxi_hashes, lock))

    pool = Pool()
    try:
        # Drain the iterator so any exception raised by a job surfaces here.
        for _ in pool.imap(lambda args: process(*args), jobs):
            pass
    finally:
        pool.close()
        pool.join()

    hash_db.update(pxi_hashes)
    save_hashes(hash_db, HASH_FILE)
def ffmpeg_encode(threads=1):
    """Render 360 frames with ``rotate`` and pipe them into ffmpeg.

    :param threads: number of pool workers used to produce frames.

    Frames are written to ffmpeg's stdin in order; the result is encoded
    with libx264 into ``~/out.mov``.  ffmpeg is started lazily, once the
    first frame is available.  The pool and the ffmpeg pipe are released
    even if producing a frame fails.
    """
    cmd = ['ffmpeg', '-y', '-vcodec', 'ppm', '-r', '23.97', '-f', 'image2pipe', '-i', '-']
    cmd.extend(['-vcodec', 'libx264', '-pix_fmt', 'yuv420p', '-profile', 'baseline', '-vb', '15M', '-crf', '16'])
    cmd.extend([os.path.expanduser('~/out.mov')])
    print(subprocess.list2cmdline(cmd))

    p = None
    pool = Pool(threads)
    try:
        for result in pool.imap(rotate, xrange(360)):
            if p is None:
                p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
            p.stdin.write(result)
        if p is not None:
            p.stdin.flush()
    finally:
        # All results are consumed on the success path, so terminate() only
        # cuts work short when an exception is propagating.
        pool.terminate()
        pool.join()
        if p is not None:
            # Closing stdin signals end-of-input so ffmpeg can finalize.
            p.stdin.close()
            p.wait()
def run():
    """Run every configured creation step on a thread pool, logging progress.

    For each step, ``settings[<name>]`` calls of the bound method are issued
    on a pool of ``settings['num_threads']`` threads.  Progress is logged the
    first time each multiple of 5 percent (5..100) is reached.  Any exception
    is printed, the pool is terminated and the process exits with status 1.
    """
    steps = [
        ('users', User().create),
        ('forums', Forum().create),
        ('threads', Thread().create),
        ('posts', Post().create),
        ("followers", User().follow),
        ("subscribptions", Thread().subscribe),
    ]
    for entity, factory in steps:
        entities = [True] * int(settings[entity])
        num_tasks = len(entities)
        pool = ThreadPool(int(settings['num_threads']))
        try:
            # Milestones still to be announced, in percent.
            pending = list(range(5, 105, 5))
            finished = 0
            for _ in pool.imap(factory, entities):
                perc = finished * 100 / num_tasks
                finished += 1
                if perc % 5 != 0 or perc not in pending:
                    continue
                log.print_out('Creating %s: %d%% done' % (entity, perc))
                pending.remove(perc)
            pool.close()
            pool.join()
        except Exception as e:
            print(e)
            pool.terminate()
            sys.exit(1)
def runLocalCommands(args, outputDir, commands):
    """Run the given commands locally, at most 10 at a time.

    :param args: object with a boolean ``dry_run`` attribute; when true the
        commands are only printed.
    :param outputDir: directory each command is run from (``cd`` first).
    :param commands: sequence of ``(cuts, name, cmd)`` tuples; only ``cmd``
        (a shell command string) is used.

    Each command echoes ``i/N<TAB>cmd`` before running.  Its stdout is
    discarded while its stderr is sent to this process's stdout.  Commands
    with a non-zero exit status are reported with their 0-based index.
    Asks for confirmation when more than 50 commands are given.
    """
    # NOTE: this is going to BREAK meff optimisation if we re-cycle histograms.
    # Needs to be updated to run in successive order if we implement that.
    N = len(commands)

    if N > 50:
        print("")
        print("Are you sure you want to run %d commands locally?" % N)
        if args.dry_run:
            print("[NB: this is a dry run]")
        input("Press enter to continue")
        print("")

    cmds = []
    for i, (cuts, name, cmd) in enumerate(commands):
        cmds.append("cd %s && echo '%d/%d\t%s' && %s 2>&1 >/dev/null" % (outputDir, i+1, N, cmd, cmd))

    if args.dry_run:
        print("Would run following commands:")
        for cmd in cmds:
            print("   %s" % cmd)
        return

    pool = Pool(10)  # concurrent commands at a time
    try:
        for i, returncode in enumerate(pool.imap(partial(subprocess.call, shell=True), cmds)):
            if returncode != 0:
                print(("%d command failed: %d" % (i, returncode)))
    finally:
        pool.close()
        pool.join()
def put_from_manifest(
    s3_bucket,
    s3_connection_host,
    s3_ssenc,
    s3_base_path,
    aws_access_key_id,
    aws_secret_access_key,
    manifest,
    bufsize,
    concurrency=None,
    incremental_backups=False,
):
    """
    Uploads files listed in a manifest to amazon S3
    to support larger than 5GB files multipart upload is used (chunks of 60MB)
    files are uploaded compressed with lzop, the .lzo suffix is appended

    Blank manifest lines are ignored.  When ``incremental_backups`` is true,
    every listed file is removed locally after all uploads have run.
    """
    bucket = get_bucket(s3_bucket, aws_access_key_id, aws_secret_access_key, s3_connection_host)
    # Read the manifest eagerly so the handle is closed right away, and drop
    # blank lines so '' never reaches upload_file or os.remove.
    with open(manifest, "r") as manifest_fp:
        files = [path for path in manifest_fp.read().splitlines() if path]
    buffer_size = int(bufsize * MBFACTOR)

    upload_args = (
        (bucket, path, destination_path(s3_base_path, path), s3_ssenc, buffer_size)
        for path in files
    )
    pool = Pool(concurrency)
    try:
        # Drain the iterator: this waits for all uploads and re-raises the
        # first exception thrown by upload_file.
        for _ in pool.imap(upload_file, upload_args):
            pass
    finally:
        pool.terminate()

    if incremental_backups:
        for path in files:
            os.remove(path)
def runPool(fname):
    """Apply ``poolWorker`` to every line of ``fname`` and print the results.

    Lines are processed on a pool of 8 workers; results are printed in
    input order.  The file and the pool are both released on exit.
    """
    with open(fname) as data:
        pool = Pool(8)
        try:
            for result in pool.imap(poolWorker, data):
                print(result)
        finally:
            # Join the pool before the file is closed: imap reads the
            # file lazily from a helper thread.
            pool.close()
            pool.join()
    return
def parallel_build(jobs, log, verbose=True):
    """Run ``run_worker`` over ``jobs`` in parallel, stopping at the first failure.

    :param jobs: iterable of job descriptions passed to ``run_worker``.
    :param log: callable used to report a job's stdout/stderr.
    :param verbose: when true, log the output of successful jobs as well.
    :return: ``True`` if every job succeeded, ``False`` as soon as one fails.
    """
    p = Pool(cpu_count)
    try:
        for ok, stdout, stderr in p.imap(run_worker, jobs):
            if verbose or not ok:
                log(stdout)
                if stderr:
                    log(stderr)
            if not ok:
                return False
        return True
    finally:
        # Also runs on the early ``return False``: jobs still queued are
        # dropped instead of continuing in the background.
        p.terminate()
        p.join()
def parallel_check_output(jobs, log):
    """Yield the stdout of each job, in order, aborting on the first failure.

    :param jobs: iterable of jobs; each is passed to ``run_worker`` as
        ``(job, '')`` with ``decorate=False``.
    :param log: callable used to report the output of a failed job.
    :raises SystemExit: with status 1 when a job fails.

    This is a generator: the pool is created on the first ``next()`` and
    released when the generator finishes, fails or is closed.
    """
    p = Pool(cpu_count)
    try:
        for ok, stdout, stderr in p.imap(
                partial(run_worker, decorate=False),
                ((j, '') for j in jobs)):
            if not ok:
                log(stdout)
                if stderr:
                    log(stderr)
                raise SystemExit(1)
            yield stdout
    finally:
        p.terminate()
        p.join()
def _install_coreos(self):
    """Run ``coreos-install`` for every entry of ``self.config_dict``.

    One command is built per entry from its ``'disk'`` and ``'tmpfile'``
    values and ``cfg.coreos_update_channel``.  The commands are run through
    ``runcmd`` on a pool sized by ``len(self.dns_names)``.  A result whose
    first element is truthy is logged as a failure together with its third
    element.
    """
    commands = []
    log.info(self.config_dict)
    for value in self.config_dict.itervalues():
        log.info("installing coreos on {}".format(value['disk']))
        commands.append("coreos-install -v -d {} -C {} -c {}".format(value['disk'], cfg.coreos_update_channel, value['tmpfile']))

    if not commands:
        return

    # Pool() rejects a size of 0, so always keep at least one worker.
    pool = Pool(max(1, len(self.dns_names)))
    try:
        for i, retval in enumerate(pool.imap(runcmd, commands)):
            if retval[0]:
                log.error("%s command failed: %s" % (i, retval[2]))
    finally:
        pool.close()
        pool.join()
def objectFeatureValues(dataSet, column, row, plate, exemplars, probes):
    """Return a DataFrame of feature values for the selected objects.

    The frame is indexed by the objects returned by ``allObjects`` and has
    one column per feature of ``dataSet``.  Each column is computed by
    ``forObjectFeatureValues`` on a worker pool.
    """
    objects = allObjects(dataSet, column, row, plate, exemplars, probes)
    cols = data.features(dataSet)

    # One task per feature column.
    tasks = []
    for col in cols:
        tasks.append((dataSet, col, objects))

    pool = Pool()
    column_results = pool.imap(forObjectFeatureValues, tasks)
    pool.close()
    pool.join()

    merged = pd.DataFrame(index=objects, columns=cols)
    for name, values in column_results:
        merged[name] = values
    return merged
def main(args):
    """
    Entry point of the scan. It:

     * builds the processing context,
     * removes leftover working mapfiles from the output directory,
     * processes all inputs on a thread pool,
     * renames the produced working mapfiles to their final extension,
     * always ends by calling ``sys.exit``: 0 on success, 1 if some inputs
       yielded ``None``, 2 if no files were found, 3 if nothing was produced.

    :param ArgumentParser args: Parsed command-line arguments

    """
    # Processing context built from command-line arguments or SYNDA job dictionary
    ctx = ProcessingContext(args)
    logging.info('==> Scan started')

    # Incomplete mapfiles left over by a previous run are silently removed
    stale_pattern = '*{0}'.format(WORKING_EXTENSION)
    for root, _, filenames in os.walk(ctx.outdir):
        for filename in filter(filenames, stale_pattern):
            os.remove(os.path.join(root, filename))
    logging.info('Output directory cleaned')

    # Process every input on the thread pool and collect the results in order
    pool = ThreadPool(int(ctx.threads))
    outfiles_all = list(pool.imap(wrapper, yield_inputs(ctx)))
    # Only string results are mapfile paths
    outfiles = []
    for result in outfiles_all:
        if isinstance(result, str):
            outfiles.append(result)
    pool.close()
    pool.join()

    # Nothing produced: tell "no input" apart from "all inputs failed/ignored"
    if not outfiles:
        no_input = process.called == 0
        if no_input:
            logging.warning('==> No files found')
        else:
            logging.warning('==> All files have been ignored or have failed leading to no mapfile.')
        sys.exit(2 if no_input else 3)

    # Swap the working extension for the final one
    # (an existing final mapfile is silently overwritten)
    for outfile in set(outfiles):
        os.rename(outfile, outfile.replace(WORKING_EXTENSION, FINAL_EXTENSION))

    logging.info('==> Scan completed ({0} file(s) scanned)'.format(len(outfiles)))

    # Non-zero exit status if any inputs produced no result
    if None in outfiles_all:
        logging.warning('{0} file(s) have been skipped'.format(outfiles_all.count(None)))
        sys.exit(1)
    sys.exit(0)
def main(argv): itteration = '' users = '' try: opts, args = getopt.getopt(argv,"hi:u:",["iter=","user="******"-i", "--iter"): itteration = arg elif opt in ("-u", "--user"): users = arg print 'The number of iteration is : ', itteration print 'The number of user is : ', users itteration = int(itteration) users = int(users) def child(cmd): p = Popen(cmd, stdout=PIPE, shell=True) out, err = p.communicate() return out, p.returncode commands = [] command = "curl -s " for i in range(itteration): # run 10 curl commands in total #print os.system(command) commands.append(command) pool = Pool(users) # Nummber of concurrent commands at a time times = [] for i, (output, returncode) in enumerate(pool.imap(child, commands)): if returncode != 0: print("{} command failed: {}".format(i, returncode)) else: print("{} success: {}".format(i, output)) times.append(float(output)) print 'Average: {}'.format(sum(times) / len(times) if times else 0)
def objectHistogramMatrix(dataSet, features, exemplars, bins):
    """Compute 2D histograms for every feature pair and return them as nested dicts.

    A task is created for each pair ``(x, y)`` of ``features`` with ``x < y``,
    plus one for the two MDS columns when the data set has them.  The result
    maps ``x -> y -> contour key -> histogram`` (as nested lists) and, for
    the mirrored pair ``y -> x``, the transposed histogram.
    """
    tasks = []
    for yFtr in features:
        for xFtr in features:
            if xFtr < yFtr:
                tasks.append((dataSet, features, exemplars, xFtr, yFtr, bins))
    if data.mdsColumnsPresent(dataSet):
        mdsX, mdsY = data.mdsColumns[0], data.mdsColumns[1]
        tasks.append((dataSet, features, exemplars, mdsX, mdsY, bins))

    pool = Pool()
    results = pool.imap(objectHistogram2D, tasks)
    pool.close()
    pool.join()

    def nested():
        # Dictionary that creates deeper levels on demand.
        return defaultdict(nested)

    histograms = nested()
    for xFeature, yFeature, contours in results:
        for key, counts in contours.iteritems():
            histograms[xFeature][yFeature][key] = counts.tolist()
            histograms[yFeature][xFeature][key] = counts.transpose().tolist()
    return dict(histograms)
def run_multi_shell_cmds(cmds, max_parallel=2):
    """Run multiple shell commands in parallel.

    :param cmds: indexable sequence of shell command strings.
    :param max_parallel: number of commands run at the same time.

    Exits the process with the status code of the first failing command
    (checked in submission order).  Logs the elapsed time when it exceeds
    five seconds.
    """
    stopwatch = StopWatch()

    # Based on: http://stackoverflow.com/questions/14533458/python-threading-multiple-bash-subprocesses
    pool = Pool(max_parallel)
    try:
        for i, return_code in enumerate(pool.imap(partial(call, shell=True), cmds)):
            if return_code != 0:
                log("CMD: " + cmds[i])
                log("ERROR: CMD ended with status code %d" % return_code)
                sys.exit(return_code)
    finally:
        # Release the workers; on failure this also prevents commands that
        # are still queued from being started.
        pool.terminate()
        pool.join()

    h, m, s = stopwatch.stop()
    if h > 0 or m > 0 or s > 5:
        log("Elapsed time: %d h %d m %d s" % (h, m, s))
def featureHistograms(dataSet, featureSet, exemplars, bins):
    """Compute one histogram per (cluster, image feature) combination.

    Clusters come from ``clustersAsMap``; features from
    ``data.imageFeatures``.  Histograms are computed by ``featureHistogram``
    on a worker pool.

    :return: dict mapping ``cluster -> feature -> histogram``; every cluster
        has an entry even when no result is reported for it.
    """
    partition = clustersAsMap(dataSet, featureSet, exemplars)

    # Every feature is combined with every cluster.
    tasks = []
    for feature in data.imageFeatures(dataSet):
        for cluster, _ in partition.iteritems():
            tasks.append((dataSet, featureSet, exemplars, feature, cluster, bins))

    pool = Pool()
    results = pool.imap(featureHistogram, tasks)
    pool.close()
    pool.join()

    histograms = {}
    for cluster, _ in partition.iteritems():
        histograms[cluster] = {}
    for feature, cluster, histogram in results:
        histograms[cluster][feature] = histogram
    return histograms
def pcd2spin(path):
    """Convert every ``.pcd`` file under ``path`` that has no spin image yet.

    :param path: prefix that is concatenated directly with ``*.pcd`` /
        ``*.bin`` (so it normally ends with a path separator).

    A ``.pcd`` file counts as done when a ``.bin`` file exists whose name,
    with its last 14 characters removed, equals the ``.pcd`` path.  Remaining
    files are converted smallest first, four at a time.  ``./spin_images/``
    is created if missing.
    """
    string = '/home/mdm/Projects/cellseer_build/pcd_spin_image '
    executable = string.strip()
    if not os.path.exists('./spin_images/'):
        os.makedirs('./spin_images/')

    list_of_spins = glob.glob(path + '*.bin')
    list_of_pcl = glob.glob(path + '*.pcd')
    already_converted = set(x[:-14] for x in list_of_spins)
    set_to_do = set(list_of_pcl) - already_converted
    sorted_files = sorted(set_to_do, key=os.path.getsize)
    print(str(len(set_to_do)) + 's PLCs to convert')

    # Argument lists (no shell) so paths containing spaces or shell
    # metacharacters are passed to the converter unchanged.
    commands_to_run = [[executable, x] for x in sorted_files]

    pool = Pool(4)
    try:
        for i, returncode in enumerate(pool.imap(call, commands_to_run)):
            if returncode != 0:
                print("%d command failed: %d" % (i, returncode))
            else:
                print("%d command done! %d " % (i, returncode))
    finally:
        pool.close()
        pool.join()
def upload(data):
    """Save the documents of ``data`` that are not yet stored, in batches of 200.

    :param data: iterable of dicts with ``'apn'``, ``'county'``, ``'state'``
        and ``'year'`` keys.

    Each document is first checked with a count query on 10 pool workers;
    documents for which the count is 0 are saved, the rest are reported as
    duplicates.
    """
    def worker(doc):
        # Return ``doc`` if no matching record exists yet, else None.
        client = montage.Client('apn-builder', os.environ.get('MONTAGE_TOKEN'))
        query = montage.Query('apn').get_all(doc['apn'], index='apn').filter(
            montage.Field('county') == doc['county'],
            montage.Field('state') == doc['state'],
            montage.Field('year') == doc['year'],
        ).count()
        response = client.execute(query=query)

        message = "[{status}] {doc[county]}, {doc[state]}: {doc[apn]}"
        if response['data']['query'] == 0:
            print(message.format(status='save', doc=doc))
            return doc
        else:
            print(message.format(status='dupe', doc=doc))

    def save(client, batch):
        # On failure, drop into an interactive console, then exit.
        try:
            print('Saving batch of {0}...'.format(len(batch)))
            return client.documents.save('apn', *batch)
        except Exception as err:
            print('Error:', err)
            code.interact(local=locals())
            sys.exit()

    client = montage.Client('apn-builder', os.environ.get('MONTAGE_TOKEN'))
    pool = Pool(processes=10)
    docs = pool.imap(worker, data)

    batch = []
    for doc in (doc for doc in docs if doc is not None):
        batch.append(doc)
        if len(batch) >= 200:
            # Save the accumulated batch.  Passing ``None`` here made
            # ``len(batch)`` fail, so full batches were never stored.
            save(client, batch)
            batch = []

    if batch:
        save(client, batch)
def _download_keys(self, keys, total_size, pool_size=5): logging.info("Starting to download...") progress_string = "" read_bytes = 0 thread_pool = Pool(pool_size) for size in thread_pool.imap(self._download_key, keys): old_width = len(progress_string) read_bytes += size progress_string = "%s / %s (%.2f%%)" % (self._human_size(read_bytes), self._human_size(total_size), (read_bytes/float(total_size))*100.0) width = len(progress_string) padding = "" if width < old_width: padding = " "*(width-old_width) progress_string = "%s%s\r" % (progress_string, padding) sys.stderr.write(progress_string)
def write_to_file(in_path, out_path):
    """Retrieve documents for every claim in ``in_path`` and append them to ``out_path``.

    :param in_path: JSON-lines file; each non-blank line is an object with a
        ``'claim'`` key.
    :param out_path: JSON-lines file that receives one
        ``{"claim": ..., "docs": ...}`` object per claim (opened in append
        mode).

    ``doc_retriever`` is applied to the claims on 16 threads; output order
    matches input order.
    """
    with open(in_path, 'r') as f:
        # Skip blank lines (e.g. a trailing newline) instead of failing in
        # json.loads.
        claims = [json.loads(line.strip())['claim'] for line in f if line.strip()]

    processed_claims = len(claims)
    print("Total: {}".format(processed_claims))

    # Use thread pool to parallelize
    pool = ThreadPool(16)
    try:
        predicted_docs = list(
            tqdm.tqdm(pool.imap(doc_retriever, claims), total=processed_claims))
    finally:
        pool.close()
        pool.join()

    # Open the output once rather than once per record.
    with open(out_path, 'a+') as fout:
        for claim, docs in zip(claims, predicted_docs):
            json.dump({'claim': claim, 'docs': docs}, fout)
            fout.write('\n')
import os
import pandas as pd
import numpy as np
from functools import partial
from multiprocessing.dummy import Pool
from subprocess import call

if __name__ == '__main__':
    # Run convert_toCSV.py on every .nc file found in ./data.
    data_Folder = 'data'
    data_path = os.path.join(os.getcwd(), data_Folder)

    # The directory test must look inside the data folder; testing the bare
    # name checked the current working directory instead.
    files = [d for d in os.listdir(data_path)
             if d.endswith('.nc') and not os.path.isdir(os.path.join(data_path, d))]

    # Argument lists (no shell) so file names with spaces are handled.
    commands = [["python", "convert_toCSV.py", os.path.join(data_Folder, filename)]
                for filename in files]

    pool = Pool(1)  # one command at a time
    try:
        for i, returncode in enumerate(pool.imap(partial(call, shell=False), commands)):
            if returncode != 0:
                print("%d command failed: %d" % (i, returncode))
    finally:
        pool.close()
        pool.join()
tmp_prefix = '_tmp_treemixrunner' pids = npr.choice(1000000, size=num_opt, replace=False) prefixes = [tmp_prefix + str(pid) for pid in pids] cmds = [] for pid, prefix in zip(pids, prefixes): cmd_p = cmd[:] + ['-seed', str(pid), '-o', prefix] cmds.append(cmd_p) pool = Pool(num_processes) # two concurrent commands at a time fnull = open(os.devnull, 'w') try: for i, returncode in enumerate( pool.imap(partial(call, shell=False, stdout=fnull, stderr=fnull), cmds)): if returncode != 0: print "command %d failed with code: %d" % (i, returncode) lliks = [] for repidx, prefix in enumerate(prefixes): like_filename = prefix + '.llik' with open(like_filename, 'r') as fin: for line in fin: pass llik = float(line.split(':')[1].strip()) lliks.append(llik) if print_lls: print 'rep', repidx, llik best_prefix_idx = np.array(lliks).argmax() best_prefix = prefixes[best_prefix_idx]
return str(err) if not response.ok: print(response) for block in response.iter_content(1024): if not block: break handle.write(block) return fname base_url = 'http://www.herdofwy.com/' request = requests.get('http://www.herdofwy.com/adopteddogs.html') soup = bs4.BeautifulSoup(request.text, 'html.parser') imgs = soup.findAll('img') imgs = set(imgs) picture_formats = set() for img in imgs: picture_formats.add(img['src'].split('.')[-1].lower()) new_picture_formats = picture_formats.copy() for pic_format in picture_formats: if pic_format not in ['gif', 'jpg', 'png', 'jpeg']: new_picture_formats.remove(pic_format) if not os.path.exists('./imgs'): os.makedirs('./imgs') pool = Pool(16) for name in pool.imap(handler, imgs): print(name)
def main():
    """Parse the command line and run the analysis executable over the inputs.

    Input and output directories are normalised to end with ``/``.  The
    arguments are validated with ``are_valid_args``; on failure the process
    exits with status 1.  One ``run_analysis`` call is made per parameter
    set, on a pool of ``--number-of-threads`` workers, with a progress bar
    unless ``-p`` is given.
    """
    parser = argparse.ArgumentParser(
        "Python script to analyze files in parallel")
    parser.add_argument("executable",
                        type=str,
                        help="Executable you want to run")
    parser.add_argument("-i",
                        "--input",
                        required=True,
                        type=str,
                        nargs="+",
                        help="Path to the directories you want to analyze")
    parser.add_argument("-r",
                        "--run-id",
                        required=True,
                        type=str,
                        help="Number of run which you are analyzing")
    parser.add_argument("-e",
                        "--extension",
                        required=False,
                        type=str,
                        default="root",
                        help="Extention of files you want to analyze")
    parser.add_argument("-t",
                        "--type",
                        required=False,
                        type=str,
                        default="root",
                        help="Path to the directory you want to analyze")
    parser.add_argument(
        "-o",
        "--output",
        required=False,
        help=
        "Path to the output directory in which you want to save analyzed files"
    )
    parser.add_argument("-p",
                        "--progress-bar",
                        action="store_false",
                        help="Using this option turns progress bar off")
    parser.add_argument("-n",
                        "--number-of-threads",
                        required=False,
                        default=20,
                        type=int,
                        help="Number of threads to run simultaneously")
    args = vars(parser.parse_args())

    executable = args["executable"]
    input_directories = args["input"]
    output_directory = args["output"]
    progress_bar = args["progress_bar"]
    run_id = args["run_id"]
    file_type = args["type"]
    threads = args["number_of_threads"]
    extension = args["extension"]

    # endswith() instead of [-1] so an empty string does not raise IndexError.
    input_directories = [
        directory if directory.endswith("/") else directory + "/"
        for directory in input_directories
    ]
    if output_directory and not output_directory.endswith("/"):
        output_directory += "/"

    run_id_setup = get_run_id_setup_mapping(run_id)
    if not are_valid_args(threads, input_directories, output_directory,
                          file_type, run_id_setup, extension):
        # Invalid arguments are a failure: exit non-zero so calling scripts
        # can detect it (a bare sys.exit() reported success).
        sys.exit(1)

    list_of_params = get_parameters_for_analysis(executable, file_type,
                                                 extension, run_id,
                                                 run_id_setup,
                                                 input_directories,
                                                 output_directory)

    print("\033[32m" + "All checks passed, running analysis now." +
          "\033[0m")

    pool = PoolThread(threads)
    try:
        if progress_bar:
            for _ in tqdm.tqdm(pool.imap(run_analysis, list_of_params),
                               total=len(list_of_params)):
                pass
        else:
            pool.map(run_analysis, list_of_params)
    finally:
        pool.close()
        pool.join()
geoj_features["features"].append(g) neighb_features["features"].append(g) gf.write(json.dumps(neighb_features)) # Check if path isn't already created, otherwise create directories and topojson for neighb in neighborhoods: if not os.path.exists(BASE_DIR + "/data/{0}".format(neighb)): subprocess.call(["mkdir", "-p", "data/{0}".format(neighb)]) # Remove any hyphens to match topojson file naming conventions rn = re.sub('[^a-z]+', '', neighb) topo_call = topo_url.format(neighb, rn) geo_call = geo_url.format(neighb) wget_topo = "wget -O data/{0}/{0}.topojson {1}".format(neighb, topo_call) wget_geo = "wget -O data/{0}/{0}.geojson {1}".format(neighb, geo_call) calls.extend((wget_topo, wget_geo)) # Run wget calls for GeoJSON in multiple processes to speed up pool = Pool(4) for i, returncode in enumerate(pool.imap(partial(subprocess.call, shell=True), calls)): if returncode != 0: errors.append(calls[i]) # List comprehension to clean all GeoJSON files and create master file [clean_geojson(n) for n in neighborhoods] with open("dna_neighborhoods.geojson", "w") as gf: gf.write(json.dumps(geoj_features))
class virtualscreening:
    """Lazy ligand-processing pipeline.

    ``self._pipe`` holds an iterable of molecules.  Each stage
    (``apply_filter``, ``dock``, ``score``) replaces it with a new lazy
    iterable wrapping the previous one, so nothing is computed until the
    pipe is consumed through ``fetch``, ``write`` or ``write_csv``.
    """

    def __init__(self, n_cpu=-1, verbose=False):
        """Create an empty pipeline.

        ``n_cpu`` > 0 sets the pool size; any other value passes ``None`` to
        ``Pool``.  ``n_cpu == 1`` additionally makes ``dock``/``score`` run
        without the pool.  ``verbose`` enables progress output in ``fetch``.
        """
        self._pipe = None
        self.n_cpu = n_cpu
        self.num_input = 0   # molecules read so far (updated by _ligand_pipe)
        self.num_output = 0  # molecules that reached fetch() so far
        self.verbose = verbose
        # setup pool
        self._pool = Pool(n_cpu if n_cpu > 0 else None)

    def load_ligands(self, file_type, ligands_file):
        """Start the pipe from the molecules read from ``ligands_file``."""
        self._pipe = self._ligand_pipe(toolkit.readfile(file_type, ligands_file))

    def _ligand_pipe(self, ligands):
        """Yield ``ligands`` unchanged while counting them in ``num_input``."""
        for n, mol in enumerate(ligands):
            self.num_input = n+1
            yield mol

    def apply_filter(self, expression, filter_type='expression', soft_fail = 0):
        """Append a filter stage to the pipe.

        With ``filter_type='expression'``, ``expression`` is handed to
        ``_filter`` as is.  With ``filter_type='preset'``, ``expression``
        names a built-in rule set: ``'l5'``/``'ro5'`` or ``'ro3'``
        (case-insensitive); an unknown preset leaves the pipe unchanged.
        ``soft_fail`` is the number of rules a molecule may violate.
        """
        if filter_type == 'expression':
            self._pipe = self._filter(self._pipe, expression, soft_fail = soft_fail)
        elif filter_type == 'preset':
            # define presets
            # TODO: move presets to another config file
            # Lipinski rule of 5's
            if expression.lower() in ['l5', 'ro5']:
                self._pipe = self._filter(self._pipe, ['mol.molwt < 500', 'mol.calcdesc(["HBA1"])["HBA1"] <= 10', 'mol.calcdesc(["HBD"])["HBD"] <= 5', 'mol.calcdesc(["logP"])["logP"] <= 5'], soft_fail = soft_fail)
            # Rule of three
            elif expression.lower() in ['ro3']:
                self._pipe = self._filter(self._pipe, ['mol.molwt < 300', 'mol.calcdesc(["HBA1"])["HBA1"] <= 3', 'mol.calcdesc(["HBD"])["HBD"] <= 3', 'mol.calcdesc(["logP"])["logP"] <= 3'], soft_fail = soft_fail)

    def _filter(self, pipe, expression, soft_fail = 0):
        """Yield the molecules of ``pipe`` that satisfy ``expression``.

        ``expression`` is either one Python expression string or a list of
        them, evaluated with the current molecule bound to ``mol``.  For a
        list, a molecule passes when at most ``soft_fail`` expressions are
        false; ``soft_fail`` is ignored for a single string.
        """
        # NOTE(review): expressions are run through eval(); they must never
        # come from untrusted input.
        for mol in pipe:
            if type(expression) is list:
                fail = 0
                for e in expression:
                    if not eval(e):
                        fail += 1
                if fail <= soft_fail:
                    yield mol
            else:
                if eval(expression):
                    yield mol

    def dock(self, engine, protein, *args, **kwargs):
        """Append a docking stage; only ``'autodock_vina'`` is supported.

        Extra arguments are forwarded to the engine constructor.  Raises
        ``ValueError`` for any other engine name.
        """
        if engine.lower() == 'autodock_vina':
            from .docking.autodock_vina import autodock_vina
            engine = autodock_vina(protein, *args, **kwargs)
        else:
            raise ValueError('Docking engine %s was not implemented in ODDT' % engine)
        def _iter_conf(results):
            """ Generator to go through docking results, and put them to pipe """
            for confs in results:
                for conf in confs:
                    yield conf
        if self.n_cpu != 1:
            # presumably _parallel_helper calls the named method on the given
            # object with the given keyword arguments -- defined elsewhere
            docking_results = self._pool.imap(_parallel_helper, ((engine, "dock", {'ligands':lig, 'single': True}) for
                lig in self._pipe))
        else:
            docking_results = (engine.dock(lig, single=True) for lig in self._pipe)
        # Each ligand yields several conformations; flatten them into the pipe.
        self._pipe = _iter_conf(docking_results)

    def score(self, function, protein, *args, **kwargs):
        """Append a scoring stage using ``'rfscore'`` or ``'nnscore'``.

        ``protein`` may be a file name, in which case the first molecule of
        that file is read (format taken from the file extension).  Raises
        ``ValueError`` for an unknown scoring function.
        """
        if type(protein) is str:
            extension = protein.split('.')[-1]
            protein = toolkit.readfile(extension, protein).next()
            protein.protein = True

        if function.lower() == 'rfscore':
            from .scoring.functions.RFScore import rfscore
            sf = rfscore.load()
            sf.set_protein(protein)
        elif function.lower() == 'nnscore':
            from .scoring.functions.NNScore import nnscore
            sf = nnscore.load()
            sf.set_protein(protein)
        else:
            raise ValueError('Scoring Function %s was not implemented in ODDT' % function)
        if self.n_cpu != 1:
            self._pipe = self._pool.imap(_parallel_helper, ((sf, 'predict_ligand', {'ligand': lig}) for lig in self._pipe))
        else:
            self._pipe = sf.predict_ligands(self._pipe)

    def fetch(self):
        """Consume the pipe, yielding each molecule and counting it in ``num_output``.

        In verbose mode a progress line is printed whenever ``num_input`` is
        a multiple of 100, followed by an empty line once the pipe is
        exhausted.
        """
        for n, mol in enumerate(self._pipe):
            self.num_output = n+1
            if self.verbose and self.num_input % 100 == 0:
                # The trailing comma suppresses the newline (Python 2 print).
                print "\rPassed: %i (%.2f%%)\tTotal: %i" % (self.num_output, float(self.num_output)/float(self.num_input)*100, self.num_input),
            yield mol
        if self.verbose:
            print ""

    # Consume the pipe
    def write(self, fmt, filename, csv_filename = None, **kwargs):
        """Consume the pipe and write every molecule to ``filename``.

        When ``csv_filename`` is given, each molecule's data (minus a few
        internal keys, plus ``'name'``) is also written as a CSV row; the
        header comes from the first molecule.  Returns ``False`` as soon as
        a molecule has no data left for the CSV.  ``**kwargs`` is passed to
        both ``toolkit.Outputfile`` and ``csv.DictWriter``.  Afterwards the
        pipe is re-opened from the written file.
        """
        output_mol_file = toolkit.Outputfile(fmt, filename, **kwargs)
        if csv_filename:
            f = open(csv_filename, 'w')
            csv_file = None
        for mol in self.fetch():
            if csv_filename:
                data = dict(mol.data)
                #filter some internal data
                blacklist_keys = ['OpenBabel Symmetry Classes', 'MOL Chiral Flag', 'PartialCharges', 'TORSDO', 'REMARK']
                for b in blacklist_keys:
                    if data.has_key(b):
                        del data[b]
                if len(data) > 0:
                    data['name'] = mol.title
                else:
                    # NOTE(review): this early return leaves both output
                    # files open.
                    print "There is no data to write in CSV file"
                    return False
                if csv_file is None:
                    csv_file = csv.DictWriter(f, data.keys(), **kwargs)
                    csv_file.writeheader()
                csv_file.writerow(data)
            # write ligand
            output_mol_file.write(mol)
        output_mol_file.close()
        if csv_filename:
            f.close()
#        if kwargs.has_key('keep_pipe') and kwargs['keep_pipe']:
        #FIXME destroys data
        self._pipe = toolkit.readfile(fmt, filename)

    def write_csv(self, csv_filename, keep_pipe = False, **kwargs):
        """Consume the pipe and write each molecule's data as a CSV row.

        Row contents and the ``False`` return are as in ``write``.
        ``keep_pipe`` is accepted but currently has no effect.  ``**kwargs``
        is passed to ``csv.DictWriter``.
        """
        f = open(csv_filename, 'w')
        csv_file = None
        for mol in self.fetch():
            data = dict(mol.data)
            #filter some internal data
            blacklist_keys = ['OpenBabel Symmetry Classes', 'MOL Chiral Flag', 'PartialCharges', 'TORSDO', 'REMARK']
            for b in blacklist_keys:
                if data.has_key(b):
                    del data[b]
            if len(data) > 0:
                data['name'] = mol.title
            else:
                # NOTE(review): this early return leaves ``f`` open.
                print "There is no data to write in CSV file"
                return False
            if csv_file is None:
                csv_file = csv.DictWriter(f, data.keys(), **kwargs)
                csv_file.writeheader()
            csv_file.writerow(data)
            if keep_pipe:
                #write ligand using pickle
                pass
        f.close()
if not os.path.exists(renderRootFolder): os.mkdir(renderRootFolder,0777) tcount = 0 cmd = [] # Loop over all the models. for shape_property in shape_list: shape_synset = shape_property[0] shape_md5 = shape_property[1] shape_file = os.path.join(g_shapenet_root_folder, shape_synset, shape_md5, 'model.obj') renderFolder = os.path.join(renderRootFolder, shape_md5) if not os.path.exists(renderFolder): cmd.append('mkdir -p %s' % renderFolder) elif len(glob.glob(os.path.join(renderFolder, '*.png'))) >= num: print 'skip', shape_md5 continue command_per_model = '%s %s --background --python %s -- %s %s %s %s > /dev/null 2>&1' % (g_blender_executable_path, g_blank_blend_file_path, render_program, shape_file, renderFolder) cmd.append(command_per_model) # backup pool = Pool(12) # 5 concurrent commands at a time for i, returncode in enumerate(pool.imap(partial(call, shell=True), cmd)): print i if returncode != 0: print("%d command failed: %d" % (i, returncode)) tcount = tcount + 1 print tcount
import argparse
import os
import subprocess
from functools import partial
from multiprocessing.dummy import Pool

from termcolor import colored

if __name__ == '__main__':
    # Render every model listed in ``list_path`` with Blender, in parallel.
    parser = argparse.ArgumentParser()
    parser.add_argument('list_path')
    parser.add_argument('intrinsics')
    parser.add_argument('output_dir')
    parser.add_argument('-n', type=int, default=1, help='number of trajectories')
    parser.add_argument('-p', type=int, default=8, help='number of processes')
    args = parser.parse_args()

    with open(args.list_path) as list_file:
        # Skip blank lines so Blender is never launched with an empty model id.
        model_list = [line.strip() for line in list_file if line.strip()]

    commands = [['/opt/blender/blender', '-b', '-P', 'render_single.py', model_id,
                 args.intrinsics, args.output_dir, '%d' % args.n]
                for model_id in model_list]

    pool = Pool(args.p)
    print(colored('=== Rendering %d models on %d workers...' % (len(commands), args.p), 'white', 'on_blue'))
    try:
        for idx, completed in enumerate(pool.imap(subprocess.run, commands)):
            # Report failed renders instead of silently ignoring them.
            if completed.returncode != 0:
                print(colored('Rendering of model %s failed with exit code %d'
                              % (model_list[idx], completed.returncode), 'white', 'on_red'))
    finally:
        pool.close()
        pool.join()
from multiprocessing.dummy import Pool
from subprocess import run

if __name__ == "__main__":
    # Train one model per input length (5..20), four runs at a time.
    model_type = 'LSTM'
    cmds = [
        f'python train.py --input_length {i} --model_type {model_type}'
        for i in range(5, 21)
    ]

    def run_cmd(cmd):
        run(cmd, shell=True)

    # join() may only be called after close() or terminate(); calling it on
    # a running pool raises.  map() blocks until every command has finished
    # (and re-raises worker errors); the ``with`` block then releases the pool.
    with Pool(4) as p:
        p.map(run_cmd, cmds)
def fix(data):
    """Copy each subtask's benchmark duration to a top-level key.

    For every entry of ``data['subtasks']`` the value found at
    ``subtask['data']['benchmark']['duration_seconds']`` is stored under
    ``"<subtask name>_duration_seconds"``. ``data`` is modified in place and
    also returned.
    """
    # fix subtasks duration
    for subtask in data['subtasks']:
        name = subtask['name']
        sec = subtask['data']['benchmark']['duration_seconds']
        data[name + "_duration_seconds"] = sec
    return data


def index_file(file):
    """Load one JSON file from DIR, flatten its durations and index it."""
    # Build the path with join(), as the directory listing below does, so
    # it also works when DIR has no trailing separator.
    with open(join(DIR, file)) as json_data:
        data = fix(json.load(json_data))
    index(file, data)


if __name__ == "__main__":
    start = time.time()
    es = Elasticsearch(hosts=[{"host": "localhost", "port": 9200}])
    # ignore=400 is passed so an already-existing index is not treated as fatal.
    es.indices.create(index=INDEX_NAME, ignore=400)
    files = [f for f in listdir(DIR) if isfile(join(DIR, f))]
    pool = ThreadPool(8)
    try:
        # Drain the iterator only to drive the progress bar.
        for _ in tqdm.tqdm(pool.imap(index_file, files), total=len(files)):
            pass
    finally:
        pool.close()
        pool.join()
    end = time.time()
    print("took: " + str(end - start) + "s")
'-s', type=int, help='start of range', default=1)
    parser.add_argument('--end', '-e', type=int, help='end of range', default=500)
    parser.add_argument('--tcp', '-t', action='store_true', help='scan tcp ports')
    parser.add_argument('--udp', '-u', action='store_true', help='scan udp ports')
    return parser


# Script entry point: scan the requested port range on a pool of 50 workers.
if __name__ == '__main__':
    args = create_arg_parser().parse_args()
    # Module-level name. NOTE(review): presumably read by the scan_* workers,
    # which only receive a port number -- confirm.
    host = args.host
    pool = Pool(50)
    # range() excludes args.end, so the last port scanned is args.end - 1.
    # imap() only queues the work; close() + join() below wait for it.
    if args.tcp:
        pool.imap(scan_tcp_port, range(args.start, args.end))
    if args.udp:
        pool.imap(scan_udp_port, range(args.start, args.end))
    pool.close()
    pool.join()
# Pair every test sentence with the full list of training sentences; each
# pair is one work item for the pool.
multi_processing_list = list()
for i in tqdm(range(len(test_src_sentences))):
    pair = [str(test_src_sentences[i]), train_src_sentences]
    multi_processing_list.append(pair)
    # sentence_distances = np.array(
    #     [edit_distance(str(test_src_sentences[i]), str(train_src_sentences[j])) for j in
    #      range(len(train_src_sentences))])
    #
    # sentence_distances = list()
    # for j in range(len(train_src_sentences)):
    #     distance = edit_distance(test_src_sentences[i], train_src_sentences[j])

# sentence_distances_list = pool.map(multi_run_wrapper_edit_distance, multi_processing_list)
# imap keeps the results in input order, so entry i belongs to test sentence i.
sentence_distances_list = list(
    tqdm(pool.imap(multi_run_wrapper_edit_distance, multi_processing_list),
         total=len(multi_processing_list)))

# For every test sentence pick the training sentence with the smallest
# distance and score its target against the reference target.
# NOTE(review): the .astype(str).tolist() conversions below do not depend on
# i but are rebuilt on every iteration.
for i in tqdm(range(len(sentence_distances_list))):
    sentence_distances = sentence_distances_list[i]
    # sentence_distances = edit_distances([str(test_src_sentences[i]), train_src_sentences])
    closestIdx = np.argmin(sentence_distances)
    closet_distance = np.amin(sentence_distances)
    retrieved_src_sentence = train["source"].astype(str).tolist()[closestIdx]
    retrieved_target_sentence = train["target"].astype(
        str).tolist()[closestIdx]
    bleu_score = calculate_bleu_score(test["target"].astype(str).tolist()[i],
                                      retrieved_target_sentence)
    meteor_score = calculate_meteor_score(
        test["target"].astype(str).tolist()[i], retrieved_target_sentence)
def dispatch_commands(cmds):
    """Run shell commands concurrently and report the ones that fail.

    Each entry of ``cmds`` is executed through the shell on a pool of
    ``max_processes`` workers. For every command that exits with a non-zero
    status, its position in ``cmds`` and the status are printed.
    """
    pool = Pool(max_processes)
    try:
        for i, returncode in enumerate(pool.imap(partial(call, shell=True), cmds)):
            if returncode != 0:
                print("{} command failed: {}".format(i, returncode))
    finally:
        # Release the workers; without this every call leaks a pool.
        pool.close()
        pool.join()
# Script entry point: crawl every id in crt_lst on a worker pool and renew
# the connection every few items taken from the queue.
if __name__ == '__main__':
    # # crawling one by one
    # while crt_lst:
    #     id = crt_lst.pop()
    #     crawler(id)

    # crawling via multiprocessing and queue
    try:
        new.renew_connection()
        the_queue = Queue()
        # `worker` is passed as the pool initializer with the_queue as its
        # only argument.
        pool = Pool(
            cpu_count() + 2, worker,
            [the_queue])  # Pool size is cpu_count + 2.
        pool.imap(crawler, crt_lst)
        pool.close()
        count = 0
        # NOTE(review): this loop has no exit condition; the_queue.get(True)
        # blocks once nothing more is put on the queue -- confirm how the
        # script is expected to terminate.
        while True:
            lim = randint(2, 4)
            time.sleep(randint(3, 8))
            # Renew the connection after more than `lim` queue items.
            if count > lim:
                new.renew_connection()
                count = 0
            the_queue.get(True)
            count = count + 1
    except Exception as e:
        print e
        traceback.print_exc()
    print get_current_datetime()
    for t in mysql_tables:
        cmd = get_cmd(conf_utils.get_kv_from_conf("cmds", "mysql_to_hive"), t, False)
        # Keyed by element 6 of the table tuple; exec_cmd below treats this
        # key as the table name.
        cmds.__setitem__(t[6], cmd)
    return cmds


def exec_cmd(cmd):
    """Run one import command and log its exit status and output.

    ``cmd`` is a ``(table, command)`` pair as produced by
    ``get_cmds().items()``.
    """
    table = cmd[0]
    command = cmd[1]
    # Shared progress counter. NOTE(review): it is incremented from several
    # pool threads without a lock -- confirm that is acceptable.
    global index
    index += 1
    print(str(index) + " Starting import " + table)
    Logger.info("Starting import " + table)
    (status, text) = commands.getstatusoutput(command)
    if status != 0:
        print(table + 'status: ' + str(status))
    Logger.info("commands: " + command)
    Logger.info("End of import, status: " + str(status))
    Logger.info("End of import, content: " + text)


# Runs at import time: execute all import commands on 8 worker threads and
# wait for them to finish.
pool = ThreadPool(8)
pool.imap(exec_cmd, get_cmds().items())
pool.close()
pool.join()

if __name__ == '__main__':
    pass
def megadown_getfile(self,file_id, file_key):
    """Download a mega.nz file in chunks and decrypt it to the working dir.

    The chunks are first downloaded in parallel into a temporary folder
    ``<self.dirdescarga>/tmp_<file_id>``. They are then read back in
    ascending chunk-start order, decrypted with a single AES-CTR decryptor,
    and appended to an output file named after the file's ``n`` attribute.

    :param file_id: id of the file, sent as ``p`` in the API request
    :param file_key: base64 key, decoded with ``base64_to_a32``
    """
    key = base64_to_a32(file_key)
    # The AES key is the XOR of the two halves of the decoded key; the IV
    # and the meta MAC are taken from the second half.
    k = (key[0] ^ key[4], key[1] ^ key[5], key[2] ^ key[6], key[3] ^ key[7])
    iv = key[4:6] + (0, 0)
    meta_mac = key[6:8]
    self.megadown_print("\n\nObteniendo acceso al link de mega.nz: %s"%(file_id))
    self.megadown_print("Espere por favor...")
    file = api_req({'a': 'g', 'g': 1, 'p': file_id})
    dl_url = file['g']
    size = file['s']
    attributes = base64urldecode(file['at'])
    attributes = dec_attr(attributes, k)
    self.megadown_print ("Fichero encontrado: %s [%s]\n" % (attributes['n'], self.megadown_GetHumanReadable(size)))
    decryptor = AES.new(a32_to_str(k), AES.MODE_CTR, counter = Counter.new(128, initial_value = ((iv[0] << 32) + iv[1]) << 64))
    file_mac = [0, 0, 0, 0]
    url=dl_url
    filename = attributes['n']
    pool = Pool(self.conexiones) # define number of concurrent connections
    #---------------------
    # PROGRESSBAR
    #---------------------
    widgets = ['Descargando: ', Percentage(), ' ', Bar(marker='#'), ' ', ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=size).start()
    #---------------------
    # (chunk_start, chunk_size) pairs sorted by start offset.
    listchunks=sorted(get_chunks(file['s']).items())
    # print listchunks
    ranges=listchunks
    lentotal=0
    # with open(filename, 'wb') as file:
    directory="%s/tmp_%s"%(self.dirdescarga,file_id)
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Download phase. The value returned by download_chunk is added to the
    # running total -- presumably the number of bytes written; verify
    # against download_chunk.
    for content in pool.imap(partial(self.download_chunk, url, os.path.abspath(directory)), ranges):
        if not content:
            print "Error EOF"
            break # error or EOF
        # content=decryptor.decrypt(content)
        # file.write(content)
        lentotal+=content
        pbar.update(lentotal)
        # if len(s) != size:
        #     break # EOF (servers with no Range support end up here)
    #---------------------
    pbar.finish()
    #---------------------
    #---------------------
    # PROGRESSBAR
    #---------------------
    widgets = ['Desencriptando: ', Percentage(), ' ', Bar(marker='#'), ' ', ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=size).start()
    #---------------------
    # Decrypt phase: the one decryptor object is fed the chunks in the
    # sorted order of `ranges`.
    # NOTE(review): this phase runs even when the download loop stopped
    # early on "Error EOF"; chunk files may then be missing -- confirm.
    output=open(attributes['n'],'wb')
    for chunk_start,chunk_size in ranges:
        input_name="%s/%s.chunk"%(os.path.abspath(directory),chunk_start)
        input_tmp=open(input_name,'rb')
        chunk=input_tmp.read()
        chunk=decryptor.decrypt(chunk)
        output.write(chunk)
        input_tmp.close()
        # Each chunk file is deleted as soon as it has been appended.
        os.remove(input_name)
        pbar.update(os.path.getsize(attributes['n']))
    shutil.rmtree(os.path.abspath(directory))
    output.close()
    #---------------------
    pbar.finish()
help='threads count', default=config.threads_count)
    parser.add_argument('-y', help="yes all", default=False, action='store_true')
    return parser.parse_args()


# Script entry point: sync every fetched id on a thread pool after asking
# for confirmation.
if __name__ == "__main__":
    """ usage: python bot.py --threads=2 --workflow=SyncWorkflow """
    start = time.time()
    # NOTE(review): the ids are fetched before the command line is parsed.
    ids = fetch()
    args = parse_args()
    threads_count = int(args.threads)
    workflow = args.workflow
    print "syncing {num} records with workflow: {workflow}, threads: {threads_count}".format(
        num=len(ids), workflow=workflow, threads_count=threads_count)
    # args.y is handed to lib.ask(); the sync only runs if it returns a
    # true value.
    if lib.ask(args.y):
        pool = ThreadPool(threads_count)
        # Drain the iterator only to drive the progress bar.
        for _ in tqdm.tqdm(pool.imap(sync, ids), total=len(ids)):
            pass
    print "Finished. Took: " + str(time.time() - start) + "s"
def displayTiling(self, animating=True):
    """Build a patch for every tile of the tiling and add it to self.ax.

    Returns self.ax when ``animating`` is true and ``self.ptIndex == 0``;
    calls ``self.saveFig()`` when ``animating`` is false.
    """
    ## Axiom0: All functions and data structures such as the QuadTree and QuadNode classes are implemented correctly
    # This means that all states (and there are many) that this algorithm accumulates on is taken axiomatically as correct
    # even if that is not so (which it in fact is not so).
    ## Axiom1: If multip is 'multithread', there are at least six available threads
    ## Axiom2: If multip is 'multiprocess', there are enough available cpus such that os.cpu_count()-1 is greater than or equal to one
    ## Intent: the intent of displayTiling() is to iterate over all tiles in the tiling and add the proper patch corresponding to a tiling to the figure.
    # Furthermore, we can do this in one of three ways, by creating a pool of threads, by creating a pool of processes, and by traditionally iterating linearly
    # over a loop with a single process and a single thread. Because of Python's GIL (Global Interpreter Lock), we are unable to take advantage of multithreading
    # in any way that I could find, though this would work in other languages.
    ## Prec0: animating is an input parameter, either True or False
    ## Prec1: createPatch is a function that takes in a tuple of four integers (r,a,s,b) st: 0<=r<s<self.dim and -self.size<=a<=b<=self.size
    ## Prec2: createPatch is implemented properly (being outside the scope of this project) and returns a patch that can be easily plotted in the axis self.ax
    ## Post0: self.patches is a list containing all the patches in the tiling, where each patch is a plottable object comprising vertices, colors, opacities, etc..
    ## Post1: self.ax contains the patches in self.patches
    ## Post2: if the PA is not automated, we save the tiling frames in a folder containing all the tiling frames in the PA animation
    ## Post3: If the tiling is animated and is the original tiling, self.ax is manually returned

    ## State0: multip in {'multithread', 'multiprocess', allOtherInputs} and the figure is handled by an auxiliary helper method self.setFigExtras()
    # multip is hard-coded to False here, so only the sequential branch (State1.2.0) runs.
    multip = False
    ## State1: we conditionally map {'multithread', 'multiprocess', allOtherInputs} -> {State1.0.0, 1.1.0, 1.2.0}
    if multip == 'multithread':
        ## State1.0.0: All necessary multithreading modules are imported
        from multiprocessing.dummy import Pool as ThreadPool
        ## State1.0.1: For each of the tiles, the input parameter to the createPatch function is added to the list
        inputs = []
        for r in range(self.dim):
            for a in range(-self.size, self.size + 1):
                for s in range(r + 1, self.dim):
                    for b in range(-self.size, self.size + 1):
                        inputC = (r, a, s, b)
                        inputs.append(inputC)
        ## State1.0.2: A thread pool of six thread workers is created
        pool = ThreadPool(6)
        ## State1.0.3: The thread pool maps the createPatch function onto all the input parameters
        patches = pool.imap(self.createPatch, inputs)
        pool.close()
        pool.join()
        ## State1.0.4: All the patches are added to the axis self.ax for later display
        self.patches = []
        for patch in patches:
            self.ax.add_patch(patch)
            self.patches.append(patch)
    elif multip == 'multiprocess':
        ## State1.1.0: All necessary multiprocessing modules are imported
        from multiprocessing import get_context
        ## State1.1.1: For each of the tiles, the input parameter to the createPatch function is added to the list
        inputs = []
        for r in range(self.dim):
            for a in range(-self.size, self.size + 1):
                for s in range(r + 1, self.dim):
                    for b in range(-self.size, self.size + 1):
                        inputC = (r, a, s, b)
                        inputs.append(inputC)
        ## State1.1.2: A multiprocessing pool along with a context switch enables multiprocessing in python3 (accessed through dispPool)
        with get_context("spawn").Pool(os.cpu_count() - 1) as dispPool:
            ## State1.1.2.0: A multiprocessing pool maps the createPatch function onto all the input parameters in inputs
            patches = [
                item for item in dispPool.imap(self.createPatch, inputs)
            ]
            dispPool.close()
            dispPool.join()
        ## State1.1.3: All the patches are added to the figure for display
        self.patches = []
        for patch in patches:
            self.ax.add_patch(patch)
            self.patches.append(patch)
    else:
        ## State1.2.0: No pool is used; the tiles are processed one by one in this thread
        self.patches = []
        for r in range(self.dim):
            for a in range(-self.size, self.size + 1):
                for s in range(r + 1, self.dim):
                    for b in range(-self.size, self.size + 1):
                        ## State1.2.0.0: For all input parameters, the patch is constructed and added to the list of patches and figure linearly
                        inputC = (r, a, s, b)
                        patch = self.createPatch(inputC)
                        self.ax.add_patch(patch)
                        self.patches.append(patch)
    ## State2: If the pt is the original tiling, the axis is returned manually
    if animating and self.ptIndex == 0:
        return self.ax
    ## State3: If the PA is not animated, the first figure is saved manually to the IO filestructure
    if not animating:
        self.saveFig()
class SplitPerListIndex:
    """Front-end over several sub-indexes that share one coarse quantizer.

    The local ``index`` performs the coarse quantization. Every sub-index
    searches only the inverted lists assigned to it, and this class merges
    the partial results.
    """

    def __init__(self, index, sub_indexes):
        self.index = index
        ivf = faiss.extract_index_ivf(index.index)
        self.code_size = ivf.code_size
        self.sub_indexes = sub_indexes
        self.ni = len(self.sub_indexes)
        # One worker thread per sub-index.
        self.pool = ThreadPool(self.ni)
        self.verbose = False

    def set_nprobe(self, nprobe):
        """Set nprobe on the local index and on every sub-index."""
        self.index.set_nprobe(nprobe)

        def apply(slot):
            return self.sub_indexes[slot].set_nprobe(nprobe)

        self.pool.map(apply, range(self.ni))

    def set_omp_num_threads(self, nt):
        """Set the OpenMP thread count locally and on every sub-index."""
        faiss.omp_set_num_threads(nt)

        def apply(sub):
            return sub.set_omp_num_threads(nt)

        self.pool.map(apply, self.sub_indexes)

    def set_parallel_mode(self, pm):
        """Set the parallel mode on the local index and every sub-index."""
        self.index.set_parallel_mode(pm)

        def apply(sub):
            return sub.set_parallel_mode(pm)

        self.pool.map(apply, self.sub_indexes)

    def set_prefetch_nthread(self, nt):
        """Set the prefetch thread count on the local index and sub-indexes."""
        self.index.set_prefetch_nthread(nt)

        def apply(sub):
            return sub.set_prefetch_nthread(nt)

        self.pool.map(apply, self.sub_indexes)

    def balance_lists(self, list_nos):
        """Assign every probed list to a sub-index, weighted by list size.

        Returns an array shaped like ``list_nos`` holding the sub-index
        number chosen for each entry, and stores the summed bin weights in
        ``self.nscan``.
        """
        inverted_lists = self.index.big_il
        sizes = []
        for list_no in list_nos.ravel():
            sizes.append(inverted_lists.list_size(int(list_no)))
        weights = np.array(sizes)
        bins, assign = distribute_weights(weights, self.ni)
        if self.verbose:
            total = bins.sum()
            print('bins weight range %d:%d total %d (%.2f MiB)' %
                  (bins.min(), bins.max(), total,
                   bins.sum() * (self.code_size + 8) / 2**20))
        self.nscan = bins.sum()
        return assign.reshape(list_nos.shape)

    def search(self, x, k):
        """k-nearest-neighbour search merged over all sub-indexes."""
        xqo, list_nos, coarse_dis = self.index.transform_and_assign(x)
        assign = self.balance_lists(list_nos)

        def query_slice(slot):
            # Mask out (with -1) every list not assigned to this sub-index.
            masked = list_nos.copy()
            masked[assign != slot] = -1
            started = time.time()
            dis, ids = self.sub_indexes[slot].ivf_search_preassigned(
                xqo, masked, coarse_dis, k)
            if self.verbose:
                print('client %d: %.3f s' % (slot, time.time() - started))
            return dis, ids

        heap = ResultHeap(x.shape[0], k)
        for dis, ids in self.pool.imap(query_slice, range(self.ni)):
            heap.add_batch_result(dis, ids, 0)
        heap.finalize()
        return heap.D, heap.I

    def range_search(self, x, radius):
        """Range search merged over all sub-indexes.

        Returns ``(lims, D, I)`` where the results of query ``q`` occupy
        ``D[lims[q]:lims[q + 1]]`` and ``I[lims[q]:lims[q + 1]]``.
        """
        xqo, list_nos, coarse_dis = self.index.transform_and_assign(x)
        assign = self.balance_lists(list_nos)
        nq = len(x)

        def query_slice(slot):
            # Mask out (with -1) every list not assigned to this sub-index.
            masked = list_nos.copy()
            masked[assign != slot] = -1
            started = time.time()
            sub_lims, dis, ids = self.sub_indexes[slot].ivf_range_search_preassigned(
                xqo, masked, coarse_dis, radius)
            if self.verbose:
                print('slice %d: %.3f s' % (slot, time.time() - started))
            return sub_lims, dis, ids

        # Per-query lists of partial result arrays, one piece per sub-index.
        dis_parts = [[] for _ in range(nq)]
        ids_parts = [[] for _ in range(nq)]
        sizes = np.zeros(nq, dtype=int)
        for sub_lims, dis, ids in self.pool.imap(query_slice, range(self.ni)):
            for q in range(nq):
                begin, end = sub_lims[q:q + 2]
                dis_parts[q].append(dis[begin:end])
                ids_parts[q].append(ids[begin:end])
                sizes[q] += end - begin
        lims = np.zeros(nq + 1, dtype=int)
        lims[1:] = np.cumsum(sizes)
        flat_dis = []
        for per_query in dis_parts:
            flat_dis.extend(per_query)
        flat_ids = []
        for per_query in ids_parts:
            flat_ids.extend(per_query)
        D = np.hstack(flat_dis)
        I = np.hstack(flat_ids)
        return lims, D, I
class virtualscreening: def __init__(self, n_cpu=-1, verbose=False): """Virtual Screening pipeline stack Parameters ---------- n_cpu: int (default=-1) The number of parallel procesors to use verbose: bool (default=False) Verbosity flag for some methods """ self._pipe = None self.n_cpu = n_cpu self.num_input = 0 self.num_output = 0 self.verbose = verbose # setup pool self._pool = Pool(n_cpu if n_cpu > 0 else None) def load_ligands(self, file_type, ligands_file): """Loads file with ligands. Parameters ---------- file_type: string Type of molecular file ligands_file: string Path to a file, which is loaded to pipeline """ self._pipe = self._ligand_pipe( toolkit.readfile(file_type, ligands_file)) def _ligand_pipe(self, ligands): for n, mol in enumerate(ligands): self.num_input = n + 1 yield mol def apply_filter(self, expression, filter_type='expression', soft_fail=0): """Filtering method, can use raw expressions (strings to be evaled in if statement, can use oddt.toolkit.Molecule methods, eg. 'mol.molwt < 500') Currently supported presets: * Lipinski Rule of 5 ('r5' or 'l5') * Fragment Rule of 3 ('r3') Parameters ---------- expression: string or list of strings Expresion(s) to be used while filtering. filter_type: 'expression' or 'preset' (default='expression') Specify filter type: 'expression' or 'preset'. Default strings are treated as expressions. soft_fail: int (default=0) The number of faulures molecule can have to pass filter, aka. soft-fails. 
""" if filter_type == 'expression': self._pipe = self._filter(self._pipe, expression, soft_fail=soft_fail) elif filter_type == 'preset': # define presets # TODO: move presets to another config file # Lipinski rule of 5's if expression.lower() in ['l5', 'ro5']: self._pipe = self._filter(self._pipe, [ 'mol.molwt < 500', 'mol.calcdesc(["HBA1"])["HBA1"] <= 10', 'mol.calcdesc(["HBD"])["HBD"] <= 5', 'mol.calcdesc(["logP"])["logP"] <= 5' ], soft_fail=soft_fail) # Rule of three elif expression.lower() in ['ro3']: self._pipe = self._filter(self._pipe, [ 'mol.molwt < 300', 'mol.calcdesc(["HBA1"])["HBA1"] <= 3', 'mol.calcdesc(["HBD"])["HBD"] <= 3', 'mol.calcdesc(["logP"])["logP"] <= 3' ], soft_fail=soft_fail) # PAINS filter elif expression.lower() in ['pains']: pains_smarts = {} with open(dirname(__file__) + 'filter/pains.smarts') as pains_file: csv_reader = csv.reader(pains_file, delimiter="\t") for line in csv_reader: if len(line) > 1: pains_smarts[line[1][8:-2]] = line[0] self._pipe = self._filter_smarts(self._pipe, pains_smarts.values(), soft_fail=soft_fail) def _filter_smarts(self, pipe, smarts, soft_fail=0): for mol in pipe: if type(smarts) is list: compiled_smarts = [toolkit.Smarts(s) for s in smarts] fail = 0 for s in compiled_smarts: if len(s.findall(mol)) > 0: fail += 1 if fail > soft_fail: break if fail <= soft_fail: yield mol else: compiled_smarts = toolkit.Smarts(smarts) if len(compiled_smiles.findall(mol)) == 0: yield mol def _filter(self, pipe, expression, soft_fail=0): for mol in pipe: if type(expression) is list: fail = 0 for e in expression: if not eval(e): fail += 1 if fail > soft_fail: break if fail <= soft_fail: yield mol else: if eval(expression): yield mol def dock(self, engine, protein, *args, **kwargs): """Docking procedure. Parameters ---------- engine: string Which docking engine to use. Note ---- Additional parameters are passed directly to the engine. 
""" if engine.lower() == 'autodock_vina': from .docking.autodock_vina import autodock_vina engine = autodock_vina(protein, *args, **kwargs) else: raise ValueError('Docking engine %s was not implemented in ODDT' % engine) def _iter_conf(results): """ Generator to go through docking results, and put them to pipe """ for confs in results: for conf in confs: yield conf if self.n_cpu != 1: docking_results = self._pool.imap(_parallel_helper, ((engine, "dock", { 'ligands': lig, 'single': True }) for lig in self._pipe)) else: docking_results = (engine.dock(lig, single=True) for lig in self._pipe) self._pipe = _iter_conf(docking_results) def score(self, function, protein, *args, **kwargs): """Scoring procedure. Parameters ---------- function: string Which scoring function to use. protein: oddt.toolkit.Molecule Default protein to use as reference Note ---- Additional parameters are passed directly to the scoring function. """ if type(protein) is str: extension = protein.split('.')[-1] protein = toolkit.readfile(extension, protein).next() protein.protein = True if function.lower() == 'rfscore': from .scoring.functions.RFScore import rfscore sf = rfscore.load() sf.set_protein(protein) elif function.lower() == 'nnscore': from .scoring.functions.NNScore import nnscore sf = nnscore.load() sf.set_protein(protein) else: raise ValueError( 'Scoring Function %s was not implemented in ODDT' % function) if self.n_cpu != 1: self._pipe = self._pool.imap(_parallel_helper, ((sf, 'predict_ligand', { 'ligand': lig }) for lig in self._pipe)) else: self._pipe = sf.predict_ligands(self._pipe) def fetch(self): for n, mol in enumerate(self._pipe): self.num_output = n + 1 if self.verbose and self.num_input % 100 == 0: print "\rPassed: %i (%.2f%%)\tTotal: %i" % ( self.num_output, float(self.num_output) / float(self.num_input) * 100, self.num_input), yield mol if self.verbose: print "" # Consume the pipe def write(self, fmt, filename, csv_filename=None, **kwargs): """Outputs molecules to a file 
Parameters ---------- file_type: string Type of molecular file ligands_file: string Path to a output file csv_filename: string Optional path to a CSV file """ output_mol_file = toolkit.Outputfile(fmt, filename, **kwargs) if csv_filename: f = open(csv_filename, 'w') csv_file = None for mol in self.fetch(): if csv_filename: data = dict(mol.data) #filter some internal data blacklist_keys = [ 'OpenBabel Symmetry Classes', 'MOL Chiral Flag', 'PartialCharges', 'TORSDO', 'REMARK' ] for b in blacklist_keys: if data.has_key(b): del data[b] if len(data) > 0: data['name'] = mol.title else: print "There is no data to write in CSV file" return False if csv_file is None: csv_file = csv.DictWriter(f, data.keys(), **kwargs) csv_file.writeheader() csv_file.writerow(data) # write ligand output_mol_file.write(mol) output_mol_file.close() if csv_filename: f.close() # if kwargs.has_key('keep_pipe') and kwargs['keep_pipe']: #FIXME destroys data self._pipe = toolkit.readfile(fmt, filename) def write_csv(self, csv_filename, keep_pipe=False, **kwargs): """Outputs molecules to a csv file Parameters ---------- csv_filename: string Optional path to a CSV file keep_pipe: bool (default=False) If set to True, the ligand pipe is sustained. """ f = open(csv_filename, 'w') csv_file = None for mol in self.fetch(): data = dict(mol.data) #filter some internal data blacklist_keys = [ 'OpenBabel Symmetry Classes', 'MOL Chiral Flag', 'PartialCharges', 'TORSDO', 'REMARK' ] for b in blacklist_keys: if data.has_key(b): del data[b] if len(data) > 0: data['name'] = mol.title else: print "There is no data to write in CSV file" return False if csv_file is None: csv_file = csv.DictWriter(f, data.keys(), **kwargs) csv_file.writeheader() csv_file.writerow(data) if keep_pipe: #write ligand using pickle pass f.close()
            ra = (v.REF, v.ALT[0])
            # Keep the variant only if its REF/ALT pair equals s[2]/s[3] in
            # either order ...
            if ra != (s[2], s[3]) and ra != (s[3], s[2]):
                continue
            # ... and its start equals s[1] - 1.
            if v.start != s[1] - 1:
                continue
            matches.append(v)
    print("found %d out of %d sites in %.1f seconds" %
          (len(matches), len(sites), time.time() - t0), file=sys.stderr)
    return matches


print("n-sites: %d" % len(sites), file=sys.stderr)
vcf_path = "/scratch/general/lustre/u0806040/data/vcf_files/P231_hg_38.vcf.gz"
#import multiprocessing as mp
from multiprocessing.dummy import Pool

p = Pool(54)
# "-" is the output target; the VCF opened here is passed as the writer's template.
wtr = Writer("-", VCF(vcf_path))
# Each work item is (vcf_path, slice of at most `step` sites); imap returns
# the per-slice matches in submission order.
step = 500
for res in p.imap(match_sites, ((vcf_path, sites[idx:idx + step])
                                for idx in range(0, len(sites), step))):
    for v in res:
        wtr.write_record(v)
wtr.close()
def download_movies(url, movie_path, selected_episodes=range(1, int(1e5))):
    '''
    Download a movie, or the selected episodes of a series, as mp4 files.

    The *.ts segments are downloaded to RAM on 20 threads and then written
    to one mp4 file on local disk.

    :param url: 'http://532movie.bnu.edu.cn/player/3379.html'
    :param movie_path: folder prefix for the output; used by plain string
        concatenation, so it must end with a path separator
    :param selected_episodes: episode numbers (1-based) to download when the
        page lists more than one video
    :return: None
    '''
    movie_name, urls = get_vedio_url(url)
    # Replace characters that are not allowed in file names.
    invalid_char = '/\:*"<>|?'
    for ic in invalid_char:
        if ic in movie_name:
            movie_name = movie_name.replace(ic, '.')
    # Fall back to the first word of the name if the full name cannot be
    # converted.
    try:
        print(movie_name.decode('utf-8').encode('gbk', 'ignore'))
    except Exception:
        print(movie_name.split()[0].decode('utf-8').encode('gbk', 'ignore'))
    try:
        movie_name_utf8 = movie_name.decode('utf-8').encode('gbk', 'ignore')
    except Exception:
        movie_name_utf8 = movie_name.split()[0].decode('utf-8').encode(
            'gbk', 'ignore')
    episode = 0
    flag = 0
    time_init = time.time()
    for i in urls:
        time_start = time.time()
        if len(urls) == 1:
            # Single video: written directly under movie_path.
            if os.path.isfile(movie_path + movie_name_utf8 + '.mp4'):
                print(movie_path + movie_name_utf8 + '.mp4 is already existed')
                return None
            ts = split_videos(i)
            pool = ThreadPool(20)
            bar_fmt = 'Downloading\t' + '|{bar}|{percentage:3.0f}%'
            results = list(
                tqdm(pool.imap(download_ts, ts), total=len(ts), ncols=50,
                     bar_format=bar_fmt))
            pool.close()
            pool.join()
            # print('Writing to disk...')
            bar_fmt1 = 'writing to disk\t' + '|{bar}|{percentage:3.0f}%'
            with codecs.open(movie_path + movie_name_utf8 + '.mp4', 'wb') as movie:
                for idx in tqdm(range(len(results)), bar_format=bar_fmt1,
                                ncols=50):
                    movie.write(results[idx])
        else:
            # Series: one file per episode inside a folder named after it.
            episode += 1
            if episode not in selected_episodes:
                continue
            TV_dir = movie_path + movie_name_utf8 + '\\'
            if not os.path.isdir(TV_dir):
                os.makedirs(TV_dir)
            if os.path.isfile(TV_dir + 'Episode ' + '%02d' % episode + '.mp4'):
                print(TV_dir + 'Episode ' + '%02d' % episode +
                      '.mp4 is already existed')
                flag += 1
                continue
            pool = ThreadPool(20)
            ts = split_videos(i)
            bar_fmt = 'Episode %02d' % episode + '|{bar}|{percentage:3.0f}%'
            results = list(
                tqdm(pool.imap(download_ts, ts), total=len(ts), ncols=50,
                     bar_format=bar_fmt))
            pool.close()
            pool.join()
            # The context manager closes the episode file; it was previously
            # left open for every episode.
            with codecs.open(
                    TV_dir + 'Episode ' + '%02d' % episode + '.mp4',
                    'wb') as movie:
                for r in results:
                    movie.write(r)
            time_end = time.time()
            lenurl = len(urls)
            len_selected = len(selected_episodes)
            length = min([lenurl, len_selected])
            try:
                name = movie_name.decode('utf-8').encode('gbk', 'ignore')
            except Exception:
                name = movie_name.split()[0].decode('utf-8').encode(
                    'gbk', 'ignore')
            log_process.process_bar(flag, length, time_init, time_start,
                                    time_end, name + '\n')
            flag += 1
'medoits': medoits, 'r': r, 'num': obj_array.shape[0], } print("precomputing done!") return models if __name__ == "__main__": with open(os.path.join(base_dir, train_fn)) as file: train_ids = file.read().splitlines() scene_ids = train_ids # process each scene if len(sys.argv) < 2: nproc = 4 pool = Pool(nproc) log = open('log.txt', 'w') for i, result in enumerate(pool.imap(partial(create_label), scene_ids)): try: print(i, scene_ids[i]) except: print(i, scene_ids[i], 'error occurs!') log.write('Error: {}'.format(scene_ids[i])) continue else: create_label(scene_ids[int(sys.argv[1])])
# Render every model in shape_list with Blender, skipping models that already
# have at least `num` PNGs in their output folder.
tcount = 0
cmd = []
# Loop over all the models.
for shape_property in shape_list:
    shape_synset = shape_property[0]
    shape_md5 = shape_property[1]
    shape_file = os.path.join(g_shapenet_root_folder, shape_synset,
                              shape_md5, g_shapenet_model)
    renderFolder = os.path.join(renderRootFolder, shape_md5)
    if not os.path.exists(renderFolder):
        # Create the folder right away instead of queueing a 'mkdir -p'
        # job: pool jobs run concurrently, so the render command could
        # otherwise start before its output folder exists.
        os.makedirs(renderFolder)
    elif len(glob.glob(os.path.join(renderFolder, '*.png'))) >= num:
        print('skip', shape_md5)
        continue
    # NOTE(review): the template below has seven %s placeholders but only
    # five values are supplied, which raises TypeError at runtime. The
    # missing arguments are not visible here -- confirm and complete.
    command_per_model = '%s %s --background --python %s -- %s %s %s %s > /dev/null 2>&1' % (
        g_blender_executable_path, g_blank_blend_file_path, render_program,
        shape_file, renderFolder)
    cmd.append(command_per_model)

# Run the render commands, 12 at a time.
pool = Pool(12)
try:
    for i, returncode in enumerate(pool.imap(partial(call, shell=True), cmd)):
        print(i)
        if returncode != 0:
            print("%d command failed: %d" % (i, returncode))
        tcount = tcount + 1
        print(tcount)
finally:
    # Always release the workers, even if a command could not be started.
    pool.close()
    pool.join()
def __init__(self):
    """Load the settings, proxies and combolist, run the checker, print results.

    The constructor does all the work: it reads the OxygenX settings,
    optionally checks for a newer version, loads the proxies and the
    combolist, starts the helper threads, runs ``self.prep`` over every
    account on a thread pool, and finally prints the result counters.
    """
    disable_warnings()
    clear = lambda: system('cls')
    self.version = '0.4'
    # Queues handed to the helper threads started further down.
    self.printing = Queue()
    self.caputer = Queue()
    self.hits = Queue()
    self.bad = Queue()
    self.mailheaders = {
        'User-Agent': 'MyCom/12436 CFNetwork/758.2.8 Darwin/15.0.0',
        'Pragma': 'no-cache'
    }
    self.mcurl = 'https://authserver.mojang.com/authenticate'
    self.jsonheaders = {
        "Content-Type": "application/json",
        'Pragma': 'no-cache'
    }
    self.secureurl = 'https://api.mojang.com/user/security/challenges'
    self.lunarr = compile(
        r'premium-box\">\n.*<span class=.*>\n(.*)\n</span>')
    self.veltrank = compile(r'<h2 style=\"color: .*\">(.*)</h2>')
    self.rankhv = compile(r'class=\"rank.*\">(.*)<')
    self.levelmp = compile(r'>Level (.*)</b>')
    self.rankmp = compile(r'class=\"www-mp-rank\".*>(.*)</span>')
    self.debug = OxygenX.debug
    self.savebad = OxygenX.save_bad
    self.hypl = OxygenX.Level.hypixel
    self.hypr = OxygenX.Rank.hypixel_rank
    self.mpl = OxygenX.Level.mineplex
    self.mpr = OxygenX.Rank.mineplex_rank
    self.liquidcape = OxygenX.Cape.liquidbounce
    self.hypminl = OxygenX.Level.hypixel_level
    if self.liquidcape:
        capesz = str(self.liquidbounce())
        # Re-checked after the call. NOTE(review): presumably
        # self.liquidbounce() can switch the flag off -- confirm.
        if self.liquidcape:
            self.lbcape = capesz
        else:
            self.liquidcape = False
    self.proxylist = OxygenX.Proxy.proxylist
    self.proxy_type = OxygenX.Proxy.type
    windll.kernel32.SetConsoleTitleW(
        f'OxygenX-{self.version} | by ShadowOxygen')
    # Banner text, kept exactly as found.
    self.t = f'''{Fore.LIGHTCYAN_EX}________ ____ ___ \_____ \ ___ ______.__. ____ ____ ____ \ \/ / / | \\\ \/ < | |/ ___\_/ __ \ / \ \ / / | \> < \___ / /_/ > ___/| | \/ \\ \_______ /__/\_ \/ ____\___ / \___ >___| /___/\ \\ \/ \/\/ /_____/ \/ \/ \_/ \n'''
    if OxygenX.version_check:
        try:
            gitversion = str(
                get(url=
                    "https://raw.githubusercontent.com/ShadowBlader/OxygenX/master/version.txt"
                    ).text)
            if f'{self.version}\n' != gitversion:
                print(self.t)
                print(f"{Fore.LIGHTRED_EX}Your version is outdated.")
                print(
                    f"Your version: {self.version}\nLatest version: {gitversion}\nGet latest version in the link below"
                )
                print(
                    f"https://github.com/ShadowOxygen/OxygenX/releases\nStarting in 5 seconds...{Fore.LIGHTCYAN_EX}"
                )
                sleep(5)
                clear()
        except Exception as e:
            # The version check is best-effort; a failure must not stop
            # the start-up.
            if self.debug:
                print(f'\nError for updating checking:\n {e}\n')
    try:
        self.announcement = get(
            url=
            'https://raw.githubusercontent.com/ShadowOxygen/OxygenX/master/announcement'
        ).text
    except Exception as e:
        if self.debug:
            print(f'{Fore.LIGHTRED_EX}Error with announcement: {e}')
        self.announcement = ''
    print(self.t)
    if OxygenX.Proxy.proxy and not OxygenX.Proxy.proxy_use_api:
        # Proxies from a local file: keep asking until the file is found.
        while True:
            try:
                with open(self.proxylist, 'r', encoding='u8',
                          errors='ignore') as proxy_file:
                    self.proxylist = proxy_file.read().split('\n')
                print(Fore.LIGHTCYAN_EX)
                break
            except FileNotFoundError:
                print(
                    f'{Fore.LIGHTRED_EX}{self.proxylist} not found, Please make sure {self.proxylist} is in folder'
                )
                self.proxylist = input(
                    'Please type the correct proxies file name: ')
                continue
    elif OxygenX.Proxy.proxy_use_api and OxygenX.Proxy.proxy:
        # Proxies from an API link: one attempt, exit on failure.
        while True:
            try:
                self.proxylist = [
                    x.strip() for x in get(
                        url=OxygenX.Proxy.proxy_api).text.splitlines()
                    if ':' in x
                ]
                if OxygenX.Proxy.refresh_api > 30:
                    Thread(target=self.refresh_api_link,
                           daemon=True).start()
                break
            except Exception as e:
                if self.debug:
                    print(
                        f'{Fore.LIGHTRED_EX}Error connecting with api link: {e}\n'
                    )
                print(
                    f'{Fore.LIGHTRED_EX}Proxy Api link down or Connection Error\nPlease check your connection or make sure you entered the correct api link\n\nClosing program in 6 seconds...'
                )
                sleep(6)
                exit()
    while True:
        file = input(
            "Please Enter Combolist Name (Please include extension name, Example: combolist.txt): "
        )
        try:
            with open(file, 'r', encoding='u8',
                      errors='ignore') as combo_file:
                self.combolist = combo_file.read().split('\n')
            break
        except FileNotFoundError:
            print(
                f'\n{Fore.LIGHTRED_EX}File not found, please try again.{Fore.LIGHTCYAN_EX}\n'
            )
            continue
    print('Starting OxygenX...')
    # NOTE(review): the file is opened in 'a+' mode, which creates it when
    # missing; confirm that read() here returns the content you expect.
    with open('dictionary.txt', 'a+', errors='ignore') as dictionary_file:
        self.dictorary = dictionary_file.read()
    unix = str(strftime('[%d-%m-%Y %H-%M-%S]'))
    self.folder = f'results/{unix}'
    if not path.exists('results'):
        mkdir('results')
    if not path.exists(self.folder):
        mkdir(self.folder)
    # Only lines that contain ':' are treated as accounts.
    self.accounts = [x for x in self.combolist if ':' in x]
    Thread(target=self.prints, daemon=True).start()
    Thread(target=self.writecap, daemon=True).start()
    Thread(target=self.save_hits, daemon=True).start()
    Thread(target=cpm_counter, daemon=True).start()
    if self.savebad:
        Thread(target=self.save_bad, daemon=True).start()
    pool = ThreadPool(processes=OxygenX.threads)
    clear()
    Thread(target=self.title, daemon=True).start()
    print(self.t)
    print(self.announcement)
    pool.imap(func=self.prep, iterable=self.accounts)
    pool.close()
    pool.join()
    # Wait until ALL four queues are empty before reporting. The previous
    # `a and b and c and d` test was already zero as soon as ANY queue was
    # empty, and it spun without sleeping while waiting.
    pending = (self.printing, self.caputer, self.bad, self.hits)
    while any(q.qsize() for q in pending):
        sleep(0.1)
    sleep(1)
    print(
        f'{Fore.LIGHTGREEN_EX}\n\nResults: \n'
        f'Hits: {Counter.hits}\n'
        f'Bad: {Counter.bad}\n'
        f'Demo: {Counter.demo}\n'
        f'Secured: {Counter.nfa}\n'
        f'Unsecured: {Counter.sfa}\n'
        f'Email Access: {Counter.emailaccess}\n'
        f'Unmigrated: {Counter.unfa}\n'
        f'NoHypixel Login accounts: {Counter.nohypixel}\n'
        f'NoMineplex Login accounts: {Counter.nomineplex}\n'
        f'Mojang/Minecon cape: {Counter.mojang}\n'
        f'Optifine cape: {Counter.optifine}\n'
        f'Labymod cape: {Counter.labymod}\n'
        f'LiquidBounce cape: {Counter.liquidbounce}\n'
        f'Hypixel Ranked accounts: {Counter.hypixelrank}\n'
        f'Mineplex Ranked accounts: {Counter.mineplexrank}\n'
        f'HiveMC Ranked accounts: {Counter.hivemcrank}\n'
        f'Veltpvp Ranked accounts: {Counter.veltrank}\n'
        f'Lunar Ranked accounts: {Counter.lunarrank}\n'
        f'Hypixel {self.hypminl}+ accounts: {Counter.hypixelhl}\n'
        f'Mineplex {OxygenX.Level.mineplex_level}+ accounts: {Counter.mineplexhl}\n'
        f'\n{now_time()}{Fore.LIGHTMAGENTA_EX}Finished checking\n{Fore.LIGHTRED_EX}'
    )
    input('[Exit] You can now close OxygenX...')
def api_request(oformat, stream, params, yr, mntlist, tstep, back):
    """Build the list of CDS API requests and download them in parallel.

    One request is built for every (year, month, parameter) combination
    whose target file is not already listed in the files database, then all
    requests are handed to ``do_request`` through a thread pool.
    NOTE(review): compressing and moving the downloaded files presumably
    happens inside ``do_request``; it is not visible in this function.

    Args:
        oformat: output format, forwarded to the request builders.
        stream: name of the stream to download (e.g. ``'pressure'``).
        params: parameters to download; when empty, the stream's default
            ``dsargs['params']`` list is used.
        yr: iterable of years; compared against strings, so years are
            expected as strings.
        mntlist: months as zero-padded strings; when empty, all twelve
            months are used.
        tstep: time step; ``'mon'`` switches the database lookup to the
            ``monthly`` location.
        back: when true, only the first month of each year is processed.
    """
    # open connection to era5 files db
    conn = db_connect(cfg)
    # faster ips and users to alternate between successive requests
    ips = cfg['altips']
    users = cfg['users']
    # years for which ERA5.1 should be downloaded instead of ERA5
    era51 = [str(y) for y in range(2000, 2007)]
    if not mntlist:
        mntlist = ["%.2d" % month for month in range(1, 13)]
    # retrieve stream arguments
    dsargs = define_args(stream, tstep)
    era5log.debug(f'Stream attributes: {dsargs}')
    # get variables details from json file
    vardict = read_vars(stream)
    # define params to download
    if not params:
        params = dsargs['params']
    era5log.debug(f'Params: {params}')

    rqlist = []
    req_no = 0  # running index used to alternate between ips and users
    # according to ECMWF, best to loop through years and months and do either
    # multiple variables in one request, or at least loop through variables
    # in the innermost loop.
    for y in yr:
        era5log.debug(f'Year: {y}')
        # Pressure levels between 2000 and 2006 come from ERA5.1 via mars.
        # Use a per-year copy of the arguments so the ERA5.1 settings do not
        # leak into later years that must use the regular ERA5 dataset.
        mars = y in era51 and stream == 'pressure'
        if mars:
            era5log.debug('Submitting using mars for ERA5.1')
            year_args = define_args(stream + "51", tstep)
            year_args['dsid'] = 'reanalysis-era5.1-complete'
        else:
            year_args = dsargs
        # build Copernicus requests for each month
        for mn in mntlist:
            era5log.debug(f'Month: {mn}')
            for varp in params:
                era5log.debug(f'Param: {varp}')
                queue, var, cdsname = define_var(vardict, varp, era5log)
                # if grib code exists but cds name is not defined skip var
                if not queue:
                    continue
                # filenames already existing for this var and year
                sql = "select filename from file where location=?"
                if tstep == 'mon':
                    tup = (f"{stream}/{var}/monthly", )
                else:
                    tup = (f"{stream}/{var}/{y}", )
                nclist = list(query(conn, sql, tup))
                era5log.debug(nclist)
                stagedir, destdir, fname, daylist = target(
                    stream, var, y, mn, year_args, tstep, back, oformat)
                # if file already exists in datadir then skip
                if file_exists(fname, nclist):
                    era5log.info(f'Skipping {fname} already exists')
                    continue
                if mars:
                    rdict = build_mars(year_args, y, mn, varp, oformat,
                                       tstep, back)
                else:
                    rdict = build_dict(year_args, y, mn, cdsname, daylist,
                                       oformat, tstep, back)
                rqlist.append(
                    (year_args['dsid'], rdict,
                     os.path.join(stagedir, fname),
                     os.path.join(destdir, fname),
                     ips[req_no % len(ips)],
                     users[req_no % len(users)]))
                req_no += 1
                era5log.info(f'Added request for {fname}')
            # NOTE(review): placed at month level, i.e. with ``back`` only the
            # first month of each year is processed -- confirm against the
            # original indentation.
            if back:
                era5log.debug('Breaking cycle back is True')
                break
    era5log.debug(f'{rqlist}')

    # parallel downloads
    if rqlist:
        # num of threads = number of params, or the default from config
        nthreads = len(params) if len(params) > 1 else cfg['nthreads']
        pool = ThreadPool(nthreads)
        try:
            # Results are deliberately not consumed: the work still runs and
            # join() below waits for it, but exceptions raised inside
            # do_request are not re-raised here.
            pool.imap(do_request, rqlist)
        finally:
            pool.close()
            pool.join()
    else:
        era5log.info('No files to download!')
    era5log.info('--- Done ---')
gf.write(json.dumps(neighb_features)) # Check if path isn't already created, otherwise create directories and topojson for neighb in neighborhoods: if not os.path.exists(BASE_DIR + "/data/{0}".format(neighb)): subprocess.call(["mkdir", "-p", "data/{0}".format(neighb)]) # Remove any hyphens to match topojson file naming conventions rn = re.sub('[^a-z]+', '', neighb) topo_call = topo_url.format(neighb, rn) geo_call = geo_url.format(neighb) wget_topo = "wget -O data/{0}/{0}.topojson {1}".format( neighb, topo_call) wget_geo = "wget -O data/{0}/{0}.geojson {1}".format(neighb, geo_call) calls.extend((wget_topo, wget_geo)) # Run wget calls for GeoJSON in multiple processes to speed up pool = Pool(4) for i, returncode in enumerate( pool.imap(partial(subprocess.call, shell=True), calls)): if returncode != 0: errors.append(calls[i]) # List comprehension to clean all GeoJSON files and create master file [clean_geojson(n) for n in neighborhoods] with open("dna_neighborhoods.geojson", "w") as gf: gf.write(json.dumps(geoj_features))
def download_s3_files(s3_links_arr, output_dir, log_dir, pool_size=1):
    """Download the not-yet-fetched S3 objects in *s3_links_arr* via the AWS CLI.

    A link is skipped when it is already recorded in
    ``<log_dir>/successful_downloads.txt``, when its destination file already
    exists under *output_dir*, or when it is a second (or later) occurrence
    of one of the dataset-level files that is only needed once.

    Args:
        s3_links_arr: iterable of S3 links; links not starting with ``s3:/``
            get that prefix prepended.
        output_dir: directory under which the objects are stored; the path
            components after the fourth ``/`` of the S3 path are kept.
        log_dir: directory holding ``successful_downloads.txt`` and
            ``failed_downloads.txt``, which are appended to per download.
        pool_size: number of concurrent download commands; values below 1
            terminate the program through ``sys.exit()``.

    Returns:
        A list holding, for every failed download, the S3 path followed by
        the shell command that failed.
    """
    import shlex  # local import keeps the block self-contained

    bad_download = []
    success_log = os.path.join(log_dir, 'successful_downloads.txt')
    failed_log = os.path.join(log_dir, 'failed_downloads.txt')
    only_one_needed = [
        "CHANGES",
        "dataset_description.json",
        "README",
        "task-MID_bold.json",
        "task-nback_bold.json",
        "task-rest_bold.json",
        "task-SST_bold.json",
        "Gordon2014FreeSurferSubcortical_dparc.dlabel.nii",
        "HCP2016FreeSurferSubcortical_dparc.dlabel.nii",
        "Markov2012FreeSurferSubcortical_dparc.dlabel.nii",
        "Power2011FreeSurferSubcortical_dparc.dlabel.nii",
        "Yeo2011FreeSurferSubcortical_dparc.dlabel.nii",
    ]

    # The log stores one path per line; drop the newline so that membership
    # tests against bare S3 paths can actually succeed.
    if os.path.isfile(success_log):
        with open(success_log) as f:
            success_set = {line.rstrip('\n') for line in f}
    else:
        success_set = set()

    seen_singletons = set()
    download_set = set()
    print('Creating unique download list...')
    for s3_link in s3_links_arr:
        s3_path = s3_link if s3_link[:4] == 's3:/' else 's3:/' + s3_link
        # Only the first link mentioning a "needed once" file is kept.
        singleton = next(
            (name for name in only_one_needed if name in s3_path), None)
        if singleton is not None:
            if singleton in seen_singletons:
                continue
            seen_singletons.add(singleton)
        if s3_path in success_set:
            continue
        dest = os.path.join(output_dir, '/'.join(s3_path.split('/')[4:]))
        if not os.path.isfile(dest):
            download_set.add((s3_path, dest))  # make unique s3 downloads

    # Keep each S3 path next to its command instead of re-parsing the
    # command text with a regex afterwards.
    print('Creating download commands...')
    jobs = []
    for s3_path, dest in sorted(download_set, key=lambda pair: pair[1]):
        command = ' ; '.join([
            "mkdir -p " + shlex.quote(os.path.dirname(dest)),
            "aws s3 cp " + shlex.quote(s3_path) + " " + shlex.quote(dest)
            + " --profile NDA",
        ])
        jobs.append((s3_path, command))

    if pool_size == 1:
        print('\nDownloading files serially...')
    elif pool_size > 1:
        print('\nParallel downloading with %d core(s)...' % pool_size)
    elif pool_size < 1:
        print(
            '\nCannot download with less than 1 core. Try changing your "-p" argument. Quitting...'
        )
        sys.exit()

    pool = Pool(pool_size)  # pool_size concurrent commands at a time
    try:
        return_codes = pool.imap(
            partial(call, shell=True), [command for _, command in jobs])
        for (s3_path, command), returncode in zip(jobs, return_codes):
            if returncode == 0:
                # Appended per item so progress survives an interruption.
                with open(success_log, 'a') as s:
                    s.write(s3_path + '\n')
            else:
                print("Command failed: {}".format(command))
                bad_download.append(s3_path)
                with open(failed_log, 'a') as f:
                    f.write(s3_path + '\n')
                bad_download.append(command)
    finally:
        pool.close()
        pool.join()
    return bad_download
def load_index(fname):
    """Read the faiss index stored at *fname*, memory-mapped and read-only.

    Returns the pair ``(fname, index)``; ``index`` is ``None`` when faiss
    raises a ``RuntimeError`` while reading the file.
    """
    print("loading", fname)
    io_flags = faiss.IO_FLAG_MMAP | faiss.IO_FLAG_READ_ONLY
    try:
        loaded = faiss.read_index(fname, io_flags)
    except RuntimeError as err:
        print('could not load %s: %s' % (fname, err))
        return fname, None
    print(" %d entries" % loaded.ntotal)
    return fname, loaded


index0 = None
# Collect the inverted lists of every index that could be loaded.
for _, index in pool.imap(load_index, args.inputs):
    if index is not None:
        index_ivf = faiss.extract_index_ivf(index)
        il = faiss.downcast_InvertedLists(index_ivf.invlists)
        # Detach the lists from the index and keep our own reference to them.
        index_ivf.invlists = None
        il.this.own()
        ils_dont_dealloc.append(il)
        if args.l0 != 0 or args.l1 != -1:
            print('restricting to lists %d:%d' % (args.l0, args.l1))
            # il = faiss.SliceInvertedLists(il, args.l0, args.l1)
            il.crop_invlists(args.l0, args.l1)
            ils_dont_dealloc.append(il)
        ils.push_back(il)
def update(self, no_jobs=None):
    """Call ``update()`` on every disc in ``self.discs`` through a worker pool.

    Args:
        no_jobs: number of pool workers; when ``None``, one worker per disc
            is used.

    Nothing is done when ``no_jobs`` is ``None`` and there are no discs,
    because a pool cannot be created with zero workers.
    """
    if no_jobs is None:
        if not self.discs:
            return
        no_jobs = len(self.discs)
    pool = Pool(no_jobs)
    try:
        # Drain the iterator so every update has finished (and any exception
        # it raised has surfaced) before returning.
        for _ in pool.imap(lambda disc: disc.update(), self.discs):
            pass
    finally:
        # Release the workers even when one of the updates failed.
        pool.close()
        pool.join()
def thread(function, dataFeed):
    """Apply *function* to every item of *dataFeed* with a pool of 8 workers.

    Args:
        function: callable returning an iterable for each input item.
        dataFeed: iterable of inputs.

    Returns:
        A generator yielding, in input order, every element of every
        iterable produced by *function*.
    """
    # Creates pool of 8 workers
    pool = Pool(8)
    batches = pool.imap(function, dataFeed)
    # No further work will be submitted: closing now lets the workers exit
    # once the queued tasks are done instead of lingering after each call.
    pool.close()
    # Flatten the per-item iterables into a single stream.
    return (item for batch in batches for item in batch)
#print("3 cores in use") else: if count==2: pool = Pool(2); command_it.append(command[k]);command_it.append(command[k+1]); k=k+2; count=count-2; # two concurrent commands at a time #print("2 cores in use") else: if count==1: pool = Pool(1) # one concurrent commands if the number of files in the folder is impar command_it.append(command[k]) count=count-1 #print("1 core in use") #RUNNING PARALLEL PROCESS for i, returncode in enumerate(pool.imap(partial(subprocess.call, shell=True), command_it)): if returncode != 0: print("%d command failed: %d" % (i, returncode)) command_it=[]; # close the pool and wait for the work to finish pool.close() pool.join() t1_stop = time.perf_counter() t2_stop = time.process_time() print("--------------------------------------------------") print("Elapsed time: %.1f [sec]" % ((t1_stop-t1_start))) print("CPU process time: %.1f [sec]" % ((t2_stop-t2_start))) print("--------------------------------------------------")
lambda path: (path.endswith(".pdf") or path.endswith(".html") or path.endswith( ".htm") or path.endswith(".doc") or path.endswith(".txt")) and (not bool(re.search('[A-Z]+ [0-9]$', path.strip())) ) and path not in finished_path_upload, bill_paths)) remaining_files = check_for_missing_files(state_path, bill_paths) print("{0} remaining files for {1}".format(len(remaining_files), state)) if len(remaining_files) > 0: commands = [dropbox_upload(path) for path in remaining_files] pool = Pool(4) # go fast the first time for i, returncode in enumerate( pool.imap(partial(call, shell=True, stdin=PIPE), commands)): if returncode != 0: print("%d command failed: %d" % (i, returncode)) print("starting second check for missing files") still_remaining_files = check_for_missing_files(state_path, bill_paths) print("{0} remaining files for {1}".format(len(still_remaining_files), state)) # pool.map(dropbox_upload, bill_paths) if len(still_remaining_files) > 0: commands = [dropbox_upload(path) for path in still_remaining_files] pool = Pool(4) # go slower to be safe for i, returncode in enumerate(
pass elif course == 'MINMAX0': nversions = [v for v in nversions if re.match(r'^.*0$', v)] elif course == 'MINMAX00': nversions = [v for v in nversions if re.match(r'^.*00$', v)] else: raise Exception('Unknown course %s' % course) versions = nversions else: raise Exception('Unknown course %s' % course) def runversion(v): if v in exclude: return '[%s][skip][Unsupported]\n' % v cmd = '/home/jeremy/Projects/vimrc-test/sh/run.sh %s /home/jeremy/vim' % v return subprocess.run(cmd, capture_output=True, text=True, shell=True).stdout print('%d versions in course %s' % (len(versions), course)) starttime = time.time() p = Pool(int(os.environ['NUM_THREADS'])) for output in p.imap(runversion, versions): print(output, end='', flush=True) elapsed = time.time() - starttime print('Finished in %dw %dd %dh %dm %ds' % (elapsed / 604800, (elapsed % 604800) / 86400, (elapsed % 86400) / 3600, (elapsed % 3600) / 60, elapsed % 60))
from global_variables import *

# Print a progress line every `report_step` finished commands.
report_step = 100

if __name__ == '__main__':
    # Make sure the output folder exists (no error if it already does).
    os.makedirs(g_lfd_images_folder, exist_ok=True)

    # Each non-blank line of the shape list is "<synset> <md5>".
    with open(g_shape_list_file, 'r') as shape_list_file:
        shape_list = [line.strip().split(' ')
                      for line in shape_list_file if line.strip()]
    print(len(shape_list), 'shapes are going to be rendered!')

    print('Generating rendering commands...', end = '')
    commands = []
    for shape_property in shape_list:
        shape_synset = shape_property[0]
        shape_md5 = shape_property[1]
        shape_file = os.path.join(g_shapenet_root_folder, shape_synset, shape_md5, 'model.obj')
        # Blender's output is discarded; drop the redirection when debugging.
        command = '%s ../blank.blend --background --python render_lfd_single_shape.py -- %s %s %s > /dev/null 2>&1' % (g_blender_executable_path, shape_file, shape_synset, shape_md5)
        commands.append(command)
    print('done(%d commands)'%(len(commands)))

    print('Rendering, it takes long time...')
    pool = Pool(g_lfd_rendering_thread_num)
    try:
        # imap yields return codes in submission order, so idx indexes commands.
        for idx, return_code in enumerate(pool.imap(partial(call, shell=True), commands)):
            if idx % report_step == 0:
                print('[%s] Rendering command %d of %d' % (datetime.datetime.now().time(), idx, len(shape_list)))
            if return_code != 0:
                print('Rendering command %d of %d (\"%s\") failed' % (idx, len(shape_list), commands[idx]))
    finally:
        # Release the workers whether or not rendering completed.
        pool.close()
        pool.join()