def run_domain_checks(rounded_time, env, output, pool):
    """Run the per-domain checks on a worker pool and replay their output.

    Every domain we handle mail, DNS or HTTPS for is checked by
    ``run_domain_checks_on_domain``; each call yields a ``(domain, output)``
    pair.  The buffered outputs are then played back into ``output`` in the
    order given by ``sort_domains`` so the report is stable regardless of
    which worker finished first.
    """
    # Domains we handle mail for.
    mail_domains = get_mail_domains(env)

    # Domains we serve DNS zones for (zones only, i.e. no subdomains).
    dns_zonefiles = dict(get_dns_zones(env))
    dns_domains = set(dns_zonefiles)

    # Domains we serve HTTPS for.
    web_domains = set(get_web_domains(env))

    domains_to_check = mail_domains | dns_domains | web_domains

    # Domains we don't serve web for because of a custom CNAME/A record.
    domains_with_a_records = get_domains_with_a_records(env)

    ssl_certificates = get_ssl_certificates(env)

    # Only the domain differs between worker calls; everything else is shared.
    shared_args = (
        rounded_time,
        env,
        dns_domains,
        dns_zonefiles,
        mail_domains,
        web_domains,
        domains_with_a_records,
        ssl_certificates,
    )
    tasks = ((domain,) + shared_args for domain in domains_to_check)

    # (domain, output) pairs => {domain: output}
    output_by_domain = dict(
        pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1)
    )

    for domain in sort_domains(output_by_domain, env):
        output_by_domain[domain].playback(output)
def run_domain_checks(rounded_time, env, output, pool):
    """Check every mail, DNS and web domain in parallel, then print in order.

    ``run_domain_checks_on_domain`` is dispatched once per domain through
    ``pool.starmap`` and returns ``(domain, output)`` pairs; the buffered
    outputs are replayed into ``output`` following ``sort_domains``.
    """
    # Domains we handle mail for.
    mail_domains = get_mail_domains(env)

    # Domains we serve DNS zones for (zones only, i.e. no subdomains).
    dns_zonefiles = dict(get_dns_zones(env))
    dns_domains = set(dns_zonefiles)

    # Domains we serve HTTPS for, including the default "www" redirects.
    https_domains = get_web_domains(env) + get_default_www_redirects(env)
    web_domains = set(https_domains)

    domains_to_check = mail_domains | dns_domains | web_domains

    # One argument tuple per domain for the worker pool.
    def worker_args():
        for domain in domains_to_check:
            yield (
                domain,
                rounded_time,
                env,
                dns_domains,
                dns_zonefiles,
                mail_domains,
                web_domains,
            )

    pairs = pool.starmap(run_domain_checks_on_domain, worker_args(), chunksize=1)
    buffered = dict(pairs)  # (domain, output) => { domain: output }

    for domain in sort_domains(buffered, env):
        buffered[domain].playback(output)
def starmap_parallel(function, values, number_of_processes=None, chunksize=1):
    """Apply ``function`` to each argument tuple in ``values`` on a process pool.

    Args:
        function: Callable invoked as ``function(*args)`` for every item.
        values: Iterable of argument tuples.
        number_of_processes: Passed to ``Pool(processes=...)``.
        chunksize: Passed through to ``Pool.starmap``.

    Returns:
        A tuple with one result per item of ``values``.
    """
    assert callable(function)

    start_message = 'Creating multiprocessing pool with {} processes and chunksize {}.'.format(number_of_processes, chunksize)
    logger.debug(start_message)

    with multiprocessing.pool.Pool(processes=number_of_processes) as worker_pool:
        results = tuple(worker_pool.starmap(function, values, chunksize=chunksize))

    done_message = 'Parallel calculation with {} results completed.'.format(len(results))
    logger.debug(done_message)
    return results
def run_services_checks(env, output, pool):
    """Check that system services are running.

    Each service from ``get_services()`` is probed by ``check_service`` on the
    worker pool.  Results are replayed into ``output`` in service order.

    Returns:
        ``False`` if any check reported a fatal problem, ``True`` otherwise.
    """
    tasks = ((index, service, env) for index, service in enumerate(get_services()))
    outcomes = pool.starmap(check_service, tasks, chunksize=1)

    every_service_up = True
    any_fatal = False

    # Results carry their original index first, so sorting restores list order.
    for _index, is_running, is_fatal, service_output in sorted(outcomes):
        if service_output is None:
            # skip check (e.g. no port was set, e.g. no sshd)
            continue
        if not is_running:
            every_service_up = False
        if is_fatal:
            any_fatal = True
        service_output.playback(output)

    if every_service_up:
        output.print_ok("All system services are running.")

    return not any_fatal
def train(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, max_epoch, nTraj):
    """Run ``DifferentTrainingSet`` once per entry of ``nTraj`` for a given seed.

    The runs are distributed over a pool of three worker processes.  The
    first five elements of every worker result are gathered, in run order,
    into per-seed collections.

    Args:
        seed: Forwarded to every ``DifferentTrainingSet`` call.
        TrainingSet_Array: 2-D indexable training data (sliced with ``[:, :]``).
        Labels_Array: 2-D indexable labels (sliced with ``[:, :]``).
        List_TimeBatch: Sequence whose first element is forwarded to the workers.
        max_epoch: Unused by this function; kept for interface compatibility.
        nTraj: Sequence; one worker call is made per element, and the whole
            sequence is forwarded to each call together with the run index.

    Returns:
        A 5-tuple of single-element lists:
        ``([times], [rewards], [stds], [likelihoods], [likelihood_times])``
        where times, rewards and stds are flat NumPy arrays built from result
        elements 0, 3 and 4, and the likelihood entries are plain lists built
        from result elements 1 and 2.
    """
    TrainingSet_tot = TrainingSet_Array[:, :]
    Labels_tot = Labels_Array[:, :]
    TimeBatch = List_TimeBatch[0]

    args = [(i, nTraj, TrainingSet_tot, Labels_tot, TimeBatch, seed)
            for i in range(len(nTraj))]

    pool = multiprocessing.Pool(processes=3)
    try:
        givenSeed_training_results = pool.starmap(DifferentTrainingSet, args)
    finally:
        # Always shut the workers down, even when a run raised; previously a
        # failing run left the pool (and its processes) behind.
        pool.close()
        pool.join()

    Time_array_online = np.empty((0))
    RewardOnline_array = np.empty((0))
    STDOnline_array = np.empty((0))
    Likelihood_online_list = []
    time_likelihood_online_list = []

    for run_result in givenSeed_training_results:
        Time_array_online = np.append(Time_array_online, run_result[0])
        Likelihood_online_list.append(run_result[1])
        time_likelihood_online_list.append(run_result[2])
        RewardOnline_array = np.append(RewardOnline_array, run_result[3])
        STDOnline_array = np.append(STDOnline_array, run_result[4])

    return (
        [Time_array_online],
        [RewardOnline_array],
        [STDOnline_array],
        [Likelihood_online_list],
        [time_likelihood_online_list],
    )
def prop_coad(ref_dir, make_fits=False):
    """
    Proper coaddition function

    Args:
        ref_dir: Either a one-element sequence holding a directory (every
            ``*.fits`` file inside it is used) or a sequence of FITS paths.
        make_fits: When ``True`` the coadded image is also written next to
            the first input as ``<name>_COAD.fits``.

    Returns:
        ``(output_path, R)`` when ``make_fits`` is ``True``, otherwise ``R``,
        where ``R`` is the real part of the inverse 2-D FFT of the summed
        numerators divided by the square root of the summed denominators
        returned by ``coad_func``.

    The first image is only used to obtain the PSF/SExtractor catalogues via
    ``get_psf``; ``coad_func`` is run on every remaining image, one worker
    process per image.  At least two images are therefore required (a pool
    cannot be created with zero processes).
    """
    if len(ref_dir) == 1:
        F = glob.glob(ref_dir[0] + '/*.fits')  # collect images you want to coadd
    else:
        F = ref_dir
    print(F)

    _, _, sexcat1, psfcat1 = get_psf(F[0])

    worker_args = [(F[i + 1], psfcat1, sexcat1) for i in range(len(F) - 1)]
    # The context manager shuts the workers down once all results are in;
    # previously the pool was never closed and its processes lingered.
    with multiprocessing.Pool(len(F) - 1) as pool:
        tmp_array = pool.starmap(coad_func, worker_args)

    Nomin = sum(x[0] for x in tmp_array)
    Denom = np.sqrt(sum(x[1] for x in tmp_array))
    if np.any(Denom == 0):
        # Division below will produce inf/nan at these positions.
        print('ZEROS')

    R_hat = Nomin / Denom
    R = np.real(fft.ifft2(R_hat))

    # Remove the temporary catalogues created for the first image.
    subprocess.call(['rm', sexcat1, psfcat1, sexcat1.replace('.fits', '.psf')])

    if make_fits == True:
        out_name = F[0].replace('.fits', '_COAD.fits')
        hed = fits.getheader(F[0])
        hed['COMMENT'] = 'ZO coaddition from ZiP'
        hed['COMMENT'] = 'List of stacked fits'
        hed['COMMENT'] = ', '.join(F)
        fits.writeto(out_name, R, hed, overwrite=True)
        return (out_name, R)
    else:
        return (R)
def run_services_checks(env, output, pool):
    """Check that system services are running.

    Every entry of the service table is probed by ``check_service`` on the
    worker pool; buffered outputs are replayed into ``output`` in table order.

    Returns:
        ``False`` if any check reported a fatal problem, ``True`` otherwise.
    """
    # (name, port, public)
    service_table = [
        ("Local DNS (bind9)", 53, False),
        # ("NSD Control", 8952, False),
        ("Local DNS Control (bind9/rndc)", 953, False),
        ("Dovecot LMTP LDA", 10026, False),
        ("Postgrey", 10023, False),
        ("Spamassassin", 10025, False),
        ("OpenDKIM", 8891, False),
        ("OpenDMARC", 8893, False),
        ("Memcached", 11211, False),
        ("Sieve (dovecot)", 4190, False),
        ("Mail-in-a-Box Management Daemon", 10222, False),
        ("SSH Login (ssh)", get_ssh_port(), True),
        ("Public DNS (nsd4)", 53, True),
        ("Incoming Mail (SMTP/postfix)", 25, True),
        ("Outgoing Mail (SMTP 587/postfix)", 587, True),
        # ("Postfix/master", 10587, True),
        ("IMAPS (dovecot)", 993, True),
        ("HTTP Web (nginx)", 80, True),
        ("HTTPS Web (nginx)", 443, True),
    ]
    services = [
        {"name": name, "port": port, "public": public}
        for name, port, public in service_table
    ]

    tasks = ((index, service, env) for index, service in enumerate(services))
    outcomes = pool.starmap(check_service, tasks, chunksize=1)

    every_service_up = True
    any_fatal = False

    # Results carry their table index first, so sorting restores table order.
    for _index, is_running, is_fatal, service_output in sorted(outcomes):
        if service_output is None:
            # skip check (e.g. no port was set, e.g. no sshd)
            continue
        if not is_running:
            every_service_up = False
        if is_fatal:
            any_fatal = True
        service_output.playback(output)

    if every_service_up:
        output.print_ok("All system services are running.")

    return not any_fatal
def run_services_checks(env, output, pool):
    """Check that system services are running.

    Local-only and public services are listed separately below and probed by
    ``check_service`` on the worker pool; buffered outputs are replayed into
    ``output`` in list order.

    Returns:
        ``False`` if any check reported a fatal problem, ``True`` otherwise.
    """
    def describe(name, port, public):
        return {"name": name, "port": port, "public": public}

    local_services = [
        describe("Local DNS (bind9)", 53, False),
        # describe("NSD Control", 8952, False),
        describe("Local DNS Control (bind9/rndc)", 953, False),
        describe("Dovecot LMTP LDA", 10026, False),
        describe("Postgrey", 10023, False),
        describe("Spamassassin", 10025, False),
        describe("OpenDKIM", 8891, False),
        describe("OpenDMARC", 8893, False),
        describe("Memcached", 11211, False),
        describe("Mail-in-a-Box Management Daemon", 10222, False),
    ]
    public_services = [
        describe("SSH Login (ssh)", get_ssh_port(), True),
        describe("Public DNS (nsd4)", 53, True),
        describe("Incoming Mail (SMTP/postfix)", 25, True),
        describe("Outgoing Mail (SMTP 587/postfix)", 587, True),
        # describe("Postfix/master", 10587, True),
        describe("IMAPS (dovecot)", 993, True),
        describe("Mail Filters (Sieve/dovecot)", 4190, True),
        describe("HTTP Web (nginx)", 80, True),
        describe("HTTPS Web (nginx)", 443, True),
    ]
    services = local_services + public_services

    tasks = ((position, service, env) for position, service in enumerate(services))
    outcomes = pool.starmap(check_service, tasks, chunksize=1)

    all_up = True
    saw_fatal = False

    # Results carry their list position first, so sorting restores list order.
    for _position, is_up, is_fatal, buffered in sorted(outcomes):
        if buffered is None:
            continue  # skip check (e.g. no port was set, e.g. no sshd)
        all_up = all_up and is_up
        saw_fatal = saw_fatal or is_fatal
        buffered.playback(output)

    if all_up:
        output.print_ok("All system services are running.")

    return not saw_fatal
def mkt_on_list(glist, data_df, pops=None, tests=None, cutoffs=None, do_trims=None, bootstrap=None, b_reps=None):
    """Run the MKT analysis for a list of gene ids, one worker call per population.

    Args:
        glist: Ids to keep; rows of ``data_df`` whose ``'id'`` is in this
            collection are analysed.
        data_df: DataFrame with at least ``'id'`` and ``'pop'`` columns.
        pops: Populations to analyse (default ``['AFR', 'EUR']``).
        tests: Tests to run (default ``['eMKT', 'aMKT']``).
        cutoffs: Cutoff values forwarded to the workers (default ``[0.05, 0.15]``).
        do_trims: Trim flags forwarded to the workers (default ``[True, False]``).
        bootstrap: When truthy, ``bootstrap_on_subdata`` is used (and receives
            ``b_reps``) instead of ``mkt_on_subdata``.  Default ``False``.
        b_reps: Number of bootstrap repetitions (default ``100``).

    Returns:
        The per-population result frames concatenated row-wise with a fresh
        index.
    """
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']
    if pops is None:
        pops = ['AFR', 'EUR']
    if bootstrap is None:
        bootstrap = False
    if b_reps is None:
        b_reps = 100

    df = data_df[data_df['id'].isin(glist)]

    func = bootstrap_on_subdata if bootstrap else mkt_on_subdata
    # Only the bootstrap worker takes the repetition count.
    extra_args = (b_reps,) if bootstrap else ()
    pars = [(df[df['pop'] == pop], pop, tests, cutoffs, do_trims) + extra_args
            for pop in pops]

    # Loads the models for all the parameters parsed using multiprocessing to speed up computations
    pool = MyPool(processes=2)  # multiprocessing.cpu_count())
    try:
        results_list = pool.starmap(func, pars)
    finally:
        # Terminate even when a worker raised, so no processes are left behind.
        pool.terminate()

    return pd.concat(results_list, axis=0, ignore_index=True)
def run_domain_checks(rounded_time, env, output, pool):
    """Run the per-domain checks on a worker pool and replay their output.

    Service subdomains ("www", "autoconfig", "autodiscover", "mta-sts") are
    not checked on their own when their parent domain is also being checked.
    ``run_domain_checks_on_domain`` returns ``(domain, output)`` pairs whose
    outputs are played back into ``output`` in ``sort_domains`` order.
    """
    # Domains we handle mail for.
    mail_domains = get_mail_domains(env)

    # Domains we serve DNS zones for (zones only, i.e. no subdomains).
    dns_zonefiles = dict(get_dns_zones(env))
    dns_domains = set(dns_zonefiles)

    # Domains we serve HTTPS for.
    web_domains = set(get_web_domains(env))

    candidate_domains = mail_domains | dns_domains | web_domains

    # Subdomains with these first labels are grouped with their parent.
    grouped_labels = ("www", "autoconfig", "autodiscover", "mta-sts")

    def is_grouped_with_parent(domain):
        label, dot, parent = domain.partition(".")
        return bool(dot) and label in grouped_labels and parent in candidate_domains

    domains_to_check = [
        domain for domain in candidate_domains
        if not is_grouped_with_parent(domain)
    ]

    # Domains we don't serve web for because of a custom CNAME/A record.
    domains_with_a_records = get_domains_with_a_records(env)

    # Only the domain differs between worker calls; everything else is shared.
    shared_args = (
        rounded_time,
        env,
        dns_domains,
        dns_zonefiles,
        mail_domains,
        web_domains,
        domains_with_a_records,
    )
    tasks = ((domain,) + shared_args for domain in domains_to_check)

    # (domain, output) pairs => {domain: output}
    output_by_domain = dict(
        pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1)
    )

    for domain in sort_domains(output_by_domain, env):
        output_by_domain[domain].playback(output)
def mkt_on_subdata(subdata, pop=None, tests=None, cutoffs=None, do_trims=None):
    """Run the requested MKT tests on one subset of the data.

    Args:
        subdata: DataFrame with one row per gene.
        pop: Optional label; when given it is stored in a ``'pop'`` column of
            the result.
        tests: Tests to run (default ``['eMKT', 'aMKT']``).  Names other than
            these two are ignored.
        cutoffs: One ``'eMKT'`` run is made per cutoff (default ``[0.05, 0.15]``).
        do_trims: One ``'aMKT'`` run is made per flag (default ``[True, False]``).

    Returns:
        The concatenated ``mkt_on_daf`` results with a ``'nogenes'`` column
        (number of rows of ``subdata``) and, if ``pop`` was given, a ``'pop'``
        column.  When ``subdata`` is empty no test is run and a single-row
        frame carrying only those columns is returned.
    """
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']

    nogenes = len(subdata.index.values)

    if nogenes <= 0:
        results = pd.DataFrame(index=[0])
    else:
        if 'aMKT' in tests:
            daf_cum, div = makeSfs(subdata, cum=True)
        if 'eMKT' in tests:
            daf, div = makeSfs(subdata, cum=False)

        # One argument tuple per (test, parameter) combination.
        pars = []
        for test in tests:
            if test == 'eMKT':
                pars.extend((daf, div, test, cutoff) for cutoff in cutoffs)
            elif test == 'aMKT':
                pars.extend((daf_cum, div, test, do_trim) for do_trim in do_trims)

        # Loads the models for all the parameters parsed using multiprocessing to speed up computations
        pool = MyPool(processes=2)  # multiprocessing.cpu_count())
        try:
            results_list = pool.starmap(mkt_on_daf, pars)
        finally:
            # Terminate even when a worker raised, so no processes are left behind.
            pool.terminate()
        results = pd.concat(results_list, axis=0, ignore_index=True)

    if pop is not None:
        results['pop'] = pop
    results['nogenes'] = nogenes

    return results
def main(argv):
    """Build (and optionally test) every target/profile combination in parallel.

    One ``check_build`` call is made per entry of the build matrix, each in
    its own worker process, and a coloured summary line is printed per entry.
    For passing release builds that were not run as tests, the stripped
    binary size is shown as well.

    Args:
        argv: Command-line arguments handed to the parser from ``get_parser()``.
    """
    opts = get_parser().parse_args(argv)
    build_test_cases = (
        # (sysroot path, target triple, debug/release, should test?, opts.clean)
        (opts.arm_sysroot, ARM_TRIPLE, "debug", False, opts.clean),
        (opts.arm_sysroot, ARM_TRIPLE, "release", False, opts.clean),
        (opts.aarch64_sysroot, AARCH64_TRIPLE, "debug", False, opts.clean),
        (opts.aarch64_sysroot, AARCH64_TRIPLE, "release", False, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "debug", False, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "release", False, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "debug", True, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "release", True, opts.clean),
    )

    # Work from the script's own directory.  abspath() is needed because
    # dirname() is '' when the script is started by bare name, and
    # os.chdir('') raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(sys.argv[0])))

    # The context manager shuts the workers down once all results are in.
    with multiprocessing.pool.Pool(len(build_test_cases)) as pool:
        results = pool.starmap(check_build, build_test_cases, 1)

    print('---')
    print('build test summary:')
    for test_case, result in zip(build_test_cases, results):
        _, triple, kind, test_it, _ = test_case
        title = '%s_%s' % (triple.split('-')[0], kind)
        if test_it:
            title += "_test"

        passed = result == 'pass'
        result_color = PASS_COLOR if passed else FAIL_COLOR

        display_size = ''
        if passed and kind == 'release' and not test_it:
            display_size = get_stripped_size(triple) + ' stripped binary'

        print('%20s: %s%15s%s %s' %
              (title, result_color, result, END_COLOR, display_size))
def run_domain_checks(rounded_time, env, output, pool):
    """Check every mail, DNS and web domain in parallel, then print in order.

    ``run_domain_checks_on_domain`` is dispatched once per domain through
    ``pool.starmap`` and returns ``(domain, output)`` pairs; the buffered
    outputs are replayed into ``output`` following ``sort_domains``.
    """
    # Domains we handle mail for.
    mail_domains = get_mail_domains(env)

    # Domains we serve DNS zones for (zones only, i.e. no subdomains).
    dns_zonefiles = dict(get_dns_zones(env))
    dns_domains = set(dns_zonefiles)

    # Domains we serve HTTPS for, including the default "www" redirects.
    web_domains = set(get_web_domains(env) + get_default_www_redirects(env))

    domains_to_check = mail_domains | dns_domains | web_domains

    # Only the domain differs between worker calls; everything else is shared.
    common = (rounded_time, env, dns_domains, dns_zonefiles, mail_domains, web_domains)
    tasks = ((domain, *common) for domain in domains_to_check)

    # (domain, output) pairs => {domain: output}
    outputs = dict(pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1))

    for domain in sort_domains(outputs, env):
        outputs[domain].playback(output)
def mkt_on_df(gene_df, data_df, label=None, pops=None, tests=None, cutoffs=None, do_trims=None, bootstrap=None, b_size=None, b_reps=None):
    """Run ``mkt_on_col`` on every column of ``gene_df`` in parallel.

    Args:
        gene_df: DataFrame; each column is handed to one ``mkt_on_col`` call.
        data_df: Forwarded unchanged to every call.
        label: Optional value stored in a ``'label'`` column of the result.
        pops: Populations to analyse (default ``['AFR', 'EUR']``).
        tests: Tests to run (default ``['eMKT', 'aMKT']``).
        cutoffs: Cutoff values forwarded to the workers (default ``[0.05, 0.15]``).
        do_trims: Trim flags forwarded to the workers (default ``[True, False]``).
        bootstrap: Forwarded to the workers (default ``False``).
        b_size: Forwarded to the workers as given (no default is applied here).
        b_reps: Forwarded to the workers (default ``100``).

    Returns:
        The per-column result frames concatenated row-wise with a fresh index.
    """
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']
    if pops is None:
        pops = ['AFR', 'EUR']
    if bootstrap is None:
        bootstrap = False
    if b_reps is None:
        b_reps = 100

    # Columns are taken by position so duplicate column names stay distinct.
    pars = [(gene_df.iloc[:, i], data_df, pops, tests, cutoffs, do_trims, bootstrap, b_size, b_reps)
            for i in range(gene_df.shape[1])]

    # Loads the models for all the parameters parsed using multiprocessing to speed up computations
    pool = MyPool(processes=8)  # multiprocessing.cpu_count())
    try:
        results_list = pool.starmap(mkt_on_col, pars)
    finally:
        # Terminate even when a worker raised, so no processes are left behind.
        pool.terminate()

    results = pd.concat(results_list, axis=0, ignore_index=True)

    if label is not None:
        results['label'] = label

    return results
def do_work(c_id__c_genes, odir, faa_files, ffn_files, cores=1):
    """Collect a cluster's protein sequences, align them and build an HMM.

    Args:
        c_id__c_genes: ``(centroid id, set of gene ids)`` pair.
        odir: Directory in which ``<centroid>.faa`` and its derived
            ``.msa`` / ``.hmm`` files are created.
        faa_files: Protein FASTA files scanned with ``scan_faa``.
        ffn_files: Nucleotide FASTA files scanned with ``scan_ffn`` (with
            translation enabled) for genes not found in ``faa_files``.
        cores: Size of the worker pool used for scanning.

    Returns:
        ``None``.  Nothing is done if ``<centroid>.faa.hmm`` already exists.

    Raises:
        AssertionError: If a gene id is found more than once, if the collected
            ids do not match the cluster's gene set exactly, or if MUSCLE or
            hmmbuild exits with a non-zero status.

    NOTE: only the protein (faa) FASTA/MSA/HMM is produced; the nucleotide
    (ffn) counterpart is disabled (ffn_files had not been used at all until
    2016.09.28 and are now only a source of translated sequences).
    """
    centroid = c_id__c_genes[0]  # key
    c_genes = c_id__c_genes[1]   # values

    genes_faa = os.path.join(odir, '%s.faa' % centroid)
    genes_faa_msa = "%s.msa" % genes_faa
    genes_faa_hmm = "%s.hmm" % genes_faa

    # Only the protein HMM is built, so only its presence decides the skip.
    # (The check used to also read the nucleotide HMM path, whose definition
    # is disabled, and raised NameError whenever the protein HMM existed.)
    if os.path.isfile(genes_faa_hmm):
        sys.stdout.write('SKIP: %s: HMM exists: %s\n' % (centroid, genes_faa_hmm))
        return

    # protein sequences
    prot_seqs = {}

    # FAA (protein sequences)
    pool = Pool(cores)
    try:
        results = pool.starmap(scan_faa, itertools.product(faa_files, [c_genes]))
    finally:
        # wait until all finished and close the pool, also on failure
        pool.close()
        pool.join()
    for records in results:
        assert all([r_id not in prot_seqs for r_id in records.keys()])  # TEST
        prot_seqs.update(records)

    # FFN (translate nucl. sequences)
    remaining = c_genes.difference(prot_seqs.keys())
    if len(remaining) > 0:
        sys.stdout.write('\tINFO: for %d genes need to scan ffn files\n' % len(remaining))
        pool = Pool(cores)
        try:
            # NOTE True requests translation (added 2016.09.28)
            results = pool.starmap(scan_ffn, itertools.product(ffn_files, [remaining], [True]))
        finally:
            pool.close()
            pool.join()
        for records in results:
            assert all([r_id not in prot_seqs for r_id in records.keys()])  # TEST
            prot_seqs.update(records)

    # The collected ids must match the cluster's gene set exactly.
    assert all([r_id in c_genes for r_id in prot_seqs.keys()]), ';'.join(list(set(prot_seqs.keys()).difference(c_genes)))  # TEST
    assert all([r_id in prot_seqs.keys() for r_id in c_genes]), ';'.join(list(c_genes.difference(prot_seqs.keys())))  # TEST

    # write FASTA
    with open(genes_faa, "w") as f:
        for c_gene in c_genes:
            check_seq(c_gene, prot_seqs[c_gene].seq, 'prot')
            SeqIO.write(prot_seqs[c_gene], f, "fasta")

    # run MUSCLE
    cmd, cmd_stdout, cmd_status = run_muscle(ifile=genes_faa, params=__MUSCLE_PARAMS_PROT__)
    assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout)  # TEST

    # build HMM
    cmd, cmd_stdout, cmd_status = run_hmmbuild(ifile=genes_faa_msa, alphabet='--amino')
    assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout)  # TEST

    # rm fasta
    os.remove(genes_faa)
centroids[centroidID] = set() for cl_genes in clustered_genes: if cl_genes == "": # this sample has no gene belonging to that cluster continue for cl_gene in cl_genes.split('\t'): assert cl_gene not in centroids[centroidID], '%s already found' % cl_gene assert cl_gene not in all_genes, '%s already found' % cl_gene centroids[centroidID].add( cl_gene ) all_genes.add( cl_gene ) if args.verbose: sys.stdout.write("%s: Genes: %d\n" % (timestamp(), len(all_genes))) # NOTE do work: extract protein sequences, create MSA, build HMM pool = MyPool(int(args.cores/args.job_cores)) # create pool for parallel computing pool_iter = itertools.product(centroids.items(), ['%s/tmp' % args.odir], [faa_files], [ffn_files], [args.job_cores]) # iterable for the pool NOTE had no ffn_files until 2016.09.28 results = pool.starmap( do_work , pool_iter ) # perform parallel computing pool.close(); pool.join() # wait until all finished and close the pool if args.verbose: sys.stdout.write("%s: Created HMM models\n" % timestamp() ) # NOTE Clean up # archive *.msa if args.verbose: sys.stdout.write("%s: Creating MSA archive\n" % timestamp() ) cmd, cmd_stdout, cmd_status = compress_MSA(sdir='%s/tmp' % args.odir, obname=os.path.join(args.odir,'MSA')) if args.verbose: sys.stdout.write('\t%s\n' % cmd) assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout) # TEST # sync *.hmm if args.verbose:
def main(argv):
    """Generate the virglrenderer binding modules and the ``mod.rs`` index.

    Each entry of ``modules`` is handed to ``generate_module`` in its own
    worker process.  A coloured pass/fail summary is printed; if any module
    failed the process exits with status 1, otherwise ``mod.rs`` is written
    in the script's directory with one ``pub mod`` line per module.

    Args:
        argv: Command-line arguments handed to the parser from ``get_parser()``.
    """
    global verbose

    # Work from the script's own directory.  abspath() is needed because
    # dirname() is '' when the script is started by bare name, and
    # os.chdir('') raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(sys.argv[0])))

    opts = get_parser().parse_args(argv)
    if opts.verbose:
        verbose = True

    virgl_src_dir = opts.virglrenderer
    # Holds the TemporaryDirectory object so the checkout stays on disk for
    # as long as this function runs.
    virgl_src_dir_temp = None
    if '://' in opts.virglrenderer:
        # A URL was given: fetch the sources into a temporary directory.
        virgl_src_dir_temp = tempfile.TemporaryDirectory(
            prefix='virglrenderer-src')
        virgl_src_dir = virgl_src_dir_temp.name
        if not download_virgl(opts.virglrenderer, virgl_src_dir,
                              opts.virgl_branch):
            # Report source and destination (the message used to print the
            # destination and the branch name instead).
            print('failed to clone \'{}\' to \'{}\''.format(
                opts.virglrenderer, virgl_src_dir))
            sys.exit(1)

    clang_args = ['-I', os.path.join(opts.sysroot, 'usr/include')]

    # One tuple of generate_module() arguments per module; the first element
    # is the module name used in the summary and in mod.rs.
    modules = (
        (
            'virglrenderer',
            '(virgl|VIRGL)_.+',
            os.path.join(opts.sysroot, 'usr/include/virgl/virglrenderer.h'),
            clang_args,
            'virglrenderer',
            True,
        ),
        (
            'virgl_protocol',
            '(virgl)|(VIRGL)_.+',
            os.path.join(virgl_src_dir, 'src/virgl_protocol.h'),
            clang_args,
            None,
            False,
        ),
        (
            'p_defines',
            '(pipe)|(PIPE).+',
            os.path.join(virgl_src_dir, 'src/gallium/include/pipe/p_defines.h'),
            clang_args,
            None,
            False,
        ),
        (
            'p_format',
            'pipe_format',
            os.path.join(virgl_src_dir, 'src/gallium/include/pipe/p_format.h'),
            clang_args,
            None,
            False,
        ),
    )

    # The context manager shuts the workers down once all results are in.
    with multiprocessing.pool.Pool(len(modules)) as pool:
        results = pool.starmap(generate_module, modules, 1)

    return_fail = False
    print('---')
    print('generate module summary:')
    for module, result in zip(modules, results):
        if result == 'pass':
            result_color = PASS_COLOR
        else:
            result_color = FAIL_COLOR
            return_fail = True

        print('%15s: %s%s%s' %
              (module[0], result_color, result, END_COLOR))

    if return_fail:
        sys.exit(1)

    with open('mod.rs', 'w') as f:
        print('/* generated by generate.py */', file=f)
        print('#![allow(dead_code)]', file=f)
        print('#![allow(non_camel_case_types)]', file=f)
        print('#![allow(non_snake_case)]', file=f)
        print('#![allow(non_upper_case_globals)]', file=f)
        for module in modules:
            print('pub mod', module[0] + ';', file=f)
def infer_filetype_via_coverage_for_parameter_parallel(
        self, parameter: str,
        probe: bool = True) -> (str, int, bool):
    """
    Try to infer the input filetype of an executable via coverage.

    The assumption is that the seed directory that yields the most coverage
    holds files of the right type.  Candidate seed directories are evaluated
    in parallel on a thread pool of ``self.cores`` workers.

    :parameter parameter: The parameter that tells the binary to work with
        this file.
    :parameter probe: Use "probing"
        (``self.probe_possible_filetypes_for_parameter``) to pre-select the
        candidate filetypes instead of trying every directory below
        ``self.seeds_path``.
    :return: ``None`` when probing yields no candidate filetypes.  Otherwise
        a 3-tuple ``(paths, coverages, fallback)`` where ``paths`` is a list
        of seed directories, ``coverages`` the matching coverage values and
        ``fallback`` is ``False`` when the choice was clear (a single probed
        filetype, or coverage more than 2.5 standard deviations above the
        mean) and ``True`` when the maximum-coverage directory is returned
        as a best guess.
        NOTE(review): the declared return annotation does not describe this
        shape; confirm against callers before tightening it.

    Side effects: resets ``self.failed_invocations`` and stores
    ``zip(file_list, cov_list)`` in ``self.coverage_lists`` under the
    parameter (or ``"None"`` when the parameter is falsy).
    """
    self.failed_invocations = 0  # We want to reset the failed invocations for each parameter
    # Treated as a network parameter unless it contains the "@@" placeholder
    # or the stdin invocation below succeeds.
    is_network_param = True
    if "@@" in parameter:
        is_network_param = False
    if self.try_invocation(parameter, stdin=True, without_desock=True):
        is_network_param = False
    PROBE = probe
    max_cov = 0
    max_file = None
    cov_list = []
    file_list = []
    # Maps ".<filetype>" to coverage.  It is handed to every worker call
    # below and read back afterwards, so the workers presumably record their
    # coverage in it -- verify against try_filetype_with_coverage.
    result_dict = {}
    cmin_argument_list = []  # One argument tuple per seed directory to try
    max_coverage_per_filetype = {}
    if PROBE:
        probed_filetypes, min_cov_value, max_coverage_per_filetype = self.probe_possible_filetypes_for_parameter(
            parameter=parameter)
        logger.debug("Probed filetypes: %s", probed_filetypes)
        # Seed the bookkeeping with the coverage already measured while probing.
        for filetype, cov in max_coverage_per_filetype.items():
            result_dict["." + filetype.split("_")[0]] = cov
            file_list.append(filetype.split("_")[0])
            cov_list.append(cov)
            max_coverage_per_filetype[filetype] = cov
        if not probed_filetypes:  # They all yielded the same coverage :(
            return None
            # self.coverage_lists[parameter] = zip(["garbage"],[min_cov_value])
            # return self.seeds_path+"/garbage_samples",min_cov_value
        if len(probed_filetypes) == 1:  # We can already be sure
            max_file, max_cov = (
                self.seeds_path + "/" + probed_filetypes[0],
                int(max(max_coverage_per_filetype.values())))
            p = "None"
            if parameter:
                p = parameter
            self.coverage_lists[p] = zip(file_list, cov_list)
            return [max_file], [max_cov], False
    else:
        # Without probing, every directory below the seeds path is a candidate.
        probed_filetypes = [
            entity for entity in os.listdir(self.seeds_path)
            if os.path.isdir(os.path.join(self.seeds_path, entity))
        ]
        cov_list = [0] * len(probed_filetypes)
        file_list = [None] * len(probed_filetypes)
    # Collect the work items, skipping non-directories, empty directories,
    # ".git" and (for non-network parameters) the network seeds.
    for entity in probed_filetypes:
        if not os.path.isdir(self.seeds_path + "/" + entity):
            continue
        if len(os.listdir(self.seeds_path + "/" + entity)) <= 0:
            continue
        if entity == ".git":
            continue
        if (
                entity == "pcap-network_samples" or entity == "pcap-network"
        ) and (
                not is_network_param
        ):  # Do not try the network seeds for file handling programs - it simply takes too long
            continue
        cmin_argument_list.append(
            (parameter, self.seeds_path + "/" + entity,
             "." + str(entity.split("_")[0]), result_dict))
    with multiprocessing.pool.ThreadPool(
            processes=self.cores
    ) as pool:  # self.cores workers instead of multiprocessing.cpu_count()
        # The return values are not used below; coverage is read from result_dict.
        results = pool.starmap(self.try_filetype_with_coverage,
                               cmin_argument_list)
    # Fold the measured coverage back into the per-candidate bookkeeping and
    # track the best candidate seen.
    for counter, entity in enumerate(probed_filetypes):
        if not os.path.isdir(self.seeds_path + "/" + entity):
            continue
        cov = result_dict.get("." + str(entity.split("_")[0]))
        if cov is None:
            cov = 0
        if cov > 0:
            cov_list[counter] = cov
            file_list[counter] = entity.split("_")[0]
        if cov > max_cov:
            max_cov = cov
            max_file = self.seeds_path + "/" + entity
        # print("Max coverage per filetype", max_coverage_per_filetype)
        # print("Entity",entity)
        max_coverage_per_filetype[entity] = max(
            cov, max_coverage_per_filetype.get(entity, 0))
    p = "None"
    if parameter:
        p = parameter
    # NOTE: zip() is a one-shot iterator; the stored value can be consumed once.
    self.coverage_lists[p] = zip(file_list, cov_list)
    value_list = list([abs(x) for x in max_coverage_per_filetype.values()])
    std = sp.std(value_list)
    avg_value = sp.mean(value_list)
    logger.debug("Average: %s", avg_value)
    logger.debug("Std. Deviation: %s", std)
    if std > 0:
        # Outliers: filetypes whose coverage lies more than 2.5 standard
        # deviations above the mean.
        possible_filetypes = [
            k for k, v in max_coverage_per_filetype.items()
            if ((v - avg_value) / std) > 2.5
        ]
        if len(possible_filetypes
               ) >= 1:  # clear outlier(s) (the threshold applied above is 2.5)
            return [
                os.path.join(self.seeds_path, p) for p in possible_filetypes
            ], [
                max_coverage_per_filetype[filetype]
                for filetype in possible_filetypes
            ], False
    # No clear outlier: fall back to the candidate with the highest coverage.
    logger.debug("Max file")
    logger.debug(max_file)
    return [max_file], [max_cov], True
def download_all(config, path, urllist):
    """Download every ``(filename, url)`` entry of ``urllist`` in parallel.

    Each entry is fetched by ``download(config, url, destination)`` where the
    destination is ``filename`` joined onto ``path``.  The worker pool is
    sized by ``config.max_processes``.
    """
    jobs = (
        (config, url, os.path.join(path, filename))
        for filename, url in urllist
    )
    with multiprocessing.pool.Pool(config.max_processes) as workers:
        workers.starmap(download, jobs)
def buildDecisionTree(df, root, file, config, dataset_features, parent_level = 0, leaf_id = 0, parents = 'root', tree_id = 0, validation_df = None, main_process_id = None):
    """Split ``df`` on its winning feature and build one branch per feature value.

    Args:
        df: DataFrame with a ``Decision`` column; every column except the last
            is treated as a feature. ``findDecision`` is called on it and the
            non-winning numeric columns are restored afterwards.
        root: Current rule depth. ``1`` marks the top-level call, which is the
            only one that finalises the rule files.
        file: Path of the rules file; the JSON rule file name is derived from
            it by swapping the extension.
        config: Dict providing ``enableParallelism``, ``algorithm``,
            ``enableRandomForest``, ``enableGBM``, ``enableAdaboost`` and
            ``num_cores``.
        dataset_features: Mapping of feature name to dtype name; ``'object'``
            marks a nominal feature.
        parent_level: Depth of the caller; branches are only fanned out to a
            process pool when this is ``0``.
        leaf_id: Ignored on input; overwritten when an ``else`` rule is built.
        parents: Identifier of the parent rule, stored in the ``else`` rule.
        tree_id: Tree identifier stored in the rules and forwarded to
            ``createBranch``.
        validation_df: Unused here.
        main_process_id: Forwarded to ``createBranch``.

    Returns:
        In parallel mode below the root: the list of JSON-encoded rules
        collected so far. Otherwise ``models``, which holds the loaded rules
        module when called at the root for a regular decision tree and is
        empty in every other case.
    """
    models = []
    decision_rules = []

    feature_names = df.columns[0:-1]

    enableParallelism = config['enableParallelism']
    algorithm = config['algorithm']

    json_file = file.split(".")[0]+".json"

    random_forest_enabled = config['enableRandomForest']
    enableGBM = config['enableGBM']
    enableAdaboost = config['enableAdaboost']

    #--------------------------------------

    # findDecision transforms df in place, so keep a pristine copy for the restoration below
    df_copy = df.copy()

    winner_name, num_of_instances, metric, metric_name = findDecision(df, config)

    #find winner index, this cannot be returned by find decision because columns dropped in previous steps
    for j, feature in enumerate(dataset_features):
        if feature == winner_name:
            winner_index = j

    numericColumn = False
    if dataset_features[winner_name] != 'object':
        numericColumn = True

    #restoration
    columns = df.shape[1]
    for i in range(0, columns-1):
        #numeric fields are already transformed to object in df, so the dtype must be read from df_copy
        column_name = df_copy.columns[i]
        column_type = df_copy[column_name].dtypes
        if column_type != 'object' and column_name != winner_name:
            df[column_name] = df_copy[column_name]

    classes = df[winner_name].value_counts().keys().tolist()

    #-----------------------------------------------------

    num_cores = config["num_cores"]

    input_params = []

    for branch_index, current_class in enumerate(classes):
        subdataset = df[df[winner_name] == current_class]
        subdataset = subdataset.drop(columns=[winner_name])

        #create branches serially
        if enableParallelism != True:

            if branch_index == 0:
                descriptor = {
                    "feature": winner_name,
                    "instances": num_of_instances,
                    #"metric_name": metric_name,
                    "metric_value": round(metric, 4),
                    "depth": parent_level + 1
                }
                descriptor = "# "+json.dumps(descriptor)

                functions.storeRule(file, (functions.formatRule(root), "", descriptor))

            results = createBranch(config, current_class, subdataset, numericColumn, branch_index
                , winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id = tree_id, main_process_id = main_process_id)

            decision_rules = decision_rules + results

        else:
            #parallel branches are only collected here; they run further below
            input_params.append((config, current_class, subdataset, numericColumn, branch_index
                , winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id, main_process_id))

    #---------------------------
    #add else condition in the decision tree
    #NOTE: subdataset is the subset of the last class handled in the loop above

    if df.Decision.dtypes == 'object': #classification
        #value_counts() is sorted by frequency, so its first label is the majority class.
        #Reading it from the index avoids depending on the column names produced by
        #reset_index(), which changed in pandas 2.0.
        majority_class = subdataset.Decision.value_counts().index[0]

        else_decision = "return '%s'" % (majority_class)

        if enableParallelism != True:
            functions.storeRule(file,(functions.formatRule(root), "else:"))
            functions.storeRule(file,(functions.formatRule(root+1), else_decision))
        else: #parallelism
            leaf_id = str(uuid.uuid1())

            check_rule = "else: "+else_decision

            sample_rule = {}
            sample_rule["current_level"] = root
            sample_rule["leaf_id"] = leaf_id
            sample_rule["parents"] = parents
            sample_rule["rule"] = check_rule
            sample_rule["feature_idx"] = -1
            sample_rule["feature_name"] = ""
            sample_rule["instances"] = df.shape[0]
            sample_rule["metric"] = 0
            sample_rule["return_statement"] = 0
            sample_rule["tree_id"] = tree_id

            #json to string
            sample_rule = json.dumps(sample_rule)

            decision_rules.append(sample_rule)

    else: #regression
        else_decision = "return %s" % (subdataset.Decision.mean())

        if enableParallelism != True:
            functions.storeRule(file,(functions.formatRule(root), "else:"))
            functions.storeRule(file,(functions.formatRule(root+1), else_decision))
        else:
            leaf_id = str(uuid.uuid1())

            check_rule = "else: "+else_decision

            sample_rule = {}
            sample_rule["current_level"] = root
            sample_rule["leaf_id"] = leaf_id
            sample_rule["parents"] = parents
            sample_rule["rule"] = check_rule
            sample_rule["tree_id"] = tree_id
            sample_rule["feature_name"] = ""
            sample_rule["instances"] = 0
            sample_rule["metric"] = 0
            sample_rule["return_statement"] = 1

            #json to string
            sample_rule = json.dumps(sample_rule)

            decision_rules.append(sample_rule)

    #---------------------------

    results = []
    #create branches in parallel
    if enableParallelism == True:

        if parent_level == 0 and random_forest_enabled != True:
            #Feeding more tasks than workers to a single pool was observed to hang,
            #so the tasks are fed in batches no larger than the pool itself.
            pool_size = max(1, num_cores)

            for batch_begin in range(0, len(input_params), pool_size):
                input_frame = input_params[batch_begin: batch_begin + pool_size]

                pool = MyPool(pool_size)
                try:
                    branch_results = pool.starmap(createBranch, input_frame)
                    pool.close()
                    pool.join()
                finally:
                    #also reclaims the workers when starmap raised
                    pool.terminate()

                gc.collect()

                for branch_result in branch_results:
                    results.extend(branch_result)

        else:
            for input_param in input_params:
                results.extend(createBranchWrapper(createBranch, input_param))

        #--------------------------------

        decision_rules = decision_rules + results

        #--------------------------------

        if root != 1: #return children results until the root node
            return decision_rules

    #---------------------------------------------

    if root == 1:

        if enableParallelism == True:

            #custom rules are stored in decision_rules. merge them all in a json file first
            json_rules = "[\n" + ", \n".join(decision_rules)
            if decision_rules:
                json_rules += "\n"
            json_rules += "]"

            functions.createFile(json_file, json_rules)

            #reconstruct rules from json to py
            reconstructRules(json_file, feature_names)

        #is regular decision tree
        if random_forest_enabled != True and enableGBM != True and enableAdaboost != True:
            #NOTE(review): the imp module was removed in Python 3.12; this needs an importlib port
            moduleName = "outputs/rules/rules"
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description) #rules0
            models.append(myrules)

    return models
RewardOnline_array = np.append(RewardOnline_array, givenSeed_training_results[0][3]) STDOnline_array = np.append(STDOnline_array, givenSeed_training_results[0][4]) List_TimeOnline.append(Time_array_online) List_RewardOnline.append(RewardOnline_array) List_STDOnline.append(STDOnline_array) List_LikelihoodOnline.append(Likelihood_online_list) List_TimeLikelihoodOnline.append(time_likelihood_online_list) return List_TimeOnline, List_RewardOnline, List_STDOnline, List_LikelihoodOnline, List_TimeLikelihoodOnline Nseed = 30 results_online = [] for i in range(len(nTraj)): pool = MyPool(Nseed) args = [(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, max_epoch, nTraj, i) for seed in range(Nseed)] partial_results = pool.starmap(train, args) pool.close() pool.join() results_online.append(partial_results) # %% with open('Comparison/Online/results_online.npy', 'wb') as f: np.save(f, results_online)
eval_lists = [[] for i in range(eval_record_number)] for i, filename in enumerate(eval_filenames): eval_lists[i % eval_record_number].append(filename) for i, eval_list in enumerate(eval_lists): out_path = os.path.join(args.output_folder, "eval-%05d.record" % i) tasks.append(("eval-%05d" % i, ignored_labels, ignore_classes, eval_list, out_path)) return tasks if __name__ == "__main__": t_start = time.time() random.seed(42) # Make sure the shuffle order is the same # Make the tasks for the workers to process tasks = get_record_writing_tasks() eprint("Starting %d record writing tasks" % len(tasks)) # Actually have the workers generate the records pool = multiprocessing.pool.ThreadPool() results = pool.starmap(write_record_from_list, tasks) print_results(results) t_end = time.time() eprint("Took %5.2fs to write records" % (t_end - t_start))
def png2tex(data, card_x, card_y):
    """Convert png images to pygame surfaces on a thread pool.

    Every item of *data* is handed to the worker together with *card_x* and
    *card_y*; the ``(name, tex)`` pairs the workers return are collected into
    a ``{name: tex}`` dict.
    """
    jobs = zip(data, repeat(card_x), repeat(card_y))
    with multiprocessing.pool.ThreadPool() as workers:
        converted = workers.starmap(__thread_png2tex, jobs)
    textures = {}
    for name, tex in converted:
        textures[name] = tex
    return textures
def sounds(data, volume):
    """Load wav sounds for pygame on a thread pool.

    Every item of *data* is handed to the worker together with *volume*; the
    ``(name, wav)`` pairs the workers return are collected into a
    ``{name: wav}`` dict.
    """
    jobs = zip(data, repeat(volume))
    with multiprocessing.pool.ThreadPool() as workers:
        loaded = workers.starmap(__thread_sound, jobs)
    sound_by_name = {}
    for name, wav in loaded:
        sound_by_name[name] = wav
    return sound_by_name
def starmap_with_kwargs(pool: multiprocessing.pool.Pool, fn: Callable, args_iter: Iterable, kwargs_iter: Iterable, N: int):
    """Run ``fn(*args, **kwargs)`` on *pool* for every paired args/kwargs item.

    Args:
        pool: Pool that executes the calls.
        fn: Callable applied to each task.
        args_iter: Iterable of positional-argument sequences, one per task.
        kwargs_iter: Iterable of keyword-argument mappings, one per task.
        N: Expected number of tasks; only used to derive the chunk size.

    Returns:
        The list returned by ``pool.starmap``, in task order.
    """
    args_for_starmap = zip(repeat(fn), args_iter, kwargs_iter)
    # With fewer tasks than workers the integer division yields 0, and a
    # chunk size of 0 makes starmap hand back a list of None without running
    # a single task, so never go below 1.
    chunksize = max(1, N // pool._processes)
    return pool.starmap(apply_args_and_kwargs, args_for_starmap, chunksize)
args = [(i, nTraj, TrainingSet_tot, Labels_tot, TimeBatch, seed) for i in range(len(nTraj))] givenSeed_training_results = pool.starmap(DifferentTrainingSet, args) pool.close() pool.join() for i in range(len(nTraj)): Time_array_online = np.append(Time_array_online, givenSeed_training_results[i][0]) Likelihood_online_list.append(givenSeed_training_results[i][1]) time_likelihood_online_list.append(givenSeed_training_results[i][2]) RewardOnline_array = np.append(RewardOnline_array, givenSeed_training_results[i][3]) STDOnline_array = np.append(STDOnline_array, givenSeed_training_results[i][4]) List_TimeOnline.append(Time_array_online) List_RewardOnline.append(RewardOnline_array) List_STDOnline.append(STDOnline_array) List_LikelihoodOnline.append(Likelihood_online_list) List_TimeLikelihoodOnline.append(time_likelihood_online_list) return List_TimeOnline, List_RewardOnline, List_STDOnline, List_LikelihoodOnline, List_TimeLikelihoodOnline pool = MyPool(10) args = [(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, nTraj) for seed in range(10)] results_online = pool.starmap(train, args) pool.close() pool.join() # %% with open('Comparison/Online/results_online.npy', 'wb') as f: np.save(f, results_online)
pool = multiprocessing.pool.ThreadPool(processes=args.num_threads) elif SYS_TYPE == 'fbsd': pool = multiprocessing.pool.ThreadPool(processes=args.num_threads) else: death('Unsupported platform' + SYS_TYPE) # Perform IPv4 tests if not args.no_v4: ipv4_addresses = [rsi.ipv4 for rsi in ROOT_SERVERS] if not args.no_traceroute: fancy_output( 0, "\rRunning traceroute with " + str(args.num_threads) + " threads") traces = pool.starmap( trace_route, zip(itertools.repeat(find_binary('traceroute')), ipv4_addresses)) lengths = [] for rsi, trace in zip(ROOT_SERVERS, traces): dbgLog( LOG_DEBUG, "traceroute_" + rsi.name + " len:" + str(len(trace)) + " first:" + repr(trace[0])) lengths.append(len(trace)) rsi.traceroute_v4 = trace median = str(statistics.median(lengths)) minimum = str(min(lengths)) maximum = str(max(lengths)) fancy_output( 5, "\rtraceroute hops min:" + minimum + " max:" + maximum + " median:" + median)
def svg2png(svg_cards, svg_start_pos, svg_card_size, defs_dict):
    """Convert svg cards to png in a pool of worker processes.

    The values of *svg_cards* and *defs_dict* are serialised with
    ``etree.tostring`` first. Each worker call then receives one serialised
    card, *svg_start_pos*, *svg_card_size* and the full list of serialised
    definitions. Returns the list produced by ``starmap``.
    """
    svg_cards_string = []
    for card in svg_cards.values():
        svg_cards_string.append(etree.tostring(card))
    defs_string = []
    for obj in defs_dict.values():
        defs_string.append(etree.tostring(obj))

    jobs = zip(svg_cards_string,
               repeat(svg_start_pos),
               repeat(svg_card_size),
               repeat(defs_string))
    with multiprocessing.Pool() as workers:
        pngs = workers.starmap(__thread_svg2png, jobs)
        return pngs
def buildDecisionTree(df, root, file, config, dataset_features, parent_level=0, leaf_id=0, parents='root'):
    """Split ``df`` on its winning feature, build the branches and report accuracy.

    Args:
        df: DataFrame with a ``Decision`` column. ``findDecision`` is called
            on it and the non-winning numeric columns are restored afterwards.
        root: Current rule depth. ``1`` marks the top-level call, which is the
            only one that merges the rule files and evaluates the model.
        file: Path of the rules file; the JSON rule file name is derived from
            it by swapping the extension.
        config: Dict providing ``enableParallelism`` and ``algorithm``, plus
            ``enableRandomForest``, ``enableGBM`` and ``enableAdaboost`` (read
            at the root only).
        dataset_features: Mapping of feature name to dtype name; ``'object'``
            marks a nominal feature.
        parent_level: Unused here.
        leaf_id: Ignored on input; overwritten when an ``else`` rule is built.
        parents: Identifier of the parent rule, stored in the ``else`` rule.

    Returns:
        ``models``: holds the loaded rules module when called at the root for
        a regular decision tree, and is empty in every other case.
    """
    import json  # function-scope import: used only to encode the parallel "else" rule

    models = []

    enableParallelism = config['enableParallelism']
    algorithm = config['algorithm']

    json_file = file.split(".")[0] + ".json"

    if root == 1:
        if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
            #untouched copy used for the accuracy report at the end
            raw_df = df.copy()

    #--------------------------------------

    # findDecision transforms df in place, so keep a pristine copy for the restoration below
    df_copy = df.copy()

    winner_name = findDecision(df, config)

    #find winner index, this cannot be returned by find decision because columns dropped in previous steps
    for j, feature in enumerate(dataset_features):
        if feature == winner_name:
            winner_index = j

    numericColumn = False
    if dataset_features[winner_name] != 'object':
        numericColumn = True

    #restoration
    #The dtype is read from df_copy: once findDecision has turned a numeric column of df
    #into object, checking df itself would never restore it.
    columns = df.shape[1]
    for i in range(0, columns - 1):
        column_name = df_copy.columns[i]
        column_type = df_copy[column_name].dtypes
        if column_type != 'object' and column_name != winner_name:
            df[column_name] = df_copy[column_name]

    classes = df[winner_name].value_counts().keys().tolist()

    #-----------------------------------------------------

    #TO-DO: you should specify the number of cores in config
    #allocate half of the cores, but never fewer than one (a pool of size 0 is invalid)
    num_cores = max(1, multiprocessing.cpu_count() // 2)

    input_params = []

    for branch_index, current_class in enumerate(classes):
        subdataset = df[df[winner_name] == current_class]
        subdataset = subdataset.drop(columns=[winner_name])

        branch_args = (config, current_class, subdataset, numericColumn, branch_index,
                       winner_index, root, parents, file, dataset_features)

        if enableParallelism != True:
            #create branches serially
            createBranch(*branch_args)
        else:
            #parallel branches are only collected here; they run further below
            input_params.append(branch_args)

    #---------------------------
    #add else condition in the decision tree
    #NOTE: subdataset is the subset of the last class handled in the loop above

    if df.Decision.dtypes == 'object':  #classification
        #value_counts() is sorted by frequency, so its first label is the majority class.
        #Reading it from the index avoids depending on the column names produced by
        #reset_index(), which changed in pandas 2.0.
        majority_class = subdataset.Decision.value_counts().index[0]
        else_decision = "return '%s'" % (majority_class)
    else:  #regression
        else_decision = "return %s" % (subdataset.Decision.mean())

    if enableParallelism != True:
        functions.storeRule(file, (functions.formatRule(root), "else:"))
        functions.storeRule(file, (functions.formatRule(root + 1), else_decision))
    else:  #parallelism
        leaf_id = str(uuid.uuid1())
        custom_rule_file = "outputs/rules/" + str(leaf_id) + ".txt"

        check_rule = "else: " + else_decision

        #json.dumps escapes quotes and backslashes in the rule text, which the
        #previous hand-built JSON string did not
        sample_rule = json.dumps({
            "current_level": root,
            "leaf_id": leaf_id,
            "parents": parents,
            "rule": check_rule,
        })

        functions.createFile(custom_rule_file, "")
        functions.storeRule(custom_rule_file, sample_rule)

    #---------------------------

    #create branches in parallel
    if enableParallelism == True:
        #"with Pool(...) as pool" causes trouble for recursive functions, hence MyPool
        pool = MyPool(num_cores)
        try:
            pool.starmap(createBranch, input_params)
        finally:
            pool.close()
            pool.join()

    #---------------------------------------------
    #calculate accuracy metrics
    if root == 1:

        if enableParallelism == True:

            #custom rules are stored in .txt files. merge them all in a json file
            functions.createFile(json_file, "[\n")

            rules_dir = os.getcwd() + "/outputs/rules"

            custom_rules = []
            for rule_file_name in os.listdir(rules_dir):
                if not rule_file_name.endswith(".txt"):
                    continue

                rule_path = rules_dir + "/" + rule_file_name

                #this file stores a custom rule
                with open(rule_path, "r") as f:
                    custom_rule = f.read()

                if custom_rules:
                    custom_rule = ", " + custom_rule

                custom_rules.append(rule_path)
                functions.storeRule(json_file, custom_rule)

            functions.storeRule(json_file, "]")

            #-----------------------------------

            #custom rules are already merged in a json file. clear messy custom rules
            #TO-DO: if random forest trees are handled in parallel, this would be a problem. You cannot know the related tree of a rule. You should store a global tree id in a rule.
            for rule_path in custom_rules:
                os.remove(rule_path)

            #-----------------------------------

            reconstructRules(json_file)

            #-----------------------------------

        if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
            #this is a regular decision tree. find accuracy here.

            #NOTE(review): the imp module was removed in Python 3.12; this needs an importlib port
            moduleName = "outputs/rules/rules"
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description)  #rules0
            models.append(myrules)

            instances = df.shape[0]

            #instead of for loops, pandas functions perform well
            raw_df['Prediction'] = raw_df.apply(findPrediction, axis=1)
            if algorithm != 'Regression':
                idx = raw_df[raw_df['Prediction'] == raw_df['Decision']].index

                accuracy = 100 * len(idx) / instances
                print("Accuracy: ", accuracy, "% on ", instances, " instances")
            else:
                raw_df['Absolute_Error'] = abs(raw_df['Prediction'] - raw_df['Decision'])
                raw_df['Absolute_Error_Squared'] = raw_df['Absolute_Error'] * raw_df['Absolute_Error']

                mae = raw_df['Absolute_Error'].sum() / instances
                print("MAE: ", mae)

                mse = raw_df['Absolute_Error_Squared'].sum() / instances
                rmse = math.sqrt(mse)
                print("RMSE: ", rmse)

                mean = raw_df['Decision'].mean()
                print("Mean: ", mean)

                if mean > 0:
                    print("MAE / Mean: ", 100 * mae / mean, "%")
                    print("RMSE / Mean: ", 100 * rmse / mean, "%")

    return models
def main(args):
    """Collect every asset the client depends on and write the final bundle.

    Steps: load the requested age file(s), enumerate their pages, scan the
    pages for external resources in a process pool, optionally resolve
    PythonFileMod and client-binary dependencies, then prepare and output the
    packages.

    Args:
        args: Parsed command-line namespace. This function reads ``source``,
            ``destination``, ``moul_scripts``, ``age``, ``no_ages``,
            ``verbose``, ``python``, ``no_pfm_dependencies``,
            ``no_pfm_py_dependencies``, ``no_pfm_sdl_dependencies``,
            ``no_client``, ``client_arch``, ``dataset`` and ``distribute``.

    Returns:
        ``True`` on success, including the case where no age files are found.
        ``False`` when the scripts path is missing, an age file fails to
        load, or no compatible python interpreter is available.
    """
    if args.moul_scripts and not args.moul_scripts.exists():
        logging.error(f"Scripts path '{args.moul_scripts}' does not exist.")
        return False

    if args.age:
        age_info = load_age(make_asset_path("data", f"{args.age}.age", client_path=args.source))
        if age_info is None:
            return False
        age_infos = (age_info,)
    elif not args.no_ages:
        logging.info("Loading age files...")
        age_source_path = make_asset_path("data", client_path=args.source)
        age_infos = [load_age(age_file_path) for age_file_path in age_source_path.glob("*.age")]
        if not age_infos:
            logging.warning("No age files found in client!")
            return True
        elif not all(age_infos):
            return False
    else:
        age_infos = []

    # Collect a list of all age pages to be abused for the purpose of finding its resources
    # Would be nice if this were a common function of libHSPlasma...
    all_outputs = {}
    all_pages = list(find_all_pages(all_outputs, make_asset_path("data", client_path=args.source), *age_infos))
    logging.info(f"Found {len(all_pages)} Plasma pages.")

    # We want to get the age dependency data. Presently, those are the python and ogg files.
    # Unfortunately, libHSPlasma insists on reading in the entire page before allowing us to
    # do any of that. So, we will execute this part in a process pool.
    pool = multiprocessing.pool.Pool(initializer=_utils.multiprocess_init)
    try:
        dlevel = plDebug.kDLWarning if args.verbose else plDebug.kDLNone
        results = pool.starmap(find_page_externals,
                               ((page_path, dlevel) for _age_name, page_path in all_pages))
    except BaseException:
        # Deliberately broad (covers KeyboardInterrupt too): kill the workers, then re-raise.
        pool.terminate()
        pool.join()
        raise
    else:
        # All work is done; release the workers instead of leaving them to linger
        # for the remainder of the run.
        pool.close()
        pool.join()

    # What we have now is a list of dicts, each nearly obeying the output format spec.
    # Now, we have to merge them... ugh.
    logging.info(f"Merging results from {len(results)} dependency lists...")
    coerce_asset_dicts(all_outputs, all_pages, results)

    # PythonFileMods can import other python modules and be a STATEDESC
    if not args.no_pfm_dependencies:
        py_exe = args.python if args.python else _utils.find_python_exe()
        if not py_exe:
            logging.critical("Uru-compatible python interpreter unavailable.")
            return False

        logging.info("Searching for PythonFileMod dependencies...")
        find_pfm_externals(all_outputs, py_exe, args.no_pfm_py_dependencies, args.no_pfm_sdl_dependencies,
                           make_asset_path("python", client_path=args.source, scripts_path=args.moul_scripts),
                           make_asset_path("sdl", client_path=args.source, scripts_path=args.moul_scripts))

    # Gather client exes, DLLs, and installers.
    if not args.no_client:
        logging.info("Searching for client files...")
        find_client_dependencies(all_outputs, args.source, args.moul_scripts, args.client_arch)

    # OK, now everything is (mostly) sane.
    logging.info("Beginning final pass over assets...")
    prepare_packages(all_outputs, args.source, args.moul_scripts,
                     dataset=args.dataset, distribute=args.distribute)

    # Time to produce the bundle
    logging.info("Producing final asset bundle...")
    output_packages(all_outputs, args.source, args.moul_scripts, args.destination)

    return True
for i in range(len(nTraj)): Time_array_batch = np.append(Time_array_batch, givenSeed_training_results[i][0]) Likelihood_batch_list.append(givenSeed_training_results[i][1]) time_likelihood_batch_list.append(givenSeed_training_results[i][2]) RewardBatch_array = np.append(RewardBatch_array, givenSeed_training_results[i][3]) STDBatch_array = np.append(STDBatch_array, givenSeed_training_results[i][4]) List_TimeBatch.append(Time_array_batch) List_RewardBatch.append(RewardBatch_array) List_STDBatch.append(STDBatch_array) List_LikelihoodBatch.append(Likelihood_batch_list) List_TimeLikelihoodBatch.append(time_likelihood_batch_list) return List_TimeBatch, List_RewardBatch, List_STDBatch, List_LikelihoodBatch, List_TimeLikelihoodBatch pool = MyPool(10) args = [(seed, TrainingSet_Array, Labels_Array, max_epoch, nTraj) for seed in range(10)] results_batch = pool.starmap(train, args) pool.close() pool.join() # %% with open('Comparison/Batch/results_batch.npy', 'wb') as f: np.save(f, results_batch)
with open(output_loc, 'wb') as f: f.write(x) # print("Wrote it") except Exception: print("Failed " + trackId) return None # # Code for downloading from the catalog tsv including track IDs. # catalog_file = './data/catalog_out.tsv' trackIds = [] with open(catalog_file, 'r') as f: reader = csv.reader(f, delimiter='\t') next(reader) for row in reader: trackIds += [(row[4], )] if type(trackIds[-1][0]) is not str: print("WAMP WOW") # # Do it... # for tid in trackIds: download_sample(tid[0]) pool = pool.Pool(8) pool.starmap(download_sample, trackIds) # download_sample('166297')
directory_p = args.directory #If you want to do by hand #directory_p = r'C:\Users\Gianl\Desktop\Call_with_Chiara' pcap_app = [] for r, d, f in os.walk(directory_p): for file in f: if ('.pcap' in file or '.pcapng' in file): pcap_app.append(os.path.join(r, file)) #print("Pcap found: {}\n".format(pcap_app)) #For each .pcap in the folders, do the process manager = multiprocessing.Manager() result_list = manager.list() #Cerco le porte pool= multiprocessing.Pool(processes = n_process) #Limito il numero di processi ai core della cpu -1 pool_tuple = [(x, result_list) for x in pcap_app] pool.starmap(main2, pool_tuple) pool.close() pool.join() #logging.info("Finish Process main2\n") list_app_port = [] list_app_port = [j for i in result_list for j in i] port_used = set(list(map(int, list_app_port))) print (port_used) result_list[:] = [] #print(result_list) #Decodifico su porta e creo .csv pool= multiprocessing.Pool(processes = n_process) #Limito il numero di processi ai core della cpu -1 pool_tuple = [(x, port_used, args.screen, args.quality, args.plot) for x in pcap_app] #result_list, args.plot pool.map(pcap_to_json, pool_tuple) pool.close() pool.join()
def buildDecisionTree(df, root, file, config, dataset_features, parent_level = 0, leaf_id = 0, parents = 'root', validation_df = None):
    """Split ``df`` on its winning feature and build one branch per feature value.

    Args:
        df: DataFrame with a ``Decision`` column; every column except the last
            is treated as a feature. ``findDecision`` is called on it and the
            non-winning numeric columns are restored afterwards.
        root: Current rule depth. ``1`` marks the top-level call, which is the
            only one that merges the rule files.
        file: Path of the rules file; the JSON rule file name is derived from
            it by swapping the extension.
        config: Dict providing ``enableParallelism``, ``algorithm`` and
            ``num_cores``, plus ``enableRandomForest``, ``enableGBM`` and
            ``enableAdaboost`` (read at the root only).
        dataset_features: Mapping of feature name to dtype name; ``'object'``
            marks a nominal feature.
        parent_level: Depth of the caller, written into the rule descriptor.
        leaf_id: Ignored on input; overwritten when an ``else`` rule is built.
        parents: Identifier of the parent rule, stored in the ``else`` rule.
        validation_df: Unused here.

    Returns:
        ``models``: holds the loaded rules module when called at the root for
        a regular decision tree, and is empty in every other case.
    """
    models = []

    feature_names = df.columns[0:-1]

    enableParallelism = config['enableParallelism']
    algorithm = config['algorithm']

    json_file = file.split(".")[0]+".json"

    #--------------------------------------

    # findDecision transforms df in place, so keep a pristine copy for the restoration below
    df_copy = df.copy()

    winner_name, num_of_instances, metric, metric_name = findDecision(df, config)

    #find winner index, this cannot be returned by find decision because columns dropped in previous steps
    for j, feature in enumerate(dataset_features):
        if feature == winner_name:
            winner_index = j

    numericColumn = False
    if dataset_features[winner_name] != 'object':
        numericColumn = True

    #restoration
    #The dtype is read from df_copy: once findDecision has turned a numeric column of df
    #into object, checking df itself would never restore it.
    columns = df.shape[1]
    for i in range(0, columns-1):
        column_name = df_copy.columns[i]
        column_type = df_copy[column_name].dtypes
        if column_type != 'object' and column_name != winner_name:
            df[column_name] = df_copy[column_name]

    classes = df[winner_name].value_counts().keys().tolist()

    #-----------------------------------------------------

    num_cores = config["num_cores"]

    input_params = []

    for branch_index, current_class in enumerate(classes):
        subdataset = df[df[winner_name] == current_class]
        subdataset = subdataset.drop(columns=[winner_name])

        branch_args = (config, current_class, subdataset, numericColumn, branch_index
            , winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric)

        #create branches serially
        if enableParallelism != True:

            if branch_index == 0:
                descriptor = {
                    "feature": winner_name,
                    "instances": num_of_instances,
                    #"metric_name": metric_name,
                    "metric_value": round(metric, 4),
                    "depth": parent_level + 1
                }
                descriptor = "# "+json.dumps(descriptor)

                functions.storeRule(file, (functions.formatRule(root), "", descriptor))

            createBranch(*branch_args)
        else:
            #parallel branches are only collected here; they run further below
            input_params.append(branch_args)

    #---------------------------
    #add else condition in the decision tree
    #NOTE: subdataset is the subset of the last class handled in the loop above

    if df.Decision.dtypes == 'object': #classification
        #value_counts() is sorted by frequency, so its first label is the majority class.
        #Reading it from the index avoids depending on the column names produced by
        #reset_index(), which changed in pandas 2.0.
        majority_class = subdataset.Decision.value_counts().index[0]

        else_decision = "return '%s'" % (majority_class)

        if enableParallelism != True:
            functions.storeRule(file,(functions.formatRule(root), "else:"))
            functions.storeRule(file,(functions.formatRule(root+1), else_decision))
        else: #parallelism
            leaf_id = str(uuid.uuid1())
            custom_rule_file = "outputs/rules/"+str(leaf_id)+".txt"

            check_rule = "else: "+else_decision

            sample_rule = {}
            sample_rule["current_level"] = root
            sample_rule["leaf_id"] = leaf_id
            sample_rule["parents"] = parents
            sample_rule["rule"] = check_rule
            sample_rule["feature_idx"] = -1
            sample_rule["feature_name"] = ""
            sample_rule["instances"] = df.shape[0]
            sample_rule["metric"] = 0
            sample_rule["return_statement"] = 0

            #json to string
            sample_rule = json.dumps(sample_rule)

            functions.createFile(custom_rule_file, "")
            functions.storeRule(custom_rule_file, sample_rule)

    else: #regression
        else_decision = "return %s" % (subdataset.Decision.mean())

        if enableParallelism != True:
            functions.storeRule(file,(functions.formatRule(root), "else:"))
            functions.storeRule(file,(functions.formatRule(root+1), else_decision))
        else:
            leaf_id = str(uuid.uuid1())
            custom_rule_file = "outputs/rules/"+str(leaf_id)+".txt"

            check_rule = "else: "+else_decision

            #encode with json.dumps like the classification branch, so the rule text is escaped
            sample_rule = json.dumps({
                "current_level": root,
                "leaf_id": leaf_id,
                "parents": parents,
                "rule": check_rule,
            })

            functions.createFile(custom_rule_file, "")
            functions.storeRule(custom_rule_file, sample_rule)

    #---------------------------

    #create branches in parallel
    if enableParallelism == True:
        #"with Pool(...) as pool" causes trouble for recursive functions, hence MyPool
        pool = MyPool(num_cores)
        try:
            pool.starmap(createBranch, input_params)
        finally:
            pool.close()
            pool.join()

    #---------------------------------------------

    if root == 1:

        if enableParallelism == True:

            #custom rules are stored in .txt files. merge them all in a json file
            functions.createFile(json_file, "[\n")

            rules_dir = os.getcwd()+"/outputs/rules"

            custom_rules = []
            for rule_file_name in os.listdir(rules_dir):
                if not rule_file_name.endswith(".txt"):
                    continue

                rule_path = rules_dir+"/"+rule_file_name

                #this file stores a custom rule
                with open(rule_path, "r") as f:
                    custom_rule = f.read()

                if custom_rules:
                    custom_rule = ", "+custom_rule

                custom_rules.append(rule_path)
                functions.storeRule(json_file, custom_rule)

            functions.storeRule(json_file, "]")

            #-----------------------------------

            #custom rules are already merged in a json file. clear messy custom rules
            #TO-DO: if random forest trees are handled in parallel, this would be a problem. You cannot know the related tree of a rule. You should store a global tree id in a rule.
            for rule_path in custom_rules:
                os.remove(rule_path)

            #-----------------------------------

            reconstructRules(json_file, feature_names)

            #feature importance should be calculated by demand?
            feature_importance(json_file, dataset_features)

            #-----------------------------------

        #is regular decision tree
        if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
            #NOTE(review): the imp module was removed in Python 3.12; this needs an importlib port
            moduleName = "outputs/rules/rules"
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description) #rules0
            models.append(myrules)

    return models
dishes = read_JSON('./Resources/dish_freq.json') os_type = platform.system() similarity = Similarity() freq_dist = json.loads(open('./Resources/dish_freq.json').read()) start = int(input("Enter starting index (max= 33302 , min = 0) :")) end = int(input("Enter End (max= 33302 , min = 0) :")) dishes_to_process = dishes[start:end] out_file = open('Search_tags.json', 'a+', encoding='utf-8') with multiprocessing.Pool(processes=cpu_count) as pool: parent = psutil.Process() if os_type == 'Windows': parent.nice(psutil.REALTIME_PRIORITY_CLASS) if os_type == 'Linux': parent.nice(-20) for child in parent.children(): if os_type == 'Windows': child.nice(psutil.REALTIME_PRIORITY_CLASS) if os_type == 'Linux': child.nice(-20) results = pool.starmap(similarity.create_test_cases, [dishes_to_process]) for result in results: print(result) json.dump(result, out_file) print('\n Done!') end = time.time() print('Time taken:', end - start) # Time taken: 1548227540.7543015 prev ver