Example #1
0
def run_domain_checks(rounded_time, env, output, pool):
	"""Run the per-domain checks on a worker pool and replay their output.

	The domains to check are the union of the mail domains, the DNS zone
	domains and the web domains reported for ``env``.  Each domain becomes one
	``run_domain_checks_on_domain`` task submitted through ``pool.starmap``;
	the buffered result of every task is then played back into ``output`` in
	the order produced by ``sort_domains``.
	"""
	# Domains we handle mail for.
	mail_domains = get_mail_domains(env)

	# Domains we serve DNS zones for (i.e. does not include subdomains).
	dns_zonefiles = dict(get_dns_zones(env))
	dns_domains = set(dns_zonefiles)

	# Domains we serve HTTPS for.
	web_domains = set(get_web_domains(env))

	domains_to_check = mail_domains | dns_domains | web_domains

	# Domains that we don't serve web for because of a custom CNAME/A record.
	domains_with_a_records = get_domains_with_a_records(env)

	ssl_certificates = get_ssl_certificates(env)

	# Everything except the domain itself is identical for every task.
	shared_args = (
		rounded_time, env, dns_domains, dns_zonefiles, mail_domains,
		web_domains, domains_with_a_records, ssl_certificates,
	)
	tasks = ((domain,) + shared_args for domain in domains_to_check)

	# Each task yields a (domain, output) pair; index the buffers by domain.
	buffered = dict(pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1))
	for domain in sort_domains(buffered, env):
		buffered[domain].playback(output)
Example #2
0
def run_domain_checks(rounded_time, env, output, pool):
    """Run the per-domain checks on a worker pool and replay their output.

    The domains to check are the union of the mail domains, the DNS zone
    domains and the web domains (including the default "www" redirects)
    reported for ``env``.  Each domain becomes one
    ``run_domain_checks_on_domain`` task submitted through ``pool.starmap``;
    the buffered result of every task is then played back into ``output`` in
    the order produced by ``sort_domains``.
    """
    # Domains we handle mail for.
    mail_domains = get_mail_domains(env)

    # Domains we serve DNS zones for (i.e. does not include subdomains).
    dns_zonefiles = dict(get_dns_zones(env))
    dns_domains = set(dns_zonefiles)

    # Domains we serve HTTPS for.
    web_domains = set(get_web_domains(env) + get_default_www_redirects(env))

    domains_to_check = mail_domains | dns_domains | web_domains

    # Everything except the domain itself is identical for every task.
    shared_args = (rounded_time, env, dns_domains, dns_zonefiles, mail_domains, web_domains)
    tasks = ((domain,) + shared_args for domain in domains_to_check)

    # Each task yields a (domain, output) pair; index the buffers by domain.
    buffered = dict(pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1))
    for domain in sort_domains(buffered, env):
        buffered[domain].playback(output)
Example #3
0
def starmap_parallel(function, values, number_of_processes=None, chunksize=1):
    """Apply ``function`` to every argument tuple in ``values`` on a process pool.

    Args:
        function: Callable invoked as ``function(*args)`` for each item.
        values: Iterable of argument tuples.
        number_of_processes: Pool size handed to ``multiprocessing.pool.Pool``.
        chunksize: Chunk size handed to ``Pool.starmap``.

    Returns:
        The results as a tuple, in the order produced by ``Pool.starmap``.
    """
    assert callable(function)

    logger.debug(
        'Creating multiprocessing pool with {} processes and chunksize {}.'.format(
            number_of_processes, chunksize))

    worker_pool = multiprocessing.pool.Pool(processes=number_of_processes)
    with worker_pool:
        outcome = tuple(worker_pool.starmap(function, values, chunksize=chunksize))

    logger.debug('Parallel calculation with {} results completed.'.format(len(outcome)))

    return outcome
Example #4
0
def run_services_checks(env, output, pool):
    """Check that system services are running.

    One ``check_service`` task is submitted per entry of ``get_services()``.
    Results are handled in sorted order; each one is a 4-tuple of
    (index, running, fatal, buffered output).  Checks whose buffered output is
    ``None`` are ignored; every other buffer is replayed into ``output``.

    Returns:
        False if any handled check flagged itself as fatal, True otherwise.
    """
    tasks = ((index, service, env) for index, service in enumerate(get_services()))
    outcomes = pool.starmap(check_service, tasks, chunksize=1)

    every_service_up = True
    any_fatal = False
    for _, is_running, is_fatal, buffered in sorted(outcomes):
        if buffered is None:
            # skip check (e.g. no port was set, e.g. no sshd)
            continue
        if not is_running:
            every_service_up = False
        if is_fatal:
            any_fatal = True
        buffered.playback(output)

    if every_service_up:
        output.print_ok("All system services are running.")

    return not any_fatal
Example #5
0
def train(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, max_epoch, nTraj):
    """Run ``DifferentTrainingSet`` once per entry of ``nTraj`` on a 3-process pool.

    Args:
        seed: Forwarded unchanged to every ``DifferentTrainingSet`` call.
        TrainingSet_Array: Training data; indexed with ``[:, :]`` and forwarded.
        Labels_Array: Labels; indexed with ``[:, :]`` and forwarded.
        List_TimeBatch: Sequence whose first element is forwarded to the tasks.
        max_epoch: Not used by this function; kept so callers stay compatible.
        nTraj: Sized collection.  One task is started per index, and the whole
            collection is forwarded to every task together with that index.

    Returns:
        A 5-tuple ``(List_TimeOnline, List_RewardOnline, List_STDOnline,
        List_LikelihoodOnline, List_TimeLikelihoodOnline)``.  Every member is a
        one-element list wrapping the values collected over all tasks: NumPy
        arrays for time, reward and STD (task results 0, 3 and 4 appended and
        flattened by ``np.append``), plain lists for the likelihood and
        time-likelihood entries (task results 1 and 2).
    """
    TrainingSet_tot = TrainingSet_Array[:, :]
    Labels_tot = Labels_Array[:, :]
    TimeBatch = List_TimeBatch[0]

    args = [(i, nTraj, TrainingSet_tot, Labels_tot, TimeBatch, seed) for i in range(len(nTraj))]

    pool = multiprocessing.Pool(processes=3)
    try:
        givenSeed_training_results = pool.starmap(DifferentTrainingSet, args)
    finally:
        # Release the worker processes even when a task raises; previously the
        # pool was leaked on any exception from starmap().
        pool.close()
        pool.join()

    Time_array_online = np.empty((0))
    RewardOnline_array = np.empty((0))
    STDOnline_array = np.empty((0))
    Likelihood_online_list = []
    time_likelihood_online_list = []

    # starmap() returns one result per task, in task order.
    for result in givenSeed_training_results:
        Time_array_online = np.append(Time_array_online, result[0])
        Likelihood_online_list.append(result[1])
        time_likelihood_online_list.append(result[2])
        RewardOnline_array = np.append(RewardOnline_array, result[3])
        STDOnline_array = np.append(STDOnline_array, result[4])

    List_TimeOnline = [Time_array_online]
    List_RewardOnline = [RewardOnline_array]
    List_STDOnline = [STDOnline_array]
    List_LikelihoodOnline = [Likelihood_online_list]
    List_TimeLikelihoodOnline = [time_likelihood_online_list]

    return List_TimeOnline, List_RewardOnline, List_STDOnline, List_LikelihoodOnline, List_TimeLikelihoodOnline
Example #6
0
def prop_coad(ref_dir, make_fits=False):
    """
    Proper coaddition function

    Args:
        ref_dir: Sequence of inputs.  When it has exactly one element, that
            element is used as a directory and every ``*.fits`` file in it is
            coadded; otherwise the sequence itself is the list of images.
        make_fits: When equal to ``True``, the coadd is also written next to
            the first image as ``<first image minus .fits>_COAD.fits``.

    Returns:
        ``(output_path, R)`` when ``make_fits`` is ``True``, otherwise just
        ``R``, where ``R`` is the real part of the inverse 2-D FFT of the
        summed numerators divided by the square root of the summed
        denominators returned by ``coad_func``.

    Notes:
        The first image is only passed to ``get_psf``; ``coad_func`` is run
        for every image after the first, one worker process per image.  The
        files named by ``sexcat1``/``psfcat1`` (and ``sexcat1`` with ``.fits``
        replaced by ``.psf``) are removed with ``rm`` afterwards.
        At least two images are required: the pool is sized ``len(F) - 1``.
    """
    if len(ref_dir) == 1:
        # collect images you want to coadd
        F = glob.glob(ref_dir[0] + '/*.fits')
    else:
        F = ref_dir
    print(F)

    _, _, sexcat1, psfcat1 = get_psf(F[0])

    # The context manager releases the worker processes once starmap() has
    # returned (or raised); the pool used to be left running.
    with multiprocessing.Pool(len(F) - 1) as pool:
        tmp_array = pool.starmap(
            coad_func,
            [(F[i], psfcat1, sexcat1) for i in range(1, len(F))])

    Nomin = sum(x[0] for x in tmp_array)
    Denom = sum(x[1] for x in tmp_array)

    Denom = np.sqrt(Denom)
    if np.any(Denom == 0):
        # Only a warning: the division below still runs on these entries.
        print('ZEROS')

    R_hat = Nomin / Denom
    R = np.real(fft.ifft2(R_hat))
    subprocess.call(['rm', sexcat1, psfcat1, sexcat1.replace('.fits', '.psf')])

    # Kept as an equality test so that only True/1 select the FITS output,
    # exactly as before.
    if make_fits == True:
        out_name = F[0].replace('.fits', '_COAD.fits')
        hed = fits.getheader(F[0])
        hed['COMMENT'] = 'ZO coaddition from ZiP'
        hed['COMMENT'] = 'List of stacked fits'
        hed['COMMENT'] = ', '.join(F)
        fits.writeto(out_name, R, hed, overwrite=True)
        return (out_name, R)
    else:
        return (R)
Example #7
0
def run_services_checks(env, output, pool):
	"""Check that system services are running.

	One ``check_service`` task is submitted per service listed below.
	Results are handled in sorted order; each one is a 4-tuple of
	(index, running, fatal, buffered output).  Checks whose buffered output is
	``None`` are ignored; every other buffer is replayed into ``output``.

	Returns:
		False if any handled check flagged itself as fatal, True otherwise.
	"""
	# (name, port) pairs of the entries flagged "public": False.
	local_entries = (
		("Local DNS (bind9)", 53),
		#("NSD Control", 8952),
		("Local DNS Control (bind9/rndc)", 953),
		("Dovecot LMTP LDA", 10026),
		("Postgrey", 10023),
		("Spamassassin", 10025),
		("OpenDKIM", 8891),
		("OpenDMARC", 8893),
		("Memcached", 11211),
		("Sieve (dovecot)", 4190),
		("Mail-in-a-Box Management Daemon", 10222),
	)

	# (name, port) pairs of the entries flagged "public": True.
	public_entries = (
		("SSH Login (ssh)", get_ssh_port()),
		("Public DNS (nsd4)", 53),
		("Incoming Mail (SMTP/postfix)", 25),
		("Outgoing Mail (SMTP 587/postfix)", 587),
		#("Postfix/master", 10587),
		("IMAPS (dovecot)", 993),
		("HTTP Web (nginx)", 80),
		("HTTPS Web (nginx)", 443),
	)

	services = [{ "name": name, "port": port, "public": False } for name, port in local_entries]
	services += [{ "name": name, "port": port, "public": True } for name, port in public_entries]

	tasks = ((index, service, env) for index, service in enumerate(services))
	outcomes = pool.starmap(check_service, tasks, chunksize=1)

	every_service_up = True
	any_fatal = False
	for _, is_running, is_fatal, buffered in sorted(outcomes):
		if buffered is None:
			# skip check (e.g. no port was set, e.g. no sshd)
			continue
		if not is_running:
			every_service_up = False
		if is_fatal:
			any_fatal = True
		buffered.playback(output)

	if every_service_up:
		output.print_ok("All system services are running.")

	return not any_fatal
Example #8
0
def run_services_checks(env, output, pool):
	"""Check that system services are running.

	One ``check_service`` task is submitted per service listed below.
	Results are handled in sorted order; each one is a 4-tuple of
	(index, running, fatal, buffered output).  Checks whose buffered output is
	``None`` are ignored; every other buffer is replayed into ``output``.

	Returns:
		False if any handled check flagged itself as fatal, True otherwise.
	"""
	# (name, port) pairs of the entries flagged "public": False.
	local_entries = (
		("Local DNS (bind9)", 53),
		#("NSD Control", 8952),
		("Local DNS Control (bind9/rndc)", 953),
		("Dovecot LMTP LDA", 10026),
		("Postgrey", 10023),
		("Spamassassin", 10025),
		("OpenDKIM", 8891),
		("OpenDMARC", 8893),
		("Memcached", 11211),
		("Mail-in-a-Box Management Daemon", 10222),
	)

	# (name, port) pairs of the entries flagged "public": True.
	public_entries = (
		("SSH Login (ssh)", get_ssh_port()),
		("Public DNS (nsd4)", 53),
		("Incoming Mail (SMTP/postfix)", 25),
		("Outgoing Mail (SMTP 587/postfix)", 587),
		#("Postfix/master", 10587),
		("IMAPS (dovecot)", 993),
		("Mail Filters (Sieve/dovecot)", 4190),
		("HTTP Web (nginx)", 80),
		("HTTPS Web (nginx)", 443),
	)

	services = [{ "name": name, "port": port, "public": False } for name, port in local_entries]
	services += [{ "name": name, "port": port, "public": True } for name, port in public_entries]

	tasks = ((index, service, env) for index, service in enumerate(services))
	outcomes = pool.starmap(check_service, tasks, chunksize=1)

	every_service_up = True
	any_fatal = False
	for _, is_running, is_fatal, buffered in sorted(outcomes):
		if buffered is None:
			# skip check (e.g. no port was set, e.g. no sshd)
			continue
		if not is_running:
			every_service_up = False
		if is_fatal:
			any_fatal = True
		buffered.playback(output)

	if every_service_up:
		output.print_ok("All system services are running.")

	return not any_fatal
Example #9
0
def mkt_on_list(glist,
                data_df,
                pops=None,
                tests=None,
                cutoffs=None,
                do_trims=None,
                bootstrap=None,
                b_reps=None):
    """Run the MKT computation for one gene list, one worker task per population.

    Args:
        glist: Gene ids; rows of ``data_df`` whose ``'id'`` is in this
            collection are analysed.
        data_df: DataFrame with at least the columns ``'id'`` and ``'pop'``.
        pops: Populations to analyse; defaults to ``['AFR', 'EUR']``.
        tests: Forwarded to the worker; defaults to ``['eMKT', 'aMKT']``.
        cutoffs: Forwarded to the worker; defaults to ``[0.05, 0.15]``.
        do_trims: Forwarded to the worker; defaults to ``[True, False]``.
        bootstrap: When truthy, ``bootstrap_on_subdata`` is used (and
            ``b_reps`` is forwarded); otherwise ``mkt_on_subdata``.
        b_reps: Forwarded to ``bootstrap_on_subdata``; defaults to 100.

    Returns:
        The per-population result frames concatenated row-wise with a fresh
        index.
    """
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']
    if pops is None:
        pops = ['AFR', 'EUR']
    if bootstrap is None:
        bootstrap = False
    if b_reps is None:
        b_reps = 100

    df = data_df[data_df['id'].isin(glist)]

    # The bootstrap worker takes one extra trailing argument (b_reps).
    if bootstrap:
        func = bootstrap_on_subdata
        extra_args = (b_reps,)
    else:
        func = mkt_on_subdata
        extra_args = ()

    pars = [(df[df['pop'] == pop], pop, tests, cutoffs, do_trims) + extra_args
            for pop in pops]

    # Loads the models for all the parameters parsed using multiprocessing to speed up computations
    pool = MyPool(processes=2)  # multiprocessing.cpu_count())
    try:
        results_list = pool.starmap(func, pars)
    finally:
        # Always release the workers; terminate() used to be skipped whenever
        # a task raised, leaking the pool.
        pool.terminate()

    results = pd.concat(results_list, axis=0, ignore_index=True)

    return results
Example #10
0
def run_domain_checks(rounded_time, env, output, pool):
	"""Run the per-domain checks on a worker pool and replay their output.

	The candidate domains are the union of the mail domains, the DNS zone
	domains and the web domains reported for ``env``.  Helper subdomains
	("www", "autoconfig", "autodiscover", "mta-sts") are dropped when their
	parent domain is itself a candidate.  Each remaining domain becomes one
	``run_domain_checks_on_domain`` task submitted through ``pool.starmap``;
	the buffered result of every task is then played back into ``output`` in
	the order produced by ``sort_domains``.
	"""
	# Domains we handle mail for.
	mail_domains = get_mail_domains(env)

	# Domains we serve DNS zones for (i.e. does not include subdomains).
	dns_zonefiles = dict(get_dns_zones(env))
	dns_domains = set(dns_zonefiles)

	# Domains we serve HTTPS for.
	web_domains = set(get_web_domains(env))

	candidates = mail_domains | dns_domains | web_domains

	# Remove "www", "autoconfig", "autodiscover", and "mta-sts" subdomains, which we group
	# with their parent, if their parent is in the domains to check list.
	grouped_labels = ("www", "autoconfig", "autodiscover", "mta-sts")
	domains_to_check = []
	for candidate in candidates:
		label, dot, parent = candidate.partition(".")
		if label in grouped_labels and dot and parent in candidates:
			continue
		domains_to_check.append(candidate)

	# Domains that we don't serve web for because of a custom CNAME/A record.
	domains_with_a_records = get_domains_with_a_records(env)

	# Everything except the domain itself is identical for every task.
	shared_args = (
		rounded_time, env, dns_domains, dns_zonefiles, mail_domains,
		web_domains, domains_with_a_records,
	)
	tasks = ((domain,) + shared_args for domain in domains_to_check)

	# Each task yields a (domain, output) pair; index the buffers by domain.
	buffered = dict(pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1))
	for domain in sort_domains(buffered, env):
		buffered[domain].playback(output)
Example #11
0
def mkt_on_subdata(subdata, pop=None, tests=None, cutoffs=None, do_trims=None):
    """Run the requested MKT variants on one subset of the data.

    Args:
        subdata: DataFrame holding the rows to analyse.
        pop: Optional label; when not ``None`` it is stored in a ``'pop'``
            column of the result.
        tests: Tests to run; defaults to ``['eMKT', 'aMKT']``.  Other names
            are ignored.
        cutoffs: One eMKT task is run per cutoff; defaults to ``[0.05, 0.15]``.
        do_trims: One aMKT task is run per value; defaults to ``[True, False]``.

    Returns:
        A DataFrame with the concatenated task results plus a ``'nogenes'``
        column holding the number of rows in ``subdata``.  For empty
        ``subdata`` no task is run and a single placeholder row is returned.
    """
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']

    nogenes = len(subdata.index.values)
    if nogenes <= 0:
        results = pd.DataFrame(index=[0])
    else:
        # NOTE(review): when both tests are requested, `div` from the
        # non-cumulative call overwrites the one from the cumulative call and
        # is used for every task - presumably both are identical; confirm.
        if 'aMKT' in tests:
            daf_cum, div = makeSfs(subdata, cum=True)
        if 'eMKT' in tests:
            daf, div = makeSfs(subdata, cum=False)

        pars = []
        for test in tests:
            if test == 'eMKT':
                pars.extend((daf, div, test, cutoff) for cutoff in cutoffs)
            elif test == 'aMKT':
                pars.extend((daf_cum, div, test, do_trim) for do_trim in do_trims)

        # Loads the models for all the parameters parsed using multiprocessing to speed up computations
        pool = MyPool(processes=2)  # multiprocessing.cpu_count())
        try:
            results_list = pool.starmap(mkt_on_daf, pars)
        finally:
            # Always release the workers; terminate() used to be skipped
            # whenever a task raised, leaking the pool.
            pool.terminate()

        results = pd.concat(results_list, axis=0, ignore_index=True)

    if pop is not None:
        results['pop'] = pop
    results['nogenes'] = nogenes

    return results
Example #12
0
def main(argv):
    """Run every build/test configuration in parallel and print a summary.

    Args:
        argv: Command-line arguments handed to the parser from ``get_parser()``.

    One worker process is started per configuration.  ``check_build`` returns
    a status string for each; ``'pass'`` is shown in the pass color, anything
    else in the fail color.  For passing, non-test release builds the stripped
    binary size is appended to the summary line.
    """
    opts = get_parser().parse_args(argv)
    build_test_cases = (
        #(sysroot path, target triple, debug/release, should test?, opts.clean)
        (opts.arm_sysroot, ARM_TRIPLE, "debug", False, opts.clean),
        (opts.arm_sysroot, ARM_TRIPLE, "release", False, opts.clean),
        (opts.aarch64_sysroot, AARCH64_TRIPLE, "debug", False, opts.clean),
        (opts.aarch64_sysroot, AARCH64_TRIPLE, "release", False, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "debug", False, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "release", False, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "debug", True, opts.clean),
        (opts.x86_64_sysroot, X86_64_TRIPLE, "release", True, opts.clean),
    )

    # dirname() is '' when the script is started by bare name (e.g.
    # `python script.py`), and os.chdir('') raises FileNotFoundError; in that
    # case the script directory already is the working directory.
    script_dir = os.path.dirname(sys.argv[0])
    if script_dir:
        os.chdir(script_dir)

    # The context manager releases the workers once all results are in.
    with multiprocessing.pool.Pool(len(build_test_cases)) as pool:
        results = pool.starmap(check_build, build_test_cases, 1)

    print('---')
    print('build test summary:')
    for test_case, result in zip(build_test_cases, results):
        _, triple, kind, test_it, _ = test_case
        title = '%s_%s' % (triple.split('-')[0], kind)
        if test_it:
            title += "_test"

        passed = result == 'pass'
        result_color = PASS_COLOR if passed else FAIL_COLOR

        display_size = ''
        if passed and kind == 'release' and not test_it:
            display_size = get_stripped_size(triple) + ' stripped binary'

        print('%20s: %s%15s%s %s' %
              (title, result_color, result, END_COLOR, display_size))
Example #13
0
def run_domain_checks(rounded_time, env, output, pool):
	"""Run the per-domain checks on a worker pool and replay their output.

	The domains to check are the union of the mail domains, the DNS zone
	domains and the web domains (including the default "www" redirects)
	reported for ``env``.  Each domain becomes one
	``run_domain_checks_on_domain`` task submitted through ``pool.starmap``;
	the buffered result of every task is then played back into ``output`` in
	the order produced by ``sort_domains``.
	"""
	# Domains we handle mail for.
	mail_domains = get_mail_domains(env)

	# Domains we serve DNS zones for (i.e. does not include subdomains).
	dns_zonefiles = dict(get_dns_zones(env))
	dns_domains = set(dns_zonefiles)

	# Domains we serve HTTPS for.
	web_domains = set(get_web_domains(env) + get_default_www_redirects(env))

	domains_to_check = mail_domains | dns_domains | web_domains

	# Everything except the domain itself is identical for every task.
	shared_args = (rounded_time, env, dns_domains, dns_zonefiles, mail_domains, web_domains)
	tasks = ((domain,) + shared_args for domain in domains_to_check)

	# Each task yields a (domain, output) pair; index the buffers by domain.
	buffered = dict(pool.starmap(run_domain_checks_on_domain, tasks, chunksize=1))
	for domain in sort_domains(buffered, env):
		buffered[domain].playback(output)
Example #14
0
def mkt_on_df(gene_df,
              data_df,
              label=None,
              pops=None,
              tests=None,
              cutoffs=None,
              do_trims=None,
              bootstrap=None,
              b_size=None,
              b_reps=None):
    """Run ``mkt_on_col`` for every column of ``gene_df`` on an 8-process pool.

    Args:
        gene_df: DataFrame; each column is handed to one ``mkt_on_col`` task.
        data_df: Forwarded unchanged to every task.
        label: Optional label; when not ``None`` it is stored in a ``'label'``
            column of the result.
        pops: Forwarded to the tasks; defaults to ``['AFR', 'EUR']``.
        tests: Forwarded to the tasks; defaults to ``['eMKT', 'aMKT']``.
        cutoffs: Forwarded to the tasks; defaults to ``[0.05, 0.15]``.
        do_trims: Forwarded to the tasks; defaults to ``[True, False]``.
        bootstrap: Forwarded to the tasks; defaults to ``False``.
        b_size: Forwarded to the tasks as given (no default is applied here).
        b_reps: Forwarded to the tasks; defaults to 100.

    Returns:
        The per-column result frames concatenated row-wise with a fresh index.
    """
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']
    if pops is None:
        pops = ['AFR', 'EUR']
    if bootstrap is None:
        bootstrap = False
    if b_reps is None:
        b_reps = 100

    # Columns are selected by position so duplicate column names still yield
    # one task per column.
    pars = [(gene_df.iloc[:, i], data_df, pops, tests, cutoffs, do_trims,
             bootstrap, b_size, b_reps)
            for i in range(len(gene_df.columns))]

    # Loads the models for all the parameters parsed using multiprocessing to speed up computations
    pool = MyPool(processes=8)  # multiprocessing.cpu_count())
    try:
        results_list = pool.starmap(mkt_on_col, pars)
    finally:
        # Always release the workers; terminate() used to be skipped whenever
        # a task raised, leaking the pool.
        pool.terminate()

    results = pd.concat(results_list, axis=0, ignore_index=True)

    if label is not None:
        results['label'] = label

    return results
Example #15
0
def do_work(c_id__c_genes, odir, faa_files, ffn_files, cores=1): # NOTE had no fnn_files until 2016.09.28
    """Collect the protein sequences of one gene cluster, align them and build an HMM.

    Args:
        c_id__c_genes: ``(centroid_id, gene_ids)`` pair; ``gene_ids`` is a set.
        odir: Directory in which ``<centroid_id>.faa`` and the files derived
            from it are created.
        faa_files: Protein FASTA files scanned first (``scan_faa``).
        ffn_files: Nucleotide FASTA files scanned with translation enabled
            (``scan_ffn``) for genes not found in ``faa_files``.
        cores: Size of the worker pools used for scanning.

    Returns:
        ``None``.  Nothing is done when ``<odir>/<centroid_id>.faa.hmm``
        already exists.

    Raises:
        AssertionError: if a gene is found more than once, if the found genes
            do not match ``gene_ids`` exactly, or if MUSCLE / hmmbuild report
            a non-zero status.
    """
    centroid = c_id__c_genes[0] # key
    c_genes  = c_id__c_genes[1] # values
    genes_faa = os.path.join(odir, '%s.faa' % centroid)
    genes_faa_msa = "%s.msa" % genes_faa
    genes_faa_hmm = "%s.hmm" % genes_faa

    # Only the protein HMM is built by this function (the nucleotide variant
    # is disabled), so only that file decides whether the work can be skipped.
    # The former check also referenced the undefined name `genes_ffn_hmm` and
    # raised NameError as soon as the protein HMM existed.
    if os.path.isfile(genes_faa_hmm):
        sys.stdout.write('SKIP: %s: HMM exists: %s\n' % (centroid, genes_faa_hmm))
        return

    # protein sequences
    prot_seqs = {}

    # FAA (protein sequences)
    pool = Pool(cores)
    try:
        results = pool.starmap(scan_faa, itertools.product(faa_files, [c_genes]))
    finally:
        # release the workers even when a scan fails
        pool.close()
        pool.join()
    for records in results:
        assert all( [ r_id not in prot_seqs for r_id in records.keys() ] ) # TEST
        prot_seqs.update( records )

    # FFN (translate nucl. sequences)
    remaining = c_genes.difference( prot_seqs.keys() )
    if len(remaining) > 0:
        sys.stdout.write('\tINFO: for %d genes need to scan ffn files\n' % len(remaining))
        pool = Pool(cores)
        try:
            # NOTE added True for translation 2016.09.28; had scan_faa until 2016.09.28
            results = pool.starmap(scan_ffn, itertools.product(ffn_files, [remaining], [True]))
        finally:
            pool.close()
            pool.join()
        for records in results:
            assert all( [ r_id not in prot_seqs for r_id in records.keys() ] ) # TEST
            prot_seqs.update( records )

    # The collected ids must match the cluster's gene ids exactly.
    assert all([ r_id in c_genes for r_id in prot_seqs.keys() ]), ';'.join( list(set(prot_seqs.keys()).difference( c_genes )) ) # TEST NOTE changed to prot_seqs - c_genes since 2016.09.28
    assert all([ r_id in prot_seqs.keys() for r_id in c_genes ]), ';'.join( list(c_genes.difference( prot_seqs.keys() )) ) # TEST NOTE New, since 2016.09.28

    # write FASTA
    with open(genes_faa, "w") as f:
        for c_gene in c_genes:
            check_seq(c_gene, prot_seqs[ c_gene ].seq, 'prot')
            SeqIO.write(prot_seqs[ c_gene ], f, "fasta")

    # run MUSCLE (presumably writes genes_faa_msa - confirm against run_muscle)
    cmd, cmd_stdout, cmd_status = run_muscle(ifile=genes_faa, params=__MUSCLE_PARAMS_PROT__)
    assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout) # TEST

    # build HMM (presumably writes genes_faa_hmm - confirm against run_hmmbuild)
    cmd, cmd_stdout, cmd_status = run_hmmbuild(ifile=genes_faa_msa, alphabet='--amino')
    assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout) # TEST

    # rm fasta
    os.remove(genes_faa)
Example #16
0
         centroids[centroidID] = set()
         for cl_genes in clustered_genes:
             if cl_genes == "": # this sample has no gene belonging to that cluster
                 continue
             for cl_gene in cl_genes.split('\t'):
                 assert cl_gene not in centroids[centroidID], '%s already found' % cl_gene
                 assert cl_gene not in all_genes, '%s already found' % cl_gene
                 centroids[centroidID].add( cl_gene )
                 all_genes.add( cl_gene )
 if args.verbose:
     sys.stdout.write("%s: Genes: %d\n" % (timestamp(), len(all_genes)))
 
 # NOTE do work: extract protein sequences, create MSA, build HMM
 # One do_work task is submitted per (centroid id, gene set) item of `centroids`.
 # The pool size is args.cores / args.job_cores truncated to an int.
 # NOTE(review): this evaluates to 0 when args.cores < args.job_cores - confirm MyPool accepts that.
 pool            = MyPool(int(args.cores/args.job_cores)) # create pool for parallel computing
 # Every task tuple is (centroid item, '<odir>/tmp', faa_files, ffn_files, args.job_cores);
 # the single-element lists make product() repeat those values for each centroid.
 pool_iter       = itertools.product(centroids.items(), ['%s/tmp' % args.odir], [faa_files], [ffn_files], [args.job_cores]) # iterable for the pool NOTE had no ffn_files until 2016.09.28
 # `results` holds the return value of each do_work call, in task order.
 results         = pool.starmap( do_work , pool_iter ) # perform parallel computing
 pool.close(); pool.join() # wait until all finished and close the pool
 
 if args.verbose:
     sys.stdout.write("%s: Created HMM models\n" % timestamp() )
 
 # NOTE Clean up
 # archive *.msa
 if args.verbose:
     sys.stdout.write("%s: Creating MSA archive\n" % timestamp() )
 cmd, cmd_stdout, cmd_status = compress_MSA(sdir='%s/tmp' % args.odir, obname=os.path.join(args.odir,'MSA'))
 if args.verbose:
     sys.stdout.write('\t%s\n' % cmd)
 assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout) # TEST
 # sync *.hmm
 if args.verbose:
Example #17
0
def main(argv):
    """Generate the binding modules in parallel and write ``mod.rs``.

    Args:
        argv: Command-line arguments handed to the parser from ``get_parser()``.

    The working directory is changed to the directory of the running script.
    When ``opts.virglrenderer`` contains ``://`` it is treated as a URL and
    fetched into a temporary directory with ``download_virgl``; otherwise it
    is used as the source directory as-is.  One worker process per module then
    runs ``generate_module``, a summary is printed, and the process exits with
    status 1 if the download or any module failed.  On success ``mod.rs`` is
    written with one ``pub mod`` line per module.
    """
    global verbose

    # dirname() is '' when the script is started by bare name (e.g.
    # `python script.py`), and os.chdir('') raises FileNotFoundError; in that
    # case the script directory already is the working directory.
    script_dir = os.path.dirname(sys.argv[0])
    if script_dir:
        os.chdir(script_dir)

    opts = get_parser().parse_args(argv)
    if opts.verbose:
        verbose = True

    virgl_src_dir = opts.virglrenderer
    # Kept in a local so the temporary directory outlives the generation step.
    virgl_src_dir_temp = None
    if '://' in opts.virglrenderer:
        virgl_src_dir_temp = tempfile.TemporaryDirectory(
            prefix='virglrenderer-src')
        virgl_src_dir = virgl_src_dir_temp.name
        if not download_virgl(opts.virglrenderer, virgl_src_dir,
                              opts.virgl_branch):
            # Report source URL and destination directory; the message used to
            # print the destination and the branch name instead.
            print('failed to clone \'{}\' to \'{}\''.format(
                opts.virglrenderer, virgl_src_dir))
            sys.exit(1)

    clang_args = ['-I', os.path.join(opts.sysroot, 'usr/include')]

    # One argument tuple per generate_module() call; the first element is the
    # module name used in the summary and in mod.rs.
    modules = (
        (
            'virglrenderer',
            '(virgl|VIRGL)_.+',
            os.path.join(opts.sysroot, 'usr/include/virgl/virglrenderer.h'),
            clang_args,
            'virglrenderer',
            True,
        ),
        (
            'virgl_protocol',
            '(virgl)|(VIRGL)_.+',
            os.path.join(virgl_src_dir, 'src/virgl_protocol.h'),
            clang_args,
            None,
            False,
        ),
        (
            'p_defines',
            '(pipe)|(PIPE).+',
            os.path.join(virgl_src_dir,
                         'src/gallium/include/pipe/p_defines.h'),
            clang_args,
            None,
            False,
        ),
        (
            'p_format',
            'pipe_format',
            os.path.join(virgl_src_dir, 'src/gallium/include/pipe/p_format.h'),
            clang_args,
            None,
            False,
        ),
    )

    # The context manager releases the workers once all results are in.
    with multiprocessing.pool.Pool(len(modules)) as pool:
        results = pool.starmap(generate_module, modules, 1)

    return_fail = False
    print('---')
    print('generate module summary:')
    for module, result in zip(modules, results):
        if result == 'pass':
            result_color = PASS_COLOR
        else:
            result_color = FAIL_COLOR
            return_fail = True

        print('%15s: %s%s%s' % (module[0], result_color, result, END_COLOR))

    if return_fail:
        sys.exit(1)

    with open('mod.rs', 'w') as f:
        print('/* generated by generate.py */', file=f)
        print('#![allow(dead_code)]', file=f)
        print('#![allow(non_camel_case_types)]', file=f)
        print('#![allow(non_snake_case)]', file=f)
        print('#![allow(non_upper_case_globals)]', file=f)
        for module in modules:
            print('pub mod', module[0] + ';', file=f)
Example #18
0
 def infer_filetype_via_coverage_for_parameter_parallel(
         self, parameter: str, probe: bool = True) -> (str, int, bool):
     """
     Infer which seed filetype(s) the binary accepts for ``parameter`` by comparing coverage.

     The assumption is that the seed directory that yields the most coverage
     holds files of the right filetype.  Candidate directories are evaluated
     concurrently on a thread pool of ``self.cores`` workers.

     Side effects: resets ``self.failed_invocations`` to 0 and stores a
     ``zip(file_list, cov_list)`` iterator in ``self.coverage_lists`` under
     ``parameter`` (or under the string ``"None"`` when ``parameter`` is falsy).

     :parameter parameter: The parameter that tells the binary to work with this file.
     :parameter probe: Use "probing" (``self.probe_possible_filetypes_for_parameter``)
         to pre-select candidate filetypes; otherwise every directory under
         ``self.seeds_path`` is a candidate.
     :return: Normally a 3-tuple ``(paths, coverages, fallback)``: a list of seed
         directory paths, the list of matching coverage values, and a bool that
         is True only when no statistical outlier was found and the single
         best directory is returned instead.  NOTE: returns a bare ``None``
         (not a tuple) when probing yields no candidates, and the lists do not
         match the ``(str, int, bool)`` return annotation.
     """
     self.failed_invocations = 0  # We want to reset the failed invocations for each parameter
     # Treated as a network parameter unless it contains the "@@" placeholder
     # or the stdin invocation below succeeds.
     is_network_param = True
     if "@@" in parameter:
         is_network_param = False
     if self.try_invocation(parameter, stdin=True, without_desock=True):
         is_network_param = False
     PROBE = probe
     max_cov = 0
     max_file = None
     cov_list = []
     file_list = []
     # Keyed by "." + the directory name up to its first "_"; also handed to
     # every worker call below (presumably the workers store their coverage
     # in it - confirm against try_filetype_with_coverage).
     result_dict = {}
     cmin_argument_list = []  # A list which contains the argument
     max_coverage_per_filetype = {}
     if PROBE:
         probed_filetypes, min_cov_value, max_coverage_per_filetype = self.probe_possible_filetypes_for_parameter(
             parameter=parameter)
         logger.debug("Probed filetypes: %s", probed_filetypes)
         for filetype, cov in max_coverage_per_filetype.items():
             result_dict["." + filetype.split("_")[0]] = cov
             file_list.append(filetype.split("_")[0])
             cov_list.append(cov)
             # Re-assigns the value just read, so the dict is unchanged.
             max_coverage_per_filetype[filetype] = cov
         if not probed_filetypes:  # They all yielded the same coverage :(
             return None
             # self.coverage_lists[parameter] = zip(["garbage"],[min_cov_value])
             # return self.seeds_path+"/garbage_samples",min_cov_value
         if len(probed_filetypes) == 1:  # We can already be sure
             max_file, max_cov = (
                 self.seeds_path + "/" + probed_filetypes[0],
                 int(max(max_coverage_per_filetype.values())))
             p = "None"
             if parameter:
                 p = parameter
             self.coverage_lists[p] = zip(file_list, cov_list)
             return [max_file], [max_cov], False
     else:
         # Without probing, every directory below the seeds path is a candidate.
         probed_filetypes = [
             entity for entity in os.listdir(self.seeds_path)
             if os.path.isdir(os.path.join(self.seeds_path, entity))
         ]
         cov_list = [0] * len(probed_filetypes)
         file_list = [None] * len(probed_filetypes)
     # Build one worker argument tuple per usable candidate directory.
     for entity in probed_filetypes:
         if not os.path.isdir(self.seeds_path + "/" + entity):
             continue
         if len(os.listdir(self.seeds_path + "/" + entity)) <= 0:
             continue
         if entity == ".git":
             continue
         if (
                 entity == "pcap-network_samples"
                 or entity == "pcap-network"
         ) and (
                 not is_network_param
         ):  # Do not try the network seeds for file handling programs - it simply takes too long
             continue
         cmin_argument_list.append(
             (parameter, self.seeds_path + "/" + entity,
              "." + str(entity.split("_")[0]), result_dict))
     with multiprocessing.pool.ThreadPool(
             processes=self.cores
     ) as pool:  # instead of multiprocessor.cpu_count()
         # The returned list is not used; coverage is read from result_dict below.
         results = pool.starmap(self.try_filetype_with_coverage,
                                cmin_argument_list)
     # Fold the per-directory coverage into the lists and track the best directory.
     # NOTE(review): with probing enabled, cov_list/file_list were filled by
     # append() above and are indexed here by position in probed_filetypes -
     # verify that the lengths always match.
     for counter, entity in enumerate(probed_filetypes):
         if not os.path.isdir(self.seeds_path + "/" + entity):
             continue
         cov = result_dict.get("." + str(entity.split("_")[0]))
         if cov is None:
             cov = 0
         if cov > 0:
             cov_list[counter] = cov
             file_list[counter] = entity.split("_")[0]
         if cov > max_cov:
             max_cov = cov
             max_file = self.seeds_path + "/" + entity
         # print("Max coverage per filetype", max_coverage_per_filetype)
         # print("Entity",entity)
         max_coverage_per_filetype[entity] = max(
             cov, max_coverage_per_filetype.get(entity, 0))
     p = "None"
     if parameter:
         p = parameter
     self.coverage_lists[p] = zip(file_list, cov_list)
     # Mean and standard deviation are taken over the absolute coverage values.
     value_list = list([abs(x) for x in max_coverage_per_filetype.values()])
     std = sp.std(value_list)
     avg_value = sp.mean(value_list)
     logger.debug("Average: %s", avg_value)
     logger.debug("Std. Deviation: %s", std)
     if std > 0:
         # Keep every filetype more than 2.5 standard deviations above the
         # mean (the "4 ticks" wording below does not match this threshold).
         possible_filetypes = [
             k for k, v in max_coverage_per_filetype.items()
             if ((v - avg_value) / std) > 2.5
         ]
         if len(possible_filetypes
                ) >= 1:  # 4 ticks away, that is pretty obvious
             return [
                 os.path.join(self.seeds_path, p)
                 for p in possible_filetypes
             ], [
                 max_coverage_per_filetype[filetype]
                 for filetype in possible_filetypes
             ], False
     # No file over >4 ticks from std deviation:
     logger.debug("Max file")
     logger.debug(max_file)
     return [max_file], [max_cov], True
Example #19
0
def download_all(config, path, urllist):
    """Download every URL of ``urllist`` in parallel.

    Args:
        config: Forwarded to each ``download`` call; ``config.max_processes``
            sets the pool size.
        path: Directory joined with each file name to form the target path.
        urllist: Iterable of ``(filename, url)`` pairs.
    """
    worker_pool = multiprocessing.pool.Pool(config.max_processes)
    with worker_pool:
        jobs = (
            (config, url, os.path.join(path, filename))
            for filename, url in urllist
        )
        worker_pool.starmap(download, jobs)
Example #20
0
def buildDecisionTree(df, root, file, config, dataset_features, parent_level = 0, leaf_id = 0, parents = 'root', tree_id = 0, validation_df = None, main_process_id = None):
	"""
	Grow one node of the decision tree and create its branches.

	The winning feature is chosen by findDecision; one branch is created per
	distinct value of that feature (through createBranch), followed by a
	fallback "else" rule. In serial mode the rules are written straight to
	`file`. In parallel mode they are collected as JSON strings and, at the
	root, merged into a json file that reconstructRules converts to rules.

	Args:
		df: data frame to split. Its last column is excluded from the feature names and its `Decision` column is read as the target.
		root: level handed to functions.formatRule; the value 1 marks the top-level call.
		file: path of the rules file; the json file name is derived from the part before the first dot.
		config: dict read for enableParallelism, algorithm, enableRandomForest, enableGBM, enableAdaboost and num_cores.
		dataset_features: mapping of feature name to dtype name; any value other than 'object' marks the feature as numeric.
		parent_level: depth of the parent node (0 for the first split).
		leaf_id: ignored on input; replaced by a fresh uuid for the else rule in parallel mode.
		parents: identifier of the parent rule, stored in the else rule and forwarded to createBranch.
		tree_id: identifier stored in the else rule and forwarded to createBranch.
		validation_df: not used by this function.
		main_process_id: pid forwarded to createBranch and used to count running child processes.

	Returns:
		In parallel mode below the root: the list of JSON-encoded rules
		collected for this sub tree. Otherwise `models`, which contains the
		imported rules module when this call is the root of a regular (non
		ensemble) tree and is empty in every other case.
	"""

	models = []

	decision_rules = []

	feature_names = df.columns[0:-1]

	enableParallelism = config['enableParallelism']
	algorithm = config['algorithm']

	json_file = file.split(".")[0]+".json"

	random_forest_enabled = config['enableRandomForest']

	#--------------------------------------

	#findDecision may transform df in place, keep an untouched copy for the restoration step
	df_copy = df.copy()

	winner_name, num_of_instances, metric, metric_name = findDecision(df, config)

	#find winner index, this cannot be returned by find decision because columns dropped in previous steps
	for position, feature in enumerate(dataset_features):
		if feature == winner_name:
			winner_index = position

	numericColumn = False
	if dataset_features[winner_name] != 'object':
		numericColumn = True

	#restoration
	#numeric fields are already transformed to object in df, so the type has to be checked on df_copy
	columns = df.shape[1]
	for i in range(0, columns-1):
		column_name = df_copy.columns[i]
		column_type = df_copy[column_name].dtypes
		if column_type != 'object' and column_name != winner_name:
			df[column_name] = df_copy[column_name]

	classes = df[winner_name].value_counts().keys().tolist()

	#-----------------------------------------------------

	num_cores = config["num_cores"]

	#arguments of the createBranch calls that are deferred in parallel mode
	input_params = []

	for branch_index, current_class in enumerate(classes):
		subdataset = df[df[winner_name] == current_class]
		subdataset = subdataset.drop(columns=[winner_name])

		#create branches serially
		if enableParallelism != True:

			if branch_index == 0:
				#describe the split once, in front of its first branch

				descriptor = {
					"feature": winner_name,
					"instances": num_of_instances,
					#"metric_name": metric_name,
					"metric_value": round(metric, 4),
					"depth": parent_level + 1
				}
				descriptor = "# "+json.dumps(descriptor)

				functions.storeRule(file, (functions.formatRule(root), "", descriptor))

			results = createBranch(config, current_class, subdataset, numericColumn, branch_index
				, winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id = tree_id, main_process_id = main_process_id)

			decision_rules = decision_rules + results

		else:
			input_params.append((config, current_class, subdataset, numericColumn, branch_index
				, winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id, main_process_id))

	#---------------------------
	#add else condition in the decision tree
	#it is derived from the sub data set of the last branch created above

	is_classification = df.Decision.dtypes == 'object'

	if is_classification:
		#value_counts is ordered by descending frequency, so its first index entry is the most common label.
		#reading it directly avoids relying on the column names produced by reset_index, which differ between pandas versions.
		else_decision = "return '%s'" % (subdataset.Decision.value_counts().index[0])
	else: #regression
		else_decision = "return %s" % (subdataset.Decision.mean())

	if enableParallelism != True:
		functions.storeRule(file,(functions.formatRule(root), "else:"))
		functions.storeRule(file,(functions.formatRule(root+1), else_decision))
	else: #parallelism
		leaf_id = str(uuid.uuid1())

		check_rule = "else: "+else_decision

		if is_classification:
			sample_rule = {
				"current_level": root,
				"leaf_id": leaf_id,
				"parents": parents,
				"rule": check_rule,
				"feature_idx": -1,
				"feature_name": "",
				"instances": df.shape[0],
				"metric": 0,
				"return_statement": 0,
				"tree_id": tree_id
			}
		else:
			sample_rule = {
				"current_level": root,
				"leaf_id": leaf_id,
				"parents": parents,
				"rule": check_rule,
				"tree_id": tree_id,
				"feature_name": "",
				"instances": 0,
				"metric": 0,
				"return_statement": 1
			}

		#json to string
		decision_rules.append(json.dumps(sample_rule))

	#---------------------------

	#the process count is informational only: the condition that relied on it caused hangs and deadlocks and is disabled
	try:
		main_process = psutil.Process(main_process_id)
		children = main_process.children(recursive=True)
		active_processes = len(children) + 1 #plus parent
	except Exception:
		active_processes = 100 #set a large initial value

	results = []
	#create branches in parallel
	if enableParallelism == True:

		if parent_level == 0 and random_forest_enabled != True:

			#feeding more items than num_cores to a single pool caused hangs.
			#workaround: feed the pool in batches of num_cores items, one fresh pool per batch.
			#e.g. len(input_params) = 5, num_cores = 2 -> batches of 2, 2 and 1 items

			for batch_begin in range(0, len(input_params), num_cores):

				input_frame = input_params[batch_begin: batch_begin + num_cores]

				pool = MyPool(num_cores)
				try:
					branch_results = pool.starmap(createBranch, input_frame)

					pool.close()
					pool.join()
				finally:
					#also releases the workers when a branch raised
					pool.terminate()
				gc.collect()

				for branch_result in branch_results:
					results.extend(branch_result)

		else:
			for input_param in input_params:
				results.extend(createBranchWrapper(createBranch, input_param))

		#--------------------------------

		decision_rules = decision_rules + results

		#--------------------------------

		if root != 1: #return children results until the root node
			return decision_rules

	#---------------------------------------------

	if root == 1:

		if enableParallelism == True:

			#custom rules are stored in decision_rules. merge them all in a json file first

			json_rules = "[\n" #initialize

			last_index = len(decision_rules) - 1
			for rule_index, custom_rule in enumerate(decision_rules):

				json_rules += custom_rule

				if rule_index < last_index:
					json_rules += ", "

				json_rules += "\n"

			json_rules += "]"
			functions.createFile(json_file, json_rules)

			#-----------------------------------
			#reconstruct rules from json to py

			reconstructRules(json_file, feature_names)

			#-----------------------------------

		#is regular decision tree
		if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
			#load the generated rules as a module and hand it back as the model

			moduleName = "outputs/rules/rules"
			fp, pathname, description = imp.find_module(moduleName)
			myrules = imp.load_module(moduleName, fp, pathname, description) #rules0
			models.append(myrules)

	return models
Example #21
0
    RewardOnline_array = np.append(RewardOnline_array,
                                   givenSeed_training_results[0][3])
    STDOnline_array = np.append(STDOnline_array,
                                givenSeed_training_results[0][4])

    List_TimeOnline.append(Time_array_online)
    List_RewardOnline.append(RewardOnline_array)
    List_STDOnline.append(STDOnline_array)
    List_LikelihoodOnline.append(Likelihood_online_list)
    List_TimeLikelihoodOnline.append(time_likelihood_online_list)

    return List_TimeOnline, List_RewardOnline, List_STDOnline, List_LikelihoodOnline, List_TimeLikelihoodOnline


# Number of random seeds, which is also the number of workers in each pool.
Nseed = 30
# One entry per index of nTraj; each entry is the list of train() results
# for all seeds at that index.
results_online = []
for i in range(len(nTraj)):
    # A fresh pool is created for every index of nTraj.
    pool = MyPool(Nseed)
    # One argument tuple per seed; the current index i is passed as the last argument.
    args = [(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, max_epoch,
             nTraj, i) for seed in range(Nseed)]
    partial_results = pool.starmap(train, args)
    # Stop accepting work and wait for the workers before starting the next pool.
    pool.close()
    pool.join()

    results_online.append(partial_results)

# %%

# Save the collected results to disk in NumPy's .npy format.
with open('Comparison/Online/results_online.npy', 'wb') as f:
    np.save(f, results_online)
    eval_lists = [[] for i in range(eval_record_number)]
    for i, filename in enumerate(eval_filenames):
        eval_lists[i % eval_record_number].append(filename)

    for i, eval_list in enumerate(eval_lists):
        out_path = os.path.join(args.output_folder, "eval-%05d.record" % i)
        tasks.append(("eval-%05d" % i, ignored_labels, ignore_classes,
                      eval_list, out_path))

    return tasks


if __name__ == "__main__":
    # Script entry point: build the record-writing tasks, run them on a
    # thread pool and report the results together with the elapsed time.
    t_start = time.time()

    random.seed(42)  # Make sure the shuffle order is the same

    # Make the tasks for the workers to process
    tasks = get_record_writing_tasks()

    eprint("Starting %d record writing tasks" % len(tasks))

    # Actually have the workers generate the records. The context manager
    # shuts the worker threads down once starmap has returned every result,
    # instead of leaving the pool open for the rest of the script.
    with multiprocessing.pool.ThreadPool() as pool:
        results = pool.starmap(write_record_from_list, tasks)

    print_results(results)

    t_end = time.time()
    eprint("Took %5.2fs to write records" % (t_end - t_start))
Example #23
0
def png2tex(data, card_x, card_y):
    """Convert png images to pygame surfaces on a thread pool.

    Every item of `data` is handed to __thread_png2tex together with
    `card_x` and `card_y`. The worker returns (name, texture) pairs, which
    are collected into a dict keyed by name.
    """
    with multiprocessing.pool.ThreadPool() as workers:
        jobs = zip(data, repeat(card_x), repeat(card_y))
        converted = workers.starmap(__thread_png2tex, jobs)
        textures = {}
        for name, tex in converted:
            textures[name] = tex
        return textures
Example #24
0
def sounds(data, volume):
    """Load wav sounds for pygame on a thread pool.

    Every item of `data` is handed to __thread_sound together with `volume`.
    The worker returns (name, sound) pairs, which are collected into a dict
    keyed by name.
    """
    with multiprocessing.pool.ThreadPool() as workers:
        jobs = zip(data, repeat(volume))
        loaded = workers.starmap(__thread_sound, jobs)
        sound_by_name = {}
        for name, wav in loaded:
            sound_by_name[name] = wav
        return sound_by_name
Example #25
0
def starmap_with_kwargs(pool: multiprocessing.pool.Pool, fn: Callable,
                        args_iter: Iterable, kwargs_iter: Iterable, N: int):
    """Run `fn` over paired positional and keyword arguments on `pool`.

    Each task is the triple (fn, args, kwargs), taken in lockstep from
    `args_iter` and `kwargs_iter` and dispatched to apply_args_and_kwargs.

    Args:
        pool: pool that executes the tasks.
        fn: callable applied to every (args, kwargs) pair.
        args_iter: iterable of positional-argument collections.
        kwargs_iter: iterable of keyword-argument mappings.
        N: number of tasks, used to size the chunks handed to each worker.

    Returns:
        The list returned by `pool.starmap`, one entry per task.
    """
    args_for_starmap = zip(repeat(fn), args_iter, kwargs_iter)

    # With fewer tasks than workers the integer division yields 0; a
    # chunksize of 0 makes the pool schedule no work at all, so clamp to 1.
    chunksize = max(1, N // pool._processes)

    return pool.starmap(apply_args_and_kwargs, args_for_starmap, chunksize)
Example #26
0
    args = [(i, nTraj, TrainingSet_tot, Labels_tot, TimeBatch, seed) for i in range(len(nTraj))]
    givenSeed_training_results = pool.starmap(DifferentTrainingSet, args) 
    
    pool.close()
    pool.join()
    
    for i in range(len(nTraj)):
        Time_array_online = np.append(Time_array_online, givenSeed_training_results[i][0]) 
        Likelihood_online_list.append(givenSeed_training_results[i][1])
        time_likelihood_online_list.append(givenSeed_training_results[i][2])
        RewardOnline_array = np.append(RewardOnline_array, givenSeed_training_results[i][3])
        STDOnline_array = np.append(STDOnline_array, givenSeed_training_results[i][4])
        
    List_TimeOnline.append(Time_array_online)
    List_RewardOnline.append(RewardOnline_array)
    List_STDOnline.append(STDOnline_array)
    List_LikelihoodOnline.append(Likelihood_online_list)
    List_TimeLikelihoodOnline.append(time_likelihood_online_list)
        
    return List_TimeOnline, List_RewardOnline, List_STDOnline, List_LikelihoodOnline, List_TimeLikelihoodOnline

# Run train() once per seed (0..9) on a pool of 10 workers.
pool = MyPool(10)
# One argument tuple per seed; all other arguments are shared between the runs.
args = [(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, nTraj) for seed in range(10)]
results_online = pool.starmap(train, args) 
# Stop accepting work and wait for the workers to exit.
pool.close()
pool.join()

# %%

# Save the collected results to disk in NumPy's .npy format.
with open('Comparison/Online/results_online.npy', 'wb') as f:
    np.save(f, results_online)
Example #27
0
    pool = multiprocessing.pool.ThreadPool(processes=args.num_threads)
elif SYS_TYPE == 'fbsd':
    pool = multiprocessing.pool.ThreadPool(processes=args.num_threads)
else:
    death('Unsupported platform' + SYS_TYPE)

# Perform IPv4 tests
if not args.no_v4:
    # IPv4 address of every root server, in ROOT_SERVERS order.
    ipv4_addresses = [rsi.ipv4 for rsi in ROOT_SERVERS]

    if not args.no_traceroute:
        fancy_output(
            0,
            "\rRunning traceroute with " + str(args.num_threads) + " threads")
        # One trace_route call per address; every call receives the same
        # result of find_binary('traceroute') as its first argument.
        traces = pool.starmap(
            trace_route,
            zip(itertools.repeat(find_binary('traceroute')), ipv4_addresses))
        lengths = []
        # starmap keeps input order, so traces line up with ROOT_SERVERS.
        for rsi, trace in zip(ROOT_SERVERS, traces):
            # NOTE(review): trace[0] raises IndexError for an empty trace -
            # confirm trace_route never returns an empty result.
            dbgLog(
                LOG_DEBUG, "traceroute_" + rsi.name + " len:" +
                str(len(trace)) + " first:" + repr(trace[0]))
            lengths.append(len(trace))
            # Keep the trace on the server object.
            rsi.traceroute_v4 = trace

        # Summarise the trace lengths over all root servers.
        median = str(statistics.median(lengths))
        minimum = str(min(lengths))
        maximum = str(max(lengths))
        fancy_output(
            5, "\rtraceroute hops min:" + minimum + " max:" + maximum +
            " median:" + median)
Example #28
0
def svg2png(svg_cards, svg_start_pos, svg_card_size, defs_dict):
    """Convert svg cards to png in a pool of worker processes.

    The values of `svg_cards` and `defs_dict` are serialised with
    etree.tostring before being passed to the workers. Every card is passed
    to __thread_svg2png together with the start position, the card size and
    the full list of serialised definitions. Returns the list of worker
    results, one per card.
    """
    serialized_cards = list(map(etree.tostring, svg_cards.values()))
    serialized_defs = list(map(etree.tostring, defs_dict.values()))
    with multiprocessing.Pool() as workers:
        jobs = zip(
            serialized_cards,
            repeat(svg_start_pos),
            repeat(svg_card_size),
            repeat(serialized_defs),
        )
        return workers.starmap(__thread_svg2png, jobs)
Example #29
0
def buildDecisionTree(df,
                      root,
                      file,
                      config,
                      dataset_features,
                      parent_level=0,
                      leaf_id=0,
                      parents='root'):
    """Grow one node of the decision tree and create its branches.

    The winning feature is chosen by findDecision; one branch is created per
    distinct value of that feature (through createBranch), followed by a
    fallback "else" rule. In serial mode the rules are written straight to
    `file`. In parallel mode every rule is stored in its own .txt file under
    outputs/rules and, at the root, those files are merged into a json file
    that reconstructRules converts to rules.

    For the root of a regular (non ensemble) tree the generated rules module
    is imported and the accuracy (classification) or MAE / RMSE (regression)
    on the training data is printed.

    Args:
        df: data frame to split; its `Decision` column is read as the target.
        root: level handed to functions.formatRule; the value 1 marks the
            top-level call.
        file: path of the rules file; the json file name is derived from the
            part before the first dot.
        config: dict read for enableParallelism, algorithm,
            enableRandomForest, enableGBM and enableAdaboost.
        dataset_features: mapping of feature name to dtype name; any value
            other than 'object' marks the feature as numeric.
        parent_level: not used by this function.
        leaf_id: ignored on input; replaced by a fresh uuid for the else
            rule in parallel mode.
        parents: identifier of the parent rule, stored in the else rule and
            forwarded to createBranch.

    Returns:
        `models`: a list holding the imported rules module for the root of a
        regular tree, an empty list otherwise.
    """

    models = []

    enableParallelism = config['enableParallelism']
    algorithm = config['algorithm']

    json_file = file.split(".")[0] + ".json"

    if root == 1:
        if config['enableRandomForest'] != True and config[
                'enableGBM'] != True and config['enableAdaboost'] != True:
            #untouched copy of the training data, used for the accuracy report
            raw_df = df.copy()

    #--------------------------------------

    #findDecision may transform df in place, keep an untouched copy for the restoration step
    df_copy = df.copy()

    winner_name = findDecision(df, config)

    #find winner index, this cannot be returned by find decision because columns dropped in previous steps
    for position, feature in enumerate(dataset_features):
        if feature == winner_name:
            winner_index = position

    numericColumn = False
    if dataset_features[winner_name] != 'object':
        numericColumn = True

    #restoration
    #the copy is taken before findDecision runs, so the original type has to be
    #read from df_copy: a numeric column that findDecision turned into object
    #in df would otherwise never be restored
    columns = df.shape[1]
    for i in range(0, columns - 1):
        column_name = df_copy.columns[i]
        column_type = df_copy[column_name].dtypes
        if column_type != 'object' and column_name != winner_name:
            df[column_name] = df_copy[column_name]

    classes = df[winner_name].value_counts().keys().tolist()

    #-----------------------------------------------------

    #TO-DO: you should specify the number of cores in config
    #allocate half of your total cores, but never less than one worker
    num_cores = max(1, multiprocessing.cpu_count() // 2)

    #arguments of the createBranch calls that are deferred in parallel mode
    input_params = []

    for branch_index, current_class in enumerate(classes):
        subdataset = df[df[winner_name] == current_class]
        subdataset = subdataset.drop(columns=[winner_name])

        branch_args = (config, current_class, subdataset, numericColumn,
                       branch_index, winner_index, root, parents, file,
                       dataset_features)

        if enableParallelism != True:
            #create branches serially
            createBranch(*branch_args)
        else:
            input_params.append(branch_args)

    #---------------------------
    #add else condition in the decision tree
    #it is derived from the sub data set of the last branch created above

    if df.Decision.dtypes == 'object':  #classification
        #value_counts is ordered by descending frequency, so its first index
        #entry is the most common label. reading it directly avoids relying on
        #the column names produced by reset_index, which differ between
        #pandas versions.
        else_decision = "return '%s'" % (
            subdataset.Decision.value_counts().index[0])
    else:  #regression
        else_decision = "return %s" % (subdataset.Decision.mean())

    if enableParallelism != True:
        functions.storeRule(file, (functions.formatRule(root), "else:"))
        functions.storeRule(
            file, (functions.formatRule(root + 1), else_decision))
    else:  #parallelism
        leaf_id = str(uuid.uuid1())
        custom_rule_file = "outputs/rules/" + str(leaf_id) + ".txt"

        check_rule = "else: " + else_decision

        sample_rule = "   {\n"
        sample_rule += "      \"current_level\": " + str(root) + ",\n"
        sample_rule += "      \"leaf_id\": \"" + str(leaf_id) + "\",\n"
        sample_rule += "      \"parents\": \"" + parents + "\",\n"
        sample_rule += "      \"rule\": \"" + check_rule + "\"\n"
        sample_rule += "   }"

        functions.createFile(custom_rule_file, "")
        functions.storeRule(custom_rule_file, sample_rule)

    #---------------------------

    #create branches in parallel
    if enableParallelism == True:
        #a `with Pool(...)` block caused trouble for this recursive function,
        #so the pool is managed by hand
        pool = MyPool(num_cores)
        try:
            pool.starmap(createBranch, input_params)
        except BaseException:
            #do not leave workers behind when a branch fails
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

    #---------------------------------------------

    #calculate accuracy metrics
    if root == 1:

        if enableParallelism == True:

            #custom rules are stored in .txt files. merge them all in a json file

            functions.createFile(json_file, "[\n")

            rules_dir = os.getcwd() + "/outputs/rules"
            custom_rules = []

            for rule_filename in os.listdir(rules_dir):
                if not rule_filename.endswith(".txt"):
                    continue

                #this file stores a custom rule
                rule_path = rules_dir + "/" + rule_filename
                with open(rule_path, "r") as rule_handle:
                    custom_rule = rule_handle.read()

                #every rule but the first one is preceded by a separator
                if custom_rules:
                    custom_rule = ", " + custom_rule

                custom_rules.append(rule_path)
                functions.storeRule(json_file, custom_rule)

            functions.storeRule(json_file, "]")

            #-----------------------------------

            #custom rules are already merged in a json file. clear messy custom rules
            #TO-DO: if random forest trees are handled in parallel, this would be a problem. You cannot know the related tree of a rule. You should store a global tree id in a rule.

            for rule_path in custom_rules:
                os.remove(rule_path)

            #-----------------------------------

            reconstructRules(json_file)

            #-----------------------------------

        if config['enableRandomForest'] != True and config[
                'enableGBM'] != True and config['enableAdaboost'] != True:
            #this is reguler decision tree. find accuracy here.

            moduleName = "outputs/rules/rules"
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname,
                                      description)  #rules0
            models.append(myrules)

            instances = df.shape[0]

            #instead of for loops, pandas functions perform well
            raw_df['Prediction'] = raw_df.apply(findPrediction, axis=1)
            if algorithm != 'Regression':
                idx = raw_df[raw_df['Prediction'] == raw_df['Decision']].index

                accuracy = 100 * len(idx) / instances
                print("Accuracy: ", accuracy, "% on ", instances, " instances")
            else:
                raw_df['Absolute_Error'] = abs(raw_df['Prediction'] -
                                               raw_df['Decision'])
                raw_df['Absolute_Error_Squared'] = raw_df[
                    'Absolute_Error'] * raw_df['Absolute_Error']

                mae = raw_df['Absolute_Error'].sum() / instances
                print("MAE: ", mae)

                mse = raw_df['Absolute_Error_Squared'].sum() / instances
                rmse = math.sqrt(mse)
                print("RMSE: ", rmse)

                mean = raw_df['Decision'].mean()
                print("Mean: ", mean)

                #relative errors are only reported for a positive mean
                if mean > 0:
                    print("MAE / Mean: ", 100 * mae / mean, "%")
                    print("RMSE / Mean: ", 100 * rmse / mean, "%")

    return models
Example #30
0
def main(args):
    """Collect all client assets and write the final asset bundle.

    Loads the requested age file (or every age file of the client), finds
    the pages of those ages, gathers their external dependencies in a
    process pool, optionally adds PythonFileMod and client dependencies, and
    finally prepares and writes the packages.

    Args:
        args: parsed command line options. The attributes read here are
            moul_scripts, age, no_ages, source, verbose,
            no_pfm_dependencies, python, no_pfm_py_dependencies,
            no_pfm_sdl_dependencies, no_client, client_arch, dataset,
            distribute and destination.

    Returns:
        True on success, and also when no age files are found in the client.
        False when the scripts path does not exist, an age fails to load or
        no suitable python interpreter is available.
    """
    if args.moul_scripts and not args.moul_scripts.exists():
        logging.error(f"Scripts path '{args.moul_scripts}' does not exist.")
        return False

    if args.age:
        age_info = load_age(make_asset_path("data", f"{args.age}.age", client_path=args.source))
        if age_info is None:
            return False
        age_infos = (age_info,)
    elif not args.no_ages:
        logging.info("Loading age files...")
        age_source_path = make_asset_path("data", client_path=args.source)
        age_infos = [load_age(age_file_path) for age_file_path in age_source_path.glob("*.age")]
        if not age_infos:
            logging.warning("No age files found in client!")
            return True
        elif not all(age_infos):
            # At least one age failed to load.
            return False
    else:
        age_infos = []

    # Collect a list of all age pages to be abused for the purpose of finding its resources
    # Would be nice if this were a common function of libHSPlasma...
    all_outputs = {}
    all_pages = list(find_all_pages(all_outputs, make_asset_path("data", client_path=args.source), *age_infos))
    logging.info(f"Found {len(all_pages)} Plasma pages.")

    # We want to get the age dependency data. Presently, those are the python and ogg files.
    # Unfortunately, libHSPlasma insists on reading in the entire page before allowing us to
    # do any of that. So, we will execute this part in a process pool.
    pool = multiprocessing.pool.Pool(initializer=_utils.multiprocess_init)
    try:
        dlevel = plDebug.kDLWarning if args.verbose else plDebug.kDLNone
        results = pool.starmap(find_page_externals, ((page_path, dlevel) for _, page_path in all_pages))
    except BaseException:
        # Includes KeyboardInterrupt: kill the workers right away, then re-raise.
        pool.terminate()
        raise
    else:
        # All results are in; let the workers exit instead of keeping them
        # alive for the remainder of the run.
        pool.close()
    finally:
        pool.join()

    # What we have now is a list of dicts, each nearly obeying the output format spec.
    # Now, we have to merge them... ugh.
    logging.info(f"Merging results from {len(results)} dependency lists...")
    coerce_asset_dicts(all_outputs, all_pages, results)

    # PythonFileMods can import other python modules and be a STATEDESC
    if not args.no_pfm_dependencies:
        py_exe = args.python if args.python else _utils.find_python_exe()
        if not py_exe:
            logging.critical("Uru-compatible python interpreter unavailable.")
            return False
        logging.info("Searching for PythonFileMod dependencies...")
        find_pfm_externals(all_outputs, py_exe, args.no_pfm_py_dependencies, args.no_pfm_sdl_dependencies,
                           make_asset_path("python", client_path=args.source, scripts_path=args.moul_scripts),
                           make_asset_path("sdl", client_path=args.source, scripts_path=args.moul_scripts))

    # Gather client exes, DLLs, and installers.
    if not args.no_client:
        logging.info("Searching for client files...")
        find_client_dependencies(all_outputs, args.source, args.moul_scripts, args.client_arch)

    # OK, now everything is (mostly) sane.
    logging.info("Beginning final pass over assets...")
    prepare_packages(all_outputs, args.source, args.moul_scripts, dataset=args.dataset, distribute=args.distribute)

    # Time to produce the bundle
    logging.info("Producing final asset bundle...")
    output_packages(all_outputs, args.source, args.moul_scripts, args.destination)

    return True
Example #31
0
    for i in range(len(nTraj)):
        Time_array_batch = np.append(Time_array_batch,
                                     givenSeed_training_results[i][0])
        Likelihood_batch_list.append(givenSeed_training_results[i][1])
        time_likelihood_batch_list.append(givenSeed_training_results[i][2])
        RewardBatch_array = np.append(RewardBatch_array,
                                      givenSeed_training_results[i][3])
        STDBatch_array = np.append(STDBatch_array,
                                   givenSeed_training_results[i][4])

    List_TimeBatch.append(Time_array_batch)
    List_RewardBatch.append(RewardBatch_array)
    List_STDBatch.append(STDBatch_array)
    List_LikelihoodBatch.append(Likelihood_batch_list)
    List_TimeLikelihoodBatch.append(time_likelihood_batch_list)

    return List_TimeBatch, List_RewardBatch, List_STDBatch, List_LikelihoodBatch, List_TimeLikelihoodBatch


# Run train() once per seed (0..9) on a pool of 10 workers.
pool = MyPool(10)
# One argument tuple per seed; all other arguments are shared between the runs.
args = [(seed, TrainingSet_Array, Labels_Array, max_epoch, nTraj)
        for seed in range(10)]
results_batch = pool.starmap(train, args)
# Stop accepting work and wait for the workers to exit.
pool.close()
pool.join()

# %%

# Save the collected results to disk in NumPy's .npy format.
with open('Comparison/Batch/results_batch.npy', 'wb') as f:
    np.save(f, results_batch)
Example #32
0
            with open(output_loc, 'wb') as f:
                f.write(x)
                # print("Wrote it")
        except Exception:
            print("Failed " + trackId)

    return None


#
# Code for downloading from the catalog tsv including track IDs.
#
catalog_file = './data/catalog_out.tsv'
# One 1-tuple per catalog row, so the list can be fed to starmap unchanged.
trackIds = []
with open(catalog_file, 'r') as f:
    reader = csv.reader(f, delimiter='\t')
    # Skip the first row (presumably the header - verify against the catalog file).
    next(reader)
    for row in reader:
        # The track id is taken from the fifth column.
        trackIds += [(row[4], )]
        # Sanity check on the value that was just appended.
        if type(trackIds[-1][0]) is not str:
            print("WAMP WOW")

#
# Do it...
#
# NOTE(review): every track is downloaded serially here and then once more
# through the pool below - confirm the serial pass is intentional.
for tid in trackIds:
    download_sample(tid[0])
# NOTE(review): this rebinds the name `pool` (presumably the
# multiprocessing.pool module - confirm) to the Pool instance, and the pool
# is never closed or joined.
pool = pool.Pool(8)
pool.starmap(download_sample, trackIds)
# download_sample('166297')
Example #33
0
 # Root directory that is searched for capture files.
 directory_p = args.directory
 #If you want to do by hand
 #directory_p = r'C:\Users\Gianl\Desktop\Call_with_Chiara'
 # Paths of all capture files found below directory_p.
 pcap_app = []
 for r, d, f in os.walk(directory_p):
     for file in f:
         # Substring match: any file name containing '.pcap' is collected
         # ('.pcapng' names already contain '.pcap').
         if ('.pcap' in file or '.pcapng' in file):
             pcap_app.append(os.path.join(r, file))
 #print("Pcap found: {}\n".format(pcap_app))
 #For each .pcap in the folders, do the process
 # Manager-backed list that is handed to every main2 call to collect results.
 manager = multiprocessing.Manager()
 result_list = manager.list()
 #Look for the ports
 pool= multiprocessing.Pool(processes = n_process) #Limit the number of processes to the cpu cores - 1
 pool_tuple = [(x, result_list) for x in pcap_app]
 pool.starmap(main2, pool_tuple)
 pool.close()
 pool.join()
 #logging.info("Finish Process main2\n")
 list_app_port = []
 # Flatten the per-file results into a single list.
 list_app_port = [j for i in result_list for j in i]
 # Unique ports, converted to int.
 port_used = set(list(map(int, list_app_port)))
 print (port_used)
 # Empty the shared list in place.
 result_list[:] = []
 #print(result_list)
 #Decode by port and create the .csv
 pool= multiprocessing.Pool(processes = n_process) #Limit the number of processes to the cpu cores - 1
 pool_tuple = [(x, port_used, args.screen, args.quality, args.plot) for x in pcap_app]  #result_list, args.plot
 # NOTE(review): pool.map passes each tuple as ONE argument, unlike the
 # starmap call above - confirm pcap_to_json expects a single tuple.
 pool.map(pcap_to_json, pool_tuple)
 pool.close()
 pool.join()
Example #34
0
def buildDecisionTree(df, root, file, config, dataset_features, parent_level = 0, leaf_id = 0, parents = 'root', validation_df = None):
	"""Build one level of the decision tree and emit its rules.

	The winning split feature is chosen with ``findDecision``. One branch is
	then created per distinct value of that feature, either immediately via
	``createBranch`` (serial mode) or collected and dispatched through a
	``MyPool`` worker pool (parallel mode). An ``else`` fallback rule is
	written as well. When ``root == 1`` the per-leaf rule files produced in
	parallel mode are merged into a JSON file, and for a plain decision tree
	(no random forest / GBM / AdaBoost) the rules module is loaded.

	Args:
		df: pandas DataFrame. All columns but the last are treated as
			features, and a ``Decision`` column is read as the target.
		root: current level; ``1`` marks the top-level call. It is also passed
			to ``functions.formatRule`` when rules are written.
		file: path of the rules file. The JSON file name is derived from the
			text before the first ``"."`` in this path.
		config: dict; the keys read here are ``enableParallelism``,
			``algorithm``, ``enableRandomForest``, ``enableGBM``,
			``enableAdaboost`` and ``num_cores``.
		dataset_features: indexed by feature name and compared with
			``'object'``, and iterated to find the winner's position
			(presumably a dict of feature name -> dtype; verify against caller).
		parent_level: depth of the parent; ``parent_level + 1`` is written as
			``depth`` in the serial-mode descriptor.
		leaf_id: the passed value is not read; in parallel mode it is replaced
			by a fresh UUID string.
		parents: stored under ``parents`` in the parallel-mode else rule and
			forwarded to ``createBranch``.
		validation_df: not used in this function.

	Returns:
		list: ``models``. It contains the loaded rules module only for the
		top-level call of a plain decision tree; otherwise it is empty.
	"""
	
	models = []
	# every column except the last one is considered a feature
	feature_names = df.columns[0:-1]
	
	enableParallelism = config['enableParallelism']
	algorithm = config['algorithm'] # NOTE(review): not used below in this function
	
	# text before the first "." of the rules path, with ".json" appended
	json_file = file.split(".")[0]+".json"
	
	if root == 1:
		if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
			raw_df = df.copy() # NOTE(review): raw_df is never read afterwards
	
	#--------------------------------------
	
	# snapshot taken before findDecision so columns can be restored afterwards
	df_copy = df.copy()
	
	winner_name, num_of_instances, metric, metric_name = findDecision(df, config)
	
	#find winner index, this cannot be returned by find decision because columns dropped in previous steps
	j = 0 
	for i in dataset_features:
		if i == winner_name:
			winner_index = j
		j = j + 1
	
	# any dtype other than 'object' marks the winner as a numeric column
	numericColumn = False
	if dataset_features[winner_name] != 'object':
		numericColumn = True
	
	#restoration
	# every non-object column except the winner is copied back from the
	# snapshot (presumably findDecision modifies them in place - TODO confirm)
	columns = df.shape[1]
	for i in range(0, columns-1):
		column_name = df.columns[i]; column_type = df[column_name].dtypes
		if column_type != 'object' and column_name != winner_name:
			df[column_name] = df_copy[column_name]
	
	# distinct values of the winning feature, as ordered by value_counts
	classes = df[winner_name].value_counts().keys().tolist()
		
	#-----------------------------------------------------
	
	num_cores = config["num_cores"]
	
	# argument tuples for createBranch, filled only in parallel mode
	input_params = []
	
	#serial approach
	for i in range(0,len(classes)):
		current_class = classes[i]
		# rows of this branch, without the column that was split on
		subdataset = df[df[winner_name] == current_class]
		subdataset = subdataset.drop(columns=[winner_name])
		branch_index = i * 1
		
		#create branches serially
		if enableParallelism != True:
			
			if i == 0:
				#descriptor = "# Feature: "+winner_name+", Instances: "+str(num_of_instances)+", "+metric_name+": "+str(round(metric, 4))
				
				# the descriptor is stored once per split, before the first
				# branch, as a "# "-prefixed JSON line in the rules file
				descriptor = {
					"feature": winner_name,
					"instances": num_of_instances,
					#"metric_name": metric_name,
					"metric_value": round(metric, 4),
					"depth": parent_level + 1
				}
				descriptor = "# "+json.dumps(descriptor)
				
				functions.storeRule(file, (functions.formatRule(root), "", descriptor))
			
			createBranch(config, current_class, subdataset, numericColumn, branch_index
				, winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric)
		else:
			# parallel mode: only collect the arguments; dispatched further below
			input_params.append((config, current_class, subdataset, numericColumn, branch_index
				, winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric))
	
	#---------------------------
	#add else condition in the decision tree
	# NOTE(review): subdataset here is whatever the LAST loop iteration left
	# behind, so the fallback is derived from the last class only (and the
	# name is undefined when classes is empty) - confirm this is intended.
	
	if df.Decision.dtypes == 'object': #classification
		# the fallback returns the most frequent Decision value of subdataset
		# NOTE(review): the rename below relies on value_counts().reset_index()
		# naming its columns "index" and "Decision" - confirm for the pandas
		# version in use.
		pivot = pd.DataFrame(subdataset.Decision.value_counts()).reset_index()
		pivot = pivot.rename(columns = {"Decision": "Instances","index": "Decision"})
		pivot = pivot.sort_values(by = ["Instances"], ascending = False).reset_index()
		
		else_decision = "return '%s'" % (pivot.iloc[0].Decision)
		
		if enableParallelism != True:
			functions.storeRule(file,(functions.formatRule(root), "else:"))
			functions.storeRule(file,(functions.formatRule(root+1), else_decision))
		else: #parallelism
			# each leaf rule goes to its own uuid-named .txt file
			leaf_id = str(uuid.uuid1())
			custom_rule_file = "outputs/rules/"+str(leaf_id)+".txt"
			
			check_rule = "else: "+else_decision
			
			sample_rule = {}
			sample_rule["current_level"] = root
			sample_rule["leaf_id"] = leaf_id
			sample_rule["parents"] = parents
			sample_rule["rule"] = check_rule
			sample_rule["feature_idx"] = -1
			sample_rule["feature_name"] = ""
			sample_rule["instances"] = df.shape[0]
			sample_rule["metric"] = 0
			sample_rule["return_statement"] = 0
			
			#json to string
			sample_rule = json.dumps(sample_rule)
			
			functions.createFile(custom_rule_file, "")
			functions.storeRule(custom_rule_file, sample_rule)
			
	else: #regression
		# the fallback returns the mean Decision value of subdataset
		else_decision = "return %s" % (subdataset.Decision.mean())
				
		if enableParallelism != True:
			functions.storeRule(file,(functions.formatRule(root), "else:"))
			functions.storeRule(file,(functions.formatRule(root+1), else_decision))
		else:
			leaf_id = str(uuid.uuid1())
			custom_rule_file = "outputs/rules/"+str(leaf_id)+".txt"
			
			check_rule = "else: "+else_decision
			
			# hand-built JSON text; unlike the classification branch it carries
			# only current_level, leaf_id, parents and rule
			sample_rule = "   {\n"
			sample_rule += "      \"current_level\": "+str(root)+",\n"
			sample_rule += "      \"leaf_id\": \""+str(leaf_id)+"\",\n"
			sample_rule += "      \"parents\": \""+parents+"\",\n"
			sample_rule += "      \"rule\": \""+check_rule+"\"\n"
			sample_rule += "   }"
			
			functions.createFile(custom_rule_file, "")
			functions.storeRule(custom_rule_file, sample_rule)
	
	#---------------------------
	
	#create branches in parallel
	if enableParallelism == True:
		"""
		#this usage causes trouble for recursive functions
		with Pool(number_of_cpus) as pool:
			pool.starmap(createBranch, input_params)
		"""
		
		# one createBranch call per collected argument tuple
		pool = MyPool(num_cores)
		results = pool.starmap(createBranch, input_params) # NOTE(review): results is not read afterwards
		pool.close()
		pool.join()
	
	#---------------------------------------------
	
	# post-processing that only the top-level call performs
	if root == 1:
		
		if enableParallelism == True:

			#custom rules are stored in .txt files. merge them all in a json file
			
			functions.createFile(json_file, "[\n")
			
			custom_rules = []
			
			file_index = 0
			# NOTE: the loop variable rebinds the `file` parameter from here on
			for file in os.listdir(os.getcwd()+"/outputs/rules"):
				if file.endswith(".txt"):
					custom_rules.append(os.getcwd()+"/outputs/rules/"+file)
					#print(file) #this file stores a custom rule
					f = open(os.getcwd()+"/outputs/rules/"+file, "r")
					custom_rule = f.read()
					
					# every entry after the first gets a leading JSON separator
					if file_index > 0:
						custom_rule = ", "+custom_rule
					
					functions.storeRule(json_file, custom_rule)
					f.close()
					file_index = file_index + 1
					
			functions.storeRule(json_file, "]")
			
			#-----------------------------------
			
			#custom rules are already merged in a json file. clear messy custom rules
			#TO-DO: if random forest trees are handled in parallel, this would be a problem. You cannot know the related tree of a rule. You should store a global tree id in a rule.
			
			for file in custom_rules:
				os.remove(file)
			
			#-----------------------------------
			
			reconstructRules(json_file, feature_names)

			#feature importance should be calculated by demand?
			feature_importance(json_file, dataset_features)
			
			#-----------------------------------
		
		#is regular decision tree
		if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
		#this is a regular decision tree. load its rules module here.
			
			# NOTE(review): the imp module is deprecated and was removed in
			# Python 3.12 - confirm the supported interpreter versions.
			moduleName = "outputs/rules/rules"
			fp, pathname, description = imp.find_module(moduleName)
			myrules = imp.load_module(moduleName, fp, pathname, description) #rules0
			models.append(myrules)
			
	return models
    # Load the dish list, let the user pick a slice of it, run the similarity
    # job on that slice in a worker pool, and append the results to
    # Search_tags.json.
    dishes = read_JSON('./Resources/dish_freq.json')

    os_type = platform.system()

    similarity = Similarity()

    # Read through a context manager so the file handle is closed promptly.
    with open('./Resources/dish_freq.json') as freq_file:
        freq_dist = json.load(freq_file)

    # The slice bounds get their own names. Previously they were stored in
    # `start` / `end`, the same names used for the timing below, so the
    # reported duration was "now minus the start index" (roughly the epoch).
    start_index = int(input("Enter starting index (max= 33302 , min = 0) :"))
    end_index = int(input("Enter End (max= 33302 , min = 0) :"))
    dishes_to_process = dishes[start_index:end_index]

    # Time only the processing, not the wait for user input.
    started_at = time.time()

    # Both the output file and the pool are released when the block exits,
    # even if a worker raises.
    with open('Search_tags.json', 'a+', encoding='utf-8') as out_file, \
            multiprocessing.Pool(processes=cpu_count) as pool:
        # Raise the scheduling priority of this process and of the child
        # processes that exist at this point.
        parent = psutil.Process()
        if os_type == 'Windows': parent.nice(psutil.REALTIME_PRIORITY_CLASS)
        if os_type == 'Linux': parent.nice(-20)
        for child in parent.children():
            if os_type == 'Windows': child.nice(psutil.REALTIME_PRIORITY_CLASS)
            if os_type == 'Linux': child.nice(-20)
        # NOTE(review): starmap unpacks the single inner list, i.e. this makes
        # ONE call create_test_cases(*dishes_to_process) — confirm that this
        # is the intended calling convention.
        results = pool.starmap(similarity.create_test_cases,
                               [dishes_to_process])
        for result in results:
            print(result)
            json.dump(result, out_file)
    print('\n Done!')
    print('Time taken:', time.time() - started_at)

# Time taken: 1548227540.7543015 prev ver