def process_all(args):
    multiprocessing.Pool(32).map(functools.partial(process_file, args=args),
                                 args.files)
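
# Note (not part of the original example): a minimal sketch of the same
# functools.partial + Pool.map pattern, assuming process_file(path, args=...)
# as above; the context manager releases the worker processes once map() returns.
import functools
import multiprocessing

def process_all_with_cleanup(args):
    worker = functools.partial(process_file, args=args)
    with multiprocessing.Pool(32) as pool:
        return pool.map(worker, args.files)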
Example 2
            Info_Train['cvm']=cvm
            
            Info = (Exp_temp >> select(Exp_temp[['CHROM','GeneStart','GeneEnd',
                                                 'GeneName','TargetID']])).merge(Info_Train,
                                                                                 left_on='TargetID',
                                                                                 right_on='TargetID',
                                                                                 how='outer')
            Info.to_csv(args.out_prefix+'/CHR'+str(args.chr_num)+'_elastic_net_training_info.txt',
                        header=None,index=None,sep='\t',mode='a')

###############################################################################################################
### Start thread
if args.thread < int(len(EXP)/100) or args.thread > len(EXP):
    args.thread = (int(len(EXP)/100)+1)*100

pool = multiprocessing.Pool(args.thread)

pool.map(thread_process,[num for num in range(len(EXP))])

pool.close()
pool.join()

####################################################################################################################
### time calculation
elapsed = round((time.clock()-start_time)/60, 2)

print(str(elapsed)+' minutes')



Example 3
            line = linet.replace("|"," ")
            line = re.sub(token, " ellipsis ", line)
            f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n')
            submission_id.append(array[0])
            # f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n')   # uncomment this to debug with the 3.5M data.
            # submission_id.append(array[0])
        f.close()
        return test_p,submission_id

p = multiprocessing.Pool(2)
preds = p.imap(pre, [0,1])
for i, pred in enumerate(preds):
    if i == 0:
        #print(pred)
        train_p = pred
    elif i==1:
        #print(pred)
        (test_p,submission_id) = pred
p.close()
p.join()
del(p)

def fill(x):
    if len(x)==0:
        return "missing"
    return x
    cols_types = {'all': all_feat_cols, 'reduced': reduced_feats}

    #%%
    random_state = 777
    n_folds = 6

    #dum variables keep for compatibility
    metric2exclude = 'loss'
    n_feats2remove = 'log2'

    fold_param = (cuda_id, train_args, metric2exclude, n_feats2remove)
    all_data_in = fold_generator(df_filt, cols_types, n_folds, test_size,
                                 fold_param)
    #%%
    p = mp.Pool(pool_size)
    results = p.map(get_softmax_clf, all_data_in)
    #%%
    with open(save_name, "wb") as fid:
        pickle.dump((strain_dicts, results), fid)

    #%%
    res_db = {}
    for (set_type, i_fold), dat in results:
        if set_type not in res_db:
            res_db[set_type] = []
        res_db[set_type].append(dat)

    for set_type, dat in res_db.items():
        res_db[set_type] = list(zip(*dat))
Example 5
    except:
            print(BOLD+Red+"[-] Something Went Wrong!",RESET)


def splitfile():
    splitLen = totallines/splt
    outputBase = 'part' 
    input = open(args.wordlist, 'r',  encoding="ISO-8859-1").read().split('\n')
    at = 1
    global names
    names=[]
    for lines in range(0, len(input), int(splitLen)):
        outputData = input[lines:lines+int(splitLen)]
        output = open(outputBase + str(at) + '.txt', 'w',  encoding="ISO-8859-1")
        namer=(outputBase + str(at) + '.txt')
        names.append(namer)      
        output.write('\n'.join(outputData))
        output.close()
        at += 1


if __name__ == "__main__":

    splitfile()
    pool = mp.Pool(mp.cpu_count())
    results = pool.map(dbrute, [wordlst for wordlst in names])
    pool.close() 

 

Example 6
args = parser.parse_args()

NPAR = int(args.nproc)
simMode = args.simulate
hetMode = args.het
homMode = args.hom
maxdata = int(args.max) if args.max is not None else None
regions = readIntervalFile(args.varfile, simMode, hetMode, homMode)
fname = args.bam
subsampling_fraction = float(args.subsample)

if maxdata is not None:  #Limit on how many results to return as opposed to using the whole VCF file in --het and --hom when you only need a few thousand training examples
    i = 0
    if NPAR > 1:
        import multiprocessing as mp
        pool = mp.Pool(processes=NPAR)
        for result in pool.imap_unordered(processRegion, regions):
            if result is not None:
                print("\t".join([str(x) for x in result]))
                i += 1
                if i > maxdata: sys.exit()
    else:
        for r in regions:
            result = processRegion(r)
            if result is not None:
                print("\t".join([str(x) for x in result]))
                i += 1
                if i > maxdata: sys.exit()
else:
    if NPAR > 1:
        import multiprocessing as mp
Example 7
    ot = UTCDateTime(ot)
    data_dict = get_data_dict(ot, args.data_dir)
    event_name = dtime2str(ot)
    event_dir = os.path.join(temp_root, event_name)
    if not os.path.exists(event_dir): os.makedirs(event_dir)

    # cut event
    print('cutting {}'.format(event_name))
    for net_sta, [tp, ts] in pick_dict.items():
        chn_codes = chn_dict[net_sta.split('.')[0]]
        b = tp - UTCDateTime(ot.date) - t_blank
        data_paths = data_dict[net_sta]
        out_paths = [os.path.join(event_dir,'%s.%s'%(net_sta,chn)) for chn in chn_codes]
        # cut event
        sac.cut(data_paths[0], b, b+win_len, out_paths[0])
        sac.cut(data_paths[1], b, b+win_len, out_paths[1])
        sac.cut(data_paths[2], b, b+win_len, out_paths[2])
        # write header
        t0 = t_blank
        t1 = ts -tp + t_blank
        sac.ch_event(out_paths[0], lon, lat, dep, mag, [t0,t1])
        sac.ch_event(out_paths[1], lon, lat, dep, mag, [t0,t1])
        sac.ch_event(out_paths[2], lon, lat, dep, mag, [t0,t1])

# cut all events data
pool = mp.Pool(num_workers)
pool.map_async(cut_event, range(len(pha_list)))
pool.close()
pool.join()

                #if j in [76]: 
                #    image2data = Marker(image2data,[y2,x2],20)
                j += 1
            #image1data = Marker(image1data,[y1,x1],8)
            image2data = Marker(image2data,[bestMatch[1][1],bestMatch[1][0]],20)
            p2 = (pixelsToRadians(centerposition[0]-bestMatch[1][0]),pixelsToRadians(1447-bestMatch[1][1])) 
            distance = AnglesToDistance((theta1,phi1),p2)
            p1 = 1447-radiansToPixels(phi1)
            #p2 = centerposition[0] - radiansToPixels(theta1)
            p2 = 2896 - radiansToPixels(theta1)
            q.put((p1,p2,distance))
            (centerposition,image1data,image2data) = shiftImages(centerposition,image1data,image2data,-leftShift)

    statusBar = 0
    columns = list(range(0,5793,stepSize))
    pool = mp.Pool(processes=4)
    m = mp.Manager()
    q = m.Queue()
    for column in columns:
        print(column)
        arguments = (image1data,image2data,centerposition,q,stepSize,sampleSize,statusBar,1)
        a = pool.apply_async(ComputeColumn, args=(arguments,))
        print(a)
        print("hello")
        input()
        #ComputeColumn(image1data,image2data,centerposition,q,stepSize,sampleSize,statusBar,column)
        #processes.append(process)
        #process.start()
    #for process in processes:
    #    process.start()
    pool.close()
Example 9
    def read_all_metricdb_files(self):
        """Read all the metric-db files and create a dataframe with num_nodes X
        num_metricdb_files rows and num_metrics columns. Three additional columns
        store the node id, MPI process rank, and thread id (if applicable).
        """
        metricdb_files = glob.glob(self.dir_name + "/*.metric-db")
        metricdb_files.sort()

        # All the metric data per node and per process is read into the metrics
        # array below. The three additional columns are for storing the implicit
        # node id (nid), MPI process rank, and thread id (if applicable).
        shape = [self.num_nodes * self.num_metricdb_files, self.num_metrics + 3]
        size = int(np.prod(shape))

        # shared memory buffer for multiprocessing
        shared_buffer = mp.sharedctypes.RawArray("d", size)

        pool = mp.Pool(initializer=init_shared_array, initargs=(shared_buffer,))
        self.metrics = np.frombuffer(shared_buffer).reshape(shape)
        args = [
            (
                filename,
                self.num_nodes,
                self.num_threads_per_rank,
                self.num_metrics,
                shape,
            )
            for filename in metricdb_files
        ]
        try:
            pool.map(read_metricdb_file, args)
        finally:
            pool.close()

        # once all files have been read, create a dataframe of metrics
        metric_names = [
            self.metric_names[key] for key in sorted(self.metric_names.keys())
        ]
        for idx, name in enumerate(metric_names):
            if name == "CPUTIME (usec) (E)" or name == "CPUTIME (sec) (E)":
                metric_names[idx] = "time"
            if name == "CPUTIME (usec) (I)" or name == "CPUTIME (sec) (I)":
                metric_names[idx] = "time (inc)"

        self.metric_columns = metric_names
        df_columns = self.metric_columns + ["nid", "rank", "thread"]
        self.df_metrics = pd.DataFrame(self.metrics, columns=df_columns)
        self.df_metrics["nid"] = self.df_metrics["nid"].astype(int, copy=False)
        self.df_metrics["rank"] = self.df_metrics["rank"].astype(int, copy=False)
        self.df_metrics["thread"] = self.df_metrics["thread"].astype(int, copy=False)

        # if number of threads per rank is 1, we do not need to keep the thread ID column
        if self.num_threads_per_rank == 1:
            del self.df_metrics["thread"]

        # used to speedup parse_xml_node
        self.np_metrics = self.df_metrics[self.metric_columns].values

        # getting the number of execution threads for our stride in
        # subtract_exclusive_metric_vals/ num nodes is already calculated
        self.total_execution_threads = self.num_threads_per_rank * self.num_ranks
Example 10
def main(args):
    freqs = range(0, 380, args.delta)
    argsfreqs = [(args, freq) for freq in freqs]

    calname = args.calskymod[:5]
    tarname = args.tarskymod[:5]

    if not args.shortercut:
        pool = mp.Pool(int(
            args.numthreads))  # number of concurrent frequencies
        try:
            if int(args.numthreads) == 1:
                print "Using single-threaded version"
                map(threadmain, argsfreqs)
            else:
                print "Using multithreaded version"
                pool.map(threadmain, argsfreqs)
        except Exception as e:
            print e
        oscsystem('cat %s_prical_stats_*.txt > %s_prical_stats.txt' %
                  (calname, calname))
        oscsystem('cat %s_prical_stats_*.txt > %s_prical_stats.txt' %
                  (tarname, tarname))
        oscsystem('cat %s_seccal_stats_*.txt > %s_seccal_stats.txt' %
                  (tarname, tarname))

    caldata = loadtxt(calname + '_prical_stats.txt')
    pritardata = loadtxt(tarname + '_prical_stats.txt')
    sectardata = loadtxt(tarname + '_seccal_stats.txt')

    xax = arange(110., 190.1, 1.)

    fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(24, 6))

    makeplot(ax0, 'Primary calibrator (%s), direct calibration' % calname, 40.,
             210.)
    ax0.plot(xax, sourcespec(calname, xax), 'k--')
    ax0.errorbar(caldata[:, 0] / 1.e6,
                 caldata[:, 1],
                 yerr=caldata[:, 2],
                 marker='o',
                 linestyle='none')

    secymin = float(args.yaxislim.split(',')[0])
    secymax = float(args.yaxislim.split(',')[1])

    makeplot(ax1, 'Secondary calibrator (%s), direct calibration' % tarname,
             secymin, secymax)
    ax1.plot(xax, sourcespec(tarname, xax), 'k--')
    ax1.errorbar(pritardata[:, 0] / 1.e6,
                 pritardata[:, 1],
                 yerr=pritardata[:, 2],
                 marker='o',
                 linestyle='none')

    makeplot(ax2,
             'Secondary calibrator (%s), transferred calibration' % tarname,
             secymin, secymax)
    ax2.plot(xax, sourcespec(tarname, xax), 'k--')
    ax2.errorbar(sectardata[:, 0] / 1.e6,
                 sectardata[:, 1],
                 yerr=sectardata[:, 2],
                 marker='o',
                 linestyle='none',
                 label='old model')
    #ax2.legend()

    savefig(args.plot, bbox_inches='tight')
    if args.onscreen: show()
def run( args ):
    # Do some additional argument checking
    if not args.weights_file and not args.permutation_directory:
        sys.stderr.write('You must set the weights file or permutation directory, '\
                         'otherwise nothing will be output.')
        sys.exit(1)

    # Load mutation data
    if args.verbose > 0:
        print '* Loading mutation data...'

    mutation_data = load_mutation_data( args.mutation_file )
    genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data

    geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.iteritems() )
    patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.iteritems() )
    geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) )
    indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) )
    patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) )
    indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )

    edges = set()
    for gene, cases in geneToCases.iteritems():
        for patient in cases:
            edges.add( (geneToIndex[gene], patientToIndex[patient]) )

    edge_list = np.array(sorted(edges), dtype=np.int)

    # Run the bipartite edge swaps
    if args.verbose > 0:
        print '* Permuting matrices...'

    m = len(all_genes)
    n = len(patients)
    num_edges = len(edges)
    max_swaps = int(args.swap_multiplier*num_edges)
    max_tries = 10**9
    seeds = [ i+args.start_index for i in range(args.num_permutations) ]

    # Run the bipartite edge swaps in parallel if more than one core indicated
    num_cores = args.num_cores if args.num_cores != -1 else mp.cpu_count()
    if num_cores != 1:
        pool = mp.Pool(num_cores)
        map_fn = pool.map
    else:
        map_fn = map

    wrapper_args = [ (edge_list, max_swaps, max_tries, seeds[i::num_cores], 0, m,
                      n, num_edges, indexToGene, indexToPatient) for i in range(num_cores) ]
    results = map_fn(permute_matrices_wrapper, wrapper_args)

    if num_cores != 1:
        pool.close()
        pool.join()

    # Create the weights file
    if args.weights_file:
        if args.verbose > 0:
            print '* Saving weights file...'

        # Merge the observeds
        observeds = [ observed for observed, _ in results ]
        P = np.add.reduce(observeds) / float(len(observeds))

        # Verify the weights
        for g, obs in geneToObserved.iteritems():
            assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < 0.1)

        for p, obs in patientToObserved.iteritems():
            assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < 0.1)

        # Add pseudocounts to entries with no mutations observed
        P[P == 0] = 1./(2. * args.num_permutations)

        # Output to file.
        # The rows/columns preserve the order given by the mutation file.
        np.save(args.weights_file, P)

    # Save the permuted mutation data
    if args.permutation_directory:
        output_prefix = args.permutation_directory + '/permuted-mutations-{}.json'
        if args.verbose > 0:
            print '* Saving permuted mutation data...'

        for _, permutation_list in results:
            for permutation in permutation_list:
                # Output in adjacency list format
                with open(output_prefix.format(permutation['permutation_number']), 'w') as OUT:
                    permutation['params'] = params
                    json.dump( permutation, OUT )
Example 12
    print("#                                                                     #")
    print("#     This is made for crawling                                       #")
    print("#     Graduate School of Science, Kyoto University web page.          #")
    print("#     If you have any trouble with running this code,                 #")
    print("#     please send message to my e-mail.                               #")
    print("#     [email protected]   (final update. 03/07/2020)     #")
    print("#                                                                     #")
    print("#######################################################################\n")

    start = time()
    Ext = ['.pdf', '.mp3', '.mp4', '.doc', '.docx', '.xls', '.ppt']
    sciURL = ["http://www.sci.kyoto-u.ac.jp/"]
    sciURLja = []
    sciURLen = []

    with mp.Pool(processes=2) as p:
        for i in range(3):
            for url in sciURL:
                print("Searching url from link " + url)
                sciURL = sciURL + Searching().URL(url, "ja/")
                sciURL = sciURL + Searching().URL(url, "en/")
            sciURL = list(set(sciURL))
            sciURL.sort()

    for url in sciURL:
        if "/en" in url:
            sciURLen.append(url)
        if "/ja" in url:
            sciURLja.append(url)

    sciURLja = list(set(sciURLja))
Example 13
        quit_link_elem.click()

    def logout(self):
        logout_link_elem = WebDriverWait(self.driver, 5).until(
            EC.presence_of_element_located((By.ID, "logout"))
        )
        logout_link_elem.click()

def user_gen(url, ids):
    return [(url, 'User%d'%x, 'User%d'%x) for x in ids]

def wrap_run_load_test(args):
    url = "http://yaksh.fossee.aero.iitb.ac.in/exam/"
    selenium_test = SeleniumTest(url=url, quiz_name=quiz_name)
    return selenium_test.run_load_test(*args)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('url', type=str, help="url of the website being tested")
    parser.add_argument('start', type=int, help="Starting user id")
    parser.add_argument("-n", "--number", type=int, default=10, help="number of users")
    opts = parser.parse_args()

    quiz_name = "Demo quiz"
    selenium_test = SeleniumTest(url=opts.url, quiz_name=quiz_name)
    pool = multiprocessing.Pool(opts.number)
    pool.map(wrap_run_load_test, user_gen(opts.url, range(opts.start, opts.start + opts.number)))
    pool.close()
    pool.join()

    make_convex = FLAGS.convex
    fit_cylinder = FLAGS.cylinder
    print("Data in from: " + dir_in)
    print("Writing results in: " + dir_out)

    if not exists(dir_out): 
        mkdir(dir_out)

    all_files = [f for f in listdir(dir_in) if isfile(join(dir_in, f))]
    obj_files = [f for f in all_files if f.split('.')[-1] == 'obj']

    # for obj_file in obj_files:
    #     process_obj(obj_file, dir_in, dir_out, grid_size, max_faces, make_convex, fit_cylinder, quality)

    pool_size = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=pool_size, maxtasksperchild=2)
    pool.map(partial(process_obj, dir_in=dir_in, dir_out=dir_out, grid_size=grid_size, max_faces=max_faces, make_convex=make_convex, fit_cylinder=fit_cylinder, quality=quality), obj_files)
    pool.close()
    pool.join()


# mesh = pymesh.load_mesh("model_1ef68777bfdb7d6ba7a07ee616e34cd7.obj")
# print "mesh"
# print mesh.vertices.shape
# print mesh.faces.shape

# print mesh.attribute_names

# surf_mesh = pymesh.compute_outer_hull(mesh)
# print "surf"
# print surf_mesh.vertices.shape
    
iris = parallelize_dataframe(iris, multiply_columns)

#Distributed processing using pandas
#Source: http://gouthamanbalaraman.com/blog/distributed-processing-pandas.html
import pandas as pd
import multiprocessing as mp
LARGE_FILE = "D:\\my_large_file.txt"
CHUNKSIZE = 100000 # processing 100,000 rows at a time
def process_frame(df):
        # process data frame
        return len(df)

if __name__ == '__main__':
        reader = pd.read_table(LARGE_FILE, chunksize=CHUNKSIZE)
        pool = mp.Pool(4) # use 4 processes

        funclist = []
        for df in reader:
                # process each data frame
                f = pool.apply_async(process_frame,[df])
                funclist.append(f)
        result = 0
        for f in funclist:
                result += f.get(timeout=10) # timeout in 10 seconds
        print "There are %d rows of data"%(result)


#Transpose a pyspark dataframe	
df.T
np.random.seed(0)

# with open("data/pseudo_data.txt", "r") as fp:
with open("data/data.txt", "r") as fp:
    fp_lines = fp.readlines()


def train_lda(argv):
    global fp_lines
    lda = GibbsLDA(*argv, iterations=100, verbose=False)
    lda.fit(fp_lines[1:])
    lda.save_state("output/z_{}_{}_{}.npz".format(*argv))
    return lda


range_n_components = [2, 3, 5, 7, 10, 20]
range_doc_topic_prior = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
range_topic_word_prior = [0.01, 0.02, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]

hyperparameters = list()
for (n_components, doc_topic_prior,
     topic_word_prior) in itertools.product(range_n_components,
                                            range_doc_topic_prior,
                                            range_topic_word_prior):
    doc_topic_prior *= 1 / n_components
    topic_word_prior *= 1 / n_components
    hyperparameters.append((n_components, doc_topic_prior, topic_word_prior))

pool = multiprocessing.Pool(processes=200)
LDAs = pool.map(train_lda, hyperparameters)
Example 17
Script taking a data directory as commandline argument, and concurrently centers all models within.
"""

def center(model_file, data_path):
    """
    Centers a voxel 3D model in the y and x axes.
    """
    if model_file.endswith(".binvox"):
        with open(f'{data_path}/{model_file}', 'rb') as f:
            try:
                print(f'File: {model_file}')
                model = binvox_rw.read_as_3d_array(f)
                model.data = binvox_rw.dense_to_sparse(model.data)
                if len(model.data[0]) != 0:
                    translate_x = (
                        model.dims[0] - max(model.data[0]) - min(model.data[0]))//2
                    translate_y = (
                        model.dims[2] - max(model.data[2]) - min(model.data[2]))//2
                    for n in range(len(model.data[0])):
                        model.data[0][n] += translate_x
                        model.data[2][n] += translate_y
                model.write(f'{data_path}/{model_file}')
            except Exception:
                print(f'Could not center file: {model_file}')


pool = mp.Pool(processes=mp.cpu_count())
data_path = sys.argv[1]
temp = [pool.apply_async(center, args=(model_file, data_path)) for model_file in os.listdir(data_path)]
[p.get() for p in temp]
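
# Equivalent fan-out written with starmap (a sketch, not part of the original):
# starmap blocks until every file has been processed; `center` and the
# command-line handling are assumed to be as above.
import multiprocessing as mp
import os
import sys

if __name__ == '__main__':
    data_path = sys.argv[1]
    with mp.Pool(processes=mp.cpu_count()) as pool:
        pool.starmap(center, [(f, data_path) for f in os.listdir(data_path)])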
Example 18
    assert len(messages) == len(categories)
    tM = np.array(list(it.starmap(message_distance, it.combinations(messages, 2))))
    sM = np.array(list(it.starmap(meaning_distance, it.combinations(categories, 2))))
    return mantel_test(tM, sM, method=method, perms=perms)

def process_file(input_file):
    messages, categories = read_csv(input_file)

    m_l = mantel(messages, categories)
    m_ln = mantel(messages, categories, message_distance=levenshtein_normalised)
    m_j = mantel(messages, categories, message_distance=jaccard, map_msg_to_str=False)

    return input_file, m_l, m_ln, m_j


if __name__=="__main__":
    import argparse
    parser = argparse.ArgumentParser("compute distances and mantels for artifical languages")
    parser.add_argument("--input_dir", type=str, required=True)
    parser.add_argument("--output_file", type=str, required=True)

    args = parser.parse_args()

    files = list(pathlib.Path(args.input_dir).glob("**/*.tsv"))
    output_file = args.output_file
    with open(output_file, "w") as ostr, mp.Pool(mp.cpu_count()) as pool:
        calls = pool.imap_unordered(process_file, files)
        for input_file, m_l, m_ln, m_j in tqdm.tqdm(calls, total=len(files)):
            print(input_file.name, 'levenshtein', *m_l, 'levenshtein normalized', *m_ln, 'jaccard', *m_j, file=ostr)
Example 19
def find_all_optoinable_stocks_multiprocess(udlyings):
    with multiprocessing.Pool() as pool:
        pool.map(find_optionable_stocks, udlyings)
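
# Hypothetical call (not in the original source); the tickers are placeholders
# and find_optionable_stocks is assumed to be defined elsewhere in the module.
if __name__ == '__main__':
    find_all_optoinable_stocks_multiprocess(['AAPL', 'MSFT', 'TSLA'])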
Example 20
        Aggregate2 = predict(model2, data)
        Output_1 = Output(Aggregate1)
        Output_2 = Output(Aggregate2)
        GE[i] = get_error(model1, XTest, YTests[MC], 2**(n - 1)) / 2
        GE_BN[i] = get_error(model2, XTest, YTests[MC], 2**(n - 1)) / 2
        LVC[i] = get_LVComplexity(Output_1)
        LVC_BN[i] = get_LVComplexity(Output_2)

        del model1
        del model2

    return (LVC, LVC_BN, GE, GE_BN)
    # LVC_outputs.append(LVC), LVC_output_BNs.append(LVC_BN), GE_outputs.append(GE), GE_output_BNs.append(GE_BN)


pool = multiprocessing.Pool(9)
tasks = range(total_MC)
result = []
with tqdm.tqdm(total=total_MC,
               mininterval=5,
               bar_format='{elapsed}{l_bar}{bar}{r_bar}') as t:
    for i, x in enumerate(pool.imap(process, tasks)):
        t.update()
        result.append(x)
pool.close()
pool.join()

for output in result:
    LVC, LVC_BN, GE, GE_BN = output
    LVC_outputs.append(LVC)
    LVC_output_BNs.append(LVC_BN)
Example 21
            frame_pc = raw_pc_data[str(frame_key)]
            ground_frame_pc, clean_frame_pc = get_ground(frame_pc)

            clean_pcs[str(frame_key)] = clean_frame_pc
            ground_pcs[str(frame_key)] = ground_frame_pc

            if (frame_index + 1) % 10 == 0:
                print('Ground Removal SEQ {} / {}, Frame {} / {}'.format(
                    file_index + 1, len(file_names), frame_index + 1,
                    len(keys)))

        np.savez_compressed(os.path.join(clean_pc_folder, file_name),
                            **clean_pcs)
        np.savez_compressed(os.path.join(ground_pc_folder, file_name),
                            **ground_pcs)


if __name__ == '__main__':
    if args.process > 1:
        pool = multiprocessing.Pool(args.process)
        for token in range(args.process):
            result = pool.apply_async(main,
                                      args=((token,
                                             args.process), args.raw_pc_folder,
                                            args.clean_pc_folder,
                                            args.ground_pc_folder))
        pool.close()
        pool.join()
    else:
        main((0, 1), args.raw_pc_folder, args.clean_pc_folder,
             args.ground_pc_folder)
def findRotMaxRect(data_in,flag_opt=False,flag_parallel = False, nbre_angle=10,flag_out=None,flag_enlarge_img=False,limit_image_size=300):

    '''
    flag_opt     : True  -> only nbre_angle angles are tested between 90 and 180,
                            and an optimisation descent is then run on the best fit.
                   False -> 100 angles are tested from 90 to 180.
    flag_parallel: only valid when flag_opt=False; the 100 angles are evaluated in a multiprocessing pool.
    flag_out     : the angle and rectangle of the rotated image are output together with the rectangle of the original image.
    flag_enlarge_img : the image used in the function is doubled in size relative to the original, to ensure all features stay inside when rotated.
    limit_image_size : controls the number of pixels of the image used in the function.
                       This speeds up the code but can give approximate results if the shape is not simple.
    '''

    #time_s = datetime.datetime.now()

    #make the image square
    #----------------
    nx_in, ny_in = data_in.shape
    if nx_in != ny_in:
        n = max([nx_in,ny_in])
        data_square = np.ones([n,n])
        xshift = (n-nx_in)/2
        yshift = (n-ny_in)/2
        if yshift == 0:
            data_square[xshift:(xshift+nx_in),:                 ] = data_in[:,:]
        else:
            data_square[:                 ,yshift:(yshift+ny_in)] = data_in[:,:]
    else:
        xshift = 0
        yshift = 0
        data_square = data_in

    #apply scale factor if image bigger than limit_image_size
    #----------------
    if data_square.shape[0] > limit_image_size:
        data_small = cv2.resize(data_square,(limit_image_size, limit_image_size),interpolation=0)
        scale_factor = 1.*data_square.shape[0]/data_small.shape[0]
    else:
        data_small = data_square
        scale_factor = 1


    # set the input data with an odd number of point in each dimension to make rotation easier
    #----------------
    nx,ny = data_small.shape
    nx_extra = -nx; ny_extra = -ny
    if nx%2==0:
        nx+=1
        nx_extra = 1
    if ny%2==0:
        ny+=1
        ny_extra = 1
    data_odd = np.ones([data_small.shape[0]+max([0,nx_extra]),data_small.shape[1]+max([0,ny_extra])])
    data_odd[:-nx_extra, :-ny_extra] = data_small
    nx,ny = data_odd.shape


    nx_odd,ny_odd = data_odd.shape

    if flag_enlarge_img:
        data = np.zeros([2*data_odd.shape[0]+1,2*data_odd.shape[1]+1]) + 1
        nx,ny = data.shape
        data[nx/2-nx_odd/2:nx/2+nx_odd/2,ny/2-ny_odd/2:ny/2+ny_odd/2] = data_odd
    else:
        data = np.copy(data_odd)
        nx,ny = data.shape

    #print (datetime.datetime.now()-time_s).total_seconds()

    if flag_opt:
        myranges_brute = ([(-10.,10.),])
        coeff0 = np.array([0.,])
        coeff1  = optimize.brute(residual, myranges_brute, args=(data,), Ns=nbre_angle, finish=None)
        popt = optimize.fmin(residual, coeff1, args=(data,), xtol=5, ftol=1.e-5, disp=False)
        angle_selected = popt[0]

        #rotation_angle = np.linspace(0,360,100+1)[:-1]
        #mm = [residual(aa,data) for aa in rotation_angle]
        #plt.plot(rotation_angle,mm)
        #plt.show()
        #pdb.set_trace()

    else:
        rotation_angle = np.linspace(-45,45,100+1)[:-1]
        args_here=[]
        for angle in rotation_angle:
            args_here.append([angle,data])

        if flag_parallel:

            # set up a pool to run the parallel processing
            cpus = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(processes=cpus)

            # then the map method of pool actually does the parallelisation

            results = pool.map(residual_star, args_here)

            pool.close()
            pool.join()


        else:
            results = []
            for arg in args_here:
                results.append(residual_star(arg))

        argmin = np.array(results).argmin()
        angle_selected = args_here[argmin][0]

    rectangle, M_rect_max, RotData  = get_rectangle_coord(angle_selected,data,flag_out=True)
    #rectangle, M_rect_max  = get_rectangle_coord(angle_selected,data)

    #print (datetime.datetime.now()-time_s).total_seconds()

    #invert rectangle
    M_invert = cv2.invertAffineTransform(M_rect_max)
    rect_coord = [rectangle[:2], [rectangle[0],rectangle[3]] ,
                  rectangle[2:], [rectangle[2],rectangle[1]] ]

    #ax = plt.subplot(111)
    #ax.imshow(RotData.T,origin='lower',interpolation='nearest')
    #patch = patches.Polygon(rect_coord, edgecolor='k', facecolor='None', linewidth=2)
    #ax.add_patch(patch)
    #plt.show()

    rect_coord_ori = []
    for coord in rect_coord:
        rect_coord_ori.append(np.dot(M_invert,[coord[0],(ny-1)-coord[1],1]))

    #transform to numpy coord of input image
    coord_out = []
    for coord in rect_coord_ori:
        coord_out.append(    [ scale_factor*round(       coord[0]-(nx/2-nx_odd/2),0)-xshift,\
                               scale_factor*round((ny-1)-coord[1]-(ny/2-ny_odd/2),0)-yshift])

    coord_out_rot = []
    # coord_out_rot_h = []
    # for coord in rect_coord:
    #     coord_out_rot.append( [ scale_factor*round(       coord[0]-(nx/2-nx_odd/2),0)-xshift, \
    #                             scale_factor*round(       coord[1]-(ny/2-ny_odd/2),0)-yshift ])
    #     coord_out_rot_h.append( [ scale_factor*round(       coord[0]-(nx/2-nx_odd/2),0), \
    #                               scale_factor*round(       coord[1]-(ny/2-ny_odd/2),0) ])

    #M = cv2.getRotationMatrix2D( ( (data_square.shape[0]-1)/2, (data_square.shape[1]-1)/2 ), angle_selected,1)
    #RotData = cv2.warpAffine(data_square,M,data_square.shape,flags=cv2.INTER_NEAREST,borderValue=1)
    #ax = plt.subplot(121)
    #ax.imshow(data_square.T,origin='lower',interpolation='nearest')
    #ax = plt.subplot(122)
    #ax.imshow(RotData.T,origin='lower',interpolation='nearest')
    #patch = patches.Polygon(coord_out_rot_h, edgecolor='k', facecolor='None', linewidth=2)
    #ax.add_patch(patch)
    #plt.show()

    #coord for data_in
    #----------------
    #print scale_factor, xshift, yshift
    #coord_out2 = []
    #for coord in coord_out:
    #    coord_out2.append([int(np.round(scale_factor*coord[0]-xshift,0)),int(np.round(scale_factor*coord[1]-yshift,0))])

    #print (datetime.datetime.now()-time_s).total_seconds()

    if flag_out is None:
        return coord_out
    elif flag_out == 'rotation':
        return coord_out, angle_selected, coord_out_rot
    else:
        print('bad def in findRotMaxRect input. stop')
        pdb.set_trace()
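
# Usage sketch (not part of the original source). `mask` is assumed to be a 2-D
# numpy array in the layout the function expects: the feature of interest drawn
# on a background of ones, as for data_in above.
import numpy as np

mask = np.ones((200, 200))
mask[60:140, 50:160] = 0  # hypothetical feature

# corners of the fitted rectangle, expressed in the coordinates of the input image
corners = findRotMaxRect(mask, flag_opt=True, nbre_angle=10)

# flag_out='rotation' also returns the selected angle (see the docstring above)
corners, angle, corners_rot = findRotMaxRect(mask, flag_opt=True, flag_out='rotation')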
Example 23
import multiprocessing as mp


def compute(data):
    return data**2


if __name__ == '__main__':
    with mp.Pool(10) as pool:
        print(
            pool.map(compute,
                     [1, 7, 8, -2, 1, 7, 8, -2, 1, 7, 8, -2, 1, 7, 8, -2]))
Example 24
def __init__(self, threads):
    LocalScheduler.__init__(self)
    self.threads = threads
    self.tasks = {}
    self.pool = multiprocessing.Pool(self.threads or 2)
Example 25
        TC_Phi_5_imp_avg   = np.zeros((np.size(rc_arr), np.size(nt_arr)))
        TC_Phi_5_unalt_avg = np.zeros((np.size(rc_arr), np.size(nt_arr)))
        TC_Phi_5_imp_rms   = np.zeros((np.size(rc_arr), np.size(nt_arr)))
        TC_Phi_5_unalt_rms = np.zeros((np.size(rc_arr), np.size(nt_arr)))

        TC_A_imp_avg       = np.zeros((np.size(rc_arr), np.size(nt_arr)))
        TC_A_unalt_avg     = np.zeros((np.size(rc_arr), np.size(nt_arr)))
        TC_A_imp_rms       = np.zeros((np.size(rc_arr), np.size(nt_arr)))
        TC_A_unalt_rms     = np.zeros((np.size(rc_arr), np.size(nt_arr)))

        # Start parallel processing
        nthread = mp.cpu_count()
        nthread = 2
        print('starting pool with %i threads ...' % nthread)
        pool = mp.Pool(processes=nthread)

        for int_arr, nt in enumerate(nt_arr):
                for irc_arr, rc in enumerate(rc_arr):
                        l_fracs       = l_frac_data[irc_arr,:]                        
                        print('lc_fracs_Driver = ', lc_fracs)
                        
                        # Get list of tuples length of nsamp 
                        arg_tuple = (generate_data, nx, ny, nz, finest, l_fracs, lc_fracs, nt)
                        arg_list = []
                        [arg_list.append(arg_tuple) for i in range(nsample)]

                        # Farm out nsample to each processor
                        res_tuple = pool.map(parallel_run, arg_list)
                        print('res_tuple = ', res_tuple)
                        print('comp ratio = ', rc_arr[irc_arr])
Example 26
def run_test(tickers,stocks,windows):
    with mp.Pool(mp.cpu_count()) as p:
        y = p.map(partial(Mean_Variance_Rolling,tickers=tickers,stocks=stocks), windows)
    return y
Example 27
def main():
    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    src_paths = []
    dst_paths = []

    skipped = 0
    for src_path in im.find(a.input_dir):
        name, _ = os.path.splitext(os.path.basename(src_path))
        dst_path = os.path.join(a.output_dir, name + ".png")
        if os.path.exists(dst_path):
            skipped += 1
        else:
            src_paths.append(src_path)
            dst_paths.append(dst_path)
    
    print("skipping %d files that already exist" % skipped)
            
    global total
    total = len(src_paths)
    
    print("processing %d files" % total)

    global start
    start = time.time()
    
    if a.operation == "edges":
        # use a multiprocessing pool for this operation so it can use multiple CPUs
        # create the pool before we launch processing threads
        global edge_pool
        edge_pool = multiprocessing.Pool(a.workers)

    if a.workers == 1:
        with tf.Session() as sess:
            for src_path, dst_path in zip(src_paths, dst_paths):
                print (100*'::::')
                process(src_path, dst_path)
                complete()
    else:
        queue = tf.train.input_producer(zip(src_paths, dst_paths), shuffle=False, num_epochs=1)
        dequeue_op = queue.dequeue()

        def worker(coord):
            with sess.as_default():
                while not coord.should_stop():
                    try:
                        print (100*'""""""""""""""')
                        src_path, dst_path = sess.run(dequeue_op)
                    except tf.errors.OutOfRangeError:
                        coord.request_stop()
                        break

                    process(src_path, dst_path)
                    complete()

        # init epoch counter for the queue
        local_init_op = tf.local_variables_initializer()
        with tf.Session() as sess:
            sess.run(local_init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            for i in range(a.workers):
                t = threading.Thread(target=worker, args=(coord,))
                t.start()
                threads.append(t)
            
            try:
                coord.join(threads)
            except KeyboardInterrupt:
                coord.request_stop()
                coord.join(threads)
    # Handle command line options
    parser = argparse.ArgumentParser(
        description='Compute features in parallel')
    parser.add_argument('--numprocs', type=int,
                        default=multiprocessing.cpu_count(),
                        help='Number of processors to use. ' + \
                        "Default for this machine is %d" % (multiprocessing.cpu_count(),) )

    args = parser.parse_args()

    if args.numprocs < 1:
        sys.exit('Number of processors to use must be greater than 0')

    # Start my pool
    pool = multiprocessing.Pool(args.numprocs)

    print("Using %d processors..." % (args.numprocs))

    # construct models

    base_model = VGG16(weights='imagenet')

    models = []
    for layer in base_model.layers:
        models.append(
            Model(input=base_model.input,
                  output=base_model.get_layer(layer.name).output))
    # load data

    f = open(
Example 29
	return sum(X[input1%10])



if __name__ == '__main__':

	X = np.random.rand(10, 3)

	print X


	pool_size = multiprocessing.cpu_count()
	# active_procs = multiprocessing.active_children()
	# print active_procs

	pool = multiprocessing.Pool(processes=pool_size)

	print 'READY?'
	
	inputs = list(range(1000))
	#print 'Input   :', inputs
	
	# start = time.time()
	# builtin_outputs = map(do_calculation, inputs)
	# print 'Built-in:', builtin_outputs
	# end = time.time()
	# print end - start
	
	# start = time.time()
	#pool_outputs = pool.map(do_calculation, inputs)
Example 30
            all_user_rt_tt_diff.append(st.median(single_user_rt_tt_diff))
        all_members_rting_arr.append(single_member_rting_arr)

        creation_data = db[collectionName].find({'rt_user_id': a_user}, {'rt_user_created_at':1}, no_cursor_timeout=True).limit(1)
        for cd in creation_data:
            rt_user_created_time_arr.append(datetime.datetime.strptime(dateutil.parser.parse(cd['rt_user_created_at']).strftime('%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S'))

    ipt_d = density_features(rt_tweet_created_time_arr)
    rter_creation_d_std, rter_creation_mean, rter_creation_d_cov = creation_time_dispersion(rt_user_created_time_arr)
    s_td, me_an, co_v = retweeting_time_dispersion(all_members_rting_arr)

    cov_of_response_times = st.pstdev(all_user_rt_tt_diff)/float(st.mean(all_user_rt_tt_diff))
    return sn, ipt_d, s_td, me_an, co_v, cov_of_response_times, rter_creation_d_std, rter_creation_mean, rter_creation_d_cov,label

if __name__ == '__main__':
    pool = multiprocessing.Pool(CONFIG_POOL_SIZE)
    mongo_query = {}
    groups = db[group_collectionName].find(mongo_query, no_cursor_timeout=True)
    total_count= groups.count()
    all_data = pool.map(features, ((group, total_count) for idx,group in enumerate(groups)))
    pool.close()
    pool.join()

    df = pd.DataFrame(all_data, columns=['groupID', 'inter_posting_time_compactness', 'retweeting_time_distribution_sd',
'retweeting_time_distribution_mean', 'retweeting_time_distribution_cov', 'cov_response_time',
'user_creation_time_distribution_sd', 'user_creation_time_distribution_mean', 'user_creation_time_distribution_cov', 'label'])

    df.to_pickle('extracted_features/temporal_feature.pkl')

client.close()