Example #1
def gridSearch(options, use_datasets, numExamples, compute_mistakes=False, verbose=False, parallelize=False):
    if MODEL_KEYWORD not in options:
        print 'ERROR: must specify models for grid search under "%s" key.' % (MODEL_KEYWORD)
        return
    paramCombos = myProduct(options)
    partialTestCombo = partial(testCombo, use_datasets=use_datasets, numExamples=numExamples, compute_mistakes=compute_mistakes, verbose=verbose)
    if parallelize:
        from pathos.multiprocessing import Pool
        p = Pool(5)
        try:
            result = p.map_async(partialTestCombo, paramCombos)
            result = result.get(999999999)
            bestScore, bestParamsStr, bestCombo = max(result, key=lambda x:x[0])
            sys.stdout = open("best.out", "w")
            print 'Best score of %s was achieved by parameters:\n%s' % (bestScore, bestParamsStr)
        except KeyboardInterrupt:
            p.terminate()
            print "You cancelled the program!"
            sys.exit(1)
    else:
        bestScore, bestCombo, bestComboStr = float('-inf'), None, ''
        for paramCombo in paramCombos:
            score, paramsStr, _ = testCombo(paramCombo, use_datasets=use_datasets, numExamples=numExamples, compute_mistakes=compute_mistakes, verbose=verbose, parallelize=False)
            if score > bestScore:
                bestScore, bestCombo, bestComboStr = score, paramCombo, paramsStr
        print 'Best score of %s was achieved by parameters:\n%s' % (bestScore, bestComboStr)
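A note on the result.get(999999999) call in Example #1: under Python 2, waiting on a pool result without a timeout swallows Ctrl-C, so passing a very large timeout to map_async(...).get() is a common workaround that lets KeyboardInterrupt reach the main process. A minimal sketch of just that pattern, with a placeholder worker and job list that are not part of the original code:

from pathos.multiprocessing import Pool

def some_work(job):
    # placeholder worker; stands in for testCombo above
    return job

if __name__ == '__main__':
    pool = Pool(5)
    try:
        async_result = pool.map_async(some_work, range(100))
        results = async_result.get(999999999)  # huge timeout keeps Ctrl-C deliverable
    except KeyboardInterrupt:
        pool.terminate()
        raise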
Example #2
class DataTransformation(object):
    """
    Args:
        source ([dict]): list of records
        config (Configuration): configuration to apply to each record
    """

    def __init__(self, source, config, single_process=False, processes=cpu_count()):
        if single_process:
            self.results = imap(transform, izip(source, repeat(config)))
        else:
            self.pool = Pool(processes)
            self.results = self.pool.imap(transform, izip(source, repeat(config)))

    def __iter__(self):
        return self

    def next(self):
        return self.results.next()

    def __del__(self):
        try:
            self.pool.close()
        except:
            pass
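Examples #2 and #4 expose pool.imap results through the Python 2 iterator protocol (next, with izip/imap from itertools). A Python 3 flavoured sketch of the same lazy-wrapper idea, assuming the same transform(record_and_config) helper used above (the class name here is only illustrative):

from itertools import repeat
from pathos.multiprocessing import ProcessingPool as Pool

class LazyTransformation:
    """Stream pool.imap results instead of materializing them all at once."""

    def __init__(self, source, config, processes=4):
        self.pool = Pool(processes)
        # pair every record with the shared config, as izip/repeat do above
        self.results = self.pool.imap(transform, zip(source, repeat(config)))

    def __iter__(self):
        return iter(self.results)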
Example #3
def json_to_metadata_chunks(args,file_chunks = []):
    # Open the json file, and split the reading of the file among worker threads
    results = [] 

    if args.json_metadata == None:
        return None
    num_proc = len(file_chunks)
    print (file_chunks, file=sys.stderr)
    pool = Pool(processes = num_proc)
    

    with open(args.json_metadata) as json_metadata_file:
        results = [pool.map(process_json_line,json_metadata_file, chunk) for index, chunk in enumerate(file_chunks)]
    #objs = [p.get() for p in results] 
    return merge_exact_duplicates(results)
Example #4
class DataFusion(object):
    """
    Args:
        source ([tuple]): list of tuples, each containing the records to merge
        config (Configuration): configuration to apply to each tuple
    """

    def __init__(self, source, config, processes=cpu_count()):
        self._pool = Pool(processes)
        self._results = self._pool.imap(_merge_records, izip(source, repeat(config)))

    def __iter__(self):
        return self

    def next(self):
        return self._results.next()

    def __del__(self):
        self._pool.close()
Example #5
    def __call__(self, with_mp=False):
        cm   = self.scatm.cmodel
        scat = self.scatm.smodel

        cgeo = np.pi * np.power( self.dist.a * c.micron2cm(), 2 )

        # Test for graphite case
        if cm.cmtype == 'Graphite':
            if np.size(self.dist.a) > 1:
                for i in range( np.size(self.dist.a) ):
                    self.qsca_pe[:,i] = scat.Qsca( self.E, a=self.dist.a[i], cm=cmi.CmGraphite(size=cm.size, orient='perp') )
                    self.qsca_pa[:,i] = scat.Qsca( self.E, a=self.dist.a[i], cm=cmi.CmGraphite(size=cm.size, orient='para') )
            else:
                self.qsca_pe = scat.Qsca( self.E, a=self.dist.a, cm=cmi.CmGraphite(size=cm.size, orient='perp') )
                self.qsca_pa = scat.Qsca( self.E, a=self.dist.a, cm=cmi.CmGraphite(size=cm.size, orient='para') )
            
            self.qsca = ( self.qsca_pa + 2.0 * self.qsca_pe ) / 3.0

        else:
            if np.size(self.dist.a) > 1:
                if with_mp:
                    pool = Pool(processes=2)
                    self.qsca = np.array(pool.map(self._one_scatter,self.dist.a)).T
                else:
                    for i in range( np.size(self.dist.a) ):
                        self.qsca[:,i] = self._one_scatter(self.dist.a[i])
            else:
                self.qsca = scat.Qsca( self.E, a=self.dist.a, cm=cm )

        if np.size(self.dist.a) == 1:
            kappa = self.dist.nd * self.qsca * cgeo / self.dist.md
        else:
            kappa = np.array([])
            for j in range( np.size(self.E) ):
                kappa = np.append( kappa, \
                                   c.intz( self.dist.a, self.dist.nd * self.qsca[j,:] * cgeo ) / self.dist.md )

        self.kappa = kappa
Example #6
def avaliacao(populacao):
    x = valores(populacao)
    n = len(populacao)
    def steps(k):
        sequence = x[k, :]
        t =  lm.move(startpoint, sequence=sequence)
        return t


    peso = None
    ncpu = cpu_count()
    with Pool(ncpu) as pool:
        peso = array(pool.map(steps, range(n)))

    return peso
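Example #6 uses the context-manager form with Pool(ncpu) as pool:, which for the standard library pool calls terminate() on exit; the comment block in Example #13 reports that the same form can hang with the pathos pool it uses (python issue 38501), which is why several other examples close their pools explicitly. A self-contained stdlib sketch of the context-manager pattern:

from multiprocessing import Pool

def square(x):
    # workers must be module-level functions for the stdlib pool to pickle them
    return x * x

if __name__ == '__main__':
    with Pool(4) as pool:                  # __exit__ calls pool.terminate()
        print(pool.map(square, range(8)))  # [0, 1, 4, 9, 16, 25, 36, 49]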
Example #7
def parallel_run(input_list, list_fn, split_n):
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

    from pathos.multiprocessing import ProcessingPool as Pool
    p = Pool(split_n, daemon=True)
    args = chunks(input_list, split_n)
    result_list_list = p.map(list_fn, args)

    result = []
    for result_list in result_list_list:
        result.extend(result_list)
    return result
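For Example #7, the inner chunks helper yields pieces of size split_n (so split_n doubles as the chunk size and the worker count), and list_fn must accept a list and return a list. A hypothetical call, assuming pathos is installed:

def square_chunk(chunk):
    # receives one chunk (a list) and returns a list of results
    return [x * x for x in chunk]

squares = parallel_run(list(range(12)), square_chunk, split_n=4)
# three chunks of four items each; squares == [0, 1, 4, ..., 121]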
Example #8
    def calculate_centroids(self, p=None):
        """
        Perform integration to find centroid at all turns up to N. Multiprocessing pool used to calculate independent
        turn values.
        Will automatically use `integrate_first_order` or `integrate_second_order` if appropriate.
        Args:
            p: Specify number of processes for pool. If not given then `cpu_count` is used.

        Returns:
            array of floats
        """
        if p:
            pool_size = p
        else:
            pool_size = cpu_count()

        pool = Pool(pool_size)

        #  attempt to speed things up by spreading out difficult integration values at the end of range
        #  appeared to not work
        #     x = []
        #     for i in range(cpu_count()):
        #         x += range(N)[i::4]

        if len(self.mu) == 1:
            integration_function = self.integrate_first_order
        elif len(self.mu) == 2:
            integration_function = self.integrate_second_order
        else:
            integration_function = self.integrate_any_order

        x = range(self.N)
        results = pool.map(integration_function, x)
        pool.close()

        return results
Example #9
    def simulatehoneycomb(self, verbose=1, usediag=False, multiprocess=False):
        """ Loop over the 2D matrix of parameter values defined by makeparamvalues2D, calculate the ground state
        for each point, search for transitions and save in self.honeycomb"""
        t0 = time.time()
        paramnames = list(self.vals2D.keys())
        initparamvalues = self.getall('det')
        npointsx = np.shape(self.vals2D[paramnames[0]])[0]
        npointsy = np.shape(self.vals2D[paramnames[0]])[1]
        self.hcgs = np.empty((npointsx, npointsy, self.ndots))

        self.initSparse()

        if multiprocess and _have_mp:
            pool = Pool(processes=4)
            aa = [(i, self, npointsy, usediag) for i in range(npointsx)]
            result = pool.starmap_async(_simulate_row, aa)
            out = result.get()
            self.hcgs = np.array(out)
        else:
            for i in range(npointsx):
                if verbose:
                    tprint('simulatehoneycomb: %d/%d' % (i, npointsx))

                for j in range(npointsy):
                    for name in paramnames:
                        setattr(self, name, self.vals2D[name][i][j])
                    self.makeHsparse()
                    self.solveH(usediag=usediag)
                    self.hcgs[i, j] = self.OCC
        self.honeycomb, self.deloc = self.findtransitions(self.hcgs)
        self.setall('det', initparamvalues)

        if verbose:
            print('simulatehoneycomb: %.2f [s] (multiprocess %s)' % (time.time() - t0, multiprocess))

        sys.stdout.flush()
Example #10
def initial_population_generator_hyper_ksp_multiproc(amount, **kwargs):
    heuristics_candidates = heuristics.get_heuristics()

    def worker(_):
        state = simple_state_generator_hyper_ksp([], heuristics_candidates,
                                                 **kwargs)
        result = {
            "heuristics": state,
            "fitness": fitness_hyper_ksp(state, **kwargs)
        }
        return result

    pool = Pool()
    population = pool.map(worker, range(amount))
    return population
Example #11
def create_features(WRITE_DB, FP, all_tables, schemas, CPUS, selected_schema, selected_table):
 
    #define key dataframe
    
    key_df = get_key(all_tables[selected_table], schemas,selected_schema)
    key_df = key_df.sort_values(by = ['key','date'])
    
    if selected_schema == 'scoring_schema':
        key_df = key_df[['key']]
    else:
        key_df = key_df[['key','date','target']]
    
    pool = Pool(CPUS)
    
    #Features output framework
    all_functions = inspect.getmembers(FP, inspect.isfunction)
    all_functions = [x[1] for x in all_functions]
#     print(all_functions)
    
    args = (WRITE_DB,key_df, schemas, all_tables, fc_protocol,selected_schema)
    all_functions = [(x,args) for x in all_functions]
    
    
    if WRITE_DB:        
       
        temp = pool.map(trig_func, all_functions)
        df = pd.concat(temp, axis = 1)
        df = pd.concat([key_df,df], axis = 1)
        
        engine = conn_eng()
        if selected_schema == 'scoring_schema':
            df.to_sql(all_tables['scoring_table'],schema= schemas['output_schema'],
                      con=engine, index=False,if_exists ='replace')
           
        else:   
            df.to_sql(all_tables['features_table'],schema= schemas['output_schema'],
                      con=engine, index=False,if_exists ='replace')
            
        engine.dispose()
        del engine
        #print_summary(MISSING_VALUE_TREATMENT,df)       
    else:              
        temp = pool.map(trig_func, all_functions)
        df = pd.concat(temp, axis = 1)
        df = pd.concat([key_df,df], axis = 1)
#         print_summary(MISSING_VALUE_TREATMENT,df)
        return df
    return 'Files written to DB'
Example #12
    def decrypt(self, cipher):
        if (self.mode == 'cbc'):
            #Extracting IV from cipher
            iv = cipher[0:self.block_size]
            cipher_blocks = []
            for i in range(self.block_size, len(cipher), self.block_size):
                cipher_blocks.append(cipher[i:i + self.block_size])

            message_blocks = []
            xor_inp = iv
            aes = AES.new(self.key, AES.MODE_ECB)
            #Chaining blocks and deciphering
            for i in range(len(cipher_blocks)):
                decrypt_out = aes.decrypt(cipher_blocks[i])
                message_blocks.append(byte_xor(xor_inp, decrypt_out))
                xor_inp = cipher_blocks[i]

            #Calculating pad
            padding = int(str(message_blocks[len(message_blocks) - 1])[-2], 16)

            message_text = b''
            for i in message_blocks:
                message_text += i
            #removing padding
            message_text = message_text[:-padding]
            return message_text.decode("utf-8")
        elif (self.mode == 'ctr'):
            #separate IV from cipher
            iv = cipher[0:self.block_size]
            #Split cipher into blocks of given block_size
            cipher_blocks = []
            for i in range(self.block_size, len(cipher), self.block_size):
                cipher_blocks.append(
                    cipher[i:min(i + self.block_size, len(cipher))])
            #Generate inputs for Multiprocessing function
            key_all = [self.key for i in range(len(cipher_blocks))]
            #xor_inps is the sequence iv, iv+1,iv+2... used in ctr mode
            xor_inps = [
                bytes.fromhex(
                    str(hex(int(iv.hex(), self.block_size) + i))[2:])
                for i in range(len(cipher_blocks))
            ]
            with Pool(len(cipher_blocks) +
                      2) as p:  # uses all cores available in parallel
                out = p.map(xorEncrypt, key_all, xor_inps, cipher_blocks)

            message_text = (b''.join(out)).decode("utf-8")
            return message_text
Example #13
    def threaded_contents_to_text(
        content_series,
        processes=None,
        none_content='raise',
    ):
        """Threaded version of content_to_text method

        It takes as input a series which index is the uid of the products,
        and the values are the content (in the form of bytes) of the
        documents.
        processes argument is the number of processes to launch. If omitted,
        it defaults to the number of cpu cores on the machine.
        none_content arg can be 'raise' (default) or to_empty
        """
        processer = partial(
            PDFDecoder.content_to_text,
            none_content=none_content,
        )
        processes = processes if processes else cpu_count()
        print(f'Launching {processes} processes.')
        in_ds = content_series.apply(BytesIO)

        # Pool with context manager do not seem to work due to issue 38501 of
        # standard python library. It hangs when running tests through pytest
        # see: https://bugs.python.org/issue38501
        # Below content should be tested again whenever this issue is closed
        #
        # with Pool(nodes=processes) as pool:
        #     tuples = (list(in_ds.index),
        #               pool.map(processer, in_ds))
        #
        # End of block

        # This temporary solution should be removed when tests mentioned above
        # are successful.
        # This just closes each pool after execution or exception.
        try:
            pool = Pool(nodes=processes)
            pool.restart(force=True)
            tuples = (list(in_ds.index), pool.map(processer, in_ds))
        except Exception:
            pool.close()
            raise
        pool.close()
        # End of block

        ds = pd.Series(tuples[1], index=tuples[0])
        return (ds)
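The try/except block above closes the pool separately on the failure path and on the success path; an equivalent, slightly tighter sketch of the same workaround (same assumptions about pathos' restart/close API) uses try/finally:

pool = Pool(nodes=processes)
pool.restart(force=True)
try:
    tuples = (list(in_ds.index), pool.map(processer, in_ds))
finally:
    pool.close()  # runs on success and on exception alike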
Example #14
def makeRadial():
    rad, angle = d["radial"]["rad"], d["radial"]["angle"]
    args = np.linspace(angle, angle + np.pi, frameCount)

    pool = Pool(4)

    while True:
        subIm = JuliaTools.subImage(c=rad * np.exp(1j * angle),
                                    r=r,
                                    n=10,
                                    p=p,
                                    iters=iters,
                                    split=split,
                                    save=False,
                                    aura=False)
        isBlackList = pool.map(subIm, coords)
        if not all(isBlackList):
            break
        else:
            rad *= 0.975

    # Circular arc c follows in complex plane
    cPath = rad * np.exp(1j * args)

    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=cPath[frame],
                                    r=r,
                                    n=n,
                                    p=p,
                                    iters=iters,
                                    split=split)
        isBlackList = pool.map(subIm, coords)
        allBlack = all(isBlackList)

        if not allBlack:
            JuliaTools.makeFrame(frame, n, split, coords)

    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)

    with open("tweet.txt", "w") as out:
        out.write("Images generated using constants"
                  " on a circular arc of radius {:03.2f}.".format(rad))

    stop = timeit.default_timer()

    print stop - start
Example #15
def create(file, variables, metric, repeat, compare, compare_values, fixed):
    """
    Creates a model with extrap
    :param file: csv file
    :param variables: list of variable columns
    :param metric: metric column
    :param repeat: repeat column or None
    :param compare: compare column
    :param compare_values: unique values of compare column
    :param fixed: dictionary of column:value to fix
    :return: Model
    """
    def get_model(cmp_dict=None):
        try:
            f = fixed.copy()
            if cmp_dict is not None:
                f.update(cmp_dict)
            tmp_file_in = convert(file, variables, metric, repeat, f)

            if len(variables) == 1:
                return extrap_one_param(tmp_file_in)
            elif len(variables) == 2:
                return extrap_two_param(tmp_file_in)
            else:
                raise ValueError(
                    "Parameters with more than 2 parameters are currently not supported"
                )
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            return None

    if compare is None:
        # create single model
        m, r2 = get_model()
        if m is None:
            return []
        return [Model(m, variables, adj_r2=r2)]
    else:
        # create multiple models
        def get_model_comp(compare_val):
            cmp = {compare: compare_val}
            model_str, adj_r2 = get_model(cmp)
            if model_str is None:
                return None
            return Model(model_str, variables, name=compare_val, adj_r2=adj_r2)

        with Pool(multiprocessing.cpu_count()) as p:
            models = p.map(get_model_comp, compare_values)
            return list(filter(lambda x: x is not None, models))
Example #16
def RobustSTL(input,
              season_len,
              reg1=10.0,
              reg2=0.5,
              K=2,
              H=5,
              dn1=1.,
              dn2=1.,
              ds1=50.,
              ds2=1.,
              learning_rate=0.01,
              max_iter=100,
              max_trials=10,
              verbose=True):
    if np.ndim(input) < 2:
        return _RobustSTL(input, season_len, reg1, reg2, K, H, dn1, dn2, ds1,
                          ds2, learning_rate, max_iter, max_trials, verbose)

    elif np.ndim(input) == 2 and np.shape(input)[1] == 1:
        return _RobustSTL(input[:, 0], season_len, reg1, reg2, K, H, dn1, dn2,
                          ds1, ds2, learning_rate, max_iter, max_trials,
                          verbose)

    elif np.ndim(input) == 2 or np.ndim(input) == 3:
        if np.ndim(input) == 3 and np.shape(input)[2] > 1:
            print(
                "[!] Valid input series shape: [# of Series, # of Time Steps] or [# of series, # of Time Steps, 1]"
            )
            raise
        elif np.ndim(input) == 3:
            input = input[:, :, 0]
        num_series = np.shape(input)[0]

        input_list = [input[i, :] for i in range(num_series)]

        from pathos.multiprocessing import ProcessingPool as Pool
        p = Pool(num_series)

        def run_RobustSTL(_input):
            return _RobustSTL(_input, season_len, reg1, reg2, K, H, dn1, dn2,
                              ds1, ds2)

        result = p.map(run_RobustSTL, input_list)

        return result
    else:
        print("[!] input series error")
        raise
Example #17
def fetch_file_links(data):

    file_links = data['file_links']
    locations = data['locations']
    save_dir = data['save_dir']

    thread_pool_size = multiprocessing.cpu_count()
    with Pool(thread_pool_size) as pool:
        fetch_iter = pool.uimap(lambda link: _fetch_file_links(link, locations, save_dir), file_links)
        results = list(tqdm(fetch_iter, total=len(file_links)))
        file_paths, error_links = zip(*results)
        file_paths =  [l for l in file_paths if l is not None]
        error_links =  [l for l in error_links if l is not None]

    print('Links with valid location:', len(file_paths))
    return returning(file_paths, 'file_paths', data)
Example #18
def crossover(population, crossover_reproduction_func, mutation_func,
              repair_func, fitness_func, **kwargs):
    childs = crossover_reproduction_func(population, **kwargs)

    if childs is None or len(childs) == 0:
        return population

    def worker(child):
        mutation_func(child, **kwargs)
        repair_func(child, **kwargs)
        return {"heuristics": child, "fitness": fitness_func(child, **kwargs)}

    pool = Pool()
    new_individuals = pool.map(worker, childs)
    population += new_individuals
    return population
Example #19
def calc_path_matrix(graph: nx.Graph,
                     heuristic: Callable = None,
                     weight: str = "weight",
                     nodes: int = 1) -> Dict[Hashable, List[Hashable]]:
    """
    Calculates a shortest path matrix between all combinations of nodes in the graph.
    """
    astar_path = _astar_path_factory(graph, heuristic=heuristic, weight=weight)
    node_pairs = [(source, target) for target in graph.nodes()
                  for source in graph.nodes() if target > source]
    with Pool(nodes=nodes) as pool:
        paths = pool.map(astar_path, node_pairs)
    return {
        frozenset(node_pair): path
        for node_pair, path in zip(node_pairs, paths)
    }
Example #20
def min_distances_parallel(features_1,
                           features_2,
                           cat_weight_=None,
                           bool_features=None):
    if bool_features is not None and cat_weight_ is None:
        cat_weight_ = np.mean(bool_features)
    elif bool_features is None and cat_weight_ is None:
        cat_weight_ = 0
    else:
        pass

    def get_min_distance(features_list):
        return min_distance(features_list, features_2, bool_features)

    distances = np.array(Pool().map(get_min_distance, features_1.tolist()))
    return distances.flatten()
Example #21
def apply_by_multiprocessing(df, func, **kwargs):
    """
    Parallel execution function for the DataFrame
    :param df: Input DataFrame
    :param func: function applied to each DataFrame split
    :param kwargs: additional arguments for df.apply(), such as axis
    :return: Output DataFrame
    """
    workers = kwargs.pop('workers')
    pool = Pool(processes=workers)
    result = pool.map(_apply_df,
                      [(d, func, i, kwargs)
                       for i, d in enumerate(np.array_split(df, workers))])
    pool.close()
    result = sorted(result, key=lambda x: x[0])
    return pd.concat([i[1] for i in result])
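A hypothetical call to apply_by_multiprocessing: workers is popped off kwargs, and whatever remains (here axis=1) is forwarded through _apply_df to df.apply on each split. The lambda only serializes if the Pool in use is dill-backed (e.g. pathos); with the stdlib pool a module-level function would be needed.

import pandas as pd

df = pd.DataFrame({'a': range(8), 'b': range(8)})
row_sums = apply_by_multiprocessing(df, lambda row: row['a'] + row['b'],
                                    workers=4, axis=1)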
Example #22
    def gen_kernel(self, process = 1):
        def calc_kernel(i):
            return prism.gz(self.xp[0:1],self.yp[0:1],self.zp[0:1],[self.mesh[i]])
        if process > 1:  # Windows multiprocessing runs may possibly hit errors.
            print('Number of process:', process)
            with Pool(processes=process) as pool:
                kernel0 = pool.map(calc_kernel, range(len(self.mesh)))
        else:
            kernel0 = [calc_kernel(i) for i in range(len(self.mesh))]

        self.kernel0 = np.array(kernel0).reshape(self.nz,self.ny,self.nx)
        self.kernel_op = AbicLSQOperator(self.kernel0,
                                         depth_constraint=self.constraints['depth'],
                                         smooth_components=self._smooth_components,
                                         refer_constraint=self.constraints['refer'],
                                         weights=self._weights)
Example #23
    def process(self,
                rel_pattern="",
                func=None,
                use_parallel=False,
                verbose=False,
                **kwargs):
        if use_parallel:
            self._pool = Pool(self._num_of_cpu - 1)
        result = self._process(rel_pattern,
                               func,
                               use_parallel=use_parallel,
                               verbose=verbose,
                               **kwargs)
        if use_parallel:
            self._pool.close()
        return result
Example #24
def makePower():
    global c
    pMin, pMax = d["power"]["pMin"], d["power"]["pMax"]

    pPath = np.linspace(pMin, pMax, frameCount)

    pool = Pool(4)

    # Get interesting c
    while True:
        subIm = JuliaTools.subImage(c=c,
                                    n=10,
                                    iters=iters / 2,
                                    r=r,
                                    p=pMin,
                                    split=split,
                                    save=False,
                                    aura=False)
        isBlackList = pool.map(subIm, coords)
        if not all(isBlackList):
            break
        else:
            c *= 0.975

    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=c,
                                    r=r,
                                    n=n,
                                    p=pPath[frame],
                                    iters=iters / 2,
                                    split=split)
        isBlackList = pool.map(subIm, coords)
        allBlack = all(isBlackList)

        if not allBlack:
            JuliaTools.makeFrame(frame, n, split, coords)

    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)

    with open("tweet.txt", "w") as out:
        out.write("woooooooooooooooooooo")

    stop = timeit.default_timer()

    print stop - start
Example #25
def climByAveragingPeriods(urls,              # list of (daily) granule URLs for a long time period (e.g. a year)
                    nEpochs,                  # compute a climatology for every N epochs (days) by 'averaging'
                    nWindow,                  # number of epochs in window needed for averaging
                    variable,                 # name of primary variable in file
                    mask,                     # name of mask variable
                    coordinates,              # names of coordinate arrays to read and pass on (e.g. 'lat' and 'lon')
                    maskFn=qcMask,            # mask function to compute mask from mask variable
                    averager='pixelAverage',  # averaging function to use, one of ['pixelAverage', 'gaussInterp']
                    mode='sequential',        # Map across time periods of N-days for concurrent work, executed by:
                                              # 'sequential' map, 'multicore' using pool.map(), 'cluster' using pathos pool.map(),
                                              # or 'spark' using PySpark
                    numNodes=1,               # number of cluster nodes to use
                    nWorkers=4,               # number of parallel workers per node
                    averagingFunctions=AveragingFunctions,    # dict of possible averaging functions
                    legalModes=ExecutionModes  # list of possiblel execution modes
                   ):
    '''Compute a climatology every N days by applying a mask and averaging function.
Writes the averaged variable grid, attributes of the primary variable, and the coordinate arrays in a dictionary.
***Assumption:  This routine assumes that the N grids will fit in memory.***
    '''
    try:
        averageFn = averagingFunctions[averager]
    except :
        averageFn = average
        print >>sys.stderr, 'climatology: Error, Averaging function must be one of: %s' % str(averagingFunctions)

    urlSplits = [s for s in fixedSplit(urls, nEpochs)]
    if VERBOSE: print >>sys.stderr, urlSplits

    def climsContoured(urls):
        n = len(urls)
        var = climByAveraging(urls, variable, mask, coordinates, maskFn, averageFn)
        return contourMap(var, variable, coordinates, n, urls[0])

    if mode == 'sequential':
        plots = map(climsContoured, urlSplits)
    elif mode == 'multicore':
        pool = Pool(nWorkers)
        plots = pool.map(climsContoured, urlSplits)        
    elif mode == 'cluster':
        pass
    elif mode == 'spark':
        pass

    plots = map(climsContoured, urlSplits)
    print plots
    return plots
Example #26
    def _process_set_reads_library(self, input_object_info, genome_index_base,
                                   result_directory, cli_option_params):
        """
        _process_set_reads_library: process set reads library
        """

        reads_refs = self.fetch_reads_refs_from_sampleset(
            input_object_info['ref'], input_object_info['info'])

        set_object_name = input_object_info['info'][1]
        alignment_set_name = set_object_name + cli_option_params[
            'alignment_set_suffix']

        arg_1 = []
        arg_2 = [genome_index_base] * len(reads_refs)
        arg_3 = [result_directory] * len(reads_refs)
        arg_4 = []
        conditions = []
        for reads_ref in reads_refs:
            reads_input_object_info = self._get_input_object_info(
                reads_ref['ref'])
            option_params = cli_option_params.copy()
            option_params['reads_condition'] = reads_ref['condition']
            conditions.append(reads_ref['condition'])
            arg_1.append(reads_input_object_info)
            arg_4.append(option_params)

        cpus = min(cli_option_params.get('num_threads'),
                   multiprocessing.cpu_count())
        pool = Pool(ncpus=cpus)
        log('running _process_alignment_object with {} cpus'.format(cpus))

        reads_alignment_object_refs = pool.map(
            self._process_single_reads_library, arg_1, arg_2, arg_3, arg_4)

        for reads_alignment_object_ref in reads_alignment_object_refs:
            if reads_alignment_object_ref.startswith('ERROR'):
                error_msg = 'Caught exception in worker\n'
                error_msg += '{}'.format(reads_alignment_object_ref)
                raise ValueError(error_msg)

        workspace_name = cli_option_params['workspace_name']
        reads_alignment_set_object_ref = self._save_alignment_set(
            reads_alignment_object_refs, workspace_name, alignment_set_name,
            conditions)

        return reads_alignment_set_object_ref
Example #27
def sparsify_predictions(model_name, timepoints=None):
    """ Computes and saves sparse representations for model predictions

    Args:
      model_name: name of model whose predictions you wish to sparsify
      timepoints: list of timepoints to sparsify - [int]
    """
    from ipp_tools.slurm import slurm_map
    from division_detection.constants import NUM_TIMEPOINTS

    pred_dir = '/nrs/turaga/bergera/division_detection/prediction_outbox/{}'.format(model_name)
    sparse_pred_dir = '{}/sparse'.format(pred_dir)

    if not os.path.exists(sparse_pred_dir):
        os.mkdir(sparse_pred_dir)

    existing_tps = set([int(fname[:-3]) for fname in os.listdir(sparse_pred_dir)])

    if timepoints is None:
        timepoints = np.arange(3, NUM_TIMEPOINTS - 4)

    timepoints = [t for t in timepoints if t not in existing_tps]


    def _sparsify_predictions_helper(t_idx):
        """ Helper function that sparsifies a single timepoint.
        """
        from division_detection.sparse_utils import save_dense_as_coo
        pred_dir = '/nrs/turaga/bergera/division_detection/prediction_outbox/{}'.format(model_name)
        dense_pred_dir = '{}/dense'.format(pred_dir)
        sparse_pred_dir = '{}/sparse'.format(pred_dir)

        if not os.path.exists('{}/{}.h5'.format(dense_pred_dir, t_idx)):
            warn('You asked me to sparsify predictions for {} but none exist'.format(t_idx))

        try:
            with h5py.File('{}/{}.h5'.format(dense_pred_dir, t_idx), 'r') as prediction_file:
                predictions = prediction_file['predictions']
                print("Loading ", t_idx)
                tp_preds = predictions[:]
                print("Saving ", t_idx)
                save_dense_as_coo(tp_preds, '{}/{}'.format(sparse_pred_dir, t_idx))
        except OSError as os_err:
            warn("Caught OS error while trying to read {}; continuing".format(t_idx))

    pool = Pool(20)
    pool.map(_sparsify_predictions_helper, timepoints)
Example #28
def run(non_iter_args, do_multiprocessing):
    [
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    ] = non_iter_args

    partial_gaincalc_oneset = partial(
        calc_weights_oneset,
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    )

    if do_multiprocessing:
        pool = Pool(processes=pathos.multiprocessing.cpu_count())
        pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)

        # Current solution to no close and join methods on ProcessingPool
        # https://github.com/uqfoundation/pathos/issues/46

        s = pathos.multiprocessing.__STATE["pool"]
        s.close()
        s.join()
        pathos.multiprocessing.__STATE["pool"] = None

    else:
        for causevarindex in weightcalcdata.causevarindexes:
            partial_gaincalc_oneset(causevarindex)

    return None
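The __STATE juggling above tracks pathos issue #46 linked in the comment (ProcessingPool once lacked close/join); more recent pathos releases expose close, join and clear directly on the pool object, the same pattern Example #33 uses. A hedged alternative sketch, assuming such a release and that Pool here is pathos' ProcessingPool:

pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)
pool.close()
pool.join()
pool.clear()  # drops the cached pool so a later Pool(...) starts fresh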
Example #29
def calculate_bleu(sess, trainable_model, data_loader):
    # bleu score implementation
    # used for performance evaluation for pre-training & adv. training
    # separate true dataset to the valid set
    # conditionally generate samples from the start token of the valid set
    # measure similarity with nltk corpus BLEU
    smoother = SmoothingFunction()

    data_loader.reset_pointer()
    bleu_avg = 0

    references = []
    hypotheses = []

    for it in xrange(data_loader.num_batch):
        batch = data_loader.next_batch()
        # predict from the batch
        # TODO: which start tokens?
        # start_tokens = batch[:, 0]
        start_tokens = np.array([START_TOKEN] * BATCH_SIZE, dtype=np.int64)
        prediction = trainable_model.predict(sess, batch, start_tokens)

        # argmax to convert to vocab
        #prediction = np.argmax(prediction, axis=2)

        # cast batch and prediction to 2d list of strings
        batch_list = batch.astype(np.str).tolist()
        pred_list = prediction.astype(np.str).tolist()
        references.extend(batch_list)
        hypotheses.extend(pred_list)

    bleu = 0.

    # calculate bleu for each predicted seq
    # compare each predicted seq with the entire references
    # this is slow, use multiprocess
    def calc_sentence_bleu(hypothesis):
        return sentence_bleu(references,
                             hypothesis,
                             smoothing_function=smoother.method4)

    if __name__ == '__main__':
        p = Pool()
        result = (p.map(calc_sentence_bleu, hypotheses))
    bleu = np.mean(result)

    return bleu
Example #30
File: padder.py Project: yanske/PdfPadder
def pad_pdf(path, ratio, output_path=None):
    """Pad PDF with a <ratio>% white margin increase on the right.

  Takes a path to the original PDF file, converts them to PIL images,
  and pads them with the appropriate whitespace. Returns a path to the
  padded PDF.

  If a valid output_path is given, it will move the PDF to the given path
  and return the path.
  """

    images = pdf2image.convert_from_path(path)

    p = Pool(4)

    def overlay_and_store(img):
        """Pad the individual images by overlaying it on a white background.

    Passed to a multiprocessing pool as each individual PDF page is
    independent of each other. Saves the image in a temp path as a JPEG,
    and returns the absolute file path.
    """

        w, h = img.size
        padded_img = Image.new("RGB", (int(w * (1.0 + ratio)), h), "white")
        padded_img.paste(img, (0, 0))

        tmp_path = _generate_tmp_path(ext='.jpeg')
        padded_img.save(tmp_path, "JPEG")
        return tmp_path

    padded_images = p.map(overlay_and_store, images)

    # Output as PDF.
    output = _generate_tmp_path(ext='.pdf')
    with open(output, 'wb') as f:
        f.write(img2pdf.convert(padded_images))

    # Clean up temp image files used.
    for tmp_img in padded_images:
        os.remove(tmp_img)

    if output_path:
        os.rename(output, output_path)
        return output_path

    return output
Example #31
    def __init__(self, numSS_Points, numSS_it, N, Qslack, Q, R, dR, n, d, shift, dt, track_map, Laps, TimeLMPC, Solver):
        """Initialization
        Arguments:
            numSS_Points: number of points selected from the previous trajectories to build SS
            numSS_it: number of previous trajectories selected to build SS
            N: horizon length
            Q,R: weight to define cost function h(x,u) = ||x||_Q + ||u||_R
            dR: weight to define the input rate cost h(x,u) = ||x_{k+1}-x_k||_dR
            n,d: state and input dimension
            shift: given the closest point x_t^j to x(t) the controller starts selecting the points for SS from x_{t+shift}^j
            track_map: track_map
            Laps: maximum number of laps the controller can run (used to avoid dynamic allocation)
            TimeLMPC: maximum time [s] that an lap can last (used to avoid dynamic allocation)
            Solver: solver used in the reformulation of the LMPC as QP
        """
        self.numSS_Points = numSS_Points
        self.numSS_it     = numSS_it
        self.N = N
        self.Qslack = Qslack
        self.Q = Q
        self.R = R
        self.dR = dR
        self.n = n
        self.d = d
        self.shift = shift
        self.dt = dt
        self.track_map = track_map
        self.Solver = Solver            
        self.clustering = None
        self.OldInput = np.zeros((1,d))

        # Initialize the following quantities to avoid dynamic allocation
        # TODO: is there a more graceful way to do this in python?
        NumPoints = int(TimeLMPC / dt) + 1
        self.TimeSS  = 10000 * np.ones(Laps).astype(int)        # Time at which each j-th iteration is completed
        self.SS      = 10000 * np.ones((NumPoints, n, Laps))    # Sampled Safe SS
        self.uSS     = 10000 * np.ones((NumPoints, d, Laps))    # Input associated with the points in SS
        self.Qfun    =     0 * np.ones((NumPoints, Laps))       # Qfun: cost-to-go from each point in SS
        # TODO replace with after-the-fact mapping?
        self.SS_glob = 10000 * np.ones((NumPoints, n, Laps))    # SS in global (X-Y) used for plotting

        # Initialize the controller iteration
        self.it      = 0

        # Initialize pool for parallel computing used in the internal function _LMPC_EstimateABC
        # TODO this parameter should be tunable
        self.p = Pool(4)
Example #32
def _process_index_to_index(filename,
                            names_map,
                            orig_map,
                            dist_map,
                            must_have_index=-1,
                            must_have_percent=0.0,
                            size=()):
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)

    if len(size) == 2:
        img = cv2.resize(img, size, interpolation=cv2.INTER_NEAREST)

    # The slowest operation is finding the indices, so it is multiprocessed
    colors_indeces = {}

    def get_indx(name):
        i = (img == orig_map[name])
        return name, i

    result = Pool(ncpus=16).map(get_indx, orig_map.keys())

    for res in result:
        name = res[0]
        idx = res[1]
        colors_indeces[name] = idx
    ######

    for orig_name, name in names_map.items():
        index = dist_map[name]

        i = colors_indeces[orig_name]
        img[i] = index

    if must_have_index >= 0:

        if must_have_percent > 0.0:

            percent = float(np.count_nonzero(
                img == must_have_index)) / (img.shape[0] * img.shape[1])
            if percent < must_have_percent:
                return False, None

        else:
            if not must_have_index in img:
                return False, None

    return True, img
Example #33
    def make_query(self, size=1):

        ## quit if nr_unlabeled_samples = 1
        if self.dataset.len_unlabeled() == 1:
            return self.dataset.get_unlabeled_entries()[0].astype(int)

        ## Set the possible labels
        self.possible_labels = list(set(self.dataset.get_labeled_entries()[1]))

        ## Train the model
        self.model.train(self.dataset)

        ## Get probabilities
        X_ids, X = self.dataset.get_unlabeled_entries()
        pred = self.model.predict_proba(
            X)  # pred.shape = (n_unlabeled, nr_of_labels)

        ## Setup pool for cpu parallelisation
        p = Pool(cpu_count(), maxtasksperchild=1000)

        ## nr of unlabeled samples -> len(X)

        ## Get uncertainty after adding every sample with every label
        total = np.asarray(
            p.map(self._eer, X_ids,
                  len(X) * [self.dataset],
                  len(X) * [self.depth]))
        # total.shape = (n_unlabeled, nr_of_labels)

        ## Close the Pool again
        p.close()
        p.join()
        p.clear()

        ## Get the total uncertainty of one sample after adding a label weighted by the labels probability
        total = np.inner(
            pred,
            total,
        ).diagonal()  # total.shape = (n_unlabeled,)

        ## Zip it
        total = zipit(X_ids, total)

        ## Sort it
        results = sort_by_2nd(total, 'min')

        return results[:size, 0].astype(int)
Example #34
    def run(self):

        files = os.listdir(self.folder)
        outfile_bed = self.outfile.replace('.txt', '.bed')

        output_file = open(self.outfile, 'w')
        output_file.write('circle_id\ttranscript_id\tskipped_exon\tintron\tread_names\tsplice_reads\texon_reads\n')
        output_file.close()

        output_file = open(outfile_bed, 'w')
        output_file.write('# bed12 format\n')
        output_file.close()

        from pathos.multiprocessing import ProcessingPool as Pool

        p = Pool(self.cpus)
        p.map(self.run_parallel, files)
Example #35
    def collect_significances(self):
        with open(self.filename, 'w') as f:
            f.write(
                "Higgsino mass,Bino mass,Discovery Significance,Exclusion Limit\n"
            )

        def get_disc_sig(signal, classifier, bdt_cut):
            try:
                table = BDTCutFlowTable(signal, classifier, bdt_cut)
                calc = table.initialize_significance_calculator()
                sig = calc.calculate_discovery_significance('bdt')
                return sig
            except:
                pass

        def get_excl_lim(signal, classifier, bdt_cut):
            try:
                table = BDTCutFlowTable(signal, classifier, bdt_cut)
                calc = table.initialize_significance_calculator()
                lim = calc.calculate_exclusion_limit('bdt')
                return lim
            except:
                pass

        mySignals = self.signals

        pbar = tqdm(total=len(mySignals) / 8)

        def write_sigs(signal):
            try:
                classifier = Classifier(signal.mass_combination_tuple)
                discs = map(lambda x: get_disc_sig(signal, classifier, x),
                            np.arange(-10, 10, 0.1))
                excls = map(lambda x: get_excl_lim(signal, classifier, x),
                            np.arange(-10, 10, 0.1))

                with open(self.filename, 'a') as f:
                    f.write("{},{},{},{}\n".format(signal.higgsino_mass,
                                                   signal.bino_mass,
                                                   max(discs), max(excls)))
                pbar.update(1)
            except:
                pass

        p = Pool(8)
        p.map(write_sigs, mySignals)
Example #36
def parallel_bball_data_helper(dataset, savedir, cpus=30, traj_per_file=10):
    # todo: deal with invalid data after data transformation
    result_list = []
    pool = Pool(cpus)

    tasks_per_cpu = max(math.ceil(len(dataset) / cpus), traj_per_file)
    # make tasks a multiple of traj_per_file
    tasks_per_cpu = math.ceil(tasks_per_cpu / traj_per_file) * traj_per_file

    # save meta information
    joblib.dump({
        'len': len(dataset),
        'traj/file': traj_per_file
    }, os.path.join(savedir, 'meta.pkl'))

    index = 0
    i = 0
    while index < len(dataset):
        indices = range(index, min(index + tasks_per_cpu, len(dataset)))
        index = index + tasks_per_cpu

        result_list.append(
            pool.apply_async(func=save_bball_data_helper,
                             args=(dataset, indices, savedir, i,
                                   traj_per_file)))
        i += int(tasks_per_cpu / traj_per_file)

    while True:
        try:

            def call_if_ready(result):
                if result.ready():
                    result.get()
                    return True
                else:
                    return False

            done_list = list(map(call_if_ready, result_list))
            print('{}/{} done'.format(sum(done_list), len(result_list)))
            if np.all(done_list):
                break
            time.sleep(3)
        except:
            pool.terminate()
            raise
    print('finished preprocessing')
Example #37
from pathos.multiprocessing import Pool
import os

tsne = True

try:
    ncores = int(os.popen('qstat -f $PBS_JOBID | grep resources.used.ncpus').read().split(' ')[-1])

except:
    ncores=1
print 'multiprocessing on ' , ncores
pool = Pool(ncores)

from collections import Counter
import glob


type = ['night','day','*']

for t in type:

    f = glob.glob('lhsgroup/*_gps.'+t)
    lf = len(f)
    print 'nfiles',lf

    def readme(x):
        return ['-'.join(set(i.strip().split('-')))  for i in tuple(open(x))]

    batch = pool.map(readme,f)
Example #38
def _hilbert_ssa(args):
    i, j, data = args
    if not np.any(np.isnan(data)):
        ssa = ssa_class(data, M = 12)
        _, _, _, rc = ssa.run_ssa()
        real_part = rc[:, 0] + rc[:, 1]
        imag_part = np.imag(hilbert(real_part))
        phase_hilb_rc = np.arctan2(imag_part[12:-12], real_part[12:-12])
    else:
        phase_hilb_rc = np.nan
        
    return i, j, phase_hilb_rc


pool = Pool(NUM_WORKERS)
net.wavelet(1, 'y', pool = pool, cut = 1)

# Hilbert on RC SSA fluctuations
# args = [ (i, j, net.data[:, i, j]) for i in range(net.lats.shape[0]) for j in range(net.lons.shape[0]) ]
# results = pool.map(_hilbert_ssa, args)
# for i, j, res in results:
#     net.phase[:, i, j] = res

net.get_continuous_phase(pool = pool)
net.get_phase_fluctuations(rewrite = True, pool = pool)
pool.close()
pool.join()

# index_correlations = {}
# index_datas = {}
WORKERS = 5
NUM_SURRS = 100
to_do_periods = np.arange(2,15.5,0.5)
net = ScaleSpecificNetwork('/Users/nikola/work-ui/data/NCEP/air.mon.mean.levels.nc', 
                                    'air', date(1950,1,1), date(2014,1,1), None, None, 
                                    level = 0, dataset="NCEP", sampling='monthly', anom=False)

synchronization = {}
for period in to_do_periods:
    print("running for %.1f period..." % (period))
    _, nao_ph, sg_nao, a_nao = load_NAOindex_wavelet_phase(date(1950,1,1), date(2014,1,1), period, anom=False)
    _, nino_ph, sg_nino, a_nino = load_nino34_wavelet_phase(date(1950,1,1), date(2014,1,1), period, anom=False)
    _, sunspots_ph, sg_sunspots, a_sunspots = load_sunspot_number_phase(date(1950,1,1), date(2014,1,1), period, anom=False)
    _, pdo_ph, sg_pdo, a_pdo = load_pdo_phase(date(1950,1,1), date(2014,1,1), period, anom=False)
    pool = Pool(WORKERS)
    net.wavelet(period, period_unit='y', cut=2, pool=pool)
    args = [(net.phase[:, i, j], i, j, nao_ph, nino_ph, sunspots_ph, pdo_ph) for i in range(net.lats.shape[0]) for j in range(net.lons.shape[0])]
    result = pool.map(_compute_MI_synch, args)
    synchs = np.zeros((4, net.lats.shape[0], net.lons.shape[0]))
    synchs_surrs = np.zeros((NUM_SURRS, 4, net.lats.shape[0], net.lons.shape[0]))
    for i, j, naos, ninos, suns, pdos in result:
        synchs[0, i, j] = naos
        synchs[1, i, j] = ninos
        synchs[2, i, j] = suns
        synchs[3, i, j] = pdos
    for surr in range(NUM_SURRS):
        sg_nao.construct_fourier_surrogates(algorithm='FT')
        sg_nao.add_seasonality(a_nao[0], a_nao[1], a_nao[2])
        sg_nao.wavelet(period, period_unit="y", cut=2)
        sg_nino.construct_fourier_surrogates(algorithm='FT')
Example #40
#!/usr/bin/env python

from pathos.multiprocessing import Pool
import dill
import pickle #FIXME: multiprocessing needs cPickle + copy_reg
pool = Pool()

# pickle fails for nested functions
def adder(augend):
  zero = [0]
  def inner(addend):
    return addend+augend+zero[0]
  return inner

# test the pickle-ability of inner function
add_me = adder(5)
pinner = pickle.dumps(add_me)
p_add_me = pickle.loads(pinner)
assert add_me(10) == p_add_me(10)

# pickle fails for lambda functions
squ = lambda x:x**2

# test the pickle-ability of inner function
psqu = pickle.dumps(squ)
p_squ = pickle.loads(psqu)
assert squ(10) == p_squ(10)

# if pickle works, then multiprocessing should too
print "Evaluate 10 items on 2 proc:"
pool.ncpus = 2
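Example #40 is cut off right after setting pool.ncpus = 2. A hypothetical continuation, consistent with its setup but not taken from the original source, would simply map the dill-pickled closure and lambda across the pool (Python 2 print statements, to match the snippet):

print pool.map(add_me, range(10))  # -> [5, 6, 7, ..., 14]
print pool.map(squ, range(10))     # -> [0, 1, 4, ..., 81]
pool.close()
pool.join()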
Example #41
File: search.py Project: markm541374/GPc
def multiPESVS(ojf,lb,ub,ki,s,b,cfn,lsl,lsu,fnames):
    def f(fn):
        return PESVS(ojf,lb,ub,ki,s,b,cfn,lsl,lsu,fn)
    p = Pool(nproc)
    return p.map(f,fnames)
Example #42
def generate_output(args):
    """ Main application Driver
        
        1. Partition filenames into smaller chunks/arrays of image filenames
        2. Generate worker processes
        3. Pass the chunks to the workers
        4. Each worker deduplicates its set of image files
        5. Merge the results from each worker to one python dictionary
        6. OPTIONAL -- Output the deduplicated image files to a directory 
    """
    

    # Partition the list of filenames
    num_chunks = args.num_jobs 
    
    # Create a pool of worker threads
    # Each worker will deduplicate a set of images
    filenames = [] 
    metadata = None
    end_str = ""
    if args.json_metadata != None:
        metadata,filenames = process_json_file(args.json_metadata)
        end_str = "from metadata file: %s" % args.json_metadata
    else:
        # Find all image files in dump directory
        filenames = find_all_images(args.dump_dir)
        end_str = "from directory: %s" % args.dump_dir

    file_chunks = partition_filenames(filenames, num_chunks)
    print("Found {} images in directory: {}".format(len(filenames), end_str))

    """
    metadata_results = []
    file_chunk_list = list(file_chunks)
    num_proc = len(file_chunk_list)
    print >> sys.stderr, "Printing file chunks"
    print >> sys.stderr, file_chunks
    pool2 = Pool(processes = num_proc)
    with open(args.json_metadata) as json_metadata_file:
        metadata_results = [pool2.map(process_json_line,json_metadata_file, chunk) for index, chunk in enumerate(file_chunk_list)]
    #objs = [p.get() for p in results] 
    metadata =  merge_exact_duplicates(metadata_results)
    """
    
    pool = Pool(processes=num_chunks)

    # Pass the partitions to each thread
    results = []
    final_dictionary = {}
    if not args.near_duplicates:
        if args.num_jobs == 1:
            # If we're only using one worker, don't make overhead of starting a process
            result = exact_deduplicate_images(filenames)
            dictionaries = [result]
        else:
            # Get the results from each worker
            results = [pool.apply_async(exact_deduplicate_images, args=(index,chunk,)) 
                    for index, chunk in enumerate(file_chunks)]
            dictionaries = [p.get() for p in results]

        # Merge the results into one dictionary
        final_dictionary = merge_exact_duplicates(dictionaries)
    else:
        if args.num_jobs == 1:
            # If we're only using one worker, don't make overhead of starting a process
            result = near_deduplicate_images(filenames, args.bit_distance, metadata = metadata)
            near_duplicate_objects = [result]
        else:
            # Get the results from each near duplicate worker
            if metadata != None:
                results = [pool.apply_async(near_deduplicate_images, (chunk,args.bit_distance, ), dict(metadata=metadata)) for chunk in file_chunks] 
            else:
                results = [pool.apply_async(near_deduplicate_images, (chunk,args.bit_distance,))  for chunk in file_chunks] 
            # create an array of near duplicate objects
            near_duplicate_objects = [p.get() for p in results]

        # Merge the dictionaries together using the info from its corresponding indexes
        final_dictionary = merge_near_duplicates(near_duplicate_objects)

    print("Number of images prior to deduplication: {}".format(len(filenames)), file=sys.stderr)
    print("Number of images after deduplication: {}".format(len(final_dictionary)), file=sys.stderr)
    
    # Write the image locations to an output file
    
    
    if args.output_json != None:
        # TODO
        # For now, just do this with exact duplicates
        # Dumping the simhash class to JSON doesn't work because the object isn't
        # JSON serializable 
        outfile_name = args.output_json
        print("Writing to image dictionary to file: {}".format(outfile_name))
        with open(outfile_name, 'w') as outfile:
            json.dump(final_dictionary, outfile, indent=4, skipkeys=True, default=str)

    # Copy the images to an output directory
    create_output_image_directory(args, final_dictionary)

    return len(final_dictionary), len(filenames) - len(final_dictionary)
Example #43
File: search.py Project: markm541374/GPc
def multiPESIS(ojf,lb,ub,ki,b,fnames):
    def f(fn):
        return PESIS(ojf,lb,ub,ki,b,fn)
    p = Pool(nproc)
    return p.map(f,fnames)
Example #44
import mxnet as mx
import os
import numpy as np
import random
import cv2
import symbol
import cPickle as pickle

from PIL import Image
from PATH import *
from pathos.multiprocessing import Pool
pool = Pool(12)


alpha = 1e-2
batch_size = 480
ctx = mx.gpu(1)
n = len(os.listdir(DATAPATH))
n = 1500000 
imgout = mx.nd.zeros([1,3,384,384], ctx)
anno = mx.nd.zeros([1,37,384,384], ctx)
anno_np = np.zeros([1,37,384,384])
reg_anno = mx.nd.zeros([1,74,384,384], ctx)
reg_anno_np = np.zeros([1,74,384,384])
cls_grad = mx.nd.zeros([1,37,384,384], ctx)
reg_grad = mx.nd.zeros([1,74,384,384], ctx)
reg_grad_np = np.zeros([1,74,384,384])


def get_image():
    result = []
Example #45
    def __init__(self, source, config, single_process=False, processes=cpu_count()):
        if single_process:
            self.results = imap(transform, izip(source, repeat(config)))
        else:
            self.pool = Pool(processes)
            self.results = self.pool.imap(transform, izip(source, repeat(config)))
Example #46
    def __init__(self, source, config, processes=cpu_count()):
        self._pool = Pool(processes)
        self._results = self._pool.imap(_merge_records, izip(source, repeat(config)))
Example #47
# net.save_net('networks/NCEP-SATsurface-7-8yrs-Hilb-phase-adjmat%s.bin' % ('MPC'), only_matrix = True)


for method in METHODS:

    for scale in SCALES:

        print("Computing networks using %s method..." % (method))

        # phase
        if method in ['MIEQQ', 'MIGAU', 'MPC']:
            # net = ScaleSpecificNetwork(fname, 'air', date(1948,1,1), date(2016,1,1), None, None, level = 0, dataset = "NCEP", 
            #         sampling = 'monthly', anom = False)
            net = ScaleSpecificNetwork(fname, 't2m', date(1958,1,1), date(2014,1,1), None, None, level=None, pickled=True,
                        sampling='monthly', anom=False)
            pool = Pool(NUM_WORKERS)
            # net.get_hilbert_phase_amp(period = 90, width = 12, pool = pool, cut = 1)
            net.wavelet(scale, period_unit='m', cut=2, pool=pool)
            pool.close()
            pool.join()
            net.get_adjacency_matrix(net.phase, method = method, pool = None, use_queue = True, num_workers = NUM_WORKERS)
            net.save_net('networks/ERA-SATsurface-scale%dmonths-phase-adjmat%s.bin' % (scale, method), only_matrix = True)

        # amplitude
        if method in ['MIEQQ', 'MIGAU', 'CORR']:
            # net = ScaleSpecificNetwork(fname, 'air', date(1948,1,1), date(2016,1,1), None, None, level = 0, dataset = "NCEP", 
            #         sampling = 'monthly', anom = False)
            net = ScaleSpecificNetwork(fname, 't2m', date(1958,1,1), date(2014,1,1), None, None, level=None, pickled=True,
                        sampling='monthly', anom=False)
            pool = Pool(NUM_WORKERS)
            # net.get_hilbert_phase_amp(period = 90, width = 12, pool = pool, cut = 1)