Esempio n. 1
0
def run_executors_in_parallel(
    executor_class,
    assets,
    fifo_mode=True,
    delete_workdir=True,
    parallelize=True,
    logger=None,
    result_store=None,
    optional_dict=None,
):
    """
    Run one executor per asset, optionally in parallel.

    For every asset an executor is built as
    ``executor_class([asset], None, fifo_mode, delete_workdir, result_store,
    optional_dict)``, its ``run()`` method is called, and the first element
    of its ``results`` attribute is collected.

    :param executor_class: callable producing an executor; the returned
        object must provide ``run()`` and an indexable ``results``.
    :param assets: iterable of assets; one executor is created per asset.
    :param fifo_mode: forwarded unchanged to every executor.
    :param delete_workdir: forwarded unchanged to every executor.
    :param parallelize: if true, try to map the work with
        ``pathos.pp_map.pp_map``; if that raises ImportError, or if false,
        the executors are run sequentially in this process.
    :param logger: only used to report the fallback to sequential execution;
        when falsy the warning is printed instead. It is NOT passed to the
        executors (they receive ``None`` as their logger argument).
    :param result_store: forwarded unchanged to every executor.
    :param optional_dict: forwarded unchanged to every executor.
    :return: tuple ``(executors, results)`` of two lists, where
        ``results[i]`` is ``executors[i].results[0]``.
    """
    def run_executor(args):
        # Everything the worker needs arrives through ``args`` so the
        # function can be handed to a map-style API as a one-argument callable.
        (executor_class, asset, fifo_mode,
         delete_workdir, result_store, optional_dict) = args
        executor = executor_class([asset], None, fifo_mode, delete_workdir,
                                  result_store, optional_dict)
        executor.run()
        return executor

    def run_sequentially(packed_args):
        # A list (not a bare map()) so the executors can be iterated again by
        # the caller; on Python 3 map() is a one-shot iterator.
        return [run_executor(args) for args in packed_args]

    # pack key arguments to be used as inputs to map function
    list_args = [
        [executor_class, asset, fifo_mode, delete_workdir, result_store,
         optional_dict]
        for asset in assets
    ]

    # map arguments to func
    if parallelize:
        try:
            from pathos.pp_map import pp_map
            # list() guarantees a re-iterable sequence whatever pp_map returns
            executors = list(pp_map(run_executor, list_args))
        except ImportError:
            # fall back
            msg = "pathos.pp_map cannot be imported for parallel execution, " \
                  "fall back to sequential map()."
            if logger:
                # Logger.warn is a deprecated alias (removed in Python 3.13)
                logger.warning(msg)
            else:
                # single-argument call form prints identically on Python 2 and 3
                print('Warning: {}'.format(msg))
            executors = run_sequentially(list_args)
    else:
        executors = run_sequentially(list_args)

    # aggregate results
    results = [executor.results[0] for executor in executors]

    return executors, results
Esempio n. 2
0
def run_executors_in_parallel(executor_class,
                              assets,
                              fifo_mode=True,
                              delete_workdir=True,
                              parallelize=True,
                              logger=None,
                              result_store=None,
                              optional_dict=None,
                              optional_dict2=None,
                              ):
    """
    Run one executor per asset, optionally in parallel.

    For every asset an executor is built as
    ``executor_class([asset], None, fifo_mode, delete_workdir, result_store,
    optional_dict, optional_dict2)``, its ``run()`` method is called, and the
    first element of its ``results`` attribute is collected.

    :param executor_class: callable producing an executor; the returned
        object must provide ``run()`` and an indexable ``results``.
    :param assets: iterable of assets; one executor is created per asset.
    :param fifo_mode: forwarded unchanged to every executor.
    :param delete_workdir: forwarded unchanged to every executor.
    :param parallelize: if true, try to map the work with
        ``pathos.pp_map.pp_map``; if that raises ImportError, or if false,
        the executors are run sequentially in this process.
    :param logger: only used to report the fallback to sequential execution;
        when falsy the warning is printed instead. It is NOT passed to the
        executors (they receive ``None`` as their logger argument).
    :param result_store: forwarded unchanged to every executor.
    :param optional_dict: forwarded unchanged to every executor.
    :param optional_dict2: forwarded unchanged to every executor.
    :return: tuple ``(executors, results)`` of two lists, where
        ``results[i]`` is ``executors[i].results[0]``.
    """

    def run_executor(args):
        # Everything the worker needs arrives through ``args`` so the
        # function can be handed to a map-style API as a one-argument callable.
        (executor_class, asset, fifo_mode, delete_workdir,
         result_store, optional_dict, optional_dict2) = args
        executor = executor_class([asset], None, fifo_mode,
                                  delete_workdir, result_store,
                                  optional_dict, optional_dict2)
        executor.run()
        return executor

    def run_sequentially(packed_args):
        # A list (not a bare map()) so the executors can be iterated again by
        # the caller; on Python 3 map() is a one-shot iterator.
        return [run_executor(args) for args in packed_args]

    # pack key arguments to be used as inputs to map function
    list_args = [
        [executor_class, asset, fifo_mode,
         delete_workdir, result_store, optional_dict, optional_dict2]
        for asset in assets
    ]

    # map arguments to func
    if parallelize:
        try:
            from pathos.pp_map import pp_map
            # list() guarantees a re-iterable sequence whatever pp_map returns
            executors = list(pp_map(run_executor, list_args))
        except ImportError:
            # fall back
            msg = "pathos.pp_map cannot be imported for parallel execution, " \
                  "fall back to sequential map()."
            if logger:
                # Logger.warn is a deprecated alias (removed in Python 3.13)
                logger.warning(msg)
            else:
                # single-argument call form prints identically on Python 2 and 3
                print('Warning: {}'.format(msg))
            executors = run_sequentially(list_args)
    else:
        executors = run_sequentially(list_args)

    # aggregate results
    results = [executor.results[0] for executor in executors]

    return executors, results
Esempio n. 3
0
def format_string_list(_data):
    """
    Normalize every string in ``_data`` and return the cleaned strings.

    Each string is lower-cased, every run of non-alphanumeric characters is
    replaced by a single space, the text is tokenized with nltk's
    ``word_tokenize``, English stopwords are dropped and the remaining tokens
    are re-joined with single spaces. The per-string work is dispatched
    through ``pp_map``.

    The first ten inputs and the first ten outputs are printed for
    inspection, together with two banner lines.

    :param _data: sliceable sequence of strings to normalize.
    :return: whatever ``pp_map`` returns for the cleaned strings (it is
        sliced with ``[:10]`` here, so it must be a sequence).
    """
    def format_string(s):
        # NOTE(review): imports live inside the worker, presumably so the
        # function is self-contained when pp_map ships it out -- confirm
        # before hoisting them to module level.
        import re
        from nltk.corpus import stopwords
        from nltk.tokenize import word_tokenize

        # A set gives O(1) membership tests; the original list was scanned
        # linearly for every token.
        _stopwords = set(stopwords.words('english'))

        s = s.lower()  # case lowering

        # non-alphanumeric-character removal
        s = re.sub('[^0-9a-zA-Z]+', ' ', s)

        # stopword removal
        _words_list = [w for w in word_tokenize(s) if w not in _stopwords]

        return ' '.join(_words_list)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(_data[:10])
    print('---------------stopword removal--------------')
    # NOTE: the banner below is kept for output compatibility, although no
    # stemming is applied to the tokens.
    print('---------------stem--------------')
    _format = pp_map(format_string, _data)
    print(_format[:10])

    return _format
Esempio n. 4
0
# Demo script: apply sin2 to the same input with a serial map and with three
# parallel map back-ends, printing each result.

# print the input to screen
x = np.arange(N * nodes, dtype=np.float64)
print("Input: %s\n" % x)


# run sin2 in series, then print to screen
print("Running serial python ...")
# Materialize the result: on Python 3 map() returns a lazy iterator and
# np.asarray() would wrap the map object itself instead of the values.
y = list(map(sin2, x))
print("Output: %s\n" % np.asarray(y))


# map sin2 to the workers, then print to screen
print("Running mpi4py on %d cores..." % nodes)
y = mpi_map(sin2, x, nnodes=nodes)
print("Output: %s\n" % np.asarray(y))


# map sin2 to the workers, then print to screen
print("Running multiprocesing on %d processors..." % nodes)
y = mp_map(sin2, x, nproc=nodes)
print("Output: %s\n" % np.asarray(y))


# map sin2 to the workers, then print to screen
print("Running parallelpython on %d cpus..." % nodes)
# NOTE(review): the server name looks like a placeholder host -- confirm
# before running against a real cluster.
y = pp_map(sin2, x, ncpus=nodes, servers=('mycpu.mydomain.com',))
print("Output: %s\n" % np.asarray(y))

# EOF