def test_map_item_limit(self):
    """Check that ``map`` honours the scheduler's ``map_item_limit`` setting.

    A map over fewer items than the configured limit is run to completion;
    a map over one item more than the limit is expected to raise
    ``ValueError``.
    """
    TOO_BIG_COUNT = 100

    def plus_one(x):
        return x + 1

    # Start from the default config and cap the number of items per map call.
    conf = pywren.wrenconfig.default()
    if 'scheduler' not in conf:
        conf['scheduler'] = {}
    conf['scheduler']['map_item_limit'] = TOO_BIG_COUNT
    wrenexec = pywren.default_executor(config=conf)

    # Well under the limit: the whole map is submitted and collected.
    N = 10
    small_input = np.arange(N)
    pywren.get_all_results(wrenexec.map(plus_one, small_input))

    # now too big
    with pytest.raises(ValueError) as excinfo:
        oversized_input = np.arange(TOO_BIG_COUNT + 1)
        futures = wrenexec.map(plus_one, oversized_input)
def create_wordcloud_pywren(bucket_name):
    """Run ``wordcloud`` over the pickled link list in parallel with PyWren.

    Reads ``links.pickle`` from the current working directory, partitions the
    links with ``split_list(..., wanted_parts=100)`` and maps ``wordcloud``
    over the parts, blocking until every invocation has finished.

    Args:
        bucket_name: Value handed to the remote workers through the
            ``S3BUCKET`` environment variable.
    """
    # Use a context manager so the file handle is closed even if unpickling
    # fails (the previous bare ``open`` left it to the garbage collector).
    # NOTE: pickle can execute arbitrary code while loading; links.pickle
    # must come from a trusted source.
    with open('links.pickle', 'rb') as links_file:
        links = pickle.load(links_file)

    wrenexec = pywren.default_executor()
    futures = wrenexec.map(
        wordcloud,
        split_list(links, wanted_parts=100),
        invoke_pool_threads=128,
        extra_env={'S3BUCKET': bucket_name},
    )
    # Results are not used; this call only waits for completion.
    pywren.get_all_results(futures)
def get_s3_tar_structure(prefix, bucket='imagenet2datav2', verbose=True):
    """Map ``list_files_in_s3_tarball`` over every key found under ``prefix``.

    Args:
        prefix: Key prefix passed to ``list_all_keys``.
        bucket: Bucket name passed to ``list_all_keys``.
        verbose: When true, print the first ten keys and timing information.

    Returns:
        dict mapping each key to the result ``list_files_in_s3_tarball``
        produced for it.
    """
    import pywren

    keys = list_all_keys(prefix, bucket=bucket)
    if verbose:
        print('Found {} keys:'.format(len(keys)))
        for shown_key in keys[:10]:
            print(' ' + shown_key)
        print(' ...')
        print('Starting PyWren ...')

    start = timer()
    executor = pywren.default_executor()
    results = pywren.get_all_results(
        executor.map(list_files_in_s3_tarball, keys))
    end = timer()

    if verbose:
        print('Done, took {} seconds'.format(end - start))

    # One result per key is expected, in the same order as the keys.
    assert len(results) == len(keys)
    return dict(zip(keys, results))
def main():
    """Flatten the ImageNet validation tarballs in parallel with PyWren.

    Builds the tarball names for every wnid, maps ``flatten_val_tarball``
    over the validation tarballs, prints the wnids whose invocation raised,
    and finally waits on all futures via ``pywren.get_all_results``.
    """
    imgnt = imagenet.ImageNetData()
    wnids = list(imgnt.train_imgs_by_wnid.keys())
    train_tarball_names = get_tarball_names(wnids, 'imagenet-train/')
    val_tarball_names = get_tarball_names(wnids, 'imagenet-validation/val-')

    # Only the validation variant is mapped below; the train variant is kept
    # alongside it as in the original script.
    def flatten_train_tarball(tarball_name):
        return flatten_tarball(tarball_name, prefix="imagenet-train-individual/")

    def flatten_val_tarball(tarball_name):
        return flatten_tarball(tarball_name, prefix="imagenet-validation-individual/")

    pwex = pywren.default_executor()
    futures = pwex.map(flatten_val_tarball, val_tarball_names)

    failed_wnids = []
    # NOTE(review): pairing futures with wnids assumes get_tarball_names
    # returns exactly one name per wnid, in the same order -- confirm.
    for future, wnid in zip(futures, wnids):
        try:
            future.result()
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still abort the run.
            failed_wnids.append(wnid)
            print('wnid failed', wnid)
    print(failed_wnids)

    pywren.get_all_results(futures)
def make_polygons_parallel():
    '''
    Map make_polygons over the output of get_geojsons() using the
    module-level pwex executor and return the collected results.
    '''
    geojson_inputs = get_geojsons()
    pending = pwex.map(make_polygons, geojson_inputs)
    return pywren.get_all_results(pending)
def get_geojsons_paralell():
    '''
    Map get_geojsons over GEOJSONS on a default PyWren executor and
    return the collected results.
    '''
    executor = pywren.default_executor()
    pending = executor.map(get_geojsons, GEOJSONS)
    return pywren.get_all_results(pending)
def test_simple_map(self):
    """Map a +1 function over four integers and compare with ``x + 1``."""

    def plus_one(x):
        return x + 1

    inputs = np.arange(4)
    expected = inputs + 1
    pending = self.wrenexec.map(plus_one, inputs)
    outputs = pywren.get_all_results(pending)
    np.testing.assert_array_equal(outputs, expected)
def test_get_all_results(self):
    """Collect a ten-element map with ``get_all_results`` and check values."""

    def plus_one(x):
        return x + 1

    N = 10
    inputs = np.arange(N)
    expected = inputs + 1
    pending = self.wrenexec.map(plus_one, inputs)
    # Results are converted to an ndarray before the comparison.
    outputs = np.array(pywren.get_all_results(pending))
    np.testing.assert_array_equal(outputs, expected)
def brute_force_split(pin, n):
    """Split ``pin`` into chunks of ``n`` items and brute-force them in parallel.

    Each chunk is handed to ``brute_force_chars`` in its own PyWren
    invocation.

    Args:
        pin: Sliceable sequence (for example a string or list) to split.
        n: Chunk size. The last chunk is shorter when ``len(pin)`` is not a
            multiple of ``n``.

    Returns:
        The value of ``pywren.get_all_results`` for the mapped chunks.

    Raises:
        ValueError: If ``n`` is 0 (raised by ``range``).
    """
    # Step through the sequence so the trailing remainder is kept; the
    # previous ``len(pin) // n`` chunk count silently dropped those items.
    chunks = [pin[start:start + n] for start in range(0, len(pin), n)]
    print('Executing a lambda for each item in - ' + str(chunks))
    pwex = pywren.default_executor()
    # Executing async in parallel
    futures = pwex.map(brute_force_chars, chunks)
    return pywren.get_all_results(futures)
def main(thread, log=False):
    """Drive minibatch jobs until the ``total_time`` budget is used up.

    Submits an initial batch sized by the module-level ``batch_file_size``,
    starts ``thread``, then keeps waiting on the outstanding futures and
    submitting a replacement batch of the same size for as long as less than
    ``total_time`` seconds have elapsed.

    Args:
        thread: Object with a ``start()`` method; started once before the
            polling loop.
        log: Unused in this function.
    """
    global outf
    global total_time

    # Initialize model
    print("Starting Training" + '-' * 30)
    start_time = time.time()

    # start jobs
    minibatches = get_minibatches(batch_file_size)
    futures = start_batch(minibatches)
    processed = 0

    thread.start()
    print("Main thread start")
    while time.time() - start_time < total_time:
        print("hit", time.time() - start_time)
        try:
            pywren.get_all_results(futures)
        except Exception:
            # Best-effort retry on the next iteration. Catch Exception
            # rather than using a bare ``except`` so Ctrl-C can still stop
            # the loop.
            continue
        fin = len(futures)
        processed += fin
        if fin > 0:
            print("Processed: %d" % fin)
            # Replace the finished batch with a new one of the same size.
            minibatches = get_minibatches(fin)
            futures = start_batch(minibatches)
    print("Main thread has stopped")
import time

import numpy as np
import pywren
from pywrenext.progwait import progwait

wrenexec = pywren.default_executor()


def sleep(x):
    # Block for x seconds, then hand x back as the task result.
    time.sleep(x)
    return x


# Ten tasks with sleep durations 10, 11, ..., 19.
futures = wrenexec.map(sleep, np.arange(10, 20))

# progwait returns a pair; only the first element (the finished futures)
# is used here.
futures_done, _ = progwait(futures)
print(pywren.get_all_results(futures_done))
import math
import pickle

import boto3
import numpy as np
import pywren


def my_function(x):
    # Function shipped to the remote workers: adds 7 to its argument.
    return x + 7


wrenexec = pywren.default_executor()

# One invocation per integer 0..9; the collected results are discarded.
futures = wrenexec.map(my_function, range(0, 10))
pywren.get_all_results(futures)
if __name__ == '__main__': t0 = time.time() # Get number of pages to iterate over pages = html_soup.select('li.current') pages = int(pages[0].getText().split()[-1]) # Get a list of URLs to map over catalogue_urls = [ base_url + f'catalogue/page-{i}.html' for i in range(1, pages + 1) ] # Scrape every result page using Lambda book_list = pywren.get_all_results(pwex.map(scrape_books, catalogue_urls)) book_list = list(itertools.chain(*book_list)) # flatten list db['books'].insert_many(book_list) t1 = time.time() print(f'Finished finding books in {t1 - t0} seconds') # Scrape each book batch_size = int(sys.argv[1]) lambda_tasks, result = [], [] for i in range(0, batch_size): chunk = book_list[i::batch_size] book_ids = [b['book_id'] for b in chunk] lambda_tasks.append(pwex.map(scrape_book, book_ids)) # Wait for all scrapes to complete
def mmap(pwex, func, data):
    """Map ``func`` over ``data`` on ``pwex`` and return the collected results.

    Args:
        pwex: Executor object exposing ``map(func, data)``.
        func: Callable applied to each item.
        data: Iterable of items to map over.

    Returns:
        Whatever ``pywren.get_all_results`` returns for the created futures.
    """
    return pywren.get_all_results(pwex.map(func, data))
index += fin fs.extend(start_batch(minibatches)) fin = 0 iter = 0 while iter < 100: # Store model fin = 0 res = [] ded = [] t = time.time() print(pywren.get_all_results(fs)) print(time.time() - t) exit() exit() print("Start pool") t = time.time() pool = ThreadPool(6) resa = [] resa = pool.map(m, fs) print("End pool: %f" % (time.time() - t)) res = [] for a in resa:
import sys

import numpy as np
import pywren


def my_function(x):
    # Adds 7 to its argument.
    return x + 7


# Local call, for comparison with the remote results printed below.
print(my_function(3))


def version(x):
    # The argument is ignored; returns the first two version_info fields
    # of the interpreter that runs the call.
    return sys.version_info[:2]


print(version(None))

wrenexec = pywren.default_executor()

# Single asynchronous invocation through the executor.
future = wrenexec.call_async(version, 3)
print(future.result())

# Fan out over 0..9 and print the collected results.
futures = wrenexec.map(my_function, range(0, 10))
print(pywren.get_all_results(futures))