def timing_triple_cloud():
    execfile('picloud_venture_credentials.py')
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    exp_params['max_burn_time'] = 30
    exp_params['max_sample_time'] = 30
    exp_params['n_samples'] = 25
    print experiment.exp_params_to_str(exp_params)

    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat))
    data = {'observations': observed, 'missing': missing}

    model = models.product_IRM
    model_params = {'D': 1, 'alpha': 1, 'symmetric': True}

    # Timing run
    print 'Timing'
    job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params,
                        model_params, _max_runtime=5, _env=cloud_environment)
    time_per_mh_iter = cloud.result(job_id)['time_per_mh_iter']

    # Live run
    print 'Live'
    exp_params['intermediate_iter'] = max(1, int(round(0.9 * exp_params['max_sample_time'] /
                                                       (exp_params['n_samples'] * time_per_mh_iter))))
    job_id = cloud.call(experiment.network_cv_single_run, data, model, exp_params,
                        model_params, _max_runtime=5, _env=cloud_environment)
    cloud.join(job_id)
    print cloud.result(job_id)
def picloud():
    t1 = time.time()
    jids = cloud.map(testfunc, np.arange(K), _type="f2", _vol="my-vol", _env="base/precise")
    # get the results
    cloud.result(jids)
    t2 = time.time()
    return t2 - t1
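# `testfunc` and `K` are not defined in this excerpt; hypothetical stand-ins
# so the timing sketch above can run as-is:
K = 100

def testfunc(x):
    return x + 1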
def test_picloud(cloud, n, fast):
    begin = time.time()
    jid = cloud.call(get_prime, n, _high_cpu=True)
    cloud.result(jid)
    print n,
    print "took ",
    print time.time() - begin,
    print "seconds"
def runcode(self, code):
    """Execute a code object.

    When an exception occurs, self.showtraceback() is called to
    display a traceback. All exceptions are caught except SystemExit,
    which is reraised.

    A note about KeyboardInterrupt: this exception may occur elsewhere
    in this code, and may not always be caught. The caller should be
    prepared to deal with it.
    """
    try:
        job = cloud.call(cloud_run, code, self.locals)
        cloud.join(job)
        result = cloud.result(job)
        self.locals.update(result)
        info = cloud.info(job, ['stderr', 'stdout'])[job]
        sys.stdout.write(info['stdout'])
        sys.stderr.write(info['stderr'])
    except SystemExit:
        raise
    except KeyboardInterrupt:
        raise OperationAborted('Interrupted')
    except cloud.CloudException, e:
        self.showcloudtraceback(e)
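# `cloud_run` is referenced by runcode() above but not shown in this excerpt.
# A plausible sketch, assuming it executes the code object against the given
# namespace on the worker and ships the picklable bindings back:
def cloud_run(code, namespace):
    exec code in namespace
    # drop dunder entries, which are rarely picklable or useful to return
    return dict((k, v) for k, v in namespace.items() if not k.startswith('__'))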
def generate_items_detail_and_analytics_main_job(index, findingjobids):
    itemdetailjobids = []
    try:
        itemjobids, filenames = cloud.result(findingjobids)
    except Exception, e:
        cloud.cloud.cloudLog.exception(str(e))
        raise e
def generate_finding_calls(index, categoryjobid=None):
    findingjobids = []
    try:
        leafcategories, filenames = cloud.result(categoryjobid)
    except Exception, e:
        cloud.cloud.cloudLog.exception(str(e))
        raise e
def main():
    cloud.setkey(2329, '270cb3cccb9beb65d2f424b24ccbd5a920c5ccef')
    try:
        fn = raw_input()
        f = open(fn)
        L = float(f.readline())
        line = f.readline()
        data = []
        while line:
            d = map(eval, line.split())
            data.append(d)
            line = f.readline()
        f.close()
        n = len(data)
        Gs = np.array(data[0:n / 2])
        ls = np.array(data[n / 2::])
        outstr = ''
        outstr += 'submitting cross validation to picloud\n'
        cloud.config.max_transmit_data = 12000000
        start = time.time()
        jid = cloud.call(traintst, Gs, ls, L)
        outstr += 'submission time: %s\n' % str(time.time() - start)
        start = time.time()
        result = cloud.result(jid)
        outstr += 'cloud execution time: %s\n' % str(time.time() - start)
        outstr += 'misclassification rate: %f\n' % np.mean(result)
        outstr += 'standard deviation: %f\n' % np.std(result)
        outstr += '--------done---------------------'
        print outstr
    except:
        print 'an error occurred'
def do_network_call(itemidlistlist=[], callback=None):
    setcloudkey()
    etldriver = PiCloudEBayETLDriver()
    filenamelist = []
    try:
        jobids = []
        for sublist in itemidlistlist:
            jobid = cloud.call(etl.get_item_details, etldriver=etldriver, itemid=sublist,
                               _type="c1", _label="GET NOTIFICATION ITEM DETAILS")
            jobids.append(jobid)
            filename = make_file_name(itemid=sublist, datatype="itemdetail")
            filenamelist.append(filename)
        print jobids
        if len(jobids) > 0:
            cloud.result(jobids, ignore_errors=False)
    except Exception, e:
        print e
def patentComparer():
    print 'Started:'
    print datetime.now()

    #wordVectorDir = "C:\\Users\\Deepak\\Dropbox\\6.864 Project\\Word Vectors"
    wordVectorDir = "/Users/aratner/Dropbox/6.864 Project/Word Vectors"

    # Get patent claims
    patentData = getPatentData(200)
    print "Got patent data..."

    # Obtain the tree from the claim using the rule based parser / machine learnt model
    trees = [(x, hm_tree(y, 3)) for (x, y) in patentData]
    embeddingTrees = [(x, generateEmbeddingTree(tree, wordVectorDir)) for (x, tree) in trees]
    #print embeddingTrees
    #for embeddingTree in embeddingTrees:
    #    embeddingTree[1].draw()
    print "Generated trees for claims..."

    # Partition trees into two sets - a training set and a testing set
    (trainingTrees, testingTrees) = partitionTrees(embeddingTrees)

    # Run the unfolding recursive auto-encoder on the tree, using picloud
    #params = train_params(trainingTrees, math.pow(10,-5), 0.01, False)
    jid = cloud.call(train_params, trainingTrees, math.pow(10, -5), 0.01, True, 60, _type='c2')
    print 'jid = ' + str(jid)
    params = cloud.result(jid)
    save_params(params, '_finalpicloud')
    print 'RAE training complete, parameters saved, at:'
    print datetime.now()
    print "Done..."
def logS(N, S):
    job_id = cloud.call(mete.get_mete_rad, S, N, _type='m1')
    logSeries = cloud.result(job_id)
    #logSeries = mete.get_mete_rad(S, N)  # The expected SAD from the random sample
    logSeries = np.log(logSeries[0])
    plt.plot(logSeries, color='gray', lw=3,
             label='Log-series\nN=' + str(N) + ' S=' + str(S))
    print 'log-series: done'
    return
def outer_map(y):
    jids = cloud.map(inner_map, range(y))
    cloud.join(jids)
    results = cloud.result(jids)
    return list(results)
def geoS(N, S):
    job_id = cloud.call(get_LowerTrunc_GeomSeries, N, S, _type='m1')
    geomSeries = cloud.result(job_id)
    # local equivalent, commented out so it does not overwrite the cloud
    # result (cf. the commented-out local call in logS above):
    #geomSeries = get_LowerTrunc_GeomSeries(N, S)
    geomSeries = np.log(geomSeries)
    plt.plot(geomSeries, color='m', lw=3,
             label='Geometric series\nN=' + str(N) + ' S=' + str(S))
    print 'geometric series: done'
    return
def traintst(Gs, ls, L):
    m = np.shape(Gs)[0]
    traintest = []
    for i in range(10):
        sl = slice(i * m // 10, (i + 1) * m // 10)
        traintest.append((np.delete(Gs, sl, 0), np.delete(ls, sl, 0),
                          Gs[sl, :], ls[sl, :], L))
    jids = cloud.map(crossvalidate, traintest, _type='c1')
    return cloud.result(jids)
def timing_run_cloud():
    execfile('picloud_venture_credentials.py')
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    print experiment.exp_params_to_str(exp_params)

    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat))
    data = {'observations': observed, 'missing': missing}

    model = models.product_IRM
    model_params = {'D': 1, 'alpha': 1, 'symmetric': True}

    job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params,
                        model_params, _max_runtime=5, _env=cloud_environment)
    cloud.join(job_id)
    print cloud.result(job_id)
def run_experiments(experiment_args):
    if args.picloud:
        import cloud
        jids = cloud.map(run_single_experiment_wrapper, experiment_args,
                         _env="test", _type="c2")
        print "Now waiting for results..."
        results = cloud.result(jids)
        return zip(experiment_args, results)
    else:
        return zip(experiment_args,
                   [run_single_experiment(**a) for a in experiment_args])
def clear(prefix, remote):
    if remote:
        k = cloud.call(delete_prefix, prefix)
        return cloud.result(k)
    else:
        path = util.cachePath(prefix)
        if os.path.isdir(path):
            sh.rmtree(path)
        elif os.path.isfile(path):
            os.remove(path)
def calc_pi(): """Incorrect way""" # offload monte_carlo to the cloud, returns a Job Id jid = cloud.call(monte_carlo, total_tests, _type='c2') # block until job is done, and get result num_in_circle = cloud.result(jid) pi = (4 * num_in_circle) / float(total_tests) return pi
def hessian_on_the_cloud(name, args, chunk_size=500):
    print("submitting")
    dims = [A.size for A in args] + [len(args), len(args)]
    H = np.empty(dims, 'double')
    ws_product_chunked = grouper(itertools.product(*args), chunk_size)
    jids = cloud.map(lambda ws: _hessian_on_the_cloud(name, ws), ws_product_chunked)
    print("waiting")
    chunked_results = cloud.result(jids)
    print("assembling")
    H.flat = list(itertools.chain.from_iterable(chunked_results))
    return H
def main():
    jobs = []
    for seed in [0, 1, 2, 3, 4]:
        for learner_class_index in [0, 1]:
            jobs.append((learner_class_index, seed))
    print "Scheduling jobs..."
    jids = cloud.map(run, jobs, _type="f2")
    print "Waiting for results..."
    results = cloud.result(jids)
    for job, result in zip(jobs, results):
        print job, result
def retrieve(jids):
    naccounts = len(jids)
    results = []
    # Retrieves the results
    for i in xrange(naccounts):
        api_key = api_keys[i]
        api_secretkey = api_secretkeys[i]
        cloud.setkey(api_key=api_key, api_secretkey=api_secretkey)
        print "Retrieving results for account %d..." % (i + 1)
        results.extend(cloud.result(jids[i]))
    return results
def distrubutedly_train_terms(train_terms, terms, chunk_size=1000, use_cloud=False):
    logging.info(u'Starting distributed training <{0}>'.format(
        ['locally', 'remotely'][int(use_cloud)]))
    if use_cloud:
        import cloud
        job_ids = cloud.map(train_terms, terms)  # was `job_idds`, a typo
        term_iters = cloud.result(job_ids)
    else:
        import collections
        chunks = collections.defaultdict(list)
        chunk_count = len(terms) / chunk_size + 1
        for term in terms:
            chunks[hash(term) % chunk_count].append(term)
        term_iters = map(train_terms, chunks.values())
    return itertools.chain.from_iterable(term_iters)
def storeEvalResult(self, task, evalResult):
    # because picloud returns a ticket number
    # we need to store off that ticket number
    # and then get the results of that ticket number
    # and that's the actual evalResult
    ticketId = evalResult
    evalResult = cloud.result(ticketId)
    PythonModule.PythonEval.storeEvalResult(self, task, evalResult)
    return
def main():
    names = list(uai_net.names(500))
    print "Submitting {} jobs...".format(len(names))
    jids = cloud.map(run_job, names, _type="f2")
    print "Waiting for results..."
    for name, marginals_string, is_exact in cloud.result(jids):
        print name
        exact = "true" if is_exact else "approx"
        f = open(os.path.join(os.path.dirname(__file__),
                              "../data/marginals/uai/{}.{}.mar".format(name, exact)), "w")
        f.write(marginals_string)
        f.close()
def Jgrad_picloud(params, encoding_size, decoded_size, training, wd, num_cores):
    W_e, W_d, b_e, b_d = unroll_params(params, encoding_size, decoded_size)
    gradW_e = np.zeros(W_e.shape)
    gradW_d = np.zeros(W_d.shape)
    gradb_e = np.zeros(b_e.shape)
    gradb_d = np.zeros(b_d.shape)

    # split the training set into batches, send out to picloud cores for backprop
    # offset = num_cores - len(training) % num_cores
    # for index in range(offset):
    #     training.
    split = len(training) / num_cores
    final_training = []
    for i in range(num_cores):
        final_training.append(training[i * split:(i + 1) * split])
    offset = len(training) % num_cores
    if offset > 0:
        final_training.append(training[len(training) - offset:])
    jids = cloud.map(Jgrad_picloud_sub,
                     [params] * num_cores,
                     [encoding_size] * num_cores,
                     [decoded_size] * num_cores,
                     final_training,
                     _type="c2")

    # call for results
    results = cloud.result(jids)
    for result in results:
        gradW_e += result[0]
        gradW_d += result[1]
        gradb_e += result[2]
        gradb_d += result[3]

    # add weight decay factor and normalization coefficient
    a = 1.0 / len(training)
    grad_J_W_e = a * gradW_e + wd * W_e
    grad_J_W_d = a * gradW_d + wd * W_d
    grad_J_b_e = a * gradb_e
    grad_J_b_d = a * gradb_d

    # roll up and return as 1-d array
    return np.concatenate((grad_J_W_e.flatten(), grad_J_W_d.flatten(),
                           grad_J_b_e.flatten(), grad_J_b_d.flatten()))
def calc_pi():
    num_jobs = 8
    tests_per_call = total_tests / num_jobs
    # argument list has 8 duplicate elements
    jids = cloud.map(monte_carlo, [tests_per_call] * num_jobs, _type='c2')
    # get list of all counts
    num_in_circle_list = cloud.result(jids)
    # sum all counts
    num_in_circle = sum(num_in_circle_list)
    pi = (4 * num_in_circle) / float(total_tests)
    return pi
def trainIntermediateRAE():
    print 'Started:'
    print datetime.now()

    #wordVectorDir = "C:\\Users\\Deepak\\Dropbox\\6.864 Project\\Word Vectors"
    wordVectorDir = "/Users/aratner/Dropbox/6.864 Project/Word Vectors"

    # Get patent claims
    patentData = getPatentData(500)
    print "Got patent data..."

    # Obtain the tree from the claim using the rule based parser / machine learnt model
    trees = [(x, hm_tree(y, 3)) for (x, y) in patentData]
    totalTrainingdata = []
    for treeData in trees:
        tree = treeData[1]
        #tree.draw()
        label = treeData[0]
        totalTrainingdata.extend(generatePhraseTree(tree, wordVectorDir))
    print "Generated trees for claims..."

    # Partition trees into two sets - a training set and a testing set
    #(trainingTrees, testingTrees) = partitionTrees(totalTrainingdata)
    trainingTrees = totalTrainingdata
    testingTrees = totalTrainingdata
    #for tree in trainingTrees:
    #    if (len(tree) > 1):
    #        tree.draw()

    # Run the unfolding recursive auto-encoder on the tree
    jid = cloud.call(train_params, trainingTrees, math.pow(10, -5), 0.01, True, 60, _type='c2')
    print 'jid =' + str(jid)
    params = cloud.result(jid)
    save_params(params, '_finalpicloud_subtrees')
    print 'RAE training complete, at:'
    print datetime.now()
    print "Done..."
def get_shares_bulk(urls, limit=-1, use_cloud=False):
    if use_cloud:
        # using picloud. parallelizing on chunks
        chunks = list(parallel.partitions(urls, 100))

        # the mapped function receives a chunk of URLs, not a single URL
        # (the original parameter name shadowed the inner `url`)
        def f(chunk):
            return map(lambda url: (url, get_shares(url)), chunk)

        jids = cloud.map(f, chunks)
        ret = list(itertools.chain(*cloud.result(jids)))
    else:
        # local. parallelizing using pool
        ret = list(parallel.imap(get_shares, urls, threads=10))
    ret.sort(key=lambda (u, r): -r['shares_count'])
    if limit == -1:
        return ret
    else:
        return ret[:limit]
def call_on_cloud(cmd_params, core_type, num_batches, start_batch_num, end_batch_num):
    ntests = len(cmd_params)
    batch_size = int(math.ceil(ntests / (num_batches + 0.0)))
    batch_edges = batch_size * np.array(xrange(num_batches))[start_batch_num:end_batch_num]
    print batch_edges
    for i in xrange(len(batch_edges)):
        if i == len(batch_edges) - 1:
            cmds = cmd_params[batch_edges[i]:]
        else:
            cmds = cmd_params[batch_edges[i]:min(batch_edges[i + 1], len(cmd_params))]
        print colorize("calling on cloud..", "yellow", True)
        try:
            jids = cloud.map(run_sim_test, cmds, _vol='rss_dat', _env='RSS3', _type=core_type)
            res = cloud.result(jids)
            print colorize("got results for batch %d/%d " % (i, len(batch_edges)), "green", True)
            save_results(res)
        except Exception as e:
            print "Found exception %s. Not saving data for this demo." % e
def picloud(func, *args, **kwargs):
    """
    Runs the given function in parallel over the PiCloud cluster.

    Parameters
    ----------
    func : function
        Function to run in parallel.

    In addition to the function 'func' to be run in parallel, the picloud
    function accepts a series of arguments that are passed to the function
    as variables. In general, the function can have multiple input
    variables, and these arguments must be passed in the same order as
    they are defined in the function definition.

    Furthermore, several keyword arguments may be given that set the
    settings for the PiCloud cluster:

    _type  - Type of core used in picloud: 'c1', 'c2', 'f2' (default), 'm1', 's1'
    _cores - Number of cores used: 1 (default)
    _env   - Custom environment for computation. Set to current version of qutip.
    _label - Provide a label for the current computation.

    For more information see the PiCloud website: http://www.picloud.com/
    """
    kw = _default_cloud_settings()
    for keys in kwargs.keys():
        if keys not in kw.keys():
            raise Exception(str(keys) + ' is not a valid kwarg.')
        else:
            kw[keys] = kwargs[keys]
    job_ids = cloud.map(func, *args, **kw)
    results = cloud.result(job_ids)
    if isinstance(results[0], tuple):
        num_elems = len(results[0])
        return [np.array([elem[ii] for elem in results])
                for ii in range(num_elems)]
    else:
        return list(results)
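# A minimal usage sketch for the picloud() wrapper above; `parfunc` is a
# hypothetical worker function, not part of the original source:
def parfunc(x):
    return x ** 2

# maps parfunc over the sequence on the cluster and gathers the results
squares = picloud(parfunc, range(10), _label='demo')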
def cloud_map(func, args, jobs=None, return_jobs=False, **cloud_opts):
    """
    Call cloud.map, with some standard logging info

    Parameters
    ----------
    func : function to map
    args : list of mapping arguments
    jobs : list of pre-existing job ids, or None
        If present, will fetch the results from these jobs
    return_jobs : boolean (optional, default false)
        If True, return the job IDs instead of the job results
    cloud_opts : dict (optional)
        Extra keyword arguments to pass to cloud.map

    Returns
    -------
    Result of cloud.map if return_jobs=False, else the job ids
    """
    import cloud
    cloud_opts.setdefault('_env', 'mwp')
    cloud_opts.setdefault('_type', 'c2')
    cloud_opts.setdefault('_label', func.__name__)

    if jobs is None:
        log = logging.getLogger(func.__module__)
        log.debug("Starting %i jobs on PiCloud for %s" % (len(args), func.__name__))
        jobs = cloud.map(func, args, **cloud_opts)
        log.debug("To re-fetch results, use \n"
                  "%s(jobs=range(%i, %i))" % (func.__name__, min(jobs), max(jobs) + 1))

    if return_jobs:
        return jobs
    return cloud.result(jobs)
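# Hedged usage sketch for cloud_map() above; `process_item` is a hypothetical
# mapped function, not a name from the original source:
def process_item(x):
    return x * 2

results = cloud_map(process_item, [1, 2, 3])                 # submit and fetch
jobs = cloud_map(process_item, [1, 2, 3], return_jobs=True)  # submit only
results = cloud_map(process_item, [1, 2, 3], jobs=jobs)      # re-fetch later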
def url_chunker(url, chunksize=1024):
    """Fetch a URL, split its contents into chunks of `chunksize` lines,
    run a distributed word count over the chunks, and store the reduced
    index in mongo.

    *Params*
    # url - the URL to fetch
    # chunksize - how many lines to read at once?
    """
    #url=book[0]
    #bookname=book[1]
    user_agent = {'User-agent': 'Mozilla/5.0'}
    result = requests.get(url, headers=user_agent)
    try:
        doc = result.content
    except:
        raise Exception("URL " + url + " not responding")
    text_in = StringIO(doc)
    chunks = []
    stop = False
    while not stop:
        text = ""
        for x in range(chunksize):
            try:
                text += text_in.next()
            except StopIteration:
                stop = True
                break
        # append once per pass; the original also appended inside the
        # except clause, which duplicated the final partial chunk
        chunks.append(text)
    jobids = cloud.map(wordcount, [(url, c) for c in chunks])
    cloud.join(jobids, deadlock_check=False)
    results = cloud.result(jobids)
    index = reduce_results(results)
    mongo_insert(index)
    return "OK"
def run_ip():
    # Figure out how many jobs I want to create and how many requests per job
    job_count = int(sys.argv[1])
    job_rows = range(0, job_count)

    # Now actually map them to run in the cloud
    # The "s1" type gives unique IP addresses. Eek
    print "Creating job map for {0} jobs.".format(len(job_rows))
    jids = cloud.map(download_ip, job_rows, _type="s1")
    print "Waiting for jobs to complete."

    # The possible statuses and the statuses we are waiting for
    possible_job_statuses = ["waiting", "queued", "processing", "done",
                             "error", "killed", "stalled"]
    pending_job_statuses = Set(["waiting", "queued", "processing"])

    # Keep looping until no job statuses are in the pending_job_statuses
    statuses = []
    while True:
        statuses = cloud.status(jids)
        tally = Counter()
        for status in statuses:
            tally[status] += 1
        print "Status of jobs: " + str(tally)
        # If none of the statuses are in pending_job_statuses, we are done!
        if len(pending_job_statuses.intersection(Set(statuses))) == 0:
            break
        # Wait for 5 seconds between checks
        sleep(5)

    # Now loop through the jobs and retrieve the results
    ip_counter = Counter()
    results = cloud.result(jids)
    for result in results:
        ip_counter[result] += 1
    print "IP Addresses: " + str(ip_counter)
def calc_pi(): """Almost correct way""" num_jobs = 8 tests_per_call = total_tests/num_jobs # list of job ids for all jobs we're spawning jids = [] for _ in range(num_jobs): # call() does not block, so jobs run in parallel jid = cloud.call(monte_carlo, tests_per_call, _type='c2') jids.append(jid) # aggregate the number of darts that land in the circle # across all jobs that we spawned num_in_circle = 0 for jid in jids: num_in_circle += cloud.result(jid) pi = (4 * num_in_circle) / float(total_tests) return pi
def retrieve_job(lon):
    """
    Retrieve the results of a previous job submission, and save to an hdf5 file

    This creates/overwrites a file at ../data/full_search/<lon>.h5

    Parameters
    ----------
    lon : int
        Longitude to retrieve
    """
    import cloud
    jobs = fetch_job_ids(lon)
    stamps = np.array(field_stamps(lon), dtype=np.float32)
    scores = np.hstack(cloud.result(jobs)).astype(np.float32)

    # write to file
    result_file = os.path.join(result_dir, "%3.3i.h5" % lon)
    with h5py.File(result_file, 'w') as f:
        f.create_dataset('stamps', data=stamps, compression=9)
        f.create_dataset('scores', data=scores, compression=9)
def fetch_results(self, iters=None, via_remote=False, run_mode='local'):
    """
    Returns the result of the job that has already been run as a
    :py:class:`History` object. Typically you would call :py:meth:`run`
    first, then call :py:meth:`fetch_results` to get the results.

    The method has various options to control how much of the job is
    returned, to avoid excessive memory usage and data transfer between
    the cloud and local machine.

    :param iters: If *iters* is an iterable, returns only the iterations
      of the chain in *iters*. If *iters* is a scalar, return every
      *iters* state (the stride). If None, returns all states.
    :param via_remote: If *True*, executes the state filtering on the
      cloud before transferring the data to the local machine. If *False*,
      filters the state on the local machine.
    :param run_mode: Controls whether to search for the results on the
      local machine or on the cloud. Can be *local* or *cloud*.

    :return: A :py:class:`History` object that contains a filtered version
      of the states of the Markov chain visited when this job ran.
    """
    def f():
        if run_mode == 'cloud':
            cloud.join([self.job_id])
            store = storage.CloudStore()
        else:
            store = storage.LocalStore()
        full_history = store[self.params]
        partial_history = History()
        if iters is None:
            partial_history.states = full_history.states
        else:
            if isinstance(iters, int):
                # iters interpreted as stride
                iter_set = range(0, len(full_history.states), iters)
            else:
                iter_set = iters
            partial_history.states = [state for state in full_history.states
                                      if state.iter in iter_set]
        partial_history.job = self
        partial_history.summary = full_history.summary
        return partial_history

    if via_remote:
        job_id = cloud.call(f, _env=picloud_env)
        return cloud.result(job_id)
    else:
        return f()
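# Hedged usage sketch for fetch_results() above; `job` stands in for an
# already-run job object and is not a name from the original source:
# every 10th state, filtered remotely before transfer
history = job.fetch_results(iters=10, via_remote=True, run_mode='cloud')
# only iterations 0, 5, and 9, read from the local store
history = job.fetch_results(iters=[0, 5, 9])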
def cloud_result(jid):
    result = cloud.result(jid)
    print "Retrieved results for trial %s" % jid
    return result
for s in range(0, S):
    coef_old[:, s] = Vf[s].getCoeffs()

Nmax = 100

def solveOnCloud(Vf, c_policy, xprime_policy):
    diff = [0] * Nmax
    for i in range(0, Nmax):
        Vf, c_policy, xprime_policy = bellman.iterateBellmanOnCloud(
            Vf, c_policy, xprime_policy, Para, nCloud=5)
        for s_ in range(0, S):
            diff[i] = max(diff[i], np.max(np.abs(coef_old[:, s_] - Vf[s_].getCoeffs())))
            coef_old[:, s_] = Vf[s_].getCoeffs()
    return Vf, c_policy, xprime_policy, diff

jid = cloud.call(solveOnCloud, Vf, c_policy, xprime_policy, _env="gspy_env", _type='m1')
Vf, c_policy, xprime_policy, diff = cloud.result(jid)

# Now fit accurate Policy functions
nx = max(min(Para.nx * 10, 1000), 1000)
xgrid = np.linspace(Para.xmin, Para.xmax, nx)
#c_policy, xprime_policy = bellman.fitNewPolicies(xgrid, Vf, c_policy, xprime_policy, Para)
# <codecell>

# pull up status -- refresh until done
cloud.status(jid)

# <codecell>

# this will block until job is done or errors out
cloud.join(jid)

# <codecell>

# get your result
cloud.result(jid)

# <codecell>

# get some basic info
cloud.info(jid)

# <codecell>

# get some specific info
cloud.info(jid, info_requested=['created', 'finished', 'runtime', 'cputime'])

# <headingcell level=1>

# What I got the first time
    ==PARAMS==
    results: A list of Counter() objects, as produced by cloud.result() method

    ==RETURNS==
    a Counter() object with total word-counts for the whole body of text
    """
    total_wordcount = Counter()
    for r in results:
        total_wordcount.update(r)
    return total_wordcount

##job_ids=cloud.map(wordcount,chunker(f))

## where are the files we care about?
path = '../www.gutenberg.lib.md.us/etext00'

## start cloud jobs over chunks of text
job_ids = cloud.map(wordcount, filechunker(path))

while True:
    c = cloud_status(job_ids)
    print c
    if c['processing'] == 0:
        break
    else:
        sleep(10)

res = cloud.result(job_ids)
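## `cloud_status` is used in the polling loop above but is not defined in
## this excerpt; a hedged stand-in that tallies cloud.status() values
## (Counter returns 0 for missing keys, so c['processing'] works as above):
def cloud_status(job_ids):
    return Counter(cloud.status(job_ids))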
DATA_DIR = "data/fl" def ddir(x): return os.path.join(DATA_DIR, x) REPORT_DIR = "results" def rdir(x): return os.path.join(REPORT_DIR, x) dirs = glob(DATA_DIR + "/*") datasets = [x[len(DATA_DIR) + 1:] for x in dirs] def per_frame_wrapper(dname): algo_data = track.per_frame(ddir(dname), methods.centroid_frame, {'thold': 240}) jids = cloud.map(per_frame_wrapper, datasets, _type='f2', _vol="my-vol", _env='base/precise') cloud.result(jids)
randomword3 = randomword(5)
if engdict.check(randomword3) == True:
    randomkey3 = randomword3 + str(random.randint(0, 99))
elif engdict.check(randomword3) == False:
    englist = engdict.suggest(randomword3)
    if len(englist) > 0:
        randomkey3 = englist[0] + str(random.randint(0, 99))
    else:
        randomkey3 = randomword3 + str(random.randint(0, 99))

# the original condition tested the truthiness of bare strings;
# each name must be checked against locals() individually
if 'randomkey0' in locals() and 'randomkey3' in locals() and 'randomkey1' in locals():
    whasher0 = hashlib.new("md5")
    whasher0.update(randomkey0)
    whasher3 = hashlib.new("md5")
    whasher3.update(randomkey3)
    whasher1 = hashlib.new("md5")
    whasher1.update(randomkey1)
    print(randomkey0 + " + " + str(whasher0.hexdigest()) + "\n")
    print(randomkey3 + " + " + str(whasher3.hexdigest()) + "\n")
    print(randomkey1 + " + " + str(whasher1.hexdigest()) + "\n")
    fileb.write(randomkey0 + " + " + str(whasher0.hexdigest()) + "\n")
    fileb.write(randomkey3 + " + " + str(whasher3.hexdigest()) + "\n")
    fileb.write(randomkey1 + " + " + str(whasher1.hexdigest()) + "\n")
    jid = cloud.call(randomword, 5)  # randomword(5) evaluated on PiCloud
    cloud.result(jid)
    print('Value added to cloud')
    print('Password added')
    mainroutine()
def main():
    # pass the function itself (not its result), and use the _type keyword
    jid = cloud.call(search_yellow, _type='m1')
    cloud.result(jid)
TRIALS = int(sys.argv[1])
NUM_PARTICLES = int(sys.argv[2])
DELTA = int(sys.argv[3])
INTEGRAL_PATHS = int(sys.argv[4])

def run_on_instance(trial_id):
    global number_of_clusters
    global if_zero_shortlearning
    global experiment_name
    import subprocess
    import os
    os.environ['DISPLAY'] = ":1"
    print "Starting"
    ls_output = subprocess.Popen(["/home/picloud/julia/julia", "runner.jl",
                                  str(NUM_PARTICLES), str(DELTA), str(INTEGRAL_PATHS)],
                                 cwd="/home/picloud/DPMixtureModel/DPMM_SMC/",
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = ls_output.communicate()
    return out

#result = run_on_instance([1])
jids = cloud.map(run_on_instance, range(TRIALS), _env=cloud_environment, _type='c2', _cores=1)
print jids
result = cloud.result(jids)
pickle.dump(result, open("result_" + str(NUM_PARTICLES) + "particles_" + str(DELTA) +
                         "delta_" + str(INTEGRAL_PATHS) + "path.pkl", "wb"))
print "RESULT:", result
                 gamma, p_sample, detection_step, min_dist_step,
                 detection_window_hrs, req_consec_detections)
param_product_old = set(param_product_old)
param_product_new = set(param_product_new)
param_product = param_product_new.difference(param_product_old)
"""

jids = cloud.map(detect_trials, *zip(*param_product), _type='f2')
params_sub_jids = cloud.result(jids)
params = [elt[0] for elt in params_sub_jids]
sub_jids = [elt[1] for elt in params_sub_jids]
stats = cloud.result(sub_jids)

dt = datetime.now()
# Write out as plain text just in case.
out_path_txt = 'data/param_explore_%d%d%d%d%d%d.txt' % \
    (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
open(out_path_txt, 'w').write(str((params, stats)))

params, stats = fix_results_nesting((params, stats))
out_path_pkl = 'data/param_explore_%d%d%d%d%d%d.pkl' % \
    (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
store_results((params, stats), out_path_pkl)
def get_dataset(self):
    if self.usecloud:
        return cloud.result(self.preprocess_job)
    else:
        return self._samples, self._labels
        'dataset_name': dataset_name,
        'dataset_dir': dataset_dir,
        'jids': jids
    }, open(outfile_wait, 'w'))

@transform(score_frame_queue, regex(r"(.+).wait.(.+)$"), [r"\1.pickle", r"\1.npz"])
def score_frame_wait((infile_wait, infile_npz), (outfile_pickle, outfile_npz)):
    dnpz = np.load(infile_npz)
    p = pickle.load(open(infile_wait))
    jids = p['jids']
    if USE_CLOUD:
        results = cloud.result(jids)
    else:
        results = [x for x in jids]
    scores = np.concatenate(results)
    np.savez_compressed(outfile_npz, scores=scores, **dnpz)
    pickle.dump(p, open(outfile_pickle, 'w'))

@transform(score_frame_wait, suffix(".pickle"), [".png", ".hist.png"])
def plot_likelihood((infile_pickle, infile_npz), (outfile, outfile_hist)):
    data = np.load(infile_npz)
    data_p = pickle.load(open(infile_pickle))
    scores = data['scores']
    sv = create_state_vect(data['y_range'], data['x_range'],
                           data['phi_range'], data['theta_range'])