Example no. 1
def timing_triple_cloud():
    execfile('picloud_venture_credentials.py')
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    exp_params['max_burn_time'] = 30
    exp_params['max_sample_time'] = 30
    exp_params['n_samples'] = 25
    print experiment.exp_params_to_str(exp_params)
    
    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))
    data = {'observations' : observed, 'missing' : missing}
    
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    
    # Timing run
    print 'Timing'
    job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment)
    time_per_mh_iter = cloud.result(job_id)['time_per_mh_iter']
    
    # Live run
    print 'Live'
    exp_params['intermediate_iter'] = max(1, int(round(0.9 * exp_params['max_sample_time'] / (exp_params['n_samples'] * time_per_mh_iter))))
    job_id = cloud.call(experiment.network_cv_single_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment)
    cloud.join(job_id)
    print cloud.result(job_id)
Example no. 2
def picloud():
    t1 = time.time()
    jids = cloud.map(testfunc, np.arange(K), _type="f2", _vol="my-vol", _env="base/precise")
    # get the results
    cloud.result(jids)
    t2 = time.time()
    return t2 - t1
Example no. 3
def test_picloud(cloud, n, fast):
    begin = time.time()
    jid = cloud.call(get_prime, n, _high_cpu=True)
    cloud.result(jid)
    print n,
    print "took ",
    print time.time() - begin,
    print "seconds"
Example no. 4
def test_picloud(cloud, n, fast):
    begin = time.time()
    jid = cloud.call(get_prime, n, _high_cpu=True)
    cloud.result(jid)
    print n,
    print "took ",
    print time.time() - begin,
    print "seconds"
Example no. 5
def picloud():
    t1 = time.time()
    jids = cloud.map(testfunc,
                     np.arange(K),
                     _type='f2',
                     _vol="my-vol",
                     _env="base/precise")
    # get the results
    cloud.result(jids)
    t2 = time.time()
    return t2 - t1
Example no. 6
    def runcode(self, code):
        """Execute a code object.

        When an exception occurs, self.showtraceback() is called to
        display a traceback.  All exceptions are caught except
        SystemExit, which is reraised.

        A note about KeyboardInterrupt: this exception may occur
        elsewhere in this code, and may not always be caught.  The
        caller should be prepared to deal with it.

        """
        try:
            job = cloud.call(cloud_run, code, self.locals)
            cloud.join(job)
            result = cloud.result(job)
            self.locals.update(result) 
            info = cloud.info(job, ['stderr', 'stdout'])[job]
            sys.stdout.write(info['stdout'])
            sys.stderr.write(info['stderr'])
        except SystemExit:
            raise
        except KeyboardInterrupt:
            raise OperationAborted('Interrupted')
        except cloud.CloudException, e:
            self.showcloudtraceback(e)
Example no. 7
def generate_items_detail_and_analytics_main_job(index, findingjobids):
    itemdetailjobids = []
    try:
        itemjobids, filenames = cloud.result(findingjobids)
    except Exception, e:
        cloud.cloud.cloudLog.exception(str(e))
        raise
Example no. 8
def generate_finding_calls(index, categoryjobid=None):
    findingjobids = []
    try:
        leafcategories, filenames = cloud.result(categoryjobid)
    except Exception, e:
        cloud.cloud.cloudLog.exception(str(e))
        raise
Example no. 9
def main():
    cloud.setkey(2329, '270cb3cccb9beb65d2f424b24ccbd5a920c5ccef')
    try:
        fn = raw_input()
        f = open(fn)
        L = float(f.readline())
        line = f.readline()
        data = []
        while line:
            d = map(eval, line.split())
            data.append(d)
            line = f.readline()
        f.close()
        n = len(data)
        Gs = np.array(data[0:n / 2])
        ls = np.array(data[n / 2::])
        outstr = ''
        outstr += 'submitting cross validation to picloud\n'
        cloud.config.max_transmit_data = 12000000
        start = time.time()
        jid = cloud.call(traintst, Gs, ls, L)
        outstr += 'submission time: %s\n' % str(time.time() - start)
        start = time.time()
        result = cloud.result(jid)
        outstr += 'cloud execution time: %s\n' % str(time.time() - start)
        outstr += 'misclassification rate: %f\n' % np.mean(result)
        outstr += 'standard deviation:     %f\n' % np.std(result)
        outstr += '--------done---------------------'
        print outstr
    except:
        print 'an error occurred'
Example no. 10
def do_network_call(itemidlistlist=[], callback=None):
    setcloudkey()
    etldriver = PiCloudEBayETLDriver()
    filenamelist = []
    try:
        jobids = []
        for sublist in itemidlistlist:
            jobid = cloud.call(etl.get_item_details, etldriver=etldriver, itemid=sublist, _type="c1", _label="GET NOTIFICATION ITEM DETAILS")
            jobids.append(jobid)
            filename = make_file_name(itemid=sublist, datatype="itemdetail")
            filenamelist.append(filename)
        print jobids
        if len(jobids) > 0:
            cloud.result(jobids, ignore_errors=False)
    except Exception, e:
        print e
Example no. 11
def main():
    cloud.setkey(2329,'270cb3cccb9beb65d2f424b24ccbd5a920c5ccef')   
    try:
        fn = raw_input()
        f = open(fn)
        L = float(f.readline())           
        line = f.readline()        
        data = []
        while line:
            d = map(eval,line.split())
            data.append(d)
            line = f.readline()         
        f.close()
        n = len(data)            
        Gs = np.array(data[0:n/2])
        ls = np.array(data[n/2::])
        outstr = ''
        outstr += 'submitting cross validation to picloud\n'
        cloud.config.max_transmit_data=12000000    
        start = time.time()
        jid = cloud.call(traintst,Gs,ls,L)  
        outstr += 'submission time: %s\n' %str(time.time()-start)
        start = time.time()    
        result = cloud.result(jid) 
        outstr += 'cloud execution time: %s\n' %str(time.time()-start)      
        outstr += 'misclassification rate: %f\n' %np.mean(result)
        outstr += 'standard deviation:     %f\n' %np.std(result)         
        outstr += '--------done---------------------'  
        print outstr
    except:
        print 'an error occurred'     
Example no. 12
def patentComparer():
  print 'Started:'
  print datetime.now()
  #wordVectorDir = "C:\\Users\\Deepak\\Dropbox\\6.864 Project\\Word Vectors"
  wordVectorDir = "/Users/aratner/Dropbox/6.864 Project/Word Vectors"

  # Get patent claims
  patentData = getPatentData(200)
  print "Got patent data..."

  # Obtain the tree from the claim using the rule based parser / machine learnt model
  trees = [(x,hm_tree(y,3)) for (x,y) in patentData]
  embeddingTrees = [(x,generateEmbeddingTree(tree, wordVectorDir)) for (x,tree) in trees]

  #print embeddingTrees
  #for embeddingTree in embeddingTrees:
  #  embeddingTree[1].draw()

  print "Generated trees for claims..."

  # Partition trees into two sets - a training set and a testing set
  (trainingTrees, testingTrees) = partitionTrees(embeddingTrees)

  # Run the unfolding recursive auto-encoder on the tree, using picloud
  #params = train_params(trainingTrees, math.pow(10,-5), 0.01, False)
  jid = cloud.call(train_params, trainingTrees, math.pow(10,-5), 0.01, True, 60, _type='c2')
  print 'jid = ' + str(jid)
  params = cloud.result(jid)
  save_params(params, '_finalpicloud')
  print 'RAE training complete, parameters saved, at:'
  print datetime.now()

  print "Done..."
Example no. 13
def logS(N, S):
    job_id = cloud.call(mete.get_mete_rad, S, N, _type='m1')
    logSeries = cloud.result(job_id)   
    #logSeries = mete.get_mete_rad(S, N) # The expected SAD from the random sample
    logSeries = np.log(logSeries[0])
    plt.plot(logSeries, color='gray', lw=3, label='Log-series\nN='+str(N)+' S='+str(S))
    print 'log-series: done'
    return
Example no. 14
def outer_map(y):
    jids = cloud.map(inner_map, range(y))

    cloud.join(jids)

    results = cloud.result(jids)

    return list(results)
Example no. 15
def geoS(N, S):
    job_id = cloud.call(get_LowerTrunc_GeomSeries, N, S, _type='m1')
    geomSeries = cloud.result(job_id)
    #geomSeries = get_LowerTrunc_GeomSeries(N, S) # local version; would overwrite the cloud result
    geomSeries = np.log(geomSeries)
    plt.plot(geomSeries, color='m',lw=3,label='Geometric series\nN='+str(N)+' S='+str(S))
    print 'geometric series: done'
    return
Example no. 16
def traintst(Gs,ls,L):
    m = np.shape(Gs)[0]
    traintest = []
    for i in range(10):
        sl = slice(i*m//10,(i+1)*m//10)
        traintest.append( (np.delete(Gs,sl,0),np.delete(ls,sl,0),Gs[sl,:],ls[sl,:],L) )
    jids = cloud.map(crossvalidate,traintest,_type='c1') 
    return cloud.result(jids)    
Example no. 17
def timing_run_cloud():
    execfile('picloud_venture_credentials.py')
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    print experiment.exp_params_to_str(exp_params)
    
    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))
    data = {'observations' : observed, 'missing' : missing}
    
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment)
    cloud.join(job_id)
    print cloud.result(job_id)
Example no. 18
def outer_map(y):
    jids=cloud.map(inner_map,range(y))
    
    cloud.join(jids)
    
    results = cloud.result(jids)
    
    return list(results)
Example no. 19
def run_experiments(experiment_args):
    if args.picloud:
        import cloud
        jids = cloud.map(run_single_experiment_wrapper, experiment_args, _env="test", _type="c2")
        print "Now waiting for results..."
        results = cloud.result(jids)
        return zip(experiment_args, results)
    else:
        return zip(experiment_args, [run_single_experiment(**a) for a in experiment_args])
Example no. 20
def traintst(Gs, ls, L):
    m = np.shape(Gs)[0]
    traintest = []
    for i in range(10):
        sl = slice(i * m // 10, (i + 1) * m // 10)
        traintest.append((np.delete(Gs, sl,
                                    0), np.delete(ls, sl,
                                                  0), Gs[sl, :], ls[sl, :], L))
    jids = cloud.map(crossvalidate, traintest, _type='c1')
    return cloud.result(jids)
Example no. 21
def clear(prefix, remote):
    if (remote):
        k = cloud.call(delete_prefix, prefix)
        return cloud.result(k)
    else:
        path = util.cachePath(prefix)
        if (os.path.isdir(path)):
            sh.rmtree(path)
        elif (os.path.isfile(path)):
            os.remove(path)
Example no. 22
def calc_pi():
    """Incorrect way"""

    # offload monte_carlo to the cloud, returns a Job Id
    jid = cloud.call(monte_carlo, total_tests, _type='c2')

    # block until job is done, and get result
    num_in_circle = cloud.result(jid)

    pi = (4 * num_in_circle) / float(total_tests)
    return pi
Example no. 24
def hessian_on_the_cloud(name, args, chunk_size = 500):
    print("submitting")
    dims = [ A.size for A in args ] + [len(args), len(args)]
    H = np.empty(dims, 'double')
    ws_product_chunked = grouper(itertools.product(*args), chunk_size)
    jids = cloud.map(lambda ws: _hessian_on_the_cloud(name, ws), ws_product_chunked)
    print("waiting")
    chunked_results = cloud.result(jids)
    print("assembling")
    H.flat = list(itertools.chain.from_iterable(chunked_results))
    return H
Example no. 25
def main():
    jobs = []
    for seed in [0, 1, 2, 3, 4]:
        for learner_class_index in [0, 1]:
            jobs.append((learner_class_index, seed))
    print "Scheduling jobs..."
    jids = cloud.map(run, jobs, _type="f2")
    print "Waiting for results..."
    results = cloud.result(jids)
    for job, result in zip(jobs, results):
        print job, result
Example no. 26
def retrieve(jids):
    naccounts = len(jids)
    results = []
    # Retrieves the results
    for i in xrange(naccounts):
        api_key = api_keys[i]
        api_secretkey = api_secretkeys[i]
        cloud.setkey(api_key=api_key, api_secretkey=api_secretkey)
        print "Retrieving results for account %d..." % (i + 1)
        results.extend(cloud.result(jids[i]))

    return results
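
For context, a hypothetical submit-side counterpart (the work function and chunking are assumptions, not part of the original): it issues one cloud.map per account, producing the list of per-account job-id lists that retrieve() consumes.

def submit(work, chunks):
    # one cloud.map per account; jids[i] holds the job ids submitted
    # under account i, matching the indexing used in retrieve()
    jids = []
    for i in xrange(len(api_keys)):
        cloud.setkey(api_key=api_keys[i], api_secretkey=api_secretkeys[i])
        jids.append(cloud.map(work, chunks[i]))
    return jids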
Example no. 28
def distrubutedly_train_terms(train_terms, terms, chunk_size=1000, use_cloud=False):
    logging.info(u'Starting distributed training <{0}>'.format(['locally','remotely'][int(use_cloud)]))
    if use_cloud:
        import cloud
        job_ids = cloud.map(train_terms, terms)
        term_iters = cloud.result(job_ids)
    else:
        import collections
        chunks = collections.defaultdict(list)
        chunk_count = len(terms) / chunk_size + 1
        for term in terms: chunks[hash(term) % chunk_count].append(term)
        term_iters = map(train_terms, chunks.values())
    return itertools.chain.from_iterable(term_iters)
Example no. 29
    def storeEvalResult(self, task, evalResult):

        # because picloud returns a ticket number
        # we need to store off that ticket number
        # and then get the results of that ticket number
        # and that's the actual evalResult

        ticketId = evalResult
        evalResult = cloud.result(ticketId)

        PythonModule.PythonEval.storeEvalResult(self, task, evalResult)

        return
Example no. 30
def main():
    names = list(uai_net.names(500))
    print "Submitting {} jobs...".format(len(names))
    jids = cloud.map(run_job, names, _type="f2")
    print "Waiting for results..."
    for name, marginals_string, is_exact in cloud.result(jids):
        print name
        exact = "true" if is_exact else "approx"
        f = open(
            os.path.join(os.path.dirname(__file__),
                         "../data/marginals/uai/{}.{}.mar".format(name,
                                                                  exact)), "w")
        f.write(marginals_string)
        f.close()
Example no. 31
def Jgrad_picloud(params, encoding_size, decoded_size, training, wd, num_cores):
    W_e, W_d, b_e, b_d = unroll_params(params, encoding_size, decoded_size)

    gradW_e = np.zeros(W_e.shape)
    gradW_d = np.zeros(W_d.shape)
    gradb_e = np.zeros(b_e.shape)
    gradb_d = np.zeros(b_d.shape)

    # split the training set into batches, send out to picloud cores for backprop
    # offset = num_cores - len(training)%num_cores
    # for index in range(offset):
    # training.
    split = len(training) / num_cores

    final_training = []

    for i in range(num_cores):
        final_training.append(training[i * split : (i + 1) * split])

    offset = len(training) % num_cores

    if offset > 0:
        final_training.append(training[len(training) - offset :])

    jids = cloud.map(
        Jgrad_picloud_sub,
        [params] * num_cores,
        [encoding_size] * num_cores,
        [decoded_size] * num_cores,
        final_training,
        _type="c2",
    )

    # call for results
    results = cloud.result(jids)
    for result in results:
        gradW_e += result[0]
        gradW_d += result[1]
        gradb_e += result[2]
        gradb_d += result[3]

    # add weight decay factor and normalization coefficient
    a = 1.0 / len(training)
    grad_J_W_e = a * gradW_e + wd * W_e
    grad_J_W_d = a * gradW_d + wd * W_d
    grad_J_b_e = a * gradb_e
    grad_J_b_d = a * gradb_d

    # roll up and return as 1-d array
    return np.concatenate((grad_J_W_e.flatten(), grad_J_W_d.flatten(), grad_J_b_e.flatten(), grad_J_b_d.flatten()))
Example no. 32
def calc_pi():
    num_jobs = 8
    tests_per_call = total_tests / num_jobs

    # argument list has 8 duplicate elements
    jids = cloud.map(monte_carlo, [tests_per_call] * num_jobs, _type='c2')

    # get list of all counts
    num_in_circle_list = cloud.result(jids)

    # sum all counts
    num_in_circle = sum(num_in_circle_list)

    pi = (4 * num_in_circle) / float(total_tests)
    return pi
Example no. 33
def calc_pi():
    num_jobs = 8
    tests_per_call = total_tests/num_jobs

    # argument list has 8 duplicate elements
    jids = cloud.map(monte_carlo, [tests_per_call]*num_jobs, _type='c2')

    # get list of all counts
    num_in_circle_list = cloud.result(jids)

    # sum all counts
    num_in_circle = sum(num_in_circle_list)

    pi = (4 * num_in_circle) / float(total_tests)
    return pi
Example no. 34
def trainIntermediateRAE():
    print 'Started:'
    print datetime.now()
    #wordVectorDir = "C:\\Users\\Deepak\\Dropbox\\6.864 Project\\Word Vectors"
    wordVectorDir = "/Users/aratner/Dropbox/6.864 Project/Word Vectors"

    # Get patent claims
    patentData = getPatentData(500)
    print "Got patent data..."

    # Obtain the tree from the claim using the rule based parser / machine learnt model
    trees = [(x, hm_tree(y, 3)) for (x, y) in patentData]
    totalTrainingdata = []
    for treeData in trees:
        tree = treeData[1]
        #tree.draw()
        label = treeData[0]
        totalTrainingdata.extend(generatePhraseTree(tree, wordVectorDir))

    print "Generated trees for claims..."

    # Partition trees into two sets - a training set and a testing set
    #(trainingTrees, testingTrees) = partitionTrees(totalTrainingdata)
    trainingTrees = totalTrainingdata
    testingTrees = totalTrainingdata

    #for tree in trainingTrees:
    # 	if (len(tree) > 1):
    #		tree.draw()

    # Run the unfolding recursive auto-encoder on the tree
    jid = cloud.call(train_params,
                     trainingTrees,
                     math.pow(10, -5),
                     0.01,
                     True,
                     60,
                     _type='c2')
    print 'jid = ' + str(jid)
    params = cloud.result(jid)
    save_params(params, '_finalpicloud_subtrees')
    print 'RAE training complete, at:'
    print datetime.now()

    print "Done..."
Example no. 35
def get_shares_bulk(urls,limit=-1,use_cloud=False):
    if use_cloud:
        # using picloud. parallelizing on chunks
        chunks = list(parallel.partitions(urls,100))
        def f(url):
            return map(lambda url: (url,get_shares(url)),url)
        jids = cloud.map(f,chunks)
        ret = list(itertools.chain(*cloud.result(jids)))
    else:
        # local. parallelizing using pool
        ret = list(parallel.imap(get_shares, urls, threads=10))

    ret.sort(key=lambda (u, r): -r['shares_count'])

    if limit == -1:
        return ret
    else:
        return ret[:limit]
Example no. 36
def call_on_cloud(cmd_params, core_type, num_batches, start_batch_num, end_batch_num):
    ntests = len(cmd_params)
    batch_size = int(math.ceil(ntests/(num_batches+0.0)))

    batch_edges = batch_size*np.array(xrange(num_batches))[start_batch_num : end_batch_num]
    print batch_edges
    for i in xrange(len(batch_edges)):
        if i==len(batch_edges)-1:
            cmds = cmd_params[batch_edges[i]:]
        else:
            cmds = cmd_params[batch_edges[i]:min(batch_edges[i+1], len(cmd_params))]
        print colorize("calling on cloud..", "yellow", True)
        try:
            jids = cloud.map(run_sim_test, cmds, _vol='rss_dat', _env='RSS3', _type=core_type)
            res  = cloud.result(jids)
            print colorize("got results for batch %d/%d "%(i, len(batch_edges)), "green", True)
            save_results(res)
        except Exception as e:
            print "Found exception %s. Not saving data for this demo."%e
Example no. 37
def picloud(func, *args, **kwargs):
    """
    Runs the given function in parallel over the PiCloud cluster.

    Parameters
    ----------
    func : function
        Function to run in parallel.

    In addition to the function 'func' to be run in parallel, the picloud
    function accepts a series of arguments that are passed to the function
    as variables. In general, the function can have multiple input variables,
    and these arguments must be passed in the same order as they are defined in
    the function definition.

    Furthermore, several keyword arguments may be given that set the settings
    for the PiCloud cluster:

    _type - Type of core used in picloud: 'c1', 'c2', 'f2' (default), 'm1',
            's1'
    _cores - Number of cores used: 1 (default)
    _env - Custom environment for computation. Set to current version of qutip.
    _label - Provide a label for the current computation.

    For more information see the PiCloud website: http://www.picloud.com/

    """
    kw = _default_cloud_settings()
    for keys in kwargs.keys():
        if keys not in kw.keys():
            raise Exception(str(keys) + ' is not a valid kwarg.')
        else:
            kw[keys] = kwargs[keys]
    job_ids = cloud.map(func, *args, **kw)
    results = cloud.result(job_ids)
    if isinstance(results[0], tuple):
        par_return = [elem for elem in results]
        num_elems = len(results[0])
        return [
            np.array([elem[ii] for elem in results]) for ii in range(num_elems)
        ]
    else:
        return list(results)
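
A minimal usage sketch for the wrapper above (the add function and label are hypothetical; a configured PiCloud account is assumed):

def add(x, y):
    # a top-level function: cloud pickles it and evaluates each call remotely
    return x + y

# maps add over the zipped argument lists on 'c2' cores; returns a plain
# list here because add returns scalars rather than tuples
sums = picloud(add, range(10), range(10), _type='c2', _label='add demo')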
Example no. 38
def picloud(func, *args, **kwargs):
    """
    Runs the given function in parallel over the PiCloud cluster.

    Parameters
    ----------
    func : function
        Function to run in parallel.

    In addition to the function 'func' to be run in parallel, the picloud
    function accepts a series of arguments that are passed to the function
    as variables. In general, the function can have multiple input variables,
    and these arguments must be passed in the same order as they are defined in
    the function definition.

    Furthermore, several keyword arguments may be given that set the settings
    for the PiCloud cluster:

    _type - Type of core used in picloud: 'c1', 'c2', 'f2' (default), 'm1',
            's1'
    _cores - Number of cores used: 1 (default)
    _env - Custom environment for computation. Set to current version of qutip.
    _label - Provide a label for the current computation.

    For more information see the PiCloud website: http://www.picloud.com/

    """
    kw = _default_cloud_settings()
    for keys in kwargs.keys():
        if keys not in kw.keys():
            raise Exception(str(keys) + ' is not a valid kwarg.')
        else:
            kw[keys] = kwargs[keys]
    job_ids = cloud.map(func, *args, **kw)
    results = cloud.result(job_ids)
    if isinstance(results[0], tuple):
        par_return = [elem for elem in results]
        num_elems = len(results[0])
        return [np.array([elem[ii] for elem in results])
                for ii in range(num_elems)]
    else:
        return list(results)
Example no. 39
def cloud_map(func, args, jobs=None, return_jobs=False,
              **cloud_opts):
    """
    Call cloud.map, with some standard logging info

    Parameters
    ----------
    func : function to map
    args : list of mapping arguments
    jobs : list of pre-existing job ids, or None
        If present, will fetch the results from these jobs
    return_jobs : boolean (optional, default false)
        If True, return the job IDs instead of
        the job results
    cloud_opts : dict (optional)
        Extra keyword arguments to pass to cloud.map

    Returns
    -------
    Result of cloud.map if return_jobs=False, else the job ids
    """
    import cloud

    cloud_opts.setdefault('_env', 'mwp')
    cloud_opts.setdefault('_type', 'c2')
    cloud_opts.setdefault('_label', func.__name__)

    if jobs is None:
        log = logging.getLogger(func.__module__)

        log.debug(
            "Starting %i jobs on PiCloud for %s" % (len(args), func.__name__))
        jobs = cloud.map(func, args, **cloud_opts)
        log.debug("To re-fetch results, use \n"
                  "%s(jobs=range(%i, %i))" %
                  (func.__name__, min(jobs), max(jobs) + 1))

    if return_jobs:
        return jobs

    return cloud.result(jobs)
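
For illustration, a sketch of both calling modes (score_tile, tiles and the job-id range are assumptions): the first call submits new jobs, the second re-fetches results from jobs that already ran, using the id range printed by the debug log.

# submit: runs score_tile over tiles on PiCloud and blocks for the results
scores = cloud_map(score_tile, tiles)

# re-fetch: skips submission and pulls the results of the existing jobs
scores = cloud_map(score_tile, tiles, jobs=range(100, 100 + len(tiles)))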
Example no. 40
def url_chunker(url, chunksize=1024):
    """Returns an iterator over contents of a file
        *Params*
        #file - an open FILE object
        #chunksize - how many lines to read at once?
    """
    #url=book[0]
    #bookname=book[1]
    
    user_agent = {'User-agent': 'Mozilla/5.0'}
    result=requests.get(url,headers=user_agent)
    
    try:
        doc = result.content
    except:
        raise Exception("URL "+url+"not responding")
    
    text_in=StringIO(doc)
    chunks = []
    stop = False
    while not stop:
        text=""
        for x in range(chunksize):
            try:
                text+=text_in.next()
            except StopIteration:
                stop=True
                break
                
        chunks.append(text)
        
    jobids = cloud.map(wordcount, [(url,c) for c in chunks])
    cloud.join(jobids,deadlock_check=False)
    results = cloud.result(jobids)
    
    index=reduce_results(results)
    
    mongo_insert(index)
    
    return "OK"
def run_ip():
    
    #Figure out how many jobs I want to create and how many requests per job
    job_count = int(sys.argv[1])
    
    job_rows = range(0, job_count)
    
    #Now actually map them to run in the cloud
    #The "s1" type gives unique IP addresses. Eek
    print "Creating job map for {0} jobs.".format(len(job_rows))
    jids = cloud.map(download_ip, job_rows, _type="s1")
    
    print "Waiting for jobs to complete."
    
    #The possible statuses and the statuses we are waiting for
    possible_job_statuses = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"]
    pending_job_statuses = Set(["waiting", "queued", "processing"])
    
    #Keep looping until no job statuses are in the pending_job_statuses
    statuses = []
    while True:
        statuses = cloud.status(jids)
        tally = Counter()
        for status in statuses:
            tally[status] += 1
        print "Status of jobs: " + str(tally)
        
        #If none of the statuses are in pending_job_statuses, we are done!
        if len(pending_job_statuses.intersection(Set(statuses))) == 0:
            break
        
        #Wait for 5 seconds between checks
        sleep(5)
    
    #Now loop through the jobs and retrieve the results
    ip_counter = Counter()
    results = cloud.result(jids)
    for result in results:
        ip_counter[result] += 1
    
    print "IP Addresses: " + str(ip_counter)
Example no. 42
def url_chunker(url, chunksize=1024):
    """Returns an iterator over contents of a file
        *Params*
        #file - an open FILE object
        #chunksize - how many lines to read at once?
    """
    #url=book[0]
    #bookname=book[1]

    user_agent = {'User-agent': 'Mozilla/5.0'}
    result = requests.get(url, headers=user_agent)

    try:
        doc = result.content
    except:
        raise Exception("URL " + url + "not responding")

    text_in = StringIO(doc)
    chunks = []
    stop = False
    while not stop:
        text = ""
        for x in range(chunksize):
            try:
                text += text_in.next()
            except StopIteration:
                stop = True
                break

        chunks.append(text)

    jobids = cloud.map(wordcount, [(url, c) for c in chunks])
    cloud.join(jobids, deadlock_check=False)
    results = cloud.result(jobids)

    index = reduce_results(results)

    mongo_insert(index)

    return "OK"
Example no. 43
def calc_pi():
    """Almost correct way"""

    num_jobs = 8
    tests_per_call = total_tests/num_jobs

    # list of job ids for all jobs we're spawning
    jids = []
    for _ in range(num_jobs):
        # call() does not block, so jobs run in parallel
        jid = cloud.call(monte_carlo, tests_per_call, _type='c2')
        jids.append(jid)

    # aggregate the number of darts that land in the circle
    # across all jobs that we spawned
    num_in_circle = 0
    for jid in jids:
        num_in_circle += cloud.result(jid)

    pi = (4 * num_in_circle) / float(total_tests)
    return pi
Example no. 44
def trainIntermediateRAE():
  print 'Started:'
  print datetime.now()
  #wordVectorDir = "C:\\Users\\Deepak\\Dropbox\\6.864 Project\\Word Vectors"
  wordVectorDir = "/Users/aratner/Dropbox/6.864 Project/Word Vectors"

  # Get patent claims
  patentData = getPatentData(500)
  print "Got patent data..."

  # Obtain the tree from the claim using the rule based parser / machine learnt model
  trees = [(x,hm_tree(y,3)) for (x,y) in patentData]
  totalTrainingdata = []
  for treeData in trees:
    tree = treeData[1]
    #tree.draw()
    label = treeData[0]
    totalTrainingdata.extend(generatePhraseTree(tree, wordVectorDir))

  print "Generated trees for claims..."

  # Partition trees into two sets - a training set and a testing set
  #(trainingTrees, testingTrees) = partitionTrees(totalTrainingdata)
  trainingTrees = totalTrainingdata
  testingTrees = totalTrainingdata

  #for tree in trainingTrees:
  # 	if (len(tree) > 1):
  #		tree.draw()

  # Run the unfolding recursive auto-encoder on the tree
  jid = cloud.call(train_params, trainingTrees, math.pow(10,-5), 0.01, True, 60, _type='c2')
  print 'jid = ' + str(jid)
  params = cloud.result(jid)
  save_params(params, '_finalpicloud_subtrees')
  print 'RAE training complete, at:'
  print datetime.now()


  print "Done..."
def patentComparer():
    print 'Started:'
    print datetime.now()
    #wordVectorDir = "C:\\Users\\Deepak\\Dropbox\\6.864 Project\\Word Vectors"
    wordVectorDir = "/Users/aratner/Dropbox/6.864 Project/Word Vectors"

    # Get patent claims
    patentData = getPatentData(200)
    print "Got patent data..."

    # Obtain the tree from the claim using the rule based parser / machine learnt model
    trees = [(x, hm_tree(y, 3)) for (x, y) in patentData]
    embeddingTrees = [(x, generateEmbeddingTree(tree, wordVectorDir))
                      for (x, tree) in trees]

    #print embeddingTrees
    #for embeddingTree in embeddingTrees:
    #  embeddingTree[1].draw()

    print "Generated trees for claims..."

    # Partition trees into two sets - a training set and a testing set
    (trainingTrees, testingTrees) = partitionTrees(embeddingTrees)

    # Run the unfolding recursive auto-encoder on the tree, using picloud
    #params = train_params(trainingTrees, math.pow(10,-5), 0.01, False)
    jid = cloud.call(train_params,
                     trainingTrees,
                     math.pow(10, -5),
                     0.01,
                     True,
                     60,
                     _type='c2')
    print 'jid = ' + str(jid)
    params = cloud.result(jid)
    save_params(params, '_finalpicloud')
    print 'RAE training complete, parameters saved, at:'
    print datetime.now()

    print "Done..."
Example no. 46
def retrieve_job(lon):
    """
    Retrieve the results of a previous job submission,
    and save to an hdf5 file

    This creates/overwrites a file at ../data/full_search/<lon>.h5

    Parameters
    ----------
    lon : int. Longitude to retrieve
    """
    import cloud

    jobs = fetch_job_ids(lon)
    stamps = np.array(field_stamps(lon), dtype=np.float32)
    scores = np.hstack(cloud.result(jobs)).astype(np.float32)

    #write to file
    result_file = os.path.join(result_dir, "%3.3i.h5" % lon)
    with h5py.File(result_file, 'w') as f:
        f.create_dataset('stamps', data=stamps, compression=9)
        f.create_dataset('scores', data=scores, compression=9)
Example no. 47
    def fetch_results(self, iters=None, via_remote=False, run_mode='local'):
        """
        Returns the result of a job that has already been run as a :py:class:`History` object. Typically you would call :py:meth:`run` first, then call :py:meth:`fetch_results` to get the results. The parameters below control how much of the job is returned, to avoid excessive memory usage and data transfer between the cloud and the local machine.

        :param iters: If *iters* is an iterable, returns only the iterations of the chain in *iters*. If *iters* is a scalar, return every *iters* state (the stride). If None, returns all states.
        :param via_remote: If *True*, executes the state filtering on the cloud before transferring the data to the local machine. If false, filter the state on the local machine.
        :param run_mode: Controls whether to search for the results on the local machine or on the cloud. Can be *local* or *cloud*.
        :return: A :py:class:`History` object that contains a filtered version the states of the Markov chain visited when this job ran.
        """
        def f():
            if run_mode == 'cloud':
                cloud.join([self.job_id])
                store = storage.CloudStore()
            else:
                store = storage.LocalStore()
            full_history = store[self.params]
            partial_history = History()
            if iters is None:
                partial_history.states = full_history.states
            else:
                if isinstance(iters, int):  #iters interpreted as stride
                    iter_set = range(0, len(full_history.states), iters)
                else:
                    iter_set = iters
                partial_history.states = [
                    state for state in full_history.states
                    if state.iter in iter_set
                ]
            partial_history.job = self
            partial_history.summary = full_history.summary
            return partial_history

        if via_remote:
            job_id = cloud.call(f, _env=picloud_env)
            return cloud.result(job_id)
        else:
            return f()
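
A usage sketch, assuming job is an instance of this class whose run() has already completed:

history = job.fetch_results(iters=10)             # every 10th state, filtered locally
history = job.fetch_results(iters=[0, 500, 999],  # three specific iterations,
                            via_remote=True,      # filtered on the cloud first,
                            run_mode='cloud')     # reading the cloud store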
Example no. 48
def cloud_result(jid):
    result = cloud.result(jid)
    print "Retrieved results for trial %s" % jid
    return result
Example no. 49
for s in range(0, S):
    coef_old[:, s] = Vf[s].getCoeffs()

Nmax = 100


def solveOnCloud(Vf, c_policy, xprime_policy):
    diff = [0] * Nmax
    for i in range(0, Nmax):
        Vf, c_policy, xprime_policy = bellman.iterateBellmanOnCloud(
            Vf, c_policy, xprime_policy, Para, nCloud=5)
        for s_ in range(0, S):
            diff[i] = max(diff[i],
                          np.max(np.abs(coef_old[:, s_] - Vf[s_].getCoeffs())))
            coef_old[:, s_] = Vf[s_].getCoeffs()
    return Vf, c_policy, xprime_policy, diff


jid = cloud.call(solveOnCloud,
                 Vf,
                 c_policy,
                 xprime_policy,
                 _env="gspy_env",
                 _type='m1')
Vf, c_policy, xprime_policy, diff = cloud.result(jid)

#Now fit accurate Policy functions
nx = max(min(Para.nx * 10, 1000), 1000)
xgrid = np.linspace(Para.xmin, Para.xmax, nx)
#c_policy,xprime_policy = bellman.fitNewPolicies(xgrid,Vf,c_policy,xprime_policy,Para)
# <codecell>

# pull up status -- refresh until done
cloud.status(jid)

# <codecell>

# this will block until job is done or errors out

cloud.join(jid)

# <codecell>

# get your result
cloud.result(jid)

# <codecell>

# get some basic info
cloud.info(jid)

# <codecell>

# get some specific info
cloud.info(jid, info_requested=['created', 'finished', 'runtime', 'cputime'])

# <headingcell level=1>

# What I got the first time
Example no. 51
def reduce_results(results):
    """Merges the per-chunk word counts into one total.

    ==PARAMS==
    results: A list of Counter() objects, as produced by cloud.result() method
    
    ==RETURNS==
    a Counter() object with total word-counts for the whole body of text
    """
    total_wordcount = Counter()
    for r in results:
        total_wordcount.update(r)
    return total_wordcount


##job_ids=cloud.map(wordcount,chunker(f))

## where are the files we care about?
path = '../www.gutenberg.lib.md.us/etext00'

## start cloud jobs over chunks of text
job_ids = cloud.map(wordcount, filechunker(path))

while True:
    c = cloud_status(job_ids)
    print c
    if c['processing'] == 0:
        break
    else:
        sleep(10)

res = cloud.result(job_ids)
Example no. 52
DATA_DIR = "data/fl"


def ddir(x):
    return os.path.join(DATA_DIR, x)


REPORT_DIR = "results"


def rdir(x):
    return os.path.join(REPORT_DIR, x)


dirs = glob(DATA_DIR + "/*")
datasets = [x[len(DATA_DIR) + 1:] for x in dirs]


def per_frame_wrapper(dname):
    algo_data = track.per_frame(ddir(dname), methods.centroid_frame,
                                {'thold': 240})


jids = cloud.map(per_frame_wrapper,
                 datasets,
                 _type='f2',
                 _vol="my-vol",
                 _env='base/precise')

cloud.result(jids)
Example no. 53
        randomword3 = randomword(5)
        if engdict.check(randomword3) == True:
            randomkey3 = randomword3 + str(random.randint(0, 99))
        elif engdict.check(randomword3) == False:
            englist = engdict.suggest(randomword3)
            if len(englist) > 0:
                randomkey3 = englist[0] + str(random.randint(0, 99))
            else:
                randomkey3 = randomword3 + str(random.randint(0, 99))

        if 'randomkey0' in locals() and 'randomkey1' in locals() and 'randomkey3' in locals():
            whasher0 = hashlib.new("md5")
            whasher0.update(randomkey0)
            whasher3 = hashlib.new("md5")
            whasher3.update(randomkey3)
            whasher1 = hashlib.new("md5")
            whasher1.update(randomkey1)
            print(randomkey0 + " + " + str(whasher0.hexdigest()) + "\n")
            print(randomkey3 + " + " + str(whasher3.hexdigest()) + "\n")
            print(randomkey1 + " + " + str(whasher1.hexdigest()) + "\n")
            fileb.write(randomkey0 + " + " + str(whasher0.hexdigest()) + "\n")
            fileb.write(randomkey3 + " + " + str(whasher3.hexdigest()) + "\n")
            fileb.write(randomkey1 + " + " + str(whasher1.hexdigest()) + "\n")


jid = cloud.call(randomword)  # randomword evaluated on PiCloud
cloud.result(jid)
print('Value added to cloud')
print('Password added')
mainroutine()
Example no. 54
def main():
    jid = cloud.call(search_yellow, _type='m1')
    cloud.result(jid)
Example no. 55

TRIALS = int(sys.argv[1])
NUM_PARTICLES = int(sys.argv[2])
DELTA = int(sys.argv[3])
INTEGRAL_PATHS = int(sys.argv[4])


def run_on_instance(trial_id):
  global number_of_clusters
  global if_zero_shortlearning
  global experiment_name
  import subprocess
  import os
  os.environ['DISPLAY'] = ":1"
  print "Starting"
  ls_output = subprocess.Popen(["/home/picloud/julia/julia", "runner.jl", str(NUM_PARTICLES), str(DELTA), str(INTEGRAL_PATHS)], \
                               cwd = "/home/picloud/DPMixtureModel/DPMM_SMC/",  \
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  out, err = ls_output.communicate()
  return out
 
#result = run_on_instance([1])  

jids = cloud.map(run_on_instance, range(TRIALS), _env=cloud_environment, _type='c2', _cores=1)
print jids
result = cloud.result(jids)
pickle.dump(result, open("result_"+str(NUM_PARTICLES)+"particles_"+str(DELTA)+"delta_"+str(INTEGRAL_PATHS)+"path.pkl","wb"))
print "RESULT:", result
Example no. 56
                                      gamma,
                                      p_sample,
                                      detection_step,
                                      min_dist_step,
                                      detection_window_hrs,
                                      req_consec_detections)

param_product_old = set(param_product_old)
param_product_new = set(param_product_new)

param_product = param_product_new.difference(param_product_old)
"""

jids = cloud.map(detect_trials, *zip(*param_product), _type='f2')

params_sub_jids = cloud.result(jids)
params = [elt[0] for elt in params_sub_jids]
sub_jids = [elt[1] for elt in params_sub_jids]
stats = cloud.result(sub_jids)

dt = datetime.now()

# Write out as plain text just in case.
out_path_txt = 'data/param_explore_%d%d%d%d%d%d.txt' % \
  (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
open(out_path_txt, 'w').write(str((params, stats)))

params, stats = fix_results_nesting((params, stats))
out_path_pkl = 'data/param_explore_%d%d%d%d%d%d.pkl' % \
  (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
store_results((params, stats), out_path_pkl)
Example no. 57
    def get_dataset(self):
        if self.usecloud:
            return cloud.result(self.preprocess_job)
        else:
            return self._samples, self._labels
Example no. 58
            'dataset_name': dataset_name,
            'dataset_dir': dataset_dir,
            'jids': jids
        }, open(outfile_wait, 'w'))


@transform(score_frame_queue, regex(r"(.+).wait.(.+)$"),
           [r"\1.pickle", r"\1.npz"])
def score_frame_wait((infile_wait, infile_npz), (outfile_pickle, outfile_npz)):
    dnpz = np.load(infile_npz)
    p = pickle.load(open(infile_wait))

    jids = p['jids']

    if USE_CLOUD:
        results = cloud.result(jids)
    else:
        results = [x for x in jids]
    scores = np.concatenate(results)
    np.savez_compressed(outfile_npz, scores=scores, **dnpz)
    pickle.dump(p, open(outfile_pickle, 'w'))


@transform(score_frame_wait, suffix(".pickle"), [".png", ".hist.png"])
def plot_likelihood((infile_pickle, infile_npz), (outfile, outfile_hist)):
    data = np.load(infile_npz)
    data_p = pickle.load(open(infile_pickle))
    scores = data['scores']

    sv = create_state_vect(data['y_range'], data['x_range'], data['phi_range'],
                           data['theta_range'])