def done_jobs(jobs):
    '''
    Retrieve the results of finished jobs from PiCloud.

    Parameters:
        jobs: iterable of PiCloud job ids.

    Returns:
        list of results for the jobs whose status is 'done'; jobs in any
        other state are skipped.
    '''
    # BUG FIX: the original called list(jobs) twice, which silently returns
    # an empty second list when `jobs` is a generator.  Materialise once.
    job_ids = list(jobs)
    statuses = cloud.status(job_ids)
    done_ids = [jid for jid, state in zip(job_ids, statuses) if state == 'done']
    return list(cloud.iresult(done_ids))
Exemple #2
0
 def check_job_itemdetail_status(self):  
     """Poll PiCloud until every tracked seller's item-detail jobs finish.

     Sellers whose 'jobcompletecheck' flag is False are polled; all other
     sellers are treated as failed and their outstanding jobs are killed.
     Result dumps are downloaded per seller as its jobs complete.

     NOTE(review): `filenamelist` is accumulated but never returned, even
     though a caller elsewhere assigns this method's return value --
     confirm whether a `return filenamelist` is missing.
     """
     selleritemdetailsjobs_completed = 0
     newsellerlist=[]
     filenamelist=[]
     # Partition sellers: still-running ones are polled below; the rest are
     # logged as failed and their outstanding cloud jobs are killed.
     for seller in self.successsellers:
         if seller.get('jobcompletecheck', True) == False:
             newsellerlist.append(seller)
         else:
             cloud.cloud.cloudLog.critical("job failed for seller "+str(seller))
             if seller.get('itemdetailjobid'):cloud.kill(seller['itemdetailjobid'])
             if seller.get('itemdetailjoblist'):cloud.kill(seller['itemdetailjoblist'])
             
     # Busy-wait (3s period) until every polled seller is accounted for.
     while selleritemdetailsjobs_completed != len(newsellerlist):
         try:
             for seller in newsellerlist:
                 if seller.get('jobcompletecheck', True) == False:
                     sellerjobfailedcount = 0
                     sellercompletedjobs = 0
                     # A failed job also counts as "completed" so the seller
                     # can reach the all-jobs-finished branch below.
                     for jobid in seller['itemdetailjoblist']:
                         if cloud.status(jobid) in ('error', 'stalled', 'killed'):
                             sellerjobfailedcount += 1
                             sellercompletedjobs += 1
                         # NOTE(review): ('done') is a plain string, not a
                         # one-element tuple, so this is a substring test;
                         # it matches 'done' but would also match 'do' or
                         # 'ne'.  ('done',) or == 'done' was likely meant.
                         if cloud.status(jobid) in ('done'):
                             sellercompletedjobs += 1
                     if sellerjobfailedcount > SELLERITEMDETAIL_ALLOWED_JOB_FAIL_COUNT:
                         update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR"))
                         cloud.kill(seller['itemdetailjoblist'])
                         selleritemdetailsjobs_completed += 1
                         seller['jobcompletecheck'] = True
                         # NOTE(review): logger.exception outside an except
                         # block logs a meaningless traceback; logger.error
                         # is the usual call here.
                         logger.exception(seller['sellerid'] + " jobs kill after two job failed")
                     elif sellercompletedjobs >= len(seller['itemdetailjoblist']):
                         selleritemdetailsjobs_completed += 1
                         seller['jobcompletecheck'] = True
                         cloud.delete(seller['itemdetailjobid'])
                         self.download_dumps(seller['filenamelist'])
                         filenamelist+=seller['filenamelist']
                         update_sellers_status([seller], dict(phase="INDEXING", progress="ENDED"))
             print "Job detail wait loop"
             time.sleep(3)
         except Exception, e:
             print e
Exemple #3
0
def status(jids):
    naccounts = len(jids)
    status = []
    # Retrieves the results
    for i in xrange(naccounts):
        api_key = api_keys[i]
        api_secretkey = api_secretkeys[i]
        cloud.setkey(api_key=api_key, api_secretkey=api_secretkey)
        print "Retrieving status for account %d..." % (i + 1)
        status.extend(cloud.status(jids[i]))

    return status
Exemple #4
0
def status(jids):
    """Return the combined status list for jobs spread across accounts.

    `jids` is a list of per-account job-id collections, parallel to the
    module-level `api_keys` / `api_secretkeys` lists.
    """
    naccounts = len(jids)
    status = []
    # Retrieves the results
    for i in xrange(naccounts):
        api_key = api_keys[i]
        api_secretkey = api_secretkeys[i]
        # Switch PiCloud credentials before querying this account's jobs.
        cloud.setkey(api_key=api_key, api_secretkey=api_secretkey)
        print "Retrieving status for account %d..." % (i + 1)
        status.extend(cloud.status(jids[i]))

    return status
Exemple #5
0
def track(batch, remote, debug):
    try:
        if (not remote):
            logging.info("cannot track locally")
            return
        jobs = cache.get("batch/%s/jobs" % batch, remote)
        k_ = jobs['train'] + jobs['validate'] + jobs['test'] + [jobs['report']]
        
        status_ = cloud.status(k_)
        count = co.Counter(status_)
        print count
    except:
        print "status failed"
        pass
def run_ip():
    
    #Figure out how many jobs I want to create and how many requests per job
    job_count = int(sys.argv[1])
    
    job_rows = range(0, job_count)
    
    #Now actually map them to run in the cloud
    #The "s1" type gives unique IP addresses. Eek
    print "Creating job map for {0} jobs.".format(len(job_rows))
    jids = cloud.map(download_ip, job_rows, _type="s1")
    
    print "Waiting for jobs to complete."
    
    #The possible statuses and the statuses we are waiting for
    possible_job_statutes = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"]
    pending_job_statuses = Set(["waiting", "queued", "processing"])
    
    #Keep looping until no job statuses are in the pending_job_statuses
    statuses = []
    while True:
        statuses = cloud.status(jids)
        tally = Counter()
        for status in statuses:
            tally[status] += 1
        print "Status of jobs: " + str(tally)
        
        #If none of the statuses are in pending_job_statuses, we are done!
        if len(pending_job_statuses.intersection(Set(statuses))) == 0:
            break
        
        #Wait for 5 seconds between checks
        sleep(5)
    
    #Now loop through the jobs and retrieve the results
    ip_counter = Counter()
    results = cloud.result(jids)
    for result in results:
        ip_counter[result] += 1
    
    print "IP Addresses: " + str(ip_counter)
Exemple #7
0
def cloud_status(jids):
    """Return a Counter of status strings for the given PiCloud job ids."""
    # Counter's constructor tallies the iterable directly; this also makes
    # the function consistent with the sibling one-line cloud_status below.
    return Counter(cloud.status(jids))
def cloud_status(jids):
    """Tally the PiCloud statuses of `jids` into a Counter keyed by status."""
    status_counts = Counter()
    for job_status in cloud.status(jids):
        status_counts[job_status] += 1
    return status_counts
def test_exception4():
    '''Raise CloudException since cloud.status called for job that does not exist'''
    # Job id 100000 is assumed not to exist on the test account; the
    # resulting CloudException is expected to propagate to the test runner.
    cloud.status(100000)
def test_exception3():
    '''Raise TypeError since cloud.status called with 1 invalid argument'''
    # A string is neither a job id (int) nor a collection of ids, so
    # cloud.status is expected to raise TypeError before any network call.
    jid = cloud.status("asdf")
def run():
    """Driver: split pending distance rows into PiCloud jobs, poll until
    every job finishes, then collect the results and persist them.

    Reads job count from sys.argv[1] and requests-per-job from sys.argv[2].
    Relies on module-level state: `needed_rows`, `distances`,
    `len_needed_rows`, `download`, `save_distances` -- TODO confirm these
    are defined elsewhere in the module.
    """
    #Import the data we need        
    
    #Figure out how many jobs I want to create and how many requests per job
    job_count = int(sys.argv[1])
    req_per_row = int(sys.argv[2])
    
    #Probably an easier way to do this...
    #For each job, add the requests to that array and append it to job_rows
    print "Generating data for {0} jobs".format(job_count)
    job_rows = []
    for i in xrange(0, job_count):
        job = []
        # Pop rows off the shared work list until this job is full or the
        # work list is exhausted.
        while len(job) < req_per_row and len(needed_rows) > 0:
            job.append(needed_rows.pop())
        job_rows.append(job)
        
        if len(needed_rows) == 0:
            break
    
    #Print status of jobs
    print "Created {0} jobs with {1} distances each, so {2} distances to be processed.".format(len(job_rows),req_per_row,sum([len(job) for job in job_rows]))
    
    #Now actually map them to run in the cloud
    #The "s1" type gives unique IP addresses per job. Eek
    print "Creating job map"
    jids = cloud.map(download, job_rows, _type="s1")
    
    print "Waiting for jobs to complete."
    
    #The possible statuses and the statuses we are waiting for
    # NOTE(review): possible_job_statutes is unused (and misspells
    # "statuses"); kept only as documentation of the full status set.
    possible_job_statutes = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"]
    pending_job_statuses = Set(["waiting", "queued", "processing"])
    
    #Keep looping until no job statuses are in the pending_job_statuses
    statuses = []
    while True:
        statuses = cloud.status(jids)
        tally = Counter()
        for status in statuses:
            tally[status] += 1
        print "Status of jobs: " + str(tally)
        
        #If none of the statuses are in pending_job_statuses, we are done!
        if len(pending_job_statuses.intersection(Set(statuses))) == 0:
            break
        
        #Wait for 5 seconds between checks
        sleep(5)
    
    #Now loop through the jobs and retrieve the results
    saved = 0
    for index in xrange(0, len(statuses)):
        print "Working on job {0} of {1}".format(index+1, len(statuses))
        jid = jids[index]
        status = statuses[index]
        
        #If it's not "done", then there must have been an error
        if status != "done":
            print "Status of jid {0} = {1}.".format(jid, status)
            continue
        
        results = cloud.result(jid)
        print "There are {0} results.".format(len(results))
        # Each result carries a Google-Distance-Matrix-style payload;
        # skip any entry whose top-level or element status is not OK.
        for result in results:
            #Make sure we aren't over the limits or nothing went wrong
            if result["status"] != "OK":
                print('result["status"] == ' + result["status"])
                continue
            elem = result["rows"][0]["elements"][0]
            if elem["status"] != "OK":
                print('elem["status"] == ' + elem["status"])
                continue
            
            dist = elem["distance"]["value"]
            dur = elem["duration"]["value"]
            a = result["a"]
            b = result["b"]
            walk = result["walk"]
            
            # Nested dict keyed origin -> destination -> walk flag.
            if a not in distances:
                distances[a] = {}
            if b not in distances[a]:
                distances[a][b] = {}
            
            distances[a][b][walk] = {"walk":walk,"distance":dist,"duration":dur,"a":a,"b":b}
            saved += 1
        
        # Persist after every job so a crash loses at most one job's worth.
        # NOTE(review): bare except hides the real failure reason.
        try:
            save_distances(distances)
        except:
            print "Couldn't save distances."
    
    print "Saved {0:,d} distance with {1:,d} remaining".format(saved, len_needed_rows-saved)
# NOTE(review): this span is an IPython notebook export; `%time` below is
# an IPython magic, not valid plain Python -- run it in IPython/Jupyter.
# look at how long it takes to run locally

%time segment_stats(valid_segments[0], None)

# <codecell>

# here's how to run it on PiCloud
# Prerequisite:  http://docs.picloud.com/primer.html <--- READ THIS AND STUDY TO REFRESH YOUR MEMORY

import cloud
jid = cloud.call(segment_stats, '1346823845675', None, _env='/rdhyee/Working_with_Open_Data')

# <codecell>

# pull up status -- refresh until done
cloud.status(jid)

# <codecell>

# this will block until job is done or errors out

cloud.join(jid)

# <codecell>

# get your result
cloud.result(jid)

# <codecell>

# get some basic info
Exemple #13
0
                         seller['itemdetailjoblist'] = jobtempids
                         seller['jobcompletecheck'] = False
                         seller['filenamelist'] = jobtempfilenames
             except Exception, e:
                 update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR"))
 
         #cloud.join(itemdetailjobids,ignore_errors=False)
 
         successfullsellerfilenamelist=self.check_job_itemdetail_status()
         #self.delete_job_related_data_in_picloud(jobids, mainjobids, itemdetailjobids)
     
     except Exception,e:
         print e    
     finally:
         for seller in self.successsellers:
             if cloud.status(seller['jobid']) not in ('done','error', 'stalled', 'killed'):
                 cloud.kill(seller['jobid'])
                 update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR"))
                 logger.critical("Job of "+str(seller['sellerid'])+" killed although running as there were exception from client rest call")
         removelockfile()
         return successfullsellerfilenamelist
 
 def delete_job_related_data_in_picloud(self, sellerjobids, queuejobdis, itemdetailjobids):
     """Delete PiCloud-side data for the given job-id collections.

     Each argument is a (possibly empty) collection of PiCloud job ids;
     empty/falsy collections are skipped.
     """
     # BUG FIX: the original guarded this first delete on `itemdetailjobids`
     # (copy/paste slip), so seller job data was deleted or skipped based on
     # the wrong argument.
     if sellerjobids:
         cloud.delete(sellerjobids)
     if queuejobdis:
         cloud.delete(queuejobdis)
     if itemdetailjobids:
         cloud.delete(itemdetailjobids)
     
 def check_job_itemdetail_status(self):  
import exposure
#exposure.exposure()
import cloud
from pylab import show
from Qfunction import qfuncimage
from numpy import imag, real

N = 100 # number of times to run simulation
args = 1024
arglist = [1024]*N #each call uses the same argument in our case

# Fan the simulation out as N PiCloud jobs, one per argument.
jids = cloud.map(exposure.exposure, arglist)

# NOTE(review): the result of this all(...) check is discarded -- it samples
# the statuses once and does not wait; presumably a blocking cloud.join(jids)
# was intended.  Confirm before relying on dataout being complete.
all(item == 'done' for item in cloud.status(jids))

dataout = cloud.result(jids)

# Render the Q-function image from the real/imaginary parts of the results.
img = qfuncimage(real(dataout),imag(dataout),20)

show(img)

def run():
    """Driver: find songs not yet downloaded, fan them out as PiCloud jobs,
    poll until done, then save view/like counts per song.

    Reads job count from sys.argv[1] and songs-per-job from sys.argv[2].
    Relies on module-level state: `rows`, `import_data`, `import_distances`,
    `download`, `save_distances` -- TODO confirm these exist in the module.
    """
    #Import the data we need
    import_data()
    global rows
    
    #This was to convert the csvfile with commas as delimiters to "|" as delimiters
    """
    output = open("output.csv", "wb")
    rowwriter = csv.writer(output, delimiter='|')
    for row in rows:
        if len(row) != 5:
            print "Skipping row with len " + str(len(row))
            continue
        a = row[0]+","+row[1]
        b = row[2]+","+row[3]
        walk = row[4]
        rowwriter.writerow([a, b, walk])
    output.close()
    print "Done"
    exit()
    """
    distances = import_distances()
    #print "rows:"
    #print rows
    #Find the distances from the csv file not in the json file
    print "Finding distances which need to be downloaded"
    needed_rows = []
    # NOTE(review): `ig` is never incremented, so the "Ignored" line below
    # always prints 0 -- the len != 3 filter seems to have been removed.
    ig = 0
    #Find which distances still need to be downloaded
    for row in rows:
        #print row
        song_title = row[0]
        artist_name = row[1]
        #walk = row[2]
        
        #If it's in distances, then it's been processed and we don't need it
        if song_title in distances and artist_name in distances[song_title]:# and walk in distances[a][b]:
            continue
        needed_rows.append(row)
    
    #Print the results
    print "There are a total of {0:,d} songs.".format(len(rows))
    print "There are {0:,d} downloaded songs.".format(sum([len(d) for d in distances]))
    print "Ignored {0:,d} songs since len != 3.".format(ig)
    print "There are {0:,d} songs needing to be processed.".format(len(needed_rows))
    
    len_needed_rows = len(needed_rows)
    
    if len_needed_rows <= 0:
        print "Done"
        exit()
    
    #Figure out how many jobs I want to create and how many requests per job
    job_count = int(sys.argv[1])
    req_per_row = int(sys.argv[2])
    
    #Probably an easier way to do this...
    #For each job, add the requests to that array and append it to job_rows
    print "Generating data for {0} jobs".format(job_count)
    job_rows = []
    for i in xrange(0, job_count):
        job = []
        while len(job) < req_per_row and len(needed_rows) > 0:
            job.append(needed_rows.pop())
        job_rows.append(job)
        
        if len(needed_rows) == 0:
            break
    
    #Print status of jobs
    print "Created {0} jobs with {1} songs each, so {2} songs to be processed.".format(len(job_rows),req_per_row,sum([len(job) for job in job_rows]))
    
    #Now actually map them to run in the cloud
    #The "s1" type gives unique IP addresses per job. Eek
    print "Creating job map"
    #print job_rows
    jids = cloud.map(download, job_rows, _type="s1")
    
    print "Waiting for jobs to complete."
    
    #The possible statuses and the statuses we are waiting for
    # NOTE(review): possible_job_statutes is unused; kept as documentation.
    possible_job_statutes = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"]
    pending_job_statuses = Set(["waiting", "queued", "processing"])
    
    #Keep looping until no job statuses are in the pending_job_statuses
    statuses = []
    while True:
        statuses = cloud.status(jids)
        tally = Counter()
        for status in statuses:
            tally[status] += 1
        print "Status of jobs: " + str(tally)
        
        #If none of the statuses are in pending_job_statuses, we are done!
        if len(pending_job_statuses.intersection(Set(statuses))) == 0:
            break
        
        #Wait for 5 seconds between checks
        sleep(5)
    
    #Now loop through the jobs and retrieve the results
    saved = 0
    for index in xrange(0, len(statuses)):
        print "Working on job {0} of {1}".format(index+1, len(statuses))
        jid = jids[index]
        status = statuses[index]
        
        #If it's not "done", then there must have been an error
        if status != "done":
            print "Status of jid {0} = {1}.".format(jid, status)
            continue
        
        results = cloud.result(jid)
        print "There are {0} results.".format(len(results))
        for result in results:
            #Make sure we aren't over the limits or nothing went wrong
            #if result["status"] != "OK":
            #    print('result["status"] == ' + result["status"])
            #    continue
            #elem = result["rows"][0]["elements"][0]
            #if elem["status"] != "OK":
            #    print('elem["status"] == ' + elem["status"])
            #    continue
            
            #dist = elem["distance"]["value"]
            #dur = elem["duration"]["value"]
            #a = result["artist_name"]
            #b = result["b"]
            #walk = result["walk"]
            
            #if a not in distances:
            #    distances[a] = {}
            #if b not in distances[a]:
            #    distances[a][b] = {}
            
            #distances[a][b][walk] = {"walk":walk,"distance":dist,"duration":dur,"a":a,"b":b}
            song = {'artist_name':result['artist_name'],'song_title':result['song_title'],'viewCount':result['viewCount'],'likeCount':result['likeCount']}
            #print song
            saved += 1
        
            # NOTE(review): unlike the sibling run(), this try sits inside
            # the per-result loop, so a save is attempted for every result.
            # Bare except hides the real failure reason.
            try:
                #print song['viewCount']
                if song['viewCount'] > 0:
                    save_distances(song)
                    #print song
            except:
                print "Couldn't save distances."
    
    print "Saved {0:,d} distance with {1:,d} remaining".format(saved, len_needed_rows-saved)
def cloud_status(jids):
    """Return a Counter of status strings for the given PiCloud job ids."""
    return Counter(cloud.status(jids))
Exemple #17
0
def cloud_status(jids):
    """Return a Counter of status strings for the given PiCloud job ids."""
    # Counter's constructor tallies the iterable directly; this replaces the
    # manual loop and matches the sibling one-line cloud_status definitions.
    return Counter(cloud.status(jids))
def done_jobs(jobs):
    """Fetch the results of every job in `jobs` whose PiCloud status is
    'done'; jobs in any other state are skipped.

    Note: `jobs` is listed twice, exactly as in the original, so the
    original behaviour for generator inputs is preserved.
    """
    statuses = cloud.status(list(jobs))
    finished_ids = []
    for job_id, job_state in zip(list(jobs), statuses):
        if job_state == 'done':
            finished_ids.append(job_id)
    return list(cloud.iresult(finished_ids))
Exemple #19
0
'''
Created on Sep 14, 2012

@author: mudassar
'''
import time
import cloud
from etl.config.drivers.picloud import setcloudkey



def testruntime(**kwargs):
    i=0
    while i!=5:
        cloud.cloud.cloudLog.info("test run time")
        print "test run time"
        time.sleep(1)
        i+=1
    
if __name__ == '__main__':
    setcloudkey()
    # Submit testruntime with _max_runtime=1 (presumably minutes -- confirm
    # against PiCloud docs) so the platform kills it if it overruns;
    # _label tags the job in the PiCloud console.
    jobid=cloud.call(testruntime,a=1,_max_runtime=1,_label="TESTRUNTIME")
    cloud.join(jobid)
    print cloud.status(jobid)
    pass
Exemple #20
0
# IPython log file
# NOTE(review): raw interactive-session transcript, not a runnable script.
# Several lines fail by design or by mistake (e.g. `exposure()` calls the
# module object, `cloud.status()` takes no argument here, `jids.status()`
# is not a list method) and the cloud.map argument shapes are being worked
# out by trial and error.

import exposure
exposure()
exposure.exposure()
import cloud
jid = cloud.call(exposure)
jid = cloud.call(exposure.exposure)
print jid
cloud.status()
cloud.status(jid)
cloud.result(jid)
exposure.exposure()
exposure.exposure(1024)
exposure.exposure(512)
exposure.exposure(1024)
args = 1024
N = 100
zip([args]*N)
jids = cloud.map(exposure.exposure, *zip(*([args]*N)))
zip(args*N)
zip([args]*N)
jids = cloud.map(exposure.exposure, zip(([args]*N)))
jids.status()
jids
cloud.status(jids)
cloud.result(jids)
jids = cloud.map(exposure.exposure, *zip(([args]*N)))
cloud.status(jids)
cloud.result(jids)
jids = cloud.map(exposure.exposure, *zip(*([args]*N)))
def square(x):
    """Return x multiplied by itself."""
    product = x * x
    return product

# demonstration of cloud.call()
import cloud
# Submit square(3) as a single remote job; cloud.call returns immediately
# with a job id.
jid = cloud.call(square,3)

print 'Job Id::', jid
print 'Job status',cloud.status(jid)

# cloud.result blocks until the job finishes, then returns its value (9).
print 'Result',cloud.result(jid)
import cloud

# print the status of a job known to exist
# NOTE(review): job id 1 is assumed to exist on this account -- if it does
# not, cloud.status raises instead of printing.
status = cloud.status(1)
print status
def test_exception2():
    '''Raise TypeError since cloud.status called without arguments'''
    # cloud.status requires at least a job id; calling it bare is expected
    # to raise TypeError, which propagates to the test runner.
    cloud.status()
def test_multiply():
    # Submit a trivial lambda job; cloud.result blocks until completion,
    # so the status queried afterwards should be "done".
    jid = cloud.call(lambda: 3*3)
    cloud.result(jid)
    result = cloud.status(jid)
    assert result == "done"
# NOTE(review): IPython notebook export (duplicate of an earlier cell run
# with a different _env); `%time` is an IPython magic, not plain Python.
# look at how long it takes to run locally

%time segment_stats(valid_segments[0], None)

# <codecell>

# here's how to run it on PiCloud
# Prerequisite:  http://docs.picloud.com/primer.html <--- READ THIS AND STUDY TO REFRESH YOUR MEMORY

import cloud
jid = cloud.call(segment_stats, '1346823845675', None, _env='Working_with_Open_Data')

# <codecell>

# pull up status -- refresh until done
cloud.status(jid)

# <codecell>

# this will block until job is done or errors out

cloud.join(jid)

# <codecell>

# get your result
cloud.result(jid)

# <codecell>

# get some basic info