# Smoke test: single PUT, GET, and DEL against the Crail dispatcher.
import time
from subprocess import call

import crail


def handler(event, context):
    crail.launch_dispatcher_from_lambda()
    call(["cp", "/var/task/lambda_java.py", "/tmp/lambda"])
    call(["cp", "/var/task/jars/crail-reflex-1.0.jar", "/tmp/crail-reflex"])
    time.sleep(20)
    socket = crail.connect()
    print "Talk to dispatcher..."

    src_filename = "/tmp/crail-reflex"
    dst_filename = "/dsttest-test-reflex2.data"
    ticket = 1001

    print "Try PUT..."
    start = time.time()
    crail.put(socket, src_filename, dst_filename, ticket)
    end = time.time()
    print "Execution time for single PUT: ", (end - start) * 1000000, " us\n"
    time.sleep(1)

    src_filename = "/dsttest-test-reflex2.data"
    dst_filename = "/tmp/crail-reflex-2"
    print "Now GET..."
    start = time.time()
    crail.get(socket, src_filename, dst_filename, ticket)
    end = time.time()
    print "Execution time for single GET: ", (end - start) * 1000000, " us\n"
    time.sleep(1)

    call(["ls", "-al", "/tmp/"])
    src_filename = "/dsttest-test-reflex2.data"
    print "Now DEL..."
    start = time.time()
    crail.delete(socket, src_filename, ticket)
    end = time.time()
    print "Execution time for single DEL: ", (end - start) * 1000000, " us\n"
    return
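The `crail` client module these handlers import is not included in this section. The stub below sketches its interface as inferred from the call sites only, assuming a locally launched dispatcher process and a NUL-terminated response convention; the bodies are placeholders, not the actual implementation.

# Hypothetical stub of the crail client API, reconstructed from call sites.
# The real module ships with the dispatcher and is not shown here.

def launch_dispatcher_from_lambda():
    """Start the dispatcher inside the Lambda sandbox; returns a process
    handle that is later passed back to close()."""
    raise NotImplementedError

def connect():
    """Open a socket to the local dispatcher and return it."""
    raise NotImplementedError

def put(socket, src_filename, dst_filename, ticket):
    """Copy local file src_filename to Crail path dst_filename. Returns a
    response string; callers treat a trailing NUL byte as success."""
    raise NotImplementedError

def get(socket, src_filename, dst_filename, ticket):
    """Copy Crail path src_filename to local file dst_filename; same
    response convention as put()."""
    raise NotImplementedError

def delete(socket, src_filename, ticket):
    """Remove src_filename from Crail."""
    raise NotImplementedError

def close(socket, ticket, p):
    """Tear down the socket and shut down the dispatcher process p."""
    raise NotImplementedError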
# Invocation-overhead probe: launches the dispatcher, connects, and writes
# a single pickled timing log to Crail. (The shuffle-related comments from
# the map handler do not apply here; this stub does no S3 I/O.)
import os
import pickle
import time

import crail


def lambda_handler(event, context):
    id = int(event['id'])
    n = num_workers = int(event['n'])
    bucket_name = str(event['bucket_name'])
    path = str(event['path'])
    n_tasks = n

    t0 = time.time()
    t1 = time.time()
    t2 = time.time()

    p = crail.launch_dispatcher_from_lambda()
    socket = crail.connect()
    ticket = 1001
    file_tmp = '/tmp/input_tmp'
    t3 = time.time()

    # upload log
    log = {'id': id, 't0': t0, 't1': t1, 't2': t2, 't3': t3}
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/invoke-logs-' + str(n) + '-' + str(id)
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)
    os.remove(file_tmp)

    crail.close(socket, ticket, p)

    r = 'lambda finished ' + str(id)
    print r
    return r
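A minimal invocation sketch for this probe. The function name and bucket below are illustrative assumptions; only the event fields come from the handler above.

import json

import boto3

# Hypothetical driver: invoke the probe once with the fields it reads.
# 'crail-invoke-probe' and 'my-sort-bucket' are placeholder names.
client = boto3.client('lambda')
event = {'id': 0, 'n': 1, 'bucket_name': 'my-sort-bucket', 'path': 'input/'}
resp = client.invoke(FunctionName='crail-invoke-probe',
                     Payload=json.dumps(event))
print resp['Payload'].read()  # JSON-encoded 'lambda finished 0' on success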
# Reduce phase: fetch this reducer's shuffle partitions from Crail, sort,
# and write the sorted output chunks to S3.
import os
import pickle
import time

import boto3

import crail


def lambda_handler(event, context):
    id = int(event['id'])
    n = num_workers = int(event['n'])
    bucket_name = str(event['bucket_name'])
    n_tasks = n

    t0 = time.time()
    p = crail.launch_dispatcher_from_lambda()
    socket = crail.connect()
    ticket = 1001

    # read input files: shuffle<0 id> shuffle<1 id> ... shuffle<num_workers-1 id>
    file_tmp = '/tmp/tmp'
    all_lines = []
    for i in xrange(n_tasks):
        key = 'shuffle' + str(i) + '-' + str(id)
        src_filename = '/' + key
        dst_filename = file_tmp
        r = crail.get(socket, src_filename, dst_filename, ticket)
        if r[-1] != u'\u0000':
            crail.close(socket, ticket, p)
            raise Exception("get failed: " + src_filename)
        with open(file_tmp, "r") as f:
            all_lines += f.readlines()
        os.remove(file_tmp)
    t1 = time.time()

    # merge & sort: split each 100-byte record into its 10-byte key and its
    # payload, sort by key, then reassemble
    for i in xrange(len(all_lines)):
        all_lines[i] = (all_lines[i][:10], all_lines[i][12:])
    all_lines.sort(key=lambda x: x[0])
    for i in xrange(len(all_lines)):
        all_lines[i] = all_lines[i][0] + " " + all_lines[i][1]
    t2 = time.time()

    # [s3] write output files: output<id*m> ... output<id*m + m-1>
    s3 = boto3.resource('s3')
    file_name = 'output/sorted_output'
    m = 1000 / n_tasks
    size = len(all_lines) / m
    for i in xrange(m):
        with open(file_tmp, "w+") as f:
            start = size * i
            end = start + size
            f.writelines(all_lines[start:end])
            f.seek(0)
            body = f.read()
        key = file_name + str(id * m + i)
        s3.Bucket(bucket_name).upload_file(file_tmp, key)
        os.remove(file_tmp)
    t3 = time.time()

    # upload log
    log = {'id': id, 't0': t0, 't1': t1, 't2': t2, 't3': t3}
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/reduce-logs-100GB-' + str(n) + '-' + str(id)  # new file
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)

    # time (in sec) spent: read shuffle, compute, write output
    log = [t1 - t0, t2 - t1, t3 - t2, t1 - t1]
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/reduce-results-100GB-' + str(n) + '-' + str(id)  # new file
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)

    crail.close(socket, ticket, p)

    r = 'reduce finished ' + str(id)
    print r
    return r
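The timing logs this handler uploads can be fetched back out of Crail and unpickled. A minimal readback sketch, assuming the same dispatcher/ticket conventions as above; the n and id values are examples:

import pickle

import crail

# Sketch: fetch one reducer's timing log from Crail and unpickle it.
p = crail.launch_dispatcher_from_lambda()
socket = crail.connect()
ticket = 1001
n, id = 100, 0  # example values
crail.get(socket, '/reduce-logs-100GB-' + str(n) + '-' + str(id),
          '/tmp/log', ticket)
with open('/tmp/log', 'r') as f:
    log = pickle.load(f)
print 'reduce %d: read=%.2fs sort=%.2fs write=%.2fs' % (
    log['id'], log['t1'] - log['t0'], log['t2'] - log['t1'],
    log['t3'] - log['t2'])
crail.close(socket, ticket, p)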
# Map phase: read this mapper's input files from S3, partition records by
# key, and write one shuffle file per reducer to Crail.
import os
import pickle
import time

import boto3

import crail


def lambda_handler(event, context):
    id = int(event['id'])
    n = num_workers = int(event['n'])
    bucket_name = str(event['bucket_name'])
    path = str(event['path'])
    n_tasks = n

    t0 = time.time()

    # [s3] read input files: input<id*m> ... input<id*m + m-1>
    s3 = boto3.resource('s3')
    file_local = '/tmp/input_tmp'
    lines = []
    m = 1000 / n_tasks  # each task reads m 100MB input files
    for i in xrange(m):
        i += id * m
        key = path + 'input' + str(i)
        s3.Bucket(bucket_name).download_file(key, file_local)
        with open(file_local, "r") as f:
            lines += f.readlines()  # each line contains a 100-byte record
        os.remove(file_local)
    t1 = time.time()

    # partition: the first two key bytes (ASCII range 32-126, i.e. 95
    # values each) index into n_tasks evenly sized buckets
    p_list = [[] for x in xrange(n_tasks)]  # list of n partitions
    for line in lines:
        key1 = ord(line[0]) - 32
        key2 = ord(line[1]) - 32
        p = n_tasks / 95.0
        index = int(p * (key1 + key2 / 95.0))
        p_list[index].append(line)
    t2 = time.time()

    # write output files: shuffle<id 0> shuffle<id 1> ... shuffle<id num_workers-1>
    p = crail.launch_dispatcher_from_lambda()
    socket = crail.connect()
    ticket = 1001
    file_tmp = file_local
    for i in xrange(n_tasks):
        with open(file_tmp, "w") as f:
            f.writelines(p_list[i])
        key = 'shuffle' + str(id) + '-' + str(i)
        src_filename = file_tmp
        dst_filename = '/' + key
        r = crail.put(socket, src_filename, dst_filename, ticket)
        if r[-1] != u'\u0000':
            crail.close(socket, ticket, p)
            raise Exception("put failed: " + dst_filename)
    t3 = time.time()

    # upload log
    log = {'id': id, 't0': t0, 't1': t1, 't2': t2, 't3': t3,
           'file_size': [len(x) * 100 for x in p_list]}
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/map-logs-100GB-' + str(n) + '-' + str(id)
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)

    # time spent (in sec): read input, compute, write shuffle
    log = [t1 - t0, t2 - t1, t3 - t2, t2 - t2]
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/map-results-100GB-' + str(n) + '-' + str(id)
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)
    os.remove(file_tmp)

    crail.close(socket, ticket, p)

    r = 'map finished ' + str(id)
    print r
    return r
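To make the partitioning arithmetic concrete, here is a small worked example with n_tasks = 250 (the task count mentioned in the original comments); the sample record is illustrative:

# Worked example of the partition index above, for n_tasks = 250.
# A record whose key starts with "R7" (ord('R') = 82, ord('7') = 55):
#   key1 = 82 - 32 = 50, key2 = 55 - 32 = 23
#   index = int(250/95.0 * (50 + 23/95.0)) = int(2.6316 * 50.242) = 132
# so the record lands in shuffle<id>-132, and reducer 132 later collects
# that key range from every mapper.
n_tasks = 250
line = "R7" + "x" * 98  # 100-byte record; first two bytes drive the bucket
key1 = ord(line[0]) - 32
key2 = ord(line[1]) - 32
index = int(n_tasks / 95.0 * (key1 + key2 / 95.0))
assert index == 132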
# Map phase with network monitoring: same as the map handler above, plus a
# timer-based sampler that records interface byte counters once per second
# and uploads the series to a Redis cluster.
import os
import pickle
import threading
import time

import boto3
import ifcfg
from rediscluster import StrictRedisCluster

import crail


def lambda_handler(event, context):
    id = int(event['id'])
    n = num_workers = int(event['n'])
    bucket_name = str(event['bucket_name'])
    path = str(event['path'])
    n_tasks = n

    STOP = threading.Event()  # note: set while sampling should continue
    LOGS_PATH = 'map-logs-' + str(n)

    class TimeLog:
        def __init__(self, enabled=True):
            self.enabled = enabled
            self.start = time.time()
            self.prev = self.start
            self.points = []
            self.sizes = []

        def add_point(self, title):
            if not self.enabled:
                return
            now = time.time()
            self.points += [(title, now - self.prev)]
            self.prev = now

    def upload_net_bytes(rclient, rxbytes_per_s, txbytes_per_s, timelogger, reqid):
        #rclient = redis.Redis(host=REDIS_HOSTADDR_PRIV, port=6379, db=0)
        netstats = LOGS_PATH + '/netstats-' + reqid
        rclient.set(netstats,
                    str({'lambda': reqid, 'started': timelogger.start,
                         'rx': rxbytes_per_s, 'tx': txbytes_per_s}).encode('utf-8'))
        print "wrote netstats"
        return

    def get_net_bytes(rxbytes, txbytes, rxbytes_per_s, txbytes_per_s):
        SAMPLE_INTERVAL = 1.0
        # reschedule this function every SAMPLE_INTERVAL seconds until the
        # STOP event is cleared
        if STOP.is_set():
            threading.Timer(SAMPLE_INTERVAL, get_net_bytes,
                            [rxbytes, txbytes, rxbytes_per_s, txbytes_per_s]).start()
        rxbytes.append(int(ifcfg.default_interface()['rxbytes']))
        txbytes.append(int(ifcfg.default_interface()['txbytes']))
        rxbytes_per_s.append((rxbytes[-1] - rxbytes[-2]) / SAMPLE_INTERVAL)
        txbytes_per_s.append((txbytes[-1] - txbytes[-2]) / SAMPLE_INTERVAL)

    t0 = time.time()

    # [s3] read input files: input<id*m> ... input<id*m + m-1>
    s3 = boto3.resource('s3')
    file_local = '/tmp/input_tmp'
    lines = []
    m = 1000 / n_tasks  # each task reads m 100MB input files
    for i in xrange(m):
        i += id * m
        key = path + 'input' + str(i)
        s3.Bucket(bucket_name).download_file(key, file_local)
        with open(file_local, "r") as f:
            lines += f.readlines()  # each line contains a 100-byte record
        os.remove(file_local)
    t1 = time.time()

    # partition (same scheme as the map handler above)
    p_list = [[] for x in xrange(n_tasks)]  # list of n partitions
    for line in lines:
        key1 = ord(line[0]) - 32  # key range 32-126
        key2 = ord(line[1]) - 32  # 126-32+1 = 95 values per byte
        p = n_tasks / 95.0
        index = int(p * (key1 + key2 / 95.0))
        p_list[index].append(line)

    # start collecting network data
    iface = ifcfg.default_interface()
    rxbytes = [int(iface['rxbytes'])]
    txbytes = [int(iface['txbytes'])]
    rxbytes_per_s = []
    txbytes_per_s = []
    STOP.set()
    get_net_bytes(rxbytes, txbytes, rxbytes_per_s, txbytes_per_s)
    t2 = time.time()

    # write output files: shuffle<id 0> shuffle<id 1> ... shuffle<id num_workers-1>
    p = crail.launch_dispatcher_from_lambda()
    socket = crail.connect()
    ticket = 1001
    file_tmp = file_local
    for i in xrange(n_tasks):
        with open(file_tmp, "w") as f:
            f.writelines(p_list[i])
        key = 'shuffle' + str(id) + '-' + str(i)
        src_filename = file_tmp
        dst_filename = '/' + key
        r = crail.put(socket, src_filename, dst_filename, ticket)
        if r[-1] != u'\u0000':
            crail.close(socket, ticket, p)
            raise Exception("put failed: " + dst_filename)
    t3 = time.time()

    # stop sampling and upload the network data
    timelogger = TimeLog(enabled=True)
    startup_nodes = [{"host": "rediscluster.a9ith3.clustercfg.usw2.cache.amazonaws.com",
                      "port": "6379"}]
    rclient = StrictRedisCluster(startup_nodes=startup_nodes,
                                 skip_full_coverage_check=True)
    STOP.clear()
    upload_net_bytes(rclient, rxbytes_per_s, txbytes_per_s, timelogger, str(id))

    # upload log
    log = {'id': id, 't0': t0, 't1': t1, 't2': t2, 't3': t3,
           'file_size': [len(x) * 100 for x in p_list]}
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/map-logs-100GB-' + str(n) + '-' + str(id)
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)

    # time spent (in sec): read input, compute, write shuffle
    log = [t1 - t0, t2 - t1, t3 - t2, t2 - t2]
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/map-results-100GB-' + str(n) + '-' + str(id)
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)
    os.remove(file_tmp)

    crail.close(socket, ticket, p)

    r = 'map finished ' + str(id)
    print r
    return r
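The netstats record is stored as str(dict) under '<LOGS_PATH>/netstats-<id>', so it can be recovered with ast.literal_eval. A minimal readback sketch against the endpoint hardcoded above; the n and id values are examples:

import ast

from rediscluster import StrictRedisCluster

# Sketch: read one mapper's netstats series back out of the Redis cluster.
startup_nodes = [{"host": "rediscluster.a9ith3.clustercfg.usw2.cache.amazonaws.com",
                  "port": "6379"}]
rclient = StrictRedisCluster(startup_nodes=startup_nodes,
                             skip_full_coverage_check=True)
n, id = 100, 0  # example values
raw = rclient.get('map-logs-' + str(n) + '/netstats-' + str(id))
stats = ast.literal_eval(raw)  # {'lambda': ..., 'started': ..., 'rx': [...], 'tx': [...]}
print 'lambda %s: peak tx %.1f MB/s' % (stats['lambda'],
                                        max(stats['tx']) / 1e6)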
# Per-worker PUT micro-benchmark. Everything after the first `return 0` is
# unreachable leftover from earlier experiments, kept for reference.
import json
import time
from subprocess import call

import crail


def handler(id):
    p = crail.launch_dispatcher_from_lambda()
    call(["cp", "/var/task/lambda_java.py", "/tmp/lambda"])
    call(["cp", "/var/task/jars/crail-reflex-1.0.jar", "/tmp/crail-reflex"])
    #call(["cp", "/var/task/jars/crail-client-1.0.jar", "/tmp/crail-reflex"])
    socket = crail.connect()

    result = []
    result.append("Talk to dispatcher...")
    src_filename = "/tmp/crail-reflex"
    #dst_filename = "/dsttest-test-reflex2.data"
    dst_filename = "/data" + str(id)
    ticket = 1001

    result.append("Try PUT...")
    start = time.time()
    crail.put(socket, src_filename, dst_filename, ticket)
    '''
    for i in range(100):
        dst_filename = "/id" + str(id) + str(i)
        crail.put(socket, src_filename, dst_filename, ticket)
    '''
    end = time.time()
    result.append("Execution time for single PUT: " + str((end - start) * 1000000) + " us")
    crail.close(socket, ticket, p)
    return 0

    # --- unreachable from here on (note: `n` below is undefined) ---
    print "storing logs"
    result.append("storing logs")
    log = {'id': 1, 's3read': 2, 'compute': 3, 'write': 4}
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        f.write(json.dumps(log))
    src_filename = file_tmp
    dst_filename = '/map-logs-100GB' + str(n)
    crail.put(socket, src_filename, dst_filename, ticket)
    crail.close(socket, ticket, p)
    return result

    time.sleep(1)
    #src_filename = "/dsttest-test-reflex2.data"
    src_filename = dst_filename
    dst_filename = "/tmp/crail-reflex-2"
    result.append("Now GET...")
    start = time.time()
    crail.get(socket, src_filename, dst_filename, ticket)
    end = time.time()
    result.append("Execution time for single GET: " + str((end - start) * 1000000) + " us")
    '''
    time.sleep(1)
    call(["ls", "-al", "/tmp/"])
    src_filename = "/dsttest-test-reflex2.data"
    print "Now DEL..."
    start = time.time()
    crail.delete(socket, src_filename, ticket)
    end = time.time()
    print "Execution time for single DEL: ", (end-start) * 1000000, " us\n"
    '''
    return result
# Reduce phase with network monitoring: samples interface byte counters
# while fetching shuffle partitions, uploads the series to Redis, then
# sorts and writes the output to S3.
import os
import pickle
import threading
import time

import boto3
import ifcfg
from rediscluster import StrictRedisCluster

import crail


def lambda_handler(event, context):
    id = int(event['id'])
    n = num_workers = int(event['n'])
    bucket_name = str(event['bucket_name'])
    n_tasks = n

    STOP = threading.Event()  # note: set while sampling should continue
    LOGS_PATH = 'reduce-logs-' + str(n)

    class TimeLog:
        def __init__(self, enabled=True):
            self.enabled = enabled
            self.start = time.time()
            self.prev = self.start
            self.points = []
            self.sizes = []

        def add_point(self, title):
            if not self.enabled:
                return
            now = time.time()
            self.points += [(title, now - self.prev)]
            self.prev = now

    def upload_net_bytes(rclient, rxbytes_per_s, txbytes_per_s, timelogger, reqid):
        #rclient = redis.Redis(host=REDIS_HOSTADDR_PRIV, port=6379, db=0)
        netstats = LOGS_PATH + '/netstats-' + reqid
        rclient.set(netstats,
                    str({'lambda': reqid, 'started': timelogger.start,
                         'rx': rxbytes_per_s, 'tx': txbytes_per_s}).encode('utf-8'))
        print "wrote netstats"
        return

    def get_net_bytes(rxbytes, txbytes, rxbytes_per_s, txbytes_per_s):
        SAMPLE_INTERVAL = 1.0
        # reschedule this function every SAMPLE_INTERVAL seconds until the
        # STOP event is cleared
        if STOP.is_set():
            threading.Timer(SAMPLE_INTERVAL, get_net_bytes,
                            [rxbytes, txbytes, rxbytes_per_s, txbytes_per_s]).start()
        rxbytes.append(int(ifcfg.default_interface()['rxbytes']))
        txbytes.append(int(ifcfg.default_interface()['txbytes']))
        rxbytes_per_s.append((rxbytes[-1] - rxbytes[-2]) / SAMPLE_INTERVAL)
        txbytes_per_s.append((txbytes[-1] - txbytes[-2]) / SAMPLE_INTERVAL)

    # start collecting network data
    iface = ifcfg.default_interface()
    rxbytes = [int(iface['rxbytes'])]
    txbytes = [int(iface['txbytes'])]
    rxbytes_per_s = []
    txbytes_per_s = []
    STOP.set()
    get_net_bytes(rxbytes, txbytes, rxbytes_per_s, txbytes_per_s)

    t0 = time.time()
    p = crail.launch_dispatcher_from_lambda()
    socket = crail.connect()
    ticket = 1001

    # read input files: shuffle<0 id> shuffle<1 id> ... shuffle<num_workers-1 id>
    file_tmp = '/tmp/tmp'
    all_lines = []
    for i in xrange(n_tasks):
        key = 'shuffle' + str(i) + '-' + str(id)
        src_filename = '/' + key
        dst_filename = file_tmp
        r = crail.get(socket, src_filename, dst_filename, ticket)
        if r[-1] != u'\u0000':
            crail.close(socket, ticket, p)
            raise Exception("get failed: " + src_filename)
        with open(file_tmp, "r") as f:
            all_lines += f.readlines()
        os.remove(file_tmp)
    t1 = time.time()

    # stop sampling and upload the network data
    timelogger = TimeLog(enabled=True)
    startup_nodes = [{"host": "rediscluster.a9ith3.clustercfg.usw2.cache.amazonaws.com",
                      "port": "6379"}]
    rclient = StrictRedisCluster(startup_nodes=startup_nodes,
                                 skip_full_coverage_check=True)
    STOP.clear()
    upload_net_bytes(rclient, rxbytes_per_s, txbytes_per_s, timelogger, str(id))
    t1_2 = time.time()

    # merge & sort (same scheme as the reduce handler above)
    for i in xrange(len(all_lines)):
        all_lines[i] = (all_lines[i][:10], all_lines[i][12:])
    all_lines.sort(key=lambda x: x[0])
    for i in xrange(len(all_lines)):
        all_lines[i] = all_lines[i][0] + " " + all_lines[i][1]
    t2 = time.time()

    # [s3] write output files: output<id*m> ... output<id*m + m-1>
    s3 = boto3.resource('s3')
    file_name = 'output/sorted_output'
    m = 1000 / n_tasks
    size = len(all_lines) / m
    for i in xrange(m):
        with open(file_tmp, "w+") as f:
            start = size * i
            end = start + size
            f.writelines(all_lines[start:end])
            f.seek(0)
            body = f.read()
        key = file_name + str(id * m + i)
        s3.Bucket(bucket_name).upload_file(file_tmp, key)
        os.remove(file_tmp)
    t3 = time.time()

    # upload log
    log = {'id': id, 't0': t0, 't1': t1_2, 't2': t2, 't3': t3}
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/reduce-logs-100GB-' + str(n) + '-' + str(id)  # new file
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)

    # time (in sec) spent: read shuffle, compute, write output, upload netstats
    log = [t1 - t0, t2 - t1_2, t3 - t2, t1_2 - t1]
    file_tmp = '/tmp/tmp'
    with open(file_tmp, "w") as f:
        pickle.dump(log, f)
    src_filename = file_tmp
    dst_filename = '/reduce-results-100GB-' + str(n) + '-' + str(id)  # new file
    r = crail.put(socket, src_filename, dst_filename, ticket)
    if r[-1] != u'\u0000':
        crail.close(socket, ticket, p)
        raise Exception("put failed: " + dst_filename)

    crail.close(socket, ticket, p)

    r = 'reduce finished ' + str(id)
    print r
    return r
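Putting the phases together: a minimal driver sketch that fans out the map handlers and then the reducers. The function names, bucket, and the sleep-based barrier are assumptions for illustration; the original orchestration code is not shown in this section.

import json
import time

import boto3

# Hypothetical driver: invoke n map lambdas, then n reduce lambdas, using
# async ('Event') invocations. A real driver would wait for map completion
# (e.g. by polling the map-logs-* files in Crail); a sleep stands in here.
N = 100                      # number of workers per phase (example value)
BUCKET = 'my-sort-bucket'    # placeholder bucket name
client = boto3.client('lambda')

for i in xrange(N):
    event = {'id': i, 'n': N, 'bucket_name': BUCKET, 'path': 'input/'}
    client.invoke(FunctionName='sort-map', InvocationType='Event',
                  Payload=json.dumps(event))

time.sleep(300)  # placeholder barrier between map and reduce phases

for i in xrange(N):
    event = {'id': i, 'n': N, 'bucket_name': BUCKET}
    client.invoke(FunctionName='sort-reduce', InvocationType='Event',
                  Payload=json.dumps(event))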