def predict(self, test_data, model_details=None, options={}):
    super(knet, self).predict(test_data, model_details, options)
    assert len(test_data) != 0, "test_data list shouldn't be empty"
    self.test_file = test_data[0]
    assert os.path.exists(self.test_file), "test file {} does not exist".format(self.test_file)
    print("Start Predicting")
    direct_entity, direct_context, self.predict_types = util.raw2npy(self.test_file)
    embedding = np.load(self.embedding)
    model = models.KA_D("KA+D", self.disamb_file)
    sess = tf.Session()
    w2v = util.build_vocab(self.glove, model.word_size)
    sess.run(model.initializer)
    # Restore the trained checkpoint before computing direct outputs.
    model.saver.restore(sess, self.model_name)
    util.printlog("Begin computing direct outputs")
    self.final_result = util.direct(w2v, sess, model, direct_entity, direct_context,
                                    embedding, self.type_file)
    # Write one "<entity>\t<types>\t<prediction>" line per test instance.
    dir_name = os.path.dirname(test_data[0])
    output_file = os.path.join(dir_name, "entity_typing_test_output.txt")
    lines = []
    for i in range(len(self.final_result)):
        lines.append("{}\t{}\t{}".format(" ".join(direct_entity[i]),
                                         self.predict_types[i],
                                         self.final_result[i].lower()))
    with open(output_file, 'w') as fout:
        fout.write("\n".join(lines))
    return output_file
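# Illustrative only: each line of entity_typing_test_output.txt written above has the
# form "<entity mention>\t<types field returned by util.raw2npy>\t<predicted type>".
# The values in this example are hypothetical, not taken from the project's data:
#
#   Barack Obama    /person /person/politician    /person/politician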
def get_url(url, url_type, request_num, phase, accept_header, http_headers):
    """
    # Desc : Function to send GET requests to the server. Type 1 is GET requests.
    #        Handles 3 types of GET requests, based on ID, last_name and zipcode.
    # Input : GET request URL, request number, log file to collect per-request data,
    #         phase of each request (ramp-up, ramp-down, measuring-time)
    # Output: Generates per-request (GET) details in a log file
    """
    global timeout_err
    global conn_err
    global http_err
    global bad_url
    data = ""
    headers = ""
    urlo = urlparse.urlparse(url)
    ip = get_ip(urlo.hostname)
    start = time.time()
    query = ''
    if urlo.query != '':
        query = '?{}'.format(urlo.query)
    req_path = '{}{}'.format(urlo.path, query)
    # Build a raw HTTP/1.1 request; extra headers are appended before the blank line.
    req = ('GET {} HTTP/1.1\r\n'
           'Host: {}\r\n'
           'Accept: {}\r\n'
           'Connection: close\r\n'
           '{}\r\n').format(req_path, urlo.netloc, accept_header,
                            ''.join(hh + '\r\n' for hh in http_headers))
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.settimeout(30)
        sock.connect((ip, urlo.port))
        sock.sendall(req)
        # Read until the end of the response headers has been received.
        while "\r\n\r\n" not in headers:
            data = sock.recv(256)
            if not data:
                break
            headers += data
        http_status = headers[9:12]
        if http_status[0:1] != '2':
            http_err += 1
    except socket.timeout:
        timeout_err += 1
    except socket.error as e:
        conn_err += 1
    finally:
        sock.close()
    end = time.time()
    response_time = end - start
    total_length = calculate_len_get(headers)
    util.printlog(log, phase, url_type, request_num, url, start, end, response_time, total_length)
    return
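# Illustrative only: for a hypothetical URL http://10.0.0.5:8080/employees?zipcode=94085
# with accept_header='application/json' and http_headers=['X-Trace-Id: demo'],
# the req string assembled above would be:
example_req = ('GET /employees?zipcode=94085 HTTP/1.1\r\n'
               'Host: 10.0.0.5:8080\r\n'
               'Accept: application/json\r\n'
               'Connection: close\r\n'
               'X-Trace-Id: demo\r\n'
               '\r\n')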
def delete_url(url, url_type, request_num, phase):
    """
    # Desc : Function to send DELETE requests to the server. Type 3 is DELETE requests.
    #        Also captures the data record being deleted and saves it in a list (post_datalist)
    #        which is used for new POST requests.
    # Input : DELETE request URL, request number, log file to collect per-request data,
    #         phase of each request (ramp-up, ramp-down, measuring-time)
    # Output: Generates per-request (DELETE) details in a log file
    """
    global timeout_err
    global conn_err
    global http_err
    global bad_url
    global employee_idlist
    global post_datalist

    r = None
    start = time.time()
    # 1. Issue a GET request to capture the record that is about to be deleted.
    try:
        try:
            get_res = s.get(url, headers=headers)
        except requests.exceptions.RequestException as e:
            # Catastrophic error. Bail.
            print e
            sys.exit(1)
        try:
            response = json.loads(get_res.content)
        except ValueError:
            # Decoding failed.
            print "Exception -- Decoding of result from getid for delete failed. Exiting"
            sys.exit(1)
        if response:
            if 'employee' in response:
                post_datalist.insert(front_oflist, response)
            else:
                print url
                print "Warning : Record not found"
        # 2. Issue the DELETE request itself.
        start = time.time()
        r = s.delete(url, headers=headers)
    except requests.exceptions.Timeout as e:
        timeout_err = timeout_err + 1
    except requests.exceptions.ConnectionError as e:
        conn_err = conn_err + 1
    except requests.exceptions.HTTPError as e:
        http_err = http_err + 1
    except requests.exceptions.TooManyRedirects as e:
        bad_url = bad_url + 1
    except requests.exceptions.RequestException as e:
        # Catastrophic error. Bail.
        print e
        sys.exit(1)
    finally:
        end = time.time()
        response_time = end - start
        total_length = calculate_len_postdel(r) if r is not None else 0
        util.printlog(log, phase, url_type, request_num, url, start, end, response_time, total_length)
    return
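# Hypothetical sketch of the module-level request setup that delete_url and
# post_url rely on; the real script defines these elsewhere and the header
# values here are placeholder assumptions.
import requests

s = requests.Session()                          # shared session reused across requests
headers = {'Content-Type': 'application/json',  # assumed JSON API
           'Accept': 'application/json'}
front_oflist = 0                                # captured records are re-posted from the front of post_datalist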
def post_url(url, url_type, request_num, phase):
    """
    # Desc : Function to send POST requests to the server. Type 2 is POST requests.
    #        Retries if the POST request fails.
    # Input : POST request URL, request number, log file to collect per-request data,
    #         phase of each request (ramp-up, ramp-down, measuring-time)
    # Output: Generates per-request (POST) details in a log file
    """
    global start_time
    global file_cnt
    global employee_idlist
    global timeout_err
    global conn_err
    global http_err
    global bad_url
    global post_datalist
    global static_post

    r = None
    total_length = 0
    # Prefer re-posting a record captured by delete_url; fall back to static data.
    post_data = static_postdata
    if post_datalist:
        post_data = post_datalist[0]
    else:
        static_post = static_post + 1
    start = time.time()
    end = start
    # Retry up to 100 times until a non-null response is received.
    for i in range(100):
        r = post_function(url, post_data)
        if r:
            end = time.time()
            total_length = calculate_len_postdel(r)
            break
    if not r:
        print "Post request failed. Received null response. Exiting run"
        print request_num, url
        print post_data
        return
    response_time = end - start
    try:
        result = json.loads(r.content)
    except ValueError:
        # Decoding failed.
        print "Exception -- Decoding of result from posturl failed. Exiting"
        exit(1)
    if result:
        if 'result' in result:
            employee_idlist.append(result['result']['employee_id'])
        else:
            print "Exception -- Post did not return a valid employee_id"
            print post_data
            exit(1)
    util.printlog(log, phase, url_type, request_num, url, start, end, response_time, total_length)
    return
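# Hypothetical sketch of the post_function helper that post_url retries above;
# the real implementation is not shown here. Assumed behaviour: issue one POST
# through the shared session and return None on any failure so the caller retries.
def post_function(url, post_data):
    try:
        return s.post(url, data=json.dumps(post_data), headers=headers, timeout=30)
    except requests.exceptions.RequestException:
        return None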
def main():
    util.start_timetest()
    util.print_elapsed_time()
    util.printlog('abc')
    util.sleep(0.1)
    util.printlog()
    util.printlog('xyz')
    util.sleep(0.2)
    util.printlog('1234567890')
    util.sleep(3)
    util.printlog()
    util.print_elapsed_time()
    util.sleep(1)
    util.print_elapsed_time('Elapsed: ')
    util.sleep(0.5)
    util.print_elapsed_time(suffix=' has passed')
    util.sleep(0.5)
    util.print_elapsed_time('It took ', ' to process')
    util.printlog('Done')
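# Hypothetical sketch of the util helpers exercised by main() above; the real
# util module is not shown, so the names and output formatting here are assumptions.
import time

_test_start = None

def start_timetest():
    global _test_start
    _test_start = time.time()

def printlog(msg=''):
    # Timestamped log line; format is assumed.
    print('{} {}'.format(time.strftime('%H:%M:%S'), msg))

def print_elapsed_time(prefix='', suffix=''):
    printlog('{}{:.2f}s{}'.format(prefix, time.time() - _test_start, suffix))

def sleep(seconds):
    time.sleep(seconds)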
args = parser.parse_args()
training = args.train
direct = args.direct
modelname = args.model
datadir = args.data_dir
directdir = args.direct_dir
w2vfile = args.w2v_file
savedir = args.save_dir

batch_size = 1000
batch_manual = 100
iter_num = 10000
check_freq = 1000

####### load data
util.printlog("Loading data")
embedding = np.load(datadir + '/embedding.npy')
if training:
    train_entity = np.load(datadir + '/train_entity.npy')
    train_context = np.load(datadir + '/train_context.npy')
    train_label = np.load(datadir + '/train_label.npy')
    train_fbid = np.load(datadir + '/train_fbid.npy')
    valid_entity = np.load(datadir + '/valid_entity.npy')
    valid_context = np.load(datadir + '/valid_context.npy')
    valid_label = np.load(datadir + '/valid_label.npy')
    valid_fbid = np.load(datadir + '/valid_fbid.npy')
    linkvalid = np.load(datadir + '/linkvalid.npy')
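# A minimal sketch of the command-line parser assumed by the block above; flag
# names are inferred from the attribute accesses and may differ from the real CLI.
import argparse

parser = argparse.ArgumentParser(description='KNET entity typing')
parser.add_argument('--train', action='store_true', help='run training')
parser.add_argument('--direct', action='store_true', help='compute direct outputs')
parser.add_argument('--model', help='model checkpoint name to load/save')
parser.add_argument('--data_dir', help='directory containing the .npy data files')
parser.add_argument('--direct_dir', help='directory with inputs for direct output')
parser.add_argument('--w2v_file', help='path to the word-embedding (GloVe) file')
parser.add_argument('--save_dir', help='directory for saved checkpoints')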
def train(self, train_data=None, options={}):
    super(knet, self).train(train_data, options)
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    util.printlog("Loading Data")
    embedding = np.load(self.embedding)
    train_entity = np.load(self.train_entity)
    train_context = np.load(self.train_context)
    train_label = np.load(self.train_labels)
    train_fbid = np.load(self.train_fbid)
    valid_entity = np.load(self.valid_entity)
    valid_context = np.load(self.valid_context)
    valid_label = np.load(self.valid_labels)
    valid_fbid = np.load(self.valid_fbid)

    # Scale batch size, iteration count and validation frequency with the data size.
    train_size = len(train_entity)
    if train_size < 500:
        batch_size = train_size
        iter_num = train_size
        check_freq = train_size
    elif train_size < 10000:
        batch_size = train_size / 100
        iter_num = train_size / 10
        check_freq = train_size / 100
    else:
        batch_size = train_size / 1000
        iter_num = train_size / 100
        check_freq = train_size / 1000
    batch_size = int(batch_size)
    iter_num = int(iter_num)
    check_freq = int(check_freq)

    model = models.KA_D("KA+D", self.disamb_file)
    sess = tf.Session()
    w2v = util.build_vocab(self.glove, model.word_size)
    sess.run(model.initializer)

    util.printlog("Begin training")
    for i in range(iter_num):
        if i % check_freq == 0:
            util.printlog("Validating after running "
                          + str(int(i * batch_size / train_size)) + " epochs")
            util.test(w2v, model, valid_entity, valid_context, valid_label,
                      valid_fbid, embedding, batch_size, sess, "all")
            model.saver.save(sess, os.path.join(self.model_dir, str(i)))
        fd = model.fdict(w2v, (i * batch_size) % train_size, batch_size, 1,
                         train_entity, train_context, train_label, train_fbid,
                         embedding, False)
        fd[model.kprob] = 0.5
        sess.run(model.train, feed_dict=fd)
        if batch_size != train_size and i % int(train_size / batch_size / 10) == 0:
            util.printlog("Epoch {}, Batch {}".format(
                int((i * batch_size) / train_size),
                int((i * batch_size) % train_size / batch_size)))
    model.saver.save(sess, self.model_name)
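# Hypothetical usage sketch of the knet wrapper defined above; the constructor
# arguments and the test-file path are assumptions, not the project's documented API.
if __name__ == '__main__':
    net = knet()                                        # assumed default construction
    net.train()                                         # uses the data paths configured on the instance
    output = net.predict(['data/entity_typing_test.txt'])
    print('predictions written to {}'.format(output))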