Example #1
    def predict(self, test_data, model_details=None, options=None):
        options = options if options is not None else {}
        super(knet, self).predict(test_data, model_details, options)
        assert len(test_data) != 0, "test_data list shouldn't be empty"
        self.test_file = test_data[0]
        assert os.path.exists(self.test_file), \
            "File doesn't exist: " + self.test_file

        print("Start Predicting")
        direct_entity, direct_context, self.predict_types = util.raw2npy(
            self.test_file)

        embedding = np.load(self.embedding)
        model = models.KA_D("KA+D", self.disamb_file)

        sess = tf.Session()
        w2v = util.build_vocab(self.glove, model.word_size)
        sess.run(model.initializer)
        model.saver.restore(sess, self.model_name)
        util.printlog("Begin computing direct outputs")
        self.final_result = util.direct(w2v, sess, model, direct_entity,
                                        direct_context, embedding,
                                        self.type_file)

        dir_name = os.path.dirname(test_data[0])
        output_file = os.path.join(dir_name, "entity_typing_test_output.txt")
        final_str = ""
        for i in range(len(self.final_result)):
            final_str = "{}\n{}\t{}\t{}".format(final_str,
                                                " ".join(direct_entity[i]),
                                                self.predict_types[i],
                                                self.final_result[i].lower())
        with open(output_file, 'w') as fin:
            fin.write(final_str.strip())

        return output_file
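
For context, a hypothetical driver for this method (sketch only; the excerpt does not show how the knet instance is constructed, so every name below is an assumption):

# Assumes `k` is a knet instance whose attributes (embedding, glove,
# disamb_file, type_file, model_name) already point at valid files.
output_path = k.predict(["data/entity_typing_test.txt"])
with open(output_path) as f:
    for line in f:
        mention, types, result = line.rstrip("\n").split("\t")
        print(mention + "\t" + result)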
Example #2
def get_url(url, url_type, request_num, phase, accept_header, http_headers):
  """
  # Desc  : Function to send get requests to the server. Type 1 is get requests
  #         handles 3 types of GET requests based on ID, last_name and zipcode.
  # Input : Get request URL, Request Number, log file to collect per request data
  #          phase of each request(ramp-up, ramp-down, measuring-time)
  # Output: Generates per request (get) details in a log file
  """
  global timeout_err
  global conn_err
  global http_err
  global bad_url 
  data = ""
  headers = ""

  urlo = urlparse.urlparse(url)
  ip = get_ip(urlo.hostname)
  start = time.time()

  query = ''
  if urlo.query != '':
    query = '?{}'.format(urlo.query)

  req_path = '{}{}'.format(urlo.path, query)

  req = '''GET {} HTTP/1.1\r
Host: {}\r
Accept: {}\r
Connection: close\r
{}\r
'''.format(req_path, urlo.netloc,
           accept_header, ''.join(hh + '\r\n' for hh in http_headers))

  try:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(30)
    sock.connect((ip, urlo.port or 80))
    sock.sendall(req)
    # Accumulate chunks into `headers` until the blank line that terminates
    # the response headers; testing only the latest chunk would miss a
    # terminator split across two recv() calls.
    while "\r\n\r\n" not in headers:
      data = sock.recv(256)
      if not data:
        break
      headers += data

    # The status code occupies bytes 9-11 of the status line ("HTTP/1.1 200 OK").
    http_status = headers[9:12]
    if http_status[:1] != '2':
      http_err += 1

  except socket.timeout:
    timeout_err += 1 
  except socket.error as e:
    conn_err += 1
  finally:
    sock.close()
    end = time.time()
    response_time = end - start
    total_length = calculate_len_get(headers)

  util.printlog(log, phase, url_type, request_num, url, start, end, response_time, total_length)
  return 
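
The request above is assembled by hand so the benchmark controls the exact bytes on the wire and can time the raw socket. For comparison only, a rough standard-library equivalent (not part of the original tool) might look like:

# Sketch: the same GET via the stdlib HTTP client; hostname and path are
# placeholders, and library overhead makes it unsuitable for raw timing.
try:
    from http.client import HTTPConnection   # Python 3
except ImportError:
    from httplib import HTTPConnection       # Python 2

conn = HTTPConnection("example.com", 80, timeout=30)
conn.request("GET", "/employees?zipcode=94043",
             headers={"Accept": "application/json", "Connection": "close"})
resp = conn.getresponse()
print(resp.status)
conn.close()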
Example #3
def delete_url(url, url_type, request_num, phase):
  """
  # Desc  : Function to send delete requests to the server. Type 3 is delete requests
  #         also captures the data record being deleted and saves it in a list(post/_datalist)
  #         which is used for new post requests
  # Input : Delete request URL, Request Number, log file to collect per request data
  #         phase of each request(ramp-up, ramp-down, measuring-time)
  # Output: Generates per request (delete) details in a log file
  """
  global timeout_err
  global conn_err
  global http_err
  global bad_url 
  global employee_idlist
  global post_datalist

  # 1. Issue a GET request to capture the record that is about to be deleted.
  start = time.time()
  r = None
  try:
    try:
      get_res = s.get(url, headers=headers)
    except requests.exceptions.RequestException as e:
      # Catastrophic error. Bail.
      print e
      sys.exit(1)
    try:
      response = json.loads(get_res.content)
    except ValueError:
      # Decoding failed.
      print "Exception -- Decoding of result from getid for delete failed. Exiting"
      sys.exit(1)
    if response:
      if 'employee' in response:
        post_datalist.insert(front_oflist, response)
    else:
      print url
      print "Warning : Record not found"
    # Time only the DELETE itself.
    start = time.time()
    r = s.delete(url, headers=headers)
  except requests.exceptions.Timeout as e:
    timeout_err = timeout_err + 1 
  except requests.exceptions.ConnectionError as e:
    conn_err = conn_err + 1
  except requests.exceptions.HTTPError as e:
    http_err = http_err + 1
  except requests.exceptions.TooManyRedirects as e:
    bad_url = bad_url + 1 
  except requests.exceptions.RequestException as e:
    #catastrophic error. bail.
    print e
    sys.exit(1)
  finally:
    end = time.time()
    response_time = end - start
    # r is None when the DELETE itself never completed.
    total_length = calculate_len_postdel(r) if r is not None else 0

  util.printlog(log, phase, url_type, request_num, url, start, end, response_time, total_length)
  return
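
Capturing each record before deleting it, and feeding post_datalist back into post_url below, presumably keeps the data set roughly stable over a long benchmark run: whatever the DELETE phase removes, the POST phase can reinsert.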
Example #4
def post_url(url, url_type, request_num, phase):
  """
  # Desc  : Function to send post requests to the server. Type 2 is post requests
  #         Retries if the post request fails
  # Input : Post request URL, Request Number, log file to collect per request data
  #         phase of each request(ramp-up, ramp-down, measuring-time)
  # Output: Generates per request (post) details in a log file
  """
  global start_time
  global file_cnt
  global employee_idlist
  global timeout_err
  global conn_err
  global http_err
  global bad_url
  global post_datalist
  global static_post
  r = None

  post_data = static_postdata
  if post_datalist:
    post_data = post_datalist[0]
  else:
    static_post = static_post + 1
  start = time.time()
  end = start
  for i in range(100):
    r = post_function(url, post_data)
    if r:
      end = time.time()
      total_length = calculate_len_postdel(r)
      break
  if not r:
    print "Post request failed. Received null response. Exiting run"
    print request_num, url
    print post_data
    return
  response_time = end - start
  try:
    result = json.loads(r.content)
  except ValueError:
    # Decoding failed.
    print "Exception -- Decoding of result from posturl failed. Exiting"
    sys.exit(1)
  if result:
    if 'result' in result:
      employee_idlist.append(result['result']['employee_id'])
    else:
      print "Exception -- Post did not return a valid employee_id"
      print post_data
      sys.exit(1)

  util.printlog(log, phase, url_type, request_num, url, start, end, response_time, total_length)
  return
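
`post_function` is called above but not shown in this excerpt. A plausible reconstruction, assuming the module-level requests session `s` and `headers` dict the other request functions use (an assumption, not the original helper):

def post_function(url, post_data):
    # Hypothetical sketch; returns the response on success, or None so the
    # caller's retry loop ("if r: ... break") can try again.
    try:
        return s.post(url, json=post_data, headers=headers, timeout=30)
    except requests.exceptions.RequestException:
        return None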
Example #5
def main():
    util.start_timetest()
    util.print_elapsed_time()
    util.printlog('abc')
    util.sleep(0.1)
    util.printlog()
    util.printlog('xyz')
    util.sleep(0.2)
    util.printlog('1234567890')
    util.sleep(3)
    util.printlog()
    util.print_elapsed_time()
    util.sleep(1)
    util.print_elapsed_time('Elapsed: ')
    util.sleep(0.5)
    util.print_elapsed_time(suffix=' has passed')
    util.sleep(0.5)
    util.print_elapsed_time('It took ', ' to process')
    util.printlog('Done')
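
The `util` module itself is not shown in this example. Below is a minimal sketch consistent with how main() calls it; every name and output format here is an assumption inferred from the call sites:

import time

_t0 = None

def start_timetest():
    # Record the reference point used by print_elapsed_time().
    global _t0
    _t0 = time.time()

def sleep(seconds):
    time.sleep(seconds)

def printlog(msg=''):
    # Timestamped log line; printlog() with no argument prints a bare stamp.
    print('[{:.3f}] {}'.format(time.time(), msg))

def print_elapsed_time(prefix='', suffix=''):
    print('{}{:.3f}s{}'.format(prefix, time.time() - _t0, suffix))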
Example #6
args = parser.parse_args()
training = args.train
direct = args.direct
modelname = args.model
datadir = args.data_dir
directdir = args.direct_dir
w2vfile = args.w2v_file
savedir = args.save_dir

batch_size = 1000
batch_manual = 100
iter_num = 10000
check_freq = 1000

####### load data
util.printlog("Loading data")
embedding = np.load(datadir + '/embedding.npy')

if training:

    train_entity = np.load(datadir + '/train_entity.npy')
    train_context = np.load(datadir + '/train_context.npy')
    train_label = np.load(datadir + '/train_label.npy')
    train_fbid = np.load(datadir + '/train_fbid.npy')

    valid_entity = np.load(datadir + '/valid_entity.npy')
    valid_context = np.load(datadir + '/valid_context.npy')
    valid_label = np.load(datadir + '/valid_label.npy')
    valid_fbid = np.load(datadir + '/valid_fbid.npy')

    linkvalid = np.load(datadir + '/linkvalid.npy')
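
The script expects datadir to already hold the preprocessed .npy arrays. A small optional helper (not in the original) that joins paths portably and fails with the offending filename instead of np.load's bare IOError:

import os
import numpy as np

def load_npy(datadir, name):
    # e.g. load_npy(datadir, 'train_entity') -> datadir/train_entity.npy
    path = os.path.join(datadir, name + '.npy')
    if not os.path.exists(path):
        raise IOError('Missing preprocessed array: ' + path)
    return np.load(path)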
Example #7
    def train(self, train_data=None, options=None):
        options = options if options is not None else {}
        super(knet, self).train(train_data, options)
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        util.printlog("Loading Data")
        embedding = np.load(self.embedding)
        train_entity = np.load(self.train_entity)
        train_context = np.load(self.train_context)
        train_label = np.load(self.train_labels)
        train_fbid = np.load(self.train_fbid)

        valid_entity = np.load(self.valid_entity)
        valid_context = np.load(self.valid_context)
        valid_label = np.load(self.valid_labels)
        valid_fbid = np.load(self.valid_fbid)

        train_size = len(train_entity)
        if train_size < 500:
            batch_size = train_size
            iter_num = train_size
            check_freq = train_size
        elif train_size < 10000:
            batch_size = train_size // 100
            iter_num = train_size // 10
            check_freq = train_size // 100
        else:
            batch_size = train_size // 1000
            iter_num = train_size // 100
            check_freq = train_size // 1000

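        # Worked example: with train_size = 5000 this picks batch_size = 50,
        # iter_num = 500 and check_freq = 50, i.e. 500 * 50 / 5000 = 5 epochs,
        # validating and checkpointing every 50 iterations (half an epoch).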

        model = models.KA_D("KA+D", self.disamb_file)

        sess = tf.Session()
        w2v = util.build_vocab(self.glove, model.word_size)
        sess.run(model.initializer)

        util.printlog("Begin training")

        for i in range(iter_num):
            if i % check_freq == 0:
                util.printlog("Validating after running " +
                              str(int(i * batch_size / train_size)) +
                              " epoches")
                util.test(w2v, model, valid_entity, valid_context, valid_label,
                          valid_fbid, embedding, batch_size, sess, "all")
                model.saver.save(sess, os.path.join(self.model_dir, str(i)))

            fd = model.fdict(w2v, (i * batch_size) % train_size, batch_size, 1,
                             train_entity, train_context, train_label,
                             train_fbid, embedding, False)
            fd[model.kprob] = 0.5
            sess.run(model.train, feed_dict=fd)

            if batch_size != train_size and i % int(
                    train_size / batch_size / 10) == 0:
                util.printlog("Epoch {}, Batch {}".format(
                    int((i * batch_size) / train_size),
                    int((i * batch_size) % train_size / batch_size)))
        model.saver.save(sess, self.model_name)