def spawn_workers(self):
    """Start one worker thread per configured slot, then block until every
    worker has announced itself idle on the control socket."""
    def _empty_callback():
        # Placeholder callback record filled in later by callers.
        return {'func': None, 'args': None, 'kwargs': None}

    for index in range(self.worker_count):
        worker = LocalAsyncWorker(index)
        thread = threading.Thread(target=worker.start)
        self.workers[worker.identity] = {
            'thread': thread,
            'before_start_callback': _empty_callback(),
            'after_finish_callback': _empty_callback(),
        }
        thread.start()
        tprint('==== start worker thread ====')
    # Drain control messages until each worker has reported WORKER_IDLE.
    while len(self.available_workers) < self.worker_count:
        frames = self.control_socket.recv_multipart()
        if frames[-1] == WORKER_IDLE:
            self.available_workers.append(frames[0])
    tprint('==== all worker thread ready ====')
def send_unsent():
    '''Send notifications for all notices that have not been sent yet.

    Returns the number of notifications sent.
    '''
    logging.debug("called : %s", __name__)
    filelist = os.listdir(path)
    if 'old' in filelist:
        filelist.remove('old')
    # Sorted filelist so that older json files are listed earlier. Due to
    # this, notices are sent in the order in which they arrive.
    filelist.sort()
    send_count = 0
    for f in filelist:
        if send_name(f):
            send_count += 1
            tprint("\r{} notifications sent.".format(send_count))
    if send_count == 0:
        tprint("0 notifications sent.")
    else:
        # print() call: the Python 2 `print ""` statement is a SyntaxError
        # under Python 3, which other parts of this file require (f-strings).
        print("")
    logging.info("%d notifications sent", send_count)
    return send_count
def run(init, e=1, train=False, name="", printnum=500):
    """Run the model over the dataset for `e` epochs.

    Accumulates loss and correct-prediction counts, logging a windowed
    average every `printnum` batches and epoch totals at the end. When
    `train` is true the optimizer op is also executed.
    """
    for epoch in range(e):
        epoch_loss, epoch_pred = 0., 0.
        window_loss, window_pred = 0., 0.
        batches = 0
        # re-initialize the dataset iterator for this epoch
        init(sess)
        while True:
            try:
                if train:
                    _, loss_value, pred = sess.run(
                        (train_op, loss, correntPred))
                else:
                    loss_value, pred = sess.run((loss, correntPred))
            except tf.errors.OutOfRangeError:
                # iterator exhausted: epoch is over
                break
            epoch_loss += loss_value
            window_loss += loss_value
            epoch_pred += pred
            window_pred += pred
            batches += 1
            if batches % printnum == 0:
                tprint(
                    f"{name}> average_loss:{window_loss/printnum}, local_accuracy:{window_pred/printnum}"
                )
                window_loss, window_pred = 0., 0.
        tprint(
            f"{name}> total_loss:{epoch_loss/batches}, total_accuracy:{epoch_pred/batches}"
        )
def __init__(self, mode, username=None):
    """Set up the hunter: banner, proxy config, cache dir, game data, auth.

    mode     -- hunting mode label (display only)
    username -- optional account name; stored only when provided
    """
    self.mode = mode
    if username is not None:
        self.username = username
    # print() calls: Python 2 print statements cannot parse under Python 3.
    print("")
    print("-- Hunting in %s mode! --" % self.mode)
    print("")
    if Settings.proxy is not None:
        self.proxies = {"https": Settings.proxy}
    random.seed()
    self.check_cache_dir()
    self.refresh_game_data()
    # sleeping for a while to avoid having the two calls performed simultaneously
    initial_delay = random.randint(1, 10)
    util.tprint("[I] Sleeping for %s to avoid having the initial calls too close together." % initial_delay)
    time.sleep(initial_delay)
    self.authenticate()
    util.tprint("[I] Ready to hunt")
def get_game_version(self):
    """Query the MouseHunt info API for the current game version.

    Stores the PHP session id on self as a by-product. Returns the version
    string, or None when the API response lacks 'game_version'.
    """
    util.tprint("[I] Getting game version...")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; U; Android 2.3.3; en-en; HTC Desire Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded',
        'X-Requested-With': 'com.hitgrab.android.mousehunt',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'en-US',
        'Accept-Charset': 'utf-8, iso-8859-1, utf-16, *;q=0.7'
    }
    response = requests.post("https://www.mousehuntgame.com/api/info",
                             data={"game_version": "null"},
                             proxies=self.proxies,
                             headers=headers)
    session_id = response.cookies["PHPSESSID"]
    response = response.text.split("\r\n")
    # keep the last line that looks like a JSON object (no break: later
    # matches overwrite earlier ones)
    for line in response:
        if line.startswith("{"):
            response = line
    data = json.loads(response)
    if "game_version" in data:
        # store the session ID as a by-product
        self.session_id = session_id
        return data['game_version']
    else:
        # print() call: the Python 2 print statement is a SyntaxError under
        # Python 3, which this file's other code requires
        print("API responded weirdly: %s" % data)
        return None
def insert(root=None):
    """Fetch the notice list and save each unseen notice as a timestamped
    JSON file under `root` (defaults to <module dir>/gen/json).

    Returns the number of newly saved notices, or None on fetch failure.
    """
    logging.debug("called : %s", __name__)
    logging.debug("argument root : %s", root)
    if root is None:
        logging.debug("empty root is received")
        root = os.path.abspath(os.path.dirname(__file__)) + "/gen/json"
    if not os.path.isdir(root):
        logging.debug("making directory : %s", root)
        os.makedirs(root)
    notices = extract.get_notice_list(False)
    if notices is None:
        logging.error("error getting notice list")
        return None
    count = 0
    for notice in notices:
        stamp = str(notice['timestamp'])
        target = root + '/' + stamp + '.json'
        # already on disk: skip silently
        if os.path.isfile(target):
            continue
        count += 1
        tprint("Saved notice dated '{}' titled '{}'.".format(
            notice['time'], notice['title']))
        logging.info("Saved notice dated %s titled %s", notice['time'],
                     notice['title'])
        Notice(stamp).save_json(notice)
    logging.info("%d notices inserted", count)
    return count
def get_game_version(self):
    """Query the MouseHunt info API for the current game version.

    Stores the PHP session id on self as a by-product. Returns the version
    string, or None when the API response lacks 'game_version'.
    """
    util.tprint("[I] Getting game version...")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; U; Android 2.3.3; en-en; HTC Desire Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded',
        'X-Requested-With': 'com.hitgrab.android.mousehunt',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'en-US',
        'Accept-Charset': 'utf-8, iso-8859-1, utf-16, *;q=0.7'
    }
    response = requests.post("https://www.mousehuntgame.com/api/info",
                             data={"game_version": "null"},
                             proxies=self.proxies,
                             headers=headers)
    session_id = response.cookies["PHPSESSID"]
    response = response.text.split("\r\n")
    # keep the last line that looks like a JSON object (no break: later
    # matches overwrite earlier ones)
    for line in response:
        if line.startswith("{"):
            response = line
    data = json.loads(response)
    if "game_version" in data:
        # store the session ID as a by-product
        self.session_id = session_id
        return data['game_version']
    else:
        # print() call: the Python 2 print statement is a SyntaxError under
        # Python 3, which this file's other code requires
        print("API responded weirdly: %s" % data)
        return None
def insert_db():
    """ Save the notices in Notice sqlite database.

    Returns the number of newly inserted notices, or None when the notice
    list could not be retrieved.
    """
    logging.debug("called : %s", __name__)
    notices = extract.get_notice_list(False)
    if notices is None:
        logging.error("error getting notice list")
        return None
    count = 0
    for notice in notices:
        # insert_dict_safe is falsy for duplicates; those are skipped
        if not NoticeWrapper.insert_dict_safe(notice):
            continue
        count += 1
        tprint("Added notice dated '{}' titled '{}'.".format(
            notice['time'], notice['title']))
        logging.info("Added notice dated %s titled %s", notice['time'],
                     notice['title'])
    tprint("{} notices inserted".format(count))
    logging.info("%d notices inserted", count)
    return count
def send_unsent():
    '''Send notifications for all notices that have not been sent yet.

    Returns the number of notifications sent.
    '''
    logging.debug("called : %s", __name__)
    filelist = os.listdir(path)
    if 'old' in filelist:
        filelist.remove('old')
    # Sorted filelist so that older json files are listed earlier. Due to
    # this, notices are sent in the order in which they arrive.
    filelist.sort()
    send_count = 0
    for f in filelist:
        if send_name(f):
            send_count += 1
            tprint("\r{} notifications sent.".format(send_count))
    if send_count == 0:
        tprint("0 notifications sent.")
    else:
        # print() call: the Python 2 `print ""` statement is a SyntaxError
        # under Python 3, which other parts of this file require (f-strings).
        print("")
    logging.info("%d notifications sent", send_count)
    return send_count
def __init__(self, mode, username=None):
    """Set up the hunter: banner, proxy config, cache dir, game data, auth.

    mode     -- hunting mode label (display only)
    username -- optional account name; stored only when provided
    """
    self.mode = mode
    if username is not None:
        self.username = username
    # print() calls: Python 2 print statements cannot parse under Python 3.
    print("")
    print("-- Hunting in %s mode! --" % self.mode)
    print("")
    if Settings.proxy is not None:
        self.proxies = {"https": Settings.proxy}
    random.seed()
    self.check_cache_dir()
    self.refresh_game_data()
    # sleeping for a while to avoid having the two calls performed simultaneously
    initial_delay = random.randint(1, 10)
    util.tprint(
        "[I] Sleeping for %s to avoid having the initial calls too close together."
        % initial_delay)
    time.sleep(initial_delay)
    self.authenticate()
    util.tprint("[I] Ready to hunt")
def get_notice_list(p):
    """Parse the notice board HTML and return a list of per-notice dicts.

    p may be:
      * an HTML string -- parse it directly (nothing is printed),
      * a bool         -- read gen/notice_board.html; print the info if True,
      * None/other     -- error; returns None.
    """
    logging.debug("called : %s", __name__)
    logging.debug("argument p : %s", str(p))
    if p is None:
        logging.error("empty p is received")
        return None
    elif isinstance(p, str):
        # isinstance(p, str) replaces `type(p) is unicode or type(p) is str`:
        # `unicode` does not exist under Python 3 and raised NameError there.
        logging.debug("html recieved in unicode or str")
        html = p
        pr = False
    elif isinstance(p, bool):
        logging.debug("bool recieved")
        pr = p
        logging.debug("reading html from /gen/notice_board.html")
        filename = os.path.abspath(os.path.dirname(__file__)) + '/gen/notice_board.html'
        if not os.path.isfile(filename):
            logging.error("No file with name %s is found. Please run 'python login.py' first", filename)
            return None
        # `with` closes the handle even if the read raises (the original
        # leaked the descriptor in that case).
        with open(filename, 'r') as f:
            html = f.read()
    else:
        logging.error('recieved argument of no recognised type')
        return None
    logging.debug("making a BeautifulSoup")
    soup = BeautifulSoup(html)
    div = None
    for d in soup.find_all("ul"):
        if 'class' in d.attrs and 'topics' in d['class']:
            div = d
            break
    if div is None:
        logging.error("Unable to find ul of class topiclist topics")
        return None
    list_li = div.find_all('li')
    if list_li is None:
        logging.error("Error getting list items from div topics")
        return None
    tprint("%d notices retreived from the noticeboard" % len(list_li))
    logging.info("%d notices retreived from the noticeboard", len(list_li))
    info = [extract_info(li) for li in list_li]
    # BUG FIX: the original tested `if p:` here, so any non-empty HTML string
    # (truthy) triggered print_info even though pr was deliberately set False
    # for that path. `pr` carries the intended print flag.
    if pr:
        print_info(info)
    return info
def print_catch(self, data):
    """Print a one-line summary of a catch: mouse, gold, points, and any loot."""
    summary = "--- Mouse: %s, Gold: %d, Points: %d" % (
        data['catch']['mouse'], data['catch']['gold'],
        data['catch']['points'])
    loot = data['catch']['loot']
    if loot is not None and loot != "":
        summary = "%s, Loot: %s" % (summary, loot)
    util.tprint(summary)
def load_shared_content():
    """Read every file listed in `shared_files` into a pairID -> parsed-JSON
    dict. Each line has the form "<pairID> <json>"."""
    tprint("loading shared_files")
    shared_content = {}
    for file_path in shared_files:
        with open(file_path, "r") as handle:
            for line in tqdm(handle):
                pair_id = line.split(" ")[0]
                # everything after "<pairID> " is the JSON payload
                shared_content[pair_id] = json.loads(line[len(pair_id) + 1:])
    return shared_content
def read_backg_word_count_file(self, backg_word_count_fname):
    """Load "<word> <count>" lines into self.backg_word_count (counts as int).

    Avoids shadowing the builtin name `file` that the original used.
    """
    tprint("reading file: {}".format(backg_word_count_fname))
    with open(backg_word_count_fname) as fh:
        for row in fh:
            word, count = row.split()
            self.backg_word_count[word] = int(count)
    tprint("done")
def handle_starttag(self, tag, attrs):
    """Open a fresh torrent record on <item>; otherwise record the tag so
    handle_data can file its text, marking <link> for URL escaping."""
    if tag == "item":
        self._ignore = False
        tprint("-- Torrent --")
        self._pool.append({})
        return
    if self._ignore:
        return
    tprint(tag + ":", end=" ")
    self._lastTag = tag
    self._pool[-1][self._lastTag] = None
    if tag == "link":
        self._isLink = True
def count_char(path):
    """Scan a JSONL file and map every character appearing in sentence1 or
    sentence2 to a positive index; index 0 is reserved for '\\0' padding."""
    tprint("counting char")
    alphabet = set()
    with open(path, "r") as f:
        for _, line in tqdm(enumerate(f)):
            record = json.loads(line)
            alphabet.update(record["sentence1"])
            alphabet.update(record["sentence2"])
    char2idx = {ch: pos + 1 for pos, ch in enumerate(alphabet)}
    char2idx['\0'] = 0
    return char2idx
def get_locations(self, refresh=False):
    """Return the list of game locations.

    Fetches from the API and caches to self.locaitons_json_file, or loads
    the cached copy. refresh -- force a re-fetch even if a cache exists.
    """
    util.tprint("[I] Getting locations...")
    locations_url = "https://www.mousehuntgame.com/api/get/environment/all"
    locations = []
    if refresh or not os.path.exists(self.locaitons_json_file):
        locations = self.get_game_data(locations_url)
        # `with` closes the handle deterministically; the original left the
        # file object open (resource leak, write may not flush promptly).
        with open(self.locaitons_json_file, 'w') as cache:
            cache.write(json.dumps(locations))
    else:
        with open(self.locaitons_json_file, 'r') as cache:
            locations = json.loads(cache.read())
    return locations
def get_locations(self, refresh=False):
    """Return the list of game locations.

    Fetches from the API and caches to self.locaitons_json_file, or loads
    the cached copy. refresh -- force a re-fetch even if a cache exists.
    """
    util.tprint("[I] Getting locations...")
    locations_url = "https://www.mousehuntgame.com/api/get/environment/all"
    locations = []
    if refresh or not os.path.exists(self.locaitons_json_file):
        locations = self.get_game_data(locations_url)
        # `with` closes the handle deterministically; the original left the
        # file object open (resource leak, write may not flush promptly).
        with open(self.locaitons_json_file, 'w') as cache:
            cache.write(json.dumps(locations))
    else:
        with open(self.locaitons_json_file, 'r') as cache:
            locations = json.loads(cache.read())
    return locations
def check_control_signal(self):
    """Poll the control socket without blocking and react to a pending
    signal: WORKER_EXIT/JOB_STOP raise; JOB_PAUSE blocks until JOB_RESUME
    (still honoring an exit request while paused)."""
    if not self.control_socket.poll(0):
        return
    signal = self.control_socket.recv()
    tprint('============================ {}: {}'.format(
        self.identity, signal))
    if signal == WORKER_EXIT:
        raise WorkerExitError()
    if signal == JOB_STOP:
        raise JobStopError()
    if signal == JOB_PAUSE:
        # Stay parked here until told to resume (or to exit entirely).
        while True:
            pending = self.control_socket.recv()
            if pending == WORKER_EXIT:
                raise WorkerExitError()
            if pending == JOB_RESUME:
                break
def send_json(notice):
    '''Send the notification of notice for given json'''
    logging.debug("called : %s", __name__)
    logging.debug("argument notice : %s", str(notice))
    if notice is None:
        logging.error("empty notice is recieved")
        return
    when, title = notice['time'], notice['title']
    body = view.get_text_dict(notice, True)
    tprint("Sending notice {} dated {}.".format(title, when))
    logging.info("Sending notice %s dated %s.", title, when)
    return push(title, body)
def send_json(notice):
    '''Send the notification of notice for given json'''
    logging.debug("called : %s", __name__)
    logging.debug("argument notice : %s", str(notice))
    if notice is None:
        logging.error("empty notice is recieved")
        return
    when, title = notice['time'], notice['title']
    body = view.get_text_dict(notice, True)
    tprint("Sending notice {} dated {}.".format(title, when))
    logging.info("Sending notice %s dated %s.", title, when)
    return push(title, body)
def send_notice(notice):
    """ Given a database Notice instance, send its notification. """
    logging.debug("called : %s", __name__)
    if notice is None:
        logging.error("empty notice is recieved")
        return
    stamp, title = notice.print_time, notice.title
    body = view.get_text_notice(notice, True)
    tprint("Sending notice {} dated {}.".format(title, stamp))
    logging.info("Sending notice %s dated %s.", title, stamp)
    return push(title, body)
def send_notice(notice):
    """ Given a database Notice instance, send its notification. """
    logging.debug("called : %s", __name__)
    if notice is None:
        logging.error("empty notice is recieved")
        return
    stamp, title = notice.print_time, notice.title
    body = view.get_text_notice(notice, True)
    tprint("Sending notice {} dated {}.".format(title, stamp))
    logging.info("Sending notice %s dated %s.", title, stamp)
    return push(title, body)
def push(title, body):
    """
    Given the title and body of notification to be sent, send it to
    the pushbullet servers.
    params:
        title : the title of notification to be sent
        body : the body of the notification to be sent
    Returns True on HTTP 200, False otherwise.
    """
    logging.debug("called : %s", __name__)
    # Pushbullet needs access token to your account.
    # Add environmental variables
    #   TPO_PB_AUTH - The pushbullet auth token
    #   TPO_PB_CHANNEL - The pushbullet channel name
    logging.info("preparing to send post request to pushbullet")
    push_url = "https://api.pushbullet.com/v2/pushes"
    auth = requests.auth.HTTPBasicAuth(os.environ.get("TPO_PB_AUTH"), '')
    payload = {
        'type': 'note',
        'title': title,
        'body': body,
        'channel_tag': os.environ.get("TPO_PB_CHANNEL"),
    }
    response = requests.post(push_url,
                             auth=auth,
                             headers={'content-type': 'application/json'},
                             data=json.dumps(payload))
    logging.info("Recieved response status code : %d", response.status_code)
    if response.status_code == 200:
        tprint("Success")
        logging.info("push successfully sent")
        return True
    tprint("Failed, {} {}".format(response.status_code, response.reason))
    logging.error("sending push failed : %d : %s", response.status_code,
                  response.reason)
    return False
def count_word(zfile, size=None):
    """Read a gzipped GloVe file and return (word2idx, embedding matrix).

    Indices 0/1 are reserved for <PAD>/<UNK> (zero vectors). When `size`
    is given, at most `size` rows are kept.
    """
    tprint("loadding glove")
    word2idx = {"<PAD>": 0, "<UNK>": 1}
    vectors = [np.zeros(300), np.zeros(300)]
    with io.BufferedReader(gzip.open(zfile, "rb")) as reader:
        for row, raw in tqdm(enumerate(reader)):
            fields = raw.decode("utf-8").split(" ")
            token = fields.pop(0)
            word2idx[token] = row + 2
            vectors.append(np.asarray(fields, dtype=np.float32))
            if size and row + 2 >= size - 1:
                break
    return word2idx, np.array(vectors)
def get_game_version(self):
    """Query the MouseHunt info API for the current game version.

    Stores the PHP session id on self as a by-product. Returns the version
    string, or None when the API response lacks 'game_version'.
    """
    util.tprint("[I] Getting game version...")
    response = requests.post("https://www.mousehuntgame.com/api/info",
                             data={"game_version": "null"},
                             proxies=self.proxies)
    session_id = response.cookies["PHPSESSID"]
    response = response.text.split("\r\n")
    # keep the last line that looks like a JSON object (no break: later
    # matches overwrite earlier ones)
    for line in response:
        if line.startswith("{"):
            response = line
    data = json.loads(response)
    if "game_version" in data:
        # store the session ID as a by-product
        self.session_id = session_id
        return data['game_version']
    else:
        # print() call: the Python 2 print statement is a SyntaxError under
        # Python 3, which this file's other code requires
        print("API responded weirdly: %s" % data)
        return None
def push(title, body):
    """
    Given the title and body of notification to be sent, send it to
    the pushbullet servers.
    params:
        title : the title of notification to be sent
        body : the body of the notification to be sent
    Returns True on HTTP 200, False otherwise.
    """
    logging.debug("called : %s", __name__)
    # Pushbullet needs access token to your account.
    # Add environmental variables
    #   TPO_PB_AUTH - The pushbullet auth token
    #   TPO_PB_CHANNEL - The pushbullet channel name
    logging.info("preparing to send post request to pushbullet")
    push_url = "https://api.pushbullet.com/v2/pushes"
    auth = requests.auth.HTTPBasicAuth(os.environ.get("TPO_PB_AUTH"), '')
    payload = {
        'type': 'note',
        'title': title,
        'body': body,
        'channel_tag': os.environ.get("TPO_PB_CHANNEL"),
    }
    response = requests.post(push_url,
                             auth=auth,
                             headers={'content-type': 'application/json'},
                             data=json.dumps(payload))
    logging.info("Recieved response status code : %d", response.status_code)
    if response.status_code == 200:
        tprint("Success")
        logging.info("push successfully sent")
        return True
    tprint("Failed, {} {}".format(response.status_code, response.reason))
    logging.error("sending push failed : %d : %s", response.status_code,
                  response.reason)
    return False
def send_unsent_db():
    """ Send notification for all the unsent notices from the database.

    Returns the number of notifications sent.
    """
    notices = NoticeWrapper.get_unsent()
    send_count = 0
    for notice in notices:
        if send_notice(notice):
            NoticeWrapper.sent(notice)
            send_count += 1
            tprint("\r{} notifications sent.".format(send_count))
    if send_count == 0:
        tprint("0 notifications sent.")
    else:
        # print() call: the Python 2 `print ""` statement is a SyntaxError
        # under Python 3, which other parts of this file require.
        print("")
    logging.info("%d notifications sent", send_count)
    return send_count
def send_unsent_db():
    """ Send notification for all the unsent notices from the database.

    Returns the number of notifications sent.
    """
    notices = NoticeWrapper.get_unsent()
    send_count = 0
    for notice in notices:
        if send_notice(notice):
            NoticeWrapper.sent(notice)
            send_count += 1
            tprint("\r{} notifications sent.".format(send_count))
    if send_count == 0:
        tprint("0 notifications sent.")
    else:
        # print() call: the Python 2 `print ""` statement is a SyntaxError
        # under Python 3, which other parts of this file require.
        print("")
    logging.info("%d notifications sent", send_count)
    return send_count
def train(self):
    # Full training loop: each epoch runs optimization over
    # self.dataset.train, then a validation pass whose loss drives the LR
    # scheduler; optionally checkpoints model + optimizer state afterwards.
    print('Training %s epochs.' % self.epochs)
    loss_fun = nn.CrossEntropyLoss()
    # Reduce the learning rate when validation loss plateaus for 3 epochs.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                           'min',
                                                           verbose=True,
                                                           patience=3)
    last_print = time.time()
    for epoch in range(self.epochs):
        tprint('Starting epoch: %s' % epoch)
        self.model.train()
        self.optimizer.zero_grad()
        for minibatch, targets in self.dataset.train:
            minibatch = Variable(torch.stack(minibatch))
            targets = Variable(torch.LongTensor(targets))
            if self.cuda:
                minibatch = minibatch.cuda()
                targets = targets.cuda()
            out = self.model.forward(minibatch)
            loss = loss_fun(out, targets)
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            # Throttled progress logging (at most once per log_interval).
            if time.time() - last_print > self.log_interval:
                last_print = time.time()
                numer, denom = self.dataset.train.progress()
                tprint('Training: %s, %s/%s' % (epoch, numer, denom))
        tprint('Training complete. Beginning validation.')
        self.dataset.train.reload()
        self.model.eval()
        last_print = time.time()
        for minibatch, targets in self.dataset.validate:
            # volatile=True: legacy (pre-0.4) PyTorch no-grad inference mode
            minibatch = Variable(torch.stack(minibatch), volatile=True)
            targets = Variable(torch.LongTensor(targets), volatile=True)
            if self.cuda:
                minibatch = minibatch.cuda()
                targets = targets.cuda()
            out = self.model.forward(minibatch)
            validation_loss = loss_fun(out, targets)
            if time.time() - last_print > self.log_interval:
                last_print = time.time()
                numer, denom = self.dataset.validate.progress()
                tprint('Validating: %s, %s/%s' % (epoch, numer, denom))
        self.dataset.validate.reload()
        # NOTE(review): steps the scheduler with the loss of only the LAST
        # validation batch, not an epoch average -- confirm this is intended.
        scheduler.step(validation_loss.data[0])
        if self.save:
            torch.save(
                {
                    'model': self.model.state_dict(),
                    'optim': self.optimizer.state_dict(),
                },
                'signet.%s.pth' % int(time.time()))
def get_game_version(self):
    """Query the MouseHunt info API for the current game version.

    Stores the PHP session id on self as a by-product. Returns the version
    string, or None when the API response lacks 'game_version'.
    """
    util.tprint("[I] Getting game version...")
    response = requests.post("https://www.mousehuntgame.com/api/info",
                             data={"game_version": "null"},
                             proxies=self.proxies)
    session_id = response.cookies["PHPSESSID"]
    response = response.text.split("\r\n")
    # keep the last line that looks like a JSON object (no break: later
    # matches overwrite earlier ones)
    for line in response:
        if line.startswith("{"):
            response = line
    data = json.loads(response)
    if "game_version" in data:
        # store the session ID as a by-product
        self.session_id = session_id
        return data['game_version']
    else:
        # print() call: the Python 2 print statement is a SyntaxError under
        # Python 3, which this file's other code requires
        print("API responded weirdly: %s" % data)
        return None
def get_baits(self, refresh=False):
    """Return the list of bait (cheese) items.

    Fetches the full item list from the API and keeps only entries with
    classification 'bait', caching them to self.bait_json_file; otherwise
    loads the cached copy. refresh -- force a re-fetch.
    """
    util.tprint("[I] Getting baits...")
    items_url = "https://www.mousehuntgame.com/api/get/item/all"
    bait = []
    if refresh or not os.path.exists(self.bait_json_file):
        objects = self.get_game_data(items_url)
        # filter by "classification": "bait" to get only the cheese
        for item in objects:
            if item['classification'] == 'bait':
                bait.append(item)
        # `with` closes the handle deterministically; the original left the
        # file object open (resource leak, write may not flush promptly).
        with open(self.bait_json_file, 'w') as cache:
            cache.write(json.dumps(bait))
    else:
        with open(self.bait_json_file, 'r') as cache:
            bait = json.loads(cache.read())
    return bait
def get_baits(self, refresh=False):
    """Return the list of bait (cheese) items.

    Fetches the full item list from the API and keeps only entries with
    classification 'bait', caching them to self.bait_json_file; otherwise
    loads the cached copy. refresh -- force a re-fetch.
    """
    util.tprint("[I] Getting baits...")
    items_url = "https://www.mousehuntgame.com/api/get/item/all"
    bait = []
    if refresh or not os.path.exists(self.bait_json_file):
        objects = self.get_game_data(items_url)
        # filter by "classification": "bait" to get only the cheese
        for item in objects:
            if item['classification'] == 'bait':
                bait.append(item)
        # `with` closes the handle deterministically; the original left the
        # file object open (resource leak, write may not flush promptly).
        with open(self.bait_json_file, 'w') as cache:
            cache.write(json.dumps(bait))
    else:
        with open(self.bait_json_file, 'r') as cache:
            bait = json.loads(cache.read())
    return bait
def update_db():
    """ Perform an update operation for all unupdated notices in the database. """
    logging.debug("called : %s", __name__)
    logging.info('Updating notices')
    count = 0
    for notice in NoticeWrapper.get_unupdated():
        # a single attachment means the details page carries an attachment link
        has_attachment = (notice.num_attachments == 1)
        details = get_details_url(notice.url, has_attachment)
        NoticeWrapper.update(notice, details)
        count += 1
        tprint("Updated notice dated {} titled {}.".format(
            notice.print_time, notice.title))
        logging.info('Updated notice dated %s titled %s.', notice.print_time,
                     notice.title)
    tprint("{} notices updated.".format(count))
    logging.info('%d notices updated.', count)
def test(self):
    # Evaluate the model over self.dataset.test, accumulating a 7x7
    # confusion matrix (first arg to sklearn is `predicted`, second is the
    # true targets), then print the matrix and derived per-class stats.
    tprint('Beginning testing.')
    confusion_matrix = np.zeros((7, 7)).astype(np.int)
    last_print = time.time()
    for minibatch, targets in self.dataset.test:
        # volatile=True: legacy (pre-0.4) PyTorch no-grad inference mode
        minibatch = Variable(torch.stack(minibatch), volatile=True)
        targets = Variable(torch.LongTensor(targets), volatile=True)
        if self.cuda:
            minibatch = minibatch.cuda()
            targets = targets.cuda()
        out = self.model.forward(minibatch)
        _, predicted = torch.max(out.data, 1)
        predicted = predicted.cpu().numpy()
        targets = targets.data.cpu().numpy()
        # labels= pins all 7 classes so partial batches still produce 7x7
        confusion_matrix += sklearn.metrics.confusion_matrix(
            predicted, targets, labels=[0, 1, 2, 3, 4, 5, 6]).astype(np.int)
        # Throttled progress logging (at most once per log_interval).
        if time.time() - last_print > self.log_interval:
            last_print = time.time()
            numer, denom = self.dataset.test.progress()
            tprint('Testing: %s/%s' % (numer, denom))
    tprint('Testing complete.')
    print(confusion_matrix)
    print(tabulate.tabulate(stats(confusion_matrix)))
def update():
    """Update every notice JSON under jsondir after archiving old files.

    Returns the number of notices updated, or None when jsondir is missing.
    """
    logging.debug("called : %s", __name__)
    if not os.path.isdir(jsondir):
        logging.error("no directory named %s", jsondir)
        return
    clean_old(jsondir)
    filelist = os.listdir(jsondir)
    up_count = 0
    tprint("Updating notices")
    logging.info("Updating notices")
    for f in filelist:
        if update_json(f):
            up_count += 1
            tprint("\r{} Notices updated.".format(up_count))
    if up_count == 0:
        tprint("0 Notices updated.")
    else:
        # print() call: the Python 2 `print ''` statement is a SyntaxError
        # under Python 3, which other parts of this file require.
        print('')
    logging.info("%d notice updated", up_count)
    return up_count
def handle_data(self, data):
    """File element text into the current torrent record; spaces are
    URL-escaped when the text belongs to a <link> element."""
    if not data.strip() or self._ignore:
        return
    if self._isLink:
        data = data.replace(" ", "%20")
    tprint(data, end=" ")
    self._pool[-1][self._lastTag] = data
def print_catch(self, data):
    """Print a one-line summary of a catch: mouse, gold, points, and any loot."""
    summary = "--- Mouse: %s, Gold: %d, Points: %d" % (
        data['catch']['mouse'], data['catch']['gold'],
        data['catch']['points'])
    loot = data['catch']['loot']
    if loot is not None and loot != "":
        summary = "%s, Loot: %s" % (summary, loot)
    util.tprint(summary)
def prepare_emission_prob(self):
    """ prepares the emission probabilities matrix """
    # Either load precomputed similarities from disk or compute them from
    # pretrained word vectors, then row-normalize each state's similarities
    # into a probability distribution (optionally appending background rows).
    if self.cfg.similarity_fname and os.path.isfile(self.cfg.similarity_fname):
        tprint("loading similarity file: {}".format(self.cfg.similarity_fname))
        emission_prob = np.load(self.cfg.similarity_fname)
        tprint("done")
    else:
        # a '.bin' embedding file is treated as word2vec; anything else as GloVe
        is_glove = not self.cfg.word_embed_fname[-3:] == 'bin'
        self.w2v, self.w2v_mean = read_pretrained_w2v(self.cfg.word_embed_fname,
                                                      is_glove=is_glove)
        self.w2v_dim = self.w2v_mean.shape[0]
        tprint("w2v dimension: {}".format(self.w2v_dim))
        self.article_all_sent_vecs = self.prepare_sent_vecs(self.article_sentences)
        if not self.cfg.transcript_word_level_mode:
            self.transcript_all_sent_vecs = self.prepare_sent_vecs(self.transcript_sents)
        emission_prob = np.zeros((self.n_article_sentences, self.n_observations))
        tprint("preparing similarities for emission probabilities...")
        # prepare word vectors in case of word level mode
        if self.cfg.transcript_word_level_mode:
            word_vecs = []
            for observation_i in range(self.n_observations):
                word = self.id2word[observation_i]
                if word in self.w2v:
                    word_vec = self.w2v[word]
                else:
                    # out-of-vocabulary word: fall back to the mean embedding
                    # word_vec = self.w2v["<unk>"]
                    word_vec = self.w2v_mean
                word_vecs.append(word_vec)
        # fill emission_prob[state, observation] with a similarity score
        for state_i in tqdm(range(self.n_article_sentences)):
            for observation_i in range(self.n_observations):
                if self.cfg.transcript_word_level_mode:
                    emission_prob[state_i, observation_i] = self.word_sent_similarity(
                        word_vecs[observation_i],
                        self.article_all_sent_vecs[state_i])
                else:
                    if not self.cfg.wmd:
                        emission_prob[state_i, observation_i] = self.sent_sent_similarity(
                            self.transcript_all_sent_vecs[observation_i],
                            self.article_all_sent_vecs[state_i])
                    else:
                        # negated Word Mover's Distance so larger == more similar
                        emission_prob[state_i, observation_i] = -self.w2v.wmdistance(
                            self.transcript_sents[observation_i],
                            self.article_sentences[state_i])
        if self.cfg.similarity_fname:
            # save to file
            np.save(self.cfg.similarity_fname, emission_prob)
            tprint("created file: {}".format(self.cfg.similarity_fname))
    # manipulate the similarities and normalize
    for state_i in range(self.n_article_sentences):
        if self.cfg.wmd:
            # shift the row so its maximum becomes zero (WMD scores are negative)
            emission_prob[state_i, :] -= np.max(emission_prob[state_i, :])
        # this works better than applying a second softmax
        if self.cfg.emis_prob_subtruct_min_factor != 0:
            min_val = np.min(emission_prob[state_i, :])
            emission_prob[state_i, :] -= self.cfg.emis_prob_subtruct_min_factor * min_val
        # normalize the similarities to obtain probabilities
        emission_prob[state_i, :] /= np.sum(emission_prob[state_i, :])
    if not self.using_background:
        self.emission_prob = emission_prob
    else:
        word_dist = self.get_backg_distribution()
        # for all sentences, the word distribution is set to word_dist
        backg_emission_prob = np.tile(word_dist, (self.n_article_sentences, 1))
        self.emission_prob = np.concatenate((emission_prob, backg_emission_prob))
def predict(self):
    """ runs the Viterbi algorithm to obtain a predicted sequence of hidden states, i.e. paper sentences """
    tprint("predict...")
    if self.cfg.hmm_algo == HmmAlgo.VITERBI_0:
        predicted_path = viterbi(self.start_prob, self.transition_prob,
                                 self.emission_prob, self.observed_seq)
    elif self.cfg.hmm_algo == HmmAlgo.DUMMY:
        # for debugging - avoid waiting for prediction
        predicted_path = [20] * len(self.observed_seq)
        predicted_path[:3] = [10, 10, 12]
    else:
        raise Exception("unknown HMM algorithm")
    tprint("done")
    # if going backward is not allowed - validate it
    if not self.cfg.allow_backward_steps:
        for t in range(1, len(self.observed_seq)):
            assert (self.state2sent(predicted_path[t]) >=
                    self.state2sent(predicted_path[t - 1]))
    log_prob = self.calc_log_prob(predicted_path, emission_prob_only=False)
    if self.using_background:
        # split the path: states mapping to background 0 are foreground
        # (real article sentences), the rest are background states
        foreg_pos = [self.state2backg(state_i) == 0 for state_i in predicted_path]
        backg_pos = [not bool_val for bool_val in foreg_pos]
        predicted_sents = list(compress(predicted_path, foreg_pos))
        # NOTE(review): 'foreground count' prints len(predicted_path), the
        # whole path length -- presumably sum(foreg_pos) was meant; confirm.
        print('foreground count: {}'.format(len(predicted_path)))
        print('background count: {}'.format(sum(backg_pos)))
    else:
        predicted_sents = predicted_path
    unique_sent_indices = list(set(predicted_sents))
    unique_sent_indices.sort()
    self.prepare_predicted_seq_info(predicted_path)
    print("predicted sequence summary:")
    for subseq_info in self.predicted_seq_info:
        sent_i = subseq_info[PredictedSeqInfoKey.SENT_I.value]
        backg = subseq_info[PredictedSeqInfoKey.BACKGROUND.value]
        if self.using_background:
            state_str = "({:4}, {})".format(sent_i, backg)
        else:
            state_str = "{:4}".format(sent_i)
        duration = subseq_info[PredictedSeqInfoKey.DURATION.value]
        print("{}: {:4}".format(state_str, duration))
    if self.print_predicted_sentences:
        print("predicted sentences:")
        for sent_i in unique_sent_indices:
            print("sentence {}:".format(sent_i))
            print(self.article_sentences[sent_i])
    print("\nnum of predicted unique sentences: {}".format(len(unique_sent_indices)))
    return self.predicted_seq_info, log_prob
def exit_error(self, reason):
    """Print the failure reason and terminate the process with status 1."""
    # print() calls: Python 2 print statements are SyntaxErrors under
    # Python 3, which other parts of this codebase require.
    print("Don't know what happened, see the reason below:")
    print(reason)
    util.tprint("Exitting...")
    exit(1)
def handle_endtag(self, tag):
    """Close out parser state: </item> resumes ignoring content, </link>
    leaves URL mode and terminates the in-progress output line."""
    if tag == "item":
        self._ignore = True
    elif tag == "link":
        self._isLink = False
        tprint()
def play(self):
    """Main hunt loop: sound the horn, handle each server response status,
    then sleep a randomized delay before the next hunt.

    Never returns normally; exits the process on unrecoverable errors.
    """
    while True:
        response = self.mh.hunt()
        if response.status == "error":
            # No JSON in the reply; wait a while and retry exactly once.
            error_delay = random.randint(50, 100)
            util.tprint("[E] Server replied wierdly (no JSON). Will retry once. Delay: %d. Raw response:" % error_delay)
            print(response.data)
            time.sleep(error_delay)
            response = self.mh.hunt()
            if response.status == "error":
                util.tprint("[E] No JSON in reply and already retried. Response:")
                # BUG FIX: the original printed `raw_response`, an undefined
                # name, so this path raised NameError instead of showing the
                # failing response.
                print(response.data)
                util.tprint("[E] Exiting...")
                exit(1)
            # NOTE(review): when the retry succeeds, flow falls through to
            # the sleep below without processing the response or setting
            # next_delay (NameError on the very first iteration) -- confirm.
        elif response.status == "login":
            util.tprint("[E] Session has expired. Reauthenticating...")
            self.access_token = self.mh.authenticate(True)  # refreshing the expired access token
            # have a small delay before trying to sound again
            next_delay = random.randint(1, 10)
        elif response.status == "update":
            # game has been updated, we have to get the new version
            next_delay = self.get_next_delay(response.data['time_to_horn'])
            new_version_delay = random.randint(50, 100)
            util.tprint("[E] Game has been updated, have to get new game version. Sleeping for %s to simulate restarting the app..." % new_version_delay)
            time.sleep(new_version_delay)
            self.mh.refresh_game_data()
            util.tprint("[I] Game version updated. Will sound in: %d" % next_delay)
        elif not response.data['have_bait']:
            # we have to make a decision on how to proceed
            player_data = self.mh.get_player_data()
            util.exit_error("Out of bait. Exiting to avoid detection...")
            exit(1)
        elif response.status == "warn":
            # user hunted recently, we have to wait more
            next_delay = self.get_next_delay(response.data['time_to_horn'])
            util.tprint("[E] Hunted recently. Status: %s. Time until horn: %d, will sound in: %d" % (response.data['catch']['status'], response.data['time_to_horn'], next_delay))
            if "catchsuccess" in response.data['catch']['status']:
                self.print_catch(response.data)
        elif response.status == "ok":
            # hunt should have been successful, set the delay for next time
            next_delay = self.get_next_delay(response.data['time_to_horn'])
            util.tprint("[I] Horn sounded. Status: %s. Will sound in: %d" % (response.data['catch']['status'], next_delay))
            if "catchsuccess" in response.data['catch']['status']:
                self.print_catch(response.data)
        else:
            # we don't know what happened, better stop altogether
            util.exit_error(response)
        # 30% chance for user to check their journal after hunting
        if random.randint(0, 1000) < 300:
            check_delay = random.randint(1, 5)
            time.sleep(check_delay)
            player_data = self.mh.get_player_data()
        time.sleep(next_delay)
######parameters keep_prob = 1 learning_rate = 0.000001 batch_num = 128 max_len = None filter_size = 3 num_heads = 8 #for transformer hidden_dim = 300 #a dim reduction after highway network char_emb_dim = 8 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' ############################## tprint("start loading dataset") mnli = MultiNli( "glove.txt.gz", "./DIIN/data/multinli_0.9", max_len=max_len, batch=batch_num, train_epoch=10, dev_epoch=1, char_emb_dim=char_emb_dim, pad2=False, #all_printable_char=True, #trainfile="multinli_0.9_train_5000.jsonl", ) tprint("building graph") BST = time()