def pos_tag_stanford(text, output='str', input_file='temp.txt'):
    """
    Part-of-speech tagging using the Stanford tagger.

    >>> pos_tag_stanford(u'ses beaux cheveux')
    u'ses_D beaux_JJ cheveux_NN '
    >>> pos_tag_stanford(u'ses beaux cheveux', output='list')
    [u'ses_D', u'beaux_JJ', u'cheveux_NN']
    >>> pos_tag_stanford(u'ses beaux cheveux', output='tuple')
    [(u'ses', u'D'), (u'beaux', u'JJ'), (u'cheveux', u'NN')]

    :param text: string or list of words
    :param output: the output format
    :param input_file: name of the file that stores the input for the tagger
    :return: string (default) with an underscore separating the word and the
             tag, list, or list of word-tag tuples
    """
    if isinstance(text, list):
        text = ' '.join(text)
    helpers.write_to_file(text, input_file)
    script = '../apparatus/stanford-postagger.sh'
    model = '../stanford/models/french.tagger'
    tagger = subprocess.Popen([script, model, input_file],
                              shell=False, stdout=subprocess.PIPE)
    tagged = tagger.communicate()[0]
    tagged = unicode(tagged, 'utf-8')
    tagged = tagged.replace('\n', ' ')
    tuples = helpers.strings_to_tuples(tagged, '_')
    tuples = correct_tags_stanford(tuples)
    if output == 'list':
        return helpers.tuples_to_strings(tuples, output='list')
    if output == 'tuple':
        return tuples
    return helpers.tuples_to_strings(tuples, output='str')
def create_title_page(self):
    title_with_tags = "<h1>" + self.title + "</h1>"
    author_with_tags = "<h2>" + self.author + "</h2>"
    title_page = title_with_tags + author_with_tags
    helpers.write_to_file(self.path, title_page)
def get_vector_representations(sess, model, data, save_dir, batch_size=100,
                               max_batches=None, batches_in_epoch=1000,
                               extension=".cell"):
    """
    Given a trained model, gets a vector representation for the traces in batch

    @param sess is a tensorflow session
    @param model is the autoencoder model
    @param data is the data (in batch-major form and not padded or a list of
        files (depending on `in_memory`))
    """
    batches = helpers.get_batches(data, batch_size=batch_size)

    batches_in_data = len(data) // batch_size
    if max_batches is None or batches_in_data < max_batches:
        max_batches = batches_in_data - 1

    try:
        for batch in range(max_batches):
            print("Batch {}/{}".format(batch, max_batches))

            fd, paths = model.next_batch(batches, False)
            l = sess.run(model.encoder, fd)

            file_names = [helpers.extract_filename_from_path(path, extension)
                          for path in paths]

            # The representations are written to disk rather than returned
            for file_name, features in zip(file_names, list(l)):
                helpers.write_to_file(features, save_dir, file_name,
                                      new_extension=".cellf")

    except KeyboardInterrupt:
        stdout.write('Interrupted')
        exit(0)
def compose_request(args, method, cmd):
    try:
        # call the client method named by `method`, e.g. client.get(args, cmd)
        response = getattr(client, method)(args, cmd)
    except Exception as e:
        exit_with_stderr(str(e))

    if cmd == "login":
        data = {
            "EMAIL": response["data"]["email"],
            "AUTH_TOKEN": response["data"]["auth_token"]
        }
        write_to_file(".env", data)
    elif cmd == "list":
        for k, v in response["data"]["tosts"].iteritems():
            sys.stdout.write(k + ": " + v + "\n")
    elif cmd == "view":
        access_token = response["data"]["tost"]["access-token"]
        body = response["data"]["tost"]["body"]
        exit_with_stdout(access_token + ": " + body)
    elif cmd == "access":
        for k, v in response["data"]["propagations"].iteritems():
            sys.stdout.write(v["access-token"] + ": " + k + "\n")

    exit_with_stdout(response["msg"])
def pos_tag_stanford(text, input_file='temp.txt'):
    """
    Part-of-speech tagging using the Stanford tagger.

    >>> pos_tag_stanford('O Helen fair! O Helen chaste!\\n\\
    ... If I were with thee, I were blest.')
    [('O', 'UH'), ('Helen', 'NNP'), ('fair', 'JJ'), ('!', '.'), \
    ('O', 'UH'), ('Helen', 'NNP'), ('chaste', 'JJ'), ('!', '.'), \
    ('If', 'IN'), ('I', 'PRP'), ('were', 'VBDR'), ('with', 'IN'), \
    ('thee', 'PRP'), (',', ','), ('I', 'PRP'), ('were', 'VBDR'), \
    ('blest', 'VB'), ('.', '.')]

    :param text: string or list of words
    :param input_file: name of the file that stores the input for the tagger
    :return: list of word-tag tuples
    """
    if isinstance(text, list):
        text = ' '.join(text)
    helpers.write_to_file(text, input_file)
    script = '../apparatus/stanford-postagger.sh'
    model = '../stanford/models/wsj-0-18-left3words-distsim.tagger'
    tagger = subprocess.Popen([script, model, input_file],
                              shell=False, stdout=subprocess.PIPE)
    output = tagger.communicate()[0]
    output = output.replace('\n', ' ')
    tuples = helpers.strings_to_tuples(output)
    corrected = correct_tags(tuples)
    return corrected
def scrap_page(url, filename, path_to_save):
    try:
        print "Request to:", url
        url_response = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        print "Error scraping:", e
        return
    url_html = url_response.read()
    h.write_to_file(url_html, filename, path_to_save)
    # wait a random interval between requests to avoid hammering the server
    time.sleep(randint(5, 10))
def record(self):
    time.sleep(DUSTY_SENSOR_FREQUENCY_SECONDS)
    value = self.sensor.query()
    write_to_file(
        DUSTY_FILEPATH,
        value[0],
        value[1],
        ["pm2.5", "pm10"],
    )
    print(value[0], value[1])
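# The record method above (and the other sensor recorders further down) call
# a shared write_to_file(path, value_a, value_b, cols) helper that is not
# shown here. The sketch below is a guess at what such a helper could look
# like, assuming it appends timestamped CSV rows; the real implementation in
# the project's helpers module may differ.
import csv
import os
from datetime import datetime


def write_to_file(path, value_a, value_b, cols):
    # Hypothetical sketch: append one timestamped row per call, writing a
    # header with the given column names the first time the file is created.
    new_file = not os.path.exists(path)
    with open(path, "a") as f:
        writer = csv.writer(f)
        if new_file:
            writer.writerow(["timestamp"] + cols)
        writer.writerow([datetime.utcnow().isoformat(), value_a, value_b])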
def parse_one_script(path, filename, output_path):
    all_html = h.open_file(filename, path)
    soup = BeautifulSoup(all_html, 'html.parser')
    try:
        script = soup.find_all("pre")[0]
        h.write_to_file(str(script), filename, output_path)
        return None
    except IndexError:
        print "Error: no <pre> in", filename
        return filename
def get_vector_representations(sess, model, data, save_dir, batch_size=100,
                               max_batches=None, batches_in_epoch=1000,
                               max_time_diff=float("inf"), extension=".cell"):
    """
    Given a trained model, gets a vector representation for the traces in batch

    @param sess is a tensorflow session
    @param model is the seq2seq model
    @param data is the data (in batch-major form and not padded or a list of
        files (depending on `in_memory`))
    """
    batches = helpers.get_batches(data, batch_size=batch_size)

    batches_in_data = len(data) // batch_size
    if max_batches is None or batches_in_data < max_batches:
        max_batches = batches_in_data - 1

    try:
        for batch in range(max_batches):
            print("Batch {}/{}".format(batch, max_batches))

            fd, paths, _ = model.next_batch(batches, False, max_time_diff)
            l = sess.run(model.encoder_final_state, fd)

            # Returns a tuple, so we concatenate
            if isinstance(l, LSTMStateTuple):
                l = np.concatenate((l.c, l.h), axis=1)

            file_names = [
                helpers.extract_filename_from_path(path, extension)
                for path in paths
            ]

            for file_name, features in zip(file_names, list(l)):
                helpers.write_to_file(features, save_dir, file_name,
                                      new_extension=".cellf")

    except KeyboardInterrupt:
        stdout.write('Interrupted')
        exit(0)
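# Hedged usage sketch for the two get_vector_representations variants above
# (autoencoder and seq2seq). The model constructor, checkpoint path, and
# trace loader named here are assumptions for illustration; only the call
# signature comes from the functions themselves.
import tensorflow as tf

with tf.Session() as sess:
    model = build_seq2seq_model()                 # assumed constructor
    tf.train.Saver().restore(sess, "model.ckpt")  # assumed checkpoint path
    data = helpers.load_traces("traces/")         # assumed loader
    get_vector_representations(sess, model, data, save_dir="features/",
                               batch_size=100)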
def rename_all_scripts():
    movies = h.read_from_dataset("imdb_dataset_v7.1_6_actors_complete.tsv")
    scripts_no_tag_path = "clean_scripts/no_tags/"
    clean_scripts_path = "clean_scripts/finished_renamed/"
    script_names = os.listdir(scripts_no_tag_path)
    for script in script_names:
        temp = script.replace(".html", "").replace("-", " ")
        # move a trailing ", The" back to the front of the title
        if re.search(r'\b, The\b', temp):
            temp = temp.replace(",", "")
            temp = temp.partition(" The")[0]
            temp = "The " + temp
        for title in movies:
            jac = h.compute_jaccard_index(title.partition(" (")[0], temp)
            if jac >= 1:
                print jac, title, temp
                h.write_to_file(h.open_file(script, scripts_no_tag_path),
                                title.replace("/", "."),
                                clean_scripts_path)
def record(self):
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=44100,
                    input=True,
                    frames_per_buffer=self.chunk)
    data = stream.read(self.chunk)
    rms_val = audioop.rms(data, 2)
    dbs_val = (max(20.0 * math.log(rms_val, 10), -60.0))
    print(dbs_val)
    write_to_file(
        LOUD_FILEPATH,
        round(dbs_val, 2),
        round(rms_val, 2),
        ["db", "rms"],
    )
    stream.close()
    p.terminate()
def record(self):
    try:
        while True:
            time.sleep(1)
            lines = (self.sock.recv(1024).decode('utf-8').split('\n'))
            for line in lines:
                if line.startswith('$GPGGA'):
                    msg = pynmea2.parse(line)
                    print(msg.latitude, msg.longitude)
                    write_to_file(
                        WHERE_FILEPATH,
                        msg.latitude,
                        msg.longitude,
                        cols=["latitude", "longitude"],
                    )
    except:
        raise
    finally:
        self.sock.close()
def pos_tag_melt(text, output='str', input_file='temp.txt'):
    """
    Part-of-speech tagging using the MElt tagger.

    >>> pos_tag_melt(u'ses beaux cheveux')
    u'ses_DET beaux_JJ cheveux_NN '
    >>> pos_tag_melt(u'ses beaux cheveux', output='list')
    [u'ses_DET', u'beaux_JJ', u'cheveux_NN']
    >>> pos_tag_melt(u'ses beaux cheveux', output='tuple')
    [(u'ses', u'DET'), (u'beaux', u'JJ'), (u'cheveux', u'NN')]

    :param text: string or list of words
    :param output: the output format
    :param input_file: name of the file that stores the input for the tagger
    :return: string (default) with an underscore separating the word and the
             tag, list, or list of word-tag tuples
    """
    if isinstance(text, list):
        text = ' '.join(text)
    helpers.write_to_file(text, input_file)
    cat = subprocess.Popen(['cat', input_file],
                           shell=False, stdout=subprocess.PIPE)
    melt = subprocess.Popen(['MElt'],
                            shell=False, stdin=cat.stdout,
                            stdout=subprocess.PIPE)
    tagged = melt.communicate()[0]
    tagged = unicode(tagged, 'utf-8')
    tuples = helpers.strings_to_tuples(tagged, '/')
    tuples = correct_tags_melt(tuples)
    if output == 'list':
        return helpers.tuples_to_strings(tuples, output='list')
    if output == 'tuple':
        return tuples
    return helpers.tuples_to_strings(tuples, output='str')
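# The tagger functions above rely on helpers.strings_to_tuples and
# helpers.tuples_to_strings, which are not shown in this file. The sketch
# below is only a guess at what they might do, written to be consistent with
# the doctests above; the real helpers module may be implemented differently.
def strings_to_tuples(tagged, separator='_'):
    # e.g. u'ses_D beaux_JJ' -> [(u'ses', u'D'), (u'beaux', u'JJ')]
    return [tuple(token.rsplit(separator, 1))
            for token in tagged.split() if separator in token]


def tuples_to_strings(tuples, output='str'):
    # join each (word, tag) pair with an underscore, as in the doctests
    strings = [word + u'_' + tag for word, tag in tuples]
    if output == 'list':
        return strings
    return u' '.join(strings) + u' '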
def create_content_page(self, current_page, chapter_title, body):
    current_page_formatted = "<br><br><b>" + str(
        current_page) + "</b><br><br>"
    helpers.write_to_file(self.path, current_page_formatted)
    if chapter_title != "None":
        chapter_title_formatted = "<h3>" + chapter_title + "</h3><br><br>"
        helpers.write_to_file(self.path, chapter_title_formatted)
    if body != "None":
        body_formatted = body.replace('\n', "<br>")
        helpers.write_to_file(self.path, body_formatted)
def get_result(model, search_word, language, forecast_days, access):
    print('Getting results for ', search_word)
    data = get_daily_views(search_word, language, access)

    if model == 'ARIMA':
        prediction = arima_forecast(data, forecast_days)
    elif model == 'LSTM':
        prediction = lstm_forecast(data, forecast_days)
        data = data[past:]
    elif model == 'CNN':
        prediction = cnn_forecast(data, forecast_days)
    else:
        raise Exception("Invalid model name")

    loss = get_loss(data, prediction, model, past)
    plot_title = model + ' forecast for ' + search_word

    if model == 'CNN':
        image_path = plot_graph(prediction, data,
                                labels=['Predicted Views', 'Original Views'],
                                colors=['#ff7f0e', '#1f77b4'],
                                title=plot_title)
    else:
        image_path = plot_graph(data, prediction,
                                labels=['Original Views', 'Predicted Views'],
                                title=plot_title)

    prediction_file_path = write_to_file(str(prediction))
    temp_dict = {
        'predictionLink': prediction_file_path,
        'loss': loss,
        'graphImageSource': image_path
    }
    return json.dumps(temp_dict)
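# Hedged usage sketch for get_result: the search term, language code, and the
# `wiki_access` credentials object are placeholders for illustration only.
result = json.loads(get_result('ARIMA', 'Machine learning', 'en',
                               forecast_days=30, access=wiki_access))
print(result['loss'], result['graphImageSource'])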
def bind_book(self, url):
    soup = getters.get_soup(url)

    html_open = f"""
    <html>
        <head>
            <title>{self.title} - {self.author}</title>
            <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/css/bootstrap.min.css">
        </head>
        <body>
            <div class="container-fluid">
                <div class="row">
                    <div class="col-sm-3">
                    </div>
                    <div class="col-sm-6">
                        <center>
    """

    html_close = """
                        </center>
                    </div>
                    <div class="col-sm-6">
                    </div>
                </div>
            </div>
        </body>
    </html>
    """

    # write initial html
    helpers.write_to_file(self.path, html_open)

    # write the title page
    self.create_title_page()

    current_page = 1
    page_url = url + "/viewer?page=" + str(current_page)

    # page 1
    soup = getters.get_soup(page_url)
    max_pages = getters.get_max_page_number(soup)

    # progress bar
    progress_bar = tqdm(total=max_pages)

    # traverse through all pages
    while (current_page <= max_pages):
        page_url = url + "/viewer?page=" + str(current_page)
        soup = getters.get_soup(page_url)

        chapter_title = getters.get_chapter(soup)
        body = getters.get_text_body(soup)

        self.create_content_page(current_page, chapter_title, body)
        current_page += 1

        # update progress bar
        progress_bar.update(1)
        time.sleep(1)

    progress_bar.close()
    helpers.write_to_file(self.path, html_close)
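# Hedged usage sketch: the class that owns create_title_page,
# create_content_page, and bind_book is not shown here, so the `Book` name
# and its constructor arguments below are assumptions for illustration only.
book = Book(title="Example Title", author="Example Author",
            path="example_title.html")
book.bind_book("https://example.com/book/12345")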
def clean_one_script_from_tags(path, filename, output_path):
    h.write_to_file(ts.strip_tags(h.open_file(filename, path)),
                    filename, output_path)
def simple_rl(previousWeights={}):
    env = gym.make('SuperMarioBros-1-1-v0')
    eta = 1.0 / math.sqrt(NUM_ITERS)
    discount = .95

    if previousWeights == {}:
        weights = defaultdict(float)
    else:
        weights = previousWeights

    actions = {
        #0: [0, 0, 0, 0, 0, 0],   # Nothing
        1: [1, 0, 0, 0, 0, 0],    # Up
        2: [0, 0, 1, 0, 0, 0],    # Down
        3: [0, 1, 0, 0, 0, 0],    # Left
        #4: [0, 1, 0, 0, 1, 0],   # Left + A
        #5: [0, 1, 0, 0, 0, 1],   # Left + B
        #6: [0, 1, 0, 0, 1, 1],   # Left + A + B
        7: [0, 0, 0, 1, 0, 0],    # Right
        8: [0, 0, 0, 1, 1, 0],    # Right + A
        9: [0, 0, 0, 1, 0, 1],    # Right + B
        10: [0, 0, 0, 1, 1, 1],   # Right + A + B
        11: [0, 0, 0, 0, 1, 0],   # A
        12: [0, 0, 0, 0, 0, 1],   # B
        13: [0, 0, 0, 0, 1, 1],   # A + B
        14: [1, 0, 0, 0, 1, 0],   # Up + A
    }

    def random_action():
        return random.sample(actions.keys(), 1)[0]

    def featureExtractor(state, action):
        features = []
        featureKey = (state, action)
        features.append((featureKey, 1))
        for i, button in enumerate(actions[action]):
            buttonPressed = [0] * len(actions[action])
            buttonPressed[i] = button
            features.append(
                ((state, " ".join(str(x) for x in buttonPressed)), 1))
        marioIndex = state.find('3')
        if marioIndex == -1:
            return features
        if state[marioIndex + 2] == '1' or state[marioIndex - 2] == '1':
            features.append(('nextToWall', 1))
        else:
            features.append(('nextToWall', 0))
        if state[marioIndex + 2] == '2' or state[marioIndex - 2] == '2':
            features.append(('nextToGoomba', 1))
        else:
            features.append(('nextToGoomba', 0))
        return features

    def getQ(state, action):
        score = 0
        for f, v in featureExtractor(state, action):
            score += weights[f] * v
        return score

    def getAction(state):
        if random.random() < EXPLORATION_PROB:
            return random_action()
        else:
            return max(
                (getQ(state, action), action) for action in actions.keys())[1]

    def generate_reward(oldInfo, newInfo, oldAction, newAction,
                        farthestTraveled):
        reward = 0
        if oldInfo == {} or not oldAction:
            return reward
        timeDelta = 1 / (newInfo['time'] + 1)
        distanceDelta = newInfo['distance'] - oldInfo['distance']
        if distanceDelta > 0:
            if newInfo['distance'] > farthestTraveled:
                reward += distanceDelta
            else:
                reward += distanceDelta * 0.1 - timeDelta
        else:
            reward += distanceDelta - timeDelta
        scoreDelta = newInfo['score'] - oldInfo['score']
        reward += scoreDelta
        if oldAction == newAction:
            reward *= 2
        if newAction == 0:
            reward -= timeDelta
        return reward

    def incorporateFeedback(state, action, reward, newState):
        vOpt = max([getQ(newState, aPrime) for aPrime in actions.keys()]) \
            if len(newState) > 0 else 0
        qOpt = getQ(state, action)
        scale = eta * (qOpt - (reward + (discount * vOpt)))
        for f, v in featureExtractor(state, action):
            weights[f] -= scale * v

    for episode in range(1, NUM_ITERS):
        env.lock.acquire()
        currentState = str(env.reset())
        env.lock.release()
        succ, oldAction, done, gameOver = None, None, False, False
        totalReward, reward, previousAction, bestDistance = 0, 0, 0, 0
        oldInfo, info = {}, {}
        farthestTraveled = 0

        for i in range(1, 100000):
            env.render()
            if oldInfo:
                farthestTraveled = max(farthestTraveled, oldInfo['distance'])
            action = getAction(currentState)
            '''
            while action == None:
                action = getAction(currentState)
                print("WE HAVE A NONE ACTION!!!!")
                print(qTable[str(s)][previousAction])
            '''
            succ, ogReward, done, info = env.step(actions[action])
            reward = generate_reward(oldInfo, info, oldAction, action,
                                     farthestTraveled)
            succState = str(succ)
            print("OgReward: {}".format(ogReward))
            print("OurReward: {}".format(reward))
            print(currentState)
            print(i, actions[action], reward)
            incorporateFeedback(currentState, action, reward, succState)
            if info['life'] == 0:
                reward = -50
                gameOver = True
            if done:
                reward = 2000
            totalReward += reward
            if info['distance'] > bestDistance:
                bestDistance = info['distance']
            if gameOver or done:
                break
            currentState = succState
            oldInfo = copy.deepcopy(info)
            oldAction = action
            previousAction = action

        print("Episode: {} \t Reward: {} \t Distance: {}".format(
            episode, totalReward, bestDistance))
        with open('rewards_funcApprox.txt', 'a') as f:
            f.write(str([episode, totalReward, bestDistance]))
            f.write("\n")
        if episode % 5 == 0:
            helpers.write_to_file("weights_funcApprox.pickle", weights, True)
        if episode % 15 == 0:
            with open('weights_funcApprox.txt', 'a') as f:
                f.write(str([episode, weights]))
                f.write("\n")

    env.lock.acquire()
    env.close()
    env.lock.release()
    helpers.killFCEUX()
    os._exit(0)
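# The incorporateFeedback closure above is a Q-learning update with linear
# function approximation over the features from featureExtractor:
#     w[f] <- w[f] - eta * (Q(s, a) - (r + discount * max_a' Q(s', a'))) * phi(s, a)[f]
# Below is a hedged sketch of resuming training from the weights pickled every
# five episodes; it assumes the read_in_data helper used elsewhere in this
# code can load what helpers.write_to_file("weights_funcApprox.pickle", ...)
# saved, which is not confirmed here.
if __name__ == '__main__':
    saved = read_in_data("weights_funcApprox.pickle")   # assumed loader
    simple_rl(previousWeights=defaultdict(float, saved))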
def simple_rl(weights={}):
    env = gym.make('SuperMarioBros-1-1-v0')
    qTable = weights

    actions = {
        0: [0, 0, 0, 0, 0, 0],   # Nothing
        1: [1, 0, 0, 0, 0, 0],   # Up
        2: [0, 0, 1, 0, 0, 0],   # Down
        3: [0, 1, 0, 0, 0, 0],   # Left
        4: [0, 1, 0, 0, 1, 0],   # Left + A
        5: [0, 1, 0, 0, 0, 1],   # Left + B
        6: [0, 1, 0, 0, 1, 1],   # Left + A + B
        7: [0, 0, 0, 1, 0, 0],   # Right
        8: [0, 0, 0, 1, 1, 0],   # Right + A
        9: [0, 0, 0, 1, 0, 1],   # Right + B
        10: [0, 0, 0, 1, 1, 1],  # Right + A + B
        11: [0, 0, 0, 0, 1, 0],  # A
        12: [0, 0, 0, 0, 0, 1],  # B
        13: [0, 0, 0, 0, 1, 1],  # A + B
    }

    def random_action():
        return random.sample(actions.keys(), 1)[0]

    def get_best_action(state, explorationProb):
        state = str(state)
        if state not in qTable:
            action = random_action()
            qTable[state] = {}
            qTable[state][action] = 0
            return (0, action)
        else:
            maxAction = (float("-inf"), None)
            if random.random() < explorationProb:
                action = random_action()
                if action not in qTable[str(state)]:
                    qTable[state][action] = 0
                maxAction = (0, action)
            else:
                for action, score in qTable[state].items():
                    if score >= maxAction[0]:
                        maxAction = (score, action)
            return maxAction

    def generate_reward(state, oldInfo, newInfo):
        if oldInfo == {}:
            return 0
        distanceDelta = newInfo['distance'] - oldInfo['distance']
        scoreDelta = newInfo['score'] - oldInfo['score']
        #timeDelta = 1/(401 - newInfo['time'])
        print("DistanceDelta: {} ScoreDelta: {}".format(
            distanceDelta, scoreDelta))
        return distanceDelta + scoreDelta

    for episode in range(89, 500):
        env.lock.acquire()
        s = env.reset()
        env.lock.release()
        done = False
        totalReward, reward = 0, 0
        bestDistance = 0
        oldInfo = {}
        n = 0.618

        for i in range(1, 100000):
            env.render()
            action = get_best_action(s, 0.2)[1]
            while action == None:
                action = get_best_action(s, 0.2)[1]
                print("WE HAVE A NONE ACTION!!!!")
                print(qTable[str(s)])
            succ, ogReward, done, info = env.step(actions[action])
            reward = generate_reward(succ, oldInfo, info)
            print("OgReward: {}".format(ogReward))
            print("OurReward: {}".format(reward))
            if info['life'] == 0:
                reward = -10
            if done:
                reward = 20
            print(s)
            print(i, actions[action], reward)
            oldVal = qTable[str(s)][action]
            qTable[str(s)][action] += n * (
                reward + get_best_action(succ, 0.0)[0] - oldVal)
            totalReward += reward
            if info['distance'] > bestDistance:
                bestDistance = info['distance']
            if reward == -10:
                break
            s = succ
            oldInfo = copy.deepcopy(info)

        print("Episode: {} \t Reward: {} \t Distance: {}".format(
            episode, totalReward, bestDistance))
        with open('rewards.txt', 'a') as f:
            f.write(str([episode, totalReward, bestDistance]))
            f.write("\n")
        if episode % 5 == 0:
            helpers.write_to_file("weights.pickle", qTable, True)

    env.lock.acquire()
    env.close()
    env.lock.release()
    helpers.killFCEUX()
    os._exit(0)
def simple_rl(weights={}):
    env = gym.make('SuperMarioBros-1-1-v0')
    qTable = weights
    actionDict = read_in_data('action_space.pickle')

    def get_best_action(state, explorationProb):
        state = str(state)
        if state not in qTable:
            action = env.action_space.sample()
            actionDict[str(action)] = action
            qTable[state] = {}
            qTable[state][str(action)] = 0
            return (0, action)
        else:
            maxAction = (float("-inf"), None)
            for action, score in qTable[state].items():
                if score >= maxAction[0]:
                    maxAction = (score, actionDict[action])
            randAction = env.action_space.sample()
            if random.random() < explorationProb:
                if str(randAction) not in qTable[str(state)]:
                    actionDict[str(randAction)] = randAction
                    qTable[state][str(randAction)] = 0
                return (0, randAction)
            return maxAction

    def generate_reward(state, oldInfo, newInfo):
        if newInfo['life'] == 0:
            return float("-inf")
        if oldInfo == {}:
            return 0
        distanceDelta = newInfo['distance'] - oldInfo['distance']
        scoreDelta = newInfo['score'] - oldInfo['score']
        #timeDelta = 1/(401 - newInfo['time'])
        print("DistanceDelta: {} ScoreDelta: {}".format(distanceDelta,
                                                        scoreDelta))
        return distanceDelta + scoreDelta

    alpha = 0.618

    for episode in range(1, 101):
        env.lock.acquire()
        s = env.reset()
        env.lock.release()
        done = False
        totalReward, reward = 0, 0
        bestDistance = 0
        oldInfo = {}

        for i in range(0, 100000):
            env.render()
            action = get_best_action(s, 0.2)[1]
            while action == None:
                action = get_best_action(s, 0.2)[1]
                print("WE HAVE A NONE ACTION!!!!")
                print(qTable[str(s)])
            succ, ogReward, done, info = env.step(action)
            reward = generate_reward(succ, oldInfo, info)
            print("OgReward: {}".format(ogReward))
            print("OurReward: {}".format(reward))
            if info['life'] == 0:
                reward = float("-inf")
            if done:
                reward = float("inf")
            print(s)
            print(i, action, reward)
            oldVal = qTable[str(s)][str(action)]
            qTable[str(s)][str(action)] += alpha * (
                reward + get_best_action(succ, 0.0)[0] - oldVal)
            if info['distance'] > bestDistance:
                bestDistance = info['distance']
            if reward == float("-inf"):
                break
            s = succ
            totalReward += reward
            oldInfo = copy.deepcopy(info)

        print("Episode: {} \t Reward: {} \t Distance: {}".format(
            episode, reward, bestDistance))
        with open('rewards.txt', 'a') as f:
            f.write(str([episode, totalReward, bestDistance]))
            f.write("\n")

    env.lock.acquire()
    env.close()
    env.lock.release()
    killFCEUX()
    if episode % 5 == 0:
        helpers.write_to_file("weights.pickle", qTable, True)
    os._exit(0)