def readInput():
    """Read a problem instance from stdin.

    Expected format: a header line "n_group n_actor n_character", then for
    each actor a line "cost n_groups" followed by one group id per line.
    """
    global optV
    first_line = raw_input()
    first_line_list = first_line.split()
    n_group = int(first_line_list[0])
    n_actor = int(first_line_list[1])
    n_character = int(first_line_list[2])
    groups_list_id = [i for i in range(1, n_group + 1)]
    actors_list_id = [i for i in range(1, n_actor + 1)]
    actors = []
    try:
        for i in range(n_actor):
            actor_line = raw_input()
            actor_line_list = actor_line.split()
            cost = int(actor_line_list[0])
            groups = int(actor_line_list[1])
            actor_groups_list = []
            for j in range(groups):
                group = int(raw_input())
                actor_groups_list.append(group)
            actor = classes.Actor(i + 1, cost, actor_groups_list)
            actors.append(actor)
            optV += cost
    except (EOFError, ValueError, IndexError):
        # Malformed or truncated input: return an empty instance rather
        # than crashing. (A bare `except` here would also have hidden
        # unrelated bugs, so it is narrowed to the parsing errors.)
        return [], [], 0, [], 0
    return groups_list_id, actors_list_id, n_character, actors, n_group
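
# A minimal sketch of the stdin format readInput() expects, assuming
# `classes.Actor(id, cost, group_ids)` and a module-level `optV` cost
# accumulator (both defined elsewhere in this repo). The instance below is
# made up for illustration:
#
#     2 2 3      <- n_group n_actor n_character
#     5 1        <- actor 1: cost 5, belongs to 1 group
#     1          <-   group id
#     3 2        <- actor 2: cost 3, belongs to 2 groups
#     1
#     2
#
#     optV = 0
#     groups, actor_ids, n_char, actors, n_group = readInput()
#     # -> groups == [1, 2], actor_ids == [1, 2], n_char == 3, optV == 8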
# root.mainline['9'] = c.Animal()
# root.mainline['10'] = c.Armour()
# root.mainline['11'] = c.Weapon()
# root.mainline['12'] = c.Gear()
# root.mainline['13'] = c.Food()
# root.mainline['14'] = c.Magic()
# root.mainline['15'] = c.Spell()
# root.mainline['16'] = c.Skill()
# root.mainline['17'] = c.Quest()
# root.mainline['18'] = c.Encounter()
# root.mainline['19'] = c.Location()
# root.mainline['20'] = c.Building()
# root.mainline['21'] = c.Lodging()

root.structure['1'] = c.Root()  # structural model
root.structure['2'] = c.Actor()
root.structure['3'] = c.Item()
root.structure['4'] = c.Action()
root.structure['5'] = c.Place()
root.structure['6'] = c.Player()
root.structure['7'] = c.NonPlayer()
root.structure['8'] = c.Monster()
root.structure['9'] = c.Animal()
root.structure['10'] = c.Armour()
root.structure['11'] = c.Weapon()
root.structure['12'] = c.Gear()
root.structure['13'] = c.Food()
root.structure['14'] = c.Magic()
root.structure['15'] = c.Spell()
root.structure['16'] = c.Skill()
root.structure['17'] = c.Quest()
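
# The `c` module is defined elsewhere in the repo. A hypothetical sketch of
# the kind of hierarchy the registrations above appear to assume -- the
# names below are guesses at shape, not the real definitions:
#
#     class Root(object): pass
#     class Actor(Root): pass
#     class Item(Root): pass
#     class Player(Actor): pass      # '6' above
#     class Monster(Actor): pass     # '8' above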
import classes
import pymongo
import util
import operator

mongo = pymongo.MongoClient('localhost', 27017)
db = mongo.Graph

# Load every vertex type (actors, characters, films) from MongoDB,
# excluding the internal _id field.
Actors = db.Actors.find({}, {"_id": 0})
Vertexs = []
for item in Actors:
    Vertexs.append(classes.Actor(item))

Characters = db.Characters.find({}, {"_id": 0})
for item in Characters:
    Vertexs.append(classes.Character(item))

Films = db.Films.find({}, {"_id": 0})
for item in Films:
    Vertexs.append(classes.Film(item))

graph = classes.Graph(Vertexs)

# Each CSV row links a film (column 0) to its actors (remaining columns).
f = open('csv_sources/filmsXactors.csv', 'rb')
filmsXactors = util.format_csv(f)
for item in filmsXactors:
    for j in range(1, len(item)):
        # addEgde is the (misspelled) method name defined on classes.Graph,
        # so it is kept as-is here.
        graph.addEgde(graph.getVertex('name', item[0]),
                      graph.getVertex('name', item[j]))

f = open('csv_sources/filmsXcharacters.csv', 'rb')
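
# util.format_csv is not shown in this bundle. A minimal sketch of what it
# is assumed to do (turn an open CSV file into a list of row lists) might
# look like the following -- the behavior is an assumption, not the repo's
# actual implementation:
#
#     import csv
#
#     def format_csv(f):
#         # Each row: [film_name, actor_1, actor_2, ...]
#         return [row for row in csv.reader(f)]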
def run(args):
    # ----------------------------------------------------------------
    # Set up
    # ----------------------------------------------------------------
    topic = args.topic
    actor_lr = args.actor_LR
    critic_lr = args.critic_LR
    episodes = args.eps

    # Development overrides; uncommenting these shadows the command-line
    # arguments above.
    # topic = "Georgetown_University"
    # actor_lr = 0.0001
    # critic_lr = 0.001
    # episodes = 10

    buffer = classes.ReplayBuffer()
    wiki_wiki = wikipediaapi.Wikipedia('en')
    rs = relation_standardizer.Relation_Standardizer("Bob")

    # TODO: Future -- add code to let a person choose another topic.
    G_augment, cleaned_tuples, topic_graph, encoded_tuples = load_GU_Data()
    page_list = GU_pages()
    buffer.relations = encoded_tuples
    individual_pages, Topic_Dict = get_pages(page_list, wiki_wiki)

    n_features = 768  # Default used in BERT
    n_output = 25     # Num possible relations. Currently set to 25
    actor_H1 = 768    # Num of hidden units in first layer of actor
    actor_H2 = 768    # Num of hidden units in second layer of actor
    critic_H1 = 768   # Num of hidden units in first layer of critic
    critic_H2 = 768   # Num of hidden units in second layer of critic

    # TensorFlow Setup and Initialization
    tf.reset_default_graph()
    actor = classes.Actor(n_features, n_output, actor_lr, actor_H1, actor_H2)
    critic = classes.Critic(n_features, critic_lr, critic_H1, critic_H2)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # BERT Setup
    print("In a terminal window your Python environment can access, run:\n"
          "bert-serving-start -model_dir ~/Final_Proj/BERT-Data/ "
          "-num_worker=2\n\nPress Enter when done.")
    x = input()
    from bert_serving.client import BertClient
    bc = BertClient()

    # Core-NLP Setup
    print("In a terminal window run the following:\n"
          "cd ~/Final_Proj/Stan-Core-NLP; java -mx6g -cp \"*\" "
          "edu.stanford.nlp.pipeline.StanfordCoreNLPServer "
          "-port 9000 -timeout 15000\n\nPress Enter when done.")
    x = input()
    nlp = StanfordCoreNLP('http://localhost:9000')

    current_node = topic

    # ----------------------------------------------------------------
    # Running Episodes
    # ----------------------------------------------------------------
    for episode in range(episodes):
        relations = []
        probs = []
        chosen = []
        rewards = []
        states = []
        pred_rewards = []
        td_error = []

        # Run the training routine for the Critic.
        training_sample = buffer.sample(20)

        # Use the Actor to determine the predicted relation for a state.
        for sample in training_sample:
            relations.append(rs.relation_to_int(sample[0]))
            states.append(sample[1])
            probs.append(
                sess.run(actor.layer3,
                         feed_dict={actor.observation: sample[1]}))

        # Format the probabilities to make them easier to use.
        for prob in probs:
            prob_list = prob[0].tolist()
            chosen.append(prob_list.index(max(prob_list)))

        # Determine reward from the environment.
        for actual, pred in zip(relations, chosen):
            if actual == pred:
                rewards.append(1.0)
            else:
                rewards.append(0.0)

        # Training the Critic
        loss, _ = sess.run(
            [critic.loss, critic.train],
            feed_dict={
                critic.observation: np.reshape(states, (-1, 768)),
                critic.reward: np.reshape(rewards, (-1, 1))
            })
        print("Training loss for critic is: " + str(loss))

        ######
        # Exploration code
        ######

        # Run the links available from the current node through the critic
        # and take the lowest mean. The std is also included in case we
        # want an LCB version later.
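        # (For reference, an LCB-style pick would score each candidate link
        # as mean - k * std for some k > 0 and visit the page with the
        # lowest bound; the loop below uses the plain predicted mean only.)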
        node_predictions = determine_node_knowledge(current_node, G_augment,
                                                    sess, critic)

        # Filter out nan entries & sort.
        filtered = [x for x in node_predictions if not math.isnan(x[1])]
        filtered.sort(key=lambda x: x[1])

        # Determine the next node to go to.
        for node in filtered:
            if node[0] not in individual_pages:
                current_node = node[0]
                individual_pages.append(current_node)
                break

        # Explore the page.
        clean_tuples, encodes = explore_new_page(current_node, bc, nlp,
                                                 wiki_wiki, G_augment)

        # Add encoded tuples to the replay buffer.
        buffer.relations += encodes

        relations = []
        states = []
        chosen = []
        probs = []
        questions = []

        # Gather info for training the Actor.
        for encode in encodes:
            relations.append(rs.relation_to_int(encode[0]))
            states.append(encode[1])
            probs.append(
                sess.run(actor.layer3,
                         feed_dict={actor.observation: encode[1]}))

        # Predict the rewards for the new relations from the page:
        # run the states through the critic, then flatten the output.
        pred_rewards = sess.run(
            critic.layer3,
            feed_dict={critic.observation: np.reshape(states, (-1, 768))})
        pred_rewards = [item for sublist in pred_rewards for item in sublist]

        # Train the Actor on the downloaded items.
        for s, r, p, clean in zip(states, relations, pred_rewards,
                                  clean_tuples):
            actor_prob = sess.run(actor.layer3,
                                  feed_dict={actor.observation: s})
            actor_prob = actor_prob[0].tolist()
            chosen = actor_prob.index(max(actor_prob))

            # Contextual-bandit TD error: reward = R - v(s), where R is 1
            # for a correct prediction and 0 otherwise, and p is the
            # critic's value estimate for this state.
            reward = -p
            if chosen == r:
                reward = 1 + reward
            else:
                questions.append("Actual: " + clean[0] + " | " + clean[1] +
                                 " | " + clean[2] + "\nPredicted: " +
                                 clean[0] + " | " +
                                 str(rs.int_to_relation(chosen)) + " | " +
                                 clean[2])

            # Fetch actor.train so the policy update op actually runs.
            loss, log_prob, _ = sess.run(
                [actor.loss, actor.log_probability, actor.train],
                feed_dict={
                    actor.observation: s,
                    actor.td_error: reward,
                    actor.relation: r
                })
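

# A small, self-contained illustration of the contextual-bandit TD error
# used by both versions of run() above (reward = R - v(s) with R in
# {0, 1}); the helper name and numbers are made up for illustration.
def bandit_td_error(correct, predicted_value):
    """One-step TD error for a {0, 1}-reward contextual bandit."""
    r = 1.0 if correct else 0.0
    return r - predicted_value


# A correct guess on a state the critic valued at 0.3 yields a positive
# error (0.7), reinforcing the chosen relation; an incorrect guess on the
# same state yields -0.3, discouraging it.
assert abs(bandit_td_error(True, 0.3) - 0.7) < 1e-9
assert abs(bandit_td_error(False, 0.3) - (-0.3)) < 1e-9
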
def run(args):
    # ----------------------------------------------------------------
    # Set up
    # ----------------------------------------------------------------
    topic = args.topic
    actor_lr = args.actor_LR
    critic_lr = args.critic_LR
    episodes = args.eps
    load_sess = args.load_sess
    sess_name = args.sess_name

    # Development overrides; uncommenting these shadows the command-line
    # arguments above.
    # topic = "Georgetown_University"
    # actor_lr = 0.0001
    # critic_lr = 0.0005
    # episodes = 227

    buffer = classes.ReplayBuffer()
    wiki_wiki = wikipediaapi.Wikipedia('en')
    rs = relation_standardizer.Relation_Standardizer("Bob")

    # TODO: Future -- add code to let a person choose another topic.
    G_augment, cleaned_tuples, topic_graph, encoded_tuples = load_GU_Data()
    page_list = GU_pages()
    buffer.relations = encoded_tuples
    individual_pages, Topic_Dict = get_pages(page_list, wiki_wiki)

    n_features = 768  # Default used in BERT
    n_output = 25     # Num possible relations. Currently set to 25
    actor_H1 = 200    # Num of hidden units in first layer of actor
    actor_H2 = 200    # Num of hidden units in second layer of actor
    critic_H1 = 200   # Num of hidden units in first layer of critic
    critic_H2 = 200   # Num of hidden units in second layer of critic

    # TensorFlow Setup and Initialization
    tf.reset_default_graph()
    actor_tgt = classes.Actor(n_features, n_output, actor_lr, actor_H1,
                              actor_H2, "target")
    actor_av = classes.Actor(n_features, n_output, actor_lr, actor_H1,
                             actor_H2, "value")

    # Ops that copy the "value" network's weights onto the "target"
    # network; run periodically during training.
    transfer_weights = [
        tf.assign(tgt, av) for (tgt, av) in zip(
            tf.trainable_variables('target'), tf.trainable_variables('value'))
    ]

    critic = classes.Critic(n_features, critic_lr, critic_H1, critic_H2)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # BERT Setup
    print("In a terminal window your Python environment can access, run:\n"
          "bert-serving-start -model_dir ~/Final_Proj/BERT-Data/ "
          "-num_worker=2\n\nPress Enter when done.")
    x = input()
    from bert_serving.client import BertClient
    bc = BertClient()

    # Core-NLP Setup
    print("In a terminal window run the following:\n"
          "cd ~/Final_Proj/Stan-Core-NLP; java -mx6g -cp \"*\" "
          "edu.stanford.nlp.pipeline.StanfordCoreNLPServer "
          "-port 9000 -timeout 15000\n\nPress Enter when done.")
    x = input()
    nlp = StanfordCoreNLP('http://localhost:9000')

    current_node = topic
    # Note: Ability to train on other topics is not currently implemented.
    if load_sess:
        load_session(sess_name, sess)
    else:
        train_on_topic(buffer, actor_tgt, actor_av, critic, sess, rs,
                       encoded_tuples)

    # Setup for performance tracking
    guesses = []
    relation_accuracy = []
    questions = []
    transfer_index = 0
    actor_loss = 0

    # ----------------------------------------------------------------
    # Running Episodes
    # ----------------------------------------------------------------
    for episode in range(episodes):
        # Transfer weights every ~1K guesses.
        if math.floor(len(guesses) / 1000) > transfer_index:
            sess.run(transfer_weights)
            transfer_index += 1
            print("Weights Transferred")

        # Per-episode bookkeeping
        relations = []
        probs = []
        chosen = []
        rewards = []
        states = []
        pred_rewards = []

        # Run the training routine for the Critic.
        training_sample = buffer.sample(100)

        # Use the Actor to determine the predicted relation for a state.
        for sample in training_sample:
            relations.append(rs.relation_to_int(sample[0]))
            states.append(sample[1])
            probs.append(
                sess.run(actor_tgt.layer3,
                         feed_dict={actor_tgt.observation: sample[1]}))

        # Format the probabilities to make them easier to use.
        for prob in probs:
            prob_list = prob[0].tolist()
            chosen.append(prob_list.index(max(prob_list)))

        # Determine reward from the environment.
        for actual, pred in zip(relations, chosen):
            if actual == pred:
                rewards.append(1.0)
            else:
                rewards.append(0.0)

        # Training the Critic
        critic_loss, _ = sess.run(
            [critic.loss, critic.train],
            feed_dict={
                critic.observation: np.reshape(states, (-1, 768)),
                critic.reward: np.reshape(rewards, (-1, 1))
            })
        if episode > 1:
            print("Training loss for critic is: " + str(critic_loss) +
                  " Training loss for actor is: " + str(actor_loss))

        ######
        # Exploration code
        ######

        # Run the links available from the current node through the critic
        # and take the lowest mean. The std is also included in case we
        # want an LCB version later.
        node_predictions = determine_node_knowledge(current_node, G_augment,
                                                    sess, critic)

        # Filter out nan entries & sort.
        filtered = [x for x in node_predictions if not math.isnan(x[1])]
        filtered.sort(key=lambda x: x[1])

        # Determine the next node to go to.
        for node in filtered:
            if node[0] not in individual_pages:
                current_node = node[0]
                individual_pages.append(current_node)
                break

        # Explore the page.
        clean_tuples, encodes = explore_new_page(current_node, bc, nlp,
                                                 wiki_wiki, G_augment, rs)

        # Add encoded tuples to the replay buffer.
        buffer.relations += encodes

        # To hold the information for training the Actor
        relations = []
        states = []
        chosen = []
        probs = []

        # Draw a fresh random sample from the buffer, intended to train the
        # agent half on new data and half on old data.
        from_buffer = buffer.sample(len(encodes))
        encodes += from_buffer

        # Gather info for training the Actor.
        for encode in encodes:
            relations.append(rs.relation_to_int(encode[0]))
            states.append(encode[1])
            probs.append(
                sess.run(actor_tgt.layer3,
                         feed_dict={actor_tgt.observation: encode[1]}))

        # Predict the rewards for the new relations from the page:
        # run the states through the critic, then flatten the output.
        pred_rewards = sess.run(
            critic.layer3,
            feed_dict={critic.observation: np.reshape(states, (-1, 768))})
        pred_rewards = [item for sublist in pred_rewards for item in sublist]

        # Train the Actor on the downloaded items.
        # Note: zip() stops at the shortest input (clean_tuples), so only
        # the freshly extracted tuples reach this actor update; the replay
        # sample appended above is not consumed by this loop.
        for s, r, p, clean in zip(states, relations, pred_rewards,
                                  clean_tuples):
            actor_prob = sess.run(actor_tgt.layer3,
                                  feed_dict={actor_tgt.observation: s})
            actor_prob = actor_prob[0].tolist()
            chosen = actor_prob.index(max(actor_prob))

            # Record the actual relation extracted from the data and the
            # chosen one; used later to build a table of accuracy by
            # relation.
            relation_accuracy.append([r, chosen])

            # Calculation of TD error. The TD error is normally
            # R + gamma * v(t+1) - v(t). We can't guess the next state with
            # any accuracy, so we use a Contextual Bandit Actor-Critic
            # model, which reduces the TD error to R - v(t), where R is the
            # one-turn reward and v(t) is the one-turn expected value.
            # Because the reward is 1 if correct and 0 otherwise, we set up
            # -v(t) first, then add 1 if the guess is correct.
            reward = -p
            if chosen == r:
                reward = 1 + reward
                guesses.append(1)
            else:
                questions.append("Actual: " + clean[0] + " | " + clean[1] +
                                 " | " + clean[2] + "\nPredicted: " +
                                 clean[0] + " | " +
                                 str(rs.int_to_relation(chosen)) + " | " +
                                 clean[2])
                guesses.append(0)

            actor_loss, log_prob, _ = sess.run(
                [actor_av.loss, actor_av.log_probability, actor_av.train],
                feed_dict={
                    actor_av.observation: s,
                    actor_av.td_error: reward,
                    actor_av.relation: r
                })

        print("Have explored " + str(episode + 1) + " new pages and made " +
              str(len(guesses)) +
              " guesses. Accuracy (last 200 predictions) was: " +
              "{:.2%}".format(sum(guesses[-200:]) / min(200, len(guesses))))

    save_session(sess_name, sess)

    # ----------------------------------------------------------------
    # Performance Graphing
    # ----------------------------------------------------------------

    # Graph for training on Georgetown data: mean accuracy per 3000-guess
    # window.
    means = []
    for i in range(3000, len(guesses), 3000):
        means.append(np.mean(guesses[i - 3000:i]))
    ticks = list(range(3000, len(guesses), 3000))
    plt.plot(ticks, means)
    plt.ylabel('Average Score (Over 3000 Guesses)')
    plt.xlabel('Guesses')
    plt.title("Agent Training on New Pages")
    plt.savefig('Accuracy_New_Pages_2.png')

    # Accuracy broken down by actual relation, over all guesses and over
    # the most recent 50,000.
    first = list(range(25))
    target_relations = []
    for i in first:
        target_relations.append(rs.int_to_relation(i))

    y_true, y_pred = [], []
    for i in relation_accuracy:
        y_true.append(i[0])
        y_pred.append(i[1])
    print(
        classification_report(y_true,
                              y_pred,
                              target_names=target_relations,
                              labels=first))

    y_true, y_pred = [], []
    for i in relation_accuracy[-50000:]:
        y_true.append(i[0])
        y_pred.append(i[1])
    print(
        classification_report(y_true,
                              y_pred,
                              target_names=target_relations,
                              labels=first))
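

# A minimal, standalone sketch (TF1-style, matching the code above) of the
# target/value weight-transfer pattern that run() builds with tf.assign and
# triggers every ~1K guesses. The two-variable graph here is illustrative
# only; like the original, it relies on both scopes declaring their
# variables in the same order.
import tensorflow as tf

tf.reset_default_graph()

with tf.variable_scope("target"):
    w_tgt = tf.get_variable("w", initializer=tf.zeros([2, 2]))
with tf.variable_scope("value"):
    w_val = tf.get_variable("w", initializer=tf.ones([2, 2]))

# One assign op per (target, value) variable pair.
transfer_weights = [
    tf.assign(tgt, av) for (tgt, av) in zip(
        tf.trainable_variables("target"), tf.trainable_variables("value"))
]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(transfer_weights)
    print(sess.run(w_tgt))  # all ones, copied from the "value" scope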