def __init__(self, world, body, pos=(0, 0), brain=None):
    """Shouldn't be used directly; use world.create_entity instead.

    Args:
        world: owning simulation world.
        body: physics body backing this entity.
        pos: initial (x, y) position tuple.
        brain: optional pre-built network; a fresh net.Network(NET_LAYERS)
            is created when None.
    """
    # Monotonically increasing class-level id counter.
    self.id = Entity.NEXT_ID
    Entity.NEXT_ID += 1
    self.alive = True
    self.colour = ENTITY_DEFAULT_COLOUR
    self.world = world
    self.body = body
    self.pos = pos
    self.velocity = b2Vec2()
    self.sensors = []
    self._add_sensor(0.5, 0.25, EntityType.FOOD_SENSOR) # 90 degrees on left
    # Food counters: total over lifetime, and a running accumulator.
    self.total_food_eaten = 0
    self.cumulative_food = 0
    if brain is None:
        self.brain = net.Network(NET_LAYERS)
    else:
        self.brain = brain
def get_network(self):
    """Build AP objects from the records in self.data and assemble them
    into a network stored on self.net.

    Records lacking 'latitude' or 'longitude' are skipped. 'altitude'
    defaults to 10.0 and 'radius' to 100 when absent.
    """
    for record in self.data:
        # Latitude is mandatory; convert it before checking longitude so
        # malformed values surface even when longitude is missing.
        if 'latitude' not in record:
            continue
        lat = float(record['latitude'])
        if 'longitude' not in record:
            continue
        lon = float(record['longitude'])
        alt = float(record['altitude']) if 'altitude' in record else 10.0
        rad = int(record['radius'], 10) if 'radius' in record else 100
        access_point = ap.AP(lat, lon, alt, rad)
        self.ap_list.append(access_point)
        access_point.get_cover()
    self.net = net.Network(self.ap_list)
# NOTE(review): orphan tail of a `net_copy(...)` helper whose `def` line lies
# before this chunk — copies parameter tensors element-wise between models.
        mcp[i].data[:] = mp[i].data[:]

if __name__ == "__main__":
    DIR = args.DIR
    embedding_file = args.embedding_dir

    # Worker model, warm-started from the pretrained checkpoint.
    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)
    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    worker = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(worker, best_network_model)

    # Manager model, warm-started from the same checkpoint file.
    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)
    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)
def _clone_with_zero_pots(bn, psi=1):
    """Return a copy of *bn* whose potential entries are all psi - 1.

    Structure (type, variable sizes, potential domains) is duplicated so
    the clone shares no mutable state with the original network.
    """
    fill_value = psi - 1
    cloned_pots = [[fill_value for _ in pot] for pot in bn.pots]
    cloned_domains = [list(domain) for domain in bn.pot_domains]
    return net.Network(str(bn.type), list(bn.var_sizes), cloned_domains, cloned_pots)
import net
from pong import game
import time
import random as r
import numpy as np
import matplotlib.pyplot as plt

# Play two copies of the best saved genome against each other.
# NOTE(review): time/random/matplotlib are unused in this view — presumably
# used elsewhere in the full file; confirm before removing.
winner_genome_a = net.Network([5, 3, 1], weights_path=f"Saves_little_04/best.txt")
winner_genome_b = net.Network([5, 3, 1], weights_path=f"Saves_little_04/best.txt")

input('START GAME')

init=True
# Endless game loop: each frame feeds the paddle/ball state to both
# networks and converts the scalar output into up/down paddle commands.
while True:
    if init:
        # First frame only: no decisions yet, keep all paddles still.
        up_a, down_a, up_b, down_b = False, False, False, False
    dx_a, dy_a, dx_b, dy_b, ball_y, ball_dx, ball_dy = game(up_a, down_a, up_b, down_b)
    up_a = winner_genome_a.forward_propagation([dx_a, dy_a, ball_y, ball_dx, ball_dy])>0.5
    down_a = not up_a
    # Side B sees a mirrored view (negated x deltas).
    up_b = winner_genome_b.forward_propagation([-dx_b, dy_b, ball_y, -ball_dx, ball_dy])>0.5
    down_b = not up_b
    init = False
import net
from pong_copy import game
import time
import random as r
import numpy as np
import matplotlib.pyplot as plt

# Population of genomes restored from per-index save files.
popolation = []
GENOME_SIZE = 50
for i in range(GENOME_SIZE):
    popolation.append(
        net.Network([5, 3, 1], weights_path=f"Saves_little_02/save_{i}.txt"))


def log_change(best_five):
    """Build the next-generation weight vectors from the elite genomes.

    Returns an array containing: elites perturbed at log-spaced magnitudes,
    25 fresh random vectors, and the unmodified elites themselves.

    NOTE(review): np.logspace(-5, -1, 4) yields only 4 magnitudes, so when
    best_five holds 5 genomes the zip drops the last one — confirm intended.
    """
    vectors = []
    # Multiplicative perturbation proportional to each elite's own weights.
    for i, j in zip(np.logspace(-5, -1, 4), range(len(best_five))):
        vectors.append(best_five[j] +
                       (np.random.uniform(-1, 1, len(best_five[j])) * i) *
                       best_five[j])
    # Random immigrants for diversity.
    for _ in range(25):
        vectors.append(np.random.uniform(-1, 1, len(best_five[0])))
    # Elitism: carry the best genomes over unchanged.
    for i in range(len(best_five)):
        vectors.append(best_five[i])
    return np.array(vectors)
def main():
    """REINFORCE fine-tuning of a pretrained coreference pair-scoring model
    together with a separate anaphoricity model (Python 2, legacy PyTorch
    Variable API; assumes CUDA).

    Per document: sample a linking action for every mention from the current
    policies, take the document F1 of the resulting clustering as reward,
    and apply one policy-gradient update to each network.
    """
    DIR = args.DIR
    embedding_file = args.embedding_dir

    # Pair-scoring network, warm-started from the ".pair" checkpoint.
    best_network_file = "./model/network_model_pretrain.best.top.pair"
    print >> sys.stderr,"Read model from ",best_network_file
    best_network_model = torch.load(best_network_file)
    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    network_model = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda()
    net_copy(network_model,best_network_model)

    # Anaphoricity network, warm-started from the ".ana" checkpoint.
    best_network_file = "./model/network_model_pretrain.best.top.ana"
    print >> sys.stderr,"Read model from ",best_network_file
    best_network_model = torch.load(best_network_file)
    ana_network = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda()
    net_copy(ana_network,best_network_model)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    print >> sys.stderr,"prepare data for train ..."
    train_docs_iter = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs_iter = DataReader.DataGnerater("dev"+reduced)
    test_docs_iter = DataReader.DataGnerater("test"+reduced)

    # Baseline numbers before any reinforcement updates.
    print "Performance after pretraining..."
    print "DEV"
    metric = performance.performance(dev_docs_iter,network_model,ana_network)
    print "Average:",metric["average"]
    print "TEST"
    metric = performance.performance(test_docs_iter,network_model,ana_network)
    print "Average:",metric["average"]
    print "***"
    print
    sys.stdout.flush()

    # Hyper-parameters (historical alternatives kept commented out).
    l2_lambda = 1e-6
    #lr = 0.00001
    #lr = 0.000005
    lr = 0.000002
    #lr = 0.0000009
    dropout_rate = 0.5
    shuffle = True
    times = 0

    reinforce = True

    model_save_dir = "./model/reinforce/"
    utils.mkdir(model_save_dir)

    score_softmax = nn.Softmax()

    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps = 1e-6)
    ana_optimizer = optim.RMSprop(ana_network.parameters(), lr=lr, eps = 1e-6)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)
    ana_scheduler = lr_scheduler.StepLR(ana_optimizer, step_size=15, gamma=0.5)

    for echo in range(30):
        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo
        scheduler.step()
        ana_scheduler.step()

        train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl')
        docs_by_id = {doc.did: doc for doc in train_docs}

        print >> sys.stderr,"Link docs ..."
        tmp_data = []
        path = []
        for data in train_docs_iter.rl_case_generater(shuffle=True):
            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

            mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
            mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

            # Sampling passes run with dropout disabled (0.0).
            output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,0.0)
            ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0)
            ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents, 0.0)

            reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))

            scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex]
            ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex]

            doc = docs_by_id[rl['did']]

            # Sample one action per mention; the last index of a span means
            # "non-anaphoric / new cluster", otherwise link to an antecedent.
            for s,e in zip(rl["starts"],rl["ends"]):
                score = score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)).data.cpu().numpy()[0]
                pair_score = score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)).data.cpu().numpy()[0]

                ana_action = utils.sample_action(score)
                if ana_action == (e-s-1):
                    action = ana_action
                else:
                    pair_action = utils.sample_action(pair_score*score[:-1])
                    action = pair_action
                path.append(action)

                link = action
                m1, m2 = rl['ids'][s + link]
                doc.link(m1, m2)

            tmp_data.append((mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return))

            if rl["end"] == True:
                # Document finished: F1 of the sampled clustering = reward.
                doc = docs_by_id[rl['did']]
                reward = doc.get_f1()

                inside_index = 0
                for mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return in tmp_data:

                    # Recompute per-action costs via hypothetical links.
                    for (start, end) in zip(rl['starts'], rl['ends']):
                        ids = rl['ids'][start:end]
                        ana = ids[0, 1]
                        old_ant = doc.ana_to_ant[ana]
                        doc.unlink(ana)
                        costs = rl['costs'][start:end]
                        for ant_ind in range(end - start):
                            costs[ant_ind] = doc.link(ids[ant_ind, 0], ana, hypothetical=True, beta=1)
                        doc.link(old_ant, ana)

                    # NOTE(review): `cost` is reset here and never accumulated,
                    # so the epoch-level "cost:" print below always shows 0.0.
                    cost = 0.0

                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

                    # --- policy-gradient step on the anaphoricity network ---
                    ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
                    ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate)

                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
                    ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex]

                    ana_optimizer.zero_grad()
                    ana_loss = None
                    i = inside_index
                    for s,e in zip(rl["starts"],rl["ends"]):
                        costs = rl["costs"][s:e]
                        costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                        score = torch.squeeze(score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)))
                        # Expected cost under the current policy as baseline.
                        baseline = torch.sum(score*costs)
                        action = path[i]
                        this_cost = torch.log(score[action])*-1.0*(reward-baseline)
                        if ana_loss is None:
                            ana_loss = this_cost
                        else:
                            ana_loss += this_cost
                        i += 1
                    ana_loss.backward()
                    torch.nn.utils.clip_grad_norm(ana_network.parameters(), 5.0)
                    ana_optimizer.step()

                    # --- policy-gradient step on the pair-scoring network ---
                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

                    output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate)
                    ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)

                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
                    scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex]

                    pair_loss = None
                    optimizer.zero_grad()
                    i = inside_index
                    index = 0
                    for s,e in zip(rl["starts"],rl["ends"]):
                        action = path[i]
                        # Update only when the policy linked (did not open a
                        # new cluster) and the mention is gold-anaphoric.
                        if (not (action == (e-s-1))) and (anaphoricity_target[index] == 1):
                            costs = rl["costs"][s:e-1]
                            costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                            score = torch.squeeze(score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)))
                            baseline = torch.sum(score*costs)
                            this_cost = torch.log(score[action])*-1.0*(reward-baseline)
                            if pair_loss is None:
                                pair_loss = this_cost
                            else:
                                pair_loss += this_cost
                        i += 1
                        index += 1
                    if pair_loss is not None:
                        pair_loss.backward()
                        torch.nn.utils.clip_grad_norm(network_model.parameters(), 5.0)
                        optimizer.step()
                    inside_index = i

                tmp_data = []
                path = []

        end_time = timeit.default_timer()
        print >> sys.stderr, "TRAINING Use %.3f seconds"%(end_time-start_time)
        # NOTE(review): `cost` is only bound inside the per-document branch;
        # if no document completed this epoch this print raises NameError.
        print >> sys.stderr, "cost:",cost
        print >> sys.stderr,"save model ..."
        torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo)
        torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo)

        print "DEV"
        metric = performance.performance(dev_docs_iter,network_model,ana_network)
        print "Average:",metric["average"]
        print "DEV Ana: ",metric["ana"]
        print "TEST"
        metric = performance.performance(test_docs_iter,network_model,ana_network)
        print "Average:",metric["average"]
        print "TEST Ana: ",metric["ana"]
        print
        sys.stdout.flush()
def main(): DIR = args.DIR embedding_file = args.embedding_dir best_network_file = "./model/network_model_pretrain.best.top" print >> sys.stderr, "Read model from ", best_network_file best_network_model = torch.load(best_network_file) embedding_matrix = numpy.load(embedding_file) "Building torch model" worker = network.Network( nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"], nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000, nnargs["embedding_size"], nnargs["embedding_dimention"], embedding_matrix).cuda() net_copy(worker, best_network_model) best_network_file = "./model/network_model_pretrain.best.top" print >> sys.stderr, "Read model from ", best_network_file best_network_model = torch.load(best_network_file) manager = network.Network( nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"], nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000, nnargs["embedding_size"], nnargs["embedding_dimention"], embedding_matrix).cuda() net_copy(manager, best_network_model) reduced = "" if args.reduced == 1: reduced = "_reduced" print >> sys.stderr, "prepare data for train ..." #train_docs_iter = DataReader.DataGnerater("train"+reduced) train_docs_iter = DataReader.DataGnerater("dev" + reduced) print >> sys.stderr, "prepare data for dev and test ..." dev_docs_iter = DataReader.DataGnerater("dev" + reduced) test_docs_iter = DataReader.DataGnerater("test" + reduced) print "Performance after pretraining..." 
print "DEV" metric = performance.performance(dev_docs_iter, worker, manager) print "Average:", metric["average"] print "TEST" metric = performance.performance(test_docs_iter, worker, manager) print "Average:", metric["average"] print "***" print sys.stdout.flush() lr = nnargs["lr"] top_k = nnargs["top_k"] model_save_dir = "./model/reinforce/" utils.mkdir(model_save_dir) score_softmax = nn.Softmax() optimizer_manager = optim.RMSprop(manager.parameters(), lr=lr, eps=1e-6) optimizer_worker = optim.RMSprop(worker.parameters(), lr=lr, eps=1e-6) MAX_AVE = 2048 for echo in range(nnargs["epoch"]): start_time = timeit.default_timer() print "Pretrain Epoch:", echo reward_log = Logger(Tensorboard + args.tb + "/acl2018/%d/reward/" % echo, flush_secs=3) entropy_log_manager = Logger(Tensorboard + args.tb + "/acl2018/%d/entropy/worker" % echo, flush_secs=3) entropy_log_worker = Logger(Tensorboard + args.tb + "/acl2018/%d/entropy/manager" % echo, flush_secs=3) #train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl') train_docs = utils.load_pickle(args.DOCUMENT + 'dev_docs.pkl') docs_by_id = {doc.did: doc for doc in train_docs} ave_reward = [] ave_manager_entropy = [] ave_worker_entropy = [] print >> sys.stderr, "Link docs ..." 
tmp_data = [] cluster_info = {0: [0]} cluster_list = [0] current_new_cluster = 1 predict_action_embedding = [] choose_action = [] mid = 1 step = 0 statistic = { "worker_hits": 0, "manager_hits": 0, "total": 0, "manager_predict_last": 0, "worker_predict_last": 0 } for data in train_docs_iter.rl_case_generater(shuffle=True): rl = data["rl"] scores_manager, representations_manager = get_score_representations( manager, data) for s, e in zip(rl["starts"], rl["ends"]): action_embeddings = representations_manager[s:e] probs = F.softmax(torch.transpose(scores_manager[s:e], 0, 1)) m = Categorical(probs) this_action = m.sample() index = this_action.data.cpu().numpy()[0] if index == (e - s - 1): should_cluster = current_new_cluster cluster_info[should_cluster] = [] current_new_cluster += 1 else: should_cluster = cluster_list[index] choose_action.append(index) cluster_info[should_cluster].append(mid) cluster_list.append(should_cluster) mid += 1 cluster_indexs = torch.cuda.LongTensor( cluster_info[should_cluster]) action_embedding_predict = torch.mean( action_embeddings[cluster_indexs], 0, keepdim=True) predict_action_embedding.append(action_embedding_predict) tmp_data.append(data) if rl["end"] == True: inside_index = 0 manager_path = [] worker_path = [] doc = docs_by_id[rl["did"]] for data in tmp_data: rl = data["rl"] pair_target = data["pair_target"] anaphoricity_target = 1 - data["anaphoricity_target"] target = numpy.concatenate( (pair_target, anaphoricity_target))[rl["reindex"]] scores_worker, representations_worker = get_score_representations( worker, data) for s, e in zip(rl["starts"], rl["ends"]): action_embeddings = representations_worker[s:e] score = score_softmax( torch.transpose(scores_worker[s:e], 0, 1)).data.cpu().numpy()[0] action_embedding_choose = predict_action_embedding[ inside_index] similarities = torch.sum( torch.abs(action_embeddings - action_embedding_choose), 1) similarities = similarities.data.cpu().numpy() action_probabilities = [] action_list = [] 
action_candidates = heapq.nlargest( top_k, -similarities) for action in action_candidates: action_index = numpy.argwhere( similarities == -action)[0][0] action_probabilities.append(score[action_index]) action_list.append(action_index) manager_action = choose_action[inside_index] if not manager_action in action_list: action_list.append(manager_action) action_probabilities.append(score[manager_action]) this_target = target[s:e] manager_action = choose_action[inside_index] sample_action = utils.sample_action( numpy.array(action_probabilities)) worker_action = action_list[sample_action] if this_target[worker_action] == 1: statistic["worker_hits"] += 1 if this_target[manager_action] == 1: statistic["manager_hits"] += 1 if worker_action == (e - s - 1): statistic["worker_predict_last"] += 1 if manager_action == (e - s - 1): statistic["manager_predict_last"] += 1 statistic["total"] += 1 inside_index += 1 #link = manager_action link = worker_action m1, m2 = rl['ids'][s + link] doc.link(m1, m2) manager_path.append(manager_action) worker_path.append(worker_action) reward = doc.get_f1() for data in tmp_data: for s, e in zip(rl["starts"], rl["ends"]): ids = rl['ids'][s:e] ana = ids[0, 1] old_ant = doc.ana_to_ant[ana] doc.unlink(ana) costs = rl['costs'][s:e] for ant_ind in range(e - s): costs[ant_ind] = doc.link(ids[ant_ind, 0], ana, hypothetical=True, beta=1) doc.link(old_ant, ana) #costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor)) inside_index = 0 worker_entropy = 0.0 for data in tmp_data: new_step = step # worker scores_worker, representations_worker = get_score_representations( worker, data, dropout=nnargs["dropout_rate"]) optimizer_worker.zero_grad worker_loss = None for s, e in zip(rl["starts"], rl["ends"]): costs = rl['costs'][s:e] costs = autograd.Variable( torch.from_numpy(costs).type( torch.cuda.FloatTensor)) action = worker_path[inside_index] score = F.softmax( torch.transpose(scores_worker[s:e], 0, 1)) if not score.size()[1] == 
costs.size()[0]: continue score = torch.squeeze(score) baseline = torch.sum(costs * score) this_cost = torch.log( score[action]) * -1.0 * (reward - baseline) if worker_loss is None: worker_loss = this_cost else: worker_loss += this_cost worker_entropy += torch.sum( score * torch.log(score + 1e-7) ).data.cpu().numpy()[ 0] #+ 0.001*torch.sum(score*torch.log(score+1e-7)) inside_index += 1 worker_loss.backward() torch.nn.utils.clip_grad_norm(worker.parameters(), nnargs["clip"]) optimizer_worker.step() ave_worker_entropy.append(worker_entropy) if len(ave_worker_entropy) >= MAX_AVE: ave_worker_entropy = ave_worker_entropy[1:] entropy_log_worker.log_value( 'entropy', float(sum(ave_worker_entropy)) / float(len(ave_worker_entropy)), new_step) new_step += 1 inside_index = 0 manager_entropy = 0.0 for data in tmp_data: new_step = step rl = data["rl"] ave_reward.append(reward) if len(ave_reward) >= MAX_AVE: ave_reward = ave_reward[1:] reward_log.log_value( 'reward', float(sum(ave_reward)) / float(len(ave_reward)), new_step) scores_manager, representations_manager = get_score_representations( manager, data, dropout=nnargs["dropout_rate"]) optimizer_manager.zero_grad manager_loss = None for s, e in zip(rl["starts"], rl["ends"]): score = F.softmax( torch.transpose(scores_manager[s:e], 0, 1)) costs = rl['costs'][s:e] costs = autograd.Variable( torch.from_numpy(costs).type( torch.cuda.FloatTensor)) if not score.size()[1] == costs.size()[0]: continue action = manager_path[inside_index] score = torch.squeeze(score) baseline = torch.sum(costs * score) this_cost = torch.log(score[action]) * -1.0 * ( reward - baseline ) # + 0.001*torch.sum(score*torch.log(score+1e-7)) #this_cost = torch.sum(score*costs) + 0.001*torch.sum(score*torch.log(score+1e-7)) if manager_loss is None: manager_loss = this_cost else: manager_loss += this_cost manager_entropy += torch.sum( score * torch.log(score + 1e-7)).data.cpu().numpy()[0] inside_index += 1 manager_loss.backward() 
torch.nn.utils.clip_grad_norm(manager.parameters(), nnargs["clip"]) optimizer_manager.step() ave_manager_entropy.append(manager_entropy) if len(ave_manager_entropy) >= MAX_AVE: ave_manager_entropy = ave_manager_entropy[1:] entropy_log_manager.log_value( 'entropy', float(sum(ave_manager_entropy)) / float(len(ave_manager_entropy)), new_step) new_step += 1 step = new_step tmp_data = [] cluster_info = {0: [0]} cluster_list = [0] current_new_cluster = 1 mid = 1 predict_action_embedding = [] choose_action = [] end_time = timeit.default_timer() print >> sys.stderr, "TRAINING Use %.3f seconds" % (end_time - start_time) print >> sys.stderr, "save model ..." #print "Top k",top_k print "Worker Hits", statistic[ "worker_hits"], "Manager Hits", statistic[ "manager_hits"], "Total", statistic["total"] print "Worker predict last", statistic[ "worker_predict_last"], "Manager predict last", statistic[ "manager_predict_last"] #torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo) #torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo) print "DEV" metric = performance.performance(dev_docs_iter, worker, manager) print "Average:", metric["average"] print "DEV manager" metric = performance_manager.performance(dev_docs_iter, worker, manager) print "Average:", metric["average"] print "TEST" metric = performance.performance(test_docs_iter, worker, manager) print "Average:", metric["average"] print sys.stdout.flush()
import numpy as np
from net import load
from net2 import loadNet

if __name__ == '__main__':
    # import csv_loader
    import net as network
    # NOTE(review): loadImagesSimple, toSepia, processImage and cv2 are not
    # imported in this view — presumably defined/imported elsewhere; confirm.
    #training_data, validation_data, test_data = csv_loader.load_data()
    #training_data, validation_data, testing_data = loadPokemons()
    training_data, validation_data, testing_data = loadImagesSimple()
    validation_data = list(validation_data)
    training_data = list(training_data)
    #net = network.Network([100, 10, 2], cost=network.QuadraticCost)
    # 100-input, 10-hidden, 2-output classifier with quadratic cost.
    net = network.Network([100, 10, 2], cost=network.QuadraticCost)
    #net.SGD(training_data, 10, 10, 0.2, lmbda=1.0, evaluation_data=list(validation_data), monitor_evaluation_accuracy=True)
    # 50 epochs, mini-batch 10, eta 0.1, lambda 0.0, full monitoring.
    net.SGD(training_data, 50, 10, 0.1, 0.0,
            evaluation_data=list(validation_data),
            monitor_evaluation_cost=True,
            monitor_evaluation_accuracy=True,
            monitor_training_cost=True,
            monitor_training_accuracy=True)
    net.save("DANN_distance.txt")
    # best result so far in DANN_100.txt
    #net = load("DANN_distance.txt")

    # Spot-check the trained net on one normal and one sepia image.
    a = toSepia(cv2.imread("resized/images109.png"))
    a = processImage(a)
    print(net.feedforward(a))
    print(np.argmax(net.feedforward(a)))
    a = toSepia(cv2.imread("resized/images109_sepia.png"))
    a = processImage(a)
    print(net.feedforward(a))
import net
from pong import game
import time
import random as r
import numpy as np
import matplotlib.pyplot as plt

# Fresh (unseeded) population; loading saved weights is disabled here.
popolation = []
GENOME_SIZE = 50
for i in range(GENOME_SIZE):
    popolation.append(net.Network([5, 3, 1]))#, weights_path=f"Saves_little/save_{i}.txt"))


def log_change(best_five):
    """Build next-generation weight vectors from the elite genomes:
    log-spaced multiplicative perturbations of the elites, 25 random
    immigrants, then the unmodified elites (elitism).

    NOTE(review): np.logspace(-5, -1, 4) yields 4 magnitudes, so with five
    elites the zip drops the last one — confirm intended.
    """
    vectors = []
    for i, j in zip(np.logspace(-5, -1, 4), range(len(best_five))):
        vectors.append(best_five[j] +
                       (np.random.uniform(-1, 1, len(best_five[j]))*i)*best_five[j])
    for _ in range(25):
        vectors.append(np.random.uniform(-1, 1, len(best_five[0])))
    for i in range(len(best_five)):
        vectors.append(best_five[i])
    return np.array(vectors)


best = 0
#a = net.Network([2, 3, 2], weights_path="best.txt")
epochs = 5
std_all = []
import net
import sys, os, json
import scipy.stats # for entropy
import setting

# arg
workdir = sys.argv[1]
itercnt = int(sys.argv[2])

#
# load generated graph file
#
net_base = net.Network()
n_rootnei = 0
with open(os.path.join(workdir, 'cydata.json'), 'r') as f:
    net_base.loadJson(f)
# Re-open the same file to pull the neighbour count out of the raw JSON.
with open(os.path.join(workdir, 'cydata.json'), 'r') as f:
    j = json.load(f)
    n_rootnei = j['root_nei_total_cnt']
net_base.gene_targets.sort(key=lambda x: abs(x['pval'])) # sort targets first

# load metadata of whole graph
# TODO: make data when netserver.py runs
with open('net_meta.json', 'r') as f:
    net_meta = json.load(f)

##
# input: arrayes
# output: p-value
#
# NOTE(review): calcpv's body lies beyond this chunk — definition incomplete here.
def calcpv(samplearr, v):
def load_model(path_model, input_size, output_size, gpu):
    """Instantiate a net.Network and restore its weights from a checkpoint.

    Args:
        path_model: path to a state-dict file saved with torch.save.
        input_size: network input dimension, forwarded to net.Network.
        output_size: network output dimension, forwarded to net.Network.
        gpu: truthy when the model should run on GPU; also forwarded to
            net.Network.

    Returns:
        The network with the checkpoint's parameters loaded.
    """
    model = net.Network(input_size, output_size, gpu)
    # Remap CUDA-saved tensors onto the CPU when no GPU is requested, so a
    # GPU-trained checkpoint still loads on a CPU-only machine.
    map_location = None if gpu else 'cpu'
    model.load_state_dict(torch.load(path_model, map_location=map_location))
    return model
# NOTE(review): `path` is used before any visible assignment — presumably
# defined earlier in the full file; confirm.
train, validation, test, verbose, path, file, modelf, gpu = main(path)
gpu = True
training = True
n_epochs = len(train)
n_iters = train[0].n_item
input_size = 400
output_size = 1
lr = 0.005
cycle = 5
if training:
    if verbose:
        print('###\tCreate the net')
    # create the net
    network = net.Network(input_size, output_size, gpu)
    if gpu:
        network.cuda()
    criterion = nn.L1Loss()
    optimizer = optim.SGD(network.parameters(), lr)
    if verbose:
        print('###\tStart training')
    start = timer()
    losses = np.zeros(n_epochs)
    hid = network.initHidden()
    # Outer `cycle` repetitions of the full epoch schedule; the loop body
    # continues beyond this chunk. NOTE: `input` shadows the builtin.
    for k in range(cycle):
        for epoch in range(n_epochs):
            input = []
            target = []
def main():
    """Policy-gradient pretraining of a single coreference network
    (Python 2, legacy PyTorch Variable API; assumes CUDA).

    Per document: sample cluster decisions for every mention, evaluate the
    resulting clustering with B-cubed F1, and use the F1 advantage over a
    running-average baseline as the REINFORCE reward.
    """
    DIR = args.DIR
    embedding_file = args.embedding_dir

    #network_file = "./model/model.pkl"
    #network_file = "./model/pretrain/network_model_pretrain.20"
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    # Resume from the saved model when present; otherwise build and save one.
    if os.path.isfile(network_file):
        print >> sys.stderr, "Read model from ./model/model.pkl"
        network_model = torch.load(network_file)
    else:
        embedding_matrix = numpy.load(embedding_file)
        #print len(embedding_matrix)
        "Building torch model"
        network_model = network.Network(pair_feature_dimention, mention_feature_dimention, word_embedding_dimention, span_dimention, 1000, embedding_size, embedding_dimention, embedding_matrix).cuda()
        print >> sys.stderr, "save model ..."
        torch.save(network_model, network_file)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    #train_docs = DataReader.DataGnerater("dev"+reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    #test_docs = DataReader.DataGnerater("test"+reduced)

    l2_lambda = 1e-6
    lr = 0.00002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    reinforce = True

    model_save_dir = "./model/pretrain/"

    # Initial dev B-cubed F1; f_b keeps a sliding window of F1 values used
    # as the reward baseline.
    metrics = performance.performance(dev_docs, network_model)
    p, r, f = metrics["b3"]
    f_b = [f]

    #for echo in range(30,200):
    for echo in range(20):
        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo
        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0
        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        cost = 0.0
        optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5, weight_decay=l2_lambda)

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0

        score_softmax = nn.Softmax()

        # Per-document sampling state: cluster id per mention, action trace.
        cluster_info = []
        new_cluster_num = 0
        cluster_info.append(-1)
        action_list = []
        new_cluster_info = []
        tmp_data = []

        #for data in train_docs.rl_case_generater():
        for data in train_docs.rl_case_generater(shuffle=True):
            inside_time += 1

            this_doc = train_docs
            tmp_data.append(data)

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

            # Map every gold mention to its gold chain for the reward trick.
            gold_chain = this_doc.gold_chain[rl["did"]]
            gold_dict = {}
            for chain in gold_chain:
                for item in chain:
                    gold_dict[item] = chain

            mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
            # NOTE(review): rebinding mention_span shadows the numpy array;
            # the update loop below re-unpacks it from `data`, so this is safe.
            mention_span = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

            output, pair_score = network_model.forward_all_pair(word_embedding_dimention, mention_index, mention_span, candi_index, candi_spans, pair_feature, anaphors, antecedents, dropout_rate)
            ana_output, ana_score = network_model.forward_anaphoricity(word_embedding_dimention, anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)

            reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
            scores_reindex = torch.transpose(torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
            #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]

            # Sample one cluster decision per mention; the last index of a
            # span opens a new cluster.
            for s, e in zip(rl["starts"], rl["ends"]):
                #action_prob: scores_reindex[s:e][1]
                score = score_softmax(torch.transpose(scores_reindex[s:e], 0, 1)).data.cpu().numpy()[0]
                this_action = utils.sample_action(score)
                #this_action = ac_list.index(max(score.tolist()))
                action_list.append(this_action)
                if this_action == len(score) - 1:
                    should_cluster = new_cluster_num
                    new_cluster_num += 1
                    new_cluster_info.append(1)
                else:
                    should_cluster = cluster_info[this_action]
                    new_cluster_info.append(0)
                cluster_info.append(should_cluster)

            if rl["end"] == True:
                # Document complete: score the sampled clustering and run one
                # policy-gradient step per buffered case.
                ev_document = utils.get_evaluation_document(cluster_info, this_doc.gold_chain[rl["did"]], candi_ids_return, new_cluster_num)
                p, r, f = evaluation.evaluate_documents([ev_document], evaluation.b_cubed)

                trick_reward = utils.get_reward_trick(cluster_info, gold_dict, new_cluster_info, action_list, candi_ids_return)

                #reward = f + trick_reward
                # Advantage: current F1 minus the running-average baseline,
                # scaled up for gradient magnitude.
                average_f = float(sum(f_b)) / len(f_b)
                reward = (f - average_f) * 10

                f_b.append(f)
                if len(f_b) > 128:
                    f_b = f_b[1:]

                index = 0
                for data in tmp_data:
                    mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
                    target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_span = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

                    rl_costs = autograd.Variable(torch.from_numpy(rl["costs"]).type(torch.cuda.FloatTensor))
                    rl_costs = torch.transpose(rl_costs, 0, 1)

                    output, pair_score = network_model.forward_all_pair(word_embedding_dimention, mention_index, mention_span, candi_index, candi_spans, pair_feature, anaphors, antecedents, dropout_rate)
                    ana_output, ana_score = network_model.forward_anaphoricity(word_embedding_dimention, anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)

                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))

                    optimizer.zero_grad()
                    loss = None
                    scores_reindex = torch.transpose(torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
                    #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]
                    for s, e in zip(rl["starts"], rl["ends"]):
                        #action_prob: scores_reindex[s:e][1]
                        this_action = action_list[index]
                        #current_reward = reward + trick_reward[index]
                        current_reward = reward
                        #this_loss = -reward*(torch.transpose(F.log_softmax(torch.transpose(scores_reindex[s:e],0,1)),0,1)[this_action])
                        # REINFORCE: -reward * log-prob of the sampled action.
                        this_loss = -current_reward * (torch.transpose(F.log_softmax(torch.transpose(scores_reindex[s:e], 0, 1)), 0, 1)[this_action])
                        if loss is None:
                            loss = this_loss
                        else:
                            loss += this_loss
                        index += 1
                    #loss /= len(rl["starts"])
                    loss /= len(rl["starts"])
                    #loss = loss/train_docs.scale_factor ## policy graident
                    cost += loss.data[0]
                    loss.backward()
                    optimizer.step()

                # Reset per-document sampling state.
                new_cluster_num = 0
                cluster_info = []
                cluster_info.append(-1)
                tmp_data = []
                action_list = []
                new_cluster_info = []

            #if inside_time%50 == 0:
            #    performance.performance(dev_docs,network_model)
            #    print
            #    sys.stdout.flush()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time - start_time)
        print >> sys.stderr, "cost:", cost
        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)

        performance.performance(dev_docs, network_model)
        sys.stdout.flush()
def main(): DIR = args.DIR embedding_file = args.embedding_dir best_network_file = "./model/network_model_pretrain.best" print >> sys.stderr,"Read model from",best_network_file best_network_model = torch.load(best_network_file) embedding_matrix = numpy.load(embedding_file) "Building torch model" network_model = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda() print >> sys.stderr,"save model ..." net_copy(network_model,best_network_model) reduced="" if args.reduced == 1: reduced="_reduced" print >> sys.stderr,"prepare data for train ..." train_docs = DataReader.DataGnerater("train"+reduced) print >> sys.stderr,"prepare data for dev and test ..." dev_docs = DataReader.DataGnerater("dev"+reduced) test_docs = DataReader.DataGnerater("test"+reduced) l2_lambda = 1e-6 lr = nnargs["lr"] dropout_rate = nnargs["dropout_rate"] epoch = nnargs["epoch"] model_save_dir = "./model/bp/" last_cost = 0.0 all_best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5) scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5) for echo in range(epoch): start_time = timeit.default_timer() print "Pretrain Epoch:",echo scheduler.step() pair_cost_this_turn = 0.0 ana_cost_this_turn = 0.0 pair_nums = 0 ana_nums = 0 for data in train_docs.train_generater(shuffle=True): mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor)) mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor)) pair_feature = 
autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor)) antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor)) reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor)) start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor)) end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor)) top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor)) anaphoricity_target = data["anaphoricity_target"] anaphoricity_gold = anaphoricity_target.tolist() ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold])) optimizer.zero_grad() output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,dropout_rate) loss = F.binary_cross_entropy(output,top_gold,size_average=False)/train_docs.scale_factor_top ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate) ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor_top loss_all = loss + ana_loss loss_all.backward() pair_cost_this_turn += loss.data[0] optimizer.step() end_time = timeit.default_timer() print >> sys.stderr, "PreTrain",echo,"Pair total 
cost:",pair_cost_this_turn print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time) print >> sys.stderr, "Learning Rate",lr gold = [] predict = [] ana_gold = [] ana_predict = [] for data in dev_docs.train_generater(shuffle=False): mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor)) mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor)) pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor)) antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor)) reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor)) start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor)) end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor)) top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor)) anaphoricity_target = data["anaphoricity_target"] anaphoricity_gold = anaphoricity_target.tolist() ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold])) gold += data["top_gold"].tolist() ana_gold += anaphoricity_target.tolist() output,output_reindex = 
network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,0.0) predict += output.data.cpu().numpy().tolist() ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0) ana_predict += ana_output.data.cpu().numpy()[0].tolist() gold = numpy.array(gold,dtype=numpy.int32) predict = numpy.array(predict) best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } thresh_list = [0.3,0.35,0.4,0.45,0.5,0.55,0.6] for thresh in thresh_list: evaluation_results = get_metrics(gold, predict, thresh) if evaluation_results["f1"] >= best_results["f1"]: best_results = evaluation_results print "Pair accuracy: %f and Fscore: %f with thresh: %f"\ %(best_results["accuracy"],best_results["f1"],best_results["thresh"]) sys.stdout.flush() if best_results["f1"] >= all_best_results["f1"]: all_best_results = best_results print >> sys.stderr, "New High Result, Save Model" torch.save(network_model, model_save_dir+"network_model_pretrain.best.top") ana_gold = numpy.array(ana_gold,dtype=numpy.int32) ana_predict = numpy.array(ana_predict) best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } for thresh in thresh_list: evaluation_results = get_metrics(ana_gold, ana_predict, thresh) if evaluation_results["f1"] >= best_results["f1"]: best_results = evaluation_results print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\ %(best_results["accuracy"],best_results["f1"],best_results["thresh"]) sys.stdout.flush() if (echo+1)%10 == 0: best_network_model = torch.load(model_save_dir+"network_model_pretrain.best.top") print "DEV:" performance.performance(dev_docs,best_network_model) print "TEST:" performance.performance(test_docs,best_network_model)
def main(): DIR = args.DIR embedding_file = args.embedding_dir best_network_file = "./model/pretrain/network_model_pretrain.best" print >> sys.stderr, "Read model from ./model/model.pkl" best_network_model = torch.load(best_network_file) embedding_matrix = numpy.load(embedding_file) "Building torch model" network_model = network.Network(pair_feature_dimention, mention_feature_dimention, word_embedding_dimention, span_dimention, 1000, embedding_size, embedding_dimention, embedding_matrix).cuda() print >> sys.stderr, "save model ..." #torch.save(network_model,network_file) net_copy(network_model, best_network_model) reduced = "" if args.reduced == 1: reduced = "_reduced" print >> sys.stderr, "prepare data for train ..." train_docs = DataReader.DataGnerater("train" + reduced) print >> sys.stderr, "prepare data for dev and test ..." dev_docs = DataReader.DataGnerater("dev" + reduced) test_docs = DataReader.DataGnerater("test" + reduced) l2_lambda = 1e-6 lr = 0.0002 dropout_rate = 0.5 shuffle = True times = 0 best_thres = 0.5 model_save_dir = "./model/pretrain/" last_cost = 0.0 all_best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } for echo in range(100): start_time = timeit.default_timer() print "Pretrain Epoch:", echo #if echo == 100: # lr = lr/2.0 #if echo == 150: # lr = lr/2.0 #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda) #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda) optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5, weight_decay=l2_lambda) pair_cost_this_turn = 0.0 ana_cost_this_turn = 0.0 pair_nums = 0 ana_nums = 0 pos_num = 0 neg_num = 0 inside_time = 0.0 for data in train_docs.train_generater(shuffle=shuffle, top=True): mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\ target,positive,negative,anaphoricity_word_indexs, 
anaphoricity_spans, anaphoricity_features, anaphoricity_target,top_x = data mention_index = autograd.Variable( torch.from_numpy(mention_word_index).type( torch.cuda.LongTensor)) mention_span = autograd.Variable( torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable( torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable( torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = autograd.Variable( torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable( torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable( torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable( torch.from_numpy(anaphoricity_word_indexs).type( torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable( torch.from_numpy(anaphoricity_spans).type( torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable( torch.from_numpy(anaphoricity_features).type( torch.cuda.FloatTensor)) reindex = autograd.Variable( torch.from_numpy(top_x["score_index"]).type( torch.cuda.LongTensor)) start_index = autograd.Variable( torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor)) end_index = autograd.Variable( torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor)) top_gold = autograd.Variable( torch.from_numpy(top_x["top_gold"]).type( torch.cuda.FloatTensor)) anaphoricity_gold = anaphoricity_target.tolist() ana_lable = autograd.Variable( torch.cuda.FloatTensor([anaphoricity_gold])) optimizer.zero_grad() output, output_reindex = network_model.forward_top_pair( word_embedding_dimention, mention_index, mention_span, candi_index, candi_spans, pair_feature, anaphors, antecedents, reindex, start_index, end_index, dropout_rate) loss = F.binary_cross_entropy( output, top_gold, size_average=False) / train_docs.scale_factor_top ana_output, _ = network_model.forward_anaphoricity( 
word_embedding_dimention, anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate) ana_loss = F.binary_cross_entropy( ana_output, ana_lable, size_average=False) / train_docs.anaphoricity_scale_factor_top loss_all = loss + ana_loss loss_all.backward() pair_cost_this_turn += loss.data[0] optimizer.step() end_time = timeit.default_timer() print >> sys.stderr, "PreTrain", echo, "Pair total cost:", pair_cost_this_turn print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time - start_time) print >> sys.stderr, "Learning Rate", lr print >> sys.stderr, "save model ..." torch.save(network_model, model_save_dir + "network_model_pretrain.%d.top" % echo) #if cost_this_turn > last_cost: # lr = lr*0.7 gold = [] predict = [] ana_gold = [] ana_predict = [] for data in dev_docs.train_generater(shuffle=False, top=True): mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\ target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target, top_x = data mention_index = autograd.Variable( torch.from_numpy(mention_word_index).type( torch.cuda.LongTensor)) mention_span = autograd.Variable( torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable( torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable( torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = autograd.Variable( torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable( torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable( torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable( torch.from_numpy(anaphoricity_word_indexs).type( torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable( torch.from_numpy(anaphoricity_spans).type( torch.cuda.FloatTensor)) anaphoricity_feature = 
autograd.Variable( torch.from_numpy(anaphoricity_features).type( torch.cuda.FloatTensor)) reindex = autograd.Variable( torch.from_numpy(top_x["score_index"]).type( torch.cuda.LongTensor)) start_index = autograd.Variable( torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor)) end_index = autograd.Variable( torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor)) gold += top_x["top_gold"].tolist() ana_gold += anaphoricity_target.tolist() output, output_reindex = network_model.forward_top_pair( word_embedding_dimention, mention_index, mention_span, candi_index, candi_spans, pair_feature, anaphors, antecedents, reindex, start_index, end_index, 0.0) predict += output.data.cpu().numpy().tolist() ana_output, _ = network_model.forward_anaphoricity( word_embedding_dimention, anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0) ana_predict += ana_output.data.cpu().numpy()[0].tolist() gold = numpy.array(gold, dtype=numpy.int32) predict = numpy.array(predict) best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } thresh_list = [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6] for thresh in thresh_list: evaluation_results = get_metrics(gold, predict, thresh) if evaluation_results["f1"] >= best_results["f1"]: best_results = evaluation_results print "Pair accuracy: %f and Fscore: %f with thresh: %f"\ %(best_results["accuracy"],best_results["f1"],best_results["thresh"]) sys.stdout.flush() if best_results["f1"] > all_best_results["f1"]: all_best_results = best_results print >> sys.stderr, "New High Result, Save Model" torch.save(network_model, model_save_dir + "network_model_pretrain.top.best") ana_gold = numpy.array(ana_gold, dtype=numpy.int32) ana_predict = numpy.array(ana_predict) best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } for thresh in thresh_list: evaluation_results = get_metrics(ana_gold, ana_predict, thresh) if evaluation_results["f1"] >= best_results["f1"]: best_results = 
evaluation_results print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\ %(best_results["accuracy"],best_results["f1"],best_results["thresh"]) sys.stdout.flush() if (echo + 1) % 10 == 0: best_network_model = torch.load(model_save_dir + "network_model_pretrain.top.best") print "DEV:" performance.performance(dev_docs, best_network_model) print "TEST:" performance.performance(test_docs, best_network_model)
def main(): DIR = args.DIR embedding_file = args.embedding_dir embedding_matrix = numpy.load(embedding_file) "Building torch model" network_model = network.Network( nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"], nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000, nnargs["embedding_size"], nnargs["embedding_dimention"], embedding_matrix).cuda() reduced = "" if args.reduced == 1: reduced = "_reduced" print >> sys.stderr, "prepare data for train ..." train_docs = DataReader.DataGnerater("train" + reduced) print >> sys.stderr, "prepare data for dev and test ..." dev_docs = DataReader.DataGnerater("dev" + reduced) test_docs = DataReader.DataGnerater("test" + reduced) l2_lambda = 1e-6 #lr = 0.00009 lr = 0.0001 dropout_rate = 0.5 shuffle = True times = 0 best_thres = 0.5 model_save_dir = "./model/" last_cost = 0.0 all_best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5) scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5) for echo in range(100): start_time = timeit.default_timer() print "Pretrain Epoch:", echo scheduler.step() pair_cost_this_turn = 0.0 ana_cost_this_turn = 0.0 pair_nums = 0 ana_nums = 0 inside_time = 0.0 for data in train_docs.train_generater(shuffle=shuffle): mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\ target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data mention_index = autograd.Variable( torch.from_numpy(mention_word_index).type( torch.cuda.LongTensor)) mention_span = autograd.Variable( torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable( torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable( torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = 
autograd.Variable( torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable( torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable( torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable( torch.from_numpy(anaphoricity_word_indexs).type( torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable( torch.from_numpy(anaphoricity_spans).type( torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable( torch.from_numpy(anaphoricity_features).type( torch.cuda.FloatTensor)) gold = target.tolist() anaphoricity_gold = anaphoricity_target.tolist() pair_nums += len(gold) ana_nums += len(anaphoricity_gold) lable = autograd.Variable(torch.cuda.FloatTensor([gold])) ana_lable = autograd.Variable( torch.cuda.FloatTensor([anaphoricity_gold])) output, _ = network_model.forward_all_pair( nnargs["word_embedding_dimention"], mention_index, mention_span, candi_index, candi_spans, pair_feature, anaphors, antecedents, dropout_rate) ana_output, _ = network_model.forward_anaphoricity( nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate) optimizer.zero_grad() #loss = get_pair_loss(output,positive,negative,train_docs.scale_factor) loss = F.binary_cross_entropy( output, lable, size_average=False) / train_docs.scale_factor #ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor pair_cost_this_turn += loss.data[0] * train_docs.scale_factor loss_all = loss loss_all.backward() optimizer.step() end_time = timeit.default_timer() print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time - start_time) print >> sys.stderr, "Learning Rate", lr #print >> sys.stderr,"save model ..." 
#torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo) gold = [] predict = [] ana_gold = [] ana_predict = [] for data in dev_docs.train_generater(shuffle=False): mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\ target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data mention_index = autograd.Variable( torch.from_numpy(mention_word_index).type( torch.cuda.LongTensor)) mention_span = autograd.Variable( torch.from_numpy(mention_span).type(torch.cuda.FloatTensor)) candi_index = autograd.Variable( torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor)) candi_spans = autograd.Variable( torch.from_numpy(candi_span).type(torch.cuda.FloatTensor)) pair_feature = autograd.Variable( torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor)) anaphors = autograd.Variable( torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor)) antecedents = autograd.Variable( torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor)) anaphoricity_index = autograd.Variable( torch.from_numpy(anaphoricity_word_indexs).type( torch.cuda.LongTensor)) anaphoricity_span = autograd.Variable( torch.from_numpy(anaphoricity_spans).type( torch.cuda.FloatTensor)) anaphoricity_feature = autograd.Variable( torch.from_numpy(anaphoricity_features).type( torch.cuda.FloatTensor)) gold += target.tolist() ana_gold += anaphoricity_target.tolist() output, _ = network_model.forward_all_pair( nnargs["word_embedding_dimention"], mention_index, mention_span, candi_index, candi_spans, pair_feature, anaphors, antecedents, 0.0) predict += output.data.cpu().numpy()[0].tolist() ana_output, _ = network_model.forward_anaphoricity( nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0) ana_predict += ana_output.data.cpu().numpy()[0].tolist() gold = numpy.array(gold, dtype=numpy.int32) predict = numpy.array(predict) 
best_results = { 'thresh': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0 } thresh_list = [0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6] for thresh in thresh_list: evaluation_results = get_metrics(gold, predict, thresh) if evaluation_results["f1"] >= best_results["f1"]: best_results = evaluation_results print "Pair accuracy: %f and Fscore: %f with thresh: %f"\ %(best_results["accuracy"],best_results["f1"],best_results["thresh"]) sys.stdout.flush() if best_results["f1"] >= all_best_results["f1"]: all_best_results = best_results print >> sys.stderr, "New High Result, Save Model" torch.save(network_model, model_save_dir + "network_model_pretrain.best.pair") sys.stdout.flush() ## output best print "In sum, anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\ %(best_results["accuracy"],best_results["f1"],best_results["thresh"]) sys.stdout.flush()
# Training driver for the RNN experiment: unpacks the data splits and
# configuration produced by main(path), builds the net, and iterates epochs.
train, validation, test, verbose, path, file, modelf = main(path)

n_epochs = len(train)
n_iters = train[0].n_item

# Network geometry / hyper-parameters.
input_size = 2
h1 = 256
h2 = 128
h3 = 64          # NOTE(review): declared but never passed to the constructor
output_size = 3  # fixed
lr = 0.005

if verbose:
    print('###\tCreate the net')
# create the net
rnn = net.Network(input_size, n_iters, output_size, False, lr, h1, h2)
criterion = nn.MSELoss()
optimizer = optim.SGD(rnn.parameters(), lr)

if verbose:
    print('###\tStart training')
start = timer()
losses = np.zeros(n_epochs)
for epoch in range(n_epochs):
    inputs = []
    targets = []
    for i in range(len(train[epoch].items)):
        inputs.append(train[epoch].items[i][0])
        # NOTE(review): the source chunk is cut off here — the original loop
        # presumably also fills `targets`; confirm against the full file.
def main():
    """Train a simple fully-connected network on MNIST and log predictions.

    Loads and shuffles MNIST, logs a few raw samples to an HTML logger,
    reshapes images to 784x1 column vectors and labels to one-hot 10x1
    vectors, then reports cost/accuracy before and after fitting.

    Improvement: dataset sizes are derived from the loaded arrays instead of
    the hard-coded 60000/10000, so the code survives a changed split.
    """
    data_dir = "../data/"
    os.makedirs(data_dir, exist_ok=True)
    np.set_printoptions(suppress=True)

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    x_train, y_train = sklearn.utils.shuffle(x_train, y_train)
    x_test, y_test = sklearn.utils.shuffle(x_test, y_test)

    logger = utilities.get_logger(os.path.join(data_dir, "mnist.html"))
    log_size = 10

    # Log a few samples
    utilities.log_samples(logger, x_test[:log_size], y_test[:log_size])

    # Reshape 28x28 matrices to 784-element column vectors; sizes come from
    # the data itself (60000/10000 for standard MNIST).
    n_train = x_train.shape[0]
    n_test = x_test.shape[0]
    x_train_flat = x_train.reshape(n_train, 784, 1)
    x_test_flat = x_test.reshape(n_test, 784, 1)

    # ys are scalars, convert them to one-hot encoded column vectors
    y_train_categorical = keras.utils.to_categorical(y_train, num_classes=10).reshape(n_train, 10, 1)
    y_test_categorical = keras.utils.to_categorical(y_test, num_classes=10).reshape(n_test, 10, 1)

    model = net.Network(layers=[784, 100, 50, 10])

    # Log untrained model predictions
    log_predictions(logger, model, x_test[:log_size], y_test[:log_size], header="Untrained model")

    train_cost, train_accuracy = net.get_statistics(model, x_train_flat, y_train_categorical)
    print("Initial training cost: {:.3f}, training accuracy: {:.3f}".format(train_cost, train_accuracy))
    test_cost, test_accuracy = net.get_statistics(model, x_test_flat, y_test_categorical)
    print("Initial test cost: {:.3f}, test accuracy: {:.3f}".format(test_cost, test_accuracy))

    model.fit(x_train_flat, y_train_categorical, epochs=10, learning_rate=0.1, x_test=x_test_flat, y_test=y_test_categorical)

    # Log trained model predictions
    log_predictions(logger, model, x_test[:log_size], y_test[:log_size], header="Trained model")