def setup():
    bus = smbus.SMBus(1)
    slave_address = 0x07
    enc_left = Encoder('left', 100, bus, slave_address)
    enc_right = Encoder('right', 100, bus, slave_address)
    drive_train = DriveTrain(enc_left, enc_right, bus, slave_address)
    ult_left = Ultrasonic('left', 22, 23)
    ult_front = Ultrasonic('front', 20, 21)
    ult_right = Ultrasonic('right', 18, 19)
    simple_filter = Filter(drive_train, ult_left, ult_front, ult_right, bus, slave_address)
    return simple_filter
def __init__(self, feature_dim, embed_dim, m_size, day, hour):
    super(STEMGEM, self).__init__()
    self.m_size = m_size
    self.graph = Encoder(feature_dim, embed_dim, m_size, hour)
    self.gat = GAT(nfeat=feature_dim, nhid=8, nclass=4, dropout=0.5, nheads=2, alpha=0.5)
    self.cnnday = CnnDay(m_size, day)
    self.cnnhour = CnnHour(m_size, hour)
    self.embed_size = 4 + 4
    self.rnn = nn.GRU(self.embed_size, self.embed_size, 1)
    # self.rnn = self.rnn.cuda()
    self.w1 = nn.Parameter(torch.FloatTensor(m_size, m_size))
    init.xavier_uniform_(self.w1)
    self.w2 = nn.Parameter(torch.FloatTensor(m_size, m_size))
    init.xavier_uniform_(self.w2)
    self.w3 = nn.Parameter(torch.FloatTensor(m_size, m_size))
    init.xavier_uniform_(self.w3)
    self.w4 = nn.Parameter(torch.FloatTensor(m_size, m_size))
    init.xavier_uniform_(self.w4)
    self.tran_Matrix = nn.Parameter(torch.FloatTensor(2, self.embed_size))
    init.xavier_uniform_(self.tran_Matrix)
    self.hn = nn.Parameter(torch.FloatTensor(1, self.m_size, self.embed_size))
    init.xavier_uniform_(self.hn)
    self.loss_fn = torch.nn.MSELoss()
def home(request):
    message = None
    if request.method == "POST":
        print("POST parameters: ", request.POST)
        print("Files: ", request.FILES)
        # Build the form
        form = models.UploadModelForm(request.POST, request.FILES)
        if form.is_valid():
            # Read the data and upload it to the location defined in UploadModel
            form.save()
            # Save the name of the uploaded file
            uploaded_filename = form.cleaned_data["filepicker_file"].name
            # Build the full input path to the file
            MEDIA_UPLOAD_ROOT = os.path.join(os.getcwd(), "media/uploads/")
            input_path = MEDIA_UPLOAD_ROOT + uploaded_filename
            # Build the output filename
            output_filename = uploaded_filename + "_transcoded.mkv"
            # Build the output path
            MEDIA_DOWNLOADS_ROOT = os.path.join(os.getcwd(), "media/finished/")
            # If the output folder doesn't exist, create it
            if not os.path.exists(MEDIA_DOWNLOADS_ROOT):
                os.makedirs(MEDIA_DOWNLOADS_ROOT)
            output_path = MEDIA_DOWNLOADS_ROOT + output_filename
            # Transcode the file
            video_encoder = Encoder(input_path, output_path)
            video_encoder.start()
            while video_encoder.alive():
                print(video_encoder.get_time_elapsed())
                time.sleep(0.1)
            # Return the path to the file
            message = output_filename
        else:
            message = None
    else:
        form = models.UploadModelForm()
    return render(request, "home.html", {"form": form, "message": message})
def main():
    env = gym.make('TexasHoldem-v1', n_seats=N_PLAYERS, max_limit=100000,
                   all_in_equity_reward=True, equity_steps=1000, debug=False)
    for i in range(N_PLAYERS):
        env.add_player(i, stack=2000)
    for p in env._seats:
        p.reset_stack()
    state = env.reset()
    # print(state)
    (player_states, (community_infos, community_cards)) = state
    # print('player states', player_states)
    # print('community info', community_infos)
    encoder = Encoder(N_PLAYERS, ranking_encoding=None)
    start_time = time.time()
    # community_info, players_info, community_cards, player_cards = encoder.encode(player_states, community_infos, community_cards, 0, concat=False)
    encoded_state = encoder.encode(player_states, community_infos, community_cards, 0)
    time_taken = time.time() - start_time
    print(time_taken)
    # print(encoded_state.shape)
    tr_env = TrainingEnv.build_environment('asd', N_PLAYERS)
    # print(tr_env.n_observation_dimensions)
    # print('Community Info:', community_info)
    # print('Players Info:', players_info)
    # print('Community Cards:', community_cards)
    # print('Player Cards:', player_cards)
    # print('Hand Rank:', hand_rank)
    # print(encoder.encode_slow(player_states, community_infos, community_cards, 0))
    step = [[1, 0]] * N_PLAYERS
    # print(step)
    state, reward, done, info = env.step(step)
    (player_states, (community_infos, community_cards)) = state
    encoded_state = encoder.encode(player_states, community_infos, community_cards, 0)
    step = [[0, 0]] * N_PLAYERS
    state, reward, done, info = env.step(step)
    (player_states, (community_infos, community_cards)) = state
    encoded_state = encoder.encode(player_states, community_infos, community_cards, 0)
def load_model_and_configuration(model_name):
    """
    Loads configuration and skeleton of trained model
    :param model_name: string ID of trained model
    :return: loaded model and configuration values
    """
    with open("public_data/models/configuration_classifier_%s.pkl" % model_name, 'rb') as infile:
        configuration = pickle.load(infile)
    if "mucl" in model_name:
        model = Classifier(embeddings=np.zeros(configuration["EMB_SHAPE"]),
                           num_classes=len(configuration["LABEL2ID"]),
                           dropout=configuration["DROPOUT"],
                           hidden_size=configuration["HIDDEN_SIZE"],
                           num_directions=configuration["NUM_DIR"])
        load_model(model_name, model)
        label2id = configuration["LABEL2ID"]
        labels = configuration["LABELS"]
        label_type = configuration["LABEL_TYPE"]
        return model, label_type, label2id, labels
    elif "pair_cel" in model_name:
        model = Encoder(embeddings=np.zeros(configuration["EMB_SHAPE"]),
                        dropout=configuration["DROPOUT"],
                        hidden_size=configuration["HIDDEN_SIZE"],
                        num_directions=configuration["NUM_DIR"])
        load_model(model_name, model)
        label_type = configuration["LABEL_TYPE"]
        threshold = configuration["THRESHOLD"]
        return model, label_type, threshold
    elif "triplet" in model_name:
        encoder = Encoder(embeddings=np.zeros(configuration["EMB_SHAPE"]),
                          dropout=configuration["DROPOUT"],
                          hidden_size=configuration["HIDDEN_SIZE"],
                          num_directions=configuration["NUM_DIR"])
        model = TripletEncoder(encoder)
        load_model(model_name, model)
        vfc_type = configuration["LABEL_TYPE"]
        threshold = configuration["THRESHOLD"]
        confidence = configuration["CONFIDENCE"]
        return model, vfc_type, threshold, confidence
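# Hypothetical usage sketch for load_model_and_configuration(): the model id
# below is illustrative only (ids follow the "<experiment>_<label_type>_<threshold>_pair_cel_<8-char-uuid>"
# pattern built by the training script) and no such checkpoint is assumed to exist.
model, label_type, threshold = load_model_and_configuration("glove_topic_5_pair_cel_1a2b3c4d")
model.eval()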
def __init__(self):
    """ Instantiates arrays and encoder objects """
    mh = Adafruit_MotorHAT(addr=0x60)
    e = [None, None, None, None]
    for motor in range(0, 4):
        # Init encoders
        ePin = PINS[motor]
        if ePin is not None:
            e[motor] = Encoder(ePin)
        else:
            e[motor] = Encoder(-1)
    # Set GPIO pins for writing implement
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(WRITINGPINS[0], GPIO.OUT)
    GPIO.setup(WRITINGPINS[1], GPIO.OUT)
    GPIO.setup(WRITINGPINS[2], GPIO.OUT)
    self.pwm = GPIO.PWM(WRITINGPINS[2], 490)
    self.pwm.start(0)
    self.encoders = e
    self.prevErrors = np.array([0.0, 0.0, 0.0, 0.0])
    # Thread exit flags
    self.stopFlag = False
    self.currThread = None
    self.motors = [mh.getMotor(1), mh.getMotor(2), mh.getMotor(3), mh.getMotor(4)]
    atexit.register(self.stopMotors)
    # Writing implement starts off unactuated
    self.isWriting = False
    self.writingThread = None
    self._debug = 1
def run_cora():
    np.random.seed(1)
    random.seed(1)
    num_nodes = 2708
    feat_data, labels, adj_lists = load_cora()
    adj_score_list, adj_score_sum = findNeighbor(adj_lists)
    features = nn.Embedding(2708, 1433)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()
    agg1 = MeanAggregator(adj_score_list, adj_score_sum, features, cuda=True)
    enc1 = Encoder(features, 1433, 128, adj_lists, agg1, gcn=True, cuda=False)
    agg2 = MeanAggregator(adj_score_list, adj_score_sum, lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   base_model=enc1, gcn=True, cuda=False)
    enc1.num_samples = 5
    enc2.num_samples = 5
    graphsage = SupervisedGraphSage(7, enc2)
    # graphsage.cuda()
    rand_indices = np.random.permutation(num_nodes)
    test = rand_indices[:1000]
    val = rand_indices[1000:1500]
    train = list(rand_indices[1500:])
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.7)
    times = []
    for batch in range(100):
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(batch_nodes,
                              Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        print(batch, loss.data.item())
    val_output = graphsage.forward(val)
    print("Validation F1:",
          f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    print("Average batch time:", np.mean(times))
def run_cora():
    np.random.seed(1)
    random.seed(1)
    num_nodes = 2708
    mini_batch_size = 256
    feat_data, labels, adj_lists = load_cora()
    print(feat_data.shape)
    features = nn.Embedding(2708, 1433)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, 1433, 128, adj_lists, agg1, gcn=True, cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False, gcn=True)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   base_model=enc1, gcn=True, cuda=False)
    enc1.num_samples = 5
    enc2.num_samples = 5
    graphsage = SupervisedGraphSage(7, enc2)
    # graphsage.cuda()
    rand_indices = np.random.permutation(num_nodes)
    test = rand_indices[:1000]
    val = rand_indices[1000:1500]
    train = list(rand_indices[1500:])
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.7)
    times = []
    num_train_nodes = len(train)
    mini_batches_iterator = minibatch_iter(num_train_nodes, mini_batch_size)
    n_epochs = 3
    for epoch in range(n_epochs):
        # one epoch
        print("Start running epoch {0} / {1}".format(epoch + 1, n_epochs))
        random.shuffle(train)
        mini_batches = iter(mini_batches_iterator)
        for start, end in mini_batches:
            batch_nodes = train[start:end]
            start_time = time.time()
            optimizer.zero_grad()
            loss = graphsage.loss(batch_nodes,
                                  Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
            loss.backward()
            optimizer.step()
            end_time = time.time()
            times.append(end_time - start_time)
            print("\t", start, end, loss.data.item())
    val_output = graphsage.forward(val)
    print("Validation F1:",
          f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    print("Average batch time:", np.mean(times) if len(times) else 0)
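# Note: minibatch_iter() is referenced above but not shown. Below is a minimal
# sketch of one plausible implementation, assumed rather than taken from the
# original code: a re-iterable object yielding (start, end) index pairs, which
# matches how it is used (iter() is called on it once per epoch).
class minibatch_iter:
    def __init__(self, num_items, batch_size):
        self.num_items = num_items
        self.batch_size = batch_size

    def __iter__(self):
        # Yield half-open (start, end) ranges covering all items.
        for start in range(0, self.num_items, self.batch_size):
            yield start, min(start + self.batch_size, self.num_items)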
def main():
    parser = argparse.ArgumentParser(description='Encode text file to binary')
    parser.add_argument('file', metavar='<file>')
    args = parser.parse_args()
    finput = args.file
    global model, encoder
    model = Model()
    encoder = Encoder()
    with open(finput) as f:
        for line in f:
            if doLine(line):
                break
    encoder.encode_symbol(model.EOF_SYMBOL, model.total_freq)
    encoder.done_encoding()
def __init__(self, env, other_players, n_seats, stacks=2500, encoding='norm',
             encoder=None, decoder=None, visualize=False, debug=False):
    assert len(other_players) == n_seats - 1
    self.env = env
    self.other_players = other_players
    self.n_seats = n_seats
    self._debug = debug
    self._visualize = visualize
    self._encoder = encoder if encoder is not None else Encoder(n_seats, ranking_encoding=encoding)
    self._decoder = decoder if decoder is not None else Decoder()
    self._add_players(n_seats, stacks)
def main():
    encoder_r = Encoder(pins.encoder_pin_r1, pins.encoder_pin_r2)
    position_r = -999
    encoder_l = Encoder(pins.encoder_pin_l1, pins.encoder_pin_l2)
    position_l = -999
    while True:
        new_pos_r = -encoder_r.read()
        new_pos_l = encoder_l.read()
        if new_pos_r != position_r or new_pos_l != position_l:
            print("Position_r = {}".format(new_pos_r))
            position_r = new_pos_r
            print("Position_l = {}".format(new_pos_l))
            position_l = new_pos_l  # was misspelled "postion_l", so the left position never updated
        sleep(1)
def run_cora():
    np.random.seed(1)
    random.seed(1)
    num_nodes = 2708
    feat_data, labels, adj_lists, IDs = load_cora()
    print(feat_data[0])
    features = nn.Embedding(2708, 1433)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, 1433, 128, adj_lists, agg1, cuda=False)
    print(enc1.embed_dim)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   base_model=enc1, cuda=False, text_label="Encoder_1")
    agg3 = MeanAggregator(lambda nodes: enc2(nodes).t(), cuda=False)
    enc3 = Encoder(lambda nodes: enc2(nodes).t(), enc2.embed_dim, 128, adj_lists, agg3,
                   base_model=enc1, cuda=False, text_label="Encoder_2")
    enc1.num_samples = 5
    enc2.num_samples = 5
    enc3.num_samples = 5
    graphsage = SupervisedGraphSage(7, enc2)
    # graphsage.cuda()
    rand_indices = range(num_nodes)
    test = rand_indices[2166:]
    val = rand_indices[2165:2166]
    train = list(rand_indices[:2165])
    print(np.shape(train))
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()),
                                lr=0.6, weight_decay=4e-5)
    times = []
    best_acc = 0
    for batch in range(150):
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(batch_nodes,
                              Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        if batch % 25 == 0:
            val_output = graphsage.forward(val)
            val_pridict = val_output.data.numpy().argmax(axis=1)
            if best_acc < accuracy_score(labels[val], val_pridict):
                best_acc = accuracy_score(labels[val], val_pridict)
                if best_acc > 0.80:
                    output = open('u6014942.csv', 'w')
                    output.write('id,label\n')
                    test_output = graphsage.forward(test)
                    test_pridict = test_output.data.numpy().argmax(axis=1)
                    print(np.shape(val_pridict))
                    cnt_output = 0
                    print(np.shape(IDs))
                    for i in list(test):
                        output.write('%s,%s\n' % (IDs[i], test_pridict[cnt_output] + 1))
                        cnt_output = cnt_output + 1
                    output.close()
            print("batch", batch, " Validation accuracy:",
                  accuracy_score(labels[val], val_pridict), "best_acc:", best_acc)
            # print(batch, loss.data[0])
    output = open('u6014942_final.csv', 'w')
    output.write('id,label\n')
    test_output = graphsage.forward(test)
    test_pridict = test_output.data.numpy().argmax(axis=1)
    print(np.shape(val_pridict))
    cnt_output = 0
    print(np.shape(IDs))
    for i in list(test):
        output.write('%s,%s\n' % (IDs[i], test_pridict[cnt_output] + 1))
        cnt_output = cnt_output + 1
    output.close()
    # print(val)
    # print("Validation F1:", accuracy_score(labels[val], val_pridict))
    print("Average batch time:", np.mean(times))
def run_graphsage(feat_data, labels, adj_lists, train, val, test, num_classes,
                  model_class=SupervisedGraphSage):
    np.random.seed(1)
    random.seed(1)
    num_nodes = feat_data.shape[0]
    # feat_data, labels, adj_lists = load_cora()
    features = nn.Embedding(feat_data.shape[0], feat_data.shape[1])
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    if args.cuda:
        features.cuda()
    if model_class == SupervisedGraphSageConcat2:
        raise NotImplementedError()  # The code seems to be not working...
        linear_embed_weights = nn.Parameter(
            torch.FloatTensor(feat_data.shape[1], args.hid_units), requires_grad=True)
        init.xavier_uniform(linear_embed_weights)
        features.weight = nn.Parameter(features.weight.mm(linear_embed_weights),
                                       requires_grad=False)
    agg1 = MeanAggregator(features, cuda=args.cuda, gcn=args.gcn_aggregator)
    enc1 = Encoder(features, features.weight.shape[1], args.hid_units, adj_lists, agg1,
                   gcn=args.gcn_encoder, cuda=args.cuda)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=args.cuda, gcn=args.gcn_aggregator)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, args.hid_units, adj_lists,
                   agg2, base_model=enc1, gcn=args.gcn_encoder, cuda=args.cuda)
    enc1.num_samples = args.num_samples[0]
    enc2.num_samples = args.num_samples[1]
    if model_class == SupervisedGraphSageConcat:
        graphsage = model_class(num_classes, enc1, enc2)
    elif model_class == SupervisedGraphSageConcat2:
        graphsage = model_class(num_classes, enc1, enc2)
    else:
        graphsage = model_class(num_classes, enc2)
    if args.cuda:
        graphsage.cuda()
    optimizer = torch.optim.SGD([p for p in graphsage.parameters() if p.requires_grad], lr=args.lr)
    times = []
    record_dict = dict()
    best_val_record_dict = None
    for batch in range(args.epochs):
        batch_nodes = train[:args.batch_size]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(batch_nodes,
                              Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        train_acc = accuracy(graphsage.forward(train), labels[train])
        val_acc = accuracy(graphsage.forward(val), labels[val])
        test_acc = accuracy(graphsage.forward(test), labels[test])
        print(batch, loss.data, train_acc, val_acc, test_acc)
        record_dict.update(dict(epoch=int(batch + 1),
                                train_loss=float(loss.data),
                                train_acc=float(train_acc),
                                val_acc=float(val_acc),
                                test_accuracy=float(test_acc),
                                time=str(end_time - start_time),
                                early_stopping=False))
        if (best_val_record_dict is None) or (record_dict["val_acc"] >= best_val_record_dict["val_acc"]):
            best_val_record_dict = record_dict.copy()
    val_output = graphsage.forward(val)
    print("Validation F1:",
          f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    print("Average batch time:", np.mean(times))
    print(best_val_record_dict)
    if args.use_signac:
        with open(job.fn("results.json"), "w") as f:
            json.dump(best_val_record_dict, f)
        print("Results recorded to {}".format(job.fn("results.json")))
        job.data["correct_label"] = labels
def run_edgelist(name="chg-miner",
                 edgelist_path="../data/chg-miner/chg-miner-graph.txt",
                 label_path="../data/chg-miner/chg-miner-labels.txt",
                 # used to initialize + for distances
                 embedding_path="../poincare/embeddings/poincare_chg_miner_noburn.txt",
                 embedding_header=False):
    feat_data, labels, adj_lists, num_nodes = load_edgelist(name, edgelist_path, label_path,
                                                            embedding_path, embedding_header)
    features = nn.Embedding(num_nodes, feat_data.shape[1])
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # network
    node_ordering_embeddings = load_embeddings(embedding_path, embedding_header)
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, feat_data.shape[1], 128, adj_lists, agg1, gcn=True, cuda=False,
                   ordering_embeddings=None)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   base_model=enc1, gcn=True, cuda=False,
                   ordering_embeddings=node_ordering_embeddings)
    # make sure we don't sample -- but change this later?
    enc1.num_sample = None
    enc2.num_sample = None
    graphsage = SupervisedGraphSage(max(labels)[0] + 1, enc2)
    rand_indices = np.random.permutation(num_nodes)
    test = rand_indices[:10]
    val = rand_indices[10:11]
    train = list(rand_indices[11:])  # 1 for email
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.6)
    times = []
    # embeds = None
    for batch in range(1000):
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(batch_nodes,
                              Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        # embeds = graphsage.embed(batch_nodes).detach().numpy()
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        print(batch, loss.data.item())
    val_output = graphsage.forward(test)
    print("Test F1:",
          f1_score(labels[test], val_output.data.numpy().argmax(axis=1), average="macro"))
    print("Test Accuracy:",
          accuracy_score(labels[test], val_output.data.numpy().argmax(axis=1)))
    print("Average batch time:", np.mean(times))
    out = open('embeddings/' + 'graphsage_' + edgelist_path.split('/')[-1], 'w+')
    embeddings = graphsage.embed(np.arange(num_nodes)).detach().numpy()
    for i in range(0, embeddings.shape[0]):
        s = str(int(i)) + ' '
        s += ' '.join([str(x) for x in embeddings[i]])
        s += '\n'
        out.write(s)
    out.close()
for key in {'num_data_points', 'vocab_size', 'max_ques_count', 'max_ques_len', 'max_ans_len'}:
    setattr(model_args, key, getattr(dataset, key))

# iterations per epoch
setattr(args, 'iter_per_epoch',
        math.ceil(dataset.num_data_points['train'] / args.batch_size))
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=args.lr)
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=args.lr_decay_rate)

if args.load_path != '':
    encoder.load_state_dict(components['encoder'])
    decoder.load_state_dict(components['decoder'])
    optimizer.load_state_dict(components['optimizer'])
    # move optimizer state tensors to GPU for a cuda-enabled optimizer
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
    # Clean up encoders
    encoder.cleanup()
    # Clean up distance sensors
    tof.cleanup()
    print("Exiting")
    exit()

# Attach the Ctrl+C signal interrupt
signal.signal(signal.SIGINT, ctrlC)

# Setup encoder
encoder = Encoder()
encoder.initEncoders()

# Setup motor control
motorControl = MotorControl(encoder)
orientation = Orientation(encoder, motorControl)
tof = TOF()
pid = PID(0.5, -6, 6)

try:
    with open('calibratedSpeeds.json') as json_file:
        motorControl.speedMap = json.load(json_file)
def run_cora():
    np.random.seed(1)
    random.seed(1)

    # load data
    num_nodes = 2708
    feat_data, labels, adj_lists = load_cora()
    train, test, val = split_data(labels)

    # construct model
    ## layer1 : Embedding layer
    features = nn.Embedding(feat_data.shape[0], feat_data.shape[1])
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()

    ## layer2 : Sample and Aggregate 1433->128
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, 1433, 128, adj_lists, agg1, gcn=True, cuda=False)

    ## layer3 : Sample and Aggregate 128->128
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   base_model=enc1, gcn=True, cuda=False)

    ## layer4 : Classification layer
    enc1.num_samples = 5
    enc2.num_samples = 5
    graphsage = SupervisedGraphSage(7, enc2)

    # optimizer
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.7)

    times = []
    for batch in range(100):
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(batch_nodes,
                              Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        if batch % 10 == 0:
            print(batch, float(loss.data))

    print('Finished training.')
    print('******************************')
    test_output = graphsage.forward(test)
    test_onehot = test_output.data.numpy()
    test_labels = labels[test]
    test_preds = np.argmax(test_onehot, axis=1)
    print('Test Accuracy:', accuracy_score(test_labels, test_preds))
    print('Average batch time: ', np.mean(times))
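# Note: split_data() is referenced above but not defined in this snippet. A
# minimal sketch of one plausible implementation follows; the split sizes and
# seed are assumptions, not values taken from the original code.
def split_data(labels, test_size=1000, val_size=500, seed=1):
    # Shuffle node indices and carve out test/val/train partitions.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(labels))
    test = idx[:test_size]
    val = idx[test_size:test_size + val_size]
    train = list(idx[test_size + val_size:])
    return train, test, val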
# transfer some useful args from dataloader to model
for key in {'num_data_points', 'vocab_size', 'max_ques_count', 'max_ques_len', 'max_ans_len'}:
    setattr(model_args, key, getattr(dataset, key))

# iterations per epoch
setattr(args, 'iter_per_epoch',
        math.floor(dataset.num_data_points['train'] / args.batch_size))
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
                       lr=args.lr, weight_decay=args.weight_decay)
encoder.word_embed.init_embedding('data/glove/glove6b_init_300d_1.0.npy')

start_epoch = 0
if args.load_path != '':
    components = torch.load(args.load_path)
    encoder.load_state_dict(components.get('encoder', components))
    decoder.load_state_dict(components.get('decoder', components))
    optimizer.load_state_dict(components.get('optimizer', components))
    start_epoch = components['epoch']
    print("Loaded model from {}".format(args.load_path))
# ----------------------------------------------------------------------------
dataset = VisDialDataset(args, [args.split])
dataloader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn)

# iterations per epoch
setattr(args, 'iter_per_epoch',
        math.floor(dataset.num_data_points[args.split] / args.batch_size))
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
encoder = nn.DataParallel(encoder).cuda()
decoder = nn.DataParallel(decoder).cuda()
encoder.load_state_dict(components.get('encoder', components))
decoder.load_state_dict(components.get('decoder', components))
print("Loaded model from {}".format(args.load_path))

if args.gpuid >= 0:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

# ----------------------------------------------------------------------------
# evaluation
class Trainer:

    def __init__(self, experiment, label_type, lr, weight_decay, num_directions, batch_size,
                 num_epochs, threshold, hidden_size, dropout):
        self.experiment = experiment
        self.label_type = label_type
        self.lr = lr
        self.weight_decay = weight_decay
        self.num_directions = num_directions
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        with open("public_data/vocab/word2id.pkl", 'rb') as infile:
            self.word2id = pickle.load(infile)
        self.train_dataset = BinaryPairDataset("train", label_type, threshold)
        self.valid_dataset = BinaryPairDataset("valid", label_type, threshold)
        self.orig_labels = self.train_dataset.orig_labels
        self.collection = IR_Dataset("train", label_type, threshold, "above_threshold")
        self.queries = IR_Dataset("valid", label_type, threshold, "above_threshold")
        with open("public_data/embeddings/word_embeddings_%s.pkl" % self.experiment, 'rb') as infile:
            embeddings = pickle.load(infile)
        self.model = Encoder(embeddings=embeddings, dropout=dropout, hidden_size=hidden_size,
                             num_directions=num_directions)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.float = torch.cuda.FloatTensor
            self.long = torch.cuda.LongTensor
        else:
            self.float = torch.FloatTensor
            self.long = torch.LongTensor
        log('============ EXPERIMENT ID ============')
        id = str(uuid.uuid4())[:8]
        log("Files will be stored with id %s" % id)
        self.save_as = "_".join([self.experiment, label_type, str(threshold), "pair_cel", id])
        with open("public_data/models/configuration_classifier_%s.pkl" % self.save_as, 'wb') as out:
            pickle.dump({"NUM_TRAIN": len(self.train_dataset.df),
                         "NUM_VALID": len(self.valid_dataset.df),
                         "DROPOUT": dropout,
                         "HIDDEN_SIZE": hidden_size,
                         "NUM_DIR": num_directions,
                         "EMB_SHAPE": embeddings.shape,
                         "LABEL_TYPE": label_type,
                         "THRESHOLD": threshold,
                         "LABELS": self.orig_labels}, out)
        self.run_training()

    def train(self, train_loader):
        """
        Trains the model
        :param train_loader: training documents
        :return: training acc & loss
        """
        self.model.train()
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()),
                                    lr=self.lr, weight_decay=self.weight_decay)
        loss_func = torch.nn.CosineEmbeddingLoss()
        sum_loss = 0
        for batch_id, (text_a, text_b, label) in enumerate(train_loader):
            last_hidden_a = self.model(text_a.type(self.long))
            last_hidden_b = self.model(text_b.type(self.long))
            for i in range(len(label)):
                if label[i] == 0.0:
                    label[i] = -1.0
            cur_loss = loss_func(last_hidden_a, last_hidden_b, label.type(self.float))
            sum_loss += cur_loss.data
            cur_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        train_loss = sum_loss / len(self.train_dataset.df)
        return train_loss

    def validate(self, valid_loader):
        """
        Validates model
        :param valid_loader: validation documents
        :return: validation acc & loss
        """
        self.model.eval()
        loss_func = torch.nn.CosineEmbeddingLoss()
        sum_loss = 0
        for batch_id, (text_a, text_b, label) in enumerate(valid_loader):
            last_hidden_a = self.model(text_a.type(self.long))
            last_hidden_b = self.model(text_b.type(self.long))
            for i in range(len(label)):
                if label[i] == 0.0:
                    label[i] = -1.0
            cur_loss = loss_func(last_hidden_a, last_hidden_b, label.type(self.float))
            sum_loss += cur_loss.data
        valid_loss = sum_loss / len(self.valid_dataset.df)
        return valid_loss

    def validate_ir(self, collection_loader, queries_loader):
        """
        Computes MAP on last hidden states during classifier training
        :param collection_loader: documents in the collection
        :param queries_loader: query documents
        :return: mean average precision
        """
        collection = extract_from_model(doc_repr="hidden", sequences=collection_loader, model=self.model)
        queries = extract_from_model(doc_repr="hidden", sequences=queries_loader, model=self.model)
        index = build_index(collection=collection)
        top_doc_ids, _ = search_index(index=index, k=len(self.collection.df), queries=queries)
        map = compute_mean_avg_prec(top_doc_ids=top_doc_ids, train_df=self.collection.df,
                                    test_df=self.queries.df, label_type=self.label_type)
        return map

    def run_training(self):
        """
        Runs the training and validation, computes the MAP, plots the results
        """
        train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
        valid_loader = DataLoader(self.valid_dataset, batch_size=self.batch_size, shuffle=True)
        collection_loader = DataLoader(self.collection, batch_size=64, shuffle=False)
        queries_loader = DataLoader(self.queries, batch_size=64, shuffle=False)
        log('============ IR DATA ============')
        log("Corpus examples: %d, query examples: %d" % (len(self.collection.df), len(self.queries.df)))
        log('============ TRAINING & VALIDATION ============')
        log("%d original classes, %d training examples, %d validation examples" % (
            len(self.orig_labels), len(self.train_dataset.df), len(self.valid_dataset.df)))
        train_losses = []
        valid_losses = []
        mean_avg_precs = []
        best_map = -1
        for epoch in range(self.num_epochs):
            log("Epoch: %d/%d ..." % (epoch, self.num_epochs))
            train_loss = self.train(train_loader)
            valid_loss = self.validate(valid_loader)
            train_losses.append(train_loss)
            valid_losses.append(valid_loss)
            map = self.validate_ir(collection_loader, queries_loader)
            mean_avg_precs.append(map)
            if map > best_map:
                best_map = map
                torch.save(self.model.state_dict(),
                           "public_data/models/experiment_%s/classifier_%s.pkl"
                           % (self.save_as.split("_")[0], self.save_as))
            log("TrainLoss: %.3f " % train_loss + "ValidLoss: %.3f " % valid_loss + "MAP: %.3f " % map)
        plot_training(train_losses, valid_losses, "loss", "_".join([self.save_as, "loss"]))
        plot_ir(values=mean_avg_precs, save_as=self.save_as, title="Mean Average Precision", name="map")
        return train_losses, valid_losses
def run_data(input_node, input_edge_train, input_edge_test, output_file, name):
    feat_data, edge_train, label_train, edge_test, label_test, adj_lists, adj_time = load_data(
        input_node, input_edge_train, input_edge_test)
    print("Finish Loading Data")
    features = nn.Embedding(len(feat_data), 1000)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, 1000, dimension, adj_lists, adj_time, agg1, gcn=True, cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, dimension, adj_lists, adj_time,
                   agg2, base_model=enc1, gcn=True, cuda=False)
    enc1.num_samples = 5
    enc2.num_samples = 5
    enc2.last = True
    graphsage = SupervisedGraphSage(2, enc2, name)
    # graphsage.cuda()
    # f = open('result_test' + name + '_' + file + "_" + str(dimension), 'a+')
    f = open(output_file, 'a+')
    f.write("Training\n")
    for epoch in range(0, 20):
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.7)
        optimizer.zero_grad()
        f.write("epoch " + str(epoch) + "\n")
        loss, predict_y = graphsage.loss(Variable(torch.LongTensor(label_train)), agg1, agg2, edge_train)
        print("AUC: " + str(metrics.roc_auc_score(label_train, predict_y)) + "\n")
        f.write("AUC: " + str(metrics.roc_auc_score(label_train, predict_y)) + "\n")
        loss.backward()
        optimizer.step()
        # gc.collect()
    f.write("Testing\n")
    loss, predict_y = graphsage.loss(Variable(torch.LongTensor(label_test)), agg1, agg2, edge_test)
    f.write("AUC: " + str(metrics.roc_auc_score(label_test, predict_y)) + "\n")
    predict_y1 = []
    for i in range(0, len(predict_y)):
        if predict_y[i] > 0.5:
            predict_y1.append(1)
        else:
            predict_y1.append(0)
    f.write("Micro-f1 score: " + str(metrics.f1_score(label_test, predict_y1, average="micro")) + "\n")
    f.write("Macro-f1 score: " + str(metrics.f1_score(label_test, predict_y1, average="macro")) + "\n")
    f.write("recall: " + str(metrics.recall_score(label_test, predict_y1)) + "\n")
    f.close()
def train_batch(solver_base, data_loader, total_loss, rep, epoch, model_list, device,
                batch_replication, hidden_dimension, feature_dim, train_graph_recurrence_num,
                train_outer_recurrence_num, use_cuda=True, is_train=True, randomized=True):
    # np.random.seed(1)
    random.seed(1)
    # list of parameter groups to optimize
    optim_list = [{'params': filter(lambda p: p.requires_grad, model.parameters())}
                  for model in model_list]
    total_example_num = 0
    # enumerate from 1; each iteration reads one item from the dataloader
    for (j, data) in enumerate(data_loader, 1):
        segment_num = len(data[0])
        print('Train CNF:', j)
        for seg in range(segment_num):
            # move the loaded data segment to the GPU
            (graph_map, batch_variable_map, batch_function_map, edge_feature, graph_feat,
             label, answers, var, func) = [_to_cuda(d[seg], use_cuda, device) for d in data]
            total_example_num += (batch_variable_map.max() + 1)
            sat_problem = SATProblem(
                (graph_map, batch_variable_map, batch_function_map, edge_feature, answers, None),
                device, batch_replication)
            loss = torch.zeros(1, device=device, requires_grad=False)
            # # concatenate the answers of all CNFs; only run the graphSage model when a solution exists
            # if len(answers[0].flatten()) > 0:
            #     answers = np.concatenate(answers, axis=0)
            # expand the variables (absolute values) of all clauses
            variable_map = torch.cat(
                ((torch.abs(sat_problem.nodes).to(torch.long) - 1).reshape(1, -1),
                 graph_map[1].to(torch.long).reshape(1, -1)), dim=0)
            # feat_data is the [variable, clause] matrix of the input CNF
            feat_data = torch.sparse.FloatTensor(
                variable_map, edge_feature.squeeze(1),
                torch.Size([sum(var), sum(func)])).to_dense()
            # feat_data = feat_data[np.argwhere(torch.sum(torch.abs(feat_data), 1) > 0)[0]]
            num_nodes_x = feat_data.shape[0]
            num_nodes_y = feat_data.shape[1]
            # encode the loaded data
            features = nn.Embedding(num_nodes_x, num_nodes_y)
            # use the data initialized by the SP model as the feature weights
            features.weight = nn.Parameter(feat_data, requires_grad=False)
            if use_cuda:
                features = features.cuda()
            agg1 = MeanAggregator(features, device=device)
            enc1 = Encoder(device, features, num_nodes_y, sat_problem._edge_num,
                           sat_problem.adj_lists, sat_problem.node_adj_lists, agg1, gru=True)
            agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), device=device)
            enc2 = Encoder(device, lambda nodes: enc1(nodes).t(), enc1.embed_dim,
                           sat_problem._edge_num, sat_problem.adj_lists,
                           sat_problem.node_adj_lists, agg2, base_model=enc1, gru=False)
            enc1.num_samples = 15
            enc2.num_samples = 5
            graphsage = SupervisedGraphSage(device, hidden_dimension, feature_dim, enc2, 'sp-nueral')
            # append the graphSAGE model parameters to the optimizer parameter list
            optim_list.append({'params': filter(lambda p: p.requires_grad, graphsage.parameters())})
            optimizer = torch.optim.SGD(optim_list, lr=0.3, weight_decay=0.01)
            optimizer.zero_grad()
            nodes = [i for i in range(sat_problem._variable_num)]
            # sample_length = int(len(nodes) / train_outer_recurrence_num)
            for i in range(train_graph_recurrence_num):
                loss += graphsage.loss(nodes, sat_problem, i < (train_graph_recurrence_num - 1))
            # else:
            #     # optimizer = torch.optim.SGD(optim_list, lr=0.3, weight_decay=0.01)
            #     optimizer = torch.optim.Adam(optim_list, lr=0.3, weight_decay=0.01)
            #     optimizer.zero_grad()
            for (k, model) in enumerate(model_list):
                # initialize the variable state (optionally randomized);
                # batch_replication is the number of times the same CNF data is replicated
                state = _module(model).get_init_state(graph_map, randomized, batch_replication)
                # train_outer_recurrence_num is how many times the same data is re-trained;
                # the losses are accumulated
                for i in torch.arange(train_outer_recurrence_num, dtype=torch.int32, device=device):
                    variable_prediction, state = model(init_state=state, sat_problem=sat_problem,
                                                       is_training=True)
                    # compute the loss of the sp_aggregator
                    # loss += model.compute_loss(is_train, variable_prediction, label,
                    #                            sat_problem._graph_map,
                    #                            sat_problem._batch_variable_map,
                    #                            sat_problem._batch_function_map,
                    #                            sat_problem._edge_feature, sat_problem._meta_data)
                    loss += solver_base._compute_loss(_module(model), None, is_train,
                                                      variable_prediction, label, sat_problem)
                for p in variable_prediction:
                    del p
                for s in state:
                    del s
                print('rep: %d, epoch: %d, data segment: %d, loss: %f' % (rep, epoch, seg, loss))
                total_loss[k] += loss.detach().cpu().numpy()
                loss.backward()
                optimizer.step()
            for model in model_list:
                _module(model)._global_step += 1
            del graph_map
            del batch_variable_map
            del batch_function_map
            del graph_feat
            del label
            del edge_feature
    return total_loss / total_example_num.cpu().numpy()
def test_pid(kp, ki, kd, stime, use_pid=True):
    left_encoder = Encoder(motor_pin_a=cfg.LEFT_MOTOR_PIN_A,
                           motor_pin_b=cfg.LEFT_MOTOR_PIN_B,
                           sampling_time_s=stime)
    left_encoder_counter = EncoderCounter(encoder=left_encoder)
    left_encoder_counter.start()
    pid_controller = PIDController(proportional_coef=kp,
                                   integral_coef=ki,
                                   derivative_coef=kd,
                                   windup_guard=cfg.PID_WINDUP_GUARD,
                                   current_time=None)
    max_steps_velocity_sts = cfg.ENCODER_RESOLUTION * cfg.MAX_ANGULAR_VELOCITY_RPM / 60
    kit = MotorKit(0x40)
    left_motor = kit.motor1
    velocity_levels = [1000, 2000, 3000, 4000, 5000, 6000, 0]
    velocity_levels = [3000, 0]  # overrides the full sweep with a single step test
    sleep_time = 5
    velocities_level_records = deque([])
    velocities_steps_records = deque([])
    pid_velocities_steps_records = deque([])
    timestamps_records = deque([])
    for v in velocity_levels:
        pid_controller.reset()
        pid_controller.set_set_point(v)
        left_motor.throttle = max(min(1, v / max_steps_velocity_sts), 0)
        start = time.time()
        current_time = time.time()
        while current_time - start < sleep_time:
            timestamp, measured_steps_velocity_sts = left_encoder_counter.get_velocity()
            # PID control
            if use_pid:
                new_steps_velocity_sts = pid_controller.update(-measured_steps_velocity_sts,
                                                               current_time)
                left_motor.throttle = max(min(1, new_steps_velocity_sts / max_steps_velocity_sts), 0)
            else:
                new_steps_velocity_sts = -1
            velocities_level_records.append(v)
            velocities_steps_records.append(-measured_steps_velocity_sts)
            pid_velocities_steps_records.append(new_steps_velocity_sts)
            timestamps_records.append(timestamp)
            current_time = time.time()
    left_motor.throttle = 0
    left_encoder_counter.finish()
    records_left = pd.DataFrame({'velocity_steps': velocities_steps_records,
                                 'velocity_levels': velocities_level_records,
                                 'pid_velocity_steps': pid_velocities_steps_records,
                                 'timestamp': timestamps_records})
    records_left['velocity_ms'] = (records_left['velocity_steps'] * cfg.WHEEL_DIAMETER_MM * np.pi
                                   / (1000 * cfg.ENCODER_RESOLUTION))
    # set_index returns a new DataFrame, so the result has to be assigned back
    records_left = records_left.set_index('timestamp', drop=True)
    return records_left
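# Hypothetical driver for test_pid(); the gain values, sampling time, and output
# filename below are placeholders for illustration, not tuned parameters from
# the original project.
if __name__ == "__main__":
    records = test_pid(kp=0.8, ki=0.2, kd=0.05, stime=0.05, use_pid=True)
    # Persist the step-response log for later plotting or analysis.
    records.to_csv("pid_step_response_left.csv")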
def run_model(self):
    np.random.seed(1)
    random.seed(1)
    # feat_data, labels, adj_lists = load_cora()
    features = nn.Embedding(self.num_nodes, self.num_feats)
    features.weight = nn.Parameter(torch.FloatTensor(self.feat_data), requires_grad=False)
    print('Features weight initialized')
    # features.cuda()
    if self.if_cuda:
        features = features.cuda()
    agg1 = MeanAggregator(features, cuda=self.if_cuda)
    print('Agg 1 Initialized')
    enc1 = Encoder(features, self.num_feats, 128, self.adj_lists, agg1, gcn=True, cuda=self.if_cuda)
    print('Encoder 1 Initialized')
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=self.if_cuda)
    print('Agg 2 Initialized')
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 64, self.adj_lists, agg2,
                   base_model=enc1, gcn=True, cuda=self.if_cuda)
    print('Encoder 2 Initialized')
    enc1.num_sample = 6
    enc2.num_sample = 4
    graphsage = SupervisedGraphSage(enc2)
    print('Model is Initialized')
    print('Model Weights : ')
    print(enc1.weight)
    print(enc2.weight)
    print('End')
    # graphsage.cuda()
    train_dataset = Question_Ans(self.df, mode='train', umap=self.user_map, qmap=self.question_map)
    val_dataset = Question_Ans(self.df, mode='val', umap=self.user_map, qmap=self.question_map)
    print('Dataloader Class Called')
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size,
                                                   shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size,
                                                 shuffle=False)
    print('Dataloaded')
    # rand_indices = np.random.permutation(num_nodes)
    # test = rand_indices[:1000]
    # val = rand_indices[1000:1500]
    # train = list(rand_indices[1500:])
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=self.lr)
    times = []
    for epoch in range(self.num_epochs):
        phase = 'train'
        batch = 0
        # print('Printing Num Samples')
        # print('Enc2 : ', graphsage.enc.num_sample)
        # print('Enc2 features : ', graphsage.enc.features)
        running_loss = 0
        tk0 = tqdm(train_dataloader, total=int(len(train_dataloader)))
        confusion_matrix_train = [[0, 0], [0, 0]]
        for questions, users, ans in tk0:
            batch += 1
            # batch_nodes = train[:256]
            # random.shuffle(train)
            start_time = time.time()
            optimizer.zero_grad()
            if self.if_cuda:
                ans = ans.type(torch.cuda.FloatTensor)
            else:
                ans = ans.type(torch.FloatTensor)
            # print(questions, users)
            loss, preds = graphsage.loss(questions, users, ans)
            for i, x in enumerate(preds):
                confusion_matrix_train[int(preds[i])][int(ans[i])] += 1
            metrics = get_metrics(confusion_matrix_train)
            loss.backward()
            optimizer.step()
            end_time = time.time()
            times.append(end_time - start_time)
            running_loss += loss.data
            tk0.set_postfix(loss=(running_loss / (batch * train_dataloader.batch_size)),
                            suffix=str(metrics))
            if batch % 1000 == 0:
                print(confusion_matrix_train)
        val_losses = []
        batch = 0
        running_loss = 0
        confusion_matrix_val = [[0, 0], [0, 0]]
        tk1 = tqdm(val_dataloader, total=int(len(val_dataloader)))
        for questions, users, ans in tk1:
            batch += 1
            start_time = time.time()
            optimizer.zero_grad()
            loss, preds = graphsage.loss(questions, users, ans)
            for i, x in enumerate(preds):
                confusion_matrix_val[int(preds[i])][int(ans[i])] += 1
            metrics = get_metrics(confusion_matrix_val)
            val_losses.append(loss)
            # loss.backward()
            # optimizer.step()
            end_time = time.time()
            times.append(end_time - start_time)
            running_loss += loss.data
            tk1.set_postfix(loss=(running_loss / (batch * val_dataloader.batch_size)),
                            suffix=str(metrics))
            if batch % 1000 == 0:
                print(confusion_matrix_val)
    # val_output = graphsage.l(val)
    # print("Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    # print("Average batch time:", np.mean(times))
    return val_losses, graphsage
dataset = VisDialDataset(args, [args.split])
dataloader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn)

# iterations per epoch
setattr(args, 'iter_per_epoch',
        math.ceil(dataset.num_data_points[args.split] / args.batch_size))
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

encoder = Encoder(model_args)
encoder.load_state_dict(components['encoder'])
decoder = Decoder(model_args, encoder)
decoder.load_state_dict(components['decoder'])
print("Loaded model from {}".format(args.load_path))

if args.gpuid >= 0:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

# ----------------------------------------------------------------------------
# evaluation
# ----------------------------------------------------------------------------
print("Evaluation start time: {}".format(
    pad_id = ConstituentDataset.encoder.tokenizer.pad_token_id
    batch_size = len(sents)
    padded_sents = pad_id * torch.ones(batch_size, max_length).long()
    for i, sent in enumerate(sents):
        padded_sents[i, :sent.shape[1]] = sent[0, :]
    spans = torch.cat(spans, dim=0)
    one_hot_labels = torch.zeros(batch_size, len(ConstituentDataset.label_dict)).long()
    for i, item in enumerate(labels):
        for l in item:
            one_hot_labels[i, l] = 1
    return padded_sents, spans, one_hot_labels


# unit test
if __name__ == '__main__':
    from torch.utils.data import DataLoader
    from encoders import Encoder
    encoder = Encoder('bert', 'base', True)
    for split in ['train', 'development', 'test']:
        dataset = ConstituentDataset(
            f'tasks/constituent/data/edges/ontonotes/const/debug/{split}.json', encoder)
        data_loader = DataLoader(dataset, 64, collate_fn=collate_fn)
        for sents, spans, labels in data_loader:
            pass
        print(f'Split "{split}" has passed the unit test '
              f'with {len(dataset)} instances.')
    from IPython import embed
    embed(using=False)