def RunTrain(isSave=False):
    # MNIST dataset
    train_dataset = torchvision.datasets.MNIST(root='/data',
                                               train=True,
                                               transform=transforms.ToTensor(),
                                               download=True)

    # Data loader
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # CNN model
    model = cnn.ConvNet(output_classes).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training
    mnist_trainer = trainer.Trainer(train_loader,
                                    model=model,
                                    cri=criterion,
                                    opt=optimizer,
                                    device=device)
    mnist_trainer.Execute(epochs)
    trained_model = mnist_trainer.GetModel()

    if isSave:
        mnist_trainer.SaveModel()

    return trained_model
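# Usage sketch (not from the original project): RunTrain reads batch_size,
# learning_rate, epochs, output_classes, and device from module scope, and the
# cnn/trainer imports are assumed helper modules. The values below are
# illustrative assumptions only.
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import cnn      # assumed module exposing ConvNet
import trainer  # assumed module exposing Trainer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 100        # assumed
learning_rate = 1e-3    # assumed
epochs = 5              # assumed
output_classes = 10     # MNIST digit classes

if __name__ == '__main__':
    trained_model = RunTrain(isSave=True)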
def main(): print("Testing...") #CNN_Model '''initialization''' tf.reset_default_graph() sess = tf.InteractiveSession() # Forward propagation model = CNN() #data reading data_reader = DataReader() #intialize saving saver = tf.train.Saver() init = tf.global_variables_initializer() sess.run(init) #restoring the model if RESTORE_FROM is not None: saver.restore(sess, os.getcwd() + '\\' + LOGDIR + '\\' + RESTORE_FROM) print('Model restored from ' + os.getcwd() + '\\' + LOGDIR + '\\' + RESTORE_FROM) count = 0 example_num = 50799 # num of exapmles feature1 = np.zeros((example_num, 1452)) feature2 = np.zeros((example_num, 192)) feature3 = np.zeros((example_num, 100)) label = np.zeros((example_num, 1)) image = np.zeros((1, 25, 25, 1)) for i in range(0, example_num): image = scipy.misc.imread(data_reader.test_xs[i]) image = image.reshape(1, 25, 25, 1) / 255.0 output1 = model.pool1.eval(feed_dict={model.x: image}) feature1[i, :] = np.reshape(output1, [-1, 1452]) output2 = model.pool2.eval(feed_dict={model.x: image}) feature2[i, :] = np.reshape(output2, [-1, 192]) output3 = model.h_conv3_flat.eval(feed_dict={model.x: image}) feature3[i, :] = output3 label[i] = data_reader.test_ys[i] count = count + 1 print(count) scipy.io.savemat(os.getcwd() + '\\' + 'feature1_test.mat', mdict={'arr': feature1}) scipy.io.savemat(os.getcwd() + '\\' + 'feature2_test.mat', mdict={'arr': feature2}) scipy.io.savemat(os.getcwd() + '\\' + 'feature3_test.mat', mdict={'arr': feature3}) scipy.io.savemat(os.getcwd() + '\\' + 'label_test.mat', mdict={'arr': label})
def LoadModel(self, model_path):
    output_classes = 10
    self.__model = CNN.ConvNet(output_classes)
    self.__model.load_state_dict(torch.load(model_path, map_location=device))
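# Companion sketch (an assumption, not the original saving code): LoadModel
# expects a plain state_dict checkpoint, so a compatible file can be produced
# with torch.save on the same CNN.ConvNet class; the path is illustrative.
model_to_export = CNN.ConvNet(10)
torch.save(model_to_export.state_dict(), 'mnist_convnet.pth')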
    num_workers=4)
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')


def imshow(img):
    img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))


use_cuda = torch.cuda.is_available()

criterion = nn.CrossEntropyLoss()  # backpropagation method
cnn = CNN()
learning_rate = 1e-3
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate)

# hyper-parameters
num_epochs = 2
num_batches = len(trn_loader)

trn_loss_list = []
val_loss_list = []
for epoch in range(num_epochs):
    trn_loss = 0.0
    for i, data in enumerate(trn_loader):
        x, label = data
        if use_cuda:
            x = x.cuda()
          batch_size=batch_size,
          train_data_part=train_data_part)
    test(test_loader, epoch, test_data_part=test_data_part)


if __name__ == '__main__':
    start_time = time.perf_counter()
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)

    lr = 0.0001
    batch_size = 2  # 2,
    # test_loader = load_data_qmnist(batch_size)
    train_loader, test_loader = load_data(batch_size)
    # show_ground_truth(train_loader.dataset)

    model = CNN()
    print(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    log_interval = -10
    epochs = 1
    train_data_part = 0.02
    test_data_part = 0.02

    main()

    over_time = time.perf_counter()
    print('Time Cost: {:.1f}'.format(over_time - start_time))
args = parser.parse_args()
epoch = args.epoch
lr = args.lr

## load dataset & dataloader
train = tv.datasets.MNIST(root='./data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
test = tv.datasets.MNIST(root='./data',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)
train_loader = torch.utils.data.DataLoader(train, 100, True)
test_loader = torch.utils.data.DataLoader(test, 100, False)

## model object
model = CNN()
model.cuda()
print(model)

## loss & optimizer
loss_type = nn.CrossEntropyLoss()
loss_type.cuda()
optimizer = SGD(model.parameters(), lr)  # 0.005, 0.01

Loss, Acc = Train(epoch, train_loader, model, loss_type, optimizer)
Test(test_loader, model, loss_type, optimizer)
Visualize(Loss, Acc)
def main(): print("Start training Model...") '''initialization''' tf.reset_default_graph() sess = tf.InteractiveSession() # Forward propagation model = CNN() # To keep track of the cost costs = [] # Cost function with tf.name_scope('loss'): loss = compute_cost(model.y, model.ytruth) loss = tf.reduce_mean(loss) #data reading data_reader = DataReader() #calculate number of iterations per epoch NUM_BATCHES_PER_EPOCH = int(data_reader.num_images / BATCH_SIZE) print('Num of batches per epoch :', NUM_BATCHES_PER_EPOCH) NUM_TEST_DATA = int(data_reader.total_test / TEST_SIZE) NUM_STEPS = NUM_BATCHES_PER_EPOCH * EPOCH_NUM print("Total No. of iterations :", NUM_STEPS) # Optimizer with tf.name_scope('adam'): train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss) #prediction correct_prediction = tf.equal(tf.argmax(model.y, 1), tf.argmax(model.ytruth, 1)) with tf.name_scope('accuracy'): correct_prediction = tf.cast(correct_prediction, tf.float32) accuracy = tf.reduce_mean(correct_prediction) #intialize saving saver = tf.train.Saver() min_loss = 1.0 start = 0 init = tf.global_variables_initializer() sess.run(init) steps = 0 test_error = 0 #restoring the model if RESTORE_FROM is not None: saver.restore(sess, os.getcwd() + '\\' + LOGDIR + '\\' + RESTORE_FROM) print('Model restored from ' + os.getcwd() + '\\' + LOGDIR + '\\' + RESTORE_FROM) for epoch in range(EPOCH_NUM): minibatch_cost = 0.0 for i in range(START_STEP, NUM_BATCHES_PER_EPOCH): start = time.time() steps += 1 #get minibatch xs, ys = data_reader.load_train_batch(BATCH_SIZE) # run session _, temp_cost = sess.run([train_step, loss], feed_dict={ model.x: xs, model.ytruth: ys }) #evauate train error train_error = loss.eval(feed_dict={model.x: xs, model.ytruth: ys}) #evaluate average error per epoch minibatch_cost += temp_cost / NUM_BATCHES_PER_EPOCH #evaluate train accuracy train_accuracy = accuracy.eval({model.x: xs, model.ytruth: ys}) end = time.time() elapsed = end - start print("Step%d [Train Loss= %g ,Accuracy= %g, elapse= %g min]" % (steps, train_error, train_accuracy * 100, elapsed * (NUM_STEPS - steps) / 60)) if steps % 100 == 0 or steps == NUM_BATCHES_PER_EPOCH * EPOCH_NUM - 1: test_cost = 0.0 test_acc = 0.0 for j in range(NUM_TEST_DATA): xtest, ytest = data_reader.load_test_data(TEST_SIZE) test_error = loss.eval(feed_dict={ model.x: xtest, model.ytruth: ytest }) test_cost += test_error / NUM_TEST_DATA test_accuracy = accuracy.eval({ model.x: xtest, model.ytruth: ytest }) test_acc += test_accuracy / NUM_TEST_DATA print("Testing... 
Test Loss= %g Accuracy:= %g" % (test_cost, test_acc * 100)) #saving if steps > 0 and steps % CHECKPOINT_EVERY == 0: if not os.path.exists(LOGDIR): os.makedirs(LOGDIR) checkpoint_path = os.path.join( LOGDIR, "model-step-%d-val-%g.ckpt" % (i, test_error)) filename = saver.save(sess, checkpoint_path) print("Model saved in file: %s" % filename) if test_error < min_loss: min_loss = test_error if not os.path.exists(LOGDIR): os.makedirs(LOGDIR) checkpoint_path = os.path.join( LOGDIR, "model-step-%d-val-%g.ckpt" % (i, test_error)) filename = saver.save(sess, checkpoint_path) print("Model saved in file: %s" % filename) if print_cost == True and epoch % 5 == 0: print("Cost after epoch %i: %f" % (epoch, minibatch_cost)) if print_cost == True and epoch % 1 == 0: costs.append(minibatch_cost) checkpoint_path = os.path.join(LOGDIR, "model-step-final.ckpt") filename = saver.save(sess, checkpoint_path) print("Model saved in file: %s" % filename) # plot the cost plt.plot(np.squeeze(costs)) plt.ylabel('cost') plt.xlabel('iterations (per tens)') plt.show()
def main(): #F1_test(thres,model_ID,osmembed,osm_word_emb,hc,hidden,region,lstm_dim,epoch,filter_l,bool_remove,tweet_cache,osm_names, bool_pos=0, emb=1, model_type=2): parser = argparse.ArgumentParser(description='manual to this script') parser.add_argument('--thres', type=float, default=0.70) parser.add_argument('--model_ID', type=str, default='0319140518') parser.add_argument('--osmembed', type=int, default=0) parser.add_argument('--osm_word_emb', type=int, default=0) parser.add_argument('--hidden', type=int, default=120) parser.add_argument('--region', type=int, default=1) parser.add_argument('--lstm_dim', type=int, default=120) parser.add_argument('--epoch', type=int, default=11) parser.add_argument('--filter_l', type=int, default=1) parser.add_argument('--bool_remove', type=int, default=1) parser.add_argument('--emb', type=int, default=1) parser.add_argument('--model_type', type=int, default=2) args = parser.parse_args() print('thres: ' + str(args.thres)) print('model_ID: ' + args.model_ID) print('osmembed: ' + str(args.osmembed)) print('osm_word_emb: ' + str(args.osm_word_emb)) print('hidden: ' + str(args.hidden)) print('lstm_dim: ' + str(args.lstm_dim)) print('epoch: ' + str(args.epoch)) print('filter_l: ' + str(args.filter_l)) print('bool_remove: ' + str(args.bool_remove)) print('emb: ' + str(args.emb)) print('model_type: ' + str(args.model_type)) postive_pro_t = args.thres PAD_idx = 0 s_max_len = 20 bool_mb_gaze = args.osm_word_emb gazetteer_emb_file = 'data/osm_vector' + str(args.osmembed) + '.txt' bigram_file = 'model/' + args.model_ID + '-bigram.txt' hcfeat_file = 'model/' + args.model_ID + '-hcfeat.txt' START_WORD = 'start_string_taghu' bigram_model = load_bigram_model(bigram_file) if bool_mb_gaze: gazetteer_emb, gaz_emb_dim = load_embeding(gazetteer_emb_file) else: gazetteer_emb = {} gaz_emb_dim = 0 char_hc_emb, _ = load_embeding(hcfeat_file) word_idx_file = 'model/' + args.model_ID + '-vocab.txt' word2idx, max_char_len = load_word_index(word_idx_file) max_char_len = 20 if args.emb == 2: glove_emb_file = 'data/GoogleNews-vectors-negative300.bin' emb_model = KeyedVectors.load_word2vec_format(glove_emb_file, binary=True) emb_dim = len(emb_model.wv['the']) glove_emb = emb_model.wv if args.emb == 3: glove_emb_file = 'data/glove.6B.100d.txt' glove_emb, emb_dim = load_embeding(glove_emb_file) elif args.emb == 4: BertEmbed = BertEmbeds('data/uncased_vocab.txt', 'data/uncased_bert_vectors.txt') glove_emb, emb_dim = BertEmbed.load_bert_embedding() else: glove_emb = {} emb_dim = 50 weight_l = emb_dim + gaz_emb_dim + 6 weights_matrix = np.zeros((len(word2idx.keys()), weight_l)) weights_matrix = torch.from_numpy(weights_matrix) tag_to_ix = {"p": 0, "n": 1} HIDDEN_DIM = args.hidden model_path = 'model/' + args.model_ID + 'epoch' + str(args.epoch) + '.pkl' DROPOUT = 0.5 flex_feat_len = 3 if args.model_type == 1: model = BiLSTM(weights_matrix, len(tag_to_ix), HIDDEN_DIM, 1, flex_feat_len) model.load_state_dict(torch.load(model_path, map_location='cpu')) elif args.model_type == 2: model = C_LSTM(weights_matrix, HIDDEN_DIM, args.filter_l, args.lstm_dim, len(tag_to_ix), flex_feat_len, DROPOUT) model.load_state_dict(torch.load(model_path, map_location='cpu')) else: FILTER_SIZES = [1, 2, 3] OUTPUT_DIM = 2 model = CNN(weights_matrix, HIDDEN_DIM, FILTER_SIZES, OUTPUT_DIM, flex_feat_len, DROPOUT) model.load_state_dict(torch.load(model_path, map_location='cpu')) model.eval() if args.model_type == 1: np_word_embeds = model.word_embeds.weight.detach().numpy() else: np_word_embeds = 
model.embedding.weight.detach().numpy() index_t = 0 time_str = datetime.now().strftime('%m%d%H%M%S') raw_result_file = 'experiments/cnn_result_F1' + time_str + 'm' + args.model_ID + 'region' + str( args.region) + 'epoch' + str(args.epoch) + 'th' + str( args.thres) + '.txt' save_file = open(raw_result_file, 'w') save_file.write(model_path) save_file.write('\n') true_count = 0 TP_count = 0 FP_count = 0 FN_count = 0 tweet_cache = {} if args.region == 1: t_json_file = "data/houston_floods_2016_annotations.json" #"data/raw_tweet.txt" elif args.region == 2: t_json_file = "data/chennai_floods_2015_annotations.json" #" else: t_json_file = "data/louisiana_floods_2016_annotations.json" #" place_lens = {} detected_score = {} '''preload data to cache''' with open(t_json_file) as json_file: js_data = json.load(json_file) for key in js_data.keys(): tweet = js_data[key]['text'] place_names = [] place_offset = [] for cur_k in js_data[key].keys(): if cur_k == 'text': tweet = js_data[key][cur_k] else: if js_data[key][cur_k]['type'] != 'ambLoc': row_nobrackets = re.sub("[\(\[].:;*?[\)\]]", "", js_data[key][cur_k]['text']) corpus = [ word.lower() for word in re.split( "[. #,&\"\',’]", row_nobrackets) ] corpus = [word for word in corpus if word] corpus = [replace_digs(word) for word in corpus] place_names.append(tuple(corpus)) place_offset.append( tuple([ int(js_data[key][cur_k]['start_idx']), int(js_data[key][cur_k]['end_idx']) - 1 ])) sentences, offsets, full_offset, hashtag_offsets = extract_sim( tweet, word2idx.keys(), 1) sentences_lowcases = [[x.lower() for x in y] for y in sentences] tweet_cache[key] = [ place_names, place_offset, sentences, offsets, full_offset, sentences_lowcases ] total_sen = [] for key in tweet_cache.keys(): sen = '' for sent in tweet_cache[key][4]: sen += sent[0] + ' ' total_sen.append(sen) tag_list = runtagger_parse(total_sen) tag_list = [item for item in tag_list if item] cur_index = 0 index = 0 for key in tweet_cache.keys(): tag_lists = [] aligned_full_offset = align(tag_list[index], tweet_cache[key][4]) for i in range(len(tweet_cache[key][2])): tag_lists.append( extract_subtaglist(tag_list[index], aligned_full_offset, tweet_cache[key][3][i])) index += 1 tweet_cache[key].insert(len(tweet_cache[key]), tag_lists) cur_index += len(tweet_cache[key][2]) with open(t_json_file) as json_file: js_data = json.load(json_file) for key in js_data.keys(): tweet = js_data[key]['text'] place_names, place_offset, raw_sentences, offsets, full_offset, sentences, tag_lists = tweet_cache[ key] save_file.write('#' * 50) save_file.write('\n') save_file.write(key + ': ' + tweet + '\n') ps = '' for place in place_names: for w in place: ps += str(w) + ' ' ps += '\n' #save_file.write(ps) pos_str = " ".join(str(item) for item in tag_lists) save_file.write(pos_str) save_file.write('\n') last_remove = ['area', 'region'] first_remove = ['se', 'ne', 'sw', 'nw'] true_count += len(place_names) detected_place_names = [] detected_offsets = [] OSM_CONF = postive_pro_t + 0.05 for idx, sentence in enumerate(sentences): if sentence: sub_index, all_sub_lists, _ = extract_nouns_tweet( tag_lists[idx], s_max_len) all_sub_lists = [[x.lower() for x in y] for y in all_sub_lists] if not all_sub_lists: continue cur_off = offsets[idx] index_t += 1 osm_probs = [0] * len(all_sub_lists) input_emb = np.zeros( (len(all_sub_lists), s_max_len, emb_dim + gaz_emb_dim + 6 + flex_feat_len)) for i, sub_sen in enumerate(all_sub_lists): sub_sen = [replace_digs(word) for word in sub_sen] input_emb[i] = sentence_embeding(sub_sen, 
np_word_embeds,word2idx,glove_emb,\ gazetteer_emb,s_max_len,emb_dim,\ gaz_emb_dim,max_char_len,bool_mb_gaze,\ PAD_idx,START_WORD,bigram_model,char_hc_emb,flex_feat_len) input_emb = torch.from_numpy(input_emb).float() if args.model_type == 1: output = model.predict(input_emb) _, preds_tensor = torch.max(output, 1) pos_prob = output.detach().numpy()[:, 1] elif args.model_type == 2: output = model.predict(input_emb) _, preds_tensor = torch.max(output, 1) pos_prob = torch.sigmoid(output).detach().numpy() pos_prob = pos_prob[:, 1] else: tem_output = model.core(input_emb) pos_prob = F.softmax(tem_output, dim=1).detach().numpy() pos_prob = pos_prob[:, 1] for i, prob in enumerate(pos_prob): if osm_probs[i] > prob: pos_prob[i] = osm_probs[i] postives = [] for i, p in enumerate(pos_prob): if pos_prob[i] > postive_pro_t: postives.append(i) origin_pos_prob = pos_prob pos_prob = pos_prob[postives] sort_index = (-pos_prob).argsort() selected_sub_sen = [] for index in sort_index: if not selected_sub_sen: selected_sub_sen.append(postives[index]) else: temp_sub_sen = selected_sub_sen.copy() bool_added = True for p in temp_sub_sen: if intersection(sub_index[p], sub_index[postives[index]]) and \ not (is_Sublist(sub_index[postives[index]],sub_index[p])): bool_added = False break if bool_added: selected_sub_sen.append(postives[index]) final_sub_sen = selected_sub_sen.copy() for i in selected_sub_sen: for j in selected_sub_sen: if not (i == j): if is_Sublist(sub_index[j], sub_index[i]): final_sub_sen.remove(i) break for i in final_sub_sen: if args.bool_remove: if all_sub_lists[i][-1] in last_remove: del all_sub_lists[i][-1] if all_sub_lists[i][0] in first_remove: del all_sub_lists[i][0] # detected_place_names.append(tuple(all_sub_lists[i])) detected_offsets.append( tuple([ cur_off[sub_index[i][0]][0], cur_off[sub_index[i][-1]][1] ])) save_file.write( str(round(origin_pos_prob[i], 3)) + ':' + str(all_sub_lists[i]) + '\n') c_tp, c_fp, c_fn, place_detect_score = interset_num( detected_offsets, place_offset, detected_place_names, place_names) print('*' * 50) print(tweet) print(detected_offsets) print(detected_place_names) #save_file.write('tp:'+str(c_tp)+' c_fp:'+str(c_fp)+' c_fn:'+str(c_fn)) save_file.write('\n') for p, i in enumerate(place_names): cur_len_p = 0 for pp in i: if hasNumbers(pp): groups = re.split('(\d+)', pp) groups = [x for x in groups if x] cur_len_p += len(groups) else: segments = segment(pp) cur_len_p += len(segments) if cur_len_p in place_lens.keys(): place_lens[cur_len_p] += 1 detected_score[cur_len_p] += place_detect_score[p] else: place_lens[cur_len_p] = 1 detected_score[cur_len_p] = place_detect_score[p] TP_count += c_tp FP_count += c_fp FN_count += c_fn P = TP_count / (TP_count + FP_count) R = TP_count / (TP_count + FN_count) F1 = (2 * P * R) / (P + R) save_file.write('recall:' + str(R)) save_file.write('\n') save_file.write('precision:' + str(P)) save_file.write('\n') save_file.write('f1:' + str(F1)) save_file.write('\n') save_file.write('TP:' + str(TP_count)) save_file.write('\n') save_file.write('FP:' + str(FP_count)) save_file.write('\n') save_file.write('FN:' + str(FN_count)) save_file.write('\n') save_file.write('true count:' + str(true_count)) save_file.write('\n') save_file.write( json.dumps(detected_score)) # use `json.loads` to do the reverse save_file.write( json.dumps(place_lens)) # use `json.loads` to do the reverse detection_rate = [ detected_score[key] / place_lens[key] for key in place_lens.keys() ] for item in detection_rate: save_file.write("%s\n" % item) save_file.close()
def train(self, train_file, dev_file=None, test_file=None): reader_train = Reader.reader(train_file, language='chn') reader_test = Reader.reader(test_file, language='chn') sents_train = reader_train.getWholeText() sents_test = reader_test.getWholeText() train_posts, train_responses, train_labels = reader_train.getData() test_posts, test_responses, test_labels = reader_test.getData() train_post_response_label = self.mergePostResponseLabel( train_posts, train_responses, train_labels) test_post_response_label = self.mergePostResponseLabel( test_posts, test_responses, test_labels) # sentsTrain = self.cutSentFromText(sentsTrain) # sentsTest = self.cutSentFromText(sentsTest) self.HyperParams.train_len = len(sents_train) self.HyperParams.test_len = len(sents_test) if self.HyperParams.using_English_data: reader_dev = Reader.reader(dev_file, language='eng') sents_dev = reader_dev.getWholeText() sents_dev = self.cutSentFromText(sents_dev) self.HyperParams.dev_den = len(sents_dev) if self.HyperParams.using_English_data: self.createAlphabet(train_posts + train_responses) else: self.createAlphabet(train_posts + train_responses, train_labels) self.HyperParams.topic_size = len(self.topics) if self.HyperParams.if_write_dic2file: print('writing dic to path:', self.HyperParams.word_dic_path) self.HyperParams.word_alpha.write(self.HyperParams.word_dic_path) self.HyperParams.label_alpha.write(self.HyperParams.label_dic_path) print('done') args = self.HyperParams.args() print(args) lr = self.HyperParams.lr Steps = self.HyperParams.Steps model = None if self.HyperParams.biLSTM: print("using biLSTM...") model = biLSTM.Model(self.HyperParams) if self.HyperParams.biGRU: print("using biGRU...") model = biGRU.Model(self.HyperParams) if self.HyperParams.CNN: print("using CNN...") model = CNN.Model(self.HyperParams) if model == None: print("please select a model!") raise RuntimeError print('use_cuda:', self.HyperParams.use_cuda) if self.HyperParams.use_cuda: model = model.cuda() # print(model) # param = [i for i in model.parameters() if i.requires_grad] # param = [i for i in model.parameters() if i.sparse] # sparseParam = [i for i in model.parameters() if not i.sparse] # Optimizer = oprim.Adam(param, lr=LearningRate) # SparseOprimizer = oprim.SparseAdam(sparseParam) # model. 
# Optimizer = oprim.Adam(model.parameters(), lr=LearningRate, weight_decay=self.HyperParams.decay) Optimizer = None if self.HyperParams.Adam: print("using Adam...") Optimizer = oprim.Adam(model.parameters(), lr=lr, weight_decay=self.HyperParams.decay) if self.HyperParams.SGD: print("using SGD...") Optimizer = oprim.SGD(model.parameters(), lr=lr, weight_decay=self.HyperParams.decay) if Optimizer == None: print("please select a model!") return def accuracy(model, posts, responses, labels): pred_right_num_idx = 0 pred_num_idx = 1 gold_num_idx = 2 evalList = [[0, 0, 0] for _ in range(self.HyperParams.label_size)] topic, text, label = posts, responses, labels topic = self.seq2id(topic) text = self.seq2id(text) label = self.label2id(label) topic = Variable(torch.LongTensor(topic)) text = Variable(torch.LongTensor(text)) label = Variable(torch.LongTensor(label)) if self.HyperParams.use_cuda: topic = topic.cuda() text = text.cuda() label = label.cuda() Y = model(topic, text) C = (torch.max(Y, 1)[1].view(label.size()).data == label.data).sum() pred_list = torch.max(Y, 1)[1].view(label.size()).data.tolist() label_list = label.data.tolist() for i in range(len(evalList)): for j in range(len(label_list)): if label_list[j] == i: evalList[i][gold_num_idx] += 1 if label_list[j] == pred_list[j]: evalList[i][pred_right_num_idx] += 1 if pred_list[j] == i: evalList[i][pred_num_idx] += 1 P_R_F1_list = [ Eval(pred_right_num=evalList[i][pred_right_num_idx], pred_num=evalList[i][pred_num_idx], gold_num=evalList[i][gold_num_idx]).P_R_F1 for i in range(len(evalList)) ] return float(C) / len(posts) * 100, C, len(posts), P_R_F1_list def getTextBatchList(text, batch): textBatchlist = [] textBatchNum = len(text) // batch if len(text) % batch != 0: textBatchNum += 1 if textBatchNum - 1 < 0: print("wrong: func getTextBatchList's text's length is 0!!!") return [] end = 0 for i in range(textBatchNum - 1): begin = end end += batch textBatchlist.append(text[begin:end]) textBatchlist.append(text[end:len(text)]) return textBatchlist def batch2PostResponseLabel(batch): posts = [] responses = [] labels = [] for elem in batch: posts.append(elem[0]) responses.append(elem[1]) labels.append(elem[2]) return posts, responses, labels file = open(self.HyperParams.write_file_name, 'a+') file.write(args) file.close() # sents_train = sents_train train_post_response_label = train_post_response_label if self.HyperParams.using_English_data: sents_dev = sents_dev sents_test = sents_test batch_size = self.HyperParams.batch_size best_F1 = 0 best_acc = 0 loss_list = [] start_time = time.time() for step in range(Steps): file = open(self.HyperParams.write_file_name, 'a+') total_loss = torch.Tensor([0]) if self.HyperParams.use_cuda: total_loss = total_loss.cuda() cnt = 0 train_correct = 0 random.shuffle(sents_train) text_batch_list = getTextBatchList(train_post_response_label, batch_size) for batch in text_batch_list: model.train() Optimizer.zero_grad() # SparseOprimizer.zero_grad() topic, text, label = batch2PostResponseLabel(batch) topic = self.seq2id(topic) text = self.seq2id(text) label = self.label2id(label) topic = Variable(torch.LongTensor(topic)) text = Variable(torch.LongTensor(text)) label = Variable(torch.LongTensor(label)) if self.HyperParams.use_cuda: topic = topic.cuda() text = text.cuda() label = label.cuda() Y = model(topic, text) # Y = Y.cuda() # print(Y.size()) # print(label.size()) Loss = F.cross_entropy(Y, label) Loss.backward() if self.HyperParams.clip_grad: torch.nn.utils.clip_grad_norm(model.parameters(), 10) Optimizer.step() 
cnt += 1 # if cnt % 500 == 0: # print(cnt) total_loss += Loss.data train_correct += (torch.max(Y, 1)[1].view( label.size()).data == label.data).sum() if self.HyperParams.lr_decay: adjust_learning_rate( Optimizer, self.HyperParams.lr / (1 + step * self.HyperParams.decay)) total_loss /= len(train_posts) total_loss = total_loss.cpu() loss_list.append(total_loss.numpy()[0]) train_acc = float(train_correct) / len(train_posts) * 100 #(step*3.01 + 1 p_index = self.HyperParams.label_alpha.string2id["p"] n_index = self.HyperParams.label_alpha.string2id["n"] if self.HyperParams.using_English_data: dev_acc, dev_correct, dev_num, P_R_F1_dev_list = accuracy( model, test_posts, test_responses, test_labels) test_acc, test_correct, test_num, P_R_F1_test_list = accuracy( model, test_posts, test_responses, test_labels) if self.HyperParams.using_English_data: dev_mean_F1 = (P_R_F1_dev_list[p_index][2] + P_R_F1_dev_list[n_index][2]) / 2 test_mean_F1 = (P_R_F1_test_list[p_index][2] + P_R_F1_test_list[n_index][2]) / 2 if self.HyperParams.using_Chinese_data: output = "Step: {} - loss: {:.6f} Train acc: {:.4f}%{}/{} Test acc: {:.4f}%{}/{} F1={:.4f}".format( step, total_loss.numpy()[0], train_acc, train_correct, len(train_posts), test_acc, test_correct, len(test_posts), test_mean_F1) else: output = "Step: {} - loss: {:.6f} Train acc: {:.4f}%{}/{} Dev acc: {:.4f}%{}/{} Test acc: {:.4f}%{}/{} F1={:.4f}".format( step, total_loss.numpy()[0], train_acc, train_correct, len(sents_train), dev_acc, dev_correct, int(dev_num), test_acc, test_correct, int(test_num), test_mean_F1) if best_F1 < test_mean_F1: best_F1 = test_mean_F1 best_acc = test_acc print('best F1={:.4f}'.format(best_F1)) fmodel = 'Module/%d.params' % (step) torch.save(model, fmodel) print(output, end='#') file.write(output + "\n") file.close() end_time = time.time() mins, secs = sec2min(end_time - start_time) need_secs = float(end_time - start_time) / ( (float(step) + 1) / Steps) need_mins, need_secs = sec2min(need_secs) print("use {:.0f}m{:.0f}s(-{:.0f}m{:.0f}s)".format( mins, secs, need_mins, need_secs)) file = open(self.HyperParams.write_file_name, 'a+') output = 'Total: best F1 = ' + str(best_F1) + ' acc = ' + str(best_acc) print(output) file.write(output + "\n") file.close()
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1

validation_set = DynamicVehicleDataset('data/validation_image_2/',
                                       'validation_csv.csv', transform)
validation_loader = DataLoader(dataset=validation_set,
                               shuffle=shuffle,
                               batch_size=batch_size,
                               num_workers=num_workers,
                               pin_memory=pin_memory)

model = CNN().to(device)
model.load_state_dict(
    torch.load("model/pretrained_model/cnn_20.pth")['model_state_dict'])


def validate():
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in validation_loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
def main(): # parse parameters start_time = time.time() time_str = datetime.now().strftime('%m%d%H%M%S') parser = argparse.ArgumentParser(description='manual to this script') parser.add_argument('--epoch', type=int, default=8) parser.add_argument('--train-batch-size', type=int, default=1000) parser.add_argument('--test-batch-size', type=int, default=1000) parser.add_argument('--osm_word_emb', type=int, default=1) parser.add_argument('--emb', type=int, default=1) parser.add_argument('--hc', type=int, default=1) parser.add_argument('--model', type=int, default=2) parser.add_argument('--positive', type=int, default=12) parser.add_argument('--negative', type=int, default=12) parser.add_argument('--osmembed', type=int, default=2) parser.add_argument('--preloadsize', type=int, default=1000000) parser.add_argument('--filter_l', type=int, default=3) parser.add_argument('--split_l', type=int, default=10000000) parser.add_argument('--max_cache', type=int, default=13) parser.add_argument('--lstm_dim', type=int, default=80) parser.add_argument('--cnn_hid', type=int, default=120) parser.add_argument('--optim', type=int, default=1) parser.add_argument('--weight', type=float, default=1) parser.add_argument('--multiple', type=int, default=0) parser.add_argument('--load_checkpoint', type=int, default=0) parser.add_argument('--check_point_id', type=str, default='1011114857') parser.add_argument('--max_len', type=int, default=20) args = parser.parse_args() print('epoch: ' + str(args.epoch)) print('train batch size: ' + str(args.train_batch_size)) print('test batch size: ' + str(args.test_batch_size)) print('osm_word_emb: ' + str(args.osm_word_emb)) print('hc: ' + str(args.hc)) print('positive: ' + str(args.positive)) print('negative: ' + str(args.negative)) print('osmembed: ' + str(args.osmembed)) print('preloadsize: ' + str(args.preloadsize)) print('split_l: ' + str(args.split_l)) print('max_cache: ' + str(args.max_cache)) print('lstm_dim: ' + str(args.lstm_dim)) print('filter_l: ' + str(args.filter_l)) print('cnn_hid: ' + str(args.cnn_hid)) print('optim: ' + str(args.optim)) print('weight: ' + str(args.weight)) print('emb: ' + str(args.emb)) print('multiple: ' + str(args.multiple)) print('load_checkpoint: ' + str(args.load_checkpoint)) print('check_point_id: ' + str(args.check_point_id)) print('model: ' + str(args.model)) print('max_len: ' + str(args.max_len)) #copy osmnames file orifile = 'data/osmnames' + str(args.positive) + '.txt' dstfile = 'data/osmnames' + time_str + '.txt' shutil.copy(orifile, dstfile) orifile = 'data/osmnames' + str(args.positive) + '.txt' os.environ["CUDA_VISIBLE_DEVICES"] = "0" START_WORD = 'start_string_taghu' device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) bool_mb_gaze = args.osm_word_emb bool_hc = args.hc lstm_layer_num = 1 pos_f = 'data/positive' + str(args.positive) + '.txt' neg_f = 'data/negative' + str(args.negative) + '.txt' bigram_file = 'model/' + time_str + '-bigram.txt' hc_file = 'model/' + time_str + '-hcfeat.txt' vocab_file = 'model/' + time_str + '-vocab.txt' PAD = 'paddxk' pad_index = 0 max_char_len = 20 max_len = args.max_len flex_feat_len = 3 # map words to its word embeding if args.emb == 2: glove_emb_file = 'data/GoogleNews-vectors-negative300.bin' emb_model = KeyedVectors.load_word2vec_format(glove_emb_file, binary=True) emb_dim = len(emb_model.wv['the']) glove = emb_model.wv elif args.emb == 3: glove_emb_file = 'data/glove.6B.100d.txt' glove, emb_dim = load_embeding(glove_emb_file) elif args.emb == 4: BertEmbed = 
BertEmbeds('data/uncased_vocab.txt', 'data/uncased_bert_vectors.txt') glove, emb_dim = BertEmbed.load_bert_embedding() # glove_emb_file = 'data/glove.6B.50d.txt' # glove, emb_dim = load_embeding(glove_emb_file) else: glove_emb_file = 'data/glove.6B.50d.txt' glove, emb_dim = load_embeding(glove_emb_file) # glove_emb_file = 'data/glove.6B.50d.txt' # glove, emb_dim = load_embeding(glove_emb_file) gazetteer_emb_file = 'data/osm_vector' + str(args.osmembed) + '.txt' split_c, x_train_pos, y_train_pos, data_index_pos, negative_num, word_hcfs, hc_feats_before_pos, word_to_ix, ix_to_word, target_vocab,listOfProb = \ load_training_data_os(pos_f,neg_f,START_WORD, bigram_file, hc_file,PAD,pad_index,flex_feat_len,max_char_len,max_len,bool_hc,args.split_l, multiple=args.multiple)# pos_unit = int(len(data_index_pos) * args.split_l / negative_num) print('memory % used:', psutil.virtual_memory()[2]) print('the number of unique words are:' + str(len(word_to_ix))) with open(vocab_file, 'w+', encoding='utf-8') as f: for word in word_to_ix.keys(): f.write(word + ' ' + str(word_to_ix[word]) + '\n') f.close() print('vocab file successfully saved:') # map words to its word embeding if bool_mb_gaze: gazetteer_emb, gaz_emb_dim = load_embeding(gazetteer_emb_file) else: gaz_emb_dim = 0 if bool_hc: hc_len = 6 else: hc_len = 0 matrix_len = len(word_to_ix) weight_dim = emb_dim + gaz_emb_dim + hc_len print('weight_dim: ' + str(weight_dim)) print('entity_dim: ' + str(max_len * weight_dim)) weights_matrix = np.zeros( (matrix_len, weight_dim) ) #np.random.normal(scale=0.6, size=(matrix_len, emb_dim+gaz_emb_dim)); words_found = 0 for i, word in enumerate(target_vocab): try: temp_glove = glove[word] words_found += 1 except KeyError: temp_glove = np.random.normal(scale=0.6, size=(emb_dim, )) if bool_mb_gaze: try: temp_gaz = gazetteer_emb[word] except KeyError: temp_gaz = np.random.normal(scale=0.6, size=(gaz_emb_dim, )) else: temp_gaz = [] if bool_hc: try: temp_hc = word_hcfs[word] except KeyError: temp_hc = np.zeros(hc_len) else: temp_hc = [] weights_matrix[i] = np.concatenate((temp_glove, temp_gaz, temp_hc), axis=None) segment_lens = [] garz_osm_pos_len = 0 garz_osm_pos_len += len(temp_glove) final_filter_w = [] final_filter_w.append([]) if bool_mb_gaze: garz_osm_pos_len += len(temp_gaz) segment_lens.append(garz_osm_pos_len) if bool_hc: segment_lens.append(len(temp_hc) + flex_feat_len) final_filter_w.append([1, 1, 1]) weights_matrix = torch.from_numpy(weights_matrix) tag_to_ix = {"p": 0, "n": 1} DROPOUT = 0.5 HIDDEN_DIM = args.cnn_hid # for HIDDEN_DIM in range(100,110,20): f_rec_name = 'experiments/record' + str(HIDDEN_DIM) + time_str + '.txt' f_record = open(f_rec_name, 'w+', encoding='utf-8') print(HIDDEN_DIM) print(time_str) print("lstm layer: {0}".format(lstm_layer_num), file=f_record) print("epoch number: {0}".format(args.epoch), file=f_record) print("train batch size: {0}".format(args.train_batch_size), file=f_record) print("max len: {0}".format(max_len), file=f_record) print("hidden: {0}".format(HIDDEN_DIM), file=f_record) '''weight of positive and negative classes''' weight_loss = args.weight weight = [1, float(weight_loss * negative_num / len(data_index_pos))] weight_tensor = torch.from_numpy(np.array(weight)).float().cuda() if args.model == 1: model_path = 'model/lstm_model_' model = BiLSTM(weights_matrix, len(tag_to_ix), HIDDEN_DIM, lstm_layer_num, flex_feat_len).to(device) criterion = nn.CrossEntropyLoss(weight=weight_tensor) elif args.model == 2: fileter_l = args.filter_l model_path = 'model/clstm_model_' model 
= C_LSTM(weights_matrix, HIDDEN_DIM, fileter_l, args.lstm_dim, len(tag_to_ix), flex_feat_len, DROPOUT).to(device) criterion = nn.CrossEntropyLoss(weight=weight_tensor) else: FILTER_SIZES = [1, 2, 3] OUTPUT_DIM = 2 model_path = 'model/cnn_model_' model = CNN(weights_matrix, HIDDEN_DIM, FILTER_SIZES, OUTPUT_DIM, flex_feat_len, DROPOUT).to(device) criterion = nn.BCEWithLogitsLoss(pos_weight=weight_tensor) # criterion.cuda() pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad) print("numer of trainable parameters: {0}".format(pytorch_total_params), file=f_record) print('numer of trainable parameters: ', str(pytorch_total_params)) if not args.optim: optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=1e-4) else: optimizer = optim.Adam(model.parameters(), lr=0.001) output_dir = model_path + args.check_point_id + '_checkpoint.pt' if args.load_checkpoint and os.path.exists(output_dir): checkpoint = torch.load(output_dir, map_location='cpu') start_epoch = checkpoint['epoch'] + 1 model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) print('Loaded the pretrain model, epoch:', checkpoint['epoch']) else: start_epoch = 0 # valid_acc_prev = 0 # valid_f1_prev = 0 index_list = random.sample(range(split_c), split_c) train_size = math.ceil(0.9 * split_c) train_idx, test_idx = index_list[:train_size + 1], index_list[train_size + 1:] if args.max_cache > split_c: max_cache_size = split_c else: max_cache_size = args.max_cache cache_data = [] cache_index = [] for epoch in range( start_epoch, args.epoch ): # again, normally you would NOT do 300 epochs, it is toy data model.train() cur_loss = [] for idx in train_idx: print(str(idx) + '...') first_l = int(idx / 26) last_l = idx % 26 neg_f_i = neg_f[0:len(neg_f) - 4] + chr(first_l + 97) + chr(last_l + 97) x_train_neg = [] y_train_neg = [] hc_feats_before_neg = [] if idx not in cache_index: x_train_neg, y_train_neg, hc_feats_before_neg, word_to_ix,ix_to_word,target_vocab,cur_neg_num = extract_feat(neg_f_i,0,x_train_neg, \ y_train_neg, hc_feats_before_neg,word_to_ix,ix_to_word,target_vocab,listOfProb,\ START_WORD,flex_feat_len,max_char_len,max_len,pad_index,multiple=args.multiple) if len(cache_index) < max_cache_size: cache_index.append(idx) cache_data.append([ x_train_neg, y_train_neg, hc_feats_before_neg, cur_neg_num ]) else: c_idx = cache_index.index(idx) x_train_neg = cache_data[c_idx][0] y_train_neg = cache_data[c_idx][1] hc_feats_before_neg = cache_data[c_idx][2] cur_neg_num = cache_data[c_idx][3] pos_st_idx = idx * pos_unit pos_en_idx = idx * pos_unit + int( len(data_index_pos) * cur_neg_num / negative_num) if pos_en_idx > len(data_index_pos) - 1: pos_en_idx = len(data_index_pos) - 1 x_train = [ x_train_pos[j] for j in data_index_pos[pos_st_idx:pos_en_idx] ] x_train.extend(x_train_neg) y_train = [ y_train_pos[j] for j in data_index_pos[pos_st_idx:pos_en_idx] ] y_train.extend(y_train_neg) hc_feats_before = [ hc_feats_before_pos[j] for j in data_index_pos[pos_st_idx:pos_en_idx] ] hc_feats_before.extend(hc_feats_before_neg) r_index_list = random.sample(range(len(y_train)), len(y_train)) x_train = np.array(x_train) y_train = np.array(y_train) hc_feats_before = np.array(hc_feats_before) loop_count = math.ceil(len(x_train) / args.preloadsize) for loop in range(loop_count): if (loop + 1) * args.preloadsize > len(x_train): last_idx = len(x_train) else: last_idx = (loop + 1) * args.preloadsize x_tr = torch.tensor( x_train[r_index_list[loop * 
args.preloadsize:last_idx]], dtype=torch.long) y_tr = torch.tensor( y_train[r_index_list[loop * args.preloadsize:last_idx]], dtype=torch.float) hc_tr = torch.tensor( hc_feats_before[r_index_list[loop * args.preloadsize:last_idx]], dtype=torch.float) # print('x_tr', len(x_tr)) train = TensorDataset(x_tr, y_tr, hc_tr) trainloader = DataLoader(train, batch_size=args.train_batch_size, pin_memory=True, num_workers=1) for sentence, tags, hcs in trainloader: sentence, tags = sentence.to(device), tags.to(device) # print('sen', len(sentence), 'tags', len(tags)) # print('sen size', sentence.size(), 'tags size', tags.size()) if list(sentence.size())[0] == 1: continue hcs = hcs.view(len(sentence), max_len, flex_feat_len).to(device) # Step 1. Remember that Pytorch accumulates gradients. model.zero_grad() predictions = model(sentence, hcs) # print('predictions size', len(predictions)) if args.model == 1 or args.model == 2: loss = criterion(predictions, tags.squeeze().long()) else: # print(tags.unsqueeze(1)) # print(predictions) # print(tags) tags_hot = F.one_hot(tags.long(), 2) tags_hot = tags_hot.type_as(predictions) loss = criterion(predictions, tags_hot) # loss = criterion(predictions, tags.unsqueeze(1)) # loss = criterion(predictions, tags.squeeze().long()) # loss = criterion(predictions, tags.squeeze().long()) loss.backward() cur_loss.append(loss.item()) optimizer.step() # Save a checkpoint torch.save( { 'epoch': epoch, 'optimizer_state_dict': optimizer.state_dict(), 'model_state_dict': model.state_dict(), 'loss': loss }, model_path + time_str + '_checkpoint.pt') print('epoch:' + str(epoch) + ' ' + str(np.mean(cur_loss))) correct = 0 incorrect_place = [] model.eval() test_sample_size = 0 for idx in test_idx: print(str(idx) + '...') first_l = int(idx / 26) last_l = idx % 26 neg_f_i = neg_f[0:len(neg_f) - 4] + chr(first_l + 97) + chr(last_l + 97) x_train_neg = [] y_train_neg = [] hc_feats_before_neg = [] if idx not in cache_index: x_train_neg, y_train_neg, hc_feats_before_neg, word_to_ix,ix_to_word,target_vocab,cur_neg_num = extract_feat(neg_f_i,0,x_train_neg, \ y_train_neg,hc_feats_before_neg,word_to_ix,ix_to_word,target_vocab,listOfProb,\ START_WORD,flex_feat_len,max_char_len,max_len,pad_index,multiple=args.multiple) if len(cache_index) < max_cache_size: cache_index.append(idx) cache_data.append([ x_train_neg, y_train_neg, hc_feats_before_neg, cur_neg_num ]) else: c_idx = cache_index.index(idx) x_train_neg = cache_data[c_idx][0] y_train_neg = cache_data[c_idx][1] hc_feats_before_neg = cache_data[c_idx][2] cur_neg_num = cache_data[c_idx][3] pos_st_idx = idx * pos_unit pos_en_idx = idx * pos_unit + int( len(data_index_pos) * cur_neg_num / negative_num) if pos_en_idx > len(data_index_pos) - 1: pos_en_idx = len(data_index_pos) - 1 x_train = [ x_train_pos[j] for j in data_index_pos[pos_st_idx:pos_en_idx] ] x_train.extend(x_train_neg) y_train = [ y_train_pos[j] for j in data_index_pos[pos_st_idx:pos_en_idx] ] y_train.extend(y_train_neg) hc_feats_before = [ hc_feats_before_pos[j] for j in data_index_pos[pos_st_idx:pos_en_idx] ] hc_feats_before.extend(hc_feats_before_neg) r_index_list = random.sample(range(len(y_train)), len(y_train)) x_train = np.array(x_train) y_train = np.array(y_train) hc_feats_before = np.array(hc_feats_before) test_sample_size += len(x_train) loop_count = math.ceil(len(x_train) / args.preloadsize) for loop in range(loop_count): if (loop + 1) * args.preloadsize > len(x_train): last_idx = len(x_train) else: last_idx = (loop + 1) * args.preloadsize x_test = torch.tensor( 
x_train[r_index_list[loop * args.preloadsize:last_idx]], dtype=torch.long) y_test = torch.tensor( y_train[r_index_list[loop * args.preloadsize:last_idx]], dtype=torch.float) hc_test = torch.tensor( hc_feats_before[r_index_list[loop * args.preloadsize:last_idx]], dtype=torch.float) test = TensorDataset(x_test, y_test, hc_test) testloader = DataLoader(test, batch_size=args.test_batch_size, pin_memory=True, num_workers=4) for data, tags, hcs in testloader: data, tags = data.to(device), tags.to(device) hcs = hcs.view(len(data), max_len, flex_feat_len).to(device) output = model(data, hcs) if args.model == 1: _, preds_tensor = torch.max(output, 1) elif args.model == 2: preds_tensor = output.argmax(dim=1) else: preds_tensor = output.argmax(dim=1) # preds_tensor = torch.round(torch.sigmoid(output)).squeeze(1) # preds_tensor = output.argmax(dim=1) correct += sum(preds_tensor.eq(tags).cpu().numpy()) data, tags = data.to('cpu'), tags.to('cpu') for i, se in enumerate(data): ixs = se.numpy() cur_s = [] for w in ixs: if w: cur_s.append(ix_to_word[w]) cur_s.append(str(tags[i].item())) if preds_tensor[i].eq(tags[i]): ccc = 0 #correct_place.append(cur_s) else: incorrect_place.append(cur_s) #write_place(correct_file, correct_place) if test_sample_size: incorrect_file = 'experiments/h' + str(HIDDEN_DIM) + 'epoch' + str( epoch) + time_str + 'inc.txt' write_place(incorrect_file, incorrect_place) print("test accuracy: %f", correct / test_sample_size) print("test accuracy: {0}".format(correct / test_sample_size), file=f_record) torch.save(model.state_dict(), model_path + time_str + 'epoch' + str(epoch) + '.pkl') print('total time:', time.time() - start_time) f_record.close()
    def train(self, train_file, dev_file=None, test_file=None):
        reader_train = Reader.reader(train_file, language='chn')
        reader_test = Reader.reader(test_file, language='chn')
        sents_train = reader_train.getWholeText()
        sents_test = reader_test.getWholeText()
        # sentsTrain = self.cutSentFromText(sentsTrain)
        # sentsTest = self.cutSentFromText(sentsTest)
        self.HyperParams.train_len = len(sents_train)
        self.HyperParams.test_len = len(sents_test)

        if self.HyperParams.using_English_data:
            reader_dev = Reader.reader(dev_file, language='eng')
            sents_dev = reader_dev.getWholeText()
            sents_dev = self.cutSentFromText(sents_dev)
            self.HyperParams.dev_den = len(sents_dev)

        if self.HyperParams.using_English_data:
            self.createAlphabet(sents_train + sents_dev)
        else:
            self.createAlphabet(sents_train)
        self.HyperParams.topic_size = len(self.topics)

        args = self.HyperParams.args()
        print(args)

        lr = self.HyperParams.lr
        Steps = self.HyperParams.Steps

        model = None
        if self.HyperParams.biLSTM:
            print("using biLSTM...")
            model = biLSTM.Model(self.HyperParams)
        if self.HyperParams.biGRU:
            print("using biGRU...")
            model = biGRU.Model(self.HyperParams)
        if self.HyperParams.CNN:
            print("using CNN...")
            model = CNN.Model(self.HyperParams)
        if model is None:
            print("please select a model!")
            return

        # print(model)
        # param = [i for i in model.parameters() if i.requires_grad]
        # param = [i for i in model.parameters() if i.sparse]
        # sparseParam = [i for i in model.parameters() if not i.sparse]
        # Optimizer = oprim.Adam(param, lr=LearningRate)
        # SparseOprimizer = oprim.SparseAdam(sparseParam)
        # model.
        # Optimizer = oprim.Adam(model.parameters(), lr=LearningRate, weight_decay=self.HyperParams.decay)

        Optimizer = None
        if self.HyperParams.Adam:
            Optimizer = oprim.Adam(model.parameters(), lr=lr)
        if self.HyperParams.SGD:
            Optimizer = oprim.SGD(model.parameters(), lr=lr)
        if Optimizer is None:
            print("please select an optimizer!")
            return

        def accuracy(model, sents):
            pred_right_num_idx = 0
            pred_num_idx = 1
            gold_num_idx = 2
            evalList = [[0, 0, 0] for _ in range(self.HyperParams.label_size)]

            topic, text, label = self.processingRawStanceData(sents)
            topic = self.seq2id(topic)
            text = self.seq2id(text)
            label = self.label2id(label)
            topic = Variable(torch.LongTensor(topic))
            text = Variable(torch.LongTensor(text))
            label = Variable(torch.LongTensor(label))

            Y = model(topic, text)
            C = (torch.max(Y, 1)[1].view(label.size()).data == label.data).sum()
            pred_list = torch.max(Y, 1)[1].view(label.size()).data.tolist()
            label_list = label.data.tolist()
            for i in range(len(evalList)):
                for j in range(len(label_list)):
                    if label_list[j] == i:
                        evalList[i][gold_num_idx] += 1
                        if label_list[j] == pred_list[j]:
                            evalList[i][pred_right_num_idx] += 1
                    if pred_list[j] == i:
                        evalList[i][pred_num_idx] += 1
            P_R_F1_list = [
                Eval(pred_right_num=evalList[i][pred_right_num_idx],
                     pred_num=evalList[i][pred_num_idx],
                     gold_num=evalList[i][gold_num_idx]).P_R_F1
                for i in range(len(evalList))
            ]
            return float(C) / len(sents) * 100, C, len(sents), P_R_F1_list

        def getTextBatchList(text, batch):
            textBatchlist = []
            textBatchNum = len(text) // batch
            if len(text) % batch != 0:
                textBatchNum += 1
            if textBatchNum - 1 < 0:
                print("wrong: func getTextBatchList's text's length is 0!!!")
                return []
            end = 0
            for i in range(textBatchNum - 1):
                begin = end
                end += batch
                textBatchlist.append(text[begin:end])
            textBatchlist.append(text[end:len(text)])
            return textBatchlist

        file = open(self.HyperParams.write_file_name, 'a+')
        file.write(args)
        file.close()

        sents_train = sents_train
        if self.HyperParams.using_English_data:
            sents_dev = sents_dev
            sents_test = sents_test

        batch_size = self.HyperParams.batch_size
        best_F1 = 0
        best_acc = 0
        for step in range(Steps):
            file = open(self.HyperParams.write_file_name, 'a+')
            total_loss = torch.Tensor([0])
            cnt = 0
            train_correct = 0
            random.shuffle(sents_train)
            text_batch_list = getTextBatchList(sents_train, batch_size)
            for batch in text_batch_list:
                model.train()
                Optimizer.zero_grad()
                # SparseOprimizer.zero_grad()
                topic, text, label = self.processingRawStanceData(batch)
                topic = self.seq2id(topic)
                text = self.seq2id(text)
                label = self.label2id(label)
                topic = Variable(torch.LongTensor(topic))
                text = Variable(torch.LongTensor(text))
                label = Variable(torch.LongTensor(label))

                Y = model(topic, text)
                Loss = F.cross_entropy(Y, label)
                Loss.backward()
                if self.HyperParams.clip_grad:
                    torch.nn.utils.clip_grad_norm(model.parameters(), 10)
                Optimizer.step()

                cnt += 1
                if cnt % 500 == 0:
                    print(cnt)
                total_loss += Loss.data
                train_correct += (torch.max(Y, 1)[1].view(
                    label.size()).data == label.data).sum()

            if self.HyperParams.lr_decay:
                adjust_learning_rate(
                    Optimizer,
                    self.HyperParams.lr / (1 + (step * 3.01 + 1) * self.HyperParams.decay))

            total_loss /= len(sents_train)
            train_acc = float(train_correct) / len(sents_train) * 100

            FAVOR_index = self.HyperParams.label_alpha.string2id["favor"]
            AGAINST_index = self.HyperParams.label_alpha.string2id["against"]
            if self.HyperParams.using_English_data:
                dev_acc, dev_correct, dev_num, P_R_F1_dev_list = accuracy(model, sents_dev)
            test_acc, test_correct, test_num, P_R_F1_test_list = accuracy(model, sents_test)
            if self.HyperParams.using_English_data:
                dev_mean_F1 = (P_R_F1_dev_list[FAVOR_index][2] +
                               P_R_F1_dev_list[AGAINST_index][2]) / 2
            test_mean_F1 = (P_R_F1_test_list[FAVOR_index][2] +
                            P_R_F1_test_list[AGAINST_index][2]) / 2

            if best_F1 < test_mean_F1:
                best_F1 = test_mean_F1
                best_acc = test_acc

            if self.HyperParams.using_Chinese_data:
                output = "Step: {} - loss: {:.6f} Train acc: {:.4f}%{}/{} Test acc: {:.4f}%{}/{} F1={:.4f}".format(
                    step, total_loss.numpy()[0], train_acc, train_correct,
                    len(sents_train), test_acc, test_correct, int(test_num), test_mean_F1)
            else:
                output = "Step: {} - loss: {:.6f} Train acc: {:.4f}%{}/{} Dev acc: {:.4f}%{}/{} Test acc: {:.4f}%{}/{} F1={:.4f}".format(
                    step, total_loss.numpy()[0], train_acc, train_correct,
                    len(sents_train), dev_acc, dev_correct, int(dev_num),
                    test_acc, test_correct, int(test_num), test_mean_F1)
            print(output)
            file.write(output + "\n")
            file.close()

        file = open(self.HyperParams.write_file_name, 'a+')
        output = 'Total: best F1 = ' + str(best_F1) + ' acc = ' + str(best_acc)
        print(output)
        file.write(output + "\n")
        file.close()
    print(f'Starting fold: {fold}')

    # Setup subsamplers
    train_subsampler = SubsetRandomSampler(train_ids)
    valid_subsampler = SubsetRandomSampler(valid_ids)

    # Setup current fold's loaders
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_subsampler)
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_subsampler)

    # Initialize model, optimizer, LR-scheduler
    model = CNN(architecture, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=initial_lr)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True)

    # Setup variables for metric collection
    fold_v_loss = []
    fold_t_loss = []
    fold_aucs = []
    fold_aucs_by_class = []
    best_auc = 0.0

    # Training loop
    for epoch in range(0, num_epochs):
        print(f'Starting epoch: {epoch + 1}')

        # Train
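# Enclosing loop sketch: fold, train_ids, and valid_ids are typically produced
# by sklearn's KFold; the split settings below are assumptions, not taken from
# the original code.
from sklearn.model_selection import KFold

kfold = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_ids, valid_ids) in enumerate(kfold.split(dataset)):
    # ... per-fold setup and training as shown above ...
    pass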
validation_set = DrivingConstraintDataset('data/image_2/',
                                          'data/validation_csv.csv', transform)
training_loader = DataLoader(dataset=training_set,
                             shuffle=shuffle,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set,
                               shuffle=shuffle,
                               batch_size=batch_size,
                               num_workers=num_workers,
                               pin_memory=pin_memory)

# initialize model
model = CNN().to(device)
for name, param in model.resnet50.named_parameters():
    if 'fc' in name:
        param.requires_grad = True
    else:
        param.requires_grad = pretrained

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[5, 10, 15],
                                                 gamma=0.1,
                                                 last_epoch=-1)


def check_accuracy(loader, model):
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1

validation_set = DrivingConstraintDataset('data/image_2/',
                                          'data/validation_csv.csv', transform)
validation_loader = DataLoader(dataset=validation_set,
                               shuffle=shuffle,
                               batch_size=batch_size,
                               num_workers=num_workers,
                               pin_memory=pin_memory)

model = CNN().to(device)
model.load_state_dict(torch.load(args.loadmodel)['model_state_dict'])


def validate():
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in validation_loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            predictions = torch.tensor(
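# Hedged completion sketch: the original validate() above breaks off at
# "predictions = torch.tensor(". Assuming a sigmoid-output binary head and a
# 0.5 decision cutoff (both assumptions), the rest of the loop could compute
# accuracy like this standalone helper.
def validate_binary(model, loader, device, threshold=0.5):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            predictions = (scores > threshold).float().view(-1)  # assumed cutoff
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)
    return num_correct / num_samples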
def main(): parser = argparse.ArgumentParser() parser.add_argument('-model', choices=['logistic', 'CNN', 'SVM', 'Fisher'], required=True) # This argument is only for logistic regression parser.add_argument('-opt', choices=['SGD', 'Langevin'], required=False) # This argument is only for CNN parser.add_argument('-load', choices=['y', 'n'], required=False) # This argument is only for SVM parser.add_argument('-kernel', choices=['linear', 'RBF'], required=False) # parser.add_argument('-pattern', choices=['classification','detection'], required=True) args = parser.parse_args() batch_size = 10 lr = 0.001 epochs = 1 # train data X_pos = [] X_neg = [] # label y_pos = [] y_neg = [] # # obtain positive data X_pos, y_pos = obtainSamples(X_pos, y_pos, pos_path, True, args.model) # obtain negative data X_neg, y_neg = obtainSamples(X_neg, y_neg, neg_path, False, args.model) X = [] y = [] X.extend(X_pos) X.extend(X_neg) y.extend(y_pos) y.extend(y_neg) X_test, y_test = generateclassify(args.model) # train if args.model == 'logistic': model = LogisticModel(n_iterations=1000, optimizer=args.opt) model.fit(np.array(X), np.array(y)) if args.model == 'CNN': train_dataset = MyDataset(X, y, transform=transforms.ToTensor()) train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) model = CNN(3, 2) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9) filename = "CNNLOG" for epoch in range(epochs): running_acc = 0.0 running_loss = 0.0 visualize = [] labels = [] for step, data in enumerate(train_loader, 1): feature, label = data print("feature:", feature.shape) feature = Variable(feature) label = Variable(label) # forward out, inter_layer = model(feature) visualize.extend(list(inter_layer.detach().numpy())) labels.extend(list(label.detach().numpy())) loss = criterion(out, label) # backward optimizer.zero_grad() loss.backward() optimizer.step() _, pred = torch.max(out, dim=1) # accumulate loss running_loss += loss.item() * label.size(0) # accumulate the number of correct samples current_num = (pred == label).sum() acc = (pred == label).float().mean() running_acc += current_num.item() if step % 10 == 0: f = open(filename, 'a') f.write( "epoch: {}/{}, loss: {:.6f}, running_acc: {:.6f}\n". format(epoch + 1, epochs, loss.item(), acc.item())) f.close() print("epoch: {}/{}, loss: {:.6f}, running_acc: {:.6f}". 
format(epoch + 1, epochs, loss.item(), acc.item())) torch.save(model, "Model/%d" % (epoch + 1)) f = open(filename, 'a') f.write("epoch: {}, loss: {:.6f}, accuracy: {:.6f}\n".format( epoch + 1, running_loss, running_acc / len(X))) f.close() print("epoch: {}, loss: {:.6f}, accuracy: {:.6f}".format( epoch + 1, running_loss, running_acc / len(X))) CNNTest(model, X_test, y_test, filename) print("drawing figure") dim1, dim2 = visualize[0].shape length = len(visualize) visualize = np.array(visualize).reshape((length * dim1, dim2)) title1 = "Visualization of the intermediate-layer" # title2 = "Visualization of the PCA feature" TSNEVisualization(visualize, labels, title1, title1) if args.model == 'SVM': if args.kernel == 'RBF': model = SVC(kernel='rbf') else: model = SVC(kernel='linear') model.fit(X, y) # 支持向量个数 n_Support_vector = model.n_support_ #支持向量索引 Support_vector_index = model.support_ # 方向向量W if args.kernel == 'linear': W = model.coef_ # 截距项b b = model.intercept_ pca = PCA(n_components=2) pca_results = pca.fit_transform(X) SVM_plot(pca_results, np.array(y), Support_vector_index, W, b, args.kernel + "1") # Gkernel = Kernel() # if args.kernel=="RBF": # model = SVMTrainer(Gkernel.gaussian(0.3), 0.5) # else: # model = SVMTrainer(Gkernel.linear(), 0.5) # X = np.array(X).astype(float) # predictor = model.train(X,np.array(y)) # SVM_plot(pca_results,np.array(y),predictor._support_vector_indices,predictor._weights,predictor._bias,args.kernel) # filename = "Support_vector.txt" # # np.save("Support_vector", predictor._support_vectors) # f = open(filename,'w') # for vector in predictor._support_vectors: # f.write(str(vector)+"\n") # f.write("------------------------------------------------------------------------------------------------\n") # f.write("------------------------------------------------------------------------------------------------\n") # f.close() if args.model == 'Fisher': model = Fisher() model.fit(np.array(X_pos), np.array(X_neg)) print( "Doing classification---------------------------------------------------" ) accuracy = 0.0 for hog_feature in X_test: if args.model == 'logistic': pred = model.predict(hog_feature) if pred < 0.1: accuracy += 1 if args.model == 'SVM': pred = model.predict([hog_feature])[0] if pred < 0.1: accuracy += 1 if args.model == 'Fisher': pred = model.predict(hog_feature) if pred < 0.1: accuracy += 1 print("accuracy: ", accuracy / len(X_test)) print( "Doing detection---------------------------------------------------------" ) test_path = 'detection/' test_folders = os.listdir(test_path) accuracy = {} total_pic = 0 for folder in test_folders: try: test_sample = os.listdir(os.path.join(test_path, folder)) except: print("Not a directory!") continue face_num = 0 total_pic += 1 for path in test_sample: if path == "ground_truth.txt": with open(os.path.join(test_path, folder, path), 'r') as f: face_num = int(f.readlines()[0]) continue print("The size of bounding box: ", path) try: images = os.listdir(os.path.join(test_path, folder, path)) except: continue count = 0 features = [] prelabel = [] for image in images: try: with Image.open( os.path.join(test_path, folder, path, image)) as img: hog_feature = hog(img, orientations=9, pixels_per_cell=(16, 16), cells_per_block=(2, 2), visualize=False) features.append(hog_feature) if args.model == 'logistic': pred = model.predict(hog_feature) if pred > 0: count += 1 if args.model == 'SVM': pred = model.predict([hog_feature])[0] prelabel.append(pred) if pred > 0: count += 1 if args.model == 'CNN': test_dataset = MyDataset( img, 
transform=transforms.ToTensor()) test_loader = DataLoader(dataset=test_dataset) model.eval() for i, data in enumerate(test_loader, 1): feature, label = data with torch.no_grad(): feature = Variable(feature) out, inter_layer = model(feature) _, pred = torch.max(out, 1) count += (pred == 1).sum().item() if args.model == 'Fisher': pred = model.predict(hog_feature) if pred > 0: count += 1 if pred > 0: tmppath = "detection_results/" + args.model + "/" + folder + '/' + path + "/" if not os.path.exists("detection_results/" + args.model + "/"): os.mkdir("detection_results/" + args.model + "/") if not os.path.exists("detection_results/" + args.model + "/" + folder + '/'): os.mkdir("detection_results/" + args.model + "/" + folder + '/') if not os.path.exists(tmppath): os.mkdir(tmppath) img.save(tmppath + image) except: continue if face_num == count: size = int(path) if size not in accuracy.keys(): accuracy[size] = 0.0 accuracy[size] += 1.0 print("ground_truth: ", face_num, "prediction number: ", count) for key in accuracy.keys(): print("bounding box size: ", key, " accuracy: ", accuracy[key] / total_pic)
)
num_epochs = 10
learning_rate = 0.00001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1

dataset = CatsAndDogsDataset("train", "train_csv.csv", transform=transform)
train_set, validation_set = torch.utils.data.random_split(dataset, [20000, 5000])
train_loader = DataLoader(dataset=train_set,
                          shuffle=shuffle,
                          batch_size=batch_size,
                          num_workers=num_workers,
                          pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set,
                               shuffle=shuffle,
                               batch_size=batch_size,
                               num_workers=num_workers,
                               pin_memory=pin_memory)

model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for name, param in model.inception.named_parameters():
    if "fc.weight" in name or "fc.bias" in name:
        param.requires_grad = True
    else:
        param.requires_grad = train_CNN


def check_accuracy(loader, model):
    if loader == train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")
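# Quick sanity check (a sketch, not from the original code): with train_CNN set
# to False, only the replaced Inception fc head should remain trainable.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f'trainable parameters: {trainable} / {total}')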