def load_model(self):
    config = self.load_config()
    vocabproc = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
        os.path.join(self.root, "text.vocab"))
    print("Text Vocabulary Size: {:d}".format(len(vocabproc.vocabulary_)))
    rcnn = TextRCNN(
        sequence_length=config["sequence_length"],
        num_classes=config["classes"],
        vocab_size=len(vocabproc.vocabulary_),
        word_embedding_size=config["word_embedding_size"],
        context_embedding_size=config["context_embedding_size"],
        cell_type=config["cell_type"],
        hidden_size=config["hidden_size"],
        l2_reg_lambda=config["l2_reg_lambda"],
        W_text_trainable=config["W_text_trainable"])
    self.sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    restore_from_lastest(self.sess, saver, self.root)
    return rcnn, vocabproc
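# A minimal usage sketch, assuming `loader` is an instance of the class that
# owns load_model() (with `root`, `sess`, and `load_config` as above).
# `rcnn.predictions` is an assumption about the TextRCNN graph; this file
# only confirms input_text / input_y / dropout_keep_prob:
#
#   rcnn, vocabproc = loader.load_model()
#   ids = list(vocabproc.transform(["price is fair and shipping is fast"]))
#   pred = loader.sess.run(rcnn.predictions,
#                          {rcnn.input_text: ids,
#                           rcnn.dropout_keep_prob: 1.0})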
def main(config):
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    print("\t \t \t the model name is {}".format(config.model_name))

    device, n_gpu = get_device()
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(config.seed)
        # make cuDNN use deterministic algorithms so runs are reproducible
        torch.backends.cudnn.deterministic = True

    """ SST-2 data preparation """
    text_field = data.Field(tokenize='spacy', lower=True,
                            include_lengths=True,
                            fix_length=config.sequence_length)
    label_field = data.LabelField(dtype=torch.long)
    train_iterator, dev_iterator, test_iterator = load_sst2(
        config.data_path, text_field, label_field, config.batch_size,
        device, config.glove_word_file, config.cache_path)

    """ word-embedding preparation """
    pretrained_embeddings = text_field.vocab.vectors
    model_file = os.path.join(config.model_dir, 'model1.pt')

    """ model preparation """
    if config.model_name == "TextCNN":
        from TextCNN import TextCNN
        filter_sizes = [int(val) for val in config.filter_sizes.split()]
        model = TextCNN.TextCNN(config.glove_word_dim, config.filter_num,
                                filter_sizes, config.output_dim,
                                config.dropout, pretrained_embeddings)
    elif config.model_name == "TextRNN":
        from TextRNN import TextRNN
        model = TextRNN.TextRNN(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)
    elif config.model_name == "LSTMATT":
        from LSTM_ATT import LSTMATT
        model = LSTMATT.LSTMATT(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)
    elif config.model_name == 'TextRCNN':
        from TextRCNN import TextRCNN
        model = TextRCNN.TextRCNN(config.glove_word_dim, config.output_dim,
                                  config.hidden_size, config.num_layers,
                                  config.bidirectional, config.dropout,
                                  pretrained_embeddings)
    elif config.model_name == "TransformerText":
        from TransformerText import TransformerText
        model = TransformerText.TransformerText(
            config.head_num, config.encode_layer, config.glove_word_dim,
            config.d_model, config.d_ff, config.output_dim,
            config.dropout, pretrained_embeddings)
    else:
        raise ValueError("unknown model name: {}".format(config.model_name))

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    if config.do_train:
        train(config.epoch_num, model, train_iterator, dev_iterator,
              optimizer, criterion, ['0', '1'], model_file,
              config.log_dir, config.print_step, 'word')

    model.load_state_dict(torch.load(model_file))
    test_loss, test_acc, test_report = evaluate(model, test_iterator,
                                                criterion, ['0', '1'], 'word')
    print("-------------- Test -------------")
    print("\t Loss: {} | Acc: {} | Macro avg F1: {} | Weighted avg F1: {}".
          format(test_loss, test_acc, test_report['macro avg']['f1-score'],
                 test_report['weighted avg']['f1-score']))
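# One way to drive main() from a script. The attribute names mirror the
# config fields read above; every value here is an illustrative assumption,
# not a project default:
if __name__ == '__main__':
    from argparse import Namespace
    config = Namespace(
        model_dir='./model/', log_dir='./logs/', model_name='TextCNN',
        seed=1234, sequence_length=50, batch_size=64,
        data_path='./data/sst2/', cache_path='./cache/',
        glove_word_file='./data/glove.840B.300d.txt', glove_word_dim=300,
        filter_num=100, filter_sizes='3 4 5', output_dim=2,
        hidden_size=128, num_layers=2, bidirectional=True, dropout=0.5,
        head_num=8, encode_layer=6, d_model=300, d_ff=1024,
        do_train=True, epoch_num=10, print_step=100)
    main(config)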
def aSampleTest(choose_model):
    x, y, vocabulary, vocabulary_inv, labelToindex, sentenceToindex, \
        labelNumdict = data_processing.load_input_data(MAXLENGTH)
    # pre-trained word2vec weights
    weight_array = pickle.load(
        open(os.path.join(DATA_PATH, 'weight_array'), 'rb'))

    # "Fair price, fast shipping, but a few stains!"
    test_sample_x = '价格公正,物流很快,但有些污垢!'
    test_sample_y = 1
    test_sample_seg = []

    # strip punctuation, digits, and Latin letters
    punctuation = re.compile(
        u"[-~!@#$%^&*()_+`=\[\]\\\{\}\"|;':,./<>?·!@#¥%……&*()——+【】、;‘:“”,。、《》?「『」』 ^┻]"
    )
    digit = re.compile(u"[0-9]")
    number = re.compile(u"[a-zA-Z]")
    test_sample_x = punctuation.sub("", test_sample_x)
    test_sample_x = digit.sub("", test_sample_x)
    test_sample_x = number.sub("", test_sample_x)

    for word in jieba.cut(test_sample_x):
        if (word not in data_processing.get_stop_words().keys()
                and word in vocabulary.keys()):
            test_sample_seg.append(word)

    test_sample_seg_pad = data_processing.pad_sentences([test_sample_seg],
                                                        MAXLENGTH)
    test_x, test_y = data_processing.build_input_data(test_sample_seg_pad,
                                                      test_sample_y,
                                                      vocabulary)
    test_x = Variable(torch.LongTensor(test_x))
    test_y = Variable(torch.LongTensor(test_y))
    if use_cuda:
        test_x = test_x.cuda()
        test_y = test_y.cuda()

    # choose the model to test
    if choose_model == 'TextCNN':
        model = TextCNN(1, KERNEL_NUM, len(vocabulary), EMBEDDING_DIM,
                        len(labelToindex))
    elif choose_model == 'BiLSTM':
        model = BiLSTM(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                       len(labelToindex))
    elif choose_model == 'TextCNN_BN':
        model = TextCNN_BN(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                           KERNEL_NUM, len(labelToindex), MAXLENGTH)
    elif choose_model == 'BiLSTM_b':
        model = BiLSTM_b(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                         len(labelToindex), MAXLENGTH)
    elif choose_model == 'CNN_BiLSTM_a':
        model = CNN_BiLSTM_a(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                             KERNEL_NUM, HIDDEN_SIZE, len(labelToindex),
                             MAXLENGTH)
    elif choose_model == 'BiGRU':
        model = BiGRU(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                      len(labelToindex), MAXLENGTH)
    elif choose_model == 'CNN_with_pretrained_embedding':
        model = TextCNN_BN_with_pretrained_embed(len(vocabulary),
                                                 EMBEDDING_DIM, KERNEL_SIZES,
                                                 KERNEL_NUM, len(labelToindex),
                                                 MAXLENGTH, weight_array)
    elif choose_model == 'TextRCNN':
        model = TextRCNN(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                         HIDDEN_SIZE, KERNEL_NUM, len(labelToindex),
                         MAXLENGTH, weight_array)
    elif choose_model == 'TextCNN_multi_channel':
        model = TextCNN_multi_channel(len(vocabulary), EMBEDDING_DIM,
                                      KERNEL_SIZES, KERNEL_NUM,
                                      len(labelToindex), MAXLENGTH,
                                      weight_array)
    else:
        raise ValueError("unknown model: {}".format(choose_model))

    # update the timestamp to match your saved checkpoint
    model.load_state_dict(
        torch.load(os.path.join(MODEL_PATH,
                                choose_model + '_201807102300.pkl')))
    if use_cuda:
        model = model.cuda()

    model_out = model(test_x)  # (1, 3)
    _, pre_y = torch.max(model_out, 1)
    print("Predicted label:", pre_y.item())
def solver(mydata, config):
    # output dir
    timestamp = time.strftime('%Y-%m-%d-%Hh-%Mm-%Ss')
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # get RCNN
    rcnn = TextRCNN(sequence_length=config["sequence_length"],
                    num_classes=mydata.getClasses(),
                    vocab_size=mydata.vocabSize,
                    word_embedding_size=config["word_embedding_size"],
                    context_embedding_size=config["context_embedding_size"],
                    cell_type=config["cell_type"],
                    hidden_size=config["hidden_size"],
                    l2_reg_lambda=config["l2_reg_lambda"],
                    W_text_trainable=config["W_text_trainable"],
                    out_dir=out_dir)
    sess = rcnn.sess

    # Checkpoint directory. TensorFlow assumes this directory already
    # exists, so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # save vocab/config/category
    mydata.saveCategory2Index(os.path.join(out_dir, "category_index"))
    mydata.vocabproc.save(os.path.join(out_dir, "text.vocab"))
    Utils.showAndSaveConfig(config, os.path.join(out_dir, "config.txt"))
    print("[*]parameter number: %s" % (getParameterNumbers()))

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    # Initialize all variables
    sess.run(tf.global_variables_initializer())
    restore_from = config["restore_from"]
    if restore_from is not None:
        saver.restore(sess, restore_from)
        print("[*]restore success")

    # Pre-trained word2vec
    wordInit = {}
    if config["LoadGoogleModel"] and restore_from is None:
        print("[*]Loading Google Pre-trained Model")
        # initial matrix with random uniform values
        initW = np.random.uniform(
            -0.25, 0.25, (mydata.vocabSize, config["word_embedding_size"]))
        # load any vectors from the word2vec file
        word2vec = config["Word2Vec"]
        print(" [*]Load word2vec file {0}".format(word2vec))
        cnt_word_in_word2vec = 0
        with open(word2vec, "rb") as f:
            header = f.readline()
            vocab_size, layer1_size = map(int, header.split())
            print(" [*]Google:vocab_size:%s" % (vocab_size))
            binary_len = np.dtype('float32').itemsize * layer1_size
            for line in range(vocab_size):
                word = []
                while True:
                    ch = f.read(1).decode('latin-1')
                    if ch == ' ':
                        word = ''.join(word)
                        break
                    if ch != '\n':
                        word.append(ch)
                idx = mydata.vocabproc.vocabulary_.get(word.lower())
                if idx != 0:
                    if idx not in wordInit:
                        wordInit[idx] = word
                        initW[idx] = np.fromstring(f.read(binary_len),
                                                   dtype='float32')
                        cnt_word_in_word2vec += 1
                    elif word == word.lower():
                        # prefer the vector of the lower-cased surface form
                        wordInit[idx] = word
                        initW[idx] = np.fromstring(f.read(binary_len),
                                                   dtype='float32')
                    else:
                        f.read(binary_len)
                else:
                    f.read(binary_len)
        print(" [*]Load Google Model success: word in Word2Vec :%s total word:%s"
              % (cnt_word_in_word2vec, mydata.vocabSize))
        sess.run(rcnn.W_text.assign(initW))
        print("[*]Success to load pre-trained word2vec model!\n")

    # start training
    # step && learning rate
    stlr = STLR(1e-3, 1e-2, 200, 600)
    step = 0
    while True:
        batch = mydata.nextBatch(config["BatchSize"])
        learning_r = stlr.getLearningRate(step)
        feed_dict = {
            rcnn.input_text: batch[0],
            rcnn.input_y: batch[1],
            rcnn.dropout_keep_prob: config["droupout"],
            rcnn.learning_rate: learning_r
        }
        _, step, summaries, loss, accuracy = sess.run([
            rcnn.train_op, rcnn.global_step, rcnn.train_summary_op,
            rcnn.loss, rcnn.accuracy
        ], feed_dict)
        rcnn.summary_writer.add_summary(summaries, step)

        # Training log display
        if step % config["TraingLogEverySteps"] == 0:
            time_str = datetime.datetime.now().isoformat()
            print(" [*] step %s; loss %s; acc %s; lr %.6f "
                  % (step, loss, accuracy, learning_r))

        # Evaluation
        if step % config["TestEverySteps"] == 0:
            test_data = mydata.getTestData()
            test_size = len(test_data[0])
            correct_predict_count = 0
            dev_loss = 0
            for i in range(0, test_size, 500):
                x_test = test_data[0][i:i + 500]
                y_test = test_data[1][i:i + 500]
                feed_dict_dev = {
                    rcnn.input_text: x_test,
                    rcnn.input_y: y_test,
                    rcnn.dropout_keep_prob: 1.0
                }
                summaries_dev, loss, accuracy = sess.run(
                    [rcnn.dev_summary_op, rcnn.loss, rcnn.accuracy],
                    feed_dict_dev)
                # rcnn.summary_writer.add_summary(summaries_dev, step)
                correct_predict_count += int(0.5 + accuracy * len(x_test))
                dev_loss += loss * len(x_test) / test_size

            # dev summary
            dev_accuracy = correct_predict_count / test_size
            rcnn.summary_writer.add_summary(
                tf.Summary(value=[
                    tf.Summary.Value(tag="dev_loss", simple_value=dev_loss)
                ]), step)
            rcnn.summary_writer.add_summary(
                tf.Summary(value=[
                    tf.Summary.Value(tag="dev_accu", simple_value=dev_accuracy)
                ]), step)
            time_str = datetime.datetime.now().isoformat()
            print("\n[*]Test:%s step %s, loss %.6f, acc %.6f "
                  % (time_str, step, dev_loss, dev_accuracy))

        # Model checkpoint
        if step % 1000 == 0:
            path = saver.save(sess, checkpoint_prefix, global_step=step)
            print("Saved model checkpoint to {}\n".format(path))
def getBadCases(choose_model):
    badcases_contents = []
    badcases_scores = []
    badcases_true_labels = []
    badcases_pred_labels = []
    x, y, vocabulary, vocabulary_inv, labelToindex, _, labelNumdict = \
        data_processing.load_input_data(MAXLENGTH)
    # pre-trained word2vec weights
    weight_array = pickle.load(
        open(os.path.join(DATA_PATH, 'weight_array'), 'rb'))

    # choose the model to test
    if choose_model == 'TextCNN':
        model = TextCNN(1, KERNEL_NUM, len(vocabulary), EMBEDDING_DIM,
                        len(labelToindex))
    elif choose_model == 'BiLSTM':
        model = BiLSTM(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                       len(labelToindex))
    elif choose_model == 'TextCNN_BN':
        model = TextCNN_BN(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                           KERNEL_NUM, len(labelToindex), MAXLENGTH,
                           weight_array=None)
    elif choose_model == 'BiLSTM_b':
        model = BiLSTM_b(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                         len(labelToindex), MAXLENGTH)
    elif choose_model == 'CNN_BiLSTM_a':
        model = CNN_BiLSTM_a(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                             KERNEL_NUM, HIDDEN_SIZE, len(labelToindex),
                             MAXLENGTH)
    elif choose_model == 'CNN_with_pretrained_embedding':
        model = TextCNN_BN_with_pretrained_embed(len(vocabulary),
                                                 EMBEDDING_DIM, KERNEL_SIZES,
                                                 KERNEL_NUM, len(labelToindex),
                                                 MAXLENGTH, weight_array)
    elif choose_model == 'TextRCNN':
        model = TextRCNN(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                         HIDDEN_SIZE, KERNEL_NUM, len(labelToindex),
                         MAXLENGTH, weight_array)
    elif choose_model == 'TextCNN_multi_channel':
        model = TextCNN_multi_channel(len(vocabulary), EMBEDDING_DIM,
                                      KERNEL_SIZES, KERNEL_NUM,
                                      len(labelToindex), MAXLENGTH,
                                      weight_array)
    else:
        raise ValueError("unknown model: {}".format(choose_model))

    # update the timestamp to match your saved checkpoint
    model.load_state_dict(
        torch.load(os.path.join(MODEL_PATH,
                                choose_model + '_201807110957.pkl')))
    if use_cuda:
        model = model.cuda()
    print("Model loaded!")

    # all labeled samples
    all_samples = pd.read_csv(os.path.join(DATA_PATH, 'all_labeled_datas.csv'))
    all_samples_contents = all_samples['content']
    all_samples_scores = all_samples['score']
    all_samples_labels = all_samples['label']

    all_samples_pro_contents = []
    all_samples_pro_scores = []
    all_samples_pro_labels = []
    for content, score, label in zip(all_samples_contents, all_samples_scores,
                                     all_samples_labels):
        # strip punctuation, digits, and Latin letters
        punctuation = re.compile(
            u"[-~!@#$%^&*()_+`=\[\]\\\{\}\"|;':,./<>?·!@#¥%……&*()——+【】、;‘:“”,。、《》?「『」』 ]"
        )
        digit = re.compile(u"[0-9]")
        number = re.compile(u"[a-zA-Z]")
        content = punctuation.sub('', content)
        content = digit.sub("", content)
        content = number.sub("", content)
        if content != '':
            all_samples_pro_contents.append(content)
            all_samples_pro_scores.append(score)
            all_samples_pro_labels.append(label)

    all_pro_seg_contents = []
    all_pro_seg_scores = []
    all_pro_seg_labels = []
    sentenceToindex = {}
    for content, score, label in zip(all_samples_pro_contents,
                                     all_samples_pro_scores,
                                     all_samples_pro_labels):
        seg_content = jieba.cut(content)
        seg_con = []
        for word in seg_content:
            if (word not in data_processing.get_stop_words().keys()
                    and word in vocabulary.keys()):
                seg_con.append(word)
        # deduplicate texts
        tmpSentence = ''.join(seg_con)
        if tmpSentence != '':
            if tmpSentence in sentenceToindex:
                continue
            sentenceToindex[tmpSentence] = len(sentenceToindex)
            all_pro_seg_contents.append(seg_con)
            all_pro_seg_scores.append(score)
            all_pro_seg_labels.append(label)

    for i, ct in enumerate(all_pro_seg_contents):
        ct_pad = data_processing.pad_sentences([ct], MAXLENGTH)
        input_x, input_y = data_processing.build_input_data(
            ct_pad, all_pro_seg_labels[i], vocabulary)
        input_x = Variable(torch.LongTensor(input_x))
        input_y = Variable(torch.LongTensor(input_y))
        if use_cuda:
            input_x = input_x.cuda()
            input_y = input_y.cuda()
        model_out = model(input_x)
        _, pre_y = torch.max(model_out, 1)
        if pre_y.item() != input_y.item():
            badcases_contents.append(' '.join(all_pro_seg_contents[i]))
            badcases_scores.append(all_pro_seg_scores[i])
            badcases_true_labels.append(all_pro_seg_labels[i])
            badcases_pred_labels.append(pre_y.item())

    dataframe = pd.DataFrame({
        "content": badcases_contents,
        "user_score": badcases_scores,
        "true_label": badcases_true_labels,
        "pred_label": badcases_pred_labels
    })
    dataframe.to_csv(os.path.join(DATA_PATH, 'badcases.csv'),
                     index=False, sep=',')
    print("Badcases done!")
def train(config):
    print('parameters:')
    print(config)

    # load data
    print('load data')
    X, y = data_helper.process_data(config)  # X = [[seq1], [seq2]]  y = [...]

    # make vocab
    print('make vocab...')
    word2index, label2index = data_helper.generate_vocab(X, y, config)

    # padding data
    print('padding data')
    input_x, input_y = data_helper.padding(X, y, config, word2index,
                                           label2index)

    # split data
    print('split data...')
    x_train, y_train, x_test, y_test, x_dev, y_dev = data_helper.split_data(
        input_x, input_y, config)
    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))

    print('training...')
    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(
            allow_soft_placement=config['allow_soft_placement'],
            log_device_placement=config['log_device_placement'])
        with tf.Session(config=sess_config) as sess:
            rcnn = TextRCNN(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_op = tf.train.AdamOptimizer(
                config['learning_rate']).minimize(rcnn.loss,
                                                  global_step=global_step)

            # output dir for models
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(
                os.path.join(os.path.curdir, 'runs', timestamp))
            if not os.path.exists(os.path.join(os.path.curdir, 'runs')):
                os.mkdir(os.path.join(os.path.curdir, 'runs'))
            if not os.path.exists(outdir):
                os.mkdir(outdir)
            print('writing to {}'.format(outdir))

            # checkpoint directory
            checkpoint_dir = os.path.abspath(
                os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.mkdir(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=config['num_checkpoints'])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    rcnn.input_x: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: config['dropout_keep_prob']
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, rcnn.loss, rcnn.accuracy],
                    feed_dict=feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print('{}: step {}, loss {}, acc {}'.format(
                    time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):
                feed_dict = {
                    rcnn.input_x: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, rcnn.loss, rcnn.accuracy],
                    feed_dict=feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print('{}: step {}, loss {}, acc {}'.format(
                    time_str, step, loss, accuracy))

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, config)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev)
                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test accuracy
            test_accuracy = sess.run(
                rcnn.accuracy,
                feed_dict={
                    rcnn.input_x: x_test,
                    rcnn.input_y: y_test,
                    rcnn.dropout_keep_prob: 1.0
                })
            print('Test dataset accuracy: {}'.format(test_accuracy))
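# A sample config for train(). The keys below are exactly those read in the
# function above; the values are illustrative assumptions, and data_helper
# may require further keys (paths, sequence length, batch size) that are not
# visible in this file:
example_config = {
    'allow_soft_placement': True,
    'log_device_placement': False,
    'learning_rate': 1e-3,
    'dropout_keep_prob': 0.5,
    'num_checkpoints': 5,
    'evaluate_every': 100,
    'checkpoint_every': 1000,
}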
def train_and_test(choose_model):
    # set up a log file
    logger = logging.getLogger(__name__)
    logger.setLevel(level=logging.DEBUG)
    runTime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    handler = logging.FileHandler('./logs/' + choose_model + '_' + runTime + '.log.txt')
    handler.setLevel(level=logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.info("************************************************************")

    x, y, vocabulary, vocabulary_inv, labelToindex, sentenceToindex, labelNumdict = \
        data_processing.load_input_data(MAXLENGTH)
    logger.info("The number of samples is: {}".format(len(sentenceToindex)))
    logger.info("The distribution of the all dataset label(With: 0-bad, 1-mid, 2-good):{}".format(labelNumdict))

    # pre-trained word2vec weights
    weight_array = pickle.load(open(os.path.join(DATA_PATH, 'weight_array'), 'rb'))

    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=42)
    print("Train Sample's distribution: {}".format(data_processing.get_labelNumdict(train_y)))
    print("Test Sample's distribution: {}".format(data_processing.get_labelNumdict(test_y)))
    logger.info("Train Sample's distribution: {}".format(data_processing.get_labelNumdict(train_y)))
    logger.info("Test Sample's distribution: {}".format(data_processing.get_labelNumdict(test_y)))
    logger.info("Some hyperparameters with lr:{}, wd:{}, embed:{}".format(LEARNING_RATE, WEIGHT_DECAY, EMBEDDING_DIM))

    train_x = torch.LongTensor(train_x)
    test_x = torch.LongTensor(test_x)
    train_y = torch.LongTensor(train_y)
    test_y = torch.LongTensor(test_y)
    trainDataset = data_processing.JDataset(train_x, train_y)
    testDataset = data_processing.JDataset(test_x, test_y)
    trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)
    testDataLoader = DataLoader(testDataset, batch_size=BATCH_SIZE, shuffle=False)

    # choose the model to train
    if choose_model == 'TextCNN':
        model = TextCNN(1, KERNEL_NUM, len(vocabulary), EMBEDDING_DIM, len(labelToindex))
    elif choose_model == 'BiLSTM':
        model = BiLSTM(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE, len(labelToindex))
    elif choose_model == 'TextCNN_BN':
        model = TextCNN_BN(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES, KERNEL_NUM,
                           len(labelToindex), MAXLENGTH, weight_array=None)
    elif choose_model == 'BiLSTM_b':
        model = BiLSTM_b(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                         len(labelToindex), MAXLENGTH, weight_array=weight_array)
    elif choose_model == 'CNN_BiLSTM_a':
        model = CNN_BiLSTM_a(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES, KERNEL_NUM,
                             HIDDEN_SIZE, len(labelToindex), MAXLENGTH,
                             weight_array=weight_array)
    elif choose_model == 'BiGRU':
        model = BiGRU(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                      len(labelToindex), MAXLENGTH)
    elif choose_model == 'CNN_with_pretrained_embedding':
        model = TextCNN_BN_with_pretrained_embed(len(vocabulary), EMBEDDING_DIM,
                                                 KERNEL_SIZES, KERNEL_NUM,
                                                 len(labelToindex), MAXLENGTH,
                                                 weight_array)
    elif choose_model == 'TextRCNN':
        model = TextRCNN(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES, HIDDEN_SIZE,
                         KERNEL_NUM, len(labelToindex), MAXLENGTH, weight_array)
    elif choose_model == 'TextCNN_multi_channel':
        model = TextCNN_multi_channel(len(vocabulary), EMBEDDING_DIM, KERNEL_SIZES,
                                      KERNEL_NUM, len(labelToindex), MAXLENGTH,
                                      weight_array)
    elif choose_model == 'Attention_rnn':
        model = Attention_RNN_model(len(vocabulary), EMBEDDING_DIM, HIDDEN_SIZE,
                                    len(labelToindex), weight_array)
    else:
        raise ValueError("unknown model: {}".format(choose_model))

    # print the model summary
    print(model)
    logger.info(model)
    if use_cuda:
        model = model.cuda()

    # loss function and optimizer (only optimize trainable parameters)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(parameters, lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    best_acc = 0
    best_model = None
    for step in range(N_STEPS):
        model.train()
        train_loss = 0.0
        train_acc = 0
        for i, data in enumerate(trainDataLoader):
            tr_x, tr_y = data
            if use_cuda:
                tr_x = Variable(tr_x).cuda()
                tr_y = Variable(tr_y).cuda()
            else:
                tr_x = Variable(tr_x)
                tr_y = Variable(tr_y)
            # forward
            out = model(tr_x)
            loss = criterion(out, tr_y)
            train_loss += loss.item() * len(tr_y)
            _, pre = torch.max(out, 1)
            num_acc = (pre == tr_y).sum()
            train_acc += num_acc.item()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % 100 == 0:
                print('[{}/{}], train loss is: {:.6f}, train acc is: {:.6f}'.format(
                    i + 1, len(trainDataLoader),
                    train_loss / ((i + 1) * BATCH_SIZE),
                    train_acc / ((i + 1) * BATCH_SIZE)))
                logger.info('[{}/{}], train loss is: {:.6f}, train acc is: {:.6f}'.format(
                    i + 1, len(trainDataLoader),
                    train_loss / ((i + 1) * BATCH_SIZE),
                    train_acc / ((i + 1) * BATCH_SIZE)))
        print('Step:[{}], train loss is: {:.6f}, train acc is: {:.6f}'.format(
            step, train_loss / (len(trainDataLoader) * BATCH_SIZE),
            train_acc / (len(trainDataLoader) * BATCH_SIZE)))
        logger.info('Step:[{}], train loss is: {:.6f}, train acc is: {:.6f}'.format(
            step, train_loss / (len(trainDataLoader) * BATCH_SIZE),
            train_acc / (len(trainDataLoader) * BATCH_SIZE)))

        model.eval()
        eval_loss = 0
        eval_acc = 0
        for i, data in enumerate(testDataLoader):
            te_x, te_y = data
            if use_cuda:
                te_x = Variable(te_x).cuda()
                te_y = Variable(te_y).cuda()
            else:
                te_x = Variable(te_x)
                te_y = Variable(te_y)
            out = model(te_x)
            loss = criterion(out, te_y)
            eval_loss += loss.item() * len(te_y)
            _, pre = torch.max(out, 1)
            num_acc = (pre == te_y).sum()
            eval_acc += num_acc.item()
        print('test loss is: {:.6f}, test acc is: {:.6f}'.format(
            eval_loss / (len(testDataLoader) * BATCH_SIZE),
            eval_acc / (len(testDataLoader) * BATCH_SIZE)))
        logger.info('test loss is: {:.6f}, test acc is: {:.6f}'.format(
            eval_loss / (len(testDataLoader) * BATCH_SIZE),
            eval_acc / (len(testDataLoader) * BATCH_SIZE)))
        if best_acc < (eval_acc / (len(testDataLoader) * BATCH_SIZE)):
            best_acc = eval_acc / (len(testDataLoader) * BATCH_SIZE)
            best_model = model.state_dict()
            print('best acc is {:.6f}, best model is changed.'.format(best_acc))
            logger.info('best acc is {:.6f}, best model is changed.'.format(best_acc))

    logger.info("Best acc is: {}".format(best_acc))
    logger.info("************************************************************")
    # persist the best-performing weights
    torch.save(best_model, os.path.join(MODEL_PATH, choose_model + '_' + runTime + '.pkl'))
random.seed(2020)
np.random.seed(2020)
torch.manual_seed(2020)
torch.cuda.manual_seed(2020)

device = torch.device("cuda:0")

training_set = MyDataset(url_train_data)
train_loader = DataLoader(dataset=training_set, batch_size=128, shuffle=True)
valid_data = MyDataset(url_valid_data)
valid_loader = DataLoader(dataset=valid_data, batch_size=128)

model = TextRCNN()
if torch.cuda.is_available():
    model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

# Train the model
for epoch in range(20):
    loss_mean = 0
    correct = 0
    total = 0
    total0 = 0
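# The epoch body is truncated in the source. A minimal sketch of how such a
# loop is typically completed, given the counters initialized above -- this
# is an assumption, not the original code (the batch unpacking and the role
# of `total0` in particular are guesses):
#
#     for inputs, labels in train_loader:
#         inputs, labels = inputs.cuda(), labels.cuda()
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         loss_mean += loss.item()
#         _, predicted = torch.max(outputs, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#     scheduler.step()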