def generate_tags(dataset, title, body):
    """Predict the top-5 tags for a (title, body) post with a trained RCNN.

    Args:
        dataset: name of the model subdirectory under 'model' that holds the
            trained checkpoint, the saved vocabulary and tags_df.csv.
        title: post title text.
        body: post body text.

    Returns:
        List of the five highest-scoring tag names, best first.
    """
    # Data Preparation
    # ==================================================
    path = os.path.join('model', dataset)
    text = data_helpers.preprocess(title, body)
    x_text = [data_helpers.clean_str(text)]

    # Restore vocab file
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
        os.path.join(path, 'vocab'))
    # BUG FIX: use transform(), not fit_transform(). The vocabulary was
    # restored from training; fitting again would add unseen tokens and
    # desynchronize token ids from the trained embedding matrix.
    x = np.array(list(vocab_processor.transform(x_text)))

    tags_df = pd.read_csv(os.path.join(path, 'tags_df.csv'),
                          encoding='utf8', index_col=0)
    tag_list = tags_df['TagName'].tolist()

    # prediction
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.compat.v1.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            intra_op_parallelism_threads=3,
            inter_op_parallelism_threads=3)
        sess = tf.compat.v1.Session(config=session_conf)
        with sess.as_default():
            rcnn = RCNN(
                num_classes=len(tag_list),
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=100,
                hidden_units=100,
                context_size=50,
                max_sequence_length=x.shape[1])
            # l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure. The optimizer variables are rebuilt
            # here so the checkpoint (saved with them) restores cleanly.
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.compat.v1.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(rcnn.loss)
            train_op = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

            # Loading checkpoint
            save_path = os.path.join(path, "model")
            saver.restore(sess, save_path)

            # predict (dropout disabled: keep prob 1.0)
            sequence_length = [len(sample) for sample in x]
            feed_dict = {
                rcnn.X: x,
                rcnn.sequence_length: sequence_length,
                # rcnn.max_sequence_length: max_sequence_length,
                rcnn.dropout_keep_prob: 1.0
            }
            prediction = sess.run([rcnn.predictions], feed_dict)[0][0]
            # indices of the 5 largest scores, in descending score order
            idx = prediction.argsort()[-5:][::-1]
            tags = [tag_list[i] for i in idx]
    return tags
def __init__(self, in_features, hidden_size, batch_first: bool = True, bidirectional: bool = False):
    """Build the encoder around an internal RCNN module.

    Args:
        in_features: size of each input feature vector.
        hidden_size: hidden dimension forwarded to the RCNN.
        batch_first: must stay True — sequence-first layout is unsupported.
        bidirectional: forwarded to the underlying RCNN.
    """
    super().__init__()
    # Only the batch-first input layout is implemented.
    assert batch_first, "only batch_first=True supported"
    self.rcnn = RCNN(in_features, hidden_size, bidirectional=bidirectional)
def train(): ''' Main function for training and simple evaluation. ''' # data loading threads train_produce_thread = Thread(target=TRAIN_DATASET.load, args=(False, )) train_produce_thread.start() test_produce_thread = Thread(target=TEST_DATASET.load, args=(False, )) test_produce_thread.start() with tf.Graph().as_default(): with tf.device('/gpu:' + str(GPU_INDEX)): # Note the global_step=batch parameter to minimize. # That tells the optimizer to increment the 'batch' parameter # for you every time it trains. batch = tf.get_variable('batch', [], initializer=tf.constant_initializer(0), trainable=False) bn_decay = get_bn_decay(batch) tf.summary.scalar('bn_decay', bn_decay) # Get model and losses rcnn_model = RCNN(BATCH_SIZE, NUM_POINT, TRAIN_DATASET.num_channel, bn_decay=bn_decay, is_training=True) placeholders = rcnn_model.placeholders end_points = rcnn_model.end_points loss, loss_endpoints = rcnn_model.get_loss() iou2ds, iou3ds = tf.py_func(train_util.compute_box3d_iou, [ tf.expand_dims(end_points['box_corners'], 1), tf.expand_dims(placeholders['gt_box_of_prop'], 1), tf.expand_dims( tf.to_int32(tf.equal(placeholders['class_labels'], 0)) * tf.constant(-1), 1) ], [tf.float32, tf.float32]) # Get training operator learning_rate = get_learning_rate(batch) tf.summary.scalar('learning_rate', learning_rate) if OPTIMIZER == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM) elif OPTIMIZER == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) # Note: when training, the moving_mean and moving_variance need to be updated. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(loss, global_step=batch) ''' train_op = slim.learning.create_train_op( loss, optimizer, clip_gradient_norm=1.0, global_step=batch) ''' # Add ops to save and restore all the variables. 
saver = tf.train.Saver() # Create a session config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True config.log_device_placement = False sess = tf.Session(config=config) # Add summary writers merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph) test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph) # Init variables if FLAGS.restore_model_path is None: init = tf.global_variables_initializer() sess.run(init) else: saver.restore(sess, FLAGS.restore_model_path) ops = { 'loss': loss, 'train_op': train_op, 'step': batch, 'merged': merged, 'iou2ds': iou2ds, 'iou3ds': iou3ds, 'loss_endpoints': loss_endpoints, 'end_points': end_points } for epoch in range(MAX_EPOCH): log_string('**** EPOCH %03d ****' % (epoch)) sys.stdout.flush() # eval iou and recall is slow #eval_iou_recall = (epoch % 10 == 0 and epoch != 0) eval_iou_recall = True train_one_epoch(sess, ops, placeholders, train_writer) save_path = saver.save( sess, os.path.join(LOG_DIR, "model.ckpt.%03d" % epoch)) log_string("Model saved in file: {0}".format(save_path)) val_loss = eval_one_epoch(sess, ops, placeholders, test_writer) TRAIN_DATASET.stop_loading() train_produce_thread.join() TEST_DATASET.stop_loading() test_produce_thread.join()
def test():
    ''' Main function for training and simple evaluation. '''
    result_dir = FLAGS.output
    # data loading threads
    test_produce_thread = Thread(target=TEST_DATASET.load, args=(False,))
    test_produce_thread.start()
    is_training = False

    with tf.Graph().as_default():
        with tf.device('/gpu:'+str(GPU_INDEX)):
            rcnn_model = RCNN(BATCH_SIZE, NUM_POINT, TEST_DATASET.num_channel,
                              is_training=is_training)
            pls = rcnn_model.placeholders
            # Get model and losses
            end_points = rcnn_model.end_points
            loss, loss_endpoints = rcnn_model.get_loss()
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)
        saver.restore(sess, FLAGS.model_path)

        # frame id -> list of DetectObject for that frame
        objects = {}
        # NOTE(review): boxes accumulates corner arrays but is never read.
        boxes = []
        while(True):
            batch_data, is_last_batch = TEST_DATASET.get_next_batch(BATCH_SIZE)
            # Labels are fed even at test time because the graph defines loss
            # tensors; only the end_points outputs are fetched below.
            feed_dict = {
                pls['pointclouds']: batch_data['pointcloud'],
                pls['img_inputs']: batch_data['images'],
                pls['img_seg_map']: batch_data['img_seg_map'],
                pls['calib']: batch_data['calib'],
                pls['proposal_boxes']: batch_data['prop_box'],
                pls['class_labels']: batch_data['label'],
                pls['center_bin_x_labels']: batch_data['center_x_cls'],
                pls['center_bin_z_labels']: batch_data['center_z_cls'],
                pls['center_x_res_labels']: batch_data['center_x_res'],
                pls['center_y_res_labels']: batch_data['center_y_res'],
                pls['center_z_res_labels']: batch_data['center_z_res'],
                pls['heading_bin_labels']: batch_data['angle_cls'],
                pls['heading_res_labels']: batch_data['angle_res'],
                pls['size_class_labels']: batch_data['size_cls'],
                pls['size_res_labels']: batch_data['size_res'],
                pls['gt_box_of_prop']: batch_data['gt_box_of_prop'],
                pls['is_training_pl']: is_training
            }
            cls_logits, box_center, box_angle, box_size, box_score, corners = \
                sess.run([end_points['cls_logits'], end_points['box_center'],
                          end_points['box_angle'], end_points['box_size'],
                          end_points['box_score'],
                          end_points['box_corners']],
                         feed_dict=feed_dict)
            cls_val = np.argmax(cls_logits, axis=-1)
            # NOTE(review): 'correct' is computed but never used.
            correct = np.sum(cls_val == batch_data['label'])
            for i in range(BATCH_SIZE):
                # Skip background predictions.
                if type_list[cls_val[i]] == 'NonObject':
                    #print('NonObject')
                    continue
                idx = int(batch_data['ids'][i])
                size = box_size[i]
                angle = box_angle[i]
                center = box_center[i]
                # NOTE(review): box_corner is assigned but never used.
                box_corner = corners[i]
                score = box_score[i]
                # Size components reordered to the DetectObject convention
                # (h, w, l) — TODO confirm against DetectObject's signature.
                obj = DetectObject(size[1],size[2],size[0],center[0],center[1],center[2],angle,idx,type_list[cls_val[i]],score)
                # dont't use batch_data['calib'] which is for resized image
                calib = get_calibration(idx).P
                box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib)
                if box3d_pts_2d is None:
                    print('box3d_pts_2d is None')
                    continue
                # Axis-aligned 2D bounding box of the projected 3D corners.
                x1 = np.amin(box3d_pts_2d, axis=0)[0]
                y1 = np.amin(box3d_pts_2d, axis=0)[1]
                x2 = np.amax(box3d_pts_2d, axis=0)[0]
                y2 = np.amax(box3d_pts_2d, axis=0)[1]
                obj.box_2d = [x1,y1,x2,y2]
                obj.box_3d = box3d_pts_3d
                if idx not in objects:
                    objects[idx] = []
                objects[idx].append(obj)
                boxes.append(corners[i])
            if is_last_batch:
                break

    TEST_DATASET.stop_loading()
    test_produce_thread.join()

    # Persist raw detections, then run bird's-eye-view NMS and write
    # KITTI-format results.
    with open('rcnn_out.pkl','wb') as fp:
        pickle.dump(objects, fp)
    objects = nms_on_bev(objects, 0.01)
    # Write detection results for KITTI evaluation
    write_detection_results(result_dir, objects)
    output_dir = os.path.join(result_dir, 'data')
    print('write detection results to ' + output_dir)
    # Make sure for each frame (no matter if we have measurment for that
    # frame), there is a TXT file
    to_fill_filename_list = ['%06d.txt'%(int(frame_id)) \
        for frame_id in TEST_DATASET.frame_ids]
    fill_files(output_dir, to_fill_filename_list)
    # (dict literal continues from above — opening brace outside this view)
    'num_feature_channel': 256,
    'num_fc7_channel': 512,
    'num_rpn_channel': 512,
    'num_anchor': 9,
    'score_top_n': 100,
    'nms_top_n': 50,
    'nms_thresh': 0.7,
    'pool_out_size': 8,
    # The label count here includes the background class (0); foreground
    # classes start at 1.
    'num_class': 5,
    'radios': (0.5, 1, 2),
    'scales': (4, 8, 16),
    # 'scales': (8, 16, 32),
}
# Smoke test: run the RCNN in RPN-only mode, backprop the RPN loss, then
# run the full two-stage forward and compute the RCNN loss.
n = 2
model = RCNN(feature_extractor, conv_to_head, config)
image = torch.rand((n, 3, 128, 128))
# Random ground-truth boxes; sort(dim=2) guarantees x1<=y1... ordering of the
# 4 coordinates — presumably (x1, y1, x2, y2); verify against the loss layer.
gt_bbox = torch.randint(0, 8 * 16, (n, 8, 4)).sort(dim=2)[0].float()
# Foreground labels only (background 0 excluded).
gt_label = torch.randint(1, config['num_class'], (n, 8))
model.rpn_mode()
rpn_score, rpn_bbox = model.forward(image)
print(rpn_score.shape, rpn_bbox.shape)
rpn_loss = rpn_loss_layer.compute(model, rpn_score, rpn_bbox, gt_label, gt_bbox)
rpn_loss.backward()
model.total_mode()
score, bbox = model.forward(image)
print(score.shape, bbox.shape)
rcnn_loss = rcnn_loss_layer.compute(model, score, bbox, gt_label, gt_bbox)
# Train a Keras RCNN on left-to-right and right-to-left views of each sequence.
print('Loading data...')
(x_train, x_test), (y_train, y_test) = data_preprocess(origin_filename)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Prepare input for model...')
# BUG FIX: list.reverse() reverses IN PLACE and returns None, so the original
# code set the *_right inputs to None and, because *_left aliased the same
# list, corrupted the left inputs too.  Build reversed copies instead.
# NOTE(review): each sample's token order is reversed (the usual RCNN
# right-context input); sample order and label alignment are untouched.
x_train_left = x_train
x_train_right = [seq[::-1] for seq in x_train]
x_test_left = x_test
x_test_right = [seq[::-1] for seq in x_test]
print('x_train_left shape:', np.array(x_train_left).shape)
print('x_train_right shape:', np.array(x_train_right).shape)
print('x_test_left shape:', np.array(x_test_left).shape)
print('x_test_right shape:', np.array(x_test_right).shape)

print('Build model...')
model = RCNN(max_len, max_features, embedding_dims).get_model()
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

print('Train...')
# Stop when validation accuracy has not improved for 3 epochs.
early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit([x_train_left, x_train_right], y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=([x_test_left, x_test_right], y_test))

print('Test...')
result = model.predict([x_test_left, x_test_right])
def test():
    """Run the trained RPN+RCNN pipeline on the test split and report mAP."""
    cuda = True
    test_dataset = custom_dataset(split='test')
    # batch_size=1: one image per step, boxes/labels indexed with [0] below.
    test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    #resnet = Resnet101().eval()
    resnet = resnet101()
    rpn = RPN()
    rcnn = RCNN()
    if cuda:
        resnet = resnet.cuda()
        rpn = rpn.cuda()
        rcnn = rcnn.cuda()
    # Backbone + RPN weights come from one checkpoint, RCNN head from another.
    rpn_check_point = torch.load(
        '/home/licheng/home/licheng/projects/cnet/data/cnet.model.state.19.pkl'
    )
    rpn.load_state_dict(rpn_check_point['rpn'])
    resnet.load_state_dict(rpn_check_point['resnet'])
    rcnn_check_point = torch.load(
        "/home/licheng/home/licheng/projects/cnet/data/rcnn/rcnn_epoch_19.params"
    )
    rcnn.load_state_dict(rcnn_check_point['rcnn'])
    """
    rpn_check_point = torch.load('/home/licheng/home/licheng/projects/cnet/data/rpn/rpn_epoch_19.params')
    #resnet.load_state_dict(check_point['resnet'])
    rpn.load_state_dict(rpn_check_point['rpn'])
    #resnet.load_state_dict(check_point['resnet'])
    rcnn_check_point = torch.load('/home/licheng/home/licheng/projects/cnet/data/rcnn/rcnn_epoch_16.params')
    rcnn.load_state_dict(rcnn_check_point['rcnn'])
    """
    # Accumulators for mAP computation (one entry per image).
    pred_bboxes = list()
    pred_labels = list()
    pred_scores = list()
    gt_boxes = list()
    gt_labels = list()
    rcnn_target_creator = RCNNTargetCreator()
    with torch.no_grad():
        for img_batch, bndboxes_batch, labels_batch in test_data_loader:
            # Strip the batch dimension from boxes/labels (batch_size == 1).
            img, bndboxes, labels = img_batch, bndboxes_batch[0], labels_batch[
                0]
            if cuda:
                img, bndboxes, labels = img.cuda(), bndboxes.cuda(
                ), labels.cuda()
            feature = resnet(img.float())
            #if cuda:
            #    feature = feature.cuda()
            rois, anchors, rpn_loc, rpn_score = rpn(feature, feature_stride=16)
            # NOTE(review): sample_roi/gt_roi_label/gt_roi_loc are only used
            # for the debug print below — target creation is a training-time
            # concept.
            sample_roi, gt_roi_label, gt_roi_loc = rcnn_target_creator(
                rois, bndboxes.cpu().numpy(), labels)
            rois = at.toTensor(rois)
            roi_cls_loc, roi_score = rcnn(rois, feature)
            # NOTE(review): look_score1 is a debug variable, assigned twice
            # and never read.
            look_score1 = np.array(roi_score.cpu().detach())
            pred_score = F.softmax(roi_score, dim=1)
            look_score1 = np.array(pred_score.cpu().detach())
            pred_score = pred_score.cpu().detach().numpy()
            # Undo the loc normalization applied during training
            # (per-class mean 0, std (0.1, 0.1, 0.2, 0.2)).
            mean = torch.Tensor(
                (0., 0., 0., 0.)).repeat(cfg.n_class)[None].cuda()
            std = torch.Tensor(
                (0.1, 0.1, 0.2, 0.2)).repeat(cfg.n_class)[None].cuda()
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = at.toTensor(roi_cls_loc)
            roi_cls_loc = roi_cls_loc.view(-1, cfg.n_class, 4)
            rois = rois.view(-1, 1, 4).expand_as(roi_cls_loc)
            # expand dim as loc
            #rois = rois.reshape(-1, 1, 4)[:, [int(x) for x in np.zeros(cfg.n_class).tolist()], :]
            #roi_cls_loc = at.toTensor(roi_cls_loc)
            #roi_cls_loc = roi_cls_loc.view(roi_cls_loc.shape[0], -1, 4)
            #pred_box = loc2bbox(at.toNumpy(rois).reshape(-1, 4), roi_cls_loc.view(-1, 4).cpu().detach().numpy())
            # Decode per-class box regressions back to image coordinates.
            pred_box = loc2bbox(
                at.toNumpy(rois).reshape(-1, 4),
                roi_cls_loc.view(-1, 4).cpu().detach().numpy())
            # clip box to the image bounds
            pred_box[:, 0::2] = np.clip(pred_box[:, 0::2], 0, img.shape[3])
            pred_box[:, 1::2] = np.clip(pred_box[:, 1::2], 0, img.shape[2])
            gt_box = list(bndboxes_batch.cpu().numpy())
            gt_label = list(labels_batch.cpu().numpy())
            bbox = list()
            label = list()
            score = list()
            # Per-class score threshold + NMS (class 0 = background skipped).
            for class_index in range(1, cfg.n_class):
                each_bbox = pred_box.reshape(
                    (-1, cfg.n_class, 4))[:, class_index, :]
                each_score = pred_score[:, class_index]
                mask = each_score > cfg.pred_score_thresh
                each_bbox = each_bbox[mask]
                each_score = each_score[mask]
                keep = nms(each_bbox, each_score, cfg.pred_nms_thresh)
                bbox.append(each_bbox[keep])
                score.append(each_score[keep])
                label.append(class_index * np.ones((len(keep), )))
            bbox = np.concatenate(bbox, axis=0).astype(np.float32)
            score = np.concatenate(score, axis=0).astype(np.float32)
            label = np.concatenate(label, axis=0).astype(np.int32)
            print('gt_info:', gt_box, gt_label)
            print('sample roi', sample_roi[0])
            print('predict info:', bbox, score, label)
            pred_bboxes += [bbox]
            pred_scores += [score]
            pred_labels += [label]
            gt_boxes += gt_box
            gt_labels += gt_label
    result = calc_map(pred_bboxes, pred_labels, pred_scores, gt_boxes,
                      gt_labels)
    print(result)
    # (dict literal continues from above — opening brace outside this view)
    'feature_stride': 16,
    'feature_compress': 1 / 16,
    'num_feature_channel': 256,
    'num_fc7_channel': 512,
    'num_rpn_channel': 512,
    'num_anchor': 9,
    'score_top_n': 100,
    'nms_top_n': 50,
    'nms_thresh': 0.7,
    'pool_out_size': 8,
    'num_class': 5,
    'radios': (0.5, 1, 2),
    'scales': (8, 16, 32),
}
# Smoke test: full two-stage forward pass, proposal annotation and loss.
model_ = RCNN(feature_extractor, conv_to_head, config)
n_ = 10
image = torch.rand((n_, 3, 128, 128))
model_.total_mode()
# score_(k, num_cls) delta_(k, num_cls, 4)
score_, delta_ = model_.forward(image)
print(score_.shape, delta_.shape)
# Random ground truth: sort(dim=2) orders the 4 box coordinates; labels
# exclude background (0).
gt_bbox_ = torch.randint(0, 8 * 16, (n_, 8, 4)).sort(dim=2)[0].float()
gt_label_ = torch.randint(1, config['num_class'], (n_, 8))
label_list_, fg_delta_list_, index_list_ = annotate_proposals(
    model_.rois_list, gt_bbox_, gt_label_)
print(label_list_[0].shape, fg_delta_list_[0].shape,
      index_list_[0][0].shape, index_list_[0][1].shape)
# NOTE(review): t is captured for timing but never printed/used here.
t = time.time()
loss_ = rcnn_loss_layer(score_, delta_, label_list_, fg_delta_list_,
                        index_list_)
# Build the RCNN's three input views per sequence: the sequence itself, its
# left context (shifted right by one, first token repeated), and its right
# context (shifted left by one, last token repeated).
print('generate context format...')
x_train_current = x_train
x_train_left = np.hstack(
    [np.expand_dims(x_train[:, 0], axis=1), x_train[:, 0:-1]])
x_train_right = np.hstack(
    [x_train[:, 1:], np.expand_dims(x_train[:, -1], axis=1)])
x_test_current = x_test
# BUG FIX: the test left-context was built from x_train[:, 0:-1] (copy-paste
# error), which crashes when the splits differ in size and silently corrupts
# evaluation otherwise.  Use x_test throughout.
x_test_left = np.hstack(
    [np.expand_dims(x_test[:, 0], axis=1), x_test[:, 0:-1]])
x_test_right = np.hstack(
    [x_test[:, 1:], np.expand_dims(x_test[:, -1], axis=1)])
print('x train shape: ', x_train_current.shape)
print('x train left shape: ', x_train_left.shape)
print('x train right shape: ', x_train_right.shape)

print('build model...')
model = RCNN(maxlen, max_features, emb_dim).get_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

print('training ...')
# Stop when validation accuracy has not improved for 3 epochs.
earlystop = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit([x_train_current, x_train_left, x_train_right], y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[earlystop],
          validation_data=([x_test_current, x_test_left, x_test_right], y_test))
def main(dataset):
    """Prepare (or restore cached) data for *dataset*, then train and
    evaluate the RCNN tagger, writing summaries and checkpoints under runs/.
    """
    # Data Preparation
    # ==================================================
    path = os.path.join('transformed_data', dataset)
    if not os.path.exists(path):
        # First run for this dataset: build everything and cache it.
        os.makedirs(path)
        # Load data
        x_text, y = data_helpers.load_data(
            os.path.join('community-data',dataset,'50'))
        y = np.array(y).astype(float)
        # Build vocabulary — document length capped at the median token
        # count ('lower' interpolation keeps it an observed integer length).
        max_document_length = np.percentile(
            [len(x.split(" ")) for x in x_text], 50, interpolation='lower')
        vocab_processor = learn.preprocessing.VocabularyProcessor(
            max_document_length)
        x = np.array(list(vocab_processor.fit_transform(x_text)))
        # Write vocabulary
        vocab_processor.save(os.path.join(path,"vocab"))
        # Split train/test set (fixed seed for reproducibility)
        x_train_dev, x_test, y_train_dev, y_test = train_test_split(
            x, y, test_size=0.1, random_state=42)
        x_train, x_dev, y_train, y_dev = train_test_split(
            x_train_dev, y_train_dev, test_size=0.1, random_state=42)
        # print(x_train.shape, y_train.shape)
        # Cache all splits so later runs skip preprocessing entirely.
        with open(os.path.join(path,'x_train.pickle'), 'wb') as output:
            pickle.dump(x_train, output)
        with open(os.path.join(path,'y_train.pickle'), 'wb') as output:
            pickle.dump(y_train, output)
        with open(os.path.join(path, 'x_dev.pickle'), 'wb') as output:
            pickle.dump(x_dev, output)
        with open(os.path.join(path, 'y_dev.pickle'), 'wb') as output:
            pickle.dump(y_dev, output)
        with open(os.path.join(path, 'x_test.pickle'), 'wb') as output:
            pickle.dump(x_test, output)
        with open(os.path.join(path, 'y_test.pickle'), 'wb') as output:
            pickle.dump(y_test, output)
    else:
        # Cached data exists: restore the vocabulary and the pickled splits.
        vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
            os.path.join(path,'vocab'))
        with open(os.path.join(path,'x_train.pickle'), 'rb') as output:
            x_train = pickle.load(output)
        with open(os.path.join(path, 'y_train.pickle'), 'rb') as output:
            y_train = pickle.load(output)
        with open(os.path.join(path, 'x_dev.pickle'), 'rb') as output:
            x_dev = pickle.load(output)
        with open(os.path.join(path, 'y_dev.pickle'), 'rb') as output:
            y_dev = pickle.load(output)
        with open(os.path.join(path, 'x_test.pickle'), 'rb') as output:
            x_test = pickle.load(output)
        with open(os.path.join(path, 'y_test.pickle'), 'rb') as output:
            y_test = pickle.load(output)

    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            intra_op_parallelism_threads=3,
            inter_op_parallelism_threads=3)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rcnn = RCNN(
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                hidden_units=100,
                context_size=50,
                max_sequence_length=x_train.shape[1],
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(rcnn.loss)
            train_op = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(
                os.path.curdir, "runs", dataset, timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rcnn.loss)
            recall_10_summary = tf.summary.scalar("recall_10", rcnn.recall_10)
            recall_5_summary = tf.summary.scalar("recall_5", rcnn.recall_5)
            precise_10_summary = tf.summary.scalar(
                "precise_10", rcnn.precise_10)
            precise_5_summary = tf.summary.scalar("precise_5", rcnn.precise_5)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, recall_10_summary, recall_5_summary,
                 precise_10_summary, precise_5_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries (same scalar set as training)
            dev_summary_op = tf.summary.merge(
                [loss_summary, recall_10_summary, recall_5_summary,
                 precise_10_summary, precise_5_summary])
            dev_summary_dir = os.path.join(out_dir,
                                           "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(
                dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            """
            A single training step
            """
            def train_step(x_batch, y_batch):
                # global total_recall_5, total_recall_10
                # Effective length = count of non-zero (non-padding) tokens.
                sequence_length = [len(np.nonzero(sample)[0])
                                   for sample in x_batch]
                feed_dict = {
                    rcnn.X: x_batch,
                    rcnn.y: y_batch,
                    rcnn.sequence_length: sequence_length,
                    # rcnn.max_sequence_length: max_sequence_length,
                    rcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss_1, loss_0, recall_5, recall_10 = sess.run(
                    [train_op, global_step, train_summary_op,
                     rcnn.loss_for_1, rcnn.loss_for_0,
                     rcnn.recall_5, rcnn.recall_10],
                    feed_dict)
                # summary
                train_summary_writer.add_summary(summaries, step)

            """
            Evaluates model on a dev set
            """
            def dev_step(x_batch, y_batch, writer=None, test=False):
                # Effective length = count of non-zero (non-padding) tokens.
                sequence_length = [len(np.nonzero(sample)[0])
                                   for sample in x_batch]
                feed_dict = {
                    rcnn.X: x_batch,
                    rcnn.y: y_batch,
                    rcnn.sequence_length: sequence_length,
                    # rcnn.max_sequence_length: max_sequence_length,
                    rcnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss_1, loss_0, recall_5, recall_10 = sess.run(
                    [global_step, train_summary_op,
                     rcnn.loss_for_1, rcnn.loss_for_0,
                     rcnn.recall_5, rcnn.recall_10],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss_1 {:.3f}, loss_0 {:.3f}, recall_5 {:.3f}, recall_10 {:.3f}".format(
                    time_str, step, loss_1, loss_0, recall_5, recall_10))
                # The final test evaluation is logged at step+1 so it does
                # not overwrite the last dev summary at the same step.
                if test:
                    writer.add_summary(summaries, step + 1)
                else:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size,
                FLAGS.num_epochs, dynamic=False)
            # Training loop.  batch_iter yields the sentinel True at each
            # epoch boundary — presumably its end-of-epoch marker; verify
            # against data_helpers.batch_iter.
            epoch = 0
            print("Training [" + dataset + "]")
            for batch in batches:
                if batch is True:
                    epoch += 1
                    current_step = tf.train.global_step(sess, global_step)
                    print("Epoch:" + str(epoch))
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                    # NOTE(review): this rebinds 'path', shadowing the data
                    # directory path computed above (unused afterwards).
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))
                else:
                    x_batch, y_batch = zip(*batch)
                    # print(len(x_batch), len(y_batch))
                    # print(x_batch, y_batch)
                    train_step(x_batch, y_batch)
            print("Test:")
            dev_step(x_test, y_test, writer=dev_summary_writer, test=True)
def __init__(self, model_params):
    """Build the end-to-end SRL model from a flat config dict.

    model_params supplies vocabulary sizes, embedding sizes, feature
    toggles (use_deprel / use_elmo / use_gcn / use_sa_lstm / use_rcnn /
    use_tree_lstm / use_self_attn / use_highway) and BiLSTM dimensions.
    """
    super(End2EndModel, self).__init__()
    # --- copy scalar hyper-parameters from the config dict ---
    self.dropout = model_params['dropout']
    self.batch_size = model_params['batch_size']
    self.word_vocab_size = model_params['word_vocab_size']
    self.lemma_vocab_size = model_params['lemma_vocab_size']
    self.pos_vocab_size = model_params['pos_vocab_size']
    self.deprel_vocab_size = model_params['deprel_vocab_size']
    self.pretrain_vocab_size = model_params['pretrain_vocab_size']
    self.word_emb_size = model_params['word_emb_size']
    self.lemma_emb_size = model_params['lemma_emb_size']
    self.pos_emb_size = model_params['pos_emb_size']
    self.use_deprel = model_params['use_deprel']
    self.deprel_emb_size = model_params['deprel_emb_size']
    self.pretrain_emb_size = model_params['pretrain_emb_size']
    self.pretrain_emb_weight = model_params['pretrain_emb_weight']
    self.bilstm_num_layers = model_params['bilstm_num_layers']
    self.bilstm_hidden_size = model_params['bilstm_hidden_size']
    self.target_vocab_size = model_params['target_vocab_size']
    self.use_flag_embedding = model_params['use_flag_embedding']
    self.flag_emb_size = model_params['flag_embedding_size']
    self.use_gcn = model_params['use_gcn']
    self.use_sa_lstm = model_params['use_sa_lstm']
    self.use_rcnn = model_params['use_rcnn']
    self.use_tree_lstm = model_params['use_tree_lstm']
    self.deprel2idx = model_params['deprel2idx']

    # --- embedding tables (uniform init in [-1, 1]) ---
    # Binary predicate-flag embedding.
    if self.use_flag_embedding:
        self.flag_embedding = nn.Embedding(2, self.flag_emb_size)
        self.flag_embedding.weight.data.uniform_(-1.0, 1.0)
    self.word_embedding = nn.Embedding(self.word_vocab_size,
                                       self.word_emb_size)
    self.word_embedding.weight.data.uniform_(-1.0, 1.0)
    self.lemma_embedding = nn.Embedding(self.lemma_vocab_size,
                                        self.lemma_emb_size)
    self.lemma_embedding.weight.data.uniform_(-1.0, 1.0)
    self.pos_embedding = nn.Embedding(self.pos_vocab_size,
                                      self.pos_emb_size)
    self.pos_embedding.weight.data.uniform_(-1.0, 1.0)
    if self.use_deprel:
        self.deprel_embedding = nn.Embedding(self.deprel_vocab_size,
                                             self.deprel_emb_size)
        self.deprel_embedding.weight.data.uniform_(-1.0, 1.0)
    # Pretrained embeddings are copied in, not randomly initialized.
    self.pretrained_embedding = nn.Embedding(self.pretrain_vocab_size,
                                             self.pretrain_emb_size)
    self.pretrained_embedding.weight.data.copy_(
        torch.from_numpy(self.pretrain_emb_weight))

    # --- compute the BiLSTM input width from the enabled features ---
    input_emb_size = 0
    if self.use_flag_embedding:
        input_emb_size += self.flag_emb_size
    else:
        # Without the flag embedding a single scalar flag feature is used.
        input_emb_size += 1
    if self.use_deprel:
        input_emb_size += self.pretrain_emb_size + self.word_emb_size + self.lemma_emb_size + self.pos_emb_size + self.deprel_emb_size
    else:
        input_emb_size += self.pretrain_emb_size + self.word_emb_size + self.lemma_emb_size + self.pos_emb_size

    self.use_elmo = model_params['use_elmo']
    self.elmo_emb_size = model_params['elmo_embedding_size']
    if self.use_elmo:
        input_emb_size += self.elmo_emb_size
        # Project the 1024-dim ELMo vectors down to elmo_emb_size; w and
        # gamma are the learned layer-mixing weights and scale.
        self.elmo_mlp = nn.Sequential(nn.Linear(1024, self.elmo_emb_size),
                                      nn.ReLU())
        self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma = nn.Parameter(torch.ones(1))

    # Learnable initial (h0, c0) for the BiLSTM, sized for a fixed
    # batch_size.
    if USE_CUDA:
        self.bilstm_hidden_state0 = (Variable(torch.randn(
            2 * self.bilstm_num_layers, self.batch_size,
            self.bilstm_hidden_size), requires_grad=True).cuda(),
            Variable(torch.randn(
                2 * self.bilstm_num_layers, self.batch_size,
                self.bilstm_hidden_size), requires_grad=True).cuda())
    else:
        self.bilstm_hidden_state0 = (Variable(torch.randn(
            2 * self.bilstm_num_layers, self.batch_size,
            self.bilstm_hidden_size), requires_grad=True),
            Variable(torch.randn(
                2 * self.bilstm_num_layers, self.batch_size,
                self.bilstm_hidden_size), requires_grad=True))
    self.bilstm_layer = nn.LSTM(input_size=input_emb_size,
                                hidden_size=self.bilstm_hidden_size,
                                num_layers=self.bilstm_num_layers,
                                dropout=self.dropout,
                                bidirectional=True,
                                bias=True,
                                batch_first=True)
    # self.bilstm_mlp = nn.Sequential(nn.Linear(self.bilstm_hidden_size*2, self.bilstm_hidden_size), nn.ReLU())

    # --- optional structured encoders over the BiLSTM output ---
    self.use_self_attn = model_params['use_self_attn']
    if self.use_self_attn:
        self.self_attn_head = model_params['self_attn_head']
        self.attn_linear_first = nn.Linear(self.bilstm_hidden_size * 2,
                                           self.bilstm_hidden_size)
        self.attn_linear_first.bias.data.fill_(0)
        self.attn_linear_second = nn.Linear(self.bilstm_hidden_size,
                                            self.self_attn_head)
        self.attn_linear_second.bias.data.fill_(0)
        self.attn_linear_final = nn.Sequential(
            nn.Linear(self.bilstm_hidden_size * 2 * 2,
                      self.bilstm_hidden_size * 2), nn.Tanh())
        # self.biaf_attn = BiAFAttention(self.bilstm_hidden_size*2, self.bilstm_hidden_size*2, self.self_attn_head)
        # self.attn_linear_final = nn.Sequential(nn.Linear(self.bilstm_hidden_size*4,self.bilstm_hidden_size*2), nn.ReLU())
    if self.use_tree_lstm:
        # self.tree_input_mlp = nn.Linear(self.bilstm_hidden_size*2, self.bilstm_hidden_size)
        self.tree_lstm = ChildSumTreeLSTM(self.bilstm_hidden_size * 2,
                                          self.bilstm_hidden_size,
                                          self.deprel_vocab_size)
        # Fuses BiLSTM output (2h) with tree-LSTM output (h) back to 2h.
        self.tree_mlp = nn.Sequential(
            nn.Linear(self.bilstm_hidden_size * 3,
                      self.bilstm_hidden_size * 2), nn.ReLU())
    if self.use_sa_lstm:
        self.sa_lstm = SyntaxAwareLSTM(input_emb_size,
                                       self.bilstm_hidden_size,
                                       self.deprel_vocab_size)
        self.sa_mlp = nn.Sequential(
            nn.Linear(self.bilstm_hidden_size * 3,
                      self.bilstm_hidden_size * 2), nn.ReLU())
    if self.use_gcn:
        # self.W_in = nn.Parameter(torch.randn(2*self.bilstm_hidden_size, 2*self.bilstm_hidden_size))
        # self.W_out = nn.Parameter(torch.randn(2*self.bilstm_hidden_size, 2*self.bilstm_hidden_size))
        # self.W_self = nn.Parameter(torch.randn(2*self.bilstm_hidden_size, 2*self.bilstm_hidden_size))
        # self.gcn_bias = nn.Parameter(torch.randn(2*self.bilstm_hidden_size))
        self.syntactic_gcn = SyntacticGCN(self.bilstm_hidden_size * 2,
                                          self.bilstm_hidden_size,
                                          self.deprel_vocab_size,
                                          batch_first=True)
        self.gcn_mlp = nn.Sequential(
            nn.Linear(self.bilstm_hidden_size * 3,
                      self.bilstm_hidden_size * 2), nn.ReLU())
    if self.use_rcnn:
        self.rcnn = RCNN(self.bilstm_hidden_size * 2,
                         self.bilstm_hidden_size,
                         self.deprel_vocab_size)
        self.rcnn_mlp = nn.Sequential(
            nn.Linear(self.bilstm_hidden_size * 3,
                      self.bilstm_hidden_size * 2), nn.ReLU())

    # --- output head ---
    self.use_highway = model_params['use_highway']
    # NOTE(review): self.highway_layers first holds the layer COUNT from the
    # config, then is rebound to the ModuleList below — intentional but
    # confusing shadowing.
    self.highway_layers = model_params['highway_layers']
    if self.use_highway:
        self.highway_layers = nn.ModuleList([
            HighwayMLP(self.bilstm_hidden_size * 2,
                       activation_function=F.relu)
            for _ in range(self.highway_layers)
        ])
        self.output_layer = nn.Linear(self.bilstm_hidden_size * 2,
                                      self.target_vocab_size)
    else:
        self.output_layer = nn.Linear(self.bilstm_hidden_size * 2,
                                      self.target_vocab_size)