Example #1
def generate_tags(dataset, title, body):
    # Data Preparation
    # ==================================================

    path = os.path.join('model', dataset)
    text = data_helpers.preprocess(title, body)
    x_text = [data_helpers.clean_str(text)]

    # Restore vocab file
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(os.path.join(
        path, 'vocab'))

    # The vocabulary was restored above, so use transform rather than
    # fit_transform to avoid re-fitting it at inference time.
    x = np.array(list(vocab_processor.transform(x_text)))
    tags_df = pd.read_csv(os.path.join(path, 'tags_df.csv'), encoding='utf8', index_col=0)
    tag_list = tags_df['TagName'].tolist()

    # prediction
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.compat.v1.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            intra_op_parallelism_threads=3,
            inter_op_parallelism_threads=3)
        sess = tf.compat.v1.Session(config=session_conf)
        with sess.as_default():
            rcnn = RCNN(
                num_classes=len(tag_list),
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=100,
                hidden_units=100,
                context_size=50,
                max_sequence_length=x.shape[1])
                # l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.compat.v1.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(rcnn.loss)
            train_op = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

            # Loading checkpoint
            save_path = os.path.join(path, "model")
            saver.restore(sess, save_path)

            # predict
            # Count non-padding tokens (ids are zero-padded) rather than the
            # padded row length, matching how sequence_length is fed in training.
            sequence_length = [len(np.nonzero(sample)[0]) for sample in x]
            feed_dict = {
                rcnn.X: x,
                rcnn.sequence_length: sequence_length,
                # rcnn.max_sequence_length: max_sequence_length,
                rcnn.dropout_keep_prob: 1.0
            }
            prediction = sess.run([rcnn.predictions], feed_dict)[0][0]
            idx = prediction.argsort()[-5:][::-1]
            tags = [tag_list[i] for i in idx]
    return tags
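A minimal call sketch for the function above (the dataset name and strings are hypothetical; a trained model directory is expected under model/<dataset>):

tags = generate_tags('stackoverflow',
                     title='How to merge two dictionaries?',
                     body='I have two dicts and want to combine them ...')
print(tags)  # the five highest-scoring tags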
Example #2
    def __init__(self,
                 in_features,
                 hidden_size,
                 batch_first: bool = True,
                 bidirectional: bool = False):
        super(RCNNEncoder, self).__init__()
        assert batch_first, "only batch_first=True supported"
        self.rcnn = RCNN(in_features, hidden_size, bidirectional=bidirectional)
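A minimal usage sketch for this encoder (the sizes are hypothetical, and it assumes the wrapped RCNN consumes a batch-first (batch, seq, in_features) tensor, which the excerpt implies but does not show):

import torch

encoder = RCNNEncoder(in_features=128, hidden_size=256)
x = torch.randn(4, 20, 128)   # (batch, seq, features)
out = encoder.rcnn(x)         # output shape depends on the RCNN implementation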
Example #3
def train():
    ''' Main function for training and simple evaluation. '''
    # data loading threads
    train_produce_thread = Thread(target=TRAIN_DATASET.load, args=(False, ))
    train_produce_thread.start()
    test_produce_thread = Thread(target=TEST_DATASET.load, args=(False, ))
    test_produce_thread.start()

    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to increment the 'batch' parameter
            # for you every time it trains.
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and losses
            rcnn_model = RCNN(BATCH_SIZE,
                              NUM_POINT,
                              TRAIN_DATASET.num_channel,
                              bn_decay=bn_decay,
                              is_training=True)
            placeholders = rcnn_model.placeholders
            end_points = rcnn_model.end_points
            loss, loss_endpoints = rcnn_model.get_loss()

            iou2ds, iou3ds = tf.py_func(train_util.compute_box3d_iou, [
                tf.expand_dims(end_points['box_corners'], 1),
                tf.expand_dims(placeholders['gt_box_of_prop'], 1),
                tf.expand_dims(
                    tf.to_int32(tf.equal(placeholders['class_labels'], 0)) *
                    tf.constant(-1), 1)
            ], [tf.float32, tf.float32])

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                raise ValueError('Unsupported optimizer: ' + OPTIMIZER)

            # Note: when training, the moving_mean and moving_variance need to be updated.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(loss, global_step=batch)
                '''
                train_op = slim.learning.create_train_op(
                    loss,
                    optimizer,
                    clip_gradient_norm=1.0,
                    global_step=batch)
                '''

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'),
                                            sess.graph)

        # Init variables
        if FLAGS.restore_model_path is None:
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            saver.restore(sess, FLAGS.restore_model_path)

        ops = {
            'loss': loss,
            'train_op': train_op,
            'step': batch,
            'merged': merged,
            'iou2ds': iou2ds,
            'iou3ds': iou3ds,
            'loss_endpoints': loss_endpoints,
            'end_points': end_points
        }

        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()
            # evaluating IoU and recall is slow
            # eval_iou_recall = (epoch % 10 == 0 and epoch != 0)
            eval_iou_recall = True
            train_one_epoch(sess, ops, placeholders, train_writer)
            save_path = saver.save(
                sess, os.path.join(LOG_DIR, "model.ckpt.%03d" % epoch))
            log_string("Model saved in file: {0}".format(save_path))
            val_loss = eval_one_epoch(sess, ops, placeholders, test_writer)
    TRAIN_DATASET.stop_loading()
    train_produce_thread.join()
    TEST_DATASET.stop_loading()
    test_produce_thread.join()
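get_bn_decay is defined outside this excerpt. A plausible implementation, assuming the PointNet-style batch-norm decay schedule this training loop resembles (all constants are placeholders):

BN_INIT_DECAY = 0.5
BN_DECAY_RATE = 0.5
BN_DECAY_STEP = 200000
BN_DECAY_CLIP = 0.99

def get_bn_decay(batch):
    # Batch-norm momentum rises toward BN_DECAY_CLIP as training proceeds.
    bn_momentum = tf.train.exponential_decay(
        BN_INIT_DECAY, batch * BATCH_SIZE, BN_DECAY_STEP,
        BN_DECAY_RATE, staircase=True)
    return tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum)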
Example #4
def test():
    ''' Main function for training and simple evaluation. '''
    result_dir = FLAGS.output
    # data loading threads
    test_produce_thread = Thread(target=TEST_DATASET.load, args=(False,))
    test_produce_thread.start()

    is_training = False
    with tf.Graph().as_default():
        with tf.device('/gpu:'+str(GPU_INDEX)):
            rcnn_model = RCNN(BATCH_SIZE, NUM_POINT, TEST_DATASET.num_channel, is_training=is_training)
            pls = rcnn_model.placeholders

            # Get model and losses
            end_points = rcnn_model.end_points
            loss, loss_endpoints = rcnn_model.get_loss()

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        saver.restore(sess, FLAGS.model_path)

    objects = {}
    boxes = []
    while True:
        batch_data, is_last_batch = TEST_DATASET.get_next_batch(BATCH_SIZE)
        feed_dict = {
            pls['pointclouds']: batch_data['pointcloud'],
            pls['img_inputs']: batch_data['images'],
            pls['img_seg_map']: batch_data['img_seg_map'],
            pls['calib']: batch_data['calib'],
            pls['proposal_boxes']: batch_data['prop_box'],
            pls['class_labels']: batch_data['label'],
            pls['center_bin_x_labels']: batch_data['center_x_cls'],
            pls['center_bin_z_labels']: batch_data['center_z_cls'],
            pls['center_x_res_labels']: batch_data['center_x_res'],
            pls['center_y_res_labels']: batch_data['center_y_res'],
            pls['center_z_res_labels']: batch_data['center_z_res'],
            pls['heading_bin_labels']: batch_data['angle_cls'],
            pls['heading_res_labels']: batch_data['angle_res'],
            pls['size_class_labels']: batch_data['size_cls'],
            pls['size_res_labels']: batch_data['size_res'],
            pls['gt_box_of_prop']: batch_data['gt_box_of_prop'],
            pls['is_training_pl']: is_training
        }

        cls_logits, box_center, box_angle, box_size, box_score, corners = \
            sess.run([end_points['cls_logits'], end_points['box_center'],
                      end_points['box_angle'], end_points['box_size'],
                      end_points['box_score'], end_points['box_corners']],
                     feed_dict=feed_dict)
        cls_val = np.argmax(cls_logits, axis=-1)
        correct = np.sum(cls_val == batch_data['label'])
        for i in range(BATCH_SIZE):
            if type_list[cls_val[i]] == 'NonObject':
                #print('NonObject')
                continue
            idx = int(batch_data['ids'][i])
            size = box_size[i]
            angle = box_angle[i]
            center = box_center[i]
            box_corner = corners[i]
            score = box_score[i]
            obj = DetectObject(size[1], size[2], size[0], center[0], center[1],
                               center[2], angle, idx, type_list[cls_val[i]], score)
            # don't use batch_data['calib'], which is for the resized image
            calib = get_calibration(idx).P
            box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib)
            if box3d_pts_2d is None:
                print('box3d_pts_2d is None')
                continue
            x1 = np.amin(box3d_pts_2d, axis=0)[0]
            y1 = np.amin(box3d_pts_2d, axis=0)[1]
            x2 = np.amax(box3d_pts_2d, axis=0)[0]
            y2 = np.amax(box3d_pts_2d, axis=0)[1]
            obj.box_2d = [x1, y1, x2, y2]
            obj.box_3d = box3d_pts_3d
            if idx not in objects:
                objects[idx] = []
            objects[idx].append(obj)

            boxes.append(corners[i])
        if is_last_batch:
            break

    TEST_DATASET.stop_loading()
    test_produce_thread.join()

    with open('rcnn_out.pkl', 'wb') as fp:
        pickle.dump(objects, fp)
    objects = nms_on_bev(objects, 0.01)
    # Write detection results for KITTI evaluation
    write_detection_results(result_dir, objects)
    output_dir = os.path.join(result_dir, 'data')
    print('write detection results to ' + output_dir)
    # Make sure there is a TXT file for each frame, whether or not we have
    # measurements for that frame
    to_fill_filename_list = ['%06d.txt'%(int(frame_id)) \
            for frame_id in TEST_DATASET.frame_ids]
    fill_files(output_dir, to_fill_filename_list)
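nms_on_bev is defined elsewhere in the project. For reference, a generic score-ordered NMS over axis-aligned 2D boxes looks like this (a sketch, not the project's bird's-eye-view implementation):

import numpy as np

def nms_2d(boxes, scores, thresh):
    # boxes: (N, 4) as [x1, y1, x2, y2]; returns indices of boxes to keep.
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the top-scoring box against the remaining candidates
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + areas - inter)
        order = order[1:][iou <= thresh]
    return keep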
Example #5
File: main.py  Project: OYMiss/faster-rcnn
    'num_feature_channel': 256,
    'num_fc7_channel': 512,
    'num_rpn_channel': 512,
    'num_anchor': 9,
    'score_top_n': 100,
    'nms_top_n': 50,
    'nms_thresh': 0.7,
    'pool_out_size': 8,
    'num_class': 5,  # includes the background class (0); foreground classes start at 1.
    'radios': (0.5, 1, 2),
    'scales': (4, 8, 16),
    # 'scales': (8, 16, 32),
}

n = 2
model = RCNN(feature_extractor, conv_to_head, config)
image = torch.rand((n, 3, 128, 128))
gt_bbox = torch.randint(0, 8 * 16, (n, 8, 4)).sort(dim=2)[0].float()
gt_label = torch.randint(1, config['num_class'], (n, 8))

model.rpn_mode()
rpn_score, rpn_bbox = model.forward(image)
print(rpn_score.shape, rpn_bbox.shape)
rpn_loss = rpn_loss_layer.compute(model, rpn_score, rpn_bbox, gt_label,
                                  gt_bbox)
rpn_loss.backward()

model.total_mode()
score, bbox = model.forward(image)
print(score.shape, bbox.shape)
rcnn_loss = rcnn_loss_layer.compute(model, score, bbox, gt_label, gt_bbox)
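One consistency worth checking in the config above: num_anchor should be the product of the ratio and scale counts ('radios' appears to be the project's spelling of anchor aspect ratios). A quick sanity check, not in the original:

assert config['num_anchor'] == len(config['radios']) * len(config['scales'])  # 9 == 3 * 3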
Example #6
print('Loading data...')
(x_train, x_test), (y_train, y_test) = data_preprocess(origin_filename)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Prepare input for model...')
x_train_left = x_train
# Reverse each sequence for the right-to-left pass (list.reverse() would
# mutate the list in place and return None, so assigning its result yields
# None; per-sequence reversal is assumed to be the intent here).
x_train_right = [sample[::-1] for sample in x_train]
x_test_left = x_test
x_test_right = [sample[::-1] for sample in x_test]
print('x_train_left shape:', np.array(x_train_left).shape)
print('x_train_right shape:', np.array(x_train_right).shape)
print('x_test_left shape:', np.array(x_test_left).shape)
print('x_test_right shape:', np.array(x_test_right).shape)

print('Build model...')
model = RCNN(max_len, max_features, embedding_dims).get_model()
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

print('Train...')
early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit([x_train_left, x_train_right],
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=([x_test_left, x_test_right], y_test))

print('Test...')
result = model.predict([x_test_left, x_test_right])
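model.predict returns per-class probabilities; to turn result into hard labels, take the argmax over the class axis (a usage note, assuming one-hot numpy labels as implied by categorical_crossentropy):

y_pred = result.argmax(axis=-1)
accuracy = (y_pred == np.asarray(y_test).argmax(axis=-1)).mean()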
Example #7
def test():
    cuda = True

    test_dataset = custom_dataset(split='test')
    test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    #resnet = Resnet101().eval()
    resnet = resnet101()
    rpn = RPN()
    rcnn = RCNN()
    if cuda:
        resnet = resnet.cuda()
        rpn = rpn.cuda()
        rcnn = rcnn.cuda()

    rpn_check_point = torch.load(
        '/home/licheng/home/licheng/projects/cnet/data/cnet.model.state.19.pkl'
    )
    rpn.load_state_dict(rpn_check_point['rpn'])
    resnet.load_state_dict(rpn_check_point['resnet'])

    rcnn_check_point = torch.load(
        "/home/licheng/home/licheng/projects/cnet/data/rcnn/rcnn_epoch_19.params"
    )
    rcnn.load_state_dict(rcnn_check_point['rcnn'])
    """
    rpn_check_point = torch.load('/home/licheng/home/licheng/projects/cnet/data/rpn/rpn_epoch_19.params')
    #resnet.load_state_dict(check_point['resnet'])
    rpn.load_state_dict(rpn_check_point['rpn'])
    #resnet.load_state_dict(check_point['resnet'])
    rcnn_check_point = torch.load('/home/licheng/home/licheng/projects/cnet/data/rcnn/rcnn_epoch_16.params')
    rcnn.load_state_dict(rcnn_check_point['rcnn'])
    """
    pred_bboxes = list()
    pred_labels = list()
    pred_scores = list()

    gt_boxes = list()
    gt_labels = list()
    rcnn_target_creator = RCNNTargetCreator()
    with torch.no_grad():
        for img_batch, bndboxes_batch, labels_batch in test_data_loader:
            img, bndboxes, labels = img_batch, bndboxes_batch[0], labels_batch[0]
            if cuda:
                img, bndboxes, labels = img.cuda(), bndboxes.cuda(), labels.cuda()
            feature = resnet(img.float())
            #if cuda:
            #    feature = feature.cuda()
            rois, anchors, rpn_loc, rpn_score = rpn(feature, feature_stride=16)
            sample_roi, gt_roi_label, gt_roi_loc = rcnn_target_creator(
                rois,
                bndboxes.cpu().numpy(), labels)

            rois = at.toTensor(rois)
            roi_cls_loc, roi_score = rcnn(rois, feature)

            # Debugging aids: look_score1 first holds the raw logits and is
            # then overwritten with the softmaxed scores.
            look_score1 = np.array(roi_score.cpu().detach())
            pred_score = F.softmax(roi_score, dim=1)

            look_score1 = np.array(pred_score.cpu().detach())
            pred_score = pred_score.cpu().detach().numpy()

            mean = torch.Tensor(
                (0., 0., 0., 0.)).repeat(cfg.n_class)[None].cuda()
            std = torch.Tensor(
                (0.1, 0.1, 0.2, 0.2)).repeat(cfg.n_class)[None].cuda()
            roi_cls_loc = (roi_cls_loc * std + mean)

            roi_cls_loc = at.toTensor(roi_cls_loc)
            roi_cls_loc = roi_cls_loc.view(-1, cfg.n_class, 4)
            rois = rois.view(-1, 1, 4).expand_as(roi_cls_loc)

            # expand dim as loc
            #rois = rois.reshape(-1, 1, 4)[:, [int(x) for x in np.zeros(cfg.n_class).tolist()], :]

            #roi_cls_loc = at.toTensor(roi_cls_loc)
            #roi_cls_loc = roi_cls_loc.view(roi_cls_loc.shape[0], -1, 4)

            #pred_box = loc2bbox(at.toNumpy(rois).reshape(-1, 4), roi_cls_loc.view(-1, 4).cpu().detach().numpy())
            pred_box = loc2bbox(
                at.toNumpy(rois).reshape(-1, 4),
                roi_cls_loc.view(-1, 4).cpu().detach().numpy())

            # clip box
            pred_box[:, 0::2] = np.clip(pred_box[:, 0::2], 0, img.shape[3])
            pred_box[:, 1::2] = np.clip(pred_box[:, 1::2], 0, img.shape[2])

            gt_box = list(bndboxes_batch.cpu().numpy())
            gt_label = list(labels_batch.cpu().numpy())

            bbox = list()
            label = list()
            score = list()

            for class_index in range(1, cfg.n_class):
                each_bbox = pred_box.reshape(
                    (-1, cfg.n_class, 4))[:, class_index, :]
                each_score = pred_score[:, class_index]
                mask = each_score > cfg.pred_score_thresh
                each_bbox = each_bbox[mask]
                each_score = each_score[mask]
                keep = nms(each_bbox, each_score, cfg.pred_nms_thresh)
                bbox.append(each_bbox[keep])
                score.append(each_score[keep])
                label.append(class_index * np.ones((len(keep), )))
            bbox = np.concatenate(bbox, axis=0).astype(np.float32)
            score = np.concatenate(score, axis=0).astype(np.float32)
            label = np.concatenate(label, axis=0).astype(np.int32)
            print('gt_info:', gt_box, gt_label)
            print('sample roi', sample_roi[0])
            print('predict info:', bbox, score, label)

            pred_bboxes += [bbox]
            pred_scores += [score]
            pred_labels += [label]
            gt_boxes += gt_box
            gt_labels += gt_label

        result = calc_map(pred_bboxes, pred_labels, pred_scores, gt_boxes,
                          gt_labels)
        print(result)
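For reference, loc2bbox applies the standard Faster R-CNN decoding from (dy, dx, dh, dw) offsets back to boxes. A minimal numpy sketch under that assumption:

import numpy as np

def loc2bbox_sketch(rois, locs):
    # rois: (N, 4) as [y1, x1, y2, x2]; locs: (N, 4) as [dy, dx, dh, dw].
    h = rois[:, 2] - rois[:, 0]
    w = rois[:, 3] - rois[:, 1]
    cy = rois[:, 0] + 0.5 * h + locs[:, 0] * h
    cx = rois[:, 1] + 0.5 * w + locs[:, 1] * w
    h = h * np.exp(locs[:, 2])
    w = w * np.exp(locs[:, 3])
    return np.stack([cy - 0.5 * h, cx - 0.5 * w,
                     cy + 0.5 * h, cx + 0.5 * w], axis=1)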
Example #8
        'feature_stride': 16,
        'feature_compress': 1 / 16,
        'num_feature_channel': 256,
        'num_fc7_channel': 512,
        'num_rpn_channel': 512,
        'num_anchor': 9,
        'score_top_n': 100,
        'nms_top_n': 50,
        'nms_thresh': 0.7,
        'pool_out_size': 8,
        'num_class': 5,
        'radios': (0.5, 1, 2),
        'scales': (8, 16, 32),
    }

    model_ = RCNN(feature_extractor, conv_to_head, config)

    n_ = 10
    image = torch.rand((n_, 3, 128, 128))

    model_.total_mode()
    # score_(k, num_cls) delta_(k, num_cls, 4)
    score_, delta_ = model_.forward(image)
    print(score_.shape, delta_.shape)

    gt_bbox_ = torch.randint(0, 8 * 16, (n_, 8, 4)).sort(dim=2)[0].float()
    gt_label_ = torch.randint(1, config['num_class'], (n_, 8))
    label_list_, fg_delta_list_, index_list_ = annotate_proposals(
        model_.rois_list, gt_bbox_, gt_label_)
    print(label_list_[0].shape, fg_delta_list_[0].shape,
          index_list_[0][0].shape, index_list_[0][1].shape)
    t = time.time()
    loss_ = rcnn_loss_layer(score_, delta_, label_list_, fg_delta_list_,
                            index_list_)
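rcnn_loss_layer is defined outside this excerpt; the usual form of such a loss is a cross-entropy classification term plus a smooth-L1 regression term over foreground proposals only. An assumed sketch for a single image (the project's version may differ):

import torch.nn.functional as F

def rcnn_loss_sketch(score, delta, label, fg_delta, fg_index):
    # score: (k, num_cls); delta: (k, num_cls, 4); fg_index selects foreground rows.
    cls_loss = F.cross_entropy(score, label)
    fg_pred = delta[fg_index, label[fg_index]]   # predicted offsets for the gt class
    reg_loss = F.smooth_l1_loss(fg_pred, fg_delta)
    return cls_loss + reg_loss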
Example #9
File: train.py  Project: xv44586/Papers
print('generate context format...')
x_train_current = x_train
x_train_left = np.hstack(
    [np.expand_dims(x_train[:, 0], axis=1), x_train[:, 0:-1]])

x_train_right = np.hstack(
    [x_train[:, 1:], np.expand_dims(x_train[:, -1], axis=1)])

x_test_current = x_test
x_test_left = np.hstack(
    [np.expand_dims(x_test[:, 0], axis=1), x_test[:, 0:-1]])
x_test_right = np.hstack(
    [x_test[:, 1:], np.expand_dims(x_test[:, -1], axis=1)])
print('x train shape: ', x_train_current.shape)
print('x train left shape: ', x_train_left.shape)
print('x train right shape: ', x_train_right.shape)

print('build model...')
model = RCNN(maxlen, max_features, emb_dim).get_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

print('training ...')
earlystop = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit([x_train_current, x_train_left, x_train_right],
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[earlystop],
          validation_data=([x_test_current, x_test_left,
                            x_test_right], y_test))
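To see what the context construction above does, take one toy sequence: the left context shifts the sequence right by one step (duplicating the first token), and the right context shifts it left (duplicating the last token):

import numpy as np

x = np.array([[1, 2, 3, 4]])
left = np.hstack([np.expand_dims(x[:, 0], axis=1), x[:, 0:-1]])
right = np.hstack([x[:, 1:], np.expand_dims(x[:, -1], axis=1)])
print(left)   # [[1 1 2 3]]
print(right)  # [[2 3 4 4]]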
Example #10
def main(dataset):
    # Data Preparation
    # ==================================================

    path = os.path.join('transformed_data', dataset)
    if not os.path.exists(path):
        os.makedirs(path)
        # Load data
        x_text, y = data_helpers.load_data(
            os.path.join('community-data', dataset, '50'))
        y = np.array(y).astype(float)

        # Build vocabulary
        max_document_length = np.percentile(
            [len(x.split(" ")) for x in x_text], 50, interpolation='lower')
        vocab_processor = learn.preprocessing.VocabularyProcessor(
            max_document_length)

        x = np.array(list(vocab_processor.fit_transform(x_text)))

        # Write vocabulary
        vocab_processor.save(os.path.join(path, "vocab"))

        # Split train/test set
        x_train_dev, x_test, y_train_dev, y_test = train_test_split(
            x, y, test_size=0.1, random_state=42)
        x_train, x_dev, y_train, y_dev = train_test_split(
            x_train_dev, y_train_dev, test_size=0.1, random_state=42)
        # print(x_train.shape, y_train.shape)

        with open(os.path.join(path, 'x_train.pickle'), 'wb') as output:
            pickle.dump(x_train, output)
        with open(os.path.join(path, 'y_train.pickle'), 'wb') as output:
            pickle.dump(y_train, output)
        with open(os.path.join(path, 'x_dev.pickle'), 'wb') as output:
            pickle.dump(x_dev, output)
        with open(os.path.join(path, 'y_dev.pickle'), 'wb') as output:
            pickle.dump(y_dev, output)
        with open(os.path.join(path, 'x_test.pickle'), 'wb') as output:
            pickle.dump(x_test, output)
        with open(os.path.join(path, 'y_test.pickle'), 'wb') as output:
            pickle.dump(y_test, output)
    else:
        vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
            os.path.join(path, 'vocab'))
        with open(os.path.join(path, 'x_train.pickle'), 'rb') as f:
            x_train = pickle.load(f)
        with open(os.path.join(path, 'y_train.pickle'), 'rb') as f:
            y_train = pickle.load(f)
        with open(os.path.join(path, 'x_dev.pickle'), 'rb') as f:
            x_dev = pickle.load(f)
        with open(os.path.join(path, 'y_dev.pickle'), 'rb') as f:
            y_dev = pickle.load(f)
        with open(os.path.join(path, 'x_test.pickle'), 'rb') as f:
            x_test = pickle.load(f)
        with open(os.path.join(path, 'y_test.pickle'), 'rb') as f:
            y_test = pickle.load(f)

    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            intra_op_parallelism_threads=3,
            inter_op_parallelism_threads=3)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rcnn = RCNN(
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                hidden_units=100,
                context_size=50,
                max_sequence_length=x_train.shape[1],
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(rcnn.loss)
            train_op = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(
                os.path.curdir, "runs", dataset, timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rcnn.loss)
            recall_10_summary = tf.summary.scalar("recall_10", rcnn.recall_10)
            recall_5_summary = tf.summary.scalar("recall_5", rcnn.recall_5)
            precise_10_summary = tf.summary.scalar(
                "precise_10", rcnn.precise_10)
            precise_5_summary = tf.summary.scalar("precise_5", rcnn.precise_5)
            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, recall_10_summary, recall_5_summary, precise_10_summary, precise_5_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge(
                [loss_summary, recall_10_summary, recall_5_summary, precise_10_summary, precise_5_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(
                dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            """
                A single training step
            """
            def train_step(x_batch, y_batch):    
                # global total_recall_5, total_recall_10
                sequence_length = [len(np.nonzero(sample)[0])
                                   for sample in x_batch]
                feed_dict = {
                    rcnn.X: x_batch,
                    rcnn.y: y_batch,
                    rcnn.sequence_length: sequence_length,
                    # rcnn.max_sequence_length: max_sequence_length,
                    rcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss_1, loss_0, recall_5, recall_10 = sess.run(
                    [train_op, global_step, train_summary_op,
                        rcnn.loss_for_1, rcnn.loss_for_0, rcnn.recall_5, rcnn.recall_10],
                    feed_dict)
                
                # summary
                train_summary_writer.add_summary(summaries, step)
            

            """
                Evaluates model on a dev set
            """
            def dev_step(x_batch, y_batch, writer=None, test=False):
                
                sequence_length = [len(np.nonzero(sample)[0])
                                   for sample in x_batch]
                feed_dict = {
                    rcnn.X: x_batch,
                    rcnn.y: y_batch,
                    rcnn.sequence_length: sequence_length,
                    # rcnn.max_sequence_length: max_sequence_length,
                    rcnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss_1, loss_0, recall_5, recall_10 = sess.run(
                    [global_step, dev_summary_op,
                     rcnn.loss_for_1, rcnn.loss_for_0, rcnn.recall_5, rcnn.recall_10],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss_1 {:.3f}, loss_0 {:.3f}, recall_5 {:.3f}, recall_10 {:.3f}".format(
                    time_str, step, loss_1, loss_0, recall_5, recall_10))
                if test:
                    writer.add_summary(summaries, step + 1)
                else:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs, dynamic=False)
            
            # Training loop
            epoch = 0
            print("Training [" + dataset + "]")
            for batch in batches:
                if batch is True:
                    epoch += 1
                    current_step = tf.train.global_step(sess, global_step)
                    print("Epoch:" + str(epoch))
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))
                else:
                    x_batch, y_batch = zip(*batch)
                    # print(len(x_batch), len(y_batch))
                    # print(x_batch, y_batch)
                    train_step(x_batch, y_batch)

            print("Test:")
            dev_step(x_test, y_test, writer=dev_summary_writer, test=True)
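The training loop above relies on data_helpers.batch_iter yielding the sentinel True between epochs. A minimal generator consistent with that contract (a sketch; the project's helper may also shuffle or handle the dynamic flag):

def batch_iter_sketch(data, batch_size, num_epochs):
    # Yields lists of examples, plus the sentinel True after each full epoch.
    for _ in range(num_epochs):
        for start in range(0, len(data), batch_size):
            yield data[start:start + batch_size]
        yield True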
Example #11
    def __init__(self, model_params):
        super(End2EndModel, self).__init__()
        self.dropout = model_params['dropout']
        self.batch_size = model_params['batch_size']

        self.word_vocab_size = model_params['word_vocab_size']
        self.lemma_vocab_size = model_params['lemma_vocab_size']
        self.pos_vocab_size = model_params['pos_vocab_size']
        self.deprel_vocab_size = model_params['deprel_vocab_size']
        self.pretrain_vocab_size = model_params['pretrain_vocab_size']

        self.word_emb_size = model_params['word_emb_size']
        self.lemma_emb_size = model_params['lemma_emb_size']
        self.pos_emb_size = model_params['pos_emb_size']

        self.use_deprel = model_params['use_deprel']
        self.deprel_emb_size = model_params['deprel_emb_size']

        self.pretrain_emb_size = model_params['pretrain_emb_size']
        self.pretrain_emb_weight = model_params['pretrain_emb_weight']

        self.bilstm_num_layers = model_params['bilstm_num_layers']
        self.bilstm_hidden_size = model_params['bilstm_hidden_size']

        self.target_vocab_size = model_params['target_vocab_size']

        self.use_flag_embedding = model_params['use_flag_embedding']
        self.flag_emb_size = model_params['flag_embedding_size']

        self.use_gcn = model_params['use_gcn']
        self.use_sa_lstm = model_params['use_sa_lstm']
        self.use_rcnn = model_params['use_rcnn']
        self.use_tree_lstm = model_params['use_tree_lstm']

        self.deprel2idx = model_params['deprel2idx']

        if self.use_flag_embedding:
            self.flag_embedding = nn.Embedding(2, self.flag_emb_size)
            self.flag_embedding.weight.data.uniform_(-1.0, 1.0)

        self.word_embedding = nn.Embedding(self.word_vocab_size,
                                           self.word_emb_size)
        self.word_embedding.weight.data.uniform_(-1.0, 1.0)

        self.lemma_embedding = nn.Embedding(self.lemma_vocab_size,
                                            self.lemma_emb_size)
        self.lemma_embedding.weight.data.uniform_(-1.0, 1.0)

        self.pos_embedding = nn.Embedding(self.pos_vocab_size,
                                          self.pos_emb_size)
        self.pos_embedding.weight.data.uniform_(-1.0, 1.0)

        if self.use_deprel:
            self.deprel_embedding = nn.Embedding(self.deprel_vocab_size,
                                                 self.deprel_emb_size)
            self.deprel_embedding.weight.data.uniform_(-1.0, 1.0)

        self.pretrained_embedding = nn.Embedding(self.pretrain_vocab_size,
                                                 self.pretrain_emb_size)
        self.pretrained_embedding.weight.data.copy_(
            torch.from_numpy(self.pretrain_emb_weight))

        input_emb_size = 0
        if self.use_flag_embedding:
            input_emb_size += self.flag_emb_size
        else:
            input_emb_size += 1

        if self.use_deprel:
            input_emb_size += self.pretrain_emb_size + self.word_emb_size + self.lemma_emb_size + self.pos_emb_size + self.deprel_emb_size
        else:
            input_emb_size += self.pretrain_emb_size + self.word_emb_size + self.lemma_emb_size + self.pos_emb_size

        self.use_elmo = model_params['use_elmo']
        self.elmo_emb_size = model_params['elmo_embedding_size']
        if self.use_elmo:
            input_emb_size += self.elmo_emb_size
            self.elmo_mlp = nn.Sequential(nn.Linear(1024, self.elmo_emb_size),
                                          nn.ReLU())
            self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
            self.elmo_gamma = nn.Parameter(torch.ones(1))

        h0 = Variable(torch.randn(2 * self.bilstm_num_layers, self.batch_size,
                                  self.bilstm_hidden_size), requires_grad=True)
        c0 = Variable(torch.randn(2 * self.bilstm_num_layers, self.batch_size,
                                  self.bilstm_hidden_size), requires_grad=True)
        if USE_CUDA:
            h0, c0 = h0.cuda(), c0.cuda()
        self.bilstm_hidden_state0 = (h0, c0)

        self.bilstm_layer = nn.LSTM(input_size=input_emb_size,
                                    hidden_size=self.bilstm_hidden_size,
                                    num_layers=self.bilstm_num_layers,
                                    dropout=self.dropout,
                                    bidirectional=True,
                                    bias=True,
                                    batch_first=True)

        # self.bilstm_mlp = nn.Sequential(nn.Linear(self.bilstm_hidden_size*2, self.bilstm_hidden_size), nn.ReLU())
        self.use_self_attn = model_params['use_self_attn']
        if self.use_self_attn:
            self.self_attn_head = model_params['self_attn_head']
            self.attn_linear_first = nn.Linear(self.bilstm_hidden_size * 2,
                                               self.bilstm_hidden_size)
            self.attn_linear_first.bias.data.fill_(0)

            self.attn_linear_second = nn.Linear(self.bilstm_hidden_size,
                                                self.self_attn_head)
            self.attn_linear_second.bias.data.fill_(0)

            self.attn_linear_final = nn.Sequential(
                nn.Linear(self.bilstm_hidden_size * 2 * 2,
                          self.bilstm_hidden_size * 2), nn.Tanh())

            # self.biaf_attn = BiAFAttention(self.bilstm_hidden_size*2, self.bilstm_hidden_size*2, self.self_attn_head)

            # self.attn_linear_final = nn.Sequential(nn.Linear(self.bilstm_hidden_size*4,self.bilstm_hidden_size*2), nn.ReLU())

        if self.use_tree_lstm:
            # self.tree_input_mlp = nn.Linear(self.bilstm_hidden_size*2, self.bilstm_hidden_size)
            self.tree_lstm = ChildSumTreeLSTM(self.bilstm_hidden_size * 2,
                                              self.bilstm_hidden_size,
                                              self.deprel_vocab_size)
            self.tree_mlp = nn.Sequential(
                nn.Linear(self.bilstm_hidden_size * 3,
                          self.bilstm_hidden_size * 2), nn.ReLU())

        if self.use_sa_lstm:
            self.sa_lstm = SyntaxAwareLSTM(input_emb_size,
                                           self.bilstm_hidden_size,
                                           self.deprel_vocab_size)
            self.sa_mlp = nn.Sequential(
                nn.Linear(self.bilstm_hidden_size * 3,
                          self.bilstm_hidden_size * 2), nn.ReLU())

        if self.use_gcn:
            # self.W_in = nn.Parameter(torch.randn(2*self.bilstm_hidden_size, 2*self.bilstm_hidden_size))
            # self.W_out = nn.Parameter(torch.randn(2*self.bilstm_hidden_size, 2*self.bilstm_hidden_size))
            # self.W_self = nn.Parameter(torch.randn(2*self.bilstm_hidden_size, 2*self.bilstm_hidden_size))
            # self.gcn_bias = nn.Parameter(torch.randn(2*self.bilstm_hidden_size))
            self.syntactic_gcn = SyntacticGCN(self.bilstm_hidden_size * 2,
                                              self.bilstm_hidden_size,
                                              self.deprel_vocab_size,
                                              batch_first=True)

            self.gcn_mlp = nn.Sequential(
                nn.Linear(self.bilstm_hidden_size * 3,
                          self.bilstm_hidden_size * 2), nn.ReLU())

        if self.use_rcnn:
            self.rcnn = RCNN(self.bilstm_hidden_size * 2,
                             self.bilstm_hidden_size, self.deprel_vocab_size)
            self.rcnn_mlp = nn.Sequential(
                nn.Linear(self.bilstm_hidden_size * 3,
                          self.bilstm_hidden_size * 2), nn.ReLU())

        self.use_highway = model_params['use_highway']
        num_highway_layers = model_params['highway_layers']
        if self.use_highway:
            self.highway_layers = nn.ModuleList([
                HighwayMLP(self.bilstm_hidden_size * 2,
                           activation_function=F.relu)
                for _ in range(num_highway_layers)
            ])

        self.output_layer = nn.Linear(self.bilstm_hidden_size * 2,
                                      self.target_vocab_size)
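For orientation, input_emb_size above is simply the sum of the enabled embedding widths. With hypothetical sizes flag=16, pretrain=100, word=100, lemma=100, pos=32, deprel=64 and both use_flag_embedding and use_deprel set, the BiLSTM input size would be 16 + 100 + 100 + 100 + 32 + 64 = 412.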