Code example #1
File: models.py — Project: linto-project/linto-sp2
def c3d(nb_classes,
        img_w=m_c3d.kernel_w,
        img_h=m_c3d.kernel_h,
        include_top=True):
    """Build the C3D model and hand back its batch generators.

    Returns a 4-tuple ``(model, train_gen, val_gen, test_gen)`` where the
    generators are taken from the ``g_c3d`` module.  Augmentation flags are
    propagated to both the model module and the generator module as a side
    effect.
    """
    # Push the module-level augmentation switches into both helper modules
    # so model construction and batch generation agree on them.
    for module in (m_c3d, g_c3d):
        module.doFlip = doFlip
        module.doScale = doScale
    g_c3d.doNoisy = True
    g_c3d.doTrans = True

    model = m_c3d.c3d_model(nb_classes=nb_classes,
                            img_w=img_w,
                            img_h=img_h,
                            num_channels=3,
                            include_top=include_top)
    return (model,
            g_c3d.generator_train_batch,
            g_c3d.generator_val_batch,
            g_c3d.generator_test_batch)
Code example #2
def main():
    """Train the 3D CNN selected by ``args.model`` on the video lists.

    Counts training/validation samples from ``train_list.txt`` and
    ``test_list.txt``, builds and compiles the chosen model with SGD,
    trains via batch generators, then saves history plots and the final
    weights under ``results/``.

    Raises:
        ValueError: if ``args.model`` names an unsupported architecture.
    """
    train_file = 'train_list.txt'
    test_file = 'test_list.txt'
    # Context managers guarantee the list files are closed even if
    # readlines() raises — the original open()/close() pairs leaked on error.
    with open(train_file, 'r') as f:
        train_samples = len(f.readlines())
    with open(test_file, 'r') as f:
        val_samples = len(f.readlines())

    num_classes = 101
    epochs = 25
    # (height, width, frames, channels) — TODO confirm axis order against
    # the model implementations.
    input_shape = (112, 112, 8, 3)
    if args.model == 'c3d':
        model = c3d.c3d_model(num_classes, input_shape)
    elif args.model == 'resnet_3d':
        model = resnet_3d.resnet_3d(num_classes,
                                    input_shape,
                                    drop_rate=args.drop_rate)
    elif args.model == 'densenet_3d':
        model = densenet_3d.densenet_3d(num_classes,
                                        input_shape,
                                        dropout_rate=args.drop_rate)
    elif args.model == 'inception_3d':
        model = inception_3d.inception_3d(num_classes, input_shape)
    elif args.model == 'dense_resnet_3d':
        model = DenseResNet_3d.dense_resnet_3d(num_classes,
                                               input_shape,
                                               dropout_rate=args.drop_rate)
    else:
        raise ValueError('Do not support {}!'.format(args.model))
    sgd = SGD(lr=args.lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.summary()

    train = generator_train_batch(train_file, args.batch_size, num_classes,
                                  args.img_path)
    val = generator_val_batch(test_file, args.batch_size, num_classes,
                              args.img_path)

    print('Start training..')
    history = model.fit_generator(
        train,
        steps_per_epoch=train_samples // args.batch_size,
        epochs=epochs,
        callbacks=[onetenth_10_15_20(args.lr)],
        validation_data=val,
        validation_steps=val_samples // args.batch_size,
        verbose=1)
    if not os.path.exists('results/'):
        os.mkdir('results/')
    plot_history(history, 'results/', args.model)
    save_history(history, 'results/', args.model)
    model.save_weights('results/weights_{}.h5'.format(args.model))
Code example #3
def video_audio(input_audio=(257, 129, 1), num_class=2, mode='train', args=None):
    """Fuse a C3D video branch with a VGGVox audio branch into one classifier.

    Both backbones are built without their top layers; their outputs are
    concatenated and passed through two regularized Dense+ReLU layers
    before a softmax over ``num_class`` classes.
    """
    from models.c3d import c3d_model

    video_net = c3d_model(nb_classes=num_class, img_w=112, img_h=112,
                          num_channels=3, include_top=False)
    audio_net = vggvox_resnet2d(args=args, input_dim=input_audio,
                                mode=mode, include_top=False)

    fused = keras.layers.concatenate([video_net.output, audio_net.output])
    fused = Dense(2048, activation='relu',
                  kernel_regularizer=l2(weight_decay))(fused)
    fused = Dropout(0.1)(fused)
    fused = Dense(2048, activation='relu',
                  kernel_regularizer=l2(weight_decay))(fused)
    fused = Dense(num_class, kernel_regularizer=l2(weight_decay))(fused)
    fused = Activation('softmax')(fused)

    return Model(inputs=[video_net.input, audio_net.input], outputs=fused)
Code example #4
def load_data(class_names, video, model_path):
    """Prepare everything needed to evaluate a video: labels, model, GT, capture.

    Args:
        class_names: path to a text file with one class label per line.
        video: path to the video file to open.
        model_path: directory containing ``weights_c3d.h5``.

    Returns:
        Tuple ``(cap, class_names, model, gt_speakers, gt_faces)``.

    NOTE(review): relies on module-level ``num_classes``, ``kernel_w`` and
    ``kernel_h`` being defined elsewhere in the file.
    """
    # The 'with' statement closes the file itself; the original also called
    # f.close() inside the block, which was redundant and has been removed.
    with open(class_names, 'r') as f:
        class_names = f.readlines()
    # init model
    model = c3d_model(num_classes, kernel_w, kernel_h)
    lr = 0.005
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.summary()
    model.load_weights(model_path + '/weights_c3d.h5', by_name=True)
    # read ground truth
    gt_speakers, gt_faces = load_ground_truth(video)
    # open the video stream
    cap = cv2.VideoCapture(video)
    return cap, class_names, model, gt_speakers, gt_faces
Code example #5
def ict3DHP_test(video, class_names):
    """Benchmark C3D detection on one video across decreasing resolutions.

    Loads the class labels and pretrained C3D weights, then processes the
    video nine times, scaling its frame size down by factors 1..9, and
    writes per-resolution FP/TP statistics to ``result_video_demo.txt``.

    NOTE(review): relies on module-level ``num_classes``, ``kernel_w``,
    ``kernel_h`` and ``process_video_ict3DHP`` defined elsewhere in the file.
    """
    # 'with' closes the file; the explicit close() calls inside the original
    # with-blocks were redundant and have been removed.
    with open(class_names, 'r') as f:
        class_names = f.readlines()

    # init model
    model = c3d_model(num_classes, kernel_w, kernel_h)
    lr = 0.005
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.summary()
    model.load_weights('./results/weights_c3d.h5', by_name=True)

    # open the video stream
    cap = cv2.VideoCapture(video)
    gt_index = 1
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)

    with open('result_video_demo.txt', 'w') as fp:
        fp.write('Resolution\tFP\tMean FP\tTP\tMean TP\n')

        for i in range(1, 10):
            tmp_w = int(w / i)
            tmp_h = int(h / i)
            data = process_video_ict3DHP(cap, class_names, model, gt_index,
                                         (tmp_w, tmp_h))
            # Rewind so the next resolution pass re-reads the whole video.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

            fp.write('{}\t{}\t{}\t{}\t{}\n'.format((tmp_w, tmp_h), data[1],
                                                   data[2], data[3], data[4]))

    cap.release()
    cv2.destroyAllWindows()