Example #1
def main(args):
    # load the kinetics classes
    kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')]


    if args.eval_type in ['rgb', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for RGB data
            # and load pretrained weights (trained on kinetics dataset only)
            rgb_model = Inception_Inflated3d(
                include_top=False,
                weights='rgb_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for RGB data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            rgb_model = Inception_Inflated3d(
                include_top=False,
                weights='rgb_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
            # pdb.set_trace()
            # print(rgb_model.summary())
            # plot_model(rgb_model, to_file='model_without_top.png', show_shapes=True)


        # build a symbolic video input (instead of loading a sample)
        vid_input = Input(shape=(79, 224, 224, 3))
        features = rgb_model(vid_input)
        feature_shape = features.shape.as_list()
        print(feature_shape)
        mid_slice_no = feature_shape[1] // 2 + 1  # integer index into the time axis
        print(mid_slice_no)
        rgb_features = features[:, mid_slice_no, :, :, :]

        print(rgb_features.shape.as_list())
        '''
        rgb_sample = np.load(SAMPLE_DATA_PATH['rgb'])

        # make prediction
        rgb_features = rgb_model.predict(rgb_sample)
        # print(rgb_features.shape)

        features = rgb_features[:, 11, :, :, :]
        features = np.array(features)
        print(features.shape)
        '''
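
The tensor slicing above indexes a symbolic tensor directly; a minimal sketch of the same mid-slice extraction wrapped in a Lambda layer so it can be packaged as its own Model (the function and argument names here are assumptions, not from the original script):

from keras.layers import Input, Lambda
from keras.models import Model

def build_mid_slice_extractor(rgb_model, num_frames=79, size=224):
    # run I3D and keep only the middle temporal slice of its feature map
    vid_input = Input(shape=(num_frames, size, size, 3))
    features = rgb_model(vid_input)               # (batch, T', 7, 7, 1024)
    t_mid = features.shape.as_list()[1] // 2 + 1  # same index as above
    mid_slice = Lambda(lambda f: f[:, t_mid, :, :, :])(features)
    return Model(vid_input, mid_slice)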
Example #2
    def define_model(self, model_type="RGB"):
        if model_type == "RGB":
            channel = 3
            model_name = "_rgb"
        elif model_type == "OPT":
            channel = 2
            model_name = "_opt"
        a = keras.layers.Input(shape=(16, 224, 224, channel))
        i3d = Inception_Inflated3d(include_top=False,
                                   weights=None,
                                   input_tensor=a,
                                   input_shape=None,
                                   dropout_prob=0.5,
                                   endpoint_logit=True,
                                   classes=3,
                                   model_name=model_name)
        for layer in i3d.layers:
            layer.name = layer.name + model_name
        model = keras.models.Sequential()
        model.add(i3d)
        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dropout(0.5))
        model.add(keras.layers.Dense(3))

        return model
Example #3
def loadModel(numberOfClasses, inputFrames, frameHeight, frameWidth, numRGBChannels, withWeights=False):
    weights = None
    if withWeights:
        weights = 'rgb_inception_i3d'
    rgb_model = Inception_Inflated3d(
                include_top=False,
                weights=weights,
                input_shape=(inputFrames, frameHeight, frameWidth, numRGBChannels),
                dropout_prob=0.5,
                endpoint_logit=True,
                classes=numberOfClasses)

    x = rgb_model.output
    x = Dropout(0.5)(x)

    x = conv3d_bn(x, numberOfClasses, 1, 1, 1, padding='same',
                  use_bias=True, use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1')
    
    num_frames_remaining = int(x.shape[1])
    x = Reshape((num_frames_remaining, numberOfClasses))(x)

    # logits (raw scores for each class)
    x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                    output_shape=lambda s: (s[0], s[2]))(x)

    predictions = Activation('softmax', name='prediction')(x)
    model = Model(rgb_model.input, predictions)
    
    return model    
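
A hypothetical call to loadModel; the class count, clip length, and compile settings are placeholders, not values from the original project:

model = loadModel(numberOfClasses=10, inputFrames=64, frameHeight=224,
                  frameWidth=224, numRGBChannels=3, withWeights=True)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])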
Example #4
def run_test(listFileName, storeDir):
    num_test_videos = len(list(open(listFileName,'r')))
    print("Number of test videos={}".format(num_test_videos))

    rgb_model = Inception_Inflated3d(
        include_top=False,
        weights=weight_names['withImagenet'],
        input_shape=(CLIP_LENGTH, CROP_SIZE, CROP_SIZE, 3),
        classes=NUM_CLASSES)

    # saver = tf.train.Saver()
    # tf_config = tf.ConfigProto()
    # tf_config.gpu_options.allow_growth = True
    # # tf_config.gpu_options.per_process_gpu_memory_fraction = 0.8
    # sess = tf.Session(config=tf_config)
    # # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True))
    # init = tf.global_variables_initializer()
    # sess.run(init)
    # saver.restore(sess, model_name)

    next_batch_start = 0
    all_steps = int((num_test_videos - 1) / BATCH_SIZE + 1)


    file_index = 0
    for step in range(all_steps):
        np_arr_data, np_arr_label, next_batch_start, _, _ = input_data.read_clip_and_label(
            listFileName, BATCH_SIZE, SEQ_NUM, start_pos=next_batch_start,
            num_frames_per_clip=CLIP_LENGTH, crop_size=CROP_SIZE)
        logits_batch = []
        for i in range(SEQ_NUM):
            data = np_arr_data[:, i, :, :, :]
            rgb_logits = rgb_model.predict(data)
            logits_batch.append(rgb_logits.reshape(BATCH_SIZE, 1024))
        # (SEQ_NUM, BATCH_SIZE, 1024) -> (BATCH_SIZE, SEQ_NUM, 1024); a plain
        # reshape would interleave features from different videos
        logits_batch = np.array(logits_batch)
        fc6_feature_batch = np.transpose(logits_batch, (1, 0, 2))
        for batch_index in range(min(BATCH_SIZE, np_arr_label.shape[0])):
            try:
                #image = io.imread(images[i]) # type(image) must be array!
                data = fc6_feature_batch[batch_index]
                data = data.astype(np.float64)
                label = np_arr_label[batch_index]
                file_index += 1
                filename = "%s/%08d_%02d.bin" % (storeDir, file_index, label)
                # print("data-->",data)
                # print("label-->",label)
                # print("filename",filename)
                # with open(filename, 'wb') as f:
                #     f.write(data[i,:])
                data.tofile(filename)
            except IOError:
                print('Skip it!\n')
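
The .bin files written above can be read back with np.fromfile; the float64 dtype and (SEQ_NUM, 1024) shape follow from run_test itself, while the helper name is made up:

import numpy as np

def load_fc6_feature(filename, seq_num):
    # each file holds one video's flattened float64 feature sequence
    return np.fromfile(filename, dtype=np.float64).reshape(seq_num, 1024)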
Example #5
def train():
    # load the kinetics classes
    # datapath='/users/kevin/downloads/aicure-dataset/*/*.npy'
    datapath = r'C:\Users\Chris\Documents\projects\cs172b\aicure-dataset\*\*.npy'
    indexes, data, labels = load_data(datapath)

    base_model = Inception_Inflated3d(weights='rgb_imagenet_and_kinetics',
                                      include_top=False,
                                      input_shape=(NUM_FRAMES, FRAME_HEIGHT,
                                                   FRAME_WIDTH,
                                                   NUM_RGB_CHANNELS))

    output = Dropout(0.5)(base_model.output)
    predict = Reshape((-1, 1024))(output)
    #predict = AveragePooling1D(pool_size=3)(predict)
    predict = Dense(NUM_FRAMES,
                    kernel_initializer='normal',
                    activation='sigmoid')(predict)
    predict = ThresholdedReLU(theta=0.8, trainable=False)(predict)
    model = Model(inputs=base_model.input, outputs=predict)

    # freeze the first 100 layers
    for layer in model.layers[:100]:
        layer.trainable = False

    # randomize the weights for the remaining trainable layers
    # for layer in model.layers[150:195]: # change to 150:195 later
    #     layer.kernel_initializer = 'glorot_uniform'

    optimizer = keras.optimizers.Adam(lr=1e-4)
    model.compile(optimizer=optimizer, loss='mae', metrics=['accuracy'])

    #model.summary()

    train_indexes = indexes[:int(0.7 * len(indexes))]
    validation_indexes = indexes[int(0.7 * len(indexes)):]

    reducelr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy',
                                                    patience=2,
                                                    factor=0.2,
                                                    min_lr=1e-8)
    earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                                 patience=5)

    callbacks = [reducelr, earlystop]

    history = model.fit_generator(data_generator(data, labels, train_indexes,
                                                 BATCH_SIZE),
                                  steps_per_epoch=int(STEPS * .7),
                                  epochs=50,
                                  validation_data=data_generator(
                                      data, labels, validation_indexes),
                                  validation_steps=int(STEPS * .3),
                                  callbacks=callbacks)
    write_out(history, 'hist.csv')
    save_model(model, 'i3d')
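
data_generator is not shown in this example; a minimal sketch matching the call sites above (batch size defaults to 1 for the validation call), with the in-memory data and label layout as assumptions:

import numpy as np

def data_generator(data, labels, indexes, batch_size=1):
    indexes = list(indexes)
    while True:  # fit_generator expects an endless generator
        np.random.shuffle(indexes)
        for start in range(0, len(indexes) - batch_size + 1, batch_size):
            batch = indexes[start:start + batch_size]
            x = np.stack([data[i] for i in batch])    # (B, T, H, W, C) clips
            y = np.stack([labels[i] for i in batch])  # per-frame targets
            yield x, y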
Example #6
File: pi3d.py Project: tienhoangvan/P-I3D
    def initialize_weights(self, all_models_name, mode, dropout_prob, sum_idx):
        model = self.pi3d_model(all_models_name, mode, dropout_prob, sum_idx)
        #model_second = load_model('/data/stars/user/achaudha/ACCV_2018/I3D_experiments_all_patches/new_model/weights_optim/' + 'left_hand' + '/weights.hdf5')
        #model_second = load_model('/data/stars/user/achaudha/ACCV_2018/I3D_experiments_all_patches/new_model/weights_optim/' + 'i3d' + '/weights.hdf5')
        model_second = Inception_Inflated3d(include_top = True, weights='rgb_imagenet_and_kinetics')
        #pi3d = PI3D2(self.num_classes)
        #model_second = pi3d.initialize_weights(all_models_name, mode, dropout_prob)
        #model_second.load_weights('/data/stars/user/achaudha/ACCV_2018/PI3D_a/weights_pi3d_left_hand_full_body_sum1/epoch8.hdf5') 
        weight_idx_s = -45 + (2 - sum_idx) * 20
        weight_idx_e = -4
        for l_m, l_lh in zip(model.layers[weight_idx_s: weight_idx_e], model_second.layers[weight_idx_s: weight_idx_e]):
            l_m.set_weights(l_lh.get_weights())

        return model
Example #7
def loadModelLR(numberOfClasses, inputFrames, frameHeight, frameWidth, numRGBChannels, withWeights=False):
    weights = None
    if withWeights:
        weights = 'rgb_inception_i3d'
    rgb_model = Inception_Inflated3d(
                include_top=False,
                weights=weights,
                input_shape=(inputFrames, frameHeight, frameWidth, numRGBChannels),
                dropout_prob=0.5,
                endpoint_logit=True,
                classes=numberOfClasses)
    
    x = rgb_model.output
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(numberOfClasses, activation='softmax')(x)
    model = Model(rgb_model.input, predictions)
    
    return model
Example #8
def RGB_model(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_CLASSES,
              dropout_prob):

    rgb_model = Inception_Inflated3d(include_top=False,
                                     weights='rgb_imagenet_and_kinetics',
                                     input_shape=(NUM_FRAMES, FRAME_HEIGHT,
                                                  FRAME_WIDTH, 3))

    x1 = rgb_model.layers[-1].output
    x1 = Dropout(dropout_prob)(x1)

    x1 = generate_logit(x1, '1x1_Conv3d_rgb_logits', NUM_CLASSES)

    x = Activation('softmax', name='prediction')(x1)

    model = Model(inputs=rgb_model.input, outputs=x)

    return model
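
generate_logit is defined elsewhere in that project; assuming it is importable, a hypothetical call with placeholder values:

model = RGB_model(NUM_FRAMES=64, FRAME_HEIGHT=224, FRAME_WIDTH=224,
                  NUM_CLASSES=10, dropout_prob=0.5)
model.compile(optimizer='adam', loss='categorical_crossentropy')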
Example #9
        while True:
            batch_features = np.zeros(
                (BATCH, NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT, 3))
            batch_labels = np.zeros((BATCH, NUM_CLASSES))
            for i in range(BATCH):
                batch_features[i] = hf["validation"][counter % 20]
                batch_labels[i] = validation_labels[counter % 20]
                # print("Index: "+str(i))
                # print(batch_labels)
                counter += 1
            yield batch_features, batch_labels


rgb_model = Inception_Inflated3d(include_top=False,
                                 weights='rgb_kinetics_only',
                                 input_shape=(NUM_FRAMES, FRAME_HEIGHT,
                                              FRAME_WIDTH, 3),
                                 classes=NUM_CLASSES,
                                 endpoint_logit=False)

opt = optimizers.Adam(lr=0.001,
                      beta_1=0.9,
                      beta_2=0.999,
                      epsilon=1e-08,
                      decay=1e-6)

rgb_model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

best_checkpoint = ModelCheckpoint('2_per_signer_weights_best.hdf5',
                                  monitor='val_acc',
Example #10
File: evaluate.py Project: prinshul/GWSDR
            yield x, y


rgb_data = []
flow_data = []
labels = []
for t in range(5):
    rgb_data.append(pickle.load(open(rgb_test_path[t], "rb")))
    flow_data.append(pickle.load(open(flow_test_path[t], "rb")))
    labels.append(pickle.load(open(label_test_path[t], "rb")))

acc_list = []

for t in range(1):
    rgb_model = Inception_Inflated3d(include_top=False,
                                     weights='rgb_imagenet_and_kinetics',
                                     input_shape=(None, 224, 224, 3),
                                     classes=8)
    rgb_model.load_weights("data/0_8/rgb" + str(t) + ".h5")
    flow_model = Inception_Inflated3d(include_top=False,
                                      weights='flow_imagenet_and_kinetics',
                                      input_shape=(None, 224, 224, 2),
                                      classes=8)
    flow_model.load_weights("data/0_8/flow" + str(t) + ".h5")
    count = 0
    y_pred = []
    y_true = []
    overall_conf = []
    correct_overall_conf = []
    wrong_overall_conf = []
    label_conf = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: []}
Example #11
    def __init__(self, weights='rgb_imagenet_and_kinetics'):
        self.model = Inception_Inflated3d(include_top=True, weights=weights)
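
The enclosing class name is cut off in this snippet; a usage sketch with a stand-in name and a placeholder input:

import numpy as np

clf = I3DClassifier()  # stand-in name; the real class name is not shown
rgb_clip = np.zeros((1, 79, 224, 224, 3))  # placeholder clip, rescaled to [-1, 1] in practice
kinetics_logits = clf.model.predict(rgb_clip)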
Example #12
def video_process(lock, IS_OVER):
    rgb_model = Inception_Inflated3d(
        include_top=True,
        weights='rgb_imagenet_and_kinetics',
        input_shape=(NUM_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, NUM_RGB_CHANNELS),
        classes=NUM_CLASSES)

    flow_model = Inception_Inflated3d(
        include_top=True,
        weights='flow_imagenet_and_kinetics',
        input_shape=(NUM_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, NUM_FLOW_CHANNELS),
        classes=NUM_CLASSES)

    if IS_PLOT_MODEL:
        rgb_model.summary()
        from keras.utils.vis_utils import plot_model
        plot_model(rgb_model, 'model.png')

    running_symbol = RUNNING_SYMBOL
    change_frames, warning_rate = 0, 0
    global is_change
    while True:
        with lock:
            with open('rgbs.pkl', 'rb') as f:
                rgbs = pickle.load(f)
        if len(rgbs) != NUM_FRAMES:
            continue
        now = time.time()
        if IS_OVER.value:
            exit()
        flows = read_frames_flow(rgbs)

        is_change = False
        video_text = []
        if running_symbol:
            norm = np.linalg.norm(flows[-1])
            # frame_now = CURRENT_FRAME if CURRENT_FRAME.shape[0] > 480 else cv2.resize(CURRENT_FRAME, (860, 480))
            process_time = time.time() - now
            video_text.append('Speed: %.2f s, running norm: %.2f' % (process_time, norm))
            if norm > RUNNING_SPEED:
                is_change = True
                video_text.append('Running fast!')
            if warning_rate > WARNING_RATE:
                video_text.append('WARNING!')
            change_frames = change_frames + 1 if not is_change else 0
            warning_rate = warning_rate + 1 if is_change else 0
            if change_frames > CHANGE_FRAMES_CLASSIFICATION:
                running_symbol = False
                change_frames = 0
                warning_rate = 0
        else:
            rgb_sample = rgb2tensor(rgbs)
            flow_sample = flow2tensor(flows)

            rgb_logits = rgb_model.predict(rgb_sample)
            flow_logits = flow_model.predict(flow_sample)

            sample_logits = rgb_logits + flow_logits
            # produce softmax output from model logit for class probabilities
            sample_logits = sample_logits[0]  # we are dealing with just one example
            sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits))
            sorted_indices = np.argsort(sample_predictions)[::-1]

            process_time = time.time() - now
            video_text.append('Speed: %.2f s' % process_time)
            for index in sorted_indices[:5]:
                if kinetics_classes[index] in RUN_CLASSES:
                    is_change = True
                video_text.append('%s: %.2f' % (kinetics_classes[index], sample_predictions[index]))
            change_frames = change_frames + 1 if is_change else 0
        with lock:
            with open('video_text.pkl', 'wb') as f:
                pickle.dump(video_text, f)

        if change_frames > CHANGE_FRAMES_RUNNING:
            running_symbol = True
            change_frames = 0
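
This example, like several others here, computes softmax inline as np.exp(x) / np.sum(np.exp(x)), which can overflow for large logits; a numerically stable variant, offered as a sketch:

import numpy as np

def stable_softmax(logits):
    # subtracting the max shifts the exponents into a safe range
    # without changing the result
    exp = np.exp(logits - np.max(logits))
    return exp / np.sum(exp)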
Example #13
NUM_FRAMES = 79
FRAME_WIDTH = 224
FRAME_HEIGHT = 224
NUM_RGB_CHANNELS = 3
NUM_CLASSES = 400

#emotion_frames= np.load('./test_data/emotion_data_1.npy')
action_frames = np.load('./test_data/action_data_1.npy')

face_detection = load_detection_model(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
gender_classifier = load_model(gender_model_path, compile=False)
action_classifier = Inception_Inflated3d(
    include_top=True,
    weights='rgb_imagenet_and_kinetics',
    input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
    classes=NUM_CLASSES)

emotion_target_size = emotion_classifier.input_shape[1:3]  #(64,64)
gender_target_size = gender_classifier.input_shape[1:3]  #(48,48)
action_target_size = (FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS)  # (224, 224, 3)

#emotion_label_arg = np.argmax(emotion_classifier.predict(emotion_frames))
#final_emotion = mode(emotion_label_arg)
#emotion_text = emotion_labels[final_emotion]
action_frames = np.expand_dims(action_frames, 0)
action_logits = action_classifier.predict(action_frames)

# produce softmax output from model logit for class probabilities
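
The snippet breaks off at this comment; judging from the identical comment in Examples #16 and #25, the continuation presumably looks like the sketch below (it assumes kinetics_classes has been loaded as in those examples):

action_logits = action_logits[0]  # just one example in the batch
action_predictions = np.exp(action_logits) / np.sum(np.exp(action_logits))
for index in np.argsort(action_predictions)[::-1][:5]:
    print(action_predictions[index], kinetics_classes[index])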
Example #14
                        type=int,
                        default=32)
    parser.add_argument("-m",
                        "--memory",
                        dest="memory",
                        help="memory -> for fusion",
                        type=int,
                        default=5)
    parser.add_argument("-t",
                        "--threshold",
                        dest="threshold",
                        help="prediction threshold",
                        type=float,
                        default=0.5)
    args = parser.parse_args()

    labels = readLabels(args.labels)
    clipDuration = args.input_frames
    memory = args.memory
    threshold = args.threshold
    if not args.model:
        model = Inception_Inflated3d(include_top=True,
                                     weights='rgb_inception_i3d',
                                     input_shape=(clipDuration, 224, 224, 3),
                                     classes=400,
                                     endpoint_logit=False)
    else:
        model = loadModel(len(labels), clipDuration, 224, 224, 3)
        model.load_weights(args.model)

    main(args.source, labels, model)
Example #15
if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
	# input_shape_features = (num_features, None, None)
else:
	input_shape_img = (None, None, 3)

shared_layers_input = Input(shape=(None, None, 832))
roi_input = Input(shape=(None, 4))
vid_input = Input(shape=(None, None, None, 3))
vid_input_shape = (64, 400, 320, 3)
feature_map_input = Input(shape=(None, None, None, 832))

rgb_model = Inception_Inflated3d(
				include_top=False,
				weights='rgb_kinetics_only',
				input_shape=vid_input_shape,
				classes=classes_count)
def get_new_img_size(width, height, img_min_side, C):
	img_min_side =448
	if width <= height:
		f = float(img_min_side) / width
		resized_height = int(f * height)
		resized_width = img_min_side
	else:
		f = float(img_min_side) / height
		resized_width = int(f * width)
		resized_height = img_min_side

	if C.dataset == 'AVA':
		return resized_width, resized_height
Example #16
def main(args):
    # load the kinetics classes
    kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')]

    args.eval_type = 'rgb'
    if args.eval_type in ['rgb', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for RGB data
            # and load pretrained weights (trained on kinetics dataset only)
            rgb_model = Inception_Inflated3d(
                include_top=True,
                weights='rgb_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for RGB data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            rgb_model = Inception_Inflated3d(
                include_top=True,
                weights='rgb_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)

        # load RGB sample (just one example)
        rgb_sample = np.load(SAMPLE_DATA_PATH['rgb'])
        # import IPython; IPython.embed()
        # make prediction
        train_generator, validation_generator = create_generators(
            args, backbone.preprocess_image)
        rgb_sample = train_generator.__getitem__(1)[0]
        rgb_logits = rgb_model.predict(rgb_sample)

    if args.eval_type in ['flow', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for optical flow data
            # and load pretrained weights (trained on kinetics dataset only)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for optical flow data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)

        # load flow sample (just one example)
        flow_sample = np.load(SAMPLE_DATA_PATH['flow'])
        # import IPython; IPython.embed()
        # make prediction
        flow_logits = flow_model.predict(flow_sample)

    # produce final model logits
    if args.eval_type == 'rgb':
        sample_logits = rgb_logits
    # elif args.eval_type == 'flow':
    #     sample_logits = flow_logits
    # else: # joint
    #     sample_logits = rgb_logits + flow_logits

    # produce softmax output from model logit for class probabilities
    sample_logits = sample_logits[0]  # we are dealing with just one example
    sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits))

    sorted_indices = np.argsort(sample_predictions)[::-1]

    print('\nNorm of logits: %f' % np.linalg.norm(sample_logits))
    print('\nTop classes and probabilities')
    for index in sorted_indices[:20]:
        print(sample_predictions[index], sample_logits[index],
              kinetics_classes[index])

    return
Example #17
def main(args):

    model = ResNet50(weights='imagenet')
    model_res = Model(inputs=model.input,
                      outputs=[
                          model.get_layer('conv1_relu').output,
                          model.get_layer('conv2_block1_out').output,
                          model.get_layer('conv3_block1_out').output
                      ])

    # load the kinetics classes
    kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')]

    if args.eval_type in ['rgb', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for RGB data
            # and load pretrained weights (trained on kinetics dataset only)
            rgb_model = Inception_Inflated3d(
                include_top=True,
                weights='rgb_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for RGB data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            rgb_model = Inception_Inflated3d(
                include_top=True,
                weights='rgb_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        model_rgb = Model(
            inputs=rgb_model.input,
            outputs=rgb_model.get_layer('Conv3d_3c_3b_1x1').output)
        # print(model_rgb.summary())

        # load RGB sample (just one example)
        # rgb_sample = np.load(SAMPLE_DATA_PATH['rgb'])

        # make prediction
        # rgb_logits = rgb_model.predict(rgb_sample)

    if args.eval_type in ['flow', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for optical flow data
            # and load pretrained weights (trained on kinetics dataset only)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for optical flow data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)

        # load flow sample (just one example)
        # flow_sample = np.load(SAMPLE_DATA_PATH['flow'])

        # make prediction
        # flow_logits = flow_model.predict(flow_sample)
        model_flow = Model(
            inputs=flow_model.input,
            outputs=flow_model.get_layer('Conv3d_3c_3b_1x1').output)
        # print(model_flow.summary())

    # produce final model logits
    # if args.eval_type == 'rgb':
    #     sample_logits = rgb_logits
    # elif args.eval_type == 'flow':
    #     sample_logits = flow_logits
    # else:  # joint
    #     sample_logits = rgb_logits + flow_logits

    # produce softmax output from model logit for class probabilities
    # sample_logits = sample_logits[0]  # we are dealing with just one example
    # sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits))

    # sorted_indices = np.argsort(sample_predictions)[::-1]

    # print('\nNorm of logits: %f' % np.linalg.norm(sample_logits))
    # print('\nTop classes and probabilities')
    # for index in sorted_indices[:20]:
    #     print(sample_predictions[index], sample_logits[index], kinetics_classes[index])
    import STLSTM
    NUM_CELL = 1
    FILTERS0 = 128
    FILTERS1 = 64
    FILTERS2 = 64
    FILTERS3 = 64
    KERNEL_SIZE = 3
    for layer in model_rgb.layers:
        layer.trainable = False
    for layer in model_res.layers:
        layer.trainable = False
    # Model 1
    # m1, m2, m3 = model_res(input3)
    # Model 2
    # rgb = model_rgb(input1)
    # flow = model_flow(input2)

    cells0 = STLSTM.StackedSTLSTMCells([
        STLSTM.STLSTMCell(filters=FILTERS0,
                          kernel_size=KERNEL_SIZE,
                          padding="same",
                          data_format="channels_last") for i in range(NUM_CELL)
    ])
    cells1 = STLSTM.StackedSTLSTMCells([
        STLSTM.STLSTMCell(filters=FILTERS1,
                          kernel_size=KERNEL_SIZE,
                          padding="same",
                          data_format="channels_last") for i in range(NUM_CELL)
    ])
    cells2 = STLSTM.StackedSTLSTMCells([
        STLSTM.STLSTMCell(filters=FILTERS2,
                          kernel_size=KERNEL_SIZE,
                          padding="same",
                          data_format="channels_last") for i in range(NUM_CELL)
    ])
    cells3 = STLSTM.StackedSTLSTMCells([
        STLSTM.STLSTMCell(filters=FILTERS3,
                          kernel_size=KERNEL_SIZE,
                          padding="same",
                          data_format="channels_last") for i in range(NUM_CELL)
    ])

    x_rgb = Input(shape=(10, 224, 224, 3))
    #     x_flow=Input(shape=(10,224,224,2))
    x = model_rgb(x_rgb)
    #     x_flow1=model_flow(x_flow)
    l1 = []
    l2 = []
    l3 = []
    for i in range(10):
        [m1, m2, m3] = model_res(x_rgb[:, i, :, :, :])
        l1.append(m1)
        l2.append(m2)
        l3.append(m3)


    # [merge1, merge2, merge3] = model_res(x_res)
    skip_conn1 = tf.stack(l1, axis=1)
    skip_conn2 = tf.stack(l2, axis=1)
    skip_conn3 = tf.stack(l3, axis=1)
    print(skip_conn1.shape)
    print(skip_conn2.shape)
    print(skip_conn3.shape)
    x = STLSTM.STLSTM2D(cells0, return_sequences=True)(x)

    x = STLSTM.STLSTM2D(cells1, return_sequences=True)(x)
    x = STLSTM.STLSTM2D(cells2, return_sequences=True)(x)
    x = STLSTM.STLSTM2D(cells3, return_sequences=True)(x)
    x = Conv3DTranspose(64, (3, 3, 3),
                        strides=(2, 1, 1),
                        output_padding=(1, 0, 0),
                        padding='valid',
                        data_format="channels_last")(x)
    x = Conv3D(64, (3, 3, 3),
               strides=(1, 1, 1),
               padding='valid',
               data_format="channels_last")(x)
    x = tf.concat([x, skip_conn3], axis=4)
    print(x.shape)
    x = Conv3DTranspose(64, (3, 3, 3),
                        strides=(1, 2, 2),
                        output_padding=(0, 1, 1),
                        padding='valid',
                        data_format="channels_last")(x)
    x = Conv3D(64, (3, 3, 3),
               strides=(1, 1, 1),
               padding='valid',
               data_format="channels_last")(x)
    print(x.shape)
    x = tf.concat([x, skip_conn2], axis=4)
    x = Conv3DTranspose(64, (3, 3, 3),
                        strides=(1, 2, 2),
                        output_padding=(0, 1, 1),
                        padding='valid',
                        data_format="channels_last")(x)
    x = Conv3D(64, (3, 3, 3),
               strides=(1, 1, 1),
               padding='valid',
               data_format="channels_last")(x)
    print(x.shape)
    x = tf.concat([x, skip_conn1], axis=4)
    x = Conv3DTranspose(64, (3, 3, 3),
                        strides=(1, 2, 2),
                        output_padding=(0, 1, 1),
                        padding='valid',
                        data_format="channels_last")(x)
    x = Conv3D(64, (3, 3, 3),
               strides=(1, 1, 1),
               padding='valid',
               data_format="channels_last")(x)
    print(x.shape)
    x = Conv3D(3, (3, 3, 3),
               strides=(1, 1, 1),
               padding='same',
               data_format="channels_last")(x)
    model_final = Model(inputs=x_rgb, outputs=x)
    print(x.shape)
    print(model_final.summary())
    plot_model(model_final, to_file='feature_extract.png')
    #     x=STLSTM(rgb+flow)
    #     x=STLSTM(x)
    #     x=STLSTM(x)
    #     x=DCONV(x)
    #     x=CONV(x)
    #     #Combine
    #     x=CONV(m1+x)
    #     x=DCONV(x)
    #     x=CONV(m2+x)
    #     x=DCONV(x)
    #     x=CONV(m3+x)
    #     output=DCONV(x)

    #     model_final=Model(inputs=[input1,input2,input3],outputs=output)

    return
Example #18
    parser.add_argument(
        "-f", "--input_frames", type=int, default=64, help="number of frames in each input clip to the model")
    parser.add_argument(
        "-b", "--batch_size", type=int, default=8, help="batch size for testing.")
    parser.add_argument(
        "-r", "--results_path", default="./results/results.json",
        help="name/path of the output results of the test (has to be a json file -> ./results/results.json)")
    parser.add_argument(
        "-p", "--data_preprocessed", action="store_true", default=False, help="if data is preprocessed")
    parser.add_argument(
        "-c", "--per_clip", action="store_true", default=False, help="results for clips not videos")
    args = parser.parse_args()
    
    if not os.path.exists("./results"):
        os.makedirs("./results")

    labels = readLabels(args.labels)
    num_classes = len(labels)
    if not args.weights:
        model = Inception_Inflated3d(include_top=True,
                                     weights="rgb_inception_i3d",
                                     input_shape=(args.input_frames, 224, 224, 3),
                                     classes=400,
                                     endpoint_logit=True)
    else:
        model = loadModel(num_classes, args.input_frames, 224, 224, 3)
        model.load_weights(args.weights)

    if num_classes != 400:
        testViolence(model, args.data_directory, labels, args.input_frames, args.batch_size,
                     results_path=args.results_path, just_load=args.data_preprocessed,
                     perClip=args.per_clip)
    else:
        test(model, args.data_directory, labels, args.input_frames, args.batch_size)
Example #19
def pi3d_model(fc_main,
               model_inputs,
               dataset,
               protocol,
               all_models_name=[],
               mode='sum',
               dropout_prob=0.0,
               num_classes=60,
               sum_idx=0,
               train_end_to_end=False):
    #all_models = {}
    if sum_idx == 0:
        global f_dept
        f_dept = 1024

    pi3d_interm_outputs = []
    for model_name in all_models_name:
        model = load_model('./weights_optim/{}/weights_{}_{}.hdf5'.format(
            dataset, model_name, protocol))
        for idx in range(len(model.layers)):
            model.get_layer(
                index=idx).name = model.layers[idx].name + '_' + model_name

        for l in model.layers:
            l.trainable = train_end_to_end

        model_inputs.append(model.input)
        if sum_idx <= 3 and sum_idx >= 0:
            pi3d_interm_outputs.append(
                Reshape((1, 8, 7, 7, f_dept))(
                    model.get_layer(index=-46 + (2 - sum_idx) * 20).output))

    x = concatenate(pi3d_interm_outputs, axis=1)
    inflated_fc_main = keras.layers.core.Lambda(inflate_dense,
                                                output_shape=(no_of_p, 8, 7, 7,
                                                              f_dept))(fc_main)
    multiplied_features = keras.layers.Multiply()([inflated_fc_main, x])

    if mode == 'sum':
        x = keras.layers.core.Lambda(
            sum_feature, output_shape=(8, 7, 7, f_dept))(multiplied_features)
    elif mode == 'cat':
        x = keras.layers.core.Lambda(
            concat_feature,
            output_shape=(8, 7, 7, f_dept * no_of_p))(multiplied_features)

    ##second part of I3D

    if sum_idx == 2:
        # Mixed 5b
        branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same',
                             name='Conv3d_5b_0a_1x1' + 'second')

        branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same',
                             name='Conv3d_5b_1a_1x1' + 'second')
        branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same',
                             name='Conv3d_5b_1b_3x3' + 'second')

        branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same',
                             name='Conv3d_5b_2a_1x1' + 'second')
        branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same',
                             name='Conv3d_5b_2b_3x3' + 'second')

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same',
                                name='MaxPool2d_5b_3a_3x3' + 'second')(x)
        branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same',
                             name='Conv3d_5b_3b_1x1' + 'second')

        x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                               axis=4, name='Mixed_5b' + 'second')

    if sum_idx == 1 or sum_idx == 2:
        # Mixed 5c
        branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same',
                             name='Conv3d_5c_0a_1x1' + 'second')

        branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same',
                             name='Conv3d_5c_1a_1x1' + 'second')
        branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same',
                             name='Conv3d_5c_1b_3x3' + 'second')

        branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same',
                             name='Conv3d_5c_2a_1x1' + 'second')
        branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same',
                             name='Conv3d_5c_2b_3x3' + 'second')

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same',
                                name='MaxPool2d_5c_3a_3x3' + 'second')(x)
        branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same',
                             name='Conv3d_5c_3b_1x1' + 'second')

        x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                               axis=4, name='Mixed_5c' + 'second')

    # Classification block
    x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                         name='global_avg_pool' + 'second')(x)
    x = Dropout(dropout_prob)(x)

    x = conv3d_bn(x, num_classes, 1, 1, 1, padding='same', use_bias=True,
                  use_activation_fn=False, use_bn=False,
                  name='Conv3d_6a_1x1' + 'second')

    x = Flatten(name='flatten' + 'second')(x)
    predictions = Dense(num_classes,
                        activation='softmax',
                        name='softmax' + 'second')(x)
    model = Model(inputs=model_inputs, outputs=predictions, name='PI3D')

    model_second = Inception_Inflated3d(include_top=True,
                                        weights='rgb_imagenet_and_kinetics')

    weight_idx_s = -45 + (2 - sum_idx) * 20
    weight_idx_e = -4

    for l_m, l_lh in zip(model.layers[weight_idx_s:weight_idx_e],
                         model_second.layers[weight_idx_s:weight_idx_e]):
        l_m.set_weights(l_lh.get_weights())
        l_m.trainable = True

    lstm_weights = "./weights_optim/{}/lstm_model_{}.hdf5".format(
        dataset, protocol)
    l_model = load_model(lstm_weights, compile=False)

    for idx1 in range(len(model.layers)):
        n1 = model.layers[idx1].name
        if 'lstm' in n1:
            for idx2 in range(len(l_model.layers)):
                n2 = l_model.layers[idx2].name
                if n1 == n2:
                    model.layers[idx1].set_weights(
                        l_model.layers[idx2].get_weights())
                    break

    return model
Example #20
File: train_rgb.py Project: dmm-dr/DMM-DR
            x, y = np.load(data[i]), labels[i]
            x = x.reshape((1, x.shape[0], x.shape[1], x.shape[2], x.shape[3]))
            yield x, y


earlystop = EarlyStopping(monitor='acc',
                          min_delta=0,
                          patience=5,
                          verbose=0,
                          mode='auto')

for i in range(2, 5):

    rgb_model = Inception_Inflated3d(include_top=False,
                                     weights='rgb_imagenet_and_kinetics',
                                     input_shape=(None, 224, 224, 3),
                                     endpoint_logit=False,
                                     classes=8)
    sgd = SGD(lr=1e-4, decay=1e-7, momentum=0.9, nesterov=True)
    rgb_model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,
                      metrics=['accuracy'])
    rgb_model.summary()

    rgb_train_data = pickle.load(open(rgb_train_path[i], "rb"))
    label_train_data = pickle.load(open(label_train_path[i], "rb"))
    steps = len(label_train_data)
    rgb_model.fit_generator(generate_arrays_from_file(rgb_train_data,
                                                      label_train_data),
                            steps_per_epoch=steps,
                            epochs=65)
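
The opening lines of generate_arrays_from_file are cut off at the top of this example; a plausible reconstruction from its visible tail, with the endless-loop wrapper as an assumption:

import numpy as np

def generate_arrays_from_file(data, labels):
    while True:  # fit_generator expects an endless generator
        for i in range(len(labels)):
            x, y = np.load(data[i]), labels[i]
            # add the batch dimension the model expects
            x = x.reshape((1, x.shape[0], x.shape[1], x.shape[2], x.shape[3]))
            yield x, y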
Example #21
from i3d_dataset import I3DFusionSequence
from i3d_inception import Inception_Inflated3d

if __name__ == '__main__':
    NUM_FRAMES = 64
    FRAME_HEIGHT = 224
    FRAME_WIDTH = 224
    NUM_RGB_CHANNELS = 3
    NUM_FLOW_CHANNELS = 2
    NUM_CLASSES = 2

    rgb_input = Input(shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS))
    rgb_model = Inception_Inflated3d(
        include_top=False,
        weights='rgb_imagenet_and_kinetics',
        # weights='rgb_kinetics_only',
        input_tensor=rgb_input,
        classes=NUM_CLASSES)

    for i, l in enumerate(rgb_model.layers):
        # if i >= 181:
        #     break

        # if "Mixed_5b" == l.name:
        #     break

        l.trainable = False

    rgb_y = rgb_model.get_output_at(0)

    flow_input = Input(shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
Example #22
def train(path_train,
          batch_size,
          path_val=None,
          t_size=9,
          train_val_split=1,
          validate=True):
    data = h5py.File(path_train, 'r')

    X = data['X']
    Y = data['Y']
    input_shape = (t_size, ) + X.shape[1:]

    if 'autoscore_checkpoint' in os.listdir('.'):
        model_final = load_model('autoscore_checkpoint')
        print('Loaded existing model')

    else:

        rgb_model = Inception_Inflated3d(include_top=False,
                                         weights='rgb_imagenet_and_kinetics',
                                         input_shape=(input_shape))

        output_old = rgb_model.layers[-1].output

        x = Reshape((1024, ), name='Reshape_top')(output_old)
        x = Dense(50, activation='selu', name='Dense_top_1')(x)
        x = Dense(2, activation='sigmoid', name='Dense_top_2')(x)

        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        model_final = Model(inputs=rgb_model.input, outputs=[x])
        model_final.compile(loss='binary_crossentropy',
                            optimizer=sgd,
                            metrics=['mae', 'acc'])
    checkpoint = ModelCheckpoint('autoscore_checkpoint',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)
    train_generator = i3d_generator(X,
                                    Y,
                                    7,
                                    t_size,
                                    train_val_split=0.9,
                                    train=True)

    if validate:
        val_generator = i3d_generator(X,
                                      Y,
                                      7,
                                      t_size,
                                      train_val_split=0.9,
                                      train=False)
        model_final.fit_generator(train_generator.__getitem__(),
                                  steps_per_epoch=5000,
                                  epochs=50,
                                  validation_data=val_generator.__getitem__(),
                                  validation_steps=1000,
                                  callbacks=[checkpoint])


#    elif path_val:
#
#        data_val = h5py.open(path_train,'r')
#        X_val = data_val['X']
#        Y_val = data_val['Y']
#        val_generator = i3d_generator(X_val, Y_val, 5, t_size)
#        model_final.fit_generator(train_generator.__getitem__(),
#                                  steps_per_epoch=2000,
#                                  epochs=50,
#                                  validation_data=val_generator.__getitem__(),
#                                  validation_steps=800)
#
#    else:
#        if validate:
#            model_final.fit_generator(train_generator.__getitem__(),
#                                      steps_per_epoch=2000,
#                                      epochs=50)
    model_final.save('autoscore_model_3')
Example #23
            batch_features = np.zeros(
                (BATCH, NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT, 3))
            batch_labels = np.zeros((BATCH, NUM_CLASSES))
            for i in range(BATCH):
                batch_features[i] = hf["validation"][counter % 40]
                batch_labels[i] = validation_labels[counter % 40]
                # print("Index: "+str(i))
                # print(batch_labels)
                counter += 1
            yield batch_features, batch_labels


rgb_model = Inception_Inflated3d(include_top=False,
                                 weights='rgb_imagenet_and_kinetics',
                                 input_shape=(NUM_FRAMES, FRAME_HEIGHT,
                                              FRAME_WIDTH, 3),
                                 classes=NUM_CLASSES,
                                 endpoint_logit=False,
                                 dropout_prob=0.5)

opt = optimizers.Adam(lr=0.001,
                      beta_1=0.9,
                      beta_2=0.999,
                      epsilon=1e-08,
                      decay=1e-6)

index_freeze_layer = [
    1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44,
    45, 46, 47, 48, 49, 50, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66,
    67, 68, 69, 70, 71, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87,
Example #24
import pickle
import numpy as np
from i3d_inception import Inception_Inflated3d
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping
import tensorflow as tf
from keras import backend as K
import os
from keras.callbacks import Callback
from keras.metrics import binary_accuracy
import numpy.linalg as linalg

flow_model_kin = Inception_Inflated3d(include_top=False,
                                      weights='flow_imagenet_and_kinetics',
                                      input_shape=(None, 224, 224, 2),
                                      dropout_prob=0.5,
                                      endpoint_logit=False,
                                      classes=8)
flow_model_kin.load_weights("/DATA/keras-kinetics-i3d/data/0_8/kin_flow_0.h5")

flow_train_path = "../crossval5_8/less_f_train7.p"
rgb_train_path = "../crossval5_8/less_r_train7.p"
label_train_path = "../crossval5_8/less_l_train7.p"



class Flda(Callback):
    def __init__(self, w):
        super(Flda, self).__init__()
        self.w = K.variable(w, dtype=np.float32)

    def on_batch_begin(self, batch, logs=None):
        kin_wts = flow_model_kin.layers[196].get_weights()[0]
        # kin_wts = kin_wts.reshape(8192)
        # kin_wts = kin_wts[0:8100]
        # kin_wts = kin_wts.reshape(90, 90)
Example #25
def main(args):

    SAMPLE_DATA_PATH = {
        # 'rgb' : 'data/v_CricketShot_g04_c01_rgb.npy',
        'rgb': '../data/' + args.video_name + '_rgb.npy',
        'flow': '../data/' + args.video_name + '_flow.npy'
    }

    # load the kinetics classes
    kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')]

    if args.eval_type in ['rgb', 'joint']:

        # load RGB sample (just one example)
        rgb_sample = np.load(SAMPLE_DATA_PATH['rgb'])
        INPUT_SHAPE = rgb_sample.shape[1]

        if args.no_imagenet_pretrained:
            # build model for RGB data
            # and load pretrained weights (trained on kinetics dataset only)
            rgb_model = Inception_Inflated3d(
                include_top=True,
                weights='rgb_kinetics_only',
                input_shape=(INPUT_SHAPE, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for RGB data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            rgb_model = Inception_Inflated3d(
                include_top=True,
                weights='rgb_imagenet_and_kinetics',
                input_shape=(INPUT_SHAPE, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)

        # make prediction
        rgb_logits = rgb_model.predict(rgb_sample)

    if args.eval_type in ['flow', 'joint']:

        # load flow sample (just one example)
        flow_sample = np.load(SAMPLE_DATA_PATH['flow'])
        INPUT_SHAPE = flow_sample.shape[1]

        if args.no_imagenet_pretrained:
            # build model for optical flow data
            # and load pretrained weights (trained on kinetics dataset only)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_kinetics_only',
                input_shape=(INPUT_SHAPE, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for optical flow data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_imagenet_and_kinetics',
                input_shape=(INPUT_SHAPE, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)

        # make prediction
        flow_logits = flow_model.predict(flow_sample)

    # produce final model logits
    if args.eval_type == 'rgb':
        sample_logits = rgb_logits
    elif args.eval_type == 'flow':
        sample_logits = flow_logits
    else:  # joint
        sample_logits = rgb_logits + flow_logits

    # produce softmax output from model logit for class probabilities
    sample_logits = sample_logits[0]  # we are dealing with just one example
    sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits))

    sorted_indices = np.argsort(sample_predictions)[::-1]

    print('\nNorm of logits: %f' % np.linalg.norm(sample_logits))
    print('\nTop 20 classes and probabilities')
    for index in sorted_indices[:20]:
        print(sample_predictions[index], sample_logits[index],
              kinetics_classes[index])

    return
Example #26
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generators
    train_generator, validation_generator = create_generators(
        args, backbone.preprocess_image)
    # import IPython; IPython.embed()

    img_input_ret = img_input_retina(32, 1024, 1024, 3)

    from i3d_inception import Inception_Inflated3d

    NUM_CLASSES = train_generator.num_classes()

    rgb_model = Inception_Inflated3d(include_top=False,
                                     weights='rgb_imagenet_and_kinetics',
                                     input_shape=img_input_ret,
                                     classes=NUM_CLASSES)

    # rgb_sample = train_generator.__getitem__(1)[0]
    # rgb_logits = rgb_model.predict(rgb_sample)
    # import IPython; IPython.embed()

    print('Loading I3D models.........')
    # print(rgb_model.summary())

    # for layer in rgb_model.layers:
    #     print("Layer name: "+str(layer.name), "Input shape: "+str(layer.input_shape)+". Output shape: "+str(layer.output_shape))
    # import IPython;IPython.embed()

    # import IPython; IPython.embed()

    print("Connecting to Retinanet Layer................")
    num_classes = train_generator.num_classes()
    # model= retinanet(inputs=img_input_ret, num_classes=num_classes,
    #     backbone_layers=[rgb_model.get_layer('Conv3d_3c_0a_1x1').output,
    #     rgb_model.get_layer('Conv3d_4f_0a_1x1').output,
    #     rgb_model.get_layer('Conv3d_5b_0a_1x1').output])

    model = retinanet(inputs=img_input_ret,
                      num_classes=num_classes,
                      backbone_layers=[
                          rgb_model.get_layer('Conv3d_2c_3x3').output,
                          rgb_model.get_layer('Conv3d_3c_0a_1x1').output,
                          rgb_model.get_layer('Conv3d_4f_0a_1x1').output,
                          rgb_model.get_layer('Conv3d_5b_0a_1x1').output
                      ])

    print("Retinanet+I3D model summary:")
    print(model.summary())
    # import IPython;IPython.embed()
    # for layer in model.layers:
    #     print("Layer name: "+str(layer.name), "Input shape: "+str(layer.input_shape)+". Output shape: "+str(layer.output_shape))

    # import IPython; IPython.embed()

    # bbox
    # create the model
    if args.snapshot is not None:
        print('Loading prediction model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        anchor_params = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        prediction_model = retinanet_bbox(model=model,
                                          anchor_params=anchor_params)
    else:
        weights = args.weights
        # # default to imagenet if nothing else is specified
        # if weights is None and args.imagenet_weights:
        #     weights = backbone.download_imagenet()

        if args.multi_gpu is not None:
            multi_gpu = args.multi_gpu
        else:
            multi_gpu = 0

        if multi_gpu > 1:
            from keras.utils import multi_gpu_model
            # with tf.device('/cpu:0'):
            # model = model_with_weights(backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier), weights=weights, skip_mismatch=True)
            training_model = multi_gpu_model(model, gpus=multi_gpu)
        else:
            # model          = model_with_weights(backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier), weights=weights, skip_mismatch=True)
            training_model = model

        # load anchor parameters, or pass None (so that defaults will be used)

        if args.config is not None:
            if 'anchor_parameters' in args.config:
                anchor_params = parse_anchor_parameters(args.config)
                num_anchors = anchor_params.num_anchors()
        else:
            anchor_params = None
            num_anchors = None

        # make prediction model
        prediction_model = retinanet_bbox(model=model,
                                          anchor_params=anchor_params)

        # compile model
        training_model.compile(
            loss={
                'regression': losses.smooth_l1(),
                'classification': losses.focal(),
                # 'depthsification': lossesification.mean_squared_error
                'depthsification': losses.smooth_l1_depth()
            },
            optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001))

    # print('bin/train.py | line 610 | debug before fit_generator')
    # import IPython; IPython.embed()

    # create the callbacks
    callbacks = create_callbacks(model, training_model, prediction_model,
                                 validation_generator, args)

    # Use multiprocessing if workers > 0
    if args.workers > 0:
        use_multiprocessing = True
    else:
        use_multiprocessing = False

    # start training
    training_model.fit_generator(generator=train_generator,
                                 steps_per_epoch=args.steps,
                                 epochs=args.epochs,
                                 verbose=1,
                                 callbacks=callbacks,
                                 workers=args.workers,
                                 use_multiprocessing=use_multiprocessing,
                                 max_queue_size=args.max_queue_size)
Example #27
    parser.add_argument('video_file', type=str, help="path to the video file")
    parser.add_argument('outvecs',
                        type=str,
                        help="path to the output numpy vector")
    parser.add_argument('--startframe',
                        type=int,
                        default=-1,
                        help="start frame")
    parser.add_argument('--endframe', type=int, default=-1, help="end frame")
    args = parser.parse_args()

    rgb_input = Input(shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
                             NUM_RGB_CHANNELS))
    fe = Inception_Inflated3d(
        include_top=False,
        weights='rgb_imagenet_and_kinetics',
        # weights='rgb_kinetics_only',
        input_tensor=rgb_input,
        classes=-1)

    m = Model(inputs=fe.get_input_at(0),
              outputs=fe.get_layer("Mixed_5c").output)
    # plot_model(m, show_shapes=True)

    os.makedirs(args.outvecs, exist_ok=True)

    for fn, vec in main_fe(m,
                           args.video_file,
                           show=False,
                           startfn=args.startframe,
                           endfn=args.endframe):
        print(fn)
Example #28
    while True:
        batch_features = np.zeros((BATCH, NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT, 1))
        temp = np.zeros((BATCH, NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT))
        batch_labels = np.zeros((BATCH, NUM_CLASSES))
        for i in range(BATCH):
            temp = hf["train"][counter % 120]
            batch_features[i] = np.reshape(temp, (NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT, 1))
            batch_labels[i] = validation_labels[counter % 40]
            # print("Index: " + str(i))
            # print(batch_labels)
            counter += 1
        yield batch_features, batch_labels

rgb_model = Inception_Inflated3d(
    include_top=False,
    # weights=None,
    input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, 1),
    classes=NUM_CLASSES,
    endpoint_logit=False)

opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)

rgb_model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

best_checkpoint = ModelCheckpoint('sibi_1_weights_best.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')
checkpoint = ModelCheckpoint('sibi_1_weights_epoch.hdf5', monitor='val_acc', verbose=1, save_best_only=False, mode='max')
csv_logger = CSVLogger('sibi_1.log', append=False)
tensorboard = TensorBoard(log_dir='./sibi_1_tf-logs')
callbacks_list = [checkpoint, best_checkpoint, csv_logger, tensorboard]
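
The snippet stops before training is launched; a sketch of the call this setup appears to build toward. The generator names, step counts, and validation source are assumptions, since the generator's own name is cut off above:

rgb_model.fit_generator(train_generator(),            # the truncated generator above
                        steps_per_epoch=120 // BATCH,
                        epochs=50,
                        validation_data=val_generator(),  # assumed, given monitor='val_acc'
                        validation_steps=40 // BATCH,
                        callbacks=callbacks_list)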