def i3d_flattened(self, num_classes=60):
    """Build a softmax classifier on top of the truncated model in ``self.model``.

    ``self.model`` is cut at its fourth-from-last layer.  On top of that
    output the method stacks a ``conv3d_bn`` head named ``Conv3d_6a_1x1``
    (called with batch-norm and activation disabled), a ``Flatten`` layer and
    a softmax ``Dense`` layer carrying L2 kernel and L1 activity
    regularisation.

    Args:
        num_classes: Number of target classes.  It is forwarded to
            ``conv3d_bn`` and used as the width of the final ``Dense`` layer.

    Returns:
        A new ``Model`` that shares the input of ``self.model`` and outputs
        the softmax predictions.
    """
    i3d = Model(inputs=self.model.input,
                outputs=self.model.get_layer(index=-4).output)

    x = conv3d_bn(i3d.output, num_classes, 1, 1, 1, padding='same',
                  use_bias=True, use_activation_fn=False, use_bn=False,
                  name='Conv3d_6a_1x1')
    x = Flatten()(x)
    predictions = Dense(num_classes,
                        activation='softmax',
                        kernel_regularizer=regularizers.l2(0.01),
                        activity_regularizer=regularizers.l1(0.01))(x)

    # NOTE: the truncated backbone layers are not frozen here; callers that
    # want a frozen backbone must set ``trainable = False`` on them.
    return Model(inputs=i3d.input, outputs=predictions)
def loadModel(numberOfClasses, inputFrames, frameHeight, frameWidth, numRGBChannels, withWeights=False):
    """Create an RGB Inception-I3D classifier that averages per-frame logits.

    Args:
        numberOfClasses: Number of output classes.
        inputFrames: First entry of the ``input_shape`` given to the backbone.
        frameHeight: Second entry of the ``input_shape``.
        frameWidth: Third entry of the ``input_shape``.
        numRGBChannels: Fourth entry of the ``input_shape``.
        withWeights: When truthy the backbone is built with
            ``weights='rgb_inception_i3d'``; otherwise with ``weights=None``.

    Returns:
        A ``Model`` mapping the backbone input to a softmax layer named
        ``prediction``.
    """
    pretrained = 'rgb_inception_i3d' if withWeights else None

    backbone = Inception_Inflated3d(
        include_top=False,
        weights=pretrained,
        input_shape=(inputFrames, frameHeight, frameWidth, numRGBChannels),
        dropout_prob=0.5,
        endpoint_logit=True,
        classes=numberOfClasses)

    features = Dropout(0.5)(backbone.output)
    logits = conv3d_bn(features, numberOfClasses, 1, 1, 1, padding='same',
                       use_bias=True, use_activation_fn=False, use_bn=False,
                       name='Conv3d_6a_1x1')

    # Collapse everything but axis 1 and the class axis, then average the
    # raw class scores over axis 1 before applying the softmax.
    frames_left = int(logits.shape[1])
    per_frame_logits = Reshape((frames_left, numberOfClasses))(logits)
    mean_logits = Lambda(lambda t: K.mean(t, axis=1, keepdims=False),
                         output_shape=lambda s: (s[0], s[2]))(per_frame_logits)

    probabilities = Activation('softmax', name='prediction')(mean_logits)
    return Model(backbone.input, probabilities)
def pi3d_model(self, all_models_name, mode, dropout_prob, sum_idx):
    """Assemble the ``PI3D`` model by fusing several pre-trained models.

    Every name in ``all_models_name`` is loaded from
    ``./weights_optim/<name>/weights.hdf5`` and truncated at the layer with
    index ``-46 + (2 - sum_idx) * 20``.  The truncated outputs are fused
    according to ``mode``, passed through the remaining Inception blocks
    selected by ``sum_idx`` and finally through a classification head.

    Side effects: stores ``mode`` and ``all_models_name`` on ``self``; reads
    ``self.train_end_to_end`` and ``self.num_classes``.

    Args:
        all_models_name: Iterable of model directory names to load.
        mode: ``'sum'`` (element-wise ``Add``), ``'cat'`` (``concatenate``)
            or ``'single'`` (use the first model's output only).
        dropout_prob: Rate of the ``Dropout`` layer in the classification head.
        sum_idx: Integer in ``[0, 3]`` selecting the truncation layer.  ``2``
            rebuilds the Mixed 5b and 5c blocks, ``1`` only Mixed 5c.

    Returns:
        A ``Model`` named ``PI3D`` taking one input per loaded model.

    Raises:
        ValueError: If ``mode`` or ``sum_idx`` is not supported.
    """
    self.mode = mode
    self.all_models_name = all_models_name

    # Fail early with a clear message instead of hitting an unbound local
    # (bad mode) or a missing truncated model (bad sum_idx) further down.
    if mode not in ('sum', 'cat', 'single'):
        raise ValueError(
            "mode must be 'sum', 'cat' or 'single', got {!r}".format(mode))
    if not 0 <= sum_idx <= 3:
        raise ValueError(
            'sum_idx must be between 0 and 3, got {!r}'.format(sum_idx))

    # Original author's note for the truncation point: max_pooling3d_11
    # (8,7,7,832) -- presumably for one specific sum_idx; TODO confirm.
    cut_index = -46 + (2 - sum_idx) * 20

    pi3d_inputs = []
    pi3d_interm_outputs = []
    for model_name in all_models_name:
        model = load_model('./weights_optim/' + model_name + '/weights.hdf5')
        # Suffix every layer name with the model name (presumably to keep
        # layer names unique once several models share one graph).
        for layer in model.layers:
            layer.name = layer.name + '_' + model_name

        truncated = Model(inputs=model.input,
                          outputs=model.get_layer(index=cut_index).output)
        for layer in truncated.layers:
            layer.trainable = self.train_end_to_end

        pi3d_inputs.append(truncated.input)
        pi3d_interm_outputs.append(truncated.output)

    if mode == 'sum':
        x = Add()(pi3d_interm_outputs)
    elif mode == 'cat':
        x = concatenate(pi3d_interm_outputs)
    else:  # 'single'
        x = pi3d_interm_outputs[0]

    # Second part of I3D
    if sum_idx == 2:
        # Mixed 5b
        # NOTE(review): the first branch is named just 'second' (empty
        # prefix), unlike its siblings; kept as-is so existing layer names
        # do not change.
        branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='' + 'second')

        branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1' + 'second')
        branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3' + 'second')

        branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1' + 'second')
        branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3' + 'second')

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3' + 'second')(x)
        branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1' + 'second')

        x = layers.concatenate(
            [branch_0, branch_1, branch_2, branch_3],
            axis=4,
            name='Mixed_5b' + 'second')

    if sum_idx == 1 or sum_idx == 2:
        # Mixed 5c
        branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1' + 'second')

        branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1' + 'second')
        branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3' + 'second')

        branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1' + 'second')
        branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3' + 'second')

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3' + 'second')(x)
        branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1' + 'second')

        x = layers.concatenate(
            [branch_0, branch_1, branch_2, branch_3],
            axis=4,
            name='Mixed_5c' + 'second')

    # Classification block
    x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool' + 'second')(x)
    x = Dropout(dropout_prob)(x)
    x = conv3d_bn(x, self.num_classes, 1, 1, 1, padding='same',
                  use_bias=True, use_activation_fn=False, use_bn=False,
                  name='Conv3d_6a_1x1' + 'second')
    x = Flatten(name='flatten' + 'second')(x)
    x = Dense(self.num_classes, activation='softmax', name='softmax' + 'second')(x)

    return Model(inputs=pi3d_inputs, outputs=x, name='PI3D')
def embed_model_spatio_temporal_gcnn(n_neuron, timesteps, num_nodes, num_features, graph_conv_filters_shape1, graph_conv_filters_shape2, num_filters, num_classes, n_dropout, protocol):
    """Build the two-output ``spatial_temporal_attention`` model.

    A skeleton network (``GCNN_skeleton_t16``) produces a spatial and a
    temporal attention mask from its ``gcnn_out`` layer.  The product of the
    two masks modulates the ``Mixed_5c`` output of an I3D branch; the
    modulated features are added back to the original ones and fed to a
    classification head.  A second output, ``embed_output``, is computed by
    ``manhattan_distance`` between a video embedding and a skeleton embedding.

    Args:
        n_neuron, timesteps, num_nodes, num_features,
        graph_conv_filters_shape1, graph_conv_filters_shape2, num_filters:
            Forwarded unchanged to ``GCNN_skeleton_t16``.
        num_classes: Number of action classes.
        n_dropout: Forwarded to ``GCNN_skeleton_t16`` and used as the rate
            of the ``Dropout`` layer in the classification head.
        protocol: Currently unused (see the note on disabled weight loading
            in the body); kept for interface compatibility.

    Returns:
        A ``Model`` whose inputs are the skeleton-network inputs followed by
        the I3D input, and whose outputs are ``[predictions, embed_output]``.
    """
    i3d = i3d_modified(weights='rgb_imagenet_and_kinetics')
    model_branch = i3d.i3d_flattened(num_classes=num_classes)

    # NOTE: loading of protocol-specific pre-trained I3D weights is disabled.
    # It used to read (to be replaced with the values in the yaml file):
    #   protocol == 'CS':
    #     /data/stars/user/sdas/PhD_work/STA_appearance/NTU_CS/i3d/weights_ntu_aug_4/epoch_7.hdf5
    #   otherwise:
    #     /data/stars/user/sdas/PhD_work/CVPR20/NTU_120/I3D/weights_ntu_set_i3d_full_body/epoch_12.hdf5

    optim = SGD(lr=0.01, momentum=0.9)
    model_branch.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

    print('Build model...')
    model_gcnn = GCNN_skeleton_t16(num_nodes, num_features, graph_conv_filters_shape1,
                                   graph_conv_filters_shape2, num_filters, num_classes,
                                   n_neuron, n_dropout, timesteps)

    # Attention heads driven by the skeleton features.
    gcnn_out = model_gcnn.get_layer('gcnn_out').output
    z1 = Dense(256, activation='tanh', name='z1_layer', trainable=True)(gcnn_out)
    z2 = Dense(128, activation='tanh', name='z2_layer', trainable=True)(gcnn_out)

    fc_main_spatial = Dense(49, activity_regularizer=attention_reg, kernel_initializer='zeros',
                            bias_initializer='zeros', activation='sigmoid', trainable=True,
                            name='dense_spatial')(z1)
    fc_main_temporal = Dense(2, activity_regularizer=attention_reg, kernel_initializer='zeros',
                             bias_initializer='zeros', activation='softmax', trainable=True,
                             name='dense_temporal')(z2)

    atten_mask_spatial = keras.layers.core.Lambda(
        inflate_dense_spatial, output_shape=(2, 7, 7, 1024))(fc_main_spatial)
    atten_mask_temporal = keras.layers.core.Lambda(
        inflate_dense_temporal, output_shape=(2, 7, 7, 1024))(fc_main_temporal)
    atten_mask = keras.layers.Multiply()([atten_mask_spatial, atten_mask_temporal])

    # Both sub-networks are trained end to end.
    for layer in model_branch.layers:
        layer.trainable = True
    for layer in model_gcnn.layers:
        layer.trainable = True

    # Skeleton inputs first, then the video input.
    model_inputs = list(model_gcnn.input)
    model_inputs.append(model_branch.input)

    mixed_5c = model_branch.get_layer('Mixed_5c').output

    # Embedding branch: distance between video and skeleton embeddings.
    flatten_video = Flatten(name='flatten_video')(mixed_5c)
    embed_video = Dense(256, activation='sigmoid', trainable=True, name='dense_video')(flatten_video)
    embed_skeleton = Dense(256, activation='sigmoid', trainable=True, name='dense_skeleton')(fc_main_spatial)
    # A Lambda over the list of inputs replaces the legacy
    # ``Merge(mode=callable)`` layer, which no longer exists in Keras 2.2+;
    # the computation and the declared output shape are unchanged.
    embed_output = keras.layers.core.Lambda(
        lambda x: manhattan_distance(x[0], x[1]),
        output_shape=lambda inp_shp: (inp_shp[0][0], 1),
        name='embed_output')([embed_video, embed_skeleton])

    # Attention-modulated features plus a residual connection.
    multiplied_features = keras.layers.Multiply()([atten_mask, mixed_5c])
    added_features = keras.layers.Add()([multiplied_features, mixed_5c])

    # Classification block
    x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                         name='global_avg_pool' + 'second')(added_features)
    x = Dropout(n_dropout)(x)
    x = conv3d_bn(x, num_classes, 1, 1, 1, padding='same', use_bias=True,
                  use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1' + 'second')
    x = Flatten(name='flatten' + 'second')(x)
    predictions = Dense(num_classes, activation='softmax', name='action_output')(x)

    return Model(inputs=model_inputs, outputs=[predictions, embed_output],
                 name='spatial_temporal_attention')
def pi3d_model(fc_main, model_inputs, dataset, protocol, all_models_name=[], mode='sum', dropout_prob=0.0, num_classes=60, sum_idx=0, train_end_to_end=False):
    """Assemble the attention-weighted ``PI3D`` model.

    Every name in ``all_models_name`` is loaded from
    ``./weights_optim/<dataset>/weights_<name>_<protocol>.hdf5`` and tapped at
    the layer with index ``-46 + (2 - sum_idx) * 20``.  The tapped outputs are
    stacked along axis 1, multiplied by the inflated ``fc_main`` tensor and
    fused according to ``mode``.  The fused features go through the Inception
    blocks selected by ``sum_idx`` and a classification head.  The rebuilt
    layers are initialised from ``Inception_Inflated3d`` weights, and layers
    whose name contains ``'lstm'`` are initialised from
    ``./weights_optim/<dataset>/lstm_model_<protocol>.hdf5``.

    Side effects: appends one input tensor per loaded model to
    ``model_inputs`` and, when ``sum_idx == 0``, sets the module-level
    ``f_dept`` to 1024.  Reads the module-level ``f_dept`` and ``no_of_p``.

    Args:
        fc_main: Tensor passed to the ``inflate_dense`` Lambda layer.
        model_inputs: List of input tensors; extended in place.
        dataset: Dataset name used in the weight file paths.
        protocol: Protocol name used in the weight file paths.
        all_models_name: Names of the models to load (only read, never
            mutated).
        mode: ``'sum'`` or ``'cat'`` fusion of the weighted features.
        dropout_prob: Rate of the ``Dropout`` layer in the classification head.
        num_classes: Number of output classes.
        sum_idx: Integer in ``[0, 3]`` selecting the tap layer.  ``2``
            rebuilds the Mixed 5b and 5c blocks, ``1`` only Mixed 5c.
        train_end_to_end: ``trainable`` flag applied to every loaded layer.

    Returns:
        A ``Model`` named ``PI3D``.

    Raises:
        ValueError: If ``mode`` or ``sum_idx`` is not supported.
    """
    global f_dept

    # Fail early: an unknown mode or tap index previously surfaced as an
    # obscure shape/concatenation error deep inside Keras.
    if mode not in ('sum', 'cat'):
        raise ValueError("mode must be 'sum' or 'cat', got {!r}".format(mode))
    if not 0 <= sum_idx <= 3:
        raise ValueError(
            'sum_idx must be between 0 and 3, got {!r}'.format(sum_idx))

    if sum_idx == 0:
        f_dept = 1024

    tap_index = -46 + (2 - sum_idx) * 20
    pi3d_interm_outputs = []
    for model_name in all_models_name:
        model = load_model('./weights_optim/{}/weights_{}_{}.hdf5'.format(
            dataset, model_name, protocol))
        # Suffix layer names with the model name (presumably to keep them
        # unique in the merged graph) and apply the requested trainability.
        for layer in model.layers:
            layer.name = layer.name + '_' + model_name
            layer.trainable = train_end_to_end
        model_inputs.append(model.input)
        pi3d_interm_outputs.append(
            Reshape((1, 8, 7, 7, f_dept))(
                model.get_layer(index=tap_index).output))

    x = concatenate(pi3d_interm_outputs, axis=1)
    inflated_fc_main = keras.layers.core.Lambda(
        inflate_dense, output_shape=(no_of_p, 8, 7, 7, f_dept))(fc_main)
    multiplied_features = keras.layers.Multiply()([inflated_fc_main, x])

    if mode == 'sum':
        x = keras.layers.core.Lambda(
            sum_feature, output_shape=(8, 7, 7, f_dept))(multiplied_features)
    else:  # 'cat'
        x = keras.layers.core.Lambda(
            concat_feature,
            output_shape=(8, 7, 7, f_dept * no_of_p))(multiplied_features)

    # Second part of I3D
    if sum_idx == 2:
        # Mixed 5b
        # NOTE(review): the first branch is named just 'second' (empty
        # prefix), unlike its siblings; kept as-is so existing layer names
        # do not change.
        branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='' + 'second')

        branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1' + 'second')
        branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3' + 'second')

        branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1' + 'second')
        branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3' + 'second')

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3' + 'second')(x)
        branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1' + 'second')

        x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                               axis=4,
                               name='Mixed_5b' + 'second')

    if sum_idx == 1 or sum_idx == 2:
        # Mixed 5c
        branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1' + 'second')

        branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1' + 'second')
        branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3' + 'second')

        branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1' + 'second')
        branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3' + 'second')

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3' + 'second')(x)
        branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1' + 'second')

        x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                               axis=4,
                               name='Mixed_5c' + 'second')

    # Classification block
    x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                         name='global_avg_pool' + 'second')(x)
    x = Dropout(dropout_prob)(x)
    x = conv3d_bn(x, num_classes, 1, 1, 1, padding='same', use_bias=True,
                  use_activation_fn=False, use_bn=False,
                  name='Conv3d_6a_1x1' + 'second')
    x = Flatten(name='flatten' + 'second')(x)
    predictions = Dense(num_classes, activation='softmax', name='softmax' + 'second')(x)
    model = Model(inputs=model_inputs, outputs=predictions, name='PI3D')

    # Initialise the rebuilt tail from the reference I3D network; the two
    # models are aligned purely by position counted from the end.
    model_second = Inception_Inflated3d(include_top=True,
                                        weights='rgb_imagenet_and_kinetics')
    weight_idx_s = -45 + (2 - sum_idx) * 20
    weight_idx_e = -4
    for l_m, l_lh in zip(model.layers[weight_idx_s:weight_idx_e],
                         model_second.layers[weight_idx_s:weight_idx_e]):
        l_m.set_weights(l_lh.get_weights())
        l_m.trainable = True

    # Copy LSTM weights by layer name.  ``setdefault`` keeps the first layer
    # for a given name, matching the original first-match-wins search.
    lstm_weights = "./weights_optim/{}/lstm_model_{}.hdf5".format(
        dataset, protocol)
    l_model = load_model(lstm_weights, compile=False)
    lstm_layers_by_name = {}
    for layer in l_model.layers:
        lstm_layers_by_name.setdefault(layer.name, layer)
    for layer in model.layers:
        if 'lstm' in layer.name:
            source = lstm_layers_by_name.get(layer.name)
            if source is not None:
                layer.set_weights(source.get_weights())

    return model
# NOTE(review): the next two statements use ``self`` and appear to be the tail
# of a method whose header lies outside this excerpt (it bumps an epoch
# counter and saves ``self.save_model`` to ``<path><epoch>.hdf5``).
self.nb_epoch += 1
self.save_model.save(self.path + str(self.nb_epoch) + '.hdf5')

# --- Build the I3D + non-local-block classifier ('i3d_nonlocal') ---
# ``num_classes`` and ``model_name`` are expected to be defined elsewhere.
i3d = i3d_modified(weights = 'rgb_imagenet_and_kinetics')
model_branch = i3d.i3d_flattened(num_classes = num_classes)
model_branch.load_weights('/data/stars/user/sdas/PhD_work/ICCV_2019/models/epoch_full_body_NTU_CS.hdf5')

# Re-use the pre-trained branch up to its 'Mixed_5c' layer as feature extractor.
model_i3d = Model(inputs = model_branch.input, outputs = model_branch.get_layer('Mixed_5c').output)
x = non_local_block(model_i3d.output, compression=2, mode='embedded')
# Additional stacked non-local blocks, currently disabled:
#x = non_local_block(x, compression=2, mode='embedded')
#x = non_local_block(x, compression=2, mode='embedded')
#x = non_local_block(x, compression=2, mode='embedded')
#x = non_local_block(x, compression=2, mode='embedded')

# Classification head.
x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool'+'second')(x)
# Dropout is created with a rate argument of 0.0.
x = Dropout(0.0)(x)
x = conv3d_bn(x, num_classes, 1, 1, 1, padding='same', use_bias=True, use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1'+'second')
x = Flatten(name='flatten'+'second')(x)
predictions = Dense(num_classes, activation='softmax', name='softmax'+'second')(x)
model = Model(inputs=model_branch.input, outputs=predictions, name = 'i3d_nonlocal')

optim = SGD(lr = 0.01, momentum = 0.9)
model.compile(loss = 'categorical_crossentropy', optimizer = optim, metrics = ['accuracy'])
#model = load_model("../weights3/epoch11.hdf5")

# Callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor = 0.1, patience = 10)
#filepath = '../weights3/weights.{epoch:04d}-{val_loss:.2f}.hdf5'
csvlogger = CSVLogger(model_name+'_ntu.csv')

# Multi-GPU wrapper (4 GPUs) compiled with the same loss/optimizer/metrics.
parallel_model = multi_gpu_model(model, gpus=4)
parallel_model.compile(loss = 'categorical_crossentropy', optimizer = optim, metrics = ['accuracy'])
# NOTE(review): ``model`` was already compiled above with identical arguments;
# this second compile looks redundant -- confirm before removing.
model.compile(loss = 'categorical_crossentropy', optimizer = optim, metrics = ['accuracy'])
def build_model_two_pathways(n_neuron, timesteps, data_dim, num_classes, n_dropout, dataset):
    """Build the two-pathway ``ST_attention`` model.

    A pre-trained skeleton model (last layer popped) drives a spatial and a
    temporal attention mask.  Each mask multiplies the ``Mixed_5c`` output of
    a pre-trained I3D branch; the two pooled pathways are concatenated and
    classified.

    Args:
        n_neuron: Unused; kept for interface compatibility.
        timesteps: Unused; kept for interface compatibility.
        data_dim: Unused; kept for interface compatibility.
        num_classes: Number of output classes.
        n_dropout: Rate of the ``Dropout`` layer applied to each pathway.
        dataset: ``'NTU'`` or ``'Smarthomes'``; selects the pre-trained
            weight files under ``../models/``.

    Returns:
        A ``Model`` named ``ST_attention`` with inputs
        ``[skeleton_input, video_input]``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # Resolve the weight files first so an unknown dataset fails immediately
    # instead of as an unbound ``model_lstm`` after the I3D branch is built.
    if dataset == 'NTU':
        rgb_weights = '../models/ntu-cv_pre-trained_rgb_model.hdf5'
        skeleton_weights = '../models/ntu-cv_pre-trained_skeleton_model.hdf5'
    elif dataset == 'Smarthomes':
        rgb_weights = '../models/smarthomes-cs_pre-trained_rgb_model.hdf5'
        skeleton_weights = '../models/smarthomes-cs_pre-trained_skeleton_model.hdf5'
    else:
        raise ValueError(
            "dataset must be 'NTU' or 'Smarthomes', got {!r}".format(dataset))

    i3d = i3d_modified(weights='rgb_imagenet_and_kinetics')
    model_branch = i3d.i3d_flattened(num_classes=num_classes)
    model_branch.load_weights(rgb_weights)

    optim = SGD(lr=0.01, momentum=0.9)
    model_branch.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

    print('Build model...')
    model_lstm = load_model(skeleton_weights)
    model_lstm.compile(loss='categorical_crossentropy',
                       optimizer=keras.optimizers.Adam(lr=0.005, clipnorm=1),
                       metrics=['accuracy'])

    # The skeleton network is frozen and its last layer is removed.
    for layer in model_lstm.layers:
        layer.trainable = False
    model_lstm.pop()

    # Attention heads driven by the skeleton model's 'dropout_1' output.
    skeleton_features = model_lstm.get_layer('dropout_1').output
    z1 = Dense(256, activation='tanh', name='z1_layer', trainable=True)(skeleton_features)
    z2 = Dense(256, activation='tanh', name='z2_layer', trainable=True)(skeleton_features)

    fc_main1 = Dense(49, kernel_initializer='zeros', bias_initializer='zeros',
                     activation='sigmoid', trainable=True, name='dense_1')(z1)
    atten_mask_spatial = keras.layers.core.Lambda(
        inflate_dense_spatial, output_shape=(8, 7, 7, 1024))(fc_main1)

    fc_main_2 = Dense(8, kernel_initializer='zeros', bias_initializer='zeros',
                      activation='softmax', trainable=True, name='dense_2')(z2)
    atten_mask_temporal = keras.layers.core.Lambda(
        inflate_dense_temporal, output_shape=(8, 7, 7, 1024))(fc_main_2)

    model_inputs = [model_lstm.input, model_branch.input]

    for layer in model_branch.layers:
        layer.trainable = True

    mixed_5c = model_branch.get_layer('Mixed_5c').output

    # Spatial pathway.
    multiplied_features1 = keras.layers.Multiply()([atten_mask_spatial, mixed_5c])
    x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                         name='global_avg_pool1' + 'second')(multiplied_features1)
    x = Dropout(n_dropout)(x)

    # Temporal pathway.
    multiplied_features2 = keras.layers.Multiply()([atten_mask_temporal, mixed_5c])
    y = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                         name='global_avg_pool2' + 'second')(multiplied_features2)
    y = Dropout(n_dropout)(y)

    # Classification block on the concatenated pathways.
    agg_features = keras.layers.Concatenate()([x, y])
    agg_features = conv3d_bn(agg_features, num_classes, 1, 1, 1, padding='same',
                             use_bias=True, use_activation_fn=False, use_bn=False,
                             name='Conv3d_6a_1x1' + 'second')
    agg_features = Flatten(name='flatten' + 'second')(agg_features)
    predictions = Dense(num_classes, activation='softmax', name='softmax' + 'second')(agg_features)
    model = Model(inputs=model_inputs, outputs=predictions, name='ST_attention')

    # Copy the weights of the branch layer at position -5 into the layers at
    # positions -8 and -7 of the new model and mark them trainable.  Slices
    # (not plain indices) are kept so a too-short layer list is a silent
    # no-op, exactly as before.
    source_layers = model_branch.layers[-5:-4]
    for target_span in (slice(-8, -7), slice(-7, -6)):
        for l_m, l_lh in zip(model.layers[target_span], source_layers):
            l_m.set_weights(l_lh.get_weights())
            l_m.trainable = True

    return model