def __init__(self):
    """Read the configuration, load the images and build the three VGG graphs.

    Attributes set here:
      * paths and hyper-parameters copied from ``cfg`` (numeric values are
        coerced with ``float`` / ``int``);
      * ``content_value`` / ``style_value``: images read through
        ``self.read_image``;
      * ``result``: the value returned by ``self.initial_image`` for a
        ``[1, 224, 224, 3]`` shape (mean 127.5, stddev 30);
      * ``content`` / ``style``: float32 placeholders of the same shape;
      * ``vgg_for_content`` / ``vgg_for_style`` / ``vgg_for_result``: three
        ``VGGNet`` instances built on the two placeholders and on ``result``.
    """
    # Configuration parameters.
    self.content_img_path = cfg.content_img_path
    self.style_img_path = cfg.style_img_path
    self.vgg_npy_path = cfg.vgg_model_path
    self.learning_rate = float(cfg.learning_rate)
    self.steps = int(cfg.steps)
    self.lambda_c = float(cfg.lambda_c)
    self.lambda_s = float(cfg.lambda_s)

    # Every tensor handled here shares this shape.
    image_shape = [1, 224, 224, 3]

    # The three images: content, style and the generated result.
    self.content_value = self.read_image(self.content_img_path)
    self.style_value = self.read_image(self.style_img_path)
    self.result = self.initial_image(image_shape, 127.5, 30)

    # Placeholders through which the content and style images are fed.
    self.content = tf.placeholder(tf.float32, image_shape)
    self.style = tf.placeholder(tf.float32, image_shape)

    # One VGG model per image, all created from the same weight dictionary.
    vgg_weights = np.load(
        self.vgg_npy_path, encoding="latin1", allow_pickle=True).item()
    self.vgg_for_content = VGGNet(vgg_weights)
    self.vgg_for_style = VGGNet(vgg_weights)
    self.vgg_for_result = VGGNet(vgg_weights)

    # Build each network on its own input, in the original order.
    networks_and_inputs = (
        (self.vgg_for_content, self.content),
        (self.vgg_for_style, self.style),
        (self.vgg_for_result, self.result),
    )
    for network, network_input in networks_and_inputs:
        network.build_vgg(network_input)
def __init__(self, load_pretrained=True):
    """Build the unaries network and compile its Theano functions.

    The network applies ROI pooling (3 x 3 bins) to ``mNet.c53_r`` of a VGG
    net, followed by two 1024-unit clipped-linear layers with dropout and a
    2-way output layer.  Column 0 of the output is the class that the
    ground-truth vector ``Ybb`` marks with 1.

    Compiled functions (dropout input defaults in parentheses):
      * ``train_func(X, rois, Ybb, p_drop=0.5)`` -> [probabilities, loss];
        applies Adam updates (lr=1e-6) to the head and to ``mNet.paramsVGG``.
      * ``test_func(X, rois, Ybb, p_drop=0.0)`` -> [probabilities, loss].
      * ``run_func(X, rois, p_drop=0.0)`` -> probabilities.
      * ``play_func(X, rois, p_drop=0.0)`` -> pooled ROI features.
      * ``features_func(X, rois, p_drop=0.0)`` -> second hidden layer
        (before dropout).

    Args:
        load_pretrained: when true, head and VGG parameters are loaded from
            the pickle files under ``./VGG/models/``.
    """
    # Path where parameters are saved.
    self.path_save_params = './Unaries/trainedModels/'
    # Logs.
    self.train_logs_path = 'Unaries/train_unaries.txt'
    self.test_logs_path = 'Unaries/test_unaries.txt'
    # Output.
    self.unaries_out_path = Config.unaries_path

    print("Preparing room")
    # Create the room, then the evaluator which lets us load the GT.
    self.room = POM_room(Config.parts_root_folder, with_templates=True)
    self.evaluator = POM_evaluator(
        self.room,
        GT_labels_path_json='../NDF_peds/data/ETH/labels_json/%08d.json')

    print("Initializing Unaries Network")
    # Remark: when using ROIPooling, y axis first then x axis.
    p_h, p_w = 3, 3  # size of the extracted features grid

    # Symbolic inputs.
    X = T.ftensor4('X')
    Ybb = T.fvector('Ybb')
    p_drop = T.scalar('dropout', dtype='float32')
    t_rois = T.fmatrix()

    # Convnet followed by ROI pooling.
    mNet = VGGNet.VGG(X)
    c53_r = mNet.c53_r
    op = ROIPoolingOp(pooled_h=p_h, pooled_w=p_w, spatial_scale=1.0)
    roi_features = op(c53_r, t_rois)[0]

    # Head weights; creation order is kept as-is because the initialiser
    # may draw from a random generator.
    n_pooled = 512 * p_h * p_w
    w0_u = init_weights((n_pooled, 1024), name='w0_unaries')
    b0_u = init_weights((1024, ), name='b0_unaries', scale=0)
    w1_u = init_weights((1024, 1024), name='w1_unaries')
    b1_u = init_weights((1024, ), name='b1_unaries', scale=0)
    w2_u = init_weights((1024, 2), name='w2_unaries')
    b2_u = init_weights((2, ), name='b2_unaries', scale=0)
    paramsUnaries = [w0_u, b0_u, w1_u, b1_u, w2_u, b2_u]

    # Fully connected head: clip(., 0, 100000) acts as a bounded ReLU.
    features_flat = roi_features.reshape((-1, n_pooled))
    x1 = T.clip(T.dot(features_flat, w0_u) + b0_u, 0, 100000)
    x1_drop = dropout(x1, p_drop)
    x2 = T.clip(T.dot(x1_drop, w1_u) + b1_u, 0, 100000)
    x2_drop = dropout(x2, p_drop)
    logits = T.dot(x2_drop, w2_u) + b2_u
    log_p_out = stab_logsoftmax(logits)
    probabilities = T.exp(log_p_out)

    # Classification loss: cross-entropy written on the log-probabilities.
    loss = -(log_p_out[:, 0] * Ybb + log_p_out[:, 1] * (1 - Ybb)).mean()

    # Head-only updates; not used by any function compiled below.
    updates_loss = Adam(loss, paramsUnaries, lr=2e-4)
    # Head + VGG updates, used by train_func.
    updates_loss_VGG = Adam(loss, paramsUnaries + mNet.paramsVGG, lr=1e-6)

    compile_opts = dict(allow_input_downcast=True, on_unused_input='warn')
    self.train_func = theano.function(
        inputs=[X, t_rois, Ybb, In(p_drop, value=0.5)],
        outputs=[probabilities, loss],
        updates=updates_loss_VGG,
        **compile_opts)
    self.test_func = theano.function(
        inputs=[X, t_rois, Ybb, In(p_drop, value=0.0)],
        outputs=[probabilities, loss],
        updates=[],
        **compile_opts)
    self.run_func = theano.function(
        inputs=[X, t_rois, In(p_drop, value=0.0)],
        outputs=probabilities,
        updates=[],
        **compile_opts)
    self.play_func = theano.function(
        inputs=[X, t_rois, In(p_drop, value=0.0)],
        outputs=roi_features,
        updates=[],
        **compile_opts)
    self.features_func = theano.function(
        inputs=[X, t_rois, In(p_drop, value=0.0)],
        outputs=x2,
        updates=[],
        **compile_opts)

    # Objects kept on the instance.
    self.paramsUnaries = paramsUnaries
    self.mNet = mNet

    # Load pretrained parameters.
    if load_pretrained:
        print("loading pretrained params")
        # NOTE: unpickling can execute arbitrary code; only load trusted
        # files.  Files are opened in binary mode and closed right after
        # loading.
        with open('./VGG/models/paramsUnaries.pickle', 'rb') as params_file:
            params_to_load = pickle.load(params_file)
        self.setParams(params_to_load)
        with open('./VGG/models/paramsVGGUnaries.pickle', 'rb') as vgg_file:
            params_VGG = pickle.load(vgg_file)
        mNet.setParams(params_VGG)
def __init__(self):
    """Build the regression / background network and compile its functions.

    The graph combines a VGG activation volume, a background-subtraction
    head (``BGsubstractVGG.BGsubstract``), a ``Flat3`` regression net that
    outputs per-leaf probabilities, and one gaussian per leaf evaluated on
    channels 1..4 of the cropped target ``Y_in``.

    Compiled functions (dropout input defaults in parentheses):
      * ``train_decision_func(X, Y_in, p_drop=0.3)`` -> [regression_cost],
        Adam step on the regression net parameters.
      * ``train_bg_func(X, Y_in, p_drop=0.0)`` -> [bg_cost], Adam step on
        the background-subtraction parameters.
      * ``go_zero_sum_func()``: applies ``update_sums_to_zero``.
      * ``train_sums_func(X, Y_in, p_drop=0.0)``: applies ``update_sums``.
      * ``train_gaussians(X, Y_in, p_drop=0.0)``: applies
        ``gaussian_maximisation``.
      * ``test_function(X, Y_in, p_drop=0.0)`` -> [regression_cost].
      * ``run_function(X, p_drop=0.0)`` -> [p_foreground, leaf
        probabilities concatenated on axis 1, concatenated gaussian
        parameters].
    """
    # Training constants.
    leaf_count = Config.n_parts - 1
    self.n_leaves = leaf_count
    epsilon = 1e-5
    numerical_normalisation = 1e7

    # X is the input batch, Y_in the target volume (full image).
    X = T.ftensor4('X')
    Y_in = T.ftensor4('Y_in')
    batch_size = X.shape[0]
    p_drop = T.scalar('dropout', dtype='float32')

    # Convnet.
    mNet = VGGNet.VGG(X)
    self.mNet = mNet
    x_activ = mNet.activation_volume
    size_last_convolution = mNet.nb_activations  # read but not used below
    H_ds = x_activ.shape[2]
    W_ds = x_activ.shape[3]

    # For VGG, Y is already 1/4 of X, but the margins must be removed the
    # same way the network removes them.
    target_crop = Y_in[:, :, 0:H_ds, 0:W_ds]
    y_bg = target_crop[:, 0:1] > 0
    y_inside = target_crop[:, 1:2] > -500
    y_activ_regression = target_crop[:, 1:5]

    # Background subtraction.
    mBGsub = BGsubstractVGG.BGsubstract(x_activ)
    self.mBGsub = mBGsub
    p_foreground = mBGsub.p_fb[:, 0, :, :].reshape(
        (batch_size, 1, H_ds, W_ds))

    # Regression network producing the leaf probabilities
    # (change here to switch between tree and flat).
    self.regression_net = Flat3.Flat3(
        mNet, y_activ_regression, mBGsub, leaf_count, p_drop=p_drop)
    p_leaves = self.regression_net.p_leaves

    # Gaussian leaves; parameters are laid out as [a_1, s_1, a_2, s_2, ...].
    params_gaussian = init_all_gaussian_params(leaf_count)
    self.params_gaussian = params_gaussian
    sums_gaussian = init_all_gaussian_sums(leaf_count)

    G = [
        gaussian(y_activ_regression,
                 params_gaussian[2 * leaf],
                 params_gaussian[2 * leaf + 1])
        for leaf in range(0, leaf_count)
    ]

    # Mixture of the leaf gaussians weighted by the leaf probabilities.
    P_T = sum(G[leaf] * p_leaves[leaf] * numerical_normalisation
              for leaf in range(0, leaf_count))

    # Objective functions.
    # Regression: masked negative log-likelihood, normalised by mask size.
    masked_log_likelihood = T.log(P_T[:, :, :, :] * y_bg + epsilon) * y_bg
    regression_cost = -T.sum(masked_log_likelihood) / (T.sum(y_bg))

    # Background: cross-entropy with weight 3 where y_bg is set, 1 elsewhere.
    pixel_weights = 3 * y_bg + 1 * (1 - y_bg)
    bg_cost = (T.nnet.binary_crossentropy(p_foreground, y_bg) *
               pixel_weights).mean()

    # Updates for the decision parameters (regression tree / flat).
    updates_decision = Adam(regression_cost,
                            self.regression_net.params_regression,
                            lr=Config.Regrate)
    updates_bg = Adam(bg_cost, mBGsub.params, lr=Config.BGrate)

    # Updates for the gaussian sums and parameters.
    updates_sums = update_sums(
        p_leaves, G, P_T, y_activ_regression, y_inside, sums_gaussian,
        numerical_normalisation, epsilon)
    updates_zero_sums = update_sums_to_zero(leaf_count, sums_gaussian)
    updates_gaussian = gaussian_maximisation(
        p_leaves, G, P_T, y_activ_regression, y_inside, params_gaussian,
        sums_gaussian, numerical_normalisation, epsilon)

    # Outputs of the run function.
    all_p = T.concatenate(p_leaves, axis=1)
    all_gaussian_parameters = T.concatenate(params_gaussian)

    compile_opts = dict(allow_input_downcast=True, on_unused_input='warn')

    # Training functions for the decision part.
    self.train_decision_func = theano.function(
        inputs=[X, Y_in, In(p_drop, value=0.3)],
        outputs=[regression_cost],
        updates=updates_decision,
        **compile_opts)
    self.train_bg_func = theano.function(
        inputs=[X, Y_in, In(p_drop, value=0.0)],
        outputs=[bg_cost],
        updates=updates_bg,
        **compile_opts)

    # Functions updating the gaussians.
    self.go_zero_sum_func = theano.function(
        inputs=[], outputs=[], updates=updates_zero_sums)
    self.train_sums_func = theano.function(
        inputs=[X, Y_in, In(p_drop, value=0.0)],
        outputs=[],
        updates=updates_sums,
        **compile_opts)
    self.train_gaussians = theano.function(
        inputs=[X, Y_in, In(p_drop, value=0.0)],
        outputs=[],
        updates=updates_gaussian,
        **compile_opts)

    # Test and inference functions.
    self.test_function = theano.function(
        inputs=[X, Y_in, In(p_drop, value=0.0)],
        outputs=[regression_cost],
        updates=[],
        **compile_opts)
    self.run_function = theano.function(
        inputs=[X, In(p_drop, value=0.0)],
        outputs=[p_foreground, all_p, all_gaussian_parameters],
        updates=[],
        **compile_opts)
class Transfer(object):
    """Image style transfer by optimising an image against VGG features.

    A randomly initialised image is optimised with Adam so that its
    ``conv1_2`` features match those of the content image and the Gram
    matrix of its ``conv4_3`` features matches that of the style image.
    """

    # Side length, in pixels, of the square images fed to the VGG graphs.
    IMAGE_SIZE = 224

    def __init__(self):
        """Read the configuration, load the images and build the VGG graphs."""
        # Configuration parameters.
        self.content_img_path = cfg.content_img_path
        self.style_img_path = cfg.style_img_path
        self.vgg_npy_path = cfg.vgg_model_path
        self.learning_rate = float(cfg.learning_rate)
        self.steps = int(cfg.steps)
        self.lambda_c = float(cfg.lambda_c)
        self.lambda_s = float(cfg.lambda_s)

        image_shape = [1, self.IMAGE_SIZE, self.IMAGE_SIZE, 3]

        # The three images: content, style and the generated result.
        self.content_value = self.read_image(self.content_img_path)
        self.style_value = self.read_image(self.style_img_path)
        self.result = self.initial_image(image_shape, 127.5, 30)

        # Placeholders through which the content and style images are fed.
        self.content = tf.placeholder(tf.float32, image_shape)
        self.style = tf.placeholder(tf.float32, image_shape)

        # One VGG model per image, all sharing the same weight dictionary.
        data_dict = np.load(
            self.vgg_npy_path, encoding="latin1", allow_pickle=True).item()
        self.vgg_for_content = VGGNet(data_dict)
        self.vgg_for_style = VGGNet(data_dict)
        self.vgg_for_result = VGGNet(data_dict)

        # Build the VGG networks.
        self.vgg_for_content.build_vgg(self.content)
        self.vgg_for_style.build_vgg(self.style)
        self.vgg_for_result.build_vgg(self.result)

    def initial_image(self, shape, mean, stddev):
        """Return a ``tf.Variable`` initialised from a truncated normal."""
        initial = tf.truncated_normal(shape=shape, mean=mean, stddev=stddev)
        return tf.Variable(initial)

    def read_image(self, path):
        """Load an image as an int32 array of shape (1, size, size, 3).

        The image is converted to RGB and, only when its size differs from
        ``IMAGE_SIZE`` x ``IMAGE_SIZE``, resized, so that the result can
        always be fed to the fixed-shape placeholders.  An image that is
        already RGB and correctly sized is returned unchanged.

        Args:
            path: path of the image file.

        Returns:
            ``numpy.ndarray`` with dtype int32 and a leading batch axis.
        """
        target_size = (self.IMAGE_SIZE, self.IMAGE_SIZE)
        # convert() loads the pixel data, so the file can be closed at once.
        with Image.open(path) as source:
            image = source.convert("RGB")
        if image.size != target_size:
            image = image.resize(target_size)
        return np.asarray([np.array(image)], dtype=np.int32)

    def gram_matrix(self, x):
        """Return the normalised Gram matrix of a 4-D feature tensor.

        ``x`` must have a fully defined static shape; its two middle axes
        are flattened and the result has shape (batch, channels, channels).
        """
        b, w, h, ch = x.get_shape().as_list()
        feature = tf.reshape(x, [b, w * h, ch])
        # NOTE(review): the normaliser is w * b * ch, not the more common
        # w * h * ch; kept as-is because changing it rescales the style
        # loss relative to lambda_s -- confirm the intent.
        gram = tf.matmul(feature, feature, adjoint_a=True) / tf.constant(
            w * b * ch, tf.float32)
        return gram

    def losses(self):
        """Build the loss tensors.

        Returns:
            ``(content_loss, style_loss, loss)`` where ``loss`` is
            ``lambda_c * content_loss + lambda_s * style_loss``.
        """
        # Content loss.
        content_features = [
            self.vgg_for_content.conv1_2,
            # self.vgg_for_content.conv2_2
        ]
        result_content_features = [
            self.vgg_for_result.conv1_2,
            # self.vgg_for_result.conv2_2
        ]
        content_loss = tf.zeros(1, tf.float32)
        for c, c_ in zip(content_features, result_content_features):
            content_loss += tf.reduce_mean((c - c_)**2, [1, 2, 3])

        # Style loss, computed on Gram matrices.
        style_features = [self.vgg_for_style.conv4_3]
        result_style_features = [self.vgg_for_result.conv4_3]
        style_gram = [self.gram_matrix(f) for f in style_features]
        result_style_gram = [
            self.gram_matrix(f) for f in result_style_features
        ]
        style_loss = tf.zeros(1, tf.float32)
        for s, s_ in zip(style_gram, result_style_gram):
            style_loss += tf.reduce_mean((s - s_)**2, [1, 2])

        loss = self.lambda_c * content_loss + self.lambda_s * style_loss
        return content_loss, style_loss, loss

    def trans(self, output_dir="./output_dir"):
        """Run the optimisation and save the result every 10 steps.

        Args:
            output_dir: directory receiving ``result-XXXXX.jpg`` files; it
                is created when missing.
        """
        content_loss, style_loss, loss = self.losses()
        train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(loss)
        init_op = tf.global_variables_initializer()

        # Without this the first img.save() fails on a fresh checkout.
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        feed_dict = {
            self.content: self.content_value,
            self.style: self.style_value,
        }
        with tf.Session() as sess:
            sess.run(init_op)
            for step in range(self.steps):
                loss_value, content_loss_value, style_loss_value, _ = sess.run(
                    [loss, content_loss, style_loss, train_op],
                    feed_dict=feed_dict)
                print(
                    "step: %d, loss_value: %8.4f, content_loss: %8.4f, style_loss: %8.4f"
                    % (step + 1, loss_value[0], content_loss_value[0],
                       style_loss_value[0]))
                if (step + 1) % 10 == 0:
                    result_img_path = os.path.join(
                        output_dir, "result-%05d.jpg" % (step + 1))
                    result_value = self.result.eval(sess)[0]
                    # Clamp to the valid pixel range before the uint8 cast.
                    result_value = np.clip(result_value, 0, 255)
                    img_arr = np.array(result_value, np.uint8)
                    img = Image.fromarray(img_arr)
                    img.save(result_img_path)
def __init__(self, load_pretrained=True, training=True):
    """Build the unaries + orientation network and compile its functions.

    The network applies ROI pooling (3 x 3 bins) to ``mNet.c53_r`` of a VGG
    net, followed by two 1024-unit clipped-linear layers with dropout.  Two
    output layers sit on top: a 2-way classifier (column 0 is the class that
    ``Ybb`` marks with 1) and a 2-unit orientation layer clipped to
    [-pi, pi] (column 0: body angle, column 1: head angle).

    The total loss is ``loss_bbox + 0.3 * loss_body + 0.3 * loss_head``.
    The orientation losses are the chord distance on the unit circle
    between estimated and ground-truth angles, averaged over the boxes
    weighted by ``Ybb``.

    Compiled functions (dropout input defaults in parentheses):
      * ``train_func(X, rois, Ybb, Ybody, Yhead, p_drop=0.5)`` ->
        [probabilities, loss, rad_out, loss_bbox, loss_body, loss_head];
        applies Adam updates (lr=1e-6) to the head and to ``mNet.paramsVGG``.
      * ``test_func(...)``: same inputs/outputs, ``p_drop=0.0``, no updates.
      * ``run_func(X, rois, p_drop=0.0)`` -> [probabilities, rad_out].
      * ``play_func(X, rois, p_drop=0.0)`` -> pooled ROI features.
      * ``features_func(X, rois, p_drop=0.0)`` -> second hidden layer
        (before dropout).

    Args:
        load_pretrained: when true, head and VGG parameters are loaded from
            the files named by ``MyConfig.unary_storedParam`` and
            ``MyConfig.refinedVGG_storedParam``.
        training: when true, freshly initialised orientation-layer
            parameters are appended to the loaded head parameters before
            they are set (presumably because the stored file only holds the
            six bbox parameters -- confirm against the stored file).
    """
    # Path where parameters are saved.
    self.path_save_params = MyConfig.unaries_params_path
    # Same output as the former two-item print statement.
    print('param save at =  %s' % (self.path_save_params, ))
    # Logs.
    self.train_logs_path = MyConfig.unaries_train_log
    self.test_logs_path = MyConfig.unaries_test_log
    # Output.
    self.unaries_out_path = Config.unaries_path

    print("Preparing room")
    # Create the room, then the evaluator which lets us load the GT.
    self.room = POM_room(Config.parts_root_folder, with_templates=True)
    self.evaluator = POM_evaluator(
        self.room,
        GT_labels_path_json='../NDF_peds/data/ETH/labels_json/%08d.json')

    print("Initializing Unaries Network")
    # Remark: when using ROIPooling, y axis first then x axis.
    p_h, p_w = 3, 3  # size of the extracted features grid
    epsilon = 1e-7  # numerical floor used in the orientation losses

    # Symbolic inputs.
    X = T.ftensor4('X')
    Ybb = T.fvector('Ybb')  # GT for positive or negative bbox
    Ybody = T.fvector('Ybody')
    Yhead = T.fvector('Yhead')
    p_drop = T.scalar('dropout', dtype='float32')
    t_rois = T.fmatrix()

    # Convnet followed by ROI pooling.
    mNet = VGGNet.VGG(X)
    c53_r = mNet.c53_r
    op = ROIPoolingOp(pooled_h=p_h, pooled_w=p_w, spatial_scale=1.0)
    roi_features = op(c53_r, t_rois)[0]

    # Head weights; creation order is kept as-is because the initialiser
    # may draw from a random generator.
    n_pooled = 512 * p_h * p_w
    w0_u = init_weights((n_pooled, 1024), name='w0_unaries')
    b0_u = init_weights((1024, ), name='b0_unaries', scale=0)
    w1_u = init_weights((1024, 1024), name='w1_unaries')
    b1_u = init_weights((1024, ), name='b1_unaries', scale=0)
    w2_u = init_weights((1024, 2), name='w2_unaries')
    b2_u = init_weights((2, ), name='b2_unaries', scale=0)
    # Output layer for body / head orientation estimation.
    w2_u_ori = init_weights((1024, 2), name='w2_unaries_ori')
    b2_u_ori = init_weights((2, ), name='b2_unaries_ori', scale=0)
    paramsUnaries = [
        w0_u, b0_u, w1_u, b1_u, w2_u, b2_u, w2_u_ori, b2_u_ori
    ]

    # Fully connected head: clip(., 0, 100000) acts as a bounded ReLU.
    features_flat = roi_features.reshape((-1, n_pooled))
    x1 = T.clip(T.dot(features_flat, w0_u) + b0_u, 0, 100000)
    x1_drop = dropout(x1, p_drop)
    x2 = T.clip(T.dot(x1_drop, w1_u) + b1_u, 0, 100000)
    x2_drop = dropout(x2, p_drop)
    logits = T.dot(x2_drop, w2_u) + b2_u
    log_p_out = stab_logsoftmax(logits)
    probabilities = T.exp(log_p_out)

    # Orientation output, in radians.
    rad_out = T.clip(
        T.dot(x2_drop, w2_u_ori) + b2_u_ori, -math.pi, math.pi)

    # Classification loss.
    loss_bbox = -(log_p_out[:, 0] * Ybb +
                  log_p_out[:, 1] * (1 - Ybb)).mean()

    def chord_distance(est_rad, gt_rad):
        # Distance between the two angles' points on the unit circle.
        # epsilon keeps the sqrt gradient finite when the points coincide.
        d_x = T.cos(est_rad) - T.cos(gt_rad)
        d_y = T.sin(est_rad) - T.sin(gt_rad)
        return T.sqrt(d_x * d_x + d_y * d_y + epsilon)

    cost_body = chord_distance(rad_out[:, 0], Ybody)
    cost_head = chord_distance(rad_out[:, 1], Yhead)

    # Average over the positive boxes only.  The floor avoids a 0/0 (NaN
    # loss and NaN updates) on a batch without any positive box; the
    # orientation losses are then 0.
    n_positive = T.maximum(Ybb.sum(), epsilon)
    loss_body = (Ybb * cost_body).sum() / n_positive
    loss_head = (Ybb * cost_head).sum() / n_positive

    lambda1 = 0.3
    lambda2 = 0.3
    loss = loss_bbox + lambda1 * loss_body + lambda2 * loss_head

    # Head-only updates; not used by any function compiled below.
    updates_loss = Adam(loss, paramsUnaries, lr=2e-4)
    # Head + VGG updates, used by train_func.
    updates_loss_VGG = Adam(loss, paramsUnaries + mNet.paramsVGG, lr=1e-6)

    compile_opts = dict(allow_input_downcast=True, on_unused_input='warn')
    supervised_outputs = [
        probabilities, loss, rad_out, loss_bbox, loss_body, loss_head
    ]
    self.train_func = theano.function(
        inputs=[X, t_rois, Ybb, Ybody, Yhead, In(p_drop, value=0.5)],
        outputs=list(supervised_outputs),
        updates=updates_loss_VGG,
        **compile_opts)
    self.test_func = theano.function(
        inputs=[X, t_rois, Ybb, Ybody, Yhead, In(p_drop, value=0.0)],
        outputs=list(supervised_outputs),
        updates=[],
        **compile_opts)
    self.run_func = theano.function(
        inputs=[X, t_rois, In(p_drop, value=0.0)],
        outputs=[probabilities, rad_out],
        updates=[],
        **compile_opts)
    self.play_func = theano.function(
        inputs=[X, t_rois, In(p_drop, value=0.0)],
        outputs=roi_features,
        updates=[],
        **compile_opts)
    self.features_func = theano.function(
        inputs=[X, t_rois, In(p_drop, value=0.0)],
        outputs=x2,
        updates=[],
        **compile_opts)

    # Objects kept on the instance.
    self.paramsUnaries = paramsUnaries
    self.mNet = mNet

    # Load pretrained parameters.
    if load_pretrained:
        print("loading pretrained params for bbox detection")
        print(MyConfig.unary_storedParam)
        # NOTE: unpickling can execute arbitrary code; only load trusted
        # files.  Files are opened in binary mode and closed right after
        # loading.
        with open(MyConfig.unary_storedParam, 'rb') as params_file:
            params_to_load = pickle.load(params_file)
        # Append the parameters of the orientation layer.
        if training:
            print('append value')
            params_to_load.append(
                floatX(np.random.randn(*(1024, 2)) * 0.01))
            params_to_load.append(floatX(np.random.randn(*(2, )) * 0.0))
        self.setParams(params_to_load)

        print(MyConfig.refinedVGG_storedParam)
        with open(MyConfig.refinedVGG_storedParam, 'rb') as vgg_file:
            params_VGG = pickle.load(vgg_file)
        mNet.setParams(params_VGG)
val_df = df[df['subject_id'].isin(val_index)] train_dataset = BraTSDataset(train_df, data_folder, transform=train_transform) val_dataset = BraTSDataset(val_df, data_folder, transform=val_transform) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=1) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=1) device = ('cuda' if torch.cuda.is_available() else 'cpu') criterion = nn.BCEWithLogitsLoss() model = VGGNet().to(device).float() optimizer = optim.Adam(model.parameters(), lr=0.001) scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True) n_epochs = 50 stats = {'epoch': [], 'train_loss': [], 'val_loss': [], 'acc': []} val_loss_min = np.Inf for epoch in range(n_epochs): batch_train_loss = [] epoch_loss = 0 stats['epoch'].append(epoch) for images, labels in train_loader: loss = train_step(images, labels, model, criterion, optimizer)