Beispiel #1
0
    def __init__(self):
        """Load the configuration and images, then build three VGG graphs.

        One VGG instance is built on the content placeholder, one on the
        style placeholder and one on the result image created by
        ``self.initial_image``.
        """
        # Configuration values; numeric settings are coerced explicitly.
        self.content_img_path = cfg.content_img_path
        self.style_img_path = cfg.style_img_path
        self.vgg_npy_path = cfg.vgg_model_path
        self.learning_rate = float(cfg.learning_rate)
        self.steps = int(cfg.steps)
        self.lambda_c = float(cfg.lambda_c)
        self.lambda_s = float(cfg.lambda_s)

        # All three images share this shape.
        image_shape = (1, 224, 224, 3)

        # Content and style pixel data, plus the result image.
        self.content_value = self.read_image(self.content_img_path)
        self.style_value = self.read_image(self.style_img_path)
        self.result = self.initial_image(list(image_shape), 127.5, 30)

        # Placeholders through which the content/style pixels are fed.
        self.content = tf.placeholder(tf.float32, list(image_shape))
        self.style = tf.placeholder(tf.float32, list(image_shape))

        # The .npy file stores a pickled dict, hence allow_pickle and .item().
        vgg_weights = np.load(self.vgg_npy_path,
                              encoding="latin1",
                              allow_pickle=True).item()

        # One VGG wrapper per image, all created from the same weight dict.
        self.vgg_for_content = VGGNet(vgg_weights)
        self.vgg_for_style = VGGNet(vgg_weights)
        self.vgg_for_result = VGGNet(vgg_weights)

        # Build each network on top of its own input tensor.
        network_inputs = (
            (self.vgg_for_content, self.content),
            (self.vgg_for_style, self.style),
            (self.vgg_for_result, self.result),
        )
        for network, image in network_inputs:
            network.build_vgg(image)
    def __init__(self, load_pretrained=True):
        """Build the unaries classifier on VGG features and compile it.

        The graph ROI-pools the ``c53_r`` output of the VGG network, feeds
        the pooled features through two clipped-linear layers with dropout
        and ends in a 2-way log-softmax. Five Theano functions are compiled
        (train, test, run, play, features).

        Args:
            load_pretrained: When true, parameters for the classifier head
                and for the VGG network are loaded from the pickle files
                under ``./VGG/models/`` once the graph has been built.
        """
        # Directory used when saving parameters
        self.path_save_params = './Unaries/trainedModels/'

        # Log files
        self.train_logs_path = 'Unaries/train_unaries.txt'
        self.test_logs_path = 'Unaries/test_unaries.txt'

        # Output
        self.unaries_out_path = Config.unaries_path

        # Single-argument print calls behave identically on Python 2 and 3.
        print("Preparing room")
        # Prepare room and evaluator
        self.room = POM_room(Config.parts_root_folder, with_templates=True)
        # The evaluator is what lets us load the ground truth
        self.evaluator = POM_evaluator(
            self.room,
            GT_labels_path_json='../NDF_peds/data/ETH/labels_json/%08d.json')

        print("Initializing Unaries Network")
        # DEFINE NETWORK
        # Remark: when using ROIPooling, the y axis comes first, then the
        # x axis.
        p_h, p_w = 3, 3  # size of the extracted features vector

        X = T.ftensor4('X')
        Ybb = T.fvector('Ybb')
        p_drop = T.scalar('dropout', dtype='float32')
        t_rois = T.fmatrix()

        # Convnet
        mNet = VGGNet.VGG(X)
        c53_r = mNet.c53_r

        op = ROIPoolingOp(pooled_h=p_h, pooled_w=p_w, spatial_scale=1.0)
        roi_features = op(c53_r, t_rois)[0]

        # Initialize weights (creation order kept as in the original so any
        # RNG-driven initialisation is unchanged)
        w0_u = init_weights((512 * p_h * p_w, 1024), name='w0_unaries')
        b0_u = init_weights((1024, ), name='b0_unaries', scale=0)
        w1_u = init_weights((1024, 1024), name='w1_unaries')
        b1_u = init_weights((1024, ), name='b1_unaries', scale=0)
        w2_u = init_weights((1024, 2), name='w2_unaries')
        b2_u = init_weights((2, ), name='b2_unaries', scale=0)

        paramsUnaries = [w0_u, b0_u, w1_u, b1_u, w2_u, b2_u]

        # Classifier head; T.clip(., 0, 100000) acts as a capped ReLU.
        features_flat = roi_features.reshape((-1, 512 * p_h * p_w))
        x1 = T.clip(T.dot(features_flat, w0_u) + b0_u, 0, 100000)
        x1_drop = dropout(x1, p_drop)
        x2 = T.clip(T.dot(x1_drop, w1_u) + b1_u, 0, 100000)
        x2_drop = dropout(x2, p_drop)
        # Compute the logits once instead of rebuilding the same expression.
        logits = T.dot(x2_drop, w2_u) + b2_u
        log_p_out = stab_logsoftmax(logits)

        # Classification: cross-entropy written on the log-probabilities,
        # column 0 being the class labelled by Ybb == 1.
        loss = -(log_p_out[:, 0] * Ybb + log_p_out[:, 1] * (1 - Ybb)).mean()

        # Parameter updates.
        # NOTE(review): updates_loss (head only) is not wired into any
        # compiled function below; kept because Adam's side effects are not
        # visible from here.
        updates_loss = Adam(loss, paramsUnaries, lr=2e-4)
        updates_loss_VGG = Adam(loss, paramsUnaries + mNet.paramsVGG, lr=1e-6)

        # Training uses dropout 0.5 by default and updates head + VGG.
        self.train_func = theano.function(
            inputs=[X, t_rois, Ybb, In(p_drop, value=0.5)],
            outputs=[T.exp(log_p_out), loss],
            updates=updates_loss_VGG,
            allow_input_downcast=True,
            on_unused_input='warn')

        # Same outputs as training, without updates and without dropout.
        self.test_func = theano.function(
            inputs=[X, t_rois, Ybb, In(p_drop, value=0.0)],
            outputs=[T.exp(log_p_out), loss],
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Inference: class probabilities only.
        self.run_func = theano.function(
            inputs=[X, t_rois, In(p_drop, value=0.0)],
            outputs=T.exp(log_p_out),
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Raw ROI-pooled features.
        self.play_func = theano.function(
            inputs=[X, t_rois, In(p_drop, value=0.0)],
            outputs=roi_features,
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Activations of the second hidden layer (before dropout).
        self.features_func = theano.function(
            inputs=[X, t_rois, In(p_drop, value=0.0)],
            outputs=x2,
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Define self objects
        self.paramsUnaries = paramsUnaries
        self.mNet = mNet

        # Load pretrained params
        if load_pretrained:
            print("loading pretrained params")
            # Pickle data is binary: open in 'rb' and close the handle
            # deterministically. NOTE: pickle can execute arbitrary code, so
            # these files must come from a trusted source.
            with open('./VGG/models/paramsUnaries.pickle', 'rb') as params_file:
                params_to_load = pickle.load(params_file)
            self.setParams(params_to_load)
            with open('./VGG/models/paramsVGGUnaries.pickle', 'rb') as vgg_file:
                params_VGG = pickle.load(vgg_file)
            mNet.setParams(params_VGG)
Beispiel #3
0
    def __init__(self):
        """Build the VGG / background / Flat3 graph and compile its functions.

        The method wires a VGG network, a background-subtraction module and
        a ``Flat3`` regression network together, mixes the leaf
        probabilities with one Gaussian per leaf, and compiles the Theano
        functions used for training, Gaussian re-estimation, testing and
        inference.
        """

        # Choose parameters of training
        # NOTE(review): struct and CNN_name are not referenced again in this
        # method.
        struct = 'Flat3'
        CNN_name = 'VGG'
        n_leaves = Config.n_parts - 1
        self.n_leaves = n_leaves

        epsilon = 1e-5
        # Constant factor applied to every mixture term of P_T below.
        numerical_normalisation = 1e7

        # X is input matrix and Y is output (full image)
        X = T.ftensor4('X')
        Y_in = T.ftensor4('Y_in')
        batch_size = X.shape[0]
        p_drop = T.scalar('dropout', dtype='float32')

        # Building net
        ## Convnet

        mNet = VGGNet.VGG(X)
        self.mNet = mNet

        x_activ = mNet.activation_volume
        # NOTE(review): size_last_convolution is not used further down.
        size_last_convolution = mNet.nb_activations
        H_ds, W_ds = x_activ.shape[2], x_activ.shape[3]

        #For VGG, y is already 1/4 of X, but we need to remove margins as it is done in the network
        Y_in_crop = Y_in[:, :, 0:H_ds, 0:W_ds]

        # Masks and regression targets sliced from the cropped target:
        # channel 0 > 0 gives y_bg, channel 1 > -500 gives y_inside, and
        # channels 1..4 are the regression targets.
        y_bg = Y_in_crop[:, 0:1] > 0
        y_inside = Y_in_crop[:, 1:2] > -500
        y_activ_regression = Y_in_crop[:, 1:5]

        ## Handling GT
        # y_activ_regression = mNet.reshapeInputImageToActivationVol(Y_in_regression)
        # # y_activ_binary = mNet.reshapeInputImageToActivationVol(Y_in_binary)

        ## Background subtraction

        mBGsub = BGsubstractVGG.BGsubstract(x_activ)
        self.mBGsub = mBGsub

        p_fb = mBGsub.p_fb
        # Channel 0 of p_fb, reshaped back to a 4-D tensor with one channel.
        p_foreground = p_fb[:, 0, :, :].reshape((batch_size, 1, H_ds, W_ds))

        ## Regression Network to produce probabilities
        ##Change here to switch between tree and flat

        self.regression_net = Flat3.Flat3(mNet,
                                          y_activ_regression,
                                          mBGsub,
                                          n_leaves,
                                          p_drop=p_drop)

        p_leaves = self.regression_net.p_leaves

        ## Gaussian leaves

        # Two consecutive entries of params_gaussian belong to each leaf
        # (indices 2*l and 2*l + 1).
        params_gaussian = init_all_gaussian_params(
            n_leaves)  #[a_1,s_1,a_2, s_2]
        self.params_gaussian = params_gaussian
        sums_gaussian = init_all_gaussian_sums(n_leaves)

        # One Gaussian term per leaf, evaluated on the regression targets.
        G = []
        for l in range(0, n_leaves):
            G.append(
                gaussian(y_activ_regression, params_gaussian[2 * l],
                         params_gaussian[
                             2 * l +
                             1]))  # outputs a (batch_size,h_ds,w_ds) tensor )

        # Mixture over leaves: sum_l G[l] * p_leaves[l], scaled by
        # numerical_normalisation.
        P_T = 0

        for l in range(0, n_leaves):
            P_T = P_T + G[l] * p_leaves[l] * numerical_normalisation

        #Objective functions

        ## Regression
        # Negative log of the mixture, masked by y_bg and divided by the
        # number of masked positions. The commented line is the same cost
        # masked by y_inside instead.
        # NOTE(review): T.sum(y_bg) is zero when no position is masked in,
        # which would make this a division by zero - confirm inputs.
        #regression_cost =-T.sum((T.log(P_T[:,:,:,:]*y_inside + epsilon)*y_inside))/(T.sum(y_inside))
        regression_cost = -T.sum(
            (T.log(P_T[:, :, :, :] * y_bg + epsilon) * y_bg)) / (T.sum(y_bg))

        ## Background
        # Binary cross-entropy weighted 3 where y_bg is set and 1 elsewhere.
        bg_cost = (T.nnet.binary_crossentropy(p_foreground, y_bg) *
                   (3 * y_bg + 1 * (1 - y_bg))).mean()

        # Updates for decision parameter
        ## For regression tree/Flat
        updates_decision = Adam(regression_cost,
                                self.regression_net.params_regression,
                                lr=Config.Regrate)
        updates_bg = Adam(bg_cost, mBGsub.params, lr=Config.BGrate)

        ## Updates for the accumulated Gaussian sums
        updates_sums = update_sums(p_leaves, G, P_T, y_activ_regression,
                                   y_inside, sums_gaussian,
                                   numerical_normalisation, epsilon)
        updates_zero_sums = update_sums_to_zero(n_leaves, sums_gaussian)

        ## Updates for gaussian parameters gaussian_maximisation
        updates_gaussian = gaussian_maximisation(
            p_leaves, G, P_T, y_activ_regression, y_inside, params_gaussian,
            sums_gaussian, numerical_normalisation, epsilon)

        ## Prepare outputs
        all_p = T.concatenate(p_leaves, axis=1)
        all_gaussian_parameters = T.concatenate(params_gaussian)

        #Training functions

        ## For decision tree
        # Dropout defaults to 0.3 here and to 0.0 in every other function.
        self.train_decision_func = theano.function(
            inputs=[X, Y_in, In(p_drop, value=0.3)],
            outputs=[regression_cost],
            updates=updates_decision,
            allow_input_downcast=True,
            on_unused_input='warn')

        # Trains the background-subtraction parameters on bg_cost.
        self.train_bg_func = theano.function(
            inputs=[X, Y_in, In(p_drop, value=0.0)],
            outputs=[bg_cost],
            updates=updates_bg,
            allow_input_downcast=True,
            on_unused_input='warn')

        ## For updating gaussians
        # Applies updates_zero_sums; takes no inputs.
        self.go_zero_sum_func = theano.function(inputs=[],
                                                outputs=[],
                                                updates=updates_zero_sums)
        # Applies updates_sums for one batch.
        self.train_sums_func = theano.function(
            inputs=[X, Y_in, In(p_drop, value=0.0)],
            outputs=[],
            updates=updates_sums,
            allow_input_downcast=True,
            on_unused_input='warn')
        # Applies updates_gaussian to the Gaussian parameters.
        self.train_gaussians = theano.function(
            inputs=[X, Y_in, In(p_drop, value=0.0)],
            outputs=[],
            updates=updates_gaussian,
            allow_input_downcast=True,
            on_unused_input='warn')

        ## Test function
        # Evaluates regression_cost without applying any update.
        self.test_function = theano.function(
            inputs=[X, Y_in, In(p_drop, value=0.0)],
            outputs=[regression_cost],
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')
        # Inference: foreground probability, concatenated leaf
        # probabilities and the concatenated Gaussian parameters.
        self.run_function = theano.function(
            inputs=[X, In(p_drop, value=0.0)],
            outputs=[p_foreground, all_p, all_gaussian_parameters],
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')
Beispiel #4
0
class Transfer(object):
    """Style transfer that optimises a result image against VGG features.

    Three VGG graphs are built: one on the content placeholder, one on the
    style placeholder and one on the trainable result variable. ``trans``
    minimises a weighted sum of a content loss and a Gram-matrix style loss
    with respect to the result image.
    """

    def __init__(self):
        """Read the configuration, load the images and build the graphs."""
        # Configuration values
        self.content_img_path = cfg.content_img_path
        self.style_img_path = cfg.style_img_path
        self.vgg_npy_path = cfg.vgg_model_path
        self.learning_rate = float(cfg.learning_rate)
        self.steps = int(cfg.steps)
        self.lambda_c = float(cfg.lambda_c)
        self.lambda_s = float(cfg.lambda_s)

        # The three images: content, style and the trainable result
        self.content_value = self.read_image(self.content_img_path)
        self.style_value = self.read_image(self.style_img_path)
        self.result = self.initial_image([1, 224, 224, 3], 127.5, 30)

        # Placeholders the content/style pixels are fed through
        self.content = tf.placeholder(tf.float32, [1, 224, 224, 3])
        self.style = tf.placeholder(tf.float32, [1, 224, 224, 3])

        # Three VGG models sharing one weight dict; the .npy file holds a
        # pickled dict, hence allow_pickle and .item().
        data_dict = np.load(self.vgg_npy_path,
                            encoding="latin1",
                            allow_pickle=True).item()
        self.vgg_for_content = VGGNet(data_dict)
        self.vgg_for_style = VGGNet(data_dict)
        self.vgg_for_result = VGGNet(data_dict)

        # Build each VGG network on its own input
        self.vgg_for_content.build_vgg(self.content)
        self.vgg_for_style.build_vgg(self.style)
        self.vgg_for_result.build_vgg(self.result)

    def initial_image(self, shape, mean, stddev):
        """Return a ``tf.Variable`` initialised from a truncated normal."""
        initial = tf.truncated_normal(shape=shape, mean=mean, stddev=stddev)
        return tf.Variable(initial)

    def read_image(self, path):
        """Load the image at ``path`` as an int32 array with a batch axis.

        NOTE: the image is not resized here, so it must already match the
        [1, 224, 224, 3] placeholders it is fed into.
        """
        image = Image.open(path)
        np_img = np.array(image)
        np_img = np.asarray([np_img], dtype=np.int32)
        return np_img

    def gram_matrix(self, x):
        """Return the normalised Gram matrix of a 4-D feature tensor.

        The two spatial axes are flattened and the channel-by-channel inner
        products are divided by the number of elements in one feature map.
        """
        b, w, h, ch = x.get_shape().as_list()
        feature = tf.reshape(x, [b, w * h, ch])
        # Normalise by w * h * ch; the earlier divisor used the batch size
        # b where the second spatial extent h belongs.
        gram = tf.matmul(feature, feature, adjoint_a=True) / tf.constant(
            w * h * ch, tf.float32)
        return gram

    def losses(self):
        """Build and return ``(content_loss, style_loss, total_loss)``."""
        # Content loss: mean squared difference of conv1_2 activations
        content_features = [
            self.vgg_for_content.conv1_2,
        ]

        result_content_features = [
            self.vgg_for_result.conv1_2,
        ]

        content_loss = tf.zeros(1, tf.float32)
        for c, c_ in zip(content_features, result_content_features):
            content_loss += tf.reduce_mean((c - c_)**2, [1, 2, 3])

        # Style loss: mean squared difference of conv4_3 Gram matrices
        style_features = [self.vgg_for_style.conv4_3]

        result_style_features = [self.vgg_for_result.conv4_3]

        style_gram = [self.gram_matrix(feature) for feature in style_features]

        result_style_gram = [
            self.gram_matrix(feature) for feature in result_style_features
        ]

        style_loss = tf.zeros(1, tf.float32)

        for s, s_ in zip(style_gram, result_style_gram):
            style_loss += tf.reduce_mean((s - s_)**2, [1, 2])

        loss = self.lambda_c * content_loss + self.lambda_s * style_loss
        return content_loss, style_loss, loss

    def trans(self):
        """Run the optimisation and save the result image every 10 steps.

        Images are written to ``./output_dir`` as ``result-<step>.jpg``; the
        directory is created if it does not exist yet.
        """
        content_loss, style_loss, loss = self.losses()
        train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(loss)

        # Make sure the target directory exists before the first save.
        output_dir = "./output_dir"
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        init_op = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init_op)
            for step in range(self.steps):
                loss_value, content_loss_value, style_loss_value, _ = sess.run(
                    [loss, content_loss, style_loss, train_op],
                    feed_dict={
                        self.content: self.content_value,
                        self.style: self.style_value
                    })

                print(
                    "step: %d, loss_value: %8.4f, content_loss: %8.4f, style_loss: %8.4f"
                    % (step + 1, loss_value[0], content_loss_value[0],
                       style_loss_value[0]))
                if (step + 1) % 10 == 0:
                    result_img_path = os.path.join(
                        output_dir, "result-%05d.jpg" % (step + 1))
                    # Drop the batch axis and clamp to the valid pixel
                    # range before converting to 8-bit.
                    result_value = self.result.eval(sess)[0]
                    result_value = np.clip(result_value, 0, 255)
                    img_arr = np.array(result_value, np.uint8)
                    img = Image.fromarray(img_arr)
                    img.save(result_img_path)
Beispiel #5
0
    def __init__(self, load_pretrained=True, training=True):
        """Build the unaries network with orientation outputs and compile it.

        On top of the 2-way bounding-box classifier, a second linear layer
        predicts two angles (body and head orientation), clipped to
        [-pi, pi]. The loss is the classification cross-entropy plus the
        weighted chord distances between predicted and ground-truth
        orientations on the unit circle.

        Args:
            load_pretrained: When true, load stored parameters for the
                classifier head and the VGG network after building the graph.
            training: Only used when ``load_pretrained`` is true. When true,
                freshly initialised orientation weights are appended to the
                loaded head parameters before they are set.
        """
        # Directory used when saving parameters
        self.path_save_params = MyConfig.unaries_params_path
        # Formatted so the output matches the former two-argument print
        # statement on Python 2 while remaining valid on Python 3.
        print('param save at =  %s' % (self.path_save_params, ))

        # Log files
        self.train_logs_path = MyConfig.unaries_train_log
        self.test_logs_path = MyConfig.unaries_test_log

        # Output
        self.unaries_out_path = Config.unaries_path

        print("Preparing room")
        # Prepare room and evaluator
        self.room = POM_room(Config.parts_root_folder, with_templates=True)
        # The evaluator is what lets us load the ground truth
        self.evaluator = POM_evaluator(
            self.room,
            GT_labels_path_json='../NDF_peds/data/ETH/labels_json/%08d.json')

        print("Initializing Unaries Network")
        # DEFINE NETWORK
        # Remark: when using ROIPooling, the y axis comes first, then the
        # x axis.
        p_h, p_w = 3, 3  # size of the extracted features vector

        epsilon = 1e-7
        X = T.ftensor4('X')
        Ybb = T.fvector('Ybb')  # GT for positive or negative bbox
        Ybody = T.fvector('Ybody')
        Yhead = T.fvector('Yhead')

        p_drop = T.scalar('dropout', dtype='float32')
        t_rois = T.fmatrix()

        # Convnet
        mNet = VGGNet.VGG(X)
        c53_r = mNet.c53_r

        op = ROIPoolingOp(pooled_h=p_h, pooled_w=p_w, spatial_scale=1.0)
        roi_features = op(c53_r, t_rois)[0]

        # Initialize weights (creation order kept as in the original so any
        # RNG-driven initialisation is unchanged)
        w0_u = init_weights((512 * p_h * p_w, 1024), name='w0_unaries')
        b0_u = init_weights((1024, ), name='b0_unaries', scale=0)
        w1_u = init_weights((1024, 1024), name='w1_unaries')
        b1_u = init_weights((1024, ), name='b1_unaries', scale=0)
        w2_u = init_weights((1024, 2), name='w2_unaries')
        b2_u = init_weights((2, ), name='b2_unaries', scale=0)
        # Extra layer for body / head orientation estimation
        w2_u_ori = init_weights((1024, 2), name='w2_unaries_ori')
        b2_u_ori = init_weights((2, ), name='b2_unaries_ori', scale=0)

        paramsUnaries = [
            w0_u, b0_u, w1_u, b1_u, w2_u, b2_u, w2_u_ori, b2_u_ori
        ]

        # Classifier head; T.clip(., 0, 100000) acts as a capped ReLU.
        features_flat = roi_features.reshape((-1, 512 * p_h * p_w))
        x1 = T.clip(T.dot(features_flat, w0_u) + b0_u, 0, 100000)
        x1_drop = dropout(x1, p_drop)
        x2 = T.clip(T.dot(x1_drop, w1_u) + b1_u, 0, 100000)
        x2_drop = dropout(x2, p_drop)
        # Compute the logits once instead of rebuilding the same expression.
        logits = T.dot(x2_drop, w2_u) + b2_u
        log_p_out = stab_logsoftmax(logits)
        # Orientation head: column 0 is the body angle, column 1 the head
        # angle, both clipped to [-pi, pi].
        rad_out = T.clip(
            T.dot(x2_drop, w2_u_ori) + b2_u_ori, -math.pi, math.pi)

        # Classification loss on the log-probabilities
        loss_bbox = -(log_p_out[:, 0] * Ybb + log_p_out[:, 1] *
                      (1 - Ybb)).mean()

        # Orientation losses: distance between the estimated and the
        # ground-truth point on the unit circle. Theano ops are used
        # directly instead of applying NumPy ufuncs to symbolic tensors.
        # NOTE(review): the gradient of sqrt is unbounded at exactly zero
        # distance; consider a small offset inside the sqrt if NaNs appear.
        unit = 1.0
        est_body_orienX = unit * T.cos(rad_out[:, 0])
        est_body_orienY = unit * T.sin(rad_out[:, 0])
        gt_body_orienX = unit * T.cos(Ybody)
        gt_body_orienY = unit * T.sin(Ybody)
        d_bodyX = est_body_orienX - gt_body_orienX
        d_bodyY = est_body_orienY - gt_body_orienY
        cost_body = T.sqrt(d_bodyX * d_bodyX + d_bodyY * d_bodyY)

        est_head_orienX = unit * T.cos(rad_out[:, 1])
        est_head_orienY = unit * T.sin(rad_out[:, 1])
        gt_head_orienX = unit * T.cos(Yhead)
        gt_head_orienY = unit * T.sin(Yhead)
        d_headX = est_head_orienX - gt_head_orienX
        d_headY = est_head_orienY - gt_head_orienY
        cost_head = T.sqrt(d_headX * d_headX + d_headY * d_headY)

        # Average the orientation costs over the entries weighted by Ybb.
        # The floor only matters when Ybb sums to zero, where it turns a
        # 0/0 NaN into a zero loss.
        ybb_total = T.maximum(Ybb.sum(), epsilon)
        loss_body = (Ybb * cost_body).sum() / ybb_total
        loss_head = (Ybb * cost_head).sum() / ybb_total

        lambda1 = 0.3
        lambda2 = 0.3
        loss = loss_bbox + lambda1 * loss_body + lambda2 * loss_head

        # Parameter updates.
        # NOTE(review): updates_loss (head only) is not wired into any
        # compiled function below; kept because Adam's side effects are not
        # visible from here.
        updates_loss = Adam(loss, paramsUnaries, lr=2e-4)
        updates_loss_VGG = Adam(loss, paramsUnaries + mNet.paramsVGG, lr=1e-6)

        # Training uses dropout 0.5 by default and updates head + VGG.
        self.train_func = theano.function(
            inputs=[X, t_rois, Ybb, Ybody, Yhead,
                    In(p_drop, value=0.5)],
            outputs=[
                T.exp(log_p_out), loss, rad_out, loss_bbox, loss_body,
                loss_head
            ],
            updates=updates_loss_VGG,
            allow_input_downcast=True,
            on_unused_input='warn')

        # Same outputs as training, without updates and without dropout.
        self.test_func = theano.function(
            inputs=[X, t_rois, Ybb, Ybody, Yhead,
                    In(p_drop, value=0.0)],
            outputs=[
                T.exp(log_p_out), loss, rad_out, loss_bbox, loss_body,
                loss_head
            ],
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Inference: class probabilities and predicted angles.
        self.run_func = theano.function(
            inputs=[X, t_rois, In(p_drop, value=0.0)],
            outputs=[T.exp(log_p_out), rad_out],
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Raw ROI-pooled features.
        self.play_func = theano.function(
            inputs=[X, t_rois, In(p_drop, value=0.0)],
            outputs=roi_features,
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Activations of the second hidden layer (before dropout).
        self.features_func = theano.function(
            inputs=[X, t_rois, In(p_drop, value=0.0)],
            outputs=x2,
            updates=[],
            allow_input_downcast=True,
            on_unused_input='warn')

        # Define self objects
        self.paramsUnaries = paramsUnaries
        self.mNet = mNet

        # Load pretrained params
        if load_pretrained:
            print("loading pretrained params for bbox detection")
            print(MyConfig.unary_storedParam)
            # Pickle data is binary: open in 'rb' and close the handle
            # deterministically. NOTE: pickle can execute arbitrary code, so
            # these files must come from a trusted source.
            with open(MyConfig.unary_storedParam, 'rb') as params_file:
                params_to_load = pickle.load(params_file)
            # When training, extend the loaded list with fresh values for
            # the two orientation parameters.
            if training:
                print('append value')
                params_to_load.append(
                    floatX(np.random.randn(*(1024, 2)) * 0.01))
                params_to_load.append(floatX(np.random.randn(*(2, )) * 0.0))
            self.setParams(params_to_load)

            print(MyConfig.refinedVGG_storedParam)
            with open(MyConfig.refinedVGG_storedParam, 'rb') as vgg_file:
                params_VGG = pickle.load(vgg_file)
            mNet.setParams(params_VGG)
# Validation rows are the ones whose subject_id is listed in val_index.
val_df = df[df['subject_id'].isin(val_index)]

# Datasets and loaders; only the training loader shuffles.
train_dataset = BraTSDataset(train_df, data_folder, transform=train_transform)
val_dataset = BraTSDataset(val_df, data_folder, transform=val_transform)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=64,
                                           shuffle=True,
                                           num_workers=1)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=64,
                                         shuffle=False,
                                         num_workers=1)

# Model, loss and optimiser; falls back to the CPU when CUDA is unavailable.
device = ('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.BCEWithLogitsLoss()
model = VGGNet().to(device).float()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# NOTE(review): mode='max' is only right if scheduler.step() is given a
# metric that should increase (e.g. accuracy) - confirm against the call
# site, which is outside this excerpt.
scheduler = ReduceLROnPlateau(optimizer,
                              mode='max',
                              factor=0.1,
                              patience=2,
                              verbose=True)
n_epochs = 50
stats = {'epoch': [], 'train_loss': [], 'val_loss': [], 'acc': []}
# np.inf is the supported spelling; the np.Inf alias was removed in
# NumPy 2.0.
val_loss_min = np.inf
for epoch in range(n_epochs):
    batch_train_loss = []
    epoch_loss = 0
    stats['epoch'].append(epoch)
    for images, labels in train_loader:
        loss = train_step(images, labels, model, criterion, optimizer)