def generate_theano_func(args, network, penalty, input_dict, target_var):

    prediction = get_output(network, input_dict)

    # loss = T.mean( target_var * ( T.log(target_var) - prediction ))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # loss += 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(network) )
    # penalty = sum ( T.sum(lstm_param**2) for lstm_param in lstm_params )
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param**2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1) )

    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean( target_var * ( T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss,
        updates=updates,
        allow_input_downcast=True,
    )

    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction],
            allow_input_downcast=True,
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc],
            allow_input_downcast=True,
        )

    return train_fn, val_fn
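The commented-out lines above sketch several ways the `penalty` term passed into this function could be built. Below is a minimal, self-contained sketch using Lasagne's regularization helpers; the toy network and the 1e-4 coefficient are illustrative assumptions, not taken from the original project.

import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer
from lasagne.nonlinearities import softmax
from lasagne.regularization import regularize_network_params, regularize_layer_params, l2

x = T.matrix('x')
l_in = InputLayer((None, 50), input_var=x)
network = DenseLayer(l_in, num_units=5, nonlinearity=softmax)

# L2 weight decay over all regularizable parameters, scaled by a hypothetical 1e-4
penalty = 1e-4 * regularize_network_params(network, l2)
# or, to penalise a single layer only:
# penalty = 1e-4 * regularize_layer_params(network, l2)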
Example #2
    def build_model(self):

        import theano.tensor as T
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.lr = T.scalar('lr')

        net = build_model_vgg16(input_shape=(None, 3, 224, 224), verbose=self.verbose)
        self.output_layer = net['prob']

        from lasagne.layers import get_output
        self.output = lasagne.layers.get_output(self.output_layer, self.x, deterministic=False)
        self.cost = lasagne.objectives.categorical_crossentropy(self.output, self.y).mean()
        from lasagne.objectives import categorical_accuracy
        self.error = 1 - categorical_accuracy(self.output, self.y, top_k=1).mean()
        self.error_top_5 = 1 - categorical_accuracy(self.output, self.y, top_k=5).mean()
Example #3
def test_categorical_accuracy():
    from lasagne.objectives import categorical_accuracy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')
    c = categorical_accuracy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(100, 5).astype(floatX)
    cls_predictions = np.argmax(predictions, axis=1)
    targets = np.random.random_integers(0, 4, (100,)).astype("int8")
    accuracy = cls_predictions == targets
    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
    one_hot = np.zeros((100, 5)).astype("int8")
    one_hot[np.arange(100), targets] = 1
    t = theano.tensor.imatrix('t')
    c = categorical_accuracy(p, t)
    assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
Example #4
def test_categorical_accuracy():
    from lasagne.objectives import categorical_accuracy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')
    c = categorical_accuracy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    cls_predictions = np.argmax(predictions, axis=1)
    targets = np.random.random_integers(0, 19, (10,)).astype("int8")
    accuracy = cls_predictions == targets
    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
    one_hot = np.zeros((10, 20)).astype("int8")
    one_hot[np.arange(10), targets] = 1
    t = theano.tensor.imatrix('t')
    c = categorical_accuracy(p, t)
    assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
Example #5
    def compute_cost(self, deterministic=False):
        output = get_output(self.net, deterministic=deterministic)

        cost = categorical_crossentropy(output, self.tg).mean()
        cost.name = 'negll'

        accuracy = categorical_accuracy(output, self.tg).mean()
        accuracy.name = 'accuracy'

        return cost, accuracy
Example #6
    def compute_cost(self, deterministic=False):
        output = get_output(self.net, deterministic=deterministic)

        cost = categorical_crossentropy(output, self.tg).mean()
        cost.name = 'negll'

        accuracy = categorical_accuracy(output, self.tg).mean()
        accuracy.name = 'accuracy'

        return cost, accuracy
    def compile_val(self):

        if self.verbose: print('compiling validation function...')
        
        import theano
        
        from lasagne.layers import get_output
        
        output_val = lasagne.layers.get_output(self.output_layer, self.x, deterministic=True)
        
        from lasagne.objectives import categorical_accuracy, categorical_crossentropy
        
        cost = categorical_crossentropy(output_val, self.y).mean()
        error = 1-categorical_accuracy(output_val, self.y, top_k=1).mean()
        error_top_5 = 1-categorical_accuracy(output_val, self.y, top_k=5).mean()
        
        self.val_fn = theano.function([self.subb_ind], [cost, error, error_top_5],
                                      updates=[],
                                      givens=[(self.x, self.shared_x_slice),
                                              (self.y, self.shared_y_slice)])
Example #8
def test_categorical_accuracy_top_k():
    from lasagne.objectives import categorical_accuracy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')
    top_k = 4
    c = categorical_accuracy(p, t, top_k=top_k)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    cls_predictions = np.argsort(predictions, axis=1).astype("int8")
    # (construct targets such that top-1 to top-10 predictions are in there)
    targets = cls_predictions[np.arange(10), -np.random.permutation(10)]
    top_predictions = cls_predictions[:, -top_k:]
    accuracy = np.any(top_predictions == targets[:, np.newaxis], axis=1)
    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
    one_hot = np.zeros((10, 20)).astype("int8")
    one_hot[np.arange(10), targets] = 1
    t = theano.tensor.imatrix('t')
    c = categorical_accuracy(p, t, top_k=top_k)
    assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
Example #9
    def build_model(self):

        import theano.tensor as T
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.lr = T.scalar('lr')

        net = build_model_vgg16(input_shape=(None, 3, 224, 224),
                                verbose=self.verbose)
        self.output_layer = net['prob']

        from lasagne.layers import get_output
        self.output = lasagne.layers.get_output(self.output_layer,
                                                self.x,
                                                deterministic=False)
        self.cost = lasagne.objectives.categorical_crossentropy(
            self.output, self.y).mean()
        from lasagne.objectives import categorical_accuracy
        self.error = 1 - categorical_accuracy(self.output, self.y,
                                              top_k=1).mean()
        self.error_top_5 = 1 - categorical_accuracy(
            self.output, self.y, top_k=5).mean()
Example #10
    def compile_val(self):

        if self.verbose: print('compiling validation function...')

        import theano

        from lasagne.layers import get_output

        output_val = lasagne.layers.get_output(self.output_layer,
                                               self.x,
                                               deterministic=True)

        from lasagne.objectives import categorical_accuracy, categorical_crossentropy

        cost = categorical_crossentropy(output_val, self.y).mean()
        error = 1 - categorical_accuracy(output_val, self.y, top_k=1).mean()
        error_top_5 = 1 - categorical_accuracy(output_val, self.y,
                                               top_k=5).mean()

        self.val_fn = theano.function([self.subb_ind],
                                      [cost, error, error_top_5],
                                      updates=[],
                                      givens=[(self.x, self.shared_x_slice),
                                              (self.y, self.shared_y_slice)])
def build(layer_heads, params):
    """"""
    fns = {}  # model methods
    x = T.tensor4('input')

    for target in params['targets']:
        fns[target['name']] = {}
        out_layer = layer_heads[target['name']]

        y = T.matrix('target')
        o = L.get_output(out_layer, inputs=x)
        o_vl = L.get_output(out_layer, inputs=x, deterministic=True)

        if 'class_weight' in params and params['class_weight']:
            loss_fn = partial(weighted_cce, weights=params['class_weight'])
        else:
            loss_fn = obj.categorical_crossentropy

        loss = loss_fn(o, y).mean()
        loss_vl = loss_fn(o_vl, y).mean()
        wd_l2 = reg.regularize_network_params(out_layer, reg.l2)
        wd_l2 *= params['beta']

        acc_vl = obj.categorical_accuracy(o_vl, y).mean()

        updates_ = updates.adam(loss + wd_l2,
                                L.get_all_params(out_layer, trainable=True),
                                learning_rate=params['learning_rate'],
                                epsilon=params['epsilon'])

        fns[target['name']]['train'] = theano.function(
            [x, y], updates=updates_, allow_input_downcast=True)
        fns[target['name']]['predict'] = theano.function(
            [x], o_vl, allow_input_downcast=True)
        fns[target['name']]['cost'] = theano.function(
            [x, y], loss_vl, allow_input_downcast=True)
        fns[target['name']]['acc'] = theano.function([x, y],
                                                     acc_vl,
                                                     allow_input_downcast=True)
        fns[target['name']]['transform'] = theano.function(
            [x],
            L.get_output(L.get_all_layers(layer_heads[target['name']])[-2],
                         inputs=x,
                         deterministic=True),
            allow_input_downcast=True)

    return fns, layer_heads
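A hedged usage sketch of the `build` helper above. The imports below are the module-level names the snippet relies on; the toy network, the target name 'genre', and all parameter values are illustrative assumptions.

from functools import partial  # needed by build() when 'class_weight' is given
import numpy as np
import theano
import theano.tensor as T
import lasagne.layers as L
import lasagne.objectives as obj
import lasagne.regularization as reg
import lasagne.updates as updates
from lasagne.layers import InputLayer, DenseLayer
from lasagne.nonlinearities import softmax

# A tiny stand-in network with a single softmax head named 'genre'.
l_in = InputLayer((None, 1, 8, 8))
l_out = DenseLayer(l_in, num_units=4, nonlinearity=softmax)
layer_heads = {'genre': l_out}
params = {'targets': [{'name': 'genre'}],
          'beta': 1e-4, 'learning_rate': 1e-3, 'epsilon': 1e-8}

fns, layer_heads = build(layer_heads, params)

X = np.random.rand(16, 1, 8, 8).astype('float32')
Y = np.eye(4, dtype='float32')[np.random.randint(0, 4, 16)]  # one-hot targets
fns['genre']['train'](X, Y)       # one optimisation step
print(fns['genre']['acc'](X, Y))  # accuracy computed from deterministic outputs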
    def build_model(self, train_set, test_set, validation_set=None):
        super(CNN, self).build_model(train_set, test_set, validation_set)

        epsilon = 1e-8
        y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
        loss_cc = aggregate(categorical_crossentropy(y_train, self.sym_t),
                            mode='mean')
        loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

        y = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                   epsilon, 1)
        loss_eval = aggregate(categorical_crossentropy(y, self.sym_t),
                              mode='mean')
        loss_acc = categorical_accuracy(y, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(loss_cc, all_params)
        grads = [T.clip(g, -5, 5) for g in grads]
        updates = rmsprop(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss_cc, loss_train_acc],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [self.sym_index, self.sym_batchsize],
            [loss_eval, loss_acc],
            givens={
                self.sym_x: self.sh_test_x[self.batch_slice],
                self.sym_t: self.sh_test_t[self.batch_slice],
            },
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_index, self.sym_batchsize],
                [loss_eval, loss_acc],
                givens={
                    self.sym_x: self.sh_valid_x[self.batch_slice],
                    self.sym_t: self.sh_valid_t[self.batch_slice],
                },
            )

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999
        self.train_args['outputs']['loss_cc'] = '%0.6f'
        self.train_args['outputs']['loss_train_acc'] = '%0.6f'

        self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_eval'] = '%0.6f'
        self.test_args['outputs']['loss_acc'] = '%0.6f'

        self.validate_args['inputs']['batchsize'] = 128
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['loss_acc'] = '%0.6f'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
Example #13
    def build_model(self,
                    train_set,
                    test_set,
                    validation_set=None,
                    weights=None):
        super(BRNN, self).build_model(train_set, test_set, validation_set)

        def brier_score(given, predicted, weight_vector, mask):
            return T.mean(
                T.power(given - predicted, 2.0).dot(weight_vector) * mask)
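        # Note on brier_score above: it is a weighted, masked Brier score, i.e. the
        # mean over batch/time of ((given - predicted)**2 . weight_vector) * mask,
        # where weight_vector holds per-class weights and mask zeroes out padded steps.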

        epsilon = 1e-8
        mask = get_output(self.mask, self.sym_x)
        y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
        train_brier = brier_score(y_train, self.sym_t, weights, mask)
        train_cc = aggregate(categorical_crossentropy(y_train, self.sym_t),
                             mode='mean')
        loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

        y_test = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                        epsilon, 1)
        test_brier = brier_score(y_test, self.sym_t, weights, mask)
        test_cc = aggregate(categorical_crossentropy(y_test, self.sym_t),
                            mode='mean')
        test_acc = categorical_accuracy(y_test, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(train_brier, all_params)
        grads = [T.clip(g, -1, 1) for g in grads]
        updates = adam(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [train_cc, train_brier],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [],
            [test_cc, test_brier],
            givens={
                self.sym_x: self.sh_test_x,
                self.sym_t: self.sh_test_t,
            },
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_index, self.sym_batchsize],
                [test_cc, test_acc],
                givens={
                    self.sym_x: self.sh_valid_x[self.batch_slice],
                    self.sym_t: self.sh_valid_t[self.batch_slice],
                },
            )

        predict = theano.function([self.sym_x], [y_test])

        self.train_args['inputs']['batchsize'] = 64
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999  # 1e-6
        self.train_args['outputs']['train_cc'] = '%0.4f'
        # self.train_args['outputs']['train_acc'] = '%0.4f'
        self.train_args['outputs']['train_brier'] = '%0.4f'

        # self.test_args['inputs']['batchsize'] = 64
        self.test_args['outputs']['test_cc'] = '%0.4f'
        # self.test_args['outputs']['test_acc'] = '%0.4f'
        self.test_args['outputs']['test_brier'] = '%0.4f'

        # self.validate_args['inputs']['batchsize'] = 64
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['test_acc'] = '%0.6f'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args, predict
Example #14
    def build_model(self,
                    train_set_unlabeled,
                    train_set_labeled,
                    test_set,
                    validation_set=None):
        """
        Build the auxiliary deep generative model from the initialized hyperparameters.
        Define the lower bound term and compile it into a training function.
        :param train_set_unlabeled: Unlabeled train set containing variables x, t.
        :param train_set_labeled: Labeled train set containing variables x, t.
        :param test_set: Test set containing variables x, t.
        :param validation_set: Validation set containing variables x, t.
        :return: train, test, validation function and dicts of arguments.
        """
        super(CSDGM, self).build_model(train_set_unlabeled, test_set,
                                       validation_set)

        sh_train_x_l = theano.shared(np.asarray(train_set_labeled[0],
                                                dtype=theano.config.floatX),
                                     borrow=True)
        sh_train_t_l = theano.shared(np.asarray(train_set_labeled[1],
                                                dtype=theano.config.floatX),
                                     borrow=True)
        n = self.sh_train_x.shape[0].astype(
            theano.config.floatX)  # no. of data points
        n_l = sh_train_x_l.shape[0].astype(
            theano.config.floatX)  # no. of labeled data points

        # Define the layers for the density estimation used in the lower bound.
        l_log_qa = GaussianLogDensityLayer(self.l_qa, self.l_qa_mu,
                                           self.l_qa_logvar)
        l_log_qz = GaussianLogDensityLayer(self.l_qz, self.l_qz_mu,
                                           self.l_qz_logvar)
        l_log_qy = MultinomialLogDensityLayer(self.l_qy, self.l_y_in, eps=1e-8)

        l_log_pz = StandardNormalLogDensityLayer(self.l_qz)
        l_log_pa = GaussianLogDensityLayer(self.l_qa, self.l_pa_mu,
                                           self.l_pa_logvar)

        l_x_in = ReshapeLayer(self.l_x_in, (-1, self.n_l * self.n_c))
        l_px = DimshuffleLayer(self.l_px, (0, 3, 1, 2, 4))
        l_px = ReshapeLayer(l_px, (-1, self.sym_samples, 1, self.n_c))
        if self.x_dist == 'bernoulli':
            l_log_px = BernoulliLogDensityLayer(self.l_px, self.l_x_in)
        elif self.x_dist == 'multinomial':
            l_log_px = MultinomialLogDensityLayer(l_px, l_x_in)
            l_log_px = ReshapeLayer(l_log_px, (-1, self.n_l, 1, 1, 1))
            l_log_px = MeanLayer(l_log_px, axis=1)
        elif self.x_dist == 'gaussian':
            l_px_mu = ReshapeLayer(
                DimshuffleLayer(self.l_px_mu, (0, 2, 3, 1, 4)),
                (-1, self.sym_samples, 1, self.n_l * self.n_c))
            l_px_logvar = ReshapeLayer(
                DimshuffleLayer(self.l_px_logvar, (0, 2, 3, 1, 4)),
                (-1, self.sym_samples, 1, self.n_l * self.n_c))
            l_log_px = GaussianLogDensityLayer(l_x_in, l_px_mu, l_px_logvar)

        def lower_bound(log_pa, log_qa, log_pz, log_qz, log_py, log_px):
            lb = log_px + log_py + (log_pz + log_pa - log_qa -
                                    log_qz) * (1.1 - self.sym_warmup)
            return lb
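        # lower_bound above combines the reconstruction term (log_px), the label
        # prior (log_py) and the latent terms (log_pz + log_pa - log_qa - log_qz);
        # the latent terms are scaled by the warm-up factor (1.1 - sym_warmup).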

        # Lower bound for labeled data
        out_layers = [
            l_log_pa, l_log_pz, l_log_qa, l_log_qz, l_log_px, l_log_qy
        ]
        inputs = {self.l_x_in: self.sym_x_l, self.l_y_in: self.sym_t_l}
        out = get_output(out_layers,
                         inputs,
                         batch_norm_update_averages=False,
                         batch_norm_use_averages=False)
        log_pa_l, log_pz_l, log_qa_x_l, log_qz_axy_l, log_px_zy_l, log_qy_ax_l = out

        # Prior p(y) expecting that all classes are evenly distributed
        py_l = softmax(T.zeros((self.sym_x_l.shape[0], self.n_y)))
        log_py_l = -categorical_crossentropy(py_l, self.sym_t_l).reshape(
            (-1, 1)).dimshuffle((0, 'x', 'x', 1))
        lb_l = lower_bound(log_pa_l, log_qa_x_l, log_pz_l, log_qz_axy_l,
                           log_py_l, log_px_zy_l)
        lb_l = lb_l.mean(axis=(1, 2))  # Mean over the sampling dimensions
        log_qy_ax_l *= (
            self.sym_beta * (n / n_l)
        )  # Scale the supervised cross entropy with the alpha constant
        lb_l += log_qy_ax_l.mean(axis=(
            1, 2
        ))  # Collect the lower bound term and mean over sampling dimensions

        # Lower bound for unlabeled data
        bs_u = self.sym_x_u.shape[0]

        # For the integrating out approach, we repeat the input matrix x, and construct a target (bs * n_y) x n_y
        # Example of input and target matrix for a 3 class problem and batch_size=2. 2D tensors of the form
        #               x_repeat                     t_repeat
        #  [[x[0,0], x[0,1], ..., x[0,n_x]]         [[1, 0, 0]
        #   [x[1,0], x[1,1], ..., x[1,n_x]]          [1, 0, 0]
        #   [x[0,0], x[0,1], ..., x[0,n_x]]          [0, 1, 0]
        #   [x[1,0], x[1,1], ..., x[1,n_x]]          [0, 1, 0]
        #   [x[0,0], x[0,1], ..., x[0,n_x]]          [0, 0, 1]
        #   [x[1,0], x[1,1], ..., x[1,n_x]]]         [0, 0, 1]]
        t_eye = T.eye(self.n_y, k=0)
        t_u = t_eye.reshape((self.n_y, 1, self.n_y)).repeat(bs_u,
                                                            axis=1).reshape(
                                                                (-1, self.n_y))
        x_u = self.sym_x_u.reshape(
            (1, bs_u, self.n_l, self.n_c)).repeat(self.n_y, axis=0).reshape(
                (-1, self.n_l, self.n_c))

        # Since the expectation of var a is outside the integration we calculate E_q(a|x) first
        a_x_u = get_output(self.l_qa,
                           self.sym_x_u,
                           batch_norm_update_averages=True,
                           batch_norm_use_averages=False)
        a_x_u_rep = a_x_u.reshape(
            (1, bs_u * self.sym_samples, self.n_a)).repeat(self.n_y,
                                                           axis=0).reshape(
                                                               (-1, self.n_a))
        out_layers = [l_log_pa, l_log_pz, l_log_qa, l_log_qz, l_log_px]
        inputs = {self.l_x_in: x_u, self.l_y_in: t_u, self.l_a_in: a_x_u_rep}
        out = get_output(out_layers,
                         inputs,
                         batch_norm_update_averages=False,
                         batch_norm_use_averages=False)
        log_pa_u, log_pz_u, log_qa_x_u, log_qz_axy_u, log_px_zy_u = out

        # Prior p(y) expecting that all classes are evenly distributed
        py_u = softmax(T.zeros((bs_u * self.n_y, self.n_y)))
        log_py_u = -categorical_crossentropy(py_u, t_u).reshape(
            (-1, 1)).dimshuffle((0, 'x', 'x', 1))
        lb_u = lower_bound(log_pa_u, log_qa_x_u, log_pz_u, log_qz_axy_u,
                           log_py_u, log_px_zy_u)
        lb_u = lb_u.reshape(
            (self.n_y, 1, 1, bs_u)).transpose(3, 1, 2, 0).mean(axis=(1, 2))
        inputs = {
            self.l_x_in: self.sym_x_u,
            self.l_a_in: a_x_u.reshape((-1, self.n_a))
        }
        y_u = get_output(self.l_qy,
                         inputs,
                         batch_norm_update_averages=True,
                         batch_norm_use_averages=False).mean(axis=(1, 2))
        y_u += 1e-8  # Ensure that we get no NANs when calculating the entropy
        y_u /= T.sum(y_u, axis=1, keepdims=True)
        lb_u = (y_u * (lb_u - T.log(y_u))).sum(axis=1)

        # Regularizing with weight priors p(theta|N(0,1)), collecting and clipping gradients
        weight_priors = 0.0
        for p in self.trainable_model_params:
            if 'W' not in str(p):
                continue
            weight_priors += log_normal(p, 0, 1).sum()

        # Collect the lower bound and scale it with the weight priors.
        elbo = ((lb_l.mean() + lb_u.mean()) * n + weight_priors) / -n
        lb_labeled = -lb_l.mean()
        lb_unlabeled = -lb_u.mean()
        log_px = log_px_zy_l.mean() + log_px_zy_u.mean()
        log_pz = log_pz_l.mean() + log_pz_u.mean()
        log_qz = log_qz_axy_l.mean() + log_qz_axy_u.mean()
        log_pa = log_pa_l.mean() + log_pa_u.mean()
        log_qa = log_qa_x_l.mean() + log_qa_x_u.mean()

        grads_collect = T.grad(elbo, self.trainable_model_params)
        params_collect = self.trainable_model_params
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        clip_grad, max_norm = 1, 5
        mgrads = total_norm_constraint(grads_collect, max_norm=max_norm)
        mgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
        updates = adam(mgrads, params_collect, self.sym_lr, sym_beta1,
                       sym_beta2)

        # Training function
        indices = self._srng.choice(size=[self.sym_bs_l],
                                    a=sh_train_x_l.shape[0],
                                    replace=False)
        x_batch_l = sh_train_x_l[indices]
        t_batch_l = sh_train_t_l[indices]
        x_batch_u = self.sh_train_x[self.batch_slice]
        if self.x_dist == 'bernoulli':  # Sample bernoulli input.
            x_batch_u = self._srng.binomial(size=x_batch_u.shape,
                                            n=1,
                                            p=x_batch_u,
                                            dtype=theano.config.floatX)
            x_batch_l = self._srng.binomial(size=x_batch_l.shape,
                                            n=1,
                                            p=x_batch_l,
                                            dtype=theano.config.floatX)

        givens = {
            self.sym_x_l: x_batch_l,
            self.sym_x_u: x_batch_u,
            self.sym_t_l: t_batch_l
        }
        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_bs_l, self.sym_beta,
            self.sym_lr, sym_beta1, sym_beta2, self.sym_samples,
            self.sym_warmup
        ]
        outputs = [
            elbo, lb_labeled, lb_unlabeled, log_px, log_pz, log_qz, log_pa,
            log_qa
        ]
        f_train = theano.function(inputs=inputs,
                                  outputs=outputs,
                                  givens=givens,
                                  updates=updates)

        # Default training args. Note that these can be changed during or prior to training.
        self.train_args['inputs']['batchsize_unlabeled'] = 100
        self.train_args['inputs']['batchsize_labeled'] = 100
        self.train_args['inputs']['beta'] = 0.1
        self.train_args['inputs']['learningrate'] = 3e-4
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999
        self.train_args['inputs']['samples'] = 1
        self.train_args['inputs']['warmup'] = 0.1
        self.train_args['outputs']['lb'] = '%0.3f'
        self.train_args['outputs']['lb-l'] = '%0.3f'
        self.train_args['outputs']['lb-u'] = '%0.3f'
        self.train_args['outputs']['px'] = '%0.3f'
        self.train_args['outputs']['pz'] = '%0.3f'
        self.train_args['outputs']['qz'] = '%0.3f'
        self.train_args['outputs']['pa'] = '%0.3f'
        self.train_args['outputs']['qa'] = '%0.3f'

        # Validation and test function
        y = get_output(self.l_qy, self.sym_x_l,
                       deterministic=True).mean(axis=(1, 2))
        class_err = (1. - categorical_accuracy(y, self.sym_t_l).mean()) * 100
        givens = {self.sym_x_l: self.sh_test_x, self.sym_t_l: self.sh_test_t}
        f_test = theano.function(inputs=[self.sym_samples],
                                 outputs=[class_err],
                                 givens=givens)

        # Test args.  Note that these can be changed during or prior to training.
        self.test_args['inputs']['samples'] = 1
        self.test_args['outputs']['test'] = '%0.2f%%'

        f_validate = None
        if validation_set is not None:
            givens = {
                self.sym_x_l: self.sh_valid_x,
                self.sym_t_l: self.sh_valid_t
            }
            f_validate = theano.function(inputs=[self.sym_samples],
                                         outputs=[class_err],
                                         givens=givens)
            # Default validation args. Note that these can be changed during or prior to training.
            self.validate_args['inputs']['samples'] = 1
            self.validate_args['outputs']['validation'] = '%0.2f%%'

        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
Example #15
loss_gen_source = T.mean(categorical_crossentropy(predictions=softmax(source_gen), targets=T.zeros(shape=(args.batch_size,), dtype='int32')))
loss_lab_class = T.mean(categorical_crossentropy(predictions=softmax(class_lab), targets=labels))
loss_lab_source = T.mean(categorical_crossentropy(predictions=softmax(source_lab), targets=T.zeros(shape=(args.batch_size,), dtype='int32'))) +\
    T.mean(categorical_crossentropy(predictions=softmax(source_gen), targets=T.ones(shape=(args.batch_size,), dtype='int32')))
weight_gen_loss = th.shared(np.float32(0.))
# output_lab = ll.get_output(disc_layers[-2],x_lab)
# output_gen = ll.get_output(disc_layers[-2],gen_dat)
# m1 = T.mean(output_lab,axis=0)
# m2 = T.mean(output_gen,axis=0)
# feature_loss = T.mean(abs(m1-m2))
loss_gen = (1-weight_gen_loss)*loss_gen_source
# loss_gen = (1-weight_gen_loss)*feature_loss
loss_lab = (1-weight_gen_loss)*loss_lab_source + weight_gen_loss*(loss_lab_class+0.5*loss_gen_class)

#network performance
D_acc_on_real = T.mean(categorical_accuracy(predictions=source_lab, targets=T.zeros(shape=(args.batch_size,))))
D_acc_on_fake = T.mean(categorical_accuracy(predictions=source_gen, targets=T.ones(shape=(args.batch_size,))))
G_acc_on_fake = T.mean(categorical_accuracy(predictions=source_gen, targets=T.zeros(shape=(args.batch_size,))))
performfun = th.function(inputs=[x_lab, labels, labels_gen], outputs=[D_acc_on_real, D_acc_on_fake, G_acc_on_fake])
train_err = T.mean(T.neq(T.argmax(class_lab, axis=1), labels))

# Theano functions for training the disc net
learning_rate_var = th.shared(np.cast[th.config.floatX](args.learning_rate))
disc_params = ll.get_all_params(disc_layers[-1], trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab, lr=learning_rate_var, mom1=0.5)
disc_param_avg = [th.shared(np.cast[th.config.floatX](0. * p.get_value()),broadcastable=p.broadcastable) for p in disc_params]
disc_avg_updates = [(a, a + 0.01 * (p - a)) for p, a in zip(disc_params, disc_param_avg)]
disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
# init_param = th.function(inputs=[x_lab], outputs=None, updates=init_updates,on_unused_input='ignore')
train_batch_disc = th.function(inputs=[x_lab, labels, labels_gen], outputs=[loss_lab, train_err], updates=disc_param_updates + disc_avg_updates)
test_batch = th.function(inputs=[x_lab, labels], outputs=train_err, givens=disc_avg_givens)
Example #16
def test_categorial_accuracy_invalid():
    from lasagne.objectives import categorical_accuracy
    with pytest.raises(TypeError) as exc:
        categorical_accuracy(theano.tensor.vector(),
                             theano.tensor.matrix())
    assert 'rank mismatch' in exc.value.args[0]
Example #17
def test_categorial_accuracy_invalid():
    from lasagne.objectives import categorical_accuracy
    with pytest.raises(TypeError) as exc:
        categorical_accuracy(theano.tensor.vector(),
                             theano.tensor.matrix())
    assert 'rank mismatch' in exc.value.args[0]
    def build_model(self,
                    train_set,
                    test_set,
                    validation_set=None,
                    weights=None):
        super(wconvRNN, self).build_model(train_set, test_set, validation_set)

        def brier_score(given, predicted, weight_vector):
            return T.power(given - predicted, 2.0).dot(weight_vector).mean()

        epsilon = 1e-8
        y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
        loss_brier_train = brier_score(y_train, self.sym_t, weights)
        loss_cc = aggregate(categorical_crossentropy(y_train, self.sym_t),
                            mode='mean')
        loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

        y_test = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                        epsilon, 1)
        loss_brier_test = brier_score(y_test, self.sym_t, weights)
        loss_eval = aggregate(categorical_crossentropy(y_test, self.sym_t),
                              mode='mean')
        loss_acc = categorical_accuracy(y_test, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(loss_brier_train, all_params)
        grads = [T.clip(g, -5, 5) for g in grads]
        updates = rmsprop(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss_brier_train],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [],
            [loss_brier_test],
            givens={
                self.sym_x: self.sh_test_x,
                self.sym_t: self.sh_test_t,
            },
            on_unused_input='ignore',
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_batchsize],
                [loss_brier_test],
                givens={
                    self.sym_x: self.sh_valid_x,
                    self.sym_t: self.sh_valid_t,
                },
                on_unused_input='ignore',
            )

        predict = theano.function([self.sym_x], [y_test])

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999
        self.train_args['outputs']['loss_brier_train'] = '%0.6f'
        # self.train_args['outputs']['loss_train_acc'] = '%0.6f'

        # self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_brier_test'] = '%0.6f'
        # self.test_args['outputs']['loss_acc'] = '%0.6f'

        # self.validate_args['inputs']['batchsize'] = 128
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['loss_acc'] = '%0.6f'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args, predict
Example #19
def transfer(sources,
             learning_rate,
             epsilon,
             beta,
             n_epochs,
             batch_sz,
             train_id=None,
             test_sources=None):
    """"""
    if train_id is None:
        train_id = uuid.uuid4()

    # launch logger
    logger = tblog.Logger('runs/{}'.format(train_id))

    # launch model
    # net = fcn_transfer({'inputs': sources})
    net = fcn_transfer({'inputs': range(2)})
    input = T.matrix('input')
    target = T.matrix('target')
    o = L.get_output(net, inputs=input)
    o_vl = L.get_output(net, inputs=input, deterministic=True)

    loss = obj.categorical_crossentropy(o, target).mean()
    loss_vl = obj.categorical_crossentropy(o_vl, target).mean()
    wd_l2 = reg.regularize_network_params(net, reg.l2)
    wd_l2 *= beta

    acc_vl = obj.categorical_accuracy(o_vl, target).mean()

    updates_ = updates.adam(loss + wd_l2,
                            L.get_all_params(net, trainable=True),
                            learning_rate=learning_rate,
                            epsilon=epsilon)

    Model = namedtuple('Model', 'partial_fit predict cost acc')
    model = Model(partial_fit=theano.function([input, target],
                                              updates=updates_,
                                              allow_input_downcast=True),
                  predict=theano.function([input],
                                          o_vl,
                                          allow_input_downcast=True),
                  cost=theano.function([input, target],
                                       loss_vl,
                                       allow_input_downcast=True),
                  acc=theano.function([input, target],
                                      acc_vl,
                                      allow_input_downcast=True))

    # load data
    D = [joblib.load(fn) for fn in sources]

    # prepare data
    trn_ix = np.where(D[0][2] == 'train')[0]
    val_ix = np.where(D[0][2] == 'valid')[0]
    ids_map = copy.deepcopy(D[0][3])

    y = copy.deepcopy(D[0][1])
    X = np.concatenate([d[0] for d in D], axis=1)
    lb = LabelBinarizer().fit(y)

    del D

    # TRAIN!
    iters = 0
    try:
        epoch = trange(n_epochs, desc='[Loss : -.--] Epoch', ncols=80)
        for n in epoch:
            np.random.shuffle(trn_ix)
            np.random.shuffle(val_ix)

            for i, X_, y_ in prepare_batch(X, y, batch_sz, lb):
                if iters % 100 == 0:
                    ix = np.random.choice(val_ix, batch_sz, replace=False)
                    X_v, y_v = X[ix], lb.transform(y[ix])

                    c = model.cost(X_, y_).item()
                    cv = model.cost(X_v, y_v).item()
                    a = model.acc(X_, y_).item()
                    av = model.acc(X_v, y_v).item()

                    logger.log_value('trns_cost_tr', c, iters)
                    logger.log_value('trns_cost_vl', cv, iters)
                    logger.log_value('trns_acc_tr', a, iters)
                    logger.log_value('trns_acc_vl', av, iters)

                    epoch.set_description(
                        '[v_loss: {:.4f} / v_acc: {:.4f}]Epoch'.format(cv, av))
                model.partial_fit(X_, y_)
                iters += 1

    except KeyboardInterrupt as kbe:
        print('User Stopped!')

    # evaluate
    uniq_ix_set = list(set(ids_map[val_ix]))
    Y_pred = []
    y_true = []
    Xvl = X[val_ix]
    yvl = y[val_ix]
    for i in tqdm(uniq_ix_set):
        ix = np.where(ids_map[val_ix] == i)[0]
        Y_pred.append(model.predict(Xvl[ix]).mean(axis=0))
        y_true.append(yvl[ix][0])
    Y_true = lb.transform(y_true)
    y_pred = [lb.classes_[i] for i in np.argmax(Y_pred, axis=1)]

    f1 = f1_score(y_true, y_pred, average='macro')
    ll = -np.mean(np.sum(Y_true * np.log(np.maximum(Y_pred, 1e-8)), axis=1))

    # TODO: this is under-dev functionality. not generally working
    if test_sources is not None:
        del X, y  # delete training data from memory
        # process test data
        test_fns = glob.glob(os.path.join(TEST_ROOT, '*.npy'))
        tids = map(lambda fn: os.path.basename(fn).split('.')[0], test_fns)
        Y_pred = []
        for fn in tqdm(test_fns, ncols=80):
            X = np.load(fn)
            Y_pred.append(model.predict(X).mean(axis=0))

        out_df = pd.DataFrame(Y_pred, columns=lb.classes_, index=tids)
        out_df.index.name = 'file_id'
        out_df.sort_index(inplace=True)
        out_df.to_csv('results/{}.csv'.format(train_id))

    # return result
    return train_id, f1, ll
Example #20
                        forgetgate=forget_gate_parameters,
                        cell=cell_parameters,
                        outgate=gate_parameters,
                        learn_init=True,
                        grad_clipping=100.0)

l_merge = ConcatLayer([l_lstm_up, l_lstm_down])
l_rshp1 = ReshapeLayer(l_merge, (-1, 2 * num_states))
l_dense = DenseLayer(l_rshp1, 1, W=HeNormal(gain='relu'), nonlinearity=elu)
l_rshp2 = ReshapeLayer(l_dense, (-1, 20))
l_y = NonlinearityLayer(l_rshp2, softmax)

prediction = get_output(l_y)

loss = T.mean(categorical_crossentropy(prediction, y))
accuracy = T.mean(categorical_accuracy(prediction, y))

params = get_all_params(l_y, trainable=True)
updates = adam(loss, params, learning_rate=3e-4)

print "... compiling"
train_fn = theano.function(inputs=[X, y], outputs=loss, updates=updates)
val_fn = theano.function(inputs=[X, y], outputs=[loss, accuracy])
op_fn = theano.function([X], outputs=prediction)

meta_data = {}
meta_data["n_iter"] = 50000
meta_data["num_output"] = 20

meta_data, params = train(train_fn,
                          val_fn,
#
# Define and compile theano functions
#
print "Defining and compiling training functions"

prediction = lasagne.layers.get_output(simple_net_output[0])
loss = categorical_crossentropy(prediction, target_var)
loss = loss.mean()

if weight_decay > 0:
    weightsl2 = regularize_network_params(simple_net_output,
                                          lasagne.regularization.l2)
    loss += weight_decay * weightsl2

train_acc = T.mean(categorical_accuracy(prediction, target_var))

params = lasagne.layers.get_all_params(simple_net_output, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)

train_fn = theano.function([input_var, target_var], [loss, train_acc],
                           updates=updates)

print "Done"

# In[11]:

print "Defining and compiling valid functions"
valid_prediction = lasagne.layers.get_output(simple_net_output[0],
                                             deterministic=True)
valid_loss = categorical_crossentropy(valid_prediction, target_var)
Example #22
def event_dr_classifier(args, input_var, target_var, wordEmbeddings, seqlen,
                        num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1

    #important context words as channels

    #CNN_sentence config
    filter_size = wordDim
    pool_size = seqlen - filter_size + 1

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input,
                         input_size=vocab_size,
                         output_size=wordDim,
                         W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape,
                         num_filters=num_filters,
                         filter_size=wordDim,
                         stride=1,
                         nonlinearity=tanh,
                         W=GlorotUniform())  #nOutputFrame = num_flters,
    #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0, 2, 1))

    #print get_output_shape(conv1d)

    pool_size = num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size)

    #print get_output_shape(maxpool)

    #forward = FlattenLayer(maxpool)

    #print get_output_shape(forward)

    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    prediction = get_output(network)

    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {
        emb: lambda_val,
        conv1d: lambda_val,
        hid: lambda_val,
        network: lambda_val
    }
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var],
                               loss,
                               updates=updates,
                               allow_input_downcast=True)

    test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    return train_fn, val_fn, network
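A hedged usage sketch of the compiled functions returned above, assuming the snippet's own Lasagne/Theano imports are in scope; the argument namespace, the random embedding matrix, and the toy data are illustrative assumptions.

import numpy as np
import theano.tensor as T
from argparse import Namespace

seqlen, num_feats, vocab, word_dim = 20, 3, 100, 50
input_var = T.itensor3('input')    # (batch, seqlen, num_feats) word indices
target_var = T.ivector('target')   # class labels in [0, 5)
wordEmbeddings = np.random.rand(word_dim, vocab).astype('float32')
args = Namespace(optimizer='adam', step=1e-3, hiddenDim=64)

train_fn, val_fn, network = event_dr_classifier(
    args, input_var, target_var, wordEmbeddings, seqlen, num_feats)

X = np.random.randint(0, vocab, size=(8, seqlen, num_feats)).astype('int32')
y = np.random.randint(0, 5, size=8).astype('int32')
print(train_fn(X, y))   # training loss for one minibatch
print(val_fn(X, y))     # [test loss, test accuracy]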
Example #23
def main():
    args = build_parser().parse_args()

    assert args.num_individuals >= 1, (
        'Must have at least one member in ensemble'
    )
    assert args.max_epochs >= 1, (
        'Must have at least 1 epoch.'
    )

    assert args.base_power >= 0, (
        'Cannot have fractional filters!'
    )

    import lasagne
    np.random.seed(args.seed)
    lasagne.random.set_rng(np.random.RandomState(args.seed))
    experiment_timestamp = str(time.time()).replace('.', '-')
    experiment_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'experiments',
        experiment_timestamp
    )
    if os.path.exists(experiment_path):
        print('Experiment directory exists!')
        sys.exit(1)
    else:
        os.makedirs(experiment_path)

    # Save the commit hash used for these experiments.
    commit_hash = str(
        subprocess.check_output(['git', 'rev-parse', 'HEAD']),
        'utf-8'
    )
    commit_file_path = os.path.join(experiment_path, 'exp_commit.txt')
    with open(commit_file_path, 'w') as fd:
        fd.write('\n'.join((__file__, commit_hash)))

    args_file_path = os.path.join(experiment_path, 'provided_args.json')
    with open(args_file_path, 'w') as fd:
        json.dump(vars(args), fd, indent=4)

    # Initial dataset setup
    dataset_mean = load_mean(args.mean_path)
    X, y = load_data(
        args.dataset_directory, dataset_mean,
        mean_normalise=True, four_dim=True)

    train_X, train_y, val_X, val_y = train_val_split(X, y)

    print(
        'Train X shape: {}\ttrain y shape: {}'
        'Test X shape: {}\tTest y shape: {}'
        ''.format(*(mat.shape for mat in (train_X, train_y, val_X, val_y)))
    )

    # Network setup
    input_var = T.tensor4('input', dtype=theano.config.floatX)
    target = T.vector('target', dtype='int32')

    for lbd_val in np.linspace(0., 1., 6):
        path_for_lambda = os.path.join(
            experiment_path, '{:.2f}'.format(lbd_val))
        os.makedirs(path_for_lambda)
        print('Lambda of {:.2f}'.format(lbd_val))
        for num_individuals in range(1, args.num_individuals + 1):
            network_kwargs = {
                'input_var': input_var,
                'base_power': args.base_power,
                'num_individuals': num_individuals
            }
            model = NCEnsemble(**network_kwargs)

            network = model.network
            prediction = get_output(network['output'])

            # TODO Remove hardcoded number of classes.
            loss = model.get_loss(
                target,
                np.array(lbd_val, dtype=theano.config.floatX),
                10
            ).mean()
            accuracy = np.array(100., dtype=theano.config.floatX) * (
                categorical_accuracy(prediction, target).mean())

            params = get_all_params(network['output'], trainable=True)
            updates = adam(loss, params)

            print('Starting theano function compilation')
            train_function = theano.function(
                [input_var, target],
                loss,
                updates=updates
            )
            loss_function = theano.function(
                [input_var, target],
                loss
            )
            accuracy_function = theano.function(
                [input_var, target],
                accuracy
            )
            print('Finished theano function compilation')

            train_network = make_training_function(
                train_function, loss_function,
                accuracy_function, network,
                val_X, val_y,
                args.max_epochs,
                args.early_stopping_epochs
            )

            (
                best_params, training_losses,
                validation_losses, validation_accuracies
            ) = train_network(
                train_X, train_y, True, False)

            ensemble_accuracy = validation_accuracies[
                np.argmin(validation_losses)]

            print(
                'Ensemble at {:.2f}% with {} members'
                ''.format(ensemble_accuracy, num_individuals)
            )
            print()
            sys.stdout.flush()

            member_path = os.path.join(
                path_for_lambda, 'ensemble_{}'.format(num_individuals))
            os.makedirs(member_path)
            stats = {
                'training_losses': training_losses,
                'validation_losses': validation_losses,
                'validation_accuracies': validation_accuracies
            }
            stats_path = os.path.join(member_path, 'train_stats.json')
            with open(stats_path, 'w') as fd:
                json.dump(stats, fd, indent=4)
            model_save_path = os.path.join(member_path, 'model.npz')
            np.savez(
                model_save_path,
                *get_all_param_values(model.final_layer)
            )
            model_hash = md5(model_save_path)
            model_hash_path = os.path.join(member_path, 'model_hash.txt')
            with open(model_hash_path, 'w') as fd:
                fd.write(model_hash + '\n')
Example #24
def model_class(ds, paths, param_arch, param_cost, param_updates, param_train):

    # create a log file containing the architecture configuration
    formatter = logging.Formatter('%(message)s')
    logger = logging.getLogger('log_config')
    if 'start_from_epoch' in param_train:
        name_tmp = 'config_from_epoch=%04d.log' % (
            param_train['start_from_epoch'])
    else:
        name_tmp = 'config.log'
    path_tmp = os.path.join(paths['exp'], name_tmp)
    if not os.path.isfile(path_tmp):
        handler = logging.FileHandler(
            path_tmp,
            mode='w')  # to append at the end of the file use: mode='a'
    else:
        raise Exception('[e] the log file ', name_tmp, ' already exists!')
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    # input dimensions
    dim_desc = ds.descs_train[0].shape[1]
    dim_labels = ds.labels_train[0].shape[0]
    print(dim_labels)

    # architecture definition:
    print(("[i] architecture definition... "), end=' ')
    tic = time.time()
    if param_arch['type'] == 0:
        desc, patch_op, cla, net, logger = arch_class_00(
            dim_desc, dim_labels, param_arch, logger)
    elif param_arch['type'] == 1:
        desc, patch_op, cla, net, logger = arch_class_01(
            dim_desc, dim_labels, param_arch, logger)
    elif param_arch['type'] == 2:
        desc, patch_op, cla, net, logger = arch_class_02(
            dim_desc, dim_labels, param_arch, logger)
    else:
        raise Exception('[e] architecture not supported!')
    print(("%02.2fs" % (time.time() - tic)))

    # cost function definition:
    print(("[i] cost function definition... "), end=' ')
    tic = time.time()
    pred = LL.get_output(cla, deterministic=True)  # in case we use dropout
    feat = LL.get_output(net)
    target = T.ivector('target')
    # data term
    if param_cost['cost_func'] == 'cross_entropy':
        if param_arch['non_linearity'] == 'softmax':
            cost_dataterm = T.mean(
                LO.categorical_crossentropy(pred, target)
            )  # in the original code we were using *.mean() instead of T.mean(*)
        elif param_arch['non_linearity'] == 'log_softmax':
            cost_dataterm = T.mean(
                categorical_crossentropy_logdomain(pred, target))
    elif param_cost['cost_func'] == 'cross_entropy_stable':
        if param_arch['non_linearity'] == 'softmax':
            cost_dataterm = T.mean(
                categorical_crossentropy_stable(pred, target))
        else:
            raise Exception(
                '[e] the chosen cost function is not implemented for the chosen non-linearity!'
            )
    else:
        raise Exception('[e] the chosen cost function is not supported!')
    # classification accuracy
    acc = LO.categorical_accuracy(pred, target).mean()
    # regularization
    cost_reg = param_cost['mu'] * LR.regularize_network_params(cla, LR.l2)
    # cost function
    cost = cost_dataterm + cost_reg
    # get params
    params = LL.get_all_params(cla)
    # gradient definition
    grad = T.grad(cost, params)
    grad_norm = T.nlinalg.norm(T.concatenate([g.flatten() for g in grad]), 2)
    print(("%02.2fs" % (time.time() - tic)))

    # updates definition:
    print(("[i] gradient updates definition... "), end=' ')
    tic = time.time()
    if param_updates['method'] == 'momentum':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        if param_updates.get('momentum') is not None:
            momentum = param_updates['momentum']  # default: 0.9
        else:
            raise Exception('[e] missing momentum parameter!')
        updates = LU.momentum(grad, params, learning_rate, momentum)
    elif param_updates['method'] == 'adagrad':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        updates = LU.adagrad(grad, params, learning_rate)
    elif param_updates['method'] == 'adadelta':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        updates = LU.adadelta(grad, params, learning_rate)
    elif param_updates['method'] == 'adam':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1e-03
        else:
            raise Exception('[e] missing learning_rate parameter!')
        if param_updates.get('beta1') is not None:
            beta1 = param_updates['beta1']  # default: 0.9
        else:
            raise Exception('[e] missing beta1 parameter!')
        if param_updates.get('beta2') is not None:
            beta2 = param_updates['beta2']  # default: 0.999
        else:
            raise Exception('[e] missing beta2 parameter!')
        if param_updates.get('epsilon') is not None:
            epsilon = param_updates['epsilon']  # default: 1e-08
        else:
            raise Exception('[e] missing epsilon parameter!')
        updates = LU.adam(grad, params, learning_rate, beta1, beta2, epsilon)
    else:
        raise Exception('[e] updates method not supported!')
    print(("%02.2fs" % (time.time() - tic)))

    # train / test functions:
    funcs = dict()
    print(("[i] compiling function 'train'... "), end=' ')
    tic = time.time()
    funcs['train'] = theano.function(
        [desc.input_var, patch_op.input_var, target],
        [cost, cost_dataterm, cost_reg, grad_norm, acc],
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='warn')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'fwd'... "), end=' ')
    tic = time.time()
    funcs['fwd'] = theano.function(
        [desc.input_var, patch_op.input_var, target], [cost, grad_norm, acc],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'pred'... "), end=' ')
    tic = time.time()
    funcs['pred'] = theano.function(
        [desc.input_var, patch_op.input_var, target], [pred],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'feat'... "), end=' ')
    tic = time.time()
    funcs['feat'] = theano.function(
        [desc.input_var, patch_op.input_var, target], [feat],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))

    # save cost function parameters to a config file
    logger.info('\nCost function parameters:')
    logger.info('   cost function = %s' % param_cost['cost_func'])
    logger.info('   mu            = %e' % param_cost['mu'])

    # save updates parameters to a config file
    logger.info('\nUpdates parameters:')
    logger.info('   method        = %s' % param_updates['method'])
    logger.info('   learning rate = %e' % param_updates['learning_rate'])
    if param_updates['method'] == 'momentum':
        logger.info('   momentum      = %e' % param_updates['momentum'])
    if param_updates['method'] == 'adam':
        logger.info('   beta1         = %e' % param_updates['beta1'])
        logger.info('   beta2         = %e' % param_updates['beta2'])
        logger.info('   epsilon       = %e' % param_updates['epsilon'])

    # save training parameters to a config file
    logger.info('\nTraining parameters:')
    logger.info('   epoch size = %d' % ds.epoch_size)

    return funcs, cla, updates
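
# Usage sketch (illustrative only, not part of the original script): the returned
# 'funcs' dictionary can drive a plain mini-batch loop. The iterator name
# `ds.train_iter()` and the variable unpacking below are assumptions.
#
#   for x_desc, x_patch, y in ds.train_iter():
#       cost, data_term, reg_term, gnorm, acc = funcs['train'](x_desc, x_patch, y)
#   test_cost, test_gnorm, test_acc = funcs['fwd'](x_desc, x_patch, y)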
Exemple #25
def main():
    setup_train_experiment(logger, FLAGS, "%(model)s_at")

    logger.info("Loading data...")
    data = mnist_load(FLAGS.train_size, FLAGS.seed)
    X_train, y_train = data.X_train, data.y_train
    X_val, y_val = data.X_val, data.y_val
    X_test, y_test = data.X_test, data.y_test

    img_shape = [None, 1, 28, 28]
    train_images = T.tensor4('train_images')
    train_labels = T.lvector('train_labels')
    val_images = T.tensor4('valid_images')
    val_labels = T.lvector('valid_labels')

    layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")]
    num_classes = layer_dims[-1]
    net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims)
    model = with_end_points(net)

    train_outputs = model(train_images)
    val_outputs = model(val_images, deterministic=True)

    # losses
    train_ce = categorical_crossentropy(train_outputs['prob'],
                                        train_labels).mean()
    train_at = adversarial_training(lambda x: model(x)['prob'],
                                    train_images,
                                    train_labels,
                                    epsilon=FLAGS.epsilon).mean()
    train_loss = train_ce + FLAGS.lmbd * train_at
    val_ce = categorical_crossentropy(val_outputs['prob'], val_labels).mean()
    val_deepfool_images = deepfool(
        lambda x: model(x, deterministic=True)['logits'],
        val_images,
        val_labels,
        num_classes,
        max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip,
        over_shoot=FLAGS.deepfool_overshoot)

    # metrics
    train_acc = categorical_accuracy(train_outputs['logits'],
                                     train_labels).mean()
    train_err = 1.0 - train_acc
    val_acc = categorical_accuracy(val_outputs['logits'], val_labels).mean()
    val_err = 1.0 - val_acc
    # deepfool robustness
    reduc_ind = list(range(1, train_images.ndim))
    l2_deepfool = (val_deepfool_images - val_images).norm(2, axis=reduc_ind)
    l2_deepfool_norm = l2_deepfool / val_images.norm(2, axis=reduc_ind)

    train_metrics = OrderedDict([('loss', train_loss), ('nll', train_ce),
                                 ('at', train_at), ('err', train_err)])
    val_metrics = OrderedDict([('nll', val_ce), ('err', val_err)])
    summary_metrics = OrderedDict([('l2', l2_deepfool.mean()),
                                   ('l2_norm', l2_deepfool_norm.mean())])

    lr = theano.shared(floatX(FLAGS.initial_learning_rate), 'learning_rate')
    train_params = get_all_params(net, trainable=True)
    train_updates = adam(train_loss, train_params, lr)

    logger.info("Compiling theano functions...")
    train_fn = theano.function([train_images, train_labels],
                               outputs=list(train_metrics.values()),
                               updates=train_updates)
    val_fn = theano.function([val_images, val_labels],
                             outputs=list(val_metrics.values()))
    summary_fn = theano.function([val_images, val_labels],
                                 outputs=list(summary_metrics.values()) +
                                 [val_deepfool_images])

    logger.info("Starting training...")
    try:
        samples_per_class = FLAGS.summary_samples_per_class
        summary_images, summary_labels = select_balanced_subset(
            X_val, y_val, num_classes, samples_per_class)
        save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
        save_images(summary_images, save_path)

        epoch = 0
        batch_index = 0
        while epoch < FLAGS.num_epochs:
            epoch += 1

            start_time = time.time()
            train_iterator = batch_iterator(X_train,
                                            y_train,
                                            FLAGS.batch_size,
                                            shuffle=True)
            epoch_outputs = np.zeros(len(train_fn.outputs))
            for batch_index, (images,
                              labels) in enumerate(train_iterator,
                                                   batch_index + 1):
                batch_outputs = train_fn(images, labels)
                epoch_outputs += batch_outputs
            epoch_outputs /= X_train.shape[0] // FLAGS.batch_size
            logger.info(
                build_result_str(
                    "Train epoch [{}, {:.2f}s]:".format(
                        epoch,
                        time.time() - start_time), train_metrics.keys(),
                    epoch_outputs))

            # update learning rate
            if epoch > FLAGS.start_learning_rate_decay:
                new_lr_value = lr.get_value() * FLAGS.learning_rate_decay_factor
                lr.set_value(floatX(new_lr_value))
                logger.debug("learning rate was changed to {:.10f}".format(
                    new_lr_value))

            # validation
            start_time = time.time()
            val_iterator = batch_iterator(X_val,
                                          y_val,
                                          FLAGS.test_batch_size,
                                          shuffle=False)
            val_epoch_outputs = np.zeros(len(val_fn.outputs))
            for images, labels in val_iterator:
                val_epoch_outputs += val_fn(images, labels)
            val_epoch_outputs /= X_val.shape[0] // FLAGS.test_batch_size
            logger.info(
                build_result_str(
                    "Test epoch [{}, {:.2f}s]:".format(
                        epoch,
                        time.time() - start_time), val_metrics.keys(),
                    val_epoch_outputs))

            if epoch % FLAGS.summary_frequency == 0:
                summary = summary_fn(summary_images, summary_labels)
                logger.info(
                    build_result_str(
                        "Epoch [{}] adversarial statistics:".format(epoch),
                        summary_metrics.keys(), summary[:-1]))
                save_path = os.path.join(FLAGS.samples_dir,
                                         'epoch-%d.png' % epoch)
                df_images = summary[-1]
                save_images(df_images, save_path)

            if epoch % FLAGS.checkpoint_frequency == 0:
                save_network(net, epoch=epoch)
    except KeyboardInterrupt:
        logger.debug("Keyboard interrupt. Stopping training...")
    finally:
        save_network(net)

    # evaluate final model on test set
    test_iterator = batch_iterator(X_test,
                                   y_test,
                                   FLAGS.test_batch_size,
                                   shuffle=False)
    test_results = np.zeros(len(val_fn.outputs))
    for images, labels in test_iterator:
        test_results += val_fn(images, labels)
    test_results /= X_test.shape[0] // FLAGS.test_batch_size
    logger.info(
        build_result_str("Final test results:", val_metrics.keys(),
                         test_results))
def multi_task_classifier(args,
                          input_var,
                          target_var,
                          wordEmbeddings,
                          seqlen,
                          num_feats,
                          lambda_val=0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape

    #span
    emb1 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim))
    conv1d_1 = DimshuffleLayer(
        Conv1DLayer(reshape1,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)
    hid_1 = DenseLayer(maxpool_1,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)
    """
    #DocTimeRel
    emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats*wordDim))
    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax)
    """

    #Type
    emb3 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim))
    conv1d_3 = DimshuffleLayer(
        Conv1DLayer(reshape3,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)
    hid_3 = DenseLayer(maxpool_3,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax)

    #Degree
    emb4 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim))
    conv1d_4 = DimshuffleLayer(
        Conv1DLayer(reshape4,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)
    hid_4 = DenseLayer(maxpool_4,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax)

    #Polarity
    emb5 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim))
    conv1d_5 = DimshuffleLayer(
        Conv1DLayer(reshape5,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)
    hid_5 = DenseLayer(maxpool_5,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax)

    #ContextualModality
    emb6 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim))
    conv1d_6 = DimshuffleLayer(
        Conv1DLayer(reshape6,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)
    hid_6 = DenseLayer(maxpool_6,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax)
    """
    #ContextualAspect
    emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats*wordDim))
    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax)
    """
    """
    #Permanence
    emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats*wordDim))
    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax)
    """

    # Is this important?
    """
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])
    """
    network_1_out = get_output(network_1)
    network_3_out = get_output(network_3)
    network_4_out = get_output(network_4)
    network_5_out = get_output(network_5)
    network_6_out = get_output(network_6)

    loss_1 = T.mean(binary_crossentropy(
        network_1_out, target_var)) + regularize_layer_params_weighted(
            {
                emb1: lambda_val,
                conv1d_1: lambda_val,
                hid_1: lambda_val,
                network_1: lambda_val
            }, l2)
    updates_1 = adagrad(loss_1,
                        get_all_params(network_1, trainable=True),
                        learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var],
                                 loss_1,
                                 updates=updates_1,
                                 allow_input_downcast=True)
    val_acc_1 = T.mean(
        binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var],
                               val_acc_1,
                               allow_input_downcast=True)
    """
    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb2:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)
    """

    loss_3 = T.mean(categorical_crossentropy(
        network_3_out, target_var)) + regularize_layer_params_weighted(
            {
                emb3: lambda_val,
                conv1d_3: lambda_val,
                hid_3: lambda_val,
                network_3: lambda_val
            }, l2)
    updates_3 = adagrad(loss_3,
                        get_all_params(network_3, trainable=True),
                        learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var],
                                 loss_3,
                                 updates=updates_3,
                                 allow_input_downcast=True)
    val_acc_3 = T.mean(
        categorical_accuracy(get_output(network_3, deterministic=True),
                             target_var))
    val_fn_3 = theano.function([input_var, target_var],
                               val_acc_3,
                               allow_input_downcast=True)

    loss_4 = T.mean(categorical_crossentropy(
        network_4_out, target_var)) + regularize_layer_params_weighted(
            {
                emb4: lambda_val,
                conv1d_4: lambda_val,
                hid_4: lambda_val,
                network_4: lambda_val
            }, l2)
    updates_4 = adagrad(loss_4,
                        get_all_params(network_4, trainable=True),
                        learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var],
                                 loss_4,
                                 updates=updates_4,
                                 allow_input_downcast=True)
    val_acc_4 = T.mean(
        categorical_accuracy(get_output(network_4, deterministic=True),
                             target_var))
    val_fn_4 = theano.function([input_var, target_var],
                               val_acc_4,
                               allow_input_downcast=True)

    loss_5 = T.mean(categorical_crossentropy(
        network_5_out, target_var)) + regularize_layer_params_weighted(
            {
                emb5: lambda_val,
                conv1d_5: lambda_val,
                hid_5: lambda_val,
                network_5: lambda_val
            }, l2)
    updates_5 = adagrad(loss_5,
                        get_all_params(network_5, trainable=True),
                        learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var],
                                 loss_5,
                                 updates=updates_5,
                                 allow_input_downcast=True)
    val_acc_5 = T.mean(
        categorical_accuracy(get_output(network_5, deterministic=True),
                             target_var))
    val_fn_5 = theano.function([input_var, target_var],
                               val_acc_5,
                               allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(
        network_6_out, target_var)) + regularize_layer_params_weighted(
            {
                emb6: lambda_val,
                conv1d_6: lambda_val,
                hid_6: lambda_val,
                network_6: lambda_val
            }, l2)
    updates_6 = adagrad(loss_6,
                        get_all_params(network_6, trainable=True),
                        learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var],
                                 loss_6,
                                 updates=updates_6,
                                 allow_input_downcast=True)
    val_acc_6 = T.mean(
        categorical_accuracy(get_output(network_6, deterministic=True),
                             target_var))
    val_fn_6 = theano.function([input_var, target_var],
                               val_acc_6,
                               allow_input_downcast=True)
    """
    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb7:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb8:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)
    """
    """
    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
    """
    return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6
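
# Note: the DocTimeRel (network_2), ContextualAspect (network_7) and Permanence
# (network_8) heads are commented out above, so only the span, type, degree,
# polarity and contextual-modality train/validation functions are returned.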
def main():
    args = build_parser().parse_args()

    assert args.num_individuals >= 1, (
        'Must have at least one member in ensemble')
    assert args.max_epochs >= 1, ('Must have at least 1 epoch.')

    assert args.base_power >= 0, ('Cannot have fractional filters!')

    np.random.seed(args.seed)
    import lasagne
    lasagne.random.set_rng(np.random.RandomState(args.seed))
    experiment_timestamp = str(time.time()).replace('.', '-')
    experiment_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'experiments', experiment_timestamp)
    if os.path.exists(experiment_path):
        print('Experiment directory exists!')
        sys.exit(1)
    else:
        os.makedirs(experiment_path)

    # Save the commit hash used for these experiments.
    commit_hash = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']),
                      'utf-8')
    commit_file_path = os.path.join(experiment_path, 'exp_commit.txt')
    with open(commit_file_path, 'w') as fd:
        fd.write(commit_hash)

    args_file_path = os.path.join(experiment_path, 'provided_args.json')
    with open(args_file_path, 'w') as fd:
        json.dump(vars(args), fd, indent=4)

    # Initial dataset setup
    dataset_mean = load_mean(args.mean_path)
    X, y = load_data(args.dataset_directory,
                     dataset_mean,
                     mean_normalise=True,
                     four_dim=True)

    train_X, train_y, val_X, val_y = train_val_split(X, y)

    print('Train X shape: {}\tTrain y shape: {}\n'
          'Val X shape: {}\tVal y shape: {}'
          ''.format(*(mat.shape for mat in (train_X, train_y, val_X, val_y))))

    # Network setup
    input_var = T.tensor4('input', dtype=theano.config.floatX)
    target = T.vector('target', dtype='int32')

    network_kwargs = {'input_var': input_var, 'base_power': args.base_power}
    model = MiniVGG(**network_kwargs)
    model.pretty_print_network()

    network = model.network
    prediction = get_output(network['output'])
    loss = categorical_crossentropy(prediction, target).mean()
    accuracy = np.array(100., dtype=theano.config.floatX) * (
        categorical_accuracy(prediction, target).mean())

    params = get_all_params(network['output'], trainable=True)
    updates = adam(loss, params)

    print('Starting theano function compilation')
    train_function = theano.function([input_var, target],
                                     loss,
                                     updates=updates)
    loss_function = theano.function([input_var, target], loss)
    accuracy_function = theano.function([input_var, target], accuracy)
    pred_function = theano.function([input_var], prediction)
    print('Finished theano function compilation')
    ensemble_prediction = make_ens_predictor(network, pred_function, val_X,
                                             val_y)
    train_network = make_training_function(train_function, loss_function,
                                           accuracy_function, network, val_X,
                                           val_y, args.max_epochs,
                                           args.early_stopping_epochs)

    # Setup bootstraps
    initialisations = get_k_network_initialisations(args.num_individuals,
                                                    input_var=input_var,
                                                    base_power=args.base_power)
    bootstraps = [
        get_bootstrap(train_X, train_y) for _ in range(args.num_individuals)
    ]
    ensembles = zip(initialisations, bootstraps)

    # Train models
    trained_parameters = []
    for index, (initialisation, bootstrap) in enumerate(ensembles):
        (best_params, training_losses, validation_losses,
         validation_accuracies) = train_network(*bootstrap, initialisation,
                                                True, False)
        trained_parameters.append(best_params)

        max_accuracy = validation_accuracies[np.argmin(validation_losses)]
        ensemble_accuracy = ensemble_prediction(trained_parameters)

        print('New member at {:.2f}% validation accuracy'.format(max_accuracy))
        print('Ensemble at {:.2f}% with {} members'
              ''.format(ensemble_accuracy, len(trained_parameters)))
        print()
        sys.stdout.flush()

        member_path = os.path.join(experiment_path, 'model_{}'.format(index))
        os.makedirs(member_path)
        stats = {
            'training_losses': training_losses,
            'validation_losses': validation_losses,
            'validation_accuracies': validation_accuracies
        }
        with open(os.path.join(member_path, 'train_stats.json'), 'w') as fd:
            json.dump(stats, fd, indent=4)
        model_save_path = os.path.join(member_path, 'model.npz')
        np.savez(model_save_path, *get_all_param_values(model.final_layer))
        model_hash = md5(model_save_path)
        model_hash_path = os.path.join(member_path, 'model_hash.txt')
        with open(model_hash_path, 'w') as fd:
            fd.write(model_hash + '\n')

    ensemble_accuracies = {}
    for num_models in range(1, args.num_individuals + 1):
        parameter_combinations = combinations(trained_parameters, num_models)
        validation_accuracies = [
            ensemble_prediction(parameter_combination)
            for parameter_combination in parameter_combinations
        ]
        ensemble_accuracies[num_models] = {
            'mean': np.mean(validation_accuracies),
            'std': np.std(validation_accuracies),
            'raw': validation_accuracies
        }
    results_path = os.path.join(experiment_path, 'results.json')
    with open(results_path, 'w') as fd:
        json.dump(ensemble_accuracies, fd, indent=4)
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val = 0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 
    filter_size=wordDim
    pool_size=num_filters

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))


    conv1d_1 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)  
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)


    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax)

    conv1d_3 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)  
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax)

    conv1d_4 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)  
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax)

    conv1d_5 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)  
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax)

    conv1d_6 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)  
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax)


    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax)

    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax)


    # Is this important?
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])

    loss_1 = T.mean(binary_crossentropy(network_1_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_1:lambda_val, 
                hid_1:lambda_val, network_1:lambda_val} , l2)
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True)
    val_acc_1 =  T.mean(binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True)


    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)


    loss_3 = T.mean(categorical_crossentropy(network_3_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_3:lambda_val, 
                hid_3:lambda_val, network_3:lambda_val} , l2)
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True)
    val_acc_3 =  T.mean(categorical_accuracy(get_output(network_3, deterministic=True), target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True)


    loss_4 = T.mean(categorical_crossentropy(network_4_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_4:lambda_val, 
                hid_4:lambda_val, network_4:lambda_val} , l2)
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True)
    val_acc_4 =  T.mean(categorical_accuracy(get_output(network_4, deterministic=True), target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True)

    loss_5 = T.mean(binary_crossentropy(network_5_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_5:lambda_val, 
                hid_5:lambda_val, network_5:lambda_val} , l2)
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True)
    val_acc_5 =  T.mean(binary_accuracy(get_output(network_5, deterministic=True), target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(network_6_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_6:lambda_val, 
                hid_6:lambda_val, network_6:lambda_val} , l2)
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True)
    val_acc_6 =  T.mean(categorical_accuracy(get_output(network_6, deterministic=True), target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True)

    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)


    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
def event_modality_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_filters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(categorical_crossentropy(prediction, target_var))  # matches the 5-way softmax output and the test loss below
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Exemple #30
def main():
    setup_experiment()

    data = mnist_load()
    X_test = data.X_test
    y_test = data.y_test
    if FLAGS.sort_labels:
        ys_indices = np.argsort(y_test)
        X_test = X_test[ys_indices]
        y_test = y_test[ys_indices]

    img_shape = [None, 1, 28, 28]
    test_images = T.tensor4('test_images')
    test_labels = T.lvector('test_labels')

    # loaded discriminator number of classes and dims
    layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")]
    num_classes = layer_dims[-1]

    # create and load discriminator
    net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims)
    load_network(net, epoch=FLAGS.load_epoch)
    model = with_end_points(net)

    test_outputs = model(test_images, deterministic=True)
    # deepfool images
    test_df_images = deepfool(lambda x: model(x, deterministic=True)['logits'],
                              test_images,
                              test_labels,
                              num_classes,
                              max_iter=FLAGS.deepfool_iter,
                              clip_dist=FLAGS.deepfool_clip,
                              over_shoot=FLAGS.deepfool_overshoot)
    test_df_images_all = deepfool(
        lambda x: model(x, deterministic=True)['logits'],
        test_images,
        num_classes=num_classes,
        max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip,
        over_shoot=FLAGS.deepfool_overshoot)
    test_df_outputs = model(test_df_images, deterministic=True)
    # fast gradient sign images
    test_fgsm_images = test_images + fast_gradient_perturbation(
        test_images, test_outputs['logits'], test_labels, FLAGS.fgsm_epsilon)
    test_at_outputs = model(test_fgsm_images, deterministic=True)

    # test metrics
    test_acc = categorical_accuracy(test_outputs['logits'], test_labels).mean()
    test_err = 1 - test_acc
    test_fgsm_acc = categorical_accuracy(test_at_outputs['logits'],
                                         test_labels).mean()
    test_fgsm_err = 1 - test_fgsm_acc
    test_df_acc = categorical_accuracy(test_df_outputs['logits'],
                                       test_labels).mean()
    test_df_err = 1 - test_df_acc

    # adversarial noise statistics
    reduc_ind = list(range(1, test_images.ndim))
    test_l2_df = T.sqrt(
        T.sum((test_df_images - test_images)**2, axis=reduc_ind))
    test_l2_df_norm = test_l2_df / T.sqrt(T.sum(test_images**2,
                                                axis=reduc_ind))
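    # skip-averages: mean taken only over samples where deepfool actually perturbed the image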
    test_l2_df_skip = test_l2_df.sum() / T.sum(test_l2_df > 0)
    test_l2_df_skip_norm = test_l2_df_norm.sum() / T.sum(test_l2_df_norm > 0)
    test_l2_df_all = T.sqrt(
        T.sum((test_df_images_all - test_images)**2, axis=reduc_ind))
    test_l2_df_all_norm = test_l2_df_all / T.sqrt(
        T.sum(test_images**2, axis=reduc_ind))

    test_metrics = OrderedDict([('err', test_err), ('err_fgsm', test_fgsm_err),
                                ('err_df', test_df_err),
                                ('l2_df', test_l2_df.mean()),
                                ('l2_df_norm', test_l2_df_norm.mean()),
                                ('l2_df_skip', test_l2_df_skip),
                                ('l2_df_skip_norm', test_l2_df_skip_norm),
                                ('l2_df_all', test_l2_df_all.mean()),
                                ('l2_df_all_norm', test_l2_df_all_norm.mean())
                                ])
    logger.info("Compiling theano functions...")
    test_fn = theano.function([test_images, test_labels],
                              outputs=list(test_metrics.values()))
    generate_fn = theano.function([test_images, test_labels],
                                  [test_df_images, test_df_images_all],
                                  on_unused_input='ignore')

    logger.info("Generate samples...")
    samples_per_class = 10
    summary_images, summary_labels = select_balanced_subset(
        X_test, y_test, num_classes, samples_per_class)
    save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
    save_images(summary_images, save_path)
    df_images, df_images_all = generate_fn(summary_images, summary_labels)
    save_path = os.path.join(FLAGS.samples_dir, 'deepfool.png')
    save_images(df_images, save_path)
    save_path = os.path.join(FLAGS.samples_dir, 'deepfool_all.png')
    save_images(df_images_all, save_path)

    logger.info("Starting...")
    test_iterator = batch_iterator(X_test,
                                   y_test,
                                   FLAGS.batch_size,
                                   shuffle=False)
    test_results = np.zeros(len(test_fn.outputs))
    start_time = time.time()
    for batch_index, (images, labels) in enumerate(test_iterator, 1):
        batch_results = test_fn(images, labels)
        test_results += batch_results
        if batch_index % FLAGS.summary_frequency == 0:
            df_images, df_images_all = generate_fn(images, labels)
            save_path = os.path.join(FLAGS.samples_dir,
                                     'b%d-df.png' % batch_index)
            save_images(df_images, save_path)
            save_path = os.path.join(FLAGS.samples_dir,
                                     'b%d-df_all.png' % batch_index)
            save_images(df_images_all, save_path)
            logger.info(
                build_result_str(
                    "Batch [{}] adversarial statistics:".format(batch_index),
                    test_metrics.keys(), batch_results))
    test_results /= batch_index
    logger.info(
        build_result_str(
            "Test results [{:.2f}s]:".format(time.time() - start_time),
            test_metrics.keys(), test_results))
Exemple #31
    def __init__(self, config):
        self.clouds = T.tensor3(dtype='float32')
        self.norms = [
            T.tensor3(dtype='float32') for step in xrange(config['steps'])
        ]
        self.target = T.vector(dtype='int64')
        KDNet = {}
        if config['input_features'] == 'no':
            KDNet['input'] = InputLayer((None, 1, 2**config['steps']),
                                        input_var=self.clouds)
        else:
            KDNet['input'] = InputLayer((None, 3, 2**config['steps']),
                                        input_var=self.clouds)
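        # Each loop iteration builds one level of the KD-network: the parent
        # features are split into left/right children, each child is passed
        # through per-axis/per-direction affine layers (weights shared between
        # the two children), gated by the corresponding splitting-normal masks,
        # and the gated branches are summed to form the next level's features.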
        for i in xrange(config['steps']):
            KDNet['norm{}_r'.format(i + 1)] = InputLayer(
                (None, 3, 2**(config['steps'] - 1 - i)),
                input_var=self.norms[i])
            KDNet['norm{}_l'.format(i + 1)] = ExpressionLayer(
                KDNet['norm{}_r'.format(i + 1)], lambda X: -X)
            KDNet['norm{}_l_X-'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_l'.format(i + 1)], '-', 0, config['n_f'][i + 1])
            KDNet['norm{}_l_Y-'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_l'.format(i + 1)], '-', 1, config['n_f'][i + 1])
            KDNet['norm{}_l_Z-'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_l'.format(i + 1)], '-', 2, config['n_f'][i + 1])
            KDNet['norm{}_l_X+'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_l'.format(i + 1)], '+', 0, config['n_f'][i + 1])
            KDNet['norm{}_l_Y+'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_l'.format(i + 1)], '+', 1, config['n_f'][i + 1])
            KDNet['norm{}_l_Z+'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_l'.format(i + 1)], '+', 2, config['n_f'][i + 1])
            KDNet['norm{}_r_X-'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_r'.format(i + 1)], '-', 0, config['n_f'][i + 1])
            KDNet['norm{}_r_Y-'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_r'.format(i + 1)], '-', 1, config['n_f'][i + 1])
            KDNet['norm{}_r_Z-'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_r'.format(i + 1)], '-', 2, config['n_f'][i + 1])
            KDNet['norm{}_r_X+'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_r'.format(i + 1)], '+', 0, config['n_f'][i + 1])
            KDNet['norm{}_r_Y+'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_r'.format(i + 1)], '+', 1, config['n_f'][i + 1])
            KDNet['norm{}_r_Z+'.format(i + 1)] = SPTNormReshapeLayer(
                KDNet['norm{}_r'.format(i + 1)], '+', 2, config['n_f'][i + 1])
            KDNet['cloud{}'.format(i+1)] = SharedDotLayer(KDNet['input'], config['n_f'][i]) if i == 0 else \
                                    ElemwiseSumLayer([KDNet['cloud{}_l_X-_masked'.format(i)],
                                                     KDNet['cloud{}_l_Y-_masked'.format(i)],
                                                     KDNet['cloud{}_l_Z-_masked'.format(i)],
                                                     KDNet['cloud{}_l_X+_masked'.format(i)],
                                                     KDNet['cloud{}_l_Y+_masked'.format(i)],
                                                     KDNet['cloud{}_l_Z+_masked'.format(i)],
                                                     KDNet['cloud{}_r_X-_masked'.format(i)],
                                                     KDNet['cloud{}_r_Y-_masked'.format(i)],
                                                     KDNet['cloud{}_r_Z-_masked'.format(i)],
                                                     KDNet['cloud{}_r_X+_masked'.format(i)],
                                                     KDNet['cloud{}_r_Y+_masked'.format(i)],
                                                     KDNet['cloud{}_r_Z+_masked'.format(i)]])
            KDNet['cloud{}_bn'.format(i + 1)] = BatchNormDNNLayer(
                KDNet['cloud{}'.format(i + 1)])
            KDNet['cloud{}_relu'.format(i + 1)] = NonlinearityLayer(
                KDNet['cloud{}_bn'.format(i + 1)], rectify)
            KDNet['cloud{}_r'.format(i + 1)] = ExpressionLayer(
                KDNet['cloud{}_relu'.format(i + 1)], lambda X: X[:, :, 1::2],
                (None, config['n_f'][i], 2**(config['steps'] - i - 1)))
            KDNet['cloud{}_l'.format(i + 1)] = ExpressionLayer(
                KDNet['cloud{}_relu'.format(i + 1)], lambda X: X[:, :, ::2],
                (None, config['n_f'][i], 2**(config['steps'] - i - 1)))

            KDNet['cloud{}_l_X-'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
            KDNet['cloud{}_l_Y-'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
            KDNet['cloud{}_l_Z-'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
            KDNet['cloud{}_l_X+'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
            KDNet['cloud{}_l_Y+'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
            KDNet['cloud{}_l_Z+'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])

            KDNet['cloud{}_r_X-'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(i + 1)],
                config['n_f'][i + 1],
                W=KDNet['cloud{}_l_X-'.format(i + 1)].W,
                b=KDNet['cloud{}_l_X-'.format(i + 1)].b)
            KDNet['cloud{}_r_Y-'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(i + 1)],
                config['n_f'][i + 1],
                W=KDNet['cloud{}_l_Y-'.format(i + 1)].W,
                b=KDNet['cloud{}_l_Y-'.format(i + 1)].b)
            KDNet['cloud{}_r_Z-'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(i + 1)],
                config['n_f'][i + 1],
                W=KDNet['cloud{}_l_Z-'.format(i + 1)].W,
                b=KDNet['cloud{}_l_Z-'.format(i + 1)].b)
            KDNet['cloud{}_r_X+'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(i + 1)],
                config['n_f'][i + 1],
                W=KDNet['cloud{}_l_X+'.format(i + 1)].W,
                b=KDNet['cloud{}_l_X+'.format(i + 1)].b)
            KDNet['cloud{}_r_Y+'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(i + 1)],
                config['n_f'][i + 1],
                W=KDNet['cloud{}_l_Y+'.format(i + 1)].W,
                b=KDNet['cloud{}_l_Y+'.format(i + 1)].b)
            KDNet['cloud{}_r_Z+'.format(i + 1)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(i + 1)],
                config['n_f'][i + 1],
                W=KDNet['cloud{}_l_Z+'.format(i + 1)].W,
                b=KDNet['cloud{}_l_Z+'.format(i + 1)].b)

            KDNet['cloud{}_l_X-_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_l_X-'.format(i + 1)],
                KDNet['norm{}_l_X-'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_l_Y-_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_l_Y-'.format(i + 1)],
                KDNet['norm{}_l_Y-'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_l_Z-_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_l_Z-'.format(i + 1)],
                KDNet['norm{}_l_Z-'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_l_X+_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_l_X+'.format(i + 1)],
                KDNet['norm{}_l_X+'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_l_Y+_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_l_Y+'.format(i + 1)],
                KDNet['norm{}_l_Y+'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_l_Z+_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_l_Z+'.format(i + 1)],
                KDNet['norm{}_l_Z+'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_r_X-_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_r_X-'.format(i + 1)],
                KDNet['norm{}_r_X-'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_r_Y-_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_r_Y-'.format(i + 1)],
                KDNet['norm{}_r_Y-'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_r_Z-_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_r_Z-'.format(i + 1)],
                KDNet['norm{}_r_Z-'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_r_X+_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_r_X+'.format(i + 1)],
                KDNet['norm{}_r_X+'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_r_Y+_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_r_Y+'.format(i + 1)],
                KDNet['norm{}_r_Y+'.format(i + 1)]
            ], T.mul)
            KDNet['cloud{}_r_Z+_masked'.format(i + 1)] = ElemwiseMergeLayer([
                KDNet['cloud{}_r_Z+'.format(i + 1)],
                KDNet['norm{}_r_Z+'.format(i + 1)]
            ], T.mul)

        KDNet['cloud_fin'] = ElemwiseSumLayer([
            KDNet['cloud{}_l_X-_masked'.format(config['steps'])],
            KDNet['cloud{}_l_Y-_masked'.format(config['steps'])],
            KDNet['cloud{}_l_Z-_masked'.format(config['steps'])],
            KDNet['cloud{}_l_X+_masked'.format(config['steps'])],
            KDNet['cloud{}_l_Y+_masked'.format(config['steps'])],
            KDNet['cloud{}_l_Z+_masked'.format(config['steps'])],
            KDNet['cloud{}_r_X-_masked'.format(config['steps'])],
            KDNet['cloud{}_r_Y-_masked'.format(config['steps'])],
            KDNet['cloud{}_r_Z-_masked'.format(config['steps'])],
            KDNet['cloud{}_r_X+_masked'.format(config['steps'])],
            KDNet['cloud{}_r_Y+_masked'.format(config['steps'])],
            KDNet['cloud{}_r_Z+_masked'.format(config['steps'])]
        ])

        KDNet['cloud_fin_bn'] = BatchNormDNNLayer(KDNet['cloud_fin'])
        KDNet['cloud_fin_relu'] = NonlinearityLayer(KDNet['cloud_fin_bn'],
                                                    rectify)
        KDNet['cloud_fin_reshape'] = ReshapeLayer(KDNet['cloud_fin_relu'],
                                                  (-1, config['n_f'][-1]))
        KDNet['output'] = DenseLayer(KDNet['cloud_fin_reshape'],
                                     config['num_classes'],
                                     nonlinearity=softmax)

        prob = get_output(KDNet['output'])
        prob_det = get_output(KDNet['output'], deterministic=True)

        weights = get_all_params(KDNet['output'], trainable=True)
        l2_pen = regularize_network_params(KDNet['output'], l2)

        loss = categorical_crossentropy(
            prob, self.target).mean() + config['l2'] * l2_pen
        accuracy = categorical_accuracy(prob, self.target).mean()

        lr = theano.shared(np.float32(config['learning_rate']))
        updates = adam(loss, weights, learning_rate=lr)
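        # lr is a theano shared variable, so it can be annealed later with
        # lr.set_value(...) without recompiling the functions below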

        self.train_fun = theano.function([self.clouds] + self.norms +
                                         [self.target], [loss, accuracy],
                                         updates=updates)
        self.prob_fun = theano.function([self.clouds] + self.norms +
                                        [self.target], [loss, prob_det])

        self.KDNet = KDNet
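        # A minimal usage sketch (an assumption, not part of the original class):
        # the compiled functions take the cloud tensor, the per-step norm masks
        # and the integer labels, in the order declared above.
        #
        #     inputs = [clouds_batch] + norm_batches + [labels_batch]
        #     loss_val, acc_val = self.train_fun(*inputs)
        #     val_loss, probs = self.prob_fun(*inputs)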
Exemple #32
0
patch_op = LL.InputLayer(input_var=Tsp.csc_fmatrix('patch_op'),
                         shape=(None, None))

ffn = get_model(inp, patch_op)

# LL.get_output -> theano variable representing the network output
output = LL.get_output(ffn)
pred = LL.get_output(ffn, deterministic=True)  # in case we use dropout

# target theano variable indicating the index a vertex should be mapped to w.r.t. the latent space
target = T.ivector('idxs')

# work with log-domain predictions, which is numerically better behaved
cla = utils_lasagne.categorical_crossentropy_logdomain(output, target,
                                                       nclasses).mean()
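# categorical_crossentropy_logdomain is project-specific (utils_lasagne); a
# plausible definition, an assumption rather than the repository's exact code,
# is the negative log-likelihood read directly off log-probabilities (nclasses
# is unused in this simplified sketch):
#
#     def categorical_crossentropy_logdomain(log_predictions, targets, nclasses):
#         return -log_predictions[T.arange(targets.shape[0]), targets]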
acc = LO.categorical_accuracy(pred, target).mean()

# a bit of regularization is commonly used
regL2 = L.regularization.regularize_network_params(ffn, L.regularization.l2)

cost = cla + l2_weight * regL2
''' Define the update rule, how to train '''

params = LL.get_all_params(ffn, trainable=True)
grads = T.grad(cost, params)
# computes the L2 norm of the gradient to better inspect training
grads_norm = T.nlinalg.norm(T.concatenate([g.flatten() for g in grads]), 2)

# Adam turned out to be a very good choice for correspondence
updates = L.updates.adam(grads, params, learning_rate=0.001)
''' Compile '''
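# A hedged sketch of the compilation step that typically follows; the exact
# input list depends on how `inp` and `patch_op` were declared earlier in this
# example (and theano is assumed to be imported upstream), so treat the
# argument names below as assumptions.
funcs = dict()
funcs['train'] = theano.function([inp.input_var, patch_op.input_var, target],
                                 [cost, cla, regL2, grads_norm, acc],
                                 updates=updates,
                                 on_unused_input='warn')
funcs['acc_loss'] = theano.function([inp.input_var, patch_op.input_var, target],
                                    [acc, cost],
                                    on_unused_input='warn')
funcs['predict'] = theano.function([inp.input_var, patch_op.input_var],
                                   [pred],
                                   on_unused_input='warn')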
def build_network_2dconv(
    args, input1_var, input1_mask_var, input2_var, input2_mask_var, target_var, wordEmbeddings, maxlen=36
):

    print ("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100

    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)
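    # With a filter spanning the full embedding width, the convolution output is
    # (batch, num_filters, maxlen - 3 + 1, 1), so pooling over that whole extent
    # keeps a single max feature per filter (max-over-time pooling).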

    # two conv pool layer
    # filter_size=(10, 100)
    # pool_size=(4,4)

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    # input_1_mask = InputLayer((None, maxlen),input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape_1 = ReshapeLayer(emb_1, (batchsize, 1, maxlen, wordDim))

    conv2d_1 = Conv2DLayer(
        reshape_1,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size)  # (None, 100, 1, 1)

    """
    filter_size_2=(4, 10)
    pool_size_2=(2,2)
    conv2d_1 = Conv2DLayer(maxpool_1, num_filters=num_filters, filter_size=filter_size_2, stride=stride, 
        nonlinearity=rectify,W=GlorotUniform()) #(None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size_2) #(None, 100, 1, 1) (None, 100, 1, 20)
    """

    forward_1 = FlattenLayer(maxpool_1)  # (None, 100) #(None, 50400)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    # input_2_mask = InputLayer((None, maxlen),input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")

    reshape_2 = ReshapeLayer(emb_2, (batchsize, 1, maxlen, wordDim))
    conv2d_2 = Conv2DLayer(
        reshape_2,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size)  # (None, 100, 1, 1)

    """
    conv2d_2 = Conv2DLayer(maxpool_2, num_filters=num_filters, filter_size=filter_size_2, stride=stride, 
        nonlinearity=rectify,W=GlorotUniform()) #(None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size_2) #(None, 100, 1, 1)
    """

    forward_2 = FlattenLayer(maxpool_2)  # (None, 100)

    # element-wise merging requires both sentence representations to have the same fixed length
    mul = ElemwiseMergeLayer([forward_1, forward_2], merge_function=T.mul)
    sub = AbsSubLayer([forward_1, forward_2], merge_function=T.sub)
    concat = ConcatLayer([mul, sub])

    # Note: this overrides the mul/sub interaction features above and feeds the
    # plain concatenation of the two sentence vectors to the dense layer.
    concat = ConcatLayer([forward_1, forward_2])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=softmax)

    # prediction = get_output(network, {input_1:input1_var, input_2:input2_var})
    prediction = get_output(network)

    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d_1: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    # test_prediction = get_output(network, {input_1:input1_var, input_2:input2_var}, deterministic=True)
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    """
    train_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)
    """
    train_fn = theano.function([input1_var, input2_var, target_var], loss, updates=updates, allow_input_downcast=True)

    if args.task == "sts":
        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var], 
            [test_loss, test_prediction], allow_input_downcast=True)
        """
        val_fn = theano.function(
            [input1_var, input2_var, target_var], [test_loss, test_prediction], allow_input_downcast=True
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))

        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var], 
            [test_loss, test_acc], allow_input_downcast=True)
        """
        val_fn = theano.function([input1_var, input2_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
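# A minimal, hypothetical usage sketch (variable names are assumptions): the
# symbolic variables are created first, then the compiled functions are fed
# padded word-index matrices and integer labels (val_fn returns accuracy only
# for the "ent" task).
#
#     train_fn, val_fn = build_network_2dconv(args, s1_var, s1_mask_var,
#                                             s2_var, s2_mask_var, y_var, embeddings)
#     batch_loss = train_fn(s1_batch, s2_batch, y_batch)
#     dev_loss, dev_acc = val_fn(s1_dev, s2_dev, y_dev)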
Exemple #34
0
    def build_model(self, train_set, test_set, validation_set=None):
        super(UFCNN, self).build_model(train_set, test_set, validation_set)

        epsilon = 1e-8
        loss_cc = aggregate(categorical_crossentropy(
            T.clip(get_output(self.model, self.sym_x), epsilon, 1),
            self.sym_t),
                            mode='mean')

        y = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                   epsilon, 1)
        loss_eval = aggregate(categorical_crossentropy(y, self.sym_t),
                              mode='mean')
        loss_acc = categorical_accuracy(y, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        grads = T.grad(loss_cc, all_params)
        for idx, param in enumerate(all_params):
            param_name = param.name
            if ('h2.W' in param_name) or ('g2.W' in param_name):
                print(param_name)
                grads[idx] *= self.l2_mask
            if ('h3.W' in param_name) or ('g3.W' in param_name):
                print(param_name)
                grads[idx] *= self.l3_mask
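        # The gradient masking above zeroes the update for masked weight entries,
        # so the masked positions in the h2/g2 and h3/g3 filters stay fixed during
        # training (presumably how the UFCNN enforces its sparse connectivity).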

        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        updates = adam(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss_cc],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [self.sym_index, self.sym_batchsize],
            [loss_eval],
            givens={
                self.sym_x: self.sh_test_x[self.batch_slice],
                self.sym_t: self.sh_test_t[self.batch_slice],
            },
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_index, self.sym_batchsize],
                [loss_eval, loss_acc],
                givens={
                    self.sym_x: self.sh_valid_x[self.batch_slice],
                    self.sym_t: self.sh_valid_t[self.batch_slice],
                },
            )

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999
        self.train_args['outputs']['loss_cc'] = '%0.6f'

        self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_eval'] = '%0.6f'

        self.validate_args['inputs']['batchsize'] = 128
        self.validate_args['outputs']['loss_eval'] = '%0.6f'
        self.validate_args['outputs']['loss_acc'] = '%0.6f%%'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
Exemple #35
0
loss_lab_source = T.mean(categorical_crossentropy(predictions=softmax(source_lab), targets=T.zeros(shape=(args.batch_size,), dtype='int32'))) +\
    T.mean(categorical_crossentropy(predictions=softmax(source_gen), targets=T.ones(shape=(args.batch_size,), dtype='int32')))
weight_gen_loss = th.shared(np.float32(0.))
output_lab = ll.get_output(disc_layers[-2], x_lab, deterministic=False)
output_gen = ll.get_output(disc_layers[-2], gen_dat, deterministic=False)
m1 = T.mean(output_lab, axis=0)
m2 = T.mean(output_gen, axis=0)
feature_loss = T.mean(abs(m1 - m2))
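# m1 and m2 are mean activations of an intermediate discriminator layer on real
# and generated batches; their mean absolute difference is a feature-matching
# term for the generator, in the spirit of Salimans et al., "Improved Techniques
# for Training GANs".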

loss_gen = (1 - weight_gen_loss) * (loss_gen_source + 0.5 * feature_loss)
loss_lab = (1 - weight_gen_loss) * loss_lab_source + weight_gen_loss * (
    loss_lab_class + loss_gen_class)

# network performance
D_acc_on_real = T.mean(
    categorical_accuracy(predictions=source_lab,
                         targets=T.zeros(shape=(args.batch_size, ))))
D_acc_on_fake = T.mean(
    categorical_accuracy(predictions=source_gen,
                         targets=T.ones(shape=(args.batch_size, ))))
G_acc_on_fake = T.mean(
    categorical_accuracy(predictions=source_gen,
                         targets=T.zeros(shape=(args.batch_size, ))))
performfun = th.function(inputs=[x_lab, labels, labels_gen],
                         outputs=[D_acc_on_real, D_acc_on_fake, G_acc_on_fake])
train_err = T.mean(T.neq(T.argmax(class_lab, axis=1), labels))
# test error
output_before_softmax = ll.get_output(disc_layers[-1],
                                      x_lab,
                                      deterministic=True)
test_class_lab = T.batched_dot(
    T.reshape(output_before_softmax,
Exemple #36
0
def calc_accuracy(prediction, targets):

    # we can use the lasagne objective categorical_accuracy to determine the top-1 single-label accuracy
    a = T.mean(objectives.categorical_accuracy(prediction, targets, top_k=1))

    return a
Exemple #37
0
def calc_accuracy(prediction, targets):

    # The lasagne objective categorical_accuracy is used to determine the top-1 accuracy
    a = T.mean(objectives.categorical_accuracy(prediction, targets, top_k=1))

    return a