Example #1
    def __init__(self,
                 enc_nhids=1000,
                 dec_nhids=1000,
                 enc_embed=620,
                 dec_embed=620,
                 src_vocab_size=30000,
                 trg_vocab_size=30000,
                 **kwargs):
        self.src_lookup_table = Lookup_table(enc_embed, src_vocab_size, prefix='src_lookup_table')
        self.trg_lookup_table = Lookup_table(dec_embed, trg_vocab_size, prefix='trg_lookup_table')
        self.encoder = BiGRU(enc_embed, enc_nhids, **kwargs)
        self.decoder = Decoder(dec_embed, dec_nhids, c_hids=enc_nhids*2, **kwargs)
        self.logistic = LogisticRegression(kwargs.get('n_out', dec_nhids), trg_vocab_size, prefix='logistic', drop_rate=kwargs['dropout'])
        self.params = self.src_lookup_table.params + self.trg_lookup_table.params + self.encoder.params + self.decoder.params \
            + self.logistic.params
        self.tparams = OrderedDict([(param.name, param) for param in self.params])
Example #2
def test_k_fold_logistic():
    np.set_printoptions(precision=4)
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    tX = remove_dimensions(tX)
    tX = standardize(tX)

    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))

    # Lambda space
    lambdas = np.logspace(-3, 1, 10)
    logistic = LogisticRegression((y, tX[0]),
                                  regularizer='Lasso',
                                  regularizer_p=0.1)
    best_lambda, (tr_err, te_err) = logistic.cross_validation(
        5, lambdas, lambda_name='regularizer_p', max_iters=6000)
    print('best lambda {}'.format(best_lambda))
    save_path = get_plot_path(test_k_fold_logistic.__name__)
    tr_err = np.array(tr_err)
    te_err = np.array(te_err)
    np.save(save_path + "tr_err", tr_err)
    np.save(save_path + "te_err", te_err)
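A quick way to inspect the curves saved above is to reload the two .npy files and plot them. This is only a sketch, assuming the same save_path and one error value per lambda in np.logspace(-3, 1, 10), averaging over folds if the saved arrays are 2-D:

import numpy as np
import matplotlib.pyplot as plt

tr_err = np.load(save_path + "tr_err.npy")   # np.save appends the .npy suffix
te_err = np.load(save_path + "te_err.npy")
# average over folds if cross_validation returned one row per fold
tr_curve = tr_err.mean(axis=0) if tr_err.ndim > 1 else tr_err
te_curve = te_err.mean(axis=0) if te_err.ndim > 1 else te_err
plt.plot(tr_curve, label="train error")
plt.plot(te_curve, label="validation error")
plt.xlabel("index into the lambda grid np.logspace(-3, 1, 10)")
plt.legend()
plt.show()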
Example #3
    def __init__(self,
                 enc_nhids=1000,
                 dec_nhids=1000,
                 enc_embed=620,
                 dec_embed=620,
                 src_vocab_size=30000,
                 trg_vocab_size=30000,
                 **kwargs):
        self.lr_in = kwargs.get('n_out', dec_nhids)

        self.src_lookup_table = Lookup_table(enc_embed,
                                             src_vocab_size,
                                             prefix='src_lookup_table')
        self.trg_lookup_table = Lookup_table(dec_embed,
                                             trg_vocab_size,
                                             prefix='trg_lookup_table')
        self.encoder = BiGRU(enc_embed, enc_nhids, **kwargs)
        # enc_nhids*2 corresponds to the last dimension of the encoded state
        self.decoder = Decoder(dec_embed,
                               dec_nhids,
                               c_hids=enc_nhids * 2,
                               **kwargs)
        # the output size of the decoder should match lr_in if n_out is not defined
        self.logistic = LogisticRegression(self.lr_in,
                                           trg_vocab_size,
                                           prefix='logistic',
                                           **kwargs)
        self.params = self.src_lookup_table.params + self.trg_lookup_table.params + \
            self.encoder.params + self.decoder.params + self.logistic.params
        self.tparams = OrderedDict([(param.name, param)
                                    for param in self.params])
        self.use_mv = kwargs.get('use_mv', 0)
Example #4
def test_compute_z():
    ''' (5 points) compute_z'''
    x = th.tensor([
        [1., 6.],  # the first sample in the mini-batch
        [2., 5.],  # the second sample in the mini-batch
        [3., 4.]
    ])  # the third sample in the mini-batch
    m = LogisticRegression(2)  # create a logistic regression object
    m.layer.weight.data = th.tensor([[-0.1, 0.1]])
    m.layer.bias.data = th.tensor([0.1])
    z = compute_z(x, m)
    assert type(z) == th.Tensor
    z_true = [
        [0.6],  # linear logit for the first sample in the mini-batch
        [0.4],  # linear logit for the second sample in the mini-batch
        [0.2]
    ]  # linear logit for the third sample in the mini-batch
    assert np.allclose(z.data, z_true, atol=1e-2)
    assert z.requires_grad
    L = th.sum(z)  # compute the sum of all elements in z
    L.backward()  # back propagate gradient to w and b
    assert np.allclose(m.layer.weight.grad, [[6, 15]], atol=0.1)
    assert np.allclose(m.layer.bias.grad, [3], atol=0.1)
    n = np.random.randint(2, 5)  # batch_size
    p = np.random.randint(2, 5)  # the number of input features
    x = th.randn(n, p)
    m = LogisticRegression(p)  # create a logistic regression object
    z = compute_z(x, m)
    assert np.allclose(z.size(), (n, 1))
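The assertions above fully determine compute_z: it simply applies the model's linear layer (m.layer, a torch.nn.Linear(p, 1) as configured in the test) to the mini-batch. A minimal sketch that satisfies this test:

def compute_z(x, m):
    # z = x @ w.T + b, shape (batch_size, 1); gradients flow back to m.layer.weight and m.layer.bias
    return m.layer(x)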
Example #5
def test_update_parameters():
    ''' (5 points) update_parameters'''
    m = LogisticRegression(3)  # create a logistic regression object
    m.layer.weight.data = th.tensor([[0.5, 0.1, -0.2]])
    m.layer.bias.data = th.tensor([0.2])
    # create a toy loss function: the sum of all elements in w and b
    L = m.layer.weight.sum() + m.layer.bias.sum()
    # create an optimizer for w and b with learning rate = 0.1
    optimizer = th.optim.SGD(m.parameters(), lr=0.1)
    # (step 1) back propagation to compute the gradients
    L.backward()
    assert np.allclose(m.layer.weight.grad, np.ones((3, 1)), atol=1e-2)
    assert np.allclose(m.layer.bias.grad, 1, atol=1e-2)
    # now perform gradient descent using SGD
    update_parameters(optimizer)
    # lets check the new values of the w and b
    assert np.allclose(m.layer.weight.data, [[0.4, 0., -0.3]], atol=1e-2)
    assert np.allclose(m.layer.bias.data, [0.1], atol=1e-2)
    # (step 2) back propagation again to compute the gradients
    L.backward()
    update_parameters(optimizer)
    assert np.allclose(m.layer.weight.data, [[0.3, -0.1, -0.4]], atol=1e-2)
    assert np.allclose(m.layer.bias.data, [0.], atol=1e-2)
    # (step 3) back propagation again to compute the gradients
    L.backward()
    update_parameters(optimizer)
    assert np.allclose(m.layer.weight.data, [[0.2, -0.2, -0.5]], atol=1e-2)
    assert np.allclose(m.layer.bias.data, [-0.1], atol=1e-2)
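The expected values in this test only work out if the gradients are cleared after every step; otherwise the repeated L.backward() calls would accumulate them and the second update would be twice as large. A minimal sketch consistent with the assertions:

def update_parameters(optimizer):
    # apply one SGD update using the gradients from the last backward() call ...
    optimizer.step()
    # ... then reset the gradients so the next backward() starts from zero
    optimizer.zero_grad()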
Example #6
    def apply_rnnlm(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask,
                    sentence_char, sentence_char_mask, use_noise=1):
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]
        src_char, src_char_mask = sentence_char[:-1], sentence_char_mask[:-1]

        emb_lstm_range = T.arange(self.n_emb_lstm)
        #word lookup table
        table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
        src_emb = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        rnn_layer_1st = NormalRNN(self.n_emb_lstm, self.n_hids)
        hiddens = rnn_layer_1st.apply(src_emb, src_mask)
        self.layers.append(rnn_layer_1st)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #7
    def apply_morph_only_rnn_gru(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
            1. morph lookup -> dropout
            2. MorphStructRNN
            3. lstm -> dropout
            4. lstm -> maxout -> dropout
            5. logistic
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        #morph lookup table
        emb_morph_range = T.arange(self.n_emb_morph)
        table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
        src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph)

        if self.dropout < 1.0:
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
        hiddens = morph_layer_1st.apply(src_morph_emb, src_morph_mask)
        self.layers.append(morph_layer_1st)

        rnn_layer_2rd = LSTM(self.n_hids , self.n_hids)
        hiddens , cells  = rnn_layer_2rd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_2rd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_3nd = LSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn_layer_3nd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_3nd)

        if True:
            maxout = MaxoutLayer()
            src_morph_merge_emb = src_morph_emb.sum(2)
            src_morph_mask = src_morph_mask.max(axis=2)
            #src_morph_merge_emb : sentence * batch * n_emb_morph
            states = T.concatenate([src_morph_merge_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_morph + self.n_hids, self.n_hids, src_morph_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #8
def test_complex():
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    tX, _, _ = standardize(tX, intercept=False)
    complex_tx, _, _ = compose_complex_features(tX,
                                                intercept=True,
                                                interaction=True,
                                                log=True,
                                                sqrt=False,
                                                pca=True)
    test_bias(y)
    logistic = LogisticRegression((y, complex_tx),
                                  regularizer="Lasso",
                                  regularizer_p=0.5)
    # result = logistic.train(lr=0.1, batch_size=32, max_iters=6000)
    result = logistic.cross_validation(4, [0.5],
                                       'regularizer_p',
                                       lr=0.1,
                                       batch_size=32,
                                       max_iters=6000,
                                       early_stop=1000)
Example #9
    def apply(self, facts, facts_mask, question, question_mask, y):

        table = lookup_table(self.n_in, self.vocab_size)
        self.params += table.params

        facts_encoder = auto_encoder(facts, facts_mask, self.vocab_size,
                                     self.n_in, self.n_hids, table=table)

        questions_encoder = auto_encoder(question, question_mask, self.vocab_size,
                                         self.n_in, self.n_hids, table=table)

        self.params += facts_encoder.params
        self.params += questions_encoder.params

        facts_rep = facts_encoder.output
        questions_rep = questions_encoder.output

        for _ in range(self.n_layer):
            questions_rep = self.interact(facts_rep, questions_rep)

        logistic_layer = LogisticRegression(questions_rep,
                                            self.n_hids, self.n_label)
        self.params += logistic_layer.params
        self.cost = logistic_layer.cost(y)/y.shape[0]
        self.decoder_cost = facts_encoder.cost + questions_encoder.cost

        self.errors = logistic_layer.errors(y)
        return self.cost, self.decoder_cost
Example #10
    def apply_morph_memory(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        emb_lstm_range = T.arange(self.n_emb_lstm)
        #word read lookup table
        table_read = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb_read', rng=1234)
        src_read = table_read.apply(src, emb_lstm_range)
        self.layers.append(table_read)

        #word write lookup table
        table_write = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb_write', rng=4321)
        src_write = table_write.apply(src, emb_lstm_range)
        self.layers.append(table_write)

        #morph read lookup table
        emb_morph_range = T.arange(self.n_emb_morph)
        table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb_read', rng=4321)
        src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph)

        if self.dropout < 1.0:
            src_read = DropoutLayer(src_read, use_noise, self.dropout)
            src_write = DropoutLayer(src_write, use_noise, self.dropout)
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
        src_read, cells = lstm_att_1st.apply(src_read, src_morph_emb, src_mask)
        self.layers.append(lstm_att_1st)

        lstm_att_2st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
        src_write, cells = lstm_att_2st.apply(src_write, src_morph_emb, src_mask)
        self.layers.append(lstm_att_2st)

        #memory_layer_1st = MorphMerge2(self.n_emb_lstm, self.n_hids)
        memory_layer_1st = MemoryRNN(self.n_emb_lstm, self.n_hids)
        hiddens = memory_layer_1st.apply(src_read, src_mask , src_write)
        self.layers.append(memory_layer_1st)

        #rnn_layer_2rd = MorphMerge2(self.n_hids, self.n_hids)
        memory_layer_2rd = MemoryRNN(self.n_hids, self.n_hids)
        hiddens = memory_layer_2rd.apply(src_read, src_mask, src_write, hiddens)
        self.layers.append(memory_layer_2rd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #11
    def apply_normal(self, sentence, sentence_mask, use_noise=1, use_maxout=True):
        """
            sentence : sentence * batch
            1. word lookup -> dropout
            2. lstm -> dropout
            3. lstm -> maxout -> dropout
            4. logistic
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]

        emb_lstm_range = T.arange(self.n_emb_lstm)
        #word lookup table
        table = DynamicMixLookupTable(self.n_emb_lstm, **self.cfig)
        #table = DynamicLookupTable(self.n_emb_lstm, **self.cfig)
        #table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
        src_emb = table.apply(src, emb_lstm_range)
        self.src_emb = src_emb
        self.layers.append(table)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        rnn_layer_1st = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens , cells  = rnn_layer_1st.apply(src_emb, src_mask)
        self.layers.append(rnn_layer_1st)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn_layer_2rd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_2rd)

        if use_maxout:
            maxout = MaxoutLayer()
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        #hier_softmax_layer = HierarchicalSoftmax(hiddens, self.n_hids, self.vocab_size)
        #self.layers.append(hier_softmax_layer)
        #self.cost = hier_softmax_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #12
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--dataset",
                        type=str,
                        default="usa",
                        help="airport dataset to run experiment on")

    parser.add_argument("--embed_dim",
                        type=int,
                        default=128,
                        help="struc2vec output embedding dimesnion")

    parser.add_argument("--epochs",
                        type=int,
                        default=10,
                        help="number of epochs")

    parser.add_argument("--lr", type=float, default=0.01, help="learning rate")

    parser.add_argument("--l2",
                        type=float,
                        default=0.1,
                        help="L2 regularization")

    args = parser.parse_args()
    dataset = args.dataset
    embed_dim = args.embed_dim
    epochs = args.epochs
    lr_rate = args.lr
    l2 = args.l2
    num_classes = 4

    test_accuracy = []

    for i in range(10):
        print("Experiment {}".format(i))
        model = LogisticRegression(embed_dim, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr_rate,
                                    weight_decay=l2)

        feat_data, labels, train_idx, test_idx = load_dataset(
            dataset, embed_dim)

        train(feat_data, labels, train_idx, model, criterion, optimizer,
              epochs)
        test_acc = evaluate(feat_data, labels, test_idx, model, criterion)

        test_accuracy.append(test_acc)

    print("Average performance: {}, standard deviation: {}".format(
        np.average(test_accuracy), np.std(test_accuracy)))
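The script above only needs LogisticRegression to be a single linear layer mapping the struc2vec embedding to one logit per class (the softmax is folded into CrossEntropyLoss). A plausible sketch of such a module; the project's actual model file may differ:

import torch

class LogisticRegression(torch.nn.Module):
    # multinomial logistic regression: one linear layer, raw class logits out
    def __init__(self, embed_dim, num_classes):
        super().__init__()
        self.linear = torch.nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        return self.linear(x)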
Example #13
def test_data_model():
    title = 'complex_full_before_ddl_interactions_full'
    print("Base line testing for model " + title)
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    _, test_x, test_ids = load_test_data(clean=False)

    data = compose_interactions_for_transforms(tX)
    t_data = compose_interactions_for_transforms(test_x)

    # Test 1 Ridge 0.1
    logistic = LogisticRegression((y, data),
                                  regularizer='Ridge',
                                  regularizer_p=0.1)
    weight = logistic.train(lr=0.01,
                            decay=0.5,
                            max_iters=2000,
                            early_stop=1000,
                            decay_intval=100)
    # weight, _, _ = logistic.cross_validation(4, [0.1, 0.5, 0.05], 'regularizer_p', max_iters=2000)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() +
        '/submission/removed_outlier_{}.csv'.format(title + 'Ridge01'))

    # Test 2 Lasso 0.1
    logistic = LogisticRegression((y, data),
                                  regularizer='Lasso',
                                  regularizer_p=0.1)
    weight = logistic.train(lr=0.01,
                            decay=0.5,
                            max_iters=2000,
                            early_stop=1000,
                            decay_intval=100)
    # weight, _, _ = logistic.cross_validation(4, [0.1, 0.5, 0.05], 'regularizer_p', max_iters=2000)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() +
        '/submission/removed_outlier_{}.csv'.format(title + '-Lasso0.1'))

    # Test 3 No penalized
    logistic = LogisticRegression((y, data))
    weight = logistic.train(lr=0.01,
                            decay=0.5,
                            max_iters=2000,
                            early_stop=1000)
    # weight, _, _ = logistic.cross_validation(4, [0.1, 0.5, 0.05], 'regularizer_p', max_iters=2000)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() + '/submission/removed_outlier_{}.csv'.format(title))
Example #14
    def apply_morph_only_memory(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        #morph read lookup table
        emb_morph_range = T.arange(self.n_emb_morph)
        table_morph_read = LookupTable(self.n_emb_morph, self.morph_size, name='Memb_read', rng=1234)
        src_morph_read = table_morph_read.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph_read)

        #morph write lookup table
        table_morph_write = LookupTable(self.n_emb_lstm, self.morph_size, name='Memb_write', rng=4321)
        src_morph_write = table_morph_write.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph_write)

        if self.dropout < 1.0:
            src_morph_read = DropoutLayer(src_morph_read, use_noise, self.dropout)
            src_morph_write = DropoutLayer(src_morph_write, use_noise, self.dropout)

        morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
        src_read = morph_layer_1st.apply(src_morph_read, src_morph_mask)
        self.layers.append(morph_layer_1st)

        morph_layer_2rd = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
        src_write = morph_layer_2rd.apply(src_morph_write, src_morph_mask)
        self.layers.append(morph_layer_2rd)

        memory_layer_1st = MemoryRNN(self.n_emb_lstm, self.n_hids)
        hiddens = memory_layer_1st.apply(src_read, src_mask , src_write)
        self.layers.append(memory_layer_1st)

        memory_layer_2rd = MemoryRNN(self.n_hids, self.n_hids)
        hiddens = memory_layer_2rd.apply(src_read, src_mask, src_write, hiddens)
        self.layers.append(memory_layer_2rd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #15
def test_draw():
    """
    Draw a balanced subsample; in practice this gave a worse result.
    """
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))

    # data, _, _ = standardize(tX)

    # test_bias(y)
    # nb_pc = 5
    # print("test the PCA with {} elements".format(nb_pc))
    # pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)
    #
    # print("get interactions")
    # interaction = interactions(data, range(0, 10))
    # interaction, _, _ = standardize(interaction)
    # print("select first 10 data entry with pc data")
    # data = np.c_[data[:, 0:10], pc_data]
    # data = np.c_[data, interaction]
    # # Begin the least square sgd
    # e_time = datetime.datetime.now()
    # print("Finish data reading in {s} seconds".
    #       format(s=(e_time - b_time).total_seconds()))
    data, _, _ = compose_complex_features_further(tX,
                                                  intercept=True,
                                                  interaction=True,
                                                  log=True,
                                                  sqrt=True,
                                                  power=True,
                                                  pca=True)
    train, valid = draw_balanced_subsample(y, data, trainsize=6000)
    # t_data, _, _ = compose_complex_features_further(test_x, intercept=True,
    #                                                 interaction=True,
    #                                                 log=True,
    #                                                 sqrt=True,
    #                                                 power=True,
    #                                                 pca=True)

    logistic = LogisticRegression(train=train,
                                  validation=valid,
                                  regularizer='Lasso',
                                  regularizer_p=0.5)
    result = logistic.train(lr=0.01, decay=0.5, early_stop=400, max_iters=2000)
    print(result)
Example #16
    def apply_model(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        src_emb = lookup_layer('word',src)
        #src_morph_emb : sentence * batch * morph * n_emb_morph
        #src_morph_emb = lookup_layer('morph',src)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        # the first LSTM consumes the word embeddings
        rnn_layer_1rd = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens, cells = rnn_layer_1rd.apply(src_emb, src_mask)
        self.layers.append(rnn_layer_1rd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn_layer_2rd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_2rd)

        if True:
            maxout = MaxoutLayer()
            #src_emb : sentence * batch * n_emb
            #hiddens : sentence * batch * hids
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #17
def train(data_loader, p, alpha=0.001, n_epoch=100):
    m = LogisticRegression(p) # initialize the model
    optimizer = th.optim.SGD(m.parameters(), lr=alpha) # create an SGD optimizer
    for _ in range(n_epoch): # iterate through the dataset n_epoch times
        for mini_batch in data_loader: # iterate through the dataset, with one mini-batch of random training samples (x,y) at a time
            x=mini_batch[0] # the feature vectors of the data samples in a mini-batch
            y=mini_batch[1] # the labels of the samples in a mini-batch
            #########################################
            ## INSERT YOUR CODE HERE (5 points)
            L = compute_L(compute_z(x, m), y)
            L.backward()
            update_parameters(optimizer)
            #########################################
    return m
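The loop above also relies on a compute_L helper that is not shown. A plausible sketch, assuming z holds the (batch_size, 1) linear logits from compute_z and y holds 0/1 labels; the assignment's actual definition may differ:

import torch as th

def compute_L(z, y):
    # average binary cross-entropy computed directly on the logits
    return th.nn.functional.binary_cross_entropy_with_logits(z, y.view(-1, 1).float())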
Example #18
def main():
    """
    Train the model and print progress.
    """
    data = gather_data()
    model = LogisticRegression(N_FEATS)
    optimizer = torch.optim.Adam(model.parameters())

    for iteration in range(EPOCHS):
        print("Epoch {:d}".format(iteration + 1))

        # Shuffle data.
        random.shuffle(data)

        total_loss = 0
        running_loss = 0
        n_examples = len(data)

        # Loop through examples in data.
        for i, example in enumerate(data):
            inp, tgt = example

            # Zero out the gradient.
            model.zero_grad()

            # Make a forward pass, i.e. compute the logits.
            logits = model(inp)
            loss = nn.functional.binary_cross_entropy_with_logits(logits, tgt)

            # Compute gradient and take step.
            loss.backward()
            optimizer.step()

            # accumulate plain floats so the graph from each step is freed
            total_loss += loss.item()
            running_loss += loss.item()

            # Print progress.
            if (i + 1) % LOG_EVERY == 0:
                guess = logits[0].item() > 0
                actual = tgt[0].item() == 1
                correct = "✓" if guess == actual else "✗ ({:})".format(actual)
                print("({:d} / {:d}) Loss: {:.5f}".format(
                    i + 1, n_examples, running_loss))
                print(" => {:} {:}".format(guess, correct))
                running_loss = 0

        print("Epoch loss: {:f}\n".format(total_loss))
Example #19
File: lm.py Project: gumaojie/rnnlm
    def apply(self, sentence, sentence_mask, use_noise=1):
        n_emb_lstm = self.n_emb_lstm

        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]

        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)
        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens , cells  = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)
        #if self.dropout < 1.0:
        #    hiddens = dropout_layer(hiddens, use_noise, self.dropout)
        rnn2 = FLSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn2.apply(hiddens , hiddens , src_mask)
        self.layers.append(rnn2)

        #rnn = NormalRNN(n_emb_lstm , self.n_hids)
        #hiddens  = rnn.apply(state_below, src_mask)
        #self.layers.append(rnn)

        if True:
            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)
        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #20
def test_logistic():
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    tX = standardize(tX)
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    logistic = LogisticRegression((y, tX[0]),
                                  regularizer="Lasso",
                                  regularizer_p=0.1)
    result = logistic.train(lr=0.05, batch_size=128, max_iters=1000)
    print(result)
Example #21
def logistic_accuracy(model: LogisticRegression, X: np.ndarray,
                      targets: np.ndarray):
    predictions = model.predict(X)  # These are probabilities
    predictions = np.around(predictions)
    predictions = predictions.reshape(-1)
    targets = targets.reshape(-1)
    correct = sum(predictions == targets)
    return correct / len(targets)
Example #22
def test_pca_logistic():
    """
    According to the PCA first 3 component test, the selected index:
        3,8,5,9,7,10,2,1,6,0,4
        0-10
    :return:
    """

    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    data, x_mean, x_std = standardize(tX)
    print("test bias")
    test_bias(y)
    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)

    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    # logistic = LogisticRegression((y, tX))
    logistic = LogisticRegression((y, data),
                                  regularizer="Lasso",
                                  regularizer_p=0.)
    # result = logistic.train(lr=0.1, batch_size=32, max_iters=6000)
    result = logistic.cross_validation(4, [0.5],
                                       'regularizer_p',
                                       lr=0.1,
                                       batch_size=32,
                                       max_iters=6000,
                                       early_stop=1000)
    print(result)
Example #23
    def Fitness(self, population):
        '''
        Evaluate each chromosome: train a LogisticRegression on the features
        the chromosome selects and use its evaluation score as the fitness.

        :param population: binary matrix, one row per chromosome, 1 = feature kept
        :return: selection probabilities, cumulative probabilities, raw fitness
        '''
        print("  Begin Fitness")
        fitness = np.zeros(self.populationSize)
        for i in range(self.populationSize):
            print("    Fitness", i)
            X_train_mark = self.X_train[:, population[i, :] == 1]
            X_test_mark = self.X_test[:, population[i, :] == 1]
            LR = LogisticRegression(X_train_mark, self.Y_train, X_test_mark,
                                    self.Y_test)
            fitness[i] = LR.evalution(is_GA=True)

        prob = fitness / np.sum(fitness)
        cum_prob = np.cumsum(prob)
        return prob, cum_prob, fitness
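Fitness() returns cumulative selection probabilities, which are typically consumed by roulette-wheel selection. A sketch of that step under the usual GA conventions; the project's own selection routine is not shown here:

import numpy as np

def select_parent(population, cum_prob):
    # draw a uniform random number and pick the first chromosome whose
    # cumulative probability exceeds it (roulette-wheel selection)
    r = np.random.rand()
    idx = int(np.searchsorted(cum_prob, r))
    return population[idx]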
Example #24
def train_lr():
    params = {
        "offline_model_dir": "../weights",
    }
    params.update(params_common)

    X_train, X_valid = load_data("train"), load_data("vali")
    X_test = load_data("test")
    # print(X_test['label'])

    model = LogisticRegression("ranking", params, logger)
    model.fit(X_train, validation_data=X_valid)
    model.save_session()
    model.predict(X_test, 'pred.txt')
Example #25
def test_baseline():
    print("base line testing")
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    data = baseline_logistic(tX)

    logistic = LogisticRegression((y, data))
    weight = logistic.train(lr=0.01, decay=1)

    plot = True
    if plot:
        from plots import cross_validation_visualization

    _, test_x, test_ids = load_test_data(clean=False)
    t_data = baseline_logistic(test_x)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() + '/submission/logistic_baseline.csv')
Example #26
def train_lr():
    params = {
        "offline_model_dir": "weights/lr",
    }
    params.update(params_common)

    X_train, X_valid = load_data("train"), load_data("vali")

    model = LogisticRegression("ranking", params, logger)
    model.fit(X_train, validation_data=X_valid)
    model.save_session()
Example #27
def logistic_loss_function(model: LogisticRegression, X: np.ndarray,
                           targets: np.ndarray):
    """

    Args:
        model: Logistic regression model
        X: array of pca transformed images
        targets: actual labels for the images the predictions are done on

    Returns:
        The average cross entropy loss over all the predictions
    """
    predictions = model.predict(X)

    # Error when label equal 1
    loss_1 = targets * np.log(predictions)

    # Error when label equal 0
    loss_0 = (1 - targets) * np.log(1 - predictions)

    total_loss = loss_1 + loss_0

    # return the average loss overall
    return -total_loss.sum() / targets.shape[0]
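model.predict can return probabilities of exactly 0 or 1, in which case the np.log calls above produce -inf. A defensive variant that clips the predictions first (a sketch, not the original project's code):

import numpy as np

def logistic_loss_function_stable(model, X, targets, eps=1e-12):
    # clip predictions away from 0 and 1 so np.log never sees an exact zero
    predictions = np.clip(model.predict(X), eps, 1 - eps)
    loss = targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)
    return -loss.sum() / targets.shape[0]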
Example #28
params_common = {
    # 2. batch size should not be very large since the lambda_ij matrix in ranknet and lambdarank
    # (which are of size batch_size x batch_size) will consume large memory space
    "batch_size": 128,
    "epoch": 10,
    "feature_dim": 46,
    "batch_sampling_method": "sample",
    "shuffle": True,
    "optimizer_type": "adam",
    "init_lr": 0.001,
    "beta1": 0.975,
    "beta2": 0.999,
    "decay_steps": 1000,
    "decay_rate": 0.9,
    "schedule_decay": 0.004,
    "random_seed": 2018,
    "eval_every_num_update": 100,
}

params = {
    "offline_model_dir": "weights/lr",
}
params.update(params_common)
logger = utils._get_logger("logs", "tf-%s.log" % utils._timestamp())
model = LogisticRegression("ranking", params, logger)
model.restore_session()
# SAVE THE MODEL
builder = tf.saved_model.builder.SavedModelBuilder("model")
builder.add_meta_graph_and_variables(model.sess,
                                     [tf.saved_model.tag_constants.SERVING])
builder.save()
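For completeness, the exported directory can be loaded back with the matching TF1.x loader API; a minimal sketch, assuming the "model" directory written by builder.save() above:

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    # restores the graph and variables tagged SERVING into this session
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], "model")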
Example #29
class Translate(object):

    def __init__(self,
                 enc_nhids=1000,
                 dec_nhids=1000,
                 enc_embed=620,
                 dec_embed=620,
                 src_vocab_size=30000,
                 trg_vocab_size=30000,
                 **kwargs):
        self.src_lookup_table = Lookup_table(enc_embed, src_vocab_size, prefix='src_lookup_table')
        self.trg_lookup_table = Lookup_table(dec_embed, trg_vocab_size, prefix='trg_lookup_table')
        self.encoder = BiGRU(enc_embed, enc_nhids, **kwargs)
        self.decoder = Decoder(dec_embed, dec_nhids, c_hids=enc_nhids*2, **kwargs)
        self.logistic = LogisticRegression(kwargs.get('n_out', dec_nhids), trg_vocab_size, prefix='logistic', drop_rate=kwargs['dropout'])
        self.params = self.src_lookup_table.params + self.trg_lookup_table.params + self.encoder.params + self.decoder.params  \
            + self.logistic.params
        self.tparams = OrderedDict([(param.name, param) for param in self.params])

    def apply(self, source, source_mask, target, target_mask, **kwargs):
        sbelow = self.src_lookup_table.apply(source)
        tbelow = self.trg_lookup_table.apply_zero_pad(target)

        s_rep = self.encoder.apply(sbelow, source_mask)
        hiddens = self.decoder.apply(tbelow, target_mask, s_rep, source_mask)

        cost_matrix = self.logistic.cost(hiddens, target, target_mask)
        self.cost = cost_matrix.sum()/target_mask.shape[1]

    def _next_prob_state(self, y, state, c, c_x):
        next_state, merge_out = self.decoder.next_state_merge(y, state, c, c_x)
        prob = self.logistic.apply(merge_out)
        return prob, next_state

    def build_sample(self):
        x = T.matrix('x', dtype='int64')
        sbelow = self.src_lookup_table.apply(x)
        ctx = self.encoder.apply(sbelow, mask=None)
        c_x = T.dot(ctx, self.decoder.Ws) + self.decoder.bs
        init_state = self.decoder.init_state(ctx)

        outs = [init_state, ctx]
        f_init = theano.function([x], outs, name='f_init')

        y = T.vector('y_sampler', dtype='int64')
        y_emb = self.trg_lookup_table.index(y)
        init_state = T.matrix('init_state', dtype='float32')
        next_probs, next_state = self._next_prob_state(y_emb, init_state, ctx, c_x)

        inps = [y, ctx, init_state]
        outs = [next_probs, next_state]
        f_next = theano.function(inps, outs, name='f_next')

        return f_init, f_next

    def savez(self, filename):
        params_value = OrderedDict([(kk, value.get_value()) for kk, value in self.tparams.iteritems()])
        numpy.savez(filename, **params_value)

    def load(self, filename):
        params_value = numpy.load(filename)
        assert len(params_value.files) == len(self.tparams)
        for key, value in self.tparams.iteritems():
            value.set_value(params_value[key])
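build_sample() returns two compiled functions whose signatures suggest the following greedy decoding loop. This is only a sketch: translate, x_ids (an int64 time-by-batch source matrix), bos_id and eos_id are hypothetical names, and the project itself presumably uses beam search:

import numpy

f_init, f_next = translate.build_sample()
state, ctx = f_init(x_ids)                    # outs of f_init: [init_state, ctx]
y = numpy.array([bos_id], dtype='int64')
hyp = []
for _ in range(50):                           # cap the output length
    probs, state = f_next(y, ctx, state)      # inps of f_next: [y, ctx, init_state]
    y = probs.argmax(axis=1).astype('int64')  # greedy pick of the next target token
    if y[0] == eos_id:
        break
    hyp.append(int(y[0]))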
Example #30
import torch
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
from data import TwoClassCifar10
from model import ConvNet, LogisticRegression
from config import Config as config

test_dataset = TwoClassCifar10(config.root, train=False)

conv_net = ConvNet(config.input_channel, 2)
lr_model = LogisticRegression(config.cifar10_input_size)
conv_net.load_state_dict(torch.load("model/2020428163925_0.719000.pth"))
lr_model.load_state_dict(torch.load("model/2020428163951_0.589000.pth"))

conv_preds = []
lr_preds = []
targets = []
with torch.no_grad():
    for image, label in test_dataset:
        image.unsqueeze_(0)
        conv_pred = conv_net(image)
        lr_pred = lr_model(image)
        conv_pred = torch.max(torch.softmax(conv_pred, dim=1),
                              dim=1)[0].squeeze()
        lr_pred = torch.sigmoid(lr_pred).squeeze()
        conv_preds.append(conv_pred.item())
        lr_preds.append(lr_pred.item())
        targets.append(label)

fpr, tpr, thresholds = metrics.roc_curve(targets, conv_preds)
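The matplotlib import above is otherwise unused; a short sketch of how the ROC curve just computed could be drawn with it (the LogisticRegression curve would be obtained the same way from lr_preds):

roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, label="ConvNet (AUC = {:.3f})".format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle="--", label="chance")
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.show()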
Example #31
import numpy as np
from model import LogisticRegression

# load data
x_train = np.load('./data/LR/train_data.npy')[:, 1:]
y_train = np.load('./data/LR/train_target.npy')
x_test = np.load('./data/LR/test_data.npy')[:, 1:]
y_test = np.load('./data/LR/test_target.npy')

# create an LR model and fit it
lr = LogisticRegression(learning_rate=1,
                        max_iter=10,
                        fit_bias=True,
                        optimizer='Newton',
                        seed=0)
lr.fit(x_train, y_train, val_data=(x_test, y_test))

# predict and calculate acc
train_acc = lr.score(x_train, y_train, metric='acc')
test_acc = lr.score(x_test, y_test, metric='acc')
print("train acc = {0}".format(train_acc))
print("test acc = {0}".format(test_acc))

# plot learning curve and decision boundary
lr.plot_learning_curve()
lr.plot_boundary(x_train, y_train)
lr.plot_boundary(x_test, y_test)
Example #32
    def test_leave_one_out(self):

        gpus = 1 if torch.cuda.is_available() else 0

        (x_train, y_train), (x_test, y_test) = get_2class_mnist(NUM_A, NUM_B)
        train_sample_num = len(x_train)

        class CreateData(torch.utils.data.Dataset):
            def __init__(self, data, targets):
                self.data = data
                self.targets = targets

            def __len__(self):
                return len(self.data)

            def __getitem__(self, idx):
                out_data = self.data[idx]
                out_label = self.targets[idx]

                return out_data, out_label
        
        train_data = CreateData(x_train, y_train)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=False)

        # prepare sklearn model to train w
        C = 1.0 / (train_sample_num * WEIGHT_DECAY)
        sklearn_model = linear_model.LogisticRegression(C=C, solver='lbfgs', tol=1e-8, fit_intercept=False)

        # prepare pytorch model to compute influence function
        torch_model = LR(weight_decay=WEIGHT_DECAY)

        # train
        sklearn_model.fit(x_train, y_train.ravel())
        print('LBFGS training took %s iter.' % sklearn_model.n_iter_)

        # assign W into pytorch model
        w_opt = sklearn_model.coef_.ravel()
        with torch.no_grad():
            torch_model.w = torch.nn.Parameter(
                torch.tensor(w_opt, dtype=torch.float)
            )
        
        # calculate original loss
        x_test_input = torch.FloatTensor(x_test[TEST_INDEX: TEST_INDEX+1])
        y_test_input = torch.LongTensor(y_test[TEST_INDEX: TEST_INDEX+1])

        test_data = CreateData(x_test[TEST_INDEX: TEST_INDEX+1], y_test[TEST_INDEX: TEST_INDEX+1])
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=True)

        if gpus > 0:
            torch_model = torch_model.cuda()
            x_test_input = x_test_input.cuda()
            y_test_input = y_test_input.cuda()

        test_loss_ori = torch_model.loss(torch_model(x_test_input), y_test_input, train=False).detach().cpu().numpy()

        # # get test loss gradient
        # test_grad = torch.autograd.grad(test_loss_ori, torch_model.w)

        # # get inverse hvp (s_test)
        # print('Calculating s_test ...')
        # s_test = s_test_sample(
        #         torch_model, x_test_input, y_test_input, train_loader, gpu=gpus, damp=0, scale=25, recursion_depth=RECURSION_DEPTH, r=R
        #     )[0].detach().cpu().numpy()
        # # s_test = torch_model.sess.run(torch_model.inverse_hessian, feed_dict={torch_model.x: x_train, torch_model.y: y_train}) @ test_grad

        # print(s_test)

        # get train loss gradient and estimate loss diff
        # loss_diff_approx = np.zeros(train_sample_num)
        # for i in range(train_sample_num):
        #     x_input = torch.FloatTensor(x_train[i])
        #     y_input = torch.LongTensor(y_train[i])

        #     if gpus >= 0:
        #         x_input = x_input.cuda()
        #         y_input = y_input.cuda()

        #     train_loss = torch_model.loss(torch_model(x_input), y_input)
        #     train_grad = torch.autograd.grad(train_loss, torch_model.w)[0].detach().cpu().numpy()

        #     loss_diff_approx[i] = np.asscalar(train_grad.T @ s_test) / train_sample_num
        #     if i % 100 == 0:
        #         print('[{}/{}] Estimated loss diff: {}'.format(i+1, train_sample_num, loss_diff_approx[i]))

        loss_diff_approx, _, _, _, = calc_influence_single(torch_model, train_loader, test_loader, test_id_num=0, gpu=1,
                                    recursion_depth=RECURSION_DEPTH, r=R, damp=0, scale=SCALE, exact=EXACT, batch_size=128)
        loss_diff_approx = torch.FloatTensor(loss_diff_approx).cpu().numpy()

        # get high and low loss diff indice
        sorted_indice = np.argsort(loss_diff_approx)
        sample_indice = np.concatenate([sorted_indice[-int(SAMPLE_NUM/2):], sorted_indice[:int(SAMPLE_NUM/2)]])

        # calculate true loss diff
        loss_diff_true = np.zeros(SAMPLE_NUM)
        for i, index in zip(range(SAMPLE_NUM), sample_indice):
            print('[{}/{}]'.format(i+1, SAMPLE_NUM))

            # get minus one dataset
            x_train_minus_one = np.delete(x_train, index, axis=0)
            y_train_minus_one = np.delete(y_train, index, axis=0)

            # retrain
            C = 1.0 / ((train_sample_num - 1) * WEIGHT_DECAY)
            sklearn_model_minus_one = linear_model.LogisticRegression(C=C, fit_intercept=False, tol=1e-8, solver='lbfgs')
            sklearn_model_minus_one.fit(x_train_minus_one, y_train_minus_one.ravel())
            print('LBFGS training took {} iter.'.format(sklearn_model_minus_one.n_iter_))

            # assign w on tensorflow model
            w_retrain = sklearn_model_minus_one.coef_.T.ravel()
            with torch.no_grad():
                torch_model.w = torch.nn.Parameter(
                    torch.tensor(w_retrain, dtype=torch.float)
                )
            
            if gpus > 0:
                torch_model = torch_model.cuda()

            # get retrain loss
            test_loss_retrain = torch_model.loss(torch_model(x_test_input), y_test_input, train=False).detach().cpu().numpy()

            # get true loss diff
            loss_diff_true[i] = test_loss_retrain - test_loss_ori

            print('Original loss       :{}'.format(test_loss_ori))
            print('Retrain loss        :{}'.format(test_loss_retrain))
            print('True loss diff      :{}'.format(loss_diff_true[i]))
            print('Estimated loss diff :{}'.format(loss_diff_approx[index]))

        r2_score = visualize_result(loss_diff_true, loss_diff_approx[sample_indice])

        self.assertTrue(r2_score > 0.9)
Example #33
def test_pca_logistic2():
    """
    According to the PCA first 3 component test, the selected index:
        3,8,5,9,7,10,2,1,6,0,4
        0-10
    :return:
    """
    print('Submission added test full')

    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))
    data, x_mean, x_std = standardize(tX)
    print("test bias")
    test_bias(y)
    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)

    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    # logistic = LogisticRegression((y, tX))
    logistic = LogisticRegression((y, data),
                                  regularizer="Lasso",
                                  regularizer_p=0.)
    # result = logistic.train(lr=0.1, batch_size=32, max_iters=6000)
    result = logistic.cross_validation(4, [0.5],
                                       'regularizer_p',
                                       lr=0.1,
                                       batch_size=32,
                                       max_iters=1000,
                                       early_stop=1000,
                                       skip=True)

    weight = result[0]
    _, test_x, test_ids = load_test_data(clean=False)
    test_data, x_mean, x_std = standardize(test_x)
    pcs, pc_data = pca_transform(test_data, nb_pc, concatenate=False)

    print("get interactions")
    interaction = interactions(test_data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    test_data = np.c_[test_data[:, 0:10], pc_data]
    test_data = np.c_[test_data, interaction]

    y_pred = []
    for w in weight:
        _y_pred = logistic.__call__(test_data, w)
        y_pred.append(_y_pred)
    y_pred = np.average(y_pred, axis=0)
    y_pred[np.where(y_pred <= 0.5)] = -1
    y_pred[np.where(y_pred > 0.5)] = 1
    output_path = get_dataset_dir() + \
                  '/submission/pca_test{}.csv'.format(
                      datetime.datetime.now().__str__())
    create_csv_submission(test_ids, y_pred, output_path)
Example #34
def main(datasets, U, n_epochs=20, batch_size=20, max_l=100, hidden_size=100, \
         word_embedding_size=100, session_hidden_size=50, session_input_size=50, \
         model_name='SMN_last.bin', learning_rate=0.001, r_seed=3435, \
        val_frequency=100):
    hiddensize = hidden_size
    U = U.astype(dtype=theano.config.floatX)
    rng = np.random.RandomState(r_seed)
    lsize, rsize = max_l, max_l
    sessionmask = T.matrix()
    lx = []  #tokens from previous turns
    lxmask = []  #masks from previous turns
    for i in range(max_turn):
        lx.append(T.matrix())
        lxmask.append(T.matrix())

    index = T.lscalar()
    rx = T.matrix('rx')  #tokens from response
    rxmask = T.matrix()  #masks from response
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    llayer0_input = []
    for i in range(max_turn):
        llayer0_input.append(Words[T.cast(lx[i].flatten(), dtype="int32")] \
            .reshape((lx[i].shape[0], lx[i].shape[1], Words.shape[1])))

    # input: word embeddings of the mini batch
    rlayer0_input = Words[T.cast(rx.flatten(), dtype="int32")].\
                    reshape((rx.shape[0], rx.shape[1], Words.shape[1]))

    train_set, dev_set, test_set = datasets[0], datasets[1], datasets[2]

    train_set_lx = []
    train_set_lx_mask = []
    q_embedding = []
    q_embedding_Cat = []
    q_embedding_Cat_mask = []
    q_embedding_self_att = []
    q_embedding_self_att_rnn = []
    q_embedding_hiddenequal = []

    offset = 2 * lsize
    for i in range(max_turn):
        train_set_lx.append(theano.shared(
            np.asarray(a=train_set[:, offset*i:offset*i+lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
        train_set_lx_mask.append(theano.shared(
            np.asarray(a=train_set[:, offset*i + lsize:offset*i + 2*lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
    train_set_rx = theano.shared(
        np.asarray(a=train_set[:, offset*max_turn:offset*max_turn + lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    train_set_rx_mask = theano.shared(
        np.asarray(a=train_set[:, offset*max_turn+lsize:offset*max_turn + 2*lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    train_set_session_mask = theano.shared(
        np.asarray(a=train_set[:, -max_turn-1:-1], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    train_set_y = theano.shared(np.asarray(train_set[:, -1], dtype="int32"), \
                               borrow=True)

    val_set_lx = []
    val_set_lx_mask = []
    for i in range(max_turn):
        val_set_lx.append(theano.shared(
            np.asarray(a=dev_set[:, offset*i:offset*i + lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
        val_set_lx_mask.append(theano.shared(
            np.asarray(a=dev_set[:, offset*i + lsize:offset*i + 2*lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
    val_set_rx = theano.shared(
        np.asarray(a=dev_set[:, offset*max_turn:offset*max_turn + lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    val_set_rx_mask = theano.shared(
        np.asarray(a=dev_set[:, offset*max_turn + lsize:offset*max_turn + 2*lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    val_set_session_mask = theano.shared(np.asarray(a=dev_set[:, -max_turn-1:-1], \
                                                    dtype=theano.config.floatX), \
                                         borrow=True)
    val_set_y = theano.shared(np.asarray(dev_set[:, -1], dtype="int32"),
                              borrow=True)

    test_set_lx = []
    test_set_lx_mask = []
    for i in range(max_turn):
        test_set_lx.append(theano.shared(
            np.asarray(a=test_set[:, offset*i:offset*i + lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
        test_set_lx_mask.append(theano.shared(
            np.asarray(a=test_set[:, offset*i + lsize:offset*i + 2*lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
    test_set_rx = theano.shared(
        np.asarray(a=test_set[:, offset*max_turn:offset*max_turn + lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    test_set_rx_mask = theano.shared(
        np.asarray(a=test_set[:, offset*max_turn + lsize:offset*max_turn + 2*lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    test_set_session_mask = theano.shared(np.asarray(a=test_set[:, -max_turn-1:-1], \
                                                    dtype=theano.config.floatX), \
                                         borrow=True)
    test_set_y = theano.shared(np.asarray(test_set[:, -1], dtype="int32"), \
                               borrow=True)

    dic = {}
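    # Build the givens mappings that replace each symbolic input with the minibatch
    # slice selected by index; dic and val_dic feed the theano.function calls below,
    # and test_dic is constructed the same way.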
    for i in range(max_turn):
        dic[lx[i]] = train_set_lx[i][index * batch_size:(index + 1) *
                                     batch_size]
        dic[lxmask[i]] = train_set_lx_mask[i][index * batch_size:(index + 1) *
                                              batch_size]
    dic[rx] = train_set_rx[index * batch_size:(index + 1) * batch_size]
    dic[sessionmask] = train_set_session_mask[index * batch_size:(index + 1) *
                                              batch_size]
    dic[rxmask] = train_set_rx_mask[index * batch_size:(index + 1) *
                                    batch_size]
    dic[y] = train_set_y[index * batch_size:(index + 1) * batch_size]

    val_dic = {}
    for i in range(max_turn):
        val_dic[lx[i]] = val_set_lx[i][index * batch_size:(index + 1) *
                                       batch_size]
        val_dic[lxmask[i]] = val_set_lx_mask[i][index *
                                                batch_size:(index + 1) *
                                                batch_size]
    val_dic[rx] = val_set_rx[index * batch_size:(index + 1) * batch_size]
    val_dic[sessionmask] = val_set_session_mask[index *
                                                batch_size:(index + 1) *
                                                batch_size]
    val_dic[rxmask] = val_set_rx_mask[index * batch_size:(index + 1) *
                                      batch_size]
    val_dic[y] = val_set_y[index * batch_size:(index + 1) * batch_size]

    test_dic = {}
    for i in range(max_turn):
        test_dic[lx[i]] = test_set_lx[i][index * batch_size:(index + 1) *
                                         batch_size]
        test_dic[lxmask[i]] = test_set_lx_mask[i][index *
                                                  batch_size:(index + 1) *
                                                  batch_size]
    test_dic[rx] = test_set_rx[index * batch_size:(index + 1) * batch_size]
    test_dic[sessionmask] = test_set_session_mask[index *
                                                  batch_size:(index + 1) *
                                                  batch_size]
    test_dic[rxmask] = test_set_rx_mask[index * batch_size:(index + 1) *
                                        batch_size]
    test_dic[y] = test_set_y[index * batch_size:(index + 1) * batch_size]

    # This is the first RNN.
    sentence2vec = GRU(n_in=word_embedding_size, n_hidden=hiddensize, \
                       n_out=hiddensize, batch_size=batch_size)
    for i in range(max_turn):
        q_embedding.append(sentence2vec(llayer0_input[i], lxmask[i], True))
    r_embedding = sentence2vec(rlayer0_input, rxmask, True)

    # After the first RNN, concatenate the last turn's representation onto each of
    # the earlier turns' representations (and onto the response below).
    for i in range(max_turn):
        q_embedding_Cat.append(T.concatenate([q_embedding[i], \
                                              q_embedding[-1]], \
                               axis=2))
        q_embedding_Cat_mask.append(lxmask[i])
    r_embedding_Cat = T.concatenate([r_embedding, q_embedding[-1]], axis=2)
    r_embedding_Cat_mask = rxmask
    # This is the self_attention step
    sa = self_attention(n_in=hiddensize * 2)
    for i in range(max_turn):
        q_embedding_self_att.append(T.concatenate([q_embedding_Cat[i], \
                                                   sa(q_embedding_Cat[i], \
                                                       q_embedding_Cat_mask[i])], \
                                                  axis=2))
    r_embedding_self_att = T.concatenate([r_embedding_Cat, \
                                          sa(r_embedding_Cat, \
                                              r_embedding_Cat_mask)], \
                                         axis=2)
    # This is the SRNN
    vec2svec = SGRU(n_in=hiddensize*2, n_hidden=hiddensize, \
                    n_out=hiddensize, batch_size=batch_size)

    for i in range(max_turn):
        q_embedding_self_att_rnn.append(vec2svec(q_embedding_self_att[i], \
                                                 q_embedding_Cat_mask[i], \
                                                 True))
    r_embedding_self_att_rnn = vec2svec(r_embedding_self_att, \
                                        r_embedding_Cat_mask, \
                                        True)

    # This is the CNN with pooling and a fully connected layer
    pooling_layer = ConvSim(rng=rng, n_in=max_l, n_out=session_input_size, \
                            hidden_size=hiddensize, session_size=session_hidden_size, \
                            batch_size=batch_size)
    poolingoutput = []
    for i in range(max_turn):
        poolingoutput.append(pooling_layer(llayer0_input[i], \
                                           rlayer0_input, \
                                           q_embedding_self_att_rnn[i], \
                                           r_embedding_self_att_rnn))

    # This is the second RNN
    session2vec = GRU(n_in=session_input_size, n_hidden=session_hidden_size, \
                      n_out=session_hidden_size, batch_size=batch_size)
    res = session2vec(T.stack(poolingoutput, 1), sessionmask, True)

    # This is the final attention step; its output feeds the classifier
    W = theano.shared(ortho_weight(session_hidden_size), borrow=True)
    W2 = theano.shared(glorot_uniform((hiddensize, session_hidden_size)),
                       borrow=True)
    b = theano.shared(value=np.zeros((session_hidden_size, ), dtype='float32'),
                      borrow=True)
    U_s = theano.shared(glorot_uniform((session_hidden_size, 1)), borrow=True)

    final = T.dot(T.tanh(T.dot(res, W) + \
                         T.dot(T.stack(q_embedding_self_att_rnn, 1)[:, :, -1, :], W2) \
                         + b), U_s)
    weight = T.exp(T.max(final, 2)) * sessionmask
    weight2 = weight / T.sum(weight, 1)[:, None]
    final2 = T.sum(res * weight2[:, :, None], 1) + 1e-6
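    # final2 is the attention-pooled session vector: each turn is scored from the
    # session GRU output and the final hidden state of that turn's utterance RNN,
    # masked by sessionmask and normalised over turns before the weighted sum.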

    # This is the classifier
    classifier = LogisticRegression(final2, session_hidden_size, 2, rng)

    # Calculate the cost and update the parameters with gradients
    cost = classifier.negative_log_likelihood(y)
    error = classifier.errors(y)
    predict = classifier.predict_prob
    opt = Adam()

    # Make params
    params = classifier.params
    params += sentence2vec.params
    params += session2vec.params
    params += pooling_layer.params
    params += [Words, W, b, W2, U_s]
    params += vec2svec.params
    params += sa.params

    # Make updater
    grad_updates = opt.Adam(cost=cost, params=params, lr=learning_rate)

    # The training step
    train_model = theano.function([index], cost, updates=grad_updates, \
                                  givens=dic, on_unused_input='ignore')
    val_model = theano.function([index], [cost, error], givens=val_dic, \
                                on_unused_input='ignore')
    best_dev = 1.
    n_train_batches = datasets[0].shape[0] / batch_size
    for i in xrange(n_epochs):
        cost_all = 0
        total = 0.
        for minibatch_index in np.random.permutation(range(n_train_batches)):
            batch_cost = train_model(minibatch_index)
            total = total + 1
            cost_all = cost_all + batch_cost
            if total % val_frequency == 0:
                sf.write('epoch %d, num %d, train_loss %f' %
                         (i, total, cost_all / total))
                sf.write('\n')
                sf.flush()
                cost_dev = 0
                errors_dev = 0
                j = 0
                for minibatch_index in xrange(datasets[1].shape[0] /
                                              batch_size):
                    tcost, terr = val_model(minibatch_index)
                    cost_dev += tcost
                    errors_dev += terr
                    j = j + 1
                cost_dev = cost_dev / j
                errors_dev = errors_dev / j
                if cost_dev < best_dev:
                    best_dev = cost_dev
                    save_params(params, model_name + 'dev')
                sf.write("epoch %d, num %d, dev_loss %f" %
                         (i, total, cost_dev))
                sf.write('\n')
                sf.write("epoch %d, num %d, dev_accuracy %f" %
                         (i, total, 1 - errors_dev))
                sf.write('\n')
                sf.flush()
        cost_all = cost_all / n_train_batches
        sf.write("epoch %d loss %f" % (i, cost_all))
        sf.write('\n')
        sf.flush()
Example #35
0
    def apply(self, sentence, sentence_mask, use_noise=1):
        n_emb_lstm = self.n_emb_lstm
        n_emb_struct = self.n_emb_struct
        n_emb_share = self.n_emb_share

        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]
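        # Three hard-wired variants follow: the partially shared embedding branch
        # (guarded by "if False") and the fully shared LSTM stack in the else branch
        # are disabled, so only the rnn_pyramid branch actually runs.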

        if False: #(share only part of embedding)
            n_emb_all = n_emb_lstm + n_emb_struct - n_emb_share
            emb_all_range = T.arange(n_emb_all)
            emb_lstm_range = T.arange(n_emb_lstm)
            emb_struct_range = T.arange(n_emb_lstm - n_emb_share, n_emb_all)

            table = lookup_table(n_emb_all, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_all_range)
            state_below_lstm = table.apply(src, emb_lstm_range)
            state_below_struct = table.apply(src, emb_struct_range)
            self.layers.append(table)

            rnn = SLSTM(n_emb_lstm, n_emb_struct, n_emb_share, self.n_hids, self.n_shids, self.n_structs)
            #rnn = LSTM(self.n_in, self.n_hids)
            hiddens = rnn.merge_out(state_below, state_below_lstm, state_below_struct, src_mask)
            self.layers.append(rnn)

        elif True: # use rnn_pyramid
            emb_lstm_range = T.arange(n_emb_lstm)
            table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_lstm_range)
            self.layers.append(table)

            if self.dropout < 1.0:
                state_below = dropout_layer(state_below, use_noise, self.dropout)

            rnn = rnn_pyramid_layer(n_emb_lstm, self.n_hids)
            hiddens, cells, structs = rnn.apply(state_below, src_mask)
            self.layers.append(rnn)
            self.structs = structs

        else: # share all embedding
            emb_lstm_range = T.arange(n_emb_lstm)
            table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_lstm_range)
            self.layers.append(table)

            if self.dropout < 1.0:
                state_below = dropout_layer(state_below, use_noise, self.dropout)

            rnn = LSTM(n_emb_lstm, self.n_hids)
            hiddens, cells = rnn.apply(state_below, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn)

            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnn1 = LSTM(self.n_hids, self.n_hids)
            hiddens, cells = rnn1.apply(hiddens, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn1)

            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask)
            self.layers.append(maxout)

            #rnng = LSTM(n_emb_lstm, self.n_hids)
            #hiddens, cells = rnn.apply(state_below, src_mask)
            #hiddensg = rnng.merge_out(state_below, src_mask)
            #self.layers.append(rnng)

            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            #chunk = chunk_layer(n_lstm_in + n_lstm_out, n_lstm_out, n_chunk_out, 6)
            n_emb_hid = n_emb_lstm + self.n_hids
            emb_hid = T.concatenate([state_below, hiddens], axis=2)
            #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
            #hiddens = chunk.merge_out(hiddens, hiddens, src_mask, merge_how="for_struct",\
            #        state_below_other=state_below, n_other=n_emb_lstm)
            chunk = chunk_layer(n_emb_hid, self.n_hids, self.n_hids, self.n_structs)
            hiddens = chunk.merge_out(emb_hid, hiddens, src_mask, merge_how="for_struct",\
                    state_below_other=None, n_other=0)
            #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
            #hiddens = chunk.merge_out(hiddens, hiddensg, src_mask, merge_how="both",\
            #        state_below_other=state_below, n_other=n_emb_lstm)
            self.layers.append(chunk)

        # apply dropout
        if self.dropout < 1.0:
            # dropout is applied to the output of maxout in ghog
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #36
0
def main():
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))
    ])

    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))
    ])

    #TODO adjust batch size
    batch_size = 128
    nb_epochs = 100
    lr = 0.001

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # MNIST Dataset
    trainset = torchvision.datasets.MNIST(root='./data',
                                          train=True,
                                          download=True,
                                          transform=transforms.ToTensor())
    testset = torchvision.datasets.MNIST(root='./data',
                                         train=False,
                                         transform=transforms.ToTensor())
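    # MNIST is used here, so the CIFAR-10 transforms defined above go unused and the
    # LogisticRegression/MLP input sizes (784 = 1*28*28) match; the torchvision
    # vgg16_bn/resnet18 models expect 3-channel input and would need the CIFAR10
    # loaders below (or an input adapter) instead.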

    # CIFAR10 Dataset
    #trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
    #testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=2)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=2)

    num_classes = 10

    model_names = ['MLP', 'LR', 'vgg16_bn', 'resnet18']
    optim_names = ['sgd', 'adam', 'lbfgs']

    for model_name in model_names:
        for opt in optim_names:
            ## !!! TODO every iter should create a new model !!!
            print("creating model: ", model_name)
            print("using optimizer: ", opt)

            if model_name == 'vgg16_bn':
                model = models.vgg16_bn()
                model.classifier[6] = nn.Linear(4096, num_classes)
            elif model_name == 'resnet18':
                model = models.resnet18()
                model.fc = nn.Linear(512, num_classes)
            elif model_name == 'LR':
                model = LogisticRegression(
                    784, num_classes)  # 3072 for CIFAR10, 784 for MNIST
            elif model_name == 'MLP':
                model = MLP(1 * 28 * 28)  # 3072 for CIFAR10, 784 for MNIST
            model.to(device)
            train(model_name,
                  model,
                  trainloader,
                  testloader,
                  device,
                  opt,
                  nb_epochs,
                  lr=lr)
Example #37
0
    ####################################
    # Read and preprocess data from files
    ####################################
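    # skipinitialspace strips the blank after each comma that the (presumably UCI
    # Adult census) CSV files contain, so string values like '<=50K' match below.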
    df_train = pd.read_csv(fp_train, skipinitialspace=True)
    df_test = pd.read_csv(fp_test, skipinitialspace=True)

    df_train['income'].replace('<=50K', 0, inplace=True)
    df_train['income'].replace('>50K', 1, inplace=True)

    Xtrain, ytrain, Xtest = preprocess(df_train, df_test, features)

    ####################################
    # Train the estimator and predict test data
    ####################################
    regr = []
    if estimator == 'logistic':
        regr = LogisticRegression().fit(Xtrain, ytrain)
        ypred = np.around(regr.predict(Xtest)).astype(int)
    elif estimator == 'generative':
        regr = NaiveBayes().fit(Xtrain, ytrain)
        ypred = regr.predict(Xtest)

    ####################################
    # Write the result to file
    ####################################
    df_pred = pd.DataFrame()
    df_pred['id'] = np.arange(1, len(ypred) + 1)
    df_pred['label'] = ypred
    df_pred.to_csv(fp_ans, index=False)
Example #38
0
    def apply_morph_attention(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
            src_morph_emb : sentence * batch * morph * n_emb_morph
            1. word morph lookup -> dropout -> attention
            2. lstm -> dropout
            3. lstm -> maxout -> dropout
            4. logistic
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        #word lookup table
        emb_lstm_range = T.arange(self.n_emb_lstm)
        table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
        src_emb = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        #morph lookup table
        emb_morph_range = T.arange(self.n_emb_morph)
        table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
        src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
        hiddens, cells = lstm_att_1st.apply(src_emb, src_morph_emb, src_mask)
        self.layers.append(lstm_att_1st)
        #print len(hiddens) , hiddens[0].ndim

        rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
        self.layers.append(rnn_layer_2nd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_3rd = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn_layer_3rd.apply(hiddens, src_mask)
        self.layers.append(rnn_layer_3rd)

        if True:
            maxout = MaxoutLayer()
            #src_emb : sentence * batch * n_emb
            #hiddens : sentence * batch * hids
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #39
0
test_dataset = TwoClassCifar10(config.root, train=False)
train_dataloader = data.DataLoader(train_dataset,
                                   config.batch_size,
                                   shuffle=True,
                                   num_workers=2)
test_dataloader = data.DataLoader(test_dataset,
                                  config.batch_size,
                                  shuffle=False,
                                  num_workers=2)
print(f"{datetime.now().ctime()} - Finish Loading Dataset")

print(
    f"{datetime.now().ctime()} - Start Creating Net, Criterion, Optimizer and Scheduler..."
)
conv_net = ConvNet(config.input_channel, 2)
lr_model = LogisticRegression(config.cifar10_input_size)
conv_criterion = nn.CrossEntropyLoss()
lr_criterion = nn.BCEWithLogitsLoss()
conv_optimizer = optim.SGD(conv_net.parameters(),
                           config.lr,
                           momentum=config.momentum,
                           weight_decay=config.weight_decay)
lr_optimizer = optim.SGD(lr_model.parameters(),
                         config.lr,
                         momentum=config.momentum,
                         weight_decay=config.weight_decay)
conv_scheduler = optim.lr_scheduler.CosineAnnealingLR(conv_optimizer,
                                                      len(train_dataloader) *
                                                      config.epochs,
                                                      eta_min=config.eta_min)
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(lr_optimizer,
Example #40
0
    def apply(self, sentence, sentence_mask, use_noise=1):
        n_emb_lstm = self.n_emb_lstm
        n_emb_struct = self.n_emb_struct
        n_emb_share = self.n_emb_share

        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]

        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        #hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn)

        if True:
            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnn1 = LSTM(self.n_hids, self.n_hids)
            hiddens, cells = rnn1.apply(hiddens, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn1)

        if True:
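            # The pyramid layer below consumes both the second LSTM's hidden states
            # and the raw embeddings (state_below); its step count is kept in
            # self.rnn_len for later use.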
            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnnp = rnn_pyramid_layer(self.n_hids, n_emb_lstm, self.n_hids)
            hiddens, cells, structs, pyramid = rnnp.apply(hiddens, state_below, src_mask)
            self.layers.append(rnnp)
            #self.structs = structs
            self.rnn_len = rnnp.n_steps
        self.sent_len = sentence.shape[0]

        if True:
            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #41
0
def test_final():
    b_time = datetime.datetime.now()
    print('Beginning to read data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(s=(datetime.datetime.now() -
                                                    b_time).total_seconds()))

    data, _, _ = standardize(tX)

    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)

    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)

    print("select first 10 data entry with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]
    # Begin the least square sgd
    e_time = datetime.datetime.now()

    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    # train, valid = split_train_valid(0.8, data, labels=y)
    logistic = LogisticRegression((y, data),
                                  regularizer='Lasso',
                                  regularizer_p=0.)
    result = logistic.cross_validation(4, [0.],
                                       'regularizer_p',
                                       lr=0.1,
                                       batch_size=32,
                                       max_iters=1200,
                                       early_stop=400)
    weight = result[0]

    print("loading the test set")
    _, test_data, test_ids = load_test_data(clean=False)
    # Feature transform
    data, _, _ = standardize(test_data)
    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)

    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    y_pred = []
    for w in weight:
        _y_pred = logistic(data, w)
        y_pred.append(_y_pred)
    # Average the per-fold predictions element-wise; np.average over the flattened
    # list would collapse everything to a single scalar and break the thresholding
    # below.
    y_pred = np.mean(np.asarray(y_pred), axis=0)
    y_pred[np.where(y_pred <= 0.5)] = -1
    y_pred[np.where(y_pred > 0.5)] = 1
    output_path = get_dataset_dir() + '/second_submission.csv'
    create_csv_submission(test_ids, y_pred, output_path)
Example #42
0
labels = unpickle('../data/meta')
interesting_coarse_labels = [0, 1]  # Aquatic mammals and Fish

train = []
y = []
test = []
y_test = []
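# Keep only the samples whose coarse label is one of the two superclasses above,
# turning CIFAR-100 into a binary classification problem.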
for i in range(len(train_data[b'coarse_labels'])):
    for j in interesting_coarse_labels:
        if train_data[b'coarse_labels'][i] == j:
            train.append(train_data[b'data'][i])
            y.append(j)
            break

for i in range(len(test_data[b'coarse_labels'])):
    for j in interesting_coarse_labels:
        if test_data[b'coarse_labels'][i] == j:
            test.append(test_data[b'data'][i])
            y_test.append(j)
            break

train = np.array(train)
y = np.array(y)
test = np.array(test)
y_test = np.array(y_test)

weight_matrix, losses = LogisticRegression.train(train, y,
                                                 iteration=1, learning_rate=0.1)
LogisticRegression.accuracy(weight_matrix, test, y_test)

Example #43
0
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import dataset.MNIST.mnist as mnist
import model.LogisticRegression as LR

print("start downloading MNIST")
mnist_data_sets = mnist.read_data_sets("../data/mnist", one_hot=True)
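# read_data_sets downloads MNIST into ../data/mnist on first use and returns
# train/validation/test splits with one-hot labels (one_hot=True above).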
print("start lr")

lr = LR.LogisticRegression()
lr.build_model()
batch_xs, batch_ys = mnist_data_sets.train.next_batch(55000)
lr.train(batch_xs, batch_ys)
batch_xs, batch_ys = mnist_data_sets.test.images, mnist_data_sets.test.labels
lr.test(batch_xs, batch_ys)