def test_k_fold_logistic():
    np.set_printoptions(precision=4)
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    tX = remove_dimensions(tX)
    tX = standardize(tX)
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    # Lambda space
    lambdas = np.logspace(-3, 1, 10)
    logistic = LogisticRegression((y, tX[0]), regularizer='Lasso',
                                  regularizer_p=0.1)
    best_lambda, (tr_err, te_err) = logistic.cross_validation(
        5, lambdas, lambda_name='regularizer_p', max_iters=6000)
    print('best lambda {}'.format(best_lambda))
    save_path = get_plot_path(test_k_fold_logistic.__name__)
    tr_err = np.array(tr_err)
    te_err = np.array(te_err)
    np.save(save_path + "tr_err", tr_err)
    np.save(save_path + "te_err", te_err)
def __init__(self, enc_nhids=1000, dec_nhids=1000, enc_embed=620, dec_embed=620,
             src_vocab_size=30000, trg_vocab_size=30000, **kwargs):
    self.lr_in = kwargs.get('n_out', dec_nhids)
    self.src_lookup_table = Lookup_table(enc_embed, src_vocab_size,
                                         prefix='src_lookup_table')
    self.trg_lookup_table = Lookup_table(dec_embed, trg_vocab_size,
                                         prefix='trg_lookup_table')
    self.encoder = BiGRU(enc_embed, enc_nhids, **kwargs)
    # enc_nhids*2 corresponds to the last dimension of the encoded state
    self.decoder = Decoder(dec_embed, dec_nhids, c_hids=enc_nhids * 2, **kwargs)
    # the output size of the decoder should match lr_in if no n_out is defined
    self.logistic = LogisticRegression(self.lr_in, trg_vocab_size,
                                       prefix='logistic', **kwargs)
    self.params = self.src_lookup_table.params + self.trg_lookup_table.params + \
        self.encoder.params + self.decoder.params + self.logistic.params
    self.tparams = OrderedDict([(param.name, param) for param in self.params])
    self.use_mv = kwargs.get('use_mv', 0)
def test_compute_z():
    ''' (5 points) compute_z'''
    x = th.tensor([[1., 6.],   # the first sample in the mini-batch
                   [2., 5.],   # the second sample in the mini-batch
                   [3., 4.]])  # the third sample in the mini-batch
    m = LogisticRegression(2)  # create a logistic regression object
    m.layer.weight.data = th.tensor([[-0.1, 0.1]])
    m.layer.bias.data = th.tensor([0.1])
    z = compute_z(x, m)
    assert type(z) == th.Tensor
    z_true = [[0.6],  # linear logit for the first sample in the mini-batch
              [0.4],  # linear logit for the second sample in the mini-batch
              [0.2]]  # linear logit for the third sample in the mini-batch
    assert np.allclose(z.data, z_true, atol=1e-2)
    assert z.requires_grad
    L = th.sum(z)  # compute the sum of all elements in z
    L.backward()   # back propagate gradient to w and b
    assert np.allclose(m.layer.weight.grad, [[6, 15]], atol=0.1)
    assert np.allclose(m.layer.bias.grad, [3], atol=0.1)
    n = np.random.randint(2, 5)  # batch_size
    p = np.random.randint(2, 5)  # the number of input features
    x = th.randn(n, p)
    m = LogisticRegression(p)  # create a logistic regression object
    z = compute_z(x, m)
    assert np.allclose(z.size(), (n, 1))
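# A minimal sketch of compute_z consistent with the test above. It assumes
# LogisticRegression exposes its single linear layer as `m.layer` (the name
# the test uses); a hypothetical reference implementation, not the official
# solution.
def compute_z(x, m):
    # forward pass of the linear layer: z = x @ w.T + b, shape (batch_size, 1)
    return m.layer(x)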
def test_update_parameters():
    ''' (5 points) update_parameters'''
    m = LogisticRegression(3)  # create a logistic regression object
    m.layer.weight.data = th.tensor([[0.5, 0.1, -0.2]])
    m.layer.bias.data = th.tensor([0.2])
    # create a toy loss function: the sum of all elements in w and b
    L = m.layer.weight.sum() + m.layer.bias.sum()
    # create an optimizer for w and b with learning rate = 0.1
    optimizer = th.optim.SGD(m.parameters(), lr=0.1)
    # (step 1) back propagation to compute the gradients
    L.backward()
    assert np.allclose(m.layer.weight.grad, np.ones((3, 1)), atol=1e-2)
    assert np.allclose(m.layer.bias.grad, 1, atol=1e-2)
    # now perform gradient descent using SGD
    update_parameters(optimizer)
    # lets check the new values of the w and b
    assert np.allclose(m.layer.weight.data, [[0.4, 0., -0.3]], atol=1e-2)
    assert np.allclose(m.layer.bias.data, [0.1], atol=1e-2)
    # (step 2) back propagation again to compute the gradients
    L.backward()
    update_parameters(optimizer)
    assert np.allclose(m.layer.weight.data, [[0.3, -0.1, -0.4]], atol=1e-2)
    assert np.allclose(m.layer.bias.data, [0.], atol=1e-2)
    # (step 3) back propagation again to compute the gradients
    L.backward()
    update_parameters(optimizer)
    assert np.allclose(m.layer.weight.data, [[0.2, -0.2, -0.5]], atol=1e-2)
    assert np.allclose(m.layer.bias.data, [-0.1], atol=1e-2)
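# A minimal sketch of update_parameters that satisfies the checks above
# (hypothetical, not the official solution). Because the test calls
# L.backward() before every step, the optimizer step must be followed by
# zeroing the gradients; otherwise they would accumulate and the second
# update would subtract 0.2 instead of the expected 0.1.
def update_parameters(optimizer):
    optimizer.step()       # apply one SGD update: w <- w - lr * w.grad
    optimizer.zero_grad()  # reset gradients before the next backward pass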
def apply_rnnlm(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask,
                sentence_char, sentence_char_mask, use_noise=1):
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]
    src_char, src_char_mask = sentence_char[:-1], sentence_char_mask[:-1]
    emb_lstm_range = T.arange(self.n_emb_lstm)

    # word lookup table
    table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
    src_emb = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    rnn_layer_1st = NormalRNN(self.n_emb_lstm, self.n_hids)
    hiddens = rnn_layer_1st.apply(src_emb, src_mask)
    self.layers.append(rnn_layer_1st)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply_morph_only_rnn_gru(self, sentence, sentence_mask, sentence_morph,
                             sentence_morph_mask, use_noise=1):
    """
    sentence : sentence * batch
    sentence_morph : sentence * batch * morph
    1. morph lookup -> dropout
    2. MorphStructRNN
    3. lstm -> dropout
    4. lstm -> maxout -> dropout
    5. logistic
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # morph lookup table
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)

    if self.dropout < 1.0:
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
    hiddens = morph_layer_1st.apply(src_morph_emb, src_morph_mask)
    self.layers.append(morph_layer_1st)

    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_3rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_3rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_3rd)

    maxout = MaxoutLayer()
    src_morph_merge_emb = src_morph_emb.sum(2)
    src_morph_mask = src_morph_mask.max(axis=2)
    # src_morph_merge_emb : sentence * batch * n_emb_morph
    states = T.concatenate([src_morph_merge_emb, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, self.n_emb_morph + self.n_hids,
                           self.n_hids, src_morph_mask, maxout_n_fold)
    self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def test_complex():
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    tX, _, _ = standardize(tX, intercept=False)
    complex_tx, _, _ = compose_complex_features(tX, intercept=True,
                                                interaction=True, log=True,
                                                sqrt=False, pca=True)
    test_bias(y)
    logistic = LogisticRegression((y, complex_tx), regularizer="Lasso",
                                  regularizer_p=0.5)
    # result = logistic.train(lr=0.1, batch_size=32, max_iters=6000)
    result = logistic.cross_validation(4, [0.5], 'regularizer_p', lr=0.1,
                                       batch_size=32, max_iters=6000,
                                       early_stop=1000)
def apply(self, facts, facts_mask, question, question_mask, y):
    table = lookup_table(self.n_in, self.vocab_size)
    self.params += table.params

    facts_encoder = auto_encoder(facts, facts_mask, self.vocab_size,
                                 self.n_in, self.n_hids, table=table)
    questions_encoder = auto_encoder(question, question_mask, self.vocab_size,
                                     self.n_in, self.n_hids, table=table)
    self.params += facts_encoder.params
    self.params += questions_encoder.params

    facts_rep = facts_encoder.output
    questions_rep = questions_encoder.output
    for _ in range(self.n_layer):
        questions_rep = self.interact(facts_rep, questions_rep)

    logistic_layer = LogisticRegression(questions_rep, self.n_hids, self.n_label)
    self.params += logistic_layer.params

    self.cost = logistic_layer.cost(y) / y.shape[0]
    self.decoder_cost = facts_encoder.cost + questions_encoder.cost
    self.errors = logistic_layer.errors(y)
    return self.cost, self.decoder_cost
def apply_morph_memory(self, sentence, sentence_mask, sentence_morph,
                       sentence_morph_mask, use_noise=1):
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]
    emb_lstm_range = T.arange(self.n_emb_lstm)

    # word read lookup table
    table_read = LookupTable(self.n_emb_lstm, self.vocab_size,
                             name='Wemb_read', rng=1234)
    src_read = table_read.apply(src, emb_lstm_range)
    self.layers.append(table_read)

    # word write lookup table
    table_write = LookupTable(self.n_emb_lstm, self.vocab_size,
                              name='Wemb_write', rng=4321)
    src_write = table_write.apply(src, emb_lstm_range)
    self.layers.append(table_write)

    # morph read lookup table
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size,
                              name='Memb_read', rng=4321)
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)

    if self.dropout < 1.0:
        src_read = DropoutLayer(src_read, use_noise, self.dropout)
        src_write = DropoutLayer(src_write, use_noise, self.dropout)
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
    src_read, cells = lstm_att_1st.apply(src_read, src_morph_emb, src_mask)
    self.layers.append(lstm_att_1st)

    lstm_att_2nd = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
    src_write, cells = lstm_att_2nd.apply(src_write, src_morph_emb, src_mask)
    self.layers.append(lstm_att_2nd)

    #memory_layer_1st = MorphMerge2(self.n_emb_lstm, self.n_hids)
    memory_layer_1st = MemoryRNN(self.n_emb_lstm, self.n_hids)
    hiddens = memory_layer_1st.apply(src_read, src_mask, src_write)
    self.layers.append(memory_layer_1st)

    #memory_layer_2nd = MorphMerge2(self.n_hids, self.n_hids)
    memory_layer_2nd = MemoryRNN(self.n_hids, self.n_hids)
    hiddens = memory_layer_2nd.apply(src_read, src_mask, src_write, hiddens)
    self.layers.append(memory_layer_2nd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply_normal(self, sentence, sentence_mask, use_noise=1, use_maxout=True):
    """
    sentence : sentence * batch
    1. word lookup -> dropout
    2. lstm -> dropout
    3. lstm -> maxout -> dropout
    4. logistic
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    emb_lstm_range = T.arange(self.n_emb_lstm)

    # word lookup table
    table = DynamicMixLookupTable(self.n_emb_lstm, **self.cfig)
    #table = DynamicLookupTable(self.n_emb_lstm, **self.cfig)
    #table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
    src_emb = table.apply(src, emb_lstm_range)
    self.src_emb = src_emb
    self.layers.append(table)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    rnn_layer_1st = LSTM(self.n_emb_lstm, self.n_hids)
    hiddens, cells = rnn_layer_1st.apply(src_emb, src_mask)
    self.layers.append(rnn_layer_1st)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)

    if use_maxout:
        maxout = MaxoutLayer()
        states = T.concatenate([src_emb, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                               self.n_hids, src_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)
    #hier_softmax_layer = HierarchicalSoftmax(hiddens, self.n_hids, self.vocab_size)
    #self.layers.append(hier_softmax_layer)
    #self.cost = hier_softmax_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default="usa",
                        help="airport dataset to run experiment on")
    parser.add_argument("--embed_dim", type=int, default=128,
                        help="struc2vec output embedding dimension")
    parser.add_argument("--epochs", type=int, default=10,
                        help="number of epochs")
    parser.add_argument("--lr", type=float, default=0.01,
                        help="learning rate")
    parser.add_argument("--l2", type=float, default=0.1,
                        help="L2 regularization")
    args = parser.parse_args()

    dataset = args.dataset
    embed_dim = args.embed_dim
    epochs = args.epochs
    lr_rate = args.lr
    l2 = args.l2
    num_classes = 4

    test_accuracy = []
    for i in range(10):
        print("Experiment {}".format(i))
        model = LogisticRegression(embed_dim, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate,
                                    weight_decay=l2)
        feat_data, labels, train_idx, test_idx = load_dataset(dataset, embed_dim)
        train(feat_data, labels, train_idx, model, criterion, optimizer, epochs)
        test_acc = evaluate(feat_data, labels, test_idx, model, criterion)
        test_accuracy.append(test_acc)
    print("Average performance: {}, standard deviation: {}".format(
        np.average(test_accuracy), np.std(test_accuracy)))
def test_data_model():
    title = 'complex_full_before_ddl_interactions_full'
    print("Base line testing for model " + title)
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    _, test_x, test_ids = load_test_data(clean=False)
    data = compose_interactions_for_transforms(tX)
    t_data = compose_interactions_for_transforms(test_x)

    # Test 1: Ridge 0.1
    logistic = LogisticRegression((y, data), regularizer='Ridge', regularizer_p=0.1)
    weight = logistic.train(lr=0.01, decay=0.5, max_iters=2000,
                            early_stop=1000, decay_intval=100)
    # weight, _, _ = logistic.cross_validation(4, [0.1, 0.5, 0.05], 'regularizer_p', max_iters=2000)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() + '/submission/removed_outlier_{}.csv'.format(title + 'Ridge01'))

    # Test 2: Lasso 0.1
    logistic = LogisticRegression((y, data), regularizer='Lasso', regularizer_p=0.1)
    weight = logistic.train(lr=0.01, decay=0.5, max_iters=2000,
                            early_stop=1000, decay_intval=100)
    # weight, _, _ = logistic.cross_validation(4, [0.1, 0.5, 0.05], 'regularizer_p', max_iters=2000)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() + '/submission/removed_outlier_{}.csv'.format(title + '-Lasso0.1'))

    # Test 3: no penalty
    logistic = LogisticRegression((y, data))
    weight = logistic.train(lr=0.01, decay=0.5, max_iters=2000, early_stop=1000)
    # weight, _, _ = logistic.cross_validation(4, [0.1, 0.5, 0.05], 'regularizer_p', max_iters=2000)
    pred_label = predict_labels(weight, t_data)
    create_csv_submission(
        test_ids, pred_label,
        get_dataset_dir() + '/submission/removed_outlier_{}.csv'.format(title))
def apply_morph_only_memory(self, sentence, sentence_mask, sentence_morph,
                            sentence_morph_mask, use_noise=1):
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # morph read lookup table
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph_read = LookupTable(self.n_emb_morph, self.morph_size,
                                   name='Memb_read', rng=1234)
    src_morph_read = table_morph_read.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph_read)

    # morph write lookup table
    table_morph_write = LookupTable(self.n_emb_lstm, self.morph_size,
                                    name='Memb_write', rng=4321)
    src_morph_write = table_morph_write.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph_write)

    if self.dropout < 1.0:
        src_morph_read = DropoutLayer(src_morph_read, use_noise, self.dropout)
        src_morph_write = DropoutLayer(src_morph_write, use_noise, self.dropout)

    morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
    src_read = morph_layer_1st.apply(src_morph_read, src_morph_mask)
    self.layers.append(morph_layer_1st)

    morph_layer_2nd = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
    src_write = morph_layer_2nd.apply(src_morph_write, src_morph_mask)
    self.layers.append(morph_layer_2nd)

    memory_layer_1st = MemoryRNN(self.n_emb_lstm, self.n_hids)
    hiddens = memory_layer_1st.apply(src_read, src_mask, src_write)
    self.layers.append(memory_layer_1st)

    memory_layer_2nd = MemoryRNN(self.n_hids, self.n_hids)
    hiddens = memory_layer_2nd.apply(src_read, src_mask, src_write, hiddens)
    self.layers.append(memory_layer_2nd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def test_draw():
    """
    Draw a balanced sample; in practice this gave worse results.
    """
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    # data, _, _ = standardize(tX)
    # test_bias(y)
    # nb_pc = 5
    # print("test the PCA with {} elements".format(nb_pc))
    # pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)
    #
    # print("get interactions")
    # interaction = interactions(data, range(0, 10))
    # interaction, _, _ = standardize(interaction)
    # print("select first 10 data entry with pc data")
    # data = np.c_[data[:, 0:10], pc_data]
    # data = np.c_[data, interaction]
    # # Begin the least square sgd
    # e_time = datetime.datetime.now()
    # print("Finish data reading in {s} seconds".
    #       format(s=(e_time - b_time).total_seconds()))
    data, _, _ = compose_complex_features_further(tX, intercept=True,
                                                  interaction=True, log=True,
                                                  sqrt=True, power=True, pca=True)
    train, valid = draw_balanced_subsample(y, data, trainsize=6000)
    # t_data, _, _ = compose_complex_features_further(test_x, intercept=True,
    #                                                 interaction=True,
    #                                                 log=True,
    #                                                 sqrt=True,
    #                                                 power=True,
    #                                                 pca=True)
    logistic = LogisticRegression(train=train, validation=valid,
                                  regularizer='Lasso', regularizer_p=0.5)
    result = logistic.train(lr=0.01, decay=0.5, early_stop=400, max_iters=2000)
    print(result)
def apply_model(self, sentence, sentence_mask, sentence_morph,
                sentence_morph_mask, use_noise=1):
    """
    sentence : sentence * batch
    sentence_morph : sentence * batch * morph
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    src_emb = lookup_layer('word', src)
    #src_morph_emb : sentence * batch * morph * n_emb_morph
    #src_morph_emb = lookup_layer('morph', src)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    # the first LSTM reads the word embeddings, not an as-yet-undefined `hiddens`
    rnn_layer_1st = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_1st.apply(src_emb, src_mask)
    self.layers.append(rnn_layer_1st)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)

    maxout = MaxoutLayer()
    #src_emb : sentence * batch * n_emb
    #hiddens : sentence * batch * hids
    states = T.concatenate([src_emb, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                           self.n_hids, src_mask, maxout_n_fold)
    self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def train(data_loader, p, alpha=0.001, n_epoch=100):
    m = LogisticRegression(p)  # initialize the model
    optimizer = th.optim.SGD(m.parameters(), lr=alpha)  # create an SGD optimizer
    for _ in range(n_epoch):  # iterate through the dataset n_epoch times
        # iterate through the dataset, one mini-batch of random training
        # samples (x, y) at a time
        for mini_batch in data_loader:
            x = mini_batch[0]  # the feature vectors of the samples in a mini-batch
            y = mini_batch[1]  # the labels of the samples in a mini-batch
            #########################################
            ## INSERT YOUR CODE HERE (5 points)
            L = compute_L(compute_z(x, m), y)
            L.backward()
            update_parameters(optimizer)
            #########################################
    return m

#-----------------
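# A minimal sketch of compute_L consistent with the loop above, assuming
# binary labels y and linear logits z of shape (n, 1); this is hypothetical,
# not the official assignment solution.
def compute_L(z, y):
    # average binary cross-entropy computed directly on the logits
    # (numerically stable form)
    return th.nn.functional.binary_cross_entropy_with_logits(z, y.view(-1, 1).float())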
def main(): """ Train the model and print progress. """ data = gather_data() model = LogisticRegression(N_FEATS) optimizer = torch.optim.Adam(model.parameters()) for iteration in range(EPOCHS): print("Epoch {:d}".format(iteration + 1)) # Shuffle data. random.shuffle(data) total_loss = 0 running_loss = 0 n_examples = len(data) # Loop through examples in data. for i, example in enumerate(data): inp, tgt = example # Zero out the gradient. model.zero_grad() # Make a forward pass, i.e. compute the logits. logits = model(inp) loss = nn.functional.binary_cross_entropy_with_logits(logits, tgt) # Compute gradient and take step. loss.backward() optimizer.step() total_loss += loss running_loss += loss # Print progress. if (i + 1) % LOG_EVERY == 0: guess = logits[0].item() > 0 actual = tgt[0].item() == 1 correct = "✓" if guess == actual else "✗ ({:})".format(actual) print("({:d} / {:d}) Loss: {:.5f}".format( i + 1, n_examples, running_loss)) print(" => {:} {:}".format(guess, correct)) running_loss = 0 print("Epoch loss: {:f}\n".format(total_loss))
def apply(self, sentence, sentence_mask, use_noise=1):
    n_emb_lstm = self.n_emb_lstm
    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    emb_lstm_range = T.arange(n_emb_lstm)
    table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
    state_below = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    if self.dropout < 1.0:
        state_below = dropout_layer(state_below, use_noise, self.dropout)

    rnn = LSTM(n_emb_lstm, self.n_hids)
    hiddens, cells = rnn.apply(state_below, src_mask)
    self.layers.append(rnn)

    #if self.dropout < 1.0:
    #    hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    rnn2 = FLSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn2.apply(hiddens, hiddens, src_mask)
    self.layers.append(rnn2)

    #rnn = NormalRNN(n_emb_lstm, self.n_hids)
    #hiddens = rnn.apply(state_below, src_mask)
    #self.layers.append(rnn)

    maxout = maxout_layer()
    states = T.concatenate([state_below, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids,
                           src_mask, maxout_n_fold)
    self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def test_logistic():
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    tX = standardize(tX)
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    logistic = LogisticRegression((y, tX[0]), regularizer="Lasso", regularizer_p=0.1)
    result = logistic.train(lr=0.05, batch_size=128, max_iters=1000)
    print(result)
def logistic_accuracy(model: LogisticRegression, X: np.ndarray, targets: np.ndarray):
    predictions = model.predict(X)  # These are probabilities
    predictions = np.around(predictions)  # round to hard 0/1 labels
    predictions = predictions.reshape(-1)
    targets = targets.reshape(-1)
    correct = np.sum(predictions == targets)
    return correct / len(targets)
def test_pca_logistic():
    """
    According to the PCA first-3-component test, the selected indices are:
    3, 8, 5, 9, 7, 10, 2, 1, 6, 0, 4 (out of 0-10)
    :return:
    """
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    data, x_mean, x_std = standardize(tX)
    print("test bias")
    test_bias(y)
    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)
    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    # logistic = LogisticRegression((y, tX))
    logistic = LogisticRegression((y, data), regularizer="Lasso", regularizer_p=0.)
    # result = logistic.train(lr=0.1, batch_size=32, max_iters=6000)
    result = logistic.cross_validation(4, [0.5], 'regularizer_p', lr=0.1,
                                       batch_size=32, max_iters=6000,
                                       early_stop=1000)
    print(result)
def Fitness(self, population):
    '''
    Evaluate every individual in the population: train a logistic regression
    on the feature subset selected by the individual's 0/1 mask and use its
    evaluation score as fitness.
    :param population: populationSize x n_features binary matrix
    :return: selection probabilities, cumulative probabilities, raw fitness
    '''
    print(" Begin Fitness")
    fitness = np.zeros(self.populationSize)
    for i in range(self.populationSize):
        print(" Fitness", i)
        X_train_mark = self.X_train[:, population[i, :] == 1]
        X_test_mark = self.X_test[:, population[i, :] == 1]
        LR = LogisticRegression(X_train_mark, self.Y_train,
                                X_test_mark, self.Y_test)
        fitness[i] = LR.evalution(is_GA=True)
    prob = fitness / np.sum(fitness)
    cum_prob = np.cumsum(prob)
    return prob, cum_prob, fitness
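# A hedged sketch of how the returned cum_prob is typically consumed for
# roulette-wheel selection in a genetic algorithm (assumed usage; `select`
# and `rng` are hypothetical names, not part of the source):
def select(population, cum_prob, rng=np.random):
    # for each slot, draw r in [0, 1) and pick the first individual whose
    # cumulative-probability bin contains r
    idx = np.searchsorted(cum_prob, rng.rand(len(population)))
    return population[idx]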
def train_lr(): params = { "offline_model_dir": "../weights", } params.update(params_common) X_train, X_valid = load_data("train"), load_data("vali") X_test = load_data("test") # print(X_test['label']) model = LogisticRegression("ranking", params, logger) model.fit(X_train, validation_data=X_valid) model.save_session() model.predict(X_test, 'pred.txt')
def test_baseline():
    print("base line testing")
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    data = baseline_logistic(tX)
    logistic = LogisticRegression((y, data))
    weight = logistic.train(lr=0.01, decay=1)
    plot = True
    if plot:
        from plots import cross_validation_visualization
        _, test_x, test_ids = load_test_data(clean=False)
        t_data = baseline_logistic(test_x)
        pred_label = predict_labels(weight, t_data)
        create_csv_submission(
            test_ids, pred_label,
            get_dataset_dir() + '/submission/logistic_baseline.csv')
def train_lr(): params = { "offline_model_dir": "weights/lr", } params.update(params_common) X_train, X_valid = load_data("train"), load_data("vali") model = LogisticRegression("ranking", params, logger) model.fit(X_train, validation_data=X_valid) model.save_session()
def logistic_loss_function(model: LogisticRegression, X: np.ndarray, targets: np.ndarray):
    """
    Args:
        model: Logistic regression model
        X: array of PCA-transformed images
        targets: actual labels for the images the predictions are done on

    Returns:
        The average cross-entropy loss over all the predictions
    """
    predictions = model.predict(X)
    # Error when label equals 1
    loss_1 = targets * np.log(predictions)
    # Error when label equals 0
    loss_0 = (1 - targets) * np.log(1 - predictions)
    total_loss = loss_1 + loss_0
    # return the average loss over all samples
    return -total_loss.sum() / targets.shape[0]
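# Note: np.log blows up when model.predict returns exactly 0 or 1. A hedged,
# numerically safer variant (an assumed improvement, not part of the source)
# clips the probabilities away from the endpoints first:
def safe_logistic_loss(model, X, targets, eps=1e-12):
    predictions = np.clip(model.predict(X), eps, 1 - eps)
    total_loss = targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)
    return -total_loss.sum() / targets.shape[0]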
# 2. batch size should not be very large, since the lambda_ij matrix in
#    ranknet and lambdarank (of size batch_size x batch_size) consumes a
#    large amount of memory
    "batch_size": 128,
    "epoch": 10,
    "feature_dim": 46,
    "batch_sampling_method": "sample",
    "shuffle": True,
    "optimizer_type": "adam",
    "init_lr": 0.001,
    "beta1": 0.975,
    "beta2": 0.999,
    "decay_steps": 1000,
    "decay_rate": 0.9,
    "schedule_decay": 0.004,
    "random_seed": 2018,
    "eval_every_num_update": 100,
}

params = {
    "offline_model_dir": "weights/lr",
}
params.update(params_common)

logger = utils._get_logger("logs", "tf-%s.log" % utils._timestamp())
model = LogisticRegression("ranking", params, logger)
model.restore_session()

# SAVE THE MODEL
builder = tf.saved_model.builder.SavedModelBuilder("model")
builder.add_meta_graph_and_variables(model.sess, [tf.saved_model.tag_constants.SERVING])
builder.save()
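# A hedged sketch of restoring the exported SavedModel with the standard TF1
# loader API (assumed usage; the exact tensors to fetch afterwards depend on
# the graph and are not shown in the source):
with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], "model")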
class Translate(object):

    def __init__(self, enc_nhids=1000, dec_nhids=1000, enc_embed=620, dec_embed=620,
                 src_vocab_size=30000, trg_vocab_size=30000, **kwargs):
        self.src_lookup_table = Lookup_table(enc_embed, src_vocab_size,
                                             prefix='src_lookup_table')
        self.trg_lookup_table = Lookup_table(dec_embed, trg_vocab_size,
                                             prefix='trg_lookup_table')
        self.encoder = BiGRU(enc_embed, enc_nhids, **kwargs)
        self.decoder = Decoder(dec_embed, dec_nhids, c_hids=enc_nhids * 2, **kwargs)
        self.logistic = LogisticRegression(kwargs.get('n_out', dec_nhids),
                                           trg_vocab_size, prefix='logistic',
                                           drop_rate=kwargs['dropout'])
        self.params = self.src_lookup_table.params + self.trg_lookup_table.params + \
            self.encoder.params + self.decoder.params + self.logistic.params
        self.tparams = OrderedDict([(param.name, param) for param in self.params])

    def apply(self, source, source_mask, target, target_mask, **kwargs):
        sbelow = self.src_lookup_table.apply(source)
        tbelow = self.trg_lookup_table.apply_zero_pad(target)
        s_rep = self.encoder.apply(sbelow, source_mask)
        hiddens = self.decoder.apply(tbelow, target_mask, s_rep, source_mask)
        cost_matrix = self.logistic.cost(hiddens, target, target_mask)
        self.cost = cost_matrix.sum() / target_mask.shape[1]

    def _next_prob_state(self, y, state, c, c_x):
        next_state, merge_out = self.decoder.next_state_merge(y, state, c, c_x)
        prob = self.logistic.apply(merge_out)
        return prob, next_state

    def build_sample(self):
        x = T.matrix('x', dtype='int64')
        sbelow = self.src_lookup_table.apply(x)
        ctx = self.encoder.apply(sbelow, mask=None)
        c_x = T.dot(ctx, self.decoder.Ws) + self.decoder.bs
        init_state = self.decoder.init_state(ctx)

        outs = [init_state, ctx]
        f_init = theano.function([x], outs, name='f_init')

        y = T.vector('y_sampler', dtype='int64')
        y_emb = self.trg_lookup_table.index(y)
        init_state = T.matrix('init_state', dtype='float32')
        next_probs, next_state = self._next_prob_state(y_emb, init_state, ctx, c_x)

        inps = [y, ctx, init_state]
        outs = [next_probs, next_state]
        f_next = theano.function(inps, outs, name='f_next')
        return f_init, f_next

    def savez(self, filename):
        params_value = OrderedDict([(kk, value.get_value())
                                    for kk, value in self.tparams.iteritems()])
        numpy.savez(filename, **params_value)

    def load(self, filename):
        params_value = numpy.load(filename)
        assert len(params_value.files) == len(self.tparams)
        for key, value in self.tparams.iteritems():
            value.set_value(params_value[key])
import torch
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt

from data import TwoClassCifar10
from model import ConvNet, LogisticRegression
from config import Config as config

test_dataset = TwoClassCifar10(config.root, train=False)

conv_net = ConvNet(config.input_channel, 2)
lr_model = LogisticRegression(config.cifar10_input_size)
conv_net.load_state_dict(torch.load("model/2020428163925_0.719000.pth"))
lr_model.load_state_dict(torch.load("model/2020428163951_0.589000.pth"))

conv_preds = []
lr_preds = []
targets = []
with torch.no_grad():
    for image, label in test_dataset:
        image.unsqueeze_(0)
        conv_pred = conv_net(image)
        lr_pred = lr_model(image)
        conv_pred = torch.max(torch.softmax(conv_pred, dim=1), dim=1)[0].squeeze()
        lr_pred = torch.sigmoid(lr_pred).squeeze()
        conv_preds.append(conv_pred.item())
        lr_preds.append(lr_pred.item())
        targets.append(label)

fpr, tpr, thresholds = metrics.roc_curve(targets, conv_preds)
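# The snippet above stops after computing the ConvNet ROC points. A hedged
# sketch of the likely continuation: compute the logistic-regression curve
# and plot both (assumed plotting code, not from the source).
lr_fpr, lr_tpr, lr_thresholds = metrics.roc_curve(targets, lr_preds)
plt.plot(fpr, tpr, label="ConvNet (AUC={:.3f})".format(metrics.auc(fpr, tpr)))
plt.plot(lr_fpr, lr_tpr, label="LogisticRegression (AUC={:.3f})".format(metrics.auc(lr_fpr, lr_tpr)))
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.show()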
import numpy as np

from model import LogisticRegression

# load data
x_train = np.load('./data/LR/train_data.npy')[:, 1:]
y_train = np.load('./data/LR/train_target.npy')
x_test = np.load('./data/LR/test_data.npy')[:, 1:]
y_test = np.load('./data/LR/test_target.npy')

# create an LR model and fit it
lr = LogisticRegression(learning_rate=1, max_iter=10, fit_bias=True,
                        optimizer='Newton', seed=0)
lr.fit(x_train, y_train, val_data=(x_test, y_test))

# predict and calculate acc
train_acc = lr.score(x_train, y_train, metric='acc')
test_acc = lr.score(x_test, y_test, metric='acc')
print("train acc = {0}".format(train_acc))
print("test acc = {0}".format(test_acc))

# plot learning curve and decision boundary
lr.plot_learning_curve()
lr.plot_boundary(x_train, y_train)
lr.plot_boundary(x_test, y_test)
def test_leave_one_out(self):
    gpus = 1 if torch.cuda.is_available() else 0
    (x_train, y_train), (x_test, y_test) = get_2class_mnist(NUM_A, NUM_B)
    train_sample_num = len(x_train)

    class CreateData(torch.utils.data.Dataset):
        def __init__(self, data, targets):
            self.data = data
            self.targets = targets

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            out_data = self.data[idx]
            out_label = self.targets[idx]
            return out_data, out_label

    train_data = CreateData(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=False)

    # prepare sklearn model to train w
    C = 1.0 / (train_sample_num * WEIGHT_DECAY)
    sklearn_model = linear_model.LogisticRegression(C=C, solver='lbfgs',
                                                    tol=1e-8, fit_intercept=False)
    # prepare pytorch model to compute the influence function
    torch_model = LR(weight_decay=WEIGHT_DECAY)

    # train
    sklearn_model.fit(x_train, y_train.ravel())
    print('LBFGS training took %s iter.' % sklearn_model.n_iter_)

    # assign W into the pytorch model
    w_opt = sklearn_model.coef_.ravel()
    with torch.no_grad():
        torch_model.w = torch.nn.Parameter(torch.tensor(w_opt, dtype=torch.float))

    # calculate original loss
    x_test_input = torch.FloatTensor(x_test[TEST_INDEX: TEST_INDEX + 1])
    y_test_input = torch.LongTensor(y_test[TEST_INDEX: TEST_INDEX + 1])
    test_data = CreateData(x_test[TEST_INDEX: TEST_INDEX + 1],
                           y_test[TEST_INDEX: TEST_INDEX + 1])
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=True)
    if gpus >= 0:
        torch_model = torch_model.cuda()
        x_test_input = x_test_input.cuda()
        y_test_input = y_test_input.cuda()
    test_loss_ori = torch_model.loss(torch_model(x_test_input), y_test_input,
                                     train=False).detach().cpu().numpy()

    # # get test loss gradient
    # test_grad = torch.autograd.grad(test_loss_ori, torch_model.w)
    # # get inverse hvp (s_test)
    # print('Calculating s_test ...')
    # s_test = s_test_sample(
    #     torch_model, x_test_input, y_test_input, train_loader, gpu=gpus,
    #     damp=0, scale=25, recursion_depth=RECURSION_DEPTH, r=R
    # )[0].detach().cpu().numpy()
    # # s_test = torch_model.sess.run(torch_model.inverse_hessian,
    # #     feed_dict={torch_model.x: x_train, torch_model.y: y_train}) @ test_grad
    # print(s_test)

    # get train loss gradient and estimate loss diff
    # loss_diff_approx = np.zeros(train_sample_num)
    # for i in range(train_sample_num):
    #     x_input = torch.FloatTensor(x_train[i])
    #     y_input = torch.LongTensor(y_train[i])
    #     if gpus >= 0:
    #         x_input = x_input.cuda()
    #         y_input = y_input.cuda()
    #     train_loss = torch_model.loss(torch_model(x_input), y_input)
    #     train_grad = torch.autograd.grad(train_loss, torch_model.w)[0].detach().cpu().numpy()
    #     loss_diff_approx[i] = np.asscalar(train_grad.T @ s_test) / train_sample_num
    #     if i % 100 == 0:
    #         print('[{}/{}] Estimated loss diff: {}'.format(
    #             i + 1, train_sample_num, loss_diff_approx[i]))
    loss_diff_approx, _, _, _ = calc_influence_single(
        torch_model, train_loader, test_loader, test_id_num=0, gpu=1,
        recursion_depth=RECURSION_DEPTH, r=R, damp=0, scale=SCALE,
        exact=EXACT, batch_size=128)
    loss_diff_approx = torch.FloatTensor(loss_diff_approx).cpu().numpy()

    # get the highest and lowest loss-diff indices
    sorted_indice = np.argsort(loss_diff_approx)
    sample_indice = np.concatenate([sorted_indice[-int(SAMPLE_NUM / 2):],
                                    sorted_indice[:int(SAMPLE_NUM / 2)]])

    # calculate true loss diff
    loss_diff_true = np.zeros(SAMPLE_NUM)
    for i, index in zip(range(SAMPLE_NUM), sample_indice):
        print('[{}/{}]'.format(i + 1, SAMPLE_NUM))

        # get the leave-one-out dataset
        x_train_minus_one = np.delete(x_train, index, axis=0)
        y_train_minus_one = np.delete(y_train, index, axis=0)

        # retrain
        C = 1.0 / ((train_sample_num - 1) * WEIGHT_DECAY)
        sklearn_model_minus_one = linear_model.LogisticRegression(
            C=C, fit_intercept=False, tol=1e-8, solver='lbfgs')
        sklearn_model_minus_one.fit(x_train_minus_one, y_train_minus_one.ravel())
        print('LBFGS training took {} iter.'.format(sklearn_model_minus_one.n_iter_))

        # assign w on the pytorch model
        w_retrain = sklearn_model_minus_one.coef_.T.ravel()
        with torch.no_grad():
            torch_model.w = torch.nn.Parameter(torch.tensor(w_retrain, dtype=torch.float))
        if gpus >= 0:
            torch_model = torch_model.cuda()

        # get retrain loss
        test_loss_retrain = torch_model.loss(torch_model(x_test_input), y_test_input,
                                             train=False).detach().cpu().numpy()

        # get true loss diff
        loss_diff_true[i] = test_loss_retrain - test_loss_ori
        print('Original loss :{}'.format(test_loss_ori))
        print('Retrain loss :{}'.format(test_loss_retrain))
        print('True loss diff :{}'.format(loss_diff_true[i]))
        print('Estimated loss diff :{}'.format(loss_diff_approx[index]))

    r2_score = visualize_result(loss_diff_true, loss_diff_approx[sample_indice])
    self.assertTrue(r2_score > 0.9)
def test_pca_logistic2():
    """
    According to the PCA first-3-component test, the selected indices are:
    3, 8, 5, 9, 7, 10, 2, 1, 6, 0, 4 (out of 0-10)
    :return:
    """
    print('Submission added test full')
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    data, x_mean, x_std = standardize(tX)
    print("test bias")
    test_bias(y)
    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)
    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]
    # Begin the least square sgd
    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))
    # logistic = LogisticRegression((y, tX))
    logistic = LogisticRegression((y, data), regularizer="Lasso", regularizer_p=0.)
    # result = logistic.train(lr=0.1, batch_size=32, max_iters=6000)
    result = logistic.cross_validation(4, [0.5], 'regularizer_p', lr=0.1,
                                       batch_size=32, max_iters=1000,
                                       early_stop=1000, skip=True)
    weight = result[0]

    _, test_x, test_ids = load_test_data(clean=False)
    test_data, x_mean, x_std = standardize(test_x)
    pcs, pc_data = pca_transform(test_data, nb_pc, concatenate=False)
    print("get interactions")
    interaction = interactions(test_data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entry with pc data")
    test_data = np.c_[test_data[:, 0:10], pc_data]
    test_data = np.c_[test_data, interaction]

    y_pred = []
    for w in weight:
        _y_pred = logistic(test_data, w)
        y_pred.append(_y_pred)
    y_pred = np.average(y_pred, axis=0)
    y_pred[np.where(y_pred <= 0.5)] = -1
    y_pred[np.where(y_pred > 0.5)] = 1
    output_path = get_dataset_dir() + \
        '/submission/pca_test{}.csv'.format(str(datetime.datetime.now()))
    create_csv_submission(test_ids, y_pred, output_path)
def main(datasets, U, n_epochs=20, batch_size=20, max_l=100, hidden_size=100,
         word_embedding_size=100, session_hidden_size=50, session_input_size=50,
         model_name='SMN_last.bin', learning_rate=0.001, r_seed=3435,
         val_frequency=100):
    hiddensize = hidden_size
    U = U.astype(dtype=theano.config.floatX)
    rng = np.random.RandomState(r_seed)
    lsize, rsize = max_l, max_l

    sessionmask = T.matrix()
    lx = []      # tokens from previous turns
    lxmask = []  # masks from previous turns
    for i in range(max_turn):
        lx.append(T.matrix())
        lxmask.append(T.matrix())

    index = T.lscalar()
    rx = T.matrix('rx')  # tokens from response
    rxmask = T.matrix()  # masks from response
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")

    llayer0_input = []
    for i in range(max_turn):
        llayer0_input.append(Words[T.cast(lx[i].flatten(), dtype="int32")]
                             .reshape((lx[i].shape[0], lx[i].shape[1], Words.shape[1])))
    # input: word embeddings of the mini batch
    rlayer0_input = Words[T.cast(rx.flatten(), dtype="int32")].reshape(
        (rx.shape[0], rx.shape[1], Words.shape[1]))

    train_set, dev_set, test_set = datasets[0], datasets[1], datasets[2]

    train_set_lx = []
    train_set_lx_mask = []
    q_embedding = []
    q_embedding_Cat = []
    q_embedding_Cat_mask = []
    q_embedding_self_att = []
    q_embedding_self_att_rnn = []
    q_embedding_hiddenequal = []
    offset = 2 * lsize
    for i in range(max_turn):
        train_set_lx.append(theano.shared(
            np.asarray(a=train_set[:, offset * i:offset * i + lsize],
                       dtype=theano.config.floatX), borrow=True))
        train_set_lx_mask.append(theano.shared(
            np.asarray(a=train_set[:, offset * i + lsize:offset * i + 2 * lsize],
                       dtype=theano.config.floatX), borrow=True))
    train_set_rx = theano.shared(
        np.asarray(a=train_set[:, offset * max_turn:offset * max_turn + lsize],
                   dtype=theano.config.floatX), borrow=True)
    train_set_rx_mask = theano.shared(
        np.asarray(a=train_set[:, offset * max_turn + lsize:offset * max_turn + 2 * lsize],
                   dtype=theano.config.floatX), borrow=True)
    train_set_session_mask = theano.shared(
        np.asarray(a=train_set[:, -max_turn - 1:-1],
                   dtype=theano.config.floatX), borrow=True)
    train_set_y = theano.shared(np.asarray(train_set[:, -1], dtype="int32"), borrow=True)

    val_set_lx = []
    val_set_lx_mask = []
    for i in range(max_turn):
        val_set_lx.append(theano.shared(
            np.asarray(a=dev_set[:, offset * i:offset * i + lsize],
                       dtype=theano.config.floatX), borrow=True))
        val_set_lx_mask.append(theano.shared(
            np.asarray(a=dev_set[:, offset * i + lsize:offset * i + 2 * lsize],
                       dtype=theano.config.floatX), borrow=True))
    val_set_rx = theano.shared(
        np.asarray(a=dev_set[:, offset * max_turn:offset * max_turn + lsize],
                   dtype=theano.config.floatX), borrow=True)
    val_set_rx_mask = theano.shared(
        np.asarray(a=dev_set[:, offset * max_turn + lsize:offset * max_turn + 2 * lsize],
                   dtype=theano.config.floatX), borrow=True)
    val_set_session_mask = theano.shared(
        np.asarray(a=dev_set[:, -max_turn - 1:-1],
                   dtype=theano.config.floatX), borrow=True)
    val_set_y = theano.shared(np.asarray(dev_set[:, -1], dtype="int32"), borrow=True)

    test_set_lx = []
    test_set_lx_mask = []
    for i in range(max_turn):
        test_set_lx.append(theano.shared(
            np.asarray(a=test_set[:, offset * i:offset * i + lsize],
                       dtype=theano.config.floatX), borrow=True))
        test_set_lx_mask.append(theano.shared(
            np.asarray(a=test_set[:, offset * i + lsize:offset * i + 2 * lsize],
                       dtype=theano.config.floatX), borrow=True))
    test_set_rx = theano.shared(
        np.asarray(a=test_set[:, offset * max_turn:offset * max_turn + lsize],
                   dtype=theano.config.floatX), borrow=True)
    test_set_rx_mask = theano.shared(
        np.asarray(a=test_set[:, offset * max_turn + lsize:offset * max_turn + 2 * lsize],
                   dtype=theano.config.floatX), borrow=True)
    test_set_session_mask = theano.shared(
        np.asarray(a=test_set[:, -max_turn - 1:-1],
                   dtype=theano.config.floatX), borrow=True)
    test_set_y = theano.shared(np.asarray(test_set[:, -1], dtype="int32"), borrow=True)

    dic = {}
    for i in range(max_turn):
        dic[lx[i]] = train_set_lx[i][index * batch_size:(index + 1) * batch_size]
        dic[lxmask[i]] = train_set_lx_mask[i][index * batch_size:(index + 1) * batch_size]
    dic[rx] = train_set_rx[index * batch_size:(index + 1) * batch_size]
    dic[sessionmask] = train_set_session_mask[index * batch_size:(index + 1) * batch_size]
    dic[rxmask] = train_set_rx_mask[index * batch_size:(index + 1) * batch_size]
    dic[y] = train_set_y[index * batch_size:(index + 1) * batch_size]

    val_dic = {}
    for i in range(max_turn):
        val_dic[lx[i]] = val_set_lx[i][index * batch_size:(index + 1) * batch_size]
        val_dic[lxmask[i]] = val_set_lx_mask[i][index * batch_size:(index + 1) * batch_size]
    val_dic[rx] = val_set_rx[index * batch_size:(index + 1) * batch_size]
    val_dic[sessionmask] = val_set_session_mask[index * batch_size:(index + 1) * batch_size]
    val_dic[rxmask] = val_set_rx_mask[index * batch_size:(index + 1) * batch_size]
    val_dic[y] = val_set_y[index * batch_size:(index + 1) * batch_size]

    test_dic = {}
    for i in range(max_turn):
        test_dic[lx[i]] = test_set_lx[i][index * batch_size:(index + 1) * batch_size]
        test_dic[lxmask[i]] = test_set_lx_mask[i][index * batch_size:(index + 1) * batch_size]
    test_dic[rx] = test_set_rx[index * batch_size:(index + 1) * batch_size]
    test_dic[sessionmask] = test_set_session_mask[index * batch_size:(index + 1) * batch_size]
    test_dic[rxmask] = test_set_rx_mask[index * batch_size:(index + 1) * batch_size]
    test_dic[y] = test_set_y[index * batch_size:(index + 1) * batch_size]

    # This is the first RNN.
    sentence2vec = GRU(n_in=word_embedding_size, n_hidden=hiddensize,
                       n_out=hiddensize, batch_size=batch_size)
    for i in range(max_turn):
        q_embedding.append(sentence2vec(llayer0_input[i], lxmask[i], True))
    r_embedding = sentence2vec(rlayer0_input, rxmask, True)

    # This is the concatenation after the first RNN, which concatenates the
    # last sentence to each of the preceding ones.
    for i in range(max_turn):
        q_embedding_Cat.append(T.concatenate([q_embedding[i], q_embedding[-1]], axis=2))
        q_embedding_Cat_mask.append(lxmask[i])
    r_embedding_Cat = T.concatenate([r_embedding, q_embedding[-1]], axis=2)
    r_embedding_Cat_mask = rxmask

    # This is the self-attention step
    sa = self_attention(n_in=hiddensize * 2)
    for i in range(max_turn):
        q_embedding_self_att.append(T.concatenate(
            [q_embedding_Cat[i], sa(q_embedding_Cat[i], q_embedding_Cat_mask[i])], axis=2))
    r_embedding_self_att = T.concatenate(
        [r_embedding_Cat, sa(r_embedding_Cat, r_embedding_Cat_mask)], axis=2)

    # This is the SRNN
    vec2svec = SGRU(n_in=hiddensize * 2, n_hidden=hiddensize,
                    n_out=hiddensize, batch_size=batch_size)
    for i in range(max_turn):
        q_embedding_self_att_rnn.append(vec2svec(q_embedding_self_att[i],
                                                 q_embedding_Cat_mask[i], True))
    r_embedding_self_att_rnn = vec2svec(r_embedding_self_att,
                                        r_embedding_Cat_mask, True)

    # This is the CNN with pooling and a fully connected layer
    pooling_layer = ConvSim(rng=rng, n_in=max_l, n_out=session_input_size,
                            hidden_size=hiddensize, session_size=session_hidden_size,
                            batch_size=batch_size)
    poolingoutput = []
    for i in range(max_turn):
        poolingoutput.append(pooling_layer(llayer0_input[i], rlayer0_input,
                                           q_embedding_self_att_rnn[i],
                                           r_embedding_self_att_rnn))

    # This is the second RNN
    session2vec = GRU(n_in=session_input_size, n_hidden=session_hidden_size,
                      n_out=session_hidden_size, batch_size=batch_size)
    res = session2vec(T.stack(poolingoutput, 1), sessionmask, True)

    # This is the final attention; its output feeds the classifier
    W = theano.shared(ortho_weight(session_hidden_size), borrow=True)
    W2 = theano.shared(glorot_uniform((hiddensize, session_hidden_size)), borrow=True)
    b = theano.shared(value=np.zeros((session_hidden_size,), dtype='float32'), borrow=True)
    U_s = theano.shared(glorot_uniform((session_hidden_size, 1)), borrow=True)
    final = T.dot(T.tanh(T.dot(res, W) +
                         T.dot(T.stack(q_embedding_self_att_rnn, 1)[:, :, -1, :], W2) +
                         b), U_s)
    weight = T.exp(T.max(final, 2)) * sessionmask
    weight2 = weight / T.sum(weight, 1)[:, None]
    final2 = T.sum(res * weight2[:, :, None], 1) + 1e-6

    # This is the classifier
    classifier = LogisticRegression(final2, session_hidden_size, 2, rng)

    # Calculate the cost and update the params with the gradients
    cost = classifier.negative_log_likelihood(y)
    error = classifier.errors(y)
    predict = classifier.predict_prob
    opt = Adam()

    # Collect params
    params = classifier.params
    params += sentence2vec.params
    params += session2vec.params
    params += pooling_layer.params
    params += [Words, W, b, W2, U_s]
    params += vec2svec.params
    params += sa.params

    # Make updater
    grad_updates = opt.Adam(cost=cost, params=params, lr=learning_rate)

    # The training step
    train_model = theano.function([index], cost, updates=grad_updates,
                                  givens=dic, on_unused_input='ignore')
    val_model = theano.function([index], [cost, error], givens=val_dic,
                                on_unused_input='ignore')

    best_dev = 1.
    n_train_batches = datasets[0].shape[0] / batch_size
    for i in xrange(n_epochs):
        cost_all = 0
        total = 0.
        for minibatch_index in np.random.permutation(range(n_train_batches)):
            batch_cost = train_model(minibatch_index)
            total = total + 1
            cost_all = cost_all + batch_cost
            if total % val_frequency == 0:
                sf.write('epoch %d, num %d, train_loss %f' % (i, total, cost_all / total))
                sf.write('\n')
                sf.flush()
                cost_dev = 0
                errors_dev = 0
                j = 0
                for minibatch_index in xrange(datasets[1].shape[0] / batch_size):
                    tcost, terr = val_model(minibatch_index)
                    cost_dev += tcost
                    errors_dev += terr
                    j = j + 1
                cost_dev = cost_dev / j
                errors_dev = errors_dev / j
                if cost_dev < best_dev:
                    best_dev = cost_dev
                    save_params(params, model_name + 'dev')
                sf.write("epoch %d, num %d, dev_loss %f" % (i, total, cost_dev))
                sf.write('\n')
                sf.write("epoch %d, num %d, dev_accuracy %f" % (i, total, 1 - errors_dev))
                sf.write('\n')
                sf.flush()
        cost_all = cost_all / n_train_batches
        sf.write("epoch %d loss %f" % (i, cost_all))
        sf.write('\n')
        sf.flush()
def apply(self, sentence, sentence_mask, use_noise=1):
    n_emb_lstm = self.n_emb_lstm
    n_emb_struct = self.n_emb_struct
    n_emb_share = self.n_emb_share
    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    if False:  # share only part of the embedding
        n_emb_all = n_emb_lstm + n_emb_struct - n_emb_share
        emb_all_range = T.arange(n_emb_all)
        emb_lstm_range = T.arange(n_emb_lstm)
        emb_struct_range = T.arange(n_emb_lstm - n_emb_share, n_emb_all)
        table = lookup_table(n_emb_all, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_all_range)
        state_below_lstm = table.apply(src, emb_lstm_range)
        state_below_struct = table.apply(src, emb_struct_range)
        self.layers.append(table)

        rnn = SLSTM(n_emb_lstm, n_emb_struct, n_emb_share, self.n_hids,
                    self.n_shids, self.n_structs)
        #rnn = LSTM(self.n_in, self.n_hids)
        hiddens = rnn.merge_out(state_below, state_below_lstm,
                                state_below_struct, src_mask)
        self.layers.append(rnn)
    elif True:  # use rnn_pyramid
        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = rnn_pyramid_layer(n_emb_lstm, self.n_hids)
        hiddens, cells, structs = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)
        self.structs = structs
    else:  # share all embedding
        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        #hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    rnn1 = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn1.apply(hiddens, src_mask)
    #hiddens = rnn.merge_out(state_below, src_mask)
    self.layers.append(rnn1)

    maxout = maxout_layer()
    states = T.concatenate([state_below, hiddens], axis=2)
    hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask)
    self.layers.append(maxout)

    #rnng = LSTM(n_emb_lstm, self.n_hids)
    #hiddens, cells = rnn.apply(state_below, src_mask)
    #hiddensg = rnng.merge_out(state_below, src_mask)
    #self.layers.append(rnng)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    #chunk = chunk_layer(n_lstm_in + n_lstm_out, n_lstm_out, n_chunk_out, 6)
    n_emb_hid = n_emb_lstm + self.n_hids
    emb_hid = T.concatenate([state_below, hiddens], axis=2)
    #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
    #hiddens = chunk.merge_out(hiddens, hiddens, src_mask, merge_how="for_struct",
    #                          state_below_other=state_below, n_other=n_emb_lstm)
    chunk = chunk_layer(n_emb_hid, self.n_hids, self.n_hids, self.n_structs)
    hiddens = chunk.merge_out(emb_hid, hiddens, src_mask, merge_how="for_struct",
                              state_below_other=None, n_other=0)
    #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
    #hiddens = chunk.merge_out(hiddens, hiddensg, src_mask, merge_how="both",
    #                          state_below_other=state_below, n_other=n_emb_lstm)
    self.layers.append(chunk)

    # apply dropout; in GroundHog, dropout is applied to the output of maxout
    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def main():
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    # TODO: adjust batch size
    batch_size = 128
    nb_epochs = 100
    lr = 0.001
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # MNIST Dataset
    trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                          download=True, transform=transforms.ToTensor())
    testset = torchvision.datasets.MNIST(root='./data', train=False,
                                         transform=transforms.ToTensor())
    # CIFAR10 Dataset
    #trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
    #testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)
    num_classes = 10

    model_names = ['MLP', 'LR', 'vgg16_bn', 'resnet18']
    optim_names = ['sgd', 'adam', 'lbfgs']
    for model_name in model_names:
        for opt in optim_names:
            # !!! TODO: every iteration should create a new model !!!
            print("creating model: ", model_name)
            print("using optimizer: ", opt)
            if model_name == 'vgg16_bn':
                model = models.vgg16_bn()
                model.classifier[6] = nn.Linear(4096, num_classes)
            elif model_name == 'resnet18':
                model = models.resnet18()
                model.fc = nn.Linear(512, num_classes)
            elif model_name == 'LR':
                model = LogisticRegression(784, num_classes)  # 3072 for CIFAR10, 784 for MNIST
            elif model_name == 'MLP':
                model = MLP(1 * 28 * 28)  # 3072 for CIFAR10, 784 for MNIST
            model.to(device)
            train(model_name, model, trainloader, testloader, device, opt,
                  nb_epochs, lr=lr)
####################################
# Read and preprocess data from files
####################################
df_train = pd.read_csv(fp_train, skipinitialspace=True)
df_test = pd.read_csv(fp_test, skipinitialspace=True)
df_train['income'].replace('<=50K', 0, inplace=True)
df_train['income'].replace('>50K', 1, inplace=True)
Xtrain, ytrain, Xtest = preprocess(df_train, df_test, features)

####################################
# Train the estimator and predict test data
####################################
if estimator == 'logistic':
    regr = LogisticRegression().fit(Xtrain, ytrain)
    ypred = np.around(regr.predict(Xtest)).astype(int)
elif estimator == 'generative':
    regr = NaiveBayes().fit(Xtrain, ytrain)
    ypred = regr.predict(Xtest)
else:
    raise ValueError("unknown estimator: {}".format(estimator))

####################################
# Write the result to file
####################################
df_pred = pd.DataFrame()
df_pred['id'] = np.arange(1, len(ypred) + 1)
df_pred['label'] = ypred
df_pred.to_csv(fp_ans, index=False)
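# preprocess() is defined elsewhere in the project. A minimal sketch of what it
# plausibly does for this census-income task, assuming `features` is the list of
# columns to keep: one-hot encode categoricals, align train/test columns, and
# standardize with the training statistics. Names and details are assumptions,
# not the original implementation.
def preprocess(df_train, df_test, features):
    y = df_train['income'].values
    Xtr = pd.get_dummies(df_train[features])
    Xte = pd.get_dummies(df_test[features])
    # give both frames the same one-hot columns, filling unseen levels with 0
    Xtr, Xte = Xtr.align(Xte, join='left', axis=1, fill_value=0)
    mu, sigma = Xtr.mean(), Xtr.std().replace(0, 1)
    return ((Xtr - mu) / sigma).values, y, ((Xte - mu) / sigma).values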
def apply_morph_attention(self, sentence, sentence_mask, sentence_morph,
                          sentence_morph_mask, use_noise=1):
    """
    sentence       : sentence * batch
    sentence_morph : sentence * batch * morph
    src_morph_emb  : sentence * batch * morph * n_emb_morph

    Pipeline:
    1. word and morph lookup -> dropout -> attention
    2. lstm -> dropout
    3. lstm -> maxout -> dropout
    4. logistic regression
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # word lookup table
    emb_lstm_range = T.arange(self.n_emb_lstm)
    table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
    src_emb = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    # morph lookup table
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    # first LSTM layer, attending over the morph embeddings of each word
    lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
    hiddens, cells = lstm_att_1st.apply(src_emb, src_morph_emb, src_mask)
    self.layers.append(lstm_att_1st)

    rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2nd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_3rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_3rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_3rd)

    # maxout over the concatenated word embedding and top hidden state
    # src_emb : sentence * batch * n_emb
    # hiddens : sentence * batch * n_hids
    maxout = MaxoutLayer()
    states = T.concatenate([src_emb, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids,
                           self.n_hids, src_mask, maxout_n_fold)
    self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
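# LstmMorphAttention's internals are not shown. A minimal numpy sketch of the
# attention pooling it is assumed to perform over the morpheme axis, based only
# on the shapes in the docstring (sentence * batch * morph * n_emb collapsed to
# sentence * batch * n_emb); names and shapes here are assumptions.
import numpy as np

sent_len, batch, n_morph, n_emb = 7, 4, 5, 8
src_morph_emb = np.random.randn(sent_len, batch, n_morph, n_emb)
query = np.random.randn(sent_len, batch, n_emb)  # e.g. the word embedding

scores = np.einsum('sbme,sbe->sbm', src_morph_emb, query)   # dot-product scores
weights = np.exp(scores - scores.max(-1, keepdims=True))
weights /= weights.sum(-1, keepdims=True)                   # softmax over morphs
pooled = np.einsum('sbm,sbme->sbe', weights, src_morph_emb)
# pooled: sentence * batch * n_emb, one morphology-aware vector per word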
test_dataset = TwoClassCifar10(config.root, train=False)
train_dataloader = data.DataLoader(train_dataset, config.batch_size,
                                   shuffle=True, num_workers=2)
test_dataloader = data.DataLoader(test_dataset, config.batch_size,
                                  shuffle=False, num_workers=2)
print(f"{datetime.now().ctime()} - Finish Loading Dataset")

print(
    f"{datetime.now().ctime()} - Start Creating Net, Criterion, Optimizer and Scheduler..."
)
conv_net = ConvNet(config.input_channel, 2)
lr_model = LogisticRegression(config.cifar10_input_size)
conv_criterion = nn.CrossEntropyLoss()
lr_criterion = nn.BCEWithLogitsLoss()
conv_optimizer = optim.SGD(conv_net.parameters(), config.lr,
                           momentum=config.momentum,
                           weight_decay=config.weight_decay)
lr_optimizer = optim.SGD(lr_model.parameters(), config.lr,
                         momentum=config.momentum,
                         weight_decay=config.weight_decay)
conv_scheduler = optim.lr_scheduler.CosineAnnealingLR(
    conv_optimizer, len(train_dataloader) * config.epochs,
    eta_min=config.eta_min)
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
    lr_optimizer, len(train_dataloader) * config.epochs,
    eta_min=config.eta_min)
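# The snippet stops before the training loop. A minimal sketch of one epoch for
# both models, assuming TwoClassCifar10 yields (image, label) pairs with labels
# in {0, 1} and that LogisticRegression outputs a single logit per sample: the
# BCE path needs float targets and a squeezed logit, the CE path integer labels.
# Stepping the schedulers per batch matches T_max = len(train_dataloader) * epochs.
for images, labels in train_dataloader:
    # ConvNet with 2-way softmax / cross-entropy
    conv_optimizer.zero_grad()
    conv_loss = conv_criterion(conv_net(images), labels)
    conv_loss.backward()
    conv_optimizer.step()
    conv_scheduler.step()

    # Logistic regression with a single logit / BCE-with-logits
    lr_optimizer.zero_grad()
    flat = images.view(images.size(0), -1)  # flatten to cifar10_input_size
    lr_loss = lr_criterion(lr_model(flat).squeeze(1), labels.float())
    lr_loss.backward()
    lr_optimizer.step()
    lr_scheduler.step()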
def apply(self, sentence, sentence_mask, use_noise=1):
    n_emb_lstm = self.n_emb_lstm
    n_emb_struct = self.n_emb_struct
    n_emb_share = self.n_emb_share

    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    emb_lstm_range = T.arange(n_emb_lstm)
    table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
    state_below = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    if self.dropout < 1.0:
        state_below = dropout_layer(state_below, use_noise, self.dropout)

    # first LSTM layer
    rnn = LSTM(n_emb_lstm, self.n_hids)
    hiddens, cells = rnn.apply(state_below, src_mask)
    self.layers.append(rnn)

    # second LSTM layer
    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)
    rnn1 = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn1.apply(hiddens, src_mask)
    self.layers.append(rnn1)

    # pyramid RNN over the stacked hiddens together with the embeddings
    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)
    rnnp = rnn_pyramid_layer(self.n_hids, n_emb_lstm, self.n_hids)
    hiddens, cells, structs, pyramid = rnnp.apply(hiddens, state_below, src_mask)
    self.layers.append(rnnp)
    self.rnn_len = rnnp.n_steps
    self.sent_len = sentence.shape[0]

    # maxout over the concatenated embedding and pyramid output
    maxout = maxout_layer()
    states = T.concatenate([state_below, hiddens], axis=2)
    maxout_n_fold = 2
    hiddens = maxout.apply(states, n_emb_lstm + self.n_hids,
                           self.n_hids, src_mask, maxout_n_fold)
    self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)
    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
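# With maxout_n_fold = 2, the maxout layer halves its feature dimension by
# taking an element-wise max over pairs of features. A minimal numpy sketch of
# that folding step alone (the real maxout_layer also applies an affine
# projection and the sequence mask; this only illustrates the fold):
import numpy as np

def maxout_fold(x, n_fold=2):
    # x: sentence * batch * (n_out * n_fold)  ->  sentence * batch * n_out
    s, b, d = x.shape
    return x.reshape(s, b, d // n_fold, n_fold).max(axis=-1)

x = np.random.randn(7, 4, 2 * 3)
assert maxout_fold(x).shape == (7, 4, 3)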
def test_final():
    b_time = datetime.datetime.now()
    print('Beginning reading data')
    DATA_TRAIN_PATH = get_filepath('train')
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
    print("Finish loading in {s} seconds".format(
        s=(datetime.datetime.now() - b_time).total_seconds()))
    data, _, _ = standardize(tX)

    # Feature transform: first 10 raw features + 5 principal components
    # + standardized interaction terms
    nb_pc = 5
    print("test the PCA with {} elements".format(nb_pc))
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)
    print("get interactions")
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    print("select first 10 data entries with pc data")
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]

    e_time = datetime.datetime.now()
    print("Finish data reading in {s} seconds".format(
        s=(e_time - b_time).total_seconds()))

    logistic = LogisticRegression((y, data), regularizer='Lasso',
                                  regularizer_p=0.)
    result = logistic.cross_validation(4, [0.], 'regularizer_p', lr=0.1,
                                       batch_size=32, max_iters=1200,
                                       early_stop=400)
    weight = result[0]

    print("loading the test set")
    _, test_data, test_ids = load_test_data(clean=False)

    # Apply the same feature transform to the test set
    data, _, _ = standardize(test_data)
    pcs, pc_data = pca_transform(data, nb_pc, concatenate=False)
    interaction = interactions(data, range(0, 10))
    interaction, _, _ = standardize(interaction)
    data = np.c_[data[:, 0:10], pc_data]
    data = np.c_[data, interaction]

    # Average the per-fold predictions, then threshold into {-1, 1}
    y_pred = [logistic(data, w) for w in weight]
    y_pred = np.mean(np.array(y_pred), axis=0)
    y_pred[y_pred <= 0.5] = -1
    y_pred[y_pred > 0.5] = 1
    output_path = get_dataset_dir() + '/second_submission.csv'
    create_csv_submission(test_ids, y_pred, output_path)
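# create_csv_submission() comes from the project's helper file and is not shown
# here. A typical implementation for this kind of (id, prediction) submission,
# given as an assumption rather than the exact original:
import csv

def create_csv_submission(ids, y_pred, name):
    with open(name, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, delimiter=",",
                                fieldnames=['Id', 'Prediction'])
        writer.writeheader()
        for r1, r2 in zip(ids, y_pred):
            writer.writerow({'Id': int(r1), 'Prediction': int(r2)})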
labels = unpickle('../data/meta')
interesting_coarse_labels = [0, 1]  # aquatic mammals and fish

train, y = [], []
test, y_test = [], []
for i in range(len(train_data[b'coarse_labels'])):
    for j in interesting_coarse_labels:
        if train_data[b'coarse_labels'][i] == j:
            train.append(train_data[b'data'][i])
            y.append(j)
            break
for i in range(len(test_data[b'coarse_labels'])):
    for j in interesting_coarse_labels:
        if test_data[b'coarse_labels'][i] == j:
            test.append(test_data[b'data'][i])
            y_test.append(j)
            break

train = np.array(train)
y = np.array(y)
test = np.array(test)
y_test = np.array(y_test)

weight_matrix, losses = LogisticRegression.train(train, y, iteration=1,
                                                 learning_rate=0.1)
LogisticRegression.accuracy(weight_matrix, test, y_test)
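# The double loop above can be vectorized. An equivalent numpy version that
# produces the same arrays in the same order, without a Python-level scan:
coarse = np.asarray(train_data[b'coarse_labels'])
mask = np.isin(coarse, interesting_coarse_labels)
train, y = np.asarray(train_data[b'data'])[mask], coarse[mask]

coarse_t = np.asarray(test_data[b'coarse_labels'])
mask_t = np.isin(coarse_t, interesting_coarse_labels)
test, y_test = np.asarray(test_data[b'data'])[mask_t], coarse_t[mask_t]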
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import dataset.MNIST.mnist as mnist
import model.LogisticRegression as LR

print("start downloading MNIST")
mnist_data_sets = mnist.read_data_sets("../data/mnist", one_hot=True)

print("start logistic regression")
lr = LR.LogisticRegression()
lr.build_model()

# train on the full 55000-example training set in a single batch
batch_xs, batch_ys = mnist_data_sets.train.next_batch(55000)
lr.train(batch_xs, batch_ys)

# evaluate on the test set
batch_xs, batch_ys = mnist_data_sets.test.images, mnist_data_sets.test.labels
lr.test(batch_xs, batch_ys)
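# model.LogisticRegression is a local module whose internals are not shown. A
# minimal TF1-style sketch of what build_model/train/test could look like for
# one-hot MNIST softmax regression; this is an assumption, not the actual module.
class LogisticRegressionSketch:
    def build_model(self):
        self.x = tf.placeholder(tf.float32, [None, 784])
        self.y = tf.placeholder(tf.float32, [None, 10])
        W = tf.Variable(tf.zeros([784, 10]))
        b = tf.Variable(tf.zeros([10]))
        self.logits = tf.matmul(self.x, W) + b
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                       logits=self.logits))
        self.train_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def train(self, xs, ys, steps=1000):
        for _ in range(steps):
            self.sess.run(self.train_op, feed_dict={self.x: xs, self.y: ys})

    def test(self, xs, ys):
        correct = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        print(self.sess.run(accuracy, feed_dict={self.x: xs, self.y: ys}))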