def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
    super(RNNLM, self).__init__()
    with self.name_scope():
        self.embed = nn.Embedding(vocab_size, embed_size,
                                  weight_initializer=init.Uniform(0.1))
        self.lstm = rnn.LSTM(hidden_size, num_layers, layout='NTC')
        self.linear = nn.Dense(vocab_size, weight_initializer=init.Uniform(0.1))
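# Hypothetical usage sketch for the RNNLM block above (sizes and context are placeholders, not from
# the source); assumes `import mxnet as mx` and the gluon `nn`, `rnn`, `init` imports used above.
lm = RNNLM(vocab_size=10000, embed_size=200, hidden_size=256, num_layers=2)
lm.initialize(ctx=mx.cpu())  # layers declared with weight_initializer=init.Uniform(0.1) keep that init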
def build_dcmn(batch_size, max_len, ctx):
    # do training
    train_sentences = './data_tmp/train_sentences.pkl'
    train_labels = './data_tmp/train_labels.pkl'
    test_sentences = './data_tmp/dev_sentences.pkl'
    test_labels = './data_tmp/dev_labels.pkl'
    max_pad_length = 16
    train_sample_num = None
    test_sample_num = None
    dataloader_train = preprocess.get_dataloader(
        sts=train_sentences, labels=train_labels, sample_num=train_sample_num,
        max_seq_length=max_len, batch_size=batch_size, max_pad_length=max_pad_length,
        ctx=ctx, dataset_load_path='./data_tmp/dataset-train.pkl')
    dataloader_test = preprocess.get_dataloader(
        sts=test_sentences, labels=test_labels, sample_num=test_sample_num,
        max_seq_length=max_len, batch_size=batch_size, max_pad_length=max_pad_length,
        ctx=ctx, dataset_load_path='./data_tmp/dataset-test.pkl')
    dcmn = model_dcmn.DCMN(num_candidates=max_pad_length - 2)
    dcmn.initialize(init=init.Uniform(.001), ctx=ctx)
    loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
    lr, clip = 5e-5, 5
    trainer = gluon.Trainer(dcmn.collect_params(), 'adam', {
        'learning_rate': lr,
        'clip_gradient': clip
    })
    return dcmn, dataloader_train, dataloader_test, trainer, loss_func
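# Hypothetical invocation of build_dcmn above; batch size, sequence length, and device are
# placeholders, and it assumes the ./data_tmp pickle files referenced above already exist.
dcmn, dataloader_train, dataloader_test, trainer, loss_func = build_dcmn(
    batch_size=4, max_len=128, ctx=mx.gpu(0))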
def _get_embedding(self):
    if self._use_pretrained_embedding:
        embedding = EmbeddingBlock(
            CORPUS_WORDS=self._vocab_size,
            CORPUS_CHARACTERS=self._char_vocab_size,
            DIM_WORD_EMBED=self._embed_size,
            use_highway=self._use_highway,
            WORD_EMBEDDING_DROPOUT=self._drop_i,
            DIM_CHAR_EMBED=self._char_embed_size,
            MAX_CHARACTER_PER_WORD=self._max_word_length)
    else:
        embedding = nn.HybridSequential()
        with embedding.name_scope():
            embedding_block = nn.Embedding(
                self._vocab_size, self._embed_size,
                weight_initializer=init.Uniform(0.1))
            if self._drop_e:
                nlp.model.utils.apply_weight_drop(embedding_block, 'weight',
                                                  self._drop_e, axes=(1,))
            embedding.add(embedding_block)
            if self._drop_i:
                embedding.add(nn.Dropout(self._drop_i, axes=(0,)))
    return embedding
def __init__(self, initializer=init.Uniform(scale=0.07), batchnorm=False, dropout=0):
    mxn.random.seed(42)
    self.net = Model(batchnorm, dropout)
    self.net.initialize(init=initializer)
    self.lossfn = gluon.loss.SoftmaxCrossEntropyLoss()
    self.train_loss_his = []
    self.epoch_his = []
    self.val_loss_his = []
def _get_embedding(self):
    embedding = nn.HybridSequential()
    with embedding.name_scope():
        embedding.add(nn.Embedding(self._vocab_size, self._embed_size,
                                   weight_initializer=init.Uniform(0.1)))
        if self._dropout:
            embedding.add(nn.Dropout(self._dropout))
    return embedding
def build_model(A, X):
    model = nn.Sequential()
    with model.name_scope():
        features, out_units = bulid_features(A, X)
        model.add(features)
        classifier = LogisticRegressor(out_units)
        model.add(classifier)
    model.initialize(init.Uniform(1))
    return model, features
def __init__(self, vocab, embed_size, num_hiddens, num_layers,
             bidirectional, num_outputs, **kwargs):
    super(SentimentNet, self).__init__(**kwargs)
    with self.name_scope():
        self.embedding = nn.Embedding(
            len(vocab), embed_size, weight_initializer=init.Uniform(0.1))
        self.encoder = rnn.LSTM(num_hiddens, num_layers=num_layers,
                                bidirectional=bidirectional,
                                input_size=embed_size)
        self.decoder = nn.Dense(num_outputs, flatten=False)
def _get_embedding(self):
    embedding = nn.HybridSequential()
    with embedding.name_scope():
        embedding_block = nn.Embedding(self._vocab_size, self._embed_size,
                                       weight_initializer=init.Uniform(0.1))
        if self._drop_e:
            apply_weight_drop(embedding_block, 'weight', self._drop_e, axes=(1,))
        embedding.add(embedding_block)
        if self._drop_i:
            embedding.add(nn.Dropout(self._drop_i, axes=(0,)))
    return embedding
def create_gluon_model(initializer):
    net = nn.HybridSequential()
    net.add(
        nn.Conv2D(channels=6, kernel_size=5, activation="relu"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation="relu"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10),
    )
    if initializer == 1:
        net.initialize(init=init.Xavier(), ctx=mx.cpu())
    elif initializer == 2:
        # variance will not remain the same across layers
        net.initialize(init=init.Uniform(1), ctx=mx.cpu())
    else:
        # weights start nearly identical (almost no symmetry breaking), so gradients differ very little
        net.initialize(init=init.Uniform(0.0001), ctx=mx.cpu())
    return net
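# Hypothetical usage of create_gluon_model above: the integer flag only selects the initializer
# (1 -> Xavier, 2 -> Uniform(1), anything else -> Uniform(0.0001)); assumes `import mxnet as mx`
# and the gluon/init imports used above. Input size is a placeholder single 28x28 grayscale image.
lenet = create_gluon_model(initializer=1)
lenet.hybridize()                          # valid because the network is a HybridSequential
out = lenet(mx.nd.zeros((1, 1, 28, 28)))   # for a 28x28 input this yields logits of shape (1, 10)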
def model_1_fit(no_epochs, batch_size, ctx, mx_train_data, mx_valid_data):
    train_loss_hist = []
    train_acc_hist = []
    valid_loss_hist = []
    valid_acc_hist = []
    mx_net = mx_nn.Sequential()
    mx_net.add(mx_nn.Dense(512, activation='relu'),
               mx_nn.Dense(128, activation='relu'),
               mx_nn.Dense(64, activation='relu'),
               mx_nn.Dense(32, activation='relu'),
               mx_nn.Dense(16, activation='relu'),
               mx_nn.Dense(10))
    print('The Model')
    print(mx_net)
    mx_loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    # mx_net.initialize(init=init.Xavier())
    mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
    mx_trainer = gluon.Trainer(mx_net.collect_params(), 'adam', {
        'learning_rate': 1e-3,
        'beta1': 0.9,
        'beta2': 0.999
    })
    print('\nFitting the model\n')
    for epoch in range(no_epochs):
        train_loss, train_acc, valid_acc, valid_loss = 0.0, 0.0, 0.0, 0.0
        tic = time.time()
        for data, label in mx_train_data:
            with autograd.record():
                output = mx_net(data)
                loss = mx_loss_fn(output, label)
            loss.backward()
            mx_trainer.step(batch_size=batch_size)
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        for data, label in mx_valid_data:
            output = mx_net(data)
            loss = mx_loss_fn(output, label)
            valid_acc += acc(output, label)
            valid_loss += loss.mean().asscalar()
        train_loss_hist.append(train_loss / len(mx_train_data))
        train_acc_hist.append(train_acc / len(mx_train_data))
        valid_loss_hist.append(valid_loss / len(mx_valid_data))
        valid_acc_hist.append(valid_acc / len(mx_valid_data))
        print("Epoch %d: train loss %.3f, train acc %.3f %%, val loss %.3f, val acc %.3f %%, in %.1f sec"
              % (epoch, train_loss / len(mx_train_data), train_acc / len(mx_train_data),
                 valid_loss / len(mx_valid_data), valid_acc / len(mx_valid_data), time.time() - tic))
    mx_net.save_parameters('../weights/task_a_model_1.params')
    return train_loss_hist, train_acc_hist, valid_loss_hist, valid_acc_hist
def init_weights(self, ctx):
    initrange = 0.1
    self.pgm_embed.initialize(init.Uniform(initrange), ctx=ctx)
    self.logit1.initialize(init.Uniform(initrange), ctx=ctx)
    self.logit2.initialize(init.Uniform(initrange), ctx=ctx)
    self.regress1.initialize(init.Uniform(initrange), ctx=ctx)
    self.regress2.initialize(init.Uniform(initrange), ctx=ctx)
    self.core.initialize(init.Uniform(initrange), ctx=ctx)
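# Note (added): mxnet's init.Uniform(scale) samples weights uniformly from [-scale, scale], so
# init.Uniform(initrange) above initializes every listed sub-block in the range [-0.1, 0.1].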
def __init__(self, mode, vocab_size, embed_size, num_hiddens, num_layers,
             drop_prob=0.5, **kwargs):
    super(RNNModel, self).__init__(**kwargs)
    with self.name_scope():
        self.dropout = nn.Dropout(drop_prob)
        # Map word indices to word vectors; these embeddings are also model parameters.
        self.embedding = nn.Embedding(vocab_size, embed_size,
                                      weight_initializer=init.Uniform(0.1))
        if mode == 'rnn_relu':
            self.rnn = rnn.RNN(num_hiddens, num_layers, activation='relu',
                               dropout=drop_prob, input_size=embed_size)
        elif mode == 'rnn_tanh':
            self.rnn = rnn.RNN(num_hiddens, num_layers, activation='tanh',
                               dropout=drop_prob, input_size=embed_size)
        elif mode == 'lstm':
            self.rnn = rnn.LSTM(num_hiddens, num_layers, dropout=drop_prob,
                                input_size=embed_size)
        elif mode == 'gru':
            self.rnn = rnn.GRU(num_hiddens, num_layers, dropout=drop_prob,
                               input_size=embed_size)
        else:
            raise ValueError('Invalid mode %s. Options are rnn_relu, '
                             'rnn_tanh, lstm, and gru' % mode)
        self.dense = nn.Dense(vocab_size, in_units=num_hiddens)
        self.num_hiddens = num_hiddens
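# Hypothetical construction of the RNNModel above (sizes are illustrative, not from the source):
# mode='lstm' routes construction through the rnn.LSTM branch shown in __init__.
model = RNNModel(mode='lstm', vocab_size=10000, embed_size=100,
                 num_hiddens=256, num_layers=2, drop_prob=0.5)
model.initialize(init.Xavier(), ctx=mx.cpu())  # the embedding keeps its per-layer Uniform(0.1) init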
def dropout_model_fit(no_epochs, batch_size, ctx, mx_train_data, mx_valid_data, drop_prob):
    train_loss_hist = []
    train_acc_hist = []
    valid_loss_hist = []
    valid_acc_hist = []
    mx_net = MLP_DROP(drop_prob)
    print('\nThe Model with Dropout Probability %.1f \n' % drop_prob)
    print(mx_net)
    mx_loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    # mx_net.initialize(init=init.Xavier())
    mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
    mx_trainer = gluon.Trainer(mx_net.collect_params(), 'adam',
                               {'learning_rate': 1e-3, 'beta1': 0.9, 'beta2': 0.999})
    print('\nFitting the model\n')
    for epoch in range(no_epochs):
        train_loss, train_acc, valid_acc, valid_loss = 0.0, 0.0, 0.0, 0.0
        tic = time.time()
        for data, label in mx_train_data:
            with autograd.record():
                output = mx_net(data)
                loss = mx_loss_fn(output, label)
            loss.backward()
            mx_trainer.step(batch_size=batch_size)
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        for data, label in mx_valid_data:
            output = mx_net(data)
            loss = mx_loss_fn(output, label)
            valid_acc += acc(output, label)
            valid_loss += loss.mean().asscalar()
        train_loss_hist.append(train_loss / len(mx_train_data))
        train_acc_hist.append(train_acc / len(mx_train_data))
        valid_loss_hist.append(valid_loss / len(mx_valid_data))
        valid_acc_hist.append(valid_acc / len(mx_valid_data))
        print("Epoch %d: train loss %.3f, train acc %.3f %%, val loss %.3f, val acc %.3f %%, in %.1f sec"
              % (epoch, train_loss / len(mx_train_data), train_acc / len(mx_train_data),
                 valid_loss / len(mx_valid_data), valid_acc / len(mx_valid_data), time.time() - tic))
    mx_net.save_parameters('../weights/task_b_exp_3_dropout_' + str(int(drop_prob * 10)) + '.params')
    return train_loss_hist, train_acc_hist, valid_loss_hist, valid_acc_hist
im_train = train_data / 255
im_valid = valid_data / 255
train_dataset = mx.gluon.data.dataset.ArrayDataset(im_train, train_label)
valid_dataset = mx.gluon.data.dataset.ArrayDataset(im_valid, valid_label)
mx_train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
mx_valid_data = gluon.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

print('Training the Model 1 with Optimizer = SGD')
mx_net = MLP()
mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
mx_trainer = gluon.Trainer(mx_net.collect_params(), 'sgd', {
    'learning_rate': 0.1,
    'momentum': 0.9
})
train_loss_hist_m1, train_acc_hist_m1, valid_loss_hist_m1, valid_acc_hist_m1 = model_fit(
    mx_net, no_epochs, batch_size, mx_train_data, mx_valid_data, 'sgd', mx_trainer)
print('Finished Training the Model 1')

print('\nTraining the Model 2 with Optimizer = NAG')
mx_net = MLP()
mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
mx_trainer = gluon.Trainer(mx_net.collect_params(), 'nag', {
    'learning_rate': 0.1,
    'momentum': 0.9
f = np.loadtxt(dirTrain + "image_train_features.txt", delimiter=' ')
l = np.loadtxt(dirTrain + "image_train_labels.txt", delimiter=' ')
features = nd.array(f)
labels = nd.array(l)
data_num = len(f)
batch_size = 500
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(100, activation='sigmoid'),
        nn.Dense(100, activation='sigmoid'),
        nn.Dense(3))
net.initialize(init.Uniform(scale=20))
loss = gloss.L1Loss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})


def accuracy(y_hat, y, error):
    sum_acc = 0
    yy = y_hat - y
    yyy = yy.asnumpy()
    yyy = np.abs(yyy)
    i = 0
    for i, val in enumerate(yyy):
        sum_acc = sum_acc + equal(val, error)
if __name__ == '__main__':
    if args.inference:
        # do inference
        dcmn = model.DCMN()
        dcmn.load_parameters(args.model_params)
        sts = args.sample.split('|')
        samples = [[sentence.strip() for sentence in sts]]
        inference(dcmn, samples)
    else:
        # do training
        dataloader_train = preprocess.get_dataloader(sts=args.train_sentences,
                                                     labels=args.train_labels)
        dataloader_test = preprocess.get_dataloader(sts=args.test_sentences,
                                                    labels=args.test_labels)
        dcmn = model.DCMN()
        dcmn.initialize(init=init.Uniform(.001), ctx=mx.gpu())
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        lr, clip = 5e-4, 2.5
        trainer = gluon.Trainer(dcmn.collect_params(), 'adam', {
            'learning_rate': lr,
            'clip_gradient': clip
        })
        train.train_valid(dataloader_train, dataloader_test, dcmn, loss_func,
                          trainer, num_epoch=15, ctx=mx.gpu())
        dcmn.save_parameters('dcmn8k.params')
def __init__(self, feature_dict, args, ctx, task, **kwargs):
    """{"sparse":[SingleFeat],"dense":[SingleFeat]}"""
    super(xDeepFM, self).__init__(**kwargs)
    # ??
    util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
    # self.feature_sizes = args.FEATURE_SIZE
    self.field_size = args.FIELD_NUM
    self.feature_dict = feature_dict
    print('field_size:')
    print(self.field_size)
    if args.TASK == 'finish':
        self.embedding_size = args.FINISH_EMBEDDING_SIZE
        self.batch_size = args.FINISH_BATCH_SIZE
    else:
        self.embedding_size = args.LIKE_EMBEDDING_SIZE
        self.batch_size = args.LIKE_BATCH_SIZE
    self.config_name = args.CONFIG_NAME
    # self.dropout_prob = args.DROPOUT_PROB
    self.task = task
    # self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    if args.LOSS == 'l2loss':
        self.loss = gloss.L2Loss()
    else:
        self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    self.ctx = ctx
    self.embedding_dict = OrderedDict()
    self.dense_dict = OrderedDict()
    with self.name_scope():
        if self.task == 'finish':
            self.layer_list = [np.int(x) for x in args.FINISH_LAYER]
            self.dropout = args.FINISH_DROPOUT_PROB
        else:
            self.layer_list = [np.int(x) for x in args.LIKE_LAYER]
            self.dropout = args.LIKE_DROPOUT_PROB
        # self.params.get('v', shape=(self.field_size, self.embedding_size))
        self.dnn_out = nn.Dense(1, use_bias=False)
        self.register_child(self.dnn_out)
        for feat in feature_dict['sparse']:
            self.embedding_dict[feat.feat_name] = nn.Embedding(
                feat.feat_num, self.embedding_size)
        for feat in feature_dict['dense']:
            self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)
        for emb_k, emb_v in self.embedding_dict.items():
            self.register_child(emb_v)
        for den_k, den_v in self.dense_dict.items():
            self.register_child(den_v)
        self.linear_logit_dense = nn.Dense(1, use_bias=False)
        self.register_child(self.linear_logit_dense)
        self.linear_logit_embedding_bn = nn.BatchNorm()
        self.register_child(self.linear_logit_embedding_bn)
        self.dense_list = []
        self.dropout_list = []
        self.bn_list = []
        self.activation_list = []
        for i in range(len(self.layer_list)):
            self.dense_list.append(nn.Dense(self.layer_list[i]))
            self.dropout_list.append(nn.Dropout(self.dropout))
            self.bn_list.append(nn.BatchNorm())
            self.activation_list.append(nn.Activation('relu'))
            self.register_child(self.dense_list[i])
            self.register_child(self.dropout_list[i])
            self.register_child(self.bn_list[i])
            self.register_child(self.activation_list[i])
        # if True: print('true')
        self.layer_size = [np.int(x) for x in args.CONV1D_LAYER]
        # self.cin_net = CIN(self.embedding_size, self.field_size, (128, 64), self.ctx)
        # print('oo')
        # self.cin_net.initialize()
        # print('uu')
        # self.register_child(self.cin_net)
        self.cin_dense = nn.Dense(1)
        self.register_child(self.cin_dense)
        self.cin_bn = nn.BatchNorm()
        self.register_child(self.cin_bn)
        self.field_nums = [self.field_size]
        self.conv_list = []
        for idx, size in enumerate(self.layer_size):
            self.conv_list.append(
                nn.Conv1D(channels=size, kernel_size=1, strides=1, padding=0,
                          activation='relu',
                          in_channels=self.field_nums[0] * self.field_nums[-1],
                          weight_initializer=init.Uniform()))
            self.field_nums.append(size)
            self.register_child(self.conv_list[idx])
    im_valid, valid_label)
mx_train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
mx_valid_data = gluon.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

print('Training the Model 1 with Uniform Initialization')
train_loss_hist_m0, train_acc_hist_m0, valid_loss_hist_m0, valid_acc_hist_m0 = model_fit(
    no_epochs, batch_size, ctx, mx_train_data, mx_valid_data,
    init_type=init.Uniform(), path='uniform')
print('Finished Training the Model 1')

print('Training the Model 2 with Normal Initialization')
train_loss_hist_m1, train_acc_hist_m1, valid_loss_hist_m1, valid_acc_hist_m1 = model_fit(
    no_epochs, batch_size, ctx, mx_train_data, mx_valid_data,
    init_type=init.Normal(), path='normal')
print('Finished Training the Model 2')

print('\nTraining the Model 3 with Xavier Initialization')
features = nd.array(f).copyto(ctx)
labels = nd.array(l).copyto(ctx)
labels_test = nd.zeros(labels.shape, ctx)
data_num = len(f)
batch_size = 500
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(10, activation='sigmoid'),
        nn.Dense(100, activation='sigmoid'),
        nn.Dense(10, activation='sigmoid'),
        nn.Dense(3))
net.initialize(init.Uniform(scale=20), ctx=ctx)
loss = gloss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})


def accuracy(y_hat, y, error):
    sum_acc = 0
    yy = y_hat - y
    yyy = yy.asnumpy()
    yyy = np.abs(yyy)
    i = 0
    for i, val in enumerate(yyy):
        sum_acc = sum_acc + equal(val, error)