Example #1
 def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
     super(RNNLM, self).__init__()
     with self.name_scope():
         self.embed = nn.Embedding(vocab_size,
                                   embed_size,
                                   weight_initializer=init.Uniform(0.1))
         self.lstm = rnn.LSTM(hidden_size, num_layers, layout='NTC')
         self.linear = nn.Dense(vocab_size,
                                weight_initializer=init.Uniform(0.1))
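For reference, init.Uniform(scale) as used above samples every weight from the uniform range [-scale, scale]. A minimal standalone sketch (the Dense layer and its shape are illustrative, not part of the example):

from mxnet import init
from mxnet.gluon import nn

layer = nn.Dense(4, in_units=3)
layer.initialize(init.Uniform(0.1))  # each weight drawn from U(-0.1, 0.1)
print(layer.weight.data())           # all values lie within [-0.1, 0.1]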
Example #2
def build_dcmn(batch_size, max_len, ctx):
    # build the dataloaders, model, loss function, and trainer
    train_sentences = './data_tmp/train_sentences.pkl'
    train_labels = './data_tmp/train_labels.pkl'
    test_sentences = './data_tmp/dev_sentences.pkl'
    test_labels = './data_tmp/dev_labels.pkl'
    max_pad_length = 16
    train_sample_num = None
    test_sample_num = None
    dataloader_train = preprocess.get_dataloader(
        sts=train_sentences, labels=train_labels, sample_num=train_sample_num,
        max_seq_length=max_len, batch_size=batch_size,
        max_pad_length=max_pad_length, ctx=ctx,
        dataset_load_path='./data_tmp/dataset-train.pkl')
    dataloader_test = preprocess.get_dataloader(
        sts=test_sentences, labels=test_labels, sample_num=test_sample_num,
        max_seq_length=max_len, batch_size=batch_size,
        max_pad_length=max_pad_length, ctx=ctx,
        dataset_load_path='./data_tmp/dataset-test.pkl')
    dcmn = model_dcmn.DCMN(num_candidates=max_pad_length - 2)
    dcmn.initialize(init=init.Uniform(.001), ctx=ctx)
    loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
    lr, clip = 5e-5, 5
    trainer = gluon.Trainer(dcmn.collect_params(), 'adam', {
        'learning_rate': lr,
        'clip_gradient': clip
    })
    return dcmn, dataloader_train, dataloader_test, trainer, loss_func
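A hedged usage sketch of the builder above (the batch size, sequence length, and context are illustrative; preprocess and model_dcmn come from the surrounding project):

import mxnet as mx

dcmn, dl_train, dl_test, trainer, loss_func = build_dcmn(
    batch_size=8, max_len=64, ctx=mx.cpu())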
Example #3
 def _get_embedding(self):
     if self._use_pretrained_embedding:
         embedding = EmbeddingBlock(
             CORPUS_WORDS=self._vocab_size,
             CORPUS_CHARACTERS=self._char_vocab_size,
             DIM_WORD_EMBED=self._embed_size,
             use_highway=self._use_highway,
             WORD_EMBEDDING_DROPOUT=self._drop_i,
             DIM_CHAR_EMBED=self._char_embed_size,
             MAX_CHARACTER_PER_WORD=self._max_word_length)
     else:
         embedding = nn.HybridSequential()
         with embedding.name_scope():
             embedding_block = nn.Embedding(
                 self._vocab_size,
                 self._embed_size,
                 weight_initializer=init.Uniform(0.1))
             if self._drop_e:
                 nlp.model.utils.apply_weight_drop(embedding_block,
                                                   'weight',
                                                   self._drop_e,
                                                   axes=(1, ))
             embedding.add(embedding_block)
             if self._drop_i:
                 embedding.add(nn.Dropout(self._drop_i, axes=(0, )))
     return embedding
Example #4
 def __init__(self, initializer=init.Uniform(scale=0.07), batchnorm=False, dropout=0):
     mxn.random.seed(42)
     self.net = Model(batchnorm, dropout)
     self.net.initialize(init=initializer)
     self.lossfn = gluon.loss.SoftmaxCrossEntropyLoss()
     self.train_loss_his = []
     self.epoch_his = []
     self.val_loss_his = []
Example #5
 def _get_embedding(self):
     embedding = nn.HybridSequential()
     with embedding.name_scope():
         embedding.add(nn.Embedding(self._vocab_size, self._embed_size,
                                    weight_initializer=init.Uniform(0.1)))
         if self._dropout:
             embedding.add(nn.Dropout(self._dropout))
     return embedding
Example #6
def build_model(A, X):
    model = nn.Sequential()
    with model.name_scope():
        features, out_units = bulid_features(A, X)
        model.add(features)
        classifier = LogisticRegressor(out_units)
        model.add(classifier)
    model.initialize(init.Uniform(1))
    return model, features
Example #7
 def __init__(self, vocab, embed_size, num_hiddens, num_layers,
              bidirectional, num_outputs, **kwargs):
     super(SentimentNet, self).__init__(**kwargs)
     with self.name_scope():
         self.embedding = nn.Embedding(
             len(vocab), embed_size, weight_initializer=init.Uniform(0.1))
         self.encoder = rnn.LSTM(num_hiddens, num_layers=num_layers,
                                 bidirectional=bidirectional,
                                 input_size=embed_size)
         self.decoder = nn.Dense(num_outputs, flatten=False)
Example #8
 def _get_embedding(self):
     embedding = nn.HybridSequential()
     with embedding.name_scope():
         embedding_block = nn.Embedding(self._vocab_size, self._embed_size,
                                        weight_initializer=init.Uniform(0.1))
         if self._drop_e:
             apply_weight_drop(embedding_block, 'weight', self._drop_e, axes=(1,))
         embedding.add(embedding_block)
         if self._drop_i:
             embedding.add(nn.Dropout(self._drop_i, axes=(0,)))
     return embedding
Example #9
def create_gluon_model(initializer):
    net = nn.HybridSequential()
    net.add(
        nn.Conv2D(channels=6, kernel_size=5, activation="relu"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation="relu"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10),
    )
    if initializer == 1:
        net.initialize(init=init.Xavier(), ctx=mx.cpu())
    elif initializer == 2:
        # variance will not remain the same across layers
        net.initialize(init=init.Uniform(1), ctx=mx.cpu())
    else:
        # near-zero weights barely break symmetry, so gradients will differ very little
        net.initialize(init=init.Uniform(0.0001), ctx=mx.cpu())
    return net
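A possible smoke test for the factory above, assuming a 28x28 single-channel input (the input size is an assumption, not stated in the example):

import mxnet as mx

net = create_gluon_model(initializer=1)          # Xavier branch
x = mx.nd.random.uniform(shape=(4, 1, 28, 28))   # hypothetical batch of images
print(net(x).shape)                              # expected: (4, 10) class scores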
Example #10
def model_1_fit(no_epochs, batch_size, ctx, mx_train_data, mx_valid_data):
    train_loss_hist = []
    train_acc_hist = []
    valid_loss_hist = []
    valid_acc_hist = []

    mx_net = mx_nn.Sequential()
    mx_net.add(mx_nn.Dense(512, activation='relu'),
               mx_nn.Dense(128, activation='relu'),
               mx_nn.Dense(64, activation='relu'),
               mx_nn.Dense(32, activation='relu'),
               mx_nn.Dense(16, activation='relu'), mx_nn.Dense(10))
    print('The Model')
    print(mx_net)
    mx_loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    #mx_net.initialize(init=init.Xavier())
    mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
    mx_trainer = gluon.Trainer(mx_net.collect_params(), 'adam', {
        'learning_rate': 1e-3,
        'beta1': 0.9,
        'beta2': 0.999
    })
    print('\nFitting the model\n')
    for epoch in range(no_epochs):
        train_loss, train_acc, valid_acc, valid_loss = 0.0, 0.0, 0.0, 0.0
        tic = time.time()
        for data, label in mx_train_data:
            with autograd.record():
                output = mx_net(data)
                loss = mx_loss_fn(output, label)
            loss.backward()
            mx_trainer.step(batch_size=batch_size)
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        for data, label in mx_valid_data:
            output = mx_net(data)
            loss = mx_loss_fn(output, label)
            valid_acc += acc(output, label)
            valid_loss += loss.mean().asscalar()
        train_loss_hist.append(train_loss / len(mx_train_data))
        train_acc_hist.append(train_acc / len(mx_train_data))
        valid_loss_hist.append(valid_loss / len(mx_valid_data))
        valid_acc_hist.append(valid_acc / len(mx_valid_data))

        print(
            "Epoch %d: train loss %.3f, train acc %.3f %%, val loss %.3f, val acc %.3f %%, in %.1f sec"
            % (epoch, train_loss / len(mx_train_data),
               train_acc / len(mx_train_data), valid_loss / len(mx_valid_data),
               valid_acc / len(mx_valid_data), time.time() - tic))

    mx_net.save_parameters('../weights/task_a_model_1.params')

    return train_loss_hist, train_acc_hist, valid_loss_hist, valid_acc_hist
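The acc() helper used above is not defined in the snippet; a typical Gluon-tutorial definition (an assumption about this project's version) is:

def acc(output, label):
    # hypothetical helper: fraction of correct argmax predictions in the batch
    return (output.argmax(axis=1) == label.astype('float32')).mean().asscalar()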
Example #11
 def init_weights(self, ctx):
     initrange = 0.1
     self.pgm_embed.initialize(init.Uniform(initrange), ctx=ctx)
     self.logit1.initialize(init.Uniform(initrange), ctx=ctx)
     self.logit2.initialize(init.Uniform(initrange), ctx=ctx)
     self.regress1.initialize(init.Uniform(initrange), ctx=ctx)
     self.regress2.initialize(init.Uniform(initrange), ctx=ctx)
     self.core.initialize(init.Uniform(initrange), ctx=ctx)
Example #12
 def __init__(self,
              mode,
              vocab_size,
              embed_size,
              num_hiddens,
              num_layers,
              drop_prob=0.5,
              **kwargs):
     super(RNNModel, self).__init__(**kwargs)
     with self.name_scope():
         self.dropout = nn.Dropout(drop_prob)
          # Map word indices to word vectors; these vectors are also model parameters.
         self.embedding = nn.Embedding(vocab_size,
                                       embed_size,
                                       weight_initializer=init.Uniform(0.1))
         if mode == 'rnn_relu':
             self.rnn = rnn.RNN(num_hiddens,
                                num_layers,
                                activation='relu',
                                dropout=drop_prob,
                                input_size=embed_size)
         elif mode == 'rnn_tanh':
             self.rnn = rnn.RNN(num_hiddens,
                                num_layers,
                                activation='tanh',
                                dropout=drop_prob,
                                input_size=embed_size)
         elif mode == 'lstm':
             self.rnn = rnn.LSTM(num_hiddens,
                                 num_layers,
                                 dropout=drop_prob,
                                 input_size=embed_size)
         elif mode == 'gru':
             self.rnn = rnn.GRU(num_hiddens,
                                num_layers,
                                dropout=drop_prob,
                                input_size=embed_size)
         else:
             raise ValueError('Invalid mode %s. Options are rnn_relu, '
                              'rnn_tanh, lstm, and gru' % mode)
         self.dense = nn.Dense(vocab_size, in_units=num_hiddens)
         self.num_hiddens = num_hiddens
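The forward pass of RNNModel is not shown above; as a hedged illustration only, constructing and initializing the block might look like this (all sizes are illustrative):

from mxnet import init

model = RNNModel(mode='lstm', vocab_size=10000, embed_size=100,
                 num_hiddens=256, num_layers=2, drop_prob=0.5)
model.initialize(init.Xavier())  # the embedding keeps its own Uniform(0.1) initializer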
Example #13
def dropout_model_fit(no_epochs, batch_size, ctx, mx_train_data, mx_valid_data, drop_prob):
    train_loss_hist = []
    train_acc_hist = []
    valid_loss_hist = []
    valid_acc_hist = []

    mx_net = MLP_DROP(drop_prob)
    print('\nThe Model with Dropout Probability %.1f \n' % drop_prob)
    print(mx_net)
    mx_loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    #mx_net.initialize(init=init.Xavier())
    mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
    mx_trainer = gluon.Trainer(mx_net.collect_params(), 'adam',
                               {'learning_rate': 1e-3, 'beta1': 0.9, 'beta2': 0.999})
    print('\nFitting the model\n')
    for epoch in range(no_epochs):
        train_loss, train_acc, valid_acc, valid_loss = 0.0, 0.0, 0.0, 0.0
        tic = time.time()
        for data, label in mx_train_data:
            with autograd.record():
                output = mx_net(data)
                loss = mx_loss_fn(output, label)
            loss.backward()
            mx_trainer.step(batch_size=batch_size)
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        for data, label in mx_valid_data:
            output = mx_net(data)
            loss = mx_loss_fn(output, label)
            valid_acc += acc(output, label)
            valid_loss += loss.mean().asscalar()
        train_loss_hist.append(train_loss / len(mx_train_data))
        train_acc_hist.append(train_acc / len(mx_train_data))
        valid_loss_hist.append(valid_loss / len(mx_valid_data))
        valid_acc_hist.append(valid_acc / len(mx_valid_data))

        print("Epoch %d: train loss %.3f, train acc %.3f %%, val loss %.3f, val acc %.3f %%, in %.1f sec"
              % (epoch, train_loss / len(mx_train_data), train_acc / len(mx_train_data),
                 valid_loss / len(mx_valid_data), valid_acc / len(mx_valid_data), time.time() - tic))

    mx_net.save_parameters('../weights/task_b_exp_3_dropout_' + str(int(drop_prob * 10)) + '.params')

    return train_loss_hist, train_acc_hist, valid_loss_hist, valid_acc_hist
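MLP_DROP is referenced above but not defined in this snippet; a hypothetical sketch of such a block (the layer widths and dropout placement are assumptions, not the project's real definition):

from mxnet.gluon import nn

class MLP_DROP(nn.Block):
    """Hypothetical sketch only; the project's actual definition is not shown."""
    def __init__(self, drop_prob, **kwargs):
        super(MLP_DROP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(nn.Dense(512, activation='relu'), nn.Dropout(drop_prob),
                     nn.Dense(128, activation='relu'), nn.Dropout(drop_prob),
                     nn.Dense(10))

    def forward(self, x):
        return self.net(x)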
Example #14
        im_train = train_data / 255
        im_valid = valid_data / 255
        train_dataset = mx.gluon.data.dataset.ArrayDataset(
            im_train, train_label)
        valid_dataset = mx.gluon.data.dataset.ArrayDataset(
            im_valid, valid_label)
        mx_train_data = gluon.data.DataLoader(train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True)
        mx_valid_data = gluon.data.DataLoader(valid_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

        print('Training the Model 1 with Optimizer = SGD')
        mx_net = MLP()
        mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
        mx_trainer = gluon.Trainer(mx_net.collect_params(), 'sgd', {
            'learning_rate': 0.1,
            'momentum': 0.9
        })
        train_loss_hist_m1, train_acc_hist_m1, valid_loss_hist_m1, valid_acc_hist_m1 = model_fit(
            mx_net, no_epochs, batch_size, mx_train_data, mx_valid_data, 'sgd',
            mx_trainer)
        print('Finished Training the Model 1')

        print('\nTraining the Model 2 with Optimizer = NAG')
        mx_net = MLP()
        mx_net.collect_params().initialize(init.Uniform(), ctx=ctx)
        mx_trainer = gluon.Trainer(mx_net.collect_params(), 'nag', {
            'learning_rate': 0.1,
            'momentum': 0.9
Example #15
File: test6.py Project: cszfz/mxnet
f = np.loadtxt(dirTrain + "image_train_features.txt", delimiter=' ')
l = np.loadtxt(dirTrain + "image_train_labels.txt", delimiter=' ')
features = nd.array(f)
labels = nd.array(l)

data_num = len(f)
batch_size = 500

dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(100, activation='sigmoid'),
        nn.Dense(100, activation='sigmoid'),
        nn.Dense(3))
net.initialize(init.Uniform(scale=20))

loss = gloss.L1Loss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})


def accuracy(y_hat, y, error):
    sum_acc = 0
    yy = y_hat - y
    yyy = yy.asnumpy()
    yyy = np.abs(yyy)
    i = 0
    for i, val in enumerate(yyy):
        sum_acc = sum_acc + equal(val, error)
Example #16
if __name__ == '__main__':
    if args.inference:
        # do inference
        dcmn = model.DCMN()
        dcmn.load_parameters(args.model_params)
        sts = args.sample.split('|')
        samples = [[sentence.strip() for sentence in sts]]
        inference(dcmn, samples)
    else:
        # do training
        dataloader_train = preprocess.get_dataloader(sts=args.train_sentences,
                                                     labels=args.train_labels)
        dataloader_test = preprocess.get_dataloader(sts=args.test_sentences,
                                                    labels=args.test_labels)
        dcmn = model.DCMN()
        dcmn.initialize(init=init.Uniform(.001), ctx=mx.gpu())
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        lr, clip = 5e-4, 2.5
        trainer = gluon.Trainer(dcmn.collect_params(), 'adam', {
            'learning_rate': lr,
            'clip_gradient': clip
        })
        train.train_valid(dataloader_train,
                          dataloader_test,
                          dcmn,
                          loss_func,
                          trainer,
                          num_epoch=15,
                          ctx=mx.gpu())
        dcmn.save_parameters('dcmn8k.params')
Example #17
    def __init__(self, feature_dict, args, ctx, task, **kwargs):
        """{"sparse":[SingleFeat],"dense":[SingleFeat]}"""
        super(xDeepFM, self).__init__(**kwargs)  # ??
        util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
        # self.feature_sizes = args.FEATURE_SIZE
        self.field_size = args.FIELD_NUM
        self.feature_dict = feature_dict
        print('field_size:')
        print(self.field_size)
        if args.TASK == 'finish':
            self.embedding_size = args.FINISH_EMBEDDING_SIZE
            self.batch_size = args.FINISH_BATCH_SIZE
        else:
            self.embedding_size = args.LIKE_EMBEDDING_SIZE
            self.batch_size = args.LIKE_BATCH_SIZE
        self.config_name = args.CONFIG_NAME
        # self.dropout_prob = args.DROPOUT_PROB
        self.task = task

        # self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
        if args.LOSS == 'l2loss':
            self.loss = gloss.L2Loss()
        else:
            self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
        self.ctx = ctx
        self.embedding_dict = OrderedDict()
        self.dense_dict = OrderedDict()
        with self.name_scope():
            if self.task == 'finish':
                self.layer_list = [int(x) for x in args.FINISH_LAYER]  # plain int; np.int is deprecated
                self.dropout = args.FINISH_DROPOUT_PROB
            else:
                self.layer_list = [int(x) for x in args.LIKE_LAYER]
                self.dropout = args.LIKE_DROPOUT_PROB
            # self.params.get('v',shape=(self.field_size,self.embedding_size))
            self.dnn_out = nn.Dense(1, use_bias=False)

            self.register_child(self.dnn_out)

            for feat in feature_dict['sparse']:
                self.embedding_dict[feat.feat_name] = nn.Embedding(
                    feat.feat_num, self.embedding_size)

            for feat in feature_dict['dense']:
                self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)

            for emb_k, emb_v in self.embedding_dict.items():
                self.register_child(emb_v)
            for den_k, den_v in self.dense_dict.items():
                self.register_child(den_v)
            self.linear_logit_dense = nn.Dense(1, use_bias=False)
            self.register_child(self.linear_logit_dense)
            self.linear_logit_embedding_bn = nn.BatchNorm()
            self.register_child(self.linear_logit_embedding_bn)
            self.dense_list = []
            self.dropout_list = []
            self.bn_list = []
            self.activation_list = []
            for i in range(len(self.layer_list)):
                self.dense_list.append(nn.Dense(self.layer_list[i]))
                self.dropout_list.append(nn.Dropout(self.dropout))
                self.bn_list.append(nn.BatchNorm())
                self.activation_list.append(nn.Activation('relu'))
                self.register_child(self.dense_list[i])
                self.register_child(self.dropout_list[i])
                self.register_child(self.bn_list[i])
                self.register_child(self.activation_list[i])
            # if True:
            print('true')
            self.layer_size = [int(x) for x in args.CONV1D_LAYER]
            # self.cin_net = CIN(self.embedding_size,self.field_size, (128, 64), self.ctx)
            # print('oo')
            # self.cin_net.initialize()
            # print('uu')
            # self.register_child(self.cin_net)

            self.cin_dense = nn.Dense(1)
            self.register_child(self.cin_dense)
            self.cin_bn = nn.BatchNorm()
            self.register_child(self.cin_bn)

            self.field_nums = [self.field_size]
            self.conv_list = []
            for idx, size in enumerate(self.layer_size):
                self.conv_list.append(
                    nn.Conv1D(channels=size,
                              kernel_size=1,
                              strides=1,
                              padding=0,
                              activation='relu',
                              in_channels=self.field_nums[0] *
                              self.field_nums[-1],
                              weight_initializer=init.Uniform()))
                self.field_nums.append(size)
                self.register_child(self.conv_list[idx])
Example #18
            im_valid, valid_label)
        mx_train_data = gluon.data.DataLoader(train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True)
        mx_valid_data = gluon.data.DataLoader(valid_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

        print('Training the Model 1 with Uniform Initialization')
        train_loss_hist_m0, train_acc_hist_m0, valid_loss_hist_m0, valid_acc_hist_m0 = model_fit(
            no_epochs,
            batch_size,
            ctx,
            mx_train_data,
            mx_valid_data,
            init_type=init.Uniform(),
            path='uniform')
        print('Finished Training the Model 1')

        print('Training the Model 2 with Normal Initialization')
        train_loss_hist_m1, train_acc_hist_m1, valid_loss_hist_m1, valid_acc_hist_m1 = model_fit(
            no_epochs,
            batch_size,
            ctx,
            mx_train_data,
            mx_valid_data,
            init_type=init.Normal(),
            path='normal')
        print('Finished Training the Model 2')

        print('\nTraining the Model 3 with Xavier Initialization')
Example #19
features = nd.array(f).copyto(ctx)
labels = nd.array(l).copyto(ctx)
labels_test = nd.zeros(labels.shape, ctx)

data_num = len(f)
batch_size = 500

dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(10, activation='sigmoid'),
        nn.Dense(100, activation='sigmoid'),
        nn.Dense(10, activation='sigmoid'),
        nn.Dense(3))
net.initialize(init.Uniform(scale=20), ctx=ctx)

loss = gloss.L2Loss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})


def accuracy(y_hat, y, error):
    sum_acc = 0
    yy = y_hat - y
    yyy = yy.asnumpy()
    yyy = np.abs(yyy)
    i = 0
    for i, val in enumerate(yyy):
        sum_acc = sum_acc + equal(val, error)
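The accuracy helper above is truncated and relies on an equal() function that is not shown; one plausible, self-contained reading of the same idea (count samples whose outputs all fall within an absolute error tolerance) is:

import numpy as np

def accuracy_within_tolerance(y_hat, y, error):
    # hypothetical re-implementation; the original equal() may differ
    diff = np.abs((y_hat - y).asnumpy())
    return float((diff <= error).all(axis=1).mean())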