# Imports used by the snippets below (utils is the local helper module from the
# Gluon tutorials; RNNModel and get_data_iter are defined elsewhere in the project).
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet.gluon import nn, data as gdata, loss as gloss
from gensim.models import word2vec
import utils


def mlp(num_epochs, unit_count, hidden_layer_num=1):
    # Multilayer perceptron: `hidden_layer_num` ReLU layers of `unit_count` units
    # each, followed by a 10-way output layer for Fashion-MNIST.
    net = nn.Sequential()
    with net.name_scope():
        for _ in range(hidden_layer_num):
            net.add(nn.Dense(unit_count, activation="relu"))
        net.add(nn.Dense(10))
    net.initialize()
    ctx = utils.try_gpu()
    batch_size = 256
    train_data, test_data = utils.load_data_fashion_mnist(batch_size)
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": 0.5})
    return utils.train(train_data, test_data, net, loss, trainer, ctx,
                       num_epochs=num_epochs)
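def mlp_demo():
    # Hypothetical usage sketch (not in the original file): compare one and two
    # hidden layers. The return value depends on what the local utils.train yields.
    for depth in (1, 2):
        print('MLP with %d hidden layer(s):' % depth)
        mlp(num_epochs=5, unit_count=256, hidden_layer_num=depth)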
def train_linear(num_epochs, batch_size=5):
    # Linear regression with Gluon. X and y are expected at module level
    # (see the synthetic-data sketch that follows).
    dataset = gdata.ArrayDataset(X, y)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(1))
    net.initialize()
    square_loss = gloss.L2Loss()
    ctx = utils.try_gpu()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05})
    # Note: the same iterator is passed as both train and test data.
    return utils.train(data_iter, data_iter, net, square_loss, trainer, ctx,
                       num_epochs=num_epochs)
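# train_linear reads module-level X and y, which this excerpt never defines.
# A minimal synthetic-data sketch (assumed, for illustration): y = Xw + b + noise.
true_w = nd.array([2, -3.4])
true_b = 4.2
X = nd.random.normal(shape=(1000, 2))
y = nd.dot(X, true_w) + true_b + 0.01 * nd.random.normal(shape=(1000,))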
def cnn(num_epochs):
    # LeNet-style CNN for Fashion-MNIST: two conv/pool stages, then two dense layers.
    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.Conv2D(channels=20, kernel_size=5, activation="relu"),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=50, kernel_size=3, activation="relu"),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Flatten(),
            nn.Dense(128, activation="relu"),
            nn.Dense(10)
        )
    net.initialize()
    ctx = utils.try_gpu()
    batch_size = 256
    train_data, test_data = utils.load_data_fashion_mnist(batch_size)
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
    return utils.train(train_data, test_data, net, loss, trainer, ctx,
                       num_epochs=num_epochs)
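def cnn_shape_check():
    # Illustrative sanity check (assumed, not in the original): trace feature-map
    # sizes through the conv/pool stack for a 1x28x28 Fashion-MNIST image.
    probe = nn.Sequential()
    probe.add(nn.Conv2D(channels=20, kernel_size=5, activation='relu'),  # 28 -> 24
              nn.MaxPool2D(pool_size=2, strides=2),                      # 24 -> 12
              nn.Conv2D(channels=50, kernel_size=3, activation='relu'),  # 12 -> 10
              nn.MaxPool2D(pool_size=2, strides=2))                      # 10 -> 5
    probe.initialize()
    print(probe(nd.zeros((1, 1, 28, 28))).shape)  # (1, 50, 5, 5), so Flatten -> 1250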
if __name__ == "__main__":
    # Hyperparameters for the text-classification RNN.
    model_name = 'rnn_relu'
    embed_dim = 100
    hidden_dim = 100
    num_layers = 2
    lr = 0.5
    clipping_norm = 0.2
    epochs = 10
    batch_size = 20
    batch_size_clas = 1
    num_steps = 1
    dropout_rate = 0.2
    eval_period = 50
    context = utils.try_gpu()
    train_data_path = "E:\\ML_learning\\Daguan\\data\\train_data.csv"
    w2v = word2vec.Word2Vec.load("E:\\ML_learning\\Daguan\\data\\mymodel")
    # test_data_path = ""
    train_data, label = get_data_iter(train_data_path, batch_size, w2v)
    # train_data_iter = get_data_iter(train_data_path, batch_size, w2v)
    model = RNNModel(model_name, embed_dim, hidden_dim, num_layers, w2v,
                     dropout_rate)
    model.collect_params().initialize(mx.init.Xavier(), ctx=context)
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.1, 'wd': 0})
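    # A minimal epoch-loop sketch (not part of the original excerpt) under assumed
    # interfaces: model(batch) returns class scores, and train_data / label are
    # lists of NDArray batches of equal length.
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    for epoch in range(epochs):
        total_loss = 0.
        for data_batch, label_batch in zip(train_data, label):
            with autograd.record():
                out = model(data_batch.as_in_context(context))
                l = loss_fn(out, label_batch.as_in_context(context))
            l.backward()
            # Clip gradients to stabilise RNN training (clipping_norm set above).
            grads = [p.grad(context) for p in model.collect_params().values()]
            gluon.utils.clip_global_norm(grads, clipping_norm * batch_size)
            trainer.step(batch_size)
            total_loss += l.mean().asscalar()
        print('epoch %d, loss %.4f' % (epoch + 1, total_loss / max(1, len(label))))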
def data_iter_consecutive(corpus_indices, batch_size, num_steps):
    # NOTE: the def line is missing from the original; this signature is assumed
    # from the variables the body uses.
    data_len = len(corpus_indices)
    batch_len = data_len // batch_size
    indices = corpus_indices[0: batch_size * batch_len].reshape(
        (batch_size, batch_len))
    # Each pass yields epoch_size (data, label) minibatches, where the label is
    # the data shifted one step to the left.
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        i = i * num_steps
        data = indices[:, i: i + num_steps]
        label = indices[:, i + 1: i + num_steps + 1]
        yield data, label


def get_inputs(X):
    # One-hot encode each time step: X is (batch_size, num_steps); the result is
    # a list of num_steps arrays of shape (batch_size, vocab_size).
    return [nd.one_hot(x, vocab_size) for x in X.T]


ctx = utils.try_gpu()
print('will use', ctx)

# vocab_size is expected to be defined earlier in the original script.
num_inputs = vocab_size
hidden_dim = 256
num_outputs = vocab_size
std = .01


def get_params():
    # Parameters for a single-layer RNN from scratch.
    W_xh = nd.random.normal(scale=std, shape=(num_inputs, hidden_dim), ctx=ctx)
    W_hh = nd.random.normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_h = nd.zeros(hidden_dim, ctx=ctx)
    W_y = nd.random.normal(scale=std, shape=(hidden_dim, num_outputs), ctx=ctx)
    b_y = nd.zeros(num_outputs, ctx=ctx)
    # The original breaks off here; the usual pattern attaches gradients and
    # returns the parameter list.
    params = [W_xh, W_hh, b_h, W_y, b_y]
    for param in params:
        param.attach_grad()
    return params
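def iter_demo():
    # Illustrative check (hypothetical corpus): with consecutive sampling, adjacent
    # minibatches continue each row's sequence, so hidden state can be carried
    # across batches during training.
    for data, label in data_iter_consecutive(nd.arange(30), batch_size=2,
                                             num_steps=3):
        print('data:\n', data, '\nlabel:\n', label)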