Example #1
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    """Train an RNN model and predict the next item in the sequence."""

    # This variant always uses random sampling over the corpus.
    data_iter_fn = data_iter_random

    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(num_epochs):
        if not is_random_iter:
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                for s in state:
                    s.detach()
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                # outputs is a list of num_steps matrices of shape (batch_size, vocab_size).
                (outputs, state) = rnn(inputs, state, params)
                # After concatenation the shape is (num_steps * batch_size, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Y has shape (batch_size, num_steps); transpose and flatten it so each
                # label lines up with the corresponding output row.
                y = Y.T.reshape((-1, ))
                # Average cross-entropy loss over the minibatch.
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            #print("n ",n)
            print('epoch %d, perplexity %f, time %.2f sec' %
                  (epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(
                    ' -',
                    predict_rnn(prefix, pred_len, rnn, params, init_rnn_state,
                                num_hiddens, vocab_size, ctx, idx_to_char,
                                char_to_idx))
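Example #1 relies on several helpers (`to_onehot`, `grad_clipping`, `sgd`, `data_iter_random`) that are defined elsewhere. The sketch below shows what the first three typically look like in the d2l/gluonbook-style utilities; it is an illustration under that assumption, not the exact code imported above.

from mxnet import nd

def to_onehot(X, size):
    # One column of X per time step; each column becomes a
    # (batch_size, vocab_size) one-hot matrix.
    return [nd.one_hot(x, size) for x in X.T]

def grad_clipping(params, theta, ctx):
    # Rescale all gradients so their global L2 norm does not exceed theta.
    norm = nd.array([0.0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm

def sgd(params, lr, batch_size):
    # Plain minibatch SGD; batch_size is 1 in the call above because the
    # loss has already been averaged.
    for param in params:
        param[:] = param - lr * param.grad / batch_size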
Example #2
def predict_rnn(prefix, num_chars, rnn, params, init_rnn_state,
                num_hiddens, vocab_size, ctx, idx_to_char, char_to_idx):
    state = init_rnn_state(1, num_hiddens, ctx)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        # Use the output of the previous time step as the input of the current time step.
        X = ld.to_onehot(nd.array([output[-1]], ctx=ctx), vocab_size)
        # Compute the output and update the hidden state.
        (Y, state) = rnn(X, state, params)
        # The input at the next time step is the next character from the prefix,
        # or the current best predicted character.
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(int(Y[0].argmax(axis=1).asscalar()))
    return ''.join([idx_to_char[i] for i in output])
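A hypothetical usage sketch for `predict_rnn`: the toy vocabulary below and the `rnn`, `get_params`, `init_rnn_state`, `num_hiddens`, and `ctx` objects are assumptions for illustration, not part of the original snippet.

# Hypothetical setup; rnn, get_params, init_rnn_state, num_hiddens and ctx
# are assumed to be defined as in the training examples above.
corpus = 'hello world'
idx_to_char = sorted(set(corpus))
char_to_idx = {ch: i for i, ch in enumerate(idx_to_char)}
vocab_size = len(idx_to_char)

params = get_params()
text = predict_rnn('he', 20, rnn, params, init_rnn_state,
                   num_hiddens, vocab_size, ctx, idx_to_char, char_to_idx)
print(text)  # 'he' followed by 20 generated characters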
Example #3
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    if is_random_iter:
        data_iter_fn = d2l.data_iter_random
    else:
        data_iter_fn = d2l.data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    
    for epoch in range(num_epochs):
        if not is_random_iter:  # For consecutive sampling, initialise the hidden state at the start of the epoch.
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:  # For random sampling, initialise the hidden state before each minibatch.
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:  # Otherwise, detach the hidden state from the computation graph.
                for s in state:
                    s.detach()
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                # outputs is a list of num_steps matrices of shape (batch_size, vocab_size).
                (outputs, state) = rnn(inputs, state, params)
                # After concatenation the shape is (num_steps * batch_size, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Y has shape (batch_size, num_steps); transpose and flatten it into a
                # vector of length batch_size * num_steps so that each label lines up
                # with the corresponding output row.
                y = Y.T.reshape((-1,))
                # Use the cross-entropy loss to compute the average classification error.
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)  # Clip the gradients.
            d2l.sgd(params, lr, 1)  # The loss is already a mean, so the gradient need not be averaged again.
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn(
                    prefix, pred_len, rnn, params, init_rnn_state,
                    num_hiddens, vocab_size, ctx, idx_to_char, char_to_idx))
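Example #3 switches between `d2l.data_iter_random` and `d2l.data_iter_consecutive` depending on `is_random_iter`. A simplified sketch of the two sampling schemes, modelled on the book-style helpers (the imported versions may differ in detail):

import random
from mxnet import nd

def data_iter_random(corpus_indices, batch_size, num_steps, ctx=None):
    # Random sampling: each minibatch is cut from a random position, so the
    # hidden state must be re-initialised for every minibatch.
    num_examples = (len(corpus_indices) - 1) // num_steps
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    def _data(pos):
        return corpus_indices[pos:pos + num_steps]

    for i in range(0, num_examples // batch_size * batch_size, batch_size):
        batch = example_indices[i:i + batch_size]
        X = [_data(j * num_steps) for j in batch]
        Y = [_data(j * num_steps + 1) for j in batch]
        yield nd.array(X, ctx), nd.array(Y, ctx)

def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None):
    # Consecutive sampling: adjacent minibatches are adjacent in the corpus,
    # so the hidden state can be carried over (after detaching it).
    corpus = nd.array(corpus_indices, ctx=ctx)
    batch_len = len(corpus) // batch_size
    indices = corpus[:batch_size * batch_len].reshape((batch_size, batch_len))
    for i in range(0, (batch_len - 1) // num_steps * num_steps, num_steps):
        X = indices[:, i:i + num_steps]
        Y = indices[:, i + 1:i + num_steps + 1]
        yield X, Y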
Example #4
def predict_rnn(prefix, num_chars, rnn, params, init_rnn_state, num_hiddens,
                vocab_size, ctx, idx_to_char, char_to_idx):
    """Predict next chars with a RNN model"""
    state = init_rnn_state(1, num_hiddens, ctx)
    output = [char_to_idx[int(prefix[0])]]
    #print("output ",output)
    for t in range(num_chars + len(prefix) - 1):
        X = to_onehot(nd.array([output[-1]], ctx=ctx), vocab_size)
        (Y, state) = rnn(X, state, params)
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(int(Y[0].argmax(axis=1).asscalar()))
    # print("idx_to_char ",idx_to_char)
    # for i in output:
    #     print("idx_to_char ",idx_to_char[i])
    return ''.join(str([idx_to_char[i] for i in output]))
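Both `predict_rnn` variants call `rnn(X, state, params)` and `init_rnn_state` without showing them. A minimal sketch of the interface they assume, modelled on the plain tanh RNN from the book (the real implementations may differ):

from mxnet import nd

def init_rnn_state(batch_size, num_hiddens, ctx):
    # A plain RNN keeps a single hidden-state tensor.
    return (nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx),)

def rnn(inputs, state, params):
    # inputs and outputs are lists of (batch_size, vocab_size) matrices,
    # one entry per time step.
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H,)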
Example #5
rnn = BiRNN(hidden_size, num_layers, num_classes)
rnn.initialize(ctx=ctx)

# Loss and Optimizer
criterion = gluon.loss.SoftmaxCrossEntropyLoss()
optimizer = gluon.Trainer(rnn.collect_params(), 'adam', {'learning_rate': learning_rate})

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.astype('float32').reshape((-1, sequence_length, input_size)) / 255
        images, labels = images.as_in_context(ctx), labels.as_in_context(ctx)
        # Forward + Backward + Optimize
        with autograd.record():
            outputs = rnn(images)
            loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step(batch_size)

        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.sum().asscalar()))

# Test the Model
total, correct = 0, 0
for images, labels in test_loader:
    images = images.astype('float32').reshape((-1, sequence_length, input_size)) / 255
    images, labels = images.as_in_context(ctx), labels.as_in_context(ctx)
    outputs = rnn(images)
    predict = outputs.argmax(1).astype('int32')
    # Accumulate accuracy statistics over the test set.
    correct += (predict == labels.astype('int32')).sum().asscalar()
    total += labels.size
print('Test Accuracy: %.4f' % (correct / total))
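The `BiRNN` block constructed in Example #5 is not shown. A plausible sketch, assuming a bidirectional LSTM over the row sequence of each image followed by a dense classifier (the layer choices and layout are assumptions, not the original definition):

from mxnet import gluon
from mxnet.gluon import nn, rnn as grnn

class BiRNN(gluon.Block):
    # Bidirectional LSTM over the pixel-row sequence, followed by a
    # dense classifier applied to the last time step.
    def __init__(self, hidden_size, num_layers, num_classes, **kwargs):
        super(BiRNN, self).__init__(**kwargs)
        self.lstm = grnn.LSTM(hidden_size, num_layers,
                              layout='NTC', bidirectional=True)
        self.fc = nn.Dense(num_classes)

    def forward(self, x):
        out = self.lstm(x)             # (batch, seq_len, 2 * hidden_size)
        return self.fc(out[:, -1, :])  # classify from the last time step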
Example #6
        'learning_rate':0.005,
        "wd":0.001
    })
    dataLoader = DataLoader(a,b)
    for epoch in range(500):
        total_L = 0.0
        hidden = rnn.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=mx.cpu())

        for data, label in dataLoader.dataIter(batch_size):
            label = nd.array(label)
            # Reshape to (batch_size, 5, 11), then swap to (num_steps, batch_size, features).
            dd = nd.array(data.reshape((batch_size, 5, 11)).swapaxes(0, 1))
            hidden = detach(hidden)
            with mx.autograd.record():
                output, hidden = rnn(dd, hidden)
                output = output.reshape((5, batch_size, 2))
                # Average the outputs over the 5 time steps.
                output = nd.sum(output, axis=0) / 5
                lv = loss(output, label)
            lv.backward()
            grads = [i.grad() for i in rnn.collect_params().values()]
            mx.gluon.utils.clip_global_norm(grads, clipping_norm * num_steps * batch_size)
            trainer.step(batch_size)
            total_L += mx.nd.sum(lv).asscalar()
        test_loss = evals(rnn,c,d,batch_size)

        print("Epoch %d loss %.4f test loss %.4f train acc %.4f test acc %.4f" %(epoch, total_L/len(a), test_loss,predict(rnn,a,b),predict(rnn,c,d)))
Example #7
    dataLoader = DataLoader(a,b)
    trl = []
    tel = []
    for epoch in range(500):
        total_L = 0.0
        hidden = rnn.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=mx.cpu())

        for data, label in dataLoader.dataIter(batch_size):
            label = nd.array(label)
            # Reshape to (batch_size, 5, 11), then swap to (num_steps, batch_size, features).
            dd = nd.array(data.reshape((batch_size, 5, 11)).swapaxes(0, 1))
            hidden = detach(hidden)
            with mx.autograd.record():
                output, hidden = rnn(dd, hidden)
                output = output.reshape((5, 256, 1))  # assumes batch_size == 256
                # Average the outputs over the 5 time steps.
                output = nd.sum(output, axis=0) / 5
                lv = loss(output, label)
            lv.backward()
            grads = [i.grad() for i in rnn.collect_params().values()]
            mx.gluon.utils.clip_global_norm(grads, clipping_norm * num_steps * batch_size)
            trainer.step(batch_size)
            total_L += mx.nd.sum(lv).asscalar()
        test_loss = evals(rnn,c,d,batch_size)
        trl.append(total_L/len(a))
        tel.append(test_loss)
        print("Epoch %d loss %.4f test loss %.4f train acc %.4f test acc %.4f" %(epoch, total_L/len(a), test_loss,predict(rnn,a,b),predict(rnn,c,d)))
    with open("rnn.csv",'w',newline='') as f:
        import csv
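Examples #6 and #7 also call `evals(rnn, c, d, batch_size)` for the held-out loss without showing it. A rough sketch inferred from the training loop; `DataLoader`, `loss`, and the shapes below are assumptions carried over from the snippets, not the original implementation.

import mxnet as mx
from mxnet import nd

def evals(rnn, c, d, batch_size):
    # Average loss of rnn over the held-out set (c, d), mirroring the
    # reshaping and time-step averaging used in the training loop.
    data_loader = DataLoader(c, d)
    hidden = rnn.begin_state(func=mx.nd.zeros, batch_size=batch_size, ctx=mx.cpu())
    total_loss, count = 0.0, 0
    for data, label in data_loader.dataIter(batch_size):
        label = nd.array(label)
        dd = nd.array(data.reshape((batch_size, 5, 11)).swapaxes(0, 1))
        output, hidden = rnn(dd, hidden)
        output = nd.sum(output.reshape((5, batch_size, -1)), axis=0) / 5
        total_loss += nd.sum(loss(output, label)).asscalar()
        count += batch_size
    return total_loss / count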