Example No. 1
def main():
    '''main function'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--state', required=True, help='train or eval')
    parser.add_argument('--txt', required=True, help='text file to train on')
    parser.add_argument('--batch', default=128, type=int, help='training batch size')
    parser.add_argument('--epoch', default=5000, type=int, help='number of epochs to run')
    parser.add_argument('--len', default=100, type=int, help='length of the sequences fed to the model')
    parser.add_argument('--max_vocab', default=5000, type=int, help='maximum number of characters kept in the vocabulary')
    parser.add_argument('--embed', default=512, type=int, help='dimension of the character embeddings')
    parser.add_argument('--hidden', default=512, type=int, help='output dimension of the RNN')
    parser.add_argument('--n_layer', default=2, type=int, help='number of RNN layers')
    parser.add_argument('--dropout', default=0.5, type=float, help='dropout probability inside the RNN')
    parser.add_argument('--begin', default='我', help='starting text for generation')
    parser.add_argument('--pred_len', default=20, type=int, help='length of the generated text')
    parser.add_argument('--checkpoint', help='path of the checkpoint to load')
    opt = parser.parse_args()
    print(opt)

    convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
    model = CharRNN(convert.vocab_size, opt.embed, opt.hidden, opt.n_layer,
                    opt.dropout)

    model.initialize(ctx=ctx)

    if opt.state == 'train':
        dataset = TextData(opt.txt, opt.len, convert.text_to_arr)
        dataloader = g.data.DataLoader(dataset, opt.batch, shuffle=True)
        lr_sch = mx.lr_scheduler.FactorScheduler(int(1000 * len(dataloader)),
                                                 factor=0.1)
        optimizer = g.Trainer(model.collect_params(), 'adam', {
            'learning_rate': 1e-3,
            'clip_gradient': 3,
            'lr_scheduler': lr_sch
        })
        cross_entropy = g.loss.SoftmaxCrossEntropyLoss()
        train(opt.epoch, model, dataloader, optimizer, cross_entropy)

    elif opt.state == 'eval':
        pred_text = sample(model, opt.checkpoint, convert.word_to_int,
                           convert.arr_to_text, opt.begin, opt.pred_len)
        print(pred_text)
        with open('./generate.txt', 'a') as f:
            f.write(pred_text)
            f.write('\n')
    else:
        print('Error state, must choose from train and eval!')
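
The snippet above delegates the actual optimization to a train helper that is not shown. Below is a minimal sketch of what such a Gluon loop could look like given the call train(opt.epoch, model, dataloader, optimizer, cross_entropy); it is not the original implementation, the ctx variable and the model's output shape are assumptions, and hidden-state handling is omitted.

def train(epochs, model, dataloader, optimizer, criterion):
    # Hypothetical sketch: one forward/backward pass per batch, with the
    # gluon.Trainer (created with 'adam' above) applying the update.
    for epoch in range(epochs):
        total_loss = 0.0
        for data, label in dataloader:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with mx.autograd.record():
                output = model(data)              # assumed shape: (batch * seq_len, vocab)
                loss = criterion(output, label.reshape((-1,)))
            loss.backward()
            optimizer.step(data.shape[0])
            total_loss += loss.mean().asscalar()
        print('epoch {}: loss {:.4f}'.format(epoch + 1, total_loss / len(dataloader)))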
Example No. 2
def main():
    '''main function'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--state', required=True, help='train or eval')
    parser.add_argument('--txt', required=True, help='text file to train on')
    parser.add_argument('--batch', default=128, type=int, help='training batch size')
    parser.add_argument('--epoch', default=5000, type=int, help='number of epochs to run')
    parser.add_argument('--len', default=100, type=int, help='length of the sequences fed to the model')
    parser.add_argument('--max_vocab', default=5000, type=int, help='maximum number of characters kept in the vocabulary')
    parser.add_argument('--embed', default=512, type=int, help='dimension of the character embeddings')
    parser.add_argument('--hidden', default=512, type=int, help='output dimension of the RNN')
    parser.add_argument('--n_layer', default=2, type=int, help='number of RNN layers')
    parser.add_argument('--dropout', default=0.5, type=float, help='dropout probability inside the RNN')
    parser.add_argument('--begin', default='我', help='starting text for generation')
    parser.add_argument('--pred_len', default=20, type=int, help='length of the generated text')
    parser.add_argument('--checkpoint', help='path of the checkpoint to load')
    opt = parser.parse_args()
    print(opt)

    convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
    model = CharRNN(convert.vocab_size, opt.embed, opt.hidden, opt.n_layer,
                    opt.dropout)
    model.load_state_dict(torch.load('./poetry_checkpoints/model_300.pth'))
    if use_gpu:
        model = model.cuda()

    if opt.state == 'train':
        model.train()
        dataset = TextData(opt.txt, opt.len, convert.text_to_arr)
        dataloader = data.DataLoader(dataset,
                                     opt.batch,
                                     shuffle=True,
                                     num_workers=4)
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        criterion = nn.CrossEntropyLoss(size_average=False)
        train(opt.epoch, model, dataloader, optimizer, criterion)

    elif opt.state == 'eval':
        pred_text = sample(model, opt.checkpoint, convert.word_to_int,
                           convert.arr_to_text, opt.begin, opt.pred_len)
        print(pred_text)
        with open('./generate.txt', 'a') as f:
            f.write(pred_text)
            f.write('\n')
    else:
        print('Error state, must choose from train and eval!')
Example No. 3
    def __init__(self, target_ids, model=None, rnn_size=128, gpu=-1):
        self.twitter = Twitter(target_ids)
        self.train_count=0
        self.gpu = gpu
        self.model_path = "dada/model{}.pkl".format(target_ids)
        self.tweet_path = "TimeLine/TimeLine"+target_ids
        self.vocab_path = "data/vocab{}.bin".format(target_ids)
        self.vocab = pickle.load(open(self.vocab_path,"rb")) if os.path.exists(self.vocab_path) else {}

        if os.path.exists(self.model_path):
            self.model = pickle.load(open(self.model_path, 'rb'))
        else:
            self.model = CharRNN(len(self.vocab), rnn_size)
Example No. 4
if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

n_epochs    = args.epochs
n_units     = args.rnn_size
batchsize   = args.batchsize
bprop_len   = args.seq_length
grad_clip   = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin'%args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
jump         = int(whole_len / batchsize)
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
Example No. 5
if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

n_epochs    = args.epochs
n_units     = args.rnn_size
batchsize   = args.batchsize
bprop_len   = args.seq_length
grad_clip   = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin'%args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len    = train_data.shape[0]
jump         = whole_len / batchsize
cur_log_perp = cuda.zeros(())
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(n_units, batchsize=batchsize)
Example No. 6
loss_file = open('%s/loss.txt' % args.checkpoint_dir, 'w')

n_epochs    = args.epochs
n_units     = args.rnn_size
batchsize   = args.batchsize
bprop_len   = args.seq_length
grad_clip   = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin'%args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
jump         = whole_len / batchsize
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
Example No. 7
# encoding the text and map each character to an integer and vice versa
# 1. int2char, which maps integers to characters
# 2. char2int, which maps characters to integers
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}
encoded = np.array([char2int[i] for i in text])

# Check if GPU is available
train_on_gpu = torch.cuda.is_available()

n_hidden = 512
n_layers = 2

net = CharRNN(chars, n_hidden, n_layers)
net.load_state_dict(torch.load('weights.ckpt'))
print(net)

# Declaring the hyperparameters
batch_size = 128
seq_length = 100


# Defining a method to generate the next character
def predict(net, char, h=None, top_k=None):
    ''' Given a character, predict the next character.
        Returns the predicted character and the hidden state.
    '''

    # tensor inputs
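
The body of predict is cut off here. For orientation, here is a minimal sketch (not the original code) of how such a step is commonly completed, assuming the network takes a one-hot (1, 1, vocab) input plus a hidden state and returns (logits, hidden), and reusing the chars, char2int, int2char and train_on_gpu names defined above.

import torch.nn.functional as F

def predict_sketch(net, char, h=None, top_k=None):
    # Hypothetical completion of the truncated predict above.
    x = torch.zeros(1, 1, len(chars))
    x[0, 0, char2int[char]] = 1.0           # one-hot encode the input character
    if train_on_gpu:
        x = x.cuda()
    out, h = net(x, h)                       # assumed to return (logits, hidden)
    p = F.softmax(out, dim=1).data
    if train_on_gpu:
        p = p.cpu()
    if top_k is None:                        # sample from the full distribution
        top_ch = np.arange(len(chars))
        p = p.numpy().squeeze()
    else:                                    # or only from the top_k characters
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()
        p = p.numpy().squeeze()
    next_idx = np.random.choice(top_ch, p=p / p.sum())
    return int2char[int(next_idx)], h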
Example No. 8
loss_file = open('%s/loss.txt' % args.checkpoint_dir, 'w')

n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate,
                               alpha=args.decay_rate,
                               eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len = train_data.shape[0]
jump = whole_len / batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
Example No. 9
def main():
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate,
                                   eps=1e-8)
    #optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.GradientClipping(grad_clip))  # cap the gradient norm (gradient clipping)

    whole_len = train_data.shape[0]
    #jump         = whole_len / batchsize
    jump = int(whole_len / batchsize)
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    if args.gpu >= 0:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len]
            for j in range(batchsize)
        ])

        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch,
                                               y_batch,
                                               state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            #print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()
            #optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            #accum_loss = Variable(xp.zeros(()).astype(np.float32))
            if args.gpu >= 0:
                accum_loss = Variable(xp.zeros(()).astype(np.float32))
                #accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            #optimizer.clip_grads(grad_clip)
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(
                    copy.deepcopy(model).to_cpu(),
                    open('%s/latest.chainermodel' % (args.checkpoint_dir),
                         'wb'))

        if (i + 1) % jump == 0:
            epoch += 1

            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
Example No. 10
    pickle.dump(model_copy, open(CHECKPOINT_PATH + "CharRNN-Epoch-%s.model" % epoch, 'wb'))
    pickle.dump(model_copy, open(MODEL_PATH + "CharRNN-Latest.model", 'wb'))




print(RNN_TRAIN_MODE)
train_data, words, vocab = load_data(mode=RNN_TRAIN_MODE)

pickle.dump(vocab, open(VOCAB_PATH, 'wb'))

if len(RNN_INIT_FROM) > 0:
    model = pickle.load(open(RNN_INIT_FROM, 'rb'))
else:
    model = CharRNN(len(vocab), RNN_RNN_SIZE)

if RNN_GPU >= 0:
    cuda.get_device(RNN_GPU).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=RNN_LEARNING_RATE, alpha=RNN_DECAY_RATE, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
jump         = whole_len / RNN_BATCHSIZE
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(RNN_RNN_SIZE, batchsize=RNN_BATCHSIZE)
if RNN_GPU >= 0:
Example No. 11
class Clone:
    def __init__(self, target_ids, model=None, rnn_size=128, gpu=-1):
        self.twitter = Twitter(target_ids)
        self.train_count=0
        self.gpu = gpu
        self.model_path = "dada/model{}.pkl".format(target_ids)
        self.tweet_path = "TimeLine/TimeLine"+target_ids
        self.vocab_path = "data/vocab{}.bin".format(target_ids)
        self.vocab = pickle.load(open(self.vocab_path,"rb")) if os.path.exists(self.vocab_path) else {}

        if os.path.exists(self.model_path):
            self.model = pickle.load(open(self.model_path, 'rb'))
        else:
            self.model = CharRNN(len(self.vocab), rnn_size)

    def train_loop(self, num=100):
        for i in range(num):
            if (i % 5 ) == 0:
                self.make_dataset()
            self.train()
            self.tweet()

    def train(self, **kwargs):
        print ("Start {} times learning.".format(self.train_count))
        self._train(**kwargs)
        print ("{} times learning done.".format(self.train_count))
        self.train_count+=1

    def make_dataset(self):
        self.twitter.save_timeline()
        dataset, result, vocab = TextTools.make_dataset(self.tweet_path, self.vocab)
        self.dataset = dataset
        self.result = result
        self.vocab = vocab

        count = 0
        pickle.dump(self.vocab, open(self.vocab_path, 'wb'))
        while len(self.model.l3.b) < len(self.vocab):
            self.model.add_unit()
            count+=1
        if count:
            print (count,"units added")

    def tweet(self):
        if time.localtime()[3] > 6:
            pass
        twit=self.Speak()
        twit=twit.rsplit(u"。")
        tNum=np.random.randint(1, min(4, len(twit)))
        tList=random.sample( range(len(twit)), tNum)
        ttt=[twit[i] for i in tList]
        tweet=ttt[0]
        try:
            Twitter.twit_post(tweet)
        except ReadTimeout as e:
            print ("ReadTimeout")
            print ("waiting 5 mins")
            time.sleep(5*60)
        except ConnectionError as e:
            print ("ConnectionError")
            print ("waiting 5mins")
            time.sleep(5*60)

    def Speak(self, seed=1, sample=1, length=200):
        ivocab = {}
        for c, i in self.vocab.items():
            ivocab[i[0]] = c
        n_units = self.model.embed.W.data.shape[1]

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_gpu()

        # initialize generator
        state = self.model.make_initial_state(batchsize=1, train=False)
        if self.gpu >= 0:
            for key, value in state.items():
                value.data = cuda.to_gpu(value.data)

        prev_char = np.array([0], dtype=np.int32)
        if self.gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)
        primetext = random.sample(list(self.vocab.keys()), 1)
        if len(primetext) > 0:
            sys.stdout.write(primetext[0])
            prev_char = np.array([self.vocab[primetext[0]][0]], dtype=np.int32)
            if self.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)
        twit=""
        for i in range(length):
            state, prob = self.model.forward_one_step(prev_char,prev_char,state,train=False)
            if sample > 0:
                probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
                probability /= np.sum(probability)
                index = np.random.choice(range(len(probability)), p=probability)
            else:
                index = np.argmax(cuda.to_cpu(prob.data))
                prob.data[index]=0
                index2 = np.argmax(cuda.to_cpu(prob.data))
                prob.data[index2]=0
                index3 = np.argmax(cuda.to_cpu(prob.data))
                prob.data[index3]=0
                index = random.sample([index,index2,index3],1)[0]

            if index != 0:
                twit+=ivocab[index]
            prev_char = np.array([index], dtype=np.int32)
            if self.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)
        return twit

    def _train(self, **kwargs):
            gpu = -1 if "gpu" not in kwargs else kwargs["gpu"]
            lr = 2e-3 if "lr" not in kwargs else kwargs["lr"]
            lr_decay = 0.97 if "lr_decay" not in kwargs else kwargs["lr_decay"]
            lr_decay_after=10 if "lr_decay_after" not in kwargs else kwargs["lr_decay_after"]
            decay_rate = 0.95 if "decay_rate" not in kwargs else kwargs["decay_rate"]
            dropout = 0.0 if "dropout" not in kwargs else kwargs["dropout"]
            bprop_len   = 50 if "bprop_len" not in kwargs else kwargs["bprop_len"]
            batchsize   = 50 if "batchsize" not in kwargs else kwargs["batchsize"]
            grad_clip   = 5 if "grad_clip" not in kwargs else kwargs["grad_clip"]
            n_epochs  = 5 if "epochs" not in kwargs else kwargs["epochs"]
            if gpu >= 0:
                cuda.get_device(gpu).use()
                self.model.to_gpu()

            optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
            optimizer.setup(self.model)

            train_data = self.dataset
            whole_len    = train_data.shape[0]
            jump         = whole_len // batchsize
            epoch        = 0
            start_at     = time.time()
            cur_at       = start_at
            state        = self.model.make_initial_state(batchsize=batchsize)

            if gpu >= 0:
                accum_loss   = Variable(cuda.zeros(()))
                for key, value in state.items():
                    value.data = cuda.to_gpu(value.data)#plist
            else:
                accum_loss   = Variable(np.zeros((), dtype=np.float32))

            print ('going to train {} iterations'.format(jump * n_epochs))

            for i in range(jump * n_epochs):
                x_batch = np.array([train_data[(jump * j + i) % whole_len]
                                    for j in range(batchsize)])
                y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                                    for j in range(batchsize)])

                if gpu >=0:
                    x_batch = cuda.to_gpu(x_batch)
                    y_batch = cuda.to_gpu(y_batch)

                state, loss_i = self.model.forward_one_step(x_batch, y_batch, state, dropout_ratio=dropout)
                accum_loss   += loss_i

                if (i + 1) % bprop_len == 0:  # Run truncated BPTT
                    now = time.time()
                    sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//bprop_len,(jump*n_epochs)//bprop_len, accum_loss.data / bprop_len, now-cur_at))
                    sys.stderr.flush()
                    cur_at = now

                    optimizer.zero_grads()
                    accum_loss.backward()
                    accum_loss.unchain_backward()  # truncate

                    if gpu >= 0:
                        accum_loss = Variable(cuda.zeros(()))
                    else:
                        accum_loss = Variable(np.zeros((), dtype=np.float32))

                    optimizer.clip_grads(grad_clip)
                    optimizer.update()

                    if (i + 1) % 10000 == 0:
                        pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))

                    if (i + 1) % jump == 0:
                        epoch += 1

                    if epoch >= lr_decay_after:
                        optimizer.lr *= lr_decay
                        print ('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
                sys.stdout.flush()

            pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
Example No. 12
parser.add_argument('--init_from', type=str, default='')

args = parser.parse_args()

n_epochs = 1000
n_units = 1024
batchsize = 50
bprop_len = 50
grad_clip = 5

dataset, word2num, num2word = load_data()

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(word2num) + 1, n_units)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate,
                               alpha=args.decay_rate,
                               eps=1e-8)
optimizer.setup(model)

whole_len = dataset.shape[0]
jump = whole_len / batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
Example No. 13
        'No GPU available, training on CPU; consider making n_epochs very small.'
    )

# define the network with PyTorch
from CharRNN import CharRNN

# train the function
from train import train

# instantiating the model

# set model hyperparameters
n_hidden = 256
n_layers = 2

net = CharRNN(chars, n_hidden, n_layers)
print(net)

# set training hyperparameters
batch_size = 10
seq_length = 100
n_epochs = 20

# train the model
train(net,
      encoded,
      epochs=n_epochs,
      batch_size=batch_size,
      seq_length=seq_length,
      lr=0.001,
      print_every=10)
Example No. 14
            break

    for l in range(len(keyList)):
        key = keyList[l]
        img = original[:, colListDefault[l][0]:colListDefault[l][1], :]
        y = discrimination_one_image(img)
        word = unichr(int(num2code[key], 16))
        wordList.append(word)
        imgMatrix.append(img)
        yMatrix.append(max(y))
        accMatrix.append(max(max(y)))

## Context correction

word2num = pickle.load(open('word2num.pkl', 'rb'))
model = CharRNN(len(word2num) + 1, 1024)
serializers.load_npz("latestmodel20171108.npz", model)
for i in range(len(wordList) - 1):
    x_batch = np.array([word2num[wordList[i]]])
    y_batch = np.array([word2num[wordList[i + 1]]])
    x = chainer.Variable(x_batch)
    t = chainer.Variable(y_batch)
    y = model(x, train=False)
    loss = F.softmax_cross_entropy(y, t).data
    if loss < 10:
        continue
    if accMatrix[i + 1] > 0.99:
        continue
    if loss < 15 and accMatrix[i + 1] > 0.9:
        continue
    yArgSort = np.argsort(yMatrix[i + 1])[::-1]
Example No. 15
# -*- coding: utf-8 -*-

from chainer import cuda, Variable, optimizers
import chainer.functions as F
from CharRNN import CharRNN
from chainer import serializers

model = CharRNN(len(word2num) + 1, n_units)
serializers.load_npz("latestmodel20171108.npz", model)

sentence = []
sentence.append('す')
sentence.append('る')
sentence.append('検')
sentence.append('討')
sentence.append('会')
sentence.append('」')
sentence.append('は')
sentence.append('2')
sentence.append('日')
sentence.append('、')
sentence.append('孑')

loss = 0

for i in range(len(sentence) - 1):
    sent = sentence[i]
    sentNext = sentence[i + 1]
    x_batch = np.array([word2num[sent.decode('utf-8')]])
    y_batch = np.array([word2num[sentNext.decode('utf-8')]])
    x = Variable(x_batch)