def init_state(self, model=None, vocab=None):
    """Attach a model/vocabulary pair and reset the generation state.

    Args:
        model: trained char-RNN model; its embedding width (embed.W columns)
            determines the hidden-state size.
        vocab: mapping from character -> integer id.
    """
    self.model = model
    self.vocab = vocab
    # Hidden size is the embedding output width (embed.W is vocab x n_units).
    n_units = self.model.embed.W.data.shape[1]
    self.state = make_initial_state(n_units, batchsize=1, train=False)
    # Build the inverse vocabulary (id -> char).  The original used
    # enumerate() here but never read the index; dropped.
    # NOTE(review): assumes self.ivocab already exists (e.g. set in __init__
    # or as a class attribute) -- otherwise this raises AttributeError.
    for c, i in self.vocab.items():
        self.ivocab[i] = c
Esempio n. 2
0
def prediction(args, vocab="", model=""):
    """Sample a character sequence from a trained char-RNN.

    Loads the vocabulary/model from the paths in `args` unless the caller
    supplied them, primes the recurrent state with args.primetext, then
    emits args.length characters (multinomial sampling when args.sample > 0,
    greedy argmax otherwise).  Returns the accumulated string.
    """
    output = ""
    np.random.seed(args.seed)

    # Vocabulary: load from disk unless one was passed in.
    if vocab == "":
        vocab = pickle.load(open(args.vocabulary, 'rb'))
    # Inverse mapping: integer id -> character.
    ivocab = {idx: ch for ch, idx in vocab.items()}

    # Model: load from disk unless one was passed in.
    if model == "":
        model = pickle.load(open(args.model, 'rb'))
    n_units = model.embed.W.shape[1]

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Fresh generator state, moved to the GPU when requested.
    state = make_initial_state(n_units, batchsize=1, train=False)
    if args.gpu >= 0:
        for variable in state.values():
            variable.data = cuda.to_gpu(variable.data)

    prev_char = np.array([0], dtype=np.int32)
    if args.gpu >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # Feed the priming text through the network one character at a time.
    if len(args.primetext) > 0:
        for ch in args.primetext:
            output += ch
            prev_char = np.full((1,), vocab[ch], dtype=np.int32)
            if args.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)

            state, prob = model.predict(prev_char, state)

    # Generate args.length characters, feeding each pick back in.
    for _ in xrange(args.length):
        state, prob = model.predict(prev_char, state)

        if args.sample > 0:
            # Sample from the normalised softmax distribution.
            dist = cuda.to_cpu(prob.data)[0].astype(np.float64)
            dist /= np.sum(dist)
            index = np.random.choice(range(len(dist)), p=dist)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))
        output += ivocab[index]

        prev_char = np.array([index], dtype=np.int32)
        if args.gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)

    return output
Esempio n. 3
0
def prediction(args, vocab="", model=""):
    """Generate args.length characters from a char-RNN language model.

    Behaviour-identical duplicate of the sampler above: loads vocab/model
    from args paths when not supplied, primes the state with args.primetext,
    then repeatedly feeds the previous character back in and returns the
    accumulated text.
    """
    output = ""
    np.random.seed(args.seed)

    # load vocabulary
    if vocab == "":
        vocab = pickle.load(open(args.vocabulary, 'rb'))
    ivocab = {}
    for character, token_id in vocab.items():
        ivocab[token_id] = character

    # load model
    if model == "":
        model = pickle.load(open(args.model, 'rb'))
    n_units = model.embed.W.shape[1]

    use_gpu = args.gpu >= 0
    if use_gpu:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # initialize generator state
    state = make_initial_state(n_units, batchsize=1, train=False)
    if use_gpu:
        for _, variable in state.items():
            variable.data = cuda.to_gpu(variable.data)

    def to_device(arr):
        # Move the id array to the GPU when one is selected.
        return cuda.to_gpu(arr) if use_gpu else arr

    prev_char = to_device(np.array([0], dtype=np.int32))

    # Prime the state with the given text.
    if len(args.primetext) > 0:
        for character in args.primetext:
            output += character
            prev_char = to_device(np.ones((1,), dtype=np.int32) * vocab[character])

            state, prob = model.predict(prev_char, state)

    for _ in xrange(args.length):
        state, prob = model.predict(prev_char, state)

        if args.sample > 0:
            weights = cuda.to_cpu(prob.data)[0].astype(np.float64)
            weights /= np.sum(weights)
            index = np.random.choice(range(len(weights)), p=weights)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))
        output += ivocab[index]

        prev_char = to_device(np.array([index], dtype=np.int32))

    return output
Esempio n. 4
0
# --- Truncated script fragment: text-generation setup; source cuts off
# --- mid-loop below.  Code kept byte-identical; comments added only.
# load vocabulary (pickled char -> id mapping) and build the inverse map
vocab = pickle.load(open(args.vocabulary, 'rb'))
ivocab = {}
for c, i in vocab.items():
    ivocab[i] = c

# load model; hidden size comes from the embedding width
model = pickle.load(open(args.model, 'rb'))
n_units = model.embed.W.shape[1]

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

# initialize generator state; each state Variable is moved to the GPU if used
state = make_initial_state(n_units, batchsize=1, train=False)
if args.gpu >= 0:
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)

# seed token id 0.  NOTE(review): no dtype given here (unlike the int32
# arrays elsewhere in this file) -- platform-default integer type.
prev_char = np.array([0])
if args.gpu >= 0:
    prev_char = cuda.to_gpu(prev_char)

# Python 2: wrap stdout so unicode characters are written as UTF-8
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

# prime the state with args.primetext, echoing each character to stdout
if len(args.primetext) > 0:
    for i in unicode(args.primetext, 'utf-8'):
        sys.stdout.write(i)
        prev_char = np.ones((1, )).astype(np.int32) * vocab[i]
        # NOTE(review): fragment is truncated here in the source
        if args.gpu >= 0:
Esempio n. 5
0
# --- Truncated training-script fragment (starts at an orphan `else:` whose
# --- `if` is outside this view; truncated again at the bottom).
else:
    # no checkpoint to resume from: build a fresh model
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

# RMSprop as in the usual char-rnn recipe
optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
jump         = int(whole_len / batchsize)   # iterations per epoch
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    # loss accumulator and recurrent state live on the GPU
    accum_loss   = Variable(cuda.zeros(()))
    for key, value in list(state.items()):
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss   = Variable(np.zeros((), dtype=np.float32))

print('going to train {} iterations'.format(jump * n_epochs))
for i in range(jump * n_epochs):
    # batchsize parallel streams through the corpus, each offset by `jump`
    x_batch = np.array([train_data[(jump * j + i) % whole_len]
                        for j in range(batchsize)])
    y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                        for j in range(batchsize)])

    # NOTE(review): fragment truncated here in the source
    if args.gpu >=0:
Esempio n. 6
0
# --- Truncated/garbled fragment: a training-setup snippet fused with a
# --- greedy/beam generation experiment.  Code kept byte-identical
# --- (including broken indentation); comments added only.
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
# NOTE(review): no int() here -- Python 2 integer division assumed
jump         = whole_len / batchsize
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    accum_loss   = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros(()).astype(np.float32))

# Python 2 print statement / xrange below
print 'going to train {} iterations'.format(jump * n_epochs)
for i in xrange(jump * n_epochs):
    x_batch = np.array([train_data[(jump * j + i) % whole_len]
                        for j in xrange(batchsize)])
    y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                        for j in xrange(batchsize)])

    # NOTE(review): indentation below is broken in the source -- the two
    # lines after this `if` are NOT indented under it; preserved as-is.
    if args.gpu >=0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
# initialize generator
if args.gpu >= 0:
    for key, value in list(state.items()):
        value.data = cuda.to_gpu(value.data)
if args.gpu >= 0:
    prev_char = cuda.to_gpu(prev_char)
# print('\n dumping...', i, end= " ")
from copy import deepcopy as DC
""" initialise the state """
i = vocab["iPhone"]
prev_char = np.array([i], dtype=np.int32)
prev_char_stack  = []
prev_ichar_stack = [ivocab[i]]
state = make_initial_state(n_units, batchsize=1, train=False)
state, prob = model.forward_one_step(prev_char, prev_char, state, train=False)
# seed a one-entry "beam": char -> [state, prob, cumulative probability]
beam = {ivocab[i]: [DC(state), DC(prob), 1.]}
for i in range(30):
    # normalise the output distribution and rank characters by probability
    probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
    probability /= np.sum(probability)
    prob_with_index = []
    for e, p in enumerate(probability):
        prob_with_index.append( [e, p, ivocab[e] ] )
    prob_with_index.sort(key=lambda x:-1 * x[1] )

    # greedy step: take the single most likely next character
    index1     = prob_with_index[0][0]
    chosen_p1  = probability[index1]
    prev_char1 = np.array([index1], dtype=np.int32)

    prev_char = prev_char1
Esempio n. 8
0
def sample(model, vocabulary, seed, sample, primetext, length, gpu):
    """Generate text with a pickled char-RNN and trim it to whole sentences.

    Fixes vs. original: removed the unused `out` variable and dead
    commented-out code; normalised tab indentation.  Logic is unchanged.

    Args:
        model: path to the pickled model file (rebound to the loaded model).
        vocabulary: path to the pickled char -> id vocabulary.
        seed: RNG seed for reproducible sampling.
        sample: > 0 selects multinomial sampling; otherwise greedy argmax.
        primetext: UTF-8 byte string used to prime the hidden state.
        length: number of characters to generate.
        gpu: device id; >= 0 moves model/state arrays to that GPU.

    Returns:
        The generated text truncated after the last Japanese full stop.
    """
    np.random.seed(seed)

    # load vocabulary (char -> id) and build the inverse map
    vocab = pickle.load(open(vocabulary, 'rb'))
    ivocab = {}
    for c, i in vocab.items():
        ivocab[i] = c

    # NOTE: the `model` parameter is a *path*; it is rebound to the object.
    model = pickle.load(open(model, 'rb'))
    n_units = model.embed.W.data.shape[1]

    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()

    # initialize generator state
    state = make_initial_state(n_units, batchsize=1, train=False)
    if gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    prev_char = np.array([0], dtype=np.int32)
    if gpu >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # prime the hidden state with the given text (Python 2 `unicode`)
    if len(primetext) > 0:
        for i in unicode(primetext, 'utf-8'):
            prev_char = np.ones((1,), dtype=np.int32) * vocab[i]
            if gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)

            state, prob = model.forward_one_step(prev_char, prev_char, state, train=False)

    strtxt = ''
    count = 0
    checker = 0
    para = 0

    for i in xrange(length):
        state, prob = model.forward_one_step(prev_char, prev_char, state, train=False)

        if sample > 0:
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))
        strtxt = strtxt + ivocab[index]
        # count Japanese full stops so the tail can be trimmed below
        if ivocab[index] == unicode('\xe3\x80\x82','utf-8'):
            count = count + 1

        prev_char = np.array([index], dtype=np.int32)
        if gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)

    # keep the text up to (and including) the last full stop
    # NOTE(review): when no full stop was generated (count == 0) this keeps
    # only the first character -- looks unintended, preserved as-is.
    for i in strtxt:
        para = para + 1
        if i == unicode('\xe3\x80\x82','utf-8'):
            checker = checker + 1
        if checker == count:
            break
    strtxt = strtxt[0:para]
    return strtxt
Esempio n. 9
0
def main():
    """Train a character-level RNN language model with Chainer.

    Parses command-line options, loads the corpus, then runs truncated-BPTT
    training with periodic perplexity reports, checkpointing, and
    learning-rate decay.

    Fixes vs. the original: `xrange` (a Python 2 builtin that raises
    NameError under the Python 3 idioms this function already uses --
    `print(...)`, `model.cleargrads()`) is replaced with `range`, the two
    byte-identical if/else branches initialising `accum_loss` are collapsed,
    and commented-out dead code is removed.
    """
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    # NOTE(review): type=bool on argparse treats any non-empty string as
    # True ("False" included); kept as-is for interface compatibility.
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    # xp is the array module matching the target device (cupy or numpy)
    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

    # resume from a checkpoint or build a fresh model
    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate,
                                   eps=1e-8)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.GradientClipping(grad_clip))  # cap gradient norm

    whole_len = train_data.shape[0]
    jump = int(whole_len / batchsize)  # iterations per epoch
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    # xp already matches the device, so one initialisation covers both
    # cases (the original's if/else branches here were identical)
    accum_loss = Variable(xp.zeros(()).astype(np.float32))
    if args.gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        # batchsize parallel streams through the corpus, offset by `jump`
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len]
            for j in range(batchsize)
        ])

        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch,
                                               y_batch,
                                               state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate the history
            # fresh accumulator on the right device (branches were identical)
            accum_loss = Variable(xp.zeros(()).astype(np.float32))
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        # periodic checkpoint every 10k iterations, plus a rolling "latest"
        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(
                    copy.deepcopy(model).to_cpu(),
                    open('%s/latest.chainermodel' % (args.checkpoint_dir),
                         'wb'))

        if (i + 1) % jump == 0:
            epoch += 1
            # exponential learning-rate decay after a warm-up period
            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
Esempio n. 10
0
# --- Truncated training fragment driven by RNN_* config constants ---
else:
    # no checkpoint to resume from: build a fresh model
    model = CharRNN(len(vocab), RNN_RNN_SIZE)

if RNN_GPU >= 0:
    cuda.get_device(RNN_GPU).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=RNN_LEARNING_RATE, alpha=RNN_DECAY_RATE, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
# NOTE(review): no int() -- Python 2 integer division assumed
jump         = whole_len / RNN_BATCHSIZE
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(RNN_RNN_SIZE, batchsize=RNN_BATCHSIZE)
if RNN_GPU >= 0:
    # loss accumulator and recurrent state live on the GPU
    accum_loss   = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss   = Variable(np.zeros((), dtype=np.float32))

# Python 2 print statement / xrange below
print 'going to train {} iterations'.format(jump * RNN_EPOCHS)
for i in xrange(jump * RNN_EPOCHS):

    # batchsize parallel streams through the corpus, offset by `jump`
    x_batch = np.array([train_data[(jump * j + i) % whole_len]
                        for j in xrange(RNN_BATCHSIZE)])
    y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                        for j in xrange(RNN_BATCHSIZE)])
    # NOTE(review): fragment truncated here in the source
    if RNN_GPU >=0:
 # NOTE(review): oddly indented fragment (one leading space) -- preserved.
 def init_state():
     """Reset the generation state stored on the TextList object."""
     # relies on module-level `n_units`, `TextList` and `make_initial_state`
     TextList.state =  make_initial_state(n_units, batchsize=1, train=False)
Esempio n. 12
0
def run():
    """Sample RNN_LENGTH characters from the configured char-RNN and write
    them to stdout (space-separated when RNN_TRAIN_MODE == "Word").

    Python 2 script: relies on `unicode`, `xrange`, a bare `print`, and the
    module-level RNN_* / *_PATH configuration constants.
    """
    np.random.seed(RNN_SEED)

    # load vocabulary (char -> id) and build the inverse id -> char map
    vocab = pickle.load(open(VOCAB_PATH, 'rb'))
    ivocab = {}
    for c, i in vocab.items():
        ivocab[i] = c

    # load model; hidden size is the embedding output width
    model = pickle.load(open(MODEL_FILE_PATH, 'rb'))
    n_units = model.embed.W.data.shape[1]

    if RNN_GPU >= 0:
        cuda.get_device(RNN_GPU).use()
        model.to_gpu()

    # initialize generator state (moved to GPU when requested)
    state = make_initial_state(n_units, batchsize=1, train=False)
    if RNN_GPU >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    # seed token id 0
    prev_char = np.array([0], dtype=np.int32)
    if RNN_GPU >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # prime the hidden state with RNN_PRIMETEXT, echoing it to stdout
    if len(RNN_PRIMETEXT) > 0:
        for i in unicode(RNN_PRIMETEXT, 'utf-8'):
            sys.stdout.write(i)
            if RNN_TRAIN_MODE == "Word":
                sys.stdout.write(" ")

            prev_char = np.ones((1,), dtype=np.int32) * vocab[i]
            if RNN_GPU >= 0:
                prev_char = cuda.to_gpu(prev_char)

            state, prob = model.forward_one_step(prev_char, prev_char, state, train=False)

    for i in xrange(RNN_LENGTH):
        state, prob = model.forward_one_step(prev_char, prev_char, state, train=False)

        if RNN_SAMPLE > 0:
            # multinomial sampling from the normalised softmax output
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            # greedy: most probable character
            index = np.argmax(cuda.to_cpu(prob.data))
        sys.stdout.write(ivocab[index])

        if RNN_TRAIN_MODE == "Word":
            sys.stdout.write(" ")

        prev_char = np.array([index], dtype=np.int32)
        if RNN_GPU >= 0:
            prev_char = cuda.to_gpu(prev_char)

    # trailing newline (Python 2 bare print statement)
    print
# --- Ensemble fragment: load four models and map input words through each
# --- vocabulary.  NOTE(review): vocab1..vocab3, inputs_index1..3 and
# --- `inputs` are defined outside this fragment -- confirm against caller.
inputs_index4 = []
ivocab1 = {}
ivocab2 = {}
ivocab3 = {}
ivocab4 = {}
model1 = pickle.load(open('./cv/latest_ensembl1_128.chainermodel', 'rb'))
model2 = pickle.load(open('./cv/latest_ensembl2_128.chainermodel', 'rb'))
model3 = pickle.load(open('./cv/latest_ensembl3_128.chainermodel', 'rb'))
model4 = pickle.load(open('./cv/latest_ensembl4_128.chainermodel', 'rb'))
n_units1 = model1.embed.W.data.shape[1]
n_units2 = model2.embed.W.data.shape[1]
n_units3 = model3.embed.W.data.shape[1]
n_units4 = model4.embed.W.data.shape[1]

# initialize one generator state per ensemble member
state1 = make_initial_state(n_units1, batchsize=1, train=False)
state2 = make_initial_state(n_units2, batchsize=1, train=False)
state3 = make_initial_state(n_units3, batchsize=1, train=False)
state4 = make_initial_state(n_units4, batchsize=1, train=False)

# map each input word to its id in every vocabulary
for vocab, inputs_index in [ (vocab1, inputs_index1), (vocab2, inputs_index2), (vocab3, inputs_index3), (vocab4, inputs_index4) ]:
  for word in inputs:
    if vocab.get(word) != None:
      inputs_index.append( vocab.get(word) ) 
    else:
      #print word,  "is not found."
      # NOTE(review): appends the literal string 'UNK' into an id list --
      # probably meant to be the UNK token's integer id; preserved as-is.
      inputs_index.append( 'UNK' )
# build the inverse (id -> word) map for every vocabulary
for vocab, ivocab in [ (vocab1, ivocab1), (vocab2, ivocab2), (vocab3, ivocab3), (vocab4, ivocab4)]:
  for e, (c, i) in enumerate(vocab.items()):
    ivocab[i] = c