Example #1
	def ready(self, params = None):
		'''
			Sets up the model.
		'''
		#Creates the shapes of the inputs, target, and other variables.
		self.x = t.matrix()
		self.y = t.vector(name = 'y', dtype = 'int32')
		self.h0 = t.vector()
		self.lr = t.scalar()

		#The params to be used (input nodes, output nodes, etc...) are retrieved
		#from the params dictionary.  When the values are not found, the default
		#values are used.
		params = self.defaultparams(params)
		self.setparams(params)

		#The actual RNN.
		self.rnn = RNN(input = self.x, n_in = self.n_in, n_hid = self.n_hid,
						n_out = self.n_out, activation = self.activation)

		#Computes the probabilities of the next token and the next token.
		self.predict_probability = theano.function(inputs = [self.x,],
												outputs = self.rnn.probability_y)
		self.predict = theano.function(inputs = [self.x,],
										outputs = self.rnn.y_out)
Example #2
File: rnn_pos.py Project: ticcky/rnn-pos
    def train(self, data, hidden_layer_cnt = 40):
        n_input = len(self.vocab)
        n_output = len(self.tags)
        n_hidden = hidden_layer_cnt

        self.rnn = RNN(n_input, n_output, n_hidden)

        training_set = self.prepare_training_set(data)
        n_epochs = 50
        max_rate = 0.0001
        learning_coeff = 1.0
        history_cnt = 10
        learning_rate_history = [0.0 for i in range(history_cnt)]
        history_pointer = 0
        for epoch in range(n_epochs):
            print "Running epoch #%d" % epoch
            curr_rate = self.rnn.train(training_set, 0.5)  #200.0/(len(training_set))) # * learning_coeff))
            learning_rate_history[history_pointer] = curr_rate
            history_pointer = (history_pointer + 1) % history_cnt
            rate = sum(learning_rate_history)
            #max_rate = max([curr_rate, max_rate])
            #learning_coeff -= (max_rate - curr_rate) / (n_epochs)
            #print curr_rate, max_rate, learning_coeff
            #if rate < 0.1:
            #    break
            if epoch % 20 == 0:
                self.save_to_file('_tmp_save')
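The learning-rate bookkeeping above is a small ring buffer: the last history_cnt per-epoch rates are kept and rate is their sum. A minimal standalone sketch of the same idiom, with hypothetical values and no dependence on the RNN class above:

history_cnt = 10
history = [0.0] * history_cnt
pointer = 0

for epoch, curr_rate in enumerate([0.5, 0.4, 0.3, 0.25, 0.2]):
    history[pointer] = curr_rate           # overwrite the oldest entry
    pointer = (pointer + 1) % history_cnt  # wrap around after history_cnt epochs
    windowed_sum = sum(history)            # sum over the most recent rates
    print(epoch, windowed_sum)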
Example #3
File: cws.py Project: zbxzc35/cws
class CWS:
    def __init__(self, s):
	self.rnn = RNN(s['ne'], s['de'], s['win'], s['nh'], s['nc'], np.random.RandomState(s['seed']))
	self.s = s

    def fit(self, lex, label):
	s = self.s
	n_sentences = len(lex)
	n_train = int(n_sentences * (1. - s['valid_size']))
	s['clr'] = s['lr']
	best_f = 0
	be = 0
	for e in xrange(s['n_epochs']):
	    shuffle([lex, label], s['seed'])
	    train_lex, valid_lex = lex[:n_train], lex[n_train:]
	    train_label, valid_label = label[:n_train], label[n_train:]
	    tic = time.time()
	    for i in xrange(n_train):
		cwords = contextwin(train_lex[i], s['win'])
		words = map(lambda x: np.asarray(x).astype('int32'), minibatch(cwords, s['bs']))
		labels = train_label[i]
		for word_batch, label_last_word in zip(words, labels):
		    self.rnn.fit(word_batch, label_last_word, s['clr'])
		    self.rnn.normalize()
		    if s['verbose']:
			print '[learning] epoch %i >> %2.2f%%' % (e+1, (i+1)*100./n_train), 'completed in %s << \r' % time_format(time.time() - tic),
			sys.stdout.flush()

	    pred_y = self.predict(valid_lex)
	    p, r, f = evaluate(pred_y, valid_label)
	    print '[learning] epoch %i >> P: %2.2f%% R: %2.2f%% F: %2.2f%%' % (e+1, p*100., r*100., f*100.), '<< %s used' % time_format(time.time() - tic)
	    
	    if f > best_f:
		best_f = f
		be = e
		self.save()
    
	    if s['decay'] and e - be >= 5: s['clr'] *= 0.5	    
	    if s['clr'] < 1e-5: break

    def predict(self, lex):
	s = self.s
	y = [self.rnn.predict(np.asarray(contextwin(x, s['win'])).astype('int32'))[1:-1] for x in lex]
	return y

    def save(self):
	if not os.path.exists('params'): os.mkdir('params')
	self.rnn.save() 

    def load(self):
	self.rnn.load()
Example #4
    def create_training_data(self):
        X_train = np.asarray(
            [[self.word_to_index[w] for w in sent[:-1]] for sent in
             self.tokenized_sentences])
        y_train = np.asarray(
            [[self.word_to_index[w] for w in sent[1:]] for sent in
             self.tokenized_sentences])

        model = RNN(self.vocabulary_size, self.hidden_dim, 4)
        t1 = time.time()
        model.sgd_step(X_train[10], y_train[10], self.learning_rate)
        t2 = time.time()
        print("SGD step time: %f milliseconds" % ((t2 - t1) * 1000.))

        if self.model_file is not None:
            self.load_model_parameters(self.model_file, model)
        if self.enable_training:
            self.train_with_sgd(model, X_train, y_train, self.learning_rate,
                                self.nepoch)
        return model
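X_train and y_train above are the same sentences shifted by one token: the target at every position is the next word. A tiny worked example of that pairing, using a hypothetical vocabulary that is not taken from the project:

word_to_index = {"SENT_START": 0, "the": 1, "dog": 2, "barks": 3, "SENT_END": 4}
sent = ["SENT_START", "the", "dog", "barks", "SENT_END"]

x = [word_to_index[w] for w in sent[:-1]]  # [0, 1, 2, 3]
y = [word_to_index[w] for w in sent[1:]]   # [1, 2, 3, 4]
# At position i the model sees x[i] and is trained to predict y[i], the next token.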
Example #5
def create_model():
    if args.model_type == 'lstm':
        return LSTM(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'rnn':
        return RNN(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'irnn':
        return IRNN(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'gru':
        return GRU(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'rnn+':
        if args.layers == 1:
            args.layers = 2
        return IntersectionRNN(input_size=dset.input_dimension,
                                      hidden_size=args.hx,
                                      output_size=dset.output_dimension,
                                      layers=args.layers,
                                      drop=args.drop,
                                      rec_drop=args.rec_drop)
    elif args.model_type == 'peephole':
        return Peephole(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'ugrnn':
        return UGRNN(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    else:
        raise Exception
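The branches above differ only in which class they instantiate. A more compact sketch of the same dispatch, assuming the same constructors and the same args/dset objects as the example:

MODEL_CLASSES = {'lstm': LSTM, 'rnn': RNN, 'irnn': IRNN, 'gru': GRU,
                 'rnn+': IntersectionRNN, 'peephole': Peephole, 'ugrnn': UGRNN}

def create_model():
    cls = MODEL_CLASSES.get(args.model_type)
    if cls is None:
        raise ValueError('unknown model type: %s' % args.model_type)
    if args.model_type == 'rnn+' and args.layers == 1:
        args.layers = 2  # the rnn+ branch above requires at least two layers
    return cls(input_size=dset.input_dimension, hidden_size=args.hx,
               output_size=dset.output_dimension, layers=args.layers,
               drop=args.drop, rec_drop=args.rec_drop)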
Example #6
def main():
    global encoder
    count = 0
    for filename in FILE_NAMES[:-1]:
        count += file_len('clean/' + filename)
    take_size = math.floor(count / 5)

    tmp = labeled_data()
    data = tmp[-1]
    all_labeled_data = shuffle_labled_data(tmp)
    train_test_data = shuffle_labled_data(tmp[:-1])

    tokenizer = tfds.features.text.Tokenizer()
    vocabulary_set = set()
    for text_tensor, _ in all_labeled_data:
        some_tokens = tokenizer.tokenize(text_tensor.numpy())
        vocabulary_set.update(some_tokens)
    vocab_size = len(vocabulary_set)
    encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)

    train_test_data = train_test_data.map(encode_map_fn)
    data = data.map(encode_map_fn)
    data = data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))
    for ex in data.take(5):
        print(ex)

    train_data = train_test_data.skip(take_size).shuffle(BUFFER_SIZE)
    train_data = train_data.padded_batch(BATCH_SIZE,
                                         padded_shapes=([None], []))

    test_data = train_test_data.take(take_size)
    test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))

    vocab_size += 1
    model = RNN(vocab_size, train_data, test_data)

    result = model.predict_classes(data, batch_size=None, verbose=0)

    for i, v in enumerate(result):
        if v == 0:
            print(i + 1)
Example #7
def creat_trunk_ply_by_nn(AE_model_dir1, AE_model_dir2, RNN_model_dir,
                          rawPc_ply_dir_list, device, threshold, save_dir):

    rnn_model = RNN().to(device)
    rnn_model.load_state_dict(torch.load(RNN_model_dir))

    AE_model1 = autoencoder.AE_3d_conv().to(device)
    AE_model1.load_state_dict(torch.load(AE_model_dir1))

    rnn_in_feature = get_rnn_in_featur(AE_model1, device, rawPc_ply_dir_list)
    rnn_out = get_rnn_out(rnn_model, rnn_in_feature)

    #     AE_model2 = autoencoder.AE_3d_conv().to(device)
    #     AE_model2.load_state_dict(torch.load(AE_model_dir2))

    AE_decoder_out = get_AE_decoder_out(AE_model1, rnn_out)
    #point_counts = len(AE_decoder_out[AE_decoder_out>0.2])
    AE_decoder_out = torch.squeeze(AE_decoder_out)

    print('try to save predicted point cloud(.ply) by NN ')
    tensor_to_ply(AE_decoder_out, threshold, save_dir)
Example #8
 def __init__(self,
              vocab_size,
              embedding_dim,
              hidden_dim,
              n_classes=1,
              bidirectional=False,
              padding_idx=0):
     super(SentimentRNN, self).__init__()
     self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx)
     self.bridge = nn.Linear(embedding_dim, embedding_dim)
     self.rnn = RNN(embedding_dim, hidden_dim)
     self.out = nn.Linear(hidden_dim, n_classes)
Example #9
def test(netFile,
         dataSet,
         model='RNN',
         trees=None,
         confusion_matrix_file=None,
         acti=None):
    if trees == None:
        trees = tr.loadTrees(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)

        if (model == 'RNTN'):
            nn = RNTN(wvecDim=opts.wvecDim,
                      outputDim=opts.outputDim,
                      numWords=opts.numWords,
                      mbSize=opts.minibatch,
                      rho=opts.rho,
                      acti=acti)
        elif (model == 'RNN'):
            nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                     opts.minibatch)
        else:
            raise ValueError('%s is not a valid neural network; so far only RNTN and RNN are supported' % opts.model)

        nn.initParams()
        nn.fromFile(fid)

    print "Testing %s..." % model

    cost, correct, guess, total = nn.costAndGrad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])

    correctSent = 0
    for tree in trees:
        sentLabel = tree.root.label
        sentPrediction = tree.root.prediction
        if sentLabel == sentPrediction:
            correctSent += 1

    # Generate confusion matrix
    #if confusion_matrix_file is not None:
    #    cm = confusion_matrix(correct, guess)
    #    makeconf(cm, confusion_matrix_file)

    print "%s: Cost %f, Acc %f, Sentence-Level: Acc %f" % (
        dataSet, cost, correct_sum / float(total),
        correctSent / float(len(trees)))
    return (correct_sum / float(total), correctSent / float(len(trees)))
Example #10
def train_rnn(step, data_path):
    rnn = RNN(27, 50, 27, lr=0.01)
    gen = yield_sample(data_path)
    for i in range(step):
        word, x, y = next(gen)
        #print(y)
        rnn.inference(x)
        los = rnn.loss(y)
        rnn.bptt(y)
        print("step:%d, loss:%f" % (i, los))
Example #11
def test(netFile, dataSet, model='RNN', trees=None):
    if trees == None:
        trees = tr.loadTrees(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)

        if (model == 'RNTN'):
            nn = RNTN(opts.wvecDim, opts.outputDim, opts.numWords,
                      opts.minibatch)
        elif (model == 'RNN'):
            nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                     opts.minibatch)
        elif (model == 'RNN2'):
            nn = RNN2(opts.wvecDim, opts.middleDim, opts.outputDim,
                      opts.numWords, opts.minibatch)
        elif (opts.model == 'RNN3'):
            nn = RNN3(opts.wvecDim, opts.middleDim, opts.outputDim,
                      opts.numWords, opts.minibatch)
        elif (model == 'DCNN'):
            nn = DCNN(opts.wvecDim,
                      opts.ktop,
                      opts.m1,
                      opts.m2,
                      opts.n1,
                      opts.n2,
                      0,
                      opts.outputDim,
                      opts.numWords,
                      2,
                      opts.minibatch,
                      rho=1e-4)
            trees = cnn.tree2matrix(trees)
        else:
            raise ValueError('%s is not a valid neural network; so far only RNTN, RNN, RNN2, RNN3, and DCNN are supported' % opts.model)

        nn.initParams()
        nn.fromFile(fid)

    print "Testing %s..." % model

    cost, correct, guess, total = nn.costAndGrad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])

    # TODO
    # Plot the confusion matrix?

    print "Cost %f, Acc %f" % (cost, correct_sum / float(total))
    return correct_sum / float(total)
Example #12
File: sdfa.py Project: Kraas/short_project
def get_audio_feature_extractor(model_path, gpu=-1):
    if gpu < 0:
        device = torch.device("cpu")
        model_dict = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
    else:
        device = torch.device("cuda:" + str(gpu))
        model_dict = torch.load(
            model_path, map_location=lambda storage, loc: storage.cuda(gpu))

    audio_rate = model_dict["audio_rate"]
    audio_feat_len = model_dict['audio_feat_len']
    rnn_gen_dim = model_dict['rnn_gen_dim']
    aud_enc_dim = model_dict['aud_enc_dim']
    video_rate = model_dict["video_rate"]

    encoder = RNN(audio_feat_len,
                  aud_enc_dim,
                  rnn_gen_dim,
                  audio_rate,
                  init_kernel=0.005,
                  init_stride=0.001)
    encoder.to(device)
    encoder.load_state_dict(model_dict['encoder'])

    overlap = audio_feat_len - 1.0 / video_rate
    return encoder, {
        "rate": audio_rate,
        "feature length": audio_feat_len,
        "overlap": overlap
    }
Example #13
    def load_existed_models(self):
        mdir = self.model_dir
        models = {}

        for fname in os.listdir(mdir):
            if fname[-3:] == '.h5':
                print('[Load]', fname)
                rnn = RNN(mdir + fname)
                if rnn.model is not None:
                    models[fname] = rnn

        self.models = models
        print('[Load] done.')
Example #14
 def test_forward(self):
     config = {
         'dim_hidden' : 10
       , 'len' : 2
     }
     l = RNN(config)
     l.accept([26])
     x = [np.zeros([26])] * 2
     x[0][0] = 1.0
     x[1][1] = 1.0
      
     l.forward(x)
     
     pass
Example #15
 def test_fit(self):
     config = {
         'dim_hidden' : 10
       , 'len' : 2
       , 'step_size' : 0.01
     }
     l = RNN(config)
     l.accept([26])
     x = [np.zeros([26])] * 2
     x[0][0] = 1.0
     x[1][1] = 1.0
     
     y = np.array([1, 2])
     l.fit(x, y, 100, config)
Example #16
File: rnn_pos.py Project: ticcky/rnn-pos
    def load_from_file(self, filename):
        f = open(filename, 'rb')

        context = json.loads(f.read())
        self.vocab = context['vocab']
        self.tags = context['tags']
        self.__init__(self.vocab, self.tags)
        
        self.rnn = RNN(context['n_inputs'], context['n_outputs'], context['n_hidden'])
        self.rnn.U = array([array(x) for x in context['U']])
        self.rnn.V = array([array(x) for x in context['V']])
        self.rnn.W = array([array(x) for x in context['W']])

        f.close()
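A hypothetical counterpart to the loader above (the project's actual save_to_file, called in Example #2, is not shown here). It writes the same fields that load_from_file reads back; attribute names such as rnn.n_hidden are assumptions:

    def save_to_file(self, filename):
        context = {
            'vocab': self.vocab,
            'tags': self.tags,
            'n_inputs': len(self.vocab),
            'n_outputs': len(self.tags),
            'n_hidden': self.rnn.n_hidden,   # assumed attribute name
            'U': self.rnn.U.tolist(),
            'V': self.rnn.V.tolist(),
            'W': self.rnn.W.tolist(),
        }
        with open(filename, 'w') as f:
            f.write(json.dumps(context))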
Example #17
def eval(opts, data=None):
    # generate and evaluate a test set for analysis
    print('eval start')
    save_path = opts.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    print('graph start')
    tf.reset_default_graph()
    if data:
        X, Y, N = data
    else:
        X, Y, N = inputs.create_inputs(opts, train=False)

    opts.n_inputs = X.shape[0]
    opts.batch_size = opts.n_inputs
    X_pl, Y_pl, N_pl = create_placeholders(X.shape[-1], Y.shape[-1],
                                           opts.rnn_size, X.shape[1])
    train_iter, next_element = create_tf_dataset(X_pl,
                                                 Y_pl,
                                                 N_pl,
                                                 opts.batch_size,
                                                 shuffle=False)

    print('rnn start')
    model = RNN(next_element, opts, training=False)

    save_name = opts.activity_name
    print('[*] Testing')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(train_iter.initializer, feed_dict={X_pl: X, Y_pl: Y, N_pl: N})
        # sess.run(train_iter.initializer, feed_dict={X_pl: X, Y_pl: Y})
        print('loading saved')
        model.load()
        save_activity(model, X, Y, N, save_path, save_name)
Example #18
def test(netFile, dataSet, model='RNN', trees=None):
    if trees == None:
        if dataSet == "train":
            trees = tr.load_trees(TRAIN_DATA_FILE)
        elif dataSet == "dev":
            trees = tr.load_trees(DEV_DATA_FILE)
    
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile

    #f = open(netFile, 'rb')
    #opts = pickle.load(f)
    #_ = pickle.load(f)
    opts = joblib.load(netFile + "_opts")
    _ = joblib.load(netFile + "_cost")
    
    if (model=='RNTN'):
        nn = RNTN(opts.wvecDim,opts.outputDim,opts.numWords,opts.minibatch)
    elif(model=='RNN'):
        nn = RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.minibatch)
    elif(model=='RNN2'):
        nn = RNN2(opts.wvecDim,opts.middleDim,opts.outputDim,opts.numWords,opts.minibatch)
    else:
        raise ValueError('%s is not a valid neural network; so far only RNTN, RNN, and RNN2 are supported' % opts.model)
    
    nn.initParams()
    #nn.stack = pickle.load(f)
    #nn.stack = np.load(f)
    nn.stack = joblib.load(netFile + "_stack")
    #f.close()

    print "Testing %s..." % model

    cost, correct, guess, total = nn.costAndGrad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):        
        correct_sum += (guess[i] == correct[i])
    
    # confusion matrix
    conf_arr = np.zeros((opts.outputDim, opts.outputDim))
    for i in xrange(len(correct)):
        curr_correct = correct[i]
        curr_guess = guess[i]
        conf_arr[curr_correct][curr_guess] += 1.0

    #makeconf(conf_arr)
    
    print "Cost %f, Acc %f" % (cost, correct_sum / float(total))
    return correct_sum / float(total)
Example #19
def predicted_labels(sentence, hypothesis, classifier, network='best-GRU'):
    if network == 'best-GRU':
        # load model
        vocab = ['Europeans', 'Germans', 'Italians', 'Romans', 'all', 'children', 'fear', 'hate', 'like', 'love', 'not',
                 'some']
        rels = ['#', '<', '=', '>', '^', 'v', '|']

        word_dim = 25
        n_hidden = 128
        cpr_dim = 75

        model_path = '/Users/mathijs/Documents/Studie/MoL/thesis/mol_thesis/final_experiments/binary_fol_rnn/nobrackets/models/GRUbinary_2dets_4negs_train_0bracket_pairs1.pt'
        net = RNN('GRU', vocab, rels, word_dim, n_hidden, cpr_dim)
        net.load_state_dict(torch.load(model_path))

    s = [sentence.split()]

    _, hidden_vectors = net.rnn_forward(s, 1, hypothesis=hypothesis)
    test_hiddens = np.array(hidden_vectors[0])
    y_pred = classifier.predict(test_hiddens)

    labels = np.array([y_pred])

    return(labels)
Example #20
def runCircTest():
    r0 = 10.0
    r1 = 10.0
    nTrainingPoint = 1000000
    batchSize = 30
    deltaTheta = 0.25
    theta = 0.0
    data = []

    rnn = RNN(inputDim=2, stateDim=20, rate=0.1)

    for i in range(0, nTrainingPoint):
        theta = (theta + deltaTheta) % (2 * np.pi)
        x = r0 * np.cos(theta)
        y = r1 * np.sin(theta)
        data.append(np.array([x, y]))

    for i in range(0, nTrainingPoint, batchSize):
        batch = data[i:i + batchSize]
        rnn.update(batch)

    result = rnn.predict(start=np.array([r0, 0.0]), nStep=10)
    for x, y in result:
        print(x, y)
Example #21
def main():
    input_size, output_size = 3, 3
    rnn = RNN()
    rnn.add_layer(LSTM(input_size, output_size))

    X_train = [[[1, 0, 0]], [[0, 1, 0]], [[0, 0, 1]]]
    Y_train = [[[0, 1, 0]], [[0, 0, 1]], [[1, 0, 0]]]

    epochs = 1000
    rnn.train(X_train, Y_train, epochs=epochs)
    for p, y in zip(rnn.predict(X_train), Y_train):
        _p = np.zeros_like(p).astype(int)
        _p[:, np.argmax(p)] = 1
        print('%30s %10s %10s' % (p.reshape(1, -1), _p, np.array(y)))
Example #22
class Scorer(object):
    def __init__(self, char_list, model_path, rnn_type, ninp, nhid, nlayers,
                 device):
        char_list = list(char_list) + ['sil_start', 'sil_end']
        self.inv_vocab_map = dict([(i, c) for (i, c) in enumerate(char_list)])
        self.vocab_map = dict([(c, i) for (i, c) in enumerate(char_list)])
        self.criterion = nn.CrossEntropyLoss()
        self.device = device
        self.rnn = RNN(rnn_type, len(char_list), ninp, nhid,
                       nlayers).to(self.device)
        self.rnn.load_state_dict(torch.load(model_path))
        self.rnn.eval()
        self.history = defaultdict(tuple)

    def get_score(self, string):
        if len(string) < 2:
            return 0, self.rnn.init_hidden(1)
        string_idx = map(lambda x: self.vocab_map[x], string)
        input = string_idx[:-1]
        grt = string_idx[1:]
        input, grt = torch.LongTensor(input).to(
            self.device), torch.LongTensor(grt).to(self.device)
        input = input.view(1, input.size()[0])
        init_hidden = self.rnn.init_hidden(1)
        pred, hidden = self.rnn(input, init_hidden)
        pred = pred.view(-1, pred.size(-1))
        loss = self.criterion(pred, grt)
        return -(len(string_idx) - 1) * loss.item(), hidden

    def get_score_fast(self, strings):
        strings = [''.join(x) for x in strings]
        history_to_update = defaultdict(tuple)
        scores = []
        for string in strings:
            if len(string) <= 2:
                score, hidden_state = self.get_score(string)
                scores.append(score)
                history_to_update[string] = (score, hidden_state)
            elif string in self.history:
                history_to_update[string] = self.history[string]
                scores.append(self.history[string][0])
            elif string[:-1] in self.history:
                score, hidden = self.history[string[:-1]]
                input, grt = torch.LongTensor([
                    self.vocab_map[string[-2]]
                ]).view(1, 1).to(self.device), torch.LongTensor(
                    [self.vocab_map[string[-1]]]).to(self.device)
                pred, hidden = self.rnn(input, hidden)
                loss = self.criterion(pred.view(-1, pred.size(-1)), grt).item()
                history_to_update[string] = (score - loss, hidden)
                scores.append(score - loss)
            else:
                raise ValueError("%s not stored" % (string[:-1]))
        self.history = history_to_update
        return scores
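A minimal sketch (synthetic tensors, not the Scorer's real data) of why get_score multiplies the loss by (len(string_idx) - 1): nn.CrossEntropyLoss returns the mean per-step negative log-likelihood, so scaling by the number of prediction steps recovers the total log-probability of the string.

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
pred = torch.randn(4, 10)            # 4 prediction steps over a 10-symbol vocabulary
grt = torch.tensor([1, 3, 3, 7])     # ground-truth next characters
mean_nll = criterion(pred, grt)      # mean negative log-likelihood per step
total_nll = -pred.log_softmax(dim=-1)[torch.arange(4), grt].sum()
assert torch.isclose(4 * mean_nll, total_nll)
# get_score returns -(steps * mean_nll), i.e. the log-probability of the whole string.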
Example #23
def TrainHebian(identifier, num_epochs=2_000):
	hebian_model = RNN(input_size, hidden_size, output_size)
	#train model using Hebian learning
	trainer = Hebian(hebian_model, task, alpha_trace = 0.5)
	trainer.TrainHebbian(num_trials=num_epochs)
	F = hebian_model.GetF()
	roots, pca = FindFixedPoints(F, [1,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1,\
				-0.1,-0.2,-0.3,-0.4,-0.5,-0.6,-0.7,-0.8,-0.9,-1])
	hebian_model.pca = pca
	hebian_model.save('hebian_model'+str(identifier))
	return hebian_model
Example #24
def TrainFORCE(identifier, num_epochs=2_000):
	force_model = RNN(input_size, hidden_size, output_size)
	#train model using FORCE
	trainer = Force(force_model, task, alpha=1000)
	trainer.trainForce(num_trials=num_epochs)
	F = force_model.GetF()
	roots, pca = FindFixedPoints(F, [1,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1,\
				-0.1,-0.2,-0.3,-0.4,-0.5,-0.6,-0.7,-0.8,-0.9,-1])
	force_model.pca = pca
	force_model.save('force_model'+str(identifier))
	return force_model
Example #25
 def __init__(self, rng, input, h_prev, y_prev, dim, n_feature_maps,
         window_sizes, n_hidden, n_out):
     #self.cnn = CNN(rng=rng, input=input, dim=dim,
         #n_feature_maps=n_feature_maps, window_sizes=window_sizes)
     #self.rnn = RNN(rng=rng, input=self.cnn.output, h_prev=h_prev,
         #y_prev=y_prev, n_in=n_feature_maps*len(window_sizes),
         #n_hidden=n_hidden, n_out=n_out)
     self.avg = Average(input=input, dim=dim)
     self.rnn = RNN(rng=rng, input=self.avg.output, h_prev=h_prev,
         y_prev=y_prev, n_in=dim, n_hidden=n_hidden, n_out=n_out)
     self.h = self.rnn.h
     self.y = self.rnn.y
     self.output = self.rnn.output
     self.loss = self.rnn.loss
     self.error = self.rnn.error
     #self.params = self.cnn.params + self.rnn.params
     self.params = self.rnn.params
Example #26
def test():
    data = LoadTestData()
    untrained_models = []
    config = {'ngram': 3, 'est': 'add-delta', 'delta': 0.3}
    untrained_models.append((HMM(config), 'HMM. config: {}'.format(config)))
    config = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')
    }
    untrained_models.append((MEMM(config), 'MEMM. config: {}'.format(config)))
    config = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'IDX', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')
    }
    untrained_models.append(
        (CRF_WORD(config), 'CRF. config: {}'.format(config)))
    trained_models = [(model.prep_data().shuffle(0xfab1e).split(0).train(),
                       name) for model, name in untrained_models]
    config = {
        'n_layers': 3,
        'hidden_dim': 32,
        'embedding': 'mds',
        'win_len': 4,
        "device": "cpu"
    }
    rnn = RNN(config)
    trained_models.append((rnn.prep_model().load('rnn_model.bin'),
                           'RNN. config: {}'.format(config)))
    for model, name in trained_models:
        trained_model = model
        conf_mat, dist = TestModel(trained_model, data)
        print('\n')
        print(name)
        print('=' * 80)
        print('Vowel metrics:')
        print('-' * 50)
        PrintConfMat(conf_mat)
        print('-' * 50)
        print('Edit distance:')
        print('-' * 50)
        for stage in range(1, 4):
            print('Stage = {}:'.format(stage_names[stage]))
            print('   Average = {}\n   Median = {}\n   Min = {}\n   Max = {}'.
                  format(dist[stage][0], dist[stage][1], dist[stage][2],
                         dist[stage][3]))
Example #27
    def test_backward(self):
        config = {
            'dim_hidden' : 10
          , 'len' : 2
        }
        l = RNN(config)
        l.accept([26])
        x = [np.zeros([26])] * 2
        x[0][0] = 1.0
        x[1][1] = 1.0
         
        y = l.forward(x)


        dy = [None] * 2
        loss, dy[0] = utils.cross_entropy(utils.softmax(y[0]), np.array([0]))
        loss, dy[1] = utils.cross_entropy(utils.softmax(y[1]), np.array([1]))
        
        dW, dU, dV = l.backward(dy)
Example #28
def TrainGenetic(identifier, num_generations=15):
	genetic_model = RNN(input_size, hidden_size, output_size)
	#train model using genetic algorithm
	num_pop=50
	sigma=0.01
	#num_generations=15
	trainer = Genetic(genetic_model, task, num_generations)
	trainer.trainGenetic(num_pop, sigma, batch_size=50, num_parents=5, mutation=0.1)
	F = genetic_model.GetF()
	roots, pca = FindFixedPoints(F, [[1],[0.9],[0.8],[0.7],[0.6],[0.5],[0.4],[0.3],[0.2],[0.1],\
				[-0.1],[-0.2],[-0.3],[-0.4],[-0.5],[-0.6],[-0.7],[-0.8],[-0.9],[-1]])
	genetic_model.pca = pca
	genetic_model.save('genetic_model'+str(identifier))
	return genetic_model
Example #29
def load_model(input_size):

    model = RNN(input_size, hidden_size, num_layers)

    # load on CPU only
    checkpoint = torch.load('checkpoint.pt', map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    print(model)
    print('model training loss', checkpoint['loss'])
    print('model training epoch', checkpoint['epoch'])

    return model
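The loader above expects a checkpoint dictionary containing model_state_dict, loss, and epoch. A hedged sketch of the matching save side (the variable names model, running_loss, and epoch are assumptions about the training loop):

torch.save({
    'model_state_dict': model.state_dict(),
    'loss': running_loss,
    'epoch': epoch,
}, 'checkpoint.pt')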
Example #30
def run_model(which='all'):
    if which in ['ann', 'all', 'main', 'standard']:
        model = ANN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                    sent_len).cuda()
        ann_loss = train(model, x, target, ann=True)
        plt.plot(ann_loss, label='ann')
    if which in ['wann', 'all', 'standard']:
        model = WANN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                     sent_len).cuda()
        wann_loss = train(model, x, target, ann=True)
        plt.plot(wann_loss, label='wann')
    if which in ['rnn', 'all', 'main']:
        model = RNN(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        rnn_loss = train(model, x, target)
        plt.plot(rnn_loss, label='rnn')
    if which in ['exrnn', 'all']:
        model = EXRNN(emb_size, vocab_size, hid_dim, hid_num, class_num, 2000,
                      2000).cuda()
        exrnn_loss = train(model, x, target)
        plt.plot(exrnn_loss, label='exrnn')
    if which in ['exmem', 'all']:
        model = EXRNN(emb_size,
                      vocab_size,
                      hid_dim,
                      hid_num,
                      class_num,
                      2000,
                      forget_dim=None).cuda()
        exmem_loss = train(model, x, target)
        plt.plot(exmem_loss, label='exmem')
    if which in ['lstm', 'all', 'main']:
        model = LSTM(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        lstm_loss = train(model, x, target)
        plt.plot(lstm_loss, label='lstm')
    if which in ['gru', 'all', 'main']:
        model = GRU(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        gru_loss = train(model, x, target)
        plt.plot(gru_loss, label='gru')
    # plt.ylim([0, 2])
    plt.legend()
    plt.grid(True)
    plt.show()
Example #31
def main(save=True):
    """ Train a model \n
        
        ave {bool} - whether to save the trained model (default: True) \n
        
        Returns: wrapper RNN class for a Keras model (e.g. keras.models.Sequential) """
    startTime = time()
    trainingSet, validationSet, scaler = setup()
    trainGen = DataGenerator(trainingSet,
                             scaler,
                             windowSize=WINDOW_SIZE,
                             lookback=LOOKBACK,
                             sampleRate=SAMPLERATE,
                             prediction=PREDICTION).generator()
    validGen = DataGenerator(validationSet,
                             scaler,
                             windowSize=WINDOW_SIZE,
                             lookback=LOOKBACK,
                             sampleRate=SAMPLERATE,
                             prediction=PREDICTION).generator()
    rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION)
    optimizer = rnn.pickOptimizer(OPTIMIZER, lr=LEARNING_RATE)
    rnn.model.compile(loss=LOSS_FUNC, optimizer=optimizer)
    rnn.model.fit_generator(trainGen,
                            steps_per_epoch=STEPS_PER_EPOCH,
                            epochs=EPOCHS,
                            validation_data=validGen,
                            validation_steps=VALIDATION_STEP_PER_EPOCH,
                            verbose=2,
                            shuffle=False)
    endTime = time()
    print(
        f"\nTRAINING DONE. Total time elapsed: {strftime('%H:%M:%S', gmtime(endTime - startTime))}"
    )
    if save:
        weightsFile = constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK,
                                        WINDOW_SIZE, SAMPLERATE, PREDICTION,
                                        WEIGHT_EXT)
        architectureFile = constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK,
                                             WINDOW_SIZE, SAMPLERATE,
                                             PREDICTION, ARCHITECT_EXT)
        rnn.saveWeights(weightsFile)
        rnn.saveArchitecture(architectureFile)
    return rnn
Example #32
 def __init__(self, input_dimension=300, output_dimension=1000, hidden_dimension=512, \
         num_layers=3, context_dimension=None):
     super(Decoder, self).__init__()
     self.max_sequence_length = 20
     self.input_dimension = input_dimension
     self.output_dimension = output_dimension
     self.context_dimension = context_dimension
     self.hidden_dimension = hidden_dimension
     self.num_layers = num_layers
     self.step_count = 0
     self.example_count = 0
     self.fc = nn.Linear(self.hidden_dimension, self.output_dimension)
     self.generating_activation = nn.Softmax(dim=1)
     if self.context_dimension is None:
         self.rnn = RNN(self.input_dimension, self.hidden_dimension,
                        self.num_layers)
     else:
         self.rnn = ContextEnhancedRNN(self.input_dimension, self.hidden_dimension, \
                 self.context_dimension, self.num_layers)
     self.initialize_modules()
Example #33
    def __init__(self,
                 vocab_size,
                 hidden_size=256,
                 lr=2e-3,
                 rnn='gru',
                 sampling='sample'):
        super(CHAR_RNN, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.sampling = sampling

        if rnn == 'rnn':
            self.rnn = RNN(self.vocab_size, self.hidden_size)
        elif rnn == 'gru':
            self.rnn = GRU(self.vocab_size, self.hidden_size)
        else:
            raise NotImplementedError()

        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.CrossEntropyLoss()
Example #34
    def __init__(
        self,
        encoder_output_dim,
        decoder_type,
        vocab_size,
        embedding_dim,
        num_layers,
    ):
        super().__init__()

        # CNN Model
        self.cnn = CNN(encoder_output_dim)

        # Decoder (RNN or Transformer)
        d_model = embedding_dim + encoder_output_dim
        if decoder_type == "rnn":
            self.decoder = RNN(vocab_size, embedding_dim, d_model, num_layers)
        else:
            self.decoder = Transformer(vocab_size, embedding_dim, d_model,
                                       num_layers)
Example #35
    def __init__(
        self,
        training_data_dir,
        logdir,
        autoencoder_config,
        rnn_config,
        z_length=32,
        z_output_fn=normalize,
    ):

        self.feed = FeedDict(training_data_dir, logdir)
        self.autoencoder = AutoEncoder(z_length=z_length,
                                       z_output_fn=z_output_fn,
                                       **autoencoder_config)
        self.rnn = RNN(z_length=z_length,
                       z_output_fn=z_output_fn,
                       **rnn_config)

        self.sess = tf.Session()
        self.sess.run(tf.initialize_all_variables())
Example #36
File: runNNet.py Project: successar/RAE
def test(netFile,dataSet,trees=None):
    if trees==None:
        trees = tr.printtree(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s"%netFile
    with open(netFile,'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)
        
        nn = RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.alpha,opts.minibatch)        
        nn.initParams()
        nn.fromFile(fid)

    cost, Mis = nn.costAndGrad(trees,test=True)
    
    print "Cost = %f, Acc = %f"%(cost, 1.0 - Mis)
    return (1.0 - Mis)
Example #37
''' Generating examples with a softmax prediction over every character at each
	time step: softmax(output). Training maximizes the total log probability of the
	training sequence, which implies that the RNN learns a probability distribution
	over sequences. We can sample from this conditional distribution to get the next
	character in a generated string and provide it as the next input to the RNN.
'''


'''e = 1
for i in range(10):
	X = np.random.rand(100,2)
	y = np.dot(X[:,0], X[:,1])

	net = RNN(2, 30, 1)
	c = net.fit(X, y)
	e = 0.1*np.sqrt(c)+0.9*e
	print(e)'''

nout = 2
net = RNN(2, 30, nout)
np.random.seed(123)
X = np.random.rand(10, 10, 2)
y = np.random.rand(10,2)
'''for i in range(nout):
	y[:,i+1:,i] = X[:,:-i-1,i]'''
tresh=0.5
'''y[0:,][X[1:-1, :,1] > X[:-2,:,0] + tresh] = 1
y[1][X[1:-1, :,1] > X[:-2,:,0] + tresh] = 2'''
net.fit(X, y)
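The docstring at the top of this example describes sampling one character at a time from the softmax output and feeding it back in. A minimal sketch of that loop, assuming a hypothetical predict_probability(prefix) helper that returns the next-character distribution (it is not part of the RNN class used above):

import numpy as np

def generate(predict_probability, seed_idx, length, rng=np.random.default_rng(0)):
	generated = [seed_idx]
	for _ in range(length):
		probs = predict_probability(generated)      # softmax over the vocabulary
		next_idx = rng.choice(len(probs), p=probs)  # sample instead of taking argmax
		generated.append(int(next_idx))
	return generated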
Example #38
    def __init__(self, name, imsize, patchsize, nhid,
                 numpy_rng, eps, hids_scale,
                 feature_network=None, input_feature_layer_name=None,
                 metric_feature_layer_name=None,
                 nchannels=1, weight_decay=0.):
        # CALL PARENT CONSTRUCTOR TO SETUP CONVENIENCE FUNCTIONS
        # (SAVE/LOAD, ...)
        super(RATM, self).__init__(name=name)
        self.imsize = imsize
        assert len(patchsize) == 2
        self.patchsize = patchsize
        self.nhid = nhid
        self.numpy_rng = numpy_rng
        self.eps = eps
        self.hids_scale = hids_scale
        self.nchannels = nchannels
        self.weight_decay = weight_decay
        assert hasattr(feature_network, 'forward')
        assert hasattr(feature_network, 'load')
        self.feature_network = feature_network
        self.input_feature_layer_name = input_feature_layer_name
        assert (self.input_feature_layer_name in
                self.feature_network.layers.keys())
        self.metric_feature_layer_name = metric_feature_layer_name
        assert (self.metric_feature_layer_name in
                self.feature_network.layers.keys())
        # TODO: remove this constraint, if everything else works
        assert (
            self.feature_network.layers.keys().index(
                self.metric_feature_layer_name) >
            self.feature_network.layers.keys().index(
                self.input_feature_layer_name))

        ftensor5 = T.TensorType(theano.config.floatX, (False,) * 5)
        self.inputs = ftensor5(name='inputs')
        self.inputs.tag.test_value = numpy_rng.randn(
            16, 5, nchannels, imsize[0], imsize[1]).astype(np.float32)
        self.targets = T.ftensor3(name='targets')
        self.targets.tag.test_value = numpy_rng.randn(
            16, 5, 4).astype(np.float32)
        self.masks = T.fmatrix(name='masks')
        self.masks.tag.test_value = np.ones((16, 5), dtype=np.float32)

        self.batchsize = self.inputs.shape[0]
        self.nframes = self.inputs.shape[1]

        # shuffle axis, such that time axis is first
        self.inputs_frames = self.inputs.transpose(1, 0, 2, 3, 4)
        self.targets_frames = self.targets.transpose(1, 0, 2)
        self.masks_frames = self.masks.T

        self.attention_mechanism = SelectiveAttentionMechanism(
            imsize=imsize, patchsize=patchsize, eps=self.eps,
            nchannels=nchannels)

        self.targets_widthheight = (self.targets_frames[:, :, 1::2] -
                                    self.targets_frames[:, :, ::2])
        self.targets_XYs = (self.targets_frames[:, :, 1::2] +
                            self.targets_frames[:, :, ::2]) / 2.

        self.targets_centers_widthheight = T.concatenate((
            self.targets_XYs, self.targets_widthheight), axis=2)

        self.nin = self.feature_network.layers[
            self.input_feature_layer_name].outputs_shape[1]
        self.rnn = RNN(nin=self.nin, nout=10, nhid=self.nhid,
                       numpy_rng=self.numpy_rng, scale=hids_scale)

        self.wread = theano.shared(
            numpy_rng.uniform(
                low=-.001, high=.001, size=(self.nhid, 7)
            ).astype(np.float32), name='wread')

        self.targets_params = T.concatenate((
            # center x,y
            self.targets_centers_widthheight[
                :, :, :2] / np.array(((imsize[::-1],),), dtype=np.float32),
            # std x
            (self.targets_centers_widthheight[:, :, 2] /
             patchsize[1]).dimshuffle(0, 1, 'x'),
            # stride x
            np.float32(1.5) * (self.targets_centers_widthheight[:, :, 2] /
                               imsize[1]).dimshuffle(0, 1, 'x'),
            # gamma (unused)
            T.ones((self.nframes, self.batchsize, 1)),
            # std y
            (self.targets_centers_widthheight[:, :, 3] /
             patchsize[0]).dimshuffle(0, 1, 'x'),
            # stride y
            np.float32(1.5) * (self.targets_centers_widthheight[:, :, 3] /
                               imsize[0]).dimshuffle(0, 1, 'x'),
        ), axis=2)

        self.targets_params_reshape = self.targets_params.reshape((
            self.nframes * self.batchsize, 7
        ))

        (self.targets_patches,
         _, _, _, _) = self.attention_mechanism.build_read_graph(
            images_var=self.inputs_frames.reshape((
                self.nframes * self.batchsize, self.nchannels,
                self.imsize[0], self.imsize[1])),
            attention_acts=self.targets_params_reshape)

        self.targets_features = self.feature_network.forward_from_to(
            self.targets_patches,
            to_layer_name=self.metric_feature_layer_name
        )
        self.targets_features = self.targets_features.reshape((
            self.nframes, self.batchsize,
            T.prod(self.targets_features.shape[1:])))

        self.bread_init = T.concatenate((
            # center x,y
            self.targets_centers_widthheight[
                0, :, :2] / np.array((imsize[::-1],), dtype=np.float32),
            # std x
            (self.targets_centers_widthheight[0, :, 2] /
             patchsize[1]).dimshuffle(0, 'x'),
            # stride x
            np.float32(1.5) * (self.targets_centers_widthheight[0, :, 2] /
                               imsize[1]).dimshuffle(
                0, 'x'),
            # gamma (unused)
            T.ones((self.batchsize, 1)),
            # std y
            (self.targets_centers_widthheight[0, :, 3] /
             patchsize[0]).dimshuffle(0, 'x'),
            # stride y
            np.float32(1.5) * (self.targets_centers_widthheight[0, :, 3] /
                               imsize[0]).dimshuffle(
                0, 'x'),
        ), axis=1)

        self.params = [self.wread]  # , self.bread_init_factors]
        self.params.extend(self.rnn.params)
        # we're not using the rnn output layer, so remove params
        self.params.remove(self.rnn.wout)
        self.params.remove(self.rnn.bout)

        def step(x_t, h_tm1, bread, wread):
            (patches_t, window_params_t, muX, muY,
             gX, gY) = self.get_input_patches(
                x_t, h_tm1, wread, bread)
            features_t = self.feature_network.forward_from_to(
                patches_t,
                from_layer_name=self.feature_network.layers.keys()[0],
                to_layer_name=self.input_feature_layer_name)
            h_t, o_t = self.rnn.step(features_t, h_tm1)
            h_t_norm = T.sqrt(T.sum(h_t**2, axis=-1))
            return (h_t, window_params_t, patches_t, features_t,
                    window_params_t, muX, muY, gX, gY, h_t_norm)

        (self.hiddens, breads, self.patches, self.features,
         self.window_params, muX, muY, gX, gY, h_t_norms), self.updates = theano.scan(
             fn=step,
             sequences=self.inputs_frames,
             outputs_info=[
                 T.zeros((self.batchsize, self.nhid),
                         dtype=theano.config.floatX),
                 self.bread_init,
                 None, None, None, None,
                 None, None, None, None],
            non_sequences=[self.wread])

        # vector containing corner  mus of window, in order x1, x2, y1, y2
        self._attention_mus = T.concatenate((
            muX[:, :, 0].dimshuffle(0, 1, 'x'),
            muX[:, :, -1].dimshuffle(0, 1, 'x'),
            muY[:, :, 0].dimshuffle(0, 1, 'x'),
            muY[:, :, -1].dimshuffle(0, 1, 'x')), axis=2)
        self._attention_gs = T.concatenate((
            gX.dimshuffle(0, 1, 'x'),
            gY.dimshuffle(0, 1, 'x')), axis=2)

        # get index of layer after feature layer
        after_feat_layer_idx = self.feature_network.layers.keys().index(
            self.input_feature_layer_name) + 1

        self.attention_features = self.feature_network.forward_from_to(
            self.features.reshape((T.prod(self.features.shape[:2]),
                                   self.features.shape[2])),
            from_layer_name=self.feature_network.layers.keys()[
                after_feat_layer_idx],
            to_layer_name=self.metric_feature_layer_name
        ).reshape((
            self.nframes, self.batchsize, self.targets_features.shape[2]
        ))

        self._stepcosts = T.mean((
            self.targets_features - self.attention_features)**2, axis=2)

        self._dists = self._stepcosts

        # normalize mask to sum up to 1 for each sequence, to give equal
        # contribution to long and short sequences
        self._stepcosts_masked = (
            self._stepcosts * self.masks_frames) / T.sum(
            self.masks_frames, axis=0, keepdims=True)

        self._cost = (
            T.mean(self._stepcosts_masked) +
            self.weight_decay * (
                T.mean(self.rnn.win**2) + T.mean(self.wread**2)
            )
        )

        # grads graph will be built when first accessed
        self.__grads = None

        target_centers_widthheight = T.ftensor3('target_centers_widthheight')
        target_centers_widthheight.tag.test_value = numpy_rng.rand(
            16, 5, 4).astype(np.float32)

        print "compiling get_all_patches_and_windows..."
        self.get_all_patches_and_windows = theano.function(
            [self.inputs, target_centers_widthheight],
            [self.patches, self.window_params],
            givens={
                self.targets_centers_widthheight:
                    target_centers_widthheight.dimshuffle(1, 0, 2)})
        print "done (with compiling get_all_patches_and_windows)"

        print "compiling get_all_patches_and_windows_and_dists..."
        self.get_all_patches_and_windows_and_probs = theano.function(
            [self.inputs, target_centers_widthheight],
            [self.patches, self.window_params, self._dists],
            givens={
                self.targets_centers_widthheight:
                    target_centers_widthheight.dimshuffle(1, 0, 2)})
        print "done (with compiling get_all_patches_and_windows_and_dists)"

        self.get_bbs = theano.function(
            [self.inputs, target_centers_widthheight],
            self._attention_mus,
            givens={
                self.targets_centers_widthheight:
                    target_centers_widthheight.dimshuffle(1, 0, 2)})
Example #39
class RNNHfOptim(BaseEstimator):
    def __init__(self, n_in=5, n_hidden=50, n_out=5, 
                 L1_reg=0.00, L2_reg=0.00,
                 activation='tanh', output_type='real',
                 use_symbolic_softmax=False, model="SGRNN", weight_handler=None):
        self.n_in = int(n_in)
        self.n_hidden = int(n_hidden)
        self.n_out = int(n_out)
        self.L1_reg = float(L1_reg)
        self.L2_reg = float(L2_reg)
        self.activation = activation
        self.output_type = output_type
        self.use_symbolic_softmax = use_symbolic_softmax
        self.weight_handler = weight_handler
        self.model = model
        
        self.ready()
        self.tune_optimizer()

    def tune_optimizer(
            self, initial_lambda=0.1, mu=0.03, global_backtracking=False,
            preconditioner=False, max_cg_iterations=250,
            num_updates=5, validation=None, validation_frequency=1,
            patience=np.inf, save_progress=None, cg_number_batches=100, 
            gd_number_batches=100, plot_cost_file=None):
        #TODO write all parameters with descriptions

        self.initial_lambda = initial_lambda
        self.mu = mu
        self.global_backtracking = global_backtracking
        self.preconditioner = preconditioner
        self.max_cg_iterations = max_cg_iterations
        self.n_updates = num_updates
        self.validation = validation
        self.validation_frequency = validation_frequency
        self.patience = patience
        self.save_progress = save_progress
        self.cg_number_batches = cg_number_batches
        self.gd_number_batches = gd_number_batches
        self.plot_cost_file = plot_cost_file


    def ready(self):
        # input (where first dimension is time)
        self.x = T.matrix()
        # target (where first dimension is time)
        if self.output_type == 'real':
            self.y = T.matrix(name='y', dtype=theano.config.floatX)
        elif self.output_type == 'binary':
            self.y = T.matrix(name='y', dtype='int32')
        elif self.output_type == 'softmax':  # only vector labels supported
            self.y = T.vector(name='y', dtype='int32')
        else:
            raise NotImplementedError
        # initial hidden state of the RNN
        self.h0 = T.vector()
        # learning rate
        self.lr = T.scalar()

        if self.activation == 'tanh':
            activation = T.tanh
        elif self.activation == 'sigmoid':
            activation = T.nnet.sigmoid
        elif self.activation == 'relu':
            activation = lambda x: x * (x > 0)
        elif self.activation == 'cappedrelu':
            activation = lambda x: T.minimum(x * (x > 0), 6)
        else:
            raise NotImplementedError

        if self.model == "SGRNN":
            if self.weight_handler is None:
                raise NotImplementedError("you need to provide a weighthandler")
            else:
                self.rnn = SGRNN(
                    input=self.x, weight_handler=self.weight_handler,
                    activation=activation, output_type=self.output_type,
                    use_symbolic_softmax=self.use_symbolic_softmax)

        else:
            self.rnn = RNN(
                input=self.x, n_in=self.n_in,
                n_hidden=self.n_hidden, n_out=self.n_out,
                activation=activation, output_type=self.output_type,
                use_symbolic_softmax=self.use_symbolic_softmax)

        if self.output_type == 'real':
            self.predict = theano.function(inputs=[self.x, ],
                                           outputs=self.rnn.y_pred,
                                           mode=mode)
        elif self.output_type == 'binary':
            self.predict_proba = theano.function(
                inputs=[self.x, ], outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(
                inputs=[self.x, ],
                outputs=T.round(self.rnn.p_y_given_x),
                mode=mode)
        elif self.output_type == 'softmax':
            self.predict_proba = theano.function(
                inputs=[self.x, ],
                outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(
                inputs=[self.x, ],
                outputs=self.rnn.y_out, mode=mode)
        else:
            raise NotImplementedError

    def shared_dataset(self, data_xy):
        """ Load the dataset into shared variables """

        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX))

        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX))

        if self.output_type in ('binary', 'softmax'):
            return shared_x, T.cast(shared_y, 'int32')
        else:
            return shared_x, shared_y

    def __getstate__(self):
        """ Return state sequence."""
        params = self._get_params()  # parameters set in constructor
        weights = [p.get_value() for p in self.rnn.params]
        state = (params, weights)
        return state

    def _set_weights(self, weights):
        """ Set fittable parameters from weights sequence.

        Parameters must be in the order defined by self.params:
            W, W_in, W_out, h0, bh, by
        """
        i = iter(weights)

        for param in self.rnn.params:
            param.set_value(i.next())

    def __setstate__(self, state):
        """ Set parameters from state sequence.

        Parameters must be in the order defined by self.params:
            W, W_in, W_out, h0, bh, by
        """
        params, weights = state
        self.set_params(**params)
        self.ready()
        self._set_weights(weights)

    def save(self, fpath='.', fname=None):
        """ Save a pickled representation of Model state. """
        fpathstart, fpathext = os.path.splitext(fpath)
        if fpathext == '.pkl':
            # User supplied an absolute path to a pickle file
            fpath, fname = os.path.split(fpath)

        elif fname is None:
            # Generate filename based on date
            date_obj = datetime.datetime.now()
            date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
            class_name = self.__class__.__name__
            fname = '%s.%s.pkl' % (class_name, date_str)

        fabspath = os.path.join(fpath, fname)

        file = open(fabspath, 'wb')
        state = self.__getstate__()
        pickle.dump(state, file, protocol=pickle.HIGHEST_PROTOCOL)
        file.close()

    def load(self, path):
        """ Load model parameters from path. """
        file = open(path, 'rb')
        state = pickle.load(file)
        self.__setstate__(state)
        file.close()

    def fit(self, X_train, Y_train):
        self.prepare(X_train, Y_train)
        ###############
        # TRAIN MODEL #
        ###############
        print "starting training ..."

        for i in range(self.n_updates):
            self.train_step(i)

    def prepare(self, X_train, Y_train):
        """ Fit model

        Pass in X_test, Y_test to compute test error and report during
        training.

        X_train : ndarray (n_seq x n_steps x n_in)
        Y_train : ndarray (n_seq x n_steps x n_out)

        """
        # SequenceDataset wants a list of sequences
        # this allows them to be different lengths, but here they're not
        seq = [i for i in X_train]
        targets = [i for i in Y_train]

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print "building model..."

        #TODO : batch_size in parameters.
        self.gradient_dataset = SequenceDataset(
            [seq, targets], batch_size=len(seq) / self.gd_number_batches, number_batches=self.gd_number_batches)
        self.cg_dataset = SequenceDataset(
            [seq, targets], batch_size=len(seq) / self.cg_number_batches , number_batches=self.cg_number_batches)

        cost = self.rnn.loss(self.y) \
            + self.L1_reg * self.rnn.L1 \
            + self.L2_reg * self.rnn.L2_sqr

        self.opt = hf_optimizer(
            p=self.rnn.params, inputs=[self.x, self.y],
            s=self.rnn.y_pred,
            costs=[cost], 
            h=self.rnn.h,
            ha=self.rnn.ha)

#TODO add h and ha for structural damping

    def train_step(self, n):
        self.opt.train(
            self.gradient_dataset, self.cg_dataset,
            num_updates=n, save_progress=self.save_progress,
            plot_cost_file=self.plot_cost_file)

    def continue_training(self, n_updates):
        # Re-run the HF update loop on the datasets already built by prepare().
        self.n_updates = n_updates
        for i in range(self.n_updates):
            self.train_step(i)
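
The __getstate__/__setstate__ pair above reduces the model to a (params, weights) tuple, so save() and load() can round-trip a trained network. A minimal sketch of that round trip, assuming a hypothetical MetaRNN-style wrapper class that exposes the methods shown above (the wrapper's name and constructor arguments are not part of this excerpt):

import numpy as np

X_train = np.random.randn(10, 20, 5)    # 10 sequences, 20 steps, 5 inputs
Y_train = np.random.randn(10, 20, 3)    # real-valued targets

model = MetaRNN(n_in=5, n_hidden=10, n_out=3)   # hypothetical wrapper exposing fit/save/load
model.fit(X_train, Y_train)                     # HF training set up by prepare()
model.save(fpath='models', fname='rnn.pkl')     # pickles (constructor params, RNN weights)

restored = MetaRNN()
restored.load('models/rnn.pkl')                 # set_params -> ready() -> _set_weights
assert np.allclose(model.rnn.params[0].get_value(),
                   restored.rnn.params[0].get_value())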
Example #40
    def ready(self):
        # input (where first dimension is time)
        self.x = T.matrix()
        # target (where first dimension is time)
        if self.output_type == 'real':
            self.y = T.matrix(name='y', dtype=theano.config.floatX)
        elif self.output_type == 'binary':
            self.y = T.matrix(name='y', dtype='int32')
        elif self.output_type == 'softmax':  # only vector labels supported
            self.y = T.vector(name='y', dtype='int32')
        else:
            raise NotImplementedError
        # initial hidden state of the RNN
        self.h0 = T.vector()
        # learning rate
        self.lr = T.scalar()

        if self.activation == 'tanh':
            activation = T.tanh
        elif self.activation == 'sigmoid':
            activation = T.nnet.sigmoid
        elif self.activation == 'relu':
            activation = lambda x: x * (x > 0)
        elif self.activation == 'cappedrelu':
            activation = lambda x: T.minimum(x * (x > 0), 6)
        else:
            raise NotImplementedError

        if self.model == "SGRNN":
            if self.weight_handler is None:
                raise NotImplementedError("you need to provide a weighthandler")
            else:
                self.rnn = SGRNN(
                    input=self.x, weight_handler=self.weight_handler,
                    activation=activation, output_type=self.output_type,
                    use_symbolic_softmax=self.use_symbolic_softmax)

        else:
            self.rnn = RNN(
                input=self.x, n_in=self.n_in,
                n_hidden=self.n_hidden, n_out=self.n_out,
                activation=activation, output_type=self.output_type,
                use_symbolic_softmax=self.use_symbolic_softmax)

        if self.output_type == 'real':
            self.predict = theano.function(inputs=[self.x, ],
                                           outputs=self.rnn.y_pred,
                                           mode=mode)
        elif self.output_type == 'binary':
            self.predict_proba = theano.function(
                inputs=[self.x, ], outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(
                inputs=[self.x, ],
                outputs=T.round(self.rnn.p_y_given_x),
                mode=mode)
        elif self.output_type == 'softmax':
            self.predict_proba = theano.function(
                inputs=[self.x, ],
                outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(
                inputs=[self.x, ],
                outputs=self.rnn.y_out, mode=mode)
        else:
            raise NotImplementedError
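
The 'relu' and 'cappedrelu' branches above build the activation from elementwise masking instead of a library call. A small NumPy illustration (not part of the original example) of what the two lambdas compute:

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 3.0, 9.0])
relu = x * (x > 0)                    # negatives zeroed: [0. 0. 0. 3. 9.]
capped = np.minimum(x * (x > 0), 6)   # additionally capped at 6: [0. 0. 0. 3. 6.]
print relu
print capped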
Example #41
class Model(object):
	def __init__(self, logger, params = None):
		self.logger = logger

		self.ready(params)

	def ready(self, params = None):
		'''
			Sets up the model.
		'''
		#Creates the shapes of the inputs, target, and other variables.
		self.x = t.matrix()
		self.y = t.vector(name = 'y', dtype = 'int32')
		self.h0 = t.vector()
		self.lr = t.scalar()

		#The params to be used (input nodes, output nodes, etc...) are retrieved
		#from the params dictionary.  When the values are not found, the default
		#values are used.
		params = self.defaultparams(params)
		self.setparams(params)

		#The actual RNN.
		self.rnn = RNN(input = self.x, n_in = self.n_in, n_hid = self.n_hid,
						n_out = self.n_out, activation = self.activation)

		#Computes the probability distribution over the next token and the predicted next token.
		self.predict_probability = theano.function(inputs = [self.x,],
												outputs = self.rnn.probability_y)
		self.predict = theano.function(inputs = [self.x,],
										outputs = self.rnn.y_out)

	def fit(self, x_train, y_train, x_test = None, y_test = None, validation_freq = 200):
		'''
			Used to train the RNN.

			x_train - the inputs used for training the RNN.
			y_train - the targets used for training the RNN.

			x_test - the inputs used for testing how well the training is going.  Requires
					 that y_test also be provided, otherwise it is ignored.
			y_test - the targets used for testing how well the training is going.  Requires
					 that x_test also be provided, otherwise it is ignored.

			validation_freq - how often the training should be interrupted and tested for
							  accuracy.
		'''
		if x_test is not None and y_test is not None:
			self.runtests = True
			test_x, test_y = self.share_dataset(x_test, y_test)
			n_test = test_x.get_value(borrow = True).shape[0]
		else:
			self.runtests = False

		train_x, train_y = self.share_dataset(x_train, y_train)
		n_train = train_x.get_value(borrow = True).shape[0]

		'''
			Creates the model.
		'''
		self.logger.info('Building the model...')

		idx = t.lscalar('index')
		l_r = t.scalar(name = 'l_r', dtype = theano.config.floatX)
		mom = t.scalar(name = 'mom', dtype = theano.config.floatX)

		cost = self.rnn.loss(self.y) + self.L1_reg * self.rnn.L1 \
				+ self.L2_reg * self.rnn.L2_sqr

		train_error = theano.function(inputs = [idx,],
									outputs = self.rnn.loss(self.y),
									givens = {
										self.x: train_x[idx],
										self.y: train_y[idx]
									})

		if self.runtests:
			test_error = theano.function(inputs = [idx,],
									outputs = self.rnn.loss(self.y),
									givens = {
										self.x: test_x[idx],
										self.y: test_y[idx]
									})

		# Compute the cost gradients with BPTT
		gparams = []
		for param in self.rnn.params:
			gparam = t.grad(cost, param)
			gparams.append(gparam)

		# Momentum updates: each parameter keeps a velocity (self.rnn.updates[param]);
		# velocity <- mom * velocity - l_r * gradient, then param <- param + velocity.
		updates = {}
		for param, gparam in zip(self.rnn.params, gparams):
			update = self.rnn.updates[param]
			u = mom * update - l_r * gparam

			updates[update] = u
			updates[param] = param + u

		# The function to train the model.
		train_model = theano.function(inputs = [idx, l_r, mom],
									outputs = cost,
									updates = updates,
									givens = {
										self.x: train_x[idx],
										self.y: train_y[idx]
									})

		'''
			Train the model
		'''
		self.logger.info('Training the model...')
		epoch = 0

		while epoch < self.n_epochs:
			epoch += 1

			for i in xrange(n_train):
				t0 = time.time()

				eff_momentum = self.final_momentum \
									if epoch > self.momentum_switchover \
									else self.initial_momentum
				example_cost = train_model(i, self.learning_rate, eff_momentum)

				itr = (epoch - 1) * n_train + i + 1

				if itr % validation_freq == 0:
					train_losses = [train_error(j) for j in xrange(n_train)]
					train_losses = np.mean(train_losses)

					if self.runtests:
						test_losses = [test_error(j) for j in xrange(n_test)]
						test_losses = np.mean(test_losses)

						self.logger.info('epoch {}, seq {} / {}, training losses {}, test losses {}, learning rate {}, elapsed time {}.'.format(
											epoch, i + 1, n_train, train_losses,
											test_losses, self.learning_rate, time.time() - t0))
					else:
						self.logger.info('epoch {}, seq {} / {}, training losses {}, learning rate {}, elapsed time {}.'.format(
											epoch, i + 1, n_train, train_losses,
											self.learning_rate, time.time() - t0))


	def share_dataset(self, data_x, data_y):
		'''
			Load the datasets into shared variables.
		'''
		shared_x = theano.shared(np.asarray(data_x, dtype = theano.config.floatX))
		shared_y = theano.shared(np.asarray(data_y, dtype = theano.config.floatX))

		return shared_x, t.cast(shared_y, 'int32')

	def __getstate__(self):
		'''
			Returns the current state of the model and RNN.
		'''
		params = self.getparams()
		weights = self.rnn.getweights()

		return (params, weights)

	def __setstate__(self, state):
		'''
			Sets the parameters for the model and RNN.
		'''
		params, weights = state

		self.setparams(params)
		self.ready()
		self.rnn.setweights(weights)

	def load(self, path):
		'''
			Unpickles a pickled model.
		'''
		fs = open(path, 'rb')

		self.logger.info('Model state loading from file {}.'.format(path))

		state = pickle.load(fs)
		self.__setstate__(state)

		fs.close()

		self.logger.info('Model state loaded.')

	def save(self, path = None):
		'''
			Pickles the model.
		'''
		if path is None:
			path = str(uuid.uuid4())

		fs = open(path, 'wb')

		state = self.__getstate__()
		pickle.dump(state, fs, protocol = pickle.HIGHEST_PROTOCOL)

		fs.close()

		self.logger.info('Model state saved to file {}.'.format(path))

	def setparams(self, params):
		'''
			Sets the parameters of the model and RNN.
		'''
		self.n_in = params.get('n_in')
		self.n_hid = params.get('n_hid')
		self.n_out = params.get('n_out')
		self.n_epochs = params.get('n_epochs')
		self.learning_rate = params.get('learning_rate')
		self.activation = params.get('activation')
		self.L1_reg = params.get('L1_reg')
		self.L2_reg = params.get('L2_reg')
		self.initial_momentum = params.get('initial_momentum')
		self.final_momentum = params.get('final_momentum')
		self.momentum_switchover = params.get('momentum_switchover')

	def getparams(self):
		'''
			Gets the parameters of the model.
		'''
		d = {
			'n_in': self.n_in,
			'n_hid': self.n_hid,
			'n_out': self.n_out,
			'n_epochs': self.n_epochs,
			'learning_rate': self.learning_rate,
			'activation': self.activation,
			'L1_reg': self.L1_reg,
			'L2_reg': self.L2_reg,
			'initial_momentum': self.initial_momentum,
			'final_momentum': self.final_momentum,
			'momentum_switchover': self.momentum_switchover
		}

		return d

	def defaultparams(self, params = None):
		'''
			Returns the default parameters for the model or
			ensures that all the necessary parameters are
			present.
		'''
		d = {
			'n_in': 5,
			'n_hid': 50,
			'n_out': 5,
			'n_epochs': 100,
			'learning_rate': 0.01,
			'activation': t.nnet.sigmoid,
			'L1_reg': 0.0,
			'L2_reg': 0.0,
			'initial_momentum': 0.5,
			'final_momentum': 0.9,
			'momentum_switchover': 5
		}

		if params is None:
			return d

		for key in d.keys():
			# Use an explicit None check so legitimate falsy values (e.g. 0 or 0.0) are kept.
			if params.get(key) is None:
				params[key] = d[key]

		return params
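
A minimal end-to-end sketch of driving the Model class above; the data shapes follow fit()/share_dataset() (sequences of input vectors with one int32 label per step), while the toy sizes, logger name and file name are invented for illustration:

import logging
import numpy as np
import theano

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('rnn-demo')

# 20 toy sequences of 15 steps, 5 input features, integer targets in [0, 5).
x_train = np.random.randn(20, 15, 5)
y_train = np.random.randint(0, 5, (20, 15))

model = Model(logger, params = {'n_in': 5, 'n_hid': 50, 'n_out': 5, 'n_epochs': 10})
model.fit(x_train, y_train, validation_freq = 100)
model.save('rnn_model.pkl')

# Per-step class distribution for one sequence.
probs = model.predict_probability(x_train[0].astype(theano.config.floatX))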
Example #42
File: main.py Project: Yevgnen/RNN
    ap = AutoPoetry(file, delimiters, vocabulary_size, start_token, end_token, unknown_token)
    (X, T) = ap.get_training_data()
    with open(data_file, 'wb') as f:
        pickle.dump((X, T, ap), f)
else:
    with open(data_file, 'rb') as f:
        (X, T, ap) = pickle.load(f)

vocabulary_size = ap.vocabulary_size

# RNN training
n_features = vocabulary_size
n_hiddens = 100
epoch = 100
learning_rate = 1e-1
lr_factor = 0.9

rnn = RNN(n_features, n_hiddens, bptt_truncate=10)
rnn.train(X, T, epoch=epoch, learning_rate=learning_rate, lr_factor=lr_factor)

# Generate sentences
num_sentences = 100
senten_min_length = 3

for i in range(num_sentences):
    sent = []
    # We want long sentences, not sentences with one or two words
    while len(sent) < senten_min_length:
        sent = ap.generate_sentence(rnn)
    print(''.join(sent))
Example #43
def train_rnn(num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):
    

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
     
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )
    
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    h0 = T.fvector('h0')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    
    rng = numpy.random.RandomState()
   
    classifier = RNN(rng = rng, input = x, intial_hidden = h0, n_in = vocab_size, n_hidden = int(sys.argv[1]), n_out = vocab_size)
    
    cost = classifier.negative_log_likelihood(y)

    ht1_values = numpy.ones((int(sys.argv[1]), ), dtype = 'float32')
    
    ht1 = theano.shared(value = ht1_values, name = 'hidden_state')
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[2]), scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x: test_set_x1,
                                           y: test_set_y,
                                           h0: ht1})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x: valid_set_x1,
                                               y: valid_set_y,
                                               h0: ht1})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, T.cast(param - learning_rate * gradient - 0.000001 * param, dtype = 'float32')))
    
    #hidden_output = classifier.inputlayer.output
    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.inputlayer.output], updates = updates, \
                                 givens = {x: train_set_x1,
                                           y: train_set_y,
                                           h0:ht1})

    print '.....training'
    best_valid_loss = numpy.inf    
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
    
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch        
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
 
        for feats_lab_tuple in dataprovider_train:
    
            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))       
                ht1.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
	
        #classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        #save_mlp(classifier, path+exp_name1 , classifier_name)
    
        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # validation start; also marks the end of this epoch's training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:            
            features, labels = feats_lab_tuple            
            if labels is None or features is None:
                continue                             
            valid_frames_showed += features.shape[0]                
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                log_likelihood.append(validate_model())
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)            
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print  entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # test start; also marks the end of training time
    dataprovider_test.reset()
    
    for feats_lab_tuple in dataprovider_test:
        
        features, labels = feats_lab_tuple 
            
        if labels is None or features is None:
            continue                             

        test_frames_showed += features.shape[0]                
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)            
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)
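
The training, validation and test loops above all build one-hot word vectors by hand: a zero vector of vocabulary size with a single 1 at the word index. A tiny helper, purely illustrative and not part of the original script, makes the pattern explicit:

import numpy

def one_hot(index, size):
    vec = numpy.zeros(size, dtype = 'float32')
    vec[index] = 1.0
    return vec

print one_hot(2, 5)   # [ 0.  0.  1.  0.  0.]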
Example #44
File: train.py Project: MojoJolo/aRNNie
hidden_size = 300 # Size of hidden layer of neurons (H)
seq_length = 50 # Number of steps to unroll the RNN
learning_rate = 2e-3

with open('data/input.txt') as f:    
    data = f.read().replace('\n', ' ').encode('ascii', 'ignore')

args = {
    'hidden_size': hidden_size,
    'seq_length': seq_length,
    'learning_rate': learning_rate,
    'data': data
}

# Initialize the RNN and take the first step
rnn = RNN(args)
inputs, hidden, loss = rnn.step()

i = 0

while True:
    inputs, hidden, loss = rnn.step(hidden)  

    if i % 100 == 0:
        print "Iteration {}:".format(i)
        print "Loss: {}".format(loss)
        print ''.join(rnn.generate(hidden, inputs[0], 140))
        print ""

    if i % 10000 == 0:
        rnn.save_model()

    i += 1
Example #45
File: rnn_pos.py Project: ticcky/rnn-pos
class RNNPOSTagger:
    def __init__(self, vocab, tags):
        self.vocab = vocab
        self.tags = tags
        self.tag_ndx = dict(map(swap_tuple, enumerate(self.tags)))
        self.vocab_ndx = dict(map(swap_tuple, enumerate(self.vocab)))

    def save_to_file(self, filename):
        f = open(filename, 'wb')

        context = {}
        context['vocab'] = self.vocab
        context['tags'] = self.tags
        context['n_inputs'] = self.rnn.n_inputs
        context['n_outputs'] = self.rnn.n_outputs
        context['n_hidden'] = self.rnn.n_hidden        
        context['U'] = matrix_to_list(self.rnn.U)
        context['V'] = matrix_to_list(self.rnn.V)
        context['W'] = matrix_to_list(self.rnn.W)
                
        f.write(json.dumps(context))
        f.close()

    def load_from_file(self, filename):
        f = open(filename, 'rb')

        context = json.loads(f.read())
        self.vocab = context['vocab']
        self.tags = context['tags']
        self.__init__(self.vocab, self.tags)
        
        self.rnn = RNN(context['n_inputs'], context['n_outputs'], context['n_hidden'])
        self.rnn.U = array([array(x) for x in context['U']])
        self.rnn.V = array([array(x) for x in context['V']])
        self.rnn.W = array([array(x) for x in context['W']])

        f.close()

    def train(self, data, hidden_layer_cnt = 40):
        n_input = len(self.vocab)
        n_output = len(self.tags)
        n_hidden = hidden_layer_cnt

        self.rnn = RNN(n_input, n_output, n_hidden)

        training_set = self.prepare_training_set(data)
        n_epochs = 50
        max_rate = 0.0001
        learning_coeff = 1.0
        history_cnt = 10
        learning_rate_history = [0.0 for i in range(history_cnt)]
        history_pointer = 0
        for epoch in range(n_epochs):
            print "Running epoch #%d" % epoch
            curr_rate = self.rnn.train(training_set, 0.5)  #200.0/(len(training_set))) # * learning_coeff))
            learning_rate_history[history_pointer] = curr_rate
            history_pointer = (history_pointer + 1) % history_cnt
            rate = sum(learning_rate_history)
            #max_rate = max([curr_rate, max_rate])
            #learning_coeff -= (max_rate - curr_rate) / (n_epochs)
            #print curr_rate, max_rate, learning_coeff
            #if rate < 0.1:
            #    break
            if epoch % 20 == 0:
                self.save_to_file('_tmp_save')

    def get_tag(self, word, tag, hidden_state):
        if word in SENTENCE_SEPARATORS:
            hidden_state = self.rnn.get_hidden_state_matrix()
            return word, hidden_state
        
        input_vector = mat(zeros((len(self.vocab), 1)))
        if self.vocab_ndx.has_key(word):
            input_vector[self.vocab_ndx[word],0] = 1.0
            #self.rnn.reset_hidden()
            res, hidden_state = self.rnn.feed(input_vector, hidden_state)
            res_ndx = res.argmax()
            #print res[res_ndx]
            return self.tags[res_ndx], hidden_state
        else:
            return "", hidden_state

    def prepare_training_set(self, data):
        vocab_size = len(self.vocab)
        tag_count = len(self.tags)
        res = []        
        for word, pos in data:
            #x = zeros(vocab_size)
            #y = zeros(tag_count)
            #x[self.vocab_ndx[word]] = 1
            #y[self.tag_ndx[pos]] = 1
            #res += [(x, y)]
            if word in SENTENCE_SEPARATORS:
                res += [(None, None)]
                continue
            res += [(self.vocab_ndx[word], self.tag_ndx[pos])]

        return res
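
A hypothetical usage sketch for RNNPOSTagger; the toy vocabulary, tag set and (word, POS) pairs below are invented, and the RNN class plus SENTENCE_SEPARATORS are assumed to come from the same project as the methods above:

vocab = ['the', 'dog', 'runs', '.']
tags = ['DET', 'NOUN', 'VERB', 'PUNCT']
data = [('the', 'DET'), ('dog', 'NOUN'), ('runs', 'VERB'), ('.', 'PUNCT')]

tagger = RNNPOSTagger(vocab, tags)
tagger.train(data, hidden_layer_cnt = 20)
tagger.save_to_file('pos_model.json')

hidden = tagger.rnn.get_hidden_state_matrix()
tag, hidden = tagger.get_tag('dog', None, hidden)
print tag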
Example #46
    idx2word  = dict((k,v) for v,k in dic['words2idx'].iteritems())

    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex,  test_ne,  test_y  = test_set

    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_lex)

    # instantiate the model
    numpy.random.seed(s['seed'])
    random.seed(s['seed'])
    rnn = RNN( nh = s['nhidden'],
               nc = nclasses,
               ne = vocsize,
               de = s['emb_dimension'],
               cs = s['win'] )

    # train with early stopping on validation set
    best_f1 = -numpy.inf
    s['clr'] = s['lr']
    for e in xrange(s['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()
        # consider the whole sentence as a mini-batch and perform one update per sentence
        for i in range(nsentences):
            cwords = contextwin(train_lex[i], s['win'])
            words  = map(lambda x: numpy.asarray(x).astype('int32'),\
Example #47
	all_train_x = train_x.append([i for i in dev_x])
	all_train_y = train_y.append([j for j in dev_y])

	np.random.seed(s['seed'])
	random.seed(s['seed'])

	'''
        nh :: dimension of the hidden layer
        nc :: number of classes
        ne :: number of word embeddings in the vocabulary
        de :: dimension of the word embeddings
        cs :: word window context size 
        '''
	rnn = RNN(	nh = s['nhidden'],
				nc = len(languages()),
				ne = s['vocab_size'],
				de = s['emb_dimension'],
				cs = s['win'])

	best_f1 = -numpy.inf
	s['clr'] = s['lr']
	for e in range(s['nepochs']):
		# shuffle
		shuffle([train_lex, train_ne, train_y], s['seed'])
		s['ce'] = e
		tic = time.time()
		for i in xrange(nsentences):
			cwords = contextwin(train_lex[i], s['win'])
			words  = map(lambda x: numpy.asarray(x).astype('int32'),\
							minibatch(cwords, s['bs']))
			labels = train_y[i]
Example #48
with open('data/input.txt') as f:    
    data = f.read().replace('\n', ' ').encode('ascii', 'ignore')

data = data.lower()
data = nltk.word_tokenize(data)

args = {
    'hidden_size': hidden_size,
    'seq_length': seq_length,
    'learning_rate': learning_rate,
    'data': data
}

# Initialize the RNN and take the first step
rnn = RNN(args)
inputs, hidden, loss = rnn.step()

i = 0

while True:
    inputs, hidden, loss = rnn.step(hidden)  

    if i % 100 == 0:
        print "Iteration {}:".format(i)
        print "Loss: {}".format(loss)
        print ' '.join(rnn.generate(hidden, inputs[0], 15))
        print ""

    # if i % 10000 == 0:
    #     rnn.save_model()

    i += 1
Example #49
File: main.py Project: ryu577/base
		treeTxt = treeTxt + ")"

#Print a sentence.
prnt(train[0].root)
nltktree = Tree.fromstring(treeTxt)
nltktree.pretty_print()

###############################
# Create a toy model for testing.
###############################
numW = len(treeM.loadWordMap())

wvecDim = 10
outputDim = 5

rnn = RNN(wvecDim, outputDim, numW, mbSize = 4)
rnn.initParams()

rnn.L, rnn.W, rnn.b, rnn.Ws, rnn.bs = rnn.stack

# Zero gradients
rnn.dW[:] = 0
rnn.db[:] = 0
rnn.dWs[:] = 0
rnn.dbs[:] = 0
rnn.dL = collections.defaultdict(rnn.defaultVec)

cost = 0.0
correct = []
guess = []
total = 0.0
Example #50
File: test.py Project: Yevgnen/RNN
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from rnn import RNN

x = [0, 4, 2, 5, 7]
t = [4, 2, 5, 7, 1]
n_hiddens = 4
n_features = 10
rnn = RNN(n_features, n_hiddens)
rnn.forward_propagation(x)
rnn.check_gradient(x, t)
rnn.train([x], [t], 100)
Example #51
File: runNNet.py Project: successar/RAE
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("--test",action="store_true",dest="test",default=False)

    # Optimizer
    parser.add_option("--minibatch",dest="minibatch",type="int",default=30)
    parser.add_option("--optimizer",dest="optimizer",type="string",
        default="adagrad")
    parser.add_option("--epochs",dest="epochs",type="int",default=50)
    parser.add_option("--step",dest="step",type="float",default=1e-2)

    parser.add_option("--wvecDim",dest="wvecDim",type="int",default=30)
    parser.add_option("--outputDim",dest="outputDim",type="int",default=2)
    parser.add_option("--alpha",dest="alpha",type="int",default=0.2)
    
    parser.add_option("--outFile",dest="outFile",type="string",
        default="models/test.bin")
    parser.add_option("--inFile",dest="inFile",type="string",
        default="models/test.bin")
    parser.add_option("--data",dest="data",type="string",default="brae.pos")
    parser.add_option("--dev",dest="dev",type="string",default="brae.dev")
    parser.add_option("--wordMap",dest="map",type="string",default="brae.tot")

    (opts,args)=parser.parse_args(args)


    # make this False if you don't care about per-epoch accuracies; it makes things faster!
    evaluate_accuracy_while_training = True

    # Testing
    if opts.test:
        test(opts.inFile,opts.data)
        return
    
    print "Loading data..."
    train_accuracies = []
    dev_accuracies = []
    
    trees = tr.printtree(opts.data)
    opts.numWords = len(tr.loadWordMap(opts.map))

    nn = RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.alpha,opts.minibatch)    
    nn.initParams()
    sgd = optimizer.SGD(nn,alpha=opts.step,minibatch=opts.minibatch,
        optimizer=opts.optimizer)


    dev_trees = tr.printtree(opts.dev)
    for e in range(opts.epochs):
        start = time.time()
        print "Running epoch %d"%e
        sgd.run(trees)
        end = time.time()
        print "Time per epoch : %f"%(end-start)

        with open(opts.outFile,'w') as fid:
            pickle.dump(opts,fid)
            pickle.dump(sgd.costt,fid)
            nn.toFile(fid)
        if evaluate_accuracy_while_training:
            print "testing on training set real quick"
            train_accuracies.append(test(opts.outFile,opts.data,trees))
            print "testing on dev set real quick"
            dev_accuracies.append(test(opts.outFile,opts.dev,dev_trees))


    if evaluate_accuracy_while_training:
        pdb.set_trace()
        print train_accuracies
        print dev_accuracies
Example #52
File: cws.py Project: zbxzc35/cws
    def __init__(self, s):
	self.rnn = RNN(s['ne'], s['de'], s['win'], s['nh'], s['nc'], np.random.RandomState(s['seed']))
	self.s = s
Example #53
# -*- coding: utf-8 -*-
import sys  

reload(sys)  
sys.setdefaultencoding('utf8')

from rnn import RNN

# To load the model
rnn = RNN.load_model("model.json")
inputs, hidden, loss = rnn.step()

print rnn.generate(hidden, inputs[0], 140)