def test(args):
    """Evaluate a saved neural POS tagger on a CoNLL-format data set.

    Loads pickled vocabularies and a pickled tagger model, compiles a
    Theano prediction function from the tagger's graph, runs it over
    every sentence, and prints per-token accuracy plus wall-clock time.

    :param args: parsed command-line namespace; reads word_list,
        emb_list, w_emb_dim, w_hidden_dim, c_emb_dim, c_hidden_dim,
        opt, lr, dev_data and load.
    """
    print '\nNEURAL POS TAGGER START\n'
    print '\tINITIAL EMBEDDING\t%s %s' % (args.word_list, args.emb_list)
    print '\tWORD\t\t\tEmb Dim: %d Hidden Dim: %d' % (args.w_emb_dim, args.w_hidden_dim)
    print '\tCHARACTER\t\tEmb Dim: %d Hidden Dim: %d' % (args.c_emb_dim, args.c_hidden_dim)
    print '\tOPTIMIZATION\t\tMethod: %s Learning Rate: %f\n' % (args.opt, args.lr)

    """ load vocab """
    # Vocabularies were pickled at training time under fixed names.
    print 'Loading vocabularies...\n'
    vocab_word = io_utils.load_data('vocab_word')
    vocab_char = io_utils.load_data('vocab_char')
    vocab_tag = io_utils.load_data('vocab_tag')
    print '\tWord size: %d Char size: %d' % (vocab_word.size(), vocab_char.size())

    """ load data """
    # NOTE(review): the corpus printed as "Test" is read from
    # args.dev_data, not a test-data argument — confirm this is intended.
    print '\nLoading data set...\n'
    test_corpus, test_vocab_word, test_vocab_char, test_vocab_tag = io_utils.load_conll(args.dev_data)
    print '\tTest Sentences: %d' % len(test_corpus)

    """ converting into ids """
    # Map words/chars/tags to integer ids using the training vocabularies.
    print '\nConverting into IDs...\n'
    test_x, test_c, test_b, test_y = preprocessor.convert_into_ids(test_corpus, vocab_word, vocab_char, vocab_tag)

    """ tagger set up """
    # Compile a prediction function from the pickled tagger's symbolic
    # graph; the last declared input is dropped (presumably a
    # training-only variable — TODO confirm against the tagger class).
    tagger = io_utils.load_data(args.load)
    dev_f = theano.function(
        inputs=tagger.input[:-1],
        outputs=tagger.result,
        mode='FAST_RUN'
    )

    """ Prediction """
    print '\nPREDICTION START\n'
    print '\tBatch Index: ',
    start = time.time()

    # total is a float so the final correct/total division is true
    # division under Python 2 integer-division rules.
    total = 0.0
    correct = 0
    for index in xrange(len(test_x)):
        # Progress marker every 100 sentences.
        if index % 100 == 0 and index != 0:
            print index,
            sys.stdout.flush()
        # Character-level taggers take char ids and boundaries as extra
        # inputs; word-level taggers only take word ids and gold tags.
        if tagger.name == 'char':
            corrects = dev_f(test_x[index], test_c[index], test_b[index], test_y[index])
        else:
            corrects = dev_f(test_x[index], test_y[index])
        # corrects is per-token; its length is the sentence's token count.
        total += len(corrects)
        correct += np.sum(corrects)

    end = time.time()
    print '\n\tTime: %f seconds' % (end - start)
    print '\tAccuracy:%f Total:%d Correct:%d' % ((correct / total), total, correct)
def load_weights(model, pth):
    """Restore every weight tensor of *model* from arrays saved at *pth*.

    Each weight variable is keyed by its string representation with '/'
    replaced by '_' (so the name is filesystem/archive safe); the saved
    mapping returned by load_data is indexed with that same key.
    """
    for layer in model.layers:
        for weight in layer.weights:
            key = str(weight).replace('/', '_')
            saved = load_data(pth, key)
            weight.set_value(saved[key])
def test(argv):
    """Run a saved model over dev and/or test corpora and print results.

    Loads a pickled vocabulary and model, converts each requested corpus
    into id samples, packs them into Theano-ready batches, builds a
    prediction function per corpus, and calls predict() on each.

    :param argv: parsed command-line namespace; reads task, batch_size,
        window, load_vocab, dev_data, test_data and load_model.
    """
    print '\nSETTING UP A TEST SETTING\n'

    task = argv.task
    batch_size = argv.batch_size
    window = argv.window

    print '\tTASK: %s\tBATCH: %d\tWINDOW: %d' % (task, batch_size, window)

    ##############
    # LOAD FILES #
    ##############
    """ Load files """
    # corpus: 1D: n_sents, 2D: n_words, 3D: (word, pas_info, pas_id)
    vocab_word = io_utils.load_data(argv.load_vocab)

    # Both corpora are optional; each is loaded with the frozen training
    # vocabulary (False presumably disables vocab growth — TODO confirm).
    if argv.dev_data:
        dev_corpus, _ = io_utils.load(argv.dev_data, vocab_word, False)
        print '\nDEV CORPUS'
        corpus_statistics(dev_corpus)
    if argv.test_data:
        test_corpus, _ = io_utils.load(argv.test_data, vocab_word, False)
        print '\nTEST CORPUS'
        corpus_statistics(test_corpus)

    print '\nVocab: %d' % vocab_word.size()

    ##############
    # PREPROCESS #
    ##############
    """ Preprocessing """
    # samples: 1D: n_sents, 2D: [word_ids, tag_ids, prd_indices, contexts]
    if argv.dev_data:
        dev_samples = sample_format(dev_corpus, vocab_word, window)
        n_dev_samples = len(dev_samples)
    if argv.test_data:
        test_samples = sample_format(test_corpus, vocab_word, window)
        n_te_samples = len(test_samples)

    # dataset = [x, y, l]
    # x=features: 1D: n_samples * n_words, 2D: window; elem=word id
    # y=labels: 1D: n_samples; elem=scalar
    # l=question length: 1D: n_samples * 2; elem=scalar
    # bb_x=batch indices for x: 1D: n_samples / batch_size + 1; elem=(bob, eob)
    # bb_y=batch indices for y: 1D: n_samples / batch_size + 1; elem=(bob, eob)
    if argv.dev_data:
        dev_dataset, dev_bb_x, dev_bb_y = theano_format(dev_samples, batch_size)
    if argv.test_data:
        te_dataset, te_bb_x, te_bb_y = theano_format(test_samples, batch_size)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    """ Set a model """
    print '\n\nBuilding a model...'
    # The model is unpickled as-is; no architecture is rebuilt here.
    model = io_utils.load_data(argv.load_model)

    # One compiled prediction function per corpus.
    if argv.dev_data:
        dev_f = set_predict_f(model, dev_dataset)
    if argv.test_data:
        test_f = set_predict_f(model, te_dataset)

    ########
    # TEST #
    ########
    if argv.dev_data:
        print '\n\tDEV\n\t',
        predict(dev_f, dev_bb_x, dev_bb_y, n_dev_samples)
    if argv.test_data:
        print '\n\tTEST\n\t',
        predict(test_f, te_bb_x, te_bb_y, n_te_samples)
if accum_vote[i] == Validation_Label_List[current_ind]: running_corrects_vote += 1 current_ind += n_samples[i] epoch_acc_vote = running_corrects_vote / len(n_samples) print('Epoch %d, Vote accuracy: %f' % (epoch, epoch_acc_vote)) return epoch_acc_single, epoch_acc_vote, loss # Define Input data: (batch_size, seq_len, dims) print('Loading. Please wait... It may take 2-3 minutes') since = time.time() SampleRHS = LoadinRHS(input_file) Train_loader, _ = load_data(SampleRHS['Train_RHS_Sample'], SampleRHS['Train_RHS_Label_Sample'], BATCH_SIZE, True) Validation_loader, Validation_Label_List = load_data( SampleRHS['Validation_RHS_Sample'], SampleRHS['Validation_RHS_Label_Sample'], 1, False) n_samples_train, n_samples_val = SampleRHS['Train_NSamples'], SampleRHS[ 'Validation_NSamples'] LenTrain = len(SampleRHS['Train_RHS_Label_Sample']) LenValidation = len(SampleRHS['Validation_RHS_Label_Sample']) print('Number of Training data: ', LenTrain) print('Number of Validation data: ', LenValidation) # Hidden_dim is determined by the needs model = LSTM(in_dim=2, hidden_dim=100, n_layer=1, n_class=NumOfCategory) # print(model) if use_gpu:
def test(argv):
    """Run a saved model over dev and/or test corpora and print results.

    Loads a pickled vocabulary and model, converts each requested corpus
    into id samples, packs them into Theano-ready batches, builds a
    prediction function per corpus, and calls predict() on each.

    :param argv: parsed command-line namespace; reads task, batch_size,
        window, load_vocab, dev_data, test_data and load_model.
    """
    print '\nSETTING UP A TEST SETTING\n'

    task = argv.task
    batch_size = argv.batch_size
    window = argv.window

    print '\tTASK: %s\tBATCH: %d\tWINDOW: %d' % (task, batch_size, window)

    ##############
    # LOAD FILES #
    ##############
    """ Load files """
    # corpus: 1D: n_sents, 2D: n_words, 3D: (word, pas_info, pas_id)
    vocab_word = io_utils.load_data(argv.load_vocab)

    # Both corpora are optional; each is loaded with the frozen training
    # vocabulary (False presumably disables vocab growth — TODO confirm).
    if argv.dev_data:
        dev_corpus, _ = io_utils.load(argv.dev_data, vocab_word, False)
        print '\nDEV CORPUS'
        corpus_statistics(dev_corpus)
    if argv.test_data:
        test_corpus, _ = io_utils.load(argv.test_data, vocab_word, False)
        print '\nTEST CORPUS'
        corpus_statistics(test_corpus)

    print '\nVocab: %d' % vocab_word.size()

    ##############
    # PREPROCESS #
    ##############
    """ Preprocessing """
    # samples: 1D: n_sents, 2D: [word_ids, tag_ids, prd_indices, contexts]
    if argv.dev_data:
        dev_samples = sample_format(dev_corpus, vocab_word, window)
        n_dev_samples = len(dev_samples)
    if argv.test_data:
        test_samples = sample_format(test_corpus, vocab_word, window)
        n_te_samples = len(test_samples)

    # dataset = [x, y, l]
    # x=features: 1D: n_samples * n_words, 2D: window; elem=word id
    # y=labels: 1D: n_samples; elem=scalar
    # l=question length: 1D: n_samples * 2; elem=scalar
    # bb_x=batch indices for x: 1D: n_samples / batch_size + 1; elem=(bob, eob)
    # bb_y=batch indices for y: 1D: n_samples / batch_size + 1; elem=(bob, eob)
    if argv.dev_data:
        dev_dataset, dev_bb_x, dev_bb_y = theano_format(
            dev_samples, batch_size)
    if argv.test_data:
        te_dataset, te_bb_x, te_bb_y = theano_format(test_samples, batch_size)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    """ Set a model """
    print '\n\nBuilding a model...'
    # The model is unpickled as-is; no architecture is rebuilt here.
    model = io_utils.load_data(argv.load_model)

    # One compiled prediction function per corpus.
    if argv.dev_data:
        dev_f = set_predict_f(model, dev_dataset)
    if argv.test_data:
        test_f = set_predict_f(model, te_dataset)

    ########
    # TEST #
    ########
    if argv.dev_data:
        print '\n\tDEV\n\t',
        predict(dev_f, dev_bb_x, dev_bb_y, n_dev_samples)
    if argv.test_data:
        print '\n\tTEST\n\t',
        predict(test_f, te_bb_x, te_bb_y, n_te_samples)