def play_ner(replay, saved_robot, saved_initdata, nextepisode):
    actions = 2
    global AGENT
    if AGENT == "random":
        robot = RobotRandom(actions)
    elif AGENT == "DQN":
        robot = RobotDQN(actions)
    elif AGENT == "CNNDQN":
        robot = RobotCNNDQN(actions)
    else:
        print("** There is no robot.")
        raise SystemExit

    if replay:
        robot.restore(saved_robot)

    global TRAIN_LANG, TRAIN_LANG_NUM, BUDGET, CHECKPOINT
    robot.setCheckPoint(CHECKPOINT)
    for i in range(TRAIN_LANG_NUM):
        train = TRAIN_LANG[i][0]
        test = TRAIN_LANG[i][1]
        dev = TRAIN_LANG[i][2]
        emb = TRAIN_LANG[i][3]
        tagger = TRAIN_LANG[i][4]

        # initialise a NER game
        game = initialise_game(train, test, dev, emb, BUDGET, True, replay, saved_initdata)
        # initialise a decision robot
        robot.update_embeddings(game.w2v)
        # tagger
        model = CRFTagger(tagger)
        model.clean()

        # play game
        episode = 1
        if replay:
            episode = nextepisode
        print(">>>>>> Playing game ..")
        while episode <= MAX_EPISODE:
            # copy the baseline model to the saved model
            # print(tagger)
            # shutil.copy('eng.model.baseline', tagger)
            print('>>>>>>> Current game round', episode, 'Maximum', MAX_EPISODE)
            observation = game.get_frame(model)
            action = robot.get_action(observation)
            print('> Action', action)
            reward, observation2, terminal = game.feedback(action, model, True)
            print('> Reward', reward)
            robot.update(observation, action, reward, observation2, terminal)
            if terminal:
                episode += 1
                print('> Terminal <')
    return robot
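# A minimal sketch of how this replay variant might be resumed from a checkpoint.
# The file paths and episode number below are hypothetical and only illustrate the
# argument meanings; they are not taken from the source.
robot = play_ner(replay=True,
                 saved_robot='checkpoints/robot_latest.ckpt',      # assumed saved Q-network
                 saved_initdata='checkpoints/game_initdata.pkl',   # assumed saved game state
                 nextepisode=42)                                    # episode to resume from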
def test(robot):
    global TEST_LANG, TEST_LANG_NUM, BUDGET
    for i in range(TEST_LANG_NUM):
        train = TEST_LANG[i][0]
        test = TEST_LANG[i][1]
        dev = TEST_LANG[i][2]
        emb = TEST_LANG[i][3]
        tagger = TEST_LANG[i][4]
        game2 = initialise_game(train, test, dev, emb, BUDGET, False, False, '')
        robot.update_embeddings(game2.w2v)
        model = CRFTagger(tagger)
        model.clean()
        test_agent_batchNew(robot, game2, model, 1000, True)
def build_model(model_name, model_file, max_len, input_dim, output_dim, embedding_matrix):
    if model_name == 'CRF':
        model = CRFTagger(model_file=model_file)
    elif model_name == 'RNN':
        model = RNNTagger(model_file=model_file, max_len=max_len, input_dim=input_dim,
                          output_dim=output_dim, embedding_matrix=embedding_matrix)
    else:
        logging.error('Invalid model type')
        assert False
    return model
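# A minimal usage sketch for build_model, assuming an embedding matrix has already been
# loaded; the file name, vocabulary size, and sentence length below are illustrative only.
embedding_matrix = np.zeros((20000, 100))           # assumed (vocab_size, embedding_dim)
tagger = build_model('RNN',
                     model_file='output/rnn_tagger.h5',  # hypothetical output path
                     max_len=120,                        # assumed maximum sentence length
                     input_dim=20000,                    # assumed vocabulary size
                     output_dim=num_classes,             # number of NER tags, as elsewhere
                     embedding_matrix=embedding_matrix)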
def test(robot):
    global TEST_LANG, TEST_LANG_NUM, BUDGET
    for i in range(TEST_LANG_NUM):
        train = TEST_LANG[i][0]
        test = TEST_LANG[i][1]
        dev = TEST_LANG[i][2]
        emb = TEST_LANG[i][3]
        tagger = TEST_LANG[i][4]
        game2 = initialise_game(train, test, dev, emb, BUDGET)
        robot.update_embeddings(game2.w2v)
        model = CRFTagger(tagger)
        test_agent_batch(robot, game2, model, BUDGET)
        test_agent_online(robot, game2, model, BUDGET)
def play_ner():
    actions = 2
    global AGENT
    robot = RobotCNNDQN(AGENT, actions)
    # if AGENT == "random":
    #     robot = RobotRandom(actions)
    # elif AGENT == "DQN":
    #     robot = RobotDQN(actions)
    # elif AGENT == "CNNDQN":
    #     robot = RobotCNNDQN(actions)
    # else:
    #     print("** There is no robot.")
    #     raise SystemExit

    global TRAIN_LANG, TRAIN_LANG_NUM, BUDGET
    for i in range(TRAIN_LANG_NUM):
        train = TRAIN_LANG[i][0]
        test = TRAIN_LANG[i][1]
        dev = TRAIN_LANG[i][2]
        emb = TRAIN_LANG[i][3]
        tagger = TRAIN_LANG[i][4]

        # initialise a NER game
        game = initialise_game(train, test, dev, emb, BUDGET)
        # initialise a decision robot
        robot.initialise(game.max_len, game.w2v)
        # robot.update_embeddings(game.w2v)
        # tagger
        model = CRFTagger(tagger)

        # play game
        episode = 1
        print(">>>>>> Playing game ..")
        while episode <= MAX_EPISODE:
            print('>>>>>>> Current game round', episode, 'Maximum', MAX_EPISODE)
            observation = game.get_frame(model)
            action = robot.get_action(observation)
            print('> Action', action)
            reward, observation2, terminal = game.feedback(action, model)
            print('> Reward', reward)
            robot.update(observation, action, reward, observation2, terminal)
            if terminal:
                episode += 1
                print('> Terminal <')
    return (robot, game, model)
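# A minimal driver sketch, assuming play_ner() and test() above live in the same script;
# option parsing is omitted and only the assumed call order is shown.
if __name__ == '__main__':
    robot, game, model = play_ner()   # train the CNN-DQN selection policy on TRAIN_LANG
    test(robot)                       # reuse the trained robot on the TEST_LANG datasets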
def dreaming_phase(train_la, train_la_idx, train_pool, train_pool_idx, sent_dev,
                   budget, episodes, agent, expert):
    logger.info(' * Start Dreaming phase * ')
    # Memory (two lists) to store states and actions
    states = []
    actions = []
    for tau in range(0, episodes):
        # Shuffle and split initial train, validation set
        sent_trn = list(train_la)
        idx_trn = list(train_la_idx)
        sent_pool = list(train_pool)
        idx_pool = list(train_pool_idx)
        logger.info(
            "[Episode {}] Partition data: labeled = {}, val = {}, unlabeled pool = {} "
            .format(str(tau), len(sent_trn), len(sent_dev), len(sent_pool)))

        # Temporary tagger trained on the current labeled set
        tagger_dreamming = "{}/{}_tagger_temp.h5".format(args.output, DATASET_NAME)
        if os.path.exists(tagger_dreamming):
            os.remove(tagger_dreamming)
        model = CRFTagger(tagger_dreamming, num_classes=num_classes)
        if len(sent_trn) > 0:
            model.train(sent_trn)

        # In every episode, run the trajectory
        for t in range(0, budget):
            logger.info('[Dreaming phase] Episode:' + str(tau + 1) +
                        ' Budget:' + str(t + 1))
            row = 0
            f1 = -1
            # save the index of the best data point, i.e. the index of the action
            bestindex = 0

            # Sample k candidate points from D_pool
            if args.dreaming_candidate_selection_mode == 'random':
                logger.info(" * Random candidate selections")
                random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
                    sent_pool, idx_pool, k)
            elif args.dreaming_candidate_selection_mode == 'certainty':
                logger.info(" * Certainty candidate selections")
                random_pool, random_pool_idx, queryindices = utilities.sample_from_top_n_certainty(
                    sent_pool, idx_pool, expert, args.n_learning, k)
            elif args.dreaming_candidate_selection_mode == 'mix':
                logger.info(" * Mix method candidate selections")
                c = np.random.rand(1)
                if c > 0.5:
                    random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
                        sent_pool, idx_pool, k)
                else:
                    random_pool, random_pool_idx, queryindices = utilities.sample_from_top_n_certainty(
                        sent_pool, idx_pool, expert, args.n_learning, k)
            else:
                logger.info(" * Unknown mode, use Random candidate selections")
                random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
                    sent_pool, idx_pool, k)

            logger.debug(' * Generate label using expert')
            x_tokens = [' '.join(expert.sent2tokens(s)) for s in random_pool]
            y_labels = expert.predict(random_pool)
            pred_sents = utilities.data2sents(x_tokens, y_labels)

            # Evaluate each candidate by retraining a temporary tagger with it added
            for datapoint in zip(pred_sents, random_pool_idx):
                seq = datapoint[0]
                idx = datapoint[1]
                train_la_temp = list(sent_trn)
                train_la_temp.append(seq)
                # tagger_temp (path of the per-candidate model) is assumed to be defined at module level
                if os.path.exists(tagger_temp):
                    os.remove(tagger_temp)
                model_temp = CRFTagger(tagger_temp, num_classes=num_classes)
                model_temp.train(train_la_temp)
                f1_temp = model_temp.test(dev_sents, label2str)
                if f1_temp > f1:
                    bestindex = row
                    f1 = f1_temp
                row = row + 1
                del model_temp
                del train_la_temp
                gc.collect()

            # get the state and action
            state = utilities.getAllState(idx_trn, random_pool, random_pool_idx,
                                          model, w2v, max_len, num_classes)
            # action = bestindex
            coin = np.random.rand(1)
            if coin > 0.5:
                logger.debug(' * Use the POLICY [coin = {}]'.format(str(coin)))
                action = bestindex
            else:
                action = agent.predict(args.k, state)
            states.append(state)
            actions.append(action)

            # update the model
            theindex = queryindices[bestindex]
            sent_trn.append(sent_pool[theindex])
            idx_trn.append(idx_pool[theindex])
            model.train(sent_trn)
            # delete the selected data point from the pool
            del sent_pool[theindex]
            del idx_pool[theindex]

        cur_actions = to_categorical(np.asarray(actions), num_classes=k)
        agent.train_policy(args.k, states, cur_actions)
        del sent_pool
        del idx_pool
        del sent_trn
        del idx_trn
        gc.collect()
    return agent
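# The dreaming phase relies on utilities.randomKSamples returning three parallel values.
# Its source is not shown here, so the sketch below only illustrates the assumed contract
# (k sampled sentences, their index representations, and their positions in the pool);
# it is not the original implementation.
def randomKSamples(pool, pool_idx, k):
    positions = np.random.choice(len(pool), size=min(k, len(pool)), replace=False)
    sampled = [pool[p] for p in positions]           # candidate sentences
    sampled_idx = [pool_idx[p] for p in positions]   # their word-index representations
    return sampled, sampled_idx, list(positions)     # positions are later used to remove the chosen point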
train_la_idx = []
train_pool_idx = []
for i in range(0, len(train_sents)):
    if i < args.initial_training_size:
        train_la.append(train_sents[indices[i]])
        train_la_idx.append(train_idx[indices[i]])
    else:
        train_pool.append(train_sents[indices[i]])
        train_pool_idx.append(train_idx[indices[i]])

logger.info(' * Begin dreaming policy..')
step = 0
f1_list = []
if args.model_path is not None:
    copyfile(args.model_path, tagger_output)
tagger = CRFTagger(tagger_output, num_classes=num_classes)
if args.initial_training_size > 0:
    tagger.train(train_la)
    f1_score = tagger.test(test_sents, label2str)
    f1_list.append(f1_score)
    logger.info(" Initial F1 : {}".format(str(f1_score)))

episode = args.ndream
while step < BUDGET:
    tagger, step, f1_list, train_la, train_la_idx, train_pool, train_pool_idx = learning_phase(
        train_la, train_la_idx, train_pool, train_pool_idx, test_sents, tagger,
        agent, args.learning_phase_length, step, f1_list)
    episode = args.ndream + int(step / args.dream_increase_step)
    agent = dreaming_phase(train_la, train_la_idx, train_pool, train_pool_idx,
                           dev_sents, args.dreaming_budget, episode, agent, tagger)

logger.info("Save policy to {}".format(policy_output))
train_la_idx = []
train_pool_idx = []
for i in range(0, len(allsents)):
    if i < 10:
        train_la.append(allsents[indices[i]])
        train_la_idx.append(allidx[indices[i]])
    elif 10 < i < (len(dev_sents) + 10):
        train_val.append(allsents[indices[i]])
    else:
        train_pool.append(allsents[indices[i]])
        train_pool_idx.append(allidx[indices[i]])

# Initialise the model
if os.path.exists(tagger_output):
    os.remove(tagger_output)
model = CRFTagger(tagger_output, num_classes=num_classes)
if args.initial_training_size > 0:
    model.train(train_la)

for t in range(0, BUDGET):
    if (t % 10) == 0:
        logger.info(' * Episode: {} Budget so far: {}'.format(str(tau + 1), str(t + 1)))
    # Randomly sample k points from train_pool and train_pool_idx
    random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
        train_pool, train_pool_idx, k)
    row = 0
    f1 = -1
    bestindex = 0
    newseq = []
    newidx = []
    coin = np.random.rand(1)  # beta = 0.5 fixed
# f1array = np.array(allf1list)
# averageacc = list(np.mean(f1array, axis=0))
# print('F1 list: ')
# print(allf1list)
# ww = open(resultname, 'w')
# ww.writelines(str(line) + "\n" for line in averageacc)
# ww.close()
# print("Test:--- %s seconds ---" % (time.time() - start_time))
# logger.info(resultname)

logger.info(">>>>> Dataset {} size {}".format(DATASET_NAME, len(train_sents)))
# num_initial_data = [10, 20, 50, 100, 150, 200, 300, 400, 500, 1000]
# for n in num_initial_data:
#     f1s = []
#     for i in range(50):
#         sent_trn, idx_trn, query = utilities.randomKSamples(train_sents, train_idx, n)

tagger_output = "{}/{}_tagger.h5".format(args.output, DATASET_NAME)
if os.path.exists(tagger_output):
    os.remove(tagger_output)
model = CRFTagger(tagger_output, num_classes=num_classes)
model.train(train_sents)
f1_score = model.test(test_sents, label2str)
logger.info('*********************************')
logger.info(' F1 score : {}'.format(f1_score))
# f1_max = np.max(np.asarray(f1s))
# f1_mean = np.mean(np.asarray(f1s))
# f1_min = np.min(np.asarray(f1s))
# logger.info(' * Average f1 score: {}'.format(f1_mean))
# logger.info(' * Min f1 score: {}'.format(f1_min))
# logger.info(' * Max f1 score: {}'.format(f1_max))
# Shuffle train_sents, split into train_la and train_pool
indices = np.arange(len(train_sents))
np.random.shuffle(indices)
train_la = []
train_pool = []
train_la_idx = []
train_pool_idx = []
for i in range(0, len(train_sents)):
    if i < 10:
        train_la.append(train_sents[indices[i]])
        train_la_idx.append(train_idx[indices[i]])
    else:
        train_pool.append(train_sents[indices[i]])
        train_pool_idx.append(train_idx[indices[i]])

# Initialise the model
model = CRFTagger('esp.tagger')
model.train(train_la)

coin = np.random.rand(1)
states = []
actions = []
for t in range(0, BUDGET):
    print('Episode: ' + str(tau + 1) + ' Budget: ' + str(t + 1))
    # Randomly sample k points from train_pool and train_pool_idx
    random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
        train_pool, train_pool_idx, k)
    # get the state and action
    state = utilities.getAllState(random_pool, random_pool_idx, model, w2v, 200)
    tempstates = np.expand_dims(state, axis=0)