def init_static_dialog_agent(args):
    print "reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    # Optionally load a previously trained parser instead of training a new one.
    load_parser_from_file = False
    if len(args) > 4:
        if args[4].lower() == 'true':
            load_parser_from_file = True

    if load_parser_from_file:
        parser = load_model('static_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else:
        print "instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10, safety=True)

    print "instantiating Generator"
    generator = Generator.Generator(ont, lex, learner, parser, beam_width=sys.maxint, safety=True)

    print "instantiating DialogAgent"
    static_policy = StaticDialogPolicy.StaticDialogPolicy()
    A = StaticDialogAgent(parser, generator, grounder, static_policy, None, None)

    if not load_parser_from_file:
        print "reading in training data"
        D = A.read_in_utterance_action_pairs(args[3])

        if len(args) > 4 and args[4] == "both":
            print "training parser and generator jointly from actions"
            converged = A.jointly_train_parser_and_generator_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30, generator_beam=10)
        else:
            print "training parser from actions"
            converged = A.train_parser_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30)

        print "theta: " + str(parser.learner.theta)
        save_model(parser, 'static_parser')

    return A
def init_pomdp_dialog_agent(args):
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_models_from_file = False
    if len(args) > 4:
        if args[4].lower() == 'true':
            load_models_from_file = True

    if load_models_from_file:
        parser = load_model('pomdp_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else:
        print "Instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)

    print "Instantiating DialogAgent"
    agent = PomdpDialogAgent(parser, grounder, None, None, parse_depth=10,
                             load_policy_from_file=load_models_from_file)

    if not load_models_from_file:
        print "reading in data and training parser from actions"
        D = agent.read_in_utterance_action_pairs(args[3])
        converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
        print "theta: " + str(parser.learner.theta)
        save_model(parser, 'pomdp_parser')

    # print 'Parser ontology : ', parser.ontology.preds
    return agent
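# A minimal entry-point sketch for the two init functions above. This is hypothetical
# (the file's real main is not shown in this excerpt): it assumes args mirrors sys.argv,
# as both functions index args[1..4] the way a script indexes sys.argv, and it invents
# an extra agent-type argument purely for illustration.
import sys

if __name__ == '__main__':
    agent_type = sys.argv[5] if len(sys.argv) > 5 else 'static'  # hypothetical flag
    if agent_type == 'pomdp':
        agent = init_pomdp_dialog_agent(sys.argv)
    else:
        agent = init_static_dialog_agent(sys.argv)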
def __init__(self, polarity_strategy=None, ego_involvement_strategy=None,
             lexicon_size=20, agent_vector_size=20, lexicon_output_location=None):
    self.uidgen = UID()  # the unique id generator for this builder
    self.polarity_strategy = polarity_strategy  # the polarity generator for this set of agents
    self.ego_involvement_strategy = ego_involvement_strategy  # the ego-involvement parameter generator for this set of agents
    self.lex = Lexicon.Lexicon(cloudsize=lexicon_size, vector_size=agent_vector_size,
                               filePath=lexicon_output_location)
    self.__numAgents = 0  # the size of this group of agents
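# Usage sketch for the constructor above. The enclosing class name is not shown in
# this excerpt; 'AgentGroupBuilder' is a hypothetical stand-in, and the two strategy
# arguments are left at their defaults.
builder = AgentGroupBuilder(lexicon_size=50, agent_vector_size=20,
                            lexicon_output_location='lexicon_out.txt')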
def init_dialog_agent(args):
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    print "Instantiating Parser"
    parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)
    # Note: the freshly built parser is immediately discarded in favor of a saved model.
    parser = load_model('parser')
    grounder.parser = parser
    grounder.ontology = parser.ontology

    print "Instantiating DialogAgent"
    agent = PomdpDialogAgent(parser, grounder, None, None)

    # print "reading in data and training parser from actions"
    # D = agent.read_in_utterance_action_pairs(args[3])
    # converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
    # print "theta: "+str(parser.learner.theta)
    # save_model(parser, 'parser')
    # print 'Parser ontology : ', parser.ontology.preds

    return agent
def reset_lex(self):
    self.evolution_steps = 0
    self.evolution_steps_label['text'] = self.evolution_steps
    self.lexicon = Lexicon(self.lexicon_size(), phones=self.n_symbols(),
                           frequency_groups=self.lexicon.frequency_groups,
                           hard_max_length=self.lexicon.hard_max_length,
                           hard_start_length=self.hard_word_length())

    # Figure out the minimum word length needed to fit the whole lexicon.
    total_possible_forms = 0
    for i in range(1, self.lexicon.hard_max_length):
        total_possible_forms += self.n_symbols() ** i
        if total_possible_forms > len(self.lexicon):
            break
    self.min_len_needed['text'] = 'max length needed for lexicon: {0}'.format(i)
    self.possible_forms['text'] = 'possible forms at length: {0}'.format(total_possible_forms)

    # Zipf plot: word length against unigram word information.
    self.plot_3.cla()
    sorted_unig = sorted([w.unigram for w in self.lexicon.words])
    self.plot_3.set_xlim(0, self.lexicon.hard_max_length)
    self.plot_3.set_ylim(sorted_unig[0] - 1, sorted_unig[-1] + 1)
    self.plot_3.set_title('word length and unigram word information')
    lengths, unigrams = zip(*self.lexicon.lengths_and_unigrams())
    slope, intercept, r_value, p_value, std_err = stats.linregress(lengths, unigrams)
    unig_pred = intercept + (slope * np.arange(self.lexicon.hard_max_length))
    self.zipf_scatter, = self.plot_3.plot(lengths, unigrams, 'o')
    self.zipf_line, = self.plot_3.plot(np.arange(self.lexicon.hard_max_length), unig_pred)
    self.update()
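# Standalone sketch of the length search in reset_lex above: with n_symbols distinct
# phones there are n_symbols**i possible forms of length i, so the shortest maximum
# length that can fit the lexicon is the smallest L with
# sum(n_symbols**i for i in 1..L) > lexicon_size. Function name is illustrative only.
def min_length_needed(n_symbols, lexicon_size, hard_max_length):
    total = 0
    for i in range(1, hard_max_length):
        total += n_symbols ** i
        if total > lexicon_size:
            break
    return i, total

# e.g. 10 phones and 1000 words: lengths 1-3 give 10 + 100 + 1000 = 1110 > 1000 forms,
# so min_length_needed(10, 1000, 10) returns (3, 1110).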
def __init__(self, args):
    print 'args = ', args, '\n\n\n\n'
    if len(args) < 4:
        print 'Usage ', args[0], ' ont_file lex_file parser_train_pairs_file [load_models_from_file=true/false]'

    rospy.init_node('dialog_agent_aishwarya')

    self.user_log = open(MAIN_LOG_PATH + USER_LOG_FILE, 'a')
    self.error_log = open(MAIN_LOG_PATH + MAIN_ERROR_LOG_FILE, 'a')
    self.started_users = set()

    print "reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)
    self.ont = ont

    print "reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)
    self.lex = lex

    self.parser_train_file = args[3]
    self.load_models_from_file = False
    if len(args) > 4:
        if args[4].lower() == 'true':
            print 'Going to load from file'  # DEBUG
            self.load_models_from_file = True

    self.lock = Lock()
    self.service = rospy.Service('register_user', register_user, self.on_user_receipt)
import sys
sys.path.append('.')  # necessary to import local libraries
import Ontology
import Lexicon
import CKYParser

print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2], word_embeddings_fn=sys.argv[5])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating CKYParser"
parser = CKYParser.CKYParser(ont, lex, lexicon_weight=1)
parser.allow_merge = False
parser.max_multiword_expression = 1
parser.max_missing_words_to_try = 0

print "reading in data and beginning training test"
d = parser.read_in_paired_utterance_semantics(sys.argv[3])
converged = parser.train_learner_on_semantic_forms(d, 20, reranker_beam=10)
if not converged:
    raise AssertionError("Training failed to converge to correct values.")
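# Invocation sketch, inferred from the sys.argv indices read above (the script name is
# hypothetical, and sys.argv[4] is not referenced in this excerpt):
#     python parser_train_test.py ont.txt lex.txt train_pairs.txt <argv4> embeddings.txt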
import sys
sys.path.append('.')  # necessary to import local libraries
import Ontology
import Lexicon
import CKYParser

print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
commutative_idxs = [ont.preds.index('and'), ont.preds.index('or')]
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating CKYParser"
parser = CKYParser.CKYParser(ont, lex, use_language_model=True)

print "reading in data and beginning training test"
d = parser.read_in_paired_utterance_semantics(sys.argv[3])
converged = parser.train_learner_on_semantic_forms(d, 10, reranker_beam=10)
if not converged:
    raise AssertionError("Training failed to converge to correct values.")

print "reading in data and beginning evaluation test"
def main():
    # Load parameters from the command line.
    ontology_fn = FLAGS_ontology_fn
    lexicon_fn = FLAGS_lexicon_fn
    train_pairs_fn = FLAGS_train_pairs_fn
    model_fn = FLAGS_model_fn
    validation_pairs_fn = FLAGS_validation_pairs_fn
    lexicon_embeddings = FLAGS_lexicon_embeddings
    max_epochs = FLAGS_max_epochs
    epochs_between_validations = FLAGS_epochs_between_validations
    lexicon_weight = FLAGS_lexicon_weight
    allow_merge = FLAGS_allow_merge == 1
    perform_type_raising = FLAGS_perform_type_raising == 1
    verbose = FLAGS_verbose
    use_condor = FLAGS_use_condor == 1
    condor_target_dir = FLAGS_condor_target_dir
    condor_script_dir = FLAGS_condor_script_dir
    assert validation_pairs_fn is None or max_epochs >= epochs_between_validations
    assert not use_condor or (condor_target_dir is not None and condor_script_dir is not None)
    assert max_epochs >= 0 or train_pairs_fn is not None

    o = Ontology.Ontology(ontology_fn)
    l = Lexicon.Lexicon(o, lexicon_fn, word_embeddings_fn=lexicon_embeddings)
    p = CKYParser.CKYParser(o, l, allow_merge=allow_merge, lexicon_weight=lexicon_weight,
                            perform_type_raising=perform_type_raising)

    # Hyperparameter adjustments.
    p.max_multiword_expression = 1
    p.max_missing_words_to_try = 0  # basically disallows polysemy that isn't already present in lexicon

    # Train the parser one epoch at a time, examining validation performance between each epoch.
    if max_epochs > 0:
        train_data = p.read_in_paired_utterance_semantics(train_pairs_fn)
        val_data = p.read_in_paired_utterance_semantics(validation_pairs_fn) \
            if validation_pairs_fn is not None else None
        print "finished instantiating parser; beginning training"
        for epoch in range(0, max_epochs, epochs_between_validations):
            if val_data is not None:
                acc_at_1 = get_performance_on_pairs(p, val_data)
                print "validation accuracy at 1 for epoch " + str(epoch) + ": " + str(acc_at_1)
            converged = p.train_learner_on_semantic_forms(train_data,
                                                          epochs=epochs_between_validations,
                                                          epoch_offset=epoch,
                                                          reranker_beam=1,
                                                          verbose=verbose,
                                                          use_condor=use_condor,
                                                          condor_target_dir=condor_target_dir,
                                                          condor_script_dir=condor_script_dir)
            if converged:
                print "training converged after epoch " + str(epoch)
                break
        if val_data is not None:
            acc_at_1 = get_performance_on_pairs(p, val_data)
            print "validation accuracy at 1 at training stop: " + str(acc_at_1)

    # Write the parser to file.
    print "writing trained parser to file..."
    with open(model_fn, 'wb') as f:
        pickle.dump(p, f)
    print "... done"
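# Sketch of how the FLAGS_* globals read by main() might be populated. This is an
# assumption: the real argument parsing is not part of this excerpt, and the types
# and defaults below are guesses inferred from how main() uses each value.
import argparse

if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    for flag, flag_type, default in [
            ('ontology_fn', str, None), ('lexicon_fn', str, None),
            ('train_pairs_fn', str, None), ('model_fn', str, None),
            ('validation_pairs_fn', str, None), ('lexicon_embeddings', str, None),
            ('max_epochs', int, 0), ('epochs_between_validations', int, 1),
            ('lexicon_weight', float, 1.0), ('allow_merge', int, 1),
            ('perform_type_raising', int, 1), ('verbose', int, 0),
            ('use_condor', int, 0), ('condor_target_dir', str, None),
            ('condor_script_dir', str, None)]:
        ap.add_argument('--' + flag, type=flag_type, default=default)
    cmd_args = ap.parse_args()
    for k, v in vars(cmd_args).items():
        globals()['FLAGS_' + k] = v  # mirrors the FLAGS_<name> convention read in main()
    main()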
from Lexicon import *
from GUI import *

if __name__ == '__main__':
    print('\n\n\n')
    n_words = 1000
    n_phones = 10
    # phones = {'a': 10, 'b': 5, 'c': 5, 'd': 1}
    l = Lexicon(n_words, phones=n_phones, frequency_groups=2, hard_start_length=6)
    EvolGUI(l)
            except KeyError:
                index = self.labels_dict['O']
            labels_list.append(index)
        return labels_list

    def show_data_info(self):
        """
        :return: None
        Display the Data object's info: the maximum sentence length, the maximum
        number of words in a single sentence, and the number of sentences in the input file.
        """
        print('Data info:')
        print('max sentence length:', self.properties['max_sentence_length'])
        print('max words per sentence:', self.properties['max_words_number'])
        print('number of sentences:', self.properties['sentence_number'])


if __name__ == '__main__':
    tic = time.time()
    print('Testing the Data class...')
    lex = Lexicon.Lexicon()
    print(1)
    path = r'NERData\MSRA\msra_train_bio.txt'
    data = Data(path, lex)
    data.show_data_info()
    print(data.chars_dict, data.labels_dict, sep='\n')
    for i in data.data[0]:
        print(i)
    toc = time.time()
    print('elapsed time:', toc - tic)