Example #1
def init_static_dialog_agent(args):
    print "reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_parser_from_file = False
    if len(args) > 4:
        if args[4].lower() == 'true':
            load_parser_from_file = True

    if load_parser_from_file:
        parser = load_model('static_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else:
        print "instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10, safety=True)

    print "instantiating Generator"
    generator = Generator.Generator(ont, lex, learner, parser, beam_width=sys.maxint, safety=True)

    print "instantiating DialogAgent"
    static_policy = StaticDialogPolicy.StaticDialogPolicy()
    A = StaticDialogAgent(parser, generator, grounder, static_policy, None, None)

    if not load_parser_from_file:
        print "reading in training data"
        D = A.read_in_utterance_action_pairs(args[3])

        if len(args) > 4 and args[4] == "both":  # NOTE: args[4] also serves as the load-from-file flag above
            print "training parser and generator jointly from actions"
            converged = A.jointly_train_parser_and_generator_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30, generator_beam=10)
        else:
            print "training parser from actions"
            converged = A.train_parser_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30)

        print "theta: "+str(parser.learner.theta)
        save_model(parser, 'static_parser')
    
    return A
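
A minimal driver sketch (not part of the original example; the argv layout is inferred from the checks above) showing how init_static_dialog_agent might be invoked:

import sys

if __name__ == '__main__':
    # assumed usage: main.py ont_file lex_file train_pairs_file [load_parser_from_file=true/false|both]
    agent = init_static_dialog_agent(sys.argv)
    # 'agent' is the StaticDialogAgent returned above, ready to map utterances to actions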
Example #2
def init_pomdp_dialog_agent(args):
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_models_from_file = False
    if len(args) > 4:
        if args[4].lower() == 'true':
            load_models_from_file = True

    if load_models_from_file:
        parser = load_model('pomdp_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else:
        print "Instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)

    print "Instantiating DialogAgent"
    agent = PomdpDialogAgent(parser, grounder, None, None, parse_depth=10,
                             load_policy_from_file=load_models_from_file)

    if not load_models_from_file:
        print "reading in data and training parser from actions"
        D = agent.read_in_utterance_action_pairs(args[3])
        converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
        print "theta: "+str(parser.learner.theta)
        save_model(parser, 'pomdp_parser')
        #print 'Parser ontology : ', parser.ontology.preds

    return agent
Example #3
    def __init__(self,
                 polarity_strategy=None,
                 ego_involvement_strategy=None,
                 lexicon_size=20,
                 agent_vector_size=20,
                 lexicon_output_location=None):
        self.uidgen = UID()  # the unique id generator for this builder
        self.polarity_strategy = polarity_strategy  # the polarity generator for this set of agents
        self.ego_involvement_strategy = ego_involvement_strategy  # the ego-involvement parameter generator for this set of agents

        self.lex = Lexicon.Lexicon(cloudsize=lexicon_size,
                                   vector_size=agent_vector_size,
                                   filePath=lexicon_output_location)
        self.__numAgents = 0  # the size of this group of agents
Example #4
def init_dialog_agent(args):
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    print "Instantiating Parser"
    parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)
    parser = load_model('parser')
    grounder.parser = parser
    grounder.ontology = parser.ontology

    print "Instantiating DialogAgent"
    agent = PomdpDialogAgent(parser, grounder, None, None)

    #print "reading in data and training parser from actions"
    #D = agent.read_in_utterance_action_pairs(args[3])
    #converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
    #print "theta: "+str(parser.learner.theta)
    #save_model(parser, 'parser')
    #print 'Parser ontology : ', parser.ontology.preds

    return agent
Example #5
    def reset_lex(self):
        self.evolution_steps = 0
        self.evolution_steps_label['text'] = self.evolution_steps

        self.lexicon = Lexicon(self.lexicon_size(),
                               phones=self.n_symbols(),
                               frequency_groups=self.lexicon.frequency_groups,
                               hard_max_length=self.lexicon.hard_max_length,
                               hard_start_length=self.hard_word_length())

        # figure out minimum length needed for whole lexicon
        total_possible_forms = 0
        for i in range(1, self.lexicon.hard_max_length):
            total_possible_forms += self.n_symbols()**i
            if total_possible_forms > len(self.lexicon):
                break
        self.min_len_needed['text'] = 'max length needed for lexicon: {0}'.format(i)
        self.possible_forms['text'] = 'possible forms at length: {0}'.format(total_possible_forms)

        # zipf
        self.plot_3.cla()
        sorted_unig = sorted([w.unigram for w in self.lexicon.words])
        self.plot_3.set_xlim(0, self.lexicon.hard_max_length)
        self.plot_3.set_ylim(sorted_unig[0] - 1, sorted_unig[-1] + 1)
        self.plot_3.set_title('word length and unigram word information')

        lengths, unigrams = zip(*self.lexicon.lengths_and_unigrams())
        slope, intercept, r_value, p_value, std_err = stats.linregress(lengths, unigrams)
        unig_pred = intercept + slope * np.arange(self.lexicon.hard_max_length)

        self.zipf_scatter, = self.plot_3.plot(lengths, unigrams, 'o')
        self.zipf_line, = self.plot_3.plot(np.arange(self.lexicon.hard_max_length), unig_pred)

        self.update()
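
The "minimum length" loop above finds the smallest maximum word length L such that the number of possible forms of length at most L (the sum of n_symbols**i for i = 1..L) exceeds the lexicon size. A standalone sketch of the same computation (min_length_needed is a hypothetical helper, not part of the project):

def min_length_needed(n_symbols, lexicon_size, hard_max_length):
    # smallest L with n_symbols + n_symbols**2 + ... + n_symbols**L > lexicon_size
    total_possible_forms = 0
    for length in range(1, hard_max_length):
        total_possible_forms += n_symbols ** length
        if total_possible_forms > lexicon_size:
            return length, total_possible_forms
    return hard_max_length, total_possible_forms

# e.g. with 10 phones and 1000 words: 10 + 100 + 1000 = 1110 > 1000,
# so length 3 suffices: min_length_needed(10, 1000, 10) == (3, 1110)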
Example #6
    def __init__(self, args):
        print 'args = ', args, '\n\n\n\n'
        if len(args) < 4:
            print 'Usage ', args[0], ' ont_file lex_file parser_train_pairs_file [load_models_from_file=true/false]'

        rospy.init_node('dialog_agent_aishwarya')

        self.user_log = open(MAIN_LOG_PATH + USER_LOG_FILE, 'a')
        self.error_log = open(MAIN_LOG_PATH + MAIN_ERROR_LOG_FILE, 'a')
        self.started_users = set()

        print "reading in Ontology"
        ont = Ontology.Ontology(args[1])
        print "predicates: " + str(ont.preds)
        print "types: " + str(ont.types)
        print "entries: " + str(ont.entries)
        self.ont = ont

        print "reading in Lexicon"
        lex = Lexicon.Lexicon(ont, args[2])
        print "surface forms: " + str(lex.surface_forms)
        print "categories: " + str(lex.categories)
        print "semantic forms: " + str(lex.semantic_forms)
        print "entries: " + str(lex.entries)
        self.lex = lex

        self.parser_train_file = args[3]

        self.load_models_from_file = False
        if len(args) > 4:
            if args[4].lower() == 'true':
                print 'Going to load from file'  # DEBUG
                self.load_models_from_file = True

        self.lock = Lock()
        self.service = rospy.Service('register_user', register_user,
                                     self.on_user_receipt)
Example #7
File: main.py Project: thomason-jesse/tsp
import sys

sys.path.append('.')  # necessary to import local libraries
import Ontology
import Lexicon
import CKYParser

print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2], word_embeddings_fn=sys.argv[5])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating CKYParser"
parser = CKYParser.CKYParser(ont, lex, lexicon_weight=1)
parser.allow_merge = False  # disallow the merge operation during parsing
parser.max_multiword_expression = 1  # no multi-word surface forms
parser.max_missing_words_to_try = 0  # basically disallows polysemy that isn't already present in lexicon

print "reading in data and beginning training test"
d = parser.read_in_paired_utterance_semantics(sys.argv[3])
converged = parser.train_learner_on_semantic_forms(d, 20, reranker_beam=10)
if not converged:
    raise AssertionError("Training failed to converge to correct values.")
Example #8
import sys

sys.path.append('.')  # necessary to import local libraries
import Ontology
import Lexicon
import CKYParser

print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
commutative_idxs = [ont.preds.index('and'), ont.preds.index('or')]
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating CKYParser"
parser = CKYParser.CKYParser(ont, lex, use_language_model=True)

print "reading in data and beginning training test"
d = parser.read_in_paired_utterance_semantics(sys.argv[3])
converged = parser.train_learner_on_semantic_forms(d, 10, reranker_beam=10)
if not converged:
    raise AssertionError("Training failed to converge to correct values.")

print "reading in data and beginning evaluation test"
Example #9
File: main.py Project: thomason-jesse/tsp
def main():

    # Load parameters from command line.
    ontology_fn = FLAGS_ontology_fn
    lexicon_fn = FLAGS_lexicon_fn
    train_pairs_fn = FLAGS_train_pairs_fn
    model_fn = FLAGS_model_fn
    validation_pairs_fn = FLAGS_validation_pairs_fn
    lexicon_embeddings = FLAGS_lexicon_embeddings
    max_epochs = FLAGS_max_epochs
    epochs_between_validations = FLAGS_epochs_between_validations
    lexicon_weight = FLAGS_lexicon_weight
    allow_merge = FLAGS_allow_merge == 1
    perform_type_raising = FLAGS_perform_type_raising == 1
    verbose = FLAGS_verbose
    use_condor = FLAGS_use_condor == 1
    condor_target_dir = FLAGS_condor_target_dir
    condor_script_dir = FLAGS_condor_script_dir
    assert validation_pairs_fn is None or max_epochs >= epochs_between_validations
    assert not use_condor or (condor_target_dir is not None
                              and condor_script_dir is not None)
    assert max_epochs >= 0 or train_pairs_fn is not None

    o = Ontology.Ontology(ontology_fn)
    l = Lexicon.Lexicon(
        o,
        lexicon_fn,
        word_embeddings_fn=lexicon_embeddings,
    )
    p = CKYParser.CKYParser(o,
                            l,
                            allow_merge=allow_merge,
                            lexicon_weight=lexicon_weight,
                            perform_type_raising=perform_type_raising)

    # hyperparameter adjustments
    p.max_multiword_expression = 1
    p.max_missing_words_to_try = 0  # basically disallows polysemy that isn't already present in lexicon

    # Train the parser one epoch at a time, examining validation performance between each epoch.
    if max_epochs > 0:
        train_data = p.read_in_paired_utterance_semantics(train_pairs_fn)
        val_data = p.read_in_paired_utterance_semantics(validation_pairs_fn) \
            if validation_pairs_fn is not None else None
        print "finished instantiating parser; beginning training"
        for epoch in range(0, max_epochs, epochs_between_validations):
            if val_data is not None:
                acc_at_1 = get_performance_on_pairs(p, val_data)
                print "validation accuracy at 1 for epoch " + str(
                    epoch) + ": " + str(acc_at_1)
            converged = p.train_learner_on_semantic_forms(
                train_data,
                epochs=epochs_between_validations,
                epoch_offset=epoch,
                reranker_beam=1,
                verbose=verbose,
                use_condor=use_condor,
                condor_target_dir=condor_target_dir,
                condor_script_dir=condor_script_dir)
            if converged:
                print "training converged after epoch " + str(epoch)
                break
        if val_data is not None:
            acc_at_1 = get_performance_on_pairs(p, val_data)
            print "validation accuracy at 1 at training stop: " + str(acc_at_1)

    # Write the parser to file.
    print "writing trained parser to file..."
    with open(model_fn, 'wb') as f:
        pickle.dump(p, f)
    print "... done"
Example #10
File: main.py Project: AdamKing11/Lex_Evol
from Lexicon import *
from GUI import *

if __name__ == '__main__':
    print('\n\n\n')
    n_words = 1000
    n_phones = 10
    #phones = {'a' : 10, 'b' : 5, 'c' : 5, 'd' : 1}
    l = Lexicon(n_words,
                phones=n_phones,
                frequency_groups=2,
                hard_start_length=6)

    EvolGUI(l)
Example #11
            except KeyError:
                index = self.labels_dict['O']
            labels_list.append(index)

        return labels_list

    def show_data_info(self):
        """
        :return: None
        Display information about this Data object: the maximum sentence
        length, the maximum number of words in a single sentence, and the
        number of sentences in the input file.
        """
        print('Data info:')
        print('max sentence length:', self.properties['max_sentence_length'])
        print('max words in a single sentence:', self.properties['max_words_number'])
        print('number of sentences:', self.properties['sentence_number'])


if __name__ == '__main__':
    tic = time.time()
    print('Testing the Data class...')
    lex = Lexicon.Lexicon()
    print(1)
    path = r'NERData\MSRA\msra_train_bio.txt'
    data = Data(path, lex)
    data.show_data_info()
    print(data.chars_dict, data.labels_dict, sep='\n')
    for i in data.data[0]:
        print(i)
    toc = time.time()
    print('elapsed time:', toc - tic)