Example #1
    def checkCfgConfig(self, cfg_params):
        """
        """
        self.datasvc_url = cfg_params.get("USER.datasvc_url", self.datasvc_url)
        self.srm_version = cfg_params.get("USER.srm_version", 'srmv2')
        self.node = cfg_params.get('USER.storage_element', None)
        self.usenamespace = cfg_params.get("USER.usenamespace", 0)
        self.user_remote_dir = cfg_params.get("USER.user_remote_dir", '')

        self.publish_data = cfg_params.get("USER.publish_data", 0)
        if int(self.publish_data) == 1:
            # only accepts valid PhEDEx Node Names
            import Lexicon
            try:
                Lexicon.cmsname(self.node)
            except Exception, text:
                msg = "%s\n'%s' is not a valid Phedex Node Name" % (text,
                                                                    self.node)
                msg += "\n***************** NOTICE ***************"
                msg += "\nOnly valid Phexex Node Names can be used as location for published data"
                msg += "\nPlease fix storage_element or set publish_data=0 in [USER] section of crab.cfg"
                if 'group' in self.user_remote_dir:
                    msg += '\nIf you are trying to stage out to some /store/group area, you can do like:'
                    msg += '\n   storage_element = T2_US_UCSD'
                    msg += '\n   user_remote_dir = /store/group/foo/bar'
                msg += "\n****************************************"
                raise CrabException(msg)
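
The check above leans entirely on Lexicon.cmsname() raising an exception for anything that is not a valid PhEDEx Node Name. A minimal sketch of that behaviour in isolation, assuming the CMS Lexicon module bundled with CRAB2 (the second site name is a made-up, invalid example):

import Lexicon

for name in ("T2_US_UCSD", "not-a-site"):  # "not-a-site" is deliberately invalid, for illustration only
    try:
        Lexicon.cmsname(name)
        print("%s is a valid PhEDEx Node Name" % name)
    except Exception as text:
        print("%s rejected: %s" % (name, text))
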
Example #2
    def checkCfgConfig(self,cfg_params):
        """
        """
        self.datasvc_url = cfg_params.get("USER.datasvc_url",self.datasvc_url)
        self.srm_version = cfg_params.get("USER.srm_version",'srmv2')
        self.node = cfg_params.get('USER.storage_element',None)
        self.usenamespace = cfg_params.get("USER.usenamespace",0)
        self.user_remote_dir = cfg_params.get("USER.user_remote_dir",'')

        self.publish_data = cfg_params.get("USER.publish_data",0)
        if int(self.publish_data) == 1:
            # only accepts valid PhEDEx Node Names
            import Lexicon
            try:
                Lexicon.cmsname(self.node)
            except Exception, text:
                msg =  "%s\n'%s' is not a valid Phedex Node Name" % (text,self.node)
                msg += "\n***************** NOTICE ***************"
                msg += "\nOnly valid Phexex Node Names can be used as location for published data"
                msg += "\nPlease fix storage_element or set publish_data=0 in [USER] section of crab.cfg"
                if 'group' in   self.user_remote_dir:
                    msg += '\nIf you are trying to stage out to some /store/group area, you can do like:'
                    msg += '\n   storage_element = T2_US_UCSD'
                    msg += '\n   user_remote_dir = /store/group/foo/bar'
                msg += "\n****************************************"
                raise CrabException(msg)
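Example #3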
def runLexicon(articles, sentLex):
    # generate dictionary containing articles and their scores
    articleScores = Lexicon.allScores(articles, sentLex)

    # get the count of POS/NEG/NEUt articles
    POS, NEG, NEUT = Lexicon.categorizeScores(articleScores)

    return POS, NEG, NEUT
Example #4
    def getEndpoint(self):
        '''
        Return full SE endpoint and related infos
        '''
        self.lfn = self.getLFN()

        if int(self.publish_data) == 1:
            try:
                # this is not a full LFN, only the path part, add dummy filename for validating format
                Lexicon.lfn(self.lfn + "dummy.root")
            except Exception, text:
                msg = "PhEDExDataSvcInfo.py: ERROR in generated LFN :\n%s" % text
                raise CrabException(msg)
Example #5
    def getEndpoint(self):   
        '''
        Return full SE endpoint and related infos
        '''
        self.lfn = self.getLFN()

        if int(self.publish_data) == 1 :
            try:
            # this is not a full LFN, only the path part, add dummy filename for validating format
                Lexicon.lfn(self.lfn+"dummy.root")
            except Exception, text:
                msg = "PhEDExDataSvcInfo.py: ERROR in generated LFN :\n%s" % text
                raise CrabException(msg)
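
Both versions validate only the directory part of the LFN, so a dummy filename is appended before calling Lexicon.lfn(). A minimal sketch of that trick on its own, assuming the same Lexicon module; the path below is a hypothetical example:

import Lexicon

lfn_dir = "/store/user/someuser/output/"  # hypothetical path-only LFN
try:
    Lexicon.lfn(lfn_dir + "dummy.root")   # append a dummy file so the full-LFN format check applies
    print("LFN path looks valid: %s" % lfn_dir)
except Exception as text:
    print("ERROR in generated LFN:\n%s" % text)
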
Example #6
    def CleanOutput_FeatureLeave(self):
        a = JsonClass()
        a.text = self.text
        if self.norm != self.text:
            a.norm = self.norm
        if self.pnorm:
            a.pnorm = self.pnorm
        if self.iepair:
            a.iepair = self.iepair
        if self.atom != self.text:
            a.atom = self.atom
        features = [
            FeatureOntology.GetFeatureName(f)
            for f in Lexicon.CopyFeatureLeaves(self.features)
            if f not in FeatureOntology.NotShowList
        ]
        for f in features:
            # if isinstance(f, int):
            #     f = "L" + str(f)
            setattr(a, f, '')
        a.StartOffset = self.StartOffset
        a.EndOffset = self.EndOffset
        if self.UpperRelationship:
            a.UpperRelationship = self.UpperRelationship
        if self.sons \
                and utils.FeatureID_0 not in self.features:
            a.sons = [s.CleanOutput_FeatureLeave() for s in self.sons]

        #logging.info("in featureleave" + str(self) + "f:" + str(features))
        return a
Example #7
def init_static_dialog_agent(args) :
    print "reading in Ontology"
    ont = Ontology.Ontology(sys.argv[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "reading in Lexicon"
    lex = Lexicon.Lexicon(ont, sys.argv[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_parser_from_file = False
    if len(args) > 4 :
        if args[4].lower() == 'true' :
            load_parser_from_file = True
            
    if load_parser_from_file :
        parser = load_model('static_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else :
        print "instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10, safety=True)

    print "instantiating Generator"
    generator = Generator.Generator(ont, lex, learner, parser, beam_width=sys.maxint, safety=True)

    print "instantiating DialogAgent"
    static_policy = StaticDialogPolicy.StaticDialogPolicy()
    A = StaticDialogAgent(parser, generator, grounder, static_policy, None, None)

    if not load_parser_from_file :
        print "reading in training data"
        D = A.read_in_utterance_action_pairs(args[3])

        if len(args) > 4 and args[4] == "both":
            print "training parser and generator jointly from actions"
            converged = A.jointly_train_parser_and_generator_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30, generator_beam=10)
        else:
            print "training parser from actions"
            converged = A.train_parser_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30)

        print "theta: "+str(parser.learner.theta)
        save_model(parser, 'static_parser')
    
    return A
Example #8
def  expandIntoListOfPhedexNodeNames(location_list):
    """
    takes as input a list of locations and returns a list of PNNs
    raises CrabException if an input is not a valid PNN abbreviation
    uses https://cmsweb.cern.ch/phedex/datasvc/doc/nodes
    """

    # build API node filter, add wildcards which are not required by Crab2
    args = ''
    for loc in location_list:
        phedexNode = loc.strip()
        try:
            Lexicon.cmsname(phedexNode)
        except Exception, text:
            msg =  "%s\n'%s' is not a valid Phedex Node Name" % (text,phedexNode)
            raise CrabException(msg)
        args += "&node=%s*" % phedexNode
Example #9
def expandIntoListOfPhedexNodeNames(location_list):
    """
    takes as input a list of locations and returns a list of PNNs
    raises CrabException if an input is not a valid PNN abbreviation
    uses https://cmsweb.cern.ch/phedex/datasvc/doc/nodes
    """

    # build API node filter, add wildcards which are not required by Crab2
    args = ''
    for loc in location_list:
        phedexNode = loc.strip()
        try:
            Lexicon.cmsname(phedexNode)
        except Exception, text:
            msg = "%s\n'%s' is not a valid Phedex Node Name" % (text,
                                                                phedexNode)
            raise CrabException(msg)
        args += "&node=%s*" % phedexNode
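
Both versions stop after building the node filter string. For illustration, a hedged sketch of what that filter ends up looking like for a hard-coded location list (the site names are placeholders):

import Lexicon

location_list = ["T2_US_UCSD", " T1_IT_CNAF "]  # placeholder sites; surrounding whitespace is stripped below

args = ''
for loc in location_list:
    phedexNode = loc.strip()
    Lexicon.cmsname(phedexNode)  # raises if the name is not a valid PNN
    args += "&node=%s*" % phedexNode

print(args)  # -> &node=T2_US_UCSD*&node=T1_IT_CNAF*
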
Example #10
def validateBWLists(cfg_params):
    # convert to lists for processing. But leave cfg_params
    # as strings, since this is what Crab2 code expects
    blackList = cfg_params.get("GRID.se_black_list", [])
    if type(blackList) == type("string"):
        blackList = blackList.strip().split(',')
    whiteList = cfg_params.get("GRID.se_white_list", [])
    if type(whiteList) == type("string"):
        whiteList = whiteList.strip().split(',')

    # make sure each item in the list is a valid cms node name
    # or possibly a shortcut like T3

    for site in blackList:
        try:
            Lexicon.cmsname(site)
        except Exception, text:
            msg = "ERROR in GRID.se_black_list: %s\n" % blackList
            msg += "%s\n'%s' is not a valid Phedex Node Name" % (text, site)
            raise CrabException(msg)
Example #11
def validateBWLists(cfg_params):
    # convert to lists for processing. But leave cfg_params
    # as strings, since this is what Crab2 code expects
    blackList = cfg_params.get("GRID.se_black_list", [] )
    if type(blackList) == type("string") :
        blackList = blackList.strip().split(',')
    whiteList = cfg_params.get("GRID.se_white_list", [] )
    if type(whiteList) == type("string") :
        whiteList = whiteList.strip().split(',')

    # make sure each item in the list is a valid cms node name
    # or possibly a shortcut like T3

    for site in blackList:
        try:
            Lexicon.cmsname(site)
        except Exception, text:
            msg = "ERROR in GRID.se_black_list: %s\n" % blackList
            msg += "%s\n'%s' is not a valid Phedex Node Name" % (text,site)
            raise CrabException(msg)
Example #12
    def reset_lex(self):
        self.evolution_steps = 0
        self.evolution_steps_label['text'] = self.evolution_steps

        self.lexicon = Lexicon(self.lexicon_size(),
                               phones=self.n_symbols(),
                               frequency_groups=self.lexicon.frequency_groups,
                               hard_max_length=self.lexicon.hard_max_length,
                               hard_start_length=self.hard_word_length())

        # figure out minimum length needed for whole lexicon
        total_possible_forms = 0
        for i in range(1, self.lexicon.hard_max_length):
            total_possible_forms += self.n_symbols()**i
            if total_possible_forms > len(self.lexicon):
                break
        self.min_len_needed[
            'text'] = 'max length needed for lexicon: {0}'.format(i)
        self.possible_forms['text'] = 'possible forms at length: {0}'.format(
            total_possible_forms)

        # zipf
        self.plot_3.cla()
        sorted_unig = sorted([w.unigram for w in self.lexicon.words])
        self.plot_3.set_xlim(0, self.lexicon.hard_max_length)
        self.plot_3.set_ylim(sorted_unig[0] - 1, sorted_unig[-1] + 1)
        self.plot_3.set_title('word length and unigram word information')

        lengths, unigrams = zip(*self.lexicon.lengths_and_unigrams())
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            lengths, unigrams)
        unig_pred = intercept + (slope *
                                 np.arange(self.lexicon.hard_max_length))

        self.zipf_scatter, = self.plot_3.plot(lengths, unigrams, 'o')
        self.zipf_line, = self.plot_3.plot(
            np.arange(self.lexicon.hard_max_length), unig_pred)

        self.update()
Example #13
def init_pomdp_dialog_agent(args) :
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_models_from_file = False
    if len(args) > 4 :
        if args[4].lower() == 'true' :
            load_models_from_file = True

    if load_models_from_file :
        parser = load_model('pomdp_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else :
        print "Instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)

    print "Instantiating DialogAgent"
    if load_models_from_file :
        agent = PomdpDialogAgent(parser, grounder, None, None, parse_depth=10, load_policy_from_file=True)
    else :
        agent = PomdpDialogAgent(parser, grounder, None, None, parse_depth=10, load_policy_from_file=False)

    if not load_models_from_file :
        print "reading in data and training parser from actions"
        D = agent.read_in_utterance_action_pairs(args[3])
        converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
        print "theta: "+str(parser.learner.theta)
        save_model(parser, 'pomdp_parser')
        #print 'Parser ontology : ', parser.ontology.preds

    return agent
Example #14
    def __init__(self,
                 polarity_strategy=None,
                 ego_involvement_strategy=None,
                 lexicon_size=20,
                 agent_vector_size=20,
                 lexicon_output_location=None):
        self.uidgen = UID()  #the unique id generator for this builder
        self.polarity_strategy = polarity_strategy  #the polarity generator for this set of agents
        self.ego_involvement_strategy = ego_involvement_strategy  #the ego-involvement parameter generator for this set of agents

        self.lex = Lexicon.Lexicon(cloudsize=lexicon_size,
                                   vector_size=agent_vector_size,
                                   filePath=lexicon_output_location)
        self.__numAgents = 0  #the size of this group of agents
Example #15
    def newnode(self, start, count, compound=False):
        #logging.info("new node: start=" + str(start) + " count=" + str(count))
        if not self.head:
            raise RuntimeError(
                "This SentenceLinkedList is null! Can't combine.")
        if start + count > self.size:
            logging.error(self.__str__())
            raise RuntimeError("Can't get " + str(count) +
                               " items start from " + str(start) +
                               " from the sentence!")

        startnode = self.get(start)
        endnode = self.get(start + count - 1)
        p = startnode
        sons = []
        EndOffset = p.StartOffset
        NewText = ""
        NewNorm = ""
        NewAtom = ""
        hasUpperRelations = []
        for i in range(count):
            if i == 0:
                spaces = ""
            else:
                if compound:
                    spaces = "_"
                else:
                    spaces = " " * (p.StartOffset - EndOffset)
            EndOffset = p.EndOffset
            NewText += spaces + p.text
            NewNorm += spaces + p.norm
            NewAtom += spaces + p.atom
            if p.UpperRelationship and p.UpperRelationship != 'H':
                hasUpperRelations.append(
                    FeatureOntology.GetFeatureID("has" + p.UpperRelationship))
            sons.append(p)
            p = p.next

        NewNode = SentenceNode(NewText)
        NewNode.norm = NewNorm
        NewNode.atom = NewAtom
        NewNode.sons = sons
        NewNode.StartOffset = startnode.StartOffset
        NewNode.EndOffset = endnode.EndOffset
        Lexicon.ApplyWordLengthFeature(NewNode)
        for haverelation in hasUpperRelations:
            NewNode.ApplyFeature(haverelation)
        return NewNode, startnode, endnode
Example #16
def generatePrefixDict():
    global Possibility, PrefixDict, total, Dictionary_URL
    fo = Lexicon.loadDict(Dictionary_URL)
    PrefixDict = set()
    FREQ = {}
    for line in fo.read().rstrip().split("\n"):
        word, freq = line.split(' ')[:2]
        FREQ[word] = float(freq)
        total += float(freq)  #calculate the total number of words
        for idx in range(len(word)):  #generate the prefix dictionary
            prefix = word[0:idx + 1]
            PrefixDict.add(prefix)
    fo.close()
    #Transform the freq into possibilities
    Possibility = dict(
        (key, log(value / total)) for key, value in FREQ.items())
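
To make the prefix-dictionary construction concrete, here is a small self-contained sketch of the same idea on an in-memory frequency table instead of a dictionary file (the words and counts are made up):

from math import log

FREQ = {"beijing": 5.0, "beijingdaxue": 3.0, "daxue": 8.0}  # made-up word frequencies
total = sum(FREQ.values())

# every prefix of every word, including the word itself, goes into the prefix set
PrefixDict = set()
for word in FREQ:
    for idx in range(len(word)):
        PrefixDict.add(word[0:idx + 1])

# frequencies -> log-probabilities, as generatePrefixDict() does at the end
Possibility = dict((key, log(value / total)) for key, value in FREQ.items())

print(sorted(PrefixDict))  # includes partial prefixes such as "beijingda"
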
Example #17
def LexicalAnalyzeTask( SubSentence, schema):

    NodeList = Tokenization.Tokenize(SubSentence)
    if not NodeList or NodeList.size == 0:
        return None, None

    Lexicon.ApplyLexiconToNodes(NodeList)
    # print("after ApplyLexiconToNodes" + OutputStringTokens_oneliner(NodeList))

    PrepareJSandJM(NodeList)
    #Lexicon.LexiconoQoCLookup(NodeList)

    NodeList, Dag, WinningRules = DynamicPipeline(NodeList, schema)
        # t = Thread(target=Cache.WriteSentenceDB, args=(SubSentence, NodeList))
        # t.start()

    return NodeList, Dag, WinningRules
Example #18
def init_dialog_agent(args):
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    print "Instantiating Parser"
    parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)
    parser = load_model('parser')
    grounder.parser = parser
    grounder.ontology = parser.ontology

    print "Instantiating DialogAgent"
    agent = PomdpDialogAgent(parser, grounder, None, None)

    #print "reading in data and training parser from actions"
    #D = agent.read_in_utterance_action_pairs(args[3])
    #converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
    #print "theta: "+str(parser.learner.theta)
    #save_model(parser, 'parser')
    #print 'Parser ontology : ', parser.ontology.preds

    return agent
Example #19
    def __init__(self, args):
        print 'args = ', args, '\n\n\n\n'
        if len(args) < 4:
            print 'Usage ', args[
                0], ' ont_file lex_file parser_train_pairs_file [load_models_from_file=true/false]'

        rospy.init_node('dialog_agent_aishwarya')

        self.user_log = open(MAIN_LOG_PATH + USER_LOG_FILE, 'a')
        self.error_log = open(MAIN_LOG_PATH + MAIN_ERROR_LOG_FILE, 'a')
        self.started_users = set()

        print "reading in Ontology"
        ont = Ontology.Ontology(args[1])
        print "predicates: " + str(ont.preds)
        print "types: " + str(ont.types)
        print "entries: " + str(ont.entries)
        self.ont = ont

        print "reading in Lexicon"
        lex = Lexicon.Lexicon(ont, args[2])
        print "surface forms: " + str(lex.surface_forms)
        print "categories: " + str(lex.categories)
        print "semantic forms: " + str(lex.semantic_forms)
        print "entries: " + str(lex.entries)
        self.lex = lex

        self.parser_train_file = args[3]

        self.load_models_from_file = False
        if len(args) > 4:
            if args[4].lower() == 'true':
                print 'Going to load from file'  # DEBUG
                self.load_models_from_file = True

        self.lock = Lock()
        self.service = rospy.Service('register_user', register_user,
                                     self.on_user_receipt)
Example #20
import sys

sys.path.append('.')  # necessary to import local libraries
import Ontology
import Lexicon
import CKYParser

print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2], word_embeddings_fn=sys.argv[5])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating CKYParser"
parser = CKYParser.CKYParser(ont, lex, lexicon_weight=1)
parser.allow_merge = False
parser.max_multiword_expression = 1
parser.max_missing_words_to_try = 0

print "reading in data and beginning training test"
d = parser.read_in_paired_utterance_semantics(sys.argv[3])
converged = parser.train_learner_on_semantic_forms(d, 20, reranker_beam=10)
if not converged:
Example #21
        whiteList = whiteList.strip().split(',')

    # make sure each item in the list is a valid cms node name
    # or possibly a shortcut like T3

    for site in blackList:
        try:
            Lexicon.cmsname(site)
        except Exception, text:
            msg = "ERROR in GRID.se_black_list: %s\n" % blackList
            msg += "%s\n'%s' is not a valid Phedex Node Name" % (text,site)
            raise CrabException(msg)

    for site in whiteList:
        try:
            Lexicon.cmsname(site)
        except Exception, text:
            msg = "ERROR in GRID.se_white_list: %s\n" % whiteList
            msg += "%s\n'%s' is not a valid Phedex Node Name" % (text,site)
            raise CrabException(msg)


def parseIntoList(param):
    """
    to be used to make sure that one crab config parameter is usable as a list of strings,
    even if it is a comma-separated string in the config file
    """
    if type(param) == type("string") :
        list = param.split(',')
        for item in list:
            item = item.strip()
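
The helper above is cut off: as written it strips each item but discards the result and returns nothing. One plausible completion, offered only as a sketch rather than the original CRAB2 code:

def parseIntoList(param):
    """
    make sure that one crab config parameter is usable as a list of strings,
    even if it is a comma-separated string in the config file
    """
    if type(param) == type("string"):
        param = [item.strip() for item in param.split(',') if item.strip()]
    return param
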
Example #22
class EvolGUI():
    def __init__(self, lexicon):
        self.lexicon = lexicon

        # each `root` object is a window
        # open one for each figure and all the buttons/sliders
        self.root = tk.Tk()
        self.root.title("Segmental Information by position")
        self.fig1 = Figure()
        self.canvas = FigureCanvasTkAgg(self.fig1, master=self.root)
        self.canvas.get_tk_widget().grid(row=0, column=0)

        self.root2 = tk.Tk()
        self.root2.title("Word Length Distribution")
        self.fig2 = Figure()
        self.canvas2 = FigureCanvasTkAgg(self.fig2, master=self.root2)
        self.canvas2.get_tk_widget().grid(row=0, column=0)

        self.root3 = tk.Tk()
        self.root3.title("Word Length and -log Word Probability")
        self.fig3 = Figure()
        self.canvas3 = FigureCanvasTkAgg(self.fig3, master=self.root3)
        self.canvas3.get_tk_widget().grid(row=0, column=0)

        self.root4 = tk.Tk()
        self.root4.title("Lexical Phoneme distribution")
        self.fig4 = Figure()
        self.canvas4 = FigureCanvasTkAgg(self.fig4, master=self.root4)
        self.canvas4.get_tk_widget().grid(row=0, column=0)

        self.root5 = tk.Tk()
        self.root5.title("Positional Entropy")
        self.fig5 = Figure()
        self.canvas5 = FigureCanvasTkAgg(self.fig5, master=self.root5)
        self.canvas5.get_tk_widget().grid(row=0, column=0)

        self.interaction_root = tk.Tk()
        self.interaction_root.title("Lexical Evolution GUI")
        button_frame = tk.Frame(self.interaction_root)
        button_frame.grid(row=0, column=0)

        self.evolution_steps = 0
        tk.Label(button_frame, text='evolution steps:').grid(row=0, column=0)
        self.evolution_steps_label = tk.Label(button_frame,
                                              text=self.evolution_steps)
        self.evolution_steps_label.grid(row=0, column=1)

        tk.Label(button_frame, text='lexicon size').grid(row=1, column=0)
        tk.Label(button_frame, text='n. symbols').grid(row=1, column=1)
        self.last_lexicon_size = len(self.lexicon)
        self.last_n_symbols = len(self.lexicon.phones)
        self.lexicon_size_text = tk.Entry(button_frame, width=6)
        self.lexicon_size_text.grid(row=2, column=0)
        self.lexicon_size_text.insert(0, str(self.last_lexicon_size))
        self.n_symbols_text = tk.Entry(button_frame, width=4)
        self.n_symbols_text.grid(row=2, column=1)
        self.n_symbols_text.insert(0, str(self.last_n_symbols))

        # figure out minimum length needed for whole lexicon
        total_possible_forms = 0
        for i in range(1, self.lexicon.hard_max_length):
            total_possible_forms += self.n_symbols()**i
            if total_possible_forms > len(self.lexicon):
                break

        self.min_len_needed = tk.Label(
            button_frame, text='max length needed for lexicon: {0}'.format(i))
        self.min_len_needed.grid(row=3, column=0)
        self.possible_forms = tk.Label(
            button_frame,
            text='possible forms at length: {0}'.format(total_possible_forms))
        self.possible_forms.grid(row=3, column=1)

        tk.Label(button_frame,
                 text='starting word length (-1 for Zipfian)').grid(row=4,
                                                                    column=0)
        self.last_hard_word_length = 6
        self.hard_word_length_text = tk.Entry(button_frame, width=4)
        self.hard_word_length_text.grid(row=4, column=1)
        self.hard_word_length_text.insert(0, str(self.last_hard_word_length))

        slider_frame = tk.Frame(self.interaction_root)
        slider_frame.grid(row=0, column=1)

        tk.Label(slider_frame, text='lexicon name').grid(row=0, column=1)
        self.last_lex_name = ''
        self.lex_name_text = tk.Entry(slider_frame, width=6)
        self.lex_name_text.grid(row=1, column=1)
        self.lex_name_text.insert(0, str(self.last_lex_name))

        self.merger_p_slider = tk.Scale(slider_frame,
                                        from_=0,
                                        to=100,
                                        orient=tk.HORIZONTAL,
                                        label='merger prob.')
        self.merger_p_slider.grid(row=2, column=1)
        self.merger_p_slider.set(85)

        # determine skew in distribution for merger
        tk.Label(slider_frame, text='phone. dist. E').grid(row=3, column=1)
        self.last_symbol_E = 1.5
        self.symbol_E_text = tk.Entry(slider_frame, width=4)
        self.symbol_E_text.grid(row=4, column=1)
        self.symbol_E_text.insert(0, str(self.last_symbol_E))

        # skew in distribution for chosing word to undergo merger/deletion
        tk.Label(slider_frame, text='word E').grid(row=3, column=0)
        self.last_word_E = 1.
        self.word_E_text = tk.Entry(slider_frame, width=4)
        self.word_E_text.grid(row=4, column=0)
        self.word_E_text.insert(0, str(self.last_word_E))

        # skew in distribution for chosing segment in word which will undergo merger/deletion
        tk.Label(slider_frame, text='segment E').grid(row=3, column=2)
        self.last_segment_E = 1.5
        self.segment_E_text = tk.Entry(slider_frame, width=4)
        self.segment_E_text.grid(row=4, column=2)
        self.segment_E_text.insert(0, str(self.last_segment_E))

        nb_steps = 1000
        # prepare the buttons
        tk.Button(button_frame, text="One Step",
                  command=self.step).grid(row=5, column=0)
        tk.Button(button_frame,
                  text="{0} Steps".format(nb_steps),
                  command=lambda: self.step(nb_steps)).grid(row=5, column=1)
        tk.Button(button_frame, text="Reset Lexicon",
                  command=self.reset_lex).grid(row=6, column=0)
        tk.Button(button_frame, text="Quit", command=sys.exit).grid(row=6,
                                                                    column=1)
        tk.Button(button_frame,
                  text='Save Lexicon',
                  command=lambda: self.lexicon.save('lex_{0}_{1}.txt'.format(
                      self.lex_name_text.get(), self.evolution_steps))).grid(
                          row=7, column=0)
        tk.Button(button_frame, text='Save Plots',
                  command=self.save_plots).grid(row=7, column=1)

        # prepare the line graph
        self.plot_1 = self.fig1.subplots()

        max_si = 0
        self.avg_si_lines = []
        if self.lexicon.frequency_groups == 2:
            line_labels = ['high frequency', 'low frequency']
        else:
            line_labels = [
                'group {0}'.format(i + 1)
                for i in range(self.lexicon.frequency_groups)
            ]

        for i in range(self.lexicon.frequency_groups):
            x = np.arange(self.lexicon.hard_max_length) + 1
            avg_si = self.lexicon.avg_segmental_info(which_group=i + 1)
            new_line, = self.plot_1.plot(x,
                                         avg_si,
                                         color=_colors[i],
                                         label=line_labels[i])
            self.avg_si_lines.append(new_line)
            max_si = max(max_si, max(avg_si))

        self.plot_1.set_xlim(1, self.lexicon.hard_max_length)
        for y_lim in range(5, 50, 5):
            if y_lim > max_si: break
        self.plot_1.set_ylim(-.5, y_lim)

        self.plot_1.legend(handles=self.avg_si_lines)
        self.plot_1.set_xlabel('seg. position')
        self.plot_1.set_ylabel('mean seg. info.')

        #self.plot_1.set_title('avg. seg info')

        # prep the word length histogram
        self.plot_2 = self.fig2.subplots()

        hist_data = [
            self.lexicon.word_lengths(i + 1)
            for i in range(self.lexicon.frequency_groups)
        ]
        self.wl_hist = self.plot_2.hist(
            hist_data,
            range=(1, self.lexicon.hard_max_length),
            stacked=False,
            color=_colors[:self.lexicon.frequency_groups])
        self.plot_2.set_xlabel('word length')
        self.plot_2.set_ylabel('count')

        # zipf!
        sorted_unig = sorted([w.unigram for w in self.lexicon.words])
        self.plot_3 = self.fig3.subplots()
        self.plot_3.set_xlim(0, self.lexicon.hard_max_length)
        self.plot_3.set_ylim(sorted_unig[0] - 1, sorted_unig[-1] + 1)
        self.plot_3.set_xlabel('word length')
        self.plot_3.set_ylabel('-log word prob.')
        #self.plot_3.set_title('word length and unigram word information')

        lengths, unigrams = zip(*self.lexicon.lengths_and_unigrams())
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            lengths, unigrams)
        unig_pred = intercept + (slope *
                                 np.arange(self.lexicon.hard_max_length))

        self.zipf_scatter, = self.plot_3.plot(lengths, unigrams, 'o')
        self.zipf_line, = self.plot_3.plot(
            np.arange(self.lexicon.hard_max_length), unig_pred)

        # phoneme distribution
        """
		self.plot_4 = self.fig4.subplots(2)
		ks, ps =  p_dist_to_lists(self.lexicon.seg_ps, sort_by_keys = True)
		self.phoneme_dist_bars = self.plot_4[0].bar(np.arange(len(ks)), ps, color = _colors[-3])
		self.plot_4[0].set_ylim(0,.75 if max(ps) < .75 else 1)
		self.plot_4[0].set_xticks(np.arange(len(ks)))
		self.plot_4[0].set_xticklabels(ks)
		self.plot_4[0].set_title('seg. distribution in lexicon')

		# segmental entropy
		self.edge_ent_bars = []
		for i, edge_ent in enumerate(self.lexicon.edge_entropies()):
			new_bar = self.plot_4[1].bar((i * .5) + (1.5 * np.arange(self.lexicon.frequency_groups)), 
				edge_ent, color = _colors[-2+i], width = .5)
			self.edge_ent_bars.append(new_bar)
		
		self.plot_4[1].set_xticks(.25 + np.arange(self.lexicon.frequency_groups) * 1.5)
		if self.lexicon.frequency_groups == 2:
			self.plot_4[1].set_xticklabels(['high frequency', 'low frequency'])
		else:
			self.plot_4[1].set_xticklabels(['group {0}'.format(i + 1) for i in range(self.lexicon.frequency_groups)])
		self.plot_4[1].set_title('seg. entropy - first/last segment')		
		self.plot_4[1].legend(labels = ['first', 'last'])
		"""

        self.plot_4 = self.fig4.subplots()
        ks, ps = p_dist_to_lists(self.lexicon.seg_ps, sort_by_keys=True)
        self.phoneme_dist_bars = self.plot_4.bar(np.arange(len(ks)),
                                                 ps,
                                                 color=_colors[-3])
        self.plot_4.set_ylim(0, .75 if max(ps) < .75 else 1)
        self.plot_4.set_xticks(np.arange(len(ks)))
        self.plot_4.set_xticklabels(ks)
        self.plot_4.set_title('seg. distribution in lexicon')

        # positional entropy
        self.plot_5 = self.fig5.subplots()
        self.pos_ent_lines = []
        max_pe = 0
        # could use this from above but this makes it easier to read...
        if self.lexicon.frequency_groups == 2:
            line_labels = ['high frequency', 'low frequency']
        else:
            line_labels = [
                'group {0}'.format(i + 1)
                for i in range(self.lexicon.frequency_groups)
            ]

        for i in range(self.lexicon.frequency_groups):
            x = np.arange(self.lexicon.hard_max_length) + 1
            pos_ent = self.lexicon.positional_entropy(which_group=i + 1)
            new_line, = self.plot_5.plot(x,
                                         pos_ent,
                                         color=_colors[i],
                                         label=line_labels[i])
            self.pos_ent_lines.append(new_line)
            max_pe = max(max_pe, max(pos_ent))

        self.plot_5.set_xlim(1, self.lexicon.hard_max_length)
        for y_lim in range(5, 30, 3):
            if y_lim > max_pe: break
        self.plot_5.set_ylim(-.5, y_lim)

        self.plot_5.legend(handles=self.pos_ent_lines)

        tk.mainloop()

    def save_plots(self):
        self.fig1.savefig('plts/plt_1_iter_{0}.png'.format(
            self.evolution_steps))
        self.fig2.savefig('plts/plt_2_iter_{0}.png'.format(
            self.evolution_steps))
        self.fig3.savefig('plts/plt_3_iter_{0}.png'.format(
            self.evolution_steps))
        self.fig4.savefig('plts/plt_4_iter_{0}.png'.format(
            self.evolution_steps))
        self.fig5.savefig('plts/plt_5_iter_{0}.png'.format(
            self.evolution_steps))

    def update(self):
        # put all the updating in try/catch in case someone closes a window
        try:
            # update the plot with new data
            max_si = 0
            for i, line in enumerate(self.avg_si_lines):
                avg_si = self.lexicon.avg_segmental_info(which_group=i + 1)
                line.set_ydata(avg_si)
                max_si = max(max_si, max(avg_si))

            for y_lim in range(5, 50, 5):
                if y_lim > max_si: break
            self.plot_1.set_ylim(-.5, y_lim)
            self.canvas.draw()
        except:
            pass
            #print('unable to update plot 1')

        # word length histogram
        try:
            self.plot_2.cla()
            hist_data = [
                self.lexicon.word_lengths(i + 1)
                for i in range(self.lexicon.frequency_groups)
            ]
            self.wl_hist = self.plot_2.hist(
                hist_data,
                range=(1, self.lexicon.hard_max_length),
                stacked=False,
                color=_colors[:self.lexicon.frequency_groups])
            self.plot_2.set_title('word lengths')
            self.canvas2.draw()
        except:
            pass
            #print('unable to update plot 2')

        try:
            # zipf scatter
            lengths, unigrams = zip(*self.lexicon.lengths_and_unigrams())
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                lengths, unigrams)
            unig_pred = intercept + (slope *
                                     np.arange(self.lexicon.hard_max_length))

            sorted_unig = sorted([w.unigram for w in self.lexicon.words])
            self.plot_3.set_ylim(sorted_unig[0] - 1, sorted_unig[-1] + 1)
            self.zipf_scatter.set_xdata(lengths)
            self.zipf_line.set_ydata(unig_pred)
            self.canvas3.draw()
        except:
            pass
            #print('unable to update plot 3')

        try:
            """
			ks, ps =  p_dist_to_lists(self.lexicon.seg_ps, sort_by_keys = True)
			self.plot_4[0].cla()
			self.phoneme_dist_bars = self.plot_4[0].bar(np.arange(len(ks)), ps, color = _colors[-3])
			self.plot_4[0].set_ylim(0,.75 if max(ps) < .75 else 1)
			self.plot_4[0].set_xticks(np.arange(len(ks)))
			self.plot_4[0].set_xticklabels(ks)
			self.plot_4[0].set_title('seg. distribution in lexicon')

		# first/last seg info
			self.plot_4[1].cla()
			for i, edge_ent in enumerate(self.lexicon.edge_entropies()):
				new_bar = self.plot_4[1].bar((i * .5) + (1.5 * np.arange(self.lexicon.frequency_groups)), 
					edge_ent, color = _colors[-2+i], width = .5)
				self.edge_ent_bars.append(new_bar)
			self.plot_4[1].set_xticks(.25 + np.arange(self.lexicon.frequency_groups) * 1.5)
			if self.lexicon.frequency_groups == 2:
				self.plot_4[1].set_xticklabels(['high frequency', 'low frequency'])
			else:
				self.plot_4[1].set_xticklabels(['group {0}'.format(i + 1) for i in range(self.lexicon.frequency_groups)])
			self.plot_4[1].set_title('avg. information - first/last segment')
			self.plot_4[1].legend(labels = ['first', 'last'])
			self.canvas4.draw()
			"""
            ks, ps = p_dist_to_lists(self.lexicon.seg_ps, sort_by_keys=True)
            self.plot_4.cla()
            self.phoneme_dist_bars = self.plot_4.bar(np.arange(len(ks)),
                                                     ps,
                                                     color=_colors[-3])
            self.plot_4.set_ylim(0, .75 if max(ps) < .75 else 1)
            self.plot_4.set_xticks(np.arange(len(ks)))
            self.plot_4.set_xticklabels(ks)
            self.plot_4.set_title('seg. distribution in lexicon')
            self.canvas4.draw()
        except:
            pass
            #print('unable to update plot 4')

        try:
            max_pe = 0
            for i, line in enumerate(self.pos_ent_lines):
                pos_ent = self.lexicon.positional_entropy(which_group=i + 1)
                line.set_ydata(pos_ent)
                max_pe = max(max_pe, max(pos_ent))

            for y_lim in range(5, 30, 3):
                if y_lim > max_pe: break
            self.plot_5.set_ylim(-.5, y_lim)
            self.canvas5.draw()

        except:
            pass
            #print('unable to update plot 5')

    def step(self, n_steps=1):
        if self.evolution_steps == 0:
            self.lexicon.save('lex_{0}_{1}.txt'.format(
                self.lex_name_text.get(), self.evolution_steps))

        for i in range(n_steps):
            self.evolution_steps += 1
            self.evolution_steps_label['text'] = self.evolution_steps
            self.lexicon.change_segs(word_E=self.word_E(),
                                     symbol_E=self.symbol_E(),
                                     merger_p=self.merger_p())
            print('step: {0} - total steps: {1}'.format(
                i + 1, self.evolution_steps))
            if self.evolution_steps % 25 == 0:
                self.update()
                self.lexicon.save('lex_{0}_{1}.txt'.format(
                    self.lex_name_text.get(), self.evolution_steps))

        for i, w in enumerate(self.lexicon.words[:10]):
            print(i, w, w.frequency)

        self.update()
        self.lexicon.save('lex_{0}_{1}.txt'.format(self.lex_name_text.get(),
                                                   self.evolution_steps))

    def reset_lex(self):
        self.evolution_steps = 0
        self.evolution_steps_label['text'] = self.evolution_steps

        self.lexicon = Lexicon(self.lexicon_size(),
                               phones=self.n_symbols(),
                               frequency_groups=self.lexicon.frequency_groups,
                               hard_max_length=self.lexicon.hard_max_length,
                               hard_start_length=self.hard_word_length())

        # figure out minimum length needed for whole lexicon
        total_possible_forms = 0
        for i in range(1, self.lexicon.hard_max_length):
            total_possible_forms += self.n_symbols()**i
            if total_possible_forms > len(self.lexicon):
                break
        self.min_len_needed[
            'text'] = 'max length needed for lexicon: {0}'.format(i)
        self.possible_forms['text'] = 'possible forms at length: {0}'.format(
            total_possible_forms)

        # zipf
        self.plot_3.cla()
        sorted_unig = sorted([w.unigram for w in self.lexicon.words])
        self.plot_3.set_xlim(0, self.lexicon.hard_max_length)
        self.plot_3.set_ylim(sorted_unig[0] - 1, sorted_unig[-1] + 1)
        self.plot_3.set_title('word length and unigram word information')

        lengths, unigrams = zip(*self.lexicon.lengths_and_unigrams())
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            lengths, unigrams)
        unig_pred = intercept + (slope *
                                 np.arange(self.lexicon.hard_max_length))

        self.zipf_scatter, = self.plot_3.plot(lengths, unigrams, 'o')
        self.zipf_line, = self.plot_3.plot(
            np.arange(self.lexicon.hard_max_length), unig_pred)

        self.update()

    def merger_p(self):
        return self.merger_p_slider.get() / 100

    def symbol_E(self):
        try:
            symbol_E = float(self.symbol_E_text.get())
        except:
            symbol_E = self.last_symbol_E
        self.last_symbol_E = symbol_E
        return symbol_E

    def word_E(self):
        try:
            word_E = int(self.word_E_text.get())
        except:
            word_E = self.last_word_E
        self.last_word_E = word_E
        return word_E

    def lexicon_size(self):
        try:
            lexicon_size = int(self.lexicon_size_text.get())
        except:
            lexicon_size = self.last_lexicon_size
        self.last_lexicon_size = lexicon_size
        return lexicon_size

    def n_symbols(self):
        try:
            n_symbols = int(self.n_symbols_text.get())
        except:
            n_symbols = self.last_n_symbols
        self.last_n_symbols = n_symbols
        return n_symbols

    def hard_word_length(self):
        try:
            hard_word_length = int(self.hard_word_length_text.get())
        except:
            hard_word_length = self.last_hard_word_length
        self.last_hard_word_length = hard_word_length
        if hard_word_length < 0:
            hard_word_length = None
        return hard_word_length
Example #23
import sys

sys.path.append('.')  # necessary to import local libraries
import Ontology
import Lexicon
import CKYParser

print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
commutative_idxs = [ont.preds.index('and'), ont.preds.index('or')]
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating CKYParser"
parser = CKYParser.CKYParser(ont, lex, use_language_model=True)

print "reading in data and beginning training test"
d = parser.read_in_paired_utterance_semantics(sys.argv[3])
converged = parser.train_learner_on_semantic_forms(d, 10, reranker_beam=10)
if not converged:
    raise AssertionError("Training failed to converge to correct values.")

print "reading in data and beginning evaluation test"
Example #24
def LoadCommon():
    if not utils.DisableDB:
        InitDB()

        import Cache
        Cache.LoadSentenceDB()

    PipeLineLocation = ParserConfig.get("main", "Pipelinefile")
    FILE_ABS_PATH = os.path.dirname(os.path.abspath(__file__))
    XLocation = FILE_ABS_PATH  + '/' + os.path.dirname(PipeLineLocation) + "/"
    #XLocation = os.path.dirname(PipeLineLocation) + "/"

    FeaturefileLocation = os.path.join(XLocation, "../Y/feature.txt")
    GlobalmacroLocation = os.path.join(XLocation, "../Y/GlobalMacro.txt")
    # PunctuatefileLocation = os.path.join(XLocation, "../Y/LexY-EnglishPunctuate.txt")


    FeatureOntology.LoadFeatureOntology(FeaturefileLocation)
    systemfileolderthanDB = SystemFileOlderThanDB(XLocation)

    LoadPipeline(PipeLineLocation)

    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("Runtype:" + ParserConfig.get("main", "runtype"))
    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("utils.Runtype:" + utils.ParserConfig.get("main", "runtype"))

    Rules.LoadGlobalMacro(GlobalmacroLocation)


    # Lexicon.LoadLexicon(PunctuatefileLocation)

    for action in PipeLine:
        if action.startswith("FSA "):
            Rulefile = action[3:].strip()
            Rules.LoadRules(XLocation, Rulefile,systemfileolderthanDB)

        if action.startswith("DAGFSA "):
            Rulefile = action[6:].strip()
            Rules.LoadRules(XLocation, Rulefile,systemfileolderthanDB)

        if action.startswith("DAGFSA_APP "): #FUZZY
            Rulefile = action[10:].strip()
            Rules.LoadRules(XLocation, Rulefile,systemfileolderthanDB, fuzzy=True)

        if action.startswith("Lookup Spelling:"):
            Spellfile = action[action.index(":")+1:].strip().split(",")
            for spell in Spellfile:
                spell = spell.strip()
                if spell:
                    Lexicon.LoadExtraReference(XLocation + spell, Lexicon._LexiconCuobieziDict)

        if action.startswith("Lookup Encoding:"):
            Encodefile = action[action.index(":")+1:].strip().split(",")
            for encode in Encodefile:
                encode = encode.strip()
                if encode:
                    Lexicon.LoadExtraReference(XLocation + encode, Lexicon._LexiconFantiDict)

        if action.startswith("Lookup Main:"):
            Mainfile = action[action.index(":")+1:].strip().split(",")
            for main in Mainfile:
                main = main.strip()
                if main:
                    Lexicon.LoadMainLexicon(XLocation + main)

        if action.startswith("Lookup SegmentSlash:"):
            Slashfile = action[action.index(":")+1:].strip().split(",")
            for slash in Slashfile:
                slash = slash.strip()
                if slash:
                    Lexicon.LoadSegmentSlash(XLocation + slash)

        if action.startswith("Lookup Lex:"):
            Lexfile = action[action.index(":")+1:].strip().split(",")
            for lex in Lexfile:
                lex = lex.strip()
                if lex:
                    Lexicon.LoadLexicon(XLocation + lex)

        # (O.O)
        if action.startswith("Stemming:"):
            Stemfile = action[action.index(":") + 1:].strip().split(",")
            inf = Stemfile[0].strip()
            Rules.LoadRules(XLocation, inf, systemfileolderthanDB)
            Lexicon.LoadSuffix(XLocation + inf, inf)
            for stem in Stemfile[1:]:
                stem = stem.strip()
                if stem:
                    Lexicon.LoadLexicon(XLocation + stem, lookupSource=LexiconLookupSource.stemming)

        if action.startswith("Lookup Compound:"):
            Compoundfile = action[action.index(":")+1:].strip().split(",")
            for compound in Compoundfile:
                compound = compound.strip()
                if compound:
                    Lexicon.LoadLexicon(XLocation + compound, lookupSource=LexiconLookupSource.Compound)

        if action.startswith("Lookup defLex:"):
            Compoundfile = action[action.index(":")+1:].strip().split(",")
            for compound in Compoundfile:
                compound = compound.strip()
                if compound:
                    Lexicon.LoadLexicon(XLocation + compound, lookupSource=LexiconLookupSource.defLex)

        if action.startswith("Lookup External:"):
            Externalfile = action[action.index(":")+1:].strip().split(",")
            for external in Externalfile:
                external = external.strip()
                if external:
                    Lexicon.LoadLexicon(XLocation + external,lookupSource=LexiconLookupSource.External)

        if action.startswith("Lookup oQcQ:"):
            oQoCfile = action[action.index(":")+1:].strip().split(",")
            for oQoC in oQoCfile:
                oQoC = oQoC.strip()
                if oQoC:
                    Lexicon.LoadLexicon(XLocation + oQoC,lookupSource=LexiconLookupSource.oQcQ)

        if action.startswith("Lookup IE:"):
            compositefile = action[action.index(":")+1:].strip().split(",")
            for composite in compositefile:
                comp = composite.strip()
                if comp:
                    Lexicon.LoadCompositeKG(XLocation + comp)

    Lexicon.LoadSegmentLexicon()
    UpdateSystemFileFromDB(XLocation)

    if not utils.DisableDB:
        CloseDB(utils.DBCon)
    if ParserConfig.get("main", "runtype") == "Debug":
        logging.debug("Start writing temporary rule files")
        Rules.OutputRuleFiles(ParserConfig.get("main", "compiledfolder"))
        FeatureOntology.OutputFeatureOntologyFile(ParserConfig.get("main", "compiledfolder"))
        logging.debug("Start writing temporary lex file.")
        #Lexicon.OutputLexiconFile(ParserConfig.get("main", "compiledfolder"))


    #Rules._PreProcess_RuleIDNormalize()
    logging.debug("Done of LoadCommon!")
Example #25
            except KeyError:
                index = self.labels_dict['O']
            labels_list.append(index)

        return labels_list

    def show_data_info(self):
        """
        :return: None
        Show info about the Data object: maximum sentence length, maximum number of words in a single sentence, and number of sentences in the input file
        """
        print('Data info:')
        print('max sentence length:', self.properties['max_sentence_length'])
        print('max words in a single sentence:', self.properties['max_words_number'])
        print('number of sentences:', self.properties['sentence_number'])


if __name__ == '__main__':
    tic = time.time()
    print('Testing the Data class...')
    lex = Lexicon.Lexicon()
    print(1)
    path = r'NERData\MSRA\msra_train_bio.txt'
    data = Data(path, lex)
    data.show_data_info()
    print(data.chars_dict, data.labels_dict, sep='\n')
    for i in data.data[0]:
        print(i)
    toc = time.time()
    print('elapsed time:', toc - tic)
Example #26
def main():

    # Load parameters from command line.
    ontology_fn = FLAGS_ontology_fn
    lexicon_fn = FLAGS_lexicon_fn
    train_pairs_fn = FLAGS_train_pairs_fn
    model_fn = FLAGS_model_fn
    validation_pairs_fn = FLAGS_validation_pairs_fn
    lexicon_embeddings = FLAGS_lexicon_embeddings
    max_epochs = FLAGS_max_epochs
    epochs_between_validations = FLAGS_epochs_between_validations
    lexicon_weight = FLAGS_lexicon_weight
    allow_merge = True if FLAGS_allow_merge == 1 else False
    perform_type_raising = True if FLAGS_perform_type_raising == 1 else False
    verbose = FLAGS_verbose
    use_condor = True if FLAGS_use_condor == 1 else False
    condor_target_dir = FLAGS_condor_target_dir
    condor_script_dir = FLAGS_condor_script_dir
    assert validation_pairs_fn is None or max_epochs >= epochs_between_validations
    assert not use_condor or (condor_target_dir is not None
                              and condor_script_dir is not None)
    assert max_epochs >= 0 or train_pairs_fn is not None

    o = Ontology.Ontology(ontology_fn)
    l = Lexicon.Lexicon(
        o,
        lexicon_fn,
        word_embeddings_fn=lexicon_embeddings,
    )
    p = CKYParser.CKYParser(o,
                            l,
                            allow_merge=allow_merge,
                            lexicon_weight=lexicon_weight,
                            perform_type_raising=perform_type_raising)

    # hyperparameter adjustments
    p.max_multiword_expression = 1
    p.max_missing_words_to_try = 0  # basically disallows polysemy that isn't already present in lexicon

    # Train the parser one epoch at a time, examining validation performance between each epoch.
    if max_epochs > 0:
        train_data = p.read_in_paired_utterance_semantics(train_pairs_fn)
        val_data = p.read_in_paired_utterance_semantics(validation_pairs_fn) \
            if validation_pairs_fn is not None else None
        print "finished instantiating parser; beginning training"
        for epoch in range(0, max_epochs, epochs_between_validations):
            if val_data is not None:
                acc_at_1 = get_performance_on_pairs(p, val_data)
                print "validation accuracy at 1 for epoch " + str(
                    epoch) + ": " + str(acc_at_1)
            converged = p.train_learner_on_semantic_forms(
                train_data,
                epochs=epochs_between_validations,
                epoch_offset=epoch,
                reranker_beam=1,
                verbose=verbose,
                use_condor=use_condor,
                condor_target_dir=condor_target_dir,
                condor_script_dir=condor_script_dir)
            if converged:
                print "training converged after epoch " + str(epoch)
                break
        if val_data is not None:
            acc_at_1 = get_performance_on_pairs(p, val_data)
            print "validation accuracy at 1 at training stop: " + str(acc_at_1)

    # Write the parser to file.
    print "writing trained parser to file..."
    with open(model_fn, 'wb') as f:
        pickle.dump(p, f)
    print "... done"
Example #27
from Lexicon import *
from GUI import *

if __name__ == '__main__':
    print('\n\n\n')
    n_words = 1000
    n_phones = 10
    #phones = {'a' : 10, 'b' : 5, 'c' : 5, 'd' : 1}
    l = Lexicon(n_words,
                phones=n_phones,
                frequency_groups=2,
                hard_start_length=6)

    EvolGUI(l)
Example #28
def DynamicPipeline(NodeList, schema):
    WinningRules = {}
    Dag = DependencyTree.DependencyTree()


    for action in PipeLine:
        if action == "segmentation":
            continue
        if action == "apply lexicons":
            continue

        if action == "SEGMENTATION COMPLETE" and schema == "segonly":
            break
        if action == "SHALLOW COMPLETE" and schema == "shallowcomplete":
            break

        #applies caseab, caseAb, caseaB, or caseAB
        if action == "CASES":
            Lexicon.ApplyCasesToNodes(NodeList)

        if action.startswith("FSA "):
            Rulefile = action[3:].strip()
            WinningRules.update(MatchAndApplyRuleFile(NodeList, Rulefile))
            # if NodeList:
            #     logging.debug(NodeList.root(True).CleanOutput(KeepOriginFeature=True).toJSON())

        # if action.startswith("lookup"):
        #     lookupSourceName = action[6:].strip()
        #     for x in LexiconLookupSource:
        #         if x.name == lookupSourceName:
        #             Lexicon.LexiconLookup(NodeList, x)
        #
        # if action == "APPLY COMPOSITE KG":
        #     Lexicon.ApplyCompositeKG(NodeList)

        if action.startswith("Lookup defLex:") or action.startswith("Lookup External:") \
                or action.startswith("Lookup oQcQ") or action.startswith("Lookup Compound:"):
            lookupSourceName = action[6:action.index(":")].strip()
            for x in LexiconLookupSource:
                if x.name == lookupSourceName:
                    Lexicon.LexiconLookup(NodeList, x)

        if action.startswith("Lookup IE"):
            Lexicon.ApplyCompositeKG(NodeList)
        #
        # if action == "TRANSFORM DAG":
        #     Dag.transform(NodeList)
        #     logging.info("Dag:{}".format(Dag))

        if action.startswith("DAGFSA "):
            if len(Dag.nodes) == 0:
                try:
                    Dag.transform(NodeList)
                except Exception as e:
                    logging.error("Failed to transfer the NodeList to Dag due to:\n{}".format(e))
                    return NodeList, Dag, WinningRules
            Rulefile = action[7:].strip()
            WinningRules.update(MatchAndApplyDagRuleFile(Dag, Rulefile))

        if action.startswith("DAGFSA_APP "):
            if len(Dag.nodes) == 0:
                try:
                    Dag.transform(NodeList)
                except Exception as e:
                    logging.error("Failed to transfer the NodeList to Dag due to:\n{}".format(e))
                    return NodeList, Dag, WinningRules
            Rulefile = action[10:].strip()
            WinningRules.update(MatchAndApplyDagRuleFile(Dag, Rulefile))

    return NodeList, Dag, WinningRules
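
A hedged usage sketch for the function above; only the DynamicPipeline signature, the schema values ("segonly", "shallowcomplete") and the returned triple come from the code itself, the rest is assumed.

# Sketch only: run the dynamic pipeline on an already-built NodeList and log
# the names of the rules that fired.
import logging

def run_pipeline(node_list, schema="shallowcomplete"):
    node_list, dag, winning_rules = DynamicPipeline(node_list, schema)
    for rule_name in winning_rules:
        logging.debug("winning rule: %s", rule_name)
    return node_list, dag, winning_rules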
Beispiel #29
0
        for suffix in ['DBSReader', 'DBSWriter']:
            if url.endswith(suffix):
                url = url[0:-len(suffix)]
        readUrl  = url + 'DBSReader'
        writeUrl = url + 'DBSWriter'

    readApi  = DbsApi(url=readUrl)
    writeApi = DbsApi(url=writeUrl)

    dataset = options.dataset
    if options.new_location:
        new_location = options.new_location

    ###sanitize input
    # dataset name
    Lexicon.dataset(dataset)
    
    # PNN
    if new_location:
        Lexicon.cmsname(new_location)

    # process dataset by blocks

    blockDicts = readApi.listBlocks(dataset=dataset, detail=True)
    for block in blockDicts:
        blName = block['block_name']
        location = block['origin_site_name']
        logging.debug('block %s at location: %s' % (blName, location))
        if new_location:
            writeApi.updateBlockSiteName(block_name=blName, origin_site_name=new_location)
            logging.debug('location set to %s' % (new_location))
Beispiel #30
0
    def transform(self, nodelist):  # Transform from SentenceLinkedList to DependencyTree
        if logging.root.isEnabledFor(logging.DEBUG):
            logging.debug("Start to transform:\n {}".format(
                jsonpickle.dumps(nodelist)))
        self.fulltext = nodelist.root().text
        self.fullnorm = nodelist.root().norm
        self.fullatom = nodelist.root().atom
        root = nodelist.head
        if root.text == '' and utils.FeatureID_JS in root.features:
            root = root.next  #ignore the first empty (virtual) JS node

        temp_subgraphs = []
        # Collect all the leaf nodes into self.nodes.
        while root is not None:
            #each "root" has a tree, independent from others.
            node = root
            nodestack = set()
            while node:
                if node.sons:
                    if len(node.sons) == 2 and len(node.text) == 2 and len(
                            node.sons[0].text) == 1 and len(
                                node.sons[1].text) == 1:
                        DanziDict.update({node: node.sons})
                    if node.next:
                        nodestack.add(node.next)
                    node = node.sons[0]
                else:
                    if not (node.text == ''
                            and utils.FeatureID_JM in node.features):
                        self.nodes.update({node.ID: copy.deepcopy(node)
                                           })  # add leaf node to self.nodes.

                    if node == root:  #if node is in root level, don't get next.
                        if nodestack:
                            node = nodestack.pop()
                        else:
                            node = None
                        continue

                    node = node.next
                    if node is None and nodestack:
                        node = nodestack.pop()
            if not (root.text == '' and utils.FeatureID_JM in root.features):
                temp_subgraphs.append(SubGraph(root))
                self._roots.append(root.ID)
            root = root.next

        #filling up the subgraphs.
        while temp_subgraphs:
            subgraph = temp_subgraphs.pop()
            node = subgraph.startnode

            if node.sons:
                subnode = node.sons[0]
                nodestack = set()
                while subnode:
                    if subnode.sons:
                        if utils.FeatureID_H not in subnode.features:
                            temp_subgraphs.append(SubGraph(
                                subnode))  # non-leaf, non-H. it is a subgraph.
                            subgraph.leaves.append(
                                [subnode.ID, subnode.UpperRelationship])
                            subnode = subnode.next
                            if subnode is None and nodestack:
                                subnode = nodestack.pop()
                        else:
                            if subnode.next:
                                nodestack.add(subnode.next)
                            subnode = subnode.sons[0]
                    else:  # this is a leaf node.
                        #  use the copy in self.nodes to apply feature modification
                        if utils.FeatureID_H in subnode.features:
                            subgraph.headID = subnode.ID
                            self.nodes[subnode.ID].features.update(
                                subgraph.startnode.features)
                            Lexicon.ApplyWordLengthFeature(
                                self.nodes[subnode.ID])
                        else:
                            if not (subnode.text == '' and utils.FeatureID_JM
                                    in subnode.features):
                                subgraph.leaves.append(
                                    [subnode.ID, subnode.UpperRelationship])
                        subnode = subnode.next
                        if subnode is None and nodestack:
                            subnode = nodestack.pop()
            else:
                subgraph.headID = subgraph.startnode.ID

            self._subgraphs.append(subgraph)  # add to the permanent subgraphs

        # now set the roots, from the top node to the head.
        for i in range(len(self._roots)):
            if self._roots[i] not in self.nodes:
                for _subgraph in self._subgraphs:
                    if _subgraph.startnode.ID == self._roots[i]:
                        self._roots[i] = _subgraph.headID

        # now process the non-leaf, non-H points.
        # copy information to self.graph
        for subgraph in self._subgraphs:
            for relation in subgraph.leaves:
                if relation[0] not in self.nodes:
                    for _subgraph in self._subgraphs:
                        if _subgraph.startnode.ID == relation[0]:
                            relation[0] = _subgraph.headID
                            #print("The previous ID" + str(relation[0]) + " is replaced by head ID" + str(_subgraph.headID))
                            break
                self._AddEdge(relation[0], relation[1], subgraph.headID)
        index = 0
        prevnode = None
        for node in sorted(self.nodes.values(),
                           key=operator.attrgetter("StartOffset")):
            node.Index = index
            if prevnode:
                self._AddEdge(node.ID, "RIGHT", prevnode.ID)
                self._AddEdge(prevnode.ID, "LEFT", node.ID)
            prevnode = node
            index += 1

        self._MarkNext()
        self.root = self._roots[0]

        if logging.root.isEnabledFor(logging.DEBUG):
            logging.debug("End of transform:\n {}".format(self))
Beispiel #31
0

# def LoopTest2(n):
#     for _ in range(n):
#         old_Tokenize_cn('響著錄中文规则很长 very long , 为啥是不?')

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s [%(levelname)s] %(message)s')

    logging.info("Start")
    # import ProcessSentence
    # ProcessSentence.LoadCommon()  # too heavy to load for debugging

    FeatureOntology.LoadFeatureOntology('../../fsa/Y/feature.txt')
    Lexicon.LoadSegmentLexicon()
    XLocation = '../../fsa/X/'
    Lexicon.LoadExtraReference(XLocation + 'CuobieziX.txt',
                               Lexicon._LexiconCuobieziDict)
    Lexicon.LoadExtraReference(XLocation + 'Fanti.txt',
                               Lexicon._LexiconFantiDict)

    main_x = Tokenize('科普:。,?带你看懂蜀绣冰壶比赛')
    #old_Tokenize_cn('很少有科普:3 minutes 三分钟带你看懂蜀绣冰壶比赛')

    import cProfile, pstats

    cProfile.run("LoopTest1(100)", 'restatslex')
    pstat = pstats.Stats('restatslex')
    pstat.sort_stats('time').print_stats(10)
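
LoopTest1 is profiled above but not defined in this fragment; a plausible stand-in, mirroring the commented-out LoopTest2 but using Tokenize(), would be:

# Hypothetical stand-in for the LoopTest1 passed to cProfile.run() above.
def LoopTest1(n):
    for _ in range(n):
        Tokenize('科普:。,?带你看懂蜀绣冰壶比赛')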
Beispiel #32
0
def engine():
    ###########################################
    # EDIT HERE: Input company and dates to search
    ###########################################
    company = 'facebook'
    year = '2018'
    month = '05'
    today = 11

    # gather lexicon/naive bayes trained data
    sentLex = Lexicon.sentLexicon()
    freqPOS, freqNEG = Naive_Bayes.frequency()
    countPOS, countNEG, countPOSNEG = Naive_Bayes.count(freqPOS, freqNEG)

    # reporting data
    data = ""
    artData = ""

    # graphing data
    days = []
    lexPOSGraph = []
    lexNEGGraph = []
    nbPOSGraph = []
    nbNEGGraph = []

    # gather articles and analyze sentiment for entire month up to date
    for day in range(1, today + 1):
        day = str(day)
        articles = newsArticles(company, year, month, day)

        # article data for report
        artData += 'Company: {}, Month: {}, Day: {}, Year: {}\n\n'.format(
            company, month, day, year)
        for article in articles:
            artData += article + "\n\n"

        lexPOS, lexNEG, lexNEUT = runLexicon(articles, sentLex)
        nbPOS, nbNEG, nbNEUT = runNaiveBayes(articles, freqPOS, freqNEG,
                                             countPOS, countNEG, countPOSNEG)
        result = suggestion(lexPOS, lexNEG, nbPOS, nbNEG)

        # graphing data
        days.append(day)
        lexPOSGraph.append(lexPOS)
        lexNEGGraph.append(lexNEG)
        nbPOSGraph.append(nbPOS)
        nbNEGGraph.append(nbNEG)

        # display data
        print('Company: {}, Month: {}, Day: {}, Year: {}'.format(
            company, month, day, year))
        print("Lexicon     - POS: {}, NEG: {}, NEUT: {}".format(
            lexPOS, lexNEG, lexNEUT))
        print("Naive Bayes - POS: {}, NEG: {}, NEUT: {}".format(
            nbPOS, nbNEG, nbNEUT))
        print("Results: " + result)
        print("______________________________________________________")

        # sentiment data for report
        data += '\nMonth: {}, Day: {}, Year: {}\n'.format(month, day, year)
        data += "Results: " + result + "\n"
        data += "Lexicon     - POS: {},\tNEG: {},\tNEUT: {}\n".format(
            lexPOS, lexNEG, lexNEUT)
        data += "Naive Bayes - POS: {},\tNEG: {},\tNEUT: {}\n\n".format(
            nbPOS, nbNEG, nbNEUT)
        data += "______________________________________________________\n"

    graph(days, month, year, company, lexPOSGraph, lexNEGGraph, nbPOSGraph,
          nbNEGGraph)
    print("Graphs generated...")

    report(company, str(today), month, year, data, artData)
    print("Report generated...")
Beispiel #33
0
        whiteList = whiteList.strip().split(',')

    # make sure each item in the list is a valid cms node name
    # or possibly a shortcut like T3

    for site in blackList:
        try:
            Lexicon.cmsname(site)
        except Exception, text:
            msg = "ERROR in GRID.se_black_list: %s\n" % blackList
            msg += "%s\n'%s' is not a valid Phedex Node Name" % (text, site)
            raise CrabException(msg)

    for site in whiteList:
        try:
            Lexicon.cmsname(site)
        except Exception, text:
            msg = "ERROR in GRID.se_white_list: %s\n" % whiteList
            msg += "%s\n'%s' is not a valid Phedex Node Name" % (text, site)
            raise CrabException(msg)


def parseIntoList(param):
    """
    to be used to make sure that one crab config parameter is usable as a list of strings,
    even if it is a string with commas inside it in the config file
    """
    if type(param) == type("string"):
        list = param.split(',')
        for item in list:
            item = item.strip()
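
The parseIntoList fragment above is cut off; a minimal, self-contained sketch of the helper its docstring describes (not the original implementation, whose return behaviour is unknown here) could be:

# Hypothetical sketch, not the original parseIntoList: turn a comma-separated
# config string into a list of stripped strings, pass real lists through.
def parse_into_list(param):
    if isinstance(param, str):
        return [item.strip() for item in param.split(',') if item.strip()]
    return list(param)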
Beispiel #34
0
for emo in emotions:
    d = r"Thesis - Inputdata\2018-EI-oc-En-" + emo + "-dev-and-train.txt"
    training_data_paths.append(d)

test_data_paths = []
for emo in emotions:
    d = r"Thesis - Inputdata\2018-EI-oc-En-" + emo + "-test-gold.txt"
    test_data_paths.append(d)
##
# Insert path to the Lexicon Files:
print("Loading Lexicons")
path_emo_lex = r"Lexicons\NRC-Hashtag-Emotion-Lexicon-v0.2\NRC-Hashtag-Emotion-Lexicon-v0.2.txt"
path_sen_lex = r"Lexicons\NRC-Hashtag-Sentiment-Lexicon-v1.0\HS-unigrams.txt"

# Generating the lexicons
lexi = l.load_lexicon(l.datareader(path_emo_lex))
lexi += l.load_lexicon(l.datareader(path_sen_lex, Elex=False), Elex=False)
print("Complete")

##
# Insert paths/type of embeddings(bert, Glove, Word2vec: Skipgram)
path_bert = 'book_corpus_wiki_en_cased'
path_glove = r"Thesis - Embeddings\GloVe\glove.6B.300d.w2vformat.txt"
path_word2vec = r"Thesis - Embeddings\Word2Vec\GoogleNews-vectors-negative300.bin"

# loading the embeding methods
glove = em.WordToVec("glove", path_glove)
word_two_vec = em.WordToVec("word2vec", path_word2vec)
bert = em.Bert("bert", path_bert)
embeds = [glove, word_two_vec, bert]
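
The l.datareader / l.load_lexicon helpers used above are not shown in this fragment. A purely hypothetical reader for a tab-separated lexicon file is sketched below; the column layout assumed here (term, score) may well differ from the NRC files' actual format.

# Hypothetical sketch only: read a tab-separated lexicon into a term->score dict.
import csv

def read_tab_lexicon(path):
    scores = {}
    with open(path, encoding='utf-8') as f:
        for row in csv.reader(f, delimiter='\t'):
            if len(row) >= 2:
                try:
                    scores[row[0]] = float(row[1])
                except ValueError:
                    continue  # skip headers or malformed lines
    return scores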
Beispiel #35
0
        for suffix in ['DBSReader', 'DBSWriter']:
            if url.endswith(suffix):
                url = url[0:-len(suffix)]
        readUrl = url + 'DBSReader'
        writeUrl = url + 'DBSWriter'

    readApi = DbsApi(url=readUrl)
    writeApi = DbsApi(url=writeUrl)

    dataset = options.dataset
    if options.new_location:
        new_location = options.new_location

    ###sanitize input
    # dataset name
    Lexicon.dataset(dataset)

    # PNN
    if new_location:
        Lexicon.cmsname(new_location)

    # process dataset by blocks

    blockDicts = readApi.listBlocks(dataset=dataset, detail=True)
    for block in blockDicts:
        blName = block['block_name']
        location = block['origin_site_name']
        logging.debug('block %s at location: %s' % (blName, location))
        if new_location:
            writeApi.updateBlockSiteName(block_name=blName,
                                         origin_site_name=new_location)
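
The options object (options.dataset, options.new_location) and the url variable used above come from command-line handling that is not part of this fragment; a minimal argparse sketch providing the same attributes (flag names and the default DBS URL are assumptions) could be:

# Hypothetical option parsing for the block-relocation snippet above.
import argparse

def parse_options():
    parser = argparse.ArgumentParser(
        description='Update origin_site_name for all blocks of a dataset')
    parser.add_argument('--dataset', required=True,
                        help='dataset name, validated with Lexicon.dataset()')
    parser.add_argument('--new-location', dest='new_location', default=None,
                        help='PhEDEx Node Name, validated with Lexicon.cmsname()')
    parser.add_argument('--url', default='https://cmsweb.cern.ch/dbs/prod/global/DBSReader',
                        help='DBS instance URL; Reader/Writer suffix is normalized above')
    return parser.parse_args()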
Beispiel #36
0
    def Reload(self, ReloadTask):
        utils.InitDB()
        PipeLineLocation = ParserConfig.get("main", "Pipelinefile")
        XLocation = os.path.dirname(PipeLineLocation) + "/"
        Reply = "Lexicon/Rule/Pipeline:"
        systemfileolderthanDB = ProcessSentence.SystemFileOlderThanDB(
            XLocation)

        if ReloadTask.lower() == "/lexicon":
            logging.info("Start loading lexicon...")
            Lexicon.ResetAllLexicons()
            # ProcessSentence.LoadCommonLexicon(XLocation)
            for action in ProcessSentence.PipeLine:
                if action.startswith("Lookup Spelling:"):
                    Spellfile = action[action.index(":") +
                                       1:].strip().split(",")
                    for spell in Spellfile:
                        spell = spell.strip()
                        if spell:
                            Lexicon.LoadExtraReference(
                                XLocation + spell,
                                Lexicon._LexiconCuobieziDict)

                if action.startswith("Lookup Encoding:"):
                    Encodefile = action[action.index(":") +
                                        1:].strip().split(",")
                    for encode in Encodefile:
                        encode = encode.strip()
                        if encode:
                            Lexicon.LoadExtraReference(
                                XLocation + encode, Lexicon._LexiconFantiDict)

                if action.startswith("Lookup Main:"):
                    Mainfile = action[action.index(":") +
                                      1:].strip().split(",")
                    for main in Mainfile:
                        main = main.strip()
                        if main:
                            Lexicon.LoadMainLexicon(XLocation + main)

                if action.startswith("Lookup SegmentSlash:"):
                    Slashfile = action[action.index(":") +
                                       1:].strip().split(",")
                    for slash in Slashfile:
                        slash = slash.strip()
                        if slash:
                            Lexicon.LoadSegmentSlash(XLocation + slash)

                if action.startswith("Lookup Lex:"):
                    Lexfile = action[action.index(":") + 1:].strip().split(",")
                    for lex in Lexfile:
                        lex = lex.strip()
                        if lex:
                            Lexicon.LoadLexicon(XLocation + lex)

                if action.startswith("Lookup defLex:"):
                    Compoundfile = action[action.index(":") +
                                          1:].strip().split(",")
                    for compound in Compoundfile:
                        compound = compound.strip()
                        if compound:
                            Lexicon.LoadLexicon(
                                XLocation + compound,
                                lookupSource=LexiconLookupSource.defLex)

                if action.startswith("Lookup External:"):
                    Externalfile = action[action.index(":") +
                                          1:].strip().split(",")
                    for external in Externalfile:
                        external = external.strip()
                        if external:
                            Lexicon.LoadLexicon(
                                XLocation + external,
                                lookupSource=LexiconLookupSource.External)

                if action.startswith("Lookup oQcQ:"):
                    oQoCfile = action[action.index(":") +
                                      1:].strip().split(",")
                    for oQoC in oQoCfile:
                        oQoC = oQoC.strip()
                        if oQoC:
                            Lexicon.LoadLexicon(
                                XLocation + oQoC,
                                lookupSource=LexiconLookupSource.oQcQ)
            Lexicon.LoadSegmentLexicon()
            Reply += "Reloaded lexicon at " + str(datetime.now())

        if ReloadTask.lower() == "/rule":
            logging.info("Start loading rules...")
            #Rules.ResetAllRules()
            #ProcessSentence.WinningRuleDict.clear()
            GlobalmacroLocation = os.path.join(XLocation,
                                               "../Y/GlobalMacro.txt")
            Rules.LoadGlobalMacro(GlobalmacroLocation)

            for action in ProcessSentence.PipeLine:
                if action.startswith("FSA "):
                    Rulefile = action[3:].strip()
                    RuleLocation = os.path.join(XLocation, Rulefile)
                    if RuleLocation.startswith("."):
                        RuleLocation = os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            RuleLocation)
                    if not systemfileolderthanDB or not Rules.RuleFileOlderThanDB(
                            RuleLocation):
                        Rules.LoadRules(XLocation, Rulefile,
                                        systemfileolderthanDB)

                elif action.startswith("DAGFSA_APP "):  # FUZZY
                    Rulefile = action[10:].strip()
                    RuleLocation = os.path.join(XLocation, Rulefile)
                    if RuleLocation.startswith("."):
                        RuleLocation = os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            RuleLocation)
                    if not systemfileolderthanDB or not Rules.RuleFileOlderThanDB(
                            RuleLocation):
                        Rules.LoadRules(XLocation,
                                        Rulefile,
                                        systemfileolderthanDB,
                                        fuzzy=True)
                    # Rules.LoadRules(XLocation, Rulefile, systemfileolderthanDB, fuzzy=True)

                elif action.startswith("DAGFSA "):
                    Rulefile = action[6:].strip()
                    RuleLocation = os.path.join(XLocation, Rulefile)
                    if RuleLocation.startswith("."):
                        RuleLocation = os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            RuleLocation)
                    if not systemfileolderthanDB or not Rules.RuleFileOlderThanDB(
                            RuleLocation):
                        Rules.LoadRules(XLocation, Rulefile,
                                        systemfileolderthanDB)

            Reply += "Reloaded rules at " + str(datetime.now())

        if ReloadTask.lower() == "/pipeline":
            logging.info("Start loading pipeline...")
            Rules.ResetAllRules()
            ProcessSentence.PipeLine = []
            ProcessSentence.LoadCommon()
            Reply += "Reloaded pipeline at " + str(datetime.now())

        ProcessSentence.UpdateSystemFileFromDB(XLocation)

        self.send_response(200)
        self.send_header('Content-type', "text/html; charset=utf-8")
        self.end_headers()
        self.wfile.write(Reply.encode("utf-8"))
        utils.CloseDB(utils.DBCon)
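
Reload() above behaves like part of an HTTP request handler (send_response, wfile.write); a hedged client-side sketch for triggering the three reload tasks is given below. Only the task strings "/lexicon", "/rule" and "/pipeline" come from the code; the host, port and routing are assumptions.

# Hypothetical client sketch for the reload handler above.
try:
    from urllib.request import urlopen   # Python 3
except ImportError:
    from urllib2 import urlopen          # Python 2

def reload_all(base_url="http://localhost:8080"):
    for task in ("/lexicon", "/rule", "/pipeline"):
        reply = urlopen(base_url + task).read()
        print(reply.decode("utf-8"))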