def generateDisconinousFeatures(configuration, sent, transDic):
    """Flag buffer elements that co-occur with the stack top in a lexicon key.

    The lemma text of the element on top of ``configuration.stack`` is
    obtained through ``Sentence.getTokens`` / ``Sentence.getTokenLemmas``.
    The keys of ``Corpus.mweDictionary`` are then scanned for the first key
    that contains this text without being equal to it.  For that key only,
    each of the first five elements of ``configuration.buffer`` whose lemma
    is non-empty is checked: when the stack text and the element lemma,
    joined by a single space in either order, are contained in the key, two
    entries are written into ``transDic``:

    * ``'S0B<i>ArePartsOfMWE'`` set to ``True``
    * ``'S0B<i>ArePartsOfMWEDistance'`` set to the difference between the
      positions of the buffer element and of the last stack-top token in
      ``sent.tokens``

    where ``<i>`` is the position of the element inside the buffer slice.

    Nothing is returned; ``transDic`` is updated in place.

    NOTE(review): containment is tested with ``in``; this is a substring
    test assuming keys and lemma texts are plain strings -- confirm.
    """
    stackTokens = Sentence.getTokens([configuration.stack[-1]])
    stackLemmas = Sentence.getTokenLemmas(stackTokens)

    for entry in Corpus.mweDictionary.keys():
        # Skip keys that do not contain the stack text, or are exactly it.
        if stackLemmas not in entry or stackLemmas == entry:
            continue

        for position, candidate in enumerate(configuration.buffer[:5]):
            candidateLemma = candidate.lemma
            if candidateLemma != '' and (
                    (stackLemmas + ' ' + candidateLemma) in entry
                    or (candidateLemma + ' ' + stackLemmas) in entry):
                featurePrefix = 'S0B' + str(position)
                transDic[featurePrefix + 'ArePartsOfMWE'] = True
                transDic[featurePrefix + 'ArePartsOfMWEDistance'] = (
                    sent.tokens.index(candidate)
                    - sent.tokens.index(stackTokens[-1]))

        # Only the first qualifying key is examined.
        break
def areInLexic(tokensList):
    """Tell whether the lemma text of ``tokensList`` is a lexicon key.

    The text is produced by ``Sentence.getTokenLemmas`` and looked up among
    the keys of ``Corpus.mweDictionary``.

    Returns ``True`` when the text is one of the keys, ``False`` otherwise.
    """
    lemmaText = Sentence.getTokenLemmas(tokensList)
    return lemmaText in Corpus.mweDictionary.keys()