def __init__(self, backoff=None):
    """Initialise both base classes of the sequential backoff lemmatizer.

    :param backoff: Next lemmatizer in the backoff chain; handed on
        unchanged to ``SequentialBackoffTagger.__init__``.
    """
    # Each base class is initialised explicitly with its own argument
    # list: LemmatizerI takes nothing, the tagger base takes the backoff.
    LemmatizerI.__init__(self)
    SequentialBackoffTagger.__init__(self, backoff)
def __init__(self, list_of_drugs, backoff=None):
    """Load the drug lexicon and register the backoff tagger.

    :param list_of_drugs: Path to a text file that is read line by line;
        every line is stripped of surrounding whitespace and stored as one
        entry (presumably one drug name per line).
    :param backoff: Next tagger in the backoff chain; forwarded to
        ``SequentialBackoffTagger.__init__``.
    :raises OSError: If ``list_of_drugs`` cannot be opened.
    """
    SequentialBackoffTagger.__init__(self, backoff)
    # The context manager closes the file deterministically; previously the
    # handle stayed open until the garbage collector got to it.
    with open(list_of_drugs, 'r') as drug_file:
        # Dict keyed by entry gives O(1) membership tests. The value 1 is
        # only a placeholder, kept so existing lookups see the same data.
        self.drugs = {line.strip(): 1 for line in drug_file}
def __init__(self, list_of_drugs, backoff=None):
    """Read the drug list into a lookup table and set up the backoff chain.

    :param list_of_drugs: Path to a text file; each line, stripped of
        leading/trailing whitespace, becomes one key of ``self.drugs``
        (presumably one drug name per line).
    :param backoff: Next tagger in the backoff chain; passed through to
        ``SequentialBackoffTagger.__init__``.
    :raises OSError: If ``list_of_drugs`` cannot be opened.
    """
    SequentialBackoffTagger.__init__(self, backoff)
    # Use a context manager so the file handle is released as soon as the
    # table is built instead of leaking until garbage collection.
    with open(list_of_drugs, 'r') as drug_file:
        # Hash table for O(1) lookup; every key maps to the placeholder 1.
        self.drugs = {line.strip(): 1 for line in drug_file}
def __init__(self, *args, **kwargs):
    """Populate the tagger from the bundled COCA frequency list.

    All positional and keyword arguments are forwarded unchanged to
    ``SequentialBackoffTagger.__init__``.

    The file ``../data/coca_500k.csv`` (resolved relative to this module's
    directory) is parsed as tab-delimited rows whose first three columns
    are read as frequency, word and part-of-speech tag; each row is handed
    to ``self.insertPair(word, pos, freq)``.

    :raises OSError: If the data file cannot be opened.
    :raises ValueError: If a row's first column is not an integer.
    :raises IndexError: If a row has fewer than three columns.
    """
    SequentialBackoffTagger.__init__(self, *args, **kwargs)
    coca_path = os.path.join(os.path.dirname(__file__), '../data/coca_500k.csv')
    # Context manager guarantees the data file is closed once every row has
    # been consumed; the original never closed the handle.
    with open(coca_path) as coca_file:
        coca_rows = csv.reader(coca_file, delimiter='\t')
        # Must exist before the first insertPair call (presumably
        # insertPair fills this map - confirm against its definition).
        self.tag_map = {}
        for row in coca_rows:
            freq = int(row[0])
            word = row[1].strip()
            pos = row[2].strip()
            self.insertPair(word, pos, freq)
def __init__(self, *args, **kwargs):
    """Set up the WordNet-to-CLAWS7 tag table and the WordNet handle.

    All positional and keyword arguments are forwarded unchanged to
    ``SequentialBackoffTagger.__init__``.
    """
    SequentialBackoffTagger.__init__(self, *args, **kwargs)
    # WordNet part-of-speech letter -> CLAWS7 tag.
    self.wordnet_tag_map = dict(n='nn', s='jj', a='jj', r='rr', v='vv0')
    # Keep the module-level wordnet object reachable from the instance.
    self.wordnet = wordnet
def __init__(self, *args, **kwargs):
    """Build the full backoff chain and adopt it as this tagger's chain.

    All positional and keyword arguments are forwarded unchanged to
    ``SequentialBackoffTagger.__init__``.

    Training sentences are unpickled from ``pickles/brown_clawstags.pickle``
    (a path relative to the current working directory). The chain is built
    back to front: ``DefaultTagger('nn')`` <- ``WordNetTagger`` <-
    ``NamesTagger`` <- ``COCATagger`` <- ``BigramTagger`` <-
    ``TrigramTagger``.

    :raises OSError: If the pickle file cannot be opened.
    """
    SequentialBackoffTagger.__init__(self, *args, **kwargs)
    self.dist = FreqDist()

    # Pickles are binary data: open in 'rb' (text mode fails on Python 3
    # and can corrupt data on Windows) and close the handle when done.
    # NOTE(security): pickle.load can execute arbitrary code; this file
    # must come from a trusted source.
    with open("pickles/brown_clawstags.pickle", "rb") as pickle_file:
        train_sents = pickle.load(pickle_file)

    # Drop every token that is not a 2-tuple so the n-gram taggers only
    # see well-formed pairs (element order not verifiable here - confirm).
    train_sents = [
        [token for token in sentence if len(token) == 2]
        for sentence in train_sents
    ]

    default_tagger = DefaultTagger('nn')
    wn_tagger = WordNetTagger(default_tagger)
    names_tagger = NamesTagger(wn_tagger)
    coca_tagger = COCATagger(names_tagger)
    bigram_tagger = BigramTagger(train_sents, backoff=coca_tagger)
    trigram_tagger = TrigramTagger(train_sents, backoff=bigram_tagger)

    # Take over the trigram tagger's chain as-is. Per the original author,
    # self is left out because it would always return None.
    self._taggers = trigram_tagger._taggers
def __init__(self: object, backoff: object, verbose: bool = False):
    """
    Initialise the lemmatizer and place it at the head of a backoff chain.

    :param backoff: Next lemmatizer in the backoff chain, or ``None`` when
        this lemmatizer is the only link
    :param verbose: Flag to include which lemmatizer assigned a given tag
        in the return tuple
    """
    # The base initialiser is always given backoff=None; the chain itself
    # is assembled explicitly below.
    SequentialBackoffTagger.__init__(self, backoff=None)

    # This lemmatizer first, followed by whatever the backoff already chains.
    downstream = [] if backoff is None else backoff._taggers
    self._taggers = [self] + downstream

    self.VERBOSE = verbose

    # Repr helper limited to one list item and one dict item.
    abbreviator = reprlib.Repr()
    abbreviator.maxlist = 1
    abbreviator.maxdict = 1
    self.repr = abbreviator
def __init__(self: object, backoff: object, verbose: bool = False):
    """
    Prepare the lemmatizer and link it in front of its backoff chain.

    :param backoff: Next lemmatizer in the backoff chain; ``None`` means
        this lemmatizer stands alone
    :type verbose: bool
    :param verbose: Flag to include which lemmatizer assigned a given tag
        in the return tuple
    """
    # The base class is initialised without a backoff; the chain is wired
    # up by hand on the next line.
    SequentialBackoffTagger.__init__(self, backoff=None)
    self._taggers = [self] if backoff is None else [self] + backoff._taggers

    self.VERBOSE = verbose

    # Repr helper capped at one list element and one dict entry.
    short_repr = reprlib.Repr()
    short_repr.maxlist = 1
    short_repr.maxdict = 1
    self.repr = short_repr
def __init__(self, *args, **kwargs):
    """Build the full backoff chain and adopt it as this tagger's chain.

    All positional and keyword arguments are forwarded unchanged to
    ``SequentialBackoffTagger.__init__``.

    Training sentences are unpickled from ``../data/brown_clawstags.pickle``
    (resolved relative to this module's directory). The chain is built back
    to front: ``WordNetTagger()`` <- ``NamesTagger`` <- ``COCATagger`` <-
    ``BigramTagger`` <- ``TrigramTagger``.

    :raises OSError: If the pickle file cannot be opened.
    """
    SequentialBackoffTagger.__init__(self, *args, **kwargs)
    self.dist = FreqDist()

    tagged_brown_path = os.path.join(os.path.dirname(__file__),
                                     '../data/brown_clawstags.pickle')
    # Close the pickle file as soon as it has been read; the original left
    # the handle open. NOTE(security): pickle.load can execute arbitrary
    # code, so this file must come from a trusted source.
    with open(tagged_brown_path, 'rb') as pickle_file:
        train_sents = pickle.load(pickle_file)

    # Drop every token that is not a 2-tuple so the n-gram taggers only
    # see well-formed pairs (element order not verifiable here - confirm).
    train_sents = [
        [token for token in sentence if len(token) == 2]
        for sentence in train_sents
    ]

    # WordNetTagger is the last link here: it is built without a backoff.
    wn_tagger = WordNetTagger()
    names_tagger = NamesTagger(wn_tagger)
    coca_tagger = COCATagger(names_tagger)
    bigram_tagger = BigramTagger(train_sents, backoff=coca_tagger)
    trigram_tagger = TrigramTagger(train_sents, backoff=bigram_tagger)

    # Take over the trigram tagger's chain as-is. Per the original author,
    # self is left out because it would always return None.
    self._taggers = trigram_tagger._taggers
def __init__(self, train=None):
    """Create the pymorphy2 analyzer and initialise the tagger base.

    :param train: Accepted but not used by this initialiser.
    """
    analyzer = pymorphy2.MorphAnalyzer()
    self._morph = analyzer
    # This tagger has no backoff: the base class is initialised with None.
    SequentialBackoffTagger.__init__(self, None)
def __init__(self, *args, **kwargs):
    """Initialise the tagger by delegating to the base class.

    All positional and keyword arguments are forwarded unchanged to
    ``SequentialBackoffTagger.__init__``; no extra state is set up here.
    """
    SequentialBackoffTagger.__init__(self, *args, **kwargs)