def __init__(self, *args, **kwargs):
    '''
    Set up the model, optionally choosing which field it reads.

    The optional keyword argument `field` is removed from kwargs before
    the remaining arguments are forwarded to the parent initializer. It
    defaults to 'sentence', but can be e.g. pos_sentence.
    '''
    # Pop `field` first so the parent initializer never sees it.
    chosen_field = kwargs.pop('field', 'sentence')
    self.field = chosen_field

    super(LanguageModel, self).__init__(*args, **kwargs)

    # Integer codes for the two verb tags handled here.
    self.class_to_code = {
        'VBZ': 0,
        'VBP': 1,
    }

    # gen_inflect_from_vocab returns a pair; only the first element (the
    # verb inflector) is kept. self.vocab_file is presumably set by the
    # parent initializer -- TODO confirm.
    verb_inflector, _unused = gen_inflect_from_vocab(self.vocab_file)
    self.inflect_verb = verb_inflector
def __init__(self, infile, modes=('infreq_pos', ), most_common=10000,
             skip=0, stop_after=None, verbose=True, criterion=None,
             vocab_file=filenames.vocab_file):
    '''
    Store the reader settings and load the vocabulary-derived helpers.

    infile: stored unchanged on self.infile
    modes: a tuple of one or more of the following modes:
        'word' - write actual words
        'pos' - replace words with their part of speech
        'infreq_pos' - replace infrequent words with their part of
            speech
        or None, in which case all modes are produced.
    most_common: if mode is 'infreq_pos', only retain this number of
        words, replace the rest with part of speech
    skip: number of sentences to skip after each sentence (to avoid all
        sentences starting with the same words if the corpus is sorted)
    stop_after: stored unchanged on self.stop_after (presumably a limit
        on how much of the input is processed -- TODO confirm)
    verbose: stored unchanged on self.verbose
    criterion: None, or function that takes a dict representing a
        dependency and returns True if the dependency should be kept
    vocab_file: passed to self.load_freq_dict and to
        gen_inflect_from_vocab

    Raises ValueError if modes contains anything other than the three
    allowed mode names.
    '''
    allowed_modes = ('word', 'pos', 'infreq_pos')
    self.modes = allowed_modes if modes is None else modes

    # Validate first so that bad arguments fail before any vocabulary
    # loading is attempted.
    if set(self.modes) - set(allowed_modes):
        # The names are joined into one string on purpose: formatting the
        # bare 3-tuple with a single %s raises TypeError ("not all
        # arguments converted") instead of the intended ValueError.
        raise ValueError('Only the following modes are allowed: %s'
                         % ', '.join(allowed_modes))

    self.infile = infile
    self.skip = skip
    self.most_common = most_common
    self.stop_after = stop_after
    self.load_freq_dict(vocab_file)
    self.verbose = verbose
    self.inflect_verb, self.inflect_noun = gen_inflect_from_vocab(
        vocab_file)
    self.criterion = criterion
def __init__(self, *args, **kwargs):
    '''
    Initialize the acceptor and its class/code lookup tables.

    All arguments are forwarded unchanged to RNNAcceptor.__init__.
    '''
    RNNAcceptor.__init__(self, *args, **kwargs)

    # Forward mapping: class label -> integer code.
    self.class_to_code = {
        'grammatical': 0,
        'ungrammatical': 1,
    }

    # Reverse mapping (code -> class label), derived from the forward one.
    self.code_to_class = dict(
        (code, label) for label, code in self.class_to_code.items())

    # gen_inflect_from_vocab returns a pair; only the first element (the
    # verb inflector) is kept.
    verb_inflector, _unused = gen_inflect_from_vocab(self.vocab_file)
    self.inflect_verb = verb_inflector
def __init__(self, *args, **kwargs):
    '''
    Initialize the parent class, then attach the verb inflector.

    All arguments are forwarded unchanged to the parent initializer.
    '''
    super(InflectVerb, self).__init__(*args, **kwargs)

    # gen_inflect_from_vocab returns a pair; only the first element (the
    # verb inflector) is kept. self.vocab_file is presumably set by the
    # parent initializer -- TODO confirm.
    verb_inflector, _unused = gen_inflect_from_vocab(self.vocab_file)
    self.inflect_verb = verb_inflector