def __init__(self, model_path, word_dim=None, caps_dim=None, suffix_dim=None):
    """Windowed feed-forward supertagger over word/caps/suffix embeddings.

    Two modes, selected by *word_dim*:
      * word_dim is None  -> inference: embedding sizes are read back from
        the model's saved ``tagger_defs.txt``.
      * word_dim given    -> training: the supplied sizes are used directly.

    Vocabulary lookup tables are loaded from ``words.txt`` / ``suffixes.txt``
    / ``caps.txt`` / ``target.txt`` under *model_path*.
    """
    self.model_path = model_path
    if word_dim is None:
        # use as supertagger: restore embedding sizes from the saved defs
        with open(os.path.join(model_path, "tagger_defs.txt")) as defs_file:
            defs = json.load(defs_file)
        self.word_dim = defs["word_dim"]
        self.caps_dim = defs["caps_dim"]
        self.suffix_dim = defs["suffix_dim"]
    else:
        # training: take embedding sizes from the arguments
        self.word_dim = word_dim
        self.caps_dim = caps_dim
        self.suffix_dim = suffix_dim
    self.words = read_model_defs(os.path.join(model_path, "words.txt"))
    self.suffixes = read_model_defs(os.path.join(model_path, "suffixes.txt"))
    self.caps = read_model_defs(os.path.join(model_path, "caps.txt"))
    self.targets = read_model_defs(os.path.join(model_path, "target.txt"))
    # NOTE(review): unk_word is commented out here, so self.unk_word is never
    # set in this class — confirm no caller relies on it.
    # self.unk_word = self.words["*UNKNOWN*"]
    self.unk_suffix = self.suffixes["UNK"]
    # 7-token context window; each token contributes all three embeddings
    in_dim = 7 * (self.word_dim + self.caps_dim + self.suffix_dim)
    super(EmbeddingTagger, self).__init__(
        emb_word=L.EmbedID(len(self.words), self.word_dim),
        emb_caps=L.EmbedID(len(self.caps), self.caps_dim),
        emb_suffix=L.EmbedID(len(self.suffixes), self.suffix_dim),
        linear=L.Linear(in_dim, len(self.targets)),
    )
def __init__(self, model_path, samples_path):
    """Training-sample feeder for the embedding tagger.

    Loads the word/suffix/caps/target lookup tables from *model_path* and
    reads the raw training lines from *samples_path*.
    """
    self.model_path = model_path
    self.words = read_model_defs(os.path.join(model_path, "words.txt"))
    self.suffixes = read_model_defs(os.path.join(model_path, "suffixes.txt"))
    self.caps = read_model_defs(os.path.join(model_path, "caps.txt"))
    self.targets = read_model_defs(os.path.join(model_path, "target.txt"))
    # fix: the original `open(samples_path).readlines()` leaked the file
    # handle; close it deterministically with a context manager.
    with open(samples_path) as f:
        self.samples = f.readlines()
    self.unk_word = self.words["*UNKNOWN*"]
    self.unk_suffix = self.suffixes["UNK"]
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Biaffine BiLSTM dependency parser / supertagger (English).

    Two modes, selected by *word_dim*:
      * word_dim is None  -> inference: hyper-parameters are restored onto
        ``self`` from ``tagger_defs.txt`` via ``Param.load``.
      * word_dim given    -> training: hyper-parameters are recorded through
        the ``Param`` proxy and dumped to ``tagger_defs.txt``.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training
        self.train = True
        # NOTE(review): assignments on `p` evidently mirror onto `self`
        # (self.word_dim etc. are read below) — confirm Param's contract.
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # one word embedding + 4 suffix + 4 prefix slots per token
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        # 0.32 is the LSTM-internal dropout ratio (hard-coded)
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, model_path):
    """Load word/char lookup tables and cache the special-token ids."""
    self.model_path = model_path
    words = read_model_defs(model_path + "/words.txt")
    chars = read_model_defs(model_path + "/chars.txt")
    self.words = words
    self.chars = chars
    # special-token ids on the word side
    self.unk_word = words[UNK]
    self.start_word, self.end_word = words[START], words[END]
    # special-token ids on the character side
    self.unk_char = chars[UNK]
    self.start_char, self.end_char = chars[START], chars[END]
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Japanese biaffine BiLSTM dependency parser / supertagger.

    Two modes, selected by *word_dim*:
      * word_dim is None  -> inference: hyper-parameters restored onto
        ``self`` from ``tagger_defs.txt`` via ``Param.load``.
      * word_dim given    -> training: hyper-parameters recorded through the
        ``Param`` proxy and dumped to ``tagger_defs.txt``.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        self.train = True
        # NOTE(review): assignments on `p` evidently mirror onto `self`
        # (self.word_dim etc. are read below) — confirm Param's contract.
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.char_dim = char_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(BiaffineJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        # chars embed to a fixed width of 50, then a width-3 conv over the
        # char sequence produces char_dim features per word
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50), stride=1, pad=(1, 0)),
        # 0.32 is the LSTM-internal dropout ratio (hard-coded)
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=L.Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, model_path):
    """Load word/suffix/prefix tables and pre-build padded affix rows.

    The start/end rows carry one real affix id followed by three IGNORE
    slots, matching the 4-slot affix layout used elsewhere.
    """
    self.words = read_model_defs(model_path + "/words.txt")
    self.suffixes = read_model_defs(model_path + "/suffixes.txt")
    self.prefixes = read_model_defs(model_path + "/prefixes.txt")
    self.unk_word = self.words[UNK]
    self.start_word = self.words[START]
    self.end_word = self.words[END]
    self.unk_suf = self.suffixes[UNK]
    self.unk_prf = self.prefixes[UNK]
    padding = [IGNORE] * 3
    self.start_pre = [[self.prefixes[START]] + padding]
    self.start_suf = [[self.suffixes[START]] + padding]
    self.end_pre = [[self.prefixes[END]] + padding]
    self.end_suf = [[self.suffixes[END]] + padding]
def __init__(self, model_path, word_dim=None, char_dim=None):
    """Japanese CCG supertagger: windowed word+char embeddings -> 2-layer MLP.

    Two modes, selected by *word_dim*:
      * word_dim is None  -> supertagger (inference): embedding sizes are
        read from ``tagger_defs.txt``.
      * word_dim given    -> training: sizes are taken from the arguments
        and written to ``tagger_defs.txt``.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger
        with open(defs_file) as f:
            defs = json.load(f)
        self.word_dim = defs["word_dim"]
        self.char_dim = defs["char_dim"]
    else:
        # training: record the chosen sizes for later inference runs
        self.word_dim = word_dim
        self.char_dim = char_dim
        with open(defs_file, "w") as f:
            json.dump(
                {
                    "model": self.__class__.__name__,
                    "word_dim": self.word_dim,
                    "char_dim": self.char_dim
                }, f)
    self.extractor = FeatureExtractor(model_path)
    self.targets = read_model_defs(model_path + "/target.txt")
    # NOTE(review): train stays True even in supertagger mode (word_dim is
    # None); sibling models set train=False when loading. Confirm whether
    # inference code resets this flag externally.
    self.train = True
    hidden_dim = 1000
    in_dim = WINDOW_SIZE * (self.word_dim + self.char_dim)
    super(JaCCGEmbeddingTagger, self).__init__(
        emb_word=L.EmbedID(len(self.extractor.words), self.word_dim),
        emb_char=L.EmbedID(len(self.extractor.chars), self.char_dim,
                           ignore_label=IGNORE),
        linear1=L.Linear(in_dim, hidden_dim),
        linear2=L.Linear(hidden_dim, len(self.targets)),
    )
def __init__(self, model_path, samples_path):
    """Dataset backed by a JSON object mapping sentence -> target info.

    Fixes vs. original:
      * ``dict.items()`` is a non-indexable view on Python 3, so
        ``self.samples[0]`` crashed there; materialize it as a list
        (behavior-identical on Python 2, where ``items()`` already copies).
      * ``unicode`` is Python 2-only; ``type(u"")`` names the text type on
        both Python 2 (unicode) and Python 3 (str).
    """
    self.model_path = model_path
    self.targets = read_model_defs(model_path + "/target.txt")
    self.extractor = FeatureExtractor(model_path)
    with open(samples_path) as f:
        self.samples = list(json.load(f).items())
    # json always decodes object keys to the text type
    assert isinstance(self.samples[0][0], type(u""))
def __init__(self, model_path, samples_path):
    """Dataset sorted by sentence length (shortest first) for batching."""
    self.model_path = model_path
    self.extractor = FeatureExtractor(model_path)
    self.targets = read_model_defs(model_path + "/target.txt")
    with open(samples_path) as f:
        loaded = json.load(f)
    # sort key: length of the first element of each sample's value
    self.samples = sorted(loaded.items(), key=lambda pair: len(pair[1][0]))
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, relu_dim=64, dropout_ratio=0.5):
    """Peephole-LSTM supertagger over word + prefix/suffix embeddings.

    Two modes, selected by *word_dim*:
      * word_dim is None  -> inference: hyper-parameters restored onto
        ``self`` from ``tagger_defs.txt`` via ``Param.load``.
      * word_dim given    -> training: hyper-parameters recorded through the
        ``Param`` proxy and dumped to ``tagger_defs.txt``.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        self.train = True
        # NOTE(review): assignments on `p` evidently mirror onto `self`
        # (p.in_dim below already reads self.word_dim) — confirm Param's contract.
        p = Param(self)
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.relu_dim = relu_dim
        p.nlayers = nlayers
        p.dropout_ratio = dropout_ratio
        # one word embedding + 4 suffix + 4 prefix slots per token
        p.in_dim = self.word_dim + 8 * self.afix_dim
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    super(PeepHoleLSTMTagger, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        # two stacked DyerLSTMs per direction (nlayers is not used here)
        lstm_f1=DyerLSTM(self.in_dim, self.hidden_dim),
        lstm_f2=DyerLSTM(self.hidden_dim, self.hidden_dim),
        lstm_b1=DyerLSTM(self.in_dim, self.hidden_dim),
        lstm_b2=DyerLSTM(self.hidden_dim, self.hidden_dim),
        linear1=L.Linear(2 * self.hidden_dim, self.relu_dim),
        linear2=L.Linear(self.relu_dim, len(self.targets)),
    )
def __init__(self, model_path, ccgbank_path, tritrain_path, weight, length=False):
    """Mixed dataset: CCGbank (oversampled ncopies times) plus tri-training data."""
    self.model_path = model_path
    self.targets = read_model_defs(model_path + "/target.txt")
    self.extractor = FeatureExtractor(model_path, length)
    self.weight = weight
    # each CCGbank sentence is seen this many times per tri-train pass
    self.ncopies = 15
    for path, prefix in ((ccgbank_path, "ccgbank"), (tritrain_path, "tritrain")):
        with open(path) as f:
            data = json.load(f)
        setattr(self, prefix + "_samples", data)
        setattr(self, prefix + "_size", len(data))
    print("len(ccgbank):", self.ccgbank_size, file=sys.stderr)
    print("len(ccgbank) * # copies:", self.ccgbank_size * self.ncopies, file=sys.stderr)
    print("len(tritrain):", self.tritrain_size, file=sys.stderr)
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
             hidden_dim=128, relu_dim=64, dropout_ratio=0.5):
    """Japanese BiLSTM supertagger over word + character-CNN features.

    Two modes, selected by *word_dim*:
      * word_dim is None  -> supertagger (inference): hyper-parameters are
        read from ``tagger_defs.txt``.
      * word_dim given    -> training: hyper-parameters are taken from the
        arguments and written to ``tagger_defs.txt``.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger
        with open(defs_file) as f:
            defs = json.load(f)
        self.word_dim = defs["word_dim"]
        self.char_dim = defs["char_dim"]
        self.hidden_dim = defs["hidden_dim"]
        self.relu_dim = defs["relu_dim"]
        self.nlayers = defs["nlayers"]
        self.train = False
        self.extractor = FeatureExtractor(model_path)
    else:
        # training
        self.word_dim = word_dim
        self.char_dim = char_dim
        self.hidden_dim = hidden_dim
        self.relu_dim = relu_dim
        self.nlayers = nlayers
        self.train = True
        # NOTE(review): dropout_ratio is not saved in the dumped defs, so an
        # inference run cannot recover it — confirm that is acceptable.
        with open(defs_file, "w") as f:
            json.dump(
                {
                    "model": self.__class__.__name__,
                    "word_dim": self.word_dim,
                    "char_dim": self.char_dim,
                    "hidden_dim": hidden_dim,
                    "relu_dim": relu_dim,
                    "nlayers": nlayers
                }, f)
    self.targets = read_model_defs(model_path + "/target.txt")
    self.words = read_model_defs(model_path + "/words.txt")
    self.chars = read_model_defs(model_path + "/chars.txt")
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(JaLSTMTagger, self).__init__(
        emb_word=L.EmbedID(len(self.words), self.word_dim),
        # chars embed to a fixed width of 50, then a width-3 conv over the
        # char sequence produces char_dim features per word
        emb_char=L.EmbedID(len(self.chars), 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim,
                                  (3, 50), stride=1, pad=(1, 0)),
        lstm_f=L.NStepLSTM(nlayers, self.in_dim, self.hidden_dim, 0.),
        lstm_b=L.NStepLSTM(nlayers, self.in_dim, self.hidden_dim, 0.),
        conv1=L.Convolution2D(1, 2 * self.hidden_dim,
                              (7, 2 * self.hidden_dim), stride=1, pad=(3, 0)),
        linear1=L.Linear(2 * self.hidden_dim, self.relu_dim),
        linear2=L.Linear(self.relu_dim, len(self.targets)),
    )
def __init__(self, model_path):
    """Load word/char tables; cache UNK ids and the longest word length."""
    self.words = read_model_defs(model_path + "/words.txt")
    self.chars = read_model_defs(model_path + "/chars.txt")
    self.unk_word = self.words[UNK]
    self.unk_char = self.chars[UNK]
    # longest vocabulary entry, excluding the UNK placeholder itself
    lengths = (len(word) for word in self.words if word != UNK)
    self.max_char_len = max(lengths)
def __init__(self, model_path, samples_path, length=False):
    """Dataset loaded verbatim from a JSON samples file."""
    self.model_path = model_path
    self.extractor = FeatureExtractor(model_path, length)
    self.targets = read_model_defs(model_path + "/target.txt")
    with open(samples_path) as samples_file:
        self.samples = json.load(samples_file)