def __init__(self, nums_chars, nums_tags, buckets_char, counts=None, batch_size=10, crf=1,
             ngram=None, sent_seg=False, is_space=True, emb_path=None, tag_scheme='BIES'):
    """Initialize model hyper-parameters and placeholder slots.

    Args:
        nums_chars: number of distinct characters in the vocabulary.
        nums_tags: number of tag types (scalar in this variant).
        buckets_char: sentence length of each bucket.
        counts: number of training sentences per bucket; padded below.
        batch_size: upper bound on the per-bucket batch size.
        crf: > 0 enables the CRF transition matrix.
        ngram: optional n-gram feature configuration.
        sent_seg: whether sentence segmentation is performed.
        is_space: whether the input language uses spaces.
        emb_path: optional path to pretrained embeddings.
        tag_scheme: tagging scheme, e.g. 'BIES'.
    """
    self.nums_chars = nums_chars
    self.nums_tags = nums_tags
    self.buckets_char = buckets_char
    # Guard the None default: the padding loop below calls len()/append(),
    # and a fresh list avoids mutating a caller-supplied one implicitly.
    self.counts = counts if counts is not None else []
    self.crf = crf
    self.ngram = ngram
    self.emb_path = emb_path
    self.emb_layer = None
    self.tag_scheme = tag_scheme
    self.gram_layers = []
    self.batch_size = batch_size
    self.l_rate = None
    self.decay = None
    self.train_step = None
    self.saver = None
    self.decode_holders = None
    self.scores = None
    self.params = None
    self.pixels = None
    self.is_space = is_space
    self.sent_seg = sent_seg
    self.updates = []
    self.bucket_dit = {}
    self.input_v = []
    self.input_w = []
    self.input_p = None
    self.output = []
    self.output_ = []
    self.output_p = []
    if self.crf > 0:
        # NOTE(review): +1 presumably reserves an extra slot for a padding tag — confirm.
        self.transition_char = tf.get_variable(
            'transitions_char', [self.nums_tags + 1, self.nums_tags + 1])
    else:
        self.transition_char = None
    # Pad counts so every bucket has an entry (missing buckets count as 1 sentence).
    while len(self.buckets_char) > len(self.counts):
        self.counts.append(1)
    # One bucket is one batch; its real size is capped at batch_size.
    self.real_batches = toolbox.get_real_batch(self.counts, self.batch_size)
def __init__(self, nums_chars, nums_tags, buckets_char, counts=None, pic_size=None, font=None,
             batch_size=10, tag_scheme='BIES', word_vec=True, radical=False, graphic=False,
             crf=1, ngram=None, metric='F1-score', mode='RNN'):
    """Initialize model hyper-parameters and placeholder slots.

    Args:
        nums_chars: number of distinct characters in the vocabulary.
        nums_tags: list of tag-type counts, one entry per output task.
        buckets_char: sentence length of each bucket.
        counts: number of training sentences per bucket; padded below.
        pic_size: glyph image size (used when graphic features are on).
        font: font used to render glyph images.
        batch_size: upper bound on the per-bucket batch size.
        tag_scheme: tagging scheme, e.g. 'BIES'.
        word_vec: whether to use word/character embeddings.
        radical: whether to use radical features.
        graphic: whether to use glyph-image features.
        crf: > 0 enables the CRF transition matrices.
        ngram: optional n-gram feature configuration.
        metric: evaluation metric name, defaults to 'F1-score'.
        mode: model architecture name, defaults to 'RNN'.
    """
    self.nums_chars = nums_chars
    self.nums_tags = nums_tags
    self.buckets_char = buckets_char
    # Guard the None default: the padding loop below calls len()/append(),
    # and a fresh list avoids mutating a caller-supplied one implicitly.
    self.counts = counts if counts is not None else []
    self.tag_scheme = tag_scheme
    self.graphic = graphic
    self.word_vec = word_vec
    self.radical = radical
    self.crf = crf
    self.ngram = ngram
    self.emb_layer = None
    self.radical_layer = None
    self.pos_emb_f, self.pos_emb_b = None, None
    self.gram_layers = []
    self.font = font
    self.pic_size = pic_size
    self.batch_size = batch_size
    self.l_rate, self.decay = None, None
    self.train_step = None
    self.saver = None
    self.decode_holders = None
    self.scores = None
    self.params = None
    self.pixels = None
    self.metric = metric
    self.mode = mode
    self.updates = []
    self.bucket_dit = {}
    self.input_v = []
    self.input_w = []
    self.input_p = None
    self.output, self.output_, self.output_p, self.output_w, self.output_w_ = [], [], [], [], []
    if self.crf > 0:
        self.transition_char = []
        for i in range(len(self.nums_tags)):
            # NOTE(review): +1 presumably reserves an extra slot for a padding tag — confirm.
            self.transition_char.append(tf.get_variable(
                'transitions_char' + str(i),
                [self.nums_tags[i] + 1, self.nums_tags[i] + 1]))
    else:
        # Keep the attribute defined even when no CRF layer is used.
        self.transition_char = None
    self.all_metrics = ['Precision', 'Recall', 'F1-score', 'True-Negative-Rate',
                        'Boundary-F1-score']
    # Pad counts so every bucket has an entry (missing buckets count as 1 sentence).
    while len(self.buckets_char) > len(self.counts):
        self.counts.append(1)
    # One bucket is one batch; its real size is capped at batch_size.
    self.real_batches = toolbox.get_real_batch(self.counts, self.batch_size)
def __init__(self, nums_chars, nums_tags, buckets_char, counts=None, pic_size=None, font=None,
             batch_size=10, tag_scheme='BIES', word_vec=True, radical=False, graphic=False,
             crf=1, ngram=None):
    """Initialize model hyper-parameters and placeholder slots.

    Args:
        nums_chars: number of distinct characters in the vocabulary.
        nums_tags: list of tag-type counts, one entry per output task.
        buckets_char: sentence length of each bucket.
        counts: number of training sentences per bucket; padded below.
        pic_size: glyph image size (used when graphic features are on).
        font: font used to render glyph images.
        batch_size: upper bound on the per-bucket batch size.
        tag_scheme: tagging scheme, e.g. 'BIES'.
        word_vec: whether to use word/character embeddings.
        radical: whether to use radical features.
        graphic: whether to use glyph-image features.
        crf: > 0 enables the CRF transition matrices.
        ngram: optional n-gram feature configuration.
    """
    self.nums_chars = nums_chars
    self.nums_tags = nums_tags
    self.buckets_char = buckets_char
    # Guard the None default: the padding loop below calls len()/append(),
    # and a fresh list avoids mutating a caller-supplied one implicitly.
    self.counts = counts if counts is not None else []
    self.tag_scheme = tag_scheme
    self.graphic = graphic
    self.word_vec = word_vec
    self.radical = radical
    self.crf = crf
    self.ngram = ngram
    self.emb_layer = None
    self.radical_layer = None
    self.gram_layers = []
    self.font = font
    self.pic_size = pic_size
    self.batch_size = batch_size
    self.l_rate = None
    self.decay = None
    self.train_step = None
    self.saver = None
    self.decode_holders = None
    self.scores = None
    self.params = None
    self.pixels = None
    self.updates = []
    self.bucket_dit = {}
    self.input_v = []
    self.input_w = []
    self.input_p = None
    self.output = []
    self.output_ = []
    self.output_p = []
    self.output_w = []
    self.output_w_ = []
    if self.crf > 0:
        self.transition_char = []
        for i in range(len(self.nums_tags)):
            # NOTE(review): +1 presumably reserves an extra slot for a padding tag — confirm.
            self.transition_char.append(tf.get_variable(
                'transitions_char' + str(i),
                [self.nums_tags[i] + 1, self.nums_tags[i] + 1]))
    else:
        # Keep the attribute defined even when no CRF layer is used.
        self.transition_char = None
    # Pad counts so every bucket has an entry (missing buckets count as 1 sentence).
    while len(self.buckets_char) > len(self.counts):
        self.counts.append(1)
    # One bucket is one batch; its real size is capped at batch_size.
    self.real_batches = toolbox.get_real_batch(self.counts, self.batch_size)
def __init__(self, nums_chars, nums_tags, buckets_char, window_size=0, filters_number=0,
             counts=None, pic_size=None, font=None, batch_size=10, tag_scheme='BIES',
             word_vec=True, radical=False, graphic=False, crf=1, ngram=None,
             metric='F1-score'):
    """Initialize model hyper-parameters and placeholder slots.

    Args:
        nums_chars: number of distinct characters in the vocabulary.
        nums_tags: list of tag-type counts, one entry per output task, e.g. [18].
        buckets_char: sentence length of each bucket.
        window_size: convolution window size.
        filters_number: number of convolution filters.
        counts: number of training sentences per bucket; padded below.
        pic_size: glyph image size (used when graphic features are on).
        font: font used to render glyph images.
        batch_size: upper bound on the per-bucket batch size.
        tag_scheme: tagging scheme, e.g. 'BIES'.
        word_vec: whether to use word/character embeddings.
        radical: whether to use radical features.
        graphic: whether to use glyph-image features.
        crf: > 0 enables the CRF transition matrices (first-order CRF).
        ngram: optional n-gram feature configuration.
        metric: evaluation metric name, defaults to 'F1-score'.
    """
    self.window_size = window_size
    self.filters_number = filters_number
    # Number of distinct characters.
    self.nums_chars = nums_chars
    # Number of tag types per task, e.g. [18].
    self.nums_tags = nums_tags
    # Sentence length of each bucket.
    self.buckets_char = buckets_char
    # Number of training sentences per bucket. Guard the None default: the
    # padding loop below calls len()/append(), and a fresh list avoids
    # mutating a caller-supplied one implicitly.
    self.counts = counts if counts is not None else []
    self.tag_scheme = tag_scheme
    self.graphic = graphic
    self.word_vec = word_vec
    # Whether to use radical (character-component) features.
    self.radical = radical
    # Default 1: use a first-order conditional random field.
    self.crf = crf
    self.ngram = ngram
    self.emb_layer = None
    self.radical_layer = None
    self.gram_layers = []
    self.font = font
    self.pic_size = pic_size
    self.batch_size = batch_size
    self.l_rate = None
    self.decay = None
    self.train_steps = None
    self.saver = None
    self.decode_holders = None
    self.scores = None
    self.params = None
    self.pixels = None
    self.drop_out = None
    self.drop_out_v = None
    # Defaults to 'F1-score'.
    self.metric = metric
    self.updates = []
    self.bucket_dit = {}
    # shape = (num buckets, sentences per bucket, sentence length)
    self.input_v = []
    self.input_w = []
    self.input_p = None
    # Output of the LSTM after the fully-connected layer.
    self.output = []
    # Gold-standard tags (ground truth).
    self.output_ = []
    self.output_p = []
    self.output_w = []
    self.output_w_ = []
    self.lm_predictions = []
    self.lm_groundtruthes = []
    self.merged_summary = None
    self.summaries = []
    # Viterbi decoding via a learned tag transition matrix.
    if self.crf > 0:
        self.transition_char = []
        for i in range(len(self.nums_tags)):
            # NOTE(review): +1 presumably reserves an extra slot for a padding tag — confirm.
            self.transition_char.append(
                tf.get_variable('transitions_char' + str(i),
                                [self.nums_tags[i] + 1, self.nums_tags[i] + 1])
            )
    else:
        # Keep the attribute defined even when no CRF layer is used.
        self.transition_char = None
    self.all_metrics = ['Precision', 'Recall', 'F1-score', 'True-Negative-Rate',
                        'Boundary-F1-score']
    # Pad counts so every bucket has an entry (missing buckets count as 1 sentence).
    while len(self.buckets_char) > len(self.counts):
        self.counts.append(1)
    # Compute the real size of each batch: one bucket is one batch; if a bucket
    # holds fewer sentences than batch_size the batch is that count, otherwise
    # batch_size — i.e. batches are capped at batch_size.
    self.real_batches = toolbox.get_real_batch(self.counts, self.batch_size)
    self.losses = []
def __init__(self, nums_chars, nums_tags, buckets_char, counts=None, batch_size=10,
             tag_scheme='BIES', crf=1, metric='F1-score', ngram=None, co_train=False,
             highway_layers=1, lambda0=0, lambda1=0, char_freq_loss=False):
    """Initialize model hyper-parameters and placeholder slots.

    Args:
        nums_chars: number of distinct characters in the vocabulary.
        nums_tags: list of tag-type counts, one entry per output task, e.g. [18].
        buckets_char: sentence length of each bucket.
        counts: number of training sentences per bucket; padded below.
        batch_size: upper bound on the per-bucket batch size.
        tag_scheme: tagging scheme, e.g. 'BIES'.
        crf: > 0 enables the CRF transition matrices (first-order CRF).
        metric: evaluation metric name, defaults to 'F1-score'.
        ngram: optional n-gram feature configuration.
        co_train: whether to co-train a language-model objective.
        highway_layers: number of highway layers.
        lambda0: weight of the LM co-training loss (used when co_train).
        lambda1: weight of the char-frequency loss (used when char_freq_loss).
        char_freq_loss: whether to add a character-frequency auxiliary loss.
    """
    self.ngram = ngram
    self.gram_layers = []
    # Number of distinct characters.
    self.nums_chars = nums_chars
    # Number of tag types per task, e.g. [18].
    self.nums_tags = nums_tags
    # Sentence length of each bucket.
    self.buckets_char = buckets_char
    # Number of training sentences per bucket. Guard the None default: the
    # padding loop below calls len()/append(), and a fresh list avoids
    # mutating a caller-supplied one implicitly.
    self.counts = counts if counts is not None else []
    self.tag_scheme = tag_scheme
    # Default 1: use a first-order conditional random field.
    self.crf = crf
    self.emb_layer = None
    self.batch_size = batch_size
    self.l_rate = None
    self.decay = None
    self.train_steps = None
    self.saver = None
    self.decode_holders = None
    self.scores = None
    self.params = None
    self.drop_out = None
    self.drop_out_v = None
    # Defaults to 'F1-score'.
    self.metric = metric
    self.updates = []
    self.bucket_dit = {}
    # shape = (num buckets, sentences per bucket, sentence length)
    self.input_v = []
    self.input_w = []
    self.input_p = None
    # Output of the LSTM after the fully-connected layer.
    self.output = []
    # Gold-standard tags (ground truth).
    self.output_ = []
    self.output_p = []
    self.output_w = []
    self.output_w_ = []
    self.co_train = co_train
    self.highway_layers = highway_layers
    self.char_freq_loss = char_freq_loss
    if self.char_freq_loss:
        self.char_freq_predictions = []
        self.char_freq_groundtruthes = []
        self.lambda1 = lambda1
    if self.co_train:
        self.lm_fw_predictions = []
        self.lm_bw_predictions = []
        self.lm_fw_groundtruthes = []
        self.lm_bw_groundtruthes = []
        self.lambda0 = lambda0
    self.summaries = []
    # Viterbi decoding via a learned tag transition matrix.
    if self.crf > 0:
        self.transition_char = []
        for i in range(len(self.nums_tags)):
            # NOTE(review): +1 presumably reserves an extra slot for a padding tag — confirm.
            self.transition_char.append(
                tf.get_variable(
                    'transitions_char' + str(i),
                    [self.nums_tags[i] + 1, self.nums_tags[i] + 1]))
    else:
        # Keep the attribute defined even when no CRF layer is used.
        self.transition_char = None
    self.all_metrics = [
        'Precision', 'Recall', 'F1-score', 'True-Negative-Rate', 'Boundary-F1-score'
    ]
    # Pad counts so every bucket has an entry (missing buckets count as 1 sentence).
    while len(self.buckets_char) > len(self.counts):
        self.counts.append(1)
    # Compute the real size of each batch: one bucket is one batch; if a bucket
    # holds fewer sentences than batch_size the batch is that count, otherwise
    # batch_size — i.e. batches are capped at batch_size.
    self.real_batches = toolbox.get_real_batch(self.counts, self.batch_size)
    self.losses = []