def make_dataloader(opt):
    # make train's dataloader
    train_mix_reader = AudioData(opt['datasets']['train']['dataroot_mix'],
                                 **opt['datasets']['audio_setting'])
    train_target_readers = [
        AudioData(opt['datasets']['train']['dataroot_targets'][0],
                  **opt['datasets']['audio_setting']),
        AudioData(opt['datasets']['train']['dataroot_targets'][1],
                  **opt['datasets']['audio_setting'])
    ]
    train_dataset = Dataloader.dataset(train_mix_reader, train_target_readers)
    train_dataloader = Dataloader.dataloader(
        train_dataset, **opt['datasets']['dataloader_setting'])

    # make validation dataloader
    val_mix_reader = AudioData(opt['datasets']['val']['dataroot_mix'],
                               **opt['datasets']['audio_setting'])
    val_target_readers = [
        AudioData(opt['datasets']['val']['dataroot_targets'][0],
                  **opt['datasets']['audio_setting']),
        AudioData(opt['datasets']['val']['dataroot_targets'][1],
                  **opt['datasets']['audio_setting'])
    ]
    val_dataset = Dataloader.dataset(val_mix_reader, val_target_readers)
    val_dataloader = Dataloader.dataloader(
        val_dataset, **opt['datasets']['dataloader_setting'])
    return train_dataloader, val_dataloader
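# For reference, a minimal sketch of the nested `opt` dictionary that
# make_dataloader indexes. The key names come from the function above;
# every path and value below is an illustrative placeholder, not the
# project's real configuration.
example_opt = {
    'datasets': {
        'train': {
            'dataroot_mix': './data/train/mix.scp',       # placeholder path
            'dataroot_targets': ['./data/train/s1.scp',   # placeholder path
                                 './data/train/s2.scp'],  # placeholder path
        },
        'val': {
            'dataroot_mix': './data/val/mix.scp',
            'dataroot_targets': ['./data/val/s1.scp',
                                 './data/val/s2.scp'],
        },
        'audio_setting': {},       # forwarded verbatim to AudioData(**...)
        'dataloader_setting': {},  # forwarded verbatim to Dataloader.dataloader(**...)
    }
}
# train_loader, val_loader = make_dataloader(example_opt)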
def _build_dataset(self):
    if not self.config['use_synthetic_data']:
        with open(self.config['dict_path'], 'r') as fp:
            for item in fp.readlines():
                item = item.strip().split(' ')
                self.char_dict[int(item[1])] = item[0]
    self.data_loader = Dataloader(
        self.config['data_path'],
        self.config['maxlen_in'],
        self.config['maxlen_tgt'],
        self.config['vocab_size'],
        self.config['fbank_size'],
        training=self.training,
        dtype=self.config['dtype'],
        use_synthetic_data=self.config['use_synthetic_data'])
    self.data_loader.load_data()
    output_types = (self.dtype, tf.int32, tf.int32, tf.int32)
    output_shapes = (tf.TensorShape([self.config['maxlen_in'], 83, 1]),
                     tf.TensorShape([]),
                     tf.TensorShape([self.config['maxlen_tgt']]),
                     tf.TensorShape([]))
    dataset = tf.data.Dataset.from_generator(self.data_loader,
                                             output_types,
                                             output_shapes=output_shapes)
    dataset = dataset.batch(self.config['batch_size'], drop_remainder=True)
    self.infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset,
                                                        prefetch_depth=15)
def test_loader_with_sample_data(self):
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    os.chdir('../')
    from data_loader import Dataloader
    loader = Dataloader('datas/train', 768, 49, 4233, 83,
                        use_synthetic_data=True)
    loader.load_data()
    (feat, feat_len, label, label_len) = next(loader())
    assert feat.shape[0] == 768
    assert feat.shape[2] == 1
def get_the_final_result():
    # Hyper-parameter configuration
    batch_size = 512
    seq_length = 20
    embeddings_size = 300
    hidden_size = 256
    num_layers = 2
    num_classes = 9
    learning_rate = 0.003
    dropout = 0.3
    # Data file paths
    word2vec_path = './data/word2vec.bin'
    train_file = './data/train.json'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Define the model
    model = TextRCNN(embeddings_size, num_classes, hidden_size, num_layers,
                     True, dropout)
    model.to(device)
    # Load the trained model parameters
    checkpoints = torch.load('./saved_model/text_rcnn.pth')
    model.load_state_dict(checkpoints['model_state'])
    # Initialize the data loader
    data_loader = Dataloader(word2vec_path, batch_size, embeddings_size,
                             seq_length, device)
    # Load the data
    texts, labels = data_loader.load_data(train_file, shuffle=True,
                                          mode='train')
    print('Data load completed...')
    # Evaluate on the test split (last 20% of the data)
    test_texts = texts[int(len(texts) * 0.8):]
    test_labels = labels[int(len(texts) * 0.8):]
    steps = len(test_texts) // batch_size
    loader = data_loader.data_iterator(test_texts, test_labels)
    # Accuracy on the test set
    accuracy = evaluate(model, loader, steps)
    print('The final result (Accuracy in Test) is %.2f' % (accuracy * 100))
def __init__(self, cfg):
    super(CifarClassifier, self).__init__()
    self.net = None
    self.cfg = cfg
    self.trainloader, self.testloader = Dataloader(cfg['data_dir'],
                                                   cfg['train_batch'],
                                                   cfg['test_batch'])
    self.use_cuda = torch.cuda.is_available() and (not cfg['no_cuda'])
    self.best_acc = 0  # best test accuracy
    self.start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # Net Config
    if cfg['resume']:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir(
            cfg['check_dir']), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(cfg['check_dir'] + '/' + cfg['model'] + '.pth')
        self.net = checkpoint['net']
        self.best_acc = checkpoint['acc']
        self.start_epoch = checkpoint['epoch']
    else:
        print('==> Building model from scratch')
        self.net = Model.model(cfg['model'])

    # Cuda Config
    if self.use_cuda:
        self.net.cuda()
        self.net = torch.nn.DataParallel(
            self.net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    # Optimizer Config
    if cfg['opt'] == 'sgd':
        self.optimizer = optim.SGD(self.net.parameters(),
                                   lr=cfg['lr'],
                                   momentum=cfg['momentum'],
                                   weight_decay=cfg['weight_decay'])
    if cfg['opt'] == 'adam':
        # Adam does not accept a `momentum` argument
        self.optimizer = optim.Adam(self.net.parameters(),
                                    lr=cfg['lr'],
                                    weight_decay=cfg['weight_decay'])
    if cfg['opt'] == 'rmsprop':
        self.optimizer = optim.RMSprop(self.net.parameters(),
                                       lr=cfg['lr'],
                                       momentum=cfg['momentum'],
                                       weight_decay=cfg['weight_decay'])
    if cfg['opt'] == 'adagrad':
        # Adagrad does not accept a `momentum` argument
        self.optimizer = optim.Adagrad(self.net.parameters(),
                                       lr=cfg['lr'],
                                       weight_decay=cfg['weight_decay'])

    # Loss Config
    self.criterion = nn.CrossEntropyLoss()
def test_loader_precision(self):
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    os.chdir('../')
    from data_loader import Dataloader

    loader = Dataloader('datas/train', 768, 49, 4233, 83,
                        dtype='FLOAT16', use_synthetic_data=True)
    loader.load_data()
    (feat, feat_len, label, label_len) = next(loader())
    assert feat.dtype == 'float16'

    loader = Dataloader('datas/train', 768, 49, 4233, 83,
                        dtype='FLOAT32', use_synthetic_data=True)
    loader.load_data()
    (feat, feat_len, label, label_len) = next(loader())
    assert feat.dtype == 'float32'
def make_dataloader(opt):
    # make train's dataloader
    train_mix_reader = AudioData(opt['datasets']['train']['dataroot_mix'],
                                 **opt['datasets']['audio_setting'])
    train_target_readers = [
        AudioData(opt['datasets']['train']['dataroot_targets'][0],
                  **opt['datasets']['audio_setting']),
        AudioData(opt['datasets']['train']['dataroot_targets'][1],
                  **opt['datasets']['audio_setting'])
    ]
    train_dataset = Dataloader.dataset(
        train_mix_reader, train_target_readers,
        opt['datasets']['dataloader_setting']['cmvn_file'])
    train_dataloader = Loader(
        train_dataset,
        batch_size=opt['datasets']['dataloader_setting']['batch_size'],
        num_workers=opt['datasets']['dataloader_setting']['num_workers'],
        shuffle=opt['datasets']['dataloader_setting']['shuffle'])

    # make validation dataloader
    val_mix_reader = AudioData(opt['datasets']['val']['dataroot_mix'],
                               **opt['datasets']['audio_setting'])
    val_target_readers = [
        AudioData(opt['datasets']['val']['dataroot_targets'][0],
                  **opt['datasets']['audio_setting']),
        AudioData(opt['datasets']['val']['dataroot_targets'][1],
                  **opt['datasets']['audio_setting'])
    ]
    val_dataset = Dataloader.dataset(
        val_mix_reader, val_target_readers,
        opt['datasets']['dataloader_setting']['cmvn_file'])
    val_dataloader = Loader(
        val_dataset,
        batch_size=opt['datasets']['dataloader_setting']['batch_size'],
        num_workers=opt['datasets']['dataloader_setting']['num_workers'],
        shuffle=opt['datasets']['dataloader_setting']['shuffle'])
    return train_dataloader, val_dataloader
def train_and_test(model, optimizer, criterion, scheduler, batch_size,
                   embeddings_size, seq_length, save_model_path, epochs,
                   device):
    '''Run the training and testing loop for the model.'''
    data_loader = Dataloader(word2vec_path, batch_size, embeddings_size,
                             seq_length, device)  # initialize the data loader
    texts, labels = data_loader.load_data(train_file, shuffle=True,
                                          mode='train')  # load the data
    print('Data load completed...')

    # Split the dataset into training and test sets (80/20)
    train_texts = texts[:int(len(texts) * 0.8)]
    test_texts = texts[int(len(texts) * 0.8):]
    train_labels = labels[:int(len(texts) * 0.8)]
    test_labels = labels[int(len(texts) * 0.8):]

    # Number of training/testing steps per epoch
    train_steps = len(train_texts) // batch_size
    test_steps = len(test_texts) // batch_size

    best_test_acc = 0.0  # track the best result seen during training
    for e in range(1, epochs + 1):
        print('Epoch {}/{}'.format(e, epochs))
        # Train the model
        train_data_iterator = data_loader.data_iterator(train_texts,
                                                        train_labels)
        train(model, train_data_iterator, train_steps, criterion, optimizer,
              scheduler)
        train_data_iterator = data_loader.data_iterator(train_texts,
                                                        train_labels)
        test_data_iterator = data_loader.data_iterator(test_texts,
                                                       test_labels)
        # Evaluate the model
        train_accuracy = evaluate(model, train_data_iterator, train_steps)
        test_accuracy = evaluate(model, test_data_iterator, test_steps)
        print('Training accuracy: ', train_accuracy)
        print('Testing accuracy: ', test_accuracy)
        improve_acc = test_accuracy - best_test_acc
        if improve_acc > 0:
            # Save the parameters of the best model so far
            print('Found a new best accuracy...')
            best_test_acc = test_accuracy
            checkpoint = {
                'model_state': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': e,
                'accuracy': best_test_acc
            }
            torch.save(checkpoint, save_model_path)
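# A hypothetical call site for train_and_test, shown only to make the
# expected wiring concrete. Note that `word2vec_path` and `train_file` are
# read as module-level globals inside the function; the hyper-parameter
# values below mirror get_the_final_result above but are otherwise
# placeholders.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TextRCNN(300, 9, 256, 2, True, 0.3).to(device)  # emb, classes, hidden, layers
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5)
criterion = torch.nn.CrossEntropyLoss()
train_and_test(model, optimizer, criterion, scheduler,
               batch_size=512, embeddings_size=300, seq_length=20,
               save_model_path='./saved_model/text_rcnn.pth',
               epochs=10, device=device)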
if __name__ == '__main__':
    config = Config()
    input_x = tf.placeholder(tf.float32,
                             shape=[None, None, config.feat_dimension],
                             name='input_x')
    train_flag = tf.placeholder(tf.bool, shape=[], name='train_flag')
    start_scores, end_scores, action_scores = build_model(
        config, input_x, train_flag)
    saver = tf.train.Saver()
    sess = tf.InteractiveSession()
    saver.restore(sess, config.checkpoints_path)
    dataloader = Dataloader(config, 'test')
    data_iterator = dataloader.batch_data_iterator()
    all_results = {}
    print('total num: {}'.format(dataloader.batch_num))
    for vid_idx in range(dataloader.batch_num):
        data = next(data_iterator)
        _start_scores, _end_scores, _action_scores = sess.run(
            [start_scores, end_scores, action_scores],
            feed_dict={
                input_x: data['batch_feat_masked'],
                train_flag: False
            })
        cur_video_name = data['batch_video_name'][0]
        cur_fps = float(dataloader.split_gt_dict[cur_video_name]['fps'])
# Segmentation models losses can be combined together by '+' and scaled by an
# integer or float factor
dice_loss = sm.losses.DiceLoss(class_weights=np.ones(n_classes))
focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)

metrics = [
    sm.metrics.IOUScore(threshold=0.5),
    sm.metrics.FScore(threshold=0.5)
]

# compile the Keras model with the defined optimizer, loss and metrics
model.compile(optim, total_loss, metrics)

# Dataloaders
train_dataloader = Dataloader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True)
valid_dataloader = Dataloader(valid_dataset, batch_size=1, shuffle=False)

# check shapes for errors
assert train_dataloader[0][0].shape == (BATCH_SIZE, image_size, image_size,
                                        image_channels)
assert train_dataloader[0][1].shape == (BATCH_SIZE, image_size, image_size,
                                        n_classes)

# define callbacks for learning rate scheduling and best checkpoint saving
callbacks = [
    keras.callbacks.ModelCheckpoint(os.path.join(result_dir, 'best_model.h5'),
                                    save_weights_only=True,
                                    save_best_only=True,
                                    mode='min'),
class ConformerAM(object):
    def __init__(self, config):
        self.config = config
        print(self.config)
        self.infeed_queue = None
        self.outfeed_queue = None
        self.dtype = tf.float16 if self.config['dtype'] == 'FLOAT16' \
            else tf.float32
        self.output_names = []
        self.training = True if self.config['is_training'] else False
        self.computational_stages = []
        self.device_mapping = []
        self.pipeline = []
        self.outputs = [
            'loss', 'logits', 'labels', 'mask', 'kl_loss', 'ctc_loss'
        ]
        self.data_loader = None
        self.char_dict = {}
        self.kernel_regularizer = None
        self.bias_regularizer = None
        self.mask_value = -10000

    def save_pb(self, session, output_names):
        graph_def = tf.compat.v1.get_default_graph().as_graph_def()
        graph_def = graph_util.convert_variables_to_constants(
            session, graph_def, output_names)
        with tf.gfile.FastGFile('logs/model.pb', mode='wb') as f:
            f.write(graph_def.SerializeToString())

    def get_lr(self, global_step):
        lr = self.config['lr'] * self.config['adim']**(-0.5) * min(
            global_step**(-0.5),
            global_step * self.config['warmup_steps']**(-1.5))
        return lr

    def check_loss(self, tgt, loss):
        mask = np.where(tgt == 0, 1, 0).astype(loss.dtype)
        mask = np.expand_dims(mask, axis=-1)
        s = np.sum(mask * loss)
        return s

    def get_kl_acc(self, y_pred, y_true, ignore_id=0):
        y_pred = y_pred.flatten()
        y_true = y_true.flatten()
        mask = y_true != ignore_id
        numerator = np.sum(y_pred[mask] == y_true[mask])
        denominator = np.sum(mask)
        return float(numerator) / float(denominator)

    def get_ctc_acc(self, y_pred, y_true, blank_id=0):
        y_pred = np.reshape(y_pred, [-1, y_pred.shape[-1]])
        y_true = np.reshape(y_true, [-1, y_true.shape[-1]])
        cers, char_ref_lens = [], []
        for i, y in enumerate(y_pred):
            y_hat_i = [x[0] for x in groupby(y)]
            y_true_i = y_true[i]
            seq_hat, seq_true = [], []
            for idx in y_hat_i:
                idx = int(idx)
                if idx in self.char_dict.keys():
                    seq_hat.append(self.char_dict[int(idx)])
            for idx in y_true_i:
                idx = int(idx)
                if idx in self.char_dict.keys():
                    seq_true.append(self.char_dict[int(idx)])
            hyp_chars = "".join(seq_hat)
            ref_chars = "".join(seq_true)
            if len(ref_chars) > 0:
                cers.append(editdistance.eval(hyp_chars, ref_chars))
                char_ref_lens.append(len(ref_chars))
        cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None
        return cer_ctc

    def _build_dataset(self):
        if not self.config['use_synthetic_data']:
            with open(self.config['dict_path'], 'r') as fp:
                for item in fp.readlines():
                    item = item.strip().split(' ')
                    self.char_dict[int(item[1])] = item[0]
        self.data_loader = Dataloader(
            self.config['data_path'],
            self.config['maxlen_in'],
            self.config['maxlen_tgt'],
            self.config['vocab_size'],
            self.config['fbank_size'],
            training=self.training,
            dtype=self.config['dtype'],
            use_synthetic_data=self.config['use_synthetic_data'])
        self.data_loader.load_data()
        output_types = (self.dtype, tf.int32, tf.int32, tf.int32)
        output_shapes = (tf.TensorShape([self.config['maxlen_in'], 83, 1]),
                         tf.TensorShape([]),
                         tf.TensorShape([self.config['maxlen_tgt']]),
                         tf.TensorShape([]))
        dataset = tf.data.Dataset.from_generator(self.data_loader,
                                                 output_types,
                                                 output_shapes=output_shapes)
        dataset = dataset.batch(self.config['batch_size'],
                                drop_remainder=True)
        self.infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset,
                                                            prefetch_depth=15)

    def _build_pos_embedding(self, max_len, dmodel, reverse=False):
        '''
        Args:
            max_len: max_len of position embedding
            dmodel: dimension of the position embedding
            reverse: whether to reverse the input position
        Returns:
            position embedding: (1, max_len, dmodel), untrainable
        '''
        if reverse:
            pos = tf.range(max_len - 1, -1, -1.0, dtype=tf.float32)
        else:
            pos = tf.range(0, max_len, 1.0, dtype=tf.float32)
        index = tf.range(0, dmodel, 2.0, dtype=tf.float32)
        index = 1 / tf.pow(10000.0, (index / dmodel))
        sinusoid = tf.expand_dims(tf.einsum("i,j->ij", pos, index), axis=2)
        pos_emb = tf.concat([tf.sin(sinusoid), tf.cos(sinusoid)], axis=-1)
        pos_emb = tf.reshape(pos_emb, [1, pos_emb.shape[0], -1])
        return tf.cast(pos_emb, self.dtype)

    def _build_encoder_embedding(self, input, input_len, scope_name=None):
        '''
        Args:
            input: 4D-tensor, (batch_size, max_wav_len, feat_dim, 1)
            input_len: 1D-tensor, (batch_size,), valid length for each item
        Returns:
            x: 3D-tensor, (batch_size, subsmp_len, attention_dim)
            pos_emb: 3D-tensor, (1, subsmp_len, attention_dim)
            mask_adder: 4D-tensor, (batch_size, 1, 1, subsmp_len)
        '''
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            subsmp_len = (((self.config['maxlen_in'] - 3) // 2 + 1) -
                          3) // 2 + 1
            mask = tf.sequence_mask(input_len, maxlen=subsmp_len)
            mask = tf.cast(mask, scope.dtype)
            mask = tf.reshape(mask, (mask.shape[0], 1, 1, mask.shape[1]))
            mask_adder = (1.0 - mask) * self.mask_value
            # subsampling conv1, channels_last
            conv1 = tf.compat.v1.layers.Conv2D(
                self.config['adim'],
                3,
                2,
                activation="relu",
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name='subsample/conv1')
            x = conv1(input)
            # subsampling conv2, channels_last
            conv2 = tf.compat.v1.layers.Conv2D(
                self.config['adim'],
                3,
                2,
                activation="relu",
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name='subsample/conv2')
            x = conv2(x)
            x = tf.reshape(x, [x.shape[0], x.shape[1], -1])
            # embedding linear
            dense = tf.compat.v1.layers.Dense(
                units=self.config['adim'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="subsample/emb_ff")
            x = dense(x)
            # scaling
            x = math.sqrt(self.config['adim']) * x
            # position embedding
            _, length, dmodel = x.shape.as_list()
            pos_emb = self._build_pos_embedding(length, dmodel, reverse=True)
            if self.training:
                if self.config['use_ipu_dropout']:
                    wav_emb = ipu.rand_ops.dropout(
                        x, rate=self.config['dropout_rate'])
                    pos_emb = ipu.rand_ops.dropout(
                        pos_emb, rate=self.config['dropout_rate'])
                else:
                    wav_emb = tf.nn.dropout(x,
                                            rate=self.config['dropout_rate'])
                    pos_emb = tf.nn.dropout(pos_emb,
                                            rate=self.config['dropout_rate'])
            else:
                wav_emb = x
            return wav_emb, pos_emb, mask_adder

    def _build_layer_norm(self, input, scope_name):
        '''
        Args:
            input: 3D-tensor, (batch_size, length, attention_dim)
            scope_name: scope name
        Returns:
            x: layer normalized tensor, norm axis=-1
        '''
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            x = ipu.normalization_ops.layer_norm(input,
                                                 epsilon=1e-3,
                                                 training=self.training,
                                                 trainable=self.training,
                                                 scope="norm")
            return x

    def _build_feed_forward(self, input, scale, scope_name):
        '''
        Args:
            input: 3D-tensor, (batch_size, length, attention_dim)
            scope_name: scope name
        Returns:
            x: 3D-tensor, (batch_size, length, attention_dim)
        '''
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            # linear 1
            dense_1 = tf.compat.v1.layers.Dense(
                units=self.config['eunits'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="ff/dense_1")
            x = swish(dense_1(input))
            if self.training:
                if self.config['use_ipu_dropout']:
                    x = ipu.rand_ops.dropout(
                        x, rate=self.config['dropout_rate'])
                else:
                    x = tf.nn.dropout(x, rate=self.config['dropout_rate'])
            # linear 2
            dense_2 = tf.compat.v1.layers.Dense(
                units=self.config['adim'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="ff/dense_2")
            x = dense_2(x)
            if self.training:
                if self.config['use_ipu_dropout']:
                    x = ipu.rand_ops.dropout(
                        x, rate=self.config['dropout_rate'])
                else:
                    x = tf.nn.dropout(x, rate=self.config['dropout_rate'])
            x = scale * x
            return x

    def _relative_shift(self, x):
        '''
        Args:
            x: 4D-tensor, (batch_size, n_head, length_q, length_v)
        Returns:
            4D-tensor, (batch_size, n_head, length_q, length_v)
        '''
        x_shape = tf.shape(x)
        x = tf.pad(x, [[0, 0], [0, 0], [0, 0], [1, 0]])
        x = tf.reshape(x,
                       [x_shape[0], x_shape[1], x_shape[3] + 1, x_shape[2]])
        x = tf.reshape(x[:, :, 1:, :], x_shape)
        return x

    def _build_self_attention(self, query, key, value, scope_name,
                              mask_adder=None, pos_emb=None):
        '''
        Args:
            query: 3D-tensor, (batch_size, length_q, attention_dim)
            key: 3D-tensor, (batch_size, length_v, attention_dim)
            value: 3D-tensor, (batch_size, length_v, attention_dim)
            scope_name: scope name
            mask_adder: 4D-tensor, (batch_size, 1, 1, length_v)
                or (batch_size, 1, length_q, length_v)
            pos_emb: 3D-tensor, (1, length_q, attention_dim) or None
        Returns:
            3D-tensor, (batch_size, length_q, attention_dim)
        '''
        assert self.config['adim'] % self.config['aheads'] == 0
        head_size = self.config['adim'] // self.config['aheads']
        q_shape = (query.shape[0], query.shape[1], self.config['aheads'],
                   head_size)
        k_shape = (key.shape[0], key.shape[1], self.config['aheads'],
                   head_size)
        v_shape = k_shape
        input_shape = query.shape
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            # qkv
            q_dense = tf.compat.v1.layers.Dense(
                units=self.config['adim'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="att/q_dense")
            k_dense = tf.compat.v1.layers.Dense(
                units=self.config['adim'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="att/k_dense")
            v_dense = tf.compat.v1.layers.Dense(
                units=self.config['adim'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="att/v_dense")
            # (batch_size, length, adim)
            query = q_dense(query)
            key = k_dense(key)
            value = v_dense(value)
            # (batch_size, n_head, length, head_size)
            query = tf.transpose(tf.reshape(query, q_shape), (0, 2, 1, 3))
            key = tf.transpose(tf.reshape(key, k_shape), (0, 2, 1, 3))
            value = tf.transpose(tf.reshape(value, v_shape), (0, 2, 1, 3))
            # relative self-attention
            if pos_emb is not None:
                p_dense = tf.compat.v1.layers.Dense(
                    units=self.config['adim'],
                    use_bias=False,
                    kernel_initializer=create_initializer(
                        self.config['initializer_range'], dtype=self.dtype),
                    kernel_regularizer=self.kernel_regularizer,
                    bias_regularizer=self.bias_regularizer,
                    name="att/p_dense")
                pos_bias_u = tf.compat.v1.get_variable(
                    'pos_bias_u', [self.config['aheads'], head_size],
                    scope.dtype,
                    initializer=tf.keras.initializers.glorot_uniform,
                    trainable=self.training,
                    regularizer=self.bias_regularizer)
                pos_bias_v = tf.compat.v1.get_variable(
                    'pos_bias_v', [self.config['aheads'], head_size],
                    scope.dtype,
                    initializer=tf.keras.initializers.glorot_uniform,
                    trainable=self.training,
                    regularizer=self.bias_regularizer)
                # (1, length_q, adim)
                pos = p_dense(pos_emb)
                # (1, length_q, n_head, head_size)
                pos = tf.reshape(pos,
                                 (1, -1, self.config['aheads'], head_size))
                # (batch_size, length_q, n_head, head_size)
                query_with_u = tf.transpose(query, (0, 2, 1, 3)) + pos_bias_u
                query_with_v = tf.transpose(query, (0, 2, 1, 3)) + pos_bias_v
                # (batch_size, n_head, length_q, length_v)
                logits_with_u = tf.matmul(
                    tf.transpose(query_with_u, (0, 2, 1, 3)),
                    tf.transpose(key, (0, 1, 3, 2)))
                logits_with_v = tf.matmul(
                    tf.transpose(query_with_v, (0, 2, 1, 3)),
                    tf.transpose(pos, (0, 2, 3, 1)))
                logits_with_v = self._relative_shift(logits_with_v)
                logits = logits_with_u + logits_with_v
            else:
                logits = tf.matmul(query, tf.transpose(key, (0, 1, 3, 2)))
            # logits, (batch_size, n_head, length_q, length_v)
            logits = logits / math.sqrt(head_size)
            if mask_adder is not None:
                logits = tf.add(logits, mask_adder)
            scores = tf.nn.softmax(logits)
            # if mask_adder is not None:
            #     zeros = tf.zeros_like(mask_adder)
            #     scores = tf.multiply(scores,
            #                          tf.where(mask_adder < 0, zeros, 1 - zeros))
            if self.training:
                if self.config['use_ipu_dropout']:
                    scores = ipu.rand_ops.dropout(
                        scores, rate=self.config['attn_dropout_rate'])
                else:
                    scores = tf.nn.dropout(
                        scores, rate=self.config['attn_dropout_rate'])
            # (batch_size, n_head, length_q, length_v) *
            # (batch_size, n_head, length_v, head_size)
            #     -> (batch_size, n_head, length_q, head_size)
            qkv = tf.matmul(scores, value)
            # (batch_size, length_q, adim)
            qkv = tf.reshape(tf.transpose(qkv, (0, 2, 1, 3)), input_shape)
            # linear out
            o_dense = tf.compat.v1.layers.Dense(
                units=self.config['adim'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="att/o_dense")
            qkv_o = o_dense(qkv)
            if self.training:
                if self.config['use_ipu_dropout']:
                    qkv_o = ipu.rand_ops.dropout(
                        qkv_o, rate=self.config['dropout_rate'])
                else:
                    qkv_o = tf.nn.dropout(qkv_o,
                                          rate=self.config['dropout_rate'])
            return qkv_o

    def _build_conv_module(self, input, scope_name):
        '''
        Args:
            input: 3D-tensor, (batch_size, length, attention_dim)
            scope_name: scope name
        Returns:
            3D-tensor, (batch_size, length, attention_dim)
        '''
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            x = input
            # pointwise conv
            conv1 = tf.compat.v1.keras.layers.Conv1D(
                2 * self.config['adim'],
                1,
                1,
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name='convolution/conv1',
                dtype=self.dtype)
            x = conv1(x)
            x = glu(x[:, :, :self.config['adim']],
                    x[:, :, self.config['adim']:])
            # tf 1.15 doesn't support DepthwiseConv1D
            x = tf.expand_dims(x, axis=1)
            conv2 = tf.compat.v1.keras.layers.DepthwiseConv2D(
                [1, self.config['kernel_size']],
                use_bias=True,
                padding='SAME',
                depthwise_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name='convolution/conv2',
                dtype=self.dtype)
            x = tf.squeeze(conv2(x), axis=1)
            # replace `batch_normalization` with `layer_normalization`
            x = ipu.normalization_ops.layer_norm(x,
                                                 epsilon=1e-3,
                                                 training=self.training,
                                                 trainable=self.training,
                                                 scope="norm")
            x = swish(x)
            # pointwise conv
            conv3 = tf.compat.v1.keras.layers.Conv1D(
                self.config['adim'],
                1,
                1,
                padding='VALID',
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name='convolution/conv3',
                dtype=self.dtype)
            x = conv3(x)
            if self.training:
                if self.config['use_ipu_dropout']:
                    x = ipu.rand_ops.dropout(
                        x, rate=self.config['dropout_rate'])
                else:
                    x = tf.nn.dropout(x, rate=self.config['dropout_rate'])
            return x

    def _build_encoder_layer(self, x, mask_adder, pos_emb, prefix):
        '''
        Args:
            x: 3D-tensor, (batch_size, length, attention_dim)
            mask_adder: 4D-tensor, (batch_size, 1, 1, length)
            pos_emb: 3D-tensor, (1, length, attention_dim)
            prefix: scope name prefix
        Returns:
            3D-tensor, (batch_size, length, attention_dim)
        '''
        scope_name = str(prefix)
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_1')
        x = self._build_feed_forward(x, 0.5, scope_name + '/ff_1')
        x = x + residual
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_2')
        x = self._build_self_attention(x, x, x, scope_name + '/self_att',
                                       mask_adder, pos_emb)
        x = x + residual
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_3')
        x = self._build_conv_module(x, scope_name + '/conv_module')
        x = x + residual
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_4')
        x = self._build_feed_forward(x, 0.5, scope_name + '/ff_2')
        x = x + residual
        x = self._build_layer_norm(x, scope_name + '/norm_5')
        return x

    def _build_encoder(self, x, pos_emb, mask_adder):
        '''
        Args:
            x: 3D-tensor, (batch_size, length, attention_dim)
            pos_emb: 3D-tensor, (1, length, attention_dim)
            mask_adder: 4D-tensor, (batch_size, 1, 1, length)
        Returns:
            3D-tensor, (batch_size, length, attention_dim)
        '''
        for i in range(self.config['elayers']):
            x = self._build_encoder_layer(x, mask_adder, pos_emb,
                                          "encoder/encoder_" + str(i))
        x = self._build_layer_norm(x, "encoder/norm")
        return x

    # decoder related
    def _build_decoder_embedding(self, input, seq_len, scope_name):
        '''
        Args:
            input: 2D-tensor, (batch_size, maxlen_tgt), target word index
            seq_len: 1D-tensor, (batch_size,), valid target word length
            scope_name: scope name
        Returns:
            x: 3D-tensor, (batch_size, maxlen_tgt, attention_dim)
            loss_mask: 3D-tensor, (batch_size, maxlen_tgt-1, 1)
            att_mask: 4D-tensor, (1, 1, maxlen_tgt-1, maxlen_tgt-1), constant
        '''
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            loss_mask = tf.sequence_mask(
                seq_len,
                maxlen=self.config['maxlen_tgt'] - 1,
                dtype=scope.dtype)
            loss_mask = tf.expand_dims(loss_mask, axis=2)
            embedding = tf.compat.v1.get_variable(
                "embedding_table",
                [self.config['vocab_size'], self.config['adim']],
                scope.dtype,
                initializer=tf.initializers.random_uniform(
                    minval=0, maxval=1.0, dtype=scope.dtype),
                trainable=self.training,
                regularizer=self.kernel_regularizer)
            # x = tf.nn.embedding_lookup(embedding, input)
            x = embedding_ops.embedding_lookup(embedding, input)
            # position embedding
            _, max_len, dmodel = x.shape.as_list()
            pos_emb = self._build_pos_embedding(max_len, dmodel,
                                                reverse=False)
            x = math.sqrt(self.config['adim']) * x + pos_emb
            if self.training:
                if self.config['use_ipu_dropout']:
                    x = ipu.rand_ops.dropout(
                        x, rate=self.config['dropout_rate'])
                else:
                    x = tf.nn.dropout(x, rate=self.config['dropout_rate'])
            # subsequent_mask
            index = tf.range(1, self.config['maxlen_tgt'], 1, dtype=tf.int32)
            index = tf.reshape(index, (1, 1, -1))
            att_mask = tf.sequence_mask(index, dtype=scope.dtype)
            att_mask = (1.0 - att_mask) * self.mask_value
            return x, loss_mask, att_mask

    def _build_decoder_layer(self, tgt, tgt_mask, mem, mem_mask, prefix):
        '''
        Args:
            tgt: 3D-tensor, (batch_size, maxlen_tgt, attention_dim)
            tgt_mask: 4D-tensor, (batch_size, 1, maxlen_tgt, maxlen_tgt)
            mem: 3D-tensor, (batch_size, maxlen_wav, attention_dim)
            mem_mask: 4D-tensor, (batch_size, 1, 1, maxlen_wav)
        Returns:
            x: 3D-tensor, (batch_size, maxlen_tgt, attention_dim)
        '''
        scope_name = str(prefix)
        x = tgt
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_1')
        x = self._build_self_attention(x, x, x, scope_name + '/tgt_att',
                                       tgt_mask)
        x = x + residual
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_2')
        x = self._build_self_attention(x, mem, mem, scope_name + '/mem_att',
                                       mem_mask)
        x = x + residual
        residual = x
        x = self._build_layer_norm(x, scope_name + '/norm_3')
        x = self._build_feed_forward(x, 1.0, scope_name + '/ff')
        x = x + residual
        return x

    def _build_classifier_output(self, input, scope_name):
        '''
        Args:
            input: 3D-tensor, (batch_size, maxlen_tgt, attention_dim)
            scope_name: scope name
        Returns:
            3D-tensor, (batch_size, maxlen_tgt, vocab_size)
        '''
        with tf.compat.v1.variable_scope(scope_name,
                                         dtype=self.dtype,
                                         use_resource=True) as scope:
            dense = tf.compat.v1.layers.Dense(
                units=self.config['vocab_size'],
                use_bias=True,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                name="cls")
            x = dense(input)
            return x

    def _build_decoder(self, tgt, tgt_mask, mem, mem_mask):
        '''
        Args:
            tgt: 3D-tensor, (batch_size, maxlen_tgt, attention_dim)
            tgt_mask: 4D-tensor, (batch_size, 1, 1, maxlen_tgt)
            mem: 3D-tensor, (batch_size, maxlen_wav, attention_dim)
            mem_mask: 4D-tensor, (batch_size, 1, 1, maxlen_wav)
        Returns:
            3D-tensor, (batch_size, maxlen_tgt, vocab_size)
        '''
        x = tgt
        for i in range(self.config['dlayers']):
            x = self._build_decoder_layer(x, tgt_mask, mem, mem_mask,
                                          "decoder/decoder_" + str(i))
        x = self._build_layer_norm(x, "decoder/norm")
        x = self._build_classifier_output(x, "loss/kl_logits")
        return x

    def _build_kl_loss(self, logits, labels, mask):
        with tf.compat.v1.variable_scope("loss/kl_loss", use_resource=True):
            on_value = 1.0 - self.config['lsm_weight']
            off_value = self.config['lsm_weight'] / (
                self.config['vocab_size'] - 1)
            y_true = tf.one_hot(labels,
                                self.config['vocab_size'],
                                on_value=on_value,
                                off_value=off_value)
            y_true = tf.cast(y_true, self.dtype)
            y_pred = tf.nn.log_softmax(logits)
            loss_pre = y_true * (tf.math.log(y_true) - y_pred) * mask
            loss = tf.reduce_sum(loss_pre)
            return loss

    def optimizer_function(self, lr, loss, kl_cls, tgt):
        optimizer_type = self.config['optimizer'].lower()
        loss = self.config['loss_scale'] * loss
        if optimizer_type == 'sgd':
            lr = lr / self.config['loss_scale']
            optimizer = tf.compat.v1.train.GradientDescentOptimizer(lr)
        elif optimizer_type == 'sgdm':
            optimizer = tf.compat.v1.train.MomentumOptimizer(lr, 0.9)
        elif optimizer_type == 'adam':
            optimizer = tf.compat.v1.train.AdamOptimizer(lr,
                                                         beta1=0.9,
                                                         beta2=0.98,
                                                         epsilon=1e-6)
        elif optimizer_type == 'adaml':
            optimizer = AdamLossScalingOptimizer(lr,
                                                 self.config['loss_scale'],
                                                 weights_dtype=tf.float32)
        else:
            raise ValueError(f"Optimizer {optimizer_type} not implemented.")
        if self.config['replica'] > 1:
            optimizer = ipu.optimizers.cross_replica_optimizer.CrossReplicaOptimizer(
                optimizer)
        return pipelining_ops.OptimizerFunctionOutput(optimizer, loss)

    def _build_embedding_stage(self, input, input_len):
        enc_emb, pos_emb, enc_mask = \
            self._build_encoder_embedding(input, input_len,
                                          "encoder/embedding")
        return enc_emb, pos_emb, enc_mask

    def _build_encoder_stage(self, enc_emb, pos_emb, enc_mask):
        for i in range(self.tmp_start, self.tmp_end):
            enc_emb = self._build_encoder_layer(enc_emb, enc_mask, pos_emb,
                                                "encoder/encoder_" + str(i))
        return enc_emb, pos_emb, enc_mask

    def _build_decoder_embedding_stage(self, tgt, tgt_len):
        dec_emb, loss_mask, dec_mask = \
            self._build_decoder_embedding(tgt, tgt_len, "decoder/embedding")
        dec_emb = dec_emb[:, :-1, :]  # the first tgt is `sos`
        return tgt, dec_emb, loss_mask, dec_mask

    def _build_decoder_stage(self, enc_emb, enc_mask, dec_emb, dec_mask):
        for i in range(self.tmp_start, self.tmp_end):
            dec_emb = self._build_decoder_layer(dec_emb, dec_mask, enc_emb,
                                                enc_mask,
                                                "decoder/decoder_" + str(i))
        return enc_emb, enc_mask, dec_emb, dec_mask

    def _build_output_loss_stage(self, tgt, dec_emb, loss_mask):
        tgt_exclude_sos = tgt[:, 1:]
        kl_logits = self._build_layer_norm(dec_emb, "decoder/norm")
        kl_logits = self._build_classifier_output(kl_logits,
                                                  "loss/kl_logits")
        kl_cls = tf.compat.v1.argmax(kl_logits, axis=-1,
                                     output_type=tf.int32)
        kl_loss = self._build_kl_loss(kl_logits, tgt_exclude_sos, loss_mask)
        loss = kl_loss / self.global_batch_size
        return loss, kl_cls, tgt_exclude_sos

    def _build_1st_stage(self, lr, input, input_len, tgt, tgt_len):
        enc_emb, pos_emb, enc_mask = self._build_embedding_stage(
            input, input_len)
        self.tmp_start, self.tmp_end = 0, 8
        enc_emb, pos_emb, enc_mask = self._build_encoder_stage(
            enc_emb, pos_emb, enc_mask)
        return lr, enc_emb, pos_emb, enc_mask, tgt, tgt_len

    def _build_2nd_stage(self, lr, enc_emb, pos_emb, enc_mask, tgt, tgt_len):
        self.tmp_start, self.tmp_end = 8, 16
        enc_emb, pos_emb, enc_mask = self._build_encoder_stage(
            enc_emb, pos_emb, enc_mask)
        tgt, dec_emb, loss_mask, dec_mask = \
            self._build_decoder_embedding_stage(tgt, tgt_len)
        self.tmp_start, self.tmp_end = 0, 1
        enc_emb, enc_mask, dec_emb, dec_mask = self._build_decoder_stage(
            enc_emb, enc_mask, dec_emb, dec_mask)
        loss, kl_cls, tgt_exclude_sos = self._build_output_loss_stage(
            tgt, dec_emb, loss_mask)
        return lr, loss, kl_cls, tgt_exclude_sos

    def _build_computational_stages(self):
        self.computational_stages.append(partial(self._build_1st_stage))
        self.computational_stages.append(partial(self._build_2nd_stage))
        self.device_mapping = [0, 1]

    def get_global_batch_size(self):
        self.global_batch_size = self.config[
            'gradient_accumulation_count'] * self.config[
                'replica'] * self.config['batch_size']
        print('local batch size: {}, ga: {}, global batch size: {}'.format(
            self.config['batch_size'],
            self.config['gradient_accumulation_count'],
            self.global_batch_size))

    def run_with_pipeline(self):
        self._build_dataset()
        self._build_computational_stages()
        self.get_global_batch_size()
        self.outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        def train(lr, infeed, outfeed, gradient_accumulation_count):
            pipeline_op = pipelining_ops.pipeline(
                self.computational_stages,
                gradient_accumulation_count=gradient_accumulation_count,
                gradient_accumulation_dtype=self.dtype,
                inputs=[lr],
                infeed_queue=infeed,
                outfeed_queue=outfeed,
                device_mapping=self.device_mapping,
                optimizer_function=self.optimizer_function,
                offload_weight_update_variables=False)
            return pipeline_op

        def infer(lr, infeed, outfeed, gradient_accumulation_count):
            pipeline_op = pipelining_ops.pipeline(
                self.computational_stages,
                gradient_accumulation_count=gradient_accumulation_count,
                gradient_accumulation_dtype=self.dtype,
                inputs=[lr],
                infeed_queue=infeed,
                outfeed_queue=outfeed,
                device_mapping=self.device_mapping)
            return pipeline_op

        model = train if self.training else infer
        with tf.compat.v1.device("cpu"):
            lr = tf.compat.v1.placeholder(np.float32, [])
        pipeline_md = partial(model,
                              lr=lr,
                              infeed=self.infeed_queue,
                              outfeed=self.outfeed_queue,
                              gradient_accumulation_count=self.
                              config['gradient_accumulation_count'])
        with ipu_scope('/device:IPU:0'):
            compiled = ipu_compiler.compile(pipeline_md, [])
        outfeed = self.outfeed_queue.dequeue()
        saver = tf.compat.v1.train.Saver()

        total_parameters = 0
        variables = tf.compat.v1.trainable_variables()
        if not os.path.exists('logs'):
            os.mkdir('logs')
        with open('logs/' + self.config['logfile'], 'w') as fp:
            for var in variables:
                fp.write(str(var) + '\n')
                total_parameters += np.prod(var.shape)
            fp.write('\nTotal Parameters : ' + str(total_parameters) + '\n')

        # Create ipu_options; we assume one IPU per stage here
        ipu_options = get_config(num_ipus=len(self.device_mapping) *
                                 self.config['replica'])
        ipu_options.configure_ipu_system()
        total_steps = self.data_loader.num_utts * self.config[
            'epochs'] // self.global_batch_size
        print('total_steps: ', total_steps)
        if self.config['wandb_name'] is not None:
            try:
                import wandb
            except ImportError:
                raise ImportError('wandb not installed')
            wandb.init(self.config['wandb_name'])

        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(self.infeed_queue.initializer)
            step_per_epoch = self.data_loader.num_utts // \
                self.global_batch_size
            for epoch in range(1, self.config['epochs'] + 1):
                for step in range(1, step_per_epoch + 1):
                    global_step = (epoch - 1) * step_per_epoch + step
                    step_lr = self.get_lr(global_step)
                    start = time.time()
                    _ = sess.run(compiled, {lr: step_lr})
                    result = sess.run(outfeed)
                    duration = time.time() - start
                    if step % 10 == 0:
                        tput = self.global_batch_size / duration
                        print(
                            'epoch: {}/{}, global_step: {}/{}, loss: {}, through_put: {}'
                            .format(epoch, self.config['epochs'], global_step,
                                    total_steps, np.mean(result[1]), tput))
                        kl_acc = self.get_kl_acc(result[2], result[3])
                        if self.config['wandb_name'] is not None:
                            wandb.log({
                                "loss": np.mean(result[1]),
                                'acc': kl_acc,
                            })
                if self.config['save_checkpoint']:
                    saver.save(sess,
                               'logs/model.ckpt',
                               global_step=global_step)
            if self.config['freeze']:
                self.save_pb(sess, self.output_names)
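# A minimal sketch of how ConformerAM appears to be driven, inferred only
# from the methods above. The config keys listed are ones the class actually
# reads; the values are illustrative placeholders (the 768/49/4233/83 shapes
# mirror the loader tests earlier in this section), not real settings.
example_config = {
    'dtype': 'FLOAT16', 'is_training': True, 'use_synthetic_data': True,
    'data_path': 'datas/train', 'maxlen_in': 768, 'maxlen_tgt': 49,
    'vocab_size': 4233, 'fbank_size': 83, 'batch_size': 4,
    'gradient_accumulation_count': 16, 'replica': 1, 'epochs': 1,
    # ... plus the model keys read above: 'adim', 'aheads', 'eunits',
    # 'elayers', 'dlayers', 'kernel_size', 'dropout_rate',
    # 'attn_dropout_rate', 'lsm_weight', 'lr', 'warmup_steps', 'optimizer',
    # 'loss_scale', 'use_ipu_dropout', 'initializer_range', 'logfile',
    # 'wandb_name', 'save_checkpoint', 'freeze'
}
# am = ConformerAM(example_config)
# am.run_with_pipeline()  # builds dataset and pipeline stages, then trains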
def adversarial_train(generator, discriminator, generator_batcher,
                      discriminator_batcher, summary_writer,
                      sess_context_manager):
    print("Start adversarial training...")
    with sess_context_manager as sess:
        D_rewards = np.zeros((FLAGS.batch_size, FLAGS.max_dec_steps))
        rouge_rewards = np.zeros((FLAGS.batch_size, 1))
        while True:
            # Train generator for one step
            print("Start to train generator...")
            batch = generator_batcher.next_batch()
            batch.batch_reward = D_rewards
            batch.batch_rouge_reward = rouge_rewards
            t0 = time.time()
            result_train = generator.run_train_step(sess, batch)
            loss = result_train['loss']
            summaries = result_train['summaries']
            train_step = result_train['global_step']
            summary_writer.add_summary(summaries, train_step)
            print("train step: %d train loss: %.3f time: %.3fs" %
                  (train_step, loss, time.time() - t0))

            rouge_rewards = []
            target_token = batch.target_batch
            output_sample_token = np.transpose(
                np.squeeze(result_train['output_sample_token']))
            output_argmax_token = np.transpose(
                np.squeeze(result_train['output_summary_token']))
            rouge = Rouge()
            for target, sample, argmax in zip(target_token,
                                              output_sample_token,
                                              output_argmax_token):
                target_ = remove_eos(target)
                sample_ = remove_eos(sample)
                argmax_ = remove_eos(argmax)
                if len(argmax_) > 0:
                    r_baseline = rouge.get_scores(
                        argmax_, target_)[0]["rouge-l"]["f"]
                else:
                    r_baseline = 0
                if len(sample_) > 0:
                    r_sample = rouge.get_scores(
                        sample_, target_)[0]["rouge-l"]["f"]
                else:
                    r_sample = 0
                # print("r_baseline:", r_baseline)
                # print("r_sample:", r_sample)
                rouge_rewards.append(r_baseline - r_sample)
            rouge_rewards = np.reshape(rouge_rewards, [FLAGS.batch_size, 1])
            print("RL reward for rouge-L: %.3f" % np.mean(rouge_rewards))

            print("running rollout step...")
            t0 = time.time()
            result_rollout = generator.run_rollout_step(sess, batch)
            # shape [rollout_num, seqlen(this is number of roll), batch_size, seq_len]
            rollout_output = result_rollout['rollout_token']
            print("rollout step: %.3fs" % (time.time() - t0))

            # calculate D_reward
            print("start to calculate D_rewards")
            feed_output_token = []
            rollout_output = np.reshape(rollout_output,
                                        [-1, FLAGS.max_dec_steps])
            for sent in rollout_output:
                index_list = np.where(sent == 3)[0]
                if len(index_list) != 0:
                    ind = index_list[0]
                    new_sent = np.concatenate(
                        [sent[:ind + 1],
                         np.ones(FLAGS.max_dec_steps - ind - 1)])
                    feed_output_token.append(new_sent)
                else:
                    feed_output_token.append(sent)
            feed_output_token = np.array(feed_output_token)
            feed_output_token[feed_output_token > FLAGS.vocab_size - 1] = 0

            # update
            ypred_for_auc = []
            for token in np.split(feed_output_token, FLAGS.rollout):
                feed = {
                    discriminator.input_x: token,
                    discriminator.dropout_keep_prob: 1.0
                }
                # shape: [rollout_num * seqlen(this is number of roll) * batch_size, 2]
                ypred_auc = sess.run(discriminator.ypred_for_auc, feed)
                ypred_for_auc.append(ypred_auc)
            ypred_for_auc = np.concatenate(ypred_for_auc)
            ypred = np.array([item[1] for item in ypred_for_auc])
            ypred = np.reshape(ypred, [FLAGS.rollout, -1, FLAGS.batch_size])
            # [batch_size, output_max_len // 20]
            rewards = np.transpose(np.sum(ypred, 0)) / (1.0 * FLAGS.rollout)
            if np.std(rewards) != 0.:
                rewards = (rewards - np.mean(rewards)) / np.std(rewards)
            D_rewards = np.zeros([FLAGS.batch_size, FLAGS.max_dec_steps])
            for count, i in enumerate(range(1, FLAGS.max_dec_steps, 10)):
                D_rewards[:, i] = rewards[:, count]
            print("D_rewards:", D_rewards.shape)

            # Train discriminator
            print("Start to train discriminator...")
            for _ in tqdm(range(5)):
                batch = discriminator_batcher.next_batch()
                result = generator.run_summary_token_step(sess, batch)
                output_summary_token = result['output_summary_token']
                output_summary_token = np.transpose(
                    np.squeeze(output_summary_token))  # [batch_size, max_dec_steps]
                ground_truth = batch.target_batch  # [batch_size, max_dec_steps]
                output_summary = []
                for sent in output_summary_token:
                    index_list = np.where(sent == 3)[0]
                    if len(index_list) != 0:
                        ind = index_list[0]
                        new_sent = np.concatenate(
                            [sent[:ind + 1],
                             np.ones(FLAGS.max_dec_steps - ind - 1)])
                        output_summary.append(new_sent)
                    else:
                        output_summary.append(sent)
                output_summary = np.array(output_summary)

                max_epoch = 3
                dis_loader = Dataloader(FLAGS.batch_size, FLAGS.vocab_size)
                pos_train = [ground_truth[i] for i in range(len(ground_truth))]
                neg_train = [
                    output_summary[i] for i in range(len(output_summary))
                ]
                for _ in range(max_epoch):
                    dis_loader.load_data(pos_train, neg_train)
                    dis_loader.reset_pointer()
                    # train for 1 epoch
                    for it in range(dis_loader.num_batch):
                        x_batch, y_batch = dis_loader.next_batch()
                        feed = {
                            discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob: 0.5
                        }
                        sess.run(discriminator.train_op, feed)
def pretrain_discriminator(discriminator, sess):
    print("Pretrain discriminator...")
    dis_train_loader = Dataloader(FLAGS.dis_batch_size, FLAGS.vocab_size)
    dis_val_loader = Dataloader(FLAGS.dis_batch_size, FLAGS.vocab_size)
    pretrain_dis_data = np.load(FLAGS.pretrain_dis_data_path)
    pos_summary = pretrain_dis_data['pos_summary_idx']
    neg_summary = pretrain_dis_data['neg_summary_idx']
    assert len(pos_summary) == len(neg_summary)

    train_max_epoch = 20  # max training epochs
    val_num = 1000  # number of validation samples
    pos_train = []
    neg_train = []
    pos_val = []
    neg_val = []
    val_select = random.sample(list(range(0, len(pos_summary))), val_num)
    for i in range(len(pos_summary)):
        if i in val_select:
            pos_val.append(pos_summary[i][:FLAGS.max_dec_steps])
            neg_val.append(neg_summary[i][:FLAGS.max_dec_steps])
        else:
            pos_train.append(pos_summary[i][:FLAGS.max_dec_steps])
            neg_train.append(neg_summary[i][:FLAGS.max_dec_steps])
    print("length train:", len(pos_train))
    print("length val:", len(pos_val))

    for epoch in tqdm(range(train_max_epoch)):
        # training process
        dis_train_loader.load_data(pos_train, neg_train)
        dis_train_loader.reset_pointer()
        for it in range(dis_train_loader.num_batch):
            x_batch, y_batch = dis_train_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.input_y: y_batch,
                discriminator.dropout_keep_prob: 0.5
            }
            sess.run(discriminator.train_op, feed)
        # validation process
        dis_val_loader.load_data(pos_val, neg_val)
        dis_val_loader.reset_pointer()
        acc_list = []
        for it in range(dis_val_loader.num_batch):
            x_batch, y_batch = dis_val_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.input_y: y_batch,
                discriminator.dropout_keep_prob: 1.0
            }
            pred = sess.run(discriminator.predictions, feed)
            target = np.where(
                np.array(y_batch) == 1)[-1]  # np.concatenate(y_batch)
            acc_list.append(accuracy_score(y_pred=pred, y_true=target))
        eval_acc = np.mean(acc_list)
        print('pretrain epoch:{}, eval accuracy: {}'.format(epoch, eval_acc))
def pretrain_discriminator(discriminator, sess_context_manager):
    dis_train_data_loader = Dataloader(FLAGS.dis_batch_size, FLAGS.vocab_size)
    dis_test_data_loader = Dataloader(FLAGS.dis_batch_size, FLAGS.vocab_size)
    print("Pre-train Discriminator")
    pretrain_dis_data = np.load(FLAGS.pretrain_dis_data_path)
    pos_summary, neg_summary = pretrain_dis_data[
        'pos_summary_idx'], pretrain_dis_data['neg_summary_idx']

    positive_train_summary = []
    negative_train_summary = []
    positive_eval_summary = []
    negative_eval_summary = []

    # Prepare train and eval data
    for i in range(len(pos_summary)):
        if i < 143800:
            positive_train_summary.append(pos_summary[i][:FLAGS.max_dec_steps])
            negative_train_summary.append(neg_summary[i][:FLAGS.max_dec_steps])
        else:
            positive_eval_summary.append(pos_summary[i][:FLAGS.max_dec_steps])
            negative_eval_summary.append(neg_summary[i][:FLAGS.max_dec_steps])

    # Training
    train_max_epoch = 15
    sess = sess_context_manager
    for epoch in tqdm(range(train_max_epoch)):
        dis_train_data_loader.load_data(positive_train_summary,
                                        negative_train_summary)
        dis_train_data_loader.reset_pointer()
        for it in range(dis_train_data_loader.num_batch):
            x_batch, y_batch = dis_train_data_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.input_y: y_batch,
                discriminator.dropout_keep_prob: 0.5
            }
            sess.run(discriminator.train_op, feed)

        dis_test_data_loader.load_data(positive_eval_summary,
                                       negative_eval_summary)
        dis_test_data_loader.reset_pointer()
        acc_list = []
        for it in range(dis_test_data_loader.num_batch):
            x_batch, y_batch = dis_test_data_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.input_y: y_batch,
                discriminator.dropout_keep_prob: 1.0
            }
            pred = sess.run(discriminator.predictions, feed)
            target = np.where(
                np.array(y_batch) == 1)[-1]  # np.concatenate(y_batch)
            acc_list.append(accuracy_score(y_pred=pred, y_true=target))
        eval_acc = np.mean(acc_list)
        print('Pretrain epoch:{}, Eval accuracy: {}'.format(epoch, eval_acc))
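# For context, the .npz file loaded by both pretrain_discriminator variants
# above is expected to expose two token-index arrays under the keys read by
# np.load. A hypothetical way to produce such a file (array names match the
# code above; shapes are placeholders):
# np.savez(FLAGS.pretrain_dis_data_path,
#          pos_summary_idx=pos_ids,   # int array, one row per reference summary
#          neg_summary_idx=neg_ids)   # int array, one row per generated summary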
def run_training(generator, discriminator, generator_batcher,
                 discriminator_batcher, summary_writer,
                 sess_context_manager):
    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    with sess_context_manager as sess:
        D_rewards = np.zeros((FLAGS.batch_size, FLAGS.max_dec_steps))
        rouge_rewards = np.zeros((FLAGS.batch_size, 1))
        while True:
            # Train the generator for one step
            for it in range(1):
                batch = generator_batcher.next_batch()
                batch.batch_reward = D_rewards
                batch.batch_rouge_reward = rouge_rewards
                tf.logging.info('running training step...')
                t0 = time.time()
                result_train = generator.run_train_step(sess, batch)
                t1 = time.time()
                tf.logging.info('seconds for training step: %.3f', t1 - t0)

                loss = result_train['loss']
                tf.logging.info('Generator train loss: %f',
                                loss)  # print the loss to screen
                summaries = result_train['summaries']
                train_step = result_train['global_step']
                summary_writer.add_summary(summaries,
                                           train_step)  # write the summaries

                rg = Rouge()
                gtruth_token = batch.target_batch
                output_sample_token = np.transpose(
                    np.squeeze(result_train['output_sample_token']))
                output_argmax_token = np.transpose(
                    np.squeeze(result_train['output_summary_token']))

                def remove_eos(input_text):
                    _input_text_eos = np.where(input_text == 3)[0]
                    if len(_input_text_eos) != 0:
                        cliped_text = input_text[:_input_text_eos[0]]
                    else:
                        cliped_text = input_text
                    return ' '.join(map(str, cliped_text))

                rouge_rewards = []
                for gt, sample, argmax in zip(gtruth_token,
                                              output_sample_token,
                                              output_argmax_token):
                    _gt = remove_eos(gt)
                    _sample = remove_eos(sample)
                    _argmax = remove_eos(argmax)
                    r_baseline = rg.get_scores(_gt,
                                               _argmax)[0]['rouge-l']['f']
                    r_sample = rg.get_scores(_gt, _sample)[0]['rouge-l']['f']
                    rouge_rewards.append(r_baseline - r_sample)
                rouge_rewards = np.reshape(rouge_rewards,
                                           [FLAGS.batch_size, 1])
                tf.logging.info('RL reward for rouge-L: %.3f',
                                np.mean(rouge_rewards))

                tf.logging.info('running rollout step...')
                t0 = time.time()
                result_rollout = generator.run_rollout_step(sess, batch)
                t1 = time.time()
                tf.logging.info('seconds for rollout step: %.3f', t1 - t0)
                # shape [rollout_num, seqlen(this is number of roll), batch_size, seq_len]
                rollout_output = result_rollout['rollout_token']
                given_number_of_rollout = rollout_output.shape[1]

                # calculate D_reward
                print("start to calculate D_rewards")
                _feed_output_token = np.reshape(rollout_output,
                                                [-1, FLAGS.max_dec_steps])
                feed_output_token = []
                for sent in _feed_output_token:
                    index_list = np.where(sent == 3)[0]
                    if len(index_list) != 0:
                        ind = index_list[0]
                        new_sent = np.concatenate(
                            [sent[:ind + 1],
                             np.ones(100 - ind - 1)])
                        feed_output_token.append(new_sent)
                    else:
                        new_sent = np.array(sent, dtype=np.int32)
                        feed_output_token.append(new_sent)
                feed_output_token = np.array(feed_output_token)
                feed_output_token = feed_output_token.reshape(
                    (len(feed_output_token), -1))
                print("feed_out_token.shape:", feed_output_token.shape)
                '''
                clip_index = np.where(feed_output_token > FLAGS.vocab_size - 1)
                index_x = clip_index[0]
                index_y = clip_index[1]
                for i in range(len(index_x)):
                    feed_output_token[index_x[i]][index_y[i]] = 0
                '''
                if feed_output_token.shape[1] > 1:
                    for i in range(len(feed_output_token)):
                        clip_index = np.where(
                            np.array(feed_output_token[i]) >
                            FLAGS.vocab_size - 1)
                        for idx in clip_index:
                            feed_output_token[i][idx] = 0
                    # update
                    ypred_for_auc = []
                    for feed_output_token_small in np.split(
                            feed_output_token, FLAGS.rollout):
                        feed = {
                            discriminator.input_x: feed_output_token_small,
                            discriminator.dropout_keep_prob: 1.0
                        }
                        # ypred_for_auc:
                        # [rollout_num * seqlen(this is number of roll) * batch_size, 2]
                        ypred_for_auc.append(
                            sess.run(discriminator.ypred_for_auc, feed))
                    ypred_for_auc = np.concatenate(ypred_for_auc)
                    ypred = np.array([item[1] for item in ypred_for_auc])
                    framed_yred = np.reshape(ypred, [
                        FLAGS.rollout, given_number_of_rollout,
                        FLAGS.batch_size
                    ])
                    # [batch_size, output_max_len // 20]
                    rewards = np.transpose(np.sum(framed_yred, 0)) / (
                        1.0 * FLAGS.rollout)
                    if np.std(rewards) != 0.:
                        rewards = (rewards -
                                   np.mean(rewards)) / np.std(rewards)
                    D_rewards = np.zeros(
                        (FLAGS.batch_size, FLAGS.max_dec_steps))
                    print("rewards.shape:", rewards.shape)
                    for count, i in enumerate(
                            range(1, FLAGS.max_dec_steps,
                                  int(FLAGS.max_dec_steps /
                                      rewards.shape[1]))):
                        D_rewards[:, i] = rewards[:, count]
                else:
                    tmp = []
                    for i in range(len(feed_output_token)):
                        tmp.append(feed_output_token[i][0])
                    feed_output_token = np.array(tmp).copy()
                    print("feed-new:", feed_output_token.shape)
                    print("Filter out!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")

            # Train the discriminator
            print("Start to train the Discriminator!")
            for _ in tqdm(range(5)):
                batch = discriminator_batcher.next_batch()
                res = generator.run_summary_token_step(sess, batch)
                _output_argmax_summary = res['output_summary_token']
                _output_argmax_summary = np.transpose(
                    np.squeeze(_output_argmax_summary))  # [batch_size, max_dec_steps]
                gtruth_data = batch.target_batch  # [batch_size, max_dec_steps]; format: [[], [], ...]
                output_argmax_summary = []
                for sent in _output_argmax_summary:
                    index_list = np.where(sent == 3)[0]
                    if len(index_list) != 0:
                        ind = index_list[0]
                        new_sent = np.concatenate(
                            [sent[:ind + 1],
                             np.ones(FLAGS.max_dec_steps - ind - 1)])
                        output_argmax_summary.append(new_sent)
                    else:
                        output_argmax_summary.append(sent)
                output_argmax_summary = np.array(output_argmax_summary)

                positive_examples = []
                negative_examples = []
                for ele in gtruth_data:
                    positive_examples.append(ele)
                for ele in output_argmax_summary:
                    negative_examples.append(ele)

                dis_data_loader = Dataloader(FLAGS.batch_size,
                                             FLAGS.vocab_size)
                max_epoch = 3
                for epoch in range(max_epoch):
                    dis_data_loader.load_data(positive_examples,
                                              negative_examples)
                    dis_data_loader.reset_pointer()
                    for it in range(dis_data_loader.num_batch):
                        x_batch, y_batch = dis_data_loader.next_batch()
                        feed = {
                            discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob: 0.5
                        }
                        _ = sess.run(discriminator.train_op, feed)
    else:
        fold_dev_preds = torch.argmax(nnmodel(fold_dev_x), dim=1)
    kfoldpreds.append(fold_dev_preds)
    kfoldtrue.append(fold_dev_y)

if gpu:
    dev_preds = [
        id2label[x]
        for x in (torch.argmax(nnmodel(dev_data.cuda()),
                               dim=1).detach().cpu().numpy())
    ]
else:
    dev_preds = [
        id2label[x]
        for x in (torch.argmax(nnmodel(dev_data), dim=1).detach().numpy())
    ]

dataloader = Dataloader()
dev_spans = dataloader.read_spans(
    file_name="./datasets/dev-task-TC-template.out")
dev_spans["gold_label"] = dev_preds
datawriter = Datawriter()
datawriter.pred_writer(dev_spans, "./predictions/dev_preds.txt")

if gpu:
    test_preds = [
        id2label[x]
        for x in (torch.argmax(nnmodel(test_data.cuda()),
                               dim=1).detach().cpu().numpy())
    ]
help="training epoch of the process") args = parser.parse_args() # load default parameters cfg = json.load(open("./config/parameters.json", "r")) # adjust parameters by command line cfg = update_hyper(args, cfg) use_cuda = torch.cuda.is_available() and (not cfg['no_cuda']) best_acc = 0 # best test accuracy start_epoch = 0 # start from epoch 0 or last checkpoint epoch # DataLoader print('==> Preparing data..') trainloader, testloader = Dataloader(cfg['data_dir'], cfg['train_batch'], cfg['test_batch']) # Net config if cfg['resume']: # Load checkpoint. print('==> Resuming from checkpoint..') assert os.path.isdir( cfg['check_dir']), 'Error: no checkpoint directory found!' checkpoint = torch.load(cfg['check_dir'] + '/' + cfg['model'] + '.pth') net = checkpoint['net'] best_acc = checkpoint['acc'] start_epoch = checkpoint['epoch'] else: print('==> Building model from scratch') net = Model.model(cfg['model'])
import datetime
import random
import time

import ktrain


def datetime_to_timestamp(t):
    timestamp = datetime.datetime.strptime(t.replace(' +0000', ''),
                                           '%a %b %d %H:%M:%S %Y')
    return time.mktime(timestamp.timetuple())


def load_model_sentiment(model_path):
    return ktrain.load_predictor(model_path)


def sentiment(text_list, model, prob=False):
    if prob:
        return model.predict_proba(text_list)
    else:
        return model.predict(text_list)


if __name__ == '__main__':
    file = 'baltimore_data'
    from data_loader import Dataloader
    dataloader = Dataloader('/root/tweets_dataset')
    dataset = dataloader.load_files(file, 100)

    samples = random.sample(dataset, 10)
    for item in samples:
        item['created_at'] = int(datetime_to_timestamp(item['created_at']))
        print(item['created_at'])
                                                   sentiment=sentiment)
            else:
                temp_graph = copy.deepcopy(dygraph[previous_key])
                dygraph[key] = graph_types[graph_type](graph_list,
                                                       graph=temp_graph,
                                                       sentiment=sentiment)
            previous_key = key
    else:
        for key, graph_list in dygraph.items():
            dygraph[key] = graph_types[graph_type](graph_list,
                                                   sentiment=sentiment)
    return dygraph


if __name__ == '__main__':
    from data_loader import Dataloader

    file = 'baltimore_data'
    dataloader = Dataloader(
        'E:/Network Science (99-3-30)/Tasks/Project 2 - Polarization/DataSets/baltimore'
    )
    dataset = dataloader.load_files(file, 1000)
    print('size of dataset = %s' % len(dataset))
    graphs = dynamic_graph(dataset, graph_type='reply', cumulative=False)
    # print('reply graph --> nodes = %s, edges = %s'
    #       % (len(graph.nodes), len(graph.edges)))
    count = 1
    for key, graph in graphs.items():
        print('%s. graph (%s) --> nodes = %s, edges = %s' %
              (count, key, len(graph.nodes), len(graph.edges)))
        count += 1
    print(len(graphs))