Example #1
def override_parameters(params, args):
    params.dev_params = args.dev_params
    params.model = args.model
    params.input = args.input or params.input
    params.output = args.output or params.output
    params.vocab = args.vocab or params.vocab
    params.validation = args.validation or params.validation
    params.references = args.references or params.references
    params.parse(args.parameters)

    vocab_dir = os.path.dirname(params.vocab[0])
    params.vocabulary = {
        "source":
        vocab.load_vocab(params.vocab[0], params),
        "target":
        vocab.load_vocab(params.vocab[1], params),
        "lf":
        vocab.load_vocab(os.path.join(vocab_dir, 'vocab-lf.txt'), params),
        "sketch":
        vocab.load_vocab(os.path.join(vocab_dir, 'vocab-sketch.txt'), params)
    }

    if params.use_pretrained_embedding:
        params.init_word_matrix = vocab.load_word_matrix(args.vocab[0])

    return params
Example #2
def override_parameters(params, args):
    params.input = args.input or params.input
    params.output = args.output or params.output
    params.vocab = args.vocab
    params.parse(args.parameters)

    params.vocabulary = {
        "source": vocab.load_vocab(args.vocab[0]),
        "target": vocab.load_vocab(args.vocab[1])
    }

    return params
Example #3
def override_parameters(params, args):
    params.model = args.model
    params.input = args.input or params.input
    params.output = args.output or params.output
    params.vocab = args.vocab or params.vocab
    params.validation = args.validation or params.validation
    params.references = args.references or params.references
    params.parse(args.parameters)

    params.vocabulary = {
        "source": vocab.load_vocab(params.vocab[0]),
        "target": vocab.load_vocab(params.vocab[1])
    }

    return params
Example #4
def override_parameters(params, args):
    params.input = args.input or params.input
    params.output = args.output or params.output
    params.vocab = args.vocab
    if params.use_pretrained_embedding:
        params.embeddings = args.embeddings or params.embeddings
    params.parse(args.parameters)

    params.vocabulary = {
        "source": vocab.load_vocab(args.vocab[0]),
        "memory": vocab.load_vocab(args.vocab[1]),
        "target": vocab.load_vocab(args.vocab[2])
    }

    return params
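Examples #1 through #4 use the "args.input or params.input" idiom to keep an existing default whenever the corresponding command-line flag was left unset. A minimal, self-contained illustration of that idiom (the attribute names and values below are made up):

from types import SimpleNamespace

params = SimpleNamespace(input='data/train.txt', output='out/')
args = SimpleNamespace(input=None, output='out/run2')  # --input was not given

params.input = args.input or params.input      # keeps 'data/train.txt'
params.output = args.output or params.output   # overridden to 'out/run2'
print(params.input, params.output)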
Example #5
import sys

from utils.vocab import load_vocab  # module-level imports assumed by this snippet


def main(input_file, vocab):
    token2idx, idx2token = load_vocab(vocab)
    with open(input_file) as f:
        for line in f:
            line = line.strip().split()
            for token in line:
                if token not in token2idx.keys():
                    print(token)
                    sys.exit()
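Examples #5, #6, #7, #9 and #11 all unpack load_vocab into a token-to-index and an index-to-token mapping. A minimal sketch of a loader with that contract, assuming a plain-text vocabulary file with one token per line (the real projects' file format and extra options may differ):

def load_vocab(vocab_path):
    # Build token -> index and index -> token maps from a one-token-per-line file.
    token2idx, idx2token = {}, {}
    with open(vocab_path, encoding='utf-8') as f:
        for line in f:
            token = line.strip()
            if not token:
                continue
            idx = len(token2idx)
            token2idx[token] = idx
            idx2token[idx] = token
    return token2idx, idx2token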
Example #6
def main():
    from utils.vocab import load_vocab
    from utils.math_numpy import softmax

    token2idx, idx2token = load_vocab(
        '/Users/easton/Documents/vocab_3673+1.txt')
    vocab = token2idx.keys()
    table_value = homophone_table(vocab)
    # print(table_value, table_value.shape)
    target_distributions = softmax(table_value)

    m = target_distributions[10]
    print(m[m > 0])
    # import pdb; pdb.set_trace()
    print('asd')
Example #7
def testRNADecode():
    import pickle
    from utils.vocab import load_vocab

    _, idx2token = load_vocab(
        '/Users/easton/Projects/eastonCode/examples/decode/vocab_3673+1.txt')
    with open('/Users/easton/Projects/eastonCode/examples/decode/distribution.txt', 'rb') as f,\
        open('/Users/easton/Projects/eastonCode/examples/decode/dev_rna_res.txt', 'w') as fw:
        while True:
            try:
                res, _ = rna_decode(pickle.load(f),
                                    beam_size=10,
                                    prune=0.0002,
                                    alpha=0.30,
                                    beta=5)
                # res, _ = ctc_decode(pickle.load(f), beam_size=1)
                res = ' '.join(idx2token[id] for id in res)
                print(res)
                fw.write(res + '\n')
            except EOFError:
                break
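Example #7 reads consecutive pickled objects from one file until pickle raises EOFError. A small self-contained round trip of that pattern (the file name and batch contents are invented):

import os
import pickle
import tempfile

path = os.path.join(tempfile.gettempdir(), 'distributions.pkl')

# Write several objects back to back into one file ...
with open(path, 'wb') as f:
    for batch in ([0.1, 0.9], [0.7, 0.3], [0.5, 0.5]):
        pickle.dump(batch, f)

# ... then read them back until the stream is exhausted.
with open(path, 'rb') as f:
    while True:
        try:
            print(pickle.load(f))
        except EOFError:
            break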
Example #8
intent_train_set = IntentDataset(train_file)

# get initial samples
initial_samples = active_learning.get_balanced_sample_indices(
    intent_train_set.targets,
    num_classes=num_classes,
    n_per_digit=num_initial_samples // num_classes)

# build active learning data
active_learning_data = active_learning.ActiveLearningData(intent_train_set)

active_learning_data.acquire(initial_samples)

# build train generator
if model_type == 'bert':
    vocabulary = load_vocab()
    train_generator = data_generator_2.DataGenerator(
        active_learning_data.training_dataset,
        training_args.batch_size,
        data_args,
        vocabulary,
        intent_labels,
        shuffle=True)
    pool_generator = data_generator_2.DataGenerator(
        active_learning_data.pool_dataset, pool_batch_size, data_args,
        vocabulary, intent_labels)
    train_2.train_main(train_generator)

else:
    vocabulary, id2embeddings = load_vocab_w2v()
    train_generator = data_generator_2.DataGeneratorW2V(
Example #9
        int(i) for i in args.bucket_boundaries.split(',')
    ]
else:
    args.list_bucket_boundaries = [
        i for i in range(args.size_bucket_start, args.size_bucket_end,
                         args.size_bucket_gap)
    ]

assert args.num_batch_tokens
args.list_batch_size = ([
    int(args.num_batch_tokens / boundary) * args.num_gpus
    for boundary in (args.list_bucket_boundaries)
] + [args.num_gpus])
logging.info('\nbucket_boundaries: {} \nbatch_size: {}'.format(
    args.list_bucket_boundaries, args.list_batch_size))

# vocab
args.token2idx, args.idx2token = load_vocab(args.dirs.vocab)
args.dim_output = len(args.token2idx)
if '<eos>' in args.token2idx.keys():
    args.eos_idx = args.token2idx['<eos>']
else:
    args.eos_idx = None

if '<sos>' in args.token2idx.keys():
    args.sos_idx = args.token2idx['<sos>']
elif '<blk>' in args.token2idx.keys():
    args.sos_idx = args.token2idx['<blk>']
else:
    args.sos_idx = None
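The batch sizes in Example #9 come from a fixed token budget per bucket: longer buckets get proportionally smaller batches, with a final entry of num_gpus for sequences beyond the last boundary. A quick worked illustration with made-up numbers:

num_batch_tokens = 4000
num_gpus = 2
list_bucket_boundaries = [50, 100, 200]

# One batch size per bucket, plus a final entry for sequences beyond the last boundary.
list_batch_size = [int(num_batch_tokens / boundary) * num_gpus
                   for boundary in list_bucket_boundaries] + [num_gpus]
print(list_batch_size)  # [160, 80, 40, 2]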
Example #10
    if result_dir is not None:
        dst_f1.close()
        dst_f2.close()
    return [result_save_path1, result_save_path2]


if __name__ == "__main__":
    args = load_s2ss_arguments()

    # Step 1: build vocab and load data
    print("Sharing vocabulary")
    if not os.path.isfile(args.vocab_file):
        build_vocab_from_file(args.train_data, args.vocab_file)
        print("Build vocabulary")
    vocab, vocab_size = load_vocab(args.vocab_file)
    print('Vocabulary size:%s' % vocab_size)

    vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.vocab_file,  # target vocabulary file (each line has a word)
        vocab_size=vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)

    with tf.device(
            "/cpu:0"):  # Input pipeline should always be place on the CPU.

        print("args.pseudo_data:", args.pseudo_data)

        if args.mode == "train":
            train_iterator = load_paired_dataset(args.pseudo_data,
                                                 vocab,
Example #11
if args.dirs.lm_config:
    from utils.configReader import AttrDict
    import yaml
    args.args_lm = AttrDict(yaml.load(open(args.dirs.lm_config)))
    args.args_lm.dim_output = len(args.token2idx)
    args.args_lm.list_gpus = args.list_gpus
    from tfSeq2SeqModels.languageModel import LanguageModel
    args.Model_LM = LanguageModel

# vocab
logging.info('using vocab: {}'.format(args.dirs.vocab))

if args.dirs.vocab_pinyin:
    from utils.vocab import load_vocab
    logging.info('using pinyin vocab: {}'.format(args.dirs.vocab_pinyin))
    args.phone.token2idx, args.phone.idx2token = load_vocab(
        args.dirs.vocab_pinyin)
    args.phone.dim_output = len(args.phone.token2idx)
    args.phone.eos_idx = None
    args.phone.sos_idx = args.phone.token2idx['<blk>']


def read_tfdata_info(dir_tfdata):
    data_info = {}
    with open(dir_tfdata / 'tfdata.info') as f:
        for line in f:
            if 'dim_feature' in line or \
                'num_tokens' in line or \
                'size_dataset' in line:
                line = line.strip().split(' ')
                data_info[line[0]] = int(line[1])
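read_tfdata_info in Example #11 expects tfdata.info lines of the form "key value" and keeps a few integer-valued fields. A tiny standalone illustration of that parsing loop on hypothetical file contents:

sample_lines = ['dim_feature 80', 'num_tokens 1042812', 'size_dataset 28539']

data_info = {}
for line in sample_lines:
    key, value = line.strip().split(' ')
    data_info[key] = int(value)
print(data_info)  # {'dim_feature': 80, 'num_tokens': 1042812, 'size_dataset': 28539}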
Example #12
def convert_data(train_PATH, valid_PATH, test_PATH, word_vocab_PATH,
                 position_vocab_PATH, tag_vocab_PATH, out_DIR):
    train_ys, train_codes, train_times = pkl.load(open(train_PATH, 'rb'))
    valid_ys, valid_codes, valid_times = pkl.load(open(valid_PATH, 'rb'))
    test_ys, test_codes, test_times = pkl.load(open(test_PATH, 'rb'))

    word_vocab = load_vocab(word_vocab_PATH)
    position_vocab = load_vocab(position_vocab_PATH)
    tag_vocab = load_vocab(tag_vocab_PATH, mode="tag")

    print("Vocabulary size for word, position and tags are {}, {}, {}".format(
        len(word_vocab), len(position_vocab), len(tag_vocab)))

    train_visits, train_labels, train_days = [], [], []
    valid_visits, valid_labels, valid_days = [], [], []
    test_visits, test_labels, test_days = [], [], []

    train_counter = {"case": 0, "control": 0}
    valid_counter = {"case": 0, "control": 0}
    test_counter = {"case": 0, "control": 0}

    # traverse each train patient
    for index, (y, patient_codes, patient_times) in enumerate(
            zip(train_ys, train_codes, train_times)):
        train_visits.append(make_visits(patient_codes, word_vocab))
        train_labels.append(y)
        key = "case" if y == 1 else "control"
        train_counter[key] += 1
        train_days.append(make_days(patient_times))

    # valid
    for index, (y, patient_codes, patient_times) in enumerate(
            zip(valid_ys, valid_codes, valid_times)):
        valid_visits.append(make_visits(patient_codes, word_vocab))
        valid_labels.append(y)
        key = "case" if y == 1 else "control"
        valid_counter[key] += 1
        valid_days.append(make_days(patient_times))

    # test
    for index, (y, patient_codes, patient_times) in enumerate(
            zip(test_ys, test_codes, test_times)):
        test_visits.append(make_visits(patient_codes, word_vocab))
        test_labels.append(y)
        key = "case" if y == 1 else "control"
        test_counter[key] += 1
        test_days.append(make_days(patient_times))

    pkl_train_visit_file = open(out_DIR + 'visits.train', 'wb')
    pkl_valid_visit_file = open(out_DIR + 'visits.valid', 'wb')
    pkl_test_visit_file = open(out_DIR + 'visits.test', 'wb')

    pkl_train_label_file = open(out_DIR + 'labels.train', 'wb')
    pkl_valid_label_file = open(out_DIR + 'labels.valid', 'wb')
    pkl_test_label_file = open(out_DIR + 'labels.test', 'wb')

    pkl_train_day_file = open(out_DIR + 'days.train', 'wb')
    pkl_valid_day_file = open(out_DIR + 'days.valid', 'wb')
    pkl_test_day_file = open(out_DIR + 'days.test', 'wb')

    print(
        'In total converted {}, {}, {} train, valid, and test samples'.format(
            len(train_labels), len(valid_labels), len(test_labels)))
    print(
        'In which the distributions of cases and controls for train, valid and test are\n\t{}\t{}\t{}\t{}\t{}\t{}'
        .format(train_counter['case'], train_counter['control'],
                valid_counter['case'], valid_counter['control'],
                test_counter['case'], test_counter['control']))

    pkl.dump(train_visits, pkl_train_visit_file, protocol=2)
    pkl.dump(valid_visits, pkl_valid_visit_file, protocol=2)
    pkl.dump(test_visits, pkl_test_visit_file, protocol=2)

    pkl.dump(train_labels, pkl_train_label_file, protocol=2)
    pkl.dump(valid_labels, pkl_valid_label_file, protocol=2)
    pkl.dump(test_labels, pkl_test_label_file, protocol=2)

    pkl.dump(train_days, pkl_train_day_file, protocol=2)
    pkl.dump(valid_days, pkl_valid_day_file, protocol=2)
    pkl.dump(test_days, pkl_test_day_file, protocol=2)
Example #13
def train_main(p):

    in_file = Path(data_dir) / f'labeled_{p}.json' if isinstance(
        p, int) else Path(common_data_path) / 'intent_data' / p

    ###############################################
    # args
    ###############################################
    @dataclass
    class ModelArguments:
        model_path_or_name: str = field(default=str(bert_model_path))
        # model_path_or_name: str = field(default=str(roberta_model_path))
        # model_path_or_name: str = field(default=str(Path(data_dir)/'checkpoints'/'checkpoint-6000'))

    @dataclass
    class DataTrainingArguments:
        max_seq_length: int = field(default=200)

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    global_step = 0

    ###############################################
    # distant debug
    ###############################################
    if training_args.server_ip and training_args.server_port:
        import ptvsd
        print('Waiting for debugger attach')
        ptvsd.enable_attach(address='')

    ###############################################
    # model
    ###############################################
    num_labels = len(intent_labels)
    config = AutoConfig.from_pretrained(
        pretrained_model_name_or_path=model_args.model_path_or_name,
        num_labels=num_labels)
    model = BertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=model_args.model_path_or_name,
        config=config,
        num_labels=num_labels)

    ###############################################
    # data process
    ###############################################
    train = [(_['text'], _['label']) for _ in json.load(in_file.open())]
    dev = [(_['text'], _['label'])
           for _ in json.load((Path(common_data_path) / 'intent_data' /
                               'dev_data.json').open())]

    vocabulary = load_vocab()
    # vocabulary = load_vocab(vocab_file=(Path(roberta_model_path) / 'vocab.txt'))

    train_loader = DataGenerator(train,
                                 training_args,
                                 data_args,
                                 vocabulary,
                                 intent_labels,
                                 shuffle=True)
    dev_loader = DataGenerator(dev, training_args, data_args, vocabulary,
                               intent_labels)

    ###############################################
    # optimizer
    ###############################################
    def get_optimizer(num_training_steps):
        no_decay = ['bias', 'LayerNorm.weight']
        optimize_group_params = [{
            'params': [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            training_args.weight_decay
        }, {
            'params': [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0
        }]

        optimizer = AdamW(optimize_group_params,
                          lr=training_args.learning_rate,
                          weight_decay=training_args.weight_decay)

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=training_args.warmup_steps,
            num_training_steps=num_training_steps)

        return optimizer, scheduler

    optimizer, scheduler = get_optimizer(num_training_steps=len(train_loader) *
                                         training_args.epoch_num /
                                         training_args.batch_size)

    ###############################################
    # continue training from checkpoints
    ###############################################
    if ('checkpoint' in model_args.model_path_or_name and os.path.isfile(
            os.path.join(model_args.model_path_or_name, 'optimizer.pt'))
            and os.path.isfile(
                os.path.join(model_args.model_path_or_name, 'scheduler.pt'))):
        optimizer.load_state_dict(
            torch.load(
                os.path.join(model_args.model_path_or_name, "optimizer.pt"),
                map_location='cuda' if torch.cuda.is_available() else 'cpu'))
        scheduler.load_state_dict(
            torch.load(
                os.path.join(model_args.model_path_or_name, "scheduler.pt"),
                map_location='cuda' if torch.cuda.is_available() else 'cpu'))

    epoch_trained = 0
    step_trained_cur_epoch = 0
    if 'checkpoint' in model_args.model_path_or_name:
        global_step = int(
            str(Path(
                model_args.model_path_or_name)).split('-')[-1].split('/')[0])
        epoch_trained = global_step // (
            train_loader.steps // training_args.gradient_accumulation_steps)
        step_trained_cur_epoch = global_step % (
            train_loader.steps // training_args.gradient_accumulation_steps)

        logger.info(
            ' Continuing Training from checkpoint, will skip to saved global_step'
        )
        logger.info(f' Continuing Training from epoch {epoch_trained}')
        logger.info(f' Continuing Training from global step {global_step}')
        logger.info(
            f' Will skip the first {step_trained_cur_epoch} steps in the first epoch'
        )

    ###############################################
    # tensorboard
    ###############################################
    tb_writer = SummaryWriter(log_dir=Path(data_dir) / 'logs')

    def tb_log(logs):
        for k_, v_ in logs.items():
            tb_writer.add_scalar(k_, v_, global_step)

    tb_writer.add_text('args', training_args.to_json_string())
    tb_writer.add_hparams(training_args.to_sanitized_dict(), metric_dict={})

    ###############################################
    # save
    ###############################################
    def save_model(output_dir, model):
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f'Saving model checkpoint to {output_dir}')

        model_to_save = model.module if hasattr(model, 'module') else model

        model_to_save.config.architectures = [
            model_to_save.__class__.__name__
        ]  # what is architectures?

        output_model_file = os.path.join(output_dir, 'pytorch.bin')
        torch.save(model_to_save.state_dict(), output_model_file)
        logger.info(f'Model weights saved in {output_model_file}')

        output_config_file = os.path.join(output_dir, 'config.json')
        model_to_save.config.to_json_file(output_config_file)
        logger.info(f'Configuration saved in {output_config_file}')

        torch.save(training_args, os.path.join(output_dir,
                                               'training_args.bin'))

    def sorted_checkpoints(checkpoint_prefix="checkpoint", use_mtime=False):
        ordering_and_checkpoint_path = []

        glob_checkpoints = [
            str(x) for x in Path(training_args.output_dir).glob(
                f"{checkpoint_prefix}-*")
        ]

        for path in glob_checkpoints:
            if use_mtime:
                ordering_and_checkpoint_path.append(
                    (os.path.getmtime(path), path))
            else:
                regex_match = re.match(f".*{checkpoint_prefix}-([0-9]+)", path)
                if regex_match and regex_match.groups():
                    ordering_and_checkpoint_path.append(
                        (int(regex_match.groups()[0]), path))

        checkpoints_sorted = sorted(ordering_and_checkpoint_path)
        checkpoints_sorted = [
            checkpoint[1] for checkpoint in checkpoints_sorted
        ]
        return checkpoints_sorted

    def rotate_checkpoints(use_mtime=False) -> None:
        if training_args.save_total_limit is None or training_args.save_total_limit <= 0:
            return

        # Check if we should delete older checkpoint(s)
        checkpoints_sorted = sorted_checkpoints(use_mtime=use_mtime)
        if len(checkpoints_sorted) <= training_args.save_total_limit:
            return

        number_of_checkpoints_to_delete = max(
            0,
            len(checkpoints_sorted) - training_args.save_total_limit)
        checkpoints_to_be_deleted = checkpoints_sorted[:
                                                       number_of_checkpoints_to_delete]
        for checkpoint in checkpoints_to_be_deleted:
            logger.info(
                "Deleting older checkpoint [{}] due to args.save_total_limit".
                format(checkpoint))
            shutil.rmtree(checkpoint)

    ###############################################
    # train
    ###############################################
    model.to(training_args.device)
    if training_args.n_gpu > 1:
        model = nn.DataParallel(model)

    best_acc = 0
    best_epoch = 0
    model.zero_grad()
    for e in range(epoch_trained, training_args.epoch_num):
        # for e in range(1):  # debug
        model.train()
        t_loss = 0
        logging_loss = 0
        for step, batch in enumerate(train_loader):

            # if step > 0: break  # debug

            if step_trained_cur_epoch > 0:
                step_trained_cur_epoch -= 1
                continue

            raw_text = batch[-1]
            batch = [_.to(training_args.device) for _ in batch[:-1]]
            X_ids, Y_ids, Mask = batch
            if step < 5: logger.info(f'batch_size: {X_ids.size()}')
            loss, logits = model(X_ids, Y_ids, Mask)

            if training_args.n_gpu > 1:
                loss = loss.mean()
            loss.backward()
            t_loss += loss.item()

            if training_args.gradient_accumulation_steps > 1:
                loss = loss / training_args.gradient_accumulation_steps

            if ((step + 1) % training_args.gradient_accumulation_steps == 0
                    or (train_loader.steps <=
                        training_args.gradient_accumulation_steps)
                    and step + 1 == train_loader.steps):
                torch.nn.utils.clip_grad_norm_(
                    model.parameters(),
                    max_norm=training_args.max_gradient_norm)
                optimizer.step()
                scheduler.step()
                model.zero_grad()

                global_step += 1
                epoch = e + (step + 1) / train_loader.steps

                if global_step % training_args.logging_steps == 0:
                    train_logs = {
                        'loss':
                        (t_loss - logging_loss) / training_args.logging_steps,
                        'learning_rate': scheduler.get_lr()[0],
                        'epoch': epoch
                    }

                    logging_loss = t_loss
                    tb_log(train_logs)

                    logger.info(
                        f'epoch: {e} - batch: {step}/{train_loader.steps} - loss: {t_loss / (step + 1): 6f}'
                    )

                # if global_step % training_args.saving_steps == 0:
                #     output_dir = os.path.join(training_args.output_dir, f'checkpoint-{global_step}')
                #
                #     save_model(output_dir, model)
                #     rotate_checkpoints()
                #
                #     torch.save(optimizer.state_dict(), Path(output_dir)/'optimizer.pt')
                #     torch.save(scheduler.state_dict(), Path(output_dir)/'scheduler.pt')
                #     logger.info(f'Saving optimizer and scheduler states to {output_dir}')

        model.eval()
        dev_acc = 0
        eval_loss = 0
        err = []
        cat = defaultdict(lambda: 1e-10)
        for k, batch in enumerate(dev_loader):

            # if k > 0: break  # debug

            raw_text = batch[-1]
            batch = [_.to(training_args.device) for _ in batch[:-1]]
            X_ids, Y_ids, Mask = batch
            with torch.no_grad():
                loss, logits = model(X_ids, Y_ids, Mask)
                if training_args.n_gpu > 1:
                    loss = loss.mean()
                eval_loss += loss.item()

            for logit, y_id, t in zip(logits, Y_ids, raw_text):
                logit = logit.detach().cpu().numpy()
                true_label = y_id.detach().cpu().numpy()

                pred_label = np.argmax(logit)

                # metric 1
                if true_label == pred_label:
                    dev_acc += 1
                else:
                    score = max(logit)
                    err.append({
                        'text': t,
                        'pred': intent_labels[pred_label],
                        'true': intent_labels[true_label],
                        'score': f'{score: .4f}'
                    })
                # metric 2
                cat[f'{intent_labels[true_label]}_A'] += int(
                    pred_label == true_label)
                cat[f'{intent_labels[true_label]}_B'] += 1
                cat[f'{intent_labels[pred_label]}_C'] += 1
        acc = dev_acc / len(dev_loader)

        eval_logs = {
            'eval_acc': acc,
            'eval_loss': eval_loss / dev_loader.steps,
        }
        tb_log(eval_logs)

        if acc > best_acc:
            # if acc >= best_acc:  # debug
            best_acc = acc
            best_epoch = e

            # save #
            model_to_save = model.module if hasattr(model, 'module') else model
            torch.save(model_to_save.state_dict(),
                       Path(data_dir) / f'cls_model_{p}.pt')

            # save #
            json.dump(err, (Path(data_dir) / 'err.json').open('w'),
                      ensure_ascii=False,
                      indent=4)

        logger.info(
            f'epoch: {e} - dev_acc: {acc:.5f} {dev_acc}/{len(dev_loader)} - best_score: {best_acc:.5f} - best_epoch: {best_epoch} '
        )
        for t in intent_labels:
            logger.info(
                f'cat: {t} - '
                f'precision: {cat[t + "_A"] / cat[t + "_C"]:.5f} - '
                f'recall: {cat[t + "_A"] / cat[t + "_B"]:.5f} - '
                f'f1: {2 * cat[t + "_A"] / (cat[t + "_B"] + cat[t + "_C"]):.5f}'
            )

    tb_writer.close()
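Example #13 tracks per-class precision and recall with three counters per label (suffix _A: correct predictions, _B: gold occurrences, _C: predicted occurrences), seeded with a small epsilon to avoid division by zero. A standalone version of that bookkeeping on dummy labels:

from collections import defaultdict

labels_true = ['greet', 'greet', 'bye', 'bye', 'bye']
labels_pred = ['greet', 'bye', 'bye', 'bye', 'greet']

cat = defaultdict(lambda: 1e-10)  # epsilon keeps the divisions safe
for t, p in zip(labels_true, labels_pred):
    cat[t + '_A'] += int(t == p)  # correct predictions for the gold class
    cat[t + '_B'] += 1            # gold occurrences
    cat[p + '_C'] += 1            # predicted occurrences

for t in ('greet', 'bye'):
    precision = cat[t + '_A'] / cat[t + '_C']
    recall = cat[t + '_A'] / cat[t + '_B']
    f1 = 2 * cat[t + '_A'] / (cat[t + '_B'] + cat[t + '_C'])
    print(t, round(precision, 3), round(recall, 3), round(f1, 3))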
Example #14
def infer_main(p):
    ###############################################
    # args
    ###############################################
    @dataclass
    class ModelArguments:
        model_path_or_name: str = field(default=str(bert_model_path))
        # model_path_or_name: str = field(default=str(roberta_model_path))
        # model_path_or_name: str = field(default=str(Path(data_dir)/'checkpoints'/'checkpoint-6000'))

    @dataclass
    class DataTrainingArguments:
        max_seq_length: int = field(default=200)

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    ###############################################
    # model
    ###############################################
    num_labels = len(intent_labels)
    config = AutoConfig.from_pretrained(
        pretrained_model_name_or_path=model_args.model_path_or_name,
        num_labels=num_labels)
    model = BertForSequenceClassification(config, num_labels)
    model.load_state_dict(
        torch.load(
            Path(data_dir) / f'cls_model_{100-p}.pt',
            map_location='cuda' if torch.cuda.is_available() else "cpu"))

    model.to(training_args.device)

    ###############################################
    # data process
    ###############################################
    try:
        d_80 = [
            (_['text'], _['label'])
            for _ in json.load((Path(data_dir) / f'unlabeled_{p}.json').open())
        ]
    except KeyError:  # fall back to 'true_label' when 'label' is missing

        d_80 = [
            (_['text'], _['true_label'])
            for _ in json.load((Path(data_dir) / f'unlabeled_{p}.json').open())
        ]
    vocabulary = load_vocab()
    # vocabulary = load_vocab(vocab_file=(Path(roberta_model_path) / 'vocab.txt'))

    d_80_loader = DataGenerator(d_80, training_args, data_args, vocabulary,
                                intent_labels)

    ###############################################
    # train
    ###############################################
    d_80_score = []
    for k, batch in enumerate(d_80_loader):

        # if k > 0: break  # debug

        raw_text = batch[-1]
        batch = [_.to(training_args.device) for _ in batch[:-1]]
        X_ids, Y_ids, Mask = batch
        with torch.no_grad():
            _, logits = model(X_ids, Y_ids, Mask)

        for logit, y_id, t in zip(logits, Y_ids, raw_text):
            logit = logit.detach().cpu().numpy()
            true_label = y_id.detach().cpu().numpy()

            pred_label = np.argmax(logit)
            score = max(logit)

            d_80_score.append({
                'text': t,
                'true_label': intent_labels[true_label],
                'pred_label': intent_labels[pred_label],
                'score': f'{score:.4f}',
            })
    json.dump(d_80_score, (Path(data_dir) / f'unlabeled_{p}.json').open('w'),
              ensure_ascii=False,
              indent=2)
    print('Done')
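Example #14 ranks each prediction by its raw maximum logit rather than a softmax probability, so the reported score is unbounded. A quick comparison on an invented logit vector:

import numpy as np

logit = np.array([2.0, 0.5, -1.0])
pred_label = np.argmax(logit)                  # 0
raw_score = max(logit)                         # 2.0, not a probability
probs = np.exp(logit) / np.exp(logit).sum()    # softmax for comparison
print(pred_label, raw_score, round(float(probs[pred_label]), 3))  # 0 2.0 0.786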
Example #15
def convert_data(train_PATH,
                 test_PATH,
                 word_vocab_PATH,
                 position_vocab_PATH,
                 tag_vocab_PATH,
                 out_DIR,
                 flag='notime'):
    train_ys, train_codes, train_times = pkl.load(open(train_PATH, 'rb'))
    test_ys, test_codes, test_times = pkl.load(open(test_PATH, 'rb'))

    word_vocab = load_vocab(word_vocab_PATH)
    position_vocab = load_vocab(position_vocab_PATH)
    tag_vocab = load_vocab(tag_vocab_PATH, mode="tag")

    print("Vocabuary size for word, position and tags are {}, {}, {}".format(
        len(word_vocab), len(position_vocab), len(tag_vocab)))

    feature_template = make_feature_template(word_vocab, position_vocab, flag)
    print(f"the feature number for lr is {len(feature_template)}")

    train_visits, train_labels, train_days = [], [], []
    test_visits, test_labels, test_days = [], [], []

    train_X, valid_X, test_X = [], [], []
    train_Y, valid_Y, test_Y = [], [], []

    # traverse each train patient
    for index, (y, patient_codes, patient_times) in enumerate(
            zip(train_ys, train_codes, train_times)):
        x = make_patient(patient_codes, patient_times, feature_template, flag)
        train_X.append(x)
        train_Y.append(y)
    for index, (y, patient_codes, patient_times) in enumerate(
            zip(test_ys, test_codes, test_times)):
        x = make_patient(patient_codes, patient_times, feature_template, flag)
        test_X.append(x)
        test_Y.append(y)

    train_X = csr_matrix(np.asarray(train_X))
    test_X = csr_matrix(np.asarray(test_X))
    '''
        train_visits.append(make_visits(patient_codes, word_vocab))
        train_labels.append(y)
        key = "case" if y == 1 else "control"
        train_counter[key] += 1
        train_days.append(make_days(patient_times))

    # valid    
    for index, (y, patient_codes, patient_times) in enumerate(zip(valid_ys, valid_codes, valid_times)):
        valid_visits.append(make_visits(patient_codes, word_vocab))
        valid_labels.append(y)
        key = "case" if y == 1 else "control"
        valid_counter[key] += 1
        valid_days.append(make_days(patient_times))

    # test
    for index, (y, patient_codes, patient_times) in enumerate(zip(test_ys, test_codes, test_times)):
        test_visits.append(make_visits(patient_codes, word_vocab))
        test_labels.append(y)
        key = "case" if y == 1 else "control"
        test_counter[key] += 1
        test_days.append(make_days(patient_times))
    '''
    pkl.dump(train_X, open(out_DIR + 'lr_time_day.trainX', 'wb'))
    pkl.dump(test_X, open(out_DIR + 'lr_time_day.testX', 'wb'))
    #pkl.dump(train_X, open(out_DIR + 'lr_notime.trainX', 'wb'))
    #pkl.dump(test_X, open(out_DIR + 'lr_notime.testX', 'wb'))
    pkl.dump(np.asarray(train_Y), open(out_DIR + 'lr.trainY', 'wb'))
    pkl.dump(np.asarray(test_Y), open(out_DIR + 'lr.testY', 'wb'))
    print("output data finished")
Example #16
def main():
    # === Load arguments
    args = load_cycle_arguments()
    if args.task_suffix == 'beta':
        final_model_save_path = args.final_model_save_dir + '-beta=' + str(args.beta) + '/'
        final_tsf_result_dir = args.final_tsf_result_dir + '-beta=' + str(args.beta)
    else:
        final_model_save_path = args.final_model_save_dir + '-' + args.task_suffix + '/'
        final_tsf_result_dir = args.final_tsf_result_dir + '-' + args.task_suffix
    dump_args_to_yaml(args, final_model_save_path)  # record the arguments to a file
    print(args)
    s2ss_args = load_args_from_yaml(args.s2ss_model_save_dir)
    s2ss_args.RL_learning_rate = args.RL_learning_rate  # a smaller learning_rate for RL
    s2ss_args.MLE_learning_rate = args.MLE_learning_rate  # a smaller learning_rate for MLE
    s2ss_args.batch_size = args.batch_size  # a bigger batch_size for RL
    min_seq_len = args.min_seq_len
    max_seq_len = args.max_seq_len


    # === Load global vocab (token-to-index mapping)
    vocab, vocab_size = load_vocab(args.vocab_file)
    print("Vocabulary size: %s" % vocab_size)
    vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.vocab_file,  # target vocabulary file (each line has a word)
        vocab_size=vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)


    # === Load evaluator
    bleu_evaluator = BLEUEvaluator()


    # === Create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    #tf_config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.Session(config=tf_config)  # limit gpu memory; don't pre-allocate memory; allocate as-needed


    # === Load dataset
    with tf.device("/cpu:0"):  # Input pipeline should always be place on the CPU.

        #Acquire samples from Ps
        raml_train_data = args.raml_train_data + '-tau=' + str(args.tau) + '.txt'
        if not os.path.exists(raml_train_data):
            raml_data = sampling(args.train_template_data, args.vocab_file, raml_train_data, args.sample_size, args.tau)
        else:
            raml_data = load_raml_data(raml_train_data)
        print('pre-sampling complete, len(raml_data) = ', len(raml_data))

        train_data_iterator = load_paired_dataset(raml_train_data, vocab, batch_size=args.batch_size,
                                                  min_seq_len=min_seq_len, max_seq_len=max_seq_len)
        test_data_iterator = load_dataset(args.test_data, vocab, mode=constants.TEST, batch_size=100,
                                          min_seq_len=min_seq_len, max_seq_len=max_seq_len)
        #paired_train_data_iterator = load_paired_dataset(args.pseudo_data, vocab, batch_size=args.batch_size,
                                                         #min_seq_len=min_seq_len, max_seq_len=max_seq_len)

        train_data_next = train_data_iterator.get_next()  # to avoid high number of `Iterator.get_next()` calls
        test_data_next = test_data_iterator.get_next()
        #paired_train_data_next = paired_train_data_iterator.get_next()


    # === Initialize and build Seq2SentiSeq models: three variants for training, greedy inference, and random inference
    load_model = False if args.no_pretrain else True
    s2ss_train = s2ss_create_model(sess, s2ss_args, constants.TRAIN, vocab_size, load_pretrained_model=load_model)  # load the pretrained model

    #decode_type_before = s2ss_args.decode_type
    s2ss_args.decode_type = constants.GREEDY  # each decode_type yields a different inference model
    s2ss_greedy_infer = s2ss_create_model(sess, s2ss_args, constants.INFER, vocab_size, reuse=True)  # inference model; reuse indicates variable sharing
    #s2ss_args.decode_type = constants.RANDOM
    #s2ss_random_infer = s2ss_create_model(sess, s2ss_args, constants.INFER, vocab_size, reuse=True)
    #s2ss_args.decode_type = decode_type_before

    if args.task_suffix == 'beta':
        disc_model_save_dir = args.disc_model_save_dir + '-beta=' + str(args.beta) + '/'
    else:
        disc_model_save_dir = args.disc_model_save_dir + '-' + args.task_suffix + '/'
    if args.disc_pretrain and args.task_suffix != 'wo-RAT' and args.task_suffix != 'wo-disc':
        # === Pre-train discriminator
        src_fs = get_src_samples(args.first_sample_num, args.train_template_data, args.disc_data_dir)
        first_sample_test_data_iterator = load_dataset(src_fs[0], vocab, mode=constants.TEST, batch_size=100,
                                                 min_seq_len=min_seq_len, max_seq_len=max_seq_len)
        first_sample_test_data_next = first_sample_test_data_iterator.get_next()

        dst_fs = inference(s2ss_greedy_infer, sess=sess, args=s2ss_args, decoder_s=constants.SENT_LIST,
                           src_test_iterator=first_sample_test_data_iterator, src_test_next=first_sample_test_data_next,
                           vocab_rev=vocab_rev, result_dir=args.disc_data_dir)
        
        get_disc_train_data(src_fs[1], dst_fs[0], args.disc_data_dir)
        
        if not os.path.exists(disc_model_save_dir):
            os.makedirs(disc_model_save_dir)
        _ = disc_model_bert.bert_main('train', args.disc_data_dir, disc_model_save_dir, args.disc_pretrain_n_epoch, args.disc_batch_size)
    

    # === Start adversarial training
    for adv_iter in range(args.adv_iters): 

        if args.task_suffix != 'wo-RAT':
            # calculate bleu reward
            raml_bleu_data = args.raml_bleu_data + '-tau=' + str(args.tau) + '.txt'
            if not os.path.exists(raml_bleu_data):
                # calculate bleu reward
                cont_reward = get_sentence_bleu(raml_train_data)
                cont_reward = sigmoid(cont_reward, x_trans=0.3, x_scale=8)
                #print(cont_reward[:10])
                #print(np.min(cont_reward), np.max(cont_reward), np.median(cont_reward), np.mean(cont_reward))
                # to do: write bleu data
                with open(raml_bleu_data, 'w') as f:
                    cont_reward_str = [str(r) for r in cont_reward]
                    f.write('\n'.join(cont_reward_str))
            else:
                #Load bleu data
                with open(raml_bleu_data) as f:
                    cont_reward = np.array([float(line.strip()) for line in f])
                print(np.min(cont_reward), np.max(cont_reward), np.median(cont_reward), np.mean(cont_reward))
            print(len(cont_reward))
            cont_reward += safe_divide_constant

            if args.task_suffix == 'wo-disc':
                reward = cont_reward
                print('length of reward :%d' % len(reward))
            else:
                # calculate senti reward
                sample_size = 1
                if not os.path.exists(args.disc_data_dir):
                    os.makedirs(args.disc_data_dir)
                get_disc_infer_data(args.disc_data_dir, raml_data, sample_size)
                senti_rewards = disc_model_bert.bert_main('predict', args.disc_data_dir, disc_model_save_dir, predict_batch_size=args.disc_pred_batch_size)
                senti_reward_per_sample = np.array(senti_rewards).reshape((-1,2*sample_size)).sum(axis=1)
                senti_reward = sigmoid(senti_reward_per_sample, x_trans=0.5, x_scale=8)
                print(np.min(senti_reward_per_sample), np.max(senti_reward_per_sample), np.median(senti_reward_per_sample), np.mean(senti_reward_per_sample))
                print(np.min(senti_reward), np.max(senti_reward), np.median(senti_reward), np.mean(senti_reward))

                assert len(cont_reward) == len(senti_reward)

                senti_reward += safe_divide_constant

                beta = args.beta #trade-off between two rewards
                reward = (1 + beta * beta) * senti_reward * cont_reward / (beta * beta * senti_reward + cont_reward)
                print('length of reward :%d' % len(reward))

            '''
            # calculate fluency reward
            lm_scores = []
            ppl_scores = []
            for idx,data in enumerate(raml_data):
                sent = data['src']
                lm_scores.append(model.score(' '.join(sent), bos=True, eos=True))
                ppl_scores.append(model.perplexity(' '.join(sent)))
                #if idx<5:
                    #print(sent,' '.join(sent),lm_scores[idx])
            fluency_reward = sigmoid(np.array(lm_scores), x_trans=-21, x_scale=0.4)
            #print(np.min(np.array(lm_scores)), np.max(np.array(lm_scores)), np.median(np.array(lm_scores)), np.mean(np.array(lm_scores)))
            #print(fluency_reward[:10])
            print(np.min(np.array(ppl_scores)), np.max(np.array(ppl_scores)), np.median(np.array(ppl_scores)), np.mean(np.array(ppl_scores)))
            print(np.min(np.array(lm_scores)), np.max(np.array(lm_scores)), np.median(np.array(lm_scores)), np.mean(np.array(lm_scores)))
            print(np.min(fluency_reward), np.max(fluency_reward), np.median(fluency_reward), np.mean(fluency_reward))
            #print(len(fluency_reward))
            #break
            '''
            #assert len(cont_reward) == len(senti_reward) == len(fluency_reward)

            #fluency_reward += safe_divide_constant

            '''
            #reward = (senti_reward + cont_reward + fluency_reward) / 3
            w1 = w2 = w3 = 1
            reward = (w1 + w2 + w3) * cont_reward * senti_reward * fluency_reward / (w1 * senti_reward * fluency_reward + w2 * cont_reward * fluency_reward + w3 * cont_reward * senti_reward)
            print(np.min(reward), np.max(reward), np.median(reward), np.mean(reward))

            print('length of reward :%d' % len(reward))
            #break
            '''

        # === Start train G
        n_batch = -1
        global_step = -1

        for i in range(args.n_epoch):
            print("Epoch:%s" % i)

            sess.run([train_data_iterator.initializer])

            while True:
                n_batch += 1
                global_step += 1
                if n_batch % args.eval_step == 0: #eval
                    print('\n================ N_batch / Global_step (%s / %s): Evaluate on test datasets ================\n'
                          % (n_batch, global_step))
                    dst_fs = inference(s2ss_greedy_infer, sess=sess, args=s2ss_args, decoder_s=constants.SENT_LIST,
                                       src_test_iterator=test_data_iterator, src_test_next=test_data_next,
                                       vocab_rev=vocab_rev, result_dir=final_tsf_result_dir,
                                       step=global_step if args.save_each_step else global_step)
                    t0 = time.time()
                    bleu_scores = bleu_evaluator.score(args.reference, dst_fs[1], all_bleu=True)
                    print("Test(Batch:%d)\tBLEU-1:%.3f\tBLEU-2:%.3f\tBLEU:%.3f\tCost time:%.2f" %
                          (n_batch, bleu_scores[1], bleu_scores[2], bleu_scores[0], time.time() - t0))

                if n_batch % args.save_per_step == 0:
                    print("Save model at dir:", final_model_save_path)
                    s2ss_train.saver.save(sess, final_model_save_path, global_step=global_step)

                try:
                    t0 = time.time()
                    data = sess.run(train_data_next)  # get real data!!
                    batch_size = np.shape(data["source_ids"])[0]
                    #decode_width = s2ss_args.decode_width

                    t0 = time.time()

                    if args.task_suffix != 'wo-RAT':
                        #batched_cont_reward = cont_reward[n_batch*batch_size:(n_batch+1)*batch_size]
                        batched_reward = reward[n_batch*batch_size:(n_batch+1)*batch_size]

                        feed_dict = {s2ss_train.encoder_input: data["source_ids"],
                                     s2ss_train.encoder_input_len: data["source_length"],
                                     s2ss_train.decoder_input: data["target_ids_in"],
                                     s2ss_train.decoder_target: data["target_ids_out"],
                                     s2ss_train.decoder_target_len: data["target_length"] + 1,
                                     s2ss_train.decoder_s: data["target_senti"],
                                     s2ss_train.reward: batched_reward}
                        res = sess.run([s2ss_train.rl_loss, s2ss_train.retrain_op], feed_dict=feed_dict)
                        #sess.run([s2ss_train.loss, s2ss_train.train_op], feed_dict=feed_dict)
                    
                    if args.task_suffix != 'wo-bt':
                        # baseline: each sentence corresponds to one tgt_senti
                        greedy_predictions = sess.run(
                            s2ss_greedy_infer.predictions,
                            feed_dict={s2ss_greedy_infer.encoder_input: data["source_ids"],
                                       s2ss_greedy_infer.encoder_input_len: data["source_length"],
                                       s2ss_greedy_infer.decoder_s: data["target_senti"]})

                        mid_ids_bs, mid_ids_in_bs, mid_ids_out_bs, mid_ids_in_out_bs, mid_ids_length_bs = \
                            process_mid_ids(greedy_predictions, min_seq_len, max_seq_len, vocab_size)  # handle some special symbols
                
                        # Update Seq2SentiSeq with previous model generated data with noise
                        if global_step < 1 :
                            print('$$$Update B use back_trans_noise data')
                        noise_ids, noise_ids_length = add_noise(mid_ids_bs, mid_ids_length_bs)
                        feed_dict = {
                            s2ss_train.encoder_input: noise_ids,
                            s2ss_train.encoder_input_len: noise_ids_length,
                            s2ss_train.decoder_input: data["source_ids_in"],
                            s2ss_train.decoder_target: data["source_ids_out"],
                            s2ss_train.decoder_target_len: data["source_length"] + 1,
                            s2ss_train.decoder_s: data["source_senti"],
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op], feed_dict=feed_dict)
                    

                except tf.errors.OutOfRangeError:  # next epoch
                    print("Train---Total N batch:{}\tCost time:{}".format(n_batch, time.time() - t0))
                    n_batch = -1
                    break

                if n_batch % args.eval_step == 0:
                    print('train loss: %.4f' % res[0])

        if args.task_suffix != 'wo-RAT' and args.task_suffix != 'wo-disc':
            # Train discriminator to distinguish real data and generated data
            src_fs = get_src_samples(args.sample_num, args.train_template_data, args.disc_data_dir)
            sample_test_data_iterator = load_dataset(src_fs[0], vocab, mode=constants.TEST, batch_size=100,
                                                     min_seq_len=min_seq_len, max_seq_len=max_seq_len)
            sample_test_data_next = sample_test_data_iterator.get_next()

            dst_fs = inference(s2ss_greedy_infer, sess=sess, args=s2ss_args, decoder_s=constants.SENT_LIST,
                               src_test_iterator=sample_test_data_iterator, src_test_next=sample_test_data_next,
                               vocab_rev=vocab_rev, result_dir=args.disc_data_dir)
            get_disc_train_data(src_fs[1], dst_fs[0], args.disc_data_dir)

            _ = disc_model_bert.bert_main('train', args.disc_data_dir, disc_model_save_dir, args.disc_n_epoch, args.disc_batch_size)
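The combined reward near the end of Example #16 is an F-beta-style weighted harmonic mean of the sentiment and content rewards. A quick numeric check with invented reward vectors (beta = 1 reduces it to the plain harmonic mean):

import numpy as np

beta = 1.0
senti_reward = np.array([0.9, 0.2])
cont_reward = np.array([0.5, 0.8])

reward = ((1 + beta * beta) * senti_reward * cont_reward
          / (beta * beta * senti_reward + cont_reward))
print(reward)  # approx. [0.643 0.32]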
Example #17
        A = 1
        B = 0
    return A, B


if __name__ == "__main__":
    args = load_nmt_arguments()

    # === Get translation direction
    A, B = get_nmt_direction(args.nmt_direction)
    print("A=%s, B=%s" % (A, B))

    # ===  Build vocab and load data
    if not os.path.isfile(args.global_vocab_file):
        build_vocab_from_file(args.train_data, args.global_vocab_file)
    vocab, vocab_size = load_vocab(args.global_vocab_file)
    src_vocab = tgt_vocab = vocab
    src_vocab_size = tgt_vocab_size = vocab_size
    print('Vocabulary size:src:%s' % vocab_size)

    vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.global_vocab_file,  # target vocabulary file (each line has a word)
        vocab_size=vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)
    src_vocab_rev = tgt_vocab_rev = vocab_rev

    with tf.device(
            "/cpu:0"):  # Input pipeline should always be placed on the CPU.
        print("Use x'->y to update model f(x->y)")
        train_iterator = load_paired_dataset(args.tsf_train_data[B],
Example #18
def main():
    # capture the config path from the runtime arguments
    # then process the json configuration file
    args = get_args()
    print("Reading config from {}".format(args.config))
    config, _ = get_config_from_json(args.config)
    # add summary and model directory
    config = update_config_by_summary(config)
    # if to remove the previous results, set -d 1 as a parameter
    print('Whether to del the previous saved model', args.delete)
    if args.delete == '1':
        # delete existing checkpoints and summaries
        print('Deleting existing models and logs from:')
        print(config.summary_dir, config.checkpoint_dir)
        remove_dir(config.summary_dir)
        remove_dir(config.checkpoint_dir)

    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])
    """Load data"""
    # load global word, position and tag vocabularies
    word_vocab = load_vocab(path=config.datadir + config.word_vocab_path,
                            mode='word')
    position_vocab = load_vocab(path=config.datadir + config.pos_vocab_path,
                                mode='pos')
    tag_vocab = load_vocab(path=config.datadir + config.tag_vocab_path,
                           mode='tag')
    config.word_vocab_size = len(word_vocab)
    config.pos_vocab_size = len(position_vocab)
    config.tag_vocab_size = len(tag_vocab)

    print('word vocab size:', config.word_vocab_size)

    # create your data generator to load train data
    x_path = config.datadir + config.train_path
    train_loader = DataLoader(config, x_path, word_vocab, position_vocab,
                              tag_vocab)
    train_loader.load_data()
    # update the max length for each patient and each visit to be used in lstm
    train_max_patient_len = train_loader.max_patient_len
    train_max_visit_len = train_loader.max_visit_len

    # create your data generator to load valid data
    x_path = config.datadir + config.valid_path
    valid_loader = DataLoader(config, x_path, word_vocab, position_vocab,
                              tag_vocab)
    valid_loader.load_data()
    valid_max_patient_len = valid_loader.max_patient_len
    valid_max_visit_len = valid_loader.max_visit_len

    # create your data generator to load test data
    x_path = config.datadir + config.test_path
    test_loader = DataLoader(config, x_path, word_vocab, position_vocab,
                             tag_vocab)
    test_loader.load_data()
    test_max_patient_len = test_loader.max_patient_len
    test_max_visit_len = test_loader.max_visit_len

    print("The max patient lengths for train, valid and test are {}, {}, {}".
          format(train_max_patient_len, valid_max_patient_len,
                 test_max_patient_len))
    print("The max visit lengths for train, valid and test are {}, {}, {}".
          format(train_max_visit_len, valid_max_visit_len, test_max_visit_len))

    # select the maximum lengths of visits and codes as the size of lstm
    config.max_patient_len = max(
        [train_max_patient_len, valid_max_patient_len, test_max_patient_len])
    config.max_visit_len = max(
        [train_max_visit_len, valid_max_visit_len, test_max_visit_len])

    train_loader.pad_data(config.max_patient_len, config.max_visit_len)
    valid_loader.pad_data(config.max_patient_len, config.max_visit_len)
    test_loader.pad_data(config.max_patient_len, config.max_visit_len)

    # add num_iter_per_epoch to config for trainer
    config.train_size = train_loader.get_datasize()
    config.valid_size = valid_loader.get_datasize()
    config.test_size = test_loader.get_datasize()
    config.num_iter_per_epoch = int(config.train_size / config.batch_size)
    print("The sizes for train, valid and test are {}, {}, {}".format(
        config.train_size, config.valid_size, config.test_size))
    """Run model"""
    # create tensorflow session
    # specify only using one GPU
    tfconfig = tf.ConfigProto(device_count={'GPU': 1})
    # allow the dynamic increase of GPU memory
    tfconfig.gpu_options.allow_growth = True
    # limit the maximum of GPU usage as 0.5
    #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.5
    with tf.Session(config=tfconfig) as sess:
        # create an instance of the model you want
        model = Model(config)
        # create tensorboard logger
        logger = Logger(sess, config)
        # create trainer and pass all the previous components to it
        trainer = PredTrainer(sess, model, train_loader, valid_loader,
                              test_loader, config, logger)
        # load model if exists
        model.load(sess)
        # here you train your model
        trainer.train()
Example #19
def main():
    # === Load arguments
    args = load_dual_arguments()
    dump_args_to_yaml(args, args.final_model_save_dir)

    cls_args = load_args_from_yaml(args.cls_model_save_dir)
    nmt_args = load_args_from_yaml(os.path.join(args.nmt_model_save_dir,
                                                '0-1'))
    nmt_args.learning_rate = args.learning_rate  # a smaller learning rate for RL
    min_seq_len = min(int(max(re.findall(r"\d", cls_args.filter_sizes))),
                      args.min_seq_len)

    # === Load global vocab
    word2id, word2id_size = load_vocab_dict(args.global_vocab_file)
    global_vocab, global_vocab_size = load_vocab(args.global_vocab_file)
    print("Global_vocab_size: %s" % global_vocab_size)
    global_vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.global_vocab_file,
        vocab_size=global_vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)
    src_vocab = tgt_vocab = global_vocab
    src_vocab_size = tgt_vocab_size = global_vocab_size
    src_vocab_rev = tgt_vocab_rev = global_vocab_rev

    # === Create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=tf_config)

    # === Initial and build model
    cls = cls_create_model(sess,
                           cls_args,
                           global_vocab_size,
                           mode=constants.EVAL,
                           load_pretrained_model=True)

    nmts_train = []
    nmts_random_infer = []
    nmts_greedy_infer = []
    train_data_next = []
    dev_data_next = []
    test_data_next = []
    train_iterators = []
    test_iterators = []
    paired_train_iterators = []
    paired_train_data_next = []
    final_model_save_paths = []

    # === Define nmt model
    for A, B in [(0, 1), (1, 0)]:
        with tf.device("/cpu:0"
                       ):  # Input pipeline should always be placed on the CPU.
            src_train_iterator = load_dataset(args.train_data[A],
                                              src_vocab,
                                              mode=constants.TRAIN,
                                              batch_size=args.batch_size,
                                              min_seq_len=min_seq_len)
            src_dev_iterator = load_dataset(args.dev_data[A],
                                            src_vocab,
                                            mode=constants.EVAL,
                                            batch_size=500)
            src_test_iterator = load_dataset(args.test_data[A],
                                             src_vocab,
                                             mode=constants.EVAL,
                                             batch_size=500)
            # Use (X', Y) to produce pseudo parallel data
            paired_src_train_iterator = load_paired_dataset(
                args.tsf_train_data[B],
                args.train_data[B],
                src_vocab,
                tgt_vocab,
                batch_size=args.batch_size,
                min_seq_len=min_seq_len)

            src_train_next_op = src_train_iterator.get_next(
            )  # To avoid frequent calls of `Iterator.get_next()`
            src_dev_next_op = src_dev_iterator.get_next()
            src_test_next_op = src_test_iterator.get_next()
            src_paired_train_next_op = paired_src_train_iterator.get_next()

            train_data_next.append(src_train_next_op)
            dev_data_next.append(src_dev_next_op)
            test_data_next.append(src_test_next_op)
            paired_train_data_next.append(src_paired_train_next_op)

            train_iterators.append(src_train_iterator)
            test_iterators.append(src_test_iterator)
            paired_train_iterators.append(paired_src_train_iterator)

        direction = "%s-%s" % (A, B)
        nmt_args.sampling_probability = 0.5

        # == Define train model
        nmt_train = nmt_create_model(sess,
                                     nmt_args,
                                     src_vocab_size,
                                     tgt_vocab_size,
                                     src_vocab_rev,
                                     tgt_vocab_rev,
                                     mode=constants.TRAIN,
                                     direction=direction,
                                     load_pretrained_model=True)

        # == Define inference model
        decode_type_before = nmt_args.decode_type

        nmt_args.decode_type = constants.RANDOM
        nmt_random_infer = nmt_create_model(sess,
                                            nmt_args,
                                            src_vocab_size,
                                            tgt_vocab_size,
                                            src_vocab_rev,
                                            tgt_vocab_rev,
                                            mode=constants.INFER,
                                            direction=direction,
                                            reuse=True)

        nmt_args.decode_type = constants.GREEDY
        nmt_greedy_infer = nmt_create_model(sess,
                                            nmt_args,
                                            src_vocab_size,
                                            tgt_vocab_size,
                                            src_vocab_rev,
                                            tgt_vocab_rev,
                                            mode=constants.INFER,
                                            direction=direction,
                                            reuse=True)

        nmt_args.decode_type = decode_type_before  # restore to previous setting

        nmts_train.append(nmt_train)
        nmts_random_infer.append(nmt_random_infer)
        nmts_greedy_infer.append(nmt_greedy_infer)

        # == Prepare for model saver
        print("Prepare for model saver")
        final_model_save_path = "%s/%s-%s/" % (args.final_model_save_dir, A, B)
        if not os.path.exists(final_model_save_path):
            os.makedirs(final_model_save_path)
        print("Model save path:", final_model_save_path)
        final_model_save_paths.append(final_model_save_path)

    # === Start train
    n_batch = -1
    global_step = -1
    A = 1
    B = 0
    G_scores = []

    for i in range(args.n_epoch):
        print("Epoch:%s" % i)
        sess.run([train_iterators[A].initializer])
        sess.run([train_iterators[B].initializer])
        sess.run([paired_train_iterators[A].initializer])
        sess.run([paired_train_iterators[B].initializer])

        while True:
            n_batch += 1
            global_step += 1
            if n_batch % args.eval_step == 0:
                print(
                    '===== Start (N_batch: %s, Steps: %s): Evaluate on test datasets ===== '
                    % (n_batch, global_step))
                _, dst_f_A = inference(nmts_greedy_infer[A],
                                       sess=sess,
                                       args=nmt_args,
                                       A=A,
                                       B=B,
                                       src_test_iterator=test_iterators[A],
                                       src_test_next=test_data_next[A],
                                       src_vocab_rev=src_vocab_rev,
                                       result_dir=args.final_tsf_result_dir,
                                       step=global_step)
                _, dst_f_B = inference(nmts_greedy_infer[B],
                                       sess=sess,
                                       args=nmt_args,
                                       A=B,
                                       B=A,
                                       src_test_iterator=test_iterators[B],
                                       src_test_next=test_data_next[B],
                                       src_vocab_rev=src_vocab_rev,
                                       result_dir=args.final_tsf_result_dir,
                                       step=global_step)
                t0 = time.time()
                # calculate accuracy score
                senti_acc = cls_evaluate_file(sess,
                                              cls_args,
                                              word2id,
                                              cls, [dst_f_A, dst_f_B],
                                              index_list=[B, A])
                # calculate bleu score
                bleu_score_A = bleu_evaluator.score(args.reference[A], dst_f_A)
                bleu_score_B = bleu_evaluator.score(args.reference[B], dst_f_B)
                bleu_score = (bleu_score_A + bleu_score_B) / 2
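                # G-score and H-score below are the geometric and harmonic means of style accuracy and
                # BLEU, so a checkpoint only scores well when it both transfers style and preserves content.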

                G_score = np.sqrt(senti_acc * bleu_score)
                H_score = 2 / (1 / senti_acc + 1 / bleu_score)
                G_scores.append(G_score)
                print(
                    "%s-%s_Test(Batch:%d)\tSenti:%.3f\tBLEU(4ref):%.3f(A:%.3f+B:%.3f)"
                    "\tG-score:%.3f\tH-score:%.3f\tCost time:%.2f" %
                    (A, B, n_batch, senti_acc, bleu_score, bleu_score_A,
                     bleu_score_B, G_score, H_score, time.time() - t0))
                print(
                    '=====  End (N_batch: %s, Steps: %s): Evaluate on test datasets ====== '
                    % (n_batch, global_step))

            if n_batch % args.save_per_step == 0:
                print("=== Save model at dir:", final_model_save_paths[A],
                      final_model_save_paths[B])
                nmts_train[A].saver.save(sess,
                                         final_model_save_paths[A],
                                         global_step=global_step)
                nmts_train[B].saver.save(sess,
                                         final_model_save_paths[B],
                                         global_step=global_step)

            if n_batch % args.change_per_step == 0:
                A, B = B, A
                print(
                    "============= Change to train model {}-{} at {} steps =============="
                    .format(A, B, global_step))

            try:
                t0 = time.time()
                src = sess.run(train_data_next[A])  # get real data!!
                batch_size = np.shape(src["ids"])[0]
                decode_width = nmt_args.decode_width

                tile_src_ids = np.repeat(src["ids"], decode_width,
                                         axis=0)  # [batch_size * decode_width, ...]
                tile_src_length = np.repeat(src['length'],
                                            decode_width,
                                            axis=0)
                tile_src_ids_in = np.repeat(src["ids_in"],
                                            decode_width,
                                            axis=0)
                tile_src_ids_out = np.repeat(src["ids_out"],
                                             decode_width,
                                             axis=0)
                tile_src_ids_in_out = np.repeat(src["ids_in_out"],
                                                decode_width,
                                                axis=0)

                random_predictions = sess.run(
                    nmts_random_infer[A].predictions,
                    feed_dict={
                        nmts_random_infer[A].input_ids: src['ids'],
                        nmts_random_infer[A].input_length: src['length']
                    })
                assert np.shape(
                    random_predictions["ids"])[1] == nmt_args.decode_width
                mid_ids_log_prob = np.reshape(random_predictions["log_probs"],
                                              -1)
                mid_ids, mid_ids_in, mid_ids_out, mid_ids_in_out, mid_ids_length = \
                    process_mid_ids(random_predictions["ids"], random_predictions["length"],
                                   min_seq_len, global_vocab_size)

                greedy_predictions = sess.run(
                    nmts_greedy_infer[A].predictions,
                    feed_dict={
                        nmts_greedy_infer[A].input_ids: src['ids'],
                        nmts_greedy_infer[A].input_length: src['length']
                    })
                assert np.shape(greedy_predictions["ids"])[1] == 1
                mid_ids_bs, mid_ids_in_bs, mid_ids_out_bs, mid_ids_in_out_bs, mid_ids_length_bs = \
                    process_mid_ids(greedy_predictions["ids"], greedy_predictions["length"],
                                   min_seq_len, global_vocab_size)

                # Get style reward from classifier
                cls_probs = sess.run(cls.probs,
                                     feed_dict={
                                         cls.x: mid_ids,
                                         cls.dropout: 1
                                     })
                y_hat = [p > 0.5 for p in cls_probs]  # predicted style label: 1 or 0
                cls_acu = [p == B for p in y_hat]  # 1 if the predicted style matches the target style B
                style_reward = np.array(cls_acu, dtype=np.float32)

                # Get content reward from backward reconstruction
                feed_dict = {
                    nmts_train[B].input_ids: mid_ids,
                    nmts_train[B].input_length: mid_ids_length,
                    nmts_train[B].target_ids_in: tile_src_ids_in,
                    nmts_train[B].target_ids_out: tile_src_ids_out,
                    nmts_train[B].target_length: tile_src_length
                }
                nmtB_loss = sess.run(
                    nmts_train[B].loss_per_sequence,
                    feed_dict=feed_dict)  # nmtB_loss = -log(prob)
                nmtB_reward = nmtB_loss * (-1)  # reward = log(prob), so bigger is better

                # Get baseline reward from backward reconstruction
                feed_dict = {
                    nmts_train[B].input_ids: mid_ids_bs,
                    nmts_train[B].input_length: mid_ids_length_bs,
                    nmts_train[B].target_ids_in: src["ids_in"],
                    nmts_train[B].target_ids_out: src["ids_out"],
                    nmts_train[B].target_length: src["length"]
                }
                nmtB_loss_bs = sess.run(nmts_train[B].loss_per_sequence,
                                        feed_dict=feed_dict)
                nmtB_reward_bs = nmtB_loss_bs * (-1)  # nmt baseline reward
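                # Self-critical baseline: the greedy decode's reconstruction log-probability is used as
                # the baseline in norm_nmt_reward below, so a sample only gets a positive advantage when
                # it reconstructs the source better than greedy decoding does.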

                def norm(x):
                    x = np.array(x)
                    x = (x - x.mean()) / (x.std() + safe_divide_constant)
                    # x = x - x.min()  # to make sure > 0
                    return x

                def sigmoid(x,
                            x_trans=0.0,
                            x_scale=1.0,
                            max_y=1,
                            do_norm=False):
                    value = max_y / (1 + np.exp(-(x - x_trans) * x_scale))
                    if do_norm:
                        value = norm(value)
                    return value

                def norm_nmt_reward(x, baseline=None, scale=False):
                    x = np.reshape(x, (batch_size, -1))  # x is in [-16, 0]
                    dim1 = np.shape(x)[1]

                    if baseline is not None:
                        x_baseline = baseline  # [batch_size]
                    else:
                        x_baseline = np.mean(x, axis=1)  # [batch_size]
                    x_baseline = np.repeat(x_baseline,
                                           dim1)  # [batch_size*dim1]
                    x_baseline = np.reshape(x_baseline, (batch_size, dim1))

                    x_norm = x - x_baseline

                    if scale:
                        # x_norm = sigmoid(x_norm, x_scale=0.5)  # x_norm: [-12, 12] => [0, 1]
                        x_norm = sigmoid(x_norm)  # sharper normalization, x_norm: [-6, 6] => [0, 1]
                    return x_norm.reshape(-1)

                if args.use_baseline:
                    content_reward = norm_nmt_reward(nmtB_reward,
                                                     baseline=nmtB_reward_bs,
                                                     scale=True)
                else:
                    content_reward = norm_nmt_reward(nmtB_reward, scale=True)

                # Calculate reward
                style_reward += safe_divide_constant
                content_reward += safe_divide_constant
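                # F-measure-style weighted harmonic mean of the style and content rewards: the combined
                # reward is only high when both components are high.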
                reward = (1 + 0.25) * style_reward * content_reward / (
                    style_reward + 0.25 * content_reward)
                if args.normalize_reward:
                    reward = norm(reward)

                # == Update nmtA via policy gradient training
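                # The sampled sequences (mid_ids_*) become the decoder targets, each weighted by its
                # scalar reward via nmts_train[A].reward; retrain_op then applies the reward-weighted
                # (REINFORCE-style) gradient step.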
                feed_dict = {
                    nmts_train[A].input_ids: tile_src_ids,
                    nmts_train[A].input_length: tile_src_length,
                    nmts_train[A].target_ids_in: mid_ids_in,
                    nmts_train[A].target_ids_out: mid_ids_out,
                    nmts_train[A].target_length: mid_ids_length,
                    nmts_train[A].reward: reward
                }
                ops = [
                    nmts_train[A].lr_loss, nmts_train[A].loss,
                    nmts_train[A].loss_per_sequence, nmts_train[A].retrain_op
                ]
                nmtA_loss_final, nmtA_loss_, loss_per_sequence_, _ = sess.run(
                    ops, feed_dict=feed_dict)

                # == Update nmtA with pseudo data
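                # With MLE_anneal enabled, the teacher-forcing updates below only run every `gap` batches,
                # and gap follows the exponential schedule
                # anneal_initial_gap * anneal_rate ** (global_step / anneal_steps), capped at anneal_max_gap,
                # so the ratio of MLE to RL updates is annealed over training.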
                if args.MLE_anneal:
                    gap = min(
                        args.anneal_max_gap,
                        int(args.anneal_initial_gap *
                            np.power(args.anneal_rate,
                                     global_step / args.anneal_steps)))
                else:
                    gap = args.anneal_initial_gap

                if n_batch % gap == 0:
                    # Update nmtA using the original pseudo-parallel data (as in pre-training).
                    # This is not an ideal strategy, since the quality of the pseudo-parallel data becomes
                    # unacceptable in later iterations of training.
                    # We strongly recommend generating the pseudo-parallel data on the fly via back-translation instead.
                    if "pseudo" in args.teacher_forcing:
                        data = sess.run(
                            paired_train_data_next[A])  # fetch a batch of pseudo-parallel data
                        feed_dict = {
                            nmts_train[A].input_ids: data["ids"],
                            nmts_train[A].input_length: data["length"],
                            nmts_train[A].target_ids_in: data["trans_ids_in"],
                            nmts_train[A].target_ids_out:
                            data["trans_ids_out"],
                            nmts_train[A].target_length: data["trans_length"],
                        }
                        nmtA_pse_loss_, _ = sess.run(
                            [nmts_train[A].loss, nmts_train[A].train_op],
                            feed_dict=feed_dict)

                    # Update nmtB using pseudo data generated via back_translation (on-the-fly)
                    if "back_trans" in args.teacher_forcing:
                        feed_dict = {
                            nmts_train[B].input_ids: mid_ids_bs,
                            nmts_train[B].input_length: mid_ids_length_bs,
                            nmts_train[B].target_ids_in: src["ids_in"],
                            nmts_train[B].target_ids_out: src["ids_out"],
                            nmts_train[B].target_length: src["length"],
                        }
                        nmtB_loss_, _ = sess.run(
                            [nmts_train[B].loss, nmts_train[B].train_op],
                            feed_dict=feed_dict)

            except tf.errors.OutOfRangeError as e:  # next epoch
                print("===== DualTrain: Total N batch:{}\tCost time:{} =====".
                      format(n_batch,
                             time.time() - t0))
                n_batch = -1
                break
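
The dual-training loop above turns the backward model's per-sequence reconstruction log-probability into a content reward: when use_baseline is set, the greedy decode's score is subtracted as a baseline and the difference is squashed through a sigmoid (norm_nmt_reward with scale=True). Below is a minimal numpy-only sketch of that normalization; the names sampled_log_probs and greedy_log_probs and the toy values are illustrative placeholders, not taken from the code above.

import numpy as np

def self_critical_content_reward(sampled_log_probs, greedy_log_probs, batch_size, decode_width):
    """Center each sample's reward on the greedy baseline, then squash to (0, 1) with a sigmoid."""
    x = np.reshape(sampled_log_probs, (batch_size, decode_width))
    baseline = np.reshape(np.repeat(greedy_log_probs, decode_width), (batch_size, decode_width))
    advantage = x - baseline  # positive only if the sample reconstructs the source better than greedy
    return (1.0 / (1.0 + np.exp(-advantage))).reshape(-1)  # flattened, like norm_nmt_reward

# Toy example: 2 source sentences, 3 sampled rewrites each.
sampled = np.array([-4.0, -6.0, -5.0, -3.0, -7.0, -5.5])  # log p(x | y_sampled) from the backward model
greedy = np.array([-5.0, -5.0])                           # log p(x | y_greedy) from the backward model
print(self_critical_content_reward(sampled, greedy, batch_size=2, decode_width=3))

Samples that reconstruct the source no better than the greedy decode end up with a content reward at or below 0.5, so the policy-gradient update reinforces them less strongly.
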
def main():
    args = load_cycle_arguments()
    dump_args_to_yaml(args, args.final_model_save_dir)
    print(args)

    reg_args = load_args_from_yaml(args.reg_model_save_dir)
    s2ss_args = load_args_from_yaml(args.s2ss_model_save_dir)
    # s2ss_args.seq2seq_model_save_dir = args.seq2seq_model_save_dir
    s2ss_args.RL_learning_rate = args.RL_learning_rate  # a smaller learning_rate for RL
    s2ss_args.MLE_learning_rate = args.MLE_learning_rate  # a smaller learning_rate for MLE
    s2ss_args.batch_size = args.batch_size  # a bigger batch_size for RL
    min_seq_len = args.min_seq_len
    max_seq_len = args.max_seq_len

    # === Load global vocab
    vocab, vocab_size = load_vocab(args.vocab_file)
    print("Vocabulary size: %s" % vocab_size)
    vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.vocab_file,  # vocabulary file (one word per line)
        vocab_size=vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)

    bleu_evaluator = BLEUEvaluator()

    # === Create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.Session(config=tf_config)  # limit GPU memory: don't pre-allocate, allocate as needed

    # === Load dataset
    with tf.device("/cpu:0"):  # Input pipeline should always be placed on the CPU.
        train_data_iterator = load_dataset(args.train_data,
                                           vocab,
                                           mode=constants.TRAIN,
                                           batch_size=args.batch_size,
                                           min_seq_len=min_seq_len,
                                           max_seq_len=max_seq_len)
        dev_data_iterator = load_dataset(args.dev_data,
                                         vocab,
                                         mode=constants.EVAL,
                                         batch_size=100,
                                         min_seq_len=min_seq_len,
                                         max_seq_len=max_seq_len)
        test_data_iterator = load_dataset(args.test_data,
                                          vocab,
                                          mode=constants.TEST,
                                          batch_size=100,
                                          min_seq_len=min_seq_len,
                                          max_seq_len=max_seq_len)
        paired_train_data_iterator = load_paired_dataset(
            args.pseudo_data,
            vocab,
            batch_size=args.batch_size,
            min_seq_len=min_seq_len,
            max_seq_len=max_seq_len)

        train_data_next = train_data_iterator.get_next()  # to avoid a high number of `Iterator.get_next()` calls
        dev_data_next = dev_data_iterator.get_next()
        test_data_next = test_data_iterator.get_next()
        paired_train_data_next = paired_train_data_iterator.get_next()

    # === Initialize and build Seq2SentiSeq model
    load_model = not args.no_pretrain
    s2ss_train = s2ss_create_model(sess,
                                   s2ss_args,
                                   constants.TRAIN,
                                   vocab_size,
                                   load_pretrained_model=load_model)

    decode_type_before = s2ss_args.decode_type
    s2ss_args.decode_type = constants.GREEDY
    s2ss_greedy_infer = s2ss_create_model(sess,
                                          s2ss_args,
                                          constants.INFER,
                                          vocab_size,
                                          reuse=True)
    s2ss_args.decode_type = constants.RANDOM
    s2ss_random_infer = s2ss_create_model(sess,
                                          s2ss_args,
                                          constants.INFER,
                                          vocab_size,
                                          reuse=True)
    s2ss_args.decode_type = decode_type_before

    # === Load pre-trained sentiment regression model
    eval_reg = reg_create_model(sess,
                                reg_args,
                                vocab_size,
                                mode=constants.EVAL,
                                load_pretrained_model=True)

    print("Prepare for model saver")
    final_model_save_path = args.final_model_save_dir

    # === Start train
    n_batch = -1
    global_step = -1

    for i in range(args.n_epoch):
        print("Epoch:%s" % i)

        sess.run([train_data_iterator.initializer])
        sess.run([paired_train_data_iterator.initializer])

        senti_reward_all = {  # rewards measuring the sentiment transformation of the generated sequences
            "upper": [],  # reward of the ground truth (existing sequences in the train dataset)
            "lower": [],  # reward of the baseline: randomly generated sequences
            "real": [],  # reward of the actually generated sequences
        }
        cont_reward_all = {  # rewards measuring the content preservation of the generated sequences
            "upper": [],  # reward of the ground truth (existing sequences in the train dataset)
            "lower": [],  # reward of the baseline: randomly generated sequences
            "real": [],  # reward of the actually generated sequences
        }
        reward_all = []
        reward_expect_all = []  # reward expectation: r*p(y_k|x)

        while True:
            n_batch += 1
            global_step += 1
            if n_batch % args.eval_step == 0:
                print(
                    '\n================ N_batch / Global_step (%s / %s): Evaluate on test datasets ================\n'
                    % (n_batch, global_step))
                dst_fs = inference(
                    s2ss_greedy_infer,
                    sess=sess,
                    args=s2ss_args,
                    decoder_s=constants.SENT_LIST,
                    src_test_iterator=test_data_iterator,
                    src_test_next=test_data_next,
                    vocab_rev=vocab_rev,
                    result_dir=args.final_tsf_result_dir,
                    step=global_step if args.save_each_step else global_step)
                t0 = time.time()
                bleu_scores = bleu_evaluator.score(args.reference,
                                                   dst_fs[1],
                                                   all_bleu=True)
                print(
                    "Test(Batch:%d)\tBLEU-1:%.3f\tBLEU-2:%.3f\tBLEU:%.3f\tCost time:%.2f"
                    % (n_batch, bleu_scores[1], bleu_scores[2], bleu_scores[0],
                       time.time() - t0))

                # improve the diversity of generated sentences
                dst_fs = inference(
                    s2ss_random_infer,
                    sess=sess,
                    args=s2ss_args,
                    decoder_s=constants.SENT_LIST,
                    src_test_iterator=test_data_iterator,
                    src_test_next=test_data_next,
                    vocab_rev=vocab_rev,
                    result_dir=args.final_tsf_result_dir + '-sample',
                    step=global_step if args.save_each_step else global_step)
                t0 = time.time()
                bleu_scores = bleu_evaluator.score(args.reference,
                                                   dst_fs[1],
                                                   all_bleu=True)
                print(
                    "Test(Batch:%d)\tBLEU-1:%.3f\tBLEU-2:%.3f\tBLEU:%.3f\tCost time:%.2f ===> Sampled results"
                    % (n_batch, bleu_scores[1], bleu_scores[2], bleu_scores[0],
                       time.time() - t0))

            if n_batch % args.save_per_step == 0:
                print("Save model at dir:", final_model_save_path)
                s2ss_train.saver.save(sess,
                                      final_model_save_path,
                                      global_step=n_batch)

            try:
                t0 = time.time()
                src = sess.run(train_data_next)  # get real data!!
                batch_size = np.shape(src["ids"])[0]
                decode_width = s2ss_args.decode_width

                t0 = time.time()

                tile_src_ids = np.repeat(src["ids"], decode_width,
                                         axis=0)  # [batch_size * decode_width, ...]
                tile_src_length = np.repeat(src['length'],
                                            decode_width,
                                            axis=0)
                tile_src_ids_in = np.repeat(src["ids_in"],
                                            decode_width,
                                            axis=0)
                tile_src_ids_out = np.repeat(src["ids_out"],
                                             decode_width,
                                             axis=0)
                tile_src_ids_in_out = np.repeat(src["ids_in_out"],
                                                decode_width,
                                                axis=0)
                tile_src_decoder_s = np.repeat(src["senti"],
                                               decode_width,
                                               axis=0)

                tile_tgt_decoder_s = get_tareget_sentiment(size=batch_size)
                tgt_decoder_s = get_tareget_sentiment(size=batch_size,
                                                      random=True)

                t0 = time.time()

                # random
                random_predictions, log_probs = sess.run(
                    [
                        s2ss_random_infer.predictions,
                        s2ss_random_infer.log_probs
                    ],
                    feed_dict={
                        s2ss_random_infer.encoder_input: tile_src_ids,
                        s2ss_random_infer.encoder_input_len: tile_src_length,
                        s2ss_random_infer.decoder_s: tile_tgt_decoder_s
                    })

                mid_ids_log_prob = log_probs
                mid_ids, mid_ids_in, mid_ids_out, mid_ids_in_out, mid_ids_length = \
                    process_mid_ids(random_predictions, min_seq_len, max_seq_len, vocab_size)
                assert tile_src_length[0] == tile_src_length[decode_width - 1]

                # baseline
                greedy_predictions = sess.run(
                    s2ss_greedy_infer.predictions,
                    feed_dict={
                        s2ss_greedy_infer.encoder_input: src['ids'],
                        s2ss_greedy_infer.encoder_input_len: src['length'],
                        s2ss_greedy_infer.decoder_s: tgt_decoder_s
                    })

                mid_ids_bs, mid_ids_in_bs, mid_ids_out_bs, mid_ids_in_out_bs, mid_ids_length_bs = \
                    process_mid_ids(greedy_predictions, min_seq_len, max_seq_len, vocab_size)

                t0 = time.time()

                # == get reward from sentiment scorer/regressor
                def get_senti_reward(pred, gold):
                    if args.scale_sentiment:
                        gold = gold * 0.2 - 0.1  # todo: move this function to one file
                    reward_ = 1 / (np.fabs(pred - gold) + 1.0)
                    return reward_
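                # get_senti_reward is a reciprocal-distance reward in (0, 1]: it equals 1 when the
                # regressor's prediction matches the target sentiment exactly and decays with the
                # absolute error (e.g. an error of 1.0 gives 0.5).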

                # real sentiment reward
                pred_senti_score = sess.run(eval_reg.predict_score,
                                            feed_dict={
                                                eval_reg.x:
                                                mid_ids,
                                                eval_reg.sequence_length:
                                                mid_ids_length
                                            })
                senti_reward = get_senti_reward(pred_senti_score,
                                                tile_tgt_decoder_s)

                # upper bound of sentiment reward
                upper_pred_senti_score = sess.run(eval_reg.predict_score,
                                                  feed_dict={
                                                      eval_reg.x:
                                                      src["ids"],
                                                      eval_reg.sequence_length:
                                                      src["length"]
                                                  })
                upper_senti_reward = get_senti_reward(upper_pred_senti_score,
                                                      src["senti"])

                # lower bound of sentiment reward
                lower_pred_senti_score = sess.run(
                    eval_reg.predict_score,
                    feed_dict={
                        eval_reg.x:
                        np.random.choice(vocab_size, np.shape(tile_src_ids)),
                        eval_reg.sequence_length:
                        tile_src_length
                    })
                lower_senti_reward = get_senti_reward(lower_pred_senti_score,
                                                      tile_src_decoder_s)

                # == get reward from backward reconstruction
                feed_dict = {
                    s2ss_train.encoder_input: mid_ids,
                    s2ss_train.encoder_input_len: mid_ids_length,
                    s2ss_train.decoder_input: tile_src_ids_in,
                    s2ss_train.decoder_target: tile_src_ids_out,
                    s2ss_train.decoder_target_len: tile_src_length + 1,
                    s2ss_train.decoder_s: tile_src_decoder_s,
                }

                loss = sess.run(s2ss_train.loss_per_sequence,
                                feed_dict=feed_dict)
                cont_reward = loss * (-1)  # bigger is better

                t0 = time.time()

                # get baseline content reward
                feed_dict = {
                    s2ss_train.encoder_input: mid_ids_bs,
                    s2ss_train.encoder_input_len: mid_ids_length_bs,
                    s2ss_train.decoder_input: src["ids_in"],
                    s2ss_train.decoder_target: src["ids_out"],
                    s2ss_train.decoder_target_len: src["length"] + 1,
                    s2ss_train.decoder_s: src["senti"],
                }
                loss_bs = sess.run(s2ss_train.loss_per_sequence,
                                   feed_dict=feed_dict)
                cont_reward_bs = loss_bs * (-1)  # baseline content reward

                # get lower bound of content reward
                feed_dict = {
                    s2ss_train.encoder_input:
                    np.random.choice(vocab_size, np.shape(mid_ids)),
                    s2ss_train.encoder_input_len:
                    mid_ids_length,
                    s2ss_train.decoder_input:
                    np.random.choice(vocab_size, np.shape(tile_src_ids_in)),
                    s2ss_train.decoder_target:
                    np.random.choice(vocab_size, np.shape(tile_src_ids_out)),
                    s2ss_train.decoder_target_len:
                    tile_src_length + 1,
                    s2ss_train.decoder_s:
                    tile_src_decoder_s,
                }
                lower_loss = sess.run(s2ss_train.loss_per_sequence,
                                      feed_dict=feed_dict)
                lower_cont_reward = lower_loss * (-1)  # bigger is better

                def norm(x):
                    x = np.array(x)
                    x = (x - x.mean()) / (x.std() + 1e-6)  # safe divide
                    # x = x - x.min()  # to make x > 0
                    return x

                def sigmoid(x,
                            x_trans=0.0,
                            x_scale=1.0,
                            max_y=1,
                            do_norm=False):
                    value = max_y / (1 + np.exp(-(x - x_trans) * x_scale))
                    if do_norm:
                        value = norm(value)
                    return value

                def norm_s2ss_reward(x,
                                     baseline=None,
                                     scale=False,
                                     norm=False):
                    x = np.reshape(x, (batch_size, -1))  # x in [-16, 0]
                    dim1 = np.shape(x)[1]

                    if baseline is not None:
                        x_baseline = baseline  # [batch_size]
                    else:
                        x_baseline = np.mean(x, axis=1)  # [batch_size]
                    x_baseline = np.repeat(x_baseline,
                                           dim1)  # [batch_size*dim1]
                    x_baseline = np.reshape(x_baseline, (batch_size, dim1))

                    x_norm = x - x_baseline

                    if scale:
                        x_norm = sigmoid(x_norm)
                    if norm:
                        x_norm = 2 * x_norm - 1  # new x_norm in [-1, 1]
                    return x_norm.reshape(-1)
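                # As in the dual-training loop above, use_baseline makes this a self-critical advantage:
                # the greedy decode's reconstruction score is the baseline subtracted from each sampled
                # decode's score before the sigmoid squashing.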

                if args.use_baseline:
                    if global_step < 1:  # only print at the first step
                        print('%%% use_baseline')
                    cont_reward = norm_s2ss_reward(cont_reward,
                                                   baseline=cont_reward_bs,
                                                   scale=True)
                    lower_cont_reward = norm_s2ss_reward(
                        lower_cont_reward, baseline=cont_reward_bs, scale=True)

                elif args.scale_cont_reward:
                    if global_step < 1:  # only print at the first step
                        print('%%% scale_cont_reward')
                    cont_reward = sigmoid(
                        cont_reward, x_trans=-3)  # roughly maps [-6, -2] => [0.05, 0.73]
                    lower_cont_reward = sigmoid(lower_cont_reward, x_trans=-3)

                if args.scale_senti_reward:
                    if global_step < 1:  # only print at the first step
                        print('%%% scale_senti_reward')
                    senti_reward = sigmoid(
                        senti_reward, x_trans=0.8,
                        x_scale=15)  # [0.6, 1.0] => [0.04, 0.95]
                    lower_senti_reward = sigmoid(lower_senti_reward,
                                                 x_trans=0.8,
                                                 x_scale=15)
                    upper_senti_reward = sigmoid(upper_senti_reward,
                                                 x_trans=0.8,
                                                 x_scale=15)

                cont_reward_all["lower"].extend(lower_cont_reward)
                cont_reward_all["real"].extend(cont_reward)

                senti_reward_all["upper"].extend(upper_senti_reward)
                senti_reward_all["lower"].extend(lower_senti_reward)
                senti_reward_all["real"].extend(senti_reward)

                senti_reward += safe_divide_constant
                cont_reward += safe_divide_constant

                if args.increase_beta:
                    beta = min(1, 0.1 * global_step / args.increase_step)
                else:
                    beta = 1
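                # beta controls the weighted harmonic mean below: as beta -> 0 the combined reward reduces
                # to senti_reward alone, and beta = 1 gives the plain harmonic mean of sentiment and content.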

                reward_merge_type = 'H(sentiment, content), beta=%.2f' % beta  # enlarge the influence of senti_reward
                reward = (1 + beta * beta) * senti_reward * cont_reward / (
                    beta * beta * senti_reward + cont_reward)

                reward_all.extend(reward)
                reward_expect_all.extend(reward * np.exp(mid_ids_log_prob))

                # policy gradient training
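                # The sampled sequences become the decoder targets, each weighted by its reward via
                # s2ss_train.reward; retrain_op then applies the resulting policy-gradient step.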
                if not args.no_RL:
                    feed_dict = {
                        s2ss_train.encoder_input: tile_src_ids,
                        s2ss_train.encoder_input_len: tile_src_length,
                        s2ss_train.decoder_input: mid_ids_in,
                        s2ss_train.decoder_target: mid_ids_out,
                        s2ss_train.decoder_target_len: mid_ids_length + 1,
                        s2ss_train.decoder_s: tile_tgt_decoder_s,
                        s2ss_train.reward: reward
                    }
                    sess.run([s2ss_train.rl_loss, s2ss_train.retrain_op],
                             feed_dict=feed_dict)

                # Teacher forcing data types:
                #  1. back translation data (greedy decode)
                #  2. back translation data (random decode)
                #  3. back translation noise data
                #  4. pseudo data
                #  5. same data (x->x)
                #  6. same_noise (x'->x)

                if "back_trans" in args.teacher_forcing:
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 2 + global_step // args.MLE_decay_steps)  # 10 after 1 epoch
                        else:
                            gap = min(
                                5,
                                int(1 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 1
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print(
                                '$$$Update B use back-translated data (Update gap:%s)'
                                % gap)
                        # Update Seq2SentiSeq with previous model generated data  # senti-, bleu+
                        feed_dict = {
                            s2ss_train.encoder_input: mid_ids_bs,
                            s2ss_train.encoder_input_len: mid_ids_length_bs,
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"],
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "back_trans_random" in args.teacher_forcing:
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 2 + global_step // args.MLE_decay_steps)  # 10 after 1 epoch
                        else:
                            gap = min(
                                5,
                                int(1 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 1
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print(
                                '$$$Update B use back_trans_random data (Update gap:%s)'
                                % gap)
                        # Update Seq2SentiSeq with randomly sampled back-translation data
                        feed_dict = {
                            s2ss_train.encoder_input: mid_ids,
                            s2ss_train.encoder_input_len: mid_ids_length,
                            s2ss_train.decoder_input: tile_src_ids_in,
                            s2ss_train.decoder_target: tile_src_ids_out,
                            s2ss_train.decoder_target_len: tile_src_length + 1,
                            s2ss_train.decoder_s: tile_src_decoder_s,
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "back_trans_noise" in args.teacher_forcing:
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 2 + global_step // args.MLE_decay_steps)  # 10 after 1 epoch
                        else:
                            gap = min(
                                5,
                                int(1 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 1
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print(
                                '$$$Update B use back_trans_noise data (Update gap:%s)'
                                % gap)
                        # Update Seq2SentiSeq with previous model generated data with noise
                        noise_ids, noise_ids_length = add_noise(
                            mid_ids_bs, mid_ids_length_bs)
                        feed_dict = {
                            s2ss_train.encoder_input: noise_ids,
                            s2ss_train.encoder_input_len: noise_ids_length,
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"],
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "pseudo_data" in args.teacher_forcing:  # balance
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 3 + global_step // args.MLE_decay_steps)  # 10 after 1 epoch
                        else:
                            gap = min(
                                100,
                                int(3 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 3
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print('$$$Update use pseudo data (Update gap:%s)' %
                                  gap)
                        data = sess.run(
                            paired_train_data_next)  # fetch a batch of pseudo-parallel data
                        feed_dict = {
                            s2ss_train.encoder_input: data["source_ids"],
                            s2ss_train.encoder_input_len:
                            data["source_length"],
                            s2ss_train.decoder_input: data["target_ids_in"],
                            s2ss_train.decoder_target: data["target_ids_out"],
                            s2ss_train.decoder_target_len:
                            data["target_length"] + 1,
                            s2ss_train.decoder_s: data["target_senti"]
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "same" in args.teacher_forcing:
                    if args.same_decay:
                        if args.same_decay_type == "linear":
                            gap = min(8, 2 + global_step // args.same_decay_steps)  # capped at 8
                        else:
                            gap = min(
                                10,
                                int(2 / np.power(
                                    args.same_decay_rate,
                                    global_step / args.same_decay_steps)))
                    else:
                        gap = 2
                    if n_batch % gap == 0:
                        print('$$$Update use same data (Update gap:%s)' % gap)
                        # Update Seq2SentiSeq with target output  # senti-, bleu+
                        feed_dict = {
                            s2ss_train.encoder_input: src["ids"],
                            s2ss_train.encoder_input_len: src["length"],
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"]
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "same_noise" in args.teacher_forcing:
                    if args.same_decay:
                        if args.same_decay_type == "linear":
                            gap = min(8, 2 + global_step // args.same_decay_steps)  # capped at 8
                        else:
                            gap = min(
                                10,
                                int(2 / np.power(
                                    args.same_decay_rate,
                                    global_step / args.same_decay_steps)))
                    else:
                        gap = 2
                    if n_batch % gap == 0:
                        print('$$$Update use same_noise data (Update gap:%s)' %
                              gap)
                        noise_ids, noise_ids_length = add_noise(
                            src["ids"], src["length"])
                        feed_dict = {
                            s2ss_train.encoder_input: noise_ids,
                            s2ss_train.encoder_input_len: noise_ids_length,
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"]
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)
            except tf.errors.OutOfRangeError:  # next epoch
                print("Train---Total N batch:{}\tCost time:{}".format(
                    n_batch,
                    time.time() - t0))
                n_batch = -1
                break
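
Both training loops above combine a style (or sentiment) reward with a content reward through an F-measure-style weighted harmonic mean, so the total reward is only high when both components are. Below is a small standalone sketch of that combination together with the beta ramp used in the second loop; the reward values and the increase_step of 100 are made up for illustration.

import numpy as np

def combine_rewards(senti_reward, cont_reward, beta):
    # Weighted harmonic mean: beta -> 0 reduces to senti_reward alone, beta = 1 is the plain harmonic mean.
    return (1 + beta ** 2) * senti_reward * cont_reward / (beta ** 2 * senti_reward + cont_reward)

senti = np.array([0.9, 0.9, 0.2])  # sentiment reward per sample
cont = np.array([0.8, 0.1, 0.8])   # content (reconstruction) reward per sample

for step in (0, 500, 5000):
    beta = min(1.0, 0.1 * step / 100)  # increase_beta schedule with increase_step = 100 (illustrative)
    print(step, beta, combine_rewards(senti, cont, beta))

With beta near zero the combined reward tracks the sentiment reward alone; as beta ramps up to 1, samples that sacrifice content preservation stop being rewarded, mirroring the increase_beta behaviour in the loop above.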