Example #1
def evaluate():
    # Load model
    weight_path = 'model/09031344_epoch_4_train_loss_3.7933.h5'

    # Load data
    X, Sources, Targets = load_test_data()
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()

    model = TransformerModel(in_vocab_len=len(idx2de),
                             out_vocab_len=len(idx2en),
                             max_len=hp.maxlen)
    model.load_model(weight_path)

    for i in range(len(X) // hp.batch_size):
        x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
        sources = Sources[i * hp.batch_size:(i + 1) * hp.batch_size]
        targets = Targets[i * hp.batch_size:(i + 1) * hp.batch_size]

        preds = model.translate(x, idx2en)

        for source, target, pred in zip(sources, targets, preds):
            print('source:', source)
            print('expected:', target)
            print('pred:', pred)
            print()
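The loop above only prints each hypothesis next to its reference. If the sacrebleu package is available, the same pairs can also be scored; a minimal sketch, assuming preds and Targets hold plain detokenized strings (which the snippet suggests but does not guarantee):

import sacrebleu

all_preds, all_refs = [], []
for i in range(len(X) // hp.batch_size):
    x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
    all_preds.extend(model.translate(x, idx2en))
    all_refs.extend(Targets[i * hp.batch_size:(i + 1) * hp.batch_size])

bleu = sacrebleu.corpus_bleu(all_preds, [all_refs])  # one reference stream
print('BLEU:', bleu.score)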
Example #2
def evaluate_train():
    # Load model
    weight_path = 'model/09031925_epoch_0_train_loss_5.9855.h5'

    # Load data
    Sources, Targets = load_train_data()
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    batch_size = 5

    model = TransformerModel(in_vocab_len=len(idx2de),
                             out_vocab_len=len(idx2en),
                             max_len=hp.maxlen)
    model.load_model(weight_path)

    for i in range(5 // batch_size):
        x = Sources[i * batch_size:(i + 1) * batch_size]
        sources = Sources[i * batch_size:(i + 1) * batch_size]
        targets = Targets[i * batch_size:(i + 1) * batch_size]

        preds = model.translate_with_ans(sources, targets, idx2en)
        # preds = model.translate(x, idx2en)

        for source, target, pred in zip(sources, targets, preds):
            print('source:', ' '.join(idx2de[idx] for idx in source))
            print('expected:', ' '.join(idx2en[idx] for idx in target))
            print('pred:', pred)
            print()
Example #3
    def inference(self, name=None):
        with tf.variable_scope("tf_inference"):
            X = self.sentence_placeholder
            seqEmds = tf.nn.embedding_lookup(self.words, X)
            shapeS = tf.shape(seqEmds)

            clsH = tf.tile(self.cls, [shapeS[0]])
            clsH = tf.reshape(clsH, [
                              shapeS[0], self.embedding_size-self.pos_embedding_size-self.seg_embedding_size])
            clsH = tf.expand_dims(clsH, axis=1)


            # now X is [Batch, kMaxSeqLen, kMaxSubToken, embedding]
            X = tf.concat([clsH, seqEmds], axis=1)
            xs = tf.zeros([shapeS[0], kMaxSeqLen])
            #[Batch, kMaxSeqLen, embedding2]
            Xpos = positional_encoding(xs, self.pos_embedding_size)
            
            amask = tf.sequence_mask(
                self.a_length, self.max_token_per_sentence+1, dtype=tf.int32)
            abmask = tf.sequence_mask(
                self.totalLength, self.max_token_per_sentence+1, dtype=tf.int32)
            totalmask = amask + abmask
            segE = tf.nn.embedding_lookup(self.segs, totalmask)
           

            X = tf.concat([X, Xpos, segE], axis=2)
            X = layer_norm_and_dropout(X, self.dropout_h)
            print("now X is: %r" % (X))
            X = TransformerModel(8, 6, X, self.dropout_h, mask=abmask)
        return X
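The inference() method above (and the variant in the next example) relies on a positional_encoding helper that is not shown in these snippets. A minimal NumPy sketch of the standard sinusoidal encoding from "Attention Is All You Need", offered as a hypothetical stand-in for that helper:

import numpy as np

def sinusoidal_positional_encoding(max_len, dim):
    """Return a [max_len, dim] matrix of sinusoidal position embeddings."""
    assert dim % 2 == 0, "dim must be even for the sin/cos interleaving"
    positions = np.arange(max_len)[:, None]                       # [max_len, 1]
    div_terms = np.exp(np.arange(0, dim, 2) * -(np.log(10000.0) / dim))
    enc = np.zeros((max_len, dim), dtype=np.float32)
    enc[:, 0::2] = np.sin(positions * div_terms)                  # even dimensions
    enc[:, 1::2] = np.cos(positions * div_terms)                  # odd dimensions
    return enc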
Example #4
    def inference(self, name=None):
        with tf.variable_scope("tf_inference"):
            X = self.sentence_placeholder
            seqEmds = tf.nn.embedding_lookup(self.words, X)
            shapeS = tf.shape(seqEmds)
            clsH = tf.tile(self.cls, [shapeS[0]])
            # self.embedding_size-self.pos_embedding_size-self.seg_embedding_size] == embeddingSize
            clsH = tf.reshape(clsH, [
                              shapeS[0], self.embedding_size-self.pos_embedding_size-self.seg_embedding_size])
            clsH = tf.expand_dims(clsH, axis=1)
            # add the [cls] embedding
            X = tf.concat([clsH, seqEmds], axis=1)
            # positions also cover [cls]: 1 ([cls]) + max_token_per_sentence = final_length = 200
            xs = tf.tile([0], [shapeS[0]])
            xs = tf.reshape(xs, [shapeS[0], 1])
            # position embedding
            Xpos = positional_encoding(
                tf.concat([xs, self.sentence_placeholder], axis=1), self.pos_embedding_size)
            amask = tf.sequence_mask(
                self.a_length, self.max_token_per_sentence+1, dtype=tf.int32)
            abmask = tf.sequence_mask(
                self.ab_length, self.max_token_per_sentence+1, dtype=tf.int32)
            totalmask = amask + abmask
            # add the segment-id embedding: default 0, 2 if the token belongs to sentence A, 1 if it belongs to sentence B
            segE = tf.nn.embedding_lookup(self.segs, totalmask)
            X = tf.concat([X, Xpos, segE], axis=2)
            print("now X is: %r" % (X))
            # 10 heads, 3 layers; abmask marks the positions of valid (non-padding) input tokens
            # output: [batch_size, length, self.embedding_size]
            X = TransformerModel(10, 3, X, self.dropout_h, mask=abmask)

        return X
Example #5
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Start logging.
    level = logging.DEBUG if settings.verbose else logging.INFO
    logging.basicConfig(level=level, format='%(levelname)s: %(message)s')

    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs and restore their variables.
    logging.debug("Loading models\n")
    models = []

    # ============= 19/8/16 KP ============
    warning('='*20 + 'Model Config to Load')
    warning(settings.models)
    # =====================================

    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            saver = model_loader.init_or_restore_variables(config, session,
                                                           ensemble_scope=scope)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # ============= 19/8/16 KP ============
    model_summary()
    # =====================================

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
Example #6
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs.
    logging.debug("Loading models\n")
    models = []
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # Add smoothing variables (if the models were trained with smoothing).
    #FIXME Assumes either all models were trained with smoothing or none were.
    if configs[0].exponential_smoothing > 0.0:
        smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

    # Restore the model variables.
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            _ = model_loader.init_or_restore_variables(config, session,
                                                       ensemble_scope=scope)

    # Swap-in the smoothed versions of the variables.
    if configs[0].exponential_smoothing > 0.0:
        session.run(fetches=smoothing.swap_ops)

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
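ExponentialSmoothing, used here and in the training loops further down, is assumed to maintain an exponential moving average of the model variables: update_ops pull the shadow copies towards the current weights during training, and swap_ops exchange the live and shadow weights so that validation and translation run with the smoothed parameters. A plain-Python sketch of the averaging rule (not the repository's TensorFlow implementation):

def ema_update(shadow, params, decay=0.9999):
    """Exponential moving average: shadow <- decay * shadow + (1 - decay) * params."""
    for name, value in params.items():
        shadow[name] = decay * shadow.get(name, value) + (1.0 - decay) * value
    return shadow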
Example #7
def main():
    num_gpus = torch.cuda.device_count()

    # Parameters
    args = argparsing()
    emsize = 200 # embedding dimension
    nhid = 200 # the dimension of the feedforward network model in nn.TransformerEncoder
    nlayers = 4 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
    nhead = 2 # the number of heads in the multiheadattention models
    dropout = 0.5 # the dropout value
    bptt = 35
    lr = 5.0 # learning rate
    batch_size = 20
    layerdrop = args.layerdrop

    if num_gpus < 1:
        raise Exception('No GPUs available!')
    elif num_gpus > 1:
        lr *= num_gpus
        batch_size *= num_gpus

    # Dataset
    print('Create dataloaders')
    dataloaders, info = get_loaders(name='WikiText2', batch_size=batch_size)
    ntokens = info['ntokens']

    # Model
    print('Create model')
    model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout, layerdrop=layerdrop)
    if num_gpus > 1:
        device_ids = list(range(num_gpus))
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
    criterion = nn.CrossEntropyLoss()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Create trainer')
    trainer = Trainer(model, dataloaders, optimizer, scheduler, criterion, bptt, ntokens, device)
    print('Start training')
    trainer.run(epochs=1)
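bptt above is the truncated back-propagation window. The Trainer is assumed to slice a contiguous [seq_len, batch] token tensor the same way as the standard PyTorch language-modelling recipe, roughly:

def get_batch(source, i, bptt=35):
    """Return one bptt-sized input chunk and its next-token targets."""
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]                         # [seq_len, batch]
    target = source[i + 1:i + 1 + seq_len].reshape(-1)   # flattened for the loss
    return data, target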
Example #8
    def _load_models(self, process_id, sess):
        """
        Loads models and returns them
        """
        logging.debug("Process '%s' - Loading models\n" % (process_id))

        import tensorflow as tf
        models = []
        for i, options in enumerate(self._options):
            with tf.variable_scope("model%d" % i) as scope:
                if options.model_type == "transformer":
                    model = TransformerModel(options)
                else:
                    model = rnn_model.RNNModel(options)
                saver = model_loader.init_or_restore_variables(
                    options, sess, ensemble_scope=scope)
                models.append(model)

        logging.info("NOTE: Length of translations is capped to {}".format(self._options[0].translation_maxlen))
        return models
Example #9
def train(config, sess):
    assert (config.prior_model != None and (tf.train.checkpoint_exists(os.path.abspath(config.prior_model))) or (config.map_decay_c==0.0)), \
    "MAP training requires a prior model file: Use command-line option --prior_model"

    # Construct the graph, with one model replica per GPU

    num_gpus = len(tf_utils.get_available_gpus())
    num_replicas = max(1, num_gpus)

    if config.loss_function == 'MRT':
        assert config.gradient_aggregation_steps == 1
        assert config.max_sentences_per_device == 0, "MRT mode does not support sentence-based split"
        if config.max_tokens_per_device != 0:
            assert (config.samplesN * config.maxlen <= config.max_tokens_per_device), \
                "need to make sure all candidates for a sentence can be fed into the model"
        else:
            assert num_replicas == 1, "MRT mode does not support sentence-based split"
            assert (config.samplesN * config.maxlen <= config.token_batch_size), \
                "need to make sure all candidates for a sentence can be fed into the model"



    logging.info('Building model...')
    replicas = []
    for i in range(num_replicas):
        device_type = "GPU" if num_gpus > 0 else "CPU"
        device_spec = tf.DeviceSpec(device_type=device_type, device_index=i)
        with tf.device(device_spec):
            with tf.variable_scope(tf.get_variable_scope(), reuse=(i>0)):
                if config.model_type == "transformer":
                    model = TransformerModel(config)
                else:
                    model = rnn_model.RNNModel(config)
                replicas.append(model)

    init = tf.zeros_initializer(dtype=tf.int32)
    global_step = tf.get_variable('time', [], initializer=init, trainable=False)

    if config.learning_schedule == "constant":
        schedule = learning_schedule.ConstantSchedule(config.learning_rate)
    elif config.learning_schedule == "transformer":
        schedule = learning_schedule.TransformerSchedule(
            global_step=global_step,
            dim=config.state_size,
            warmup_steps=config.warmup_steps)
    elif config.learning_schedule == "warmup-plateau-decay":
        schedule = learning_schedule.WarmupPlateauDecaySchedule(
            global_step=global_step,
            peak_learning_rate=config.learning_rate,
            warmup_steps=config.warmup_steps,
            plateau_steps=config.plateau_steps)
    else:
        logging.error('Learning schedule type is not valid: {}'.format(
            config.learning_schedule))
        sys.exit(1)

    if config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=schedule.learning_rate,
                                           beta1=config.adam_beta1,
                                           beta2=config.adam_beta2,
                                           epsilon=config.adam_epsilon)
    else:
        logging.error('No valid optimizer defined: {}'.format(config.optimizer))
        sys.exit(1)

    if config.summary_freq:
        summary_dir = (config.summary_dir if config.summary_dir is not None
                       else os.path.abspath(os.path.dirname(config.saveto)))
        writer = tf.summary.FileWriter(summary_dir, sess.graph)
    else:
        writer = None

    updater = ModelUpdater(config, num_gpus, replicas, optimizer, global_step,
                           writer)

    if config.exponential_smoothing > 0.0:
        smoothing = ExponentialSmoothing(config.exponential_smoothing)

    saver, progress = model_loader.init_or_restore_variables(
        config, sess, train=True)

    global_step.load(progress.uidx, sess)

    if config.sample_freq:
        random_sampler = RandomSampler(
            models=[replicas[0]],
            configs=[config],
            beam_size=1)

    if config.beam_freq or config.valid_script is not None:
        beam_search_sampler = BeamSearchSampler(
            models=[replicas[0]],
            configs=[config],
            beam_size=config.beam_size)

    #save model options
    write_config_to_json_file(config, config.saveto)

    text_iterator, valid_text_iterator = load_data(config)
    _, _, num_to_source, num_to_target = util.load_dictionaries(config)
    total_loss = 0.
    n_sents, n_words = 0, 0
    last_time = time.time()
    logging.info("Initial uidx={}".format(progress.uidx))
    # limit training to the current epoch when printing per-token probabilities
    if config.print_per_token_pro:
        config.max_epochs = progress.eidx+1
    for progress.eidx in range(progress.eidx, config.max_epochs):
        logging.info('Starting epoch {0}'.format(progress.eidx))
        for source_sents, target_sents in text_iterator:
            if len(source_sents[0][0]) != config.factors:
                logging.error('Mismatch between number of factors in settings ({0}), and number in training corpus ({1})\n'.format(config.factors, len(source_sents[0][0])))
                sys.exit(1)
            x_in, x_mask_in, y_in, y_mask_in = util.prepare_data(
                source_sents, target_sents, config.factors, maxlen=None)
            if x_in is None:
                logging.info('Minibatch with zero sample under length {0}'.format(config.maxlen))
                continue
            write_summary_for_this_batch = config.summary_freq and ((progress.uidx % config.summary_freq == 0) or (config.finish_after and progress.uidx % config.finish_after == 0))
            (factors, seqLen, batch_size) = x_in.shape

            output = updater.update(
                sess, x_in, x_mask_in, y_in, y_mask_in, num_to_target,
                write_summary_for_this_batch)

            if config.print_per_token_pro == False:
                total_loss += output
            else:
                # append the per-token probabilities to the file
                with open(config.print_per_token_pro, 'a') as f:
                    for pro in output:
                        f.write(str(pro) + '\n')

            n_sents += batch_size
            n_words += int(numpy.sum(y_mask_in))
            progress.uidx += 1

            # Update the smoothed version of the model variables.
            # To reduce the performance overhead, we only do this once every
            # N steps (the smoothing factor is adjusted accordingly).
            if config.exponential_smoothing > 0.0 and progress.uidx % smoothing.update_frequency == 0:
                sess.run(fetches=smoothing.update_ops)

            if config.disp_freq and progress.uidx % config.disp_freq == 0:
                duration = time.time() - last_time
                disp_time = datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')
                logging.info('{0} Epoch: {1} Update: {2} Loss/word: {3} Words/sec: {4} Sents/sec: {5}'.format(disp_time, progress.eidx, progress.uidx, total_loss/n_words, n_words/duration, n_sents/duration))
                last_time = time.time()
                total_loss = 0.
                n_sents = 0
                n_words = 0

            if config.sample_freq and progress.uidx % config.sample_freq == 0:
                x_small = x_in[:, :, :10]
                x_mask_small = x_mask_in[:, :10]
                y_small = y_in[:, :10]
                samples = translate_utils.translate_batch(
                    sess, random_sampler, x_small, x_mask_small,
                    config.translation_maxlen, 0.0)
                assert len(samples) == len(x_small.T) == len(y_small.T), \
                    (len(samples), x_small.shape, y_small.shape)
                for xx, yy, ss in zip(x_small.T, y_small.T, samples):
                    source = util.factoredseq2words(xx, num_to_source)
                    target = util.seq2words(yy, num_to_target)
                    sample = util.seq2words(ss[0][0], num_to_target)
                    logging.info('SOURCE: {}'.format(source))
                    logging.info('TARGET: {}'.format(target))
                    logging.info('SAMPLE: {}'.format(sample))

            if config.beam_freq and progress.uidx % config.beam_freq == 0:
                x_small = x_in[:, :, :10]
                x_mask_small = x_mask_in[:, :10]
                y_small = y_in[:,:10]
                samples = translate_utils.translate_batch(
                    sess, beam_search_sampler, x_small, x_mask_small,
                    config.translation_maxlen, config.normalization_alpha)
                assert len(samples) == len(x_small.T) == len(y_small.T), \
                    (len(samples), x_small.shape, y_small.shape)
                for xx, yy, ss in zip(x_small.T, y_small.T, samples):
                    source = util.factoredseq2words(xx, num_to_source)
                    target = util.seq2words(yy, num_to_target)
                    logging.info('SOURCE: {}'.format(source))
                    logging.info('TARGET: {}'.format(target))
                    for i, (sample_seq, cost) in enumerate(ss):
                        sample = util.seq2words(sample_seq, num_to_target)
                        msg = 'SAMPLE {}: {} Cost/Len/Avg {}/{}/{}'.format(
                            i, sample, cost, len(sample), cost/len(sample))
                        logging.info(msg)

            if config.valid_freq and progress.uidx % config.valid_freq == 0:
                if config.exponential_smoothing > 0.0:
                    sess.run(fetches=smoothing.swap_ops)
                    valid_ce = validate(sess, replicas[0], config,
                                        valid_text_iterator)
                    sess.run(fetches=smoothing.swap_ops)
                else:
                    valid_ce = validate(sess, replicas[0], config,
                                        valid_text_iterator)
                if (len(progress.history_errs) == 0 or
                    valid_ce < min(progress.history_errs)):
                    progress.history_errs.append(valid_ce)
                    progress.bad_counter = 0
                    save_non_checkpoint(sess, saver, config.saveto)
                    progress_path = '{0}.progress.json'.format(config.saveto)
                    progress.save_to_json(progress_path)
                else:
                    progress.history_errs.append(valid_ce)
                    progress.bad_counter += 1
                    if progress.bad_counter > config.patience:
                        logging.info('Early Stop!')
                        progress.estop = True
                        break
                if config.valid_script is not None:
                    if config.exponential_smoothing > 0.0:
                        sess.run(fetches=smoothing.swap_ops)
                        score = validate_with_script(sess, beam_search_sampler)
                        sess.run(fetches=smoothing.swap_ops)
                    else:
                        score = validate_with_script(sess, beam_search_sampler)
                    need_to_save = (score is not None and
                        (len(progress.valid_script_scores) == 0 or
                         score > max(progress.valid_script_scores)))
                    if score is None:
                        score = 0.0  # ensure a valid value is written
                    progress.valid_script_scores.append(score)
                    if need_to_save:
                        progress.bad_counter = 0
                        save_path = config.saveto + ".best-valid-script"
                        save_non_checkpoint(sess, saver, save_path)
                        write_config_to_json_file(config, save_path)

                        progress_path = '{}.progress.json'.format(save_path)
                        progress.save_to_json(progress_path)

            if config.save_freq and progress.uidx % config.save_freq == 0:
                saver.save(sess, save_path=config.saveto, global_step=progress.uidx)
                write_config_to_json_file(config, "%s-%s" % (config.saveto, progress.uidx))

                progress_path = '{0}-{1}.progress.json'.format(config.saveto, progress.uidx)
                progress.save_to_json(progress_path)

            if config.finish_after and progress.uidx % config.finish_after == 0:
                logging.info("Maximum number of updates reached")
                saver.save(sess, save_path=config.saveto, global_step=progress.uidx)
                write_config_to_json_file(config, "%s-%s" % (config.saveto, progress.uidx))

                progress.estop=True
                progress_path = '{0}-{1}.progress.json'.format(config.saveto, progress.uidx)
                progress.save_to_json(progress_path)
                break
        if progress.estop:
            break
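learning_schedule.TransformerSchedule above is assumed to implement the inverse-square-root warmup schedule from "Attention Is All You Need"; as a plain-Python sketch:

def transformer_learning_rate(step, dim, warmup_steps):
    """lr = dim^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)"""
    step = max(step, 1)  # avoid division by zero on the very first update
    return dim ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)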
Example #10
def train(config, sess):
    ####################################################
    assert (config.prior_model != None and (tf.train.checkpoint_exists(os.path.abspath(config.prior_model))) or (config.map_decay_c==0.0)), \
    "MAP training requires a prior model file: Use command-line option --prior_model"

    # Construct the graph, with one model replica per GPU

    num_gpus = len(util.get_available_gpus())
    num_replicas = max(1, num_gpus)

    logging.info('Building model...')
    replicas = []
    for i in range(num_replicas):
        device_type = "GPU" if num_gpus > 0 else "CPU"
        device_spec = tf.DeviceSpec(device_type=device_type, device_index=i)
        with tf.device(device_spec):
            with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                if config.model_type == "transformer":
                    model = TransformerModel(config)
                else:
                    model = rnn_model.RNNModel(config)
                replicas.append(model)

    init = tf.zeros_initializer(dtype=tf.int32)
    global_step = tf.get_variable('time', [],
                                  initializer=init,
                                  trainable=False)

    if config.learning_schedule == "constant":
        schedule = ConstantSchedule(config.learning_rate)
    elif config.learning_schedule == "transformer":
        schedule = TransformerSchedule(global_step=global_step,
                                       dim=config.state_size,
                                       warmup_steps=config.warmup_steps)
    else:
        logging.error('Learning schedule type is not valid: {}'.format(
            config.learning_schedule))
        sys.exit(1)

    if config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate=schedule.learning_rate,
            beta1=config.adam_beta1,
            beta2=config.adam_beta2,
            epsilon=config.adam_epsilon)
    else:
        logging.error('No valid optimizer defined: {}'.format(
            config.optimizer))
        sys.exit(1)

    if config.summary_freq:
        summary_dir = (config.summary_dir if config.summary_dir is not None
                       else os.path.abspath(os.path.dirname(config.saveto)))
        writer = tf.summary.FileWriter(summary_dir, sess.graph)
    else:
        writer = None

    updater = ModelUpdater(config, num_gpus, replicas, optimizer, global_step,
                           writer)

    saver, progress = model_loader.init_or_restore_variables(config,
                                                             sess,
                                                             train=True)

    ############################################################
    #add: pretrain
    if config.pretrain:
        logging.info("Start pre-training")
        # pre-training hyperparameters
        pre_batch_size = 1000
        epochs = 20
        pre_learning_rate = 0.001
        pre_optimizer = tf.train.GradientDescentOptimizer(
            pre_learning_rate).minimize(replicas[0].loss_pre_train)
        # load the pre-training data and the related dictionaries
        gvocab, gvectors = util.pre_load_data(config.pretrain_vocab,
                                              config.pretrain_vectors)
        pre_vocab_list = list(gvocab.keys())
        # oversampling
        pre_train_list = []
        with open('/media/ntfs-3/EXP/MULTI/mix/zh-en/data3/glove/vocab.txt',
                  'r',
                  encoding='utf-8') as f:
            for line in f:
                k, v = line.strip().split()
                pre_train_list.extend([k] * int(v))
        utf8_dict = json.load(
            open(config.source_dicts[0], 'r', encoding='utf-8'))
        embedding_list = []
        # start pre-training
        for i in range(epochs):
            logging.info("epoch:{}".format(i))
            if i == epochs - 1:
                source_x, source_y, _vocab = util.get_data(pre_vocab_list,
                                                           pre_batch_size,
                                                           gvocab,
                                                           gvectors,
                                                           utf8_dict,
                                                           shuffle=False)
            else:
                source_x, source_y, _vocab = util.get_data(pre_train_list,
                                                           pre_batch_size,
                                                           gvocab,
                                                           gvectors,
                                                           utf8_dict,
                                                           shuffle=True)
            for idx, [s_x, s_y] in enumerate(zip(source_x, source_y)):
                assert len(s_x) == len(s_y), "{}, {}".format(
                    len(s_x), len(s_y))
                sx, sy = util.pre_prepare_data(s_x, s_y)
                feed_dict = {}
                feed_dict[replicas[0].pre_inputs.x] = sx
                feed_dict[replicas[0].pre_inputs.y] = sy
                _, loss, embedding = sess.run(
                    [pre_optimizer, replicas[0].loss_pre_train,
                     replicas[0].pre_embedding],
                    feed_dict=feed_dict)
                if idx % 100 == 0:
                    logging.info("loss:{}".format(loss))
                if i == epochs - 1:
                    embedding_list.append(embedding)
        assert _vocab == pre_vocab_list
        emb = embedding_list[0]
        for e in embedding_list[1:]:
            emb = numpy.concatenate((emb, e))
        numpy.save("pre_emb/pre_emb.npy", emb)
        with open("pre_emb/vocab", "w", encoding="utf-8") as f:
            f.write("\n".join(pre_vocab_list))
        # t-SNE visualization
        tsne = util.get_tsne(emb, "pre_emb/tsne.npy")
        gtsne = numpy.load(config.pretrain_tsne)
        #util.plot_tsne(_vocab, tsne, gvocab, gtsne, top=20)
        #exit(0)
    ##################################################################################

    global_step.load(progress.uidx, sess)

    # Use an InferenceModelSet to abstract over model types for sampling and
    # beam search. Multi-GPU sampling and beam search are not currently
    # supported, so we just use the first replica.
    model_set = inference.InferenceModelSet([replicas[0]], [config])

    #save model options
    write_config_to_json_file(config, config.saveto)

    text_iterator, valid_text_iterator = load_data(config)
    _, _, num_to_source, num_to_target = util.load_dictionaries(config)
    total_loss = 0.
    n_sents, n_words = 0, 0
    last_time = time.time()
    logging.info("Initial uidx={}".format(progress.uidx))
    for progress.eidx in range(progress.eidx, config.max_epochs):
        logging.info('Starting epoch {0}'.format(progress.eidx))
        for pre_source_sents, source_sents, target_sents in text_iterator:
            #if len(source_sents[0][0]) != config.factors:
            #logging.error('Mismatch between number of factors in settings ({0}), and number in training corpus ({1})\n'.format(config.factors, len(source_sents[0][0])))
            #sys.exit(1)

            px_in, x_in, x_mask_in, y_in, y_mask_in = util.prepare_data(
                source_sents,
                target_sents,
                config.factors,
                pre_source_sents,
                maxlen=None)

            if x_in is None:
                logging.info(
                    'Minibatch with zero sample under length {0}'.format(
                        config.maxlen))
                continue

            write_summary_for_this_batch = config.summary_freq and (
                (progress.uidx % config.summary_freq == 0) or
                (config.finish_after
                 and progress.uidx % config.finish_after == 0))
            (factors, seqLen, uLen, batch_size) = x_in.shape

            loss = updater.update(sess, px_in, x_in, x_mask_in, y_in,
                                  y_mask_in, write_summary_for_this_batch)

            total_loss += loss
            n_sents += batch_size
            n_words += int(numpy.sum(y_mask_in))
            progress.uidx += 1

            if config.disp_freq and progress.uidx % config.disp_freq == 0:
                duration = time.time() - last_time
                disp_time = datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')
                logging.info(
                    '{0} Epoch: {1} Update: {2} Loss/word: {3} Words/sec: {4} Sents/sec: {5}'
                    .format(disp_time, progress.eidx, progress.uidx,
                            total_loss / n_words, n_words / duration,
                            n_sents / duration))
                last_time = time.time()
                total_loss = 0.
                n_sents = 0
                n_words = 0

            if config.sample_freq and progress.uidx % config.sample_freq == 0:
                x_small = x_in[:, :, :, :10]
                x_mask_small = x_mask_in[:, :, :10]
                y_small = y_in[:, :10]
                samples = model_set.sample(sess, x_small, x_mask_small)
                assert len(samples) == len(x_small.T) == len(
                    y_small.T), (len(samples), x_small.shape, y_small.shape)
                for xx, yy, ss in zip(x_small.T, y_small.T, samples):
                    #source = util.factoredseq2words(xx, num_to_source)
                    target = util.seq2words(yy, num_to_target)
                    sample = util.seq2words(ss, num_to_target)
                    #logging.info('SOURCE: {}'.format(source))
                    #logging.info('SOURCE: {}'.format(xx))
                    logging.info('TARGET: {}'.format(target))
                    logging.info('SAMPLE: {}'.format(sample))

            if config.beam_freq and progress.uidx % config.beam_freq == 0:
                x_small = x_in[:, :, :, :10]
                x_mask_small = x_mask_in[:, :, :10]
                y_small = y_in[:, :10]
                samples = model_set.beam_search(
                    sess,
                    x_small,
                    x_mask_small,
                    config.beam_size,
                    normalization_alpha=config.normalization_alpha)
                # samples is a list with shape batch x beam x len
                assert len(samples) == len(x_small.T) == len(
                    y_small.T), (len(samples), x_small.shape, y_small.shape)
                for xx, yy, ss in zip(x_small.T, y_small.T, samples):
                    #source = util.factoredseq2words(xx, num_to_source)
                    target = util.seq2words(yy, num_to_target)
                    #logging.info('SOURCE: {}'.format(source))
                    logging.info('TARGET: {}'.format(target))
                    for i, (sample_seq, cost) in enumerate(ss):
                        sample = util.seq2words(sample_seq, num_to_target)
                        msg = 'SAMPLE {}: {} Cost/Len/Avg {}/{}/{}'.format(
                            i, sample, cost, len(sample), cost / len(sample))
                        logging.info(msg)

            if config.valid_freq and progress.uidx % config.valid_freq == 0:
                valid_ce = validate(sess, replicas[0], config,
                                    valid_text_iterator)
                if (len(progress.history_errs) == 0
                        or valid_ce < min(progress.history_errs)):
                    progress.history_errs.append(valid_ce)
                    progress.bad_counter = 0
                    save_non_checkpoint(sess, saver, config.saveto)
                    progress_path = '{0}.progress.json'.format(config.saveto)
                    progress.save_to_json(progress_path)
                else:
                    progress.history_errs.append(valid_ce)
                    progress.bad_counter += 1
                    if progress.bad_counter > config.patience:
                        logging.info('Early Stop!')
                        progress.estop = True
                        break
                if config.valid_script is not None:
                    score = validate_with_script(sess, replicas[0], config)
                    need_to_save = (
                        score is not None
                        and (len(progress.valid_script_scores) == 0
                             or score > max(progress.valid_script_scores)))
                    if score is None:
                        score = 0.0  # ensure a valid value is written
                    progress.valid_script_scores.append(score)
                    if need_to_save:
                        progress.bad_counter = 0
                        save_path = config.saveto + ".best-valid-script"
                        save_non_checkpoint(sess, saver, save_path)
                        write_config_to_json_file(config, save_path)

                        progress_path = '{}.progress.json'.format(save_path)
                        progress.save_to_json(progress_path)

            if config.save_freq and progress.uidx % config.save_freq == 0:
                saver.save(sess,
                           save_path=config.saveto,
                           global_step=progress.uidx)
                write_config_to_json_file(
                    config, "%s-%s" % (config.saveto, progress.uidx))

                progress_path = '{0}-{1}.progress.json'.format(
                    config.saveto, progress.uidx)
                progress.save_to_json(progress_path)

            if config.finish_after and progress.uidx % config.finish_after == 0:
                logging.info("Maximum number of updates reached")
                saver.save(sess,
                           save_path=config.saveto,
                           global_step=progress.uidx)
                write_config_to_json_file(
                    config, "%s-%s" % (config.saveto, progress.uidx))

                progress.estop = True
                progress_path = '{0}-{1}.progress.json'.format(
                    config.saveto, progress.uidx)
                progress.save_to_json(progress_path)
                break
        if progress.estop:
            break
Example #11
######################################################################
# The model is set up with the hyperparameters below. The vocab size is
# equal to the length of the vocab object.
#

ntokens = len(input_vocab)  # the size of vocabulary
nclstokens = 4  # D0, D1, S0, S1 + PAD
ntagtokens = 1  # binary O or D
emsize = 512  # embedding dimension
nhid = 512  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 6  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 8  # the number of heads in the multiheadattention models
dropout = 0.1  # the dropout value
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
model = TransformerModel(ntokens, nclstokens, ntagtokens, emsize, nhead, nhid,
                         nlayers, dropout).to(device)
state_dict = torch.load("%s/%s" % (model_dir, args.iter), map_location='cpu')

from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    if k.startswith("module."):
        name = k[7:]  # remove 'module.' of dataparallel
    else:
        name = k
    new_state_dict[name] = v

model.load_state_dict(new_state_dict)

Example #12
import time
    spiece_id_to_tokens = {v: k for k, v in spiece_ids.items()}

    eos_token = '</s>'
    eos_id = spiece_ids[eos_token]

    import sentencepiece as spm

    src_tokenizer = spm.SentencePieceProcessor()
    src_tokenizer.Load(source_tokenizer_model_path)

    trg_tokenizer = spm.SentencePieceProcessor()
    trg_tokenizer.Load(target_tokenizer_model_path)

    model = TransformerModel(intoken=src_vocab_size,
                             outtoken=trg_vocab_size,
                             hidden=d_model,
                             d_ff=d_ff,
                             nlayers=nlayers,
                             dropout=0.0).to(device)

    model = load_transformer_weights(model, weights_path)
    print(f"Done loading pretrained model weights!")

    model.eval()

    src_pieces = src_tokenizer.encode_as_pieces(args.text_to_translate)
    src_input_ids = [spiece_ids[piece] for piece in src_pieces] + [eos_id]

    src_tensor = torch.LongTensor(src_input_ids).unsqueeze(1).to(device)

    trg_indexes = [eos_id]
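    # The snippet ends just before the decoding loop. A typical greedy
    # continuation is sketched below; the forward signature
    # model(src_tensor, trg_tensor) -> logits of shape
    # [trg_len, batch, trg_vocab_size] is an assumption, not taken from the
    # original code.
    for _ in range(128):  # assumed maximum decoding length
        trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(1).to(device)
        with torch.no_grad():
            logits = model(src_tensor, trg_tensor)
        next_id = logits[-1, 0].argmax().item()
        trg_indexes.append(next_id)
        if next_id == eos_id:
            break
    trg_pieces = [spiece_id_to_tokens[idx] for idx in trg_indexes[1:]]
    print(trg_tokenizer.DecodePieces(trg_pieces))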
Example #13
def train(config, sess):
    assert (config.prior_model != None and (tf.train.checkpoint_exists(os.path.abspath(config.prior_model))) or (config.map_decay_c==0.0)), \
    "MAP training requires a prior model file: Use command-line option --prior_model"

    # Construct the graph, with one model replica per GPU

    num_gpus = len(util.get_available_gpus())
    num_replicas = max(1, num_gpus)

    logging.info('Building model...')
    replicas = []
    for i in range(num_replicas):
        device_type = "GPU" if num_gpus > 0 else "CPU"
        device_spec = tf.DeviceSpec(device_type=device_type, device_index=i)
        with tf.device(device_spec):
            with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                if config.model_type == "transformer":
                    model = TransformerModel(config)
                else:
                    model = rnn_model.RNNModel(config)
                replicas.append(model)

    init = tf.zeros_initializer(dtype=tf.int32)
    global_step = tf.get_variable('time', [],
                                  initializer=init,
                                  trainable=False)

    if config.learning_schedule == "constant":
        schedule = ConstantSchedule(config.learning_rate)
    elif config.learning_schedule == "transformer":
        schedule = TransformerSchedule(global_step=global_step,
                                       dim=config.state_size,
                                       warmup_steps=config.warmup_steps)
    else:
        logging.error('Learning schedule type is not valid: {}'.format(
            config.learning_schedule))
        sys.exit(1)

    if config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate=schedule.learning_rate,
            beta1=config.adam_beta1,
            beta2=config.adam_beta2,
            epsilon=config.adam_epsilon)
    else:
        logging.error('No valid optimizer defined: {}'.format(
            config.optimizer))
        sys.exit(1)

    if config.summary_freq:
        summary_dir = (config.summary_dir if config.summary_dir is not None
                       else os.path.abspath(os.path.dirname(config.saveto)))
        writer = tf.summary.FileWriter(summary_dir, sess.graph)
    else:
        writer = None

    updater = ModelUpdater(config, num_gpus, replicas, optimizer, global_step,
                           writer)

    saver, progress = model_loader.init_or_restore_variables(config,
                                                             sess,
                                                             train=True)

    global_step.load(progress.uidx, sess)

    # Use an InferenceModelSet to abstract over model types for sampling and
    # beam search. Multi-GPU sampling and beam search are not currently
    # supported, so we just use the first replica.
    model_set = inference.InferenceModelSet([replicas[0]], [config])

    #save model options
    write_config_to_json_file(config, config.saveto)

    text_iterator, valid_text_iterator = load_data(config)
    _, _, num_to_source, num_to_target = util.load_dictionaries(config)
    total_loss = 0.
    n_sents, n_words = 0, 0
    last_time = time.time()
    logging.info("Initial uidx={}".format(progress.uidx))
    for progress.eidx in range(progress.eidx, config.max_epochs):
        logging.info('Starting epoch {0}'.format(progress.eidx))
        for source_sents, target_sents in text_iterator:
            if len(source_sents[0][0]) != config.factors:
                logging.error(
                    'Mismatch between number of factors in settings ({0}), and number in training corpus ({1})\n'
                    .format(config.factors, len(source_sents[0][0])))
                sys.exit(1)
            x_in, x_mask_in, y_in, y_mask_in = util.prepare_data(
                source_sents, target_sents, config.factors, maxlen=None)
            if x_in is None:
                logging.info(
                    'Minibatch with zero sample under length {0}'.format(
                        config.maxlen))
                continue
            write_summary_for_this_batch = config.summary_freq and (
                (progress.uidx % config.summary_freq == 0) or
                (config.finish_after
                 and progress.uidx % config.finish_after == 0))
            (factors, seqLen, batch_size) = x_in.shape

            loss = updater.update(sess, x_in, x_mask_in, y_in, y_mask_in,
                                  write_summary_for_this_batch)
            total_loss += loss
            n_sents += batch_size
            n_words += int(numpy.sum(y_mask_in))
            progress.uidx += 1

            if config.disp_freq and progress.uidx % config.disp_freq == 0:
                duration = time.time() - last_time
                disp_time = datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')
                logging.info(
                    '{0} Epoch: {1} Update: {2} Loss/word: {3} Words/sec: {4} Sents/sec: {5}'
                    .format(disp_time, progress.eidx, progress.uidx,
                            total_loss / n_words, n_words / duration,
                            n_sents / duration))
                last_time = time.time()
                total_loss = 0.
                n_sents = 0
                n_words = 0

            if config.sample_freq and progress.uidx % config.sample_freq == 0:
                x_small = x_in[:, :, :10]
                x_mask_small = x_mask_in[:, :10]
                y_small = y_in[:, :10]
                samples = model_set.sample(sess, x_small, x_mask_small)
                assert len(samples) == len(x_small.T) == len(
                    y_small.T), (len(samples), x_small.shape, y_small.shape)
                for xx, yy, ss in zip(x_small.T, y_small.T, samples):
                    source = util.factoredseq2words(xx, num_to_source)
                    target = util.seq2words(yy, num_to_target)
                    sample = util.seq2words(ss, num_to_target)
                    logging.info('SOURCE: {}'.format(source))
                    logging.info('TARGET: {}'.format(target))
                    logging.info('SAMPLE: {}'.format(sample))

            if config.beam_freq and progress.uidx % config.beam_freq == 0:
                x_small = x_in[:, :, :10]
                x_mask_small = x_mask_in[:, :10]
                y_small = y_in[:, :10]
                samples = model_set.beam_search(
                    sess,
                    x_small,
                    x_mask_small,
                    config.beam_size,
                    normalization_alpha=config.normalization_alpha)
                # samples is a list with shape batch x beam x len
                assert len(samples) == len(x_small.T) == len(
                    y_small.T), (len(samples), x_small.shape, y_small.shape)
                for xx, yy, ss in zip(x_small.T, y_small.T, samples):
                    source = util.factoredseq2words(xx, num_to_source)
                    target = util.seq2words(yy, num_to_target)
                    logging.info('SOURCE: {}'.format(source))
                    logging.info('TARGET: {}'.format(target))
                    for i, (sample_seq, cost) in enumerate(ss):
                        sample = util.seq2words(sample_seq, num_to_target)
                        msg = 'SAMPLE {}: {} Cost/Len/Avg {}/{}/{}'.format(
                            i, sample, cost, len(sample), cost / len(sample))
                        logging.info(msg)

            if config.valid_freq and progress.uidx % config.valid_freq == 0:
                valid_ce = validate(sess, replicas[0], config,
                                    valid_text_iterator)
                if (len(progress.history_errs) == 0
                        or valid_ce < min(progress.history_errs)):
                    progress.history_errs.append(valid_ce)
                    progress.bad_counter = 0
                    save_non_checkpoint(sess, saver, config.saveto)
                    progress_path = '{0}.progress.json'.format(config.saveto)
                    progress.save_to_json(progress_path)
                else:
                    progress.history_errs.append(valid_ce)
                    progress.bad_counter += 1
                    if progress.bad_counter > config.patience:
                        logging.info('Early Stop!')
                        progress.estop = True
                        break
                if config.valid_script is not None:
                    score = validate_with_script(sess, replicas[0], config)
                    need_to_save = (
                        score is not None
                        and (len(progress.valid_script_scores) == 0
                             or score > max(progress.valid_script_scores)))
                    if score is None:
                        score = 0.0  # ensure a valid value is written
                    progress.valid_script_scores.append(score)
                    if need_to_save:
                        save_path = config.saveto + ".best-valid-script"
                        save_non_checkpoint(sess, saver, save_path)
                        write_config_to_json_file(config, save_path)

                        progress_path = '{}.progress.json'.format(save_path)
                        progress.save_to_json(progress_path)

            if config.save_freq and progress.uidx % config.save_freq == 0:
                saver.save(sess,
                           save_path=config.saveto,
                           global_step=progress.uidx)
                write_config_to_json_file(
                    config, "%s-%s" % (config.saveto, progress.uidx))

                progress_path = '{0}-{1}.progress.json'.format(
                    config.saveto, progress.uidx)
                progress.save_to_json(progress_path)

            if config.finish_after and progress.uidx % config.finish_after == 0:
                logging.info("Maximum number of updates reached")
                saver.save(sess,
                           save_path=config.saveto,
                           global_step=progress.uidx)
                write_config_to_json_file(
                    config, "%s-%s" % (config.saveto, progress.uidx))

                progress.estop = True
                progress_path = '{0}-{1}.progress.json'.format(
                    config.saveto, progress.uidx)
                progress.save_to_json(progress_path)
                break
        if progress.estop:
            break
Example #14
######################################################################
# The model is set up with the hyperparameters below. The vocab size is
# equal to the length of the vocab object.
#

ntokens = len(input_vocab)  # the size of vocabulary
nclstokens = 4  # D0, D1, S0, S1
ntagtokens = 1  # Binary classification O or D
emsize = 512  # embedding dimension
nhid = 512  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 6  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 8  # the number of heads in the multiheadattention models
dropout = 0.1  # the dropout value
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transformer = TransformerModel(ntokens, nclstokens, ntagtokens, emsize, nhead,
                               nhid, nlayers, dropout)
if args.model:
    transformer.load_state_dict(torch.load(args.model))
else:
    # save random init model
    torch.save(transformer.state_dict(), "init.mdl")
model = nn.DataParallel(transformer).to(device)

######################################################################
# Run the model
# -------------
#

cls_criterion = nn.CrossEntropyLoss()
lr = float(args.lr)  # learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
Example #15
src_ntokens = len(EN_TEXT.vocab.stoi)  # the size of vocabulary
tgt_ntokens = len(FR_TEXT.vocab.stoi)  # the size of vocabulary
emsize = 200  # embedding dimension
nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2  # the number of heads in the multiheadattention models
dropout = 0.2  # the dropout value
from transformer import TransformerModel
from lstm_seq2seq import Seq2Seq

model_type = 'LSTM'  # LSTM or Transformer
if model_type == 'LSTM':
    model = Seq2Seq(src_ntokens, tgt_ntokens, emsize, nhid, device).to(device)
else:
    model = TransformerModel(src_ntokens, tgt_ntokens, emsize, nhead, nhid,
                             nlayers, dropout).to(device)

import numpy as np
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('# parameters: {:e}'.format(params))

criterion = torch.nn.CrossEntropyLoss(ignore_index=1)
optimizer = torch.optim.Adam(model.parameters())
model.train()

log_interval = 200
step = 0
while step < 100000:
    for batch in iter(train_iter):
        optimizer.zero_grad()
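        # Hypothetical continuation of the training step. The batch layout
        # (batch.src / batch.trg as [seq_len, batch] LongTensors from
        # torchtext) and the forward signature model(src, trg_input) ->
        # logits of shape [trg_len, batch, tgt_ntokens] are assumptions,
        # not taken from the original code.
        src, trg = batch.src.to(device), batch.trg.to(device)
        logits = model(src, trg[:-1])                     # teacher forcing
        loss = criterion(logits.reshape(-1, tgt_ntokens), trg[1:].reshape(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        step += 1
        if step % log_interval == 0:
            print('step {} loss {:.4f}'.format(step, loss.item()))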
Example #16
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    g = tf.Graph()
    with g.as_default():
        tf_config = tf.compat.v1.ConfigProto()
        tf_config.allow_soft_placement = True
        session = tf.compat.v1.Session(config=tf_config)

        # Load config file for each model.
        configs = []
        for model in settings.models:
            config = load_config_from_json_file(model)
            setattr(config, 'reload', model)
            setattr(config, 'translation_maxlen', settings.translation_maxlen)
            configs.append(config)

        # Create the model graphs.
        logging.debug("Loading models\n")
        models = []
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                if config.model_type == "transformer":
                    model = TransformerModel(
                        config, consts_config_str=settings.config_str)
                else:
                    model = rnn_model.RNNModel(config)
                model.sampling_utils = SamplingUtils(settings)
                models.append(model)
        # Add smoothing variables (if the models were trained with smoothing).
        # FIXME Assumes either all models were trained with smoothing or none were.
        if configs[0].exponential_smoothing > 0.0:
            smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

        # Restore the model variables.
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                _ = model_loader.init_or_restore_variables(
                    config, session, ensemble_scope=scope)

        # Swap-in the smoothed versions of the variables.
        if configs[0].exponential_smoothing > 0.0:
            session.run(fetches=smoothing.swap_ops)

        max_translation_len = settings.translation_maxlen

        # Create a BeamSearchSampler / RandomSampler.
        if settings.translation_strategy == 'beam_search':
            sampler = BeamSearchSampler(models, configs, settings.beam_size)
        else:
            assert settings.translation_strategy == 'sampling'
            sampler = RandomSampler(models, configs, settings.beam_size)

        # Warn about the change from neg log probs to log probs for the RNN.
        if settings.n_best:
            model_types = [config.model_type for config in configs]
            if 'rnn' in model_types:
                logging.warn(
                    'n-best scores for RNN models have changed from '
                    'positive to negative (as of commit 95793196...). '
                    'If you are using the scores for reranking etc, then '
                    'you may need to update your scripts.')

        # Translate the source file.
        translate_utils.translate_file(
            input_file=settings.input,
            output_file=settings.output,
            session=session,
            sampler=sampler,
            config=configs[0],
            max_translation_len=max_translation_len,
            normalization_alpha=settings.normalization_alpha,
            consts_config_str=settings.config_str,
            nbest=settings.n_best,
            minibatch_size=settings.minibatch_size,
            maxibatch_size=settings.maxibatch_size)
Example #17
0
        model = torch.load(args.load, map_location=device)

        vocab = model.vocab

        test_source, _test_target = read_data(args.test)
        test_source = index_data(test_source, vocab)
        for i, source in enumerate(batchify_data(test_source)):
            output = model.decode(source)
            for words in output:
                print(' '.join(words))
        exit(0)

    if args.model == 'baseline':
        model = BaselineModel(vocab).to(device)
    elif args.model == 'transformer':
        model = TransformerModel(vocab).to(device)
    else:
        print('error: invalid model or model not specified (--model)',
              file=sys.stderr)
        sys.exit()

    for p in model.parameters():
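        # Xavier-initialize only parameters with more than one dimension (the
        # weight matrices); biases and other 1-D tensors keep their default init.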
        if p.dim() > 1:
            torch.nn.init.xavier_uniform_(p)

    criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_id)
    lr = 5  # learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

    bos_token = vocab.numberize('<BOS>')
Example #18
0
######################################################################
# The model is set up with the hyperparameters below. The vocab size is
# equal to the length of the vocab object.
#
#
ntokens = len(input_vocab)  # the size of vocabulary
nclstokens = 4  # D0, D1, S0, S1
ntagtokens = 1  # binary O or D
emsize = 512  # embedding dimension
nhid = 512  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 6  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 8  # the number of heads in the multiheadattention models
dropout = 0.1  # the dropout value
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transformer = TransformerModel(ntokens, nclstokens, ntagtokens, emsize, nhead,
                               nhid, nlayers, dropout)
transformer.load_state_dict(torch.load("%s/%s" % (model_dir, args.iter)))
model = transformer.to(device)

import time
######################################################################
# Run the model
# -------------
#

lr = float(args.lr)  # learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.995)

Example #19
0
def main(hparams):

    # Args
    batch_size = 20
    eval_batch_size = 10
    bptt = 35

    dataset = Dataset()
    train_data = dataset.batchify(dataset.train_txt, batch_size)
    val_data = dataset.batchify(dataset.val_txt, eval_batch_size)
    test_data = dataset.batchify(dataset.test_txt, eval_batch_size)

    # Transformer model architecture
    ntokens = len(dataset.TEXT.vocab.stoi)  # the size of vocabulary
    emsize = 200  # embedding dimension
    nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
    nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
    nhead = 2  # the number of heads in the multiheadattention models
    dropout = 0.2  # the dropout value
    transformer = TransformerModel(ntokens, emsize, nhead, nhid, nlayers,
                                   dropout)

    # Optimizer.
    criterion = nn.CrossEntropyLoss()  # loss function
    lr = 5.0  # learning rate
    optimizer = torch.optim.SGD(transformer.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

    # bittensor:
    # Load bittensor config from hparams.
    config = bittensor.Config(hparams)

    # Build the neuron from configs.
    neuron = bittensor.Neuron(config)

    # Init a trainable request router.
    router = bittensor.Router(x_dim=784, key_dim=100, topk=10)

    # Build local network.
    net = Net()

    # Subscribe the local network to the network
    neuron.subscribe(transformer)

    # Start the neuron backend.
    neuron.start()

    def train(dataset, transformer):
        transformer.train()  # Turn on the train mode
        total_loss = 0.
        start_time = time.time()
        ntokens = len(dataset.TEXT.vocab.stoi)
        for batch, i in enumerate(
                range(0,
                      train_data.size(0) - 1, dataset.bptt)):
            data, targets = dataset.get_batch(train_data, i)
            optimizer.zero_grad()

            # data
            print(data.shape)

            # Flatten encoder inputs
            inputs = data.view(-1, bptt, emsize)
            inputs = torch.flatten(inputs, start_dim=1)

            # Query the remote network.
            synapses = neuron.synapses(
            )  # Returns a list of synapses on the network.
            requests, scores = router.route(
                inputs, synapses)  # routes inputs to network.
            responses = neuron(requests, synapses)  # Makes network calls.
            remote = router.join(responses)  # Joins responses based on scores.

            # Encode sequence inputs.
            encodings = transformer.encode(
                data)  # (seq_len, batch_size, embedding_size)

            # Get nodes from metagraph.
            # and map nodes to torch keys.
            axons = neuron.axons()  # List[bittensor_pb2.Node]))
            keys = keymap.toKeys(axons)  # (-1, key_dim)

            # Learning a map from the gate_inputs to keys
            # gates[i, j] = score for the jth key for input i
            gate_inputs = encodings.view(
                batch_size, x_dim)  # (batch_size, seq_len * embedding_size)
            gates = gate(gate_inputs, keys, topk=min(len(keys), topk))

            # Dispatch data to inputs for each key.
            # when gates[i, j] == 0, the key j does not receive input i
            dispatch_inputs = data.view(batch_size,
                                        -1)  # (batch_size, sequence_length)
            dispatch = dispatcher.dispatch(dispatch_inputs,
                                           gates)  # List[(-1, seq_len)]

            # Query the network by mapping from keys to node endpoints.
            # results = list[torch.Tensor], len(results) = len(keys)
            axons = keymap.toAxons(keys)  # List[bittensor_pb2.Node]
            query = neuron(dispatch, axons)  # List[(-1, embedding_size)]

            # Join results using gates to combine inputs.
            results = dispatcher.combine(
                query, gates)  # (batch_size, seq_len * embedding_size)

            # Decode responses.
            results = results.view(
                -1, batch_size,
                emsize)  # (seq_len, batch_size, embedding_size)
            to_decode = results + encodings
            output = transformer.decode(
                to_decode)  # (target_len, batch_size, embedding_size)

            # Loss and optimizer step
            loss = criterion(output.view(-1, ntokens), targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(transformer.parameters(), 0.5)
            optimizer.step()

            # Update bittensor weights
            weights = neuron.getweights(axons)
            weights = (0.95) * weights + (0.05) * torch.mean(gates, dim=0)
            neuron.setweights(axons, weights)

            total_loss += loss.item()
            log_interval = 1
            if batch % log_interval == 0 and batch > 0:
                cur_loss = total_loss / log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | '
                      'lr {:02.2f} | ms/batch {:5.2f} | '
                      'loss {:5.2f} | ppl {:8.2f}'.format(
                          epoch, batch,
                          len(train_data) // dataset.bptt,
                          scheduler.get_lr()[0], elapsed * 1000 / log_interval,
                          cur_loss, math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train(dataset, transformer)
        scheduler.step()
Example #20
0
def main():
    parser = argparse.ArgumentParser(description='OGBN (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--project', type=str, default='lcgnn')
    parser.add_argument('--dataset', type=str, default='flickr')
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=4)
    parser.add_argument('--num_heads', type=int, default=2)
    parser.add_argument('--ego_size', type=int, default=64)
    parser.add_argument('--hidden_size', type=int, default=64)
    parser.add_argument('--input_dropout', type=float, default=0.2)
    parser.add_argument('--hidden_dropout', type=float, default=0.4)
    parser.add_argument('--weight_decay', type=float, default=0.005)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--early_stopping', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--eval_batch_size', type=int, default=2048)
    parser.add_argument('--layer_norm', type=int, default=0)
    parser.add_argument('--src_scale', type=int, default=0)
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='number of workers')
    parser.add_argument('--pe_type', type=int, default=0)
    parser.add_argument('--mask', type=int, default=0)
    parser.add_argument('--mlp', type=int, default=0)
    parser.add_argument("--optimizer",
                        type=str,
                        default='adamw',
                        choices=['adam', 'adamw'],
                        help="optimizer")
    parser.add_argument("--scheduler",
                        type=str,
                        default='noam',
                        choices=['noam', 'linear'],
                        help="scheduler")
    parser.add_argument("--method",
                        type=str,
                        default='acl',
                        choices=['acl', 'l1reg'],
                        help="method for local clustering")
    parser.add_argument('--warmup', type=int, default=10000)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--load_path', type=str, default='')
    parser.add_argument('--exp_name', type=str, default='')
    args = parser.parse_args()
    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    para_dic = {
        'nl': args.num_layers,
        'nh': args.num_heads,
        'es': args.ego_size,
        'hs': args.hidden_size,
        'id': args.input_dropout,
        'hd': args.hidden_dropout,
        'bs': args.batch_size,
        'pe': args.pe_type,
        'op': args.optimizer,
        'lr': args.lr,
        'wd': args.weight_decay,
        'ln': args.layer_norm,
        'sc': args.src_scale,
        'sd': args.seed,
        'md': args.method
    }
    para_dic['warm'] = args.warmup
    para_dic['mask'] = args.mask
    exp_name = get_exp_name(args.dataset, para_dic, args.exp_name)

    wandb_name = exp_name.replace('_sd' + str(args.seed), '')
    wandb.init(name=wandb_name, project=args.project)
    wandb.config.update(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    if args.dataset == 'papers100M':
        dataset = MyNodePropPredDataset(name=args.dataset)
    elif args.dataset in ['flickr', 'reddit', 'yelp', 'amazon']:
        dataset = SAINTDataset(name=args.dataset)
    else:
        dataset = PygNodePropPredDataset(name=f'ogbn-{args.dataset}')

    split_idx = dataset.get_idx_split()
    train_idx = set(split_idx['train'].cpu().numpy())
    valid_idx = set(split_idx['valid'].cpu().numpy())
    test_idx = set(split_idx['test'].cpu().numpy())

    if args.method != "acl":
        ego_graphs_unpadded = np.load(
            f'data/{args.dataset}-lc-{args.method}-ego-graphs-{args.ego_size}.npy',
            allow_pickle=True)
        conds_unpadded = np.load(
            f'data/{args.dataset}-lc-{args.method}-conds-{args.ego_size}.npy',
            allow_pickle=True)
    else:
        tmp_ego_size = 256 if args.dataset == 'products' else args.ego_size
        if args.ego_size < 64:
            tmp_ego_size = 64
        ego_graphs_unpadded = np.load(
            f'data/{args.dataset}-lc-ego-graphs-{tmp_ego_size}.npy',
            allow_pickle=True)
        conds_unpadded = np.load(
            f'data/{args.dataset}-lc-conds-{tmp_ego_size}.npy',
            allow_pickle=True)

    ego_graphs_train, ego_graphs_valid, ego_graphs_test = [], [], []
    cut_train, cut_valid, cut_test = [], [], []

    for i, x in enumerate(ego_graphs_unpadded):
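        # Pad each ego-graph to ego_size with -1, build a "cut" mask that is 1 up
        # to the minimum-conductance position, and bucket the graph by the split
        # (train/valid/test) of its first node.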
        idx = x[0]
        assert len(x) == len(conds_unpadded[i])
        if len(x) > args.ego_size:
            x = x[:args.ego_size]
            conds_unpadded[i] = conds_unpadded[i][:args.ego_size]
        ego_graph = -np.ones(args.ego_size, dtype=np.int32)
        ego_graph[:len(x)] = x
        cut_position = np.argmin(conds_unpadded[i])
        cut = np.zeros(args.ego_size, dtype=np.float32)
        cut[:cut_position + 1] = 1.0
        if idx in train_idx:
            ego_graphs_train.append(ego_graph)
            cut_train.append(cut)
        elif idx in valid_idx:
            ego_graphs_valid.append(ego_graph)
            cut_valid.append(cut)
        elif idx in test_idx:
            ego_graphs_test.append(ego_graph)
            cut_test.append(cut)
        else:
            print(f"{idx} not in train/valid/test idx")

    ego_graphs_train, ego_graphs_valid, ego_graphs_test = torch.LongTensor(
        ego_graphs_train), torch.LongTensor(
            ego_graphs_valid), torch.LongTensor(ego_graphs_test)
    cut_train, cut_valid, cut_test = torch.FloatTensor(
        cut_train), torch.FloatTensor(cut_valid), torch.FloatTensor(cut_test)

    pe = None
    if args.pe_type == 1:
        pe = torch.load(f'data/{args.dataset}-embedding-{args.hidden_size}.pt')
    elif args.pe_type == 2:
        pe = np.fromfile("data/paper100m.pro",
                         dtype=np.float32).reshape(-1, 128)
        pe = torch.FloatTensor(pe)
        if args.hidden_size < 128:
            pe = pe[:, :args.hidden_size]

    data = dataset[0]
    if len(data.y.shape) == 1:
        data.y = data.y.unsqueeze(1)
    adj = None
    if args.mask:
        adj = torch.BoolTensor(~np.load(
            f'data/{args.dataset}-ego-graphs-adj-{args.ego_size}.npy'))

    num_classes = dataset.num_classes

    train_dataset = NodeClassificationDataset(data.x, data.y, ego_graphs_train,
                                              pe, args, num_classes, adj,
                                              cut_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              collate_fn=batcher(train_dataset),
                              pin_memory=True)

    valid_dataset = NodeClassificationDataset(data.x, data.y, ego_graphs_valid,
                                              pe, args, num_classes, adj,
                                              cut_valid)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.eval_batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=batcher(valid_dataset),
                              pin_memory=True)

    test_dataset = NodeClassificationDataset(data.x, data.y, ego_graphs_test,
                                             pe, args, num_classes, adj,
                                             cut_test)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.eval_batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             collate_fn=batcher(test_dataset),
                             pin_memory=True)

    model = TransformerModel(data.x.size(1) + 1,
                             args.hidden_size,
                             args.num_heads,
                             args.hidden_size,
                             args.num_layers,
                             num_classes,
                             args.input_dropout,
                             args.hidden_dropout,
                             layer_norm=args.layer_norm,
                             src_scale=args.src_scale,
                             mlp=args.mlp).to(device)
    wandb.watch(model, log='all')

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print('model parameters:', pytorch_total_params)

    if not os.path.exists('saved'):
        os.mkdir('saved')

    if torch.cuda.device_count() > 1:
        model.module.init_weights()
    else:
        model.init_weights()

    if args.load_path:
        model.load_state_dict(torch.load(args.load_path,
                                         map_location='cuda:0'))

        valid_acc, valid_loss = test(model, valid_loader, device, args)
        valid_output = f'Valid: {100 * valid_acc:.2f}% '

        cor_train_acc, _ = test(model, train_loader, device, args)

        cor_test_acc, cor_test_loss = test(model, test_loader, device, args)
        train_output = f'Train: {100 * cor_train_acc:.2f}%, '
        test_output = f'Test: {100 * cor_test_acc:.2f}%'

        print(train_output + valid_output + test_output)
        return

    best_val_acc = 0
    cor_train_acc = 0
    cor_test_acc = 0
    patience = 0

    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
    if args.warmup > 0:
        if args.scheduler == 'noam':
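            # NoamOptim presumably implements the warmup schedule from
            # "Attention Is All You Need":
            #   lr ~ d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)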
            optimizer = NoamOptim(
                optimizer,
                args.hidden_size if args.hidden_size > 0 else data.x.size(1),
                n_warmup_steps=args.warmup)  #, init_lr=args.lr)
        elif args.scheduler == 'linear':
            optimizer = LinearOptim(optimizer,
                                    n_warmup_steps=args.warmup,
                                    n_training_steps=args.epochs *
                                    len(train_loader),
                                    init_lr=args.lr)

    for epoch in range(1, 1 + args.epochs):
        # lp = LineProfiler()
        # lp_wrapper = lp(train)
        # loss = lp_wrapper(model, train_loader, device, optimizer, args)
        # lp.print_stats()
        loss = train(model, train_loader, device, optimizer, args)

        train_output = valid_output = test_output = ''
        if epoch >= 10 and epoch % args.log_steps == 0:
            valid_acc, valid_loss = test(model, valid_loader, device, args)
            valid_output = f'Valid: {100 * valid_acc:.2f}% '

            if valid_acc > best_val_acc:
                best_val_acc = valid_acc
                # cor_train_acc, _ = test(model, train_loader, device, args)
                cor_test_acc, cor_test_loss = test(model, test_loader, device,
                                                   args)
                # train_output = f'Train: {100 * cor_train_acc:.2f}%, '
                test_output = f'Test: {100 * cor_test_acc:.2f}%'
                patience = 0
                try:
                    if torch.cuda.device_count() > 1:
                        torch.save(model.module.state_dict(),
                                   'saved/' + exp_name + '.pt')
                    else:
                        torch.save(model.state_dict(),
                                   'saved/' + exp_name + '.pt')
                    wandb.save('saved/' + exp_name + '.pt')
                except FileNotFoundError as e:
                    print(e)
            else:
                patience += 1
                if patience >= args.early_stopping:
                    print('Early stopping...')
                    break
            wandb.log({
                'Train Loss': loss,
                'Valid Acc': valid_acc,
                'best_val_acc': best_val_acc,
                'cor_test_acc': cor_test_acc,
                'LR': get_lr(optimizer),
                'Valid Loss': valid_loss,
                'cor_test_loss': cor_test_loss
            })
        else:
            wandb.log({'Train Loss': loss, 'LR': get_lr(optimizer)})
        # train_output +
        print(f'Epoch: {epoch:02d}, '
              f'Loss: {loss:.4f}, ' + valid_output + test_output)
Example #21
0
        self.mode="test"

        ## transformer的参数
        self.dropout=0.5
        self.max_len=5000
        self.nhead=2


# data_path="E:/study_series/2020_3/re_write_classify/data/"
# data_path="/mnt/data3/wuchunsheng/code/nlper/NLP_task/text_classification/my_classification_cnews/2020_3_30/text_classify/data/"

config = Config()
train_iter, valid_iter, test_iter, TEXT = generate_data(config)

#model = RNNModel(config, TEXT).to(config.device)
model = TransformerModel(config, TEXT).to(config.device)

model = load_model(config, model)

#sen="目"*50
sen="体育快讯"
#sen="".join(['c', 'o', 'n', 't', 'e', 'x', 't', ',', 'l', 'a', 'b', 'e', 'l'])
#res=test_sentence(config, model ,TEXT, sen)
#print(sen)
#print(res)
#res=test(config,model,TEXT,  test_iter)
#print(res)
print("=========================")
sen="篮球"
#sen="体育"
sen_ori=sen
Example #22
0
    logger = logging.getLogger('test')
    logger.info(f"Test Log")
    logger.info(opt)

device = torch.device(f"cuda:{opt.device}" if opt.device.isdigit() else 'cpu')
if __name__ == '__main__':
    src_vocab_list = VocabField.load_vocab(opt.src_vocab_file)
    src_vocab = VocabField(src_vocab_list, vocab_size=opt.src_vocab_size)

    if opt.model_name == 'textrcnn':
        model = TextRCNN(opt.src_vocab_size, opt.embedding_size,
                         opt.hidden_size)
    elif opt.model_name == 'rnn':
        model = RNN(opt.src_vocab_size, opt.embedding_size, opt.hidden_size)
    elif opt.model_name == 'transformer':
        model = TransformerModel(opt.src_vocab_size, opt.embedding_size,
                                 opt.hidden_size)

    last_checkpoint = None
    if opt.resume and not opt.load_checkpoint:
        last_checkpoint = get_last_checkpoint(opt.best_model_dir)
    if last_checkpoint:
        opt.load_checkpoint = os.path.join(opt.model_dir, last_checkpoint)
        opt.skip_steps = int(last_checkpoint.strip('.pt').split('/')[-1])
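        # Note: str.strip('.pt') removes any of the characters '.', 'p', 't' from
        # both ends rather than the literal suffix; this only works while
        # checkpoint names are purely numeric step counts (e.g. '12345.pt').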

    if opt.load_checkpoint:
        model.load_state_dict(torch.load(opt.load_checkpoint))
        opt.skip_steps = int(opt.load_checkpoint.strip('.pt').split('/')[-1])
        logger.info(f"\nLoad from {opt.load_checkpoint}\n")
    else:
        for param in model.parameters():
            param.data.uniform_(-opt.init_weight, opt.init_weight)