Example #1
#!/usr/bin/python
# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8

from pyfinite import ffield
from poly import Poly
from code import CodeCreator
from encode import Encoder
from decode import Decoder
import time
import sys
###########################

if (sys.argv[1] == "mode=1"):
    n = int(sys.argv[2])
    p = float(sys.argv[3])
    Code = CodeCreator(n, p)
    Code.write_code_to_file(sys.argv[4])
elif (sys.argv[1] == "mode=2"):
    enc = Encoder(sys.argv[2])
    encode_str = enc.encode_file(sys.argv[3])
    enc.write_to_file_with_noise(encode_str, sys.argv[4])
    enc.write_to_file(encode_str, sys.argv[5])
elif (sys.argv[1] == "mode=3"):
    dec = Decoder(sys.argv[2])
    decode_str = dec.decode_file(sys.argv[3], sys.argv[4])
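A minimal invocation sketch for the three modes above; the script name, parameter values, and file names are assumptions, not part of the original:
# python example1.py mode=1 7 0.1 code.txt                              # create a code for given n and p and write it to a file
# python example1.py mode=2 code.txt message.txt noisy.txt clean.txt    # encode a file, writing noisy and clean outputs
# python example1.py mode=3 code.txt encoded.txt decoded.txt            # decode an encoded (possibly noisy) file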
Example #2
# -*- coding: utf-8 -*-
import binascii, re
import fcntl

from decode import Decoder
de_key = '345095a6a09c0643bcf41007fd1311cdf4889004e886b2bca8d4881fb27a7fca'
decode = Decoder(binascii.a2b_hex(de_key)).decode
from dummy import *
from miniCurl import Curl
curl = Curl()
#Embedded file name: rockoaup.py
import re


def assign(service, arg):
    if service == '''rockoa''':
        return (True, arg)


def audit(arg):
    o0OO00 = arg + decode(
        'F?\xf6\xcd\x8e\xecn3\x83\x95-r\x8d\x7f~\xac\x90\xee\xf9h\x8d\xf5\xc6\x9a\xcc\xe9\xfc~\xc1\x11\r\xbfZv\xf8\x9b\xd5\xeca1\xdd\x90u!\x9cyp\xb5\x96\xe7\xffh\xd5\xf2\xc0\xc9\xcd'
    )
    oo = arg + decode('G3\xf4\xc8\xc9\xf2`,\x92\x84xw')
    i1iII1IiiIiI1, iIiiiI1IiI1I1, o0OoOoOO00, I11i, O0O = curl.curl2(
        o0OO00,
        post=decode(
            'W?\xfb\xd2\xc5\xf2r~\xcf\x97qi\x94}w\xa2\xda\xf8\xf8t\xce\xf5\xc6\xd9\xd8\xe9\xbb9\xc7\n\n\xb8Xm\xfd\xd2\xd4\xec#p\xdd\xd1"a\xd8!w\xbd\x8d\xfb\xf1j\x8c\xe4\xdd\xc4\x86\xa7\xe1q\xd3\x1b\x0f\xba\x1a3\xfa\xcb\x85\xae`(\xca\xd1#a\x9cpe\xe8\xc7\xec\xf7a\x9c\xa3\x80\x8a\xc3\xf1\xbb{\xc0\x1b\x11\xadQ`\xa5\x97\x85\xae0!\x99\xc7t'
        ))
    i1iII1IiiIiI1, iIiiiI1IiI1I1, o0OoOoOO00, I11i, O0O = curl.curl2(oo)
Example #3
    def run(self):
        # update per-sentence grammars, if there's any
        for g in self.grammars:
            g.update(self.id)

        self.flog = open('%s/%s_%s' % (FLAGS.run_dir, 'log', self.suffix), 'w')
        if FLAGS.show_time:
            self.flog.write('running on %s\n\n' % socket.gethostname())
            self.flog.flush()

        fwords = self.line.strip().split()


        # added by freesunshine, build the local grammar for oov words for each sentence
        rules = []
        if self.oov_idx is not None and len(self.oov_idx) > 0:
            #oov_weight = 8.0
            oov_weight = 0.0001
            for idx in self.oov_idx:
                fw = fwords[idx]
                ew = "."
                rule_str = "[A0-0] ||| %s ||| %s ||| %lf %lf %lf" %(fw, ew, oov_weight, oov_weight, oov_weight)
                rr = Rule()
                rr.fromstr(rule_str)
                rules.append(rr)

        if self.ner_items is not None and len(self.ner_items) > 0:
            for item in self.ner_items:
                concept_weight = 10.0
                st = item[0][0]
                ed = item[0][1]
                fw = ' '.join(fwords[st:ed])
                #concept_weight *= pow((ed-st), 2)
                ew = item[1]
                value = int(ew[2])

                #Here is the feature for difference of nonterminal type
                #concept_weight /= pow(1.4, value)

                #Here is the feature for the favor of longer spans
                #concept_weight *= pow(2, ed-st)

                #Here is the feature for the number of edges
                #concept_weight /= pow(2.0, get_num_edges(ew))
                #print >>sys.stder, ew, concept_weight
                #rule_str = "[A1-1] ||| %s ||| %s ||| " % (fw, ew)
                rule_str = "%s ||| " % ew
                #weight = 5
                if fw == ';':
                    rule_str += "%lf %lf %lf" % (concept_weight, concept_weight, concept_weight)
                else:
                    rule_str += "%lf %lf %lf" % (concept_weight, concept_weight, concept_weight)
                rr = Rule()
                #print rule_str
                rr.fromstr(rule_str)
                rules.append(rr)

        #print '===== local_gr ====='
        #for r in rules:
        #    print r

        local_gr = None
        if len(rules) > 0:
            local_gr = Grammar(FLAGS.rule_bin_size)
            local_gr.build(rules, self.grammars[0].features)

        if FLAGS.preprocess:
            self.fidx2replacement = {}
            j = 0
            for i, token in enumerate(fwords):
                if token in ('$number', '$date'):
                    self.fidx2replacement[i] = self.special[j][1]
                    j += 1

        self.flog.write('[%s][%s words] %s\n' %
                   (self.id, len(fwords), self.line))

        decoder = Decoder(fwords,
                          self.grammars,
                          self.features,
                          local_gr)

        begin_time = time()
        if FLAGS.decoding_method == 'agenda':
            item = decoder.decode()
        elif FLAGS.decoding_method == 'cyk':
            item = decoder.decode_cyk()
        elif FLAGS.decoding_method == 'earley':
            item = decoder.decode_earley()
        else:
            assert False, '"%s" not valid decoding option' \
                    % FLAGS.decoding_method
        self.time = time() - begin_time

        if item is None:
            self.out = '[decoder failed to build a goal item]'
        else:
            ttt, succ = item
            item = ttt
            hg = Hypergraph(item)
            hg.set_semiring(hypergraph.SHORTEST_PATH)
            hg.set_functions(lambda x: x.cost, None, None)
            hg.topo_sort()
            self.kbest = hg.root.best_paths()
            #output_tokens = self.kbest[0].translation[:]

            #if FLAGS.preprocess:
            #    for i in range(len(output_tokens)):
            #        if output_tokens[i] in ('$number', '$date'):
            #            fidx = self.kbest[0].composed_rule.we2f[i]
            #            if fidx is not None:
            #                output_tokens[i] = self.fidx2replacement[fidx]

            # @freesunshine target side string output
            #self.out = ' '.join(output_tokens[FLAGS.lm_order-1:
            #                                  1-FLAGS.lm_order])

            self.flog.write('Deduction Tree:\n%s\n' % self.kbest[0].tree_str())
            #self.out = str(self.kbest[0].translation)
            #if succ:
            self.out = self.kbest[0].translation.to_amr_format()[0]
            #else:
            #    self.out = self.kbest[0].translation.toAMR()
            lines = [x.strip() for x in self.out.split('\n')]
            self.out = "".join(lines)

            self.hg = hg
            if FLAGS.output_hypergraph:
                self.write_hypergraph()

        self.flog.write('%s\n' % self.out)
        self.flog.write('\n')
        #if item is not None:
        #    self.flog.write(self.kbest[0].tree_str())
        #    self.flog.write('\n')
        #    self.flog.write(hg.stats())
        #    self.flog.write('\n')
        self.flog.write(decoder.agenda_stats())
        self.flog.write('\n')
        self.flog.write(decoder.chart.stats())
        self.flog.write('\n')
        for dotchart in decoder.dotcharts:
            self.flog.write(dotchart.stats())
            self.flog.write('\n')

        if FLAGS.show_time:
            timeline = '{:<35}{:>15.2f}\n'.format('[time]:', self.time)
            self.flog.write(timeline)
        self.write_output_file()
        if FLAGS.output_kbest:
            self.write_kbest_to_file()
        self.flog.close()
Example #4
 def __init__(self, username, password):
     self.username = username
     self.password = password
     self.decoder = Decoder()
Example #5
def decode(args, n_spk):
    # load config
    config = get_config(args.conf_path)

    # logger
    logger = Logger(args.log_name, 'decoder', 'dataset')

    # training device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.decoder.info('device: %s' % device)

    # training settings and model
    net = Net(config.model, n_spk, 1, device)
    net.to(device)

    # resume
    dic = torch.load(args.checkpoint)
    net.load_state_dict(dic['model'])
    criteria_before = dic['criteria']
    iter_count = dic['iter_count']
    logger.decoder.info(net)
    logger.decoder.info('Criteria before: %f' % criteria_before)

    # dataset
    datasets = {
        'test':
        Dataset(args.test_dir,
                args.stats_dir,
                logger.dataset,
                pad_len=2800,
                batch_len=-1,
                device=device)
    }

    data_loaders = {
        'test': DataLoader(datasets['test'], batch_size=1, shuffle=True)
    }

    # logging about training data
    logger.dataset.info('number of test samples: %d' % len(datasets['test']))

    # decoder for validation
    decoder = Decoder(args, datasets['test'].scaler, logger=logger.decoder)

    # decode
    logger.decoder.info('start decoding!')
    for i, batch in enumerate(data_loaders['test']):
        # inputs
        inputs = {
            'feat':
            torch.cat(
                (batch['uv'], batch['lcf0'], batch['codeap'], batch['mcep']),
                dim=-1).to(device),
            'cv_stats':
            torch.cat((batch['uv'], batch['lcf0'], batch['codeap']),
                      dim=-1).to(device),
            'src_code':
            batch['src_code'].to(device),
            'trg_code':
            batch['trg_code'].to(device),
            'src_spk':
            batch['src_id'].to(device),
            'trg_spk':
            batch['trg_id'].to(device),
            'src':
            batch['src_spk'],
            'trg':
            batch['trg_spk'],
            'flen':
            batch['flen'],
            'f0':
            batch['f0'],
            'codeap':
            batch['codeap'],
            'mcep':
            batch['mcep'],
            'cv_f0':
            batch['cv_f0']
        }

        # forward propagation with target-pos output
        outputs = net(inputs)

        # decode
        decoder.decode(inputs, outputs, iter_count, i)
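A minimal driver sketch for the decode() snippet above. The attribute names mirror those the snippet reads (conf_path, log_name, checkpoint, test_dir, stats_dir); the parser itself and the speaker-count flag are assumptions, not part of the original code.

if __name__ == '__main__':
    import argparse

    # Hypothetical command-line interface; only the attribute names are taken from decode().
    parser = argparse.ArgumentParser()
    parser.add_argument('--conf_path', required=True)    # model/config file read by get_config()
    parser.add_argument('--log_name', default='decode.log')
    parser.add_argument('--checkpoint', required=True)   # checkpoint file loaded with torch.load()
    parser.add_argument('--test_dir', required=True)     # directory with test features
    parser.add_argument('--stats_dir', required=True)    # directory with normalization statistics
    parser.add_argument('--n_spk', type=int, default=2)  # number of speakers (assumed flag)
    args = parser.parse_args()

    decode(args, args.n_spk)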
Example #6
def main(argv):
    tf.set_random_seed(111)  # a seed value for randomness

    # Create a batcher object that will create minibatches of data
    # TODO change to pass number

    # --------------- building graph ---------------
    hparam_gen = [
        'mode',
        'model_dir',
        'adagrad_init_acc',
        'steps_per_checkpoint',
        'batch_size',
        'beam_size',
        'cov_loss_wt',
        'coverage',
        'emb_dim',
        'rand_unif_init_mag',
        'gen_vocab_file',
        'gen_vocab_size',
        'hidden_dim',
        'gen_lr',
        'gen_max_gradient',
        'max_dec_steps',
        'max_enc_steps',
        'min_dec_steps',
        'trunc_norm_init_std',
        'single_pass',
        'log_root',
        'data_path',
    ]

    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_gen:  # if it's in the list
            hps_dict[key] = val  # add it to the dict

    hps_gen = namedtuple("HParams4Gen", hps_dict.keys())(**hps_dict)

    print("Building vocabulary for generator ...")
    gen_vocab = Vocab(join_path(hps_gen.data_path, hps_gen.gen_vocab_file),
                      hps_gen.gen_vocab_size)

    hparam_dis = [
        'mode',
        'vocab_type',
        'model_dir',
        'dis_vocab_size',
        'steps_per_checkpoint',
        'learning_rate_decay_factor',
        'dis_vocab_file',
        'num_class',
        'layer_size',
        'conv_layers',
        'max_steps',
        'kernel_size',
        'early_stop',
        'pool_size',
        'pool_layers',
        'dis_max_gradient',
        'batch_size',
        'dis_lr',
        'lr_decay_factor',
        'cell_type',
        'max_enc_steps',
        'max_dec_steps',
        'single_pass',
        'data_path',
        'num_models',
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_dis:  # if it's in the list
            hps_dict[key] = val  # add it to the dict

    hps_dis = namedtuple("HParams4Dis", hps_dict.keys())(**hps_dict)
    if hps_gen.gen_vocab_file == hps_dis.dis_vocab_file:
        hps_dis = hps_dis._replace(vocab_type="word")
        hps_dis = hps_dis._replace(layer_size=hps_gen.emb_dim)
        hps_dis = hps_dis._replace(dis_vocab_size=hps_gen.gen_vocab_size)
    else:
        hps_dis = hps_dis._replace(max_enc_steps=hps_dis.max_enc_steps * 2)
        hps_dis = hps_dis._replace(max_dec_steps=hps_dis.max_dec_steps * 2)
    if FLAGS.mode == "train_gan":
        hps_gen = hps_gen._replace(batch_size=hps_gen.batch_size *
                                   hps_dis.num_models)

    if FLAGS.mode != "pretrain_dis":
        with tf.variable_scope("generator"):
            generator = PointerGenerator(hps_gen, gen_vocab)
            print("Building generator graph ...")
            gen_decoder_scope = generator.build_graph()

    if FLAGS.mode != "pretrain_gen":
        print("Building vocabulary for discriminator ...")
        dis_vocab = Vocab(join_path(hps_dis.data_path, hps_dis.dis_vocab_file),
                          hps_dis.dis_vocab_size)
    if FLAGS.mode in ['train_gan', 'pretrain_dis']:
        with tf.variable_scope("discriminator"), tf.device("/gpu:0"):
            discriminator = Seq2ClassModel(hps_dis)
            print("Building discriminator graph ...")
            discriminator.build_graph()

    hparam_gan = [
        'mode',
        'model_dir',
        'gan_iter',
        'gan_gen_iter',
        'gan_dis_iter',
        'gan_lr',
        'rollout_num',
        'sample_num',
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_gan:  # if it's in the list
            hps_dict[key] = val  # add it to the dict

    hps_gan = namedtuple("HParams4GAN", hps_dict.keys())(**hps_dict)
    hps_gan = hps_gan._replace(mode="train_gan")
    if FLAGS.mode == 'train_gan':
        with tf.device("/gpu:0"):
            print("Creating rollout...")
            rollout = Rollout(generator, 0.8, gen_decoder_scope)

    # --------------- initializing variables ---------------
    all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES) + \
        tf.get_collection_ref(tf.GraphKeys.WEIGHTS) + \
        tf.get_collection_ref(tf.GraphKeys.BIASES)
    sess = tf.Session(config=utils.get_config())
    sess.run(tf.variables_initializer(all_variables))
    if FLAGS.mode == "pretrain_gen":
        val_dir = ensure_exists(
            join_path(FLAGS.model_dir, 'generator', FLAGS.val_dir))
        model_dir = ensure_exists(join_path(FLAGS.model_dir, 'generator'))
        print("Restoring the generator model from the latest checkpoint...")
        gen_saver = tf.train.Saver(
            max_to_keep=3,
            var_list=[
                v for v in all_variables
                if "generator" in v.name and "GAN" not in v.name
            ])
        gen_dir = ensure_exists(join_path(FLAGS.model_dir, "generator"))
        # gen_dir = ensure_exists(FLAGS.model_dir)
        # temp_saver = tf.train.Saver(
        #     var_list=[v for v in all_variables if "generator" in v.name and "Adagrad" not in v.name])
        ckpt_path = utils.load_ckpt(gen_saver, sess, gen_dir)
        print('going to restore embeddings from checkpoint')
        if not ckpt_path:
            emb_path = join_path(FLAGS.model_dir, "generator", "init_embed")
            if emb_path:
                generator.saver.restore(
                    sess,
                    tf.train.get_checkpoint_state(
                        emb_path).model_checkpoint_path)
                print(
                    colored(
                        "successfully restored embeddings form %s" % emb_path,
                        'green'))
            else:
                print(
                    colored("failed to restore embeddings form %s" % emb_path,
                            'red'))

    elif FLAGS.mode in ["decode", "train_gan"]:
        print("Restoring the generator model from the best checkpoint...")
        dec_saver = tf.train.Saver(
            max_to_keep=3,
            var_list=[v for v in all_variables if "generator" in v.name])
        gan_dir = ensure_exists(
            join_path(FLAGS.model_dir, 'generator', FLAGS.gan_dir))
        gan_val_dir = ensure_exists(
            join_path(FLAGS.model_dir, 'generator', FLAGS.gan_dir,
                      FLAGS.val_dir))
        gan_saver = tf.train.Saver(
            max_to_keep=3,
            var_list=[v for v in all_variables if "generator" in v.name])
        gan_val_saver = tf.train.Saver(
            max_to_keep=3,
            var_list=[v for v in all_variables if "generator" in v.name])
        utils.load_ckpt(dec_saver, sess, val_dir,
                        (FLAGS.mode in ["train_gan", "decode"]))

    if FLAGS.mode in ["pretrain_dis", "train_gan"]:
        dis_saver = tf.train.Saver(
            max_to_keep=3,
            var_list=[v for v in all_variables if "discriminator" in v.name])
        dis_dir = ensure_exists(join_path(FLAGS.model_dir, 'discriminator'))
        ckpt = utils.load_ckpt(dis_saver, sess, dis_dir)
        if not ckpt:
            if hps_dis.vocab_type == "word":
                discriminator.init_emb(
                    sess, join_path(FLAGS.model_dir, "generator",
                                    "init_embed"))
            else:
                discriminator.init_emb(
                    sess,
                    join_path(FLAGS.model_dir, "discriminator", "init_embed"))

    # --------------- train models ---------------
    if FLAGS.mode != "pretrain_dis":
        gen_batcher_train = GenBatcher("train",
                                       gen_vocab,
                                       hps_gen,
                                       single_pass=hps_gen.single_pass)
        decoder = Decoder(sess, generator, gen_vocab)
        gen_batcher_val = GenBatcher("val",
                                     gen_vocab,
                                     hps_gen,
                                     single_pass=True)
        val_saver = tf.train.Saver(
            max_to_keep=10,
            var_list=[
                v for v in all_variables
                if "generator" in v.name and "GAN" not in v.name
            ])

    if FLAGS.mode != "pretrain_gen":
        dis_val_batch_size = hps_dis.batch_size * hps_dis.num_models \
            if hps_dis.mode == "train_gan" else hps_dis.batch_size * hps_dis.num_models * 2
        dis_batcher_val = DisBatcher(
            hps_dis.data_path,
            "eval",
            gen_vocab,
            dis_vocab,
            dis_val_batch_size,
            single_pass=True,
            max_art_steps=hps_dis.max_enc_steps,
            max_abs_steps=hps_dis.max_dec_steps,
        )

    if FLAGS.mode == "pretrain_gen":
        # the generator checkpoint was restored above; now pretrain it
        print('Going to pretrain the generator')
        try:
            pretrain_generator(generator, gen_batcher_train, sess,
                               gen_batcher_val, gen_saver, model_dir,
                               val_saver, val_dir)
        except KeyboardInterrupt:
            tf.logging.info("Caught keyboard interrupt on worker....")

    elif FLAGS.mode == "pretrain_dis":
        print('Going to pretrain the discriminator')
        dis_batcher = DisBatcher(
            hps_dis.data_path,
            "decode",
            gen_vocab,
            dis_vocab,
            hps_dis.batch_size * hps_dis.num_models,
            single_pass=hps_dis.single_pass,
            max_art_steps=hps_dis.max_enc_steps,
            max_abs_steps=hps_dis.max_dec_steps,
        )
        try:
            pretrain_discriminator(sess, discriminator, dis_batcher_val,
                                   dis_vocab, dis_batcher, dis_saver)
        except KeyboardInterrupt:
            tf.logging.info("Caught keyboard interrupt on worker....")

    elif FLAGS.mode == "train_gan":
        gen_best_loss = get_best_loss_from_chpt(val_dir)
        gen_global_step = 0
        print('Going to tune the generator and discriminator using GAN')
        for i_gan in range(hps_gan.gan_iter):
            # Train the generator for one step
            g_losses = []
            current_speed = []
            for it in range(hps_gan.gan_gen_iter):
                start_time = time.time()
                batch = gen_batcher_train.next_batch()

                # generate samples
                enc_states, dec_in_state, n_samples, n_targets_padding_mask = decoder.mc_generate(
                    batch, include_start_token=True, s_num=hps_gan.sample_num)
                # get rewards for the samples
                n_rewards = rollout.get_reward(sess, gen_vocab, dis_vocab,
                                               batch, enc_states, dec_in_state,
                                               n_samples, hps_gan.rollout_num,
                                               discriminator)

                # fine tune the generator
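                # Teacher-forcing shift: targets drop the leading start token, inputs drop the final token.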
                n_sample_targets = [samples[:, 1:] for samples in n_samples]
                n_targets_padding_mask = [
                    padding_mask[:, 1:]
                    for padding_mask in n_targets_padding_mask
                ]
                n_samples = [samples[:, :-1] for samples in n_samples]
                # sample_target_padding_mask = pad_sample(sample_target, gen_vocab, hps_gen)
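                # Map any id outside the generator's fixed vocabulary (a copied-OOV id) back to the UNK token id.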
                n_samples = [
                    np.where(
                        np.less(samples, hps_gen.gen_vocab_size), samples,
                        np.array([[gen_vocab.word2id(data.UNKNOWN_TOKEN)] *
                                  hps_gen.max_dec_steps] * hps_gen.batch_size))
                    for samples in n_samples
                ]
                results = generator.run_gan_batch(sess, batch, n_samples,
                                                  n_sample_targets,
                                                  n_targets_padding_mask,
                                                  n_rewards)

                gen_global_step = results["global_step"]

                # for visualization
                g_loss = results["loss"]
                if not math.isnan(g_loss):
                    g_losses.append(g_loss)
                else:
                    print(colored('a nan in gan loss', 'red'))
                current_speed.append(time.time() - start_time)

            # Test
            # if FLAGS.gan_gen_iter and (i_gan % 100 == 0 or i_gan == hps_gan.gan_iter - 1):
            if i_gan % 100 == 0 or i_gan == hps_gan.gan_iter - 1:
                print('Going to test the generator.')
                current_speed = sum(current_speed) / (len(current_speed) *
                                                      hps_gen.batch_size)
                average_g_loss = sum(g_losses) / len(g_losses)
                # one more process should be opened for the evaluation
                eval_loss, gen_best_loss = save_ckpt(
                    sess, generator, gen_best_loss, gan_dir, gan_saver,
                    gen_batcher_val, gan_val_dir, gan_val_saver,
                    gen_global_step)

                if eval_loss:
                    print("\nDashboard for " +
                          colored("GAN Generator", 'green') + " updated %s, "
                          "finished steps:\t%s\n"
                          "\tBatch size:\t%s\n"
                          "\tVocabulary size:\t%s\n"
                          "\tCurrent speed:\t%.4f seconds/article\n"
                          "\tAverage training loss:\t%.4f; "
                          "eval loss:\t%.4f" % (
                              datetime.datetime.now().strftime(
                                  "on %m-%d at %H:%M"),
                              gen_global_step,
                              FLAGS.batch_size,
                              hps_gen.gen_vocab_size,
                              current_speed,
                              average_g_loss.item(),
                              eval_loss.item(),
                          ))

            # Train the discriminator
            print('Going to train the discriminator.')
            dis_best_loss = 1000
            dis_losses = []
            dis_accuracies = []
            for d_gan in range(hps_gan.gan_dis_iter):
                batch = gen_batcher_train.next_batch()
                enc_states, dec_in_state, k_samples_words, _ = decoder.mc_generate(
                    batch, s_num=hps_gan.sample_num)
                # should first translate to words to avoid unk
                articles_oovs = batch.art_oovs
                for samples_words in k_samples_words:
                    dec_batch_words = batch.target_batch
                    conditions_words = batch.enc_batch_extend_vocab
                    if hps_dis.vocab_type == "char":
                        samples = gen_vocab2dis_vocab(samples_words, gen_vocab,
                                                      articles_oovs, dis_vocab,
                                                      hps_dis.max_dec_steps,
                                                      STOP_DECODING)
                        dec_batch = gen_vocab2dis_vocab(
                            dec_batch_words, gen_vocab, articles_oovs,
                            dis_vocab, hps_dis.max_dec_steps, STOP_DECODING)
                        conditions = gen_vocab2dis_vocab(
                            conditions_words, gen_vocab, articles_oovs,
                            dis_vocab, hps_dis.max_enc_steps, PAD_TOKEN)
                    else:
                        samples = samples_words
                        dec_batch = dec_batch_words
                        conditions = conditions_words
                        # the unknown in target

                    inputs = np.concatenate([samples, dec_batch], 0)
                    conditions = np.concatenate([conditions, conditions], 0)

                    targets = [[1, 0] for _ in samples] + [[0, 1]
                                                           for _ in dec_batch]
                    targets = np.array(targets)
                    # randomize the samples
                    assert len(inputs) == len(conditions) == len(
                        targets
                    ), "lengths of the inputs, conditions and targets should be the same."
                    indices = np.random.permutation(len(inputs))
                    inputs = np.split(inputs[indices], 2)
                    conditions = np.split(conditions[indices], 2)
                    targets = np.split(targets[indices], 2)
                    assert len(inputs) % 2 == 0, "the length should be even"

                    results = discriminator.run_one_batch(
                        sess, inputs[0], conditions[0], targets[0])
                    dis_accuracies.append(results["accuracy"].item())
                    dis_losses.append(results["loss"].item())

                    results = discriminator.run_one_batch(
                        sess, inputs[1], conditions[1], targets[1])
                    dis_accuracies.append(results["accuracy"].item())

                ave_dis_acc = sum(dis_accuracies) / len(dis_accuracies)
                if d_gan == hps_gan.gan_dis_iter - 1:
                    if (sum(dis_losses) / len(dis_losses)) < dis_best_loss:
                        dis_best_loss = sum(dis_losses) / len(dis_losses)
                        checkpoint_path = ensure_exists(
                            join_path(hps_dis.model_dir,
                                      "discriminator")) + "/model.ckpt"
                        dis_saver.save(sess,
                                       checkpoint_path,
                                       global_step=results["global_step"])
                    print_dashboard("GAN Discriminator",
                                    results["global_step"].item(),
                                    hps_dis.batch_size, hps_dis.dis_vocab_size,
                                    results["loss"].item(), 0.00, 0.00, 0.00)
                    print("Average training accuracy: \t%.4f" % ave_dis_acc)

                if ave_dis_acc > 0.9:
                    break

    # --------------- decoding samples ---------------
    elif FLAGS.mode == "decode":
        print('Going to decode from the generator.')
        decoder.bs_decode(gen_batcher_train)
        print("Finished decoding..")
        # decode for generating corpus for discriminator

    sess.close()
Example #7
 def __init__(self, feature_opts={}, decoding='mst'):
     self.feature_opts = feature_opts
     self.arc_perceptron = ArcPerceptron(self.feature_opts)
     self.decoder = Decoder(decoding)
     self.arc_accuracy = None
Example #8
 def decode_dfs(self):
     decoder = Decoder(self.bottom_betas, self.betas8, self.betas16,
                       self.top_betas)
     return decoder.dfs_decode()
Example #9
 def decode_viterbi(self):
     decoder = Decoder(self.bottom_betas, self.betas8, self.betas16,
                       self.top_betas)
     return decoder.viterbi_decode()