Example #1
0
def main(_):
    """Entry point: choose a device, build HParams, dispatch to train/eval.

    Args:
        _: unused positional argument (supplied by tf.app.run-style runners).

    Raises:
        ValueError: if FLAGS.mode is neither "train" nor "eval".
    """
    # Device selection: train on GPU 0; eval on GPU 1 when a second GPU
    # exists; otherwise fall back to CPU.
    if FLAGS.num_gpus >= 1 and FLAGS.mode == "train":
        dev = "/gpu:0"
    elif FLAGS.num_gpus >= 2 and FLAGS.mode == "eval":
        dev = "/gpu:1"
    else:
        dev = "/cpu:0"

    if FLAGS.mode == "train":
        batch_size = FLAGS.batch_size
    elif FLAGS.mode == "eval":
        batch_size = 100  # fixed evaluation batch size
    else:
        # Fail fast with a clear message. (Original bug: batch_size was
        # only bound for train/eval, so any other mode raised a NameError
        # at the HParams(...) call below.)
        raise ValueError("unknown mode %r; expected 'train' or 'eval'"
                         % FLAGS.mode)

    num_classes = 10

    hps = HParams(
        batch_size=batch_size,
        num_classes=num_classes,
        min_lrn_rate=0.0001,
        lrn_rate=0.1,
        num_units=5,
        weight_decay_rate=0.0002,
        relu_leakiness=0.1,
        optimizer="mom",
    )

    with tf.device(dev):
        if FLAGS.mode == "train":
            train(hps)
        elif FLAGS.mode == "eval":
            evaluate(hps)
Example #2
0
    def __init__(self):
        """Load training/development data and assemble the training HParams."""
        self.FLAGS = FLAGS

        self.batch_size = self.FLAGS.batch_size

        # training data: vocab maps token -> id, ivocab is the inverse map
        self.vocab, self.ivocab, self.data = self.load_data(
            self.FLAGS.data_dir)
        self.dic_size = len(self.vocab)

        # Special-token ids. NOTE(review): end-of-sequence uses the '</S>'
        # key while the others use bare names ('PAD', 'GO', 'UNK') --
        # confirm the vocab pickle really mixes both styles.
        self.PAD_ID = self.vocab['PAD']
        self.GO_ID = self.vocab['GO']
        self.EOS_ID = self.vocab['</S>']
        self.UNK_ID = self.vocab['UNK']

        # development data (with-statement closes the file even on error;
        # the original left it open if unpickling raised)
        with open(self.FLAGS.data_dir + '/text_dev.pkl', 'rb') as pkl_file:
            self.dev_data = pickle.load(pkl_file)

        print(np.shape(self.data))
        #print(np.shape(self.dev_data))

        # Per-example bookkeeping arrays, one slot per training example.
        self.batch_check = np.zeros(len(self.data))
        self.batch_check2 = np.zeros(len(self.data), dtype=np.float32)

        # construct HParams
        self.hps = HParams(vocab_size=len(self.vocab),
                           emb_size=self.FLAGS.emb_size,
                           hidden_size=self.FLAGS.hidden_size,
                           device=self.FLAGS.device,
                           learning_rate=self.FLAGS.learning_rate,
                           max_gradient_norm=self.FLAGS.max_gradient_norm,
                           buckets=[(8, 9)],
                           batch_size=self.FLAGS.batch_size,
                           num_topic=self.FLAGS.num_topic,
                           mode='train')

        print("Params  sets: ")
        print("___________________")
        print("learning_rate:%s  max_gradient_norm:%s   " %
              (str(self.FLAGS.learning_rate), self.FLAGS.max_gradient_norm))
        print("batch_size:%d" % (self.FLAGS.batch_size))
        print("hidden_size:%d   emb_size:%d   " %
              (self.FLAGS.hidden_size, self.FLAGS.emb_size))
        print("steps_per_checkpoint:%d" % (self.FLAGS.steps_per_checkpoint))
        print("steps_per_sample:%d" % (self.FLAGS.steps_per_sample))
        print("sample_num:%d" % (self.FLAGS.sample_num))
        print("device:%s" % (self.FLAGS.device))
        print("Vocabulary size: %d  data size: %d " %
              (len(self.vocab), len(self.data)))
        print("___________________")

        # Fix: original had a duplicated chained assignment
        # (`self.buckets = self.buckets = [(8, 9)]`).
        self.buckets = [(8, 9)]
Example #3
0
    def __init__(self):
        """Set up vocabularies, decode-mode hyper-parameters and helpers."""
        self.FLAGS = FLAGS
        print(self.FLAGS.data_dir)

        # token <-> id maps loaded from the data directory
        self.vocab, self.ivocab = self.load_dic(self.FLAGS.data_dir)
        self.dic_size = len(self.vocab)

        # Gather the hyper-parameters first, then build the record in one go.
        hp_kwargs = dict(
            vocab_size=len(self.vocab),
            emb_size=self.FLAGS.emb_size,
            hidden_size=self.FLAGS.hidden_size,
            device=self.FLAGS.device,
            learning_rate=self.FLAGS.learning_rate,
            max_gradient_norm=self.FLAGS.max_gradient_norm,
            buckets=[(8, 9)],
            batch_size=self.FLAGS.batch_size,
            num_topic=self.FLAGS.num_topic,
            mode='decode',
        )
        self.hps = HParams(**hp_kwargs)

        self.tool = PoetryTool()
        self.load_already = False
def main():
    """Fine-tune a GPT-2-style model on fixed-length token chunks.

    Resolves hparams (saved hparams.json, else a size preset by name),
    restores the base checkpoint, packs the .npz dataset into n_ctx-sized
    chunks, and trains until interrupted, saving every --save_every steps.
    """
    args = parser.parse_args()

    # Hyper-parameters: prefer a saved hparams.json next to the base model,
    # otherwise fall back to the well-known GPT-2 size presets.
    if os.path.isfile(args.base_model+'/hparams.json'):
        with open(args.base_model+'/hparams.json', encoding='utf-8') as f:
            params = json.loads(f.read())
            hparams = HParams(**params)
    elif 'small' in args.base_model:
        hparams = HParams(**{
          "n_vocab": n_vocab,
          "n_ctx": 1024,
          "n_embd": 768,
          "n_head": 12,
          "n_layer": 12
        })
    elif 'medium' in args.base_model:
        hparams = HParams(**{
          "n_vocab": n_vocab,
          "n_ctx": 1024,
          "n_embd": 1024,
          "n_head": 16,
          "n_layer": 24
        })
    elif 'large' in args.base_model:
        hparams = HParams(**{
          "n_vocab": n_vocab,
          "n_ctx": 1024,
          "n_embd": 1280,
          "n_head": 20,
          "n_layer": 36
        })
    else:
        raise ValueError('invalid model name.')

    config = tf.ConfigProto()
    if int(args.gpu) >= 0:
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = args.gpu
    with tf.Session(config=config, graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [None, None])
        output = model.model(hparams=hparams, X=context, past=None, reuse=tf.AUTO_REUSE)
        # Next-token LM loss: logits at position t predict the token at t+1.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(args.base_model)
        saver.restore(sess, ckpt)

        train_vars = tf.trainable_variables()

        global_step = tf.Variable(0, trainable=False)
        # Build the increment op ONCE. Fixes the original
        # `global_step = global_step + 1` inside the training loop, which
        # rebound the Python name to a fresh tensor every step (growing the
        # graph) and never incremented the variable -- so the warmup decay
        # below never progressed.
        incr_global_step = tf.compat.v1.assign_add(global_step, 1)
        if args.warmup_steps > 0:
            # Warm up from ~0 to the target learning rate over warmup_steps.
            learning_rate = tf.compat.v1.train.polynomial_decay(
                    learning_rate=1e-10,
                    end_learning_rate=args.learning_rate,
                    global_step=global_step,
                    decay_steps=args.warmup_steps
                )
        else:
            learning_rate = args.learning_rate

        if args.optim=='adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9,
                                           beta2=0.98,
                                           epsilon=1e-7)
        elif args.optim=='adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        elif args.optim=='sgd':
            opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        else:
            raise ValueError('invalid optimizer name.')

        train_vars = tf.trainable_variables()
        opt_grads = tf.gradients(loss, train_vars)
        opt_grads = list(zip(opt_grads, train_vars))
        opt_apply = opt.apply_gradients(opt_grads)

        summaries = tf.summary.scalar('loss', loss)
        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(
            var_list=train_vars,
            max_to_keep=5,
            keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        # Re-restore the trainable variables, since the initializer above
        # reset everything the first restore loaded.
        ckpt = tf.train.latest_checkpoint(args.base_model)
        saver.restore(sess, ckpt)
        print('Loading checkpoint', ckpt)

        print('Loading dataset...')
        # Pack the token stream into exactly-n_ctx chunks; a trailing
        # partial chunk is dropped.
        global_chunks = []
        with np.load(args.dataset) as npz:
            for inditem, item in enumerate(npz.files):
                token_chunk = npz[item]
                current_token = []
                for ind in range(0, len(token_chunk)):
                    current_token.append(np.uint16(token_chunk[ind]))
                    if len(current_token) == hparams.n_ctx:
                        global_chunks.append(current_token)
                        current_token = []
        global_chunk_index = np.random.permutation(len(global_chunks))
        global_chunk_step = 0
        print('Training...')

        def sample_feature():
            # Draw batch_size chunks, reshuffling once an epoch is exhausted.
            nonlocal global_chunks, global_chunk_index, global_chunk_step
            p_input_ids = []

            for b in range(args.batch_size): # FULL-SENTENCES
                idx = global_chunk_index[global_chunk_step]
                global_chunk_step += 1
                if global_chunk_step >= len(global_chunk_index):
                    global_chunk_step = 0
                    global_chunk_index = np.random.permutation(len(global_chunks))
                # Make Sequence (copy so later mutation can't corrupt the
                # stored chunk; the original also bound an unused
                # `sampled_token` alias here).
                ids = copy(global_chunks[idx])
                p_input_ids.append(ids)

            return {context: p_input_ids}

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        hparams_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'hparams.json')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r', encoding='utf-8') as fp:
                counter = int(fp.read()) + 1

        maketree(os.path.join(CHECKPOINT_DIR, args.run_name))

        def save():
            # Checkpoint the weights plus the counter/hparams needed to resume.
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(
                sess,
                os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                global_step=counter)
            with open(counter_path, 'w', encoding='utf-8') as fp:
                fp.write(str(counter) + '\n')
            with open(hparams_path, 'w', encoding='utf-8') as fp:
                fp.write(json.dumps({
                      "n_vocab": int(hparams.n_vocab),
                      "n_ctx": int(hparams.n_ctx),
                      "n_embd": int(hparams.n_embd),
                      "n_head": int(hparams.n_head),
                      "n_layer": int(hparams.n_layer),
                }))

        # (sum of decayed losses, sum of decayed weights) -> running average.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while True:
                if counter % args.save_every == 0:
                    save()

                (_, v_loss, v_summary) = sess.run(
                    (opt_apply, loss, summaries),
                    feed_dict=sample_feature())

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(
                        counter=counter,
                        time=time.time() - start_time,
                        loss=v_loss,
                        avg=avg_loss[0] / avg_loss[1]))

                counter = counter+1
                if args.warmup_steps > 0:
                    # Advance the warmup schedule via the pre-built op.
                    sess.run(incr_global_step)
        except KeyboardInterrupt:
            print('interrupted')
            save()
        bpe = f.read().split('\n')
    with open('emoji.json', encoding='utf-8') as f:
        emoji = json.loads(f.read())
    enc = BPEEncoder_ja(bpe, emoji)
    n_vocab = len(enc)
    eot_token = enc.encode('<|endoftext|>')[0]
    sep_token = enc.encode('<|byte0|>')[0]
    temperature = args.temperature
    top_k = args.top_k
    top_p = args.top_p
    min_answer_len = args.min_answer_len

    if os.path.isfile(args.model + '/hparams.json'):
        with open(args.model + '/hparams.json') as f:
            params = json.loads(f.read())
            hparams = HParams(**params)
            n_prediction = params['n_prediction']
    elif 'small' in args.model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 768,
                "n_head": 12,
                "n_layer": 12
            })
        n_prediction = args.max_answer_len
    elif 'medium' in args.model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
# CLI flags (the parser object is created earlier in the file).
parser.add_argument('--gpu', type=str, default='0')
args = parser.parse_args()

# Byte-pair-encoding vocabulary for the Japanese tokenizer.
with open('ja-bpe.txt', encoding='utf-8') as f:
    bpe = f.read().split('\n')

# Emoji table consumed by the tokenizer alongside the BPE vocab.
with open('emoji.json', encoding='utf-8') as f:
    emoji = json.loads(f.read())

enc = BPEEncoder_ja(bpe, emoji)
n_vocab = len(enc)

# The model directory must carry an hparams.json that includes the custom
# 'n_prediction' field (the answer-span length used during fine-tuning).
if os.path.isfile(args.model + '/hparams.json'):
    with open(args.model + '/hparams.json') as f:
        params = json.loads(f.read())
        hparams = HParams(**params)
        max_length = params['n_prediction']
else:
    raise ValueError('invalid model name.')

# Token budget for the prompt once the prediction span and the separator
# token are reserved out of the context window.
length = hparams.n_ctx - max_length - 1
temperature = args.temperature
top_k = args.top_k
top_p = args.top_p
SEP_TOKEN = enc.encode('<|byte0|>')[0]


def generate_one(sess, output):
    """Encode the CLI context string and clamp it to the usable token budget."""
    # NOTE(review): this definition appears truncated by the scrape -- only
    # the context preparation is visible; sampling with `sess`/`output`
    # presumably follows. Confirm against the original source.
    context_tokens = enc.encode(args.context)
    # `length` is the module-level budget (n_ctx - max_length - 1).
    if len(context_tokens) > length:
        context_tokens = context_tokens[:length]
Example #7
0
class GeneratorUI(object):
    """Interactive / batch front-end for generating poems with a trained model."""

    def __init__(self):
        """Load vocabularies and build decode-mode hyper-parameters."""
        self.FLAGS = FLAGS
        print(self.FLAGS.data_dir)
        # token <-> id maps
        self.vocab, self.ivocab = self.load_dic(
            self.FLAGS.data_dir)
        self.dic_size = len(self.vocab)
        self.hps = HParams(
            vocab_size=len(self.vocab),
            emb_size=self.FLAGS.emb_size,
            hidden_size=self.FLAGS.hidden_size,
            device=self.FLAGS.device,
            learning_rate=self.FLAGS.learning_rate,
            max_gradient_norm=self.FLAGS.max_gradient_norm,
            buckets=[(8, 9)],
            batch_size=self.FLAGS.batch_size,
            num_topic=self.FLAGS.num_topic,
            mode='decode'
        )

        self.tool = PoetryTool()
        self.load_already = False

    def load_dic(self, file_dir):
        """Load the vocab and inverse-vocab pickles from *file_dir*.

        Returns:
            (dic, idic): token->id and id->token mappings.
        """
        # with-statements close the files even if unpickling raises
        # (the original left them open on error).
        with open(file_dir + '/vocab.pkl', 'rb') as vocab_file:
            dic = pickle.load(vocab_file, encoding='utf8')

        with open(file_dir + '/ivocab.pkl', 'rb') as ivocab_file:
            idic = pickle.load(ivocab_file, encoding='utf8')

        return dic, idic

    def load_model(self, session, beam_size):
        """Build a PoemModel sized for *beam_size* and restore it from model/.

        Raises:
            ValueError: if no usable checkpoint is found.
        """
        decode_hps = self.hps._replace(batch_size=beam_size)
        model = PoemModel(decode_hps)

        ckpt = tf.train.get_checkpoint_state("model/")

        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            # Fix: the original dereferenced ckpt.model_checkpoint_path here,
            # raising AttributeError when ckpt was None instead of the
            # intended ValueError.
            missing = ckpt.model_checkpoint_path if ckpt else "model/"
            raise ValueError("%s not found! " % missing)

        return model

    def generate_one(self, all_topic):
        """Interactive loop: read a line from stdin, print the generated poem."""
        # generate poems using cmd line
        beam_size = input("please input beam size>")
        beam_size = int(beam_size)

        self.sess = tf.InteractiveSession(graph=tf.Graph())
        self.model = self.load_model(self.sess, beam_size)
        self.generator = Generator(
            self.vocab, self.ivocab, self.hps, self.model, self.sess)

        # Runs until the process is killed; failed generations print the
        # diagnostic info and continue prompting.
        while True:
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            ans, info = self.generator.generate_one(
                sentence, beam_size=beam_size, all_topic=all_topic, manu=False)
            if len(ans) == 0:
                print("generation failed!")
                print(info)
                continue

            for sen in ans:
                print(sen)

    def generate_whole_file(self, infile, outfile, all_topic, beam_size):
        """Generate poems for every first sentence in *infile*.

        Writes ten output files (outfile + "0".."9" + ".txt"), one per
        manual-setting index passed through to the generator.
        """
        # NOTE(review): `g1` is not defined anywhere visible in this file --
        # presumably a module-level tf.Graph; confirm before relying on it.
        self.sess = tf.InteractiveSession(graph=g1)
        self.model = self.load_model(self.sess, beam_size)
        self.generator = Generator(
            self.vocab, self.ivocab, self.hps, self.model, self.sess)

        with open(infile, 'r') as fin:
            lines = fin.readlines()

        for manu in range(10):
            with open(outfile + str(manu) + ".txt", 'w') as fout:
                for line in lines:
                    line = line.strip()
                    # Skip blank/too-short lines and prior failure markers.
                    if len(line) < 5:
                        continue
                    if line == "failed!":
                        continue
                    ans, info = self.generator.generate_one(
                        line, manu, beam_size, all_topic, False)
                    if len(ans) == 0:
                        fout.write(info + "\n")
                    else:
                        fout.write(" ".join(ans) + "\n")
                    # Flush per line so partial progress survives a crash.
                    fout.flush()
Example #8
0
def main():
    """Fine-tune a GPT-2-style model on (question, answer) pairs.

    Builds a masked-LM loss over the answer span, restores the base
    checkpoint, then trains until interrupted or --max_train_steps,
    saving every --save_every steps.
    """
    args = parser.parse_args()

    # GPT-2 size presets selected by substring of the base-model name.
    if 'small' in args.base_model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 768,
                "n_head": 12,
                "n_layer": 12
            })
    elif 'medium' in args.base_model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 1024,
                "n_head": 16,
                "n_layer": 24
            })
    elif 'large' in args.base_model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 1280,
                "n_head": 20,
                "n_layer": 36
            })
    else:
        raise ValueError('invalid model name.')

    max_answer_len = args.max_answer_len
    batch_size = args.batch_size
    max_seq_length = hparams.n_ctx

    # Training direction decides which pair element is the prompt (q) and
    # which is predicted (a), plus their length budgets.
    if args.train_type == 'QtoA':
        index_q = 0
        index_a = 1
        max_q = max_seq_length - args.max_answer_len
        max_a = args.max_answer_len
    elif args.train_type == 'AtoQ':
        index_q = 1
        index_a = 0
        max_q = args.max_answer_len
        max_a = max_seq_length - args.max_answer_len
    else:
        raise ValueError('invalid train type.')

    config = tf.ConfigProto()
    if int(args.gpu) >= 0:
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = args.gpu
    with tf.Session(config=config) as sess:
        input_ids = tf.placeholder(tf.int32, [batch_size, None])
        masked_lm_positions = tf.placeholder(tf.int32, [batch_size, None])
        masked_lm_ids = tf.placeholder(tf.int32, [batch_size, None])
        masked_lm_weights = tf.placeholder(tf.float32, [batch_size, None])

        output = model.model(hparams=hparams,
                             X=input_ids,
                             past=None,
                             reuse=tf.AUTO_REUSE)

        (loss, _, _) = get_masked_lm_output(hparams=hparams,
                                            logits=output['logits'],
                                            positions=masked_lm_positions,
                                            label_ids=masked_lm_ids,
                                            label_weights=masked_lm_weights)

        train_vars = tf.trainable_variables()

        global_step = tf.Variable(0, trainable=False)
        # Build the increment op ONCE. Fixes the original
        # `global_step = global_step + 1` inside the training loop, which
        # rebound the Python name to a fresh tensor every step (growing the
        # graph) and never incremented the variable -- so the warmup decay
        # below never progressed.
        incr_global_step = tf.compat.v1.assign_add(global_step, 1)
        if args.warmup_steps > 0:
            learning_rate = tf.compat.v1.train.polynomial_decay(
                learning_rate=1e-10,
                end_learning_rate=args.learning_rate,
                global_step=global_step,
                decay_steps=args.warmup_steps)
        else:
            learning_rate = args.learning_rate

        if args.optim == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                         beta1=0.9,
                                         beta2=0.99,
                                         epsilon=1e-7)
        elif args.optim == 'adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        elif args.optim == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        else:
            raise ValueError('invalid optimizer name.')

        train_vars = tf.trainable_variables()
        opt_grads = tf.gradients(loss, train_vars)
        opt_grads = list(zip(opt_grads, train_vars))
        opt_apply = opt.apply_gradients(opt_grads)

        summaries = tf.summary.scalar('loss', loss)
        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=train_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.latest_checkpoint(args.base_model)
        saver.restore(sess, ckpt)
        print('Loading checkpoint', ckpt)

        print('Loading dataset...')
        # Each pickle entry p is a (q_tokens, a_tokens) pair; keep only
        # pairs where both sides are non-empty, and make sure each side
        # ends with the end-of-text token.
        global_chunks = []
        for fn in glob.glob(args.dataset):
            with open(fn, 'rb') as f:
                for p in pickle.load(f):
                    # Fix: the original tested `len([1]) > 0` (always true),
                    # so empty answer lists slipped through and crashed on
                    # p[1][-1] below.
                    if len(p[0]) > 0 and len(p[1]) > 0:
                        if p[0][-1] != eot_token:
                            p[0].append(eot_token)
                        if p[1][-1] != eot_token:
                            p[1].append(eot_token)
                        global_chunks.append(p)
        global_chunk_index = np.random.permutation(len(global_chunks))
        global_chunk_step = 0
        print('There is', len(global_chunks), 'chunks.')
        print('Training...')

        def sample_feature():
            # Build one batch: prompt tokens followed by the target span,
            # with masked-LM positions/ids/weights padded to max_answer_len.
            nonlocal global_chunks, global_chunk_index, global_chunk_step

            p_input_ids = []
            p_masked_lm_positions = []
            p_masked_lm_ids = []
            p_masked_lm_weights = []

            for b in range(batch_size):
                idx = global_chunk_index[global_chunk_step]
                global_chunk_step += 1
                if global_chunk_step >= len(global_chunk_index):
                    global_chunk_step = 0
                    global_chunk_index = np.random.permutation(
                        len(global_chunks))
                sampled_tokens = global_chunks[idx]

                # Make Sequence: prompt truncated to max_q, terminated by
                # the separator token.
                ids = copy(sampled_tokens[index_q])
                if len(ids) > max_q:
                    ids = ids[:max_q]
                ids[-1] = sep_token

                lm_ids = copy(sampled_tokens[index_a])
                if len(lm_ids) > max_a:
                    lm_ids = lm_ids[:max_a]
                lm_weights = [1.0] * len(lm_ids)
                # Predicted positions start at the separator (len(ids) - 1);
                # zero-weight padding fills out to max_answer_len.
                lm_positions = list(
                    range(len(ids) - 1,
                          len(ids) - 1 + len(lm_ids), 1))
                while len(lm_positions) < max_answer_len:
                    lm_positions.append(0)
                    lm_ids.append(0)
                    lm_weights.append(0.0)

                ids = ids + lm_ids
                while len(ids) < max_seq_length:
                    ids.append(eot_token)

                p_input_ids.append(ids)
                p_masked_lm_positions.append(lm_positions)
                p_masked_lm_ids.append(lm_ids)
                p_masked_lm_weights.append(lm_weights)

            return {
                input_ids: p_input_ids,
                masked_lm_positions: p_masked_lm_positions,
                masked_lm_ids: p_masked_lm_ids,
                masked_lm_weights: p_masked_lm_weights
            }

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        hparams_path = os.path.join(CHECKPOINT_DIR, args.run_name,
                                    'hparams.json')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            # Checkpoint model weights plus the counter/hparams needed to
            # resume, including the custom n_prediction field.
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')
            with open(hparams_path, 'w') as fp:
                fp.write(
                    json.dumps({
                        "n_vocab": int(hparams.n_vocab),
                        "n_ctx": int(hparams.n_ctx),
                        "n_embd": int(hparams.n_embd),
                        "n_head": int(hparams.n_head),
                        "n_layer": int(hparams.n_layer),
                        "n_prediction": int(max_answer_len),
                    }))

        # (sum of decayed losses, sum of decayed weights) -> running average.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while True:
                if counter % args.save_every == 0:
                    save()

                (_, v_loss, v_summary) = sess.run((opt_apply, loss, summaries),
                                                  feed_dict=sample_feature())

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter = counter + 1
                if args.warmup_steps > 0:
                    # Advance the warmup schedule via the pre-built op.
                    sess.run(incr_global_step)
                if args.max_train_steps > 0 and args.max_train_steps <= counter:
                    save()
                    break
        except KeyboardInterrupt:
            print('interrupted')
            save()