# Shared imports for the snippets below; logger, stat_logger, metrics and the
# config-level names (stock_symbols, vocab, vocab_size, ss_size, config_model,
# path_parser) are assumed to come from the project's own modules.
import io
import os
from datetime import datetime

import numpy as np
import tensorflow as tf  # TF 1.x API


    def sample_mv_percents(self, phase):
        # collect movement percentages for every symbol within the phase's date range
        main_mv_percents = []
        start_date, end_date = self._get_start_end_date(phase)  # isoformat date strings
        for s in stock_symbols:
            stock_mv_path = os.path.join(str(self.movement_path),
                                         '{}.txt'.format(s))
            main_target_dates = []

            with open(stock_mv_path, 'r') as movement_f:
                for line in movement_f:
                    data = line.split('\t')
                    main_target_date = datetime.strptime(data[0],
                                                         '%Y-%m-%d').date()
                    main_target_date_str = main_target_date.isoformat()

                    # keep dates inside [start_date, end_date)
                    if start_date <= main_target_date_str < end_date:
                        main_target_dates.append(main_target_date)

            for main_target_date in main_target_dates:
                prices_and_ts = self._get_prices_and_ts(s, main_target_date)
                if not prices_and_ts:
                    continue
                main_mv_percents.append(prices_and_ts['main_mv_percent'])

            logger.info('finished: {}'.format(s))

        return main_mv_percents
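A list of raw movement percentages lends itself to choosing movement-class boundaries from its empirical distribution; a minimal sketch (the percentile scheme and helper name are assumptions, not from the source):

import numpy as np

def movement_thresholds(mv_percents, low_q=25, high_q=75):
    # Hypothetical helper: derive 'down'/'up' class boundaries from the
    # sampled movement percentages via percentiles.
    low, high = np.percentile(mv_percents, [low_q, high_q])
    return float(low), float(high)

# e.g.: lo, hi = movement_thresholds(pipe.sample_mv_percents(phase='train'))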
Example 2
    def assemble_graph(self):
        logger.info('Start graph assembling...')
        with tf.device('/device:GPU:0'):
            self._build_placeholders()
            self._build_embeds()
            self._build_mie()
            self._build_vmd()
            self._build_temporal_att()
            self._build_ata()
            self._create_optimizer()
    def unit_test_train(self):
        with tf.Session() as sess:
            word_table_init = self.pipe.init_word_table()
            feed_table_init = {self.model.word_table_init: word_table_init}
            sess.run(tf.global_variables_initializer(),
                     feed_dict=feed_table_init)
            logger.info('Word table init: done!')

            logger.info('Model: {0}, start a new session!'.format(
                self.model.model_name))

            n_iter = self.model.global_step.eval()

            # forward: repeatedly fit one batch as a quick overfitting sanity check
            train_batch_loss_list = list()
            train_epoch_size = 0.0
            train_epoch_n_acc = 0.0
            train_batch_gen = self.pipe.batch_gen(phase='train')
            train_batch_dict = next(train_batch_gen)  # a single batch, reused below

            while n_iter < 100:
                feed_dict = {
                    self.model.is_training_phase: True,
                    self.model.batch_size: train_batch_dict['batch_size'],
                    self.model.stock_ph: train_batch_dict['stock_batch'],
                    self.model.T_ph: train_batch_dict['T_batch'],
                    self.model.n_words_ph: train_batch_dict['n_words_batch'],
                    self.model.n_msgs_ph: train_batch_dict['n_msgs_batch'],
                    self.model.y_ph: train_batch_dict['y_batch'],
                    self.model.price_ph: train_batch_dict['price_batch'],
                    self.model.mv_percent_ph:
                        train_batch_dict['mv_percent_batch'],
                    self.model.word_ph: train_batch_dict['word_batch'],
                    self.model.ss_index_ph: train_batch_dict['ss_index_batch'],
                }

                ops = [
                    self.model.y_T, self.model.y_T_, self.model.loss,
                    self.model.optimize
                ]
                train_batch_y, train_batch_y_, train_batch_loss, _ = sess.run(
                    ops, feed_dict)

                # training batch stat
                train_epoch_size += float(train_batch_dict['batch_size'])
                train_batch_loss_list.append(train_batch_loss)
                train_batch_n_acc = sess.run(
                    metrics.n_accurate(y=train_batch_y, y_=train_batch_y_))
                train_epoch_n_acc += float(train_batch_n_acc)

                stat_logger.print_batch_stat(n_iter, train_batch_loss,
                                             train_batch_n_acc,
                                             train_batch_dict['batch_size'])
                n_iter += 1
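The eleven-entry feed_dict literal above recurs verbatim in train_and_dev below; a hypothetical refactoring (not in the source) that builds it in one place:

def build_feed_dict(model, batch, is_training):
    # Hypothetical helper mirroring the mapping used in unit_test_train
    # and train_and_dev: one batch dict onto the model's placeholders.
    return {
        model.is_training_phase: is_training,
        model.batch_size: batch['batch_size'],
        model.stock_ph: batch['stock_batch'],
        model.T_ph: batch['T_batch'],
        model.n_words_ph: batch['n_words_batch'],
        model.n_msgs_ph: batch['n_msgs_batch'],
        model.y_ph: batch['y_batch'],
        model.price_ph: batch['price_batch'],
        model.mv_percent_ph: batch['mv_percent_batch'],
        model.word_ph: batch['word_batch'],
        model.ss_index_ph: batch['ss_index_batch'],
    }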
Example 4
    def restore_and_test(self):
        with tf.Session(config=self.tf_config) as sess:
            checkpoint = tf.train.get_checkpoint_state(os.path.dirname(self.model.tf_checkpoint_file_path))
            if checkpoint and checkpoint.model_checkpoint_path:
                logger.info('Model: {0}, session restored!'.format(self.model.model_name))
                self.saver.restore(sess, checkpoint.model_checkpoint_path)
            else:
                logger.info('Model: {0}: NOT found!'.format(self.model.model_name))
                raise IOError('no checkpoint found for model: {0}'.format(self.model.model_name))

            res = self.generation(sess, phase='test')
            stat_logger.print_eval_res(res)
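restore_and_test hands self.tf_config to the session, but the snippets never show how it is built; a typical TF 1.x configuration (an assumption, not from the source) looks like:

tf_config = tf.ConfigProto(allow_soft_placement=True)  # fall back to CPU if an op lacks a GPU kernel
tf_config.gpu_options.allow_growth = True  # claim GPU memory on demand rather than up front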
    def init_word_table(self):
        # uniform init in [-1.0, 1.0); rows are overwritten by pretrained vectors below
        word_table_init = np.random.random(
            (vocab_size, self.word_embed_size)) * 2 - 1

        if self.word_embed_type != 'rand':
            n_replacement = 0
            vocab_id_dict = self.index_token(vocab, key='token')

            with io.open(self.glove_path, 'r', encoding='utf-8') as f:
                for line in f:
                    tuples = line.split()
                    word, embed = tuples[0], [
                        float(embed_col) for embed_col in tuples[1:]
                    ]
                    if word in ['<unk>', 'unk']:  # unify UNK
                        word = 'UNK'
                    if word in vocab_id_dict:
                        n_replacement += 1
                        word_id = vocab_id_dict[word]
                        word_table_init[word_id] = embed

            logger.info(
                'ASSEMBLE: word table #replacement: {}'.format(n_replacement))
        return word_table_init
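init_word_table expects the standard GloVe text layout, one token followed by its embedding values per line; a self-contained check of the parsing step (the two sample rows are made up):

import io
import numpy as np

glove_sample = io.StringIO(u'the 0.1 -0.2 0.3\nunk 0.0 0.0 0.0\n')
for line in glove_sample:
    tuples = line.split()
    word, embed = tuples[0], [float(col) for col in tuples[1:]]
    print(word, np.asarray(embed).shape)  # 'the' (3,), then 'unk' (3,)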
Example 6
    def train_and_dev(self):
        with tf.Session(config=self.tf_config) as sess:
            # prep: writer and init
            writer = tf.summary.FileWriter(self.model.tf_graph_path, sess.graph)

            # init all vars with tables
            feed_table_init = {self.model.word_table_init: self.pipe.init_word_table()}
            sess.run(tf.global_variables_initializer(), feed_dict=feed_table_init)
            logger.info('Word table init: done!')

            # prep: checkpoint
            checkpoint = tf.train.get_checkpoint_state(os.path.dirname(self.model.tf_checkpoint_file_path))
            if checkpoint and checkpoint.model_checkpoint_path:
                # restore partial saved vars
                reader = tf.train.NewCheckpointReader(checkpoint.model_checkpoint_path)
                restore_dict = dict()
                for v in tf.global_variables():
                    tensor_name = v.name.split(':')[0]
                    if reader.has_tensor(tensor_name):
                        logger.info('has tensor: {0}'.format(tensor_name))
                        restore_dict[tensor_name] = v

                checkpoint_saver = tf.train.Saver(restore_dict)
                checkpoint_saver.restore(sess, checkpoint.model_checkpoint_path)
                logger.info('Model: {0}, session restored!'.format(self.model.model_name))
            else:
                logger.info('Model: {0}, start a new session!'.format(self.model.model_name))

            for epoch in range(self.model.n_epochs):
                logger.info('Epoch: {0}/{1} start'.format(epoch+1, self.model.n_epochs))

                # training phase
                train_batch_loss_list = list()
                epoch_size, epoch_n_acc = 0.0, 0.0

                train_batch_gen = self.pipe.batch_gen(phase='train')  # a new gen for a new epoch

                for train_batch_dict in train_batch_gen:

                    # logger.info('train: batch_size: {0}'.format(train_batch_dict['batch_size']))

                    feed_dict = {self.model.is_training_phase: True,
                                 self.model.batch_size: train_batch_dict['batch_size'],
                                 self.model.stock_ph: train_batch_dict['stock_batch'],
                                 self.model.T_ph: train_batch_dict['T_batch'],
                                 self.model.n_words_ph: train_batch_dict['n_words_batch'],
                                 self.model.n_msgs_ph: train_batch_dict['n_msgs_batch'],
                                 self.model.y_ph: train_batch_dict['y_batch'],
                                 self.model.price_ph: train_batch_dict['price_batch'],
                                 self.model.mv_percent_ph: train_batch_dict['mv_percent_batch'],
                                 self.model.word_ph: train_batch_dict['word_batch'],
                                 self.model.ss_index_ph: train_batch_dict['ss_index_batch'],
                                 }

                    ops = [self.model.y_T, self.model.y_T_, self.model.loss, self.model.optimize,
                           self.model.global_step]
                    train_batch_y, train_batch_y_, train_batch_loss, _, n_iter = sess.run(ops, feed_dict)

                    # training batch stat
                    epoch_size += float(train_batch_dict['batch_size'])
                    train_batch_loss_list.append(train_batch_loss)  # list of floats
                    train_batch_n_acc = sess.run(metrics.n_accurate(y=train_batch_y, y_=train_batch_y_))  # float
                    epoch_n_acc += float(train_batch_n_acc)

                    # save model and generation
                    if n_iter >= self.silence_step and n_iter % self.skip_step == 0:
                        stat_logger.print_batch_stat(n_iter, train_batch_loss, train_batch_n_acc,
                                                     train_batch_dict['batch_size'])
                        self.saver.save(sess, self.model.tf_saver_path, n_iter)
                        res = self.generation(sess, phase='dev')
                        stat_logger.print_eval_res(res)

                # print training epoch stat
                epoch_loss, epoch_acc = metrics.basic_train_stat(train_batch_loss_list, epoch_n_acc, epoch_size)
                stat_logger.print_epoch_stat(epoch_loss=epoch_loss, epoch_acc=epoch_acc)

        writer.close()
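metrics.basic_train_stat is not shown in these examples; a plausible reading that matches its call site above (an assumption, not the repository's code):

import numpy as np

def basic_train_stat(batch_loss_list, epoch_n_acc, epoch_size):
    # Assumed behaviour: mean batch loss, and accuracy as the fraction of
    # accurately predicted samples accumulated over the epoch.
    epoch_loss = float(np.mean(batch_loss_list))
    epoch_acc = epoch_n_acc / epoch_size
    return epoch_loss, epoch_acc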
Example 7
    def __init__(self):
        logger.info('INIT: #stock: {0}, #vocab+1: {1}'.format(ss_size, vocab_size))

        # model config
        self.mode = config_model['mode']
        self.opt = config_model['opt']
        self.lr = config_model['lr']
        self.decay_step = config_model['decay_step']
        self.decay_rate = config_model['decay_rate']
        self.momentum = config_model['momentum']

        self.kl_lambda_anneal_rate = config_model['kl_lambda_anneal_rate']
        self.kl_lambda_start_step = config_model['kl_lambda_start_step']
        self.use_constant_kl_lambda = config_model['use_constant_kl_lambda']
        self.constant_kl_lambda = config_model['constant_kl_lambda']

        self.daily_att = config_model['daily_att']
        self.alpha = config_model['alpha']

        self.clip = config_model['clip']
        self.n_epochs = config_model['n_epochs']
        self.batch_size_for_name = config_model['batch_size']

        self.max_n_days = config_model['max_n_days']
        self.max_n_msgs = config_model['max_n_msgs']
        self.max_n_words = config_model['max_n_words']

        self.weight_init = config_model['weight_init']
        uniform = self.weight_init == 'xavier-uniform'
        self.initializer = tf.contrib.layers.xavier_initializer(uniform=uniform)
        self.bias_initializer = tf.constant_initializer(0.0, dtype=tf.float32)

        self.word_embed_type = config_model['word_embed_type']

        self.y_size = config_model['y_size']
        self.word_embed_size = config_model['word_embed_size']
        self.stock_embed_size = config_model['stock_embed_size']
        self.price_embed_size = config_model['word_embed_size']  # tied to word_embed_size

        self.mel_cell_type = config_model['mel_cell_type']
        self.variant_type = config_model['variant_type']
        self.vmd_cell_type = config_model['vmd_cell_type']

        self.vmd_rec = config_model['vmd_rec']

        self.mel_h_size = config_model['mel_h_size']
        self.msg_embed_size = config_model['mel_h_size']     # tied to mel_h_size
        self.corpus_embed_size = config_model['mel_h_size']  # tied to mel_h_size

        self.h_size = config_model['h_size']
        self.z_size = config_model['h_size']  # tied to h_size
        self.g_size = config_model['g_size']
        self.use_in_bn = config_model['use_in_bn']
        self.use_o_bn = config_model['use_o_bn']
        self.use_g_bn = config_model['use_g_bn']

        self.dropout_train_mel_in = config_model['dropout_mel_in']
        self.dropout_train_mel = config_model['dropout_mel']
        self.dropout_train_ce = config_model['dropout_ce']
        self.dropout_train_vmd_in = config_model['dropout_vmd_in']
        self.dropout_train_vmd = config_model['dropout_vmd']

        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

        # model name
        name_pattern_max_n = 'days-{0}.msgs-{1}-words-{2}'
        name_max_n = name_pattern_max_n.format(self.max_n_days, self.max_n_msgs, self.max_n_words)

        name_pattern_input_type = 'word_embed-{0}.vmd_in-{1}'
        name_input_type = name_pattern_input_type.format(self.word_embed_type, self.variant_type)

        name_pattern_key = 'alpha-{0}.anneal-{1}.rec-{2}'
        name_key = name_pattern_key.format(self.alpha, self.kl_lambda_anneal_rate, self.vmd_rec)

        name_pattern_train = 'batch-{0}.opt-{1}.lr-{2}-drop-{3}-cell-{4}'
        name_train = name_pattern_train.format(self.batch_size_for_name, self.opt, self.lr, self.dropout_train_mel_in, self.mel_cell_type)

        name_tuple = (self.mode, name_max_n, name_input_type, name_key, name_train)
        self.model_name = '_'.join(name_tuple)

        # paths
        self.tf_graph_path = os.path.join(path_parser.graphs, self.model_name)  # summary
        self.tf_checkpoints_path = os.path.join(path_parser.checkpoints, self.model_name)  # checkpoints
        self.tf_checkpoint_file_path = os.path.join(self.tf_checkpoints_path, 'checkpoint')  # for restore
        self.tf_saver_path = os.path.join(self.tf_checkpoints_path, 'sess')  # for save

        # verification
        assert self.opt in ('sgd', 'adam')
        assert self.mel_cell_type in ('ln-lstm', 'gru', 'basic')
        assert self.vmd_cell_type in ('ln-lstm', 'gru')
        assert self.variant_type in ('hedge', 'fund', 'tech', 'discriminative')
        assert self.vmd_rec in ('zh', 'h')
        assert self.weight_init in ('xavier-uniform', 'xavier-normal')
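Every hyperparameter above is read from config_model, presumably loaded from a config file; a hypothetical minimal dict with illustrative values chosen to pass the verification asserts:

# Hypothetical config_model: keys are the ones read above, values are
# illustrative only and chosen to satisfy the assertions.
config_model = {
    'mode': 'all',
    'opt': 'adam', 'lr': 0.001, 'decay_step': 100, 'decay_rate': 0.96,
    'momentum': 0.9, 'clip': 15.0, 'n_epochs': 10, 'batch_size': 32,
    'kl_lambda_anneal_rate': 0.005, 'kl_lambda_start_step': 0,
    'use_constant_kl_lambda': False, 'constant_kl_lambda': 0.1,
    'daily_att': 'y', 'alpha': 0.5,
    'max_n_days': 5, 'max_n_msgs': 30, 'max_n_words': 40,
    'weight_init': 'xavier-uniform', 'word_embed_type': 'glove',
    'y_size': 2, 'word_embed_size': 50, 'stock_embed_size': 150,
    'mel_cell_type': 'gru', 'variant_type': 'hedge', 'vmd_cell_type': 'gru',
    'vmd_rec': 'zh', 'mel_h_size': 100, 'h_size': 150, 'g_size': 50,
    'use_in_bn': False, 'use_o_bn': False, 'use_g_bn': False,
    'dropout_mel_in': 0.3, 'dropout_mel': 0.3, 'dropout_ce': 0.3,
    'dropout_vmd_in': 0.3, 'dropout_vmd': 0.3,
}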