    def __init__(self, pretrain=False):
        self.logger = get_logger('baseline')
        self.voca_size = opt.word_voca_size + 2
        self.char_voca_size = opt.char_voca_size + 2

        if pretrain:
            self.logger.info('use pretrained embedding matrix')
            word_embed_matrix = joblib.load('../word_embed_matrix.np')
            char_embed_matrix = joblib.load('../char_embed_matrix.np')
            self.word_embd = Embedding(self.voca_size,
                                       opt.word_embd_size,
                                       weights=[word_embed_matrix],
                                       name='shared_embed',
                                       trainable=True)
            self.char_embd = Embedding(self.char_voca_size,
                                       opt.char_embd_size,
                                       weights=[char_embed_matrix],
                                       name='shared_char_embed',
                                       trainable=True)
        else:
            self.logger.info('not using pretrained embedding matrix')
            self.word_embd = Embedding(self.voca_size,
                                       opt.word_embd_size,
                                       name='shared_embed',
                                       trainable=True)
            self.char_embd = Embedding(self.char_voca_size,
                                       opt.char_embd_size,
                                       name='shared_char_embed',
                                       trainable=True)
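
For context, a minimal sketch of how these shared layers might be applied inside the model-building code of the same class; `opt.max_len` and the Input import are assumptions, not taken from the snippet:

from keras.layers import Input  # assumed to live in the same namespace as Embedding above

word_input = Input(shape=(opt.max_len,), name='word_input')  # hypothetical length field
char_input = Input(shape=(opt.max_len,), name='char_input')
word_vec = self.word_embd(word_input)   # -> (batch, max_len, word_embd_size)
char_vec = self.char_embd(char_input)   # -> (batch, max_len, char_embd_size)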
Example #2
    def __init__(self, config, use=False):
        self.logger = get_logger()
        self.meta_path = config['META_PATH']
        self.titles_path = config['TITLES_PATH']
        self.spm_dir_path = config['SPM_DIR_PATH']
        self.spm_wp_path = config['SPM_WP_PATH']
        self.category_path = config['CATEGORY_PATH']
        self.parse_data_path = config['PARSE_DATA_PATH']
        self.doc2vec_dir_path = config['DOC2VEC_DIR_PATH']
        self.use_cols = config['USE_COLS']
        self.use_cate = config['USE_CATE']
        self.n_sample = config['N_SAMPLE']
        self.vocab_size = config['VOCAB_SIZE']
        self.n_shuffle = config['N_SHUFFLE']
        self.cate_depth = config['CATE_DEPTH']
        self.n_log_print = config['N_LOG_PRINT']

        self.doc_vec_size = config['DOC_VEC_SIZE']
        self.doc2vec_epochs = config['DOC2VEC_EPOCHS']
        self.n_workers = config['N_WORKERS']
        self.window_size = config['WINDOW_SIZE']

        self.re_sc = re.compile(r'[!@#$%^&*()=\[\]{}.,/?~+"|_\-:;]')
        self.stopwords = ['&', '"']
        if use:
            self.load_spm()
            self.load_doc2vec_model()
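
A plausible way the compiled pattern and stopword list would be used downstream (the `clean_title` method is hypothetical, not part of the snippet):

    def clean_title(self, title):
        # strip special characters, then drop stopword tokens
        text = self.re_sc.sub(' ', title)
        tokens = [t for t in text.split() if t not in self.stopwords]
        return ' '.join(tokens)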
Example #3
    def __init__(self, data_path):
        self.logger = get_logger('EDA')
        self.write_path = './EDA/'
        self.data_path = data_path

        self.cate1 = self.load_catefile(
            os.path.join(self.data_path, opt.cate_filename))
Example #4
    def __init__(self, epochs=10, batch_size=128):
        self.logger = get_logger()
        self.data = Data("005930")
        data_len = len(self.data)
        train_num = int(data_len * 0.8)
        valid_num = int(data_len * 0.1)
        test_num = data_len - train_num - valid_num
        train, valid, test = random_split(self.data,
                                          [train_num, valid_num, test_num])
        self.train_iter = DataLoader(train,
                                     batch_size=batch_size,
                                     shuffle=True,
                                     num_workers=4)
        self.valid_iter = DataLoader(valid,
                                     batch_size=batch_size,
                                     shuffle=True,
                                     num_workers=4)
        self.test_iter = DataLoader(test,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    num_workers=4)
        self.encoder = Encoder(features=self.data.features,
                               hid_dim=64,
                               layers=2,
                               dropout=0.3)
        self.network = Network(encoder=self.encoder,
                               enc_hid_dim=64,
                               hid_dim=64,
                               device=device).to(device)
        print(self.network)
        self.epochs = epochs
        self.batch_size = batch_size
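
Computing test_num by subtraction guarantees the three split sizes sum to len(self.data), which random_split requires. A sketch of the driver loop such a trainer might run; train_epoch and evaluate are assumed helpers, not shown here:

    def run(self):
        for epoch in range(self.epochs):
            train_loss = self.train_epoch(self.train_iter)   # hypothetical helper
            valid_loss = self.evaluate(self.valid_iter)      # hypothetical helper
            self.logger.info('epoch %d: train %.4f, valid %.4f',
                             epoch, train_loss, valid_loss)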
Example #5
def get_logger_and_parser():
    parser = argparse.ArgumentParser(description='config')
    parser.add_argument('--config',
                        type=str,
                        default='config/cityscapes_pspnet.yaml',
                        help='Configuration file to use')
    parser.add_argument('--num_of_gpus', type=int, default=0)
    parser.add_argument('opts',
                        help='Extra config options to merge, as KEY VALUE pairs',
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()

    assert args.config is not None
    cfg = config.load_cfg_from_cfg_file(args.config)

    if args.opts is not None:
        cfg = config.merge_cfg_from_list(cfg, args.opts)
    args_dict = dict()

    for arg in vars(args):
        args_dict[arg] = getattr(args, arg)
    cfg.update(args_dict)

    run_dir = os.path.join('runs',
                           os.path.basename(args.config)[:-5],  # strip '.yaml'
                           cfg['exp_name'])
    check_dir_exists(run_dir)

    run_id = str(int(time.time()))
    logger = get_logger(run_dir, run_id, 'val')

    logger.info('RUNDIR: {}'.format(run_dir))

    return logger, cfg, run_dir
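
Typical call site, assuming the script is launched with a YAML config (the flag values are illustrative):

# e.g. python val.py --config config/cityscapes_pspnet.yaml
logger, cfg, run_dir = get_logger_and_parser()
logger.info('experiment: {}'.format(cfg['exp_name']))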
Example #6
    def __init__(self, num_classes):
        logger = get_logger('textonly')
        max_len = opt['max_len']
        voca_size = opt['unigram_hash_size'] + 1

        t_input = tf.keras.Input(shape=(max_len, ))
        tx = tf.keras.layers.Embedding(voca_size, opt['embd_size'])(t_input)

        w_input = tf.keras.Input(shape=(max_len, ))
        wx = tf.keras.layers.Reshape((max_len, 1))(w_input)

        x = tf.keras.layers.dot([tx, wx], axes=1)
        x = tf.keras.layers.Reshape((opt['embd_size'], ))(x)
        x = tf.keras.layers.Dense(16, activation=tf.nn.relu)(x)
        outputs = tf.keras.layers.Dense(num_classes,
                                        activation=tf.nn.sigmoid)(x)

        model = tf.keras.models.Model(inputs=[t_input, w_input],
                                      outputs=outputs)

        model.summary(print_fn=lambda x: logger.info(x))
        model.compile(loss='binary_crossentropy',
                      optimizer=tf.train.AdamOptimizer(),
                      metrics=[top1_acc])

        self.model = model
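
The dot/Reshape pair computes a weighted sum of the token embeddings, with w_input supplying one weight per token. A NumPy check of the shape arithmetic (illustrative sizes):

import numpy as np

max_len, embd_size = 4, 8
tx = np.random.rand(1, max_len, embd_size)  # token embeddings
wx = np.random.rand(1, max_len, 1)          # per-token weights
x = np.einsum('bld,blo->bdo', tx, wx)       # dot over the max_len axis
assert x.reshape(1, embd_size).shape == (1, embd_size)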
Example #7
    def __init__(self,
                 path,
                 window,
                 stride,
                 minute_after,
                 bandwidth,
                 batch_size,
                 train_ratio,
                 verbose=False):
        """
        :param path: data path
        :param window: length of windows in each frame
        :param stride: stride of frame
        :param minute_after: compare with the last of frame close and minute_after close
        :param bandwidth: height of frame
        :param batch_size: train batch
        :param train_ratio:
        :param verbose:
        """
        self.logger = get_logger()
        self.path = path
        self.window = window
        self.stride = stride
        self.minute_after = minute_after
        self.bandwidth = bandwidth

        self.batch_size = batch_size
        self.train_ratio = train_ratio
        self.verbose = verbose

        self.threshold = 0.01
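
Given window and stride, the number of frames that fit in a series of length n follows the usual sliding-window arithmetic; a sketch, not code from the class:

def n_frames(n, window, stride):
    # number of complete windows that fit in a series of length n
    return max(0, (n - window) // stride + 1)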
Example #8
    def __init__(self, conf="./config/stock_minute.json", **kwargs):
        Status.__init__(self, conf=conf, **kwargs)
        self.logger = get_logger()
        if not os.path.isdir(self.opt.export_to):
            os.mkdir(self.opt.export_to)

        self.stock_chart = None
        self.stock_code = None
Example #9
    def __init__(self):
        self.logger = get_logger('words')
        self.ignore = [
            '기타', '참조', '상품상세설명', '주', '청구할인', '상세설명참조', '없음', '상세정보참조',
            '해당없음', '품명', '상품상세정보', '상세설명', '상세정보별도표시', '알수', '상세페이지', '상세참조',
            'ETC', '상세내용참조', '기타상세참조', '상세정보', '별도표기', '상세페이지참조', '알수없음',
            '상품상세설명참조'
        ] + [chr(asc) for asc in range(65, 91)]  # plus single uppercase letters A-Z
Example #10
    def __init__(self):
        self.logger = get_logger('data')
        self.word_to_idx = self.load_word_to_idx()

        # make temporary save dir if not exists
        tempdir = os.path.dirname(self.tmp_chunk_tpl)
        if not os.path.isdir(tempdir):
            os.makedirs(tempdir)
Example #11
    def __init__(self,
                 enc_emb_dim=128,
                 dec_emb_dim=128,
                 enc_hid_dim=256,
                 dec_hid_dim=256,
                 enc_dropout=0.3,
                 dec_dropout=0.3,
                 epochs=15):
        self.logger = get_logger()
        #self.data = Data()
        self.data = FRENDataset()
        data_len = len(self.data)
        train_num = int(data_len * 0.8)
        valid_num = int(data_len * 0.1)
        test_num = data_len - train_num - valid_num
        train, valid, test = random_split(self.data,
                                          [train_num, valid_num, test_num])
        self.train_iter = DataLoader(train,
                                     batch_size=128,
                                     shuffle=True,
                                     num_workers=4)
        self.valid_iter = DataLoader(valid,
                                     batch_size=128,
                                     shuffle=True,
                                     num_workers=4)
        self.test_iter = DataLoader(test,
                                    batch_size=128,
                                    shuffle=True,
                                    num_workers=4)
        #self.train_iter, self.valid_iter, self.test_iter = self.data.iterator()
        #self.input_dim = len(self.data.source.vocab)
        #self.output_dim = len(self.data.target.vocab)
        self.input_dim = self.data.source_dim
        self.output_dim = self.data.target_dim

        self.enc_emb_dim = enc_emb_dim
        self.dec_emb_dim = dec_emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.enc_dropout = enc_dropout
        self.dec_dropout = dec_dropout

        self.encoder = Encoder(self.input_dim, self.enc_emb_dim,
                               self.enc_hid_dim, self.dec_hid_dim,
                               self.enc_dropout)
        self.attention = Attention(self.enc_hid_dim, self.dec_hid_dim)
        self.decoder = Decoder(self.output_dim, self.dec_emb_dim,
                               self.enc_hid_dim, self.dec_hid_dim,
                               self.dec_dropout, self.attention)
        self.model = Seq2Seq(self.encoder, self.decoder, device).to(device)

        self.epochs = epochs
        #target_padding_index = self.data.target.vocab.stoi[self.data.target.pad_token]
        #self.criterion = nn.CrossEntropyLoss(ignore_index = target_padding_index)
        self.criterion = nn.CrossEntropyLoss(
            ignore_index=self.data.end_token_pivot)
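
ignore_index makes padding/end-token positions contribute nothing to the loss. A minimal standalone check with illustrative tensors (2 stands in for end_token_pivot):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(ignore_index=2)
logits = torch.randn(5, 10)              # 5 positions, 10-class vocabulary
targets = torch.tensor([1, 2, 3, 2, 4])  # positions with target 2 are skipped
loss = criterion(logits, targets)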
Example #12
    def __init__(self):
        self.logger = get_logger('Classifier')
        self.h5 = {
            'train': h5py.File(os.path.join(opt.data_path, 'train.h5'), 'r'),
            'dev': h5py.File(os.path.join(opt.data_path, 'dev.h5'), 'r'),
            'test': h5py.File(os.path.join(opt.data_path, 'test.h5'), 'r')
        }
        self.char_dict = pickle.load(open(
            os.path.join(opt.data_path, 'char_dict.pkl'), 'rb'),
                                     encoding='utf-8')
Example #13
    def __init__(self, config):
        self.logger = get_logger()
        self.parse_data_path = config['PARSEMETA']['PARSE_DATA_PATH']
        self.category_path = config['PARSEMETA']['CATEGORY_PATH']
        self.n_log_print = config['PARSEMETA']['N_LOG_PRINT']
        self.doc_vec_size = config['PARSEMETA']['DOC_VEC_SIZE']
        self.train_dir_path = config['MAKEDB']['TRAIN_DIR_PATH']
        self.chunk_size = config['MAKEDB']['CHUNK_SIZE']
        self.temp_dir_path = config['MAKEDB']['TEMP_DIR_PATH']

        self.parser = EcommerceDataParser(config['PARSEMETA'], use=True)
Example #14
    def __init__(self):
        self.logger = get_logger()
        self.init_token = '<sos>'
        self.end_token = '<eos>'
        en = self._read_file("small_vocab_en")
        fr = self._read_file("small_vocab_fr")
        assert len(en) == len(fr)
        self.en_sequences, self.en_dict = self._tokenize(en)
        self.fr_sequences, self.fr_dict = self._tokenize(fr)
        self.source_dim = len(self.en_dict) + 1  # +1 leaves index 0 for padding
        self.target_dim = len(self.fr_dict) + 1
        self.end_token_pivot = 2
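
A common implementation of the _tokenize contract assumed above (a sketch using Keras preprocessing; the actual helper is not shown): it returns padded index sequences plus a token-to-index dict whose indices start at 1, which is why source_dim and target_dim add one slot for padding index 0.

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

def _tokenize(texts):
    tok = Tokenizer()
    tok.fit_on_texts(texts)
    seqs = pad_sequences(tok.texts_to_sequences(texts), padding='post')
    return seqs, tok.word_index   # indices start at 1; 0 is reserved for padding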
Example #15
    def __init__(self):
        self.logger = get_logger('preprocessor')
        self.train_df_file = "train_df.csv"
        self.dev_df_file = "dev_df.csv"
        self.test_df_file = "test_df.csv"
        self.train_df_columns = ['bcateid', 'mcateid', 'scateid', 'dcateid', 'brand',
                                 'maker', 'model', 'product', 'price', 'updttm', 'pid']
        self.dev_df_columns = ['brand', 'maker', 'model',
                               'product', 'price', 'updttm', 'pid']
        self.test_df_columns = ['brand', 'maker', 'model',
                                'product', 'price', 'updttm', 'pid']
        self.data_path_list = opt.train_data_list
        self.stop_words = opt.en_stopwords + opt.ko_stopwords
Example #16
    def __init__(self):
        self.logger = get_logger('Classifier')
        self.num_classes = 0
        self.encoded_dict = {"price_lev": 3}
        self.cate_index_dict = pickle.load(
            open('./data/cate_index_dict.pickle', 'rb'))
        self.predict_encoder = pickle.load(
            open('./data/predict_encoder.pickle', 'rb'))
        self.cate_split_index = {"b": 0, "m": 1, "s": 2, "d": 3}
        self.prev_cate_list = {"m": "b", "s": "m", "d": "s"}
        self.b_model = None
        self.m_model = None
        self.s_model = None
        self.d_model = None
Example #17
    def __init__(self, path="data/ninetoten.h5", verbose=False):
        self.logger = get_logger()
        self.path = path
        self.verbose = verbose

        self.barrier = 4
        self.window = 7
        assert self.barrier > 0, "target inference"

        self.transaction_fee = 0.0035
        self.threshold = 0.0001

        self.stock_meta = StockMetaData()

        self.load()
Example #18
    def __init__(self):
        self.logger = get_logger('data')
        self.price_quantile_dict = pickle.load(
            open(self.price_quantile_dict_path, 'rb'))
        self.time_aging_dict = pickle.load(
            open(self.time_aging_dict_path, 'rb'))
        self.b2v_dict = pickle.load(open(self.b2v_dict_path, 'rb'))
        self.b2v_model = Word2Vec.load(self.b2v_model_path)
        self.d2v_model = Doc2Vec.load(self.d2v_model_path)
        # self.df_term_vector = pd.concat([
        #     pd.read_pickle('./data/df_product_train_dataset.pkl'),
        #     pd.read_pickle('./data/df_product_dev_dataset.pkl'),
        #     pd.read_pickle('./data/df_product_test_dataset.pkl')],
        #     axis=0
        # )
        self.term_vector_dict = pickle.load(
            open(self.term_vector_dict_path, 'rb'))
Example #19
def get_logger_and_parser():
    global cfg, logger

    parser = argparse.ArgumentParser(description='config')
    parser.add_argument(
        '--config',
        type=str,
        default='config/cityscapes_pspnet.yaml',
        help='Configuration file to use',
    )
    parser.add_argument(
        '--local_rank',
        type=int,
        default=0,
        help='Local rank for distributed training',
    )

    args = parser.parse_args()

    assert args.config is not None
    cfg = config.load_cfg_from_cfg_file(args.config)
    args_dict = dict()
    for arg in vars(args):
        args_dict[arg] = getattr(args, arg)
    cfg.update(args_dict)

    run_dir = os.path.join('runs',
                           os.path.basename(args.config)[:-5],  # strip '.yaml'
                           cfg['exp_name'])

    if main_process():
        check_dir_exists(run_dir)
        run_id = str(int(time.time()))
        logger = get_logger(run_dir, run_id, 'train')
        logger.info('RUNDIR: {}'.format(run_dir))
        shutil.copy(args.config, run_dir)
    else:
        logger = None

    try:
        cfg['world_size'] = int(os.environ['WORLD_SIZE'])
    except KeyError:
        pass

    return logger, cfg, run_dir
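
main_process() is assumed to gate directory creation and logging to a single rank; a common definition under environment-variable based launchers (an assumption, as this repo's version is not shown):

import os

def main_process():
    # only rank 0 writes run directories, logs, and config copies
    return int(os.environ.get('RANK', 0)) == 0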
Example #20
    def __init__(self):
        self.logger = get_logger('ShopNet')

        self.N_IMG_FEAT = 2048
        self.max_len = opt.max_len
        self.voca_size = get_word_idx_size() + 1  # should be 500424+1  #500458+1 #96778+1 #opt.max_embd_words + 1
        self.embd_size = opt.embd_size

        self.C_idx = dict()
        self.C_idx['b'] = {c: c - 1 for c in range(1, 57 + 1)}
        self.C_idx['m'] = {c: c - 1 for c in range(1, 552 + 1)}
        self.C_idx['s'] = {c: c - 2 for c in range(2, 3190 + 1)}
        self.C_idx['d'] = {c: c - 2 for c in range(2, 404 + 1)}

        self.N_Cb = 57
        self.N_Cm = 552
        self.N_Cs = 3190 - 1
        self.N_Cd = 404 - 1
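
The C_idx tables map raw category ids (1-based for 'b'/'m', 2-based for 's'/'d') onto dense 0-based rows of the corresponding softmax heads; for example, assuming `net` is an instance of this class:

row_b = net.C_idx['b'][5]   # raw 'b' category 5 -> dense row 4 of the 57-way head
row_s = net.C_idx['s'][2]   # raw 's' category 2 -> dense row 0 of the 3189-way head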
Example #21
    def __init__(self, stock_code, marketkind=1, verbose=False):
        self.logger = get_logger()
        self.verbose = verbose

        self.barrier = 8
        self.window = 12
        assert self.barrier > 0, "target inference"

        self.transaction_fee = 0.0035
        self.threshold = 0.0

        stock_code = str(stock_code)
        if not stock_code.startswith("A"):
            stock_code = "A" + stock_code
        self.stock_code = stock_code
        self.logger.info("target stock code: {}".format(self.stock_code))
        self.marketkind = marketkind

        self.parse()
Example #22
    def __init__(self,
                 report,
                 feat_len,
                 batch_size,
                 train_ratio,
                 verbose=False):
        """
        :param path: data path
        :param report: report type string
        :param batch_size: train batch
        :param train_ratio:
        :param verbose:
        """
        self.logger = get_logger()
        self.report = report
        self.feature_len = feat_len
        self.batch_size = batch_size
        self.train_ratio = train_ratio
        self.verbose = verbose
        self.feature_norm = []

        self.threshold = 0.006
Example #23
    def __init__(self,
                 report,
                 feat_len,
                 batch_size,
                 train_ratio,
                 epochs,
                 verbose=False):
        self.logger = get_logger()
        self.report = report
        self.feat_len = feat_len
        self.batch_size = batch_size
        self.train_ratio = train_ratio
        self.verbose = verbose

        self.data_iter = Data(report=self.report,
                              feat_len=self.feat_len,
                              batch_size=self.batch_size,
                              train_ratio=self.train_ratio)
        self.data_iter.launch()

        self.log_path = "tmp/tensorboard"
        self.checkpoint_path = "tmp/checkpoint"
        self.epochs = epochs
Example #24
    def __init__(self,
                 path,
                 window,
                 stride,
                 minute_after,
                 bandwidth,
                 batch_size,
                 train_ratio,
                 epochs,
                 verbose=False):
        self.logger = get_logger()
        self.path = path
        self.window = window
        self.stride = stride
        self.minute_after = minute_after
        self.bandwidth = bandwidth

        self.batch_size = batch_size
        self.train_ratio = train_ratio
        self.verbose = verbose

        self.log_path = "tmp/tensorboard"
        self.checkpoint_path = "tmp/checkpoint"
        self.epochs = epochs
Example #25
    def __init__(self):
        self.logger = get_logger('data')
        self.price_quantile_dict = pickle.load(
            open(self.price_quantile_dict_path, 'rb'))
        self.time_aging_dict = pickle.load(
            open(self.time_aging_dict_path, 'rb'))
        self.valid_tag_dict = pickle.load(open(self.valid_tag_dict_path, 'rb'))
        self.b2v_dict = pickle.load(open(self.b2v_dict_path, 'rb'))
        # self.b2v_model = gensim.models.Word2Vec.load(self.b2v_model_path)
        self.d2v_model = Doc2Vec.load(
            '/workspace/dataset/doc2vec_test/reduced_doc2vec.model'
        )  # TODO: replace this absolute path
        self.df_term_vector = pd.concat([
            pd.read_pickle(
                '/workspace/dataset/preprocess_test/df_product_train_datset.pkl'),
            pd.read_pickle(
                '/workspace/dataset/preprocess_test/df_product_dev_datset.pkl'),
            pd.read_pickle(
                '/workspace/dataset/preprocess_test/df_product_test_datset.pkl')
        ], axis=0)
Example #26
    def __init__(self):
        self.logger = get_logger('textonly')
Example #27
    def __init__(self):
        self.logger = get_logger('Model')
Example #28
    def __init__(self):
        self.logger = get_logger('Classifier')
        self.num_classes = 0
Example #29
    def __init__(self):
        self.logger = get_logger('shopnet')
        self.input_size = 2352
Example #30
# coding=utf-8
from misc import get_logger

logger = get_logger(__file__)

SPADE = '♠'
HEART = '♥'
CLUB = '♣'
DIAMOND = '♦'

SUITS_RANK = {
    # higher is better
    SPADE: 3,
    HEART: 2,
    CLUB: 1,
    DIAMOND: 0
}

SUITS = [SPADE, HEART, CLUB, DIAMOND]
# 2, 3, 4, 5, 6, 7, 8, 9, 10, J, Q, K, A
CARDS_RANK = range(2, 15)

ALL_CARDS = set()
for s in SUITS:
    for c in CARDS_RANK:
        ALL_CARDS.add((c, s))
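
With SUITS_RANK as the tie-breaker, two cards compare naturally as (rank, suit-rank) tuples; a small sketch:

def beats(card_a, card_b):
    # cards are (rank, suit) tuples as stored in ALL_CARDS;
    # higher rank wins, suit rank breaks ties (spade high, diamond low)
    rank_a, suit_a = card_a
    rank_b, suit_b = card_b
    return (rank_a, SUITS_RANK[suit_a]) > (rank_b, SUITS_RANK[suit_b])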