Code example #1
def main():
    opt = parse_args()
    if opt.shuffle > 0:
        raise AssertionError("-shuffle is not implemented, please make sure "
                             "you shuffle your data before pre-processing.")
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()
    task1_fields = get_task_fields()
    task2_fields = get_task2_fields()

    logger.info("Building & saving task training data...")
    train_dataset_files = build_save_dataset('train', 'task', fields, opt)
    logger.info("Building & saving task2 training data...")
    train_dataset_files2 = build_save_dataset('train', 'task2', fields, opt)

    logger.info("Building & saving task validation data...")
    build_save_dataset('valid', 'task', fields, opt)
    logger.info("Building & saving task2 validation data...")
    build_save_dataset('valid', 'task2', fields, opt)

    logger.info("Building & saving vocabulary...")

    build_save_vocab(train_dataset_files + train_dataset_files2, fields, opt)
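Every example on this page calls init_logger. The implementations differ from project to project, but a minimal sketch of what such a helper typically looks like (an assumption for illustration, not taken from any of the repositories shown here) is:

# Assumed, minimal sketch of an init_logger helper: configure a root logger
# with a console handler and an optional file handler, then return it.
import logging

def init_logger(log_file=None, log_level=logging.INFO):
    logger = logging.getLogger()
    logger.setLevel(log_level)
    formatter = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")

    console = logging.StreamHandler()
    console.setFormatter(formatter)
    logger.addHandler(console)

    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger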
Code example #2
def main(opt, device_id):
  opt = training_opt_postprocessing(opt, device_id)
  init_logger(opt.log_file)
  # Load checkpoint if we resume from a previous training.
  if opt.train_from:
    logger.info('Loading checkpoint from %s' % opt.train_from)
    checkpoint = torch.load(opt.train_from,
                            map_location=lambda storage, loc: storage)

    # Load the default opt values, then overwrite them with the opts stored
    # in the checkpoint. This is useful for re-training a model after adding
    # a new option that is not set in the checkpoint.
    dummy_parser = configargparse.ArgumentParser()
    opts.model_opts(dummy_parser)
    default_opt = dummy_parser.parse_known_args([])[0]

    model_opt = default_opt
    model_opt.__dict__.update(checkpoint['opt'].__dict__)
  else:
    checkpoint = None
    model_opt = opt

  # Load fields generated from preprocess phase.
  fields = load_fields(opt, checkpoint)

  # Build model.
  model = build_model(model_opt, opt, fields, checkpoint)
  n_params, enc, dec = _tally_parameters(model)
  logger.info('encoder: %d' % enc)
  logger.info('decoder: %d' % dec)
  logger.info('* number of parameters: %d' % n_params)
  _check_save_model_path(opt)

  # Build optimizer.
  optim = build_optim(model, opt, checkpoint)

  # Build model saver
  model_saver = build_model_saver(model_opt, opt, model, fields, optim)

  trainer = build_trainer(opt, device_id, model, fields,
                          optim, model_saver=model_saver)

  def train_iter_fct(): 
    return build_dataset_iter(
      load_dataset("train", opt), fields, opt)

  def valid_iter_fct(): 
    return build_dataset_iter(
      load_dataset("valid", opt), fields, opt, is_train=False)

  # Do training.
  if len(opt.gpu_ranks):
    logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
  else:
    logger.info('Starting training on CPU, could be very slow')
  trainer.train(train_iter_fct, valid_iter_fct, opt.train_steps,
                opt.valid_steps)

  if opt.tensorboard:
    trainer.report_manager.tensorboard_writer.close()
Code example #3
File: main.py  Project: hxl1116/nasa-apis-bot
def init(mode):
    """ NASA Bot config and logging init """
    if os.path.exists(f".env.{mode}"):
        init_root_cfg(f".env.{mode}")
        logger_cfg = get_logger_cfg()
        init_logger(log_lvl=logger_cfg['log_lvl'],
                    log_dir=logger_cfg['log_dir'])
    else:
        sys.exit('No .env file present')
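A hypothetical invocation of the helper above (the mode name "dev" is made up):

# Hypothetical call; a ".env.dev" file must exist, otherwise init() exits.
init("dev")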
Code example #4
def series_plot(ser: pd.Series,
                kind="bar",
                y_label="y",
                x_label="x",
                title="Untitled",
                d=0.1):
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    ser_max, ser_min = ser.max(), ser.min()
    init_logger().info("min/max: {min}/{max}".format(min=ser_min, max=ser_max))
    delta = (ser_max - ser_min) * d
    ser.plot(kind=kind, ylim=(ser_min - delta, ser_max + delta), title=title)
    plt.show()
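A hypothetical call to series_plot above, using made-up data:

# Hypothetical usage of series_plot; the values and labels are illustrative only.
import pandas as pd

monthly_price = pd.Series([120, 135, 128, 150],
                          index=["Jan", "Feb", "Mar", "Apr"])
series_plot(monthly_price, kind="bar",
            y_label="price", x_label="month", title="Monthly price")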
Code example #5
    def __init__(self, df=None, strategy: dict = None):
        """
        TODO : overwrite to self.strategy
        :param strategy: dictionary {
            "method": [] list of columns
        }
        """
        if df is not None and strategy is not None:
            self.logger = init_logger()

            self.input_df = df
            self.strategy = strategy

            # constant
            self.transform_method = {
                "log": FunctionTransformer(np.log1p),
                "standard": StandardScaler(),
                "one_hot_encoding": OneHotEncoder(sparse=False),
                "none": FunctionTransformer(lambda x: x)
            }

            # self.column_transformer = ColumnTransformer(transformers=self.make_transformers())
            self.transformed = None
            super().__init__(transformers=self.make_transformers())
        else:
            super().__init__()
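Judging from the docstring and the transform_method keys above, the strategy argument presumably maps each transform name to the list of columns it applies to. A hypothetical example (column names invented):

# Hypothetical strategy dict for the transformer above; column names are invented.
strategy = {
    "log": ["price"],
    "standard": ["avg_temperature"],
    "one_hot_encoding": ["item_name"],
    "none": ["date"],
}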
Code example #6
    def __init__(
            self, x_train, y_train, bucket_name,
            grid_params=None, score=mean_squared_error
    ):
        if grid_params is None:
            grid_params = {
                "max_iter": [1, 5, 10],
                "alpha": [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                "l1_ratio": np.arange(0.0, 1.0, 0.1)
            }

        self.x_train = x_train
        self.y_train = y_train
        self.scorer = score

        self.error = None  # pd.Series
        self.metric = None

        # s3
        self.s3_manager = S3Manager(bucket_name=bucket_name)

        # logger
        self.logger = init_logger()

        super().__init__(
            estimator=ElasticNet(),
            param_grid=grid_params,
            scoring=make_scorer(self.scorer, greater_is_better=False),
            # the temporal (before/after) relationship must be preserved, so n_splits=2
            cv=TimeSeriesSplit(n_splits=2).split(self.x_train)
        )
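A hypothetical way to drive the searcher above. The class name ElasticNetSearcher is assumed, since only its __init__ is shown; fit and best_params_ come from the scikit-learn GridSearchCV base class.

# Hypothetical usage; "ElasticNetSearcher" is an assumed name for the class above.
searcher = ElasticNetSearcher(x_train, y_train, bucket_name="my-bucket")
searcher.fit(x_train, y_train)   # GridSearchCV.fit from scikit-learn
print(searcher.best_params_)     # best ElasticNet hyper-parameters found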
Code example #7
    def __init__(self, bucket_name: str, logger_name: str, date: str):
        self.logger = init_logger()
        self.date = date
        # TODO: now -> term of dataset
        self.term = datetime.datetime.now().strftime("%m%Y")

        # s3
        self.bucket_name = bucket_name
Code example #8
    def __init__(self, args, device_id):
        """
        :param args: parser.parse_args()
        :param device_id: 0 or -1
        """
        self.args = args
        self.device_id = device_id
        self.model_flags = [
            'hidden_size', 'ff_size', 'heads', 'inter_layers', 'encoder',
            'ff_actv', 'use_interval', 'rnn_size'
        ]

        self.device = "cpu" if self.args.visible_gpus == '-1' else "cuda"
        logger.info('Device ID %d' % self.device_id)
        logger.info('Device %s' % self.device)
        torch.manual_seed(self.args.seed)
        random.seed(self.args.seed)

        if self.device_id >= 0:
            torch.cuda.set_device(self.device_id)

        init_logger(args.log_file)

        try:
            self.step = int(self.args.test_from.split('.')[-2].split('_')[-1])
        except IndexError:
            self.step = 0

        logger.info('Loading checkpoint from %s' % self.args.test_from)
        checkpoint = torch.load(self.args.test_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if k in self.model_flags:
                setattr(self.args, k, opt[k])

        config = BertConfig.from_json_file(self.args.bert_config_path)
        self.model = model_builder.Summarizer(self.args,
                                              self.device,
                                              load_pretrained_bert=False,
                                              bert_config=config)
        self.model.load_cp(checkpoint)
        self.model.eval()
Code example #9
    def __init__(self, bucket_name: str, date: str):
        self.logger = init_logger()

        # s3
        self.bucket_name = bucket_name
        self.load_key = "public_data/open_data_raw_material_price/process/csv/{filename}.csv".format(
            filename=date)

        # TODO: loading should not happen here; the extractor should only preprocess the data
        self.input_df = self.load()
Code example #10
    def __init__(self, bucket_name: str, date: str):
        self.logger = init_logger()

        # s3
        self.bucket_name = bucket_name
        self.load_key = "public_data/open_data_terrestrial_weather/process/csv/{filename}.csv".format(
            filename=date)

        self.input_df = self.load()
        self.categorical_features = []
Code example #11
    def __init__(self):
        self.logger = init_logger()
        # load a prepared data and split test & train
        prepared_data = PriceDataPipeline().process()
        X = prepared_data.drop("당일조사가격", axis=1, inplace=False)
        y = prepared_data["당일조사가격"]

        self.logger.debug(
            X.groupby("품목명").count().sort_values(by=["평균기온(°C)"]))
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, stratify=X["품목명"])
Code example #12
def main():
    opt = parse_args()
    if opt.shuffle > 0:
        raise AssertionError("-shuffle is not implemented, please make sure "
                             "you shuffle your data before pre-processing.")
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building 'Fields' object...")
    fields = get_fields()

    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset('train', fields, opt)  # returns the list of generated dataset files

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt)

    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)  # build the vocabulary from the train set only
Code example #13
    def __init__(self, strategy=None, df=None):
        self.logger = init_logger()

        self.input_df = df
        self.strategy = strategy

        self.fillna_method = {
            "drop": self.fillna_with_drop,
            "zero": self.fillna_with_zero,
            "linear": self.fillna_with_linear,
        }
Code example #14
    def __init__(self, bucket_name):
        """
        TODO:
            Add capability to process other formats (i.e. json, text, avro, parquet, etc.)
        :param bucket_name: AWS S3 bucket name
        """
        self.logger = init_logger()

        self.bucket_name = bucket_name

        self.s3 = boto3.resource('s3')
        self.s3_bucket = self.s3.Bucket(bucket_name)
Code example #15
def draw_hist(s, h_type: str = "dist", name: str = None):
    h_method = {
        "dist": sns.distplot,
        "count": sns.countplot,
    }
    try:
        method = h_method[h_type]
    except KeyError:
        # TODO: handle exception
        init_logger().critical(
            "histogram type '{h_type}' is not supported".format(h_type=h_type))
        sys.exit()

    if isinstance(s, pd.Series):
        plt.title('{name} histogram'.format(name=s.name))
        method(s)
        plt.show()
    else:
        # for jupyter notebook
        plt.title('{name} histogram'.format(name=name))
        return list(map(lambda series: method(series), s))
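A hypothetical call to draw_hist above, using made-up data:

# Hypothetical usage of draw_hist; requires seaborn, matplotlib and pandas.
import numpy as np
import pandas as pd

prices = pd.Series(np.random.normal(100, 10, size=500), name="price")
draw_hist(prices, h_type="dist")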
Code example #16
    def __init__(self, args):
        self.logger = init_logger()

        self.type_map = {
            "price": "public_price",
            "terrestrial_weather": "public_terrestrial_weather",
            "marine_weather": "public_marine_weather"
        }

        self.args = args
        self.types = None

        self.df_list = None
Code example #17
    def __init__(self, args, device_id):
        """
        :param args: parser.parse_args()
        :param device_id: 0 or -1
        """
        self.args = args
        self.device_id = device_id
        self.model_flags = [
            'hidden_size', 'ff_size', 'heads', 'inter_layers', 'encoder',
            'ff_actv', 'use_interval', 'rnn_size'
        ]

        self.device = "cpu" if self.args.visible_gpus == '-1' else "cuda"
        logger.info('Device ID %d' % self.device_id)
        logger.info('Device %s' % self.device)
        torch.manual_seed(self.args.seed)
        random.seed(self.args.seed)

        if self.device_id >= 0:
            torch.cuda.set_device(self.device_id)

        init_logger(args.log_file)
Code example #18
def main():
    opt = parse_args()  # get the opt (parsed arguments)
    if opt.shuffle > 0:
        raise AssertionError("-shuffle is not implemented, please make sure "
                             "you shuffle your data before pre-processing.")
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()  # get the dict of torchtext.data fields (src, target & indices), without data
    # x_train, x_valid, x_test, y_train_emo, y_valid_emo, y_test_emo = data_loader.test_mosei_emotion_data()
    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset(
        'train', fields, opt)  # shard the source and return the shard file paths
    logger.info("Building & saving validation data...")
    build_save_dataset(
        'valid', fields, opt)  # shard the source and return the shard file paths

    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)
Code example #19
def main():
    opt = parse_args()

    if opt.shuffle > 0:
        raise AssertionError("-shuffle is not implemented, please make sure "
                             "you shuffle your data before pre-processing.")
    print(opt)
    # write all logs to both the file and the console
    init_logger(opt.log_file)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()

    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt)

    logger.info("Building & saving vocabulary...")

    build_save_vocab(train_dataset_files, fields, opt)
Code example #20
    def multi_card_run(self):
        """ Spawns 1 process per GPU """
        init_logger()

        nb_gpu = self.args.world_size
        mp = torch.multiprocessing.get_context('spawn')

        # Create a thread to listen for errors in the child processes.
        error_queue = mp.SimpleQueue()
        error_handler = ErrorHandler(error_queue)

        # Train with multiprocessing.
        process = []
        for i in range(nb_gpu):
            self.device_id = i
            process.append(
                mp.Process(target=self.multi_card_train,
                           args=(self.args, self.device_id, error_queue),
                           daemon=True))
            process[i].start()
            logger.info(" Starting process pid: %d  " % process[i].pid)
            error_handler.add_child(process[i].pid)
        for p in process:
            p.join()
Code example #21
    def __init__(self, base_url, bucket_name, key, head=False):
        self.logger = init_logger()

        self.bucket_name = bucket_name
        self.s3_manager = S3Manager(bucket_name=self.bucket_name)
        self.prefix = key

        self.chrome_path = "C:/chromedriver"
        options = webdriver.ChromeOptions()
        if head is False:
            options.add_argument('headless')

        self.driver = webdriver.Chrome(executable_path=self.chrome_path,
                                       chrome_options=options)

        self.base_url = base_url
Code example #22
File: main.py  Project: chunjy92/drsc
def main(_):
  begin = time.time()

  tf.gfile.MakeDirs(FLAGS.model_dir)

  # redirects tf logs to file
  log_file = logging.init_logger(FLAGS.model_dir, FLAGS.do_debug)
  config.display_args(FLAGS)

  if FLAGS.model == "bert":
    run_bert_classifier(log_file)
  else:
    E = DRSCExperiment(FLAGS)
    E.run()

  tf.logging.info("Execution Time: {:.2f}s".format(time.time() - begin))
Code example #23
    def __init__(self, bucket_name: str, x_train, y_train, params=None):
        # logger
        self.logger = init_logger()

        # s3
        self.s3_manager = S3Manager(bucket_name=bucket_name)

        if params is None:
            self.model = ElasticNet()
        else:
            self.model = ElasticNet(**params)

        self.x_train, self.y_train = x_train, y_train

        self.error = None
        self.metric = None
Code example #24
    def __init__(self, key):
        self.key = key
        self.logger = init_logger()

        # s3
        self.bucket_name = "production-bobsim"
        self.s3_key = "public_data/{dir}/origin".format(dir=self.key)

        # valid check
        self.dtypes = dtype[key]
        self.columns = list(self.dtypes.keys())

        # rdb
        self.schema_name = "public_data"
        self.table_name = self.key

        # return
        self.df = None
Code example #25
File: core.py  Project: meowpunch/bobsim-research
    def __init__(self, bucket_name: str, date: str):
        self.logger = init_logger()

        self.date = date

        # s3
        # TODO: bucket_name -> parameterized
        self.s3_manager = S3Manager(bucket_name=bucket_name)
        self.load_key = "public_data/open_data_raw_material_price/origin/csv/{filename}.csv".format(
            filename=self.date)
        self.save_key = "public_data/open_data_raw_material_price/process/csv/{filename}.csv".format(
            filename=self.date)

        self.dtypes = dtype["raw_material_price"]
        self.translate = translation["raw_material_price"]

        # load filtered df
        self.input_df = self.load()
Code example #26
File: core.py  Project: meowpunch/bobsim-research
    def __init__(self, bucket_name: str, date: str):
        self.logger = init_logger()

        # TODO: how to handle datetime?
        self.term = datetime.strptime(date, "%Y%m")

        # s3
        self.bucket_name = bucket_name
        self.file_name = "2014-2020.csv"
        self.load_key = "public_data/open_data_marine_weather/origin/csv/{filename}".format(
            filename=self.file_name)
        self.save_key = "public_data/open_data_marine_weather/process/csv/{filename}.csv".format(
            filename=date)

        # type
        self.dtypes = dtype["marine_weather"]
        self.translate = translation["marine_weather"]

        # fillna

        self.columns_with_linear = [
            "m_wind_spd_avg", "m_atm_press_avg", "m_rel_hmd_avg",
            "m_temper_avg", "m_water_temper_avg", "m_max_wave_h_avg",
            "m_sign_wave_h_avg", "m_sign_wave_h_high", "m_max_wave_h_high"
        ]
        self.columns_with_zero = ['m_wave_p_avg', 'm_wave_p_high']
        """
        self.columns_with_linear = [
            "m_atm_press_avg", "m_rel_hmd_avg",
            "m_temper_avg", "m_water_temper_avg", "m_max_wave_h_avg",
            "m_max_wave_h_high"
        ]
        """
        # self.columns_with_zero = ['m_wave_p_avg']
        self.columns_with_drop = ['date']

        # load filtered df and take certain term
        df = self.load()
        # TODO: make function
        date_picker = (df['date'].dt.year == self.term.year) & (
            df['date'].dt.month == self.term.month)
        self.input_df = df[date_picker]
Code example #27
File: translate.py  Project: ag027592/MMESGN
    #   tgt_iter = None
    translator.translate(src_data_iter=src_iter,
                         tgt_data_iter=tgt_iter,
                         batch_size=opt.batch_size,
                         out_file=out_file)
    out_file.close()


if __name__ == "__main__":
    parser = configargparse.ArgumentParser(
        description='translate.py',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    opts.config_opts(parser)
    opts.translate_opts(parser)

    opt = parser.parse_args()
    logger = init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    path = 'rein_model/rein_model_step'
    for i in range(0, 25000, 10):
        current_path = path + '_' + str(i) + '.pt'
        if os.path.exists(current_path):
            model_path = current_path
            opt.output = 'rein_data/rein.tran' + '_' + str(i)
            main(opt, model_path)
        else:
            continue
Code example #28
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# import argparse
from utils.args import print_arguments
from utils.logging import init_logger
from utils.check import check_gpu

from networks.graphsum.run_graphsum import main as run_graphsum
from run_args import parser as run_parser


if __name__ == "__main__":
    args = run_parser.parse_args()
    print_arguments(args)
    init_logger(args.log_file)
    check_gpu(args.use_cuda)

    if args.model_name == 'graphsum':
        run_graphsum(args)
    else:
        raise ValueError("Model %s is not supported currently!" % args.model_name)
Code example #29
File: app.py  Project: meowpunch/bobsim-research
def main():
    logger = init_logger()

    app = Flask(__name__)
    app.config['JSON_AS_ASCII'] = False
    app.config['JSON_SORT_KEYS'] = False

    # TODO: classify crawl recipe API service
    @app.route('/', methods=['GET'])
    def index():
        return "<h2>Crawling Service</h2>\
                <h3>Recipe</h3>\
                <strong>Mangae</strong><br>\
                [GET] : /crawl_recipe/M?str_num=6932924&end_num=6932926<br>\
                [GET] : /recipe/M<br><br>\
                <strong>Haemuk</strong><br>\
                [GET] : /crawl_recipe/H?str_num=5004&end_num=5005<br>\
                [GET] : /recipe/H<br><br>\
                <h3>Item</h3>\
                <strong>Emart</strong><br>\
                [GET] : /crawl_item/E<br>\
                [GET] : /item/E<br><br>\
                <strong>Haemuk</strong><br>\
                [GET] : /crawl_item/H<br>\
                [GET] : /item/H<br><br>"

    @app.route('/crawl_recipe/<source>', methods=['GET'])
    def crawl_recipe(source):
        """
        :return: jsonified recipe
        """
        args = request.args
        try:
            str_num, end_num = args["str_num"], args["end_num"]
        except KeyError:
            logger.warning("There is no parameter, 'str_num' or 'end_num'")
            str_num, end_num = 6934386, 6934390

        logger.info("let's crawl {str} ~ {end} {source} recipes".format(
            str=str_num, end=end_num, source=source))

        bucket_name = "production-bobsim"
        key = "crawled_recipe/{s}".format(s=source)
        candidate_num = range(int(str_num), int(end_num))
        field = ['title', 'items', "duration", "tags"]

        if source == "M":
            result = MangaeRecipeCrawler(
                base_url="https://www.10000recipe.com/recipe",
                candidate_num=candidate_num,
                field=field,
                bucket_name=bucket_name,
                key=key).process()
        elif source == "H":
            result = HaemukRecipeCrawler(
                base_url="https://www.haemukja.com/recipes",
                candidate_num=candidate_num,
                field=field,
                bucket_name=bucket_name,
                key=key).process()
        else:
            raise NotImplementedError

        return jsonify(result)

    @app.route('/crawl_item/<source>', methods=['GET'])
    def crawl_item(source):
        """
        :return: jsonified recipe
        """
        logger.info("let's crawl item categories {source} recipes".format(
            source=source))

        bucket_name = "production-bobsim"
        key = "crawled_item/{s}".format(s=source)
        head = True

        if source == "H":
            result = HaemukItemCrawler(
                base_url="https://www.haemukja.com/refrigerator",
                bucket_name=bucket_name,
                key=key,
                head=head).process()
        elif source == "E":
            result = HaemukItemCrawler(base_url="http://emart.ssg.com/",
                                       bucket_name=bucket_name,
                                       key=key,
                                       head=head).process()
        else:
            raise NotImplementedError

        return jsonify(result)

    @app.route('/<prefix>/<source>', methods=['GET'])
    def get_recipes(prefix, source):
        data = S3Manager("production-bobsim").fetch_dict_from_json(
            key="crawled_{p}/{s}".format(p=prefix, s=source))
        if data is None:
            return 'there is no data'
        return jsonify(data)

    app.run(host='0.0.0.0', port=9000, debug=True)
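A hypothetical client call against the service above, assuming it is running locally on port 9000 (the route and query parameters are taken from the index text in the code):

# Hypothetical request to the crawl_recipe endpoint shown above.
import requests

resp = requests.get("http://localhost:9000/crawl_recipe/M",
                    params={"str_num": 6932924, "end_num": 6932926})
print(resp.json())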
Code example #30
"""
Adapted from AllenNLP:
    https://github.com/allenai/allennlp/tree/v0.6.1/allennlp/common

Functions and exceptions for checking that
AllenNLP and its models are configured correctly.
"""

from torch import cuda

from utils import logging

logger = logging.init_logger()  # pylint: disable=invalid-name


class ConfigurationError(Exception):
    """
    The exception raised by any AllenNLP object when it's misconfigured
    (e.g. missing properties, invalid properties, unknown properties).
    """

    def __init__(self, message):
        super(ConfigurationError, self).__init__()
        self.message = message

    def __str__(self):
        return repr(self.message)


def log_pytorch_version_info():
    import torch