Example #1
def main(_):
    if FLAGS.config == "example_params.py":
        logging.info("Training on default config.")
        logging.info(
            "Use train.py --config=your_config.py to train different models")

    # Load the hyperparameter dictionary from the JSON config file.
    with open(FLAGS.config, 'r') as f:
        logging.info("Loading hyperparameters from: %s", FLAGS.config)
        hypes = json.load(f)

    # Prepare the training directory, fetch the data if needed, and start training.
    train_dir = utils.get_train_dir()
    initialize_training_folder(hypes, train_dir)
    maybe_download_and_extract(hypes, train_dir)
    run_training(hypes, train_dir)
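
The FLAGS object and the config flag are defined outside this snippet. A minimal sketch of how the flag is presumably declared with the TensorFlow 1.x tf.app.flags API, reusing the flag name and default seen above (the help string is an assumption):

import tensorflow as tf

flags = tf.app.flags
FLAGS = flags.FLAGS

# Flag name and default mirror the snippet above; the help text is an assumption.
flags.DEFINE_string('config', 'example_params.py',
                    'File storing the hyperparameters of the model.')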
Example #3
def main(_):

  train_dir = utils.get_train_dir()
  evaluate(train_dir)
        "DP": {
            'function': single_label,
            'x': x_dual_problem,
            'y': y_dual_problem
        },
    }

    train_problems_on_data(pipeline, data, train_in_parallel)


if __name__ == '__main__':
    logger = root_logger('train_sk_pipeline', logging.INFO)
    load_dotenv(find_dotenv())

    scenario_dir = Path(
        get_train_dir() /
        f'sklearn_{datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")}')
    scenario_dir.mkdir(parents=True, exist_ok=True)
    add_file_handler_to_logger(logger, scenario_dir, 'train_sk_pipeline')

    try:
        db = MongoDB()  # credentials for MongoDB can be set up here
        # Number of processes used for loading data from MongoDB.
        n_cores = cpu_count()
        # Max number of samples per license, used for the single- and multi-label problems.
        max_samples = 10_000
        # Min number of samples per license; decides whether the license is included in training (internally limited to 10).
        min_samples = 1_000

        collection_analysis = CollectionAnalysis.load_object(
            get_db_analysis_dir() / 'Conclusion.pkl')

        training_licenses = collection_analysis.get_mapped_licenses_with_min_samples(
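
The scenario directory in the excerpt above combines pathlib's / operator with a strftime timestamp. A standalone sketch of that pattern, with a plain Path('train') standing in for get_train_dir(), which is project-specific (the base path is an assumption):

import datetime
from pathlib import Path

base_dir = Path('train')  # stands in for get_train_dir(); the path is an assumption
scenario_dir = base_dir / f'sklearn_{datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")}'
scenario_dir.mkdir(parents=True, exist_ok=True)  # create the directory tree if it is missing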
Example #5
            docs._cursor.close()


# end functions for tagging the benchmark set

if __name__ == '__main__':
    logger = root_logger('update_data', logging.INFO)
    load_dotenv(find_dotenv())
    preprocessor = PreprocessorSpacy()

    try:
        db = MongoDB()  # credentials for MongoDB can be set up here
        # Number of processes to use; can be set up here.
        n_cores = cpu_count()

        license_mapping = load_license_mapping_file(get_train_dir() /
                                                    LICENSE_MAPPING_FILENAME)
        update(update_document,
               n_cores=n_cores,
               batch_size=10_000,
               collection=Documents.Conclusion)

        collection_analysis = analyze_in_parallel(
            db,
            n_cores=n_cores,
            batch_size=10_000,
            collection=Documents.Conclusion)

        collection_analysis.save_statistics(db)
        collection_analysis.save_object()
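
Both excerpts call load_dotenv(find_dotenv()) before constructing MongoDB(). A minimal sketch of that python-dotenv pattern; the MONGO_URI variable name is hypothetical and stands in for whatever credentials the project's MongoDB() wrapper reads:

import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())          # find the nearest .env file and load it into the environment
mongo_uri = os.getenv('MONGO_URI')  # hypothetical name; read a credential from the environment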
Example #6
def main(_):

    train_dir = utils.get_train_dir()
    evaluate(train_dir)
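
Examples #3 and #6 ignore the underscore argument to main. With the TensorFlow 1.x launcher, which these scripts presumably use, tf.app.run() parses any defined flags and then calls main with the leftover argv list. A self-contained toy sketch of that calling convention (the print body is illustrative, not the original code):

import tensorflow as tf


def main(argv):
    # argv holds the leftover command-line arguments after flag parsing;
    # the examples above name it _ because they do not use it.
    print('leftover argv:', argv)


if __name__ == '__main__':
    tf.app.run()  # parse any defined flags, then call main(argv)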