def main(_):
    if FLAGS.config == "example_params.py":
        logging.info("Training on default config.")
        logging.info(
            "Use train.py --config=your_config.py to train different models")

    # Load the hyperparameter ("hypes") dict from the JSON config file.
    with open(tf.app.flags.FLAGS.config, 'r') as f:
        logging.info("f: %s", f)
        hypes = json.load(f)

    # Prepare the training folder and data, then start training.
    train_dir = utils.get_train_dir()
    initialize_training_folder(hypes, train_dir)
    maybe_download_and_extract(hypes, train_dir)
    run_training(hypes, train_dir)
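# Note: the `config` flag used in main() above is assumed to be defined
# elsewhere in the repo. A minimal sketch of such a definition with the
# TF 1.x flags API (the default value and help text here are assumptions,
# not taken from this file):
#
#     FLAGS = tf.app.flags.FLAGS
#     tf.app.flags.DEFINE_string('config', 'example_params.py',
#                                'Path to the hypes/config JSON file.')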
def main(_):
    train_dir = utils.get_train_dir()
    evaluate(train_dir)
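# Both main(_) functions above follow the TF 1.x entry-point convention,
# where tf.app.run() parses the flags and then invokes main(). A typical
# launcher (assumed, not shown in these snippets) is:
#
#     if __name__ == '__main__':
#         tf.app.run()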
"DP": { 'function': single_label, 'x': x_dual_problem, 'y': y_dual_problem }, } train_problems_on_data(pipeline, data, train_in_parallel) if __name__ == '__main__': logger = root_logger('train_sk_pipeline', logging.INFO) load_dotenv(find_dotenv()) scenario_dir = Path( get_train_dir() / f'sklearn_{datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")}') scenario_dir.mkdir(parents=True, exist_ok=True) add_file_handler_to_logger(logger, scenario_dir, 'train_sk_pipeline') try: db = MongoDB() # credentials for MongoDB can be set up here n_cores = cpu_count( ) # number of processors that shall be used for loading data from MongoDB max_samples = 10_000 # max number of samples per license, used for single and multi label problem, value min_samples = 1_000 # min number of samples per license, decides if the license will be taken to training, internally limited to 10 collection_analysis = CollectionAnalysis.load_object( get_db_analysis_dir() / 'Conclusion.pkl') training_licenses = collection_analysis.get_mapped_licenses_with_min_samples(
    docs._cursor.close()


# end functions for tagging the benchmark set

if __name__ == '__main__':
    logger = root_logger('update_data', logging.INFO)
    load_dotenv(find_dotenv())
    preprocessor = PreprocessorSpacy()

    try:
        db = MongoDB()  # credentials for MongoDB can be set up here
        # number of processors that shall be used can be set up here
        n_cores = cpu_count()
        license_mapping = load_license_mapping_file(get_train_dir() /
                                                    LICENSE_MAPPING_FILENAME)

        # Re-tag the Conclusion collection in parallel batches, then compute
        # and persist collection-level statistics.
        update(update_document,
               n_cores=n_cores,
               batch_size=10_000,
               collection=Documents.Conclusion)
        collection_analysis = analyze_in_parallel(
            db,
            n_cores=n_cores,
            batch_size=10_000,
            collection=Documents.Conclusion)
        collection_analysis.save_statistics(db)
        collection_analysis.save_object()