def consume_data(self, data, passback, output_dir):
    open_threads = []
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    # A configured value of -1 means "use every available core".
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        out_dir = output_dir + path.sep + "ann_models"
        if not path.exists(out_dir):
            os.mkdir(out_dir)
        # Dispatch one ANN training job per ticker to the worker pool.
        for ticker, training_data in data.items():
            open_threads.append(pool.apply_async(
                handle_data,
                [ticker, training_data, out_dir, self._overwrite_existing],
                {'trend_lookahead': self._trend_lookahead,
                 'combined_examples': self._combined_examples_factor}))
        # Block on each job, showing a progress bar over the submitted work.
        for t in tqdm.tqdm(open_threads):
            t.get()
def consume_data(self, data, passback, output_dir):
    out_dir = output_dir + path.sep + 'svm_strength_models'
    if not path.exists(out_dir):
        os.mkdir(out_dir)
    exec_options = config_parser_singleton.read_execution_options()
    max_processes = exec_options[1]
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        open_jobs = []
        for ticker, training_data in data.items():
            job = pool.apply_async(
                handle_data,
                [ticker, training_data, out_dir, self._overwrite_existing],
                {'combined_examples': self._combined_examples_factor})
            open_jobs.append(job)
        for job in tqdm.tqdm(open_jobs):
            job.get()
def consume_data(self, data, passback, output_dir):
    out_dir = output_dir + path.sep + 'random_forest_models'
    if not path.exists(out_dir):
        os.mkdir(out_dir)
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        tasks = []
        for ticker, training_data in data.items():
            tasks.append(pool.apply_async(
                handle_model_creation,
                [ticker, training_data, out_dir, self._overwrite_existing],
                {"combined_examples": self._periods_per_example}))
        for task in tqdm.tqdm(tasks):
            task.get()
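All three consumers share the same dispatch pattern: a module-level worker function receives one ticker's training data plus keyword options via pool.apply_async, so the work can be pickled and run in separate processes. Only the call-site signature is visible in this excerpt; the sketch below is an illustrative assumption of the worker's shape (its body, the ".model" filename, and the default for combined_examples are placeholders), not the project's actual implementation.

# Illustrative sketch of the worker shape implied by the apply_async call sites
# above; the real training logic lives in the respective training-manager module.
import os


def handle_model_creation(ticker, training_data, output_dir, overwrite_existing,
                          combined_examples=1):
    model_path = os.path.join(output_dir, ticker + '.model')  # filename is assumed
    if os.path.exists(model_path) and not overwrite_existing:
        return  # keep the model already on disk
    # ... build examples from training_data using combined_examples periods,
    # fit the model, and write it to model_path ...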
def predict_data(self, data, passback, in_model_dir):
    out_dir = in_model_dir + path.sep + 'similarity_analysis'
    if not path.exists(out_dir):
        os.mkdir(out_dir)
    exec_options = config_parser_singleton.read_execution_options()
    max_processes = exec_options[1]
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        open_jobs = []
        for ticker, prediction_data in data.items():
            open_jobs.append(pool.apply_async(
                predict_data,
                [ticker, out_dir, prediction_data],
                {'combined_examples': self._combined_examples_factor,
                 'num_similar_regions': self._num_similar_regions}))
        for job in open_jobs:
            job.get()
def predict_data(self, data, passback, in_model_dir):
    model_dir = in_model_dir + path.sep + 'ann_models'
    if not path.exists(model_dir):
        raise FileNotFoundError("Model storage directory for ANN prediction does not exist. Please run model "
                                "creation without the prediction flag set to true to create models used in "
                                "prediction.")
    predictions = {}
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        working_threads = []
        for ticker, prediction_data in data.items():
            working_threads.append(pool.apply_async(
                predict_data,
                [ticker, model_dir, prediction_data],
                {'trend_lookahead': self._trend_lookahead,
                 'combined_examples': self._combined_examples_factor}))
        # Collect results as each worker finishes; a worker may return None
        # when no prediction could be produced for its ticker.
        for worker in tqdm.tqdm(working_threads):
            result = worker.get()
            if result is not None:
                ticker, actual_prediction, accuracy = result
                predictions[ticker] = (actual_prediction, accuracy)
    return predictions
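The dictionary returned above maps each ticker to a (prediction, accuracy) pair. A downstream consumer of that result, such as a string serializer registered with pass_data below, could walk it roughly as in the following sketch; the function name and output format here are hypothetical, not part of the project.

# Hypothetical serializer illustrating the {ticker: (prediction, accuracy)}
# shape returned by predict_data above.
def ann_predictions_to_string(predictions):
    lines = []
    for ticker, (prediction, accuracy) in predictions.items():
        lines.append("%s: prediction=%s, accuracy=%s" % (ticker, prediction, accuracy))
    return "\n".join(lines)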
def pass_data(self, output_dir, stop_for_errors=False, print_errors=True):
    provider = None
    consumer = None
    columns = None
    predict, _, export_data = read_execution_options()
    ret_predictions = {}
    for provKey, provider in self.providers.items():
        try:
            if provKey not in self.consumers.keys():
                continue
            registeredConsumers = self.consumers[provKey]
            for consumer_set in registeredConsumers:
                consumer = None
                args = None
                passback = None
                keyword_args = {}
                # A consumer registration may carry one to four elements:
                # (consumer,), (consumer, args), (consumer, args, passback),
                # or (consumer, args, passback, keyword_args).
                if len(consumer_set) == 3:
                    consumer, args, passback = consumer_set
                elif len(consumer_set) == 2:
                    consumer, args = consumer_set
                elif len(consumer_set) == 1:
                    consumer = consumer_set[0]
                elif len(consumer_set) == 4:
                    consumer, args, passback, keyword_args = consumer_set
                else:
                    raise ValueError("Invalid number of consumer registration arguments")
                if not predict:
                    # Training pass: hand the provider's data straight to the consumer.
                    consumer.consume_data(provider.generate_data(*args, **keyword_args),
                                          passback, output_dir)
                else:
                    # Prediction pass: collect predictions, then serialize or export them.
                    predictions = consumer.predict_data(
                        provider.generate_prediction_data(*args, **keyword_args),
                        passback, output_dir)
                    consumer_passback_id = str(type(consumer)) + str(passback)
                    if consumer_passback_id in self._prediction_string_serializers and not export_data:
                        predictions = self._prediction_string_serializers[consumer_passback_id](predictions)
                    elif consumer_passback_id in self._data_exportation_functions and export_data:
                        self._data_exportation_functions[consumer_passback_id](predictions, output_dir)
                    ret_predictions[consumer_passback_id] = predictions
        except Exception:
            if print_errors:
                traceback.print_exc()
            logger.logger.log(logger.NON_FATAL_ERROR,
                              "Above error was encountered during processing "
                              "of the following provider/consumer pair")
            logger.logger.log(logger.NON_FATAL_ERROR,
                              "\t%s %s" % (type(provider), type(consumer)))
            logger.logger.log(logger.NON_FATAL_ERROR,
                              "With the following columns as a data argument")
            logger.logger.log(logger.NON_FATAL_ERROR, "\t%s" % str(columns))
            if stop_for_errors:
                return
    return ret_predictions
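For reference, the one- to four-element tuples unpacked in pass_data correspond to consumer registrations of roughly the following shapes. The registry's actual registration API and the argument values are not shown in this excerpt, so everything below is an illustrative assumption.

# Hypothetical registration entries matching the tuple lengths handled above;
# self.consumers[provider_key] is assumed to hold a list shaped like:
#   [(consumer,),                                          # consumer only
#    (consumer, ['HIGH', 'LOW']),                          # consumer + positional args
#    (consumer, ['HIGH', 'LOW'], 'ann_passback'),          # ... + passback identifier
#    (consumer, ['HIGH', 'LOW'], 'ann_passback', {'window': 30})]  # ... + keyword args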
import importlib
import os

from data_providing_module import configurable_registry
from data_providing_module.data_provider_registry import registry
from general_utils.config.config_parser_singleton import parser, update_config, read_execution_options
from general_utils.exportation import csv_amalgamation
from general_utils.logging import logger

# os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

if __name__ == "__main__":
    import sys
    args = sys.argv[1:]
    # Import every provider and consumer module so each registers itself
    # with the provider/consumer registries on import.
    providers = os.listdir("data_providing_module/data_providers")
    for provider in providers:
        if provider.startswith('__'):
            continue
        importlib.import_module('data_providing_module.data_providers.' + provider.replace('.py', ''))
    consumers = os.listdir("training_managers")
    for consumer in consumers:
        if consumer.startswith('__'):
            continue
        importlib.import_module("training_managers." + consumer.replace('.py', ''))
    configurable_registry.config_registry.handle_configurables(parser)
    predict, max_processes, export_predictions = read_execution_options()
    update_config()
    ret_predictions = registry.pass_data(args[0], stop_for_errors=False)
    if predict and not export_predictions:
        for passback, predictions in ret_predictions.items():
            logger.logger.log(logger.OUTPUT, predictions)