def pipeline(self):
    """Run the screenshot, preprocessing and prediction stages in sequence.

    Every path, size and threshold is read from ``Configuration``. Only the
    first ``config.batch_size`` hashed URLs are captured. The total elapsed
    wall-clock time is logged once all stages have finished.
    """
    # Timer covering the whole run.
    started_at = time.time()

    # Settings come from the configuration file.
    config = Configuration()

    logger = configure_logger('default')
    logger.info("Operation started")

    # Stage 1: load the URLs from the CSV file and take full-size screenshots.
    screenshot_taker = ScreenshotTaker(
        config.url_file_path,
        config.url_file_name,
        config.hashed_url_file_path,
        config.hashed_url_file_name,
        logger,
    )
    hashed_urls = screenshot_taker.link_processor()

    # The screenshot module is driven to completion by the event loop.
    event_loop = asyncio.get_event_loop()
    current_batch = hashed_urls[:config.batch_size]
    event_loop.run_until_complete(
        screenshot_taker.screenshot_module(current_batch, config.screenshots_path)
    )

    # Stage 2: resize the screenshots and drop the white pictures.
    preprocessor = PreProcessor(
        config.screenshots_path,
        config.path_to_processed,
        config.width,
        config.height,
        logger,
    )
    preprocessor.resize_pictures(
        config.screenshots_path,
        config.path_to_processed,
        config.width,
        config.height,
    )
    preprocessor.delete_white_pictures(config.path_to_processed)
    # NOTE: removal of the full-size screenshots
    # (preprocessor.clear_screenshots()) is currently disabled.

    # Stage 3: predict on the resized images and label them accordingly.
    predictor = Predictor(
        config.path_to_processed,
        config.path_of_submission,
        config.path_of_the_model,
        config.model_name,
        config.width,
        config.height,
        config.positive_threshold,
        config.hashed_url_file_path,
        config.hashed_url_file_name,
    )
    predictor.predict(
        config.path_to_processed,
        config.path_of_submission,
        config.path_of_the_model,
        config.model_name,
        config.positive_threshold,
    )
    # NOTE: removal of the processed screenshots
    # (predictor.clear_processed_screenshots()) is currently disabled.

    finished_at = time.time()
    elapsed_seconds = str(finished_at - started_at)
    logger.info("Successfully Completed")
    logger.info("Total time needed: " + elapsed_seconds + " seconds")
def calculate_metrics(self, path):
    """Score a fresh ``Predictor`` against a JSON-lines file and print the scores.

    Each non-blank line of ``path`` is parsed as a JSON object. Its ``"fact"``
    entry is passed to ``Predictor.predict`` and its ``"meta"`` entry is used
    as the ground truth. The first element of each prediction is folded into
    the running TP/FP/TN/FN counters through ``self.judger.gen_new_result``.
    After the whole file has been read, ``self.judger.get_score`` is called
    on the accumulated result and its return value is printed.

    :param path: path of the UTF-8 encoded file, one JSON object per line
    :return: None
    """
    predictor = Predictor()

    def empty_counts():
        # A new dict per slot so the counters are never shared between slots.
        return {"TP": 0, "FP": 0, "TN": 0, "FN": 0}

    result = [
        [empty_counts() for _ in range(self.judger.task1_cnt)],
        [empty_counts() for _ in range(self.judger.task2_cnt)],
        {"cnt": 0, "score": 0},
    ]

    with open(path, encoding="UTF-8") as f:
        # Iterate the file lazily instead of loading every line up front.
        for raw_line in f:
            # A blank line (typically the trailing newline at end of file)
            # is not valid JSON; skip it rather than crash in json.loads.
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)
            ground_truth = record["meta"]
            fact = record["fact"]
            ans = predictor.predict(fact)
            result = self.judger.gen_new_result(result, ground_truth, ans[0])

    scores = self.judger.get_score(result)
    print(scores)
def handle(self, *args, **kwargs):
    """Run the predictor for the requested currency and log the outcome.

    The currency is taken from the ``currency`` keyword option; a missing or
    empty value falls back to ``'USD'``.
    """
    currency = kwargs.get('currency')
    if not currency:
        currency = 'USD'

    max_date, last_rate, prediction = Predictor().predict(currency)

    message = "Last date stored: %s, rate was %s - predicted value: %s" % (
        max_date.isoformat(),
        last_rate,
        prediction[0],
    )
    logger.info(message)
def __init__(self):
    """Create the collaborating components and register the exit signal handlers."""
    self.database = Database()
    logging.info("\nReloading phase DB")
    self.database.load_database()

    # The characterizer and the predictor share the same database instance.
    phase_db = self.database
    self.data_collector = DataCollector(WORKERS)
    self.characterizer = Characterizer(phase_db)
    self.predictor = Predictor(phase_db, ALGO)
    self.metrics_publisher = MetricsPublisher()
    self.curr_phase = ""

    # For graceful exit: both SIGINT and SIGTERM are routed to sawcap_exit.
    for signum in (SIGINT, SIGTERM):
        signal(signum, self.sawcap_exit)
def main(archive_file: str, save_directory: str) -> None:
    """Export the model and tokenizer of a trained DeCLUTR AllenNLP archive.

    The archive at `archive_file` is loaded, and the transformer model and
    tokenizer of its "tokens" token embedder are written to `save_directory`
    with `save_pretrained`, so they can be used with HuggingFace Transformers.
    """
    target_dir = Path(save_directory)
    # Make sure the directory that will contain `save_directory` exists.
    target_dir.parents[0].mkdir(parents=True, exist_ok=True)

    common_util.import_module_and_submodules("declutr")

    # cuda_device -1 places the model onto the CPU before saving. This avoids
    # issues with distributed models.
    overrides = (
        '{"trainer.cuda_device": -1, '
        '"model.text_field_embedder.token_embedders.tokens.load_directory": "None"}'
    )
    archive = load_archive(archive_file, overrides=overrides)
    predictor = Predictor.from_archive(archive, predictor_name="declutr")

    token_embedder = predictor._model._text_field_embedder._token_embedders["tokens"]

    # The path is cast to a string to avoid this error:
    # https://github.com/huggingface/transformers/pull/4650
    # The cast can be removed once that PR is merged and Transformers is updated.
    destination = str(target_dir)
    token_embedder.transformer_model.save_pretrained(destination)
    token_embedder.tokenizer.save_pretrained(destination)

    message = (
        f"{SAVING} {HUGGING_FACE} Transformers compatible model saved to: {target_dir}."
        " See https://huggingface.co/transformers/model_sharing.html for instructions on"
        f" hosting the model with {HUGGING_FACE} Transformers."
    )
    typer.secho(message, bold=True)
class Sawcap:
    """Control loop that snapshots worker data, identifies the current phase,
    predicts resource usage for it and keeps the phase database and the
    per-phase model updated."""

    def __init__(self):
        """Create the collaborating components and register the exit signal handlers."""
        self.database = Database()
        logging.info("\nReloading phase DB")
        self.database.load_database()
        self.data_collector = DataCollector(WORKERS)
        self.characterizer = Characterizer(self.database)
        self.predictor = Predictor(self.database, ALGO)
        self.metrics_publisher = MetricsPublisher()
        self.curr_phase = ""

        # for graceful exit
        signal(SIGINT, self.sawcap_exit)
        signal(SIGTERM, self.sawcap_exit)

    def run(self):
        """Loop forever: take two snapshots INTERVAL apart, predict, then update.

        The loop only ends through ``sawcap_exit`` (a signal, or a detected
        anomaly when ANOMALY_DETECTION_ENABLED is set).
        """
        while True:
            self._get_new_snapshot()  # prev1 (prev 2 is last prev1)
            sleep(INTERVAL)
            self._get_new_snapshot()  # curr

            # Check which phase we are in currently
            self.curr_phase = self.characterizer.get_current_phase()

            # Based on the current phase make a prediction
            predicted, phase_exists = self.predictor.get_prediction(
                self.curr_phase)

            # Log data for error calculation and print predictions
            if ENABLE_STATS:
                actual = self.database.get_curr_resource()
                stats["predicted_data"].append(predicted)
                stats["actual_data"].append(actual)
                acc_cpu, acc_mem = self.calculate_errors()
                self.metrics_publisher.publish_predictions(actual, predicted)
                self.metrics_publisher.publish_accuracy(acc_cpu, acc_mem)

            logging.info("Actual: " + str([
                "{:.2f}".format(a)
                for a in self.database.get_curr_resource()
            ]) + " Predicted:" + str(["{:.2f}".format(a) for a in predicted]))

            if ANOMALY_DETECTION_ENABLED:
                anomaly_detected = self.predictor.detect_anomaly(
                    predicted, self.database.get_curr_resource(),
                    self.curr_phase, phase_exists)
                if anomaly_detected:
                    self.sawcap_exit()

            # Add profile to phase database
            self.characterizer.update_phase_database(self.curr_phase)

            # Update ML model for current phase, if possible
            self.predictor.update_ml_model(self.curr_phase)

    @publish_latency("data_collection_latency")
    def _get_new_snapshot(self):
        """Collect data from the workers and store it as a new snapshot."""
        stacktrace_functions, resource_data = self.data_collector.get_data_from_workers(
        )
        snapshot = Snapshot(resource_data, stacktrace_functions)
        logging.debug("Added new snapshot to database")
        self.database.add_new_snapshot(snapshot)

    # Exit after catching a Keyboard Interrupt
    def sawcap_exit(self, signal_received=None, frame=None):
        """Export the stats, save the database and exit with status 2.

        Registered as the SIGINT/SIGTERM handler, hence the
        ``(signal_received, frame)`` parameters; both default to None so the
        method can also be called directly.
        """
        self.export_stats()
        logging.info('\nExiting after saving the current database')
        self.database.save_database()
        sys.exit(2)

    def export_stats(self):
        """Append the current accuracy rates to the stats file and log all stats."""
        acc_cpu, acc_mem = self.calculate_errors()
        file_path = DATA_DIR + STATS_FILE
        # The context manager closes the file even if one of the writes raises.
        with open(file_path, "a") as f:
            f.write("\n### Accuracy Rates ###\n")
            f.write(f'CPU Prediction Accuracy: {acc_cpu:.3f}\n')
            f.write(f'MEM Prediction Accuracy: {acc_mem:.3f}\n')

        # Temp change to dump all stats
        logging.info(stats)

    def calculate_errors(self):
        """Compute and log the CPU and memory prediction accuracy.

        Each accuracy is ``100 - SMAPE(actual, predicted)`` over the values
        recorded in ``stats``; index 0 of every record is the CPU value and
        index 1 the memory value.

        :return: tuple ``(acc_cpu, acc_mem)``
        """
        logging.info("\n### Accuracy Rates ###")
        actual_resources = stats["actual_data"]
        predicted_resources = stats["predicted_data"]

        # CPU resource usage accuracy
        actual_resources_cpu = [resource[0] for resource in actual_resources]
        predicted_resources_cpu = [
            resource[0] for resource in predicted_resources
        ]
        acc_cpu = 100 - SMAPE(actual_resources_cpu, predicted_resources_cpu)
        logging.info('CPU Prediction Accuracy: %.3f %%' % (acc_cpu))

        # Memory usage accuracy
        actual_resources_mem = [resource[1] for resource in actual_resources]
        predicted_resources_mem = [
            resource[1] for resource in predicted_resources
        ]
        acc_mem = 100 - SMAPE(actual_resources_mem, predicted_resources_mem)
        logging.info('MEM Prediction Accuracy: %.3f %%' % (acc_mem))

        return acc_cpu, acc_mem
def main():
    """Run the predictor on the sample audio file and print its prediction."""
    audio_file = './../data/audio/test_samples/30.wav'
    model_file = './../data/model/svm_model.pkl'
    predictor = Predictor(model_file)
    # Call form of print: the bare `print x` statement is a SyntaxError on
    # Python 3, while this single-argument form prints the same on Python 2.
    print(predictor.predict(audio_file))
def allennlp(
    path_to_senteval: str,
    path_to_allennlp_archive: str,
    output_filepath: str = None,
    weights_file: str = None,
    cuda_device: int = -1,
    output_dict_field: str = "embeddings",
    predictor_name: str = None,
    include_package: List[str] = None,
    prototyping_config: bool = False,
    verbose: bool = False,
) -> None:
    """Evaluates a trained AllenNLP model against the SentEval benchmark."""
    # These two imports are used in place of the (commented-out) AllenNLP ones:
    #   from allennlp.models.archival import load_archive
    #   from allennlp.predictors import Predictor
    from archival.archival import load_archive
    from predictor.predictor import Predictor

    # SentEval prepare and batcher
    def prepare(params, samples):
        # Nothing to prepare.
        return

    @torch.no_grad()
    def batcher(params, batch):
        cleaned_batch = _cleanup_batch(batch)
        # Re-tokenize the input text using the tokenizer of the dataset reader
        json_inputs = []
        for tokens in cleaned_batch:
            json_inputs.append({"text": " ".join(tokens)})
        predictions = params.predictor.predict_batch_json(json_inputs)
        # AllenNLP models return a dictionary, so access the embeddings with
        # the given key and stack them into one array.
        return np.vstack([prediction[output_dict_field] for prediction in predictions])

    # Allows us to import custom dataset readers and models that may exist in
    # the AllenNLP archive.
    # See: https://tinyurl.com/whkmoqh
    packages = include_package or []
    print("include package ", packages)
    for package_name in packages:
        print("package name is ", package_name)
        common_util.import_module_and_submodules(package_name)

    # Load the archived Model
    archive = load_archive(
        path_to_allennlp_archive,
        cuda_device=cuda_device,
        weights_file=weights_file,
        overrides="{'trainer.use_amp': true}",
    )
    predictor = Predictor.from_archive(archive, predictor_name)

    success_message = (
        f'{SUCCESS} Model from AllenNLP archive "{path_to_allennlp_archive}" loaded successfully.'
    )
    typer.secho(success_message, fg=typer.colors.GREEN, bold=True)

    # Performs a few setup steps and returns the SentEval params
    params_senteval = _setup_senteval(path_to_senteval, prototyping_config, verbose)
    params_senteval["predictor"] = predictor
    print("params_senteval", params_senteval)

    _run_senteval(params_senteval, path_to_senteval, batcher, prepare, output_filepath)
# Script: collect daily data for one stock code over a fixed date range, update
# the stock and labelled databases, then run the predictor check.
from collector.kospi_db_manager import KospiDBManager
from collector.collector import DailyCollector
# from collector.collector import HourlyCollector
from collector.timeutill_helper import TimeUtillHelper
from predictor.predictor import Predictor

# Date range handed to the daily collector (arguments look like year, month,
# day; confirm against TimeUtillHelper).
start_time = TimeUtillHelper(2009, 5, 1)
end_time = TimeUtillHelper(2019, 6, 20)

# Read the data for code "035420", then update the stock database and the
# labelled database, in that order.
daily_collector = DailyCollector("035420", start_time, end_time)
daily_collector.read_stock_data()
daily_collector.update_stock_database()
daily_collector.update_labelled_database()

# Hourly collection is currently disabled; kept for reference.
# start_time = TimeUtillHelper(2019, 7, 29, 9, 10, 00)
# end_time = TimeUtillHelper(2019, 8, 2, 15, 30, 00)
# hourly_collector = HourlyCollector("035420", start_time, end_time)
# hourly_collector.read_stock_data()
# hourly_collector.update_stock_database()
# hourly_collector.update_labelled_database()

# Runs after the databases have been updated above.
predictor = Predictor()
predictor.check_predictor()