コード例 #1
0
    def pipeline(self):
        """Run the complete workflow from URL list to predictions.

        Steps, in order:
          1. Build the ``Configuration`` and the 'default' logger.
          2. Let ``ScreenshotTaker`` process the URL list, then run its
             ``screenshot_module`` on the event loop for the first
             ``config.batch_size`` entries.
          3. Call ``PreProcessor.resize_pictures`` and
             ``PreProcessor.delete_white_pictures`` on the screenshots.
          4. Call ``Predictor.predict`` on the processed pictures.
          5. Log the total elapsed wall-clock time.
        """
        # wall-clock reference for the duration reported at the end
        started_at = time.time()

        config = Configuration()
        logger = configure_logger('default')
        logger.info("Operation started")

        # URL handling and full-size screenshots
        screenshot_taker = ScreenshotTaker(
            config.url_file_path,
            config.url_file_name,
            config.hashed_url_file_path,
            config.hashed_url_file_name,
            logger,
        )
        hashed_urls = screenshot_taker.link_processor()

        # screenshot_module is driven to completion by the event loop; only the
        # first config.batch_size entries of the list are handed over
        event_loop = asyncio.get_event_loop()
        first_batch = hashed_urls[:config.batch_size]
        event_loop.run_until_complete(
            screenshot_taker.screenshot_module(first_batch,
                                               config.screenshots_path))

        # resizing and filtering of the screenshots
        preprocessor = PreProcessor(
            config.screenshots_path,
            config.path_to_processed,
            config.width,
            config.height,
            logger,
        )
        preprocessor.resize_pictures(
            config.screenshots_path,
            config.path_to_processed,
            config.width,
            config.height,
        )
        preprocessor.delete_white_pictures(config.path_to_processed)

        # optional cleanup of the full-size screenshots (currently disabled):
        # preprocessor.clear_screenshots()

        # prediction on the processed pictures
        predictor = Predictor(
            config.path_to_processed,
            config.path_of_submission,
            config.path_of_the_model,
            config.model_name,
            config.width,
            config.height,
            config.positive_threshold,
            config.hashed_url_file_path,
            config.hashed_url_file_name,
        )
        predictor.predict(
            config.path_to_processed,
            config.path_of_submission,
            config.path_of_the_model,
            config.model_name,
            config.positive_threshold,
        )

        # optional cleanup of the processed screenshots (currently disabled):
        # predictor.clear_processed_screenshots()

        elapsed = time.time() - started_at
        logger.info("Successfully Completed")
        logger.info("Total time needed: " + str(elapsed) + " seconds")
コード例 #2
0
    def calculate_metrics(self, path):
        """Score the predictor against a file of labelled records and print the scores.

        Every non-blank line of the file is parsed as a JSON object. Its
        ``"fact"`` value is passed to ``Predictor.predict``; the first element
        of the returned value and the record's ``"meta"`` value are folded into
        the running tally through ``self.judger.gen_new_result``. The value
        returned by ``self.judger.get_score`` after the last record is printed.

        :param path: path of a UTF-8 encoded file with one JSON object per line
        :return: None; the scores are printed. ``None`` is printed when the
            file contains no records.
        """
        predictor = Predictor()

        def empty_counts():
            # a fresh dict per task so the tallies are never shared
            return {"TP": 0, "FP": 0, "TN": 0, "FN": 0}

        result = [
            [empty_counts() for _ in range(self.judger.task1_cnt)],
            [empty_counts() for _ in range(self.judger.task2_cnt)],
            {"cnt": 0, "score": 0},
        ]

        # Bound up front: with an empty file the loop body never runs and the
        # final print would otherwise raise UnboundLocalError.
        scores = None

        with open(path, encoding="UTF-8") as f:
            # Iterate the file object directly instead of readlines() so the
            # whole file is not held in memory.
            for line in f:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline at EOF)
                    continue
                record = json.loads(line)
                ground_truth = record["meta"]
                fact = record["fact"]
                ans = predictor.predict(fact)
                result = self.judger.gen_new_result(result, ground_truth,
                                                    ans[0])
                # NOTE(review): recomputed for every record although only the
                # last value is printed; could move after the loop if
                # get_score has no side effects -- confirm against the judger.
                scores = self.judger.get_score(result)
        print(scores)
コード例 #3
0
 def handle(self, *args, **kwargs):
     """Log the last stored rate and the predicted value for a currency.

     The currency comes from the ``currency`` keyword argument; ``'USD'`` is
     used when that argument is missing or falsy.
     """
     currency = kwargs.get('currency')
     if not currency:
         currency = 'USD'

     max_date, last_rate, prediction = Predictor().predict(currency)

     # only the first element of the prediction is reported
     message = "Last date stored: %s, rate was %s - predicted value: %s" % (
         max_date.isoformat(), last_rate, prediction[0])
     logger.info(message)
コード例 #4
0
    def __init__(self):
        """Load the phase database, build the collaborators and hook the exit signals.

        The database is loaded before the characterizer and the predictor that
        receive it are constructed.
        """
        phase_db = Database()
        logging.info("\nReloading phase DB")
        phase_db.load_database()
        self.database = phase_db

        self.data_collector = DataCollector(WORKERS)
        self.characterizer = Characterizer(phase_db)
        self.predictor = Predictor(phase_db, ALGO)
        self.metrics_publisher = MetricsPublisher()
        self.curr_phase = ""

        # SIGINT and SIGTERM both go through sawcap_exit for a graceful exit
        for signum in (SIGINT, SIGTERM):
            signal(signum, self.sawcap_exit)
コード例 #5
0
def main(archive_file: str, save_directory: str) -> None:
    """Export a trained DeCLUTR model from an AllenNLP archive for HuggingFace Transformers.

    The archive at `archive_file` is loaded, the transformer model and tokenizer are taken from
    its "tokens" token embedder, and both are written to `save_directory` via `save_pretrained`.
    """
    output_dir = Path(save_directory)
    output_dir.parents[0].mkdir(parents=True, exist_ok=True)

    common_util.import_module_and_submodules("declutr")

    # A cuda_device of -1 puts the model on the CPU before it is saved, which sidesteps problems
    # with distributed models.
    overrides = (
        '{"trainer.cuda_device": -1, '
        '"model.text_field_embedder.token_embedders.tokens.load_directory": "None"}'
    )
    declutr_predictor = Predictor.from_archive(
        load_archive(archive_file, overrides=overrides), predictor_name="declutr"
    )

    tokens_embedder = declutr_predictor._model._text_field_embedder._token_embedders["tokens"]
    model, tokenizer = tokens_embedder.transformer_model, tokens_embedder.tokenizer

    # The path is passed as a string because of
    # https://github.com/huggingface/transformers/pull/4650; the cast can go once that PR is
    # merged and Transformers is updated.
    destination = str(output_dir)
    model.save_pretrained(destination)
    tokenizer.save_pretrained(destination)

    message = (
        f"{SAVING} {HUGGING_FACE} Transformers compatible model saved to: {output_dir}."
        " See https://huggingface.co/transformers/model_sharing.html for instructions on"
        f" hosting the model with {HUGGING_FACE} Transformers."
    )
    typer.secho(message, bold=True)
コード例 #6
0
class Sawcap:
    """Driver that snapshots workers, predicts resource usage and tracks accuracy.

    Relies on module-level settings (``WORKERS``, ``ALGO``, ``INTERVAL``,
    ``ENABLE_STATS``, ``ANOMALY_DETECTION_ENABLED``, ``DATA_DIR``,
    ``STATS_FILE``) and on the module-level ``stats`` dict, whose
    ``"predicted_data"`` and ``"actual_data"`` lists are appended to in
    :meth:`run` and read in :meth:`calculate_errors`.
    """

    def __init__(self):
        """Load the phase database, build the collaborators and hook the exit signals."""
        self.database = Database()
        logging.info("\nReloading phase DB")
        self.database.load_database()

        self.data_collector = DataCollector(WORKERS)
        self.characterizer = Characterizer(self.database)
        self.predictor = Predictor(self.database, ALGO)
        self.metrics_publisher = MetricsPublisher()
        self.curr_phase = ""

        # for graceful exit
        signal(SIGINT, self.sawcap_exit)
        signal(SIGTERM, self.sawcap_exit)

    def run(self):
        """Run the collect / characterize / predict / update cycle forever.

        The loop has no exit condition of its own; it ends only through
        :meth:`sawcap_exit` (signal handler or detected anomaly), which calls
        ``sys.exit``.
        """
        while True:
            self._get_new_snapshot()  # prev1 (prev 2 is last prev1)
            sleep(INTERVAL)
            self._get_new_snapshot()  # curr

            # Check which phase we are in currently
            self.curr_phase = self.characterizer.get_current_phase()

            # Based on the current phase make a prediction
            predicted, phase_exists = self.predictor.get_prediction(
                self.curr_phase)

            # Log data for error calculation and publish predictions
            if ENABLE_STATS:
                actual = self.database.get_curr_resource()
                stats["predicted_data"].append(predicted)
                stats["actual_data"].append(actual)
                acc_cpu, acc_mem = self.calculate_errors()

                self.metrics_publisher.publish_predictions(actual, predicted)
                self.metrics_publisher.publish_accuracy(acc_cpu, acc_mem)

            actual_fmt = [
                "{:.2f}".format(a) for a in self.database.get_curr_resource()
            ]
            predicted_fmt = ["{:.2f}".format(a) for a in predicted]
            logging.info("Actual: " + str(actual_fmt) + " Predicted:" +
                         str(predicted_fmt))

            if ANOMALY_DETECTION_ENABLED:
                anomaly_detected = self.predictor.detect_anomaly(
                    predicted, self.database.get_curr_resource(),
                    self.curr_phase, phase_exists)
                if anomaly_detected:
                    self.sawcap_exit()

            # Add profile to phase database
            self.characterizer.update_phase_database(self.curr_phase)

            # Update ML model for current phase, if possible
            self.predictor.update_ml_model(self.curr_phase)

    @publish_latency("data_collection_latency")
    def _get_new_snapshot(self):
        """Fetch data from the workers and store it in the database as a ``Snapshot``."""
        stacktrace_functions, resource_data = (
            self.data_collector.get_data_from_workers())
        snapshot = Snapshot(resource_data, stacktrace_functions)
        logging.debug("Added new snapshot to database")
        self.database.add_new_snapshot(snapshot)

    def sawcap_exit(self, signal_received=None, frame=None):
        """Export the stats, save the database and exit with status 2.

        Registered as the SIGINT/SIGTERM handler (hence the two optional,
        unused parameters) and also called directly from :meth:`run`.
        """
        try:
            self.export_stats()
        finally:
            # The database is saved even when exporting the stats fails, so a
            # problem in the stats path cannot cost the collected phase data.
            logging.info('\nExiting after saving the current database')
            self.database.save_database()
        sys.exit(2)

    def export_stats(self):
        """Append the current accuracy rates to ``DATA_DIR + STATS_FILE`` and log ``stats``."""
        acc_cpu, acc_mem = self.calculate_errors()
        file_path = DATA_DIR + STATS_FILE

        # Context manager: the handle is closed even if one of the writes raises.
        with open(file_path, "a") as f:
            f.write("\n### Accuracy Rates ###\n")
            f.write(f'CPU Prediction Accuracy: {acc_cpu:.3f}\n')
            f.write(f'MEM Prediction Accuracy: {acc_mem:.3f}\n')

        # Temp change to dump all stats
        logging.info(stats)

    def calculate_errors(self):
        """Compute and log the CPU and memory prediction accuracy.

        Accuracy is ``100 - SMAPE(actual, predicted)`` over the values
        collected in ``stats``; index 0 of every entry is used for CPU and
        index 1 for memory.

        Returns:
            Tuple ``(acc_cpu, acc_mem)``.
        """
        logging.info("\n### Accuracy Rates ###")

        actual_resources = stats["actual_data"]
        predicted_resources = stats["predicted_data"]

        # CPU resource usage accuracy
        actual_resources_cpu = [resource[0] for resource in actual_resources]
        predicted_resources_cpu = [
            resource[0] for resource in predicted_resources
        ]
        acc_cpu = 100 - SMAPE(actual_resources_cpu, predicted_resources_cpu)
        logging.info('CPU Prediction Accuracy: %.3f %%' % (acc_cpu))

        # Memory usage accuracy
        actual_resources_mem = [resource[1] for resource in actual_resources]
        predicted_resources_mem = [
            resource[1] for resource in predicted_resources
        ]
        acc_mem = 100 - SMAPE(actual_resources_mem, predicted_resources_mem)
        logging.info('MEM Prediction Accuracy: %.3f %%' % (acc_mem))

        return acc_cpu, acc_mem
コード例 #7
0
def main():
    """Run the predictor on one sample WAV file and print what it returns.

    The audio sample and the pickled model are read from fixed paths relative
    to the working directory.
    """
    audio_file = './../data/audio/test_samples/30.wav'
    model_file = './../data/model/svm_model.pkl'

    predictor = Predictor(model_file)
    # Call form of print: the Python 2 print statement is a SyntaxError on
    # Python 3, while this single-argument call prints the same text on both.
    print(predictor.predict(audio_file))
コード例 #8
0
ファイル: run_senteval.py プロジェクト: vano1205/EfficientCL
def allennlp(
    path_to_senteval: str,
    path_to_allennlp_archive: str,
    output_filepath: str = None,
    weights_file: str = None,
    cuda_device: int = -1,
    output_dict_field: str = "embeddings",
    predictor_name: str = None,
    include_package: List[str] = None,
    prototyping_config: bool = False,
    verbose: bool = False,
) -> None:
    """Evaluate a trained AllenNLP model against the SentEval benchmark.

    Args:
        path_to_senteval: Passed to the SentEval setup and run helpers.
        path_to_allennlp_archive: Archive handed to `load_archive`.
        output_filepath: Passed to `_run_senteval`.
        weights_file: Passed to `load_archive`.
        cuda_device: Passed to `load_archive`.
        output_dict_field: Key under which each predictor output holds its embeddings.
        predictor_name: Passed to `Predictor.from_archive`.
        include_package: Packages imported (with submodules) before the archive is loaded.
        prototyping_config: Passed to `_setup_senteval`.
        verbose: Passed to `_setup_senteval`.
    """
    # The project-local archival and predictor modules are used here in place of
    # allennlp.models.archival.load_archive and allennlp.predictors.Predictor.
    from archival.archival import load_archive
    from predictor.predictor import Predictor

    def prepare(params, samples):
        # SentEval "prepare" hook; nothing to set up.
        return None

    @torch.no_grad()
    def batcher(params, batch):
        # SentEval "batcher" hook: turn a batch of token lists into a stacked embedding array.
        cleaned = _cleanup_batch(batch)
        # The tokens are joined back into text so the predictor tokenizes them again itself.
        json_inputs = [{"text": " ".join(tokens)} for tokens in cleaned]
        predictions = params.predictor.predict_batch_json(json_inputs)
        # Each prediction is a dictionary; the embeddings sit under output_dict_field.
        return np.vstack([prediction[output_dict_field] for prediction in predictions])

    # Custom dataset readers and models that may live in the archive need their packages imported.
    # See: https://tinyurl.com/whkmoqh
    packages = include_package or []
    print("include package ", packages)
    for package_name in packages:
        print("package name is ", package_name)
        common_util.import_module_and_submodules(package_name)

    # Load the archived model and wrap it in a predictor.
    predictor = Predictor.from_archive(
        load_archive(
            path_to_allennlp_archive,
            cuda_device=cuda_device,
            weights_file=weights_file,
            overrides="{'trainer.use_amp': true}",
        ),
        predictor_name,
    )
    success_message = (
        f'{SUCCESS} Model from AllenNLP archive "{path_to_allennlp_archive}" loaded successfully.'
    )
    typer.secho(success_message, fg=typer.colors.GREEN, bold=True)

    # Set up the SentEval params, attach the predictor for the batcher, and run.
    params_senteval = _setup_senteval(path_to_senteval, prototyping_config, verbose)
    params_senteval["predictor"] = predictor
    print("params_senteval", params_senteval)
    _run_senteval(params_senteval, path_to_senteval, batcher, prepare, output_filepath)
コード例 #9
0
ファイル: main.py プロジェクト: bbiyongel/kospi_predictor
from collector.kospi_db_manager import KospiDBManager
from collector.collector import DailyCollector
# from collector.collector import HourlyCollector
from collector.timeutill_helper import TimeUtillHelper
from predictor.predictor import Predictor

# Script body: run the daily collection for one stock code, then check the predictor.

# Collection window. NOTE(review): the arguments look like (year, month, day),
# i.e. 2009-05-01 to 2019-06-20 -- confirm against TimeUtillHelper.
start_time = TimeUtillHelper(2009, 5, 1)
end_time = TimeUtillHelper(2019, 6, 20)
# Daily collector for the code "035420" over that window
daily_collector = DailyCollector("035420", start_time, end_time)
# Read the stock data first, then update the stock and labelled databases
daily_collector.read_stock_data()
daily_collector.update_stock_database()
daily_collector.update_labelled_database()

# Disabled hourly variant of the same three steps; it also needs the
# HourlyCollector import, which is commented out at the top of the file.
# start_time = TimeUtillHelper(2019, 7, 29, 9, 10, 00)
# end_time = TimeUtillHelper(2019, 8, 2, 15, 30, 00)
# hourly_collector = HourlyCollector("035420", start_time, end_time)
# hourly_collector.read_stock_data()
# hourly_collector.update_stock_database()
# hourly_collector.update_labelled_database()

# Build the predictor and run its check
predictor = Predictor()
predictor.check_predictor()