Example #1
def test__start_logging(self):
    # todo: understand better why this test takes about 1.1 secs to execute (some of it is caused by the logging process starting and Elastic being set up)
    log_worker = start_logging()                                    # trigger logging process
    log_info()                                                      # send 4 log messages
    log_warning()
    log_info(message=random_text(), data={'a': 42})
    log_error(message='an error')
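
This test drives a logger that runs in its own process, which accounts for part of the ~1.1s startup cost noted in the todo. A minimal sketch of the general pattern, assuming the log_* helpers push plain dicts onto a multiprocessing.Queue that a worker process drains (the real helpers and their message format are not shown here):

import multiprocessing

def logging_worker(log_queue):                      # consumer: runs in a separate process
    while True:
        entry = log_queue.get()                     # blocks until a message arrives
        if entry is None:                           # None acts as a shutdown sentinel
            break
        print(f"[{entry['level']}] {entry['message']}", entry.get('data'))

if __name__ == '__main__':
    log_queue = multiprocessing.Queue()
    worker = multiprocessing.Process(target=logging_worker, args=(log_queue,), daemon=True)
    worker.start()

    log_queue.put({'level': 'INFO',  'message': 'hello',    'data': {'a': 42}})
    log_queue.put({'level': 'ERROR', 'message': 'an error', 'data': None})
    log_queue.put(None)                             # ask the worker to exit
    worker.join()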
Example #2
    def get_xmlreport(self, endpoint, fileId, dir):
        log_info(message=f"getting XML Report for {fileId} at {endpoint}")

        xmlreport = self.xmlreport_request(endpoint, fileId)
        if not xmlreport:
            raise ValueError('Failed to obtain the XML report')

        try:
            json_obj = xmltodict.parse(xmlreport)

            file_extension = json_obj["gw:GWallInfo"]["gw:DocumentStatistics"][
                "gw:DocumentSummary"]["gw:FileType"]
            self.meta_service.set_rebuild_file_extension(dir, file_extension)
            json_obj['original_hash'] = os.path.basename(dir)
            json_save_file_pretty(json_obj, os.path.join(dir, "report.json"))

            #self.report_elastic.add_report(json_obj)

            analysis_obj = self.analysis_json.get_file_analysis(
                os.path.basename(dir), json_obj)
            json_save_file_pretty(analysis_obj,
                                  os.path.join(dir, "analysis.json"))

            self.analysis_elastic.add_analysis(analysis_obj)

            return True
        except Exception as error:
            log_error(message=f"Error parsing xmlreport for {fileId}: {error}")
            return False
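
get_xmlreport relies on xmltodict.parse, which turns the engine's XML report into nested dicts whose keys keep the gw: namespace prefix, matching the lookups above. A small illustration with a made-up fragment of such a report (the namespace URL and values are placeholders, not the real schema):

import xmltodict

xml = """
<gw:GWallInfo xmlns:gw="http://example.com/gw">
  <gw:DocumentStatistics>
    <gw:DocumentSummary>
      <gw:FileType>docx</gw:FileType>
    </gw:DocumentSummary>
  </gw:DocumentStatistics>
</gw:GWallInfo>
"""

json_obj = xmltodict.parse(xml)                     # nested dicts; keys keep the gw: prefix
file_type = json_obj["gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"]["gw:FileType"]
print(file_type)                                    # -> docx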
Example #3
    @classmethod
    def setUpClass(cls) -> None:
        cls.setup_testing = Setup_Testing()
        cls.log_worker = start_logging()
        cls.storage = Storage()
        cls.config = cls.storage.config
        from cdr_plugin_folder_to_folder.utils.Logging import log_info
        log_info(message='in Temp_Config')
        cls.setup_testing.set_config_to_temp_folder()
Example #4
    def ProcessDirectoryWithEndpoint(self, itempath, file_hash,
                                     endpoint_index):

        if not os.path.isdir(itempath):
            return False

        log_info(message=f"Starting ProcessDirectoryWithEndpoint on endpoint #{endpoint_index} for file {file_hash}")
        meta_service = Metadata_Service()
        original_file_path = meta_service.get_original_file_paths(itempath)
        events = Events_Log(itempath)

        endpoint = "http://" + self.config.endpoints['Endpoints'][
            endpoint_index]['IP'] + ":" + self.config.endpoints['Endpoints'][
                endpoint_index]['Port']
        events.add_log("Processing with: " + endpoint)

        meta_service.set_f2f_plugin_version(itempath, API_VERSION)
        meta_service.set_f2f_plugin_git_commit(itempath, self.git_commit())

        try:
            file_processing = File_Processing(events, self.events_elastic,
                                              self.report_elastic,
                                              self.analysis_elastic,
                                              meta_service)
            if not file_processing.processDirectory(endpoint, itempath):
                events.add_log("CANNOT be processed")
                return False

            log_data = {
                'file': original_file_path,
                'status': FileStatus.COMPLETED,
                'error': 'none',
                'timestamp': datetime.now(),
            }
            log_info('ProcessDirectoryWithEndpoint', data=log_data)
            meta_service.set_error(itempath, "none")
            meta_service.set_status(itempath, FileStatus.COMPLETED)
            self.hash_json.update_status(file_hash, FileStatus.COMPLETED)
            events.add_log("Has been processed")
            return True
        except Exception as error:
            log_data = {
                'file': original_file_path,
                'status': FileStatus.FAILED,
                'error': str(error),
            }
            log_error(message='error in ProcessDirectoryWithEndpoint',
                      data=log_data)
            meta_service.set_error(itempath, str(error))
            meta_service.set_status(itempath, FileStatus.FAILED)
            self.hash_json.update_status(file_hash, FileStatus.FAILED)
            events.add_log("ERROR:" + str(error))
            return False
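
The endpoint URL above is assembled from self.config.endpoints; the indexing implies a configuration shaped roughly like this (IP/Port values are illustrative):

endpoints = {
    'Endpoints': [
        {'IP': '192.168.0.10', 'Port': '8080'},     # endpoint_index 0
        {'IP': '192.168.0.11', 'Port': '8080'},     # endpoint_index 1
    ]
}

endpoint_index = 0
endpoint_config = endpoints['Endpoints'][endpoint_index]
endpoint = f"http://{endpoint_config['IP']}:{endpoint_config['Port']}"
print(endpoint)                                     # -> http://192.168.0.10:8080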
Example #5
    def create_metadata(self, file_path):
        self.metadata = Metadata()
        self.metadata.add_file(file_path)

        self.metadata_elastic.add_metadata(
            self.metadata.data)  # save metadata to elastic
        log_info(
            message=f"created metadata for: {self.metadata.get_file_name()}",
            data={
                "file_path": file_path,
                "metadata_file_path": self.metadata.metadata_file_path()
            })
        return self.metadata
Example #6
def start_logging():
    global logging_worker
    queue = get_logging_queue()
    enabled = get_logging_enabled()

    if logging_worker is None and enabled.value == 0:  # if it's already enabled, don't start a new process
        worker = multiprocessing.Process(target=start_logging_process,
                                         args=(queue, enabled),
                                         daemon=True)
        worker.start()
        enabled.value = 1  # set enabled value
        logging_worker = worker
        log_info(message="Logging Process started")
    return logging_worker
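
start_logging_process itself is not shown here. Given that it receives the shared queue and the enabled flag, a plausible minimal sketch of the consumer side (the message handling is an assumption) could be:

import queue as queue_module

def start_logging_process(log_queue, enabled):      # hypothetical sketch of the worker body
    while True:
        try:
            entry = log_queue.get(timeout=1)        # wake up periodically to re-check the flag
        except queue_module.Empty:
            if enabled.value == 0:                  # parent cleared the flag: time to exit
                break
            continue
        print(entry)                                # real code would forward to Elastic, a file, etc.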
Example #7
    def add_log(self, message, data=None):
        log_info(message=message, data=data)
        if isinstance(data, str):                   # `data is str` compared identity with the type; wrap plain strings in a dict
            data = {"str": data}
        self.get_from_file()

        json_data = {
            "timestamp": str(datetime.now()),
            "message": message,
            "data": data or {},
            "uuid": self.unique_id
        }

        self.data["events"].append(json_data)
        self.write_to_file()
        return json_data
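
Each call therefore appends one entry to the events file; with the fields above, a stored event looks roughly like this (values illustrative):

{
    "timestamp": "2021-06-01 10:20:30.123456",      # str(datetime.now())
    "message": "Processing with: http://192.168.0.10:8080",
    "data": {},
    "uuid": "6f1c2a9e-..."                          # self.unique_id
}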
Example #8
    def LoopHashDirectories(self, thread_count=None):
        # Allow only a single loop to run at a time
        if self.IsProcessing():
            log_error(message="ERROR: Attempt to start processing while processing is in progress")
            return False

        self.status.StartStatusThread()
        thread_count = thread_count or self.config.thread_count
        log_info(message="in LoopHashDirectories, about to start main loop")
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(thread_count))
        log_info(message="in LoopHashDirectories, Loop completed")
        self.status.StopStatusThread()
        return True
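
Creating and registering a fresh event loop on every call avoids reusing a loop that a previous run has closed. On Python 3.7+, assuming LoopHashDirectoriesAsync is a plain coroutine, the three loop lines are equivalent to a single asyncio.run call:

import asyncio

# drop-in equivalent of new_event_loop / set_event_loop / run_until_complete
loop_result = asyncio.run(self.LoopHashDirectoriesAsync(thread_count))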
Example #9
    def LoopHashDirectoriesInternal(self, thread_count, do_single):

        if not folder_exists(self.storage.hd2_data()):
            log_error("ERROR: rootdir does not exist: " + self.storage.hd2_data())
            return False

        if not isinstance(thread_count, int):
            raise TypeError("thread_count must be an integer")

        if not isinstance(do_single, bool):
            raise TypeError("do_single must be a boolean")

        log_message = f"LoopHashDirectoriesInternal started with {thread_count} threads"
        self.events.add_log(log_message)
        log_info(log_message)

        json_list = self.updateHashJson()

        log_message = f"LoopHashDirectoriesInternal started with {thread_count} threads"
        self.events.add_log(log_message)
        log_info(log_message)

        threads = list()

        process_index = 0

        log_info(message=f'before mapping thread_data for {len(json_list)} files')
        thread_data = []
        for key in json_list:
            file_hash = key

            itempath = self.storage.hd2_data(key)
            if (FileStatus.COMPLETED == json_list[key]["file_status"]):
                self.events.add_log(
                    f"The file processing has been already completed")
                continue

            if not os.path.exists(itempath):
                self.events.add_log(
                    f"ERROR: Path \"{itempath}\" does not exist")
                json_list[key]["file_status"] = FileStatus.FAILED
                continue

            process_index += 1
            thread_data.append((
                itempath,
                file_hash,
                process_index,
            ))
            # # limit the number of parallel threads
            #
            # if process_index % int(thread_count) == 0:                      # todo: refactor this workflow to use multiprocess and queues
            #     # Clean up the threads
            #     for index, thread in enumerate(threads):                    # todo: since at the moment this will block allocating new threads until
            #         thread.join()                                           #       all have finishing execution
            #
            # process_index += 1
            # log_info(message=f"in LoopHashDirectoriesInternal process_index={process_index} , thread #{process_index % int(thread_count) }")
            # x = threading.Thread(target=self.ProcessDirectory, args=(itempath, file_hash, process_index,))
            # threads.append(x)
            # x.start()
            #
            # if do_single:
            #     break
            #
            # if not Loops.continue_processing:
            #     break

        # for index, thread in enumerate(threads):
        #     thread.join()

        log_info(message=f'after mapping thread_data, there are {len(thread_data)} mapped items')
        #thread_data = thread_data[:500]
        #log_info(message=f'to start with only processing {len(thread_data)} thread_data items')
        pool = ThreadPool(thread_count)
        results = pool.map(self.ProcessDirectory, thread_data)
        pool.close()
        pool.join()

        self.moveProcessedFiles()

        self.events.add_log("LoopHashDirectoriesInternal finished")
        return True
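
Note that ThreadPool.map hands each worker exactly one argument, so every (itempath, file_hash, process_index) tuple arrives as a single parameter that self.ProcessDirectory presumably unpacks. A self-contained sketch of that calling convention (process_directory here is illustrative, not the project's actual method body):

from multiprocessing.pool import ThreadPool

def process_directory(thread_data):                 # map() passes the whole tuple as one argument
    itempath, file_hash, process_index = thread_data
    print(f"#{process_index}: {file_hash} -> {itempath}")
    return True

thread_data = [('/hd2/data/abc', 'abc', 1), ('/hd2/data/def', 'def', 2)]
pool = ThreadPool(2)
results = pool.map(process_directory, thread_data)  # blocks until every item is processed
pool.close()
pool.join()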
Example #10
    def do_rebuild(self, endpoint, hash, source_path, dir):
        log_info(
            message=f"Starting rebuild for file {hash} on endpoint {endpoint}")
        with Duration() as duration:
            event_data = {
                "endpoint": endpoint,
                "hash": hash,
                "source_path": source_path,
                "dir": dir
            }  # todo: see if we can use a variable that holds the params data
            self.add_event_log('Starting File rebuild', event_data)

            self.meta_service.set_rebuild_server(dir, endpoint)

            encodedFile = FileService.base64encode(source_path)
            if not encodedFile:
                message = f"Failed to encode the file: {hash}"
                log_error(message=message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False

            response = self.rebuild(endpoint, encodedFile)
            result = response.text
            if not result:
                message = f"Failed to rebuild the file : {hash}"
                log_error(message=message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False

            try:
                for path in self.meta_service.get_original_file_paths(dir):
                    #rebuild_file_path = path
                    if path.startswith(self.config.hd1_location):
                        rebuild_file_path = path.replace(
                            self.config.hd1_location, self.config.hd3_location)
                    else:
                        rebuild_file_path = os.path.join(
                            self.config.hd3_location, path)

                    folder_create(parent_folder(
                        rebuild_file_path))  # make sure parent folder exists

                    final_rebuild_file_path = self.save_file(
                        result, rebuild_file_path
                    )  # returns actual file saved (which could be .html)

                    # todo: improve the performance of these update since each will trigger a save
                    file_size = os.path.getsize(final_rebuild_file_path)              # calculate rebuilt file size
                    rebuild_hash = self.meta_service.file_hash(
                        final_rebuild_file_path
                    )  # calculate hash of final_rebuild_file_path

                    self.meta_service.set_rebuild_file_size(dir, file_size)
                    self.meta_service.set_rebuild_file_path(
                        dir, final_rebuild_file_path
                    )  # capture final_rebuild_file_path
                    self.meta_service.set_rebuild_hash(
                        dir, rebuild_hash)  # capture it
                if not FileService.base64decode(result):
                    message = f"Engine response could not be decoded"
                    log_error(message=message, data=f"{result}")
                    self.meta_service.set_error(dir, message)
                    return False
            except Exception as error:
                message = f"Error Saving file for {hash} : {error}"
                log_error(message=message)
                self.meta_service.set_xml_report_status(dir, "No Report")
                self.meta_service.set_error(dir, message)
                return False

            headers = response.headers
            fileIdKey = "X-Adaptation-File-Id"

            # get XML report
            if fileIdKey in headers:
                if self.get_xmlreport(endpoint, headers[fileIdKey], dir):
                    self.add_event_log('The XML report has been saved')
                    self.meta_service.set_xml_report_status(dir, "Obtained")
                else:
                    self.meta_service.set_xml_report_status(
                        dir, "No XML Report")
            else:
                self.meta_service.set_xml_report_status(
                    dir, "Failed to obtain")
                message = f'No X-Adaptation-File-Id header found in the response for {hash}'
                log_error(message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False
                #raise ValueError("No X-Adaptation-File-Id header found in the response")

            # todo: add when server side supports this
            # SDKEngineVersionKey = "X-SDK-Engine-Version"
            # SDKAPIVersionKey = "X-SDK-Api-Version"
            #
            # if SDKEngineVersionKey in headers:
            #     self.sdk_engine_version = headers[SDKEngineVersionKey]
            # if SDKAPIVersionKey in headers:
            #     self.sdk_api_version = headers[SDKAPIVersionKey]
            #
            # self.meta_service.set_server_version(dir, "Engine:" + self.sdk_engine_version + " API:" + self.sdk_api_version )
        log_info(message=f"rebuild ok for file {hash} on endpoint {endpoint} took {duration.seconds()} seconds")
        return True
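
Duration is used as a context manager with a seconds() accessor, and duration.seconds() is read after the with block closes. A minimal stand-in with that interface (an assumption about the real helper, for illustration only):

import time

class Duration:                                     # minimal stand-in for the timing helper above
    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end = time.time()
        return False                                # don't swallow exceptions

    def seconds(self):
        return round(self.end - self.start, 3)

with Duration() as duration:
    time.sleep(0.1)
print(duration.seconds())                           # -> ~0.1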