Example #1
    def update_report(self, index, report_json):
        try:
            self.get_from_file()
            self.analysis_data[index]["original_hash"] = index

            self.analysis_data[index]["file_type"] = report_json[
                "gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"][
                    "gw:FileType"]
            self.analysis_data[index]["file_size"] = report_json[
                "gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"][
                    "gw:TotalSizeInBytes"]

            self.analysis_data[index]["remediated_item_count"], \
            self.analysis_data[index]["remediate_items_list"]       = self.get_remediated_item_details(report_json)

            self.analysis_data[index]["sanitised_item_count"], \
            self.analysis_data[index]["sanitised_items_list"]       = self.get_sanitisation_item_details(report_json)

            self.analysis_data[index]["issue_item_count"],\
            self.analysis_data[index]["issue_item_list"]            = self.get_issue_item_details(report_json)

            self.write_to_file()
        except Exception as error:
            log_error(
                message=
                f"Error in update_report from json data {index} : {error}")
Example #2
    def get_xmlreport(self, endpoint, fileId, dir):
        log_info(message=f"getting XML Report for {fileId} at {endpoint}")

        xmlreport = self.xmlreport_request(endpoint, fileId)
        if not xmlreport:
            raise ValueError('Failed to obtain the XML report')

        try:
            json_obj = xmltodict.parse(xmlreport)

            file_extension = json_obj["gw:GWallInfo"]["gw:DocumentStatistics"][
                "gw:DocumentSummary"]["gw:FileType"]
            self.meta_service.set_rebuild_file_extension(dir, file_extension)
            json_obj['original_hash'] = os.path.basename(dir)
            json_save_file_pretty(json_obj, os.path.join(dir, "report.json"))

            #self.report_elastic.add_report(json_obj)

            analysis_obj = self.analysis_json.get_file_analysis(
                os.path.basename(dir), json_obj)
            json_save_file_pretty(analysis_obj,
                                  os.path.join(dir, "analysis.json"))

            self.analysis_elastic.add_analysis(analysis_obj)

            return True
        except Exception as error:
            log_error(
                message=f"Error in parsing xmlreport for {fileId} : {error}")
            return False
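For readers unfamiliar with the nested "gw:" keys accessed above, here is a minimal standalone sketch (the sample XML and namespace URI are invented for illustration) of how xmltodict.parse maps a prefixed XML report into the dict shape these examples index into:

    import xmltodict

    sample_xml = """
    <gw:GWallInfo xmlns:gw="http://example.invalid/gw">
      <gw:DocumentStatistics>
        <gw:DocumentSummary>
          <gw:FileType>docx</gw:FileType>
          <gw:TotalSizeInBytes>12345</gw:TotalSizeInBytes>
        </gw:DocumentSummary>
      </gw:DocumentStatistics>
    </gw:GWallInfo>
    """
    json_obj = xmltodict.parse(sample_xml)                     # prefixed tag names are kept as dict keys
    print(json_obj["gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"]["gw:FileType"])   # -> docx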
Example #3
    def get_valid_endpoints(self, endpoint_string):
        self.reset_last_error()
        try:
            valid_endpoints = {'Endpoints': []}
            endpoint_json = json.loads(endpoint_string)
            endpoint_count = len(endpoint_json['Endpoints'])
            for idx in range(endpoint_count):

                server_url = "http://" + endpoint_json['Endpoints'][idx]['IP'] + ":" + \
                              endpoint_json['Endpoints'][idx]['Port']

                response = self.gw_sdk_healthcheck(server_url)
                if response:
                    if response.status_code == 200:
                        valid_endpoints['Endpoints'].append(
                            endpoint_json['Endpoints'][idx])

            valid_endpoints_count = len(valid_endpoints['Endpoints'])

            if valid_endpoints_count == 0:
                return None

            return json.dumps(valid_endpoints)

        except Exception as e:
            self.last_error_message = f'Configure_Env : get_valid_endpoints : {e}'
            log_error(f'Configure_Env : get_valid_endpoints : {e}')
            raise ValueError(str(e))
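As a companion sketch (not from the source; addresses and ports are placeholders), this is the endpoint_string shape the method above expects, inferred from the 'Endpoints', 'IP' and 'Port' keys it reads:

    import json

    endpoint_string = json.dumps({
        "Endpoints": [
            {"IP": "127.0.0.1", "Port": "8080"},    # hypothetical endpoint entry
            {"IP": "127.0.0.1", "Port": "8081"}     # second hypothetical entry
        ]
    })
    # get_valid_endpoints(endpoint_string) health-checks each entry and returns a
    # json.dumps of the subset that answered HTTP 200, or None if none did.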
Example #4
    def test__start_logging(self):
        # todo: understand better why this test takes about 1.1 secs to execute (some of it is caused by the processing process starting, and elastic being setup)
        log_worker = start_logging()                                        # trigger logging process
        log_info()                                                          # send 4 log messages
        log_warning()
        log_info(message=random_text(), data={'a': 42})
        log_error(message='an error')
Example #5
    def ProcessDirectoryWithEndpoint(self, itempath, file_hash,
                                     endpoint_index):

        if not os.path.isdir(itempath):
            return False

        log_info(
            message=
            f"Starting ProcessDirectoryWithEndpoint on endpoint # {endpoint_index} for file {file_hash}"
        )
        meta_service = Metadata_Service()
        original_file_path = meta_service.get_original_file_paths(itempath)
        events = Events_Log(itempath)

        endpoint = "http://" + self.config.endpoints['Endpoints'][
            endpoint_index]['IP'] + ":" + self.config.endpoints['Endpoints'][
                endpoint_index]['Port']
        events.add_log("Processing with: " + endpoint)

        meta_service.set_f2f_plugin_version(itempath, API_VERSION)
        meta_service.set_f2f_plugin_git_commit(itempath, self.git_commit())

        try:
            file_processing = File_Processing(events, self.events_elastic,
                                              self.report_elastic,
                                              self.analysis_elastic,
                                              meta_service)
            if not file_processing.processDirectory(endpoint, itempath):
                events.add_log("CANNOT be processed")
                return False

            log_data = {
                'file': original_file_path,
                'status': FileStatus.COMPLETED,
                'error': 'none',
                'timestamp': datetime.now(),
            }
            log_info('ProcessDirectoryWithEndpoint', data=log_data)
            meta_service.set_error(itempath, "none")
            meta_service.set_status(itempath, FileStatus.COMPLETED)
            self.hash_json.update_status(file_hash, FileStatus.COMPLETED)
            events.add_log("Has been processed")
            return True
        except Exception as error:
            log_data = {
                'file': original_file_path,
                'status': FileStatus.FAILED,
                'error': str(error),
            }
            log_error(message='error in ProcessDirectoryWithEndpoint',
                      data=log_data)
            meta_service.set_error(itempath, str(error))
            meta_service.set_status(itempath, FileStatus.FAILED)
            self.hash_json.update_status(file_hash, FileStatus.FAILED)
            events.add_log("ERROR:" + str(error))
            return False
Example #6
    def ProcessSingleFile(self):
        if self.IsProcessing():
            log_error(
                "ERROR: Attempt to start processing while processing is in progress"
            )
            return False

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(1, True))
        return True
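The event-loop pattern above (and in the next example) drives an async coroutine from synchronous code; here is a minimal self-contained sketch with a stand-in coroutine, since LoopHashDirectoriesAsync itself is not shown in this listing:

    import asyncio

    async def loop_hash_directories_async(thread_count, do_single=False):   # stand-in coroutine
        await asyncio.sleep(0)                                              # placeholder for the real work
        return thread_count, do_single

    loop = asyncio.new_event_loop()                                         # same pattern as ProcessSingleFile
    asyncio.set_event_loop(loop)
    print(loop.run_until_complete(loop_hash_directories_async(1, True)))    # -> (1, True)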
Example #7
    def LoopHashDirectoriesSequential(self):
        #Allow only a single loop to be run at a time
        if self.IsProcessing():
            log_error(
                "ERROR: Attempt to start processing while processing is in progress"
            )
            return False

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(1))
        return True
Example #8
    def env_details(self):
        self.reset_last_error()
        try:
            return {
                "hd1_path": environ.get('HD1_LOCATION'),
                "hd2_path": environ.get('HD2_LOCATION'),
                "hd3_path": environ.get('HD3_LOCATION')
            }
        except Exception as e:
            self.last_error_message = f'Configure_Env : env_details : {e}'
            log_error(f'Configure_Env : env_details : {e}')
            raise ValueError(str(e))
Example #9
    def gw_sdk_healthcheck(self, server_url):
        self.reset_last_error()
        try:
            api_route = "api/health/"
            url = urljoin(server_url, api_route)

            response = requests.request("GET", url, verify=False, timeout=10)
            return response

        except Exception as e:
            self.last_error_message = f'Configure_Env : gw_sdk_healthcheck : {e}'
            log_error(f'Configure_Env : gw_sdk_healthcheck : {e}')
            return None
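A small worked example (the address is a placeholder) of the URL that urljoin builds for the server_url values assembled in get_valid_endpoints:

    from urllib.parse import urljoin

    server_url = "http://127.0.0.1:8080"                     # hypothetical endpoint base URL
    print(urljoin(server_url, "api/health/"))                # -> http://127.0.0.1:8080/api/health/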
Example #10
    def add_file(self, file_hash, file_name):
        if self.is_hash(file_hash) and file_name:
            json_value = {"file_name": file_name}

            json_data = {file_hash: json_value}

            self.analysis_data.update(json_data)
            self.write_to_file()
            return True
        log_error(message='in Analysis_Json.add_file bad data provided',
                  data={
                      'file_hash': file_hash,
                      'file_name': file_name
                  })
        return False
Example #11
    def base64request(self, endpoint, api_route, base64enc_file):
        try:
            url = endpoint + "/" + api_route

            payload = json.dumps({"Base64": base64enc_file})

            headers = {'Content-Type': 'application/json'}

            return requests.request("POST",
                                    url,
                                    headers=headers,
                                    data=payload,
                                    timeout=int(self.config.request_timeout))

        except Exception as e:
            log_error(str(e))
            raise ValueError(str(e))
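A minimal sketch (the file name is hypothetical, not from the source) of how a caller might produce base64enc_file and of the request body shape that base64request posts:

    import base64
    import json

    with open("sample.docx", "rb") as f:                       # hypothetical input file
        base64enc_file = base64.b64encode(f.read()).decode()   # JSON-safe base64 string

    payload = json.dumps({"Base64": base64enc_file})            # same body shape the method sends
    print(len(payload))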
Example #12
    def LoopHashDirectories(self, thread_count=None):
        #Allow only a single loop to be run at a time
        if self.IsProcessing():
            log_error(
                message=
                "ERROR: Attempt to start processing while processing is in progress"
            )
            return False

        self.status.StartStatusThread()
        thread_count = thread_count or self.config.thread_count
        log_info(message="in LoopHashDirectories, about to start main loop")
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(thread_count))
        log_info(message="in LoopHashDirectories, Loop completed")
        self.status.StopStatusThread()
        return True
Example #13
    def get_file_analysis(self, index, report_json):
        try:
            meta_service = Metadata_Service()

            metadata = meta_service.get_from_file(index)

            self.file_analysis_data = {}
            self.file_analysis_data["file_name"] = metadata.data.get(
                'file_name')
            self.file_analysis_data[
                "rebuild_file_extension"] = metadata.data.get(
                    'rebuild_file_extension')
            self.file_analysis_data["rebuild_file_size"] = metadata.data.get(
                'rebuild_file_size')

            self.file_analysis_data["original_hash"] = index
            self.file_analysis_data["rebuild_hash"] = metadata.data.get(
                'rebuild_hash')

            self.file_analysis_data["file_type"] = report_json["gw:GWallInfo"][
                "gw:DocumentStatistics"]["gw:DocumentSummary"]["gw:FileType"]
            self.file_analysis_data["file_size"] = int(
                report_json["gw:GWallInfo"]["gw:DocumentStatistics"]
                ["gw:DocumentSummary"]["gw:TotalSizeInBytes"])

            self.file_analysis_data["remediated_item_count"], \
            self.file_analysis_data["remediate_items_list"]       = self.get_remediated_item_details(report_json)

            self.file_analysis_data["sanitised_item_count"], \
            self.file_analysis_data["sanitised_items_list"]       = self.get_sanitisation_item_details(report_json)

            self.file_analysis_data["issue_item_count"],\
            self.file_analysis_data["issue_item_list"]            = self.get_issue_item_details(report_json)

            self.file_analysis_data[
                "threat_analysis"] = self.get_threat_analysis(
                    self.file_analysis_data["sanitised_items_list"])

            return self.file_analysis_data

        except Exception as error:
            log_error(
                message=
                f"Error in get_file_analysis from json data {dir} : {error}")
Example #14
    def configure(self, hd1_path=None, hd2_path=None, hd3_path=None):
        self.reset_last_error()
        try:
            dotenv_file = dotenv.find_dotenv()
            if hd1_path:
                if path.exists(hd1_path):
                    environ['HD1_LOCATION'] = hd1_path
                    dotenv.set_key(dotenv_file, "HD1_LOCATION",
                                   environ["HD1_LOCATION"])
                else:
                    self.last_error_message = f"hd1_path did not exist: {hd1_path}"
                    log_error(message=f"hd1_path did not exist",
                              data={"path": hd1_path})
                    return -1

            if hd2_path:
                if not path.exists(hd2_path):
                    folder_create(hd2_path)
                    folder_create(path_combine(hd2_path,
                                               DEFAULT_HD2_DATA_NAME))
                    folder_create(
                        path_combine(hd2_path, DEFAULT_HD2_STATUS_NAME))

                environ['HD2_LOCATION'] = hd2_path
                dotenv.set_key(dotenv_file, "HD2_LOCATION",
                               environ["HD2_LOCATION"])

            if hd3_path:
                if not path.exists(hd3_path):
                    folder_create(hd3_path)

                environ['HD3_LOCATION'] = hd3_path
                dotenv.set_key(dotenv_file, "HD3_LOCATION",
                               environ["HD3_LOCATION"])

            self.config.load_values()
            return self.env_details()

        except Exception as e:
            self.last_error_message = f'Configure_Env : configure : {e}'
            log_error(f'Configure_Env : configure : {e}')
            raise ValueError(str(e))
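A standalone sketch (paths and key value are illustrative) of the python-dotenv calls that configure relies on to persist a location into the project's .env file:

    from pathlib import Path
    import dotenv

    dotenv_file = '.env'                                       # explicit path for this sketch
    Path(dotenv_file).touch()                                  # make sure the file exists
    dotenv.set_key(dotenv_file, 'HD3_LOCATION', '/tmp/hd3')    # writes HD3_LOCATION to .env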
Example #15
    def configure_endpoints(self, endpoint_string):
        self.reset_last_error()
        try:
            dotenv_file = dotenv.find_dotenv()
            valid_endpoint_string = self.get_valid_endpoints(endpoint_string)

            if valid_endpoint_string:
                environ['ENDPOINTS'] = valid_endpoint_string
                logger.info(f"ENDPOINTS : {environ['ENDPOINTS']}")
                dotenv.set_key(dotenv_file, "ENDPOINTS", environ["ENDPOINTS"])
                self.config.load_values()
                return json.loads(environ['ENDPOINTS'])

            else:
                self.last_error_message = f"No valid endpoint found in: {endpoint_string}"
                log_error(f"No valid endpoint found in",
                          data={"enpoints": endpoint_string})
                return -1

        except Exception as e:
            self.last_error_message = f'Configure_Env : configure_endpoints : {e}'
            log_error(f'Configure_Env : configure_endpoints : {e}')
            raise ValueError(str(e))
Example #16
    def add_file(self, file_hash, file_name):
        if self.is_hash(file_hash) and file_name:
            Hash_Json.lock.acquire()
            try:
                json_value = {
                    "file_name": file_name,
                    "file_status": FileStatus.INITIAL
                }

                json_data = {file_hash: json_value}

                self.data().update(json_data)
            finally:
                Hash_Json.lock.release()

            return True

        log_error(message='in Hash_Json.add_file bad data provided',
                  data={
                      'file_hash': file_hash,
                      'file_name': file_name
                  })
        return False
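A side note on the locking above: the acquire / try / finally / release sequence is equivalent to a with-statement on the same lock, as in this hypothetical sketch (the hash and status values are placeholders):

    import threading

    lock = threading.Lock()                                    # stand-in for Hash_Json.lock
    data = {}
    with lock:                                                 # acquired on entry, released on exit, even on error
        data.update({"<file-hash>": {"file_name": "report.docx", "file_status": "Initial"}})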
Example #17
    def do_rebuild(self, endpoint, hash, source_path, dir):
        log_info(
            message=f"Starting rebuild for file {hash} on endpoint {endpoint}")
        with Duration() as duration:
            event_data = {
                "endpoint": endpoint,
                "hash": hash,
                "source_path": source_path,
                "dir": dir
            }  # todo: see if we can use a variable that holds the params data
            self.add_event_log('Starting File rebuild', event_data)

            self.meta_service.set_rebuild_server(dir, endpoint)

            encodedFile = FileService.base64encode(source_path)
            if not encodedFile:
                message = f"Failed to encode the file: {hash}"
                log_error(message=message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False

            response = self.rebuild(endpoint, encodedFile)
            result = response.text
            if not result:
                message = f"Failed to rebuild the file : {hash}"
                log_error(message=message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False

            try:
                for path in self.meta_service.get_original_file_paths(dir):
                    #rebuild_file_path = path
                    if path.startswith(self.config.hd1_location):
                        rebuild_file_path = path.replace(
                            self.config.hd1_location, self.config.hd3_location)
                    else:
                        rebuild_file_path = os.path.join(
                            self.config.hd3_location, path)

                    folder_create(parent_folder(
                        rebuild_file_path))  # make sure parent folder exists

                    final_rebuild_file_path = self.save_file(
                        result, rebuild_file_path
                    )  # returns actual file saved (which could be .html)

                    # todo: improve the performance of these update since each will trigger a save
                    file_size = os.path.getsize(
                        final_rebuild_file_path)  # calculate rebuilt file size
                    rebuild_hash = self.meta_service.file_hash(
                        final_rebuild_file_path
                    )  # calculate hash of final_rebuild_file_path

                    self.meta_service.set_rebuild_file_size(dir, file_size)
                    self.meta_service.set_rebuild_file_path(
                        dir, final_rebuild_file_path
                    )  # capture final_rebuild_file_path
                    self.meta_service.set_rebuild_hash(
                        dir, rebuild_hash)  # capture it
                if not FileService.base64decode(result):
                    message = f"Engine response could not be decoded"
                    log_error(message=message, data=f"{result}")
                    self.meta_service.set_error(dir, message)
                    return False
            except Exception as error:
                message = f"Error Saving file for {hash} : {error}"
                log_error(message=message)
                self.meta_service.set_xml_report_status(dir, "No Report")
                self.meta_service.set_error(dir, message)
                return False

            headers = response.headers
            fileIdKey = "X-Adaptation-File-Id"

            # get XML report
            if fileIdKey in headers:
                if self.get_xmlreport(endpoint, headers[fileIdKey], dir):
                    self.add_event_log('The XML report has been saved')
                    self.meta_service.set_xml_report_status(dir, "Obtained")
                else:
                    self.meta_service.set_xml_report_status(
                        dir, "No XML Report")
            else:
                self.meta_service.set_xml_report_status(
                    dir, "Failed to obtain")
                message = f'No X-Adaptation-File-Id header found in the response for {hash}'
                log_error(message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False
                #raise ValueError("No X-Adaptation-File-Id header found in the response")

            # todo: add when server side supports this
            # SDKEngineVersionKey = "X-SDK-Engine-Version"
            # SDKAPIVersionKey = "X-SDK-Api-Version"
            #
            # if SDKEngineVersionKey in headers:
            #     self.sdk_engine_version = headers[SDKEngineVersionKey]
            # if SDKAPIVersionKey in headers:
            #     self.sdk_api_version = headers[SDKAPIVersionKey]
            #
            # self.meta_service.set_server_version(dir, "Engine:" + self.sdk_engine_version + " API:" + self.sdk_api_version )
        log_info(
            message=
            f"rebuild ok for file {hash} on endpoint {endpoint} took {duration.seconds()} seconds"
        )
        return True
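A worked illustration (all paths are hypothetical) of the hd1 to hd3 path rewrite performed above before the rebuilt file is saved:

    hd1_location = "/data/hd1"                                           # hypothetical config.hd1_location
    hd3_location = "/data/hd3"                                           # hypothetical config.hd3_location
    path = "/data/hd1/folder/report.docx"                                # original file path under hd1
    rebuild_file_path = path.replace(hd1_location, hd3_location)
    print(rebuild_file_path)                                             # -> /data/hd3/folder/report.docx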
Example #18
    def LoopHashDirectoriesInternal(self, thread_count, do_single):

        if folder_exists(self.storage.hd2_data()) is False:
            log_message = "ERROR: rootdir does not exist: " + self.storage.hd2_data(
            )
            log_error(log_message)
            return False

        if not isinstance(thread_count, int):
            raise TypeError("thread_count must be an integer")

        if not isinstance(do_single, bool):
            raise TypeError("do_single must be a boolean")

        log_message = f"LoopHashDirectoriesInternal started with {thread_count} threads"
        self.events.add_log(log_message)
        log_info(log_message)

        json_list = self.updateHashJson()

        log_message = f"LoopHashDirectoriesInternal started with {thread_count} threads"
        self.events.add_log(log_message)
        log_info(log_message)

        threads = list()

        process_index = 0

        log_info(
            message=f'before Mapping thread_data for {len(json_list)} files')
        thread_data = []
        for key in json_list:
            file_hash = key

            itempath = self.storage.hd2_data(key)
            if (FileStatus.COMPLETED == json_list[key]["file_status"]):
                self.events.add_log("The file processing has already been completed")
                continue

            if not os.path.exists(itempath):
                self.events.add_log(
                    f"ERROR: Path \"{itempath}\" does not exist")
                json_list[key]["file_status"] = FileStatus.FAILED
                continue

            process_index += 1
            thread_data.append((
                itempath,
                file_hash,
                process_index,
            ))
            # # limit the number of parallel threads
            #
            # if process_index % int(thread_count) == 0:                      # todo: refactor this workflow to use multiprocess and queues
            #     # Clean up the threads
            #     for index, thread in enumerate(threads):                    # todo: since at the moment this will block allocating new threads until
            #         thread.join()                                           #       all have finishing execution
            #
            # process_index += 1
            # log_info(message=f"in LoopHashDirectoriesInternal process_index={process_index} , thread #{process_index % int(thread_count) }")
            # x = threading.Thread(target=self.ProcessDirectory, args=(itempath, file_hash, process_index,))
            # threads.append(x)
            # x.start()
            #
            # if do_single:
            #     break
            #
            # if not Loops.continue_processing:
            #     break

        # for index, thread in enumerate(threads):
        #     thread.join()

        log_info(
            message=
            f'after mapped thread_data, there are {len(thread_data)} mapped items'
        )
        #thread_data = thread_data[:500]
        #log_info(message=f'to start with only processing {len(thread_data)} thread_data items')
        pool = ThreadPool(thread_count)
        results = pool.map(self.ProcessDirectory, thread_data)
        pool.close()
        pool.join()

        self.moveProcessedFiles()

        self.events.add_log("LoopHashDirectoriesInternal finished")
        return True
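Finally, a small self-contained sketch (not from the source) of the ThreadPool pattern used above: pool.map hands each tuple in thread_data to the worker as a single argument, so ProcessDirectory is expected to unpack it itself:

    from multiprocessing.pool import ThreadPool

    def process_directory(thread_item):                        # stand-in for self.ProcessDirectory
        itempath, file_hash, process_index = thread_item       # unpack the mapped tuple
        return f"#{process_index} {file_hash} -> {itempath}"

    thread_data = [("/tmp/hd2/data/abc", "abc", 1),            # hypothetical mapped items
                   ("/tmp/hd2/data/def", "def", 2)]
    pool = ThreadPool(2)
    results = pool.map(process_directory, thread_data)         # one worker call per tuple
    pool.close()
    pool.join()
    print(results)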