def compile_archive(self):
    """Package the renamed data file with its difmet instruction file.

    Both files are bundled into a single .tar.gz archive written to the
    cache/C_tosend repertory (self.dir_c); the source files are then
    removed and the database records updated.
    """
    # build the difmet instruction file; the archive name derives from it
    instruction_path = self._create_diffmet_instr()
    archive_name = os.path.basename(instruction_path).replace(
        ".diffusions.xml", ".tar.gz")
    # write under a ".tmp" suffix so a half-written archive is never
    # picked up by the sender process
    tmp_archive = os.path.join(self.dir_c, archive_name + ".tmp")
    with tarfile.open(tmp_archive, "w:gz") as tar:
        tar.add(instruction_path,
                arcname=os.path.basename(instruction_path))
        LOGGER.info("Compressed diffmet instruction file %s in %s.",
                    instruction_path, tmp_archive)
        tar.add(self.new_file_path,
                arcname=os.path.basename(self.new_file_path))
        LOGGER.info("Compressed dissemination file %s in %s.",
                    self.new_file_path, tmp_archive)
    # publish the archive by stripping the ".tmp" suffix
    shutil.move(tmp_archive, tmp_archive[:-4])
    Tools.remove_file(instruction_path, "processed instruction", LOGGER)
    Tools.remove_file(self.new_file_path, "processed data", LOGGER)
    Database.update_field_by_query("rxnotif", False,
                                   **dict(final_file=self.new_filename))
    Database.update_field_by_query("message",
                                   "File packaged in tar.gz format",
                                   **dict(final_file=self.new_filename))
def clear_orphan_files(dir_):
    """Delete every file remaining in *dir_* (orphan cleanup)."""
    for entry in os.listdir(dir_):
        entry_path = os.path.join(dir_, entry)
        try:
            Tools.remove_file(entry_path, "orphan", LOGGER)
        except FileNotFoundError:
            # file vanished between listing and removal: nothing to do
            pass
def clear_instruction_files(instruction_files):
    """Delete the given instruction files, ignoring ones already gone."""
    for instr_file in instruction_files:
        try:
            Tools.remove_file(instr_file, "instruction", LOGGER)
        except FileNotFoundError:
            # file either was moved back to repertory A or deleted
            pass
def process_instruction_file(cls, file_to_process):
    """Handle one JSON instruction file by fetching its files from the
    openwis staging post.

    Returns a ``(processed, info_file, files_fetched)`` tuple where
    *processed* tells whether the fetch succeeded, *info_file* is the
    parsed JSON content and *files_fetched* the list of gathered files.
    """
    processed = False
    files_fetched = []
    with open(file_to_process, "r") as json_file:
        info_file = json.load(json_file)
    # the full request id is the concatenation of request id and hostname
    req_id = info_file.get("req_id")
    hostname = info_file.get("hostname")
    full_id = req_id + hostname
    if cls.check_file_age(file_to_process):
        # too old according to the keepfiletime setting: flag as failed
        msg = ("%s instruction file discarded "
               "because it is over expiration date "
               "according to keepfiletime settings "
               "parameter" % file_to_process)
        LOGGER.warning(msg)
        Database.update_field_by_query("requestStatus", REQ_STATUS.failed,
                                       **dict(fullrequestId=full_id))
        Database.update_field_by_query("message", msg,
                                       **dict(fullrequestId=full_id))
    else:
        # fetch the files referenced by the instruction's URI
        uri = info_file.get("uri")
        processed, files_fetched = cls.fetch_files(req_id, hostname, uri)
    if processed:
        msg = "Instruction file %s processed" % file_to_process
        LOGGER.info(msg)
        Database.update_field_by_query("message", msg,
                                       **dict(fullrequestId=full_id))
    else:
        # a file couldn't be gathered, dissemination is failed and the
        # instruction file is deleted or retried
        LOGGER.error("Couldn't fetch files from openwis staging post for"
                     " instruction file %s."
                     " Proceeding to next instruction file.",
                     file_to_process)
        # if the database already reports a failure the instruction file
        # is deleted; otherwise it is sent back to repertory A to be
        # processed again
        if Database.get_request_status(full_id) == REQ_STATUS.failed:
            Tools.remove_file(file_to_process, "instruction", LOGGER)
        else:
            shutil.move(file_to_process, cls.dir_a)
    return processed, info_file, files_fetched
def process(cls, max_loops=0):
    """Main worker loop: move difmet archives from repertory C to D and
    hand them to the FTP worker pool for upload.

    :param max_loops: when > 0, stop after that many iterations
                      (testing/debugging helper); 0 means run until
                      ``cls._running`` goes False.
    """
    cls.nb_workers = SettingsManager.get("sendFTPlimitConn")
    # in debug mode, it is possible to set a different pool implementation
    pool_method = cls.get_pool_method()
    cls.pool = pool_method(processes=cls.nb_workers)
    counter = 0
    cls.setup_process()
    while cls._running:
        counter += 1
        cls.signal_loop(counter)
        cls.load_settings()
        cls.update_workers()
        # idle time between two polling passes
        sleep(SettingsManager.get("sendFTPIdle"))
        # get settings
        cls.dir_c = dir_c = HarnessTree.get("temp_dissRequest_C")
        cls.dir_d = dir_d = HarnessTree.get("temp_dissRequest_D")
        # move back any remaining file from D to C
        cls.move_back_files()
        # get files in C, at most one per available worker
        list_files_c = cls.get_file_list(dir_c, cls.nb_workers)
        files_to_ftp = cls.move_files(list_files_c, dir_d)
        for file_ in files_to_ftp:
            if cls.check_file_age(file_):
                # TODO we need to find a way to update the info to the database
                # would require looking at the file compressed though
                Tools.remove_file(file_, "difmet archive", LOGGER)
                continue
            size = os.stat(file_).st_size
            timeout = cls.compute_timeout(size, file_)
            # start download
            # renaming (locking) the file prevents any operation on it
            # while the upload is in flight
            cls.lock_file(file_)
            # fire-and-forget: success/failure handling happens inside
            # abortable_ftp itself, so the AsyncResult is intentionally
            # discarded (the original bound it to an unused local)
            cls.pool.apply_async(
                cls.abortable_ftp,
                (cls.upload_file, file_, dir_c, dir_d),
                dict(timeout=timeout))
        # for testing and debugging purpose only
        cls.check_end_loop(counter, max_loops)
def commit_failure(self, database, diff_id):
    """Record a failed dissemination in the database and drop the
    originating JSON request file.

    Returns ``REQ_STATUS.failed`` so the caller can propagate the status.
    """
    failed_diffusion = Diffusion(
        diff_externalid=diff_id,
        fullrequestId=self.req_id,
        requestStatus=REQ_STATUS.failed,
        Date=self._to_datetime(self.date_reception),
        rxnotif=True)
    with Database.get_app().app_context():
        database.session.add(failed_diffusion)
        database.session.commit()
    LOGGER.info("Committed %s dissemination status into database.",
                REQ_STATUS.failed)
    # the request file is no longer needed once the failure is stored
    if os.path.isfile(self.request_file):
        Tools.remove_file(self.request_file, "JSON request", LOGGER)
    return REQ_STATUS.failed
def package_data(all_files_fetched, diss_instructions):
    """Link each downloaded file to its dissemination requests and package it.

    Files with no matching request are discarded as orphans; for the others
    a DiffMetManager renames the file according to the user regex and builds
    the difmet archive. A renaming failure marks every related request as
    failed in the database.
    """
    # process files fetched
    for file_path in all_files_fetched:
        filename = os.path.basename(file_path)
        request_id_list = Database.get_id_list_by_filename(filename)
        # no reference => file is an orphan
        if not request_id_list:
            Tools.remove_file(file_path, "orphan file", LOGGER)
            continue
        # purge requestId_list of req_id that are not in
        # diss_instructions keys. That is to prevent trying to find
        # an instruction file related to a file that has been processed
        # by a previous request
        request_id_list = [item for item in request_id_list
                           if item in diss_instructions]
        # BUG FIX: if every reference belonged to a previous request the
        # list is now empty; the original fell through and built a
        # DiffMetManager with no request. The file has already been
        # handled, so treat it like an orphan instead.
        if not request_id_list:
            Tools.remove_file(file_path, "orphan file", LOGGER)
            continue
        LOGGER.info("Processing downloaded file %s linked to "
                    "requests %s", file_path, request_id_list)
        # instantiate a DiffMetManager object that connects the file
        # to its single or multiple requests and instructions
        diff_manager = DiffMetManager(request_id_list, file_path,
                                      diss_instructions)
        # rename files according to regex
        renaming_ok = diff_manager.rename()
        # package the archive
        if renaming_ok:
            diff_manager.compile_archive()
        else:
            msg = ("Dissemination failed for requests %s because user settings "
                   "regex resulted in incorrect filename for difmet"
                   % request_id_list)
            LOGGER.error(msg)
            for req_id in request_id_list:
                Database.update_field_by_query("requestStatus",
                                               REQ_STATUS.failed,
                                               **dict(fullrequestId=req_id))
                Database.update_field_by_query("message", msg,
                                               **dict(fullrequestId=req_id))
def abortable_ftp(cls, func, *args, **kwargs):
    """Run *func* (the FTP upload of one difmet archive) in a worker
    thread, bounded by a timeout.

    ``args`` is ``(original_file, dir_c, dir_d)``; ``kwargs`` may carry
    ``timeout`` in seconds. On success the archive is deleted; on failure
    or when no FTP connection can be made, it is moved back from
    repertory D to repertory C for a later retry. A timeout triggers a
    fresh connection to check whether the upload went through anyway.
    """
    try:
        timeout = kwargs.get('timeout', None)
        original_file, dir_c, dir_d = args
        # the caller locked the file by renaming it with a ".lock" extension
        file_ = original_file + ".lock"
        # size in Mbytes, for logging
        size = os.stat(file_).st_size / (1 << 20)
        connection_ok, ftp = cls.connect_ftp()
        if connection_ok:
            proc = ThreadPool(1)
            res = proc.apply_async(func, args=(original_file, ftp,))
            try:
                # Wait timeout seconds for func to complete.
                upload_ok, duration = res.get(timeout)
                file_ = cls.unlock_file(file_)
                if not upload_ok:
                    shutil.move(file_, dir_c)
                    LOGGER.debug(
                        "Moved file back from repertory %s to repertory %s",
                        dir_d, dir_c)
                else:
                    LOGGER.info(
                        "File %s of size %f Mo sent to Diffmet in %f s",
                        file_, size, duration)
                    Tools.remove_file(file_, "difmet archive", LOGGER)
                ftp.quit()
            except multiprocessing.TimeoutError:
                ftp.close()
                proc.terminate()
                LOGGER.error(
                    "Timeout of %f s exceeded for sending file %s"
                    " on difmet. Checking upload.", timeout, original_file)
                # reconnect and check whether the upload completed anyway
                _, ftp = cls.connect_ftp()
                upload_ok = cls.check_transfer(basename(original_file), ftp)
                if upload_ok:
                    LOGGER.warning(
                        "Process hit the timeout but "
                        "file %s of size %f Mo was still sent to Diffmet",
                        file_, size)
                    # NOTE(review): file_ still carries the ".lock" suffix
                    # here (unlock_file was not called on this path) —
                    # confirm Tools.remove_file targets the right name
                    Tools.remove_file(file_, "difmet archive", LOGGER)
                else:
                    file_ = cls.unlock_file(file_)
                    LOGGER.error("FTP upload of %s s failed.", file_)
                    # move the file back from D to C
                    shutil.move(file_, dir_c)
                    LOGGER.debug(
                        "Moved file back from repertory %s to repertory %s",
                        dir_d, dir_c)
            except Exception as exc:
                file_ = cls.unlock_file(file_)
                trace = ''.join(
                    traceback.format_exception(type(exc), exc,
                                               exc.__traceback__))
                LOGGER.error(
                    "Error when uploading file %s with "
                    "trace :\n %s", file_, trace)
                ftp.quit()
        else:
            file_ = cls.unlock_file(file_)
            LOGGER.error("Couldn't connect to FTP for uploading file %s ",
                         file_)
            # move the file back from D to C
            shutil.move(file_, dir_c)
            LOGGER.debug(
                "Moved file back from repertory %s to repertory %s",
                dir_d, dir_c)
            # BUG FIX: the original called proc.terminate() here, but
            # proc is only created when the connection succeeds, so every
            # failed connection raised a NameError that the outer handler
            # swallowed and logged as a spurious upload error.
    except Exception as exc:
        trace = ''.join(
            traceback.format_exception(type(exc), exc, exc.__traceback__))
        LOGGER.error("Error when uploading file %s with "
                     "trace :\n %s", file_, trace)