Example #1
    def compile_archive(self):
        """
        Once the file is renamed and the difmet instruction file 
        created, they are both packaged and sent to cache/C_tosend 
        repertory 
        """

        # create the difmet instruction file
        instr_file_path = self._create_diffmet_instr()

        basename = os.path.basename(instr_file_path)
        basename = basename.replace(".diffusions.xml", ".tar.gz")
        archive_path = os.path.join(self.dir_c, basename + ".tmp")

        with tarfile.open(archive_path, "w:gz") as tar:
            tar.add(instr_file_path, arcname=os.path.basename(instr_file_path))
            LOGGER.info("Compressed diffmet instruction file %s in %s.",
                        instr_file_path, archive_path)
            tar.add(self.new_file_path, arcname=os.path.basename(self.new_file_path))
            LOGGER.info("Compressed dissemination file %s in %s.",
                        self.new_file_path, archive_path)

        # drop the ".tmp" suffix now that the archive is fully written
        shutil.move(archive_path, archive_path[:-4])

        Tools.remove_file(instr_file_path, "processed instruction", LOGGER)
        Tools.remove_file(self.new_file_path, "processed data", LOGGER)
        Database.update_field_by_query("rxnotif", False, **dict(final_file=self.new_filename))
        Database.update_field_by_query("message", "File packaged in tar.gz format",
                                       **dict(final_file=self.new_filename))
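Note: Tools.remove_file appears in every example here but is not shown. A minimal sketch of what such a helper might look like, purely as an assumption (the real implementation may differ):

import os

def remove_file(file_path, file_description, logger):
    # hypothetical helper: delete a file and log what kind of file was removed;
    # a missing file raises FileNotFoundError, which some callers catch
    os.remove(file_path)
    logger.debug("Deleted %s file %s.", file_description, file_path)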
Example #2
    def clear_orphan_files(dir_):
        # remove any leftover (orphan) files from the directory
        for file_ in os.listdir(dir_):
            file_path = os.path.join(dir_, file_)
            try:
                Tools.remove_file(file_path, "orphan", LOGGER)
            except FileNotFoundError:
                pass
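A hedged usage example; the directory path below is a placeholder, not taken from the source:

# hypothetical call: purge whatever is left in a temporary fetch directory
clear_orphan_files("/tmp/harness/cache/B_fromstaging")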
Example #3
    def clear_instruction_files(instruction_files):
        # remove the processed instruction files
        for file_ in instruction_files:
            try:
                Tools.remove_file(file_, "instruction", LOGGER)
            # the file was either moved back to directory A or already deleted
            except FileNotFoundError:
                pass
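The instruction_files list is built elsewhere; a sketch under the assumption that instruction files are JSON files collected from directory A (dir_a is a placeholder):

import glob
import os

# hypothetical: gather the instruction files left in directory A, then clean them
instruction_files = glob.glob(os.path.join(dir_a, "*.json"))
clear_instruction_files(instruction_files)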
Example #4
    @classmethod
    def process_instruction_file(cls, file_to_process):

        processed = False
        files_fetched = []

        with open(file_to_process, "r") as file_:
            info_file = json.load(file_)

        # get full_id
        req_id = info_file.get("req_id")
        hostname = info_file.get("hostname")
        full_id = req_id + hostname

        file_expired = cls.check_file_age(file_to_process)
        if file_expired:
            msg = ("%s instruction file discarded "
                   "because it is over expiration date "
                   "according to keepfiletime settings "
                   "parameter" % file_to_process)
            LOGGER.warning(msg)
            Database.update_field_by_query("requestStatus", REQ_STATUS.failed,
                                           **dict(fullrequestId=full_id))
            Database.update_field_by_query("message", msg,
                                           **dict(fullrequestId=full_id))
        else:
            # get URI
            uri = info_file.get("uri")
            # fetch files on staging post
            processed, files_fetched = cls.fetch_files(req_id, hostname, uri)
            # if a file couldn't be gathered, the dissemination has failed and
            # the instruction file is deleted
            if not processed:
                LOGGER.error("Couldn't fetch files from openwis staging post for"
                             " instruction file %s."
                             " Proceeding to next instruction file.",
                             file_to_process)
                # check if the database status is failed. If so, the instruction
                # file is deleted; if not, it is moved back to directory A to be
                # processed again
                if Database.get_request_status(full_id) == REQ_STATUS.failed:
                    Tools.remove_file(file_to_process, "instruction", LOGGER)
                else:
                    shutil.move(file_to_process, cls.dir_a)
            else:
                msg = "Instruction file %s processed" % file_to_process
                LOGGER.info(msg)
                Database.update_field_by_query("message", msg,
                                **dict(fullrequestId=full_id))

        return processed, info_file, files_fetched
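check_file_age is referenced here (and in Example #5) but not shown. A minimal sketch of what such a check might do, assuming the keepfiletime setting is expressed in seconds (key name and units are assumptions):

import os
import time

def check_file_age(file_path, keep_time):
    # hypothetical: a file is considered expired once it is older than
    # keep_time seconds (e.g. the keepfiletime settings value)
    return (time.time() - os.path.getmtime(file_path)) > keep_time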
Example #5
    @classmethod
    def process(cls, max_loops=0):
        cls.nb_workers = SettingsManager.get("sendFTPlimitConn")
        # in debug mode, it is possible to select a different pool implementation
        pool_method = cls.get_pool_method()
        cls.pool = pool_method(processes=cls.nb_workers)
        counter = 0
        cls.setup_process()
        while cls._running:
            counter += 1

            cls.signal_loop(counter)
            cls.load_settings()
            cls.update_workers()
            # idle time
            idle_time = SettingsManager.get("sendFTPIdle")
            sleep(idle_time)

            # get settings
            cls.dir_c = dir_c = HarnessTree.get("temp_dissRequest_C")
            cls.dir_d = dir_d = HarnessTree.get("temp_dissRequest_D")
            # move back any remaining file from D to C
            cls.move_back_files()

            # get files in C
            max_files = cls.nb_workers
            list_files_c = cls.get_file_list(dir_c, max_files)
            files_to_ftp = cls.move_files(list_files_c, dir_d)

            for file_ in files_to_ftp:

                file_expired = cls.check_file_age(file_)
                if file_expired:
                    # TODO we need to find a way to update the info to the database
                    # would require looking at the file compressed though
                    Tools.remove_file(file_, "difmet archive", LOGGER)
                    continue
                size = os.stat(file_).st_size

                timeout = cls.compute_timeout(size, file_)

                # start upload
                # rename the file first so that no other operation touches it
                cls.lock_file(file_)
                res = cls.pool.apply_async(
                    cls.abortable_ftp, (cls.upload_file, file_, dir_c, dir_d),
                    dict(timeout=timeout))

            # for testing and debugging purposes only
            cls.check_end_loop(counter, max_loops)
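lock_file and unlock_file are not shown; judging from the ".lock" extension handled in Example #8, they are presumably a simple rename. A sketch under that assumption:

import os

def lock_file(file_path):
    # hypothetical: mark a file as in-use by renaming it with a ".lock" suffix
    locked = file_path + ".lock"
    os.rename(file_path, locked)
    return locked

def unlock_file(locked_path):
    # hypothetical: restore the original name once the transfer is finished
    original = locked_path[:-len(".lock")] if locked_path.endswith(".lock") else locked_path
    os.rename(locked_path, original)
    return original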
Example #6
    def commit_failure(self, database, diff_id):

        diffusion = Diffusion(diff_externalid=diff_id,
                              fullrequestId=self.req_id,
                              requestStatus=REQ_STATUS.failed,
                              Date=self._to_datetime(self.date_reception),
                              rxnotif=True)

        with Database.get_app().app_context():
            database.session.add(diffusion)
            database.session.commit()

        LOGGER.info("Committed %s dissemination status into database.",
                    REQ_STATUS.failed)

        if os.path.isfile(self.request_file):
            Tools.remove_file(self.request_file, "JSON request", LOGGER)

        return REQ_STATUS.failed
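A hedged usage sketch; the object holding the request, the diffusion id and the database handle are placeholders, not taken from the source:

# hypothetical: record a failed dissemination after an unrecoverable error
status = notification_manager.commit_failure(database, diff_id="0123456789abcdef")
assert status == REQ_STATUS.failed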
Example #7
    @staticmethod
    def package_data(all_files_fetched, diss_instructions):
        # process files fetched
        for file_path in all_files_fetched:

            filename = os.path.basename(file_path)
            request_id_list = Database.get_id_list_by_filename(filename)

            # no reference => file is an orphan
            if request_id_list == []:
                Tools.remove_file(file_path, "orphan file", LOGGER)
                continue

            # purge from request_id_list any req_id that is not among the
            # diss_instructions keys, to avoid looking for an instruction file
            # related to a file that was already processed by a previous request
            request_id_list = [item for item in request_id_list
                               if item in diss_instructions]

            LOGGER.info("Processing downloaded file %s linked to "
                        "requests %s", file_path, request_id_list)

            # instantiate a DiffMetManager object that connects the file
            # to its single or multiple requests and instructions
            diff_manager = DiffMetManager(request_id_list,
                                          file_path,
                                          diss_instructions)
            # rename files according to regex
            renaming_ok = diff_manager.rename()
            # package the archive
            if renaming_ok:
                diff_manager.compile_archive()
            else:
                msg = ("Dissemination failed for requests %s because user settings "
                       "regex resulted in incorrect filename for difmet" % request_id_list)
                LOGGER.error(msg)
                for req_id in request_id_list:
                    Database.update_field_by_query("requestStatus", REQ_STATUS.failed,
                                                **dict(fullrequestId=req_id))
                    Database.update_field_by_query("message", msg,
                                                **dict(fullrequestId=req_id))
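A hedged invocation sketch; the file path and the shape of diss_instructions are assumptions, not taken from the source:

# hypothetical: files previously fetched from the staging post, keyed to the
# instruction contents returned by process_instruction_file (Example #4)
all_files_fetched = ["/tmp/harness/cache/B_fromstaging/data_file.grib"]
diss_instructions = {"req-001hostA": {"uri": "ftp://staging/data_file.grib"}}
package_data(all_files_fetched, diss_instructions)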
Example #8
    @classmethod
    def abortable_ftp(cls, func, *args, **kwargs):

        try:
            timeout = kwargs.get('timeout', None)
            # the file was locked beforehand, so operate on the ".lock" name
            original_file, dir_c, dir_d = args
            file_ = original_file + ".lock"
            # size in MB, used for logging
            size = os.stat(file_).st_size / (1 << 20)
            connection_ok, ftp = cls.connect_ftp()
            if connection_ok:
                proc = ThreadPool(1)
                res = proc.apply_async(func, args=(
                    original_file,
                    ftp,
                ))
                try:
                    # Wait timeout seconds for func to complete.
                    upload_ok, duration = res.get(timeout)
                    file_ = cls.unlock_file(file_)
                    if not upload_ok:
                        shutil.move(file_, dir_c)
                        LOGGER.debug(
                            "Moved file back from repertory %s to repertory %s",
                            dir_d, dir_c)
                    else:
                        LOGGER.info(
                            "File %s of size %f Mo sent to Diffmet in %f s",
                            file_, size, duration)
                        Tools.remove_file(file_, "difmet archive", LOGGER)
                    ftp.quit()
                except multiprocessing.TimeoutError:
                    ftp.close()
                    proc.terminate()

                    LOGGER.error(
                        "Timeout of %f s exceeded for sending file %s"
                        " on difmet. Checking upload.", timeout, original_file)
                    _, ftp = cls.connect_ftp()
                    upload_ok = cls.check_transfer(basename(original_file),
                                                   ftp)
                    if upload_ok:
                        LOGGER.warning(
                            "Process hit the timeout but "
                            "file %s of size %f Mo was still sent to Diffmet",
                            file_, size)
                        Tools.remove_file(file_, "difmet archive", LOGGER)
                    else:
                        file_ = cls.unlock_file(file_)
                        LOGGER.error("FTP upload of %s s failed.", file_)
                        # move the file back from D to C
                        shutil.move(file_, dir_c)
                        LOGGER.debug(
                            "Moved file back from repertory %s to repertory %s",
                            dir_d, dir_c)
                except Exception as exc:
                    file_ = cls.unlock_file(file_)
                    trace = ''.join(
                        traceback.format_exception(type(exc), exc,
                                                   exc.__traceback__))
                    LOGGER.error(
                        "Error when uploading file %s with "
                        "trace :\n %s", file_, trace)
                    ftp.quit()
            else:
                file_ = cls.unlock_file(file_)
                LOGGER.error("Couldn't connect to FTP for uploading file %s ",
                             file_)
                # move the file back from D to C
                shutil.move(file_, dir_c)
                LOGGER.debug(
                    "Moved file back from repertory %s to repertory %s", dir_d,
                    dir_c)

            # the worker thread pool only exists when the FTP connection succeeded
            if connection_ok:
                proc.terminate()
        except Exception as exc:
            trace = ''.join(
                traceback.format_exception(type(exc), exc, exc.__traceback__))
            LOGGER.error("Error when uploading file %s with "
                         "trace :\n %s", file_, trace)