Esempio n. 1
0
    def __get_query(self) -> str:
        """Load the query text from the source configured on the task.

        ``self.task.source_query_type_id`` selects the source:

        * 1 - ``self.source_loader.gitlab`` called with ``task.source_git``
        * 2 - a file read through ``Smb`` using ``task.query_source``; the
          text of the first file returned is passed to
          ``self.source_loader.source``
        * 3 - ``self.source_loader.web_url`` called with ``task.source_url``
        * 4 - ``self.source_loader.source()`` called without arguments

        Returns:
            Whatever the selected ``source_loader`` call returns.

        Raises:
            ValueError: if ``source_query_type_id`` is none of the values
                above (previously this surfaced as an ``UnboundLocalError``
                on the final ``return``).
        """
        query_type = self.task.source_query_type_id

        if query_type == 3:  # url
            return self.source_loader.web_url(self.task.source_url)

        if query_type == 1:  # gitlab url
            return self.source_loader.gitlab(self.task.source_git)

        if query_type == 4:  # code
            return self.source_loader.source()

        if query_type == 2:  # smb
            # expand task parameters first, then date tokens, in the file name
            file_name = self.param_loader.insert_file_params(
                self.task.source_query_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            downloaded = Smb(
                task=self.task,
                run_id=self.run_id,
                directory=self.temp_path,
                connection=self.task.query_source,
            ).read(file_name)

            # only the first downloaded file is used as the query text
            return self.source_loader.source(
                Path(downloaded[0].name).read_text("utf8"))

        raise ValueError(f"Unsupported query source type: {query_type!r}.")
Esempio n. 2
0
def get_task_file_download(file_id: int) -> dict:
    """Download file from SMB backup server.

    Looks up the ``TaskFile`` row for ``file_id`` and reads its ``path``
    through ``Smb`` (with ``connection=None``) into a directory under
    ``temp`` named after the project, task and job.

    Args:
        file_id: id of the ``TaskFile`` to download.

    Returns:
        The ``jsonify`` result for ``{"message": <local temp file name>}``,
        or for ``{"error": ...}`` when no ``TaskFile`` has that id.
    """
    my_file = TaskFile.query.filter_by(id=file_id).first()
    if my_file is None:
        # first() gives None for an unknown id; report it instead of
        # failing with an AttributeError on ``my_file.task``.
        return jsonify({"error": f"File {file_id} not found."})

    task = my_file.task

    temp_path = (
        Path(__file__).parent.parent
        / "temp"
        / sanitize_filename(task.project.name)
        / sanitize_filename(task.name)
        / my_file.job_id
    )
    temp_path.mkdir(parents=True, exist_ok=True)

    downloaded = Smb(
        task=task,
        run_id=my_file.job_id,
        connection=None,  # "default",
        directory=temp_path,
    ).read(my_file.path)

    # only the first file returned by the read is reported
    return jsonify({"message": downloaded[0].name})
Esempio n. 3
0
def send_smb(run_id: int, file_id: int) -> dict:
    """Send file to SMB server specified in the task.

    File is loaded from the backup SMB file server into a tempfile.
    The tempfile is deposited into the SMB location.

    Args:
        run_id: run id passed (as a string) to the uploading ``Smb`` client.
        file_id: id of the ``TaskFile`` to send.

    Returns:
        The ``jsonify`` result for ``{"message": ...}`` on success or
        ``{"error": ...}`` when the file is unknown or any step fails.
    """
    try:
        my_file = TaskFile.query.filter_by(id=file_id).first()
        if my_file is None:
            # first() gives None for an unknown id; give a readable error
            # rather than the text of an AttributeError.
            return jsonify({"error": f"File {file_id} not found."})

        task = my_file.task

        temp_path = (
            Path(__file__).parent.parent
            / "temp"
            / sanitize_filename(task.project.name)
            / sanitize_filename(task.name)
            / my_file.job_id
        )
        temp_path.mkdir(parents=True, exist_ok=True)

        # download the file
        Smb(
            task=task,
            run_id=my_file.job_id,
            connection=None,  # "default",
            directory=temp_path,
        ).read(my_file.path)

        # upload the file
        Smb(
            task=task,
            run_id=str(run_id),
            connection=task.destination_smb_conn,
            directory=temp_path,
        ).save(
            overwrite=1,
            file_name=my_file.name,
        )

        return jsonify({"message": "successfully sent file."})

    # Exception (not BaseException) so SystemExit / KeyboardInterrupt are
    # not turned into a JSON error response.
    # pylint: disable=broad-except
    except Exception as e:
        return jsonify({"error": str(e)})
Esempio n. 4
0
def send_email(run_id: int, file_id: int) -> dict:
    """Send file to email address specified in the task.

    File is loaded from the backup SMB file server into a tempfile.
    The tempfile is sent as an email attachment.

    Args:
        run_id: run id passed (as a string) to ``Smtp``.
        file_id: id of the ``TaskFile`` to send.

    Returns:
        The ``jsonify`` result for ``{"message": ...}`` on success or
        ``{"error": ...}`` when the file is unknown or any step fails.
    """
    try:
        my_file = TaskFile.query.filter_by(id=file_id).first()
        if my_file is None:
            # first() gives None for an unknown id; give a readable error
            # rather than the text of an AttributeError.
            return jsonify({"error": f"File {file_id} not found."})

        task = my_file.task

        temp_path = (
            Path(__file__).parent.parent
            / "temp"
            / sanitize_filename(task.project.name)
            / sanitize_filename(task.name)
            / my_file.job_id
        )
        temp_path.mkdir(parents=True, exist_ok=True)

        # download the file
        downloaded_files = Smb(
            task=task,
            run_id=my_file.job_id,
            connection=None,  # "default",
            directory=temp_path,
        ).read(my_file.path)

        # send the file
        date = str(datetime.datetime.now())
        template = env.get_template("email/email.html.j2")

        # NOTE(review): no method is called on the Smtp instance, so the
        # constructor is presumably what sends the message - confirm.
        Smtp(
            task=task,
            run_id=str(run_id),
            recipients=task.email_completion_recipients,
            short_message=f"Atlas Hub: {task.name} data emailed.",
            subject="(Manual Send) Project: "
            + task.project.name
            + " / Task: "
            + task.name,
            message=template.render(
                task=task,
                success=1,
                date=date,
                logs=[],
            ),
            attachments=[x.name for x in downloaded_files],
        )

        return jsonify({"message": "successfully sent file."})

    # Exception (not BaseException) so SystemExit / KeyboardInterrupt are
    # not turned into a JSON error response.
    # pylint: disable=broad-except
    except Exception as e:
        return jsonify({"error": str(e)})
Esempio n. 5
0
    def __store_files(self) -> None:
        """Save every source file and deliver it to the enabled destinations.

        Does nothing when ``self.source_files`` is empty or ``None``.
        Otherwise, for each file in order:

        1. ``File(...).save()`` produces the final name, path and hash; the
           path is appended to ``self.output_files``.
        2. The file is saved through ``Sftp``, ``Ftp`` and ``Smb`` for each
           destination whose ``task.destination_<kind>`` flag is 1 and whose
           connection is set. A destination is skipped (and that is logged)
           when its ``dont_send_empty_file`` flag is 1 and the file size is 0.
        3. A copy is saved through ``Smb`` with ``connection=None`` and
           ``overwrite=1``.
        4. A ``TaskFile`` row is added and committed.

        The size used for the empty-file check is ``self.query_output_size``
        when that is not ``None``, otherwise the size of the file on disk.

        Raises:
            RunnerException: (code 11) when ``File(...).save()`` fails.
        """
        if not self.source_files:
            return

        total_files = len(self.source_files)

        RunnerLog(
            self.task,
            self.run_id,
            8,
            f"Storing output file{'s' if total_files != 1 else ''}...",
        )

        # (label used in log messages, task attribute infix, client class),
        # in the order the destinations are attempted.
        destinations = (
            ("SFTP", "sftp", Sftp),
            ("FTP", "ftp", Ftp),
            ("SMB", "smb", Smb),
        )

        for file_counter, this_file in enumerate(self.source_files, 1):

            if self.query_output_size is not None:
                this_file_size = self.query_output_size
            else:
                this_file_size = Path(this_file.name).stat().st_size

            # get file name. if no name specified in task setting, then use temp name.
            try:
                file_name, file_path, file_hash = File(
                    task=self.task,
                    run_id=self.run_id,
                    data_file=this_file,
                    params=self.param_loader,
                ).save()

            except BaseException as e:
                # chain the cause so the original traceback is kept
                raise RunnerException(
                    self.task, self.run_id, 11,
                    f"Failed to create data file.\n{e}") from e

            self.output_files.append(file_path)

            if total_files > 1:
                RunnerLog(
                    self.task,
                    self.run_id,
                    8,
                    f"Storing file {file_counter} of {total_files}...",
                )

            # store: send to each enabled destination
            for label, kind, client in destinations:
                if getattr(self.task, f"destination_{kind}") != 1:
                    continue

                # looked up only for enabled destinations
                connection = getattr(self.task, f"destination_{kind}_conn")
                if not connection:
                    continue

                skip_empty = getattr(
                    self.task, f"destination_{kind}_dont_send_empty_file")
                if skip_empty == 1 and this_file_size == 0:
                    RunnerLog(
                        self.task,
                        self.run_id,
                        8,
                        f"Skipping {label}, file is empty.",
                    )
                    continue

                client(
                    task=self.task,
                    run_id=self.run_id,
                    connection=connection,
                    directory=self.temp_path,
                ).save(
                    overwrite=getattr(self.task,
                                      f"destination_{kind}_overwrite"),
                    file_name=file_name,
                )

            # save historical copy
            smb_path = Smb(
                task=self.task,
                run_id=self.run_id,
                connection=None,  # "default",
                directory=self.temp_path,
            ).save(overwrite=1, file_name=file_name)

            # log file details
            db.session.add(
                TaskFile(
                    name=file_name,
                    path=smb_path,
                    task_id=self.task.id,
                    job_id=self.run_id,
                    file_hash=file_hash,
                    size=file_size(str(os.path.getsize(file_path))),
                ))
            db.session.commit()
Esempio n. 6
0
    def __process(self) -> None:
        """Fetch the task's processing script, if any, and run it.

        ``self.task.processing_type_id`` selects where the script comes from:

        * 1 - file read over SMB
        * 2 - file read over SFTP
        * 3 - file read over FTP
        * 4 - git url: the repo is cloned into ``self.temp_path`` when a
          processing command is set, otherwise the script text is fetched
          with ``source_loader.gitlab``
        * 5 - other url: cloned when a processing command is set, otherwise
          fetched with ``source_loader.web_url``
        * 6 - source code stored on the task

        Script text that was fetched is written to
        ``<temp_path>/<run_id>.py``. ``PyProcesser`` is then run with
        ``task.processing_command`` or, when that is empty, the script's
        file name. Truthy output is logged and stored in ``self.data_files``.

        Raises:
            RunnerException: (code 8) when a clone fails, when the type id
                is positive but the matching settings are missing, or when
                the script file cannot be written.
        """
        RunnerLog(self.task, self.run_id, 8, "Starting processing script...")
        # get processing script

        # 1 = smb
        # 2 = sftp
        # 3 = ftp
        # 4 = git url
        # 5 = other url
        # 6 = source code

        processing_script_name = self.temp_path / (self.run_id + ".py")

        # An empty string counts as "no command" for both clone branches.
        has_command = (self.task.processing_command is not None
                       and self.task.processing_command != "")

        def resolve_file_name(template):
            """Expand task parameters, then date tokens, in a file name."""
            return DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=self.param_loader.insert_file_params(template),
            ).string_to_date()

        def git_clone_cmd(repo_url):
            """Build the shell command that clones repo_url into temp_path."""
            # NOTE(review): repo_url is interpolated into a shell command
            # unquoted; it comes from task settings - confirm it is trusted.
            return ("$(which git) clone -q --depth 1 "
                    "--recurse-submodules --shallow-submodules "
                    f'{repo_url} "{self.temp_path}"')

        my_file = ""
        if (self.task.processing_type_id == 1
                and self.task.processing_smb_id is not None):
            # was task.source_smb_file: the processing script has its own
            # file setting, as in the sftp/ftp branches below.
            file_name = resolve_file_name(self.task.processing_smb_file)

            my_file = Path(
                Smb(
                    task=self.task,
                    run_id=self.run_id,
                    directory=self.temp_path,
                    connection=self.task.processing_smb_conn,
                ).read(file_name)[0].name).read_text("utf8")

        elif (self.task.processing_type_id == 2
              and self.task.processing_sftp_id is not None):
            file_name = resolve_file_name(self.task.processing_sftp_file)

            my_file = Path(
                Sftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.processing_sftp_conn,
                    directory=self.temp_path,
                ).read(file_name=file_name)[0].name).read_text("utf8")

        elif (self.task.processing_type_id == 3
              and self.task.processing_ftp_id is not None):
            file_name = resolve_file_name(self.task.processing_ftp_file)

            my_file = Path(
                Ftp(
                    task=self.task,
                    run_id=self.run_id,
                    # was task.source_ftp_conn: the guard above checks the
                    # processing connection, so read from that one.
                    connection=self.task.processing_ftp_conn,
                    directory=self.temp_path,
                ).read(file_name=file_name)[0].name).read_text("utf8")

        elif self.task.processing_type_id == 4 and self.task.processing_git is not None:

            # if a dir is specified then download all files
            if has_command:
                try:
                    # insert the url-quoted git credentials after the scheme
                    url = re.sub(
                        r"(https?://)(.+?)",
                        r"\1<username>:<password>@\2",
                        self.task.processing_git,
                        flags=re.IGNORECASE,
                    ).replace(
                        "<username>",
                        urllib.parse.quote(app.config["GIT_USERNAME"]),
                    ).replace(
                        "<password>",
                        urllib.parse.quote(app.config["GIT_PASSWORD"]),
                    )

                    Cmd(
                        self.task,
                        self.run_id,
                        git_clone_cmd(url),
                        "Repo cloned.",
                        "Failed to clone repo: %s" %
                        (self.task.processing_git, ),
                    ).shell()

                # pylint: disable=broad-except
                except BaseException as e:
                    raise RunnerException(
                        self.task, self.run_id, 8,
                        "Processor failed to clone repo.") from e

            # otherwise get py file
            else:
                my_file = self.source_loader.gitlab(self.task.processing_git)

        elif self.task.processing_type_id == 5 and self.task.processing_url is not None:
            if has_command:
                try:
                    Cmd(
                        task=self.task,
                        run_id=self.run_id,
                        cmd=git_clone_cmd(self.task.processing_url),
                        success_msg="Repo cloned",
                        error_msg="Failed to clone repo: %s" %
                        (self.task.processing_url, ),
                    ).shell()

                    processing_script_name = (str(self.temp_path) +
                                              self.task.processing_command)
                # pylint: disable=broad-except
                except BaseException as e:
                    raise RunnerException(
                        self.task, self.run_id, 8,
                        "Processor failed to clone repo.") from e
            else:
                my_file = self.source_loader.web_url(self.task.processing_url)

        elif (self.task.processing_type_id == 6
              and self.task.processing_code is not None):
            my_file = self.task.processing_code
        elif self.task.processing_type_id > 0:
            raise RunnerException(
                self.task,
                self.run_id,
                8,
                "Processing error, Not enough information to run a processing script from.",
            )

        try:
            if my_file != "" and self.task.processing_type_id > 0:
                Path(processing_script_name).parent.mkdir(parents=True,
                                                          exist_ok=True)
                # written as utf8: remote scripts are read as utf8 above and
                # Python reads source files as utf8 by default
                with open(processing_script_name, "w",
                          encoding="utf8") as text_file:
                    text_file.write(my_file)
                RunnerLog(self.task, self.run_id, 8,
                          "Processing script created.")

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(
                self.task, self.run_id, 8,
                f"Processing script failure:\n{e}") from e

        # run processing script
        output = PyProcesser(
            task=self.task,
            run_id=self.run_id,
            directory=self.temp_path,
            source_files=self.source_files,
            # Path(...) because processing_script_name may be a plain str
            script=(self.task.processing_command
                    or Path(processing_script_name).name),
        ).run()

        # # allow processer to rename file
        if output:
            RunnerLog(self.task, self.run_id, 8,
                      f"Processing script output:\n{output}")
            self.data_files = output
Esempio n. 7
0
    def __get_source(self) -> None:
        """Load the task's source data according to ``task.source_type_id``.

        * 1 - sql: loads the query with ``__get_query``, runs it through
          ``Postgres`` (database type 1) or ``SqlServer`` (database type 2)
          and stores the result in ``self.query_output_size`` and
          ``self.source_files``
        * 2 - smb file, 3 - sftp file, 4 - ftp file: reads the configured
          file into ``self.source_files``
        * 6 - ssh command: loads the query and runs it through ``Ssh``

        Any other value leaves the instance untouched.

        Raises:
            RunnerException: code 8 when the query cannot be loaded or the
                database type is not supported; code 21 (postgres) or 20
                (mssql) when running the query fails.
        """
        source_type = self.task.source_type_id

        def resolve_file_name(template):
            """Expand task parameters, then date tokens, in a file name."""
            return DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=self.param_loader.insert_file_params(template),
            ).string_to_date()

        if source_type == 1:  # sql

            external_db = self.task.source_database_conn
            try:
                RunnerLog(self.task, self.run_id, 8, "Loading query...")
                query = self.__get_query()
            except BaseException as e:
                raise RunnerException(self.task, self.run_id, 8,
                                      f"Failed to load query.\n{e}") from e

            RunnerLog(self.task, self.run_id, 8,
                      "Starting query run, waiting for results...")

            # database_type.id -> (client class, error code)
            sql_clients = {
                1: (Postgres, 21),  # postgres
                2: (SqlServer, 20),  # mssql
            }
            db_type_id = external_db.database_type.id
            if db_type_id not in sql_clients:
                # Previously this fell through to the completion log and
                # failed there on self.source_files[0].
                raise RunnerException(
                    self.task, self.run_id, 8,
                    f"Unsupported database type: {db_type_id}.")

            client, error_code = sql_clients[db_type_id]
            try:
                self.query_output_size, self.source_files = client(
                    task=self.task,
                    run_id=self.run_id,
                    connection=em_decrypt(external_db.connection_string,
                                          app.config["PASS_KEY"]),
                    timeout=external_db.timeout
                    or app.config["DEFAULT_SQL_TIMEOUT"],
                    directory=self.temp_path,
                ).run(query)

            except ValueError as message:
                # ValueError is passed through as the message itself
                raise RunnerException(self.task, self.run_id, error_code,
                                      message) from message

            except BaseException as message:
                raise RunnerException(
                    self.task, self.run_id, error_code,
                    f"Failed to run query.\n{message}") from message

            data_file_name = self.source_files[0].name
            data_size = file_size(str(Path(data_file_name).stat().st_size))
            RunnerLog(
                self.task,
                self.run_id,
                8,
                f"Query completed.\nData file {data_file_name} created. Data size: {data_size}.",
            )

        elif source_type == 2:  # smb file
            file_name = resolve_file_name(self.task.source_smb_file)

            self.source_files = Smb(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_smb_conn,
                directory=self.temp_path,
            ).read(file_name=file_name)

        elif source_type == 3:  # sftp file
            RunnerLog(self.task, self.run_id, 9, "Loading data from server...")
            file_name = resolve_file_name(self.task.source_sftp_file)

            self.source_files = Sftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_sftp_conn,
                directory=self.temp_path,
            ).read(file_name=file_name)

        elif source_type == 4:  # ftp file
            RunnerLog(self.task, self.run_id, 13,
                      "Loading data from server...")
            file_name = resolve_file_name(self.task.source_ftp_file)

            self.source_files = Ftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_ftp_conn,
                directory=self.temp_path,
            ).read(file_name=file_name)

        elif source_type == 6:  # ssh command
            query = self.__get_query()

            Ssh(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_ssh_conn,
                command=query,
            ).run()