Ejemplo n.º 1
0
def test_date_parsing_microseconds(client_fixture: fixture) -> None:
    """``%f`` must render as six characters, with or without an offset."""
    _, demo_task_id = create_demo_task()
    demo_task = Task.query.filter_by(id=demo_task_id).first()

    # plain directive, negative offset, positive offset
    for pattern in ("%f", "%f-1", "%f+1"):
        rendered = DateParsing(demo_task, None, pattern).string_to_date()

        assert len(rendered) == 6
Ejemplo n.º 2
0
    def __get_query(self) -> str:
        """Load the query text for the task from its configured location.

        ``self.task.source_query_type_id`` selects the loader:

        * 1 - GitLab URL (``self.task.source_git``)
        * 2 - file on an SMB share (``self.task.source_query_file``)
        * 3 - web URL (``self.task.source_url``)
        * 4 - code (``self.source_loader.source()`` with no arguments)

        Returns:
            The value produced by the selected ``self.source_loader`` call.

        Raises:
            ValueError: if ``source_query_type_id`` is none of the ids above.
                Previously this case surfaced as an ``UnboundLocalError``.
        """
        query_type = self.task.source_query_type_id

        if query_type == 3:  # url
            return self.source_loader.web_url(self.task.source_url)

        if query_type == 1:  # gitlab url
            return self.source_loader.gitlab(self.task.source_git)

        if query_type == 4:  # code
            return self.source_loader.source()

        if query_type == 2:  # smb
            # resolve parameters first, then date patterns, in the file name
            file_name = self.param_loader.insert_file_params(
                self.task.source_query_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            downloaded = Smb(
                task=self.task,
                run_id=self.run_id,
                directory=self.temp_path,
                connection=self.task.query_source,
            ).read(file_name)

            # only the first file returned by the share is used
            return self.source_loader.source(
                Path(downloaded[0].name).read_text("utf8"))

        raise ValueError(f"Unsupported source query type: {query_type!r}.")
Ejemplo n.º 3
0
def test_date_parsing_firstday_zero(client_fixture: fixture) -> None:
    """``firstday0`` should render as the zero-padded string ``"01"``."""
    _, demo_task_id = create_demo_task()
    demo_task = Task.query.filter_by(id=demo_task_id).first()

    parser = DateParsing(demo_task, None, "firstday0")
    rendered = parser.string_to_date()

    assert rendered == "01"
Ejemplo n.º 4
0
def test_date_parsing(client_fixture: fixture) -> None:
    """``%d`` should render as the current day of the month.

    The clock is sampled right before and right after the parse and either
    sample is accepted, so a midnight rollover between the parse and the
    comparison cannot fail the test.
    """
    _, t_id = create_demo_task()

    task = Task.query.filter_by(id=t_id).first()

    before = datetime.datetime.now()
    my_date_string = DateParsing(task, None, "%d").string_to_date()
    after = datetime.datetime.now()

    assert my_date_string in {before.strftime("%d"), after.strftime("%d")}
Ejemplo n.º 5
0
def test_complex_patterns(client_fixture: fixture) -> None:
    """Check patterns that mix offset directives, ``lastday`` and literal text."""
    _, demo_task_id = create_demo_task()
    demo_task = Task.query.filter_by(id=demo_task_id).first()

    # one group: six months and thirty days back, one year forward
    rendered = DateParsing(
        demo_task, None, "%m-6-%d-30-%Y+1-lastday"
    ).string_to_date()

    shifted = datetime.datetime.now() + relativedelta.relativedelta(
        years=1, months=-6, days=-30
    )
    _, month_length = calendar.monthrange(shifted.year, shifted.month)

    assert rendered == f"{shifted.strftime('%m-%d-%Y-')}{month_length}"

    # two groups with opposite offsets, separated by literal text
    rendered = DateParsing(
        demo_task,
        None,
        "%m-6-%d-30-%Y+1-lastday_something_cool_%m+6-%d+30-%Y-1-lastday",
    ).string_to_date()

    first_shifted = datetime.datetime.now() + relativedelta.relativedelta(
        years=1, months=-6, days=-30
    )
    _, first_month_length = calendar.monthrange(
        first_shifted.year, first_shifted.month
    )

    second_shifted = datetime.datetime.now() + relativedelta.relativedelta(
        years=-1, months=6, days=30
    )
    _, second_month_length = calendar.monthrange(
        second_shifted.year, second_shifted.month
    )

    expected = "".join(
        [
            first_shifted.strftime("%m-%d-%Y-"),
            str(first_month_length),
            "_something_cool_",
            second_shifted.strftime("%m-%d-%Y-"),
            str(second_month_length),
        ]
    )
    assert rendered == expected
Ejemplo n.º 6
0
def test_date_parsing_lastday(client_fixture: fixture) -> None:
    """``lastday`` should render the number of days in the current month."""
    _, demo_task_id = create_demo_task()
    demo_task = Task.query.filter_by(id=demo_task_id).first()

    rendered = DateParsing(demo_task, None, "lastday").string_to_date()

    current_year = datetime.datetime.now().year
    current_month = datetime.datetime.now().month
    _, days_in_month = calendar.monthrange(current_year, current_month)

    assert rendered == str(days_in_month)
Ejemplo n.º 7
0
def test_date_parsing_minutes(client_fixture: fixture) -> None:
    """``%M`` should render the current minute, optionally shifted by an offset.

    The clock is sampled right before and right after each parse and either
    sample is accepted, so the test cannot fail just because the minute
    rolled over between the parse and the comparison.
    """
    _, t_id = create_demo_task()

    task = Task.query.filter_by(id=t_id).first()

    # (pattern, expected shift in minutes)
    cases = (
        ("%M", 0),
        ("%M-1", -1),
        ("%M-100", -100),
        ("%M+10", 10),
    )

    for pattern, offset in cases:
        shift = relativedelta.relativedelta(minutes=offset)

        before = datetime.datetime.now()
        my_date_string = DateParsing(task, None, pattern).string_to_date()
        after = datetime.datetime.now()

        assert my_date_string in {
            (before + shift).strftime("%M"),
            (after + shift).strftime("%M"),
        }
Ejemplo n.º 8
0
def filename_preview(task_id: int) -> str:
    """Generate an HTML snippet previewing a task's destination file name.

    The task's ``destination_file_name`` has its parameters inserted and its
    date patterns parsed; the file type extension is appended unless the
    file type id is 4.

    Both the file name and any error text are HTML-escaped before being
    placed in the markup, so quotes or angle brackets in either cannot break
    out of the ``data-tooltip`` attribute or inject markup.

    Args:
        task_id: id of the task to preview.

    Returns:
        A success ``<span>`` holding the example file name, or a danger
        ``<span>`` whose tooltip carries the error message when the preview
        cannot be built.
    """
    import html  # local import: only this function needs it

    try:
        task = Task.query.filter_by(id=task_id).first()
        param_loader = ParamLoader(task, None)

        # insert params
        file_name = param_loader.insert_file_params(task.destination_file_name)

        # parse python dates
        file_name = DateParsing(task, None, file_name).string_to_date()

        if task.file_type and task.file_type.id != 4:
            file_name = f"{file_name}.{task.file_type.ext}"

        safe_name = html.escape(str(file_name))
        return f'<span class="tag is-success is-light">ex: {safe_name}</span>'
    # deliberately broad: the preview is best-effort and must never raise
    except BaseException as e:
        safe_error = html.escape(str(e))
        return f'<span class="has-tooltip-arrow has-tooltip-right has-tooltip-multiline tag is-danger is-light" data-tooltip="{safe_error}">No preview.</span>'
Ejemplo n.º 9
0
    def __process(self) -> None:
        """Obtain the task's processing script and run it with ``PyProcesser``.

        ``self.task.processing_type_id`` selects where the script comes from:

        * 1 - file on an SMB share
        * 2 - file on an SFTP server
        * 3 - file on an FTP server
        * 4 - git URL (whole repo is cloned when a processing command is set)
        * 5 - other URL (whole repo is cloned when a processing command is set)
        * 6 - source code stored on the task

        A single-file script is written to ``<temp_path>/<run_id>.py``. When
        the processor returns truthy output it is logged and stored in
        ``self.data_files``.

        Raises:
            RunnerException: if a repo cannot be cloned, if the task does not
                carry enough information to locate a script, or if the
                script file cannot be written.
        """
        RunnerLog(self.task, self.run_id, 8, "Starting processing script...")
        # get processing script

        # 1 = smb
        # 2 = sftp
        # 3 = ftp
        # 4 = git url
        # 5 = other url
        # 6 = source code

        processing_script_name = self.temp_path / (self.run_id + ".py")

        my_file = ""
        if (self.task.processing_type_id == 1
                and self.task.processing_smb_id is not None):
            # NOTE(review): this reads ``source_smb_file`` while the sftp/ftp
            # branches below read ``processing_*_file`` - confirm intended.
            file_name = self.param_loader.insert_file_params(
                self.task.source_smb_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            my_file = Path(
                Smb(
                    task=self.task,
                    run_id=self.run_id,
                    directory=self.temp_path,
                    connection=self.task.processing_smb_conn,
                ).read(file_name)[0].name).read_text("utf8")

        elif (self.task.processing_type_id == 2
              and self.task.processing_sftp_id is not None):
            file_name = self.param_loader.insert_file_params(
                self.task.processing_sftp_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            my_file = Path(
                Sftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.processing_sftp_conn,
                    directory=self.temp_path,
                ).read(file_name=file_name)[0].name).read_text("utf8")

        elif (self.task.processing_type_id == 3
              and self.task.processing_ftp_id is not None):
            file_name = self.param_loader.insert_file_params(
                self.task.processing_ftp_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            # NOTE(review): uses ``source_ftp_conn`` although the smb/sftp
            # branches use ``processing_*_conn`` - confirm intended.
            my_file = Path(
                Ftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.source_ftp_conn,
                    directory=self.temp_path,
                ).read(file_name=file_name)[0].name).read_text("utf8")

        elif self.task.processing_type_id == 4 and self.task.processing_git is not None:

            # if a dir is specified then download all files
            if (self.task.processing_command is not None
                    and self.task.processing_command != ""):
                try:
                    # inject url-quoted credentials right after the scheme
                    url = (re.sub(
                        r"(https?://)(.+?)",
                        r"\1<username>:<password>@\2",
                        self.task.processing_git,
                        flags=re.IGNORECASE,
                    ).replace("<username>",
                              urllib.parse.quote(
                                  app.config["GIT_USERNAME"])).replace(
                                      "<password>",
                                      urllib.parse.quote(
                                          app.config["GIT_PASSWORD"])))

                    # NOTE(review): the url is interpolated into a shell
                    # command string without shell quoting.
                    cmd = (
                        "$(which git) clone -q --depth 1 " +
                        '--recurse-submodules --shallow-submodules %s "%s"' %
                        (url, str(self.temp_path)))

                    Cmd(
                        self.task,
                        self.run_id,
                        cmd,
                        "Repo cloned.",
                        "Failed to clone repo: %s" %
                        (self.task.processing_git, ),
                    ).shell()

                # pylint: disable=broad-except
                except BaseException:
                    raise RunnerException(self.task, self.run_id, 8,
                                          "Processor failed to clone repo.")

            # otherwise get py file
            else:
                my_file = self.source_loader.gitlab(self.task.processing_git)

        elif self.task.processing_type_id == 5 and self.task.processing_url is not None:
            # An empty command is treated like a missing one, matching the
            # git branch above; previously "" cloned the repo and then
            # crashed when the script name was resolved.
            if self.task.processing_command:
                try:

                    # NOTE(review): the url is interpolated into a shell
                    # command string without shell quoting.
                    cmd = (
                        "$(which git) clone -q --depth 1 " +
                        '--recurse-submodules --shallow-submodules %s "%s"' %
                        (self.task.processing_url, str(self.temp_path)))

                    Cmd(
                        task=self.task,
                        run_id=self.run_id,
                        cmd=cmd,
                        success_msg="Repo cloned",
                        error_msg="Failed to clone repo: %s" %
                        (self.task.processing_url, ),
                    ).shell()

                    # from here on this is a plain string, not a Path
                    processing_script_name = (str(self.temp_path) +
                                              self.task.processing_command)
                # pylint: disable=broad-except
                except BaseException:
                    raise RunnerException(self.task, self.run_id, 8,
                                          "Processor failed to clone repo.")
            else:
                my_file = self.source_loader.web_url(self.task.processing_url)

        elif (self.task.processing_type_id == 6
              and self.task.processing_code is not None):
            my_file = self.task.processing_code
        elif self.task.processing_type_id > 0:
            raise RunnerException(
                self.task,
                self.run_id,
                8,
                "Processing error, Not enough information to run a processing script from.",
            )

        try:
            # only single-file scripts are written; cloned repos are already on disk
            if my_file != "" and self.task.processing_type_id > 0:
                Path(processing_script_name).parent.mkdir(parents=True,
                                                          exist_ok=True)
                with open(processing_script_name, "w") as text_file:
                    text_file.write(my_file)
                RunnerLog(self.task, self.run_id, 8,
                          "Processing script created.")

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 8,
                                  f"Processing script failure:\n{e}")

        # run processing script
        # Path() wrapper: processing_script_name may be a str (url branch)
        output = PyProcesser(
            task=self.task,
            run_id=self.run_id,
            directory=self.temp_path,
            source_files=self.source_files,
            script=(self.task.processing_command
                    or Path(processing_script_name).name),
        ).run()

        # # allow processer to rename file
        if output:
            RunnerLog(self.task, self.run_id, 8,
                      f"Processing script output:\n{output}")
            self.data_files = output
Ejemplo n.º 10
0
    def __get_source(self) -> None:
        """Pull the task's source data according to ``task.source_type_id``.

        * 1 - run the loaded query against postgres or mssql; the runner's
          result is stored in ``self.query_output_size`` / ``self.source_files``
        * 2 / 3 / 4 - read a file over SMB / SFTP / FTP into
          ``self.source_files``
        * 6 - run the loaded query as an SSH command

        Raises:
            RunnerException: when the query cannot be loaded or fails to run.
        """
        source_type = self.task.source_type_id

        if source_type == 1:  # sql
            db_conn = self.task.source_database_conn

            try:
                RunnerLog(self.task, self.run_id, 8, "Loading query...")
                sql_text = self.__get_query()
            except BaseException as load_error:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    8,
                    f"Failed to load query.\n{load_error}",
                )

            RunnerLog(
                self.task,
                self.run_id,
                8,
                "Starting query run, waiting for results...",
            )

            # choose the runner class and the log code used for its errors
            engine_id = db_conn.database_type.id
            if engine_id == 1:  # postgres
                runner_cls, log_code = Postgres, 21
            elif engine_id == 2:  # mssql
                runner_cls, log_code = SqlServer, 20
            else:
                runner_cls, log_code = None, None

            if runner_cls is not None:
                try:
                    self.query_output_size, self.source_files = runner_cls(
                        task=self.task,
                        run_id=self.run_id,
                        connection=em_decrypt(
                            db_conn.connection_string,
                            app.config["PASS_KEY"],
                        ),
                        timeout=(db_conn.timeout
                                 or app.config["DEFAULT_SQL_TIMEOUT"]),
                        directory=self.temp_path,
                    ).run(sql_text)

                except ValueError as run_error:
                    # ValueError is passed through as the message itself
                    raise RunnerException(self.task, self.run_id, log_code,
                                          run_error)

                except BaseException as run_error:
                    raise RunnerException(
                        self.task,
                        self.run_id,
                        log_code,
                        f"Failed to run query.\n{run_error}",
                    )

            first_file = self.source_files[0]
            size_text = file_size(str(Path(first_file.name).stat().st_size))
            RunnerLog(
                self.task,
                self.run_id,
                8,
                f"Query completed.\nData file {first_file.name} created. Data size: {size_text}.",
            )

        elif source_type == 2:  # smb file
            with_params = self.param_loader.insert_file_params(
                self.task.source_smb_file)
            date_parser = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=with_params,
            )
            remote_name = date_parser.string_to_date()

            smb_client = Smb(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_smb_conn,
                directory=self.temp_path,
            )
            self.source_files = smb_client.read(file_name=remote_name)

        elif source_type == 3:  # sftp file
            RunnerLog(self.task, self.run_id, 9, "Loading data from server...")
            with_params = self.param_loader.insert_file_params(
                self.task.source_sftp_file)
            date_parser = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=with_params,
            )
            remote_name = date_parser.string_to_date()

            sftp_client = Sftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_sftp_conn,
                directory=self.temp_path,
            )
            self.source_files = sftp_client.read(file_name=remote_name)

        elif source_type == 4:  # ftp file
            RunnerLog(self.task, self.run_id, 13,
                      "Loading data from server...")
            with_params = self.param_loader.insert_file_params(
                self.task.source_ftp_file)
            date_parser = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=with_params,
            )
            remote_name = date_parser.string_to_date()

            ftp_client = Ftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_ftp_conn,
                directory=self.temp_path,
            )
            self.source_files = ftp_client.read(file_name=remote_name)

        elif source_type == 6:  # ssh command
            ssh_command = self.__get_query()

            ssh_client = Ssh(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_ssh_conn,
                command=ssh_command,
            )
            ssh_client.run()
Ejemplo n.º 11
0
    def save(self) -> Tuple[str, str, str]:
        """Create the destination file, then optionally encrypt and zip it.

        The file name comes from ``task.destination_file_name`` (after
        parameter insertion and date parsing) or, when that is empty, from
        the name of the source data file.

        Returns:
            ``(file_name, file_path, hexdigest)`` of the final file, where
            ``hexdigest`` is ``self.file_hash.hexdigest()`` (logged as md5).

        Raises:
            RunnerException: if GPG encryption reports a failure.
        """
        destination_name = self.task.destination_file_name
        file_type_id = self.task.destination_file_type_id

        if destination_name:
            # insert params
            self.file_name = self.params.insert_file_params(
                destination_name.strip())

            # parse python dates
            self.file_name = DateParsing(self.task, self.run_id,
                                         self.file_name).string_to_date()

        else:
            RunnerLog(
                self.task,
                self.run_id,
                11,
                f"No filename specified, {Path(self.data_file.name).name} will be used.",
            )
            self.file_name = Path(self.data_file.name).name

        # 4 is other
        if file_type_id != 4 and self.task.file_type is not None:
            self.file_name += "." + (self.task.file_type.ext or "csv")

        self.file_path = str(Path(self.base_path).joinpath(self.file_name))

        # if the source name matches the destination name, rename the source and update tmp file name.
        if self.data_file.name == self.file_path:
            data_file_as_path = Path(self.data_file.name)
            new_data_file_name = str(
                data_file_as_path.parent /
                (data_file_as_path.stem + "_tmp" + data_file_as_path.suffix))
            os.rename(self.data_file.name, new_data_file_name)
            self.data_file.name = new_data_file_name  # type: ignore[misc]

        def _strip_quotes(row: list) -> list:
            """Strip wrapping double, then single, quotes from string cells."""
            return [(x.strip('"').strip("'") if isinstance(x, str) else x)
                    for x in row]

        with open(self.data_file.name, "r", newline="") as data_file:
            reader = csv.reader(data_file)

            # newline="" is required for files written through csv.writer;
            # without it, platforms that translate "\n" emit "\r\r\n".
            with open(self.file_path, mode="w", newline="") as myfile:
                # if csv (1) or text (2) and had delimiter

                if (file_type_id in (1, 2, 4)
                        and self.task.destination_ignore_delimiter != 1):
                    delimiter = self.task.destination_file_delimiter

                    # a custom delimiter only applies to txt or other
                    if delimiter and file_type_id in (2, 4):
                        wrtr = csv.writer(
                            myfile,
                            # turn escapes such as "\t" into the real character
                            delimiter=str(delimiter).encode("utf-8").decode(
                                "unicode_escape"),
                            quoting=self.__quote_level(),
                        )
                    else:
                        wrtr = csv.writer(
                            myfile,
                            quoting=self.__quote_level(),
                        )

                    # when set, the terminator is appended as an extra cell
                    line_terminator = (
                        self.task.destination_file_line_terminator)

                    for row in reader:
                        new_row = _strip_quotes(row)

                        if line_terminator:
                            new_row.append(line_terminator)

                        wrtr.writerow(new_row)

                # if xlxs (3)
                elif file_type_id == 3:
                    wrtr = csv.writer(
                        myfile,
                        dialect="excel",
                        quoting=self.__quote_level(),
                    )
                    for row in reader:
                        wrtr.writerow(_strip_quotes(row))

                else:
                    # no csv handling requested: copy the data through as-is
                    for line in data_file:
                        myfile.write(line)

        RunnerLog(
            self.task,
            self.run_id,
            11,
            f"File {self.file_name} created. Size: {file_size(Path(self.file_path).stat().st_size)}.\n{self.file_path}",
        )

        # encrypt file
        if self.task.file_gpg == 1:
            gpg = gnupg.GPG("/usr/local/bin/gpg")

            # import the key
            keychain = gpg.import_keys(
                em_decrypt(self.task.file_gpg_conn.key,
                           app.config["PASS_KEY"]))

            # set it to trusted
            gpg.trust_keys(keychain.fingerprints, "TRUST_ULTIMATE")

            # encrypt file
            with open(self.file_path, "rb") as my_file:
                encrypt_status = gpg.encrypt_file(
                    file=my_file,
                    recipients=keychain.fingerprints,
                    output=self.file_path + ".gpg",
                )

            # remove key
            gpg.delete_keys(keychain.fingerprints)

            if not encrypt_status.ok:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    11,
                    "File failed to encrypt.\n%s\n%s\n%s" % (
                        self.file_path,
                        encrypt_status.status,
                        encrypt_status.stderr,
                    ),
                )

            # update global file name
            self.file_path = self.file_path + ".gpg"
            self.file_name = self.file_name + ".gpg"

            RunnerLog(
                self.task,
                self.run_id,
                11,
                "File encrypted.\n%s\n%s\n%s" %
                (self.file_path, encrypt_status.status, encrypt_status.stderr),
            )

        # get file hash.. after encrypting
        with open(self.file_path, "rb") as my_file:
            while True:
                chunk = my_file.read(8192)
                if not chunk:
                    break
                self.file_hash.update(chunk)

        RunnerLog(self.task, self.run_id, 11,
                  f"File md5 hash: {self.file_hash.hexdigest()}")

        # create zip
        if self.task.destination_create_zip == 1:

            self.zip_name = DateParsing(
                self.task, self.run_id,
                str(self.task.destination_zip_name)).string_to_date()

            # parse params
            self.zip_name = self.params.insert_file_params(self.zip_name)

            # NOTE(review): replace() drops every ".zip" occurrence, not just
            # a trailing one - confirm that is intended.
            self.zip_name = self.zip_name.replace(".zip", "") + ".zip"

            with zipfile.ZipFile(
                    str(Path(self.base_path).joinpath(self.zip_name)),
                    "w") as zip_file:
                zip_file.write(
                    self.file_path,
                    compress_type=zipfile.ZIP_DEFLATED,
                    arcname=self.file_name,
                )

            # now we change all file stuff to our zip.

            self.file_name = self.zip_name
            self.file_path = str(Path(self.base_path).joinpath(self.zip_name))

            RunnerLog(self.task, self.run_id, 11,
                      f"ZIP archive created.\n{self.file_path}")

        return self.file_name, self.file_path, self.file_hash.hexdigest()
 def insert_date(match: re.Match) -> str:
     """Run the first captured group of *match* through ``DateParsing``."""
     parser = DateParsing(self.task, self.run_id, match.group(1))
     return parser.string_to_date()