def test_date_parsing_microseconds(client_fixture: fixture) -> None:
    _, t_id = create_demo_task()
    task = Task.query.filter_by(id=t_id).first()

    my_date_string = DateParsing(task, None, "%f").string_to_date()
    assert len(my_date_string) == 6

    # offsets applied to %f should still yield a six-digit microsecond string
    my_date_string = DateParsing(task, None, "%f-1").string_to_date()
    assert len(my_date_string) == 6

    my_date_string = DateParsing(task, None, "%f+1").string_to_date()
    assert len(my_date_string) == 6
def __get_query(self) -> str:
    if self.task.source_query_type_id == 3:  # url
        query = self.source_loader.web_url(self.task.source_url)

    elif self.task.source_query_type_id == 1:  # gitlab url
        query = self.source_loader.gitlab(self.task.source_git)

    elif self.task.source_query_type_id == 4:  # code
        query = self.source_loader.source()

    elif self.task.source_query_type_id == 2:  # smb file
        file_name = self.param_loader.insert_file_params(self.task.source_query_file)

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        query = self.source_loader.source(
            Path(
                Smb(
                    task=self.task,
                    run_id=self.run_id,
                    directory=self.temp_path,
                    connection=self.task.query_source,
                )
                .read(file_name)[0]
                .name
            ).read_text("utf8")
        )

    return query
def test_date_parsing_firstday_zero(client_fixture: fixture) -> None:
    _, t_id = create_demo_task()
    task = Task.query.filter_by(id=t_id).first()

    my_date_string = DateParsing(task, None, "firstday0").string_to_date()
    assert my_date_string == "01"
def test_date_parsing(client_fixture: fixture) -> None:
    _, t_id = create_demo_task()
    task = Task.query.filter_by(id=t_id).first()

    my_date_string = DateParsing(task, None, "%d").string_to_date()
    assert my_date_string == datetime.datetime.now().strftime("%d")
def test_complex_patterns(client_fixture: fixture) -> None:
    _, t_id = create_demo_task()
    task = Task.query.filter_by(id=t_id).first()

    my_date_string = DateParsing(task, None, "%m-6-%d-30-%Y+1-lastday").string_to_date()

    new_date = datetime.datetime.now() + relativedelta.relativedelta(
        months=-6, days=-30, years=1
    )
    last_day = calendar.monthrange(
        int(new_date.strftime("%Y")),
        int(new_date.strftime("%m")),
    )[1]

    assert my_date_string == new_date.strftime("%m-%d-%Y-") + str(last_day)

    my_date_string = DateParsing(
        task, None, "%m-6-%d-30-%Y+1-lastday_something_cool_%m+6-%d+30-%Y-1-lastday"
    ).string_to_date()

    new_date = datetime.datetime.now() + relativedelta.relativedelta(
        months=-6, days=-30, years=1
    )
    last_day = calendar.monthrange(
        int(new_date.strftime("%Y")), int(new_date.strftime("%m"))
    )[1]

    second_new_date = datetime.datetime.now() + relativedelta.relativedelta(
        months=6, days=30, years=-1
    )
    second_last_day = calendar.monthrange(
        int(second_new_date.strftime("%Y")), int(second_new_date.strftime("%m"))
    )[1]

    assert my_date_string == new_date.strftime("%m-%d-%Y-") + str(
        last_day
    ) + "_something_cool_" + second_new_date.strftime("%m-%d-%Y-") + str(
        second_last_day
    )
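# The tests above exercise DateParsing's pattern grammar: strftime codes with
# optional signed offsets ("%m-6" = six months back, "%Y+1" = one year ahead)
# plus special tokens such as "firstday0" and "lastday". The sketch below is a
# minimal illustration of that offset grammar only, not DateParsing's actual
# implementation; the regex, unit table, and helper name are assumptions.
import datetime
import re

from dateutil import relativedelta

# assumed mapping from strftime code to the relativedelta unit its offset shifts
_OFFSET_UNITS = {"%Y": "years", "%m": "months", "%d": "days", "%H": "hours", "%M": "minutes"}


def _sketch_string_to_date(pattern: str) -> str:
    """Resolve '%m-6' style segments: a strftime code plus a signed integer offset."""
    now = datetime.datetime.now()

    def resolve(match: re.Match) -> str:
        code, sign, amount = match.groups()
        shifted = now + relativedelta.relativedelta(**{_OFFSET_UNITS[code]: int(sign + amount)})
        return shifted.strftime(code)

    # replace each offset segment with its shifted value, then let strftime
    # fill in any remaining bare codes
    return now.strftime(re.sub(r"(%[YmdHM])([+-])(\d+)", resolve, pattern))


# e.g. _sketch_string_to_date("%m-6-%d") -> "02-17" style output when run in August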
def test_date_parsing_lastday(client_fixture: fixture) -> None:
    _, t_id = create_demo_task()
    task = Task.query.filter_by(id=t_id).first()

    my_date_string = DateParsing(task, None, "lastday").string_to_date()

    last_day = calendar.monthrange(
        int(datetime.datetime.now().strftime("%Y")),
        int(datetime.datetime.now().strftime("%m")),
    )[1]

    assert my_date_string == str(last_day)
def test_date_parsing_minutes(client_fixture: fixture) -> None:
    _, t_id = create_demo_task()
    task = Task.query.filter_by(id=t_id).first()

    my_date_string = DateParsing(task, None, "%M").string_to_date()
    assert my_date_string == datetime.datetime.now().strftime("%M")

    my_date_string = DateParsing(task, None, "%M-1").string_to_date()
    assert my_date_string == (
        datetime.datetime.now() + relativedelta.relativedelta(minutes=-1)
    ).strftime("%M")

    my_date_string = DateParsing(task, None, "%M-100").string_to_date()
    assert my_date_string == (
        datetime.datetime.now() + relativedelta.relativedelta(minutes=-100)
    ).strftime("%M")

    my_date_string = DateParsing(task, None, "%M+10").string_to_date()
    assert my_date_string == (
        datetime.datetime.now() + relativedelta.relativedelta(minutes=10)
    ).strftime("%M")
def filename_preview(task_id: int) -> str:
    """Generate a filename preview."""
    try:
        task = Task.query.filter_by(id=task_id).first()
        param_loader = ParamLoader(task, None)

        # insert params
        file_name = param_loader.insert_file_params(task.destination_file_name)

        # parse python dates
        file_name = DateParsing(task, None, file_name).string_to_date()

        # 4 is "other"; any other file type gets its extension appended
        if task.file_type and task.file_type.id != 4:
            file_name = f"{file_name}.{task.file_type.ext}"

        return f'<span class="tag is-success is-light">ex: {file_name}</span>'

    except BaseException as e:
        return f'<span class="has-tooltip-arrow has-tooltip-right has-tooltip-multiline tag is-danger is-light" data-tooltip="{e}">No preview.</span>'
def __process(self) -> None:
    RunnerLog(self.task, self.run_id, 8, "Starting processing script...")

    # get processing script
    # 1 = smb
    # 2 = sftp
    # 3 = ftp
    # 4 = git url
    # 5 = other url
    # 6 = source code

    processing_script_name = self.temp_path / (self.run_id + ".py")

    my_file = ""
    if (
        self.task.processing_type_id == 1
        and self.task.processing_smb_id is not None
    ):
        # note: read the processing file, not the source file
        file_name = self.param_loader.insert_file_params(
            self.task.processing_smb_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        my_file = Path(
            Smb(
                task=self.task,
                run_id=self.run_id,
                directory=self.temp_path,
                connection=self.task.processing_smb_conn,
            )
            .read(file_name)[0]
            .name
        ).read_text("utf8")

    elif (
        self.task.processing_type_id == 2
        and self.task.processing_sftp_id is not None
    ):
        file_name = self.param_loader.insert_file_params(
            self.task.processing_sftp_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        my_file = Path(
            Sftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.processing_sftp_conn,
                directory=self.temp_path,
            )
            .read(file_name=file_name)[0]
            .name
        ).read_text("utf8")

    elif (
        self.task.processing_type_id == 3
        and self.task.processing_ftp_id is not None
    ):
        file_name = self.param_loader.insert_file_params(
            self.task.processing_ftp_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        my_file = Path(
            Ftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.processing_ftp_conn,
                directory=self.temp_path,
            )
            .read(file_name=file_name)[0]
            .name
        ).read_text("utf8")

    elif self.task.processing_type_id == 4 and self.task.processing_git is not None:
        # if a dir is specified then download all files
        if (
            self.task.processing_command is not None
            and self.task.processing_command != ""
        ):
            try:
                url = (
                    re.sub(
                        r"(https?://)(.+?)",
                        r"\1<username>:<password>@\2",
                        self.task.processing_git,
                        flags=re.IGNORECASE,
                    )
                    .replace(
                        "<username>",
                        urllib.parse.quote(app.config["GIT_USERNAME"]),
                    )
                    .replace(
                        "<password>",
                        urllib.parse.quote(app.config["GIT_PASSWORD"]),
                    )
                )

                cmd = (
                    "$(which git) clone -q --depth 1 "
                    + '--recurse-submodules --shallow-submodules %s "%s"'
                    % (url, str(self.temp_path))
                )

                Cmd(
                    self.task,
                    self.run_id,
                    cmd,
                    "Repo cloned.",
                    "Failed to clone repo: %s" % (self.task.processing_git,),
                ).shell()

            # pylint: disable=broad-except
            except BaseException:
                raise RunnerException(
                    self.task, self.run_id, 8, "Processor failed to clone repo."
                )

        # otherwise get py file
        else:
            my_file = self.source_loader.gitlab(self.task.processing_git)

    elif self.task.processing_type_id == 5 and self.task.processing_url is not None:
        if self.task.processing_command is not None:
            try:
                cmd = (
                    "$(which git) clone -q --depth 1 "
                    + '--recurse-submodules --shallow-submodules %s "%s"'
                    % (self.task.processing_url, str(self.temp_path))
                )

                Cmd(
                    task=self.task,
                    run_id=self.run_id,
                    cmd=cmd,
                    success_msg="Repo cloned",
                    error_msg="Failed to clone repo: %s"
                    % (self.task.processing_url,),
                ).shell()

                processing_script_name = self.temp_path / (
                    self.task.processing_command or ""
                )

            # pylint: disable=broad-except
            except BaseException:
                raise RunnerException(
                    self.task, self.run_id, 8, "Processor failed to clone repo."
                )
        else:
            my_file = self.source_loader.web_url(self.task.processing_url)

    elif (
        self.task.processing_type_id == 6
        and self.task.processing_code is not None
    ):
        my_file = self.task.processing_code

    elif self.task.processing_type_id > 0:
        raise RunnerException(
            self.task,
            self.run_id,
            8,
            "Processing error, not enough information to run a processing script from.",
        )

    try:
        if my_file != "" and self.task.processing_type_id > 0:
            Path(processing_script_name).parent.mkdir(parents=True, exist_ok=True)
            with open(processing_script_name, "w") as text_file:
                text_file.write(my_file)

            RunnerLog(self.task, self.run_id, 8, "Processing script created.")

    # pylint: disable=broad-except
    except BaseException as e:
        raise RunnerException(
            self.task, self.run_id, 8, f"Processing script failure:\n{e}"
        )

    # run processing script
    output = PyProcesser(
        task=self.task,
        run_id=self.run_id,
        directory=self.temp_path,
        source_files=self.source_files,
        script=self.task.processing_command or processing_script_name.name,
    ).run()

    # allow the processor to rename the output file
    if output:
        RunnerLog(self.task, self.run_id, 8, f"Processing script output:\n{output}")
        self.data_files = output
def __get_source(self) -> None:
    if self.task.source_type_id == 1:  # sql
        external_db = self.task.source_database_conn

        try:
            RunnerLog(self.task, self.run_id, 8, "Loading query...")
            query = self.__get_query()
        except BaseException as e:
            raise RunnerException(
                self.task, self.run_id, 8, f"Failed to load query.\n{e}"
            )

        RunnerLog(
            self.task, self.run_id, 8, "Starting query run, waiting for results..."
        )

        if external_db.database_type.id == 1:  # postgres
            try:
                self.query_output_size, self.source_files = Postgres(
                    task=self.task,
                    run_id=self.run_id,
                    connection=em_decrypt(
                        external_db.connection_string, app.config["PASS_KEY"]
                    ),
                    timeout=external_db.timeout
                    or app.config["DEFAULT_SQL_TIMEOUT"],
                    directory=self.temp_path,
                ).run(query)

            except ValueError as message:
                raise RunnerException(self.task, self.run_id, 21, message)

            except BaseException as message:
                raise RunnerException(
                    self.task, self.run_id, 21, f"Failed to run query.\n{message}"
                )

        elif external_db.database_type.id == 2:  # mssql
            try:
                self.query_output_size, self.source_files = SqlServer(
                    task=self.task,
                    run_id=self.run_id,
                    connection=em_decrypt(
                        external_db.connection_string, app.config["PASS_KEY"]
                    ),
                    timeout=external_db.timeout
                    or app.config["DEFAULT_SQL_TIMEOUT"],
                    directory=self.temp_path,
                ).run(query)

            except ValueError as message:
                raise RunnerException(self.task, self.run_id, 20, message)

            except BaseException as message:
                raise RunnerException(
                    self.task, self.run_id, 20, f"Failed to run query.\n{message}"
                )

        RunnerLog(
            self.task,
            self.run_id,
            8,
            f"Query completed.\nData file {self.source_files[0].name} created. Data size: {file_size(str(Path(self.source_files[0].name).stat().st_size))}.",
        )

    elif self.task.source_type_id == 2:  # smb file
        file_name = self.param_loader.insert_file_params(self.task.source_smb_file)

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        self.source_files = Smb(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_smb_conn,
            directory=self.temp_path,
        ).read(file_name=file_name)

    elif self.task.source_type_id == 3:  # sftp file
        RunnerLog(self.task, self.run_id, 9, "Loading data from server...")

        file_name = self.param_loader.insert_file_params(self.task.source_sftp_file)

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        self.source_files = Sftp(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_sftp_conn,
            directory=self.temp_path,
        ).read(file_name=file_name)

    elif self.task.source_type_id == 4:  # ftp file
        RunnerLog(self.task, self.run_id, 13, "Loading data from server...")

        file_name = self.param_loader.insert_file_params(self.task.source_ftp_file)

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        self.source_files = Ftp(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_ftp_conn,
            directory=self.temp_path,
        ).read(file_name=file_name)

    elif self.task.source_type_id == 6:  # ssh command
        query = self.__get_query()

        Ssh(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_ssh_conn,
            command=query,
        ).run()
def save(self) -> Tuple[str, str, str]:
    """Create and save the file.

    Returns (file name, file path, file hash) of the final file.
    """
    if (
        self.task.destination_file_name is None
        or self.task.destination_file_name == ""
    ):
        RunnerLog(
            self.task,
            self.run_id,
            11,
            f"No filename specified, {Path(self.data_file.name).name} will be used.",
        )

    if (
        self.task.destination_file_name != ""
        and self.task.destination_file_name is not None
    ):
        # insert params
        self.file_name = self.params.insert_file_params(
            self.task.destination_file_name.strip()
        )

        # parse python dates
        self.file_name = DateParsing(
            self.task, self.run_id, self.file_name
        ).string_to_date()
    else:
        self.file_name = Path(self.data_file.name).name

    # 4 is other
    if self.task.destination_file_type_id != 4 and self.task.file_type is not None:
        self.file_name += "." + (self.task.file_type.ext or "csv")

    self.file_path = str(Path(self.base_path).joinpath(self.file_name))

    # if the source name matches the destination name, rename the source
    # and update the tmp file name.
    if self.data_file.name == self.file_path:
        data_file_as_path = Path(self.data_file.name)
        new_data_file_name = str(
            data_file_as_path.parent
            / (data_file_as_path.stem + "_tmp" + data_file_as_path.suffix)
        )
        os.rename(self.data_file.name, new_data_file_name)
        self.data_file.name = new_data_file_name  # type: ignore[misc]

    with open(self.data_file.name, "r", newline="") as data_file:
        reader = csv.reader(data_file)

        with open(self.file_path, mode="w") as myfile:
            # if csv (1), text (2), or other (4) and the delimiter is not ignored
            if (
                self.task.destination_file_type_id == 1
                or self.task.destination_file_type_id == 2
                or self.task.destination_file_type_id == 4
            ) and (
                self.task.destination_ignore_delimiter is None
                or self.task.destination_ignore_delimiter != 1
            ):
                wrtr = (
                    csv.writer(
                        myfile,
                        # decode escaped delimiters: a stored literal "\t"
                        # becomes a real tab via the unicode_escape round-trip
                        delimiter=str(self.task.destination_file_delimiter)
                        .encode("utf-8")
                        .decode("unicode_escape"),
                        quoting=self.__quote_level(),
                    )
                    if self.task.destination_file_delimiter is not None
                    and len(self.task.destination_file_delimiter) > 0
                    and (
                        self.task.destination_file_type_id == 2
                        or self.task.destination_file_type_id == 4
                    )  # txt or other
                    else csv.writer(
                        myfile,
                        quoting=self.__quote_level(),
                    )
                )

                for row in reader:
                    new_row = [
                        (x.strip('"').strip("'") if isinstance(x, str) else x)
                        for x in row
                    ]

                    if (
                        self.task.destination_file_type_id == 1
                        or self.task.destination_file_type_id == 2
                        or self.task.destination_file_type_id == 4
                    ) and (
                        self.task.destination_file_line_terminator is not None
                        and self.task.destination_file_line_terminator != ""
                    ):
                        new_row.append(self.task.destination_file_line_terminator)

                    wrtr.writerow(new_row)

            # if xlsx (3)
            elif self.task.destination_file_type_id == 3:
                wrtr = csv.writer(
                    myfile,
                    dialect="excel",
                    quoting=self.__quote_level(),
                )

                for row in reader:
                    new_row = [
                        (x.strip('"').strip("'") if isinstance(x, str) else x)
                        for x in row
                    ]
                    wrtr.writerow(new_row)

            else:
                for line in data_file:
                    myfile.write(line)

    RunnerLog(
        self.task,
        self.run_id,
        11,
        f"File {self.file_name} created. Size: {file_size(Path(self.file_path).stat().st_size)}.\n{self.file_path}",
    )

    # encrypt file
    if self.task.file_gpg == 1:
        gpg = gnupg.GPG("/usr/local/bin/gpg")

        # import the key
        keychain = gpg.import_keys(
            em_decrypt(self.task.file_gpg_conn.key, app.config["PASS_KEY"])
        )

        # set it to trusted
        gpg.trust_keys(keychain.fingerprints, "TRUST_ULTIMATE")

        # encrypt file
        with open(self.file_path, "rb") as my_file:
            encrypt_status = gpg.encrypt_file(
                file=my_file,
                recipients=keychain.fingerprints,
                output=self.file_path + ".gpg",
            )

        # remove key
        gpg.delete_keys(keychain.fingerprints)

        # update global file name
        if not encrypt_status.ok:
            raise RunnerException(
                self.task,
                self.run_id,
                11,
                "File failed to encrypt.\n%s\n%s\n%s"
                % (
                    self.file_path,
                    encrypt_status.status,
                    encrypt_status.stderr,
                ),
            )

        self.file_path = self.file_path + ".gpg"
        self.file_name = self.file_name + ".gpg"

        RunnerLog(
            self.task,
            self.run_id,
            11,
            "File encrypted.\n%s\n%s\n%s"
            % (self.file_path, encrypt_status.status, encrypt_status.stderr),
        )

    # get file hash.. after encrypting
    with open(self.file_path, "rb") as my_file:
        while True:
            chunk = my_file.read(8192)
            if not chunk:
                break
            self.file_hash.update(chunk)

    RunnerLog(
        self.task, self.run_id, 11, f"File md5 hash: {self.file_hash.hexdigest()}"
    )

    # create zip
    if self.task.destination_create_zip == 1:
        self.zip_name = DateParsing(
            self.task, self.run_id, str(self.task.destination_zip_name)
        ).string_to_date()

        # parse params
        self.zip_name = self.params.insert_file_params(self.zip_name)

        self.zip_name = self.zip_name.replace(".zip", "") + ".zip"

        with zipfile.ZipFile(
            str(Path(self.base_path).joinpath(self.zip_name)), "w"
        ) as zip_file:
            zip_file.write(
                self.file_path,
                compress_type=zipfile.ZIP_DEFLATED,
                arcname=self.file_name,
            )

        # now we change all file stuff to our zip.
        self.file_name = self.zip_name
        self.file_path = str(Path(self.base_path).joinpath(self.zip_name))

        RunnerLog(
            self.task, self.run_id, 11, f"ZIP archive created.\n{self.file_path}"
        )

    return self.file_name, self.file_path, self.file_hash.hexdigest()
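# A quick check of the delimiter decoding used in save() above: a delimiter
# stored as the two-character literal "\t" round-trips through unicode_escape
# into a real tab character.
assert "\\t".encode("utf-8").decode("unicode_escape") == "\t"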
def insert_date(match: re.Match) -> str:
    """Parse py dates."""
    return DateParsing(self.task, self.run_id, match.group(1)).string_to_date()
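# insert_date is shaped as a re.sub callback: each captured date pattern is
# replaced by its parsed value. A hedged usage sketch follows; the token
# syntax shown (a hypothetical parse(...) wrapper) is an illustrative
# assumption, not confirmed by this excerpt:
#
#     file_name = re.sub(r"parse\((.+?)\)", insert_date, file_name)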