def __get_query(self) -> str:
    """Load the text of the task's source query.

    Query source type ids: 1 = gitlab url, 2 = smb file, 3 = web url,
    4 = inline source code.

    :returns: the query text.
    :raises ValueError: for an unrecognized query source type id.  The
        original code left ``query`` unbound in that case and raised an
        opaque ``NameError`` at the ``return`` statement instead.
    """
    if self.task.source_query_type_id == 3:  # url
        return self.source_loader.web_url(self.task.source_url)

    if self.task.source_query_type_id == 1:  # gitlab url
        return self.source_loader.gitlab(self.task.source_git)

    if self.task.source_query_type_id == 4:  # code
        return self.source_loader.source()

    if self.task.source_query_type_id == 2:  # smb file
        # resolve task parameters and date placeholders in the file name.
        file_name = self.param_loader.insert_file_params(
            self.task.source_query_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        # download the query file from the smb share and read its text.
        return self.source_loader.source(
            Path(
                Smb(
                    task=self.task,
                    run_id=self.run_id,
                    directory=self.temp_path,
                    connection=self.task.query_source,
                )
                .read(file_name)[0]
                .name
            ).read_text("utf8")
        )

    raise ValueError(
        f"Unknown source query type id: {self.task.source_query_type_id}"
    )
def get_task_file_download(file_id: int) -> dict:
    """Download file from SMB backup server.

    :param file_id: id of the ``TaskFile`` row to fetch.
    :returns: json response with the downloaded temp file name, or an
        error message when the file id does not exist.
    """
    my_file = TaskFile.query.filter_by(id=file_id).first()

    # guard: the original code raised AttributeError ("'NoneType' object
    # has no attribute 'task'") when the file id was invalid.
    if my_file is None:
        return jsonify({"error": f"File {file_id} does not exist."})

    task = my_file.task

    # per-project/task/run scratch directory for the download.
    temp_path = Path(
        Path(__file__).parent.parent
        / "temp"
        / sanitize_filename(task.project.name)
        / sanitize_filename(task.name)
        / my_file.job_id
    )
    temp_path.mkdir(parents=True, exist_ok=True)

    # pull the file down from the backup server.
    temp_file = (
        Smb(
            task=task,
            run_id=my_file.job_id,
            connection=None,  # "default",
            directory=temp_path,
        )
        .read(my_file.path)[0]
        .name
    )

    return jsonify({"message": temp_file})
def send_smb(run_id: int, file_id: int) -> dict:
    """Send file to SMB server specified in the task.

    File is loaded from the backup SMB file server into a tempfile.
    The tempfile is deposited into the SMB location.
    """
    try:
        task_file = TaskFile.query.filter_by(id=file_id).first()
        parent_task = task_file.task

        # scratch directory scoped to project/task/run.
        work_dir = Path(
            Path(__file__).parent.parent
            / "temp"
            / sanitize_filename(parent_task.project.name)
            / sanitize_filename(parent_task.name)
            / task_file.job_id
        )
        work_dir.mkdir(parents=True, exist_ok=True)

        # pull the file down from the backup server.
        Smb(
            task=parent_task,
            run_id=task_file.job_id,
            connection=None,  # "default",
            directory=work_dir,
        ).read(task_file.path)

        # push the file to the task's destination smb server.
        Smb(
            task=parent_task,
            run_id=str(run_id),
            connection=parent_task.destination_smb_conn,
            directory=work_dir,
        ).save(
            overwrite=1,
            file_name=task_file.name,
        )

        return jsonify({"message": "successfully sent file."})

    # pylint: disable=broad-except
    except BaseException as e:
        return jsonify({"error": str(e)})
def send_email(run_id: int, file_id: int) -> dict:
    """Send file to email address specified in the task.

    File is loaded from the backup SMB file server into a tempfile.
    The tempfile is sent as an email attachment.
    """
    try:
        task_file = TaskFile.query.filter_by(id=file_id).first()
        parent_task = task_file.task

        # scratch directory scoped to project/task/run.
        work_dir = Path(
            Path(__file__).parent.parent
            / "temp"
            / sanitize_filename(parent_task.project.name)
            / sanitize_filename(parent_task.name)
            / task_file.job_id
        )
        work_dir.mkdir(parents=True, exist_ok=True)

        # pull the file down from the backup server.
        backup_files = Smb(
            task=parent_task,
            run_id=task_file.job_id,
            connection=None,  # "default",
            directory=work_dir,
        ).read(task_file.path)

        # email the downloaded file(s) as attachments.
        now_stamp = str(datetime.datetime.now())
        body_template = env.get_template("email/email.html.j2")

        Smtp(
            task=parent_task,
            run_id=str(run_id),
            recipients=parent_task.email_completion_recipients,
            short_message=f"Atlas Hub: {parent_task.name} data emailed.",
            subject=(
                "(Manual Send) Project: "
                + parent_task.project.name
                + " / Task: "
                + parent_task.name
            ),
            message=body_template.render(
                task=parent_task,
                success=1,
                date=now_stamp,
                logs=[],
            ),
            attachments=[attachment.name for attachment in backup_files],
        )

        return jsonify({"message": "successfully sent file."})

    # pylint: disable=broad-except
    except BaseException as e:
        return jsonify({"error": str(e)})
def __store_files(self) -> None:
    """Store query output files and distribute them to destinations.

    For each file in ``self.source_files``: save it via ``File`` (which
    applies any task-configured name, falling back to the temp name), send
    it to each enabled SFTP/FTP/SMB destination — skipping empty files
    when the destination's "don't send empty file" flag is set — then
    always keep a historical copy on the default backup SMB server and
    record a ``TaskFile`` row in the database.
    """
    # nothing produced by the source step; nothing to store.
    if not self.source_files or len(self.source_files) == 0:
        return

    RunnerLog(
        self.task,
        self.run_id,
        8,
        "Storing output file%s..." % ("s" if len(self.source_files) != 1 else ""),
    )

    for file_counter, this_file in enumerate(self.source_files, 1):
        # prefer the size captured at query time; otherwise stat the file.
        this_file_size = (
            self.query_output_size
            if self.query_output_size is not None
            else Path(this_file.name).stat().st_size
        )

        # get file name. if no name specified in task setting, then use temp name.
        try:
            file_name, file_path, file_hash = File(
                task=self.task,
                run_id=self.run_id,
                data_file=this_file,
                params=self.param_loader,
            ).save()
        except BaseException as e:
            raise RunnerException(
                self.task, self.run_id, 11, f"Failed to create data file.\n{e}"
            )

        self.output_files.append(file_path)

        # only log per-file progress when there is more than one file.
        if len(self.source_files) > 1:
            RunnerLog(
                self.task,
                self.run_id,
                8,
                f"Storing file {file_counter} of {len(self.source_files)}...",
            )

        # store
        # send to sftp
        if self.task.destination_sftp == 1 and self.task.destination_sftp_conn:
            if (
                self.task.destination_sftp_dont_send_empty_file == 1
                and this_file_size == 0
            ):
                RunnerLog(
                    self.task,
                    self.run_id,
                    8,
                    "Skipping SFTP, file is empty.",
                )
            else:
                Sftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.destination_sftp_conn,
                    directory=self.temp_path,
                ).save(
                    overwrite=self.task.destination_sftp_overwrite,
                    file_name=file_name,
                )

        # send to ftp
        if self.task.destination_ftp == 1 and self.task.destination_ftp_conn:
            if (
                self.task.destination_ftp_dont_send_empty_file == 1
                and this_file_size == 0
            ):
                RunnerLog(
                    self.task,
                    self.run_id,
                    8,
                    "Skipping FTP, file is empty.",
                )
            else:
                Ftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.destination_ftp_conn,
                    directory=self.temp_path,
                ).save(
                    overwrite=self.task.destination_ftp_overwrite,
                    file_name=file_name,
                )

        # save to smb
        if self.task.destination_smb == 1 and self.task.destination_smb_conn:
            if (
                self.task.destination_smb_dont_send_empty_file == 1
                and this_file_size == 0
            ):
                RunnerLog(
                    self.task,
                    self.run_id,
                    8,
                    "Skipping SMB, file is empty.",
                )
            else:
                Smb(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.destination_smb_conn,
                    directory=self.temp_path,
                ).save(
                    overwrite=self.task.destination_smb_overwrite,
                    file_name=file_name,
                )

        # save historical copy
        smb_path = Smb(
            task=self.task,
            run_id=self.run_id,
            connection=None,  # "default",
            directory=self.temp_path,
        ).save(overwrite=1, file_name=file_name)

        # log file details
        db.session.add(
            TaskFile(
                name=file_name,
                path=smb_path,
                task_id=self.task.id,
                job_id=self.run_id,
                file_hash=file_hash,
                size=file_size(str(os.path.getsize(file_path))),
            )
        )
        db.session.commit()
def __process(self) -> None:
    """Fetch, stage, and run the task's processing script.

    The script is loaded per ``processing_type_id`` (see the id map
    below), written into the run's temp directory when it arrived as
    text, then executed with ``PyProcesser``.  A non-empty output from
    the processor replaces ``self.data_files``.

    :raises RunnerException: when the script cannot be fetched, written,
        or identified.
    """
    RunnerLog(self.task, self.run_id, 8, "Starting processing script...")

    # get processing script
    # 1 = smb
    # 2 = sftp
    # 3 = ftp
    # 4 = git url
    # 5 = other url
    # 6 = source code
    processing_script_name = self.temp_path / (self.run_id + ".py")

    my_file = ""

    if (
        self.task.processing_type_id == 1
        and self.task.processing_smb_id is not None
    ):
        # bugfix: original read self.task.source_smb_file here — a
        # copy/paste from the source loader.  The processing script's
        # location is processing_smb_file, matching the sftp/ftp branches.
        file_name = self.param_loader.insert_file_params(
            self.task.processing_smb_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        my_file = Path(
            Smb(
                task=self.task,
                run_id=self.run_id,
                directory=self.temp_path,
                connection=self.task.processing_smb_conn,
            )
            .read(file_name)[0]
            .name
        ).read_text("utf8")

    elif (
        self.task.processing_type_id == 2
        and self.task.processing_sftp_id is not None
    ):
        file_name = self.param_loader.insert_file_params(
            self.task.processing_sftp_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        my_file = Path(
            Sftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.processing_sftp_conn,
                directory=self.temp_path,
            )
            .read(file_name=file_name)[0]
            .name
        ).read_text("utf8")

    elif (
        self.task.processing_type_id == 3
        and self.task.processing_ftp_id is not None
    ):
        file_name = self.param_loader.insert_file_params(
            self.task.processing_ftp_file
        )
        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        # bugfix: original used self.task.source_ftp_conn — the *source*
        # connection — while every sibling branch uses the processing_*
        # connection.  Use the processing ftp connection.
        my_file = Path(
            Ftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.processing_ftp_conn,
                directory=self.temp_path,
            )
            .read(file_name=file_name)[0]
            .name
        ).read_text("utf8")

    elif self.task.processing_type_id == 4 and self.task.processing_git is not None:
        # if a dir is specified then download all files
        if (
            self.task.processing_command is not None
            and self.task.processing_command != ""
        ):
            try:
                # inject credentials into the https clone url.
                url = (
                    re.sub(
                        r"(https?://)(.+?)",
                        r"\1<username>:<password>@\2",
                        self.task.processing_git,
                        flags=re.IGNORECASE,
                    )
                    .replace(
                        "<username>",
                        urllib.parse.quote(app.config["GIT_USERNAME"]),
                    )
                    .replace(
                        "<password>",
                        urllib.parse.quote(app.config["GIT_PASSWORD"]),
                    )
                )

                cmd = (
                    "$(which git) clone -q --depth 1 "
                    + '--recurse-submodules --shallow-submodules %s "%s"'
                    % (url, str(self.temp_path))
                )

                Cmd(
                    self.task,
                    self.run_id,
                    cmd,
                    "Repo cloned.",
                    "Failed to clone repo: %s" % (self.task.processing_git,),
                ).shell()

            # pylint: disable=broad-except
            except BaseException as e:
                raise RunnerException(
                    self.task, self.run_id, 8, "Processor failed to clone repo."
                ) from e

        # otherwise get py file
        else:
            my_file = self.source_loader.gitlab(self.task.processing_git)

    elif self.task.processing_type_id == 5 and self.task.processing_url is not None:
        if self.task.processing_command is not None:
            try:
                cmd = (
                    "$(which git) clone -q --depth 1 "
                    + '--recurse-submodules --shallow-submodules %s "%s"'
                    % (self.task.processing_url, str(self.temp_path))
                )

                Cmd(
                    task=self.task,
                    run_id=self.run_id,
                    cmd=cmd,
                    success_msg="Repo cloned",
                    error_msg="Failed to clone repo: %s"
                    % (self.task.processing_url,),
                ).shell()

                # bugfix: original built this with str(self.temp_path) +
                # command, dropping the path separator and yielding a str
                # whose later ``.name`` access would raise.  Keep a Path.
                processing_script_name = (
                    self.temp_path / self.task.processing_command
                )

            # pylint: disable=broad-except
            except BaseException as e:
                raise RunnerException(
                    self.task, self.run_id, 8, "Processor failed to clone repo."
                ) from e
        else:
            my_file = self.source_loader.web_url(self.task.processing_url)

    elif (
        self.task.processing_type_id == 6
        and self.task.processing_code is not None
    ):
        my_file = self.task.processing_code

    elif self.task.processing_type_id > 0:
        raise RunnerException(
            self.task,
            self.run_id,
            8,
            "Processing error, Not enough information to run a processing script from.",
        )

    try:
        # only write a script file when one arrived as text.
        if my_file != "" and self.task.processing_type_id > 0:
            Path(processing_script_name).parent.mkdir(parents=True, exist_ok=True)
            with open(processing_script_name, "w") as text_file:
                text_file.write(my_file)
            RunnerLog(self.task, self.run_id, 8, "Processing script created.")

    # pylint: disable=broad-except
    except BaseException as e:
        raise RunnerException(
            self.task, self.run_id, 8, f"Processing script failure:\n{e}"
        ) from e

    # run processing script
    output = PyProcesser(
        task=self.task,
        run_id=self.run_id,
        directory=self.temp_path,
        source_files=self.source_files,
        script=self.task.processing_command or processing_script_name.name,
    ).run()

    # # allow processer to rename file
    if output:
        RunnerLog(self.task, self.run_id, 8, f"Processing script output:\n{output}")
        self.data_files = output
def __get_source(self) -> None:
    """Load the task's source data.

    Source type ids handled here: 1 = sql query (dispatched to postgres
    or sql server by the connection's database type), 2 = smb file,
    3 = sftp file, 4 = ftp file, 6 = ssh command.  Query sources set
    ``self.query_output_size`` and ``self.source_files``; file sources
    set ``self.source_files``; the ssh branch only runs the command.

    :raises RunnerException: when the query cannot be loaded or the
        query run fails.
    """
    if self.task.source_type_id == 1:  # sql
        external_db = self.task.source_database_conn

        try:
            RunnerLog(self.task, self.run_id, 8, "Loading query...")
            query = self.__get_query()
        except BaseException as e:
            raise RunnerException(
                self.task, self.run_id, 8, f"Failed to load query.\n{e}"
            )

        RunnerLog(
            self.task, self.run_id, 8, "Starting query run, waiting for results..."
        )

        if external_db.database_type.id == 1:  # postgres
            try:
                # connection string is stored encrypted; decrypt with app key.
                self.query_output_size, self.source_files = Postgres(
                    task=self.task,
                    run_id=self.run_id,
                    connection=em_decrypt(
                        external_db.connection_string, app.config["PASS_KEY"]
                    ),
                    timeout=external_db.timeout
                    or app.config["DEFAULT_SQL_TIMEOUT"],
                    directory=self.temp_path,
                ).run(query)

            except ValueError as message:
                raise RunnerException(self.task, self.run_id, 21, message)

            except BaseException as message:
                raise RunnerException(
                    self.task, self.run_id, 21, f"Failed to run query.\n{message}"
                )

        elif external_db.database_type.id == 2:  # mssql
            try:
                self.query_output_size, self.source_files = SqlServer(
                    task=self.task,
                    run_id=self.run_id,
                    connection=em_decrypt(
                        external_db.connection_string, app.config["PASS_KEY"]
                    ),
                    timeout=external_db.timeout
                    or app.config["DEFAULT_SQL_TIMEOUT"],
                    directory=self.temp_path,
                ).run(query)

            except ValueError as message:
                raise RunnerException(self.task, self.run_id, 20, message)

            except BaseException as message:
                raise RunnerException(
                    self.task, self.run_id, 20, f"Failed to run query.\n{message}"
                )

        # NOTE(review): if database_type.id is neither 1 nor 2,
        # self.source_files is never assigned and the log below fails —
        # confirm only postgres/mssql ids can reach this branch.
        RunnerLog(
            self.task,
            self.run_id,
            8,
            f"Query completed.\nData file {self.source_files[0].name} created. Data size: {file_size(str(Path(self.source_files[0].name).stat().st_size))}.",
        )

    elif self.task.source_type_id == 2:  # smb file
        # resolve task parameters and date placeholders in the file name.
        file_name = self.param_loader.insert_file_params(
            self.task.source_smb_file
        )

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        self.source_files = Smb(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_smb_conn,
            directory=self.temp_path,
        ).read(file_name=file_name)

    elif self.task.source_type_id == 3:  # sftp file
        RunnerLog(self.task, self.run_id, 9, "Loading data from server...")

        file_name = self.param_loader.insert_file_params(
            self.task.source_sftp_file
        )

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        self.source_files = Sftp(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_sftp_conn,
            directory=self.temp_path,
        ).read(file_name=file_name)

    elif self.task.source_type_id == 4:  # ftp file
        RunnerLog(self.task, self.run_id, 13, "Loading data from server...")

        file_name = self.param_loader.insert_file_params(
            self.task.source_ftp_file
        )

        file_name = DateParsing(
            task=self.task,
            run_id=self.run_id,
            date_string=file_name,
        ).string_to_date()

        self.source_files = Ftp(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_ftp_conn,
            directory=self.temp_path,
        ).read(file_name=file_name)

    elif self.task.source_type_id == 6:  # ssh command
        query = self.__get_query()

        Ssh(
            task=self.task,
            run_id=self.run_id,
            connection=self.task.source_ssh_conn,
            command=query,
        ).run()