def create_directory(month):
    """Build the month-specific QA report directory path and create it if missing.

    Args:
        month: Month token substituted into GEO_INDEX_QA_REPORT_DIRECTORY.

    Returns:
        The formatted directory path (created if it did not exist).
    """
    directory = GEO_INDEX_QA_REPORT_DIRECTORY.format(month=month)
    if not os.path.isdir(directory):
        logger.info(f'creating directory {directory}')
        # makedirs also creates any missing parent directories, and exist_ok
        # guards against a race between the isdir check and creation
        # (os.mkdir would fail in both cases).
        os.makedirs(directory, exist_ok=True)
    return directory
def download_all_reports(geo_dbs, month=current_month):
    """Download the QA report for every geo level into the month's directory.

    The PCDPCA report additionally has extra data appended to it after the
    download completes.
    """
    directory = create_directory(month=month)
    allgeos = geo_dbs['allgeos']
    cc_20 = geo_dbs['20cc']
    pcl = geo_dbs['pcl']
    for geo_level in GEO_LEVELS:
        logger.info(f'creating report for {geo_level}...')
        file_name = GEO_INDEX_QA_REPORT_FILE_FORMAT.format(geo_level=geo_level)
        destination_file = os.path.join(directory, file_name)
        download_report(  # TODO: Take out the hardcoded numbers.
            destination_file=destination_file,
            geo_level=geo_level,
            curr_allgeos=allgeos.current,
            prev_allgeos=allgeos.previous,
            curr_20cc=cc_20.current,
            prev_20cc=cc_20.previous,
            curr_pcl=pcl.current,
            prev_pcl=pcl.previous,
        )
        if geo_level == 'PCDPCA':
            logger.info(f'Adding extra data for PCDPCA')
            extra = get_extra_report_data(allgeos.current)
            append_extra_data_to_report(destination_file, extra)
async def get_current_user(token: str = Depends(
        oauth2_scheme)) -> UserOutputData:
    """Resolve the bearer token to the authenticated user.

    Raises:
        HTTPException: 401 when the token is invalid or the user is missing,
            500 on any unexpected failure during the lookup.
    """
    credentials_exception = HTTPException(
        status_code=401,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        # Renamed from `id` to avoid shadowing the builtin.
        user_id = get_id_from_token(token)
    except Exception as e:
        logger.info(f"Could not validate credentials: {e}")
        raise credentials_exception
    try:
        user = injector.user_get_interactor().handle(int(user_id))
    except NoContentError as e:
        logger.info(f"Could not validate credentials: {e}")
        raise credentials_exception
    except Exception as e:
        logger.exception(e)
        raise HTTPException(status_code=500, detail="Internal Server Error")
    return user
def delete_user(self, id: int) -> Optional[Dict]:
    """Delete the user with the given id and return the deleted record.

    Args:
        id: Primary key of the user to delete.

    Returns:
        The deleted user converted to a dict.

    Raises:
        NoContentError: when no user with that id exists.
        SQLAlchemyError: re-raised on any database failure (after rollback
            for the delete step).
    """
    # Fetch the row first so the deleted record can be returned to the caller.
    try:
        deleted_user = db.session.query(User).filter(User.id == id).first()
    except SQLAlchemyError:
        raise
    finally:
        db.session.close()
    if deleted_user is None:
        raise NoContentError
    # Separate unit of work: delete and commit, rolling back on failure.
    try:
        db.session.query(User).filter(User.id == id).delete()
        db.session.commit()
    except SQLAlchemyError:
        db.session.rollback()
        raise
    finally:
        db.session.close()
        logger.info("db connection closed.")
    return self.__convert_schema_obj_to_dict(deleted_user)
def copy(self, source_file, destination):
    """Copy source_file to destination, skipping files that already exist there.

    Returns the command's status code on a copy, or None when skipped.
    """
    if self._check_file_exists(destination, source_file):
        logger.info(f'File {source_file} already exists in {destination} and was not copied')
        return None
    result = self.run_cmd(f'COPY {source_file} {destination}')
    return result.status_code
def copy_backups(remote_server, backup_regex, source_directory, target_directory):
    """Copy every file in source_directory matching backup_regex to target_directory."""
    credentials = (secrets.USER_NAME, secrets.PASSWORD)
    session = RemoteSession(remote_server, credentials)
    remote_files = session.ls(source_directory)
    for backup in find_backups(backup_regex, remote_files):
        logger.info(f'Copying file {backup} to {target_directory}')
        session.copy(
            source_file=os.path.join(source_directory, backup),
            destination=target_directory,
        )
def load_raw_plots(month):
    """Read the month's raw pipe-delimited plots file from S3 into a DataFrame."""
    url = RAW_S3_URL.format(month=month, file=PLOTS)
    logger.info(f'Loading {url}')
    reader = SQL_CONTEXT.read.option("header", True).option("sep", "|")
    return reader.csv(url)
def upload_files(month):
    """Upload every file in the given month's NHBC source directory to S3."""
    source_directory = os.path.join(NHBC_SOURCE_DIR, month)
    for entry in os.listdir(source_directory):
        upload_single_file(entry, source_directory, month)
    logger.info('Upload complete.')
def get_plots_with_projects(plots_df, projects_df):
    """Inner-join plots_df to the distinct project references in projects_df."""
    logger.info(f'Combining plots and project data frames')
    references = (
        projects_df
        .select("Project Reference")
        .distinct()
        .withColumnRenamed("Project Reference", "PROJECT_REFERENCE")
    )
    return plots_df.join(references, "PROJECT_REFERENCE", "inner")
def get_latest_backup(server, source_directory, file_regex):
    """Return the latest backup file in source_directory matching file_regex.

    Raises:
        Exception: when no file matches file_regex.
    """
    session = RemoteSession(server, (secrets.USER_NAME, secrets.PASSWORD))
    backups = session.ls(source_directory)
    version_backups = list(find_backups(file_regex, backups))
    if not version_backups:  # idiomatic emptiness check instead of len()==0
        message = f'No backup found matching {file_regex}'
        logger.error(message)
        raise Exception(message)
    # Assumes ls output is ordered so the last match is the latest backup
    # — TODO confirm against RemoteSession.ls.
    backup_file = version_backups[-1]
    logger.info(f'Found backup {backup_file}')
    return backup_file
def download_files(month):
    """Download the month's transformed plots file into the NHBC source directory."""
    transformed_key = find_transformed_file(month)
    if not transformed_key:
        raise Exception('No transformed file found...')
    logger.info(f'Downloading transformed file {transformed_key}...')
    destination = os.path.join(NHBC_SOURCE_DIR, month, TRANSFORMED_FILE_NAME)
    with open(destination, 'wb') as data:
        s3.download_fileobj(NHBC_BUCKET, transformed_key, data)
    logger.info(f'Download complete.')
def find_transformed_file(month):
    """Return the S3 key of the month's single transformed file, or None.

    Args:
        month: YYYYMM string; the S3 prefix uses the YYYY-MM form.

    Returns:
        The matching object's key, or None when exactly one match is not found.
    """
    month_format = f'{month[:4]}-{month[-2:]}'
    # list_objects_v2 omits the 'Contents' key entirely when nothing matches,
    # so default to an empty list instead of raising KeyError.
    found_files = s3.list_objects_v2(
        Bucket=NHBC_BUCKET,
        Prefix=TRANSFORMED_FILE_PREFIX.format(month=month_format)
    ).get('Contents', [])
    if len(found_files) == 1:
        logger.info(f'Found file {found_files[0]["Key"]}')
        return found_files[0]['Key']
    logger.warning(
        f'No file found with prefix {TRANSFORMED_FILE_PREFIX.format(month=month_format)}'
    )
def monitor_build(current_build_version=None):
    """Monitor the comps build job and report its outcome to Slack.

    When no version is supplied (or a falsy one), the latest build version
    is looked up.
    """
    current_build_version = current_build_version or latest_comps_build_version()
    channel = '#ht-compsbuild'
    send_message(channel, f'Started Comps Build {current_build_version}')
    try:
        time.sleep(10)  # give the package job a moment to register before polling
        operation_id = LON_SQL_04_SQL_RUNNER.latest_package_operation_id(BUILD_JOB_NAME)
        LON_SQL_04_SQL_RUNNER.monitor_package_status(operation_id)
        logger.info(f'Comps Build {current_build_version} complete.')
        send_message(channel, f'Comps Build {current_build_version} completed successfully!')
    except Exception as error:
        logger.error(f'Comps Build {current_build_version} failed.\n{error}')
        send_message(channel, f'Comps Build {current_build_version} failed.\n{error}')
def create_math_pair_samples(destination_allgeos, comps_type, phase_id):
    """Render and execute the SQL that creates math-pair samples.

    The query is built from sql_query_template with the current/previous
    allgeos databases and the given comps type and phase id, then run on
    LON_SQL_06.
    """
    allgeos = geo_dbs['allgeos']
    sql_query = sql_query_template.format(
        destination_allgeos=destination_allgeos,
        curr_allgeos=allgeos.current,
        prev_allgeos=allgeos.previous,
        comps_type=comps_type,
        phase_id=phase_id,
    )
    logger.info(f'Creating sample in {destination_allgeos} for {comps_type}')
    logger.info(sql_query)  # log the full query before running it
    LON_SQL_06_SQL_RUNNER.execute(sql_query)
def load_processed_plots(month):
    """Load the month's transformed plots from S3, de-duplicated on PLOT_ID.

    Args:
        month: YYYYMM string; the S3 URL uses the YYYY-MM form.

    Returns:
        A DataFrame with at most one row per PLOT_ID.
    """
    month_format = f'{month[:4]}-{month[-2:]}'
    url = TRANSFORMED_S3_URL.format(month=month_format)
    logger.info(f'Loading {url}')
    df = SQL_CONTEXT.read.options(**DF_OPTIONS).csv(url)
    # Sometimes there are duplicate PLOT_IDs, so keep only one row per id.
    # row_number() is used instead of rank(): rank() assigns ties the same
    # rank, so two identical rows would both get 1 and survive the filter.
    window = Window.partitionBy(df['PLOT_ID']).orderBy(df.columns)
    df = df.select("*", func.row_number().over(window).alias("row_number")) \
        .filter("row_number = 1").drop("row_number")
    return df
async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> Token:
    """Exchange username/password form data for a bearer token.

    Raises:
        HTTPException: 401 when authentication fails, 500 on unexpected errors.
    """
    try:
        token: Token = injector.auth_interactor().handle(form_data.__dict__)
        return token
    except ValueError as error:
        logger.info(error)
        raise HTTPException(status_code=401,
                            detail=str(error),
                            headers={"WWW-Authenticate": "Bearer"})
    except Exception as error:
        logger.exception(error)
        raise HTTPException(status_code=500, detail="Internal Server Error")
def drop_database(self, database_name):
    """Force-drop database_name on this server if it exists.

    The database is first set to SINGLE_USER WITH ROLLBACK IMMEDIATE so open
    connections cannot block the drop.
    """
    # NOTE(review): database_name is interpolated straight into T-SQL; only
    # call this with trusted, internally generated names.
    sql_query = f'''
        IF (SELECT COUNT(1) FROM sys.databases WHERE [name] = '{database_name}') > 0
        BEGIN
            ALTER DATABASE [{database_name}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
            DROP DATABASE [{database_name}];
        END;
    '''
    # Log before executing so the attempt is recorded even if execute() raises
    # (the original logged only after a successful execution).
    logger.info(
        f'Dropping database {database_name} on server {self.connection.server}'
    )
    self.execute(sql_query)
def restore_db(self, database, backup_file, stats=5):
    """Restore `database` from `backup_file`, logging progress as it runs.

    Any existing database with that name is dropped first. The RESTORE runs
    on a background thread while this thread polls every 5 seconds, logging
    progress in `stats`-percent increments with an estimated finish time.

    Args:
        database: Name of the database to restore.
        backup_file: Backup file to restore from.
        stats: Percentage granularity for progress log lines.
    """
    self.drop_database(database)
    restore_query = self.restore_database_query(database, backup_file)
    logger.info(f'Running restore query:\n{restore_query}')
    # Run the restore in the background so this thread can poll its progress.
    restore_thread = threading.Thread(target=self.restore,
                                      args=(restore_query, ))
    restore_thread.start()
    percent_complete = 0
    while restore_thread.is_alive():
        if percent_complete == 100:
            logger.info(f'{database} restore finalising')
        restore_state = self.get_restore_state(database)
        if not restore_state:
            # No restore progress row visible — presumably the data-copy
            # phase is done; mark 100% and keep waiting for the thread.
            percent_complete = 100
            time.sleep(5)
            continue
        current_percent_complete = restore_state.percent_complete
        seconds_to_complete = restore_state.seconds_to_complete
        # Round down to the nearest `stats` percent so progress is only
        # logged at stats-sized increments.
        current_percent_complete = current_percent_complete - (
            current_percent_complete % stats)
        if current_percent_complete > percent_complete:
            percent_complete = current_percent_complete
            expected_finish = datetime.datetime.now() + datetime.timedelta(
                seconds=seconds_to_complete)
            logger.info(
                f'{database} restored {percent_complete}: expected finish {expected_finish}'
            )
        time.sleep(5)
    logger.info(f'{database} restore complete')
def download_report(destination_file, geo_level, curr_allgeos, prev_allgeos,
                    curr_20cc, prev_20cc, curr_pcl, prev_pcl):
    """Fetch the QA report for the given databases and write it to destination_file."""
    url = GEO_INDEX_QA_REPORT_URL.format(
        geo_level=geo_level.replace(' ', '+'),  # spaces must be '+' in the URL
        curr_allgeos=curr_allgeos,
        prev_allgeos=prev_allgeos,
        curr_20cc=curr_20cc,
        prev_20cc=prev_20cc,
        curr_pcl=curr_pcl,
        prev_pcl=prev_pcl,
    )
    logger.info(f'downloading report {url}')
    with open(destination_file, 'wb') as report_file:
        response = get_report_file(url)
        report_file.write(response.content)
def find_user_by_id(self, id: int) -> Optional[Dict]:
    """Fetch the user with the given id and return it as a dict.

    Raises:
        NoContentError: when no user with that id exists.
    """
    try:
        # Database errors simply propagate to the caller; the session is
        # always closed.
        matched = db.session.query(User).filter(User.id == id).first()
    finally:
        db.session.close()
        logger.info("db connection closed.")
    if matched is None:
        raise NoContentError(f"User not found: id={id}")
    return self.__convert_schema_obj_to_dict(matched)
def create_user(self, name: str, password: str, email: str) -> Optional[Dict]:
    """Create a user and return the newly created record as a dict.

    Args:
        name: Unique user name.
        password: Password value stored as given.
        email: Unique email address.

    Raises:
        DuplicateError: when the name or email already exists.
        NoContentError: when re-fetching the created row fails.
    """
    now = datetime.now()
    user = User(
        name=name,
        password=password,
        email=email,
        created_at=now,
        updated_at=now,
    )
    try:
        db.session.add(user)
        db.session.commit()
    except IntegrityError:
        # Unique-constraint violation: surface as a domain error.
        db.session.rollback()
        raise DuplicateError(f"name: {name} or email: {email} is already exists.")
    except SQLAlchemyError:
        raise
    finally:
        db.session.close()
        logger.info("db connection closed.")
    # Re-query for the inserted row (newest user with this name) so the
    # database-generated id comes back to the caller.
    try:
        created_user = (
            db.session.query(User)
            .filter(User.name == name)
            .order_by(User.id.desc())
            .first()
        )
    except SQLAlchemyError:
        raise NoContentError
    finally:
        db.session.close()
        logger.info("db connection closed.")
    return self.__convert_schema_obj_to_dict(created_user)
def download_all_reports(current_version, compare_version, month=current_month):
    """Download every bulk-test QA report for the build, then post the reports
    directory to the #ht-compsbuild Slack channel."""
    directory = create_reports_directory(month, current_version)
    bulk_test_ids = get_bulk_test_ids(current_version, compare_version)
    for file_name_fmt, report_url_fmt in BULK_TEST_REPORTS.items():
        file_name = file_name_fmt.format(version=current_version, **bulk_test_ids)
        report_url = report_url_fmt.format(version=current_version, **bulk_test_ids)
        logger.info(f'creating report for {file_name}...')
        download_report(
            destination_file=os.path.join(directory, file_name),
            report_url=report_url,
        )
    send_message(
        '#ht-compsbuild',
        f'QA reports for Comps Build v{current_version} sign-off are available here:\n{directory}'
    )
def find_users(self) -> Optional[List[Dict]]:
    """Return every user as a list of dicts.

    Raises:
        NoContentError: when there are no users at all.
    """
    users = None
    try:
        users = db.session.query(User).all()
    except SQLAlchemyError:
        raise
    finally:
        db.session.close()
        logger.info("db connection closed.")
    if not users:
        raise NoContentError("Users are not found.")
    return [self.__convert_schema_obj_to_dict(u) for u in users]
def download_report(destination_file, report_url):
    """Fetch report_url and save the response body to destination_file."""
    logger.info(f'downloading report {report_url}')
    with open(destination_file, 'wb') as out:
        response = get_report_file(report_url)
        out.write(response.content)
def delete_backups(remote_server, backup_file):
    """Delete backup_file from the given remote server."""
    credentials = (secrets.USER_NAME, secrets.PASSWORD)
    session = RemoteSession(remote_server, credentials)
    logger.info(f'Deleting file {backup_file}')
    session.delete_file(file=backup_file)
def read_root():
    """Root endpoint: emit a demo log line and return a static greeting."""
    logger.info('This is an example of logging')
    return {'hello': 'world'}
def write_processed_plots_to_s3(month, plots_df):
    """Write plots_df to the month's transformed S3 location, overwriting any
    existing output. `month` is YYYYMM; the URL uses YYYY-MM."""
    url = TRANSFORMED_S3_URL.format(month=f'{month[:4]}-{month[-2:]}')
    logger.info(f'Writing to S3 {url}')
    # repartition(1) so a single output part file is produced
    single_partition = plots_df.repartition(1)
    single_partition.write.options(**DF_OPTIONS).csv(path=url, mode="overwrite")
def upload_single_file(file_name, file_directory, month):
    """Upload one file to the NHBC raw bucket under the given month's prefix."""
    full_path = os.path.join(file_directory, file_name)
    logger.info(f'Uploading file {full_path}...')
    destination_key = FILENAME_FMT.format(month=month, filename=file_name)
    s3.upload_file(full_path, NHBC_BUCKET, destination_key)
def on_startup():
    """Application startup hook: connect to MongoDB and log success."""
    connect(host=SETTINGS.MONGO_URI)
    # presumably forces the connection to be established eagerly — TODO confirm
    mongoengine.get_db()
    logger.info("Startup flow successful")
def create_directory(directory):
    """Create `directory` if it does not already exist.

    Args:
        directory: Path of the directory to ensure exists.

    Returns:
        The directory path passed in.
    """
    if not os.path.isdir(directory):
        logger.info(f'creating directory {directory}')
        # makedirs also creates missing parent directories, and exist_ok
        # guards against a race between the isdir check and creation
        # (os.mkdir would fail in both cases).
        os.makedirs(directory, exist_ok=True)
    return directory