Beispiel #1
0
def create_directory(month):
    """Build the QA report directory path for ``month`` and ensure it exists.

    Args:
        month: Value substituted for the ``month`` field of
            ``GEO_INDEX_QA_REPORT_DIRECTORY``.

    Returns:
        The formatted directory path.
    """
    directory = GEO_INDEX_QA_REPORT_DIRECTORY.format(month=month)
    if not os.path.isdir(directory):
        logger.info(f'creating directory {directory}')
        # makedirs(exist_ok=True) also creates missing parent directories and
        # does not fail if the directory appears between the isdir() check
        # and this call.
        os.makedirs(directory, exist_ok=True)
    return directory
Beispiel #2
0
def download_all_reports(geo_dbs, month=current_month):
    """
    Download one QA report per entry of GEO_LEVELS into the month's directory.

    Each report is written to a file named from
    GEO_INDEX_QA_REPORT_FILE_FORMAT. After the 'PCDPCA' report has been
    downloaded, extra data fetched for the current allgeos database is
    appended to that report file.
    """
    report_dir = create_directory(month=month)
    for geo_level in GEO_LEVELS:
        logger.info(f'creating report for {geo_level}...')
        file_name = GEO_INDEX_QA_REPORT_FILE_FORMAT.format(geo_level=geo_level)
        destination_file = os.path.join(report_dir, file_name)
        report_kwargs = {
            'destination_file': destination_file,
            'geo_level': geo_level,
            'curr_allgeos': geo_dbs['allgeos'].current,
            'prev_allgeos': geo_dbs['allgeos'].previous,
            'curr_20cc': geo_dbs['20cc'].current,
            'prev_20cc': geo_dbs['20cc'].previous,
            'curr_pcl': geo_dbs['pcl'].current,
            'prev_pcl': geo_dbs['pcl'].previous,
        }
        download_report(**report_kwargs)
        if geo_level != 'PCDPCA':
            continue
        logger.info('Adding extra data for PCDPCA')
        extra_data = get_extra_report_data(geo_dbs['allgeos'].current)
        append_extra_data_to_report(destination_file, extra_data)
Beispiel #3
0
async def get_current_user(token: str = Depends(
    oauth2_scheme)) -> UserOutputData:
    """
    Resolve a bearer token to the user it identifies.

    Raises HTTPException with status 401 when the id cannot be read from the
    token or the user lookup raises NoContentError, and HTTPException with
    status 500 for any other error raised during the lookup.
    """

    unauthorized = HTTPException(
        status_code=401,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        user_id = get_id_from_token(token)
    except Exception as exc:
        logger.info(f"Could not validate credentials: {exc}")
        raise unauthorized

    try:
        return injector.user_get_interactor().handle(int(user_id))
    except NoContentError as exc:
        logger.info(f"Could not validate credentials: {exc}")
        raise unauthorized
    except Exception as exc:
        logger.exception(exc)
        raise HTTPException(status_code=500, detail="Internal Server Error")
Beispiel #4
0
    def delete_user(self, id: int) -> Optional[Dict]:
        """
        Delete the user with the given id and return it as a dict.

        The user is looked up first; NoContentError is raised when no row
        has that id. An SQLAlchemyError raised while deleting triggers a
        rollback and is re-raised. The session is closed after each step.
        """

        try:
            target = db.session.query(User).filter(User.id == id).first()
        finally:
            db.session.close()

        if target is None:
            raise NoContentError

        try:
            matching_rows = db.session.query(User).filter(User.id == id)
            matching_rows.delete()
            db.session.commit()
        except SQLAlchemyError:
            db.session.rollback()
            raise
        finally:
            db.session.close()
            logger.info("db connection closed.")

        return self.__convert_schema_obj_to_dict(target)
Beispiel #5
0
 def copy(self, source_file, destination):
     """Copy ``source_file`` to ``destination`` unless it is already there.

     Returns the status code of the COPY command, or None when the file
     already exists in the destination and the copy is skipped.
     """
     if self._check_file_exists(destination, source_file):
         logger.info(f'File {source_file} already exists in {destination} and was not copied')
         return None
     outcome = self.run_cmd(f'COPY {source_file} {destination}')
     return outcome.status_code
Beispiel #6
0
def copy_backups(remote_server, backup_regex, source_directory, target_directory):
    """Copy the backups found in ``source_directory`` to ``target_directory``.

    A RemoteSession is opened on ``remote_server``; every file that
    find_backups yields for ``backup_regex`` from the directory listing is
    copied through that session.
    """
    credentials = (secrets.USER_NAME, secrets.PASSWORD)
    session = RemoteSession(remote_server, credentials)
    listing = session.ls(source_directory)
    for backup_name in find_backups(backup_regex, listing):
        logger.info(f'Copying file {backup_name} to {target_directory}')
        source_path = os.path.join(source_directory, backup_name)
        session.copy(source_file=source_path, destination=target_directory)
def load_raw_plots(month):
    """Read the raw plots file for ``month`` as a '|'-separated CSV with a header row."""
    url = RAW_S3_URL.format(month=month, file=PLOTS)
    logger.info(f'Loading {url}')
    reader = SQL_CONTEXT.read.option("header", True).option("sep", "|")
    return reader.csv(url)
Beispiel #8
0
def upload_files(month):
    """
    Upload every entry found in the month's folder under NHBC_SOURCE_DIR.

    Each name returned by os.listdir is handed to upload_single_file, and a
    completion message is logged afterwards.
    """
    month_directory = os.path.join(NHBC_SOURCE_DIR, month)
    entries = os.listdir(month_directory)
    for entry in entries:
        upload_single_file(entry, month_directory, month)
    logger.info('Upload complete.')
def get_plots_with_projects(plots_df, projects_df):
    """Inner-join plots to the distinct project references of ``projects_df``.

    The "Project Reference" column of ``projects_df`` is de-duplicated and
    renamed to "PROJECT_REFERENCE", which is the join column.
    """
    logger.info('Combining plots and project data frames')
    distinct_refs = projects_df.select("Project Reference").distinct()
    renamed_refs = distinct_refs.withColumnRenamed(
        "Project Reference", "PROJECT_REFERENCE")
    return plots_df.join(renamed_refs, "PROJECT_REFERENCE", "inner")
Beispiel #10
0
def get_latest_backup(server, source_directory, file_regex):
    """Return the last backup that find_backups yields for ``file_regex``.

    The candidates come from listing ``source_directory`` through a
    RemoteSession on ``server``. Raises Exception when nothing matches.
    """
    session = RemoteSession(server, (secrets.USER_NAME, secrets.PASSWORD))
    listing = session.ls(source_directory)
    candidates = list(find_backups(file_regex, listing))
    if not candidates:
        message = f'No backup found matching {file_regex}'
        logger.error(message)
        raise Exception(message)
    latest = candidates[-1]
    logger.info(f'Found backup {latest}')
    return latest
Beispiel #11
0
def download_files(month):
    """Download the month's transformed file from NHBC_BUCKET to local disk.

    The file is written to TRANSFORMED_FILE_NAME inside the month's folder
    under NHBC_SOURCE_DIR. Raises Exception when find_transformed_file
    returns no key.
    """
    key = find_transformed_file(month)
    if not key:
        raise Exception('No transformed file found...')
    logger.info(f'Downloading transformed file {key}...')
    local_path = os.path.join(NHBC_SOURCE_DIR, month, TRANSFORMED_FILE_NAME)
    with open(local_path, 'wb') as local_file:
        s3.download_fileobj(NHBC_BUCKET, key, local_file)
    logger.info('Download complete.')
Beispiel #12
0
def find_transformed_file(month):
    """Look up the S3 key of the transformed file for ``month``.

    Args:
        month: Month string; its first four and last two characters are
            joined with a dash to build the key prefix (presumably a
            ``YYYYMM``-style value -- confirm against callers).

    Returns:
        The key of the single object found under the prefix, or ``None``
        (after logging a warning) when the listing does not contain exactly
        one object.
    """
    month_format = f'{month[:4]}-{month[-2:]}'
    prefix = TRANSFORMED_FILE_PREFIX.format(month=month_format)
    response = s3.list_objects_v2(Bucket=NHBC_BUCKET, Prefix=prefix)
    # list_objects_v2 leaves out the 'Contents' key when nothing matches the
    # prefix, so default to an empty list instead of raising KeyError.
    found_files = response.get('Contents', [])

    if len(found_files) == 1:
        key = found_files[0]['Key']
        logger.info(f'Found file {key}')
        return key
    logger.warning(f'No file found with prefix {prefix}')
    return None
Beispiel #13
0
def monitor_build(current_build_version=None):
    """Announce a Comps Build, monitor its package operation and report the outcome.

    When no version is passed, latest_comps_build_version() supplies it.
    Any exception raised while monitoring is logged and sent to the channel
    instead of being propagated.
    """
    channel = '#ht-compsbuild'
    version = current_build_version or latest_comps_build_version()
    send_message(channel, f'Started Comps Build {version}')
    try:
        time.sleep(10)
        operation_id = LON_SQL_04_SQL_RUNNER.latest_package_operation_id(BUILD_JOB_NAME)
        LON_SQL_04_SQL_RUNNER.monitor_package_status(operation_id)
        logger.info(f'Comps Build {version} complete.')
        send_message(channel, f'Comps Build {version} completed successfully!')
    except Exception as error:
        failure = f'Comps Build {version} failed.\n{error}'
        logger.error(failure)
        send_message(channel, failure)
Beispiel #14
0
def create_math_pair_samples(destination_allgeos, comps_type, phase_id):
    """Render sql_query_template for the given sample and run it on LON_SQL_06_SQL_RUNNER.

    The template receives the destination database, the current and
    previous allgeos databases from ``geo_dbs``, the comps type and the
    phase id. The rendered query is logged before it is executed.
    """
    allgeos = geo_dbs['allgeos']
    template_values = {
        'destination_allgeos': destination_allgeos,
        'curr_allgeos': allgeos.current,
        'prev_allgeos': allgeos.previous,
        'comps_type': comps_type,
        'phase_id': phase_id,
    }
    sql_query = sql_query_template.format(**template_values)
    logger.info(f'Creating sample in {destination_allgeos} for {comps_type}')
    logger.info(sql_query)
    LON_SQL_06_SQL_RUNNER.execute(sql_query)
def load_processed_plots(month):
    """Load the transformed plots CSV for ``month``, keeping one row per PLOT_ID.

    Args:
        month: Month string; its first four and last two characters are
            joined with a dash to fill TRANSFORMED_S3_URL.

    Returns:
        The loaded data frame with a single row for each PLOT_ID.
    """
    month_format = f'{month[:4]}-{month[-2:]}'
    url = TRANSFORMED_S3_URL.format(month=month_format)
    logger.info(f'Loading {url}')
    df = SQL_CONTEXT.read.options(**DF_OPTIONS).csv(url)

    # Sometimes there are duplicate PLOT_IDs, so keep a single row for each.
    # row_number() is used rather than rank(): rank() gives the same value to
    # rows that tie on every ordering column, so exact duplicate rows would
    # all be ranked 1 and survive the filter below.
    window = Window.partitionBy(df['PLOT_ID']).orderBy(df.columns)
    numbered = df.select("*", func.row_number().over(window).alias("row_number"))
    df = numbered.filter("row_number = 1").drop("row_number")

    return df
Beispiel #16
0
async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> Token:
    """
    Pass the submitted form fields to the auth interactor and return its token.

    A ValueError from the interactor becomes HTTPException 401 carrying the
    error text; any other exception is logged with its traceback and becomes
    HTTPException 500.
    """
    try:
        credentials = form_data.__dict__
        issued: Token = injector.auth_interactor().handle(credentials)
        return issued
    except ValueError as error:
        logger.info(error)
        bearer_headers = {"WWW-Authenticate": "Bearer"}
        raise HTTPException(status_code=401,
                            detail=str(error),
                            headers=bearer_headers)
    except Exception as error:
        logger.exception(error)
        raise HTTPException(status_code=500, detail="Internal Server Error")
Beispiel #17
0
 def drop_database(self, database_name):
     """Drop ``database_name`` on the connected server if it exists.

     The generated batch checks sys.databases for the name, switches the
     database to SINGLE_USER WITH ROLLBACK IMMEDIATE and then drops it.

     NOTE(review): ``database_name`` is interpolated directly into the SQL
     text, so callers must only pass trusted names.
     """
     sql_query = f'''
     IF (SELECT COUNT(1) FROM sys.databases WHERE [name] = '{database_name}') > 0
     BEGIN
         ALTER DATABASE [{database_name}] SET  SINGLE_USER WITH ROLLBACK IMMEDIATE;
         DROP DATABASE [{database_name}];
     END;
     '''
     # Log before executing so the message is emitted while the drop is
     # actually in progress, and is still present if the statement fails.
     logger.info(
         f'Dropping database {database_name} on server {self.connection.server}'
     )
     self.execute(sql_query)
Beispiel #18
0
 def restore_db(self, database, backup_file, stats=5):
     """Drop ``database``, restore it from ``backup_file`` and log progress.

     The restore query runs in a background thread while this method polls
     ``get_restore_state`` every 5 seconds until the thread finishes.

     Args:
         database: Name of the database to drop and restore.
         backup_file: Backup passed to ``restore_database_query``.
         stats: Progress step; the reported percentage is rounded down to a
             multiple of this value and logged only when it increases.

     NOTE(review): nothing here inspects the outcome of the restore thread,
     so "restore complete" is logged whenever the thread ends -- confirm
     that failures inside ``self.restore`` are reported elsewhere.
     """
     self.drop_database(database)
     restore_query = self.restore_database_query(database, backup_file)
     logger.info(f'Running restore query:\n{restore_query}')
     # Run the restore in its own thread so this thread can poll progress.
     restore_thread = threading.Thread(target=self.restore,
                                       args=(restore_query, ))
     restore_thread.start()
     # Highest progress value logged so far.
     percent_complete = 0
     while restore_thread.is_alive():
         if percent_complete == 100:
             logger.info(f'{database} restore finalising')
         restore_state = self.get_restore_state(database)
         if not restore_state:
             # No state available: treated as "all progress reported".
             # NOTE(review): this branch is also taken if no state is
             # available before the restore begins, after which no progress
             # line can exceed 100 -- confirm that is intended.
             percent_complete = 100
             time.sleep(5)
             continue
         current_percent_complete = restore_state.percent_complete
         seconds_to_complete = restore_state.seconds_to_complete
         # Round down to the nearest multiple of `stats`.
         current_percent_complete = current_percent_complete - (
             current_percent_complete % stats)
         if current_percent_complete > percent_complete:
             percent_complete = current_percent_complete
             expected_finish = datetime.datetime.now() + datetime.timedelta(
                 seconds=seconds_to_complete)
             logger.info(
                 f'{database} restored {percent_complete}: expected finish {expected_finish}'
             )
         time.sleep(5)
     logger.info(f'{database} restore complete')
Beispiel #19
0
def download_report(destination_file, geo_level, curr_allgeos, prev_allgeos,
                    curr_20cc, prev_20cc, curr_pcl, prev_pcl):
    """Download the report for the given parameters and save it to the destination file.

    Args:
        destination_file: Path the report content is written to (binary mode).
        geo_level: Geo level of the report; spaces are replaced with '+'
            before it is placed in the URL.
        curr_allgeos, prev_allgeos, curr_20cc, prev_20cc, curr_pcl, prev_pcl:
            Values substituted into GEO_INDEX_QA_REPORT_URL.
    """
    url = GEO_INDEX_QA_REPORT_URL.format(
        geo_level=geo_level.replace(' ', '+'),
        curr_allgeos=curr_allgeos,
        prev_allgeos=prev_allgeos,
        curr_20cc=curr_20cc,
        prev_20cc=prev_20cc,
        curr_pcl=curr_pcl,
        prev_pcl=prev_pcl,
    )
    logger.info(f'downloading report {url}')
    # Fetch before opening the destination so a failed download neither
    # creates an empty file nor truncates an existing report.
    raw_data = get_report_file(url)
    with open(destination_file, 'wb') as report_file:
        report_file.write(raw_data.content)
Beispiel #20
0
    def find_user_by_id(self, id: int) -> Optional[Dict]:
        """
        Fetch the user with the given id and return it as a dict.

        The session is closed after the query. Raises NoContentError when
        no user has that id.
        """

        try:
            record = db.session.query(User).filter(User.id == id).first()
        finally:
            db.session.close()
            logger.info("db connection closed.")

        if record is not None:
            return self.__convert_schema_obj_to_dict(record)

        raise NoContentError(f"User not found: id={id}")
Beispiel #21
0
    def create_user(self, name: str, password: str, email: str) -> Optional[Dict]:
        """
        Insert a new user and return the stored row as a dict.

        created_at and updated_at are both set to the current time. An
        IntegrityError on commit is rolled back and raised as DuplicateError.
        Afterwards the user with the given name and the highest id is read
        back; an SQLAlchemyError during that read is raised as
        NoContentError. The session is closed after each step.
        """

        timestamp = datetime.now()
        new_user = User(
            name=name,
            password=password,
            email=email,
            created_at=timestamp,
            updated_at=timestamp,
        )

        try:
            db.session.add(new_user)
            db.session.commit()
        except IntegrityError:
            db.session.rollback()
            raise DuplicateError(f"name: {name} or email: {email} is already exists.")
        finally:
            db.session.close()
            logger.info("db connection closed.")

        try:
            same_name = db.session.query(User).filter(User.name == name)
            stored_user = same_name.order_by(User.id.desc()).first()
        except SQLAlchemyError:
            raise NoContentError
        finally:
            db.session.close()
            logger.info("db connection closed.")

        return self.__convert_schema_obj_to_dict(stored_user)
Beispiel #22
0
def download_all_reports(current_version, compare_version, month=current_month):
    """
    Download every report listed in BULK_TEST_REPORTS and announce the result.

    File names and report URLs are rendered from their format strings using
    the current version plus the ids returned by get_bulk_test_ids. Once all
    reports are saved, the reports directory is posted to '#ht-compsbuild'.
    """
    directory = create_reports_directory(month, current_version)
    bulk_test_ids = get_bulk_test_ids(current_version, compare_version)
    for name_template, url_template in BULK_TEST_REPORTS.items():
        file_name = name_template.format(version=current_version, **bulk_test_ids)
        report_url = url_template.format(version=current_version, **bulk_test_ids)
        logger.info(f'creating report for {file_name}...')
        download_report(
            destination_file=os.path.join(directory, file_name),
            report_url=report_url,
        )
    announcement = (
        f'QA reports for Comps Build v{current_version} sign-off are available here:\n{directory}'
    )
    send_message('#ht-compsbuild', announcement)
Beispiel #23
0
    def find_users(self) -> Optional[List[Dict]]:
        """
        Return every user as a list of dicts.

        The session is closed after the query. Raises NoContentError when
        the query yields no users.
        """

        try:
            users = db.session.query(User).all()
        finally:
            db.session.close()
            logger.info("db connection closed.")

        if not users:
            raise NoContentError(("Users are not found."))

        return [self.__convert_schema_obj_to_dict(row) for row in users]
Beispiel #24
0
def download_report(destination_file, report_url):
    """Download the report at ``report_url`` and save it to ``destination_file``.

    Args:
        destination_file: Path the report content is written to (binary mode).
        report_url: URL passed to get_report_file.
    """
    logger.info(f'downloading report {report_url}')
    # Fetch before opening the destination so a failed download neither
    # creates an empty file nor truncates an existing report.
    raw_data = get_report_file(report_url)
    with open(destination_file, 'wb') as report_file:
        report_file.write(raw_data.content)
Beispiel #25
0
def delete_backups(remote_server, backup_file):
    """Delete ``backup_file`` through a RemoteSession opened on ``remote_server``."""
    credentials = (secrets.USER_NAME, secrets.PASSWORD)
    remote = RemoteSession(remote_server, credentials)
    logger.info(f'Deleting file {backup_file}')
    remote.delete_file(file=backup_file)
Beispiel #26
0
def read_root():
    """Log a sample message and return a fixed greeting payload."""
    greeting = {'hello': 'world'}
    logger.info('This is an example of logging')
    return greeting
def write_processed_plots_to_s3(month, plots_df):
    """Write ``plots_df`` as CSV to the month's TRANSFORMED_S3_URL location.

    The frame is repartitioned to one partition and written with DF_OPTIONS
    in overwrite mode.
    """
    year_part, month_part = month[:4], month[-2:]
    url = TRANSFORMED_S3_URL.format(month=f'{year_part}-{month_part}')
    logger.info(f'Writing to S3 {url}')
    writer = plots_df.repartition(1).write.options(**DF_OPTIONS)
    writer.csv(path=url, mode="overwrite")
Beispiel #28
0
def upload_single_file(file_name, file_directory, month):
    """Upload one file from ``file_directory`` to NHBC_BUCKET.

    The object key is rendered from FILENAME_FMT using ``month`` and
    ``file_name``.
    """
    full_path = os.path.join(file_directory, file_name)
    logger.info(f'Uploading file {full_path}...')
    object_key = FILENAME_FMT.format(month=month, filename=file_name)
    s3.upload_file(full_path, NHBC_BUCKET, object_key)
Beispiel #29
0
def on_startup():
    """Connect using SETTINGS.MONGO_URI, fetch the database handle and log success."""
    mongo_uri = SETTINGS.MONGO_URI
    connect(host=mongo_uri)
    mongoengine.get_db()
    logger.info("Startup flow successful")
Beispiel #30
0
def create_directory(directory):
    """Create ``directory`` if it does not exist yet and return its path.

    Args:
        directory: Path of the directory to ensure.

    Returns:
        The same ``directory`` value that was passed in.
    """
    if not os.path.isdir(directory):
        logger.info(f'creating directory {directory}')
        # makedirs(exist_ok=True) also creates missing parent directories and
        # does not fail if the directory appears between the isdir() check
        # and this call.
        os.makedirs(directory, exist_ok=True)
    return directory