def __init__(self):
    """Wire up the landing/staging/public areas and the BigQuery client.

    The concrete area implementations are selected by the configured
    file-system type (blob storage vs. local disk).
    """
    # Dispatch table: file-system type -> factory returning the three areas.
    area_factories: Mapping[str, Callable[[], Tuple[
        BaseLandingArea, BlobStagingArea, BlobPublicArea]]] = {
        FileSystemType.blob: self.__get_blob_data_lakes,
        FileSystemType.local: self.__get_local_data_lakes,
    }
    factory = area_factories[Config().file_system_type]
    self.landing, self.staging, self.public = factory()
    self.big_query = BigQuery(project=Config().bq_project,
                              client_secrets=Config().bq_secret)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: build and save the month-to-date projects activity report.

    Reads `date` and `company` from the request parameters, filters the
    repositories ranking against the staging projects filter and writes the
    result to the public area.

    :param req: incoming HTTP request; optional params `date`, `company`.
    :return: 200 with the report on success, 500 with the traceback on error.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")

        date = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        company = get_req_param(req, 'company',
                                default=Config().default_company)

        df = ReposRankingMTD(date=date, company=company).read()
        out_df = filter_projects(
            df=df,
            projects_filter_list=DataLake().staging.load_projects_filter(),
            commits_amount_field=DataLake().public.schemas
                                           .repo_commits_ranking.commits,
            repo_name_field=DataLake().public.schemas
                                      .repo_commits_ranking.repo)
        DataLake().public.save_report(report_df=out_df,
                                      report_name='projects_activity_MTD',
                                      date=date,
                                      company=company)
        return func.HttpResponse(f'{{"output": "{out_df}"}}')
    except Exception as ex:
        log.error(f'Exception {ex}')
        log.exception(ex)
        # FIX: format_exception's `etype` keyword was removed in Python 3.10;
        # positional arguments work on every supported version.
        return func.HttpResponse(
            f"This HTTP triggered function failed {ex} "
            f"{''.join(tb.format_exception(type(ex), ex, ex.__traceback__))}",
            status_code=500)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: compute the month-by-month contributors ranking change.

    Reads `date` and `company` from the request parameters, derives the MBM
    change report from all month-to-date contributor rankings and saves it.

    :param req: incoming HTTP request; optional params `date`, `company`.
    :return: 200 with the report on success, 500 with the traceback on error.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")

        date = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        company = get_req_param(req, 'company',
                                default=Config().default_company)

        contributors_report = ContributorsRankingMTD(date=date,
                                                     company=company)
        contributors_mbm_report = ContributorsRankingMBM(date=date,
                                                         company=company)
        df = get_contributors_ranking_mbm_change_report(
            reports=contributors_report.read_all(),
            contributor_field=ContributorsRankingReportSchema.author,
            commits_amount_field=ContributorsRankingReportSchema.commits)
        contributors_mbm_report.save(df=df)
        return func.HttpResponse(json.dumps(dict(out_df=str(df))))
    except Exception as ex:
        log.error(f'Exception {ex}')
        log.exception(ex)
        # FIX: format_exception's `etype` keyword was removed in Python 3.10;
        # positional arguments work on every supported version.
        return func.HttpResponse(
            f"This HTTP triggered function failed {ex} "
            f"{''.join(tb.format_exception(type(ex), ex, ex.__traceback__))}",
            status_code=500)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: run the contributors/repositories change computation.

    Reads `date` from the request parameters (company is always the
    configured default here) and delegates to
    `get_contributors_repositories_change`.

    :param req: incoming HTTP request; optional param `date`.
    :return: 200 on success, 500 with the traceback on error.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")

        day = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        get_contributors_repositories_change(date=day,
                                             company=Config().default_company)
        return func.HttpResponse(
            '{"output":"This HTTP triggered function executed."}',
            status_code=200)
    except Exception as ex:
        # FIX: format_exception's `etype` keyword was removed in Python 3.10;
        # positional arguments work on every supported version.
        ex_message = (
            f'Exception {ex} \n'
            f'{"".join(tb.format_exception(type(ex), ex, ex.__traceback__))}')
        log.error(ex_message)
        return func.HttpResponse(ex_message, status_code=500)
def _repositories(names: Iterable[str]):
    """Yield a required-fields dict for each licensed repository in `names`.

    Repositories that cannot be fetched, return no payload, or have no
    license value are skipped; fetch failures are logged and do not abort
    the iteration.

    :param names: full repository names ("owner/repo") to look up.
    """
    with GithubRest(token=Config().github_token) as rest:
        for name in names:
            try:
                repo_resp = rest.get_repository(repo_name=name)
                if repo_resp is None:
                    continue
                repository = parse_get_repository_response(
                    repo_resp, downloaded_at=datetime.now().date())
                # FIX: use getattr() instead of calling __getattribute__
                # directly — same lookup, idiomatic and override-respecting.
                if getattr(repository, Repositories.schema.license):
                    yield {
                        field: getattr(repository, field)
                        for field in Repositories.schema.required
                    }
            except Exception as ex:
                log.error(f'Failed loading repository {name}. ex: {ex}')
def load_company_repositories_events_commits(date: datetime, company: str):
    """Fetch commit details for one day of a company's push events.

    Loads the day's push-event commits from staging, enriches them via the
    GitHub REST API and saves the result back to the staging area. Does
    nothing (beyond a warning) when the day has no events.

    :param date: the day to process (DTD period).
    :param company: company name the events belong to.
    """
    push_events = DataLake().staging.get_push_events_commits(
        company=company,
        from_date=date,
        to_date=date,
        date_period_type=DatePeriodType.DTD)
    commits_schema = DataLake().staging.schemas.push_commits
    if push_events.empty:
        log.warning(f'No {company} events at {date}')
        return
    with GithubRest(token=Config().github_token) as rest:
        repo_names = push_events[commits_schema.repo_name].unique()
        enriched_commits = get_company_repositories_events_commits(
            repositories_names=repo_names,
            date=date,
            company=company,
            rest=rest)
        # Materialize while the REST session is still open.
        enriched_df = pd.DataFrame(enriched_commits)
        DataLake().staging.save_private_push_events_commits(
            push_event_commits=enriched_df,
            company_name=company,
            date=date)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: load commit details for a company's push events.

    Reads `date` and `company` from the request parameters and delegates to
    `load_company_repositories_events_commits`.

    :param req: incoming HTTP request; optional params `date`, `company`.
    :return: 200 with a SUCCESS payload, 500 with the traceback on error.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")

        date = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        company = get_req_param(req, 'company',
                                default=Config().default_company)

        load_company_repositories_events_commits(date=date, company=company)
        # FIX: the success response was built but never returned, so the
        # function implicitly returned None to the Functions host.
        return func.HttpResponse(json.dumps({'status': 'SUCCESS'}))
    except Exception as ex:
        # FIX: format_exception's `etype` keyword was removed in Python 3.10;
        # positional arguments work on every supported version.
        ex_message = (
            f'Exception {ex} \n'
            f'{"".join(tb.format_exception(type(ex), ex, ex.__traceback__))}')
        log.error(ex_message)
        return func.HttpResponse(ex_message, status_code=500)
def __get_blob_data_lakes(
) -> Tuple[BlobLandingArea, BlobStagingArea, BlobPublicArea]:
    """Build the blob-storage-backed landing, staging and public areas.

    Each area is constructed from its corresponding property group in the
    file-system configuration.
    """
    fs_conf = Config().file_system
    landing = BlobLandingArea(**fs_conf.landing_props)
    staging = BlobStagingArea(**fs_conf.staging_props)
    public = BlobPublicArea(**fs_conf.public_props)
    return landing, staging, public
def __get_local_data_lakes(
) -> Tuple[LocalLandingArea, LocalStagingArea, LocalPublicArea]:
    """Build the local-disk-backed landing, staging and public areas.

    Each area is constructed from its corresponding property group in the
    file-system configuration.
    """
    fs_conf = Config().file_system
    landing = LocalLandingArea(**fs_conf.landing_props)
    staging = LocalStagingArea(**fs_conf.staging_props)
    public = LocalPublicArea(**fs_conf.public_props)
    return landing, staging, public
You should have received a copy of the GNU General Public License along with OSCI. If not, see <http://www.gnu.org/licenses/>.""" import click import logging logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s', level=logging.DEBUG) log = logging.getLogger(__name__) if 'dbutils' in globals(): log.debug( 'Variable `dbutils` in memory. Try to setup config with `dbutils`') from __app__.config import Config, FileSystemType config = Config(dbutils=dbutils) if 'spark' in globals(): log.debug('Variable `spark` in memory.') from __app__.jobs.session import Session Session(spark_session=spark) if Config().file_system_type == FileSystemType.blob: print( 'FS CONF', f'fs.azure.account.key.{Config().file_system.staging_props.get("storage_account_name")}.' f'blob.core.windows.net', Config().file_system.staging_props.get('storage_account_key')) spark.conf.set( f'fs.azure.account.key.{Config().file_system.staging_props.get("storage_account_name")}.' f'blob.core.windows.net',
def test_config_singleton():
    """Config must behave as a singleton: repeated calls share one instance."""
    first = Config()
    second = Config()
    # Reset the singleton so this test does not leak state into others.
    Config.tear_down()
    assert first is second