Ejemplo n.º 1
0
 def __init__(self):
     """Pick the data-lake areas for the configured file system and create the BigQuery client.

     The factory matching ``Config().file_system_type`` is looked up in a
     dispatch table and called; the three areas it returns are stored as
     ``landing``, ``staging`` and ``public``.
     """
     # One zero-argument factory per supported file system type.
     area_factories: Mapping[str, Callable[[], Tuple[
         BaseLandingArea, BlobStagingArea, BlobPublicArea]]] = {
             FileSystemType.blob: self.__get_blob_data_lakes,
             FileSystemType.local: self.__get_local_data_lakes,
         }
     build_areas = area_factories[Config().file_system_type]
     self.landing, self.staging, self.public = build_areas()

     self.big_query = BigQuery(
         project=Config().bq_project,
         client_secrets=Config().bq_secret,
     )
Ejemplo n.º 2
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: filter the ``ReposRankingMTD`` report and save it as ``projects_activity_MTD``.

    Request parameters:
        date: day parsed with ``DAY_FORMAT``; defaults to ``DEFAULT_DAY``.
        company: company name; defaults to ``Config().default_company``.

    Returns:
        On success, a response whose body embeds the string form of the
        filtered frame. On any exception, a 500 response carrying the
        exception text followed by the formatted traceback.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")
        date = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        company = get_req_param(req,
                                'company',
                                default=Config().default_company)
        df = ReposRankingMTD(date=date, company=company).read()
        out_df = filter_projects(
            df=df,
            projects_filter_list=DataLake().staging.load_projects_filter(),
            commits_amount_field=DataLake(
            ).public.schemas.repo_commits_ranking.commits,
            repo_name_field=DataLake(
            ).public.schemas.repo_commits_ranking.repo)
        DataLake().public.save_report(report_df=out_df,
                                      report_name='projects_activity_MTD',
                                      date=date,
                                      company=company)
        # NOTE(review): the frame's string form is interpolated unescaped, so
        # the body is presumably not valid JSON when it contains quotes or
        # newlines -- confirm whether any caller parses it.
        return func.HttpResponse(f'{{"output": "{out_df}"}}')
    except Exception as ex:
        log.error(f'Exception {ex}')
        log.exception(ex)
        # Pass the exception triple positionally: the `etype` keyword is no
        # longer accepted by traceback.format_exception on Python 3.10+.
        trace = ''.join(tb.format_exception(type(ex), ex, ex.__traceback__))
        return func.HttpResponse(
            f"This HTTP triggered function failed {ex} "
            f"{trace}",
            status_code=500)
Ejemplo n.º 3
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: build the contributors ranking change report and save it.

    The change report is computed from every report returned by
    ``ContributorsRankingMTD(...).read_all()`` and saved through
    ``ContributorsRankingMBM(...).save``.

    Request parameters:
        date: day parsed with ``DAY_FORMAT``; defaults to ``DEFAULT_DAY``.
        company: company name; defaults to ``Config().default_company``.

    Returns:
        On success, a JSON body ``{"out_df": "<string form of the frame>"}``.
        On any exception, a 500 response carrying the exception text
        followed by the formatted traceback.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")
        date = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        company = get_req_param(req,
                                'company',
                                default=Config().default_company)
        contributors_report = ContributorsRankingMTD(date=date,
                                                     company=company)
        contributors_mbm_report = ContributorsRankingMBM(date=date,
                                                         company=company)

        df = get_contributors_ranking_mbm_change_report(
            reports=contributors_report.read_all(),
            contributor_field=ContributorsRankingReportSchema.author,
            commits_amount_field=ContributorsRankingReportSchema.commits)
        contributors_mbm_report.save(df=df)
        return func.HttpResponse(json.dumps(dict(out_df=str(df))))
    except Exception as ex:
        log.error(f'Exception {ex}')
        log.exception(ex)
        # Pass the exception triple positionally: the `etype` keyword is no
        # longer accepted by traceback.format_exception on Python 3.10+.
        trace = ''.join(tb.format_exception(type(ex), ex, ex.__traceback__))
        return func.HttpResponse(
            f"This HTTP triggered function failed {ex} "
            f"{trace}",
            status_code=500)
Ejemplo n.º 4
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: run ``get_contributors_repositories_change`` for the requested day.

    Request parameters:
        date: day parsed with ``DAY_FORMAT``; defaults to ``DEFAULT_DAY``.

    The company is always ``Config().default_company``.

    Returns:
        A 200 response with a fixed confirmation body on success, or a 500
        response carrying the exception text and formatted traceback.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s', level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")
        day = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY),
            DAY_FORMAT
        )
        get_contributors_repositories_change(date=day, company=Config().default_company)
        return func.HttpResponse('{"output":"This HTTP triggered function executed."}',
                                 status_code=200)
    except Exception as ex:
        # Pass the exception triple positionally: the `etype` keyword is no
        # longer accepted by traceback.format_exception on Python 3.10+.
        trace = "".join(tb.format_exception(type(ex), ex, ex.__traceback__))
        ex_message = f'Exception {ex} \n{trace}'
        log.error(ex_message)
        return func.HttpResponse(ex_message, status_code=500)
Ejemplo n.º 5
0
 def _repositories(names: Iterable[str]):
     """Yield one dict of required schema fields per licensed repository in *names*.

     Each name is fetched through a single ``GithubRest`` session. A
     repository is skipped when the REST call returns ``None`` or when its
     license attribute is falsy.

     Args:
         names: repository names to fetch.

     Yields:
         ``{field: value}`` for every field in ``Repositories.schema.required``.
     """
     with GithubRest(token=Config().github_token) as rest:
         for name in names:
             try:
                 repo_resp = rest.get_repository(repo_name=name)
                 if repo_resp is None:
                     continue
                 repository = parse_get_repository_response(
                     repo_resp, downloaded_at=datetime.now().date())
                 # Only repositories with a license value are emitted.
                 if not getattr(repository, Repositories.schema.license):
                     continue
                 yield {
                     field: getattr(repository, field)
                     for field in Repositories.schema.required
                 }
             except Exception as ex:
                 # Best effort: one failing repository is logged and the
                 # loop moves on to the next name.
                 log.error(f'Failed loading repository {name}. ex: {ex}')
Ejemplo n.º 6
0
def load_company_repositories_events_commits(date: datetime, company: str):
    """Fetch commits for the repositories in a company's push events of *date* and save them.

    The push events are read from the staging area for the single day
    *date*. When there are none, a warning is logged and nothing is saved.
    Otherwise the commits for the distinct repository names are collected
    through a ``GithubRest`` session and saved to the staging area.
    """
    push_events = DataLake().staging.get_push_events_commits(
        company=company,
        from_date=date,
        to_date=date,
        date_period_type=DatePeriodType.DTD,
    )
    push_schema = DataLake().staging.schemas.push_commits

    if push_events.empty:
        log.warning(f'No {company} events at {date}')
        return

    with GithubRest(token=Config().github_token) as rest:
        commits = get_company_repositories_events_commits(
            repositories_names=push_events[push_schema.repo_name].unique(),
            date=date,
            company=company,
            rest=rest,
        )
        # Build the frame while the REST session is still open.
        commits_frame = pd.DataFrame(commits)
        DataLake().staging.save_private_push_events_commits(
            push_event_commits=commits_frame,
            company_name=company,
            date=date,
        )
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: run ``load_company_repositories_events_commits`` for a day and company.

    Request parameters:
        date: day parsed with ``DAY_FORMAT``; defaults to ``DEFAULT_DAY``.
        company: company name; defaults to ``Config().default_company``.

    Returns:
        A JSON body ``{"status": "SUCCESS"}`` on success, or a 500 response
        carrying the exception text and formatted traceback.
    """
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.DEBUG)
    try:
        log.info(f"Http trigger. req.params: {req.params}")
        date = datetime.datetime.strptime(
            get_req_param(req, 'date', default=DEFAULT_DAY), DAY_FORMAT)
        company = get_req_param(req,
                                'company',
                                default=Config().default_company)
        load_company_repositories_events_commits(date=date, company=company)
        # The response must be returned; without `return` the function
        # yields None on the success path.
        return func.HttpResponse(json.dumps({'status': 'SUCCESS'}))
    except Exception as ex:
        # Pass the exception triple positionally: the `etype` keyword is no
        # longer accepted by traceback.format_exception on Python 3.10+.
        trace = "".join(tb.format_exception(type(ex), ex, ex.__traceback__))
        ex_message = f'Exception {ex} \n{trace}'
        log.error(ex_message)
        return func.HttpResponse(ex_message, status_code=500)
Ejemplo n.º 8
0
 def __get_blob_data_lakes() -> Tuple[BlobLandingArea, BlobStagingArea, BlobPublicArea]:
     """Create the blob landing, staging and public areas from the configured file-system properties."""
     file_system_landing = BlobLandingArea(**Config().file_system.landing_props)
     file_system_staging = BlobStagingArea(**Config().file_system.staging_props)
     file_system_public = BlobPublicArea(**Config().file_system.public_props)
     return file_system_landing, file_system_staging, file_system_public
Ejemplo n.º 9
0
 def __get_local_data_lakes() -> Tuple[LocalLandingArea, LocalStagingArea, LocalPublicArea]:
     """Create the local landing, staging and public areas from the configured file-system properties."""
     file_system_landing = LocalLandingArea(**Config().file_system.landing_props)
     file_system_staging = LocalStagingArea(**Config().file_system.staging_props)
     file_system_public = LocalPublicArea(**Config().file_system.public_props)
     return file_system_landing, file_system_staging, file_system_public
Ejemplo n.º 10
0
   You should have received a copy of the GNU General Public License
   along with OSCI.  If not, see <http://www.gnu.org/licenses/>."""
import click
import logging

# Configure the root logger: timestamped, level-tagged records at DEBUG and above.
logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                    level=logging.DEBUG)
# Module-level logger named after this module.
log = logging.getLogger(__name__)

if 'dbutils' in globals():
    log.debug(
        'Variable `dbutils` in memory. Try to setup config with `dbutils`')
    from __app__.config import Config, FileSystemType

    config = Config(dbutils=dbutils)

    if 'spark' in globals():
        log.debug('Variable `spark` in memory.')
        from __app__.jobs.session import Session

        Session(spark_session=spark)
        if Config().file_system_type == FileSystemType.blob:
            print(
                'FS CONF',
                f'fs.azure.account.key.{Config().file_system.staging_props.get("storage_account_name")}.'
                f'blob.core.windows.net',
                Config().file_system.staging_props.get('storage_account_key'))
            spark.conf.set(
                f'fs.azure.account.key.{Config().file_system.staging_props.get("storage_account_name")}.'
                f'blob.core.windows.net',
Ejemplo n.º 11
0
def test_config_singleton():
    """Two consecutive ``Config()`` calls must return the very same object."""
    first = Config()
    second = Config()
    # tear_down runs before the assertion, so it happens even when the
    # identity check fails.
    Config.tear_down()
    assert first is second