예제 #1
0
def main(branch='master', push=False, emails_to_notify=None):
    """Refresh the dataset repository and commit (optionally push) the result.

    The function checks that the repository at ``DATA_REPO`` is on the
    expected branch, downloads missing reports, builds the per-date datasets
    and the full dataset, stages and commits them, and finally either pushes
    the commit or announces that a commit is waiting to be pushed.

    Args:
        branch: Branch the repository must currently be on; also used to
            build the ``branch:branch`` refspec when pushing.
        push: When true, push the new commit to the ``origin`` remote.
        emails_to_notify: Addresses handed to the ``Notifier``. ``None``
            (the default) means an empty list of recipients.

    Raises:
        SystemExit: With code 0 when no new dataset was written.
        Exception: Any error raised during the update is logged, reported
            through the notifier and then re-raised.
    """
    # A fresh list per call: a mutable default would be shared across calls.
    if emails_to_notify is None:
        emails_to_notify = []

    creds = json.loads(CREDENTIALS_PATH.read_text())
    email_sender = EmailSender(creds['EMAIL_ADDRESS'], creds['EMAIL_PASSWORD'])
    notifier = Notifier(emails=emails_to_notify, email_sender=email_sender)

    try:
        logging.info('Opening and checking the repository...')
        repo = git.Repo(DATA_REPO)
        if repo.active_branch.name != branch:
            raise Exception(
                'dataset repository in unexpected branch. Expected: %s. Actual: %s'
                % (branch, repo.active_branch.name))

        logging.info('Checking for new reports...')
        new_report_paths = download_missing_reports()
        if not new_report_paths:
            logging.info('No new reports.')
        else:
            logging.info('New reports found: %s',
                         ', '.join(map(str, new_report_paths)))

        new_dataset_paths = make_single_date_datasets(skip_existing=True)
        if not new_dataset_paths:
            logging.info('No new datasets written. Exit.')
            # SystemExit is not an ``Exception``: it bypasses the handler
            # below, but the ``finally`` clause still runs.
            sys.exit(0)
        logging.info('New datasets: %s', ', '.join(map(str,
                                                       new_dataset_paths)))

        full_dataset_path = make_full_dataset()
        if not full_dataset_path:
            # Guard against a falsy result (no dataset written); otherwise
            # its string form would be staged as if it were a file path.
            raise Exception('full dataset was not written')

        # git add
        files_to_add = [
            str(path) for path in new_dataset_paths + [full_dataset_path]
        ]
        logging.info('Command: git add %s', ' '.join(files_to_add))
        repo.index.add(items=files_to_add)

        # git commit
        latest_date = max(
            get_date_from_filename(path.name) for path in new_dataset_paths)
        commit_msg = COMMIT_TEMPLATE.format(date=latest_date)
        logging.info('Command: git commit -m %r', commit_msg)
        repo.index.commit(commit_msg)

        # git push
        if push:
            # TODO: handle push errors
            refspec = '{0}:{0}'.format(branch)
            logging.info('Command: git push %s', refspec)
            repo.remote('origin').push(refspec=refspec)
            notifier.notify('Pushed new ICCAS datasets', commit_msg)
        else:
            notifier.notify('New commit waiting for push', commit_msg)

    except Exception as exc:
        # Log first so the traceback is recorded even if notifying fails.
        logging.exception('Exception was raised')
        notifier.notify('Fatal error', repr(exc))
        raise
    finally:
        email_sender.quit()
예제 #2
0
    if not ordered_parts:
        print("No datasets found in", input_dir)
        return False

    dataset = pd.concat(ordered_parts, axis=0)
    dataset.to_csv(out_path, line_terminator="\n")
    print("Full dataset written to", out_path)
    return out_path


def get_latest_data_date(dirpath=REPORTS_DATA_DIR, default: str = "") -> str:
    """Return the date of the last dataset listed for ``dirpath``.

    Args:
        dirpath: Directory to inspect; must provide ``exists()``.
        default: Value returned when the directory does not exist or when
            no dataset is listed for it.

    Returns:
        The first item of the last entry produced by
        ``list_datasets_by_date(dirpath)``, or ``default``.
        NOTE(review): presumably entries are sorted by ascending date, so
        the last one is the most recent -- confirm against the helper.
    """
    if dirpath.exists():
        entries = list_datasets_by_date(dirpath)
        if entries:
            last_entry = entries[-1]
            return last_entry[0]
    return default


if __name__ == "__main__":
    # Script entry point: download reports, extract their data and rebuild
    # the dataset when there is something new (or when forced).
    import argparse
    from download_reports import download_missing_reports

    cli = argparse.ArgumentParser()
    cli.add_argument("-o", "--overwrite", action="store_true")
    overwrite = cli.parse_args().overwrite

    # The latest known data date is passed as the ``after`` argument.
    latest_date = get_latest_data_date()
    download_missing_reports(after=latest_date)

    # Without --overwrite, data that already exists is skipped.
    extracted_paths = extract_data_from_reports(skip_existing=not overwrite)

    # Rebuild when forced by --overwrite or when new data was extracted.
    if overwrite or extracted_paths:
        make_dataset()
예제 #3
0

def make_full_dataset(input_dir=DATA_BY_DATE_DIR, output_dir=FULL_DATASET_DIR):
    """Merge every per-date dataset of ``input_dir`` into a single CSV file.

    Each CSV is read with its ``age_group`` column as index; the frames are
    stacked row-wise under an outer ``date`` index level, so the result is
    indexed by ``(date, age_group)``.

    Args:
        input_dir: Directory passed to ``list_datasets_by_date``.
        output_dir: Directory in which the full dataset is written; created
            (with parents) if it does not exist.

    Returns:
        The path of the written CSV, or ``False`` when no dataset was found
        in ``input_dir``.
    """
    dated_paths = list_datasets_by_date(input_dir)
    if not dated_paths:
        print('No datasets found in', input_dir)
        return False

    destination = get_full_dataset_path(dirpath=output_dir)

    # One frame per date; a repeated date keeps only its last file.
    frames = {
        day: pd.read_csv(csv_path, index_col='age_group')
        for day, csv_path in dated_paths
    }
    combined = pd.concat(
        frames.values(),
        axis=0,
        keys=frames.keys(),
        names=['date', 'age_group'],
    )

    output_dir.mkdir(parents=True, exist_ok=True)
    combined.to_csv(destination)
    print('Full dataset written to', destination)
    return destination


if __name__ == '__main__':
    from download_reports import download_missing_reports

    # Run the pipeline stages in order: download the reports, build the
    # per-date datasets, then build the full dataset.
    for stage in (download_missing_reports,
                  make_single_date_datasets,
                  make_full_dataset):
        stage()