Example #1
def test__extract_metadata__interface(metadata_rich_notebooks):  # noqa F811
    import json
    import os

    from nbcollection.ci.constants import ENCODING, SCANNER_BUILD_DIR
    from nbcollection.ci.scanner.utils import find_build_jobs, generate_job_context
    from nbcollection.ci.metadata.factory import run_extract_metadata
    from nbcollection.ci.metadata.utils import extract_metadata
    from nbcollection.ci.commands.utils import validate_and_parse_inputs
    from nbcollection_tests.ci.tools.utils import collection_set_to_namespace

    metadata_keys = ['title', 'description']
    notebook_name = 'Notebook-One'
    options = collection_set_to_namespace(metadata_rich_notebooks,
                                          extra={
                                              'notebook_names': notebook_name,
                                          })
    run_extract_metadata(options)
    for job_idx, job in enumerate(
            find_build_jobs(options.project_path, options.collection_names,
                            options.category_names, options.notebook_names)):
        for notebook in job.category.notebooks:
            extract_metadata(notebook)
            with open(notebook.metadata.path, 'rb') as stream:
                metadata = json.loads(stream.read().decode(ENCODING))
                for key in metadata_keys:
                    assert key in metadata.keys()

    assert job_idx == 0
    validative_options = collection_set_to_namespace(metadata_rich_notebooks,
                                                     extra={
                                                         'notebook_names': notebook_name,
                                                     })
    validate_and_parse_inputs(validative_options)
    for job_idx, job in enumerate(
            find_build_jobs(options.project_path, options.collection_names,
                            options.category_names, options.notebook_names)):
        job_context = generate_job_context(job)
        for notebook_idx, notebook_context in enumerate(job_context.notebooks):
            extract_metadata(notebook_context)

        assert notebook_idx == 0

        validative_metadata_filepath = os.path.join(
            SCANNER_BUILD_DIR, job.semantic_path(),
            f'{notebook.name}.metadata.json')
        with open(validative_metadata_filepath, 'rb') as stream:
            validative_metadata = json.loads(stream.read().decode(ENCODING))
            for key in metadata_keys:
                assert validative_metadata[key] == metadata[key]

    assert job_idx == 0
Example #2
def test__reset_notebook_execution__interface(
        executed_notebook_collection):  # noqa F811
    import json
    import os

    from nbcollection.ci.constants import ENCODING, SCANNER_BUILD_DIR
    from nbcollection.ci.scanner.utils import find_build_jobs, generate_job_context
    from nbcollection.ci.metadata.factory import run_reset_notebook_execution
    from nbcollection_tests.ci.tools.utils import collection_set_to_namespace

    options = collection_set_to_namespace(executed_notebook_collection)
    run_reset_notebook_execution(options)
    for job in find_build_jobs(options.project_path, options.collection_names,
                               options.category_names, options.notebook_names):
        job_context = generate_job_context(job)  # noqa F841
        for notebook in job.category.notebooks:
            notebook_path = os.path.join(SCANNER_BUILD_DIR,
                                         job.semantic_path(),
                                         f'{notebook.name}.ipynb')
            assert os.path.exists(notebook_path)
            with open(notebook_path, 'rb') as stream:
                notebook_data = json.loads(stream.read().decode(ENCODING))

            for cell in notebook_data['cells']:
                assert cell.get('execution_count', None) is None
                assert len(cell.get('outputs', [])) == 0
Example #3
def pull_request_build(
        url: str,
        project_path: str,
        collection_names: typing.List[str] = [],
        category_names: typing.List[str] = []) -> None:
    repo_path, repo_type = select_repo_type(url)
    url_parts = select_url_type(url, repo_type)
    if url_parts.url_type is URLType.GithubPullRequest:
        repo_path = project_path
        if not os.path.exists(repo_path):
            git.Repo.clone_from(url_parts.https_url, repo_path)

        repo = git.Repo(repo_path)
        RemoteParts.ParseURLToRemoteParts(repo.remotes.origin.url)
        pr_info = obtain_pull_request_info(url_parts)
        if getattr(repo.remotes, pr_info.source.org, None) is None:
            repo.create_remote(pr_info.source.org, pr_info.source.https_url)

        repo_info = extract_repo_info(repo, pr_info)
        build_jobs = {}

        if len(collection_names) > 0:
            for job in select_build_jobs_by_pr_author_commits(repo_info, pr_info):
                if not job.semantic_path() in build_jobs.keys():
                    build_jobs[job.semantic_path()] = job
        else:
            for job in find_build_jobs(repo_info.repo.working_dir, collection_names, category_names):
                build_jobs[job.semantic_path()] = job

        for semantic_path, build_job in build_jobs.items():
            job_context = generate_job_context(build_job)
            run_job_context(job_context, True)

    else:
        raise NotImplementedError(f'Unable to parse URL[{url}]')
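
For orientation, a minimal, hedged invocation of pull_request_build as defined above. The pull-request URL and checkout path are illustrative values, not taken from the surrounding examples; per the function body, it clones the repository into the given path if it is absent, adds the PR author's remote, and runs the selected build jobs.

# Both values are hypothetical; pull_request_build is the function shown in Example #3.
pr_url = 'https://github.com/spacetelescope/dat_pyinthesky/pull/1'  # illustrative PR URL
pull_request_build(pr_url, '/tmp/dat_pyinthesky-pr-checkout')       # illustrative checkout path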
Example #4
def test__reset_notebook_execution(executed_notebook_collection):  # noqa F811
    import json
    import os

    from nbcollection.ci.constants import ENCODING
    from nbcollection.ci.scanner.utils import find_build_jobs
    from nbcollection.ci.metadata.utils import reset_notebook_execution

    notebook_paths = []
    for job in find_build_jobs(executed_notebook_collection):
        for notebook in job.category.notebooks:
            with open(notebook.path, 'rb') as stream:
                notebook_data = json.loads(stream.read().decode(ENCODING))

            reset_notebook_execution(notebook_data)
            with open(notebook.path, 'wb') as stream:
                stream.write(json.dumps(notebook_data).encode(ENCODING))

            notebook_paths.append(notebook.path)

    for path in notebook_paths:
        assert os.path.exists(path)
        with open(path, 'rb') as stream:
            notebook_data = json.loads(stream.read().decode(ENCODING))

        for cell in notebook_data['cells']:
            assert cell.get('execution_count', None) is None
            assert len(cell.get('outputs', [])) == 0
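
A small, hedged illustration of the invariant this test checks, assuming reset_notebook_execution mutates the notebook dictionary in place as Example #4 relies on. The tiny notebook payload below is hand-built for the sketch and is not one of the fixtures used above.

from nbcollection.ci.metadata.utils import reset_notebook_execution

# Minimal, hand-built notebook payload (illustrative only).
notebook_data = {
    'cells': [
        {'cell_type': 'code', 'source': 'print(1)', 'metadata': {},
         'execution_count': 3,
         'outputs': [{'output_type': 'stream', 'name': 'stdout', 'text': '1\n'}]},
    ],
    'metadata': {}, 'nbformat': 4, 'nbformat_minor': 5,
}
reset_notebook_execution(notebook_data)
# The same checks the test above performs on real notebooks.
assert notebook_data['cells'][0].get('execution_count', None) is None
assert len(notebook_data['cells'][0].get('outputs', [])) == 0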
Example #5
def select_build_jobs_by_pr_author_commits(
        repo_info: RepoInfo, pr_info: PullRequestInfo) -> types.GeneratorType:
    source_files = {}
    for commit in pr_info.commits:
        comms = [
            c for c in repo_info.repo.iter_commits()
            if c.hexsha == commit.commit_hash
        ]
        if len(comms) > 1:
            raise NotImplementedError("Shouldn't be possible")

        try:
            comm = comms[0]
        except IndexError:
            continue

        source_files.update(comm.stats.files)

    for path in source_files.keys():
        if not path.endswith('.ipynb'):
            continue

        collection_name = path.split('/', 1)[0]
        category_name = os.path.dirname(path).rsplit('/', 1)[1]
        for build_job in find_build_jobs(repo_info.repo.working_dir,
                                         [collection_name], [category_name]):
            yield build_job
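
A hedged consumer sketch for the generator above. It assumes repo_info and pr_info have already been assembled (Example #3 builds them via extract_repo_info and obtain_pull_request_info) and that select_build_jobs_by_pr_author_commits is in scope as defined here; the helper name build_pr_author_changes is made up for illustration.

from nbcollection.ci.scanner.utils import generate_job_context, run_job_context


def build_pr_author_changes(repo_info, pr_info) -> None:
    # Run every build job whose category the PR author's commits touched.
    for build_job in select_build_jobs_by_pr_author_commits(repo_info, pr_info):
        job_context = generate_job_context(build_job)
        run_job_context(job_context, True)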
Example #6
def latest_offline_artifacts(
        command_context: CICommandContext, merge_context: MergeContext,
        existing_categories: typing.List[str]) -> types.GeneratorType:
    for job in find_build_jobs(command_context.project_path,
                               command_context.collection_names,
                               command_context.category_names,
                               command_context.notebook_names, True):

        namespace = '.'.join([job.collection.name, job.category.name])
        if namespace in existing_categories:
            continue

        storage_path = os.path.join(command_context.project_path, 'artifacts',
                                    job.collection.name, job.category.name)
        for notebook in job.category.notebooks:
            html_filepath = os.path.join(storage_path, f'{notebook.name}.html')
            meta_filepath = os.path.join(storage_path,
                                         f'{notebook.name}.metadata.json')

            if any([
                    not os.path.exists(html_filepath),
                    not os.path.exists(meta_filepath)
            ]):
                continue

            html_filename = os.path.basename(html_filepath)
            yield NotebookSource(html_filename, html_filepath,
                                 job.category.name, job.collection.name,
                                 'local-file', meta_filepath)
            meta_filename = os.path.basename(meta_filepath)
            yield NotebookSource(meta_filename, meta_filepath,
                                 job.category.name, job.collection.name,
                                 'local-file', meta_filepath)
Example #7
def test__run_job_context(quick_build_collection):  # noqa F811
    import os

    from nbcollection.ci.constants import SCANNER_BUILD_LOG_DIR, SCANNER_BUILD_DIR
    from nbcollection.ci.scanner.utils import run_job_context, generate_job_context, find_build_jobs

    for job_idx, job in enumerate(find_build_jobs(quick_build_collection)):
        job_context = generate_job_context(job)
        run_job_context(job_context)

        # Validate Run completed
        stdout_log = os.path.join(
            SCANNER_BUILD_LOG_DIR,
            f'{job.collection.name}-{job.category.name}.stdout.log')
        assert os.path.exists(stdout_log)

        stderr_log = os.path.join(
            SCANNER_BUILD_LOG_DIR,
            f'{job.collection.name}-{job.category.name}.stderr.log')
        assert os.path.exists(stderr_log)

        assert os.path.exists(job_context.setup_script)
        for notebook in job_context.notebooks:
            assert os.path.exists(notebook.path)
            assert os.path.exists(notebook.artifact.path)
            assert os.path.exists(notebook.metadata.path)

        build_dir = os.path.join(SCANNER_BUILD_DIR, job.collection.name,
                                 job.category.name)
        venv_dirpath = os.path.join(build_dir, 'venv')
        assert os.path.exists(venv_dirpath)
        assert build_dir == job_context.build_dir
Example #8
def run_sync_notebooks(options: argparse.Namespace) -> None:
    if not os.path.exists(options.destination_path):
        raise NotImplementedError(
            f'Destination Path[{options.destination_path}] does not exist')

    validate_and_parse_inputs(options)
    # https://github.com/spacetelescope/dat_pyinthesky/blob/78bfaec05eb9af6280c6d15b6df54886b1aa4e9f/.circleci/builder/factory.py#L59
    for job in find_build_jobs(options.project_path, options.collection_names,
                               options.category_names, options.notebook_names,
                               True):

        notebooks_to_update = {}
        for notebook in job.category.notebooks:
            new_path = f'{options.destination_path}/{job.category.name}/{notebook.name}.ipynb'
            source_path = f'{options.project_path}/{job.collection.name}/{job.category.name}/{notebook.name}.ipynb'

            key = f'{job.collection.name}.{job.category.name}'
            notebooks_to_update[key] = (source_path, new_path)

        for key, (source_path, new_path) in notebooks_to_update.items():
            collection_name, category_name = key.split('.', 1)
            logger.info(f'Updating: {collection_name} - {category_name}')
            # Derive the directory paths from the entry being copied instead of
            # relying on variables leaking out of the scan loop above.
            new_dirpath = os.path.dirname(new_path)
            source_dirpath = os.path.dirname(source_path)
            if os.path.exists(new_dirpath):
                shutil.rmtree(new_dirpath)

            shutil.copytree(source_dirpath, new_dirpath)
Example #9
def run_extract_metadata(options: argparse.Namespace) -> Metadata:
    validate_and_parse_inputs(options)
    for job in find_build_jobs(options.project_path, options.collection_names,
                               options.category_names, options.notebook_names):
        job_context = generate_job_context(job)
        for notebook_context in job_context.notebooks:
            extract_metadata(notebook_context)
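
A hedged sketch of calling run_extract_metadata directly, without the test helper. The project path is illustrative; the comma-joined name fields mirror what collection_set_to_namespace (Example #21) produces, and run_extract_metadata parses them through validate_and_parse_inputs before scanning for build jobs.

import argparse
import tempfile

from nbcollection.ci.metadata.factory import run_extract_metadata

options = argparse.Namespace(
    project_path='/tmp/my-notebook-repo',           # illustrative checkout path
    collection_names='collection_one',              # comma-separated collection filter
    category_names='asdf_example',                  # comma-separated category filter
    notebook_names='',                              # empty string: no notebook filter, the helper's default
    output_dir=tempfile.NamedTemporaryFile().name,  # scratch location, as the test helper uses
)
run_extract_metadata(options)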
Example #10
def run_build(options: argparse.Namespace) -> None:
    if os.environ.get('CIRCLE_PULL_REQUEST') is not None:
        logger.info('Pull Request detected. Skipping Build')
        return None

    validate_and_parse_inputs(options)

    # Find Builds
    jobs = []
    for job in find_build_jobs(options.project_path, options.collection_names,
                               options.category_names, options.notebook_names,
                               options.force_build):
        jobs.append(job)

    # Run Build
    artifact_paths = {}
    if options.build_mode is BuildMode.Single:
        for job in jobs:
            job_context = generate_job_context(job)
            run_job_context(job_context, True)
            for notebook in job_context.notebooks:
                hash_name = f'{notebook.collection_name}-{notebook.category_name}'
                artifact_paths[hash_name] = notebook.artifact.path

    else:
        build_artifacts_concurrently(options, jobs, artifact_paths)

    for name, path in artifact_paths.items():
        logger.info(f'Artifact[{name}] created here: {path}')
Example #11
def run_generate_ci_environment(options: argparse.Namespace) -> None:
    validate_and_parse_inputs(options)
    jobs = []
    for job in find_build_jobs(options.project_path, options.collection_names,
                               options.category_names, options.notebook_names):
        jobs.append(job)

    gen_ci_env(jobs, options.ci_environment, options.project_path,
               options.enable_website_publication, options.enable_nightly)
Example #12
def test__find_build_jobs__filter_in_collection(
        multi_collection_repo):  # noqa F811
    from nbcollection.ci.scanner.utils import find_build_jobs

    for job_idx, job in enumerate(
            find_build_jobs(multi_collection_repo,
                            filter_in_collections=['collection_one'])):
        assert job.collection.name == 'collection_one'

    assert job_idx == 1
Example #13
def test__find_build_jobs(single_collection_repo__nth_categories):  # noqa F811
    from nbcollection.ci.scanner.utils import find_build_jobs
    from nbcollection.ci.datatypes import BuildJob, Requirements, PreRequirements

    for job_idx, job in enumerate(
            find_build_jobs(single_collection_repo__nth_categories)):
        assert job.__class__ is BuildJob
        assert job.collection.name == 'collection_one'
        assert job.category.name in ['asdf_example', 'cube_fitting']
        assert job.category.requirements.__class__ is Requirements
        assert job.category.pre_requirements.__class__ is PreRequirements
Example #14
def test__find_build_jobs__filter_in_category(
        single_collection_repo):  # noqa F811
    from nbcollection.ci.scanner.utils import find_build_jobs

    for job_idx, job in enumerate(
            find_build_jobs(single_collection_repo,
                            filter_in_collections=[],
                            filter_in_categories=['asdf_example'])):
        assert job.category.name == 'asdf_example'

    assert job_idx == 0
Example #15
def run_reset_notebook_execution(options: argparse.Namespace) -> None:
    validate_and_parse_inputs(options)
    for job in find_build_jobs(options.project_path, options.collection_names,
                               options.category_names):
        for notebook in job.category.notebooks:
            with open(notebook.path, 'rb') as stream:
                notebook_data = json.loads(stream.read().decode(ENCODING))

            reset_notebook_execution(notebook_data)

            with open(notebook.path, 'wb') as stream:
                stream.write(json.dumps(notebook_data).encode(ENCODING))
Example #16
    def _build_category(project_path: str, collection_name: str,
                        category_name: str) -> None:
        os.environ['CHANNEL_BUILD'] = 'true'
        for job in find_build_jobs(project_path, [collection_name],
                                   [category_name]):
            print(job.collection.name, job.category.name)
            print('Creating Job Context: ', job.collection.name,
                  job.category.name)
            job_context = generate_job_context(job)
            print('Running Job Context: ', job.collection.name,
                  job.category.name)
            run_job_context(job_context, False)

        del os.environ['CHANNEL_BUILD']
Example #17
def test__find_build_jobs__filter_in_notebook__zero(
        multi_notebook_category):  # noqa F811
    from nbcollection.ci.scanner.utils import find_build_jobs

    for job_idx, job in enumerate(
            find_build_jobs(multi_notebook_category,
                            filter_in_collections=[],
                            filter_in_categories=['alot-of-notebooks'],
                            filter_in_notebooks=[])):
        for notebook_idx, notebook in enumerate(job.category.notebooks):
            assert notebook.name in ['Notebook-One', 'Notebook-Two']

        assert notebook_idx == 1

    assert job_idx == 0
Example #18
def select_build_jobs(repo_info: RepoInfo,
                      pr_info: PullRequestInfo) -> types.GeneratorType:
    rel_paths_with_notebooks = []
    for diff in repo_info.repo.index.diff(pr_info.commits[-1].commit_hash):
        rel_path = os.path.dirname(diff.b_path)
        if diff.b_path.endswith('.ipynb'):
            rel_paths_with_notebooks.append(rel_path)

    rel_paths_with_notebooks = list(set(rel_paths_with_notebooks))
    for path in rel_paths_with_notebooks:
        collection_name = path.split('/', 1)[0]
        category_name = path.rsplit('/', 1)[1]
        for build_job in find_build_jobs(repo_info.repo.working_dir,
                                         [collection_name], [category_name]):
            yield build_job
Example #19
def test__extract_metadata(metadata_rich_notebooks):  # noqa F811
    import json
    import os

    from nbcollection.ci.constants import ENCODING
    from nbcollection.ci.scanner.utils import find_build_jobs, generate_job_context
    from nbcollection.ci.metadata.utils import extract_metadata

    for job in find_build_jobs(metadata_rich_notebooks):
        job_context = generate_job_context(job)
        for notebook_context in job_context.notebooks:
            extract_metadata(notebook_context)
            assert os.path.exists(notebook_context.metadata.path)
            with open(notebook_context.metadata.path, 'rb') as stream:
                extracted_data = json.loads(stream.read().decode(ENCODING))
                assert extracted_data['title'] == 'Notebook One'
                assert extracted_data['description'] is not None
Example #20
def test__generate_job_context(
        single_collection_repo__nth_categories):  # noqa F811
    import os

    from nbcollection.ci.scanner.utils import find_build_jobs, generate_job_context
    from nbcollection.ci.datatypes import Requirements, PreRequirements, PreInstall

    for job_idx, job in enumerate(
            find_build_jobs(single_collection_repo__nth_categories)):
        job_context = generate_job_context(job)
        assert os.path.exists(job_context.build_dir)
        assert os.path.isdir(job_context.build_dir)
        assert os.path.exists(job_context.requirements.path)
        assert job_context.requirements.__class__ == Requirements
        assert job_context.pre_requirements.__class__ == PreRequirements
        assert job_context.pre_install.__class__ == PreInstall
        for notebook_context in job_context.notebooks:
            assert os.path.exists(notebook_context.build_script_path)

    assert job_idx == 1
Example #21
def collection_set_to_namespace(path_to_collection_set, extra: typing.Dict[str, typing.Any] = {}):
    collection_names = []
    category_names = []
    for job in find_build_jobs(path_to_collection_set):
        if job.collection.name not in collection_names:
            collection_names.append(job.collection.name)

        if job.category.name not in category_names:
            category_names.append(job.category.name)

    kwargs = {
        'project_path': path_to_collection_set,
        'collection_names': ','.join(collection_names),
        'category_names': ','.join(category_names),
        'notebook_names': '',
        'output_dir': tempfile.NamedTemporaryFile().name
    }
    kwargs.update(extra)
    return argparse.Namespace(**kwargs)
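
For reference, a short usage sketch mirroring Example #1: the helper scans a collection set, joins the discovered collection and category names into comma-separated strings, and lets extra override individual fields. The path below is illustrative.

# Illustrative path; 'Notebook-One' matches the notebook name used in Example #1.
options = collection_set_to_namespace('/tmp/metadata_rich_notebooks',
                                      extra={'notebook_names': 'Notebook-One'})
print(options.project_path, options.collection_names,
      options.category_names, options.notebook_names)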
Example #22
def run_sync(options: argparse.Namespace) -> None:
    if os.environ.get('CIRCLE_PULL_REQUEST') is not None:
        logger.info('Pull Request detected. Skipping Website Publication')
        return None

    if options.site is Site.GithubPages:
        try:
            project_repo = git.Repo(options.project_path)
        except git.exc.InvalidGitRepositoryError:
            raise Exception(f'ProjectPath[{options.project_path}] does not contain a .git folder')

        current_branch = project_repo.head.reference
        try:
            branch = project_repo.heads[options.publish_branch]
        except IndexError:
            branch = project_repo.create_head(options.publish_branch)

        try:
            push_remote = project_repo.remotes[options.publish_remote]
        except IndexError:
            remote_url = os.environ['CIRCLE_REPOSITORY_URL']
            logger.info(f'Using Remote URL: {remote_url}')
            push_remote = project_repo.create_remote(options.publish_remote, remote_url)


        storage_dir = os.path.join(options.project_path, options.artifact_storage_directory)
        from nbcollection.ci.scanner.utils import find_build_jobs
        for build_job in find_build_jobs(options.project_path, options.collection_names, options.category_names, options.notebook_names, True):
            source_path = os.path.join(SCANNER_ARTIFACT_DEST_DIR, build_job.collection.name, build_job.category.name)
            storage_path = os.path.join(storage_dir, build_job.collection.name, build_job.category.name)
            storage_path_dirpath = os.path.dirname(storage_path)
            if not os.path.exists(source_path):
                logger.info(f'Artifact not found, skipping: {build_job.collection.name}.{build_job.category.name}')
                continue

            logger.info(f'Storing Artifact: {build_job.collection.name}.{build_job.category.name}')
            if not os.path.exists(storage_path_dirpath):
                os.makedirs(storage_path_dirpath)

            if os.path.exists(storage_path):
                shutil.rmtree(storage_path)

            shutil.copytree(source_path, storage_path)

        logger.info(f'Pushing Artifacts to Storage Branch: {options.publish_remote} -> {options.publish_branch}')
        project_repo.head.reference = branch
        project_repo.index.add(storage_dir, force=True)
        project_repo.index.commit('Added locally built artifacts')
        push_remote.push(options.publish_branch, force=True)
        project_repo.head.reference = current_branch


    else:
        raise NotImplementedError(options.site)


    validate_and_parse_inputs(options)
    command_context = CICommandContext(options.project_path,
                                       options.collection_names,
                                       options.category_names,
                                       options.notebook_names,
                                       options.ci_mode)

    merge_context = generate_merge_context(options.project_path, options.org, options.repo_name)
    run_artifact_merge(command_context, merge_context)