Beispiel #1
0
def get_notebook_modules_info(repository_id):
    """Collect the module usage recorded for each notebook of a repository.

    For every non-empty name in ``repository.notebook_names`` the matching
    ``Notebook`` row is looked up and the first ``notebook_modules`` row for
    that notebook is read.

    Args:
        repository_id: Primary key of the repository to inspect.

    Returns:
        list[dict]: One dict per notebook that has a ``notebook_modules``
        row. Each dict holds ``name`` and, when the row exposes them,
        ``modules`` (``any_any``) and ``totalmodules`` (``any_any_count``).
        The list is empty when the repository does not exist.
    """
    # Named bind parameters keep the ids out of the SQL text.
    nb_modules_query = (
        "SELECT * "
        "FROM notebook_modules "
        "WHERE notebook_id = :notebook_id AND repository_id = :repository_id")

    with connect() as session:
        repository = session.query(Repository).filter(
            Repository.id == repository_id).first()
        notebook_modules = []
        if repository is None:
            return notebook_modules

        for name in repository.notebook_names:
            if not name:
                continue
            notebook = session.query(Notebook).filter(
                Notebook.repository_id == repository.id,
                Notebook.name == name,
            ).first()
            if notebook is None:
                # A listed name without a Notebook row used to raise
                # AttributeError on ``notebook.id``; skip it instead.
                continue

            nb_module = session.execute(nb_modules_query, {
                "notebook_id": notebook.id,
                "repository_id": repository_id,
            }).fetchone()
            if nb_module is None:
                # Notebooks without a notebook_modules row are not reported.
                continue

            notebook_obj = {'name': notebook.name}
            if 'any_any' in nb_module:
                notebook_obj['modules'] = nb_module.any_any
            if 'any_any_count' in nb_module:
                notebook_obj['totalmodules'] = nb_module.any_any_count
            notebook_modules.append(notebook_obj)
        return notebook_modules
def main():
    """Parse command-line options and run ``apply`` for this script.

    No status logger is created when ``--count`` is given; ``apply`` then
    receives ``None`` as its status argument.
    """
    this_script = os.path.basename(__file__)[:-3]
    default_checks = {'all', this_script, this_script + '.py'}

    cli = argparse.ArgumentParser(
        description='Calculate MD5 hashes and the presence of exercise keywords',
    )
    cli.add_argument(
        '-v', '--verbose',
        type=int,
        default=config.VERBOSE,
        help='increase output verbosity',
    )
    cli.add_argument(
        '-i', '--interval',
        type=int,
        nargs=2,
        default=config.REPOSITORY_INTERVAL,
        help='repository id interval',
    )
    cli.add_argument(
        '-c', '--count',
        action='store_true',
        help='count results',
    )
    cli.add_argument(
        '-r', '--reverse',
        action='store_true',
        help='iterate in reverse order',
    )
    cli.add_argument(
        '--check',
        type=str,
        nargs='*',
        default=default_checks,
        help='check name in .exit',
    )

    options = cli.parse_args()
    config.VERBOSE = options.verbose

    logger = None
    if not options.count:
        logger = StatusLogger(this_script)
        logger.report()

    with connect() as session, savepid():
        apply(session, logger, options.count, options.interval,
              options.reverse, set(options.check))
Beispiel #3
0
def get_cell_modules_info(repository_id):
    """Load every ``cell_modules`` row of a repository into a DataFrame.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        pandas.DataFrame: All columns of the matching ``cell_modules`` rows.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text, so a crafted id string
    # cannot smuggle extra SQL into the statement.
    query = ("SELECT * "
             "FROM cell_modules "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        return pd.read_sql(query, session.connection())
Beispiel #4
0
def get_repository_nb_info(repository_id):
    """Return summary metadata for each notebook listed on a repository.

    Args:
        repository_id: Primary key of the repository.

    Returns:
        list[dict]: One dict per notebook found, keyed by the ``Notebook``
        attribute names listed in ``fields``. Empty when the repository does
        not exist.
    """
    # Every output key is identical to the Notebook attribute it is read from.
    fields = (
        'name', 'id', 'nbformat', 'kernel', 'language', 'language_version',
        'max_execution_count', 'total_cells', 'code_cells',
        'code_cells_with_output', 'markdown_cells', 'raw_cells',
        'unknown_cell_formats', 'empty_cells',
    )
    with connect() as session:
        repository = session.query(Repository).filter(
            Repository.id == repository_id).first()
        notebooks = []
        if repository is None:
            return notebooks
        for name in repository.notebook_names:
            if not name:
                continue
            notebook = session.query(Notebook).filter(
                Notebook.repository_id == repository.id,
                Notebook.name == name,
            ).first()
            if notebook is None:
                # A listed name without a Notebook row used to raise
                # AttributeError; skip it instead.
                continue
            notebooks.append(
                {field: getattr(notebook, field) for field in fields})
        return notebooks
Beispiel #5
0
def get_repository_info(repository_id):
    """Build a display-oriented summary dict for one repository.

    Args:
        repository_id: Primary key of the repository.

    Returns:
        dict: Empty when the repository does not exist. Otherwise it holds
        the link, notebook/setup/requirement counts, either ``"Setup"`` or
        ``"Requirement"`` (setups take precedence), and the notebook counts
        returned by the sibling helper functions.
    """
    with connect() as session:
        repo = session.query(Repository).filter(
            Repository.id == repository_id).first()

        if repo is None:
            return {}

        link = "https://" + repo.domain + "/" + repo.repository
        summary = {
            "Link": link,
            "No. of Notebooks": repo.notebooks_count,
            "Setup Count": repo.setups_count,
            "Requirement Count": repo.requirements_count,
        }

        # The requirement link is only reported when there are no setups.
        if repo.setups_count > 0:
            summary["Setup"] = repo.setups
        elif repo.requirements_count > 0:
            summary["Requirement"] = link + '/tree/master/' + repo.requirements

        summary["Valid Python Notebooks"] = valid_python_notebooks(
            repository_id)
        summary["Invalid Python Notebooks"] = invalid_python_notebooks(
            repository_id)
        summary["Count of notebooks without execution count"] = (
            get_nb_without_execution_count(repository_id))
        summary["Count of notebooks with execution count"] = (
            get_nb_with_execution_count(repository_id))
        return summary
Beispiel #6
0
def get_notebook_execution_info(repository_id):
    """Report the first recorded execution of each notebook in a repository.

    Args:
        repository_id: Primary key of the repository.

    Returns:
        list[dict]: One dict per notebook that has an ``executions`` row,
        with keys ``name``, ``executionreason`` (``"Success"`` when the
        stored reason is NULL), ``diffoncell``, ``diffcount``, ``duration``
        (formatted by ``human_readable_duration``) and ``msg``. Empty when
        the repository does not exist.
    """
    # Named bind parameters keep the ids out of the SQL text.
    query = ("SELECT * "
             "FROM executions "
             "WHERE notebook_id = :notebook_id "
             "AND repository_id = :repository_id")

    with connect() as session:
        repository = session.query(Repository).filter(
            Repository.id == repository_id).first()
        notebook_execution = []
        if repository is None:
            return notebook_execution
        for name in repository.notebook_names:
            if not name:
                continue
            notebook = session.query(Notebook).filter(
                Notebook.repository_id == repository.id,
                Notebook.name == name,
            ).first()
            if notebook is None:
                # A listed name without a Notebook row used to raise
                # AttributeError on ``notebook.id``; skip it instead.
                continue

            execution = session.execute(query, {
                "notebook_id": notebook.id,
                "repository_id": repository_id,
            }).fetchone()
            if execution is None:
                continue

            reason = execution["reason"]
            notebook_execution.append({
                'name': notebook.name,
                'executionreason': "Success" if reason is None else reason,
                'diffoncell': execution["diff"],
                'diffcount': execution["diff_count"],
                'duration': human_readable_duration(execution["duration"]),
                'msg': execution["msg"],
            })
        return notebook_execution
Beispiel #7
0
def get_reproduced_nb(repository_id):
    """Assemble the reproducibility statistics of a repository as JSON.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        str | None: A JSON document combining the results of the sibling
        ``get_*`` helpers, or ``None`` when ``get_total_notebooks`` reports
        zero notebooks for the repository.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Checked first so that the executions table is not read for nothing.
    total_notebooks = get_total_notebooks(repository_id)
    if total_notebooks == 0:
        return None

    # Only an integer literal may reach the SQL text.
    execution_query = (
        "SELECT id, notebook_id, mode, reason, msg, diff, cell, count, diff_count, timeout, duration, processed, skip "
        "FROM executions "
        "WHERE repository_id = {}").format(int(repository_id))

    with connect() as session:
        executions = pd.read_sql(execution_query, session.connection())

        validity_notebooks = get_valid_invalid_nb(repository_id)
        failed_installations = get_failed_installations(executions)
        non_declared_dependencies = get_non_declared_dependencies(executions)
        repro_exceptions = get_repro_exceptions(executions)

        # Processed executions of mode 3 and mode 5, minus the notebooks that
        # could not be read at all.
        processed = executions["processed"] > 0
        combined = pd.concat([
            executions[processed & (executions["mode"] == 3)],
            executions[processed & (executions["mode"] == 5)],
        ])
        combined = combined[combined["reason"] != "<Read notebook error>"]

        exception_error = get_exception_error(combined, executions)

        # NOTE(review): this result is not part of the returned payload; the
        # call is kept so the helper still runs exactly as before.
        repro_timeout = get_repro_timeout(combined, total_notebooks)

        nb_finished_unfinished_executions = get_nb_finished_unfinished_executions(
            executions)
        nb_results_difference = get_nb_results_difference(executions)
        nb_execution_count = get_nb_execution_count(repository_id)
        nb_output_cell_count = get_nb_output_cell_count(repository_id)

        return json.dumps({
            'failed_installations': failed_installations,
            'non_declared_dependencies': non_declared_dependencies,
            'repro_exceptions': repro_exceptions,
            'exception_error': exception_error,
            'nb_finished_unfinished_executions':
            nb_finished_unfinished_executions,
            'nb_results_difference': nb_results_difference,
            'validity_notebooks': validity_notebooks,
            'nb_execution_count': nb_execution_count,
            'nb_output_cell_count': nb_output_cell_count
        })
Beispiel #8
0
def get_total_notebooks(repository_id):
    """Count the notebooks of a repository that have a kernel/format and cells.

    Rows with ``kernel = 'no-kernel' AND nbformat = '0'`` and rows with
    ``total_cells = 0`` are excluded from the count.

    Args:
        repository_id: Primary key of the repository.

    Returns:
        The scalar result of the ``count(id)`` query.
    """
    # A named bind parameter keeps the id out of the SQL text.
    nb_query = ("SELECT count(id) "
                "FROM notebooks "
                "WHERE NOT (kernel = 'no-kernel' AND nbformat = '0') "
                "AND total_cells != 0 "
                "AND repository_id = :repository_id ")
    with connect() as session:
        result = session.execute(nb_query, {"repository_id": repository_id})
        return result.scalar()
Beispiel #9
0
def get_nb_with_execution_count(repository_id):
    """Count the notebooks whose ``max_execution_count`` is zero or greater.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        int: Number of matching notebook rows.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text.
    query = ("SELECT * "
             "FROM notebooks "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        notebooks = pd.read_sql(query, session.connection())

        # The original compared against ``-0``, which is simply 0.
        return len(notebooks[notebooks.max_execution_count >= 0])
Beispiel #10
0
def main():
    """Parse command-line options and run the compression ``apply`` step.

    ``--retry-errors`` is accepted by the parser but is not forwarded to
    ``apply``. No status logger is created when ``--count`` is given.
    """
    this_script = os.path.basename(__file__)[:-3]
    default_checks = {'all', this_script, this_script + '.py'}

    cli = argparse.ArgumentParser(description="Compress processed repositories")
    cli.add_argument("-v", "--verbose", type=int, default=config.VERBOSE,
                     help="increase output verbosity")
    cli.add_argument("-z", "--compression", type=str,
                     default=config.COMPRESSION,
                     help="compression algorithm")
    cli.add_argument("-e", "--retry-errors", action='store_true',
                     help="retry errors")
    cli.add_argument("-i", "--interval", type=int, nargs=2,
                     default=config.REPOSITORY_INTERVAL,
                     help="id interval")
    cli.add_argument("-c", "--count", action='store_true',
                     help="count results")
    cli.add_argument('-r', '--reverse', action='store_true',
                     help='iterate in reverse order')
    cli.add_argument('-k', '--keep-uncompressed', action='store_true',
                     help='keep uncompressed files')
    cli.add_argument('--check', type=str, nargs='*',
                     default=default_checks,
                     help='check name in .exit')

    options = cli.parse_args()
    config.VERBOSE = options.verbose

    logger = None
    if not options.count:
        logger = StatusLogger(this_script)
        logger.report()

    config.COMPRESSION = options.compression
    with connect() as session, savepid():
        apply(
            session,
            logger,
            options.keep_uncompressed,
            options.count,
            options.interval,
            options.reverse,
            set(options.check),
        )
Beispiel #11
0
def get_cell_type(repository_id):
    """Build the figure showing how many cells of each type a repository has.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        The figure returned by ``display_counts``, or ``None`` when the
        repository has no rows in ``cells``.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text.
    query = ("SELECT * "
             "FROM cells "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        cells = pd.read_sql(query, session.connection())
        if cells.empty:
            return None
        # display_counts also returns the counts; only the figure is needed.
        fig, _counts = display_counts(cells["cell_type"].value_counts(),
                                      width=5,
                                      show_values=True,
                                      plot=False,
                                      title='Types of cells in Notebooks')
        return fig
Beispiel #12
0
def get_nblanguage_version(repository_id):
    """Plot the notebooks of a repository per language version and language.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        The current matplotlib figure after drawing the bar chart, or
        ``None`` when the repository has no rows in ``notebooks``.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text.
    query = ("SELECT * "
             "FROM notebooks "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        notebooks = pd.read_sql(query, session.connection())
        if notebooks.empty:
            return None
        # Rows: language_version; columns: language; values: row counts
        # taken from the ``kernel`` column.
        series = notebooks.groupby(["language_version", "language"
                                    ]).count()['kernel'].unstack()
        series.plot(kind="bar",
                    title='Programming Language Version of Notebooks')
        return plt.gcf()
def main():
    """Parse command-line options and run the requirement-file ``apply`` step.

    ``apply`` receives ``True`` in place of the repository list when
    ``--repositories`` is absent or empty, and ``0`` instead of
    ``consts.R_REQUIREMENTS_ERROR`` when ``--retry-errors`` is given. No
    status logger is created when ``--count`` is given.
    """
    this_script = os.path.basename(__file__)[:-3]
    default_checks = {'all', this_script, this_script + '.py'}

    cli = argparse.ArgumentParser(
        description="Extract requirement files from registered repositories")
    cli.add_argument("-v", "--verbose", type=int, default=config.VERBOSE,
                     help="increase output verbosity")
    cli.add_argument("-n", "--repositories", type=int, default=None,
                     nargs="*",
                     help="repositories ids")
    cli.add_argument("-i", "--interval", type=int, nargs=2,
                     default=config.REPOSITORY_INTERVAL,
                     help="id interval")
    cli.add_argument("-e", "--retry-errors", action='store_true',
                     help="retry errors")
    cli.add_argument("-c", "--count", action='store_true',
                     help="count results")
    cli.add_argument('-r', '--reverse', action='store_true',
                     help='iterate in reverse order')
    cli.add_argument('--check', type=str, nargs='*',
                     default=default_checks,
                     help='check name in .exit')

    options = cli.parse_args()
    config.VERBOSE = options.verbose

    logger = None
    if not options.count:
        logger = StatusLogger(this_script)
        logger.report()

    with connect() as session, savepid():
        apply(
            session,
            logger,
            options.repositories or True,
            0 if options.retry_errors else consts.R_REQUIREMENTS_ERROR,
            options.count,
            options.interval,
            options.reverse,
            set(options.check),
        )
Beispiel #14
0
def get_nblanguage(repository_id):
    """Build the figure showing the notebook languages of a repository.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        The figure returned by ``display_counts``, or ``None`` when the
        repository has no rows in ``notebooks``.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text.
    query = ("SELECT * "
             "FROM notebooks "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        notebooks = pd.read_sql(query, session.connection())
        if notebooks.empty:
            return None
        # display_counts also returns the counts; only the figure is needed.
        fig, _counts = display_counts(
            notebooks["language"].value_counts(),
            width=5,
            show_values=True,
            plot=False,
            title='Programming Language of Notebooks')
        return fig
Beispiel #15
0
def github_crawler(github_url):
    """Load a repository from its URL and run every processing step on it.

    The steps are invoked in order: s1 notebooks and cells, s2 requirement
    files, s3 compress, s4 markdown features, s5 extract files, s6 cell
    features, s7 execute repositories, then p0, p1 and p2.

    Args:
        github_url: URL handed to ``load_repository.load_repository_from_url``.

    Returns:
        The id of the loaded repository.
    """
    count = None
    interval = None
    reverse = None
    # Must be a set holding the whole name: ``set('all')`` would split the
    # string into the characters {'a', 'l'}.
    check = {'all'}
    # NOTE(review): this is the *string* 'False', which is truthy, so
    # s3_compress receives a true value here. Left unchanged because later
    # steps may rely on it; confirm the intent.
    keep_uncompressed = 'False'
    dispatches = set()
    script_name = None
    skip_env = False
    skip_extract = 0
    dry_run = 0
    status = StatusLogger(script_name)
    status.report()

    with connect() as session, mount_basedir(), savepid():
        repository = load_repository.load_repository_from_url(
            session, github_url)
        s1_notebooks_and_cells.apply(
            SafeSession(session, interrupted=consts.N_STOPPED), status,
            [repository.id], consts.R_N_ERROR, count, interval, reverse,
            set(check))
        s2_requirement_files.apply(session, status, [repository.id],
                                   consts.R_REQUIREMENTS_ERROR, count,
                                   interval, reverse, set(check))
        s3_compress.apply(session, status, keep_uncompressed, count, interval,
                          reverse, set(check))
        s4_markdown_features.apply(session, status, consts.C_PROCESS_ERROR,
                                   count, interval, reverse, set(check))
        s5_extract_files.apply(session, status, consts.R_COMPRESS_ERROR, count,
                               interval, reverse, set(check))
        s6_cell_features.apply(SafeSession(session), status, dispatches, True,
                               consts.C_PROCESS_ERROR, consts.C_SYNTAX_ERROR,
                               consts.C_TIMEOUT, count, interval, reverse,
                               set(check))
        # The return value of the execution step is not used.
        s7_execute_repositories.apply(
            session, repository.id, status, script_name, config.EXECUTION_MODE,
            config.WITH_EXECUTION, config.WITH_DEPENDENCY,
            consts.R_COMPRESS_ERROR, 3, consts.R_TROUBLESOME,
            consts.R_UNAVAILABLE_FILES, skip_env, skip_extract, dry_run,
            mode_rules, s7_execute_repositories.notebook_exec_mode, count,
            interval, reverse, set(check))
        p0_local_possibility.apply(session, status, count, interval, reverse,
                                   set(check))
        p1_notebook_aggregate.apply(session, status, consts.N_AGGREGATE_ERROR,
                                    count, interval, reverse, set(check))
        p2_sha1_exercises.apply(session, status, count, interval, reverse,
                                set(check))
        return repository.id
Beispiel #16
0
def get_repository_notebook(repository_id, notebook_id):
    """Look up a notebook together with the repository that owns it.

    Args:
        repository_id: Primary key of the repository.
        notebook_id: Primary key of the notebook inside that repository.

    Returns:
        tuple | None: ``(repository_name, repository_path, notebook_name)``,
        or ``None`` when either the repository or the notebook is missing.
    """
    with connect() as session:
        repository = session.query(Repository).filter(
            Repository.id == repository_id).first()
        if repository is None:
            return None

        notebook = session.query(Notebook).filter(
            Notebook.id == notebook_id,
            Notebook.repository_id == repository_id,
        ).first()
        if notebook is None:
            # Previously raised AttributeError on ``.name``; report "not
            # found" the same way as for a missing repository.
            return None

        return repository.repository, repository.path, notebook.name
Beispiel #17
0
def get_repository_requirements_info(repository_id):
    """Return the requirement information stored for a repository.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        list[str] | pandas.Series: The lines of the file when the repository
        has exactly one requirement file in ``requirements.txt`` format; an
        empty list when it has no requirement file; otherwise the ``content``
        column of all matching rows.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text.
    query = ("SELECT name, reqformat, content "
             "FROM requirement_files "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        requirement_files = pd.read_sql(query, session.connection())

        # ``Series.item()`` raises ValueError unless there is exactly one
        # row, so the zero-row and multi-row cases are handled explicitly.
        if requirement_files.empty:
            return []
        if (len(requirement_files) == 1 and
                requirement_files["reqformat"].iloc[0] == 'requirements.txt'):
            # splitlines() handles LF as well as CRLF line endings.
            return requirement_files["content"].iloc[0].splitlines()
        return requirement_files["content"]
Beispiel #18
0
def valid_python_notebooks(repository_id):
    """Count the notebooks of a repository that pass all validity filters.

    A notebook is counted when its language is ``python``, its language
    version is not ``unknown``, it is not the ``no-kernel`` / nbformat ``0``
    combination, it has at least one cell, and bit 16 of ``processed`` is
    not set.

    Args:
        repository_id: Repository id; must be convertible with ``int()``.

    Returns:
        int: Number of notebooks passing every filter.

    Raises:
        ValueError, TypeError: If ``repository_id`` is not integer-like.
    """
    # Only an integer literal may reach the SQL text.
    query = ("SELECT * "
             "FROM notebooks "
             "WHERE repository_id = {}").format(int(repository_id))
    with connect() as session:
        notebooks = pd.read_sql(query, session.connection())

        is_python = notebooks["language"] == "python"
        known_version = notebooks["language_version"] != "unknown"
        no_kernel_format = ((notebooks["kernel"] == "no-kernel")
                            & (notebooks["nbformat"] == "0"))
        has_cells = notebooks["total_cells"] != 0
        flag_16_clear = np.bitwise_and(notebooks["processed"], 16) == 0

        return len(notebooks[is_python
                             & known_version
                             & ~no_kernel_format
                             & has_cells
                             & flag_16_clear])
Beispiel #19
0
def main():
    """Parse command-line options and load one repository from its URL."""
    cli = argparse.ArgumentParser(description="Load Repository by URL")
    cli.add_argument(
        "url",
        type=str,
        help="repository URL",
    )
    cli.add_argument(
        "-v", "--verbose",
        type=int,
        default=config.VERBOSE,
        help="increase output verbosity",
    )
    cli.add_argument(
        "-b", "--branch",
        type=str,
        help="specific branch",
    )
    cli.add_argument(
        "-c", "--commit",
        type=str,
        help="specific commit",
    )
    cli.add_argument(
        "-e", "--clone-existing",
        action='store_true',
        help="clone even if repository exists",
    )

    options = cli.parse_args()
    config.VERBOSE = options.verbose
    with connect() as session, mount_basedir(), savepid():
        load_repository_from_url(session, options.url, options.branch,
                                 options.commit, options.clone_existing)
def main():
    """Parse command-line options and run ``apply`` for this script.

    ``apply`` receives ``0`` instead of ``consts.R_COMPRESS_ERROR`` when
    ``--retry-errors`` is given. No status logger is created when
    ``--count`` is given.
    """
    register_surrogateescape()
    this_script = os.path.basename(__file__)[:-3]
    default_checks = {'all', this_script, this_script + '.py'}

    cli = argparse.ArgumentParser(description='Execute repositories')
    cli.add_argument(
        '-v', '--verbose',
        type=int,
        default=config.VERBOSE,
        help='increase output verbosity',
    )
    cli.add_argument(
        '-e', '--retry-errors',
        action='store_true',
        help='retry errors',
    )
    cli.add_argument(
        '-i', '--interval',
        type=int,
        nargs=2,
        default=config.REPOSITORY_INTERVAL,
        help='repository id interval',
    )
    cli.add_argument(
        '-c', '--count',
        action='store_true',
        help='count results',
    )
    cli.add_argument(
        '-r', '--reverse',
        action='store_true',
        help='iterate in reverse order',
    )
    cli.add_argument(
        '--check',
        type=str,
        nargs='*',
        default=default_checks,
        help='check name in .exit',
    )

    options = cli.parse_args()
    config.VERBOSE = options.verbose

    logger = None
    if not options.count:
        logger = StatusLogger(this_script)
        logger.report()

    skip_status = 0 if options.retry_errors else consts.R_COMPRESS_ERROR
    with connect() as session, savepid():
        apply(session, logger, skip_status, options.count, options.interval,
              options.reverse, set(options.check))
Beispiel #21
0
def get_cell_info(repository_id, notebook_id):
    """Return the stored cells of one notebook as plain dicts.

    Args:
        repository_id: Primary key of the repository.
        notebook_id: Primary key of the notebook.

    Returns:
        list[dict]: One dict per ``Cell`` row, keyed by the ``Cell``
        attribute names listed in ``fields``.
    """
    # Every output key is identical to the Cell attribute it is read from.
    fields = (
        'id', 'repository_id', 'notebook_id', 'index', 'cell_type',
        'execution_count', 'lines', 'output_formats', 'source', 'python',
    )
    with connect() as session:
        query = session.query(Cell).filter(
            Cell.repository_id == repository_id,
            Cell.notebook_id == notebook_id,
        )
        # The per-row debug ``print("cell")`` was removed.
        return [{field: getattr(cell, field) for field in fields}
                for cell in query]
Beispiel #22
0
def get_nb_output_cell_count(repository_id):
    """Summarise how many code cells of a repository have output.

    Args:
        repository_id: Primary key of the repository.

    Returns:
        dict: ``data`` is ``[with_output, without_output]``, ``labels``
        names the two entries and ``title`` is the chart title.
    """
    # A named bind parameter keeps the id out of the SQL text.
    nb_query = (
        "SELECT sum(code_cells) as sum_code_cells, sum(code_cells_with_output) as sum_code_cells_with_output "
        "FROM notebooks "
        "WHERE repository_id = :repository_id ")
    with connect() as session:
        totals = session.execute(
            nb_query, {"repository_id": repository_id}).fetchone()

        # SUM() is NULL when no notebook row matches; treat that as zero
        # instead of failing on ``None - None``.
        total_code_cells = totals.sum_code_cells or 0
        total_code_cells_with_output = totals.sum_code_cells_with_output or 0
        total_code_cells_without_output = (
            total_code_cells - total_code_cells_with_output)

        return {
            'data': [
                total_code_cells_with_output, total_code_cells_without_output
            ],
            'labels': ['Code Cells with Output', 'Code Cells without Output'],
            'title': 'Notebooks with Code cells with and without Output',
        }
Beispiel #23
0
def get_notebook(repository_id, notebook_id):
    """Convert a stored notebook to RDF and write it next to the repository.

    The notebook is read from ``config.EXECUTION_DIR / repository.hash_dir2``
    when that directory exists, otherwise from ``repository.path``. The
    converted document is written to ``<repository.path>/<name>.ttl``.

    Args:
        repository_id: Primary key of the repository.
        notebook_id: Primary key of the notebook inside that repository.

    Returns:
        tuple[str, str]: ``(rdf_text, notebook_name)``. ``rdf_text`` is
        ``''`` when the notebook could not be read; both values are ``''``
        when the repository or notebook row does not exist.
    """
    nbconvert_rdf = ''
    name = ''
    with connect() as session:
        repository = session.query(Repository).filter(
            Repository.id == repository_id).first()
        notebook_row = session.query(Notebook).filter(
            Notebook.id == notebook_id,
            Notebook.repository_id == repository_id,
        ).first()
        if repository is None or notebook_row is None:
            # Previously raised AttributeError; return the empty defaults.
            return str(nbconvert_rdf), name

        name = notebook_row.name
        with mount_basedir():
            if not repository.path.exists():
                # ``notebook_path`` used to be left unbound in this case,
                # which raised UnboundLocalError below.
                return str(nbconvert_rdf), name

            execution_path = (config.EXECUTION_DIR / repository.hash_dir2)
            if os.path.exists(execution_path):
                notebook_path = execution_path
            else:
                notebook_path = repository.path

            try:
                with open(str(notebook_path / name)) as ofile:
                    notebook = ofile.read()
                    nbtordfconverter = nb2rdf.NBToRDFConverter()
                    notebook_json = nbformat.reads(notebook, as_version=4)
                    nbconvert_rdf = nbtordfconverter.convert_to_rdf(
                        name, notebook_json)
                    output_file_extension = 'ttl'
                    output_file = os.path.join(
                        repository.path, name + "." + output_file_extension)
                    # Context manager so the output handle is always closed.
                    with open(output_file, 'w') as rdf_file:
                        rdf_file.write(str(nbconvert_rdf))
                    return str(nbconvert_rdf), name
            except OSError as e:
                vprint(3, "Failed to open notebook {}".format(e))
                return str(nbconvert_rdf), name
def main():
    """Parse command-line options and run the notebook-extraction ``apply``.

    ``--retry-errors`` and ``--retry-timeout`` are accepted by the parser
    but are not forwarded to ``apply``. ``apply`` receives ``True`` in place
    of the repository list when ``--repositories`` is absent or empty. No
    status logger is created when ``--count`` is given.
    """
    this_script = os.path.basename(__file__)[:-3]
    default_checks = {'all', this_script, this_script + '.py'}

    cli = argparse.ArgumentParser(
        description="Extract notebooks from registered repositories")
    cli.add_argument("-v", "--verbose", type=int, default=config.VERBOSE,
                     help="increase output verbosity")
    cli.add_argument("-n", "--repositories", type=int, default=None,
                     nargs="*",
                     help="repositories ids")
    cli.add_argument("-s", "--status", type=int,
                     default=consts.R_COMPRESS_OK + consts.R_COMMIT_MISMATCH,
                     help="has processed status")
    cli.add_argument("-z", "--no", type=int, default=0,
                     help="does not have status")
    cli.add_argument("-e", "--retry-errors", action='store_true',
                     help="retry errors")
    cli.add_argument("-t", "--retry-timeout", action='store_true',
                     help="retry timeout")
    cli.add_argument("-i", "--interval", type=int, nargs=2,
                     default=config.REPOSITORY_INTERVAL,
                     help="id interval")
    cli.add_argument("-c", "--count", action='store_true',
                     help="count results")
    cli.add_argument('-r', '--reverse', action='store_true',
                     help='iterate in reverse order')
    cli.add_argument('--check', type=str, nargs='*',
                     default=default_checks,
                     help='check name in .exit')

    options = cli.parse_args()
    config.VERBOSE = options.verbose

    logger = None
    if not options.count:
        logger = StatusLogger(this_script)
        logger.report()

    with connect() as session, savepid():
        apply(
            session,
            logger,
            options.repositories or True,
            options.status,
            options.no,
            options.count,
            options.interval,
            options.reverse,
            set(options.check),
        )
Beispiel #25
0
def get_execution_order_json(repository_id, notebook_id, cell_types):
    """Describe the execution order of a notebook's cells as a node/link graph.

    Args:
        repository_id: Primary key of the repository.
        notebook_id: Primary key of the notebook.
        cell_types: Value convertible with ``int()``.
            ``consts.CODE_CELL_TYPE`` keeps only code cells,
            ``consts.OTHER_CELL_TYPE`` keeps only non-code cells, and any
            other value keeps all cells.

    Returns:
        dict: ``nodes`` has one entry per selected cell with ``name`` (the
        execution count, or ``"-"`` when it is empty), ``id`` (the cell
        index) and ``label``. ``links`` connects cell indexes in ascending
        execution-count order with ``type`` ``"followedby"``.
    """
    filters = [
        Cell.repository_id == repository_id,
        Cell.notebook_id == notebook_id,
    ]
    selected_type = int(cell_types)
    if selected_type == consts.CODE_CELL_TYPE:
        filters.append(Cell.cell_type == 'code')
    elif selected_type == consts.OTHER_CELL_TYPE:
        filters.append(Cell.cell_type != 'code')

    with connect() as session:
        # Fetch once; the rows feed both the nodes and the ordering.
        cells = session.query(Cell).filter(*filters).all()

        # Cell index -> execution count, for cells that were executed.
        execution_counts = {
            cell.index: int(cell.execution_count)
            for cell in cells
            if cell.execution_count is not None
        }
        # Cell indexes sorted by execution count; ties keep row order.
        execution_order = sorted(execution_counts, key=execution_counts.get)

        nodes = [
            {
                "name": cell.execution_count if cell.execution_count else "-",
                "id": cell.index,
                "label": "Cell " + str(cell.index),
            }
            for cell in cells
        ]
        links = [
            {"source": current, "target": following, "type": 'followedby'}
            for current, following in zip(execution_order,
                                          execution_order[1:])
        ]
        return {"nodes": nodes, "links": links}
def main():
    """Command-line entry point: parse options, then run ``apply``.

    The verbosity option is written back to ``config.VERBOSE``. A
    ``StatusLogger`` is created and reported only when ``--count`` is not
    given. ``apply`` runs inside ``savepid()`` and a database session, and
    ``pos_apply`` is called afterwards (still inside ``savepid()``) with the
    dispatches set that was handed to ``apply``.
    """
    register_surrogateescape()
    script_name = os.path.basename(__file__)[:-3]
    check_default = {'all', script_name, script_name + '.py'}

    parser = argparse.ArgumentParser(description='Execute repositories')
    add = parser.add_argument
    add('-v', '--verbose', type=int, default=config.VERBOSE,
        help='increase output verbosity')
    add("-n", "--notebooks", type=int, default=None, nargs="*",
        help="notebooks ids")
    add('-e', '--retry-errors', action='store_true', help='retry errors')
    add('-s', '--retry-syntaxerrors', action='store_true',
        help='retry syntax errors')
    add('-t', '--retry-timeout', action='store_true', help='retry timeout')
    add('-i', '--interval', type=int, nargs=2,
        default=config.REPOSITORY_INTERVAL, help='repository id interval')
    add('-c', '--count', action='store_true', help='count results')
    add('-r', '--reverse', action='store_true',
        help='iterate in reverse order')
    add('--check', type=str, nargs='*', default=check_default,
        help='check name in .exit')

    args = parser.parse_args()
    config.VERBOSE = args.verbose

    # No status logger when only counting results.
    if args.count:
        status = None
    else:
        status = StatusLogger(script_name)
        status.report()

    dispatches = set()
    with savepid():
        with connect() as session:
            safe_session = SafeSession(session)
            # An empty or missing notebook list is passed on as True.
            notebooks = args.notebooks or True
            # Each retry flag replaces the matching constant with 0.
            process_error_code = (
                0 if args.retry_errors else consts.C_PROCESS_ERROR)
            syntax_error_code = (
                0 if args.retry_syntaxerrors else consts.C_SYNTAX_ERROR)
            timeout_code = 0 if args.retry_timeout else consts.C_TIMEOUT
            apply(
                safe_session,
                status,
                dispatches,
                notebooks,
                process_error_code,
                syntax_error_code,
                timeout_code,
                args.count,
                args.interval,
                args.reverse,
                set(args.check),
            )

        # Runs after the session is closed but before savepid() exits.
        pos_apply(dispatches, args.retry_errors, args.retry_timeout,
                  args.verbose)
def main():
    """Command-line entry point: parse options, then run ``apply``.

    The verbosity option is written back to ``config.VERBOSE``. A
    ``StatusLogger`` is created and reported only when ``--count`` is not
    given. ``apply`` is then called once, inside a database session and
    ``savepid()``.
    """
    script_name = os.path.basename(__file__)[:-3]
    check_default = {'all', script_name, script_name + '.py'}
    dry_run_help = (
        "dry-run level. 0 runs everything. 1 does not execute. "
        "2 does not install dependencies. 3 does not extract files. "
        "4 does not prepare conda environment")

    parser = argparse.ArgumentParser(description="Execute repositories")
    add = parser.add_argument
    add("-v", "--verbose", type=int, default=config.VERBOSE,
        help="increase output verbosity")
    add("-e", "--retry-errors", action='store_true', help="retry errors")
    add("-f", "--discover-deleted", action='store_true',
        help="try to discover deleted files")
    add("-i", "--interval", type=int, nargs=2,
        default=config.REPOSITORY_INTERVAL, help="repository id interval")
    add("-z", "--retry-troublesome", action='store_true',
        help="retry troublesome")
    add("-x", "--with-execution", type=int, default=config.WITH_EXECUTION,
        help="-1: without execution; 0: all; 1: with execution")
    add("-d", "--with-dependency", type=int, default=config.WITH_DEPENDENCY,
        help="-1: without dependency; 0: all; 1: with dependency")
    add("-m", "--execution-mode", type=int, default=config.EXECUTION_MODE,
        help="-1: auto; 1: cellorder, 2: dependencies, 4: anaconda")
    add("-c", "--count", action='store_true', help="count results")
    add("--dry-run", type=int, default=0, help=dry_run_help)
    add('-r', '--reverse', action='store_true',
        help='iterate in reverse order')
    add('--check', type=str, nargs='*', default=check_default,
        help='check name in .exit')
    add("--skip-env", action='store_true', help="skip environment")
    add("--skip-extract", action='store_true', help="skip extraction")

    args = parser.parse_args()
    config.VERBOSE = args.verbose

    # No status logger when only counting results.
    if args.count:
        status = None
    else:
        status = StatusLogger(script_name)
        status.report()

    with connect() as session:
        with savepid():
            # --retry-errors switches two positional arguments at once; what
            # the 1/3 value means is defined by apply(), not visible here.
            if args.retry_errors:
                compress_error_code, retry_errors_level = 0, 1
            else:
                compress_error_code = consts.R_COMPRESS_ERROR
                retry_errors_level = 3
            troublesome_code = (
                0 if args.retry_troublesome else consts.R_TROUBLESOME)
            unavailable_files_code = (
                0 if args.discover_deleted else consts.R_UNAVAILABLE_FILES)
            apply(
                session,
                status,
                script_name,
                args.execution_mode,
                args.with_execution,
                args.with_dependency,
                compress_error_code,
                retry_errors_level,
                troublesome_code,
                unavailable_files_code,
                args.skip_env,
                args.skip_extract,
                args.dry_run,
                mode_rules_cell_order,
                notebook_exec_mode_cell_order,
                args.count,
                args.interval,
                args.reverse,
                set(args.check),
            )