def get_speaking_language(nb_id):
    """Return the language reported for a notebook's markdown cells.

    The markdown cells of ``nb_id`` are fetched with ``data.get_md_cells``
    and passed as-is to ``speaking_language``.

    Returns:
        The result of ``speaking_language`` for the markdown cells, or
        ``None`` when the notebook has no markdown cells.
    """
    markdown = data.get_md_cells(nb_id)

    # without markdown there is nothing to analyse
    return speaking_language(markdown) if len(markdown) != 0 else None
# Example no. 2
0
def frequency(nb_id):
    """Return the proportion of cell transitions that switch code <-> markdown.

    The numerator is ``count_switches(nb_id)``; the denominator is the number
    of adjacent cell pairs, i.e. ``len(data.get_cells(nb_id)) - 1``.

    Returns:
        A float ratio, or ``None`` when the ratio is undefined: the notebook
        has no markdown cells, or it has fewer than two cells and therefore
        no transitions at all.
    """
    # check if markdown cells exist
    if len(data.get_md_cells(nb_id)) == 0:
        return None

    # a notebook with a single cell has no transitions; dividing by zero
    # here used to raise ZeroDivisionError
    total_switches = len(data.get_cells(nb_id)) - 1
    if total_switches <= 0:
        return None

    # number of code <-> markdown switches
    cm_switches = count_switches(nb_id)

    # calculate proportion
    return float(cm_switches) / float(total_switches)
def num_headers(nb_id):
    """Return the total header count over a notebook's markdown cells.

    Each markdown cell of ``nb_id`` is passed to ``count_headers`` and the
    per-cell results are added together.

    Returns:
        The summed count; ``0`` when there are no markdown cells.
    """
    # add up the per-cell header counts
    return sum(count_headers(md) for md in data.get_md_cells(nb_id))
# Example no. 4
0
def md_formatting(nb_id):
    """Report whether any markdown cell of a notebook has extra formatting.

    Returns:
        ``None`` when the notebook has no markdown cells; otherwise ``True``
        if ``has_extra_formatting`` is truthy for at least one markdown cell
        and ``False`` if it is not truthy for any of them.
    """
    cells = data.get_md_cells(nb_id)

    # no markdown at all: the question does not apply
    if len(cells) == 0:
        return None

    # stops at the first cell that qualifies
    return any(has_extra_formatting(md) for md in cells)
def has_author(nb_id):
    """Report whether a known author name appears in a notebook.

    Candidate names are the values from ``get_contributors(nb_id)`` plus
    ``get_author(nb_id)`` and ``get_username(nb_id)``. Each line of every
    markdown cell's ``'source'`` is searched (after removing anything that
    matches ``regex.link``), followed by every entry returned by
    ``data.get_comments(nb_id)``.

    Returns:
        ``True`` as soon as any candidate name is found as a substring,
        ``False`` otherwise.
    """
    # get all the markdown cells and author names
    md_cells = data.get_md_cells(nb_id)

    author_name = get_author(nb_id)
    author_login = get_username(nb_id)
    candidates = get_contributors(nb_id) + [author_name, author_login]

    # Drop missing names once, up front. Empty strings must go too: "" is a
    # substring of every string, so it would make any line count as a match.
    names = [name for name in candidates if name]

    # check each cell for an author
    for cell in md_cells:
        if 'source' not in cell:
            continue

        for line in cell['source']:
            # filter out links before searching for author; the extra
            # group(0) check stops the loop if the pattern ever matches an
            # empty string, which replace() could never remove
            link = re.search(regex.link, line)
            while link is not None and link.group(0):
                line = line.replace(link.group(0), "")
                link = re.search(regex.link, line)

            # check all possible authors
            if any(name in line for name in names):
                return True

    # gather comments
    comments = data.get_comments(nb_id)

    if comments is not None:
        # search each comment for the author names
        for comment in comments:
            if any(name in comment for name in names):
                return True

    # no instances of authors found
    return False
# Example no. 6
0
def has_equations(nb_id):
    """Report whether any markdown line of a notebook matches ``regex.equation``.

    Returns:
        ``None`` when the notebook has no markdown cells; otherwise ``True``
        if some line in some cell's ``'source'`` matches the equation
        pattern, else ``False``. Cells without a ``'source'`` field are
        ignored.
    """
    cells = data.get_md_cells(nb_id)

    # no markdown at all: the question does not apply
    if len(cells) == 0:
        return None

    for md in cells:
        # only cells carrying a source field can be searched
        if 'source' in md.keys() and any(
            re.search(regex.equation, text) for text in md['source']
        ):
            return True

    return False
def is_education(nb_id):
    """Report whether a notebook matches any of the ``regex.education`` patterns.

    The lower-cased lines of every markdown cell's ``'source'`` are searched
    first; if none match, the lower-cased path from ``data.get_path(nb_id)``
    is searched with the same patterns.

    Returns:
        ``True`` on the first match, ``False`` if nothing matches.
    """
    for md in data.get_md_cells(nb_id):
        # cells without a source field have nothing to search
        if 'source' not in md.keys():
            continue

        for text in md['source']:
            # try every keyword pattern against the lower-cased line
            if any(re.search(kw, text.lower()) for kw in regex.education):
                return True

    # fall back to the file path (including the file name)
    location = data.get_path(nb_id)
    return any(re.search(kw, location.lower()) for kw in regex.education)
# Example no. 8
0
def markdown_prop(nb_id):
    """Return the proportion of a notebook's cells that are markdown cells.

    Returns:
        ``len(data.get_md_cells(nb_id)) / len(data.get_cells(nb_id))`` as a
        float, or ``None`` when the notebook has no cells and the proportion
        is therefore undefined.
    """
    num_cells = len(data.get_cells(nb_id))

    # an empty notebook used to raise ZeroDivisionError here
    if num_cells == 0:
        return None

    num_md = len(data.get_md_cells(nb_id))

    return float(num_md) / float(num_cells)