コード例 #1
0
ファイル: manual_join_imp.py プロジェクト: kannode/mcdp
def do_remove_stuff(soup, remove_selectors, remove):
    """Remove from ``soup`` every element matching the given CSS selectors.

    soup: BeautifulSoup tree, modified in place.
    remove_selectors: optional iterable of CSS selectors to remove.
    remove: optional single CSS selector (ignored when None or '').

    Note: a textual log of removed chunks is accumulated in `all_removed`
    but currently not returned or used.
    """
    all_selectors = []
    if remove is not None and remove != '':
        all_selectors.append(remove)
    if remove_selectors:
        all_selectors.extend(remove_selectors)

    logger.debug('all_selectors: %s' % all_selectors)

    all_removed = ''
    for selector in all_selectors:
        nremoved = 0
        # BUG FIX: previously logged `remove` here instead of the
        # selector currently being processed.
        logger.debug('Removing selector %r' % selector)
        toremove = list(soup.select(selector))
        logger.debug('Removing %d objects' % len(toremove))
        for x in toremove:
            nremoved += 1
            nd = len(list(x.descendants))
            logger.debug('removing %s with %s descendants' % (x.name, nd))
            if nd > 1000:
                s = str(x)[:300]
                logger.debug(' it is %s' % s)
            x.extract()

            all_removed += '\n\n' + '-' * 50 + ' chunk %d removed\n' % nremoved
            all_removed += str(x)
            all_removed += '\n\n' + '-' * 100 + '\n\n'

        # BUG FIX: report the selector actually used, not `remove`.
        logger.info('Removed %d elements of selector %r' % (nremoved, selector))
コード例 #2
0
ファイル: manual_join_imp.py プロジェクト: newmanlucy/mcdp
def fix_duplicated_ids(basename2soup):
    '''
        fragments is a list of soups that might have
        duplicated ids.
    '''
    id_owner = {}   # id -> basename of the fragment where first seen
    renames = []    # (basename, old_id, new_id) to patch references later
    for basename, fragment in basename2soup.items():
        # walk every element in this fragment that carries an id
        for element in fragment.find_all(id=True):
            the_id = element.attrs['id']
            # ignore ids generated by MathJax
            if 'MathJax' in the_id:  # or the_id.startswith('MJ'):
                continue
            if the_id not in id_owner:
                # a new id: remember which fragment owns it
                id_owner[the_id] = basename
            elif id_owner[the_id] == basename:
                # duplicated within the very same fragment
                logger.debug('duplicated id %r inside frag %s' %
                             (the_id, basename))
            else:
                # clashes with another fragment: rename the element here
                # and record the rename so hrefs can be fixed below
                fresh_id = the_id + '-' + basename
                element['id'] = fresh_id
                renames.append((basename, the_id, fresh_id))
    # second pass: update every href in the renaming fragment
    for basename, old_id, fresh_id in renames:
        fragment = basename2soup[basename]
        for a in fragment.find_all(href="#" + old_id):
            a.attrs['href'] = '#' + fresh_id
コード例 #3
0
def check_translation_diskrep_to_gitrep(disk_rep0, disk_events, disk_rep1, out):  # @UnusedVariable
    """
        Replays `disk_events` on top of a git repo created from `disk_rep0`,
        committing each event.

        disk_rep0: initial disk representation; becomes the repo's first state.
        disk_events: non-empty list of event dicts; each must have a 'who'
                     entry with 'actor' and optionally 'instance'/'host'.
        disk_rep1, out: unused here (kept for signature symmetry with callers).

        Returns a dict with key 'repo' (the repo containing all commits).
        Raises ValueError if `disk_events` is empty.
    """
    if not disk_events:
        raise ValueError('no disk events')
    repo = gitrep_from_diskrep(disk_rep0)
    wd = repo.working_tree_dir
    # sanity check: reading the repo back must reproduce the original rep
    readback = diskrep_from_gitrep(repo)
    assert_diskreps_same(disk_rep0, readback, 'original', 'written back')
    logger.debug(wd)
    logger.debug('\n'+indent(readback.tree(), 'read back |'))
    logger.debug('\n'+indent(yaml_dump(disk_events), 'disk_events|'))
    commits = []
    for disk_event in disk_events:
        logger.debug(indent(yaml_dump(disk_event), 'disk_event | '))
        apply_disk_event_to_filesystem(wd, disk_event, repo=repo)
        
        # stage any files the event just created
        if repo.untracked_files:
            logger.debug('adding untracked file %r' % repo.untracked_files) 
            repo.index.add(repo.untracked_files) 
            
        # the commit message is the YAML dump of the event itself
        message = yaml_dump(disk_event)
        who = disk_event['who']
        logger.info('who: %s' % who)
        actor = who['actor']
        instance = who.get('instance', None)
        host = who.get('host', None)
        author = Actor(actor, instance)
        committer = Actor(instance, host) 
        commit = repo.index.commit(message, author=author, committer=committer)
        commits.append(commit) 
    
    res = {}
    res['repo'] = repo
    return res 
コード例 #4
0
    def get_image(self, name, data_format):
        """Return the contents of the image file `name` with extension
        `data_format`, searching all of ``self.paths``.

        The match on the basename is case-insensitive; if only a
        case-mismatched file is found, a one-time warning is emitted
        but the file is still used.

        Raises NoImageFound if no readable file matches in any path.
        """
        extension = data_format
        for p in self.paths:
            for fn in _list_files_with_extension(p, extension):
                bn = os.path.basename(fn)
                stem = os.path.splitext(bn)[0]

                if stem.lower() == name.lower():
                    if stem != name:
                        msg = 'Using file "%s" for image "%s", even though case does not match.' % (
                            bn, name)
                        _warn_once(msg)

                    if os.path.exists(fn):
                        # FIX: close the file handle (was open(fn).read(),
                        # which leaked the descriptor).
                        with open(fn) as f:
                            return f.read()
                    else:
                        # warn broken link (e.g. dangling symlink)
                        msg = 'Filename does not exist (broken link?): %s' % fn
                        logger.debug(msg)

        msg = 'Could not find %s.%s in %d paths.' % (name, data_format,
                                                     len(self.paths))
        for p in self.paths:
            msg += '\n path: %s' % p
        raise NoImageFound(msg)
コード例 #5
0
def get_empty_links_to_fragment(soup):
    """
        Find all empty links that have a reference to a fragment.
        yield LinkElement
    """
    logger.debug('building index')
    # map id -> element for every tag in the document that carries an id
    index = {
        node.attrs['id']: node
        for node in list(soup.descendants)
        if isinstance(node, Tag) and 'id' in node.attrs
    }

    logger.debug('building index done')

    for link in get_empty_links(soup):
        target = link.attrs.get('href')
        if target is None:
            continue
        if not target.startswith('#'):
            continue
        # everything after '#' is the fragment id; no query handling here
        fragment_id = target[1:]

        yield LinkElement(linker=link, eid=fragment_id,
                          linked=index.get(fragment_id, None), query=None)
コード例 #6
0
ファイル: app_visualization.py プロジェクト: rusi/mcdp
        def get_link(specname, libname, thingname):
            """Return the syntax-view URL for a thing, or raise NoSuchLibrary."""
            # resolve which repo/shelf hosts the library
            try:
                rname, sname = e.session.get_repo_shelf_for_libname(libname)
            except NoSuchLibrary:
                msg = 'No such library %r' % libname
                logger.debug(msg)
                raise

            library = e.db_view.repos[rname].shelves[sname].libraries[libname]
            things = library.things.child(specname)

            if thingname not in things:
                msg = 'No such thing %r' % thingname
                logger.debug(msg)
                raise NoSuchLibrary(msg)

            # the thing exists: build the link to its syntax view
            prefix = get_link_library(libname)
            return prefix + '%s/%s/views/syntax/' % (specname, thingname)
コード例 #7
0
    def savefile(filename_hint, data):
        """ must return the url (might be equal to filename) """
        destination = os.path.join(assets_dir, filename_hint)
        logger.debug('writing to %s' % destination)
        with open(destination, 'wb') as fh:
            fh.write(data)

        # URL is relative to the directory containing the output file
        return os.path.relpath(destination, os.path.dirname(out))
コード例 #8
0
ファイル: memdata_events.py プロジェクト: rusi/mcdp
def replay_events(view_manager, db0, events):
    """Replay `events` on a deep copy of `db0`; return the resulting db."""
    db = deepcopy(db0)
    for event in events:
        event_intepret(view_manager, db, event)
        report = '\nAfter playing event:\n'
        report += indent(yaml_dump(event), '   event: ')
        report += '\nthe DB is:\n'
        report += indent(yaml_dump(db), '   db: ')
        logger.debug(report)
    return db
コード例 #9
0
def displayfile1():
    """Check that one display-file github reference is expanded."""
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<display-file src="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></a> 
"""
    soup = bs(s)
    count = display_files(soup, defaults, raise_errors=True)
    assert count == 1

    rendered = str(soup)
    logger.debug('\n' + indent(rendered, '  '))
コード例 #10
0
    def __call__(self, event):
        """Forward `event` to the wrapped handler, then push to origin."""
        self.other(event)
        target_repo = self.other.repo
        logger.debug('pushing')
        target_repo.remotes.origin.push()
        
        
#     hi_config = Schema()
#     
#     hi_config.string('root') # where to put temporary files
#     hi_config.string('instance') # instance name
#     hi_config.hash('repo_local', SchemaString()) # dirname for local repo
#     hi_config.hash('repo_git', SchemaString()) # git url for local repo
#      
コード例 #11
0
def generate_toc(soup, max_depth=None):
    """
        Builds a table of contents from the indexable headers of `soup`.

        max_depth: if not None, headers with depth greater than this are
                   skipped.
        Returns the TOC rendered as an HTML string (some level kinds,
        e.g. figures and equations, are excluded from the rendering).
    """
    # sentinel root (depth 0) so every real item has an ancestor on the stack
    stack = [Item(None, 0, 'root', 'root', [])]

    headers_depths = list(get_things_to_index(soup))

    for header, depth, using in headers_depths:
        if max_depth is not None:
            if depth > max_depth:
                continue

        item = Item(header, depth, using, header['id'], [])

        # pop until the top of the stack is a proper ancestor (shallower)
        while (stack[-1].depth >= depth):
            stack.pop()
        stack[-1].items.append(item)
        stack.append(item)

    root = stack[0]

    logger.debug('numbering items')
    number_items2(root)
    if False:
        logger.debug(toc_summary(root))


#
#     logger.debug('toc iterating')
#     # iterate over chapters (below each h1)
#     # XXX: this is parts
#     if False:
#         for item in root.items:
#             s = item.to_html(root=True, max_levels=100)
#             stoc = bs(s)
#             if stoc.ul is not None:  # empty document case
#                 ul = stoc.ul
#                 ul.extract()
#                 ul['class'] = 'toc chapter_toc'
#                 # todo: add specific h1
#                 item.tag.insert_after(ul)  # XXX: uses <fragment>
#
#     logger.debug('toc done iterating')
    # these level kinds are left out of the rendered TOC
    exclude = [
        'subsub', 'fig', 'code', 'tab', 'par', 'subfig', 'appsubsub', 'def',
        'eq', 'rem', 'lem', 'prob', 'prop', 'exa', 'thm'
    ]
    without_levels = root.copy_excluding_levels(exclude)
    res = without_levels.to_html(root=True, max_levels=13)
    return res
コード例 #12
0
def sub1():
    """A bare github href should become a github-resource-link code tag."""
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<a href="github:path=context_eval_as_constant.py"></a> 
"""
    soup = bs(s)
    num_subs = substitute_github_refs(soup, defaults)
    assert num_subs == 1

    rendered = str(soup)
    logger.debug(indent(rendered, '  '))

    expect = '<code class="github-resource-link">context_eval_as_constant.py</code>'
    if expect not in rendered:
        raise Exception(rendered)
コード例 #13
0
def sub2():
    """A from_text/to_text github ref should resolve to a line range."""
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<a href="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></a> 
"""
    soup = bs(s)
    num_subs = substitute_github_refs(soup, defaults)
    assert num_subs == 1

    rendered = str(soup)
    logger.debug('\n' + indent(rendered, '  '))

    expect = 'context_eval_as_constant.py#L7-L12'

    if expect not in rendered:
        raise Exception('No %s in %s' % (expect, rendered))
コード例 #14
0
def go():
    """Command-line entry point: <prog> input_html output_html."""
    if len(sys.argv) != 3:
        print('Syntax:\n\n     %s input_html output_html' %
              os.path.basename(sys.argv[0]))
        print('\n\nError: I need exactly 2 arguments.')
        sys.exit(1)
    fn, out = sys.argv[1], sys.argv[2]

    # assets live next to the output file, in <output>.assets
    assets_dir = out + '.assets'
    if not os.path.exists(assets_dir):
        os.makedirs(assets_dir)
    logger.debug('Using assets dir %s' % assets_dir)

    out_parent = os.path.dirname(out)
    if not os.path.exists(out_parent):
        os.makedirs(out_parent)

    return go_(fn, out, assets_dir)
コード例 #15
0
ファイル: test_db.py プロジェクト: rusi/mcdp
def read_as_user_db(dirname):
    """
        Reads the directory `dirname` from disk and interprets it as a
        user DB according to the DB.user_db schema.

        Returns a rooted view instance over the interpreted data.
        Raises if the data does not validate against the schema.
    """
    dm = DB.dm

    hierarchy = ProxyDirectory.from_disk(dirname)

    logger.info('These are the files found:\n%s' %
                indent(hierarchy.tree(), '  '))

    user_db_schema = DB.user_db
    user_db_data = dm.interpret_hierarchy_(user_db_schema, hierarchy)

    logger.debug('user_db schema: \n' + str(user_db_schema))
    logger.debug('user_db:\n' + indent(yaml_dump(user_db_data), ' > '))

    # fail early if the interpreted data is not schema-conformant
    DB.user_db.validate(user_db_data)

    user_db_view = DB.view_manager.create_view_instance(
        user_db_schema, user_db_data)
    user_db_view.set_root()
    return user_db_view
コード例 #16
0
def get_empty_links_to_fragment(soup):
    """
        Find all links that have a reference to a fragment.
        yield LinkElement
    """
    logger.debug('building index')
    # index every tag in the document that carries an id
    id_index = {}
    for node in soup.descendants:
        if isinstance(node, Tag) and 'id' in node.attrs:
            id_index[node.attrs['id']] = node

    logger.debug('building index done')

    for anchor in soup.find_all('a'):
        # only anchors with no content whatsoever
        if list(anchor.descendants):
            continue

        if 'href' not in anchor.attrs:
            continue
        href = anchor.attrs['href']
        if not href.startswith('#'):
            continue

        rest = href[1:]
        # an optional "?query" suffix selects how the link is rendered
        eid, sep, query = rest.partition('?')
        if not sep:
            query = None

        yield LinkElement(linker=anchor,
                          eid=eid,
                          linked=id_index.get(eid, None),
                          query=query)
コード例 #17
0
ファイル: manual_join_imp.py プロジェクト: newmanlucy/mcdp
def make_sections2(elements,
                   is_marker,
                   copy=True,
                   element_name='div',
                   attrs=None,
                   add_debug_comments=False):
    """Group `elements` into (header, section) pairs, splitting at markers.

    elements: iterable of soup elements to partition.
    is_marker: predicate; elements for which it is True start a new section
               and become that section's header.
    copy: if True, elements are copied into the section; otherwise they are
          extracted from their original tree.
    element_name: tag name for the container created for each section.
    attrs: optional dict of attributes for each container
           (FIX: was a mutable default `attrs={}`).
    add_debug_comments: currently unused in this function's body.

    Returns a list of (header_or_None, section_tag) pairs.
    """
    if attrs is None:
        attrs = {}

    sections = []

    def make_new():
        # fresh container for the next section
        x = Tag(name=element_name)
        for k, v in attrs.items():
            x.attrs[k] = v
        return x

    current_header = None
    current_section = make_new()

    current_section['class'] = 'without-header-inside'

    for x in elements:
        if is_marker(x):
            # flush the section accumulated so far, unless it is empty
            if contains_something_else_than_space(current_section):
                sections.append((current_header, current_section))

            current_section = make_new()
            logger.debug('marker %s' % x.attrs.get('id', 'unnamed'))
            current_header = x.__copy__()
            #             current_section.append(x.__copy__())
            current_section['class'] = 'with-header-inside'
        else:
            x2 = x.__copy__() if copy else x.extract()
            current_section.append(x2)

    # flush the trailing section
    if current_header or contains_something_else_than_space(current_section):
        sections.append((current_header, current_section))

    logger.info('make_sections: %s found using marker %s' %
                (len(sections), is_marker.__name__))
    return sections
コード例 #18
0
def check_translation_gitrep_to_diskrep(repo, branch_name, out):
    """
        Walks the history of `branch_name` in `repo`, derives disk events
        from each commit-to-commit diff, replays them on a disk
        representation, and checks the replayed state matches the actual
        working tree at every commit.

        Debug artifacts for each step are written under the directory
        `out` (which is wiped and recreated).

        Returns a dict with keys: disk_rep0 (initial state),
        disk_events (all derived events), disk_rep (final replayed state).
    """
    wd = repo.working_tree_dir

    # oldest commit first
    commits = list(reversed(list(repo.iter_commits(branch_name))))

    # make sure that commits[0] is the first
    for i in range(1, len(commits)):
        assert commits[i].parents[0] == commits[i - 1]
    # check out the very first commit
    repo.head.reference = commits[0]
    repo.head.reset(index=True, working_tree=True)

    disk_rep0 = ProxyDirectory.from_disk(wd)
    disk_rep = deepcopy(disk_rep0)

    # start with a clean output directory
    if os.path.exists(out):
        shutil.rmtree(out)
    if not os.path.exists(out):
        os.makedirs(out)

    def write_file_(name, what):
        # write a debug artifact inside `out`
        name = os.path.join(out, name)
        with open(name, 'w') as f:
            f.write(what)
        logger.info('wrote on %s' % name)

    def write_file(i, n, what):
        # artifact named after the commit index and a stage label
        name = '%d-%s.txt' % (i, n)
        write_file_(name, what)

    logger.debug('Initial files: %s' %
                 list(_.path for _ in commits[1].tree.traverse()))

    msg = ""
    for i, commit in enumerate(commits):
        d = disk_rep_from_git_tree(commit.tree)
        msg += '\n\n' + indent(d.tree(), ' tree at commit #%d | ' % i)
    write_file_('00-commits.txt', msg)

    all_disk_events = []
    for i in range(1, len(commits)):
        write_file(i, 'a-disk_rep', disk_rep.tree())

        msg = ""
        for d in commits[i - 1].diff(commits[i]):
            msg += '\n' + str(d)
        write_file(i, 'c-diffs', msg)

        # translate the git diff into disk events ...
        events = diskevents_from_diff(commits[i - 1], commits[i])
        write_file(i, 'd-diskevents_from_diff', yaml_dump(events))

        # ... and replay them on our disk representation
        for disk_event in events:
            disk_event_interpret(disk_rep, disk_event)
        all_disk_events.extend(events)

        write_file(i, 'e-disk_rep-after-diskevents', disk_rep.tree())

        # check out commit i and compare with the replayed state
        repo.head.reference = commits[i]
        repo.head.reset(index=True, working_tree=True)
        supposedly = ProxyDirectory.from_disk(wd)
        write_file(i, 'f-supposedly', supposedly.tree())

        assert_equal_disk_rep(disk_rep, supposedly)

    logger.debug('wd: %s' % wd)
    return dict(disk_rep0=disk_rep0,
                disk_events=all_disk_events,
                disk_rep=disk_rep)
コード例 #19
0
def diskevents_from_diff(commit_a, commit_b):
    """
        Translates the git diff between two commits into a list of
        disk events (dir/file create, delete, modify, rename).

        Deletions of every file in a directory are collapsed into a
        single dir-delete event; file additions create any missing
        intermediate directories first.

        Returns the list of event dicts, attributed to commit_b's author.
    """
    diff = commit_a.diff(commit_b)

    def dirname_name_from_path(path):
        # split a repo-relative path into (dirname-tuple, basename)
        path = path.encode('utf8')
        dirname = os.path.dirname(path)

        if dirname == '':
            dirname = ()
        else:
            dirname = tuple(dirname.split('/'))
        basename = os.path.basename(path)
        return dirname, basename

    _id = 'ID'  # placeholder event id
    who = who_from_commit(commit_b)
    events = []

    # all paths present in commit_a
    existing = set([_.path.encode('utf8') for _ in commit_a.tree.traverse()])
    # create hash directory -> everything contained
    dir2contents = {}
    for tree in commit_a.tree.traverse():
        if isinstance(tree, Tree):
            dir2contents[tree.path.encode('utf8')] = set()

    for blob in commit_a.tree.traverse():
        if isinstance(blob, Blob):
            path = blob.path
            for d in dir2contents:
                # NOTE(review): prefix test, not path-component test;
                # 'ab' would match directory 'a' as written.
                if path.startswith(d):
                    dir2contents[d].add(path)

    removed_files = set()

    # 'D' = deleted files
    for d in diff.iter_change_type('D'):
        removed_files.add(d.b_path)

    deleted_completely = set()

    deleted_by_deleting_dir = set()
    # a directory whose every file was removed is considered deleted
    for di, di_contents in dir2contents.items():
        if all(x in removed_files for x in di_contents):
            print('detected that %s was removed completely' % di)
            deleted_completely.add(di)

    for di in deleted_completely:
        # do not do this if the parent was already deleted
        if os.path.dirname(di) in deleted_completely:
            continue
        else:
            dirname, name = dirname_name_from_path(di)
            print('%s -> %s, %s' % (di, dirname, name))
            deleted_by_deleting_dir.update(dir2contents[di])
            e = disk_event_dir_delete(_id, who, dirname=dirname, name=name)
            events.append(e)

    # individual file deletions not already covered by a dir delete
    for d in diff.iter_change_type('D'):
        if d.b_path in deleted_by_deleting_dir:
            continue
        dirname, name = dirname_name_from_path(d.b_path)
        e = disk_event_file_delete(_id, who, dirname=dirname, name=name)
        events.append(e)

    logger.debug('trees: %s' % list(commit_a.tree.traverse()))
    logger.debug('existing: %s' % "\n- ".join(existing))

    # 'A' = added files
    for d in diff.iter_change_type('A'):
        dirname, name = dirname_name_from_path(d.b_path)
        # create all partial directories
        for i in range(1, len(dirname) + 1):
            partial = dirname[:i]
            partial_path = "/".join(partial)
            if not partial_path in existing:
                logger.debug('I need to create directory %r' % partial_path)
                d2 = partial[:-1]
                n2 = partial[-1]
                e = disk_event_dir_create(_id, who, dirname=d2, name=n2)
                events.append(e)
                existing.add("/".join(partial))

        contents = d.b_blob.data_stream.read()
        e = disk_event_file_create(_id,
                                   who,
                                   dirname=dirname,
                                   name=name,
                                   contents=contents)
        events.append(e)
    # 'M' = modified files
    for d in diff.iter_change_type('M'):
        dirname, name = dirname_name_from_path(d.b_path)
        contents = d.b_blob.data_stream.read()
        e = disk_event_file_modify(_id,
                                   who,
                                   dirname=dirname,
                                   name=name,
                                   contents=contents)
        events.append(e)

    dir_renames = set()
    for d in diff.iter_change_type('R'):  # rename
        a_dirname, a_name = dirname_name_from_path(d.a_path)
        b_dirname, b_name = dirname_name_from_path(d.b_path)
        if a_dirname != b_dirname:
            # the directory changed: treat it as a directory rename at
            # the first path component that differs
            dirname, name1, name2 = get_first_diff(d.a_path, d.b_path)
            dir_renames.add((tuple(dirname), name1, name2))

        else:
            # same directory: a plain file rename
            e = disk_event_file_rename(_id,
                                       who,
                                       dirname=a_dirname,
                                       name=a_name,
                                       name2=b_name)
            events.append(e)

    for dirname, name1, name2 in dir_renames:
        e = disk_event_dir_rename(_id,
                                  who,
                                  dirname=dirname,
                                  name=name1,
                                  name2=name2)
        events.append(e)

    return events
コード例 #20
0
def substituting_empty_links(soup, raise_errors=False):
    '''
    
    
        default style is [](#sec:systems)  "Chapter 10"
        
        the name is [](#sec:systems?only_name) "My title"
        
        the number is [](#sec:systems?only_number) "10"
        
        and full is [](#sec:systems?toc_link) "Chapter 10 - My title"
    
    
        You can also use "class":
        
            <a href='#sec:name' class='only_number'></a>
            
            or
            
            <a href='#sec:name?only_number'></a>
    

    '''
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    logger.debug('substituting_empty_links')

    # n: links processed; nerrors: links that could not be resolved
    n = 0
    nerrors = 0
    for le in get_empty_links_to_fragment(soup):

        a = le.linker
        element_id = le.eid
        element = le.linked

        n += 1
        if not element:
            # target id does not exist in the document
            msg = ('Cannot find %s' % element_id)
            note_error_msg(a, msg)
            nerrors += 1
            if raise_errors:
                raise ValueError(msg)
            continue
        # if there is a query, remove it
        if le.query is not None:
            new_href = '#' + le.eid
            a.attrs['href'] = new_href
            logger.info('setting new href= %s' % (new_href))

        # the target must have been labeled (number + name attributes)
        if (not LABEL_WHAT_NUMBER  in element.attrs) or \
                (not LABEL_NAME in element.attrs):
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s'
                % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
            if True:
                # currently only warn; the stricter branch below is disabled
                logger.warning(msg)
            else:
                note_error_msg(a, msg)
                nerrors += 1
                if raise_errors:
                    raise ValueError(msg)
            continue

        label_what_number = element.attrs[LABEL_WHAT_NUMBER]
        label_number = element.attrs[LABEL_NUMBER]
        label_what = element.attrs[LABEL_WHAT]
        label_name = element.attrs[LABEL_NAME]

        classes = list(a.attrs.get('class', []))  # bug: I was modifying

        # the query (e.g. 'only_number') acts like an extra class
        if le.query is not None:
            classes.append(le.query)

        if 'toc_link' in classes:
            # full style: "<what> <number> - <name>" as separate spans
            s = Tag(name='span')
            s.string = label_what
            add_class(s, 'toc_what')
            a.append(s)

            a.append(' ')

            s = Tag(name='span')
            s.string = label_number
            add_class(s, 'toc_number')
            a.append(s)

            s = Tag(name='span')
            s.string = ' - '
            add_class(s, 'toc_sep')
            a.append(s)

            if label_name is not None and '<' in label_name:
                # the name contains markup: parse it and strip <br>/<a>
                contents = bs(label_name)
                # sanitize the label name
                for br in contents.findAll('br'):
                    br.replaceWith(NavigableString(' '))
                for _ in contents.findAll('a'):
                    _.extract()

                a.append(contents)
                #logger.debug('From label_name = %r to a = %r' % (label_name, a))
            else:
                s = Tag(name='span')
                if label_name is None:
                    s.string = '(unnamed)'  # XXX
                else:
                    s.string = label_name
                add_class(s, 'toc_name')
                a.append(s)

        else:

            # short styles: pick the label text based on the classes
            if CLASS_ONLY_NUMBER in classes:
                label = label_number
            elif CLASS_NUMBER_NAME in classes:
                if label_name is None:
                    label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
                else:
                    label = label_what_number + ' - ' + label_name
            elif CLASS_ONLY_NAME in classes:
                if label_name is None:
                    label = '(unnamed)'  # warning
                else:
                    label = label_name
            else:
                label = label_what_number

            span1 = Tag(name='span')
            add_class(span1, 'reflabel')
            span1.string = label
            a.append(span1)

    logger.debug('substituting_empty_links: %d total, %d errors' %
                 (n, nerrors))
コード例 #21
0
ファイル: manual_join_imp.py プロジェクト: kannode/mcdp
def do_bib(soup, bibhere):
    """ find used bibliography entries put them there

        soup: document tree; <cite> elements are numbered, annotated with
              cross-referencing label attributes, and moved into `bibhere`.
        bibhere: the element that receives the used citations.

        Missing references get an error <cite> appended to the soup;
        unused citations are marked with class 'unused'.
    """
    used = []
    unused = set()
    # collect every reference of the form href="#bib:..."
    for a in soup.find_all('a'):
        href = a.attrs.get('href', '')
        if href.startswith('#bib:'):
            used.append(href[1:])  # no "#"
    logger.debug('I found %d references, to these: %s' % (len(used), used))

    # collect all the <cite>
    id2cite = {}
    for c in soup.find_all('cite'):
        ID = c.attrs.get('id', None)
        id2cite[ID] = c
        if ID in used:
            add_class(c, 'used')
        else:
            unused.add(ID)
            add_class(c, 'unused')

    # divide in found and not found
    found = []
    notfound = []
    for ID in used:
        if not ID in id2cite:
            if not ID in notfound:
                notfound.append(ID)
        else:
            found.append(ID)

    # now create additional <cite> for the ones that are not found
    for ID in notfound:
        cite = Tag(name='cite')
        s = 'Reference %s not found.' % ID
        cite.append(NavigableString(s))
        cite.attrs['class'] = ['errored', 'error']  # XXX
        soup.append(cite)
        id2cite[ID] = cite

    # now number the cites
    # (in order of first use; n advances on every use, so numbers
    #  are not necessarily consecutive)
    n = 1
    id2number = {}
    for ID in used:
        if not ID in id2number:
            id2number[ID] = n
        n += 1

    # now add the attributes for cross-referencing
    for ID in used:
        number = id2number[ID]
        cite = id2cite[ID]

        cite.attrs[LABEL_NAME] = '[%s]' % number
        cite.attrs[LABEL_SELF] = '[%s]' % number
        cite.attrs[LABEL_NUMBER] = number
        cite.attrs[LABEL_WHAT] = 'Reference'
        cite.attrs[LABEL_WHAT_NUMBER_NAME] = '[%s]' % number
        cite.attrs[LABEL_WHAT_NUMBER] = '[%s]' % number

    # now put the cites at the end of the document
    for ID in used:
        c = id2cite[ID]
        # remove it from parent
        c.extract()
        #         logger.debug('Extracting cite for %r: %s' % (ID, c))
        # add to bibliography
        bibhere.append(c)

    s = ("Bib cites: %d\nBib used: %s\nfound: %s\nnot found: %s\nunused: %d" %
         (len(id2cite), len(used), len(found), len(notfound), len(unused)))
    logger.info(s)
コード例 #22
0
ファイル: check_missing_links.py プロジェクト: rusi/mcdp
def check_if_any_href_is_invalid(soup):
    '''
         Checks if references are invalid and tries to correct them. 
         
        if it is of the form "#frag?query" then query is stripped out

        For an unresolvable "#id", the known section/figure/... prefixes
        are tried as "prefix:id"; a unique match rewrites the href,
        multiple or zero matches mark the link as errored.

        Returns (errors, math_errors): lists of error message strings.
    '''
    errors = []
    math_errors = []
    
    # let's first find all the IDs
    id2element, duplicates = get_id2element(soup, 'id')
    _name2element, _duplicates = get_id2element(soup, 'name')
#     id2element.update(name2element)
#     for a in soup.select('a[href^="#"]'):

    for a in soup.select('[href^="#"]'):
        href = a['href']
        # MathJax-generated references cannot be checked meaningfully
        if a.has_attr('class') and  "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.error(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue 
        assert href.startswith('#')
        ID = href[1:]
        # remove query if it exists
        if '?' in ID:
            ID = ID[:ID.index('?')]
#         not_found = []

        if not ID in id2element:
            # try to fix it
#             
#             # it there is named element
#             if ID in name2element:
#                 real_id = name2element[ID].attrs
            
            # if there is already a prefix, remove it 
            if ':' in href:
                i = href.index(':')
                core = href[i+1:]
            else:
                core = ID
            # known label prefixes to try in front of the bare id
            possible = ['sec', 'sub', 'subsub', 'fig', 'tab', 'code', 'app', 'appsub',
                        'appsubsub',
                        'def', 'eq', 'rem', 'lem', 'prob', 'prop', 'exa', 'thm' ]
            matches = [] 
            others = []
            for possible_prefix in possible:
                why_not = possible_prefix + ':' + core
                others.append(why_not)
                if why_not in id2element:
                    matches.append(why_not)
            
            if len(matches) > 1:
                # ambiguous: do not rewrite, just annotate the error
                msg = '%s not found, and multiple matches for heuristics (%s)' % (href, matches)
                logger.error(msg)
                add_class(a, 'errored')
                w = Tag(name='span', attrs={'class':'href-invalid href-invalid-missing'})
                w.string = msg
                a.insert_after(w)
            elif len(matches) == 1:
                # unique match: rewrite the href and mark as replaced
                msg = '%s not found, but corrected in %s' % (href, matches[0])
                logger.debug(msg)
                
                add_class(a, 'warning')
                w = Tag(name='span', attrs={'class':'href-replaced'})
                w.string = msg
                a['href'] = '#' + matches[0]
                a.insert_after(w)
                
            else:
#                 msg = 'Not found %r (also tried %s)' % (href, ", ".join(others))
#                 not_found.append(ID)
#                 logger.error(msg)
                errors.append('Not found %r' % (href))
                # only annotate once per element
                if not 'errored' in a.attrs.get('class', ''):
                    add_class(a, 'errored')
                    w = Tag(name='span', attrs={'class':'href-invalid href-invalid-missing'})
                    w.string = 'Not found %r' % (href)
                    a.insert_after(w)
            
        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            logger.error(msg)
            if not 'errored' in a.attrs.get('class', ''):
                add_class(a, 'errored')
                w = Tag(name='span', attrs={'class':'href-invalid href-invalid-multiple'})
                w.string = msg
                a.insert_after(w)

            errors.append(msg)
            
    return errors, math_errors
コード例 #23
0
ファイル: manual_join_imp.py プロジェクト: kannode/mcdp
 def debug(s):
     """Debug hook; output is currently disabled (guarded by `if False`)."""
     if False:
         logger.debug(s)
コード例 #24
0
ファイル: manual_join_imp.py プロジェクト: newmanlucy/mcdp
def manual_join(template,
                files_contents,
                bibfile,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None):
    """
        Joins the documents in files_contents into one HTML document
        based on the given template.

        template: HTML string used as the skeleton of the joined document.
        files_contents: iterable of ((libname, docname), data) pairs,
            where data is the HTML source of one document.
        bibfile: path to a bibliography file, or None.
        stylesheet: name of a compiled stylesheet to link, or None.
        remove: a single selector whose matches are removed, or None/''.
        extra_css: if not None, a string of more CSS to be added.
        remove_selectors: list of selectors to remove (e.g. [".draft"]).

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc.

        Returns the joined document as a UTF-8 encoded string.
    """
    logger.debug('remove_selectors: %s' % remove_selectors)
    logger.debug('remove: %s' % remove)
    from mcdp_utils_xml import bs

    template = replace_macros(template)

    # cannot use bs() because this is an entire document, not a fragment
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    # Parse each fragment. Validate and log it here, while its raw source
    # is still in scope: previously this was done in the append loop below,
    # where `data` was stale (it always referred to the last file's data).
    from mcdp_docs.latex.latex_preprocess import assert_not_inside
    basename2soup = OrderedDict()
    for (_libname, docname), data in files_contents:
        assert_not_inside(data, 'DOCTYPE')
        logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        frag = bs(data)
        basename2soup[docname] = frag

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))
        for x in content:
            x2 = x.__copy__()  # not clone, not extract
            body.append(x2)
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)
    logger.info('external bib')
    if bibfile is not None:
        if not os.path.exists(bibfile):
            logger.error('Cannot find bib file %s' % bibfile)
        else:
            bibliography_entries = get_bibliography(bibfile)
            bibliography_entries['id'] = 'bibliography_entries'
            body.append(bibliography_entries)

    bibhere = d.find('div', id='put-bibliography-here')
    if bibhere is None:
        logger.warning('Could not find #put-bibliography-here in document. '
                       'Adding one at end of document')
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = 'put-bibliography-here'
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    if True:
        logger.info('reorganizing contents in <sections>')
        body2 = reorganize_contents(d.find('body'))
        body.replace_with(body2)
    else:
        warnings.warn('fix')
        body2 = body

    # Remove all elements matching the requested selectors.
    all_selectors = []
    if remove is not None and remove != '':
        all_selectors.append(remove)
    if remove_selectors:
        all_selectors.extend(remove_selectors)

    logger.debug('all_selectors: %s' % all_selectors)

    all_removed = ''
    for selector in all_selectors:
        nremoved = 0
        # (was logging `remove` here - the wrong variable)
        logger.debug('Removing selector %r' % selector)
        toremove = list(body2.select(selector))
        logger.debug('Removing %d objects' % len(toremove))
        for x in toremove:
            nremoved += 1
            nd = len(list(x.descendants))
            logger.debug('removing %s with %s descendants' % (x.name, nd))
            if nd > 1000:
                s = str(x)[:300]
                logger.debug(' it is %s' % s)
            x.extract()

            all_removed += '\n\n' + '-' * 50 + ' chunk %d removed\n' % nremoved
            all_removed += str(x)
            all_removed += '\n\n' + '-' * 100 + '\n\n'

        # (was logging `remove` here - the wrong variable)
        logger.info('Removed %d elements of selector %r' % (nremoved, selector))

    # Debug dump of everything that was removed.
    with open('all_removed.html', 'w') as f:
        f.write(all_removed)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)

    logger.info('adding toc')
    toc = generate_toc(body2)

    logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    toc_ul.extract()
    assert toc_ul.name == 'ul'
    toc_ul['class'] = 'toc'
    toc_ul['id'] = 'main_toc'
    toc_selector = 'div#toc'
    tocs = list(d.select(toc_selector))
    if not tocs:
        msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
        logger.warning(msg)
    else:
        toc_place = tocs[0]
        toc_place.replaceWith(toc_ul)

    logger.info('checking errors')
    check_various_errors(d)

    from mcdp_docs.check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(d)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    logger.info('substituting empty links')
    substituting_empty_links(d)

    warn_for_duplicated_ids(d)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    add_footnote_polyfill(d)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res
コード例 #25
0
 def notify_callback(event):
     """Log *event* as YAML at debug level, then record it in ``events``."""
     dumped = yaml_dump(event)
     logger.debug('\n' + dumped)
     events.append(event)
コード例 #26
0
def check_if_any_href_is_invalid(soup):
    '''
         Checks if references are invalid and tries to correct them.

        if it is of the form "#frag?query" then query is stripped out

        Returns (errors, math_errors): lists of error-message strings
        for broken references and broken math references, respectively.
    '''
    logger.debug('check_if_any_href_is_invalid')

    errors = []
    math_errors = []

    # let's first find all the IDs
    id2element, duplicates = get_id2element(soup, 'id')
    _name2element, _duplicates = get_id2element(soup, 'name')

    for a in soup.select('[href^="#"]'):
        href = a['href']
        # MathJax-generated links cannot be repaired; just record the problem.
        if a.has_attr('class') and "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.warning(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue
        assert href.startswith('#')
        ID = href[1:]
        # remove query if it exists
        if '?' in ID:
            ID = ID[:ID.index('?')]

        if ID not in id2element:
            # Target not found: try to fix it by guessing a prefix.

            # if there is already a prefix, remove it
            if ':' in href:
                i = href.index(':')
                core = href[i + 1:]
            else:
                core = ID

            # Known reference prefixes, e.g. "sec:intro", "fig:setup".
            possible = [
                'part',
                'sec',
                'sub',
                'subsub',
                'fig',
                'tab',
                'code',
                'app',
                'appsub',
                'appsubsub',
                'def',
                'eq',
                'rem',
                'lem',
                'prob',
                'prop',
                'exa',
                'thm',
                #                         'bib'
            ]
            matches = [prefix + ':' + core for prefix in possible
                       if prefix + ':' + core in id2element]

            if len(matches) > 1:
                # Ambiguous: several prefixed IDs exist; cannot choose one.
                short = 'Ref. error'
                msg = '%s not found, and multiple matches for heuristics (%s)' % (
                    href, matches)
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])

            elif len(matches) == 1:
                # Unique match: rewrite the href in place.
                a['href'] = '#' + matches[0]

                if show_debug_message_for_corrected_links:
                    short = 'Ref replaced'
                    msg = '%s not found, but corrected in %s' % (href,
                                                                 matches[0])
                    note_warning2(a, short, msg, ['href-replaced'])

            else:
                if has_class(a, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT):
                    pass
                else:
                    short = 'Ref. error'
                    msg = 'I do not know the link that is indicated by the link %r.' % href
                    note_error2(a, short, msg,
                                ['href-invalid', 'href-invalid-missing'])
                    errors.append(msg)

        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            short = 'Ref. error'
            note_error2(a, short, msg,
                        ['href-invalid', 'href-invalid-multiple'])
            errors.append(msg)

    return errors, math_errors
コード例 #27
0
ファイル: manual_join_imp.py プロジェクト: newmanlucy/mcdp
    def make_sections(body,
                      is_marker,
                      preserve=lambda _: False,
                      element_name='section',
                      copy=True,
                      attrs=None):
        """
            Wraps the children of `body` into <`element_name`> containers.

            A new container is started at each child for which
            is_marker(child) is true; children for which preserve(child)
            is true are kept at top level (not wrapped). `attrs` (if given)
            are copied onto every created container. Returns a new Tag with
            the same name as `body`, containing the sections.
        """
        # attrs=None avoids the shared mutable-default pitfall:
        # the previous default `attrs={}` was one dict shared by all calls.
        if attrs is None:
            attrs = {}
        sections = []

        def make_new():
            # Fresh container element carrying the requested attributes.
            x = Tag(name=element_name)
            for k, v in attrs.items():
                x.attrs[k] = v
            return x

        current_section = make_new()
        current_section['id'] = 'before-any-match-of-%s' % is_marker.__name__
        current_section['class'] = 'without-header-inside'
        for x in body.contents:
            if is_marker(x):
                # Close the current section (if non-empty) and start a new
                # one named after the marker element's id.
                if contains_something_else_than_space(current_section):
                    sections.append(current_section)
                current_section = make_new()
                current_section['id'] = x.attrs.get(
                    'id', 'unnamed-h1') + ':' + element_name
                logger.debug('marker %s' % current_section['id'])
                current_section['class'] = x.attrs.get('class', '')
                current_section.append(x.__copy__())
                # NOTE(review): this overwrites the class copied from the
                # marker element above - confirm whether the classes were
                # meant to be merged instead.
                current_section['class'] = 'with-header-inside'
            elif preserve(x):
                # Flush the current section and emit x itself at top level.
                if contains_something_else_than_space(current_section):
                    sections.append(current_section)

                sections.append(x.__copy__())
                current_section = make_new()
                current_section.attrs['comment'] = "Triggered by %r" % x
            else:
                # Regular content: accumulate into the current section.
                x2 = x.__copy__() if copy else x.extract()
                current_section.append(x2)
        if contains_something_else_than_space(current_section):
            sections.append(current_section)  # XXX
        new_body = Tag(name=body.name)

        logger.info('make_sections: %s found using marker %s' %
                    (len(sections), is_marker.__name__))
        for i, s in enumerate(sections):
            if add_debug_comments:
                new_body.append('\n')
                new_body.append(
                    Comment('Start of %s section %d/%d' %
                            (is_marker.__name__, i, len(sections))))
            new_body.append('\n')
            new_body.append(s)
            new_body.append('\n')
            if add_debug_comments:
                new_body.append(
                    Comment('End of %s section %d/%d' %
                            (is_marker.__name__, i, len(sections))))
                new_body.append('\n')
        return new_body