Exemple #1
0
def update_refs(filename2contents):
    """
    Rewrite intra-document fragment links ('#id') into cross-file links
    ('filename#id') after a document has been split over several files.

    filename2contents: dict mapping filename -> soup of that file's
    contents. The soups are modified in place.
    """
    # First pass: index every element ID by the file that contains it.
    id2filename = {}
    for filename, contents in filename2contents.items():

        for element in contents.findAll(id=True):
            id_ = element.attrs['id']
            if id_ in id2filename:
                logger.error('double element with ID %s' % id_)
            id2filename[id_] = filename

        # also don't forget the id for the entire section
        if 'id' in contents.attrs:
            id_ = contents.attrs['id']
            id2filename[id_] = filename

    # Second pass: point each '#id' link at the file that defines the id.
    for filename, contents in filename2contents.items():
        for a in contents.findAll(
                href=lambda x: x is not None and x.startswith('#')):
            href = a.attrs['href']
            assert href[0] == '#'
            id_ = href[1:]  # Todo, parse out "?"
            if id_ in id2filename:
                new_href = '%s#%s' % (id2filename[id_], id_)
                a.attrs['href'] = new_href
            else:
                # fixed typo in the error message ('elemement' -> 'element')
                logger.error('no element with ID %s' % id_)
Exemple #2
0
    def __init__(self, allow_or_deny, to_whom, privilege):
        """
        allow_or_deny: whether this ACL entry grants or denies access.
        to_whom: subject spec -- one of the MCDPConstants specials, or a
                 string 'user:<name>', 'group:<name>', 'special:<...>'.
        privilege: must be one of Privileges.ALL_PRIVILEGES.

        Raises ValueError for an unknown privilege; an invalid `to_whom`
        spec is only logged as an error (not raised).
        """
        self.allow_or_deny = allow_or_deny
        self.to_whom = to_whom
        self.privilege = privilege

        if not privilege in Privileges.ALL_PRIVILEGES:
            raise ValueError('Unknown privilege %r' % privilege)

        def valid_group(x):
            # any nonempty group name is accepted
            return len(x) > 0

        def valid_username(x):
            # any nonempty username is accepted
            return len(x) > 0

        valid = False
        some_ok = [
            MCDPConstants.EVERYONE,
            MCDPConstants.AUTHENTICATED,
        ]
        if to_whom in some_ok:
            valid = True
        elif to_whom.startswith('user:'):
            # reconstructed: this branch was corrupted in the source
            # ("'user:'******':'"); extract the name after the first ':'
            username = to_whom[to_whom.index(':') + 1:]
            valid = valid_username(username)
        elif to_whom.startswith('group:'):
            group = to_whom[to_whom.index(':') + 1:]
            valid = valid_group(group)
        elif to_whom.startswith('special:'):
            valid = True
        else:
            pass
        if not valid:
            msg = 'Invalid to_whom spec: %s' % to_whom
            logger.error(msg)
Exemple #3
0
def get_id2element(soup, att):
    """
    Build a mapping from attribute value -> element for all elements in
    ``soup`` carrying attribute ``att`` (e.g. 'id' or 'name').

    Elements inside <svg>, <svg> tags themselves, and MathJax-generated
    elements are skipped. When several elements share the same value, the
    *last* one wins in the mapping and the value is recorded in
    ``duplicates``; both offending elements get the 'errored' class and a
    warning <span> inserted after them.

    Returns (id2element, duplicates).
    """
    id2element = {}
    duplicates = set()
    
    # ignore the maths
    ignore = set() 
    for element in soup.select('svg [%s]' % att): # node with ID below SVG
        ignore.add(element[att])
    for element in soup.select('svg[%s]' % att): # svg with ID
        ignore.add(element[att])
    for element in soup.select('[%s^="MathJax"]' % att): # stuff created by MathJax
        ignore.add(element[att])
        
    for element in soup.select('[%s]' % att):
        ID = element[att]
        if ID in ignore:
            continue
        if ID in id2element:
            duplicates.add(ID)
            other = id2element[ID]
            # annotate both the new element and the one seen earlier,
            # unless already marked
            for e0 in [element, other]:
                if not 'errored' in e0.attrs.get('class', ''):
                    add_class(e0, 'errored')
                    w = Tag(name='span', attrs={'class':'duplicated-id'})
                    w.string = 'More than one element with id %r.' % ID
                    e0.insert_after(w)
        id2element[element[att]] = element
        
    if duplicates:
        s = ", ".join(sorted(duplicates))
        # NOTE(review): the message says "(not errored)" although the code
        # above *does* add the 'errored' class -- confirm which is intended.
        msg = '%d duplicated %s found (not errored): %s' % (len(duplicates), att, s) 
        logger.error(msg)
    return id2element, duplicates
Exemple #4
0
def get_id2filename(filename2contents):
    """
    Index which output file contains each element ID.

    For every (filename, contents) pair, record the id of each element
    inside the contents plus the contents' own 'id' attribute, skipping a
    small blacklist of well-known container IDs. A duplicated ID is
    logged as an error and the last file seen wins.

    Returns a dict mapping id -> filename.
    """
    blacklist = frozenset([
        'tocdiv',
        'not-toc',
        'disqus_thread',
        'disqus_section',
        'dsq-count-scr',
        'banner',
    ])

    id2filename = {}
    for fname, doc in filename2contents.items():
        for el in doc.findAll(id=True):
            the_id = el.attrs['id']
            if the_id in blacklist:
                continue
            if the_id in id2filename:
                logger.error('double element with ID %s' % the_id)
            id2filename[the_id] = fname

        # the root element may itself carry an id
        if 'id' in doc.attrs:
            id2filename[doc.attrs['id']] = fname

    return id2filename
Exemple #5
0
    def go(self):
        """
        Browser smoke test: load a model's dp_graph view, log in as
        "andrea"/"editor", navigate through the shelves, and zoom in on
        the result. Screenshots are taken along the way.
        """
        url = 'http://localhost:8080/repos/bundled/shelves/unittests/libraries/basic/models/minus_r_real3/views/dp_graph/'
        self.driver.get(url)
        self.screenshot()

        self.driver.get('http://localhost:8080/')
        self.screenshot()

        self.click_partial_link_text('login')
        driver = self.driver

        # fill log in screen
        e = driver.find_element_by_css_selector('input[name=login]')
        e.send_keys("andrea")
        e = driver.find_element_by_css_selector('input[name=password]')
        e.send_keys("editor")
        es = driver.find_elements_by_css_selector('button')
        if len(es) > 1:
            msg = 'There should not be more than 1 button'
            logger.error(msg)
        # NOTE(review): this clicks the *second* button (index 1), which the
        # check above just flagged as unexpected, and raises IndexError when
        # only one button exists -- confirm whether es[0] was intended.
        es[1].click()

        # go to shelves
        self.click_partial_link_text('shelves')

        self.click_partial_link_text('unittests')
        self.click_partial_link_text('basic')
        self.click_partial_link_text('minus_real3')

        self.click_css('button#size-plus')
        self.click_css('button#size-plus')
        self.click_css('button#size-plus')
Exemple #6
0
def fix_header_id(header):
    """
    Ensure a header tag (h1..h4) carries an id with an allowed prefix
    for its level ('sec:', 'sub:', ...).

    - no id at all: synthesize one from the default prefix and a global
      counter;
    - id without a prefix (except 'booktitle'): prepend the default
      prefix and leave a warning comment before the header;
    - id with a disallowed prefix: log an error and leave an error
      comment after the header.

    Non h1..h4 tags are left untouched. Modifies `header` in place.
    """
    the_id = header.get('id', None)
    if the_id is not None and ':' in the_id:
        prefix = the_id[:the_id.index(':')]
    else:
        prefix = None

    allowed_prefixes_h = {
        'h1': ['sec', 'app', 'part'],
        'h2': ['sub', 'appsub'],
        'h3': ['subsub', 'appsubsub'],
        'h4': ['par'],
    }

    if header.name not in allowed_prefixes_h:
        return

    allowed = allowed_prefixes_h[header.name]
    default_prefix = allowed[0]

    if the_id is None:
        # no id: generate one from the global counter
        header['id'] = '%s:%s' % (default_prefix, GlobalCounter.header_id)
        GlobalCounter.header_id += 1
        return

    if prefix is None:
        if the_id != 'booktitle':
            msg = ('Adding prefix %r to current id %r for %s.' %
                   (default_prefix, the_id, header.name))
            header.insert_before(Comment('Warning: ' + msg))
            header['id'] = default_prefix + ':' + the_id
        return

    if prefix not in allowed:
        msg = ('The prefix %r is not allowed for %s (ID=%r)' %
               (prefix, header.name, the_id))
        logger.error(msg)
        header.insert_after(Comment('Error: ' + msg))
Exemple #7
0
def replace_macros(s):
    """Replace placeholders of the form ``@@{key}`` in the string ``s``.

    Keys are looked up in MCDPManualConstants.macros; dotted keys such as
    ``@@{MCDPConstants.name}`` traverse nested lookups (``a.b`` resolves
    as ``macros['a']['b']``).

    Raises DPSyntaxError, pointing at the offending location in ``s``,
    for an invalid placeholder or an unknown key.
    """
    macros = MCDPManualConstants.macros

    class MyTemplate(Template):
        # '@@' introduces a placeholder; keys may contain dots
        delimiter = '@@'
        idpattern = r'[_a-z][\._a-z0-9]*'

        def _invalid(self, mo):
            # Translate the match offset into (line, col) so the
            # DPSyntaxError points exactly into the source string.
            i = mo.start('invalid')
            lines = self.template[:i].splitlines(True)
            if not lines:
                colno = 1
                lineno = 1
            else:
                colno = i - len(''.join(lines[:-1]))
                lineno = len(lines)

            char = location(lineno - 1, colno - 1, s)
            w = Where(s, char)
            raise DPSyntaxError('Invalid placeholder', where=w)

    class Sub(object):
        # Mapping adapter supporting dotted lookup: 'a.b' -> self['a']['b']
        def __init__(self, data):
            self.data = data

        def __getitem__(self, key):
            if key in self.data:
                return self.data[key]

            if '.' in key:
                i = key.index('.')
                first, last = key[:i], key[i + 1:]
                #print('%s -> %s, %s' % (key, first, last))
                return self[first][last]

            raise KeyError(key)

    t = MyTemplate(s)
    # NOTE(review): redundant -- idpattern is already set in the class body.
    MyTemplate.idpattern = r'[_a-z][\._a-z0-9]*'
    try:
        s2 = t.substitute(Sub(macros))
    except KeyError as e:
        # Re-raise the missing key as a located syntax error.
        key = str(e).replace("'", "")
        search_for = MyTemplate.delimiter + key
        logger.error('Could not find key %r' % key)
        char = s.index(search_for)
        w = Where(s, char)
        msg = 'Key %r not found - maybe use braces?' % key
        raise DPSyntaxError(msg, where=w)
    return s2
Exemple #8
0
def process(dirname, e):
    """
    Load one thing from the db in ``dirname`` and compare the outcome
    against the expectation declared in its source (syntax / semantic /
    not-implemented error markers).

    dirname: directory containing the database.
    e: job environment; mutated with repo/shelf/library/context fields.
    Returns a Result(error_type, error_string, cpu, warnings).
    """
    db_view = db_view_from_dirname(dirname)
    host_cache = HostCache(db_view)
    e.repo = db_view.repos[e.repo_name]
    e.shelf = e.repo.shelves[e.shelf_name]
    e.library = e.shelf.libraries[e.library_name]
    e.things = e.library.things.child(e.spec_name)
    subscribed_shelves = get_all_shelves(db_view)
    e.context = TheContext(host_cache, db_view, subscribed_shelves, e.library_name)
    e.mcdp_library = e.context.get_library()

    source = e.things[e.thing_name]

    t0 = time.clock()
    try:
        context = e.context.child()
        e.mcdp_library.load_spec(e.spec_name, e.thing_name, context=context)

        error = None
        error_string = None
        exc = None
    except MCDPException as exc:
        error = type(exc).__name__
        error_string = str(exc)
    finally:
        cpu = time.clock() - t0

    # Compare the actual outcome with what the source declares it should be.
    if gives_syntax_error(source):
        if isinstance(exc, DPSyntaxError):
            error = None
            error_string = None
        else:
            error = 'Unexpected'
            error_string = 'Expected DPSyntaxError error, got %s' % type(exc).__name__
            # bug fix: previously this appended an indented copy of the
            # message above; append the error actually obtained instead
            error_string += '\n' + indent(str(exc), 'obtained > ')
    elif gives_semantic_error(source):
        if isinstance(exc, DPSemanticError):
            error = None
            error_string = None
        else:
            error = 'Unexpected'
            error_string = 'Expected DPSemanticError error, got %s' % type(exc).__name__
            error_string += '\n' + indent(str(exc), 'obtained > ')
    elif gives_not_implemented_error(source):
        if isinstance(exc, DPNotImplementedError):
            error = None
            error_string = None
        else:
            error = 'Unexpected'
            error_string = 'Expected DPNotImplementedError error, got %s' % type(exc).__name__
            error_string += '\n' + indent(str(exc), 'obtained > ')

    if error:
        logger.error(e.id + ' ' + error)

    return Result(error_type=error, error_string=error_string, cpu=cpu, warnings=0)
Exemple #9
0
def rmtree_only_contents(d):
    """ Removes all the contents but not the directory itself. """

    for entry in os.listdir(d):
        path = os.path.join(d, entry)
        try:
            # directories are removed recursively, plain files unlinked;
            # anything else (e.g. broken symlinks) is left alone
            if os.path.isdir(path):
                shutil.rmtree(path)
            elif os.path.isfile(path):
                os.unlink(path)
        except Exception as e:
            # best effort: log and keep going with the remaining entries
            logger.error(e)
Exemple #10
0
def add_prev_next_links(filename2contents, only_for=None):
    """
    Wrap each page's contents in a <div class="super"> and add prev/next
    navigation bars (built from the ATTR_PREV / ATTR_NEXT attributes set
    during splitting) at the top and the bottom of each page.

    filename2contents: mapping of filename -> section soup.
    only_for: if given, only these filenames are processed (others are
    dropped from the result).
    Returns a new OrderedDict of filename -> wrapped contents.
    """
    new_one = OrderedDict()
    for filename, contents in list(filename2contents.items()):
        if only_for and not filename in only_for: continue

        # anchor pointing at the previous section, by fragment id
        id_prev = contents.attrs[ATTR_PREV]
        a_prev = Tag(name='a')
        a_prev.attrs['href'] = '#' + str(id_prev)
        a_prev.attrs['class'] = CLASS_LINK_PREV
        a_prev.append('prev')

        # anchor pointing at the next section
        id_next = contents.attrs[ATTR_NEXT]
        a_next = Tag(name='a')
        a_next.attrs['href'] = '#' + str(id_next)
        a_next.attrs['class'] = CLASS_LINK_NEXT
        a_next.append('next')

        S = Tag(name='div')
        S.attrs['class'] = ['super']

        # navigation bar; links only included when the ids are nonempty
        nav1 = Tag(name='div')
        add_class(nav1, 'navigation')
        if id_prev:
            nav1.append(a_prev.__copy__())
        if id_next:
            nav1.append(a_next.__copy__())
        spacer = Tag(name='div')
        spacer.attrs['style'] = 'clear:both'
        nav1.append(spacer)

        add_class(contents, 'main-section-for-page')

        contents2 = contents
        S.append(contents2)

        from .source_info_imp import get_main_header
        actual_id = get_main_header(contents2)

        if False:  # just checking
            e = contents2.find(id=actual_id)
            if e is not None:
                pass
            else:
                logger.error('not found %r' % actual_id)
        S.attrs['id'] = actual_id

        # the navigation bar appears both before and after the contents
        contents2.insert(0, nav1.__copy__())
        contents2.append(nav1.__copy__())

        new_one[filename] = S

    return new_one
Exemple #11
0
def warn_for_duplicated_ids(soup):
    """
    Detect elements sharing the same id, mark them with the 'errored'
    class, and rename all but the first (appending '-duplicate-<i>') so
    ids become unique again. MathJax / 'edge' / equation ids and ids
    inside <svg> are ignored. Modifies `soup` in place and logs a
    summary of the renamed ids.
    """
    from collections import defaultdict

    counts = defaultdict(lambda: [])
    for e in soup.select('[id]'):
        ID = e['id']
        counts[ID].append(e)

    problematic = []
    for ID, elements in counts.items():
        n = len(elements)
        if n == 1:
            continue

        # generated ids that are expected to repeat
        ignore_if_contains = [
            'MathJax',  # 'MJ',
            'edge',
            'mjx-eqn',
        ]
        if any(_ in ID for _ in ignore_if_contains):
            continue

        # skip the group if any of the duplicates lives inside an <svg>
        inside_svg = False
        for e in elements:
            for _ in e.parents:
                if _.name == 'svg':
                    inside_svg = True
                    break
        if inside_svg:
            continue

        #msg = ('ID %15s: found %s - numbering will be screwed up' % (ID, n))
        # logger.error(msg)
        problematic.append(ID)

        for e in elements:
            # NOTE(review): the span `t` is built but never inserted
            # (insert_before is commented out) -- dead code; confirm.
            t = Tag(name='span')
            t['class'] = 'duplicated-id'
            t.string = 'Error: warn_for_duplicated_ids:  There are %d tags with ID %s' % (
                n, ID)
            # e.insert_before(t)
            add_class(e, 'errored')

        # rename all but the first occurrence so ids become unique
        for i, e in enumerate(elements[1:]):
            e['id'] = e['id'] + '-duplicate-%d' % (i + 1)
            #print('changing ID to %r' % e['id'])
    if problematic:
        logger.error('The following IDs were duplicated: %s' %
                     ", ".join(problematic))
        logger.error(
            'I renamed some of them; references and numbering are screwed up')
Exemple #12
0
def get_empty_links_to_fragment(element_to_modify, extra_refs, res):
    """
        Find all empty links that have a reference to a fragment.
        yield LinkElement

        element_to_modify: soup to scan for empty <a href="#..."> links.
        extra_refs: soup holding cross-reference targets from other
        documents; local ids take precedence over these.
        res: result accumulator used to report id conflicts.

        For each empty fragment link yields
        LinkElement(linker, eid, linked, query), where `linked` is the
        resolved target element or None when the id is unknown.
    """
    # logger.debug('building index')
    # first find all elements by id

    id2element_local, duplicates = get_id2element(element_to_modify, 'id')
    id2element_extra, duplicates2 = get_id2element(extra_refs, 'id')

    # report ids defined both locally and in the cross references
    # (unless the extra element opts out via 'ignore_if_conflict')
    for k in id2element_extra:
        if k in id2element_local:
            if 'ignore_if_conflict' in id2element_extra[k].attrs:
                continue

            msg = 'ID %s in cross references also contained locally.' % k

            def cut(x):
                # truncate long element dumps in the error message
                if len(x) < 500:
                    return x
                else:
                    return x[:500] + ' ... '

            msg += '\n\n' + indent(cut(id2element_local[k]), '', 'local: ')
            msg += '\n\n' + indent(cut(id2element_extra[k]), '', 'crossrefs: ')
            res.note_error(msg,
                           HTMLIDLocation.for_element(id2element_local[k]))
            logger.error(msg)

    # merged index: local definitions override the extra ones
    id2element = {}
    id2element.update(id2element_extra)
    id2element.update(id2element_local)

    # logger.debug('building index done')

    for element in get_empty_links(element_to_modify):
        if not 'href' in element.attrs:
            continue

        href = element.attrs['href']
        if not href.startswith('#'):
            continue
        rest = href[1:]

        eid = rest
        query = None

        linked = id2element.get(eid, None)
        # noinspection PyArgumentList
        yield LinkElement(linker=element, eid=eid, linked=linked, query=query)
Exemple #13
0
def update_refs_(filename, contents, id2filename):
    """
    Fix up fragment links in one page of a split document.

    Links whose target id lives in another file get the target filename
    prepended ('other.html#id') and the class 'link-different-file'.
    Links resolving within the same file keep the plain '#id' form, get
    the class 'link-same-file', and (for TOC links) tag their enclosing
    <li>/<ul> elements with helper classes used by the TOC styling.
    Unresolvable ids are logged as errors. Modifies `contents` in place.
    """
    test_href = lambda _: _ is not None and _.startswith('#')
    elements = list(contents.find_all('a', attrs={'href': test_href}))
    # logger.debug('updates: %s' % sorted(id2filename))
    for a in elements:
        href = a.attrs['href']
        assert href[0] == '#'
        id_ = href[1:]
        if id_ in id2filename:
            point_to_filename = id2filename[id_]
            if point_to_filename != filename:
                new_href = '%s#%s' % (point_to_filename, id_)
                a.attrs['href'] = new_href
                add_class(a, 'link-different-file')
            else:
                # actually it doesn't change
                new_href = '#%s' % id_
                a.attrs['href'] = new_href
                add_class(a, 'link-same-file')

                if 'toc_link' in a.attrs['class']:
                    p = a.parent
                    assert p.name == 'li'
                    add_class(p, 'link-same-file-direct-parent')

                    # now find all the lis
                    for x in list(p.descendants):
                        if isinstance(x, Tag) and x.name == 'li':
                            add_class(x, 'link-same-file-inside')

                # mark every enclosing list as containing a same-file link
                p = a.parent
                while p:
                    if isinstance(p, Tag) and p.name in ['ul', 'li']:
                        add_class(p, 'contains-link-same-file')
                    p = p.parent
        else:
            logger.error('update_ref() for %r: no element with ID "%s".' %
                         (filename, id_))
Exemple #14
0
def get_id2filename(filename2contents):
    """
    Build a reverse index: for each element ID in the split documents,
    record which output file defines it.

    IDs allowed to be duplicated (per can_ignore_duplicated_id) and a
    fixed blacklist of container / MathJax IDs are skipped. Duplicated
    IDs are logged as errors and the last file seen wins.

    Returns a dict mapping id -> filename.
    """
    blacklist = frozenset([
        'tocdiv',
        'not-toc',
        'disqus_thread',
        'disqus_section',
        'dsq-count-scr',
        'banner',
        'MathJax_SVG_glyphs',
        'MathJax_SVG_styles',
    ])

    id2filename = {}

    for fname, doc in filename2contents.items():

        for el in doc.select('[id]'):
            if can_ignore_duplicated_id(el):
                continue

            the_id = el.attrs['id']

            if the_id in blacklist:
                continue

            if the_id in id2filename:
                logger.error('double element with ID %s' % the_id)
            #                    logger.error(str(element.parent()))

            id2filename[the_id] = fname

        # the document root may itself carry an id
        if 'id' in doc.attrs:
            id2filename[doc.attrs['id']] = fname

    return id2filename
Exemple #15
0
def get_id2element(soup, att):
    """
    Build an OrderedDict mapping attribute value -> element for all
    elements in ``soup`` carrying attribute ``att``, skipping elements
    inside <svg>, <svg> tags themselves, and MathJax-generated elements.

    Duplicated values are collected in ``duplicates`` (the last element
    seen wins in the mapping). Returns (id2element, duplicates).
    """
    id2element = OrderedDict()
    duplicates = set()

    # ignore the maths
    ignore = set()
    for element in soup.select('svg [%s]' % att):  # node with ID below SVG
        ignore.add(element[att])
    for element in soup.select('svg[%s]' % att):  # svg with ID
        ignore.add(element[att])
    for element in soup.select('[%s^="MathJax"]' % att):  # stuff created by MathJax
        ignore.add(element[att])

    for element in soup.select('[%s]' % att):
        ID = element[att]
        if ID in ignore:
            continue
        if ID in id2element:
            duplicates.add(ID)

            # NOTE(review): this disabled branch references `res`, which is
            # not defined in this scope -- it would raise NameError if the
            # guard were enabled; confirm before re-enabling.
            if False:
                other = id2element[ID]
                for e0 in [element, other]:
                    # note_error2(e0, 'Naming', 'More than one element with id %r.' % ID)
                    msg = 'More than one element with id %r.' % ID
                    res.note_error(msg, HTMLIDLocation.before_element(e0))
        id2element[element[att]] = element

    if duplicates:
        n = len(duplicates)
        # cap the report at 100 ids to keep the log readable
        if n > 100:
            duplicates = list(duplicates)[:100]
        s = ", ".join(sorted(duplicates))
        msg = '%d duplicated %s found: %s' % (n, att, s)
        logger.error(msg)
    return id2element, duplicates
Exemple #16
0
def check_various_errors(d):
    """
    Scan the soup ``d`` for error markers left by earlier processing
    stages and log them: elements with the DPSemanticError /
    DPSyntaxError classes, and leftover <fragment> elements.
    """
    error_names = ['DPSemanticError', 'DPSyntaxError']
    selector = ", ".join('.' + _ for _ in error_names)
    # bug fix: find_all() expects a tag name, not a CSS selector --
    # '.DPSemanticError, .DPSyntaxError' never matched anything; select()
    # interprets it as CSS and matches elements by class.
    errors = list(d.select(selector))
    if errors:
        msg = 'I found %d errors in processing.' % len(errors)
        logger.error(msg)
        for e in errors:
            logger.error(e.contents)

    fragments = list(d.find_all('fragment'))
    if fragments:
        msg = 'There are %d spurious elements "fragment".' % len(fragments)
        logger.error(msg)
Exemple #17
0
def generate_view_syntax(e, make_relative):
    """
    Render the 'syntax' view for a thing: syntax-highlighted source with
    hyperlinks to referenced libraries/things, plus (when the source
    both parses and loads) the SVG visualization.

    e: environment object (spec, thing, session, db_view, ...).
    make_relative: callable turning an absolute URL path into a relative
    one for the current page.
    Returns a dict of template variables (source_code, error, highlight,
    svg_data, parses, ...). Python 2 only (uses `unicode`).
    """
    expr = e.spec.parse_expr
    parse_refine = e.spec.parse_refine
    source_code = e.thing

    context = Context()

    class Tmp:
        # holder for the refined parse tree filled in by postprocess()
        refined = None

    def postprocess(block):
        # refine the parse tree if the spec supports it; on semantic
        # errors fall back to the unrefined block
        if parse_refine is None:
            return block
        try:
            Tmp.refined = parse_refine(block, context)
            return Tmp.refined
        except DPSemanticError:
            return block

    try:
        highlight = ast_to_html(source_code,
                                add_line_gutter=False,
                                parse_expr=expr,
                                postprocess=postprocess)

        def get_link_library(libname):
            # URL of the library page; raises NoSuchLibrary if unknown
            try:
                rname, sname = e.session.get_repo_shelf_for_libname(libname)
            except NoSuchLibrary:
                raise
            url0 = "/repos/%s/shelves/%s/libraries/%s/" % (rname, sname,
                                                           libname)
            return make_relative(url0)

        def get_link(specname, libname, thingname):
            # find library. Returns a string or raises error
            try:
                rname, sname = e.session.get_repo_shelf_for_libname(libname)
            except NoSuchLibrary:
                msg = 'No such library %r' % libname
                logger.debug(msg)
                raise


#                 return None
            things = e.db_view.repos[rname].shelves[sname].libraries[
                libname].things.child(specname)

            if thingname in things:

                # check if the thing exists

                res = get_link_library(
                    libname) + '%s/%s/views/syntax/' % (specname, thingname)
                #                 logger.debug(' link for %s = %s' % (thingname, res))
                return res
            else:
                msg = 'No such thing %r' % thingname
                logger.debug(msg)
                raise NoSuchLibrary(msg)

        highlight = add_html_links(highlight, e.library_name, get_link,
                                   get_link_library)
        parses = True
        error = ''
    except (DPSyntaxError, DPNotImplementedError) as exc:
        # could not parse: show the raw source and the error message
        highlight = '<pre class="source_code_with_error">%s</pre>' % source_code
        error = exc.__str__()
        parses = False

    if parses:
        mcdp_library = library_from_env(e)
        image_source = image_source_from_env(e)

        try:
            thing = e.spec.load(mcdp_library, e.thing_name, context=context)

            svg_data = get_svg_for_visualization(e,
                                                 image_source,
                                                 e.library_name,
                                                 e.spec,
                                                 e.thing_name,
                                                 thing,
                                                 Tmp.refined,
                                                 make_relative,
                                                 library=mcdp_library)
        except (DPSemanticError, DPNotImplementedError) as exc:
            logger.error(exc)
            from mcdp_web.editor_fancy.app_editor_fancy_generic import html_mark

            # the exception must refer to *this* source; otherwise we
            # cannot annotate it and it is an internal error
            if exc.where.string != source_code:
                msg = 'This exception refers to another file.'
                msg += '\n source_code: %r' % source_code
                msg += '\n exception.where.string: %r' % exc.where.string
                msg += '\n' + indent(traceback.format_exc(exc), 'exc > ')
                raise DPInternalError(msg)
            try:
                highlight = html_mark(highlight, exc.where, "semantic_error")
            except NoLocationFound as e:
                # NOTE(review): this `e` shadows the environment parameter
                # `e` of the enclosing function -- confirm intended.
                msg = 'While trying to annotate the exception:'
                msg += '\n' + indent(exc, 'exc > ')
                raise_wrapped(NoLocationFound, e, msg)
            error = exc.error + "\n" + format_where(exc.where)

            svg_data = None
    else:
        svg_data = None

    check_isinstance(highlight, str)
    res = {
        'source_code': source_code,
        'error': unicode(error, 'utf-8'),
        'highlight': unicode(highlight, 'utf-8'),
        #         'realpath': realpath,
        'current_view': 'syntax',
        'explanation1_html': None,
        'explanation2_html': None,
        'svg_data':
        unicode(svg_data, 'utf-8') if svg_data is not None else None,
        'parses': parses,  # whether it parses
    }
    return res
Exemple #18
0
def check_if_any_href_is_invalid(soup):
    '''
         Checks if references are invalid and tries to correct them.

        if it is of the form "#frag?query" then query is stripped out
        before lookup.

        For a missing id, a fix is attempted by prepending each of the
        known prefixes ('sec:', 'fig:', ...): exactly one match corrects
        the link in place (class 'warning'); zero or multiple matches
        mark the link with the 'errored' class and an explanatory <span>.

        Returns (errors, math_errors): lists of error message strings.
        Modifies `soup` in place.
    '''
    errors = []
    math_errors = []
    
    # let's first find all the IDs
    id2element, duplicates = get_id2element(soup, 'id')
    _name2element, _duplicates = get_id2element(soup, 'name')
#     id2element.update(name2element)
#     for a in soup.select('a[href^="#"]'):

    for a in soup.select('[href^="#"]'):
        href = a['href']
        # MathJax-generated links cannot be resolved here; just report them
        if a.has_attr('class') and  "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.error(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue 
        assert href.startswith('#')
        ID = href[1:]
        # remove query if it exists
        if '?' in ID:
            ID = ID[:ID.index('?')]
#         not_found = []

        if not ID in id2element:
            # try to fix it
#             
#             # it there is named element
#             if ID in name2element:
#                 real_id = name2element[ID].attrs
            
            # if there is already a prefix, remove it 
            if ':' in href:
                i = href.index(':')
                core = href[i+1:]
            else:
                core = ID
            possible = ['sec', 'sub', 'subsub', 'fig', 'tab', 'code', 'app', 'appsub',
                        'appsubsub',
                        'def', 'eq', 'rem', 'lem', 'prob', 'prop', 'exa', 'thm' ]
            matches = [] 
            others = []
            for possible_prefix in possible:
                why_not = possible_prefix + ':' + core
                others.append(why_not)
                if why_not in id2element:
                    matches.append(why_not)
            
            if len(matches) > 1:
                # ambiguous: cannot choose a correction, mark as error
                msg = '%s not found, and multiple matches for heuristics (%s)' % (href, matches)
                logger.error(msg)
                add_class(a, 'errored')
                w = Tag(name='span', attrs={'class':'href-invalid href-invalid-missing'})
                w.string = msg
                a.insert_after(w)
            elif len(matches) == 1:
                # unique match: correct the link in place
                msg = '%s not found, but corrected in %s' % (href, matches[0])
                logger.debug(msg)
                
                add_class(a, 'warning')
                w = Tag(name='span', attrs={'class':'href-replaced'})
                w.string = msg
                a['href'] = '#' + matches[0]
                a.insert_after(w)
                
            else:
#                 msg = 'Not found %r (also tried %s)' % (href, ", ".join(others))
#                 not_found.append(ID)
#                 logger.error(msg)
                errors.append('Not found %r' % (href))
                if not 'errored' in a.attrs.get('class', ''):
                    add_class(a, 'errored')
                    w = Tag(name='span', attrs={'class':'href-invalid href-invalid-missing'})
                    w.string = 'Not found %r' % (href)
                    a.insert_after(w)
            
        # an id matched by several elements is ambiguous even if it exists
        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            logger.error(msg)
            if not 'errored' in a.attrs.get('class', ''):
                add_class(a, 'errored')
                w = Tag(name='span', attrs={'class':'href-invalid href-invalid-multiple'})
                w.string = msg
                a.insert_after(w)

            errors.append(msg)
            
    return errors, math_errors
Exemple #19
0
def manual_join(template,
                files_contents,
                bibfile,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None):
    """
        Join the individual documents in `files_contents` into one HTML
        document based on `template`, then post-process: bibliography,
        content reorganization, selector-based removal, TOC generation,
        link/id checks. Returns the final document as a utf-8 encoded
        string (Python 2: uses `unicode`).

        template: HTML template string (macros are expanded first).
        files_contents: iterable of ((libname, docname), html-data).
        bibfile: path to the bibliography file, or None.
        stylesheet: name of a compiled CSS file to link, or None.
        remove: a single selector of elements to remove, or None/''.
        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc
    """
    logger.debug('remove_selectors: %s' % remove_selectors)
    logger.debug('remove: %s' % remove)
    from mcdp_utils_xml import bs

    template = replace_macros(template)

    # cannot use bs because entire document
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    # link the requested stylesheet in <head>
    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    # parse each input document into its own soup, keyed by docname
    basename2soup = OrderedDict()
    for (_libname, docname), data in files_contents:
        frag = bs(data)
        basename2soup[docname] = frag

    fix_duplicated_ids(basename2soup)

    # dump every document's contents into the template's <body>
    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        # NOTE(review): `data` below is left over from the previous loop
        # (the *last* file's raw html), not this docname's content -- the
        # size logged and the DOCTYPE check look wrong; confirm.
        logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        from mcdp_docs.latex.latex_preprocess import assert_not_inside
        assert_not_inside(data, 'DOCTYPE')
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))
        for x in content:
            x2 = x.__copy__()  # not clone, not extract
            body.append(x2)
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)
    logger.info('external bib')
    if bibfile is not None:
        if not os.path.exists(bibfile):
            logger.error('Cannot find bib file %s' % bibfile)
        else:
            bibliography_entries = get_bibliography(bibfile)
            bibliography_entries['id'] = 'bibliography_entries'
            body.append(bibliography_entries)

    # make sure there is a place to put the bibliography
    bibhere = d.find('div', id='put-bibliography-here')
    if bibhere is None:
        logger.warning('Could not find #put-bibliography-here in document.'
                       'Adding one at end of document')
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = 'put-bibliography-here'
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    if True:
        logger.info('reorganizing contents in <sections>')
        body2 = reorganize_contents(d.find('body'))
        body.replace_with(body2)
    else:
        warnings.warn('fix')
        body2 = body

    # Removing
    all_selectors = []
    if remove is not None and remove != '':
        all_selectors.append(remove)
    if remove_selectors:
        all_selectors.extend(remove_selectors)

    logger.debug('all_selectors: %s' % all_selectors)

    # remove matching elements, keeping a textual dump of what was removed
    all_removed = ''
    for selector in all_selectors:
        nremoved = 0
        logger.debug('Removing selector %r' % remove)
        toremove = list(body2.select(selector))
        logger.debug('Removing %d objects' % len(toremove))
        for x in toremove:
            nremoved += 1
            nd = len(list(x.descendants))
            logger.debug('removing %s with %s descendants' % (x.name, nd))
            if nd > 1000:
                s = str(x)[:300]
                logger.debug(' it is %s' % s)
            x.extract()

            all_removed += '\n\n' + '-' * 50 + ' chunk %d removed\n' % nremoved
            all_removed += str(x)
            all_removed += '\n\n' + '-' * 100 + '\n\n'

        logger.info('Removed %d elements of selector %r' % (nremoved, remove))


#     if False:
    with open('all_removed.html', 'w') as f:
        f.write(all_removed)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)
    ###
    logger.info('adding toc')
    toc = generate_toc(body2)

    logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    toc_ul.extract()
    assert toc_ul.name == 'ul'
    toc_ul['class'] = 'toc'
    toc_ul['id'] = 'main_toc'
    toc_selector = 'div#toc'
    tocs = list(d.select(toc_selector))
    if not tocs:
        msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
        logger.warning(msg)
    else:
        toc_place = tocs[0]
        toc_place.replaceWith(toc_ul)

    logger.info('checking errors')
    check_various_errors(d)

    from mcdp_docs.check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(d)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    logger.info('substituting empty links')
    substituting_empty_links(d)

    warn_for_duplicated_ids(d)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    add_footnote_polyfill(d)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res
Exemple #20
0
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        Joins several HTML fragments into one complete document based on
        the given HTML template, then runs the finishing passes
        (bibliography, TOC, cross-references, extra CSS).

        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        stylesheet: if not None, the name of a compiled stylesheet to link
        in <head>.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        references: dict mapping href -> object with .url and .title,
        used to rewrite matching links in a final pass.

        Returns an AugmentedResult whose result is the complete document
        as a UTF-8 encoded string.
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    # Cross-references may arrive wrapped in an AugmentedResult; merge its
    # notes into our result and parse the payload.
    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    # No-op timing context; kept so the `with timeit(...)` annotations below
    # document the phases without pulling in a real profiler.
    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs because entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template,
                                          'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            # Copy (not move) the standard CSS fragment into the template head.
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            # Parse each fragment; docnames must be unique since they key
            # this ordered mapping (insertion order == document order).
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')

                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):
        # XXX
        # fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                # Moving the nodes directly (extract + append) is faster than
                # the generic copy_contents_into().
                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' %
                                        docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):

            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

            # The bibliography is collected into the element with this id;
            # create one at the end of <body> if the template lacks it.
            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result,
                                           location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    # With an aug0 the error is recorded as a note;
                    # otherwise we fail hard.
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d,
                crossrefs=crossrefs,
                resolve_references=resolve_references,
                res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            # Rewrite links that appear in the `references` mapping;
            # links without text get the reference title as their content.
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    # BUG FIX: `.children` is an iterator and is always
                    # truthy; materialize it to actually detect empty links.
                    if not list(a.children):  # empty
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
Exemple #21
0
def check_no_headers_inside_div(x):
    # Sanity check: a <div> should not contain section headers (h1..h5).
    # NOTE(review): only logs an error; does not raise or return anything
    # visible here — confirm whether callers expect a hard failure.
    if x.name == 'div' and list(x.find_all(['h1', 'h2', 'h3', 'h4', 'h5'])):
        msg = 'There are headers inside this <div>'
        logger.error(msg)