Code Example #1
def generate_content_test(filename):
    filename_output_html = filename + '.metahtml.html'
    filename_output_text = filename + '.metahtml.text'

    #if os.path.isfile(filename_output_text):
    #return

    urlish = os.path.dirname(filename).split('/')[-1]
    urlish = urlish.replace('___', '://').replace('_', '/')

    with open(filename) as f:
        html = f.read()
        meta = metahtml.parse_all(html, urlish, fast=False)
        with open(filename_output_html, 'w') as fout:
            html_metahtml = meta['content']['value']['html']
            fout.write(html_metahtml)
        with open(filename_output_text, 'w') as fout:
            fout.write(meta['content']['value']['text'])

    # generate the diff
    with open(filename + '.newspaper3k.html') as f:
        html_newspaper = f.read()

    with open(filename + '.diff.html', 'w') as f:
        html_diff = '<style> ins { background-color: #00ff00; }\ndel {background-color: #ff0000} </style>'
        html_diff += htmldiff(html_newspaper, html_metahtml)
        f.write(html_diff)
Code Example #2
    def test_basic(self):
        html = load_regression_data('basic-multi-page.html')
        urldict = self._make_basic_urldict()
        fetcher = urlfetch.MockUrlFetch(urldict)
        options = {
                'url': 'http://basic.com/article.html',
                'multipage': True,
                'urlfetch': fetcher
                }
        doc = Document(html, **options)
        res = doc.summary_with_metadata()

        self.assertIn('Page 2', res.html, 'Should find the page 2 heading')
        self.assertIn('Page 3', res.html, 'Should find the page 3 heading')

        expected_html = load_regression_data('basic-multi-page-expected.html')
        diff_html = htmldiff(expected_html, res.html)
        diff_doc = document_fromstring(diff_html)

        insertions = diff_doc.xpath('//ins')
        deletions = diff_doc.xpath('//del')

        if len(insertions) != 0:
            for i in insertions:
                print('unexpected insertion: %s' % i.xpath('string()'))
            self.fail('readability result does not match expected')

        if len(deletions) != 0:
            for i in deletions:
                print('unexpected deletion: %s' % i.xpath('string()'))
            self.fail('readability result does not match expected')
Code Example #3
File: _diffcommand.py Project: 15580056814/hue
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        print 'Error: you must give two files'
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        f = open(options.output, 'wb')
        f.write(result)
        f.close()
Code Example #4
def detect_change(file_name: str, new_content: str) -> Optional[str]:
    """Detect if the content in a file differs from a given string.

    Args:
        file_name: The file that contains the string for comparison.
        new_content: The string that should be compared with the file contained string.

    Returns:
        The difference between the contents if there was a change, else None.
    """
    difference_was_detected = False
    change = None
    if os.path.isfile(file_name):
        with open(file_name, "r") as file:
            old_content = file.read()
            if old_content != new_content:
                difference_was_detected = True
                change = htmldiff(old_content, new_content)
    else:
        difference_was_detected = True
        change = new_content

    if difference_was_detected:
        with open(file_name, "w") as file:
            file.write(new_content)
    return change
Code Example #5
File: _diffcommand.py Project: kumarremoa/Goferbot
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        print 'Error: you must give two files'
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        f = open(options.output, 'wb')
        f.write(result)
        f.close()
Code Example #6
	def post(self):
		v_o_id=self.request.get('v_o_id')
		v_t_id=self.request.get('v_t_id')
		title = permission(v_o_id)
		p = permission(v_t_id)
		if title == False or p == False:
			return
		version_one = self.request.get('v_o')
		version_two = self.request.get('v_t')
		r_one = models.ScriptData.get_version(v_o_id, version_one)
		r_two = models.ScriptData.get_version(v_t_id, version_two)
		v = ['s','a','c','d','p','t']

		def to_html(raw_data):
			j = simplejson.loads(raw_data)
			s = StringIO.StringIO()
			for text, line_format in j:
				text = cgi.escape(text, quote=True)
				s.write("<p class='"+v[line_format]+"'>"+text+"</p>")
			return s.getvalue()

		s_one = to_html(r_one.data)
		s_two = to_html(r_two.data)
		content = htmldiff(s_one, s_two)
		self.response.headers['Content-Type']='text/html'
		self.response.out.write(content)
Code Example #7
File: views.py Project: karalics/djangocms-workflows
    def get_context_data(self, **kwargs):
        context = super(DiffView, self).get_context_data(**kwargs)

        page = get_object_or_404(Page, pk=self.pk)
        public_page = self.render_page_placeholders(page.get_public_object(),
                                                    self.request)
        draft_page = self.render_page_placeholders(page.get_draft_object(),
                                                   self.request)

        diffs = []
        for slot, public_rendered in public_page.items():
            draft_rendered = draft_page.pop(slot, [])

            diff = htmldiff(public_rendered, draft_rendered)
            tree = parse_html(diff, cleanup=False)

            for item in tree.xpath("//ins | //del"):
                if len(item):
                    continue

                content = item.text
                if not (content and content.strip()):
                    item.getparent().remove(item)

            diffs.append(etree.tostring(tree, method='html'))

        context.update({
            'title': _('Show current changes'),
            'diffs': diffs,
        })

        return context
Code Example #8
def show_entry_history(request, entry_id):
    """
    Display a page with two versions of the entry, compared with each other, with highlighted changes.
    """
    if not is_contributor(request):
        messages.warning(request, _('This page is for contributors only.'))
        return redirect('index')

    entry = get_object_or_404(Entry, pk=entry_id)
    version_1 = request.GET.get('version_1', False)
    version_2 = request.GET.get('version_2', False)

    if version_2:
        newer = get_object_or_404(EntryVersion, pk=version_2, entry_id=entry_id)
    else:
        newer = entry.versions.last()

    if version_1:
        older = get_object_or_404(EntryVersion, pk=version_1, entry_id=entry_id)
    else:
        if entry.versions.count() > 1:
            older = entry.versions.filter(is_approved=True).exclude(pk=newer.pk).last()
            if older is None:
                older = entry.versions.exclude(pk=newer.pk).last()
        else:
            older = entry.versions.last()

    if older is None or newer is None:
        raise Http404

    if newer.date < older.date:
        newer, older = older, newer

    def make_html(version):
        html = ['<article class="entry-article w3-display-container w3-border w3-card">']
        for line in version.lines.all():
            html.append("<h3>{}</h3>".format(line.speaker))
            html.append(line.text)
        if version.note:
            html.append('<small class="footnote">Footnote: {}</small>'.format(version.note))

        html.append("</article>")
        return "".join(html)

    newer_html = make_html(newer)
    older_html = make_html(older)

    html_diff = htmldiff(older_html, newer_html)

    return render(request, "palanaeum/staff/entry_history.html", {
        'newer_version': newer,
        'newer_html': newer_html,
        'html_diff': html_diff,
        'older_version': older,
        'older_html': older_html,
        'entry': entry,
        'all_versions': EntryVersion.objects.filter(entry=entry),
        'snippets': Snippet.all_visible.filter(entry=entry),
        'images': ImageSource.all_visible.filter(entry=entry)
    })
Code Example #9
File: views.py Project: vishbin/soclone
def question_revisions(request, question_id):
    """Revision history for a Question."""
    question = get_object_or_404(Question, id=question_id)
    revisions = list(question.revisions.all())
    populate_foreign_key_caches(User, ((revisions, ('author', )), ),
                                fields=('username', 'gravatar', 'reputation',
                                        'gold', 'silver', 'bronze'))
    for i, revision in enumerate(revisions):
        revision.html = QUESTION_REVISION_TEMPLATE % {
            'title':
            revision.title,
            'html':
            sanitize_html(markdowner.convert(revision.text)),
            'tags':
            ' '.join([
                '<a class="tag">%s</a>' % tag
                for tag in revision.tagnames.split(' ')
            ]),
        }
        if i > 0:
            revisions[i - 1].diff = htmldiff(revision.html,
                                             revisions[i - 1].html)
    return render_to_response('question_revisions.html', {
        'title': u'Question Revisions',
        'question': question,
        'revisions': revisions,
    },
                              context_instance=RequestContext(request))
Code Example #10
File: diff.py Project: mattbierner/post-mortem
def diff_html(original_content, input_file):
    """
    Generate a html diff between two html files by adding
    `<ins>` and `<del>` tags.
    """
    with codecs.open(input_file, 'r', 'utf-8') as right:
        right_content = right.read()

    content = htmldiff(
        original_content,
        right_content).encode('utf-8')

    soup = BeautifulSoup(content, 'lxml')

    # Remove link: additions
    for a in soup.findAll(['a']):
        if a.text and re.search(r'\bLink:\s.+$', a.text.encode('utf-8'), re.MULTILINE | re.UNICODE):
            a.string = re.sub(
                r'\bLink:\s.+$', u'', a.text, re.MULTILINE | re.UNICODE)

    # Remove empty tags
    for ins in soup.findAll(['ins', 'del']):
        if re.match(r'^\s*$', ins.text):
            ins.extract()

    result = []
    for element in soup.body.contents:
        if hasattr(element, 'prettify'):
            result.append(element.prettify())
        elif element and unicode(element) and not re.match(r'^\s*$', unicode(element)):
            result.append(unicode(element))

    return ''.join(result).encode('utf-8')
Code Example #11
File: article.py Project: betagouv/zam
 def render_details(self) -> str:
     return Markup(
         clean_html(
             htmldiff(
                 self.template_vars["old_value"], self.template_vars["new_value"]
             )
         )
     )
Code Example #12
File: models.py Project: classifaddict/classifactory
 def text_diff(self):
     diffs = self.tree2_diffs.filter(is_texts_diff=True)
     if diffs.exists():
         diff = diffs.first()
         return htmldiff(
             diff.treenode1.element.text.texts_html(),
             diff.treenode2.element.text.texts_html()
         )
     return None
Code Example #13
File: models.py Project: classifaddict/classifactory
 def attrs_diff(self):
     diffs = self.tree2_diffs.filter(is_attrs_diff=True)
     if diffs.exists():
         diff = diffs.first()
         return htmldiff(
             diff.treenode1.element.attributes_html(),
             diff.treenode2.element.attributes_html()
         )
     return None
Code Example #14
File: fsin_parser.py Project: filaPro/fsin_parser
 def get_diff_of_pages(self, page1, page2):
     try:
         diff = HTMLParser.HTMLParser().unescape(
             htmldiff(page1.decode("utf-8"), page2.decode("utf-8")).encode("utf-8")
         )
         diff = diff.replace("<del>", '<del style="color:red">')
         diff = diff.replace("<ins>", '<ins style="color:green">')
         return HTMLParser.HTMLParser().unescape(diff)
     except Exception as e:
         self.log("get_diff_of_pages: HTML Error " + str(e))
Code Example #15
File: helper.py Project: heejongahn/dodotable
def compare_html(actual, expected):
    _actual = removed_spaces(actual)
    _expected = removed_spaces(expected)
    diff = html_unquote(htmldiff(_actual, _expected))
    for i, (a, e) in enumerate(zip(_actual, _expected)):
        if a != e:
            print(i)
            break
    assert diff == _actual, _actual[:i]
    return diff == _actual
Code Example #16
 def get_diff_of_pages(self, page1, page2):
     try:
         diff = HTMLParser.HTMLParser().unescape(
             htmldiff(page1.decode('utf-8'),
                      page2.decode('utf-8')).encode('utf-8'))
         diff = diff.replace('<del>', '<del style="color:red">')
         diff = diff.replace('<ins>', '<ins style="color:green">')
         return HTMLParser.HTMLParser().unescape(diff)
     except Exception as e:
         self.log('get_diff_of_pages: HTML Error ' + str(e))
Code Example #17
File: views.py Project: danielhjames/Booktype
def unified_diff(content1, content2):
    try:
        content1 = clean_chapter_html(content1, clean_comments_trail=True)
        content2 = clean_chapter_html(content2, clean_comments_trail=True)
    except Exception as e:
        logger.error('ERROR while cleaning content %s. Rev 1: %s Chapter: %s' % (
            e, content1, content2))
        return {"result": False}

    diff = htmldiff(content1, content2)
    return diff
Code Example #18
def _diff_elements(old, new):
    """
    Diff the contents of two Beautiful Soup elements. Note that this returns
    the "new" element with its content replaced by the diff.
    """
    if not old or not new:
        return ''
    result_element = copy.copy(new)
    result_element.clear()
    result_element.append(htmldiff(str(old), str(new)))
    return result_element
Code Example #19
File: wiki.py Project: B-Rich/wiki
def visual_diff(revision):
    target = repo[revision]
    parent = target.parents[0]

    tree = target.tree
    parent_tree = target.parents[0].tree

    diff = parent_tree.diff(tree)
    patches = list(diff)
    filename = get_current_name(tree, patches)
    name = filename[:-4]
    target_html = get_html_revision(name, revision, False)

    if len(patches) == 2:
        return {"patch": htmldiff(target_html, target_html)}

    parent_html = (get_html_revision(name, parent.hex, False)
                   if filename in parent_tree else "")

    return {"patch": htmldiff(parent_html, target_html)}
Code Example #20
def unified_diff(content1, content2):
    try:
        content1 = clean_chapter_html(content1, clean_comments_trail=True)
        content2 = clean_chapter_html(content2, clean_comments_trail=True)
    except Exception as e:
        logger.error('ERROR while cleaning content %s. Rev 1: %s Chapter: %s' % (
            e, content1, content2))
        return {"result": False}

    diff = htmldiff(content1, content2)
    return diff
Code Example #21
File: lxmldiff.py Project: ianb/misc-recipes
def compare_pages(url1, url2, selector='body div'):
    basis = parse(url1).getroot()
    basis.make_links_absolute()
    other = parse(url2).getroot()
    other.make_links_absolute()
    el1 = basis.cssselect(selector)[0]
    el2 = other.cssselect(selector)[0]
    diff_content = htmldiff(tostring(el1), tostring(el2))
    diff_el = fromstring(diff_content)
    el1.getparent().insert(el1.getparent().index(el1), diff_el)
    el1.getparent().remove(el1)
    return basis
Code Example #22
File: DomComparator.py Project: dip-kush/CrawlerUI
def checkExistState(dom1,dom2):
    if hash(dom1) == hash(dom2):
        return True
    else:
        tagCount1, strippedDom1 = traverseDom(dom1)
        tagCount2, strippedDom2 = traverseDom(dom2)
        
        mintagCount = min(tagCount1,tagCount2)
        maxtagCount = max(tagCount1,tagCount2)
        if float(mintagCount)/float(maxtagCount) < 0.9:
            logger.info("Different States Huge Difference in Tag Count")
            return False
        diff1 = htmldiff(strippedDom1, strippedDom2)
        diff2 = htmldiff(strippedDom2,strippedDom1)
        if len(diff1) > len(diff2):
            diff = diff1
        else:
            diff = diff2

        bdiff = BeautifulSoup(diff)
        ins = ''.join(str(bdiff.findAll("ins")))
        delete = ''.join(str(bdiff.findAll("del")))
        print cleanDom(delete)
        diffDom = cleanDom(ins)
        print diffDom

        if diffDom!="[]":
            diffTagCount,diffStrippedDom = traverseDom(diffDom)
        else:
            if hash(strippedDom1) ==  hash(strippedDom2):
                return True
            else:
                return False
        logger.info("tag count %d %d" % (diffTagCount, tagCount1))
        if (float(diffTagCount)/float(tagCount1))*100 > 5:
            return False
        logger.info("STATE ALREADY EXIST")
        #print dom1
        #print dom2
        return True
Code Example #23
def highlight_edits(new_html: str, old_html: str) -> str:
    # Don't include `Edit:` text in diff.
    if old_html.startswith("<u>Edit:</u> "):
        old_html = old_html[len("<u>Edit:</u> "):]

    # Generate diff with lxml
    new_html = htmldiff(old_html, new_html)

    # Replace <ins> with <u> since Riot doesn't allow <ins>
    new_html = new_html.replace("<ins>", "<u>").replace("</ins>", "</u>")
    # Remove <del>s since we just want to hide deletions.
    new_html = re.sub("<del>.+?</del>", "", new_html)
    return new_html
Code Example #24
    def text_section_update_definitions_if_new(self, message: Dict):
        text_section = TextSection.objects.get(pk=message['text_section_pk'])

        old_html = fragment_fromstring(message['old_body'],
                                       create_parent='div')
        new_html = fragment_fromstring(text_section.body, create_parent='div')

        if htmldiff(old_html, new_html):
            logger.info(
                f'Found new body in text section pk={message["text_section_pk"]}'
            )

            text_section.update_definitions()
Code Example #25
File: utils.py Project: pimentech/notesgroup
def note_diff(note1, note2):
    context = { 'note': note1, 
                'last_note' : note2,
                'note_src': note2.uid, 
                'http_root':settings.HTTP_ROOT }
    r1 = render_to_string('notesgroup/note_view.html', context)
    context['note'] = note2
    r2 = render_to_string('notesgroup/note_view.html', context)
    try:
        content = htmldiff(r1, r2)
    except KeyError:
        content = r2
    content = content.replace('class="group"', 'style="border:1px solid #CCCCCC; margin:1em 0 0; padding:0 1em;"')
    return content
Code Example #26
File: subjectpage.py Project: nous-consulting/ututi
 def diff_with_previous(self, subject, page):
     c.breadcrumbs = [{'link': subject.url(),
                       'title': subject.title},
                      {'link': page.url(),
                       'title': page.title}]
     if page not in subject.pages:
         abort(404)
     version_id = int(request.GET['version_id'])
     c.version = PageVersion.get(version_id)
     idx = page.versions.index(c.version)
     c.prev_version = page.versions[idx+1]
     c.diff = literal(htmldiff(html_cleanup(c.prev_version.content),
                               html_cleanup(c.version.content)))
     return render('page/diff_with_previous.mako')
Code Example #27
File: html_diff.py Project: 284928489/zulip
def highlight_html_differences(s1: str, s2: str, msg_id: Optional[int]=None) -> str:
    retval = htmldiff(s1, s2)
    fragment = lxml.html.fromstring(retval)

    for elem in fragment.cssselect('del'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_deleted')

    for elem in fragment.cssselect('ins'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_inserted')

    retval = lxml.html.tostring(fragment)

    return retval
Code Example #28
def difference2(current, old):
    if (current.revision == "1"):
        return current.html
    elif parse_xml(old.json_work_item_revision_text) == parse_xml(
            current.json_work_item_revision_text):
        return "<form id=err>No steps changes in the revision %s</form>" % current.revision + parse_xml(
            current.json_work_item_revision_text)
    else:
        old = old.html
        new = current.html
        diff_html = htmldiff(old, new)
        diff_html = diff_html.replace("<del>", "<del><font color=red>")
        diff_html = diff_html.replace("</del>", "</del></font>")
        diff_html = diff_html.replace("<ins>", "<ins><font color=green>")
        diff_html = diff_html.replace("</ins>", "</ins></font>")
        return diff_html
Code Example #29
File: html_diff.py Project: brockwhittaker/zulip
def highlight_html_differences(s1, s2, msg_id=None):
    # type: (str, str, Optional[int]) -> str
    retval = htmldiff(s1, s2)
    fragment = lxml.html.fromstring(retval)  # type: ignore # https://github.com/python/typeshed/issues/525

    for elem in fragment.cssselect('del'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_deleted')

    for elem in fragment.cssselect('ins'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_inserted')

    retval = lxml.html.tostring(fragment)   # type: ignore # https://github.com/python/typeshed/issues/525

    return retval
Code Example #30
def highlight_html_differences(s1: str,
                               s2: str,
                               msg_id: Optional[int] = None) -> str:
    retval = htmldiff(s1, s2)
    fragment = lxml.html.fromstring(retval)

    for elem in fragment.cssselect('del'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_deleted')

    for elem in fragment.cssselect('ins'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_inserted')

    retval = lxml.html.tostring(fragment)

    return retval
Code Example #31
    def process_response(self,before_data,after_data, process_type = "injection", payloadpattern=""):
        diff_data = bs(str(htmldiff(after_data,before_data).split("<del>")[1:-1]))

        #pdb.set_trace()

        if process_type == "injection":
            if re.search(r'[sS][qQ][lL]',diff_data.text):
                try:
                    print after_data.geturl()
                except:
                    pass
                #pdb.set_trace()

            if payloadpattern != "":
                if re.search(payloadpattern,after_data):
                    print "YESS YOU GOT IT"
                    pdb.set_trace()
Code Example #32
def highlight_html_differences(s1: str,
                               s2: str,
                               msg_id: Optional[int] = None) -> str:
    retval = htmldiff(s1, s2)
    fragment = lxml.html.fromstring(retval)

    for elem in fragment.cssselect("del"):
        elem.tag = "span"
        elem.set("class", "highlight_text_deleted")

    for elem in fragment.cssselect("ins"):
        elem.tag = "span"
        elem.set("class", "highlight_text_inserted")

    retval = lxml.html.tostring(fragment, encoding="unicode")

    return retval
Code Example #33
    def test_new_render_view(self):
        """
        Test final html rendered by comparing with a reference file.
        Write the result into ./rendered/export.html
        """
        ref_render_html = (Path(__file__).resolve().parent / 'new_ref_render.html').read_text()

        # this was a try to test against an another file. I won't say it was not working, but yeah, manual
        # verification was really more helpful
        assert True
        return
        self.longMessage = False
        self.assertEqual(
            ref_render_html,
            self.rendered_html,
            htmldiff(ref_render_html, self.rendered_html)
        )
        self.longMessage = True
Code Example #34
def extract_features(source1,source2):
    source1 = gzip.open(source1).read()
    source2 = gzip.open(source2).read()
    soup = BeautifulSoup(htmldiff(source1,source2),'html.parser')
    inserts = []
    for ins in soup.find_all('ins'):
        ins = str(ins)[5:-6].strip() 
        if '<' in ins and '>' in ins:
            if not ins.startswith('<'):
                ins = ins[ins.index('<'):]
            if not ins.endswith('>'):
                ins = ins[:ins.index('>')]

            ins = re.sub(r'\s+', '', ins)
            ins = re.sub(r'".*"', '""', ins)
            ins = re.sub(r'>.*<','><', ins)
            inserts.append(ins)

    return ' '.join(inserts)
Code Example #35
File: views.py Project: VineGlobal/soclone
def answer_revisions(request, answer_id):
    """Revision history for an Answer."""
    answer = get_object_or_404(Answer, id=answer_id)
    revisions = list(answer.revisions.all())
    populate_foreign_key_caches(User, ((revisions, ('author',)),),
         fields=('username', 'gravatar', 'reputation', 'gold', 'silver',
                 'bronze'))
    for i, revision in enumerate(revisions):
        revision.html = QUESTION_REVISION_TEMPLATE % {
            'html': sanitize_html(markdowner.convert(revision.text)),
        }
        if i > 0:
            revisions[i - 1].diff = htmldiff(revision.html,
                                             revisions[i - 1].html)
    return render_to_response('answer_revisions.html', {
        'title': u'Answer Revisions',
        'answer': answer,
        'revisions': revisions,
    }, context_instance=RequestContext(request))
Code Example #36
def highlight_html_differences(s1, s2, msg_id=None):
    # type: (str, str, Optional[int]) -> str
    retval = htmldiff(s1, s2)
    fragment = lxml.html.fromstring(
        retval)  # type: ignore # https://github.com/python/typeshed/issues/525

    for elem in fragment.cssselect('del'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_deleted')

    for elem in fragment.cssselect('ins'):
        elem.tag = 'span'
        elem.set('class', 'highlight_text_inserted')

    retval = lxml.html.tostring(
        fragment
    )  # type: ignore # https://github.com/python/typeshed/issues/525

    return retval
Code Example #37
    def get_diff(self):
        """Get diff between the latest to website.check_files"""
        hashes = []
        for _check_file in [self.check_files[-2], self.check_files[-1]]:
            soup = _check_file.soup

            if self.css_selector:
                cont = soup.select(self.css_selector)
                if len(cont) > 1:
                    sys.exit('!! selector not unique')
                if not cont:
                    sys.exit(f"!! selector '{self.css_selector}' no results")
                cont = cont[0]
            else:
                cont = soup.html()

            hashes.append(str(cont))

        diff = htmldiff(hashes[0], hashes[1])
        return BeautifulSoup(diff, 'lxml'), diff
Code Example #38
def check_page(url):
    diff = None
    old_version = load_previous(url)
    new_version = load_current(url)
    if new_version is None:
        return None

    if old_version != new_version and not any([old_version is None, new_version is None]):
        diff = htmldiff(old_version, new_version)
        if new_version is not None:
            save_version(url, new_version)
    elif old_version is None and new_version is not None:
        save_version(url, new_version)
        logging.info('No previous version of page found for url: {}'.format(url))
    elif new_version is None:
        # There was an error
        logging.error('There was an error retrieving new version of url, see requests log: {}'.format(url))
    else:
        logging.info("No change in page found: {}".format(url))
    return diff
Code Example #39
File: views.py Project: vishbin/soclone
def answer_revisions(request, answer_id):
    """Revision history for an Answer."""
    answer = get_object_or_404(Answer, id=answer_id)
    revisions = list(answer.revisions.all())
    populate_foreign_key_caches(User, ((revisions, ('author', )), ),
                                fields=('username', 'gravatar', 'reputation',
                                        'gold', 'silver', 'bronze'))
    for i, revision in enumerate(revisions):
        revision.html = QUESTION_REVISION_TEMPLATE % {
            'html': sanitize_html(markdowner.convert(revision.text)),
        }
        if i > 0:
            revisions[i - 1].diff = htmldiff(revision.html,
                                             revisions[i - 1].html)
    return render_to_response('answer_revisions.html', {
        'title': u'Answer Revisions',
        'answer': answer,
        'revisions': revisions,
    },
                              context_instance=RequestContext(request))
Code Example #40
File: losalamosmtgs.py Project: sindependent/scripts
def diffhtml(before_html, after_html, title=None):
    """Diffs the two files, and returns an html fragment that wraps
       differences in <ins> or <del> tags, which you can style as desired.
       Returns bytes, not str, because everything else works in bytes
       due to using requests.
    """
    if not title:
        title = "Changed Agenda"

    # lxml.html.htmldiff only accepts strings, not bytes, but these
    # were read in as bytes because that's what comes from requests;
    # so translate them.
    if type(before_html) is bytes:
        before_html = before_html.decode()
    if type(after_html) is bytes:
        after_html = after_html.decode()

    # lxml.html.htmldiff returns fragments, not full documents.
    # So add a header that includes a style for ins and del.
    diff = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>%s</title>
<style>
ins { background: #9ff; }
del { background: #fbb; }
</style>
</head>

<body>
<h1>%s</h1>
''' % (title, title)

    diff += htmldiff(before_html, after_html)

    diff += "\n</body></html>\n"

    # encode to return bytes.
    return diff.encode()
Code Example #41
def question_revisions(request, question_id):
    question = get_object_or_404(Question, id=question_id)
    revisions = list(question.revisions.all())
    populate_foreign_key_caches(User, ((revisions, ('author',)),),
         fields=('username', 'gravatar', 'reputation', 'gold', 'silver',
                 'bronze'))
    for i, revision in enumerate(revisions):
        revision.html = QUESTION_REVISION_TEMPLATE % {
            'title': revision.title,
            'html': sanitize_html(markdowner.convert(revision.text)),
            'tags': ' '.join(['<a class="tag">%s</a>' % tag
                              for tag in revision.tagnames.split(' ')]),
        }
        if i > 0:
            revisions[i - 1].diff = htmldiff(revision.html,
                                             revisions[i - 1].html)
    return render_to_response('question_revisions.html', {
        'title': u'Question Revisions',
        'question': question,
        'revisions': revisions,
    }, context_instance=RequestContext(request))
Code Example #42
def diff_texts(text1, text2):
    # differ = dmp.diff_match_patch()

    # Remove HTML tags and duplicate \n if specified (helps to ignore diff plugin ids)
    if REVERSION2_DIFF_TEXT_ONLY:
        text1 = BeautifulSoup(text1, features="lxml").get_text()
        text2 = BeautifulSoup(text2, features="lxml").get_text()

    if REVERSION2_IGNORE_WHITESPACE:
        text1 = re.sub(r'\n+', '\n', text1).strip()
        text2 = re.sub(r'\n+', '\n', text2).strip()

    # diffs = differ.diff_main(text1, text2)
    # differ.diff_cleanupEfficiency(diffs)
    #
    # diffs = revert_escape(differ.diff_prettyHtml(diffs))

    from lxml.html.diff import htmldiff
    diffs = htmldiff(text1, text2)

    return diffs
Code Example #43
File: swchook.py Project: Xarthisius/polyphemus
    def _generate_diffs(self):
        self._updater.update(
            status='pending', 
            description="Creating head and base website diffs.")

        for f in self._files:
            froot, fext = os.path.splitext(f)
            if fext not in HTML_EXTS:
                f = froot + '.html'
            f = os.path.join("_site", f)
            fpath, fname = os.path.split(f)

            head = os.path.join(self._head_dir, f)
            base = os.path.join(self._base_dir, f)
            diff = os.path.join(self._head_dir, fpath, "diff-" + fname)

            # if addition or deletion, just skip
            if not os.path.isfile(head) or not os.path.isfile(base):
                continue

            with open(base, 'r') as f:
                doc1 = lxml.html.parse(f)
 
            with open(head, 'r') as f:
                doc2 = lxml.html.parse(f)
 
            doc1body = doc1.find('body')
            doc2body = doc2.find('body')

            bodydiff = htmldiff(lxml.html.tostring(doc1body, encoding='utf-8').decode('utf-8'),
                                lxml.html.tostring(doc2body, encoding='utf-8').decode('utf-8'))
            doc2head = doc2.find('head')
            add_stylesheet(doc2head)
            diffdoc = u'<html>\n{0}\n<body>\n{1}\n</body>\n</html>'
            diffdoc = diffdoc.format(lxml.html.tostring(doc2head, encoding='utf-8').decode('utf-8'), bodydiff)

            with io.open(diff, 'wb') as f:
                f.write(diffdoc.encode('utf-8'))
            print("diff'd {0!r}".format(diff))
Code Example #44
File: librarian.py Project: justzx2011/Robottke
def diff_rss(url, name, limit=-1):  
    rss = feedparser.parse(url)
    links = {}
    #print rss
    if limit==-1 or limit > len(rss.entries):
        limit = len(rss.entries)
    first_index = get_first_index(rss)
    for i in range(first_index, limit+1):
        links[rss.entries[i].link] = []
        post1 = rss.entries[i-1].link
        if i == limit:
            post2 = rss.entries[first_index-1].link
        else:
            post2 = rss.entries[i].link
        print post2
        diffh = htmldiff(get_content(post1)["body"], get_content(post2)["body"])
        tree = etree.parse(StringIO.StringIO(diffh), parser)
        diff = tree.xpath("//ins//@href")
        for d in diff:
            if urlparse(d).netloc != urlparse(rss.feed.link).netloc and urlparse(d).path != '/':
                links[rss.entries[i].link].append(d)
    return links
Code Example #45
def difference2(test_case_id, test_case_rev):
    if (test_case_rev == "1"):
        return parse_xml(test_case_id, test_case_rev)
    elif parse_xml(test_case_id, str(
        (int(test_case_rev) - 1))) == parse_xml(test_case_id, test_case_rev):
        return "<form id=err>No steps changes in the revision %s</form>" % test_case_rev + parse_xml(
            test_case_id, test_case_rev)
    else:
        old = parse_xml(test_case_id, str((int(test_case_rev) - 1)))
        new = parse_xml(test_case_id, test_case_rev)
        diff_html = htmldiff(old, new)
        diff_html = diff_html.replace("<del>", "<del><font color=red>")
        diff_html = diff_html.replace("</del>", "</del></font>")
        diff_html = diff_html.replace("<ins>", "<ins><font color=green>")
        diff_html = diff_html.replace("</ins>", "</ins></font>")
        return diff_html


# print(parse_xml("409770","11"))

# print(get_t_c_rev("409770","32"))

# print(parse_html("446114 ", "18"))
Code Example #46
File: smartread.py Project: justzx2011/Robottke
def smart_read(url):
    resp = urllib2.urlopen(url)
    #resolve url
    url = resp.url
    domain = urlparse(url).netloc
    path = urlparse(url).path
    
    html = resp.read()
    tree = etree.parse(StringIO.StringIO(html), parser)
    links = tree.xpath("//body//@href")
    nmax = 0
    for link in links:
        if urlparse(link).netloc == domain:
            ng = NGram.compare(urlparse(link).path,path)
            #print link,ng
            if ng > nmax and ng < 1:
                nmax = ng
                mirror = link
    diffh = htmldiff(visit_page(url)["body"], visit_page(mirror)["body"])
    tree = etree.parse(StringIO.StringIO(diffh), parser)
    diff = tree.xpath("//ins//text()")
    for d in diff:
        print d
Code Example #47
def getHtmlDiff(dom1, dom2, tagCount1, tagCount2):
    diff1 = htmldiff(dom1, dom2)
    #diff2 = htmldiff(dom2 ,dom1)
    #print diff1
    '''
    if len(diff1) > len(diff2):
        diff = diff1
        tagCount = tagCount1
    else:
        diff = diff2
        tagCount = tagCount2
    '''
    diff = diff1
    tagCount = tagCount1
    #diff = diff1
    bdiff = BeautifulSoup(diff)
    ins = ''.join(str(bdiff.findAll("ins")))
    print ins
    delete = ''.join(str(bdiff.findAll("del")))
    print cleanDom(delete)
    diffDom = cleanDom(ins)
    print diffDom
    return diffDom
Code Example #48
File: htmldiff.py Project: Spaceghost/magmawiki
#!/usr/bin/env python
from sys import argv
from lxml.html.diff import htmldiff

def help():
  print "----------------------------------"
  print "          An diff tool!"  
  print "----------------------------------"
  print
  print "Usage: <file_a> <file_b>"
  print "Output: HTML"
  print
  print "Example output:"
  print "    <ins>hello</ins><del>goodbye cruel</del> world"
  print
  

if __name__ == "__main__":
  
  if len(argv) != 3:
    help()
    exit(1)
    
  else:
    file_a, file_b = argv[1:]
    
    a = open(file_a).read()
    b = open(file_b).read()
    
    print htmldiff(a, b)
Code Example #49
File: views.py Project: GunioRobot/hubplus
def htmldiffer(ver_1, ver_2):
    content = htmldiff(ver_2.content, ver_1.content)
    license = htmldiff(ver_2.license, ver_1.license)
    title = htmldiff(ver_2.title, ver_1.title)
    return {'content': content, 'license':license, 'title':title}
Code Example #50
File: notifications.py Project: buxx/tracim
    def _build_email_body(self, mako_template_filepath: str, role: UserRoleInWorkspace, content: Content, actor: User) -> str:
        """
        Build an email body and return it as a string
        :param mako_template_filepath: the absolute path to the mako template to be used for email body building
        :param role: the role related to user to whom the email must be sent. The role is required (and not the user only) in order to show in the mail why the user receive the notification
        :param content: the content item related to the notification
        :param actor: the user at the origin of the action / notification (for example the one who wrote a comment
        :param config: the global configuration
        :return: the built email body as string. In case of multipart email, this method must be called one time for text and one time for html
        """
        logger.debug(self, 'Building email content from MAKO template {}'.format(mako_template_filepath))

        template = Template(filename=mako_template_filepath)
        # TODO - D.A. - 2014-11-06 - move this
        # Import is here for circular import problem
        import tracim.lib.helpers as helpers

        dictified_item = Context(CTX.EMAIL_NOTIFICATION, self._global_config.WEBSITE_BASE_URL).toDict(content)
        dictified_actor = Context(CTX.DEFAULT).toDict(actor)

        main_title = dictified_item.label
        content_intro = ''
        content_text = ''
        call_to_action_text = ''

        action = content.get_last_action().id
        if ActionDescription.COMMENT == action:
            content_intro = _('<span id="content-intro-username">{}</span> added a comment:').format(actor.display_name)
            content_text = content.description
            call_to_action_text = _('Answer')

        elif ActionDescription.CREATION == action:

            # Default values (if not overriden)
            content_text = content.description
            call_to_action_text = _('View online')

            if ContentType.Thread == content.type:
                call_to_action_text = _('Answer')
                content_intro = _('<span id="content-intro-username">{}</span> started a thread entitled:').format(actor.display_name)
                content_text = '<p id="content-body-intro">{}</p>'.format(content.label) + \
                               content.get_last_comment_from(actor).description

            elif ContentType.File == content.type:
                content_intro = _('<span id="content-intro-username">{}</span> added a file entitled:').format(actor.display_name)
                if content.description:
                    content_text = content.description
                else:
                    content_text = '<span id="content-body-only-title">{}</span>'.format(content.label)

            elif ContentType.Page == content.type:
                content_intro = _('<span id="content-intro-username">{}</span> added a page entitled:').format(actor.display_name)
                content_text = '<span id="content-body-only-title">{}</span>'.format(content.label)

        elif ActionDescription.REVISION == action:
            content_text = content.description
            call_to_action_text = _('View online')

            if ContentType.File == content.type:
                content_intro = _('<span id="content-intro-username">{}</span> uploaded a new revision.').format(actor.display_name)
                content_text = ''

            elif ContentType.Page == content.type:
                content_intro = _('<span id="content-intro-username">{}</span> updated this page.').format(actor.display_name)
                previous_revision = content.get_previous_revision()
                title_diff = ''
                if previous_revision.label != content.label:
                    title_diff = htmldiff(previous_revision.label, content.label)
                content_text = _('<p id="content-body-intro">Here is an overview of the changes:</p>')+ \
                    title_diff + \
                    htmldiff(previous_revision.description, content.description)

            elif ContentType.Thread == content.type:
                content_intro = _('<span id="content-intro-username">{}</span> updated the thread description.').format(actor.display_name)
                previous_revision = content.get_previous_revision()
                title_diff = ''
                if previous_revision.label != content.label:
                    title_diff = htmldiff(previous_revision.label, content.label)
                content_text = _('<p id="content-body-intro">Here is an overview of the changes:</p>')+ \
                    title_diff + \
                    htmldiff(previous_revision.description, content.description)

            # elif ContentType.Thread == content.type:
            #     content_intro = _('<span id="content-intro-username">{}</span> updated this page.').format(actor.display_name)
            #     previous_revision = content.get_previous_revision()
            #     content_text = _('<p id="content-body-intro">Here is an overview of the changes:</p>')+ \
            #         htmldiff(previous_revision.description, content.description)

        elif ActionDescription.EDITION == action:
            call_to_action_text = _('View online')

            if ContentType.File == content.type:
                content_intro = _('<span id="content-intro-username">{}</span> updated the file description.').format(actor.display_name)
                content_text = '<p id="content-body-intro">{}</p>'.format(content.get_label()) + \
                    content.description


        if '' == content_intro and content_text == '':
            # Skip notification, but it's not normal
            logger.error(
                self, 'A notification is being sent but no content. '
                      'Here are some debug informations: [content_id: {cid}]'
                      '[action: {act}][author: {actor}]'.format(
                    cid=content.content_id, act=action, actor=actor
                )
            )
            raise ValueError('Unexpected empty notification')

        # Import done here because cyclic import
        from tracim.config.app_cfg import CFG
        body_content = template.render(
            base_url=self._global_config.WEBSITE_BASE_URL,
            _=_,
            h=helpers,
            user_display_name=role.user.display_name,
            user_role_label=role.role_as_label(),
            workspace_label=role.workspace.label,
            content_intro=content_intro,
            content_text=content_text,
            main_title=main_title,
            call_to_action_text=call_to_action_text,
            result = DictLikeClass(item=dictified_item, actor=dictified_actor),
            CFG=CFG.get_instance(),
        )

        return body_content
Code Example #51
File: urlDiff.py Project: skandaloptagon/AL_EXAmine
def do_stuff(i=0):
    temp = glob.glob('content/*')
    size = len(temp)/6

    # break the glob into 6 parts. 1 for each process and choose the 1/6th 
    # associated with the process
    temp = temp[int(i*size):int((i+1)*size)]
    
    logging.info("Starting chunk " + str(int(i*size)))

    for domain in temp:

        # Ignore the timestamps and get the unique paths
        uniq_urls = set()

        # get the list of uniq urls in each domain
        for filename in glob.glob(domain+'/*'):
            uniq_urls.add('_'.join(filename.split('_')[:-1]))

        # iterate the uniq urls
        for path in uniq_urls:

            # Get the timestamps for each uniq filename
            timestamps = set()
            for filename in glob.glob(path + '*'):
                try:
                    timestamps.add(float('.'.join(filename.split('_')[-1].split('.')[:-1])))
                except Exception as e:
                    pass
            try:
                timestamps = list(timestamps)

                # this needs to be done in order.
                timestamps.sort()

                #more parsing
                temp1 = path + '_' + str(timestamps[0]) + '.gz'
                
                # interate the copies in order
                for i in timestamps[1:]:

                    # construct the filename
                    temp2 = path + '_' + str(i) + '.gz'
                    
                    # construct the output file name
                    filename = "diffs/"+path+'_'+str(i)+'_diff.gz'

                    # check make sure the output folder exists
                    dir = os.path.dirname(filename)
                    if not os.path.exists(dir):
                        os.makedirs(dir)

                    # Skip the ones that have already been diffed
                    if not os.path.isfile(filename):

                        # Open the output file
                        with open(filename,'w') as f:
                            try:
                                try:
                                    # this is the sauce.  All of the diffing and writing happens in this line.
                                    f.write(htmldiff(gzip.open(temp1).read(),gzip.open(temp2).read()))
                                    logging.debug("successful write "+filename)
                                except Exception:
                                    # sometimes it doesn't like the sauce so I created verde.
                                    f.write(unicode(htmldiff(gzip.open(temp1).read().decode('utf-8','ignore'),gzip.open(temp2).read().decode('utf-8','ignore'))).encode('utf-8','ignore'))
                                    logging.debug("transcode write "+filename)
                            except IOError as e:
                                logging.debug("missing file " + filename)
                            except AssertionError as e:
                                logging.debug("Assertion Error " + filename + " : " + e)
                    else:
                        logging.debug("already exists " + filename)
                    temp1 = temp2
            except TypeError as e:
                print e
Code Example #52
File: comparing.py Project: biomaks/monitor
 def show_diff(old_diff, new_diff):
     return htmldiff(old_diff, new_diff)
Code Example #53
File: widgets.py Project: Lehych/iktomi-cms
 def value(self):
     return htmldiff(self.value1, self.value2)
Code Example #54
File: DomComparator.py Project: dip-kush/CrawlerUI
def getDomDiff(parentDom, childDom):
    html = htmldiff(parentDom, childDom)
    bshtml = BeautifulSoup(html)
    ins = ''.join(str(bshtml.findAll("ins")))
    diffDom = cleanDom(ins)
    return diffDom
Code Example #55
File: views.py Project: ejucovy/django-svenweb
            new_contents = site.get_page(subpath, rev=new)
        except sven.ResourceUnchanged, e:
            new = e.last_change
            resource_unchanged = True
    except sven.NotAFile:
        return redirect(site.directory_index_url(subpath))
    except sven.NoSuchResource:
        return redirect(site.history_url(subpath))
    except sven.FutureRevision:
        return redirect(site.history_url(subpath))
    if resource_unchanged:
        return redirect(site.page_diff_url(subpath)
                        + "?versions=%s,%s" % (old, new))

    # @@todo: raw diff? binary diff?
    contents = htmldiff(old_contents, new_contents)
    mimetype = mimetypes.guess_type(subpath)[0]
    return dict(site=site, contents=contents, mimetype=mimetype, path=subpath)

@requires("WIKI_EDIT")
@allow_http("GET", "POST")
@rendered_with("sites/site/page-create.html")
def page_create(request, subpath):
    site = request.site

    if request.method == "POST":
        path = request.POST['path']

        # @@todo: don't slugify for raw wikis? dunno
        from django.template.defaultfilters import slugify
        path = '/'.join(slugify(i) for i in path.split('/'))
Code Example #56
File: diff.py Project: AnonOnWarpath/adhocracy
def _diff_html(left, right):
    return htmldiff(left, right)
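
Closing note: the following is a minimal, self-contained sketch, not taken from any of the projects above, of the basic pattern most of these examples share: call lxml's htmldiff on two HTML fragments and style the <ins>/<del> tags it produces. The input strings and the stylesheet are illustrative assumptions.

from lxml.html.diff import htmldiff

# Illustrative inputs (assumed for this sketch).
old_html = "<p>Hello cruel world</p>"
new_html = "<p>Hello wonderful world</p>"

# htmldiff returns an HTML fragment with additions wrapped in <ins>
# and removals wrapped in <del>, roughly:
#   <p>Hello <ins>wonderful</ins> <del>cruel</del> world</p>
diff = htmldiff(old_html, new_html)

# Prepend a stylesheet so insertions and deletions are visible,
# as several of the examples above do.
page = "<style>ins { background: #cfc; } del { background: #fcc; }</style>" + diff
print(page)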