Beispiel #1
0
    def compare_revisions(self, old_id, new_id):
        """Return an HTML diff between the content of two stored versions.

        Each id is looked up with ``Version.objects.get`` and the
        ``content_html`` of the two records is handed to
        ``render_html_diff`` (old first, new second).

        Returns an empty string when either id is ``None``.
        """
        if not (old_id is not None and new_id is not None):
            return ''

        # Imported lazily, only once both ids are known to be present.
        from htmldiff import render_html_diff

        old_html, new_html = [
            Version.objects.get(id=version_id).content_html
            for version_id in (old_id, new_id)
        ]
        return render_html_diff(old_html, new_html)
Beispiel #2
0
def distance():
    """Render the similarity-debug page for one article and its candidates.

    Query-string parameters read from ``request.args``:

    * ``data``   -- evaluated into a mapping; the keys ``num``, ``ids`` and
      ``_id`` are read from it.  ``ids`` is itself a mapping whose keys are
      used as article ``_id`` values and whose values are compared against
      ``data['num']``.
    * ``title1`` / ``title2`` -- the two titles whose Simhash distance is
      reported.

    The distances, both article bodies and their HTML diff are collected in
    one dict and passed to the ``test/detail.html`` template as ``data``.
    When no candidate article is found, ``content2`` and the diff ``d`` are
    empty strings instead of the request failing with ``KeyError``.
    """
    raw_data = request.args.get('data', '')
    title1 = request.args.get('title1', '')
    title2 = request.args.get('title2', '')
    # SECURITY(review): ``eval`` executes whatever the client puts into the
    # ``data`` query parameter.  It is kept because the payload format is not
    # visible from here (``ast.literal_eval`` would reject non-literal
    # values) -- replace it with a safe parser once the format is confirmed.
    data = eval(raw_data)
    print(data)
    res = {'title1': title1, 'title2': title2}
    candidate_ids = data['ids']
    # Every iteration overwrites the same key, so only the distance of the
    # last candidate is kept.
    for num in candidate_ids.values():
        res[u'原差距'] = mongo_spider.sim.distance(data['num'], num)

    # NOTE(review): subtracting sys.maxint presumably shifts the hash into
    # the signed range used by the stored values -- confirm.
    title_num1 = Simhash(title1).value - sys.maxint
    title_num2 = Simhash(title2).value - sys.maxint
    res[u'标题差距'] = mongo_spider.sim.distance(title_num1, title_num2)

    # NOTE(review): if find_one returns None for an unknown ``_id`` the
    # subscripts below raise TypeError -- decide whether that should be a 404.
    article = article_mongo.find_one({'_id': data['_id']}, {
        'content': 1,
        'title': 1
    })
    content_num1 = Simhash(article['content']).value - sys.maxint
    text_num = Simhash(article['title'] + default +
                       html2text(article['content'])).value - sys.maxint
    res['content1'] = article['content']

    cursors = article_mongo.find({"_id": {
        "$in": list(candidate_ids)
    }}, {
        'content': 1,
        'title': 1
    })
    matched = False
    # As above, only the values of the last document returned survive.
    for doc in cursors:
        content_num2 = Simhash(doc['content']).value - sys.maxint
        text_num2 = Simhash(doc['title'] + default +
                            html2text(doc['content'])).value - sys.maxint
        res[u'正文差距'] = mongo_spider.sim.distance(content_num1, content_num2)
        res[u'新差距'] = mongo_spider.sim.distance(text_num2, text_num)
        res['content2'] = doc['content']
        matched = True

    if matched:
        res['d'] = render_html_diff(res['content1'], res['content2'])
    else:
        # Nothing to compare against: report an empty diff rather than
        # failing on the missing 'content2' key.
        res['content2'] = ''
        res['d'] = ''
    return render_template('test/detail.html', data=res)
Beispiel #3
0
 def _generate_diff_html(self):
     """Render the HTML diff of ``self.old`` vs ``self.new`` to ``self.html_path``.

     The page is produced from the ``diff.html`` template located next to
     this module.

     Returns:
         None  -- ``self.html_path`` already exists; nothing is done.
         False -- the diff contains neither ``<ins>`` nor ``<del>`` markup,
                  so no file is written.
         True  -- the page was rendered and written.
     """
     if os.path.isfile(self.html_path):
         return
     tmpl_path = os.path.join(os.path.dirname(__file__), "diff.html")
     logging.debug("creating html diff: %s", self.html_path)
     diff = htmldiff.render_html_diff(self.old.html, self.new.html)
     if '<ins>' not in diff and '<del>' not in diff:
         return False
     # Context managers close both handles deterministically; the original
     # left them to the garbage collector, which can delay the flush of the
     # written page.
     with codecs.open(tmpl_path, "r", "utf8") as tmpl_file:
         tmpl = jinja2.Template(tmpl_file.read())
     html = tmpl.render(title=self.new.title,
                        url=self.old.entry.url,
                        old_url=self.old.archive_url,
                        old_time=self.old.created,
                        new_url=self.new.archive_url,
                        new_time=self.new.created,
                        diff=diff)
     with codecs.open(self.html_path, "w", 'utf8') as html_file:
         html_file.write(html)
     return True
Beispiel #4
0
    def generate_diff_html(self, path):
        """Return the rendered HTML diff page for ``self.old`` vs ``self.new``.

        Args:
            path: Location the caller intends to store the page at.  It is
                only checked for existence and used in log messages; this
                method does not write to it.

        Returns:
            The rendered HTML, or ``None`` when ``path`` already exists, the
            ``diff_template.html`` template next to this module is missing,
            or ``self.validate_diff`` rejects the diff.
        """
        if os.path.isfile(path):
            logging.error("Diff file already exists: %s", path)
            return None

        tmpl_path = os.path.join(os.path.dirname(__file__),
                                 "diff_template.html")
        if not os.path.isfile(tmpl_path):
            logging.error("Failed to find diff template: %s", tmpl_path)
            return None

        logging.debug("creating html diff: %s", path)
        diff = htmldiff.render_html_diff(self.old.html, self.new.html)
        if not self.validate_diff(diff):
            return None

        # Close the template file deterministically instead of leaving the
        # handle to the garbage collector.
        with codecs.open(tmpl_path, "r", "utf8") as tmpl_file:
            tmpl = jinja2.Template(tmpl_file.read())
        return tmpl.render(title=self.new.title,
                           url=self.old.entry.url,
                           old_time=self.old.created,
                           new_time=self.new.created,
                           diff=diff)
Beispiel #5
0
def view_revisions(request, cat, slug):
    """List the revisions of a news item and optionally diff two of them.

    Args:
        request: The incoming request; the optional ``old`` and ``new``
            GET parameters name the two ``Version`` ids to compare.
        cat: Not used by this view.
        slug: Slug of the ``News`` item; a 404 is raised when it is unknown.

    Returns:
        The rendered ``view_post_revisions.html`` response.  ``diff`` in the
        context is an empty string unless both ``old`` and ``new`` are given.
    """
    news = get_object_or_404(News, slug=slug)
    revisions = (Version.objects.select_related()
                 .filter(news=news)
                 .order_by('-created_at'))

    diff = ''
    old_id = request.GET.get('old', None)
    new_id = request.GET.get('new', None)

    if old_id is not None and new_id is not None:
        from htmldiff import render_html_diff

        # The ids come straight from the query string, so an unknown id must
        # yield a 404 rather than an unhandled DoesNotExist (HTTP 500).
        # NOTE(review): the lookups are not restricted to versions of
        # ``news`` -- confirm whether cross-item diffs should be allowed.
        old_rev = get_object_or_404(Version, id=old_id).content_html
        new_rev = get_object_or_404(Version, id=new_id).content_html

        diff = render_html_diff(old_rev, new_rev)

    return render(request, 'view_post_revisions.html', {
        'post': news, 'post_revisions': revisions,
        'nb_revisions': revisions.count(), 'diff': diff
    })
Beispiel #6
0
def distance():
    """Show Simhash distances and an HTML diff for an article and candidates.

    Reads ``data``, ``title1`` and ``title2`` from ``request.args``.
    ``data`` is evaluated into a mapping from which ``num``, ``ids`` and
    ``_id`` are read; the keys of ``ids`` select the candidate articles and
    its values are compared against ``data['num']``.

    The collected values are rendered through ``test/detail.html`` as
    ``data``.  If no candidate article is returned by the query, ``content2``
    and the diff ``d`` are set to empty strings (the original raised
    ``KeyError`` in that situation).
    """
    query_payload = request.args.get('data', '')
    title1 = request.args.get('title1', '')
    title2 = request.args.get('title2', '')
    # SECURITY(review): evaluating a client-supplied query parameter with
    # ``eval`` allows arbitrary code execution.  Left unchanged because the
    # payload format cannot be seen from this block; switch to a safe parser
    # (e.g. ast.literal_eval or JSON) once the format is confirmed.
    data = eval(query_payload)
    print(data)
    res = {'title1': title1, 'title2': title2}
    ids = data['ids']
    # The same key is reassigned on each pass: the last value wins.
    for num in ids.values():
        res[u'原差距'] = mongo_spider.sim.distance(data['num'], num)

    # NOTE(review): the ``- sys.maxint`` offset presumably maps the hash to
    # the signed range of the stored values -- confirm.
    title_num1 = Simhash(title1).value - sys.maxint
    title_num2 = Simhash(title2).value - sys.maxint
    res[u'标题差距'] = mongo_spider.sim.distance(title_num1, title_num2)

    projection = {'content': 1, 'title': 1}
    # NOTE(review): a None result from find_one (unknown ``_id``) would make
    # the subscripts below raise TypeError -- confirm desired handling.
    article = article_mongo.find_one({'_id': data['_id']}, projection)
    content_num1 = Simhash(article['content']).value - sys.maxint
    text_num = Simhash(
        article['title'] + default + html2text(article['content'])
    ).value - sys.maxint
    res['content1'] = article['content']

    cursors = article_mongo.find({"_id": {"$in": list(ids)}},
                                 {'content': 1, 'title': 1})
    found_candidate = False
    # Only the values of the last document yielded are kept.
    for candidate in cursors:
        content_num2 = Simhash(candidate['content']).value - sys.maxint
        text_num2 = Simhash(
            candidate['title'] + default + html2text(candidate['content'])
        ).value - sys.maxint
        res[u'正文差距'] = mongo_spider.sim.distance(content_num1, content_num2)
        res[u'新差距'] = mongo_spider.sim.distance(text_num2, text_num)
        res['content2'] = candidate['content']
        found_candidate = True

    if found_candidate:
        res['d'] = render_html_diff(res['content1'], res['content2'])
    else:
        # No candidate to diff against: emit an empty diff instead of
        # failing on the missing 'content2' key.
        res['content2'] = ''
        res['d'] = ''
    return render_template('test/detail.html', data=res)