def main(request):
    """Render ``main.html`` with a per-scan page count summary.

    For every distinct ``scan_id`` in the ``page_data`` collection the view
    records how many documents match that scan. The counting pass is timed
    with ``clock()`` and the elapsed time is scaled to 40,000,000 pages to
    produce ``full_build_est``.

    Args:
        request: The incoming request (not otherwise used by this view).

    Returns:
        The result of ``render_to_response('main.html', ...)`` with the
        context keys ``scans``, ``build_time``, ``total_pages`` and
        ``full_build_est``.
    """
    # NOTE(review): 40,000,000 is the constant hard-coded by the original;
    # presumably the page count of the full corpus -- confirm.
    full_build_pages = 40000000

    collection = getMongoCollection('page_data')
    t = clock()
    total_pages = 0
    scan_info = []
    for scan in sorted(collection.distinct('scan_id')):
        # Count once and reuse the value: assuming a pymongo-style cursor,
        # every count() call is its own query against the server.
        n_pages = collection.find({'scan_id': scan}).count()
        scan_info.append({'scan_id': scan, 'n_pages': n_pages})
        total_pages += n_pages
    build_time = clock() - t

    # max(..., 1) keeps the estimate defined when the collection is empty,
    # matching the original's "divide by 1" fallback.
    full_build_est = build_time * full_build_pages / max(total_pages, 1)

    return render_to_response('main.html', {
        'scans': scan_info,
        'build_time': build_time,
        'total_pages': total_pages,
        'full_build_est': full_build_est,
    })
def runMongo(scan_id, page=None, force=False):
    """Run ``processPage`` over stored pages and save the results.

    Args:
        scan_id: Scan whose pages are processed; ``None`` selects every
            document in the ``page_data`` collection.
        page: Optional ``scandata_index`` that restricts the run to a single
            page of ``scan_id``. Ignored when ``scan_id`` is ``None``.
        force: When true, pages that already have a ``contrast`` entry are
            processed again instead of being skipped.

    Raises:
        KeyError: If a selected document lacks ``has_illustration`` (or the
            ``scan_id`` / ``ia_page_num`` keys passed to ``processPage``).
    """
    from helpers import getMongoCollection
    coll = getMongoCollection('page_data')

    if scan_id is None:
        query = {}
    elif page is None:
        query = {'scan_id': scan_id}
    else:
        query = {'scan_id': scan_id, 'scandata_index': page}

    # Use a distinct loop name so the ``page`` parameter is not shadowed.
    for doc in coll.find(query):
        if not force and 'contrast' in doc:
            continue
        result = processPage(doc['scan_id'], doc['ia_page_num'])
        doc['has_illustration']['contrast'] = result['image_detected']
        doc['contrast'] = result
        # Only documents that were (re)processed are written back; an
        # untouched document has nothing new to persist.
        coll.save(doc)
def scan(request, scan_id):
    """Render ``scan.html`` for one scan, including every analysed page.

    The scan's documents are fetched in ascending ``scandata_index`` order
    and handed to ``analyzePages``. Each entry of ``analysis['pages']`` is
    rendered with ``page.html`` and the resulting strings are passed to the
    outer template.

    Args:
        request: The incoming request (not otherwise used by this view).
        scan_id: Identifier matched against the ``scan_id`` field.

    Returns:
        The result of ``render_to_response('scan.html', ...)``.
    """
    import logging
    log = logging.getLogger(__name__)

    collection = getMongoCollection('page_data')
    pages = collection.find({'scan_id': scan_id}).sort('scandata_index', 1)

    # The page count is diagnostic only, so pay for the extra count() query
    # just when debug logging is actually enabled.
    if log.isEnabledFor(logging.DEBUG):
        log.debug('scan %s: %s pages', scan_id, pages.count())

    analysis = analyzePages(pages)
    page_content = [
        render_to_string('page.html', {'scan_id': scan_id, 'page': page})
        for page in analysis['pages']
    ]
    return render_to_response('scan.html', {
        'scan_id': scan_id,
        'pages': page_content,
        'n_illustrations': analysis['n_illustrations'],
        'abbyy': analysis['abbyy'],
        'contrast': analysis['contrast'],
        'color': analysis['color'],
    })
def main(request):
    """Render ``main.html`` with a per-scan page count summary.

    Each distinct ``scan_id`` in the ``page_data`` collection is listed
    together with the number of documents that match it. The counting pass
    is timed with ``clock()`` and that time is scaled to 40,000,000 pages to
    give ``full_build_est``.

    Args:
        request: The incoming request (not otherwise used by this view).

    Returns:
        The result of ``render_to_response('main.html', ...)`` with the
        context keys ``scans``, ``build_time``, ``total_pages`` and
        ``full_build_est``.
    """
    # NOTE(review): 40,000,000 is the constant hard-coded by the original;
    # presumably the page count of the full corpus -- confirm.
    full_build_pages = 40000000

    collection = getMongoCollection('page_data')
    t = clock()
    total_pages = 0
    scan_info = []
    for scan in sorted(collection.distinct('scan_id')):
        # One count() per scan instead of two: assuming a pymongo-style
        # cursor, each call is a separate query.
        n_pages = collection.find({'scan_id': scan}).count()
        scan_info.append({'scan_id': scan, 'n_pages': n_pages})
        total_pages += n_pages
    build_time = clock() - t

    # Dividing by at least 1 reproduces the original's empty-collection
    # fallback without a duplicated branch.
    full_build_est = build_time * full_build_pages / max(total_pages, 1)

    return render_to_response('main.html', {
        'scans': scan_info,
        'build_time': build_time,
        'total_pages': total_pages,
        'full_build_est': full_build_est,
    })
def pictureBlocksAsSVG(request, scan_id, page_id):
    """
    Render the picture blocks for a page as an SVG image

    Args:
        request: The incoming request; the optional ``sw`` query parameter
            sets the stroke width passed to the template (default
            ``'0.25%'``).
        scan_id: Identifier matched against the ``scan_id`` field.
        page_id: Page number; converted with ``int()`` and matched against
            ``ia_page_num``.

    Returns:
        An ``HttpResponse`` with content type ``image/svg+xml`` holding the
        rendered ``picture_blocks.svg`` template.

    Raises:
        Http404: If no matching page exists or it has no ``abbyy`` entry.
        ValueError: If ``page_id`` is not an integer literal.
    """
    from django.http import Http404

    # Fetch the page from mongodb
    collection = getMongoCollection('page_data')
    page = collection.find_one({
        'scan_id': scan_id,
        'ia_page_num': int(page_id),
    })
    # The template needs the abbyy width/height, so without that data there
    # is nothing to draw; answer 404 rather than failing on the lookup.
    if page is None or 'abbyy' not in page:
        raise Http404(
            'No ABBYY data for page %s of scan %s' % (page_id, scan_id))
    abbyy = page['abbyy']

    # Allow stroke width as a URL parameter
    stroke_width = request.GET.get('sw')
    if stroke_width is None:
        stroke_width = '0.25%'

    # Convert block data (l, t, r, b edges) to x, y, w, h
    rects = [
        {
            'x': block['l'],
            'y': block['t'],
            'w': block['r'] - block['l'],
            'h': block['b'] - block['t'],
        }
        for block in abbyy.get('picture_blocks', [])
    ]

    # Render the template to a string and build a single response around
    # it, instead of nesting one response object inside another.
    svg = render_to_string('picture_blocks.svg', {
        'height': abbyy['height'],
        'width': abbyy['width'],
        'rects': rects,
        'stroke_width': stroke_width,
    })
    return HttpResponse(svg, content_type='image/svg+xml')
def scan(request, scan_id):
    """Render ``scan.html`` for one scan, including every analysed page.

    Documents for the scan are read in ascending ``scandata_index`` order
    and passed to ``analyzePages``. Every entry of ``analysis['pages']`` is
    rendered through ``page.html``; the rendered strings go to the outer
    template as ``pages``.

    Args:
        request: The incoming request (not otherwise used by this view).
        scan_id: Identifier matched against the ``scan_id`` field.

    Returns:
        The result of ``render_to_response('scan.html', ...)``.
    """
    import logging
    log = logging.getLogger(__name__)

    collection = getMongoCollection('page_data')
    pages = collection.find({'scan_id': scan_id}).sort('scandata_index', 1)

    # Diagnostics go through logging, and the extra count() query is only
    # issued when debug output is enabled.
    if log.isEnabledFor(logging.DEBUG):
        log.debug('scan %s: %s pages', scan_id, pages.count())

    analysis = analyzePages(pages)
    page_content = [
        render_to_string('page.html', {'scan_id': scan_id, 'page': page})
        for page in analysis['pages']
    ]
    return render_to_response('scan.html', {
        'scan_id': scan_id,
        'pages': page_content,
        'n_illustrations': analysis['n_illustrations'],
        'abbyy': analysis['abbyy'],
        'contrast': analysis['contrast'],
        'color': analysis['color'],
    })
def page(request, scan_id, page_id):
    """Render ``page.html`` for a single page of a scan.

    Args:
        request: The incoming request (not otherwise used by this view).
        scan_id: Identifier matched against the ``scan_id`` field.
        page_id: Page number; converted with ``int()`` and matched against
            ``ia_page_num``.

    Returns:
        The result of ``render_to_response('page.html', ...)``.

    Raises:
        Http404: If no document matches the scan and page number.
        ValueError: If ``page_id`` is not an integer literal.
    """
    from django.http import Http404

    collection = getMongoCollection('page_data')
    page = collection.find_one({
        'scan_id': scan_id,
        'ia_page_num': int(page_id),
    })
    # find_one yields None when nothing matches; report that as a missing
    # page instead of handing None to analyzePage.
    if page is None:
        raise Http404('Page %s of scan %s not found' % (page_id, scan_id))

    # The return value of analyzePage is not used here; the same document
    # object is passed on to the template afterwards.
    analyzePage(page)
    return render_to_response('page.html', {
        'scan_id': scan_id,
        'page': page,
    })
def page(request, scan_id, page_id):
    """Render ``page.html`` for a single page of a scan.

    Args:
        request: The incoming request (not otherwise used by this view).
        scan_id: Identifier matched against the ``scan_id`` field.
        page_id: Page number; converted with ``int()`` and matched against
            ``ia_page_num``.

    Returns:
        The result of ``render_to_response('page.html', ...)``.

    Raises:
        Http404: If no document matches the scan and page number.
        ValueError: If ``page_id`` is not an integer literal.
    """
    from django.http import Http404

    collection = getMongoCollection('page_data')
    page = collection.find_one({
        'scan_id': scan_id,
        'ia_page_num': int(page_id),
    })
    # A missing document comes back as None; turn that into a 404 rather
    # than passing None on to analyzePage.
    if page is None:
        raise Http404('Page %s of scan %s not found' % (page_id, scan_id))

    # analyzePage's return value is unused; the document itself is what the
    # template receives.
    analyzePage(page)
    return render_to_response('page.html', {
        'scan_id': scan_id,
        'page': page,
    })
def compressionHistogram(request):
    """Build a histogram of the ``compression`` values stored on pages.

    Every document in ``page_data`` is scanned; those without a
    ``compression`` key are left out. The collected values are passed to
    ``createHistogram`` with the axis label ``'Compression Ratio'``.

    Args:
        request: The incoming request (not otherwise used by this view).

    Returns:
        Whatever ``createHistogram`` returns.
    """
    documents = getMongoCollection('page_data').find({})
    ratios = [doc['compression'] for doc in documents if 'compression' in doc]
    return createHistogram(ratios, 'Compression Ratio')
def coverageHistogram(request):
    """Build a histogram of ``abbyy.total_coverage_sum`` values.

    Only documents with ``abbyy_complete`` and ``abbyy.image_detected`` both
    true are queried, and of those only the ones that carry a
    ``total_coverage_sum`` contribute a value.

    Args:
        request: The incoming request (not otherwise used by this view).

    Returns:
        Whatever ``createHistogram`` returns.
    """
    query = {"abbyy_complete": True, "abbyy.image_detected": True}
    cursor = getMongoCollection('page_data').find(query)
    sums = [
        doc['abbyy']['total_coverage_sum']
        for doc in cursor
        if doc['abbyy_complete'] and 'total_coverage_sum' in doc['abbyy']
    ]
    return createHistogram(sums, 'Sum of Picture Block Coverage')
def coverageHistogram(request):
    """Build a histogram of ``abbyy.total_coverage_sum`` values.

    The query selects documents whose ``abbyy_complete`` and
    ``abbyy.image_detected`` fields are both true; a document adds a value
    only when its ``abbyy`` entry has a ``total_coverage_sum`` key.

    Args:
        request: The incoming request (not otherwise used by this view).

    Returns:
        Whatever ``createHistogram`` returns.
    """
    query = {"abbyy_complete": True, "abbyy.image_detected": True}
    cursor = getMongoCollection('page_data').find(query)
    sums = [
        doc['abbyy']['total_coverage_sum']
        for doc in cursor
        if doc['abbyy_complete'] and 'total_coverage_sum' in doc['abbyy']
    ]
    return createHistogram(sums, 'Sum of Picture Block Coverage')
def parallelCoordinates(request):
    """Render ``pcoords.html`` with one data row per page document.

    Each row holds the page's gold-standard flag (``gold``) and three
    numbers rounded to two decimals: ``cov`` (``abbyy.coverage_sum``),
    ``comp`` (100 * ``compression``) and ``cont``
    (100 * ``contrast.max_contiguous``). A missing source field yields 0.

    Query parameters:
        alpha: Passed to the template as a float. Missing or unparseable
            values fall back to 0.8.
        illustration: ``'y'`` keeps only rows whose gold-standard flag is
            truthy, ``'n'`` keeps only the others; anything else keeps all.

    Args:
        request: The incoming request carrying the query parameters above.

    Returns:
        The result of ``render_to_response('pcoords.html', ...)`` with the
        rows serialised as JSON under ``data`` and the float ``alpha``.

    Raises:
        KeyError: If a document lacks ``has_illustration.gold_standard``.
    """
    import json
    from collections import deque
    from django.utils.safestring import mark_safe

    default_alpha = 0.8
    # The query string is untrusted text; always hand the template a float
    # so ``alpha`` has one type whether or not the parameter was supplied.
    try:
        alpha = float(request.GET.get('alpha', default_alpha))
    except (TypeError, ValueError):
        alpha = default_alpha

    illustration = request.GET.get('illustration')

    # Non-gold rows are pushed on the left and gold rows on the right, so
    # the final list has all non-gold rows ahead of all gold rows.
    result = deque()
    for page in getMongoCollection('page_data').find({}):
        # NOTE(review): this reads 'coverage_sum' while coverageHistogram
        # reads 'total_coverage_sum' -- confirm both keys are intended.
        coverage_sum = page.get('abbyy', {}).get('coverage_sum', 0)

        contrast_val = 0
        if 'contrast' in page:
            contrast_val = round(100 * page['contrast']['max_contiguous'], 2)

        comp_val = 0
        if 'compression' in page:
            comp_val = round(100 * page['compression'], 2)

        is_gold = page['has_illustration']['gold_standard']
        data = {
            'gold': is_gold,
            'cov': round(coverage_sum, 2),
            'comp': comp_val,
            'cont': contrast_val,
        }
        if is_gold:
            if illustration != 'n':
                result.append(data)
        elif illustration != 'y':
            result.appendleft(data)

    return render_to_response('pcoords.html', {
        'data': mark_safe(json.dumps(list(result))),
        'alpha': alpha,
    })
def parallelCoordinates(request):
    """Render ``pcoords.html`` with one data row per page document.

    Each row carries the page's gold-standard flag (``gold``) plus three
    values rounded to two decimals: ``cov`` (``abbyy.coverage_sum``),
    ``comp`` (100 * ``compression``) and ``cont``
    (100 * ``contrast.max_contiguous``). A missing source field yields 0.

    Query parameters:
        alpha: Passed to the template as a float. Missing or unparseable
            values fall back to 0.8.
        illustration: ``'y'`` keeps only rows whose gold-standard flag is
            truthy, ``'n'`` keeps only the others; anything else keeps all.

    Args:
        request: The incoming request carrying the query parameters above.

    Returns:
        The result of ``render_to_response('pcoords.html', ...)`` with the
        rows serialised as JSON under ``data`` and the float ``alpha``.

    Raises:
        KeyError: If a document lacks ``has_illustration.gold_standard``.
    """
    import json
    from collections import deque
    from django.utils.safestring import mark_safe

    default_alpha = 0.8
    # Parse the untrusted query value so the template always receives a
    # float, matching the type of the default.
    try:
        alpha = float(request.GET.get('alpha', default_alpha))
    except (TypeError, ValueError):
        alpha = default_alpha

    illustration = request.GET.get('illustration')

    # appendleft for non-gold rows and append for gold rows keeps every
    # non-gold row ahead of every gold row in the final list.
    result = deque()
    for page in getMongoCollection('page_data').find({}):
        # NOTE(review): this reads 'coverage_sum' while coverageHistogram
        # reads 'total_coverage_sum' -- confirm both keys are intended.
        coverage_sum = page.get('abbyy', {}).get('coverage_sum', 0)

        contrast_val = 0
        if 'contrast' in page:
            contrast_val = round(100 * page['contrast']['max_contiguous'], 2)

        comp_val = 0
        if 'compression' in page:
            comp_val = round(100 * page['compression'], 2)

        is_gold = page['has_illustration']['gold_standard']
        data = {
            'gold': is_gold,
            'cov': round(coverage_sum, 2),
            'comp': comp_val,
            'cont': contrast_val,
        }
        if is_gold:
            if illustration != 'n':
                result.append(data)
        elif illustration != 'y':
            result.appendleft(data)

    return render_to_response('pcoords.html', {
        'data': mark_safe(json.dumps(list(result))),
        'alpha': alpha,
    })
def pictureBlocksAsSVG(request, scan_id, page_id):
    """
    Render the picture blocks for a page as an SVG image

    Args:
        request: The incoming request; the optional ``sw`` query parameter
            sets the stroke width passed to the template (default
            ``'0.25%'``).
        scan_id: Identifier matched against the ``scan_id`` field.
        page_id: Page number; converted with ``int()`` and matched against
            ``ia_page_num``.

    Returns:
        An ``HttpResponse`` with content type ``image/svg+xml`` holding the
        rendered ``picture_blocks.svg`` template.

    Raises:
        Http404: If no matching page exists or it has no ``abbyy`` entry.
        ValueError: If ``page_id`` is not an integer literal.
    """
    from django.http import Http404

    # Fetch the page from mongodb
    collection = getMongoCollection('page_data')
    page = collection.find_one({
        'scan_id': scan_id,
        'ia_page_num': int(page_id),
    })
    # Width and height come from the abbyy entry, so a page without one
    # cannot be drawn; respond with 404 instead of failing on the lookup.
    if page is None or 'abbyy' not in page:
        raise Http404(
            'No ABBYY data for page %s of scan %s' % (page_id, scan_id))
    abbyy = page['abbyy']

    # Allow stroke width as a URL parameter
    stroke_width = request.GET.get('sw')
    if stroke_width is None:
        stroke_width = '0.25%'

    # Convert block data (l, t, r, b edges) to x, y, w, h
    rects = [
        {
            'x': block['l'],
            'y': block['t'],
            'w': block['r'] - block['l'],
            'h': block['b'] - block['t'],
        }
        for block in abbyy.get('picture_blocks', [])
    ]

    # Build one response from the rendered string rather than wrapping a
    # response object in a second HttpResponse.
    svg = render_to_string('picture_blocks.svg', {
        'height': abbyy['height'],
        'width': abbyy['width'],
        'rects': rects,
        'stroke_width': stroke_width,
    })
    return HttpResponse(svg, content_type='image/svg+xml')
def generateHistogram(values, xlabel):
    """Create a matplotlib figure holding a 50-bin histogram of ``values``.

    Args:
        values: Data passed straight to ``Axes.hist``.
        xlabel: Label for the x axis; the y axis is labelled
            ``'Occurrences'``.

    Returns:
        The figure created by ``plt.figure()``, with a single subplot
        containing the histogram and a grid.
    """
    import matplotlib.pyplot as plt

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Only the drawing side effect is needed; the values returned by hist()
    # were never used.
    ax.hist(values, 50, facecolor='green', alpha=0.75)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Occurrences')
    ax.grid(True)
    return fig


if __name__ == "__main__":
    # Script entry point: analyse every stored page and print the three
    # summary entries of the result.
    from helpers import getMongoCollection
    coll = getMongoCollection('page_data')
    results = analyzePages(coll.find({}))
    # A single pre-formatted string prints the same text on Python 2 and 3.
    for label, key in (('ABBYY', 'abbyy'),
                       ('Contrast', 'contrast'),
                       ('Color', 'color')):
        print('%s: %s' % (label, results[key]))
import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) n, bins, patches = ax.hist(values, 50, facecolor='green', alpha=0.75) #bincenters = 0.5*(bins[1:]+bins[:-1]) ax.set_xlabel(xlabel) ax.set_ylabel('Occurrences') #ax.set_title(r'$\mathrm{Histogram\ of\ IQ:}\ \mu=100,\ \sigma=15$') #ax.set_xlim(0, 120) ax.grid(True) return fig if __name__ == "__main__": from helpers import getMongoCollection coll = getMongoCollection('page_data') results = analyzePages(coll.find({})) print 'ABBYY:', results['abbyy'] print 'Contrast:', results['contrast'] print 'Color:', results['color']