Example #1
0
def main(request):
    """
    Render 'main.html' with a per-scan page count overview.

    Every distinct 'scan_id' in the 'page_data' collection is listed
    (sorted) together with the number of documents stored for it.  The
    template also receives the total page count, the time spent gathering
    the counts, and that time scaled by 40000000 / total_pages.
    """
    # NOTE(review): presumably the page count of the full corpus the build
    # time is extrapolated to -- confirm.
    estimate_pages = 40000000

    collection = getMongoCollection('page_data')

    t = clock()
    total_pages = 0

    scan_info = []
    for scan in sorted(collection.distinct('scan_id')):
        # Count once per scan; each count() call is a separate query.
        n_pages = collection.find({'scan_id': scan}).count()
        scan_info.append({
            'scan_id': scan,
            'n_pages': n_pages
        })
        total_pages += n_pages

    build_time = clock() - t
    # An empty collection is treated as one page so the division is defined.
    full_build_est = build_time * estimate_pages / max(total_pages, 1)

    return render_to_response('main.html', {
        'scans': scan_info,
        'build_time': build_time,
        'total_pages': total_pages,
        'full_build_est': full_build_est
    })
Example #2
0
def runMongo(scan_id, page=None, force=False):
    """
    Run processPage over stored pages and save the results back.

    scan_id -- scan to process; None selects every document in 'page_data'
               (the page argument is ignored in that case)
    page    -- optional 'scandata_index' value narrowing the selection
    force   -- when true, also reprocess pages that already have a
               'contrast' entry
    """
    from helpers import getMongoCollection

    coll = getMongoCollection('page_data')

    # Build the selector step by step instead of branching over find() calls
    selector = {}
    if scan_id is not None:
        selector['scan_id'] = scan_id
        if page is not None:
            selector['scandata_index'] = page

    for doc in coll.find(selector):
        # Skip pages that already carry a result unless forced
        if not force and 'contrast' in doc:
            continue

        result = processPage(doc['scan_id'], doc['ia_page_num'])

        doc['has_illustration']['contrast'] = result['image_detected']
        doc['contrast'] = result

        # (A disabled snippet that stored benchmark timings on the page used
        # to sit here; it referred to a `benchmarks` name that is not defined
        # in this function.)

        coll.save(doc)
Example #3
0
def scan(request, scan_id):
    """
    Render 'scan.html' for one scan.

    The scan's pages are fetched from 'page_data' in ascending
    'scandata_index' order and handed to analyzePages.  Each entry of the
    returned analysis['pages'] is rendered with 'page.html'; the rendered
    fragments and the 'n_illustrations', 'abbyy', 'contrast' and 'color'
    entries of the analysis are passed on to the template.
    """
    import logging

    logger = logging.getLogger(__name__)

    collection = getMongoCollection('page_data')
    pages = collection.find({'scan_id': scan_id}).sort('scandata_index', 1)

    # Diagnostics go through logging instead of bare print statements so a
    # request does not write to the server's stdout.  count() is an extra
    # query, so it is only issued when the message will actually be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('scan %s: %s pages', scan_id, pages.count())

    analysis = analyzePages(pages)

    page_content = [
        render_to_string('page.html', {
            'scan_id': scan_id,
            'page': page
        })
        for page in analysis['pages']
    ]

    return render_to_response(
        'scan.html', {
            'scan_id': scan_id,
            'pages': page_content,
            'n_illustrations': analysis['n_illustrations'],
            'abbyy': analysis['abbyy'],
            'contrast': analysis['contrast'],
            'color': analysis['color']
        })
Example #4
0
def main(request):
    """
    Render 'main.html' listing all scans with their page counts.

    The distinct 'scan_id' values of the 'page_data' collection are walked
    in sorted order and the documents of each are counted.  Besides the
    per-scan list, the template gets the total number of pages, the time
    the counting took, and that time multiplied by 40000000 / total_pages.
    """
    # NOTE(review): looks like the size of the complete corpus used for the
    # extrapolation -- confirm.
    estimate_pages = 40000000

    collection = getMongoCollection('page_data')

    t = clock()

    scan_info = []
    for scan in sorted(collection.distinct('scan_id')):
        # One count() per scan: calling it twice doubles the queries issued.
        page_count = collection.find({'scan_id': scan}).count()
        scan_info.append({'scan_id': scan, 'n_pages': page_count})

    total_pages = sum(entry['n_pages'] for entry in scan_info)

    build_time = clock() - t
    # With no pages at all, divide by 1 rather than by zero.
    full_build_est = build_time * estimate_pages / max(total_pages, 1)

    return render_to_response(
        'main.html', {
            'scans': scan_info,
            'build_time': build_time,
            'total_pages': total_pages,
            'full_build_est': full_build_est
        })
Example #5
0
def pictureBlocksAsSVG(request, scan_id, page_id):
    """
    Render the picture blocks for a page as an SVG image

    The page is looked up by scan_id and int(page_id) (matched against
    'ia_page_num').  Each entry of page['abbyy']['picture_blocks'] is
    converted from l/t/r/b edges to x/y/w/h and rendered through
    'picture_blocks.svg' together with the page's ABBYY width and height.
    The stroke width can be overridden with the 'sw' query parameter.

    Raises Http404 when the page does not exist or has no 'abbyy' data.
    """
    from django.http import Http404

    # Fetch the page from mongodb
    collection = getMongoCollection('page_data')
    page = collection.find_one({'scan_id': scan_id, 'ia_page_num': int(page_id)})

    # Width and height are read from the ABBYY data, so without it there is
    # nothing to draw; answer 404 instead of failing with TypeError/KeyError.
    if page is None or 'abbyy' not in page:
        raise Http404('No ABBYY data for page %s of scan %s' % (page_id, scan_id))
    abbyy = page['abbyy']

    # Allow stroke width as a URL parameter
    stroke_width = request.GET.get('sw', '0.25%')

    # Convert block data to x,y,w,h
    rects = [
        {
            'x': block['l'],
            'y': block['t'],
            'w': block['r'] - block['l'],
            'h': block['b'] - block['t']
        }
        for block in abbyy.get('picture_blocks', [])
    ]

    # Render the response
    return HttpResponse(render_to_response('picture_blocks.svg', {
        'height': abbyy['height'],
        'width': abbyy['width'],
        'rects': rects,
        'stroke_width': stroke_width
    }), content_type='image/svg+xml')
Example #6
0
def scan(request, scan_id):
    """
    Render the 'scan.html' overview of a single scan.

    Pages of the scan are read from 'page_data' sorted by 'scandata_index'
    (ascending) and passed to analyzePages.  Every page of the analysis is
    pre-rendered with 'page.html'; the template receives those fragments
    plus the analysis' 'n_illustrations', 'abbyy', 'contrast' and 'color'
    values.
    """
    import logging

    log = logging.getLogger(__name__)

    collection = getMongoCollection('page_data')
    pages = collection.find({'scan_id': scan_id}).sort('scandata_index', 1)

    # Debug output is logged rather than printed, keeping the view from
    # writing to stdout; the page count needs its own query, so it is only
    # fetched when debug logging is enabled.
    if log.isEnabledFor(logging.DEBUG):
        log.debug('scan %s: %s pages', scan_id, pages.count())

    analysis = analyzePages(pages)

    page_content = []
    for page in analysis['pages']:
        page_context = {'scan_id': scan_id, 'page': page}
        page_content.append(render_to_string('page.html', page_context))

    return render_to_response('scan.html', {
        'scan_id': scan_id,
        'pages': page_content,
        'n_illustrations': analysis['n_illustrations'],
        'abbyy': analysis['abbyy'],
        'contrast': analysis['contrast'],
        'color': analysis['color']
    })
Example #7
0
def runMongo(scan_id, page=None, force=False):
    """
    Apply processPage to pages stored in 'page_data' and persist the result.

    With scan_id None every document is selected; otherwise the selection
    is limited to that scan and, when page is given, to the document with
    that 'scandata_index'.  A page that already has a 'contrast' entry is
    left alone unless force is true.
    """
    from helpers import getMongoCollection

    coll = getMongoCollection('page_data')

    if scan_id is None:
        selector = {}
    else:
        selector = {'scan_id': scan_id}
        if page is not None:
            selector['scandata_index'] = page

    cursor = coll.find(selector)

    for record in cursor:
        needs_processing = force or 'contrast' not in record
        if needs_processing:
            outcome = processPage(record['scan_id'], record['ia_page_num'])

            # Keep the detection flag next to the other illustration flags
            # and store the complete result under 'contrast'
            record['has_illustration']['contrast'] = outcome['image_detected']
            record['contrast'] = outcome

            # (The original carried a disabled snippet here that recorded
            # benchmark timings from an undefined `benchmarks` name.)

            coll.save(record)
Example #8
0
def page(request, scan_id, page_id):
    """
    Render 'page.html' for a single page of a scan.

    The document is looked up by scan_id and int(page_id) (as 'ia_page_num')
    and passed to analyzePage before rendering.  The return value of
    analyzePage is not used -- presumably it annotates the document in
    place; confirm against its definition.
    """
    pages = getMongoCollection('page_data')
    selector = {'scan_id': scan_id, 'ia_page_num': int(page_id)}
    page_doc = pages.find_one(selector)

    analyzePage(page_doc)

    context = {'scan_id': scan_id, 'page': page_doc}
    return render_to_response('page.html', context)
Example #9
0
def page(request, scan_id, page_id):
    """
    Show one page of a scan using the 'page.html' template.

    Fetches the document whose 'scan_id' and 'ia_page_num' match the URL
    arguments (page_id is converted with int), runs analyzePage on it and
    renders it.
    """
    collection = getMongoCollection('page_data')
    ia_page_num = int(page_id)
    document = collection.find_one({'scan_id': scan_id, 'ia_page_num': ia_page_num})

    # The result of analyzePage is discarded; only the document is rendered
    analyzePage(document)

    return render_to_response('page.html', {
        'scan_id': scan_id,
        'page': document
    })
Example #10
0
def compressionHistogram(request):
    """
    Return createHistogram's result for the 'compression' values of all
    pages in 'page_data' that have that field, labelled 'Compression Ratio'.
    """
    collection = getMongoCollection('page_data')

    # Let MongoDB pick the documents that have a 'compression' field and
    # return only that field, rather than transferring every full document
    # and filtering here.
    cursor = collection.find({'compression': {'$exists': True}},
                             {'compression': 1})
    data = [page['compression'] for page in cursor]

    return createHistogram(data, 'Compression Ratio')
Example #11
0
def compressionHistogram(request):
    """
    Collect the 'compression' value of every page that has one and pass the
    list to createHistogram with the label 'Compression Ratio'.
    """
    collection = getMongoCollection('page_data')

    # Filter and project on the database side: only documents containing
    # 'compression' are returned, and only that field is transferred.
    has_compression = {'compression': {'$exists': True}}
    only_compression = {'compression': 1}

    data = []
    for page in collection.find(has_compression, only_compression):
        data.append(page['compression'])

    return createHistogram(data, 'Compression Ratio')
Example #12
0
def coverageHistogram(request):
    """
    Return createHistogram's result for the 'total_coverage_sum' values of
    pages whose 'abbyy_complete' and 'abbyy.image_detected' are both True,
    labelled 'Sum of Picture Block Coverage'.
    """
    selector = {'abbyy_complete': True, 'abbyy.image_detected': True}
    cursor = getMongoCollection('page_data').find(selector)

    # Pages lacking a 'total_coverage_sum' entry are left out
    data = [
        page['abbyy']['total_coverage_sum']
        for page in cursor
        if page['abbyy_complete'] and 'total_coverage_sum' in page['abbyy']
    ]

    return createHistogram(data, 'Sum of Picture Block Coverage')
Example #13
0
def coverageHistogram(request):
    """
    Gather 'total_coverage_sum' from every page with 'abbyy_complete' and
    'abbyy.image_detected' set to True, and hand the values to
    createHistogram under the label 'Sum of Picture Block Coverage'.
    """
    collection = getMongoCollection('page_data')
    matching = collection.find({
        'abbyy_complete': True,
        'abbyy.image_detected': True,
    })

    data = []
    for page in matching:
        if not page['abbyy_complete']:
            continue
        abbyy = page['abbyy']
        if 'total_coverage_sum' not in abbyy:
            continue
        data.append(abbyy['total_coverage_sum'])

    return createHistogram(data, 'Sum of Picture Block Coverage')
Example #14
0
def parallelCoordinates(request):
    """
    Render 'pcoords.html' with one data row per page in 'page_data'.

    Each row holds the gold-standard illustration flag ('gold'), the rounded
    ABBYY coverage sum ('cov'), and the compression ('comp') and contrast
    ('cont') values scaled by 100 and rounded to two decimals; a missing
    measurement is reported as 0.

    Query parameters:
      alpha        -- passed through to the template, 0.8 when absent
      illustration -- 'n' drops rows flagged as illustrated, 'y' drops the
                      other rows
    """
    import json
    from django.utils.safestring import mark_safe

    alpha = request.GET.get('alpha')
    if alpha is None:
        alpha = 0.8

    illustration = request.GET.get('illustration')

    illustrated = []
    plain = []

    for doc in getMongoCollection('page_data').find({}):

        coverage = 0
        if 'abbyy' in doc and 'coverage_sum' in doc['abbyy']:
            coverage = doc['abbyy']['coverage_sum']

        if 'contrast' in doc:
            contrast_pct = round(100 * doc['contrast']['max_contiguous'], 2)
        else:
            contrast_pct = 0

        if 'compression' in doc:
            compression_pct = round(100 * doc['compression'], 2)
        else:
            compression_pct = 0

        is_gold = doc['has_illustration']['gold_standard']
        row = {
            'gold': is_gold,
            'cov': round(coverage, 2),
            'comp': compression_pct,
            'cont': contrast_pct
        }

        if is_gold:
            if illustration != 'n':
                illustrated.append(row)
        elif illustration != 'y':
            plain.append(row)

    # Non-illustrated rows come first, most recently read first, followed by
    # the illustrated rows in reading order
    rows = plain[::-1] + illustrated

    return render_to_response('pcoords.html', {
        'data': mark_safe(json.dumps(rows)),
        'alpha': alpha
    })
Example #15
0
def parallelCoordinates(request):
    """
    Build the data for the parallel-coordinates template 'pcoords.html'.

    For every page in 'page_data' a row with the keys 'gold', 'cov', 'comp'
    and 'cont' is produced (gold-standard flag, rounded coverage sum, and
    compression / contrast scaled by 100 and rounded to two decimals, each
    defaulting to 0 when the page has no such data).  Rows flagged as
    illustrated are appended to the right of a deque, all others to the
    left.  The 'illustration' query parameter filters rows ('n' removes
    illustrated ones, 'y' the rest); 'alpha' is handed to the template and
    defaults to 0.8.
    """
    from collections import deque

    rows = deque()

    alpha = request.GET.get('alpha')
    if alpha is None:
        alpha = 0.8

    illustration = request.GET.get('illustration')

    collection = getMongoCollection('page_data')
    for page in collection.find({}):

        coverage_sum = 0
        if 'abbyy' in page:
            abbyy = page['abbyy']
            if 'coverage_sum' in abbyy:
                coverage_sum = abbyy['coverage_sum']

        contrast_value = 0
        if 'contrast' in page:
            contrast_value = round(100 * page['contrast']['max_contiguous'], 2)

        compression_value = 0
        if 'compression' in page:
            compression_value = round(100 * page['compression'], 2)

        gold = page['has_illustration']['gold_standard']
        entry = {
            'gold': gold,
            'cov': round(coverage_sum, 2),
            'comp': compression_value,
            'cont': contrast_value
        }

        if gold and illustration != 'n':
            rows.append(entry)
        elif not gold and illustration != 'y':
            rows.appendleft(entry)

    import json
    from django.utils.safestring import mark_safe

    payload = mark_safe(json.dumps(list(rows)))
    return render_to_response('pcoords.html', {
        'data': payload,
        'alpha': alpha
    })
Example #16
0
def pictureBlocksAsSVG(request, scan_id, page_id):
    """
    Render the picture blocks for a page as an SVG image

    The page document is found by scan_id and int(page_id) ('ia_page_num').
    Its page['abbyy']['picture_blocks'] entries, given as l/t/r/b edges, are
    turned into x/y/w/h rectangles and rendered with 'picture_blocks.svg'
    using the ABBYY width and height of the page.  The 'sw' query parameter
    sets the stroke width (default '0.25%').

    Raises Http404 when no such page exists or it carries no 'abbyy' data.
    """
    from django.http import Http404

    # Fetch the page from mongodb
    collection = getMongoCollection('page_data')
    page = collection.find_one({
        'scan_id': scan_id,
        'ia_page_num': int(page_id)
    })

    # The SVG dimensions come from the ABBYY data; a missing page or missing
    # ABBYY data is reported as 404 rather than crashing with a
    # TypeError/KeyError further down.
    if page is None or 'abbyy' not in page:
        raise Http404('No ABBYY data for page %s of scan %s' % (page_id, scan_id))
    abbyy = page['abbyy']

    # Allow stroke width as a URL parameter
    stroke_width = request.GET.get('sw', '0.25%')

    # Convert block data to x,y,w,h
    rects = []
    for block in abbyy.get('picture_blocks', []):
        rects.append({
            'x': block['l'],
            'y': block['t'],
            'w': block['r'] - block['l'],
            'h': block['b'] - block['t']
        })

    # Render the response
    svg = render_to_response('picture_blocks.svg', {
        'height': abbyy['height'],
        'width': abbyy['width'],
        'rects': rects,
        'stroke_width': stroke_width
    })
    return HttpResponse(svg, content_type='image/svg+xml')
Example #17
0
def generateHistogram(values, xlabel):
    """
    Plot values as a 50-bin histogram and return the matplotlib figure.

    The bars are green with alpha 0.75, the x axis is labelled with xlabel,
    the y axis with 'Occurrences', and the grid is switched on.
    """
    import matplotlib.pyplot as plt

    figure = plt.figure()
    axes = figure.add_subplot(111)

    # The counts, bin edges and patches returned by hist() are not needed
    axes.hist(values, 50, facecolor='green', alpha=0.75)

    axes.set_xlabel(xlabel)
    axes.set_ylabel('Occurrences')
    axes.grid(True)

    return figure


if __name__ == "__main__":
    # Script entry point: analyse every page in 'page_data' and print the
    # 'abbyy', 'contrast' and 'color' entries of the result.

    from helpers import getMongoCollection

    coll = getMongoCollection('page_data')

    results = analyzePages(coll.find({}))

    # Each line is formatted into one string first, so the output is the
    # same whether print is a statement or a function.
    for label, key in (('ABBYY', 'abbyy'),
                       ('Contrast', 'contrast'),
                       ('Color', 'color')):
        print('%s: %s' % (label, results[key]))
Example #18
0
    import matplotlib.pyplot as plt

    fig = plt.figure()
    ax = fig.add_subplot(111)

    n, bins, patches = ax.hist(values, 50, facecolor='green', alpha=0.75)
    #bincenters = 0.5*(bins[1:]+bins[:-1])

    ax.set_xlabel(xlabel)
    ax.set_ylabel('Occurrences')
    #ax.set_title(r'$\mathrm{Histogram\ of\ IQ:}\ \mu=100,\ \sigma=15$')
    #ax.set_xlim(0, 120)
    ax.grid(True)

    return fig


if __name__ == "__main__":
    # When run as a script, run analyzePages over the whole 'page_data'
    # collection and report its three summary entries.

    from helpers import getMongoCollection

    coll = getMongoCollection('page_data')
    every_page = coll.find({})

    results = analyzePages(every_page)

    # Pre-formatted single arguments print identically as statement or call
    print('ABBYY: %s' % (results['abbyy'],))
    print('Contrast: %s' % (results['contrast'],))
    print('Color: %s' % (results['color'],))