Example #1
0
 def _purge_batch(self, batch):
     """Delete a batch together with its issues, pages and coordinate files.

     Pages and issues are deleted one at a time instead of relying on a
     single ``batch.delete()``, because deleting the batch in one go causes
     memory to bloat out. When ``self.PROCESS_OCR`` is truthy, the batch's
     documents are also removed from the Solr index and the change is
     committed.

     :param batch: the batch object to purge; its ``name``, ``issues`` and
         ``delete()`` are used.
     :raises OSError: if a coordinates file exists but cannot be removed.
     """
     import errno

     # Capture the name up front: it is still needed for the Solr query
     # after the batch itself has been deleted.
     batch_name = batch.name
     for issue in batch.issues.all():
         for page in issue.pages.all():
             page.delete()
             # Remove the page's coordinates file. The path is computed once
             # and the removal is attempted directly (no separate existence
             # check), so a file that disappears in the meantime cannot
             # crash the purge.
             coords_path = models.coordinates_path(page._url_parts())
             try:
                 os.remove(coords_path)
             except OSError as e:
                 # A missing file simply means there is nothing to clean
                 # up; any other failure is propagated to the caller.
                 if e.errno not in (errno.ENOENT, errno.ENOTDIR):
                     raise
             # NOTE(review): presumably clears the accumulated query log so
             # memory stays bounded while looping - confirm.
             reset_queries()
         issue.delete()
     batch.delete()
     if self.PROCESS_OCR:
         self.solr.delete_query('batch:"%s"' % batch_name)
         self.solr.commit()
Example #2
0
    def _process_coordinates(self, page, coords):
        """Write the word coordinates of ``page`` to its coordinates file.

        ``coords`` is serialized to JSON, passed through ``gzip_compress``
        and written to the path returned by ``models.coordinates_path`` for
        this page.

        :param page: page object; ``page.url`` is logged and
            ``page._url_parts()`` selects the output path.
        :param coords: JSON-serializable coordinate data.
        """
        _logger.debug("writing out word coords for %s", page.url)

        # The payload is gzip-compressed, hence binary mode. The ``with``
        # block guarantees the handle is closed even when serialization or
        # the write itself raises.
        with open(models.coordinates_path(page._url_parts()), "wb") as f:
            f.write(gzip_compress(json.dumps(coords)))
Example #3
0
 def _purge_batch(self, batch):
     """Delete a batch together with its issues, pages and coordinate files.

     Pages and issues are deleted one at a time instead of relying on a
     single ``batch.delete()``, because deleting the batch in one go causes
     memory to bloat out. When ``self.PROCESS_OCR`` is truthy, the batch's
     documents are also removed from the Solr index and the change is
     committed.

     :param batch: the batch object to purge; its ``name``, ``issues`` and
         ``delete()`` are used.
     :raises OSError: if a coordinates file exists but cannot be removed.
     """
     import errno

     # Capture the name up front: it is still needed for the Solr query
     # after the batch itself has been deleted.
     batch_name = batch.name
     for issue in batch.issues.all():
         for page in issue.pages.all():
             page.delete()
             # Remove the page's coordinates file. The path is computed once
             # and the removal is attempted directly (no separate existence
             # check), so a file that disappears in the meantime cannot
             # crash the purge.
             coords_path = models.coordinates_path(page._url_parts())
             try:
                 os.remove(coords_path)
             except OSError as e:
                 # A missing file simply means there is nothing to clean
                 # up; any other failure is propagated to the caller.
                 if e.errno not in (errno.ENOENT, errno.ENOTDIR):
                     raise
             # NOTE(review): presumably clears the accumulated query log so
             # memory stays bounded while looping - confirm.
             reset_queries()
         issue.delete()
     batch.delete()
     if self.PROCESS_OCR:
         self.solr.delete_query('batch:"%s"' % batch_name)
         self.solr.commit()
Example #4
0
    def _process_coordinates(self, page, coords):
        """Write the word coordinates of ``page`` to its coordinates file.

        ``coords`` is serialized to JSON, passed through ``gzip_compress``
        and written to the path returned by ``models.coordinates_path`` for
        this page.

        :param page: page object; ``page.url`` is logged and
            ``page._url_parts()`` selects the output path.
        :param coords: JSON-serializable coordinate data.
        """
        _logger.debug("writing out word coords for %s", page.url)

        # The payload is gzip-compressed, hence binary mode. The ``with``
        # block guarantees the handle is closed even when serialization or
        # the write itself raises.
        with open(models.coordinates_path(page._url_parts()), "wb") as f:
            f.write(gzip_compress(json.dumps(coords)))
Example #5
0
def coordinates(request, lccn, date, edition, sequence, words=None):
    """Return the stored word coordinates of a page as a JSON response.

    The gzipped JSON file located through ``models.coordinates_path`` is
    loaded and every key of its ``coords`` mapping is stripped of leading
    and trailing non-alphanumeric characters and of a trailing ``'s``.
    Keys that normalize to the same text overwrite one another (the last
    one iterated wins).

    :param request: the incoming request (not inspected here).
    :param lccn, date, edition, sequence: identify the page; passed on to
        ``models.coordinates_path`` as a dict.
    :param words: accepted for interface compatibility; unused.
    :returns: an empty ``HttpResponse`` when the coordinates file cannot be
        opened, otherwise an ``application/json`` response holding the
        cleaned-up data.
    """
    url_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
    try:
        file_data = gzip.open(models.coordinates_path(url_parts), 'rb')
    except IOError:
        return HttpResponse()

    # Close the gzip handle as soon as the payload is parsed, even when
    # parsing fails, so the file descriptor is not leaked.
    try:
        data = json.load(file_data)
    finally:
        file_data.close()

    non_lexemes = re.compile('''^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$|'s$''')
    return_coords = data.copy()
    # Rebuild 'coords' from scratch, getting rid of unwanted punctuation in
    # the keys. A missing 'coords' entry is treated as an empty mapping
    # rather than failing while iterating over None.
    cleaned_coords = {}
    for key, value in (data.get('coords') or {}).items():
        cleaned_coords[non_lexemes.sub('', key)] = value
    return_coords['coords'] = cleaned_coords

    r = HttpResponse(content_type='application/json')
    r.write(json.dumps(return_coords))
    return r
Example #6
0
def coordinates(request, lccn, date, edition, sequence, words=None):
    """Return the stored word coordinates of a page as a JSON response.

    The gzipped JSON file located through ``models.coordinates_path`` is
    loaded and every key of its ``coords`` mapping is stripped of leading
    and trailing non-alphanumeric characters and of a trailing ``'s``.
    Keys that normalize to the same text overwrite one another (the last
    one iterated wins).

    :param request: the incoming request (not inspected here).
    :param lccn, date, edition, sequence: identify the page; passed on to
        ``models.coordinates_path`` as a dict.
    :param words: accepted for interface compatibility; unused.
    :returns: an empty ``HttpResponse`` when the coordinates file cannot be
        opened, otherwise an ``application/json`` response holding the
        cleaned-up data.
    """
    url_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
    try:
        file_data = gzip.open(models.coordinates_path(url_parts), 'rb')
    except IOError:
        return HttpResponse()

    # Close the gzip handle as soon as the payload is parsed, even when
    # parsing fails, so the file descriptor is not leaked.
    try:
        data = json.load(file_data)
    finally:
        file_data.close()

    non_lexemes = re.compile('''^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$|'s$''')
    return_coords = data.copy()
    # Rebuild 'coords' from scratch, getting rid of unwanted punctuation in
    # the keys. A missing 'coords' entry is treated as an empty mapping
    # rather than failing while iterating over None.
    cleaned_coords = {}
    for key, value in (data.get('coords') or {}).items():
        cleaned_coords[non_lexemes.sub('', key)] = value
    return_coords['coords'] = cleaned_coords

    r = HttpResponse(content_type='application/json')
    r.write(json.dumps(return_coords))
    return r