def _purge_batch(self, batch):
    batch_name = batch.name
    # deleting the batch in one shot causes memory to bloat,
    # so we do it piecemeal
    for issue in batch.issues.all():
        for page in issue.pages.all():
            page.delete()
            # remove coordinates
            if os.path.exists(models.coordinates_path(page._url_parts())):
                os.remove(models.coordinates_path(page._url_parts()))
        issue.delete()
    batch.delete()
    if self.PROCESS_OCR:
        self.solr.delete_query('batch:"%s"' % batch_name)
        self.solr.commit()

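# models.coordinates_path appears throughout these snippets but is defined
# elsewhere; below is a minimal sketch of what such a helper might look like,
# assuming a COORD_STORAGE setting and an lccn/date/edition/sequence directory
# layout (both are assumptions for illustration, not the project's confirmed
# implementation):
import os
from django.conf import settings

def coordinates_path(url_parts):
    # e.g. <COORD_STORAGE>/lccn/sn99021999/1900-01-05/ed-1/seq-1.coordinates
    return os.path.join(
        settings.COORD_STORAGE,
        "lccn",
        url_parts["lccn"],
        url_parts["date"],
        "ed-%s" % url_parts["edition"],
        "seq-%s.coordinates" % url_parts["sequence"],
    )
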
def _process_coordinates(self, page, coords):
    _logger.debug("writing out word coords for %s" % page.url)
    # binary mode: gzip_compress returns bytes
    f = open(models.coordinates_path(page._url_parts()), "wb")
    f.write(gzip_compress(json.dumps(coords)))
    f.close()

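# gzip_compress is called above but not defined in this module; here is a
# minimal sketch of such a helper, assuming it simply wraps the standard
# library's gzip module (a hypothetical stand-in, not the project's actual
# utility):
import gzip
import io

def gzip_compress(data):
    # compress a JSON string in memory so it can be written out in one call
    if isinstance(data, str):
        data = data.encode("utf-8")
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
        gz.write(data)
    return buf.getvalue()
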
def coordinates(request, lccn, date, edition, sequence, words=None):
    url_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
    file_path = models.coordinates_path(url_parts)
    try:
        with gzip.open(file_path, 'rb') as i:
            response = HttpResponse(i.read(), content_type='application/json')
            return add_cache_tag(response, "lccn=%s" % lccn)
    except IOError:
        LOGGER.warning('Word coordinates file %s does not exist', file_path)
        raise Http404

def coordinates(request, lccn, date, edition, sequence, words=None):
    url_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
    try:
        # binary mode: the file holds gzipped bytes that are served as-is
        f = open(models.coordinates_path(url_parts), 'rb')
    except IOError:
        return HttpResponse()
    data = f.read()
    r = HttpResponse(mimetype='application/json')
    r['Content-Encoding'] = 'gzip'
    r['Content-Length'] = len(data)
    r.write(data)
    f.close()
    return r

def _process_coordinates(self, page, coords):
    LOGGER.debug("writing out word coords for %s", page.url)
    # use a temp file in case the coordinates dir is an NFS or S3 mount,
    # which can have poor performance for multiple writes
    fd, path = tempfile.mkstemp(text="w", suffix=".coordinates", dir=settings.TEMP_STORAGE)
    f = open(path, "wb")
    f.write(gzip_compress(json.dumps(coords)))
    f.close()
    os.close(fd)
    final_path = models.coordinates_path(page._url_parts())
    try:
        shutil.move(path, final_path)
    except Exception:
        LOGGER.warn(
            "Could not move coordinates to [%s]. Waiting 5 seconds and trying again in case of network mount",
            final_path,
        )
        time.sleep(5)
        shutil.move(path, final_path)

def coordinates(request, lccn, date, edition, sequence, words=None):
    url_parts = {
        "lccn": lccn,
        "date": date,
        "edition": edition,
        "sequence": sequence,
    }
    file_path = models.coordinates_path(url_parts)
    try:
        with gzip.open(file_path, "rb") as i:
            return HttpResponse(i.read(), content_type="application/json")
    except IOError:
        LOGGER.warning("Word coordinates file %s does not exist", file_path)
        raise Http404

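# for context, a view like this would be wired up via a URL pattern roughly
# along these lines (a hypothetical route for illustration; the project's
# actual urls.py, pattern, and view name may differ):
from django.conf.urls import url

urlpatterns = [
    url(
        r"^lccn/(?P<lccn>\S+)/(?P<date>\d{4}-\d{2}-\d{2})/ed-(?P<edition>\d+)/"
        r"seq-(?P<sequence>\d+)/coordinates/$",
        coordinates,
        name="page_coordinates",
    ),
]
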
def coordinates(request, lccn, date, edition, sequence, words=None):
    url_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
    try:
        file_data = gzip.open(models.coordinates_path(url_parts), 'rb')
    except IOError:
        return HttpResponse()
    data = json.load(file_data)
    non_lexemes = re.compile('''^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$|'s$''')
    return_coords = data.copy()
    # reset coords to {} and rebuild it, stripping unwanted punctuation
    return_coords['coords'] = {}
    for key in data.get('coords'):
        return_coords['coords'][re.sub(non_lexemes, '', key)] = data['coords'][key]
    r = HttpResponse(mimetype='application/json')
    r.write(json.dumps(return_coords))
    return r

def coordinates(request, lccn, date, edition, sequence, words=None):
    url_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
    try:
        file_data = gzip.open(models.coordinates_path(url_parts), 'rb')
    except IOError:
        return HttpResponse()
    data = json.load(file_data)
    non_lexemes = re.compile('''^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$|'s$''')
    return_coords = data.copy()
    # reset coords to {} and rebuild it, stripping unwanted punctuation
    return_coords['coords'] = {}
    for key in data.get('coords'):
        return_coords['coords'][re.sub(non_lexemes, '', key)] = data['coords'][key]
    r = HttpResponse(content_type='application/json')
    r.write(json.dumps(return_coords))
    return r

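# a quick demonstration of what the non_lexemes normalization above does: it
# strips leading/trailing runs of non-alphanumerics and a trailing "'s"
import re

non_lexemes = re.compile('''^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$|'s$''')
for raw in ['"Hello,', "dog's", '(1901)']:
    print(re.sub(non_lexemes, '', raw))  # -> Hello, dog, 1901
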
def _process_coordinates(self, page, coords):
    LOGGER.debug("writing out word coords for %s", page.url)
    # We'll use a temporary file in case the coordinates dir is configured
    # to a network filesystem which has poor update performance
    # characteristics
    fd, path = tempfile.mkstemp(text="w", suffix=".coordinates", dir=settings.TEMP_STORAGE)
    f = open(path, "wb")
    f.write(gzip_compress(json.dumps(coords)))
    f.close()
    # mkstemp returns an already-open descriptor; close it so it doesn't leak
    os.close(fd)
    final_path = models.coordinates_path(page._url_parts())
    try:
        shutil.move(path, final_path)
    except Exception:
        LOGGER.warning(
            'Could not move coordinates to "%s". Waiting 5 seconds before trying again…',
            final_path,
        )
        time.sleep(5)
        shutil.move(path, final_path)