def handle(self, **options):
    """Purge "[electronic resource]." titles linked to chroniclingamerica.

    Every Title whose URLs contain 'chroniclingamerica' is inspected; when
    the MARC 245 $h subfield equals '[electronic resource].' the title is
    removed from the solr index and deleted.

    options['pretend']: when true, matching titles are only printed and
    nothing is deleted or committed.
    """
    pretend = options['pretend']
    candidates = Title.objects.filter(
        urls__value__icontains='chroniclingamerica')

    for title in candidates:
        record = pymarc.parse_xml_to_array(StringIO(title.marc.xml))[0]
        # NOTE(review): assumes every record carries a 245 field; confirm
        # how the installed pymarc behaves when it is absent.
        if record['245']['h'] != '[electronic resource].':
            continue

        if pretend:
            print(title)
            continue

        # The original call had placeholders but no arguments, so the
        # log line never identified which title was being removed.
        _log.info("deleting %s [%s] from solr index", title, title.lccn)
        solr_index.delete_title(title)
        _log.info("purging %s [%s]", title, title.lccn)
        title.delete()

    # Commit once after the loop rather than once per deleted title.
    if not pretend:
        solr_index.conn().commit()
def test_index_pages(self):
    """Indexing pages into an emptied page index yields two page docs."""
    conn = si.conn()

    # Start from a clean slate: drop every page document and commit.
    conn.delete(q='type:page')
    conn.commit()
    self.assertEqual(si.page_count(), 0)

    # Re-index and verify the expected number of pages is present.
    si.index_pages()
    self.assertEqual(si.page_count(), 2)
def __init__(self, process_ocr=True, process_coordinates=True):
    """Set up a BatchLoader.

    process_ocr -- mostly useful in tests: pass a false value to skip the
        time-consuming OCR text extraction and indexing. A solr
        connection is only opened (as ``self.solr``) when this is true.
    process_coordinates -- stored as ``self.PROCESS_COORDINATES``.
    """
    self.PROCESS_OCR = process_ocr
    # No solr connection is needed when OCR processing is switched off.
    if process_ocr:
        self.solr = solr_index.conn()
    self.PROCESS_COORDINATES = process_coordinates
def test_count(self):
    """Page count and an empty-proxtext paginator both report 108."""
    conn = solr_index.conn()

    # Rebuild the page index from scratch before counting.
    conn.delete(q='type:page')
    solr_index.index_pages()
    conn.commit()
    self.assertEqual(solr_index.page_count(), 108)

    # A paginator built from an empty proxtext query reports the same total.
    query = QueryDict('proxtext=')
    paginator = solr_index.SolrPaginator(query)
    self.assertEqual(108, paginator.count)
def handle(self, batch_name=None, *args, **options):
    """Purge the named batch, optionally optimizing solr and MySQL after.

    batch_name -- name of the batch handed to BatchLoader.purge_batch.
    options['optimize'] -- when true, run a solr optimize followed by
        ``OPTIMIZE TABLE core_ocr`` once the purge has finished.

    Raises CommandError when extra positional arguments are supplied or
    when the loader raises BatchLoaderException.
    """
    # Exactly one positional argument (the batch name) is accepted.
    if args:
        raise CommandError('Usage is purge_batch %s' % self.args)

    purger = BatchLoader()
    try:
        log.info("purging batch '%s'", batch_name)
        purger.purge_batch(batch_name)

        if options['optimize']:
            log.info("optimizing solr")
            solr_index.conn().optimize()

            log.info("optimizing MySQL OCR table")
            connection.cursor().execute("OPTIMIZE TABLE core_ocr")

            log.info("finished optimizing")
    except BatchLoaderException as err:
        # Log the traceback, then surface a short message to the caller.
        log.exception(err)
        raise CommandError("unable to purge batch. check the purge_batch log for clues")
def handle(self, **options):
    """Delete every document from the solr index and commit the change."""
    conn = solr_index.conn()
    # The '*:*' query matches all documents.
    conn.delete(q='*:*')
    conn.commit()
def handle(self, **options):
    """Issue a commit on the solr connection."""
    conn = solr_index.conn()
    conn.commit()