def flush(foreign_id):
    """Reset the crawler state for a given source specification.

    Deletes the source's documents from the search index, removes the
    source record itself, and commits the transaction.

    :raises ValueError: if no source matches ``foreign_id``.
    """
    from aleph.index import delete_source
    src = Source.by_foreign_id(foreign_id)
    if src is None:
        raise ValueError("No such source: %r" % foreign_id)
    # Drop indexed documents first, then the database record.
    delete_source(src.id)
    src.delete()
    db.session.commit()
def analyze(foreign_id=None):
    """Re-analyze documents in the given source (or throughout).

    :param foreign_id: when given, only the matching source is analyzed;
        otherwise every source is queued for analysis.
    :raises ValueError: if ``foreign_id`` is given but matches no source.
    """
    # Resolve the set of sources to process, then dispatch once.
    if not foreign_id:
        sources = Source.all()
    else:
        src = Source.by_foreign_id(foreign_id)
        if src is None:
            raise ValueError("No such source: %r" % foreign_id)
        sources = [src]
    for src in sources:
        analyze_source.delay(src.id)
def index(foreign_id=None):
    """Index documents in the given source (or throughout).

    :param foreign_id: when given, only documents of the matching source
        are queued for indexing; when ``None``, all documents are indexed
        and entities are re-indexed afterwards.
    :raises ValueError: if ``foreign_id`` is given but matches no source.
    """
    query = Document.all_ids()
    if foreign_id:
        src = Source.by_foreign_id(foreign_id)
        if src is None:
            raise ValueError("No such source: %r" % foreign_id)
        query = query.filter(Document.source_id == src.id)
    for (doc_id,) in query:
        index_document.delay(doc_id)
    # A full run (no source filter) also refreshes the entity index.
    if foreign_id is None:
        reindex_entities()
def index(foreign_id=None):
    """Index documents in the given source (or throughout).

    :param foreign_id: when given, only documents of the matching source
        are queued; when ``None``, the search index is dropped and
        re-created before all documents are queued.
    :raises ValueError: if ``foreign_id`` is given but matches no source.
    """
    query = Document.all_ids()
    if not foreign_id:
        # Full rebuild: wipe and re-initialise the index first.
        delete_index()
        init_search()
    else:
        src = Source.by_foreign_id(foreign_id)
        if src is None:
            raise ValueError("No such source: %r" % foreign_id)
        query = query.filter(Document.source_id == src.id)
    for (doc_id,) in query:
        index_document.delay(doc_id)
def index(foreign_id=None, immediate=False):
    """Index documents in the given source (or throughout).

    :param foreign_id: restrict indexing to the source with this foreign
        ID; when ``None``, all documents are indexed and entities are
        re-indexed afterwards.
    :param immediate: when True, index each document synchronously in
        this process instead of dispatching tasks to the worker queue.
    :raises ValueError: if ``foreign_id`` is given but matches no source.
    """
    q = Document.all_ids()
    if foreign_id:
        source = Source.by_foreign_id(foreign_id)
        if source is None:
            raise ValueError("No such source: %r" % foreign_id)
        q = q.filter(Document.source_id == source.id)
    for doc_id, in q:
        print('indexing %s' % doc_id)
        if immediate:
            # Bypass the task queue: run the indexing task inline.
            index_document(doc_id)
        else:
            index_document.delay(doc_id)
    # A full run (no source filter) also refreshes the entity index.
    if foreign_id is None:
        reindex_entities()