Ejemplo n.º 1
0
def debug(path, dataset, languages=None):
    """Ingest ``path`` locally and pretty-print every resulting entity.

    Uses the connection returned by ``get_fakeredis()`` and points the
    store at ``'sqlite://'`` (presumably an in-memory SQLite database --
    confirm against the store settings), so that the run does not go
    through the regular Redis connection used by ``ingest``.

    Args:
        path: Location of the file(s) to ingest.
        dataset: Name of the dataset to ingest into.
        languages: Optional language hints forwarded to ``_ingest_path``.
    """
    fake_redis = get_fakeredis()
    settings.sts.DATABASE_URI = 'sqlite://'
    store = get_dataset(dataset, OP_INGEST)
    _ingest_path(store, fake_redis, dataset, path, languages=languages)
    # Run the worker on the same connection the tasks were queued on.
    IngestWorker(conn=fake_redis, stages=STAGES).sync()
    for proxy in store.iterate():
        pprint(proxy.to_dict())
Ejemplo n.º 2
0
 def handle(self, task):
     """Run the ingest or analyze operation for ``task`` and dispatch on.

     The store name is read from the task context key ``"ftmstore"``,
     falling back to the name of the job's dataset. Tasks whose stage is
     neither ``OP_INGEST`` nor ``OP_ANALYZE`` are ignored. The dataset is
     closed in every case, including when the operation raises.
     """
     store_name = task.context.get("ftmstore", task.job.dataset.name)
     stage_name = task.stage.stage
     dataset = get_dataset(store_name, stage_name)
     try:
         # Pick the operation matching the stage; unknown stages do nothing.
         operation = None
         if stage_name == OP_INGEST:
             operation = self._ingest
         elif stage_name == OP_ANALYZE:
             operation = self._analyze
         if operation is not None:
             self.dispatch_next(task, operation(dataset, task))
     finally:
         dataset.close()
Ejemplo n.º 3
0
Archivo: cli.py Proyecto: mudsill/aleph
def analyze(dataset):
    """Feed every partial entity of ``dataset`` through an ``Analyzer``.

    Consecutive partials whose id matches the current analyzer's entity id
    are fed to that same analyzer. When the id changes, the current
    analyzer is flushed and a new one is created for the new partial. The
    last analyzer is flushed once iteration ends.

    Args:
        dataset: Name of the dataset whose partials are analyzed.
    """
    store = get_dataset(dataset, OP_ANALYZE)
    current = None
    for fragment in store.partials():
        # A different id marks the end of the current analyzer's group.
        if current is not None and current.entity.id != fragment.id:
            current.flush()
            current = None
        if current is None:
            current = Analyzer(store, fragment, {})
        current.feed(fragment)
    # Flush whatever is still buffered for the final group, if any.
    if current is not None:
        current.flush()
Ejemplo n.º 4
0
 def setUp(self):
     """Build a ``Manager`` wired to test-only settings and hooks."""
     # Point every settings module at local, throw-away resources.
     ingestors_settings.TESTING = True
     service_settings.REDIS_URL = None
     service_settings.ARCHIVE_TYPE = 'file'
     service_settings.ARCHIVE_PATH = mkdtemp()
     ftmstore_settings.DATABASE_URI = 'sqlite://'

     # Create a job and an ingest stage on the fake connection.
     fake_redis = get_fakeredis()
     test_job = Job.create(fake_redis, 'test')
     ingest_stage = Stage(test_job, OP_INGEST)
     store = get_dataset(test_job.dataset.name, OP_INGEST)

     manager = Manager(store, ingest_stage, {})
     self.manager = manager
     manager.entities = []
     # Replace the emit/queue hooks with the module-level functions,
     # bound to this manager instance.
     manager.emit_entity = types.MethodType(emit_entity, manager)
     manager.queue_entity = types.MethodType(queue_entity, manager)

     self.archive = init_archive()
     manager._archive = self.archive
Ejemplo n.º 5
0
def ingest(path, dataset, languages=None):
    """Queue a set of files for ingest into ``dataset``.

    Args:
        path: Location of the file(s) to queue.
        dataset: Name of the dataset the files are ingested into.
        languages: Optional language hints forwarded to ``_ingest_path``.
    """
    redis_conn = get_redis()
    store = get_dataset(dataset, OP_INGEST)
    _ingest_path(store, redis_conn, dataset, path, languages=languages)