def test_load_sample_directory(self):
    samples_path = self.get_fixture_path('samples')
    document = Document.by_keys(collection=self.collection,
                                foreign_id='samples')
    db.session.commit()
    db.session.refresh(document)
    ingest_document(document, samples_path)
    assert Document.all().count() == 5, Document.all().count()

def test_load_pdf_file(self):
    pdf_path = self.get_fixture_path('demo.pdf')
    document = Document.by_keys(collection=self.collection,
                                foreign_id='demo.pdf')
    db.session.commit()
    db.session.refresh(document)
    ingest_document(document, pdf_path)
    assert Document.all().count() == 1, Document.all().count()

def test_load_pdf_file(self):
    pdf_path = self.get_fixture_path('demo.pdf')
    document = Document.by_keys(collection_id=self.collection.id,
                                foreign_id='demo.pdf')
    db.session.commit()
    db.session.refresh(document)
    ingest_document(document, pdf_path)
    assert Document.all().count() == 1, Document.all().count()

def test_load_sample_directory(self):
    samples_path = self.get_fixture_path('samples')
    document = Document.by_keys(collection_id=self.collection.id,
                                foreign_id='samples')
    db.session.commit()
    db.session.refresh(document)
    ingest_document(document, samples_path)
    assert Document.all().count() == 5, Document.all().count()

def test_crawler_execute(self):
    tdc = TDocumentCrawler()
    ccnt = Document.all().count()
    assert ccnt == 0, ccnt
    tdc.execute()
    states = Document.all().all()
    assert len(states) == 1, len(states)
    demo = states[0]
    assert 'kitty' in demo.title, demo.meta
    coll = Collection.by_foreign_id('test')
    assert coll is not None, coll
    assert len(list(coll.documents)) == 1, list(coll.documents)

def load_fixtures(self, file_name):
    filepath = self.get_fixture_path(file_name)
    load_fixtures(db, loaders.load(filepath))
    db.session.commit()
    for document in Document.all():
        index_document(document)
    self.update_index()

def index():
    sources_ids = match_ids('sources', authz.sources(authz.READ))
    q = Document.all().filter(Document.source_id.in_(sources_ids))
    hashes = request.args.getlist('content_hash')
    if len(hashes):
        q = q.filter(Document.content_hash.in_(hashes))
    return jsonify(Pager(q))

def load_fixtures(self, file_name):
    filepath = self.get_fixture_path(file_name)
    load_fixtures(db, loaders.load(filepath))
    db.session.commit()
    update_collections()
    for doc in Document.all():
        process_document(doc)
    self.flush_index()

def index():
    collection_ids = match_ids('collection', authz.collections(authz.READ))
    q = Document.all()
    clause = Collection.id.in_(collection_ids)
    q = q.filter(Document.collections.any(clause))
    hashes = request.args.getlist('content_hash')
    if len(hashes):
        q = q.filter(Document.content_hash.in_(hashes))
    return jsonify(Pager(q))

def load_documents():
    graph = get_graph()
    tx = graph.begin()
    for i, document in enumerate(Document.all()):
        load_document(tx, document)
        # Commit in batches of 1,000 documents to keep each graph
        # transaction small, then open a fresh one.
        if i > 0 and i % 1000 == 0:
            tx.commit()
            tx = graph.begin()
    tx.commit()

def load_fixtures(self, file_name, process_documents=True):
    filepath = self.get_fixture_path(file_name)
    load_fixtures(db, loaders.load(filepath))
    db.session.commit()
    reindex_entities()
    if process_documents:
        for doc in Document.all():
            analyze_document(doc)
    optimize_search()

def load_fixtures(self, file_name, process_documents=True):
    filepath = self.get_fixture_path(file_name)
    load_fixtures(db, loaders.load(filepath))
    db.session.commit()
    reindex_entities()
    if process_documents:
        for doc in Document.all():
            process_document(doc)
    self.flush_index()

def test_load_csv_file(self):
    csv_path = self.get_fixture_path('experts.csv')
    document = Document.by_keys(collection=self.collection,
                                foreign_id='experts.csv')
    db.session.commit()
    db.session.refresh(document)
    ingest_document(document, csv_path)
    assert Document.all().count() == 1, Document.all().count()
    records = db.session.query(DocumentRecord).all()
    assert len(records) == 14, len(records)
    rec0 = records[0]
    assert str(rec0.id) in repr(rec0), repr(rec0)
    assert 'nationality' in rec0.data, rec0.data
    assert 'name' in rec0.data, rec0.data
    doc = rec0.document
    doc.delete_records()
    records = db.session.query(DocumentRecord).all()
    assert len(records) == 0, len(records)

def index():
    authz = request.authz
    collections = request.args.getlist('collection')
    collections = authz.collections_intersect(authz.READ, collections)
    q = Document.all()
    q = q.filter(Document.collection_id.in_(collections))
    hashes = request.args.getlist('content_hash')
    if len(hashes):
        q = q.filter(Document.content_hash.in_(hashes))
    return jsonify(Pager(q))

def load_documents():
    graph = get_graph()
    tx = graph.begin()
    for i, document in enumerate(Document.all()):
        log.info("Load doc [%s]: %r", document.id, document.meta)
        load_document(tx, document)
        if i > 0 and i % 1000 == 0:
            tx.commit()
            tx = graph.begin()
    tx.commit()

def test_load_csv_file(self):
    csv_path = self.get_fixture_path('experts.csv')
    crawler = DirectoryCrawler()
    crawler.execute(directory=csv_path)
    assert Document.all().count() == 1, Document.all().count()
    records = db.session.query(DocumentRecord).all()
    assert len(records) == 14, len(records)
    rec0 = records[0]
    assert str(rec0.id) in repr(rec0), repr(rec0)
    assert 'experts.csv' in rec0.document.meta.file_name, \
        rec0.document.meta
    assert 'nationality' in rec0.data, rec0.data
    assert 'name' in rec0.data, rec0.data
    doc = rec0.document
    assert 'experts' in repr(doc)
    doc.delete_records()
    records = db.session.query(DocumentRecord).all()
    assert len(records) == 0, len(records)

def test_load_csv_file(self):
    csv_path = self.get_fixture_path('experts.csv')
    document = Document.by_keys(collection_id=self.collection.id,
                                foreign_id='experts.csv')
    document.file_name = 'experts.csv'
    db.session.commit()
    db.session.refresh(document)
    ingest_document(document, csv_path)
    assert Document.all().count() == 1, Document.all().count()
    records = db.session.query(DocumentRecord).all()
    assert len(records) == 14, len(records)
    rec0 = records[0]
    assert str(rec0.id) in repr(rec0), repr(rec0)
    assert 'nationality' in rec0.data, rec0.data
    assert 'name' in rec0.data, rec0.data
    doc = rec0.document
    doc.delete_records()
    records = db.session.query(DocumentRecord).all()
    assert len(records) == 0, len(records)

def index():
    try:
        authorized = authz.collections(authz.READ)
        collection_ids = [int(f) for f in request.args.getlist('collection')]
        collection_ids = collection_ids or authorized
        collection_ids = [c for c in collection_ids if c in authorized]
    except ValueError:
        raise BadRequest()
    q = Document.all()
    clause = Collection.id.in_(collection_ids)
    q = q.filter(Document.collections.any(clause))
    hashes = request.args.getlist('content_hash')
    if len(hashes):
        q = q.filter(Document.content_hash.in_(hashes))
    return jsonify(Pager(q))

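# A hedged usage sketch for the index() views above. The route path is an
# assumption for illustration; only the 'collection' and 'content_hash'
# query parameters appear in the code itself:
#
#   GET /documents?collection=3&content_hash=5eb63bbb...
#
# Requested collection ids outside authz.collections(authz.READ) are
# dropped by the intersection step, so the paginated query only ever
# covers documents the caller is allowed to read.
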
def create_document(self, meta, type=None):
    # Try to find an existing document before creating a new one; bind
    # "document" up front so the lookup can be skipped safely when the
    # metadata carries no content hash.
    document = None
    if meta.content_hash:
        q = Document.all()
        if meta.foreign_id:
            q = q.filter(Document.foreign_id == meta.foreign_id)
        else:
            q = q.filter(Document.content_hash == meta.content_hash)
        q = q.filter(Document.source_id == self.source_id)
        document = q.first()
    if document is None:
        document = Document()
        document.source_id = self.source_id
    document.meta = meta
    document.type = type or self.DOCUMENT_TYPE
    db.session.add(document)
    db.session.flush()
    return document

@classmethod
def document_by_meta(cls, collection_id, meta):
    # Upsert: prefer the stable foreign_id, fall back to the content
    # hash, and refuse metadata that offers no unique criterion at all.
    q = Document.all()
    if meta.foreign_id:
        q = q.filter(Document.foreign_id == meta.foreign_id)
    elif meta.content_hash:
        q = q.filter(Document.content_hash == meta.content_hash)
    else:
        raise ValueError("No unique criterion for document: %s" % meta)
    q = q.filter(Document.collection_id == collection_id)
    document = q.first()
    if document is None:
        document = Document()
        document.collection_id = collection_id
        document.foreign_id = meta.foreign_id
        document.content_hash = meta.content_hash
    document.meta = meta
    return document

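# A minimal calling sketch for document_by_meta, assuming the method lives
# on Document and that a metadata object exposes foreign_id and
# content_hash attributes. The Metadata name and the checksum() helper are
# hypothetical, not defined in this code:
#
#   meta = Metadata()
#   meta.foreign_id = 'reports/annual.pdf'
#   meta.content_hash = checksum(file_path)
#   document = Document.document_by_meta(collection.id, meta)
#   db.session.add(document)
#   db.session.commit()
#
# Because the lookup prefers foreign_id over content_hash, re-crawling the
# same path updates the existing row instead of creating a duplicate.
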
def create_document(self, meta, type=None):
    document = None
    if meta.content_hash:
        q = Document.all()
        if meta.foreign_id:
            q = q.filter(Document.foreign_id == meta.foreign_id)
        else:
            q = q.filter(Document.content_hash == meta.content_hash)
        clause = Collection.id == self.collection_id
        q = q.filter(Document.collections.any(clause))
        document = q.first()
    if document is None:
        document = Document()
        document.collections = [Collection.by_id(self.collection_id)]
    document.meta = meta
    document.type = type or self.DOCUMENT_TYPE
    db.session.add(document)
    db.session.flush()
    return document

def test_incremental(self):
    tdc = TDocumentCrawler()
    tdc.execute()
    tdc.execute(incremental=True)
    states = Document.all().all()
    assert len(states) == 1, len(states)

def test_crawl_sample_directory(self):
    samples_path = self.get_fixture_path("samples")
    crawl_directory(self.collection, samples_path)
    assert Document.all().count() == 4, Document.all().count()