Esempi in Python per scrapi.processing.cassandra.DocumentModel

Esempio n. 1

0

Mostra file

def test_migrate_v2():
    """Check that a legacy row shows up in ``DocumentModel`` only after migrating.

    A document is written through ``DocumentModelOld``; the same
    ``(docID, source)`` key is then looked up in ``DocumentModel`` before and
    after ``tasks.migrate_to_source_partition`` runs with ``dry=False``.
    """
    DocumentModelOld.create(**RAW.attributes).save()

    def fetch():
        # Issue the lookup afresh on every call so it reflects current state.
        return DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])

    assert len(fetch()) == 0
    tasks.migrate_to_source_partition(dry=False)
    assert len(fetch()) == 1

Esempio n. 2

0

Mostra file

File: test_migrations.py Progetto: Johnetordoff/scrapi

def test_migrate_v2():
    """Verify the v2 migration copies a legacy document into ``DocumentModel``.

    Expects zero matching rows before ``tasks.migrate_to_source_partition``
    is invoked with ``dry=False`` and exactly one matching row afterwards.
    """
    legacy_row = DocumentModelOld.create(**RAW.attributes)
    legacy_row.save()

    # Nothing should be stored under the new model yet.
    rows_before = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
    assert len(rows_before) == 0

    tasks.migrate_to_source_partition(dry=False)

    # The same key must now resolve to a single migrated row.
    rows_after = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
    assert len(rows_after) == 1

Esempio n. 3

0

Mostra file

File: test_migrations.py Progetto: Johnetordoff/scrapi

def test_rename():
    """Check that the ``rename`` migration re-files a document under a new source.

    The document is first stored via ``test_cass`` and confirmed to carry the
    original source name. ``test_harvester`` is then registered under
    ``'wwe_news'``, the ``rename`` migration is run with ``dry=False``, and the
    document is expected to be found exactly once under the new source.

    The module-level Elasticsearch client is swapped for a ``MagicMock`` for
    the duration of the test and is always restored afterwards.
    """
    real_es = scrapi.processing.elasticsearch.es
    scrapi.processing.elasticsearch.es = mock.MagicMock()
    try:
        test_cass.process_raw(RAW)
        test_cass.process_normalized(RAW, NORMALIZED)

        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        old_source = NORMALIZED['shareProperties']['source']

        assert(queryset[0].source == utils.RECORD['shareProperties']['source'])
        assert(queryset[0].source == old_source)

        # Make the target source name resolvable through the registry.
        test_harvester.short_name = 'wwe_news'
        registry['wwe_news'] = test_harvester

        tasks.migrate(rename, sources=[old_source], target='wwe_news', dry=False)

        queryset = DocumentModel.objects(docID=RAW['docID'], source='wwe_news')
        assert(queryset[0].source == 'wwe_news')
        assert(len(queryset) == 1)
    finally:
        # Restore the real client even when an assertion above fails, so the
        # mock never leaks into tests that run afterwards.
        scrapi.processing.elasticsearch.es = real_es

Esempio n. 4

0

Mostra file

File: test_migrations.py Progetto: NeuroVault/scrapi

def test_rename():
    """Check that the ``rename`` migration re-files a document under a new source.

    The document is first stored via ``test_cass`` and confirmed to carry the
    original source name. ``test_harvester`` is then registered under
    ``'wwe_news'``, the ``rename`` migration is run with ``dry=False``, and the
    document is expected to be found exactly once under the new source.

    The module-level Elasticsearch client is swapped for a ``MagicMock`` for
    the duration of the test and is always restored afterwards.
    """
    real_es = scrapi.processing.elasticsearch.es
    scrapi.processing.elasticsearch.es = mock.MagicMock()
    try:
        test_cass.process_raw(RAW)
        test_cass.process_normalized(RAW, NORMALIZED)

        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        old_source = NORMALIZED['shareProperties']['source']

        assert queryset[0].source == utils.RECORD['shareProperties']['source']
        assert queryset[0].source == old_source

        # Make the target source name resolvable through the registry.
        test_harvester.short_name = 'wwe_news'
        registry['wwe_news'] = test_harvester

        tasks.migrate(rename, sources=[old_source], target='wwe_news', dry=False)

        queryset = DocumentModel.objects(docID=RAW['docID'], source='wwe_news')
        assert queryset[0].source == 'wwe_news'
        assert len(queryset) == 1
    finally:
        # Restore the real client even when an assertion above fails, so the
        # mock never leaks into tests that run afterwards.
        scrapi.processing.elasticsearch.es = real_es

Esempio n. 5

0

Mostra file

File: test_migrations.py Progetto: NeuroVault/scrapi

def test_migrate_v2():
    """Verify the v2 migration copies a legacy document into ``DocumentModel``.

    ``RAW['doc']`` is first replaced by its UTF-8 encoding, or by ``str()`` of
    it when that attempt raises ``AttributeError``. The legacy row is then
    saved, and the new model is expected to hold zero matching rows before
    ``tasks.migrate_to_source_partition(dry=False)`` and one afterwards.
    """
    try:
        RAW['doc'] = RAW['doc'].encode('utf-8')
    except AttributeError:
        # Presumably the value has no ``encode`` method; fall back to ``str``.
        RAW['doc'] = str(RAW['doc'])

    legacy_row = DocumentModelOld.create(**RAW.attributes)
    legacy_row.save()

    def fetch():
        # Issue the lookup afresh on every call so it reflects current state.
        return DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])

    assert len(fetch()) == 0
    tasks.migrate_to_source_partition(dry=False)
    assert len(fetch()) == 1

Esempio n. 6

0

Mostra file

File: test_migrations.py Progetto: Johnetordoff/scrapi

def test_renormalize():
    """Check that the ``renormalize`` migration leaves exactly one stored row.

    The document is stored via ``test_cass``, the ``renormalize`` migration is
    run, and the same ``(docID, source)`` key is expected to match one row
    both before and after.

    The module-level Elasticsearch client is swapped for a ``MagicMock`` for
    the duration of the test and is always restored afterwards.
    """
    real_es = scrapi.processing.elasticsearch.es
    scrapi.processing.elasticsearch.es = mock.MagicMock()
    try:
        test_cass.process_raw(RAW)
        test_cass.process_normalized(RAW, NORMALIZED)

        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        assert(len(queryset) == 1)

        # NOTE(review): sibling tests pass ``sources=[...]`` and ``dry=False``;
        # confirm that ``source=`` without ``dry`` is what is intended here.
        tasks.migrate(renormalize, source=RAW['source'])
        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        assert(len(queryset) == 1)
    finally:
        # Restore the real client even when an assertion above fails, so the
        # mock never leaks into tests that run afterwards.
        scrapi.processing.elasticsearch.es = real_es

Esempio n. 7

0

Mostra file

File: test_migrations.py Progetto: NeuroVault/scrapi

def test_renormalize():
    """Check that the ``renormalize`` migration leaves exactly one stored row.

    The document is stored via ``test_cass``, the ``renormalize`` migration is
    run, and the same ``(docID, source)`` key is expected to match one row
    both before and after.

    The module-level Elasticsearch client is swapped for a ``MagicMock`` for
    the duration of the test and is always restored afterwards.
    """
    real_es = scrapi.processing.elasticsearch.es
    scrapi.processing.elasticsearch.es = mock.MagicMock()
    try:
        test_cass.process_raw(RAW)
        test_cass.process_normalized(RAW, NORMALIZED)

        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        assert len(queryset) == 1

        # NOTE(review): sibling tests pass ``sources=[...]`` and ``dry=False``;
        # confirm that ``source=`` without ``dry`` is what is intended here.
        tasks.migrate(renormalize, source=RAW['source'])
        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        assert len(queryset) == 1
    finally:
        # Restore the real client even when an assertion above fails, so the
        # mock never leaks into tests that run afterwards.
        scrapi.processing.elasticsearch.es = real_es

Esempio n. 8

0

Mostra file

def test_delete():
    """Check that the ``delete`` migration removes a stored document.

    The document is stored via ``test_cass`` and confirmed present; after
    ``tasks.migrate(delete, ...)`` runs with ``dry=False`` for the document's
    source, the same ``(docID, source)`` key is expected to match no rows.

    The module-level Elasticsearch client is swapped for a ``MagicMock`` for
    the duration of the test and is always restored afterwards.
    """
    real_es = scrapi.processing.elasticsearch.es
    scrapi.processing.elasticsearch.es = mock.MagicMock()
    try:
        test_cass.process_raw(RAW)
        test_cass.process_normalized(RAW, NORMALIZED)

        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        assert (len(queryset) == 1)

        tasks.migrate(delete, sources=[RAW['source']], dry=False)
        queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
        assert (len(queryset) == 0)
    finally:
        # Restore the real client even when an assertion above fails, so the
        # mock never leaks into tests that run afterwards.
        scrapi.processing.elasticsearch.es = real_es

Esempio n. 9

0

Mostra file

File: conftest.py Progetto: zamattiac/scrapi

def pytest_runtest_setup(item):
    """Wait for backing services before running a marked test.

    For a test carrying the ``cassandra`` marker, the test is skipped when
    ``database.setup()`` is falsy; otherwise a one-row query is retried while
    it raises ``NoHostAvailable``. For a test carrying the ``elasticsearch``
    marker, the test is skipped when ``use_es`` is falsy; otherwise the
    ``test`` index is created and a search against it is retried while it
    raises ``TransportError``.

    Args:
        item: The pytest test item about to be run.

    Raises:
        NoHostAvailable: Cassandra is still unreachable after ``TIMEOUT`` seconds.
        TransportError: The Elasticsearch search still fails after ``TIMEOUT`` seconds.
    """
    TIMEOUT = 20  # seconds to keep retrying before giving up
    RETRY_DELAY = 0.1  # pause between probes so the wait loops do not spin the CPU

    marker = item.get_marker('cassandra')
    if marker is not None:
        from scrapi.processing.cassandra import DocumentModel
        if not database.setup():
            pytest.skip('No connection to Cassandra')

        start = time.time()
        while True:
            try:
                DocumentModel.all().limit(1).get()
                break
            except NoHostAvailable:
                if (time.time() - start) > TIMEOUT:
                    # Bare raise keeps the original traceback intact.
                    raise
                time.sleep(RETRY_DELAY)
            except Exception:
                # Deliberate best-effort: any other error ends the wait
                # (presumably it means the cluster did answer the query).
                break

    marker = item.get_marker('elasticsearch')
    if marker is not None:
        if not use_es:
            pytest.skip('No connection to Elasticsearch')
        con.indices.create(index='test', body={}, ignore=400)

        # This is done to let the test index finish being created before connecting to search
        start = time.time()
        while True:
            try:
                scrapi.processing.elasticsearch.ElasticsearchProcessor.manager.es.search(
                    index='test')
                break
            except TransportError:
                if (time.time() - start) > TIMEOUT:
                    # Bare raise keeps the original traceback intact.
                    raise
                time.sleep(RETRY_DELAY)

Esempio n. 10

0

Mostra file

File: conftest.py Progetto: AndrewSallans/scrapi

def pytest_runtest_setup(item):
    """Wait for backing services before running a marked test.

    For a test carrying the ``cassandra`` marker, the test is skipped when
    ``database.setup()`` is falsy; otherwise a one-row query is retried while
    it raises ``NoHostAvailable``. For a test carrying the ``elasticsearch``
    marker, the test is skipped when ``use_es`` is falsy; otherwise the
    ``test`` index is created and a search against it is retried while it
    raises ``TransportError``.

    Args:
        item: The pytest test item about to be run.

    Raises:
        NoHostAvailable: Cassandra is still unreachable after ``TIMEOUT`` seconds.
        TransportError: The Elasticsearch search still fails after ``TIMEOUT`` seconds.
    """
    TIMEOUT = 20  # seconds to keep retrying before giving up
    RETRY_DELAY = 0.1  # pause between probes so the wait loops do not spin the CPU

    marker = item.get_marker('cassandra')
    if marker is not None:
        from scrapi.processing.cassandra import DocumentModel
        if not database.setup():
            pytest.skip('No connection to Cassandra')

        start = time.time()
        while True:
            try:
                DocumentModel.all().limit(1).get()
                break
            except NoHostAvailable:
                if (time.time() - start) > TIMEOUT:
                    # Bare raise keeps the original traceback intact.
                    raise
                time.sleep(RETRY_DELAY)
            except Exception:
                # Deliberate best-effort: any other error ends the wait
                # (presumably it means the cluster did answer the query).
                break

    marker = item.get_marker('elasticsearch')
    if marker is not None:
        if not use_es:
            pytest.skip('No connection to Elasticsearch')
        con.indices.create(index='test', body={}, ignore=400)

        # This is done to let the test index finish being created before connecting to search
        start = time.time()
        while True:
            try:
                scrapi.processing.elasticsearch.ElasticsearchProcessor.manager.es.search(index='test')
                break
            except TransportError:
                if (time.time() - start) > TIMEOUT:
                    # Bare raise keeps the original traceback intact.
                    raise
                time.sleep(RETRY_DELAY)

Esempio n. 11

0

Mostra file

File: test_cassandra_processor.py Progetto: kms6bn/scrapi

def test_versions():
    """Check how re-processing a document affects its recorded versions.

    After the first processing one row exists. Processing again with a
    changed title must update the title and leave one entry in
    ``doc.versions`` whose ``VersionModel`` row holds the old title.
    Processing a third time with unchanged content must not add an entry.

    The shared ``NORMALIZED`` fixture is modified during the test; its
    original title is put back afterwards so later tests see it unchanged.
    """
    test_db.process_normalized(RAW, NORMALIZED)
    queryset = DocumentModel.objects(docID=RAW["docID"], source=RAW["source"])

    assert len(queryset) == 1

    old_title = NORMALIZED["title"]
    NORMALIZED["title"] = "some new title"
    try:
        test_db.process_normalized(RAW, NORMALIZED)
        doc = DocumentModel.objects(docID=RAW["docID"], source=RAW["source"])[0]
        assert doc.title == "some new title"
        assert len(doc.versions) == 1

        version = VersionModel.objects(key=doc.versions[-1])[0]

        assert version.title == old_title

        # Same content again: title stays and the version count does not grow.
        test_db.process_normalized(RAW, NORMALIZED)
        doc = DocumentModel.objects(docID=RAW["docID"], source=RAW["source"])[0]
        assert doc.title == "some new title"
        assert len(doc.versions) == 1
    finally:
        # Undo the fixture mutation even when an assertion above fails.
        NORMALIZED["title"] = old_title

Esempio n. 12

0

Mostra file

File: test_cassandra_processor.py Progetto: zamattiac/scrapi

def test_versions():
    """Check how re-processing a document affects its recorded versions.

    After the first processing one row exists. Processing again with a
    changed title must update the title and leave one entry in
    ``doc.versions`` whose ``VersionModel`` row holds the old title.
    Processing a third time with unchanged content must not add an entry.

    The shared ``NORMALIZED`` fixture is modified during the test; its
    original title is put back afterwards so later tests see it unchanged.
    """
    test_db.process_normalized(RAW, NORMALIZED)
    queryset = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])

    assert (len(queryset) == 1)

    old_title = NORMALIZED['title']
    NORMALIZED['title'] = 'some new title'
    try:
        test_db.process_normalized(RAW, NORMALIZED)
        doc = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])[0]
        assert (doc.title == 'some new title')
        assert len(doc.versions) == 1

        version = VersionModel.objects(key=doc.versions[-1])[0]

        assert (version.title == old_title)

        # Same content again: title stays and the version count does not grow.
        test_db.process_normalized(RAW, NORMALIZED)
        doc = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])[0]
        assert (doc.title == 'some new title')
        assert len(doc.versions) == 1
    finally:
        # Undo the fixture mutation even when an assertion above fails.
        NORMALIZED['title'] = old_title

Esempio n. 13

0

Mostra file

File: test_cassandra_processor.py Progetto: kms6bn/scrapi

def test_process_normalized():
    """Check that a processed normalized document is stored with the record's title."""
    test_db.process_normalized(RAW, NORMALIZED)

    matches = DocumentModel.objects(docID=RAW["docID"], source=RAW["source"])
    stored = matches[0]

    # The first stored row must carry the title of the reference record.
    assert stored.title == utils.RECORD["title"]

Esempio n. 14

0

Mostra file

File: test_cassandra_processor.py Progetto: kms6bn/scrapi

def test_process_raw():
    """Check that processing a raw document stores exactly one matching row."""
    test_db.process_raw(RAW)

    matches = DocumentModel.objects(docID="someID", source=RAW["source"])
    row_count = len(matches)

    assert row_count == 1

Esempio n. 15

0

Mostra file

File: test_cassandra_processor.py Progetto: zamattiac/scrapi

def test_process_normalized():
    """Check that a processed normalized document is stored with the record's title."""
    test_db.process_normalized(RAW, NORMALIZED)

    matches = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
    stored = matches[0]

    # The first stored row must carry the title of the reference record.
    assert stored.title == utils.RECORD['title']

Esempio n. 16

0

Mostra file

File: test_cassandra_processor.py Progetto: zamattiac/scrapi

def test_process_raw():
    """Check that processing a raw document stores exactly one matching row."""
    test_db.process_raw(RAW)

    matches = DocumentModel.objects(docID='someID', source=RAW['source'])
    row_count = len(matches)

    assert row_count == 1