Python process_normalizedの例

プログラミング言語: Python

名前空間/パッケージ名: scrapi.tasks

メソッド/関数: process_normalized

hotexamples.comのコード掲載数: 12

Python process_normalized - 12件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのscrapi.tasks.process_normalizedの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: migrations.py プロジェクト: felliott/scrapi

def rename(docs, target=None, **kwargs):
    """Re-process each document in ``docs`` under the source name ``target``.

    A ``RawDocument`` is built from every document's fields with ``source``
    replaced by ``target``.  Unless the ``dry`` keyword argument is truthy,
    that copy is passed through ``tasks.process_raw`` and
    ``tasks.process_normalized`` and ``es.delete`` is then called for the
    old source name on both ``settings.ELASTIC_INDEX`` and ``share_v1``.

    The closing "Deleted document" message is logged for every document,
    dry run or not.

    Raises:
        AssertionError: if ``target`` is falsy, or if a document's source
            already equals ``target``.
    """
    assert target, "To run this migration you need a target."
    for doc in docs:
        attributes = {
            'doc': doc.doc,
            'docID': doc.docID,
            'source': target,
            'filetype': doc.filetype,
            'timestamps': doc.timestamps,
            'versions': doc.versions
        }
        raw = RawDocument(attributes)

        assert doc.source != target, (
            "Can't rename {} to {}, names are the same.".format(doc.source, target))

        if not kwargs.get('dry'):
            tasks.process_raw(raw)
            normalized = tasks.normalize(raw, raw['source'])
            tasks.process_normalized(normalized, raw)
            logger.info('Processed document from {} with id {}'.format(
                doc.source, raw['docID']))

            # Same delete request against both indices; presumably
            # ignore=[404] tolerates documents that are already gone.
            for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
                es.delete(index=index_name,
                          doc_type=doc.source,
                          id=raw['docID'],
                          ignore=[404])

        logger.info('Deleted document from {} with id {}'.format(
            doc.source, raw['docID']))

コード例 #2

ファイルを表示

def rename(docs, target=None, **kwargs):
    """Re-process each document in ``docs`` under the source name ``target``.

    The raw attributes of every document are deep-copied, their ``source``
    is replaced by ``target`` and an unvalidated ``RawDocument`` is built
    from the copy.  Unless the ``dry`` keyword argument is truthy, the copy
    is passed through ``tasks.process_raw`` and ``tasks.process_normalized``
    and the elasticsearch processor's client is asked to delete the entry
    for the old source name on ``settings.ELASTIC_INDEX`` and ``share_v1``.

    The closing "Renamed document" message is logged for every document,
    dry run or not.

    Raises:
        AssertionError: if ``target`` is falsy, or if a document's source
            already equals ``target``.
    """
    assert target, "To run this migration you need a target."

    for doc in docs:
        attributes = copy.deepcopy(doc.raw.attributes)
        attributes['source'] = target
        raw = RawDocument(attributes, validate=False)

        assert doc.raw.attributes['source'] != target, (
            "Can't rename {} to {}, names are the same.".format(
                doc.raw['source'], target))

        if not kwargs.get('dry'):
            tasks.process_raw(raw)
            normalized = tasks.normalize(raw, raw['source'])
            tasks.process_normalized(normalized, raw)
            logger.info('Processed document from {} with id {}'.format(
                doc.raw.attributes['source'], raw['docID']))

            es_processor = get_processor('elasticsearch')
            # Same delete request against both indices.
            for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
                es_processor.manager.es.delete(
                    index=index_name,
                    doc_type=doc.raw.attributes['source'],
                    id=raw['docID'],
                    ignore=[404])

        logger.info('Renamed document from {} to {} with id {}'.format(
            doc.raw.attributes['source'], target, raw['docID']))

コード例 #3

ファイルを表示

ファイル: rename.py プロジェクト: PatrickEGorman/scrapi

def rename(source, target, dry=True):
    """Re-process every document of ``source`` under the name ``target``.

    For each document yielded by ``documents(source)`` a ``RawDocument`` is
    built with ``source`` replaced by ``target``.  When ``dry`` is false the
    copy is passed through ``process_raw`` and ``process_normalized`` and,
    if that succeeded, ``es.delete`` is called for the old source name on
    ``settings.ELASTIC_INDEX`` and ``share_v1``.

    A failure while handling one document is logged and collected so the
    remaining documents are still processed; all collected exceptions are
    logged again at the end together with a summary count.

    Args:
        source: current source name of the documents.
        target: source name to re-process the documents under.
        dry: when true (the default) nothing is processed or deleted; only
            the log messages are emitted.

    Raises:
        AssertionError: if ``source`` equals ``target``.
    """
    assert source != target, "Can't rename {} to {}, names are the same".format(source, target)
    count = 0
    exceptions = []

    for doc in documents(source):
        count += 1
        try:
            raw = RawDocument({
                'doc': doc.doc,
                'docID': doc.docID,
                'source': target,
                'filetype': doc.filetype,
                'timestamps': doc.timestamps,
                'versions': doc.versions
            })
            if not dry:
                process_raw(raw)
                process_normalized(normalize(raw, raw['source']), raw)
            logger.info('Processed document from {} with id {}'.format(source, raw['docID']))
        except Exception as e:
            # Keep going: one bad document must not abort the whole rename.
            logger.exception(e)
            exceptions.append(e)
        else:
            # Only remove the old entries once re-processing succeeded.
            if not dry:
                es.delete(index=settings.ELASTIC_INDEX, doc_type=source, id=raw['docID'], ignore=[404])
                es.delete(index='share_v1', doc_type=source, id=raw['docID'], ignore=[404])
            logger.info('Deleted document from {} with id {}'.format(source, raw['docID']))
    if dry:
        logger.info('Dry run complete')

    # Log the loop variable, not the handler's ``e``: on Python 3 ``e`` is
    # unbound once the ``except`` clause ends, and on Python 2 it would only
    # ever refer to the last exception caught.
    for ex in exceptions:
        logger.exception(ex)
    logger.info('{} documents processed, with {} exceptions'.format(count, len(exceptions)))

コード例 #4

ファイルを表示

ファイル: test_tasks.py プロジェクト: felliott/scrapi

def test_process_norm_calls(raw_doc, monkeypatch):
    """``tasks.process_normalized`` must call ``processing.process_normalized``.

    The processing function is replaced by a mock, and the test checks that
    it is called exactly once with both documents and an empty dict.
    """
    processing_stub = mock.Mock()
    monkeypatch.setattr(
        'scrapi.tasks.processing.process_normalized', processing_stub)

    tasks.process_normalized(raw_doc, raw_doc)

    processing_stub.assert_called_once_with(raw_doc, raw_doc, {})

コード例 #5

ファイルを表示

ファイル: test_tasks.py プロジェクト: bdyetton/scrapi

def test_process_norm_calls(raw_doc, monkeypatch):
    """Verify the hand-off from the task to the processing layer.

    ``scrapi.tasks.processing.process_normalized`` is swapped for a mock;
    calling the task must invoke it once with the two documents followed by
    an empty dict.
    """
    patched_target = "scrapi.tasks.processing.process_normalized"
    recorder = mock.Mock()
    monkeypatch.setattr(patched_target, recorder)

    tasks.process_normalized(raw_doc, raw_doc)

    recorder.assert_called_once_with(raw_doc, raw_doc, {})

コード例 #6

ファイルを表示

def renormalize(doc, **kwargs):
    """Normalize ``doc`` again and hand the result to ``process_normalized``.

    A ``RawDocument`` is always rebuilt from the document's fields; the
    normalize/process step is skipped when the ``dry`` keyword argument is
    truthy.
    """
    attributes = {
        'doc': doc.doc,
        'docID': doc.docID,
        'source': doc.source,
        'filetype': doc.filetype,
        'timestamps': doc.timestamps,
        'versions': doc.versions
    }
    raw = RawDocument(attributes)

    if kwargs.get('dry'):
        return

    normalized = tasks.normalize(raw, raw['source'])
    tasks.process_normalized(normalized, raw)

コード例 #7

ファイルを表示

ファイル: migrations.py プロジェクト: AndrewSallans/scrapi

def rename(docs, target=None, **kwargs):
    """Move every document in ``docs`` to the source name ``target``.

    Each document's raw attributes are deep-copied with ``source`` set to
    ``target`` and wrapped in an unvalidated ``RawDocument``.  When the
    ``dry`` keyword argument is not truthy, the copy goes through
    ``tasks.process_raw`` and ``tasks.process_normalized``, after which the
    elasticsearch processor's client is asked to delete the entry for the
    old source name on ``settings.ELASTIC_INDEX`` and ``share_v1``.

    The final "Renamed document" message is logged in dry runs as well.

    Raises:
        AssertionError: if ``target`` is falsy, or if a document's source
            already equals ``target``.
    """
    assert target, "To run this migration you need a target."

    for doc in docs:
        renamed_attrs = copy.deepcopy(doc.raw.attributes)
        renamed_attrs['source'] = target
        raw = RawDocument(renamed_attrs, validate=False)

        assert doc.raw.attributes['source'] != target, (
            "Can't rename {} to {}, names are the same.".format(doc.raw['source'], target))

        if not kwargs.get('dry'):
            tasks.process_raw(raw)
            normalized = tasks.normalize(raw, raw['source'])
            tasks.process_normalized(normalized, raw)
            logger.info(
                'Processed document from {} with id {}'.format(doc.raw.attributes['source'], raw['docID']))

            es_processor = get_processor('elasticsearch')
            # Identical delete request, once per index.
            for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
                es_processor.manager.es.delete(
                    index=index_name,
                    doc_type=doc.raw.attributes['source'],
                    id=raw['docID'],
                    ignore=[404])

        logger.info(
            'Renamed document from {} to {} with id {}'.format(doc.raw.attributes['source'], target, raw['docID']))

コード例 #8

ファイルを表示

ファイル: renormalize.py プロジェクト: PatrickEGorman/scrapi

def renormalize(sources=()):
    """Normalize and re-process every document of the given sources.

    For each document yielded by ``documents(*sources)`` a ``RawDocument``
    is rebuilt from its fields, normalized and handed to
    ``process_normalized``.  A failure on one document is logged and
    collected so the remaining documents are still handled; all collected
    exceptions are logged again at the end together with a summary count.

    Args:
        sources: iterable of source names, unpacked into ``documents``.
    """
    count = 0
    exceptions = []
    for doc in documents(*sources):
        count += 1
        try:
            raw = RawDocument({
                'doc': doc.doc,
                'docID': doc.docID,
                'source': doc.source,
                'filetype': doc.filetype,
                'timestamps': doc.timestamps,
                'versions': doc.versions
            })
            process_normalized(normalize(raw, raw['source']), raw)
        except Exception as e:
            # Keep going: one bad document must not abort the whole run.
            logger.exception(e)
            exceptions.append(e)

    # Log the loop variable, not the handler's ``e``: on Python 3 ``e`` is
    # unbound once the ``except`` clause ends, and on Python 2 it would only
    # ever refer to the last exception caught.
    for ex in exceptions:
        logger.exception(ex)
    logger.info('{} documents processed, with {} exceptions'.format(count, len(exceptions)))

コード例 #9

ファイルを表示

ファイル: migrations.py プロジェクト: Johnetordoff/scrapi

def rename(doc, target=None, **kwargs):
    """Re-process a single document under the source name ``target``.

    A ``RawDocument`` is built from the document's fields with ``source``
    replaced by ``target``.  Unless the ``dry`` keyword argument is truthy,
    the copy is passed through ``tasks.process_raw`` and
    ``tasks.process_normalized`` and ``es.delete`` is called for the old
    source name on ``settings.ELASTIC_INDEX`` and ``share_v1``.

    The closing "Deleted document" message is logged in dry runs as well.

    Raises:
        AssertionError: if ``target`` is falsy, or if the document's source
            already equals ``target``.
    """
    assert target, "To run this migration you need a target."

    attributes = {
        'doc': doc.doc,
        'docID': doc.docID,
        'source': target,
        'filetype': doc.filetype,
        'timestamps': doc.timestamps,
        'versions': doc.versions
    }
    raw = RawDocument(attributes)

    assert doc.source != target, (
        "Can't rename {} to {}, names are the same.".format(doc.source, target))

    if not kwargs.get('dry'):
        tasks.process_raw(raw)
        normalized = tasks.normalize(raw, raw['source'])
        tasks.process_normalized(normalized, raw)
        logger.info(
            'Processed document from {} with id {}'.format(doc.source, raw['docID']))

        # Same delete request against both indices.
        for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
            es.delete(index=index_name, doc_type=doc.source, id=raw['docID'], ignore=[404])

    logger.info(
        'Deleted document from {} with id {}'.format(doc.source, raw['docID']))

コード例 #10

ファイルを表示

ファイル: renormalize.py プロジェクト: Eleonore9/scrapi

def main():
    """Normalize and process every raw document from ``document_generator``.

    An exception raised for one document is logged and the loop moves on to
    the next document.
    """
    for raw in document_generator():
        try:
            normalized = normalize(raw, raw['source'])
            process_normalized(normalized, raw)
        except Exception as err:
            logger.exception(err)

コード例 #11

ファイルを表示

def renormalize(docs, *args, **kwargs):
    """Normalize the raw form of each document and process the result.

    ``docs`` is always iterated to the end; the normalize/process step is
    skipped for every document when the ``dry`` keyword argument is truthy.
    Extra positional arguments are accepted and ignored.
    """
    for doc in docs:
        if kwargs.get('dry'):
            continue
        normalized = tasks.normalize(doc.raw, doc.raw['source'])
        tasks.process_normalized(normalized, doc.raw)

コード例 #12

ファイルを表示

ファイル: migrations.py プロジェクト: AndrewSallans/scrapi

def renormalize(docs, *args, **kwargs):
    """Re-run normalization and processing for every document in ``docs``.

    When the ``dry`` keyword argument is truthy the documents are still
    iterated but nothing is normalized or processed.  Extra positional
    arguments are accepted and ignored.
    """
    for doc in docs:
        if kwargs.get('dry'):
            continue
        renormalized = tasks.normalize(doc.raw, doc.raw['source'])
        tasks.process_normalized(renormalized, doc.raw)