Example #1
0
def rename(docs, target=None, **kwargs):
    """Re-submit every document in ``docs`` under the source name ``target``.

    For each document a ``RawDocument`` is built from the document's fields
    with ``source`` replaced by ``target``. Unless the ``dry`` keyword
    argument is truthy, that copy is passed through ``tasks.process_raw``,
    ``tasks.normalize`` and ``tasks.process_normalized``, and the entry filed
    under the document's old source is removed via ``es.delete`` (called with
    ``ignore=[404]``) from the ``settings.ELASTIC_INDEX`` and ``'share_v1'``
    indices.

    Asserts that ``target`` is truthy and that it differs from each
    document's current source.
    """
    assert target, "To run this migration you need a target."
    dry_run = kwargs.get('dry')

    for doc in docs:
        fields = {
            'doc': doc.doc,
            'docID': doc.docID,
            'source': target,
            'filetype': doc.filetype,
            'timestamps': doc.timestamps,
            'versions': doc.versions
        }
        raw = RawDocument(fields)
        old_source = doc.source

        assert old_source != target, "Can't rename {} to {}, names are the same.".format(
            old_source, target)

        if not dry_run:
            tasks.process_raw(raw)
            normalized = tasks.normalize(raw, raw['source'])
            tasks.process_normalized(normalized, raw)
            logger.info('Processed document from {} with id {}'.format(
                old_source, raw['docID']))

            # Remove the entry stored under the old source name from both indices.
            for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
                es.delete(index=index_name,
                          doc_type=old_source,
                          id=raw['docID'],
                          ignore=[404])

        # This message is emitted on dry runs too, although nothing is deleted then.
        logger.info('Deleted document from {} with id {}'.format(
            old_source, raw['docID']))
Example #2
0
def delete(doc, sources=None, **kwargs):
    """Delete ``doc`` and remove its entries from the search indices.

    The document itself is deleted through ``doc.timeout(5).delete()``; its
    id is then removed via ``es.delete`` (called with ``ignore=[404]``) from
    the ``settings.ELASTIC_INDEX`` and ``'share_v1'`` indices, using
    ``sources`` as the ``doc_type``.

    Asserts that ``sources`` is truthy. Extra keyword arguments are accepted
    and ignored.
    """
    assert sources, "To run this migration you need a source."

    doc.timeout(5).delete()

    # Same removal against both indices, in the original order.
    for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
        es.delete(index=index_name, doc_type=sources, id=doc.docID, ignore=[404])

    message = 'Deleted document from {} with id {}'.format(sources, doc.docID)
    logger.info(message)
Example #3
0
def rename(source, target, dry=True):
    """Re-submit every document of ``source`` under the name ``target``.

    Iterates ``documents(source)``; for each one builds a ``RawDocument``
    whose ``source`` is ``target``. When ``dry`` is false the copy is passed
    through ``process_raw``, ``normalize`` and ``process_normalized``, and on
    success the entry filed under ``source`` is removed via ``es.delete``
    (called with ``ignore=[404]``) from the ``settings.ELASTIC_INDEX`` and
    ``'share_v1'`` indices.

    A failure while building or processing one document is logged and
    collected, and the loop carries on with the next document. A recap of
    all collected exceptions and the totals is logged at the end.

    :param source: current source name of the documents to rename.
    :param target: new source name; must differ from ``source``.
    :param dry: when true (the default), only log what would be done.
    """
    assert source != target, "Can't rename {} to {}, names are the same".format(source, target)
    count = 0
    exceptions = []

    for doc in documents(source):
        count += 1
        try:
            raw = RawDocument({
                'doc': doc.doc,
                'docID': doc.docID,
                'source': target,
                'filetype': doc.filetype,
                'timestamps': doc.timestamps,
                'versions': doc.versions
            })
            if not dry:
                process_raw(raw)
                process_normalized(normalize(raw, raw['source']), raw)
            logger.info('Processed document from {} with id {}'.format(source, raw['docID']))
        except Exception as e:
            # Full traceback is logged here, while the exception is being handled.
            logger.exception(e)
            exceptions.append(e)
        else:
            # Only reached when processing succeeded, so ``raw`` is always bound.
            if not dry:
                es.delete(index=settings.ELASTIC_INDEX, doc_type=source, id=raw['docID'], ignore=[404])
                es.delete(index='share_v1', doc_type=source, id=raw['docID'], ignore=[404])
            logger.info('Deleted document from {} with id {}'.format(source, raw['docID']))
    if dry:
        logger.info('Dry run complete')

    # Recap of the failures. Log the loop variable itself: the ``except``
    # target ``e`` is no longer bound here on Python 3. ``logger.error`` is
    # used because no exception is being handled at this point.
    for ex in exceptions:
        logger.error(ex)
    logger.info('{} documents processed, with {} exceptions'.format(count, len(exceptions)))
Example #4
0
def delete_by_source(source):
    """Delete every document of ``source`` and its search-index entries.

    Iterates ``documents(source)``; each document is deleted with
    ``doc.delete()`` and its id is removed via ``es.delete`` (called with
    ``ignore=[404]``) from the ``settings.ELASTIC_INDEX`` and ``'share_v1'``
    indices.

    A failure on one document is logged and collected, and the loop carries
    on with the next document. A recap of all collected exceptions and the
    totals is logged at the end.

    :param source: source name whose documents should be deleted.
    """
    count = 0
    exceptions = []
    for doc in documents(source):
        count += 1
        try:
            doc.delete()
            es.delete(index=settings.ELASTIC_INDEX, doc_type=source, id=doc.docID, ignore=[404])
            es.delete(index='share_v1', doc_type=source, id=doc.docID, ignore=[404])
        except Exception as e:
            # Full traceback is logged here, while the exception is being handled.
            logger.exception(e)
            exceptions.append(e)

    # Recap of the failures. Log the loop variable itself: the ``except``
    # target ``e`` is no longer bound here on Python 3. ``logger.error`` is
    # used because no exception is being handled at this point.
    for ex in exceptions:
        logger.error(ex)
    logger.info('{} documents processed, with {} exceptions'.format(count, len(exceptions)))
Example #5
0
def rename(doc, target=None, **kwargs):
    """Re-submit a single document under the source name ``target``.

    A ``RawDocument`` is built from ``doc``'s fields with ``source`` replaced
    by ``target``. Unless the ``dry`` keyword argument is truthy, that copy
    is passed through ``tasks.process_raw``, ``tasks.normalize`` and
    ``tasks.process_normalized``, and the entry filed under the document's
    old source is removed via ``es.delete`` (called with ``ignore=[404]``)
    from the ``settings.ELASTIC_INDEX`` and ``'share_v1'`` indices.

    Asserts that ``target`` is truthy and differs from ``doc.source``.
    """
    assert target, "To run this migration you need a target."

    fields = {
        'doc': doc.doc,
        'docID': doc.docID,
        'source': target,
        'filetype': doc.filetype,
        'timestamps': doc.timestamps,
        'versions': doc.versions
    }
    raw = RawDocument(fields)
    old_source = doc.source

    assert old_source != target, "Can't rename {} to {}, names are the same.".format(old_source, target)

    if not kwargs.get('dry'):
        tasks.process_raw(raw)
        normalized = tasks.normalize(raw, raw['source'])
        tasks.process_normalized(normalized, raw)
        logger.info('Processed document from {} with id {}'.format(old_source, raw['docID']))

        # Remove the entry stored under the old source name from both indices.
        for index_name in (settings.ELASTIC_INDEX, 'share_v1'):
            es.delete(index=index_name, doc_type=old_source, id=raw['docID'], ignore=[404])

    # This message is emitted on dry runs too, although nothing is deleted then.
    logger.info('Deleted document from {} with id {}'.format(old_source, raw['docID']))