def rename(docs, target=None, **kwargs):
    """Move every document in ``docs`` to the source named ``target``.

    For each document the raw attributes are deep-copied, the copy's
    ``source`` is set to ``target`` and the result is wrapped in a
    ``RawDocument`` (built with ``validate=False``). Unless the ``dry``
    keyword is truthy, that copy is run through ``tasks.process_raw``,
    ``tasks.normalize`` and ``tasks.process_normalized``, and the entry filed
    under the old source is then deleted from the ``settings.ELASTIC_INDEX``
    and ``share_v1`` indices.

    Args:
        docs: Iterable of documents exposing ``raw.attributes``.
        target: New source name. Required despite the ``None`` default.
        **kwargs: Only ``dry`` is consulted; a truthy value skips the
            processing and deletion steps.

    Raises:
        AssertionError: If ``target`` is falsy, or if a document's current
            source already equals ``target``.
    """
    # Raised explicitly instead of via ``assert`` so the guards survive
    # ``python -O``; the exception type and messages are unchanged.
    if not target:
        raise AssertionError("To run this migration you need a target.")

    dry = kwargs.get('dry')

    for doc in docs:
        source = doc.raw.attributes['source']

        # Check before doing any work: renaming a source onto itself would
        # re-process the document and then delete it from the index.
        if source == target:
            raise AssertionError(
                "Can't rename {} to {}, names are the same.".format(
                    source, target))

        new_doc = copy.deepcopy(doc.raw.attributes)
        new_doc['source'] = target
        raw = RawDocument(new_doc, validate=False)

        if not dry:
            tasks.process_raw(raw)
            tasks.process_normalized(tasks.normalize(raw, raw['source']), raw)
            logger.info('Processed document from {} with id {}'.format(
                source, raw['docID']))

            es_client = get_processor('elasticsearch').manager.es
            # ignore=[404]: an entry that is already gone is not an error.
            for index in (settings.ELASTIC_INDEX, 'share_v1'):
                es_client.delete(
                    index=index,
                    doc_type=source,
                    id=raw['docID'],
                    ignore=[404])

        logger.info('Renamed document from {} to {} with id {}'.format(
            source, target, raw['docID']))
def rename(docs, target=None, **kwargs):
    """Re-file every document in ``docs`` under the source ``target``.

    A new ``RawDocument`` is built for each document from its ``doc``,
    ``docID``, ``filetype``, ``timestamps`` and ``versions`` fields, with
    ``source`` replaced by ``target``. Unless the ``dry`` keyword is truthy,
    the new document is run through ``tasks.process_raw``,
    ``tasks.normalize`` and ``tasks.process_normalized``, and the entry under
    the old source is deleted from the ``settings.ELASTIC_INDEX`` and
    ``share_v1`` indices via ``es``.

    Args:
        docs: Iterable of documents exposing the fields listed above plus
            ``source``.
        target: New source name. Required despite the ``None`` default.
        **kwargs: Only ``dry`` is consulted; a truthy value skips the
            processing and deletion steps.

    Raises:
        AssertionError: If ``target`` is falsy, or if a document's current
            source already equals ``target``.
    """
    # Raised explicitly instead of via ``assert`` so the guards survive
    # ``python -O``; the exception type and messages are unchanged.
    if not target:
        raise AssertionError("To run this migration you need a target.")

    dry = kwargs.get('dry')

    for doc in docs:
        old_source = doc.source

        # Check before building anything: renaming a source onto itself
        # would re-process the document and then delete it from the index.
        if old_source == target:
            raise AssertionError(
                "Can't rename {} to {}, names are the same.".format(
                    old_source, target))

        raw = RawDocument({
            'doc': doc.doc,
            'docID': doc.docID,
            'source': target,
            'filetype': doc.filetype,
            'timestamps': doc.timestamps,
            'versions': doc.versions,
        })

        if not dry:
            tasks.process_raw(raw)
            tasks.process_normalized(tasks.normalize(raw, raw['source']), raw)
            logger.info('Processed document from {} with id {}'.format(
                old_source, raw['docID']))

            # ignore=[404]: an entry that is already gone is not an error.
            for index in (settings.ELASTIC_INDEX, 'share_v1'):
                es.delete(
                    index=index,
                    doc_type=old_source,
                    id=raw['docID'],
                    ignore=[404])

        logger.info('Deleted document from {} with id {}'.format(
            old_source, raw['docID']))
def rename(source, target, dry=True):
    """Re-file every document of ``source`` under the name ``target``.

    Iterates ``documents(source)`` and builds, for each one, a
    ``RawDocument`` with the same ``doc``, ``docID``, ``filetype``,
    ``timestamps`` and ``versions`` but with ``source`` set to ``target``.
    When ``dry`` is false the new document is passed through ``process_raw``,
    ``normalize`` and ``process_normalized``; if that succeeded, the entry
    under the old source is deleted from the ``settings.ELASTIC_INDEX`` and
    ``share_v1`` indices. A failure on one document is logged and collected,
    and the loop continues with the next document.

    Args:
        source: Current source name.
        target: New source name; must differ from ``source``.
        dry: When true (the default) nothing is processed or deleted.

    Raises:
        AssertionError: If ``source`` equals ``target``.
    """
    # Raised explicitly instead of via ``assert`` so the guard survives
    # ``python -O``; the exception type and message are unchanged.
    if source == target:
        raise AssertionError(
            "Can't rename {} to {}, names are the same".format(source, target))

    count = 0
    exceptions = []

    for doc in documents(source):
        count += 1
        try:
            raw = RawDocument({
                'doc': doc.doc,
                'docID': doc.docID,
                'source': target,
                'filetype': doc.filetype,
                'timestamps': doc.timestamps,
                'versions': doc.versions,
            })
            if not dry:
                process_raw(raw)
                process_normalized(normalize(raw, raw['source']), raw)
                logger.info('Processed document from {} with id {}'.format(
                    source, raw['docID']))
        except Exception as err:
            # Best effort: record the failure and carry on with the next
            # document; the old entry is left in place for this one.
            logger.exception(err)
            exceptions.append(err)
        else:
            # Only remove the old entry once the new one was written.
            if not dry:
                # ignore=[404]: an entry that is already gone is not an error.
                for index in (settings.ELASTIC_INDEX, 'share_v1'):
                    es.delete(
                        index=index,
                        doc_type=source,
                        id=raw['docID'],
                        ignore=[404])
                logger.info('Deleted document from {} with id {}'.format(
                    source, raw['docID']))

    if dry:
        logger.info('Dry run complete')

    # Log each collected exception. The loop variable is ``ex``; the name
    # bound by ``except ... as`` is no longer available here on Python 3.
    for ex in exceptions:
        logger.exception(ex)
    logger.info('{} documents processed, with {} exceptions'.format(
        count, len(exceptions)))
def test_process_raw_calls(raw_doc, monkeypatch):
    """``tasks.process_raw`` hands the document on to the processing layer.

    ``scrapi.tasks.processing.process_raw`` is swapped for a mock, and the
    test checks it is called exactly once with the raw document and an empty
    dict.
    """
    patched_path = 'scrapi.tasks.processing.process_raw'
    processing_stub = mock.Mock()
    monkeypatch.setattr(patched_path, processing_stub)

    tasks.process_raw(raw_doc)

    processing_stub.assert_called_once_with(raw_doc, {})
def test_process_raw_calls(raw_doc, monkeypatch):
    """Check that ``tasks.process_raw`` delegates to the processing module.

    The real ``scrapi.tasks.processing.process_raw`` is replaced by a mock
    for the duration of the test; the mock must receive one call carrying the
    raw document and an empty dict.
    """
    delegate = mock.Mock()
    delegate_path = "scrapi.tasks.processing.process_raw"
    monkeypatch.setattr(delegate_path, delegate)

    tasks.process_raw(raw_doc)

    delegate.assert_called_once_with(raw_doc, {})
def rename(docs, target=None, **kwargs):
    """Rename the source of each document in ``docs`` to ``target``.

    The document's raw attributes are deep-copied, ``source`` on the copy is
    overwritten with ``target``, and a ``RawDocument`` is created from it
    with ``validate=False``. When the ``dry`` keyword is absent or falsy the
    new document goes through ``tasks.process_raw``, ``tasks.normalize`` and
    ``tasks.process_normalized``, after which the entry stored under the
    previous source is deleted from both the ``settings.ELASTIC_INDEX`` and
    ``share_v1`` indices.

    Args:
        docs: Iterable of documents exposing ``raw.attributes``.
        target: New source name. Required despite the ``None`` default.
        **kwargs: Only ``dry`` is consulted; a truthy value skips the
            processing and deletion steps.

    Raises:
        AssertionError: If ``target`` is falsy, or if a document's current
            source already equals ``target``.
    """
    # Explicit raises (not ``assert``) keep these guards active under
    # ``python -O``; callers still see AssertionError with the same text.
    if not target:
        raise AssertionError("To run this migration you need a target.")

    is_dry_run = kwargs.get('dry')

    for doc in docs:
        previous_source = doc.raw.attributes['source']

        # Refuse a no-op rename up front: otherwise the document would be
        # re-processed under the same name and then deleted from the index.
        if previous_source == target:
            raise AssertionError(
                "Can't rename {} to {}, names are the same.".format(
                    previous_source, target))

        new_doc = copy.deepcopy(doc.raw.attributes)
        new_doc['source'] = target
        raw = RawDocument(new_doc, validate=False)

        if not is_dry_run:
            tasks.process_raw(raw)
            tasks.process_normalized(tasks.normalize(raw, raw['source']), raw)
            logger.info('Processed document from {} with id {}'.format(
                previous_source, raw['docID']))

            es_client = get_processor('elasticsearch').manager.es
            # ignore=[404]: an entry that is already gone is not an error.
            for index in (settings.ELASTIC_INDEX, 'share_v1'):
                es_client.delete(
                    index=index,
                    doc_type=previous_source,
                    id=raw['docID'],
                    ignore=[404])

        logger.info('Renamed document from {} to {} with id {}'.format(
            previous_source, target, raw['docID']))
def rename(doc, target=None, **kwargs):
    """Re-file a single document under the source ``target``.

    Builds a ``RawDocument`` from the document's ``doc``, ``docID``,
    ``filetype``, ``timestamps`` and ``versions`` with ``source`` set to
    ``target``. Unless the ``dry`` keyword is truthy, the new document is run
    through ``tasks.process_raw``, ``tasks.normalize`` and
    ``tasks.process_normalized``, and the entry under the old source is
    deleted from the ``settings.ELASTIC_INDEX`` and ``share_v1`` indices via
    ``es``.

    Args:
        doc: Document exposing the fields listed above plus ``source``.
        target: New source name. Required despite the ``None`` default.
        **kwargs: Only ``dry`` is consulted; a truthy value skips the
            processing and deletion steps.

    Raises:
        AssertionError: If ``target`` is falsy, or if the document's current
            source already equals ``target``.
    """
    # Raised explicitly instead of via ``assert`` so the guards survive
    # ``python -O``; the exception type and messages are unchanged.
    if not target:
        raise AssertionError("To run this migration you need a target.")

    old_source = doc.source

    # Check before building anything: renaming a source onto itself would
    # re-process the document and then delete it from the index.
    if old_source == target:
        raise AssertionError(
            "Can't rename {} to {}, names are the same.".format(
                old_source, target))

    raw = RawDocument({
        'doc': doc.doc,
        'docID': doc.docID,
        'source': target,
        'filetype': doc.filetype,
        'timestamps': doc.timestamps,
        'versions': doc.versions,
    })

    if not kwargs.get('dry'):
        tasks.process_raw(raw)
        tasks.process_normalized(tasks.normalize(raw, raw['source']), raw)
        logger.info('Processed document from {} with id {}'.format(
            old_source, raw['docID']))

        # ignore=[404]: an entry that is already gone is not an error.
        for index in (settings.ELASTIC_INDEX, 'share_v1'):
            es.delete(
                index=index,
                doc_type=old_source,
                id=raw['docID'],
                ignore=[404])

    logger.info('Deleted document from {} with id {}'.format(
        old_source, raw['docID']))