def test_model_merge(self):
    merged = model.merge({'schema': 'Person'}, {'schema': 'Company'})
    assert merged['schema'] == 'LegalEntity'
    merged = model.merge({}, {'id': 'banana'})
    assert merged['id'] == 'banana'
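# The test above pins down the merge contract: two schemata collapse to a
# shared ancestor (Person + Company -> LegalEntity) and values present on
# either side survive. The sketch below only illustrates that behaviour; it
# is not the real model.merge, and the schema_ancestor argument is a
# hypothetical stand-in for proper schema-hierarchy resolution.
def merge_sketch(old, new, schema_ancestor='LegalEntity'):
    merged = dict(old)
    if old.get('id') or new.get('id'):
        # An existing identifier wins over the incoming one.
        merged['id'] = old.get('id') or new.get('id')
    schema = old.get('schema') or new.get('schema')
    if old.get('schema') and new.get('schema') and old['schema'] != new['schema']:
        # Stand-in for walking both schemata up to a common parent.
        schema = schema_ancestor
    if schema is not None:
        merged['schema'] = schema
    # Accumulate property values instead of overwriting them.
    properties = dict(old.get('properties', {}))
    for name, values in new.get('properties', {}).items():
        properties[name] = sorted(set(properties.get(name, [])) | set(values))
    if properties:
        merged['properties'] = properties
    return merged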
def _index_updates(collection, entities):
    """Look up existing index documents and generate an updated form.

    This is necessary to make the index accumulative, i.e. if an entity
    or link gets indexed twice with different field values, it'll add up
    the different field values into a single record. This is to avoid
    overwriting the document and losing field values. An alternative
    solution would be to implement this in Groovy on the ES.
    """
    common = {
        'collection_id': collection.id,
        'bulk': True,
        'roles': collection.roles,
        'updated_at': datetime.utcnow()
    }
    if not len(entities):
        return
    # Read back whatever is already indexed for these ids and fold it in.
    query = {
        'query': {
            'ids': {
                'values': list(entities.keys())
            }
        },
        '_source': ['schema', 'properties', 'created_at']
    }
    result = search_safe(index=entity_index(), body=query)
    for doc in result.get('hits', {}).get('hits', []):
        entity_id = doc['_id']
        entity = entities.get(entity_id)
        existing = doc.get('_source')
        combined = model.merge(existing, entity)
        combined['created_at'] = existing.get('created_at')
        entities[entity_id] = combined

    # Second pass: finalise each document and emit a bulk index action.
    for doc_id, entity in entities.items():
        entity.pop('id', None)
        entity.update(common)
        schema = model.get(entity.get('schema'))
        entity = finalize_index(entity, schema, [])
        # pprint(entity)
        yield {
            '_id': doc_id,
            '_index': entity_index(),
            '_type': 'doc',
            '_source': entity
        }
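# The generator above only builds bulk actions; something still needs to
# feed them to Elasticsearch. A minimal consumer is sketched below, assuming
# an elasticsearch-py client named `es`; the wrapper name and the chunking /
# timeout values are illustrative, not aleph's actual helper.
from elasticsearch.helpers import bulk

def index_entities_bulk(es, collection, entities):
    """Apply the accumulated index updates in a single bulk pass."""
    bulk(es, _index_updates(collection, entities),
         chunk_size=500, request_timeout=120)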
def _index_updates(collection, entities):
    """Look up existing index documents and generate an updated form.

    This is necessary to make the index accumulative, i.e. if an entity
    or link gets indexed twice with different field values, it'll add up
    the different field values into a single record. This is to avoid
    overwriting the document and losing field values. An alternative
    solution would be to implement this in Groovy on the ES.
    """
    common = {
        'collection_id': collection.id,
        '$bulk': True,
        'roles': collection.roles,
        'updated_at': datetime.utcnow()
    }
    if not len(entities):
        return
    # Read back whatever is already indexed for these ids and fold it in;
    # documents that have never been indexed come back with found=False.
    result = es.mget(index=es_index,
                     doc_type=TYPE_ENTITY,
                     body={'ids': list(entities.keys())},
                     _source=['schema', 'properties', 'created_at'])
    for doc in result.get('docs', []):
        if not doc.get('found', False):
            continue
        entity_id = doc['_id']
        entity = entities.get(entity_id)
        existing = doc.get('_source')
        combined = model.merge(existing, entity)
        combined['created_at'] = existing.get('created_at')
        entities[entity_id] = combined

    for doc_id, entity in entities.items():
        entity.pop('id', None)
        entity.pop('data', None)
        entity.update(common)
        if 'created_at' not in entity:
            entity['created_at'] = entity.get('updated_at')
        schema = model.get(entity.get('schema'))
        entity = finalize_index(entity, schema)
        # pprint(entity)
        yield {
            '_id': doc_id,
            '_type': TYPE_ENTITY,
            '_index': str(es_index),
            '_source': entity
        }
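# Illustration of the contract both variants share: callers pass a mapping
# of entity id to partial document and consume bulk actions. The ids,
# property values and the collection stub below are invented for the
# example, and running it requires the module-level `es` client to reach a
# live cluster.
from collections import namedtuple

FakeCollection = namedtuple('FakeCollection', ['id', 'roles'])

def example_index_updates():
    collection = FakeCollection(id=1, roles=[1, 2])
    entities = {
        'a1b2c3': {
            'schema': 'Person',
            'properties': {'name': ['John Doe'], 'nationality': ['de']},
        },
    }
    for action in _index_updates(collection, entities):
        # Each action carries bulk metadata plus the merged body, roughly:
        # {'_id': 'a1b2c3', '_type': ..., '_index': ..., '_source': {...}}
        print(action['_id'], action['_source'].get('schema'))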