def _inner(body): metadata = sedar.collect_sedar_metadata(body['source_url']) if len(body['attributes']) > 0: return None attributes = generate_attributes(metadata) body['attributes'] = attributes body = fix_date(body) return body
def index_package(package, plain_text, normalized_text):
    """Assemble a search document for ``package`` and send it to the index.

    The document is built from the package's id and collection, selected
    keys of its source metadata, the contents of ``plain_text`` and
    ``normalized_text`` (when each exists), plus entity tags and generated
    attributes.

    Args:
        package: Object exposing ``id``, ``collection`` and ``source``.
        plain_text: Resource exposing ``exists()`` and ``fh()``; its content
            is stored under ``'text'``.
        normalized_text: Resource exposing ``exists()`` and ``fh()``; its
            content is stored under ``'normalized'``.

    Returns:
        None. Logs an error and returns early when the package has no
        source or when no title can be determined.
    """
    es.json_encoder = JSONEncoder

    doc = {
        'id': package.id,
        'collection': package.collection
    }

    origin = package.source
    if origin is None:
        log.error("No source for package %r, skipping", package)
        return

    meta = origin.meta
    doc['name'] = meta.get('name')
    doc['slug'] = meta.get('slug')
    # The title falls back to the name when the metadata has none.
    doc['title'] = meta.get('title') or doc['name']
    for field in ('source_url', 'created_at', 'updated_at',
                  'filed_at', 'extension', 'mime_type'):
        doc[field] = meta.get(field)

    if plain_text.exists():
        doc['text'] = plain_text.fh().read()

    # NOTE(review): the summary is assumed to be computed whether or not
    # plain text exists (it falls back to ``doc.get('text')``) -- confirm
    # against the original layout.
    doc['summary'] = html_summary(meta.get('summary') or doc.get('text'))

    if normalized_text.exists():
        doc['normalized'] = normalized_text.fh().read()

    title = doc['title']
    if not title:
        log.error("No title for package %r, skipping", package)
        return

    doc['entities'] = EntityTag.by_package(package.collection, package.id)
    doc['attributes'] = generate_attributes(meta)

    log.info("Indexing: %r", title)
    es.index(es_index, DOC_TYPE, doc, package.id)