Esempio n. 1
0
 def transient(self, data):
     data['$uri'] = url_for('entities_api.view', id=data.get('id'))
     data['$ui'] = entity_url(data.get('id'))
     if data.get('$bulk'):
         data['$writeable'] = False
     else:
         collection_id = data.get('collection_id')
         data['$writeable'] = request.authz.can_write(collection_id)
     return data
Esempio n. 2
0
 def entity_links(self, data, pk, schemata):
     return {
         'self': url_for('entities_api.view', id=pk),
         # 'similar': url_for('entities_api.similar', id=pk),
         # 'documents': url_for('entities_api.documents', id=pk),
         'references': url_for('entities_api.references', id=pk),
         'tags': url_for('entities_api.tags', id=pk),
         'ui': entity_url(pk)
     }
Esempio n. 3
0
def reconcile_index():
    domain = app_ui_url.strip('/')
    api_key = request.authz.role.api_key if request.authz.logged_in else None
    meta = {
        'name': settings.APP_TITLE,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {
            'url': entity_url('{{id}}')
        },
        'preview': {
            'url': entity_url('{{id}}') + '?api_key=%s' % api_key,
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url':
                domain,
                'service_path':
                url_for('reconcile_api.suggest_entity', api_key=api_key)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type')
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property')
            }
        },
        'defaultTypes': [{
            'id': 'Entity',
            'name': 'Persons and Companies'
        }]
    }
    return jsonify(meta)
Esempio n. 4
0
def reconcile_op(query):
    """Reconcile operation for a single query."""
    parser = SearchQueryParser({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)

    name = query.get('query', '')
    schema = query.get('type') or 'Thing'
    entity = {
        'id': 'fake',
        'names': [name],
        'fingerprints': [fingerprints.generate(name)],
        'schemata': ensure_list(schema),
        'schema': schema
    }

    for p in query.get('properties', []):
        entity[p.get('pid')] = ensure_list(p.get('v'))

    query = SimilarEntitiesQuery(parser, entity=entity)
    matches = []
    for doc in query.search().get('hits').get('hits'):
        source = doc.get('_source')
        match = {
            'id': doc.get('_id'),
            'name': source.get('name'),
            'score': min(100, doc.get('_score') * 10),
            'uri': entity_url(doc.get('_id')),
            'match': source.get('name') == name
        }
        for type_ in get_freebase_types():
            if source['schema'] == type_['id']:
                match['type'] = [type_]
        matches.append(match)

    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {
        'result': matches,
        'num': len(matches)
    }
Esempio n. 5
0
def generate_matches_sheet(workbook,
                           sheet,
                           collection,
                           match_collection,
                           authz,
                           links=True,
                           one_sheet=False,
                           offset=0,
                           limit=1000):
    from aleph.views.serializers import MatchSchema

    if one_sheet:
        sheet_label = "All matches (top %s per collection)" % limit
    else:
        sheet_label = "%s (top %s)" % (match_collection.label, limit)

    sheet.set_zoom(125)
    parser = QueryParser({}, authz, limit=limit)
    q_match = Match.find_by_collection(collection.id, match_collection.id)
    matches = MatchQueryResult({}, q_match, parser=parser, schema=MatchSchema)

    if offset < 3:

        sheet.write(0, 0, '', workbook.header_format)
        sheet.write(1, 0, 'Score', workbook.header_format)
        sheet.merge_range(0, 1, 0, 4, collection.label, workbook.header_format)
        sheet.write(1, 1, 'Name', workbook.header_format)
        sheet.write(1, 2, 'Type', workbook.header_format)
        sheet.write(1, 3, 'Country', workbook.header_format)
        sheet.write(1, 4, 'Source URL', workbook.header_format)
        sheet.merge_range(0, 5, 0, 8, sheet_label, workbook.header_format)
        sheet.write(1, 5, 'Name', workbook.header_format)
        sheet.write(1, 6, 'Type', workbook.header_format)
        sheet.write(1, 7, 'Country', workbook.header_format)
        if one_sheet:
            sheet.write(1, 8, 'Collection', workbook.header_format)

        sheet.freeze_panes(2, 0)
        sheet.autofilter(1, 1, 2 + len(matches.results), 8)

    widths = {}
    for row, result in enumerate(matches.results, offset):
        sheet.write_number(row, 0, int(result.score))
        name = result.entity.get('name')
        widths[1] = max(widths.get(1, 0), len(name))
        if links:
            url = entity_url(result.entity_id)
            sheet.write_url(row, 1, url, workbook.link_format, name)
        else:
            sheet.write_string(row, 1, name)
        schema = model.get(result.entity['schema'])
        sheet.write_string(row, 2, schema.label)
        countries = ', '.join(sorted(result.entity.get('countries', [])))
        sheet.write_string(row, 3, countries.upper())
        ent_props = result.entity.get('properties', {})
        if (ent_props.get('sourceUrl') is not None):
            source_url = ', '.join(ent_props.get('sourceUrl'))
        else:
            source_url = ''
        sheet.write_string(row, 4, source_url)

        name = result.match.get('name')
        widths[5] = max(widths.get(5, 0), len(name))
        if links:
            url = entity_url(result.match_id)
            sheet.write_url(row, 5, url, workbook.link_format, name)
        else:
            sheet.write_string(row, 5, name)
        schema = model.get(result.match['schema'])
        sheet.write_string(row, 6, schema.label)
        countries = ', '.join(sorted(result.match.get('countries', [])))
        sheet.write_string(row, 7, countries.upper())
        if one_sheet:
            sheet.write_string(row, 8, match_collection.label)

    for idx, max_len in widths.items():
        max_len = min(70, max(7, max_len + 1))
        sheet.set_column(idx, idx, float(max_len))

    return sheet