Example #1
def reconcile_op(query):
    """Reconcile operation for a single query."""
    state = QueryState({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)

    name = query.get('query', '')
    entity = {
        'id': 'fake',
        'names': [name],
        'fingerprints': [make_fingerprint(name)],
        'schemata': ensure_list(query.get('type'))
    }

    for p in query.get('properties', []):
        entity[p.get('pid')] = ensure_list(p.get('v'))

    suggested = similar_entities(entity, state)
    matches = []
    for ent in suggested.get('results'):
        types = [t for t in get_freebase_types() if ent['schema'] == t['id']]
        matches.append({
            'id': ent.get('id'),
            'name': ent.get('name'),
            'type': types,
            'score': min(100, ent.get('score') * 10),
            'uri': entity_link(ent.get('id')),
            'match': ent.get('name') == name
        })
    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {
        'result': matches,
        'num': len(matches)
    }
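For orientation, this is the kind of query payload the operation expects, inferred from the keys read above ('query', 'type', 'limit' and the 'properties' entries with 'pid'/'v'); the concrete values below are purely illustrative:

# Illustrative payload only; field names come from the lookups above, values are made up.
sample_query = {
    'query': 'Siemens AG',                 # name to reconcile
    'type': 'Company',                     # candidate schema
    'limit': '5',                          # forwarded to QueryState
    'properties': [
        {'pid': 'country', 'v': 'de'},     # extra property hint
    ],
}
# reconcile_op(sample_query) returns {'result': [...], 'num': <match count>}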
Example #2
File: authz.py Project: wcyn/aleph
 def check_roles(self, roles):
     # if self.in_maintenance:
     #     return False
     if self.is_admin:
         return True
     isect = self.roles.intersection(ensure_list(roles))
     return len(isect) > 0
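ensure_list is used throughout these snippets; a minimal sketch of what it presumably does (None becomes an empty list, a scalar is wrapped, a sequence is copied), assuming behaviour equivalent to the helper in the aleph codebase:

def ensure_list(obj):
    """Sketch of the helper: normalise any input into a list."""
    if obj is None:
        return []
    if isinstance(obj, (list, tuple, set)):
        return list(obj)
    return [obj]

# With this, check_roles('editor') and check_roles(['editor']) behave the same way.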
Example #3
def index_entity(entity):
    """Index an entity."""
    if entity.deleted_at is not None:
        return delete_entity(entity.id)

    data = {
        'foreign_ids': entity.foreign_ids,
        'data': entity.data,
        'created_at': entity.created_at,
        'updated_at': entity.updated_at,
        '$bulk': False,
        'roles': entity.collection.roles,
        'collection_id': entity.collection_id,
        'properties': {
            'name': [entity.name]
        }
    }

    for k, v in entity.data.items():
        data['properties'][k] = ensure_list(v)

    # data['$documents'] = get_count(entity)
    data = finalize_index(data, entity.schema)
    es.index(index=es_index, doc_type=TYPE_ENTITY, id=entity.id, body=data)
    data['id'] = entity.id
    data['$type'] = TYPE_ENTITY
    return data
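es.index is the standard elasticsearch-py call (the doc_type argument dates this to the Elasticsearch 5/6 client generation). The module-level objects the snippet relies on could be set up roughly as below; the names and values are assumptions, the real ones come from the aleph configuration:

from elasticsearch import Elasticsearch

# Assumed module-level setup; index name and type constant are illustrative only.
es = Elasticsearch(['http://localhost:9200'])
es_index = 'aleph-entities'
TYPE_ENTITY = 'entity'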
Example #4
def finalize_index(data, schema):
    """Apply final denormalisations to the index."""
    properties = data.get('properties', {})

    texts = []
    for prop in schema.properties:
        if prop.name not in properties:
            continue
        if prop.type_name in ['date', 'url', 'uri', 'country']:
            continue
        texts.extend(ensure_list(properties[prop.name]))

    data['text'] = index_form(texts)
    data = schema.invert(data)
    index_names(data)
    data['schema'] = schema.name
    # Get implied schemata (i.e. parents of the actual schema)
    data['schemata'] = schema.names

    # Second name field for non-tokenised sorting.
    if 'name' in data:
        data['name_sort'] = data.get('name')

    # pprint(data)
    return data
Example #5
 def to_index(self):
     entity = self.to_dict()
     entity['properties'] = {'name': [self.name]}
     for k, v in self.data.items():
         v = ensure_list(v)
         if len(v):
             entity['properties'][k] = v
     return entity
Example #6
 def dump(self, data, many=False):
     results = []
     for res in ensure_list(data):
         schema = self.SCHEMATA[res['$doc_type']]
         res = schema().dump(res)
         if not many:
             return res
         results.append(res.data)
     return results, []
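SCHEMATA is presumably a class-level registry mapping the stored $doc_type marker to a marshmallow schema class; a hypothetical, self-contained version is sketched below (the .data attribute on the dump result matches marshmallow 2.x):

from marshmallow import Schema, fields

# Hypothetical schemas and registry; the real ones live in the aleph serializers.
class EntitySchema(Schema):
    id = fields.String()
    name = fields.String()

class DocumentSchema(EntitySchema):
    title = fields.String()

SCHEMATA = {
    'entity': EntitySchema,
    'document': DocumentSchema,
}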
Example #7
def finalize_index(data, schema):
    """Apply final denormalisations to the index."""
    properties = data.get('properties', {})

    texts = []
    for vs in properties.values():
        for v in ensure_list(vs):
            texts.append(v)

    data['text'] = index_form(texts)
    data['fingerprints'] = data.get('fingerprints', [])

    # Generate inverted representations of the data stored in properties.
    for prop in schema.properties:
        values = properties.get(prop.name, [])
        if not len(values):
            continue

        # Find and set the name property
        if prop.is_label:
            data['name'] = values[0]

        # Generate key material
        # TODO: this should probably be record-based.
        data['fingerprints'].extend(prop.type.fingerprint(values))

        # Add inverted properties. This takes all the properties
        # of a specific type (names, dates, emails etc.)
        invert = prop.type.index_invert
        if invert:
            if invert not in data:
                data[invert] = []
            for norm in prop.type.normalize(values):
                if norm not in data[invert]:
                    data[invert].append(norm)

    data['fingerprints'] = list(set(data['fingerprints']))

    # Add latinised names
    names = data.get('names', [])
    for name in list(names):
        names.append(ascii_text(name))
    data['names'] = list(set(names))

    # Get implied schemata (i.e. parents of the actual schema)
    data['schema'] = schema.name
    data['schemata'] = []
    for parent in schema.schemata:
        if not parent.hidden:
            data['schemata'].append(parent.name)

    # Second name field for non-tokenised sorting.
    if 'name' in data:
        data['name_sort'] = data.get('name')
    return data
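The inverted-property step is the densest part of this function; the same accumulation pattern, stripped of the schema machinery and with stand-in data, looks like this:

# Stand-in sketch of the index_invert accumulation; the property-to-field
# mapping and the normaliser are made up for illustration.
properties = {'name': ['ACME Inc.'], 'email': ['Info@ACME.example']}
invert_map = {'name': 'names', 'email': 'emails'}   # plays prop.type.index_invert

def normalize(values):                              # plays prop.type.normalize
    return [v.lower() for v in values]

data = {}
for prop, values in properties.items():
    invert = invert_map.get(prop)
    if not invert:
        continue
    bucket = data.setdefault(invert, [])
    for norm in normalize(values):
        if norm not in bucket:
            bucket.append(norm)

# data == {'names': ['acme inc.'], 'emails': ['info@acme.example']}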
Example #8
 def dump(self, data, many=False):
     results = []
     for res in ensure_list(data):
         if res.get('schema') == Document.SCHEMA:
             res = DocumentSchema().dump(res)
         else:
             res = EntitySchema().dump(res)
         if not many:
             return res
         results.append(res.data)
     return results, []
Example #9
def reconcile_op(query):
    """Reconcile operation for a single query."""
    parser = SearchQueryParser({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)

    name = query.get('query', '')
    schema = query.get('type') or 'Thing'
    entity = {
        'id': 'fake',
        'names': [name],
        'fingerprints': [fingerprints.generate(name)],
        'schemata': ensure_list(schema),
        'schema': schema
    }

    for p in query.get('properties', []):
        entity[p.get('pid')] = ensure_list(p.get('v'))

    query = SimilarEntitiesQuery(parser, entity=entity)
    matches = []
    for doc in query.search().get('hits').get('hits'):
        source = doc.get('_source')
        match = {
            'id': doc.get('_id'),
            'name': source.get('name'),
            'score': min(100, doc.get('_score') * 10),
            'uri': entity_url(doc.get('_id')),
            'match': source.get('name') == name
        }
        for type_ in get_freebase_types():
            if source['schema'] == type_['id']:
                match['type'] = [type_]
        matches.append(match)

    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {
        'result': matches,
        'num': len(matches)
    }
Example #10
def scan_entity_mentions(entity):
    """Find mentions of a given entity in all records."""
    shoulds = []
    for term in entity.regex_terms:
        shoulds.append(text_query_string(term))

    query = {
        'query': {
            'bool': {
                'should': shoulds,
                'minimum_should_match': 1
            }
        },
        'sort': [{'document_id': 'desc'}],
        '_source': ['document_id', 'text']
    }
    for res in scan(es, query=query, index=es_index, doc_type=[TYPE_RECORD]):
        for text in ensure_list(res.get('_source').get('text')):
            yield (res.get('_source').get('document_id'), text)
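Assuming scan here is elasticsearch.helpers.scan (the scroll-based streaming helper), the generator yields (document_id, text) pairs and can be consumed like this:

# Hypothetical consumer; entity is any object exposing regex_terms as above.
for document_id, text in scan_entity_mentions(entity):
    print(document_id, text[:80])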
Example #11
def get_languages_iso3(codes):
    """Turn (pre-set) ISO2 language codes into ISO3 codes."""
    supported = []
    for lang in ensure_list(codes):
        if lang is None or len(lang.strip()) not in [2, 3]:
            continue
        lang = lang.lower().strip()
        if len(lang) == 2:
            try:
                c = languages.get(alpha_2=lang)
                lang = c.alpha_3
            except KeyError as ke:
                log.exception(ke)
                continue
        supported.append(lang)

    # if not len(supported):
    supported.append('eng')
    return '+'.join(sorted(set(supported)))
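If languages here is pycountry.languages (an assumption consistent with the alpha_2/alpha_3 attributes), the two-to-three letter mapping works as follows; the call on the last line also shows the unconditional 'eng' fallback:

import pycountry

lang = pycountry.languages.get(alpha_2='de')
print(lang.alpha_3)                         # 'deu'
print(get_languages_iso3(['de', 'EN ']))    # 'deu+eng'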
Example #12
def finalize_index(data, schema):
    """Apply final denormalisations to the index."""
    properties = data.get('properties', {})

    texts = []
    for vs in properties.values():
        for v in ensure_list(vs):
            texts.append(v)

    data['text'] = index_form(texts)

    # Generate inverted representations of the data stored in properties.
    for prop in schema.properties:
        values = properties.get(prop.name, [])
        if not len(values):
            continue

        # Find and set the name property
        if prop.is_label:
            data['name'] = values[0]

        # Add inverted properties. This takes all the properties
        # of a specific type (names, dates, emails etc.)
        invert = prop.type.index_invert
        if invert:
            if invert not in data:
                data[invert] = []
            for norm in prop.type.normalize(values):
                if norm not in data[invert]:
                    data[invert].append(norm)

    index_names(data)

    # Get implied schemata (i.e. parents of the actual schema)
    data['schema'] = schema.name
    data['schemata'] = [p.name for p in schema.schemata if not p.hidden]

    # Second name field for non-tokenised sorting.
    if 'name' in data:
        data['name_sort'] = data.get('name')

    # pprint(data)
    return data
Example #13
    def validate(self, data):
        """Validate that the data should be stored.

        Since the types system doesn't really have validation, this currently
        tries to normalize the value to see if it passes strict parsing.
        """
        value, error = [], None
        for val in ensure_list(data):
            val = string_value(val)
            if val is None:
                continue
            val = val.strip()
            if self.type.normalize_value(val) is None:
                error = "Invalid value"
            value.append(val)
        if not self.is_multiple:
            value = value[0] if len(value) else None
        else:
            value = list(set(value))
        if self.is_label and (value is None or not len(value)):
            error = "Field is required."
        return value, error
Example #14
 def terms(self):
     terms = set([self.name])
     for alias in ensure_list(self.data.get('alias')):
         if alias is not None and len(alias):
             terms.add(alias)
     return terms
Example #15
 def check_roles(self, roles):
     if self.is_admin:
         return True
     isect = self.roles.intersection(ensure_list(roles))
     return len(isect) > 0
Example #16
 def normalize(self, values):
     results = set()
     for value in values:
         results.update(ensure_list(self.normalize_value(value)))
     return results
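The same flatten-into-a-set pattern, made self-contained with a stand-in normalize_value (the real one belongs to the property type and may return a list, which the original passes through ensure_list):

# Stand-in for the property type's normalize_value; returns None for empty input.
def normalize_value(value):
    value = value.strip().lower()
    return value or None

def normalize(values):
    results = set()
    for value in values:
        norm = normalize_value(value)
        if norm is not None:
            results.add(norm)
    return results

# normalize(['  ACME ', 'acme', '']) == {'acme'}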