Ejemplo n.º 1
0
    def before_index(self, pkg_dict):
        """
        Enrich the package dict with localized vocabulary values before
        it is sent to the search index, and optionally trim long strings.

        Localized values for ``fao_m49_regions``, ``fao_datatype`` and
        ``fao_agrovoc`` are merged into ``pkg_dict`` when those keys hold
        a truthy value.

        When ``TRIM_FOR_INDEX`` is truthy, string values (and strings
        inside list/set/tuple values) are cut to ``TRIM_LIMIT`` characters.
        Keys listed in ``TRIM_SKIP_FOR_FIELDS`` and keys starting with any
        prefix from ``TRIM_SKIP_FOR_FIELDS_WILDCHAR`` are left untouched.

        :param pkg_dict: package dictionary, modified in place
        :returns: the same ``pkg_dict`` instance
        """
        regions = pkg_dict.get('fao_m49_regions')
        if regions:
            if not isinstance(regions, list):
                regions = v._deserialize_from_array(regions)
            pkg_dict.update(self.get_localized_regions(regions))
        if pkg_dict.get('fao_datatype'):
            pkg_dict.update(self.get_localized_datatype(pkg_dict['fao_datatype']))
        if pkg_dict.get('fao_agrovoc'):
            pkg_dict.update(self.get_localized_agrovoc(pkg_dict['fao_agrovoc']))

        # optional trim values to 32k field size limit
        if TRIM_FOR_INDEX:
            # str.startswith accepts a tuple of prefixes; testing it in the
            # outer guard makes the skip actually apply to the field (a
            # `continue` inside a nested prefix loop would only affect that
            # inner loop).
            skip_prefixes = tuple(TRIM_SKIP_FOR_FIELDS_WILDCHAR)
            # iterate over a snapshot because values are reassigned below
            for k, val in list(pkg_dict.items()):
                # skip known text fields
                if k in TRIM_SKIP_FOR_FIELDS or k.startswith(skip_prefixes):
                    continue
                if isinstance(val, basestring):
                    if len(val) > TRIM_LIMIT:
                        log.debug('triming %s to 32k: %s', k, val)
                    pkg_dict[k] = val[:TRIM_LIMIT]
                elif isinstance(val, (list, set, tuple,)):
                    if any(len(item) > TRIM_LIMIT
                           for item in val if isinstance(item, basestring)):
                        log.debug('triming %s to 32k: %s', k, val)
                    # note: the container is always stored back as a list
                    pkg_dict[k] = [item[:TRIM_LIMIT] if isinstance(item, basestring) else item
                                   for item in val]

        return pkg_dict
Ejemplo n.º 2
0
    def rename_term_in_extras(self, old_term, new_term):
        """
        Replace ``old_term`` with ``new_term`` in this vocabulary's field
        for every active dataset whose extras reference the old term.

        Both terms are checked with ``self.valid_term`` first. Matching
        packages are loaded with ``package_show`` and written back with
        ``package_update`` using an ``ignore_auth`` context.

        :param old_term: term currently stored in package extras
        :param new_term: term that should be stored instead
        :returns: number of packages that were updated
        :raises ValueError: if either term is rejected by ``valid_term``
        """
        if not self.valid_term(old_term):
            raise ValueError(u"Old term {} is not valid".format(old_term))
        if not self.valid_term(new_term):
            raise ValueError(u"New term {} is not valid".format(new_term))

        multivalued = self.is_multivalued
        if multivalued:
            # coarse substring pre-filter only; exact membership is
            # verified after the stored array is deserialized below
            value_matches = PackageExtra.value.like('%{}%'.format(old_term))
        else:
            value_matches = PackageExtra.value == old_term

        package_ids = Session.query(PackageExtra.package_id)\
                             .join(Package, Package.id == PackageExtra.package_id)\
                             .filter(PackageExtra.key == self.field_name,
                                     value_matches,
                                     Package.type == 'dataset',
                                     Package.state == 'active')

        # import in function to avoid circular dependencies
        from ckanext.faociok.validators import _serialize_to_array, _deserialize_from_array

        base_context = {'ignore_auth': True,
                        'user': _get_user()['name']}
        show_package = t.get_action('package_show')
        update_package = t.get_action('package_update')

        updated = 0
        for row in package_ids:
            # each action call gets its own copy of the context
            package = show_package(base_context.copy(), {'name_or_id': row[0]})
            stored = package.get(self.field_name)
            if multivalued:
                items = _deserialize_from_array(stored)
                if old_term not in items:
                    # false positive from the LIKE pre-filter
                    continue
                items.remove(old_term)
                items.append(new_term)
                replacement = _serialize_to_array(items)
            else:
                replacement = new_term

            package[self.field_name] = replacement
            package.pop('metadata_modified', None)
            update_package(base_context.copy(), package)
            updated += 1
        return updated
Ejemplo n.º 3
0
def get_field_data(data, field, lang=None):
    """
    Convert a stored field value into the form used for presentation.

    * For fields with ``element == 'agrovoc'`` the value is deserialized
      and each non-empty term is returned as ``u'<term>|<label>'``. The
      label is looked up in ``lang`` (defaulting to ``get_lang()``), then
      in ``'en'``; the term itself is used when the term is unknown or no
      label text is available.
    * For other fields with a truthy ``multiple`` flag the deserialized
      value is returned.
    * Otherwise ``data`` is returned unchanged.

    :param data: raw field value
    :param field: field definition dict (``element`` and ``multiple``
        keys are read)
    :param lang: optional language code for agrovoc labels
    """
    if field.get('element') == 'agrovoc':
        values = v._deserialize_from_array(data)
        lang = lang or get_lang()
        out = []
        for val in values:
            if not val:
                continue
            term = VocabularyTerm.get(Vocabulary.VOCABULARY_AGROVOC, val)
            label = None
            if term:
                label = term.get_label(lang) or term.get_label('en')
            # a term may have no label in either language; fall back to the
            # raw value instead of failing on a missing label object
            label_text = label.label if label else None
            out.append(u'{}|{}'.format(val, label_text or val))
        return out

    if field.get('multiple'):
        return v._deserialize_from_array(data)
    return data
Ejemplo n.º 4
0
    def get_localized_agrovoc(self, terms):
        """
        Build a dict of AGROVOC values for the given terms.

        The result always holds the terms under ``fao_agrovoc``. For each
        term found in the AGROVOC vocabulary, every label is collected
        under ``fao_agrovoc_<label.lang>``; duplicate labels within one
        language are collapsed. Terms without a vocabulary entry add no
        localized labels.

        :param terms: list of terms, or a serialized value that is passed
            through ``v._deserialize_from_array``
        :returns: dict mapping field names to lists
        """
        if not isinstance(terms, list):
            terms = v._deserialize_from_array(terms)

        collected = {'fao_agrovoc': terms}
        for code in terms:
            vocabulary_term = VocabularyTerm.get(Vocabulary.VOCABULARY_AGROVOC, code)
            if vocabulary_term:
                for label in vocabulary_term.labels:
                    field_name = 'fao_agrovoc_{}'.format(label.lang)
                    # a set de-duplicates labels within one language
                    collected.setdefault(field_name, set()).add(label.label)

        # sets were only used for de-duplication; hand back plain lists
        return {field_name: list(value) if isinstance(value, set) else value
                for field_name, value in collected.items()}
Ejemplo n.º 5
0
def find_unused_terms(vocabulary_name, field_name):
    """
    Find terms referenced by datasets that do not exist in a vocabulary.

    Assumption is terms are in package's extras under provided
    field_name, and are stored as an array ({item1,item2} notation).

    Every active extra with that key, belonging to an active package of
    type ``dataset``, is unpacked, and each term is checked for an
    accompanying VocabularyTerm in ``vocabulary_name``.

    Return is a dictionary with keys:
     * datasets - dict mapping a package id to the set of its terms
       that have no VocabularyTerm
     * terms - dict mapping each such term to the set of package ids
       that use it
    """
    # NOTE(review): imported locally, presumably to avoid a circular
    # import with the validators module - confirm before moving it.
    from ckanext.faociok.validators import _deserialize_from_array
    datasets = {}
    bad_terms = {}

    extras = Session.query(PackageExtra).filter(PackageExtra.state=='active',
                                                PackageExtra.key==field_name)\
                                        .join(Package, and_(Package.id==PackageExtra.package_id,
                                                            Package.type=='dataset',
                                                            Package.state=='active'))
    for ex in extras:
        for k in _deserialize_from_array(ex.value):
            if VocabularyTerm.get(vocabulary_name, k):
                continue
            # record the missing term in both directions
            datasets.setdefault(ex.package_id, set()).add(k)
            bad_terms.setdefault(k, set()).add(ex.package_id)

    return {'datasets': datasets,
            'terms': bad_terms}