Esempio n. 1
0
class SubthemeTestCase(unittest.TestCase):
    """Checks that subthemes loaded from the RDF files can be queried back."""

    # File names handed to get_path() in _load_mapping().
    MAPPING_FILE = 'eurovoc_mapping.rdf'
    EUROVOC_FILE = 'eurovoc.rdf'

    def setUp(self):
        """Resolve the fixture paths before each test."""
        self._load_mapping()

    def _load_mapping(self):
        """Store the resolved mapping and vocabulary paths on the instance."""
        self.map_f = get_path(self.MAPPING_FILE)
        self.voc_f = get_path(self.EUROVOC_FILE)

    def test_subthemes(self):
        """
        Load subthemes and compare them with the mapping graph.

        Every skos:narrowMatch object in the mapping file must be found
        exactly once by uri, and each skos:Concept theme must have at least
        as many subthemes as it has narrowMatch entries in the graph.
        """
        clear_subthemes()
        g = Graph()
        g.parse(self.map_f)

        refs = list(g.objects(None, SKOS.narrowMatch))
        self.assertTrue(len(refs) > 0)

        load_subthemes(self.map_f, self.voc_f)
        all_subthemes = Subtheme.q()
        self.assertTrue(all_subthemes.count() > 0)
        for ref in refs:
            try:
                subtheme = Subtheme.q().filter_by(uri=str(ref)).one()
            except Exception as err:
                # Report the failing lookup explicitly instead of asserting
                # on the exception object.
                self.fail("No results for {}: {}".format(ref, err))
            else:
                # Kept outside the try body so an assertion failure is not
                # re-reported as a lookup failure.
                self.assertIsNotNone(subtheme)
        themes = g.subjects(RDF.type, SKOS.Concept)
        for theme in themes:
            theme_len = g.objects(theme, SKOS.narrowMatch)
            theme_name = Subtheme.normalize_theme(theme)
            q = Subtheme.for_theme(theme_name)
            self.assertTrue(q.count() >= len(list(theme_len)))
Esempio n. 2
0
def load_themes():
    """
    Load the 'eu_themes' vocabulary and the subthemes, asserting each step.

    Runs themes_loader against 'data-theme-skos.rdf', checks that the 'ECON'
    tag has a localized name and that the vocabulary exists, then reloads
    the subthemes from the mapping and eurovoc files.
    """
    class Opts(object):
        """Plain attribute holder assigned to themes_loader.options."""

        def __init__(self, filename, name, fmt):
            self.filename, self.url = filename, None
            self.name, self.format = name, fmt

    themes_loader.options = Opts(
        _get_path('data-theme-skos.rdf', 'vocabularies'), 'eu_themes', None)
    themes_loader.load()

    econ_label = interfaces.get_localized_tag_name('ECON')
    Session.flush()
    assert econ_label
    assert Session.query(Vocabulary).filter_by(name='eu_themes').first()

    mapping_path = _get_path(MAPPING_FILE)
    eurovoc_path = _get_path(EUROVOC_FILE)
    clear_subthemes()
    load_subthemes(mapping_path, eurovoc_path)
    assert Subtheme.q().first()
def get_dcatapit_subthemes(lang):
    """
    Dump the subthemes tree with localized labels for all themes.

    Returns a dict keyed by theme name. Each value is a list of
    {'name': ..., 'value': ...} dicts, where the name is the label prefixed
    with one dash per depth level and the value is the subtheme uri.
    """
    return {
        theme: [
            {
                'name': '{} {}'.format('-' * opt.depth, label),
                'value': opt.uri
            }
            for opt, label in Subtheme.for_theme(theme, lang)
        ]
        for theme in Subtheme.get_theme_names()
    }
Esempio n. 4
0
    def test_subthemes(self):
        """
        Load subthemes and check each mapped reference can be found.

        Every skos:narrowMatch object in the mapping file must be returned
        exactly once when queried by uri.
        """
        clear_subthemes()
        g = Graph()
        g.parse(self.map_f)

        refs = list(g.objects(None, SKOS.narrowMatch))
        self.assertTrue(len(refs) > 0)

        load_subthemes(self.map_f, self.voc_f)
        all_subthemes = Subtheme.q()
        self.assertTrue(all_subthemes.count() > 0)
        for ref in refs:
            try:
                subtheme = Subtheme.q().filter_by(uri=str(ref)).one()
            except Exception as err:
                # Report the failing lookup explicitly instead of asserting
                # on the exception object.
                self.fail("No results for {}: {}".format(ref, err))
            else:
                # Kept outside the try body so an assertion failure is not
                # re-reported as a lookup failure.
                self.assertIsNotNone(subtheme)
    def test_subthemes(self):
        """
        Load subthemes and compare them with the mapping graph.

        Every skos:narrowMatch object must be found by uri, and each
        skos:Concept theme must have at least as many subthemes as it has
        narrowMatch entries in the graph.
        """
        clear_subthemes()
        graph = Graph()
        graph.parse(self.map_f)

        narrow_refs = list(graph.objects(None, SKOS.narrowMatch))
        self.assertTrue(len(narrow_refs) > 0)

        load_subthemes(self.map_f, self.voc_f)
        self.assertGreater(Subtheme.q().count(), 0)
        for narrow_ref in narrow_refs:
            try:
                found = Subtheme.q().filter_by(uri=str(narrow_ref)).one()
                self.assertIsNotNone(found)
            except Exception as err:
                self.fail(f'No results for {narrow_ref}: {err}')

        for concept in graph.subjects(RDF.type, SKOS.Concept):
            narrow_matches = graph.objects(concept, SKOS.narrowMatch)
            stored = Subtheme.for_theme(Subtheme.normalize_theme(concept))
            self.assertGreaterEqual(stored.count(), len(list(narrow_matches)))
def dcatapit_string_to_localized_aggregated_themes(value, lang):
    """
    Parse the aggregated themes value and localize it. Used in templates.

    Returns a list of {'theme': localized theme name, 'subthemes': [labels]}
    where the labels are those of the theme's subthemes whose uri is listed
    in the parsed item.
    """
    localized = []
    for entry in dcatapit_string_to_aggregated_themes(value):
        theme_label = interfaces.get_localized_tag_name(entry['theme'],
                                                        lang=lang)
        subtheme_labels = [
            label
            for subtheme, label in Subtheme.for_theme(entry['theme'], lang)
            if subtheme.uri in entry['subthemes']
        ]
        localized.append({'theme': theme_label, 'subthemes': subtheme_labels})
    return localized
Esempio n. 7
0
def load_themes():
    """
    Load the 'eu_themes' vocabulary and the subthemes, asserting each step.

    Loads the SKOS theme graph, checks that the 'ECON' tag has a localized
    name and that the vocabulary exists, then reloads the subthemes from the
    mapping and eurovoc files.
    """
    theme_graph = load_graph(path=get_test_file(SKOS_THEME_FILE))
    do_load(theme_graph, 'eu_themes')

    econ_label = interfaces.get_localized_tag_name('ECON')
    Session.flush()
    assert econ_label
    assert Session.query(Vocabulary).filter_by(name='eu_themes').first()

    mapping_path = get_voc_file(MAPPING_FILE)
    eurovoc_path = get_test_file(EUROVOC_FILE)
    clear_subthemes()
    load_subthemes(mapping_path, eurovoc_path)
    assert Subtheme.q().first()
Esempio n. 8
0
    def _parse_themes(self, dataset, ref):
        """
        Pair each dcat:theme of ``ref`` with its matching dct:subject values.

        Removes the 'theme' extra from ``dataset``, then builds one
        {'theme': name, 'subthemes': [uris]} row per theme. The theme name is
        the last path segment of the theme URI; a subject is kept for a theme
        only when it is among the values returned by
        ``Subtheme.for_theme_values`` for that theme.

        NOTE(review): the rows are collected in ``out`` but the visible code
        neither returns nor stores them -- confirm against the full source.
        """
        self._remove_from_extra(dataset, 'theme')
        themes = list(self.g.objects(ref, DCAT.theme))
        # Stringify the subjects once instead of once per theme.
        subtheme_uris = [str(s) for s in self.g.objects(ref, DCT.subject)]
        out = []
        for t in themes:
            theme_name = str(t).split('/')[-1]
            try:
                subthemes_for_theme = Subtheme.for_theme_values(theme_name)
            except ValueError:
                # Lookup rejected this theme name: keep the theme row but
                # with no subthemes.
                subthemes_for_theme = []

            out.append({
                'theme': theme_name,
                'subthemes': [
                    s for s in subtheme_uris if s in subthemes_for_theme
                ],
            })
Esempio n. 9
0
    def _add_subthemes(self, ref, subthemes):
        """
        Add each known subtheme to the graph as a dct:subject of ``ref``.

        subthemes is a list of eurovoc hrefs. An href with no Subtheme
        record is reported on stdout and skipped. For the others a
        skos:Concept node is added, with one skos:prefLabel per language
        found in OFFERED_LANGS.
        """
        for href in subthemes:
            concept = URIRef(href)
            record = Subtheme.get(href)
            if not record:
                print("No subtheme for {}".format(href))
                continue

            names = record.get_names_dict()
            self.g.add((concept, RDF.type, SKOS.Concept))
            for lang, label in names.items():
                if lang not in OFFERED_LANGS:
                    continue
                localized = Literal(label, lang=lang)
                self.g.add((concept, SKOS.prefLabel, localized))
            self.g.add((ref, DCT.subject, concept))
def dcatapit_subthemes(key, flattened_data, errors, context):
    """
    Validate aggregate_theme; expected format is
    [
      {
        'theme': THEME_CODE,
        'subthemes': ['subtheme uri', 'subtheme uri']
      }, ...
    ]

    If the aggregate theme does not exist, try and parse the extra theme value.

    The normalized value is written back to ``flattened_data[key]`` as a JSON
    string; the function itself returns None.

    :param key: key of the validated field in ``flattened_data``
    :param flattened_data: flattened dataset dict, updated in place
    :param errors: not used by this validator
    :param context: when falsy, themes and subthemes are checked through
        ``Subtheme.for_theme``; otherwise that check runs only if
        ``context['dcatapit_subthemes_check_in_db']`` is truthy
    :raises Invalid: when the value cannot be parsed, has the wrong shape or
        types, contains duplicate themes, or fails the Subtheme check
    """
    def _get_flattened_theme():
        # Look for an extras entry whose key is 'theme' and return its value.
        for tkey in flattened_data:
            if len(tkey) == 3:
                x, idx, k = tkey
                if x == 'extras' and k == 'key' and flattened_data[
                        tkey] == 'theme':
                    return flattened_data[('extras', idx, 'value')]

        # Not found in expected fields, Look into the discarded fields
        __extras = flattened_data.get(('__extras', ), None)
        if __extras and 'theme' in __extras:
            return __extras['theme']

        return None

    def _do_return(value):
        # The validator reports its result by overwriting the field.
        flattened_data[key] = value

    value = flattened_data.get(key)

    if not value or value == '[]':  # a little shortcut here
        theme = _get_flattened_theme()
        if theme and theme != '[]':  # other shortcut
            log.warning(
                'Aggregate theme is missing, trying setting values from extra theme key'
            )
            theme_list = themes_parse_to_uris(theme)
            _do_return(themes_to_aggr_json(theme_list))
        else:
            log.warning('Aggregate theme is missing, setting undefined value')
            _do_return(themes_to_aggr_json(['OP_DATPRO']))
        return

    try:
        aggr_list = json.loads(value)
    except (TypeError, ValueError):
        # handle old '{THEME1,THEME2}' notation
        if isinstance(value, str):
            _v = value.rstrip('}').lstrip('{').split(',')
            aggr_list = [{'theme': v, 'subthemes': []} for v in _v]
        elif isinstance(value, (list, tuple)):
            aggr_list = [{'theme': v, 'subthemes': []} for v in value]
        else:
            # Messages are translated first and formatted afterwards, so the
            # lookup uses the constant template rather than the final text.
            raise Invalid(
                _('Theme data is not valid, expected json, got {}').format(
                    type(value)))
    if not isinstance(aggr_list, list):
        raise Invalid(
            _('Theme data should be a list, got {}').format(type(aggr_list)))

    allowed_keys = {'theme': str, 'subthemes': list}

    allowed_keys_set = set(allowed_keys)
    check_with_db = context.get(
        'dcatapit_subthemes_check_in_db') if context else True

    if not aggr_list:
        raise Invalid(_('Theme data should not be empty'))

    for aggr in aggr_list:
        if not isinstance(aggr, dict):
            raise Invalid(
                _('Invalid theme aggr item, should be a dict, got {}').format(
                    type(aggr)))
        unknown_keys = set(aggr) - allowed_keys_set
        if unknown_keys:
            raise Invalid(
                _('Theme aggr contains invalid keys: {}').format(unknown_keys))
        if not aggr.get('theme'):
            raise Invalid(_('Theme data should not be empty'))

        for k, v in aggr.items():
            # Every key is known to be in allowed_keys at this point.
            allowed_type = allowed_keys[k]
            if not isinstance(v, allowed_type):
                raise Invalid(
                    _('Theme item {} value: {} should be {}, got {}').format(
                        k, v, allowed_type, type(v)))
            if k == 'subthemes':
                for subtheme in v:
                    if not isinstance(subtheme, str):
                        raise Invalid(
                            _('Subtheme {} value should be string').format(
                                subtheme))
        if not check_with_db:
            continue
        theme_name = aggr['theme']
        subthemes = aggr.get('subthemes') or []
        try:
            slist = [s.uri for s in Subtheme.for_theme(theme_name)]
        except ValueError:
            raise Invalid(_('Invalid theme {}').format(theme_name))

        for s in subthemes:
            if s not in slist:
                raise Invalid(_('Invalid subtheme: {}').format(s))

    reduced_themes = {s.get('theme') for s in aggr_list if s.get('theme')}
    if len(aggr_list) != len(reduced_themes):
        raise Invalid(
            _('There are duplicate themes. Expected {} items, got {}').format(
                len(aggr_list), len(reduced_themes)))

    _do_return(json.dumps(aggr_list))
Esempio n. 11
0
def dcatapit_subthemes(value, context):
    """
    Expects [{'theme': THEME_CODE,
              'subthemes': ['subtheme uri', 'subtheme uri']},
             ..
             ]

    Also accepts the old '{THEME1,THEME2}' string notation and a plain
    list/tuple of theme codes; both are converted to items with empty
    subthemes.

    :param value: JSON string (or legacy value) to validate
    :param context: when falsy, themes and subthemes are checked through
        ``Subtheme.for_theme``; otherwise that check runs only if
        ``context['dcatapit_subthemes_check_in_db']`` is truthy
    :returns: the validated data serialized as a JSON string
    :raises Invalid: when the value is empty, cannot be parsed, has the
        wrong shape or types, lacks a theme, contains duplicate themes, or
        fails the Subtheme check
    """
    try:
        string_types = (str, unicode)  # noqa: F821 - Python 2 only name
    except NameError:
        # Interpreter without a separate ``unicode`` type.
        string_types = (str,)

    if not value:
        raise Invalid(_("Theme data should not be empty"))
    try:
        data = json.loads(value)
    except (TypeError, ValueError):
        # handle old '{THEME1,THEME2}' notation
        if isinstance(value, string_types):
            _v = value.rstrip('}').lstrip('{').split(',')
            data = [{'theme': v, 'subthemes': []} for v in _v]
        elif isinstance(value, (list, tuple)):
            data = [{'theme': v, 'subthemes': []} for v in value]
        else:
            # Messages are translated first and formatted afterwards, so the
            # lookup uses the constant template rather than the final text.
            raise Invalid(
                _("Theme data is not valid, expected json, got {}").format(
                    type(value)))
    if not isinstance(data, list):
        raise Invalid(
            _("Theme data should be a list, got {}").format(type(data)))

    allowed_keys = {'theme': string_types, 'subthemes': list}

    allowed_keys_set = set(allowed_keys)
    check_with_db = context.get(
        'dcatapit_subthemes_check_in_db') if context else True

    for item in data:
        if not isinstance(item, dict):
            raise Invalid(
                _("Invalid theme item, should be a dict, got {}").format(
                    type(item)))
        unknown_keys = set(item) - allowed_keys_set
        if unknown_keys:
            raise Invalid(
                _("Theme item contains invalid keys: {}").format(unknown_keys))
        if 'theme' not in item:
            # Without this guard the item['theme'] lookups below raise
            # KeyError instead of a validation error.
            raise Invalid(_("Theme data should not be empty"))

        for k, v in item.items():
            allowed_type = allowed_keys[k]
            if not isinstance(v, allowed_type):
                raise Invalid(
                    _("Theme item {} value: {} should be {}, got {}").format(
                        k, v, allowed_type, type(v)))
            if k == 'subthemes':
                for subtheme in v:
                    if not isinstance(subtheme, string_types):
                        raise Invalid(
                            _("Subtheme {} value should be string").format(
                                subtheme))
        if not check_with_db:
            continue
        theme_name = item['theme']
        subthemes = item.get('subthemes') or []
        try:
            slist = [s.uri for s in Subtheme.for_theme(theme_name)]
        except ValueError:
            raise Invalid(_("Invalid theme {}").format(theme_name))

        for s in subthemes:
            if s not in slist:
                raise Invalid(_("Invalid subtheme: {}").format(s))

    reduced_themes = {item['theme'] for item in data}
    if len(data) != len(reduced_themes):
        raise Invalid(
            _("There are duplicate themes. Expected {} items, got {}").format(
                len(data), len(reduced_themes)))

    return json.dumps(data)