def test_add_get_remove(self):
     """bibknowledge - test creating a kb, adding a mapping, removing it, removing kb"""
     new_kb_id = add_kb()
     new_name = get_kb_name(new_kb_id)
     add_kb_mapping(new_name, "foobar", "barfoo")
     fbexists = kb_mapping_exists(new_name, "foobar")
     self.assertEqual(True, fbexists)
     remove_kb_mapping(new_name, "foobar")
     fbexists = kb_mapping_exists(new_name, "foobar")
     self.assertEqual(False, fbexists)
     delete_kb(new_name)
     still_there = kb_exists(new_name)
     self.assertEqual(False, still_there)
 def test_add_get_remove(self):
     """bibknowledge - test creating a kb, adding a mapping, removing it, removing kb"""
     new_kb_id = add_kb()
     new_name = get_kb_name(new_kb_id)
     add_kb_mapping(new_name, "foobar", "barfoo")
     fbexists = kb_mapping_exists(new_name, "foobar")
     self.assertEqual(True, fbexists)
     remove_kb_mapping(new_name, "foobar")
     fbexists = kb_mapping_exists(new_name, "foobar")
     self.assertEqual(False, fbexists)
     delete_kb(new_name)
     still_there = kb_exists(new_name)
     self.assertEqual(False, still_there)
Example #3
0
    def tokenize_for_words(self, recID):
        """Get the words componing the country names and country codes of the
           institutions affiliated with the authors of the publication
        """

        # Splits phrase tokens
        phrase_tokens = self.tokenize_for_phrases(recID)
        tokens = []
        for phrase in phrase_tokens:
            # If phrase is a country code be careful, because codes like IT
            # may be removed by remove_stopwords()
            if kb_mapping_exists(self.kb_country_codes, phrase):
                tokens.append(phrase)
            else:
                word_list = self.re_words.findall(phrase)
                for word in word_list:
                    word = remove_stopwords(word, self.remove_stopwords)
                    if word:
                        tokens.append(word)

        # Remove duplicates
        tokens = list(set(tokens))

        return tokens
    def tokenize_for_words(self, recID):
        """Get the words componing the country names and country codes of the
           institutions affiliated with the authors of the publication
        """

        # Splits phrase tokens
        phrase_tokens = self.tokenize_for_phrases(recID)
        tokens = []
        for phrase in phrase_tokens:
            # If phrase is a country code be careful, because codes like IT
            # may be removed by remove_stopwords()
            if kb_mapping_exists(self.kb_country_codes, phrase):
                tokens.append(phrase)
            else:
                word_list = self.re_words.findall(phrase)
                for word in word_list:
                    word = remove_stopwords(word, self.remove_stopwords)
                    if word:
                        tokens.append(word)

        # Remove duplicates
        tokens = list(set(tokens))

        return tokens
Example #5
0
def kb_add_mapping(req, kb, mapFrom, mapTo, sortby="to", ln=CFG_SITE_LANG,
                   forcetype=None, replacements=None, kb_type=None):
    """
    Adds a new mapping to a kb.

    @param ln language
    @param kb the kb id to show
    @param sortby the sorting criteria ('from' or 'to')
    @param forcetype indicates if this function should ask about replacing left/right sides (None or 'no')
                     replace in current kb ('curr') or in all ('all')
    @param replacements an object containing kbname+++left+++right strings.
                     Can be a string or an array of strings
    @param kb_type None for normal from-to kb's, 't' for taxonomies
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ''' &gt; <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))

    try:
        dummy = getUid(req)
    except:
        return error_page('Error', req)

    (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge')
    if not auth_code:

        kb_id = wash_url_argument(kb, 'int')
        kb_name = bibknowledge.get_kb_name(kb_id)

        if kb_name is None:
            return page(title=_("Unknown Knowledge Base"),
                        body = "",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        errors = [("ERR_KB_ID_UNKNOWN", kb)],
                        lastupdated=__lastupdated__,
                        req=req)

        key = wash_url_argument(mapFrom, 'str')
        value = wash_url_argument(mapTo, 'str')

        #check if key or value already exists in some KB
        left_sides_match = bibknowledge.get_kb_mappings("", key, "")
        #check that the match is exact
        left_sides = []
        for m in left_sides_match:
            if m['key'] == key:
                left_sides.append(m)

        right_sides_match = bibknowledge.get_kb_mappings("", "", value)
        right_sides = []
        for m in right_sides_match:
            if m['value'] == value:
                right_sides.append(m)

        if (len(right_sides) == 0) and (len(left_sides) == 0):
            #no problems, just add in current
            forcetype = "curr"

        #likewise, if this is a taxonomy, just pass on
        if kb_type == 't':
            forcetype = "curr"

        if forcetype and not forcetype == "no":
            pass
        else:
            if len(left_sides) > 0:
                return page(title=_("Left side exists"),
                        body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "left", kb_name, left_sides),
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)

            if len(right_sides) > 0:
                return page(title=_("Right side exists"),
                        body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "right", kb_name, right_sides),
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)

        if forcetype == "curr":
            bibknowledge.add_kb_mapping(kb_name, key, value)
        if forcetype == "all":
            #a bit tricky.. remove the rules given in param replacement and add the current
            #rule in the same kb's
            if replacements:
                #"replacements" can be either a string or an array. Let's make it always an array
                if type(replacements) == type("this is a string"):
                    mystr = replacements
                    replacements = []
                    replacements.append(mystr)
                for r in replacements:
                    if r.find("++++") > 0:
                        (rkbname, rleft, dummy) = r.split('++++')
                        bibknowledge.remove_kb_mapping(rkbname, rleft)
                        #add only if this is not yet there..
                        if not bibknowledge.kb_mapping_exists(rkbname, key):
                            bibknowledge.add_kb_mapping(rkbname, key, value)

        redirect_to_url(req, "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {'ln':ln,
                                                                                               'kb':kb_id,
                                                                                               'sortby':sortby,
                                                                                               'kb_type':kb_type})
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
Example #6
0
def load_kbs(cfg, run_sql, in_task=False):
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[column] = column_counter.get(
                        column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                if not in_task:
                    print "mapping:", len(mapping)
                if kb == 'projects':
                    mapping += [('000000', 'NO PROJECT')]
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)

            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress("%s - %s%%" %
                                             (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            register_exception(alert_admin=True,
                               prefix="Error when updating KB %s" % kb)
            continue
Example #7
0
def kb_add_mapping(req,
                   kb,
                   mapFrom,
                   mapTo,
                   sortby="to",
                   ln=CFG_SITE_LANG,
                   forcetype=None,
                   replacements=None,
                   kb_type=None):
    """
    Adds a new mapping to a kb.

    @param ln language
    @param kb the kb id to show
    @param sortby the sorting criteria ('from' or 'to')
    @param forcetype indicates if this function should ask about replacing left/right sides (None or 'no')
                     replace in current kb ('curr') or in all ('all')
    @param replacements an object containing kbname+++left+++right strings.
                     Can be a string or an array of strings
    @param kb_type None for normal from-to kb's, 't' for taxonomies
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ''' &gt; <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (
        CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))

    try:
        dummy = getUid(req)
    except MySQLdb.Error:
        return error_page(req)

    (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge')
    if not auth_code:

        kb_id = wash_url_argument(kb, 'int')
        kb_name = bibknowledge.get_kb_name(kb_id)

        if kb_name is None:
            return page(title=_("Unknown Knowledge Base"),
                        body="",
                        language=ln,
                        navtrail=navtrail_previous_links,
                        errors=[("ERR_KB_ID_UNKNOWN", kb)],
                        lastupdated=__lastupdated__,
                        req=req)

        key = wash_url_argument(mapFrom, 'str')
        value = wash_url_argument(mapTo, 'str')

        #check if key or value already exists in some KB
        left_sides_match = bibknowledge.get_kb_mappings("", key, "")
        #check that the match is exact
        left_sides = []
        for m in left_sides_match:
            if m['key'] == key:
                left_sides.append(m)

        right_sides_match = bibknowledge.get_kb_mappings("", "", value)
        right_sides = []
        for m in right_sides_match:
            if m['value'] == value:
                right_sides.append(m)

        if (len(right_sides) == 0) and (len(left_sides) == 0):
            #no problems, just add in current
            forcetype = "curr"

        #likewise, if this is a taxonomy, just pass on
        if kb_type == 't':
            forcetype = "curr"

        if forcetype and not forcetype == "no":
            pass
        else:
            if len(left_sides) > 0:
                return page(
                    title=_("Left side exists"),
                    body=bibknowledgeadminlib.perform_request_verify_rule(
                        ln, kb_id, key, value, "left", kb_name, left_sides),
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

            if len(right_sides) > 0:
                return page(
                    title=_("Right side exists"),
                    body=bibknowledgeadminlib.perform_request_verify_rule(
                        ln, kb_id, key, value, "right", kb_name, right_sides),
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

        if forcetype == "curr":
            bibknowledge.add_kb_mapping(kb_name, key, value)
        if forcetype == "all":
            #a bit tricky.. remove the rules given in param replacement and add the current
            #rule in the same kb's
            if replacements:
                #"replacements" can be either a string or an array. Let's make it always an array
                if type(replacements) == type("this is a string"):
                    mystr = replacements
                    replacements = []
                    replacements.append(mystr)
                for r in replacements:
                    if r.find("++++") > 0:
                        (rkbname, rleft, dummy) = r.split('++++')
                        bibknowledge.remove_kb_mapping(rkbname, rleft)
                        #add only if this is not yet there..
                        if not bibknowledge.kb_mapping_exists(rkbname, key):
                            bibknowledge.add_kb_mapping(rkbname, key, value)

        redirect_to_url(
            req,
            "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {
                'ln': ln,
                'kb': kb_id,
                'sortby': sortby,
                'kb_type': kb_type
            })
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
Example #8
0
def load_kbs(cfg, run_sql, in_task=False):
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[column] = column_counter.get(column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                if not in_task:
                    print "mapping:", len(mapping)
                if kb == 'projects':
                    mapping += [('000000', 'NO PROJECT')]
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)

            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress("%s - %s%%" % (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            register_exception(alert_admin=True, prefix="Error when updating KB %s" % kb)
            continue