def test_add_get_remove(self): """bibknowledge - test creating a kb, adding a mapping, removing it, removing kb""" new_kb_id = add_kb() new_name = get_kb_name(new_kb_id) add_kb_mapping(new_name, "foobar", "barfoo") fbexists = kb_mapping_exists(new_name, "foobar") self.assertEqual(True, fbexists) remove_kb_mapping(new_name, "foobar") fbexists = kb_mapping_exists(new_name, "foobar") self.assertEqual(False, fbexists) delete_kb(new_name) still_there = kb_exists(new_name) self.assertEqual(False, still_there)
def tokenize_for_words(self, recID): """Get the words componing the country names and country codes of the institutions affiliated with the authors of the publication """ # Splits phrase tokens phrase_tokens = self.tokenize_for_phrases(recID) tokens = [] for phrase in phrase_tokens: # If phrase is a country code be careful, because codes like IT # may be removed by remove_stopwords() if kb_mapping_exists(self.kb_country_codes, phrase): tokens.append(phrase) else: word_list = self.re_words.findall(phrase) for word in word_list: word = remove_stopwords(word, self.remove_stopwords) if word: tokens.append(word) # Remove duplicates tokens = list(set(tokens)) return tokens
def kb_add_mapping(req, kb, mapFrom, mapTo, sortby="to", ln=CFG_SITE_LANG, forcetype=None, replacements=None, kb_type=None): """ Adds a new mapping to a kb. @param ln language @param kb the kb id to show @param sortby the sorting criteria ('from' or 'to') @param forcetype indicates if this function should ask about replacing left/right sides (None or 'no') replace in current kb ('curr') or in all ('all') @param replacements an object containing kbname+++left+++right strings. Can be a string or an array of strings @param kb_type None for normal from-to kb's, 't' for taxonomies """ ln = wash_language(ln) _ = gettext_set_language(ln) navtrail_previous_links = ''' > <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases")) try: dummy = getUid(req) except: return error_page('Error', req) (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge') if not auth_code: kb_id = wash_url_argument(kb, 'int') kb_name = bibknowledge.get_kb_name(kb_id) if kb_name is None: return page(title=_("Unknown Knowledge Base"), body = "", language=ln, navtrail = navtrail_previous_links, errors = [("ERR_KB_ID_UNKNOWN", kb)], lastupdated=__lastupdated__, req=req) key = wash_url_argument(mapFrom, 'str') value = wash_url_argument(mapTo, 'str') #check if key or value already exists in some KB left_sides_match = bibknowledge.get_kb_mappings("", key, "") #check that the match is exact left_sides = [] for m in left_sides_match: if m['key'] == key: left_sides.append(m) right_sides_match = bibknowledge.get_kb_mappings("", "", value) right_sides = [] for m in right_sides_match: if m['value'] == value: right_sides.append(m) if (len(right_sides) == 0) and (len(left_sides) == 0): #no problems, just add in current forcetype = "curr" #likewise, if this is a taxonomy, just pass on if kb_type == 't': forcetype = "curr" if forcetype and not forcetype == "no": pass else: if len(left_sides) > 0: return page(title=_("Left side exists"), body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "left", kb_name, left_sides), language=ln, navtrail = navtrail_previous_links, lastupdated=__lastupdated__, req=req) if len(right_sides) > 0: return page(title=_("Right side exists"), body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "right", kb_name, right_sides), language=ln, navtrail = navtrail_previous_links, lastupdated=__lastupdated__, req=req) if forcetype == "curr": bibknowledge.add_kb_mapping(kb_name, key, value) if forcetype == "all": #a bit tricky.. remove the rules given in param replacement and add the current #rule in the same kb's if replacements: #"replacements" can be either a string or an array. Let's make it always an array if type(replacements) == type("this is a string"): mystr = replacements replacements = [] replacements.append(mystr) for r in replacements: if r.find("++++") > 0: (rkbname, rleft, dummy) = r.split('++++') bibknowledge.remove_kb_mapping(rkbname, rleft) #add only if this is not yet there.. if not bibknowledge.kb_mapping_exists(rkbname, key): bibknowledge.add_kb_mapping(rkbname, key, value) redirect_to_url(req, "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {'ln':ln, 'kb':kb_id, 'sortby':sortby, 'kb_type':kb_type}) else: return page_not_authorized(req=req, text=auth_msg, navtrail=navtrail_previous_links)
def load_kbs(cfg, run_sql, in_task=False): for kb, query in cfg.iteritems(): task_sleep_now_if_required(can_stop_too=True) if not kb_exists(kb): add_kb(kb) if in_task: write_message("Updating %s KB..." % kb) try: if not in_task: print "kb:", kb print "kb beginning:", len(get_kb_mappings(kb)) if kb.startswith('json_'): encoder = ComplexEncoder() mapping, description = run_sql(query, with_desc=True) if kb in CFG_ADDITIONAL_ENTRIES: mapping += CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print CFG_ADDITIONAL_ENTRIES[kb] column_counter = {} new_description = [] for column in description[1:]: column = column[0] counter = column_counter[column] = column_counter.get( column, 0) + 1 if counter > 1: new_description.append('%s%d' % (column, counter)) else: new_description.append(column) description = new_description else: mapping = run_sql(query) if kb in CFG_ADDITIONAL_ENTRIES: mapping += CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print "mapping:", len(mapping) if kb == 'projects': mapping += [('000000', 'NO PROJECT')] original_keys = set([key[0] for key in get_kbr_keys(kb)]) if not in_task: print "original_keys before:", len(original_keys) updated = 0 added = 0 for i, row in enumerate(mapping): key, value = row[0], row[1:] if kb.startswith('json_'): value = encoder.encode(dict(zip(description, value))) else: value = value[0] if value: if key in original_keys: original_keys.remove(key) if in_task: task_update_progress("%s - %s%%" % (kb, i * 100 / len(mapping))) if kb_mapping_exists(kb, key): updated += 1 update_kb_mapping(kb, key, key, value) else: added += 1 add_kb_mapping(kb, key, value) if not in_task: print "updated:", updated, "added:", added print "kb after update:", len(get_kb_mappings(kb)) print "original_keys after:", len(original_keys) if in_task: task_update_progress("Cleaning %s" % kb) for key in original_keys: remove_kb_mapping(kb, key) if not in_task: print "kb after remove:", len(get_kb_mappings(kb)) except: register_exception(alert_admin=True, prefix="Error when updating KB %s" % kb) continue
def kb_add_mapping(req, kb, mapFrom, mapTo, sortby="to", ln=CFG_SITE_LANG, forcetype=None, replacements=None, kb_type=None): """ Adds a new mapping to a kb. @param ln language @param kb the kb id to show @param sortby the sorting criteria ('from' or 'to') @param forcetype indicates if this function should ask about replacing left/right sides (None or 'no') replace in current kb ('curr') or in all ('all') @param replacements an object containing kbname+++left+++right strings. Can be a string or an array of strings @param kb_type None for normal from-to kb's, 't' for taxonomies """ ln = wash_language(ln) _ = gettext_set_language(ln) navtrail_previous_links = ''' > <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % ( CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases")) try: dummy = getUid(req) except MySQLdb.Error: return error_page(req) (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge') if not auth_code: kb_id = wash_url_argument(kb, 'int') kb_name = bibknowledge.get_kb_name(kb_id) if kb_name is None: return page(title=_("Unknown Knowledge Base"), body="", language=ln, navtrail=navtrail_previous_links, errors=[("ERR_KB_ID_UNKNOWN", kb)], lastupdated=__lastupdated__, req=req) key = wash_url_argument(mapFrom, 'str') value = wash_url_argument(mapTo, 'str') #check if key or value already exists in some KB left_sides_match = bibknowledge.get_kb_mappings("", key, "") #check that the match is exact left_sides = [] for m in left_sides_match: if m['key'] == key: left_sides.append(m) right_sides_match = bibknowledge.get_kb_mappings("", "", value) right_sides = [] for m in right_sides_match: if m['value'] == value: right_sides.append(m) if (len(right_sides) == 0) and (len(left_sides) == 0): #no problems, just add in current forcetype = "curr" #likewise, if this is a taxonomy, just pass on if kb_type == 't': forcetype = "curr" if forcetype and not forcetype == "no": pass else: if len(left_sides) > 0: return page( title=_("Left side exists"), body=bibknowledgeadminlib.perform_request_verify_rule( ln, kb_id, key, value, "left", kb_name, left_sides), language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__, req=req) if len(right_sides) > 0: return page( title=_("Right side exists"), body=bibknowledgeadminlib.perform_request_verify_rule( ln, kb_id, key, value, "right", kb_name, right_sides), language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__, req=req) if forcetype == "curr": bibknowledge.add_kb_mapping(kb_name, key, value) if forcetype == "all": #a bit tricky.. remove the rules given in param replacement and add the current #rule in the same kb's if replacements: #"replacements" can be either a string or an array. Let's make it always an array if type(replacements) == type("this is a string"): mystr = replacements replacements = [] replacements.append(mystr) for r in replacements: if r.find("++++") > 0: (rkbname, rleft, dummy) = r.split('++++') bibknowledge.remove_kb_mapping(rkbname, rleft) #add only if this is not yet there.. if not bibknowledge.kb_mapping_exists(rkbname, key): bibknowledge.add_kb_mapping(rkbname, key, value) redirect_to_url( req, "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % { 'ln': ln, 'kb': kb_id, 'sortby': sortby, 'kb_type': kb_type }) else: return page_not_authorized(req=req, text=auth_msg, navtrail=navtrail_previous_links)
def load_kbs(cfg, run_sql, in_task=False): for kb, query in cfg.iteritems(): task_sleep_now_if_required(can_stop_too=True) if not kb_exists(kb): add_kb(kb) if in_task: write_message("Updating %s KB..." % kb) try: if not in_task: print "kb:", kb print "kb beginning:", len(get_kb_mappings(kb)) if kb.startswith('json_'): encoder = ComplexEncoder() mapping, description = run_sql(query, with_desc=True) if kb in CFG_ADDITIONAL_ENTRIES: mapping += CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print CFG_ADDITIONAL_ENTRIES[kb] column_counter = {} new_description = [] for column in description[1:]: column = column[0] counter = column_counter[column] = column_counter.get(column, 0) + 1 if counter > 1: new_description.append('%s%d' % (column, counter)) else: new_description.append(column) description = new_description else: mapping = run_sql(query) if kb in CFG_ADDITIONAL_ENTRIES: mapping += CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print "mapping:", len(mapping) if kb == 'projects': mapping += [('000000', 'NO PROJECT')] original_keys = set([key[0] for key in get_kbr_keys(kb)]) if not in_task: print "original_keys before:", len(original_keys) updated = 0 added = 0 for i, row in enumerate(mapping): key, value = row[0], row[1:] if kb.startswith('json_'): value = encoder.encode(dict(zip(description, value))) else: value = value[0] if value: if key in original_keys: original_keys.remove(key) if in_task: task_update_progress("%s - %s%%" % (kb, i * 100 / len(mapping))) if kb_mapping_exists(kb, key): updated += 1 update_kb_mapping(kb, key, key, value) else: added += 1 add_kb_mapping(kb, key, value) if not in_task: print "updated:", updated, "added:", added print "kb after update:", len(get_kb_mappings(kb)) print "original_keys after:", len(original_keys) if in_task: task_update_progress("Cleaning %s" % kb) for key in original_keys: remove_kb_mapping(kb, key) if not in_task: print "kb after remove:", len(get_kb_mappings(kb)) except: register_exception(alert_admin=True, prefix="Error when updating KB %s" % kb) continue