def __init__(self, *args, **kwargs):
    """Initialise the form and populate KB-driven select fields.

    Subject and degree-type choices are loaded from the knowledge bases
    named in the configuration.
    """
    super(LiteratureForm, self).__init__(*args, **kwargs)
    from invenio.modules.knowledge.api import get_kb_mappings

    def _kb_choices(kb_name):
        # Each KB mapping value serves as both the option value and label.
        return [(entry['value'], entry['value'])
                for entry in get_kb_mappings(kb_name)]

    self.subject.choices = _kb_choices(cfg["DEPOSIT_INSPIRE_SUBJECTS_KB"])
    # Degree type gets a leading blank option so the field can start unset.
    self.degree_type.choices = ([('', '')] +
                                _kb_choices(cfg["DEPOSIT_INSPIRE_DEGREE_KB"]))
def format_element(bfo, separator=' | ', link="yes"):
    """Print PDG info (from 084 fields) as best is possible.

    NOTE(review): the ``link`` parameter is documented as controlling
    linking but is immediately overwritten below, so it has no effect —
    confirm whether that is intentional before relying on it.

    @param link if yes (default) prints link to SPIRES conference info
    @param separator separates multiple conferences
    """
    # NOTE(review): despite the name, these are 084 classification fields,
    # not author fields.
    authors = bfo.fields('084__')
    output = []    # first two PDG links, always shown
    output1 = []   # remaining links, hidden behind a "More" toggle
    pdgcount = 0
    link = ""      # overrides the `link` parameter (see docstring note)
    pdgcode = ""
    # Process authors to add link, highlight and format affiliation
    for exp in authors:
        # Only subfield $9 == 'PDG' entries with a code in $a are relevant.
        if exp.get('9') == 'PDG' and 'a' in exp:
            values = get_kb_mappings('PDG', key=exp['a'], match_type="e")
            pdgcode = exp['a']
            # pdglive URLs use '/' where the code uses ':' or '='.
            for ch in [':', '=']:
                if ch in pdgcode:
                    pdgcode = pdgcode.replace(ch, '/')
            if values:
                search_link = ('<a href="http://pdglive.lbl.gov/view/' +
                               pdgcode + '">')
                # KB sentinel value meaning the node has no title text.
                if values[0]['value'] == "THE TEXT IS MISSING FOR THIS NODE.":
                    search_link += pdgcode + ' (Title Unknown)'
                else:
                    search_link += values[0]['value']
                search_link += '</a>'
                pdgcount += 1
                # Counter is incremented before the check, so only the
                # first two links land in the always-visible list.
                if pdgcount < 3:
                    output.append(search_link)
                else:
                    output1.append(search_link)
    if len(output1):
        # Overflow links go into a collapsible <div> with a JS toggle.
        link = """ | <a href="#" style="color:green;background:white;" onclick="toggle2('content', this)"><i>More</i></a>
<div id="content" style="display:none; padding-left:36px;">
%(content)s
</div>
<script type="text/javascript">
function toggle2(id, link) {
    var e = document.getElementById(id);
    if (e.style.display == '') {
        e.style.display = 'none';
        link.innerHTML = '<i>More</i>';
    } else {
        e.style.display = '';
        link.innerHTML = '<i>Less</i>';
    }
}
</script>
""" % {'content': separator.join(output1)}
    return separator.join(output) + link
def _kb_requirements_choices():
    """Build (key, title) choices from app-environment requirement entries.

    Reads the 'requirements' knowledge base and keeps only entries whose
    JSON value is flagged with ``domain_app_env``.
    """
    choices = []
    for mapping in get_kb_mappings('requirements', '', ''):
        requirements = json.loads(mapping['value'])
        if requirements.get('domain_app_env', False):
            choices.append((mapping['key'], requirements['title']))
    return choices
def _kb_requirements_choices(domain_flavor=True, domain_os=True):
    """Build (key, title) choices from flavor/OS requirement entries.

    Reads the 'requirements' knowledge base; an entry is kept when its
    JSON value is flagged ``domain_flavor`` (and flavors are requested)
    or ``domain_os`` (and OS images are requested).
    """
    choices = []
    for mapping in get_kb_mappings('requirements', '', ''):
        requirements = json.loads(mapping['value'])
        keep = ((requirements.get('domain_flavor', False) and domain_flavor)
                or (requirements.get('domain_os', False) and domain_os))
        if keep:
            choices.append((mapping['key'], requirements['title']))
    return choices
def format_element(bfo, separator=' | ', link="yes"):
    """Print PACS info as best is possible.

    NOTE(review): the ``link`` parameter is documented as controlling
    linking but is immediately overwritten below, so it has no effect —
    confirm whether that is intentional.

    @param link if yes (default) prints link to search for this item in Inspire
    @param separator separates multiple items
    """
    fields = bfo.fields('084__')
    output = []    # first 25 PACS links, always shown
    output1 = []   # overflow links, hidden behind a "More" toggle
    pacs_count = 0
    link = ""      # overrides the `link` parameter (see docstring note)
    for item in fields:
        # Only 084 fields whose $2 says 'PACS' carry a PACS code in $a.
        if item.get('2') == 'PACS':
            pacs_code = item.get('a')
            if pacs_code:
                # Exact-match KB lookup to translate the code to a title.
                pacs_kb_mapping = get_kb_mappings('PACS', key=pacs_code,
                                                  match_type="e")
                title = 'Translation not available'
                if pacs_kb_mapping:
                    title = pacs_kb_mapping[0]['value']
                search_link = ("<a href='/search?" +
                               urlencode({'p': '084__:"' + pacs_code + '"'}) +
                               "' title='" + title + "'>" +
                               pacs_code + "</a>")
                # Check happens before increment, so the first 25 links
                # land in the always-visible list.
                if pacs_count < 25:
                    output.append(search_link)
                else:
                    output1.append(search_link)
                pacs_count += 1
    if len(output1):
        # Overflow links go into a collapsible <div> with a JS toggle.
        link = """ | <a href="#" onclick="toggle2('content', this); return false;" style="color:green;background:white;"><i>More</i></a>
<div id="content" style="display:none; padding-left:42px;">
%(content)s
</div>
<script type="text/javascript">
function toggle2(id, link) {
    var e = document.getElementById(id);
    if (e.style.display == '') {
        e.style.display = 'none';
        link.innerHTML = '<i>More</i>';
    } else {
        e.style.display = '';
        link.innerHTML = '<i>Less</i>';
    }
}
</script>
""" % {'content': separator.join(output1)}
    return separator.join(output) + link
def _kb_license_choices(domain_data=True, domain_content=True,
                        domain_software=True):
    """Build (key, title) choices from the 'licenses' knowledge base.

    An entry is kept when any of its domain flags (data, content,
    software) is set and the corresponding domain is requested.
    """
    choices = []
    for mapping in get_kb_mappings('licenses', '', ''):
        license = json.loads(mapping['value'])
        # Flags are accessed directly: a malformed entry raises KeyError,
        # mirroring the strictness of the KB contract.
        if ((license['domain_data'] and domain_data)
                or (license['domain_content'] and domain_content)
                or (license['domain_software'] and domain_software)):
            choices.append((mapping['key'], license['title']))
    return choices
def get_journal_info(recid, tags):
    """Return formatted journal publication strings for a record.

    Gathers journal title (p), volume (v), year (y) and starting page (c),
    and emits one formatted string per known variant (base, alternate
    volume, CODEN aliases).
    """
    # TODO: handle records with multiple journals
    record_info = []
    tagsvalues = {}  # letter -> value, e.g. p->"journal of foo", v->"20"
    for letter, tag_name in (("p", 'journal'), ("v", 'volume'),
                             ("y", 'year'), ("c", 'pages')):
        found = get_fieldvalues(recid, tags['publication'][tag_name])
        if found:
            tagsvalues[letter] = found[0]

    # For a page range "x-y" keep only the starting page; a hyphen in the
    # very first position is deliberately NOT treated as a range separator.
    if "c" in tagsvalues:
        pages = tagsvalues["c"]
        hyphen_at = pages.find("-")
        if hyphen_at > 0:
            tagsvalues["c"] = pages[:hyphen_at]

    # Only format when every letter the format string needs is present.
    needed = [c for c in tags['publication_format']
              if c in ('p', 'v', 'y', 'c')]
    if all(c in tagsvalues for c in needed):
        record_info.append(format_journal(tags['publication_format'],
                                          tagsvalues))
        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            variant = tagsvalues.copy()
            variant['v'] = alt_volume
            record_info.append(format_journal(tags['publication_format'],
                                              variant))
        # One extra variant per CODEN registered for this journal title.
        for coden in get_kb_mappings('CODENS', value=tagsvalues['p']):
            variant = tagsvalues.copy()
            variant['p'] = coden['key']
            record_info.append(format_journal(tags['publication_format'],
                                              variant))
    return record_info
def prepare_data_cache(self):
    """*Index* knowledge base and cache it.

    Maps each stemmed word of every KB key to the list of distinct values
    whose key contains that word.
    """
    cache = {}
    for mapping in get_kb_mappings(self.get_kbname()):
        value = mapping['value']
        stems = clean_and_split_words_and_stem(mapping['key'],
                                               CFG_SITE_LANG,
                                               stem_p=True)
        for stem in stems:
            bucket = cache.setdefault(stem, [])
            # Keep each value at most once per word.
            if value not in bucket:
                bucket.append(value)
    return cache
def update_kb(kb_name, data, key_fun, value_fun=lambda x: x, update=False):
    """Update a knowledge base from data.

    New keys are always added; existing keys are rewritten only when
    ``update`` is true and the stored value differs.
    """
    # Memory greedy, but faster than many individual SQL queries.
    existing = {entry['key']: entry['value']
                for entry in get_kb_mappings(kb_name)}
    for item in data:
        key = key_fun(item)
        value = json.dumps(value_fun(item))
        if key not in existing:
            add_kb_mapping(kb_name, key, value)
        elif update and existing[key] != value:
            update_kb_mapping(kb_name, key, key, value)
def perform_request_knowledge_base_show(kb_id, ln=CFG_SITE_LANG, sortby="to",
                                        startat=0, search_term=""):
    """
    Show the content of a knowledge base.

    @param ln language
    @param kb a knowledge base id
    @param sortby the sorting criteria ('from' or 'to')
    @param startat start showing mapping rules at what number
    @param search_term search for this string in kb
    @return the content of the given knowledge base
    """
    kb = bibknowledge.get_kb_by_id(kb_id)
    name = kb.name
    mappings = bibknowledge.get_kb_mappings(name, sortby)
    kb_type = kb.kbtype

    # When searching, keep only rules whose key or value contains the term.
    if search_term:
        mappings = [rule for rule in mappings
                    if search_term in rule['key']
                    or search_term in rule['value']]

    # Dynamic KBs additionally expose their configuration and the list of
    # collections to choose from.
    dyn_config = None
    collections = None
    if kb_type == 'd':
        from invenio.legacy.search_engine \
            import get_alphabetically_ordered_collection_list
        dyn_config = kb.kbdefs.to_dict() if kb.kbdefs else {}
        collections = [item[0]
                       for item in get_alphabetically_ordered_collection_list()]

    return bibknowledge_templates.tmpl_admin_kb_show(ln, kb_id, name,
                                                     mappings, sortby,
                                                     startat, kb_type,
                                                     search_term,
                                                     dyn_config,
                                                     collections)
def _convert_files(obj, eng):
    """Convert extracted files to MARC within the date window and save them.

    Files whose publication date falls between the workflow's from/to dates
    are converted via WorldScientific and written to the target folder.
    """
    from invenio.modules.knowledge.api import get_kb_mappings
    journal_map = {entry['key']: entry['value']
                   for entry in get_kb_mappings('JOURNALS')}
    ws = WorldScientific(journal_map)
    target_folder_full = get_storage_path(suffix=target_folder)

    args = obj.extra_data['args']
    # Default window: from the beginning of time up to today.
    to_date = args.get("to_date") or datetime.now().strftime('%Y-%m-%d')
    from_date = args.get("from_date") or '1900-01-01'

    insert_files = []
    for source_file in obj.data['extracted_files']:
        date = ws.get_date(source_file)
        if not (from_date <= date <= to_date):
            continue
        marc = ws.get_record(source_file)
        if not marc:
            continue
        destination = join(target_folder_full, basename(source_file))
        insert_files.append(destination)
        with open(destination, 'w') as outfile:
            outfile.write(marc)

    obj.log.info("Converted {0} articles between {1} to {2}".format(
        len(insert_files), from_date, to_date
    ))
    obj.data['insert'] = insert_files
    obj.data["result_path"] = target_folder_full
    obj.log.debug("Saved converted files to {0}".format(target_folder_full))
    obj.log.debug("{0} files to add".format(
        len(obj.data["insert"]),
    ))
def perform_request_knowledge_bases_management(ln=CFG_SITE_LANG, search="",
                                               descriptiontoo=""):
    """
    Return the main page for knowledge bases management.

    @param ln language
    @param search search for this string in kb's
    @param descriptiontoo search in descriptions too
    @return the main page for knowledge bases management
    """
    kbs = bibknowledge.get_kbs_info()
    # If search is nonempty, keep only KBs that contain the string in
    # their name, (optionally) description, or any mapping key/value.
    if search:
        newkbs = []
        for kb in kbs:
            kbname = kb['name']
            # BUGFIX: the name check was previously gated on
            # `descriptiontoo`, so plain searches never matched on KB
            # names; a KB matching on both name and description was also
            # appended twice. Each KB is now appended at most once.
            if kbname.count(search) > 0:
                newkbs.append(kb)
                continue
            if descriptiontoo and kb['description'].count(search) > 0:
                newkbs.append(kb)
                continue
            # Fall back to scanning the KB's mappings; stop at the first
            # hit (previously all mappings were fetched and scanned even
            # after a match was found).
            for mapping in bibknowledge.get_kb_mappings(kbname):
                if (mapping['key'].count(search) > 0 or
                        mapping['value'].count(search) > 0):
                    newkbs.append(kb)
                    break
        kbs = newkbs
    return bibknowledge_templates.tmpl_admin_kbs_management(ln, kbs, search)
def get_requirements():
    """Group requirement KB entries into flavors, images and app envs.

    Reads the 'requirements' knowledge base; each entry's JSON value is
    placed into every bucket whose ``domain_*`` flag it carries, provided
    the bucket's id field is present in the value.
    """
    reqs = dict(
        flavors=OrderedDict(),
        images=OrderedDict(),
        app_envs=OrderedDict(),
    )
    # kb_mappings has 'domain_xxx' = True for each type of requirement;
    # map each flag to the bucket it fills and the id key that must be set.
    domains_to_reqs = {
        'domain_flavor': {'reqs': 'flavors', 'id': 'flavor-id'},
        'domain_os': {'reqs': 'images', 'id': 'image-id'},
        'domain_app_env': {'reqs': 'app_envs', 'id': 'app-id'},
    }
    for mapping in get_kb_mappings('requirements'):
        value = json.loads(mapping['value'])
        for domain, target in domains_to_reqs.items():
            if value.get(domain, False) and value.get(target['id']):
                reqs[target['reqs']][value['id']] = value
    return reqs
def process_sip_metadata(cls, deposition, metadata):
    """Map fields to match jsonalchemy configuration.

    Mutates ``metadata`` in place: form-level keys are renamed/nested to
    the record schema, derived fields (collections, acquisition source,
    page count, ...) are added, and consumed form keys are deleted at the
    end via ``delete_keys``.
    """
    delete_keys = []
    field_list = ['abstract', 'title']

    # maps from a form field to the corresponding MarcXML field
    field_map = {'abstract': "summary",
                 'title': "title",
                 'subject_term': "term",
                 'institution': "university",
                 'degree_type': 'degree_type',
                 'thesis_date': "date",
                 'journal_title': "journal_title",
                 'page_range_article_id': "page_artid",
                 'volume': "journal_volume",
                 'year': "year",
                 'issue': "journal_issue",
                 'conference_id': "cnum"}

    # exclusive fields for each type of document
    doc_exclusive_fields = {'article': ['journal_title',
                                        'page_range',
                                        'article_id',
                                        'volume',
                                        'year',
                                        'issue',
                                        'conference_id'],
                            'thesis': ['supervisors',
                                       'institution',
                                       'degree_type',
                                       'thesis_date',
                                       'defense_date'],
                            }
    # Keep the fields of the submitted document type; drop all fields that
    # belong exclusively to the OTHER document types.
    del doc_exclusive_fields[metadata['type_of_doc']]

    def remove_exclusive_fields(fieldlist):
        for field in fieldlist:
            if field in metadata and metadata[field]:
                del metadata[field]

    # Py2 map() is eager, so this executes the removals immediately.
    map(remove_exclusive_fields, doc_exclusive_fields.values())

    filter_empty_elements(metadata)

    # ============================
    # Abstract, Title and Subjects
    # ============================
    for field in field_list:
        if field in metadata:
            tmp_field = metadata[field]
            metadata[field] = {field_map[field]: tmp_field}

    if "subject_term" in metadata:
        tmp_field = metadata["subject_term"]
        metadata["subject_term"] = [{"term": t,
                                     "scheme": "INSPIRE",
                                     "source": "submitter"}
                                    for t in tmp_field]

    # =======
    # Authors
    # =======
    # Drop empty author entries first (py2 filter returns a list).
    metadata['authors'] = filter(None, metadata['authors'])
    if 'authors' in metadata and metadata['authors']:
        first_author = metadata['authors'][0].get('full_name').split(',')
        # Collapse spaces between initials, e.g. "J. R." -> "J.R.".
        if len(first_author) > 1 and \
                literature.match_authors_initials(first_author[1]):
            first_author[1] = first_author[1].replace(' ', '')
            metadata['authors'][0]['full_name'] = ", ".join(first_author)
        metadata['_first_author'] = metadata['authors'][0]
        if metadata['authors'][1:]:
            metadata['_additional_authors'] = metadata['authors'][1:]
            for k in metadata['_additional_authors']:
                try:
                    additional_author = k.get('full_name').split(',')
                    if len(additional_author) > 1 and \
                            literature.match_authors_initials(additional_author[1]):
                        additional_author[1] = additional_author[1].replace(' ', '')
                        k['full_name'] = ", ".join(additional_author)
                except AttributeError:
                    # Entry without a usable full_name; leave it untouched.
                    pass
        delete_keys.append('authors')

    # ===========
    # Supervisors
    # ===========
    if 'supervisors' in metadata and metadata['supervisors']:
        metadata['thesis_supervisor'] = metadata['supervisors']
        delete_keys.append('supervisors')

    # ====
    # Note
    # ====
    if metadata.get('note', None):
        metadata['note'] = [{'value': metadata['note']}]

    # ==============
    # Thesis related
    # ==============
    thesis_fields = filter(lambda field: field in metadata,
                           ['institution', 'degree_type', 'thesis_date'])
    if thesis_fields:
        metadata['thesis'] = {}
        for field in thesis_fields:
            metadata['thesis'][field_map[field]] = metadata[field]
        delete_keys.extend(thesis_fields)

    if 'defense_date' in metadata and metadata['defense_date']:
        defense_note = {
            'value': 'Presented on ' + metadata['defense_date']
        }
        if metadata.get('note', None):
            metadata['note'].append(defense_note)
        else:
            metadata['note'] = [defense_note]

    # ========
    # Category
    # ========
    metadata['collections'] = [{'primary': "HEP"}]
    if metadata['type_of_doc'] == 'thesis':
        metadata['collections'].append({'primary': "THESIS"})

    # ============
    # Title source
    # ============
    if 'title_source' in metadata and metadata['title_source']:
        metadata['title']['source'] = metadata['title_source']
        delete_keys.append('title_source')

    # =============
    # Report number
    # =============
    if 'report_numbers' in metadata and metadata['report_numbers']:
        user_report_number = metadata['report_numbers']
        metadata['report_number'] = [{'primary': v['report_number']}
                                     for v in user_report_number]
        delete_keys.append('report_numbers')

    # ========
    # arXiv ID
    # ========
    imported_from_arXiv = filter(lambda field: field in metadata,
                                 ['categories', 'title_arXiv'])
    if imported_from_arXiv or metadata.get('title_source') == 'arXiv':
        # Post-2007 identifiers get the 'arXiv:' prefix in the report number.
        if is_arxiv_post_2007(metadata['arxiv_id']):
            arxiv_rep_number = {'primary': 'arXiv:' + metadata['arxiv_id'],
                                'source': 'arXiv'}
        else:
            arxiv_rep_number = {'primary': metadata['arxiv_id'],
                                'source': 'arXiv'}
        # Old-style ids look like "category/number".
        if len(metadata['arxiv_id'].split('/')) == 2:
            arxiv_rep_number['arxiv_category'] = metadata['arxiv_id'].split('/')[0]
        if metadata.get('report_numbers'):
            metadata['report_number'].append(arxiv_rep_number)
        else:
            metadata['report_number'] = [arxiv_rep_number]
        if 'abstract' in metadata:
            metadata['abstract']['source'] = 'arXiv'
        if 'title_arXiv' in metadata:
            title_arXiv = metadata['title_arXiv']
            metadata['title_arXiv'] = {}
            metadata['title_arXiv']['value'] = title_arXiv
            metadata['title_arXiv']['source'] = 'arXiv'
        if 'categories' in metadata and metadata['categories']:
            # arXiv subject categories
            subject_list = [{"term": c, "scheme": "arXiv"}
                            for c in metadata['categories'].split()]
            # INSPIRE subject categories
            if 'subject_term' in metadata and metadata['subject_term']:
                metadata['subject_term'].extend(subject_list)
            else:
                metadata['subject_term'] = subject_list
        metadata['system_number_external'] = {'value': 'oai:arXiv.org:' +
                                              metadata['arxiv_id'],
                                              'institute': 'arXiv'}
        metadata['collections'].extend([{'primary': "arXiv"},
                                        {'primary': "Citeable"}])

    # ========
    # Language
    # ========
    if metadata['language'] not in ('en', 'oth'):
        metadata['language'] = unicode(dict(LiteratureForm.languages).get(metadata['language']))
    elif metadata['language'] == 'oth':
        # 'oth' means "other": substitute the free-text language, or drop
        # the field when none was given.
        # NOTE(review): the original nesting of this else was ambiguous in
        # the collapsed source — confirm it pairs with the inner `if`.
        if metadata['other_language']:
            metadata['language'] = metadata['other_language']
        else:
            delete_keys.append('language')

    # ==========
    # Experiment
    # ==========
    if 'experiment' in metadata:
        metadata['accelerator_experiment'] = {'experiment': metadata['experiment']}
        delete_keys.append('experiment')

    # ===============
    # Conference Info
    # ===============
    if 'conf_name' in metadata:
        if 'nonpublic_note' in metadata:
            field = [metadata['nonpublic_note'], metadata['conf_name']]
            metadata['nonpublic_note'] = field
        else:
            metadata['nonpublic_note'] = [metadata['conf_name']]
        metadata['collections'].extend([{'primary': "ConferencePaper"}])
        delete_keys.append('conf_name')

    # =======
    # License
    # =======
    licenses_kb = dict([(x['key'], x['value']) for x in
                        get_kb_mappings(cfg["DEPOSIT_INSPIRE_LICENSE_KB"])])
    if 'license' in metadata and metadata['license']:
        metadata['license'] = {'license': metadata['license']}
        if 'license_url' in metadata:
            metadata['license']['url'] = metadata['license_url']
        else:
            # Fall back to the KB-registered URL for this license key.
            metadata['license']['url'] = licenses_kb.get(
                metadata['license']['license'])
    elif 'license_url' in metadata:
        metadata['license'] = {'url': metadata['license_url']}
        # Reverse lookup: URL -> license key.
        license_key = {v: k for k, v in licenses_kb.items()}.get(
            metadata['license_url'])
        if license_key:
            metadata['license']['license'] = license_key
        delete_keys.append('license_url')

    # ===========
    # Files (FFT)
    # ===========
    if 'fft' in metadata and metadata['fft']:
        def restructure_ffts(fft):
            # Rename path/name to the FFT url/description schema.
            fft['url'] = fft['path']
            fft['description'] = fft['name']
            fft['docfile_type'] = "INSPIRE-PUBLIC"
            del fft['path'], fft['name']

        # Py2 map() is eager, so this mutates every FFT dict in place.
        map(restructure_ffts, metadata['fft'])

    # ====
    # URLs
    # ====
    if metadata.get('url'):
        metadata['pdf'] = metadata['url']
        if isinstance(metadata['url'], string_types):
            metadata['url'] = [{'url': metadata['url']}]
    if 'additional_url' in metadata and metadata['additional_url']:
        if metadata.get('url'):
            metadata['url'].append({'url': metadata['additional_url']})
        else:
            metadata['url'] = [{'url': metadata['additional_url']}]
        delete_keys.append('additional_url')

    # ================
    # Publication Info
    # ================
    publication_fields = filter(lambda field: field in metadata,
                                ['journal_title', 'page_range_article_id',
                                 'volume', 'year', 'issue', 'conference_id'])
    if publication_fields:
        metadata['publication_info'] = {}
        for field in publication_fields:
            metadata['publication_info'][field_map[field]] = metadata[field]
        # Derive the page count from a "first-last" page range.
        if 'page_nr' not in metadata and 'page_range_article_id' in publication_fields:
            pages = metadata['page_range_article_id'].split('-')
            if len(pages) == 2:
                try:
                    metadata['page_nr'] = int(pages[1]) - int(pages[0]) + 1
                except ValueError:
                    # Non-numeric page markers; leave page_nr unset.
                    pass
        if {'primary': "ConferencePaper"} not in metadata['collections']:
            metadata['collections'].append({'primary': "Published"})
        delete_keys.extend(publication_fields)

    if 'journal_title' in metadata:
        # Normalise the journal title via the journals KB (case-insensitive).
        journals_kb = dict([(x['key'].lower(), x['value']) for x in
                            get_kb_mappings(cfg.get("DEPOSIT_INSPIRE_JOURNALS_KB"))])
        metadata['publication_info']['journal_title'] = journals_kb.get(
            metadata['journal_title'].lower(), metadata['journal_title'])

    if 'nonpublic_note' in metadata:
        if (isinstance(metadata['nonpublic_note'], list) and
                len(metadata['nonpublic_note']) > 1):
            # Keep only the conference name part (second element).
            del metadata['nonpublic_note'][0]
        else:
            delete_keys.append('nonpublic_note')

    # =============
    # Preprint Info
    # =============
    if 'created' in metadata and metadata['created']:
        metadata['preprint_info'] = {'date': metadata['created']}
        delete_keys.append('created')

    # ==========
    # Owner Info
    # ==========
    userid = deposition.user_id
    user = UserInfo(userid)
    email = user.info.get('email', '')
    external_ids = UserEXT.query.filter_by(id_user=userid).all()
    sources = ["{0}{1}".format('inspire:uid:', userid)]
    sources.extend(["{0}:{1}".format(e_id.method, e_id.id)
                    for e_id in external_ids])
    metadata['acquisition_source'] = dict(
        source=sources,
        email=email,
        method="submission",
        submission_number=deposition.id,
    )

    # ==============
    # Extra comments
    # ==============
    if 'extra_comments' in metadata and metadata['extra_comments']:
        metadata['hidden_note'] = [{'value': metadata['extra_comments'],
                                    'source': 'submitter'}]

    # ===================
    # Delete useless data
    # ===================
    for key in delete_keys:
        del metadata[key]
def kb_export(req, kbname="", format="kbr", searchkey="", searchvalue="",
              searchtype="s", limit=None, ln=CFG_SITE_LANG):
    """
    Exports the given kb so that it is listed in stdout (the browser).

    @param req the request
    @param kbname knowledge base name
    @param format 'kba' for authority file, 'kbr' for leftside-rightside,
           json for json-formatted dictionaries
    @param searchkey include only lines that match this on the left side
    @param searchvalue include only lines that match this on the right side
    @param searchtype s = substring match, e = exact match
    @param limit how many results to return. None means all
    @param ln language
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = ''' > <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))
    if not kbname:
        return page(title=_("Knowledge base name missing"),
                    body = """Required parameter kbname is missing.""",
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    #in order to make 'wget' downloads easy we do not require authorization
    #first check the type of the KB
    kbtype = None
    kbinfo = None
    kbid = None
    kbinfos = bibknowledge.get_kbs_info("", kbname)
    if kbinfos:
        kbinfo = kbinfos[0]
        kbtype = kbinfo['kbtype']
        kbid = kbinfo['id']
    else:
        return page(title=_("Unknown knowledge base"),
                    body = _("There is no knowledge base with that name."),
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    # Plain left/right ("written-as") KB, or legacy rows with no type set.
    if not kbtype or kbtype == 'w':
        if format and format == "ejson":
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_embedded_json(kbname, searchkey, \
                                                              searchvalue, searchtype, limit)
        elif format and format[0] == 'j':
            # as JSON formatted string
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_json(kbname, searchkey, \
                                                     searchvalue, searchtype, limit)
        # left side / right side KB
        mappings = bibknowledge.get_kb_mappings(kbname, searchkey, \
                                                searchvalue, searchtype)
        if format == 'right' or format == 'kba':
            # as authority sequence: sorted, de-duplicated right-hand values
            seq = [m['value'] for m in mappings]
            seq = uniq(sorted(seq))
            for s in seq:
                req.write(s+"\n");
            return
        else:
            # as regularly formatted left-right mapping
            for m in mappings:
                req.write(m['key'] + '---' + m['value'] + '\n')
            return
    elif kbtype == 'd':
        # dynamic kb, another interface for perform_request_search
        if format and format[0] == 'j':
            req.content_type = "application/json"
            return bibknowledge.get_kbd_values_json(kbname, searchvalue)
        else:
            # print it as a list of values
            for hit in bibknowledge.get_kbd_values(kbname, searchvalue):
                req.write(hit + '\n')
            req.write('\n')
            return
    elif kbtype == 't':
        #taxonomy: output the file
        kbfilename = CFG_WEBDIR+"/kbfiles/"+str(kbid)+".rdf"
        try:
            f = open(kbfilename, 'r')
            for line in f:
                req.write(line)
            f.close()
        except:
            # NOTE(review): bare except — any error (not just a missing
            # file) is reported the same way.
            req.write("Reading the file "+kbfilename+" failed.")
    else:
        # This situation should never happen
        raise ValueError, "Unsupported KB Type: %s" % kbtype
def kb_add_mapping(req, kb, mapFrom, mapTo, sortby="to", ln=CFG_SITE_LANG,
                   forcetype=None, replacements=None, kb_type=None):
    """
    Adds a new mapping to a kb.

    @param ln language
    @param kb the kb id to show
    @param sortby the sorting criteria ('from' or 'to')
    @param forcetype indicates if this function should ask about replacing
           left/right sides (None or 'no') replace in current kb ('curr')
           or in all ('all')
    @param replacements an object containing kbname+++left+++right strings.
           Can be a string or an array of strings
    @param kb_type None for normal from-to kb's, 't' for taxonomies
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = ''' > <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))
    try:
        dummy = getUid(req)
    except:
        # No session/user id available at all.
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge')
    if not auth_code:
        kb_id = wash_url_argument(kb, 'int')
        kb_name = bibknowledge.get_kb_name(kb_id)
        if kb_name is None:
            return page(title=_("Unknown Knowledge Base"),
                        body = "",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        errors = [("ERR_KB_ID_UNKNOWN", kb)],
                        lastupdated=__lastupdated__,
                        req=req)
        key = wash_url_argument(mapFrom, 'str')
        value = wash_url_argument(mapTo, 'str')
        #check if key or value already exists in some KB
        left_sides_match = bibknowledge.get_kb_mappings("", key, "")
        #check that the match is exact (the lookup above is a substring search)
        left_sides = []
        for m in left_sides_match:
            if m['key'] == key:
                left_sides.append(m)
        right_sides_match = bibknowledge.get_kb_mappings("", "", value)
        right_sides = []
        for m in right_sides_match:
            if m['value'] == value:
                right_sides.append(m)
        if (len(right_sides) == 0) and (len(left_sides) == 0):
            #no problems, just add in current
            forcetype = "curr"
        #likewise, if this is a taxonomy, just pass on
        if kb_type == 't':
            forcetype = "curr"
        if forcetype and not forcetype == "no":
            # Caller already decided how to handle duplicates; no questions.
            pass
        else:
            # Ask the user to confirm before creating a duplicate side.
            if len(left_sides) > 0:
                return page(title=_("Left side exists"),
                            body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "left", kb_name, left_sides),
                            language=ln,
                            navtrail = navtrail_previous_links,
                            lastupdated=__lastupdated__,
                            req=req)
            if len(right_sides) > 0:
                return page(title=_("Right side exists"),
                            body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "right", kb_name, right_sides),
                            language=ln,
                            navtrail = navtrail_previous_links,
                            lastupdated=__lastupdated__,
                            req=req)
        if forcetype == "curr":
            bibknowledge.add_kb_mapping(kb_name, key, value)
        if forcetype == "all":
            #a bit tricky.. remove the rules given in param replacement and add the current
            #rule in the same kb's
            if replacements:
                #"replacements" can be either a string or an array. Let's make it always an array
                if type(replacements) == type("this is a string"):
                    mystr = replacements
                    replacements = []
                    replacements.append(mystr)
                for r in replacements:
                    if r.find("++++") > 0:
                        (rkbname, rleft, dummy) = r.split('++++')
                        bibknowledge.remove_kb_mapping(rkbname, rleft)
                        #add only if this is not yet there..
                        if not bibknowledge.kb_mapping_exists(rkbname, key):
                            bibknowledge.add_kb_mapping(rkbname, key, value)
        redirect_to_url(req, "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {'ln':ln, 'kb':kb_id, 'sortby':sortby, 'kb_type':kb_type})
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def get_journal_info(record, tags):
    """Fetch journal info from given record"""
    record_info = []

    def _first_value(field, full_tag):
        # First value of the tag's subfield code, or None when absent.
        try:
            return field.get_subfield_values(full_tag[5])[0]
        except IndexError:
            return None

    for field in record.find_fields(tags['publication']['journal'][:5]):
        # letter -> value, e.g. c->444 y->1999 p->"journal of foo" v->20
        tagsvalues = {}
        for letter, tag_name in (('p', 'journal'), ('v', 'volume'),
                                 ('y', 'year')):
            value = _first_value(field, tags['publication'][tag_name])
            if value is not None:
                tagsvalues[letter] = value
        pages = _first_value(field, tags['publication']['pages'])
        if pages is not None:
            # if the page numbers have "x-y" take just x
            tagsvalues['c'] = pages.split('-', 1)[0]

        # Only format when every letter the format string needs is present.
        needed = [c for c in tags['publication_format']
                  if c in ('p', 'v', 'y', 'c')]
        if not all(c in tagsvalues for c in needed):
            continue
        record_info.append(format_journal(tags['publication_format'],
                                          tagsvalues))
        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            variant = tagsvalues.copy()
            variant['v'] = alt_volume
            record_info.append(format_journal(tags['publication_format'],
                                              variant))
        # Add codens: one extra entry per CODEN alias of this journal.
        for coden in get_kb_mappings('CODENS', value=tagsvalues['p']):
            variant = tagsvalues.copy()
            variant['p'] = coden['key']
            record_info.append(format_journal(tags['publication_format'],
                                              variant))
    return record_info
def get_journal_info(record, tags):
    """Fetch journal info from given record"""
    record_info = []
    pub_tags = tags["publication"]
    journals_fields = record.find_fields(pub_tags["journal"][:5])
    for field in journals_fields:
        # letter -> value, e.g. c->444 y->1999 p->"journal of foo" v->20
        tagsvalues = {}
        for letter, full_tag in (("p", pub_tags["journal"]),
                                 ("v", pub_tags["volume"]),
                                 ("y", pub_tags["year"])):
            found = field.get_subfield_values(full_tag[5])
            if found:
                tagsvalues[letter] = found[0]
        found = field.get_subfield_values(pub_tags["pages"][5])
        if found:
            # if the page numbers have "x-y" take just x
            tagsvalues["c"] = found[0].split("-", 1)[0]

        # Skip this field unless every letter used by the format is known.
        required = [c for c in tags["publication_format"]
                    if c in ("p", "v", "y", "c")]
        if any(c not in tagsvalues for c in required):
            continue
        record_info.append(format_journal(tags["publication_format"],
                                          tagsvalues))
        alt_volume = get_alt_volume(tagsvalues["v"])
        if alt_volume:
            variant = tagsvalues.copy()
            variant["v"] = alt_volume
            record_info.append(format_journal(tags["publication_format"],
                                              variant))
        # Add codens: one extra entry per CODEN alias of this journal.
        for coden in get_kb_mappings("CODENS", value=tagsvalues["p"]):
            variant = tagsvalues.copy()
            variant["p"] = coden["key"]
            record_info.append(format_journal(tags["publication_format"],
                                              variant))
    return record_info
def load_kbs(cfg, run_sql, in_task=False): for kb, query in cfg.iteritems(): task_sleep_now_if_required(can_stop_too=True) if not kb_exists(kb): add_kb(kb) if in_task: write_message("Updating %s KB..." % kb) try: if not in_task: print "kb:", kb print "kb beginning:", len(get_kb_mappings(kb)) if kb.startswith('json_'): encoder = ComplexEncoder() mapping, description = run_sql(query, with_desc=True) if kb in CFG_ADDITIONAL_ENTRIES: mapping += CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print CFG_ADDITIONAL_ENTRIES[kb] column_counter = {} new_description = [] for column in description[1:]: column = column[0] counter = column_counter[column] = column_counter.get( column, 0) + 1 if counter > 1: new_description.append('%s%d' % (column, counter)) else: new_description.append(column) description = new_description else: mapping = run_sql(query) if kb in CFG_ADDITIONAL_ENTRIES: mapping += CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print CFG_ADDITIONAL_ENTRIES[kb] if not in_task: print "mapping:", len(mapping) if kb == 'projects': mapping += [('000000', 'NO PROJECT')] original_keys = set([key[0] for key in get_kbr_keys(kb)]) if not in_task: print "original_keys before:", len(original_keys) updated = 0 added = 0 for i, row in enumerate(mapping): key, value = row[0], row[1:] if kb.startswith('json_'): value = encoder.encode(dict(zip(description, value))) else: value = value[0] if value: if key in original_keys: original_keys.remove(key) if in_task: task_update_progress("%s - %s%%" % (kb, i * 100 / len(mapping))) if kb_mapping_exists(kb, key): updated += 1 update_kb_mapping(kb, key, key, value) else: added += 1 add_kb_mapping(kb, key, value) if not in_task: print "updated:", updated, "added:", added print "kb after update:", len(get_kb_mappings(kb)) print "original_keys after:", len(original_keys) if in_task: task_update_progress("Cleaning %s" % kb) for key in original_keys: remove_kb_mapping(kb, key) if not in_task: print "kb after remove:", len(get_kb_mappings(kb)) except: 
register_exception(alert_admin=True, prefix="Error when updating KB %s" % kb) continue
def load_kbs(cfg, run_sql, in_task=False):
    # Synchronise each knowledge base named in `cfg` with the rows produced
    # by its SQL query: create missing KBs, upsert every (key, value) row,
    # then delete keys the query no longer returns.
    #
    # cfg:     mapping of KB name -> SQL query string
    # run_sql: callable executing a query (supports with_desc=True)
    # in_task: True when running under bibsched (progress messages instead
    #          of stdout prints)
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                # JSON KBs: each row becomes a JSON object keyed by the
                # query's column names (first column is the key itself).
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                # Disambiguate duplicate column names: a, a -> a, a2.
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[
                        column] = column_counter.get(column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
            if not in_task:
                print "mapping:", len(mapping)
            if kb == 'projects':
                # Special sentinel entry for the projects KB.
                mapping += [('000000', 'NO PROJECT')]
            # Keys currently in the KB; whatever remains in this set after
            # the upsert loop is stale and is removed at the end.
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)
            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                # Empty values are skipped entirely (neither added nor
                # protected from the cleanup pass below).
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress(
                            "%s - %s%%" % (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            # Remove entries no longer produced by the query.
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            # NOTE(review): bare except also swallows SystemExit /
            # KeyboardInterrupt — consider narrowing to Exception.
            # Best effort: log the failure and continue with the next KB.
            register_exception(
                alert_admin=True,
                prefix="Error when updating KB %s" % kb)
            continue
def inner(dummy_form, dummy_field, term, limit=50):
    """Autocomplete callback: look up *term* in the enclosing KB `name`.

    Returns at most *limit* KB mappings, passed through the enclosing
    `mapper` callable when one is defined.
    """
    from invenio.modules.knowledge.api import get_kb_mappings
    matches = get_kb_mappings(name, '', term, limit=limit)[:limit]
    if mapper is None:
        return matches
    return map(mapper, matches)
def kb_export(req, kbname="", format="kbr", searchkey="", searchvalue="",
              searchtype="s", limit=None, ln=CFG_SITE_LANG):
    """
    Exports the given kb so that it is listed in stdout (the browser).

    @param req the request
    @param kbname knowledge base name (required)
    @param format 'kba' (or 'right') for authority file, 'kbr' for
        leftside-rightside, 'ejson' for embedded JSON, anything else
        starting with 'j' for json-formatted dictionaries
    @param searchkey include only lines that match this on the left side
    @param searchvalue include only lines that match this on the right side
    @param searchtype s = substring match, e = exact match
    @param limit how many results to return. None means all
    @param ln language
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = ''' > <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (
        CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))

    if not kbname:
        return page(title=_("Knowledge base name missing"),
                    body="""Required parameter kbname is missing.""",
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

    # In order to make 'wget' downloads easy we do not require
    # authorization.

    # First check the type of the KB.
    kbtype = None
    kbinfo = None
    kbid = None
    kbinfos = bibknowledge.get_kbs_info("", kbname)
    if kbinfos:
        kbinfo = kbinfos[0]
        kbtype = kbinfo['kbtype']
        kbid = kbinfo['id']
    else:
        return page(title=_("Unknown knowledge base"),
                    body=_("There is no knowledge base with that name."),
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

    if not kbtype or kbtype == 'w':
        # Plain left-side / right-side mapping KB.
        if format and format == "ejson":
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_embedded_json(
                kbname, searchkey, searchvalue, searchtype, limit)
        elif format and format[0] == 'j':
            # As JSON formatted string.
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_json(
                kbname, searchkey, searchvalue, searchtype, limit)
        mappings = bibknowledge.get_kb_mappings(kbname, searchkey,
                                                searchvalue, searchtype)
        if format == 'right' or format == 'kba':
            # As authority sequence: sorted, de-duplicated right sides.
            seq = [m['value'] for m in mappings]
            seq = uniq(sorted(seq))
            for s in seq:
                req.write(s + "\n")
            return
        else:
            # As regularly formatted left-right mapping.
            for m in mappings:
                req.write(m['key'] + '---' + m['value'] + '\n')
            return
    elif kbtype == 'd':
        # Dynamic kb, another interface for perform_request_search.
        if format and format[0] == 'j':
            req.content_type = "application/json"
            return bibknowledge.get_kbd_values_json(kbname, searchvalue)
        else:
            # Print it as a list of values.
            for hit in bibknowledge.get_kbd_values(kbname, searchvalue):
                req.write(hit + '\n')
            req.write('\n')
            return
    elif kbtype == 't':
        # Taxonomy: stream the RDF file to the client.
        kbfilename = CFG_WEBDIR + "/kbfiles/" + str(kbid) + ".rdf"
        try:
            f = open(kbfilename, 'r')
            try:
                for line in f:
                    req.write(line)
            finally:
                # Close even when req.write fails mid-stream; the old
                # code leaked the handle on any error during the loop.
                f.close()
        except IOError:
            # Narrowed from a bare except: open/read failures and client
            # disconnects both surface as IOError here.
            req.write("Reading the file " + kbfilename + " failed.")
    else:
        # This situation should never happen.
        raise ValueError("Unsupported KB Type: %s" % kbtype)
def kb_add_mapping(req, kb, mapFrom, mapTo, sortby="to", ln=CFG_SITE_LANG,
                   forcetype=None, replacements=None, kb_type=None):
    """
    Adds a new mapping to a kb.

    @param ln language
    @param kb the kb id to show
    @param sortby the sorting criteria ('from' or 'to')
    @param forcetype indicates if this function should ask about
           replacing left/right sides (None or 'no') replace in current
           kb ('curr') or in all ('all')
    @param replacements an object containing kbname+++left+++right
           strings. Can be a string or an array of strings
    @param kb_type None for normal from-to kb's, 't' for taxonomies
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = ''' > <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (
        CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))

    try:
        dummy = getUid(req)
    except:
        # NOTE(review): bare except — presumably guards against a missing
        # session/uid; any failure falls back to the generic error page.
        return error_page('Error', req)

    (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge')
    if not auth_code:
        kb_id = wash_url_argument(kb, 'int')
        kb_name = bibknowledge.get_kb_name(kb_id)
        if kb_name is None:
            return page(title=_("Unknown Knowledge Base"),
                        body="",
                        language=ln,
                        navtrail=navtrail_previous_links,
                        errors=[("ERR_KB_ID_UNKNOWN", kb)],
                        lastupdated=__lastupdated__,
                        req=req)
        key = wash_url_argument(mapFrom, 'str')
        value = wash_url_argument(mapTo, 'str')

        # Check if key or value already exists in some KB.  The KB search
        # is a substring match, so filter down to exact matches only.
        left_sides_match = bibknowledge.get_kb_mappings("", key, "")
        # check that the match is exact
        left_sides = []
        for m in left_sides_match:
            if m['key'] == key:
                left_sides.append(m)
        right_sides_match = bibknowledge.get_kb_mappings("", "", value)
        right_sides = []
        for m in right_sides_match:
            if m['value'] == value:
                right_sides.append(m)

        if (len(right_sides) == 0) and (len(left_sides) == 0):
            # No conflicts anywhere: just add in the current KB without
            # asking the user.
            forcetype = "curr"

        # Likewise, if this is a taxonomy, just pass on.
        if kb_type == 't':
            forcetype = "curr"

        if forcetype and not forcetype == "no":
            # The user already decided (or no conflict): skip the
            # confirmation pages below.
            pass
        else:
            # Conflicts exist and no decision was made yet: show a
            # verification page instead of adding the mapping.
            if len(left_sides) > 0:
                return page(
                    title=_("Left side exists"),
                    body=bibknowledgeadminlib.perform_request_verify_rule(
                        ln, kb_id, key, value, "left", kb_name, left_sides),
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
            if len(right_sides) > 0:
                return page(
                    title=_("Right side exists"),
                    body=bibknowledgeadminlib.perform_request_verify_rule(
                        ln, kb_id, key, value, "right", kb_name, right_sides),
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

        if forcetype == "curr":
            bibknowledge.add_kb_mapping(kb_name, key, value)

        if forcetype == "all":
            # A bit tricky.. remove the rules given in param replacement
            # and add the current rule in the same kb's.
            if replacements:
                # "replacements" can be either a string or an array.
                # Let's make it always an array.
                if type(replacements) == type("this is a string"):
                    mystr = replacements
                    replacements = []
                    replacements.append(mystr)
                for r in replacements:
                    if r.find("++++") > 0:
                        (rkbname, rleft, dummy) = r.split('++++')
                        bibknowledge.remove_kb_mapping(rkbname, rleft)
                        # Add only if this is not yet there..
                        if not bibknowledge.kb_mapping_exists(rkbname, key):
                            bibknowledge.add_kb_mapping(rkbname, key, value)

        # Done: go back to the KB listing page.
        redirect_to_url(
            req,
            "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {
                'ln': ln,
                'kb': kb_id,
                'sortby': sortby,
                'kb_type': kb_type
            })
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def get_value(kb_name, list_of_keys):
    """Return the KB value for the first key in *list_of_keys* that
    exists in *kb_name*, or None when no key matches."""
    for candidate in list_of_keys:
        if not kb_mapping_exists(kb_name, candidate):
            continue
        mappings = get_kb_mappings(kb_name=kb_name, key=candidate)
        return mappings[0].get("value")
    return None