def test_EJOURNALS_keys(self):
    """bibknowledge - test left/right rules (key lookups)

    Checks the number of entries the EJOURNALS knowledge base yields for
    one key lookup and for three value lookups with different search
    arguments.
    """
    kb_name = "EJOURNALS"

    # key lookup for "Acta"
    acta_keys = get_kbr_keys(kb_name, "Acta")
    self.assertEqual(2, len(acta_keys))

    # value lookup with an empty key and searchtype 'e' finds nothing
    exact_empty = get_kbr_values(kb_name, '', searchtype='e')
    self.assertEqual(0, len(exact_empty))

    # searchtype 's' with no key, then with an explicitly empty key:
    # both are expected to yield the same 327 entries
    for key_kwargs in ({}, {'searchkey': ''}):
        found = get_kbr_values(kb_name, searchtype='s', **key_kwargs)
        self.assertEqual(327, len(found))
def crossref_translate_title(record):
    """
    Convert the record's title to the Inspire specific abbreviation
    of the title (using JOURNALS knowledge base)

    For every 773 field instance, the value of the field's first
    subfield is looked up (searchtype 'e') in the JOURNALS knowledge
    base. When the lookup finds something, the first value found is
    written through record_modify_subfield as subfield 'p' at subfield
    position 0 of that field instance.

    @param record: the record whose 773 fields are processed
    @return: nothing is returned explicitly; the update is applied
        through record_modify_subfield on the given record
    """
    journal_tag = '773'
    # probably there is only one 773 field, but every instance is handled
    for instance in record_get_field_instances(record, journal_tag):
        # instance[0] holds the subfields; take the value of the first one
        current_title = instance[0][0][1]
        kb_rows = get_kbr_values("JOURNALS", current_title, searchtype='e')
        if not kb_rows:
            # no abbreviation known for this title: leave the field alone
            continue
        # the lookup returns a list; only its first value is needed
        abbreviation = kb_rows[0][0]
        # instance[4] is handed over as the global position of the field
        global_position = instance[4]
        record_modify_subfield(rec=record,
                               tag=journal_tag,
                               subfield_code='p',
                               value=abbreviation,
                               subfield_position=0,
                               field_position_global=global_position)
def test_EJOURNALS_values(self):
    """bibknowledge - test a left/right rule (value lookup)

    Looks up "Astron." in the EJOURNALS knowledge base and expects the
    lookup to return 29 entries.
    """
    expected_count = 29
    matches = get_kbr_values("EJOURNALS", "Astron.")
    self.assertEqual(expected_count, len(matches))
def perform_request_autocomplete(request_type, recid, uid, data):
    """
    Perform an AJAX request associated with the retrieval of autocomplete
    data.

    Arguments:
        request_type: Type of the currently served request. The values
            "autokeyword", "autosuggest" and "autocomplete" are handled;
            any other value only yields the result code.
        recid: the identifier of the record
        uid: The identifier of the user being currently logged in
        data: The request data. Must contain "value" (the text to search
            by). "maintag", "subtag1", "subtag2" and "subfieldcode" are
            used to build the full tag when all four are present.

    Returns:
        A dictionary that always contains "resultCode" and, depending on
        the request type and the data, one of "autokeyword",
        "autosuggest" or "autocomplete" holding the list of proposals.
    """
    response = {}
    # get the values based on which one needs to search
    searchby = data["value"]
    # the full tag can only be built when all of its parts were sent
    fulltag = ""
    if ("maintag" in data and "subtag1" in data and
            "subtag2" in data and "subfieldcode" in data):
        maintag = data["maintag"]
        subtag1 = data["subtag1"]
        subtag2 = data["subtag2"]
        # empty or blank indicators are written as "_" inside the full tag
        u_subtag1 = subtag1
        u_subtag2 = subtag2
        if (not subtag1) or (subtag1 == " "):
            u_subtag1 = "_"
        if (not subtag2) or (subtag2 == " "):
            u_subtag2 = "_"
        subfieldcode = data["subfieldcode"]
        fulltag = maintag + u_subtag1 + u_subtag2 + subfieldcode

    if request_type == "autokeyword":
        # call the keyword-from-ontology function
        if fulltag and searchby:
            items = get_kbt_items_for_bibedit(CFG_BIBEDIT_KEYWORD_TAXONOMY,
                                              CFG_BIBEDIT_KEYWORD_RDFLABEL,
                                              searchby)
            response["autokeyword"] = items

    if request_type == "autosuggest":
        # call knowledge base function to put the suggestions in an array;
        # suggestions are only computed for more than 3 typed characters
        if fulltag and searchby and len(searchby) > 3:
            suggest_values = get_kbd_values_for_bibedit(fulltag, "", searchby)
            # keep only the suggestions that start with the typed text
            response["autosuggest"] = [sugg for sugg in suggest_values
                                       if sugg.startswith(searchby)]

    if request_type == "autocomplete":
        # call the values function with the correct kb_name
        if fulltag in CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS:
            kbname = CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS[fulltag]
            vals = []
            if searchby:
                # The search text may hold several semicolon-separated
                # items. str.split() also copes with text that has no
                # semicolon at all, so it is applied unconditionally
                # (testing the truth value of rfind() is wrong: it is -1,
                # i.e. true, when nothing is found and 0, i.e. false,
                # when the only semicolon is the first character).
                for item in searchby.split(";"):
                    item = item.strip()
                    # we want an exact match
                    kbrvals = get_kbr_values(kbname, item, "", "e")
                    if kbrvals and kbrvals[0]:
                        # add the found val into vals
                        # NOTE(review): this stores the first entry exactly
                        # as returned by get_kbr_values; confirm that it is
                        # comparable with the values read from the record.
                        vals.append(kbrvals[0])
            # check that the values are not already contained in other
            # instances of this field
            record = get_cache_file_contents(recid, uid)[2]
            xml_rec = print_rec(record)
            record, status_code, dummy_errors = create_record(xml_rec)
            existing_values = []
            if status_code != 0:
                existing_values = record_get_field_values(record,
                                                          maintag,
                                                          subtag1,
                                                          subtag2,
                                                          subfieldcode)
            # get the new values, i.e. vals not in existing; a new list is
            # built because removing from a list while iterating over it
            # skips the element that follows each removed one
            response["autocomplete"] = [val for val in vals
                                        if val not in existing_values]

    response["resultCode"] = \
        CFG_BIBEDIT_AJAX_RESULT_CODES_REV["autosuggestion_scanned"]
    return response
def compare_metadata(metadata, rec):
    """
    Compare a record with the metadata returned by crossref

    Each difference found adds a weight to a running score and queues a
    message. Only when the total score is greater than 4 are the queued
    messages reported through rec.set_invalid.

    @param metadata: dictionary with the crossref values; the keys
        "title", "issn", "page", "author", "issue", "year" and "volume"
        are read, and an empty string means the value is not available
    @param rec: Record to compare against; its values are read with
        get_value and problems are reported with rec.set_invalid
    """
    confidence_different = 0
    msgs = []

    # Check title
    title_crossref = metadata["title"]
    title_record = get_value(rec, "773__p")
    volume_extra = ""
    if title_crossref != "" and title_record is not None:
        # Remove Volume number from the title
        title_crossref = re.sub(":.*$", "", title_crossref)
        if re.search(" [A-Z]$", title_crossref):
            # a trailing single capital letter is moved in front of the
            # crossref volume before the volume comparison below
            volume_extra = title_crossref[-1]
            title_crossref = title_crossref[:-2]
        title_crossref = re.sub(" (Section|Volume)$", "", title_crossref)
        # The knowledge base lookup returns a list of rows, so the first
        # value has to be extracted before comparing it with the title
        # string of the record. Without a match the cleaned crossref
        # title itself is compared.
        kb_rows = get_kbr_values("JOURNALS", title_crossref, searchtype='e')
        if kb_rows:
            abbr_title = kb_rows[0][0]
        else:
            abbr_title = title_crossref
        # NOTE(review): compare_str is used as a similarity score where
        # 1 means identical - confirm against its definition
        title_similarity = compare_str(abbr_title, title_record)
        confidence_different += (1 - title_similarity)*2
        if title_similarity < 0.6:
            msgs.append("Incorrect journal name (773__p) or wrongly assigned DOI")

    # Check issn
    issn_crossref = metadata["issn"]
    issn_record = get_value(rec, "022__a")
    if issn_crossref != "" and issn_record is not None and issn_crossref != issn_record:
        confidence_different += 3
        msgs.append("Invalid ISSN (022__a) or wrongly assigned DOI")

    # Check page number
    page_crossref = metadata["page"]
    page_record = get_value(rec, "773__c")
    if page_record is not None and page_crossref != "":
        # only the part before the first "-" is compared
        page_record = page_record.split("-")[0]
        page_crossref = page_crossref.split("-")[0]
        if page_record != page_crossref:
            confidence_different += 3
            msgs.append("Invalid page number (773__c) or wrongly assigned DOI")

    # Check author
    author_crossref = metadata["author"]
    author_record = get_value(rec, "100__a")
    if author_crossref != "" and author_record is not None:
        author_similarity = compare_str(author_crossref, author_record)
        confidence_different += (1 - author_similarity)*1.5
        if author_similarity < 0.7:
            msgs.append("Invalid author (100__a) or wrongly assigned DOI")

    # Check issue
    issue_crossref = metadata["issue"]
    issue_record = get_value(rec, "773__n")
    if issue_crossref != "" and issue_record is not None and issue_crossref != issue_record:
        confidence_different += 2
        msgs.append("Invalid issue (773__n) or wrongly assigned DOI")

    # Check year
    year_crossref = metadata["year"]
    year_record = get_value(rec, "773__y")
    if year_crossref != "" and year_record is not None and year_crossref != year_record:
        confidence_different += 2
        msgs.append("Invalid year (773__y) or wrongly assigned DOI")

    # Check volume
    volume_crossref = metadata["volume"]
    volume_record = get_value(rec, "773__v")
    if volume_crossref != "" and volume_record is not None:
        # put the letter stripped from the title in front of the volume
        volume_crossref = volume_extra + volume_crossref
        if volume_crossref != volume_record:
            confidence_different += 2
            msgs.append("Invalid volume (773__v) or wrongly assigned DOI")

    # report the collected problems only when enough evidence piled up
    if confidence_different > 4:
        for msg in msgs:
            rec.set_invalid(msg)