def solr_add_range(lower_recid, upper_recid):
    """
    Add the indexed field values of every record in [lower_recid, upper_recid]
    to Solr, preserving the fulltext information.

    Each field is fetched independently; any failure (missing field, bad
    encoding, no attached documents) silently degrades that single field to
    the empty string so one broken record cannot abort the whole range.
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            # Abstract: first value of the abstract MARC field, or "".
            try:
                abstract = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0]), 'utf-8')
            except:
                abstract = ""
            # Authors: first author plus all additional authors joined by
            # spaces (reduce with '' start value tolerates an empty list).
            try:
                first_author = remove_control_characters(get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0])
                additional_authors = remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), ''))
                author = unicode(first_author + " " + additional_authors, 'utf-8')
            except:
                author = ""
            # Fulltext: concatenated text of the record's attached documents.
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(remove_control_characters(bibrecdocs.get_text()), 'utf-8')
            except:
                fulltext = ""
            # Keywords: all keyword field values joined by spaces.
            try:
                keyword = unicode(remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_KEYWORD), '')), 'utf-8')
            except:
                keyword = ""
            # Title: first value of the title MARC field, or "".
            try:
                title = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_TITLE)[0]), 'utf-8')
            except:
                title = ""
            solr_add(recid, abstract, author, fulltext, keyword, title)
    # Commit once per processed range, then give the BibSched task a chance
    # to sleep or stop between ranges.
    SOLR_CONNECTION.commit()
    task_sleep_now_if_required(can_stop_too=True)
def oaigetrecord(args):
    """Answer the OAI-PMH GetRecord verb.

    Returns the record named by 'identifier' rendered in the requested
    'metadataPrefix' format. Non-existent identifiers yield an
    'idDoesNotExist' OAI error; deleted records are served (header only,
    status 'deleted') unless CFG_OAI_DELETED_POLICY is 'no', in which
    case they too produce 'idDoesNotExist'.
    """
    parsed = parse_args(args)
    sysno = oaigetsysno(parsed['identifier'])
    status = record_exists(sysno)
    # A record is servable when it is live, or deleted but the deleted
    # policy still allows exposing it.
    servable = status == 1 or (status == -1 and CFG_OAI_DELETED_POLICY != 'no')
    if servable:
        body = print_record(sysno, parsed['metadataPrefix'], status)
        return oai_header(args, "GetRecord") + body + oai_footer("GetRecord")
    body = oai_error("idDoesNotExist", "invalid record Identifier")
    return oai_error_header(args, "GetRecord") + body + oai_error_footer("GetRecord")
def oaigetrecord(args):
    """Returns record 'identifier' according to 'metadataPrefix' format for OAI
    metadata harvesting.

    - if record does not exist, return oai_error 'idDoesNotExist'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'transient' or
      'deleted', then return only header, with status 'deleted'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no', then
      return oai_error 'idDoesNotExist'.
    """
    arg = parse_args(args)
    out = ""
    sysno = oaigetsysno(arg['identifier'])
    # record_exists returns 1 (live), -1 (deleted) or 0 (absent).
    _record_exists = record_exists(sysno)
    if _record_exists == 1 or \
       (_record_exists == -1 and CFG_OAI_DELETED_POLICY != 'no'):
        # Live record, or deleted one we are still allowed to expose.
        out = print_record(sysno, arg['metadataPrefix'], _record_exists)
        out = oai_header(args, "GetRecord") + out + oai_footer("GetRecord")
    else:
        out = oai_error("idDoesNotExist", "invalid record Identifier")
        out = oai_error_header(
            args, "GetRecord") + out + oai_error_footer("GetRecord")
    return out
def oailistrecords(args):
    """Generates response to oailistrecords verb.

    Streams up to CFG_OAI_LOAD records per response; any remainder is cached
    under a freshly generated resumptionToken so the harvester can continue.
    """
    arg = parse_args(args)
    out = ""
    resumptionToken_printed = False
    sysnos = []
    # 'sysno' accumulates the sysnos deferred to the next harvesting round
    # (plus, as last element, the metadataPrefix — see oaicachein below).
    sysno = []
    # check if the resumptionToken did not expire
    if arg['resumptionToken']:
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = oai_error("badResumptionToken", "ResumptionToken expired")
            out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
            return out
    if arg['resumptionToken'] != "":
        # Resume: the cache stores the pending sysnos with the
        # metadataPrefix appended as the final element.
        sysnos = oaicacheout(arg['resumptionToken'])
        arg['metadataPrefix'] = sysnos.pop()
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])
    if len(sysnos) == 0:
        # noRecordsMatch error
        out = out + oai_error("noRecordsMatch", "no records correspond to the request")
        out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
        return out
    i = 0
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:
                # cache or write?
                if not resumptionToken_printed:
                    # resumptionToken?
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s <resumptionToken>%s</resumptionToken>\n" % (out, arg['resumptionToken'])
                    resumptionToken_printed = True
                # Over quota: defer this sysno to the next round.
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                if not (_record_exists == -1 and CFG_OAI_DELETED_POLICY == "no"):
                    #Produce output only if record exists and had to be printed
                    i = i + 1 # Increment limit only if record is returned
                    res = print_record(sysno_, arg['metadataPrefix'], _record_exists)
                    if res:
                        out += res
    if resumptionToken_printed:
        # Persist the deferred sysnos (+ metadataPrefix) for the new token.
        oaicacheclean()
        sysno.append(arg['metadataPrefix'])
        oaicachein(arg['resumptionToken'], sysno)
    out = oai_header(args, "ListRecords") + out + oai_footer("ListRecords")
    return out
def get_bibrecord(recid):
    """Fetch the record identified by recid wrapped as a BibRecord structure.

    Prefers the most recent stored revision; falls back to the live record
    when no revision history exists. Returns None (implicitly) when the
    record does not exist.
    """
    if not record_exists(recid):
        return None
    revision_ids = get_record_revision_ids(recid)
    if not revision_ids:
        return get_record(recid)
    latest_marcxml = get_marcxml_of_revision_id(max(revision_ids))
    return create_record(latest_marcxml)[0]
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    Uses the latest stored revision when one exists, otherwise the live
    record. Returns None implicitly for a non-existent recid.
    """
    if record_exists(recid):
        record_revision_ids = get_record_revision_ids(recid)
        if record_revision_ids:
            # max() picks the most recent revision id.
            return create_record(
                get_marcxml_of_revision_id(max(record_revision_ids)))[0]
        else:
            return get_record(recid)
def Get_Recid_Number(parameters, curdir, form, user_info=None):
    """
    This function gets the value contained in the [edsrn] file and stores
    it in the 'rn' global variable, which is the recid of the corresponding
    record. It then validates that the record exists and belongs to the
    collection expected by the current action (MBI/DBI/DPI), raising
    InvenioWebSubmitFunctionStop with an action-specific message otherwise.

    Parameters:

      * edsrn: Name of the file which stores the reference.
               This value depends on the web form configuration you
               did. It should contain the name of the form element
               used for storing the reference of the document.
    """
    global rn, sysno
    # Path of file containing recid
    if os.path.exists("%s/%s" % (curdir, parameters['edsrn'])):
        try:
            fp = open("%s/%s" % (curdir, parameters['edsrn']), "r")
            rn = fp.read()
            # Fix: close the file handle (was leaked before).
            fp.close()
            rn = re.sub("[\n\r ]+", "", rn)
        except IOError:
            exception_prefix = "Error in WebSubmit function " \
                               "Get_Recid_Number. Tried to open " \
                               "edsrn file [%s/edsrn] but was " \
                               "unable to." % curdir
            register_exception(prefix=exception_prefix)
            rn = ""
    else:
        rn = ""
    if rn:
        act = form['act']
        if act not in ['APS']:
            try:
                if record_exists(int(rn)) == 1:
                    sysno = int(rn)
                    coll = get_fieldvalues(sysno, '980__a')[0]
                    # Collection sanity checks per action type.
                    if act == 'MBI':
                        if coll not in ['BLOG']:
                            raise InvenioWebSubmitFunctionStop(CFG_MODIFY_BLOG_ERROR)
                    if act == 'DBI':
                        if coll not in ['BLOG']:
                            raise InvenioWebSubmitFunctionStop(CFG_DELETE_BLOG_ERROR)
                    if act == 'DPI':
                        if coll not in ['BLOGPOST']:
                            raise InvenioWebSubmitFunctionStop(CFG_DELETE_POST_ERROR)
                else:
                    raise InvenioWebSubmitFunctionStop(CFG_ALERT_RECORD_NOT_FOUND % rn)
            except InvenioWebSubmitFunctionStop:
                # Fix: the collection checks above raise this deliberately
                # with a specific message; previously the bare except below
                # caught it and masked it as "record not found".
                raise
            except:
                # Anything else (bad int(rn), missing 980__a, DB error)
                # is reported as an unknown record.
                raise InvenioWebSubmitFunctionStop(CFG_ALERT_RECORD_NOT_FOUND % rn)
    return ""
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.

    If any kind of error occurs, returns None and sets
    result['resultCode']/['resultText'] accordingly.
    If mode == 'revision' then the recid parameter is considered a revid.
    """
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)
    elif mode == 'tmpfile':
        # Merge-candidate record kept in a per-user temporary XML file.
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):
            #check if file doesn't exist
            result['resultCode'], result['resultText'] = 1, 'Temporary file doesnt exist'
        else:
            #open file
            tmpfile = open(file_path, 'r')
            record = create_record(tmpfile.read())[0]
            tmpfile.close()
    elif mode == 'revision':
        # Here 'recid' is actually a revision id.
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'
    elif mode == 'none':
        # Explicit "no record 2" — empty dict, not None.
        return {}
    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record
def getfile(req, form):
    # NOTE(review): declared as a plain function but references `self.recid`
    # throughout — this looks like a method extracted from a class (probably a
    # mod_python WebInterface page class); confirm the enclosing context, as
    # written it would raise NameError on `self`.
    args = wash_urlargd(form, websubmit_templates.files_default_urlargd)
    ln = args["ln"]
    _ = gettext_set_language(ln)
    uid = getUid(req)
    user_info = collect_user_info(req)
    verbose = args["verbose"]
    if verbose >= 1 and not isUserSuperAdmin(user_info):
        # Only SuperUser can see all the details!
        verbose = 0
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
        return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid="submit")
    if record_exists(self.recid) < 1:
        msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
        return warningMsg(msg, req, CFG_SITE_NAME, ln)
    if record_empty(self.recid):
        msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
        return warningMsg(msg, req, CFG_SITE_NAME, ln)
    (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
    if auth_code and user_info["email"] == "guest":
        # Guest user on a restricted record: redirect to login with a
        # cookie authorizing the VIEWRESTRCOLL action after authentication.
        cookie = mail_cookie_create_authorize_action(
            VIEWRESTRCOLL, {"collection": guess_primary_collection_of_a_record(self.recid)}
        )
        target = "/youraccount/login" + make_canonical_urlargd(
            {"action": cookie, "ln": ln, "referer": CFG_SITE_URL + user_info["uri"]}, {}
        )
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        return page_not_authorized(req, "../", text=auth_message)
    # NOTE(review): `readonly` is assigned but not used in this visible span —
    # presumably consumed further down in the full function; verify.
    readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1
    # From now on: either the user provided a specific file
    # name (and a possible version), or we return a list of
    # all the available files. In no case are the docids
    # visible.
    try:
        bibarchive = BibRecDocs(self.recid)
    except InvenioWebSubmitFileError, e:
        # `e` is unused; the full traceback is sent to the admin instead.
        register_exception(req=req, alert_admin=True)
        msg = "<p>%s</p><p>%s</p>" % (
            _("The system has encountered an error in retrieving the list of files for this document."),
            _("The error has been logged and will be taken in consideration as soon as possible."),
        )
        return warningMsg(msg, req, CFG_SITE_NAME, ln)
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier. Prefer a non
    deleted record if multiple recids matches but some of them are deleted
    (e.g. in case of merging). Returns None if no record matches."""
    if identifier:
        recids = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e')
        if recids:
            restricted_recids = get_all_restricted_recids()
            # First pass: prefer a live (record_exists > 0), non-restricted
            # record among the matches.
            for recid in recids:
                if record_exists(recid) > 0 and recid not in restricted_recids:
                    return recid
            # Fallback: no live match — return the last candidate (the loop
            # variable is still bound) provided it is not restricted, so that
            # deleted records remain addressable for the OAI deleted policy.
            if recid not in restricted_recids:
                return recid
    return None
def _get_record_slave(recid, result, mode=None, uid=None):
    """Load the second ("slave") record for a merge session.

    Depending on 'mode' the record comes from the database ('recid'), a
    per-user temporary file ('tmpfile') or a stored revision ('revision',
    in which case 'recid' is really a revision id). On any error the
    function records an explanation in result['resultCode'] /
    result['resultText'] and returns None; mode 'none' returns {}.
    """
    if recid == 'none':
        mode = 'none'
    if mode == 'none':
        # No second record requested — empty dict, not None.
        return {}

    record = None
    if mode == 'recid':
        status = record_exists(recid)
        if status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)
    elif mode == 'tmpfile':
        path = '%s_%s.xml' % (_get_file_path(recid, uid),
                              CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if os.path.isfile(path):
            handle = open(path, 'r')
            record = create_record(handle.read())[0]
            handle.close()
        else:
            result['resultCode'], result['resultText'] = 1, 'Temporary file doesnt exist'
    elif mode == 'revision':
        if not revision_format_valid_p(recid):
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'
        else:
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record
def _get_breaking_news(lang, journal_name): """ Gets the 'Breaking News' articles that are currently active according to start and end dates. """ # CERN Bulletin only if not journal_name.lower() == 'cernbulletin': return '' # Look for active breaking news breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \ if record_exists(recid) == 1] today = time.mktime(time.localtime()) breaking_news = "" for recid in breaking_news_recids: temp_rec = BibFormatObject(recid) try: end_date = time.mktime(time.strptime(temp_rec.field("925__b"), "%m/%d/%Y")) except: end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y")) if end_date < today: continue try: start_date = time.mktime(time.strptime(temp_rec.field("925__a"), "%m/%d/%Y")) except: start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y")) if start_date > today: continue publish_date = temp_rec.field("269__c") if lang == 'fr': title = temp_rec.field("246_1a") else: title = temp_rec.field("245__a") breaking_news += ''' <h2 class="%s">%s<br/> <strong> <a href="%s/journal/popup?name=%s&type=breaking_news&record=%s&ln=%s" target="_blank">%s</a> </strong> </h2> ''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title) if breaking_news: breaking_news = '<li>%s</li>' % breaking_news return breaking_news
def get_existing_records_for_reportnumber(reportnum):
    """Given a report number, return a list of recids of real (live) records
    that are associated with it.
    That's to say if the record does not exist (perhaps deleted, for example)
    its recid will NOT be returned in the list.

    @param reportnum: the report number for which recids are to be returned.
    @type reportnum: string
    @return: list of recids.
    @rtype: list
    @note: If reportnum was not found in phrase indexes, the function searches
        directly in bibxxx tables via MARC tags, so that the record does not
        have to be phrase-indexed.
    """
    existing_records = []  ## List of the report numbers of existing records
    ## Get list of records with the report-number: (first in phrase indexes)
    reclist = list(search_pattern(req=None, p=reportnum, f="reportnumber", m="e"))
    if not reclist:
        # Maybe the record has not been indexed yet? (look in bibxxx tables)
        tags = get_field_tags("reportnumber")
        for tag in tags:
            recids = list(search_pattern(req=None, p=reportnum, f=tag, m="e"))
            reclist.extend(recids)
        reclist = dict.fromkeys(reclist).keys()  # Remove duplicates
    ## Loop through all recids retrieved and testing to see whether the record
    ## actually exists or not. If none of the records exist, there is no record
    ## with this reportnumber; If more than one of the records exists, then
    ## there are multiple records with the report-number; If only one record
    ## exists, then everything is OK,
    for rec in reclist:
        rec_exists = record_exists(rec)
        if rec_exists == 1:
            ## This is a live record record the recid and augment the counter of
            ## records found:
            existing_records.append(rec)
    return existing_records
def oailistmetadataformats(args):
    "Generates response to oailistmetadataformats verb."
    arg = parse_args(args)
    out = ""
    flag = 1 # list or not depending on identifier
    if arg['identifier'] != "":
        flag = 0
        sysno = oaigetsysno(arg['identifier'])
        _record_exists = record_exists(sysno)
        if _record_exists == 1 or \
           (_record_exists == -1 and CFG_OAI_DELETED_POLICY != "no"):
            # Record is visible: list the formats after all.
            flag = 1
        else:
            out = out + oai_error("idDoesNotExist", "invalid record Identifier")
            out = oai_error_header(
                args, "ListMetadataFormats") + out + oai_error_footer(
                    "ListMetadataFormats")
            return out
    if flag:
        # The two formats this repository supports: oai_dc and marcxml.
        out = out + " <metadataFormat>\n"
        out = out + " <metadataPrefix>oai_dc</metadataPrefix>\n"
        out = out + " <schema>http://www.openarchives.org/OAI/1.1/dc.xsd</schema>\n"
        out = out + " <metadataNamespace>http://purl.org/dc/elements/1.1/</metadataNamespace>\n"
        out = out + " </metadataFormat>\n"
        out = out + " <metadataFormat>\n"
        out = out + " <metadataPrefix>marcxml</metadataPrefix>\n"
        # Infoscience modification
        out = out + " <schema>http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd</schema>\n"
        out = out + " <metadataNamespace>http://www.loc.gov/MARC21/slim</metadataNamespace>\n"
        out = out + " </metadataFormat>\n"
    out = oai_header(
        args, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
    return out
def _get_breaking_news(lang, journal_name): """ Gets the 'Breaking News' articles that are currently active according to start and end dates. """ # CERN Bulletin only if not journal_name.lower() == 'cernbulletin': return '' # Look for active breaking news breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \ if record_exists(recid) == 1] today = time.mktime(time.localtime()) breaking_news = "" for recid in breaking_news_recids: temp_rec = BibFormatObject(recid) try: end_date = time.mktime( time.strptime(temp_rec.field("925__b"), "%m/%d/%Y")) except: end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y")) if end_date < today: continue try: start_date = time.mktime( time.strptime(temp_rec.field("925__a"), "%m/%d/%Y")) except: start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y")) if start_date > today: continue publish_date = temp_rec.field("269__c") if lang == 'fr': title = temp_rec.field("246_1a") else: title = temp_rec.field("245__a") breaking_news += ''' <h2 class="%s">%s<br/> <strong> <a href="%s/journal/popup?name=%s&type=breaking_news&record=%s&ln=%s" target="_blank">%s</a> </strong> </h2> ''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title) if breaking_news: breaking_news = '<li>%s</li>' % breaking_news return breaking_news
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve record structure for a BibEdit session.

    Loads the record from the per-user cache (creating/refreshing it as
    needed), flags lock conflicts with other users or the queue in
    result['resultCode']/['resultText'], and returns the record structure
    (None on error).
    """
    record = None
    mtime = None
    cache_dirty = None
    record_status = record_exists(recid)
    existing_cache = cache_exists(recid, uid)
    if record_status == 0:
        result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
    elif record_status == -1:
        result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
    elif not existing_cache and record_locked_by_other_user(recid, uid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid
    elif existing_cache and cache_expired(recid, uid) and \
            record_locked_by_other_user(recid, uid):
        # Our cache exists but expired while another user holds the lock.
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid
    elif record_locked_by_queue(recid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
    else:
        if fresh_record:
            # Caller asked for a pristine copy: drop any existing cache.
            delete_cache(recid, uid)
            existing_cache = False
        if not existing_cache:
            record_revision, record = create_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        else:
            tmpRes = get_cache_contents(recid, uid)
            cache_dirty, record_revision, record = tmpRes[0], tmpRes[1], tmpRes[2]
            # Keep the cache alive while the user is editing.
            touch_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                # The DB has a newer revision than the cached one.
                result['cacheOutdated'] = True
        result['resultCode'], result['resultText'], result['cacheDirty'], result['cacheMTime'] = 0, 'Record OK', cache_dirty, mtime
        record_order_subfields(record)
    return record
def oai_list_metadata_formats(argd):
    """Answer the OAI-PMH ListMetadataFormats verb.

    When an identifier is supplied, first verify that the corresponding
    record may be exposed (live, or deleted with a permissive
    CFG_OAI_DELETED_POLICY); otherwise reply with 'idDoesNotExist'.
    Then list every configured metadata format.
    """
    if argd.get('identifier'):
        recid = oai_get_recid(argd['identifier'])
        status = record_exists(recid)
        # Equivalent (De Morgan) to: status != 1 and
        # (status != -1 or CFG_OAI_DELETED_POLICY == "no")
        exposable = status == 1 or (status == -1 and CFG_OAI_DELETED_POLICY != "no")
        if not exposable:
            return oai_error(
                argd,
                [("idDoesNotExist",
                  "invalid record Identifier: %s" % argd['identifier'])])

    fragments = []
    for prefix, (dummy, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        fragments.append(X.metadataFormat()(X.metadataPrefix(prefix),
                                            X.schema(schema),
                                            X.metadataNamespace(namespace)))
    body = "".join(fragments)
    return oai_header(argd, "ListMetadataFormats") + body + oai_footer("ListMetadataFormats")
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.

    If any kind of error occurs, returns None and sets
    result["resultCode"]/["resultText"] accordingly.
    If mode == 'revision' then the recid parameter is considered a revid.
    """
    record = None
    if recid == "none":
        mode = "none"
    if mode == "recid":
        record_status = record_exists(recid)
        # check for errors
        if record_status == 0:
            result["resultCode"], result["resultText"] = 1, "Non-existent record: %s" % recid
        elif record_status == -1:
            result["resultCode"], result["resultText"] = 1, "Deleted record: %s" % recid
        elif record_locked_by_queue(recid):
            result["resultCode"], result["resultText"] = 1, "Record %s locked by queue" % recid
        else:
            record = create_record(print_record(recid, "xm"))[0]
    elif mode == "tmpfile":
        # Merge-candidate record kept in a per-user temporary XML file.
        file_path = "%s_%s.xml" % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  # check if file doesn't exist
            result["resultCode"], result["resultText"] = 1, "Temporary file doesnt exist"
        else:  # open file
            tmpfile = open(file_path, "r")
            record = create_record(tmpfile.read())[0]
            tmpfile.close()
    elif mode == "revision":
        # Here 'recid' is actually a revision id.
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result["resultCode"], result["resultText"] = 1, "The specified revision does not exist"
        else:
            result["resultCode"], result["resultText"] = 1, "Invalid revision id"
    elif mode == "none":
        # Explicit "no record 2" — empty dict, not None.
        return {}
    else:
        result["resultCode"], result["resultText"] = 1, "Invalid record mode for record2"
    return record
def oai_list_metadata_formats(argd):
    """Generates response to oai_list_metadata_formats verb."""
    if argd.get('identifier'):
        recid = oai_get_recid(argd['identifier'])
        _record_exists = record_exists(recid)
        # Reject when the record is neither live nor a deleted record that
        # the deleted policy allows exposing.
        if _record_exists != 1 and (_record_exists != -1 or CFG_OAI_DELETED_POLICY == "no"):
            return oai_error(
                argd,
                [("idDoesNotExist",
                  "invalid record Identifier: %s" % argd['identifier'])])
    out = ""
    # One <metadataFormat> element per configured prefix.
    for prefix, (dummy, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        out += X.metadataFormat()(X.metadataPrefix(prefix),
                                  X.schema(schema),
                                  X.metadataNamespace(namespace))
    return oai_header(
        argd, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None):
    """Prints record 'recid' formatted according to 'prefix'.

    - if record does not exist, return nothing.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'transient' or
      'deleted', then return only header, with status 'deleted'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no', then
      return nothing.
    """
    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and not set_spec in sets and not [
                set_ for set_ in sets if set_.startswith("%s:" % set_spec)]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False
    # A record absent from the requested set is treated like a deleted one.
    if record_exists_result:
        status = None
    else:
        status = 'deleted'
    if not record_exists_result and CFG_OAI_DELETED_POLICY not in (
            'persistent', 'transient'):
        return
    idents = get_field(recid, CFG_OAI_ID_FIELD)
    try:
        assert idents, "No OAI ID for record %s, please do your checks!" % recid
    except AssertionError, err:
        # A record without an OAI ID is a data problem: alert the admin and
        # skip the record rather than emitting invalid OAI output.
        register_exception(alert_admin=True)
        return
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid to
    the upper one to Solr. It preserves the fulltext information.

    @param tags_to_index: mapping used by get_field_content_in_utf8 to pick
        the MARC tags for each logical field
    @param next_commit_counter: running counter fed to
        solr_commit_if_necessary; the updated value is returned
    @return: the updated commit counter
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            # All metadata fields come from the tag-driven UTF-8 helper.
            # (Fix: removed the leftover legacy try/except blocks that
            # overwrote abstract/author with ""/get_fieldvalues results and
            # recomputed fulltext via get_entire_fulltext — a merge artifact;
            # this now matches the clean sibling implementation.)
            abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
            author = get_field_content_in_utf8(recid, 'author', tags_to_index)
            keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
            title = get_field_content_in_utf8(recid, 'title', tags_to_index)
            # Fulltext comes from the record's attached documents; degrade to
            # the empty string if none can be read.
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
            except:
                fulltext = ''
            solr_add(recid, abstract, author, fulltext, keyword, title)
            next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                           recid=recid)
    return next_commit_counter
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid to
    the upper one to Solr. It preserves the fulltext information.

    @param tags_to_index: mapping used by get_field_content_in_utf8 to pick
        the MARC tags for each logical field
    @param next_commit_counter: running counter fed to
        solr_commit_if_necessary; the updated value is returned
    @return: the updated commit counter
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
            author = get_field_content_in_utf8(recid, 'author', tags_to_index)
            keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
            title = get_field_content_in_utf8(recid, 'title', tags_to_index)
            # Fulltext comes from the attached documents; any failure
            # degrades it to the empty string so indexing continues.
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
            except:
                fulltext = ''
            solr_add(recid, abstract, author, fulltext, keyword, title)
            next_commit_counter = solr_commit_if_necessary(next_commit_counter, recid=recid)
    return next_commit_counter
def oailistmetadataformats(args):
    """Answer the OAI-PMH ListMetadataFormats verb.

    With an 'identifier' argument the record must be live, or deleted with
    a permissive CFG_OAI_DELETED_POLICY; otherwise an 'idDoesNotExist'
    error response is produced. The supported formats are oai_dc and
    marcxml.
    """
    arg = parse_args(args)
    out = ""
    list_formats = True  # list the formats unless the identifier is bad
    if arg['identifier'] != "":
        list_formats = False
        sysno = oaigetsysno(arg['identifier'])
        status = record_exists(sysno)
        if status == 1 or (status == -1 and CFG_OAI_DELETED_POLICY != "no"):
            list_formats = True
        else:
            out += oai_error("idDoesNotExist", "invalid record Identifier")
            return oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats")
    if list_formats:
        out += "".join([
            " <metadataFormat>\n",
            " <metadataPrefix>oai_dc</metadataPrefix>\n",
            " <schema>http://www.openarchives.org/OAI/1.1/dc.xsd</schema>\n",
            " <metadataNamespace>http://purl.org/dc/elements/1.1/</metadataNamespace>\n",
            " </metadataFormat>\n",
            " <metadataFormat>\n",
            " <metadataPrefix>marcxml</metadataPrefix>\n",
            " <schema>http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd</schema>\n",
            " <metadataNamespace>http://www.loc.gov/MARC21/slim</metadataNamespace>\n",
            " </metadataFormat>\n",
        ])
    return oai_header(args, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve record structure for a BibEdit session.

    Loads the record from the per-user cache file (creating/refreshing it
    as needed), flags lock conflicts with other users or the queue in
    result["resultCode"]/["resultText"], and returns the record structure
    (None on error).
    """
    record = None
    mtime = None
    cache_dirty = None
    record_status = record_exists(recid)
    existing_cache = cache_exists(recid, uid)
    if record_status == 0:
        result["resultCode"], result["resultText"] = 1, "Non-existent record: %s" % recid
    elif record_status == -1:
        result["resultCode"], result["resultText"] = 1, "Deleted record: %s" % recid
    elif not existing_cache and record_locked_by_other_user(recid, uid):
        result["resultCode"], result["resultText"] = 1, "Record %s locked by user" % recid
    elif existing_cache and cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
        # Our cache exists but expired while another user holds the lock.
        result["resultCode"], result["resultText"] = 1, "Record %s locked by user" % recid
    elif record_locked_by_queue(recid):
        result["resultCode"], result["resultText"] = 1, "Record %s locked by queue" % recid
    else:
        if fresh_record:
            # Caller asked for a pristine copy: drop any existing cache file.
            delete_cache_file(recid, uid)
            existing_cache = False
        if not existing_cache:
            record_revision, record = create_cache_file(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        else:
            tmpRes = get_cache_file_contents(recid, uid)
            cache_dirty, record_revision, record = tmpRes[0], tmpRes[1], tmpRes[2]
            # Keep the cache file alive while the user is editing.
            touch_cache_file(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                # The DB has a newer revision than the cached one.
                result["cacheOutdated"] = True
        result["resultCode"], result["resultText"], result["cacheDirty"], result["cacheMTime"] = (
            0,
            "Record OK",
            cache_dirty,
            mtime,
        )
    return record
def oai_get_record(argd):
    """Answer the OAI-PMH GetRecord verb.

    Returns the record named by argd['identifier'] in the requested
    'metadataPrefix' format. Deleted records are still served (header only,
    status 'deleted') while CFG_OAI_DELETED_POLICY is not 'no'; otherwise,
    like unknown identifiers, they yield an 'idDoesNotExist' error.
    """
    recid = oai_get_recid(argd['identifier'])
    status = record_exists(recid)
    servable = status == 1 or (status == -1 and CFG_OAI_DELETED_POLICY != 'no')
    if not servable:
        return oai_error(
            argd,
            [("idDoesNotExist",
              "invalid record Identifier: %s" % argd['identifier'])])
    body = print_record(recid, argd['metadataPrefix'], status)
    return oai_header(argd, "GetRecord") + body + oai_footer("GetRecord")
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None):
    """Prints record 'recid' formatted according to 'prefix'.

    - if record does not exist, return nothing.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'transient' or
      'deleted', then return only header, with status 'deleted'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no', then
      return nothing.
    """
    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and not set_spec in sets and not [set_ for set_ in sets if set_.startswith("%s:" % set_spec)]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False
    # A record absent from the requested set is treated like a deleted one.
    if record_exists_result:
        status = None
    else:
        status = 'deleted'
    if not record_exists_result and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return
    idents = get_field(recid, CFG_OAI_ID_FIELD)
    try:
        assert idents, "No OAI ID for record %s, please do your checks!" % recid
    except AssertionError, err:
        # A record without an OAI ID is a data problem: alert the admin and
        # skip the record rather than emitting invalid OAI output.
        register_exception(alert_admin=True)
        return
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve record structure for a BibEdit session.

    Loads the record from the per-user cache (creating/refreshing it as
    needed), flags lock conflicts with other users or the queue in
    result['resultCode']/['resultText'], and returns the record structure
    (None on error).
    """
    record = None
    mtime = None
    cache_dirty = None
    record_status = record_exists(recid)
    existing_cache = cache_exists(recid, uid)
    if record_status == 0:
        result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
    elif record_status == -1:
        result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
    elif not existing_cache and record_locked_by_other_user(recid, uid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid
    elif existing_cache and cache_expired(recid, uid) and \
            record_locked_by_other_user(recid, uid):
        # Our cache exists but expired while another user holds the lock.
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid
    elif record_locked_by_queue(recid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
    else:
        if fresh_record:
            # Caller asked for a pristine copy: drop any existing cache.
            delete_cache(recid, uid)
            existing_cache = False
        if not existing_cache:
            record_revision, record = create_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        else:
            tmpRes = get_cache_contents(recid, uid)
            cache_dirty, record_revision, record = tmpRes[0], tmpRes[1], tmpRes[2]
            # Keep the cache alive while the user is editing.
            touch_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                # The DB has a newer revision than the cached one.
                result['cacheOutdated'] = True
        result['resultCode'], result['resultText'], result['cacheDirty'], result['cacheMTime'] = 0, 'Record OK', cache_dirty, mtime
        record_order_subfields(record)
    return record
def get_low_level_recIDs_from_control_no(control_no):
    """
    returns the list of EXISTING record ID(s) of the authority records
    corresponding to the given (INVENIO) MARC control_no
    (e.g. 'AUTHOR:(XYZ)abc123')
    (NB: the list should normally contain exactly 1 element)

    @param control_no: a (INVENIO) MARC internal control_no to an authority record
    @type control_no: string

    @return: list containing the record ID(s) of the referenced authority
        record (should be only one)
    """
    # Resolve the control number into candidate record ids.
    # (A large block of commented-out prefix-parsing code was removed here;
    # it duplicated logic now handled by the helper below.)
    recIDs = list(_get_low_level_recIDs_intbitset_from_control_no(control_no))

    # filter out "DELETED" recIDs: record_exists() > 0 means a live record
    recIDs = [recID for recID in recIDs if record_exists(recID) > 0]

    # normally there should be exactly 1 authority record per control_number
    _assert_unique_control_no(recIDs, control_no)

    return recIDs
def get_low_level_recIDs_from_control_no(control_no):
    """
    returns the list of EXISTING record ID(s) of the authority records
    corresponding to the given (INVENIO) MARC control_no
    (e.g. 'AUTHOR:(XYZ)abc123')
    (NB: the list should normally contain exactly 1 element)

    :param control_no: a (INVENIO) MARC internal control_no to an authority record
    :type control_no: string
    :return:: list containing the record ID(s) of the referenced authority
        record (should be only one)
    """
    # Resolve the control number to the set of candidate record ids,
    # then keep only live records (record_exists() > 0 filters "DELETED").
    candidates = list(_get_low_level_recIDs_intbitset_from_control_no(control_no))
    recIDs = [candidate for candidate in candidates
              if record_exists(candidate) > 0]

    # A control number is expected to reference exactly one authority record.
    _assert_unique_control_no(recIDs, control_no)
    return recIDs
def record_get_xml(recID, format='xm', decompress=zlib.decompress, on_the_fly=False):
    """
    Returns an XML string of the record given by recID.

    The function builds the XML directly from the database,
    without using the standard formatting process.

    'format' allows to define the flavour of XML:
        - 'xm' for standard XML
        - 'marcxml' for MARC XML
        - 'oai_dc' for OAI Dublin Core
        - 'xd' for XML Dublin Core

    If record does not exist, returns empty string.
    If the record is deleted, returns an empty MARCXML (with recid
    controlfield, OAI ID fields and 980__c=DELETED)

    @param recID: the id of the record to retrieve
    @param format: one of 'xm', 'marcxml', 'oai_dc', 'xd'
    @param decompress: function used to decompress the cached bibfmt blob
    @param on_the_fly: if False, try to fetch precreated one in database
    @return: the xml string of the record
    """
    from invenio.search_engine import record_exists

    def get_fieldvalues(recID, tag):
        """Return list of field values for field TAG inside record RECID."""
        out = []
        if tag == "001___":
            # we have asked for recID that is not stored in bibXXx tables
            out.append(str(recID))
        else:
            # we are going to look inside bibXXx tables
            digit = tag[0:2]
            bx = "bib%sx" % digit
            bibx = "bibrec_bib%sx" % digit
            # NOTE(review): query is built by string interpolation; safe only
            # as long as recID is numeric and tag comes from trusted config.
            query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s' " \
                    "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag)
            res = run_sql(query)
            for row in res:
                out.append(row[0])
        return out

    def get_creation_date(recID, fmt="%Y-%m-%d"):
        "Returns the creation date of the record 'recID'."
        out = ""
        res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s",
                      (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    def get_modification_date(recID, fmt="%Y-%m-%d"):
        "Returns the date of last modification for the record 'recID'."
        out = ""
        res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s",
                      (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    #_ = gettext_set_language(ln)

    out = ""

    # sanity check: 0 means the record does not exist at all
    record_exist_p = record_exists(recID)
    if record_exist_p == 0:  # doesn't exist
        return out

    # print record opening tags, if needed (OAI envelope with header):
    if format == "marcxml" or format == "oai_dc":
        out += " <record>\n"
        out += " <header>\n"
        for identifier in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
            out += " <identifier>%s</identifier>\n" % identifier
        out += " <datestamp>%s</datestamp>\n" % get_modification_date(recID)
        out += " </header>\n"
        out += " <metadata>\n"

    if format.startswith("xm") or format == "marcxml":
        res = None
        if on_the_fly == False:
            # look for cached format existence:
            query = """SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'""" % (recID, format)
            res = run_sql(query, None, 1)
        if res and record_exist_p == 1:
            # record 'recID' is formatted in 'format', so print it
            out += "%s" % decompress(res[0][0])
        else:
            # record 'recID' is not formatted in 'format' -- they are
            # not in "bibfmt" table; so fetch all the data from
            # "bibXXx" tables:
            if format == "marcxml":
                out += """ <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
                out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            elif format.startswith("xm"):
                out += """ <record>\n"""
                out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            if record_exist_p == -1:
                # deleted record, so display only OAI ID and 980:
                oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                if oai_ids:
                    out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                           (CFG_OAI_ID_FIELD[0:3],
                            CFG_OAI_ID_FIELD[3:4],
                            CFG_OAI_ID_FIELD[4:5],
                            CFG_OAI_ID_FIELD[5:6],
                            oai_ids[0])
                out += "<datafield tag=\"980\" ind1=\" \" ind2=\" \"><subfield code=\"c\">DELETED</subfield></datafield>\n"
            else:
                # controlfields (tags 00x live in bib00x/bibrec_bib00x)
                query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                        "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\
                        "ORDER BY bb.field_number, b.tag ASC" % recID
                res = run_sql(query)
                for row in res:
                    field, value = row[0], row[1]
                    value = encode_for_xml(value)
                    out += """ <controlfield tag="%s">%s</controlfield>\n""" % \
                           (encode_for_xml(field[0:3]), value)
                # datafields
                i = 1  # Do not process bib00x and bibrec_bib00x, as
                       # they are controlfields. So start at bib01x and
                       # bibrec_bib01x (and set i = 0 at the end of
                       # first loop)
                for digit1 in range(0, 10):
                    for digit2 in range(i, 10):
                        bx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bx,
                                                                        bibx,
                                                                        recID,
                                                                        str(digit1)+str(digit2))
                        res = run_sql(query)
                        field_number_old = -999  # sentinel: no field open yet
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            # MARC stores blank indicators as '_' or '';
                            # normalise them to a single space for XML.
                            if ind1 == "_" or ind1 == "":
                                ind1 = " "
                            if ind2 == "_" or ind2 == "":
                                ind2 = " "
                            # print field tag, closing the previous datafield
                            # when the field number or tag changes
                            if field_number != field_number_old or \
                                   field[:-1] != field_old[:-1]:
                                if field_number_old != -999:
                                    out += """ </datafield>\n"""
                                out += """ <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                       (encode_for_xml(field[0:3]),
                                        encode_for_xml(ind1),
                                        encode_for_xml(ind2))
                                field_number_old = field_number
                                field_old = field
                            # print subfield value (last tag char = subfield code)
                            value = encode_for_xml(value)
                            out += """ <subfield code="%s">%s</subfield>\n""" % \
                                   (encode_for_xml(field[-1:]), value)
                        # all fields/subfields printed in this run, so close the tag:
                        if field_number_old != -999:
                            out += """ </datafield>\n"""
                    i = 0  # Next loop should start looking at bib00x and bibrec_bib00x
            # we are at the end of printing the record:
            out += " </record>\n"

    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """ <dc xmlns="http://purl.org/dc/elements/1.1/"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:schemaLocation="http://purl.org/dc/elements/1.1/
 http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        if record_exist_p == -1:
            # deleted record: emit an empty <dc> element
            out += ""
        else:
            for f in get_fieldvalues(recID, "041__a"):
                out += " <language>%s</language>\n" % f
            for f in get_fieldvalues(recID, "100__a"):
                out += " <creator>%s</creator>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "700__a"):
                out += " <creator>%s</creator>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "245__a"):
                out += " <title>%s</title>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "65017a"):
                out += " <subject>%s</subject>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "8564_u"):
                out += " <identifier>%s</identifier>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "520__a"):
                out += " <description>%s</description>\n" % encode_for_xml(f)
            out += " <date>%s</date>\n" % get_creation_date(recID)
        out += " </dc>\n"

    # print record closing tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += " </metadata>\n"
        out += " </record>\n"

    return out
def format_element(bfo):
    """
    Special contract with US DoE Office of Scientific and Technical
    Information 29/08/2013 R.A.
    Serializes selected record info as "OSTI" xml

    @param bfo: BibFormat object for the record being formatted
    @return: the serialized XML string (or None when lxml is unavailable)
    """
    try:
        from lxml import etree
    except ImportError:
        # Without lxml we cannot serialize anything.
        return
    from invenio.search_engine import perform_request_search, \
         get_fieldvalues, record_exists

    # a dictionary of Inspire subjects mapped to OSTI coded research categories
    osticats = {
        'Accelerators': '43 PARTICLE ACCELERATORS',
        'Computing': '99 GENERAL AND MISCELLANEOUS//MATHEMATICS, COMPUTING, AND INFORMATION SCIENCE',
        'Experiment-HEP': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'General Physics': '71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS',
        'Instrumentation': '46 INSTRUMENTATION RELATED TO NUCLEAR SCIENCE AND TECHNOLOGY',
        'Astrophysics': '71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS',
        'Lattice': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Math and Math Physics': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Theory-Nucl': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Experiment-Nucl': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Phenomenology-HEP': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Theory-HEP': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Other': '71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS',
        }

    rec = etree.Element("rec")
    recid = bfo.recID
    if record_exists(recid) == -1:
        # Deleted record: return an empty <rec/> element.
        # BUGFIX: lxml's tostring() keyword is `xml_declaration`, not
        # `declaration` -- the original raised TypeError on this path.
        return etree.tostring(rec, encoding='UTF-8', xml_declaration=False)
    node = etree.SubElement

    # SPIRES record number is used if available to prevent OSTI loading
    # duplicate records from INSPIRE which they already got from SPIRES.
    for recnum in [x.replace('SPIRES-', '') for x in \
                   [r for r in get_fieldvalues(recid, "970__a") \
                    if r.startswith('SPIRES-')]]:
        node(rec, "accession_num").text = recnum
    if not rec.xpath('//accession_num'):
        # use regular inspire recid
        node(rec, "accession_num").text = str(recid)

    for title in get_fieldvalues(recid, "245__a"):
        node(rec, "title").text = unicode(title, "utf-8")

    # The authors in the ostixml are all strung together between author tags
    # delimited by ';' If zero or > 10 authors don't show any authors
    authors = get_fieldvalues(recid, "100__a") \
              + get_fieldvalues(recid, "700__a")
    if len(authors) <= 10 and len(authors) > 0:
        node(rec, 'author').text = \
            '; '.join([unicode(a, "utf-8") for a in authors])

    for category in get_fieldvalues(recid, "65017a"):
        # `in` instead of the Py2-only dict.has_key()
        if category in osticats:
            node(rec, 'subj_category').text = osticats[category]
            node(rec, 'subj_keywords').text = category

    for pubdate in get_fieldvalues(recid, "269__c"):
        node(rec, 'date').text = pubdate

    # Fermilab report numbers mapped to OSTI doc types
    for dtype in get_fieldvalues(recid, "037__a"):
        if 'fermilab' in dtype.lower():
            if "PUB" in dtype:
                doctype = 'JA'
            elif "CONF" in dtype:
                doctype = 'CO'
            elif "THESIS" in dtype:
                doctype = 'TD'
            else:
                doctype = 'TR'
            node(rec, 'doctype').text = doctype

    # One MARC field is used for conferences and journals. So, the following
    # journal coding handles the variations, and outputs journal and
    # conf. cites in a nice order. If the conf has a cnum, we get the conf
    # info from its separate record in the conf. "collection." There are a
    # few if-then gymnastics to cover possible missing information and still
    # make a note that looks okay (sort of)
    journals = bfo.fields('773__', repeatable_subfields_p=True)
    for journal in journals:
        if 'p' in journal:
            jinfo = str(journal['p'][0])
            if 'v' in journal:
                jinfo += ' %s' % journal['v'][0]
            if 'c' in journal:
                jinfo += ':%s' % journal['c'][0]
            if 'y' in journal:
                jinfo += ',%s' % journal['y'][0]
            node(rec, 'journal_info').text = unicode(jinfo, "utf-8")
        confstring = ''
        # without t info or cnum don't print anything
        if 't' in journal:
            confstring += '%s: ' % journal['t'][0]
        if 'w' in journal:
            conf_info = {}
            cnum = journal['w'][0].replace("/", "-")
            idrec = perform_request_search(p="111__g:" + str(cnum), \
                                           c='Conferences')
            if idrec:
                for subfield in ('a', 'c', 'd'):
                    val = get_fieldvalues(idrec[0], '111__%s' % subfield, \
                                          repetitive_values=True)
                    if val:
                        conf_info[subfield] = val[0]
                confstring += '%s. %s, %s.' % \
                    tuple(conf_info.get(x, '') for x in ('a', 'c', 'd'))
        if 'c' in journal and confstring != '':
            confstring += ' pp: %s' % journal['c'][0]

    for doi in get_fieldvalues(recid, "0247_a"):
        node(rec, 'doi').text = doi

    if journals and confstring != '':
        # because it has to come after doi (?)
        # although order is not guaranteed for XML serialization
        node(rec, 'conf_info').text = unicode(confstring, "utf-8")

    for pages in get_fieldvalues(recid, "300__a"):
        node(rec, 'format').text = '%s pages' % pages

    for lang in get_fieldvalues(recid, "041__a"):
        node(rec, 'language').text = lang

    # As with journals, eprints are in with report nos. in our MARC format
    # so they have to be separated out
    eprint = ''
    for repno in get_fieldvalues(recid, "037__a"):
        if "arXiv" in repno:
            eprint = repno
            node(rec, 'arXiv_eprint').text = \
                'arXiv eprint number %s' % unicode(repno, "utf-8")
        else:
            node(rec, 'report_number').text = unicode(repno, "utf-8")

    urls = bfo.fields('8564_', repeatable_subfields_p=True)
    for url in urls:
        if 'y' in url and "FERMILAB" in url['y'][0] and 'u' in url:
            node(rec, 'url').text = '%s.pdf' % url['u'][0]
    if eprint:
        node(rec, 'availability').text = \
            'http://arXiv.org/abs/%s' % eprint

    node(rec, 'sponsor_org').text = 'DOE Office of Science'

    dt_harvest = get_modification_date(recid)
    if dt_harvest:
        node(rec, 'dt_harvest').text = dt_harvest
    else:
        # fallback to SPIRES era marc
        for date in get_fieldvalues(recid, "961__c"):
            node(rec, 'dt_harvest').text = date

    out = etree.tostring(rec, encoding='UTF-8', xml_declaration=False, \
                         pretty_print=True, method='xml').rstrip('\n')
    return out
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        # Ensure the handle is closed even if write() fails.
        xml_file = open(xml_filename, 'w')
        try:
            xml_file.write(marcxml)
        finally:
            xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    ## record_exists() returns 1 for live records; 0 (missing) and
    ## -1 (deleted) both abort the job.
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                ## A file matching all three attributes is "the" master.
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## BUGFIX: the recid was hard-coded to 124 (debug
                        ## leftover); use the record this job is about.
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)

            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                             bibdoc_video_extension,
                                             bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## BUGFIX: audio and video bitrates were swapped -- the job's
                ## 'videobitrate' went to abitrate and vice versa.
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                ## NOTE(review): this compose_file() call takes five
                ## positional args in a different order than the call above;
                ## verify against the helper's signature.
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                                 getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = \
                        os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s"
                                        % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        ## Close the snippet file even if read() fails.
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        try:
            marcxml = marc_snippet.read()
        finally:
            marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        ## only if successfull
        ## BUGFIX: return_code is 1 (truthy) when every job succeeded (see
        ## the notification block below); the original tested
        ## `not return_code`, which deleted the input after a FAILED run.
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(
                    batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: the original wrote `type(x == type(str()))`, taking
            ## the type of a comparison result (always truthy); check the
            ## value's type instead.
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None, set_last_updated=None):
    """Print record 'recid' formatted according to 'prefix' for OAI harvesting.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.
    """
    # True only for live records: record_exists() returns 1 for existing,
    # -1 for deleted, 0 for missing; the latter two both start as False here.
    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and not set_spec in sets and not [set_ for set_ in sets if set_.startswith("%s:" % set_spec)]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False
    # OAI header status: live records carry no status attribute,
    # everything else is advertised as 'deleted'.
    if record_exists_result:
        status = None
    else:
        status = 'deleted'
    # Under any deletion policy other than 'persistent'/'transient'
    # (i.e. policy 'no'), suppress deleted/out-of-set records entirely.
    if not record_exists_result and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return ""
    idents = get_field(recid, CFG_OAI_ID_FIELD)
    if not idents:
        return ""
    ## FIXME: Move these checks in a bibtask
    #try:
        #assert idents, "No OAI ID for record %s, please do your checks!" % recid
    #except AssertionError, err:
        #register_exception(alert_admin=True)
        #return ""
    #try:
        #assert len(idents) == 1, "More than OAI ID found for recid %s. Considering only the first one, but please do your checks: %s" % (recid, idents)
    #except AssertionError, err:
        #register_exception(alert_admin=True)
    ident = idents[0]
    # Build the OAI <header>: identifier, datestamp and setSpec elements.
    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)
    # The datestamp is never earlier than the set's last-updated time,
    # so harvesters re-fetch records whose set membership changed.
    if set_last_updated:
        header_body += X.datestamp()(max(get_modification_date(recid), set_last_updated))
    else:
        header_body += X.datestamp()(get_modification_date(recid))
    for set_spec in get_field(recid, CFG_OAI_SET_FIELD):
        if set_spec and set_spec != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            # Print only if field not empty
            header_body += X.setSpec()(set_spec)
    header = X.header(status=status)(header_body)
    if verb == 'ListIdentifiers':
        # ListIdentifiers returns headers only, no metadata.
        return header
    else:
        if record_exists_result:
            # Full record: metadata plus optional provenance/rights <about>s.
            metadata_body = format_record(recid, CFG_OAI_METADATA_FORMATS[prefix][0])
            metadata = X.metadata(body=metadata_body)
            provenance_body = get_record_provenance(recid)
            if provenance_body:
                provenance = X.about(body=provenance_body)
            else:
                provenance = ''
            rights_body = get_record_rights(recid)
            if rights_body:
                rights = X.about(body=rights_body)
            else:
                rights = ''
        else:
            # Deleted record under persistent/transient policy: header only.
            metadata = ''
            provenance = ''
            rights = ''
        return X.record()(header, metadata, provenance, rights)
def get_bibrecord(recid):
    """Return record `recid` wrapped in a BibRecord structure.

    Implicitly returns None when record_exists() reports no record.
    Note: record_exists() may return -1 for deleted records, which is
    truthy and therefore also taken here.
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, 'xm')
    parsed = create_record(marcxml)
    return parsed[0]
def getfile(req, form):
    """Serve a file attached to a record, after access-control checks.

    Washes the request arguments, verifies the record exists and the user
    may view it, locates the requested document/format/version in the
    record's BibRecDocs archive and streams it; otherwise returns an
    authorization redirect, a warning page, or sets an HTTP error status.

    NOTE(review): the body references ``self.recid`` and ``filename``
    although neither is a parameter of this function — it reads like a
    method body lifted out of its class; confirm against the original
    enclosing class before reuse.

    @param req: the request object (mod_python style)
    @param form: the submitted form arguments
    """
    args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
    ln = args['ln']
    _ = gettext_set_language(ln)
    uid = getUid(req)
    user_info = collect_user_info(req)
    verbose = args['verbose']
    if verbose >= 1 and not isUserSuperAdmin(user_info):
        # Only SuperUser can see all the details!
        verbose = 0
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
        return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid),
                                   navmenuid='submit')
    if record_exists(self.recid) < 1:
        # record_exists() < 1 covers both absent (0) and deleted (-1).
        msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
        return warning_page(msg, req, ln)
    if record_empty(self.recid):
        msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
        return warning_page(msg, req, ln)
    (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
    if auth_code and user_info['email'] == 'guest':
        if webjournal_utils.is_recid_in_released_issue(self.recid):
            # We can serve the file
            pass
        else:
            # Guest without access: send through the login page with a
            # cookie that authorizes the collection view after login.
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection': guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln': ln, 'referer':
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        if webjournal_utils.is_recid_in_released_issue(self.recid):
            # We can serve the file
            pass
        else:
            return page_not_authorized(req, "../", text=auth_message)
    # Site level 1 means read-only: downloads must not be registered.
    readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1
    # From now on: either the user provided a specific file
    # name (and a possible version), or we return a list of
    # all the available files. In no case are the docids
    # visible.
    try:
        bibarchive = BibRecDocs(self.recid)
    except InvenioBibDocFileError:
        register_exception(req=req, alert_admin=True)
        msg = "<p>%s</p><p>%s</p>" % (
            _("The system has encountered an error in retrieving the list of files for this document."),
            _("The error has been logged and will be taken in consideration as soon as possible."))
        return warning_page(msg, req, ln)
    if bibarchive.deleted_p():
        req.status = apache.HTTP_GONE
        return warning_page(_("Requested record does not seem to exist."), req, ln)
    docname = ''
    docformat = ''
    version = ''
    warn = ''
    if filename:
        # We know the complete file name, guess which docid it
        # refers to
        ## TODO: Change the extension system according to ext.py from setlink
        ## and have a uniform extension mechanism...
        docname = file_strip_ext(filename)
        docformat = filename[len(docname):]
        if docformat and docformat[0] != '.':
            docformat = '.' + docformat
        if args['subformat']:
            docformat += ';%s' % args['subformat']
    else:
        docname = args['docname']
    if not docformat:
        docformat = args['format']
        if args['subformat']:
            docformat += ';%s' % args['subformat']
    if not version:
        version = args['version']
    ## Download as attachment
    is_download = False
    if args['download']:
        is_download = True
    # version could be either empty, or all or an integer
    try:
        int(version)
    except ValueError:
        if version != 'all':
            version = ''
    display_hidden = isUserSuperAdmin(user_info)
    if version != 'all':
        # search this filename in the complete list of files
        for doc in bibarchive.list_bibdocs():
            if docname == bibarchive.get_docname(doc.id):
                try:
                    try:
                        docfile = doc.get_file(docformat, version)
                    except InvenioBibDocFileError, msg:
                        req.status = apache.HTTP_NOT_FOUND
                        if not CFG_INSPIRE_SITE and req.headers_in.get('referer'):
                            ## There must be a broken link somewhere.
                            ## Maybe it's good to alert the admin
                            register_exception(req=req, alert_admin=True)
                        warn += write_warning(_("The format %s does not exist for the given version: %s") % (cgi.escape(docformat), cgi.escape(str(msg))))
                        break
                    (auth_code, auth_message) = docfile.is_restricted(user_info)
                    if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                        # Restricted file and not the record owner: icons may
                        # still be served as a restricted placeholder.
                        if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)):
                            return stream_restricted_icon(req)
                        if user_info['email'] == 'guest':
                            cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status': docfile.get_status()})
                            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                make_canonical_urlargd({'action': cookie, 'ln': ln, 'referer':
                                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                            redirect_to_url(req, target)
                        else:
                            req.status = apache.HTTP_UNAUTHORIZED
                            warn += write_warning(_("This file is restricted: ") + str(auth_message))
                            break
                    if not docfile.hidden_p():
                        if not readonly:
                            # Record the download statistics before streaming.
                            ip = str(req.remote_ip)
                            doc.register_download(ip, docfile.get_version(), docformat, uid, self.recid)
                        try:
                            return docfile.stream(req, download=is_download)
                        except InvenioBibDocFileError, msg:
                            register_exception(req=req, alert_admin=True)
                            req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                            warn += write_warning(_("An error has happened in trying to stream the request file."))
                    else:
                        req.status = apache.HTTP_UNAUTHORIZED
                        warn += write_warning(_("The requested file is hidden and can not be accessed."))
                except InvenioBibDocFileError, msg:
                    register_exception(req=req, alert_admin=True)
def __call__(self, req, form):
    """Handle a detailed-record page request.

    Washes the search arguments, applies user preferences and permission
    caps, checks view rights on self.recid (redirecting guests to login),
    handles deleted/merged records, and finally delegates to
    perform_request_search().

    @param req: the request object (mod_python style)
    @param form: the submitted form arguments
    """
    argd = wash_search_urlargd(form)
    argd['recid'] = self.recid
    argd['tab'] = self.tab
    # do we really enter here ?
    if self.format is not None:
        argd['of'] = self.format
    req.argd = argd
    uid = getUid(req)
    if uid == -1:
        return page_not_authorized(req, "../",
                                   text="You are not authorized to view this record.",
                                   navmenuid='search')
    elif uid > 0:
        pref = get_user_preferences(uid)
        try:
            if not form.has_key('rg'):
                # fetch user rg preference only if not overridden via URL
                argd['rg'] = int(pref['websearch_group_records'])
        except (KeyError, ValueError):
            pass
    user_info = collect_user_info(req)
    (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)
    # Cap the records-per-group parameter unless the user holds the
    # 'runbibedit' authorization.
    if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
        argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS
    #check if the user has rights to set a high wildcard limit
    #if not, reduce the limit set by user, with the default one
    if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
        if acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT
    # only superadmins can use verbose parameter for obtaining debug information
    if not isUserSuperAdmin(user_info):
        argd['verbose'] = 0
    if auth_code and user_info['email'] == 'guest':
        # Guest without access: redirect through login with an
        # authorization cookie for the record's collection.
        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection': guess_primary_collection_of_a_record(self.recid)})
        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
            make_canonical_urlargd({'action': cookie, 'ln': argd['ln'],
                                    'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        return page_not_authorized(req, "../",
                                   text=auth_msg,
                                   navmenuid='search')
    from invenio.search_engine import record_exists, get_merged_recid
    # check if the current record has been deleted
    # and has been merged, case in which the deleted record
    # will be redirect to the new one
    record_status = record_exists(argd['recid'])
    merged_recid = get_merged_recid(argd['recid'])
    if record_status == -1 and merged_recid:
        url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s'
        url %= (str(merged_recid), argd['ln'])
        redirect_to_url(req, url)
    elif record_status == -1:
        req.status = apache.HTTP_GONE  ## The record is gone!
    # mod_python does not like to return [] in case when of=id:
    out = perform_request_search(req, **argd)
    if isinstance(out, intbitset):
        return out.fastdump()
    elif out == []:
        return str(out)
    else:
        return out
def format_record(recID, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None, xml_record=None, user_info=None, on_the_fly=False): """ Format a record in given output format. Return a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. The record to be formatted can be specified with its ID (with 'recID' parameter) or given as XML representation (with 'xml_record' parameter). If 'xml_record' is specified 'recID' is ignored (but should still be given for reference. A dummy recid 0 or -1 could be used). 'user_info' allows to grant access to some functionalities on a page depending on the user's priviledges. The 'user_info' object makes sense only in the case of on-the-fly formatting. 'user_info' is the same object as the one returned by 'webuser.collect_user_info(req)' @param recID: the ID of record to format. @type recID: int @param of: an output format code (or short identifier for the output format) @type of: string @param ln: the language to use to format the record @type ln: string @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @type verbose: int @param search_pattern: list of strings representing the user request in web interface @type search_pattern: list(string) @param xml_record: an xml string represention of the record to format @type xml_record: string or None @param user_info: the information of the user who will view the formatted page (if applicable) @param on_the_fly: if False, try to return an already preformatted version of the record in the database @type on_the_fly: boolean @return: formatted record @rtype: string """ from invenio.search_engine import record_exists if search_pattern is None: search_pattern = [] out = "" if verbose == 9: out += """\n<span class="quicknote"> Formatting record 
%i with output format %s. </span>""" % (recID, of) ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### if CFG_BIBFORMAT_USE_OLD_BIBFORMAT and CFG_PATH_PHP: return bibformat_engine.call_old_bibformat(recID, of=of, on_the_fly=on_the_fly) ############################# END ################################## if not on_the_fly and \ (ln == CFG_SITE_LANG or \ of.lower() == 'xm' or \ CFG_BIBFORMAT_USE_OLD_BIBFORMAT or \ (of.lower() in CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS)) and \ record_exists(recID) != -1: # Try to fetch preformatted record. Only possible for records # formatted in CFG_SITE_LANG language (other are never # stored), or of='xm' which does not depend on language. # Exceptions are made for output formats defined in # CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS, which are # always served from the same cache for any language. Also, # do not fetch from DB when record has been deleted: we want # to return an "empty" record in that case res = bibformat_dblayer.get_preformatted_record(recID, of) if res is not None: # record 'recID' is formatted in 'of', so return it if verbose == 9: last_updated = bibformat_dblayer.get_preformatted_record_date( recID, of) out += """\n<br/><span class="quicknote"> Found preformatted output for record %i (cache updated on %s). </span><br/>""" % (recID, last_updated) if of.lower() == 'xm': res = filter_hidden_fields(res, user_info) # try to replace language links in pre-cached res, if applicable: if ln != CFG_SITE_LANG and of.lower( ) in CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS: # The following statements try to quickly replace any # language arguments in URL links. Not an exact # science, but should work most of the time for most # of the formats, with not too many false positives. # We don't have time to parse output much here. 
res = res.replace('?ln=' + CFG_SITE_LANG, '?ln=' + ln) res = res.replace('&ln=' + CFG_SITE_LANG, '&ln=' + ln) res = res.replace('&ln=' + CFG_SITE_LANG, '&ln=' + ln) out += res return out else: if verbose == 9: out += """\n<br/><span class="quicknote"> No preformatted output found for record %s. </span>""" % recID # Live formatting of records in all other cases if verbose == 9: out += """\n<br/><span class="quicknote"> Formatting record %i on-the-fly. </span>""" % recID try: out += bibformat_engine.format_record(recID=recID, of=of, ln=ln, verbose=verbose, search_pattern=search_pattern, xml_record=xml_record, user_info=user_info) if of.lower() == 'xm': out = filter_hidden_fields(out, user_info) return out except Exception, e: register_exception(prefix="An error occured while formatting record %i in %s" % \ (recID, of), alert_admin=True) #Failsafe execution mode import invenio.template websearch_templates = invenio.template.load('websearch') if verbose == 9: out += """\n<br/><span class="quicknote"> An error occured while formatting record %i. (%s) </span>""" % (recID, str(e)) if of.lower() == 'hd': if verbose == 9: out += """\n<br/><span class="quicknote"> Formatting record %i with websearch_templates.tmpl_print_record_detailed. </span><br/>""" % recID return out + websearch_templates.tmpl_print_record_detailed( ln=ln, recID=recID, ) if verbose == 9: out += """\n<br/><span class="quicknote"> Formatting record %i with websearch_templates.tmpl_print_record_brief. </span><br/>""" % recID return out + websearch_templates.tmpl_print_record_brief( ln=ln, recID=recID, )
def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
    Display the details of a record on which some operation is to be carried
    out and prompt for the user's confirmation that it is the correct record.
    Upon the clicking of the confirmation button, augment step by one.

    Given the "recid" (001) of a record, retrieve the basic metadata
    (title, report-number(s) and author(s)) and display them in the
    user's browser along with a prompt asking them to confirm that it
    is indeed the record that they expected to see.

    The function depends upon the presence of the "sysno" global and the
    presence of the "step" field in the "form" parameter.
    When the user clicks on the "confirm" button, step will be augmented by
    1 and the form will be submitted.

    @parameters: None.
    @return: None.
    @Exceptions raise: InvenioWebSubmitFunctionError if problems are
     encountered;
     InvenioWebSubmitFunctionStop in order to display the details of the
     record and the confirmation message.
    """
    global sysno
    ## Make sure that we know the current step:
    try:
        current_step = int(form['step'])
    # BUGFIX: a missing 'step' field raises KeyError and a non-numeric
    # value raises ValueError; previously only TypeError was caught, so
    # those cases escaped as unhandled exceptions instead of the
    # intended InvenioWebSubmitFunctionError.
    except (KeyError, TypeError, ValueError):
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    else:
        newstep = current_step + 1
    ## Make sure that the sysno is valid:
    try:
        working_recid = int(sysno)
    # BUGFIX: int() on a non-numeric string raises ValueError, which was
    # previously not caught (only TypeError, e.g. for None, was).
    except (TypeError, ValueError):
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)
    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)
    ## Retrieve the details to be displayed:
    ##
    ## Author(s):
    rec_authors = ""
    rec_first_author = print_record(int(sysno), 'tm', "100__a")
    rec_other_authors = print_record(int(sysno), 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += "".join(["%s<br />\n" % cgi.escape(author.strip()) for \
                                author in rec_first_author.split("\n")])
    if rec_other_authors != "":
        rec_authors += "".join(["%s<br />\n" % cgi.escape(author.strip()) for \
                                author in rec_other_authors.split("\n")])
    ## Title:
    rec_title = "".join(["%s<br />\n" % cgi.escape(title.strip()) for title in \
                         print_record(int(sysno), 'tm', "245__a").split("\n")])
    ## Report numbers:
    rec_reportnums = ""
    rec_reportnum = print_record(int(sysno), 'tm', "037__a")
    rec_other_reportnums = print_record(int(sysno), 'tm', "088__a")
    if rec_reportnum != "":
        rec_reportnums += "".join(["%s<br />\n" % cgi.escape(repnum.strip()) \
                                   for repnum in rec_reportnum.split("\n")])
    if rec_other_reportnums != "":
        rec_reportnums += "".join(["%s<br />\n" % cgi.escape(repnum.strip()) \
                                   for repnum in \
                                   rec_other_reportnums.split("\n")])
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                       { 'report-numbers' : rec_reportnums, \
                                         'title' : rec_title, \
                                         'author' : rec_authors, \
                                         'newstep' : newstep, \
                                         'admin-email' : CFG_SITE_ADMIN_EMAIL, \
                                       } )
def display(self, req, form):
    """
    Show the tab 'holdings'.

    Renders the holdings tab of the detailed record page: validates the
    record, checks the user's view permission (redirecting guests to
    login), builds the ordered tab bar, and assembles the full page from
    webstyle/websearch template fragments.

    @param req: the request object (mod_python style)
    @param form: the submitted form arguments
    """
    argd = wash_urlargd(form, {'do': (str, "od"),
                               'ds': (str, "all"),
                               'nb': (int, 100),
                               'p': (int, 1),
                               'voted': (int, -1),
                               'reported': (int, -1),
                               })
    _ = gettext_set_language(argd['ln'])
    record_exists_p = record_exists(self.recid)
    if record_exists_p != 1:
        # Record is deleted (-1) or absent (0): show an explanatory page.
        if record_exists_p == -1:
            msg = _("The record has been deleted.")
        else:
            msg = _("Requested record does not seem to exist.")
        msg = '<span class="quicknote">' + msg + '</span>'
        title, description, keywords = \
            websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])
        return page(title = title,
                    show_title_p = False,
                    body = msg,
                    description = description,
                    keywords = keywords,
                    uid = getUid(req),
                    language = argd['ln'],
                    req = req,
                    navmenuid='search')
    body = perform_get_holdings_information(self.recid, req, argd['ln'])
    uid = getUid(req)
    user_info = collect_user_info(req)
    (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)
    if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']:
        # Guest without access: redirect through login with an
        # authorization cookie for the record's collection.
        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
        target = '/youraccount/login' + \
            make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                    CFG_SITE_URL + user_info['uri']}, {})
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        return page_not_authorized(req, "../", \
                                   text = auth_msg)
    # Build the tab bar, sorted by each tab's declared 'order'.
    unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(self.recid)),
                                            self.recid,
                                            ln=argd['ln'])
    ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
    ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
    link_ln = ''
    if argd['ln'] != CFG_SITE_LANG:
        link_ln = '?ln=%s' % argd['ln']
    # Each tab tuple: (label, url, is_the_current_tab, enabled).
    tabs = [(unordered_tabs[tab_id]['label'], \
             '%s/record/%s/%s%s' % (CFG_SITE_URL, self.recid, tab_id, link_ln), \
             tab_id in ['holdings'],
             unordered_tabs[tab_id]['enabled']) \
            for (tab_id, _order) in ordered_tabs_id
            if unordered_tabs[tab_id]['visible'] == True]
    top = webstyle_templates.detailed_record_container_top(self.recid,
                                                           tabs,
                                                           argd['ln'])
    bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                 tabs,
                                                                 argd['ln'])
    title = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])[0]
    navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln'])
    navtrail += ' > <a class="navtrail" href="%s/record/%s?ln=%s">' % (CFG_SITE_URL, self.recid, argd['ln'])
    navtrail += title
    navtrail += '</a>'
    # Assemble header + search-page frame + tab container + footer.
    return pageheaderonly(title=title,
                          navtrail=navtrail,
                          uid=uid,
                          verbose=1,
                          req=req,
                          metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_URL,
                          language=argd['ln'],
                          navmenuid='search',
                          navtrail_append_title_p=0) + \
           websearch_templates.tmpl_search_pagestart(argd['ln']) + \
           top + body + bottom + \
           websearch_templates.tmpl_search_pageend(argd['ln']) + \
           pagefooteronly(lastupdated=__lastupdated__, language=argd['ln'], req=req)
def format_element(bfo):
    """
    Special contract with US DoE Office of Scientific and Technical Information
    29/08/2013 R.A.
    Serializes selected record info as "OSTI" xml

    @param bfo: the BibFormat object for the record being formatted
    @return: an UTF-8 XML string (or None when lxml is unavailable)
    """
    try:
        from lxml import etree
    except ImportError:
        # lxml is optional; silently produce no output without it.
        return
    from invenio.search_engine import perform_request_search, get_fieldvalues, record_exists
    # a dictionary of Inspire subjects mapped to OSTI coded research categories
    osticats = {
        "Accelerators": "43 PARTICLE ACCELERATORS",
        "Computing": "99 GENERAL AND MISCELLANEOUS//MATHEMATICS, COMPUTING, AND INFORMATION SCIENCE",
        "Experiment-HEP": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "General Physics": "71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS",
        "Instrumentation": "46 INSTRUMENTATION RELATED TO NUCLEAR SCIENCE AND TECHNOLOGY",
        "Astrophysics": "71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS",
        "Lattice": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Math and Math Physics": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Theory-Nucl": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Experiment-Nucl": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Phenomenology-HEP": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Theory-HEP": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Other": "71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS",
    }
    rec = etree.Element("rec")
    recid = bfo.recID
    if record_exists(recid) == -1:
        # Deleted record: emit an empty <rec/> element.
        # BUGFIX: lxml's etree.tostring() has no 'declaration' keyword
        # (it would raise TypeError); the correct keyword, used in the
        # final tostring() call below as well, is 'xml_declaration'.
        return etree.tostring(rec, encoding="UTF-8", xml_declaration=False)
    node = etree.SubElement
    for recnum in [
            x.replace("SPIRES-", "") for x in
            [r for r in get_fieldvalues(recid, "970__a") if r.startswith("SPIRES-")]
    ]:
        # SPIRES record number is used if available to prevent OSTI loading
        # duplicate records from INSPIRE which they already got from SPIRES.
        node(rec, "accession_num").text = recnum
    if not rec.xpath("//accession_num"):
        # use regular inspire recid
        node(rec, "accession_num").text = str(recid)
    for title in get_fieldvalues(recid, "245__a"):
        node(rec, "title").text = unicode(title, "utf-8")
    # The authors in the ostixml are all strung together between author tags
    # delimited by ';' If zero or > 10 authors don't show any authors
    authors = get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a")
    if len(authors) <= 10 and len(authors) > 0:
        node(rec, "author").text = "; ".join([unicode(a, "utf-8") for a in authors])
    for category in get_fieldvalues(recid, "65017a"):
        if osticats.has_key(category):
            node(rec, "subj_category").text = osticats[category]
            node(rec, "subj_keywords").text = category
    for pubdate in get_fieldvalues(recid, "269__c"):
        node(rec, "date").text = pubdate
    # Fermilab report numbers mapped to OSTI doc types
    for dtype in get_fieldvalues(recid, "037__a"):
        if "fermilab" in dtype.lower():
            if "PUB" in dtype:
                doctype = "JA"
            elif "CONF" in dtype:
                doctype = "CO"
            elif "THESIS" in dtype:
                doctype = "TD"
            else:
                doctype = "TR"
            node(rec, "doctype").text = doctype
    # One MARC field is used for conferences and journals. So, the following
    # journal coding handles the variations, and outputs journal and
    # conf. cites in a nice order. If the conf has a cnum, we get the conf
    # info from its separate record in the conf. "collection." There are a
    # few if-then gymnastics to cover possible missing information and still
    # make a note that looks okay (sort of)
    confstring = ""
    journals = bfo.fields("773__", repeatable_subfields_p=True)
    for journal in journals:
        if journal.has_key("p"):
            jinfo = str(journal["p"][0])
            if journal.has_key("v"):
                jinfo += " %s" % journal["v"][0]
            if journal.has_key("c"):
                jinfo += ":%s" % journal["c"][0]
            if journal.has_key("y"):
                jinfo += ",%s" % journal["y"][0]
            node(rec, "journal_info").text = unicode(jinfo, "utf-8")
        confstring = ""
        # without t info or cnum don't print anything
        if journal.has_key("t"):
            confstring += "%s: " % journal["t"][0]
        if journal.has_key("w"):
            conf_info = {}
            cnum = journal["w"][0].replace("/", "-")
            idrec = perform_request_search(p="111__g:" + str(cnum), c="Conferences")
            if idrec:
                for subfield in ("a", "c", "d"):
                    val = get_fieldvalues(idrec[0], "111__%s" % subfield, repetitive_values=True)
                    if val:
                        conf_info[subfield] = val[0]
                confstring += "%s. %s, %s." % tuple(conf_info.get(x, "") for x in ("a", "c", "d"))
        if journal.has_key("c") and confstring != "":
            confstring += " pp: %s" % journal["c"][0]
    for doi in get_fieldvalues(recid, "0247_a"):
        node(rec, "doi").text = doi
    if journals and confstring != "":
        # because it has to come after doi (?)
        # although order is not guaranteed for XML serialization
        node(rec, "conf_info").text = unicode(confstring, "utf-8")
    for pages in get_fieldvalues(recid, "300__a"):
        node(rec, "format").text = "%s pages" % pages
    for lang in get_fieldvalues(recid, "041__a"):
        node(rec, "language").text = lang
    # As with journals, eprints are in with report nos. in our MARC format
    # so they have to be separated out
    eprint = ""
    for repno in get_fieldvalues(recid, "037__a"):
        if "arXiv" in repno:
            eprint = repno
            node(rec, "arXiv_eprint").text = "arXiv eprint number %s" % unicode(repno, "utf-8")
        else:
            node(rec, "report_number").text = unicode(repno, "utf-8")
    urls = bfo.fields("8564_", repeatable_subfields_p=True)
    for url in urls:
        if url.has_key("y") and "FERMILAB" in url["y"][0] and url.has_key("u"):
            node(rec, "url").text = "%s.pdf" % url["u"][0]
    if eprint:
        node(rec, "availability").text = "http://arXiv.org/abs/%s" % eprint
    node(rec, "sponsor_org").text = "DOE Office of Science"
    dt_harvest = get_modification_date(recid)
    if dt_harvest:
        node(rec, "dt_harvest").text = dt_harvest
    else:
        # fallback to SPIRES era marc
        for date in get_fieldvalues(recid, "961__c"):
            node(rec, "dt_harvest").text = date
    out = etree.tostring(rec,
                         encoding="UTF-8",
                         xml_declaration=False,
                         pretty_print=True,
                         method="xml").rstrip("\n")
    return out
def oailistidentifiers(args):
    """Prints OAI response to the ListIdentifiers verb.

    Streams up to CFG_OAI_LOAD record headers; any remaining sysnos are
    cached under a freshly generated resumptionToken so a later request
    can continue the listing.

    @param args: the raw OAI request arguments, parsed via parse_args()
    @return: the complete OAI-PMH XML response as a string
    """
    arg = parse_args(args)
    out = ""
    resumptionToken_printed = False
    sysno = []   # sysnos deferred to the resumptionToken cache
    sysnos = []
    if arg['resumptionToken']:
        # Reject tokens whose cache file has already been cleaned away.
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = out + oai_error("badResumptionToken", "ResumptionToken expired")
            out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
            return out
    if arg['resumptionToken']:
        sysnos = oaicacheout(arg['resumptionToken'])
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])
    if len(sysnos) == 0:
        # noRecordsMatch error
        out = out + oai_error("noRecordsMatch", "no records correspond to the request")
        out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
        return out
    i = 0
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:
                # cache or write?
                if not resumptionToken_printed:
                    # resumptionToken?
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s <resumptionToken>%s</resumptionToken>\n" % (out, arg['resumptionToken'])
                    resumptionToken_printed = True
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                # BUGFIX: the condition previously read
                # "(not _record_exists == -1 and CFG_OAI_DELETED_POLICY == 'no')",
                # which (per its own comment) inverts the intent: with any
                # policy other than 'no' the counter never advanced, so the
                # CFG_OAI_LOAD batching never triggered. A record is
                # returned unless it is deleted AND the policy is 'no'.
                if not (_record_exists == -1 and CFG_OAI_DELETED_POLICY == "no"):
                    i = i + 1 # Increment limit only if record is returned
                for ident in get_field(sysno_, CFG_OAI_ID_FIELD):
                    if ident != '':
                        if _record_exists == -1: #Deleted?
                            if CFG_OAI_DELETED_POLICY == "persistent" \
                                   or CFG_OAI_DELETED_POLICY == "transient":
                                out = out + " <header status=\"deleted\">\n"
                            else:
                                # In that case, print nothing (do not go further)
                                break
                        else:
                            out = out + " <header>\n"
                        out = "%s <identifier>%s</identifier>\n" % (out, escape_space(ident))
                        out = "%s <datestamp>%s</datestamp>\n" % (out, get_modification_date(oaigetsysno(ident)))
                        for set in get_field(sysno_, CFG_OAI_SET_FIELD):
                            if set:
                                # Print only if field not empty
                                out = "%s <setSpec>%s</setSpec>\n" % (out, set)
                        out = out + " </header>\n"
    if resumptionToken_printed:
        oaicacheclean() # clean cache from expired resumptionTokens
        oaicachein(arg['resumptionToken'], sysno)
    out = oai_header(args, "ListIdentifiers") + out + oai_footer("ListIdentifiers")
    return out
style='conclusion') sys.exit(1) to_fix_marc = intbitset() print "Created a complete log file into %s" % logfilename try: try: for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc: try: record_does_exist = True recids = get_recid_from_docid(id_bibdoc1) if not recids: print "Skipping %s" % id_bibdoc1 continue for recid in recids: if record_exists(recid[0]) > 0: to_fix_marc.add(recid[0]) else: record_does_exist = False if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile): if record_does_exist: raise StandardError( "Error when correcting document ID %s" % id_bibdoc1) except Exception, err: print >> logfile, "ERROR: %s" % err print wrap_text_in_a_box("DONE", style='conclusion') except: logfile.close() register_exception() print wrap_text_in_a_box(
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Prints record 'sysno' formatted according to 'format'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(sysno) function (in order not to call that
    function again if already done.)
    """
    out = ""
    # sanity check:
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)
    if not _record_exists:
        return
    if (format == "dc") or (format == "oai_dc"):
        format = "xd"
    # print record opening tags:
    out = out + " <record>\n"
    if _record_exists == -1: # Deleted?
        if CFG_OAI_DELETED_POLICY == "persistent" or \
               CFG_OAI_DELETED_POLICY == "transient":
            out = out + " <header status=\"deleted\">\n"
        else:
            # Policy 'no': deleted records are not exposed at all.
            return
    else:
        out = out + " <header>\n"
    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        out = "%s <identifier>%s</identifier>\n" % (out, escape_space(ident))
    out = "%s <datestamp>%s</datestamp>\n" % (out, get_modification_date(sysno))
    for set in get_field(sysno, CFG_OAI_SET_FIELD):
        if set: # Print only if field not empty
            out = "%s <setSpec>%s</setSpec>\n" % (out, set)
    out = out + " </header>\n"
    if _record_exists == -1: # Deleted?
        # Deleted records carry no <metadata> section.
        pass
    else:
        out = out + " <metadata>\n"
        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed
                # Infoscience modification :
                # Added custom validator from Swiss librarians
                # Rewrite plain MARCXML tags into the 'marc:' namespaced form
                # expected by the SLB MARC21slim schema, injecting a leader.
                formatted_record = formatted_record.replace(
                    "<record>",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n <marc:leader>00000coc 2200000uu 4500</marc:leader>")
                formatted_record = formatted_record.replace(
                    "<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n <marc:leader>00000coc 2200000uu 4500</marc:leader>")
                formatted_record = formatted_record.replace(
                    "</record", "</marc:record")
                formatted_record = formatted_record.replace(
                    "<controlfield", "<marc:controlfield")
                formatted_record = formatted_record.replace(
                    "</controlfield", "</marc:controlfield")
                formatted_record = formatted_record.replace(
                    "<datafield", "<marc:datafield")
                formatted_record = formatted_record.replace(
                    "</datafield", "</marc:datafield")
                formatted_record = formatted_record.replace(
                    "<subfield", "<marc:subfield")
                formatted_record = formatted_record.replace(
                    "</subfield", "</marc:subfield")
                out += formatted_record
            else:
                ## MARCXML is not formatted in the database, so produce it.
                # Infoscience modification
                out = out + " <marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">"
                out = out + " <marc:leader>00000coc 2200000uu 4500</marc:leader>"
                out = "%s <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (out, int(sysno))
                # Walk every bibXXx table pair and emit the record's MARC
                # fields in tag order, grouping subfields under their
                # datafield by field_number.
                for digit1 in range(0, 10):
                    for digit2 in range(0, 10):
                        bibbx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx)
                        res = run_sql(query, (sysno, '%d%d%%' % (digit1, digit2)))
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            # '_' is the stored placeholder for a blank indicator.
                            if ind1 == "_":
                                ind1 = " "
                            if ind2 == "_":
                                ind2 = " "
                            # print field tag
                            if field_number != field_number_old or field[:-1] != field_old[:-1]:
                                if format == "marcxml":
                                    # Close the previously opened field, if any.
                                    if field_number_old != -999:
                                        if field_old[0:2] == "00":
                                            out = out + " </marc:controlfield>\n"
                                        else:
                                            out = out + " </marc:datafield>\n"
                                    # Tags starting '00' are controlfields.
                                    if field[0:2] == "00":
                                        out = "%s <marc:controlfield tag=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]))
                                    else:
                                        out = "%s <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]),
                                            encode_for_xml(ind1).lower(),
                                            encode_for_xml(ind2).lower())
                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            if format == "marcxml":
                                value = encode_for_xml(value)
                                if (field[0:2] == "00"):
                                    out = "%s %s\n" % (out, value)
                                else:
                                    out = "%s <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (
                                        out, encode_for_xml(field[-1:]), value)
                            # fetch next subfield
                        # all fields/subfields printed in this run, so close the tag:
                        if (format == "marcxml") and field_number_old != -999:
                            if field_old[0:2] == "00":
                                out = out + " </marc:controlfield>\n"
                            else:
                                out = out + " </marc:datafield>\n"
                out = out + " </marc:record>\n"
        elif format == "xd":
            out += format_record(sysno, 'xoaidc')
        # print record closing tags:
        out = out + " </metadata>\n"
    out = out + " </record>\n"
    return out
def decorated(recid, *args, **kwargs):
    """View wrapper for record pages: resolves the record's collection,
    enforces view authorization, handles deleted/merged records, and
    prepares breadcrumbs, tabs and template context before calling the
    wrapped view ``f(recid, *args, **kwargs)``.

    ``f`` is the wrapped view function captured from the enclosing
    decorator scope (not visible in this block).
    """
    # ensure recid to be integer
    recid = int(recid)
    # Resolve the primary collection of the record; also exposed on the
    # request globals ``g`` for templates.
    g.collection = collection = Collection.query.filter(
        Collection.name == guess_primary_collection_of_a_record(recid)).\
        one()

    (auth_code, auth_msg) = check_user_can_view_record(current_user, recid)

    # only superadmins can use verbose parameter for obtaining debug information
    if not current_user.is_super_admin and 'verbose' in kwargs:
        kwargs['verbose'] = 0

    if auth_code and current_user.is_guest:
        # Guest without access: send through login with a cookie that
        # re-authorizes the restricted-collection action after login.
        cookie = mail_cookie_create_authorize_action(
            VIEWRESTRCOLL, {
                'collection': guess_primary_collection_of_a_record(recid)})
        url_args = {
            'action': cookie,
            'ln': g.ln,
            'referer': request.referrer
        }
        flash(_("Authorization failure"), 'error')
        return redirect(url_for('webaccount.login', **url_args))
    elif auth_code:
        # Logged-in user without rights: hard failure.
        flash(auth_msg, 'error')
        abort(apache.HTTP_UNAUTHORIZED)

    from invenio.search_engine import record_exists, get_merged_recid
    # check if the current record has been deleted
    # and has been merged, case in which the deleted record
    # will be redirect to the new one
    record_status = record_exists(recid)
    merged_recid = get_merged_recid(recid)
    if record_status == -1 and merged_recid:
        return redirect(url_for('record.metadata', recid=merged_recid))
    elif record_status == -1:
        abort(apache.HTTP_GONE)  # The record is gone!

    g.record = record = Bibrec.query.get(recid)
    user = None
    if not current_user.is_guest:
        user = User.query.get(current_user.get_id())
    # Record title from MARC 245__a (empty string when the field is absent).
    title = get_fieldvalues(recid, '245__a')
    title = title[0] if len(title) > 0 else ''

    # Breadcrumbs: site home, then the collection path, then this record.
    b = [(_('Home'), '')] + collection.breadcrumbs()[1:]
    b += [(title, 'record.metadata', dict(recid=recid))]
    current_app.config['breadcrumbs_map'][request.endpoint] = b
    g.record_tab_keys = []
    tabs = []
    counts = get_detailed_page_tabs_counts(recid)
    for k, v in get_detailed_page_tabs(collection.id, recid,
                                       g.ln).iteritems():
        t = {}
        # Map each tab key to the blueprint that serves it.
        b = 'record'
        if k == '':
            k = 'metadata'
        if k == 'comments' or k == 'reviews':
            b = 'webcomment'
        if k == 'linkbacks':
            b = 'weblinkback'
            k = 'index'
        t['key'] = b + '.' + k
        t['count'] = counts.get(k.capitalize(), -1)
        t.update(v)
        tabs.append(t)
        if v['visible']:
            g.record_tab_keys.append(b + '.' + k)

    if CFG_WEBLINKBACK_TRACKBACK_ENABLED:
        @register_template_context_processor
        def trackback_context():
            # Expose the trackback auto-discovery tag to the page header.
            from invenio.weblinkback_templates import get_trackback_auto_discovery_tag
            return dict(headerLinkbackTrackbackLink=
                        get_trackback_auto_discovery_tag(recid))

    @register_template_context_processor
    def record_context():
        # Everything record templates need, including lazy formatters.
        return dict(recid=recid, record=record, user=user,
                    tabs=tabs, title=title,
                    get_mini_reviews=lambda *args, **kwargs:
                        get_mini_reviews(*args, **kwargs).decode('utf8'),
                    collection=collection,
                    format_record=lambda recID, of='hb', ln=g.ln:
                        format_record(recID, of=of, ln=ln, verbose=0,
                                      search_pattern='', on_the_fly=False))
    return f(recid, *args, **kwargs)
def oailistrecords(args):
    """Generates response to oailistrecords verb.

    Streams up to CFG_OAI_LOAD full records per response; remaining
    record ids are cached under a freshly generated resumptionToken so
    the harvester can continue with a follow-up request.
    """

    arg = parse_args(args)

    out = ""
    resumptionToken_printed = False

    sysnos = []
    sysno = []
    # check if the resumptionToken did not expire
    if arg['resumptionToken']:
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = oai_error("badResumptionToken", "ResumptionToken expired")
            out = oai_error_header(
                args, "ListRecords") + out + oai_error_footer("ListRecords")
            return out

    if arg['resumptionToken'] != "":
        # Resume a previous harvest: the cached list carries the
        # metadataPrefix as its last element (see the caching code below).
        sysnos = oaicacheout(arg['resumptionToken'])
        arg['metadataPrefix'] = sysnos.pop()
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])

    if len(sysnos) == 0:  # noRecordsMatch error
        out = out + oai_error("noRecordsMatch",
                              "no records correspond to the request")
        out = oai_error_header(
            args, "ListRecords") + out + oai_error_footer("ListRecords")
        return out

    i = 0
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:  # cache or write?
                if not resumptionToken_printed:  # resumptionToken?
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (
                            out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s <resumptionToken>%s</resumptionToken>\n" % (
                            out, arg['resumptionToken'])
                    resumptionToken_printed = True
                # Over the per-response limit: defer this id to the cache.
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                if not (_record_exists == -1 and
                        CFG_OAI_DELETED_POLICY == "no"):
                    #Produce output only if record exists and had to be printed
                    i = i + 1  # Increment limit only if record is returned
                    res = print_record(sysno_,
                                       arg['metadataPrefix'],
                                       _record_exists)
                    if res:
                        out += res

    if resumptionToken_printed:
        oaicacheclean()
        # Store metadataPrefix with the ids so the follow-up request can
        # pop it back off (see the resume branch above).
        sysno.append(arg['metadataPrefix'])
        oaicachein(arg['resumptionToken'], sysno)

    out = oai_header(args, "ListRecords") + out + oai_footer("ListRecords")

    return out
def test_record_creation(self):
    """Integration test: for every deposition type whose workflow ends
    in record creation, run the workflow with dummy form data and
    verify a record and its bibupload task were created, then run
    bibupload and check the dummy values reached the MARC output.
    """
    import os
    from wtforms import TextAreaField
    from datetime import datetime
    from invenio.search_engine import record_exists
    from invenio.cache import cache
    from invenio.config import CFG_PREFIX
    from invenio.webuser_flask import login_user
    from invenio.bibworkflow_model import Workflow
    from invenio.bibworkflow_config import CFG_WORKFLOW_STATUS
    from invenio.bibsched_model import SchTASK
    from invenio.webdeposit_utils import get_form, create_workflow, \
        set_form_status, CFG_DRAFT_STATUS
    from invenio.webdeposit_load_deposition_types import \
        deposition_metadata
    from invenio.webdeposit_workflow_utils import \
        create_record_from_marc
    from invenio.bibfield import get_record

    login_user(1)
    for deposition_type in deposition_metadata.keys():

        deposition = create_workflow(deposition_type, 1)
        assert deposition is not None

        # Check if deposition creates a record
        create_rec = create_record_from_marc()
        function_exists = False
        for workflow_function in deposition.workflow:
            # Compare by code object: the workflow stores wrapped callables.
            if create_rec.func_code == workflow_function.func_code:
                function_exists = True
        if not function_exists:
            # if a record is not created,
            #continue with the next deposition
            continue

        uuid = deposition.get_uuid()

        # Reset the per-user cache keys the webdeposit machinery reads.
        cache.delete_many("1:current_deposition_type", "1:current_uuid")
        cache.add("1:current_deposition_type", deposition_type)
        cache.add("1:current_uuid", uuid)

        # Run the workflow
        deposition.run()

        # Create form's json based on the field name
        form = get_form(1, uuid=uuid)
        webdeposit_json = {}

        # Fill the json with dummy data
        for field in form:
            if isinstance(field, TextAreaField):
                # If the field is associated with a marc field
                if field.has_recjson_key() or field.has_cook_function():
                    webdeposit_json[field.name] = "test " + field.name

        draft = dict(form_type=form.__class__.__name__,
                     form_values=webdeposit_json,
                     step=0,  # dummy step
                     status=CFG_DRAFT_STATUS['finished'],
                     timestamp=str(datetime.now()))

        # Add a draft for the first step
        Workflow.set_extra_data(user_id=1, uuid=uuid, key='drafts',
                                value={0: draft})

        workflow_status = CFG_WORKFLOW_STATUS.RUNNING
        while workflow_status != CFG_WORKFLOW_STATUS.COMPLETED:
            # Continue workflow
            deposition.run()
            set_form_status(1, uuid, CFG_DRAFT_STATUS['finished'])
            workflow_status = deposition.get_status()

        # Workflow is finished. Test if record is created
        recid = deposition.get_data('recid')
        assert recid is not None
        # Test that record id exists
        assert record_exists(recid) == 1

        # Test that the task exists
        task_id = deposition.get_data('task_id')
        assert task_id is not None

        bibtask = SchTASK.query.filter(SchTASK.id == task_id).first()
        assert bibtask is not None

        # Run bibupload, bibindex, webcoll manually
        cmd = "%s/bin/bibupload %s" % (CFG_PREFIX, task_id)
        assert not os.system(cmd)

        rec = get_record(recid)
        marc = rec.legacy_export_as_marc()
        for field in form:
            if isinstance(field, TextAreaField):
                # If the field is associated with a marc field
                if field.has_recjson_key() or field.has_cook_function():
                    assert "test " + field.name in marc
def oailistidentifiers(args):
    """Prints OAI response to the ListIdentifiers verb.

    Emits only record headers (identifier, datestamp, setSpecs); at most
    CFG_OAI_LOAD per response, caching the remainder under a
    resumptionToken like oailistrecords does.

    Deleted records (record_exists == -1) get a <header status="deleted">
    when CFG_OAI_DELETED_POLICY is 'persistent' or 'transient', and are
    skipped entirely when the policy is 'no'.
    """

    arg = parse_args(args)

    out = ""
    resumptionToken_printed = False

    sysno = []
    sysnos = []

    # check if the resumptionToken did not expire
    if arg['resumptionToken']:
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = out + oai_error("badResumptionToken",
                                  "ResumptionToken expired")
            out = oai_error_header(
                args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
            return out

    if arg['resumptionToken']:
        sysnos = oaicacheout(arg['resumptionToken'])
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])

    if len(sysnos) == 0:  # noRecordsMatch error
        out = out + oai_error("noRecordsMatch",
                              "no records correspond to the request")
        out = oai_error_header(
            args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
        return out

    i = 0
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:  # cache or write?
                if not resumptionToken_printed:  # resumptionToken?
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (
                            out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s <resumptionToken>%s</resumptionToken>\n" % (
                            out, arg['resumptionToken'])
                    resumptionToken_printed = True
                # Over the per-response limit: defer this id to the cache.
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                # BUGFIX: was `if (not _record_exists == -1 and
                # CFG_OAI_DELETED_POLICY == "no")`, which parses as
                # "record exists AND policy is 'no'" -- that emitted
                # headers only under the 'no' policy and made the
                # deleted-header branch below unreachable.  Corrected to
                # match oailistrecords/print_record: skip only deleted
                # records under the 'no' policy.
                if not (_record_exists == -1 and
                        CFG_OAI_DELETED_POLICY == "no"):
                    i = i + 1  # Increment limit only if record is returned
                    for ident in get_field(sysno_, CFG_OAI_ID_FIELD):
                        if ident != '':
                            if _record_exists == -1:  # Deleted?
                                if CFG_OAI_DELETED_POLICY == "persistent" \
                                       or CFG_OAI_DELETED_POLICY == "transient":
                                    out = out + " <header status=\"deleted\">\n"
                                else:
                                    # In that case, print nothing (do not go further)
                                    break
                            else:
                                out = out + " <header>\n"
                            out = "%s <identifier>%s</identifier>\n" % (
                                out, escape_space(ident))
                            out = "%s <datestamp>%s</datestamp>\n" % (
                                out, get_modification_date(oaigetsysno(ident)))
                            for set in get_field(sysno_, CFG_OAI_SET_FIELD):
                                if set:  # Print only if field not empty
                                    out = "%s <setSpec>%s</setSpec>\n" % (out, set)
                            out = out + " </header>\n"

    if resumptionToken_printed:
        oaicacheclean()  # clean cache from expired resumptionTokens
        oaicachein(arg['resumptionToken'], sysno)

    out = oai_header(args, "ListIdentifiers") + out + oai_footer("ListIdentifiers")

    return out
print wrap_text_in_a_box("Unexpected error while backing up tables. Please, do your checks: %s" % e, style='conclusion') sys.exit(1) to_fix_marc = intbitset() print "Created a complete log file into %s" % logfilename try: try: for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc: try: record_does_exist = True recids = get_recid_from_docid(id_bibdoc1) if not recids: print "Skipping %s" % id_bibdoc1 continue for recid in recids: if record_exists(recid[0]) > 0: to_fix_marc.add(recid[0]) else: record_does_exist = False if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile): if record_does_exist: raise StandardError("Error when correcting document ID %s" % id_bibdoc1) except Exception, err: print >> logfile, "ERROR: %s" % err print wrap_text_in_a_box("DONE", style='conclusion') except: logfile.close() register_exception() print wrap_text_in_a_box(title="INTERRUPTED BECAUSE OF ERROR!", body="""Please see the log file %s for what was the status of record %s prior to the error. Contact %s in case of problems, attaching the log.""" % (logfilename, BibDoc(id_bibdoc1).get_recid(), CFG_SITE_SUPPORT_EMAIL), style='conclusion') sys.exit(1)
def retrieve_data_from_record(recid):
    """ Extract data from a record id in order to import it to the Author
    list interface.

    Returns a dict with keys 'authors', 'affiliations', 'paper_title',
    'collaboration', 'experiment_number', 'last_modified',
    'reference_ids' and 'paper_id', or None when the record does not
    exist.  Author data is read from MARC 100/700 fields; affiliations
    are resolved against the Institutions collection (110 fields).
    """
    if not record_exists(recid):
        return

    output = {}

    DEFAULT_AFFILIATION_TYPE = cfg.OPTIONS.AUTHOR_AFFILIATION_TYPE[0]
    DEFAULT_IDENTIFIER = cfg.OPTIONS.IDENTIFIERS_LIST[0]
    IDENTIFIERS_MAPPING = cfg.OPTIONS.IDENTIFIERS_MAPPING

    bibrecord = get_record(recid)

    try:
        paper_title = get_fieldvalues(recid, '245__a')[0]
    except IndexError:
        paper_title = ""
    try:
        collaboration_name = get_fieldvalues(recid, '710__g')
    except IndexError:
        collaboration_name = ""
    try:
        experiment_number = get_fieldvalues(recid, '693__e')
    except IndexError:
        experiment_number = ""

    # First author (100) followed by additional authors (700).
    record_authors = bibrecord.get('100', [])
    record_authors.extend(bibrecord.get('700', []))

    author_list = []
    unique_affiliations = []

    for i, field_instance in enumerate(record_authors, 1):
        family_name = ""
        given_name = ""
        name_on_paper = ""
        status = ""
        affiliations = []
        identifiers = []
        field = field_instance[0]
        for subfield_code, subfield_value in field:
            if subfield_code == "a":
                # $a is "Family, Given"; names without a comma keep
                # family/given empty but still set name_on_paper.
                try:
                    family_name = subfield_value.split(',')[0]
                    given_name = subfield_value.split(',')[1].lstrip()
                except:
                    pass
                name_on_paper = subfield_value
            elif subfield_code == "u":
                # $u holds the affiliation string.
                affiliations.append([subfield_value,
                                     DEFAULT_AFFILIATION_TYPE])
                unique_affiliations.append(subfield_value)
            elif subfield_code == "i":
                # FIXME This will currently work only with INSPIRE IDs
                id_prefix = subfield_value.split("-")[0]
                if id_prefix in IDENTIFIERS_MAPPING:
                    identifiers.append([subfield_value,
                                        IDENTIFIERS_MAPPING[id_prefix]])
        # Guarantee at least one (possibly empty) identifier/affiliation
        # entry so the web interface rows are uniform.
        if not identifiers:
            identifiers.append(['', DEFAULT_IDENTIFIER])
        if not affiliations:
            affiliations.append([UNKNOWN_AFFILIATION,
                                 DEFAULT_AFFILIATION_TYPE])
            unique_affiliations.append(UNKNOWN_AFFILIATION)
        author_list.append([
            i,              # Row number
            '',             # Place holder for the web interface
            family_name,
            given_name,
            name_on_paper,
            status,
            affiliations,
            identifiers
        ])

    unique_affiliations = list(set(unique_affiliations))

    output.update({'authors': author_list})

    # Generate all the affiliation related information
    affiliation_list = []
    for i, affiliation in enumerate(unique_affiliations, 1):
        # Resolve the affiliation acronym against the Institutions
        # collection; use the full 110__a (+ 110__b) name when exactly
        # one institution matches.
        institution = perform_request_search(c="Institutions",
                                             p='110__u:"' + affiliation + '"')
        full_name = affiliation
        if len(institution) == 1:
            full_name_110_a = get_fieldvalues(institution[0], '110__a')
            if full_name_110_a:
                full_name = str(full_name_110_a[0])
            full_name_110_b = get_fieldvalues(institution[0], '110__b')
            if full_name_110_b:
                full_name += ', ' + str(full_name_110_b[0])
        affiliation = [i, '', affiliation, '', full_name, '', True, '']
        affiliation_list.append(affiliation)

    output.update({'affiliations': affiliation_list})
    output.update({'paper_title': paper_title,
                   'collaboration': collaboration_name,
                   'experiment_number': experiment_number,
                   'last_modified': int(time.time()),
                   'reference_ids': [],
                   'paper_id': '1'})

    return output
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    Runs the whole BibEncode batch pipeline for one record: locate or
    add the video master, run the encode/extract jobs, fix BibDoc/MARC
    metadata, optionally delete the input and notify user/admin.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype; int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + \
            str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        # Context manager guarantees the file is closed before bibupload
        # is queued (was: file(...)/write/close).
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and
                        description == m_description and
                        subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## BUGFIX: aspect was read from hard-coded record
                        ## 124 instead of the batch job's own record.
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                                % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = \
        decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or \
            getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                                % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master')
            )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format
            )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1  # 1 = all jobs so far succeeded
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                            % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and \
                    getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None

            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")

            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                bibdoc_video_directory,
                bibdoc_slave_video_docname,
                bibdoc_video_extension
            )
            _task_write_message("Transcoding %s to %s;%s"
                                % (bibdoc_slave_video_docname,
                                   bibdoc_video_extension,
                                   bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            ## BUGFIX: abitrate/vbitrate were swapped (audio bitrate was
            ## fed from 'videobitrate' and vice versa).
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message
            )
            return_code &= encoding_result

            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'),
                        bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                                 getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = \
                        os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s"
                                        % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)
                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                        % (bibdoc_frame_docname,
                                           getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        ## BUGFIX: condition was `if not return_code`, which deleted the
        ## input file on *failure* -- the opposite of the stated intent.
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in \
                    getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename',
                                      batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: was `if type(getval(...) == type(str()))`, which
            ## takes type() of a boolean and is therefore always truthy;
            ## intent is to pass the address only when it is a string.
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                                 getval(batch_job, 'submission_filename',
                                        batch_job['input']),
                                 getval(batch_job, 'recid'),
                                 getval(batch_job, 'submission_title', ""))
    # NOTE(review): the function returns 1 even when return_code is 0;
    # the docstring promises 0 on failure -- confirm callers before
    # changing this.
    return 1
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    Returns None when no record with ``recid`` exists.
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, "xm")
    return create_record(marcxml)[0]
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Prints record 'sysno' formatted according to 'format'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(sysno) function (in order not to call that
    function again if already done.)
    """
    out = ""

    # sanity check:
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)
    if not _record_exists:
        return

    # Dublin Core aliases collapse onto the internal 'xd' format.
    if (format == "dc") or (format == "oai_dc"):
        format = "xd"

    # print record opening tags:
    out = out + " <record>\n"
    if _record_exists == -1:  # Deleted?
        if CFG_OAI_DELETED_POLICY == "persistent" or \
               CFG_OAI_DELETED_POLICY == "transient":
            out = out + " <header status=\"deleted\">\n"
        else:
            # Policy 'no': deleted records are never exposed at all.
            return
    else:
        out = out + " <header>\n"

    # OAI header: identifier(s), datestamp and set memberships.
    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        out = "%s <identifier>%s</identifier>\n" % (out,
                                                    escape_space(ident))
    out = "%s <datestamp>%s</datestamp>\n" % (out,
                                              get_modification_date(sysno))
    for set in get_field(sysno, CFG_OAI_SET_FIELD):
        if set:  # Print only if field not empty
            out = "%s <setSpec>%s</setSpec>\n" % (out, set)
    out = out + " </header>\n"

    if _record_exists == -1:  # Deleted?
        # Deleted records carry only the header, no metadata.
        pass
    else:
        out = out + " <metadata>\n"

        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed
                # Rewrite plain MARCXML tags into the marc: namespaced
                # form expected by the OAI envelope, and inject the
                # schemaLocation plus a dummy leader.
                formatted_record = formatted_record.replace(
                    "<record>",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">\n <marc:leader>00000coc 2200000uu 4500</marc:leader>")
                formatted_record = formatted_record.replace(
                    "<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">\n <marc:leader>00000coc 2200000uu 4500</marc:leader>")
                formatted_record = formatted_record.replace("</record", "</marc:record")
                formatted_record = formatted_record.replace("<controlfield", "<marc:controlfield")
                formatted_record = formatted_record.replace("</controlfield", "</marc:controlfield")
                formatted_record = formatted_record.replace("<datafield", "<marc:datafield")
                formatted_record = formatted_record.replace("</datafield", "</marc:datafield")
                formatted_record = formatted_record.replace("<subfield", "<marc:subfield")
                formatted_record = formatted_record.replace("</subfield", "</marc:subfield")
                out += formatted_record
            else:
                ## MARCXML is not formatted in the database, so produce it.
                out = out + " <marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">"
                out = out + " <marc:leader>00000coc 2200000uu 4500</marc:leader>"
                out = "%s <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (out, int(sysno))

                # Walk every bibXXx table pair; rows arrive ordered by
                # field_number then tag, so consecutive rows of the same
                # field instance can share one datafield element.
                for digit1 in range(0, 10):
                    for digit2 in range(0, 10):
                        bibbx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx)
                        res = run_sql(query, (sysno, '%d%d%%' % (digit1, digit2)))
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            # '_' is the stored placeholder for a blank indicator.
                            if ind1 == "_":
                                ind1 = " "
                            if ind2 == "_":
                                ind2 = " "
                            # print field tag
                            if field_number != field_number_old or \
                                   field[:-1] != field_old[:-1]:
                                if format == "marcxml":
                                    # New field instance: close the previous
                                    # element (if any) and open a new one.
                                    if field_number_old != -999:
                                        if field_old[0:2] == "00":
                                            out = out + " </marc:controlfield>\n"
                                        else:
                                            out = out + " </marc:datafield>\n"
                                    if field[0:2] == "00":
                                        out = "%s <marc:controlfield tag=\"%s\">\n" % (out, encode_for_xml(field[0:3]))
                                    else:
                                        out = "%s <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower())
                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            if format == "marcxml":
                                value = encode_for_xml(value)
                                if(field[0:2] == "00"):
                                    # Control fields have bare values,
                                    # no subfield elements.
                                    out = "%s %s\n" % (out, value)
                                else:
                                    out = "%s <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (out, encode_for_xml(field[-1:]), value)
                            # fetch next subfield
                        # all fields/subfields printed in this run, so close the tag:
                        if (format == "marcxml") and field_number_old != -999:
                            if field_old[0:2] == "00":
                                out = out + " </marc:controlfield>\n"
                            else:
                                out = out + " </marc:datafield>\n"
                out = out + " </marc:record>\n"
        elif format == "xd":
            out += format_record(sysno, 'xoaidc')

    # print record closing tags:
    out = out + " </metadata>\n"
    out = out + " </record>\n"

    return out
def __call__(self, req, form):
    """Serve the detailed-record page for ``self.recid``.

    Washes the submitted form into search arguments, enforces
    per-user limits (records-per-group, wildcard limit, verbosity),
    checks that the user may view this record, redirects a
    deleted-and-merged record to its successor, and finally delegates
    rendering to ``perform_request_search``.

    :param req: mod_python request object
    :param form: raw form fields, washed via ``wash_search_urlargd``
    :return: the rendered page content, a redirect, or a
        "not authorized" page
    """
    argd = wash_search_urlargd(form)
    # Pin the washed arguments to this URL's record, tab and format.
    argd['recid'] = self.recid
    argd['tab'] = self.tab
    if self.format is not None:
        argd['of'] = self.format

    # Expose the washed arguments to downstream code via the request.
    req.argd = argd

    uid = getUid(req)
    if uid == -1:
        # uid == -1 is treated as "no access at all" here.
        return page_not_authorized(req, "../",
            text="You are not authorized to view this record.",
            navmenuid='search')
    elif uid > 0:
        pref = get_user_preferences(uid)
        try:
            if not form.has_key('rg'):
                # fetch user rg preference only if not overridden via URL
                argd['rg'] = int(pref['websearch_group_records'])
        except (KeyError, ValueError):
            # Missing or malformed preference: keep the washed default.
            pass

    user_info = collect_user_info(req)
    (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

    # Cap the records-per-group size for users lacking the 'runbibedit'
    # privilege (acc_authorize_action()[0] == 0 means authorized).
    if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
        argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

    #check if the user has rights to set a high wildcard limit
    #if not, reduce the limit set by user, with the default one
    if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
        if acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

    # only superadmins can use verbose parameter for obtaining debug information
    if not isUserSuperAdmin(user_info):
        argd['verbose'] = 0

    if auth_code and user_info['email'] == 'guest':
        # Guest without rights: send to the login page carrying a
        # cookie that re-authorizes the viewing action after login.
        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
            make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        # Logged-in user without sufficient rights.
        return page_not_authorized(req, "../", \
            text=auth_msg, \
            navmenuid='search')

    from invenio.search_engine import record_exists, get_merged_recid
    # check if the current record has been deleted
    # and has been merged, case in which the deleted record
    # will be redirect to the new one
    record_status = record_exists(argd['recid'])
    merged_recid = get_merged_recid(argd['recid'])
    if record_status == -1 and merged_recid:
        # record_exists() == -1 marks a deleted record; point the
        # client at the record it was merged into.
        # NOTE(review): no explicit 'return' here — presumably
        # redirect_to_url() aborts the request itself; confirm.
        url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s'
        url %= (str(merged_recid), argd['ln'])
        redirect_to_url(req, url)

    # mod_python does not like to return [] in case when of=id:
    out = perform_request_search(req, **argd)
    if out == []:
        return str(out)
    else:
        return out
def display(self, req, form):
    """Show the tab 'holdings' of the detailed record page.

    Validates that the record exists, checks view authorization,
    fetches the holdings information, builds the ordered tab bar,
    and assembles the full page (header + tabs + body + footer).

    :param req: mod_python request object
    :param form: raw form fields, washed via ``wash_urlargd``
    :return: rendered page string, a login redirect for guests, or a
        "not authorized" page
    """
    # Wash the URL arguments with their defaults (sort order, display
    # settings, pagination, voting/report flags).
    argd = wash_urlargd(form, {'do': (str, "od"),
                               'ds': (str, "all"),
                               'nb': (int, 100),
                               'p': (int, 1),
                               'voted': (int, -1),
                               'reported': (int, -1),
                               })

    _ = gettext_set_language(argd['ln'])

    record_exists_p = record_exists(self.recid)
    if record_exists_p != 1:
        # record_exists() returns -1 for deleted records, 0 for unknown.
        if record_exists_p == -1:
            msg = _("The record has been deleted.")
        else:
            msg = _("Requested record does not seem to exist.")
        msg = '<span class="quicknote">' + msg + '</span>'
        title, description, keywords = \
            websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])
        return page(title=title,
                    show_title_p=False,
                    body=msg,
                    description=description,
                    keywords=keywords,
                    uid=getUid(req),
                    language=argd['ln'],
                    req=req,
                    navmenuid='search')

    body = perform_get_holdings_information(self.recid, req, argd['ln'])

    uid = getUid(req)
    user_info = collect_user_info(req)
    (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)
    if auth_code and user_info['email'] == 'guest':
        # Guest without rights: redirect to login with a cookie that
        # re-authorizes the viewing action afterwards.
        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection': guess_primary_collection_of_a_record(self.recid)})
        target = '/youraccount/login' + \
            make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_URL + user_info['uri']}, {})
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        # Logged-in user without sufficient rights.
        return page_not_authorized(req, "../", \
            text = auth_msg)

    # Build the tab bar: fetch all tabs for the record's primary
    # collection and sort them by their configured 'order' value.
    unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(self.recid)),
                                            self.recid,
                                            ln=argd['ln'])
    ordered_tabs_id = [(tab_id, values['order'])
                       for (tab_id, values) in unordered_tabs.iteritems()]
    ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))

    # Propagate a non-default language in the tab links.
    link_ln = ''
    if argd['ln'] != CFG_SITE_LANG:
        link_ln = '?ln=%s' % argd['ln']

    # Each tab: (label, URL, is-current-tab, enabled); 'holdings' is
    # marked as the active tab; hidden tabs are filtered out.
    tabs = [(unordered_tabs[tab_id]['label'], \
             '%s/record/%s/%s%s' % (CFG_SITE_URL, self.recid, tab_id, link_ln), \
             tab_id in ['holdings'],
             unordered_tabs[tab_id]['enabled']) \
            for (tab_id, _order) in ordered_tabs_id
            if unordered_tabs[tab_id]['visible'] == True]

    top = webstyle_templates.detailed_record_container_top(self.recid,
                                                           tabs,
                                                           argd['ln'])
    bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                 tabs,
                                                                 argd['ln'])

    title = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])[0]
    navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln'])
    navtrail += ' &gt; <a class="navtrail" href="%s/record/%s?ln=%s">' % (CFG_SITE_URL, self.recid, argd['ln']) if False else ''
    # NOTE(review): navtrail built below exactly as in the original —
    # header link, record title, closing anchor.
    navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln'])
    navtrail += ' > <a class="navtrail" href="%s/record/%s?ln=%s">' % (CFG_SITE_URL, self.recid, argd['ln'])
    navtrail += title
    navtrail += '</a>'

    # Assemble header + search-page wrappers + tab container + footer.
    return pageheaderonly(title=title,
                          navtrail=navtrail,
                          uid=uid,
                          verbose=1,
                          req=req,
                          metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_URL,
                          language=argd['ln'],
                          navmenuid='search',
                          navtrail_append_title_p=0) + \
           websearch_templates.tmpl_search_pagestart(argd['ln']) + \
           top + body + bottom + \
           websearch_templates.tmpl_search_pageend(argd['ln']) + \
           pagefooteronly(lastupdated=__lastupdated__, language=argd['ln'], req=req)