3: _("File already exists"), 4: _("A file with the same name and format already exists"), 5: _("No rights to upload to collection '%s'") } # Create directory DONE/ if doesn't exist folder = (folder[-1] == "/") and folder or (folder + "/") files_done_dir = folder + "DONE/" try: os.mkdir(files_done_dir) except OSError: # Directory exists or no write permission pass for docfile in files: if os.path.isfile(os.path.join(folder, docfile)): info[0] += 1 identifier = file_strip_ext(docfile) extension = docfile[len(identifier):] rec_id = None if identifier: if matching == 'smart': for comment, pattern, fields in FILE_SEARCH_PATTERN: g = pattern.match(docfile) if g: query = ' '.join( map( lambda key, value: '%s:"%s"' % (key, value), fields, g.groups())) write_message("%s smart matched via %s: %s" % (docfile, comment, query)) rec_id = search_pattern(p=query) break
def _lookup(self, component, path): # after /<CFG_SITE_RECORD>/<recid>/files/ every part is used as the file # name filename = component def getfile(req, form): args = wash_urlargd(form, websubmit_templates.files_default_urlargd) ln = args["ln"] _ = gettext_set_language(ln) uid = getUid(req) user_info = collect_user_info(req) verbose = args["verbose"] if verbose >= 1 and not isUserSuperAdmin(user_info): # Only SuperUser can see all the details! verbose = 0 if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1: return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid="submit") if record_exists(self.recid) < 1: msg = "<p>%s</p>" % _("Requested record does not seem to exist.") return warningMsg(msg, req, CFG_SITE_NAME, ln) if record_empty(self.recid): msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.") return warningMsg(msg, req, CFG_SITE_NAME, ln) (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid) if auth_code and user_info["email"] == "guest": cookie = mail_cookie_create_authorize_action( VIEWRESTRCOLL, {"collection": guess_primary_collection_of_a_record(self.recid)} ) target = "/youraccount/login" + make_canonical_urlargd( {"action": cookie, "ln": ln, "referer": CFG_SITE_URL + user_info["uri"]}, {} ) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", text=auth_message) readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1 # From now on: either the user provided a specific file # name (and a possible version), or we return a list of # all the available files. In no case are the docids # visible. 
try: bibarchive = BibRecDocs(self.recid) except InvenioWebSubmitFileError, e: register_exception(req=req, alert_admin=True) msg = "<p>%s</p><p>%s</p>" % ( _("The system has encountered an error in retrieving the list of files for this document."), _("The error has been logged and will be taken in consideration as soon as possible."), ) return warningMsg(msg, req, CFG_SITE_NAME, ln) if bibarchive.deleted_p(): return print_warning(req, _("Requested record does not seem to exist.")) docname = "" format = "" version = "" warn = "" if filename: # We know the complete file name, guess which docid it # refers to ## TODO: Change the extension system according to ext.py from setlink ## and have a uniform extension mechanism... docname = file_strip_ext(filename) format = filename[len(docname) :] if format and format[0] != ".": format = "." + format if args["subformat"]: format += ";%s" % args["subformat"] else: docname = args["docname"] if not format: format = args["format"] if args["subformat"]: format += ";%s" % args["subformat"] if not version: version = args["version"] # version could be either empty, or all or an integer try: int(version) except ValueError: if version != "all": version = "" display_hidden = isUserSuperAdmin(user_info) if version != "all": # search this filename in the complete list of files for doc in bibarchive.list_bibdocs(): if docname == doc.get_docname(): try: docfile = doc.get_file(format, version) (auth_code, auth_message) = docfile.is_restricted(user_info) if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid): if CFG_WEBSUBMIT_ICON_SUBFORMAT_RE.match(get_subformat_from_format(format)): return stream_restricted_icon(req) if user_info["email"] == "guest": cookie = mail_cookie_create_authorize_action( "viewrestrdoc", {"status": docfile.get_status()} ) target = "/youraccount/login" + make_canonical_urlargd( {"action": cookie, "ln": ln, "referer": CFG_SITE_URL + user_info["uri"]}, {} ) redirect_to_url(req, target) else: req.status 
= apache.HTTP_UNAUTHORIZED warn += print_warning(_("This file is restricted: ") + auth_message) break if not docfile.hidden_p(): if not readonly: ip = str(req.remote_ip) res = doc.register_download(ip, version, format, uid) try: return docfile.stream(req) except InvenioWebSubmitFileError, msg: register_exception(req=req, alert_admin=True) req.status = apache.HTTP_INTERNAL_SERVER_ERROR return warningMsg( _("An error has happened in trying to stream the request file."), req, CFG_SITE_NAME, ln, ) else: req.status = apache.HTTP_UNAUTHORIZED warn = print_warning(_("The requested file is hidden and can not be accessed.")) except InvenioWebSubmitFileError, msg: register_exception(req=req, alert_admin=True)
def getfile(req, form): args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd) ln = args['ln'] _ = gettext_set_language(ln) uid = getUid(req) user_info = collect_user_info(req) verbose = args['verbose'] if verbose >= 1 and not isUserSuperAdmin(user_info): # Only SuperUser can see all the details! verbose = 0 if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1: return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid='submit') if record_exists(self.recid) < 1: msg = "<p>%s</p>" % _( "Requested record does not seem to exist.") return warning_page(msg, req, ln) if record_empty(self.recid): msg = "<p>%s</p>" % _( "Requested record does not seem to have been integrated.") return warning_page(msg, req, ln) (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid) if auth_code and user_info['email'] == 'guest': if webjournal_utils.is_recid_in_released_issue(self.recid): # We can serve the file pass else: cookie = mail_cookie_create_authorize_action( VIEWRESTRCOLL, { 'collection': guess_primary_collection_of_a_record(self.recid) }) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: if webjournal_utils.is_recid_in_released_issue(self.recid): # We can serve the file pass else: return page_not_authorized(req, "../", \ text = auth_message) readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1 # From now on: either the user provided a specific file # name (and a possible version), or we return a list of # all the available files. In no case are the docids # visible. try: bibarchive = BibRecDocs(self.recid) except InvenioBibDocFileError: register_exception(req=req, alert_admin=True) msg = "<p>%s</p><p>%s</p>" % ( _("The system has encountered an error in retrieving the list of files for this document." 
), _("The error has been logged and will be taken in consideration as soon as possible." )) return warning_page(msg, req, ln) if bibarchive.deleted_p(): req.status = apache.HTTP_GONE return warning_page( _("Requested record does not seem to exist."), req, ln) docname = '' docformat = '' version = '' warn = '' if filename: # We know the complete file name, guess which docid it # refers to ## TODO: Change the extension system according to ext.py from setlink ## and have a uniform extension mechanism... docname = file_strip_ext(filename) docformat = filename[len(docname):] if docformat and docformat[0] != '.': docformat = '.' + docformat if args['subformat']: docformat += ';%s' % args['subformat'] else: docname = args['docname'] if not docformat: docformat = args['format'] if args['subformat']: docformat += ';%s' % args['subformat'] if not version: version = args['version'] ## Download as attachment is_download = False if args['download']: is_download = True # version could be either empty, or all or an integer try: int(version) except ValueError: if version != 'all': version = '' display_hidden = isUserSuperAdmin(user_info) if version != 'all': # search this filename in the complete list of files for doc in bibarchive.list_bibdocs(): if docname == bibarchive.get_docname(doc.id): try: try: docfile = doc.get_file(docformat, version) except InvenioBibDocFileError, msg: req.status = apache.HTTP_NOT_FOUND if not CFG_INSPIRE_SITE and req.headers_in.get( 'referer'): ## There must be a broken link somewhere. 
## Maybe it's good to alert the admin register_exception(req=req, alert_admin=True) warn += write_warning( _("The format %s does not exist for the given version: %s" ) % (cgi.escape(docformat), cgi.escape(str(msg)))) break (auth_code, auth_message) = docfile.is_restricted(user_info) if auth_code != 0 and not is_user_owner_of_record( user_info, self.recid): if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match( get_subformat_from_format(docformat)): return stream_restricted_icon(req) if user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action( 'viewrestrdoc', {'status': docfile.get_status()}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri']}, {}) redirect_to_url(req, target) else: req.status = apache.HTTP_UNAUTHORIZED warn += write_warning( _("This file is restricted: ") + str(auth_message)) break if not docfile.hidden_p(): if not readonly: ip = str(req.remote_ip) doc.register_download( ip, docfile.get_version(), docformat, uid, self.recid) try: return docfile.stream(req, download=is_download) except InvenioBibDocFileError, msg: register_exception(req=req, alert_admin=True) req.status = apache.HTTP_INTERNAL_SERVER_ERROR warn += write_warning( _("An error has happened in trying to stream the request file." )) else: req.status = apache.HTTP_UNAUTHORIZED warn += write_warning( _("The requested file is hidden and can not be accessed." )) except InvenioBibDocFileError, msg: register_exception(req=req, alert_admin=True)
def get_files(bfo, distinguish_main_and_additional_files=True):
    """
    Returns the files available for the given record.

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
      - parsed_urls: contains categorized URLS (see details below)
      - old_versions: set to True if we can have access to old versions
      - additionals: set to True if we have other documents than the
        'main' document

    'parsed_urls' is a dictionary in the form:
    {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/record/1/files/aFile.pdf', 'aFile', 'PDF'),
                                   ('http://CFG_SITE_URL/record/1/files/aFile.gif', 'aFile', 'GIF')],
                    'Additional': [('http://CFG_SITE_URL/record/1/files/bFile.pdf', 'bFile', 'PDF')]},
     'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y)
                    ('http://externalurl.com/bFile.pdf', 'Fulltext')],
     'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),         # url(8564_u), description(8564_z/y)
                    ('http://cern.ch/bFile.pdf', 'Fulltext')],
    }

    Some notes about returned structure:
      - key 'cern_urls' is only available on CERN site
      - keys in main_url dictionaries are defined by the BibDoc.
      - older versions are not part of the parsed urls
      - returns only main files when possible, that is when doctypes
        make a distinction between 'Main' files and other files. Otherwise
        returns all the files as main. This is only enabled if
        distinguish_main_and_additional_files is set to True
    """
    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
                          # set to True if older files are found.
    additionals = False   # We have additional files. Will be set to
                          # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls': {},    # Urls hosted by Invenio (bibdocs)
                   'others_urls': []}  # External urls
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files.
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' in complete_url:
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]

            descr = ''
            if 'y' in complete_url:
                descr = complete_url['y']
            if not url.startswith(CFG_SITE_URL):
                # Not a bibdoc?
                if not descr:
                    # For non-bibdocs always show a description:
                    # fall back to displaying the URL in full.
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    # BUGFIX: the filter used to be "len(part) == 2", i.e. the
                    # length of the raw "key=value" string, which discarded
                    # nearly every parameter. Keep only parts that split into
                    # exactly one key and one value.
                    url_params_dict = dict([part.split('=')
                                            for part in params.split('&')
                                            if len(part.split('=')) == 2])
                    if 'categ' in url_params_dict and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           'id' in url_params_dict:
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]: #,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append((url, descr))  # external url
            else:
                # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if any(f.fullname.startswith(filename)
                           for f in doc.list_all_files()):
                        assigned = True
                        #doc.getIcon()
                        if not doc.doctype == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if descr not in parsed_urls['main_urls']:
                                parsed_urls['main_urls'][descr] = []
                            parsed_urls['main_urls'][descr].append((url, name, url_format))
                if not assigned:
                    # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr))  # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)
return errors, info err_desc = {1: _("More than one possible recID, ambiguous behaviour"), 2: _("No records match that file name"), 3: _("File already exists"), 4: _("A file with the same name and format already exists"), 5: _("No rights to upload to collection '%s'")} # Create directory DONE/ if doesn't exist folder = (folder[-1] == "/") and folder or (folder + "/") files_done_dir = folder + "DONE/" try: os.mkdir(files_done_dir) except OSError: # Directory exists or no write permission pass for docfile in files: if os.path.isfile(os.path.join(folder, docfile)): info[0] += 1 identifier = file_strip_ext(docfile) extension = docfile[len(identifier):] rec_id = None if identifier: rec_id = search_pattern(p=identifier, f=matching, m='e') if not rec_id: errors.append((docfile, err_desc[2])) continue elif len(rec_id) > 1: errors.append((docfile, err_desc[1])) continue else: rec_id = str(list(rec_id)[0]) rec_info = BibRecDocs(rec_id) if rec_info.bibdocs: for bibdoc in rec_info.bibdocs:
def getfile(req, form): args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd) ln = args['ln'] _ = gettext_set_language(ln) uid = getUid(req) user_info = collect_user_info(req) verbose = args['verbose'] if verbose >= 1 and not isUserSuperAdmin(user_info): # Only SuperUser can see all the details! verbose = 0 if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1: return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid='submit') if record_exists(self.recid) < 1: msg = "<p>%s</p>" % _("Requested record does not seem to exist.") return warning_page(msg, req, ln) if record_empty(self.recid): msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.") return warning_page(msg, req, ln) (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid) if auth_code and user_info['email'] == 'guest': if webjournal_utils.is_recid_in_released_issue(self.recid): # We can serve the file pass else: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: if webjournal_utils.is_recid_in_released_issue(self.recid): # We can serve the file pass else: return page_not_authorized(req, "../", \ text = auth_message) readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1 # From now on: either the user provided a specific file # name (and a possible version), or we return a list of # all the available files. In no case are the docids # visible. 
try: bibarchive = BibRecDocs(self.recid) except InvenioBibDocFileError: register_exception(req=req, alert_admin=True) msg = "<p>%s</p><p>%s</p>" % ( _("The system has encountered an error in retrieving the list of files for this document."), _("The error has been logged and will be taken in consideration as soon as possible.")) return warning_page(msg, req, ln) if bibarchive.deleted_p(): req.status = apache.HTTP_GONE return warning_page(_("Requested record does not seem to exist."), req, ln) docname = '' docformat = '' version = '' warn = '' if filename: # We know the complete file name, guess which docid it # refers to ## TODO: Change the extension system according to ext.py from setlink ## and have a uniform extension mechanism... docname = file_strip_ext(filename) docformat = filename[len(docname):] if docformat and docformat[0] != '.': docformat = '.' + docformat if args['subformat']: docformat += ';%s' % args['subformat'] else: docname = args['docname'] if not docformat: docformat = args['format'] if args['subformat']: docformat += ';%s' % args['subformat'] if not version: version = args['version'] ## Download as attachment is_download = False if args['download']: is_download = True # version could be either empty, or all or an integer try: int(version) except ValueError: if version != 'all': version = '' display_hidden = isUserSuperAdmin(user_info) if version != 'all': # search this filename in the complete list of files for doc in bibarchive.list_bibdocs(): if docname == bibarchive.get_docname(doc.id): try: try: docfile = doc.get_file(docformat, version) except InvenioBibDocFileError, msg: req.status = apache.HTTP_NOT_FOUND if req.headers_in.get('referer'): ## There must be a broken link somewhere. 
## Maybe it's good to alert the admin register_exception(req=req, alert_admin=True) warn += write_warning(_("The format %s does not exist for the given version: %s") % (cgi.escape(docformat), cgi.escape(str(msg)))) break (auth_code, auth_message) = docfile.is_restricted(user_info) if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid): if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)): return stream_restricted_icon(req) if user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri']}, {}) redirect_to_url(req, target) else: req.status = apache.HTTP_UNAUTHORIZED warn += write_warning(_("This file is restricted: ") + str(auth_message)) break if not docfile.hidden_p(): if not readonly: ip = str(req.remote_ip) doc.register_download(ip, version, docformat, uid) try: return docfile.stream(req, download=is_download) except InvenioBibDocFileError, msg: register_exception(req=req, alert_admin=True) req.status = apache.HTTP_INTERNAL_SERVER_ERROR warn += write_warning(_("An error has happened in trying to stream the request file.")) else: req.status = apache.HTTP_UNAUTHORIZED warn += write_warning(_("The requested file is hidden and can not be accessed.")) except InvenioBibDocFileError, msg: register_exception(req=req, alert_admin=True)
def get_files(bfo, distinguish_main_and_additional_files=True):
    """
    Returns the files available for the given record.

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
      - parsed_urls: contains categorized URLS (see details below)
      - old_versions: set to True if we can have access to old versions
      - additionals: set to True if we have other documents than the
        'main' document

    'parsed_urls' is a dictionary in the form:
    {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/record/1/files/aFile.pdf', 'aFile', 'PDF'),
                                   ('http://CFG_SITE_URL/record/1/files/aFile.gif', 'aFile', 'GIF')],
                    'Additional': [('http://CFG_SITE_URL/record/1/files/bFile.pdf', 'bFile', 'PDF')]},
     'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y)
                    ('http://externalurl.com/bFile.pdf', 'Fulltext')],
     'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),         # url(8564_u), description(8564_z/y)
                    ('http://cern.ch/bFile.pdf', 'Fulltext')],
    }

    Some notes about returned structure:
      - key 'cern_urls' is only available on CERN site
      - keys in main_url dictionaries are defined by the BibDoc.
      - older versions are not part of the parsed urls
      - returns only main files when possible, that is when doctypes
        make a distinction between 'Main' files and other files. Otherwise
        returns all the files as main. This is only enabled if
        distinguish_main_and_additional_files is set to True
    """
    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
                          # set to True if older files are found.
    additionals = False   # We have additional files. Will be set to
                          # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},   # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files.
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' in complete_url:
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]

            # VS change y to 3 and add z and q also if exists
            descr = ''
            if '3' in complete_url:
                if 'z' in complete_url and 'q' in complete_url:
                    descr = complete_url['3'] + ' [' + complete_url['q'] + ']' + \
                            ' [' + complete_url['z'] + ']'
                elif 'z' in complete_url:
                    descr = complete_url['3'] + ' [' + complete_url['z'] + ']'
                elif 'q' in complete_url:
                    descr = complete_url['3'] + ' [' + complete_url['q'] + ']'
                else:
                    descr = complete_url['3']

            if not url.startswith(CFG_SITE_URL):
                # Not a bibdoc?
                if not descr:
                    # For non-bibdocs always show a description:
                    # fall back to displaying the URL in full.
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    # BUGFIX: the filter used to be "len(part) == 2", i.e. the
                    # length of the raw "key=value" string, which discarded
                    # nearly every parameter. Keep only parts that split into
                    # exactly one key and one value.
                    url_params_dict = dict([part.split('=')
                                            for part in params.split('&')
                                            if len(part.split('=')) == 2])
                    if 'categ' in url_params_dict and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           'id' in url_params_dict:
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]: #,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append((url, descr))  # external url
            else:
                # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if any(f.fullname.startswith(filename)
                           for f in doc.list_all_files()):
                        assigned = True
                        #doc.getIcon()
                        if not doc.doctype == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if descr not in parsed_urls['main_urls']:
                                parsed_urls['main_urls'][descr] = []
                            parsed_urls['main_urls'][descr].append((url, name, url_format))
                if not assigned:
                    # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr))  # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)
def get_files(bfo, distinguish_main_and_additional_files=True,
              include_subformat_icons=False, hide_doctypes=None):
    """
    Returns the files available for the given record.

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
      - parsed_urls: contains categorized URLS (see details below)
      - old_versions: set to True if we can have access to old versions
      - additionals: set to True if we have other documents than the
        'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    Parameter hide_doctypes (list) decides which doctypes should not
    be included in the returned structure

    'parsed_urls' is a dictionary in the form::

        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},
         'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y)
                        ('http://externalurl.com/bFile.pdf', 'Fulltext')],
         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),         # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
      - key 'cern_urls' is only available on CERN site
      - keys in main_url dictionaries are defined by the BibDoc.
      - older versions are not part of the parsed urls
      - returns only main files when possible, that is when doctypes
        make a distinction between 'Main' files and other files. Otherwise
        returns all the files as main. This is only enabled if
        distinguish_main_and_additional_files is set to True
    """
    _ = gettext_set_language(bfo.lang)

    if hide_doctypes is None:
        hide_doctypes = []
    else:
        # BUGFIX: work on a copy so that the caller's list is not mutated
        # when we append extra doctypes to hide below.
        hide_doctypes = list(hide_doctypes)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
                          # set to True if older files are found.
    additionals = False   # We have additional files. Will be set to
                          # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},   # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    if any(url.get('u', '').startswith('http://arxiv.org/pdf/') for url in urls):
        # We have a link to arXiv PDF. We can hide the files on
        # CDS in some cases:
        hide_doctypes.append('CMSPUB_SOURCEF')
        hide_doctypes.append('ATLPUB_SOURCEF')
        hide_doctypes.append('LHCBPB_SOURCEF')

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files.
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' in complete_url:
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            descr = _("Fulltext")
            if 'y' in complete_url:
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL):
                # Not a bibdoc?
                if not descr:
                    # For non-bibdocs always show a description:
                    # fall back to displaying the URL in full.
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    # Keep only "key=value" parts that split into exactly one
                    # key and one value.
                    url_params_dict = dict([part.split('=')
                                            for part in params.split('&')
                                            if len(part.split('=')) == 2])
                    if 'categ' in url_params_dict and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           'id' in url_params_dict:
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]: #,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append((url, descr))  # external url
            else:
                # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if any(f.get_full_name().startswith(filename)
                           for f in doc.list_all_files()):
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        doctype = doc.get_doctype(bfo.recID)
                        if doctype in hide_doctypes:
                            continue
                        if not doctype == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if descr not in parsed_urls['main_urls']:
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict['subformat'][0]
                            parsed_urls['main_urls'][descr].append((url, name, url_format))
                if not assigned:
                    # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr))  # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)
def get_files(bfo):
    """
    Returns the files available for the given record.

    Returned structure is a tuple
    (main_documents, additional_documents, external_urls) where each entry
    of the first two lists is a (url, description, format, size) tuple and
    each entry of the last one is a (url, description, format, 0) tuple.

    Special handling is applied for EPFL theses (980__a == 'THESIS' and
    973__a == 'EPFL'): restricted documents are replaced by a link
    produced by thesis_link() and descriptions are localized.
    """
    _ = gettext_set_language(bfo.lang)
    try:
        bibarchive = BibRecDocs(bfo.recID)
    except ValueError:
        # Sometimes recID is not an integer; in that case, return
        # empty structures instead of failing.
        return ([], [], [])
    main_documents = []
    additional_documents = []
    external_urls = []

    user_info = bfo.user_info
    # Before verifying access, check whether the user has a remote_ip;
    # an empty remote_ip means an internal call, for which only public
    # files are listed below.
    remote_ip = user_info.get('remote_ip', '')

    is_thesis = bfo.field("980__a") == 'THESIS' and bfo.field("973__a") == 'EPFL'
    is_published = bfo.field("973__s") == 'PUBLISHED'

    # Parse URLs from MARC 8564_ fields
    urls = bfo.fields("8564_")
    for complete_url in urls:
        # Skip fields without an URL ('u' subfield)
        if not complete_url.has_key('u'):
            continue
        # Remove icons (subformat 'x' marked as icon)
        if complete_url.has_key('x') and complete_url['x'].lower() == 'icon':
            continue
        url = complete_url['u']
        (dummy, host, path, dummy, params, dummy) = urlparse(url)
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        # File extension without the leading dot
        format = filename[len(name):]
        if format.startswith('.'):
            format = format[1:]

        if not url.startswith(CFG_SITE_URL) and not complete_url.get('i'):
            # Not a bibdoc? Record it as an external URL (size unknown -> 0).
            descr = complete_url.get('z', 'URL')
            external_urls.append((url, descr, format, 0))
        else:
            # It's a bibdoc!
            if complete_url.get('i') == 'EXTERNAL':
                filename = complete_url.get('z') or basename(complete_url['u'])
                if is_thesis and complete_url.get('x') == 'RESTRICTED':
                    if not complete_url.get('z'):
                        filename = _("Fulltext")
                    if not remote_ip:
                        # No real access: point to the thesis link instead
                        # of the restricted file.
                        main_documents.append((thesis_link(bfo), filename,
                                               basename(complete_url['u']).split('.')[-1], 0))
                        continue
                    if acc_authorize_action(bfo.user_info, 'viewrestrdoc',
                                            status='RESTRICTED')[0] == 1:
                        # No real access (authorization denied): same fallback.
                        main_documents.append((thesis_link(bfo), filename,
                                               basename(complete_url['u']).split('.')[-1], 0))
                        continue
                # 909C0p containing 'SAR' marks a digitalized project
                # (presumably a scanned archive) -- TODO confirm.
                is_sar = 'SAR' in bfo.fields('909C0p')
                if is_sar:
                    main_documents.append((url, _("Get the whole digitalized project"), '', 0))
                    continue
                main_documents.append((complete_url['u'], filename,
                                       basename(complete_url['u']).split('.')[-1], 0))
            else:
                # Internal: match the URL against the record's bibdocs.
                for doc in bibarchive.list_bibdocs():
                    size = doc.get_total_size_latest_version()
                    descr = doc.get_description(format)
                    if True in [f.fullname.startswith(filename)
                                for f in doc.list_all_files()]:
                        # Skip icon documents
                        if doc.status and doc.status.lower() == 'icon':
                            continue
                        restriction = doc.list_latest_files()[0].status
                        # No ip = no access: only show the public files.
                        if not remote_ip:
                            if restriction not in ('LAB', 'RESTRICTED', 'PRIVATE', 'DELETED'):
                                if doc.get_type().lower() == 'main':
                                    if not descr or descr.lower() == 'n/a':
                                        descr = name
                                    if is_thesis:
                                        descr = _("Fulltext")
                                    # Avoid duplicate URLs in main_documents
                                    if not url in [m_url for (m_url, m_descr, m_format, m_size)
                                                   in main_documents]:
                                        main_documents.append((url, descr, format, size))
                                else:
                                    if not descr or descr.lower() == 'n/a':
                                        descr = name
                                    # Avoid duplicate URLs in additional_documents
                                    if not url in [m_url for (m_url, junk, junk, junk)
                                                   in additional_documents]:
                                        additional_documents.append((url, descr, format, size))
                            continue
                        #try:
                        # Skip documents the current user may not view.
                        if doc.list_latest_files()[0].is_restricted(bfo.user_info)[0] == 1:
                            continue
                        #except:
                        #    restricted = 0
                        if doc.get_type().lower() == 'main':
                            if not descr or descr.lower() == 'n/a':
                                descr = name
                            if is_thesis:
                                if restriction == 'RESTRICTED':
                                    descr = _("EPFL intranet: Fulltext")
                                else:
                                    descr = "Texte intégral / Full text"
                            # Avoid duplicate URLs in main_documents
                            if not url in [m_url for (m_url, m_descr, m_format, m_size)
                                           in main_documents]:
                                main_documents.append((url, descr, format, size))
                        else:
                            if not descr or descr.lower() == 'n/a':
                                descr = name
                            if is_thesis and restriction == 'RESTRICTED':
                                descr = _("EPFL intranet: %s") % descr
                            # Avoid duplicate URLs in additional_documents
                            if not url in [m_url for (m_url, junk, junk, junk)
                                           in additional_documents]:
                                additional_documents.append((url, descr, format, size))

    # Published EPFL thesis without any accessible main document:
    # offer the "order free pdf" link instead.
    if is_thesis and not main_documents and is_published:
        main_documents.append((thesis_link(bfo), _("Order free pdf"), 'pdf', 0))

    return (main_documents, additional_documents, external_urls)