def getfile(req, form):
    """
    Check access to a record and stream the requested file attached to it.

    The record id is read from ``self.recid`` and the requested file name
    from ``filename``; neither is a parameter, so both must be provided by
    the enclosing scope (not shown here).

    Steps, in order:
      - wash the URL arguments found in 'form';
      - refuse the request when getUid() returns -1 or when
        CFG_ACCESS_CONTROL_LEVEL_SITE > 1;
      - answer with a warning page when the record does not exist, is
        empty, or when its BibRecDocs cannot be built or is deleted;
      - apply record-level restrictions: guests are redirected to the
        login page, other users get a "not authorized" page, unless
        webjournal_utils.is_recid_in_released_issue() holds;
      - work out docname / docformat / version from 'filename' and from
        the washed arguments;
      - unless version is 'all', look for the bibdoc called docname and
        stream the matching file, accumulating warnings in 'warn' and
        setting req.status when something goes wrong.

    Parameters:
      - req: the request object (its status, headers_in and remote_ip
        attributes are used here)
      - form: the raw URL arguments, washed against
        bibdocfile_templates.files_default_urlargd
    """
    args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
    ln = args['ln']
    _ = gettext_set_language(ln)

    uid = getUid(req)
    user_info = collect_user_info(req)

    verbose = args['verbose']
    if verbose >= 1 and not isUserSuperAdmin(user_info):
        # Only SuperUser can see all the details!
        verbose = 0

    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
        return page_not_authorized(
            req,
            "/%s/%s" % (CFG_SITE_RECORD, self.recid),
            navmenuid='submit')

    if record_exists(self.recid) < 1:
        return warning_page(
            "<p>%s</p>" % _("Requested record does not seem to exist."),
            req, ln)

    if record_empty(self.recid):
        return warning_page(
            "<p>%s</p>" % _("Requested record does not seem to have been integrated."),
            req, ln)

    (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
    if auth_code:
        is_guest = user_info['email'] == 'guest'
        # When is_recid_in_released_issue() is true the restriction is
        # waived and we can serve the file.
        if not webjournal_utils.is_recid_in_released_issue(self.recid):
            if not is_guest:
                return page_not_authorized(req, "../", text=auth_message)
            login_cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL,
                {'collection': guess_primary_collection_of_a_record(self.recid)})
            login_url = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd(
                    {'action': login_cookie,
                     'ln': ln,
                     'referer': CFG_SITE_SECURE_URL + user_info['uri']},
                    {})
            return redirect_to_url(req, login_url, norobot=True)

    readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

    # From now on: either the user provided a specific file
    # name (and a possible version), or we return a list of
    # all the available files. In no case are the docids
    # visible.
    try:
        bibarchive = BibRecDocs(self.recid)
    except InvenioBibDocFileError:
        register_exception(req=req, alert_admin=True)
        return warning_page(
            "<p>%s</p><p>%s</p>" % (
                _("The system has encountered an error in retrieving the list of files for this document."),
                _("The error has been logged and will be taken in consideration as soon as possible.")),
            req, ln)

    if bibarchive.deleted_p():
        req.status = apache.HTTP_GONE
        return warning_page(
            _("Requested record does not seem to exist."), req, ln)

    docname = ''
    docformat = ''
    version = ''
    warn = ''

    if filename:
        # We know the complete file name, guess which docid it
        # refers to
        ## TODO: Change the extension system according to ext.py from setlink
        ##       and have a uniform extension mechanism...
        docname = file_strip_ext(filename)
        docformat = filename[len(docname):]
        if docformat and not docformat.startswith('.'):
            docformat = '.' + docformat
        if args['subformat']:
            docformat += ';%s' % args['subformat']
    else:
        docname = args['docname']

    if not docformat:
        # No extension could be derived: fall back on the URL arguments.
        docformat = args['format']
        if args['subformat']:
            docformat += ';%s' % args['subformat']

    if not version:
        version = args['version']

    ## Download as attachment
    is_download = bool(args['download'])

    # version could be either empty, or all or an integer
    try:
        int(version)
    except ValueError:
        if version != 'all':
            version = ''

    display_hidden = isUserSuperAdmin(user_info)

    if version != 'all':
        # search this filename in the complete list of files
        for doc in bibarchive.list_bibdocs():
            if docname != bibarchive.get_docname(doc.id):
                continue
            try:
                try:
                    docfile = doc.get_file(docformat, version)
                except InvenioBibDocFileError as err:
                    req.status = apache.HTTP_NOT_FOUND
                    if not CFG_INSPIRE_SITE and req.headers_in.get('referer'):
                        ## There must be a broken link somewhere.
                        ## Maybe it's good to alert the admin
                        register_exception(req=req, alert_admin=True)
                    warn += write_warning(
                        _("The format %s does not exist for the given version: %s")
                        % (cgi.escape(docformat), cgi.escape(str(err))))
                    break

                (auth_code, auth_message) = docfile.is_restricted(user_info)
                if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                    if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                            get_subformat_from_format(docformat)):
                        return stream_restricted_icon(req)
                    if user_info['email'] == 'guest':
                        login_cookie = mail_cookie_create_authorize_action(
                            'viewrestrdoc', {'status': docfile.get_status()})
                        login_url = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd(
                                {'action': login_cookie,
                                 'ln': ln,
                                 'referer': CFG_SITE_SECURE_URL + user_info['uri']},
                                {})
                        # NOTE(review): unlike the record-level redirect
                        # above there is no 'return' here; if
                        # redirect_to_url() returns normally, execution
                        # continues with the hidden check and streaming
                        # below -- confirm this is intended.
                        redirect_to_url(req, login_url)
                    else:
                        req.status = apache.HTTP_UNAUTHORIZED
                        warn += write_warning(
                            _("This file is restricted: ") + str(auth_message))
                        break

                if docfile.hidden_p():
                    req.status = apache.HTTP_UNAUTHORIZED
                    warn += write_warning(
                        _("The requested file is hidden and can not be accessed."))
                else:
                    if not readonly:
                        # Downloads are only logged when the site is
                        # not in read-only mode.
                        client_ip = str(req.remote_ip)
                        doc.register_download(
                            client_ip, docfile.get_version(), docformat,
                            uid, self.recid)
                    try:
                        return docfile.stream(req, download=is_download)
                    except InvenioBibDocFileError:
                        register_exception(req=req, alert_admin=True)
                        req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                        warn += write_warning(
                            _("An error has happened in trying to stream the request file."))
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
def getfile(req, form):
    """
    Check access to a record and stream the requested file attached to it.

    The record id is read from ``self.recid`` and the requested file name
    from ``filename``; neither is a parameter, so both must be provided by
    the enclosing scope (not shown here).

    Steps, in order:
      - wash the URL arguments found in 'form';
      - refuse the request when getUid() returns -1 or when
        CFG_ACCESS_CONTROL_LEVEL_SITE > 1;
      - answer with a warning page when the record does not exist, is
        empty, or when its BibRecDocs cannot be built or is deleted;
      - apply record-level restrictions: guests are redirected to the
        login page, other users get a "not authorized" page, unless
        webjournal_utils.is_recid_in_released_issue() holds;
      - work out docname / docformat / version from 'filename' and from
        the washed arguments;
      - unless version is 'all', look for the bibdoc called docname and
        stream the matching file, accumulating warnings in 'warn' and
        setting req.status when something goes wrong.

    Parameters:
      - req: the request object (its status, headers_in and remote_ip
        attributes are used here)
      - form: the raw URL arguments, washed against
        bibdocfile_templates.files_default_urlargd
    """
    args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
    ln = args['ln']
    _ = gettext_set_language(ln)

    uid = getUid(req)
    user_info = collect_user_info(req)

    verbose = args['verbose']
    if verbose >= 1 and not isUserSuperAdmin(user_info):
        # Only SuperUser can see all the details!
        verbose = 0

    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
        return page_not_authorized(
            req,
            "/%s/%s" % (CFG_SITE_RECORD, self.recid),
            navmenuid='submit')

    if record_exists(self.recid) < 1:
        return warning_page(
            "<p>%s</p>" % _("Requested record does not seem to exist."),
            req, ln)

    if record_empty(self.recid):
        return warning_page(
            "<p>%s</p>" % _("Requested record does not seem to have been integrated."),
            req, ln)

    (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
    if auth_code:
        is_guest = user_info['email'] == 'guest'
        # When is_recid_in_released_issue() is true the restriction is
        # waived and we can serve the file.
        if not webjournal_utils.is_recid_in_released_issue(self.recid):
            if not is_guest:
                return page_not_authorized(req, "../", text=auth_message)
            login_cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL,
                {'collection': guess_primary_collection_of_a_record(self.recid)})
            login_url = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd(
                    {'action': login_cookie,
                     'ln': ln,
                     'referer': CFG_SITE_SECURE_URL + user_info['uri']},
                    {})
            return redirect_to_url(req, login_url, norobot=True)

    readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

    # From now on: either the user provided a specific file
    # name (and a possible version), or we return a list of
    # all the available files. In no case are the docids
    # visible.
    try:
        bibarchive = BibRecDocs(self.recid)
    except InvenioBibDocFileError:
        register_exception(req=req, alert_admin=True)
        return warning_page(
            "<p>%s</p><p>%s</p>" % (
                _("The system has encountered an error in retrieving the list of files for this document."),
                _("The error has been logged and will be taken in consideration as soon as possible.")),
            req, ln)

    if bibarchive.deleted_p():
        req.status = apache.HTTP_GONE
        return warning_page(
            _("Requested record does not seem to exist."), req, ln)

    docname = ''
    docformat = ''
    version = ''
    warn = ''

    if filename:
        # We know the complete file name, guess which docid it
        # refers to
        ## TODO: Change the extension system according to ext.py from setlink
        ##       and have a uniform extension mechanism...
        docname = file_strip_ext(filename)
        docformat = filename[len(docname):]
        if docformat and not docformat.startswith('.'):
            docformat = '.' + docformat
        if args['subformat']:
            docformat += ';%s' % args['subformat']
    else:
        docname = args['docname']

    if not docformat:
        # No extension could be derived: fall back on the URL arguments.
        docformat = args['format']
        if args['subformat']:
            docformat += ';%s' % args['subformat']

    if not version:
        version = args['version']

    ## Download as attachment
    is_download = bool(args['download'])

    # version could be either empty, or all or an integer
    try:
        int(version)
    except ValueError:
        if version != 'all':
            version = ''

    display_hidden = isUserSuperAdmin(user_info)

    if version != 'all':
        # search this filename in the complete list of files
        for doc in bibarchive.list_bibdocs():
            if docname != bibarchive.get_docname(doc.id):
                continue
            try:
                try:
                    docfile = doc.get_file(docformat, version)
                except InvenioBibDocFileError as err:
                    req.status = apache.HTTP_NOT_FOUND
                    if req.headers_in.get('referer'):
                        ## There must be a broken link somewhere.
                        ## Maybe it's good to alert the admin
                        register_exception(req=req, alert_admin=True)
                    warn += write_warning(
                        _("The format %s does not exist for the given version: %s")
                        % (cgi.escape(docformat), cgi.escape(str(err))))
                    break

                (auth_code, auth_message) = docfile.is_restricted(user_info)
                if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                    if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                            get_subformat_from_format(docformat)):
                        return stream_restricted_icon(req)
                    if user_info['email'] == 'guest':
                        login_cookie = mail_cookie_create_authorize_action(
                            'viewrestrdoc', {'status': docfile.get_status()})
                        login_url = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd(
                                {'action': login_cookie,
                                 'ln': ln,
                                 'referer': CFG_SITE_SECURE_URL + user_info['uri']},
                                {})
                        # NOTE(review): unlike the record-level redirect
                        # above there is no 'return' here; if
                        # redirect_to_url() returns normally, execution
                        # continues with the hidden check and streaming
                        # below -- confirm this is intended.
                        redirect_to_url(req, login_url)
                    else:
                        req.status = apache.HTTP_UNAUTHORIZED
                        warn += write_warning(
                            _("This file is restricted: ") + str(auth_message))
                        break

                if docfile.hidden_p():
                    req.status = apache.HTTP_UNAUTHORIZED
                    warn += write_warning(
                        _("The requested file is hidden and can not be accessed."))
                else:
                    if not readonly:
                        # Downloads are only logged when the site is
                        # not in read-only mode. Note that 'version' is
                        # the requested value and may still be '' here.
                        client_ip = str(req.remote_ip)
                        doc.register_download(client_ip, version, docformat, uid)
                    try:
                        return docfile.stream(req, download=is_download)
                    except InvenioBibDocFileError:
                        register_exception(req=req, alert_admin=True)
                        req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                        warn += write_warning(
                            _("An error has happened in trying to stream the request file."))
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are the (translated) link
          descriptions taken from subfield 'y', defaulting to "Fulltext"
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
        - fields whose composed format is listed in
          _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS are skipped
    """
    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False # We can provide link to older files. Will be
                         # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
                         # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls': {},   # Urls hosted by Invenio (bibdocs)
                   'others_urls': []  # External urls
                   }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = [] # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' not in complete_url:
            continue
        url = complete_url['u']
        # Assuming the standard library urlparse(): the tuple is
        # (scheme, netloc, path, params, query, fragment), so the name
        # 'params' below actually holds the query string.
        (dummy, host, path, dummy, params, dummy) = urlparse(url)
        subformat = complete_url.get('x', '')
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        url_format = filename[len(name):]
        if url_format.startswith('.'):
            url_format = url_format[1:]
        if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
            ## This format should be hidden.
            continue

        descr = _("Fulltext")
        if 'y' in complete_url:
            descr = complete_url['y']
            if descr == 'Fulltext':
                descr = _("Fulltext")

        if not url.startswith(CFG_SITE_URL): # Not a bibdoc?
            if not descr: # For not bibdoc let's have a description
                # Display the URL in full:
                descr = url
            if CFG_CERN_SITE and 'cern.ch' in host and \
                   ('/setlink?' in url or \
                    'cms' in host or \
                    'documents.cern.ch' in url or \
                    'doc.cern.ch' in url or \
                    'preprints.cern.ch' in url):
                url_params_dict = dict([part.split('=') for part in params.split('&')
                                        if len(part.split('=')) == 2])
                if 'categ' in url_params_dict and \
                       (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                       'id' in url_params_dict:
                    # Old arXiv links, used to be handled by
                    # setlink. Provide direct links to arXiv
                    for file_format, label in [('pdf', "PDF")]:#,
                        #('ps', "PS"),
                        #('e-print', "Source (generally TeX or LaTeX)"),
                        #('abs', "Abstract")]:
                        url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                              {'format': file_format,
                               'category': url_params_dict['categ'],
                               'id': url_params_dict['id']}
                        parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                           (url_params_dict['categ'],
                                                            url_params_dict['id'],
                                                            label)))
            else:
                parsed_urls['others_urls'].append((url, descr)) # external url
        else: # It's a bibdoc!
            # Build the displayed format once per URL. It used to be
            # extended in place inside the loop below, so the
            # " (subformat)" suffix was repeated for every additional
            # bibdoc matching the same file name.
            displayed_format = url_format
            params_dict = parse_qs(params)
            if 'subformat' in params_dict:
                displayed_format += ' (%s)' % params_dict['subformat'][0]

            assigned = False
            for doc in bibarchive.list_bibdocs():
                if int(doc.get_latest_version()) > 1:
                    old_versions = True
                if any(f.get_full_name().startswith(filename)
                       for f in doc.list_all_files()):
                    assigned = True
                    if not include_subformat_icons and \
                           CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                        # This is an icon and we want to skip it
                        continue
                    if not doc.get_doctype(bfo.recID) == 'Main' and \
                           distinct_main_and_additional_files:
                        # In that case we record that there are
                        # additional files, but don't add them to
                        # returned structure.
                        additionals = True
                    else:
                        if not descr:
                            descr = _('Fulltext')
                        if descr not in parsed_urls['main_urls']:
                            parsed_urls['main_urls'][descr] = []
                        parsed_urls['main_urls'][descr].append((url, name, displayed_format))
            if not assigned: # Url is not a bibdoc :-S
                if not descr:
                    descr = filename
                parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)
def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False, hide_doctypes=None):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    Parameter hide_doctypes (list) decides which doctypes should not
    be included in the returned structure. The given list is never
    modified: a private copy is extended with the CMSPUB_SOURCEF,
    ATLPUB_SOURCEF and LHCBPB_SOURCEF doctypes when the record links
    to an arXiv PDF.

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are the (translated) link
          descriptions taken from subfield 'y', defaulting to "Fulltext"
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
        - fields whose composed format is listed in
          _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS are skipped
    """
    _ = gettext_set_language(bfo.lang)

    # Work on a private copy: doctypes are appended below and the
    # caller's list must not grow as a side effect of this call.
    if hide_doctypes is None:
        hide_doctypes = []
    else:
        hide_doctypes = list(hide_doctypes)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False # We can provide link to older files. Will be
                         # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
                         # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},   # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = [] # cern.ch urls

    if any(field.get('u', '').startswith('http://arxiv.org/pdf/')
           for field in urls):
        # We have a link to arXiv PDF. We can hide the files on
        # CDS in some cases:
        hide_doctypes.append('CMSPUB_SOURCEF')
        hide_doctypes.append('ATLPUB_SOURCEF')
        hide_doctypes.append('LHCBPB_SOURCEF')

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' not in complete_url:
            continue
        url = complete_url['u']
        # Assuming the standard library urlparse(): the tuple is
        # (scheme, netloc, path, params, query, fragment), so the name
        # 'params' below actually holds the query string.
        (dummy, host, path, dummy, params, dummy) = urlparse(url)
        subformat = complete_url.get('x', '')
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        url_format = filename[len(name):]
        if url_format.startswith('.'):
            url_format = url_format[1:]
        if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
            ## This format should be hidden.
            continue

        descr = _("Fulltext")
        if 'y' in complete_url:
            descr = complete_url['y']
            if descr == 'Fulltext':
                descr = _("Fulltext")

        if not url.startswith(CFG_SITE_URL): # Not a bibdoc?
            if not descr: # For not bibdoc let's have a description
                # Display the URL in full:
                descr = url
            if CFG_CERN_SITE and 'cern.ch' in host and \
                   ('/setlink?' in url or \
                    'cms' in host or \
                    'documents.cern.ch' in url or \
                    'doc.cern.ch' in url or \
                    'preprints.cern.ch' in url):
                url_params_dict = dict([part.split('=') for part in params.split('&')
                                        if len(part.split('=')) == 2])
                if 'categ' in url_params_dict and \
                       (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                       'id' in url_params_dict:
                    # Old arXiv links, used to be handled by
                    # setlink. Provide direct links to arXiv
                    for file_format, label in [('pdf', "PDF")]: #,
                        #('ps', "PS"),
                        #('e-print', "Source (generally TeX or LaTeX)"),
                        #('abs', "Abstract")]:
                        url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                              {'format': file_format,
                               'category': url_params_dict['categ'],
                               'id': url_params_dict['id']}
                        parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                           (url_params_dict['categ'],
                                                            url_params_dict['id'],
                                                            label)))
            else:
                parsed_urls['others_urls'].append((url, descr)) # external url
        else: # It's a bibdoc!
            # Build the displayed format once per URL. It used to be
            # extended in place inside the loop below, so the
            # " (subformat)" suffix was repeated for every additional
            # bibdoc matching the same file name.
            displayed_format = url_format
            params_dict = parse_qs(params)
            if 'subformat' in params_dict:
                displayed_format += ' (%s)' % params_dict['subformat'][0]

            assigned = False
            for doc in bibarchive.list_bibdocs():
                if int(doc.get_latest_version()) > 1:
                    old_versions = True
                if any(f.get_full_name().startswith(filename)
                       for f in doc.list_all_files()):
                    assigned = True
                    if not include_subformat_icons and \
                           CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                        # This is an icon and we want to skip it
                        continue
                    doctype = doc.get_doctype(bfo.recID)
                    if doctype in hide_doctypes:
                        # Hidden doctypes still count as "assigned",
                        # so the URL is not reported as an external one.
                        continue
                    if not doctype == 'Main' and \
                           distinct_main_and_additional_files:
                        # In that case we record that there are
                        # additional files, but don't add them to
                        # returned structure.
                        additionals = True
                    else:
                        if not descr:
                            descr = _('Fulltext')
                        if descr not in parsed_urls['main_urls']:
                            parsed_urls['main_urls'][descr] = []
                        parsed_urls['main_urls'][descr].append((url, name, displayed_format))
            if not assigned: # Url is not a bibdoc :-S
                if not descr:
                    descr = filename
                parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)