def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False):
    """
    Returns the files available for the given record.

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the
       'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    'parsed_urls' is a dictionary in the form::

        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},
         'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),  # url(8564_u), description(8564_z/y)
                        ('http://externalurl.com/bFile.pdf', 'Fulltext')],
         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),          # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are defined by the BibDoc.
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
    """
    _ = gettext_set_language(bfo.lang)

    # All 8564_ fields of the record: each is a dict of subfield code -> value.
    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
                          # set to True if older files are found.
    additionals = False   # We have additional files. Will be set to
                          # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls': {},    # Urls hosted by Invenio (bibdocs)
                   'others_urls': []   # External urls
                   }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' in complete_url:
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            # Subformat is carried in subfield 'x' (e.g. icon subformats).
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            # url_format: the extension part of the file name, dot stripped.
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            # Description defaults to the bare file name; subfield 'y'
            # overrides it when present.
            descr = filename
            if 'y' in complete_url:
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
                if not descr:  # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    # Legacy CERN setlink-style URL: decode its query string
                    # (keep only well-formed key=value pairs).
                    url_params_dict = dict([part.split('=') for part in params.split('&') if len(part.split('=')) == 2])
                    if 'categ' in url_params_dict and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           'id' in url_params_dict:
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]:#,
                                                   #('ps', "PS"),
                                                   #('e-print', "Source (generally TeX or LaTeX)"),
                                                   #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                    # NOTE(review): CERN URLs that do not match the arXiv
                    # pattern above are silently dropped (no 'else' here).
                else:
                    parsed_urls['others_urls'].append((url, descr))  # external url
            else:  # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.get_full_name().startswith(filename) \
                                for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        if not doc.get_doctype(bfo.recID) == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if descr not in parsed_urls['main_urls']:
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict['subformat'][0]
                            parsed_urls['main_urls'][descr].append((url, name, url_format))
                if not assigned:  # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
def document_upload(req=None, folder="", matching="", mode="", exec_date="", exec_time="", ln=CFG_SITE_LANG, priority="1", email_logs_to=None):
    """ Take files from the given directory and upload them with the
        appropiate mode.
    @parameters:
        + req: mod_python request object; when None (e.g. a CLI/daemon run)
               no authorization check is performed and the task user is
               anonymous ("******")
        + folder: Folder where the files to upload are stored
        + matching: How to match file names with record fields (report number,
                    barcode,...)
        + mode: Upload mode (append, revise, replace)
    @return: tuple (errors, info)
        errors: list of (file name, error message) tuples to notify the user
        error code:
            1 - More than one possible recID, ambiguous behaviour
            2 - No records match that file name
            3 - File already exists
            4 - A file with the same name and format already exists
        info: [number of files read, list of file names queued for upload]
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext
    from invenio.utils.hash import md5
    import shutil
    from invenio.legacy.search_engine import search_pattern, \
                                             guess_collection_of_a_record
    _ = gettext_set_language(ln)
    errors = []
    info = [0, []]  # Number of files read, name of the files
    try:
        files = os.listdir(folder)
    except OSError as error:
        errors.append(("", error))
        return errors, info
    err_desc = {1: _("More than one possible recID, ambiguous behaviour"),
                2: _("No records match that file name"),
                3: _("File already exists"),
                4: _("A file with the same name and format already exists")}
    # Create directory DONE/ if doesn't exist
    folder = (folder[-1] == "/") and folder or (folder + "/")
    files_done_dir = folder + "DONE/"
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Directory exists or no write permission
        pass
    for docfile in files:
        if not os.path.isfile(os.path.join(folder, docfile)):
            continue
        info[0] += 1
        identifier = file_strip_ext(docfile)
        extension = docfile[len(identifier):]
        rec_id = None
        if identifier:
            # Exact match of the file name (sans extension) against the
            # configured matching field (report number, barcode, ...).
            rec_id = search_pattern(p=identifier, f=matching, m='e')
        if not rec_id:
            errors.append((docfile, err_desc[2]))
            continue
        elif len(rec_id) > 1:
            errors.append((docfile, err_desc[1]))
            continue
        else:
            rec_id = str(list(rec_id)[0])
        rec_info = BibRecDocs(rec_id)
        if rec_info.bibdocs:
            # Reject the file if the very same content (by md5) or the same
            # name is already attached to the record.
            # BUGFIX: the checksum is computed once (it does not depend on
            # the bibdoc) and the file handle is closed deterministically;
            # num_errors is sampled before the loop so an error added while
            # scanning ANY bibdoc triggers the skip (previously it was reset
            # per bibdoc, letting earlier duplicates through).
            with open(os.path.join(folder, docfile), "rb") as fdesc:
                file_md5 = md5(fdesc.read()).hexdigest()
            num_errors = len(errors)
            for bibdoc in rec_info.bibdocs:
                for attached_file in bibdoc.list_all_files():
                    if attached_file.checksum == file_md5:
                        errors.append((docfile, err_desc[3]))
                        break
                    elif attached_file.get_full_name() == docfile:
                        errors.append((docfile, err_desc[4]))
                        break
            if len(errors) > num_errors:
                continue
        # Check if user has rights to upload file
        if req is not None:
            file_collection = guess_collection_of_a_record(int(rec_id))
            auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=file_collection)
            if auth_code != 0:
                error_msg = _("No rights to upload to collection '%(x_name)s'",
                              x_name=file_collection)
                errors.append((docfile, error_msg))
                continue
        # Move document to be uploaded to temporary folder
        (fd, tmp_file) = tempfile.mkstemp(prefix=identifier + "_" +
                                          time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_",
                                          suffix=extension,
                                          dir=CFG_TMPSHAREDDIR)
        # BUGFIX: mkstemp returns an open descriptor; close it to avoid a
        # file-descriptor leak (shutil.copy works on the path, not the fd).
        os.close(fd)
        shutil.copy(os.path.join(folder, docfile), tmp_file)
        # Create MARC temporary file with FFT tag and call bibupload
        (fd, filename) = tempfile.mkstemp(prefix=identifier + '_', dir=CFG_TMPSHAREDDIR)
        filedesc = os.fdopen(fd, 'w')
        marc_content = """ <record>
                           <controlfield tag="001">%(rec_id)s</controlfield>
                           <datafield tag="FFT" ind1=" " ind2=" ">
                            <subfield code="n">%(name)s</subfield>
                            <subfield code="a">%(path)s</subfield>
                           </datafield>
                           </record> """ % {'rec_id': rec_id,
                                            'name': encode_for_xml(identifier),
                                            'path': encode_for_xml(tmp_file),
                                            }
        filedesc.write(marc_content)
        filedesc.close()
        info[1].append(docfile)
        user = ""
        if req is not None:
            user_info = collect_user_info(req)
            user = user_info['nickname']
        if not user:
            user = "******"
        # Execute bibupload with the appropiate mode
        task_arguments = ('bibupload', user, "--" + mode,
                          "--priority=" + priority, "-N", "batchupload")
        if exec_date:
            date = '--runtime=' + "\'" + exec_date + ' ' + exec_time + "\'"
            task_arguments += (date, )
        if email_logs_to:
            task_arguments += ("--email-logs-to", email_logs_to)
        task_arguments += (filename, )
        jobid = task_low_level_submission(*task_arguments)
        # Write batch upload history.
        # BUGFIX: use the already-computed `user` instead of
        # user_info['nickname'], which is undefined (NameError) when the
        # function is invoked with req=None.
        run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate,
                filename, execdate, id_schTASK, batch_mode)
                VALUES (%s, NOW(), %s, %s, %s, "document")""",
                (user, docfile,
                 exec_date != "" and (exec_date + ' ' + exec_time)
                 or time.strftime("%Y-%m-%d %H:%M:%S"),
                 str(jobid)))
        # Move file to DONE folder
        done_filename = docfile + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid)
        try:
            os.rename(os.path.join(folder, docfile),
                      os.path.join(files_done_dir, done_filename))
        except OSError:
            # BUGFIX: report as a (file, message) tuple like every other
            # entry, per the documented return contract (was a bare string).
            errors.append((docfile, 'MoveError'))
    return errors, info
def getfile(req, form):
    """
    Serve a file attached to a record, or a listing of the record's files.

    Streams the requested document format/version when the user is allowed
    to see it; otherwise returns a warning/authorization page.

    NOTE(review): this body reads ``self.recid`` and ``filename`` although
    neither is a parameter nor a local — it looks like a method body pasted
    as a module-level function (TODO confirm the enclosing class).
    NOTE(review): ``unordered_tabs`` and ``tabs_counts`` are placeholders
    (the real calls are commented out); iterating/indexing them as written
    will raise — presumably mid-refactor code, verify before relying on it.
    """
    args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
    ln = args['ln']
    _ = gettext_set_language(ln)
    uid = getUid(req)
    user_info = collect_user_info(req)
    verbose = args['verbose']
    if verbose >= 1 and not isUserSuperAdmin(user_info):
        # Only SuperUser can see all the details!
        verbose = 0
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
        return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid),
                                   navmenuid='submit')
    if record_exists(self.recid) < 1:
        msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
        return warning_page(msg, req, ln)
    if record_empty(get_record(self.recid).legacy_create_recstruct()):
        msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
        return warning_page(msg, req, ln)
    (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
    if auth_code and user_info['email'] == 'guest':
        # Guest user on a restricted record: redirect to login with a cookie
        # that authorizes viewing the record's collection afterwards.
        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                 make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                         CFG_SITE_SECURE_URL + user_info['uri']}, {})
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        return page_not_authorized(req, "../", \
                                   text = auth_message)

    readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

    # From now on: either the user provided a specific file
    # name (and a possible version), or we return a list of
    # all the available files. In no case are the docids
    # visible.
    try:
        bibarchive = BibRecDocs(self.recid)
    except InvenioBibDocFileError:
        register_exception(req=req, alert_admin=True)
        msg = "<p>%s</p><p>%s</p>" % (
            _("The system has encountered an error in retrieving the list of files for this document."),
            _("The error has been logged and will be taken in consideration as soon as possible."))
        return warning_page(msg, req, ln)

    if bibarchive.deleted_p():
        req.status = apache.HTTP_GONE
        return warning_page(_("Requested record does not seem to exist."), req, ln)

    docname = ''
    docformat = ''
    version = ''
    warn = ''

    if filename:
        # We know the complete file name, guess which docid it
        # refers to
        ## TODO: Change the extension system according to ext.py from setlink
        ## and have a uniform extension mechanism...
        docname = file_strip_ext(filename)
        docformat = filename[len(docname):]
        if docformat and docformat[0] != '.':
            docformat = '.' + docformat
        if args['subformat']:
            docformat += ';%s' % args['subformat']
    else:
        docname = args['docname']

    if not docformat:
        docformat = args['format']
        if args['subformat']:
            docformat += ';%s' % args['subformat']

    if not version:
        version = args['version']

    ## Download as attachment
    is_download = False
    if args['download']:
        is_download = True

    # version could be either empty, or all or an integer
    try:
        int(version)
    except ValueError:
        if version != 'all':
            version = ''

    display_hidden = isUserSuperAdmin(user_info)

    if version != 'all':
        # search this filename in the complete list of files
        for doc in bibarchive.list_bibdocs():
            if docname == bibarchive.get_docname(doc.id):
                try:
                    try:
                        docfile = doc.get_file(docformat, version)
                    except InvenioBibDocFileError as msg:
                        req.status = apache.HTTP_NOT_FOUND
                        if not CFG_INSPIRE_SITE and req.headers_in.get('referer'):
                            ## There must be a broken link somewhere.
                            ## Maybe it's good to alert the admin
                            register_exception(req=req, alert_admin=True)
                        warn += write_warning(_("The format %(x_form)s does not exist for the given version: %(x_vers)s", x_form=cgi.escape(docformat), x_vers=cgi.escape(str(msg))))
                        break
                    (auth_code, auth_message) = docfile.is_restricted(user_info)
                    if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                        # Restricted icons get a generic placeholder instead
                        # of an authorization round-trip.
                        if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)):
                            return stream_restricted_icon(req)
                        if user_info['email'] == 'guest':
                            cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()})
                            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                     make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                             CFG_SITE_SECURE_URL + user_info['uri']}, {})
                            redirect_to_url(req, target)
                        else:
                            req.status = apache.HTTP_UNAUTHORIZED
                            warn += write_warning(_("This file is restricted: ") + str(auth_message))
                        break
                    if not docfile.hidden_p():
                        if not readonly:
                            ip = str(req.remote_ip)
                            doc.register_download(ip, docfile.get_version(), docformat, uid, self.recid)
                        try:
                            return docfile.stream(req, download=is_download)
                        except InvenioBibDocFileError as msg:
                            register_exception(req=req, alert_admin=True)
                            req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                            warn += write_warning(_("An error has happened in trying to stream the request file."))
                    else:
                        req.status = apache.HTTP_UNAUTHORIZED
                        warn += write_warning(_("The requested file is hidden and can not be accessed."))
                except InvenioBibDocFileError as msg:
                    register_exception(req=req, alert_admin=True)

    if docname and docformat and not warn:
        req.status = apache.HTTP_NOT_FOUND
        warn += write_warning(_("Requested file does not seem to exist."))
    # filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
    filelist = bibdocfile_templates.tmpl_display_bibrecdocs(bibarchive, "", version, ln=ln, verbose=verbose, display_hidden=display_hidden)

    t = warn + bibdocfile_templates.tmpl_filelist(ln=ln, filelist=filelist)

    cc = guess_primary_collection_of_a_record(self.recid)
    cc_id = Collection.query.filter_by(name=cc).value('id')
    # NOTE(review): placeholder — the real call is commented out; iterating
    # None below will raise TypeError.
    unordered_tabs = None  # get_detailed_page_tabs(cc_id, self.recid, ln)
    ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in iteritems(unordered_tabs)]
    ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
    link_ln = ''
    if ln != CFG_SITE_LANG:
        link_ln = '?ln=%s' % ln
    tabs = [(unordered_tabs[tab_id]['label'],
             '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
             tab_id == 'files',
             unordered_tabs[tab_id]['enabled'])
            for (tab_id, dummy_order) in ordered_tabs_id
            if unordered_tabs[tab_id]['visible'] is True]
    # NOTE(review): placeholder — indexing 'Citations' etc. on an empty dict
    # below will raise KeyError.
    tabs_counts = {}  # get_detailed_page_tabs_counts(self.recid)
    top = webstyle_templates.detailed_record_container_top(self.recid,
                                                           tabs,
                                                           args['ln'],
                                                           citationnum=tabs_counts['Citations'],
                                                           referencenum=tabs_counts['References'],
                                                           discussionnum=tabs_counts['Discussions'])
    bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                 tabs,
                                                                 args['ln'])
    title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln'])
    return pageheaderonly(title=title,
                          navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                   ''' > <a class="navtrail" href="%s/%s/%s">%s</a> > %s''' % \
                                   (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),
                          description=description,
                          keywords=keywords,
                          uid=uid,
                          language=ln,
                          req=req,
                          navmenuid='search',
                          navtrail_append_title_p=0) + \
           websearch_templates.tmpl_search_pagestart(ln) + \
           top + t + bottom + \
           websearch_templates.tmpl_search_pageend(ln) + \
           pagefooteronly(language=ln, req=req)
def document_upload(req=None, folder="", matching="", mode="", exec_date="", exec_time="", ln=CFG_SITE_LANG, priority="1", email_logs_to=None):
    """ Take files from the given directory and upload them with the
        appropiate mode.
    @parameters:
        + req: mod_python request object; when None (e.g. a CLI/daemon run)
               no authorization check is performed and the task user is
               anonymous ("******")
        + folder: Folder where the files to upload are stored
        + matching: How to match file names with record fields (report number,
                    barcode,...)
        + mode: Upload mode (append, revise, replace)
    @return: tuple (errors, info)
        errors: list of (file name, error message) tuples to notify the user
        error code:
            1 - More than one possible recID, ambiguous behaviour
            2 - No records match that file name
            3 - File already exists
            4 - A file with the same name and format already exists
        info: [number of files read, list of file names queued for upload]
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext
    from invenio.utils.hash import md5
    import shutil
    from invenio.legacy.search_engine import search_pattern, \
        guess_collection_of_a_record
    _ = gettext_set_language(ln)
    errors = []
    info = [0, []]  # Number of files read, name of the files
    try:
        files = os.listdir(folder)
    except OSError as error:
        errors.append(("", error))
        return errors, info
    err_desc = {
        1: _("More than one possible recID, ambiguous behaviour"),
        2: _("No records match that file name"),
        3: _("File already exists"),
        4: _("A file with the same name and format already exists")
    }
    # Create directory DONE/ if doesn't exist
    folder = (folder[-1] == "/") and folder or (folder + "/")
    files_done_dir = folder + "DONE/"
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Directory exists or no write permission
        pass
    for docfile in files:
        if not os.path.isfile(os.path.join(folder, docfile)):
            continue
        info[0] += 1
        identifier = file_strip_ext(docfile)
        extension = docfile[len(identifier):]
        rec_id = None
        if identifier:
            # Exact match of the file name (sans extension) against the
            # configured matching field (report number, barcode, ...).
            rec_id = search_pattern(p=identifier, f=matching, m='e')
        if not rec_id:
            errors.append((docfile, err_desc[2]))
            continue
        elif len(rec_id) > 1:
            errors.append((docfile, err_desc[1]))
            continue
        else:
            rec_id = str(list(rec_id)[0])
        rec_info = BibRecDocs(rec_id)
        if rec_info.bibdocs:
            # Reject the file if the very same content (by md5) or the same
            # name is already attached to the record.
            # BUGFIX: checksum computed once with a closed file handle;
            # num_errors sampled before the loop so an error found on ANY
            # bibdoc triggers the skip (previously reset per bibdoc).
            with open(os.path.join(folder, docfile), "rb") as fdesc:
                file_md5 = md5(fdesc.read()).hexdigest()
            num_errors = len(errors)
            for bibdoc in rec_info.bibdocs:
                for attached_file in bibdoc.list_all_files():
                    if attached_file.checksum == file_md5:
                        errors.append((docfile, err_desc[3]))
                        break
                    elif attached_file.get_full_name() == docfile:
                        errors.append((docfile, err_desc[4]))
                        break
            if len(errors) > num_errors:
                continue
        # Check if user has rights to upload file
        if req is not None:
            file_collection = guess_collection_of_a_record(int(rec_id))
            auth_code, auth_message = acc_authorize_action(
                req, 'runbatchuploader', collection=file_collection)
            if auth_code != 0:
                error_msg = _(
                    "No rights to upload to collection '%(x_name)s'",
                    x_name=file_collection)
                errors.append((docfile, error_msg))
                continue
        # Move document to be uploaded to temporary folder
        (fd, tmp_file) = tempfile.mkstemp(
            prefix=identifier + "_" +
            time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_",
            suffix=extension,
            dir=CFG_TMPSHAREDDIR)
        # BUGFIX: mkstemp returns an open descriptor; close it to avoid a
        # file-descriptor leak (shutil.copy works on the path, not the fd).
        os.close(fd)
        shutil.copy(os.path.join(folder, docfile), tmp_file)
        # Create MARC temporary file with FFT tag and call bibupload
        (fd, filename) = tempfile.mkstemp(prefix=identifier + '_',
                                          dir=CFG_TMPSHAREDDIR)
        filedesc = os.fdopen(fd, 'w')
        marc_content = """ <record>
                           <controlfield tag="001">%(rec_id)s</controlfield>
                           <datafield tag="FFT" ind1=" " ind2=" ">
                            <subfield code="n">%(name)s</subfield>
                            <subfield code="a">%(path)s</subfield>
                           </datafield>
                           </record> """ % {
            'rec_id': rec_id,
            'name': encode_for_xml(identifier),
            'path': encode_for_xml(tmp_file),
        }
        filedesc.write(marc_content)
        filedesc.close()
        info[1].append(docfile)
        user = ""
        if req is not None:
            user_info = collect_user_info(req)
            user = user_info['nickname']
        if not user:
            user = "******"
        # Execute bibupload with the appropiate mode
        task_arguments = ('bibupload', user, "--" + mode,
                          "--priority=" + priority, "-N", "batchupload")
        if exec_date:
            date = '--runtime=' + "\'" + exec_date + ' ' + exec_time + "\'"
            task_arguments += (date, )
        if email_logs_to:
            task_arguments += ("--email-logs-to", email_logs_to)
        task_arguments += (filename, )
        jobid = task_low_level_submission(*task_arguments)
        # Write batch upload history.
        # BUGFIX: use the already-computed `user` instead of
        # user_info['nickname'], which is undefined (NameError) when the
        # function is invoked with req=None.
        run_sql(
            """INSERT INTO hstBATCHUPLOAD (user, submitdate,
            filename, execdate, id_schTASK, batch_mode)
            VALUES (%s, NOW(), %s, %s, %s, "document")""",
            (user, docfile, exec_date != "" and
             (exec_date + ' ' + exec_time) or
             time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid)))
        # Move file to DONE folder
        done_filename = docfile + "_" + time.strftime(
            "%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid)
        try:
            os.rename(os.path.join(folder, docfile),
                      os.path.join(files_done_dir, done_filename))
        except OSError:
            # BUGFIX: report as a (file, message) tuple like every other
            # entry, per the documented return contract (was a bare string).
            errors.append((docfile, 'MoveError'))
    return errors, info
def getfile(req, form):
    """
    Serve a file attached to a record, or a listing of the record's files.

    Streams the requested document format/version when the user is allowed
    to see it; otherwise returns a warning/authorization page. Records that
    belong to a released webjournal issue are served even to unauthorized
    users.

    NOTE(review): this body reads ``self.recid`` and ``filename`` although
    neither is a parameter nor a local — it looks like a method body pasted
    as a module-level function (TODO confirm the enclosing class).
    """
    args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
    ln = args['ln']
    _ = gettext_set_language(ln)
    uid = getUid(req)
    user_info = collect_user_info(req)
    verbose = args['verbose']
    if verbose >= 1 and not isUserSuperAdmin(user_info):
        # Only SuperUser can see all the details!
        verbose = 0
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
        return page_not_authorized(req,
                                   "/%s/%s" % (CFG_SITE_RECORD, self.recid),
                                   navmenuid='submit')
    if record_exists(self.recid) < 1:
        msg = "<p>%s</p>" % _(
            "Requested record does not seem to exist.")
        return warning_page(msg, req, ln)
    if record_empty(self.recid):
        msg = "<p>%s</p>" % _(
            "Requested record does not seem to have been integrated.")
        return warning_page(msg, req, ln)
    (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
    if auth_code and user_info['email'] == 'guest':
        if webjournal_utils.is_recid_in_released_issue(self.recid):
            # We can serve the file
            pass
        else:
            # Guest user on a restricted record: redirect to login with a
            # cookie that authorizes viewing the collection afterwards.
            cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL, {
                    'collection': guess_primary_collection_of_a_record(self.recid)
                })
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                     make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                             CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        if webjournal_utils.is_recid_in_released_issue(self.recid):
            # We can serve the file
            pass
        else:
            return page_not_authorized(req, "../", \
                                       text = auth_message)

    readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

    # From now on: either the user provided a specific file
    # name (and a possible version), or we return a list of
    # all the available files. In no case are the docids
    # visible.
    try:
        bibarchive = BibRecDocs(self.recid)
    except InvenioBibDocFileError:
        register_exception(req=req, alert_admin=True)
        msg = "<p>%s</p><p>%s</p>" % (
            _("The system has encountered an error in retrieving the list of files for this document."
              ),
            _("The error has been logged and will be taken in consideration as soon as possible."
              ))
        return warning_page(msg, req, ln)

    if bibarchive.deleted_p():
        req.status = apache.HTTP_GONE
        return warning_page(
            _("Requested record does not seem to exist."), req, ln)

    docname = ''
    docformat = ''
    version = ''
    warn = ''

    if filename:
        # We know the complete file name, guess which docid it
        # refers to
        ## TODO: Change the extension system according to ext.py from setlink
        ## and have a uniform extension mechanism...
        docname = file_strip_ext(filename)
        docformat = filename[len(docname):]
        if docformat and docformat[0] != '.':
            docformat = '.' + docformat
        if args['subformat']:
            docformat += ';%s' % args['subformat']
    else:
        docname = args['docname']

    if not docformat:
        docformat = args['format']
        if args['subformat']:
            docformat += ';%s' % args['subformat']

    if not version:
        version = args['version']

    ## Download as attachment
    is_download = False
    if args['download']:
        is_download = True

    # version could be either empty, or all or an integer
    try:
        int(version)
    except ValueError:
        if version != 'all':
            version = ''

    display_hidden = isUserSuperAdmin(user_info)

    if version != 'all':
        # search this filename in the complete list of files
        for doc in bibarchive.list_bibdocs():
            if docname == bibarchive.get_docname(doc.id):
                try:
                    try:
                        docfile = doc.get_file(docformat, version)
                    except InvenioBibDocFileError as msg:
                        req.status = apache.HTTP_NOT_FOUND
                        if not CFG_INSPIRE_SITE and req.headers_in.get(
                                'referer'):
                            ## There must be a broken link somewhere.
                            ## Maybe it's good to alert the admin
                            register_exception(req=req, alert_admin=True)
                        warn += write_warning(
                            _("The format %(x_form)s does not exist for the given version: %(x_vers)s",
                              x_form=cgi.escape(docformat),
                              x_vers=cgi.escape(str(msg))))
                        break
                    (auth_code, auth_message) = docfile.is_restricted(user_info)
                    if auth_code != 0 and not is_user_owner_of_record(
                            user_info, self.recid):
                        # Restricted icons get a generic placeholder instead
                        # of an authorization round-trip.
                        if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                                get_subformat_from_format(docformat)):
                            return stream_restricted_icon(req)
                        if user_info['email'] == 'guest':
                            cookie = mail_cookie_create_authorize_action(
                                'viewrestrdoc',
                                {'status': docfile.get_status()})
                            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                     make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                             CFG_SITE_SECURE_URL + user_info['uri']}, {})
                            redirect_to_url(req, target)
                        else:
                            req.status = apache.HTTP_UNAUTHORIZED
                            warn += write_warning(
                                _("This file is restricted: ") + str(auth_message))
                        break
                    if not docfile.hidden_p():
                        if not readonly:
                            ip = str(req.remote_ip)
                            doc.register_download(
                                ip, docfile.get_version(), docformat, uid,
                                self.recid)
                        try:
                            return docfile.stream(req, download=is_download)
                        except InvenioBibDocFileError as msg:
                            register_exception(req=req, alert_admin=True)
                            req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                            warn += write_warning(
                                _("An error has happened in trying to stream the request file."
                                  ))
                    else:
                        req.status = apache.HTTP_UNAUTHORIZED
                        warn += write_warning(
                            _("The requested file is hidden and can not be accessed."
                              ))
                except InvenioBibDocFileError as msg:
                    register_exception(req=req, alert_admin=True)
                    # Prevent leaking of restricted file names
                    req.status = apache.HTTP_NOT_FOUND
                    return

    if docname and docformat and not warn:
        req.status = apache.HTTP_NOT_FOUND
        warn += write_warning(
            _("Requested file does not seem to exist."))
    # filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
    filelist = bibdocfile_templates.tmpl_display_bibrecdocs(
        bibarchive, "", version, ln=ln, verbose=verbose,
        display_hidden=display_hidden)

    t = warn + bibdocfile_templates.tmpl_filelist(ln=ln, filelist=filelist)

    cc = guess_primary_collection_of_a_record(self.recid)
    unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid, ln)
    ordered_tabs_id = [(tab_id, values['order'])
                       for (tab_id, values) in iteritems(unordered_tabs)]
    ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
    link_ln = ''
    if ln != CFG_SITE_LANG:
        link_ln = '?ln=%s' % ln
    tabs = [
        (unordered_tabs[tab_id]['label'],
         '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
         tab_id == 'files',
         unordered_tabs[tab_id]['enabled'])
        for (tab_id, dummy_order) in ordered_tabs_id
        if unordered_tabs[tab_id]['visible'] is True
    ]
    tabs_counts = get_detailed_page_tabs_counts(self.recid)
    top = webstyle_templates.detailed_record_container_top(
        self.recid,
        tabs,
        args['ln'],
        citationnum=tabs_counts['Citations'],
        referencenum=tabs_counts['References'],
        discussionnum=tabs_counts['Discussions'])
    bottom = webstyle_templates.detailed_record_container_bottom(
        self.recid, tabs, args['ln'])
    title, description, keywords = websearch_templates.tmpl_record_page_header_content(
        req, self.recid, args['ln'])
    return pageheaderonly(title=title,
                          navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                   ''' > <a class="navtrail" href="%s/%s/%s">%s</a> > %s''' % \
                                   (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),
                          description=description,
                          keywords=keywords,
                          uid=uid,
                          language=ln,
                          req=req,
                          navmenuid='search',
                          navtrail_append_title_p=0) + \
           websearch_templates.tmpl_search_pagestart(ln) + \
           top + t + bottom + \
           websearch_templates.tmpl_search_pageend(ln) + \
           pagefooteronly(language=ln, req=req)
def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False):
    """
    Returns the files available for the given record.

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the
       'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    'parsed_urls' is a dictionary in the form::

        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},
         'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),  # url(8564_u), description(8564_z/y)
                        ('http://externalurl.com/bFile.pdf', 'Fulltext')],
         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),          # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are defined by the BibDoc.
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
    """
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config[
        'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
        normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    # All 8564_ fields of the record: each is a dict of subfield code -> value.
    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
                          # set to True if older files are found.
    additionals = False   # We have additional files. Will be set to
                          # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},   # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
            distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    # BUGFIX: dict.has_key() was removed in Python 3 — membership is tested
    # with the 'in' operator throughout (this also matches the sibling
    # implementation of this function in the same file).
    for complete_url in urls:
        if 'u' in complete_url:
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            # Subformat is carried in subfield 'x' (e.g. icon subformats).
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            # url_format: the extension part of the file name, dot stripped.
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(
                    url_format, subformat
            ) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            # NOTE(review): the default description here is the translated
            # "Fulltext" (subfield 'y' overrides it), so the later
            # "if not descr" fallbacks can never trigger.
            descr = _("Fulltext")
            if 'y' in complete_url:
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
                if not descr:  # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                        ('/setlink?' in url or \
                         'cms' in host or \
                         'documents.cern.ch' in url or \
                         'doc.cern.ch' in url or \
                         'preprints.cern.ch' in url):
                    # Legacy CERN setlink-style URL: decode its query string
                    # (keep only well-formed key=value pairs).
                    url_params_dict = dict([
                        part.split('=') for part in params.split('&')
                        if len(part.split('=')) == 2
                    ])
                    if 'categ' in url_params_dict and \
                            (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                            'id' in url_params_dict:
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv.
                        # Only PDF is linked; 'ps', 'e-print' and 'abs'
                        # links were deliberately disabled.
                        for file_format, label in [('pdf', "PDF")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append(
                                (url, "%s/%s %s" % (url_params_dict['categ'],
                                                    url_params_dict['id'],
                                                    label)))
                else:
                    parsed_urls['others_urls'].append(
                        (url, descr))  # external url
            else:  # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.get_full_name().startswith(filename) \
                                for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                                CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        if not doc.get_doctype(bfo.recID) == 'Main' and \
                                distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if descr not in parsed_urls['main_urls']:
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict[
                                    'subformat'][0]
                            parsed_urls['main_urls'][descr].append(
                                (url, name, url_format))
                if not assigned:  # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append(
                        (url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)