def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Walk the ``files/MainFiles`` and ``files/AdditionalFiles`` directories
    below *curdir* and, for each one that exists:

    1. group the directory entries by base name, collecting the normalized
       format of every entry;
    2. delete the bibdocs of the matching doctype ('Main' or 'Additional')
       whose docname is not among those base names;
    3. call ``add_new_file`` once per base name.

    ``parameters``, ``form`` and ``user_info`` are accepted but not used.
    Always returns the empty string.
    """
    watched_dirs = {
        'Main': "%s/files/MainFiles" % curdir,
        'Additional': "%s/files/AdditionalFiles" % curdir,
    }
    for doctype, directory in iteritems(watched_dirs):
        if not os.path.exists(directory):
            continue

        # Map each base name to the list of formats found for it.
        formats = {}
        for entry in sorted(os.listdir(directory)):
            dummy, docname, extension = decompose_file(entry)
            formats.setdefault(docname, []).append(normalize_format(extension))

        # First delete all the documents that are no longer on disk.
        # NOTE(review): `sysno` is not defined in this function; presumably
        # it is provided as a global by the caller -- verify.
        bibarchive = BibRecDocs(sysno)
        for bibdoc in bibarchive.list_bibdocs(doctype):
            if bibarchive.get_docname(bibdoc.id) not in formats:
                bibdoc.delete()

        # Then create/update the new ones.
        # NOTE(review): formats[docname] is a list, so its repr (e.g.
        # "['.pdf']") is what gets interpolated into the path -- confirm
        # this is intended.
        for docname in formats:
            bibarchive.add_new_file(
                '%s/%s%s' % (directory, docname, formats[docname]),
                doctype=doctype,
                never_fail=True)
    return ""
def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
    """Redirect a file request to the files URL of the owning record.

    The target URL has the form
    ``<CFG_SITE_URL>/<CFG_SITE_RECORD>/<recid>/files/<name><docformat>?ln=<ln>``
    with ``&version=<version>`` appended when *version* is given.

    :param req: request object handed to ``warning_page`` / ``redirect_to_url``.
    :param recid: record identifier; when false it is derived from *docid*.
    :param docid: document identifier, used to look up a missing *recid*,
        or a missing *name* when *recid* is given.
    :param version: optional document version to request.
    :param name: document name; looked up from *docid* when empty and
        *recid* is given.
    :param docformat: file format, passed through ``normalize_format``.
    :param ln: interface language.
    :return: the result of ``warning_page`` when the document cannot be
        identified; otherwise nothing is returned explicitly after
        ``redirect_to_url`` has been called.
    """
    if not recid:
        ## Let's obtain the recid from the docid
        if docid:
            try:
                bibdoc = BibDoc(docid=docid)
                recid = bibdoc.bibrec_links[0]["recid"]
            except (InvenioBibDocFileError, IndexError):
                # IndexError: the document exists but is not linked to any
                # record, so there is no record URL to redirect to.
                return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
        else:
            return warning_page(_('Not enough information to retrieve the document'), req, ln)
    else:
        brd = BibRecDocs(recid)
        if not name and docid:
            ## Let's obtain the name from the docid
            try:
                name = brd.get_docname(docid)
            except InvenioBibDocFileError:
                return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

    docformat = normalize_format(docformat)

    # The version argument needs its own '&' separator: without it the
    # query string came out as "?ln=enversion=2".
    if version:
        version_arg = '&version=%s' % version
    else:
        version_arg = ''

    redirect_to_url(
        req,
        '%s/%s/%s/files/%s%s?ln=%s%s' % (
            CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln,
            version_arg),
        apache.HTTP_MOVED_PERMANENTLY)
def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
    """Redirect a file request to the files URL of the owning record.

    The target URL has the form
    ``<CFG_SITE_URL>/<CFG_SITE_RECORD>/<recid>/files/<name><docformat>?ln=<ln>``
    with ``&version=<version>`` appended when *version* is given.

    :param req: request object handed to ``warning_page`` / ``redirect_to_url``.
    :param recid: record identifier; when false it is derived from *docid*.
    :param docid: document identifier, used to look up a missing *recid*,
        or a missing *name* when *recid* is given.
    :param version: optional document version to request.
    :param name: document name; looked up from *docid* when empty and
        *recid* is given.
    :param docformat: file format, passed through ``normalize_format``.
    :param ln: interface language.
    :return: the result of ``warning_page`` when the document cannot be
        identified; otherwise nothing is returned explicitly after
        ``redirect_to_url`` has been called.
    """
    if not recid:
        ## Let's obtain the recid from the docid
        if docid:
            try:
                bibdoc = BibDoc(docid=docid)
                recid = bibdoc.bibrec_links[0]["recid"]
            except (InvenioBibDocFileError, IndexError):
                # IndexError: the document exists but is not linked to any
                # record, so there is no record URL to redirect to.
                return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
        else:
            return warning_page(_('Not enough information to retrieve the document'), req, ln)
    else:
        brd = BibRecDocs(recid)
        if not name and docid:
            ## Let's obtain the name from the docid
            try:
                name = brd.get_docname(docid)
            except InvenioBibDocFileError:
                return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

    docformat = normalize_format(docformat)

    # The version argument needs its own '&' separator: without it the
    # query string came out as "?ln=enversion=2".
    if version:
        version_arg = '&version=%s' % version
    else:
        version_arg = ''

    redirect_to_url(
        req,
        '%s/%s/%s/files/%s%s?ln=%s%s' % (
            CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln,
            version_arg),
        apache.HTTP_MOVED_PERMANENTLY)
"""BibFormat element - Prints a links to fulltext """ __revision__ = "$Id$" import re from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext, normalize_format, compose_format from invenio.base.i18n import gettext_set_language from invenio.config import CFG_SITE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \ CFG_BIBFORMAT_HIDDEN_FILE_FORMATS from invenio.legacy.bibdocfile.config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE from cgi import escape, parse_qs from six.moves.urllib.parse import urlparse from os.path import basename import urllib _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS) cern_arxiv_categories = ["astro-ph", "chao-dyn", "cond-mat", "gr-qc", "hep-ex", "hep-lat", "hep-ph", "hep-th", "math-ph", "math", "nucl-ex", "nucl-th", "out", "physics", "quant-ph", "q-alg", "cs", "adap-org", "comp-gas", "chem-ph", "cs", "math", "neuro-sys", "patt-sol", "solv-int", "acc-phys", "alg-geom", "ao-sci", "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th", "plasm-ph", "q-alg", "supr-con"] def format_element(bfo, style, separator='; ', show_icons='no', focus_on_main_file='no', show_subformat_icons='no'): """ This is the default format for formatting fulltext links. When possible, it returns only the main file(s) (+ link to
def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False):
    """
    Returns the files available for the given record.

    The URLs are read from the 8564_ fields of the record (subfield 'u'
    is the URL, 'y' the description, 'x' the subformat).

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if a bibdoc of the record has a latest
                     version greater than 1
     - additionals: set to True if we have other documents than the
                    'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    'parsed_urls' is a dictionary in the form::

        {'main_urls' : {'Fulltext': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'pdf'),
                                     ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'gif')],
                        'Slides':   [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'pdf')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),  # url(8564_u), description(8564_y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [],
        }

    Some notes about returned structure:
     - key 'cern_urls' is only present when CFG_CERN_SITE is set; this
       function creates it as an empty list and never appends to it
     - keys of the 'main_urls' dictionary are the link descriptions
       (subfield 'y', or the translated "Fulltext" default); values are
       lists of (url, name, format) tuples
     - older versions are not part of the parsed urls
     - returns only main files when possible, that is when doctypes
       make a distinction between 'Main' files and other
       files. Otherwise returns all the files as main. This is only
       enabled if distinguish_main_and_additional_files is set to True
    """

    # Configuration is read from the application at call time.
    # NOTE(review): `current_app` must be in scope; its import is not
    # part of this block.
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config['CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False # We can provide link to older files. Will be
                         # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
                         # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls':{},    # Urls hosted by Invenio (bibdocs)
                  'others_urls':[]    # External urls
                  }
    if CFG_CERN_SITE:
        # NOTE(review): nothing below appends to this list.
        parsed_urls['cern_urls'] = [] # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    # NOTE(review): dict.has_key() only exists on Python 2.
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            # urlparse() yields (scheme, netloc, path, params, query,
            # fragment): the local called `params` therefore holds the
            # query string of the URL.
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            # Whatever file_strip_ext() removed is the format, minus its
            # leading dot.
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            # Default description; an untranslated 'Fulltext' coming from
            # the record is translated as well.
            descr = _("Fulltext")
            if complete_url.has_key('y'):
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL): # Not a bibdoc?
                if not descr: # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    # Only well-formed "key=value" pairs of the query
                    # string are kept.
                    url_params_dict = dict([part.split('=') for part in params.split('&') if len(part.split('=')) == 2])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv.
                        # Only the PDF link is generated; the 'ps',
                        # 'e-print' and 'abs' variants were disabled.
                        for file_format, label in [('pdf', "PDF")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                    # A matching cern.ch URL that is not an old arXiv
                    # link is not added anywhere.
                else:
                    parsed_urls['others_urls'].append((url, descr)) # external url
            else: # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    # The URL belongs to this doc when one of its files
                    # has a full name starting with the URL's file name.
                    if True in [f.get_full_name().startswith(filename) \
                                    for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        if not doc.get_doctype(bfo.recID) == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            # NOTE(review): url_format is modified in
                            # place inside the loop over docs, so the
                            # " (subformat)" suffix is appended again if
                            # a second doc also matches -- confirm.
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict['subformat'][0]
                            parsed_urls['main_urls'][descr].append((url, name, url_format))
                if not assigned: # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
def get_files(bfo,
              distinguish_main_and_additional_files=True,
              include_subformat_icons=False):
    """
    Returns the files available for the given record.

    The URLs are read from the 8564_ fields of the record (subfield 'u'
    is the URL, 'y' the description, 'x' the subformat).

    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if a bibdoc of the record has a latest
                     version greater than 1
     - additionals: set to True if we have other documents than the
                    'main' document

    Parameter 'include_subformat_icons' decides if subformat
    considered as icons should be returned

    'parsed_urls' is a dictionary in the form::

        {'main_urls' : {'Fulltext': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'pdf'),
                                     ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'gif')],
                        'Slides':   [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'pdf')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),  # url(8564_u), description(8564_y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [],
        }

    Some notes about returned structure:
     - key 'cern_urls' is only present when CFG_CERN_SITE is set; this
       function creates it as an empty list and never appends to it
     - keys of the 'main_urls' dictionary are the link descriptions
       (subfield 'y', or the translated "Fulltext" default); values are
       lists of (url, name, format) tuples
     - older versions are not part of the parsed urls
     - returns only main files when possible, that is when doctypes
       make a distinction between 'Main' files and other
       files. Otherwise returns all the files as main. This is only
       enabled if distinguish_main_and_additional_files is set to True
    """

    # Configuration is read from the application at call time.
    # NOTE(review): `current_app` must be in scope; its import is not
    # part of this block.
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config[
        'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
        normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
    # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
    # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},  # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        # NOTE(review): nothing below appends to this list.
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    # NOTE(review): dict.has_key() only exists on Python 2.
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            # urlparse() yields (scheme, netloc, path, params, query,
            # fragment): the local called `params` therefore holds the
            # query string of the URL.
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            # Whatever file_strip_ext() removed is the format, minus its
            # leading dot.
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(
                    url_format, subformat
            ) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            # Default description; an untranslated 'Fulltext' coming from
            # the record is translated as well.
            descr = _("Fulltext")
            if complete_url.has_key('y'):
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
                if not descr:  # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    # Only well-formed "key=value" pairs of the query
                    # string are kept.
                    url_params_dict = dict([
                        part.split('=') for part in params.split('&')
                        if len(part.split('=')) == 2
                    ])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv.
                        # Only the PDF link is generated; the 'ps',
                        # 'e-print' and 'abs' variants were disabled.
                        for file_format, label in [('pdf', "PDF")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                    # A matching cern.ch URL that is not an old arXiv
                    # link is not added anywhere.
                else:
                    parsed_urls['others_urls'].append(
                        (url, descr))  # external url
            else:  # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    # The URL belongs to this doc when one of its files
                    # has a full name starting with the URL's file name.
                    if True in [f.get_full_name().startswith(filename) \
                                    for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        if not doc.get_doctype(bfo.recID) == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            # NOTE(review): url_format is modified in
                            # place inside the loop over docs, so the
                            # " (subformat)" suffix is appended again if
                            # a second doc also matches -- confirm.
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict[
                                    'subformat'][0]
                            parsed_urls['main_urls'][descr].append(
                                (url, name, url_format))
                if not assigned:  # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append(
                        (url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
import re
from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext, normalize_format, compose_format
from invenio.base.i18n import gettext_set_language
from invenio.config import CFG_SITE_URL, CFG_BASE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \
     CFG_BIBFORMAT_HIDDEN_FILE_FORMATS
from invenio.legacy.bibdocfile.config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from invenio.utils.url import get_relative_url
from cgi import escape, parse_qs
from six.moves.urllib.parse import urlparse
from os.path import basename
import urllib

# Configured hidden file formats, each passed through normalize_format()
# once at import time and stored as a set.
_CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
    normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

# Doctype names collected in a "hidden" list: always 'Plot', plus 'arXiv'
# when CFG_CERN_SITE is set.
# NOTE(review): presumably bibdocs of these doctypes are left out of the
# formatted output -- confirm against the code that reads this list.
_CFG_BIBFORMAT_HIDDEN_DOCTYPES = ['Plot']
if CFG_CERN_SITE:
    _CFG_BIBFORMAT_HIDDEN_DOCTYPES.append('arXiv')

# Category prefixes recognised as arXiv categories; get_files() matches
# the part of a URL's 'categ' parameter before the first '.' against this
# list. "cs", "math" and "q-alg" are listed twice, which does not affect
# membership tests.
cern_arxiv_categories = [
    "astro-ph", "chao-dyn", "cond-mat", "gr-qc", "hep-ex", "hep-lat", "hep-ph",
    "hep-th", "math-ph", "math", "nucl-ex", "nucl-th", "out", "physics",
    "quant-ph", "q-alg", "cs", "adap-org", "comp-gas", "chem-ph", "cs", "math",
    "neuro-sys", "patt-sol", "solv-int", "acc-phys", "alg-geom", "ao-sci",
    "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th", "plasm-ph", "q-alg",
    "supr-con"
]