Esempio n. 1
0
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main' : MainDir, 'Additional' : IncludeDir}
    for type, dir in iteritems(watcheddirs):
        if os.path.exists(dir):
            formats = {}
            files = os.listdir(dir)
            files.sort()
            for file in files:
                dummy, filename, extension = decompose_file(file)
                if filename not in formats:
                    formats[filename] = []
                formats[filename].append(normalize_format(extension))
            # first delete all missing files
            bibarchive = BibRecDocs(sysno)
            existingBibdocs = bibarchive.list_bibdocs(type)
            for existingBibdoc in existingBibdocs:
                if bibarchive.get_docname(existingBibdoc.id) not in formats:
                    existingBibdoc.delete()
            # then create/update the new ones
            for key in formats.keys():
                # instanciate bibdoc object
                bibarchive.add_new_file('%s/%s%s' % (dir, key, formats[key]), doctype=type, never_fail=True)
    return ""
Esempio n. 2
0
    def _getfile_py(req,
                    recid=0,
                    docid=0,
                    version="",
                    name="",
                    docformat="",
                    ln=CFG_SITE_LANG):
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.bibrec_links[0]["recid"]
                except InvenioBibDocFileError:
                    return warning_page(
                        _("An error has happened in trying to retrieve the requested file."
                          ), req, ln)
            else:
                return warning_page(
                    _('Not enough information to retrieve the document'), req,
                    ln)
        else:
            brd = BibRecDocs(recid)
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    name = brd.get_docname(docid)
                except InvenioBibDocFileError:
                    return warning_page(
                        _("An error has happened in trying to retrieving the requested file."
                          ), req, ln)

        docformat = normalize_format(docformat)

        redirect_to_url(
            req, '%s/%s/%s/files/%s%s?ln=%s%s' %
            (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln,
             version and 'version=%s' % version or ''),
            apache.HTTP_MOVED_PERMANENTLY)
Esempio n. 3
0
    def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.bibrec_links[0]["recid"]
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
            else:
                return warning_page(_('Not enough information to retrieve the document'), req, ln)
        else:
            brd = BibRecDocs(recid)
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    name = brd.get_docname(docid)
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

        docformat = normalize_format(docformat)

        redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version and 'version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY)
Esempio n. 4
0
"""BibFormat element - Prints a links to fulltext
"""
__revision__ = "$Id$"

import re
from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext, normalize_format, compose_format
from invenio.base.i18n import gettext_set_language
from invenio.config import CFG_SITE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS
from invenio.legacy.bibdocfile.config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from cgi import escape, parse_qs
from six.moves.urllib.parse import urlparse
from os.path import basename
import urllib

_CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

cern_arxiv_categories = ["astro-ph", "chao-dyn", "cond-mat", "gr-qc",
                         "hep-ex", "hep-lat", "hep-ph", "hep-th", "math-ph",
                         "math", "nucl-ex", "nucl-th", "out", "physics",
                         "quant-ph", "q-alg", "cs", "adap-org", "comp-gas",
                         "chem-ph", "cs", "math", "neuro-sys", "patt-sol",
                         "solv-int", "acc-phys", "alg-geom", "ao-sci",
                         "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th",
                         "plasm-ph", "q-alg", "supr-con"]

def format_element(bfo, style, separator='; ', show_icons='no', focus_on_main_file='no', show_subformat_icons='no'):
    """
    This is the default format for formatting fulltext links.

    When possible, it returns only the main file(s) (+ link to
Esempio n. 5
0
def get_files(bfo, distinguish_main_and_additional_files=True,
        include_subformat_icons=False):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

     Parameter 'include_subformat_icons' decides if subformat
     considered as icons should be returned

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_z/y)
                        ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are defined by the BibDoc.
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
    """
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config['CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False # We can provide link to older files. Will be
                         # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
                         # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls':{},    # Urls hosted by Invenio (bibdocs)
                  'others_urls':[]    # External urls
                  }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = [] # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True
    # Parse URLs
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            descr = _("Fulltext")
            if complete_url.has_key('y'):
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL): # Not a bibdoc?
                if not descr: # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    url_params_dict = dict([part.split('=') for part in params.split('&') if len(part.split('=')) == 2])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]:#,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append((url, descr)) # external url
            else: # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.get_full_name().startswith(filename) \
                                    for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        if not doc.get_doctype(bfo.recID) == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict['subformat'][0]
                            parsed_urls['main_urls'][descr].append((url, name, url_format))
                if not assigned: # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
Esempio n. 6
0
def get_files(bfo,
              distinguish_main_and_additional_files=True,
              include_subformat_icons=False):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

     Parameter 'include_subformat_icons' decides if subformat
     considered as icons should be returned

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_z/y)
                        ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are defined by the BibDoc.
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
    """
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config[
        'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
        normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
    # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
    # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},  # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True
    # Parse URLs
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(
                    url_format, subformat
            ) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            descr = _("Fulltext")
            if complete_url.has_key('y'):
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
                if not descr:  # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    url_params_dict = dict([
                        part.split('=') for part in params.split('&')
                        if len(part.split('=')) == 2
                    ])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]:  #,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append(
                        (url, descr))  # external url
            else:  # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.get_full_name().startswith(filename) \
                                    for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        if not doc.get_doctype(bfo.recID) == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict[
                                    'subformat'][0]
                            parsed_urls['main_urls'][descr].append(
                                (url, name, url_format))
                if not assigned:  # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append(
                        (url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
Esempio n. 7
0
import re
from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext, normalize_format, compose_format
from invenio.base.i18n import gettext_set_language
from invenio.config import CFG_SITE_URL, CFG_BASE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS
from invenio.legacy.bibdocfile.config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from invenio.utils.url import get_relative_url

from cgi import escape, parse_qs
from six.moves.urllib.parse import urlparse
from os.path import basename
import urllib

_CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
    normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

_CFG_BIBFORMAT_HIDDEN_DOCTYPES = ['Plot']
if CFG_CERN_SITE:
    _CFG_BIBFORMAT_HIDDEN_DOCTYPES.append('arXiv')

cern_arxiv_categories = [
    "astro-ph", "chao-dyn", "cond-mat", "gr-qc", "hep-ex", "hep-lat", "hep-ph",
    "hep-th", "math-ph", "math", "nucl-ex", "nucl-th", "out", "physics",
    "quant-ph", "q-alg", "cs", "adap-org", "comp-gas", "chem-ph", "cs", "math",
    "neuro-sys", "patt-sol", "solv-int", "acc-phys", "alg-geom", "ao-sci",
    "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th", "plasm-ph", "q-alg",
    "supr-con"
]