def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Walk the ``files/MainFiles`` and ``files/AdditionalFiles`` directories
    found under ``curdir`` and, for each one that exists, align the documents
    of the matching doctype ('Main' or 'Additional') with its content:
    documents whose name has no matching file are deleted, then
    ``add_new_file`` is called once per file base name.

    The record is identified by the name ``sysno``, which is not defined in
    this function (presumably a module-level global -- TODO confirm).

    @param parameters: not used by this function
    @param curdir: directory containing the ``files`` subdirectory
    @param form: not used by this function
    @param user_info: not used by this function
    @return: always the empty string
    """
    watched_dirs = {
        'Main': "%s/files/MainFiles" % curdir,
        'Additional': "%s/files/AdditionalFiles" % curdir,
    }
    for doctype, directory in watched_dirs.iteritems():
        if not os.path.exists(directory):
            continue
        # Group the normalized extensions found on disk by base file name.
        formats = {}
        for entry in sorted(os.listdir(directory)):
            dummy, docname, extension = decompose_file(entry)
            formats.setdefault(docname, []).append(normalize_format(extension))
        # First drop every attached document that has no file on disk.
        bibarchive = BibRecDocs(sysno)
        for attached in bibarchive.list_bibdocs(doctype):
            if attached.get_docname() not in formats:
                attached.delete()
        # Then create/update the documents found on disk.
        for docname, extensions in formats.items():
            # NOTE(review): ``extensions`` is a list, so it is rendered as
            # e.g. "['.pdf']" inside the path -- looks unintended; confirm
            # before relying on this deprecated function.
            bibarchive.add_new_file('%s/%s%s' % (directory, docname, extensions),
                                    doctype=doctype,
                                    never_fail=True)
    return ""
    def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
        """Redirect a getfile.py style request to the record's files URL.

        @param req: the request object handed to warning_page/redirect_to_url
        @param recid: record identifier; when empty it is derived from docid
        @param docid: document identifier, used to find recid or name
        @param version: optional version appended to the query string
        @param name: document name; looked up from docid when recid is given
            and name is empty
        @param docformat: file format, normalized before building the URL
        @param ln: language, passed on in the query string
        @return: the warning page when the document cannot be identified;
            otherwise nothing is returned after redirect_to_url is called
        """
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.bibrec_links[0]["recid"]
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
            else:
                return warning_page(_("Not enough information to retrieve the document"), req, ln)
        else:
            brd = BibRecDocs(recid)
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    name = brd.get_docname(docid)
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

        docformat = normalize_format(docformat)

        # The query string already starts with "?ln=...", so the optional
        # version argument has to be introduced by "&"; without it the
        # URL ended in e.g. "?ln=enversion=2".
        version_arg = ""
        if version:
            version_arg = "&version=%s" % version

        redirect_to_url(
            req,
            "%s/%s/%s/files/%s%s?ln=%s%s"
            % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version_arg),
            apache.HTTP_MOVED_PERMANENTLY,
        )
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    For each of ``curdir/files/MainFiles`` (doctype 'Main') and
    ``curdir/files/AdditionalFiles`` (doctype 'Additional') that exists:
    delete the attached documents of that doctype whose name matches no
    file in the directory, then call ``add_new_file`` once per file base
    name found there.

    The record is identified by the name ``sysno``, which is not defined in
    this function (presumably a module-level global -- TODO confirm).

    @param parameters: not used by this function
    @param curdir: directory containing the ``files`` subdirectory
    @param form: not used by this function
    @param user_info: not used by this function
    @return: always the empty string
    """
    watched_dirs = {
        'Main': "%s/files/MainFiles" % curdir,
        'Additional': "%s/files/AdditionalFiles" % curdir,
    }
    for doctype, directory in watched_dirs.iteritems():
        if not os.path.exists(directory):
            continue
        # Map each base file name to the normalized extensions seen for it.
        formats = {}
        for entry in sorted(os.listdir(directory)):
            dummy, docname, extension = decompose_file(entry)
            formats.setdefault(docname, []).append(normalize_format(extension))
        # Remove the attached documents that are no longer on disk.
        bibarchive = BibRecDocs(sysno)
        for attached in bibarchive.list_bibdocs(doctype):
            if attached.get_docname() not in formats:
                attached.delete()
        # Create/update the documents that are on disk.
        for docname, extensions in formats.items():
            # NOTE(review): ``extensions`` is a list, so the path contains
            # its repr (e.g. "['.pdf']") -- looks unintended; confirm.
            bibarchive.add_new_file('%s/%s%s' % (directory, docname, extensions), doctype=doctype, never_fail=True)
    return ""
def main_cli():
    """
    main function when the library behaves as a normal CLI tool.
    """
    from invenio.bibdocfile import normalize_format
    parser = OptionParser()
    parser.add_option("-c", "--convert", dest="input_name",
                  help="convert the specified FILE", metavar="FILE")
    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="Enable debug information")
    parser.add_option("--special-pdf2hocr2pdf", dest="ocrize", help="convert the given scanned PDF into a PDF with OCRed text", metavar="FILE")
    parser.add_option("-f", "--format", dest="output_format", help="the desired output format", metavar="FORMAT")
    parser.add_option("-o", "--output", dest="output_name", help="the desired output FILE (if not specified a new file will be generated with the desired output format)")
    parser.add_option("--without-pdfa", action="store_false", dest="pdf_a", default=True, help="don't force creation of PDF/A  PDFs")
    parser.add_option("--without-pdfopt", action="store_false", dest="pdfopt", default=True, help="don't force optimization of PDFs files")
    parser.add_option("--without-ocr", action="store_false", dest="ocr", default=True, help="don't force OCR")
    parser.add_option("--can-convert", dest="can_convert", help="display all the possible format that is possible to generate from the given format", metavar="FORMAT")
    parser.add_option("--is-ocr-needed", dest="check_ocr_is_needed", help="check if OCR is needed for the FILE specified", metavar="FILE")
    parser.add_option("-t", "--title", dest="title", help="specify the title (used when creating PDFs)", metavar="TITLE")
    parser.add_option("-l", "--language", dest="ln", help="specify the language (used when performing OCR, e.g. en, it, fr...)", metavar="LN", default='en')
    (options, dummy) = parser.parse_args()
    if options.debug:
        getLogger().setLevel(DEBUG)
    if options.can_convert:
        if options.can_convert:
            input_format = normalize_format(options.can_convert)
            if input_format == '.pdf':
                if can_pdfopt():
                    print "PDF linearization supported"
                else:
                    print "No PDF linearization support"
                if can_pdfa():
                    print "PDF/A generation supported"
                else:
                    print "No PDF/A generation support"
            if can_perform_ocr():
                print "OCR supported"
            else:
                print "OCR not supported"
            print 'Can convert from "%s" to:' % input_format[1:],
            for output_format in __CONVERSION_MAP:
                if can_convert(input_format, output_format):
                    print '"%s"' % output_format[1:],
            print
    elif options.check_ocr_is_needed:
        print "Checking if OCR is needed on %s..." % options.check_ocr_is_needed,
        sys.stdout.flush()
        if guess_is_OCR_needed(options.check_ocr_is_needed):
            print "needed."
        else:
            print "not needed."
    elif options.ocrize:
        try:
            output = pdf2hocr2pdf(options.ocrize, output_file=options.output_name, title=options.title, ln=options.ln)
            print "Output stored in %s" % output
        except InvenioWebSubmitFileConverterError, err:
            print "ERROR: %s" % err
            sys.exit(1)
def main_cli():
    """
    main function when the library behaves as a normal CLI tool.
    """
    from invenio.bibdocfile import normalize_format
    parser = OptionParser()
    parser.add_option("-c", "--convert", dest="input_name",
                  help="convert the specified FILE", metavar="FILE")
    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="Enable debug information")
    parser.add_option("--special-pdf2hocr2pdf", dest="ocrize", help="convert the given scanned PDF into a PDF with OCRed text", metavar="FILE")
    parser.add_option("-f", "--format", dest="output_format", help="the desired output format", metavar="FORMAT")
    parser.add_option("-o", "--output", dest="output_name", help="the desired output FILE (if not specified a new file will be generated with the desired output format)")
    parser.add_option("--without-pdfa", action="store_false", dest="pdf_a", default=True, help="don't force creation of PDF/A  PDFs")
    parser.add_option("--without-pdfopt", action="store_false", dest="pdfopt", default=True, help="don't force optimization of PDFs files")
    parser.add_option("--without-ocr", action="store_false", dest="ocr", default=True, help="don't force OCR")
    parser.add_option("--can-convert", dest="can_convert", help="display all the possible format that is possible to generate from the given format", metavar="FORMAT")
    parser.add_option("--is-ocr-needed", dest="check_ocr_is_needed", help="check if OCR is needed for the FILE specified", metavar="FILE")
    parser.add_option("-t", "--title", dest="title", help="specify the title (used when creating PDFs)", metavar="TITLE")
    parser.add_option("-l", "--language", dest="ln", help="specify the language (used when performing OCR, e.g. en, it, fr...)", metavar="LN", default='en')
    (options, dummy) = parser.parse_args()
    if options.debug:
        getLogger().setLevel(DEBUG)
    if options.can_convert:
        if options.can_convert:
            input_format = normalize_format(options.can_convert)
            if input_format == '.pdf':
                if can_pdfopt():
                    print "PDF linearization supported"
                else:
                    print "No PDF linearization support"
                if can_pdfa():
                    print "PDF/A generation supported"
                else:
                    print "No PDF/A generation support"
            if can_perform_ocr():
                print "OCR supported"
            else:
                print "OCR not supported"
            print 'Can convert from "%s" to:' % input_format[1:],
            for output_format in __CONVERSION_MAP:
                if can_convert(input_format, output_format):
                    print '"%s"' % output_format[1:],
            print
    elif options.check_ocr_is_needed:
        print "Checking if OCR is needed on %s..." % options.check_ocr_is_needed,
        sys.stdout.flush()
        if guess_is_OCR_needed(options.check_ocr_is_needed):
            print "needed."
        else:
            print "not needed."
    elif options.ocrize:
        try:
            output = pdf2hocr2pdf(options.ocrize, output_file=options.output_name, title=options.title, ln=options.ln)
            print "Output stored in %s" % output
        except InvenioWebSubmitFileConverterError, err:
            print "ERROR: %s" % err
            sys.exit(1)
def can_convert(input_format, output_format, max_intermediate_conversions=2):
    """Return the chain of conversion to transform input_format into output_format, if any.

    A direct converter registered in __CONVERSION_MAP is preferred.
    Otherwise every format reachable from input_format is tried as an
    intermediate step (recursively, with one less allowed step) and the
    shortest resulting chain is kept; on ties the first one found wins.

    @param input_format: the source format (normalized before lookup)
    @param output_format: the wanted format (normalized before lookup)
    @param max_intermediate_conversions: recursion budget; no chain is
        searched once it drops to zero
    @return: list of __CONVERSION_MAP entries to apply in order, or an
        empty list when no chain is found
    """
    from invenio.bibdocfile import normalize_format
    if max_intermediate_conversions <= 0:
        return []
    input_format = normalize_format(input_format)
    output_format = normalize_format(output_format)
    if input_format not in __CONVERSION_MAP:
        return []
    converters = __CONVERSION_MAP[input_format]
    if output_format in converters:
        return [converters[output_format]]
    best_res = []
    best_intermediate = ''
    for intermediate_format in converters:
        res = can_convert(intermediate_format, output_format, max_intermediate_conversions - 1)
        # Compare lengths: the previous "len(res) < best_res" compared an
        # int with a list, which never selected the shortest chain.
        if res and (not best_res or len(res) < len(best_res)):
            best_res = res
            best_intermediate = intermediate_format
    if best_res:
        return [converters[best_intermediate]] + best_res
    return []
def can_convert(input_format, output_format, max_intermediate_conversions=2):
    """Return the chain of conversion to transform input_format into output_format, if any.

    A direct converter registered in __CONVERSION_MAP is preferred.
    Otherwise every format reachable from input_format is tried as an
    intermediate step (recursively, with one less allowed step) and the
    shortest resulting chain is kept; on ties the first one found wins.

    @param input_format: the source format (normalized before lookup)
    @param output_format: the wanted format (normalized before lookup)
    @param max_intermediate_conversions: recursion budget; no chain is
        searched once it drops to zero
    @return: list of __CONVERSION_MAP entries to apply in order, or an
        empty list when no chain is found
    """
    from invenio.bibdocfile import normalize_format
    if max_intermediate_conversions <= 0:
        return []
    input_format = normalize_format(input_format)
    output_format = normalize_format(output_format)
    if input_format not in __CONVERSION_MAP:
        return []
    converters = __CONVERSION_MAP[input_format]
    if output_format in converters:
        return [converters[output_format]]
    best_res = []
    best_intermediate = ''
    for intermediate_format in converters:
        res = can_convert(intermediate_format, output_format, max_intermediate_conversions - 1)
        # Compare lengths: the previous "len(res) < best_res" compared an
        # int with a list, which never selected the shortest chain.
        if res and (not best_res or len(res) < len(best_res)):
            best_res = res
            best_intermediate = intermediate_format
    if best_res:
        return [converters[best_intermediate]] + best_res
    return []
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Pick from filelist the file best suited for text extraction.

    The formats in best_formats are tried in order, keeping only those
    that can_convert reports as convertible to '.txt'; the first file
    whose extension ends with the current format is returned.

    @param filelist: the candidate file names
    @param best_formats: the formats to look for, most preferred first
    @return: the selected file name
    @raise InvenioWebSubmitFileConverterError: if no file matches
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    usable_formats = []
    for aformat in best_formats:
        if can_convert(aformat, '.txt'):
            usable_formats.append(normalize_format(aformat))
    for wanted in usable_formats:
        for filename in filelist:
            extension = decompose_file(filename, skip_version=True)[2]
            if extension.endswith(wanted):
                return filename
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Pick from filelist the file best suited for text extraction.

    The formats in best_formats are tried in order, keeping only those
    that can_convert reports as convertible to '.txt'; the first file
    whose extension ends with the current format is returned.

    @param filelist: the candidate file names
    @param best_formats: the formats to look for, most preferred first
    @return: the selected file name
    @raise InvenioWebSubmitFileConverterError: if no file matches
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    usable_formats = []
    for aformat in best_formats:
        if can_convert(aformat, '.txt'):
            usable_formats.append(normalize_format(aformat))
    for wanted in usable_formats:
        for filename in filelist:
            extension = decompose_file(filename, skip_version=True)[2]
            if extension.endswith(wanted):
                return filename
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file."""
    from invenio.bibdocfile import decompose_file, normalize_format
    output_ext = normalize_format(output_ext)
    debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            output_ext = '.tmp'
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file."""
    from invenio.bibdocfile import decompose_file, normalize_format
    output_ext = normalize_format(output_ext)
    debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            output_ext = '.tmp'
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
Exemple #12
0
    def _getfile_py(req,
                    recid=0,
                    docid=0,
                    version="",
                    name="",
                    docformat="",
                    ln=CFG_SITE_LANG):
        """Redirect a getfile.py style request to the record's files URL.

        @param req: the request object handed to warning_page/redirect_to_url
        @param recid: record identifier; when empty it is derived from docid
        @param docid: document identifier, used to find recid or name
        @param version: optional version appended to the query string
        @param name: document name; looked up from docid when recid is given
            and name is empty
        @param docformat: file format, normalized before building the URL
        @param ln: language, passed on in the query string
        @return: the warning page when the document cannot be identified;
            otherwise nothing is returned after redirect_to_url is called
        """
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.bibrec_links[0]["recid"]
                except InvenioBibDocFileError:
                    return warning_page(
                        _("An error has happened in trying to retrieve the requested file."
                          ), req, ln)
            else:
                return warning_page(
                    _('Not enough information to retrieve the document'), req,
                    ln)
        else:
            brd = BibRecDocs(recid)
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    name = brd.get_docname(docid)
                except InvenioBibDocFileError:
                    return warning_page(
                        _("An error has happened in trying to retrieving the requested file."
                          ), req, ln)

        docformat = normalize_format(docformat)

        # The query string already starts with "?ln=...", so the optional
        # version argument has to be introduced by "&"; without it the
        # URL ended in e.g. "?ln=enversion=2".
        version_arg = ''
        if version:
            version_arg = '&version=%s' % version

        redirect_to_url(
            req, '%s/%s/%s/files/%s%s?ln=%s%s' %
            (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln,
             version_arg),
            apache.HTTP_MOVED_PERMANENTLY)
def convert_file(input_file, output_file=None, output_format=None, **params):
    """
    Convert files from one format to another.
    @param input_file [string] the path to an existing file
    @param output_file [string] the path to the desired ouput. (if None a
        temporary file is generated)
    @param output_format [string] the desired format (if None it is taken from
        output_file)
    @param params other paramaters to pass to the particular converter
    @return [string] the final output_file
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    if output_format is None:
        if output_file is None:
            raise ValueError("At least output_file or format should be specified.")
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    else:
        output_ext = normalize_format(output_format)
    input_ext = decompose_file(input_file, skip_version=True)[2]
    conversion_chain = can_convert(input_ext, output_ext)
    if conversion_chain:
        current_input = input_file
        current_output = None
        for i in xrange(len(conversion_chain)):
            if i == (len(conversion_chain) - 1):
                current_output = output_file
            converter = conversion_chain[i][0]
            final_params = dict(conversion_chain[i][1])
            final_params.update(params)
            try:
                return converter(current_input, current_output, **final_params)
            except InvenioWebSubmitFileConverterError, err:
                raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err))
            except Exception, err:
                register_exception()
                raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err))
            current_input = current_output
def convert_file(input_file, output_file=None, output_format=None, **params):
    """
    Convert files from one format to another.
    @param input_file [string] the path to an existing file
    @param output_file [string] the path to the desired ouput. (if None a
        temporary file is generated)
    @param output_format [string] the desired format (if None it is taken from
        output_file)
    @param params other paramaters to pass to the particular converter
    @return [string] the final output_file
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    if output_format is None:
        if output_file is None:
            raise ValueError("At least output_file or format should be specified.")
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    else:
        output_ext = normalize_format(output_format)
    input_ext = decompose_file(input_file, skip_version=True)[2]
    conversion_chain = can_convert(input_ext, output_ext)
    if conversion_chain:
        current_input = input_file
        current_output = None
        for i in xrange(len(conversion_chain)):
            if i == (len(conversion_chain) - 1):
                current_output = output_file
            converter = conversion_chain[i][0]
            final_params = dict(conversion_chain[i][1])
            final_params.update(params)
            try:
                return converter(current_input, current_output, **final_params)
            except InvenioWebSubmitFileConverterError, err:
                raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err))
            except Exception, err:
                register_exception()
                raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err))
            current_input = current_output
    def _getfile_py(req, recid=0, docid=0, version="", name="", format="", ln=CFG_SITE_LANG):
        """Redirect a getfile.py style request to the record's files URL.

        @param req: the request object handed to warning_page/redirect_to_url
        @param recid: record identifier; when empty it is derived from docid
        @param docid: document identifier, used to find recid or name
        @param version: optional version appended to the query string
        @param name: document name; looked up from docid when recid is given
            and name is empty
        @param format: file format, normalized before building the URL
        @param ln: language, passed on in the query string
        @return: the warning page when the document cannot be identified;
            otherwise nothing is returned after redirect_to_url is called
        """
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.get_recid()
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
            else:
                return warning_page(_('Not enough information to retrieve the document'), req, ln)
        else:
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    bibdoc = BibDoc(docid)
                    name = bibdoc.get_docname()
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

        format = normalize_format(format)

        # The query string already starts with "?ln=...", so the optional
        # version argument has to be introduced by "&"; without it the
        # URL ended in e.g. "?ln=enversion=2".
        version_arg = ''
        if version:
            version_arg = '&version=%s' % version

        redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, format, ln, version_arg), apache.HTTP_MOVED_PERMANENTLY)
 def normalize_desired_conversion():
     """Return a copy of desired_conversion with every source format and
     every target format passed through normalize_format."""
     normalized = {}
     for source_format, targets in desired_conversion.iteritems():
         normalized_targets = [normalize_format(target) for target in targets]
         normalized[normalize_format(source_format)] = normalized_targets
     return normalized
 def test_base(self):
     # A plain, already dotted extension must come back unchanged.
     normalized = bibdocfile.normalize_format(".format")
     self.assertEqual(".format", normalized)
 def normalize_desired_conversion():
     """Return a copy of desired_conversion with every source format and
     every target format passed through normalize_format."""
     normalized = {}
     for source_format, targets in desired_conversion.iteritems():
         normalized_targets = [normalize_format(target) for target in targets]
         normalized[normalize_format(source_format)] = normalized_targets
     return normalized
 def test_ampersand(self):
     # Everything from the "&" onwards is expected to be dropped.
     normalized = bibdocfile.normalize_format(".format&suffix")
     self.assertEqual(".format", normalized)
def unoconv(input_file, output_file=None, output_format='txt', pdfopt=True, **dummy):
    """Use unconv to convert among OpenOffice understood documents."""
    from invenio.bibdocfile import normalize_format
    try:
        check_openoffice_tmpdir()
    except InvenioWebSubmitFileConverterError, err:
        register_exception(alert_admin=True, prefix='ERROR: it\'s impossible to properly execute OpenOffice.org conversions: %s' % err)
        raise

    input_file, output_file, dummy = prepare_io(input_file, output_file, output_format, need_working_dir=False)
    if output_format == 'txt':
        unoconv_format = 'text'
    else:
        unoconv_format = output_format
    try:
        tmpfile = tempfile.mktemp(dir=CFG_OPENOFFICE_TMPDIR, suffix=normalize_format(output_format))
        execute_command('sudo', '-u', CFG_OPENOFFICE_USER, CFG_PATH_OPENOFFICE_PYTHON, os.path.join(CFG_PYLIBDIR, 'invenio', 'unoconv.py'), '-v', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '--outputfile', tmpfile, '-f', unoconv_format, input_file)
    except InvenioWebSubmitFileConverterError:
        time.sleep(5)
        execute_command('sudo', '-u', CFG_OPENOFFICE_USER, CFG_PATH_OPENOFFICE_PYTHON, os.path.join(CFG_PYLIBDIR, 'invenio', 'unoconv.py'), '-v', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '--outputfile', tmpfile, '-f', unoconv_format, input_file)

    if not os.path.exists(tmpfile):
        raise InvenioWebSubmitFileConverterError('No output was generated by OpenOffice')

    output_format = normalize_format(output_format)

    if output_format == '.pdf' and pdfopt:
        pdf2pdfopt(tmpfile, output_file)
    else:
        shutil.copy(tmpfile, output_file)
    execute_command('sudo', '-u', CFG_OPENOFFICE_USER, CFG_PATH_OPENOFFICE_PYTHON, '-c', 'import os; os.remove(%s)' % repr(tmpfile))
 def test_unicode(self):
     # A unicode extension is expected back as its UTF-8 byte string.
     normalized = bibdocfile.normalize_format(u".ʬ")
     self.assertEqual(".\xca\xac", normalized)
 def test_unicode_subformat(self):
     # Same as test_unicode, with the unicode part after the ";".
     normalized = bibdocfile.normalize_format(u".format;ʬ")
     self.assertEqual(".format;\xca\xac", normalized)
import re
from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, compose_format
from invenio.messages import gettext_set_language
from invenio.config import CFG_SITE_URL, CFG_BASE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS
from invenio.bibdocfile_config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from invenio.urlutils import get_relative_url

from cgi import escape, parse_qs
from urlparse import urlparse
from os.path import basename
import urllib

# The configured CFG_BIBFORMAT_HIDDEN_FILE_FORMATS, passed through
# normalize_format once at import time and kept as a set.
_CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
    normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

# Doctype names; 'arXiv' is added only when CFG_CERN_SITE is set.
# (presumably the doctypes this element does not display -- TODO confirm)
_CFG_BIBFORMAT_HIDDEN_DOCTYPES = ['Plot']
if CFG_CERN_SITE:
    _CFG_BIBFORMAT_HIDDEN_DOCTYPES.append('arXiv')

# arXiv category prefixes.
# NOTE(review): "cs", "math" and "q-alg" are listed twice.
cern_arxiv_categories = [
    "astro-ph", "chao-dyn", "cond-mat", "gr-qc", "hep-ex", "hep-lat", "hep-ph",
    "hep-th", "math-ph", "math", "nucl-ex", "nucl-th", "out", "physics",
    "quant-ph", "q-alg", "cs", "adap-org", "comp-gas", "chem-ph", "cs", "math",
    "neuro-sys", "patt-sol", "solv-int", "acc-phys", "alg-geom", "ao-sci",
    "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th", "plasm-ph", "q-alg",
    "supr-con"
]

Exemple #24
0
"""BibFormat element - Prints a links to fulltext
"""
__revision__ = "$Id$"

import re
from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, compose_format
from invenio.messages import gettext_set_language
from invenio.config import CFG_SITE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS
from invenio.bibdocfile_config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from cgi import escape, parse_qs
from urlparse import urlparse
from os.path import basename
import urllib

# The configured CFG_BIBFORMAT_HIDDEN_FILE_FORMATS, passed through
# normalize_format once at import time and kept as a set.
_CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

# arXiv category prefixes.
# NOTE(review): "cs", "math" and "q-alg" are listed twice.
cern_arxiv_categories = ["astro-ph", "chao-dyn", "cond-mat", "gr-qc",
                         "hep-ex", "hep-lat", "hep-ph", "hep-th", "math-ph",
                         "math", "nucl-ex", "nucl-th", "out", "physics",
                         "quant-ph", "q-alg", "cs", "adap-org", "comp-gas",
                         "chem-ph", "cs", "math", "neuro-sys", "patt-sol",
                         "solv-int", "acc-phys", "alg-geom", "ao-sci",
                         "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th",
                         "plasm-ph", "q-alg", "supr-con"]

def format_element(bfo, style, separator='; ', show_icons='no', focus_on_main_file='no', show_subformat_icons='no'):
    """
    This is the default format for formatting fulltext links.

    When possible, it returns only the main file(s) (+ link to
def unoconv(input_file, output_file=None, output_format='txt', pdfopt=True, **dummy):
    """Use unconv to convert among OpenOffice understood documents."""
    from invenio.bibdocfile import normalize_format
    try:
        check_openoffice_tmpdir()
    except InvenioWebSubmitFileConverterError, err:
        register_exception(alert_admin=True, prefix='ERROR: it\'s impossible to properly execute OpenOffice.org conversions: %s' % err)
        raise

    input_file, output_file, dummy = prepare_io(input_file, output_file, output_format, need_working_dir=False)
    if output_format == 'txt':
        unoconv_format = 'text'
    else:
        unoconv_format = output_format
    try:
        tmpfile = tempfile.mktemp(dir=CFG_OPENOFFICE_TMPDIR, suffix=normalize_format(output_format))
        execute_command('sudo', '-u', CFG_OPENOFFICE_USER, CFG_PATH_OPENOFFICE_PYTHON, os.path.join(CFG_PYLIBDIR, 'invenio', 'unoconv.py'), '-v', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', CFG_OPENOFFICE_SERVER_PORT, '--outputfile', tmpfile, '-f', unoconv_format, input_file)
    except InvenioWebSubmitFileConverterError:
        time.sleep(5)
        execute_command('sudo', '-u', CFG_OPENOFFICE_USER, CFG_PATH_OPENOFFICE_PYTHON, os.path.join(CFG_PYLIBDIR, 'invenio', 'unoconv.py'), '-v', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', CFG_OPENOFFICE_SERVER_PORT, '--outputfile', tmpfile, '-f', unoconv_format, input_file)

    if not os.path.exists(tmpfile):
        raise InvenioWebSubmitFileConverterError('No output was generated by OpenOffice')

    output_format = normalize_format(output_format)

    if output_format == '.pdf' and pdfopt:
        pdf2pdfopt(tmpfile, output_file)
    else:
        shutil.copy(tmpfile, output_file)
    execute_command('sudo', '-u', CFG_OPENOFFICE_USER, CFG_PATH_OPENOFFICE_PYTHON, '-c', 'import os; os.remove(%s)' % repr(tmpfile))
                        _("An error has happened in trying to retrieve the requested file."), req, CFG_SITE_NAME, ln
                    )
            else:
                return warningMsg(_("Not enough information to retrieve the document"), req, CFG_SITE_NAME, ln)
        else:
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    bibdoc = BibDoc(docid)
                    name = bibdoc.get_docname()
                except InvenioWebSubmitFileError, e:
                    return warningMsg(
                        _("An error has happened in trying to retrieving the requested file."), req, CFG_SITE_NAME, ln
                    )

        format = normalize_format(format)

        redirect_to_url(
            req,
            "%s/%s/%s/files/%s%s?ln=%s%s"
            % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, format, ln, version and "version=%s" % version or ""),
            apache.HTTP_MOVED_PERMANENTLY,
        )

    return _getfile_py(req, **args)


# --------------------------------------------------

from invenio.websubmit_engine import home, action, interface, endaction
"""BibFormat element - Prints a links to fulltext
"""
__revision__ = "$Id$"

import re
from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, compose_format
from invenio.messages import gettext_set_language
from invenio.config import CFG_SITE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS
from invenio.bibdocfile_config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from cgi import escape, parse_qs
from urlparse import urlparse
from os.path import basename
import urllib

# The configured CFG_BIBFORMAT_HIDDEN_FILE_FORMATS, passed through
# normalize_format once at import time and kept as a set.
_CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

# arXiv category prefixes.
# NOTE(review): "cs", "math" and "q-alg" are listed twice.
cern_arxiv_categories = ["astro-ph", "chao-dyn", "cond-mat", "gr-qc",
                         "hep-ex", "hep-lat", "hep-ph", "hep-th", "math-ph",
                         "math", "nucl-ex", "nucl-th", "out", "physics",
                         "quant-ph", "q-alg", "cs", "adap-org", "comp-gas",
                         "chem-ph", "cs", "math", "neuro-sys", "patt-sol",
                         "solv-int", "acc-phys", "alg-geom", "ao-sci",
                         "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th",
                         "plasm-ph", "q-alg", "supr-con"]

def format_element(bfo, style, separator='; ', show_icons='no', focus_on_main_file='no', show_subformat_icons='no'):
    """
    This is the default format for formatting fulltext links.

    When possible, it returns only the main file(s) (+ link to