Example #1
0
    def convertLaTeX2Unicode(self, source):
        # Replace LaTeX entities in `source` with their mapped characters.
        #
        # The entries of `_latex2utf8enc_mapping_simple` are applied first,
        # then those of `_latex2utf8enc_mapping`.  The text is decoded once up
        # front and encoded once at the end, instead of doing a full
        # decode -> replace -> encode round trip for every mapping entry.
        #
        # Returns the result of `_encode` on the converted text.
        source = _decode(source)

        for latex_entity, replacement in _latex2utf8enc_mapping_simple.items():
            source = source.replace(latex_entity, replacement)

        for latex_entity, replacement in _latex2utf8enc_mapping.items():
            source = source.replace(latex_entity, replacement)

        return _encode(source)
Example #2
0
    def convertLaTeX2Unicode(self, source):
        # Replace LaTeX entities in `source` with their mapped characters.
        #
        # Two mapping tables are applied in order: first
        # `_latex2utf8enc_mapping_simple`, then `_latex2utf8enc_mapping`.
        # Decoding happens once before the replacements and encoding once
        # after them; the per-entry decode/encode round trip of the earlier
        # implementation was pure overhead.
        #
        # Returns the result of `_encode` on the converted text.
        text = _decode(source)

        for mapping in (_latex2utf8enc_mapping_simple, _latex2utf8enc_mapping):
            for latex_entity, replacement in mapping.items():
                text = text.replace(latex_entity, replacement)

        return _encode(text)
    def _bibitem_links(self, hit):
        # Build an HTML citation line for the reference id captured in
        # group 1 of `hit` (an object exposing `group()`, e.g. a regex match).
        #
        # The id is looked up in 'portal_catalog' among the portal types
        # returned by `self.getReferenceTypes()`.  The first catalog result is
        # rendered as "authors (year) <a href=url>title</a>, source"; when the
        # catalog returns nothing, the stripped id itself is returned.
        #
        # NOTE(review): catalog values are interpolated into the markup
        # without HTML escaping -- confirm they are safe at this point.
        ref_id = hit.group(1).strip()
        portal_catalog = getToolByName(self, 'portal_catalog')
        matches = portal_catalog(getId=ref_id,
                                 portal_type=self.getReferenceTypes())
        if not matches:
            return ref_id

        record = matches[0]
        charset = self.getProperty('default_charset') or 'utf-8'
        authors, year, title, source = [
            _encode(getattr(record, attribute), charset)
            for attribute in ('Authors', 'publication_year', 'Title', 'Source')
        ]
        return '%s (%s) <a href="%s">%s</a>, %s' % (
            authors, year, record.getURL(), title, source)
    def _bibitem_links(self, hit):
        # Render one bibliography item as HTML.
        #
        # `hit.group(1)` supplies the reference id.  The id is searched in
        # 'portal_catalog' (restricted to `self.getReferenceTypes()`); the
        # first result becomes "authors (year) <a href=url>title</a>, source",
        # with text values passed through `_encode` using the
        # 'default_charset' property (falling back to 'utf-8').  Without a
        # catalog result the stripped id is returned unchanged.
        #
        # NOTE(review): values are placed into the markup unescaped --
        # confirm this is intended.
        bib_key = hit.group(1).strip()
        lookup = getToolByName(self, 'portal_catalog')
        query = dict(getId=bib_key, portal_type=self.getReferenceTypes())
        found = lookup(**query)
        if found:
            top = found[0]
            charset = self.getProperty('default_charset') or 'utf-8'

            who = _encode(top.Authors, charset)
            when = _encode(top.publication_year, charset)
            what = _encode(top.Title, charset)
            where = _encode(top.Source, charset)
            href = top.getURL()

            template = '%s (%s) <a href="%s">%s</a>, %s'
            return template % (who, when, href, what, where)
        return bib_key
Example #5
0
    def pre_validate(self, REQUEST, errors):
        # Normalise the submitted 'authors' records before validation.
        #
        # For each author record in REQUEST['authors']:
        #   * a reference equal to the string 'None' is reset to '';
        #   * a non-empty reference is resolved through 'reference_catalog'
        #     (using the canonical object when it is translatable), and the
        #     record is filled from `self.getAuthorDataFromMember()`;
        #   * if the record still has no lastname, the name is derived via
        #     `self._name_from_reference()`.
        # Records whose values are all blank are dropped.
        #
        # Results are written to REQUEST.form['authors'] and
        # REQUEST.form['member_publication_authors'].  `errors` is accepted
        # for the caller's signature but is not used here.

        reference_catalog = getToolByName(self, 'reference_catalog')

        authors = REQUEST.get('authors', [])
        result = []
        references = []

        # deduce author names from member reference
        for author in authors:
            reference = author.get('reference', None)
            if reference == 'None':
                author.reference = ''
            elif reference:

                reference_object = reference_catalog.lookupObject(reference)
                if reference_object.isTranslatable():
                    # Always point at the canonical translation.
                    reference_object = reference_object.getCanonical()
                    references.append(reference_object.UID())
                else:
                    references.append(reference)

                # obtain author data from privileged fields in the reference
                # object
                data = self.getAuthorDataFromMember(reference_object)
                # When the form already carries a lastname, only fields the
                # user marked with '?' are filled in from the member data.
                only_requested_data = bool(author.get('lastname', None))
                if data:
                    for key in list(data.keys()):
                        if key == 'middlename':
                            # merged into 'firstnames' below
                            continue
                        if not only_requested_data or (author.get(key, None)
                                                       == '?'):
                            if key == 'firstname':
                                author.firstnames = _decode(
                                    data['firstname']) + ' ' + _decode(
                                        data['middlename'])
                            else:
                                # setattr instead of exec: same assignment,
                                # without compiling code built from the key.
                                setattr(author, key, _decode(data[key]))

                # if this doesn't help, we try to derive the author name from
                # the Title of reference... (YUK)
                if not author.get('lastname', None):
                    firstnames, lastname = self._name_from_reference(reference)
                    author.firstnames = firstnames
                    author.lastname = lastname

            # keep only records that contain something besides whitespace
            if ''.join([
                    _decode(_encode(val)) for val in dict(author).values()
            ]).strip():
                result.append(author)

        REQUEST.form['authors'] = result[:]
        REQUEST.form['member_publication_authors'] = references[:]
    def getCookedBibRefId(self, ref, use_pid_on_import=True, **kwargs):
        """
        cook id for ref dict or object, ref dict / object may be a single reference only

        `ref` is either a reference dict or a reference object; anything that
        is not a dict (or dict subclass) is first converted with
        `self._object2ref()`.  When `use_pid_on_import` is true and the ref
        has a non-empty 'pid', that value is used as the id; otherwise the id
        comes from `self._cookIdCore()`, seeded with 'nobody1000'.  The id is
        passed through `self._cleanId()` and returned encoded.  Extra keyword
        arguments are accepted and ignored.
        """
        if not isinstance(ref, dict):
            ref = self._object2ref(ref)

        if use_pid_on_import and ref.get('pid'):
            new_id = ref['pid']
        else:
            new_id = self._cookIdCore(ref, new_id='nobody1000')

        return _encode(_decode(self._cleanId(new_id)))
Example #7
0
    def getCookedBibRefId(self, ref, use_pid_on_import=True, **kwargs):
        """
        cook id for ref dict or object, ref dict / object may be a single reference only

        A `ref` that is not a dict instance is treated as a reference object
        and converted with `self._object2ref()`.  The id is the ref's 'pid'
        when `use_pid_on_import` is true and a non-empty 'pid' exists;
        otherwise `self._cookIdCore()` computes it, starting from the
        placeholder 'nobody1000'.  The value returned is the cleaned
        (`self._cleanId()`) id after a decode/encode round trip.  `**kwargs`
        is unused.
        """
        is_reference_object = not isinstance(ref, dict)
        if is_reference_object:
            ref = self._object2ref(ref)

        pid = use_pid_on_import and ref.get('pid')
        if pid:
            new_id = pid
        else:
            new_id = self._cookIdCore(ref, new_id='nobody1000')

        return _encode(_decode(self._cleanId(new_id)))
 def _inline_links(self, hit):
     # Expand the comma-separated reference ids captured in group 1 of `hit`
     # into a '; '-joined string.  Each id found in 'portal_catalog' (among
     # `self.getReferenceTypes()`) becomes an HTML link to the first result,
     # labelled with its encoded `citationLabel` or 'no label' when that is
     # empty; ids without a catalog result are emitted as plain text.
     ref_ids = [part.strip() for part in hit.group(1).split(',')]
     portal_catalog = getToolByName(self, 'portal_catalog')
     charset = self.getProperty('default_charset') or 'utf-8'

     def render_one(ref_id):
         # One id -> link markup, or the id itself when nothing is found.
         found = portal_catalog(getId=ref_id,
                                portal_type=self.getReferenceTypes())
         if not found:
             return ref_id
         target = found[0].getURL()
         text = _encode(found[0].citationLabel, charset) or 'no label'
         return '<a href="%s">%s</a>' % (target, text)

     return '; '.join([render_one(ref_id) for ref_id in ref_ids])
 def _inline_links(self, hit):
     # Turn the comma-separated ids in `hit.group(1)` into inline citations.
     #
     # Every id is searched in 'portal_catalog', restricted to the portal
     # types from `self.getReferenceTypes()`.  A found id is rendered as
     # <a href=URL>label</a>, where the label is the first result's encoded
     # `citationLabel` (or 'no label' when that is empty); an unknown id is
     # kept verbatim.  The pieces are joined with '; '.
     wanted = [item.strip() for item in hit.group(1).split(',')]
     pieces = []
     lookup = getToolByName(self, 'portal_catalog')
     charset = self.getProperty('default_charset') or 'utf-8'
     anchor = '<a href="%s">%s</a>'
     for ref_key in wanted:
         query = dict(getId=ref_key, portal_type=self.getReferenceTypes())
         found = lookup(**query)
         if not found:
             pieces.append(ref_key)
             continue
         first = found[0]
         href = first.getURL()
         caption = _encode(first.citationLabel, charset) or 'no label'
         pieces.append(anchor % (href, caption))
     return '; '.join(pieces)
    def pre_validate(self, REQUEST, errors):
        # Clean up the 'authors' records of the request before validation.
        #
        # Each record is completed from its member reference where one is
        # given: the reference is resolved via 'reference_catalog' (canonical
        # object for translatable content), author data is pulled from
        # `self.getAuthorDataFromMember()`, and as a last resort the name is
        # derived with `self._name_from_reference()`.  Records that end up
        # entirely blank are discarded.
        #
        # Writes REQUEST.form['authors'] (the kept records) and
        # REQUEST.form['member_publication_authors'] (the collected
        # references).  `errors` is not used in this method.

        reference_catalog = getToolByName(self, 'reference_catalog')

        authors = REQUEST.get('authors', [])
        result = []
        references = []

        # deduce author names from member reference
        for author in authors:
            reference = author.get('reference', None)
            if reference == 'None':
                author.reference = ''
            elif reference:

                reference_object = reference_catalog.lookupObject(reference)
                if reference_object.isTranslatable():
                    # resolve the canonical translation once and reuse it
                    reference_object = reference_object.getCanonical()
                    references.append(reference_object.UID())
                else:
                    references.append(reference)

                # obtain author data from privileged fields in the reference object
                data = self.getAuthorDataFromMember(reference_object)
                # a pre-filled lastname means: only overwrite fields set to '?'
                only_requested_data = bool(author.get('lastname', None))
                if data:
                    # 'middlename' has no field of its own; it is appended to
                    # 'firstnames' together with 'firstname'
                    wanted_keys = [key for key in data.keys()
                                   if key != 'middlename']
                    for key in wanted_keys:
                        if not only_requested_data or (author.get(key, None) == '?'):
                            if key == 'firstname':
                                author.firstnames = _decode(data['firstname']) + ' ' + _decode(data['middlename'])
                            else:
                                # plain attribute assignment; no need to
                                # exec() source text built from the key
                                setattr(author, key, _decode(data[key]))

                # if this doesn't help, we try to derive the author name from the Title of reference... (YUK)
                if not author.get('lastname', None):
                    firstnames, lastname = self._name_from_reference(reference)
                    author.firstnames = firstnames
                    author.lastname = lastname

            # skip records that hold nothing but whitespace
            if ''.join([_decode(_encode(val)) for val in dict(author).values()]).strip():
                result.append(author)

        REQUEST.form['authors'] = result[:]
        REQUEST.form['member_publication_authors'] = references[:]
Example #11
0
    def convertLaTeX2Unicode(self, source):
        # Convert LaTeX entities in `source` to Unicode characters.
        #
        # The text is decoded once, run through the "simple" mapping tree and
        # then the general mapping tree, and encoded once at the end.
        #
        # History (JB, 2011-06-05), figures from cProfile on a ~1M benchmark
        # BibTeX file:
        #  1. Iterating `d.items()` and decoding/encoding once instead of on
        #     every iteration took the function from 9.615 to 1.198 CPU secs
        #     per call (8x).
        #  2. Replacing the per-entity `string.replace` loops with
        #     '_build_mapping_tree' / '_replace_using_mapping_tree' (see the
        #     comment above '_build_mapping_tree') brought it down to 0.259
        #     CPU secs per call (another 4.5x, >35x in total).
        text = _decode(source)
        for mapping_tree in (LATEX2UTF8ENC_MAPPING_SIMPLE_TREE,
                             LATEX2UTF8ENC_MAPPING_TREE):
            text = _replace_using_mapping_tree(text, mapping_tree)
        return _encode(text)
Example #12
0
    def convertLaTeX2Unicode(self, source):
        # Translate LaTeX entities found in `source` into Unicode characters
        # and return the re-encoded text.
        #
        # Pipeline: decode -> simple mapping tree -> general mapping tree ->
        # encode.  Decoding and encoding each happen exactly once.
        #
        # Performance notes (JB, 2011-06-05; cProfile, ~1M benchmark BibTeX
        # file):
        #  * looping over `d.items()` with a single decode/encode instead of
        #    one round trip per entity: 9.615 -> 1.198 CPU secs per call;
        #  * using the prebuilt mapping trees (see the comment above
        #    '_build_mapping_tree') instead of one `string.replace` call per
        #    entity: 1.198 -> 0.259 CPU secs per call, >35x overall.
        decoded = _decode(source)
        after_simple = _replace_using_mapping_tree(
            decoded, LATEX2UTF8ENC_MAPPING_SIMPLE_TREE)
        after_full = _replace_using_mapping_tree(
            after_simple, LATEX2UTF8ENC_MAPPING_TREE)
        return _encode(after_full)
def log(msg, severity=INFO, detail='', error=None):
    """Call ``LOG`` for the 'CMFBibliographyAT' subsystem.

    `msg` and `detail` are passed through `_encode` first; `severity` and
    `error` are handed on unchanged.
    """
    # XXX is this used somewhere?
    summary = _encode(msg)
    extra = _encode(detail)
    LOG('CMFBibliographyAT', severity, summary, extra, error)
Example #14
0
    def render(self, data, source_format, target_format, output_encoding=None):
        """ Transform data from 'source_format'
            to 'target_format'

            The shell command obtained from `_getCommand` is fed the encoded
            `data` on stdin and its stdout is returned; with an
            `output_encoding` the output is first passed through
            `_convertToOutputEncoding`.  An empty string is returned when
            `_getCommand` yields no command.  While the command runs, the
            value of the BIBUTILS_PATH environment variable (if set) is
            appended to PATH; PATH is restored before returning.

            We have nothing, so we do nothing :)
            >>> if _getCommand('bib', 'end', None) is not None:
            ...     result = ExternalTransformUtility().render('', 'bib', 'end')
            ...     assert result == ''

            >>> data = '''
            ...   @Book{bookreference.2008-02-04.7570607450,
            ...     author = {Werner, kla{\"u}s},
            ...     title = {H{\"a}rry Motter},
            ...     year = {1980},
            ...     publisher = {Diogenes}
            ...   }'''

            This should work. (If external bibutils are installed!)
            We transform the `bib`-format into the `end`-format
            >>> if _hasCommands(commands.get('bib2end')):
            ...     result = ExternalTransformUtility().render(data, 'bib', 'end')
            ...     # We need to take care of any stray Windows carriage returns.
            ...     result = result.replace('\r', '')
            ...     assert '''
            ... %0 Book
            ... %A Werner, kla"us title =. H"arry Motter
            ... %D 1980
            ... %I Diogenes
            ... %F bookreference.2008-02-04.7570607450 '''.strip() in result

            This one is not allowed. No valid transformer exists for
            `foo` and `bar` (foo2bar)
            >>> ExternalTransformUtility().render(data, 'foo', 'bar')
            Traceback (most recent call last):
            ...
            ValueError: No transformation from 'foo' to 'bar' found.

        """
        command = _getCommand(source_format, target_format)
        if not command:
            return ''

        orig_path = os.environ['PATH']
        if 'BIBUTILS_PATH' in os.environ:
            os.environ['PATH'] = os.pathsep.join([orig_path,
                                                  os.environ['BIBUTILS_PATH']])

        # try/finally so that PATH is restored on every exit path; the
        # restore used to sit after the return statements and never ran.
        try:
            ts = time.time()

            # This is a stinking workaround  with hanging subprocesses on Linux.
            # We had the case where "end2xml | xml2bib " was just hanging
            # while reading the results from the output pipeline. So we fall
            # back in a safe way to os.system() on Linux

            if sys.platform.startswith('linux'):

                temp_paths = []
                try:
                    # mkstemp creates the files atomically (mktemp only
                    # returns a name that somebody else may grab first).
                    for _unused in range(3):
                        fd, temp_path = tempfile.mkstemp()
                        os.close(fd)
                        temp_paths.append(temp_path)
                    input_filename, error_filename, output_filename = \
                        temp_paths

                    handle = open(input_filename, 'wb')
                    try:
                        handle.write(_encode(data))
                    finally:
                        handle.close()

                    # Kept in its own variable: `command` is still needed
                    # below to name the individual tools of the pipeline.
                    shell_command = 'cat "%s" | %s 2>"%s" 1>"%s"' % (
                        input_filename, command, error_filename,
                        output_filename)
                    os.system(shell_command)

                    # stderr went to error_filename, stdout to
                    # output_filename
                    handle = open(error_filename, 'rb')
                    try:
                        error = handle.read()
                    finally:
                        handle.close()
                    handle = open(output_filename, 'rb')
                    try:
                        result = handle.read()
                    finally:
                        handle.close()
                finally:
                    for temp_path in temp_paths:
                        os.unlink(temp_path)

            else:
                log.info(command)
                p = Popen(command, shell=True, stdin=PIPE, stdout=PIPE,
                          stderr=PIPE, close_fds=False)
                # communicate() feeds stdin and drains stdout/stderr
                # together, so a full pipe buffer cannot stall the child.
                result, error = p.communicate(_encode(data))

            log.info('Execution time: %2.2f seconds' % (time.time() - ts))

            if error:
                # command could be like 'ris2xml', or 'ris2xml | xml2bib'. It
                # seems unlikely, but we'll code for an arbitrary number of
                # pipes...
                command_list = command.split(' | ')
                for each in command_list:
                    if each in error and not result:
                        log.error(
                            "'%s' not found. Make sure 'bibutils' is installed.",
                            each)
            if output_encoding is None:
                return result
            else:
                return _convertToOutputEncoding(
                    result, output_encoding=output_encoding)
        finally:
            os.environ['PATH'] = orig_path
def log(msg, severity=INFO, detail='', error=None):
    """Hand a message to ``LOG`` under the subsystem 'CMFBibliographyAT'.

    Both `msg` and `detail` go through `_encode` before the call;
    `severity` and `error` are passed along as given.
    """
    # XXX is this used somewhere?
    log_args = (severity, _encode(msg), _encode(detail), error)
    LOG('CMFBibliographyAT', *log_args)
Example #16
0
class ImportView(BrowserView):

    template = ViewPageTemplateFile('import.pt')

    def __call__(self):
        self.processed = False
        self.errors = {}
        start_time = self.context.ZopeTime().timeTime()
        if not self.request.form.get('form.submitted'):
            return self.template()

        # fetch value from request
        input_encoding = self.request.form.get('input_encoding', 'utf-8')
        span_of_search = self.request.form.get('span_of_search', None)
        format = self.request.form.get('format', 'bib')

        # process source
        filename = None
        source = self.request.form.get('up_text')
        if not source:
            upfile = self.request.form.get('file')
            filename = upfile and getattr(upfile, 'filename', None)
            if not filename:
                self.errors['file'] = _(u'You must import a file or enter a'
                                        ' text.')
                addStatusMessage(self.request,
                                 _(u"Please correct the indicated errors."))
                return self.template()
            source = upfile.read()
            if not source or not isinstance(source, basestring):
                msg = "Could not read the file '%s'." % filename
                self.errors['file'] = msg
                addStatusMessage(self.request, _(unicode(msg)))
                return self.template()

        # skip DOS line breaks
        source = source.replace('\r', '')

        # get parsed entries from the Bibliography Tool
        bibtool = getToolByName(self.context, 'portal_bibliography')
        try:
            entries = bibtool.getEntries(source,
                                         format,
                                         filename,
                                         input_encoding=input_encoding)
        except ImportParseError:
            msg = """%s Parser's 'checkFormat' and guessing the format""" \
                  """ from the file name '%s' failed.""" % (format,
                                                            filename)
            self.errors['format'] = msg
            addStatusMessage(self.request, _(unicode(msg)))
            return self.template()
        except UnicodeError:
            msg = """The choosen input encoding does not match the real  """ \
                  """encoding of your input data in order to convert it to """\
                  """unicode internally."""
            self.errors['input_encoding'] = msg
            addStatusMessage(self.request, _(unicode(msg)))
            return self.template()
        except RuntimeError, e:
            addStatusMessage(self.request, _(unicode(e)))
            return self.template()

        # debug message if entries is not a python list
        if not entries or not isinstance(entries, (list, tuple)):
            msg = "There must be something wrong with the parser"
            addStatusMessage(self.request, _(unicode(msg)))
            return self.template()

        # start building the report
        mtool = getToolByName(self.context, 'portal_membership')
        member = mtool.getAuthenticatedMember()
        fullname = member.getProperty('fullname', None)
        if fullname:
            username = '******' % (_encode(fullname), _encode(member.getId()))
        else:
            username = _encode(member.getId())
        tmp_report = '[%s] Imported by %s' % (self.context.ZopeTime(),
                                              username)
        if filename is not None:
            tmp_report += ' from file %s' % _encode(filename)
        tmp_report += ':\n\n'

        # process import for each entry
        processedEntries = 0
        importErrors = 0

        logger.info('Start import of %s raw entries.' % len(entries))
        counter = 0

        for entry in entries:
            counter += 1
            count = '#%05i: ' % counter
            logger.info(count + 'processing entry')
            # Workaround for #36 where an entry represents
            # an error from parser instead of a dict containing
            # importable data
            if isinstance(entry, basestring):
                msg = 'Entry could not be parsed! %s' % _encode(entry)
                upload = (msg, 'error')
                logger.error(count + msg)
            elif entry.get('title'):
                logger.info(count + 'Normal processing')
                upload = self.context.processSingleImport(
                    entry, span_of_search=span_of_search)
            else:
                formated = '; '.join([
                    '%s=%s' % (key, entry[key]) for key in sorted(entry.keys())
                    if key == key.lower()
                ])
                upload = ('Found entry without title: %s\n' % formated,
                          'error')
                logger.error(count + upload[0])
            if upload[1] == 'ok':
                processedEntries += 1
            else:
                importErrors += 1
            state, msg = _encode(upload[1].upper()), _encode(upload[0])
            tmp_report += '%s: %s\n' % (state, msg)
        self.context.logImportReport(tmp_report)
        self.processed = True
        # set the portal status message up
        msg = "Processed %i entries. There were %i errors. "\
              "Import processed in %f seconds. See import report below." \
              % (processedEntries, importErrors,
                 self.context.ZopeTime().timeTime() - start_time)
        logger.info(msg)
        addStatusMessage(self.request, _(unicode(msg)))
        return self.template()