def convertLaTeX2Unicode(self, source):
    """Replace LaTeX entities in ``source`` using the two module mappings.

    Every key of ``_latex2utf8enc_mapping_simple`` is substituted first,
    followed by every key of ``_latex2utf8enc_mapping``.

    The text goes through ``_decode`` once before the substitutions and
    through ``_encode`` once afterwards, instead of round-tripping through
    both helpers for every single mapping entry.  This assumes that
    ``_decode(_encode(text))`` hands back ``text`` unchanged -- TODO confirm
    against the helper definitions, which are not visible from here.

    :param source: text that may contain LaTeX entities.
    :returns: the result of ``_encode`` applied to the substituted text.
    """
    text = _decode(source)
    # The "simple" mapping has to be applied before the general one, exactly
    # as in the previous per-entry implementation.
    for mapping in (_latex2utf8enc_mapping_simple, _latex2utf8enc_mapping):
        for latex_entity, replacement in mapping.items():
            text = text.replace(latex_entity, replacement)
    return _encode(text)
def convertLaTeX2Unicode(self, source):
    """Substitute LaTeX entities in ``source`` with their mapped replacements.

    Two passes are made: one over ``_latex2utf8enc_mapping_simple`` and then
    one over ``_latex2utf8enc_mapping``.

    ``_decode`` is called once up front and ``_encode`` once at the end; the
    earlier code repeated both calls for each mapping entry, which is
    loop-invariant work.  Presumably the two helpers are inverse to each
    other for already-decoded text -- verify against their definitions.

    :param source: text that may contain LaTeX entities.
    :returns: the substituted text after a final ``_encode``.
    """
    decoded = _decode(source)

    # First pass: the "simple" entities.
    for latex_entity, replacement in _latex2utf8enc_mapping_simple.items():
        decoded = decoded.replace(latex_entity, replacement)

    # Second pass: the remaining entities.
    for latex_entity, replacement in _latex2utf8enc_mapping.items():
        decoded = decoded.replace(latex_entity, replacement)

    return _encode(decoded)
def _bibitem_links(self, hit):
    """Render one matched reference id as a linked citation line.

    ``hit`` is a regex match object; its first group, stripped, is used as
    the ``getId`` of a catalog query restricted to
    ``self.getReferenceTypes()``.

    :returns: ``'<authors> (<year>) <a href="<url>"><title></a>, <source>'``
        built from the first catalog result, or the bare id when the
        catalog returns nothing.
    """
    ref_id = hit.group(1).strip()
    portal_catalog = getToolByName(self, 'portal_catalog')
    matches = portal_catalog(getId=ref_id,
                             portal_type=self.getReferenceTypes())
    if not matches:
        # Nothing catalogued under this id: give the id back untouched.
        return ref_id

    first = matches[0]
    charset = self.getProperty('default_charset') or 'utf-8'
    authors, year, title, source = [
        _encode(getattr(first, attribute), charset)
        for attribute in ('Authors', 'publication_year', 'Title', 'Source')
    ]
    pattern = '%s (%s) <a href="%s">%s</a>, %s'
    return pattern % (authors, year, first.getURL(), title, source)
def _bibitem_links(self, hit):
    """Turn a matched reference id into an HTML citation with a link.

    The stripped first group of ``hit`` is looked up in ``portal_catalog``
    (by ``getId``, limited to ``self.getReferenceTypes()``).  When a result
    exists, its ``Authors``, ``publication_year``, ``Title`` and ``Source``
    values are passed through ``_encode`` with the site's
    ``default_charset`` property (falling back to ``'utf-8'``) and combined
    with the result's URL.  Otherwise the id itself is returned.
    """
    item_id = hit.group(1).strip()
    search = getToolByName(self, 'portal_catalog')
    found = search(getId=item_id, portal_type=self.getReferenceTypes())

    if found:
        record = found[0]
        target_encoding = self.getProperty('default_charset') or 'utf-8'
        encoded_authors = _encode(record.Authors, target_encoding)
        encoded_year = _encode(record.publication_year, target_encoding)
        encoded_title = _encode(record.Title, target_encoding)
        encoded_source = _encode(record.Source, target_encoding)
        href = record.getURL()
        return '%s (%s) <a href="%s">%s</a>, %s' % (
            encoded_authors, encoded_year, href, encoded_title,
            encoded_source)

    return item_id
def pre_validate(self, REQUEST, errors):
    """Normalise the submitted ``authors`` records before validation.

    For every author record in ``REQUEST``:

    * a ``reference`` equal to the string ``'None'`` is reset to ``''``;
    * any other non-empty ``reference`` is resolved through
      ``reference_catalog``.  Translatable objects are replaced by their
      canonical object, whose UID is collected; otherwise the submitted
      reference value itself is collected.  Author data obtained from
      ``self.getAuthorDataFromMember`` is then copied onto the record, and
      if the record still has no ``lastname`` the name is derived with
      ``self._name_from_reference``;
    * records whose values are all blank are dropped.

    The surviving records are written to ``REQUEST.form['authors']`` and the
    collected references to ``REQUEST.form['member_publication_authors']``.

    :param REQUEST: the request whose ``authors`` entry is read and whose
        ``form`` is updated in place.
    :param errors: not used by this method.
    """
    reference_catalog = getToolByName(self, 'reference_catalog')
    authors = REQUEST.get('authors', [])
    result = []
    references = []

    # Deduce author names from the member reference.
    for author in authors:
        reference = author.get('reference', None)
        if reference == 'None':
            author.reference = ''
        elif reference:
            reference_object = reference_catalog.lookupObject(reference)
            if reference_object.isTranslatable():
                # Work with the canonical object from here on.
                reference_object = reference_object.getCanonical()
                references.append(reference_object.UID())
            else:
                references.append(reference)

            # Obtain author data from privileged fields in the reference
            # object.
            data = self.getAuthorDataFromMember(reference_object)
            # With a lastname already given, only fields explicitly marked
            # with '?' are filled in from the member data.
            only_requested_data = bool(author.get('lastname', None))
            if data:
                for key in data.keys():
                    if key == 'middlename':
                        # Folded into 'firstnames' below, never set alone.
                        continue
                    if only_requested_data and author.get(key, None) != '?':
                        continue
                    if key == 'firstname':
                        author.firstnames = (_decode(data['firstname'])
                                             + ' '
                                             + _decode(data['middlename']))
                    else:
                        # Plain attribute assignment; no need to build and
                        # exec source code for it.
                        setattr(author, key, _decode(data[key]))

            # If this doesn't help, we try to derive the author name from
            # the Title of the reference... (YUK)
            if not author.get('lastname', None):
                firstnames, lastname = self._name_from_reference(reference)
                author.firstnames = firstnames
                author.lastname = lastname

        # Keep the record only if at least one of its values is non-blank.
        if ''.join([_decode(_encode(val))
                    for val in dict(author).values()]).strip():
            result.append(author)

    REQUEST.form['authors'] = result[:]
    REQUEST.form['member_publication_authors'] = references[:]
def getCookedBibRefId(self, ref, use_pid_on_import=True, **kwargs):
    """Cook an id for a single reference.

    :param ref: either a reference dict or a reference object.  Anything
        that is not a ``dict`` (or dict subclass) is first converted with
        ``self._object2ref``.
    :param use_pid_on_import: when true and the reference carries a
        non-empty ``'pid'`` entry, that value is used as the id.
    :param kwargs: accepted for call compatibility; not used here.
    :returns: the chosen id after ``self._cleanId`` and an
        ``_encode(_decode(...))`` round-trip.
    """
    # isinstance() also recognises dict subclasses, which the previous exact
    # type comparison wrongly sent through _object2ref.
    if not isinstance(ref, dict):
        ref = self._object2ref(ref)

    if use_pid_on_import and ref.get('pid'):
        new_id = ref['pid']
    else:
        # 'nobody1000' is the starting id handed to the cooking routine.
        new_id = self._cookIdCore(ref, new_id='nobody1000')

    return _encode(_decode(self._cleanId(new_id)))
def _inline_links(self, hit):
    """Render a comma-separated list of reference ids as inline links.

    The first group of ``hit`` is split on commas.  Each stripped id is
    looked up in ``portal_catalog``; a found id becomes
    ``<a href="<url>"><citationLabel></a>`` (with ``'no label'`` standing in
    for an empty label), an unknown id is kept as plain text.

    :returns: the rendered pieces joined with ``'; '``.
    """
    ref_ids = [piece.strip() for piece in hit.group(1).split(',')]
    rendered = []
    portal_catalog = getToolByName(self, 'portal_catalog')
    charset = self.getProperty('default_charset') or 'utf-8'

    for ref_id in ref_ids:
        found = portal_catalog(getId=ref_id,
                               portal_type=self.getReferenceTypes())
        if not found:
            rendered.append(ref_id)
            continue
        first = found[0]
        href = first.getURL()
        text = _encode(first.citationLabel, charset) or 'no label'
        rendered.append('<a href="%s">%s</a>' % (href, text))

    return '; '.join(rendered)
def _inline_links(self, hit):
    """Convert the ids captured by ``hit`` into ``'; '``-joined link markup.

    Ids come from splitting the first match group on commas and stripping
    each piece.  An id known to ``portal_catalog`` (queried by ``getId`` and
    ``self.getReferenceTypes()``) is rendered as an anchor pointing at the
    first result's URL, labelled with its encoded ``citationLabel`` or
    ``'no label'``; any other id is emitted unchanged.
    """
    wanted = [chunk.strip() for chunk in hit.group(1).split(',')]
    lookup = getToolByName(self, 'portal_catalog')
    target_encoding = self.getProperty('default_charset') or 'utf-8'

    def as_link(item_id):
        # One catalog query per id, keeping the original lookup behaviour.
        records = lookup(getId=item_id,
                         portal_type=self.getReferenceTypes())
        if records:
            address = records[0].getURL()
            caption = _encode(records[0].citationLabel, target_encoding)
            if not caption:
                caption = 'no label'
            return '<a href="%s">%s</a>' % (address, caption)
        return item_id

    return '; '.join([as_link(item_id) for item_id in wanted])
def pre_validate(self, REQUEST, errors):
    """Complete and filter the submitted author records before validation.

    Each record found under ``authors`` in ``REQUEST`` is processed as
    follows:

    * ``reference == 'None'`` (the literal string) is cleared to ``''``;
    * a non-empty ``reference`` is looked up via ``reference_catalog``.  For
      a translatable object the canonical object is used and its UID is
      remembered; for any other object the submitted reference value is
      remembered.  Fields returned by ``self.getAuthorDataFromMember`` are
      copied onto the record, and a still-missing ``lastname`` is derived
      with ``self._name_from_reference``;
    * a record is kept only if its joined values are not blank.

    Results are stored in ``REQUEST.form['authors']`` and
    ``REQUEST.form['member_publication_authors']``.

    :param REQUEST: request providing ``authors``; its ``form`` mapping is
        modified in place.
    :param errors: unused here.
    """
    reference_catalog = getToolByName(self, 'reference_catalog')
    kept_authors = []
    member_references = []

    for author in REQUEST.get('authors', []):
        reference = author.get('reference', None)

        if reference == 'None':
            author.reference = ''
        elif reference:
            reference_object = reference_catalog.lookupObject(reference)
            if reference_object.isTranslatable():
                # Resolve the canonical object once and reuse it.
                reference_object = reference_object.getCanonical()
                member_references.append(reference_object.UID())
            else:
                member_references.append(reference)

            # Obtain author data from privileged fields in the reference
            # object.
            data = self.getAuthorDataFromMember(reference_object)
            # A record that already has a lastname only receives the fields
            # the user explicitly asked for with a '?' placeholder.
            only_requested_data = bool(author.get('lastname', None))
            if data:
                wanted_keys = [key for key in data.keys()
                               if key != 'middlename']
                for key in wanted_keys:
                    requested = author.get(key, None) == '?'
                    if only_requested_data and not requested:
                        continue
                    if key == 'firstname':
                        # The middle name is merged into 'firstnames'.
                        author.firstnames = '%s %s' % (
                            _decode(data['firstname']),
                            _decode(data['middlename']),
                        ) if False else (_decode(data['firstname']) + ' '
                                         + _decode(data['middlename']))
                    else:
                        # setattr replaces an exec() of generated source.
                        setattr(author, key, _decode(data[key]))

            # If this doesn't help, we try to derive the author name from
            # the Title of the reference... (YUK)
            if not author.get('lastname', None):
                firstnames, lastname = self._name_from_reference(reference)
                author.firstnames = firstnames
                author.lastname = lastname

        joined_values = ''.join(
            [_decode(_encode(val)) for val in dict(author).values()])
        if joined_values.strip():
            kept_authors.append(author)

    REQUEST.form['authors'] = kept_authors[:]
    REQUEST.form['member_publication_authors'] = member_references[:]
def convertLaTeX2Unicode(self, source):
    """Convert LaTeX entities in ``source`` to Unicode characters.

    ``source`` is decoded once, run through ``_replace_using_mapping_tree``
    with ``LATEX2UTF8ENC_MAPPING_SIMPLE_TREE`` and then with
    ``LATEX2UTF8ENC_MAPPING_TREE``, and encoded once at the end.
    """
    # History, as recorded by JB on 2011-06-05:
    #
    # 1. The per-entry loop was changed from
    #        for k in d.keys(): s = s.replace(k, d[k])
    #    to
    #        for k, v in d.items(): s = s.replace(k, v)
    #
    # 2. Instead of (decode -> replace -> encode) on *every* iteration, the
    #    text is decoded once at the beginning and encoded once at the end.
    #
    #    Together this gave an 8x speedup (profiled with cProfile): 9.615
    #    CPU secs per call on the ~1M benchmark BibTeX file (19.230 secs
    #    total) down to 1.198 CPU secs per call (2.397 secs total).
    #
    # 3. '_build_mapping_tree' and '_replace_using_mapping_tree' were then
    #    introduced to cut down the number of 'string.replace' calls (see
    #    the comment immediately above '_build_mapping_tree').  This gave a
    #    further 4.5x speedup: 1.198 CPU secs per call down to 0.259, for a
    #    total speedup of more than 35x (9.615 down to 0.259).
    text = _decode(source)
    # The simple mapping tree must be applied before the general one.
    for tree in (LATEX2UTF8ENC_MAPPING_SIMPLE_TREE,
                 LATEX2UTF8ENC_MAPPING_TREE):
        text = _replace_using_mapping_tree(text, tree)
    return _encode(text)
def convertLaTeX2Unicode(self, source):
    """Replace LaTeX entities in ``source`` with Unicode characters.

    Pipeline: ``_decode`` -> replacement using the simple mapping tree ->
    replacement using the full mapping tree -> ``_encode``.

    Performance notes from the original author (JB, 2011-06-05):

    * Iterating ``d.items()`` and decoding/encoding once per call, instead
      of once per mapping entry, yielded an 8x speedup under cProfile:
      9.615 CPU secs per call on the ~1M benchmark BibTeX file (19.230 secs
      total) down to 1.198 CPU secs per call (2.397 secs total).
    * Using '_build_mapping_tree' and '_replace_using_mapping_tree' to
      reduce the number of 'string.replace' calls (see the comment
      immediately above '_build_mapping_tree') yielded another 4.5x:
      1.198 CPU secs per call down to 0.259.
    * Total speedup of this function: more than 35x (9.615 down to 0.259).
    """
    decoded = _decode(source)
    after_simple = _replace_using_mapping_tree(
        decoded, LATEX2UTF8ENC_MAPPING_SIMPLE_TREE)
    after_full = _replace_using_mapping_tree(
        after_simple, LATEX2UTF8ENC_MAPPING_TREE)
    return _encode(after_full)
def log(msg, severity=INFO, detail='', error=None):
    """Send ``msg`` to ``LOG`` under the 'CMFBibliographyAT' subsystem.

    ``msg`` and ``detail`` are passed through ``_encode`` first; ``severity``
    and ``error`` are forwarded unchanged.
    """
    # XXX is this used somewhere?
    encoded_msg = _encode(msg)
    encoded_detail = _encode(detail)
    LOG('CMFBibliographyAT', severity, encoded_msg, encoded_detail, error)
def render(self, data, source_format, target_format, output_encoding=None):
    r"""
    Transform data from 'source_format'
    to 'target_format'

    The external command is obtained from ``_getCommand``; an empty string
    is returned when there is none.  While the command runs, the directory
    named by the ``BIBUTILS_PATH`` environment variable (if set) is appended
    to ``PATH``; ``PATH`` is restored before returning.

    :param data: the input handed to the command on stdin (after
        ``_encode``).
    :param output_encoding: when not ``None``, the command output is passed
        through ``_convertToOutputEncoding`` before being returned.
    :returns: the command's standard output, optionally re-encoded.

    We have nothing, so we do nothing :)
    >>> if _getCommand('bib', 'end', None) is not None:
    ...     result = ExternalTransformUtility().render('', 'bib', 'end')
    ...     assert result == ''

    >>> data = '''
    ...   @Book{bookreference.2008-02-04.7570607450,
    ...     author = {Werner, kla{\"u}s},
    ...     title = {H{\"a}rry Motter},
    ...     year = {1980},
    ...     publisher = {Diogenes}
    ...   }'''

    This should work. (If external bibutils are installed!)
    We transform the `bib`-format into the `end`-format
    >>> if _hasCommands(commands.get('bib2end')):
    ...     result = ExternalTransformUtility().render(data, 'bib', 'end')
    ...     # We need to take care of any stray Windows carriage returns.
    ...     result = result.replace('\r', '')
    ...     assert '''
    ... %0 Book
    ... %A Werner, kla"us title =. H"arry Motter
    ... %D 1980
    ... %I Diogenes
    ... %F bookreference.2008-02-04.7570607450 '''.strip() in result

    This one is not allowed. No valid transformer exists for
    `foo` and `bar` (foo2bar)
    >>> ExternalTransformUtility().render(data, 'foo', 'bar')
    Traceback (most recent call last):
    ...
    ValueError: No transformation from 'foo' to 'bar' found.

    """
    command = _getCommand(source_format, target_format)
    if not command:
        return ''

    orig_path = os.environ['PATH']
    if 'BIBUTILS_PATH' in os.environ:
        os.environ['PATH'] = os.pathsep.join(
            [orig_path, os.environ['BIBUTILS_PATH']])

    # The try/finally guarantees PATH is restored; the restore used to sit
    # after the return statements and was never executed.
    try:
        # This is a stinking workaround for hanging subprocesses on Linux.
        # We had the case where "end2xml | xml2bib " was just hanging
        # while reading the results from the output pipeline. So we fall
        # back in a safe way to os.system() on Linux.
        if sys.platform.startswith('linux'):
            temp_paths = []
            try:
                # mkstemp() creates the files atomically; mktemp() only
                # returned names and left a window for a race.
                for _ in range(3):
                    handle, path = tempfile.mkstemp()
                    os.close(handle)
                    temp_paths.append(path)
                input_filename, error_filename, output_filename = temp_paths

                with open(input_filename, 'wb') as input_file:
                    input_file.write(_encode(data))

                # Kept separate from ``command`` so that the tool names can
                # still be matched against the error output below.
                shell_command = 'cat "%s" | %s 2>"%s" 1>"%s"' % (
                    input_filename, command, error_filename,
                    output_filename)
                os.system(shell_command)

                # stderr was redirected to error_filename, stdout to
                # output_filename; read each from its own file.
                with open(error_filename, 'rb') as error_file:
                    error = error_file.read()
                with open(output_filename, 'rb') as output_file:
                    result = output_file.read()
            finally:
                for path in temp_paths:
                    os.unlink(path)
        else:
            ts = time.time()
            log.info(command)
            p = Popen(command, shell=True,
                      stdin=PIPE, stdout=PIPE, stderr=PIPE,
                      close_fds=False)
            # communicate() feeds stdin and drains stdout and stderr
            # together, so a full stderr pipe cannot block the child while
            # we are still waiting on stdout.
            result, error = p.communicate(_encode(data))
            log.info('Execution time: %2.2f seconds' % (time.time() - ts))

        if error:
            # command could be like 'ris2xml', or 'ris2xml | xml2bib'. It
            # seems unlikely, but we'll code for an arbitrary number of
            # pipes...
            for each in command.split(' | '):
                if each in error and not result:
                    log.error(
                        "'%s' not found. Make sure 'bibutils' is installed.",
                        each)

        if output_encoding is None:
            return result
        return _convertToOutputEncoding(result,
                                        output_encoding=output_encoding)
    finally:
        os.environ['PATH'] = orig_path
class ImportView(BrowserView):
    """Browser view that imports bibliographic entries.

    The entries come either from text pasted into the form (``up_text``) or
    from an uploaded file (``file``).  After a call, ``self.errors`` maps
    form field names to error messages and ``self.processed`` tells whether
    an import run was completed.
    """

    template = ViewPageTemplateFile('import.pt')

    def __call__(self):
        """Process the import form and render the template.

        Reads ``input_encoding``, ``span_of_search`` and ``format`` from the
        request form, parses the source with the ``portal_bibliography``
        tool, imports every parsed entry through
        ``self.context.processSingleImport`` and hands the resulting report
        to ``self.context.logImportReport``.  Every failure path adds a
        status message and re-renders the template.

        :returns: the rendered ``import.pt`` template.
        """
        self.processed = False
        self.errors = {}
        start_time = self.context.ZopeTime().timeTime()
        form = self.request.form
        if not form.get('form.submitted'):
            return self.template()

        # fetch values from the request
        input_encoding = form.get('input_encoding', 'utf-8')
        span_of_search = form.get('span_of_search', None)
        source_format = form.get('format', 'bib')

        # process source: pasted text wins over an uploaded file
        filename = None
        source = form.get('up_text')
        if not source:
            upfile = form.get('file')
            filename = upfile and getattr(upfile, 'filename', None)
            if not filename:
                self.errors['file'] = _(u'You must import a file or enter a'
                                        ' text.')
                addStatusMessage(self.request,
                                 _(u"Please correct the indicated errors."))
                return self.template()
            source = upfile.read()
            if not source or not isinstance(source, basestring):
                msg = "Could not read the file '%s'." % filename
                self.errors['file'] = msg
                addStatusMessage(self.request, _(unicode(msg)))
                return self.template()

        # skip DOS line breaks
        source = source.replace('\r', '')

        # get parsed entries from the Bibliography Tool
        bibtool = getToolByName(self.context, 'portal_bibliography')
        try:
            entries = bibtool.getEntries(source, source_format, filename,
                                         input_encoding=input_encoding)
        except ImportParseError:
            msg = """%s Parser's 'checkFormat' and guessing the format""" \
                  """ from the file name '%s' failed.""" % (source_format,
                                                            filename)
            self.errors['format'] = msg
            addStatusMessage(self.request, _(unicode(msg)))
            return self.template()
        except UnicodeError:
            msg = """The choosen input encoding does not match the real """ \
                  """encoding of your input data in order to convert it to """\
                  """unicode internally."""
            self.errors['input_encoding'] = msg
            addStatusMessage(self.request, _(unicode(msg)))
            return self.template()
        except RuntimeError as e:
            addStatusMessage(self.request, _(unicode(e)))
            return self.template()

        # debug message if entries is not a python list
        if not entries or not isinstance(entries, (list, tuple)):
            msg = "There must be something wrong with the parser"
            addStatusMessage(self.request, _(unicode(msg)))
            return self.template()

        # start building the report
        header = '[%s] Imported by %s' % (self.context.ZopeTime(),
                                          self._report_username())
        if filename is not None:
            header += ' from file %s' % _encode(filename)
        report_parts = [header, ':\n\n']

        # process import for each entry
        processedEntries = 0
        importErrors = 0
        logger.info('Start import of %s raw entries.' % len(entries))

        for counter, entry in enumerate(entries, 1):
            count = '#%05i: ' % counter
            logger.info(count + 'processing entry')
            upload = self._import_entry(entry, count, span_of_search)
            if upload[1] == 'ok':
                processedEntries += 1
            else:
                importErrors += 1
            state, msg = _encode(upload[1].upper()), _encode(upload[0])
            report_parts.append('%s: %s\n' % (state, msg))

        self.context.logImportReport(''.join(report_parts))
        self.processed = True

        # set the portal status message up
        msg = "Processed %i entries. There were %i errors. "\
              "Import processed in %f seconds. See import report below." \
              % (processedEntries, importErrors,
                 self.context.ZopeTime().timeTime() - start_time)
        logger.info(msg)
        addStatusMessage(self.request, _(unicode(msg)))
        return self.template()

    def _report_username(self):
        """Return the importing member's name for the report header."""
        mtool = getToolByName(self.context, 'portal_membership')
        member = mtool.getAuthenticatedMember()
        fullname = member.getProperty('fullname', None)
        if fullname:
            # Two values need two placeholders; a format string without any
            # placeholder raises TypeError here.
            return '%s (%s)' % (_encode(fullname), _encode(member.getId()))
        return _encode(member.getId())

    def _import_entry(self, entry, count, span_of_search):
        """Import one parsed entry and return its ``(message, status)`` pair.

        ``count`` is the log prefix identifying the entry.
        """
        # Workaround for #36 where an entry represents
        # an error from parser instead of a dict containing
        # importable data
        if isinstance(entry, basestring):
            msg = 'Entry could not be parsed! %s' % _encode(entry)
            logger.error(count + msg)
            return (msg, 'error')

        if entry.get('title'):
            logger.info(count + 'Normal processing')
            return self.context.processSingleImport(
                entry, span_of_search=span_of_search)

        # No title: report the entry's lower-case keys for diagnosis.
        formated = '; '.join(['%s=%s' % (key, entry[key])
                              for key in sorted(entry.keys())
                              if key == key.lower()])
        upload = ('Found entry without title: %s\n' % formated, 'error')
        logger.error(count + upload[0])
        return upload