def getLicensePortletText(context, uri):
    """Generate a brief terms of use/license description for the portlet.

    :param context: object used to look up licenses and the
        ``portal_transforms`` tool.
    :param uri: the license URI; a false value yields the "unspecified"
        message.
    :returns: the license's ``portlet_text`` run through the ``safe_html``
        transform when a matching license defines one, otherwise a
        generated sentence linking to ``uri``.
    """
    if not uri:
        return u'The terms of use/license for this work is unspecified.'

    # Look up the license object(s) registered for this URI.
    results = getLicenses(context, pmr2_license_uri=uri)
    license_obj = None
    if results:
        # Assume the first one.
        license_obj = results[0].getObject()
        if license_obj.portlet_text:
            # The tool has to be acquired from ``context``; the previous
            # code passed the ``input`` builtin here by mistake.
            pt = getToolByName(context, 'portal_transforms')
            stream = datastream('license_description')
            pt.convert('safe_html', license_obj.portlet_text, stream)
            return stream.getData()

    # template undefined, we generate one.
    # XXX ideally this should be a proper template/view.
    license_template = (
        u'The terms of use for this work and/or license this work is under '
        'is: <a href=%s>%s</a>.'
    )
    # Fall back to the escaped URI when there is no license or it has no
    # title.
    title = (license_obj.title if license_obj else None) or escape(uri)
    return license_template % (quoteattr(uri), title)
def convert(self, data, cache, **kwargs):
    """Run ``self.binary`` on ``data`` and return the text it produced.

    ``data`` is handed to ``self.initialize_tmpdir`` under the file name
    ``input.tiff``; the binary is invoked with that input file and an
    output base path, and its result is read from ``<base>.txt``.

    :param data: raw input passed to ``initialize_tmpdir``.
    :param cache: not used by this implementation; a new datastream is
        returned instead.
    :returns: a ``datastream`` named ``output.txt`` holding the text.
    :raises EnvironmentError: when the binary exits with a non-zero status
        or is killed by a signal; the message includes its output.
    """
    kwargs['filename'] = 'input.tiff'
    tmp_dir, input_file = self.initialize_tmpdir(data, filename='input.tiff')
    try:
        output_file_path = os.path.join(tmp_dir, 'output')
        cmd = self.binary, input_file, output_file_path
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        stdout = process.communicate()[0]
        err = process.returncode
        if err:
            # A negative return code means the child was killed by a signal.
            if err < 0:
                exit_msg = 'killed with signal %s' % -err
            else:
                exit_msg = 'exited with status %s' % err
            raise EnvironmentError('Command %r %s. Command output:\n%s' %
                                   (cmd, exit_msg, stdout))
        # ``with`` guarantees the handle is closed even if read() fails.
        with open(output_file_path + '.txt', 'r') as output_file:
            out = output_file.read()
    finally:
        self.cleanDir(tmp_dir)
    data = datastream('output.txt')
    data.setData(out)
    return data
def test_nasty_tags(self):
    """Tags declared nasty in the control panel are stripped by safe_html."""
    self.browser.open(
        "%s/@@filter-controlpanel" % self.portal_url)
    nasty_control = self.browser.getControl(name='form.widgets.nasty_tags')
    self.assertEqual(
        nasty_control.value,
        'style\nobject\nembed\napplet\nscript\nmeta')
    nasty_control.value = 'div\na'

    valid_control = self.browser.getControl(name='form.widgets.valid_tags')
    valid_tags = valid_control.value
    self.assertTrue(valid_tags.startswith('a\nabbr\nacronym\naddress'))
    # Drop only the line that is exactly "a".  A plain
    # ``replace('a\n', '')`` would also eat the tail of every tag ending
    # in "a" and glue it to the following tag.
    valid_control.value = '\n'.join(
        tag for tag in valid_tags.split('\n') if tag != 'a')
    self.browser.getControl('Save').click()

    self.assertEqual(self.settings.nasty_tags, [u'div', u'a'])
    self.assertNotIn(u'a', self.settings.valid_tags)

    # test that <a> is filtered
    self.assertFalse(self.settings.disable_filtering)
    good_html = '<p><a href="http://example.com">harmless link</a></p>'
    ds = datastream('dummy_name')
    self.assertEqual(
        self.safe_html.convert(good_html, ds).getData(),
        '<p/>'
    )
def do_convert(self, filename=None):
    """Convert ``self.input`` and compare the result with the reference.

    The reference is ``self.output``, or ``self.output + '.nofilename'``
    when no ``filename`` is given and that file exists.  If the reference
    cannot be opened, the converted data is written to that path and the
    test fails so the new file can be checked by hand.

    :param filename: forwarded to the transform's ``convert``.
    """
    if filename is None and exists(self.output + '.nofilename'):
        output = self.output + '.nofilename'
    else:
        output = self.output

    source = open(self.input)
    try:
        orig = source.read()
    finally:
        source.close()

    data = datastream(self.transform.name())
    res_data = self.transform.convert(orig, data, filename=filename)
    self.assertTrue(idatastream.isImplementedBy(res_data))
    got = res_data.getData()

    try:
        reference = open(output)
    except IOError:
        import sys
        sys.stderr.write('No output file found.\n')
        # Report the path actually written, which may be the
        # ``.nofilename`` variant rather than ``self.output``.
        sys.stderr.write('File %s created, check it !\n' % output)
        created = open(output, 'w')
        try:
            created.write(got)
        finally:
            created.close()
        self.fail('No output file found; %s created, check it !' % output)
    try:
        expected = reference.read()
    finally:
        reference.close()

    if self.normalize is not None:
        expected = self.normalize(expected)
        got = self.normalize(got)

    self.assertEqual(got, expected,
                     '[%s]\n\n!=\n\n[%s]\n\nIN %s(%s)' % (
                         got, expected, self.transform.name(), self.input))
    self.assertEqual(self.subobjects, len(res_data.getSubObjects()),
                     '%s\n\n!=\n\n%s\n\nIN %s(%s)' % (
                         self.subobjects, len(res_data.getSubObjects()),
                         self.transform.name(), self.input))
def test_stripped_combinations(self):
    """A stripped combination saved in the control panel is applied."""
    # Before any change the tag/attribute pair passes through untouched.
    self.assertFalse(self.safe_html._config['disable_transform'])
    markup = '<p class="wow">lala</p>'
    stream = datastream('dummy_name')
    untouched = str(self.safe_html.convert(markup, stream))
    self.assertEqual(untouched, markup)

    # Add one combination through the control panel form and save it.
    browser = self.browser
    browser.open("%s/@@filter-controlpanel" % self.portal_url)
    add_button = browser.getControl(
        name='form.widgets.stripped_combinations.buttons.add')
    add_button.click()
    key_field = browser.getControl(
        name='form.widgets.stripped_combinations.key.0')
    key_field.value = 'mytag1 p'
    value_field = browser.getControl(
        name='form.widgets.stripped_combinations.0')
    value_field.value = 'myattr1 class'
    browser.getControl('Save').click()

    # The saved combination is visible in the transform configuration.
    combinations = self.safe_html._config['stripped_combinations']
    self.assertIn('mytag1 p', combinations)
    self.assertEqual('myattr1 class', combinations['mytag1 p'])

    # The same markup now loses its class attribute.
    filtered = str(self.safe_html.convert(markup, stream))
    self.assertEqual(filtered, '<p>lala</p>')
def test_stripped_combinations(self):
    """Check that a configured tag/attribute combination gets stripped."""

    def converted(source, target):
        # String form of the safe_html conversion result.
        return str(self.safe_html.convert(source, target))

    # test a combination that isn't normally filtered
    self.assertFalse(self.safe_html._config['disable_transform'])
    html = '<p class="wow">lala</p>'
    ds = datastream('dummy_name')
    self.assertEqual(converted(html, ds), html)

    # we can set stripped combinations
    panel_url = "%s/@@filter-controlpanel" % self.portal_url
    self.browser.open(panel_url)
    form_values = (
        ('form.widgets.stripped_combinations.key.0', 'mytag1 p'),
        ('form.widgets.stripped_combinations.0', 'myattr1 class'),
    )
    self.browser.getControl(
        name='form.widgets.stripped_combinations.buttons.add').click()
    for control_name, control_value in form_values:
        self.browser.getControl(name=control_name).value = control_value
    self.browser.getControl('Save').click()

    # stripped combinations are stored on the transform
    self.assertIn(
        'mytag1 p', self.safe_html._config['stripped_combinations'])
    stored = self.safe_html._config['stripped_combinations']['mytag1 p']
    self.assertEqual('myattr1 class', stored)

    # test that combination is now filtered
    self.assertEqual(converted(html, ds), '<p>lala</p>')
def convert(self, data, cache, **kwargs):
    """Run ``self.binary`` on ``data`` and return the stripped text output.

    ``data`` is handed to ``self.initialize_tmpdir`` under the file name
    ``input.tiff``; the binary is invoked with that input file and an
    output base path, and its result is read from ``<base>.txt``.

    :param data: raw input passed to ``initialize_tmpdir``.
    :param cache: not used by this implementation; a new datastream is
        returned instead.
    :returns: a ``datastream`` named ``output.txt`` holding the text with
        trailing whitespace removed.
    :raises EnvironmentError: when the binary exits with a non-zero status
        or is killed by a signal; the message includes its output.
    """
    kwargs['filename'] = 'input.tiff'
    tmp_dir, input_file = self.initialize_tmpdir(data, filename='input.tiff')
    try:
        output_file_path = os.path.join(tmp_dir, 'output')
        cmd = self.binary, input_file, output_file_path
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        stdout = process.communicate()[0]
        err = process.returncode
        if err:
            # A negative return code means the child was killed by a signal.
            if err < 0:
                exit_msg = 'killed with signal %s' % -err
            else:
                exit_msg = 'exited with status %s' % err
            raise EnvironmentError('Command %r %s. Command output:\n%s' %
                                   (cmd, exit_msg, stdout))
        # ``with`` guarantees the handle is closed even if read() fails.
        with open(output_file_path + '.txt', 'r') as output_file:
            out = output_file.read()
    finally:
        self.cleanDir(tmp_dir)
    data = datastream('output.txt')
    data.setData(out.rstrip())  # .rstrip() also removes page breaks
    return data
def setUp(self):
    """Load the sample ``.doc`` file and prepare a work dir and datastream."""
    input_dir = os.path.join(os.path.dirname(__file__), 'input')
    self.doc_simple1_path = os.path.join(input_dir, 'simpledoc1.doc')
    # Read through a context manager so the handle is always closed.
    with open(self.doc_simple1_path, 'rb') as doc_file:
        self.doc_simple1 = doc_file.read()
    self.idata = datastream('mytestdoc.doc')
    self.idata.setData(self.doc_simple1)
    self.doc = None  # to be set by tests
    # Created last so that a failure above does not leave a stray
    # temporary directory behind.
    self.workdir = tempfile.mkdtemp()
def test_cnxmlplus2cnxml(self):
    """The cnxmlplus transform yields non-empty output that differs from its input."""
    # Context manager closes the fixture file deterministically.
    with open(os.path.join(dirname, 'test.cnxmlplus')) as source:
        cnxmlplus = source.read()
    transform = cnxmlplus_to_shortcodecnxml()
    result = transform.convert(cnxmlplus, datastream('cnxml'))
    converted = result.getData()
    self.assertTrue(len(converted) > 0)
    self.assertNotEqual(cnxmlplus, converted)
def test_get_cache_key_no_key(self):
    """Asking for a key the context does not provide gives ``None``."""
    transform = OOOTransformBase()
    fake_context = FakeContext()
    data = datastream('mystream')
    data.context = fake_context
    # contexts w/o given key result in ``None``
    assert transform.get_cache_key('invalid-key', data) is None
def test_get_cache_key(self):
    """``get_cache_key`` returns the value found for a known key."""
    transform = OOOTransformBase()
    fake_context = FakeContext()
    data = datastream('mystream')
    data.context = fake_context
    # OOOTransformBase instances can get cache keys
    cache_key = transform.get_cache_key('some_key', data)
    assert cache_key == 'foo'
def contents(self):
    """Return the request's contents, anchor-fixed and passed through safe_html."""
    request = self.request
    payload = request['_data']
    revision = request['rev']

    # ``payload['contents']`` is a callable that produces the markup.
    fixed_markup = fix_workspace_html_anchors(
        payload['contents'](), self.context.absolute_url(), revision)

    transforms = getToolByName(self.context, 'portal_transforms')
    result = datastream('input')
    transforms.convert('safe_html', fixed_markup, result)
    return result.getData()
def test_convert(self):
    """An ODT document is converted to PDF by ``Odt2Pdf``."""
    # we can convert odt docs to PDF.
    transform = Odt2Pdf()
    stream = datastream('mystream')
    # Read via a context manager so the source file is closed before
    # the conversion starts.
    with open(self.src_path2, 'r') as source:
        document = source.read()
    transform.convert(document, stream)
    self.assertEqual(stream.getData()[:7], '%PDF-1.')
    self.assertEqual(stream.getMetadata(), {'cache_key_pdf': None})
def _test_cnxmlplus2html_chain(self):
    """ This looks more like a functional than unittest since it exercises
        all the others too. Maybe we should move it.
    """
    # Context manager closes the fixture file deterministically.
    with open(os.path.join(dirname, 'test.cnxmlplus')) as source:
        cnxmlplus = source.read()
    pt = getToolByName(self.portal, 'portal_transforms')
    transform = pt['cnxmlplus_to_html_chain']
    # The datastream argument is its name; use a short label instead of
    # the entire document text.
    data = datastream('cnxmlplus')
    data = transform.convert(cnxmlplus, data)
def test_convert(self):
    """An ODT document is converted to HTML by ``Odt2Html``."""
    # we can convert odt docs to HTML.
    transform = Odt2Html()
    stream = datastream('mystream')
    # Read via a context manager so the source file is closed before
    # the conversion starts.
    with open(self.src_path2, 'r') as source:
        document = source.read()
    transform.convert(document, stream)
    assert '</span>' in stream.getData()
    self.assertEqual(stream.getMetadata(), {'cache_key_html': None})
def test_nasty_tags(self):
    """A tag listed as nasty in the control panel is removed by safe_html."""
    browser = self.browser
    browser.open("%s/@@filter-controlpanel" % self.portal_url)
    nasty_field = browser.getControl(name="form.widgets.nasty_tags")
    nasty_field.value = "div\r\na"
    browser.getControl("Save").click()

    # test that <a> is filtered
    self.assertFalse(self.safe_html._config["disable_transform"])
    link_markup = '<a href="http://example.com">harmless link</a>'
    stream = datastream("dummy_name")
    filtered = str(self.safe_html.convert(link_markup, stream))
    self.assertEqual(filtered, "")
def test_nasty_tags(self):
    """Saving ``a`` as a nasty tag makes safe_html drop a link paragraph."""
    panel_url = "%s/@@filter-controlpanel" % self.portal_url
    self.browser.open(panel_url)
    nasty_field = self.browser.getControl(name='form.widgets.nasty_tags')
    nasty_field.value = 'div\r\na'
    save_button = self.browser.getControl('Save')
    save_button.click()

    # test that <a> is filtered
    self.assertFalse(self.safe_html._config['disable_transform'])
    paragraph = '<p><a href="http://example.com">harmless link</a></p>'
    target = datastream('dummy_name')
    result = self.safe_html.convert(paragraph, target)
    self.assertEqual(str(result), '')
def test_transform_doesnt_swallow_conflict_errors(self):
    """A ``ConflictError`` raised by the converter propagates out of convert."""
    target = datastream('dummy')

    # Patch TikaConverter class to just raise a ConflictError
    converter_factory = self.mocker.replace(
        'ftw.tika.converter.TikaConverter')
    failing_converter = RaisingConverter(ConflictError)
    self.expect(converter_factory()).result(failing_converter)
    self.replay()

    tika_transform = Tika2TextTransform()
    with self.assertRaises(ConflictError):
        tika_transform.convert('', target)
def convertTo(self, target_mimetype, orig, data=None, object=None, **kwargs):
    """Check ``orig`` against ``self.expected`` and store it in a datastream.

    A datastream named ``test`` is created when ``data`` is ``None``;
    otherwise the given ``data`` is filled and returned.
    """
    assert orig == self.expected, '????'
    stream = datastream('test') if data is None else data
    stream.setData(orig)
    return stream
def test_disable_filtering(self):
    """Ticking "disable filtering" turns the safe_html transform off."""
    browser = self.browser
    browser.open("%s/@@filter-controlpanel" % self.portal_url)
    checkbox = browser.getControl(name="form.widgets.disable_filtering:list")
    checkbox.value = "selected"
    browser.getControl("Save").click()

    # test that the transform is disabled
    self.assertEqual(self.safe_html._config["disable_transform"], 1)

    # anything passes
    script_markup = "<script></script>"
    stream = datastream("dummy_name")
    passed_through = str(self.safe_html.convert(script_markup, stream))
    self.assertEqual(script_markup, passed_through)
def do_convert(self, filename=None):
    """Convert ``self.input`` and compare the result with the reference.

    The reference is ``self.output``, or ``self.output + '.nofilename'``
    when no ``filename`` is given and that file exists.  If the reference
    cannot be read, the converted data is written to that path and the
    test fails so the new file can be checked by hand.  Only the first 20
    characters of the stripped contents are compared.

    :param filename: forwarded to the transform's ``convert``.
    """
    if filename is None and exists(self.output + '.nofilename'):
        output = self.output + '.nofilename'
    else:
        output = self.output
    orig = read_file_data(self.input)
    data = datastream(self.transform.name())
    res_data = self.transform.convert(orig, data, filename=filename)
    self.assertTrue(IDataStream.providedBy(res_data))
    got = res_data.getData()
    self.assertIsInstance(got, self.allowed_types)

    try:
        # Read the reference that was selected above; reading
        # ``self.output`` here would ignore the ``.nofilename`` variant
        # that the fallback branch writes to.
        expected = read_file_data(output)
    except IOError:
        import sys
        print('No output file found.', file=sys.stderr)
        print('File {0} created, check it !'.format(output),
              file=sys.stderr)
        with open(output, 'w') as fd:
            fd.write(got)
        self.fail(
            'No output file found; {0} created, check it !'.format(output))

    if self.normalize is not None:
        got = self.normalize(got)
        expected = self.normalize(expected)

    # show the first character ord table for debugging
    got_start = got.strip()[:20]
    expected_start = expected.strip()[:20]
    msg = 'IN {0}({1}) expected:\n{2}\nbut got:\n{3}'.format(
        self.transform.name(),
        self.input,
        "%s %s" % (expected_start, str([ord(x) for x in expected_start])),
        "%s %s" % (got_start, str([ord(x) for x in got_start])),
    )

    # compare only the leading part of both contents
    self.assertEqual(
        got_start,
        expected_start,
        msg,
    )
    self.assertEqual(
        self.subobjects,
        len(res_data.getSubObjects()),
        '%s\n\n!=\n\n%s\n\nIN %s(%s)' % (
            self.subobjects,
            len(res_data.getSubObjects()),
            self.transform.name(),
            self.input,
        ))
def test_disable_filtering(self):
    """With filtering disabled, safe_html returns its input unchanged."""
    panel_url = "%s/@@filter-controlpanel" % self.portal_url
    self.browser.open(panel_url)
    toggle = self.browser.getControl(
        name='form.widgets.disable_filtering:list')
    toggle.value = "selected"
    save_button = self.browser.getControl('Save')
    save_button.click()

    # test that the transform is disabled
    disabled_flag = self.safe_html._config['disable_transform']
    self.assertEqual(disabled_flag, 1)

    # anything passes
    nasty_html = '<script></script>'
    target = datastream('dummy_name')
    result = self.safe_html.convert(nasty_html, target)
    self.assertEqual(nasty_html, str(result))
def do_convert(self, filename=None):
    """Convert ``self.input`` and compare the result with the reference.

    The reference is ``self.output``, or ``self.output + '.nofilename'``
    when no ``filename`` is given and that file exists.  If the reference
    cannot be opened, the converted data is written to that path and the
    test fails so the new file can be checked by hand.  Only the first 20
    characters of the stripped contents are compared.

    :param filename: forwarded to the transform's ``convert``.
    """
    if filename is None and exists(self.output + '.nofilename'):
        output = self.output + '.nofilename'
    else:
        output = self.output
    with open(self.input) as fp:
        orig = fp.read()
    data = datastream(self.transform.name())
    res_data = self.transform.convert(orig, data, filename=filename)
    self.assertTrue(IDataStream.providedBy(res_data))
    got = res_data.getData()

    try:
        reference = open(output)
    except IOError:
        import sys
        sys.stderr.write('No output file found.\n')
        # Report the path actually written, which may be the
        # ``.nofilename`` variant rather than ``self.output``.
        sys.stderr.write('File %s created, check it !\n' % output)
        with open(output, 'w') as created:
            created.write(got)
        self.fail('No output file found; %s created, check it !' % output)
    # Close the reference before normalising so a failing normaliser
    # cannot leak the handle.
    with reference:
        expected = reference.read()

    if self.normalize is not None:
        expected = self.normalize(expected)
        got = self.normalize(got)

    got_start = got.strip()[:20]
    expected_start = expected.strip()[:20]
    msg = 'IN {0}({1}) expected:\n{2}\nbut got:\n{3}'.format(
        self.transform.name(),
        self.input,
        str([ord(x) for x in expected_start]),
        str([ord(x) for x in got_start]),
    )
    self.assertEqual(
        got_start,
        expected_start,
        msg
    )
    self.assertEqual(
        self.subobjects,
        len(res_data.getSubObjects()),
        '%s\n\n!=\n\n%s\n\nIN %s(%s)' % (
            self.subobjects,
            len(res_data.getSubObjects()),
            self.transform.name(),
            self.input
        )
    )
def test_convert_with_cachekey(self):
    """A valid cache key on the context makes convert return the cached result."""
    # we retrieve cached files if cache_key is set and valid
    cache_key = self.register_fakedoc_in_cache(
        src=self.src_path1, options=OPTIONS_HTML)
    transform = Odt2Html(cache_dir=self.cachedir)
    stream = datastream('mystream')
    # set cache key for HTML
    stream.context = FakeContext(html_key=cache_key)
    # We give a different source than what was cached as source.
    # This way we can be sure that if we get the fake result, it was
    # really retrieved via cache key lookup and not via source
    # lookup.
    with open(self.src_path2, 'r') as source:
        other_document = source.read()
    transform.convert(other_document, stream)
    assert stream.getData() == 'A fake result.'
def convert(markup):
    """Transform trix markup into markup that sablon can process.

    Trix markup is already expected to be valid for sablon, so this should
    normally return its input unchanged. It acts as a safeguard against
    malicious markup injection and against changes in trix, which is why
    any actual difference is reported to sentry.
    """
    stream = datastream('trix_to_sablon')
    result = _transform.convert(markup, data=stream)
    converted = result.getData()

    if converted != markup:
        # Something was actually rewritten; make that visible.
        _log_unexpected_conversion_to_sentry(converted, markup)

    return converted
def test_nasty_tags(self):
    """Tags moved from valid to nasty in the control panel are stripped."""
    self.browser.open("%s/@@filter-controlpanel" % self.portal_url)
    self.browser.getControl(
        name='form.widgets.nasty_tags').value = 'div\r\na'

    valid_control = self.browser.getControl(name='form.widgets.valid_tags')
    # Drop only the line that is exactly "a".  A plain
    # ``replace('a\r\n', '')`` would also eat the tail of every tag ending
    # in "a" and glue it to the following tag.
    valid_control.value = '\r\n'.join(
        tag for tag in valid_control.value.split('\r\n') if tag != 'a')
    self.browser.getControl('Save').click()

    # test that <a> is filtered
    self.assertFalse(self.settings.disable_filtering)
    good_html = '<p><a href="http://example.com">harmless link</a></p>'
    ds = datastream('dummy_name')
    self.assertEqual(str(self.safe_html.convert(good_html, ds)), '<p/>')
def generate(self):
    """Return the SBML model notes as a one-item tuple of ``('text', ...)``.

    The notes are passed through the ``safe_html`` transform and decoded
    as UTF-8 when the ``portal_transforms`` tool is available; otherwise
    they are returned as-is.
    """
    document = libsbml.SBMLReader().readSBMLFromString(self.input)
    model = document.getModel()
    transforms = getToolByName(self.context, 'portal_transforms', None)

    if not transforms:
        # XXX should warn unsafe
        text = model.getNotesString()
    else:
        output = datastream('license_description')
        transforms.convert('safe_html', model.getNotesString(), output)
        text = output.getData().decode('utf8', 'ignore')

    entry = ('text', text)
    return (entry,)
def test_disable_filtering(self):
    """Disabling filtering in the control panel lets any markup through."""
    browser = self.browser
    browser.open("%s/@@filter-controlpanel" % self.portal_url)
    option = browser.getControl(name='form.widgets.disable_filtering:list')
    option.value = "selected"
    browser.getControl('Save').click()

    # test that the transform is disabled
    self.assertEqual(self.settings.disable_filtering, 1)

    # anything passes
    nasty_html = '<script></script>'
    stream = datastream('dummy_name')
    output = str(self.safe_html.convert(nasty_html, stream))
    self.assertEqual(nasty_html, output)
def do_convert(self, filename=None):
    """Convert ``self.input`` and compare the result with the reference.

    The reference is ``self.output``, or ``self.output + ".nofilename"``
    when no ``filename`` is given and that file exists.  If the reference
    cannot be opened, the converted data is written to that path and the
    test fails so the new file can be checked by hand.  Only the first 20
    characters of the stripped contents are compared.

    :param filename: forwarded to the transform's ``convert``.
    """
    if filename is None and exists(self.output + ".nofilename"):
        output = self.output + ".nofilename"
    else:
        output = self.output
    with open(self.input) as source:
        orig = source.read()
    data = datastream(self.transform.name())
    res_data = self.transform.convert(orig, data, filename=filename)
    self.assertTrue(IDataStream.providedBy(res_data))
    got = res_data.getData()

    try:
        reference = open(output)
    except IOError:
        import sys
        sys.stderr.write("No output file found.\n")
        # Report the path actually written, which may be the
        # ``.nofilename`` variant rather than ``self.output``.
        sys.stderr.write("File %s created, check it !\n" % output)
        with open(output, "w") as created:
            created.write(got)
        self.fail("No output file found; %s created, check it !" % output)
    # Close the reference before normalising so a failing normaliser
    # cannot leak the handle.
    with reference:
        expected = reference.read()

    if self.normalize is not None:
        expected = self.normalize(expected)
        got = self.normalize(got)

    got_start = got.strip()[:20]
    expected_start = expected.strip()[:20]
    self.assertEqual(
        got_start,
        expected_start,
        "[%s]\n\n!=\n\n[%s]\n\nIN %s(%s)"
        % (got_start, expected_start, self.transform.name(), self.input),
    )
    self.assertEqual(
        self.subobjects,
        len(res_data.getSubObjects()),
        "%s\n\n!=\n\n%s\n\nIN %s(%s)"
        % (self.subobjects, len(res_data.getSubObjects()),
           self.transform.name(), self.input),
    )
def test_stripped_combinations(self):
    """Configure a stripped combination and verify that it takes effect."""
    transform = self.safe_html

    # test a combination that isn't normally filtered
    self.assertFalse(transform._config["disable_transform"])
    sample = '<p class="wow">lala</p>'
    stream = datastream("dummy_name")
    before = str(transform.convert(sample, stream))
    self.assertEqual(before, sample)

    # we can set stripped combinations
    browser = self.browser
    browser.open("%s/@@filter-controlpanel" % self.portal_url)
    browser.getControl(
        name="form.widgets.stripped_combinations.buttons.add"
    ).click()
    tags_field = browser.getControl(
        name="form.widgets.stripped_combinations.key.0")
    tags_field.value = "mytag1 p"
    attrs_field = browser.getControl(
        name="form.widgets.stripped_combinations.0")
    attrs_field.value = "myattr1 class"
    browser.getControl("Save").click()

    # stripped combinations are stored on the transform
    self.assertIn("mytag1 p", self.safe_html._config["stripped_combinations"])
    self.assertEqual(
        "myattr1 class",
        self.safe_html._config["stripped_combinations"]["mytag1 p"],
    )

    # test that combination is now filtered
    after = str(self.safe_html.convert(sample, stream))
    self.assertEqual(after, "<p>lala</p>")
def convert(self, data, cache, **kwargs):
    """Run ``self.binary`` on ``data`` and return the text it produced.

    ``data`` is handed to ``self.initialize_tmpdir`` under the file name
    ``input.tiff``; the binary is invoked with that input file and an
    output base path, and its result is read from ``<base>.txt``.

    The command is started from an argument list without a shell, so
    paths containing spaces or shell metacharacters are passed through
    verbatim, and a failing command is reported instead of ignored.

    :param data: raw input passed to ``initialize_tmpdir``.
    :param cache: not used by this implementation; a new datastream is
        returned instead.
    :returns: a ``datastream`` named ``output.txt`` holding the text.
    :raises EnvironmentError: when the binary exits with a non-zero status
        or is killed by a signal; the message includes its output.
    """
    # Imported locally so the block does not depend on a module-level
    # import that may not exist in this file.
    import subprocess

    kwargs['filename'] = 'input.tiff'
    tmp_dir, input_file = self.initialize_tmpdir(data, filename='input.tiff')
    try:
        output_file_path = os.path.join(tmp_dir, 'output')
        cmd = self.binary, input_file, output_file_path
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        stdout = process.communicate()[0]
        err = process.returncode
        if err:
            # A negative return code means the child was killed by a signal.
            if err < 0:
                exit_msg = 'killed with signal %s' % -err
            else:
                exit_msg = 'exited with status %s' % err
            raise EnvironmentError('Command %r %s. Command output:\n%s' %
                                   (cmd, exit_msg, stdout))
        # ``with`` guarantees the handle is closed even if read() fails.
        with open(output_file_path + '.txt', 'r') as output_file:
            out = output_file.read()
    finally:
        self.cleanDir(tmp_dir)
    data = datastream('output.txt')
    data.setData(out)
    return data
def do_convert(self, filename=None):
    """Convert ``self.input`` and compare the result with the reference.

    The reference is ``self.output``, or ``self.output + '.nofilename'``
    when no ``filename`` is given and that file exists.  If the reference
    cannot be opened, the converted data is written to that path and the
    test fails so the new file can be checked by hand.  Only the first 30
    characters of the stripped contents are compared.

    :param filename: forwarded to the transform's ``convert``.
    """
    if filename is None and exists(self.output + '.nofilename'):
        output = self.output + '.nofilename'
    else:
        output = self.output
    with open(self.input) as source:
        orig = source.read()
    data = datastream(self.transform.name())
    res_data = self.transform.convert(orig, data, filename=filename)
    self.assertTrue(IDataStream.providedBy(res_data))
    got = res_data.getData()

    try:
        reference = open(output)
    except IOError:
        import sys
        sys.stderr.write('No output file found.\n')
        # Report the path actually written, which may be the
        # ``.nofilename`` variant rather than ``self.output``.
        sys.stderr.write('File %s created, check it !\n' % output)
        with open(output, 'w') as created:
            created.write(got)
        self.fail('No output file found; %s created, check it !' % output)
    # Close the reference before normalising so a failing normaliser
    # cannot leak the handle.
    with reference:
        expected = reference.read()

    if self.normalize is not None:
        expected = self.normalize(expected)
        got = self.normalize(got)

    got_start = got.strip()[:30]
    expected_start = expected.strip()[:30]
    self.assertEqual(
        got_start, expected_start,
        '[%s]\n\n!=\n\n[%s]\n\nIN %s(%s)' %
        (got_start, expected_start, self.transform.name(), self.input))
    self.assertEqual(
        self.subobjects, len(res_data.getSubObjects()),
        '%s\n\n!=\n\n%s\n\nIN %s(%s)' %
        (self.subobjects, len(res_data.getSubObjects()),
         self.transform.name(), self.input))
def test_shortcodecnxml2shortcodehtml(self):
    """Run the shortcode cnxml -> shortcode html transform on the fixture."""
    # Context manager closes the fixture file deterministically.
    with open(os.path.join(dirname, 'test.cnxml')) as source:
        cnxml = source.read()
    transform = shortcodecnxml_to_shortcodehtml()
    data = datastream('cnxml')
    data = transform.convert(cnxml, data)
def convert(self, input):
    """Run ``input`` through the transform named by ``self.transform``.

    The ``portal_transforms`` tool is looked up from ``self.context`` and
    the converted data is returned.
    """
    transforms = getToolByName(self.context, 'portal_transforms')
    result = datastream('pt_annotation')
    transforms.convert(self.transform, input, result)
    converted = result.getData()
    return converted
def texSlideSection(self, section):
    """Write the TeX for one slide section via ``self.writeTeX``.

    Sections with neither image code nor text, and sections of type
    ``'explanation'``, produce no output.  A ``'main'`` section is written
    as an ``\\fbox`` holding a text minipage and/or an image minipage.
    Any other section becomes a ``\\subsubsection`` with an optional
    floating figure, followed by the section text.

    :param section: object providing ``text``, ``image_code``,
        ``image_caption`` and ``title``.
    """
    section_type = self.slideSectionType(section)

    # Neither image or text, ignore section
    if not section.image_code and not section.text:
        return
    if section_type == 'explanation':
        # Explanations are only useful as footnotes to slides
        return

    has_image = section.image_code and section.image_code.raw

    def image_tex():
        # Convert the section's image script to TeX with ScriptToTeX.
        code = section.image_code
        if code.mimeType == 'text/x-url':
            mimetype = 'text/x-uri'
        else:
            mimetype = code.mimeType
        converted = ScriptToTeX().convert(
            code.raw, datastream("scriptToImage"), mimetype=mimetype)
        return converted.getData()

    if section_type == 'main':
        # Write out approximation to slide
        self.writeTeX(['\\fbox{'])

        # Main section text goes into a slide minipage
        if section.text and section.text.raw:
            text_width = '0.58' if section.image_code else '0.97'
            self.writeTeX([
                '\\begin{minipage}{' + text_width + '\\textwidth}',
                section.text,
                '\\end{minipage}',
            ])

        # If there is an image, write it into a minipage
        if has_image:
            image = image_tex()
            # Within the 'main' branch the width depends on the text only.
            image_width = '0.38' if section.text else '0.97'
            if section.image_caption:
                caption = '\\scriptsize ' + section.image_caption
            else:
                caption = ''
            self.writeTeX([
                '\\hspace{0.5mm}',
                '\\begin{minipage}{' + image_width + '\\textwidth}',
                image,
                caption,
                '\\end{minipage}',
            ])
        self.writeTeX(['}'])
    else:
        self.writeTeX(['\\subsubsection{' + section.title + '}'])

        # Write out floating image
        if has_image:
            image = image_tex()
            if section.image_caption:
                caption = '\\caption{%s}' % section.image_caption
            else:
                caption = ''
            self.writeTeX([
                '\\begin{figure}[h]',
                '\\hspace{0.5mm}',
                '\\begin{minipage}{0.48\\textwidth}',
                image,
                caption,
                '\\end{minipage}',
                '\\end{figure}',
            ])

        # Rest of text goes in verbatim
        self.writeTeX([section.text])
def texSlideSection(self, obj):
    """Write the TeX for a ``'main'`` or ``'explanation'`` slide section.

    Sections of any other type are ignored.  The section is written as a
    ``tabular`` containing a text minipage (small type for explanations)
    and/or an image minipage with an optional caption.

    The former ``imageSize`` computation was removed: its result was
    never used in the emitted TeX.

    :param obj: object providing ``text``, ``image_code``,
        ``image_caption`` and ``title``.
    """
    section_type = self.slideSectionType(obj)
    if section_type == 'main':
        isExpl = False
    elif section_type == 'explanation':
        isExpl = True
    else:
        # Ignore other sections
        return

    haveText = (obj.text and obj.text.raw)
    haveImage = (obj.image_code and obj.image_code.raw)

    self.writeTeX([
        '%% Section ' + (obj.title or "main"),
        '\\begin{tabular}{ll}',
    ])

    # Slide text
    if haveText:
        self.writeTeX([
            '\\begin{minipage}{' + ('0.58' if haveImage else '0.97') + '\\textwidth}',
            '{\\scriptsize' if isExpl else '',
            '\\vfill' if isExpl else '',
            obj.text,
            '}' if isExpl else '',
            '\\end{minipage}',
        ])

    # Slide image
    if haveImage:
        code = obj.image_code
        if code.mimeType == 'text/x-url':
            mimetype = 'text/x-uri'
        else:
            mimetype = code.mimeType
        data = ScriptToTeX().convert(
            code.raw, datastream("scriptToImage"), mimetype=mimetype)
        if obj.image_caption:
            # Backslash spelled out explicitly; '\s' is not a valid
            # escape sequence.
            caption = '\\caption{\\scriptsize ' + obj.image_caption + '}'
        else:
            caption = ''
        self.writeTeX([
            '\\hspace{0.5mm}',
            '\\begin{minipage}{' + ('0.38' if haveText else '0.97') + '\\textwidth}',
            '\\begin{figure}',
            data.getData(),
            caption,
            '\\end{figure}',
            '\\end{minipage}',
        ])

    self.writeTeX([
        '\\end{tabular}',
    ])
def _wrap(self, name):
    """Create and return a new ``datastream`` called ``name``."""
    stream = datastream(name)
    return stream
def test_shortcodehtml2html(self):
    """Run the shortcode html -> html transform on the fixture."""
    # Context manager closes the fixture file deterministically.
    with open(os.path.join(dirname, 'test.html')) as source:
        html = source.read()
    transform = shortcodehtml_to_html()
    # The datastream argument is its name; use a short label instead of
    # the entire document text.
    data = datastream('html')
    data = transform.convert(html, data)