def text(self):
    """Return the ``output`` of the existing content item's rich text.

    ``None`` is returned when ``self.data.show_text`` is false, when the
    item does not provide ``IRichText``, or when its rich text value is
    empty.
    """
    if not self.data.show_text:
        return None

    content = self.existing_content_item
    if not IRichText.providedBy(content):
        return None
    if not IRichTextBehavior(content).text:
        return None
    return IRichTextBehavior(content).text.output
def SearchableText(obj):
    """Build the searchable text for ``obj``.

    The result is a single space-joined unicode string made of the object's
    id, title, description, the plain-text conversion of its rich text (if
    it has any) and its subjects.
    """
    plain_body = u''
    adapted = IRichText(obj, None)
    if adapted:
        value = adapted.text
        if IRichTextValue.providedBy(value):
            tool = getToolByName(obj, 'portal_transforms')
            # Before you think about switching raw/output
            # or mimeType/outputMimeType, first read
            # https://github.com/plone/Products.CMFPlone/issues/2066
            source = safe_unicode(value.raw)
            if six.PY2:
                source = source.encode('utf-8', 'replace')
            converted = tool.convertTo(
                'text/plain',
                source,
                mimetype=value.mimeType,
            )
            plain_body = converted.getData().strip()

    keywords = u' '.join(safe_unicode(tag) for tag in obj.Subject())

    pieces = [
        safe_unicode(obj.id),
        safe_unicode(obj.title) or u'',
        safe_unicode(obj.description) or u'',
        safe_unicode(plain_body),
        safe_unicode(keywords),
    ]
    return u' '.join(pieces)
def post_creation(self, obj):
    """Copy the values in ``self.field_data`` onto the created ``obj``.

    Values are written through the matching behavior adapter when ``obj``
    can be adapted to it; title and description fall back to plain
    attributes on ``obj`` otherwise.
    """
    field_data = self.field_data

    # Start from the stock document layout; the last step of this method
    # may replace it.
    layout_aware = ILayoutAware(obj, None)
    if layout_aware:
        layout_aware.contentLayout = '++contentlayout++default/document.html'

    rich_text = IRichText(obj, None)
    if rich_text:
        rich_text.text = RichTextValue(
            field_data['text'], 'text/html', 'text/html')

    # Title/description go on the IBasic adapter when available, else
    # directly on the object.
    basic_fields = IBasic(obj, None)
    target = basic_fields if basic_fields else obj
    target.title = field_data['title']
    target.description = field_data['description']

    categories = ICategorization(obj, None)
    if categories:
        categories.subjects = field_data['subject']

    published = IPublication(obj)
    effective = field_data['effectiveDate']
    if effective:
        published.effective = effective.asdatetime()

    located = ILocation(obj, None)
    if located:
        if field_data.get('location'):
            located.locations = [field_data['location']]
        news_location = field_data.get('newsLocation')
        if news_location:
            # Extend the existing list when there is one, otherwise start it.
            if located.locations:
                located.locations.append(news_location)
            else:
                located.locations = [news_location]

    obj.modification_date = field_data['modification_date']
    obj.creation_date = field_data['creation_date']

    layout_aware = ILayoutAware(obj, None)
    if not layout_aware:
        return
    folder_with_text = (
        self.data['portal_type'] == 'Folder' and 'text' in self.field_data)
    if folder_with_text:
        layout_aware.content = (
            FOLDER_DEFAULT_PAGE_LAYOUT % self.field_data['text'])
    elif self.layout:
        layout_aware.contentLayout = self.layout
def SearchableText(obj):
    """Build the searchable text for ``obj``.

    The result is a single space-joined unicode string made of the object's
    id, title, description, the plain-text conversion of its rich text (if
    it has any) and its subjects.
    """
    import sys

    text = u''
    richtext = IRichText(obj, None)
    if richtext:
        textvalue = richtext.text
        if IRichTextValue.providedBy(textvalue):
            transforms = getToolByName(obj, 'portal_transforms')
            # Convert the *raw* value: the transform is told the data is of
            # type ``mimeType``, which describes the raw input, whereas
            # ``output`` has already been rendered to ``outputMimeType``.
            # See https://github.com/plone/Products.CMFPlone/issues/2066
            raw = safe_unicode(textvalue.raw)
            if sys.version_info[0] < 3:
                # Only Python 2 gets an encoded byte string.
                raw = raw.encode('utf-8', 'replace')
            text = transforms.convertTo(
                'text/plain',
                raw,
                mimetype=textvalue.mimeType,
            ).getData().strip()

    subject = u' '.join([safe_unicode(s) for s in obj.Subject()])

    return u' '.join((
        safe_unicode(obj.id),
        safe_unicode(obj.title) or u'',
        safe_unicode(obj.description) or u'',
        safe_unicode(text),
        safe_unicode(subject),
    ))
title = unidecode(dom.cssselect('h1')[0].text_content()) except IndexError: print('bad url: ' + found_url) continue _id = normalizer.normalize(found_url.split('/')[-1]) if _id in container.objectIds(): continue kw = {} imgs = dom.cssselect('#mw-content-text img') if len(imgs) > 0: im_url = imgs[0].attrib['src'] if im_url.startswith('//'): im_url = 'https:' + im_url resp = requests.get(im_url) kw['image'] = NamedBlobImage( data=resp.content, filename=unidecode(im_url.split('/')[-1]).decode('utf8')) obj = api.content.create(type='Document', id=_id, title=title, exclude_from_nav=True, container=container, **kw) text = '' for p in dom.cssselect('#mw-content-text p'): text += tostring(p) bdata = IRichText(obj, None) bdata.text = RichTextValue(text, 'text/html', 'text/html') obj.reindexObject() parsed.append(found_url) transaction.commit()
def test_richtext_behavior(self):
    """``doc1`` must provide the ``IRichText`` interface."""
    # Assert on the result: a bare ``providedBy`` call can never fail.
    self.assertTrue(IRichText.providedBy(self.portal.doc1))
def post_creation(self, obj, pdb_if_exception=False, post_creation_data=None):
    """Copy the imported field data onto the created content object.

    Each group of values is written through its behavior adapter when
    ``obj`` can be adapted to it. Most values are looked up under their flat
    key in ``self.field_data`` first and, when that fails, under the
    per-behavior sub-dictionaries (``dublin``, ``basic``, ...).

    :param obj: the created content object; nothing is done when ``None``.
    :param pdb_if_exception: when true, drop into ``pdb`` on lead-image data
        whose type or content type cannot be determined.
    :param post_creation_data: not used by this method.
    """
    if obj is None:
        return
    field_data = self.field_data

    bdata = ILayoutAware(obj, None)
    if bdata:
        try:
            bdata.contentLayout = self.layout
        except Exception:
            bdata.contentLayout = '++contentlayout++default/document.html'

    bdata = IRichText(obj, None)
    if bdata:
        try:
            bdata.text = RichTextValue(field_data['text'], 'text/html',
                                       'text/html')
        except Exception:
            try:
                # NOTE(review): this fallback stores the ``raw`` string and
                # not a RichTextValue -- confirm that is intended.
                bdata.text = RichTextValue(
                    field_data[
                        'plone.app.contenttypes.behaviors.richtext.IRichText']
                    ['text'],  # noqa
                    'text/html',
                    'text/html').raw
            except Exception:
                bdata.text = ''

    bdata = IBasic(obj, None)
    if bdata:
        try:
            bdata.title = field_data['title']
        except Exception:
            try:
                bdata.title = field_data[
                    'plone.app.content.interfaces.INameFromTitle']['title']
            except Exception:
                # Last resort for the *title*: take it from the dublin core
                # data (it must not be written to ``description``).
                bdata.title = field_data[dublin]['title']
        try:
            bdata.description = field_data['description']
        except Exception:
            try:
                bdata.description = field_data[dublin]['description']
            except Exception:
                bdata.description = field_data[basic]['description']
    else:
        try:
            obj.title = field_data['title']
            obj.description = field_data['description']
        except Exception:
            obj.title = field_data[dublin]['title']
            obj.description = field_data[dublin]['description']

    bdata = ICategorization(obj, None)
    if bdata:
        try:
            bdata.subjects = field_data['subject']
        except Exception:
            try:
                bdata.subjects = self.field_data[dublin]['subjects']
            except Exception:
                try:
                    bdata.subjects = self.field_data[categorization][
                        'subjects']
                except Exception:
                    pass  # no keywords found

    bdata = IPublication(obj)
    try:
        if field_data['effectiveDate']:
            bdata.effective = pydt(field_data['effectiveDate'])
    except Exception:
        try:
            if field_data[dublin]['effective']:
                bdata.effective = pydt(field_data[dublin]['effective'])
        except Exception:
            try:
                if field_data[publication]['effective']:
                    bdata.effective = pydt(
                        field_data[publication]['effective'])
            except Exception:
                bdata.effective = None

    ldata = ILocation(obj, None)
    if ldata:
        if field_data.get('location'):
            ldata.locations = [field_data['location']]
        if field_data.get('newsLocation'):
            if ldata.locations:
                ldata.locations.append(field_data['newsLocation'])
            else:
                ldata.locations = [field_data['newsLocation']]

    # Dates: imported value, else what the object reports, else None.
    try:
        obj.modification_date = field_data['modification_date']
    except Exception:
        try:
            obj.modification_date = obj.modified()
        except Exception:
            obj.modification_date = None
    try:
        obj.creation_date = field_data['creation_date']
    except Exception:
        try:
            obj.creation_date = obj.created()
        except Exception:
            obj.creation_date = None

    bdata = IDublinCore(obj, None)
    if bdata:
        if IDublinCore.__identifier__ in field_data:
            # Data keyed by the behavior identifier is used as-is.
            dublin_core = field_data[IDublinCore.__identifier__]
            bdata.expires = dublin_core['expires']
            bdata.rights = dublin_core['rights']
            bdata.creators = tuple(dublin_core['creators'])
            bdata.language = dublin_core['language']
            bdata.effective = pydt(dublin_core['effective'])
            bdata.subjects = dublin_core['subjects']
            bdata.contributors = tuple(dublin_core['contributors'])
        else:
            # Flat keys; missing values become empty defaults.
            bdata.expires = pydt(field_data.get('expirationDate'))
            bdata.rights = field_data.get('rights')
            creators = field_data.get('creators')
            bdata.creators = tuple(creators) if creators else ()
            language = field_data.get('language')
            bdata.language = language if language is not None else ""
            bdata.effective = pydt(field_data.get('effectiveDate'))
            bdata.subjects = field_data.get('subject')
            contributors = field_data.get('contributors')
            bdata.contributors = tuple(contributors) if contributors else ()

    bdata = ILayoutAware(obj, None)
    if bdata:
        if self.data['portal_type'] == 'Folder' and (
                self.field_data.get('text') or '').strip():
            bdata.content = FOLDER_DEFAULT_PAGE_LAYOUT % self.field_data[
                'text']
            # need to explicitly reset contentLayout value because this data
            # could be overwritten
            bdata.contentLayout = None
        elif self.layout:
            if layoutaware in field_data and 'contentLayout' in field_data[
                    layoutaware]:
                bdata.contentLayout = field_data[layoutaware][
                    'contentLayout']
            if layoutaware in field_data and 'content' in field_data[
                    layoutaware]:
                bdata.content = field_data[
                    'plone.app.blocks.layoutbehavior.ILayoutAware'][
                        'content']
            if 'rendered_layout' in self.data['data']:
                bdata.rendered_layout = self.data['data'][
                    'rendered_layout']

    # Generic behavior fields: map the target field name back to the name
    # used in the imported data, convert if a converter is registered, set.
    inv_field_mapping = {v: k for k, v in self.fields_mapping.iteritems()}
    for IBehavior, field_name in self.behavior_data_mappers:
        original_field_name = inv_field_mapping.get(field_name, field_name)
        if original_field_name not in self.field_data:
            # data not here...
            continue
        behavior = IBehavior(obj, None)
        if behavior is None:
            # behavior not valid for obj type
            continue
        val = self.field_data[original_field_name]
        if field_name in self.data_converters:
            val = self.data_converters[field_name](val)
        setattr(behavior, field_name, val)

    # handle lead images
    for field_name in self.lead_image_field_names:
        if not self.field_data.get(field_name):
            continue
        if field_name == 'plone.app.contenttypes.behaviors.leadimage.ILeadImage':
            im_obj = self.field_data.get(field_name)['image']
        else:
            im_obj = self.field_data.get(field_name)
        if hasattr(im_obj, 'read'):
            im_data = im_obj.read()
        else:
            im_data = im_obj
        if not im_data:
            continue

        filename = self.field_data.get('image_filename')
        if not filename:
            if hasattr(im_obj, 'filename'):
                filename = im_obj.filename
            else:
                filename = self.field_data['id']

        obj.image = im_data
        if not isinstance(obj.image, NamedBlobImage):
            if isinstance(im_obj, StringIO):
                namedblobimage_data = im_data
            elif isinstance(im_obj, Image):
                namedblobimage_data = im_data.data
            else:
                if pdb_if_exception:
                    pdb.set_trace()
                logger.info(" lead image is type %s" % type(im_obj))
                # Unknown source type: hand the data over unchanged rather
                # than failing on an unbound variable below.
                namedblobimage_data = im_data
            obj.image = NamedBlobImage(data=namedblobimage_data,
                                       contentType='',
                                       filename=filename)

        if hasattr(obj.image, 'contentType') and isinstance(
                obj.image.contentType, unicode):
            obj.image.contentType = obj.image.contentType.encode('ascii')
        else:
            # No usable content type yet: sniff it from the image bytes.
            if isinstance(im_obj, Image):
                data = im_obj.data
            elif hasattr(im_obj, 'buf'):
                data = im_obj.buf
            elif hasattr(im_obj, '_blob'):
                if hasattr(im_obj._blob, '_p_blob_uncommitted'):
                    # Binary mode; the context manager closes the file even
                    # when reading fails.
                    with open(im_obj._blob._p_blob_uncommitted, 'rb') as f:
                        data = f.read()
                else:
                    raise Exception(
                        "no _p_blob_uncommitted attr in im_obj._blob")
            else:
                raise Exception("no _blob attr in im_obj")
            if data == '' or data is None:
                # Base64-encoded placeholder used when there are no bytes
                # to sniff.
                data = base64.b64decode(
                    'R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=='
                )
            image_type = what('', h=data)
            if image_type in [
                    'png', 'bmp', 'jpeg', 'xbm', 'tiff', 'gif'
            ]:
                obj.image.contentType = 'image/%s' % image_type
            elif image_type == 'rast':
                obj.image.contentType = 'image/cmu-raster'
            elif image_type == 'ppm':
                obj.image.contentType = 'image/x-portable-pixmap'
            elif image_type == 'pgm':
                obj.image.contentType = 'image/x-portable-greymap'
            elif image_type == 'pbm':
                obj.image.contentType = 'image/x-portable-bitmap'
            elif image_type == 'rgb':
                obj.image.contentType = 'image/x-rgb'
            else:
                # look at filename extension
                contentType, encoding = guess_type(obj.image.filename,
                                                   strict=False)
                if contentType:
                    obj.image.contentType = contentType
                else:
                    logger.info(
                        "Unknown image type {};"
                        " defaulting to jpeg".format(image_type))
                    # Only stop in the debugger when the caller asked for
                    # it; an unconditional breakpoint hangs unattended runs.
                    if pdb_if_exception:
                        pdb.set_trace()
                    obj.image.contentType = 'image/jpeg'  # default

        for caption_field_name in self.lead_image_caption_field_names:
            if caption_field_name in self.field_data:
                obj.imageCaption = self.field_data.get(
                    caption_field_name)
def post_creation(self, obj):
    """Copy the values in ``self.field_data`` onto the created ``obj``.

    Values are written through the matching behavior adapter when ``obj``
    can be adapted to it. After the fixed set of fields, the configured
    behavior/field mappings are applied and a lead image (with caption) is
    attached when image data is present.
    """
    field_data = self.field_data

    # Start from the stock document layout; a later step may replace it.
    layout_aware = ILayoutAware(obj, None)
    if layout_aware:
        layout_aware.contentLayout = '++contentlayout++default/document.html'

    rich_text = IRichText(obj, None)
    if rich_text:
        rich_text.text = RichTextValue(
            field_data['text'], 'text/html', 'text/html')

    # Title/description go on the IBasic adapter when available, else
    # directly on the object.
    basic_fields = IBasic(obj, None)
    target = basic_fields if basic_fields else obj
    target.title = field_data['title']
    target.description = field_data['description']

    categories = ICategorization(obj, None)
    if categories:
        categories.subjects = field_data['subject']

    published = IPublication(obj)
    effective = field_data['effectiveDate']
    if effective:
        published.effective = pydt(effective)

    located = ILocation(obj, None)
    if located:
        if field_data.get('location'):
            located.locations = [field_data['location']]
        news_location = field_data.get('newsLocation')
        if news_location:
            # Extend the existing list when there is one, otherwise start it.
            if located.locations:
                located.locations.append(news_location)
            else:
                located.locations = [news_location]

    obj.modification_date = field_data['modification_date']
    obj.creation_date = field_data['creation_date']

    layout_aware = ILayoutAware(obj, None)
    if layout_aware:
        folder_with_text = (
            self.data['portal_type'] == 'Folder'
            and 'text' in self.field_data)
        if folder_with_text:
            layout_aware.content = (
                FOLDER_DEFAULT_PAGE_LAYOUT % self.field_data['text'])
            # need to explicitly reset contentLayout value because this data
            # could be overwritten
            layout_aware.contentLayout = None
        elif self.layout:
            layout_aware.contentLayout = self.layout

    # Reverse the field mapping so a target field name can be traced back
    # to the key used in the imported data.
    source_names = dict(
        (mapped, source)
        for source, mapped in self.fields_mapping.iteritems())
    for behavior_iface, field_name in self.behavior_data_mappers:
        source_name = source_names.get(field_name, field_name)
        if source_name not in self.field_data:
            # data not here...
            continue
        adapted = behavior_iface(obj, None)
        if adapted is None:
            # behavior not valid for obj type
            continue
        value = self.field_data[source_name]
        if field_name in self.data_converters:
            value = self.data_converters[field_name](value)
        setattr(adapted, field_name, value)

    # handle lead images
    for field_name in self.lead_image_field_names:
        image_source = self.field_data.get(field_name)
        if not image_source:
            continue

        if hasattr(image_source, 'read'):
            image_data = image_source.read()
        else:
            image_data = image_source
        if not image_data:
            continue

        # Explicit filename first, then the source's own, then the id.
        filename = self.field_data.get('image_filename')
        if not filename:
            if hasattr(image_source, 'filename'):
                filename = image_source.filename
            else:
                filename = self.field_data['id']

        obj.image = NamedBlobImage(data=decode_file_data(image_data),
                                   filename=to_unicode(filename))
        if not obj.image.contentType:
            obj.image.contentType = 'image/jpeg'

        for caption_name in self.lead_image_caption_field_names:
            if caption_name in self.field_data:
                obj.imageCaption = self.field_data.get(caption_name)