Example #1
0
def get_content_links(obj):
    """Collect the references to content that ``obj`` links to.

    Sources scanned, in order:

    * for layout-aware objects, every annotation whose key starts with
      ``ANNOTATIONS_KEY_PREFIX`` (handed to ``get_tile_data_links``) and,
      when no ``contentLayout`` is set, the text tiles found in the
      ``content`` markup;
    * the raw value of ``obj.text``, when the object has one;
    * ``obj.image``, when it provides ``IReferenceNamedImage``.

    :param obj: content object to scan.
    :returns: set with the references that were found.
    """
    refs = set()
    if ILayoutAware.providedBy(obj):
        behavior_data = ILayoutAware(obj)
        # get data from tile data
        annotations = IAnnotations(obj)
        for key in annotations.keys():
            if key.startswith(ANNOTATIONS_KEY_PREFIX):
                refs |= get_tile_data_links(obj, annotations[key])
        if not behavior_data.contentLayout and behavior_data.content:
            dom = fromstring(behavior_data.content)
            for el in dom.cssselect('.mosaic-text-tile .mosaic-tile-content'):
                links = extractLinks(tostring(el))
                refs |= li.getObjectsFromLinks(obj, links)
    try:
        # scan more than just this we probably should...
        value = obj.text.raw
        links = extractLinks(value)
        refs |= li.getObjectsFromLinks(obj, links)
    except AttributeError:
        # best effort: objects without a usable `text` are simply skipped
        pass

    # A default is required here: not every object has an `image`
    # attribute, and a two-argument getattr() would raise AttributeError.
    image = getattr(obj, 'image', None)
    if image and IReferenceNamedImage.providedBy(image):
        sub_obj = uuidToObject(image.reference)
        if sub_obj:
            # The relation has to target the referenced image, not the
            # object that is doing the linking.
            # NOTE(review): assumes get_ref() returns the id expected by
            # RelationValue for the object it is given -- confirm.
            objid = get_ref(sub_obj)
            if objid:
                refs.add(RelationValue(objid))
    return refs
def modifiedDexterity(obj, event):
    """ a dexterity based object was modified

    Every ``RichText`` field of the type's schema and of its additional
    schemata is scanned for links; the objects those links resolve to are
    handed to ``updateReferences``.

    :param obj: the dexterity object that was modified.
    :param event: the event that was fired (not used).
    """
    pu = getToolByName(obj, 'portal_url', None)
    if pu is None:
        # `getObjectFromLinks` is not possible without access
        # to `portal_url`
        return
    rc = getToolByName(obj, 'reference_catalog', None)
    if rc is None:
        # `updateReferences` is not possible without access
        # to `reference_catalog`
        return

    fti = getUtility(IDexterityFTI, name=obj.portal_type)
    schema = fti.lookupSchema()
    additional_schema = getAdditionalSchemata(context=obj,
                                              portal_type=obj.portal_type)

    # additional schemata first, the type's own schema last
    schemas = list(additional_schema)
    schemas.append(schema)

    refs = set()

    for schema in schemas:
        for name, field in getFieldsInOrder(schema):
            if not isinstance(field, RichText):
                # Only check for "RichText" ?
                continue
            value = getattr(schema(obj), name)
            # Skip unset fields as well as values that carry no raw text,
            # instead of failing on `value.raw`.
            if not value or not getattr(value, 'raw', None):
                continue
            links = extractLinks(value.raw)
            refs |= getObjectsFromLinks(obj, links)

    updateReferences(IReferenceable(obj), referencedRelationship, refs)
Example #3
0
 def test_link_extraction_easy(self):
     """A single relative anchor in doc1 is extracted as a one-item tuple."""
     document = self.portal.doc1
     self._set_text(document, '<a href="doc2">Doc 2</a>')
     extracted = extractLinks(self._get_text(document))
     self.assertEqual(extracted, ('doc2', ))
 def test_link_extraction_easy(self):
     """The only anchor target in doc1's text is what gets returned."""
     page = self.portal.doc1
     markup = '<a href="doc2">Doc 2</a>'
     self._set_text(page, markup)
     self.assertEqual(extractLinks(self._get_text(page)), ('doc2', ))
Example #5
0
def modifiedArchetype(obj, event):
    """ recompute the references of a modified archetype based object

    The text of every ``TextField`` in the object's schema is scanned for
    links and the resolved objects are passed to ``updateReferences``.
    Nothing happens when ``portal_url`` or ``reference_catalog`` cannot
    be found.
    """
    if getToolByName(obj, 'portal_url', None) is None:
        # without `portal_url` there is no `getObjectFromLinks`
        return
    if getToolByName(obj, 'reference_catalog', None) is None:
        # without `reference_catalog` there is no `updateReferences`
        return

    linked = set()
    for text_field in obj.Schema().fields():
        if not isinstance(text_field, TextField):
            continue
        getter = text_field.getAccessor(obj)
        charset = text_field.getRaw(obj, raw=1).original_encoding
        # Fields that have been added via schema extension do
        # not have an accessor method.
        content = text_field.get(obj) if getter is None else getter()
        linked |= getObjectsFromLinks(obj, extractLinks(content, charset))
    updateReferences(obj, referencedRelationship, linked)
Example #6
0
    def get_referenced_objects(self):
        """Get referenced objects from cover object.

        Looks at each tile's ``uuid`` entry, at the ``uuids`` of list tiles
        and at the links inside the ``text`` of rich text tiles.  UUIDs
        that do not resolve to an object are left out of the result.

        :returns: a set of objects referenced
        :rtype: set of objects
        """
        refs = set()
        for tile_uuid in self.list_tiles():
            tile = self.get_tile(tile_uuid)
            uuid = tile.data.get('uuid', None)
            if uuid is not None:
                refs.add(uuidToObject(uuid))
            if IListTile.providedBy(tile):
                # the stored value may be None rather than missing
                for uuid in tile.data.get('uuids', []) or ():
                    refs.add(uuidToObject(uuid))
            elif IRichTextTile.providedBy(tile):
                value = tile.data.get('text')
                if value is not None:
                    links = extractLinks(value.raw)
                    refs |= getObjectsFromLinks(self, links)
        # uuidToObject() yields None for a UUID that no longer resolves;
        # such a placeholder is not a referenced object.
        refs.discard(None)
        return refs
Example #7
0
def modifiedCoverTile(obj, event):
    """Ensure link integrity on Rich Text tiles.

    The ``RichTextValue`` entries of the tile's data are scanned for links
    and the references of the tile's context are updated accordingly.

    Keyword arguments:
    obj -- tile that was modified; ``obj.context`` holds the references
    event -- event fired (not used)
    """
    # NOTE(review): plone.api's `get_tool` is documented to raise when the
    # tool is missing rather than return None -- confirm whether these two
    # guards can ever trigger.
    pu = api.portal.get_tool('portal_url')
    if pu is None:
        # `getObjectFromLinks` is not possible without access
        # to `portal_url`
        return
    rc = api.portal.get_tool('reference_catalog')
    if rc is None:
        # `updateReferences` is not possible without access
        # to `reference_catalog`
        return
    referenceable_parent = IReferenceable(obj.context, None)
    if referenceable_parent is None:
        # `updateReferences` is not possible
        # if parent object isn't referenceable
        return

    refs = set()

    for value in obj.data.values():
        if not isinstance(value, RichTextValue):
            continue
        raw = value.raw
        if not raw:
            continue
        refs |= getObjectsFromLinks(obj.context, extractLinks(raw))

    # reuse the adapter obtained above instead of adapting a second time
    updateReferences(referenceable_parent, referencedRelationship, refs)
def modifiedArchetype(obj, event):
    """ rebuild the references after an archetype based object changed

    Returns early, doing nothing, unless both the `portal_url` tool
    (needed by `getObjectFromLinks`) and the `reference_catalog` tool
    (needed by `updateReferences`) are available.
    """
    for tool_name in ('portal_url', 'reference_catalog'):
        if getToolByName(obj, tool_name, None) is None:
            return

    targets = set()
    for candidate in obj.Schema().fields():
        if isinstance(candidate, TextField):
            read = candidate.getAccessor(obj)
            source_encoding = candidate.getRaw(obj, raw=1).original_encoding
            if read is None:
                # Fields that have been added via schema extension do
                # not have an accessor method.
                text = candidate.get(obj)
            else:
                text = read()
            found = extractLinks(text, source_encoding)
            targets |= getObjectsFromLinks(obj, found)
    updateReferences(obj, referencedRelationship, targets)
Example #9
0
def modifiedDexterity(obj, event):
    """ a dexterity based object was modified

    Scans the ``RichText`` fields of the type's schema and of its
    additional schemata for links and passes the objects they resolve to
    on to ``updateReferences``.

    :param obj: the dexterity object that was modified.
    :param event: the event that was fired (not used).
    """
    pu = getToolByName(obj, 'portal_url', None)
    if pu is None:
        # `getObjectFromLinks` is not possible without access
        # to `portal_url`
        return
    rc = getToolByName(obj, 'reference_catalog', None)
    if rc is None:
        # `updateReferences` is not possible without access
        # to `reference_catalog`
        return

    fti = getUtility(IDexterityFTI, name=obj.portal_type)
    schema = fti.lookupSchema()
    additional_schema = getAdditionalSchemata(context=obj,
                                              portal_type=obj.portal_type)

    schemas = list(additional_schema) + [schema]

    refs = set()

    for schema in schemas:
        for name, field in getFieldsInOrder(schema):
            if isinstance(field, RichText):
                # Only check for "RichText" ?
                value = getattr(schema(obj), name)
                # An unset field, or a value without raw text, has
                # nothing to extract; `value.raw` must not be assumed.
                if not value or not getattr(value, 'raw', None):
                    continue
                links = extractLinks(value.raw)
                refs |= getObjectsFromLinks(obj, links)

    updateReferences(IReferenceable(obj), referencedRelationship, refs)
Example #10
0
    def get_referenced_objects(self):
        """Get referenced objects from cover object.

        Sources are each tile's ``uuid`` entry, the ``uuids`` of list
        tiles and the links in the ``text`` of rich text tiles.  A UUID
        that cannot be resolved contributes nothing to the result.

        :returns: a set of objects referenced
        :rtype: set of objects
        """
        refs = set()
        for tile_uuid in self.list_tiles():
            tile = self.get_tile(tile_uuid)
            candidates = []
            uuid = tile.data.get('uuid', None)
            if uuid is not None:
                candidates.append(uuid)
            if IListTile.providedBy(tile):
                # `uuids` may be stored as None; treat that like "empty"
                candidates.extend(tile.data.get('uuids', []) or ())
            elif IRichTextTile.providedBy(tile):
                value = tile.data.get('text')
                if value is not None:
                    links = extractLinks(value.raw)
                    refs |= getObjectsFromLinks(self, links)
            for candidate in candidates:
                target = uuidToObject(candidate)
                # uuidToObject() yields None when nothing matches the UUID
                if target is not None:
                    refs.add(target)
        return refs
Example #11
0
 def test_link_extraction_more_complex(self):
     """Anchor targets and a nested image source are all extracted."""
     document = self.portal.doc2
     markup = (
         '<a href="doc1">Doc 2</a>'
         '<a href="folder1/doc3"><img src="image1" /></a>'
     )
     self._set_text(document, markup)
     extracted = extractLinks(self._get_text(document))
     self.assertEqual(extracted, ('doc1', 'folder1/doc3', 'image1'))
Example #12
0
def get_tile_data_links(obj, data):
    """Collect the references found in one tile's stored data.

    Only the ``content``, ``video``, ``image``, ``images`` and ``audio``
    entries are inspected.  A string value is scanned for links, a list
    value is handed to ``get_refs_from_uids``.

    :param obj: object the links are resolved against.
    :param data: the tile data; anything that is not a ``dict``,
        ``PersistentMapping`` or ``PersistentDict`` (or a subclass of
        one of them) is ignored.
    :returns: set with the references that were found.
    """
    refs = set()
    # isinstance() instead of an exact type() match, so that subclasses of
    # the supported mapping types are handled as well.
    if not isinstance(data, (dict, PersistentMapping, PersistentDict)):
        return refs
    for field_name in ('content', 'video', 'image', 'images', 'audio'):
        val = data.get(field_name)
        if isinstance(val, basestring):
            links = extractLinks(val)
            refs |= li.getObjectsFromLinks(obj, links)
        elif isinstance(val, list):
            # could be list of uids
            refs |= get_refs_from_uids(val)
    return refs
 def test_link_extraction_more_complex(self):
     """Two anchors, one wrapping an image, give three links in order."""
     page = self.portal.doc2
     first_anchor = '<a href="doc1">Doc 2</a>'
     second_anchor = '<a href="folder1/doc3"><img src="image1" /></a>'
     self._set_text(page, first_anchor + second_anchor)
     wanted = ('doc1', 'folder1/doc3', 'image1')
     self.assertEqual(extractLinks(self._get_text(page)), wanted)
Example #14
0
 def retrieveLinks(self):
     """Return the set of links found in the context's text fields."""
     found = set()
     for field in self.context.Schema().fields():
         if not isinstance(field, TextField):
             continue
         getter = field.getAccessor(self.context)
         charset = field.getRaw(self.context, raw=1).original_encoding
         if getter is None:
             # Fields that have been added via schema extension do
             # not have an accessor method.
             text = field.get(self.context)
         else:
             text = getter()
         found.update(extractLinks(text, charset))
     return found
Example #15
0
 def retrieveLinks(self):
     """Return the set of links found in the context's rich text fields.

     Both the type's own schema and its additional schemata are scanned;
     fields that are unset or hold no raw text are skipped.
     """
     context = self.context
     type_info = getUtility(IDexterityFTI, name=context.portal_type)
     own_schema = type_info.lookupSchema()
     extra_schemata = getAdditionalSchemata(
         context=context, portal_type=context.portal_type)
     found = set()
     for iface in list(extra_schemata) + [own_schema]:
         for field_name, field in getFieldsInOrder(iface):
             if not isinstance(field, RichText):
                 continue
             rich = getattr(iface(context), field_name)
             if rich and getattr(rich, 'raw', None):
                 found.update(extractLinks(rich.raw))
     return found
Example #16
0
def modifiedArchetype(obj, event):
    """ update the references of a modified archetype based object

    Does nothing when `check_linkintegrity_dependencies` reports a
    falsy result for the object.
    """
    if check_linkintegrity_dependencies(obj):
        collected = set()
        for item in obj.Schema().fields():
            if not isinstance(item, TextField):
                continue
            reader = item.getAccessor(obj)
            enc = item.getRaw(obj, raw=1).original_encoding
            # Fields that have been added via schema extension do
            # not have an accessor method.
            body = reader() if reader is not None else item.get(obj)
            collected |= getObjectsFromLinks(obj, extractLinks(body, enc))
        updateReferences(obj, collected)
Example #17
0
def modifiedDexterity(obj, event):
    """ update the references of a modified dexterity based object

    Links are taken from the raw text of every `RichText` field in the
    type's schema and its additional schemata.
    """
    if not check_linkintegrity_dependencies(obj):
        return
    type_info = getUtility(IDexterityFTI, name=obj.portal_type)
    own_schema = type_info.lookupSchema()
    extra_schemata = getAdditionalSchemata(context=obj,
                                           portal_type=obj.portal_type)
    all_schemata = list(extra_schemata)
    all_schemata.append(own_schema)
    targets = set()
    for iface in all_schemata:
        for field_name, field in getFieldsInOrder(iface):
            # Only check for "RichText" ?
            if not isinstance(field, RichText):
                continue
            rich = getattr(iface(obj), field_name)
            if not (rich and getattr(rich, 'raw', None)):
                continue
            targets |= getObjectsFromLinks(obj, extractLinks(rich.raw))
    updateReferences(obj, targets)
Example #18
0
 def testHandleParserException(self):
     """A tag with a stray quote gives no link and keeps earlier ones."""
     nothing_found = extractLinks('<foo\'d>')
     self.assertEqual(nothing_found, ())
     markup = '<a href="http://foo.com">foo</a><bar\'d>'
     self.assertEqual(extractLinks(markup), ('http://foo.com', ))
    def __iter__(self):
        """Apply the exported ``extras`` data to the object of each item.

        Every item of ``self.previous`` is yielded exactly once.  Items
        without a path key, without a files key, without an ``'extras'``
        entry, or whose path cannot be traversed are passed through
        untouched.  For all other items the serialized extras are loaded
        and used to fix up the object (portal type, roles, remapped
        fields, content types, embedded images, links to the old domain);
        the object is reindexed afterwards when a step asked for it.

        A failing reindex does not stop the iteration: it is appended to
        ``reindex_errors.txt`` below ``BASE_DIR`` instead.
        """
        for item in self.previous:
            pathkey = self.pathkey(*item.keys())[0]
            fileskey = self.fileskey(*item.keys())[0]

            if not (pathkey and fileskey):
                yield item
                continue
            if 'extras' not in item[fileskey]:
                yield item
                continue

            path = item[pathkey]
            obj = self.context.unrestrictedTraverse(path, None)
            if obj is None:         # path doesn't exist
                yield item
                continue

            data = item[fileskey]['extras']['data']
            extras = loads(data)
            reindex = False
            reindex_security = False

            # Fix up type screwups between our type switching
            # and the CMF marshaller
            if hasattr(obj, '_getPortalTypeName'):
                cmf_type = obj._getPortalTypeName()
                if cmf_type != obj.meta_type:
                    obj._setPortalTypeName(obj.meta_type)
                    reindex = True

            # ROLES import
            # for groups/projects/usergroups
            if obj.meta_type in ['Project', 'FunctionalGroup', 'UserGroup', ]:
                # member times
                if extras.get('membertimes'):
                    setattr(obj, '_membertimes', extras['membertimes'])

                # roles stuff
                acl_users = getToolByName(obj, 'acl_users')
                groupinfo = obj.getACLUsersGroupInfo()

                # members/admins
                if extras.get('members'):
                    acl_users.source_groups.manage_addPrincipalsToGroup(
                        group_id=groupinfo['Members']['gid'],
                        principal_ids=extras['members'],
                        )
                    reindex_security = True
                    reindex = True
                # admins
                if extras.get('administrators'):
                    acl_users.source_groups.manage_addPrincipalsToGroup(
                        group_id=groupinfo['Administrators']['gid'],
                        principal_ids=extras['administrators'],
                        )
                    reindex_security = True
                    reindex = True
                # contributors
                for userid in extras.get('contributors', []):
                    obj.manage_setLocalRoles(userid, ['Contributor', ])
                    reindex_security = True
                    reindex = True
                # reviewers
                for userid in extras.get('reviewers', []):
                    obj.manage_setLocalRoles(userid, ['Reviewer', ])
                    reindex_security = True
                    reindex = True
                # shared access groups
                for UID in extras.get('sharedaccess', []):
                    # translate old 'UID' group name to new 'UID-Members'
                    obj.manage_setLocalRoles('%s-Members' % UID, ['Member', ])
                    reindex_security = True
                    reindex = True

            # LOCAL ROLES
            for userid, roles in extras.get('local_roles', {}).items():
                obj.manage_setLocalRoles(userid, roles)
                reindex_security = True
                reindex = True

            # RESTRICTED FOLDERS
            if obj.meta_type != 'Plone Site':
                portal_workflow = getToolByName(obj, 'portal_workflow')
                if portal_workflow.getInfoFor(obj, 'review_state', '') \
                        == 'restricted':
                    if hasattr(obj, 'getACLUsersGroupInfo'):
                        agid = obj.getACLUsersGroupInfo()['Administrators']['gid']
                        obj.manage_setLocalRoles(agid, ['Manager', ])

            # Technical Portal field changes
            if obj.meta_type in ['TechnicalTicket',
                                 'TechnicalTicketResponse', ]:
                if 'marshall' in item[fileskey]:
                    manifest = item[fileskey]['marshall']['data']
                    for field, info in self.parseManifest(manifest).items():
                        if field == 'attachmentLink' and info['_alltext']:
                            newfield = obj.getField('link')
                            newfield.getMutator(obj)(info['_alltext'])
                            reindex = True

                mapping = {
                    'attachmentFile': 'attachmentFile1',
                    'attachment1': 'attachmentFile1',
                    'attachment2': 'attachmentFile2',
                    'attachment3': 'attachmentFile3',
                    }
                if 'file-fields' in item[fileskey]:
                    # remap attachment fields
                    manifest = item[fileskey]['file-fields']['data']
                    for field, info in self.parseManifest(manifest).items():
                        if field in mapping:
                            fname = info['filename']
                            ct = info['mimetype']
                            data = item[fileskey][fname]['data']
                            newfield = obj.getField(mapping[field])
                            newfield.getMutator(obj)(data, filename=fname,
                                                     mimetype=ct)
                            reindex = True

            # update geolocation fields to new format
            if obj.meta_type in ['BusinessUnit', 'MapMarker', ]:
                location = obj.getGeolocation()
                if isinstance(location, list):
                    location = ', '.join(location)
                    obj.setGeolocation(location)
                    reindex = True

            # update blog messages to 'html'
            if obj.meta_type == 'BlogMessage':
                message = obj.message
                if message.getContentType() == 'text/plain':
                    rawmessage = obj.getRawMessage()
                    if rawmessage.startswith('<'):
                        # looks like it is already html
                        message.setContentType(obj, 'text/html')
                    else:
                        portal_transforms = getToolByName(obj,
                                                          'portal_transforms')
                        data = portal_transforms.convertTo('text/html',
                                                           rawmessage,
                                                           mimetype='text/plain')
                        htmlmessage = data.getData()
                        obj.setMessage(htmlmessage, mimetype='text/html')
                    reindex = True

            # Set default page of previously-known-as 'LargeDocument'
            if 'default_page' in extras:
                obj.setDefaultPage(extras['default_page'])
                reindex = True

            # enable next previous nav
            if 'nextprev' in extras:
                obj.setNextPreviousEnabled(extras['nextprev'])
                reindex = True

            # Force pages to text/html
            if obj.meta_type in ['Page', 'NewsItem', 'HelpPage', ]:
                body = obj.body
                if body.getContentType() == 'text/plain':
                    body.setContentType(obj, 'text/html')

            # CSR nom ratings
            if obj.meta_type == 'CSRNomination':
                ratings = extras.get('ratings', {})
                obj.setRatings(ratings)

            # Video file remapping
            if obj.meta_type in ['Video'] \
                    and 'file-fields' in item[fileskey]:
                manifest = item[fileskey]['file-fields']['data']
                mapping = {
                    'preview': 'video',
                    'video': 'original',
                    }
                for field, info in self.parseManifest(manifest).items():
                    if field in mapping:
                        fname = info['filename']
                        ct = info['mimetype']
                        data = item[fileskey][fname]['data']
                        newfield = obj.getField(mapping[field])
                        newfield.getMutator(obj)(data, filename=fname,
                                                 mimetype=ct)
                        reindex = True

            # Add Page image fields as content items
            if obj.meta_type in ['Page', 'BlogMessage', ] \
                    and 'file-fields' in item[fileskey]:
                portal_workflow = getToolByName(obj, 'portal_workflow')
                manifest = item[fileskey]['file-fields']['data']
                images = []
                if obj.meta_type == 'BlogMessage':
                    folder = obj.inBlog()
                    bodyfieldname = 'message'
                else:
                    folder = obj.aq_parent
                    bodyfieldname = 'body'

                for field, info in self.parseManifest(manifest).items():
                    if not field.startswith('image'):
                        continue
                    fname = info['filename']
                    ct = info['mimetype']
                    data = item[fileskey][fname]['data']
                    imageob = create_object('Image', folder,
                                            rename_after=True,
                                            title=fname,
                                            )
                    imageob.setImage(data, filename=fname, mimetype=ct)
                    if obj.meta_type != 'BlogMessage':
                        # blog images are auto-published
                        portal_workflow.doActionFor(imageob, 'make_visible')
                    # mimic the published date of the original item
                    imageob.setDatePublished(obj.created())
                    imageob.reindexObject('getDatePublished')
                    images.append([field, imageob])

                # replace old links in document with new ones
                body = obj.getField(bodyfieldname).getAccessor(obj)()
                if not isinstance(body, unicode):
                    body = unicode(body, 'utf-8', 'ignore')
                for fieldname, imageob in images:
                    body = body.replace(
                        '%s/%s' % (obj.getId(), fieldname),
                        'resolveuid/%s/image' % (imageob.UID()),
                        )
                    # replace sizes
                    for oldsize, newsize in IMAGE_SIZE_MAPPING.items():
                        body = body.replace(
                            'image_%s' % oldsize,
                            'image_%s' % newsize,
                            )

                # fix inpage image styles
                body = body.replace('inpage-image-right', 'image-right')
                body = body.replace('inpage-image-left', 'image-left')

                # fix table styles
                body = body.replace("<TABLE class=inline",
                                    "<table class='listing'")

                obj.getField(bodyfieldname).getMutator(obj)(
                    body, mimetype='text/html')

            # Remove hard-coded links to old site
            OLD_DOMAIN = 'https://oldsitename'
            if hasattr(obj, 'Schema'):
                schema = obj.Schema()
                for fieldname in schema.keys():
                    field = schema[fieldname]
                    if not isinstance(field, atapi.TextField):
                        continue
                    value = field.getAccessor(obj)()
                    if not isinstance(value, unicode):
                        value = unicode(value, 'utf-8', 'ignore')
                    value_changed = False
                    links = extractLinks(value)
                    for link in links:
                        if link.startswith(OLD_DOMAIN):
                            newlink = link.replace(OLD_DOMAIN, '')
                            value = value.replace(link, newlink)
                            value_changed = True
                    if value_changed:
                        mimetype = field.getContentType(obj)
                        field.getMutator(obj)(value, mimetype=mimetype)

            if reindex_security:
                obj.reindexObjectSecurity(skip_self=True)
            if reindex:
                try:
                    obj.reindexObject()
                except Exception:
                    # Best effort: a failing reindex must not abort the
                    # import, so it is only recorded.  Catching
                    # `Exception` instead of everything keeps
                    # KeyboardInterrupt and SystemExit working.
                    from datetime import datetime
                    ERRORS_FILENAME = '%s/reindex_errors.txt' % BASE_DIR
                    error = 'Could not reindex %s' % obj.absolute_url()
                    # the `with` block closes the log file right away
                    with open(ERRORS_FILENAME, 'a+') as errors_file:
                        errors_file.write(
                            '%s %s\n' % (datetime.now(), error))

            yield item
Example #20
0
 def testHandleStringEncodingException(self):
     """``extractLinks(DATA)`` returns a tuple holding only ``EXPECTED``."""
     links = extractLinks(DATA)
     self.assertEqual(links, (EXPECTED,))
Example #21
0
 def testHandleParserException(self):
     """Only the well-formed anchor contributes a link."""
     broken_tag = '<foo\'d>'
     self.assertEqual(extractLinks(broken_tag), ())
     anchor_then_broken = '<a href="http://foo.com">foo</a><bar\'d>'
     found = extractLinks(anchor_then_broken)
     self.assertEqual(found, ('http://foo.com',))
Example #22
0
 def testHandleStringEncodingException(self):
     """The links extracted from ``DATA`` equal ``(EXPECTED, )``."""
     wanted = (EXPECTED, )
     got = extractLinks(DATA)
     self.assertEqual(got, wanted)
Example #23
0
    def __iter__(self):

        for item in self.previous:
            pathkey = self.pathkey(*item.keys())[0]
            fileskey = self.fileskey(*item.keys())[0]

            if not (pathkey and fileskey):
                yield item
                continue
            if 'extras' not in item[fileskey]:
                yield item
                continue

            path = item[pathkey]
            obj = self.context.unrestrictedTraverse(path, None)
            if obj is None:  # path doesn't exist
                yield item
                continue

            data = item[fileskey]['extras']['data']
            extras = loads(data)
            reindex = False
            reindex_security = False

            # Fix up type screwups between our type switching
            # and the CMF marshaller
            if hasattr(obj, '_getPortalTypeName'):
                cmf_type = obj._getPortalTypeName()
                if cmf_type != obj.meta_type:
                    obj._setPortalTypeName(obj.meta_type)
                    reindex = True

            # ROLES import
            # for groups/projects/usergroups
            if obj.meta_type in [
                    'Project',
                    'FunctionalGroup',
                    'UserGroup',
            ]:
                # member times
                if extras.get('membertimes'):
                    setattr(obj, '_membertimes', extras['membertimes'])

                # roles stuff
                acl_users = getToolByName(obj, 'acl_users')
                groupinfo = obj.getACLUsersGroupInfo()

                # members/admins
                if extras.get('members'):
                    acl_users.source_groups.manage_addPrincipalsToGroup(
                        group_id=groupinfo['Members']['gid'],
                        principal_ids=extras['members'],
                    )
                    reindex_security = True
                    reindex = True
                # admins
                if extras.get('administrators'):
                    acl_users.source_groups.manage_addPrincipalsToGroup(
                        group_id=groupinfo['Administrators']['gid'],
                        principal_ids=extras['administrators'],
                    )
                    reindex_security = True
                    reindex = True
                # contributors
                for userid in extras.get('contributors', []):
                    obj.manage_setLocalRoles(userid, [
                        'Contributor',
                    ])
                    reindex_security = True
                    reindex = True
                # reviewers
                for userid in extras.get('reviewers', []):
                    obj.manage_setLocalRoles(userid, [
                        'Reviewer',
                    ])
                    reindex_security = True
                    reindex = True
                # shared access groups
                for UID in extras.get('sharedaccess', []):
                    # translate old 'UID' group name to new 'UID-Members'
                    obj.manage_setLocalRoles('%s-Members' % UID, [
                        'Member',
                    ])
                    reindex_security = True
                    reindex = True

            # LOCAL ROLES
            for userid, roles in extras.get('local_roles', {}).items():
                obj.manage_setLocalRoles(userid, roles)
                reindex_security = True
                reindex = True

            # RESTRICTED FOLDERS
            if obj.meta_type != 'Plone Site':
                portal_workflow = getToolByName(obj, 'portal_workflow')
                if portal_workflow.getInfoFor(obj, 'review_state', '') \
                        == 'restricted':
                    if hasattr(obj, 'getACLUsersGroupInfo'):
                        agid = obj.getACLUsersGroupInfo(
                        )['Administrators']['gid']
                        obj.manage_setLocalRoles(agid, [
                            'Manager',
                        ])

            # Technical Portal field changes
            if obj.meta_type in [
                    'TechnicalTicket',
                    'TechnicalTicketResponse',
            ]:
                if 'marshall' in item[fileskey]:
                    manifest = item[fileskey]['marshall']['data']
                    for field, info in self.parseManifest(manifest).items():
                        if field == 'attachmentLink' and info['_alltext']:
                            newfield = obj.getField('link')
                            newfield.getMutator(obj)(info['_alltext'])
                            reindex = True

                mapping = {
                    'attachmentFile': 'attachmentFile1',
                    'attachment1': 'attachmentFile1',
                    'attachment2': 'attachmentFile2',
                    'attachment3': 'attachmentFile3',
                }
                if 'file-fields' in item[fileskey]:
                    # remap attachment fields
                    manifest = item[fileskey]['file-fields']['data']
                    for field, info in self.parseManifest(manifest).items():
                        if field in mapping:
                            fname = info['filename']
                            ct = info['mimetype']
                            data = item[fileskey][fname]['data']
                            newfield = obj.getField(mapping[field])
                            newfield.getMutator(obj)(data,
                                                     filename=fname,
                                                     mimetype=ct)
                            reindex = True

            # update geolocation fields to new format
            if obj.meta_type in [
                    'BusinessUnit',
                    'MapMarker',
            ]:
                location = obj.getGeolocation()
                if isinstance(location, list):
                    location = ', '.join(location)
                    obj.setGeolocation(location)
                    reindex = True

            # update blog messages to 'html'
            if obj.meta_type == 'BlogMessage':
                message = obj.message
                if message.getContentType() == 'text/plain':
                    rawmessage = obj.getRawMessage()
                    if rawmessage.startswith('<'):
                        # looks like it is already html
                        message.setContentType(obj, 'text/html')
                    else:
                        portal_transforms = getToolByName(
                            obj, 'portal_transforms')
                        data = portal_transforms.convertTo(
                            'text/html', rawmessage, mimetype='text/plain')
                        htmlmessage = data.getData()
                        obj.setMessage(htmlmessage, mimetype='text/html')
                    reindex = True

            # Set default page of previously-known-as 'LargeDocument'
            if extras.has_key('default_page'):
                obj.setDefaultPage(extras['default_page'])
                reindex = True

            # enable next previous nav
            if extras.has_key('nextprev'):
                obj.setNextPreviousEnabled(extras['nextprev'])
                reindex = True

            # Force pages to text/html
            if obj.meta_type in [
                    'Page',
                    'NewsItem',
                    'HelpPage',
            ]:
                body = obj.body
                if body.getContentType() == 'text/plain':
                    body.setContentType(obj, 'text/html')

            # CSR nom ratings
            if obj.meta_type == 'CSRNomination':
                ratings = extras.get('ratings', {})
                obj.setRatings(ratings)

            # Video file remapping
            if obj.meta_type in ['Video' ] \
                    and 'file-fields' in item[fileskey]:
                manifest = item[fileskey]['file-fields']['data']
                mapping = {
                    'preview': 'video',
                    'video': 'original',
                }
                for field, info in self.parseManifest(manifest).items():
                    if field in mapping:
                        fname = info['filename']
                        ct = info['mimetype']
                        data = item[fileskey][fname]['data']
                        newfield = obj.getField(mapping[field])
                        newfield.getMutator(obj)(data,
                                                 filename=fname,
                                                 mimetype=ct)
                        reindex = True

            # Add Page image fields as content items
            if obj.meta_type in ['Page', 'BlogMessage', ] \
                    and 'file-fields' in item[fileskey]:
                portal_workflow = getToolByName(obj, 'portal_workflow')
                manifest = item[fileskey]['file-fields']['data']
                images = []
                if obj.meta_type == 'BlogMessage':
                    folder = obj.inBlog()
                    bodyfieldname = 'message'
                else:
                    folder = obj.aq_parent
                    bodyfieldname = 'body'

                for field, info in self.parseManifest(manifest).items():
                    if not field.startswith('image'):
                        continue
                    fname = info['filename']
                    ct = info['mimetype']
                    #if fname in item[fileskey]:
                    data = item[fileskey][fname]['data']
                    imageob = create_object(
                        'Image',
                        folder,
                        rename_after=True,
                        title=fname,
                    )
                    imageob.setImage(data, filename=fname, mimetype=ct)
                    if obj.meta_type != 'BlogMessage':
                        # blog images are auto-published
                        portal_workflow.doActionFor(imageob, 'make_visible')
                    imageob.setDatePublished(obj.created())
                    imageob.reindexObject('getDatePublished')
                    # mimic the published data of the original item
                    images.append([field, imageob])

                # replace old links in document with new ones
                body = obj.getField(bodyfieldname).getAccessor(obj)()
                if not isinstance(body, unicode):
                    body = unicode(body, 'utf-8', 'ignore')
                for fieldname, imageob in images:
                    body = body.replace(
                        '%s/%s' % (obj.getId(), fieldname),
                        'resolveuid/%s/image' % (imageob.UID()),
                    )
                    # replace sizes
                    for oldsize, newsize in IMAGE_SIZE_MAPPING.items():
                        body = body.replace(
                            'image_%s' % oldsize,
                            'image_%s' % newsize,
                        )

                # fix inpage image styles
                body = body.replace('inpage-image-right', 'image-right')
                body = body.replace('inpage-image-left', 'image-left')

                # fix table styles
                body = body.replace("<TABLE class=inline",
                                    "<table class='listing'")

                obj.getField(bodyfieldname).getMutator(obj)(
                    body, mimetype='text/html')

            # Remove hard-coded links to old site
            OLD_DOMAIN = 'https://oldsitename'
            if hasattr(obj, 'Schema'):
                schema = obj.Schema()
                for fieldname in schema.keys():
                    field = schema[fieldname]
                    if not isinstance(field, atapi.TextField):
                        continue
                    value = field.getAccessor(obj)()
                    if not isinstance(value, unicode):
                        value = unicode(value, 'utf-8', 'ignore')
                    value_changed = False
                    links = extractLinks(value)
                    for link in links:
                        if link.startswith(OLD_DOMAIN):
                            newlink = link.replace(OLD_DOMAIN, '')
                            value = value.replace(link, newlink)
                            value_changed = True
                    if value_changed:
                        mimetype = field.getContentType(obj)
                        field.getMutator(obj)(value, mimetype=mimetype)

            if reindex_security:
                obj.reindexObjectSecurity(skip_self=True)
            if reindex:
                try:
                    obj.reindexObject()
                except:
                    ERRORS_FILENAME = '%s/reindex_errors.txt' % BASE_DIR
                    error = 'Could not reindex %s' % obj.absolute_url()
                    from datetime import datetime
                    open(ERRORS_FILENAME,
                         'a+').write('%s %s\n' % (datetime.now(), error))

            yield item
Example #24
0
 def testHandleParserException(self):
     """Check extractLinks on markup whose tags contain a stray quote.

     A lone broken tag is expected to yield an empty tuple, and a valid
     anchor followed by a broken tag is expected to still yield the
     anchor's href.
     """
     # (markup handed to extractLinks, tuple of links expected back)
     scenarios = (
         ("<foo'd>", ()),
         ('<a href="http://foo.com">foo</a><bar\'d>', ("http://foo.com",)),
     )
     for markup, expected_links in scenarios:
         self.assertEqual(extractLinks(markup), expected_links)