def _get_path(id, post_type, category, date):
    """Build the site-relative path under which an imported item is stored.

    The path is used later to create the object and any missing containers.

    :param id: [required] id of the post
    :type id: string
    :param post_type: [required] WordPress post type
    :type post_type: string
    :param category: [required] slug of the category
    :type category: string
    :param date: [required] creation date
    :type date: DateTime
    :returns: path to the object starting from the root of the site
    :rtype: string
    :raises: zExceptions.BadRequest
    """
    if bad_id(id) is not None:
        raise BadRequest

    day = date.Date()

    if post_type == 'attachment':
        # attachments use the default location:
        # "/wp-content/uploads/%year%/%monthnum%/"
        # (presumably the last three characters of Date() are the "/DD"
        # part -- confirm against the DateTime implementation in use)
        return '/wp-content/uploads/{0}/{1}'.format(day[:-3], id)

    # for posts and pages the path is constructed according to the
    # permalink structure:
    # "/%category%/%year%/%monthnum%/%day%/%postname%/"
    return '/{0}/{1}/{2}'.format(category, day, id)
def prepareContents(self, registry, register_subdirs=0):
    """Create an object for each usable entry of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories that are not yet
        known to ``registry`` are registered on the fly (except for
        'CVS', 'SVN', '.' and '..').
    :returns: dict mapping the id of each created object to the object.
    """
    fp = expandpath(self.filepath)
    data = {}
    entries = listdir(fp)
    types = self._readTypesFile()
    for entry in entries:
        if not self._isAllowableFilename(entry):
            continue
        e_filepath = path.join(self.filepath, entry)
        e_fp = expandpath(e_filepath)
        if path.isdir(e_fp):
            # Add a subdirectory only if it was previously registered,
            # unless register_subdirs is set.
            info = registry.getDirectoryInfo(e_filepath)
            if info is None and register_subdirs:
                # Register unknown subdirs
                if entry not in ('CVS', 'SVN', '.', '..'):
                    registry.registerDirectoryByPath(e_fp)
                    info = registry.getDirectoryInfo(e_filepath)
            if info is not None:
                mt = types.get(entry)
                t = None
                if mt is not None:
                    t = registry.getTypeByMetaType(mt)
                if t is None:
                    # No specific type registered: fall back to a plain view.
                    t = DirectoryView
                ob = t(entry, e_filepath)
                data[ob.getId()] = ob
        else:
            # Split "name.ext" on the last dot.
            pos = entry.rfind('.')
            if pos >= 0:
                name = entry[:pos]
                ext = normalize(entry[pos + 1:])
            else:
                name = entry
                ext = ''
            if not name or name == 'REQUEST':
                # Not an allowable id.
                continue
            # bad_id signals "id is fine" with None (re-based) or -1
            # (regex-based); accept both so valid files are not dropped.
            mo = bad_id(entry)
            if mo is not None and mo != -1:
                # Not an allowable id.
                continue
            t = None
            # An explicit type for the full file name wins over one for
            # the bare name, which wins over the extension default.
            mt = types.get(entry, None)
            if mt is None:
                mt = types.get(name, None)
            if mt is not None:
                t = registry.getTypeByMetaType(mt)
            if t is None:
                t = registry.getTypeByExtension(ext)
            if t is not None:
                ob = t(name, e_filepath, fullname=entry)
                data[ob.getId()] = ob
    return data
def _checkId(self, id):
    """Check whether ``id`` is a valid CMF/Plone id.

    Returns an error message string when the id is rejected and None
    when no check objects to it.
    """
    portal = getToolByName(self, 'portal_url').getPortalObject()

    if not id:
        return 'Empty id'
    if bad_id(id):
        return '\'%s\' is not a valid id' % (id)

    # The remaining checks only apply to Plone sites that actually carry
    # the list of reserved ids.
    if portal.__class__.__name__ != 'PloneSite':
        return None
    if not hasattr(portal, 'portal_properties'):
        return None
    if not hasattr(portal.portal_properties, 'site_properties'):
        return None
    site_props = portal.portal_properties.site_properties
    if not hasattr(site_props, 'invalid_ids'):
        return None

    if id in site_props.invalid_ids:
        return '\'%s\' is a reserved id' % (id)
    return None
def _checkId(self, id):
    """Check whether ``id`` is a valid CMF/Plone id.

    Returns an error message string when the id is rejected and None
    when no check objects to it.
    """
    portal = getToolByName(self, 'portal_url').getPortalObject()

    if not id:
        return 'Empty id'

    if bad_id(id):
        return '\'%s\' is not a valid id' % (id)

    # Extra check for Plone sites: collect the reserved ids, if the site
    # exposes them, then test membership once.
    reserved = ()
    if portal.__class__.__name__ == 'PloneSite':
        props = getToolByName(portal, 'portal_properties', None)
        if props is not None and hasattr(props, 'site_properties'):
            if hasattr(props.site_properties, 'invalid_ids'):
                reserved = props.site_properties.invalid_ids

    if id in reserved:
        return '\'%s\' is a reserved id' % (id)
    return None
def prepareContents(self, registry, register_subdirs=0):
    """Build the objects representing the entries of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories unknown to
        ``registry`` are registered on the fly.
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    data = {}
    objects = []
    types = self._readTypesFile()

    for entry in _filtered_listdir(self._filepath, ignore=self.ignore):
        if not self._isAllowableFilename(entry):
            continue
        entry_filepath = path.join(self._filepath, entry)

        if path.isdir(entry_filepath):
            # A subdirectory is only added if it was previously
            # registered, unless register_subdirs is set.
            entry_minimal_fp = '/'.join((self._minimal_fp, entry))
            info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None and register_subdirs:
                # Register unknown subdirs
                registry.registerDirectoryByPath(entry_filepath)
                info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None:
                continue

            # Folders on the file system have no extension or meta_type.
            # To allow customizing what represents a filesystem folder,
            # the fake type "FOLDER" is looked up; other implementations
            # can register for it instead of getting a DirectoryView.
            folder_type = registry.getTypeByMetaType(
                types.get(entry) or 'FOLDER')
            if folder_type is None:
                folder_type = DirectoryView
            metadata = FSMetadata(entry_filepath)
            metadata.read()
            ob = folder_type(entry, entry_minimal_fp,
                             properties=metadata.getProperties())
            ob_id = ob.getId()
            data[ob_id] = ob
            objects.append({'id': ob_id, 'meta_type': ob.meta_type})
            continue

        # Plain file: split "name.ext" on the last dot.
        dot = entry.rfind('.')
        if dot < 0:
            name = entry
            ext = ''
        else:
            name = entry[:dot]
            ext = path.normcase(entry[dot + 1:])
        if name == 'REQUEST' or not name:
            # Not an allowable id.
            continue
        mo = bad_id(name)
        if not (mo is None or mo == -1):  # Both re and regex formats
            # Not an allowable id.
            continue

        # Type lookup order: full file name, bare name, then extension.
        meta_type = types.get(entry, None)
        if meta_type is None:
            meta_type = types.get(name, None)
        file_type = None
        if meta_type is not None:
            file_type = registry.getTypeByMetaType(meta_type)
        if file_type is None:
            file_type = registry.getTypeByExtension(ext)
        if file_type is None:
            continue

        metadata = FSMetadata(entry_filepath)
        metadata.read()
        try:
            ob = file_type(name, entry_filepath, fullname=entry,
                           properties=metadata.getProperties())
        except:
            # Any failure while building the object is logged and the
            # entry is represented by a BadFile carrying the traceback.
            import sys
            import traceback
            typ, val, tb = sys.exc_info()
            try:
                logger.exception("prepareContents")
                exc_lines = traceback.format_exception(typ, val, tb)
                ob = BadFile(name, entry_filepath,
                             exc_str='\r\n'.join(exc_lines),
                             fullname=entry)
            finally:
                tb = None  # Avoid leaking frame!

        # FS-based security
        permissions = metadata.getSecurity()
        if permissions is not None:
            for permission, (acquire, roles) in permissions.items():
                try:
                    ob.manage_permission(permission, roles, acquire)
                except ValueError:
                    logger.exception("Error setting permissions")

        # only DTML Methods and Python Scripts can have proxy roles
        if hasattr(ob, '_proxy_roles'):
            try:
                ob._proxy_roles = tuple(metadata.getProxyRoles())
            except:
                logger.exception("Error setting proxy role")

        ob_id = ob.getId()
        data[ob_id] = ob
        objects.append({'id': ob_id, 'meta_type': ob.meta_type})

    return data, tuple(objects)
def good_id(self, id):
    """Expose ObjectManager's bad_id test to skin scripts.

    Returns 1 when ``bad_id`` reports None for ``id`` and 0 otherwise.
    """
    if bad_id(id) is None:
        return 1
    return 0
def prepareContents(self, registry, register_subdirs=0):
    """Build the objects representing the entries of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories unknown to
        ``registry`` are registered on the fly.
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    root = expandpath(self.filepath)
    data = {}
    objects = []
    types = self._readTypesFile()

    for entry in _filtered_listdir(root):
        if not self._isAllowableFilename(entry):
            continue
        e_filepath = path.join(self.filepath, entry)
        e_fp = expandpath(e_filepath)

        if path.isdir(e_fp):
            # A subdirectory is only added if it was previously
            # registered, unless register_subdirs is set.
            info = registry.getDirectoryInfo(e_filepath)
            if info is None and register_subdirs:
                # Register unknown subdirs
                registry.registerDirectoryByPath(e_fp)
                info = registry.getDirectoryInfo(e_filepath)
            if info is None:
                continue
            klass = None
            meta_type = types.get(entry)
            if meta_type is not None:
                klass = registry.getTypeByMetaType(meta_type)
            if klass is None:
                klass = DirectoryView
            ob = klass(entry, e_filepath)
        else:
            # Split "name.ext" on the last dot.
            dot = entry.rfind('.')
            if dot < 0:
                name = entry
                ext = ''
            else:
                name = entry[:dot]
                ext = path.normcase(entry[dot + 1:])
            if name == 'REQUEST' or not name:
                # Not an allowable id.
                continue
            mo = bad_id(name)
            if not (mo is None or mo == -1):  # Both re and regex formats
                # Not an allowable id.
                continue

            # Type lookup order: full file name, bare name, extension.
            meta_type = types.get(entry, None)
            if meta_type is None:
                meta_type = types.get(name, None)
            klass = None
            if meta_type is not None:
                klass = registry.getTypeByMetaType(meta_type)
            if klass is None:
                klass = registry.getTypeByExtension(ext)
            if klass is None:
                continue

            properties = self._readProperties(e_fp + '.properties')
            try:
                ob = klass(name, e_filepath, fullname=entry,
                           properties=properties)
            except:
                # Any failure while building the object is logged and
                # the entry is represented by a BadFile instead.
                import traceback
                typ, val, tb = exc_info()
                try:
                    exc_lines = traceback.format_exception(typ, val, tb)
                    LOG('DirectoryView', ERROR, '\n'.join(exc_lines))
                    ob = BadFile(name, e_filepath,
                                 exc_str='\r\n'.join(exc_lines),
                                 fullname=entry)
                finally:
                    tb = None  # Avoid leaking frame!

        # Both branches record the new object the same way.
        ob_id = ob.getId()
        data[ob_id] = ob
        objects.append({'id': ob_id, 'meta_type': ob.meta_type})

    return data, tuple(objects)
def prepareContents(self, registry, register_subdirs=0):
    """Build the objects representing the entries of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories unknown to
        ``registry`` are registered on the fly.
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    data = {}
    objects = []
    types = self._readTypesFile()

    for entry in _filtered_listdir(self._filepath, ignore=self.ignore):
        if not self._isAllowableFilename(entry):
            continue
        entry_minimal_fp = '/'.join((self._minimal_fp, entry))
        entry_filepath = path.join(self._filepath, entry)

        if path.isdir(entry_filepath):
            # A subdirectory is only added if it was previously
            # registered, unless register_subdirs is set.
            info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None and register_subdirs:
                # Register unknown subdirs
                registry.registerDirectoryByPath(entry_filepath)
                info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None:
                continue
            factory = None
            meta_type = types.get(entry)
            if meta_type is not None:
                factory = registry.getTypeByMetaType(meta_type)
            if factory is None:
                factory = DirectoryView
            ob = factory(entry, entry_minimal_fp)
            ob_id = ob.getId()
            data[ob_id] = ob
            objects.append({'id': ob_id, 'meta_type': ob.meta_type})
            continue

        # Plain file: split "name.ext" on the last dot.
        dot = entry.rfind('.')
        if dot < 0:
            name = entry
            ext = ''
        else:
            name = entry[:dot]
            ext = path.normcase(entry[dot + 1:])
        if name == 'REQUEST' or not name:
            # Not an allowable id.
            continue
        mo = bad_id(name)
        if not (mo is None or mo == -1):  # Both re and regex formats
            # Not an allowable id.
            continue

        # Type lookup order: full file name, bare name, then extension.
        meta_type = types.get(entry, None)
        if meta_type is None:
            meta_type = types.get(name, None)
        factory = None
        if meta_type is not None:
            factory = registry.getTypeByMetaType(meta_type)
        if factory is None:
            factory = registry.getTypeByExtension(ext)
        if factory is None:
            continue

        metadata = FSMetadata(entry_filepath)
        metadata.read()
        try:
            ob = factory(name, entry_minimal_fp, fullname=entry,
                         properties=metadata.getProperties())
        except:
            # Any failure while building the object is logged and the
            # entry is represented by a BadFile carrying the traceback.
            import traceback
            typ, val, tb = exc_info()
            try:
                exc_lines = traceback.format_exception(typ, val, tb)
                LOG('DirectoryView', ERROR, '\n'.join(exc_lines))
                ob = BadFile(name, entry_minimal_fp,
                             exc_str='\r\n'.join(exc_lines),
                             fullname=entry)
            finally:
                tb = None  # Avoid leaking frame!

        # FS-based security
        permissions = metadata.getSecurity()
        if permissions is not None:
            for permission, (acquire, roles) in permissions.items():
                try:
                    ob.manage_permission(permission, roles, acquire)
                except ValueError:
                    LOG('DirectoryView', ERROR,
                        'Error setting permissions', error=exc_info())

        # only DTML Methods and Python Scripts can have proxy roles
        if hasattr(ob, '_proxy_roles'):
            try:
                ob._proxy_roles = tuple(metadata.getProxyRoles())
            except:
                LOG('DirectoryView', ERROR,
                    'Error setting proxy role', error=exc_info())

        ob_id = ob.getId()
        data[ob_id] = ob
        objects.append({'id': ob_id, 'meta_type': ob.meta_type})

    return data, tuple(objects)
def prepareContents(self, registry, register_subdirs=0):
    """Build the objects representing the entries of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories unknown to
        ``registry`` are registered on the fly.
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    data = {}
    objects = []
    types = self._readTypesFile()

    for entry in _filtered_listdir(self._filepath, ignore=self.ignore):
        if not self._isAllowableFilename(entry):
            continue
        entry_filepath = path.join(self._filepath, entry)

        if path.isdir(entry_filepath):
            # A subdirectory is only added if it was previously
            # registered, unless register_subdirs is set.
            entry_minimal_fp = '/'.join((self._minimal_fp, entry))
            info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None and register_subdirs:
                # Register unknown subdirs
                registry.registerDirectoryByPath(entry_filepath)
                info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None:
                continue

            # Folders on the file system have no extension or meta_type.
            # As a crutch for customizing what represents a filesystem
            # folder, the fake type "FOLDER" is looked up so that other
            # implementations can register for it; DirectoryView is only
            # the fallback.
            meta_type = types.get(entry) or 'FOLDER'
            factory = registry.getTypeByMetaType(meta_type)
            if factory is None:
                factory = DirectoryView
            metadata = FSMetadata(entry_filepath)
            metadata.read()
            ob = factory(entry, entry_minimal_fp,
                         properties=metadata.getProperties())
        else:
            # Split "name.ext" on the last dot.
            dot = entry.rfind('.')
            if dot < 0:
                name = entry
                ext = ''
            else:
                name = entry[:dot]
                ext = path.normcase(entry[dot + 1:])
            if name == 'REQUEST' or not name:
                # Not an allowable id.
                continue
            mo = bad_id(name)
            if not (mo is None or mo == -1):  # Both re and regex formats
                # Not an allowable id.
                continue

            # Type lookup order: full file name, bare name, extension.
            meta_type = types.get(entry, None)
            if meta_type is None:
                meta_type = types.get(name, None)
            factory = None
            if meta_type is not None:
                factory = registry.getTypeByMetaType(meta_type)
            if factory is None:
                factory = registry.getTypeByExtension(ext)
            if factory is None:
                continue

            metadata = FSMetadata(entry_filepath)
            metadata.read()
            try:
                ob = factory(name, entry_filepath, fullname=entry,
                             properties=metadata.getProperties())
            except:
                # Any failure while building the object is logged and
                # the entry is represented by a BadFile instead.
                import sys
                import traceback
                typ, val, tb = sys.exc_info()
                try:
                    logger.exception("prepareContents")
                    exc_lines = traceback.format_exception(typ, val, tb)
                    ob = BadFile(name, entry_filepath,
                                 exc_str='\r\n'.join(exc_lines),
                                 fullname=entry)
                finally:
                    tb = None  # Avoid leaking frame!

            # FS-based security
            permissions = metadata.getSecurity()
            if permissions is not None:
                for permission, (acquire, roles) in permissions.items():
                    try:
                        ob.manage_permission(permission, roles, acquire)
                    except ValueError:
                        logger.exception("Error setting permissions")

            # only DTML Methods and Python Scripts can have proxy roles
            if hasattr(ob, '_proxy_roles'):
                try:
                    ob._proxy_roles = tuple(metadata.getProxyRoles())
                except:
                    logger.exception("Error setting proxy role")

        # Both branches record the new object the same way.
        ob_id = ob.getId()
        data[ob_id] = ob
        objects.append({'id': ob_id, 'meta_type': ob.meta_type})

    return data, tuple(objects)
def checkId(id):
    """Validate ``id`` with ``bad_id``.

    ``bad_id`` is treated as reporting "no problem" with either None or
    -1, so both conventions are accepted.

    :returns: 1 when the id is acceptable.
    :raises ValueError: when ``bad_id`` flags the id.
    """
    res = bad_id(id)
    if res != -1 and res is not None:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError('Illegal ID')
    return 1
def prepareContents(self, registry, register_subdirs=0):
    """Build the objects representing the entries of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories unknown to
        ``registry`` are registered on the fly.
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    data = {}
    objects = []
    types = self._readTypesFile()

    for entry in _filtered_listdir(self._filepath):
        if not self._isAllowableFilename(entry):
            continue
        entry_minimal_fp = '/'.join((self._minimal_fp, entry))
        entry_filepath = path.join(self._filepath, entry)

        if path.isdir(entry_filepath):
            # A subdirectory is only added if it was previously
            # registered, unless register_subdirs is set.
            info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None and register_subdirs:
                # Register unknown subdirs
                registry.registerDirectoryByPath(entry_filepath)
                info = registry.getDirectoryInfo(entry_minimal_fp)
            if info is None:
                continue
            view_class = None
            meta_type = types.get(entry)
            if meta_type is not None:
                view_class = registry.getTypeByMetaType(meta_type)
            if view_class is None:
                view_class = DirectoryView
            ob = view_class(entry, entry_minimal_fp)
        else:
            # Split "name.ext" on the last dot.
            dot = entry.rfind('.')
            if dot < 0:
                name = entry
                ext = ''
            else:
                name = entry[:dot]
                ext = path.normcase(entry[dot + 1:])
            if name == 'REQUEST' or not name:
                # Not an allowable id.
                continue
            mo = bad_id(name)
            if not (mo is None or mo == -1):  # Both re and regex formats
                # Not an allowable id.
                continue

            # Type lookup order: full file name, bare name, extension.
            meta_type = types.get(entry, None)
            if meta_type is None:
                meta_type = types.get(name, None)
            view_class = None
            if meta_type is not None:
                view_class = registry.getTypeByMetaType(meta_type)
            if view_class is None:
                view_class = registry.getTypeByExtension(ext)
            if view_class is None:
                continue

            metadata = FSMetadata(entry_filepath)
            metadata.read()
            try:
                ob = view_class(name, entry_minimal_fp, fullname=entry,
                                properties=metadata.getProperties())
            except:
                # Any failure while building the object is logged and
                # the entry is represented by a BadFile instead.
                import traceback
                typ, val, tb = exc_info()
                try:
                    exc_lines = traceback.format_exception(typ, val, tb)
                    LOG('DirectoryView', ERROR, '\n'.join(exc_lines))
                    ob = BadFile(name, entry_minimal_fp,
                                 exc_str='\r\n'.join(exc_lines),
                                 fullname=entry)
                finally:
                    tb = None  # Avoid leaking frame!

            # FS-based security
            permissions = metadata.getSecurity()
            if permissions is not None:
                for permission, (acquire, roles) in permissions.items():
                    try:
                        ob.manage_permission(permission, roles, acquire)
                    except ValueError:
                        LOG('DirectoryView', ERROR,
                            'Error setting permissions', error=exc_info())

            # only DTML Methods can have proxy roles
            if hasattr(ob, '_proxy_roles'):
                try:
                    ob._proxy_roles = tuple(metadata.getProxyRoles())
                except:
                    LOG('DirectoryView', ERROR,
                        'Error setting proxy role', error=exc_info())

        # Both branches record the new object the same way.
        ob_id = ob.getId()
        data[ob_id] = ob
        objects.append({'id': ob_id, 'meta_type': ob.meta_type})

    return data, tuple(objects)
def __iter__(self):
    """Yield upstream items, then one item per usable row of wp_posts.csv.

    Items from ``self.previous`` are passed through unchanged. After
    that, ``wp_posts.csv`` inside ``self.source`` is read and each row
    that is not skipped is turned into a dict describing the content to
    create. Rows whose id is rejected by ``bad_id``, or for which
    ``self.get_path`` raises KeyError, are logged and dropped.

    :raises AssertionError: when ``wp_posts.csv`` does not exist.
    """
    for item in self.previous:
        yield item

    filename = os.path.join(self.source, 'wp_posts.csv')
    assert os.path.isfile(filename), 'Missing file: ' + filename

    with open(filename) as csvfile:
        csv.field_size_limit(self.field_size_limit)
        reader = csv.DictReader(csvfile, **csv_options)
        for row in reader:
            if _skip(row, self.skip):  # should we process this row?
                continue

            item = dict()
            post_type = row['post_type']

            if post_type == 'post':
                # posts are imported as portal_type
                item['portal_type'] = self.portal_type
            elif post_type == 'page':
                # pages are imported as Page
                item['portal_type'] = 'Page'
            elif post_type == 'attachment':
                # attachments are imported as Image or File
                is_image = row['post_mime_type'].startswith('image')
                item['portal_type'] = 'Image' if is_image else 'File'
                item['_mimetype'] = row['post_mime_type']
                item['_guid'] = row['guid']  # store for later

            if post_type != 'attachment':
                # for posts and pages the id is the post name
                item_id = row['post_name']
                # Zope ids need to be ASCII
                item_id = fix_id(item_id)
                item['title'] = strip_tags(row['post_title'])
            else:
                # for attachments we need to parse the guid
                # and use the file name as title
                url = urlparse(row['guid'])
                item_id = item['title'] = url.path.split('/')[-1]
                item_id = fix_id(item_id)

            # on Zope ids can't start with "_"
            if bad_id(item_id) is not None:
                # Logger.warning with lazy arguments replaces the
                # deprecated Logger.warn alias.
                logger.warning('Invalid object id on row ID: %s', row['ID'])
                continue

            # WordPress stores only publication and modification times
            # we use publication date as creation date
            item['creation_date'] = item['effective_date'] = row['post_date']
            item['modification_date'] = row['post_modified']

            try:
                item['_path'] = self.get_path(
                    row['ID'], item_id, post_type, item)
            except KeyError:
                # files defining taxonomies are probably outdated
                logger.warning(
                    'No taxonomies found for row ID: %s', row['ID'])
                continue

            item['description'] = row['post_excerpt']

            # quotes are escaped; we need to fix that
            item['text'] = row['post_content'].replace('\\"', '"')
            # TODO: validate HTML to avoid post-processing surprises

            # use display_name instead of author_id, if match found
            author_id = row['post_author']
            item['creators'] = self.display_names.get(author_id, author_id)

            if row['post_status'] == 'publish':
                item['_transitions'] = 'publish'

            item['_pinged'] = row['pinged']  # store for later

            yield item
def prepareContents(self, registry, register_subdirs=0):
    """Create an object for each usable entry of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories that are not yet
        known to ``registry`` are registered on the fly (except for
        'CVS', 'SVN', '.' and '..').
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    fp = expandpath(self.filepath)
    data = {}
    objects = []
    entries = listdir(fp)
    types = self._readTypesFile()
    for entry in entries:
        if not self._isAllowableFilename(entry):
            continue
        e_filepath = path.join(self.filepath, entry)
        e_fp = expandpath(e_filepath)
        if path.isdir(e_fp):
            # Add a subdirectory only if it was previously registered,
            # unless register_subdirs is set.
            info = registry.getDirectoryInfo(e_filepath)
            if info is None and register_subdirs:
                # Register unknown subdirs
                if entry not in ('CVS', 'SVN', '.', '..'):
                    registry.registerDirectoryByPath(e_fp)
                    info = registry.getDirectoryInfo(e_filepath)
            if info is not None:
                mt = types.get(entry)
                t = None
                if mt is not None:
                    t = registry.getTypeByMetaType(mt)
                if t is None:
                    # No specific type registered: fall back to a plain view.
                    t = DirectoryView
                ob = t(entry, e_filepath)
                ob_id = ob.getId()
                data[ob_id] = ob
                objects.append({'id': ob_id, 'meta_type': ob.meta_type})
        else:
            # Split "name.ext" on the last dot.
            pos = entry.rfind('.')
            if pos >= 0:
                name = entry[:pos]
                ext = path.normcase(entry[pos + 1:])
            else:
                name = entry
                ext = ''
            if not name or name == 'REQUEST':
                # Not an allowable id.
                continue
            mo = bad_id(name)
            if mo is not None and mo != -1:  # Both re and regex formats
                # Not an allowable id.
                continue
            t = None
            # An explicit type for the full file name wins over one for
            # the bare name, which wins over the extension default.
            mt = types.get(entry, None)
            if mt is None:
                mt = types.get(name, None)
            if mt is not None:
                t = registry.getTypeByMetaType(mt)
            if t is None:
                t = registry.getTypeByExtension(ext)
            if t is not None:
                try:
                    ob = t(name, e_filepath, fullname=entry)
                except:
                    # Deliberately broad: any failure while building the
                    # object is logged and the entry is represented by a
                    # BadFile carrying the formatted traceback.
                    from zLOG import LOG, ERROR
                    import sys
                    import traceback
                    typ, val, tb = sys.exc_info()
                    try:
                        exc_lines = traceback.format_exception(typ, val, tb)
                        LOG('DirectoryView', ERROR, '\n'.join(exc_lines))
                        ob = BadFile(name,
                                     e_filepath,
                                     exc_str='\r\n'.join(exc_lines),
                                     fullname=entry)
                    finally:
                        # Drop the traceback reference so the frame it
                        # points to is not kept alive by this local.
                        tb = None
                ob_id = ob.getId()
                data[ob_id] = ob
                objects.append({'id': ob_id, 'meta_type': ob.meta_type})
    return data, tuple(objects)
def prepareContents(self, registry, register_subdirs=0):
    """Build the objects representing the entries of this directory.

    :param registry: object providing ``getDirectoryInfo``,
        ``registerDirectoryByPath``, ``getTypeByMetaType`` and
        ``getTypeByExtension``.
    :param register_subdirs: when true, sub-directories unknown to
        ``registry`` are registered on the fly.
    :returns: ``(data, objects)`` where ``data`` maps object id to
        object and ``objects`` is a tuple of
        ``{'id': ..., 'meta_type': ...}`` dicts in creation order.
    """
    root = expandpath(self.filepath)
    data = {}
    objects = []
    types = self._readTypesFile()

    for entry in _filtered_listdir(root):
        if not self._isAllowableFilename(entry):
            continue
        e_filepath = path.join(self.filepath, entry)
        e_fp = expandpath(e_filepath)

        if path.isdir(e_fp):
            # A subdirectory is only added if it was previously
            # registered, unless register_subdirs is set.
            info = registry.getDirectoryInfo(e_filepath)
            if info is None and register_subdirs:
                # Register unknown subdirs
                registry.registerDirectoryByPath(e_fp)
                info = registry.getDirectoryInfo(e_filepath)
            if info is None:
                continue
            klass = None
            meta_type = types.get(entry)
            if meta_type is not None:
                klass = registry.getTypeByMetaType(meta_type)
            if klass is None:
                klass = DirectoryView
            ob = klass(entry, e_filepath)
            ob_id = ob.getId()
            data[ob_id] = ob
            objects.append({'id': ob_id, 'meta_type': ob.meta_type})
            continue

        # Plain file: split "name.ext" on the last dot.
        dot = entry.rfind('.')
        if dot < 0:
            name = entry
            ext = ''
        else:
            name = entry[:dot]
            ext = path.normcase(entry[dot + 1:])
        if name == 'REQUEST' or not name:
            # Not an allowable id.
            continue
        mo = bad_id(name)
        if not (mo is None or mo == -1):  # Both re and regex formats
            # Not an allowable id.
            continue

        # Type lookup order: full file name, bare name, then extension.
        meta_type = types.get(entry, None)
        if meta_type is None:
            meta_type = types.get(name, None)
        klass = None
        if meta_type is not None:
            klass = registry.getTypeByMetaType(meta_type)
        if klass is None:
            klass = registry.getTypeByExtension(ext)
        if klass is None:
            continue

        properties = self._readProperties(e_fp + '.properties')
        try:
            ob = klass(name, e_filepath, fullname=entry,
                       properties=properties)
        except:
            # Any failure while building the object is logged and the
            # entry is represented by a BadFile carrying the traceback.
            import traceback
            typ, val, tb = exc_info()
            try:
                exc_lines = traceback.format_exception(typ, val, tb)
                LOG('DirectoryView', ERROR, '\n'.join(exc_lines))
                ob = BadFile(name, e_filepath,
                             exc_str='\r\n'.join(exc_lines),
                             fullname=entry)
            finally:
                tb = None  # Avoid leaking frame!

        # FS-based security: reading and applying the .security data is
        # best effort; any error is logged and the object is kept.
        try:
            permissions = self._readSecurity(e_fp + '.security')
            if permissions is not None:
                for permission, (acquire, roles) in permissions.items():
                    ob.manage_permission(permission, roles, acquire)
        except:
            LOG('DirectoryView', ERROR,
                'Error setting permission from .security file information',
                error=exc_info())

        ob_id = ob.getId()
        data[ob_id] = ob
        objects.append({'id': ob_id, 'meta_type': ob.meta_type})

    return data, tuple(objects)