def __init__(self, transmogrifier, name, options, previous):
    """Read content items from a directory, optionally driven by CSV metadata.

    All option values arrive as strings from the transmogrifier config:
      directory        -- path or ``package:dir`` reference to the content dir
      metadata         -- optional path/reference to a CSV metadata file
      delimiter        -- CSV delimiter (default ','), only with metadata
      strict           -- CSV strict flag (default false), only with metadata
      require-metadata -- anything but 'false' enables the requirement
      folder-type / file-type / image-type -- portal types to create
      file-field / image-field             -- target schema field names
      wrap-data        -- 'true' (default) wraps raw file data
      default-mime-type-- fallback MIME type for unknown files
      ignored          -- newline-separated patterns of names to skip
    """
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    self.directory = resolvePackageReferenceOrFile(options['directory'])
    self.metadata = None
    self.delimiter = None
    self.strict = False
    if 'metadata' in options:
        self.metadata = resolvePackageReferenceOrFile(options['metadata'])
        self.delimiter = options.get('delimiter', ',')
        # BUG FIX: option values are strings, so a configured
        # ``strict = false`` was previously truthy.  Parse it like the
        # wrap-data boolean below; the default stays False.
        self.strict = str(options.get('strict', 'false')).lower() == 'true'
    self.requireMetadata = options.get('require-metadata', 'false').lower() != 'false'
    self.folderType = options.get('folder-type', 'Folder')
    self.fileType = options.get('file-type', 'File')
    self.imageType = options.get('image-type', 'Image')
    self.fileField = options.get('file-field', 'file')
    self.imageField = options.get('image-field', 'image')
    self.wrapData = options.get('wrap-data', 'true').lower() == 'true'
    self.defaultMimeType = options.get('default-mime-type', 'application/octet-stream')
    ignored = options.get('ignored') or ''
    self.ignored = Matcher(*ignored.splitlines())
def __init__(self, transmogrifier, name, options, previous):
    """Resolve the 'json' source reference and open the error log file
    named by the 'path' option for writing."""
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    self.context = transmogrifier.context
    # Resolve both configured locations up front.
    error_path = resolvePackageReferenceOrFile(options['path'])
    self.json = resolvePackageReferenceOrFile(options['json'])
    # Held open for the lifetime of the section; later steps write
    # failed items here.
    self.error_file = open(error_path, 'w')
def __iter__(self):
    """Load the file named by ``item[self.key]`` and store it on the
    target object's Dexterity file field.

    Items whose path cannot be traversed are passed through untouched.
    Fixes two defects: the file handle was never closed (and leaked on
    the early ``continue`` paths), and the always-true ``if not file_``
    check was dead code.  The file *data* is passed to the field
    instead of the open descriptor, mirroring the sibling section that
    notes passing the descriptor can remove the source files.
    """
    for item in self.previous:
        filename = resolvePackageReferenceOrFile(item[self.key])
        keys = item.keys()
        pathkey = self.pathkey(*keys)[0]
        typekey = self.typekey(*keys)[0]

        # Get the target object by path
        path = item[pathkey]
        obj = self.context.unrestrictedTraverse(path.lstrip('/'), None)
        if obj is None:  # path doesn't exist
            yield item
            continue

        # Read the data and always release the handle.
        file_ = open(filename, 'r')
        try:
            filedata = file_.read()
        finally:
            file_.close()

        # Set file field
        fti = getUtility(IDexterityFTI, name=item[typekey])
        schema = fti.lookupSchema()
        field = getFields(schema)[self.field]
        fileobj = field._type(
            filedata,
            filename=filename[filename.rfind('/') + 1:].decode('utf-8'))
        field.set(field.interface(obj), fileobj)
        yield item
def __iter__(self):
    """Yield all upstream items unchanged, then yield one repository
    item per data row of the Excel sheet named by the 'filename'
    option."""
    for item in self.previous:
        yield item
    # NOTE(review): client_id is read but never used below; a missing
    # 'client_id' option still fails loudly here — confirm intent
    # before removing.
    client_id = self.options['client_id']
    filename = resolvePackageReferenceOrFile(self.options['filename'])
    tables = xlrd_xls2array(filename)
    repository_table = tables[0]
    sheet_data = repository_table['sheet_data']
    # clean up table
    sheet_data = sheet_data[4:]  # remove human readable stuff
    keys = sheet_data[0]
    del sheet_data[0]
    for rownum, row in enumerate(sheet_data):
        data = {}
        # repofolder or reporoot: the first data row becomes the root.
        if rownum == 0:
            data['_type'] = u'opengever.repository.repositoryroot'
        else:
            data['_type'] = u'opengever.repository.repositoryfolder'
        for colnum, cell in enumerate(row):
            key = keys[colnum]
            # Skip columns without a header.
            if key in (None, '', u''):
                continue
            # Leave vocabulary-backed fields unset when the cell is empty.
            if key in ('classification', 'privacy_layer',
                       'public_trial', 'retention_period',
                       'custody_period', 'archival_value',
                       ) and cell in (None, '', u''):
                continue
            if key == 'reference_number' and not isinstance(cell, basestring):
                raise Exception("Reference number has to be string: %s" % cell)
            # Empty date cells become None.
            if key in ('valid_from', 'valid_until') and cell in ('', u''):
                cell = None
            # Comma separated list -> cleaned list of type names.
            if key == 'addable_dossier_types':
                cell = cell.replace(' ', '').split(',')
                cell = [t for t in cell if not t == '']
            # Map human readable spreadsheet values to vocabulary terms;
            # unknown values pass through unchanged.
            if key == 'archival_value':
                cell = ARCHIVAL_VALUE_MAPPING.get(cell, cell)
            if key == 'classification':
                cell = CLASSIFICATION_MAPPING.get(cell, cell)
            if key == 'privacy_layer':
                cell = PRIVACY_LAYER_MAPPING.get(cell, cell)
            if key == 'public_trial':
                cell = PUBLIC_TRIAL_MAPPING.get(cell, cell)
            data[key] = cell
        yield data
def __init__(self, transmogrifier, name, options, previous):
    """Configure the remote crawler: normalize string options, install
    a basic-auth opener for the remote Zope site and pre-load the
    pickled cache.

    Fixes: ``type(x) in [str, unicode]`` replaced with isinstance; the
    cache file handle was leaked when unpickling raised; indexing
    ``remote_path[-1]`` crashed on an empty path.
    """
    self.name, self.options, self.previous = name, options, previous
    self.transmogrifier = transmogrifier
    self.context = transmogrifier.context
    for option, default in self._options:
        setattr(self, option.replace('-', '_'),
                self.get_option(option, default))
    # Option values arrive as strings; normalize them.
    if isinstance(self.remote_crawl_depth, (str, unicode)):
        self.remote_crawl_depth = int(self.remote_crawl_depth)
    if isinstance(self.remote_skip_path, (str, unicode)):
        self.remote_skip_path = self.remote_skip_path.split()
    self.remote_skip_types = ('DTMLMethod', 'Script (Python)')
    # Normalize the remote path: no trailing slash.
    if self.remote_path.endswith('/'):
        self.remote_path = self.remote_path[:-1]

    remote_username = self.get_option('remote-username', 'admin')
    remote_password = self.get_option('remote-password', 'admin')

    # Install a basic auth handler
    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='Zope',
                              uri=self.remote_url,
                              user=remote_username,
                              passwd=remote_password)
    opener = urllib2.build_opener(auth_handler)
    urllib2.install_opener(opener)

    # Load cached data from the given file; always close the handle.
    self.cache = resolvePackageReferenceOrFile(options.get('cache', ''))
    if self.cache and os.path.exists(self.cache):
        cache_file = open(self.cache, 'rb')
        try:
            cache = pickle.load(cache_file)
        finally:
            cache_file.close()
        setattr(self, MEMOIZE_PROPNAME, cache)
def __init__(self, transmogrifier, name, options, previous):
    """Configure the crawler: normalize string options, optionally
    precompute the allowed remote paths and pre-load the pickled cache.

    Fixes: ``type(x) in [str, unicode]`` replaced with isinstance; the
    cache file handle was leaked when unpickling raised; indexing
    ``path[-1]`` crashed on an empty path.
    """
    self.name, self.options, self.previous = name, options, previous
    self.transmogrifier = transmogrifier
    self.context = transmogrifier.context
    for option, default in self._options:
        setattr(self, option.replace('-', '_'),
                self.get_option(option, default))
    # Option values arrive as strings; normalize them.
    if isinstance(self.remote_crawl_depth, (str, unicode)):
        self.remote_crawl_depth = int(self.remote_crawl_depth)
    if isinstance(self.remote_skip_path, (str, unicode)):
        self.remote_skip_path = self.remote_skip_path.split()
    if self.remote_catalog_query:
        self.remote_ok_path = self.get_ok_path()
    # Strip trailing slashes from both path roots.
    if self.remote_path.endswith('/'):
        self.remote_path = self.remote_path[:-1]
    if self.local_path.endswith('/'):
        self.local_path = self.local_path[:-1]

    # Load cached data from the given file; always close the handle.
    self.cache = resolvePackageReferenceOrFile(options.get('cache', ''))
    if self.cache and os.path.exists(self.cache):
        cache_file = open(self.cache, 'rb')
        try:
            cache = pickle.load(cache_file)
        finally:
            cache_file.close()
        setattr(self, MEMOIZE_PROPNAME, cache)
def __init__(self, transmogrifier, name, options, previous):
    """Build the urllib2 opener used to fetch and cache remote URLs.

    Sets up the cache directory, the item keys used downstream, and
    the handler chain: an optional error-ignoring handler plus a
    redirect handler when none is configured.

    Fix: the isdir()-then-makedirs() sequence was racy (TOCTOU); the
    directory is now created unconditionally and an existing directory
    is tolerated.
    """
    self.previous = previous
    self.logger = logging.getLogger(options.get("name", transmogrifier.configuration_id + "." + name))
    self.key = defaultMatcher(options, "url-key", name, "url")
    self.cachekey = Expression(options.get("cache-key", "string:_cache"), transmogrifier, name, options)
    self.headerskey = Expression(options.get("headers-key", "string:_headers"), transmogrifier, name, options)
    self.headersext = options.get("headers-extension", mimetypes.guess_extension("message/rfc822"))
    self.cachedir = resolvePackageReferenceOrFile(
        options.get("cache-directory", os.path.join(os.environ.get("PWD", os.getcwd()), "var/urlopener.cache.d"))
    )
    try:
        os.makedirs(self.cachedir)
    except OSError:
        # Already created (possibly by a concurrent process) is fine.
        if not os.path.isdir(self.cachedir):
            raise
    self.defaultpagename = options.get("default-page-name", ".{}.cache".format(options["blueprint"]))
    handlers = Expression(options.get("handlers", "python:[]"), transmogrifier, name, options)(options)
    if "ignore-error" in options:
        self.ignore_error = Expression(options["ignore-error"], transmogrifier, name, options)
        self.ignore_handler = HTTPDefaultErrorHandler()
        self.ignore_handler.section = self
        handlers.append(self.ignore_handler)
    if not [handler for handler in handlers if isinstance(handler, urllib2.HTTPRedirectHandler)]:
        handlers.append(HTTPRedirectHandler())
    self.opener = urllib2.build_opener(*handlers)
def __init__(self, transmogrifier, name, options, previous):
    """Resolve the 'directory' option and fail fast unless it points
    at an existing directory."""
    self.previous = previous
    self.options = options
    self.path = resolvePackageReferenceOrFile(options['directory'])
    is_valid = self.path is not None and os.path.isdir(self.path)
    if not is_valid:
        raise IOError('Directory does not exists: {}'.format(self.path))
def __init__(self, transmogrifier, name, options, previous):
    """Parse the JSON file named by the 'filename' option into
    ``self.source``.

    Fix: the file handle was opened and never closed; it is now
    released in a try/finally.
    """
    self.previous = previous
    filename = resolvePackageReferenceOrFile(options['filename'])
    file_ = open(filename, 'r')
    try:
        self.source = json.loads(file_.read())
    finally:
        file_.close()
def __iter__(self):
    """Load each item's named file and store it on the target object's
    Dexterity file field; items without a resolvable target pass
    through unchanged.

    Fixes: the file handle was never closed (and leaked on the early
    ``continue`` paths); the always-true ``if not file_`` check was
    dead code; the field now receives the file *data* rather than the
    open descriptor (which a sibling section notes can make the source
    files disappear).
    """
    for item in self.previous:
        filename = resolvePackageReferenceOrFile(item[self.key])
        keys = item.keys()
        pathkey = self.pathkey(*keys)[0]
        typekey = self.typekey(*keys)[0]
        # Get the file object by path
        path = item[pathkey]
        obj = self.context.unrestrictedTraverse(path.lstrip('/'), None)
        if obj is None:  # path doesn't exist
            yield item
            continue
        # Read the data and always release the handle.
        file_ = open(filename, 'r')
        try:
            data = file_.read()
        finally:
            file_.close()
        # Set file field
        fti = getUtility(IDexterityFTI, name=item[typekey])
        schema = fti.lookupSchema()
        field = getFields(schema)[self.field]
        fileobj = field._type(
            data, filename=filename[filename.rfind('/')+1:].decode('utf-8'))
        field.set(field.interface(obj), fileobj)
        yield item
def __init__(self, transmogrifier, name, options, previous):
    """Remember the upstream section, resolve the optional 'directory'
    option (None when absent) and create a per-section logger."""
    self.previous = previous
    self.directory = None
    if 'directory' in options:
        self.directory = resolvePackageReferenceOrFile(options['directory'])
    self.logger = logging.getLogger(name)
def __init__(self, transmogrifier, name, options, previous):
    """Keep a handle on the previous section, an optional resolved
    'directory' and a logger named after this section."""
    self.previous = previous
    self.directory = (
        resolvePackageReferenceOrFile(options['directory'])
        if 'directory' in options
        else None
    )
    self.logger = logging.getLogger(name)
def __init__(self, transmogrifier, name, options, previous):
    """Resolve the 'path' option and fail fast unless it is an
    existing directory.

    Fix: the legacy ``raise Exception, msg`` statement (Python-2-only
    syntax) is replaced by the call form used by the sibling sections;
    behavior is identical.
    """
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    self.context = transmogrifier.context
    self.path = resolvePackageReferenceOrFile(options['path'])
    if self.path is None or not os.path.isdir(self.path):
        raise Exception('Path ('+str(self.path)+') does not exists.')
def __init__(self, transmogrifier, name, options, previous):
    """Resolve the 'directory' option and require that it exists.

    Raises ValueError both when the option is missing and when the
    directory is absent on disk.  Previously a missing option left
    ``self.directory`` as None and ``os.path.exists(None)`` died with
    a TypeError instead of the intended ValueError.
    """
    self.previous = previous
    if 'directory' in options:
        self.directory = resolvePackageReferenceOrFile(options['directory'])
    else:
        self.directory = None
    if self.directory is None or not os.path.exists(self.directory):
        raise ValueError("Directory %s does not exist" % self.directory)
    self.logger = logging.getLogger(name)
def __init__(self, transmogrifier, name, options, previous):
    """Prepare the directory source.

    Takes two options:

    directory:
        A full path to a directory or a relative path inside a
        package in the form collective.example:datadir.
    suffix:
        The extension of files that should be processed.
    """
    self.previous = previous
    self.directory = resolvePackageReferenceOrFile(options['directory'])
    suffix = options['suffix'].strip()
    self.suffix = ".{0}".format(suffix)
def __init__(self, transmogrifier, name, options, previous):
    """Resolve the 'path' option, require an existing directory and
    remember the datafield prefix.

    Fix: the legacy ``raise Exception, msg`` statement (Python-2-only
    syntax) is replaced by the call form used by the sibling sections;
    behavior is identical.
    """
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    self.context = transmogrifier.context
    self.path = resolvePackageReferenceOrFile(options['path'])
    if self.path is None or not os.path.isdir(self.path):
        raise Exception('Path ('+str(self.path)+') does not exists.')
    self.datafield_prefix = options.get('datafield-prefix', DATAFIELD)
def __init__(self, transmogrifier, name, options, previous):
    """Set up the blog source: parse the export XML and stash the blog
    settings as a transmogrifier annotation for later sections."""
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    # custom options for this source
    self.blog_url = options["blog_url"]
    self.filename = resolvePackageReferenceOrFile(options["filename"])
    self.init_xml_obj(self.filename)
    # get the blog settings and add them as an annotation for
    # use later in the pipeline
    annotations = IAnnotations(transmogrifier)
    self.storage = annotations.setdefault(SETTINGS_KEY, {})
def __init__(self, transmogrifier, name, options, previous):
    """Resolve the 'path' option, ensure it is an existing directory
    and remember the datafield prefix."""
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    self.context = transmogrifier.context
    self.path = resolvePackageReferenceOrFile(options["path"])
    path_ok = self.path is not None and os.path.isdir(self.path)
    if not path_ok:
        raise Exception("Path (" + str(self.path) + ") does not exists.")
    self.datafield_prefix = options.get("datafield-prefix", DATAFIELD)
def __init__(self, transmogrifier, name, options, previous):
    """Open the configured CSV file and build a DictReader over it.

    The handle deliberately stays open: the reader consumes it lazily
    while the pipeline iterates.
    """
    self.previous = previous
    filename = resolvePackageReferenceOrFile(options['filename'])
    file_ = open(filename, 'r')
    dialect = options.get('dialect', 'excel')
    fieldnames = options.get('fieldnames')
    fieldnames = fieldnames.split() if fieldnames else fieldnames
    self.reader = csv.DictReader(
        file_, dialect=dialect, fieldnames=fieldnames)
def __init__(self, transmogrifier, name, options, previous):
    """Configure the directory walker.

    Resolves 'dirname' and writes the resolved value back into the
    options mapping so later sections see the absolute path, then
    compiles the expression used to order directory listings.
    """
    self.previous = previous
    self.pathkey = options.get('path-key', '_path')
    self.typekey = options.get('type-key', '_type')
    self.foldertype = options.get('folder-type', 'Folder')
    dirname = resolvePackageReferenceOrFile(options['dirname'])
    options['dirname'] = dirname
    self.dirname = dirname
    default_sort = (
        "python:not basename.lower() == '.htaccess', "
        "not basename.lower().startswith('index'), "
        "not 'overview' in basename.lower(), basename")
    self.sortkey = Expression(
        options.get('sort-key', default_sort),
        transmogrifier, name, options)
def __init__(self, transmogrifier, name, options, previous):
    """Read the walker options, resolve 'dirname' (stored back into
    the options for downstream sections) and compile the listing
    sort-key expression."""
    self.previous = previous
    self.pathkey = options.get('path-key', '_path')
    self.typekey = options.get('type-key', '_type')
    self.foldertype = options.get('folder-type', 'Folder')
    resolved = resolvePackageReferenceOrFile(options['dirname'])
    options['dirname'] = resolved
    self.dirname = resolved
    sort_key_expr = options.get(
        'sort-key',
        "python:not basename.lower() == '.htaccess', "
        "not basename.lower().startswith('index'), "
        "not 'overview' in basename.lower(), basename")
    self.sortkey = Expression(sort_key_expr, transmogrifier, name, options)
def __init__(self, transmogrifier, name, options, previous):
    """Resolve and validate the import path, then read the datafield
    prefix and separator options.

    Fix: the legacy ``raise Exception, msg`` statement (Python-2-only
    syntax) is replaced by the call form used by the sibling sections;
    behavior is identical.
    """
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    self.context = transmogrifier.context
    self.path = resolvePackageReferenceOrFile(options['path'])
    if self.path is None or not os.path.isdir(self.path):
        raise Exception('Path ('+str(self.path)+') does not exists.')
    self.datafield_prefix = options.get('datafield-prefix', DATAFIELD)
    self.datafield_separator = options.get('datafield-separator', None)
def __init__(self, transmogrifier, name, options, previous):
    """Resolve and validate the import path, then read the remaining
    section options (enabled flag, path prefix and downstream keys)."""
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    # TODO: try to get import contextpath if there is no path set
    self.path = resolvePackageReferenceOrFile(options['path'])
    if self.path is None or not os.path.isdir(self.path):
        raise Exception('Path ({}) does not exists.'.format(str(self.path)))
    self.path = self.path.rstrip(os.sep)
    enabled_value = options.get('enabled', "True").lower()
    self.enabled = enabled_value in ("true", "1", "on", "yes")
    # add path prefix to imported content
    self.prefix = options.get('prefix', '').strip().strip(os.sep)
    # keys for sections further down the chain
    self.pathkey = options.get('path-key', '_path').strip()
    self.fileskey = options.get('files-key', '_files').strip()
def __init__(self, transmogrifier, name, options, previous):
    """Load previously exported JSON content from the 'path' option.

    Fix: ``_unjsonify`` was called *before* the path validity check,
    so a misconfigured path raised from inside the helper instead of
    the intended "Path ... does not exists." error.  The check now
    runs first.
    """
    self.previous = previous
    self.context = transmogrifier.context
    self.condition = Condition(options.get('condition', 'python:True'),
                               transmogrifier, name, options)
    self.debug = options.get('debug', False)
    self.filename = options.get('filename', 'data.json')
    self.path = resolvePackageReferenceOrFile(options.get('path', ''))
    self.types = options.get('types', [])
    self.sections = options.get('sections', [])
    if self.path is None or not os.path.isdir(self.path):
        raise Exception('Path (' + str(self.path) + ') does not exists.')
    self.results = self._unjsonify(self.path, self.types, self.sections)
    self.logger = logging.getLogger(name)
def __init__(self, transmogrifier, name, options, previous):
    """Validate the configured import path and collect the section
    options used by later pipeline steps."""
    self.transmogrifier = transmogrifier
    self.name = name
    self.options = options
    self.previous = previous
    # TODO: try to get import contextpath if there is no path set
    path = resolvePackageReferenceOrFile(options['path'])
    if path is None or not os.path.isdir(path):
        raise Exception('Path ({}) does not exists.'.format(str(path)))
    self.path = path.rstrip(os.sep)
    self.enabled = options.get('enabled', "True").lower() in (
        "true", "1", "on", "yes")
    # add path prefix to imported content
    self.prefix = options.get('prefix', '').strip().strip(os.sep)
    # keys for sections further down the chain
    self.pathkey = options.get('path-key', '_path').strip()
    self.fileskey = options.get('files-key', '_files').strip()
def __init__(self, transmogrifier, name, options, previous):
    """Load a JSON template and interpolate client-specific values.

    Builds a %-interpolation map from the current client (group ids
    and client id, plus the repository root name when the request
    carries one), fills the template named by 'filename' and parses
    the result as the item source.

    Fix: the template file handle was never closed; it is now released
    in a try/finally.
    """
    self.previous = previous
    client = get_current_client()
    replace_map = {
        'users_group': client.users_group.groupid,
        'inbox_group': client.inbox_group.groupid,
        'client_id': client.client_id,
    }
    repository_root = transmogrifier.context.REQUEST.get(
        'repository_root', None)
    if repository_root:
        replace_map['repository_root_name'] = repository_root[0]
    filename = resolvePackageReferenceOrFile(options['filename'])
    file_ = open(filename, 'r')
    try:
        data = file_.read() % replace_map
    finally:
        file_.close()
    self.source = json.loads(data)
def __init__(self, transmogrifier, name, options, previous):
    """Build the urllib2 opener used to fetch and cache remote URLs.

    Sets up the cache directory, the item keys used downstream, and
    the handler chain: an optional error-ignoring handler plus a
    redirect handler when none is configured.

    Fix: the isdir()-then-makedirs() sequence was racy (TOCTOU); the
    directory is now created unconditionally and an existing directory
    is tolerated.
    """
    self.previous = previous
    self.logger = logging.getLogger(
        options.get('name', transmogrifier.configuration_id + '.' + name))
    self.key = defaultMatcher(options, 'url-key', name, 'url')
    self.cachekey = Expression(options.get('cache-key', 'string:_cache'),
                               transmogrifier, name, options)
    self.headerskey = Expression(
        options.get('headers-key', 'string:_headers'), transmogrifier, name,
        options)
    self.headersext = options.get(
        'headers-extension', mimetypes.guess_extension('message/rfc822'))
    self.cachedir = resolvePackageReferenceOrFile(
        options.get(
            'cache-directory',
            os.path.join(os.environ.get('PWD', os.getcwd()),
                         'var/urlopener.cache.d')))
    try:
        os.makedirs(self.cachedir)
    except OSError:
        # Already created (possibly by a concurrent process) is fine.
        if not os.path.isdir(self.cachedir):
            raise
    self.defaultpagename = options.get(
        'default-page-name', '.{0}.cache'.format(options['blueprint']))
    handlers = Expression(options.get('handlers', 'python:[]'),
                          transmogrifier, name, options)(options)
    if 'ignore-error' in options:
        self.ignore_error = Expression(options['ignore-error'],
                                       transmogrifier, name, options)
        self.ignore_handler = HTTPDefaultErrorHandler()
        self.ignore_handler.section = self
        handlers.append(self.ignore_handler)
    if not [
        handler for handler in handlers
        if isinstance(handler, urllib2.HTTPRedirectHandler)
    ]:
        handlers.append(HTTPRedirectHandler())
    self.opener = urllib2.build_opener(*handlers)
def __init__(self, transmogrifier, name, options, previous):
    """Build the urllib2 opener used to fetch and cache remote URLs.

    Sets up the cache directory, the item keys used downstream, and
    the handler chain: an optional error-ignoring handler plus a
    redirect handler when none is configured.

    Fix: the isdir()-then-makedirs() sequence was racy (TOCTOU); the
    directory is now created unconditionally and an existing directory
    is tolerated.
    """
    self.previous = previous
    self.logger = logging.getLogger(options.get(
        'name', transmogrifier.configuration_id + '.' + name))
    self.key = defaultMatcher(options, 'url-key', name, 'url')
    self.cachekey = Expression(
        options.get('cache-key', 'string:_cache'),
        transmogrifier, name, options)
    self.headerskey = Expression(
        options.get('headers-key', 'string:_headers'),
        transmogrifier, name, options)
    self.headersext = options.get(
        'headers-extension', mimetypes.guess_extension('message/rfc822'))
    self.cachedir = resolvePackageReferenceOrFile(
        options.get('cache-directory',
                    os.path.join(os.environ.get('PWD', os.getcwd()),
                                 'var/urlopener.cache.d')))
    try:
        os.makedirs(self.cachedir)
    except OSError:
        # Already created (possibly by a concurrent process) is fine.
        if not os.path.isdir(self.cachedir):
            raise
    self.defaultpagename = options.get(
        'default-page-name', '.{0}.cache'.format(options['blueprint']))
    handlers = Expression(
        options.get('handlers', 'python:[]'),
        transmogrifier, name, options)(options)
    if 'ignore-error' in options:
        self.ignore_error = Expression(
            options['ignore-error'], transmogrifier, name, options)
        self.ignore_handler = HTTPDefaultErrorHandler()
        self.ignore_handler.section = self
        handlers.append(self.ignore_handler)
    if not [handler for handler in handlers
            if isinstance(handler, urllib2.HTTPRedirectHandler)]:
        handlers.append(HTTPRedirectHandler())
    self.opener = urllib2.build_opener(*handlers)
def __iter__(self):
    """Set the Dexterity file field of each item's target object from
    the file named by ``item[self.key]``, then fire
    ObjectModifiedEvent.

    Fixes: the file handle was never closed (and leaked on the early
    ``continue`` path — the file is now opened only after the target
    object is resolved); the always-true ``if not file_`` check was
    dead code.
    """
    for item in self.previous:
        filename = resolvePackageReferenceOrFile(item[self.key])
        keys = item.keys()
        pathkey = self.pathkey(*keys)[0]
        typekey = self.typekey(*keys)[0]

        # Get the file object by path
        path = item[pathkey]
        obj = self.context.unrestrictedTraverse(path.lstrip('/'), None)
        if obj is None:  # path doesn't exist
            yield item
            continue

        # Don't pass the file descriptor but only the file's data as
        # a string, because else the source files get removed!
        file_ = open(filename, 'r')
        try:
            filedata = file_.read()
        finally:
            file_.close()

        # Set file field
        fti = getUtility(IDexterityFTI, name=item[typekey])
        schema = fti.lookupSchema()
        field = getFields(schema)[self.field]
        basename = filename[filename.rfind('/') + 1:].decode('utf-8')
        fileobj = field._type(filedata, filename=basename)
        field.set(field.interface(obj), fileobj)

        # Fire ObjectModifiedEvent so that digitally_available gets set
        notify(ObjectModifiedEvent(obj))
        yield item
def __init__(self, transmogrifier, name, options, previous):
    """Remember the upstream section and resolve the content directory."""
    self.previous = previous
    directory_option = options['directory']
    self.directory = resolvePackageReferenceOrFile(directory_option)
def __iter__(self):
    """Re-attach exported binary data to already-created content.

    For each item with a traversable '_path', find datafield keys
    (``self.datafield_prefix`` + fieldname), read the referenced file
    from disk and store it on the matching Archetypes or Dexterity
    field of the target object.  Items are always yielded unchanged.
    """
    for item in self.previous:
        # not enough info
        if '_path' not in item:
            yield item
            continue
        obj = self.context.unrestrictedTraverse(
            str(item['_path'].lstrip('/')), None)
        # path doesn't exist
        if obj is None:
            yield item
            continue
        # do nothing if we got a wrong object through acquisition
        path = item['_path']
        if path.startswith('/'):
            path = path[1:]
        if '/'.join(obj.getPhysicalPath()[self.root_path_length:]) != path:
            yield item
            continue
        # --- Archetypes branch -----------------------------------
        if IBaseObject.providedBy(obj):
            for key in item.keys():
                if not key.startswith(self.datafield_prefix):
                    continue
                fieldname = key[len(self.datafield_prefix):]
                field = obj.getField(fieldname)
                if field is None:
                    continue
                # get the full path of the files
                # NOTE(review): item[key] is concatenated as a path
                # fragment here but indexed like a dict
                # (['filename'], ['content_type']) below — presumably
                # a mapping with a string-like path form; confirm
                # against the exporting section.
                file_path = resolvePackageReferenceOrFile(
                    self.options['path']) + item[key]
                if not os.path.exists(file_path):
                    continue
                f = open(file_path)
                value = f.read()
                f.close()
                # XXX: handle other data field implementations
                # Only rewrite the field when the stored data differs.
                field_value = field.get(obj)
                if not hasattr(field_value, 'data') or (
                        value != field_value.data):
                    field.set(obj, value)
                    obj.setFilename(item[key]['filename'], fieldname)
                    obj.setContentType(
                        item[key]['content_type'], fieldname)
        # --- Dexterity branch ------------------------------------
        if dexterity_available and IDexterityContent.providedBy(obj):
            for key in item.keys():
                if not key.startswith(self.datafield_prefix):
                    continue
                fieldname = key[len(self.datafield_prefix):]
                # get the full path of the files
                file_path = resolvePackageReferenceOrFile(
                    self.options['path']) + item[key]
                if not os.path.exists(file_path):
                    continue
                f = open(file_path)
                value = f.read()
                f.close()
                filename = item['id'].decode('utf-8')
                contenttype = ''
                # get all fields for this obj
                for schemata in iterSchemata(obj):
                    for name, field in getFieldsInOrder(schemata):
                        if field.__name__ == fieldname:
                            # create a blob instance
                            instance = field._type(
                                data=value,
                                filename=filename,
                                contentType=contenttype,
                            )
                            # set it
                            field.set(field.interface(obj), instance)
                            # NOTE(review): ``continue`` keeps scanning
                            # the remaining fields after a match —
                            # looks like it was meant to be ``break``;
                            # behavior preserved here.
                            continue
        yield item
def resolve_directory(self, value):
    """Resolve *value* to a directory path.

    A value beginning with '$' names an environment variable (resolved
    to '' when unset); anything else is passed through
    resolvePackageReferenceOrFile.
    """
    if not value.startswith('$'):
        return resolvePackageReferenceOrFile(value)
    return os.getenv(value[1:], '')
def __iter__(self):
    """Pass all upstream items through unchanged, then emit one item
    per data row of the repository Excel sheet configured via the
    'filename' option."""
    for item in self.previous:
        yield item
    # NOTE(review): client_id is read but never used below; a missing
    # 'client_id' option still fails loudly here — confirm intent
    # before removing.
    client_id = self.options['client_id']
    filename = resolvePackageReferenceOrFile(self.options['filename'])
    tables = xlrd_xls2array(filename)
    repository_table = tables[0]
    sheet_data = repository_table['sheet_data']
    # clean up table
    sheet_data = sheet_data[4:]  # remove human readable stuff
    keys = sheet_data[0]
    del sheet_data[0]
    for rownum, row in enumerate(sheet_data):
        data = {}
        # repofolder or reporoot: the first data row becomes the root.
        if rownum == 0:
            data['_type'] = u'opengever.repository.repositoryroot'
        else:
            data['_type'] = u'opengever.repository.repositoryfolder'
        for colnum, cell in enumerate(row):
            key = keys[colnum]
            # Skip columns without a header.
            if key in (None, '', u''):
                continue
            # Leave vocabulary-backed fields unset when the cell is
            # empty.
            if key in (
                    'classification',
                    'privacy_layer',
                    'public_trial',
                    'retention_period',
                    'custody_period',
                    'archival_value',
                    ) and cell in (None, '', u''):
                continue
            if key == 'reference_number' and not isinstance(
                    cell, basestring):
                raise Exception("Reference number has to be string: %s"
                                % cell)
            # Empty date cells become None.
            if key in ('valid_from', 'valid_until') and cell in ('', u''):
                cell = None
            # Comma separated list -> cleaned list of type names.
            if key == 'addable_dossier_types':
                cell = cell.replace(' ', '').split(',')
                cell = [t for t in cell if not t == '']
            # Map human readable spreadsheet values to vocabulary
            # terms; unknown values pass through unchanged.
            if key == 'archival_value':
                cell = ARCHIVAL_VALUE_MAPPING.get(cell, cell)
            if key == 'classification':
                cell = CLASSIFICATION_MAPPING.get(cell, cell)
            if key == 'privacy_layer':
                cell = PRIVACY_LAYER_MAPPING.get(cell, cell)
            if key == 'public_trial':
                cell = PUBLIC_TRIAL_MAPPING.get(cell, cell)
            data[key] = cell
        yield data