def getfiles(self, zip_file):
    """Extract every member of *zip_file* into an in-memory file map.

    Parameters:
        zip_file: a seekable file-like object containing zip data.

    Returns:
        dict mapping member name -> independent, seekable in-memory
        file object positioned at offset 0.

    Raises:
        ImportExportError: if no file was provided.
    """
    # local import: the top-of-file import block is outside this chunk
    import io
    # TODO validate zip_file,files,csv_file and raise if not good file
    # TODO also for csv check MUST_INCLUDED_ATTRIBUTES
    if not zip_file:
        raise ImportExportError('No file Provided')
    data = {}
    # The stdlib zipfile module reuses the passed-in file object for
    # every member open(), so tell/seek calls on multiple members step
    # on each other (previously seen as "BadZipfile: Bad CRC-32").
    # Rewinding before constructing ZipFile clears any stale position.
    zip_file.seek(0)
    zfile = zipfile.ZipFile(zip_file, 'r')
    for name in zfile.namelist():
        # zfile.read() returns the member's raw bytes; wrap them in a
        # BytesIO so each consumer gets its own seekable file object.
        # io.BytesIO replaces the old cStringIO write/seek dance: it is
        # correct for binary zip content and works on Py2.6+ and Py3.
        data[name] = io.BytesIO(zfile.read(name))
    return data
def converttojson(self, data=None, header=None):
    """Convert CSV rows into a list of dicts.

    Parameters:
        data: iterable of CSV lines (file-like or list of strings).
        header: optional list of field names to keep; all other keys
            are dropped (advance_import feature).

    Returns:
        list of dicts, one per CSV data row, post-processed by
        self.jsonify and self.filter_keys.

    Raises:
        ImportExportError: if no data was supplied.
    """
    if not data:
        raise ImportExportError('Provide data to jsonify')
    # fieldnames=None makes DictReader take the first CSV row as the
    # field names; surplus cells land under a falsy key and are
    # discarded below.
    rows = []
    for record in csv.DictReader(data, fieldnames=None):
        for field in list(record.keys()):
            if not field:
                del record[field]
        rows.append(record)
    # jsonify quoted json values
    rows = self.jsonify(rows)
    # advance_import feature: retain only the keys listed in header
    if header:
        for entry in rows:
            for key in list(entry.keys()):
                if key not in header:
                    del entry[key]
    self.filter_keys(rows)
    return rows
def export(self):
    """Export the current context as a zip download (POST only).

    Builds the header list (mandatory attributes first), serializes the
    context, bundles results plus any error log into an in-memory zip
    and returns its bytes with download headers set.

    Raises:
        ImportExportError: for non-POST requests.
    """
    global MUST_INCLUDED_ATTRIBUTES
    errors = []
    # create zip in memory
    self.zip = utils.InMemoryZip()
    # defines Pipeline
    self.conversion = utils.Pipeline()
    if self.request and self.request.method == 'POST':
        id_ = self.context.absolute_url_path()[1:]
        exportType = self.request.get('exportFormat', None)
        if self.request.get('exportFields', None) and \
                (exportType == 'csv' or exportType == 'combined'):
            # fields/keys to include
            headers = self.request.get('exportFields', None)
            # BUG in html checkbox input, which send value as a string
            # if only one value have been checked
            if isinstance(headers, str):
                headers = [headers]
        else:
            # 'No check provided. Thus exporting whole content'
            headers = self.getheaders()
        # BUGFIX: the old code merged MUST_INCLUDED_ATTRIBUTES into the
        # selected headers AND re-inserted every element at the front
        # afterwards, yielding duplicate columns.  Strip them first,
        # then prepend exactly once so they occupy the first positions.
        headers = list(set(headers) - set(MUST_INCLUDED_ATTRIBUTES))
        headers = MUST_INCLUDED_ATTRIBUTES + headers
        # results is a list of dicts; the last element is an error log
        objData = self.serialize(self.context)
        results = objData[:-1]
        if objData[-1] != '':
            errorLog = objData[-1]
            self.zip.append('errorLog.txt', errorLog)
        self.conversion.convertjson(self, results, headers)
        self.request.RESPONSE.setHeader('content-type', 'application/zip')
        cd = 'attachment; filename=%s.zip' % (id_)
        self.request.RESPONSE.setHeader('Content-Disposition', cd)
        return self.zip.read()
    else:
        raise ImportExportError('Invalid Request')
def export(self):
    """Serve the current context as a zip download for POST requests.

    Header selection: user-picked fields (csv/combined formats) or all
    fields, with MUST_INCLUDED_ATTRIBUTES always placed first, exactly
    once.  Returns the zip bytes; raises ImportExportError otherwise.
    """
    global MUST_INCLUDED_ATTRIBUTES
    errors = []
    # in-memory container for the generated export
    self.zip = utils.InMemoryZip()
    # conversion pipeline (csv/json helpers)
    self.conversion = utils.Pipeline()
    if not (self.request and self.request.method == 'POST'):
        raise ImportExportError('Invalid Request')
    id_ = self.context.absolute_url_path()[1:]
    export_format = self.request.get('exportFormat', None)
    picked = self.request.get('exportFields', None)
    if picked and export_format in ('csv', 'combined'):
        # fields/keys to include
        headers = picked
        # BUG in html checkbox input, which
        # send value as a string if only one value have been checked
        if isinstance(headers, str):
            headers = [headers]
    else:
        # 'No check provided. Thus exporting whole content'
        headers = self.getheaders()
    # Just to make sure headers does not contain any attribute
    # from MUST_INCLUDED_ATTRIBUTES, then prepend them once
    headers = MUST_INCLUDED_ATTRIBUTES + list(
        set(headers) - set(MUST_INCLUDED_ATTRIBUTES))
    # serialize() yields a list of dicts plus a trailing error string
    serialized = self.serialize(self.context)
    results = serialized[:-1]
    if serialized[-1] != '':
        self.zip.append('errorLog.txt', serialized[-1])
    self.conversion.convertjson(self, results, headers)
    self.request.RESPONSE.setHeader('content-type', 'application/zip')
    cd = 'attachment; filename={arg}.zip'.format(arg=str(id_))
    self.request.RESPONSE.setHeader('Content-Disposition', cd)
    return self.zip.read()
def findcsv(self):
    """Locate the single top-level .csv member among self.files.

    Members whose name contains a path separator live inside a
    sub-directory of the zip and are skipped.  Stores the match on
    self.csv_file and returns it (None when no csv was found).

    Raises:
        ImportExportError: if more than one top-level csv is present.
    """
    # pattern matching any name that contains a path separator
    nested = '*' + os.sep + '*'
    for filename in self.files.keys():
        if fnmatch.fnmatch(filename, nested):
            # csv content of the site itself, not the control file
            continue
        if fnmatch.fnmatch(filename, '*.csv'):
            if self.csv_file:
                raise ImportExportError('More than 1 csv file provided, require only 1 ')
            self.csv_file = self.files[filename]
    return self.csv_file
def requestFile(self, file_):
    """Register uploaded file object(s) into self.files by filename.

    Accepts either a single file-like object or a list of them
    (multi-file upload); lists are handled recursively.

    Raises:
        ImportExportError: when an upload is empty.
    """
    if isinstance(file_, list):
        # multi-file upload: register each entry in turn
        for item in file_:
            self.requestFile(item)
    else:
        # reject empty uploads, then rewind for later consumers
        file_.seek(0)
        if not file_.read():
            raise ImportExportError('Provide Good File')
        file_.seek(0)
        # Zope uploads expose .filename; plain file objects expose .name
        try:
            key = file_.filename
        except Exception:
            key = file_.name
        self.files[key] = file_
    return True
def getImportfields(self):
    """Return a JSON matrix of importable CSV headers for the UI.

    Reads the uploaded file(s) from the request, extracts the csv,
    derives its headers (minus the mandatory attributes, which are
    always imported) and lays them out as a 4-column matrix.  Any
    failure is reported as ``{"Error": <message>}`` so the JS caller
    can display it.

    Returns:
        str: JSON-encoded matrix or error dict.
    """
    global MUST_INCLUDED_ATTRIBUTES
    try:
        self.files = {}
        # request files
        file_ = self.request.get('file')
        # files are at self.files
        self.requestFile(file_)
        # file structure and analyser
        self.files = utils.fileAnalyse(self.files)
        if not self.files.getCsv():
            raise ImportExportError('Provide a good csv file')
        csvData = self.files.getCsv()
        # convert csv to json
        conversion = utils.Pipeline()
        jsonData = conversion.converttojson(data=csvData)
        # get headers from jsonData
        headers = conversion.getcsvheaders(jsonData)
        # mandatory attributes are always imported; don't offer them
        # (comprehension also avoids the old lambda shadowing 'headers')
        headers = [h for h in headers
                   if h not in MUST_INCLUDED_ATTRIBUTES]
        # get matrix of headers
        matrix = self.getmatrix(headers=headers, columns=4)
    except Exception as e:
        # BUGFIX: Exception.message is Python-2-only and deprecated;
        # accessing it can itself raise.  str(e) is always safe.
        matrix = {'Error': str(e)}
    # JS requires json dump
    return json.dumps(matrix)
def imports(self):
    """Handle a POST import: read the uploaded zip/csv, create missing
    content, remap UIDs and deserialize each csv row onto its object.

    Returns:
        str: the accumulated plain-text error log.

    Raises:
        ImportExportError: for non-POST requests or a missing csv.
    """
    global MUST_EXCLUDED_ATTRIBUTES
    global MUST_INCLUDED_ATTRIBUTES
    # global files
    # try:
    if self.request.method == 'POST':
        # request files
        file_ = self.request.get('file')
        # files are at self.files
        self.files = {}
        self.requestFile(file_)
        # file structure and analyser
        self.files = utils.fileAnalyse(self.files)
        if not self.files.getCsv():
            raise ImportExportError('Provide a good csv file')
        # create zip in memory
        self.zip = utils.InMemoryZip()
        # defines Pipeline
        self.conversion = utils.Pipeline()
        # defines mapping for UID
        self.mapping = utils.mapping(self)
        # get list of existingPath
        self.getExistingpath()
        error_log = ''
        temp_log = ''
        # check for include attributes in advanced tab
        if self.request.get('importFields', None):
            # fields/keys to include
            include = self.request.get('importFields', None)
            # BUG in html checkbox input, which send value as a
            # string if only one value have been checked
            if isinstance(include, str):
                include = [include]
            include = list(set(MUST_INCLUDED_ATTRIBUTES + include))
        else:
            # 'No check provided. Thus exporting whole content'
            include = None
        # convert csv to json
        data = self.conversion.converttojson(data=self.files.getCsv(),
                                             header=include)
        # invoke non-existent content, if any
        error_log += self.createcontent(data)
        # map old and new UID in memory
        self.mapping.mapNewUID(data)
        # deserialize
        for index in range(len(data)):
            obj_data = data[index]
            path_ = obj_data.get('path', None)
            if not path_:
                # BUGFIX: the old code formatted obj_data['path'] here,
                # which raised KeyError when the key was missing — the
                # very case this branch handles.  Use the fetched value.
                error_log += 'pathError in {arg} \n'.format(arg=path_)
                continue
            # get blob content into json data
            obj_data, temp_log = self.conversion.fillblobintojson(
                obj_data, self.files.getFiles(), self.mapping)
            error_log += temp_log
            # os.sep is preferrable to support multiple filesystem
            # return context of object
            object_context = self.getobjcontext(
                obj_data['path'].split(os.sep))
            # all import error will be logged back
            if object_context:
                error_log += self.deserialize(object_context, obj_data)
            else:
                error_log += 'pathError for {arg}\n'.format(
                    arg=obj_data['path'])
        self.request.RESPONSE.setHeader('content-type',
                                        'application/text; charset=utf-8')
        return error_log
    else:
        raise ImportExportError('Invalid Request Method')
def imports(self):
    """Handle a POST import with configurable matching behaviour.

    Reads the import key and per-outcome actions (new / matching /
    existing-no-match content) from the request, creates content,
    remaps UIDs, reindexes matched paths, deletes non-matching content
    and deserializes each csv row onto its matched object.

    Returns:
        str: the accumulated plain-text error log, served as an
        attachment named import-log.txt.

    Raises:
        ImportExportError: for non-POST requests or a missing csv.
    """
    global MUST_EXCLUDED_ATTRIBUTES
    global MUST_INCLUDED_ATTRIBUTES
    # global files
    # try:
    if self.request.method == 'POST':
        # request files
        file_ = self.request.get('file')
        # get the defined import key
        self.primary_key = \
            self.request.get('import_key', 'path')
        # match related self.settings, based on defined key
        self.new_content_action = \
            self.request.get('new_content', 'add')
        self.matching_content_action = \
            self.request.get('matching_content', 'update')
        self.existing_content_no_match_action = \
            self.request.get('existing_content_no_match', 'keep')
        # files are at self.files
        self.files = {}
        self.requestFile(file_)
        # file structure and analyser
        self.files = utils.fileAnalyse(self.files)
        if not self.files.getCsv():
            raise ImportExportError('Provide a good csv file')
        # create zip in memory
        self.zip = utils.InMemoryZip()
        # defines Pipeline
        self.conversion = utils.Pipeline()
        # defines mapping for UID
        self.mapping = utils.mapping(self)
        # get list of existingPath
        self.getExistingpath()
        error_log = ''
        temp_log = ''
        # check for include attributes in advanced tab
        if self.request.get('importFields', None):
            # fields/keys to include
            include = self.request.get('importFields', None)
            # BUG in html checkbox input, which send value as a
            # string if only one value have been checked
            if isinstance(include, str):
                include = [include]
            include = list(set(MUST_INCLUDED_ATTRIBUTES + include))
        else:
            # 'No check provided. Thus exporting whole content'
            include = None
        # convert csv to json
        data = self.conversion.converttojson(data=self.files.getCsv(),
                                             header=include)
        error_log += self.processContentCreation(data=data)
        # map old and new UID in memory
        self.mapping.mapNewUID(data)
        # BUGFIX: this reindex/delete pair was accidentally duplicated,
        # causing a redundant second reindex and delete pass; run once.
        self.reindexMatchedTraversalPaths()
        error_log += self.deleteNoMatchingContent()
        # deserialize
        for index in range(len(data)):
            obj_data = data[index]
            path_ = obj_data.get('path', None)
            if not path_:
                # BUGFIX: formatting obj_data['path'] raised KeyError
                # when the key was absent; use the safely fetched value.
                error_log += ('pathError upon deseralizing the content '
                              'for {arg} \n'.format(arg=path_))
                continue
            obj_absolute_path = "/".join(
                self.getobjpath(path_.split(os.sep)))
            if obj_absolute_path not in self.matchedTraversalPaths:
                continue
            if path_ not in self.matchedTraversalPaths:
                continue
            # get blob content into json data
            obj_data, temp_log = self.conversion.fillblobintojson(
                obj_data, self.files.getFiles(), self.mapping)
            error_log += temp_log
            # os.sep is preferrable to support multiple filesystem
            # return context of object
            # (BUGFIX: removed leftover debug "print obj_data")
            object_context = self.getobjcontext(
                obj_data['path'].split(os.sep))
            # all import error will be logged back
            if object_context:
                error_log += self.deserialize(object_context, obj_data)
            else:
                error_log += 'Error while attempting to update {arg}\n'.format(
                    arg=obj_data['path'])
        self.request.RESPONSE.setHeader('content-type',
                                        'application/text; charset=utf-8')
        cd = 'attachment; filename=import-log.txt'
        self.request.RESPONSE.setHeader('Content-Disposition', cd)
        return error_log
    else:
        raise ImportExportError('Invalid Request Method')