Example #1
    def getfiles(self, zip_file):
        # TODO: validate zip_file, files and csv_file; raise if the file is not usable
        # TODO: for the csv, also check MUST_INCLUDED_ATTRIBUTES
        if not zip_file:
            raise ImportExportError('No file provided')
        data = {}
        """ The problem with the standard zipfile module is that, when passed
        a file object (not a filename), it reuses that same file object for
        every call to the open method. tell and seek are then called on the
        same file, so opening multiple members of the zip shares one file
        position and the open calls step on each other. In contrast, when
        passed a filename, open creates a new file object each time.
        error: *** BadZipfile: Bad CRC-32 for file 'Plone.csv'
        Doing a seek(0) on the file object before initializing the ZipFile
        makes the error go away, even though nothing was done to the file
        object beforehand. """
        zip_file.seek(0)
        zfile = zipfile.ZipFile(zip_file, 'r')
        for name in zfile.namelist():
            """ .open() returns a file-like object while .read() returns a
            string-like object, and csv.DictWriter needs a file-like object """
            # FIXME: .open() should work.
            # Retrieving the data from /tmp on the server may make this
            # HACK unnecessary
            data[name] = cStringIO.StringIO()
            data[name].write(zfile.read(name))
            data[name].seek(0)
        return data
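
A minimal standalone sketch of the workaround described in the comment above: rewind the uploaded file-like object before handing it to ZipFile, then copy each member into its own in-memory buffer so later readers get independent file positions. It uses Python 3's io.BytesIO in place of cStringIO; the sample zip and member name are illustrative only.

import io
import zipfile

def extract_to_buffers(uploaded):
    """Read every member of an in-memory zip into its own buffer."""
    uploaded.seek(0)                          # rewind before ZipFile inspects it
    archive = zipfile.ZipFile(uploaded, 'r')
    buffers = {}
    for name in archive.namelist():
        # each member gets an independent file-like object, so later
        # readers cannot disturb each other's file position
        buffers[name] = io.BytesIO(archive.read(name))
    return buffers

# usage: build a small zip in memory, then pull its members back out
raw = io.BytesIO()
with zipfile.ZipFile(raw, 'w') as zf:
    zf.writestr('Plone.csv', 'id,title\n1,Front page\n')

members = extract_to_buffers(raw)
print(members['Plone.csv'].read().decode('utf-8'))
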
Example #2
    def converttojson(self, data=None, header=None):
        if not data:
            raise ImportExportError('Provide data to jsonify')
        # A major BUG here:
        # the fieldnames parameter is a sequence whose elements are
        # associated with the fields of the input data, in order;
        # with fieldnames=None the first row supplies the field names
        reader = csv.DictReader(data, fieldnames=None)
        data = []
        for row in reader:
            # iterate over a copy of the keys so deleting is safe
            for key in list(row.keys()):
                if not key:
                    del row[key]
            data.append(row)
        # jsonify quoted json values
        data = self.jsonify(data)

        # filter keys which are not in header, feature of advance_import
        if header:
            for index in range(len(data)):
                # iterate over a copy of the keys so deleting is safe
                for k in list(data[index].keys()):
                    if k not in header:
                        del data[index][k]

        self.filter_keys(data)
        return data
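
A hedged sketch of the same conversion, separated from the Plone pipeline: with fieldnames left unset, csv.DictReader takes the field names from the first row, empty keys are dropped, and an optional whitelist plays the role of the header argument. The sample data and the allowed list are illustrative only.

import csv
import io

def csv_to_dicts(fileobj, allowed=None):
    """Convert csv rows to dicts, optionally keeping only whitelisted keys."""
    reader = csv.DictReader(fileobj)          # field names come from the first row
    rows = []
    for row in reader:
        # drop empty keys and, when a whitelist is given, anything outside it
        rows.append({k: v for k, v in row.items()
                     if k and (allowed is None or k in allowed)})
    return rows

sample = io.StringIO('path,title,extra\nplone/front-page,Front page,x\n')
print(csv_to_dicts(sample, allowed=['path', 'title']))
# [{'path': 'plone/front-page', 'title': 'Front page'}]
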
Example #3
    def export(self):

        global MUST_INCLUDED_ATTRIBUTES
        errors = []

        # create zip in memory
        self.zip = utils.InMemoryZip()

        # defines Pipeline
        self.conversion = utils.Pipeline()

        if self.request and self.request.method == 'POST':

            id_ = self.context.absolute_url_path()[1:]

            exportType = self.request.get('exportFormat', None)

            if (self.request.get('exportFields', None)
                    and exportType in ('csv', 'combined')):

                # fields/keys to include
                headers = self.request.get('exportFields', None)
                # BUG in the html checkbox input, which sends the value as a
                # plain string if only one checkbox has been checked
                if isinstance(headers, str):
                    headers = [headers]
                headers = list(set(MUST_INCLUDED_ATTRIBUTES + headers))

            else:
                # no checkboxes selected, so export the whole content
                headers = self.getheaders()
            '''
            MUST_INCLUDED_ATTRIBUTES must be present in headers, and at the
            front of the list
            '''
            for element in reversed(MUST_INCLUDED_ATTRIBUTES):
                headers.insert(0, element)

            # results is a list of dicts
            objData = self.serialize(self.context)
            results = objData[:-1]
            if objData[-1] != '':
                errorLog = objData[-1]
                self.zip.append('errorLog.txt', errorLog)

            self.conversion.convertjson(self, results, headers)

            self.request.RESPONSE.setHeader('content-type', 'application/zip')
            cd = 'attachment; filename=%s.zip' % (id_)
            self.request.RESPONSE.setHeader('Content-Disposition', cd)

            return self.zip.read()

        else:
            raise ImportExportError('Invalid Request')
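
The checkbox comment above points at a general browser/form behaviour: a checkbox group with one box ticked is submitted as a single string, while several ticked boxes come back as a list. A small, hypothetical helper (not part of the add-on) that normalises either shape to a list:

def normalise_checkbox(value):
    """Return the submitted checkbox values as a list, whatever the form sent."""
    if value is None:
        return []                    # nothing checked
    if isinstance(value, str):
        return [value]               # a single checked box arrives as a bare string
    return list(value)               # several checked boxes arrive as a sequence

print(normalise_checkbox('title'))             # ['title']
print(normalise_checkbox(['title', 'path']))   # ['title', 'path']
print(normalise_checkbox(None))                # []
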
Example #4
    def export(self):

        global MUST_INCLUDED_ATTRIBUTES
        errors = []

        # create zip in memory
        self.zip = utils.InMemoryZip()

        # defines Pipeline
        self.conversion = utils.Pipeline()

        if self.request and self.request.method == 'POST':

            id_ = self.context.absolute_url_path()[1:]

            exportType = self.request.get('exportFormat', None)

            if (self.request.get('exportFields', None)
                    and exportType in ('csv', 'combined')):

                # fields/keys to include
                headers = self.request.get('exportFields', None)
                # BUG in the html checkbox input, which sends the value as a
                # plain string if only one checkbox has been checked
                if isinstance(headers, str):
                    headers = [headers]
            else:
                # no checkboxes selected, so export the whole content
                headers = self.getheaders()

            # make sure the headers list does not already contain any
            # attribute from MUST_INCLUDED_ATTRIBUTES
            headers = list(set(headers) - set(MUST_INCLUDED_ATTRIBUTES))
            # prepend MUST_INCLUDED_ATTRIBUTES
            headers = MUST_INCLUDED_ATTRIBUTES + headers

            # results is a list of dicts
            objData = self.serialize(self.context)
            results = objData[:-1]
            if objData[-1] != '':
                errorLog = objData[-1]
                self.zip.append('errorLog.txt', errorLog)

            self.conversion.convertjson(self, results, headers)
            self.request.RESPONSE.setHeader('content-type', 'application/zip')
            cd = 'attachment; filename={arg}.zip'.format(arg=str(id_))
            self.request.RESPONSE.setHeader('Content-Disposition', cd)

            return self.zip.read()

        else:
            raise ImportExportError('Invalid Request')
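
This second version of export removes the required attributes from the user's selection and then prepends them, so each appears exactly once and always at the front. A minimal sketch of that ordering rule, with illustrative attribute names; unlike the set difference above, the list comprehension also preserves the order of the remaining selection.

MUST_INCLUDED_ATTRIBUTES = ['path', 'id']      # illustrative values

def order_headers(selected):
    """Required attributes first, then the rest of the selection, no duplicates."""
    extra = [h for h in selected if h not in MUST_INCLUDED_ATTRIBUTES]
    return MUST_INCLUDED_ATTRIBUTES + extra

print(order_headers(['title', 'path', 'description']))
# ['path', 'id', 'title', 'description']
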
Example #5
    def findcsv(self):
        # the zip may also contain the csv content of the site
        ignore = '*' + os.sep + '*'
        for key in self.files.keys():
            if fnmatch.fnmatch(key, ignore):
                pass
            elif fnmatch.fnmatch(key, '*.csv'):
                if not self.csv_file:
                    self.csv_file = self.files[key]
                else:
                    raise ImportExportError(
                        'More than one csv file provided; only one is allowed')

        return self.csv_file
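
A standalone sketch of the same selection rule with plain data structures: entries that contain a path separator are treated as nested and skipped, exactly one top-level *.csv is expected, and a second one is an error. ValueError stands in for ImportExportError here.

import fnmatch
import os

def find_single_csv(names):
    """Return the only top-level .csv entry, or raise if there is more than one."""
    nested = '*' + os.sep + '*'       # matches entries living inside sub-folders
    found = None
    for name in names:
        if fnmatch.fnmatch(name, nested):
            continue                  # ignore csv files nested in folders
        if fnmatch.fnmatch(name, '*.csv'):
            if found is not None:
                raise ValueError('More than one csv file provided')
            found = name
    return found

print(find_single_csv(['Plone.csv', 'blobs/image.png', 'blobs/data.csv']))
# Plone.csv
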
Example #6
    def requestFile(self, file_):

        if isinstance(file_, list):
            for element in file_:
                self.requestFile(element)

        else:
            file_.seek(0)
            if not file_.read():
                raise ImportExportError('Provide a good file')
            file_.seek(0)
            try:
                filename = file_.filename
            except AttributeError:
                filename = file_.name

            self.files[filename] = file_
            return True
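
A sketch of the single-or-list handling in requestFile, detached from Zope's request object: a file field may yield one file object or a list of them, so the helper recurses over lists, rejects empty files, and registers each upload under its filename. io.BytesIO and ValueError are stand-ins for the real upload objects and ImportExportError.

import io

def collect_files(file_or_list, registry):
    """Register one uploaded file by name, or recurse over a list of uploads."""
    if isinstance(file_or_list, list):
        for element in file_or_list:
            collect_files(element, registry)
        return
    file_or_list.seek(0)
    if not file_or_list.read():
        raise ValueError('Provide a non-empty file')
    file_or_list.seek(0)
    # Zope uploads expose .filename; plain file objects expose .name
    name = getattr(file_or_list, 'filename', None) or getattr(file_or_list, 'name', 'upload')
    registry[name] = file_or_list

registry = {}
upload = io.BytesIO(b'id,title\n')
upload.name = 'content.csv'           # mimic an uploaded file's name
collect_files(upload, registry)
print(list(registry))                 # ['content.csv']
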
Example #7
    def getImportfields(self):

        global MUST_INCLUDED_ATTRIBUTES

        try:
            self.files = {}
            # request files
            file_ = self.request.get('file')
            # files are at self.files
            self.requestFile(file_)

            # file structure and analyser
            self.files = utils.fileAnalyse(self.files)

            if not self.files.getCsv():
                raise ImportExportError('Provide a good csv file')

            csvData = self.files.getCsv()
            # convert csv to json
            conversion = utils.Pipeline()
            jsonData = conversion.converttojson(data=csvData)
            # get headers from jsonData
            headers = conversion.getcsvheaders(jsonData)

            headers = [header for header in headers
                       if header not in MUST_INCLUDED_ATTRIBUTES]

            # get matrix of headers
            matrix = self.getmatrix(headers=headers, columns=4)

        except Exception as e:
            matrix = {'Error': str(e)}

        # JS requires json dump
        matrix = json.dumps(matrix)
        return matrix
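
getmatrix is not shown in the source, so the sketch below only illustrates the two list operations this view relies on: removing the always-included attributes from the header list, and laying the remainder out in rows of a fixed number of columns for the template. All attribute names are illustrative.

MUST_INCLUDED_ATTRIBUTES = ['path', 'id']      # illustrative values

def selectable_headers(headers):
    """Headers the user may toggle: everything that is not always included."""
    return [h for h in headers if h not in MUST_INCLUDED_ATTRIBUTES]

def as_matrix(headers, columns=4):
    """Chunk a flat header list into rows of `columns` entries."""
    return [headers[i:i + columns] for i in range(0, len(headers), columns)]

headers = ['path', 'id', 'title', 'description', 'subject', 'language', 'rights']
print(as_matrix(selectable_headers(headers), columns=4))
# [['title', 'description', 'subject', 'language'], ['rights']]
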
Example #8
    def imports(self):

        global MUST_EXCLUDED_ATTRIBUTES
        global MUST_INCLUDED_ATTRIBUTES
        # global files

        # try:
        if self.request.method == 'POST':

            # request files
            file_ = self.request.get('file')

            # files are at self.files
            self.files = {}
            self.requestFile(file_)

            # file structure and analyser
            self.files = utils.fileAnalyse(self.files)

            if not self.files.getCsv():
                raise ImportExportError('Provide a good csv file')

            # create zip in memory
            self.zip = utils.InMemoryZip()

            # defines Pipeline
            self.conversion = utils.Pipeline()

            # defines mapping for UID
            self.mapping = utils.mapping(self)

            # get list of existingPath
            self.getExistingpath()

            error_log = ''
            temp_log = ''

            # check for include attributes in advanced tab
            if self.request.get('importFields', None):

                # fields/keys to include
                include = self.request.get('importFields', None)
                # BUG in the html checkbox input, which sends the value as a
                # plain string if only one checkbox has been checked
                if isinstance(include, str):
                    include = [include]
                include = list(set(MUST_INCLUDED_ATTRIBUTES + include))

            else:
                # no checkboxes selected, so import the whole content
                include = None

            # convert csv to json
            data = self.conversion.converttojson(data=self.files.getCsv(),
                                                 header=include)
            # invoke non-existent content, if any
            error_log += self.createcontent(data)

            # map old and new UID in memory
            self.mapping.mapNewUID(data)

            # deserialize
            for index in range(len(data)):

                obj_data = data[index]

                if not obj_data.get('path', None):
                    error_log += 'pathError in {arg} \n'.format(
                        arg=obj_data.get('path', ''))
                    continue

                # get blob content into json data
                obj_data, temp_log = self.conversion.fillblobintojson(
                    obj_data, self.files.getFiles(), self.mapping)

                error_log += temp_log

                #  os.sep is preferable to support multiple filesystems
                #  return context of object
                object_context = self.getobjcontext(obj_data['path'].split(
                    os.sep))

                # all import errors will be logged back
                if object_context:
                    error_log += self.deserialize(object_context, obj_data)
                else:
                    error_log += 'pathError for {arg}\n'.format(
                        arg=obj_data['path'])

            self.request.RESPONSE.setHeader('content-type',
                                            'application/text; charset=utf-8')
            return error_log

        else:
            raise ImportExportError('Invalid Request Method')
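
getobjcontext is not part of the excerpt, so the following is only an assumed equivalent of the traversal it performs: the CSV stores an os.sep-separated path, the path is split into segments, and the site tree is walked one segment at a time, yielding None when a segment is missing so the row can be logged instead of imported. A nested dict stands in for the real Plone containers.

import os

def get_object_context(site, path):
    """Walk `site` one path segment at a time; return None if any segment is missing."""
    context = site
    for segment in path.split(os.sep):
        if not segment:
            continue                              # tolerate leading / trailing separators
        context = context.get(segment) if isinstance(context, dict) else None
        if context is None:
            return None
    return context

site = {'plone': {'news': {'title': 'News'}}}
print(get_object_context(site, os.sep.join(['plone', 'news'])))       # {'title': 'News'}
print(get_object_context(site, os.sep.join(['plone', 'missing'])))    # None
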
Example #9
    def imports(self):

        global MUST_EXCLUDED_ATTRIBUTES
        global MUST_INCLUDED_ATTRIBUTES
        # global files
        # try:
        if self.request.method == 'POST':

            # request files
            file_ = self.request.get('file')

            # get the defined import key
            self.primary_key = \
                self.request.get('import_key', 'path')

            # match related self.settings, based on defined key
            self.new_content_action = \
                self.request.get('new_content', 'add')
            self.matching_content_action = \
                self.request.get('matching_content', 'update')
            self.existing_content_no_match_action = \
                self.request.get('existing_content_no_match', 'keep')

            # files are at self.files
            self.files = {}
            self.requestFile(file_)

            # file structure and analyser
            self.files = utils.fileAnalyse(self.files)

            if not self.files.getCsv():
                raise ImportExportError('Provide a good csv file')

            # create zip in memory
            self.zip = utils.InMemoryZip()

            # defines Pipeline
            self.conversion = utils.Pipeline()

            # defines mapping for UID
            self.mapping = utils.mapping(self)

            # get list of existingPath
            self.getExistingpath()

            error_log = ''
            temp_log = ''

            # check for include attributes in advanced tab
            if self.request.get('importFields', None):

                # fields/keys to include
                include = self.request.get('importFields', None)
                # BUG in the html checkbox input, which sends the value as a
                # plain string if only one checkbox has been checked
                if isinstance(include, str):
                    include = [include]
                include = list(set(MUST_INCLUDED_ATTRIBUTES + include))

            else:
                # no checkboxes selected, so import the whole content
                include = None

            # convert csv to json
            data = self.conversion.converttojson(data=self.files.getCsv(),
                                                 header=include)
            error_log += self.processContentCreation(data=data)

            # map old and new UID in memory
            self.mapping.mapNewUID(data)
            self.reindexMatchedTraversalPaths()
            error_log += self.deleteNoMatchingContent()

            # deserialize
            for index in range(len(data)):

                obj_data = data[index]
                path_ = obj_data.get('path', None)
                if not path_:
                    error_log += 'pathError while deserializing the content for {arg} \n'.format(
                        arg=path_)
                    continue
                obj_absolute_path = "/".join(
                    self.getobjpath(path_.split(os.sep)))
                if obj_absolute_path not in self.matchedTraversalPaths:
                    continue

                if path_ not in self.matchedTraversalPaths:
                    continue

                # get blob content into json data
                obj_data, temp_log = self.conversion.fillblobintojson(
                    obj_data, self.files.getFiles(), self.mapping)

                error_log += temp_log

                #  os.sep is preferable to support multiple filesystems
                #  return context of object
                object_context = self.getobjcontext(obj_data['path'].split(
                    os.sep))

                # all import errors will be logged back
                if object_context:
                    error_log += self.deserialize(object_context, obj_data)
                else:
                    error_log += 'Error while attempting to update {arg}\n'.format(
                        arg=obj_data['path'])

            self.request.RESPONSE.setHeader('content-type',
                                            'application/text; charset=utf-8')
            cd = 'attachment; filename=import-log.txt'
            self.request.RESPONSE.setHeader('Content-Disposition', cd)
            return error_log

        else:
            raise ImportExportError('Invalid Request Method')
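
This second version adds import settings keyed on a chosen primary key (new_content, matching_content, existing_content_no_match). The helpers that act on them (processContentCreation, deleteNoMatchingContent, reindexMatchedTraversalPaths) are not shown, so the sketch below only illustrates the underlying bucketing idea: split incoming rows against the keys of existing content into new, matching, and unmatched-existing groups.

def bucket_rows(incoming, existing_keys, primary_key='path'):
    """Classify import rows against existing content, keyed on `primary_key`."""
    incoming_keys = {row.get(primary_key) for row in incoming if row.get(primary_key)}
    new = [row for row in incoming if row.get(primary_key) not in existing_keys]
    matching = [row for row in incoming if row.get(primary_key) in existing_keys]
    unmatched_existing = existing_keys - incoming_keys
    return new, matching, unmatched_existing

existing = {'plone/front-page', 'plone/news'}
rows = [{'path': 'plone/front-page', 'title': 'Front page'},
        {'path': 'plone/events', 'title': 'Events'}]
new, matching, unmatched = bucket_rows(rows, existing)
print([r['path'] for r in new])        # ['plone/events']
print([r['path'] for r in matching])   # ['plone/front-page']
print(sorted(unmatched))               # ['plone/news']
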