    def validate(self):
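        """@validate

        This method validates the mimetype associated with any uploaded
        file(s) representing the dataset, during a 'training' session. If any
        of the file(s) fails validation, the corresponding error message(s)
        are returned. Otherwise, the method returns the dataset with a list
        of unique 'file upload(s)', discarding duplicates.

        """
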
        # local variables
        list_error      = []

        dataset         = self.svm_data['data']['dataset']
        acceptable_type = ['text/plain', 'text/csv', 'text/xml', 'text/json', 'application/xml', 'application/json']

        unique_hash     = set()
        dataset_keep    = []

        if (dataset.get('file_upload', None)):

            for index, filedata in enumerate(dataset['file_upload']):
                try:
                    filehash = calculate_md5(filedata['file'])
                    # add 'hashed' value of file reference(s) to a list
                    if filehash not in unique_hash:
                        unique_hash.add(filehash)
                        mimetype = filedata['file'].content_type

                        # validate mimetype
                        if (mimetype not in acceptable_type):
                            msg = "Problem: Uploaded file, '" + filedata['filename'] + "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated file uploads
                        else:
                            dataset_keep.append({'type': mimetype, 'file': filedata['file'], 'filename': filedata['filename']})
                except Exception:
                    msg = 'Problem with file upload #' + str(index) + '. Please re-upload the file.'
                    list_error.append(msg)

            # replace portion of dataset with unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        else:
            msg = 'No file(s) were uploaded'
            list_error.append(msg)

        # return error
        if len(list_error) > 0:
            return {'error': list_error, 'dataset': None}
        else:
            return {'error': None, 'dataset': dataset}
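
The 'calculate_md5' helper used above is referenced but not defined in this snippet. Below is a minimal sketch of such a helper, assuming it hashes a file-like object in chunks and restores the read position afterwards; the actual implementation may differ.

import hashlib


def calculate_md5(fileobj, chunk_size=8192):
    """Return the hex MD5 digest of a file-like object (hypothetical sketch)."""
    md5 = hashlib.md5()
    fileobj.seek(0)
    for chunk in iter(lambda: fileobj.read(chunk_size), b''):
        md5.update(chunk)
    fileobj.seek(0)
    return md5.hexdigest()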
    def validate(self):
        """@validate

        This method validates the file extension, associated with any file(s)
        representing the dataset, during a 'training' session. If any of the
        file(s) fails validation, this method returns the corresponding error
        message(s). Otherwise, the method returns the dataset with a list of
        unique 'file upload(s)', discarding duplicates.

        """

        # local variables
        list_error = []

        dataset = self.svm_data['data']['dataset']
        acceptable_type = ['csv', 'xml', 'json']

        unique_hash = set()
        dataset_keep = []

        if (dataset.get('file_upload', None)):

            for index, filedata in enumerate(dataset['file_upload']):
                try:
                    split_path = os.path.splitext(filedata['filename'])
                    filehash = calculate_md5(filedata['file'])
                    # add 'hashed' value of file reference(s) to a list
                    if filehash not in unique_hash:
                        unique_hash.add(filehash)
                        file_extension = split_path[1][1:].strip().lower()

                        # validate file_extension
                        if (file_extension not in acceptable_type):
                            msg = "Problem: Uploaded file, '"
                            msg += filedata['filename']
                            msg += "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated file uploads
                        else:
                            dataset_keep.append({
                                'type': file_extension,
                                'file': filedata['file'],
                                'filename': filedata['filename']
                            })
                except Exception:
                    msg = 'Problem with file upload #' + str(index)
                    msg += '. Please re-upload the file.'
                    list_error.append(msg)

            # replace portion of dataset with unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        else:
            msg = 'No file(s) were uploaded'
            list_error.append(msg)

        # return error
        if len(list_error) > 0:
            return {'error': list_error, 'dataset': None}
        else:
            return {'error': None, 'dataset': dataset}
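
The method above assumes 'self.svm_data' already holds the posted payload. The following is an illustrative sketch of that expected shape; the key names are taken from the code, while the file content and filename are made up.

import io

# illustrative payload only: key names match the method above
svm_data = {
    'data': {
        'dataset': {
            'file_upload': [{
                'filename': 'observations.csv',
                'file': io.BytesIO(b'dep_variable,indep_variable\n1,2\n'),
            }],
        },
    },
}

# validate() would then return either
#     {'error': None, 'dataset': <deduplicated dataset>}
# or
#     {'error': [<messages>], 'dataset': None}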
    def validate(self):
        '''

        This method validates the file extension, associated with any file(s)
        representing the dataset, during a 'training' session. If any of the
        file(s) fails validation, this method returns the corresponding error
        message(s). Otherwise, the method returns the dataset with a list of
        unique 'file upload(s)', discarding duplicates.

        '''

        # local variables
        list_error = []
        acceptable_type = ['csv', 'xml', 'json']

        unique_data = set()
        dataset_keep = []

        # validate and restructure: file upload
        if (
                self.premodel_data.get('data', None) and
                self.premodel_data['data'].get('settings', None) and
                self.premodel_data['data']['settings'].get(
                    'dataset_type', None) == 'file_upload' and
                self.premodel_data['data'].get('dataset', None) and
                self.premodel_data['data']['dataset'].get('file_upload', None)
        ):

            dataset = self.premodel_data['data']['dataset']

            for index, filedata in enumerate(dataset['file_upload']):
                try:
                    split_path = os.path.splitext(filedata['filename'])
                    filehash = calculate_md5(filedata['file'])
                    # add 'hashed' value of file reference(s) to a list
                    if filehash not in unique_data:
                        unique_data.add(filehash)
                        file_extension = split_path[1][1:].strip().lower()

                        # validate file_extension
                        if (file_extension not in acceptable_type):
                            msg = "Problem: Uploaded file, '"
                            msg += filedata['filename']
                            msg += "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated file uploads
                        else:
                            dataset_keep.append({
                                'type': file_extension,
                                'file': filedata['file'],
                                'filename': filedata['filename']
                            })
                except Exception:
                    msg = 'Problem with file upload #' + str(index)
                    msg += '. Please re-upload the file.'
                    list_error.append(msg)

            # replace portion of dataset with unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        # validate and restructure: url reference
        elif (
                self.premodel_data.get('data', None) and
                self.premodel_data['data'].get('dataset', None) and
                self.premodel_data['data']['dataset'].get(
                    'type', None) and
                self.premodel_data['data']['dataset']['type'] == 'dataset_url'
        ):

            dataset = self.premodel_data['data']['dataset']
            urls = dataset.get('file_upload', [])

            for index, url in enumerate(urls):
                split_path = os.path.splitext(url)
                file_extension = split_path[1][1:].strip().lower()

                try:
                    if url not in unique_data:
                        unique_data.add(url)

                        # validate file_extension
                        if (file_extension not in acceptable_type):
                            msg = "Problem: url reference, '"
                            msg += url
                            msg += "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated url references
                        else:
                            filename = os.path.split(url)[1]
                            dataset_keep.append({
                                'type': file_extension,
                                'file': cStringIO.StringIO(
                                            urllib.urlopen(url).read()
                                        ),
                                'filename': filename
                            })

                except Exception:
                    msg = 'Problem with url reference ' + url
                    msg += '. Please re-submit the url.'
                    list_error.append(msg)

            # define unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        else:
            msg = 'No file(s) were uploaded'
            list_error.append(msg)

        # return error
        if len(list_error) > 0:
            return {'error': list_error, 'dataset': None}
        else:
            return {'error': None, 'dataset': dataset}
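
The url branch above relies on Python 2 modules (cStringIO and urllib.urlopen). A minimal Python 3 sketch of the same fetch-and-wrap step is shown below; it is an assumption of equivalent behaviour, not part of the original code.

import io
import os
import urllib.request


def fetch_url_as_filedata(url):
    # hypothetical Python 3 helper mirroring the url branch above
    file_extension = os.path.splitext(url)[1][1:].strip().lower()
    with urllib.request.urlopen(url) as response:
        payload = io.BytesIO(response.read())
    return {
        'type': file_extension,
        'file': payload,
        'filename': os.path.split(url)[1],
    }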
    def validate(self):
        """@validate

        This method validates the file extension, associated with any file(s)
        representing the dataset, during a 'training' session. If any of the
        file(s) fails validation, this method returns the corresponding error
        message(s). Otherwise, the method returns the dataset with a list of
        unique 'file upload(s)', discarding duplicates.

        """

        # local variables
        list_error = []

        dataset = self.svm_data['data']['dataset']
        acceptable_type = ['csv', 'xml', 'json']

        unique_hash = set()
        dataset_keep = []

        if (dataset.get('file_upload', None)):

            for index, filedata in enumerate(dataset['file_upload']):
                try:
                    split_path = os.path.splitext(filedata['filename'])
                    filehash = calculate_md5(filedata['file'])
                    # add 'hashed' value of file reference(s) to a list
                    if filehash not in unique_hash:
                        unique_hash.add(filehash)
                        file_extension = split_path[1][1:].strip().lower()

                        # validate file_extension
                        if (file_extension not in acceptable_type):
                            msg = "Problem: Uploaded file, '"
                            msg += filedata['filename']
                            msg += "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated file uploads
                        else:
                            dataset_keep.append({
                                'type': file_extension,
                                'file': filedata['file'],
                                'filename': filedata['filename']
                            })
                except Exception:
                    msg = 'Problem with file upload #' + str(index)
                    msg += '. Please re-upload the file.'
                    list_error.append(msg)

            # replace portion of dataset with unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        else:
            msg = 'No file(s) were uploaded'
            list_error.append(msg)

        # return error
        if len(list_error) > 0:
            return {'error': list_error, 'dataset': None}
        else:
            return {'error': None, 'dataset': dataset}
    def validate(self):
        '''

        This method validates the file extension, associated with any file(s)
        representing the dataset, during a 'training' session. If any of the
        file(s) fails validation, this method returns the corresponding error
        message(s). Otherwise, the method returns the dataset with a list of
        unique 'file upload(s)', discarding duplicates.

        '''

        # local variables
        list_error = []
        acceptable_type = ['csv', 'xml', 'json']

        unique_data = set()
        dataset_keep = []

        # validate and restructure: file upload
        if (self.premodel_data.get('data', None)
                and self.premodel_data['data'].get('settings', None)
                and self.premodel_data['data']['settings'].get(
                    'dataset_type', None) == 'file_upload'
                and self.premodel_data['data'].get('dataset', None)
                and self.premodel_data['data']['dataset'].get(
                    'file_upload', None)):

            dataset = self.premodel_data['data']['dataset']

            for index, filedata in enumerate(dataset['file_upload']):
                try:
                    split_path = os.path.splitext(filedata['filename'])
                    filehash = calculate_md5(filedata['file'])
                    # add 'hashed' value of file reference(s) to a list
                    if filehash not in unique_data:
                        unique_data.add(filehash)
                        file_extension = split_path[1][1:].strip().lower()

                        # validate file_extension
                        if (file_extension not in acceptable_type):
                            msg = "Problem: Uploaded file, '"
                            msg += filedata['filename']
                            msg += "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated file uploads
                        else:
                            dataset_keep.append({
                                'type': file_extension,
                                'file': filedata['file'],
                                'filename': filedata['filename']
                            })
                except Exception:
                    msg = 'Problem with file upload #' + str(index)
                    msg += '. Please re-upload the file.'
                    list_error.append(msg)

            # replace portion of dataset with unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        # validate and restructure: url reference
        elif (self.premodel_data.get('data', None)
              and self.premodel_data['data'].get('dataset', None)
              and self.premodel_data['data']['dataset'].get('type', None) and
              self.premodel_data['data']['dataset']['type'] == 'dataset_url'):

            dataset = self.premodel_data['data']['dataset']
            urls = dataset.get('file_upload', [])

            for index, url in enumerate(urls):
                split_path = os.path.splitext(url)
                file_extension = split_path[1][1:].strip().lower()

                try:
                    if url not in unique_data:
                        unique_data.add(url)

                        # validate file_extension
                        if (file_extension not in acceptable_type):
                            msg = "Problem: url reference, '"
                            msg += url
                            msg += "', must be one of the formats:"
                            msg += '\n ' + ', '.join(acceptable_type)
                            list_error.append(msg)

                        # keep non-duplicated url references
                        else:
                            filename = os.path.split(url)[1]
                            dataset_keep.append({
                                'type': file_extension,
                                'file': cStringIO.StringIO(
                                    urllib.urlopen(url).read()
                                ),
                                'filename': filename
                            })

                except Exception:
                    msg = 'Problem with url reference ' + url
                    msg += '. Please re-submit the url.'
                    list_error.append(msg)

            # define unique 'file reference(s)'
            dataset['file_upload'][:] = dataset_keep

        else:
            msg = 'No file(s) were uploaded'
            list_error.append(msg)

        # return error
        if len(list_error) > 0:
            return {'error': list_error, 'dataset': None}
        else:
            return {'error': None, 'dataset': dataset}
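
Finally, an illustrative sketch of the payload shape the 'dataset_url' branch expects; the key names are taken from the code above, and the urls are placeholders only.

# illustrative payload only: key names match the methods above, urls are placeholders
premodel_data = {
    'data': {
        'settings': {'dataset_type': 'dataset_url'},
        'dataset': {
            'type': 'dataset_url',
            'file_upload': [
                'https://example.com/datasets/observations.csv',
                'https://example.com/datasets/observations.json',
            ],
        },
    },
}

# validate() would then fetch each unique url, check its extension against
# ['csv', 'xml', 'json'], and return {'error': ..., 'dataset': ...}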