Example #1
0
class dropboxClient(object):

    ''' a class of methods to manage file storage on Dropbox API '''

    # https://www.dropbox.com/developers/documentation/http/documentation

    # declarative model handed to jsonModel in __init__:
    #   schema      example values for each input accepted by the methods below
    #   components  validation criteria for those inputs (regex patterns and limits)
    #   metadata    size thresholds (in bytes) used by save and _import
    _class_fields = {
        'schema': {
            'access_token': '',
            'collection_name': 'labPack',
            'record_key': 'obs/terminal/2016-03-17T17-24-51-687845Z.ogg',
            'record_key_path': '/home/user/.config/collective-acuity-labpack/user-data/obs/terminal',
            'record_key_comp': 'obs',
            'previous_key': 'obs/terminal/2016-03-17T17-24-51-687845Z.yaml',
            'secret_key': '6tZ0rUexOiBcOse2-dgDkbeY',
            'prefix': 'obs/terminal',
            'delimiter': '2016-03-17T17-24-51-687845Z.yaml',
            'max_results': 1
        },
        'components': {
            '.collection_name': {
                'max_length': 255,
                'must_not_contain': ['/', '^\\.']
            },
            '.record_key': {
                'must_not_contain': [ '[^\\w\\-\\./]', '^\\.', '\\.$', '^/', '//' ]
            },
            '.record_key_path': {
                'max_length': 32767
            },
            '.record_key_comp': {
                'max_length': 255
            },
            '.secret_key': {
                'must_not_contain': [ '[\\t\\n\\r]' ]
            },
            '.max_results': {
                'min_value': 1,
                'integer_data': True
            },
            '.previous_key': {
                'must_not_contain': [ '[^\\w\\-\\./]', '^\\.', '\\.$', '^/', '//' ]
            },
            '.prefix': {
                'must_not_contain': [ '[^\\w\\-\\./]', '^\\.', '\\.$', '^/', '//' ]
            }
        },
        'metadata': {
            'record_optimal_bytes': 10000 * 1024,
            'record_max_bytes': 150000 * 1024
        }
    }
    
    def __init__(self, access_token, collection_name=''):
        
        '''
            a method to initialize the dropboxClient class
            
        :param access_token: string with oauth2 access token for users account
        :param collection_name: [optional] string with name of collection (used in exit messages)
        '''    

        title = '%s.__init__' % self.__class__.__name__
    
    # construct input validation model
        self.fields = jsonModel(self._class_fields)
        
    # validate inputs
        input_fields = {
            'access_token': access_token,
            'collection_name': collection_name
        }
        for key, value in input_fields.items():
            object_title = '%s(%s=%s)' % (title, key, str(value))
            self.fields.validate(value, '.%s' % key, object_title)
    
    # workaround for module namespace conflict
    # the first entry of sys.path is moved to the end while the dropbox sdk is
    # imported and is always moved back, even if one of the imports fails
        from sys import path as sys_path
        sys_path.append(sys_path.pop(0))
        try:
            from dropbox import Dropbox
            from dropbox.files import FileMetadata, WriteMode, DeleteArg
            from dropbox.exceptions import ApiError
        finally:
            sys_path.insert(0, sys_path.pop())
    
    # construct dropbox client
        from labpack.compilers.objects import _method_constructor
        self.dropbox = Dropbox(oauth2_access_token=access_token)
    
    # construct dropbox objects
        self.objects = _method_constructor({
            'FileMetadata': FileMetadata,
            'ApiError': ApiError,
            'WriteMode': WriteMode,
            'DeleteArg': DeleteArg
        })
    
    # construct collection name
        self.collection_name = collection_name
    
    def _import(self, record_key, record_data, overwrite=True, last_modified=0.0, **kwargs):
        
        '''
            a helper method for other storage clients to import into appdata
            
        :param record_key: string with key for record
        :param record_data: byte data for body of record
        :param overwrite: [optional] boolean to overwrite existing records
        :param last_modified: [optional] float to record last modified date
        :param kwargs: [optional] keyword arguments from other import methods 
        :return: boolean indicating whether record was imported
        
        raises ValueError if record_data is larger than record_max_bytes and
        DropboxConnectionError if the upload request fails
        '''
        
        title = '%s._import' % self.__class__.__name__
    
    # check overwrite
        if not overwrite and self.exists(record_key):
            return False
    
    # check max size
    # len() is the payload size; sys.getsizeof would add the object header
        record_max = self.fields.metadata['record_max_bytes']
        record_size = len(record_data)
        error_prefix = '%s(record_key="%s", record_data=b"...")' % (title, record_key)
        if record_size > record_max:
            raise ValueError('%s exceeds maximum record data size of %s bytes.' % (error_prefix, record_max))
    
    # TODO: apply session upload for files greater than record_max
            
    # construct upload kwargs
        upload_kwargs = {
            'f': record_data,
            'path': '/%s' % record_key,
            'mute': True,
            'mode': self.objects.WriteMode.overwrite
        }
    
    # modify file time
    # keys ending in .drep are always stamped with epoch 1, otherwise the
    # last_modified value (when provided) becomes the client modified time
        import re
        modified_epoch = None
        if re.search('\\.drep$', record_key):
            modified_epoch = 1
        elif last_modified:
            modified_epoch = last_modified
        if modified_epoch is not None:
            from labpack.records.time import labDT
            upload_kwargs['client_modified'] = labDT.fromEpoch(modified_epoch)
    
    # send upload request
        try:
            self.dropbox.files_upload(**upload_kwargs)
        except Exception:
            raise DropboxConnectionError(title)
        
        return True
    
    def _walk(self, root_path=''):

        ''' 
            an iterator method which walks the file structure of the dropbox collection

        :param root_path: [optional] string with folder (relative to root) to walk
        :return: generator which yields the key string of each file found

        NOTE:   entries which are not FileMetadata objects (eg. folders) are skipped
                and the leading / of each dropbox path is removed from the key
        '''

        title = '%s._walk' % self.__class__.__name__

    # the root folder is addressed as '' and any other folder with a leading /
        if root_path:
            root_path = '/%s' % root_path

    # only Exception is caught here so that GeneratorExit (raised at the yield
    # when a consumer stops iterating early) closes the generator silently
    # instead of being reported as a connection error
        try:
            response = self.dropbox.files_list_folder(path=root_path, recursive=True)
            while True:
                for record in response.entries:
                    if isinstance(record, self.objects.FileMetadata):
                        yield record.path_display[1:]
                if not response.has_more:
                    break
                response = self.dropbox.files_list_folder_continue(response.cursor)
        except Exception:
            raise DropboxConnectionError(title)
    
    def exists(self, record_key):
        
        ''' 
            a method to determine if a record exists in collection

        :param record_key: string with key of record
        :return: boolean reporting status
        
        raises DropboxConnectionError for any failure other than a not_found lookup
        '''
        
        title = '%s.exists' % self.__class__.__name__
    
    # validate inputs
        input_fields = {
            'record_key': record_key
        }
        for key, value in input_fields.items():
            object_title = '%s(%s=%s)' % (title, key, str(value))
            self.fields.validate(value, '.%s' % key, object_title)
    
    # send get metadata request
    # a missing record is recognized by the not_found marker in the error text
        file_path = '/%s' % record_key
        try:
            self.dropbox.files_get_metadata(file_path)
        except Exception as err:
            if "LookupError('not_found'" in str(err):
                return False
            raise DropboxConnectionError(title)

        return True
        
    def save(self, record_key, record_data, overwrite=True, secret_key=''):

        ''' 
            a method to create a record in the collection folder

        :param record_key: string with name to assign to record (see NOTES below)
        :param record_data: byte data for record body
        :param overwrite: [optional] boolean to overwrite records with same name
        :param secret_key: [optional] string with key to encrypt data
        :return: string with name of record

        NOTE:   record_key may only contain alphanumeric, /, _, . or -
                characters and may not begin with the . or / character.

        NOTE:   using one or more / characters splits the key into
                separate segments. these segments will appear as
                sub directories inside the record collection and each
                segment is used as a separate index for that record
                when using the list method
                eg. lab/unittests/1473719695.2165067.json is indexed:
                [ 'lab', 'unittests', '1473719695.2165067', '.json' ]

        raises ValueError if record_data is not bytes or exceeds record_max_bytes,
        Exception if the record exists and overwrite is False and
        DropboxConnectionError if the upload request fails
        '''

        title = '%s.save' % self.__class__.__name__
            
    # validate inputs
        input_fields = {
            'record_key': record_key,
            'secret_key': secret_key
        }
        for key, value in input_fields.items():
            if value:
                object_title = '%s(%s=%s)' % (title, key, str(value))
                self.fields.validate(value, '.%s' % key, object_title)
    
    # validate byte data
        if not isinstance(record_data, bytes):
            raise ValueError('%s(record_data=b"...") must be byte data.' % title)
        
    # construct and validate file path
    # every segment of the key (file name and each folder) is validated
        import os
        file_root, file_name = os.path.split(record_key)
        self.fields.validate(file_name, '.record_key_comp')
        while file_root:
            file_root, path_node = os.path.split(file_root)
            self.fields.validate(path_node, '.record_key_comp')

    # check overwrite exception
        if not overwrite and self.exists(record_key):
            raise Exception('%s(record_key="%s") already exists. To overwrite, set overwrite=True' % (title, record_key))
    
    # check size of file
    # len() is the payload size; sys.getsizeof would add the object header
        record_optimal = self.fields.metadata['record_optimal_bytes']
        record_max = self.fields.metadata['record_max_bytes']
        record_size = len(record_data)
        error_prefix = '%s(record_key="%s", record_data=b"...")' % (title, record_key)
        if record_size > record_max:
            raise ValueError('%s exceeds maximum record data size of %s bytes.' % (error_prefix, record_max))
        elif record_size > record_optimal:
            print('[WARNING] %s exceeds optimal record data size of %s bytes.' % (error_prefix, record_optimal))
    
    # TODO add upload session for support of files over 150MB
    # http://dropbox-sdk-python.readthedocs.io/en/latest/moduledoc.html#dropbox.dropbox.Dropbox.files_upload_session_start
            
    # encrypt data
        if secret_key:
            from labpack.encryption import cryptolab
            record_data, secret_key = cryptolab.encrypt(record_data, secret_key)
    
    # construct upload kwargs
        upload_kwargs = {
            'f': record_data,
            'path': '/%s' % record_key,
            'mute': True,
            'mode': self.objects.WriteMode.overwrite
        }
    
    # modify file time
    # file names ending in .drep are always stamped with epoch 1
        import re
        if re.search('\\.drep$', file_name):
            from labpack.records.time import labDT
            upload_kwargs['client_modified'] = labDT.fromEpoch(1)
    
    # send upload request
        try:
            self.dropbox.files_upload(**upload_kwargs)
        except Exception:
            raise DropboxConnectionError(title)
        
        return record_key
    
    def load(self, record_key, secret_key=''):

        ''' 
            a method to retrieve byte data of appdata record

        :param record_key: string with name of record
        :param secret_key: [optional] string used to decrypt data
        :return: byte data for record body
        
        raises Exception if the record does not exist and
        DropboxConnectionError for any other download failure
        '''

        title = '%s.load' % self.__class__.__name__
    
    # validate inputs
        input_fields = {
            'record_key': record_key,
            'secret_key': secret_key
        }
        for key, value in input_fields.items():
            if value:
                object_title = '%s(%s=%s)' % (title, key, str(value))
                self.fields.validate(value, '.%s' % key, object_title)

    # construct file path
        file_path = '/%s' % record_key
    
    # request file data
    # a missing record is recognized by the not_found marker in the error text
        try:
            metadata, response = self.dropbox.files_download(file_path)
        except Exception as err:
            if "LookupError('not_found'" in str(err):
                raise Exception('%s(record_key=%s) does not exist.' % (title, record_key))
            raise DropboxConnectionError(title)
        record_data = response.content
    
    # decrypt (if necessary)
        if secret_key:
            from labpack.encryption import cryptolab
            record_data = cryptolab.decrypt(record_data, secret_key)
    
        return record_data
    
    def conditional_filter(self, path_filters):

        ''' a method to construct a conditional filter function for list method

        :param path_filters: dictionary or list of dictionaries with query criteria
        :return: filter_function object

        path_filters:
        [ { 0: { conditional operators }, 1: { conditional_operators }, ... } ]

        conditional operators:
            "byte_data": false,
            "discrete_values": [ "" ],
            "excluded_values": [ "" ],
            "greater_than": "",
            "less_than": "",
            "max_length": 0,
            "max_value": "",
            "min_length": 0,
            "min_value": "",
            "must_contain": [ "" ],
            "must_not_contain": [ "" ],
            "contains_either": [ "" ]
        '''

        title = '%s.conditional_filter' % self.__class__.__name__
        
        from labpack.compilers.filters import positional_filter
        return positional_filter(path_filters, title)

    def list(self, prefix='', delimiter='', filter_function=None, max_results=1, previous_key=''):
        
        ''' 
            a method to list keys in the dropbox collection

        :param prefix: string with prefix value to filter results
        :param delimiter: string with value which results must not contain (after prefix)
        :param filter_function: (positional arguments) function used to filter results
        :param max_results: integer with maximum number of results to return
        :param previous_key: string with key in collection to begin search after
        :return: list of key strings

            NOTE:   each key string can be divided into one or more segments
                    based upon the / characters which occur in the key string as
                    well as its file extension type. if the key string represents
                    a file path, then each directory in the path, the file name
                    and the file extension are all separate indexed values.

                    eg. lab/unittests/1473719695.2165067.json is indexed:
                    [ 'lab', 'unittests', '1473719695.2165067', '.json' ]

                    it is possible to filter the records in the collection according
                    to one or more of these path segments using a filter_function.

            NOTE:   the filter_function must be able to accept an array of positional
                    arguments and return a value that can evaluate to true or false.
                    while searching the records, list produces an array of strings
                    which represent the directory structure in relative path of each
                    key string. if a filter_function is provided, this list of strings
                    is fed to the filter function. if the function evaluates this input
                    and returns a true value the file will be included in the list
                    results.

            NOTE:   when previous_key is provided, results begin with the first
                    matching key found after it; previous_key itself is excluded.
        '''
        
        title = '%s.list' % self.__class__.__name__
        
    # validate input
        input_fields = {
            'prefix': prefix,
            'delimiter': delimiter,
            'max_results': max_results,
            'previous_key': previous_key
        }
        for key, value in input_fields.items():
            if value:
                object_title = '%s(%s=%s)' % (title, key, str(value))
                self.fields.validate(value, '.%s' % key, object_title)

    # validate filter function
        if filter_function:
            try:
                path_segments = [ 'lab', 'unittests', '1473719695.2165067', '.json' ]
                filter_function(*path_segments)
            except Exception:
                err_msg = '%s(filter_function=%s)' % (title, filter_function.__class__.__name__)
                raise TypeError('%s must accept positional arguments.' % err_msg)

    # construct empty results list
    # records are skipped until previous_key (if any) has been passed
        results_list = []
        check_key = not previous_key
    
    # determine root path
    # only the folder portion of the prefix can be used to narrow the walk
        root_path = ''
        if prefix:
            from os import path
            root_path, file_name = path.split(prefix)

    # iterate over dropbox files
    # dropbox paths always use / regardless of the local os separator
        for file_path in self._walk(root_path):
            path_segments = file_path.split('/')
            record_key = file_path
    
    # find starting point
            if not check_key:
                if record_key == previous_key:
                    check_key = True
                continue
                
    # apply prefix filter
            partial_key = record_key
            if prefix:
                if not record_key.startswith(prefix):
                    continue
                partial_key = record_key[len(prefix):]
    
    # apply delimiter filter
            if delimiter and delimiter in partial_key:
                continue
    
    # apply filter function
            if filter_function:
                if filter_function(*path_segments):
                    results_list.append(record_key)
            else:
                results_list.append(record_key)

    # return results list
            if len(results_list) == max_results:
                return results_list

        return results_list
    
    def delete(self, record_key):

        ''' a method to delete a file

        :param record_key: string with name of file
        :return: string reporting outcome
        
        NOTE:   folders in the path to the file which are left empty by the
                deletion are removed as well
        '''

        title = '%s.delete' % self.__class__.__name__

    # validate inputs
        input_fields = {
            'record_key': record_key
        }
        for key, value in input_fields.items():
            object_title = '%s(%s=%s)' % (title, key, str(value))
            self.fields.validate(value, '.%s' % key, object_title)

    # validate existence of file
        if not self.exists(record_key):
            return '%s does not exist.' % record_key
            
    # remove file
        import os
        current_dir = os.path.split(record_key)[0]
        try:
            self.dropbox.files_delete('/%s' % record_key)
        except Exception:
            raise DropboxConnectionError(title)

    # remove empty directories in path to file
    # walk upwards and stop at the first folder which still has entries
        try:
            while current_dir:
                folder_path = '/%s' % current_dir
                response = self.dropbox.files_list_folder(folder_path)
                if response.entries:
                    break
                self.dropbox.files_delete(folder_path)
                current_dir = os.path.split(current_dir)[0]
        except Exception:
            raise DropboxConnectionError(title)

        return '%s has been deleted.' % record_key
    
    def remove(self):
        
        ''' 
            a method to remove all records in the collection

        NOTE:   this method removes all the files in the collection, but the
                collection folder itself created by oauth2 cannot be removed.
                only the user can remove the app folder
                
        :return: string with confirmation of deletion
        '''

        title = '%s.remove' % self.__class__.__name__
    
    # get contents in root and populate delete list
    # retrieval continues page by page while the folder has more entries
        delete_list = []
        try:
            response = self.dropbox.files_list_folder(path='')
            while True:
                for entry in response.entries:
                    delete_list.append(self.objects.DeleteArg(path=entry.path_display))
                if not response.has_more:
                    break
                response = self.dropbox.files_list_folder_continue(response.cursor)
        except Exception:
            raise DropboxConnectionError(title)

    # send batch delete request (skipped when there is nothing to delete)
        if delete_list:
            try:
                self.dropbox.files_delete_batch(delete_list)
            except Exception:
                raise DropboxConnectionError(title)
    
    # return outcome
        insert = self.collection_name if self.collection_name else 'collection'
        return 'Contents of %s will be removed from Dropbox.' % insert

    def export(self, storage_client, overwrite=True):
        
        '''
            a method to export all the records in collection to another platform
            
        :param storage_client: class object with storage client methods
        :param overwrite: [optional] boolean to overwrite existing records in storage client
        :return: string with exit message
        
        raises ValueError if storage_client lacks any of the required attributes
        and DropboxConnectionError if a record cannot be downloaded
        '''
        
        title = '%s.export' % self.__class__.__name__
        
    # validate storage client
    # hasattr is used so that falsy attributes (eg. an empty collection_name)
    # do not disqualify an otherwise valid client
        method_list = [ 'save', 'load', 'list', 'export', 'delete', 'remove', '_import', 'collection_name' ]
        for method in method_list:
            if not hasattr(storage_client, method):
                from labpack.parsing.grammar import join_words
                raise ValueError('%s(storage_client=...) must be a client object with %s methods.' % (title, join_words(method_list)))
            
    # walk collection folder to find files
        count = 0
        skipped = 0
        for record_key in self._walk():
            file_path = '/%s' % record_key
            
    # retrieve data and metadata
            try:
                metadata, response = self.dropbox.files_download(file_path)
            except Exception:
                raise DropboxConnectionError(title)
            record_data = response.content
            client_modified = metadata.client_modified
            
    # import record into storage client
    # client_modified is treated as utc before conversion to epoch
            last_modified = 0.0
            if client_modified:
                from dateutil.tz import tzutc
                from labpack.records.time import labDT
                last_modified = labDT.fromPython(client_modified.replace(tzinfo=tzutc())).epoch()
            outcome = storage_client._import(record_key, record_data, overwrite=overwrite, last_modified=last_modified)
            if outcome:
                count += 1
            else:
                skipped += 1
            
    # report outcome
        plural = '' if count == 1 else 's'
        skip_insert = ''
        new_folder = storage_client.collection_name
        if skipped > 0:
            skip_plural = 's' if skipped > 1 else ''
            skip_insert = ' %s record%s skipped to avoid overwrite.' % (str(skipped), skip_plural)
        return '%s record%s exported to %s.%s' % (str(count), plural, new_folder, skip_insert)