Example #1
0
class NetworkMonitor(threading.Thread):
    """
    Daemon thread that parks caller threads while the network is unreachable and wakes them, one at a time, once
    the test URI can be reached again.
    """
    THREAD_NAME = "netmon"
    # Upper bound in seconds for one connectivity probe, so that a stalled connection cannot block the monitor
    # thread (and therefore every suspended caller) forever.
    PROBE_TIMEOUT_SEC = 30
    logger = logger_factory.get_logger(__name__)

    def __init__(self,
                 test_uri='https://onedrive.com',
                 retry_delay_sec=30,
                 proxies=None):
        """
        :param str test_uri: The url to use in testing internet connectivity.
        :param int retry_delay_sec: The amount of seconds to wait before retry.
        :param dict[str, str] proxies: A dict of protocol-url pairs.
        """
        super().__init__()
        self.name = NetworkMonitor.THREAD_NAME
        self.daemon = True
        self.test_uri = test_uri
        self.retry_delay = retry_delay_sec
        self.proxies = proxies
        # Threads waiting for the network, in the order they were suspended.
        self.queue = queue.Queue()
        # Maps thread ident -> the Condition that thread is currently waiting on.
        self.conditions = {}
        self.logger.info("Initialized.")

    def suspend_caller(self):
        """
        Put the calling thread into the suspension queue and block until the monitor thread wakes it up.
        """
        me = threading.current_thread()
        cond = threading.Condition()
        # The condition must be held BEFORE the thread is published on the queue. Otherwise the monitor could
        # call notify() before this thread reaches wait(), the wakeup would be lost and the caller would sleep
        # forever. While we hold the lock the monitor blocks in acquire() until wait() releases it.
        with cond:
            self.conditions[me.ident] = cond
            self.queue.put(me)
            self.logger.info("Suspended due to network failure.")
            try:
                # Put the caller thread to sleep until the monitor notifies it.
                cond.wait()
            finally:
                # Always drop the bookkeeping entry, even if wait() is interrupted.
                del self.conditions[me.ident]
        self.logger.info("Resumed.")

    def is_connected(self):
        """
        Test if internet connection is OK by connecting to the test URI provided.
        If proxy setting is set in the client, it will be used as well.
        :return: True if internet connection is on; False otherwise (connection error or probe timeout).
        """
        try:
            requests.head(self.test_uri,
                          proxies=self.proxies,
                          timeout=self.PROBE_TIMEOUT_SEC)
            return True
        except (requests.ConnectionError, requests.Timeout):
            return False

    def run(self):
        """
        Monitor loop: take the next suspended thread, wait until the network is reachable, then wake that thread.
        """
        while True:
            th = self.queue.get()  # blocking call
            while not self.is_connected():
                time.sleep(self.retry_delay)
            # Look the condition up once so acquire/notify/release all act on the same object.
            cond = self.conditions[th.ident]
            with cond:
                cond.notify()
Example #2
0
def main():
    """
    Program entry point.

    Parses and normalizes the command-line arguments, binds the module-level ``logger`` to the 'Main' logger, and
    then runs the start-up steps strictly in the order listed below.
    """
    global logger
    raw_args = parse_args()
    args = fix_log_args(raw_args)
    logger = logger_factory.get_logger('Main')
    # Each step is called only after the previous one has returned; the order is significant.
    startup_steps = (
        check_config_dir,
        load_user_config,
        load_item_storage,
        load_task_storage,
        start_task_workers,
        refill_tasks,
    )
    for step in startup_steps:
        step()
Example #3
0
def main():
    """
    Start the program: handle the command-line arguments, set up the global 'Main' logger, then execute every
    initialization routine one after another.
    """
    global logger
    args = fix_log_args(parse_args())
    logger = logger_factory.get_logger('Main')
    # Initialization routines, run sequentially in this exact order.
    for init_step in (check_config_dir, load_user_config,
                      load_item_storage, load_task_storage,
                      start_task_workers, refill_tasks):
        init_step()
Example #4
0
class TaskConsumer(threading.Thread):
    """
    Daemon worker thread that repeatedly takes tasks out of a task pool and handles them.
    """
    # Shared by all consumers: once set, each worker exits the next time it obtains the pool's semaphore.
    terminate_sign = threading.Event()
    logger = logger_factory.get_logger('TaskConsumer')

    def __init__(self, task_pool):
        """
        :param onedrived.store.task_pool.TaskPool task_pool: Pool this worker consumes tasks from.
        """
        super().__init__()
        self.daemon = True
        self.task_pool = task_pool

    def run(self):
        """
        Worker loop: wait on the pool's semaphore, then pop and handle one task, until termination is signalled.
        """
        self.logger.debug('Started.')
        keep_running = True
        while keep_running:
            self.task_pool.semaphore.acquire()
            # The termination flag is examined only after the semaphore has been obtained.
            keep_running = not self.terminate_sign.is_set()
            if keep_running:
                job = self.task_pool.pop_task()
                self.logger.debug(
                    'Acquired task of type "%s" on parent "%s", name "%s".',
                    type(job).__name__, job.local_parent_path, job.item_name)
                job.handle()
        self.logger.debug('Stopped.')
Example #5
0
class ManagedRESTClient:
    """
    Thin wrapper over a requests session that retries on connection loss, on recoverable HTTP status codes and,
    optionally, after renewing an expired access token.
    """
    # Delay used when the server gives no usable Retry-After value.
    AUTO_RETRY_SECONDS = 30
    RECOVERABLE_STATUS_CODES = {requests.codes.too_many, 500, 502, 503, 504}
    logger = logger_factory.get_logger(__name__)

    def __init__(self, session, net_mon, account, proxies=None):
        """
        :param session: Dictate a requests Session object.
        :param onedrived.common.netman.NetworkMonitor net_mon: Network monitor instance.
        :param onedrived.api.accounts.PersonalAccount | onedrived.api.accounts.BusinessAccount account: Account.
        :param dict[str, str] proxies: (Optional) A dictionary of protocol-host pairs.
        :return: No return value.
        """
        self.session = session
        self.net_mon = net_mon
        self.account = account
        self.proxies = proxies

    def _get_retry_delay(self, response):
        """
        Work out how many seconds to wait before retrying a recoverable response.
        :param requests.Response response: The response carrying the recoverable status code.
        :rtype: int
        """
        raw_value = response.headers.get('Retry-After')
        if raw_value is None:
            return self.AUTO_RETRY_SECONDS
        try:
            # Clamp at zero: time.sleep() rejects negative values.
            return max(0, int(raw_value))
        except ValueError:
            # Retry-After may also be an HTTP-date rather than a number of seconds; fall back to the default
            # delay instead of letting the ValueError escape the retry loop.
            return self.AUTO_RETRY_SECONDS

    def request(self, method, url, params, ok_status_code, auto_renew):
        """
        Perform a HTTP request call. Do auto-recover as fits.
        :param str method: Lower-case name of the session method to call, e.g. 'get', 'post', 'patch', 'put', 'delete'.
        :param str url: URL of the HTTP request.
        :param dict[str, str | dict | bytes] params: Keyword arguments passed to the session method.
        :param int | collection of int ok_status_code: Expected status code(s) for HTTP response.
        :param True | False auto_renew: If True, auto recover the expired token.
        :rtype: requests.Response
        :raise errors.OneDriveError: The status code is neither expected nor recoverable.
        """
        send = getattr(self.session, method)
        while True:
            try:
                response = send(url, **params)
                if isinstance(ok_status_code, int):
                    bad_status = response.status_code != ok_status_code
                else:
                    bad_status = response.status_code not in ok_status_code
                if bad_status:
                    if response.status_code in self.RECOVERABLE_STATUS_CODES:
                        retry_after_seconds = self._get_retry_delay(response)
                        self.logger.info(
                            'Server returned code %d which is assumed recoverable. Retry in %d seconds',
                            response.status_code, retry_after_seconds)
                        raise errors.OneDriveRecoverableError(
                            retry_after_seconds)
                    raise errors.OneDriveError(response.json())
                return response
            except requests.ConnectionError:
                # Block this thread until the network monitor reports connectivity, then retry.
                self.net_mon.suspend_caller()
            except errors.OneDriveRecoverableError as e:
                time.sleep(e.retry_after_seconds)
            except errors.OneDriveTokenExpiredError:
                if auto_renew:
                    self.logger.info('Access token expired. Try refreshing...')
                    self.account.renew_tokens()
                else:
                    # Bare raise keeps the original traceback intact.
                    raise

    def get(self,
            url,
            params=None,
            headers=None,
            ok_status_code=requests.codes.ok,
            auto_renew=True):
        """
        Perform a HTTP GET request.
        :param str url: URL of the HTTP request.
        :param dict[str, T] | None params: (Optional) Dictionary to construct query string.
        :param dict | None headers: (Optional) Additional headers for the HTTP request.
        :param int ok_status_code: (Optional) Expected status code for the HTTP response.
        :param True | False auto_renew: (Optional) If True, auto recover from expired token error or Internet failure.
        :rtype: requests.Response
        """
        args = {'proxies': self.proxies}
        if params is not None:
            args['params'] = params
        if headers is not None:
            args['headers'] = headers
        return self.request('get',
                            url,
                            args,
                            ok_status_code=ok_status_code,
                            auto_renew=auto_renew)

    def download(self):
        """Placeholder; not implemented."""
        pass

    def post(self,
             url,
             data=None,
             json=None,
             headers=None,
             ok_status_code=requests.codes.ok,
             auto_renew=True):
        """
        Perform a HTTP POST request.
        :param str url: URL of the HTTP request.
        :param dict | None data: (Optional) Data in POST body of the request. Ignored when json is given.
        :param dict | None json: (Optional) Send the dictionary as JSON content in POST body and set proper headers.
        :param dict | None headers: (Optional) Additional headers for the HTTP request.
        :param int ok_status_code: (Optional) Expected status code for the HTTP response.
        :param True | False auto_renew: (Optional) If True, auto recover from expired token error or Internet failure.
        :rtype: requests.Response
        """
        params = {'proxies': self.proxies}
        if json is not None:
            params['json'] = json
        else:
            params['data'] = data
        if headers is not None:
            params['headers'] = headers
        return self.request('post',
                            url,
                            params,
                            ok_status_code=ok_status_code,
                            auto_renew=auto_renew)

    def patch(self,
              url,
              json,
              ok_status_code=requests.codes.ok,
              auto_renew=True):
        """
        Perform a HTTP PATCH request.
        :param str url: URL of the HTTP request.
        :param dict json: Send the dictionary as JSON content in the request body and set proper headers.
        :param int ok_status_code: (Optional) Expected status code for the HTTP response.
        :param True | False auto_renew: (Optional) If True, auto recover from expired token error or Internet failure.
        :rtype: requests.Response
        """
        params = {'proxies': self.proxies, 'json': json}
        return self.request('patch',
                            url,
                            params,
                            ok_status_code=ok_status_code,
                            auto_renew=auto_renew)

    def put(self,
            url,
            data,
            headers=None,
            ok_status_code=requests.codes.ok,
            auto_renew=True):
        """
        Perform a HTTP PUT request.
        :param str url: URL of the HTTP request.
        :param bytes | None data: Binary data to send in the request body.
        :param dict | None headers: Additional headers for the HTTP request.
        :param int ok_status_code: (Optional) Expected status code for the HTTP response.
        :param True | False auto_renew: (Optional) If True, auto recover from expired token error or Internet failure.
        :rtype: requests.Response
        """
        params = {'proxies': self.proxies, 'data': data}
        if headers is not None:
            params['headers'] = headers
        return self.request('put',
                            url,
                            params=params,
                            ok_status_code=ok_status_code,
                            auto_renew=auto_renew)

    def delete(self, url, ok_status_code=requests.codes.ok, auto_renew=True):
        """
        Perform a HTTP DELETE request on the specified URL.
        :param str url: URL of the HTTP request.
        :param int ok_status_code: (Optional) Expected status code for the HTTP response.
        :param True | False auto_renew: (Optional) If True, auto recover from expired token error or Internet failure.
        :rtype: requests.Response
        """
        return self.request('delete',
                            url, {'proxies': self.proxies},
                            ok_status_code=ok_status_code,
                            auto_renew=auto_renew)
Example #6
0
class DriveStorage:
    """
    SQLite-backed store of drive records, with in-memory caches of loaded drives and per-account drive roots.
    """
    logger = logger_factory.get_logger('DriveStorage')

    def __init__(self, db_path, account_store):
        """
        Open the database, run the 'onedrive_drives.sql' script content and register close() to run at exit.
        :param str db_path: Path to Drive database.
        :param onedrived.store.account_db.AccountStorage account_store:
        """
        self._conn = sqlite3.connect(db_path, isolation_level=None)
        self._cursor = self._conn.cursor()
        self._cursor.execute(get_content('onedrive_drives.sql'))
        self._conn.commit()
        # Cache of drives keyed by get_key(); filled by get_all_drives().
        self._all_drives = {}
        # Cache of DriveRoot objects keyed by (account_id, account_type).
        self._drive_roots = {}
        self.account_store = account_store
        atexit.register(self.close)

    @staticmethod
    def get_key(drive_id, account_id, account_type):
        """Return the tuple used to index a drive in the cache."""
        return drive_id, account_id, account_type

    def assemble_drive_record(self, row, container):
        """
        Build a drive object from one database row and store it in container. Rows whose account is unknown or
        whose dump cannot be loaded are logged and skipped.
        :param tuple row: (drive_id, account_id, account_type, drive_dump).
        :param dict container: Destination dict, keyed by get_key().
        """
        drive_id, account_id, account_type, drive_dump = row
        try:
            drive_root = self.get_drive_root(account_id, account_type)
        except KeyError:
            self.logger.warning(
                'The %s account %s for drive %s was not registered.',
                account_type, account_id, drive_id)
            return
        try:
            drive = drives.DriveObject.load(drive_root, account_id,
                                            account_type, drive_dump)
            container[self.get_key(drive.drive_id, account_id,
                                   account_type)] = drive
        except ValueError as e:
            self.logger.warning('Cannot load drive %s from database: %s',
                                drive_id, e)

    def get_drive_root(self, account_id, account_type):
        """
        Return the DriveRoot for the account, creating and caching it on first use.
        """
        key = (account_id, account_type)
        if key not in self._drive_roots:
            self._drive_roots[key] = drives.DriveRoot(
                self.account_store.get_account(account_id, account_type))
        return self._drive_roots[key]

    def get_all_drives(self):
        """
        Reload every drive row from the database into the cache and return the cache.
        :rtype: dict[(str, str, str), onedrived.api.drives.DriveObject]
        """
        self._conn.commit()
        q = self._cursor.execute(
            'SELECT drive_id, account_id, account_type, drive_dump FROM drives'
        )
        for row in q.fetchall():
            self.assemble_drive_record(row, self._all_drives)
        return self._all_drives

    def add_record(self, drive):
        """
        Insert the drive into the database, replacing any existing row for it.
        """
        account = drive.root.account
        params = (drive.drive_id, account.profile.user_id, account.TYPE,
                  drive.config.local_root, drive.dump())
        self._cursor.execute(
            'INSERT OR REPLACE INTO drives (drive_id, account_id, account_type, local_root, '
            'drive_dump) VALUES (?,?,?,?,?)', params)
        self._conn.commit()

    def delete_record(self, drive):
        """
        Remove the drive from the cache (if cached) and from the database.
        """
        key = self.get_key(drive.drive_id, drive.root.account.profile.user_id,
                           drive.root.account.TYPE)
        # The cache is only filled by get_all_drives() and is emptied by delete_records_by_account(), so the
        # key may legitimately be absent; that must not prevent the database row from being deleted.
        self._all_drives.pop(key, None)
        self._cursor.execute(
            'DELETE FROM drives WHERE drive_id=? AND account_id=? AND account_type=?',
            key)
        self._conn.commit()

    def delete_records_by_account(self, account_id, account_type):
        """
        Delete every drive row of the account, forget its cached drive root and reset the drive cache.
        """
        key = (account_id, account_type)
        self._drive_roots.pop(key, None)
        self._cursor.execute(
            'DELETE FROM drives WHERE account_id=? AND account_type=?', key)
        # Commit like the other write operations of this class do.
        self._conn.commit()
        self._all_drives = {}

    def close(self):
        """Commit, then close the cursor and the connection."""
        self._conn.commit()
        self._cursor.close()
        self._conn.close()
Example #7
0
class DriveConfig:
    """
    Per-drive configuration backed by a plain dict, with class-wide default values for missing keys.
    """
    DEFAULT_VALUES = {
        'max_get_size_bytes': 1048576,
        'max_put_size_bytes': 524288,
        'local_root': None,
        'ignore_files': set(),
    }

    logger = logger_factory.get_logger('DriveConfig')

    def __init__(self, data):
        """
        :param dict data: Config values. Modified in place: missing keys are filled with defaults and
            'ignore_files' is converted to a set that also contains the default ignore files.
        """
        for k, v in self.DEFAULT_VALUES.items():
            if k not in data:
                # Copy the default so this instance never shares a mutable object (the ignore_files set) with
                # the class-level defaults; otherwise editing one config would silently edit the defaults.
                data[k] = deepcopy(v)
        if isinstance(data['ignore_files'], list):
            data['ignore_files'] = set(data['ignore_files'])
        data['ignore_files'].update(self.DEFAULT_VALUES['ignore_files'])
        self.data = data

    @staticmethod
    def default_config():
        """Return a new config holding a deep copy of the current default values."""
        return DriveConfig(deepcopy(DriveConfig.DEFAULT_VALUES))

    @classmethod
    def set_default_config(cls, config):
        """
        Set the values of the given config as the new class-wide defaults. Only keys whose value differs from the
        current default are replaced. Existing DriveConfig instances are not modified by this call.
        :param onedrived.api.drives.DriveConfig config:
        """
        for k in list(cls.DEFAULT_VALUES):
            new_value = getattr(config, k)
            if new_value != cls.DEFAULT_VALUES[k]:
                # Store a copy so later edits to config do not leak into the defaults.
                cls.DEFAULT_VALUES[k] = deepcopy(new_value)

    @property
    def max_get_size_bytes(self):
        """
        :rtype: int
        """
        return self.data['max_get_size_bytes']

    @property
    def max_put_size_bytes(self):
        """
        :rtype: int
        """
        return self.data['max_put_size_bytes']

    @property
    def local_root(self):
        """
        :rtype: str
        """
        return self.data['local_root']

    @property
    def ignore_files(self):
        """
        Paths of the ignore-list files.
        :rtype: set[str]
        """
        return self.data['ignore_files']

    # noinspection PyAttributeOutsideInit
    @property
    def path_filter(self):
        """
        Path filter built from the lines of every ignore-list file. Built on first access and cached; files that
        cannot be read are logged and skipped.
        """
        if not hasattr(self, '_path_filter'):
            rules = set()
            for path in self.ignore_files:
                try:
                    with open(path, 'r') as f:
                        rules.update(f.read().splitlines())
                except Exception as e:
                    # Best effort: one unreadable ignore list must not disable the remaining ones.
                    self.logger.error('Failed to load ignore list "%s": %s',
                                      path, e)
            self._path_filter = path_filter.PathFilter(rules)
        return self._path_filter

    def dump(self, exact_dump=False):
        """
        Export the config as a dict.
        :param True | False exact_dump: If True, export every value; otherwise only values differing from defaults.
        :rtype: dict
        """
        data = {}
        for key in ['max_get_size_bytes', 'max_put_size_bytes', 'local_root']:
            if exact_dump or getattr(self, key) != self.DEFAULT_VALUES[key]:
                data[key] = getattr(self, key)
        ignore_files = [
            s for s in self.ignore_files
            if exact_dump or s not in self.DEFAULT_VALUES['ignore_files']
        ]
        if ignore_files:
            data['ignore_files'] = ignore_files
        return data

    @classmethod
    def load(cls, d):
        """
        Build a config from a dict such as the one produced by dump().
        :param dict d: Config values; modified in place as described in __init__.
        """
        return cls(d)
Example #8
0
class ItemStorage:
    """
    Local storage for items under ONE drive.
    """

    logger = logger_factory.get_logger('ItemStorage')

    def __init__(self, db_path, drive):
        """
        Open the item database, run the 'onedrive_items.sql' script content and register close() to run at exit.
        A ReadWriteLock is attached to the drive as ``storage_lock`` if it does not already have one.
        :param str db_path: A unique path for the database to store items for the target drive.
        :param onedrived.api.drives.DriveObject drive: The underlying drive object.
        """
        if not hasattr(drive, 'storage_lock'):
            drive.storage_lock = ReadWriteLock()
        self.lock = drive.storage_lock
        self._conn = sqlite3.connect(db_path,
                                     isolation_level=None,
                                     check_same_thread=False)
        self.drive = drive
        self._cursor = self._conn.cursor()
        self._cursor.execute(get_content('onedrive_items.sql'))
        self._conn.commit()
        atexit.register(self.close)

    def close(self):
        """Close the cursor and the connection."""
        self._cursor.close()
        self._conn.close()

    def local_path_to_remote_path(self, path):
        """
        Replace the first occurrence of the drive's local root in path with the drive's remote root prefix.
        :param str path: Local path.
        :rtype: str
        """
        return path.replace(self.drive.config.local_root,
                            self.drive.drive_path + '/root:', 1)

    def get_items_by_id(self,
                        item_id=None,
                        parent_path=None,
                        item_name=None,
                        local_parent_path=None):
        """
        Find all qualified records from database by ID or path.
        :param str item_id: ID of the target item.
        :param str parent_path: Path reference of the target item's parent. Used with item_name.
        :param str item_name: Name of the item. Used with parent_path or local_parent_path.
        :param str local_parent_path: Local path to item's parent directory. Overrides parent_path when given.
        :return dict[str, onedrived.store.items_db.ItemRecord]: All qualified records index by item ID.
        """
        if local_parent_path is not None:
            parent_path = self.local_path_to_remote_path(local_parent_path)
        args = {
            'item_id': item_id,
            'parent_path': parent_path,
            'item_name': item_name
        }
        return self.get_items(args)

    def get_items_by_hash(self, crc32_hash=None, sha1_hash=None):
        """
        Find all qualified records from database whose hash values match either parameter.
        :param str crc32_hash: CRC32 hash of the target item.
        :param str sha1_hash: SHA-1 hash of the target item.
        :return dict[str, onedrived.store.items_db.ItemRecord]: All qualified records index by item ID.
        """
        args = {'crc32_hash': crc32_hash, 'sha1_hash': sha1_hash}
        return self.get_items(args, 'OR')

    @staticmethod
    def _get_where_clause(args, relation='AND'):
        """
        Form a where clause in SQL query and the tuples for the filler values. Entries whose value is None are
        left out.
        :param dict[str, str | int]] args: Keys are where conditions and values are the filler values.
        :param str relation: Either 'AND' or 'OR'.
        :return (str, ()):
        """
        criteria = [(k, v) for k, v in args.items() if v is not None]
        clause = (' ' + relation + ' ').join(k + '=?' for k, _ in criteria)
        return clause, tuple(v for _, v in criteria)

    @staticmethod
    def _escape_like(text):
        """
        Escape the LIKE wildcards in text so it is matched literally (for use with ESCAPE '\\').
        :param str text:
        :rtype: str
        """
        return text.replace('\\', '\\\\').replace('%', '\\%').replace(
            '_', '\\_')

    def get_items(self, args, relation='AND'):
        """
        :param dict[str, int | str] args: Criteria used to construct SQL query.
        :param str relation: Relation of the criteria.
        :return dict[str, onedrived.store.items_db.ItemRecord]: All matching rows in the form of ItemRecord.
        """
        where, values = self._get_where_clause(args, relation)
        ret = {}
        self.lock.acquire_read()
        try:
            q = self._conn.execute(
                'SELECT item_id, type, item_name, parent_id, parent_path, etag, ctag, size, '
                'created_time, modified_time, status, crc32_hash, sha1_hash FROM items WHERE '
                + where, values)
            for row in q.fetchall():
                item = ItemRecord(row)
                ret[item.item_id] = item
        finally:
            # Release even when the query fails, otherwise writers would block forever.
            self.lock.release_read()
        return ret

    def update_item(self,
                    item,
                    status=ItemRecordStatuses.OK,
                    parent_path=None):
        """
        Insert the item into the database, replacing any existing record for it.
        :param onedrived.api.items.OneDriveItem item:
        :param str status: One value of enum ItemRecordStatuses.
        :param str parent_path: If item does not have a parent reference, fallback to this path.
        """
        if item.is_folder:
            crc32_hash = None
            sha1_hash = None
        else:
            file_facet = item.file_props
            crc32_hash = file_facet.hashes.crc32
            sha1_hash = file_facet.hashes.sha1
        parent_ref = item.parent_reference
        try:
            parent_path = parent_ref.path
        except Exception:
            # Deliberate best effort: when the parent reference has no usable path, keep the caller-supplied
            # fallback value of parent_path.
            pass
        created_time_str = datetime_to_str(item.created_time)
        modified_time_str = datetime_to_str(item.modified_time)
        self.lock.acquire_write()
        try:
            self._cursor.execute(
                'INSERT OR REPLACE INTO items (item_id, type, item_name, parent_id, parent_path, etag, '
                'ctag, size, created_time, modified_time, status, crc32_hash, sha1_hash)'
                ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                (item.id, item.type, item.name, parent_ref.id, parent_path,
                 item.e_tag, item.c_tag, item.size, created_time_str,
                 modified_time_str, status, crc32_hash, sha1_hash))
            self._conn.commit()
        finally:
            self.lock.release_write()

    def delete_item(self,
                    item_id=None,
                    parent_path=None,
                    item_name=None,
                    local_parent_path=None,
                    is_folder=False):
        """
        Delete the specified item from database. If the item is a directory, then also delete all its children items.
        :param str item_id: ID of the target item.
        :param str parent_path: Path reference of the target item's parent. Used with item_name.
        :param str item_name: Name of the item. Used with parent_path or local_parent_path.
        :param str local_parent_path: Local path to item's parent directory. Overrides parent_path when given.
        :param True | False is_folder: True to indicate that the item is a folder (delete all children).
        """
        if local_parent_path is not None:
            parent_path = self.local_path_to_remote_path(local_parent_path)
        where, values = self._get_where_clause({
            'item_id': item_id,
            'parent_path': parent_path,
            'item_name': item_name
        })
        self.lock.acquire_write()
        try:
            if is_folder:
                # Translate ID reference to path and name reference.
                q = self._cursor.execute(
                    'SELECT item_id, parent_path, item_name FROM items WHERE '
                    + where, values)
                row = q.fetchone()
                if row is None:
                    self.logger.warning(
                        'The folder to delete does not exist: %s, %s', where,
                        str(values))
                else:
                    item_id, parent_path, item_name = row
                    # Direct children are matched by parent_id; deeper descendants by path prefix. The prefix
                    # is escaped so that '%' or '_' in a folder name cannot match unrelated rows.
                    descendants = self._escape_like(
                        parent_path + '/' + item_name) + '/%'
                    self._cursor.execute(
                        "DELETE FROM items WHERE parent_id=? OR parent_path LIKE ? ESCAPE '\\'",
                        (item_id, descendants))
            self._cursor.execute('DELETE FROM items WHERE ' + where, values)
            self._conn.commit()
        finally:
            self.lock.release_write()

    def update_status(self,
                      status,
                      item_id=None,
                      parent_path=None,
                      item_name=None,
                      local_parent_path=None):
        """
        Update the status tag of the target item.
        :param str status:
        :param str item_id: ID of the target item.
        :param str parent_path: Path reference of the target item's parent. Used with item_name.
        :param str item_name: Name of the item. Used with parent_path or local_parent_path.
        :param str local_parent_path: Path relative to drive's local root. If at root, use ''.
        """
        if local_parent_path is not None:
            parent_path = self.local_path_to_remote_path(local_parent_path)
        where, values = self._get_where_clause({
            'item_id': item_id,
            'parent_path': parent_path,
            'item_name': item_name
        })
        values = (status, ) + values
        self.lock.acquire_write()
        try:
            self._cursor.execute('UPDATE items SET status=? WHERE ' + where,
                                 values)
            self._conn.commit()
        finally:
            self.lock.release_write()
Example #9
0
class TaskBase:
    """
    Base class of tasks. Holds the drive, item storage and task pool a task works with, plus the name and parent
    path of the item it refers to, and derives the local / remote paths from them.
    """
    logger = logger_factory.get_logger('Tasks')

    def __init__(self, parent_task=None):
        """
        Create a task, copying drive, items_store and task_pool from the parent task when one is given.
        :param TaskBase | None parent_task: The parent task. None for root task.
        """
        self._hold = False
        self._item = None
        if parent_task is None:
            return
        # Copied one by one, in this order, through the property setters below.
        for inherited in ('drive', 'items_store', 'task_pool'):
            setattr(self, inherited, getattr(parent_task, inherited))

    @property
    def drive(self):
        """
        The drive this task operates on.
        :rtype: onedrived.api.drives.DriveObject
        """
        return self._drive

    # noinspection PyAttributeOutsideInit
    @drive.setter
    def drive(self, d):
        self._drive = d

    @property
    def items_store(self):
        """
        The item storage this task uses.
        :rtype: onedrived.store.items_db.ItemStorage
        """
        return self._items_store

    # noinspection PyAttributeOutsideInit
    @items_store.setter
    def items_store(self, s):
        self._items_store = s

    @property
    def task_pool(self):
        """
        The task pool this task belongs to.
        :rtype: onedrived.store.task_pool.TaskPool
        """
        return self._task_pool

    # noinspection PyAttributeOutsideInit
    @task_pool.setter
    def task_pool(self, p):
        self._task_pool = p

    @property
    def item_name(self):
        """
        Name of the item referred to.
        :rtype: str
        """
        return self._item_name

    # noinspection PyAttributeOutsideInit
    @item_name.setter
    def item_name(self, n):
        self._item_name = n

    @property
    def rel_parent_path(self):
        """Parent path of the item referred to, relative to the repository root."""
        return self._rel_parent_path

    # noinspection PyAttributeOutsideInit
    @rel_parent_path.setter
    def rel_parent_path(self, v):
        """
        Store the parent path, expressed relative to the repository root.
        :param str v: For root itself use ''; for item under root use '/' and always end with '/'.
        """
        self._rel_parent_path = v

    @property
    def remote_parent_path(self):
        """Remote path of the parent: drive path + '/root:' + relative parent path, minus one trailing '/'."""
        joined = self.drive.drive_path + '/root:' + self.rel_parent_path
        return joined[:-1] if joined[-1] == '/' else joined

    @property
    def local_parent_path(self):
        """Local path of the parent: the drive's local root followed by the relative parent path."""
        return self.drive.config.local_root + self.rel_parent_path

    @property
    def remote_path(self):
        """Remote path of the item: remote parent path, '/', item name."""
        return self.remote_parent_path + '/' + self.item_name

    @property
    def local_path(self):
        """Local path of the item: local parent path followed directly by the item name."""
        return self.local_parent_path + self.item_name

    @property
    def rel_path(self):
        """Path of the item relative to the repository root."""
        return self.rel_parent_path + self.item_name

    @property
    def item_obj(self):
        """The item object attached to this task, or None if none was set."""
        return self._item

    # noinspection PyAttributeOutsideInit
    @item_obj.setter
    def item_obj(self, n):
        self._item = n

    @property
    def should_hold(self):
        """
        Whether this task holds the path it works on.
        :rtype: True | False
        """
        return self._hold

    @should_hold.setter
    def should_hold(self, v):
        """
        If set True, this task will hold the path it works on. It's then the task's responsibility to unhold the path.
        :param True | False v:
        """
        self._hold = v

    def handle(self):
        """Execute the task. Must be overridden by subclasses."""
        raise NotImplementedError('Subclass should override this stub.')
Example #10
0
class FileSystemMonitor(threading.Thread):
    """
    Daemon thread that runs the external ``inotifywait`` command on the local root of every drive and dispatches
    each reported file system event to the matching handler.
    """

    # Seconds to wait after a MOVED_FROM event before the pending delete task is actually queued.
    MOVE_DETECTION_DELAY_SEC = 4
    # Seconds to wait before syncing the parent directory when a moved-away entry has no matching record.
    SYNC_PARENT_DELAY_SEC = 60

    logger = get_logger('fsmon')

    def __init__(self, drive_store, items_store_manager, task_pool):
        """
        :param onedrived.store.drives_db.DriveStorage drive_store:
        :param onedrived.store.items_db.ItemStorageManager items_store_manager:
        :param onedrived.store.task_pool.TaskPool task_pool:
        """
        super().__init__(name='fsmon', daemon=True)
        self._items_store_man = items_store_manager
        self._task_pool = task_pool
        self._all_drives = drive_store.get_all_drives().values()
        self._running = False
        # Handle of the inotifywait subprocess; stays None until run() has spawned it.
        self._subp = None
        # Delete tasks created by MOVED_FROM events that are still waiting for their delay to expire.
        self._delayed_tasks = set()
        # Maps each drive to a TaskBase pre-filled with the drive, its item storage and the task pool.
        self._task_bases = dict()
        self._preprocess_drives()

    @staticmethod
    def _run_later(delay_sec, func, *args):
        """
        Call func(*args) on a timer thread after delay_sec seconds.
        :param int | float delay_sec: Delay in seconds.
        :param callable func: The callback.
        :param args: Positional arguments for the callback.
        :rtype: threading.Timer
        """
        timer = threading.Timer(delay_sec, func, args)
        # Like the monitor itself, a pending timer must not keep the interpreter alive on exit.
        timer.daemon = True
        timer.start()
        return timer

    def _enqueue_delayed_task(self, task):
        """
        Timer callback: queue a delayed task unless it has been withdrawn from the pending set in the meantime.
        :param onedrived.common.tasks.TaskBase task:
        """
        if task in self._delayed_tasks:
            # The task is no longer pending once it is queued; drop it so the set does not grow forever.
            self._delayed_tasks.discard(task)
            self._task_pool.add_task(task)

    def _find_drive(self, path):
        """
        Return the first drive whose local root is a prefix of the given path, or None if there is none.
        :param str path:
        """
        # NOTE(review): a plain prefix test also matches sibling directories sharing the prefix
        # (e.g. '/data/od2' against root '/data/od') -- confirm whether that can occur.
        for d in self._all_drives:
            if path.startswith(d.config.local_root):
                return d
        return None

    def _preprocess_drives(self):
        """Build one TaskBase per drive to serve as the parent of the tasks created for that drive."""
        for drive in self._all_drives:
            task_base = TaskBase(None)
            task_base.drive = drive
            task_base.task_pool = self._task_pool
            task_base.items_store = self._items_store_man.get_item_storage(drive)
            self._task_bases[drive] = task_base

    def _sync_parent_dir_of(self, drive, rel_path):
        """
        Queue a MergeDirTask for the directory named by the last '/'-separated component of rel_path.
        :param onedrived.api.drives.DriveObject drive:
        :param str rel_path: Must contain at least one '/'; otherwise the unpacking below raises ValueError.
        """
        # NOTE(review): callers in this class pass a relative *parent* path; confirm that its format
        # (trailing '/', '' for root) is what this split expects.
        rel_parent_path, dir_name = rel_path.rsplit('/', maxsplit=1)
        task = merge_task.MergeDirTask(self._task_bases[drive], rel_parent_path=rel_parent_path, item_name=dir_name)
        self._task_pool.add_task(task)

    def _process_create_dir_event(self, drive, local_parent_path, dir_name):
        """
        Subroutine to handle the event that a new directory is created. The task is handled synchronously.
        :param onedrived.api.drives.DriveObject drive:
        :param str local_parent_path: Local path to the parent of this newly created directory.
        :param str dir_name: Name of the newly created directory.
        """
        rel_parent_path = _get_rel_parent_path(drive, local_parent_path)
        task = up_task.CreateDirTask(self._task_bases[drive], rel_parent_path=rel_parent_path, item_name=dir_name)
        task.handle()
        if task.should_sync_parent:
            self._sync_parent_dir_of(drive, rel_parent_path)

    def _process_delete_event(self, drive, local_parent_path, ent_name, is_folder):
        """
        Subroutine to handle the event that an entry is deleted.
        :param onedrived.api.drives.DriveObject drive:
        :param str local_parent_path:
        :param str ent_name:
        :param True | False is_folder: True to indicate that the deleted item is a directory.
        """
        rel_parent_path = _get_rel_parent_path(drive, local_parent_path)
        task = delete_task.DeleteItemTask(parent_task=self._task_bases[drive], rel_parent_path=rel_parent_path,
                                          item_name=ent_name, is_folder=is_folder)
        self._task_pool.add_task(task)

    def _process_move_from_event(self, drive, local_parent_path, ent_name, is_folder):
        """
        Subroutine to handle the event that an entry is moved away from a watched directory.
        If the entry has a matching record, a delete task is queued after MOVE_DETECTION_DELAY_SEC seconds;
        otherwise the parent directory is synced after SYNC_PARENT_DELAY_SEC seconds.
        :param onedrived.api.drives.DriveObject drive:
        :param str local_parent_path:
        :param str ent_name:
        :param True | False is_folder: True to indicate that the moved item is a directory.
        """
        # First try finding the item in database.
        rel_parent_path = _get_rel_parent_path(drive, local_parent_path)
        item_store = self._items_store_man.get_item_storage(drive)
        q = item_store.get_items_by_id(local_parent_path=local_parent_path, item_name=ent_name)
        try:
            _, item = q.popitem()
            if item.is_folder != is_folder:
                raise KeyError()
        except KeyError:
            # If the record does not match, sync the parent after some time.
            self._run_later(self.SYNC_PARENT_DELAY_SEC, self._sync_parent_dir_of, drive, rel_parent_path)
            return
        task = delete_task.DeleteItemTask(parent_task=self._task_bases[drive], rel_parent_path=rel_parent_path,
                                          item_name=ent_name, is_folder=is_folder)
        task.item_obj = item
        self._delayed_tasks.add(task)
        self._run_later(self.MOVE_DETECTION_DELAY_SEC, self._enqueue_delayed_task, task)

    def _convert_delete_dir_to_move(self, drive, local_parent_path, ent_name):
        """
        Unfinished: meant to turn a pending DeleteItemTask in the delayed set into a move.
        :raise NotImplementedError: Always.
        """
        # TODO: the original body stopped right after testing isinstance(t, delete_task.DeleteItemTask)
        # for each task in self._delayed_tasks.
        raise NotImplementedError('Converting a delayed delete task to a move is not implemented yet.')

    def _process_move_to_event(self, drive, local_parent_path, ent_name):
        """
        Subroutine to handle the event that an entry is moved into a watched directory.
        Unfinished: the event is currently ignored.
        :param onedrived.api.drives.DriveObject drive:
        :param str local_parent_path:
        :param str ent_name:
        """
        # TODO: the original sketch looked through self._delayed_tasks for a DeleteItemTask, comparing its
        # local_parent_path, item_name and item_obj.is_folder with the moved-in entry, but was never finished.

    def _process_event(self, event_str, local_parent_path, ent_name):
        """
        The event dispatcher.
        :param str event_str:
        :param str local_parent_path:
        :param str ent_name:
        """
        drive = self._find_drive(local_parent_path)
        if drive is None:
            # Without a drive there is no task base to attach a task to.
            self.logger.warning('Ignored event "%s" on path "%s" that belongs to no known drive.',
                                event_str, local_parent_path)
            return
        if event_str == 'CREATE,ISDIR':
            # A new directory was created. The directory might have name conflict with existing item, and might have
            # been deleted by the time CreateDirTask runs. It might have been added new files to as well. Therefore,
            # handle this type of event synchronously.
            # If there is no network, this handle will be blocked. But if no network, tasks will pile up anyway.
            self._process_create_dir_event(drive, local_parent_path, ent_name)
        elif 'CLOSE_WRITE' in event_str:
            # A file that opened in writable mode was closed. If the file is being uploaded, the current upload session
            # should be aborted. Also the task to upload that file could have been fetched by a worker and not traceable
            # in TaskPool.
            pass
        elif 'MOVED_FROM' in event_str:
            # Add delayed move to a queue. Use a new thread to add it to the task queue later.
            # A better way is to use asyncio, but it is not part of Python 3.3 standard (which adds extra dependency).
            self._process_move_from_event(drive, local_parent_path, ent_name, 'ISDIR' in event_str)
        elif 'MOVED_TO' in event_str:
            self._process_move_to_event(drive, local_parent_path, ent_name)
        elif 'DELETE' in event_str:
            self._process_delete_event(drive, local_parent_path, ent_name, 'ISDIR' in event_str)

    def close(self):
        """ An external thread should call close() and then join() this thread (to finish the last task) to stop. """
        if self._running:
            # run() sets _running before the subprocess exists, so the handle may still be None here.
            if self._subp is not None:
                self._subp.kill()
            self._running = False

    def run(self):
        """Spawn inotifywait and process its CSV output until the output stream ends."""
        if not shutil.which('inotifywait'):
            self.logger.critical('Cannot start file system monitor because command "inotifywait" was not found.')
            return
        self._running = True
        self.logger.info('Starting.')
        args = ['inotifywait', '--quiet', '--csv', '-e', 'unmount,create,close_write,delete,move',
                '--exclude', r'\..*\.!od', '-mr'] + [drive.config.local_root for drive in self._all_drives]
        self._subp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        try:
            for row in csv.reader(self._subp.stdout):
                if len(row) != 3:
                    # stderr is merged into stdout, so diagnostics and blank lines show up here as well.
                    self.logger.warning('Ignored unrecognized inotifywait output: %s', row)
                    continue
                local_parent_path, event_str, ent_name = row
                self._process_event(event_str, local_parent_path, ent_name)
        finally:
            self._running = False
            # Make sure the subprocess is gone and reaped even if an event handler raised.
            if self._subp.poll() is None:
                self._subp.kill()
            self._subp.wait()
        self.logger.info('Stopped.')
Example #11
0
class DriveObject:
    """
    Abstracts a specific Drive resource. All items.OneDriveItem objects are generated by DriveObject API.
    """

    # Key and value of the version tag that dump() adds to the serialized drive.
    VERSION_KEY = '@version'
    VERSION_VALUE = 0

    logger = logger_factory.get_logger('DriveObject')

    def __init__(self, root, data, config):
        """
        :param onedrived.api.drives.OneDriveRoot root: The parent root object.
        :param dict[str, str | int | dict] data: The deserialized Drive dictionary.
        :param onedrived.common.drive_config.DriveConfig config: Drive configuration.
        """
        self._data = data
        self.config = config
        self.root = root
        # The drive is the default one when its ID equals the user ID of the account (case-insensitively).
        self.is_default = root.account.profile.user_id.lower() == self.drive_id.lower()
        if self.is_default:
            self.drive_path = '/drive'
        else:
            self.drive_path = '/drives/' + data['id']
        self.drive_uri = root.account.client.API_URI

    @property
    def drive_id(self):
        """
        Return the drive ID.
        :rtype: str
        """
        return self._data['id']

    @property
    def type(self):
        """
        Return a string representing the drive's type. {'personal', 'business'}
        :rtype: str
        """
        return self._data['driveType']

    @property
    def quota(self):
        """
        Return the quota facet built from the drive data.
        :rtype: onedrived.api.facets.QuotaFacet
        """
        return facets.QuotaFacet(self._data['quota'])

    def refresh(self):
        """
        Refresh metadata of the drive object by purging the cached drive and fetching it again from the root.
        """
        self.root.purge_drive_cache(self.drive_id)
        new_drive = self.root.get_drive(self.drive_id)
        # noinspection PyProtectedMember
        self._data = new_drive._data

    def get_item_uri(self, item_id=None, item_path=None):
        """
        Generate URL to the specified item. If both item_id and item_path are None, return root item.
        If both are given, item_id takes precedence.
        :param str | None item_id: (Optional) ID of the specified item.
        :param str | None item_path: (Optional) Path to the specified item. It is appended verbatim to the API URI.
        :rtype: str
        """
        uri = self.drive_uri
        if item_id is not None:
            uri += self.drive_path + '/items/' + item_id
        elif item_path is not None:
            uri += item_path
        else:
            uri += self.drive_path + '/root'
        return uri

    def get_root_dir(self, list_children=True):
        """
        Retrieve the metadata of the root item of the drive.
        :param True | False list_children: Passed on to get_item().
        :rtype: onedrived.api.items.OneDriveItem
        """
        return self.get_item(None, None, list_children)

    def build_item(self, data):
        """
        Wrap a deserialized item dictionary into an item object bound to this drive.
        :param dict data:
        :rtype: onedrived.api.items.OneDriveItem
        """
        return items.OneDriveItem(self, data)

    def get_item(self, item_id=None, item_path=None, list_children=True):
        """
        Retrieve the metadata of an item from OneDrive server.
        :param str | None item_id:  ID of the item. Required if item_path is None.
        :param str | None item_path: Path to the item relative to drive root. Required if item_id is None.
        :param True | False list_children: If True, request the item with '?expand=children'.
        :rtype: onedrived.api.items.OneDriveItem
        """
        uri = self.get_item_uri(item_id, item_path)
        if list_children:
            uri += '?expand=children'
        request = self.root.account.session.get(uri)
        return items.OneDriveItem(self, request.json())

    def get_children(self, item_id=None, item_path=None):
        """
        Assuming the target item is a directory, return a collection of all its children items.
        :param str | None item_id: (Optional) ID of the target directory.
        :param str | None item_path: (Optional) Path to the target directory.
        :rtype: onedrived.api.items.ItemCollection
        """
        uri = self.get_item_uri(item_id, item_path)
        if item_path is not None:
            # A ':' is appended after the path before the '/children' segment.
            uri += ':'
        uri += '/children'
        request = self.root.account.session.get(uri)
        return items.ItemCollection(self, request.json())

    def create_dir(self,
                   name,
                   parent_id=None,
                   parent_path=None,
                   conflict_behavior=options.NameConflictBehavior.DEFAULT):
        """
        Create a new directory under the specified parent directory.
        :param str name: Name of the new directory.
        :param str | None parent_id: (Optional) ID of the parent directory item.
        :param str | None parent_path: (Optional) Path to the parent directory item.
        :param str conflict_behavior: (Optional) What to do if name exists. One value from options.nameConflictBehavior.
        :rtype: onedrived.api.items.OneDriveItem
        """
        data = {
            'name': name,
            'folder': {},
            '@name.conflictBehavior': conflict_behavior
        }
        # NOTE(review): unlike get_children(), no ':' is appended after a path-based parent before '/children'.
        # Confirm that callers pass a parent_path for which this yields the intended URL.
        uri = self.get_item_uri(parent_id, parent_path) + '/children'
        request = self.root.account.session.post(
            uri, json=data, ok_status_code=requests.codes.created)
        return items.OneDriveItem(self, request.json())

    def upload_file(self,
                    filename,
                    data,
                    size,
                    parent_id=None,
                    parent_path=None,
                    conflict_behavior=options.NameConflictBehavior.REPLACE):
        """
        Upload a file object to the specified parent directory, the method of which is determined by file size.
        Content no larger than config.max_put_size_bytes goes through put_file(); anything larger goes through
        put_large_file().
        :param str filename: Name of the remote file.
        :param file data: An opened file object available for reading.
        :param int size: Size of the content to upload.
        :param str | None parent_id: (Optional) ID of the parent directory.
        :param str | None parent_path: (Optional) Path to the parent directory.
        :param str conflict_behavior: (Optional) Specify the behavior to use if the file already exists.
        :rtype: onedrived.api.items.OneDriveItem
        """
        if size <= self.config.max_put_size_bytes:
            return self.put_file(filename, data, parent_id, parent_path,
                                 conflict_behavior)
        else:
            return self.put_large_file(filename, data, size, parent_id,
                                       parent_path, conflict_behavior)

    def put_large_file(self,
                       filename,
                       data,
                       size,
                       parent_id=None,
                       parent_path=None,
                       conflict_behavior=options.NameConflictBehavior.REPLACE):
        """
        Upload a large file by splitting it into fragments of at most config.max_put_size_bytes bytes.
        https://github.com/OneDrive/onedrive-api-docs/blob/master/items/upload_large_files.md
        NOTE(review): this method returns nothing, although upload_file() returns its result as the uploaded
        item. Building the item from the final fragment's response needs confirmation of how the session handles
        the status code of that response.
        :param str filename: Name of the remote file.
        :param file data: An opened file object available for reading. It must support seek().
        :param int size: Size of the content to upload.
        :param str | None parent_id: (Optional) ID of the parent directory.
        :param str | None parent_path: (Optional) Path to the parent directory.
        :param str conflict_behavior: (Optional) Specify the behavior to use if the file already exists.
        """
        # Create an upload session.
        if parent_id is not None:
            parent_id += ':'
        uri = self.get_item_uri(
            parent_id, parent_path) + '/' + filename + ':/upload.createSession'
        payload = {'item': {'name': filename}}
        if conflict_behavior != options.NameConflictBehavior.REPLACE:
            payload['item']['@name.conflictBehavior'] = conflict_behavior
        size_str = str(size)
        request = self.root.account.session.post(uri, json=payload)
        current_session = resources.UploadSession(request.json())

        # Upload content.
        # Use local value rather than that given in session.
        expected_ranges = [(0, size - 1)]
        while expected_ranges:  # Ranges must come in order
            f, t = expected_ranges.pop(0)  # Both inclusive
            if t is None or t >= size:
                t = size - 1
            next_cursor = f + self.config.max_put_size_bytes
            if t >= next_cursor:
                # The range does not fit in one fragment; put the remainder back for the next round.
                expected_ranges.insert(0, (next_cursor, t))
                t = next_cursor - 1
            data.seek(f)
            chunk = data.read(t - f + 1)
            # The header value must name its unit: "bytes <first>-<last>/<total>".
            headers = {'Content-Range': 'bytes ' + str(f) + '-' + str(t) + '/' + size_str}
            request = self.root.account.session.put(
                current_session.upload_url,
                data=chunk,
                headers=headers,
                ok_status_code=requests.codes.accepted)
            current_session.update(request.json())
            # TODO: handle timeout error
            # https://github.com/OneDrive/onedrive-api-docs/blob/master/items/upload_large_files.md#request-upload-status

    def put_file(self,
                 filename,
                 data,
                 parent_id=None,
                 parent_path=None,
                 conflict_behavior=options.NameConflictBehavior.REPLACE):
        """
        Use HTTP PUT to upload a file that is relatively small (less than 100M).
        :param str filename: Name of the remote file.
        :param file data: An opened file object available for reading.
        :param str | None parent_id: (Optional) ID of the parent directory.
        :param str | None parent_path: (Optional) Path to the parent directory.
        :param str conflict_behavior: (Optional) Specify the behavior to use if the file already exists.
        :rtype: onedrived.api.items.OneDriveItem
        """
        if parent_id is not None:
            parent_id += ':'
        uri = self.get_item_uri(parent_id,
                                parent_path) + '/' + filename + ':/content'
        if conflict_behavior != options.NameConflictBehavior.REPLACE:
            # The conflict behavior is only sent when it differs from REPLACE.
            uri += '?@name.conflictBehavior=' + conflict_behavior
        request = self.root.account.session.put(
            uri, data=data, ok_status_code=requests.codes.created)
        return items.OneDriveItem(self, request.json())

    def download_file(self, file, size, item_id=None, item_path=None):
        """
        Download the target item to target file object. If the file is too large, download by fragments of
        config.max_get_size_bytes bytes each, written to the file in order.
        :param file file: An open file object available for writing binary data.
        :param int size: Expected size of the item.
        :param str | None item_id: ID of the target file.
        :param str | None item_path: Path to the target file.
        """
        if size <= self.config.max_get_size_bytes:
            self.get_file_content(item_id, item_path, file=file)
            return
        t = 0
        while t < size:
            f = t
            t += self.config.max_get_size_bytes - 1  # Both inclusive.
            if t >= size:
                t = size - 1
            self.get_file_content(item_id,
                                  item_path,
                                  range_bytes=(f, t),
                                  file=file)
            # Move the cursor to the first byte of the next fragment.
            t += 1

    def get_file_content(self,
                         item_id=None,
                         item_path=None,
                         range_bytes=None,
                         file=None):
        """
        Get the content of an item.
        :param str | None item_id: ID of the target file.
        :param str | None item_path: Path to the target file.
        :param (int, int) | None range_bytes: Range of the bytes to download (both ends inclusive).
        :param file | None file: An opened file object. If set, write the content there. Otherwise return the content.
        :return: The content if file is None; otherwise None.
        :rtype: bytes | None
        """
        uri = self.get_item_uri(item_id, item_path) + '/content'
        if range_bytes is None:
            headers = None
            ok_status_code = requests.codes.ok
        else:
            headers = {'Range': 'bytes=%d-%d' % range_bytes}
            ok_status_code = requests.codes.partial
        request = self.root.account.session.get(uri,
                                                headers=headers,
                                                ok_status_code=ok_status_code)
        if file is not None:
            file.write(request.content)
        else:
            return request.content

    def delete_item(self, item_id=None, item_path=None):
        """
        https://github.com/OneDrive/onedrive-api-docs/blob/master/items/delete.md
        Delete the specified item on OneDrive server.
        :param str | None item_id:  ID of the item. Required if item_path is None.
        :param str | None item_path: Path to the item relative to drive root. Required if item_id is None.
        """
        uri = self.get_item_uri(item_id, item_path)
        self.root.account.session.delete(
            uri, ok_status_code=requests.codes.no_content)

    def update_item(self,
                    item_id=None,
                    item_path=None,
                    new_name=None,
                    new_description=None,
                    new_parent_reference=None,
                    new_file_system_info=None):
        """
        Update the metadata of the specified item.
        :param str | None item_id: (Optional) ID of the target item.
        :param str | None item_path: (Optional) Path to the target item.
        :param str | None new_name: (Optional) If set, update the item metadata with the new name.
        :param str | None new_description: (Optional) If set, update the item metadata with the new description.
        :param onedrived.api.resources.ItemReference | None new_parent_reference: (Optional) If set,
        move the item.
        :param onedrived.api.facets.FileSystemInfoFacet | None new_file_system_info: (Optional) If set, update the
        client-wise timestamps.
        :rtype: onedrived.api.items.OneDriveItem
        :raise ValueError: If neither item_id nor item_path is given, or if no new value is given.
        """
        if item_id is None and item_path is None:
            raise ValueError('Root is immutable. A specific item is required.')
        data = {}
        if new_name is not None:
            data['name'] = new_name
        if new_description is not None:
            data['description'] = new_description
        if new_parent_reference is not None:
            data['parentReference'] = new_parent_reference.data
        if new_file_system_info is not None:
            data['fileSystemInfo'] = new_file_system_info.data
        if not data:
            raise ValueError('Nothing is to change.')
        uri = self.get_item_uri(item_id, item_path)
        # NOTE(review): the payload is passed positionally here while other calls use json=...;
        # confirm that the session's patch() serializes it as intended.
        request = self.root.account.session.patch(uri, data)
        return items.OneDriveItem(self, request.json())

    def copy_item(self,
                  dest_reference,
                  item_id=None,
                  item_path=None,
                  new_name=None):
        """
        Copy an item (including any children) on OneDrive under a new parent.
        :param onedrived.api.resources.ItemReference dest_reference: Reference to new parent.
        :param str | None item_id: (Optional) ID of the source item. Required if item_path is None.
        :param str | None item_path: (Optional) Path to the source item. Required if item_id is None.
        :param str | None new_name: (Optional) If set, use this name for the copied item.
        :rtype: onedrived.api.resources.AsyncCopySession
        :raise ValueError: If dest_reference is not an ItemReference, or if no source item is specified.
        """
        if not isinstance(dest_reference, resources.ItemReference):
            raise ValueError('Destination should be an ItemReference object.')
        if item_id is None and item_path is None:
            raise ValueError('Source of copy must be specified.')
        uri = self.get_item_uri(item_id, item_path)
        if item_path is not None:
            uri += ':'
        uri += '/action.copy'
        data = {'parentReference': dest_reference.data}
        if new_name is not None:
            data['name'] = new_name
        headers = {'Prefer': 'respond-async'}
        request = self.root.account.session.post(
            uri,
            json=data,
            headers=headers,
            ok_status_code=requests.codes.accepted)
        # The copy session is built from the response headers, not from the response body.
        return resources.AsyncCopySession(self, request.headers)

    def get_thumbnail(self):
        """
        Not implemented.
        :raise NotImplementedError: Always.
        """
        raise NotImplementedError('The API feature is not used yet.')

    def search(self, keyword, select=None, item_id=None, item_path=None):
        """
        Use a keyword to search for items within the specified directory (default: root).
        :param str keyword: Keyword for the search.
        :param [str] | None select: Only fetch the specified fields.
        :param str | None item_id: (Optional) ID of the item to search within.
        :param str | None item_path: (Optional) Path to the item to search within.
        :return onedrived.api.items.ItemCollection:
        """
        params = {'q': keyword}
        if select is not None:
            params['select'] = ','.join(select)
        uri = self.get_item_uri(item_id, item_path) + '/view.search'
        request = self.root.account.session.get(uri, params=params)
        return items.ItemCollection(self, request.json())

    def get_changes(self):
        """
        Not implemented.
        :raise NotImplementedError: Always.
        """
        raise NotImplementedError('The API feature is not used yet.')

    def get_special_dir(self, name):
        """
        Not implemented.
        :raise NotImplementedError: Always.
        """
        raise NotImplementedError('The API feature is not used yet.')

    def dump(self):
        """
        Serialize the drive (its config dump, its data dictionary and a version tag) to a JSON string.
        :rtype: str
        """
        data = {
            'config_dump': self.config.dump(),
            'data': self._data,
            self.VERSION_KEY: self.VERSION_VALUE
        }
        return json.dumps(data)

    @classmethod
    def load(cls, drive_root, account_id, account_type, s):
        """
        Rebuild a drive object from a string produced by dump() and try to register it with the drive root.
        A ValueError raised by the registration is logged as a warning and the drive is returned anyway.
        :param onedrived.api.drives.OneDriveRoot drive_root: The root object to attach the drive to.
        :param account_id: Passed on to drive_root.add_cached_drive().
        :param account_type: Passed on to drive_root.add_cached_drive().
        :param str s: The serialized drive.
        :rtype: onedrived.api.drives.DriveObject
        """
        data = json.loads(s)
        # Instantiate through cls so that a subclass loads instances of itself.
        drive = cls(drive_root, data['data'],
                    drive_config.DriveConfig.load(data['config_dump']))
        try:
            drive_root.add_cached_drive(account_id, account_type, drive)
        except ValueError as e:
            cls.logger.warning(
                'Faild to register deserialized drive %s to drive root: %s',
                drive.drive_id, e)
        return drive