예제 #1
0
def download_queue_processor():
    """
    Consume ``download_queue`` forever, downloading queued Box files with a simple re-try mechanism.

    Every queue entry is an ``(item, path, oauth)`` tuple. Entries whose ``item['type']`` is not
    ``'file'`` are acknowledged and skipped. A file is downloaded to ``path`` when there is no cached
    record for it, the cached etag differs from the item's etag, or ``path`` does not exist locally.

    A download interrupted by a connection error is re-tried (up to 15 attempts, 5 seconds apart).
    A ``BoxAPIException`` is logged and ends the attempts for that entry.

    :return: never returns normally (infinite loop)
    """
    retry_limit = 15
    while True:
        # ``get`` blocks until an entry is available, so no emptiness check is needed beforehand
        # (the previous ``download_queue.not_empty`` guard tested an object that, on a standard
        # ``queue.Queue``, is always truthy).
        item, path, oauth = download_queue.get()
        try:
            if item['type'] != 'file':
                continue
            info = redis_get(r_c, item) if r_c.exists(redis_key(item['id'])) else None
            # hack because we did not use to store the file_path, but do not want to force a download
            if info and 'file_path' not in info:
                info['file_path'] = path
                r_c.set(redis_key(item['id']), json.dumps(info))
                r_c.set('diy_crate.last_save_time_stamp', int(time.time()))
            # no version, or diff version, or the file does not exist locally
            needs_download = not info or info['etag'] != item['etag'] or not os.path.exists(path)
            if not needs_download:
                continue
            if os.path.basename(path).startswith('.~lock'):  # avoid downloading lock files
                continue
            for attempt in range(retry_limit):
                try:
                    with open(path, 'wb') as item_handler:
                        crate_logger.debug('About to download: {obj_name}, '
                                           '{obj_id}'.format(obj_name=item['name'], obj_id=item['id']))
                        item.download_to(item_handler)
                        path_to_add = os.path.dirname(path)
                        wm.add_watch(path=path_to_add, mask=mask, rec=True, auto_add=True)
                        notify_user_with_gui('Downloaded: {}'.format(path))
                    # must be read before redis_set stores the new record
                    was_versioned = r_c.exists(redis_key(item['id']))
                    redis_set(r_c, item, os.path.getmtime(path), box_dir_path=BOX_DIR,
                              fresh_download=not was_versioned, folder=os.path.dirname(path))
                    break
                except BoxAPIException as e:
                    crate_logger.debug(traceback.format_exc())
                    if e.status == 404:
                        crate_logger.debug('Apparently item: {obj_id}, {path} has been deleted, '
                                           'right before we tried to download'.format(obj_id=item['id'],
                                                                                      path=path))
                    break
                except (ConnectionResetError, ConnectionError):
                    # Handled inside the loop so that the next iteration really is a re-try.
                    crate_logger.debug(traceback.format_exc())
                    time.sleep(5)
            else:
                crate_logger.debug('Giving up on downloading {path} after {limit} '
                                   'attempts'.format(path=path, limit=retry_limit))
        finally:
            # Acknowledge the entry exactly once, even when an unexpected exception propagates.
            download_queue.task_done()
예제 #2
0
    def process_event(self, event, operation):
        """
        Mirror a local filesystem event onto the corresponding Box folder.

        The Box folder matching the event's local directory is resolved with ``self.traverse_path``
        starting from the Box root folder (id ``'0'``). Its entries are then scanned for an item that
        has the same name (and kind) as the local file or folder the event refers to.

        :param event: the filesystem event; for ``'move'`` a ``(src_event, dest_event)`` pair.
            The ``path``, ``pathname``, ``name`` and (for ``'delete'``) ``maskname`` attributes are read.
        :param operation: one of ``'delete'``, ``'move'``, ``'create'``, ``'modify'`` or
            ``'real_close'``; any other value is ignored.
        :return: None
        """

        def iter_entries(folder, limit=100):
            """
            Yield the entries of ``folder``, requesting ``limit`` entries per page.

            Flattening the paging into a single iterator means one ``break`` ends the whole scan;
            with nested page/entry loops a ``break`` only left the current page and the "found"
            flags were then recomputed from entries of the following pages.
            """
            total_count = folder['item_collection']['total_count']
            for offset in range(0, total_count, limit):
                for folder_entry in folder.get_items(offset=offset, limit=limit):
                    yield folder_entry

        def log_folder_mismatch(folder, local_dir):
            """Log (without failing) when the resolved Box folder is not named like ``local_dir``."""
            # if we're modifying in root box dir, then we've already found the folder
            if BOX_DIR in (local_dir, local_dir[:-1],):
                return
            last_dir = os.path.split(local_dir)[-1]
            if folder['name'] != last_dir:
                crate_logger.debug('{} not equals {}'.format(folder['name'], last_dir))

        # NOTE(review): ``self.upload_queue`` receives both ``[time_stamp, callable, oauth]`` lists and
        # bare ``partial`` objects below; the consumer is not visible here -- confirm it handles both.
        if operation == 'delete':
            crate_logger.debug('Doing a delete on {}'.format(event.pathname))
            folders_to_traverse = self.folders_to_traverse(event.path)
            crate_logger.debug(folders_to_traverse)
            client = Client(self.oauth)
            box_folder = client.folder(folder_id='0').get()
            cur_box_folder = self.traverse_path(client, event, box_folder, folders_to_traverse)
            log_folder_mismatch(cur_box_folder, event.path)
            event_was_for_dir = 'IN_ISDIR'.lower() in event.maskname.lower()
            for entry in iter_entries(cur_box_folder):
                if not event_was_for_dir and entry['type'] == 'file' and entry['name'] == event.name:
                    if entry['id'] not in self.files_from_box:
                        cur_file = client.file(file_id=entry['id']).get()
                        if cur_file.delete():  # does not actually check correctly...unless not "ok" is false
                            r_c.delete(redis_key(cur_file['id']))
                    else:
                        self.files_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    break
                elif event_was_for_dir and entry['type'] == 'folder' and entry['name'] == event.name:
                    if entry['id'] not in self.folders_from_box:
                        self.get_folder(client, entry['id']).delete()
                    else:
                        self.folders_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    break
        elif operation == 'move':
            crate_logger.debug('Doing a move on: {}'.format(event))
            src_event, dest_event = event
            folders_to_traverse = self.folders_to_traverse(dest_event.path)
            crate_logger.debug(folders_to_traverse)
            client = Client(self.oauth)
            box_folder = client.folder(folder_id='0').get()
            # cur_box_folder is the Box folder for the destination directory
            cur_box_folder = self.traverse_path(client, dest_event, box_folder, folders_to_traverse)
            src_folders_to_traverse = self.folders_to_traverse(src_event.path)
            src_box_folder = self.traverse_path(client, src_event, box_folder, src_folders_to_traverse)
            is_rename = src_event.path == dest_event.path
            is_file = os.path.isfile(dest_event.pathname)
            is_dir = os.path.isdir(dest_event.pathname)
            # Sticky flags: once the source item has been found (and handled) they stay set, instead of
            # reflecting only the last entry that happened to be scanned.
            did_find_src_file = False
            did_find_src_folder = False
            move_from_remote = False
            # The whole source folder is scanned (no break) so that an entry already carrying the
            # destination name is still noticed after the source item was handled.
            for entry in iter_entries(src_box_folder):
                is_src_file = is_file and entry['name'] == src_event.name and entry['type'] == 'file'
                is_src_folder = is_dir and entry['name'] == src_event.name and entry['type'] == 'folder'
                if is_src_file:
                    did_find_src_file = True
                    src_file = client.file(file_id=entry['id']).get()
                    if is_rename:
                        src_file.rename(dest_event.name)
                    else:
                        did_find_cur_file = False
                        for cur_entry in iter_entries(cur_box_folder):
                            matching_name = cur_entry['name'] == dest_event.name
                            did_find_cur_file = matching_name and isinstance(cur_entry, File)
                            if did_find_cur_file:
                                # the destination already holds a file of that name: overwrite its
                                # contents and drop the source file
                                self.upload_queue.put([os.path.getmtime(dest_event.pathname),
                                                       partial(cur_entry.update_contents,
                                                               dest_event.pathname),
                                                       self.oauth])
                                self.upload_queue.put(partial(src_file.delete))
                                break
                        if not did_find_cur_file:
                            src_file.move(cur_box_folder)
                            # do not yet support moving and renaming in one go
                            assert src_file['name'] == dest_event.name
                elif is_src_folder:
                    did_find_src_folder = True
                    src_folder = client.folder(folder_id=entry['id']).get()
                    if is_rename:
                        src_folder.rename(dest_event.name)
                    else:
                        # NOTE: moving a folder into a parent that already holds a folder of the same
                        # name is not supported -- the contents would need to be merged.
                        src_folder.move(cur_box_folder)
                        # do not yet support moving and renaming in one go
                        assert src_folder['name'] == dest_event.name
                elif entry['name'] == dest_event.name:
                    move_from_remote = True
            if not move_from_remote:  # if it was moved from a different folder on remote, could be false still
                dest_folders_to_traverse = self.folders_to_traverse(dest_event.path)
                dest_box_folder = self.traverse_path(client, dest_event, box_folder, dest_folders_to_traverse)
                # count and listing both come from the freshly resolved destination folder
                for entry in iter_entries(dest_box_folder):
                    if entry['name'] == dest_event.name:
                        move_from_remote = True
                        break
                if not move_from_remote:
                    if is_file and not did_find_src_file:
                        # src file [should] no longer exist[s]. this file did not originate in box, too.
                        last_modified_time = os.path.getmtime(dest_event.pathname)
                        self.upload_queue.put([last_modified_time,
                                               partial(cur_box_folder.upload, dest_event.pathname, dest_event.name),
                                               self.oauth])
                    elif is_dir and not did_find_src_folder:
                        self.upload_queue.put(partial(cur_box_folder.create_subfolder, dest_event.name))
                        wm.add_watch(dest_event.pathname, rec=True, mask=mask)

        elif operation == 'create':
            crate_logger.debug("Creating: {}".format(event.pathname))
            folders_to_traverse = self.folders_to_traverse(event.path)
            crate_logger.debug(folders_to_traverse)
            client = Client(self.oauth)
            box_folder = client.folder(folder_id='0').get()
            # if we're modifying in root box dir, then we've already found the folder
            is_base = BOX_DIR in (event.path, event.path[:-1],)
            cur_box_folder = self.traverse_path(client, event, box_folder, folders_to_traverse)
            last_dir = os.path.split(event.path)[-1]
            if not is_base:
                assert cur_box_folder['name'] == last_dir
            is_file = os.path.isfile(event.pathname)
            is_dir = os.path.isdir(event.pathname)
            did_find_the_file = False
            did_find_the_folder = False
            for entry in iter_entries(cur_box_folder):
                did_find_the_file = is_file and entry['type'] == 'file' and entry['name'] == event.name
                did_find_the_folder = is_dir and entry['type'] == 'folder' and entry['name'] == event.name
                if did_find_the_file:
                    if entry['id'] not in self.files_from_box:
                        # more accurately, was this created offline?
                        crate_logger.debug('We should not be able to create a '
                                           'file that exists in box; should be a close/modify.')
                        crate_logger.debug('Update the file: {}'.format(event.pathname))
                        a_file = client.file(file_id=entry['id']).get()
                        # seem it is possible to get more than one create (without having a delete in between)
                        self.upload_queue.put(partial(a_file.update_contents, event.pathname))
                    else:
                        self.files_from_box.remove(entry['id'])  # just downloaded it
                    break
                elif did_find_the_folder:
                    # we are not going to re-create the folder, but we are also not checking if the contents in this
                    # local creation are different from the contents in box.
                    if entry['id'] in self.folders_from_box:
                        self.folders_from_box.remove(entry['id'])  # just downloaded it
                    break
            if is_file and not did_find_the_file:
                crate_logger.debug('Upload the file: {}'.format(event.pathname))
                # NOTE(review): this branch uses getctime while every other branch uses getmtime -- confirm.
                last_modified_time = os.path.getctime(event.pathname)
                self.upload_queue.put([last_modified_time, partial(cur_box_folder.upload, event.pathname, event.name),
                                       self.oauth])
            elif is_dir and not did_find_the_folder:
                crate_logger.debug('Upload the folder: {}'.format(event.pathname))
                self.upload_queue.put(partial(cur_box_folder.create_subfolder, event.name))
                wm.add_watch(event.pathname, rec=True, mask=mask)
        elif operation == 'modify':
            crate_logger.debug("{op}...: {pathname}".format(op=operation, pathname=event.pathname))
            folders_to_traverse = self.folders_to_traverse(event.path)
            crate_logger.debug(folders_to_traverse)
            client = Client(self.oauth)
            folder_id = '0'
            retry_limit = 5
            cur_box_folder = get_box_folder(client, None, folder_id, retry_limit)
            cur_box_folder = self.traverse_path(client, event, cur_box_folder, folders_to_traverse)
            log_folder_mismatch(cur_box_folder, event.path)
            is_file = os.path.isfile(event.pathname)
            is_dir = os.path.isdir(event.pathname)
            did_find_the_file = False
            did_find_the_folder = False
            for entry in iter_entries(cur_box_folder):
                did_find_the_file = is_file and entry['type'] == 'file' and entry['name'] == event.name
                did_find_the_folder = is_dir and entry['type'] == 'folder' and entry['name'] == event.name
                if did_find_the_file:
                    last_modified_time = os.path.getmtime(event.pathname)
                    if entry['id'] not in self.files_from_box:
                        cur_file = client.file(file_id=entry['id']).get()
                        was_versioned = r_c.exists(redis_key(cur_file['id']))
                        try:
                            if was_versioned:
                                item_version = redis_get(r_c, cur_file)
                            else:
                                item_version = {'fresh_download': True, 'etag': '0', 'time_stamp': 0}
                            # Skip the upload when Box still has the version we recorded and the local
                            # file is not newer than that record (the former fresh/non-fresh cases
                            # applied the very same time stamp comparison).
                            is_new_time_stamp = item_version['time_stamp'] >= last_modified_time
                            can_update = not (cur_file['etag'] == item_version['etag'] and is_new_time_stamp)
                            if can_update:
                                self.upload_queue.put([last_modified_time,
                                                       partial(cur_file.update_contents, event.pathname),
                                                       self.oauth])
                            else:
                                crate_logger.debug('Skipping the update because not versioned: {not_versioned}, '
                                                   'fresh_download: {fresh_download}, '
                                                   'version time_stamp >= '
                                                   'new time stamp: {new_time_stamp}, '
                                                   'event pathname: {path_name}, '
                                                   'cur file id: {obj_id}'.format(not_versioned=not was_versioned,
                                                                                  fresh_download=item_version[
                                                                                      'fresh_download'],
                                                                                  new_time_stamp=is_new_time_stamp,
                                                                                  path_name=event.pathname,
                                                                                  obj_id=cur_file['id']))
                        except Exception:
                            # best effort: a broken cache record must not stop event processing
                            crate_logger.debug(traceback.format_exc())
                    else:
                        self.files_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    break
                elif did_find_the_folder:
                    if entry['id'] not in self.folders_from_box:
                        crate_logger.debug('Cannot create a subfolder when it already exists: {}'.format(event.pathname))
                    else:
                        self.folders_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    break
            if is_file and not did_find_the_file:
                crate_logger.debug('Uploading contents...: {}'.format(event.pathname))
                last_modified_time = os.path.getmtime(event.pathname)
                self.upload_queue.put([last_modified_time,
                                       partial(cur_box_folder.upload, event.pathname, event.name),
                                       self.oauth])
            if is_dir and not did_find_the_folder:
                crate_logger.debug('Creating a sub-folder...: {}'.format(event.pathname))
                self.upload_queue.put(partial(cur_box_folder.create_subfolder, event.name))
                wm.add_watch(event.pathname, rec=True, mask=mask)
        elif operation == 'real_close':
            crate_logger.debug("Real  close...: {}".format(event.pathname))
            folders_to_traverse = self.folders_to_traverse(event.path)
            crate_logger.debug(folders_to_traverse)
            client = Client(self.oauth)
            cur_box_folder = get_box_folder(client, None, '0', 5)
            cur_box_folder = self.traverse_path(client, event, cur_box_folder, folders_to_traverse)
            log_folder_mismatch(cur_box_folder, event.path)
            is_file = os.path.isfile(event.pathname)
            is_dir = os.path.isdir(event.pathname)
            did_find_the_file = False
            did_find_the_folder = False
            for entry in iter_entries(cur_box_folder):
                did_find_the_file = is_file and entry['type'] == 'file' and entry['name'] == event.name
                did_find_the_folder = is_dir and entry['type'] == 'folder' and entry['name'] == event.name
                # Stop on a folder match as well; otherwise the flag is overwritten by the next entry
                # and an already existing folder would be created again.
                if did_find_the_file or did_find_the_folder:
                    break
            # not a box file/folder (though could have been copied from a local box item)
            if is_file and not did_find_the_file:
                last_modified_time = os.path.getmtime(event.pathname)
                self.upload_queue.put([last_modified_time,
                                       partial(cur_box_folder.upload, event.pathname, event.name),
                                       self.oauth])
            elif is_dir and not did_find_the_folder:
                cur_box_folder.create_subfolder(event.name)
                wm.add_watch(event.pathname, rec=True, mask=mask, auto_add=True)
예제 #3
0
def walk_and_notify_and_download_tree(path, box_folder, client, oauth_obj, p_id=None):
    """
    Walk a Box folder recursively, mirroring it under ``path`` and watching the local directories.

    For the given folder this:

    * creates ``path`` when it is missing and adds a watch on it,
    * queues an upload for every local regular file whose name is not present in the Box folder,
    * creates a local directory for every Box sub-folder and recurses into it (re-trying on
      connection errors),
    * queues a download for every other Box item,
    * stores a cache record for the folder, including the ids of its direct entries.

    :param path: local directory that mirrors ``box_folder``
    :param box_folder: Box folder to walk; only its ``'id'`` is read
    :param client: Box client used to fetch folders
    :param oauth_obj: passed along unchanged with every queued upload/download
    :param p_id: id of the parent Box folder, stored in this folder's cache record
    :return: None
    """

    def iter_box_items(folder, limit=100):
        """Yield the entries of ``folder``, requesting ``limit`` entries per page."""
        total_count = folder['item_collection']['total_count']
        for offset in range(0, total_count, limit):
            for folder_item in folder.get_items(limit=limit, offset=offset):
                yield folder_item

    # Previously a missing directory left ``local_files`` unbound (NameError further down);
    # create it instead, as is already done for every sub-folder before recursing.
    if not os.path.isdir(path):
        os.makedirs(path)
    wm.add_watch(path, mask, rec=True, auto_add=True)
    local_files = os.listdir(path)
    b_folder = client.folder(folder_id=box_folder['id']).get()
    # a set keeps the "already on Box?" check O(1) per local file
    remote_names = {box_item['name'] for box_item in iter_box_items(b_folder)}
    for local_file in local_files:  # prioritize the local_files not yet on box's server.
        if local_file in remote_names:
            continue
        local_path = os.path.join(path, local_file)
        if os.path.isfile(local_path):
            upload_queue.put([os.path.getmtime(local_path), partial(b_folder.upload, local_path, local_file),
                              oauth_obj])
    ids_in_folder = []
    for box_item in iter_box_items(b_folder):
        ids_in_folder.append(box_item['id'])
        if box_item['type'] == 'folder':
            local_path = os.path.join(path, box_item['name'])
            fresh_download = False
            if not os.path.isdir(local_path):
                os.mkdir(local_path)
                fresh_download = True
            retry_limit = 15
            for i in range(0, retry_limit):
                try:
                    redis_set(cache_client=r_c, cloud_item=box_item,
                              last_modified_time=os.path.getmtime(local_path),
                              box_dir_path=BOX_DIR, fresh_download=fresh_download,
                              folder=os.path.dirname(local_path))
                    walk_and_notify_and_download_tree(local_path,
                                                      client.folder(folder_id=box_item['id']).get(),
                                                      client, oauth_obj,
                                                      p_id=box_folder['id'])
                    break
                except BoxAPIException as e:
                    crate_logger.debug(traceback.format_exc())
                    if e.status == 404:
                        crate_logger.debug('Box says: {obj_id}, '
                                           '{obj_name}, is a 404 status.'.format(obj_id=box_item['id'],
                                                                                 obj_name=box_item[
                                                                                     'name']))
                        crate_logger.debug(
                            'But, this is a folder, we do not handle recursive folder deletes correctly yet.')
                        break
                    # any other Box error falls through to the next attempt
                except (ConnectionError, ConnectionResetError, BrokenPipeError):
                    crate_logger.debug('Attempt {idx}/{limit}; {the_trace}'.format(the_trace=traceback.format_exc(),
                                                                                   idx=i+1, limit=retry_limit))
        else:
            # NOTE(review): only a queue ``put`` happens here, which presumably never raises
            # BoxAPIException; the handler is kept as-is -- confirm whether it can be dropped.
            try:
                file_obj = box_item
                download_queue.put((file_obj, os.path.join(path, box_item['name']), oauth_obj))
            except BoxAPIException as e:
                crate_logger.debug(traceback.format_exc())
                if e.status == 404:
                    crate_logger.debug('Box says: {obj_id}, {obj_name}, '
                                       'is a 404 status.'.format(obj_id=box_item['id'], obj_name=box_item['name']))
                    if r_c.exists(redis_key(box_item['id'])):
                        crate_logger.debug('Deleting {obj_id}, '
                                           '{obj_name}'.format(obj_id=box_item['id'], obj_name=box_item['name']))
                        r_c.delete(redis_key(box_item['id']))
    redis_set(cache_client=r_c, cloud_item=b_folder, last_modified_time=os.path.getmtime(path),
              box_dir_path=BOX_DIR, fresh_download=not r_c.exists(redis_key(box_folder['id'])),
              folder=os.path.dirname(path),
              sub_ids=ids_in_folder, parent_id=p_id)