def _download_item_once(item, path):
    """
    Make a single attempt to download ``item`` to ``path`` and record it in the cache.

    A ``BoxAPIException`` is logged and treated as final: nothing is recorded in the cache and
    the exception does not escape. Connection errors are not handled here; they propagate so
    that the caller can decide whether to try again.

    :param item: Box file object; must support ``item['id']``, ``item['name']`` and ``download_to``.
    :param path: local destination path. It is opened with ``'wb'``, so any existing content is
        truncated before the download starts.
    :return:
    """
    try:
        with open(path, 'wb') as item_handler:
            crate_logger.debug('About to download: {obj_name}, '
                               '{obj_id}'.format(obj_name=item['name'], obj_id=item['id']))
            item.download_to(item_handler)
            path_to_add = os.path.dirname(path)
            wm.add_watch(path=path_to_add, mask=mask, rec=True, auto_add=True)
            notify_user_with_gui('Downloaded: {}'.format(path))
    except BoxAPIException as e:
        crate_logger.debug(traceback.format_exc())
        if e.status == 404:
            crate_logger.debug('Apparently item: {obj_id}, {path} has been deleted, '
                               'right before we tried to download'.format(obj_id=item['id'], path=path))
        return
    # Must be read before redis_set() below writes the entry for this item.
    was_versioned = r_c.exists(redis_key(item['id']))
    redis_set(r_c, item, os.path.getmtime(path), box_dir_path=BOX_DIR,
              fresh_download=not was_versioned, folder=os.path.dirname(path))


def download_queue_processor():
    """
    Implements a simple re-try mechanism for pending downloads.

    Runs forever, taking ``(item, path, oauth)`` entries off ``download_queue``. Entries whose
    ``item['type']`` is not ``'file'`` are acknowledged and skipped. A file is downloaded when it
    has no cached info, when the cached ``etag`` differs from ``item['etag']``, or when ``path``
    does not exist locally; files whose basename starts with ``'.~lock'`` are never downloaded.

    A download that fails with a connection error is retried, up to 15 attempts with a 5 second
    pause after each failure. ``download_queue.task_done()`` is called exactly once for every
    entry taken off the queue, including when an unexpected exception propagates.

    :return: does not return under normal operation.
    """
    max_attempts = 15
    retry_delay_seconds = 5
    while True:
        # get() blocks until an entry is available. The former ``download_queue.not_empty`` guard
        # was dropped: assuming download_queue is a queue.Queue, that attribute is a Condition
        # object and therefore always truthy.
        item, path, _oauth = download_queue.get()
        try:
            if item['type'] != 'file':
                continue
            info = redis_get(r_c, item) if r_c.exists(redis_key(item['id'])) else None
            # hack because we did not use to store the file_path, but do not want to force a download
            if info and 'file_path' not in info:
                info['file_path'] = path
                r_c.set(redis_key(item['id']), json.dumps(info))
                r_c.set('diy_crate.last_save_time_stamp', int(time.time()))
            # no version, or diff version, or the file does not exist locally
            needs_download = not info or info['etag'] != item['etag'] or not os.path.exists(path)
            if not needs_download:
                continue
            if os.path.basename(path).startswith('.~lock'):  # avoid downloading lock files
                continue
            for _attempt in range(max_attempts):
                try:
                    _download_item_once(item, path)
                    break
                except (ConnectionResetError, ConnectionError):
                    # Handled per attempt so that the loop really does try again.
                    crate_logger.debug(traceback.format_exc())
                    time.sleep(retry_delay_seconds)
        finally:
            download_queue.task_done()
def process_event(self, event, operation):
    """
    Mirror one local filesystem event onto the matching Box folder.

    For every operation the remote folder is located by walking ``self.folders_to_traverse(...)``
    from the Box root folder (id ``'0'``) via ``self.traverse_path``; the folder's items are then
    listed in pages of 100 and compared by name and type with the event.

    :param event: for ``'move'`` a pair ``(src_event, dest_event)``; for every other operation a
        single event object. The attributes read here are ``path``, ``pathname``, ``name`` and
        (for ``'delete'``) ``maskname``. Presumably pyinotify events -- TODO confirm.
    :param operation: one of ``'delete'``, ``'move'``, ``'create'``, ``'modify'`` or
        ``'real_close'``. Any other value matches no branch and nothing is done.
    :return: None
    """
    if operation == 'delete':
        crate_logger.debug('Doing a delete on {}'.format(event.pathname))
        folders_to_traverse = self.folders_to_traverse(event.path)
        crate_logger.debug(folders_to_traverse)
        client = Client(self.oauth)
        box_folder = client.folder(folder_id='0').get()
        cur_box_folder = box_folder
        # if we're modifying in root box dir, then we've already found the folder
        is_base = BOX_DIR in (event.path, event.path[:-1],)
        cur_box_folder = self.traverse_path(client, event, cur_box_folder, folders_to_traverse)
        last_dir = os.path.split(event.path)[-1]
        if not is_base:
            # NOTE(review): this only constructs an AssertionError instance; it is never raised,
            # so the check has no effect. The 'create' branch uses a real ``assert`` instead.
            AssertionError(cur_box_folder['name'] == last_dir,
                           cur_box_folder['name'] + 'not equals ' + last_dir)
        event_was_for_dir = 'IN_ISDIR'.lower() in event.maskname.lower()
        num_entries = cur_box_folder['item_collection']['total_count']
        limit = 100
        for offset in range(0, num_entries, limit):
            for entry in cur_box_folder.get_items(offset=offset, limit=limit):
                if not event_was_for_dir and entry['type'] == 'file' and entry['name'] == event.name:
                    if entry['id'] not in self.files_from_box:
                        cur_file = client.file(file_id=entry['id']).get()
                        if cur_file.delete():  # does not actually check correctly...unless not "ok" is false
                            r_c.delete(redis_key(cur_file['id']))
                    else:
                        # The id was recorded in files_from_box; consume the marker instead of
                        # deleting the remote file.
                        self.files_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    # NOTE(review): exits only the inner loop; remaining pages are still listed.
                    break
                elif event_was_for_dir and entry['type'] == 'folder' and entry['name'] == event.name:
                    if entry['id'] not in self.folders_from_box:
                        self.get_folder(client, entry['id']).delete()
                    else:
                        self.folders_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    break
    elif operation == 'move':
        crate_logger.debug('Doing a move on: {}'.format(event))
        src_event, dest_event = event
        folders_to_traverse = self.folders_to_traverse(dest_event.path)
        crate_logger.debug(folders_to_traverse)
        client = Client(self.oauth)
        box_folder = client.folder(folder_id='0').get()
        cur_box_folder = box_folder
        # if we're modifying in root box dir, then we've already found the folder
        # cur_box_folder is the remote DESTINATION folder from here on.
        cur_box_folder = self.traverse_path(client, dest_event, cur_box_folder, folders_to_traverse)
        src_folders_to_traverse = self.folders_to_traverse(src_event.path)
        src_box_folder = box_folder
        src_box_folder = self.traverse_path(client, src_event, src_box_folder, src_folders_to_traverse)
        # Same parent directory on both sides means the move is a rename in place.
        is_rename = src_event.path == dest_event.path
        # Seeded with the opposite kind so that, if the listing below yields no entries, a file
        # still counts as "source file not found" (and likewise for a directory).
        did_find_src_file = os.path.isdir(dest_event.pathname)  # true if we are a directory :)
        did_find_src_folder = os.path.isfile(dest_event.pathname)  # true if we are a regular file :)
        is_file = os.path.isfile(dest_event.pathname)
        is_dir = os.path.isdir(dest_event.pathname)
        move_from_remote = False
        src_num_entries = src_box_folder['item_collection']['total_count']
        limit = 100
        for offset in range(0, src_num_entries, limit):
            for entry in src_box_folder.get_items(offset=offset, limit=limit):
                # NOTE(review): both flags are recomputed for every entry and this loop has no
                # break, so after the loop they describe the last entry listed.
                did_find_src_file = is_file and entry['name'] == src_event.name and entry['type'] == 'file'
                did_find_src_folder = is_dir and entry['name'] == src_event.name and entry['type'] == 'folder'
                if did_find_src_file:
                    src_file = client.file(file_id=entry['id']).get()
                    if is_rename:
                        src_file.rename(dest_event.name)
                    else:
                        did_find_cur_file = os.path.isdir(dest_event.pathname)  # should check box instead
                        did_find_cur_folder = os.path.isfile(dest_event.pathname)  # should check box instead
                        cur_num_entries = cur_box_folder['item_collection']['total_count']
                        # Look for an item with the destination name already in the destination folder.
                        for cur_offset in range(0, cur_num_entries, limit):
                            for cur_entry in cur_box_folder.get_items(offset=cur_offset, limit=limit):
                                matching_name = cur_entry['name'] == dest_event.name
                                did_find_cur_file = is_file and matching_name and isinstance(cur_entry,
                                                                                             File)
                                did_find_cur_folder = is_dir and matching_name and isinstance(cur_entry, Folder)
                                if did_find_cur_file:
                                    # Overwrite the existing destination file, then remove the source.
                                    self.upload_queue.put([os.path.getmtime(dest_event.pathname),
                                                           partial(cur_entry.update_contents,
                                                                   dest_event.pathname),
                                                           self.oauth])
                                    # NOTE(review): a bare callable is queued here, whereas the
                                    # line above queues a 3-item list -- confirm the consumer
                                    # accepts both shapes.
                                    self.upload_queue.put(partial(src_file.delete))
                                    break
                                elif did_find_cur_folder:
                                    crate_logger.debug(
                                        'do not currently support movinga same name folder into parent with'
                                        'folder inside of the same name -- would may need to update the '
                                        'contents')
                                    break
                            # Propagate the inner break to the pagination loop.
                            if (is_file and did_find_cur_file) or (is_dir and did_find_cur_folder):
                                break
                        if is_file and not did_find_cur_file:
                            src_file.move(cur_box_folder)
                            # do not yet support moving and renaming in one go
                            assert src_file['name'] == dest_event.name
                elif did_find_src_folder:
                    src_folder = client.folder(folder_id=entry['id']).get()
                    if is_rename:
                        src_folder.rename(dest_event.name)
                    else:
                        src_folder.move(cur_box_folder)
                        # do not yet support moving and renaming in one go
                        assert src_folder['name'] == dest_event.name
                elif entry['name'] == dest_event.name:
                    # An item with the destination name is already in the remote source folder.
                    move_from_remote = True
        if not move_from_remote:  # if it was moved from a different folder on remote, could be false still
            dest_box_folder = box_folder
            dest_folders_to_traverse = self.folders_to_traverse(dest_event.path)
            dest_box_folder = self.traverse_path(client, dest_event, dest_box_folder, dest_folders_to_traverse)
            dest_num_entries = dest_box_folder['item_collection']['total_count']
            limit = 100
            for offset in range(0, dest_num_entries, limit):
                # NOTE(review): the count comes from dest_box_folder but the items are listed from
                # cur_box_folder; both were traversed from dest_event, so they presumably refer to
                # the same remote folder -- confirm.
                for entry in cur_box_folder.get_items(offset=offset, limit=limit):
                    if entry['name'] == dest_event.name:
                        move_from_remote = True
                        break
            if not move_from_remote:
                if is_file and not did_find_src_file:
                    # src file [should] no longer exist[s]. this file did not originate in box, too.
                    last_modified_time = os.path.getmtime(dest_event.pathname)
                    self.upload_queue.put([last_modified_time,
                                           partial(cur_box_folder.upload, dest_event.pathname, dest_event.name),
                                           self.oauth])
                elif is_dir and not did_find_src_folder:
                    self.upload_queue.put(partial(cur_box_folder.create_subfolder, dest_event.name))
                    wm.add_watch(dest_event.pathname, rec=True, mask=mask)
    elif operation == 'create':
        crate_logger.debug("Creating: {}".format(event.pathname))
        folders_to_traverse = self.folders_to_traverse(event.path)
        crate_logger.debug(folders_to_traverse)
        client = Client(self.oauth)
        box_folder = client.folder(folder_id='0').get()
        cur_box_folder = box_folder
        # if we're modifying in root box dir, then we've already found the folder
        is_base = BOX_DIR in (event.path, event.path[:-1],)
        cur_box_folder = self.traverse_path(client, event, cur_box_folder, folders_to_traverse)
        last_dir = os.path.split(event.path)[-1]
        if not is_base:
            assert cur_box_folder['name'] == last_dir
        # Seeded with the opposite kind so the "not found" checks after the loop work even when
        # the remote folder lists no entries.
        did_find_the_file = os.path.isdir(event.pathname)  # true if we are a directory :)
        did_find_the_folder = os.path.isfile(event.pathname)  # true if we are a regular file :)
        is_file = os.path.isfile(event.pathname)
        is_dir = os.path.isdir(event.pathname)
        num_entries = cur_box_folder['item_collection']['total_count']
        limit = 100
        for offset in range(0, num_entries, limit):
            for entry in cur_box_folder.get_items(offset=offset, limit=limit):
                did_find_the_file = is_file and entry['type'] == 'file' and entry['name'] == event.name
                did_find_the_folder = is_dir and entry['type'] == 'folder' and entry['name'] == event.name
                if did_find_the_file:
                    if entry['id'] not in self.files_from_box:  # more accurately, was this created offline?
                        # NOTE(review): constructs an AssertionError without raising it; no effect.
                        AssertionError(False,
                                       'We should not be able to create a '
                                       'file that exists in box; should be a close/modify.')
                        crate_logger.debug('Update the file: {}'.format(event.pathname))
                        a_file = client.file(file_id=entry['id']).get()
                        # seem it is possible to get more than one create (without having a delete in between)
                        # NOTE(review): queued as a bare callable, unlike the 3-item lists used
                        # for uploads further down -- confirm the consumer accepts both shapes.
                        self.upload_queue.put(partial(a_file.update_contents, event.pathname))
                    else:
                        self.files_from_box.remove(entry['id'])  # just downloaded it
                    # NOTE(review): exits only the inner loop; if further pages exist, the flags
                    # above are recomputed from their entries before the checks after the loop.
                    break
                elif did_find_the_folder:
                    # we are not going to re-create the folder, but we are also not checking if the contents in this
                    # local creation are different from the contents in box.
                    if entry['id'] in self.folders_from_box:
                        self.folders_from_box.remove(entry['id'])  # just downloaded it
                    break
        if is_file and not did_find_the_file:
            crate_logger.debug('Upload the file: {}'.format(event.pathname))
            # NOTE(review): uses getctime here while the other branches use getmtime -- confirm
            # this is intentional.
            last_modified_time = os.path.getctime(event.pathname)
            self.upload_queue.put([last_modified_time,
                                   partial(cur_box_folder.upload, event.pathname, event.name),
                                   self.oauth])
        elif is_dir and not did_find_the_folder:
            crate_logger.debug('Upload the folder: {}'.format(event.pathname))
            self.upload_queue.put(partial(cur_box_folder.create_subfolder, event.name))
            wm.add_watch(event.pathname, rec=True, mask=mask)
    elif operation == 'modify':
        crate_logger.debug("{op}...: {pathname}".format(op=operation, pathname=event.pathname))
        folders_to_traverse = self.folders_to_traverse(event.path)
        crate_logger.debug(folders_to_traverse)
        client = Client(self.oauth)
        cur_box_folder = None
        folder_id = '0'
        retry_limit = 5
        # Unlike 'delete'/'move'/'create', the root folder is fetched through get_box_folder with
        # a retry limit.
        cur_box_folder = get_box_folder(client, cur_box_folder, folder_id, retry_limit)
        # if we're modifying in root box dir, then we've already found the folder
        is_base = BOX_DIR in (event.path, event.path[:-1],)
        cur_box_folder = self.traverse_path(client, event, cur_box_folder, folders_to_traverse)
        last_dir = os.path.split(event.path)[-1]
        if not is_base:
            # NOTE(review): constructs an AssertionError without raising it; no effect.
            AssertionError(cur_box_folder['name'] ==
                           last_dir, cur_box_folder['name'] + 'not equals ' + last_dir)
        # Seeded with the opposite kind; see the checks after the loop.
        did_find_the_file = os.path.isdir(event.pathname)  # true if we are a directory :)
        did_find_the_folder = os.path.isfile(event.pathname)  # true if we are a regular file :)
        is_file = os.path.isfile(event.pathname)
        is_dir = os.path.isdir(event.pathname)
        num_entries = cur_box_folder['item_collection']['total_count']
        limit = 100
        for offset in range(0, num_entries, limit):
            for entry in cur_box_folder.get_items(offset=offset, limit=limit):
                did_find_the_file = is_file and entry['type'] == 'file' and entry['name'] == event.name
                did_find_the_folder = is_dir and entry['type'] == 'folder' and entry['name'] == event.name
                if did_find_the_file:
                    last_modified_time = os.path.getmtime(event.pathname)
                    if entry['id'] not in self.files_from_box:
                        cur_file = client.file(file_id=entry['id']).get()
                        can_update = True
                        was_versioned = r_c.exists(redis_key(cur_file['id']))
                        try:
                            info = redis_get(r_c, cur_file) if was_versioned else None
                            # Unversioned files get a placeholder whose etag/time_stamp are not
                            # expected to match, so they are uploaded.
                            info = info if was_versioned else {'fresh_download': True,
                                                               'etag': '0', 'time_stamp': 0}
                            item_version = info
                            # Skip the upload when the remote etag is unchanged and the cached
                            # time stamp is not older than the local mtime.
                            # NOTE(review): both halves of the ``or`` apply the same time_stamp
                            # comparison, so fresh_download does not change the outcome.
                            if cur_file['etag'] == item_version['etag'] and \
                                    ((item_version['fresh_download'] and item_version[
                                        'time_stamp'] >= last_modified_time) or
                                     (not item_version['fresh_download'] and item_version[
                                         'time_stamp'] >= last_modified_time)):
                                can_update = False
                            if can_update:
                                self.upload_queue.put([last_modified_time,
                                                       partial(cur_file.update_contents, event.pathname),
                                                       self.oauth])
                            else:
                                is_new_time_stamp = item_version['time_stamp'] >= last_modified_time
                                crate_logger.debug('Skipping the update because not versioned: {not_versioned}, '
                                                   'fresh_download: {fresh_download}, '
                                                   'version time_stamp >= '
                                                   'new time stamp: {new_time_stamp}, '
                                                   'event pathname: {path_name}, '
                                                   'cur file id: {obj_id}'.format(not_versioned=not was_versioned,
                                                                                  fresh_download=item_version[
                                                                                      'fresh_download'],
                                                                                  new_time_stamp=is_new_time_stamp,
                                                                                  path_name=event.pathname,
                                                                                  obj_id=cur_file['id']))
                        except TypeError:
                            crate_logger.debug(traceback.format_exc())
                        except Exception:
                            # Best effort: any failure while deciding or queueing is only logged.
                            crate_logger.debug(traceback.format_exc())
                    else:
                        self.files_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    # NOTE(review): exits only the inner loop; see the same note in 'create'.
                    break
                elif did_find_the_folder:
                    if entry['id'] not in self.folders_from_box:
                        crate_logger.debug('Cannot create a subfolder when it already exists: {}'.format(event.pathname))
                    else:
                        self.folders_from_box.remove(entry['id'])  # just wrote if, assuming create event didn't run
                    break
        if is_file and not did_find_the_file:
            crate_logger.debug('Uploading contents...: {}'.format(event.pathname))
            last_modified_time = os.path.getmtime(event.pathname)
            self.upload_queue.put([last_modified_time,
                                   partial(cur_box_folder.upload, event.pathname, event.name),
                                   self.oauth])
        if is_dir and not did_find_the_folder:
            crate_logger.debug('Creating a sub-folder...: {}'.format(event.pathname))
            self.upload_queue.put(partial(cur_box_folder.create_subfolder, event.name))
            wm.add_watch(event.pathname, rec=True, mask=mask)
    elif operation == 'real_close':
        crate_logger.debug("Real close...: {}".format(event.pathname))
        folders_to_traverse = self.folders_to_traverse(event.path)
        crate_logger.debug(folders_to_traverse)
        client = Client(self.oauth)
        cur_box_folder = None
        cur_box_folder = get_box_folder(client, cur_box_folder, '0', 5)
        # if we're modifying in root box dir, then we've already found the folder
        is_base = BOX_DIR in (event.path, event.path[:-1],)
        cur_box_folder = self.traverse_path(client, event, cur_box_folder, folders_to_traverse)
        last_dir = os.path.split(event.path)[-1]
        if not is_base:
            # NOTE(review): constructs an AssertionError without raising it; no effect.
            AssertionError(cur_box_folder['name'] == last_dir,
                           cur_box_folder['name'] + 'not equals ' + last_dir)
        # Seeded with the opposite kind; see the checks after the loop.
        did_find_the_file = os.path.isdir(event.pathname)  # true if we are a directory :)
        did_find_the_folder = os.path.isfile(event.pathname)  # true if we are a regular file :)
        is_file = \
            os.path.isfile(event.pathname)
        is_dir = os.path.isdir(event.pathname)
        num_entries = cur_box_folder['item_collection']['total_count']
        limit = 100
        for offset in range(0, num_entries, limit):
            for entry in cur_box_folder.get_items(offset=offset, limit=limit):
                did_find_the_file = is_file and entry['type'] == 'file' and entry['name'] == event.name
                did_find_the_folder = is_dir and entry['type'] == 'folder' and entry['name'] == event.name
                if did_find_the_file:
                    # NOTE(review): exits only the inner loop; later pages recompute the flags.
                    break
        # not a box file/folder (though could have been copied from a local box item)
        if is_file and not did_find_the_file:
            last_modified_time = os.path.getmtime(event.pathname)
            self.upload_queue.put([last_modified_time,
                                   partial(cur_box_folder.upload, event.pathname, event.name),
                                   self.oauth])
        elif is_dir and not did_find_the_folder:
            # Created directly here rather than through upload_queue as in the other branches.
            cur_box_folder.create_subfolder(event.name)
            wm.add_watch(event.pathname, rec=True, mask=mask, auto_add=True)
def walk_and_notify_and_download_tree(path, box_folder, client, oauth_obj, p_id=None):
    """
    Walk the path recursively and add watcher and create the path.

    For the local directory ``path`` and its Box counterpart this:

    1. adds a recursive watch on ``path``;
    2. queues an upload for every regular file in ``path`` whose name is not present in the Box
       folder (done first so local-only files are prioritised);
    3. for every Box sub-folder, creates the local directory if needed, records it in the cache
       and recurses into it;
    4. for every other Box item, queues a download onto ``download_queue``;
    5. records the folder itself in the cache, with the ids of its children and ``p_id``.

    If ``path`` is not an existing directory the function returns without doing anything.

    :param path: local directory that mirrors ``box_folder``.
    :param box_folder: Box folder object; only ``box_folder['id']`` is read.
    :param client: Box client used to fetch folders (``client.folder(...).get()``).
    :param oauth_obj: passed through unchanged in the queued upload/download entries.
    :param p_id: id of the parent Box folder, stored as ``parent_id`` for this folder.
    :return: None
    """
    if not os.path.isdir(path):
        # Nothing to watch or list. Bailing out here also guarantees the local listing below is
        # always defined before it is used.
        return
    wm.add_watch(path, mask, rec=True, auto_add=True)
    local_files = os.listdir(path)
    b_folder = client.folder(folder_id=box_folder['id']).get()
    num_entries_in_folder = b_folder['item_collection']['total_count']
    limit = 100

    # Pass 1: collect the names that already exist on Box (set lookups instead of list.remove).
    remote_names = set()
    for offset in range(0, num_entries_in_folder, limit):
        for box_item in b_folder.get_items(limit=limit, offset=offset):
            remote_names.add(box_item['name'])

    for local_file in local_files:  # prioritize the local_files not yet on box's server.
        if local_file in remote_names:
            continue
        local_path = os.path.join(path, local_file)
        if os.path.isfile(local_path):
            upload_queue.put([os.path.getmtime(local_path),
                              partial(b_folder.upload, local_path, local_file),
                              oauth_obj])

    # Pass 2: mirror the Box items locally.
    ids_in_folder = []
    for offset in range(0, num_entries_in_folder, limit):
        for box_item in b_folder.get_items(limit=limit, offset=offset):
            ids_in_folder.append(box_item['id'])
            if box_item['type'] == 'folder':
                local_path = os.path.join(path, box_item['name'])
                fresh_download = False
                if not os.path.isdir(local_path):
                    os.mkdir(local_path)
                    fresh_download = True
                retry_limit = 15
                for i in range(0, retry_limit):
                    try:
                        redis_set(cache_client=r_c, cloud_item=box_item,
                                  last_modified_time=os.path.getmtime(local_path),
                                  box_dir_path=BOX_DIR, fresh_download=fresh_download,
                                  folder=os.path.dirname(local_path))
                        walk_and_notify_and_download_tree(local_path,
                                                          client.folder(folder_id=box_item['id']).get(),
                                                          client, oauth_obj,
                                                          p_id=box_folder['id'])
                        break
                    except BoxAPIException as e:
                        # Any Box API error ends the attempts for this sub-folder.
                        crate_logger.debug(traceback.format_exc())
                        if e.status == 404:
                            crate_logger.debug('Box says: {obj_id}, '
                                               '{obj_name}, is a 404 status.'.format(obj_id=box_item['id'],
                                                                                     obj_name=box_item[
                                                                                         'name']))
                            crate_logger.debug(
                                'But, this is a folder, we do not handle recursive folder deletes correctly yet.')
                        break
                    except (ConnectionError, ConnectionResetError, BrokenPipeError):
                        # Connection problems are retried immediately, up to retry_limit times.
                        crate_logger.debug('Attempt {idx}/{limit}; {the_trace}'.format(the_trace=traceback.format_exc(),
                                                                                       idx=i+1, limit=retry_limit))
            else:
                # NOTE(review): enqueueing does not obviously call the Box API, so this handler
                # may be unreachable -- kept as-is pending confirmation.
                try:
                    file_obj = box_item
                    download_queue.put((file_obj, os.path.join(path, box_item['name']), oauth_obj))
                except BoxAPIException as e:
                    crate_logger.debug(traceback.format_exc())
                    if e.status == 404:
                        crate_logger.debug('Box says: {obj_id}, {obj_name}, '
                                           'is a 404 status.'.format(obj_id=box_item['id'],
                                                                     obj_name=box_item['name']))
                        if r_c.exists(redis_key(box_item['id'])):
                            crate_logger.debug('Deleting {obj_id}, '
                                               '{obj_name}'.format(obj_id=box_item['id'],
                                                                   obj_name=box_item['name']))
                            r_c.delete(redis_key(box_item['id']))

    # Record this folder last, once the ids of all its children are known.
    redis_set(cache_client=r_c, cloud_item=b_folder, last_modified_time=os.path.getmtime(path),
              box_dir_path=BOX_DIR, fresh_download=not r_c.exists(redis_key(box_folder['id'])),
              folder=os.path.dirname(path), sub_ids=ids_in_folder, parent_id=p_id)