def test_iterate_files(create_folder, create_file, managed_folder):
    """Check that ``common.iterate_files`` yields every file below a folder.

    Phase one builds a small tree (the managed folder, two children of it,
    and one grandchild under the second child) with five files per folder.
    Phase two adds ``common.GET_ITEMS_LIMIT * 2 + 1`` more files directly to
    the managed folder so the listing has to be paged.

    Coverage note carried over from the original author: this test never
    reaches the ``else: break`` branch inside ``iterate_files``; the author
    took that as indirect evidence that ``folder.get_items`` works.
    """

    def collect_items():
        # Every yielded entry is an (item, shared) pair; only the item is
        # compared against the expected set.
        return [item for item, _shared in common.iterate_files(managed_folder)]

    first_child = create_folder(parent_folder=managed_folder)
    second_child = create_folder(parent_folder=managed_folder)
    grandchild = create_folder(parent_folder=second_child)
    tree = (managed_folder, first_child, second_child, grandchild)

    expected = set()
    for parent in tree:
        expected.update(create_file(parent_folder=parent) for _ in range(5))

    found = collect_items()
    assert len(found) == len(expected)
    assert set(found) == expected

    # Test behavior when we are forced to page through a large number of
    # files in a single folder.
    expected.update(
        create_file(parent_folder=managed_folder)
        for _ in range(common.GET_ITEMS_LIMIT * 2 + 1)
    )

    found = collect_items()
    assert len(found) == len(expected)
    assert set(found) == expected
def lambda_handler(event, context):
    """Reconcile Box shared links and the DynamoDB table with the Box tree.

    Pass 1 walks every ``(file, shared)`` pair that ``common.iterate_files``
    yields below ``common.BOX_FOLDER_ID``. A file that is not public while
    its ``shared`` flag is set gets an open, downloadable shared link; a
    public file whose flag is not set has its link removed. Files that end
    up public are written to DynamoDB, the rest are deleted from it.

    Pass 2 scans the whole table and deletes every row whose ``box_file_id``
    or ``filepath`` was not recorded for a public file during pass 1.

    Args:
        event: Lambda invocation payload; not read by this handler.
        context: Lambda runtime context; not read by this handler.
    """
    ddb_table = common.get_ddb_table()
    box_client, _ = common.get_box_client()
    root_folder = common.get_folder(box_client, common.BOX_FOLDER_ID)
    root_shared = common.is_box_object_public(root_folder)

    LOGGER.info("Checking files in Box")
    public_ids = set()
    public_paths = set()
    files_seen = 0
    walker = common.iterate_files(root_folder, shared=root_shared)
    for files_seen, (box_file, inherited) in enumerate(walker, start=1):
        wants_link = (not common.is_box_object_public(box_file)) and inherited
        if wants_link:
            # this includes an API call
            box_file = common.create_shared_link(
                box_client, box_file, access="open", allow_download=True
            )
        elif common.is_box_object_public(box_file) and not inherited:
            box_file = common.remove_shared_link(box_client, box_file)

        if not common.is_box_object_public(box_file):
            common.delete_file_item(ddb_table, box_file)
            continue
        public_ids.add(box_file.id)
        public_paths.add(common.get_filepath(box_file))
        common.put_file_item(ddb_table, box_file)
    LOGGER.info("Processed %s files", files_seen)

    LOGGER.info("Checking items in DynamoDB")
    items_seen = 0
    stale_paths = set()
    page = ddb_table.scan()
    while True:
        for row in page["Items"]:
            items_seen += 1
            still_public = (row["box_file_id"] in public_ids) and (
                row["filepath"] in public_paths
            )
            if not still_public:
                stale_paths.add(row["filepath"])

        # If the data returned by a scan would exceed 1MB, DynamoDB pages the
        # result; LastEvaluatedKey is the placeholder used to request the
        # next page.
        resume_key = page.get("LastEvaluatedKey")
        if not resume_key:
            # Original author's note: this exit wasn't reached by testing
            # at the time of writing.
            break
        page = ddb_table.scan(ExclusiveStartKey=resume_key)

    # Deletions are deferred until the scan has finished.
    for path in stale_paths:
        ddb_table.delete_item(Key={"filepath": path})
    LOGGER.info("Processed %s items", items_seen)
def lambda_handler(event, context):
    """Synchronise the DynamoDB table with the public files found in Box.

    Pass 1 walks every file that ``common.iterate_files`` yields below
    ``common.BOX_FOLDER_ID``: public files are written to DynamoDB and their
    ids remembered, all other files are deleted from DynamoDB.

    Pass 2 scans the whole table and deletes every row whose ``box_file_id``
    was not remembered during pass 1.

    Args:
        event: Lambda invocation payload; not read by this handler.
        context: Lambda runtime context; not read by this handler.
    """
    ddb_table = common.get_ddb_table()
    box_client, _ = common.get_box_client()
    root_folder = box_client.folder(common.BOX_FOLDER_ID)

    LOGGER.info("Checking files in Box")
    public_ids = set()
    files_seen = 0
    for files_seen, box_file in enumerate(common.iterate_files(root_folder), start=1):
        if not common.is_box_file_public(box_file):
            common.delete_file_item(ddb_table, box_file)
            continue
        public_ids.add(box_file.id)
        common.put_file_item(ddb_table, box_file)
    LOGGER.info("Processed %s files", files_seen)

    LOGGER.info("Checking items in DynamoDB")
    items_seen = 0
    stale_names = set()
    page = ddb_table.scan()
    while True:
        for row in page["Items"]:
            items_seen += 1
            if row["box_file_id"] in public_ids:
                continue
            stale_names.add(row["filename"])

        # If the data returned by a scan would exceed 1MB, DynamoDB pages the
        # result; LastEvaluatedKey is the placeholder used to request the
        # next page.
        resume_key = page.get("LastEvaluatedKey")
        if not resume_key:
            break
        page = ddb_table.scan(ExclusiveStartKey=resume_key)

    # Deletions are deferred until the scan has finished.
    for name in stale_names:
        ddb_table.delete_item(Key={"filename": name})
    LOGGER.info("Processed %s items", items_seen)
def lambda_handler(event, context):
    """Handle a Box webhook notification and mirror the change into DynamoDB.

    The JSON in ``event["body"]`` supplies the trigger name, the webhook id
    and the affected Box object. Unsupported triggers and messages that fail
    webhook validation are logged and acknowledged without touching
    DynamoDB. For file triggers the single file is put (if public) or
    deleted (if not); for folder triggers the same is done for every file
    yielded by ``common.iterate_files`` for that folder.

    Args:
        event: Lambda invocation payload; ``event["body"]`` (JSON text) and
            ``event["headers"]`` are read.
        context: Lambda runtime context; not read by this handler.

    Returns:
        ``STATUS_SUCCESS`` on every path that does not raise, including
        unsupported triggers, invalid messages and missing Box objects.

    Raises:
        RuntimeError: If the event ``source`` carries neither an ``item``
            entry nor an ``id`` entry.
    """
    LOGGER.info(json.dumps(event))

    raw_body = event["body"]
    body = json.loads(raw_body)

    trigger = body["trigger"]
    webhook_id = body["webhook"]["id"]
    source = body["source"]

    # The event structure varies by trigger
    if "item" in source:
        box_id = source["item"]["id"]
        box_type = source["item"]["type"]
    elif "id" in source:
        box_id = source["id"]
        box_type = source["type"]
    else:
        raise RuntimeError("Missing id field")

    LOGGER.info("Received trigger %s on %s id %s", trigger, box_type, box_id)

    # only get a box client if we're actually going to need one
    if trigger not in common.HANDLED_TRIGGERS:
        LOGGER.info("%s is not supported by this endpoint", trigger)
        return STATUS_SUCCESS

    client, webhook_key = common.get_box_client()
    ddb = common.get_ddb_table()

    # The raw body text (not the parsed dict) is what gets validated.
    webhook = client.webhook(webhook_id)
    is_valid = webhook.validate_message(bytes(raw_body, "utf-8"), event["headers"], webhook_key)
    if not is_valid:
        LOGGER.critical("Received invalid webhook request")
        return STATUS_SUCCESS

    if trigger in common.HANDLED_FILE_TRIGGERS:
        file = common.get_file(client, box_id)
        if not file:
            LOGGER.warning("File %s is missing (trashed or deleted)", box_id)
            # `file` is falsy here, so there is no file object to hand to
            # delete_file_item. Any stale DynamoDB row is left for the sync
            # lambda to clean up, as in the missing-folder case below.
            return STATUS_SUCCESS

        if common.is_box_file_public(file):
            common.put_file_item(ddb, file)
        else:
            common.delete_file_item(ddb, file)
    elif trigger in common.HANDLED_FOLDER_TRIGGERS:
        folder = common.get_folder(client, box_id)
        if not folder:
            LOGGER.warning("Folder %s is missing (trashed or deleted)", box_id)
            # NOTE(eslavich): The Box API doesn't appear to give us a way to
            # list the contents of a trashed folder, so we're just going to have
            # to let the sync lambda clean up the relevant DynamoDB rows.
            return STATUS_SUCCESS

        for file in common.iterate_files(folder):
            if common.is_box_file_public(file):
                common.put_file_item(ddb, file)
            else:
                common.delete_file_item(ddb, file)

    return STATUS_SUCCESS
def lambda_handler(event, context):
    """Process a Box webhook event: fix up shared links and update DynamoDB.

    The JSON in ``event["body"]`` supplies the trigger name, the webhook id
    and the affected Box object. Unsupported triggers and messages that fail
    webhook validation are logged and acknowledged without further work.

    For a file trigger on a file, the file's shared link is brought in line
    with ``common.is_any_parent_public`` (created when a parent is public,
    removed when none is) and the file is then put into or deleted from
    DynamoDB according to whether it is public. For a folder trigger on a
    folder, the same is done for every ``(file, shared)`` pair yielded by
    ``common.iterate_files``, using the yielded ``shared`` flag.

    Args:
        event: Lambda invocation payload; ``event["body"]`` (JSON text) and
            ``event["headers"]`` are read.
        context: Lambda runtime context; not read by this handler.

    Returns:
        ``STATUS_SUCCESS`` on every path that does not raise.

    Raises:
        RuntimeError: If the event ``source`` carries neither an ``item``
            entry nor an ``id`` entry.
    """
    LOGGER.info(json.dumps(event))

    raw_body = event["body"]
    payload = json.loads(raw_body)
    trigger = payload["trigger"]
    webhook_id = payload["webhook"]["id"]
    source = payload["source"]

    # The event structure varies by trigger: the id/type pair lives either
    # under source["item"] or directly on source.
    if "item" in source:
        target = source["item"]
    elif "id" in source:  # not covered by tests
        target = source
    else:  # not covered by tests
        raise RuntimeError("Missing id field")
    box_id = target["id"]
    box_type = target["type"]

    LOGGER.info("Received trigger %s on %s id %s", trigger, box_type, box_id)

    # only get a box client if we're actually going to need one
    if trigger not in common.HANDLED_TRIGGERS:
        LOGGER.info("%s is not supported by this endpoint", trigger)
        return STATUS_SUCCESS

    client, webhook_key = common.get_box_client()
    ddb = common.get_ddb_table()

    webhook = client.webhook(webhook_id)
    if not webhook.validate_message(bytes(raw_body, "utf-8"), event["headers"], webhook_key):
        LOGGER.critical("Received invalid webhook request")
        return STATUS_SUCCESS

    if (trigger in common.HANDLED_FILE_TRIGGERS) and (box_type == "file"):
        box_file = common.get_file(client, box_id)
        if not box_file:
            LOGGER.warning("File %s is missing (trashed or deleted)", box_id)
            # We don't know what the file's path was, so we'll just have to
            # let the sync lambda clean up DynamoDB.
            return STATUS_SUCCESS

        parent_public = common.is_any_parent_public(client, box_file)

        # if the file isn't public but any parent directory is, make a shared link
        if (not common.is_box_object_public(box_file)) and parent_public:
            # this includes an api call
            box_file = common.create_shared_link(
                client, box_file, access="open", allow_download=True
            )
        # if the file is public but no parent directory is, delete the shared link
        if common.is_box_object_public(box_file) and not parent_public:
            box_file = common.remove_shared_link(client, box_file)

        if common.is_box_object_public(box_file):
            record = common.put_file_item
        else:
            record = common.delete_file_item
        record(ddb, box_file)
        return STATUS_SUCCESS

    if not ((trigger in common.HANDLED_FOLDER_TRIGGERS) and (box_type == "folder")):
        # Handled trigger, but not one that applies to this kind of object.
        return STATUS_SUCCESS

    folder = common.get_folder(client, box_id)
    if not folder:
        LOGGER.warning("Folder %s is missing (trashed or deleted)", box_id)
        # The Box API doesn't appear to give us a way to list the contents of
        # a trashed folder, so we're just going to have to let the sync lambda
        # clean up the relevant DynamoDB rows.
        return STATUS_SUCCESS

    folder_shared = common.is_box_object_public(folder)
    for box_file, inherited in common.iterate_files(folder, shared=folder_shared):
        # if the file isn't public but any parent directory is
        if (not common.is_box_object_public(box_file)) and inherited:
            # this includes an api call
            box_file = common.create_shared_link(
                client, box_file, access="open", allow_download=True
            )
        elif common.is_box_object_public(box_file) and not inherited:
            box_file = common.remove_shared_link(client, box_file)

        if common.is_box_object_public(box_file):
            record = common.put_file_item
        else:
            record = common.delete_file_item
        record(ddb, box_file)

    return STATUS_SUCCESS