def nothing_extract(event):
    """No-op extractor: open each file in the family batch without extracting anything.

    :param event (dict) -- contains auth headers and the family batch of extractable files
    :return (dict) -- the family batch plus timing statistics for each pipeline stage
    """
    import time
    import os
    import sys

    from xtract_sdk.packagers.family import Family
    from xtract_sdk.packagers.family_batch import FamilyBatch

    t0 = time.time()
    sys.path.insert(1, '/')

    # A list of file paths
    all_families = event['family_batch']

    # If the batch arrived as a plain dict, repack it into a FamilyBatch object.
    if type(all_families) == dict:
        family_batch = FamilyBatch()
        for family in all_families["families"]:
            fam = Family()
            fam.from_dict(family)
            family_batch.add_family(fam)
        all_families = family_batch

    for family in all_families.families:
        family_id = family.family_id
        fam_files = family.files
        headers = family.headers

        for file_obj in fam_files:
            # new_path = os.path.join(family_id, local_filename)
            for i in range(10):
                with open(file_obj['path'], 'r') as f:
                    pass  # Touch the file; the context manager handles closing.

    t1 = time.time()
    return {"family_batch": all_families,
            "container_version": os.environ["container_version"],
            "transfer_time": 0,
            "import_time": 0,
            "family_fetch_time": 0,
            "file_unpack_time": 0,
            "full_extract_loop_time": 0,
            "total_time": t1 - t0}
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch


def create_mock_event(files, parser=None):
    """Pack all of the given file paths into a single-family mock event."""
    mock_event = dict()
    fam_batch = FamilyBatch()

    test_fam_1 = Family()
    group_file_objs = []
    for file in files:
        base_path = file
        group_file_objs.append({'path': base_path, 'metadata': dict()})

    test_fam_1.download_type = "LOCAL"
    test_fam_1.add_group(files=group_file_objs, parser=parser)
    fam_batch.add_family(test_fam_1)

    mock_event['family_batch'] = fam_batch
    return mock_event
def create_many_family_mock_event(files, parser=None):
    """Pack each of the given file paths into its own single-file family."""
    # TODO: this will break for matio
    mock_event = dict()
    fam_batch = FamilyBatch()

    family_id = None
    for file in files:
        if type(file) is dict:
            family_id = str(file['family_id'])
            file = file['filename']

        test_fam_1 = Family()
        group_file_objs = []

        base_path = file
        group_file_objs.append({'path': base_path, 'metadata': dict()})
        test_fam_1.download_type = "LOCAL"
        test_fam_1.add_group(files=group_file_objs, parser=parser)

        if family_id is not None:
            test_fam_1.family_id = family_id

        fam_batch.add_family(test_fam_1)

    mock_event['family_batch'] = fam_batch
    return mock_event
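# A minimal usage sketch tying the helpers above together (assumptions: these
# helpers live alongside nothing_extract, and '/tmp/xtract_demo.txt' is a
# writable scratch path). create_mock_event packs every path into ONE family;
# create_many_family_mock_event gives each path its OWN family.
if __name__ == "__main__":
    import os

    demo_path = '/tmp/xtract_demo.txt'  # hypothetical local test file
    with open(demo_path, 'w') as f:
        f.write('hello, xtract')

    one_fam_event = create_mock_event(files=[demo_path], parser=None)
    many_fam_event = create_many_family_mock_event(
        files=[{'family_id': 0, 'filename': demo_path}], parser=None)
    print(len(one_fam_event['family_batch'].families))  # -> 1

    # nothing_extract reads 'container_version' from the environment.
    os.environ['container_version'] = 'dev'
    stats = nothing_extract(one_fam_event)
    print(stats['total_time'])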
import os

from extractors.xtract_imagesort import ImagesExtractor
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch
from extractors.utils.base_extractor import base_extractor

# ****** JOAO LOCAL TESTS ******
ep_name = "test_images_ep"
xtract_dir = "/Users/joaovictor/.xtract"

# Note: this points to my local clone of the git repo 'xtract-images'
sys_path_add = "/Users/joaovictor/xtract/xtract-images"
module_path = "xtract_images_main"  # The file containing 'execute_extractor'
recursion_depth = 5000
metadata_write_path = "/Users/joaovictor/Desktop/test_metadata"

# HERE WE PACK LOCAL FAMILIES INTO THE SAME STRUCTURES AS USED BY XTRACT.
test_fam_1 = Family()
test_fam_2 = Family()

base_path = "/Users/joaovictor/xtract/xtract-images/training_data/"
test_fam_1.download_type = "LOCAL"
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'graphics/social.png'), 'metadata': dict()}],
                     parser=None)
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'maps/Fig1.jpg.png'), 'metadata': dict()}],
                     parser=None)
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'maps/311.jpg'), 'metadata': dict()}],
                     parser=None)

print(test_fam_1)
print(f"[DEBUG] JSON form of our family object: {test_fam_1.to_dict()}")

fam_batch = FamilyBatch()
fam_batch.add_family(test_fam_1)

# ****** TEST AND RUN THE EXTRACTOR ON YOUR FAMILYBATCH() *******
import os

from extractors.xtract_keyword import KeywordExtractor
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch
from extractors.utils.base_extractor import base_extractor

# ****** JOAO LOCAL TESTS ******
ep_name = "test_keyword_ep"
xtract_dir = "/Users/joaovictor/.xtract"

# Note: this points to my local clone of the git repo 'xtract-keyword'
sys_path_add = "/Users/joaovictor/xtract/xtract-keyword"
module_path = "xtract_keyword_main"  # The file containing 'execute_extractor'
metadata_write_path = "/Users/joaovictor/Desktop/test_metadata"

# HERE WE PACK LOCAL FAMILIES INTO THE SAME STRUCTURES AS USED BY XTRACT.
test_fam_1 = Family()
test_fam_2 = Family()

base_path = "/Users/joaovictor/xtract/xtract-keyword/tests/test_files"
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'freetext2'), 'metadata': dict()}],
                     parser=None)
test_fam_1.download_type = "LOCAL"

# print(f"[DEBUG] JSON form of our family object: {test_fam_1.to_dict()}")

fam_batch = FamilyBatch()
fam_batch.add_family(test_fam_1)

# ****** TEST AND RUN THE EXTRACTOR ON YOUR FAMILYBATCH() *******
extractor = KeywordExtractor()
event = extractor.create_event(family_batch=fam_batch,
                               ep_name=ep_name,
                               xtract_dir=xtract_dir,
                               sys_path_add=sys_path_add,
                               module_path=module_path,
                               metadata_write_path=metadata_write_path)
def orch_thread(self, headers):
    to_terminate = False

    print(f"ENDPOINTS TO CHECK: {self.fx_eps_to_check}")
    all_extractors = get_all_extractors(self.fx_eps_to_check)
    print(f"Fetched all extractors... {all_extractors}")

    fxc = get_fx_client(headers)

    self.cur_status = "EXTRACTING"
    while True:
        # If our accounting is complete
        # NOTE: when concurrent, will also need to check if scheduling is DONE.
        if (self.counters['fx']['success']
                + self.counters['fx']['failed']
                + self.counters['flagged_unknown'] == self.counters['cumu_scheduled']
                and self.cur_status == 'SCHEDULED'):
            to_terminate = True

        if to_terminate:
            print("[ORCH] Terminating!")
            print(f"Final counters: {self.counters}")
            self.cur_status = 'COMPLETED'  # TODO: Need to push this status to DB.
            break

        print(f"[ORCH] WQ length: {self.to_xtract_q.qsize()}")
        if self.to_xtract_q.empty() and self.funcx_current_tasks.empty():
            print("[ORCH] Empty work thread. Sleeping...")
            time.sleep(5)
        else:
            batch = fxc.create_batch()
            batch_len = 0
            while not self.to_xtract_q.empty():  # TODO: also need max batch size here.
                family = self.to_xtract_q.get()
                self.counters['cumu_orch_enter'] += 1

                extractor_id = family['first_extractor']
                if extractor_id in extractor_map:
                    extractor = extractor_map[extractor_id]
                else:
                    self.counters['flagged_unknown'] += 1
                    continue

                # We should not need to repack and add an empty base_url
                fam_batch = FamilyBatch()
                packed_family = Family()
                family['base_url'] = None
                packed_family.from_dict(family)
                fam_batch.add_family(packed_family)

                # TODO: hardcodes galore.
                event = extractor.create_event(family_batch=fam_batch,
                                               ep_name='default',
                                               xtract_dir="/home/tskluzac/.xtract",
                                               sys_path_add="/",
                                               module_path=f"xtract_{extractor_id}_main",
                                               metadata_write_path='/home/tskluzac/mdata')

                fx_ep_id = self.fx_eps_to_check[0]  # TODO: Should not be fixed to first fx_ep.
                print(f"Endpoint ID: {fx_ep_id}")
                batch.add(event,
                          endpoint_id=fx_ep_id,
                          function_id=all_extractors[f"xtract-{extractor_id}"][fx_ep_id])
                batch_len += 1

            # Only want to send tasks if we retrieved tasks.
            if batch_len > 0:
                batch_res = fxc.batch_run(batch)
                time.sleep(1.1)
                for item in batch_res:
                    self.funcx_current_tasks.put(item)

            poll_batch = []
            # print("Entering task loop")
            for i in range(0, 20):  # TODO: hardcode
                if not self.funcx_current_tasks.empty():
                    tid = self.funcx_current_tasks.get()
                    poll_batch.append(tid)
            # print(f"Current length of poll_batch: {len(poll_batch)}")

            if len(poll_batch) > 0:
                x = fxc.get_batch_result(poll_batch)
                time.sleep(1.1)
                # print(f"Poll result: {x}")
                for item in x:
                    result = x[item]
                    if result['status'] == 'success':
                        self.counters['fx']['success'] += 1
                    elif result['status'] == 'failed':
                        # Count against the same 'failed' key the termination
                        # check reads, then surface the exception.
                        self.counters['fx']['failed'] += 1
                        result['exception'].reraise()
                    elif result['pending']:
                        self.funcx_current_tasks.put(item)
                    else:
                        # If we haven't figured it out until here, we need some dev...
                        raise ValueError("[ORCH] CRITICAL Unrecognized funcX status...")
                print(self.counters)
    flow = InstalledAppFlow.from_client_secrets_file(
        '../xtract_sdk/downloaders/credentials.json', SCOPES)
    creds = flow.run_local_server(port=0)

    # Save the credentials for the next run
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)

    return creds


gdr = GoogleDriveDownloader(auth_creds=do_login_flow())

file_1 = "1RbSdH_nI0EHvxFswpl1Qss7CyWXBHo-o"  # JPG image!
file_2 = "1ecjFs55sNxBiwoAtztHcoA450Gh7ak0m9VqK0Wrm1Ms"  # free text document

fam_1 = Family()

# TODO: Put the Google Drive arguments into a "gdrive_cfg" sub-dictionary.
fam_1.add_group(files=[{'path': file_1,
                        'is_gdoc': False,
                        'metadata': {},
                        'mimeType': 'image/jpg'}],
                parser='image')

fam_2 = Family()
fam_2.add_group(files=[{'path': file_2,
                        'is_gdoc': True,
                        'metadata': {},
                        'mimeType': 'text/plain'
def preproc_fam_batches(self):
    fam_count = 0

    # Just create an empty one out here so Python doesn't yell at me.
    fam_batch = FamilyBatch()

    num_overloads = 0
    # While we have files and haven't exceeded the weak scaling threshold (file_cutoff)...
    while not self.family_queue.empty() and fam_count < file_cutoff:
        fam_batch = FamilyBatch()
        total_fam_batch_size = 0

        # Keep filling the batch until it reaches map_size (or we run out of families).
        while len(fam_batch.families) < map_size \
                and not self.family_queue.empty() \
                and fam_count < file_cutoff:
            fam_count += 1
            fam = self.family_queue.get()

            total_family_size = 0
            # First convert to the correct paths
            for file_obj in fam['files']:
                old_path = file_obj['path']
                new_path = self.path_converter(fam['family_id'], old_path)
                file_obj['path'] = new_path
                file_size = file_obj['metadata']['physical']['size']
                total_family_size += file_size

            for group in fam['groups']:
                for file_obj in group['files']:
                    old_path = file_obj['path']
                    new_path = self.path_converter(fam['family_id'], old_path)
                    file_obj['path'] = new_path

            empty_fam = Family()
            empty_fam.from_dict(fam)

            # We will ONLY handle the SIZE issue in here.
            if soft_batch_bytes_max > 0:
                # So if this last family would put us over the top,
                if total_fam_batch_size + total_family_size > soft_batch_bytes_max:
                    num_overloads += 1
                    print(f"Num overloads {num_overloads}")
                    # then we append the old batch (if not empty),
                    if len(fam_batch.families) > 0:
                        self.fam_batches.append(fam_batch)
                    # empty the old one
                    fam_batch = FamilyBatch()
                    total_fam_batch_size = total_family_size
                    assert len(fam_batch.families) == 0
                else:
                    # Track the running batch size so the soft max can trigger.
                    total_fam_batch_size += total_family_size

            # and then continue (here we either add to our prior fam_batch OR the new one).
            fam_batch.add_family(empty_fam)

        assert len(fam_batch.families) <= map_size
        self.fam_batches.append(fam_batch)

    # img_extractor = NothingExtractor()
    img_extractor = MatioExtractor()

    # TODO: ADDING TEST. Making sure we have all of our files here.
    ta = time.time()
    num_families = 0
    for item in self.fam_batches:
        num_families += len(item.families)
    print(num_families)
    tb = time.time()
    print(f"Time to move families: {tb - ta}")
    time.sleep(5)
    # exit()

    # This check makes sure our batches are the correct size to avoid the January 2021
    # disaster of having vastly incorrect numbers of batches.
    #
    # Here we check that the number of families we are processing is LESS than the total
    # number of batches times the batch size (e.g., the last batch can be full or empty),
    # and the number of families is GREATER than the case where our last map is missing.
    #
    # This leaves a very small window for error. Could use modulus to be more exact.
    # TODO: Bring this back (but use for grouping by num. files)
    # try:
    #     assert len(self.fam_batches) * (map_size - 1) <= fam_count <= len(self.fam_batches) * map_size
    # except AssertionError as e:
    #     print(f"Caught {e} after creating client batches...")
    #     print(f"Number of batches: {len(self.fam_batches)}")
    #     print(f"Family Count: {fam_count}")
    #     print("Cannot continue. Exiting...")
    #     exit()

    print(f"Container type: {container_type}")
    print(f"Location: {location}")
    self.fn_uuid = img_extractor.register_function(container_type=container_type,
                                                   location=location,
                                                   ep_id=ep_id,
                                                   group="a31d8dce-5d0a-11ea-afea-0a53601d30b5")

    # funcX batching. Here we take the 'user' FamilyBatch objects and put them into a batch we send to funcX.
    num_fx_batches = 0
    current_batch = []
    print(f"Number of family batches: {len(self.fam_batches)}")
    for fam_batch in self.fam_batches:
        # print(len(current_batch))
        # print(batch_size)
        if len(current_batch) < batch_size:
            current_batch.append(fam_batch)
        else:
            # print("Marking batch!")
            # print(len(current_batch))
            self.funcx_batches.put(current_batch)
            current_batch = [fam_batch]
            num_fx_batches += 1

    # Grab the stragglers.
    if len(current_batch) > 0:
        print("Marking batch!")
        self.funcx_batches.put(current_batch)
        num_fx_batches += 1

    # See same description as above (map example) for explanation.
    try:
        theor_full_batches = math.ceil(len(self.fam_batches) / batch_size)
        # print(f"Theoretical full batches: {theor_full_batches}")
        assert theor_full_batches == num_fx_batches
    except AssertionError as e:
        print(f"Caught {e} after creating funcX batches...")
        print(f"Number of batches: {self.funcx_batches.qsize()}")
        print(f"Batch count: {num_fx_batches}")
        print("Cannot continue. Exiting...")
        exit()
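    # Worked example of the check above (illustrative numbers, not from a real run):
    # with len(self.fam_batches) == 10 and batch_size == 4, the loop emits
    # math.ceil(10 / 4) == 3 funcX batches of sizes 4, 4, and 2 -- the
    # "stragglers" block accounts for the final partial batch.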
import os

# from extractors.xtract_python import PythonExtractor
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch
from extractors.utils.base_extractor import base_extractor

# ****** JOAO LOCAL TESTS ******
ep_name = "test_python_ep"
xtract_dir = "/Users/joaovictor/.xtract"

# Note: this points to my local clone of the git repo 'xtract-python'
sys_path_add = "/Users/joaovictor/xtract/xtract-python"
module_path = "xtract_python_main"  # The file containing 'execute_extractor'
metadata_write_path = "/Users/joaovictor/Desktop/test_metadata"

# HERE WE PACK LOCAL FAMILIES INTO THE SAME STRUCTURES AS USED BY XTRACT.
test_fam_1 = Family()
test_fam_2 = Family()

base_path = "/Users/joaovictor/xtract/xtract-python"
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'tests/test_files/multi_line.py'),
                             'metadata': dict()}],
                     parser=None)
test_fam_1.download_type = "LOCAL"

print(f"[DEBUG] JSON form of our family object: {test_fam_1.to_dict()}")

fam_batch = FamilyBatch()
fam_batch.add_family(test_fam_1)
import os

from xtract_sdk.xtract import XtractAgent
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch

xag = XtractAgent(ep_name='tyler_test',
                  xtract_dir='/Users/tylerskluzacek/.xtract')

fam_to_process = Family(download_type='LOCAL', base_url="")
base_path = '/Users/tylerskluzacek/data_folder/413cafa0-9b43-4ffb-9c54-4834dd265a46'
fam_to_process.add_group(files=[{'path': os.path.join(base_path, 'INCAR'), 'metadata': {}},
                                {'path': os.path.join(base_path, 'OUTCAR'), 'metadata': {}},
                                {'path': os.path.join(base_path, 'POSCAR'), 'metadata': {}}],
                         parser='dft')
fam_to_process = fam_to_process.to_dict()

xag.load_family(fam_to_process)
xag.fetch_all_files()

for item in xag.ready_families:
    print(item)
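# A short follow-on sketch (commented out; it mirrors the XtractAgent calls used
# in the tabular test later in this repo, and assumes an extractor is wired up
# for the 'dft' parser):
#
# xag.execute_extractions(family_batch=..., input_type=str)  # supply a FamilyBatch
# xag.flush_metadata_to_files(writer='json')
# print(xag.get_completion_stats())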
import os

from extractors.xtract_netcdf import NetCDFExtractor
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch
from extractors.utils.base_extractor import base_extractor

# ****** JOAO LOCAL TESTS ******
ep_name = "test_netcdf_ep"
xtract_dir = "/Users/joaovictor/.xtract"

# Note: this points to my local clone of the git repo 'xtract-netcdf'
sys_path_add = "/Users/joaovictor/xtract/xtract-netcdf"
module_path = "xtract_netcdf_main"  # The file containing 'execute_extractor'
metadata_write_path = "/Users/joaovictor/Desktop/test_metadata"

# HERE WE PACK LOCAL FAMILIES INTO THE SAME STRUCTURES AS USED BY XTRACT.
test_fam_1 = Family()
test_fam_2 = Family()

base_path = "/Users/joaovictor/xtract/xtract-netcdf/tests/test_files/"
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'sresa1b_ncar_ccsm3-example.nc'),
                             'metadata': dict()}],
                     parser=None)
test_fam_1.download_type = "LOCAL"

print(f"[DEBUG] JSON form of our family object: {test_fam_1.to_dict()}")

fam_batch = FamilyBatch()
fam_batch.add_family(test_fam_1)

# ****** TEST AND RUN THE EXTRACTOR ON YOUR FAMILYBATCH() *******
extractor = NetCDFExtractor()
event = extractor.create_event(family_batch=fam_batch,
                               ep_name=ep_name,
                               xtract_dir=xtract_dir,
                               sys_path_add=sys_path_add,
                               module_path=module_path,
                               metadata_write_path=metadata_write_path)
import time

from funcx import FuncXClient

from extractors.xtract_tabular import tabular_extract
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch

file_id = "1XCS2Xqu35TiQgCpI8J8uu4Mss9FNnp1-AuHo-pMujb4"
file_id2 = "0B5nDSpS9a_3kUFdiTXRFdS12QUk"

family_1 = Family()
family_2 = Family()

family_1.add_group(files=[{'path': file_id, 'is_gdoc': True, 'mimeType': "text/csv"}],
                   parser='xtract-tabular')
family_1.base_url = ""

family_2.add_group(files=[{'path': file_id2, 'is_gdoc': False}],
                   parser='xtract-tabular')
family_2.download_type = "GDRIVE"

fam_batch = FamilyBatch()
fam_batch.add_family(family_1)
fam_batch.add_family(family_2)
def preproc_fam_batches(self):
    total_tasks = 0

    print("PREPROCESSING!")
    while not self.image_path_list.empty():
        fam_batch = FamilyBatch()
        # print(len(fam_batch.families))
        while len(fam_batch.families) < map_size:
            if self.image_path_list.empty():
                break

            path = self.image_path_list.get()
            print(path)

            family = dict()
            family['family_id'] = None

            # TODO: CHANGE THIS FOR THETA.
            if system == 'midway2':
                family['files'] = [{'path': f'/project2/chard/skluzacek/train2014/{path}'}]
            elif system == 'theta':
                family['files'] = [{'path': f'/projects/CSC249ADCD01/skluzacek/train2014/{path}'}]

            family['metadata'] = dict()
            family['headers'] = None
            family['download_type'] = None
            family['groups'] = []

            empty_fam = Family()
            empty_fam.from_dict(family)
            print("ADDING FAMILY TO FAM BATCH")
            fam_batch.add_family(empty_fam)

        # if total_tasks > max_tasks:
        self.fam_batches.append(fam_batch)

    img_extractor = ImageExtractor()
    print("REGISTERING FUNCTION")
    self.fn_uuid = img_extractor.register_function(container_type=container_type,
                                                   location=location,
                                                   ep_id=ep_id,
                                                   group="a31d8dce-5d0a-11ea-afea-0a53601d30b5")

    current_batch = []
    for fam_batch in self.fam_batches:
        if len(current_batch) < batch_size:
            current_batch.append(fam_batch)
        else:
            print(f"Length of current batch: {len(current_batch)}")
            self.funcx_batches.put(current_batch)
            current_batch = [fam_batch]

    # Grab the stragglers.
    if len(current_batch) > 0:
        self.funcx_batches.put(current_batch)

    print("Let me see")
    batch_counter = 0
import time

from funcx import FuncXClient

from extractors.xtract_images import images_extract
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch

fam_1 = Family()
fam_batch = FamilyBatch()
fam_batch.add_family(fam_1)

fam_1.add_group(files=[{"path": '/home/skluzacek/i_spy.jpeg',
                        "is_gdoc": False,
                        "mimeType": "image/jpg",
                        "metadata": {}}],
                parser='image')


def test(event):
    import os
    return os.environ['container_version']


def main(fxc, ep_id):
    container_uuid = fxc.register_container('xtract-images.img', 'singularity')
    print("Container UUID: {}".format(container_uuid))

    fn_uuid = fxc.register_function(images_extract,
                                    # ep_id,  # TODO: We do not need ep id here
                                    container_uuid=container_uuid,
                                    description="New sum function defined without string spec")
    print("FN_UUID : ", fn_uuid)

    res = fxc.run({'family_batch': fam_batch, 'creds': None, 'download_file': False},
                  endpoint_id=ep_id,
                  function_id=fn_uuid)
def group(self, file_ls: List[str]):
    """Given a list of metadata dicts, output an updated list of extractors.

    NOTE FOR THIS GROUPER :: 1 file = 1 family = 1 group = 1 file
    """
    crawl_tallies = {"text": 0,
                     "tabular": 0,
                     "images": 0,
                     "presentation": 0,
                     "other": 0,
                     "hierarch": 0,
                     "compressed": 0}

    if not self.by_file:
        raise ValueError("Unable to process groups of more than 1 file by extension!")

    families = []
    mappings = self.get_mappings()
    for fdict in file_ls:
        groups = []
        valid_mapping = False
        mimeType = None
        for mapping in mappings:
            if fdict['extension'].lower() in mappings[mapping]:
                # TODO: this will eventually need to be a list of extractors.
                fdict['extractor'] = mapping  # mapping = extractor_name!
                valid_mapping = True
                mimeType = fdict["mimeType"]
                crawl_tallies[mapping] += 1

        if not valid_mapping:
            mimeType = fdict["mimeType"]
            if 'vnd.google-apps.document' in mimeType:
                fdict['extractor'] = "text"
                mimeType = "text/plain"
                crawl_tallies["text"] += 1
            elif 'vnd.google-apps.spreadsheet' in mimeType:
                fdict['extractor'] = "tabular"
                mimeType = "text/csv"
                crawl_tallies['tabular'] += 1
            elif 'vnd.google-apps.presentation' in mimeType:
                # fdict['extractor'] = "text"  # TODO: this should come back soon.
                fdict['extractor'] = None
                mimeType = None
                crawl_tallies['presentation'] += 1
                # TODO from Will: "slides: text, tabular, images, BERT... order is not important"
            else:
                # Now we default to None
                fdict['extractor'] = None
                mimeType = None
                crawl_tallies['other'] += 1

        groups.append(fdict)

        family = Family()
        family.add_group(files=[{"path": fdict["id"],
                                 "metadata": fdict,
                                 "mimeType": mimeType}],
                         parser=fdict["extractor"])
        families.append(family.to_dict())
    return families
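    # A minimal, commented-out sketch of what group() consumes and produces (the
    # grouper instance and file dict are hypothetical, and the 'csv' -> 'tabular'
    # mapping is an assumption about get_mappings()):
    #
    # grouper = SomeByFileGrouper(by_file=True)       # hypothetical constructor
    # families = grouper.group([{'id': 'abc123',
    #                            'extension': 'CSV',  # matched case-insensitively
    #                            'mimeType': 'text/csv'}])
    # # Each entry is a Family serialized via to_dict(): one single-file group
    # # whose parser is the extractor chosen during grouping.
    # print(families[0])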
from extractors.xtract_xpcs import XPCSExtractor
from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch
from extractors.utils.base_extractor import base_extractor

# TYLER LOCAL TESTS
ep_name = "test_tabular_ep"
xtract_dir = "/Users/tylerskluzacek/.xtract"

# Note: this points to my local clone of the git repo 'xtract-xpcs'
sys_path_add = "/Users/tylerskluzacek/PycharmProjects/xtract-xpcs"
module_path = "gather_xpcs_metadata"  # The file containing 'execute_extractor'
recursion_depth = 5000
metadata_write_path = "/Users/tylerskluzacek/Desktop/test_metadata"

# HERE WE PACK LOCAL FAMILIES INTO THE SAME STRUCTURES AS USED BY XTRACT.
test_fam_1 = Family()
test_fam_2 = Family()

base_path = '/Users/tylerskluzacek/A001_00004_Vol20_att1_Rq0_0001-100000.hdf'
test_fam_1.add_group(files=[{'path': base_path, 'metadata': dict()}], parser=None)
test_fam_1.download_type = "LOCAL"

print(f"[DEBUG] JSON form of our family object: {test_fam_1.to_dict()}")

fam_batch = FamilyBatch()
fam_batch.add_family(test_fam_1)

extractor = XPCSExtractor()
        recursion_depth=5000,
        metadata_write_path="/Users/tylerskluzacek/Desktop/test_metadata")

    # Execute the extractor on our family_batch.
    xtra.execute_extractions(family_batch=event['family_batch'], input_type=str)

    # All metadata are held in XtractAgent's memory. Flush to disk!
    xtra.flush_metadata_to_files(writer='json')

    return xtra.get_completion_stats()


mock_event = dict()

test_fam_1 = Family()
test_fam_2 = Family()

base_path = "/Users/tylerskluzacek/xtract-sdk/tests/xtract-tabular/tests/test_files"
test_fam_1.add_group(files=[{'path': os.path.join(base_path, 'comma_delim'), 'metadata': dict()}],
                     parser=None)
test_fam_1.download_type = "LOCAL"

print(test_fam_1.to_dict())

fam_batch = FamilyBatch()
fam_batch.add_family(test_fam_1)
mock_event['family_batch'] = fam_batch
import time
import pickle
import os

from xtract_sdk.packagers.family import Family
from xtract_sdk.packagers.family_batch import FamilyBatch
from xtract_sdk.downloaders.google_drive import GoogleDriveDownloader

# TODO: extract from a list of families
fam = Family(str(0), headers={'potato': 'tomato'}, metadata=None)
fam2 = Family(str(1), headers={'potato': 'tomato'}, metadata=None)

group_id = fam.add_group(files=[{'path': 'a', 'metadata': {}},
                                {'path': 'b', 'metadata': {}},
                                {'path': 'c', 'metadata': {}}],
                         parser="camel")
group_id2 = fam.add_group(files=[{'path': 'c', 'metadata': {}},
                                 {'path': 'd', 'metadata': {}},
                                 {'path': 'e', 'metadata': {}}],