def interpret_file_genpath(target_cache, bundle_uuid, genpath, post):
    """
    Resolve a file genpath (e.g. '/stats:train/errorRate') to a value.

    |target_cache| is a mapping from target (bundle_uuid, subpath) to the
    parsed info object, which is read/written to avoid reading/parsing the
    same file many times.
    |genpath| specifies the subpath and various fields (e.g., for
    /stats:train/errorRate, subpath = 'stats', key = 'train/errorRate').
    |post| function to apply to the resulting value.
    Return the string value.
    Raises UsageError if |genpath| is not a file genpath.
    """
    MAX_LINES = 10000  # Maximum number of lines we need to read from a file.

    # Load the file
    if not is_file_genpath(genpath):
        raise UsageError('Not file genpath: %s' % genpath)
    genpath = genpath[1:]
    if ':' in genpath:  # Looking for a particular key in the file
        # maxsplit=1 so that keys containing ':' don't blow up the unpack.
        subpath, key = genpath.split(':', 1)
    else:
        subpath, key = genpath, None

    target = (bundle_uuid, subpath)
    if target not in target_cache:
        info = None
        try:
            target_info = rest_util.get_target_info(target, 0)
            if target_info['type'] == 'file':
                # Try to interpret the structure of the file by looking inside it.
                contents = head_target(target, MAX_LINES)
                if len(contents) == 0:
                    info = ''
                elif all('\t' in x for x in contents):
                    # Tab-separated file (key\tvalue\nkey\tvalue...)
                    info = {}
                    for x in contents:
                        kv = x.strip().split("\t", 1)
                        if len(kv) == 2:
                            info[kv[0]] = kv[1]
                else:
                    try:
                        # JSON file
                        info = json.loads(''.join(contents))
                    except (TypeError, ValueError):
                        try:
                            # YAML file
                            # Use safe_load because yaml.load() could execute
                            # arbitrary Python code
                            info = yaml.safe_load(''.join(contents))
                        except yaml.YAMLError:
                            # Plain text file
                            info = ''.join(contents)
        except NotFoundError:
            # Missing target: cache None so we don't retry on every lookup.
            pass
        target_cache[target] = info

    # Traverse the info object.
    info = target_cache.get(target, None)
    if key is not None and info is not None:
        for k in key.split('/'):
            if isinstance(info, dict):
                info = info.get(k, None)
            elif isinstance(info, list):
                try:
                    # Lists raise IndexError (not KeyError) for out-of-range
                    # indices; catch it so a bad index yields None, matching
                    # the dict branch's behavior for a missing key.
                    info = info[int(k)]
                except (IndexError, ValueError):
                    info = None
            else:
                info = None
            if info is None:
                break

    return apply_func(post, info)
def resolve_interpreted_blocks(interpreted_blocks):
    """
    Called by the web interface. Takes a list of interpreted worksheet items
    (returned by worksheet_util.interpret_items) and fetches the appropriate
    information, replacing the 'interpreted' field in each item.
    The result can be serialized via JSON.
    """

    def set_error_data(block_index, message):
        # Replace the whole block with a markup block that displays the error.
        interpreted_blocks[block_index] = (
            MarkupBlockSchema().load({'id': block_index, 'text': 'ERROR: ' + message}).data
        )

    for block_index, block in enumerate(interpreted_blocks):
        if block is None:
            continue
        mode = block['mode']
        try:
            # Replace data with a resolved version.
            if mode == BlockModes.markup_block:
                # no need to do anything
                pass
            elif mode == BlockModes.record_block or mode == BlockModes.table_block:
                # header_name_posts is a list of (name, post-processing) pairs.
                contents = block['rows']
                # Request information
                contents = interpret_genpath_table_contents(contents)
                block['rows'] = contents
            elif mode == BlockModes.contents_block or mode == BlockModes.image_block:
                try:
                    # Target is a (bundle_uuid, path) pair taken from the first
                    # bundle in the block's bundles_spec.
                    target_info = rest_util.get_target_info(
                        (block['bundles_spec']['bundle_infos'][0]['uuid'], block['target_genpath']),
                        0,
                    )
                    if target_info['type'] == 'directory' and mode == BlockModes.contents_block:
                        block['status']['code'] = FetchStatusCodes.ready
                        block['lines'] = ['<directory>']
                    elif target_info['type'] == 'file':
                        block['status']['code'] = FetchStatusCodes.ready
                        if mode == BlockModes.contents_block:
                            # Fetch at most max_lines lines of the file.
                            block['lines'] = head_target(
                                (
                                    block['bundles_spec']['bundle_infos'][0]['uuid'],
                                    block['target_genpath'],
                                ),
                                block['max_lines'],
                                replace_non_unicode=True,
                            )
                        elif mode == BlockModes.image_block:
                            block['status']['code'] = FetchStatusCodes.ready
                            # Entire file contents, base64-encoded for the client.
                            block['image_data'] = base64.b64encode(
                                cat_target(
                                    (
                                        block['bundles_spec']['bundle_infos'][0]['uuid'],
                                        block['target_genpath'],
                                    )
                                )
                            )
                    else:
                        # Target exists but is neither a readable directory
                        # listing case nor a file.
                        block['status']['code'] = FetchStatusCodes.not_found
                        if mode == BlockModes.contents_block:
                            block['lines'] = None
                        elif mode == BlockModes.image_block:
                            block['image_data'] = None
                except NotFoundError as e:
                    block['status']['code'] = FetchStatusCodes.not_found
                    if mode == BlockModes.contents_block:
                        block['lines'] = None
                    elif mode == BlockModes.image_block:
                        block['image_data'] = None
            elif mode == BlockModes.graph_block:
                # data = list of {'target': ...}
                # Add a 'points' field that contains the contents of the target.
                for info in block['trajectories']:
                    target = (info['bundle_uuid'], info['target_genpath'])
                    try:
                        target_info = rest_util.get_target_info(target, 0)
                    except NotFoundError as e:
                        # Skip missing trajectories rather than failing the block.
                        continue
                    if target_info['type'] == 'file':
                        contents = head_target(target, block['max_lines'], replace_non_unicode=True)
                        # Assume TSV file without header for now, just return each line as a row
                        info['points'] = points = []
                        for line in contents:
                            row = line.split('\t')
                            points.append(row)
            elif mode == BlockModes.subworksheets_block:
                # do nothing
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)
        except UsageError as e:
            set_error_data(block_index, e.message)
        except StandardError:
            # Catch-all (Python 2): report the error inline but keep
            # processing the remaining blocks.
            import traceback

            traceback.print_exc()
            set_error_data(block_index, "Unexpected error interpreting item")
        block['is_refined'] = True
    return interpreted_blocks
def interpret_file_genpath(target_cache, bundle_uuid, genpath, post):
    """
    Resolve a file genpath (e.g. '/stats:train/errorRate') to a value.

    |target_cache| is a mapping from target (bundle_uuid, subpath) to the
    parsed info object, which is read/written to avoid reading/parsing the
    same file many times.
    |genpath| specifies the subpath and various fields (e.g., for
    /stats:train/errorRate, subpath = 'stats', key = 'train/errorRate').
    |post| function to apply to the resulting value.
    Return the string value.
    Raises UsageError if |genpath| is not a file genpath.
    """
    MAX_LINES = 10000  # Maximum number of lines we need to read from a file.

    # Load the file
    if not is_file_genpath(genpath):
        raise UsageError('Not file genpath: %s' % genpath)
    genpath = genpath[1:]
    if ':' in genpath:  # Looking for a particular key in the file
        # maxsplit=1 so that keys containing ':' don't blow up the unpack.
        subpath, key = genpath.split(':', 1)
    else:
        subpath, key = genpath, None

    target = (bundle_uuid, subpath)
    if target not in target_cache:
        target_info = rest_util.get_target_info(target, 0)
        # Try to interpret the structure of the file by looking inside it.
        if target_info is not None and target_info['type'] == 'file':
            contents = head_target(target, MAX_LINES)
            if len(contents) == 0:
                info = ''
            elif all('\t' in x for x in contents):
                # Tab-separated file (key\tvalue\nkey\tvalue...)
                info = {}
                for x in contents:
                    kv = x.strip().split("\t", 1)
                    if len(kv) == 2:
                        info[kv[0]] = kv[1]
            else:
                try:
                    # JSON file
                    info = json.loads(''.join(contents))
                except (TypeError, ValueError):
                    try:
                        # YAML file
                        # Use safe_load because yaml.load() could execute
                        # arbitrary Python code
                        info = yaml.safe_load(''.join(contents))
                    except yaml.YAMLError:
                        # Plain text file
                        info = ''.join(contents)
        else:
            # Missing target or a directory: cache None so we don't retry.
            info = None
        target_cache[target] = info

    # Traverse the info object.
    info = target_cache.get(target, None)
    if key is not None and info is not None:
        for k in key.split('/'):
            if isinstance(info, dict):
                info = info.get(k, None)
            elif isinstance(info, list):
                try:
                    # Lists raise IndexError (not KeyError) for out-of-range
                    # indices; catch it so a bad index yields None, matching
                    # the dict branch's behavior for a missing key.
                    info = info[int(k)]
                except (IndexError, ValueError):
                    info = None
            else:
                info = None
            if info is None:
                break

    return apply_func(post, info)
def resolve_interpreted_items(interpreted_items):
    """
    Called by the web interface. Takes a list of interpreted worksheet items
    (returned by worksheet_util.interpret_items) and fetches the appropriate
    information, replacing the 'interpreted' field in each item.
    The result can be serialized via JSON.
    """

    def error_data(mode, message):
        # Produce an 'interpreted' payload that renders the error in the
        # shape the given mode expects (header/rows for tables, list of
        # lines otherwise).
        if mode == 'record' or mode == 'table':
            return (('ERROR', ), [{'ERROR': message}])
        else:
            return [message]

    for item in interpreted_items:
        if item is None:
            continue
        mode = item['mode']
        data = item['interpreted']
        properties = item['properties']
        try:
            # Replace data with a resolved version.
            if mode == 'markup':
                # no need to do anything
                pass
            elif mode == 'record' or mode == 'table':
                # header_name_posts is a list of (name, post-processing) pairs.
                header, contents = data
                # Request information
                contents = interpret_genpath_table_contents(contents)
                data = (header, contents)
            elif mode == 'contents':
                try:
                    max_lines = int(
                        properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
                except ValueError:
                    raise UsageError("maxlines must be integer")
                # data is assumed here to be a target spec usable by
                # get_target_info / head_target.
                target_info = rest_util.get_target_info(data, 0)
                if target_info is not None and target_info[
                        'type'] == 'directory':
                    data = ['<directory>']
                elif target_info is not None and target_info['type'] == 'file':
                    data = head_target(data, max_lines, replace_non_unicode=True)
                else:
                    data = None
            elif mode == 'html':
                target_info = rest_util.get_target_info(data, 0)
                if target_info is not None and target_info['type'] == 'file':
                    # No line limit for HTML content.
                    data = head_target(data, None)
                else:
                    data = None
            elif mode == 'image':
                target_info = rest_util.get_target_info(data, 0)
                if target_info is not None and target_info['type'] == 'file':
                    # Entire file contents, base64-encoded for the client.
                    data = base64.b64encode(cat_target(data))
                else:
                    data = None
            elif mode == 'graph':
                try:
                    max_lines = int(
                        properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
                except ValueError:
                    raise UsageError("maxlines must be integer")
                # data = list of {'target': ...}
                # Add a 'points' field that contains the contents of the target.
                for info in data:
                    target = info['target']
                    target_info = rest_util.get_target_info(target, 0)
                    if target_info is not None and target_info[
                            'type'] == 'file':
                        contents = head_target(target, max_lines, replace_non_unicode=True)
                        # Assume TSV file without header for now, just return each line as a row
                        info['points'] = points = []
                        for line in contents:
                            row = line.split('\t')
                            points.append(row)
            elif mode == 'search':
                data = interpret_search(data)
            elif mode == 'wsearch':
                data = interpret_wsearch(data)
            elif mode == 'worksheet':
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)
        except UsageError as e:
            data = error_data(mode, e.message)
        except StandardError:
            # Catch-all (Python 2): report the error inline but keep
            # processing the remaining items.
            import traceback

            traceback.print_exc()
            data = error_data(mode, "Unexpected error interpreting item")
        # Assign the interpreted from the processed data
        item['interpreted'] = data
    return interpreted_items
def resolve_interpreted_blocks(interpreted_blocks, brief):
    """
    Called by the web interface. Takes a list of interpreted worksheet items
    (returned by worksheet_util.interpret_items) and fetches the appropriate
    information, replacing the 'interpreted' field in each item.
    The result can be serialized via JSON.

    When |brief| is truthy, table/record blocks are not fully resolved;
    only their fetch status is computed.
    """

    def set_error_data(block_index, message):
        # Replace the whole block with a markup block that displays the error.
        interpreted_blocks[block_index] = (MarkupBlockSchema().load({
            'id': block_index,
            'text': 'ERROR: ' + message
        }).data)

    for block_index, block in enumerate(interpreted_blocks):
        if block is None:
            continue
        mode = block['mode']
        try:
            # Replace data with a resolved version.
            if mode in (BlockModes.markup_block, BlockModes.placeholder_block):
                # no need to do anything
                pass
            elif mode == BlockModes.record_block or mode == BlockModes.table_block:
                # header_name_posts is a list of (name, post-processing) pairs.
                # Request information
                if brief:
                    # In brief mode, only calculate whether we should interpret genpaths, and if so, set status to briefly_loaded.
                    should_interpret_genpaths = (len(
                        get_genpaths_table_contents_requests(block['rows'])) > 0)
                    block['status'] = (
                        FetchStatusSchema.get_briefly_loaded_status()
                        if should_interpret_genpaths else
                        FetchStatusSchema.get_ready_status())
                else:
                    block['rows'] = interpret_genpath_table_contents(
                        block['rows'])
                    block['status'] = FetchStatusSchema.get_ready_status()
            elif mode == BlockModes.contents_block or mode == BlockModes.image_block:
                # Target comes from the first bundle in the block's bundles_spec.
                bundle_uuid = block['bundles_spec']['bundle_infos'][0]['uuid']
                target_path = block['target_genpath']
                target = BundleTarget(bundle_uuid, target_path)
                try:
                    target_info = rest_util.get_target_info(target, 0)
                    if target_info[
                            'type'] == 'directory' and mode == BlockModes.contents_block:
                        block['status']['code'] = FetchStatusCodes.ready
                        block['lines'] = ['<directory>']
                    elif target_info['type'] == 'file':
                        block['status']['code'] = FetchStatusCodes.ready
                        if mode == BlockModes.contents_block:
                            # Fetch at most max_lines lines of the file.
                            block['lines'] = head_target(
                                target_info['resolved_target'],
                                block['max_lines'])
                        elif mode == BlockModes.image_block:
                            block['status']['code'] = FetchStatusCodes.ready
                            # Entire file contents, base64-encoded to a str
                            # so the result is JSON-serializable.
                            block['image_data'] = base64.b64encode(
                                bytes(
                                    cat_target(target_info['resolved_target']))
                            ).decode('utf-8')
                    else:
                        # Target exists but is neither a directory-contents
                        # case nor a file.
                        block['status']['code'] = FetchStatusCodes.not_found
                        if mode == BlockModes.contents_block:
                            block['lines'] = None
                        elif mode == BlockModes.image_block:
                            block['image_data'] = None
                except NotFoundError:
                    block['status']['code'] = FetchStatusCodes.not_found
                    if mode == BlockModes.contents_block:
                        block['lines'] = None
                    elif mode == BlockModes.image_block:
                        block['image_data'] = None
            elif mode == BlockModes.graph_block:
                # data = list of {'target': ...}
                # Add a 'points' field that contains the contents of the target.
                for info in block['trajectories']:
                    target = BundleTarget(info['bundle_uuid'],
                                          info['target_genpath'])
                    try:
                        target_info = rest_util.get_target_info(target, 0)
                    except NotFoundError:
                        # Skip missing trajectories rather than failing the block.
                        continue
                    if target_info['type'] == 'file':
                        contents = head_target(target_info['resolved_target'],
                                               block['max_lines'])
                        # Assume TSV file without header for now, just return each line as a row
                        info['points'] = points = []
                        for line in contents:
                            row = line.split('\t')
                            points.append(row)
            elif mode == BlockModes.subworksheets_block:
                # do nothing
                pass
            elif mode == BlockModes.schema_block:
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)
        except UsageError as e:
            set_error_data(block_index, str(e))
        except Exception:
            # Catch-all: report the error inline but keep processing the
            # remaining blocks.
            import traceback

            traceback.print_exc()
            set_error_data(block_index, "Unexpected error interpreting item")
        block['is_refined'] = True
    return interpreted_blocks