예제 #1
0
def interpret_file_genpath(target_cache, bundle_uuid, genpath, post):
    """
    Resolve a file genpath inside a bundle and return the post-processed value.

    |target_cache| is a mapping from target (bundle_uuid, subpath) to the info
    parsed from that target (dict, list, string or None).  It is read and
    written here to avoid reading/parsing the same file many times.
    |genpath| specifies the subpath and, optionally, a key inside the file
    (e.g., for /stats:train/errorRate, subpath = 'stats',
    key = 'train/errorRate').  Only the first ':' separates subpath from key.
    |post| is handed, together with the resolved value, to apply_func.

    Return the result of apply_func(post, value).  value is None when the
    target lookup raises NotFoundError, when the target is not a file, or when
    the key cannot be resolved inside the parsed contents.
    Raise UsageError if |genpath| is not a file genpath.
    """
    MAX_LINES = 10000  # Maximum number of lines we need to read from a file.

    if not is_file_genpath(genpath):
        raise UsageError('Not file genpath: %s' % genpath)
    genpath = genpath[1:]  # Drop the leading character ('/' in the example above).
    if ':' in genpath:  # Looking for a particular key in the file
        # Split on the first ':' only, so a key that itself contains ':' does
        # not break the two-way unpacking.
        subpath, key = genpath.split(':', 1)
    else:
        subpath, key = genpath, None

    # Load the file (once per target; later calls reuse the cached info).
    target = (bundle_uuid, subpath)
    if target not in target_cache:
        info = None
        try:
            target_info = rest_util.get_target_info(target, 0)
            if target_info['type'] == 'file':
                # Try to interpret the structure of the file by looking inside it.
                contents = head_target(target, MAX_LINES)

                if len(contents) == 0:
                    info = ''
                elif all('\t' in x for x in contents):
                    # Tab-separated file (key\tvalue\nkey\tvalue...)
                    info = {}
                    for x in contents:
                        kv = x.strip().split("\t", 1)
                        # A line whose value is stripped away yields one
                        # field only and is skipped.
                        if len(kv) == 2:
                            info[kv[0]] = kv[1]
                else:
                    text = ''.join(contents)
                    try:
                        # JSON file
                        info = json.loads(text)
                    except (TypeError, ValueError):
                        try:
                            # YAML file
                            # Use safe_load because yaml.load() could execute
                            # arbitrary Python code
                            info = yaml.safe_load(text)
                        except yaml.YAMLError:
                            # Plain text file
                            info = text
        except NotFoundError:
            # Missing target: cache None so the lookup is not repeated.
            pass

        target_cache[target] = info

    # Traverse the info object, one '/'-separated key component at a time.
    info = target_cache.get(target, None)
    if key is not None and info is not None:
        for k in key.split('/'):
            if isinstance(info, dict):
                info = info.get(k, None)
            elif isinstance(info, list):
                try:
                    info = info[int(k)]
                except (IndexError, ValueError):
                    # Out-of-range index (IndexError) or a non-integer
                    # component (ValueError): the key does not resolve.
                    info = None
            else:
                info = None
            if info is None:
                break
    return apply_func(post, info)
예제 #2
0
def resolve_interpreted_blocks(interpreted_blocks):
    """
    Called by the web interface.  Takes a list of interpreted worksheet
    blocks and fetches the information each one refers to, updating the
    blocks in place.  The result can be serialized via JSON.

    Per mode:
    - markup / subworksheets: left untouched.
    - record / table: block['rows'] is replaced by the result of
      interpret_genpath_table_contents.
    - contents / image: block['status']['code'] is set and block['lines'] or
      block['image_data'] is filled in (None when the target is not found).
    - graph: each trajectory whose target is a file gets a 'points' list
      holding the tab-split lines of that file.

    A UsageError or any other exception replaces the block in the list with
    a markup block carrying an 'ERROR: ...' text.  Entries that are None are
    skipped.  Returns the same |interpreted_blocks| list.
    """

    def set_error_data(block_index, message):
        # Swap the failing block for a markup block that shows the error.
        interpreted_blocks[block_index] = (
            MarkupBlockSchema().load({'id': block_index, 'text': 'ERROR: ' + message}).data
        )

    def set_not_found(block, mode):
        # Shared by the "unknown target type" and NotFoundError paths.
        block['status']['code'] = FetchStatusCodes.not_found
        if mode == BlockModes.contents_block:
            block['lines'] = None
        elif mode == BlockModes.image_block:
            block['image_data'] = None

    for block_index, block in enumerate(interpreted_blocks):
        if block is None:
            continue
        mode = block['mode']

        try:
            # Replace data with a resolved version.
            if mode == BlockModes.markup_block:
                # no need to do anything
                pass
            elif mode == BlockModes.record_block or mode == BlockModes.table_block:
                # Request information
                block['rows'] = interpret_genpath_table_contents(block['rows'])
            elif mode == BlockModes.contents_block or mode == BlockModes.image_block:
                target = (
                    block['bundles_spec']['bundle_infos'][0]['uuid'],
                    block['target_genpath'],
                )
                try:
                    target_info = rest_util.get_target_info(target, 0)
                    if target_info['type'] == 'directory' and mode == BlockModes.contents_block:
                        block['status']['code'] = FetchStatusCodes.ready
                        block['lines'] = ['<directory>']
                    elif target_info['type'] == 'file':
                        block['status']['code'] = FetchStatusCodes.ready
                        if mode == BlockModes.contents_block:
                            block['lines'] = head_target(
                                target, block['max_lines'], replace_non_unicode=True
                            )
                        elif mode == BlockModes.image_block:
                            # NOTE(review): base64.b64encode returns bytes on
                            # Python 3, which json cannot serialize directly —
                            # confirm whether a .decode() is needed here.
                            block['image_data'] = base64.b64encode(cat_target(target))
                    else:
                        # Includes a directory target for an image block.
                        set_not_found(block, mode)
                except NotFoundError:
                    set_not_found(block, mode)

            elif mode == BlockModes.graph_block:
                # data = list of {'target': ...}
                # Add a 'points' field that contains the contents of the target.
                for info in block['trajectories']:
                    target = (info['bundle_uuid'], info['target_genpath'])
                    try:
                        target_info = rest_util.get_target_info(target, 0)
                    except NotFoundError:
                        # Missing targets simply get no 'points'.
                        continue
                    if target_info['type'] == 'file':
                        contents = head_target(target, block['max_lines'], replace_non_unicode=True)
                        # Assume TSV file without header for now, just return each line as a row
                        info['points'] = [line.split('\t') for line in contents]
            elif mode == BlockModes.subworksheets_block:
                # do nothing
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)

        except UsageError as e:
            # str(e) works on both Python 2 and 3; e.message is Python 2 only.
            set_error_data(block_index, str(e))

        except Exception:
            # Exception instead of StandardError, which no longer exists on
            # Python 3 and would itself raise NameError here.
            import traceback

            traceback.print_exc()
            set_error_data(block_index, "Unexpected error interpreting item")

        # NOTE(review): this marks the original block object; when
        # set_error_data replaced the list entry, the replacement markup
        # block does not receive 'is_refined' — confirm that is intended.
        block['is_refined'] = True

    return interpreted_blocks
예제 #3
0
def interpret_file_genpath(target_cache, bundle_uuid, genpath, post):
    """
    Resolve a file genpath inside a bundle and return the post-processed value.

    |target_cache| is a mapping from target (bundle_uuid, subpath) to the info
    parsed from that target (dict, list, string or None).  It is read and
    written here to avoid reading/parsing the same file many times.
    |genpath| specifies the subpath and, optionally, a key inside the file
    (e.g., for /stats:train/errorRate, subpath = 'stats',
    key = 'train/errorRate').  Only the first ':' separates subpath from key.
    |post| is handed, together with the resolved value, to apply_func.

    Return the result of apply_func(post, value).  value is None when
    get_target_info returns None, when the target is not a file, or when the
    key cannot be resolved inside the parsed contents.
    Raise UsageError if |genpath| is not a file genpath.
    """
    MAX_LINES = 10000  # Maximum number of lines we need to read from a file.

    if not is_file_genpath(genpath):
        raise UsageError('Not file genpath: %s' % genpath)
    genpath = genpath[1:]  # Drop the leading character ('/' in the example above).
    if ':' in genpath:  # Looking for a particular key in the file
        # Split on the first ':' only, so a key that itself contains ':' does
        # not break the two-way unpacking.
        subpath, key = genpath.split(':', 1)
    else:
        subpath, key = genpath, None

    # Load the file (once per target; later calls reuse the cached info).
    target = (bundle_uuid, subpath)
    if target not in target_cache:
        target_info = rest_util.get_target_info(target, 0)

        # Try to interpret the structure of the file by looking inside it.
        if target_info is not None and target_info['type'] == 'file':
            contents = head_target(target, MAX_LINES)

            if len(contents) == 0:
                info = ''
            elif all('\t' in x for x in contents):
                # Tab-separated file (key\tvalue\nkey\tvalue...)
                info = {}
                for x in contents:
                    kv = x.strip().split("\t", 1)
                    # A line whose value is stripped away yields one field
                    # only and is skipped.
                    if len(kv) == 2:
                        info[kv[0]] = kv[1]
            else:
                text = ''.join(contents)
                try:
                    # JSON file
                    info = json.loads(text)
                except (TypeError, ValueError):
                    try:
                        # YAML file
                        # Use safe_load because yaml.load() could execute
                        # arbitrary Python code
                        info = yaml.safe_load(text)
                    except yaml.YAMLError:
                        # Plain text file
                        info = text
        else:
            # Missing target or not a file: cache None so the lookup is not
            # repeated.
            info = None
        target_cache[target] = info

    # Traverse the info object, one '/'-separated key component at a time.
    info = target_cache.get(target, None)
    if key is not None and info is not None:
        for k in key.split('/'):
            if isinstance(info, dict):
                info = info.get(k, None)
            elif isinstance(info, list):
                try:
                    info = info[int(k)]
                except (IndexError, ValueError):
                    # Out-of-range index (IndexError) or a non-integer
                    # component (ValueError): the key does not resolve.
                    info = None
            else:
                info = None
            if info is None:
                break
    return apply_func(post, info)
예제 #4
0
def resolve_interpreted_items(interpreted_items):
    """
    Called by the web interface.  Takes a list of interpreted worksheet
    items (returned by worksheet_util.interpret_items) and fetches the
    appropriate information, replacing the 'interpreted' field in each item.
    The result can be serialized via JSON.

    Each item's 'mode' selects how its 'interpreted' data is resolved:
    - markup / worksheet: left untouched.
    - record / table: the (header, contents) pair keeps its header and has
      its contents run through interpret_genpath_table_contents.
    - contents: ['<directory>'] for a directory, the head of the file for a
      file (limited by the 'maxlines' property), otherwise None.
    - html / image: the file's head / base64-encoded contents, or None when
      the target is not a file.
    - graph: each entry whose target is a file gets a 'points' list holding
      the tab-split lines of that file.
    - search / wsearch: delegated to interpret_search / interpret_wsearch.

    A UsageError or any other exception turns the item's data into an error
    payload built by error_data.  Entries that are None are skipped.
    Returns the same |interpreted_items| list.
    """
    def error_data(mode, message):
        # Tabular modes get a one-column table; everything else a one-line list.
        if mode == 'record' or mode == 'table':
            return (('ERROR', ), [{'ERROR': message}])
        else:
            return [message]

    def get_max_lines(properties):
        # Shared by 'contents' and 'graph': parse the 'maxlines' property.
        try:
            return int(properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
        except ValueError:
            raise UsageError("maxlines must be integer")

    def is_file(target_info):
        return target_info is not None and target_info['type'] == 'file'

    for item in interpreted_items:
        if item is None:
            continue
        mode = item['mode']
        data = item['interpreted']
        properties = item['properties']

        try:
            # Replace data with a resolved version.
            if mode == 'markup':
                # no need to do anything
                pass
            elif mode == 'record' or mode == 'table':
                header, contents = data
                # Request information
                contents = interpret_genpath_table_contents(contents)
                data = (header, contents)
            elif mode == 'contents':
                max_lines = get_max_lines(properties)

                target_info = rest_util.get_target_info(data, 0)
                if target_info is not None and target_info['type'] == 'directory':
                    data = ['<directory>']
                elif is_file(target_info):
                    data = head_target(data, max_lines, replace_non_unicode=True)
                else:
                    data = None
            elif mode == 'html':
                target_info = rest_util.get_target_info(data, 0)
                data = head_target(data, None) if is_file(target_info) else None
            elif mode == 'image':
                target_info = rest_util.get_target_info(data, 0)
                if is_file(target_info):
                    # NOTE(review): base64.b64encode returns bytes on Python 3,
                    # which json cannot serialize directly — confirm whether
                    # a .decode() is needed here.
                    data = base64.b64encode(cat_target(data))
                else:
                    data = None
            elif mode == 'graph':
                max_lines = get_max_lines(properties)

                # data = list of {'target': ...}
                # Add a 'points' field that contains the contents of the target.
                for info in data:
                    target = info['target']
                    target_info = rest_util.get_target_info(target, 0)
                    if is_file(target_info):
                        contents = head_target(target, max_lines, replace_non_unicode=True)
                        # Assume TSV file without header for now, just return each line as a row
                        info['points'] = [line.split('\t') for line in contents]
            elif mode == 'search':
                data = interpret_search(data)
            elif mode == 'wsearch':
                data = interpret_wsearch(data)
            elif mode == 'worksheet':
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)

        except UsageError as e:
            # str(e) works on both Python 2 and 3; e.message is Python 2 only.
            data = error_data(mode, str(e))

        except Exception:
            # Exception instead of StandardError, which no longer exists on
            # Python 3 and would itself raise NameError here.
            import traceback
            traceback.print_exc()
            data = error_data(mode, "Unexpected error interpreting item")

        # Assign the interpreted from the processed data
        item['interpreted'] = data

    return interpreted_items
예제 #5
0
def resolve_interpreted_blocks(interpreted_blocks, brief):
    """
    Called by the web interface.  Walks a list of interpreted worksheet
    blocks and fills each one, in place, with the data it refers to.  The
    result can be serialized via JSON.

    |brief|: when truthy, record/table blocks are not resolved; only their
    status is set (briefly-loaded if there are genpath requests, otherwise
    ready).

    A UsageError or any other exception replaces the block in the list with
    a markup block carrying an 'ERROR: ...' text.  Entries that are None are
    skipped.  Returns the same |interpreted_blocks| list.
    """
    def set_error_data(block_index, message):
        # Swap the failing block for a markup block that shows the error.
        loaded = MarkupBlockSchema().load(
            {'id': block_index, 'text': 'ERROR: ' + message}
        )
        interpreted_blocks[block_index] = loaded.data

    def mark_not_found(current, current_mode):
        # Shared by the "unknown target type" and NotFoundError paths.
        current['status']['code'] = FetchStatusCodes.not_found
        if current_mode == BlockModes.contents_block:
            current['lines'] = None
        elif current_mode == BlockModes.image_block:
            current['image_data'] = None

    for block_index, block in enumerate(interpreted_blocks):
        if block is None:
            continue
        mode = block['mode']

        try:
            if mode in (BlockModes.markup_block, BlockModes.placeholder_block):
                # Nothing to resolve for these modes.
                pass
            elif mode == BlockModes.record_block or mode == BlockModes.table_block:
                if not brief:
                    # Full mode: resolve every genpath in the rows.
                    block['rows'] = interpret_genpath_table_contents(block['rows'])
                    block['status'] = FetchStatusSchema.get_ready_status()
                else:
                    # Brief mode: only work out whether genpaths would need
                    # interpreting, and report that through the status.
                    pending = get_genpaths_table_contents_requests(block['rows'])
                    if len(pending) > 0:
                        block['status'] = FetchStatusSchema.get_briefly_loaded_status()
                    else:
                        block['status'] = FetchStatusSchema.get_ready_status()
            elif mode == BlockModes.contents_block or mode == BlockModes.image_block:
                uuid = block['bundles_spec']['bundle_infos'][0]['uuid']
                genpath = block['target_genpath']
                target = BundleTarget(uuid, genpath)
                try:
                    target_info = rest_util.get_target_info(target, 0)
                    target_type = target_info['type']
                    if target_type == 'directory' and mode == BlockModes.contents_block:
                        block['status']['code'] = FetchStatusCodes.ready
                        block['lines'] = ['<directory>']
                    elif target_type == 'file':
                        block['status']['code'] = FetchStatusCodes.ready
                        if mode == BlockModes.contents_block:
                            resolved = target_info['resolved_target']
                            block['lines'] = head_target(resolved, block['max_lines'])
                        elif mode == BlockModes.image_block:
                            block['status']['code'] = FetchStatusCodes.ready
                            raw = bytes(cat_target(target_info['resolved_target']))
                            block['image_data'] = base64.b64encode(raw).decode('utf-8')
                    else:
                        # Includes a directory target for an image block.
                        mark_not_found(block, mode)
                except NotFoundError:
                    mark_not_found(block, mode)

            elif mode == BlockModes.graph_block:
                # Each trajectory names a target; attach its contents as 'points'.
                for trajectory in block['trajectories']:
                    target = BundleTarget(
                        trajectory['bundle_uuid'], trajectory['target_genpath']
                    )
                    try:
                        target_info = rest_util.get_target_info(target, 0)
                    except NotFoundError:
                        # Missing targets simply get no 'points'.
                        continue
                    if target_info['type'] != 'file':
                        continue
                    contents = head_target(
                        target_info['resolved_target'], block['max_lines']
                    )
                    # Treated as a header-less TSV file: one row per line.
                    rows = []
                    trajectory['points'] = rows
                    for line in contents:
                        rows.append(line.split('\t'))
            elif mode == BlockModes.subworksheets_block:
                # Nothing to resolve.
                pass
            elif mode == BlockModes.schema_block:
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)

        except UsageError as usage_error:
            set_error_data(block_index, str(usage_error))

        except Exception:
            import traceback

            traceback.print_exc()
            set_error_data(block_index, "Unexpected error interpreting item")

        # This marks the original block object, also when set_error_data has
        # just replaced the list entry with a new markup block.
        block['is_refined'] = True

    return interpreted_blocks
예제 #6
0
def resolve_interpreted_blocks(interpreted_blocks):
    """
    Called by the web interface.  Takes a list of interpreted worksheet
    blocks and fetches the information each one refers to, updating the
    blocks in place.  The result can be serialized via JSON.

    Per mode:
    - markup / subworksheets: left untouched.
    - record / table: block['rows'] is replaced by the result of
      interpret_genpath_table_contents.
    - contents / image: block['status']['code'] is set and block['lines'] or
      block['image_data'] is filled in (None when the target is not found).
    - graph: each trajectory whose target is a file gets a 'points' list
      holding the tab-split lines of that file.

    A UsageError or any other exception replaces the block in the list with
    a markup block carrying an 'ERROR: ...' text.  Entries that are None are
    skipped.  Returns the same |interpreted_blocks| list.
    """

    def set_error_data(block_index, message):
        # Swap the failing block for a markup block that shows the error.
        interpreted_blocks[block_index] = (
            MarkupBlockSchema().load({'id': block_index, 'text': 'ERROR: ' + message}).data
        )

    def set_not_found(block, mode):
        # Shared by the "unknown target type" and NotFoundError paths.
        block['status']['code'] = FetchStatusCodes.not_found
        if mode == BlockModes.contents_block:
            block['lines'] = None
        elif mode == BlockModes.image_block:
            block['image_data'] = None

    for block_index, block in enumerate(interpreted_blocks):
        if block is None:
            continue
        mode = block['mode']

        try:
            # Replace data with a resolved version.
            if mode == BlockModes.markup_block:
                # no need to do anything
                pass
            elif mode == BlockModes.record_block or mode == BlockModes.table_block:
                # Request information
                block['rows'] = interpret_genpath_table_contents(block['rows'])
            elif mode == BlockModes.contents_block or mode == BlockModes.image_block:
                target = (
                    block['bundles_spec']['bundle_infos'][0]['uuid'],
                    block['target_genpath'],
                )
                try:
                    target_info = rest_util.get_target_info(target, 0)
                    if target_info['type'] == 'directory' and mode == BlockModes.contents_block:
                        block['status']['code'] = FetchStatusCodes.ready
                        block['lines'] = ['<directory>']
                    elif target_info['type'] == 'file':
                        block['status']['code'] = FetchStatusCodes.ready
                        if mode == BlockModes.contents_block:
                            block['lines'] = head_target(
                                target, block['max_lines'], replace_non_unicode=True
                            )
                        elif mode == BlockModes.image_block:
                            # NOTE(review): base64.b64encode returns bytes on
                            # Python 3, which json cannot serialize directly —
                            # confirm whether a .decode() is needed here.
                            block['image_data'] = base64.b64encode(cat_target(target))
                    else:
                        # Includes a directory target for an image block.
                        set_not_found(block, mode)
                except NotFoundError:
                    set_not_found(block, mode)

            elif mode == BlockModes.graph_block:
                # data = list of {'target': ...}
                # Add a 'points' field that contains the contents of the target.
                for info in block['trajectories']:
                    target = (info['bundle_uuid'], info['target_genpath'])
                    try:
                        target_info = rest_util.get_target_info(target, 0)
                    except NotFoundError:
                        # Missing targets simply get no 'points'.
                        continue
                    if target_info['type'] == 'file':
                        contents = head_target(target, block['max_lines'], replace_non_unicode=True)
                        # Assume TSV file without header for now, just return each line as a row
                        info['points'] = [line.split('\t') for line in contents]
            elif mode == BlockModes.subworksheets_block:
                # do nothing
                pass
            else:
                raise UsageError('Invalid display mode: %s' % mode)

        except UsageError as e:
            # str(e) works on both Python 2 and 3; e.message is Python 2 only.
            set_error_data(block_index, str(e))

        except Exception:
            # Exception instead of StandardError, which no longer exists on
            # Python 3 and would itself raise NameError here.
            import traceback

            traceback.print_exc()
            set_error_data(block_index, "Unexpected error interpreting item")

        # NOTE(review): this marks the original block object; when
        # set_error_data replaced the list entry, the replacement markup
        # block does not receive 'is_refined' — confirm that is intended.
        block['is_refined'] = True

    return interpreted_blocks