Example #1
0
def interpret_file_genpath(target_cache, bundle_uuid, genpath, post):
    """
    |cache| is a mapping from target (bundle_uuid, subpath) to the info map,
    which is to be read/written to avoid reading/parsing the same file many
    times.
    |genpath| specifies the subpath and various fields (e.g., for
    /stats:train/errorRate, subpath = 'stats', key = 'train/errorRate').
    |post| function to apply to the resulting value.
    Return the string value.
    """
    MAX_LINES = 10000  # Maximum number of lines we need to read from a file.

    # Load the file
    if not is_file_genpath(genpath):
        raise UsageError('Not file genpath: %s' % genpath)
    genpath = genpath[1:]
    if ':' in genpath:  # Looking for a particular key in the file
        subpath, key = genpath.split(':')
    else:
        subpath, key = genpath, None

    target = (bundle_uuid, subpath)
    if target not in target_cache:
        info = None
        try:
            target_info = rest_util.get_target_info(target, 0)
            if target_info['type'] == 'file':
                contents = head_target(target, MAX_LINES)

                if len(contents) == 0:
                    info = ''
                elif all('\t' in x for x in contents):
                    # Tab-separated file (key\tvalue\nkey\tvalue...)
                    info = {}
                    for x in contents:
                        kv = x.strip().split("\t", 1)
                        if len(kv) == 2:
                            info[kv[0]] = kv[1]
                else:
                    try:
                        # JSON file
                        info = json.loads(''.join(contents))
                    except (TypeError, ValueError):
                        try:
                            # YAML file
                            # Use safe_load because yaml.load() could execute
                            # arbitrary Python code
                            info = yaml.safe_load(''.join(contents))
                        except yaml.YAMLError:
                            # Plain text file
                            info = ''.join(contents)
        except NotFoundError:
            pass

        # Try to interpret the structure of the file by looking inside it.
        target_cache[target] = info

    # Traverse the info object.
    info = target_cache.get(target, None)
    if key is not None and info is not None:
        for k in key.split('/'):
            if isinstance(info, dict):
                info = info.get(k, None)
            elif isinstance(info, list):
                try:
                    info = info[int(k)]
                except (KeyError, ValueError):
                    info = None
            else:
                info = None
            if info is None:
                break
    return apply_func(post, info)
Example #2
0
def interpret_file_genpath(target_cache, bundle_uuid, genpath, post):
    """
    |cache| is a mapping from target (bundle_uuid, subpath) to the info map,
    which is to be read/written to avoid reading/parsing the same file many
    times.
    |genpath| specifies the subpath and various fields (e.g., for
    /stats:train/errorRate, subpath = 'stats', key = 'train/errorRate').
    |post| function to apply to the resulting value.
    Return the string value.
    """
    MAX_LINES = 10000  # Maximum number of lines we need to read from a file.

    # Load the file
    if not is_file_genpath(genpath):
        raise UsageError('Not file genpath: %s' % genpath)
    genpath = genpath[1:]
    if ':' in genpath:  # Looking for a particular key in the file
        subpath, key = genpath.split(':')
    else:
        subpath, key = genpath, None

    target = (bundle_uuid, subpath)
    if target not in target_cache:
        target_info = rest_util.get_target_info(target, 0)

        # Try to interpret the structure of the file by looking inside it.
        if target_info is not None and target_info['type'] == 'file':
            contents = head_target(target, MAX_LINES)

            if len(contents) == 0:
                info = ''
            elif all('\t' in x for x in contents):
                # Tab-separated file (key\tvalue\nkey\tvalue...)
                info = {}
                for x in contents:
                    kv = x.strip().split("\t", 1)
                    if len(kv) == 2: info[kv[0]] = kv[1]
            else:
                try:
                    # JSON file
                    info = json.loads(''.join(contents))
                except (TypeError, ValueError):
                    try:
                        # YAML file
                        # Use safe_load because yaml.load() could execute
                        # arbitrary Python code
                        info = yaml.safe_load(''.join(contents))
                    except yaml.YAMLError:
                        # Plain text file
                        info = ''.join(contents)
        else:
            info = None
        target_cache[target] = info

    # Traverse the info object.
    info = target_cache.get(target, None)
    if key is not None and info is not None:
        for k in key.split('/'):
            if isinstance(info, dict):
                info = info.get(k, None)
            elif isinstance(info, list):
                try:
                    info = info[int(k)]
                except (KeyError, ValueError):
                    info = None
            else:
                info = None
            if info is None: break
    return apply_func(post, info)