Example no. 1
0
def get_fingerprinters(tag):
    """Return a sequence of fingerprint functors for a specific tag.

    On first use the module-level ``_tag2fx`` cache is populated from the
    'system fingerprints' and 'fingerprints' configuration sections, where
    each option value is a whitespace-separated list of dotted
    ``module.function`` paths.

    Parameters
    ----------
    tag : str
      Tag to look up fingerprinting functions for.

    Returns
    -------
    set
      Set of fingerprinting callables; always includes the generic
      ``_fp_file`` fingerprinter.
    """
    if not len(_tag2fx):
        # lazily populate the tag -> fingerprinters cache
        from testkraut import cfg
        tags = set(cfg.options('system fingerprints')).union(cfg.options('fingerprints'))
        for tag_ in tags:
            fp_tag = set()
            for fps_str in cfg.get('system fingerprints', tag_, default="").split() \
                         + cfg.get('fingerprints', tag_, default="").split():
                fps_comp = fps_str.split('.')
                try:
                    mod = __import__('.'.join(fps_comp[:-1]), globals(), locals(),
                                     fps_comp[-1:], -1)
                    fps = getattr(mod, fps_comp[-1])
                except Exception:
                    lgr.warning(
                        "ignoring invalid fingerprinting function '%s' for tag '%s'"
                        % (fps_str, tag_))
                    # skip this entry: 'fps' is unbound (or stale from a
                    # previous iteration) when the import/lookup failed
                    continue
                fp_tag.add(fps)
            _tag2fx[tag_] = fp_tag
    # return a copy so adding _fp_file does not mutate the cached set
    fprinters = set(_tag2fx.get(tag, set()))
    fprinters.add(_fp_file)
    return fprinters
Example no. 2
0
def get_fingerprinters(tag):
    """Return a sequence of fingerprint functors for a specific tag.

    On first use the module-level ``_tag2fx`` cache is populated from the
    'system fingerprints' and 'fingerprints' configuration sections, where
    each option value is a whitespace-separated list of dotted
    ``module.function`` paths.

    Parameters
    ----------
    tag : str
      Tag to look up fingerprinting functions for.

    Returns
    -------
    set
      Set of fingerprinting callables; always includes the generic
      ``_fp_file`` fingerprinter.
    """
    if not len(_tag2fx):
        # lazily populate the tag -> fingerprinters cache
        from testkraut import cfg
        tags = set(cfg.options('system fingerprints')).union(
            cfg.options('fingerprints'))
        for tag_ in tags:
            fp_tag = set()
            for fps_str in cfg.get('system fingerprints', tag_, default="").split() \
                         + cfg.get('fingerprints', tag_, default="").split():
                fps_comp = fps_str.split('.')
                try:
                    mod = __import__('.'.join(fps_comp[:-1]), globals(),
                                     locals(), fps_comp[-1:], -1)
                    fps = getattr(mod, fps_comp[-1])
                except Exception:
                    lgr.warning(
                        "ignoring invalid fingerprinting function '%s' for tag '%s'"
                        % (fps_str, tag_))
                    # skip this entry: 'fps' is unbound (or stale from a
                    # previous iteration) when the import/lookup failed
                    continue
                fp_tag.add(fps)
            _tag2fx[tag_] = fp_tag
    # return a copy so adding _fp_file does not mutate the cached set
    fprinters = set(_tag2fx.get(tag, set()))
    fprinters.add(_fp_file)
    return fprinters
Example no. 3
0
def place_file_into_dir(filespec,
                        dest_dir,
                        search_dirs=None,
                        cache=None,
                        force_overwrite=True,
                        symlink_to_cache=True):
    """Search for a file given a SPEC and place it into a destination directory

    Lookup order: file cache (by hash), local search dirs (by name, then by
    hash), 'url' in the SPEC, and finally any configured hash stores.

    Parameters
    ----------
    filespec : SPEC dict
      Dictionary with information on the file, keys could be, e.g., 'value',
      'type', or 'sha1sum'.
    dest_dir : path
      Path of the destination/target directory
    search_dirs : None or sequence
      If not None, a sequence of additional local directories to be searched for
      the desired file (testkraut configuration might provide more locations
      that will also be searched afterwards)
    cache : None or path
      If not None, a path to a file cache directory where the desired file is
      searched by its sha1sum (if present in the SPEC)
    force_overwrite : bool
      If True, an already existing file in the destination directory is
      overwritten.
    symlink_to_cache : bool
      If True, locally found files are symlinked into the cache; otherwise a
      hardlink (with copy fallback) or plain copy is made.

    Raises
    ------
    ValueError
      If ``filespec`` is not a file SPEC.
    LookupError
      If no matching file could be found anywhere.
    """
    # TODO refactor from simple cachedir to cache object that could also obtain
    # a file from remote locations

    # sanity
    if 'type' not in filespec or filespec['type'] != 'file':
        raise ValueError("expected SPEC is not a file SPEC, got : '%s'" %
                         filespec)
    # have a default cache
    if cache is None:
        cache = get_filecache_dir()
    if not os.path.exists(cache):
        os.makedirs(cache)
    # search path
    if search_dirs is None:
        search_dirs = []
    # build a new list instead of '+=' to avoid mutating a caller-provided
    # sequence in place
    search_dirs = list(search_dirs) \
        + cfg.get('data sources', 'local dirs', default='').split()

    fname = filespec['value']
    # where the file needs to end up in the testbed
    dest_fname = opj(dest_dir, fname)
    # this will be the discovered file path
    fpath = None
    # first try the cache
    fpath = locate_file_in_cache(filespec, cache)
    # do a local search
    if fpath is None and len(search_dirs):
        lgr.debug("cache lookup for '%s' unsuccessful, trying local search" %
                  fname)
        # do a two-pass scan: first try locating the file by name to avoid
        # sha1-summing all files
        for search_dir in search_dirs:
            for root, dirnames, filenames in os.walk(search_dir):
                cand_path = opj(root, fname)
                if not os.path.isfile(cand_path):
                    lgr.debug("could not find file '%s' at '%s'" %
                              (fname, cand_path))
                    continue
                hashmatch = check_file_hash(filespec, cand_path)
                if hashmatch in (True, None):
                    lgr.debug("found matching file '%s' at '%s'" %
                              (fname, cand_path))
                    # run with the file if there is no hash or it matches
                    fpath = cand_path
                    break
            if fpath is not None:
                break
        if fpath is None and ('sha1sum' in filespec or 'md5sum' in filespec):
            lgr.debug(
                "could not find file '%s' by its name, doing hash lookup" %
                fname)
            # 2nd pass if we have a hash try locating by hash
            for search_dir in search_dirs:
                for root, dirnames, filenames in os.walk(search_dir):
                    for cand_name in filenames:
                        cand_path = opj(root, cand_name)
                        if check_file_hash(filespec, cand_path) is True:
                            lgr.debug("found matching file '%s' at '%s'" %
                                      (fname, cand_path))
                            fpath = cand_path
                            break
                    if fpath is not None:
                        break
                if fpath is not None:
                    break
        if fpath is not None and ('md5sum' in filespec
                                  or 'sha1sum' in filespec):
            # place in cache -- but only if any hash is given in the file spec
            # if no hash is given, this file is volatile and it makes no sense
            # to cache it
            if 'sha1sum' not in filespec:
                sha1 = sha1sum(fpath)
            else:
                sha1 = filespec['sha1sum']
            dst_path = opj(cache, sha1)
            if os.path.exists(dst_path) or os.path.lexists(dst_path):
                # log before removing so the message is emitted even if the
                # removal itself fails
                lgr.debug("removing existing cache entry '%s'" % dst_path)
                os.remove(dst_path)
            if symlink_to_cache:
                os.symlink(fpath, dst_path)
                lgr.debug("symlink to cache '%s'->'%s'" % (fpath, dst_path))
            elif hasattr(os, 'link'):
                # be nice and try hard-linking
                try:
                    os.link(fpath, dst_path)
                    lgr.debug("hardlink to cache '%s'->'%s'" %
                              (fpath, dst_path))
                except OSError:
                    # silently fall back if linking doesn't work (e.g.
                    # cross-device link) ... will recover later
                    shutil.copy(fpath, dst_path)
                    lgr.debug("copy to cache '%s'->'%s'" % (fpath, dst_path))
            else:
                shutil.copy(fpath, dst_path)
                lgr.debug("copy to cache '%s'->'%s'" % (fpath, dst_path))
    # trying external data sources
    if fpath is None and 'url' in filespec:
        # url is given
        fpath = download_file(filespec['url'], dest_fname)
    if fpath is None and 'sha1sum' in filespec:
        # lookup in any configured hash store
        hashpots = cfg.get('data sources', 'hash stores').split()
        sha1 = filespec['sha1sum']
        lgr.debug("local search '%s' unsuccessful, trying hash stores" % fname)
        dst_path = opj(cache, sha1)
        for hp in hashpots:
            fpath = download_file('%s%s' % (hp, sha1), dst_path)
            if fpath is not None:
                break
    if fpath is None:
        # out of ideas
        raise LookupError("cannot find file matching spec %s" % filespec)
    # get the file into the dest_dir
    if fpath != dest_fname \
       and (force_overwrite or not os.path.isfile(dest_fname)):
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        shutil.copy(fpath, dest_fname)
    else:
        lgr.debug("skip copying already present file '%s'" % fname)
Example no. 4
0
def place_file_into_dir(filespec, dest_dir, search_dirs=None, cache=None,
                        force_overwrite=True, symlink_to_cache=True):
    """Search for a file given a SPEC and place it into a destination directory

    Lookup order: file cache (by hash), local search dirs (by name, then by
    hash), 'url' in the SPEC, and finally any configured hash stores.

    Parameters
    ----------
    filespec : SPEC dict
      Dictionary with information on the file, keys could be, e.g., 'value',
      'type', or 'sha1sum'.
    dest_dir : path
      Path of the destination/target directory
    search_dirs : None or sequence
      If not None, a sequence of additional local directories to be searched for
      the desired file (testkraut configuration might provide more locations
      that will also be searched afterwards)
    cache : None or path
      If not None, a path to a file cache directory where the desired file is
      searched by its sha1sum (if present in the SPEC)
    force_overwrite : bool
      If True, an already existing file in the destination directory is
      overwritten.
    symlink_to_cache : bool
      If True, locally found files are symlinked into the cache; otherwise a
      hardlink (with copy fallback) or plain copy is made.

    Raises
    ------
    ValueError
      If ``filespec`` is not a file SPEC.
    LookupError
      If no matching file could be found anywhere.
    """
    # TODO refactor from simple cachedir to cache object that could also obtain
    # a file from remote locations

    # sanity
    if 'type' not in filespec or filespec['type'] != 'file':
        raise ValueError("expected SPEC is not a file SPEC, got : '%s'"
                         % filespec)
    # have a default cache
    if cache is None:
        cache = get_filecache_dir()
    if not os.path.exists(cache):
        os.makedirs(cache)
    # search path
    if search_dirs is None:
        search_dirs = []
    # build a new list instead of '+=' to avoid mutating a caller-provided
    # sequence in place
    search_dirs = list(search_dirs) \
        + cfg.get('data sources', 'local dirs', default='').split()

    fname = filespec['value']
    # where the file needs to end up in the testbed
    dest_fname = opj(dest_dir, fname)
    # this will be the discovered file path
    fpath = None
    # first try the cache
    fpath = locate_file_in_cache(filespec, cache)
    # do a local search
    if fpath is None and len(search_dirs):
        lgr.debug("cache lookup for '%s' unsuccessful, trying local search"
                  % fname)
        # do a two-pass scan: first try locating the file by name to avoid
        # sha1-summing all files
        for search_dir in search_dirs:
            for root, dirnames, filenames in os.walk(search_dir):
                cand_path = opj(root, fname)
                if not os.path.isfile(cand_path):
                    lgr.debug("could not find file '%s' at '%s'"
                              % (fname, cand_path))
                    continue
                hashmatch = check_file_hash(filespec, cand_path)
                if hashmatch in (True, None):
                    lgr.debug("found matching file '%s' at '%s'"
                              % (fname, cand_path))
                    # run with the file if there is no hash or it matches
                    fpath = cand_path
                    break
            if fpath is not None:
                break
        if fpath is None and ('sha1sum' in filespec or 'md5sum' in filespec):
            lgr.debug("could not find file '%s' by its name, doing hash lookup"
                      % fname)
            # 2nd pass if we have a hash try locating by hash
            for search_dir in search_dirs:
                for root, dirnames, filenames in os.walk(search_dir):
                    for cand_name in filenames:
                        cand_path = opj(root, cand_name)
                        if check_file_hash(filespec, cand_path) is True:
                            lgr.debug("found matching file '%s' at '%s'"
                                      % (fname, cand_path))
                            fpath = cand_path
                            break
                    if fpath is not None:
                        break
                if fpath is not None:
                    break
        if fpath is not None and ('md5sum' in filespec or 'sha1sum' in filespec):
            # place in cache -- but only if any hash is given in the file spec
            # if no hash is given, this file is volatile and it makes no sense
            # to cache it
            if 'sha1sum' not in filespec:
                sha1 = sha1sum(fpath)
            else:
                sha1 = filespec['sha1sum']
            dst_path = opj(cache, sha1)
            if os.path.exists(dst_path) or os.path.lexists(dst_path):
                # log before removing so the message is emitted even if the
                # removal itself fails
                lgr.debug("removing existing cache entry '%s'" % dst_path)
                os.remove(dst_path)
            if symlink_to_cache:
                os.symlink(fpath, dst_path)
                lgr.debug("symlink to cache '%s'->'%s'" % (fpath, dst_path))
            elif hasattr(os, 'link'):
                # be nice and try hard-linking
                try:
                    os.link(fpath, dst_path)
                    lgr.debug("hardlink to cache '%s'->'%s'" % (fpath, dst_path))
                except OSError:
                    # silently fall back if linking doesn't work (e.g.
                    # cross-device link) ... will recover later
                    shutil.copy(fpath, dst_path)
                    lgr.debug("copy to cache '%s'->'%s'" % (fpath, dst_path))
            else:
                shutil.copy(fpath, dst_path)
                lgr.debug("copy to cache '%s'->'%s'" % (fpath, dst_path))
    # trying external data sources
    if fpath is None and 'url' in filespec:
        # url is given
        fpath = download_file(filespec['url'], dest_fname)
    if fpath is None and 'sha1sum' in filespec:
        # lookup in any configured hash store
        hashpots = cfg.get('data sources', 'hash stores').split()
        sha1 = filespec['sha1sum']
        lgr.debug("local search '%s' unsuccessful, trying hash stores"
                  % fname)
        dst_path = opj(cache, sha1)
        for hp in hashpots:
            fpath = download_file('%s%s' % (hp, sha1), dst_path)
            if fpath is not None:
                break
    if fpath is None:
        # out of ideas
        raise LookupError("cannot find file matching spec %s" % filespec)
    # get the file into the dest_dir
    if fpath != dest_fname \
       and (force_overwrite or not os.path.isfile(dest_fname)):
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        shutil.copy(fpath, dest_fname)
    else:
        lgr.debug("skip copying already present file '%s'" % fname)
Example no. 5
0
def run(args):
    """Populate the file cache with all files required by the selected tests.

    Scans the SPECs of the tests in ``args.ids`` (or all tests in the
    library when none are given) for input files with a 'sha1sum', then
    tries to satisfy missing cache entries from local search dirs and from
    configured HTTP hash stores, and finally links/copies discovered files
    into ``args.filecache``.
    """
    lgr = args.logger
    lgr.debug("using file cache at '%s'" % args.filecache)
    if not len(args.ids):
        # if none specified go through all the SPECs in the lib
        args.ids = []
        for tld in get_test_library_paths(args.library):
            args.ids.extend([os.path.basename(d)
                             for d in glob(opj(tld, '*'))
                             if os.path.isdir(d)])
    wanted_files = set()
    hash_lookup = {}
    # scan the SPECs of all tests for needed files and their sha1sums
    for test_id in args.ids:
        lgr.debug("scan required files for test '%s'" % test_id)
        spec = get_spec(test_id, args.library)
        for _, input in spec.get_inputs('file').iteritems():
            if 'sha1sum' in input:
                wanted_files.add(input['sha1sum'])
                hash_lookup[input['sha1sum']] = (test_id, input.get('value', ''))
                lgr.debug("add '%s' (%s) to the list of files to look for"
                          % (input.get('value', ''), input['sha1sum']))
    # what do we have in the cache?
    have_files = [os.path.basename(f) for f in glob(opj(args.filecache, '*'))
                  if os.path.isfile(f)]
    # what is missing
    missing_files = wanted_files.difference(have_files)
    search_cache = {}
    # search in all local dirs
    for search_dir in (args.search + args.library):
        for root, dirnames, filenames in os.walk(search_dir):
            for fname in filenames:
                fpath = opj(root, fname)
                sha1 = sha1sum(fpath)
                if sha1 in missing_files:
                    # make path relative to cache dir
                    # NOTE(review): this relative path is later also passed to
                    # os.link()/shutil.copy(), which resolve relative paths
                    # against the CWD, not the cache dir -- confirm the CWD
                    # assumption
                    search_cache[sha1] = os.path.relpath(fpath, args.filecache)
                    lgr.debug("found missing '%s' at '%s'" % (sha1, fpath))
                    missing_files.remove(sha1)
    # ensure the cache is there
    if not os.path.exists(args.filecache):
        os.makedirs(args.filecache)
    # try downloading missing files from the web
    hashpots = cfg.get('hash stores', 'http').split()
    for sha1 in missing_files.copy():
        for hp in hashpots:
            try:
                urip = urllib2.urlopen('%s%s' % (hp, sha1))
                dst_path = opj(args.filecache, sha1)
                lgr.debug("download '%s%s'->'%s'" % (hp, sha1, dst_path))
                # context manager closes the file even if read/write fails
                # (the original leaked the handle on error)
                with open(dst_path, 'wb') as fp:
                    fp.write(urip.read())
                missing_files.remove(sha1)
                break
            except urllib2.HTTPError:
                lgr.debug("cannot find '%s' at '%s'" % (sha1, hp))
            except urllib2.URLError:
                lgr.debug("cannot connect to '%s'" % hp)
    # copy/link them into the cache
    for sha1, fpath in search_cache.iteritems():
        dst_path = opj(args.filecache, sha1)
        if os.path.lexists(dst_path):
            if os.path.islink(dst_path):
                # remove existing symlink
                lgr.debug("removing existing symlink '%s' in filecache"
                          % dst_path)
                os.remove(dst_path)
            else:
                lgr.warning(
                    "Will not replace existing non-symlink cache content: [%s: %s (%s)]"
                    % (hash_lookup[sha1] + (sha1,)))
                # skip this entry: the original fell through and the
                # subsequent symlink/link/copy would fail on the existing file
                continue
        if not args.copy:
            os.symlink(fpath, dst_path)
            lgr.debug("symlink '%s'->'%s'" % (fpath, dst_path))
        elif hasattr(os, 'link'):
            # be nice and try hard-linking
            try:
                os.link(fpath, dst_path)
                lgr.debug("hardlink '%s'->'%s'" % (fpath, dst_path))
            except OSError:
                # silently fall back if linking doesn't work (e.g.
                # cross-device link) ... will recover later
                shutil.copy(fpath, dst_path)
                lgr.debug("copylink '%s'->'%s'" % (fpath, dst_path))
        else:
            shutil.copy(fpath, dst_path)
            lgr.debug("copylink '%s'->'%s'" % (fpath, dst_path))
    if len(missing_files):
        lgr.warning('cannot find needed file(s):')
        for mf in missing_files:
            lgr.warning('  %s: %s (%s)' % (hash_lookup[mf] + (mf,)))
Example no. 6
0
def run(args):
    """Populate the file cache with all files required by the selected tests.

    Scans the SPECs of the tests in ``args.ids`` (or all tests in the
    library when none are given) for input files with a 'sha1sum', then
    tries to satisfy missing cache entries from local search dirs and from
    configured HTTP hash stores, and finally links/copies discovered files
    into ``args.filecache``.
    """
    lgr = args.logger
    lgr.debug("using file cache at '%s'" % args.filecache)
    if not len(args.ids):
        # if none specified go through all the SPECs in the lib
        args.ids = []
        for tld in get_test_library_paths(args.library):
            args.ids.extend([
                os.path.basename(d) for d in glob(opj(tld, '*'))
                if os.path.isdir(d)
            ])
    wanted_files = set()
    hash_lookup = {}
    # scan the SPECs of all tests for needed files and their sha1sums
    for test_id in args.ids:
        lgr.debug("scan required files for test '%s'" % test_id)
        spec = get_spec(test_id, args.library)
        for _, input in spec.get_inputs('file').iteritems():
            if 'sha1sum' in input:
                wanted_files.add(input['sha1sum'])
                hash_lookup[input['sha1sum']] = (test_id,
                                                 input.get('value', ''))
                lgr.debug("add '%s' (%s) to the list of files to look for" %
                          (input.get('value', ''), input['sha1sum']))
    # what do we have in the cache?
    have_files = [
        os.path.basename(f) for f in glob(opj(args.filecache, '*'))
        if os.path.isfile(f)
    ]
    # what is missing
    missing_files = wanted_files.difference(have_files)
    search_cache = {}
    # search in all local dirs
    for search_dir in (args.search + args.library):
        for root, dirnames, filenames in os.walk(search_dir):
            for fname in filenames:
                fpath = opj(root, fname)
                sha1 = sha1sum(fpath)
                if sha1 in missing_files:
                    # make path relative to cache dir
                    # NOTE(review): this relative path is later also passed to
                    # os.link()/shutil.copy(), which resolve relative paths
                    # against the CWD, not the cache dir -- confirm the CWD
                    # assumption
                    search_cache[sha1] = os.path.relpath(fpath, args.filecache)
                    lgr.debug("found missing '%s' at '%s'" % (sha1, fpath))
                    missing_files.remove(sha1)
    # ensure the cache is there
    if not os.path.exists(args.filecache):
        os.makedirs(args.filecache)
    # try downloading missing files from the web
    hashpots = cfg.get('hash stores', 'http').split()
    for sha1 in missing_files.copy():
        for hp in hashpots:
            try:
                urip = urllib2.urlopen('%s%s' % (hp, sha1))
                dst_path = opj(args.filecache, sha1)
                lgr.debug("download '%s%s'->'%s'" % (hp, sha1, dst_path))
                # context manager closes the file even if read/write fails
                # (the original leaked the handle on error)
                with open(dst_path, 'wb') as fp:
                    fp.write(urip.read())
                missing_files.remove(sha1)
                break
            except urllib2.HTTPError:
                lgr.debug("cannot find '%s' at '%s'" % (sha1, hp))
            except urllib2.URLError:
                lgr.debug("cannot connect to '%s'" % hp)
    # copy/link them into the cache
    for sha1, fpath in search_cache.iteritems():
        dst_path = opj(args.filecache, sha1)
        if os.path.lexists(dst_path):
            if os.path.islink(dst_path):
                # remove existing symlink
                lgr.debug("removing existing symlink '%s' in filecache" %
                          dst_path)
                os.remove(dst_path)
            else:
                lgr.warning(
                    "Will not replace existing non-symlink cache content: [%s: %s (%s)]"
                    % (hash_lookup[sha1] + (sha1, )))
                # skip this entry: the original fell through and the
                # subsequent symlink/link/copy would fail on the existing file
                continue
        if not args.copy:
            os.symlink(fpath, dst_path)
            lgr.debug("symlink '%s'->'%s'" % (fpath, dst_path))
        elif hasattr(os, 'link'):
            # be nice and try hard-linking
            try:
                os.link(fpath, dst_path)
                lgr.debug("hardlink '%s'->'%s'" % (fpath, dst_path))
            except OSError:
                # silently fall back if linking doesn't work (e.g.
                # cross-device link) ... will recover later
                shutil.copy(fpath, dst_path)
                lgr.debug("copylink '%s'->'%s'" % (fpath, dst_path))
        else:
            shutil.copy(fpath, dst_path)
            lgr.debug("copylink '%s'->'%s'" % (fpath, dst_path))
    if len(missing_files):
        lgr.warning('cannot find needed file(s):')
        for mf in missing_files:
            lgr.warning('  %s: %s (%s)' % (hash_lookup[mf] + (mf, )))