Exemple #1
0
def get_python_component(component_dir, which="model", raise_err=True):
    """Locate the ``<which>.py`` file of a component, unless a template is present.

    First ``get_file_path`` is asked for ``<which>-template`` with a ``.yml``
    or ``.yaml`` extension inside `component_dir`. If that lookup yields a
    path, ``None`` is returned; otherwise the result of looking up
    ``<which>`` with a ``.py`` extension is returned.

    Args:
        component_dir: directory that is searched.
        which: base name of the component file (default ``"model"``).
        raise_err: forwarded unchanged to both ``get_file_path`` calls.

    Returns:
        Whatever ``get_file_path`` returns for the ``.py`` lookup, or ``None``
        when a template file was found.
    """
    # TODO - if component_dir has an extension, then just return that file path
    # NOTE(review): raise_err is also forwarded to the template probe; if
    # get_file_path raises on a missing file, a missing template would raise
    # here instead of falling through to the .py lookup - confirm intent.
    template_path = get_file_path(component_dir,
                                  f"{which}-template",
                                  extensions=[".yml", ".yaml"],
                                  raise_err=raise_err)
    if template_path is not None:
        return None

    python_path = get_file_path(component_dir,
                                which,
                                extensions=[".py"],
                                raise_err=raise_err)
    return python_path
Exemple #2
0
def cli_test_source(command, raw_args):
    """Run the tests for the models of a model source (``kipoi test-source``).

    The function parses `raw_args`, determines which models to test (all of
    them with ``--all``, otherwise the ones returned by
    ``restrict_models_to_test`` for the given git range), optionally keeps
    only one shard of the sorted model list, optionally installs the common
    environments and finally tests the models one by one.

    Args:
        command: name of the CLI sub-command; must be ``"test-source"``.
        raw_args: list of command-line argument strings handed to argparse.

    Raises:
        IOError: if ``--singularity`` is combined with a source other than
            ``"kipoi"`` or with ``--common_env``.
        SystemExit: with status 1 when no models are found, the shard id is
            invalid, the common-environment ``models.yaml`` is missing, or a
            model test failed; with status 0 for the early exits (nothing
            modified according to git, ``--dry_run``, no common environments
            to test).
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi", help='Which source to test')
    parser.add_argument('--git-range',
                        nargs='+',
                        help='''Git range (e.g. commits or something like
                        "master HEAD" to check commits in HEAD vs master, or just "HEAD" to
                        include uncommitted changes). All models modified within this range will
                        be tested.''')
    parser.add_argument('-n',
                        '--dry_run',
                        action='store_true',
                        help='Dont run model testing')
    parser.add_argument('-b',
                        '--batch_size',
                        default=4,
                        type=int,
                        help='Batch size')
    parser.add_argument('-x',
                        '--exitfirst',
                        action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument(
        '-k',
        default=None,
        help='only run tests which match the given substring expression')
    parser.add_argument('-c',
                        '--clean_env',
                        action='store_true',
                        help='clean the environment after running.')
    parser.add_argument(
        "--vep",
        action=kipoi.cli.main.DeprecateAction,
        help=
        "This argument is deprecated. Please use https://github.com/kipoi/kipoi-veff2 directly"
    )
    parser.add_argument('--common_env',
                        action='store_true',
                        help='Test models in common environments.')
    parser.add_argument('--all',
                        action='store_true',
                        help="Test all models in the source")
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help=
        "Increase output verbosity. Show conda stdout during env installation."
    )
    parser.add_argument('--shard_id', type=int, default=-1, help="Shard id")
    parser.add_argument('--num_of_shards',
                        type=int,
                        default=-1,
                        help="Number of shards")
    parser.add_argument('--singularity',
                        action='store_true',
                        help='Test models within their singularity containers')

    args = parser.parse_args(raw_args)
    if args.singularity and args.source != "kipoi":
        raise IOError(
            "Singularity containers are available for kipoi models only")
    # From here on --singularity and --common_env are mutually exclusive.
    if args.singularity and args.common_env:
        raise IOError("Please use only one of --singularity and --common_env")

    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    if args.k is not None:
        # re.match anchors the expression at the start of the model name
        all_models = [x for x in all_models if re.match(args.k, x)]

    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)

    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.format(
            '\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models, source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.format(
            len(test_models), len(all_models), '\n- '.join(test_models)))
    # Sort the models alphabetically so that sharding is deterministic
    test_models = sorted(test_models)
    if args.num_of_shards > 0 and args.shard_id >= 0:
        if args.shard_id >= args.num_of_shards:
            logger.info(
                "Shard id is invalid. It should be a value between 0 and {0}.".
                format(args.num_of_shards - 1))
            sys.exit(1)
        # Split into num_of_shards nearly equal parts and keep only ours
        sublists = np.array_split(test_models, args.num_of_shards)
        test_models = list(sublists[args.shard_id])

    logger.info(test_models)
    # Parse the repo config
    cfg_path = get_file_path(source.local_path,
                             "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None

    if args.dry_run:
        logger.info(
            "-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)

    # TODO - make sure the modes are always tested in the same order?
    #        - make sure the keras config doesn't get cluttered

    # Test common environments
    if args.common_env:
        logger.info("Installing common environmnets")
        import yaml
        models_yaml_path = os.path.join(source.local_path, SPECIAL_ENV_PREFIX,
                                        "models.yaml")
        if not os.path.exists(models_yaml_path):
            logger.error(
                "{} doesn't exists when installing the common environment".
                format(models_yaml_path))
            sys.exit(1)
        # Context manager guarantees the file handle is closed again
        with open(models_yaml_path, "r", encoding="utf-8") as models_yaml:
            model_envs = yaml.safe_load(models_yaml)

        # Distinct common environments needed by the models under test
        candidate_envs = (get_common_env(m, model_envs) for m in test_models)
        test_envs = {env for env in candidate_envs if env is not None}

        if len(test_envs) == 0:
            logger.info("No common environments to test")
            sys.exit(0)

        logger.info(
            "Instaling environments covering the following models: \n{}".
            format(yaml.dump(model_envs)))
        for env in test_envs:
            if env_exists(env):
                logger.info(
                    "Common environment already exists: {}. Skipping the installation"
                    .format(env))
            else:
                logger.info("Installing environment: {}".format(env))
                create_model_env(os.path.join(SPECIAL_ENV_PREFIX, env),
                                 args.source, env)

    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i, m in enumerate(test_models, start=1):
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i, len(test_models), m))
        print('-' * 20)
        # Reset per model so the cleanup below never sees an unbound name or
        # the (already removed) environment of a previous model.
        env_name = None
        try:
            if args.common_env:
                # figure out the common environment name
                env_name = get_common_env(m, model_envs)
                if env_name is None:
                    # skip if none was found
                    logger.info(
                        "Common environment not found for {}".format(m))
                    continue
                # ---------------------------
                # Test
                print("test_model...")
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=False,
                           verbose=args.verbose)
            elif args.singularity:
                print("Testing within singularity container....")
                test_model_singularity(m,
                                       args.source,
                                       get_batch_size(cfg, m, args.batch_size),
                                       verbose=args.verbose)
            else:
                # Prepend "test-" to the standard kipoi env name
                env_name = "test-" + conda_env_name(m, source=args.source)
                # Test
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=True,
                           verbose=args.verbose)
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models.append(m)
            if args.exitfirst:
                # The `finally` clause still runs while SystemExit propagates,
                # so the environment is cleaned up exactly once.
                sys.exit(1)
        finally:
            # Only per-model environments are removed; common environments
            # are shared and singularity runs have no environment name.
            if (args.clean_env and not args.common_env
                    and env_name is not None):
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".format(
            len(failed_models), len(test_models), "\n- ".join(failed_models)))
        sys.exit(1)

    logger.info('All tests ({0}) passed'.format(len(test_models)))
Exemple #3
0
def get_component_file(component_dir, which="model", raise_err=True):
    """Look up the YAML file of a component inside `component_dir`.

    Delegates to ``get_file_path``, searching for ``<which>`` with a ``.yml``
    or ``.yaml`` extension.

    Args:
        component_dir: directory that is searched.
        which: base name of the component file (default ``"model"``).
        raise_err: forwarded unchanged to ``get_file_path``.

    Returns:
        Whatever ``get_file_path`` returns for this lookup.
    """
    # TODO - if component_dir has an extension, then just return that file path
    yaml_extensions = [".yml", ".yaml"]
    component_path = get_file_path(component_dir,
                                   which,
                                   extensions=yaml_extensions,
                                   raise_err=raise_err)
    return component_path