Ejemplo n.º 1
0
def cli_get_example(command, raw_args):
    """Download the example files for a model into a target directory."""
    assert command == "get-example"
    # build the argument parser
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Get example files')
    add_model(parser, source="kipoi")
    parser.add_argument("-o", "--output", default="example", required=False,
                        help="Output directory where to store the examples. Default: 'example'")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    model_descr = kipoi.get_model_descr(args.model, args.source)
    model_source = kipoi.get_source(args.source)

    # resolve the default dataloader description
    if isinstance(model_descr.default_dataloader, kipoi.specs.DataLoaderImport):
        # dataloader specified as an import - instantiate inside the model dir
        with cd(model_source.get_model_dir(args.model)):
            dataloader_descr = model_descr.default_dataloader.get()
    else:
        # dataloader referenced by a path relative to the model directory
        dataloader_descr = kipoi.get_dataloader_descr(
            os.path.join(args.model, model_descr.default_dataloader),
            source=args.source)

    example_kwargs = dataloader_descr.download_example(output_dir=args.output,
                                                       dry_run=False)

    logger.info("Example files downloaded to: {}".format(args.output))
    logger.info("use the following dataloader kwargs:")
    print(json.dumps(example_kwargs))
Ejemplo n.º 2
0
def merge_deps(models,
               dataloaders=None,
               source="kipoi",
               vep=False,
               gpu=False):
    """Collect the conda/pip dependencies for a set of models.

    Args:
      models: list of model names to resolve.
      dataloaders: optional list of dataloader names. When None or empty,
        each model's default dataloader is inferred from the model instead.
      source: kipoi source name to resolve models/dataloaders against.
      vep: if True, also merge the variant-effect-prediction dependencies.
      gpu: if True, swap in gpu-compatible dependency variants.

    Returns:
      Merged Dependencies object (always includes the kipoi core deps).
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))

        parsed_source, parsed_model = parse_source_name(source, model)
        model_descr = kipoi.get_model_descr(parsed_model, parsed_source)

        deps = deps.merge(model_descr.dependencies)
        # handle the dataloader=None case: fall back to the model's default
        if dataloaders is None or not dataloaders:
            dataloader = os.path.normpath(os.path.join(parsed_model,
                                                       model_descr.default_dataloader))
            logger.info("Inferred dataloader name: {0} from".format(dataloader) +
                        " the model.")
            dataloader_descr = kipoi.get_dataloader_descr(dataloader, parsed_source)
            deps = deps.merge(dataloader_descr.dependencies)

    # FIX: `dataloaders is not None or dataloaders` was redundant (equivalent
    # to `dataloaders is not None`); a plain truthiness check expresses the
    # intent and skips the no-op loop over an empty list.
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(source, dataloader)
            dataloader_descr = kipoi.get_dataloader_descr(parsed_dataloader, parsed_source)
            deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    # `platform` is presumably sys.platform imported at module level - TODO confirm
    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
Ejemplo n.º 3
0
def get_dataloader_descr(model_name, source):
    """Return the description of *model_name*'s default dataloader."""
    from kipoi.utils import cd
    src = kipoi.get_source(source)
    model_descr = kipoi.get_model_descr(model_name, source=source)
    if not isinstance(model_descr.default_dataloader, str):
        # dataloader given as an import spec - instantiate inside the model dir
        with cd(src.get_model_dir(model_name)):
            return model_descr.default_dataloader.get()
    # dataloader referenced by a path relative to the model directory
    dl_path = os.path.join(model_name, model_descr.default_dataloader)
    return kipoi.get_dataloader_descr(dl_path, source=source)
Ejemplo n.º 4
0
def test_load_model(example):
    """Smoke-test the dataloader factory for one example model directory."""
    example_dir = "example/models/{0}".format(example)

    if sys.version_info[0] == 2 and example in {"rbp", "iris_model_template"}:
        pytest.skip("example not supported on python 2 ")

    if INSTALL_REQ:
        install_dataloader_requirements(example_dir, "dir")
    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")

    # touch the public attributes to make sure they all exist
    for attr in ("type", "defined_as", "args", "info", "output_schema",
                 "source", "batch_iter", "load_all"):
        getattr(Dl, attr)

    Dl.print_args()

    kipoi.get_dataloader_descr(example_dir, source="dir").print_kwargs()
Ejemplo n.º 5
0
def model_list(model_name):
    """Models list view.

    Depending on what ``model_name`` resolves to, render the single-model
    detail page, a filtered model list, or redirect to the group listing.
    """
    source = current_app.config['SOURCE']
    df = get_model_list(source)
    model_name = model_name.rstrip('/')
    vtype_path = get_view(model_name, df)

    if vtype_path is None:
        # unknown path - nothing to render (should eventually be a 404)
        return
    vtype, path = vtype_path

    if vtype == "model":
        # single-model detail page: model + dataloader info from kipoi
        model = kipoi.get_model_descr(model_name, source=source)
        dataloader = kipoi.get_dataloader_descr(
            os.path.join(model_name, model.default_dataloader),
            source=source)
        return render_template("models/model_details.html",
                               model_name=model_name,
                               model=model,
                               dataloader=dataloader,
                               title=model_name.split('/'),
                               code_snippets=get_snippets(model_name, source))

    if vtype == "model_list":
        # list view restricted to the models living under `path`
        model_df = get_model_list(source)
        model_df = model_df[model_df.model.str.contains("^" + path + "/")]
        return render_template("models/index.html",
                               models=model_df.to_dict(orient='records'))

    if vtype == "group_list":
        # hand off to the group listing view
        return redirect(url_for('models.list_groups', group_name=path))
Ejemplo n.º 6
0
def get_dataloader_descr(model_name, source='kipoi'):
    """Not yet nicely integrated with Kipoi

    Args:
      model_name: model name as a string
      source: kipoi source to resolve the model and dataloader from

    Returns:
      (model output schema, list of required files)
    """
    dl_skip_arguments = {
        "kipoiseq.dataloaders.SeqIntervalDl":
        ['alphabet_axis', 'dummy_axis', 'alphabet', 'dtype']
    }
    # BUG FIX: `source` used to be ignored for the model description, so a
    # model from any non-default source was looked up in 'kipoi' instead.
    md = kipoi.get_model_descr(model_name, source=source)
    src = kipoi.get_source(source)

    # get dataloader
    if isinstance(md.default_dataloader, str):
        # dataloader referenced by a path relative to the model directory
        dataloader = kipoi.get_dataloader_descr(os.path.join(
            model_name, md.default_dataloader),
                                                source=source)
        dataloader_name = md.default_dataloader
        dataloader_args = dataloader.args
    else:
        # dataloader specified as an import spec - instantiate in model dir
        with cd(src.get_model_dir(model_name)):
            dataloader = md.default_dataloader.get()
        dataloader_name = md.default_dataloader.defined_as
        # drop arguments already fixed by default_args or known to be noise
        dataloader_args = OrderedDict([
            (k, v) for k, v in dataloader.args.items()
            if k not in list(md.default_dataloader.default_args) +
            dl_skip_arguments.get(dataloader_name, [])
        ])

        if md.default_dataloader.defined_as == 'kipoiseq.dataloaders.SeqIntervalDl':
            # HACK - cleanup some values for SeqIntervalDl
            if md.default_dataloader.default_args.get("ignore_targets", False):
                dataloader_args.pop('label_dtype', None)

    # NOTE(review): dataloader_name/dataloader_args are computed but unused
    # below; kept for parity with the related view code - confirm before removing
    required_files = []
    if 'fasta_file' in dataloader.args:
        required_files.append("fasta_file")
    if 'gtf_file' in dataloader.args:
        required_files.append("gtf_file")

    return get_output_schema(md.schema.targets), required_files
Ejemplo n.º 7
0
def cli_info(command, raw_args):
    """Print the model description and its dataloader keyword arguments."""
    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader" +
                                     " keyword arguments.")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)

    # --------------------------------------------
    # load the model description and its source
    model_descr = kipoi.get_model_descr(args.model, args.source)
    model_source = kipoi.get_source(args.source)

    # resolve the default dataloader description
    try:
        if isinstance(model_descr.default_dataloader, kipoi.specs.DataLoaderImport):
            # dataloader specified as an import - instantiate in the model dir
            with cd(model_source.get_model_dir(args.model)):
                dataloader_descr = model_descr.default_dataloader.get()
        else:
            # dataloader referenced by a path relative to the model directory
            dataloader_descr = kipoi.get_dataloader_descr(
                os.path.join(args.model, model_descr.default_dataloader),
                source=args.source)
    except ImportError:
        # e.g. the package providing the dataloader (kipoiseq) is missing
        dataloader_descr = None

    print("-" * 80)
    print("'{0}' from source '{1}'".format(str(args.model), str(args.source)))
    print("")
    print("Model information")
    print("-----------")
    print(model_descr.info.get_config_as_yaml())
    if dataloader_descr:
        print("Dataloader arguments")
        print("--------------------")
        dataloader_descr.print_args()
    print("--------------------\n")
    print("Run `kipoi get-example {} -o example` to download example files.\n".
          format(args.model))
Ejemplo n.º 8
0
def merge_deps(models,
               dataloaders=None,
               source="kipoi",
               vep=False,
               interpret=False,
               gpu=False):
    """Setup the dependencies

    Args:
      models: list of model names; entries recognized as special environments
        are loaded from handcrafted yaml files instead of model descriptions.
      dataloaders: optional list of dataloader names. When None or empty,
        each model's default dataloader is used.
      source: kipoi source to resolve names against.
      vep: add the variant-effect-prediction dependencies.
      interpret: add the model-interpretation dependencies.
      gpu: use gpu-compatible dependency variants.

    Returns:
      Merged Dependencies object (always includes the kipoi core deps).
    """

    special_envs, only_models = split_models_special_envs(models)
    deps = Dependencies()

    # Treat the handcrafted environments differently
    for special_env in special_envs:
        from related import from_yaml
        logger.info("Loading environment definition: {0}".format(special_env))

        # Load and merge the handcrafted deps.
        yaml_path = os.path.join(
            kipoi.get_source(source).local_path, special_env + ".yaml")

        if not os.path.exists(yaml_path):
            raise ValueError(
                "Environment definition file {0} not found in source {1}".
                format(yaml_path, source))

        with open(yaml_path, "r", encoding="utf-8") as fh:
            special_env_deps = Dependencies.from_env_dict(from_yaml(fh))
        deps = deps.merge(special_env_deps)

    for model in only_models:
        logger.info("Loading model: {0} description".format(model))

        parsed_source, parsed_model = parse_source_name(source, model)

        # a model name may expand to several sub-models
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get(
                                )
                            deps = deps.merge(dataloader_descr.dependencies)
                        # FIX: exception variable was bound but never used
                        except ImportError:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    # dataloader referenced by a path relative to the model
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)
    # FIX: `dataloaders is not None or dataloaders` was redundant (equivalent
    # to `dataloaders is not None`); truthiness states the intent directly.
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))

            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if interpret:
        # add interpretation dependencies
        logger.info("Adding the interpret dependencies")
        deps = INTERPRET_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
Ejemplo n.º 9
0
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Setup the dependencies

    Args:
      models: list of model names; each may expand to several sub-models.
      dataloaders: optional list of dataloader names. When None or empty,
        each model's default dataloader is used.
      source: kipoi source to resolve names against.
      vep: add the variant-effect-prediction dependencies.
      gpu: use gpu-compatible dependency variants.

    Returns:
      Merged Dependencies object (always includes the kipoi core deps).
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))

        parsed_source, parsed_model = parse_source_name(source, model)

        # a model name may expand to several sub-models
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get(
                                )
                            deps = deps.merge(dataloader_descr.dependencies)
                        # FIX: exception variable was bound but never used
                        except ImportError:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                # FIX: logger.warn is deprecated - use warning
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    # dataloader referenced by a path relative to the model
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)
    # FIX: `dataloaders is not None or dataloaders` was redundant (equivalent
    # to `dataloaders is not None`); truthiness states the intent directly.
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))

            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
def get_dataloader_descr(model_name):
    """Resolve the description of *model_name*'s default dataloader."""
    default_dl = kipoi.get_model_descr(model_name).default_dataloader
    return kipoi.get_dataloader_descr(os.path.join(model_name, default_dl))
Ejemplo n.º 11
0
def test__get_dl_bed_fields():
    """The rbp example's bed fields should be just ['intervals_file']."""
    model_dir = "examples/rbp/"
    dl_descr = kipoi.get_dataloader_descr(model_dir, source="dir")
    fields = kipoi.postprocessing.variant_effects.utils.generic._get_dl_bed_fields(
        dl_descr)
    assert fields == ['intervals_file']
Ejemplo n.º 12
0
def model_list(model_name):
    """ Models list view

    Renders one of three views depending on what ``model_name`` resolves to:
    a single-model detail page, a filtered model list, or a redirect to the
    group listing.
    """
    from kipoi.utils import cd
    source = current_app.config['SOURCE']
    df = get_model_list(source)
    model_name = model_name.rstrip('/')
    vtype_path = get_view(model_name, df)

    if vtype_path is None:
        # unknown path - should eventually be a 404
        return
    else:
        vtype, path = vtype_path

    # render the model detail view
    if vtype == "model":
        # Model info retrieved from kipoi
        model = kipoi.get_model_descr(model_name, source=source)
        src = kipoi.get_source(source)
        model_dir = kipoi.utils.relative_path(src.get_model_dir(model_name),
                                              src.local_path)
        model_url = github_dir_tree(src.remote_url, model_dir)
        # Model dataloaders info retrieved from kipoi
        if model.default_dataloader:
            if isinstance(model.default_dataloader, str):
                # dataloader referenced by a path relative to the model
                dl_rel_path = True
                dataloader = kipoi.get_dataloader_descr(os.path.join(
                    model_name, model.default_dataloader),
                                                        source=source)
                dataloader_name = model.default_dataloader
                dataloader_args = dataloader.args
            else:
                # dataloader specified as an import spec
                dl_rel_path = False
                with cd(src.get_model_dir(model_name)):
                    dataloader = model.default_dataloader.get()
                dataloader_name = model.default_dataloader.defined_as
                # drop arguments fixed by default_args or known to be noise
                dataloader_args = OrderedDict([
                    (k, v) for k, v in dataloader.args.items()
                    if k not in list(model.default_dataloader.default_args) +
                    dl_skip_arguments.get(dataloader_name, [])
                ])

                if model.default_dataloader.defined_as == 'kipoiseq.dataloaders.SeqIntervalDl':
                    # HACK - cleanup some values for SeqIntervalDl
                    if model.default_dataloader.default_args.get(
                            "ignore_targets", False):
                        dataloader_args.pop('label_dtype', None)
        else:
            # model without a dataloader
            dataloader = None
            dataloader_name = ''
            dataloader_args = {}
            dl_rel_path = False

        title = model_name.split('/')
        # obtain snippets
        code_snippets = get_snippets(model_name, source)
        if model_name == "SeqVec/embedding2structure":
            # snippets are not applicable for this model
            code_snippets["docker"] = ''
            code_snippets["singularity"] = ''
            code_snippets["cli"] = ''
            code_snippets["python"] = ''
            code_snippets["R"] = ''

        # reading the README content
        readme_dir = kipoi.get_source(
            current_app.config['SOURCE']).get_model_dir(model_name)
        try:
            # README lookup must be case-insensitive (README.md vs readme.md)
            filelists = os.listdir(readme_dir)
            readmeindx = [x.lower() for x in filelists].index("readme.md")
            # FIX: use a context manager so the file handle is always closed
            with open(os.path.join(readme_dir, filelists[readmeindx]),
                      "r") as fh:
                filecontent = fh.read()
            readmecontent = render_markdown(filecontent)
            # remove the title because already there is a title
            readmecontent = re.sub("<[hH][12]>.*</[hH][12]>",
                                   "",
                                   readmecontent,
                                   count=1)
            readmecontent = Markup(readmecontent)
        except IOError:
            readmecontent = ""
        except ValueError:
            # no readme.md in the directory listing
            readmecontent = ""
        return render_template(
            "models/model_details.html",
            model_name=model_name,
            model=model,
            contributors=update_contributors(model.info.contributors,
                                             model.info.authors),
            authors=update_authors(model.info.authors, model.info.cite_as),
            dataloader=dataloader,
            dataloader_args=dataloader_args,
            dataloader_name=dataloader_name,
            model_url=model_url,
            dl_rel_path=dl_rel_path,
            cite_as=update_cite_as(model.info.cite_as),
            title=title,
            code_snippets=code_snippets,
            readmecontent=readmecontent,
            model_postprocessing=available_postprocessing(model_name))

    # run the normal model list view on a subsetted table
    elif vtype == "model_list":
        model_df = get_model_list(source)

        # TODO - augment the results

        # Filter the results
        model_df = model_df[model_df.model.str.contains("^" + path + "/")]

        filtered_models = model_df.to_dict(orient='records')
        filtered_models = [update_cite_as_dict(x) for x in filtered_models]

        # update contributors
        filtered_models = [
            update_contributors_as_dict(x) for x in filtered_models
        ]

        # update authors
        filtered_models = [update_authors_as_dict(x) for x in filtered_models]

        # get readme file
        readme_dir = os.path.join(
            kipoi.get_source(current_app.config['SOURCE']).local_path,
            model_name)
        try:
            filelists = os.listdir(readme_dir)
            readmeindx = [x.lower() for x in filelists].index("readme.md")
            # FIX: use a context manager so the file handle is always closed
            with open(os.path.join(readme_dir, filelists[readmeindx]),
                      "r") as fh:
                filecontent = fh.read()
            readmecontent = render_markdown(filecontent)
        except IOError:
            readmecontent = ""
        except ValueError:
            readmecontent = ""

        return render_template("models/index.html",
                               models=filtered_models,
                               readmecontent=readmecontent)

    # redirect to the group list
    elif vtype == "group_list":
        return redirect(url_for('models.list_groups', group_name=path))
Ejemplo n.º 13
0
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Setup the dependencies

    Args:
      models: list of model names; each may expand to several sub-models.
      dataloaders: optional list of dataloader names. When None or empty,
        each model's default dataloader is used.
      source: kipoi source to resolve names against.
      vep: add the variant-effect-prediction dependencies.
      gpu: use gpu-compatible dependency variants.

    Returns:
      Merged Dependencies object (always includes the kipoi core deps).
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))

        parsed_source, parsed_model = parse_source_name(source, model)

        # a model name may expand to several sub-models
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case: use the model's default
            if dataloaders is None or not dataloaders:
                dataloader = os.path.normpath(
                    os.path.join(sub_model, model_descr.default_dataloader))
                logger.info(
                    "Inferred dataloader name: {0} from".format(dataloader) +
                    " the model.")
                dataloader_descr = kipoi.get_dataloader_descr(
                    dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)
    # FIX: `dataloaders is not None or dataloaders` was redundant (equivalent
    # to `dataloaders is not None`); truthiness states the intent directly.
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))

            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps