def main(argv=sys.argv[1:], gpf_instance=None):
    """Load a study into an Impala genotype storage and save its config.

    Args:
        argv: Command-line arguments handed to ``parse_cli_arguments``.
            The default is ``sys.argv[1:]`` as evaluated when this
            function is defined.
        gpf_instance: GPF instance to operate on; a fresh ``GPFInstance()``
            is created when ``None``.

    Returns:
        None. When the requested genotype storage is missing or is not an
        Impala storage, a message is printed and the function returns
        without importing anything.

    Raises:
        AssertionError: If the variants path does not exist or the built
            study configuration is ``None``.
    """
    gpf_instance = GPFInstance() if gpf_instance is None else gpf_instance
    args = parse_cli_arguments(argv, gpf_instance)

    storage = gpf_instance.genotype_storage_db.get_genotype_storage(
        args.genotype_storage)
    # Guard clause: proceed only with an existing Impala-backed storage.
    if not (storage and storage.is_impala()):
        print("missing or non-impala genotype storage")
        return

    assert os.path.exists(args.variants)

    config = storage.impala_load_dataset(
        args.study_id, args.variants, args.pedigree)

    # A user-supplied study config is merged over the generated one.
    if args.study_config:
        overrides = GPFConfigParser.load_config_raw(args.study_config)
        config = recursive_dict_update(config, overrides)

    config = StudyConfigBuilder(config).build_config()
    assert config is not None

    save_study_config(
        gpf_instance.dae_config, args.study_id, config, force=args.force)
Beispiel #2
0
    def simple_study_import(
        self,
        study_id,
        families_loader=None,
        variant_loaders=None,
        study_config=None,
        **kwargs,
    ):
        """Import families and variants files and build the study config.

        Args:
            study_id: Identifier stored under the ``"id"`` key of the
                generated configuration.
            families_loader: Passed to ``self._import_families_file``.
            variant_loaders: Sequence of loaders passed to
                ``self._import_variants_files``. Each loader's
                ``"source_type"`` attribute drives the ``has_denovo`` and
                ``has_cnv`` flags. When empty or ``None`` the genotype
                browser is disabled.
            study_config: Optional argument for
                ``GPFConfigParser.load_config_raw``; the loaded dict is
                merged over the generated configuration with
                ``recursive_dict_update``.
            **kwargs: Accepted and ignored by this implementation.

        Returns:
            The result of ``StudyConfigBuilder(config_dict).build_config()``.
        """
        families_config = self._import_families_file(study_id, families_loader)
        variants_config = self._import_variants_files(
            study_id, variant_loaders)

        # Query every loader exactly once; a list (not a set) keeps the
        # original ``==`` comparison semantics without requiring hashability.
        source_types = [
            loader.get_attribute("source_type")
            for loader in variant_loaders or ()
        ]
        has_cnv = "cnv" in source_types
        # A CNV source also switches on ``has_denovo`` (unchanged behavior).
        has_denovo = has_cnv or "denovo" in source_types

        config_dict = {
            "id": study_id,
            "conf_dir": ".",
            "has_denovo": has_denovo,
            "has_cnv": has_cnv,
            "genotype_storage": {
                "id": self.id,
                "files": {
                    "variants": variants_config,
                    "pedigree": families_config,
                },
            },
            "genotype_browser": {
                # Without variant loaders the genotype browser is disabled.
                "enabled": bool(variant_loaders),
            },
        }

        if study_config is not None:
            study_config_dict = GPFConfigParser.load_config_raw(study_config)
            config_dict = recursive_dict_update(config_dict, study_config_dict)

        return StudyConfigBuilder(config_dict).build_config()
Beispiel #3
0
    def simple_study_import(self,
                            study_id,
                            families_loader=None,
                            variant_loaders=None,
                            study_config=None,
                            output=".",
                            include_reference=False):
        """Convert a study to Parquet, load it and build its study config.

        Every variant loader is handed to
        ``ParquetManager.variants_to_parquet`` targeting
        ``<output>/variants``; the families are handed to
        ``ParquetManager.families_to_parquet`` targeting
        ``<output>/pedigree/pedigree.parquet``. The resulting locations are
        then passed to ``self.impala_load_dataset``.

        Args:
            study_id: Study identifier passed to ``impala_load_dataset``.
            families_loader: Object whose ``load()`` result is written as
                the pedigree. Required despite the ``None`` default.
            variant_loaders: Optional sequence of ``VariantsLoader``
                instances. When empty or ``None`` no variants are written
                and ``variants_dir`` is passed on as ``None``.
            study_config: Optional argument for
                ``GPFConfigParser.load_config_raw``; the loaded dict is
                merged over the generated configuration.
            output: Directory under which ``variants`` and ``pedigree``
                paths are built.
            include_reference: Forwarded to
                ``ParquetManager.variants_to_parquet``.

        Returns:
            The result of ``StudyConfigBuilder(config_dict).build_config()``.

        Raises:
            ValueError: If ``families_loader`` is ``None``.
            AssertionError: If a loader is not a ``VariantsLoader`` or a
                denovo loader sits at index 100 or above.
        """
        # Fail before any variants are converted instead of crashing with
        # an AttributeError on ``None.load()`` after all that work is done.
        if families_loader is None:
            raise ValueError(
                "families_loader is required to import a study")

        variants_dir = None
        has_denovo = False
        has_cnv = False
        bucket_index = 0

        if variant_loaders:
            # Loop-invariant: all loaders write below the same directory.
            variants_dir = os.path.join(output, "variants")

            for index, variant_loader in enumerate(variant_loaders):
                assert isinstance(variant_loader, VariantsLoader), \
                    type(variant_loader)

                source_type = variant_loader.get_attribute("source_type")
                if source_type == "cnv":
                    # A CNV source also switches on ``has_denovo``.
                    has_denovo = True
                    has_cnv = True
                elif source_type == "denovo":
                    has_denovo = True

                transmission_type = variant_loader.transmission_type
                if transmission_type == TransmissionType.denovo:
                    assert index < 100

                    bucket_index = index  # denovo buckets < 100
                elif transmission_type == TransmissionType.transmitted:
                    bucket_index = index + 100  # transmitted buckets >=100
                # NOTE(review): for any other transmission type the
                # previous bucket_index is reused unchanged -- confirm
                # whether that is intended.

                ParquetManager.variants_to_parquet(
                    variant_loader,
                    NoPartitionDescriptor(variants_dir),
                    bucket_index=bucket_index,
                    include_reference=include_reference)

        pedigree_filename = os.path.join(output, "pedigree",
                                         "pedigree.parquet")
        families = families_loader.load()
        ParquetManager.families_to_parquet(families, pedigree_filename)

        config_dict = self.impala_load_dataset(study_id,
                                               variants_dir=variants_dir,
                                               pedigree_file=pedigree_filename)

        config_dict["has_denovo"] = has_denovo
        config_dict["has_cnv"] = has_cnv

        if study_config is not None:
            study_config_dict = GPFConfigParser.load_config_raw(study_config)
            config_dict = recursive_dict_update(config_dict, study_config_dict)

        return StudyConfigBuilder(config_dict).build_config()