def main(argv=None, gpf_instance=None):
    """Load a study into an Impala genotype storage and save its config.

    Steps performed:

    1. Parse the command line with ``parse_cli_arguments``.
    2. Look up the genotype storage named by ``argv.genotype_storage``;
       if it is missing or is not an Impala storage, print a message and
       return without doing anything else.
    3. Call ``impala_load_dataset`` on the storage with the study id, the
       variants path and the pedigree path.
    4. If ``argv.study_config`` is set, load it and merge it over the
       config returned by the storage.
    5. Build the final study config and store it with
       ``save_study_config``.

    Args:
        argv: Command-line arguments without the program name. When
            ``None`` (the default), ``sys.argv[1:]`` is read at call time.
        gpf_instance: GPF instance to operate on; a new ``GPFInstance`` is
            created when ``None``.

    Raises:
        AssertionError: If ``argv.variants`` does not exist on the local
            filesystem, or if the built study config is ``None``.
    """
    if argv is None:
        # Resolve at call time. A ``sys.argv[1:]`` default in the signature
        # is evaluated only once, when the module is imported.
        argv = sys.argv[1:]
    if gpf_instance is None:
        gpf_instance = GPFInstance()

    argv = parse_cli_arguments(argv, gpf_instance)

    genotype_storage = gpf_instance.genotype_storage_db.get_genotype_storage(
        argv.genotype_storage
    )
    if not genotype_storage or not genotype_storage.is_impala():
        print("missing or non-impala genotype storage")
        return

    assert os.path.exists(argv.variants), argv.variants

    study_config = genotype_storage.impala_load_dataset(
        argv.study_id, argv.variants, argv.pedigree
    )

    if argv.study_config:
        # User-supplied settings take precedence over the generated ones.
        input_config = GPFConfigParser.load_config_raw(argv.study_config)
        study_config = recursive_dict_update(study_config, input_config)

    study_config = StudyConfigBuilder(study_config).build_config()
    assert study_config is not None

    save_study_config(
        gpf_instance.dae_config,
        argv.study_id,
        study_config,
        force=argv.force,
    )
def simple_study_import(
    self,
    study_id,
    families_loader=None,
    variant_loaders=None,
    study_config=None,
    **kwargs,
):
    """Import a study into this storage and build its study config.

    The families and variants are imported through
    ``self._import_families_file`` and ``self._import_variants_files``;
    their return values are placed under ``genotype_storage.files`` in the
    generated config.

    Args:
        study_id: Identifier of the study; stored as the config ``id``.
        families_loader: Passed through to ``self._import_families_file``.
        variant_loaders: Sequence of loaders exposing
            ``get_attribute("source_type")``. When empty or ``None`` the
            genotype browser is disabled in the generated config.
        study_config: Optional config source handed to
            ``GPFConfigParser.load_config_raw``; its values are merged over
            the generated config.
        **kwargs: Accepted and ignored by this implementation.

    Returns:
        The result of ``StudyConfigBuilder(config_dict).build_config()``.
    """
    families_config = self._import_families_file(study_id, families_loader)
    variants_config = self._import_variants_files(study_id, variant_loaders)

    # Ask each loader for its source type exactly once.
    source_types = [
        loader.get_attribute("source_type")
        for loader in variant_loaders or ()
    ]
    has_cnv = any(kind == "cnv" for kind in source_types)
    # A "cnv" source also marks the study as having de novo variants.
    has_denovo = has_cnv or any(kind == "denovo" for kind in source_types)

    config_dict = {
        "id": study_id,
        "conf_dir": ".",
        "has_denovo": has_denovo,
        "has_cnv": has_cnv,
        "genotype_storage": {
            "id": self.id,
            "files": {
                "variants": variants_config,
                "pedigree": families_config,
            },
        },
        "genotype_browser": {
            # Without variant loaders the genotype browser is disabled.
            "enabled": bool(variant_loaders),
        },
    }

    if study_config is not None:
        # Explicit study configuration overrides the generated defaults.
        study_config_dict = GPFConfigParser.load_config_raw(study_config)
        config_dict = recursive_dict_update(config_dict, study_config_dict)

    return StudyConfigBuilder(config_dict).build_config()
def simple_study_import(
        self, study_id, families_loader=None, variant_loaders=None,
        study_config=None, output=".", include_reference=False):
    """Convert a study to parquet, load it via Impala and build its config.

    Every variant loader is written to parquet under ``<output>/variants``
    and the families are written to ``<output>/pedigree/pedigree.parquet``.
    The resulting paths are handed to ``self.impala_load_dataset``, whose
    returned config is extended with ``has_denovo`` / ``has_cnv`` and
    optionally merged with ``study_config``.

    Args:
        study_id: Identifier of the study, passed to
            ``self.impala_load_dataset``.
        families_loader: Object whose ``load()`` returns the families to
            store. NOTE(review): the default is ``None`` but the value is
            dereferenced unconditionally, so it is effectively required.
        variant_loaders: Sequence of ``VariantsLoader`` instances. When
            empty or ``None`` no variants are written and
            ``variants_dir=None`` is passed to ``impala_load_dataset``.
        study_config: Optional config source handed to
            ``GPFConfigParser.load_config_raw``; its values are merged over
            the generated config.
        output: Directory under which the parquet files are written.
        include_reference: Forwarded to
            ``ParquetManager.variants_to_parquet``.

    Returns:
        The result of ``StudyConfigBuilder(config_dict).build_config()``.

    Raises:
        AssertionError: If a loader is not a ``VariantsLoader``, or a
            de novo loader sits at position 100 or later.
    """
    variants_dir = None
    has_denovo = False
    has_cnv = False
    bucket_index = 0

    if variant_loaders:
        # The target directory is the same for every loader; compute once.
        variants_dir = os.path.join(output, "variants")

        for index, variant_loader in enumerate(variant_loaders):
            assert isinstance(variant_loader, VariantsLoader), \
                type(variant_loader)

            source_type = variant_loader.get_attribute("source_type")
            if source_type == "denovo":
                has_denovo = True
            if source_type == "cnv":
                # A "cnv" source also marks the study as having de novo
                # variants.
                has_denovo = True
                has_cnv = True

            transmission_type = variant_loader.transmission_type
            if transmission_type == TransmissionType.denovo:
                assert index < 100
                bucket_index = index  # denovo buckets < 100
            elif transmission_type == TransmissionType.transmitted:
                bucket_index = index + 100  # transmitted buckets >= 100
            # NOTE(review): for any other transmission type bucket_index
            # keeps the value from the previous iteration (0 initially),
            # so two loaders may end up sharing a bucket - confirm whether
            # that can happen in practice.

            partition_description = NoPartitionDescriptor(variants_dir)
            ParquetManager.variants_to_parquet(
                variant_loader,
                partition_description,
                bucket_index=bucket_index,
                include_reference=include_reference)

    pedigree_filename = os.path.join(
        output, "pedigree", "pedigree.parquet")
    families = families_loader.load()
    ParquetManager.families_to_parquet(families, pedigree_filename)

    config_dict = self.impala_load_dataset(
        study_id,
        variants_dir=variants_dir,
        pedigree_file=pedigree_filename)
    config_dict["has_denovo"] = has_denovo
    config_dict["has_cnv"] = has_cnv

    if study_config is not None:
        # Explicit study configuration overrides the generated defaults.
        study_config_dict = GPFConfigParser.load_config_raw(study_config)
        config_dict = recursive_dict_update(config_dict, study_config_dict)

    return StudyConfigBuilder(config_dict).build_config()