def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/mgi_markers_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}mgi_markers_parquet")
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/output/genotype_phenotype_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}genotype_phenotype_parquet")
def output(self):
    """
    Returns the target for the open stats parquet produced by this task.

    ``self.output_path`` is updated in place so that it ends with "/".
    The dataset name depends on two string flags, checked in this order:

    - ``extract_windowed_data == "true"`` selects
      open_stats_parquet_with_windowing_data
    - ``raw_data_in_output == "include"`` selects
      open_stats_parquet_with_raw_data
    - otherwise open_stats_parquet is used
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"

    # The flags are compared against literal strings, not booleans.
    if self.extract_windowed_data == "true":
        target_location = (
            f"{self.output_path}open_stats_parquet_with_windowing_data"
        )
    elif self.raw_data_in_output == "include":
        target_location = f"{self.output_path}open_stats_parquet_with_raw_data"
    else:
        target_location = f"{self.output_path}open_stats_parquet"
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/colonies_tracking_clean_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}colonies_tracking_clean_parquet")
def output(self):
    """
    Returns the target for mgi_phenotype_parquet under ``output_path``.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}mgi_phenotype_parquet"
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/product_report_raw_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}product_report_raw_parquet")
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/impc_ontology_metadata_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}impc_ontology_metadata_parquet")
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/mouse_specimen_clean_parquet)

    The dataset name is prefixed with ``self.specimen_type``. A "/" is
    inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(
        f"{base_path}{self.specimen_type}_specimen_clean_parquet"
    )
def output(self):
    """
    Returns the target for the gene_diseases_json output of the Luigi Task,
    located in the impc_web_api folder under ``output_path``
    (i.e. <output_path>/impc_web_api/gene_diseases_json)
    """
    config = ImpcConfig()
    target_location = f"{self.output_path}/impc_web_api/gene_diseases_json"
    return config.get_target(target_location)
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/specimen_level_experiment_cross_ref_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(
        f"{base_path}specimen_level_experiment_cross_ref_parquet"
    )
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr16.0/parquet/stats_analysis_out_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}stats_analysis_out_parquet")
def output(self):
    """
    Returns the target for the gene_histopath_json output of the Luigi Task,
    located in the impc_web_api folder under ``output_path``
    (i.e. <output_path>/impc_web_api/gene_histopath_json)
    """
    config = ImpcConfig()
    target_location = f"{self.output_path}/impc_web_api/gene_histopath_json"
    return config.get_target(target_location)
def output(self):
    """
    Returns the target for imits_<entity_type>_raw_parquet under
    ``output_path``, where <entity_type> is ``self.entity_type`` lower-cased.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = (
        f"{self.output_path}imits_{self.entity_type.lower()}_raw_parquet"
    )
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the target for <entity_type>_normalized_parquet under
    ``output_path``, where <entity_type> is ``self.entity_type`` as given.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}{self.entity_type}_normalized_parquet"
    return ImpcConfig().get_target(target_location)
def app_options(self):
    """
    Generates the options passed to the PySpark job, in this order:

    1. the OBO ontology input path (``self.obo_ontology_input_path``)
    2. the deploy mode read from ``ImpcConfig``
    3. the path of this task's output target
    """
    ontology_input = self.obo_ontology_input_path
    deploy_mode = ImpcConfig().deploy_mode
    output_location = self.output().path
    return [ontology_input, deploy_mode, output_location]
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr15.2/parquet/line_level_experiment_cross_ref_parquet)

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}line_level_experiment_cross_ref_parquet"
    return ImpcConfig().get_target(target_location)
def extract_ontology_terms(spark_session: SparkSession) -> DataFrame: """ Build a Spark DataFrame with the terms of the ontologies listed in ONTOLOGIES. When ImpcConfig().deploy_mode is "local" or "client", each entry of ONTOLOGIES is loaded with pronto.Ontology.from_obo_library using its 'id' and 'format' keys. If the ontology defines a relationship whose id is "part_of", that relationship is marked as non transitive. Objects of the part_of relationship whose id is a key of the same ontology are added to the superclasses of the term. The terms named in the entry's 'top_level_terms' key, together with the set of their superclasses (excluding themselves), are passed to _parse_ontology_term for every term that has a name; terms without a name are skipped. NOTE(review): for any other deploy mode no ontology appears to be loaded, so the resulting DataFrame would be empty -- confirm this is intended. :param spark_session: Spark session used to parallelize the parsed terms and to read them back as JSON. :return: DataFrame read from the parsed terms using ONTOLOGY_SCHEMA in FAILFAST mode. """ ontology_terms = [] if ImpcConfig().deploy_mode in ["local", "client"]: for ontology_desc in ONTOLOGIES: print( f"Processing {ontology_desc['id']}.{ontology_desc['format']}") ontology: Ontology = pronto.Ontology.from_obo_library( f"{ontology_desc['id']}.{ontology_desc['format']}") part_of_rel: Relationship = None for rel in ontology.relationships(): if rel.id == "part_of": part_of_rel = rel break if part_of_rel is not None: part_of_rel.transitive = False print("Starting to compute super classes from part_of") for term in ontology.terms(): for super_part_term in term.objects(part_of_rel): if super_part_term.id in ontology.keys(): term.superclasses().add(super_part_term) print("Finished to compute super classes from part_of") top_level_terms = [ ontology[term] for term in ontology_desc["top_level_terms"] ] top_level_ancestors = [] for top_level_term in top_level_terms: top_level_ancestors.extend( top_level_term.superclasses(with_self=False)) top_level_ancestors = set(top_level_ancestors) ontology_terms += [ _parse_ontology_term(term, top_level_terms, top_level_ancestors, part_of_rel) for term in ontology.terms() if term.name is not None ] print( f"Finished processing {ontology_desc['id']}.{ontology_desc['format']}" ) ontology_terms_json = spark_session.sparkContext.parallelize( ontology_terms) ontology_terms_df = spark_session.read.json(ontology_terms_json, schema=ONTOLOGY_SCHEMA, mode="FAILFAST") return ontology_terms_df
def output(self):
    """
    Returns the target for impress_parameter_parquet under ``output_path``.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}impress_parameter_parquet"
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the target for gene_bundle_parquet under ``output_path``.

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}gene_bundle_parquet")
def output(self):
    """
    Returns the full parquet path as an output for the Luigi Task
    (e.g. impc/dr16.0/parquet/allele_ref_parquet)

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}allele_ref_parquet")
def output(self):
    """
    Returns the target for the pain-xml folder under ``output_path``
    (i.e. <output_path>/pain-xml/)
    """
    config = ImpcConfig()
    target_location = f"{self.output_path}/pain-xml/"
    return config.get_target(target_location)
def output(self):
    """
    Returns the target for impc_images_core_parquet under ``output_path``.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}impc_images_core_parquet"
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the target for flatten_observations_parquet under ``output_path``.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}flatten_observations_parquet"
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the target for mp_chooser_json under ``output_path``.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}mp_chooser_json"
    return ImpcConfig().get_target(target_location)
def output(self):
    """
    Returns the target that ImpcConfig resolves for the fixed name "done!".

    The name does not depend on any attribute of the task.
    """
    config = ImpcConfig()
    return config.get_target("done!")
def output(self):
    """
    Returns the target for batch_query_parquet under ``output_path``.

    A "/" is inserted between ``output_path`` and the dataset name when
    ``output_path`` does not already end with one, so the dataset name is
    never glued onto the last directory name. ``self.output_path`` itself
    is not modified.
    """
    # Format first so any path-like value is handled like a plain string.
    base_path = f"{self.output_path}"
    # An empty path is left alone so the target stays relative.
    if base_path and not base_path.endswith("/"):
        base_path += "/"
    return ImpcConfig().get_target(f"{base_path}batch_query_parquet")
def output(self):
    """
    Returns the target for images_pipeline_input_csv under ``output_path``.

    ``self.output_path`` is updated in place so that it ends with "/".
    """
    # Normalise the stored path once; later reads see the trailing "/".
    if not self.output_path.endswith("/"):
        self.output_path += "/"
    target_location = f"{self.output_path}images_pipeline_input_csv"
    return ImpcConfig().get_target(target_location)