Example #1
0
 def get_ontology_hpo(self, conf, output, riot):
     """Generate the HPO ontology output from its converted source file.

     The file obtained through ``self.download_converted_file`` for
     ``conf.etl.hpo`` is wrapped in an ``HPO`` object, ``generate()`` is run
     on it, and the result is saved to
     ``<output.prod_dir>/<hpo.path>/<hpo.output_filename>``.
     """
     settings = conf.etl.hpo
     converted_file = self.download_converted_file(settings, output, riot)

     ontology = HPO(converted_file)
     ontology.generate()

     # The directory is created before the save call writes into it.
     target_dir = output.prod_dir + "/" + settings.path
     create_output_dir(target_dir)
     ontology.save_hpo(target_dir + "/" + settings.output_filename)
Example #2
0
 def get_ontology_mondo(self, conf, output, riot):
     """Generate the MONDO ontology output from its converted source file.

     The file obtained through ``self.download_converted_file`` for
     ``conf.etl.mondo`` is wrapped in a ``MONDO`` object, ``generate()`` is
     run on it, and the result is saved to
     ``<output.prod_dir>/<mondo.path>/<mondo.output_filename>``.
     """
     settings = conf.etl.mondo
     converted_file = self.download_converted_file(settings, output, riot)

     ontology = MONDO(converted_file)
     ontology.generate()

     # The directory is created before the save call writes into it.
     target_dir = output.prod_dir + "/" + settings.path
     create_output_dir(target_dir)
     ontology.save_mondo(target_dir + "/" + settings.output_filename)
Example #3
0
 def get_hpo_phenotypes(self, conf, output):
     """Stage the HPO phenotypes resource and run its processor.

     The resource described by ``conf.etl.hpo_phenotypes`` is downloaded into
     ``output.staging_dir``; an ``HPOPhenotypes`` object built on the staged
     file is then run with
     ``<output.prod_dir>/<path>/<output_filename>`` as its argument.
     """
     settings = conf.etl.hpo_phenotypes
     staged_file = Downloads.dowload_staging_http(output.staging_dir,
                                                  settings)
     phenotypes = HPOPhenotypes(staged_file)

     target_dir = output.prod_dir + "/" + settings.path
     create_output_dir(target_dir)
     phenotypes.run(target_dir + "/" + settings.output_filename)
 def create_output_structure(self, output_dir):
     """Create the ``prod`` and ``staging`` directories under *output_dir*.

     When ``self.args.force_clean`` is truthy, *output_dir* is removed
     first; otherwise a notice is logged and it is left untouched. The
     values returned by ``create_output_dir`` are stored on
     ``self.yaml.outputs`` as ``prod_dir`` and ``staging_dir``.
     """
     if self.args.force_clean:
         remove_output_dir(output_dir)
     else:
         logger.info("Warning: Output not deleted.")

     prod_path = create_output_dir(output_dir + '/prod')
     self.yaml.outputs.prod_dir = prod_path

     staging_path = create_output_dir(output_dir + '/staging')
     self.yaml.outputs.staging_dir = staging_path
Example #5
0
 def process(self, conf, output, cmd_conf):
     """Stage the resource in ``conf.etl`` and convert it from OWL to JSON-LD.

     A ``Riot`` helper is built from *cmd_conf*, the resource is downloaded
     into ``output.staging_dir``, and the conversion is written under
     ``<output.prod_dir>/<conf.etl.path>`` using the ``conf.etl.owl_jq``
     setting. Nothing is returned.
     """
     converter = Riot(cmd_conf)
     staged_owl = Downloads.dowload_staging_http(output.staging_dir,
                                                 conf.etl)

     destination = os.path.join(output.prod_dir, conf.etl.path)
     create_output_dir(destination)

     converter.convert_owl_to_jsonld(staged_owl, destination,
                                     conf.etl.owl_jq)
Example #6
0
 def get_project_scores(self, project_score_entry, output):
     """Stage the project-scores archive and extract one file from it.

     The archive described by *project_score_entry* is downloaded into
     ``output.staging_dir``; only the binary dependency scores matrix is
     extracted, into ``<output.prod_dir>/<project_score_entry.path>``.
     """
     logger.info("Downloading project scores target files")

     # Only this single member of the zipped archive is needed.
     wanted_member = 'EssentialityMatrices/04_binaryDepScores.tsv'

     archive = Downloads.dowload_staging_http(output.staging_dir,
                                              project_score_entry)
     destination = os.path.join(output.prod_dir, project_score_entry.path)
     create_output_dir(destination)
     extract_file_from_zip(wanted_member, archive, destination)
Example #7
0
 def get_ontology_EFO(self, conf, output, riot):
     """Generate the EFO ontology outputs from its converted source file.

     The file obtained through ``self.download_converted_file`` for
     ``conf.etl.efo`` is wrapped in an ``EFO`` object and ``generate()`` is
     run on it. Two files are then saved under
     ``<output.prod_dir>/<efo.path>``: the static disease file
     (``efo.diseases_static_file``) and the diseases file
     (``efo.output_filename``), in that order.
     """
     settings = conf.etl.efo
     converted_file = self.download_converted_file(settings, output, riot)

     ontology = EFO(converted_file)
     ontology.generate()

     target_dir = output.prod_dir + "/" + settings.path
     create_output_dir(target_dir)

     static_file = target_dir + "/" + settings.diseases_static_file
     ontology.save_static_disease_file(static_file)

     diseases_file = target_dir + "/" + settings.output_filename
     ontology.save_diseases(diseases_file)
 def process(self, conf, output, cmd_conf):
     """Download the JSON file of every configured species and extract fields.

     ``{release}`` in ``conf.uri`` is replaced by ``conf.release``, the
     release-specific directory ``<output.prod_dir>/<conf.path>/<release>``
     is created, and the ``jq`` command is resolved from ``cmd_conf.jq``.
     Each entry of ``conf.resources`` is then downloaded with
     ``self.download_species`` and passed to
     ``self.extract_fields_from_json``.
     """
     downloader = DownloadResource(output.staging_dir)

     release_label = str(conf.release)
     uri_release = conf.uri.replace("{release}", release_label)

     release_dir = os.path.join(output.prod_dir, conf.path, release_label)
     create_output_dir(release_dir)

     jq_cmd = Utils.check_path_command("jq", cmd_conf.jq)

     for species in conf.resources:
         logger.debug(f'Downloading files for {species}')
         species_json = self.download_species(uri_release, conf.release,
                                              output.staging_dir,
                                              downloader, species)
         self.extract_fields_from_json(species_json, conf, output, jq_cmd)
Example #9
0
 def _download_selected_event_files(self, repo_metadata, output):
     downloaded_files = dict()
     # Body
     if repo_metadata:
         logger.info("OpenFDA FAERs metadata received")
         fda_output = create_output_dir(
             os.path.join(output.prod_dir, "fda-inputs"))
         fda = OpenfdaHelper(fda_output)
         # Parallel data gathering
         logger.info("Prepare download pool of {} processes".format(
             mp.cpu_count()))
         download_pool = mp.Pool(mp.cpu_count())
         logger.info(mp.current_process())
         try:
             for _ in tqdm.tqdm(download_pool.map(
                     fda._do_download_openfda_event_file,
                     repo_metadata['results']['drug']['event']
                 ['partitions']),
                                total=len(repo_metadata['results']['drug']
                                          ['event']['partitions'])):
                 logger.info('\rdone {0:%}'.format(
                     _ / len(repo_metadata['results']['drug']['event']
                             ['partitions'])))
         except Exception as e:
             logger.info("Something went wrong: " + str(e))
     return downloaded_files
Example #10
0
 def get_normal_tissues(self, output, resource):
     """Stage the normal-tissues archive and re-publish it gzip-compressed.

     The staged download is unpacked with ``make_unzip_single_file`` and
     the unpacked file is gzipped to ``<output.prod_dir>/<resource.path>/``
     under ``resource.output_filename`` with ``{suffix}`` replaced by
     ``self.suffix``.
     """
     staged_archive = Downloads.dowload_staging_http(output.staging_dir,
                                                     resource)
     unpacked_file = make_unzip_single_file(staged_archive)

     target_dir = create_output_dir(
         os.path.join(output.prod_dir, resource.path))
     target_name = resource.output_filename.replace('{suffix}', self.suffix)

     make_gzip(unpacked_file, os.path.join(target_dir, target_name))
Example #11
0
 def get_gnomad(self, gnomad, output):
     """Stage the gnomAD resource and re-publish it gzip-compressed.

     The staged download is passed through ``make_ungzip`` and the
     resulting file is gzipped to
     ``<output.prod_dir>/<gnomad.path>/<gnomad.output_filename>``.
     """
     staged_file = Downloads.dowload_staging_http(output.staging_dir, gnomad)
     unpacked_file = make_ungzip(staged_file)

     target_dir = create_output_dir(
         os.path.join(output.prod_dir, gnomad.path))
     target_file = os.path.join(target_dir, gnomad.output_filename)

     make_gzip(unpacked_file, target_file)
Example #12
0
 def save_tissue_translation_map(self, output_path, resource, filename):
     """Write the ``tissues`` mapping of a JSON file as JSON lines.

     *filename* is opened through ``URLZSource`` and parsed as JSON; its
     top-level ``tissues`` object maps a tissue id to that tissue's fields.
     One line is written per tissue, containing the tissue's fields plus a
     ``tissue_id`` key holding the id.

     The output goes to ``<output_path>/<resource.path>/`` under
     ``resource.output_filename`` with ``{suffix}`` replaced by
     ``self.suffix``.
     """
     # The context manager closes the source on exit; no explicit close()
     # is needed afterwards.
     with URLZSource(filename).open(mode='rb') as r_file:
         tissues = json.load(r_file)['tissues']

     create_output_dir(os.path.join(output_path, resource.path))
     filename_tissue = os.path.join(
         output_path, resource.path,
         resource.output_filename.replace('{suffix}', self.suffix))

     with jsonlines.open(filename_tissue, mode='w') as writer:
         for tissue_id, tissue in tissues.items():
             # Copy the fields so the parsed input is not mutated;
             # 'tissue_id' overrides any field of the same name.
             writer.write({**tissue, 'tissue_id': tissue_id})
Example #13
0
 def get_subcellular_location(self, sub_location, output):
     """Stage the subcellular-location archive and re-publish it gzipped.

     The staged download is unpacked with ``make_unzip_single_file`` and
     the unpacked file is gzipped to
     ``<output.prod_dir>/<sub_location.path>/<sub_location.output_filename>``.
     """
     staged_archive = Downloads.dowload_staging_http(output.staging_dir,
                                                     sub_location)
     unpacked_file = make_unzip_single_file(staged_archive)

     target_dir = create_output_dir(
         os.path.join(output.prod_dir, sub_location.path))
     target_file = os.path.join(target_dir, sub_location.output_filename)

     make_gzip(unpacked_file, target_file)
Example #14
0
 def extract_ensembl(self, ensembl, output, cmd):
     """Stage the Ensembl JSON file and convert it to JSON lines with jq.

     ``{release}`` in ``ensembl.uri`` is replaced by ``ensembl.release``
     and the file is fetched over FTP into ``output.staging_dir``. The jq
     filter ``ensembl.jq`` is then run in compact mode (``-c``) on the
     staged file, and its output is written to
     ``<output.prod_dir>/<ensembl.path>/<ensembl.output_filename>``.

     A non-zero jq exit status is logged as an error; no exception is
     raised for it.
     """
     logger.info("Converting Ensembl json file into jsonl.")
     jq_cmd = Utils.check_path_command("jq", cmd.jq)

     resource_stage = Dict()
     resource_stage.uri = ensembl.uri.replace('{release}',
                                              str(ensembl.release))
     file_input = Downloads.dowload_staging_ftp(output.staging_dir,
                                                resource_stage)

     output_dir = os.path.join(output.prod_dir, ensembl.path)
     output_file = os.path.join(create_output_dir(output_dir),
                                ensembl.output_filename)

     with open(output_file, "wb") as jsonwrite:
         # jq writes straight into the file, so the output is never held
         # in memory, and run() waits for the process to finish.
         completed = subprocess.run(
             [jq_cmd, "-c", ensembl.jq, file_input], stdout=jsonwrite)

     if completed.returncode != 0:
         logger.error("jq exited with status {} while writing {}".format(
             completed.returncode, output_file))
Example #15
0
 def owl_to_json(self, filename_input, output_dir, resource, riot):
     """Convert *filename_input* from OWL to JSON-LD via *riot*.

     The output directory ``<output_dir>/<resource.path>`` is created
     first; the value returned by ``riot.convert_owl_to_jsonld`` is passed
     back to the caller.
     """
     jsonld_dir = output_dir + "/" + resource.path
     create_output_dir(jsonld_dir)
     converted = riot.convert_owl_to_jsonld(filename_input, jsonld_dir,
                                            resource.owl_jq)
     return converted
Example #16
0
 def download_indices(self, conf, output):
     """Run the Elasticsearch handler for the ChEMBL configuration.

     The directory ``<output.prod_dir>/<chembl.path>`` is created and passed,
     together with ``conf.etl.chembl``, to ``self._handle_elasticsearch``;
     that call's result is returned.
     """
     indices_dir = create_output_dir(output.prod_dir + "/" +
                                     conf.etl.chembl.path)
     return self._handle_elasticsearch(conf.etl.chembl, indices_dir)