コード例 #1
0
    def _load_and_process_data(self):
        """Build and load the GEO cross-reference CSV for each sub-type.

        For every sub-type object supplied by ``self.data_type_config`` this
        looks up the species for the sub-type's data provider, URL-encodes
        it, obtains the data generators, writes them to a per-provider CSV
        file through ``CSVTransactor`` and then runs the matching query
        batch through ``Neo4jTransactor``.
        """
        for sub_type in self.data_type_config.get_sub_type_objects():
            species_name = ETLHelper.species_lookup_by_data_provider(
                sub_type.get_data_provider())
            # quote_plus makes the species name safe to embed in a URL.
            encoded_species = urllib.parse.quote_plus(species_name)

            neo4j_commit_size = self.data_type_config.get_neo4j_commit_size()
            # The generator batch size is fixed here rather than read from
            # the data-type configuration.
            generator_batch_size = 100000

            data_generators = self.get_generators(
                sub_type, generator_batch_size, encoded_species)

            # One [query template, commit size, CSV file name] entry per
            # query to run for this sub-type.
            xref_templates = [
                [
                    self.geo_xref_query_template,
                    neo4j_commit_size,
                    "geo_xref_data_" + sub_type.get_data_provider() + ".csv",
                ],
            ]

            queries_with_files = self.process_query_params(xref_templates)
            CSVTransactor.save_file_static(data_generators, queries_with_files)
            Neo4jTransactor.execute_query_batch(queries_with_files)
コード例 #2
0
    def save_descriptions_report_files(self, data_provider, json_desc_writer, context_info, gd_data_manager):
        """Save the gene descriptions report files for one data provider.

        Writes a JSON, a plain-text and a TSV report under ``tmp``, each
        named ``<cur_date>_<data_provider>`` plus its extension, and adds a
        generated header to the text and TSV files. When
        ``context_info.env["GENERATE_REPORTS"]`` is truthy the files are
        handed to ``self.upload_files_to_fms``.

        Args:
            data_provider: Data provider identifier; used in the file name
                and for the species / taxon lookups.
            json_desc_writer: Writer whose ``overall_properties`` are filled
                in and whose ``write_json``, ``write_plain_text`` and
                ``write_tsv`` methods produce the report files.
            context_info: Object exposing an ``env`` mapping with the
                ``ALLIANCE_RELEASE`` and ``GENERATE_REPORTS`` entries.
            gd_data_manager: Passed to ``write_json`` as ``data_manager``.
        """
        # Only the first two dot-separated components of the release are
        # recorded in the report properties.
        release_version = ".".join(context_info.env["ALLIANCE_RELEASE"].split(".")[:2])

        properties = json_desc_writer.overall_properties
        properties.species = data_provider
        properties.release_version = release_version
        properties.date = self.cur_date

        base_path = os.path.join("tmp", self.cur_date + "_" + data_provider)

        json_desc_writer.write_json(
            file_path=base_path + ".json",
            pretty=True,
            include_single_gene_stats=True,
            data_manager=gd_data_manager,
        )
        json_desc_writer.write_plain_text(file_path=base_path + ".txt")

        readme = (
            "This file contains the following fields: gene ID, gene name, and gene description. The gene "
            "descriptions are generated by an algorithm developed by the Alliance that uses highly structured "
            "gene data such as associations to various ontology terms (e.g., Gene Ontology terms) and the "
            "Alliance strict orthology set. The original set of ontology terms that a gene is annotated to may "
            "have been trimmed to an ancestor term in the ontology, in order to balance readability with the "
            "amount of information in the description. The complete set of annotations to any gene in this file "
            "may be found in the relevant data tables on the Alliance gene page."
        )
        species = ETLHelper.species_lookup_by_data_provider(data_provider)
        taxon_id = ETLHelper.get_taxon_from_mod(data_provider)

        def _attach_header(data_format):
            """Build the header for ``data_format`` and add it to that file."""
            raw_header = create_header(
                file_type='Gene Descriptions',
                database_version=context_info.env["ALLIANCE_RELEASE"],
                data_format=data_format,
                readme=readme,
                species=species,
                taxon_ids='# TaxonIDs:NCBITaxon:' + taxon_id,
            )
            # Strip every line and drop the ones that end up empty.
            stripped_lines = (line.strip() for line in raw_header.splitlines())
            header = "\n".join([text for text in stripped_lines if text])
            self.add_header_to_file(file_path=base_path + "." + data_format, header=header)

        _attach_header("txt")

        json_desc_writer.write_tsv(file_path=base_path + ".tsv")
        _attach_header("tsv")

        if context_info.env["GENERATE_REPORTS"]:
            self.upload_files_to_fms(base_path, context_info, data_provider, self.logger)