Python Gene._entrez_ensembl_lookupの例

プログラミング言語: Python

名前空間/パッケージ名: data_pipeline.helper.gene

クラス/型: Gene

メソッド/関数: _entrez_ensembl_lookup

hotexamples.comのコード掲載数: 3

Python Gene._entrez_ensembl_lookup - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのdata_pipeline.helper.gene.Gene._entrez_ensembl_lookupの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

_check_gene_history(3)

_entrez_ensembl_lookup(3)

gene_history_mapping(2)

gene_mapping(2)

_convert_entrezid2ensembl(1)

_ensembl_entrez_lookup(1)

_replace_oldids_with_newids(1)

ensembl_gene_parse(1)

ensmart_gene_parse(1)

ensmart_homolog_parse(1)

gene2ensembl_parse(1)

gene_history_parse(1)

gene_info_parse(1)

gene_pub_parse(1)

コード例 #1

ファイルを表示

ファイル: gene_pathways.py プロジェクト: premanand17/django-data-pipeline

    def _process_pathway(cls, download_file, stage_output_file, section, source, is_public, config=None):
        '''Parse a pathway input file (eg: kegg, reactome, go) into a JSON staging file.

        INPUT file format (tab separated, one pathway per line):
        Pathway name \t Pathway url \t List of entrez ids
        REACTOME_RNA_POL_I_TRANSCRIPTION_TERMINATION
        http://www.broadinstitute.org/gsea/msigdb/cards/REACTOME_RNA_POL_I_TRANSCRIPTION_TERMINATION1022
        2068    2071    25885    284119    2965    2966    2967    2968    4331

        The entrez ids are converted to ensembl ids through Gene._entrez_ensembl_lookup;
        ids that are missing from the lookup result are dropped from the pathway.

        OUTPUT: a file named like stage_output_file with ".out" replaced by ".json",
        containing {"docs":[ ... ]} with one object per pathway holding the keys
        pathway_name, pathway_url, gene_sets, source and is_public.

        Arguments:
        download_file -- path of the tab separated pathway file to read
        stage_output_file -- staging path used to derive the JSON file name
        section -- passed to Gene._entrez_ensembl_lookup and cls._load_pathway_mappings
        source -- value stored under "source" in every document
        is_public -- value stored under "is_public" in every document
        config -- passed to Gene._entrez_ensembl_lookup (default None)
        '''
        json_target_file_path = stage_output_file.replace(".out", ".json")

        # Read the download once; the rows feed both the bulk id lookup and the
        # JSON writer. Blank lines come back from csv.reader as empty lists and
        # are skipped so they cannot raise IndexError further down.
        with open(download_file, encoding='utf-8') as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
            rows = [row for row in reader if row]
        logger.debug('Number of lines in the file ' + str(len(rows)))

        # Resolve every entrez id of the file in a single lookup call.
        entrez_ids = [entrez for row in rows for entrez in row[2:]]
        ens_look_up = Gene._entrez_ensembl_lookup(entrez_ids, section, config)

        count = 0
        # The context manager guarantees the JSON is flushed and closed before
        # the mappings are loaded below.
        with open(json_target_file_path, mode='w', encoding='utf-8') as json_target_file:
            json_target_file.write('{"docs":[\n')
            for row in rows:
                path_object = {
                    "pathway_name": row[0],
                    "pathway_url": row[1],
                    "gene_sets": [ens_look_up[entrez] for entrez in row[2:] if entrez in ens_look_up],
                    "source": source,
                    "is_public": is_public,
                }
                # The separator goes in front of every document but the first,
                # so the output never ends in a dangling comma.
                if count:
                    json_target_file.write(',\n')
                json_target_file.write(json.dumps(path_object))
                count += 1
            if count:
                json_target_file.write('\n')
            json_target_file.write('\n]}')

        logger.debug("No. genes to load "+str(count))
        logger.debug("Json written to " + json_target_file_path)
        logger.debug("Load mappings")

        status = cls._load_pathway_mappings(section)
        print(status)

コード例 #2

ファイルを表示

ファイル: test_compare_newcriteria_results_with_old_mart.py プロジェクト: D-I-L/django-criteria

    def get_ensemb_ids(self, entrez_list):
        '''Look up ensembl ids for entrez_list against the gene_history index.

        Builds the section/config pair expected by Gene._entrez_ensembl_lookup
        (index 'genes_hg38_v0.0.2', index type 'gene_history') and returns the
        lookup result unchanged.
        '''
        gene_history_section = {
            'index': 'genes_hg38_v0.0.2',
            'index_type': 'gene_history',
        }
        # The same dict object is used both as the section and as the
        # GENE_HISTORY entry of the config.
        lookup_config = {'GENE_HISTORY': gene_history_section}
        return Gene._entrez_ensembl_lookup(entrez_list, gene_history_section, lookup_config)

コード例 #3

ファイルを表示

ファイル: gene_interactions.py プロジェクト: premanand17/django-data-pipeline

    def _process_bioplex(cls, download_file, stage_output_file, section, config):
        '''Function to process bioplex data files. Interactors are in the first two columns (GeneA, GeneB);
        they are converted to ensembl ids and stored in the temporary .out file.

        Input File format:
        GeneA    GeneB    UniprotA    UniprotB    SymbolA    SymbolB    pW    pNI    pInt
        100    728378    P00813    A5A3E0    ADA    POTEF    2.38086E-09    0.000331856    0.999668142
        100    345651    P00813    Q562R1    ADA    ACTBL2    9.79E-18    0.211914437    0.788085563

        Output file format:
        interactorA    interactorB
        ENSG00000196839    ENSG00000196604
        ENSG00000196839    ENSG00000169067

        A pair is written only when both interactors are present in the lookup
        returned by Gene._entrez_ensembl_lookup. Otherwise both ids of the pair
        are recorded as unmapped and logged at debug level. The finished file is
        handed to cls._process_interaction_out_file.

        Arguments:
        download_file -- path of the tab separated bioplex file (with header row)
        stage_output_file -- path of the .out file to write
        section -- passed to the lookup and to cls._process_interaction_out_file
        config -- passed to Gene._entrez_ensembl_lookup
        '''
        # Read the interactor pairs once; they feed both the bulk id lookup
        # and the output writer.
        with open(download_file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
            interactions = [(row['GeneA'], row['GeneB']) for row in reader]

        gene_sets = [gene for pair in interactions for gene in pair]
        ens_look_up = Gene._entrez_ensembl_lookup(gene_sets, section, config)

        mapped_counter = 0
        unmapped_ids = []
        # The context manager closes the output even if a write fails, and
        # guarantees it is flushed before the post-processing step reads it.
        with open(stage_output_file, 'w') as stage_output_file_handler:
            stage_output_file_handler.write('interactorA' + '\t' + 'interactorB\n')
            for interactor_a, interactor_b in interactions:
                if interactor_a in ens_look_up and interactor_b in ens_look_up:
                    line = ens_look_up[interactor_a] + '\t' + ens_look_up[interactor_b] + '\n'
                    stage_output_file_handler.write(line)
                    mapped_counter += 1
                else:
                    # Both ids of the pair are recorded, even if only one of
                    # them failed to map.
                    unmapped_ids.extend((interactor_a, interactor_b))

        logger.debug("\n".join(unmapped_ids))
        # Mapped counts pairs; Unmapped counts individual ids.
        logger.debug("Mapped {}  Unmapped {} " . format(mapped_counter, len(unmapped_ids)))

        cls._process_interaction_out_file(stage_output_file, section, False)