def __init__(self, email: str, query_str: str, outprefix: str) -> None:
    """
    Initialize class.
    :param email: contact email passed along to NCBI Entrez queries
    :param query_str: search query string to run
    :param outprefix: prefix used by PhenoXPaths to build output paths
    """
    self.paths = PhenoXPaths(outprefix)
    self.query_str = query_str
    self.email = email
def __init__(self, cluster_dict: Dict, meta_dict: Dict, outprefix: str) -> None:
    """
    Initialize class.
    :param cluster_dict: key=cluster_ids, value=list of GDS ids
    :param meta_dict: key=gds_ids, value=list of gds metrics
        (n_samples, dates, GPLs)
    :param outprefix: prefix used by PhenoXPaths to build output paths
    """
    self.paths = PhenoXPaths(outprefix)
    self.clusters = cluster_dict
    self.num_clusters = len(cluster_dict)
    self.meta_dict = meta_dict
    # Display labels for the GDS metric columns stored in meta_dict values.
    self.meta_list = ('Sample N', 'Submission Age', 'GPL')
def __init__(self, outprefix: str):
    """
    Initialize MeSH search tool.

    Loads the MeSH term dictionary from ``<data_dir>/mesh.json``; if the
    JSON cache is missing, first builds it from a ``*.bin`` MeSH dump found
    in the data directory (via ``self._parse_mesh_bin``).
    :param outprefix: prefix used by PhenoXPaths to build data/output paths
    """
    paths = PhenoXPaths(outprefix)
    mesh_json_path = os.path.join(paths.data_dir, 'mesh.json')
    self.mesh = dict()
    if not os.path.exists(mesh_json_path):
        # No JSON cache yet: look for a raw MeSH .bin dump to convert.
        mesh_bin_file = glob.glob(os.path.join(paths.data_dir, '*.bin'))
        if mesh_bin_file:
            self._parse_mesh_bin(mesh_bin_file[0], mesh_json_path)
    # Use a context manager so the file handle is closed promptly
    # (the original `json.load(open(...))` leaked the handle).
    with open(mesh_json_path, 'r') as fh:
        self.mesh = json.load(fh)
def __init__(self, outprefix, term, email, tool="phenotypeXpression",
             efetch_batch=5000, elink_batch=100):
    """
    Initialize Entrez query/visualization helper.
    :param outprefix: prefix used by PhenoXPaths to build output paths
    :param term: search term; spaces are replaced by '-' in output file names
    :param email: contact email registered with NCBI Entrez
    :param tool: tool name reported to NCBI Entrez
    :param efetch_batch: batch size for Entrez efetch requests
    :param elink_batch: batch size for Entrez elink requests
    """
    Entrez.email = email
    Entrez.tool = tool
    self.efetch_batch = efetch_batch
    self.elink_batch = elink_batch
    self.db = 'geoprofiles'
    self.paths = PhenoXPaths(outprefix)
    term_name = term.replace(' ', '-')

    def _outpath(suffix):
        # All output files share the "<outprefix>_<term>_<suffix>" scheme.
        return os.path.join(
            self.paths.output_dir,
            "{}_{}_{}".format(self.paths.outprefix, term_name, suffix)
        )

    self.hcluster_file = _outpath("hierarchical_clusters.pdf")
    self.tree_file = _outpath("newick_tree.txt")
    self.heatmap_file = _outpath("heatmap.pdf")
    self.dist_graph_file = _outpath("dist_graph.pdf")
def __init__(self, email: str, outprefix: str):
    """
    Initialize NER pipeline for disease / human-phenotype extraction.

    Loads a spaCy English model, attaches a keyword-based Entity matcher
    built from the DO/HPO keyword pickles, and reads HGNC gene synonyms.
    :param email: contact email registered with NCBI Entrez
    :param outprefix: prefix used by PhenoXPaths to build data/output paths
    """
    Entrez.email = email
    self.paths = PhenoXPaths(outprefix)
    self.pmid_abstracts = dict()
    # disease and human phenotype NER
    self.pmid_dner = {}
    # raw entity text
    self.pmid_ent_text = {}
    self.dner_cluster = {}
    self.total_dner = []
    self.nlp = spacy.load('en')
    # Use context managers so the pickle file handles are closed promptly
    # (the original `pickle.load(open(...))` calls leaked the handles).
    with open(os.path.join(self.paths.data_dir, 'id2kw_dict.pkl'), 'rb') as f:
        self.id2kw = pickle.load(f)
    with open(os.path.join(self.paths.data_dir, 'kw2id_dict.pkl'), 'rb') as f:
        self.kw2id = pickle.load(f)
    entity = Entity(keywords_list=list(self.kw2id.keys()), label='DO/HPO')
    self.nlp.add_pipe(entity, last=True)
    # read synonyms from HGNC
    with open(os.path.join(self.paths.data_dir, 'hgnc_synonyms.json'), 'r') as f:
        hgnc_syn = f.read()
    self.hgnc = json.loads(hgnc_syn)
import os import json from phenox.paths import PhenoXPaths paths = PhenoXPaths() hgnc_json_file = os.path.join(paths.data_dir, 'hgnc.json') hgnc_syn_file = os.path.join(paths.data_dir, 'hgnc_synonyms.json') with open(hgnc_json_file, 'r') as f: text = f.read() hgnc = json.loads(text) num_entries = hgnc['response']['numFound'] data = hgnc['response']['docs'] hgnc_dict = dict() for entry in data: try: symbol = entry['symbol'] alias_symbol = entry['alias_symbol'] if 'alias_symbol' in entry else [] prev_symbol = entry['prev_symbol'] if 'prev_symbol' in entry else [] name = entry['name'] prev_name = entry['prev_name'] if 'prev_name' in entry else [] hgnc_dict[symbol] = { 'names': [name] + prev_name, 'aliases': alias_symbol + prev_symbol
def __init__(self, outprefix: str):
    """
    Initialize class.
    :param outprefix: prefix used by PhenoXPaths to build output paths
    """
    self.paths = PhenoXPaths(outprefix)