def get_uniprot_to_id(uniprot_file, uniprot_ids=None, only_min=True, key_function=int):
    """Build a UniProt-to-identifier mapping from a UniProt id-mapping file.

    Thin wrapper that forwards all arguments, unchanged and in order, to
    parse_uniprot.get_uniprot_to_geneid and returns its result.

    uniprot_file: %(data_dir)s/uniprot/idmapping.tab, idmapping.tab.symbol
        or idmapping.tab.mouse.
    uniprot_ids: forwarded as-is (None by default).
    only_min: forwarded as-is (True by default).
    key_function: int when the file maps to gene ids, len when it maps to
        symbols, i.e. it depends on the input file.
    """
    return parse_uniprot.get_uniprot_to_geneid(
        uniprot_file,
        uniprot_ids,
        only_min,
        key_function,
    )
def main():
    """Load the extracellular localization annotations and print one example.

    Uses hard-coded input paths, builds the gene id -> localization mapping
    via get_geneid_to_localization and prints the entry stored under the
    key "5594".
    """
    mapping_file = "/home/emre/data/uniprot/idmapping.tab"
    file_name = "/home/emre/data/tissue/amigo_extracellular.tsv"
    # Alternative input: "/home/emre/data/tissue/amigo_membrane.tsv"
    # The mapping file is not parsed here: get_geneid_to_localization loads
    # it itself, and the result of parsing it in main() was never used.
    geneid_to_localization = get_geneid_to_localization(file_name, mapping_file)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(geneid_to_localization["5594"])  # MAPK1
    return
def get_geneid_to_localization(file_name, mapping_file):
    """Collect (GO term, evidence) annotations per gene id from a TSV file.

    Every line of file_name is split on tabs. Column 0 is the protein
    identifier (anything up to and including its first ":" is dropped),
    column 3 is the GO term and column 7 is the evidence code. Identifiers
    are translated through the mapping returned by
    parse_uniprot.get_uniprot_to_geneid(mapping_file, uniprot_ids=None).

    Identifiers missing from the mapping are skipped; those that do not
    start with "URS" are counted, and the count is printed at the end.

    Returns a dict mapping each gene id to a set of (go, evidence) tuples.
    """
    name_to_geneid = parse_uniprot.get_uniprot_to_geneid(mapping_file, uniprot_ids=None)
    geneid_to_localization = {}
    uniprots_unmatched = set()
    # Context manager so the handle is closed even if parsing raises.
    with open(file_name) as handle:
        for line in handle:
            # Whitespace-only lines (e.g. a trailing blank line) carry no
            # annotation and would otherwise fail on the column lookups.
            if not line.strip():
                continue
            words = line.strip("\n").split("\t")
            uniprot, go, evidence = words[0], words[3], words[7]
            # Keep what follows the first ":"; the whole value if none.
            uniprot = uniprot.split(":", 1)[-1]
            # Evidence filtering is intentionally disabled; to restrict to
            # selected codes, skip lines whose evidence is not in
            # ("EXP", "IDA", "IPA", "IMP", "IGI", "IEP", "HDA").
            if uniprot not in name_to_geneid:
                if not uniprot.startswith("URS"):
                    uniprots_unmatched.add(uniprot)
                continue
            geneid = name_to_geneid[uniprot]
            # Store evidence type as well
            geneid_to_localization.setdefault(geneid, set()).add((go, evidence))
    # Formatted as one string so the output is identical on Python 2 and 3.
    print("Unmatched: %d" % len(uniprots_unmatched))
    return geneid_to_localization
def get_uniprot_to_symbol(uniprot_symbol_file, uniprot_ids):
    """Return the mapping parsed from a UniProt-to-symbol id-mapping file.

    Delegates to parse_uniprot.get_uniprot_to_geneid with only_min=False
    and returns its result unchanged.

    NOTE(review): a function with this name appears to be defined more than
    once in this file; the last definition is the one in effect.
    """
    return parse_uniprot.get_uniprot_to_geneid(
        uniprot_symbol_file,
        uniprot_ids,
        only_min=False,
    )
def get_uniprot_to_geneid(uniprot_file, uniprot_ids):
    """Return the mapping parsed from a UniProt id-mapping file.

    Delegates to parse_uniprot.get_uniprot_to_geneid, leaving its remaining
    parameters at their defaults, and returns its result unchanged.

    NOTE(review): a function with this name appears to be defined more than
    once in this file; the last definition is the one in effect.
    """
    return parse_uniprot.get_uniprot_to_geneid(
        uniprot_file,
        uniprot_ids,
    )
def get_uniprot_to_geneid(uniprot_file, uniprot_ids):
    """Parse uniprot_file into a UniProt-keyed mapping.

    Forwards both arguments to parse_uniprot.get_uniprot_to_geneid (other
    parameters keep that function's defaults) and hands back its result.

    NOTE(review): this name appears to be defined more than once in this
    file; later definitions replace earlier ones.
    """
    mapping = parse_uniprot.get_uniprot_to_geneid(uniprot_file, uniprot_ids)
    return mapping
def get_uniprot_to_symbol(uniprot_symbol_file, uniprot_ids):
    """Parse uniprot_symbol_file into a UniProt-keyed mapping.

    Forwards both arguments to parse_uniprot.get_uniprot_to_geneid with
    only_min=False and hands back its result.

    NOTE(review): this name appears to be defined more than once in this
    file; later definitions replace earlier ones.
    """
    uniprot_to_symbol = parse_uniprot.get_uniprot_to_geneid(
        uniprot_symbol_file, uniprot_ids, only_min=False
    )
    return uniprot_to_symbol