def test_converter_convert_list():
    """Convert a batch of Ensembl gene IDs and compare against known symbols."""
    raw_ids = [
        'ENSG00000000003.14',
        'ENSG00000000005.5',
        'ENSG00000000419.12',
    ]
    expected_symbols = ['TSPAN6', 'TNMD', 'DPM1']

    # the raw IDs go through clean_ensembl_ids before being converted
    cleaned = convert.clean_ensembl_ids(raw_ids)
    id_converter = convert.IDConverter('ensembl_gene_id', 'symbol')
    converted = id_converter.convert_list(cleaned)

    assert converted == expected_symbols
def test_converter_convert():
    """Convert a single example Ensembl gene ID and check the resulting symbol."""
    raw_id = 'ENSG00000000003.14'
    expected_symbol = 'TSPAN6'

    # the raw ID goes through clean_ensembl_id before being converted
    cleaned = convert.clean_ensembl_id(raw_id)
    id_converter = convert.IDConverter('ensembl_gene_id', 'symbol')
    converted = id_converter.convert(cleaned)

    assert converted == expected_symbol
def test_converter_all_targets():
    """Convert one example Ensembl ID to every type in IDConverter.potential_ids.

    No result is checked; the test only exercises each conversion so that
    any exception raised along the way fails the test.
    """
    targets = convert.IDConverter.potential_ids
    raw_id = 'ENSG00000000003.14'
    cleaned = convert.clean_ensembl_id(raw_id)

    for target in targets:
        # a fresh converter is built for every target identifier type
        id_converter = convert.IDConverter('ensembl_gene_id', target)
        id_converter.convert(cleaned)
def make_godict(gofile, force=False):
    """
    Parses the Gene Ontology file and creates a dictionary that is easier
    to work with. Saves the dictionary as a json file.

    The ontology file is split into groups starting at each line that begins
    with "id:". Obsolete groups and groups without a GO id are discarded, the
    remaining groups are parsed into the dictionary, child links are derived
    from the parent links, and gene annotations are added from ANNOTATIONFILE.
    The result is written to OUTPUTFILE.

    Notes:
        Annotation file fields (tab-separated, 0-based index):
            database: field 0 (not used)
            database id (converted from uniprot to ensembl): field 1
            gene symbol: field 2 (not used)
            qualifier: field 3 (entries containing 'NOT' are skipped)
            GO term: field 4
            database reference: field 5 (not used)
            GO evidence code: field 6

        GO Evidence codes:
            Experiment:
                Inferred from Experiment (EXP)
                Inferred from Direct Assay (IDA)
                Inferred from Physical Interaction (IPI)
                Inferred from Mutant Phenotype (IMP)
                Inferred from Genetic Interaction (IGI)
                Inferred from Expression Pattern (IEP)
            Computational:
                Inferred from Sequence or structural Similarity (ISS)
                Inferred from Sequence Orthology (ISO)
                Inferred from Sequence Alignment (ISA)
                Inferred from Sequence Model (ISM)
                Inferred from Genomic Context (IGC)
                Inferred from Biological aspect of Ancestor (IBA)
                Inferred from Biological aspect of Descendant (IBD)
                Inferred from Key Residues (IKR)
                Inferred from Rapid Divergence(IRD)
                Inferred from Reviewed Computational Analysis (RCA)
            Literature:
                Traceable Author Statement (TAS)
                Non-traceable Author Statement (NAS)
            Other:
                Inferred by Curator (IC)
                No biological Data available (ND) evidence code
                Inferred from Electronic Annotation (IEA)

    Args:
        gofile (str): path to the gene ontology file
        force (optional; bool): overwrite the json file if true

    Returns:
        None

    """
    # check if the outputfile already exists; done before anything else so
    # that the converter is not built when there is nothing to do
    if not force and os.path.exists(OUTPUTFILE):
        return

    # function-scope import (presumably to avoid a circular import between
    # this module and genemunge.convert -- TODO confirm)
    from genemunge import convert
    converter = convert.IDConverter('uniprot_ids', 'ensembl_gene_id')

    # id: {name, namespace, def, parents, children, genes}
    # connections (parent/child): 'is_a' or 'part_of'
    # ignore if 'is_obsolete: true'

    # read in the ontology file
    with open(gofile, "r") as go:
        unparsed = [line.rstrip() for line in go]

    # find the indices marking the beginning of each term
    indices = [i for i, x in enumerate(unparsed)
               if begins_with_pattern(x, "id:")]

    # group the terms; the end-of-file sentinel closes the final group so
    # that the last entry in the file is kept instead of silently dropped
    boundaries = indices + [len(unparsed)]
    grouped = [unparsed[start:stop]
               for start, stop in zip(boundaries, boundaries[1:])]

    # get rid of obsolete terms
    not_obsolete = [g for g in grouped if first_match(g, obsolete) is None]

    # get rid of any term that doesn't have ids
    has_id = [g for g in not_obsolete if first_match(g, go_id) is not None]

    # create the go dictionary
    godict = {}
    for group in has_id:
        parse_group(group, godict)

    # add the children terms (the inverse of the parent links)
    for term in godict:
        for parent in godict[term]['parents']:
            children = godict[parent]['children']
            if term not in children:
                children.append(term)

    # add the annotations
    with gzip.open(ANNOTATIONFILE, 'rb') as annotfile:
        for raw_line in annotfile:
            line = raw_line.decode('utf-8')
            # skip blank lines and '!' comment lines
            if not line.strip() or line.startswith('!'):
                continue

            parsed = line.strip().split('\t')
            database_id = parsed[1]
            qualifier = parsed[3]
            go_term = parsed[4]
            evidence = parsed[6]

            # what to do about colocalizes_with and contributes_to?
            if 'NOT' in qualifier:
                continue

            try:
                ensembl = converter.convert(database_id)
                # add the identifier if it is not NaN
                # (NaN is the only value that is not equal to itself)
                if ensembl == ensembl:
                    godict[go_term]['genes'][evidence].append(ensembl)
            except KeyError:
                # we have filtered out obsolete go terms
                # therefore, we have to catch this exception
                pass

    # write to the file
    with open(OUTPUTFILE, "w") as outfile:
        json.dump(godict, outfile)
def test_converter_construct():
    """Build an IDConverter from 'symbol' to 'name'.

    Nothing is asserted; the test fails only if construction raises.
    """
    source_id, target_id = 'symbol', 'name'
    id_converter = convert.IDConverter(source_id, target_id)