def _get_translocation_target(node_modifier_data): # First check if there is a translocation modifier if node_modifier_data is None or node_modifier_data == {}: return None modifier = node_modifier_data.get(pc.MODIFIER) if modifier is None or modifier != pc.TRANSLOCATION: return None # Next, make sure there is information on the translocation target transloc_data = node_modifier_data.get(pc.EFFECT) if transloc_data is None: return None to_loc_info = transloc_data.get(pc.TO_LOC) if not to_loc_info: return None to_loc_ns = to_loc_info.get('namespace') to_loc_name = to_loc_info.get('name') # Only use GO Cellular Component location names if to_loc_ns not in ('GO', 'GOCC', 'GOCCID') or not to_loc_name: return None return go_client.get_valid_location(to_loc_name)
def test_get_valid_location():
    """Check that each query string resolves to 'acrosomal vesicle'."""
    expected = 'acrosomal vesicle'
    # A bare ID, a prefixed ID, the name itself, and an alternative label
    queries = ('0001669', 'GO:0001669', 'acrosomal vesicle', 'acrosome')
    for query in queries:
        assert go_client.get_valid_location(query) == expected
def fix_json_stmt(json_stmt):
    """Repair known problems in a statement JSON dict before deserializing.

    The dict is modified in place and also returned.

    Parameters
    ----------
    json_stmt : dict
        A statement in JSON form. It must have a 'type' key.

    Returns
    -------
    dict
        The same ``json_stmt`` object, after the fixes were applied.

    Raises
    ------
    InvalidAgent
        If an Activation/Inhibition has a list or a string as its 'obj'.
    TranslocationWithoutLocations
        If a Translocation ends up with neither a from- nor a to-location.
    MissingSubj
        If an IncreaseAmount has no (truthy) 'subj'.
    MissingEvidenceText
        If the first evidence has no non-empty string 'text'.
    InvalidEvidence
        If 'evidence' is missing, empty, or not a list.
    """
    # Step 1: fix JSON directly to reduce errors when deserializing
    # 1.1 - Fix statement type issues: GeneTranscriptExpress is treated as
    # an IncreaseAmount
    if json_stmt['type'] == 'GeneTranscriptExpress':
        json_stmt['type'] = 'IncreaseAmount'
    stmt_type = json_stmt['type']

    # 1.2 - Check for string agents by passing every agent slot of the
    # statement class through fix_json_agent
    for agent_slot in get_statement_by_name(stmt_type)._agent_order:
        json_stmt[agent_slot] = fix_json_agent(json_stmt.get(agent_slot))

    # 1.3 - Fix other misc things that are statement type specific
    if stmt_type in mod_class_names:
        # Single-element lists are unwrapped; anything else is only logged
        # and left untouched
        position = json_stmt.get('position')
        if isinstance(position, list):
            if len(position) == 1:
                json_stmt['position'] = position[0]
            else:
                logger.error('Invalid position: %s' % position)

        residue = json_stmt.get('residue')
        if isinstance(residue, bool):
            json_stmt['residue'] = None
        elif isinstance(residue, list):
            if len(residue) == 1 and isinstance(residue[0], str):
                json_stmt['residue'] = residue[0]
            else:
                logger.error('Invalid residue: %s' % residue)
    elif stmt_type in ('Activation', 'Inhibition'):
        activity = json_stmt.get('obj_activity')
        if isinstance(activity, list):
            if len(activity) == 1:
                json_stmt['obj_activity'] = activity[0]
            else:
                logger.error('Invalid object activity: %s' % activity)
        if isinstance(json_stmt.get('obj'), (list, str)):
            raise InvalidAgent
    elif stmt_type == 'Translocation':
        # Fix locations if possible
        location_keys = ('from_location', 'to_location')
        for loc_param in location_keys:
            raw_loc = json_stmt.get(loc_param)
            fixed_loc = None
            # Some invalid locations are produced very often and can be
            # silently skipped
            if raw_loc and raw_loc not in known_invalid_locations:
                fixed_loc = go_client.get_valid_location(raw_loc) or None
            json_stmt[loc_param] = fixed_loc
        # Skip Translocation with both locations None
        if all(json_stmt.get(key) is None for key in location_keys):
            raise TranslocationWithoutLocations
    elif stmt_type == 'IncreaseAmount':
        # Skip if there is no subject
        if not json_stmt.get('subj'):
            raise MissingSubj

    # 1.4 - Fix evidence (only the first evidence entry is examined)
    evidences = json_stmt.get('evidence')
    if not (evidences and isinstance(evidences, list)):
        raise InvalidEvidence
    first_ev = evidences[0]
    text = first_ev.get('text')
    if not (isinstance(text, str) and text):
        raise MissingEvidenceText
    pmid = first_ev.get('pmid')
    if isinstance(pmid, str) and pmid.startswith('PMID'):
        # Drop the 4-character 'PMID' prefix
        first_ev['pmid'] = pmid[4:]
    return json_stmt