def get_complexes(self, force_contains=None):
    """Extract INDRA Complex statements from the model.

    Every object of the model that is a complex is turned into one
    Complex statement per member combination returned by
    ``_get_combinations``; the statements are appended to
    ``self.statements``. Complexes with more than 10 members are skipped.

    Parameters
    ----------
    force_contains : Optional[list[str]]
        A list of gene names intended for filtering the extracted
        Statements. NOTE: this method currently accepts the argument but
        does not apply any filtering with it. Default: None
    """
    for model_obj in self.model.getObjects().toArray():
        element = _cast_biopax_element(model_obj)
        if not _is_complex(element):
            continue

        # One Evidence per citation; a single Evidence without a PMID
        # when the complex carries no citations at all.
        pmids = self._get_citations(element)
        uri = element.getUri()
        if pmids:
            evidence = [
                Evidence(source_api='biopax', pmid=pmid, source_id=uri)
                for pmid in pmids
            ]
        else:
            evidence = Evidence(source_api='biopax', pmid=None,
                                source_id=uri)

        members = self._get_complex_members(element)
        if members is None:
            continue
        if len(members) > 10:
            logger.info('Skipping complex with more than 10 members.')
            continue

        for member_group in _get_combinations(members):
            complex_stmt = Complex(member_group, evidence)
            self.statements.append(
                decode_obj(complex_stmt, encoding='utf-8'))
def get_complexes(self):
    """Extract INDRA Complex statements from the model.

    Walks over all objects of the model, keeps those that are complexes
    and appends one Complex statement to ``self.statements`` for each
    member combination produced by ``_get_combinations``. A complex with
    more than 10 members is logged and skipped.
    """
    all_objects = self.model.getObjects().toArray()
    for raw_obj in all_objects:
        entity = _cast_biopax_element(raw_obj)
        if not _is_complex(entity):
            continue

        cits = self._get_citations(entity)
        src_id = entity.getUri()
        # With citations: a list holding one Evidence per citation.
        # Without: a single Evidence whose pmid is None.
        ev = (
            [Evidence(source_api='biopax', pmid=c, source_id=src_id)
             for c in cits]
            if cits
            else Evidence(source_api='biopax', pmid=None, source_id=src_id)
        )

        member_agents = self._get_complex_members(entity)
        if member_agents is None:
            continue
        if len(member_agents) > 10:
            logger.info('Skipping complex with more than 10 members.')
            continue

        for combo in _get_combinations(member_agents):
            stmt = Complex(combo, ev)
            self.statements.append(decode_obj(stmt, encoding='utf-8'))
def get_ubiquitination(self, force_contains=None):
    """Extract INDRA Ubiquitination statements from the model.

    The argument tuples returned by ``_get_generic_modification('ubiq')``
    are each expanded into a Ubiquitination statement, which is decoded
    as UTF-8 and appended to ``self.statements``.

    Parameters
    ----------
    force_contains : Optional[list[str]]
        A list of gene names for filtering. It is forwarded unchanged to
        ``_get_generic_modification``, which is expected to keep only
        Statements in which these gene names appear. Default: None
    """
    mod_args = self._get_generic_modification(
        'ubiq', force_contains=force_contains)
    for args in mod_args:
        ubiq_stmt = Ubiquitination(*args)
        self.statements.append(decode_obj(ubiq_stmt, encoding='utf-8'))
def get_phosphorylation(self):
    """Extract INDRA Phosphorylation statements from the model.

    Each argument tuple returned by
    ``_get_generic_modification('phospho')`` is expanded into a
    Phosphorylation statement, decoded as UTF-8 and appended to
    ``self.statements``.
    """
    for args in self._get_generic_modification('phospho'):
        phospho_stmt = Phosphorylation(*args)
        self.statements.append(decode_obj(phospho_stmt, encoding='utf-8'))
def get_regulate_amounts(self):
    """Extract INDRA RegulateAmount statements from the model.

    The model is searched with the PatternBox
    ``controlsExpressionWithTemplateReac`` pattern. For every match whose
    conversion has a ``FORWARD`` (or unset) template direction, one
    statement is appended to ``self.statements`` for each pair of
    controller and controlled agents: ``IncreaseAmount`` when the control
    type is ``ACTIVATION`` and ``DecreaseAmount`` when it is
    ``INHIBITION``. Other directions and control types are logged with a
    warning and skipped.
    """
    pb = _bpp('PatternBox')
    p = pb.controlsExpressionWithTemplateReac()
    s = _bpp('Searcher')
    res = s.searchPlain(self.model, p)
    res_array = [_match_to_array(m) for m in res.toArray()]
    for match in res_array:
        # FIXME: for some reason labels are not accessible
        # for these queries. It would be more reliable
        # to get results by label instead of index, i.e.
        # match[p.indexOf(label)]. The labels, presumably in positional
        # order (consistent with the indices used below), are:
        #   0: 'controller ER'          6: 'input PE'
        #   1: 'generic controller ER'  7: 'input simple PE'
        #   2: 'controller simple PE'   8: 'changed generic ER'
        #   3: 'controller PE'          9: 'output PE'
        #   4: 'Control'               10: 'output simple PE'
        #   5: 'Conversion'            11: 'changed ER'

        # TODO: here, match[3] is the complex physical entity
        # for instance http://pathwaycommons.org/pc2/
        # Complex_43c6b8330562c1b411d21e9d1185bae9
        # consists of 3 components: JUN, FOS and NFAT
        # where NFAT further contains 3 member physical entities.
        #
        # However, match[2] iterates over all 5 member physical entities
        # of the complex which doesn't represent the underlying
        # structure faithfully. It would be better to use match[3]
        # (the complex itself) and look at components and then
        # members. However, then, it would not be clear how to
        # construct an INDRA Agent for the controller.
        controller = self._get_agents_from_entity(match[2])
        controlled_pe = match[6]
        controlled = self._get_agents_from_entity(controlled_pe)

        conversion = match[5]
        direction = conversion.getTemplateDirection()
        if direction is not None:
            direction = direction.name()
            if direction != 'FORWARD':
                logger.warning('Unhandled conversion direction %s',
                               direction)
                continue
        # NOTE: the conversion's interaction type is sometimes annotated
        # as term == 'TRANSCRIPTION' and other times not annotated at
        # all, so it is deliberately not consulted here.

        control = match[4]
        control_type = control.getControlType()
        if control_type:
            control_type = control_type.name()

        # Always produce a list of Evidence; a control without citations
        # yields a single Evidence whose pmid is None.
        citations = BiopaxProcessor._get_citations(control)
        source_id = control.getUri()
        if not citations:
            citations = [None]
        ev = [
            Evidence(source_api='biopax', pmid=cit, source_id=source_id)
            for cit in citations
        ]

        for subj, obj in itertools.product(_listify(controller),
                                           _listify(controlled)):
            # The controller is given an active 'transcription' activity
            # condition before the statement is built.
            subj.activity = ActivityCondition('transcription', True)
            if control_type == 'ACTIVATION':
                st = IncreaseAmount(subj, obj, evidence=ev)
            elif control_type == 'INHIBITION':
                st = DecreaseAmount(subj, obj, evidence=ev)
            else:
                logger.warning('Unhandled control type %s', control_type)
                continue
            self.statements.append(decode_obj(st, encoding='utf-8'))
def get_activity_modification(self):
    """Extract INDRA ActiveForm statements from the model.

    Two searches are run over the model, one for reactions that GAIN and
    one for reactions that LOSE a 'residue modification, active' feature
    between the input and the output simple physical entity. For every
    match, the modifications gained by the output entity (other than the
    'active'/'inactive' markers themselves) are set as the conditions of
    each output agent, and an ActiveForm statement with
    ``is_active`` True (GAIN) or False (LOSS) is appended to
    ``self.statements``. Agents with no such gained modification do not
    produce a statement.
    """
    mcc = _bpp('constraint.ModificationChangeConstraint')
    mcct = _bpp('constraint.ModificationChangeConstraint$Type')
    searcher = _bpp('Searcher')
    mod_filter = 'residue modification, active'
    activity = 'activity'
    for is_active in [True, False]:
        p = self._construct_modification_pattern()
        rel = mcct.GAIN if is_active else mcct.LOSS
        p.add(mcc(rel, mod_filter), "input simple PE", "output simple PE")

        res = searcher.searchPlain(self.model, p)
        res_array = [_match_to_array(m) for m in res.toArray()]

        # The label positions depend only on the pattern, not on the
        # individual match, so look them up once per pattern.
        conversion_idx = p.indexOf('Conversion')
        input_idx = p.indexOf('input simple PE')
        output_idx = p.indexOf('output simple PE')

        for r in res_array:
            reaction = r[conversion_idx]
            input_spe = r[input_idx]
            output_spe = r[output_idx]

            # Get the modifications present only on the output side
            mod_in = BiopaxProcessor._get_entity_mods(input_spe)
            mod_out = BiopaxProcessor._get_entity_mods(output_spe)
            gained_mods = set(mod_out).difference(set(mod_in))

            # Here we get the evidence for the BiochemicalReaction
            source_id = reaction.getUri()
            citations = BiopaxProcessor._get_citations(reaction)
            if not citations:
                ev = Evidence(source_api='biopax', pmid=None,
                              source_id=source_id)
            else:
                ev = [
                    Evidence(source_api='biopax', pmid=cit,
                             source_id=source_id)
                    for cit in citations
                ]

            # The 'active'/'inactive' markers are expressed through
            # is_active, so they are not kept as agent conditions.
            mods = [
                m for m in gained_mods
                if m[0] not in ['active', 'inactive']
            ]

            monomers = self._get_agents_from_entity(output_spe)
            for monomer in _listify(monomers):
                # NOTE: with the ActiveForm representation we cannot
                # separate static mods (already present on the agent)
                # from gained mods. We assume here that the static mods
                # are inconsequential and therefore are not mentioned as
                # an Agent condition, following don't care don't write
                # semantics. Therefore only the gained mods are listed
                # in the ActiveForm as Agent conditions.
                # A fresh list of ModConditions is built per agent so
                # that agents never share condition objects.
                monomer.mods = [
                    ModCondition(m[0], m[1], m[2], True) for m in mods
                ]
                if mods:
                    stmt = ActiveForm(monomer, activity, is_active,
                                      evidence=ev)
                    self.statements.append(
                        decode_obj(stmt, encoding='utf-8'))
def get_ubiquitination(self):
    """Extract INDRA Ubiquitination statements from the model.

    Each argument tuple returned by ``_get_generic_modification('ubiq')``
    is expanded into a Ubiquitination statement, decoded as UTF-8 and
    appended to ``self.statements``.
    """
    for args in self._get_generic_modification('ubiq'):
        ubiq_stmt = Ubiquitination(*args)
        self.statements.append(decode_obj(ubiq_stmt, encoding='utf-8'))
def get_glycosylation(self):
    """Extract INDRA Glycosylation statements from the model.

    Each argument tuple returned by
    ``_get_generic_modification('glycosyl')`` is expanded into a
    Glycosylation statement, decoded as UTF-8 and appended to
    ``self.statements``.
    """
    for args in self._get_generic_modification('glycosyl'):
        glycosyl_stmt = Glycosylation(*args)
        self.statements.append(
            decode_obj(glycosyl_stmt, encoding='utf-8'))
def get_regulate_amounts(self):
    """Extract INDRA RegulateAmount statements from the model.

    Matches of the PatternBox ``controlsExpressionWithTemplateReac``
    pattern are converted into ``IncreaseAmount`` (control type
    ``ACTIVATION``) or ``DecreaseAmount`` (control type ``INHIBITION``)
    statements, one per pair of controller and controlled agents, and
    appended to ``self.statements``. A match is skipped with a warning
    when its conversion has a template direction other than ``FORWARD``
    (an unset direction is accepted) or when its control type is
    neither of the two handled values.
    """
    pattern_box = _bpp('PatternBox')
    p = pattern_box.controlsExpressionWithTemplateReac()
    searcher = _bpp('Searcher')
    matches = searcher.searchPlain(self.model, p)
    match_arrays = [_match_to_array(m) for m in matches.toArray()]
    for res in match_arrays:
        # FIXME: for some reason labels are not accessible
        # for these queries. It would be more reliable
        # to get results by label instead of index
        # (res[p.indexOf(label)]). Presumably the positions map to the
        # following labels, which agrees with the indices used below:
        #   res[0]  'controller ER'
        #   res[1]  'generic controller ER'
        #   res[2]  'controller simple PE'
        #   res[3]  'controller PE'
        #   res[4]  'Control'
        #   res[5]  'Conversion'
        #   res[6]  'input PE'
        #   res[7]  'input simple PE'
        #   res[8]  'changed generic ER'
        #   res[9]  'output PE'
        #   res[10] 'output simple PE'
        #   res[11] 'changed ER'

        # TODO: here, res[3] is the complex physical entity
        # for instance http://pathwaycommons.org/pc2/
        # Complex_43c6b8330562c1b411d21e9d1185bae9
        # consists of 3 components: JUN, FOS and NFAT
        # where NFAT further contains 3 member physical entities.
        #
        # However, res[2] iterates over all 5 member physical entities
        # of the complex which doesn't represent the underlying
        # structure faithfully. It would be better to use res[3]
        # (the complex itself) and look at components and then
        # members. However, then, it would not be clear how to
        # construct an INDRA Agent for the controller.
        controller = self._get_agents_from_entity(res[2])
        controlled_pe = res[6]
        controlled = self._get_agents_from_entity(controlled_pe)

        conversion = res[5]
        direction = conversion.getTemplateDirection()
        if direction is not None:
            direction = direction.name()
            if direction != 'FORWARD':
                logger.warning('Unhandled conversion direction %s',
                               direction)
                continue
        # NOTE: sometimes the interaction type of the conversion is
        # annotated as term == 'TRANSCRIPTION', other times it is not
        # annotated, so it is not used to filter matches.

        control = res[4]
        control_type = control.getControlType()
        if control_type:
            control_type = control_type.name()

        # Evidence is always a list here; without citations it holds a
        # single Evidence whose pmid is None.
        citations = BiopaxProcessor._get_citations(control)
        source_id = control.getUri()
        if not citations:
            citations = [None]
        ev = [Evidence(source_api='biopax', pmid=cit, source_id=source_id)
              for cit in citations]

        agent_pairs = itertools.product(_listify(controller),
                                        _listify(controlled))
        for subj, obj in agent_pairs:
            # Attach an active 'transcription' activity condition to the
            # controller before building the statement.
            subj.activity = ActivityCondition('transcription', True)
            if control_type == 'ACTIVATION':
                st = IncreaseAmount(subj, obj, evidence=ev)
            elif control_type == 'INHIBITION':
                st = DecreaseAmount(subj, obj, evidence=ev)
            else:
                logger.warning('Unhandled control type %s', control_type)
                continue
            st_dec = decode_obj(st, encoding='utf-8')
            self.statements.append(st_dec)
def get_activity_modification(self, force_contains=None):
    """Extract INDRA ActiveForm statements from the model.

    Two searches are run over the model, one for reactions that GAIN and
    one for reactions that LOSE a 'residue modification, active' feature
    between the input and the output simple physical entity. For every
    match, the modifications gained by the output entity (other than the
    'active'/'inactive' markers themselves) become the conditions of
    each output agent, and an ActiveForm statement with ``is_active``
    True (GAIN) or False (LOSS) is appended to ``self.statements``.
    Agents with no such gained modification do not produce a statement.

    Parameters
    ----------
    force_contains : Optional[list[str]]
        A list of gene names for filtering. Only Statements whose agent
        name is in the force_contains list will be extracted; agents
        that are filtered out are left unmodified.
        Default: None
    """
    mcc = _bpp('constraint.ModificationChangeConstraint')
    mcct = _bpp('constraint.ModificationChangeConstraint$Type')
    searcher = _bpp('Searcher')
    mod_filter = 'residue modification, active'
    activity = 'activity'
    for is_active in [True, False]:
        p = self._construct_modification_pattern()
        rel = mcct.GAIN if is_active else mcct.LOSS
        p.add(mcc(rel, mod_filter), "input simple PE", "output simple PE")

        res = searcher.searchPlain(self.model, p)
        res_array = [_match_to_array(m) for m in res.toArray()]

        # The label positions depend only on the pattern, not on the
        # individual match, so look them up once per pattern.
        conversion_idx = p.indexOf('Conversion')
        input_idx = p.indexOf('input simple PE')
        output_idx = p.indexOf('output simple PE')

        for r in res_array:
            reaction = r[conversion_idx]
            input_spe = r[input_idx]
            output_spe = r[output_idx]

            # Get the modifications present only on the output side
            mod_in = BiopaxProcessor._get_entity_mods(input_spe)
            mod_out = BiopaxProcessor._get_entity_mods(output_spe)
            gained_mods = set(mod_out).difference(set(mod_in))

            # Here we get the evidence for the BiochemicalReaction
            source_id = reaction.getUri()
            citations = BiopaxProcessor._get_citations(reaction)
            if not citations:
                ev = Evidence(source_api='biopax', pmid=None,
                              source_id=source_id)
            else:
                ev = [Evidence(source_api='biopax', pmid=cit,
                               source_id=source_id)
                      for cit in citations]

            monomers = self._get_agents_from_entity(output_spe)
            for monomer in _listify(monomers):
                # force_contains holds gene name strings, so the agent
                # must be matched by its name; testing the Agent object
                # itself for membership would never succeed and would
                # silently drop every statement.
                if (force_contains is not None
                        and monomer.name not in force_contains):
                    continue
                # The 'active'/'inactive' markers are expressed through
                # is_active, so they are not kept as agent conditions.
                # The list is rebuilt per agent so that agents never
                # share the same list object.
                mods = [m for m in gained_mods
                        if m.mod_type not in ['active', 'inactive']]
                # NOTE: with the ActiveForm representation we cannot
                # separate static mods (already present on the agent)
                # from gained mods. We assume here that the static mods
                # are inconsequential and therefore are not mentioned as
                # an Agent condition, following don't care don't write
                # semantics. Therefore only the gained mods are listed
                # in the ActiveForm as Agent conditions.
                monomer.mods = mods
                if mods:
                    stmt = ActiveForm(monomer, activity, is_active,
                                      evidence=ev)
                    self.statements.append(
                        decode_obj(stmt, encoding='utf-8'))