Ejemplo n.º 1
0
 def _set_edgetype(self, code):
     """
     Store ``code`` as the edge type of this interaction.

     If an edge-type entry is already attached, only its accession is
     overwritten. Otherwise a new 'edgetype' entry is created, remembered
     in ``self._edgetype`` and appended to ``self.ids.ids``.
     """
     if self._edgetype is None:
         entry = SingleID()
         self._edgetype = entry
         entry.set('edgetype', code)
         # The same object is shared between the cache attribute and the
         # ID list, so later updates through either are visible in both.
         self.ids.ids.append(entry)
         return

     self._edgetype.acc = code
Ejemplo n.º 2
0
 def _consolidate_publications(self, group):
     """
     Merge the publication IDs of every interaction in ``group``.

     Each distinct value returned by the publication entries' ``get()`` is
     appended to ``self.publications.ids`` as a new entry. If at least one
     of them has the database tag 'pubmed', ``self.pmid`` is set to the
     integer value of the first such accession in set iteration order.
     """
     # Deduplicate across the whole group.
     pubs = set()
     for intr in group:
         for entry in intr.publications.ids:
             pubs.add(entry.get())

     for db, acc in pubs:
         merged = SingleID()
         merged.set(db, acc)
         self.publications.ids.append(merged)

     # Only the first pubmed accession encountered is converted and kept.
     pubmed_accs = [acc for db, acc in pubs if db == 'pubmed']
     if pubmed_accs:
         self.pmid = int(pubmed_accs[0])
Ejemplo n.º 3
0
    def insert_conflict(self, ppiTrim_id):
        """
        Insert ppiTrim ID of the conflicting interaction

        The ID is appended to ``self._conflicts`` and the full list is
        mirrored, comma-separated, in a 'conflicts' entry of
        ``self.confidence.ids``. The entry is created on the first call and
        updated in place afterwards.
        """

        cnflct = None
        if self._conflicts:
            # Find our own entry by its tag rather than by position: other
            # confidence entries may have been appended since the first
            # conflict was recorded and must not be overwritten.
            for item in reversed(self.confidence.ids):
                if item.db == 'conflicts':
                    cnflct = item
                    break

        if cnflct is None:
            # First conflict (or the entry has gone missing): start a new one.
            cnflct = SingleID()
            self.confidence.ids.append(cnflct)

        self._conflicts.append(ppiTrim_id)
        cnflct.set('conflicts', ','.join(self._conflicts))
Ejemplo n.º 4
0
    def _set_confidence(self):
        """
        Append the 'maxsources' and 'dmconsistency' entries, in that order,
        to ``self.confidence.ids`` using the values stored in
        ``self._maxsources`` and ``self._dmconsistency``.
        """
        tagged_values = (
            ('maxsources', self._maxsources),
            ('dmconsistency', self._dmconsistency),
        )
        entries = []
        for tag, value in tagged_values:
            entry = SingleID()
            entry.set(tag, value)
            entries.append(entry)
        self.confidence.ids += entries
Ejemplo n.º 5
0
    def _consolidate_ids(self, group):
        """
        Collect the source IDs and irigids of all interactions in ``group``
        and append them, deduplicated, to ``self.ids.ids``.

        Source IDs come first (sorted), followed by irigids (sorted).
        """
        # The original IDs and the irigids are kept. The ppiTrim ID itself
        # is not added by this method.
        source_pairs = set()
        irigid_pairs = set()
        for member in group:
            source_pairs.update(member._get_source_ids())
            irigid_pairs.update(member._get_irigids())

        ordered = sorted(source_pairs)
        ordered.extend(sorted(irigid_pairs))
        for db, acc in ordered:
            entry = SingleID()
            entry.set(db, acc)
            self.ids.ids.append(entry)
Ejemplo n.º 6
0
class Interaction(object):
    """
    General interaction parsed from PSI-MI 2.6 TAB file

    An instance is filled line by line through ``from_cols``. It represents
    either a binary interaction (two interactors) or a complex, in which case
    ``self.complex`` holds the complex pseudo-interactor and
    ``self.interactors`` holds one member per input line.
    """

    _term_cache = {}

    # Each entry is (attribute name, column index in the TAB line, class used
    # to hold and parse the column).
    _fields = [
        ('detection_method', 6, PSIMITerm),
        ('author', 7, PlainText),
        ('publications', 8, MultipleIDs),
        ('interaction_type', 11, PSIMITerm),
        ('source_db', 12, MultiplePSIMITerms),
        ('ids', 13, MultipleIDs),
        ('confidence', 14, ConfidenceScores),
        ('expansion', 15, PlainText),
        ('xrefs', 24, EmptyField),
        ('annotations', 27, EmptyField),
        ('host_taxid', 28, TaxonomyID),
        ('parameters', 29, EmptyField),
        ('creation_date', 30, DateField),
        ('update_date', 31, DateField),
        ('checksum', 34, SingleID),
        ('negative', 35, PlainText),
    ]
    _interactor_class = Interactor

    @classmethod
    def get_complex_data(cls, cols):
        """
        Check if the cols come from a line describing a complex. If yes, return
        all identifying fields, otherwise return None.

        The identifying fields are returned as a tuple: the accession of the
        complex followed by the raw column string of every entry in
        ``_fields``.
        """
        p = cls._interactor_class()
        p.from_cols(cols, 0)
        if not p.is_complex():
            return None
        return tuple([p.uid.acc] + [cols[i] for _, i, _ in cls._fields])

    @staticmethod
    def write_header(fp):
        """
        Write the header with field names (36 columns)

        The line starts with '#', followed by the tab-separated
        ``HEADER_FIELDS`` and a newline.
        """
        fp.write('#')
        fp.write('\t'.join(HEADER_FIELDS))
        fp.write('\n')

    def __init__(self):
        """
        Create an empty interaction with a fresh field object for every
        entry in ``_fields``.
        """

        self.complex = None
        self.idata = None
        self.num_lines = 0

        self.interactors = []
        for field_name, _, field_class in self._fields:
            setattr(self, field_name, field_class())

        # Extra fields related to iRefIndex
        self._edgetype = None
        self.pmid = None
        self.rigid = None

        # Used only for ppiTrim complexes. Bait is a distinguished point so two
        # complexes with same members, properties etc. are not the same if
        # their baits are different. Just in case, we allow more than one bait,
        # although this should not happen for complexes.
        self.baits = None

    def __str__(self):
        """
        Return one 'name<TAB>value' line per entry in ``_fields``.
        """

        data = [
            '%s\t%s\n' % (field_name, str(getattr(self, field_name)))
            for field_name, _, _ in self._fields
        ]
        return ''.join(data)

    def find_baits(self):
        """
        Return the list of interactors that act as baits.

        If the confidence field carries a 'templatebaits' entry, the baits
        are the interactors whose uid accession is listed there (at least one
        must match). Otherwise the baits are the interactors that have an
        experimental role with term ID 'MI:0496'.
        """

        baits = []
        # Template-deflated complexes have the bait of their template annotated
        bait_ids = None
        for item in self.confidence.ids:
            if item.db == 'templatebaits':
                bait_ids = item.acc.split(',')
                break

        if bait_ids is not None:
            for p in self.interactors:
                if p.uid.acc in bait_ids:
                    baits.append(p)
            assert len(baits) > 0
        else:
            # Normal case - just look for the bait
            for p in self.interactors:
                for erole in p.experimental_role:
                    if erole.term_id == 'MI:0496':
                        baits.append(p)
                        # The break only leaves the role loop, so an
                        # interactor is added at most once. Several
                        # different interactors can still end up as baits.
                        break
        return baits

    def set_template_baits(self, bait_ids):
        """
        Append a 'templatebaits' confidence entry listing ``bait_ids``
        (an iterable of strings) separated by commas.
        """

        templatebaits = SingleID()
        bait_ids = list(bait_ids)
        templatebaits.set('templatebaits', ','.join(bait_ids))
        self.confidence.ids += [templatebaits]

    def get_ppiTrim_id(self):
        """
        Compute ppiTrim_id (hash) for the interaction.
        Also assign the bait attribute.

        The ID is the base64-encoded SHA1 digest of the '.'-joined
        publications, interactor accessions, source database names,
        detection method and interaction type term IDs, edge type and bait
        accessions. It is returned as a native string.
        """

        # To assign the bait(s) we either need to do it when each interactor is
        # added (not possible now) or wait until all interactors are input but
        # before this information is used anywhere. This function is where the
        # bait is used first (to compute SHA1 digest) so it makes sense to set
        # the bait right here. Also, this function is one of the last steps in
        # construction of ppiTrim consolidated interactions.
        self.baits = self.find_baits()

        uids = [p.uid.acc for p in self.interactors]
        baits = [p.uid.acc for p in self.baits]
        pubs = [pub.__str__() for pub in self.publications.ids]
        # Build a real list: the result of map() cannot be concatenated with
        # lists on interpreters where map() returns an iterator.
        props = [str(item.name) for item in self.source_db]
        props.append(str(self.detection_method.term_id))
        props.append(str(self.interaction_type.term_id))

        msg = '.'.join(pubs + uids + props + [self.edgetype] + baits)
        # hashlib needs bytes; a byte string is passed through untouched.
        if not isinstance(msg, bytes):
            msg = msg.encode('utf-8')

        hash_factory = hashlib.sha1()
        hash_factory.update(msg)
        digest = base64.b64encode(hash_factory.digest())
        # b64encode may hand back bytes; callers get a native string.
        if not isinstance(digest, str):
            digest = digest.decode('ascii')
        return digest

    def is_complex(self):
        """
        Return True if interaction is considered a 'complex'
        """
        return self.complex is not None

    def _get_source_ids(self):
        """
        Return the ``get()`` value of every interaction ID that is not a
        'rigid', 'irigid', 'edgetype' or 'ppiTrim' entry.
        """
        srcids = [
            item.get() for item in self.ids.ids
            if item.db not in ('rigid', 'irigid', 'edgetype', 'ppiTrim')
        ]
        return srcids

    def _get_irigids(self):
        """
        Return the ``get()`` value of every 'irigid' interaction ID.
        """
        irigids = [item.get() for item in self.ids.ids if item.db == 'irigid']
        return irigids

    def _set_extra_fields(self):
        """
        Derive ``pmid``, ``rigid`` and the cached edge-type entry from the
        already parsed publications and interaction IDs.
        """

        # First pubmed publication wins.
        for item in self.publications.ids:
            if item.db == 'pubmed':
                self.pmid = int(item.acc)
                break

        for item in self.ids.ids:
            if item.db == 'edgetype':
                # Keep the entry itself so the edgetype property can update
                # it in place.
                self._edgetype = item
            elif item.db == 'rigid' and self.rigid is None:
                self.rigid = item.acc

    def from_cols(self, cols):
        """
        Sets all relevant field names and interactors from a list of column
        strings.

        While no complex has been recorded, the line is treated as the first
        one: the first interactor is parsed and either stored as the complex
        or appended as a regular interactor, and every field in ``_fields``
        is loaded from its column. The second interactor of the line is
        always appended and ``num_lines`` is incremented.
        """

        if not self.is_complex():
            # First time - add everything
            p1 = self._interactor_class()
            p1.from_cols(cols, 0)
            if p1.is_complex():
                self.complex = p1
                self.idata = tuple([cols[i] for _, i, _ in self._fields])
            else:
                self.interactors.append(p1)

            for field_name, i, _ in self._fields:
                field = getattr(self, field_name)
                field.from_string(cols[i])
            self._set_extra_fields()

        # Subsequently - only if complex. The calling routine should ensure
        # that the line being added is the part of the same complex as the
        # first one.
        p2 = self._interactor_class()
        p2.from_cols(cols, 1)
        self.interactors.append(p2)
        self.num_lines += 1

    def to_file(self, fp):
        """
        Write entire interaction in PSI-MI 2.6 TAB format (one or more lines)

        A complex is written as one line per member, each pairing the complex
        with that member. Any other interaction is written as a single line.
        """

        cols = [None] * 36
        for field_name, i, _ in self._fields:
            field = getattr(self, field_name)
            cols[i] = field.__str__()

        if self.is_complex():
            self.complex.to_cols(cols, 0)
            for p in self.interactors:
                # The same column list is reused; only the member's columns
                # are overwritten for each line.
                p.to_cols(cols, 1)
                fp.write('\t'.join(cols))
                fp.write('\n')
        else:
            # There are exactly two interactors
            for j, p in enumerate(self.interactors):
                p.to_cols(cols, j)
            fp.write('\t'.join(cols))
            fp.write('\n')

    def binary_from_complex(self, p1, p2, code):
        """
        Extract a binary interaction given by p1 and p2 from a complex

        All fields are copied from this complex, the expansion is set to
        'none', both dates are set to today and ``code`` becomes the edge
        type. Returns the new ``Interaction``.
        """

        assert self.is_complex()
        interaction = Interaction()
        for field_name, _, _ in self._fields:
            prop1 = getattr(self, field_name)
            prop2 = getattr(interaction, field_name)
            prop2.set(*prop1.get())

        interaction.expansion.txt = 'none'
        interaction.creation_date.set_today()
        interaction.update_date.set_today()
        # Goes through the edgetype property defined below.
        interaction.edgetype = code

        interaction.interactors = [p1, p2]
        return interaction

    def _get_edgetype(self):
        """
        Return the edge-type code, or None when no edge type is set.
        """
        if self._edgetype is not None:
            return self._edgetype.acc
        return None

    def _set_edgetype(self, code):
        """
        Set the edge-type code, creating and registering the 'edgetype'
        entry in ``self.ids.ids`` if it does not exist yet.
        """
        if self._edgetype is not None:
            self._edgetype.acc = code
        else:
            self._edgetype = SingleID()
            self._edgetype.set('edgetype', code)
            self.ids.ids.append(self._edgetype)

    edgetype = property(_get_edgetype, _set_edgetype)
Ejemplo n.º 7
0
    def set_template_baits(self, bait_ids):
        """
        Record the baits of the template complex.

        ``bait_ids`` is an iterable of strings. They are joined with commas
        and stored as a new 'templatebaits' entry at the end of
        ``self.confidence.ids``.
        """
        entry = SingleID()
        entry.set('templatebaits', ','.join(list(bait_ids)))
        self.confidence.ids.append(entry)
Ejemplo n.º 8
0
    def _consolidate_interactors(self, group):
        """
        Build ``self.interactors`` from the interactors of every interaction
        in ``group``.

        Interactors sharing the same uid are merged into one new
        ``Interactor``. The result is sorted by uid accession. When the whole
        group contains a single distinct uid, the merged interactor is listed
        twice to represent a self-edge.
        """
        # Order of interactors cannot be important - biological/experimental
        # role should be annotated. Interactors are grouped by uid, so this
        # should be well-characterized.
        old_interactors = {}
        for intr in group:
            for p in intr.interactors:
                old_interactors.setdefault(p.uid.get(), []).append(p)

        self_edge = (len(old_interactors) == 1)
        new_interactors = []
        # values() rather than itervalues() so this also runs where dicts
        # have no itervalues(). The final order is fixed by the sort below.
        for pgroup in old_interactors.values():
            q = Interactor()

            # uid is the same by construction (smallest geneid)
            q.uid.set(*pgroup[0].uid.get())

            # alt consists of one or more gene ids. If more than one,
            # order is important because the symbols should be in the same
            # order in the alias field. So, to do this properly, we need to
            # join gene ids and symbols, and do icrogids (in alias)
            # separately.
            icrogids = set(item.acc for p in pgroup for item in p.alias.ids
                           if item.db == 'icrogid')
            genes = set()
            for p in pgroup:
                # zip stops at the shorter list, so alias entries beyond the
                # number of alt entries are not paired here.
                for geneid_, symbol_ in zip(p.alt.ids, p.alias.ids):
                    genes.add((geneid_.acc, symbol_.acc))

            for geneid, symbol in sorted(genes):
                geneid_ = SingleID()
                geneid_.set(q.uid.db, geneid)
                symbol_ = SingleID()
                symbol_.set(q.uid.db, symbol)
                q.alt.ids.append(geneid_)
                q.alias.ids.append(symbol_)

            for icrogid in sorted(icrogids):
                icrogid_ = SingleID()
                icrogid_.set('icrogid', icrogid)
                q.alias.ids.append(icrogid_)

            # Organism ought to be the same everywhere due to filtering in
            # Phase 1 - NOT TRUE ANY MORE
            # The organism of the first interactor in the group is used.
            q.organism.set(*pgroup[0].organism.get())

            # biological role - all roles with a term ID, else unspecified
            bio_roles = [
                item for p in pgroup for item in p.biological_role
                if item.term_id is not None
            ]
            if bio_roles:
                q.biological_role.from_items(bio_roles)
            else:
                q.biological_role.add('MI:0499', 'unspecified role')

            # experimental role - all roles with a term ID, else unspecified
            # (no consistency check is performed here)
            exp_roles = [
                item for p in pgroup for item in p.experimental_role
                if item.term_id is not None
            ]
            if exp_roles:
                q.experimental_role.from_items(exp_roles)
            else:
                q.experimental_role.add('MI:0499', 'unspecified role')

            # interactor type is always MI:0326(protein) for PPIs
            q.interactor_type.add('MI:0326', 'protein')

            # xrefs, annotations and checksum are left as null
            new_interactors.append(q)

        new_interactors.sort(key=lambda p: p.uid.acc)

        if self_edge:
            # Self-edge: the very same object is listed twice.
            q = new_interactors[0]
            new_interactors.append(q)
        self.interactors = new_interactors
Ejemplo n.º 9
0
    def normalize_ids_with_gene(self, gene_ids, gene_symbols):
        """
        Make NCBI Gene the primary ID (uid) and keep only Gene IDs, their
        symbols and the rogid as secondary IDs.

        ``gene_ids`` holds integer Gene IDs and ``gene_symbols`` the matching
        symbols, paired by position. For a complex the uid is left untouched
        and only the alt and alias fields are emptied. For anything else the
        uid becomes the smallest Gene ID, and one alt entry and one alias
        entry are added per (gene ID, symbol) pair. In both cases a 'rogid'
        alias built from ``self.id`` is appended last.
        """

        # Decide which ID fields start from scratch.
        if self.is_complex():
            # The uid of a complex is preserved.
            reset_fields = ['alt', 'alias']
        else:
            reset_fields = ['uid', 'alt', 'alias']

        for field_name, _, field_class in self._fields:
            if field_name not in reset_fields:
                continue
            setattr(self, field_name, field_class())

        # Gene-based IDs are assigned only to non-complex interactors.
        if not self.is_complex():
            gene_db = 'entrezgene/locuslink'
            self.uid.set(gene_db, '%d' % min(gene_ids))
            for gene_id, gene_symbol in zip(gene_ids, gene_symbols):
                id_entry = SingleID()
                symbol_entry = SingleID()
                id_entry.set(gene_db, '%d' % gene_id)
                symbol_entry.set(gene_db, gene_symbol)
                self.alt.ids.append(id_entry)
                self.alias.ids.append(symbol_entry)

        # rogid is added for all interactors, complexes included.
        # NOTE(review): the original comment said "if it exists", but no
        # check on self.id is made here - confirm whether one is needed.
        rogid_entry = SingleID()
        rogid_entry.set('rogid', self.id)
        self.alias.ids.append(rogid_entry)