Ejemplo n.º 1
0
 def to_jaspar(self, filename):
     """Serialize this motif to ``filename`` as JASPAR-formatted text.

     A ``jaspar.Motif`` is built from the TF accession number (used as
     the matrix ID), this object's name and the instances of the wrapped
     motif. It is rendered with ``jaspar.write`` and written to
     ``filename``, which is opened in ``'w'`` mode and therefore
     overwritten if it already exists.
     """
     record = jaspar.Motif(
         matrix_id=self.TF.accession_number,
         name=self.name,
         instances=self._motif.instances,
     )
     with open(filename, 'w') as handle:
         rendered = jaspar.write([record], 'jaspar')
         handle.write(rendered)
Ejemplo n.º 2
0
 def _PSSM_model_to_jaspar(self, filename):
     """Dump the TF binding model matrix to ``filename`` in JASPAR format.

     The motif is named after ``self.strain_name``, carries an empty
     matrix ID, and takes ``self.TF_binding_model.pwm`` as its counts.
     ``filename`` is opened in ``'w'`` mode, so existing content is
     overwritten.
     """
     label = self.strain_name
     record = jaspar.Motif(
         matrix_id='',
         name=label,
         counts=self.TF_binding_model.pwm,
     )
     with open(filename, 'w') as handle:
         rendered = jaspar.write([record], 'jaspar')
         handle.write(rendered)
Ejemplo n.º 3
0
    def _fetch_motif_by_internal_id(self, int_id):
        """Build a JASPAR motif from its internal database ID (PRIVATE).

        Reads the basic record from MATRIX, the counts matrix via
        ``_fetch_counts_matrix``, and the species, protein accession and
        annotation rows, then returns the populated ``jaspar.Motif``.

        If no MATRIX row matches ``int_id``, a ``BiopythonWarning`` is
        issued and ``None`` is returned.
        """
        cursor = self.dbh.cursor()
        cursor.execute(
            "select BASE_ID, VERSION, COLLECTION, NAME from MATRIX where id = %s",
            (int_id,),
        )
        record = cursor.fetchone()

        # Callers are internal, so a miss is unexpected. It is reported as
        # a warning here; raising an exception would arguably be better.
        if not record:
            warnings.warn(
                f"Could not fetch JASPAR motif with internal ID = {int_id}",
                BiopythonWarning,
            )
            return None

        base_id = record[0]
        version = record[1]
        collection = record[2]
        name = record[3]

        # The public matrix ID has the form "<base id>.<version>".
        matrix_id = "".join((base_id, ".", str(version)))

        motif = jaspar.Motif(
            matrix_id,
            name,
            collection=collection,
            counts=self._fetch_counts_matrix(int_id),
        )

        # Species. Many motifs (especially outside the CORE collection)
        # have no taxonomy IDs, so an empty result is deliberately silent.
        cursor.execute("select TAX_ID from MATRIX_SPECIES where id = %s", (int_id,))
        motif.species = [entry[0] for entry in cursor.fetchall()]

        # Protein accession numbers. As with taxonomy IDs, a motif without
        # any is not worth a warning.
        cursor.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,))
        motif.acc = [entry[0] for entry in cursor.fetchall()]

        # Remaining annotation is stored as TAG/VAL pairs.
        cursor.execute("select TAG, VAL from MATRIX_ANNOTATION where id = %s", (int_id,))

        # Tags that map onto a single-valued motif attribute.
        single_valued = {
            "tax_group": "tax_group",
            "type": "data_type",
            "pazar_tf_id": "pazar_id",
            "medline": "medline",
            "comment": "comment",
        }

        # Since JASPAR 2018, tf_family and tf_class are returned as lists.
        families = []
        classes = []

        for entry in cursor.fetchall():
            tag = entry[0]
            value = entry[1]
            if tag == "class":
                classes.append(value)
            elif tag == "family":
                families.append(value)
            elif tag in single_valued:
                setattr(motif, single_valued[tag], value)
            # TODO: any other tag is currently ignored; arbitrary tags
            # could be supported with something like motif.tag(tag, value).

        motif.tf_family = families
        motif.tf_class = classes

        return motif
Ejemplo n.º 4
0
    def _fetch_motif_by_internal_id(self, int_id):
        # fetch basic motif information
        sql = "select BASE_ID, VERSION, COLLECTION, NAME from MATRIX where id = %d" % int_id

        cur = self.dbh.cursor()
        cur.execute(sql)

        row = cur.fetchone()

        base_id = row[0]
        version = row[1]
        collection = row[2]
        name = row[3]

        matrix_id = "".join([base_id, '.', str(version)])

        # fetch the counts matrix
        counts = self._fetch_counts_matrix(int_id)

        # Create new JASPAR motif
        motif = jaspar.Motif(
            matrix_id, name, collection=collection, counts=counts
        )

        # fetch species
        sql = "select TAX_ID from MATRIX_SPECIES where id = %d" % int_id
        cur.execute(sql)
        tax_ids = []
        rows = cur.fetchall()
        for row in rows:
            tax_ids.append(row[0])

        motif.species = tax_ids

        # fetch protein accession numbers
        sql = "select ACC FROM MATRIX_PROTEIN where id = %d" % int_id
        cur.execute(sql)
        accs = []
        rows = cur.fetchall()
        for row in rows:
            accs.append(row[0])

        motif.acc = accs

        # fetch remaining annotation as tags from the ANNOTATION table
        sql = "select TAG, VAL from MATRIX_ANNOTATION where id = %d" % int_id
        cur.execute(sql)
        rows = cur.fetchall()
        for row in rows:
            attr = row[0]
            val = row[1]
            if attr == 'class':
                motif.tf_class = val
            elif attr == 'family':
                motif.tf_family = val
            elif attr == 'tax_group':
                motif.tax_group = val
            elif attr == 'type':
                motif.data_type = val
            elif attr == 'pazar_tf_id':
                motif.pazar_id = val
            elif attr == 'medline':
                motif.medline = val
            elif attr == 'comment':
                motif.comment = val
            else:
                """
                TODO If we were to implement additional abitrary tags
                motif.tag(attr, val)
                """
                pass

        return motif