def to_jaspar(self, filename):
    """Write this PWM to ``filename`` in JASPAR format.

    A ``jaspar.Motif`` is built from the TF accession number
    (``self.TF.accession_number``), ``self.name`` and the instances of the
    wrapped motif (``self._motif.instances``).  The motif is rendered with
    ``jaspar.write(..., 'jaspar')`` and the result is written to
    ``filename``, which is opened in ``'w'`` mode (existing content is
    replaced).

    Args:
        filename: Path of the output file.
    """
    motif_record = jaspar.Motif(
        matrix_id=self.TF.accession_number,
        name=self.name,
        instances=self._motif.instances,
    )
    # The file is opened before rendering, exactly as in the one-liner form.
    with open(filename, 'w') as out_handle:
        rendered = jaspar.write([motif_record], 'jaspar')
        out_handle.write(rendered)
def _PSSM_model_to_jaspar(self, filename):
    """Write the PSSM to ``filename`` in JASPAR format.

    The motif is named after ``self.strain_name``, carries an empty matrix
    ID, and takes its counts from ``self.TF_binding_model.pwm``.  It is
    rendered with ``jaspar.write(..., 'jaspar')`` and written to
    ``filename``, which is opened in ``'w'`` mode (existing content is
    replaced).

    Args:
        filename: Path of the output file.
    """
    motif_record = jaspar.Motif(
        matrix_id='',
        name=self.strain_name,
        counts=self.TF_binding_model.pwm,
    )
    # The file is opened before rendering, exactly as in the one-liner form.
    with open(filename, 'w') as out_handle:
        rendered = jaspar.write([motif_record], 'jaspar')
        out_handle.write(rendered)
def _fetch_motif_by_internal_id(self, int_id):
    """Fetch basic motif information (PRIVATE).

    Looks up the MATRIX row whose ``id`` equals ``int_id``, builds a
    ``jaspar.Motif`` from it (with counts from
    ``self._fetch_counts_matrix``) and then decorates the motif with
    species, protein accessions and the tags found in MATRIX_ANNOTATION.

    Args:
        int_id: Internal (database) ID of the matrix.

    Returns:
        The populated motif, or ``None`` (after emitting a
        ``BiopythonWarning``) when no MATRIX row matches ``int_id``.
    """
    cursor = self.dbh.cursor()
    cursor.execute(
        "select BASE_ID, VERSION, COLLECTION, NAME from MATRIX where id = %s",
        (int_id,),
    )
    record = cursor.fetchone()

    # This should never happen as it is an internal method. If it does
    # we should probably raise an exception.
    if not record:
        warnings.warn(
            f"Could not fetch JASPAR motif with internal ID = {int_id}",
            BiopythonWarning,
        )
        return None

    base_id, version = record[0], record[1]
    collection, name = record[2], record[3]
    matrix_id = ".".join((base_id, str(version)))

    # Build the motif around its counts matrix.
    motif = jaspar.Motif(
        matrix_id,
        name,
        collection=collection,
        counts=self._fetch_counts_matrix(int_id),
    )

    # Species. Many JASPAR motifs (especially those not in the CORE
    # collection) do not have taxonomy IDs, so warning about an empty
    # result would get annoying; an empty list is stored silently.
    cursor.execute("select TAX_ID from MATRIX_SPECIES where id = %s", (int_id,))
    motif.species = [entry[0] for entry in cursor.fetchall()]

    # Protein accession numbers. As with taxonomy IDs, no warning is
    # issued for motifs that have none.
    cursor.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,))
    motif.acc = [entry[0] for entry in cursor.fetchall()]

    # Remaining annotation comes as (TAG, VAL) pairs from the ANNOTATION table.
    cursor.execute("select TAG, VAL from MATRIX_ANNOTATION where id = %s", (int_id,))
    annotations = cursor.fetchall()

    # Since JASPAR 2018 tf_family and tf_class are returned as arrays,
    # so every matching row is collected.
    tf_family = []
    tf_class = []
    collected = {"class": tf_class, "family": tf_family}
    # Tags stored as a single value: tag -> motif attribute name.
    single_valued = {
        "tax_group": "tax_group",
        "type": "data_type",
        "pazar_tf_id": "pazar_id",
        "medline": "medline",
        "comment": "comment",
    }
    for entry in annotations:
        tag, value = entry[0], entry[1]
        if tag in collected:
            collected[tag].append(value)
        elif tag in single_valued:
            setattr(motif, single_valued[tag], value)
        # TODO If we were to implement additional arbitrary tags
        # motif.tag(tag, value)

    motif.tf_family = tf_family
    motif.tf_class = tf_class
    return motif
def _fetch_motif_by_internal_id(self, int_id): # fetch basic motif information sql = "select BASE_ID, VERSION, COLLECTION, NAME from MATRIX where id = %d" % int_id cur = self.dbh.cursor() cur.execute(sql) row = cur.fetchone() base_id = row[0] version = row[1] collection = row[2] name = row[3] matrix_id = "".join([base_id, '.', str(version)]) # fetch the counts matrix counts = self._fetch_counts_matrix(int_id) # Create new JASPAR motif motif = jaspar.Motif( matrix_id, name, collection=collection, counts=counts ) # fetch species sql = "select TAX_ID from MATRIX_SPECIES where id = %d" % int_id cur.execute(sql) tax_ids = [] rows = cur.fetchall() for row in rows: tax_ids.append(row[0]) motif.species = tax_ids # fetch protein accession numbers sql = "select ACC FROM MATRIX_PROTEIN where id = %d" % int_id cur.execute(sql) accs = [] rows = cur.fetchall() for row in rows: accs.append(row[0]) motif.acc = accs # fetch remaining annotation as tags from the ANNOTATION table sql = "select TAG, VAL from MATRIX_ANNOTATION where id = %d" % int_id cur.execute(sql) rows = cur.fetchall() for row in rows: attr = row[0] val = row[1] if attr == 'class': motif.tf_class = val elif attr == 'family': motif.tf_family = val elif attr == 'tax_group': motif.tax_group = val elif attr == 'type': motif.data_type = val elif attr == 'pazar_tf_id': motif.pazar_id = val elif attr == 'medline': motif.medline = val elif attr == 'comment': motif.comment = val else: """ TODO If we were to implement additional abitrary tags motif.tag(attr, val) """ pass return motif