def convert(motifs_list, alphabet):
    """Build a new list of motifs re-created with the given alphabet.

    For every motif in ``motifs_list`` each of its ``instances`` is rebuilt
    as a ``meme.Instance`` over ``alphabet``, the rebuilt instances are
    wrapped in ``motifs.Instances`` and a fresh ``meme.Motif`` is created
    from them. The ``name`` and ``evalue`` of the source motif are copied
    onto the new motif.

    Arguments:
     - motifs_list - iterable of motif objects exposing ``instances``,
       ``name`` and ``evalue``.
     - alphabet - alphabet handed to every object that is re-created.

    Returns a new list holding one converted motif per input motif, in the
    same order.
    """

    def rebuild(original):
        # Re-create one instance over the requested alphabet and carry its
        # bookkeeping attributes across; start/length are coerced to int.
        clone = meme.Instance(original.tostring(), alphabet)
        clone.motif_name = original.motif_name
        clone.sequence_name = original.sequence_name
        clone.start = int(original.start)
        clone.length = int(original.length)
        return clone

    converted = []
    for source in motifs_list:
        rebuilt = motifs.Instances(
            [rebuild(original) for original in source.instances], alphabet
        )
        target = meme.Motif(alphabet=alphabet, instances=rebuilt)
        target.name = source.name
        target.evalue = source.evalue
        converted.append(target)
    return converted
def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' section and register its motifs.

    Lines are consumed from ``handle`` up to and including the blank line
    (or the end of input) that terminates the motif table.

    Arguments:
     - record - object that receives the ``database`` and ``alphabet``
       attributes; every parsed motif is passed to ``record.append``.
     - handle - iterator over the lines of the file being parsed.

    Raises ValueError if an expected marker line is malformed, if a section
    header is never found, or if the input ends before the section is
    complete.
    """

    def read_required_line(expected):
        # next() with a default keeps a truncated file from leaking a bare
        # StopIteration to the caller; it is reported as a parse error, in
        # line with the other malformed-input checks in this function.
        following = next(handle, None)
        if following is None:
            raise ValueError(
                "Unexpected end of stream: expected %s" % expected)
        return following

    # Skip ahead to the section header.
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    else:
        raise ValueError(
            "Unexpected end of stream: 'DATABASE AND MOTIFS' not found")

    line = read_required_line("a line starting with '****'")
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)

    line = read_required_line("a line containing 'DATABASE'")
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    record.database = words[1]
    # Any other type tag leaves record.alphabet exactly as it was.
    if words[2] == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif words[2] == '(peptide)':
        record.alphabet = IUPAC.protein

    # Skip ahead to the header row of the motif table.
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    else:
        raise ValueError(
            "Unexpected end of stream: 'MOTIF WIDTH' header not found")

    line = read_required_line("a line containing '----'")
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)

    # One motif per row; a blank line (or end of input) ends the table.
    for line in handle:
        if not line.strip():
            break
        words = line.strip().split()
        motif = meme.Motif(record.alphabet)
        motif.name = words[0]
        motif.length = int(words[1])
        # words[2] contains the best possible match
        record.append(motif)
def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' section and register its motifs.

    Lines are consumed from ``handle`` up to and including the blank line
    (or the end of input) that terminates the motif table. The table may
    have either three columns or five; in the five-column layout the second
    and third columns are stored as the motif ``id`` and ``alt_id``.

    Arguments:
     - record - object that receives the ``database`` and ``alphabet``
       attributes; every parsed motif is passed to ``record.append``.
     - handle - iterator over the lines of the file being parsed.

    Raises ValueError if an expected marker line is malformed, if a section
    header is never found, or if the input ends before the section is
    complete.
    """

    def read_required_line(expected):
        # next() with a default keeps a truncated file from leaking a bare
        # StopIteration to the caller; it is reported as a parse error, in
        # line with the other malformed-input checks in this function.
        following = next(handle, None)
        if following is None:
            raise ValueError(
                "Unexpected end of stream: expected %s" % expected)
        return following

    # Skip ahead to the section header.
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    else:
        raise ValueError(
            "Unexpected end of stream: 'DATABASE AND MOTIFS' not found")

    line = read_required_line("a line starting with '****'")
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)

    line = read_required_line("a line containing 'DATABASE'")
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    record.database = words[1]
    # Any other type tag leaves record.alphabet exactly as it was.
    if words[2] == '(nucleotide)':
        record.alphabet = "ACGT"
    elif words[2] == '(peptide)':
        record.alphabet = "ACDEFGHIKLMNPQRSTVWY"

    # Skip ahead to the header row of the motif table.
    for line in handle:
        if 'WIDTH BEST POSSIBLE MATCH' in line:
            break
    else:
        raise ValueError(
            "Unexpected end of stream: "
            "'WIDTH BEST POSSIBLE MATCH' header not found")

    line = read_required_line("a line containing '----'")
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    # Five dash groups in the separator row mean the table carries the
    # extra id / alt id columns.
    has_motif_ids = (len(line.strip().split()) == 5)

    # One motif per row; a blank line (or end of input) ends the table.
    for line in handle:
        if not line.strip():
            break
        words = line.strip().split()
        motif = meme.Motif(record.alphabet)
        motif.name = words[0]
        if has_motif_ids:
            motif.id = words[1]
            motif.alt_id = words[2]
        # Width is indexed from the end so both table layouts work.
        motif.length = int(words[-2])
        # words[-1] contains the best possible match
        record.append(motif)
def __read_metadata(record, xml_tree):
    """Copy top-level metadata from the XML tree and register its motifs.

    Sets ``version``, ``database``, ``alphabet`` and ``strand_handling`` on
    ``record`` (in that order), then appends one ``meme.Motif`` to
    ``record`` for every ``motif`` element found under ``motifs``.

    Arguments:
     - record - object receiving the attributes above and the motifs (via
       ``record.append``).
     - xml_tree - parsed XML document providing ``getroot()`` and
       ``find()`` (presumably an ``xml.etree.ElementTree.ElementTree``).
    """
    root = xml_tree.getroot()
    record.version = root.get('version')

    sequence_db = xml_tree.find('sequence_dbs').find('sequence_db')
    record.database = sequence_db.get('source')

    alphabet_node = xml_tree.find('alphabet')
    record.alphabet = alphabet_node.get('name')

    settings_node = xml_tree.find('settings')
    record.strand_handling = settings_node.get('strand_handling')
    # TODO - read other metadata

    motif_nodes = xml_tree.find('motifs').findall('motif')
    for position, node in enumerate(motif_nodes, start=1):
        motif = meme.Motif(record.alphabet)
        # TODO - motif.name not in XML - always index?
        motif.name = str(position)
        motif.id = node.get('id')
        motif.alt_id = node.get('alt')
        motif.length = int(node.get('length'))
        # TODO - add nsites, evalue
        record.append(motif)
def __read_metadata(record, xml_tree):
    """Populate ``record`` with document metadata and one motif per entry.

    The attributes ``version``, ``database``, ``alphabet`` and
    ``strand_handling`` are assigned on ``record`` in that order. After
    that, each ``motif`` element below ``motifs`` yields a ``meme.Motif``
    that is appended to ``record``; motifs are named by their 1-based
    position in the document.

    Arguments:
     - record - object receiving the attributes above and the motifs (via
       ``record.append``).
     - xml_tree - parsed XML document providing ``getroot()`` and
       ``find()`` (presumably an ``xml.etree.ElementTree.ElementTree``).
    """
    document_root = xml_tree.getroot()
    record.version = document_root.get("version")

    db_element = xml_tree.find("sequence_dbs").find("sequence_db")
    record.database = db_element.get("source")

    record.alphabet = xml_tree.find("alphabet").get("name")

    settings_element = xml_tree.find("settings")
    record.strand_handling = settings_element.get("strand_handling")
    # TODO - read other metadata

    motif_elements = xml_tree.find("motifs").findall("motif")
    for number, element in enumerate(motif_elements, start=1):
        motif = meme.Motif(record.alphabet)
        # TODO - motif.name not in XML - always index?
        motif.name = str(number)
        motif.id = element.get("id")
        motif.alt_id = element.get("alt")
        motif.length = int(element.get("length"))
        # TODO - add nsites, evalue
        record.append(motif)