nid + Martel.AnyEol()) # PID g6754304 pid = Martel.Group("pid", Martel.Re("[\w\d]+")) pid_line = Martel.Group("pid_line", Martel.Str("PID") + blank_space + pid + Martel.AnyEol()) # version and GI line # VERSION AC007323.5 GI:6587720 version = Martel.Group("version", Std.dbid(Martel.Re("[\w\d\.]+"), {"type" : "primary", "dbname" : "genbank"})) gi = Martel.Group("gi", Std.dbid(Martel.Re("[\d]+"), {"type" : "secondary", "dbname" : "genbank"})) version_line = Martel.Group("version_line", Martel.Str("VERSION") + blank_space + version + Martel.Opt(blank_space + Martel.Str("GI:") + gi) + Martel.AnyEol()) # DBSOURCE REFSEQ: accession NM_010510.1
""" import warnings warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at [email protected] to avoid permanent removal of this module from Biopython", DeprecationWarning) from Bio import Std import Martel from Martel import Time import sprot40 # The ID line contains a versioned period number ID_exp = Martel.Group("ID", Martel.Str("ID ") + \ Std.dbid(Martel.Group("entry_name", Martel.Re("[\w.]+")), {"type": "primary", "dbname": "sp"}) + \ Martel.Spaces() + \ Martel.Word("data_class_table") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Word("molecule_type") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Digits("sequence_length") + \ Martel.Str(" AA.") + \ Martel.AnyEol() ) # The DT formatted lines look different, and there is not # a third DT line for annotations # DT 04-MAR-2003 (IPI Human rel. 2.17, Created) # DT 04-MAR-2003 (IPI Human rel. 2.17, Last sequence update)
o block_data - A callback tag for the data in the block (ie. the stuff
    you are interested in).
    """
    # Number of spaces needed after the identifier to pad it out to INDENT
    # characters; the assert requires the identifier to be shorter than INDENT.
    diff = INDENT - len(identifier)
    assert diff > 0, diff
    # First line: padded identifier followed by data up to the end of line.
    # Then any number of either a bare end-of-line or a line that starts
    # with INDENT spaces and carries more data under the same block_data tag.
    return Martel.Group(block_tag,
                        Martel.Str(identifier + " " * diff) +
                        Martel.ToEol(block_data) +
                        Martel.Rep(Martel.AnyEol() |
                                   (Martel.Str(" " * INDENT) +
                                    Martel.ToEol(block_data))))

# The first line
# LOCUS AC007323 86436 bp DNA PLN 19-JAN-2000
# The locus name is tagged as the primary "gb" database id.
locus = Std.dbid(Martel.Word(), {"dbname": "gb", "type": "primary"})
size = Martel.Group("size", Martel.Rep1(Martel.Integer()))

# deal with the different kinds of residues we can have
# NOTE(review): Martel.Str is given three strings here; presumably it
# matches any one of them -- confirm against the Martel API.
residue_prefixes = Martel.Str("ss-", "ds-", "ms-")
# Each entry pairs a residue keyword with the alphabet name passed to
# Std.alphabet for it.
residue_types = [
    Std.alphabet(Martel.Str("DNA"), {"alphabet": "iupac-ambiguous-dna"}),
    Std.alphabet(Martel.Str("RNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("mRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("tRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("rRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("uRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("snRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("PROTEIN"), {"alphabet": "iupac-protein"}),
from Martel import RecordReader, Time
from Bio import Std
from Bio.expressions.swissprot import sprot38

# Shared Martel.Spaces() expression reused between the fields below.
whitespace = Martel.Spaces()

## ID - identification (begins each entry; 1 per entry)
# ID entryname dataclass; molecule; division; sequencelength BP.
# The pattern lists the named division codes and ends with "[A-Z]{3}",
# so any other three-capital-letter code is presumably accepted as well.
divisions = Martel.Re("EST|PHG|FUN|GSS|HTC|HTG|HUM|INV|ORG|MAM|VRT|PLN|" + \
                      "PRO|ROD|SYN|STS|UNC|VRL|[A-Z]{3}")
# XXX is found in S40706
# The molecule field selects the alphabet: DNA / circular DNA, RNA /
# circular RNA, or the "XXX" placeholder tagged as generic "nucleotide".
ID_line = Martel.Str("ID ") + \
    Std.dbid(Martel.UntilSep("entry_name", " "),
             {"type": "primary", "dbname": "embl"}) + \
    whitespace + \
    Martel.ToSep("dataclass", ";") + \
    whitespace + \
    Martel.Group("molecule",
        Std.alphabet(Martel.Str("DNA", "circular DNA"),
                     {"alphabet": "iupac-ambiguous-dna"}) |
        Std.alphabet(Martel.Str("RNA", "circular RNA"),
                     {"alphabet": "iupac-ambiguous-rna"}) |
        Std.alphabet(Martel.Str("XXX"),
                     {"alphabet": "nucleotide"})) + \
    Martel.Str("; ") + \
    Martel.Group("division", divisions) + \
    Martel.Str("; ") + \
    Martel.Digits("length") + \
    Martel.Str(" BP.") + \
import Martel from Martel import RecordReader, Time from Bio import Std def Simple(tag, tag_data): return Martel.Group(tag, Martel.Str(tag + " ") + \ Martel.ToEol(tag_data) ) #--- ID ID = Martel.Group("ID", Martel.Str("ID ") + \ Std.dbid(Martel.Word("entry_name"), {"type": "primary", "dbname": "sp"}) + \ Martel.Spaces() + \ Martel.Word("data_class_table") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Word("molecule_type") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Digits("sequence_length") + \ Martel.Str(" AA.") + \ Martel.AnyEol() ) #--- AC AC = Martel.Group("AC", Martel.Str("AC ") + \ Std.dbid(Martel.Word("ac_number"), {"type": "accession",
# The contact address is spelled out in full so that users who see the
# warning know where to write.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this module "
    "from Biopython",
    DeprecationWarning)

from Martel import *
from Martel import RecordReader
from Bio import Std

# Header goes up to the line starting with "ID"
# (repeat: a line that does not start with "ID ", consumed to end of line)
header = Rep(AssertNot(Str("ID ")) +
             ToEol())

# ID kringle; BLOCK
# ID 14-3-3; BLOCK
# but not!
# IDSA_METJA|Q58270 ( 46) GGKRIRPYLTV 11
# The text up to the ";" is tagged as the primary id.
ID = (Str("ID ") + Std.dbid(ToSep(sep = ";"), {"type": "primary"}) +
      Str(" BLOCK") + AnyEol())

# AC IPB000001A; distance from previous block=(10,266)
# The text up to the ";" is tagged as the accession; the two integers in
# the parentheses are captured as "dist1" and "dist2".
AC = (Str("AC ") + Std.dbid(ToSep(sep = ";"), {"type": "accession"}) +
      Str(" distance from previous block=(") +
      Integer("dist1") + Str(",") + Integer("dist2") +
      Str(")") + AnyEol())

# DE Kringle domain
# If the DE line is long, it doesn't fold .. it's all on one line
DE = Str("DE ") + ToEol("description")

# BL CCY; width=14; seqs=44; 99.5%=717; strength=1059