def make_2id(s, dbname, primary_name, secondary_name):
    """Build the expression for a tagged identifier with two id fields.

    The result is Str(s + "|"), a dbxref_dbid group typed primary_name,
    Str("|"), and a dbxref_dbid group typed secondary_name.  When
    primary_name is None the first group is left out and the literal
    prefix becomes s + "||".

    s - literal text that introduces the identifier
    dbname - stored as "dbname" in the attributes of every dbxref_dbid
    primary_name - "type" attribute of the first id field, or None
    secondary_name - "type" attribute of the last id field (required)
    """
    assert secondary_name is not None

    # The last field is built with UntilSep(sep = "| ") in both forms.
    secondary = Std.dbxref_dbid(UntilSep(sep = "| "),
                                {"dbname": dbname, "type": secondary_name})
    if primary_name is None:
        return Str(s + "||") + secondary

    primary = Std.dbxref_dbid(UntilSep(sep = "|"),
                              {"dbname": dbname, "type": primary_name})
    return Str(s + "|") + primary + Str("|") + secondary
# Sample "Database:" sections:
#
# Database:  Non-redundant GenBank CDS translations+PDB+SwissProt+SPupdate+PIR
#            307,320 sequences; 92,696,426 total letters.
# Database:  mgpep
#            468 sequences; 170,400 total letters
# Database:  plantPept
#            6418 sequences; 2,370,771 total letters.
# Database:  Non-redundant GenBank CDS
#            translations+PDB+SwissProt+SPupdate+PIR
#            301,269 sequences; 90,873,415 total letters
#
# NOTE(review): two of the samples above do not end in "."; as written the
# expression below requires the literal "total letters." -- confirm which
# samples apply to this format.
query_database = (
    Str("Database:") + Opt(Spaces()) +
    Std.database_name(UntilEol()) + AnyEol() +
    Spaces() +
    Std.database_num_sequences(Number(), {"bioformat:decode": "int.comma"}) +
    Str(" sequences;") + Spaces() +
    Std.database_num_letters(Number(), {"bioformat:decode": "int.comma"}) +
    Spaces() +
    # notice the "." at the end of this literal
    Str("total letters.") +
    ToEol()
)

#                                                                      Smallest
#                                                                        Sum
#                                                               High  Probability
# Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
#
nid + Martel.AnyEol()) # PID g6754304 pid = Martel.Group("pid", Martel.Re("[\w\d]+")) pid_line = Martel.Group("pid_line", Martel.Str("PID") + blank_space + pid + Martel.AnyEol()) # version and GI line # VERSION AC007323.5 GI:6587720 version = Martel.Group("version", Std.dbid(Martel.Re("[\w\d\.]+"), {"type" : "primary", "dbname" : "genbank"})) gi = Martel.Group("gi", Std.dbid(Martel.Re("[\d]+"), {"type" : "secondary", "dbname" : "genbank"})) version_line = Martel.Group("version_line", Martel.Str("VERSION") + blank_space + version + Martel.Opt(blank_space + Martel.Str("GI:") + gi) + Martel.AnyEol()) # DBSOURCE REFSEQ: accession NM_010510.1
""" import warnings warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at [email protected] to avoid permanent removal of this module from Biopython", DeprecationWarning) from Bio import Std import Martel from Martel import Time import sprot40 # The ID line contains a versioned period number ID_exp = Martel.Group("ID", Martel.Str("ID ") + \ Std.dbid(Martel.Group("entry_name", Martel.Re("[\w.]+")), {"type": "primary", "dbname": "sp"}) + \ Martel.Spaces() + \ Martel.Word("data_class_table") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Word("molecule_type") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Digits("sequence_length") + \ Martel.Str(" AA.") + \ Martel.AnyEol() ) # The DT formatted lines look different, and there is not # a third DT line for annotations # DT 04-MAR-2003 (IPI Human rel. 2.17, Created) # DT 04-MAR-2003 (IPI Human rel. 2.17, Last sequence update)
outputs and that the family line is not parsed into it's respective parts (see multitude of comments on this below). """ import warnings warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at [email protected] to avoid permanent removal of this module from Biopython", DeprecationWarning) from Martel import * from Martel import RecordReader from Bio import Std # -- header # hmmpfam - search one or more sequences against HMM database program_description = (Std.application_name(Str("hmmpfam")) + ToEol()) # HMMER 2.2g (August 2001) program_version = (Str("HMMER ") + Std.application_version(Re(r"\d\.\d\w") | Re(r"\d\.\d\.\d")) + ToEol()) # Copyright (C) 1992-2001 HHMI/Washington University School of Medicine # Freely distributed under the GNU General Public License (GPL) copyright = (ToEol() + ToEol()) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
o block_data - A callback tag for the data in the block (ie. the stuff you are interested in). """ diff = INDENT - len(identifier) assert diff > 0, diff return Martel.Group(block_tag, Martel.Str(identifier + " " * diff) + Martel.ToEol(block_data) + Martel.Rep(Martel.AnyEol() | (Martel.Str(" " * INDENT) + Martel.ToEol(block_data)))) # The first line # LOCUS AC007323 86436 bp DNA PLN 19-JAN-2000 locus = Std.dbid(Martel.Word(), {"dbname": "gb", "type": "primary"}) size = Martel.Group("size", Martel.Rep1(Martel.Integer())) # deal with the different kinds of residues we can have residue_prefixes = Martel.Str("ss-", "ds-", "ms-") residue_types = [ Std.alphabet(Martel.Str("DNA"), {"alphabet": "iupac-ambiguous-dna"}), Std.alphabet(Martel.Str("RNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("mRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("tRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("rRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("uRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("snRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("PROTEIN"), {"alphabet": "iupac-protein"}),
from Martel import RecordReader, Time from Bio import Std from Bio.expressions.swissprot import sprot38 whitespace = Martel.Spaces() ## ID - identification (begins each entry; 1 per entry) # ID entryname dataclass; molecule; division; sequencelength BP. divisions = Martel.Re("EST|PHG|FUN|GSS|HTC|HTG|HUM|INV|ORG|MAM|VRT|PLN|" + \ "PRO|ROD|SYN|STS|UNC|VRL|[A-Z]{3}") # XXX is found in S40706 ID_line = Martel.Str("ID ") + \ Std.dbid(Martel.UntilSep("entry_name", " "), {"type": "primary", "dbname": "embl"}) + \ whitespace + \ Martel.ToSep("dataclass", ";") + \ whitespace + \ Martel.Group("molecule", Std.alphabet(Martel.Str("DNA", "circular DNA"), {"alphabet": "iupac-ambiguous-dna"}) | Std.alphabet(Martel.Str("RNA", "circular RNA"), {"alphabet": "iupac-ambiguous-rna"}) | Std.alphabet(Martel.Str("XXX"), {"alphabet": "nucleotide"})) + \ Martel.Str("; ") + \ Martel.Group("division", divisions) + \ Martel.Str("; ") + \ Martel.Digits("length") + \ Martel.Str(" BP.") + \
import Martel from Martel import RecordReader, Time from Bio import Std def Simple(tag, tag_data): return Martel.Group(tag, Martel.Str(tag + " ") + \ Martel.ToEol(tag_data) ) #--- ID ID = Martel.Group("ID", Martel.Str("ID ") + \ Std.dbid(Martel.Word("entry_name"), {"type": "primary", "dbname": "sp"}) + \ Martel.Spaces() + \ Martel.Word("data_class_table") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Word("molecule_type") + \ Martel.Str(";") + Martel.Spaces() + \ Martel.Digits("sequence_length") + \ Martel.Str(" AA.") + \ Martel.AnyEol() ) #--- AC AC = Martel.Group("AC", Martel.Str("AC ") + \ Std.dbid(Martel.Word("ac_number"), {"type": "accession",
def make_1id(s, dbname, name):
    """Build the expression for a tagged identifier with one id field.

    The result is Str(s + "|") followed by a dbxref_dbid group built on
    UntilSep(sep = "| ").

    s - literal text that introduces the identifier
    dbname - stored as "dbname" in the dbxref_dbid attributes
    name - stored as "type" in the dbxref_dbid attributes
    """
    id_attrs = {"dbname": dbname, "type": name}
    prefix = Str(s + "|")
    return prefix + Std.dbxref_dbid(UntilSep(sep = "| "), id_attrs)
# SWISS-PROT                    sp|accession|entry name
ids.append(make_2id("sp", "sp", "primary", "secondary"))

# Brookhaven Protein Data Bank  pdb|entry|chain
ids.append(make_2id("pdb", "x-pdb", "primary", "secondary"))  # XXX not correct

# Patents                       pat|country|number
ids.append(make_2id("pat", "x-pat", "primary", "secondary"))  # XXX not correct

# GenInfo Backbone Id           bbs|number
ids.append(make_1id("bbs", "x-bbs", "primary"))

# General database identifier   gnl|database|identifier
# Built by hand because the middle field is tagged as a dbxref_dbname
# rather than a dbxref_dbid.
gnl_id = (Str("gnl|") +
          Std.dbxref_dbname(UntilSep(sep = "| ")) +
          Str("|") +
          Std.dbxref_dbid(UntilSep(sep = "| ")))
ids.append(gnl_id)

# NCBI Reference Sequence       ref|accession|locus
ids.append(make_2id("ref", "x-ref", "primary", "secondary"))

# Local Sequence identifier     lcl|identifier
ids.append(make_1id("lcl", "local", "primary"))

# "|" them all together: every entry of ids becomes one alternative
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

#ncbi_term = Assert(Re("[^ \R]+\|")) + \
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)
warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at [email protected] to avoid permanent removal of this module from Biopython", DeprecationWarning)

from Martel import *
from Martel import RecordReader
from Bio import Std

# Header goes up to the line starting with "ID"
header = Rep(AssertNot(Str("ID ")) + ToEol())

# ID   kringle; BLOCK
# ID   14-3-3; BLOCK
#  but not!
# IDSA_METJA|Q58270 (  46) GGKRIRPYLTV  11
ID = (Str("ID ") +
      Std.dbid(ToSep(sep = ";"), {"type": "primary"}) +
      Str(" BLOCK") + AnyEol())

# AC   IPB000001A; distance from previous block=(10,266)
AC = (Str("AC ") +
      Std.dbid(ToSep(sep = ";"), {"type": "accession"}) +
      Str(" distance from previous block=(") +
      Integer("dist1") + Str(",") + Integer("dist2") +
      Str(")") + AnyEol())

# DE   Kringle domain
# If the DE line is long, it doesn't fold .. it's all on one line
DE = Str("DE ") + ToEol("description")

# BL   CCY;  width=14; seqs=44; 99.5%=717; strength=1059