예제 #1
0
def make_2id(s, dbname, primary_name, secondary_name):
    """Build the expression for an identifier made of a tag and two fields.

    The expression is the literal tag `s`, a "|", an optional first field
    and a second field:

      s|FIRST|SECOND    when primary_name is given
      s||SECOND         when primary_name is None

    Each field is wrapped in Std.dbxref_dbid with the attributes "dbname"
    (set to `dbname`) and "type" (primary_name for the first field,
    secondary_name for the second).  The first field is read with
    UntilSep(sep = "|"), the second with UntilSep(sep = "| ").

    secondary_name must not be None (checked with an assert).
    """
    assert secondary_name is not None

    if primary_name is not None:
        # tag, first field, and the "|" in front of the second field
        expr = Str(s + "|")
        expr = expr + Std.dbxref_dbid(UntilSep(sep = "|"),
                                      {"dbname": dbname,
                                       "type": primary_name})
        expr = expr + Str("|")
    else:
        # no first field: the two separators sit directly next to each other
        expr = Str(s + "||")

    second_field = Std.dbxref_dbid(UntilSep(sep = "| "),
                                   {"dbname": dbname,
                                    "type": secondary_name})
    return expr + second_field
예제 #2
0
# Database:  Non-redundant GenBank CDS translations+PDB+SwissProt+SPupdate+PIR
#            307,320 sequences; 92,696,426 total letters.

# Database: mgpep
#            468 sequences; 170,400 total letters

# Database:  plantPept
#            6418 sequences; 2,370,771 total letters.

# Database: Non-redundant GenBank CDS
# translations+PDB+SwissProt+SPupdate+PIR
#            301,269 sequences; 90,873,415 total letters

# The "Database:" header.  The name is taken with UntilEol() from the first
# line; the following line carries the sequence count and the letter count,
# both declared with the "int.comma" decoder.
query_database = (
    Str("Database:")
    + Opt(Spaces())
    + Std.database_name(UntilEol())
    + AnyEol()
    + Spaces()
    + Std.database_num_sequences(Number(), {"bioformat:decode": "int.comma"})
    + Str(" sequences;")
    + Spaces()
    + Std.database_num_letters(Number(), {"bioformat:decode": "int.comma"})
    + Spaces()
    # notice the "." at the end of the literal: it is part of what must match
    + Str("total letters.")
    + ToEol()
)


#                                                                      Smallest
#                                                                        Sum
#                                                               High  Probability
# Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
#  
예제 #3
0
                        nid +
                        Martel.AnyEol())

# PID line, for example:
# PID         g6754304
# The patterns below are raw strings so that "\w", "\d" and "\." reach
# Martel.Re exactly as written instead of relying on Python leaving
# unrecognised escape sequences alone.
pid = Martel.Group("pid",
                   Martel.Re(r"[\w\d]+"))
pid_line = Martel.Group("pid_line",
                        Martel.Str("PID") +
                        blank_space +
                        pid +
                        Martel.AnyEol())

# version and GI line, for example:
# VERSION     AC007323.5  GI:6587720
# The version is tagged as the primary "genbank" id ...
version = Martel.Group("version",
                       Std.dbid(Martel.Re(r"[\w\d\.]+"),
                                {"type" : "primary", "dbname" : "genbank"}))

# ... and the GI number as the secondary "genbank" id.
gi = Martel.Group("gi",
                  Std.dbid(Martel.Re(r"[\d]+"),
                           {"type" : "secondary", "dbname" : "genbank"}))

version_line = Martel.Group("version_line",
                            Martel.Str("VERSION") +
                            blank_space +
                            version +
                            # the "GI:" part is wrapped in Opt, so a line
                            # with only a version still matches
                            Martel.Opt(blank_space +
                                       Martel.Str("GI:") +
                                       gi) +
                            Martel.AnyEol())

# DBSOURCE    REFSEQ: accession NM_010510.1
예제 #4
0
"""

import warnings
# Emitted once at import time.  The contact address had been replaced by an
# "[email protected]" placeholder; the developer mailing list is restored so
# the message is actionable.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this "
    "module from Biopython",
    DeprecationWarning)



from Bio import Std
import Martel
from Martel import Time
import sprot40

# The ID line.  The entry name may contain "." (a versioned identifier), so
# it is matched with the character class [\w.]+ .  The pattern is a raw
# string so that "\w" is handed to Martel.Re exactly as written.
# Layout: "ID   " entry_name, data_class_table ";" molecule_type ";"
# sequence_length " AA." and the end of line.
ID_exp = Martel.Group("ID",
                  Martel.Str("ID   ") +
                  Std.dbid(Martel.Group("entry_name", Martel.Re(r"[\w.]+")),
                      {"type": "primary", "dbname": "sp"}) +
                  Martel.Spaces() +
                  Martel.Word("data_class_table") +
                  Martel.Str(";") + Martel.Spaces() +
                  Martel.Word("molecule_type") +
                  Martel.Str(";") + Martel.Spaces() +
                  Martel.Digits("sequence_length") +
                  Martel.Str(" AA.") +
                  Martel.AnyEol()
                  )

# The DT formatted lines look different, and there is not
# a third DT line for annotations
# DT   04-MAR-2003 (IPI Human rel. 2.17, Created)
# DT   04-MAR-2003 (IPI Human rel. 2.17, Last sequence update)
예제 #5
0
outputs and that the family line is not parsed into its respective parts
(see multitude of comments on this below).
"""

import warnings
# Emitted once at import time.  The contact address had been replaced by an
# "[email protected]" placeholder; the developer mailing list is restored so
# the message is actionable.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this "
    "module from Biopython",
    DeprecationWarning)



from Martel import *
from Martel import RecordReader
from Bio import Std

# -- header
# First header line: the literal program name, tagged as the application
# name, followed by the rest of the line.  Example:
# hmmpfam - search one or more sequences against HMM database
program_description = Std.application_name(Str("hmmpfam")) + ToEol()

# Second header line: "HMMER " and a version written either as
# digit "." digit word-character or as digit "." digit "." digit.  Example:
# HMMER 2.2g (August 2001)
program_version = (
    Str("HMMER ")
    + Std.application_version(Re(r"\d\.\d\w") | Re(r"\d\.\d\.\d"))
    + ToEol()
)

# The next two lines are consumed whole without being tagged.  Example:
# Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
# Freely distributed under the GNU General Public License (GPL)

copyright = ToEol() + ToEol()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
예제 #6
0
    o block_data - A callback tag for the data in the block (ie. the
    stuff you are interested in).
    """
    diff = INDENT - len(identifier)
    assert diff > 0, diff

    return Martel.Group(block_tag,
                        Martel.Str(identifier + " " * diff) +
                        Martel.ToEol(block_data) +
                        Martel.Rep(Martel.AnyEol() |
                                   (Martel.Str(" " * INDENT) + Martel.ToEol(block_data))))
                                   

# The first line
# LOCUS       AC007323    86436 bp    DNA             PLN       19-JAN-2000
# The locus name is a single Martel.Word(), tagged as the primary "gb" id.
locus = Std.dbid(
    Martel.Word(),
    {"dbname": "gb", "type": "primary"})

# Martel.Rep1 over Martel.Integer(), grouped under the "size" tag.
size = Martel.Group(
    "size",
    Martel.Rep1(Martel.Integer()))

# deal with the different kinds of residues we can have
# NOTE(review): these look like the strandedness markers written in front of
# the residue type -- confirm against where residue_prefixes is used.
residue_prefixes = Martel.Str("ss-", "ds-", "ms-")
residue_types = [
    Std.alphabet(Martel.Str("DNA"), {"alphabet": "iupac-ambiguous-dna"}),
    Std.alphabet(Martel.Str("RNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("mRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("tRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("rRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("uRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("snRNA"), {"alphabet": "iupac-ambiguous-rna"}),
    Std.alphabet(Martel.Str("PROTEIN"), {"alphabet": "iupac-protein"}),
예제 #7
0
from Martel import RecordReader, Time
from Bio import Std

from Bio.expressions.swissprot import sprot38

# Shared shorthand for Martel.Spaces().
whitespace = Martel.Spaces()

## ID - identification             (begins each entry; 1 per entry)
# ID   entryname  dataclass; molecule; division; sequencelength BP.

# Division codes.  The final alternative, [A-Z]{3}, accepts any three
# capital letters, so the names listed before it mostly serve as a record
# of the codes that were known when this was written.
divisions = Martel.Re(
    "EST|PHG|FUN|GSS|HTC|HTG|HUM|INV|ORG|MAM|VRT|PLN|"
    "PRO|ROD|SYN|STS|UNC|VRL|[A-Z]{3}")

# XXX is found in S40706
ID_line = Martel.Str("ID   ") + \
          Std.dbid(Martel.UntilSep("entry_name", " "), {"type": "primary",
                                                        "dbname": "embl"}) + \
          whitespace + \
          Martel.ToSep("dataclass", ";") + \
          whitespace + \
          Martel.Group("molecule",
                       Std.alphabet(Martel.Str("DNA", "circular DNA"),
                                    {"alphabet": "iupac-ambiguous-dna"}) |
                       Std.alphabet(Martel.Str("RNA", "circular RNA"),
                                    {"alphabet": "iupac-ambiguous-rna"}) |
                       Std.alphabet(Martel.Str("XXX"),
                                    {"alphabet": "nucleotide"})) + \
          Martel.Str("; ") + \
          Martel.Group("division", divisions) + \
          Martel.Str("; ") + \
          Martel.Digits("length") + \
          Martel.Str(" BP.") + \
예제 #8
0

import Martel
from Martel import RecordReader, Time
from Bio import Std

def Simple(tag, tag_data):
    """Build the expression for a one-line record introduced by `tag`.

    The line is the literal `tag` followed by three spaces, then the rest
    of the line captured with Martel.ToEol(tag_data).  The whole line is
    grouped under the name `tag`.
    """
    line_prefix = Martel.Str(tag + "   ")
    line_rest = Martel.ToEol(tag_data)
    return Martel.Group(tag, line_prefix + line_rest)
#--- ID

# The ID line, grouped under "ID".  Layout:
#   "ID   " entry_name, spaces, data_class_table ";" molecule_type ";"
#   sequence_length " AA." and the end of line.
# The entry name is tagged as the primary "sp" id.
ID = Martel.Group(
    "ID",
    Martel.Str("ID   ")
    + Std.dbid(Martel.Word("entry_name"), {"type": "primary",
                                           "dbname": "sp"})
    + Martel.Spaces()
    + Martel.Word("data_class_table")
    + Martel.Str(";")
    + Martel.Spaces()
    + Martel.Word("molecule_type")
    + Martel.Str(";")
    + Martel.Spaces()
    + Martel.Digits("sequence_length")
    + Martel.Str(" AA.")
    + Martel.AnyEol()
)
#--- AC

AC = Martel.Group("AC",
                  Martel.Str("AC   ") + \
                  Std.dbid(Martel.Word("ac_number"),
                           {"type": "accession",
예제 #9
0
                        nid +
                        Martel.AnyEol())

# PID line, for example:
# PID         g6754304
# The patterns below are raw strings so that "\w", "\d" and "\." reach
# Martel.Re exactly as written instead of relying on Python leaving
# unrecognised escape sequences alone.
pid = Martel.Group("pid",
                   Martel.Re(r"[\w\d]+"))
pid_line = Martel.Group("pid_line",
                        Martel.Str("PID") +
                        blank_space +
                        pid +
                        Martel.AnyEol())

# version and GI line, for example:
# VERSION     AC007323.5  GI:6587720
# The version is tagged as the primary "genbank" id ...
version = Martel.Group("version",
                       Std.dbid(Martel.Re(r"[\w\d\.]+"),
                                {"type" : "primary", "dbname" : "genbank"}))

# ... and the GI number as the secondary "genbank" id.
gi = Martel.Group("gi",
                  Std.dbid(Martel.Re(r"[\d]+"),
                           {"type" : "secondary", "dbname" : "genbank"}))

version_line = Martel.Group("version_line",
                            Martel.Str("VERSION") +
                            blank_space +
                            version +
                            # the "GI:" part is wrapped in Opt, so a line
                            # with only a version still matches
                            Martel.Opt(blank_space +
                                       Martel.Str("GI:") +
                                       gi) +
                            Martel.AnyEol())

# DBSOURCE    REFSEQ: accession NM_010510.1
예제 #10
0
def make_1id(s, dbname, name):
    """Build the expression for an identifier made of a tag and one field.

    Matches the literal `s` followed by "|", then a single field read with
    UntilSep(sep = "| ") and wrapped in Std.dbxref_dbid with the attributes
    "dbname" (set to `dbname`) and "type" (set to `name`).
    """
    tag = Str(s + "|")
    field = Std.dbxref_dbid(UntilSep(sep = "| "),
                            {"dbname": dbname, "type": name})
    return tag + field
예제 #11
0
# Each statement below appends one identifier format to `ids`.  The list is
# folded with "|" further down, so the order of the appends is the order of
# the alternatives in the combined expression.

# SWISS-PROT                   sp|accession|entry name
ids.append(make_2id("sp", "sp", "primary", "secondary"))

# Brookhaven Protein Data Bank pdb|entry|chain
# (the author's XXX marks the primary/secondary labels as not correct)
ids.append(make_2id("pdb", "x-pdb", "primary", "secondary"))  # XXX not correct

# Patents                      pat|country|number
# (same caveat as for pdb)
ids.append(make_2id("pat", "x-pat", "primary", "secondary"))  # XXX not correct

# GenInfo Backbone Id          bbs|number
ids.append(make_1id("bbs", "x-bbs", "primary"))

# General database identifier  gnl|database|identifier
# Built by hand rather than with make_2id because the first field is tagged
# with Std.dbxref_dbname instead of Std.dbxref_dbid.
gnl_id = Str("gnl|") + \
         Std.dbxref_dbname(UntilSep(sep = "| ")) + \
         Str("|") + \
         Std.dbxref_dbid(UntilSep(sep = "| "))
ids.append(gnl_id)

# NCBI Reference Sequence      ref|accession|locus
ids.append(make_2id("ref", "x-ref", "primary", "secondary"))

# Local Sequence identifier    lcl|identifier
ids.append(make_1id("lcl", "local", "primary"))

# "|" them all together: fold the list with operator.or_ and wrap the
# resulting alternation in Std.dbxref.
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# One identifier, followed by any number of further identifiers that are
# each introduced by "|".  The commented-out line is an earlier first term
# that is no longer part of the expression.
#ncbi_term = Assert(Re("[^ \R]+\|")) + \
ncbi_term =  ncbi_word + Rep(Str("|") + ncbi_word)
예제 #12
0
# Emitted once at import time.  The contact address had been replaced by an
# "[email protected]" placeholder; the developer mailing list is restored so
# the message is actionable.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this "
    "module from Biopython",
    DeprecationWarning)


from Martel import *
from Martel import RecordReader
from Bio import Std

# Header goes up to the line starting with "ID": repeat whole lines for as
# long as the next line does not begin with "ID   ".
header = Rep(AssertNot(Str("ID   ")) + ToEol())

# The ID line: everything up to the ";" is tagged as the primary id.
# ID   kringle; BLOCK
# ID   14-3-3; BLOCK
#  but not!
# IDSA_METJA|Q58270  (  46) GGKRIRPYLTV  11
ID = (
    Str("ID   ")
    + Std.dbid(ToSep(sep = ";"), {"type": "primary"})
    + Str(" BLOCK")
    + AnyEol()
)

# The AC line: the accession up to the ";", then the two integers of the
# distance range, captured as "dist1" and "dist2".
# AC   IPB000001A; distance from previous block=(10,266)
AC = (
    Str("AC   ")
    + Std.dbid(ToSep(sep = ";"), {"type": "accession"})
    + Str(" distance from previous block=(")
    + Integer("dist1")
    + Str(",")
    + Integer("dist2")
    + Str(")")
    + AnyEol()
)


# DE   Kringle domain
#  If the DE line is long, it doesn't fold .. it's all on one line
DE = Str("DE   ") + ToEol("description")


# BL   CCY;  width=14; seqs=44; 99.5%=717; strength=1059