Esempio n. 1
0
def make_2id(s, dbname, primary_name, secondary_name):
    """Build the expression for an identifier with two "|"-separated fields.

    s -- literal tag that starts the identifier (e.g. "sp", "pdb")
    dbname -- value stored under "dbname" in each captured id's attributes
    primary_name -- value stored under "type" for the first field, or None
        when the first field is expected to be empty (text starts "s||")
    secondary_name -- value stored under "type" for the second field;
        must not be None

    Returns the combined expression.
    """
    assert secondary_name is not None

    def tagged_id(separators, id_type):
        # One dbxref_dbid field delimited by UntilSep(sep=separators) and
        # tagged with this identifier's dbname and the given type.
        return Std.dbxref_dbid(UntilSep(sep=separators),
                               {"dbname": dbname, "type": id_type})

    if primary_name is None:
        # No primary field: literal "s||" followed directly by the secondary id.
        return Str(s + "||") + tagged_id("| ", secondary_name)

    return (Str(s + "|")
            + tagged_id("|", primary_name)
            + Str("|")
            + tagged_id("| ", secondary_name))
           Martel.AnyEol() + \
           Martel.Str("FH") + \
           Martel.AnyEol()

## FT - feature table data         (>=0 per entry)
##FT_line = Martel.Str("FT   ") + \
##          Martel.ToEol("ft_data")
##FT_block = Martel.Rep1(FT_line)

# Feature qualifier of the form   db_xref="<dbname>:<dbid>"   followed by an
# end-of-line.  The quoted text is the qualifier description; inside it the
# part before ":" is captured as the dbxref database name and the part up to
# the closing quote as the dbxref database id.
fq_dbxref = (
    Std.feature_qualifier_name(Martel.Str("db_xref"))
    + Martel.Str('=')
    + Std.feature_qualifier_description(
        Martel.Str('"')
        + Std.dbxref(
            Std.dbxref_dbname(Martel.UntilSep(None, ":"))
            + Martel.Str(":")
            + Std.dbxref_dbid(Martel.UntilSep(None, '"'))
        )
        + Martel.Str('"')
    )
    + Martel.AnyEol()
)
                       

# Generic feature qualifier:  <word>=<description>  on the first line,
# followed by zero or more continuation lines.  A continuation line starts
# with the "FT" + blank-padding prefix and must not begin a new qualifier
# (checked by the AssertNot alternatives); its text is captured as a further
# feature_qualifier_description.
#
# NOTE(review): the statement previously ended in a dangling "+ \" followed
# by unbalanced ")" characters and did not parse.  The continuation line is
# now terminated with Martel.AnyEol(), mirroring the UntilEol()/AnyEol()
# pairing used for the first line above -- confirm against the upstream
# grammar.
fq_generic = \
           Martel.Assert(Martel.Word() + Martel.Str("=")) + \
           Std.feature_qualifier_name(Martel.Word()) + \
           Martel.Str("=") + \
           Std.feature_qualifier_description(Martel.UntilEol()) + \
           Martel.AnyEol() + \
           Martel.Rep(
               Martel.Str("FT                   ") + \
               (Martel.AssertNot(Martel.Str("/")) |
                Martel.AssertNot(Martel.Re(r"/\w+="))) + \
               Std.feature_qualifier_description(Martel.UntilEol()) + \
               Martel.AnyEol()
           )

#--- DR

# This is needed for things like
#   DR   MGD; MGI:95401; EPB4.1.
# where I need to scan up to the last "."  That is, I want
# "EPB4.1" to be the secondary identifier, not "EPB4" nor "EPB4.1."

# One or more characters, each of which is either not a "." (and not \R,
# presumably Martel's end-of-line escape -- confirm) or a "." that is not
# immediately followed by \R.  This stops just before the final "." of the
# line while still allowing embedded dots such as in "EPB4.1".
_to_secondary_end = Martel.Re(
    r"([^.\R]|(?!.\R)\.)+"
)

# Database name: text up to the first ";", tagged with style "sp".
database_id = Std.dbxref_dbname(
    Martel.UntilSep("database_identifier", ";"),
    {"style": "sp"},
)

# Primary identifier: text up to the next ";", tagged as type "primary".
primary_id = Std.dbxref_dbid(
    Martel.UntilSep("primary_identifier", ";"),
    {"type": "primary"},
)

# Secondary identifier: everything matched by _to_secondary_end, tagged as
# type "accession".
secondary_id = Std.dbxref_dbid(
    Martel.Group("secondary_identifier", _to_secondary_end),
    {"type": "accession"},
)

# used in StdHandler for fast dbxref - don't rename!
# Full cross-reference: "<database>; <primary>; <secondary>".
real_DR_general = Std.dbxref(
    database_id + Martel.Str("; ") + primary_id + Martel.Str("; ") + secondary_id,
)

# Same expression wrapped as a fast_dbxref with style "sp-general".
fast_DR_general = Std.fast_dbxref(real_DR_general, {"style": "sp-general"})

# FastFeature registered under "fast-sp-dbxref"; it is given the group names
# of the full (non-fast) expression.
DR_general = Martel.FastFeature(
    fast_DR_general,
    "fast-sp-dbxref",
    real_DR_general.group_names(),
)
Esempio n. 4
0
def make_1id(s, dbname, name):
    """Build the expression for an identifier with a single field: ``s|<id>``.

    s -- literal tag that starts the identifier (e.g. "bbs", "lcl")
    dbname -- value stored under "dbname" in the captured id's attributes
    name -- value stored under "type" in the captured id's attributes

    Returns the literal "s|" followed by a dbxref_dbid delimited by
    UntilSep(sep="| ").
    """
    prefix = Str(s + "|")
    id_attrs = {"dbname": dbname, "type": name}
    return prefix + Std.dbxref_dbid(UntilSep(sep="| "), id_attrs)
Esempio n. 5
0
# Register the remaining identifier formats.  Order is preserved because the
# alternatives are later OR-ed together in list order.
# NOTE(review): assumes `ids` is a plain list (so extend == repeated append).
ids.extend([
    make_2id("sp", "sp", "primary", "secondary"),

    # Brookhaven Protein Data Bank pdb|entry|chain
    make_2id("pdb", "x-pdb", "primary", "secondary"),  # XXX not correct

    # Patents                      pat|country|number
    make_2id("pat", "x-pat", "primary", "secondary"),  # XXX not correct

    # GenInfo Backbone Id          bbs|number
    make_1id("bbs", "x-bbs", "primary"),
])

# General database identifier  gnl|database|identifier
gnl_id = (
    Str("gnl|")
    + Std.dbxref_dbname(UntilSep(sep="| "))
    + Str("|")
    + Std.dbxref_dbid(UntilSep(sep="| "))
)
ids.append(gnl_id)

ids.extend([
    # NCBI Reference Sequence      ref|accession|locus
    make_2id("ref", "x-ref", "primary", "secondary"),

    # Local Sequence identifier    lcl|identifier
    make_1id("lcl", "local", "primary"),
])

# "|" them all together: one dbxref whose content is any of the formats
ncbi_word = Std.dbxref(
    reduce(operator.or_, ids)
)

# One identifier, optionally followed by further "|"-joined identifiers.
# (A leading look-ahead guard used to be part of this; kept for reference.)
#ncbi_term = Assert(Re("[^ \R]+\|")) + \
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)

# Anything else
# BL   CCY;  width=14; seqs=44; 99.5%=717; strength=1059
# BL line: literal keys interleaved with the captured fields protomat_id,
# width, numseqs, protomat_count and strength, then an end-of-line.
BL = (
    Str("BL   ") + ToSep("protomat_id", ";")
    + Str("  width=") + Digits("width")
    + Str("; seqs=") + Digits("numseqs")
    + Str("; 99.5%=") + Digits("protomat_count")
    + Str("; strength=") + Digits("strength")
    + AnyEol()
)


# PLMN_BOVIN|P06868  (  60) CEEETDFVCRAFQY  26
# ^^^^^^^^^^^^^^^^^
#                     ^^^^-- number of segments
#                           ^^^^^^^^^^^^^^-- matching sequence
#                                           ^^-- weight
#
# Either "<primary>|<accession>" (both tagged as swissprot ids), or, failing
# that, a single untagged id running up to the next space.
identifier = (
    (
        Std.dbxref_dbid(UntilSep(sep="|."),
                        {"dbname": "swissprot", "type": "primary"})
        + Str("|")
        + Std.dbxref_dbid(UntilSep(sep=" "),
                          {"dbname": "swissprot", "type": "accession"})
    )
    | Std.dbxref_dbid(UntilSep(sep=" "))
)
                              
# One segment line: identifier, a parenthesised integer captured as
# "position", the matching sequence, and a numeric weight.  The leading
# AssertNot rejects lines whose first three characters are two arbitrary
# characters followed by a space.
segment = (
    AssertNot(Re(r".. "))
    + identifier
    + Re(r" *\( *")
    + Integer("position")
    + Re(r"\) *")
    + Word("matching_sequence")
    + Spaces()
    + Digits("weight")
    + AnyEol()
)

# One or more segment lines, with bare end-of-lines allowed in between.
segment_block = Rep1(segment | AnyEol())