Martel.Opt(CC_copyright_begin + \ Martel.Rep(CC_copyright) + \ CC_copyright_end \ ) ) #--- DR # This is needed for things like # DR MGD; MGI:95401; EPB4.1. # where I need to scan up to the last "." That is, I want # "EPB4.1" to be the secondary identifier, not "EPB4" nor "EPB4.1." _to_secondary_end = Martel.Re(r"([^.\R]|(?!.\R)\.)+") database_id = Std.dbxref_dbname(Martel.UntilSep("database_identifier", ";"), {"style": "sp"}) primary_id = Std.dbxref_dbid(Martel.UntilSep("primary_identifier", ";"), {"type": "primary"}) secondary_id = Std.dbxref_dbid(Martel.Group("secondary_identifier", _to_secondary_end), {"type": "accession"}) # used in StdHandler for fast dxbref - don't rename! real_DR_general = Std.dbxref(database_id + Martel.Str("; ") + \ primary_id + Martel.Str("; ") + \ secondary_id, ) fast_DR_general = Std.fast_dbxref(real_DR_general, {"style": "sp-general"})
## FH - feature table header (0 or 2 per entry) FH_block = Martel.Str("FH Key Location/Qualifiers") + \ Martel.AnyEol() + \ Martel.Str("FH") + \ Martel.AnyEol() ## FT - feature table data (>=0 per entry) ##FT_line = Martel.Str("FT ") + \ ## Martel.ToEol("ft_data") ##FT_block = Martel.Rep1(FT_line) fq_dbxref = Std.feature_qualifier_name(Martel.Str("db_xref")) + \ Martel.Str('=') + \ Std.feature_qualifier_description( Martel.Str('"') + \ Std.dbxref(Std.dbxref_dbname(Martel.UntilSep(None, ":")) + \ Martel.Str(":") + \ Std.dbxref_dbid(Martel.UntilSep(None, '"'))) + \ Martel.Str('"')) + \ Martel.AnyEol() fq_generic = \ Martel.Assert(Martel.Word() + Martel.Str("=")) + \ Std.feature_qualifier_name(Martel.Word()) + \ Martel.Str("=") + \ Std.feature_qualifier_description(Martel.UntilEol()) + \ Martel.AnyEol() + \ Martel.Rep( Martel.Str("FT ") + \ (Martel.AssertNot(Martel.Str("/")) |
# Each entry appended to ids is one alternative identifier form; the
# comment above each entry shows the "|"-separated layout it stands for.

# SWISS-PROT: sp|accession|entry name
ids.append(make_2id("sp", "sp", "primary", "secondary"))

# Brookhaven Protein Data Bank: pdb|entry|chain
ids.append(make_2id("pdb", "x-pdb", "primary", "secondary"))  # XXX not correct

# Patents: pat|country|number
ids.append(make_2id("pat", "x-pat", "primary", "secondary"))  # XXX not correct

# GenInfo Backbone Id: bbs|number
ids.append(make_1id("bbs", "x-bbs", "primary"))

# General database identifier: gnl|database|identifier
# Written out by hand because the database name is itself part of the
# matched text instead of being a fixed tag.
gnl_id = (
    Str("gnl|") +
    Std.dbxref_dbname(UntilSep(sep="| ")) +
    Str("|") +
    Std.dbxref_dbid(UntilSep(sep="| "))
)
ids.append(gnl_id)

# NCBI Reference Sequence: ref|accession|locus
ids.append(make_2id("ref", "x-ref", "primary", "secondary"))

# Local Sequence identifier: lcl|identifier
ids.append(make_1id("lcl", "local", "primary"))

# "|" them all together: fold every alternative in ids into one expression
# with the "|" operator, in the order the entries were appended.
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# One identifier, optionally followed by further "|"-separated identifiers.
#ncbi_term = Assert(Re("[^ \R]+\|")) + \
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)