Ejemplo n.º 1
0
# Grammar for the "BASE COUNT" line: the literal keyword, then the
# blank_space and base_count expressions (both defined outside this
# excerpt), then an end-of-line.
base_count_line = Martel.Group("base_count_line",
                               Martel.Str("BASE COUNT") +
                               blank_space +
                               base_count +
                               Martel.AnyEol())

# Grammar for the ORIGIN line that precedes the sequence data.
# Example input (the ORIGIN line followed by the first sequence line):
# ORIGIN
#       1 ggacaaggcc aaggatgctg ctgctgcagc tggagcttcc gcgcaacaag taaacagata
# After the keyword the expression is an alternation: either
# ToEol("origin_name"), which tags the rest of the line, or a bare
# end-of-line.
origin_line = Martel.Group("origin_line",
                           Martel.Str("ORIGIN") +
                           (Martel.ToEol("origin_name") |
                            Martel.AnyEol()))

# Position counter on a sequence line: one or more digits.
# The pattern is a raw string so the regex backslash is never interpreted
# (or warned about) as a Python string escape; the pattern text itself is
# unchanged.
base_number = Martel.Group("base_number",
                           Martel.Re(r"[\d]+"))

# One run of word characters, grouped as "sequence" and wrapped with
# Std.sequence. Raw string for the same reason as above.
sequence = Std.sequence(Martel.Group("sequence",
                                     Martel.Re(r"[\w]+")))
# The body of a sequence line: one or more repetitions of a single space
# optionally followed by a sequence chunk, then an optional trailing space.
sequence_plus_spaces = Martel.Group("sequence_plus_spaces",
                                    Martel.Rep1(Martel.Str(" ") +
                                        Martel.Opt(sequence)) + 
                                    Martel.Opt(Martel.Str(" ")))
# A full line of sequence data: the blank_space expression (defined outside
# this excerpt), an optional base_number, the space-separated sequence
# chunks, then an end-of-line.
sequence_line = Martel.Group("sequence_line",
                             blank_space +
                             Martel.Opt(base_number) +
                             sequence_plus_spaces +
                             Martel.AnyEol())

# The whole sequence section: the ORIGIN line followed by one or more
# sequence lines, grouped as "sequence_entry" and wrapped with
# Std.sequence_block.
sequence_entry = Std.sequence_block(Martel.Group("sequence_entry",
                                    origin_line +
                                    Martel.Rep1(sequence_line)))

# CONTIG
Ejemplo n.º 2
0
# Grammar for the "BASE COUNT" line: the literal keyword, then the
# blank_space and base_count expressions (both defined outside this
# excerpt), then an end-of-line.
base_count_line = Martel.Group("base_count_line",
                               Martel.Str("BASE COUNT") +
                               blank_space +
                               base_count +
                               Martel.AnyEol())

# Grammar for the ORIGIN line that precedes the sequence data.
# Example input (the ORIGIN line followed by the first sequence line):
# ORIGIN
#       1 ggacaaggcc aaggatgctg ctgctgcagc tggagcttcc gcgcaacaag taaacagata
# After the keyword the expression is an alternation: either
# ToEol("origin_name"), which tags the rest of the line, or a bare
# end-of-line.
origin_line = Martel.Group("origin_line",
                           Martel.Str("ORIGIN") +
                           (Martel.ToEol("origin_name") |
                            Martel.AnyEol()))

# Position counter on a sequence line: one or more digits.
# Written as a raw string, like the sequence pattern below, so the regex
# backslash is never interpreted (or warned about) as a Python string
# escape; the pattern text itself is unchanged.
base_number = Martel.Group("base_number",
                           Martel.Re(r"[\d]+"))

# The sequence portion of a line: a run of word characters and spaces,
# wrapped with Std.sequence.
sequence = Std.sequence(Martel.Re(r"[\w ]+"))

# A full line of sequence data: the blank_space expression (defined outside
# this excerpt), an optional base_number, the sequence text, then an
# end-of-line.
sequence_line = Martel.Group("sequence_line",
                             blank_space +
                             Martel.Opt(base_number) +
                             sequence +
                             Martel.AnyEol())

# The whole sequence section: the ORIGIN line followed by one or more
# sequence lines, wrapped with Std.sequence_block.
sequence_entry = Std.sequence_block(origin_line +
                                    Martel.Rep1(sequence_line))

# CONTIG
# this is the contig information for RefSeq records

contig_location = Martel.Group("contig_location",
                    Martel.ToEol("feature_location") + \
Ejemplo n.º 3
0

#--- SQ

# SQ   SEQUENCE  XXXX AA; XXXXX MW;  XXXXX CRC32;
# (Those X's don't really indicate the size)

# Header line of the sequence section. The named groups capture the
# sequence length, the molecular weight and the CRC value.
# The pattern pieces are raw strings so that \d, \w and Martel's \R reach
# the regex parser untouched instead of relying on Python leaving unknown
# string escapes alone; the resulting pattern text is unchanged.
SQ = Martel.Group(
    "SQ",
    Martel.Re(r"SQ   SEQUENCE +(?P<sequence_length>\d+) AA;"
              r" +(?P<molecular_weight>\d+) MW;"
              r" +(?P<crc?type=32>\w+) CRC32;\R"))
##SQ_data = Martel.Group("SQ_data",
##                       Martel.Re("     (?P<sequence>[^\R]*)\R"))
# One line of sequence data: five leading spaces, the text up to the end
# of the line (tagged via Std.sequence), then the line terminator.
SQ_data = (
    Martel.Str("     ")
    + Std.sequence(Martel.UntilEol())
    + Martel.AnyEol()
)


##sequence = Martel.Group("sequence_block", Martel.Group("SQ_data_block",
##                                                 SQ + Martel.Rep(SQ_data)))
# The complete sequence section: the SQ header line followed by any number
# of SQ_data lines, wrapped with Std.sequence_block and passed an
# "alphabet" attribute of "iupac-ambiguous-protein".
sequence = Std.sequence_block(SQ + Martel.Rep(SQ_data),
                              {"alphabet": "iupac-ambiguous-protein"})

#--- //

# Record terminator: a line consisting of "//", grouped as "END".
end = Martel.Group("END", Martel.Str("//") + Martel.AnyEol())

####################### put it all together

record = Std.record(
Ejemplo n.º 4
0
          Martel.Str(" BP; ") + \
          Martel.Digits("num_A") + \
          Martel.Str(" A; ") + \
          Martel.Digits("num_C") + \
          Martel.Str(" C; ") + \
          Martel.Digits("num_G") + \
          Martel.Str(" G; ") + \
          Martel.Digits("num_T") + \
          Martel.Str(" T; ") + \
          Martel.Digits("num_other") + \
          Martel.Str(" other;") + \
          Martel.AnyEol()

## bb - (blanks) sequence data     (>=1 per entry)
# Layout: five leading spaces, a 65-character field tagged via
# Std.sequence, the whitespace expression (defined outside this excerpt),
# the "end_position" digits, then the line terminator.
SQ_data = (
    Martel.Str("     ")
    + Std.sequence(Martel.Re(".{65}"))
    + whitespace
    + Martel.Digits("end_position")
    + Martel.AnyEol()
)

# The sequence section: SQ_line (defined outside this excerpt) followed by
# one or more SQ_data lines, wrapped with Std.sequence_block.
SQ_block = Std.sequence_block(SQ_line + Martel.Rep1(SQ_data))

## // - termination line           (ends each entry; 1 per entry)
end = Martel.Str("//") + Martel.AnyEol()

record = Martel.Group("record", \
                      ID_line + \
                      Martel.Opt(XX) + \
                      AC_block + \
                      Martel.Opt(XX) + \
                      SV_line + \
Ejemplo n.º 5
0
# "|" them all together
# Every expression in ids (built outside this excerpt) is folded into one
# alternation with operator.or_, then wrapped with Std.dbxref.
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# An NCBI identifier term: one ncbi_word, then any number of further
# "|"-separated ncbi_words. The commented-out line below is an earlier
# variant that began with an assertion.
#ncbi_term = Assert(Re("[^ \R]+\|")) + \
ncbi_term =  ncbi_word + Rep(Str("|") + ncbi_word)

# Fallback for identifiers that are not NCBI-style: the text up to the
# next space is tagged as a dbxref id with dbname "local".
generic_term = Std.dbxref(
    Std.dbxref_dbid(UntilSep(sep=" "), {"dbname": "local"}))

# NCBI-style terms are tried first; the generic form is the alternative.
id_term = ncbi_term | generic_term
###########################################################

# Any number of comment lines, each starting with "#".
comment_lines = Rep(Str("#") + ToEol())
# Title line: ">", then the id term and the rest of the line, tagged
# together via Std.description_line, then the line terminator.
title = Str(">") + Std.description_line(id_term + UntilEol()) + AnyEol()
# A sequence line: must not start with ">"; the text up to the end of the
# line is tagged via Std.sequence.
seqline = AssertNot(Str(">")) + Std.sequence(UntilEol()) + AnyEol()
# can get a sequence line without an Eol at the end of a file
seqline_nonewline = AssertNot(Str(">")) + Std.sequence(Word())

# The sequence body: any number of sequence lines of either kind.
sequence = Std.sequence_block(Rep(seqline | seqline_nonewline))

# One FASTA record: optional comments, the title line, the sequence body,
# and any trailing blank line terminators.
record = Std.record(comment_lines + title + sequence + Rep(AnyEol()))

# Top-level FASTA format: reads records while allowing #-style comments
# in the file. After the format name and attributes, the arguments are
# three (expression, reader, reader-arguments) triples, in order: header,
# record, footer.
format = HeaderFooter(
    "dataset", {"format": "fasta"},
    comment_lines, RecordReader.Until, (">",),
    record, RecordReader.StartsWith, (">",),
    comment_lines, RecordReader.Everything, ())