def test1():
    """Check LAX and LAXAttrs record iteration over a small name/age table.

    One text line is built per entry of ``fields``.  The text is parsed
    with a Martel format whose records are tagged "line", and every record
    produced by the iterator is compared against the matching ``fields``
    entry.  The test also fails if the iterator yields fewer records than
    there are entries (more records already fail on the ``fields`` lookup).
    """
    fields = (
        ["Andrew", "Dalke", "12"],
        ["Liz", "Nelson", "22"],
        ["Mandrake", "Moose", "23"],
        ["Lisa", "Marie", "91"],
    )
    # str.join replaces the deprecated string.join() and avoids rebuilding
    # the whole text once per input line.
    text = "".join([" ".join(line) + "\n" for line in fields])

    # first name, spaces, last name, spaces, age, end of line
    record_format = Martel.Rep1(
        Martel.Group("line",
                     Martel.Word("name", {"type": "first"}) +
                     Martel.Spaces() +
                     Martel.Word("name", {"type": "last"}) +
                     Martel.Spaces() +
                     Martel.Integer("age") +
                     Martel.AnyEol()))
    iterator = record_format.make_iterator("line")

    # LAX handler: the assertions expect each tag to map to the list of
    # matched strings.
    count = 0
    for record in iterator.iterateString(text, LAX.LAX()):
        expected = fields[count]
        assert record["name"] == expected[:2], (record["name"], expected[:2])
        assert record["age"] == expected[2:3], (record["age"], expected[2:3])
        count += 1
    # Without this check the loop passes vacuously when records are missing.
    assert count == len(fields), (count, len(fields))

    # LAXAttrs handler: the assertions treat every entry as a
    # (text, attributes) pair.
    count = 0
    for record in iterator.iterateString(text, LAX.LAXAttrs()):
        expected = fields[count]
        names = [entry[0] for entry in record["name"]]
        assert names == expected[:2], (names, expected[:2])
        ages = [entry[0] for entry in record["age"]]
        assert ages == expected[2:3], (ages, expected[2:3])
        assert record["name"][0][1]["type"] == "first"
        assert record["name"][1][1]["type"] == "last"
        # list() keeps the comparison valid when keys() returns a view
        # object instead of a list.
        assert list(record["age"][0][1].keys()) == []
        count += 1
    assert count == len(fields), (count, len(fields))
Martel.Str(tag + " ") + Martel.Group(tag_data, Martel.Re("[^\R]*")) + Martel.AnyEol()) #--- ID ID = Martel.Group("ID", Martel.Re( r"ID (?P<entry_name>\w+) +(?P<data_class_table>\w+); +" \ r"(?P<molecule_type>\w+); +(?P<sequence_length>\d+) AA\.\R" )) #--- AC AC = Martel.Group( "AC", Martel.Re(r"AC (?P<ac_number>\w+);( (?P<ac_number>\w+);)*\R")) AC_block = Martel.Group("AC_block", Martel.Rep1(AC)) #--- DT DT_created = Martel.Group("DT_created", Martel.Re( r"DT (?P<day>\d\d)-(?P<month>...)-(?P<year>\d{4}) \(Rel. "\ r"(?P<release>\d\d), Created\)\R" )) DT_seq_update = Martel.Group("DT_seq_update", Martel.Re( r"DT (?P<day>\d\d)-(?P<month>...)-(?P<year>\d{4}) \(Rel. "\ r"(?P<release>\d\d), Last sequence update\)\R" )) DT_ann_update = Martel.Group("DT_ann_update", Martel.Re( r"DT (?P<day>\d\d)-(?P<month>...)-(?P<year>\d{4}) \(Rel. "\ r"(?P<release>\d\d), Last annotation update\)\R" ))
# Copyright 2001 by Katharine Lindner. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Martel regular expression for Intelligenetic format (DEPRECATED).

This is a huge regular expression for the IntelliGenetics/MASE format,
built using the 'regular expressions on steroids' capabilities of Martel.
"""

#http://immuno.bme.nwu.edu/seqhunt.html

# Martel
import Martel

# --- first set up some helper constants and functions

# A comment line starts with ';'; the rest of the line is tagged "comment".
comment_line = Martel.Group(
    "comment_line",
    Martel.Str(";") + Martel.ToEol("comment")
)
# Zero or more comment lines.
comment_lines = Martel.Group("comment_lines", Martel.Rep(comment_line))

# NOTE(review): Assert(Str(";"), 1) is presumably a negative lookahead
# (the line must NOT start with ';') -- confirm against
# Martel.Expression.Assert.
title_line = Martel.Group(
    "title_line",
    Martel.Expression.Assert(Martel.Str(";"), 1) + Martel.ToEol()
)

# Same guard as the title line, but the line text is tagged "sequence".
residue_line = Martel.Group(
    "residue_line",
    Martel.Expression.Assert(Martel.Str(";"), 1) + Martel.ToEol("sequence")
)
# One or more residue lines.
residue_lines = Martel.Group("residue_lines", Martel.Rep1(residue_line))

# A record is: comment lines, one title line, then the residue lines.
intelligenetics_record = comment_lines + title_line + residue_lines
from Martel import UntilSep
from Bio.NBRF.ValSeq import valid_sequence_dict

# One literal alternative per key of valid_sequence_dict.
sequence_types = map(Str, valid_sequence_dict.keys())
sequence_type = Group("sequence_type", Alt(*sequence_types))

# Header line: ">" + sequence type + ";" + sequence name up to end of line.
name_line = Martel.Group(
    "name_line",
    Str(">") + sequence_type + Str(";") + UntilEol("sequence_name") + AnyEol()
)

# The line after the header is captured whole as "comment".
comment_line = UntilEol("comment") + AnyEol()

# Characters that cannot appear in a plain sequence line:
# '*' (0x2a) and the newline characters '\n' (10) and '\r' (13).
excluded_chars = "*" + "\n" + "\r"

# sequence lines with only sequence
sequence_text = Group(
    "sequence_text",
    Martel.Rep1(AnyBut(excluded_chars))
)
sequence_line = Group("sequence_line", sequence_text + AnyEol())

# the final line, has a '*' and potentially some sequence
sequence_final_line = Group(
    "sequence_final_line",
    UntilSep("sequence_final_text", "*") + Str("*") + Rep1(AnyEol())
)

# Zero or more plain sequence lines precede the final '*' line.
sequence_block = Group("sequence_block", Rep(sequence_line))

nbrf_record = name_line + comment_line + sequence_block + sequence_final_line
# "Amplimer <number>" line.  The pattern is a raw string so that \d reaches
# the regex engine without relying on an invalid string escape.
amplifier = Martel.Group("amplifier", Martel.Re(r"[\d]+"))
amplimer_line = Martel.Str("Amplimer ") + amplifier + Martel.AnyEol()

# Example of the four lines matched by amplifier_sequence_lines:
# Sequence: AC074298 AC074298
# Telomere associated sequence for Arabidopsis thaliana TEL1N
# CCGGTTTCTCTGGTTGAAAA hits forward strand at 114 with 0 mismatches
# TCACATTCCCAAATGTAGATCG hits reverse strand at [114] with 0 mismatches
seq_indent = Martel.Str("\t")

# Each of these groups captures one whole line.
sequence_id = Martel.Group("sequence_id", Martel.ToEol())
sequence_descr = Martel.Group("sequence_descr", Martel.ToEol())
sequence_info = sequence_id + sequence_descr

forward_strand_info = Martel.Group("forward_strand_info", Martel.ToEol())
reverse_strand_info = Martel.Group("reverse_strand_info", Martel.ToEol())

amplifier_sequence = Martel.Group(
    "amplifier_sequence",
    sequence_info + forward_strand_info + reverse_strand_info)

amplifier_sequence_lines = seq_indent + Martel.Str("Sequence: ") + \
                           amplifier_sequence

# "<tab>Amplimer length: <number> bp" line.
amplifier_length = Martel.Group("amplifier_length", Martel.Re(r"[\d]+"))
amplifier_length_line = seq_indent + Martel.Str("Amplimer length: ") + \
                        amplifier_length + Martel.Str(" bp") + \
                        Martel.AnyEol()

# A record is one or more primer sections: a blank line, the primer name
# line, then zero or more amplimer entries.  blank_line and
# primer_name_line are defined earlier in the file.
record = Martel.Group(
    "primersearch_record",
    Martel.Rep1(blank_line +
                primer_name_line +
                Martel.Rep(amplimer_line +
                           amplifier_sequence_lines +
                           amplifier_length_line)))