Re("[\w\-]+")) + Spaces() + Integer("dalign_query_end") + Spaces() + AnyEol(), Str("- ") + ToEol()) + ToEol()) # blank line domain_alignment = (domain_align_header + Rep1(Opt(rf_line) + domain_align_top + domain_align_middle + domain_align_bottom)) # // record_end = Str("//") + AnyEol() record = Std.record(Rep(sequence_info + family_header + (no_hit_line | Rep1(family_hit_line)) + domain_header + (no_hit_line | Rep1(domain_hit_line)) + alignment_header + (no_hit_line | Rep1(domain_alignment)) + record_end )) format = HeaderFooter("hmmpfam", {}, header, RecordReader.CountLines, (8,), record, RecordReader.EndsWith, ("//\n",), None, None, None)
# // record_end = Martel.Group("record_end", Martel.Str("//") + Martel.Rep1(Martel.AnyEol())) record = Std.record(Martel.Group("genbank_record", locus_line + \ definition_block + \ accession_block + \ Martel.Opt(nid_line) + \ Martel.Opt(pid_line) + \ Martel.Opt(version_line) + \ Martel.Opt(db_source_block) + \ keywords_block + \ Martel.Opt(segment_line) + \ source_block + \ organism_block + \ Martel.Rep(reference) + \ Martel.Opt(primary) +\ Martel.Opt(comment_block) + \ features_line + \ feature_block + \ Martel.Alt(Martel.Opt(base_count_line) + sequence_entry, contig_block) + \ record_end)) # if you download a big mess of GenBank files, it'll have a header # in that case you should be using 'ncbi_format' instead of the standard # 'format' header = Martel.Re("""\
#--- // end = Martel.Group("END", Martel.Str("//") + Martel.AnyEol()) ####################### put it all together record = Std.record( ID + AC_block + DT_created + DT_seq_update + DT_ann_update + Martel.Opt(DE_block) + Martel.Opt(GN_block) + Martel.Opt(OS_block) + Martel.Opt(OG_block) + Martel.Opt(OC_block) + Martel.Group("OX_block", Martel.NullOp()) + Martel.Group("reference_block", Martel.Rep(reference)) + comment + Martel.Opt(DR_block) + Martel.Opt(KW_block) + Martel.Opt(feature_block) + sequence + end, {"format": "swissprot/38"}) format_expression = Martel.Group("dataset", Martel.Rep1(record), {"format": "swissprot/38"}) format = Martel.ParseRecords("dataset", {"format": "swissprot/38"},
# ---------------------------------------------------------------------------
# Identifier part of the title line
# ---------------------------------------------------------------------------

# Combine every alternative held in ``ids`` (built earlier in this module,
# not shown in this excerpt) into one expression by folding with "|".
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# A word followed by further "|"-prefixed words (repeated via Rep).
# A variant that prefixed this expression with
#     Assert(Re("[^ \R]+\|")) + \
# was commented out in the original and stays disabled here.
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)

# Anything else: the text read by UntilSep with separator " " is tagged as a
# dbxref id carrying the attribute dbname="local".
generic_term = Std.dbxref(
    Std.dbxref_dbid(
        UntilSep(sep=" "),
        {"dbname": "local"},
    )
)

# The NCBI form is listed first in the alternation, the generic form second.
id_term = ncbi_term | generic_term

# ---------------------------------------------------------------------------
# Line-level pieces
# ---------------------------------------------------------------------------

# Any number of "#"-prefixed lines.
comment_lines = Rep(Str("#") + ToEol())

# ">" followed by the description line (identifier term plus the remainder
# of the line), then the end-of-line.
title = (
    Str(">")
    + Std.description_line(id_term + UntilEol())
    + AnyEol()
)

# A line that does not begin with ">", tagged as sequence, then end-of-line.
seqline = (
    AssertNot(Str(">"))
    + Std.sequence(UntilEol())
    + AnyEol()
)

# The final sequence line of a file may lack its end-of-line, so accept a
# bare Word() as sequence data as well.
seqline_nonewline = AssertNot(Str(">")) + Std.sequence(Word())

# ---------------------------------------------------------------------------
# Record and file-level format
# ---------------------------------------------------------------------------

sequence = Std.sequence_block(Rep(seqline | seqline_nonewline))

# One record: leading comment lines, the title, the sequence block, and any
# end-of-lines that follow it.
record = Std.record(
    comment_lines
    + title
    + sequence
    + Rep(AnyEol())
)

# Record-oriented reader for FASTA data that also tolerates "#"-style
# comments in the file.  The arguments after the name and attributes come in
# three (expression, reader, reader-args) groups -- presumably header,
# record and footer, going by the HeaderFooter name.
format = HeaderFooter(
    "dataset", {"format": "fasta"},
    # comments ahead of the first ">"
    comment_lines, RecordReader.Until, (">",),
    # each record begins at a ">"
    record, RecordReader.StartsWith, (">",),
    # whatever remains after the records
    comment_lines, RecordReader.Everything, (),
)