Ejemplo n.º 1
0
                               Re("[\w\-]+")) + Spaces() +
                           Integer("dalign_query_end") +
                           Spaces() + AnyEol(),
                           Str("- ") + ToEol()) +
                       ToEol()) # blank line

# Alignment section for one domain: domain_align_header, then Rep1 of an
# alignment row.  Each row is an optional rf_line followed by the top,
# middle and bottom alignment lines, in that order.
domain_alignment = (
    domain_align_header
    + Rep1(
        Opt(rf_line)
        + domain_align_top
        + domain_align_middle
        + domain_align_bottom
    )
)

# //
# Record terminator: the literal "//" followed by AnyEol().
record_end = Str("//") + AnyEol()


# Record expression, wrapped in Std.record.  The body is a Rep of:
# sequence_info, then the family, domain and alignment sections in that
# order, then record_end.  Each section is its header followed by either
# no_hit_line or Rep1 of the matching hit/alignment expression.
record = Std.record(
    Rep(
        sequence_info
        + family_header
        + (no_hit_line | Rep1(family_hit_line))
        + domain_header
        + (no_hit_line | Rep1(domain_hit_line))
        + alignment_header
        + (no_hit_line | Rep1(domain_alignment))
        + record_end
    )
)

# Top-level "hmmpfam" format with an empty attribute dict.
#   - `header` is paired with RecordReader.CountLines and the argument (8,).
#   - `record` is paired with RecordReader.EndsWith and the argument
#     ("//\n",), i.e. the same "//" terminator used by record_end.
#   - The last three arguments are None (presumably the footer expression,
#     its reader and reader arguments -- confirm against HeaderFooter).
format = HeaderFooter("hmmpfam", {},
                      header, RecordReader.CountLines, (8,),
                      record, RecordReader.EndsWith, ("//\n",),
                      None, None, None)
Ejemplo n.º 2
0
# //
# Record terminator, captured as the "record_end" group: the literal "//"
# followed by Rep1 of AnyEol().
record_end = Martel.Group("record_end",
                          Martel.Str("//") +
                          Martel.Rep1(Martel.AnyEol()))

# One complete entry, captured as the "genbank_record" group and wrapped in
# Std.record.  The parts are concatenated in the order listed below; those
# wrapped in Martel.Opt are optional and `reference` is wrapped in
# Martel.Rep.  After feature_block the entry continues with either an
# optional base_count_line plus sequence_entry, or a contig_block, and is
# closed by record_end.
record = Std.record(
    Martel.Group(
        "genbank_record",
        locus_line
        + definition_block
        + accession_block
        + Martel.Opt(nid_line)
        + Martel.Opt(pid_line)
        + Martel.Opt(version_line)
        + Martel.Opt(db_source_block)
        + keywords_block
        + Martel.Opt(segment_line)
        + source_block
        + organism_block
        + Martel.Rep(reference)
        + Martel.Opt(primary)
        + Martel.Opt(comment_block)
        + features_line
        + feature_block
        + Martel.Alt(
            Martel.Opt(base_count_line) + sequence_entry,
            contig_block
        )
        + record_end
    )
)

# If you download a large batch of GenBank files at once, the result starts
# with a header.  In that case use 'ncbi_format' instead of the standard
# 'format'.
header = Martel.Re("""\
Ejemplo n.º 3
0
#--- //

# Record terminator, captured as the "END" group: the literal "//" followed
# by AnyEol().
end = Martel.Group("END", Martel.Str("//") + Martel.AnyEol())

####################### put it all together

# One complete entry, tagged with the attribute format="swissprot/38".
# The line blocks are concatenated in the order listed; those wrapped in
# Martel.Opt are optional.  "OX_block" is a group around Martel.NullOp()
# (presumably a placeholder so the group name still exists in this format
# version -- confirm), and "reference_block" groups a Martel.Rep of
# `reference`.
record = Std.record(
    ID
    + AC_block
    + DT_created
    + DT_seq_update
    + DT_ann_update
    + Martel.Opt(DE_block)
    + Martel.Opt(GN_block)
    + Martel.Opt(OS_block)
    + Martel.Opt(OG_block)
    + Martel.Opt(OC_block)
    + Martel.Group("OX_block", Martel.NullOp())
    + Martel.Group("reference_block", Martel.Rep(reference))
    + comment
    + Martel.Opt(DR_block)
    + Martel.Opt(KW_block)
    + Martel.Opt(feature_block)
    + sequence
    + end,
    {"format": "swissprot/38"}
)


# Whole-file expression: Rep1 of `record`, captured as the "dataset" group
# and tagged with the attribute format="swissprot/38".
format_expression = Martel.Group("dataset", Martel.Rep1(record),
                                 {"format": "swissprot/38"})

format = Martel.ParseRecords("dataset", {"format": "swissprot/38"},
Ejemplo n.º 4
0
# "|" them all together: fold the entries of `ids` (defined earlier in the
# file) with operator.or_ into a single alternation, then wrap the result
# in Std.dbxref.
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# NCBI-style identifier: an ncbi_word followed by Rep of ("|" + ncbi_word).
# The line below is a disabled Assert-based variant, kept as found.
#ncbi_term = Assert(Re("[^ \R]+\|")) + \
ncbi_term =  ncbi_word + Rep(Str("|") + ncbi_word)

# Anything else: fallback identifier for titles that are not NCBI-style.
# UntilSep(sep=" ") (presumably the text up to the next space) is tagged as
# a dbxref_dbid with dbname "local" and wrapped in a dbxref.
generic_term = Std.dbxref(
    Std.dbxref_dbid(UntilSep(sep=" "), {"dbname": "local"})
)

# ncbi_term is the first alternative, generic_term the second.
id_term = ncbi_term | generic_term
###########################################################

# Rep of a comment line, where a comment line is "#" followed by ToEol().
comment_lines = Rep(Str("#") + ToEol())
# Title line: ">" followed by the description (an id_term, then UntilEol()),
# tagged as description_line, and closed by AnyEol().
title = Str(">") + Std.description_line(id_term + UntilEol()) + AnyEol()
# Sequence line: UntilEol() tagged as sequence, then AnyEol().  The leading
# AssertNot(Str(">")) keeps a line that starts with ">" (the next title)
# from being taken as sequence data.
seqline = AssertNot(Str(">")) + Std.sequence(UntilEol()) + AnyEol()
# can get a sequence line without an Eol at the end of a file
seqline_nonewline = AssertNot(Str(">")) + Std.sequence(Word())

# Sequence block: Rep of either sequence-line form, with seqline listed
# first in the alternation.
sequence = Std.sequence_block(Rep(seqline | seqline_nonewline))

# One record: comment_lines, the title line, the sequence block and then
# Rep of AnyEol().
record = Std.record(comment_lines + title + sequence + Rep(AnyEol()))

# Define a format which reads records, but allows #-style comments in
# the FASTA file.  The "dataset" format is tagged format="fasta" and pairs:
#   - comment_lines with RecordReader.Until and the argument (">",),
#   - record with RecordReader.StartsWith and the argument (">",),
#   - comment_lines with RecordReader.Everything and no arguments.
# These are presumably the header, record and footer slots of HeaderFooter,
# in that order -- confirm against its signature.
format = HeaderFooter("dataset", {"format": "fasta"},
                      comment_lines, RecordReader.Until, (">",),
                      record, RecordReader.StartsWith, (">",),
                      comment_lines, RecordReader.Everything, ()
                     )