예제 #1
0
파일: Generic.py 프로젝트: cbirdlab/sap
    def __format__(self, format_spec):
        """Returns the alignment as a string in the specified file format.

        This method supports the python format() function added in
        Python 2.6/3.0.  The format_spec should be a lower case
        string supported by Bio.AlignIO as an output file format.
        See also the alignment's format() method."""
        if format_spec:
            from SAP.Bio._py3k import StringIO
            from SAP.Bio import AlignIO
            handle = StringIO()
            AlignIO.write([self], handle, format_spec)
            return handle.getvalue()
        else:
            #Follow python convention and default to using __str__
            return str(self)
예제 #2
0
def UniprotIterator(handle, alphabet=Alphabet.ProteinAlphabet(), return_raw_comments=False):
    """Generator function to parse UniProt XML as SeqRecord objects.

    parses an XML entry at a time from any UniProt XML file
    returns a SeqRecord for each iteration

    This generator can be used in Bio.SeqIO

    return_raw_comments = True --> comment fields are returned as complete XML to allow further processing
    skip_parsing_errors = True --> if parsing errors are found, skip to next entry
    """
    if isinstance(alphabet, Alphabet.NucleotideAlphabet):
        raise ValueError("Wrong alphabet %r" % alphabet)
    if isinstance(alphabet, Alphabet.Gapped):
        if isinstance(alphabet.alphabet, Alphabet.NucleotideAlphabet):
            raise ValueError("Wrong alphabet %r" % alphabet)

    if not hasattr(handle, "read"):
        if isinstance(handle, str):
            handle = StringIO(handle)
        else:
            raise Exception('An XML-containing handler or an XML string must be passed')

    if ElementTree is None:
        from SAP.Bio import MissingExternalDependencyError
        raise MissingExternalDependencyError(
                "No ElementTree module was found. "
                "Use Python 2.5+, lxml or elementtree if you "
                "want to use Bio.SeqIO.UniprotIO.")

    for event, elem in ElementTree.iterparse(handle, events=("start", "end")):
        if event == "end" and elem.tag == NS + "entry":
            yield Parser(elem, alphabet=alphabet, return_raw_comments=return_raw_comments).parse()
            elem.clear()
예제 #3
0
    def __format__(self, format_spec):
        """Serialize the tree as a string in the specified file format.

        This method supports the ``format`` built-in function added in Python
        2.6/3.0.

        :param format_spec: a lower-case string supported by `Bio.Phylo.write`
            as an output file format.
        """
        if format_spec:
            from SAP.Bio._py3k import StringIO
            from SAP.Bio.Phylo import _io
            handle = StringIO()
            _io.write([self], handle, format_spec)
            return handle.getvalue()
        else:
            # Follow python convention and default to using __str__
            return str(self)
예제 #4
0
파일: BaseTree.py 프로젝트: kaspermunch/sap
    def __format__(self, format_spec):
        """Serialize the tree as a string in the specified file format.

        This method supports the ``format`` built-in function added in Python
        2.6/3.0.

        :param format_spec: a lower-case string supported by `Bio.Phylo.write`
            as an output file format.
        """
        if format_spec:
            from SAP.Bio._py3k import StringIO
            from SAP.Bio.Phylo import _io
            handle = StringIO()
            _io.write([self], handle, format_spec)
            return handle.getvalue()
        else:
            # Follow python convention and default to using __str__
            return str(self)
예제 #5
0
파일: SeqRecord.py 프로젝트: cbirdlab/sap
    def __format__(self, format_spec):
        """Returns the record as a string in the specified file format.

        This method supports the python format() function added in
        Python 2.6/3.0.  The format_spec should be a lower case string
        supported by Bio.SeqIO as an output file format. See also the
        SeqRecord's format() method.

        Under Python 3 please note that for binary formats a bytes
        string is returned, otherwise a (unicode) string is returned.
        """
        if not format_spec:
            #Follow python convention and default to using __str__
            return str(self)
        from SAP.Bio import SeqIO
        if format_spec in SeqIO._BinaryFormats:
            #Return bytes on Python 3
            from io import BytesIO
            handle = BytesIO()
        else:
            from SAP.Bio._py3k import StringIO
            handle = StringIO()
        SeqIO.write(self, handle, format_spec)
        return handle.getvalue()
예제 #6
0
파일: MarkovModel.py 프로젝트: cbirdlab/sap
 def __str__(self):
     from SAP.Bio._py3k import StringIO
     handle = StringIO()
     save(self, handle)
     handle.seek(0)
     return handle.read()
예제 #7
0
asis             549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT    598
                                                                       
asis             311 --------------------------------------------------    311

asis             599 GAAAGGGGGTTAATAGC    615
                                      
asis             311 -----------------    311


#---------------------------------------
#---------------------------------------"""

    from SAP.Bio._py3k import StringIO

    alignments = list(EmbossIterator(StringIO(pair_example)))
    assert len(alignments) == 1
    assert len(alignments[0]) == 2
    assert [r.id for r in alignments[0]] \
           == ["IXI_234", "IXI_235"]

    alignments = list(EmbossIterator(StringIO(simple_example)))
    assert len(alignments) == 1
    assert len(alignments[0]) == 4
    assert [r.id for r in alignments[0]] \
           == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"]

    alignments = list(EmbossIterator(StringIO(pair_example + simple_example)))
    assert len(alignments) == 2
    assert len(alignments[0]) == 2
    assert len(alignments[1]) == 4
예제 #8
0
파일: NeXMLIO.py 프로젝트: cbirdlab/sap
 def from_string(cls, treetext):
     handle = StringIO(treetext)
     return cls(handle)
예제 #9
0
파일: FastaIO.py 프로젝트: cbirdlab/sap
            count += 1
            print_record(record)
        assert count == 1
        print(str(record.__class__))

    if os.path.isfile(faa_filename):
        print("--------")
        print("FastaIterator (multiple sequences)")
        with open(faa_filename, "r") as h:
            iterator = FastaIterator(h, alphabet=generic_protein, title2ids=genbank_name_function)
        count = 0
        for record in iterator:
            count += 1
            print_record(record)
            break
        assert count > 0
        print(str(record.__class__))

    from SAP.Bio._py3k import StringIO
    print("--------")
    print("FastaIterator (empty input file)")
    #Just to make sure no errors happen
    iterator = FastaIterator(StringIO(""))
    count = 0
    for record in iterator:
        count += 1
    assert count == 0

    print("Done")

예제 #10
0
파일: _index.py 프로젝트: cbirdlab/sap
 def get(self, offset):
     """Returns SeqRecord."""
     #Should be overridden for binary file formats etc:
     return self._parse(StringIO(_bytes_to_string(self.get_raw(offset))))
예제 #11
0
 def parse_str(self, string):
     return self.parse(StringIO(string))
예제 #12
0
파일: _index.py 프로젝트: cbirdlab/sap
 def get(self, offset):
     return self._parse(StringIO(_bytes_to_string(self.get_raw(offset))))
예제 #13
0
파일: PhylipIO.py 프로젝트: kaspermunch/sap
           VLHDTPNILY -FIKTAGNGQ FKAVGDSLEA Q-----QYGI AFPKGS--DE
           AYNDRLVVNY -IINDQ-KLP VRGAGQIGDA A-----PVGI ALKKGN--SA
           AFQDEVAASE GFLKQPVGKD YKFGGPSVKD EKLFGVGTGM GLRKED--NE

           LQAEVNKALA EMRADGTVEK ISVKWFGADI TK----
           LRKKVNKALD ELRKDGTLKK LSEKYFNEDI TVEQKH
           VVDQVNKALK EMKEDGTLSK ISKKWFGEDV SK----
           LITKFNQVLE ALRQDGTLKQ ISIEWFGYDI TQ----
           LLKAVNDAIA EMQKDGTLQA LSEKWFGADV TK----
           LRDKVNGALK TLRENGTYNE IYKKWFGTEP K-----
           LKDQIDKALT EMRSDGTFEK ISQKWFGQDV GQP---
           LREALNKAFA EMRADGTYEK LAKKYFDFDV YGG---
"""

    from SAP.Bio._py3k import StringIO
    handle = StringIO(phylip_text)
    count = 0
    for alignment in PhylipIterator(handle):
        for record in alignment:
            count = count+1
            print(record.id)
            #print str(record.seq)
    assert count == 8

    expected = """mkklvlslsl vlafssataa faaipqniri gtdptyapfe sknsqgelvg
    fdidlakelc krintqctfv enpldalips lkakkidaim sslsitekrq qeiaftdkly
    aadsrlvvak nsdiqptves lkgkrvgvlq gttqetfgne hwapkgieiv syqgqdniys
    dltagridaafqdevaaseg flkqpvgkdy kfggpsvkde klfgvgtgmg lrkednelre
    alnkafaemradgtyeklak kyfdfdvygg""".replace(" ", "").replace("\n", "").upper()
    assert str(record.seq).replace("-", "") == expected
예제 #14
0
 def __str__(self):
     from SAP.Bio._py3k import StringIO
     handle = StringIO()
     save(self, handle)
     handle.seek(0)
     return handle.read()
예제 #15
0
>>><<<


579 residues in 3 query   sequences
45119 residues in 180 library sequences
 Scomplib [34.26]
 start: Tue May 20 16:38:45 2008 done: Tue May 20 16:38:45 2008
 Total Scan time:  0.020 Total Display time:  0.010

Function used was FASTA [version 34.26 January 12, 2007]

"""

    from SAP.Bio._py3k import StringIO

    alignments = list(FastaM10Iterator(StringIO(simple_example)))
    assert len(alignments) == 4, len(alignments)
    assert len(alignments[0]) == 2
    for a in alignments:
        print("Alignment %i sequences of length %i" \
              % (len(a), a.get_alignment_length()))
        for r in a:
            print("%s %s %i" % (r.seq, r.id, r.annotations["original_length"]))
        #print(a.annotations)
    print("Done")

    import os
    path = "../../Tests/Fasta/"
    files = sorted(f for f in os.listdir(path)
                   if os.path.splitext(f)[-1] == ".m10")
    for filename in files:
예제 #16
0
파일: TabIO.py 프로젝트: cbirdlab/sap
        title = self.clean(record.id)
        seq = self._get_seq_string(record)  # Catches sequence being None
        assert "\t" not in title
        assert "\n" not in title
        assert "\r" not in title
        assert "\t" not in seq
        assert "\n" not in seq
        assert "\r" not in seq
        self.handle.write("%s\t%s\n" % (title, seq))


if __name__ == "__main__":
    print("Running quick self test")
    from SAP.Bio._py3k import StringIO

    #This example has a trailing blank line which should be ignored
    handle = StringIO("Alpha\tAAAAAAA\nBeta\tCCCCCCC\n\n")
    records = list(TabIterator(handle))
    assert len(records) == 2

    handle = StringIO("Alpha\tAAAAAAA\tExtra\nBeta\tCCCCCCC\n")
    try:
        records = list(TabIterator(handle))
        assert False, "Should have reject this invalid example!"
    except ValueError:
        #Good!
        pass

    print("Done")
예제 #17
0
파일: NCBIWWW.py 프로젝트: cbirdlab/sap
def qblast(program, database, sequence,
           auto_format=None, composition_based_statistics=None,
           db_genetic_code=None, endpoints=None, entrez_query='(none)',
           expect=10.0, filter=None, gapcosts=None, genetic_code=None,
           hitlist_size=50, i_thresh=None, layout=None, lcase_mask=None,
           matrix_name=None, nucl_penalty=None, nucl_reward=None,
           other_advanced=None, perc_ident=None, phi_pattern=None,
           query_file=None, query_believe_defline=None, query_from=None,
           query_to=None, searchsp_eff=None, service=None, threshold=None,
           ungapped_alignment=None, word_size=None,
           alignments=500, alignment_view=None, descriptions=500,
           entrez_links_new_window=None, expect_low=None, expect_high=None,
           format_entrez_query=None, format_object=None, format_type='XML',
           ncbi_gi=None, results_file=None, show_overview=None, megablast=None,
           ):
    """Do a BLAST search using the QBLAST server at NCBI.

    Supports all parameters of the qblast API for Put and Get.
    Some useful parameters:
    program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
    database       Which database to search against (e.g. "nr").
    sequence       The sequence to search.
    ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
    descriptions   Number of descriptions to show.  Def 500.
    alignments     Number of alignments to show.  Def 500.
    expect         An expect value cutoff.  Def 10.0.
    matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
    filter         "none" turns off filtering.  Default no filtering
    format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
    entrez_query   Entrez query to limit Blast search
    hitlist_size   Number of hits to return. Default 50
    megablast      TRUE/FALSE whether to use MEga BLAST algorithm (blastn only)
    service        plain, psi, phi, rpsblast, megablast (lower case)

    This function does no checking of the validity of the parameters
    and passes the values to the server as is.  More help is available at:
    http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html

    """
    import time

    assert program in ['blastn', 'blastp', 'blastx', 'tblastn', 'tblastx']

    # Format the "Put" command, which sends search requests to qblast.
    # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node5.html on 9 July 2007
    # Additional parameters are taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node9.html on 8 Oct 2010
    # To perform a PSI-BLAST or PHI-BLAST search the service ("Put" and "Get" commands) must be specified
    # (e.g. psi_blast = NCBIWWW.qblast("blastp", "refseq_protein", input_sequence, service="psi"))
    parameters = [
        ('AUTO_FORMAT', auto_format),
        ('COMPOSITION_BASED_STATISTICS', composition_based_statistics),
        ('DATABASE', database),
        ('DB_GENETIC_CODE', db_genetic_code),
        ('ENDPOINTS', endpoints),
        ('ENTREZ_QUERY', entrez_query),
        ('EXPECT', expect),
        ('FILTER', filter),
        ('GAPCOSTS', gapcosts),
        ('GENETIC_CODE', genetic_code),
        ('HITLIST_SIZE', hitlist_size),
        ('I_THRESH', i_thresh),
        ('LAYOUT', layout),
        ('LCASE_MASK', lcase_mask),
        ('MEGABLAST', megablast),
        ('MATRIX_NAME', matrix_name),
        ('NUCL_PENALTY', nucl_penalty),
        ('NUCL_REWARD', nucl_reward),
        ('OTHER_ADVANCED', other_advanced),
        ('PERC_IDENT', perc_ident),
        ('PHI_PATTERN', phi_pattern),
        ('PROGRAM', program),
        #('PSSM',pssm), - It is possible to use PSI-BLAST via this API?
        ('QUERY', sequence),
        ('QUERY_FILE', query_file),
        ('QUERY_BELIEVE_DEFLINE', query_believe_defline),
        ('QUERY_FROM', query_from),
        ('QUERY_TO', query_to),
        #('RESULTS_FILE',...), - Can we use this parameter?
        ('SEARCHSP_EFF', searchsp_eff),
        ('SERVICE', service),
        ('THRESHOLD', threshold),
        ('UNGAPPED_ALIGNMENT', ungapped_alignment),
        ('WORD_SIZE', word_size),
        ('CMD', 'Put'),
        ]
    query = [x for x in parameters if x[1] is not None]
    message = _as_bytes(_urlencode(query))

    # Send off the initial query to qblast.
    # Note the NCBI do not currently impose a rate limit here, other
    # than the request not to make say 50 queries at once using multiple
    # threads.
    request = _Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
                       message,
                       {"User-Agent":"BiopythonClient"})
    handle = _urlopen(request)

    # Format the "Get" command, which gets the formatted results from qblast
    # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node6.html on 9 July 2007
    rid, rtoe = _parse_qblast_ref_page(handle)
    parameters = [
        ('ALIGNMENTS', alignments),
        ('ALIGNMENT_VIEW', alignment_view),
        ('DESCRIPTIONS', descriptions),
        ('ENTREZ_LINKS_NEW_WINDOW', entrez_links_new_window),
        ('EXPECT_LOW', expect_low),
        ('EXPECT_HIGH', expect_high),
        ('FORMAT_ENTREZ_QUERY', format_entrez_query),
        ('FORMAT_OBJECT', format_object),
        ('FORMAT_TYPE', format_type),
        ('NCBI_GI', ncbi_gi),
        ('RID', rid),
        ('RESULTS_FILE', results_file),
        ('SERVICE', service),
        ('SHOW_OVERVIEW', show_overview),
        ('CMD', 'Get'),
        ]
    query = [x for x in parameters if x[1] is not None]
    message = _as_bytes(_urlencode(query))

    # Poll NCBI until the results are ready.  Use a backoff delay from 2 - 120 second wait
    delay = 2.0
    previous = time.time()
    while True:
        current = time.time()
        wait = previous + delay - current
        if wait > 0:
            time.sleep(wait)
            previous = current + wait
        else:
            previous = current
        if delay + .5*delay <= 120:
            delay += .5*delay
        else:
            delay = 120

        request = _Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
                           message,
                           {"User-Agent":"BiopythonClient"})
        handle = _urlopen(request)
        results = _as_string(handle.read())

        # Can see an "\n\n" page while results are in progress,
        # if so just wait a bit longer...
        if results=="\n\n":
            continue
        # XML results don't have the Status tag when finished
        if "Status=" not in results:
            break
        i = results.index("Status=")
        j = results.index("\n", i)
        status = results[i+len("Status="):j].strip()
        if status.upper() == "READY":
            break

    return StringIO(results)
예제 #18
0
          "MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG" + \
          "TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS" + \
          "LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV"

    for alignment in ClustalIterator(StringIO(aln_example2 + aln_example1)):
        print("Alignment with %i records of length %i" \
              % (len(alignment),
                 alignment.get_alignment_length()))

    print("Checking empty file...")
    assert 0 == len(list(ClustalIterator(StringIO(""))))

    print("Checking write/read...")
    alignments = list(ClustalIterator(StringIO(aln_example1))) \
               + list(ClustalIterator(StringIO(aln_example2)))*2
    handle = StringIO()
    ClustalWriter(handle).write_file(alignments)
    handle.seek(0)
    for i, a in enumerate(ClustalIterator(handle)):
        assert a.get_alignment_length() == alignments[i].get_alignment_length()
    handle.seek(0)

    print("Testing write/read when there is only one sequence...")
    alignment = alignment[0:1]
    handle = StringIO()
    ClustalWriter(handle).write_file([alignment])
    handle.seek(0)
    for i, a in enumerate(ClustalIterator(handle)):
        assert a.get_alignment_length() == alignment.get_alignment_length()
        assert len(a) == 1
예제 #19
0
파일: SeqXmlIO.py 프로젝트: kaspermunch/sap
                            self.xml_generator.endElement("property")

                elif isinstance(value, (int, float, basestring)):

                    attr = {"name": key, "value": str(value)}
                    self.xml_generator.startElement(
                        "property", AttributesImpl(attr))
                    self.xml_generator.endElement("property")

if __name__ == "__main__":
    print("Running quick self test")

    from SAP.Bio import SeqIO
    import sys

    with open("Tests/SeqXML/protein_example.xml", "r") as fileHandle:
        records = list(SeqIO.parse(fileHandle, "seqxml"))

    from SAP.Bio._py3k import StringIO
    stringHandle = StringIO()

    SeqIO.write(records, stringHandle, "seqxml")
    SeqIO.write(records, sys.stdout, "seqxml")
    print("")

    stringHandle.seek(0)
    records = list(SeqIO.parse(stringHandle, "seqxml"))

    SeqIO.write(records, sys.stdout, "seqxml")
    print("")
예제 #20
0
           VLHDTPNILY -FIKTAGNGQ FKAVGDSLEA Q-----QYGI AFPKGS--DE
           AYNDRLVVNY -IINDQ-KLP VRGAGQIGDA A-----PVGI ALKKGN--SA
           AFQDEVAASE GFLKQPVGKD YKFGGPSVKD EKLFGVGTGM GLRKED--NE

           LQAEVNKALA EMRADGTVEK ISVKWFGADI TK----
           LRKKVNKALD ELRKDGTLKK LSEKYFNEDI TVEQKH
           VVDQVNKALK EMKEDGTLSK ISKKWFGEDV SK----
           LITKFNQVLE ALRQDGTLKQ ISIEWFGYDI TQ----
           LLKAVNDAIA EMQKDGTLQA LSEKWFGADV TK----
           LRDKVNGALK TLRENGTYNE IYKKWFGTEP K-----
           LKDQIDKALT EMRSDGTFEK ISQKWFGQDV GQP---
           LREALNKAFA EMRADGTYEK LAKKYFDFDV YGG---
"""

    from SAP.Bio._py3k import StringIO
    handle = StringIO(phylip_text)
    count = 0
    for alignment in PhylipIterator(handle):
        for record in alignment:
            count = count + 1
            print(record.id)
            #print str(record.seq)
    assert count == 8

    expected = """mkklvlslsl vlafssataa faaipqniri gtdptyapfe sknsqgelvg
    fdidlakelc krintqctfv enpldalips lkakkidaim sslsitekrq qeiaftdkly
    aadsrlvvak nsdiqptves lkgkrvgvlq gttqetfgne hwapkgieiv syqgqdniys
    dltagridaafqdevaaseg flkqpvgkdy kfggpsvkde klfgvgtgmg lrkednelre
    alnkafaemradgtyeklak kyfdfdvygg""".replace(" ", "").replace("\n",
                                                                "").upper()
    assert str(record.seq).replace("-", "") == expected
예제 #21
0
            #just the generic Alphabet (default for fasta files)
            raise ValueError("Need a DNA, RNA or Protein alphabet")

if __name__ == "__main__":
    from SAP.Bio._py3k import StringIO
    print("Quick self test")
    print("")
    print("Repeated names without a TAXA block")
    handle = StringIO("""#NEXUS
    [TITLE: NoName]

    begin data;
    dimensions ntax=4 nchar=50;
    format interleave datatype=protein   gap=- symbols="FSTNKEYVQMCLAWPHDRIG";

    matrix
    CYS1_DICDI          -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---- 
    ALEU_HORVU          MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG 
    CATH_HUMAN          ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
    CYS1_DICDI          -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
    ;
    end; 
    """)
    for a in NexusIterator(handle):
        print(a)
        for r in a:
            print("%r %s %s" % (r.seq, r.name, r.id))
    print("Done")

    print("")
    print("Repeated names with a TAXA block")
예제 #22
0
V_Harveyi_PATH                 LETGRIDTISNQITMTDARKAKYLFADPYVVDG-AQI
B_subtilis_YXEM                LQTGKLDTISNQVAVTDERKETYNFTKPYAYAG-TQI
B_subtilis_GlnH_homo_YCKK      LNSKRFDVVANQVG-KTDREDKYDFSDKYTTSR-AVV
YA80_HAEIN                     LNAKRFDVIANQTNPSPERLKKYSFTTPYNYSG-GVI
FLIY_ECOLI                     LDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQAL
E_coli_GlnH                    LQTKNVDLALAGITITDERKKAIDFSDGYYKSG-LLV
Deinococcus_radiodurans        LQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEII
HISJ_E_COLI                    LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
HISJ_E_COLI                    LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
                               *.: . *        .  *     *:          :

"""

    from SAP.Bio._py3k import StringIO

    alignments = list(ClustalIterator(StringIO(aln_example1)))
    assert 1 == len(alignments)
    assert alignments[0]._version == "1.81"
    alignment = alignments[0]
    assert 2 == len(alignment)
    assert alignment[0].id == "gi|4959044|gb|AAD34209.1|AF069"
    assert alignment[1].id == "gi|671626|emb|CAA85685.1|"
    assert str(alignment[0].seq) == \
          "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN" + \
          "LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW" + \
          "LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT" + \
          "SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE" + \
          "VPTTRAQRRA"

    alignments = list(ClustalIterator(StringIO(aln_example2)))
    assert 1 == len(alignments)
예제 #23
0
파일: SeqXmlIO.py 프로젝트: cbirdlab/sap
                elif isinstance(value, (int, float, basestring)):

                    attr = {"name": key, "value": str(value)}
                    self.xml_generator.startElement("property",
                                                    AttributesImpl(attr))
                    self.xml_generator.endElement("property")


if __name__ == "__main__":
    print("Running quick self test")

    from SAP.Bio import SeqIO
    import sys

    with open("Tests/SeqXML/protein_example.xml", "r") as fileHandle:
        records = list(SeqIO.parse(fileHandle, "seqxml"))

    from SAP.Bio._py3k import StringIO
    stringHandle = StringIO()

    SeqIO.write(records, stringHandle, "seqxml")
    SeqIO.write(records, sys.stdout, "seqxml")
    print("")

    stringHandle.seek(0)
    records = list(SeqIO.parse(stringHandle, "seqxml"))

    SeqIO.write(records, sys.stdout, "seqxml")
    print("")