Esempio n. 1
0
    def test_append_to_flowgram_file(self):
        """append_to_flowgram_file appends a flowgram to a flowgram file.

        Two flowgrams are written to a freshly initialised flowgram file:
        the first with default options, the second with trim=True. The file
        is then read back and compared line by line; the second record is
        expected to appear with only 6 of its 8 flow values.
        """

        fh, tmp_filename = init_flowgram_file(n=100, l=400)
        try:
            self.assertTrue(exists(tmp_filename))
            # Kept on the instance -- presumably so the fixture teardown can
            # delete the file (teardown is not visible here; confirm).
            self.tmp_filename = tmp_filename

            flow1 = Flowgram("0 1.2 2.1 3.4 0.02 0.01 1.02 0.08")
            append_to_flowgram_file("test_id", flow1, fh)

            flow2 = Flowgram('0.5 1.0 4.1 0.0 0.0 1.23 0.0 3.1',
                             Name='a',
                             floworder="TACG",
                             header_info={
                                 'Bases': 'TACCCCAGGG',
                                 'Clip Qual Right': 7,
                                 'Flow Indexes': "1\t2\t3\t3\t3\t3\t6\t8\t8\t8"
                             })
            append_to_flowgram_file("test_id2", flow2, fh, trim=True)
        finally:
            # Always release the write handle, even when an assertion or an
            # append fails, so the data is flushed and no descriptor leaks.
            fh.close()

        # close and re-open to read from start, seek might work as well here...
        with open(tmp_filename) as result_fh:
            result_file_content = list(result_fh)
        self.assertEqual(result_file_content, [
            "100 400\n", "test_id 8 0.0 1.2 2.1 3.4 0.02 0.01 1.02 0.08\n",
            "test_id2 6 0.5 1.0 4.1 0.0 0.0 1.23\n"
        ])
Esempio n. 2
0
    def test_append_to_flowgram_file(self):
        """append_to_flowgram_file appends a flowgram to a flowgram file.

        Two flowgrams are written to a freshly initialised flowgram file:
        the first with default options, the second with trim=True. The file
        is then read back and compared line by line; the second record is
        expected to appear with only 6 of its 8 flow values.
        """

        fh, tmp_filename = init_flowgram_file(n=100, l=400)
        try:
            self.assertTrue(exists(tmp_filename))
            # Kept on the instance -- presumably so the fixture teardown can
            # delete the file (teardown is not visible here; confirm).
            self.tmp_filename = tmp_filename

            flow1 = Flowgram("0 1.2 2.1 3.4 0.02 0.01 1.02 0.08")
            append_to_flowgram_file("test_id", flow1, fh)

            flow2 = Flowgram(
                "0.5 1.0 4.1 0.0 0.0 1.23 0.0 3.1",
                Name="a",
                floworder="TACG",
                header_info={"Bases": "TACCCCAGGG", "Clip Qual Right": 7, "Flow Indexes": "1\t2\t3\t3\t3\t3\t6\t8\t8\t8"},
            )
            append_to_flowgram_file("test_id2", flow2, fh, trim=True)
        finally:
            # Always release the write handle, even when an assertion or an
            # append fails, so the data is flushed and no descriptor leaks.
            fh.close()

        # close and re-open to read from start, seek might work as well here...
        with open(tmp_filename) as result_fh:
            result_file_content = list(result_fh)
        self.assertEqual(
            result_file_content,
            ["100 400\n", "test_id 8 0.0 1.2 2.1 3.4 0.02 0.01 1.02 0.08\n", "test_id2 6 0.5 1.0 4.1 0.0 0.0 1.23\n"],
        )
Esempio n. 3
0
def get_flowgram_distances(id,
                           flowgram,
                           flowgrams,
                           fc,
                           ids,
                           outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier; written as the identifier of the first
        record of the alignment input file

    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object

    ids: dict of ids of flowgrams in flowgrams that should be aligned

    outdir: directory for intermediate files

    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc):
        scores: one list of floats per non-empty output line of the
                external alignment program
        names:  names of the flowgrams that were selected via ids, in
                iteration order
        fc:     the sink passed in, after the selected flowgrams were added

    Raises RuntimeError if the number of score lines differs from the
    number of selected flowgrams.

    The intermediate alignment input file is removed on every exit path,
    including errors.
    """
    check_flowgram_ali_exe()
    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        names = []
        try:
            append_to_flowgram_file(id, flowgram, fh)
            for f in flowgrams:
                if f.Name in ids:
                    fc.add(f)
                    append_to_flowgram_file(f.Name, f, fh, trim=False)
                    names.append(f.Name)
        finally:
            # Flush and release the input file before the external program
            # reads it, and do not leak the handle if writing fails.
            fh.close()

        # TODO: capture stderr and warn user
        scores_fh = popen(
            "%s -relscore_pairid %s %s " %
            (get_flowgram_ali_exe(), error_profile, tmpfile), 'r')
        try:
            # Build real lists: a bare map() is a lazy iterator on Python 3
            # and callers would not get lists of floats.
            scores = [[float(value) for value in line.split()]
                      for line in scores_fh if line != "\n"]
        finally:
            scores_fh.close()
    finally:
        # The temp file is only an exchange format for the aligner; never
        # leave it behind, not even when something above failed.
        remove(tmpfile)

    if len(names) != len(scores):
        raise RuntimeError(
            "Something bad has happened! I received less " +
            "alignment scores than there are flowgrams. Most likely this " +
            "means that the alignment program is not setup or corrupted. " +
            "Please run the test scripts to figure out the cause of the error."
        )

    return (scores, names, fc)
Esempio n. 4
0
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier; written as the identifier of the first
        record of the alignment input file

    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object

    ids: dict of ids of flowgrams in flowgrams that should be aligned

    outdir: directory for intermediate files

    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc):
        scores: one list of floats per non-empty output line of the
                external alignment program
        names:  names of the flowgrams that were selected via ids, in
                iteration order
        fc:     the sink passed in, after the selected flowgrams were added

    Raises RuntimeError if the number of score lines differs from the
    number of selected flowgrams.

    The intermediate alignment input file is removed on every exit path,
    including errors.
    """
    check_flowgram_ali_exe()
    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        names = []
        try:
            append_to_flowgram_file(id, flowgram, fh)
            for f in flowgrams:
                if f.Name in ids:
                    fc.add(f)
                    append_to_flowgram_file(f.Name, f, fh, trim=False)
                    names.append(f.Name)
        finally:
            # Flush and release the input file before the external program
            # reads it, and do not leak the handle if writing fails.
            fh.close()

        # TODO: capture stderr and warn user
        scores_fh = popen("%s -relscore_pairid %s %s " %
                          (get_flowgram_ali_exe(),
                           error_profile, tmpfile), 'r')
        try:
            # Build real lists: a bare map() is a lazy iterator on Python 3
            # and callers would not get lists of floats.
            scores = [[float(value) for value in line.split()]
                      for line in scores_fh if line != "\n"]
        finally:
            scores_fh.close()
    finally:
        # The temp file is only an exchange format for the aligner; never
        # leave it behind, not even when something above failed.
        remove(tmpfile)

    if len(names) != len(scores):
        raise RuntimeError("Something bad has happened! I received less " +
                           "alignment scores than there are flowgrams. Most likely this " +
                           "means that the alignment program is not setup or corrupted. " +
                           "Please run the test scripts to figure out the cause of the error.")

    return (scores, names, fc)