def test_append_to_flowgram_file(self):
    """append_to_flowgram_file appends a flowgram to a flowgram file."""
    fh, tmp_filename = init_flowgram_file(n=100, l=400)
    self.assertTrue(exists(tmp_filename))
    # NOTE(review): presumably read by tearDown to delete the file -- confirm.
    self.tmp_filename = tmp_filename

    try:
        # Untrimmed flowgram: all eight flow values are expected in the file.
        flow1 = Flowgram("0 1.2 2.1 3.4 0.02 0.01 1.02 0.08")
        append_to_flowgram_file("test_id", flow1, fh)

        # Flowgram with header info, written with trim=True: only the first
        # six flow values are expected in the file.
        flow2 = Flowgram('0.5 1.0 4.1 0.0 0.0 1.23 0.0 3.1',
                         Name='a',
                         floworder="TACG",
                         header_info={
                             'Bases': 'TACCCCAGGG',
                             'Clip Qual Right': 7,
                             'Flow Indexes':
                             "1\t2\t3\t3\t3\t3\t6\t8\t8\t8"
                         })
        append_to_flowgram_file("test_id2", flow2, fh, trim=True)
    finally:
        # Always release the write handle, even if an append fails.
        fh.close()

    # close and re-open to read from start, seek might work as well here...
    with open(tmp_filename) as result_fh:
        result_file_content = list(result_fh)

    self.assertEqual(result_file_content, [
        "100 400\n",
        "test_id 8 0.0 1.2 2.1 3.4 0.02 0.01 1.02 0.08\n",
        "test_id2 6 0.5 1.0 4.1 0.0 0.0 1.23\n"
    ])
def test_append_to_flowgram_file(self):
    """append_to_flowgram_file appends a flowgram to a flowgram file."""
    fh, tmp_filename = init_flowgram_file(n=100, l=400)
    self.assertTrue(exists(tmp_filename))
    # NOTE(review): looks like this is kept for later cleanup -- confirm.
    self.tmp_filename = tmp_filename

    flow1 = Flowgram("0 1.2 2.1 3.4 0.02 0.01 1.02 0.08")
    flow2 = Flowgram(
        "0.5 1.0 4.1 0.0 0.0 1.23 0.0 3.1",
        Name="a",
        floworder="TACG",
        header_info={
            "Bases": "TACCCCAGGG",
            "Clip Qual Right": 7,
            "Flow Indexes": "1\t2\t3\t3\t3\t3\t6\t8\t8\t8",
        },
    )

    try:
        # First record is written as-is, second one with trimming enabled.
        append_to_flowgram_file("test_id", flow1, fh)
        append_to_flowgram_file("test_id2", flow2, fh, trim=True)
    finally:
        # Close the write handle on every path so it cannot leak.
        fh.close()

    # close and re-open to read from start, seek might work as well here...
    with open(tmp_filename) as written:
        result_file_content = list(written)

    expected = [
        "100 400\n",
        "test_id 8 0.0 1.2 2.1 3.4 0.02 0.01 1.02 0.08\n",
        "test_id2 6 0.5 1.0 4.1 0.0 0.0 1.23\n",
    ]
    self.assertEqual(result_file_content, expected)
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier, also used to name intermediate files

    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object

    ids: dict of ids of flowgrams in flowgrams that should be aligned

    outdir: directory for intermediate files

    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc):
        scores: one list of floats per non-blank output line of the
                alignment program
        names:  names of the flowgrams that were written to the alignment
                input, in the order they were written
        fc:     the sink passed in, after every selected flowgram was added

    Raises RuntimeError if the number of score lines differs from the
    number of selected flowgrams.
    """
    check_flowgram_ali_exe()

    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    names = []
    try:
        # The query flowgram goes first, followed by every selected flowgram.
        append_to_flowgram_file(id, flowgram, fh)
        for f in flowgrams:
            if f.Name in ids:
                fc.add(f)
                append_to_flowgram_file(f.Name, f, fh, trim=False)
                names.append(f.Name)
    finally:
        # Close the input file even if writing a flowgram fails.
        fh.close()

    # TODO: capture stderr and warn user
    scores_fh = popen("%s -relscore_pairid %s %s " %
                      (get_flowgram_ali_exe(), error_profile, tmpfile), 'r')
    try:
        # Nested comprehension instead of map(): yields real lists of floats
        # on both Python 2 and Python 3.
        scores = [[float(value) for value in s.split()]
                  for s in scores_fh if s != "\n"]
    finally:
        # Release the pipe to the alignment program.
        scores_fh.close()

    if len(names) != len(scores):
        # The input file is intentionally not removed on this path.
        raise RuntimeError("Something bad has happened! I received less " +
                           "alignment scores than there are flowgrams. Most likely this " +
                           "means that the alignment program is not setup or corrupted. " +
                           "Please run the test scripts to figure out the cause of the error.")

    remove(tmpfile)

    return (scores, names, fc)
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier, also used to name intermediate files

    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object

    ids: dict of ids of flowgrams in flowgrams that should be aligned

    outdir: directory for intermediate files

    error_profile: path to error profile *.dat file

    Returns (scores, names, fc), where scores holds one list of floats per
    non-blank line printed by the alignment program, names lists the
    flowgram names that were submitted for alignment (in submission order)
    and fc is the sink that was passed in.

    Raises RuntimeError when the number of score lines does not match the
    number of submitted flowgrams.
    """
    check_flowgram_ali_exe()

    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    names = []
    try:
        append_to_flowgram_file(id, flowgram, fh)
        for candidate in flowgrams:
            # Only flowgrams whose name is listed in ids are aligned.
            if candidate.Name not in ids:
                continue
            fc.add(candidate)
            append_to_flowgram_file(candidate.Name, candidate, fh, trim=False)
            names.append(candidate.Name)
    finally:
        # Never leave the input file handle open, even on a write error.
        fh.close()

    # TODO: capture stderr and warn user
    command = "%s -relscore_pairid %s %s " % (get_flowgram_ali_exe(),
                                              error_profile, tmpfile)
    scores_fh = popen(command, 'r')
    try:
        scores = []
        for line in scores_fh:
            if line != "\n":
                # Build a concrete list; map() would be lazy on Python 3.
                scores.append([float(token) for token in line.split()])
    finally:
        # Close the pipe so the child process is reaped.
        scores_fh.close()

    if len(names) != len(scores):
        # tmpfile is left on disk on this path; it is only removed on success.
        raise RuntimeError("Something bad has happened! I received less " +
                           "alignment scores than there are flowgrams. Most likely this " +
                           "means that the alignment program is not setup or corrupted. " +
                           "Please run the test scripts to figure out the cause of the error.")

    remove(tmpfile)

    return (scores, names, fc)