def found_terminator(self):
    """Action performed when the terminator is found.

    Writes the chunks collected in ``self.in_buffer`` to a per-round
    flowgram file, runs ``_process_data`` on it, and pushes the result
    followed by an ``--END--`` marker back to the peer. The round counter
    is incremented once the result has been pushed.

    The intermediate ``.dat`` file is removed even when processing fails,
    and the file handle is always closed.
    """
    # Note this function is event-triggered.
    # Being called means we received all necessary data.
    if self.log_fh is not None:
        self.log_fh.write("Data for round %d received: %s\n"
                          % (self.counter, time()))

    this_round_fp = "%s_%d" % (self.fp, self.counter)
    dat_fp = this_round_fp + ".dat"

    # Only the handle is needed; the returned filename is ignored, as in
    # the code this replaces.
    fh, _ = init_flowgram_file(filename=dat_fp, n=0)
    try:
        for chunk in self.in_buffer:
            fh.write(chunk)
    finally:
        # Close even if a write fails so the descriptor is not leaked.
        fh.close()
    self.in_buffer = []

    try:
        result = _process_data(this_round_fp, self.log_fh,
                               self.error_profile)
    finally:
        # The round file is only an intermediate; do not leave it behind
        # when processing raises.
        remove(dat_fp)

    # Return results to server.
    # Do we need buffering here?
    # No, push() does the buffering for us.
    self.push(result)
    self.push("--END--")
    self.counter += 1
def found_terminator(self):
    """Action performed when the terminator is found.

    Writes the chunks collected in ``self.in_buffer`` to a per-round
    flowgram file, runs ``_process_data`` on it, and pushes the result
    followed by an ``--END--`` marker back to the peer. The round counter
    is incremented once the result has been pushed.

    The intermediate ``.dat`` file is removed even when processing fails,
    and the file handle is always closed.
    """
    # Note this function is event-triggered.
    # Being called means we received all necessary data.
    if self.log_fh is not None:
        self.log_fh.write("Data for round %d received: %s\n"
                          % (self.counter, time()))

    this_round_fp = "%s_%d" % (self.fp, self.counter)
    dat_fp = this_round_fp + ".dat"

    # Only the handle is needed; the returned filename is ignored, as in
    # the code this replaces.
    fh, _ = init_flowgram_file(filename=dat_fp, n=0)
    try:
        for chunk in self.in_buffer:
            fh.write(chunk)
    finally:
        # Close even if a write fails so the descriptor is not leaked.
        fh.close()
    self.in_buffer = []

    try:
        result = _process_data(this_round_fp, self.log_fh,
                               self.error_profile)
    finally:
        # The round file is only an intermediate; do not leave it behind
        # when processing raises.
        remove(dat_fp)

    # Return results to server.
    # Do we need buffering here?
    # No, push() does the buffering for us.
    self.push(result)
    self.push("--END--")
    self.counter += 1
def test_append_to_flowgram_file(self):
    """append_to_flowgram_file appends a flowgram to a flowgram file."""
    fh, tmp_filename = init_flowgram_file(n=100, l=400)
    try:
        self.assertTrue(exists(tmp_filename))
        # Remember the path on the test case
        # (presumably removed in tearDown -- confirm against the class).
        self.tmp_filename = tmp_filename

        flow1 = Flowgram("0 1.2 2.1 3.4 0.02 0.01 1.02 0.08")
        append_to_flowgram_file("test_id", flow1, fh)

        flow2 = Flowgram(
            '0.5 1.0 4.1 0.0 0.0 1.23 0.0 3.1',
            Name='a',
            floworder="TACG",
            header_info={
                'Bases': 'TACCCCAGGG',
                'Clip Qual Right': 7,
                'Flow Indexes': "1\t2\t3\t3\t3\t3\t6\t8\t8\t8",
            })
        # With trim=True only 6 of the 8 flow values are expected in the
        # output line (see the assertion below).
        append_to_flowgram_file("test_id2", flow2, fh, trim=True)
    finally:
        # Close before re-opening to read from the start; also guarantees
        # the handle is released when an assertion above fails.
        fh.close()

    with open(tmp_filename) as result_fh:
        result_file_content = list(result_fh)

    self.assertEqual(result_file_content, [
        "100 400\n",
        "test_id 8 0.0 1.2 2.1 3.4 0.02 0.01 1.02 0.08\n",
        "test_id2 6 0.5 1.0 4.1 0.0 0.0 1.23\n",
    ])
def test_append_to_flowgram_file(self):
    """append_to_flowgram_file appends a flowgram to a flowgram file."""
    fh, tmp_filename = init_flowgram_file(n=100, l=400)
    try:
        self.assertTrue(exists(tmp_filename))
        # Remember the path on the test case
        # (presumably removed in tearDown -- confirm against the class).
        self.tmp_filename = tmp_filename

        flow1 = Flowgram("0 1.2 2.1 3.4 0.02 0.01 1.02 0.08")
        append_to_flowgram_file("test_id", flow1, fh)

        flow2 = Flowgram(
            "0.5 1.0 4.1 0.0 0.0 1.23 0.0 3.1",
            Name="a",
            floworder="TACG",
            header_info={
                "Bases": "TACCCCAGGG",
                "Clip Qual Right": 7,
                "Flow Indexes": "1\t2\t3\t3\t3\t3\t6\t8\t8\t8",
            },
        )
        # With trim=True only 6 of the 8 flow values are expected in the
        # output line (see the assertion below).
        append_to_flowgram_file("test_id2", flow2, fh, trim=True)
    finally:
        # Close before re-opening to read from the start; also guarantees
        # the handle is released when an assertion above fails.
        fh.close()

    with open(tmp_filename) as result_fh:
        result_file_content = list(result_fh)

    self.assertEqual(
        result_file_content,
        [
            "100 400\n",
            "test_id 8 0.0 1.2 2.1 3.4 0.02 0.01 1.02 0.08\n",
            "test_id2 6 0.5 1.0 4.1 0.0 0.0 1.23\n",
        ],
    )
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier, also used to name intermediate files

    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object

    ids: dict of ids of flowgrams in flowgrams that should be aligned

    outdir: directory for intermediate files

    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc): scores holds one list of floats
    per non-empty output line of the alignment program, names the Names of
    the flowgrams that were written for alignment (in order), and fc the
    sink that was passed in, now holding those flowgrams.

    Raises RuntimeError if the number of score lines differs from the
    number of flowgrams handed to the alignment program.
    """
    check_flowgram_ali_exe()

    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        names = []
        try:
            append_to_flowgram_file(id, flowgram, fh)
            for f in flowgrams:
                if f.Name in ids:
                    fc.add(f)
                    append_to_flowgram_file(f.Name, f, fh, trim=False)
                    names.append(f.Name)
        finally:
            # Release the handle even if writing a flowgram fails.
            fh.close()

        # TODO: capture stderr and warn user
        scores_fh = popen("%s -relscore_pairid %s %s "
                          % (get_flowgram_ali_exe(), error_profile, tmpfile),
                          'r')
        try:
            # Build real lists (not lazy map objects) so the result is the
            # same on Python 2 and Python 3.
            scores = [[float(value) for value in line.split()]
                      for line in scores_fh if line != "\n"]
        finally:
            # Close the pipe so the child process is reaped.
            scores_fh.close()

        if len(names) != len(scores):
            raise RuntimeError(
                "Something bad has happened! I received less " +
                "alignment scores than there are flowgrams. Most likely this " +
                "means that the alignment program is not setup or corrupted. " +
                "Please run the test scripts to figure out the cause of the error.")
    finally:
        # The alignment input is only an intermediate file; remove it on
        # success and on failure alike.
        remove(tmpfile)

    return (scores, names, fc)
def test_init_flowgram_file(self):
    """init_flowgram_file opens a file and writes header."""
    fh, tmp_filename = init_flowgram_file(n=100, l=400)
    try:
        self.assertTrue(exists(tmp_filename))
        # Remember the path on the test case
        # (presumably removed in tearDown -- confirm against the class).
        self.tmp_filename = tmp_filename
    finally:
        # Close so the header is flushed before reading, and so the handle
        # is released even when the existence check fails.
        fh.close()

    with open(tmp_filename) as result_fh:
        result_file_content = list(result_fh)

    self.assertEqual(result_file_content, ["100 400\n"])
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier, also used to name intermediate files

    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object

    ids: dict of ids of flowgrams in flowgrams that should be aligned

    outdir: directory for intermediate files

    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc): scores holds one list of floats
    per non-empty output line of the alignment program, names the Names of
    the flowgrams that were written for alignment (in order), and fc the
    sink that was passed in, now holding those flowgrams.

    Raises RuntimeError if the number of score lines differs from the
    number of flowgrams handed to the alignment program.
    """
    check_flowgram_ali_exe()

    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        names = []
        try:
            append_to_flowgram_file(id, flowgram, fh)
            for f in flowgrams:
                if f.Name in ids:
                    fc.add(f)
                    append_to_flowgram_file(f.Name, f, fh, trim=False)
                    names.append(f.Name)
        finally:
            # Release the handle even if writing a flowgram fails.
            fh.close()

        # TODO: capture stderr and warn user
        scores_fh = popen("%s -relscore_pairid %s %s "
                          % (get_flowgram_ali_exe(), error_profile, tmpfile),
                          'r')
        try:
            # Build real lists (not lazy map objects) so the result is the
            # same on Python 2 and Python 3.
            scores = [[float(value) for value in line.split()]
                      for line in scores_fh if line != "\n"]
        finally:
            # Close the pipe so the child process is reaped.
            scores_fh.close()

        if len(names) != len(scores):
            raise RuntimeError(
                "Something bad has happened! I received less " +
                "alignment scores than there are flowgrams. Most likely this " +
                "means that the alignment program is not setup or corrupted. " +
                "Please run the test scripts to figure out the cause of the error.")
    finally:
        # The alignment input is only an intermediate file; remove it on
        # success and on failure alike.
        remove(tmpfile)

    return (scores, names, fc)