def _process_data(this_round_fp, log_fh=None, error_profile=None):
    """Compute alignment scores for the flowgrams in this_round_fp.

    Runs the external flowgram aligner on "<this_round_fp>.dat" and hands
    back whatever the aligner printed on stdout.

    this_round_fp: fp to input data; ".dat" is appended to form the file name
    log_fh: fh to log file (optional); it is closed if the aligner fails
    error_profile: path to error profile

    Returns the aligner's stdout as a string.

    Raises RuntimeError if the aligner cannot be started or exits with a
    non-zero status.
    """
    ali_exe = get_flowgram_ali_exe()
    data_fp = "%s.dat" % this_round_fp

    # Shell-style rendering of the call; only used for the log message.
    cmd = "%s -relscore_pairid %s %s" % (ali_exe, error_profile, data_fp)

    # Pass the arguments as a list instead of going through a shell, so that
    # paths containing spaces or shell metacharacters reach the aligner
    # unmodified.
    argv = [ali_exe, "-relscore_pairid", str(error_profile), data_fp]
    try:
        proc = Popen(argv, universal_newlines=True,
                     stdout=PIPE, stderr=PIPE)
        stdout, stderr = proc.communicate()
        failed = proc.returncode != 0
    except OSError as e:
        # Without a shell, a missing or non-executable aligner shows up as an
        # OSError rather than a non-zero exit status. Treat it as the same
        # failure so callers keep seeing RuntimeError.
        stdout, stderr, failed = "", str(e), True

    if failed:
        host = gethostname()
        if log_fh:
            log_fh.write("An error occured on %s at %f\n%s"
                         % (host, time(), cmd))
            log_fh.write(stderr)
            log_fh.close()
        raise RuntimeError("Worker process crashed. Aborting...!\n" +
                           "Note: You need to kill the other jobs yourself")

    if log_fh:
        log_fh.write(this_round_fp + "... done!\n")
    return stdout
def _process_data(this_round_fp, log_fh=None, error_profile=None):
    """Compute alignment scores for the flowgrams in this_round_fp.

    The external flowgram aligner is run on "<this_round_fp>.dat" and its
    stdout is returned to the caller.

    this_round_fp: fp to input data; ".dat" is appended to form the file name
    log_fh: fh to log file (optional); it is closed if the aligner fails
    error_profile: path to error profile

    Returns the aligner's stdout as a string.

    Raises RuntimeError if the aligner cannot be started or exits with a
    non-zero status.
    """
    ali_exe = get_flowgram_ali_exe()
    data_fp = "%s.dat" % this_round_fp

    # Human-readable form of the call, written to the log on failure.
    cmd = "%s -relscore_pairid %s %s" % (ali_exe, error_profile, data_fp)

    # An argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact instead of letting a shell re-split them.
    argv = [ali_exe, "-relscore_pairid", str(error_profile), data_fp]
    try:
        proc = Popen(argv, universal_newlines=True,
                     stdout=PIPE, stderr=PIPE)
        stdout, stderr = proc.communicate()
        failed = proc.returncode != 0
    except OSError as e:
        # A missing or non-executable aligner raises here when no shell is
        # involved; fold it into the regular failure path so the exception
        # type seen by callers stays RuntimeError.
        stdout, stderr, failed = "", str(e), True

    if failed:
        host = gethostname()
        if log_fh:
            log_fh.write("An error occured on %s at %f\n%s"
                         % (host, time(), cmd))
            log_fh.write(stderr)
            log_fh.close()
        raise RuntimeError("Worker process crashed. Aborting...!\n" +
                           "Note: You need to kill the other jobs yourself")

    if log_fh:
        log_fh.write(this_round_fp + "... done!\n")
    return stdout
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier; written together with flowgram as the
        first entry of the aligner input file
    flowgram: This flowgram is used to filter all the other flowgrams
    flowgrams: iterable filehandle of flowgram file
    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object
    ids: collection (e.g. dict) of ids of flowgrams in flowgrams that
         should be aligned; only membership tests are performed on it
    outdir: directory for intermediate files
    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc): scores holds one list of floats
    per non-empty output line of the aligner, names holds the names of the
    flowgrams that were selected (in input order) and fc is the sink that
    was passed in, now containing those flowgrams.

    Raises RuntimeError if the number of score lines differs from the
    number of selected flowgrams.
    """
    check_flowgram_ali_exe()

    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        names = []
        try:
            append_to_flowgram_file(id, flowgram, fh)
            for f in flowgrams:
                if f.Name in ids:
                    fc.add(f)
                    append_to_flowgram_file(f.Name, f, fh, trim=False)
                    names.append(f.Name)
        finally:
            # Close (and thereby flush) the input file even if writing failed.
            fh.close()

        # TODO: capture stderr and warn user
        scores_fh = popen("%s -relscore_pairid %s %s "
                          % (get_flowgram_ali_exe(), error_profile, tmpfile),
                          'r')
        try:
            # Build real lists: on Python 3 map() would only give lazy
            # iterators instead of the float values.
            scores = [[float(x) for x in s.split()]
                      for s in scores_fh if s != "\n"]
        finally:
            # Release the pipe explicitly instead of relying on garbage
            # collection.
            scores_fh.close()

        if len(names) != len(scores):
            raise RuntimeError(
                "Something bad has happened! I received less " +
                "alignment scores than there are flowgrams. Most likely this " +
                "means that the alignment program is not setup or corrupted. " +
                "Please run the test scripts to figure out the cause of the error."
            )
    finally:
        # Do not leave the intermediate file behind, not even on failure.
        remove(tmpfile)

    return (scores, names, fc)
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier; written together with flowgram as the
        first entry of the aligner input file
    flowgram: This flowgram is used to filter all the other flowgrams
    flowgrams: iterable filehandle of flowgram file
    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object
    ids: collection (e.g. dict) of ids of flowgrams in flowgrams that
         should be aligned; only membership tests are performed on it
    outdir: directory for intermediate files
    error_profile: path to error profile *.dat file

    Returns a tuple (scores, names, fc): one list of floats per non-empty
    aligner output line, the names of the selected flowgrams in input
    order, and the sink fc that was passed in.

    Raises RuntimeError if the number of score lines differs from the
    number of selected flowgrams.
    """
    check_flowgram_ali_exe()

    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        names = []
        try:
            append_to_flowgram_file(id, flowgram, fh)
            for f in flowgrams:
                if f.Name not in ids:
                    continue
                fc.add(f)
                append_to_flowgram_file(f.Name, f, fh, trim=False)
                names.append(f.Name)
        finally:
            # The aligner reads this file, so it must be closed (flushed)
            # before the external call, and also when writing failed.
            fh.close()

        # TODO: capture stderr and warn user
        command = "%s -relscore_pairid %s %s " % (get_flowgram_ali_exe(),
                                                  error_profile, tmpfile)
        scores_fh = popen(command, 'r')
        try:
            # Materialize each row as a list of floats; a bare map() is a
            # lazy iterator on Python 3.
            scores = [[float(x) for x in s.split()]
                      for s in scores_fh if s != "\n"]
        finally:
            # Close the pipe explicitly rather than waiting for garbage
            # collection.
            scores_fh.close()

        if len(names) != len(scores):
            raise RuntimeError("Something bad has happened! I received less " +
                               "alignment scores than there are flowgrams. Most likely this " +
                               "means that the alignment program is not setup or corrupted. " +
                               "Please run the test scripts to figure out the cause of the error.")
    finally:
        # Clean up the intermediate file on success and on failure alike.
        remove(tmpfile)

    return (scores, names, fc)
def test_flowgramAli_bin(self):
    """Check if we have a working FlowgramAligner"""
    ali_fp = get_flowgram_ali_exe()

    self.assertTrue(exists(ali_fp),
                    "The alignment program is not where it's supposed to be: %s" % ali_fp)

    # test if its callable and actually works
    command = "%s -h" % ali_fp
    proc = Popen(command, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)

    # communicate() reads the output while waiting for the process. A bare
    # wait() can block forever once the child has filled the pipe buffer,
    # and it leaves the pipe open afterwards.
    result, _ = proc.communicate()

    if proc.returncode != 0:
        self.fail("Calling %s failed. Check permissions and that it is in fact an executable."
                  % ali_fp)

    # check that the help string looks correct
    self.assertTrue(result.startswith("Usage"))
def test_flowgramAli_bin(self):
    """Check if we have a working FlowgramAligner"""
    ali_exe = get_flowgram_ali_exe()

    self.assertTrue(which(ali_exe) is not None,
                    "The alignment program %s "
                    "is not accessible via the PATH environment variable."
                    % ali_exe)

    # test if its callable and actually works
    command = "%s -h" % ali_exe
    proc = Popen(command, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)

    # communicate() reads the output while waiting for the process. A bare
    # wait() can block forever once the child has filled the pipe buffer,
    # and it leaves the pipe open afterwards.
    result, _ = proc.communicate()

    if proc.returncode != 0:
        self.fail("Calling %s failed. Check permissions and that it is in fact an executable."
                  % ali_exe)

    # check that the help string looks correct
    self.assertTrue(result.startswith("Usage"))
def test_flowgramAli_bin(self):
    """Check if we have a working FlowgramAligner"""
    ali_fp = get_flowgram_ali_exe()

    self.assertTrue(
        exists(ali_fp),
        "The alignment program is not where it's supposed to be: %s" % ali_fp)

    # test if its callable and actually works
    command = "%s -h" % ali_fp
    proc = Popen(command, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)

    # Use communicate() rather than wait() followed by read(): waiting on a
    # process whose stdout is a pipe can deadlock when the pipe buffer fills
    # up, and communicate() also closes the pipe when it is done.
    result, _ = proc.communicate()

    if proc.returncode != 0:
        self.fail(
            "Calling %s failed. Check permissions and that it is in fact an executable."
            % ali_fp)

    # check that the help string looks correct
    self.assertTrue(result.startswith("Usage"))