def xtest_infile_outfile_condor():
    'It tests that we can set an input file and an output file'
    bin = create_test_binary()
    #with infile
    in_file = NamedTemporaryFile()
    content = 'hola1\nhola2\nhola3\nhola4\nhola5\nhola6\nhola7\nhola8\n'
    content += 'hola9\nhola10\n'
    in_file.write(content)
    in_file.flush()
    out_file = NamedTemporaryFile()
    cmd = [bin]
    cmd.extend(['-i', in_file.name, '-t', out_file.name])
    stdout = NamedTemporaryFile()
    stderr = NamedTemporaryFile()
    cmd_def = [{'options': ('-i', '--input'), 'io': 'in', 'splitter': ''},
               {'options': ('-t', '--output'), 'io': 'out'}]
    from psubprocess import CondorPopen
    popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def,
                  runner=CondorPopen,
                  runner_conf={'transfer_executable': True})
    assert popen.wait() == 0  # waits until it finishes and checks the retcode
    assert not open(stdout.name).read()
    assert not open(stderr.name).read()
    assert open(out_file.name).read() == content
    in_file.close()
    os.remove(bin)
def test():
    d = {}
    for i in range( 10000 ):
        d[ 'foo' + str( i ) ] = 'bar' + str( i )
    # Open temporary file and get name
    file = NamedTemporaryFile()
    file_name = file.name
    # Write cdb to file
    FileCDBDict.to_file( d, file )
    file.flush()
    # Open on disk
    file2 = open( file_name )
    cdb = FileCDBDict( file2 )
    for key, value in d.iteritems():
        assert cdb[key] == value
    try:
        cdb['notin']
        assert False, "KeyError was not raised"
    except KeyError, e:
        pass
def test_2_infile_outfile():
    'It tests that we can set 2 input files and an output file'
    bin = create_test_binary()
    #with infile
    content = 'hola1\nhola2\nhola3\nhola4\nhola5\nhola6\nhola7\nhola8\n'
    content += 'hola9\nhola10\n'
    in_file1 = NamedTemporaryFile()
    in_file1.write(content)
    in_file1.flush()
    in_file2 = NamedTemporaryFile()
    in_file2.write(content)
    in_file2.flush()
    out_file1 = NamedTemporaryFile()
    out_file2 = NamedTemporaryFile()
    cmd = [bin]
    cmd.extend(['-i', in_file1.name, '-t', out_file1.name])
    cmd.extend(['-x', in_file2.name, '-z', out_file2.name])
    stdout = NamedTemporaryFile()
    stderr = NamedTemporaryFile()
    cmd_def = [{'options': ('-i', '--input'), 'io': 'in', 'splitter': ''},
               {'options': ('-x', '--input'), 'io': 'in', 'splitter': ''},
               {'options': ('-t', '--output'), 'io': 'out'},
               {'options': ('-z', '--output'), 'io': 'out'}]
    popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def)
    assert popen.wait() == 0  # waits until it finishes and checks the retcode
    assert not open(stdout.name).read()
    assert not open(stderr.name).read()
    assert open(out_file1.name).read() == content
    assert open(out_file2.name).read() == content
    in_file1.close()
    in_file2.close()
    os.remove(bin)
def compile_inline(self,data,ext): """ Compile inline css. Have to compile to a file, because some css compilers may not output to stdout, but we know they all output to a file. It's a little hackish, but you shouldn't be compiling in production anyway, right? """ compiler = settings.COMPILER_FORMATS[ext] try: bin = compiler['binary_path'] except: raise Exception("Path to CSS compiler must be included in COMPILER_FORMATS") tmp_file = NamedTemporaryFile(mode='w',suffix=ext) tmp_file.write(dedent(data)) tmp_file.flush() path, ext = os.path.splitext(tmp_file.name) tmp_css = ''.join((path,'.css')) self.compile(path,compiler) data = open(tmp_css,'r').read() # cleanup tmp_file.close() os.remove(tmp_css) return data
def get_logs(self): """ Build the logs entry for the metadata 'output' section :return: list, Output instances """ # Collect logs from server kwargs = {} if self.namespace is not None: kwargs['namespace'] = self.namespace logs = self.osbs.get_build_logs(self.build_id, **kwargs) # Deleted once closed logfile = NamedTemporaryFile(prefix=self.build_id, suffix=".log", mode='w') logfile.write(logs) logfile.flush() docker_logs = NamedTemporaryFile(prefix="docker-%s" % self.build_id, suffix=".log", mode='w') docker_logs.write("\n".join(self.workflow.build_logs)) docker_logs.flush() return [Output(file=docker_logs, metadata=self.get_output_metadata(docker_logs.name, "build.log")), Output(file=logfile, metadata=self.get_output_metadata(logfile.name, "openshift-final.log"))]
def test_seq_pipeline_parallel_run_with_fasta_qual(self): 'The pipeline runs in parallel with fasta and qual' pipeline = 'sanger_with_qual' fhand_adaptors = NamedTemporaryFile() fhand_adaptors.write(ADAPTORS) fhand_adaptors.flush() arabidopsis_genes = 'arabidopsis_genes+' univec = os.path.join(TEST_DATA_DIR, 'blast', arabidopsis_genes) configuration = {'remove_vectors': {'vectors': univec}, 'remove_adaptors': {'adaptors': fhand_adaptors.name}} seq1 = create_random_seqwithquality(500, qual_range=50) seq2 = create_random_seqwithquality(500, qual_range=51) seq3 = create_random_seqwithquality(500, qual_range=52) seqs = [seq1, seq2, seq3] inseq_fhand, inqual_fhand = create_temp_seq_file(seqs, format='qual') in_fhands = {} in_fhands['in_seq'] = open(inseq_fhand.name) in_fhands['in_qual'] = open(inqual_fhand.name) outseq_fhand = NamedTemporaryFile() outqual_fhand = NamedTemporaryFile() writer = SequenceWriter(outseq_fhand, qual_fhand=outqual_fhand, file_format='fasta') writers = {'seq': writer} seq_pipeline_runner(pipeline, configuration, in_fhands, processes=4, writers=writers) out_fhand = open(outseq_fhand.name, 'r') result_seq = out_fhand.read() assert result_seq.count('>') == 3
def hmmscan(fasta, database_path, ncpus=10): F = NamedTemporaryFile() F.write(fasta) F.flush() OUT = NamedTemporaryFile() cmd = '%s --cpu %s -o /dev/null -Z 190000 --tblout %s %s %s' %(HMMSCAN, ncpus, OUT.name, database_path, F.name) #print cmd sts = subprocess.call(cmd, shell=True) byquery = defaultdict(list) if sts == 0: for line in OUT: #['#', '---', 'full', 'sequence', '----', '---', 'best', '1', 'domain', '----', '---', 'domain', 'number', 'estimation', '----'] #['#', 'target', 'name', 'accession', 'query', 'name', 'accession', 'E-value', 'score', 'bias', 'E-value', 'score', 'bias', 'exp', 'reg', 'clu', 'ov', 'env', 'dom', 'rep', 'inc', 'description', 'of', 'target'] #['#-------------------', '----------', '--------------------', '----------', '---------', '------', '-----', '---------', '------', '-----', '---', '---', '---', '---', '---', '---', '---', '---', '---------------------'] #['delNOG20504', '-', '553220', '-', '1.3e-116', '382.9', '6.2', '3.4e-116', '381.6', '6.2', '1.6', '1', '1', '0', '1', '1', '1', '1', '-'] if line.startswith('#'): continue fields = line.split() # output is not tab delimited! Should I trust this split? hit, _, query, _ , evalue, score, bias, devalue, dscore, dbias = fields[0:10] evalue, score, bias, devalue, dscore, dbias = map(float, [evalue, score, bias, devalue, dscore, dbias]) byquery[query].append([hit, evalue, score]) OUT.close() F.close() return byquery
def test_pipeline_run(): 'It tests that the pipeline runs ok' pipeline = 'sanger_with_qual' fhand_adaptors = NamedTemporaryFile() fhand_adaptors.write(ADAPTORS) fhand_adaptors.flush() arabidopsis_genes = 'arabidopsis_genes+' univec = os.path.join(TEST_DATA_DIR, 'blast', arabidopsis_genes) configuration = {'remove_vectors_blastdb': {'vectors': univec}, 'remove_adaptors': {'adaptors': fhand_adaptors.name}} seq_fhand = open(os.path.join(TEST_DATA_DIR, 'seq.fasta'), 'r') qual_fhand = open(os.path.join(TEST_DATA_DIR, 'qual.fasta'), 'r') seq_iter = seqs_in_file(seq_fhand, qual_fhand) filtered_seq_iter = _pipeline_builder(pipeline, seq_iter, configuration) seq_list = list(filtered_seq_iter) assert 'CGAtcgggggg' in str(seq_list[0].seq) assert len(seq_list) == 6
def test_bwa_mapping():
    '''It tests mapping reads with bwa'''
    reference = join(TEST_DATA_DIR, 'blast/arabidopsis_genes')
    work_dir = NamedTemporaryDir()
    reference_fpath = join(work_dir.name, 'arabidopsis_genes')
    os.symlink(reference, reference_fpath)
    reads_fhand = NamedTemporaryFile(suffix='.sfastq')
    reads_fhand.write(SOLEXA)
    reads_fhand.flush()
    out_bam_fhand = NamedTemporaryFile()
    out_bam_fpath = out_bam_fhand.name
    out_bam_fhand.close()
    parameters = {'colorspace': False, 'reads_length': 'short',
                  'threads': None, 'java_conf': None}
    map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                       parameters)
    test_sam_fhand = NamedTemporaryFile(suffix='sam')
    bam2sam(out_bam_fpath, test_sam_fhand.name)
    result = open(test_sam_fhand.name).read()
    assert 'seq17' in result

    unmapped_fhand = StringIO.StringIO()
    parameters = {'colorspace': False, 'reads_length': 'short',
                  'threads': None, 'java_conf': None,
                  'unmapped_fhand': unmapped_fhand}
    map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                       parameters)
    assert 'seq17' in unmapped_fhand.getvalue()
    test_sam_fhand = NamedTemporaryFile(suffix='sam')
    bam2sam(out_bam_fpath, test_sam_fhand.name)
    result = open(test_sam_fhand.name).read()
    assert 'seq17' not in result
def test_main(self): xml = """<record> <datafield tag="999" ind1="C" ind2="5"> <subfield code="s">Test Journal Name,100,10</subfield> </datafield> </record>""" xml_temp_file = NamedTemporaryFile(dir=CFG_TMPDIR) xml_temp_file.write(xml) xml_temp_file.flush() kb = "TEST JOURNAL NAME---Converted" kb_temp_file = NamedTemporaryFile(dir=CFG_TMPDIR) kb_temp_file.write(kb) kb_temp_file.flush() dest_temp_fd, dest_temp_path = mkstemp(dir=CFG_TMPDIR) try: os.close(dest_temp_fd) process = subprocess.Popen([self.bin_path, xml_temp_file.name, '--kb', kb_temp_file.name, '-o', dest_temp_path], stderr=subprocess.PIPE, stdout=subprocess.PIPE) process.wait() transformed_xml = open(dest_temp_path).read() self.assertXmlEqual(transformed_xml, """<?xml version="1.0" encoding="UTF-8"?> <collection xmlns="http://www.loc.gov/MARC21/slim"> <record><datafield ind1="C" ind2="5" tag="999"><subfield code="s">Converted,100,10</subfield></datafield></record> </collection>""") finally: os.unlink(dest_temp_path)
def save(self, filename, mtime=1300507380.0): """ Serialize this RingData instance to disk. :param filename: File into which this instance should be serialized. :param mtime: time used to override mtime for gzip, default or None if the caller wants to include time """ # Override the timestamp so that the same ring data creates # the same bytes on disk. This makes a checksum comparison a # good way to see if two rings are identical. # # This only works on Python 2.7; on 2.6, we always get the # current time in the gzip output. tempf = NamedTemporaryFile(dir=".", prefix=filename, delete=False) if 'mtime' in inspect.getargspec(GzipFile.__init__).args: gz_file = GzipFile(filename, mode='wb', fileobj=tempf, mtime=mtime) else: gz_file = GzipFile(filename, mode='wb', fileobj=tempf) self.serialize_v1(gz_file) gz_file.close() tempf.flush() os.fsync(tempf.fileno()) tempf.close() os.chmod(tempf.name, 0o644) os.rename(tempf.name, filename)
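# A minimal, self-contained sketch (added for illustration, not part of the
# class above): with a fixed mtime, gzipping the same payload twice yields
# byte-identical output, which is what makes checksum comparison of ring
# files meaningful.
from gzip import GzipFile
from io import BytesIO

def _gzip_bytes(payload, mtime=1300507380.0):
    buf = BytesIO()
    gz = GzipFile(fileobj=buf, mode='wb', mtime=mtime)
    gz.write(payload)
    gz.close()
    return buf.getvalue()

assert _gzip_bytes(b'ring data') == _gzip_bytes(b'ring data')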
class ZabbixSender(object): """""" #---------------------------------------------------------------------- def __init__(self, config, logfile): self._config = config self._logfile = logfile self._tempfile = None #---------------------------------------------------------------------- def send(self, stats): self._write_temporary_file(stats) self._send_data_to_zabbix() #---------------------------------------------------------------------- def _write_temporary_file(self, stats): self._tempfile = NamedTemporaryFile() for item in stats: self._tempfile.write(u'- memcached[%s] %s\n' % (item.key, item.value)) self._tempfile.flush() #---------------------------------------------------------------------- def _send_data_to_zabbix(self): cmd = [u'zabbix_sender', u'-c', self._config, u'-i', self._tempfile.name] logfile = open(self._logfile, 'a') call(cmd, stdout=logfile) logfile.close() self._tempfile.close()
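# Hedged usage sketch for the class above (illustration only; the config and
# log file paths are placeholders). Any stats item with `key` and `value`
# attributes works, e.g. a namedtuple. Note that send() shells out to the
# zabbix_sender CLI, so it needs a working Zabbix agent configuration.
from collections import namedtuple

Stat = namedtuple('Stat', ['key', 'value'])

sender = ZabbixSender(config=u'/etc/zabbix/zabbix_agentd.conf',
                      logfile=u'/tmp/zabbix_sender.log')
sender.send([Stat(u'curr_connections', 10), Stat(u'get_hits', 12345)])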
def as_fasta(seqs, index_dir=None): ftype = get_seqs_type(seqs) if ftype == "fasta": return seqs elif ftype == "fastafile": return Fasta(seqs) else: if index_dir is None: raise ValueError("need index_dir / genome to convert to FASTA") tmpfa = NamedTemporaryFile() if ftype == "bedfile": track2fasta(index_dir, seqs, tmpfa.name) else: if ftype == "regionfile": seqs = [l.strip() for l in open(seqs).readlines()] tmpbed = NamedTemporaryFile() for seq in seqs: vals = re.split(r'[:-]', seq) tmpbed.write("{}\t{}\t{}\n".format(*vals)) tmpbed.flush() track2fasta(index_dir, tmpbed.name, tmpfa.name) return Fasta(tmpfa.name)
def make_fasta(self):
    'it returns a fasta fhand'
    fhand = NamedTemporaryFile()
    fhand.write('>seq{0:d}\nACTATCATGGCAGATA\n'.format(self.counter))
    fhand.flush()
    self.counter += 1
    return fhand
def refine_by_scanning(motifs, fastafile): tmp_gff = NamedTemporaryFile() file_in = NamedTemporaryFile() for m in motifs: file_in.write("%s\n" % m.to_pfm()) file_in.flush() cmd = "pwmscan.py -i %s -p %s -c 0.8 > %s" % (fastafile, file_in.name, tmp_gff.name) p = Popen(cmd, shell=True) stdout,stderr = p.communicate() aligns = {} for line in open(tmp_gff.name): vals = line.strip().split("\t") motif,instance = [x.split(" ")[1].replace('"', "") for x in vals[8].split(" ; ")] if vals[6] == "+": aligns.setdefault(motif,[]).append(instance.upper()) else: aligns.setdefault(motif,[]).append(rc(instance.upper())) tmp_out = NamedTemporaryFile() refined_motifs = [] for id,align in aligns.items(): if len(align) > 10: motif = motif_from_align(align) refined_motifs.append(motif) return refined_motifs
def test_scrape_info_from_fname():
    'scrape info from fpath'
    fhand = NamedTemporaryFile(prefix='st_prot.pl_454.A.', suffix='.fasta')
    fhand.write('>seq\nTGATGC')
    fhand.flush()
    info = scrape_info_from_fname(fhand.name)
    assert info['st'] == 'prot'
def render_to_temporary_file(self, template_name, mode='w+b', bufsize=-1, suffix='.html', prefix='tmp', dir=None, delete=True): template = self.resolve_template(template_name) context = self.resolve_context(self.context_data) content = smart_str(template.render(context)) content = make_absolute_paths(content) try: tempfile = NamedTemporaryFile(mode=mode, bufsize=bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete) except TypeError: tempfile = NamedTemporaryFile(mode=mode, buffering=bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete) try: tempfile.write(content) tempfile.flush() return tempfile except TypeError: tempfile.write(bytes(content, 'UTF-8')) tempfile.flush() return tempfile except: # Clean-up tempfile if an Exception is raised. tempfile.close() raise
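# Why the try/except TypeError above: NamedTemporaryFile takes `bufsize` on
# Python 2 but `buffering` on Python 3, so the first call raises TypeError on
# Python 3 and the second form is used instead. A standalone sketch of the
# same fallback pattern (my own illustration, not project code):
from tempfile import NamedTemporaryFile

def open_named_tempfile(bufsize=-1, **kwargs):
    try:
        return NamedTemporaryFile(bufsize=bufsize, **kwargs)    # Python 2
    except TypeError:
        return NamedTemporaryFile(buffering=bufsize, **kwargs)  # Python 3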
def test_safe_md5(self): """Make sure we have the expected md5 with varied input types This method is ported from PyCogent (http://www.pycogent.org). PyCogent is a GPL project, but we obtained permission from the authors of this method to port it to the BIOM Format project (and keep it under BIOM's BSD license). """ exp = 'd3b07384d113edec49eaa6238ad5ff00' tmp_f = NamedTemporaryFile( mode='w', prefix='test_safe_md5', suffix='txt') tmp_f.write('foo\n') tmp_f.flush() obs = safe_md5(open(tmp_f.name, 'U')) self.assertEqual(obs, exp) obs = safe_md5(['foo\n']) self.assertEqual(obs, exp) # unsupported type raises TypeError self.assertRaises(TypeError, safe_md5, 42)
def get_tempfile(self, **kwargs):
    kwargs.setdefault('suffix', '.vrt')
    tempfile = NamedTemporaryFile(**kwargs)
    tempfile.write(self.content)
    tempfile.flush()
    tempfile.seek(0)
    return tempfile
def test_lots_splits_outfile():
    'It tests that we can use many splits with 2 input files and an output file'
    bin = create_test_binary()
    splits = 200
    content = ['hola%d\n' % split for split in range(splits)]
    content = ''.join(content)
    in_file1 = NamedTemporaryFile()
    in_file1.write(content)
    in_file1.flush()
    in_file2 = NamedTemporaryFile()
    in_file2.write(content)
    in_file2.flush()
    out_file1 = NamedTemporaryFile()
    out_file2 = NamedTemporaryFile()
    cmd = [bin]
    cmd.extend(['-i', in_file1.name, '-t', out_file1.name])
    cmd.extend(['-x', in_file2.name, '-z', out_file2.name])
    stdout = NamedTemporaryFile()
    stderr = NamedTemporaryFile()
    cmd_def = [{'options': ('-i', '--input'), 'io': 'in', 'splitter': ''},
               {'options': ('-x', '--input'), 'io': 'in', 'splitter': ''},
               {'options': ('-t', '--output'), 'io': 'out'},
               {'options': ('-z', '--output'), 'io': 'out'}]
    popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def,
                  splits=splits)
    assert popen.wait() == 0  # waits until it finishes and checks the retcode
    assert not open(stdout.name).read()
    assert not open(stderr.name).read()
    assert open(out_file1.name).read() == content
    assert open(out_file2.name).read() == content
    in_file1.close()
    in_file2.close()
    os.remove(bin)
def get_pairwise_distances(seq_series, tree_file = None, seq_file = None): if seq_file is None: fasta_handle = NTF() if tree_file is None: tree_handle = NTF() else: tree_handle = open(tree_file, 'w') for (pat, visit), seq in zip(seq_series.index, seq_series.values): nheader = '%s-%s' % (pat, visit) fasta_handle.write('>%s\n%s\n' % (nheader, ''.join(seq))) fasta_handle.flush() os.fsync(fasta_handle.fileno()) cmd = 'muscle -in %(ifile)s -tree2 %(treefile)s -gapopen -2.9' cmdlist = shlex.split(cmd % { 'ifile':fasta_handle.name, 'treefile':tree_handle.name }) t = check_call(cmdlist) tree = Phylo.read(open(tree_handle.name), 'newick') seq_names = tree.get_terminals() dmat = {} for p1, p2 in combinations(seq_names, 2): d = tree.distance(p1, p2) dmat[(p1.name, p2.name)] = d dmat[(p2.name, p1.name)] = d return dmat
def test_gmap_without_mapping_output():
    '''It tests that gmap doesn't map anything'''
    mappers_dir = join(TEST_DATA_DIR, 'mappers')
    cmap_dir = join(TEST_DATA_DIR, 'mappers', 'gmap')
    work_dir = NamedTemporaryDir()
    temp_genome = join(work_dir.name, 'genome.fa')
    os.symlink(join(mappers_dir, 'genome.fa'), temp_genome)
    reads_fhand = NamedTemporaryFile()
    reads_fhand.write('>seq\natgtgatagat\n')
    reads_fhand.flush()
    out_bam_fhand = NamedTemporaryFile()
    out_bam_fpath = out_bam_fhand.name
    out_bam_fhand.close()
    parameters = {'threads': None, 'kmer': 13}
    map_reads_with_gmap(temp_genome, reads_fhand.name, out_bam_fpath,
                        parameters)
    reads_fhand.close()
    temp_sam_fhand = NamedTemporaryFile(suffix='.sam')
    bam2sam(out_bam_fpath, temp_sam_fhand.name, True)
    result = open(temp_sam_fhand.name).read()
    assert 'seq\t4\t*\t0\t0' in result
class TestPipeline(unittest.TestCase):
    """ Class to test a pipeline with an iterative node
    """
    def setUp(self):
        """ In the setup construct the pipeline and set some input parameters.
        """
        # Construct the pipeline
        self.pipeline = MyPipeline()

        # Set some input parameters
        self.parallel_processes = 10
        self.input_file = NamedTemporaryFile(delete=False)
        self.input_file.write('\x00\x00' * self.parallel_processes)
        self.input_file.flush()
        self.input_file.close()
        self.pipeline.input_image = self.input_file.name
        self.output_file = NamedTemporaryFile()
        self.output_file.close()
        self.pipeline.output_image = self.output_file.name

    def test_iterative_pipeline_connection(self):
        """ Method to test if an iterative node and built in iterative
        process are correctly connected.
        """
        # Test the output connection
        self.pipeline()
        result = open(self.pipeline.output_image, 'rb').read()
        numbers = struct.unpack_from('H' * self.parallel_processes, result)
        self.assertEqual(numbers, tuple(range(self.parallel_processes)))
def from_wav(cls, fileName): """ params :param fileName: file name to read """ outputFile = NamedTemporaryFile(mode='w+b', delete=False) command = [ converter, '-y', '-i', fileName, # specifying input file '-vn', # drop any video streams in the file '-sn', # drop any subtitles present in the file '-f', 'wav', # specify the output file format needed '-ar', '44100', # uniform sample rate for all audio files outputFile.name ] # now use ffmpeg for conversion subprocess.call(command, stdout=open(os.devnull), stderr=open(os.devnull)) outputFile.flush() obj = cls(outputFile.name) outputFile.close() os.unlink(outputFile.name) return obj
def solve(self, cnf): s = Solution() infile = NamedTemporaryFile(mode='w') outfile = NamedTemporaryFile(mode='r') io = DimacsCnf() infile.write(io.tostring(cnf)) infile.flush() ret = call(self.command % (infile.name, outfile.name), shell=True) infile.close() if ret != 10: return s s.success = True lines = outfile.readlines()[1:] for line in lines: varz = line.split(" ")[:-1] for v in varz: v = v.strip() value = v[0] != '-' v = v.lstrip('-') vo = io.varobj(v) s.varmap[vo] = value # Close deletes the tmp files outfile.close() return s
def get_splice_score(a, s_type=5): if s_type not in [3,5]: raise Exception("Invalid splice type {}, should be 3 or 5".format(s_type)) maxent = config.maxentpath if not maxent: raise Exception("Please provide path to the score5.pl and score3.pl maxent scripts in config file") tmp = NamedTemporaryFile() for name,seq in a: tmp.write(">{}\n{}\n".format(name,seq)) tmp.flush() cmd = "perl score{}.pl {}".format(s_type, tmp.name) p = sp.Popen(cmd, shell=True, cwd=maxent, stdout=sp.PIPE) score = 0 for line in p.stdout.readlines(): vals = line.strip().split("\t") if len(vals) > 1: try: score += float(vals[-1]) except ValueError: logger.error("valueError, skipping: {}".format(vals)) except: logger.error("Something unexpected happened") return score
def test_dup_bin(self):
    seqs = '@seq1.f\naaaa\n+\nHHHH\n@seq1.r\naaaa\n+\nHHHH\n'
    seqs += '@seq2.f\naaab\n+\nHHHH\n@seq2.r\naaaa\n+\nHHHH\n'
    in_fhand = NamedTemporaryFile()
    in_fhand.write(seqs)
    in_fhand.flush()
    filter_bin = os.path.join(BIN_DIR, 'filter_duplicates')
    assert 'usage' in check_output([filter_bin, '-h'])

    result = check_output([filter_bin, in_fhand.name])
    assert '@seq1.f\naaaa\n+\nHHHH\n@seq2.f\naaab\n+\nHHHH\n' in result

    result = check_output([filter_bin], stdin=in_fhand)
    assert '@seq1.f\naaaa\n+\nHHHH\n@seq2.f\naaab\n+\nHHHH\n' in result

    result = check_output([filter_bin, in_fhand.name, '-m', '3'])
    assert '@seq1.f\naaaa\n+\nHHHH\n@seq2.f\naaab\n+\nHHHH\n' in result

    result = check_output([filter_bin, in_fhand.name, '--paired_reads'])
    assert seqs in result

    result = check_output([filter_bin, in_fhand.name, '-l', '1'])
    assert result == '@seq1.f\naaaa\n+\nHHHH\n'

    return
    # TODO failure not fixed yet
    in_fhand = open(os.path.join(TEST_DATA_DIR, 'illum_fastq.fastq'))
    try:
        result = check_output([filter_bin], stdin=in_fhand)
        # print result
        self.fail()
    except UndecidedFastqVersionError:
        pass
def create_temp_file(self, edid_binary):
    edid_file = NamedTemporaryFile(delete=False)
    edid_file.write(edid_binary)
    edid_file.flush()
    edid_file.seek(0)
    return edid_file
def test_seq_pipeline_parallel_run(self): 'It tests that the pipeline runs ok' pipeline = 'sanger_without_qual' fhand_adaptors = NamedTemporaryFile() fhand_adaptors.write(ADAPTORS) fhand_adaptors.flush() arabidopsis_genes = 'arabidopsis_genes+' univec = os.path.join(TEST_DATA_DIR, 'blast', arabidopsis_genes) configuration = {'remove_vectors': {'vectors': univec}, 'remove_adaptors': {'adaptors': fhand_adaptors.name}} in_fhands = {} in_fhands['in_seq'] = open(os.path.join(TEST_DATA_DIR, 'seq.fasta'), 'r') out_fhand = NamedTemporaryFile() writer = SequenceWriter(out_fhand, file_format='fasta') writers = {'seq': writer} seq_pipeline_runner(pipeline, configuration, in_fhands, processes=4, writers=writers) out_fhand = open(out_fhand.name, 'r') result_seq = out_fhand.read() assert result_seq.count('>') == 6 #are we keeping the description? assert 'mdust' in result_seq
def test_nosplit():
    'It tests that we can set some input files to be not split'
    bin = create_test_binary()
    #with infile
    in_file = NamedTemporaryFile()
    content = 'hola1\nhola2\n'
    in_file.write(content)
    in_file.flush()
    out_file = NamedTemporaryFile()
    cmd = [bin]
    cmd.extend(['-i', in_file.name, '-t', out_file.name])
    stdout = NamedTemporaryFile()
    stderr = NamedTemporaryFile()
    cmd_def = [{'options': ('-i', '--input'), 'io': 'in',
                'special': ['no_split']},
               {'options': ('-t', '--output'), 'io': 'out'}]
    splits = 4
    popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def,
                  splits=splits)
    assert popen.wait() == 0  # waits until it finishes and checks the retcode
    assert not open(stdout.name).read()
    assert not open(stderr.name).read()
    assert open(out_file.name).read() == content * splits
    in_file.close()
    os.remove(bin)
def test_cat_seqs(self): 'It test the cat seqs' cat_bin = os.path.join(SEQ_BIN_DIR, 'cat_seqs') # help assert 'usage' in check_output([cat_bin, '-h']) # fasta to fasta in_fhand1 = self.make_fasta() in_fhand2 = self.make_fasta() result = check_output([cat_bin, in_fhand1.name, in_fhand2.name]) assert '>seq1\nACTATCATGGCAGATA\n>seq2\nACTATCATGGCAGATA' in result # from fastq to fastq fhand = NamedTemporaryFile() fhand.write('@seq1\nACTA\n+\nqqqq\n') fhand.flush() result = check_output([cat_bin, fhand.name]) assert result == '@seq1\nACTA\n+\nqqqq\n' # No input fhand = NamedTemporaryFile() fhand.write('') fhand.flush() try: stderr = NamedTemporaryFile() result = check_output([cat_bin, fhand.name], stderr=stderr) self.fail() except CalledProcessError: assert 'The file is empty' in open(stderr.name).read() os.remove('cat_seqs.error') # No format in_fhand1 = self.make_fasta() in_fhand2 = self.make_fasta() result = check_output([cat_bin, in_fhand1.name, in_fhand2.name]) assert '>seq3\nACTATCATGGCAGATA\n>seq4\nACTATCATGGCAGATA' in result in_fhand1 = self.make_fasta() in_fhand2 = NamedTemporaryFile() in_fhand2.write('@seq\nATAT\n+\n????\n') in_fhand2.flush() try: stderr = NamedTemporaryFile() result = check_output([cat_bin, in_fhand2.name, in_fhand1.name], stderr=stderr) self.fail() except CalledProcessError: stderr_str = open(stderr.name).read() assert 'output format taken from first given file' in stderr_str
def get_locus_values(loci, locus_bed_path, ambiguous_bigwig=None, plus_bigwig=None, minus_bigwig=None): ''' Finds coverage values for each transcript. loci - Dict of locus objects from models.LocusGroup.get_loci_dict() locus_bed_bed - Path to BED file with loci intervals. ''' if plus_bigwig and minus_bigwig: plus_tab = NamedTemporaryFile(mode='w') minus_tab = NamedTemporaryFile(mode='w') call_bigwig_average_over_bed( plus_bigwig, locus_bed_path, plus_tab.name, ) call_bigwig_average_over_bed( minus_bigwig, locus_bed_path, minus_tab.name, ) plus_tab.flush() minus_tab.flush() return reconcile_stranded_coverage( loci, read_bigwig_average_over_bed_tab_file(loci, plus_tab.name), read_bigwig_average_over_bed_tab_file(loci, minus_tab.name), ) elif ambiguous_bigwig: tab = NamedTemporaryFile(mode='w') call_bigwig_average_over_bed( ambiguous_bigwig, locus_bed_path, tab.name, ) tab.flush() out_values = read_bigwig_average_over_bed_tab_file(loci, tab.name) return out_values else: raise ValueError('Improper bigWig files specified.')
def test_orphan_events():
    """Tests cases with orphan house events and orphan basic events."""
    tmp = NamedTemporaryFile(mode="w+")
    tmp.write("FT\n")
    tmp.write("g1 := g2 & e1\n")
    tmp.write("g2 := h1 & e1\n")
    tmp.write("p(e1) = 0.5\n")
    tmp.write("s(h1) = false\n")
    tmp.flush()
    assert parse_input_file(tmp.name) is not None
    tmp.write("p(e2) = 0.1\n")  # orphan basic event
    tmp.flush()
    assert parse_input_file(tmp.name) is not None
    tmp.write("s(h2) = true\n")  # orphan house event
    tmp.flush()
    assert parse_input_file(tmp.name) is not None
def get_client_certificate(name, namespace, ca_pem, ca_key_pem): common_name = '{}-client'.format(name) client_csr = { 'CN': common_name, 'hosts': [], 'key': { 'algo': 'rsa', 'size': 2048 }, 'names': [{ 'O': common_name }] } ca_file = NamedTemporaryFile(delete=False) ca_key_file = NamedTemporaryFile(delete=False) client_csr_file = NamedTemporaryFile(delete=False) ca_file.write(ca_pem) ca_file.flush() ca_key_file.write(ca_key_pem) ca_key_file.flush() client_csr_json = json.dumps(client_csr).encode('utf-8') client_csr_file.write(client_csr_json + b'\n') client_csr_file.flush() cmd = '''./cfssl gencert \ -ca={} \ -ca-key={} \ -config=ca-config.json \ -profile=client {}'''.format(ca_file.name, ca_key_file.name, client_csr_file.name) c = delegator.run(cmd) ca_file.close() ca_key_file.close() client_csr_file.close() if not c.out: logging.error('cfssl {}'.format(c.err)) r = json.loads(c.out) mongod_pem = r['cert'] + r['key'] return mongod_pem, r['csr']
class fileTest(unittest.TestCase): CORRELATE = Exscript.util.file def setUp(self): data = '[account-pool]\n' data += 'user1=' + base64.encodestring('password1') + '\n' data += 'user2:' + base64.encodestring('password2') + '\n' data += 'user3 = ' + base64.encodestring('password3') + '\n' data += 'user4 : ' + base64.encodestring('password4') + '\n' self.account_file = NamedTemporaryFile() self.account_file.write(data) self.account_file.flush() self.host_file = NamedTemporaryFile() self.host_file.write('\n'.join(hosts)) self.host_file.flush() self.csv_host_file = NamedTemporaryFile() self.csv_host_file.write('hostname test\n') self.csv_host_file.write('\n'.join([h + ' blah' for h in hosts])) self.csv_host_file.flush() def tearDown(self): self.account_file.close() self.host_file.close() self.csv_host_file.close() def testGetAccountsFromFile(self): from Exscript.util.file import get_accounts_from_file accounts = get_accounts_from_file(self.account_file.name) result = [(a.get_name(), a.get_password()) for a in accounts] result.sort() self.assertEqual(account_pool, result) def testGetHostsFromFile(self): from Exscript.util.file import get_hosts_from_file result = get_hosts_from_file(self.host_file.name) self.assertEqual([h.get_name() for h in result], expected_hosts) def testGetHostsFromCsv(self): from Exscript.util.file import get_hosts_from_csv result = get_hosts_from_csv(self.csv_host_file.name) hostnames = [h.get_name() for h in result] testvars = [h.get('test')[0] for h in result] self.assertEqual(hostnames, expected_hosts) self.assertEqual(testvars, ['blah' for h in result])
class ZoneFile(object): def __init__(self, lines=None, no_header=False): self._file = NamedTemporaryFile() if not no_header: self._file.write(DUMMY_ZONE_HEADER) if lines is not None: self.writelines(lines) self._file.flush() @property def name(self): return self._file.name def write(self, str): self._file.write(str) self._file.flush() def writelines(self, lines): self._file.writelines("%s\n" % line for line in lines) self._file.flush()
def heroku_kafka_producer(extra_config={}): cert_file = NamedTemporaryFile(suffix='.crt', delete=False) cert_file.write(os.environ['KAFKA_CLIENT_CERT'].encode('utf-8')) cert_file.flush() key_file = NamedTemporaryFile(suffix='.key', delete=True) private_key = crypto.load_privatekey(crypto.FILETYPE_PEM, os.environ['KAFKA_CLIENT_CERT_KEY']) pwd = str(os.urandom(33)) key_enc = crypto.dump_privatekey(crypto.FILETYPE_PEM, private_key, cipher='DES-EDE3-CBC', passphrase=pwd.encode()) key_file.write(key_enc) key_file.flush() trust_file = NamedTemporaryFile(suffix='.crt', delete=False) trust_file.write(os.environ['KAFKA_TRUSTED_CERT'].encode('utf-8')) trust_file.flush() kafka_brokers = get_kafka_brokers() config = { 'bootstrap.servers': kafka_brokers, 'security.protocol': 'ssl', 'ssl.ca.location': trust_file.name, 'ssl.certificate.location': cert_file.name, 'ssl.key.location': key_file.name, 'ssl.key.password': pwd } producer_config = {**config, **extra_config} producer = Producer(producer_config) # Files will automatically be deleted when closed cert_file.close() key_file.close() trust_file.close() return producer
class ZoneFile(object): def __init__(self, lines=None, no_header=False): self._file = NamedTemporaryFile(delete=False) if not no_header: self._file.write(bytes(DUMMY_ZONE_HEADER, encoding='utf-8')) if lines is not None: self.writelines(lines) self._file.flush() def __del__(self): self._file.close() @property def name(self): return self._file.name def write(self, str): self._file.write(bytes(str)) self._file.flush() def writelines(self, lines): self._file.writelines( bytes("%s\n" % line, encoding='utf-8') for line in lines) self._file.flush()
def execute(self, context): vertica = VerticaHook(vertica_conn_id=self.vertica_conn_id) mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id) tmpfile = None result = None selected_columns = [] count = 0 with closing(vertica.get_conn()) as conn: with closing(conn.cursor()) as cursor: cursor.execute(self.sql) selected_columns = [d.name for d in cursor.description] if self.bulk_load: tmpfile = NamedTemporaryFile("w") self.log.info( "Selecting rows from Vertica to local file %s...", tmpfile.name) self.log.info(self.sql) csv_writer = csv.writer(tmpfile, delimiter='\t', encoding='utf-8') for row in cursor.iterate(): csv_writer.writerow(row) count += 1 tmpfile.flush() else: self.log.info("Selecting rows from Vertica...") self.log.info(self.sql) result = cursor.fetchall() count = len(result) self.log.info("Selected rows from Vertica %s", count) if self.mysql_preoperator: self.log.info("Running MySQL preoperator...") mysql.run(self.mysql_preoperator) try: if self.bulk_load: self.log.info("Bulk inserting rows into MySQL...") with closing(mysql.get_conn()) as conn: with closing(conn.cursor()) as cursor: cursor.execute( "LOAD DATA LOCAL INFILE '%s' INTO " "TABLE %s LINES TERMINATED BY '\r\n' (%s)" % (tmpfile.name, self.mysql_table, ", ".join(selected_columns))) conn.commit() tmpfile.close() else: self.log.info("Inserting rows into MySQL...") mysql.insert_rows(table=self.mysql_table, rows=result, target_fields=selected_columns) self.log.info("Inserted rows into MySQL %s", count) except (MySQLdb.Error, MySQLdb.Warning): self.log.info("Inserted rows into MySQL 0") raise if self.mysql_postoperator: self.log.info("Running MySQL postoperator...") mysql.run(self.mysql_postoperator) self.log.info("Done")
def dump(objdump, path):
    n = NamedTemporaryFile(delete=False)
    o = check_output([objdump, '-d', '-x', '-s', path])
    n.write(o)
    n.flush()
    return n.name
def convert( boundary=None, input_file=None, output_file=None, src_srs=4326, driver=None, layers=None, layer_name=None, task_uid=None, projection: int = 4326, creation_options: list = None, dataset_creation_options: list = None, layer_creation_options: list = None, is_raster: bool = True, warp_params: dict = None, translate_params: dict = None, use_translate: bool = False, access_mode: str = "overwrite", config_options: List[Tuple[str]] = None, distinct_field=None, ): """ Uses gdal to convert and clip a supported dataset file to a mask if boundary is passed in. :param use_translate: A flag to force the use of translate instead of warp. :param layer_creation_options: Data options specific to vector conversion. :param dataset_creation_options: Data options specific to vector conversion. :param translate_params: A dict of params to pass into gdal translate. :param warp_params: A dict of params to pass into gdal warp. :param is_raster: A explicit declaration that dataset is raster (for disambiguating mixed mode files...gpkg) :param boundary: A geojson file or bbox (xmin, ymin, xmax, ymax) to serve as a cutline :param input_file: A raster or vector file to be clipped :param output_file: The dataset to put the clipped output in (if not specified will use in_dataset) :param driver: Short name of output driver to use (defaults to input format) :param layer_name: Table name in database for in_dataset :param layers: A list of layers to include for translation. :param task_uid: A task uid to update :param projection: A projection as an int referencing an EPSG code (e.g. 4326 = EPSG:4326) :param creation_options: Additional options to pass to the convert method (e.g. "-co SOMETHING") :param config_options: A list of gdal configuration options as a tuple (option, value). :return: Filename of clipped dataset """ if isinstance(input_file, str) and not use_translate: input_file = [input_file] meta_list = [] for _index, _file in enumerate(input_file): input_file[_index], output_file = get_dataset_names(_file, output_file) meta_list.append(get_meta(input_file[_index], is_raster)) src_src = f"EPSG:{src_srs}" dst_src = f"EPSG:{projection}" # Currently, when there are more than 1 files, they much each be the same driver, making the meta the same. meta = meta_list[0] if not driver: driver = meta["driver"] or "gpkg" # Geopackage raster only supports byte band type, so check for that band_type = None dstalpha = None if driver.lower() == "gpkg": band_type = gdal.GDT_Byte if meta.get("nodata") is None and meta.get("is_raster"): dstalpha = True # Clip the dataset if a boundary is passed in. temp_boundfile = None geojson = None bbox = None if boundary: # Strings are expected to be a file. 
if isinstance(boundary, str): if not os.path.isfile(boundary): raise Exception(f"Called convert using a boundary of {boundary} but no such path exists.") elif is_valid_bbox(boundary): geojson = bbox2polygon(boundary) bbox = boundary elif isinstance(boundary, dict): geojson = boundary if geojson: temp_boundfile = NamedTemporaryFile(suffix=".json") temp_boundfile.write(json.dumps(geojson).encode()) temp_boundfile.flush() boundary = temp_boundfile.name if meta["is_raster"]: cmd = get_task_command( convert_raster, input_file, output_file, driver=driver, creation_options=creation_options, band_type=band_type, dst_alpha=dstalpha, boundary=boundary, src_srs=src_src, dst_srs=dst_src, task_uid=task_uid, warp_params=warp_params, translate_params=translate_params, use_translate=use_translate, config_options=config_options, ) else: cmd = get_task_command( convert_vector, input_file, output_file, driver=driver, dataset_creation_options=dataset_creation_options, layer_creation_options=layer_creation_options, src_srs=src_src, dst_srs=dst_src, layers=layers, layer_name=layer_name, task_uid=task_uid, boundary=boundary, bbox=bbox, access_mode=access_mode, config_options=config_options, distinct_field=distinct_field, ) try: task_process = TaskProcess(task_uid=task_uid) task_process.start_process(cmd) except CancelException: # If we don't allow cancel exception to propagate then the task won't exit properly. # TODO: Allow retry state to be more informed. raise except Exception as e: logger.error(e) raise Exception("File conversion failed. Please try again or contact support.") finally: if temp_boundfile: temp_boundfile.close() if requires_zip(driver): logger.debug(f"Requires zip: {output_file}") output_file = create_zip_file(output_file, get_zip_name(output_file)) return output_file
class SshLocationContainer(object):
    """Run a Docker container to serve as an SSH location."""

    NAME = 'backuppy_test'
    PORT = 22
    USERNAME = '******'
    PASSWORD = '******'
    IDENTITY = os.path.join(RESOURCE_PATH, 'id_rsa')
    PATH = '/backuppy/'

    def __init__(self, mount_point=None):
        """Initialize a new instance."""
        self._started = False
        self._ip = None
        self._fingerprint = None
        self._known_hosts = None
        self._mount_point = mount_point

    def _ensure_started(self):
        """Ensure the container has been started."""
        if not self._started:
            raise RuntimeError('This container has not been started yet.')

    def start(self):
        """Start the container."""
        docker_args = []
        if self._mount_point is not None:
            docker_args += ['-v', '%s:%s' % (self._mount_point, self.PATH)]
        self.stop()
        subprocess.check_call(['docker', 'run', '-d', '--name', self.NAME] +
                              docker_args + ['backuppy_ssh_location'])
        self._started = True
        self.await()
        subprocess.check_call([
            'sshpass', '-p', self.PASSWORD, 'scp', '-o',
            'UserKnownHostsFile=%s' % self.known_hosts().name,
            '%s.pub' % self.IDENTITY,
            '%s@%s:~/.ssh/authorized_keys' % (self.USERNAME, self.ip)
        ])

    def stop(self):
        """Stop the container."""
        if not self._started:
            return
        self._started = False
        subprocess.check_call(['docker', 'stop', self.NAME])
        subprocess.check_call(['docker', 'container', 'rm', self.NAME])
        self._known_hosts.close()

    @property
    def ip(self):
        """Get the container's IP address.

        :return: str
        """
        self._ensure_started()
        if not self._ip:
            self._ip = str(
                subprocess.check_output([
                    'docker', 'inspect', '-f',
                    '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
                    self.NAME
                ]).strip().decode('utf-8'))
        return self._ip

    @property
    def fingerprint(self):
        """Get the container's SSH host key fingerprint.

        :return: str
        """
        self._ensure_started()
        if not self._fingerprint:
            self._fingerprint = str(
                subprocess.check_output(
                    ['ssh-keyscan', '-t', 'rsa', self.ip]).decode('utf-8'))
        return self._fingerprint

    def known_hosts(self):
        """Get an SSH known_hosts file containing just this container.

        :return: File
        """
        if self._known_hosts:
            return self._known_hosts
        self._known_hosts = NamedTemporaryFile(mode='r+')
        self._known_hosts.write(self.fingerprint)
        self._known_hosts.flush()
        return self._known_hosts

    def await(self):
        """Wait until the container is ready."""
        subprocess.check_call(
            ['./vendor/bin/wait-for-it', '%s:%d' % (self.ip, self.PORT)])

    def source(self, configuration):
        """Get the back-up source to this container.

        :return: backuppy.location.Source
        """
        return SshSource(configuration.notifier, self.USERNAME, self.ip,
                         self.PATH, identity=self.IDENTITY,
                         host_keys=self.known_hosts().name)

    def target(self, configuration):
        """Get the back-up target to this container.

        :return: backuppy.location.Target
        """
        return SshTarget(configuration.notifier, self.USERNAME, self.ip,
                         self.PATH, identity=self.IDENTITY,
                         host_keys=self.known_hosts().name)
def make_file(lines):
    f = NamedTemporaryFile()
    f.writelines(lines)
    f.flush()
    return f
def run(self, set_atoms=False): # !TODO: split this function """Method which explicitly runs LAMMPS.""" pbc = self.atoms.get_pbc() if all(pbc): cell = self.atoms.get_cell() elif not any(pbc): # large enough cell for non-periodic calculation - # LAMMPS shrink-wraps automatically via input command # "periodic s s s" # below cell = 2 * np.max(np.abs(self.atoms.get_positions())) * np.eye(3) else: warnings.warn( "semi-periodic ASE cell detected - translation " + "to proper LAMMPS input cell might fail" ) cell = self.atoms.get_cell() self.prism = Prism(cell) self.set_missing_parameters() self.calls += 1 # change into subdirectory for LAMMPS calculations cwd = os.getcwd() os.chdir(self.parameters.tmp_dir) # setup file names for LAMMPS calculation label = "{0}{1:>06}".format(self.label, self.calls) lammps_in = uns_mktemp( prefix="in_" + label, dir=self.parameters.tmp_dir ) lammps_log = uns_mktemp( prefix="log_" + label, dir=self.parameters.tmp_dir ) lammps_trj_fd = NamedTemporaryFile( prefix="trj_" + label, suffix=(".bin" if self.parameters.binary_dump else ""), dir=self.parameters.tmp_dir, delete=(not self.parameters.keep_tmp_files), ) lammps_trj = lammps_trj_fd.name if self.parameters.no_data_file: lammps_data = None else: lammps_data_fd = NamedTemporaryFile( prefix="data_" + label, dir=self.parameters.tmp_dir, delete=(not self.parameters.keep_tmp_files), mode='w', encoding='ascii' ) write_lammps_data( lammps_data_fd, self.atoms, specorder=self.parameters.specorder, force_skew=self.parameters.always_triclinic, velocities=self.parameters.write_velocities, prismobj=self.prism, units=self.parameters.units, atom_style=self.parameters.atom_style ) lammps_data = lammps_data_fd.name lammps_data_fd.flush() # see to it that LAMMPS is started if not self._lmp_alive(): command = self.get_lammps_command() # Attempt to (re)start lammps self._lmp_handle = Popen( shlex.split(command, posix=(os.name == "posix")), stdin=PIPE, stdout=PIPE, ) lmp_handle = self._lmp_handle # Create thread reading lammps stdout (for reference, if requested, # also create lammps_log, although it is never used) if self.parameters.keep_tmp_files: lammps_log_fd = open(lammps_log, "wb") fd = SpecialTee(lmp_handle.stdout, lammps_log_fd) else: fd = lmp_handle.stdout thr_read_log = Thread(target=self.read_lammps_log, args=(fd,)) thr_read_log.start() # write LAMMPS input (for reference, also create the file lammps_in, # although it is never used) if self.parameters.keep_tmp_files: lammps_in_fd = open(lammps_in, "wb") fd = SpecialTee(lmp_handle.stdin, lammps_in_fd) else: fd = lmp_handle.stdin write_lammps_in( lammps_in=fd, parameters=self.parameters, atoms=self.atoms, prismobj=self.prism, lammps_trj=lammps_trj, lammps_data=lammps_data, ) if self.parameters.keep_tmp_files: lammps_in_fd.close() # Wait for log output to be read (i.e., for LAMMPS to finish) # and close the log file if there is one thr_read_log.join() if self.parameters.keep_tmp_files: lammps_log_fd.close() if not self.parameters.keep_alive: self._lmp_end() exitcode = lmp_handle.poll() if exitcode and exitcode != 0: cwd = os.getcwd() raise RuntimeError( "LAMMPS exited in {} with exit code: {}." 
"".format(cwd, exitcode) ) # A few sanity checks if len(self.thermo_content) == 0: raise RuntimeError("Failed to retrieve any thermo_style-output") if int(self.thermo_content[-1]["atoms"]) != len(self.atoms): # This obviously shouldn't happen, but if prism.fold_...() fails, # it could raise RuntimeError("Atoms have gone missing") trj_atoms = read_lammps_dump( infileobj=lammps_trj, order=False, index=-1, prismobj=self.prism, specorder=self.parameters.specorder, ) if set_atoms: self.atoms = trj_atoms.copy() self.forces = trj_atoms.get_forces() # !TODO: trj_atoms is only the last snapshot of the system; Is it # desirable to save also the inbetween steps? if self.parameters.trajectory_out is not None: # !TODO: is it advisable to create here temporary atoms-objects self.trajectory_out.write(trj_atoms) tc = self.thermo_content[-1] self.results["energy"] = convert( tc["pe"], "energy", self.parameters["units"], "ASE" ) self.results["free_energy"] = self.results["energy"] self.results["forces"] = self.forces.copy() stress = np.array( [-tc[i] for i in ("pxx", "pyy", "pzz", "pyz", "pxz", "pxy")] ) # We need to apply the Lammps rotation stuff to the stress: xx, yy, zz, yz, xz, xy = stress stress_tensor = np.array([[xx, xy, xz], [xy, yy, yz], [xz, yz, zz]]) R = self.prism.rot_mat stress_atoms = np.dot(R, stress_tensor) stress_atoms = np.dot(stress_atoms, R.T) stress_atoms = stress_atoms[[0, 1, 2, 1, 0, 0], [0, 1, 2, 2, 2, 1]] stress = stress_atoms self.results["stress"] = convert( stress, "pressure", self.parameters["units"], "ASE" ) lammps_trj_fd.close() if not self.parameters.no_data_file: lammps_data_fd.close() os.chdir(cwd)
def test_loglevel(self): """Test the loglevel default setting""" def _loglevel(lvl, msg): lvl_int = topt.log.getEffectiveLevel() if is_py3(): lvl_name = logging.getLevelName(lvl_int) else: lvl_name = [ k for k, v in logging._levelNames.items() if v == lvl_int ][0] self.assertEqual(lvl_int, fancylogger.getLevelInt(lvl), msg="%s (expected %s got %s)" % (msg, lvl, lvl_name)) topt = TestOption1( go_args=['--ext-optional=REALVALUE'], go_nosystemexit=True, ) _loglevel(topt.DEFAULT_LOGLEVEL.upper(), 'Test default loglevel') topt = TestOption1( go_args=['--debug'], go_nosystemexit=True, ) _loglevel('DEBUG', '--debug gives DEBUG') topt = TestOption1( go_args=['--info'], go_nosystemexit=True, ) _loglevel('INFO', '--info gives INFO') topt = TestOption1( go_args=['--quiet'], go_nosystemexit=True, ) _loglevel('WARNING', '--quiet gives WARNING') # last one wins topt = TestOption1( go_args=['--debug', '--info', '--quiet'], go_nosystemexit=True, ) _loglevel('WARNING', 'last wins: --debug --info --quiet gives WARNING') CONFIGFILE1 = b""" [base] debug=1 """ tmp1 = NamedTemporaryFile() tmp1.write(CONFIGFILE1) tmp1.flush() # flush, otherwise empty envvar = 'logactionoptiontest'.upper() topt = TestOption1(go_configfiles=[tmp1.name], go_args=[], go_nosystemexit=True, envvar_prefix=envvar) _loglevel('DEBUG', 'DEBUG set via configfile') # set via environment; environment wins over cfg file os.environ['%s_INFO' % envvar] = '1' topt = TestOption1(go_configfiles=[tmp1.name], go_args=[], go_nosystemexit=True, envvar_prefix=envvar) _loglevel('INFO', 'env wins: debug in configfile and _INFO in env gives INFO') # commandline always wins topt = TestOption1(go_configfiles=[tmp1.name], go_args=['--quiet'], go_nosystemexit=True, envvar_prefix=envvar) _loglevel( 'WARNING', 'commandline wins: debug in configfile, _INFO in env and --quiet gives WARNING' ) # remove tmp1 del os.environ['%s_INFO' % envvar] tmp1.close()
def test_configfiles(self): """Test configfiles (base section for empty prefix from auto_section_name)""" CONFIGFILE1 = b""" [base] store=ok longbase=1 store-with-dash=XX [level] prefix-and-dash=YY [ext] extend=one,two,three strtuple=a,b strlist=x,y [remainder] opt1=value1 """ tmp1 = NamedTemporaryFile() tmp1.write(CONFIGFILE1) tmp1.flush() # flush, otherwise empty topt = TestOption1(go_configfiles=[tmp1.name], go_args=[]) # nothing passed by commandline self.assertEqual(topt.options.configfiles, _init_configfiles) self.assertEqual(topt.configfiles, [tmp1.name] + _init_configfiles) self.assertEqual(topt.options.store, 'ok') self.assertEqual(topt.options.longbase, True) self.assertEqual(topt.options.justatest, True) self.assertEqual(topt.options.store_with_dash, 'XX') self.assertEqual(topt.options.level_prefix_and_dash, 'YY') self.assertEqual(topt.options.ext_extend, ['one', 'two', 'three']) self.assertEqual(topt.options.ext_strtuple, ('a', 'b')) self.assertEqual(topt.options.ext_strlist, ['x', 'y']) self.assertTrue('remainder' in topt.configfile_remainder) self.assertFalse('base' in topt.configfile_remainder) self.assertEqual(topt.configfile_remainder['remainder'], {'opt1': 'value1'}) topt1b = TestOption1(go_configfiles=[tmp1.name], go_args=['--store=notok']) self.assertEqual(topt1b.options.store, 'notok') self.assertEqual(topt1b.options.configfiles, _init_configfiles) self.assertEqual(topt1b.configfiles, [tmp1.name] + _init_configfiles) CONFIGFILE2 = b""" [base] store=notok2 longbase=0 justatest=0 debug=1 """ tmp2 = NamedTemporaryFile() tmp2.write(CONFIGFILE2) tmp2.flush() # flush, otherwise empty # multiple config files, last one wins # cmdline wins always topt2 = TestOption1(go_configfiles=[tmp1.name, tmp2.name], go_args=['--store=notok3']) self.assertEqual(topt2.options.configfiles, _init_configfiles) self.assertEqual(topt2.configfiles, [tmp1.name, tmp2.name] + _init_configfiles) self.assertEqual(topt2.options.store, 'notok3') self.assertEqual(topt2.options.justatest, False) self.assertEqual(topt2.options.longbase, False) self.assertEqual(topt2.options.debug, True) # add test for _action_taken for dest in ['ext_strlist', 'longbase', 'store']: self.assertTrue(topt2.options._action_taken.get(dest, None)) for dest in ['level_longlevel']: self.assertFalse(dest in topt2.options._action_taken) # This works because we manipulate DEFAULT and use all uppercase name CONFIGFILE3 = b""" [base] store=%(FROMINIT)s """ tmp3 = NamedTemporaryFile() tmp3.write(CONFIGFILE3) tmp3.flush() # flush, otherwise empty initenv = {'DEFAULT': {'FROMINIT': 'woohoo'}} topt3 = TestOption1(go_configfiles=[tmp3.name, tmp2.name], go_args=['--ignoreconfigfiles=%s' % tmp2.name], go_configfiles_initenv=initenv) self.assertEqual(topt3.options.configfiles, _init_configfiles) self.assertEqual(topt3.configfiles, [tmp3.name, tmp2.name] + _init_configfiles) self.assertEqual(topt3.options.ignoreconfigfiles, [tmp2.name]) self.assertEqual(topt3.options.store, 'woohoo') # remove files tmp1.close() tmp2.close() tmp3.close()
def _make_pyplate(self, contents):
    contents = dedent(contents)
    pyplate = NamedTemporaryFile()
    pyplate.write(contents)
    pyplate.flush()
    return pyplate
class _FileLogger:
    """File logging class wrapper.

    Class wrapping is needed mainly for safe removal of the log file after
    Blender is shut down. Registering a function with the atexit module makes
    sure that the file is deleted if Blender is closed normally. However, the
    file is not deleted if the process is killed on Linux. On Windows, on the
    other hand, the file gets deleted even if Blender is closed from
    Task Manager -> End Task/Process.
    """
    __log_file = None

    def __init__(self):
        self.__log_file = NamedTemporaryFile(mode="w+", suffix=".log.txt", delete=True)

        # instead of a destructor we are using the delete method,
        # to close and consequently delete the log file
        atexit.register(self.delete)

    def delete(self):
        """Closes the file and consequently deletes it, as the log file was
        created in that fashion.
        """
        # close the file only if it still exists in the class variable
        if self.__log_file is not None:
            self.__log_file.close()
            self.__log_file = None

    def write(self, msg_object):
        """Writes a message to the log file.

        :param msg_object: message to be written to file
        :type msg_object: object
        """
        self.__log_file.write(msg_object)

    def flush(self):
        """Flushes written content to the file on disk."""
        self.__log_file.flush()

    def get_log(self):
        """Gets the current content of the temporary SCS BT log file, which was
        created at startup and holds the log of the BT session.

        :return: current content of log file as string
        :rtype: str
        """
        # first move to the start of the file
        self.__log_file.seek(0)

        log = ""
        for line in self.__log_file.readlines():
            # extra tab so the Blender text editor aligns the same as the console
            log += line.replace("\t ", "\t\t ")
        return log
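# Illustrative usage of the wrapper above (a sketch added here, not part of
# the add-on); assumes NamedTemporaryFile and atexit are already imported for
# the class definition.
if __name__ == "__main__":
    _log = _FileLogger()
    _log.write("0.1 | example message\n")
    _log.flush()
    print(_log.get_log())
    _log.delete()  # closing the NamedTemporaryFile also removes it from disk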
def configure_and_load(self, config_element, tool_data_path, from_shed_config=False, url_timeout=10): """ Configure and load table from an XML element. """ self.separator = config_element.get('separator', '\t') self.comment_char = config_element.get('comment_char', '#') # Configure columns self.parse_column_spec(config_element) # store repo info if available: repo_elem = config_element.find('tool_shed_repository') if repo_elem is not None: repo_info = dict(tool_shed=repo_elem.find('tool_shed').text, name=repo_elem.find('repository_name').text, owner=repo_elem.find('repository_owner').text, installed_changeset_revision=repo_elem.find('installed_changeset_revision').text) else: repo_info = None # Read every file for file_element in config_element.findall('file'): tmp_file = None filename = file_element.get('path', None) if filename is None: # Handle URLs as files filename = file_element.get('url', None) if filename: tmp_file = NamedTemporaryFile(prefix='TTDT_URL_%s-' % self.name) try: tmp_file.write(requests.get(filename, timeout=url_timeout).text) except Exception as e: log.error('Error loading Data Table URL "%s": %s', filename, e) continue log.debug('Loading Data Table URL "%s" as filename "%s".', filename, tmp_file.name) filename = tmp_file.name tmp_file.flush() filename = file_path = expand_here_template(filename, here=self.here) found = False if file_path is None: log.debug("Encountered a file element (%s) that does not contain a path value when loading tool data table '%s'.", util.xml_to_string(file_element), self.name) continue # FIXME: splitting on and merging paths from a configuration file when loading is wonky # Data should exist on disk in the state needed, i.e. the xml configuration should # point directly to the desired file to load. Munging of the tool_data_tables_conf.xml.sample # can be done during installing / testing / metadata resetting with the creation of a proper # tool_data_tables_conf.xml file, containing correct <file path=> attributes. Allowing a # path.join with a different root should be allowed, but splitting should not be necessary. if tool_data_path and from_shed_config: # Must identify with from_shed_config as well, because the # regular galaxy app has and uses tool_data_path. # We're loading a tool in the tool shed, so we cannot use the Galaxy tool-data # directory which is hard-coded into the tool_data_table_conf.xml entries. filename = os.path.split(file_path)[1] filename = os.path.join(tool_data_path, filename) if self.tool_data_path_files.exists(filename): found = True elif self.tool_data_path_files.exists("%s.sample" % filename) and not from_shed_config: log.info("Could not find tool data %s, reading sample" % filename) filename = "%s.sample" % filename found = True else: # Since the path attribute can include a hard-coded path to a specific directory # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value # as self.tool_data_path, we'll parse the path to get the filename and see if it is # in self.tool_data_path. 
file_path, file_name = os.path.split(filename) if file_path and file_path != self.tool_data_path: corrected_filename = os.path.join(self.tool_data_path, file_name) if self.tool_data_path_files.exists(corrected_filename): filename = corrected_filename found = True errors = [] if found: self.extend_data_with(filename, errors=errors) self._update_version() else: self.missing_index_file = filename log.warning("Cannot find index file '%s' for tool data table '%s'" % (filename, self.name)) if filename not in self.filenames or not self.filenames[filename]['found']: self.filenames[filename] = dict(found=found, filename=filename, from_shed_config=from_shed_config, tool_data_path=tool_data_path, config_element=config_element, tool_shed_repository=repo_info, errors=errors) else: log.debug("Filename '%s' already exists in filenames (%s), not adding", filename, list(self.filenames.keys())) # Remove URL tmp file if tmp_file is not None: tmp_file.close()
class RNAfoldApp(LocalApp): """ Compute the minimum free energy secondary structure of a ribonucleic acid sequence using *ViennaRNA's* *RNAfold* software. Internally this creates a :class:`Popen` instance, which handles the execution. Parameters ---------- sequence : NucleotideSequence The RNA sequence. temperature : int, optional The temperature (°C) to be assumed for the energy parameters. bin_path : str, optional Path of the *RNAfold* binary. Examples -------- >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC") >>> app = RNAfoldApp(sequence) >>> app.start() >>> app.join() >>> print(app.get_mfe()) -1.3 >>> print(app.get_dot_bracket()) (((.((((.......)).))))).... """ def __init__(self, sequence, temperature=37, bin_path="RNAfold"): super().__init__(bin_path) self._sequence = sequence self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False) self._temperature = str(temperature) def run(self): in_file = FastaFile() set_sequence(in_file, self._sequence) in_file.write(self._in_file) self._in_file.flush() self.set_arguments( [self._in_file.name, "--noPS", "-T", self._temperature]) super().run() def evaluate(self): super().evaluate() lines = self.get_stdout().split("\n") content = lines[2] dotbracket, mfe = content.split(" ", maxsplit=1) mfe = float(mfe[1:-1]) self._mfe = mfe self._dotbracket = dotbracket def clean_up(self): super().clean_up() cleanup_tempfile(self._in_file) @requires_state(AppState.CREATED) def set_temperature(self, temperature): """ Adjust the energy parameters according to a temperature in degrees Celsius. Parameters ---------- temperature : int The temperature. """ self._temperature = str(temperature) @requires_state(AppState.JOINED) def get_mfe(self): """ Get the minimum free energy of the input sequence. Returns ------- mfe : float The minimum free energy. Examples -------- >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC") >>> app = RNAfoldApp(sequence) >>> app.start() >>> app.join() >>> print(app.get_mfe()) -1.3 """ return self._mfe @requires_state(AppState.JOINED) def get_dot_bracket(self): """ Get the minimum free energy secondary structure of the input sequence in dot bracket notation. Returns ------- dotbracket : str The secondary structure in dot bracket notation. Examples -------- >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC") >>> app = RNAfoldApp(sequence) >>> app.start() >>> app.join() >>> print(app.get_dot_bracket()) (((.((((.......)).))))).... """ return self._dotbracket @requires_state(AppState.JOINED) def get_base_pairs(self): """ Get the base pairs from the minimum free energy secondary structure of the input sequence. Returns ------- base_pairs : ndarray, shape=(n,2) Each row corresponds to the positions of the bases in the sequence. Examples -------- >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC") >>> app = RNAfoldApp(sequence) >>> app.start() >>> app.join() >>> print(app.get_base_pairs()) [[ 0 22] [ 1 21] [ 2 20] [ 4 19] [ 5 18] [ 6 16] [ 7 15]] For reference, the corresponding dot bracket notation can be displayed as below. >>> print(app.get_dot_bracket()) (((.((((.......)).))))).... """ return base_pairs_from_dot_bracket(self._dotbracket) @staticmethod def compute_secondary_structure(sequence, bin_path="RNAfold"): """ Compute the minimum free energy secondary structure of a ribonucleic acid sequence using *ViennaRNA's* *RNAfold* software. This is a convenience function, that wraps the :class:`RNAfoldApp` execution. 
Parameters ---------- sequence : NucleotideSequence The RNA sequence. bin_path : str, optional Path of the *RNAfold* binary. Returns ------- dotbracket : str The secondary structure in dot bracket notation. mfe : float The minimum free energy. """ app = RNAfoldApp(sequence, bin_path=bin_path) app.start() app.join() return app.get_dot_bracket(), app.get_mfe()
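# Illustrative sketch (not part of the original source): evaluate() above parses
# RNAfold's stdout, which for a FASTA input consists of ">name", the sequence and
# a line of the form "<dot-bracket> (<mfe>)".  The same parsing step, run on a
# made-up example of such output:
stdout_example = ">seq\nGGGGAAAACCCC\n((((....)))) (-3.40)\n"
content = stdout_example.split("\n")[2]
dotbracket, mfe = content.split(" ", maxsplit=1)
mfe = float(mfe[1:-1])        # "(-3.40)" -> -3.4
print(dotbracket, mfe)        # ((((....)))) -3.4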
def _iternocache(self, source, key, reverse): debug('iterate without cache') self._clearcache() it = iter(source) flds = it.next() yield tuple(flds) if key is not None: # convert field selection into field indices indices = asindices(flds, key) else: indices = range(len(flds)) # now use field indices to construct a _getkey function # N.B., this will probably raise an exception on short rows getkey = sortable_itemgetter(*indices) # initialise the first chunk rows = list(itertools.islice(it, 0, self.buffersize)) rows.sort(key=getkey, reverse=reverse) # have we exhausted the source iterator? if self.buffersize is None or len(rows) < self.buffersize: if self.cache: debug('caching mem') self._fldcache = flds self._memcache = rows self._getkey = getkey # actually not needed to iterate from memcache for row in rows: yield tuple(row) else: chunkfiles = [] while rows: # dump the chunk f = NamedTemporaryFile(dir=self.tempdir) for row in rows: pickle.dump(row, f, protocol=-1) f.flush() # N.B., do not close the file! Closing will delete # the file, and we might want to keep it around # if it can be cached. We'll let garbage collection # deal with this, i.e., when no references to the # chunk files exist any more, garbage collection # should be an implicit close, which will cause file # deletion. chunkfiles.append(f) # grab the next chunk rows = list(itertools.islice(it, 0, self.buffersize)) rows.sort(key=getkey, reverse=reverse) if self.cache: debug('caching files %r', chunkfiles) self._fldcache = flds self._filecache = chunkfiles self._getkey = getkey chunkiters = [iterchunk(f) for f in chunkfiles] for row in _mergesorted(getkey, reverse, *chunkiters): yield tuple(row)
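# Illustrative sketch (not part of the original source): the iterator above spills
# sorted chunks into NamedTemporaryFile objects (one pickled row at a time) and
# then merges the chunk files lazily.  The same idea in miniature, with heapq.merge
# doing the k-way merge; all names here are illustrative only.
import heapq
import itertools
import pickle
from tempfile import NamedTemporaryFile

def iterchunk_sketch(f):
    # read pickled rows back from an open chunk file
    f.seek(0)
    while True:
        try:
            yield pickle.load(f)
        except EOFError:
            break

def external_sort_sketch(rows, buffersize=2):
    chunkfiles = []
    it = iter(rows)
    chunk = list(itertools.islice(it, buffersize))
    while chunk:
        chunk.sort()
        f = NamedTemporaryFile()          # kept open so it is not deleted yet
        for row in chunk:
            pickle.dump(row, f, protocol=-1)
        f.flush()
        chunkfiles.append(f)
        chunk = list(itertools.islice(it, buffersize))
    return heapq.merge(*(iterchunk_sketch(f) for f in chunkfiles))

print(list(external_sort_sketch([(3, 'c'), (1, 'a'), (2, 'b'), (5, 'e'), (4, 'd')])))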
def run_stats(self): """Main function which do the process.""" # Get the common fields. self.admin_layer = self.cbx_aggregation_layer.currentLayer() selected_indicators = self.indicators_list() if not self.name_field: self.name_field = self.le_new_column.placeholderText() # Output. self.output_file_path = self.le_output_filepath.text() try: self.button_box_ok.setDisabled(True) # noinspection PyArgumentList QApplication.setOverrideCursor(Qt.WaitCursor) # noinspection PyArgumentList QApplication.processEvents() if not self.admin_layer: raise NoLayerProvidedException if not self.admin_layer and self.use_point_layer: raise NoLayerProvidedException crs_admin_layer = self.admin_layer.crs() if not self.use_point_layer and not self.use_area: if not self.cbx_list_indicators: raise FieldException( field_1='List Indicators should not empty') # Output if not self.output_file_path: temp_file = NamedTemporaryFile(delete=False, suffix='-geopublichealth.shp') self.output_file_path = temp_file.name temp_file.flush() temp_file.close() admin_layer_provider = self.admin_layer.dataProvider() fields = self.admin_layer.fields() if admin_layer_provider.fields().indexFromName( self.name_field) != -1: raise FieldExistingException(field=self.name_field) for indicator_selected in selected_indicators: fields.append( QgsField("Z" + indicator_selected[0], QVariant.Double)) fields.append(QgsField(self.name_field, QVariant.Double)) file_writer = QgsVectorFileWriter(self.output_file_path, 'utf-8', fields, QgsWkbTypes.Polygon, self.admin_layer.crs(), 'ESRI Shapefile') count = self.admin_layer.featureCount() stats = {} for indicator_selected in selected_indicators: values = [] indicator_selected_name = str(indicator_selected[0]) for i, feature in enumerate(self.admin_layer.getFeatures()): index = self.admin_layer.fields().indexFromName( indicator_selected_name) if feature[index]: value = float(feature[index]) else: value = 0.0 values.append(value) stats[indicator_selected_name] = Stats(values) for i, feature in enumerate(self.admin_layer.getFeatures()): attributes = feature.attributes() composite_index_value = 0.0 for indicator_selected in selected_indicators: indicator_selected_name = str(indicator_selected[0]) index = self.admin_layer.fields().indexFromName( indicator_selected_name) if feature[index]: value = float(feature[index]) else: value = 0.0 zscore = (value - stats[indicator_selected_name].average( )) / stats[indicator_selected_name].standard_deviation() attributes.append(float(zscore)) if indicator_selected[1] == '+': composite_index_value -= zscore else: composite_index_value += zscore attributes.append(float(composite_index_value)) new_feature = QgsFeature() new_geom = QgsGeometry(feature.geometry()) new_feature.setAttributes(attributes) new_feature.setGeometry(new_geom) file_writer.addFeature(new_feature) del file_writer self.output_layer = QgsVectorLayer(self.output_file_path, self.name_field, 'ogr') QgsProject.instance().addMapLayer(self.output_layer) if self.symbology.isChecked(): self.add_symbology() self.signalStatus.emit(3, tr('Successful process')) except GeoPublicHealthException as e: display_message_bar(msg=e.msg, level=e.level, duration=e.duration) finally: self.button_box_ok.setDisabled(False) # noinspection PyArgumentList QApplication.restoreOverrideCursor() # noinspection PyArgumentList QApplication.processEvents()
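# Illustrative sketch (not part of the original source): the loop above turns each
# indicator value into a z-score and accumulates the z-scores into a composite
# index per feature.  The same arithmetic on a plain list, with illustrative data
# and a population standard deviation:
values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
average = sum(values) / len(values)
variance = sum((v - average) ** 2 for v in values) / len(values)
standard_deviation = variance ** 0.5
zscores = [(v - average) / standard_deviation for v in values]
print(average, standard_deviation)   # 5.0 2.0
print(zscores[0])                    # -1.5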
def download(urls, name, ext, live=False):
    url = urls[0]
    m3u8 = ext == 'm3u8'
    m3u8_crypto = False
    audio = subtitle = None
    # for live video, always use ffmpeg to rebuild the timeline.
    if not live and m3u8:
        live = live_m3u8(url)
    internal = not live and m3u8_internal
    if m3u8:
        m3u8_crypto = crypto_m3u8(url)
        # rebuild m3u8 urls when using the internal downloader,
        # change the ext to the segment's ext (default "ts"),
        # otherwise change the ext to "flv" or "mp4".
        if internal:
            urls, audio, subtitle = load_m3u8(url)
            ext = urlparse(urls[0])[2].split('.')[-1]
            if ext not in ['ts', 'm4s', 'mp4', 'm4a']:
                ext = 'ts'
        elif live:
            ext = 'flv'
        else:
            ext = 'mp4'
    elif ext == 'mpd':
        # very slow, and it still has many problems
        # TODO: implement internal download/merge process
        internal = False
        ext = 'mp4'

    # OK, check internal
    if not internal:
        launch_ffmpeg_download(url, name + '.' + ext, allow_all_ext=m3u8_crypto)
    else:
        if save_urls(urls, name, ext, jobs=args.jobs,
                     fail_confirm=not args.no_fail_confirm,
                     fail_retry_eta=args.fail_retry_eta):
            length = len(urls)
            if (m3u8 or length > 1) and not args.no_merge:
                fix_sa_name(name, ext, length)
                if m3u8_crypto:
                    # use ffmpeg to merge the internally downloaded m3u8:
                    # build a local m3u8, and then the headers cannot be set
                    lm3u8 = NamedTemporaryFile(mode='w+t', suffix='.m3u8',
                                               dir='.', encoding='utf-8')
                    lkeys = []  # temp keys' references
                    m = _load_m3u8(url)
                    for k in m.keys + m.session_keys:
                        if k and k.uri:
                            key = NamedTemporaryFile(mode='w+b', suffix='.key',
                                                     dir='.')
                            key.write(http.get_response(k.absolute_uri).content)
                            key.flush()
                            k.uri = os.path.basename(key.name)
                            lkeys.append(key)
                    for i, seg in enumerate(m.segments):
                        seg.uri = '%s_%d.%s' % (name, i, ext)
                    lm3u8.write(m.dumps())
                    lm3u8.flush()
                    launch_ffmpeg_download(lm3u8.name, name + '.mp4', False, True)
                else:
                    launch_ffmpeg_merge(name, ext, length)
                clean_slices(name, ext, length)
        else:
            logger.critical('{}> download failed'.format(name))

    if audio:
        ext = 'm4a'
        length = len(audio)
        if save_urls(audio, name, ext, jobs=args.jobs,
                     fail_confirm=not args.no_fail_confirm,
                     fail_retry_eta=args.fail_retry_eta):
            if (m3u8 or length > 1) and not args.no_merge:
                fix_sa_name(name, ext, length)
                launch_ffmpeg_merge(name, ext, length)
                clean_slices(name, ext, length)
        else:
            logger.critical('{}> HLS audio download failed'.format(name))

    if subtitle:
        ext = 'srt'
        if not save_urls(subtitle[:1], name, ext, jobs=args.jobs,
                         fail_confirm=not args.no_fail_confirm,
                         fail_retry_eta=args.fail_retry_eta):
            logger.critical('{}> HLS subtitle download failed'.format(name))
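# Illustrative sketch (not part of the original source): launch_ffmpeg_download
# above is assumed to hand the playlist URL (or the local temporary .m3u8) to
# ffmpeg; a minimal stand-in, assuming ffmpeg is on PATH, could simply remux the
# input into a single output file without re-encoding:
import subprocess

def ffmpeg_download_sketch(input_url, output_name):
    subprocess.check_call(['ffmpeg', '-y', '-i', input_url, '-c', 'copy', output_name])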
def consensusCalling(self, spot, bam, reference, args): """ Make a consensus of all the reads in the region and identify all of the SVs in the region """ # MAXNUMREADS = 100 #I don't think we'll need more than this many reads MAXATTEMPTS = MAXNUMREADS / 2 #I don't feel like trying 100 times SPANBUFFER = 100 #number of bases I want a read to span chrom, start, end = spot.chrom, spot.start, spot.end buffer = args.buffer supportReads = [] spanReads = [] #Fetch reads and trim totCnt = 0 for read in bam.fetch(chrom, max(0, start - buffer - SPANBUFFER), end + buffer + SPANBUFFER): if read.qname not in spot.varReads: continue seq, qual = self.readTrim(read, start - buffer, end + buffer) if read.pos < start - SPANBUFFER and read.aend > end + SPANBUFFER: spanReads.append((len(seq), seq, qual)) else: supportReads.append((seq, qual)) totCnt += 1 if len(spanReads) == 0: logging.debug("noone spans - consensus aborted. %s" % (str(spot))) spot.tags["noSpan"] = True return [spot] spanReads.sort(reverse=True) if len(spanReads) > MAXNUMREADS: origSupportReads = [(x[1], x[2]) for x in spanReads[:MAXNUMREADS]] elif len(spanReads) + len(supportReads) > MAXNUMREADS: origSupportReads = [(x[1], x[2]) for x in spanReads ] + supportReads[:MAXNUMREADS - len(spanReads)] else: origSupportReads = [(x[1], x[2]) for x in spanReads] + supportReads logging.debug("Alt reads: %d total, %d extra support" % (totCnt, len(origSupportReads))) mySpots = [] refReadId = 0 haveVar = False #Attempt each spanRead until we get one that passes #while refReadId < len(spanReads) and not haveVar and refReadId < MAXATTEMPTS: #refread = spanReads[refReadId] #supportReads = origSupportReads[:refReadId] + origSupportReads[refReadId+1:] refReadId += 1 #read that spans most of the region goes first #use the rest for cleaning #building consensus sequence foutreads = NamedTemporaryFile(suffix=".fasta") qoutreads = open(foutreads.name + '.qual', 'w') for id, i in enumerate(origSupportReads): foutreads.write(">%d\n%s\n" % (id, i[0])) qoutreads.write(">%d\n%s\n" % (id, " ".join(str(ord(j) - 33) for j in i[1]))) foutreads.flush() qoutreads.flush() #foutref = NamedTemporaryFile(suffix=".fasta") #foutref.write(">%s:%d-%d\n%s" % (spot.chrom, start, end, refread[1])) #foutref.flush() logging.debug("Making the contig....") #run it through phrap #make out.fasta and out.fasta.qual #run phrap #if asm -- consensus only r, o, e = exe("phrap %s -minmatch 6 -minscore 20" % (foutreads.name), timeout=3) if r != 0: #failed logging.warning('phrap failed ' + self.name) logging.warning(o) logging.warning(e) return [ ] #here is where I'd like to add just the no-consensus spot results = mergeFastaQual(foutreads.name + ".contigs", foutreads.name + ".contigs.qual") if len(results) == 0: logging.warning('no asm made ' + self.name) return [ ] #here is where I'd like to add just the no-consensus spot logging.info('%d contigs made %s' % (len(results), self.name)) #then run it through consensus logging.debug("Polishing contigs") alignOut = NamedTemporaryFile(suffix=".m5") blasr(foutreads.name, foutreads.name + ".contigs", format="-m 5", nproc=1, outname=alignOut.name) # elif no asm and consensus only (faster) if args.polish == "pbbanana": aligns = M5File(alignOut.name) con = ">con\n%s\n" % consensus(aligns).sequence conName = "pbbanana" elif args.polish == "pbdagcon": logging.debug("pbdagcon is running") #using minerrreads - 1 because one f them is already being used as seed! 
r, con, e = exe("pbdagcon -c %d -t 0 %s" % (max(0, args.minErrReads - 1), alignOut.name), timeout=1) #r, con, e = exe("pbdagcon %s" % (alignOut.name), timeout=2) logging.debug("back from pbdagcon") logging.debug((r, e)) #raw_input("press ent") if con is not None: con = con[con.index("\n") + 1:] else: con = "" conName = "pbdagcon" alignOut.close() #foutref.close() foutreads.close() #we don't have a consensus - retry if len(con) == 0: logging.debug("Trying another seed read for consensus") con = results.values()[0].seq logging.debug("%s %d bp seq" % (conName, len(con.split('\n')[1]))) #try improving consensus conOut = NamedTemporaryFile(suffix=".fasta") conOut.write(con) #conOut.close() conOut.flush() refOut = NamedTemporaryFile(suffix=".fasta") #j = reference.fetch(chrom, max(0, start-buffer), end+buffer) #fout = open("f****e.ref.fasta",'w') #fout.write(j) #fout.close() refOut.write(">%s:%d-%d\n%s\n" % (chrom, start, end, \ reference.fetch(chrom, max(0, start-buffer), end+buffer))) refOut.flush() #map consensus to refregion varSam = NamedTemporaryFile(suffix=".sam") blasr(conOut.name, refOut.name, format="--sam", outname=varSam.name) #consensus=False) -- would this help? #or what if I fed it through leftalign? sam = pysam.Samfile(varSam.name) matches = 0.0 bases = 0.0 nReads = 0 mySpots = [] for read in sam: nReads += 1 spot.tags["consensusCreated"] = True for svstart, svsize, svtype, altseq in expandCigar( read, args.minIndelSize, CONFIRMCOLLAPSE, True): newspot = copy.deepcopy(spot) if spot.svtype == svtype and svtype == "INS": haveVar = True newspot.start = svstart + start - buffer newspot.end = svstart + start - buffer newspot.tags["seq"] = altseq newspot.size = svsize gt, gq = genotype(newspot) newspot.tags["GT"] = gt newspot.tags["GQ"] = gq mySpots.append(newspot) elif spot.svtype == svtype and svtype == "DEL": haveVar = True newspot.start = svstart + start - buffer newspot.end = svstart + svsize + start - buffer newspot.size = -svsize gt, gq = genotype(newspot) newspot.tags["GT"] = gt newspot.tags["GQ"] = gq newspot.tags["seq"] = reference.fetch( chrom, newspot.start, newspot.end) mySpots.append(newspot) #identity = matches/bases #If no var, nothing is returned. #for newspot in mySpots: #newspot.tags["alnIdentityEstimate"] = identity #Keep reporting the actual contigs out until we #find a reason to need it (and also we can get quals...) #vbam.reset() #for id, read in enumerate(vbam): #newspot.tags["contigSeq%d" % (id)] = read.seq #newspot.tags["contigQual%d" % (id)] = read.qual #vbam.close() #varBam.close() refOut.close() logging.debug("%d consensus reads created %d spots" % (nReads, len(mySpots))) return mySpots
def ShellCommandResults(CmdLine, Opt): """ Execute the command, returning the output content """ file_list = NamedTemporaryFile(delete=False) filename = file_list.name Results = [] returnValue = 0 try: subprocess.check_call(args=shlex.split(CmdLine), stderr=subprocess.STDOUT, stdout=file_list) except subprocess.CalledProcessError as err_val: file_list.close() if not Opt.silent: sys.stderr.write("ERROR : %d : %s\n" % (err_val.returncode, err_val.__str__())) if os.path.exists(filename): sys.stderr.write(" : Partial results may be in this file: %s\n" % filename) sys.stderr.flush() returnValue = err_val.returncode except IOError as err_val: (errno, strerror) = err_val.args file_list.close() if not Opt.silent: sys.stderr.write("I/O ERROR : %s : %s\n" % (str(errno), strerror)) sys.stderr.write("ERROR : this command failed : %s\n" % CmdLine) if os.path.exists(filename): sys.stderr.write(" : Partial results may be in this file: %s\n" % filename) sys.stderr.flush() returnValue = errno except OSError as err_val: (errno, strerror) = err_val.args file_list.close() if not Opt.silent: sys.stderr.write("OS ERROR : %s : %s\n" % (str(errno), strerror)) sys.stderr.write("ERROR : this command failed : %s\n" % CmdLine) if os.path.exists(filename): sys.stderr.write(" : Partial results may be in this file: %s\n" % filename) sys.stderr.flush() returnValue = errno except KeyboardInterrupt: file_list.close() if not Opt.silent: sys.stderr.write("ERROR : Command terminated by user : %s\n" % CmdLine) if os.path.exists(filename): sys.stderr.write(" : Partial results may be in this file: %s\n" % filename) sys.stderr.flush() returnValue = 1 finally: if not file_list.closed: file_list.flush() os.fsync(file_list.fileno()) file_list.close() if os.path.exists(filename): fd_ = open(filename, 'r') Results = fd_.readlines() fd_.close() os.unlink(filename) if returnValue > 0: return returnValue return Results
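# Illustrative sketch (not part of the original source): the helper above redirects
# the command's stdout/stderr into a NamedTemporaryFile(delete=False) and reads the
# lines back from disk afterwards.  The core pattern, without the per-exception
# reporting:
import os
import shlex
import subprocess
from tempfile import NamedTemporaryFile

def run_and_collect(cmd_line):
    tmp = NamedTemporaryFile(delete=False)
    try:
        subprocess.check_call(args=shlex.split(cmd_line),
                              stdout=tmp, stderr=subprocess.STDOUT)
    finally:
        tmp.close()
    with open(tmp.name, 'r') as handle:
        lines = handle.readlines()
    os.unlink(tmp.name)
    return lines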
def build_summary(doc, rouge_settings): params = build_rouge_params(rouge_settings) match_pattern = "X ROUGE-{} Eval".format(rouge_settings["order"]) ref_file = get_reference_file(doc) input_sentences = get_input_sentences(doc, 25) n_inputs = len(input_sentences) input_ids = [i for i in xrange(n_inputs)] candidate_files = [NamedTemporaryFile("w", delete=False) for i in xrange(n_inputs)] config_lines = ["{} {}".format(cf.name, ref_file.name) for cf in candidate_files] config_file = NamedTemporaryFile("w", delete=False) greedy_summary = "" greedy_score = 0 for z in range(n_inputs): cfg_text = "\n".join(config_lines) config_file.truncate(len(cfg_text)) config_file.seek(0) config_file.write(cfg_text) config_file.flush() for i in xrange(len(config_lines)): input_id = input_ids[i] sum = "{}{}\n".format(greedy_summary, input_sentences[input_id]) cf = candidate_files[i] cf.truncate(len(sum)) cf.seek(0) cf.write(sum) cf.flush() output = subprocess.check_output(params + [config_file.name]) i = 0 max_score = greedy_score max_id = None for line in output.split("\n"): if line.startswith(match_pattern): score = float(line.split()[4][2:]) if score > max_score: max_score = score max_id = i i += 1 if max_id is not None: greedy_score = max_score greedy_summary = "{}{}\n".format( greedy_summary, input_sentences[max_id]) input_ids.pop(max_id) config_lines = config_lines[:-1] else: break for cf in candidate_files: cf.close() os.remove(cf.name) os.remove(ref_file.name) config_file.close() os.remove(config_file.name) return greedy_summary.strip()
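# Illustrative sketch (not part of the original source): the function above grows a
# summary greedily, keeping a candidate sentence only if it raises the ROUGE score.
# The same greedy loop with a stand-in scorer (word overlap with a reference text)
# in place of the external ROUGE call:
def overlap_score(summary, reference):
    return len(set(summary.split()) & set(reference.split()))

def greedy_summary_sketch(sentences, reference):
    summary, best = "", 0
    remaining = list(sentences)
    while remaining:
        scored = [(overlap_score(summary + " " + s, reference), i)
                  for i, s in enumerate(remaining)]
        score, idx = max(scored)
        if score <= best:
            break
        best = score
        summary = (summary + " " + remaining.pop(idx)).strip()
    return summary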
def from_file_using_temporary_files(cls, file, format=None, codec=None, parameters=None, **kwargs): orig_file = file file, close_file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False) if format: format = format.lower() format = AUDIO_FILE_EXT_ALIASES.get(format, format) def is_format(f): f = f.lower() if format == f: return True if isinstance(orig_file, basestring): return orig_file.lower().endswith(".{0}".format(f)) if isinstance(orig_file, bytes): return orig_file.lower().endswith( (".{0}".format(f)).encode('utf8')) return False if is_format("wav"): try: obj = cls._from_safe_wav(file) if close_file: file.close() return obj except: file.seek(0) elif is_format("raw") or is_format("pcm"): sample_width = kwargs['sample_width'] frame_rate = kwargs['frame_rate'] channels = kwargs['channels'] metadata = { 'sample_width': sample_width, 'frame_rate': frame_rate, 'channels': channels, 'frame_width': channels * sample_width } obj = cls(data=file.read(), metadata=metadata) if close_file: file.close() return obj input_file = NamedTemporaryFile(mode='wb', delete=False) try: input_file.write(file.read()) except (OSError): input_file.flush() input_file.close() input_file = NamedTemporaryFile(mode='wb', delete=False, buffering=2**31 - 1) if close_file: file.close() close_file = True file = open(orig_file, buffering=2**13 - 1, mode='rb') reader = file.read(2**31 - 1) while reader: input_file.write(reader) reader = file.read(2**31 - 1) input_file.flush() if close_file: file.close() output = NamedTemporaryFile(mode="rb", delete=False) conversion_command = [ cls.converter, '-y', # always overwrite existing files ] # If format is not defined # ffmpeg/avconv will detect it automatically if format: conversion_command += ["-f", format] if codec: # force audio decoder conversion_command += ["-acodec", codec] conversion_command += [ "-i", input_file.name, # input_file options (filename last) "-vn", # Drop any video streams if there are any "-f", "wav", # output options (filename last) output.name ] if parameters is not None: # extend arguments with arbitrary set conversion_command.extend(parameters) log_conversion(conversion_command) with open(os.devnull, 'rb') as devnull: p = subprocess.Popen(conversion_command, stdin=devnull, stdout=subprocess.PIPE, stderr=subprocess.PIPE) p_out, p_err = p.communicate() log_subprocess_output(p_out) log_subprocess_output(p_err) try: if p.returncode != 0: raise CouldntDecodeError( "Decoding failed. ffmpeg returned error code: {0}\n\nOutput from ffmpeg/avlib:\n\n{1}" .format(p.returncode, p_err)) obj = cls._from_safe_wav(output) finally: input_file.close() output.close() os.unlink(input_file.name) os.unlink(output.name) return obj
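# Illustrative sketch (not part of the original source): the method above stages the
# input in one delete=False temp file, lets the converter write a wav into a second
# one, then reads the result and unlinks both.  The staging/cleanup skeleton, with
# the decode step reduced to a single ffmpeg call (assumed to be on PATH):
import os
import subprocess
from tempfile import NamedTemporaryFile

def decode_to_wav_sketch(source_path):
    input_file = NamedTemporaryFile(mode='wb', delete=False)
    with open(source_path, 'rb') as source:
        input_file.write(source.read())
    input_file.flush()
    output = NamedTemporaryFile(mode='rb', delete=False)
    try:
        subprocess.check_call(['ffmpeg', '-y', '-i', input_file.name,
                               '-vn', '-f', 'wav', output.name])
        with open(output.name, 'rb') as result:
            wav_bytes = result.read()
    finally:
        input_file.close()
        output.close()
        os.unlink(input_file.name)
        os.unlink(output.name)
    return wav_bytes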
def test_split_mates(self): 'It tests the detection of oligos in sequence files' mate_fhand = NamedTemporaryFile(suffix='.fasta') linker = TITANIUM_LINKER # a complete linker seq5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC' seq3 = 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT' mate_fhand.write('>seq1\n' + seq5 + linker + seq3 + '\n') # no linker mate_fhand.write('>seq2\n' + seq5 + '\n') # a partial linker mate_fhand.write('>seq3\n' + seq5 + linker[2:25] + seq3 + '\n') # the linker is 5 prima mate_fhand.write('>seq4\n' + linker[10:] + seq3 + '\n') # two linkers mate_fhand.write('>seq5\n' + linker + seq3 + FLX_LINKER + seq5 + '\n') # reverse linker rev_linker = get_setting('TITANIUM_LINKER_REV') mate_fhand.write('>seq6\n' + seq5 + rev_linker + seq3 + '\n') mate_fhand.flush() splitter = MatePairSplitter() new_seqs = [] for packet in read_seq_packets([mate_fhand], 2): new_seqs.append(splitter(packet)) out_fhand = StringIO() write_seq_packets(out_fhand, new_seqs, file_format='fasta') result = out_fhand.getvalue() xpect = r'>seq1\1' xpect += '\n' xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC\n' xpect += r'>seq1\2' xpect += '\n' xpect += 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n' xpect += '>seq2\n' xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC\n' xpect += '>seq3_pl.part1\n' xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTG\n' xpect += '>seq3_pl.part2\n' xpect += 'GTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n' xpect += '>seq4\n' xpect += 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n' xpect += '>seq5_mlc.part1\n' xpect += 'TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCGATCATGTTGTAT' xpect += 'TG' xpect += 'TGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n' xpect += '>seq5_mlc.part2\n' xpect += 'ACCTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC' xpect += '\n' xpect += r'>seq6\1' xpect += '\n' xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC\n' xpect += r'>seq6\2' xpect += '\n' xpect += 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n' assert xpect == result # with short linker in 3 prima mate_fhand = NamedTemporaryFile(suffix='.fasta') seq = ">seq1\nCATCAATGACATCACAAATGACATCAACAAACTCAAA" seq += "CTCACATACACTGCTGTACCGTAC" mate_fhand.write(seq) mate_fhand.flush() splitter = MatePairSplitter() new_seqs = [] for packet in read_seq_packets([mate_fhand], 1): new_seqs.append(splitter(packet)) out_fhand = StringIO() write_seq_packets(out_fhand, new_seqs, file_format='fasta') result = ">seq1\nCATCAATGACATCACAAATGACATCAACAAACTCAAACTCACATACA\n" assert result == out_fhand.getvalue()
def test_mate_pair_unorderer_checker(): 'It test the mate pair function' # with equal seqs but the last ones file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq') file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq') fhand = NamedTemporaryFile() fhand.write(open(file1).read()) fhand.write(open(file2).read()) fhand.flush() out_fhand = StringIO() orphan_out_fhand = StringIO() out_format = 'fastq' match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand, out_format) output = out_fhand.getvalue() assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output orp = orphan_out_fhand.getvalue() assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp # with the firsts seqs different file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq') file2 = os.path.join(TEST_DATA_DIR, 'pairend3.sfastq') fhand = NamedTemporaryFile() fhand.write(open(file1).read()) fhand.write(open(file2).read()) fhand.flush() out_fhand = StringIO() orphan_out_fhand = StringIO() out_format = 'fastq' match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand, out_format) output = out_fhand.getvalue() assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output orp = orphan_out_fhand.getvalue() assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp file1 = os.path.join(TEST_DATA_DIR, 'pairend4.sfastq') file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq') fhand = NamedTemporaryFile() fhand.write(open(file1).read()) fhand.write(open(file2).read()) fhand.flush() out_fhand = StringIO() orphan_out_fhand = StringIO() out_format = 'fastq' match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand, out_format) output = out_fhand.getvalue() assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output orp = orphan_out_fhand.getvalue() assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp # unordered file file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq') file2 = os.path.join(TEST_DATA_DIR, 'pairend2_unordered.sfastq') fhand = NamedTemporaryFile() fhand.write(open(file1).read()) fhand.write(open(file2).read()) fhand.flush() out_fhand = StringIO() orphan_out_fhand = StringIO() out_format = 'fastq' match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand, out_format) output = out_fhand.getvalue() assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output orp = orphan_out_fhand.getvalue() assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp # with reads with no direcction file1 = os.path.join(TEST_DATA_DIR, 'pairend7.sfastq') file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq') fhand = NamedTemporaryFile() fhand.write(open(file1).read()) fhand.write(open(file2).read()) fhand.flush() out_fhand = StringIO() orphan_out_fhand = StringIO() out_format = 'fastq' match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand, out_format) output = out_fhand.getvalue() assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output assert '@seq1:136:FC706VJ:2:2104:15343:197393 
2:Y:18:ATCACG' in output assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output orp = orphan_out_fhand.getvalue() assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp
def tmpfile_with_content(self, value):
    'Return an open, flushed NamedTemporaryFile whose contents are value'
    tmpf = NamedTemporaryFile(mode='wt')
    tmpf.write(value)
    tmpf.flush()
    return tmpf
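# Illustrative sketch (not part of the original source): the flush() in the helper
# above matters because anything that later opens tmpf.name (another process, or a
# plain open()) only sees data that has left Python's buffer.  For example, on
# POSIX systems (Windows cannot reopen an open NamedTemporaryFile by name):
from tempfile import NamedTemporaryFile

tmpf = NamedTemporaryFile(mode='wt')
tmpf.write('hello')
tmpf.flush()
assert open(tmpf.name).read() == 'hello'
tmpf.close()        # the file is removed on close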