Example #1
    def xtest_infile_outfile_condor():
        'It tests that we can set an input file and an output file'
        bin = create_test_binary()
        #with infile
        in_file = NamedTemporaryFile()
        content = 'hola1\nhola2\nhola3\nhola4\nhola5\nhola6\nhola7\nhola8\n'
        content += 'hola9\nhola10\n'
        in_file.write(content)
        in_file.flush()
        out_file = NamedTemporaryFile()

        cmd = [bin]
        cmd.extend(['-i', in_file.name, '-t', out_file.name])
        stdout = NamedTemporaryFile()
        stderr = NamedTemporaryFile()
        cmd_def = [{'options': ('-i', '--input'), 'io': 'in', 'splitter':''},
                   {'options': ('-t', '--output'), 'io': 'out'}]
        from psubprocess import CondorPopen
        popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def,
                      runner=CondorPopen,
                      runner_conf={'transfer_executable':True})
        assert popen.wait() == 0 # wait until it finishes and check the return code
        assert not open(stdout.name).read()
        assert not open(stderr.name).read()
        assert open(out_file.name).read() == content
        in_file.close()
        os.remove(bin)
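A note that applies to this example and many others on this page: these snippets are Python 2 era, where NamedTemporaryFile defaults to binary mode but still accepts str. Under Python 3 the same calls need either mode='w' or encoded bytes. A minimal standalone sketch, not part of the original test:

from tempfile import NamedTemporaryFile

text_tmp = NamedTemporaryFile(mode='w', suffix='.txt')  # text mode accepts str
text_tmp.write('hola1\n')
text_tmp.flush()

binary_tmp = NamedTemporaryFile()  # default binary mode needs bytes
binary_tmp.write('hola1\n'.encode('utf-8'))
binary_tmp.flush()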
Example #2
def test():

    d = {}
    for i in range( 10000 ):
        d[ 'foo' + str( i ) ] = 'bar' + str( i )
    
    # Open temporary file and get name    
    file = NamedTemporaryFile()
    file_name = file.name
        
    # Write cdb to file
    FileCDBDict.to_file( d, file )
    file.flush()
    
    # Open on disk
    file2 = open( file_name )
    cdb = FileCDBDict( file2 )
    
    for key, value in d.iteritems():
        assert cdb[key] == value
    
    try:
        cdb['notin']
        assert False, "KeyError was not raised"
    except KeyError:
        pass
Example #3
    def test_2_infile_outfile():
        'It tests that we can set 2 input files and an output file'
        bin = create_test_binary()
        #with infile
        content = 'hola1\nhola2\nhola3\nhola4\nhola5\nhola6\nhola7\nhola8\n'
        content += 'hola9\nhola10\n'
        in_file1 = NamedTemporaryFile()
        in_file1.write(content)
        in_file1.flush()
        in_file2 = NamedTemporaryFile()
        in_file2.write(content)
        in_file2.flush()
        out_file1 = NamedTemporaryFile()
        out_file2 = NamedTemporaryFile()

        cmd = [bin]
        cmd.extend(['-i', in_file1.name, '-t', out_file1.name])
        cmd.extend(['-x', in_file2.name, '-z', out_file2.name])
        stdout = NamedTemporaryFile()
        stderr = NamedTemporaryFile()
        cmd_def = [{'options': ('-i', '--input'), 'io': 'in', 'splitter':''},
                   {'options': ('-x', '--input'), 'io': 'in', 'splitter':''},
                   {'options': ('-t', '--output'), 'io': 'out'},
                   {'options': ('-z', '--output'), 'io': 'out'}]
        popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def)
        assert popen.wait() == 0 # wait until it finishes and check the return code
        assert not open(stdout.name).read()
        assert not open(stderr.name).read()
        assert open(out_file1.name).read() == content
        assert open(out_file2.name).read() == content
        in_file1.close()
        in_file2.close()
        os.remove(bin)
Example #4
    def compile_inline(self,data,ext):
        """
        Compile inline css. Have to compile to a file, because some css compilers
        may not output to stdout, but we know they all output to a file. It's a
        little hackish, but you shouldn't be compiling in production anyway,
        right?
        """
        compiler = settings.COMPILER_FORMATS[ext]
        try:
            bin = compiler['binary_path']
        except KeyError:
            raise Exception("Path to CSS compiler must be included in COMPILER_FORMATS")
        
        tmp_file = NamedTemporaryFile(mode='w',suffix=ext)
        tmp_file.write(dedent(data))
        tmp_file.flush()
        path, ext = os.path.splitext(tmp_file.name)
        tmp_css = ''.join((path,'.css'))
        
        self.compile(path,compiler)
        data = open(tmp_css,'r').read()
        
        # cleanup
        tmp_file.close()
        os.remove(tmp_css)

        return data  
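For context, a hypothetical COMPILER_FORMATS setting that would satisfy this snippet. binary_path is the only key the code reads; the extensions and paths are assumptions, not from the original project:

COMPILER_FORMATS = {
    '.sass': {'binary_path': '/usr/bin/sass'},
    '.less': {'binary_path': '/usr/bin/lessc'},
}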
Example #5
    def get_logs(self):
        """
        Build the logs entry for the metadata 'output' section

        :return: list, Output instances
        """

        # Collect logs from server
        kwargs = {}
        if self.namespace is not None:
            kwargs['namespace'] = self.namespace
        logs = self.osbs.get_build_logs(self.build_id, **kwargs)

        # Deleted once closed
        logfile = NamedTemporaryFile(prefix=self.build_id,
                                     suffix=".log",
                                     mode='w')
        logfile.write(logs)
        logfile.flush()

        docker_logs = NamedTemporaryFile(prefix="docker-%s" % self.build_id,
                                         suffix=".log",
                                         mode='w')
        docker_logs.write("\n".join(self.workflow.build_logs))
        docker_logs.flush()

        return [Output(file=docker_logs,
                       metadata=self.get_output_metadata(docker_logs.name,
                                                         "build.log")),
                Output(file=logfile,
                       metadata=self.get_output_metadata(logfile.name,
                                                         "openshift-final.log"))]
Example #6
    def test_seq_pipeline_parallel_run_with_fasta_qual(self):
        'The pipeline runs in parallel with fasta and qual'
        pipeline = 'sanger_with_qual'

        fhand_adaptors = NamedTemporaryFile()
        fhand_adaptors.write(ADAPTORS)
        fhand_adaptors.flush()
        arabidopsis_genes = 'arabidopsis_genes+'
        univec = os.path.join(TEST_DATA_DIR, 'blast', arabidopsis_genes)
        configuration = {'remove_vectors': {'vectors': univec},
                         'remove_adaptors': {'adaptors': fhand_adaptors.name}}

        seq1 = create_random_seqwithquality(500, qual_range=50)
        seq2 = create_random_seqwithquality(500, qual_range=51)
        seq3 = create_random_seqwithquality(500, qual_range=52)
        seqs = [seq1, seq2, seq3]
        inseq_fhand, inqual_fhand = create_temp_seq_file(seqs, format='qual')

        in_fhands = {}
        in_fhands['in_seq'] = open(inseq_fhand.name)
        in_fhands['in_qual'] = open(inqual_fhand.name)

        outseq_fhand = NamedTemporaryFile()
        outqual_fhand = NamedTemporaryFile()
        writer = SequenceWriter(outseq_fhand, qual_fhand=outqual_fhand,
                                file_format='fasta')
        writers = {'seq': writer}

        seq_pipeline_runner(pipeline, configuration, in_fhands,
                            processes=4, writers=writers)
        out_fhand = open(outseq_fhand.name, 'r')

        result_seq = out_fhand.read()
        assert result_seq.count('>') == 3
Example #7
def hmmscan(fasta, database_path, ncpus=10):

    F = NamedTemporaryFile()
    F.write(fasta)
    F.flush()
    OUT = NamedTemporaryFile()
    cmd = '%s --cpu %s -o /dev/null -Z 190000 --tblout %s %s %s' %(HMMSCAN, ncpus, OUT.name, database_path, F.name)
    #print cmd
    sts = subprocess.call(cmd, shell=True)
    byquery = defaultdict(list)

    if sts == 0:
        for line in OUT:
            #['#', '---', 'full', 'sequence', '----', '---', 'best', '1', 'domain', '----', '---', 'domain', 'number', 'estimation', '----']
            #['#', 'target', 'name', 'accession', 'query', 'name', 'accession', 'E-value', 'score', 'bias', 'E-value', 'score', 'bias', 'exp', 'reg', 'clu', 'ov', 'env', 'dom', 'rep', 'inc', 'description', 'of', 'target']
            #['#-------------------', '----------', '--------------------', '----------', '---------', '------', '-----', '---------', '------', '-----', '---', '---', '---', '---', '---', '---', '---', '---', '---------------------']
            #['delNOG20504', '-', '553220', '-', '1.3e-116', '382.9', '6.2', '3.4e-116', '381.6', '6.2', '1.6', '1', '1', '0', '1', '1', '1', '1', '-']
            if line.startswith('#'): continue
            fields = line.split() # output is whitespace, not tab, delimited; only the trailing description field may contain spaces, so this split is safe for the leading columns
            hit, _, query, _ , evalue, score, bias, devalue, dscore, dbias = fields[0:10]
            evalue, score, bias, devalue, dscore, dbias = map(float, [evalue, score, bias, devalue, dscore, dbias])
            byquery[query].append([hit, evalue, score])
            
    OUT.close()
    F.close()
    return byquery
Example #8
    def test_pipeline_run():
        'It tests that the pipeline runs ok'
        pipeline = 'sanger_with_qual'

        fhand_adaptors = NamedTemporaryFile()
        fhand_adaptors.write(ADAPTORS)
        fhand_adaptors.flush()

        arabidopsis_genes = 'arabidopsis_genes+'

        univec = os.path.join(TEST_DATA_DIR, 'blast', arabidopsis_genes)
        configuration = {'remove_vectors_blastdb': {'vectors': univec},
                         'remove_adaptors': {'adaptors': fhand_adaptors.name}}

        seq_fhand = open(os.path.join(TEST_DATA_DIR, 'seq.fasta'), 'r')
        qual_fhand = open(os.path.join(TEST_DATA_DIR, 'qual.fasta'), 'r')

        seq_iter = seqs_in_file(seq_fhand, qual_fhand)

        filtered_seq_iter = _pipeline_builder(pipeline, seq_iter,
                                              configuration)

        seq_list = list(filtered_seq_iter)
        assert 'CGAtcgggggg' in str(seq_list[0].seq)
        assert len(seq_list) == 6
Example #9
    def test_bwa_mapping():
        '''It tests read mapping with bwa'''
        reference = join(TEST_DATA_DIR, 'blast/arabidopsis_genes')
        work_dir = NamedTemporaryDir()
        reference_fpath = join(work_dir.name, 'arabidopsis_genes')
        os.symlink(reference, reference_fpath)

        reads_fhand = NamedTemporaryFile(suffix='.sfastq')
        reads_fhand.write(SOLEXA)
        reads_fhand.flush()

        out_bam_fhand = NamedTemporaryFile()
        out_bam_fpath = out_bam_fhand.name
        out_bam_fhand.close()

        parameters = {'colorspace': False, 'reads_length':'short',
                      'threads':None, 'java_conf':None}
        map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                           parameters)
        test_sam_fhand = NamedTemporaryFile(suffix='sam')
        bam2sam(out_bam_fpath, test_sam_fhand.name)
        result = open(test_sam_fhand.name).read()
        assert 'seq17' in result

        unmapped_fhand = StringIO.StringIO()
        parameters = {'colorspace': False, 'reads_length':'short',
                      'threads':None, 'java_conf':None,
                      'unmapped_fhand':unmapped_fhand}
        map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                           parameters)
        assert 'seq17' in unmapped_fhand.getvalue()
        test_sam_fhand = NamedTemporaryFile(suffix='sam')
        bam2sam(out_bam_fpath, test_sam_fhand.name)
        result = open(test_sam_fhand.name).read()
        assert 'seq17' not in result
Example #10
    def test_main(self):
        xml = """<record>
            <datafield tag="999" ind1="C" ind2="5">
                <subfield code="s">Test Journal Name,100,10</subfield>
            </datafield>
        </record>"""
        xml_temp_file = NamedTemporaryFile(dir=CFG_TMPDIR)
        xml_temp_file.write(xml)
        xml_temp_file.flush()

        kb = "TEST JOURNAL NAME---Converted"
        kb_temp_file = NamedTemporaryFile(dir=CFG_TMPDIR)
        kb_temp_file.write(kb)
        kb_temp_file.flush()

        dest_temp_fd, dest_temp_path = mkstemp(dir=CFG_TMPDIR)
        try:
            os.close(dest_temp_fd)

            process = subprocess.Popen([self.bin_path, xml_temp_file.name,
                                       '--kb', kb_temp_file.name,
                                       '-o', dest_temp_path],
                                       stderr=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            process.wait()

            transformed_xml = open(dest_temp_path).read()
            self.assertXmlEqual(transformed_xml, """<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
<record><datafield ind1="C" ind2="5" tag="999"><subfield code="s">Converted,100,10</subfield></datafield></record>
</collection>""")
        finally:
            os.unlink(dest_temp_path)
Example #11
    def save(self, filename, mtime=1300507380.0):
        """
        Serialize this RingData instance to disk.

        :param filename: File into which this instance should be serialized.
        :param mtime: time used to override the gzip mtime; pass None if the
                      caller wants the current time to be recorded
        """
        # Override the timestamp so that the same ring data creates
        # the same bytes on disk. This makes a checksum comparison a
        # good way to see if two rings are identical.
        #
        # This only works on Python 2.7; on 2.6, we always get the
        # current time in the gzip output.
        tempf = NamedTemporaryFile(dir=".", prefix=filename, delete=False)
        if 'mtime' in inspect.getargspec(GzipFile.__init__).args:
            gz_file = GzipFile(filename, mode='wb', fileobj=tempf,
                               mtime=mtime)
        else:
            gz_file = GzipFile(filename, mode='wb', fileobj=tempf)
        self.serialize_v1(gz_file)
        gz_file.close()
        tempf.flush()
        os.fsync(tempf.fileno())
        tempf.close()
        os.chmod(tempf.name, 0o644)
        os.rename(tempf.name, filename)
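A minimal standalone sketch (not part of the ring code) of why pinning mtime makes the output reproducible: with a fixed timestamp, identical payloads compress to identical bytes, so checksums can be compared directly.

import hashlib
from gzip import GzipFile
from io import BytesIO

def gzip_bytes(payload, mtime):
    buf = BytesIO()
    with GzipFile('ring', mode='wb', fileobj=buf, mtime=mtime) as gz_file:
        gz_file.write(payload)
    return buf.getvalue()

a = gzip_bytes(b'ring data', mtime=1300507380.0)
b = gzip_bytes(b'ring data', mtime=1300507380.0)
assert hashlib.md5(a).hexdigest() == hashlib.md5(b).hexdigest()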
Example #12
class ZabbixSender(object):
    """"""

    #----------------------------------------------------------------------
    def __init__(self, config, logfile):
        self._config = config
        self._logfile = logfile
        self._tempfile = None

    #----------------------------------------------------------------------
    def send(self, stats):
        self._write_temporary_file(stats)
        self._send_data_to_zabbix()

    #----------------------------------------------------------------------
    def _write_temporary_file(self, stats):
        self._tempfile = NamedTemporaryFile()
        for item in stats:
            self._tempfile.write(u'- memcached[%s] %s\n' % (item.key, item.value))

        self._tempfile.flush()

    #----------------------------------------------------------------------
    def _send_data_to_zabbix(self):
        cmd = [u'zabbix_sender', u'-c', self._config, u'-i', self._tempfile.name]
        logfile = open(self._logfile, 'a')
        call(cmd, stdout=logfile)
        logfile.close()
        self._tempfile.close()
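A hypothetical usage sketch (the Stat type and the paths are assumptions, not from the original). Each stats item only needs .key and .value attributes; the leading '-' written into the temp file tells zabbix_sender to take the hostname from the agent config:

from collections import namedtuple

Stat = namedtuple('Stat', ['key', 'value'])  # hypothetical stats item
sender = ZabbixSender('/etc/zabbix/zabbix_agentd.conf', '/tmp/zabbix_sender.log')
sender.send([Stat('curr_connections', 10), Stat('get_hits', 99)])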
Example #13
def as_fasta(seqs, index_dir=None):
    ftype = get_seqs_type(seqs)
    if ftype == "fasta":
        return seqs
    elif ftype == "fastafile":
        return Fasta(seqs)
    else:
        if index_dir is None:
            raise ValueError("need index_dir / genome to convert to FASTA")

        tmpfa = NamedTemporaryFile()
        
        if ftype == "bedfile":
            track2fasta(index_dir, seqs, tmpfa.name) 
        else:

            if ftype == "regionfile":
                seqs = [l.strip() for l in open(seqs).readlines()]
            tmpbed = NamedTemporaryFile()
            for seq in seqs:
                vals = re.split(r'[:-]', seq)
                tmpbed.write("{}\t{}\t{}\n".format(*vals))
            tmpbed.flush()
            track2fasta(index_dir, tmpbed.name, tmpfa.name) 
        return Fasta(tmpfa.name)
Example #14
 def make_fasta(self):
     'it returns a fasta fhand'
     fhand = NamedTemporaryFile()
     fhand.write('>seq{0:d}\nACTATCATGGCAGATA\n'.format(self.counter))
     fhand.flush()
     self.counter += 1
     return fhand
Example #15
	def refine_by_scanning(motifs, fastafile):
		
		tmp_gff = NamedTemporaryFile()
		file_in = NamedTemporaryFile()
		for m in motifs:
			file_in.write("%s\n" % m.to_pfm())
		file_in.flush()
		
		cmd = "pwmscan.py -i %s -p %s -c 0.8 > %s" % (fastafile, file_in.name, tmp_gff.name)
		p = Popen(cmd, shell=True)
		stdout,stderr = p.communicate()

		aligns = {}
		for line in open(tmp_gff.name):	
			vals = line.strip().split("\t")
			motif,instance = [x.split(" ")[1].replace('"', "") for x in vals[8].split(" ; ")]
		
			if vals[6] == "+":
				aligns.setdefault(motif,[]).append(instance.upper())
			else:
				aligns.setdefault(motif,[]).append(rc(instance.upper()))

		tmp_out = NamedTemporaryFile()
		
		refined_motifs = []
		for id,align in aligns.items():
			if len(align) > 10:
				motif = motif_from_align(align)
				refined_motifs.append(motif)
		
		return refined_motifs
Example #16
 def test_scrape_info_from_fname():
     'scrape info from fpath'
     fhand = NamedTemporaryFile(prefix='st_prot.pl_454.A.', suffix='.fasta')
     fhand.write('>seq\nTGATGC')
     fhand.flush()
     info = scrape_info_from_fname(fhand.name)
     assert info['st'] == 'prot'
Example #17
    def render_to_temporary_file(self, template_name, mode='w+b', bufsize=-1,
                                 suffix='.html', prefix='tmp', dir=None,
                                 delete=True):
        template = self.resolve_template(template_name)

        context = self.resolve_context(self.context_data)

        content = smart_str(template.render(context))
        content = make_absolute_paths(content)

        try:
            tempfile = NamedTemporaryFile(mode=mode, bufsize=bufsize,
                                      suffix=suffix, prefix=prefix,
                                      dir=dir, delete=delete)
        except TypeError:
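            # Python 3 renamed NamedTemporaryFile's bufsize argument to buffering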
            tempfile = NamedTemporaryFile(mode=mode, buffering=bufsize,
                                      suffix=suffix, prefix=prefix,
                                      dir=dir, delete=delete)

        try:
            tempfile.write(content)
            tempfile.flush()
            return tempfile
        except TypeError:
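            # Python 3: a binary-mode temp file needs bytes, so encode the str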
            tempfile.write(bytes(content, 'UTF-8'))
            tempfile.flush()
            return tempfile
        except:
            # Clean-up tempfile if an Exception is raised.
            tempfile.close()
            raise
Example #18
    def test_safe_md5(self):
        """Make sure we have the expected md5 with varied input types

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        exp = 'd3b07384d113edec49eaa6238ad5ff00'

        tmp_f = NamedTemporaryFile(
            mode='w',
            prefix='test_safe_md5',
            suffix='txt')
        tmp_f.write('foo\n')
        tmp_f.flush()

        obs = safe_md5(open(tmp_f.name, 'U'))
        self.assertEqual(obs, exp)

        obs = safe_md5(['foo\n'])
        self.assertEqual(obs, exp)

        # unsupported type raises TypeError
        self.assertRaises(TypeError, safe_md5, 42)
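For reference, 'd3b07384d113edec49eaa6238ad5ff00' is the well-known MD5 of the four bytes 'foo\n'. A minimal sketch of hashing an open file in chunks (an assumption about safe_md5's behavior, not its actual source):

from hashlib import md5

def chunked_md5(fhand, chunk_size=2 ** 16):
    digest = md5()
    while True:
        chunk = fhand.read(chunk_size)
        if not chunk:
            break
        # accept both text- and binary-mode handles
        digest.update(chunk.encode('utf-8') if isinstance(chunk, str) else chunk)
    return digest.hexdigest()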
Example #19
 def get_tempfile(self, **kwargs):
     kwargs.setdefault('suffix', '.vrt')
     tempfile = NamedTemporaryFile(**kwargs)
     tempfile.write(self.content)
     tempfile.flush()
     tempfile.seek(0)
     return tempfile
Example #20
    def test_lots_splits_outfile():
        'It tests that we can use a lot of splits with input and output files'
        bin = create_test_binary()

        splits = 200
        content = ['hola%d\n' % split for split in range(splits)]
        content = ''.join(content)
        in_file1 = NamedTemporaryFile()
        in_file1.write(content)
        in_file1.flush()
        in_file2 = NamedTemporaryFile()
        in_file2.write(content)
        in_file2.flush()
        out_file1 = NamedTemporaryFile()
        out_file2 = NamedTemporaryFile()

        cmd = [bin]
        cmd.extend(['-i', in_file1.name, '-t', out_file1.name])
        cmd.extend(['-x', in_file2.name, '-z', out_file2.name])
        stdout = NamedTemporaryFile()
        stderr = NamedTemporaryFile()
        cmd_def = [{'options': ('-i', '--input'), 'io': 'in', 'splitter':''},
                   {'options': ('-x', '--input'), 'io': 'in', 'splitter':''},
                   {'options': ('-t', '--output'), 'io': 'out'},
                   {'options': ('-z', '--output'), 'io': 'out'}]
        popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def,
                      splits=splits)
        assert popen.wait() == 0 # wait until it finishes and check the return code
        assert not open(stdout.name).read()
        assert not open(stderr.name).read()
        assert open(out_file1.name).read() == content
        assert open(out_file2.name).read() == content
        in_file1.close()
        in_file2.close()
        os.remove(bin)
Example #21
def get_pairwise_distances(seq_series, tree_file = None, seq_file = None):
    
    if seq_file is None:
        fasta_handle = NTF()
    else:
        fasta_handle = open(seq_file, 'w')
    if tree_file is None:
        tree_handle = NTF()
    else:
        tree_handle = open(tree_file, 'w')
    for (pat, visit), seq in zip(seq_series.index, seq_series.values):
        nheader = '%s-%s' % (pat, visit)
        fasta_handle.write('>%s\n%s\n' % (nheader, ''.join(seq)))
    fasta_handle.flush()
    os.fsync(fasta_handle.fileno())
    cmd = 'muscle -in %(ifile)s -tree2 %(treefile)s -gapopen -2.9'
    cmdlist = shlex.split(cmd % {
                                 'ifile':fasta_handle.name, 
                                 'treefile':tree_handle.name
                                 })
    t = check_call(cmdlist)
    tree = Phylo.read(open(tree_handle.name), 'newick')
    seq_names = tree.get_terminals()
    dmat = {}
    for p1, p2 in combinations(seq_names, 2):
        d = tree.distance(p1, p2)
        dmat[(p1.name, p2.name)] = d
        dmat[(p2.name, p1.name)] = d
        
    return dmat
Example #22
    def test_gmap_without_mapping_output():
        '''It tests that gmap doesn't map anything'''

        mappers_dir = join(TEST_DATA_DIR, 'mappers')
        cmap_dir = join(TEST_DATA_DIR, 'mappers', 'gmap')
        work_dir = NamedTemporaryDir()
        temp_genome = join(work_dir.name, 'genome.fa')
        os.symlink(join(mappers_dir, 'genome.fa'), temp_genome)

        reads_fhand = NamedTemporaryFile()
        reads_fhand.write('>seq\natgtgatagat\n')
        reads_fhand.flush()


        out_bam_fhand = NamedTemporaryFile()
        out_bam_fpath = out_bam_fhand.name
        out_bam_fhand.close()
        parameters = {'threads':None, 'kmer':13}
        map_reads_with_gmap(temp_genome, reads_fhand.name, out_bam_fpath,
                            parameters)
        reads_fhand.close()
        temp_sam_fhand = NamedTemporaryFile(suffix='.sam')
        bam2sam(out_bam_fpath, temp_sam_fhand.name, True)
        result = open(temp_sam_fhand.name).read()
        assert 'seq\t4\t*\t0\t0' in result
Example #23
class TestPipeline(unittest.TestCase):
    """ Class to test a pipeline with an iterative node
    """
    def setUp(self):
        """ In the setup construct the pipeline and set some input parameters.
        """
        # Construct the pipeline
        self.pipeline = MyPipeline()

        # Set some input parameters
        self.parallel_processes = 10
        self.input_file = NamedTemporaryFile(delete = False)
        self.input_file.write('\x00\x00' * self.parallel_processes)
        self.input_file.flush()
        self.input_file.close()
        self.pipeline.input_image = self.input_file.name
        self.output_file = NamedTemporaryFile()
        self.output_file.close()
        self.pipeline.output_image = self.output_file.name

    def test_iterative_pipeline_connection(self):
        """ Method to test if an iterative node and built in iterative
        process are correctly connected.
        """

        # Test the output connection
        self.pipeline()
        result = open(self.pipeline.output_image,'rb').read()
        numbers = struct.unpack_from('H' * self.parallel_processes, result)
        self.assertEqual(numbers, tuple(range(self.parallel_processes)))
Example #24
    def from_wav(cls, fileName):
        """
        params
        :param fileName: file name to read
        """
        outputFile = NamedTemporaryFile(mode='w+b', delete=False)
        command = [
            converter, '-y',
            '-i', fileName,  # specifying input file
            '-vn',  # drop any video streams in the file
            '-sn',  # drop any subtitles present in the file
            '-f', 'wav',  # specify the output file format needed
            '-ar', '44100',  # uniform sample rate for all audio files
            outputFile.name
        ]

        # now use ffmpeg for conversion
        subprocess.call(command, stdout=open(os.devnull, 'wb'), stderr=open(os.devnull, 'wb'))

        outputFile.flush()
        obj = cls(outputFile.name)
        outputFile.close()
        os.unlink(outputFile.name)

        return obj
Example #25
    def solve(self, cnf):
        s = Solution()

        infile = NamedTemporaryFile(mode='w')
        outfile = NamedTemporaryFile(mode='r')

        io = DimacsCnf()
        infile.write(io.tostring(cnf))
        infile.flush()

        ret = call(self.command % (infile.name, outfile.name), shell=True)
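        # An assumption based on the minisat/DIMACS convention: exit status 10 means SAT, 20 means UNSAT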

        infile.close()

        if ret != 10:
            return s

        s.success = True

        lines = outfile.readlines()[1:]

        for line in lines:
            varz = line.split(" ")[:-1]
            for v in varz:
                v = v.strip()
                value = v[0] != '-'
                v = v.lstrip('-')
                vo = io.varobj(v)
                s.varmap[vo] = value

        # Close deletes the tmp files
        outfile.close()

        return s
Example #26
File: util.py Project: simonvh/pita
def get_splice_score(a, s_type=5):
    if s_type not in [3,5]:
        raise Exception("Invalid splice type {}, should be 3 or 5".format(s_type))
    
    maxent = config.maxentpath
    if not maxent:
        raise Exception("Please provide path to the score5.pl and score3.pl maxent scripts in config file")

    tmp = NamedTemporaryFile()
    for name,seq in a:
        tmp.write(">{}\n{}\n".format(name,seq))
    tmp.flush()
    cmd = "perl score{}.pl {}".format(s_type, tmp.name)
    p = sp.Popen(cmd, shell=True, cwd=maxent, stdout=sp.PIPE)
    score = 0
    for line in p.stdout.readlines():
        vals = line.strip().split("\t")
        if len(vals) > 1:
            try:
                score += float(vals[-1])
            except ValueError:
                logger.error("valueError, skipping: {}".format(vals))
            except:
                logger.error("Something unexpected happened")
    return score
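A hypothetical call (the names and sequences are invented). Per MaxEntScan's documented conventions, score5.pl expects 9-mers (3 exonic + 6 intronic bases) and score3.pl expects 23-mers:

donor_sites = [('site1', 'CAGGTAAGT'), ('site2', 'GAGGTAAGG')]  # 9-mers for s_type=5
total = get_splice_score(donor_sites, s_type=5)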
Example #27
    def test_dup_bin(self):
        seqs = '@seq1.f\naaaa\n+\nHHHH\n@seq1.r\naaaa\n+\nHHHH\n'
        seqs += '@seq2.f\naaab\n+\nHHHH\n@seq2.r\naaaa\n+\nHHHH\n'
        in_fhand = NamedTemporaryFile()
        in_fhand.write(seqs)
        in_fhand.flush()

        filter_bin = os.path.join(BIN_DIR, 'filter_duplicates')
        assert 'usage' in check_output([filter_bin, '-h'])
        result = check_output([filter_bin, in_fhand.name])
        assert '@seq1.f\naaaa\n+\nHHHH\n@seq2.f\naaab\n+\nHHHH\n' in result
        result = check_output([filter_bin], stdin=in_fhand)
        assert '@seq1.f\naaaa\n+\nHHHH\n@seq2.f\naaab\n+\nHHHH\n' in result
        result = check_output([filter_bin, in_fhand.name, '-m', '3'])
        assert '@seq1.f\naaaa\n+\nHHHH\n@seq2.f\naaab\n+\nHHHH\n' in result
        result = check_output([filter_bin, in_fhand.name, '--paired_reads'])
        assert seqs in result
        result = check_output([filter_bin, in_fhand.name, '-l', '1'])
        assert result == '@seq1.f\naaaa\n+\nHHHH\n'

        return  # TODO: failure not fixed yet
        in_fhand = open(os.path.join(TEST_DATA_DIR, 'illum_fastq.fastq'))
        try:
            result = check_output([filter_bin], stdin=in_fhand)
            # print result
            self.fail()
        except UndecidedFastqVersionError:
            pass
Example #28
    def create_temp_file(self, edid_binary):
        edid_file = NamedTemporaryFile(delete=False)
        edid_file.write(edid_binary)
        edid_file.flush()
        edid_file.seek(0)

        return edid_file
Example #29
    def test_seq_pipeline_parallel_run(self):
        'It tests that the pipeline runs ok'
        pipeline = 'sanger_without_qual'

        fhand_adaptors = NamedTemporaryFile()
        fhand_adaptors.write(ADAPTORS)
        fhand_adaptors.flush()
        arabidopsis_genes = 'arabidopsis_genes+'
        univec = os.path.join(TEST_DATA_DIR, 'blast', arabidopsis_genes)
        configuration = {'remove_vectors': {'vectors': univec},
                         'remove_adaptors': {'adaptors': fhand_adaptors.name}}

        in_fhands = {}
        in_fhands['in_seq'] = open(os.path.join(TEST_DATA_DIR, 'seq.fasta'),
                                   'r')
        out_fhand = NamedTemporaryFile()
        writer = SequenceWriter(out_fhand, file_format='fasta')
        writers = {'seq': writer}

        seq_pipeline_runner(pipeline, configuration, in_fhands,
                            processes=4, writers=writers)
        out_fhand = open(out_fhand.name, 'r')

        result_seq = out_fhand.read()
        assert result_seq.count('>') == 6
        #are we keeping the description?
        assert 'mdust' in result_seq
Example #30
    def test_nosplit():
        'It tests that we can mark some input files as not to be split'
        bin = create_test_binary()
        #with infile
        in_file = NamedTemporaryFile()
        content = 'hola1\nhola2\n'
        in_file.write(content)
        in_file.flush()
        out_file = NamedTemporaryFile()

        cmd = [bin]
        cmd.extend(['-i', in_file.name, '-t', out_file.name])
        stdout = NamedTemporaryFile()
        stderr = NamedTemporaryFile()
        cmd_def = [{'options': ('-i', '--input'), 'io': 'in',
                    'special':['no_split']},
                   {'options': ('-t', '--output'), 'io': 'out'}]
        splits = 4
        popen = Popen(cmd, stdout=stdout, stderr=stderr, cmd_def=cmd_def,
                      splits=splits)
        assert popen.wait() == 0 # wait until it finishes and check the return code
        assert not open(stdout.name).read()
        assert not open(stderr.name).read()
        assert open(out_file.name).read() == content * splits
        in_file.close()
        os.remove(bin)
Example #31
    def test_cat_seqs(self):
        'It tests the cat_seqs binary'
        cat_bin = os.path.join(SEQ_BIN_DIR, 'cat_seqs')

        # help
        assert 'usage' in check_output([cat_bin, '-h'])

        # fasta to fasta
        in_fhand1 = self.make_fasta()
        in_fhand2 = self.make_fasta()
        result = check_output([cat_bin, in_fhand1.name, in_fhand2.name])
        assert '>seq1\nACTATCATGGCAGATA\n>seq2\nACTATCATGGCAGATA' in result

        # from fastq to fastq
        fhand = NamedTemporaryFile()
        fhand.write('@seq1\nACTA\n+\nqqqq\n')
        fhand.flush()
        result = check_output([cat_bin, fhand.name])
        assert result == '@seq1\nACTA\n+\nqqqq\n'
        # No input
        fhand = NamedTemporaryFile()
        fhand.write('')
        fhand.flush()
        try:
            stderr = NamedTemporaryFile()
            result = check_output([cat_bin, fhand.name], stderr=stderr)
            self.fail()

        except CalledProcessError:
            assert 'The file is empty' in open(stderr.name).read()
            os.remove('cat_seqs.error')

        # No format
        in_fhand1 = self.make_fasta()
        in_fhand2 = self.make_fasta()
        result = check_output([cat_bin, in_fhand1.name, in_fhand2.name])
        assert '>seq3\nACTATCATGGCAGATA\n>seq4\nACTATCATGGCAGATA' in result

        in_fhand1 = self.make_fasta()
        in_fhand2 = NamedTemporaryFile()
        in_fhand2.write('@seq\nATAT\n+\n????\n')
        in_fhand2.flush()
        try:
            stderr = NamedTemporaryFile()
            result = check_output([cat_bin, in_fhand2.name, in_fhand1.name],
                                  stderr=stderr)
            self.fail()
        except CalledProcessError:
            stderr_str = open(stderr.name).read()
            assert 'output format taken from first given file' in stderr_str
Example #32
def get_locus_values(loci,
                     locus_bed_path,
                     ambiguous_bigwig=None,
                     plus_bigwig=None,
                     minus_bigwig=None):
    '''
    Finds coverage values for each transcript.

    loci - Dict of locus objects from models.LocusGroup.get_loci_dict()
    locus_bed_path - Path to BED file with loci intervals.
    '''
    if plus_bigwig and minus_bigwig:
        plus_tab = NamedTemporaryFile(mode='w')
        minus_tab = NamedTemporaryFile(mode='w')

        call_bigwig_average_over_bed(
            plus_bigwig,
            locus_bed_path,
            plus_tab.name,
        )
        call_bigwig_average_over_bed(
            minus_bigwig,
            locus_bed_path,
            minus_tab.name,
        )

        plus_tab.flush()
        minus_tab.flush()

        return reconcile_stranded_coverage(
            loci,
            read_bigwig_average_over_bed_tab_file(loci, plus_tab.name),
            read_bigwig_average_over_bed_tab_file(loci, minus_tab.name),
        )

    elif ambiguous_bigwig:
        tab = NamedTemporaryFile(mode='w')

        call_bigwig_average_over_bed(
            ambiguous_bigwig,
            locus_bed_path,
            tab.name,
        )
        tab.flush()

        out_values = read_bigwig_average_over_bed_tab_file(loci, tab.name)
        return out_values

    else:
        raise ValueError('Improper bigWig files specified.')
Example #33
def test_orphan_events():
    """Tests cases with orphan house and basic event events."""
    tmp = NamedTemporaryFile(mode="w+")
    tmp.write("FT\n")
    tmp.write("g1 := g2 & e1\n")
    tmp.write("g2 := h1 & e1\n")
    tmp.write("p(e1) = 0.5\n")
    tmp.write("s(h1) = false\n")
    tmp.flush()
    assert parse_input_file(tmp.name) is not None
    tmp.write("p(e2) = 0.1\n")  # orphan basic event
    tmp.flush()
    assert parse_input_file(tmp.name) is not None
    tmp.write("s(h2) = true\n")  # orphan house event
    tmp.flush()
    assert parse_input_file(tmp.name) is not None
Example #34
def get_client_certificate(name, namespace, ca_pem, ca_key_pem):
    common_name = '{}-client'.format(name)
    client_csr = {
        'CN': common_name,
        'hosts': [],
        'key': {
            'algo': 'rsa',
            'size': 2048
        },
        'names': [{
            'O': common_name
        }]
    }

    ca_file = NamedTemporaryFile(delete=False)
    ca_key_file = NamedTemporaryFile(delete=False)
    client_csr_file = NamedTemporaryFile(delete=False)

    ca_file.write(ca_pem)
    ca_file.flush()
    ca_key_file.write(ca_key_pem)
    ca_key_file.flush()
    client_csr_json = json.dumps(client_csr).encode('utf-8')
    client_csr_file.write(client_csr_json + b'\n')
    client_csr_file.flush()

    cmd = '''./cfssl gencert \
             -ca={} \
             -ca-key={} \
             -config=ca-config.json \
             -profile=client {}'''.format(ca_file.name, ca_key_file.name,
                                          client_csr_file.name)

    c = delegator.run(cmd)

    ca_file.close()
    ca_key_file.close()
    client_csr_file.close()

    if not c.out:
        logging.error('cfssl {}'.format(c.err))

    r = json.loads(c.out)

    mongod_pem = r['cert'] + r['key']
    return mongod_pem, r['csr']
Example #35
class fileTest(unittest.TestCase):
    CORRELATE = Exscript.util.file

    def setUp(self):
        data = '[account-pool]\n'
        data += 'user1=' + base64.encodestring('password1') + '\n'
        data += 'user2:' + base64.encodestring('password2') + '\n'
        data += 'user3 = ' + base64.encodestring('password3') + '\n'
        data += 'user4 : ' + base64.encodestring('password4') + '\n'
        self.account_file = NamedTemporaryFile()
        self.account_file.write(data)
        self.account_file.flush()

        self.host_file = NamedTemporaryFile()
        self.host_file.write('\n'.join(hosts))
        self.host_file.flush()

        self.csv_host_file = NamedTemporaryFile()
        self.csv_host_file.write('hostname\ttest\n')
        self.csv_host_file.write('\n'.join([h + '\tblah' for h in hosts]))
        self.csv_host_file.flush()

    def tearDown(self):
        self.account_file.close()
        self.host_file.close()
        self.csv_host_file.close()

    def testGetAccountsFromFile(self):
        from Exscript.util.file import get_accounts_from_file
        accounts = get_accounts_from_file(self.account_file.name)
        result = [(a.get_name(), a.get_password()) for a in accounts]
        result.sort()
        self.assertEqual(account_pool, result)

    def testGetHostsFromFile(self):
        from Exscript.util.file import get_hosts_from_file
        result = get_hosts_from_file(self.host_file.name)
        self.assertEqual([h.get_name() for h in result], expected_hosts)

    def testGetHostsFromCsv(self):
        from Exscript.util.file import get_hosts_from_csv
        result = get_hosts_from_csv(self.csv_host_file.name)
        hostnames = [h.get_name() for h in result]
        testvars = [h.get('test')[0] for h in result]
        self.assertEqual(hostnames, expected_hosts)
        self.assertEqual(testvars, ['blah' for h in result])
Example #36
class ZoneFile(object):
    def __init__(self, lines=None, no_header=False):
        self._file = NamedTemporaryFile()
        if not no_header:
            self._file.write(DUMMY_ZONE_HEADER)
        if lines is not None:
            self.writelines(lines)
        self._file.flush()

    @property
    def name(self):
        return self._file.name

    def write(self, str):
        self._file.write(str)
        self._file.flush()

    def writelines(self, lines):
        self._file.writelines("%s\n" % line for line in lines)
        self._file.flush()
Example #37
def heroku_kafka_producer(extra_config={}):
    cert_file = NamedTemporaryFile(suffix='.crt', delete=False)
    cert_file.write(os.environ['KAFKA_CLIENT_CERT'].encode('utf-8'))
    cert_file.flush()

    key_file = NamedTemporaryFile(suffix='.key', delete=True)
    private_key = crypto.load_privatekey(crypto.FILETYPE_PEM,
                                         os.environ['KAFKA_CLIENT_CERT_KEY'])
    pwd = str(os.urandom(33))
    key_enc = crypto.dump_privatekey(crypto.FILETYPE_PEM,
                                     private_key,
                                     cipher='DES-EDE3-CBC',
                                     passphrase=pwd.encode())
    key_file.write(key_enc)
    key_file.flush()

    trust_file = NamedTemporaryFile(suffix='.crt', delete=False)
    trust_file.write(os.environ['KAFKA_TRUSTED_CERT'].encode('utf-8'))
    trust_file.flush()

    kafka_brokers = get_kafka_brokers()

    config = {
        'bootstrap.servers': kafka_brokers,
        'security.protocol': 'ssl',
        'ssl.ca.location': trust_file.name,
        'ssl.certificate.location': cert_file.name,
        'ssl.key.location': key_file.name,
        'ssl.key.password': pwd
    }

    producer_config = {**config, **extra_config}
    producer = Producer(producer_config)

    # key_file (delete=True) is removed on close; cert_file and trust_file
    # were created with delete=False, so they persist on disk after closing
    cert_file.close()
    key_file.close()
    trust_file.close()

    return producer
Example #38
class ZoneFile(object):
    def __init__(self, lines=None, no_header=False):
        self._file = NamedTemporaryFile(delete=False)
        if not no_header:
            self._file.write(bytes(DUMMY_ZONE_HEADER, encoding='utf-8'))
        if lines is not None:
            self.writelines(lines)
        self._file.flush()

    def __del__(self):
        self._file.close()

    @property
    def name(self):
        return self._file.name

    def write(self, str):
        self._file.write(bytes(str, encoding='utf-8'))
        self._file.flush()

    def writelines(self, lines):
        self._file.writelines(
            bytes("%s\n" % line, encoding='utf-8') for line in lines)
        self._file.flush()
Example #39
    def execute(self, context):
        vertica = VerticaHook(vertica_conn_id=self.vertica_conn_id)
        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

        tmpfile = None
        result = None

        selected_columns = []

        count = 0
        with closing(vertica.get_conn()) as conn:
            with closing(conn.cursor()) as cursor:
                cursor.execute(self.sql)
                selected_columns = [d.name for d in cursor.description]

                if self.bulk_load:
                    tmpfile = NamedTemporaryFile("w")

                    self.log.info(
                        "Selecting rows from Vertica to local file %s...",
                        tmpfile.name)
                    self.log.info(self.sql)

                    csv_writer = csv.writer(tmpfile,
                                            delimiter='\t',
                                            encoding='utf-8')
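                    # NB: stdlib csv.writer() has no encoding argument; this
                    # presumably relies on unicodecsv being imported as csv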
                    for row in cursor.iterate():
                        csv_writer.writerow(row)
                        count += 1

                    tmpfile.flush()
                else:
                    self.log.info("Selecting rows from Vertica...")
                    self.log.info(self.sql)

                    result = cursor.fetchall()
                    count = len(result)

                self.log.info("Selected rows from Vertica %s", count)

        if self.mysql_preoperator:
            self.log.info("Running MySQL preoperator...")
            mysql.run(self.mysql_preoperator)

        try:
            if self.bulk_load:
                self.log.info("Bulk inserting rows into MySQL...")
                with closing(mysql.get_conn()) as conn:
                    with closing(conn.cursor()) as cursor:
                        cursor.execute(
                            "LOAD DATA LOCAL INFILE '%s' INTO "
                            "TABLE %s LINES TERMINATED BY '\r\n' (%s)" %
                            (tmpfile.name, self.mysql_table,
                             ", ".join(selected_columns)))
                        conn.commit()
                tmpfile.close()
            else:
                self.log.info("Inserting rows into MySQL...")
                mysql.insert_rows(table=self.mysql_table,
                                  rows=result,
                                  target_fields=selected_columns)
            self.log.info("Inserted rows into MySQL %s", count)
        except (MySQLdb.Error, MySQLdb.Warning):
            self.log.info("Inserted rows into MySQL 0")
            raise

        if self.mysql_postoperator:
            self.log.info("Running MySQL postoperator...")
            mysql.run(self.mysql_postoperator)

        self.log.info("Done")
Example #40
def dump(objdump, path):
    n = NamedTemporaryFile(delete=False)
    o = check_output([objdump, '-d', '-x', '-s', path])
    n.write(o)
    n.flush()
    n.close()  # delete=False, so the file outlives the handle and the name stays valid
    return n.name
Example #41
def convert(
    boundary=None,
    input_file=None,
    output_file=None,
    src_srs=4326,
    driver=None,
    layers=None,
    layer_name=None,
    task_uid=None,
    projection: int = 4326,
    creation_options: list = None,
    dataset_creation_options: list = None,
    layer_creation_options: list = None,
    is_raster: bool = True,
    warp_params: dict = None,
    translate_params: dict = None,
    use_translate: bool = False,
    access_mode: str = "overwrite",
    config_options: List[Tuple[str]] = None,
    distinct_field=None,
):
    """
    Uses gdal to convert and clip a supported dataset file to a mask if boundary is passed in.
    :param use_translate: A flag to force the use of translate instead of warp.
    :param layer_creation_options: Data options specific to vector conversion.
    :param dataset_creation_options: Data options specific to vector conversion.
    :param translate_params: A dict of params to pass into gdal translate.
    :param warp_params: A dict of params to pass into gdal warp.
    :param is_raster: An explicit declaration that dataset is raster (for disambiguating mixed mode files...gpkg)
    :param boundary: A geojson file or bbox (xmin, ymin, xmax, ymax) to serve as a cutline
    :param input_file: A raster or vector file to be clipped
    :param output_file: The dataset to put the clipped output in (if not specified will use in_dataset)
    :param driver: Short name of output driver to use (defaults to input format)
    :param layer_name: Table name in database for in_dataset
    :param layers: A list of layers to include for translation.
    :param task_uid: A task uid to update
    :param projection: A projection as an int referencing an EPSG code (e.g. 4326 = EPSG:4326)
    :param creation_options: Additional options to pass to the convert method (e.g. "-co SOMETHING")
    :param config_options: A list of gdal configuration options as a tuple (option, value).
    :return: Filename of clipped dataset
    """

    if isinstance(input_file, str) and not use_translate:
        input_file = [input_file]

    meta_list = []
    for _index, _file in enumerate(input_file):
        input_file[_index], output_file = get_dataset_names(_file, output_file)
        meta_list.append(get_meta(input_file[_index], is_raster))

    src_src = f"EPSG:{src_srs}"
    dst_src = f"EPSG:{projection}"
    # Currently, when there is more than one file, each must use the same driver, making the meta the same.
    meta = meta_list[0]
    if not driver:
        driver = meta["driver"] or "gpkg"

    # Geopackage raster only supports byte band type, so check for that
    band_type = None
    dstalpha = None
    if driver.lower() == "gpkg":
        band_type = gdal.GDT_Byte
    if meta.get("nodata") is None and meta.get("is_raster"):
        dstalpha = True

    # Clip the dataset if a boundary is passed in.
    temp_boundfile = None
    geojson = None
    bbox = None
    if boundary:
        # Strings are expected to be a file.
        if isinstance(boundary, str):
            if not os.path.isfile(boundary):
                raise Exception(f"Called convert using a boundary of {boundary} but no such path exists.")
        elif is_valid_bbox(boundary):
            geojson = bbox2polygon(boundary)
            bbox = boundary
        elif isinstance(boundary, dict):
            geojson = boundary
        if geojson:
            temp_boundfile = NamedTemporaryFile(suffix=".json")
            temp_boundfile.write(json.dumps(geojson).encode())
            temp_boundfile.flush()
            boundary = temp_boundfile.name

    if meta["is_raster"]:
        cmd = get_task_command(
            convert_raster,
            input_file,
            output_file,
            driver=driver,
            creation_options=creation_options,
            band_type=band_type,
            dst_alpha=dstalpha,
            boundary=boundary,
            src_srs=src_src,
            dst_srs=dst_src,
            task_uid=task_uid,
            warp_params=warp_params,
            translate_params=translate_params,
            use_translate=use_translate,
            config_options=config_options,
        )
    else:
        cmd = get_task_command(
            convert_vector,
            input_file,
            output_file,
            driver=driver,
            dataset_creation_options=dataset_creation_options,
            layer_creation_options=layer_creation_options,
            src_srs=src_src,
            dst_srs=dst_src,
            layers=layers,
            layer_name=layer_name,
            task_uid=task_uid,
            boundary=boundary,
            bbox=bbox,
            access_mode=access_mode,
            config_options=config_options,
            distinct_field=distinct_field,
        )
    try:
        task_process = TaskProcess(task_uid=task_uid)
        task_process.start_process(cmd)
    except CancelException:
        # If we don't allow cancel exception to propagate then the task won't exit properly.
        # TODO: Allow retry state to be more informed.
        raise
    except Exception as e:
        logger.error(e)
        raise Exception("File conversion failed. Please try again or contact support.")

    finally:
        if temp_boundfile:
            temp_boundfile.close()

    if requires_zip(driver):
        logger.debug(f"Requires zip: {output_file}")
        output_file = create_zip_file(output_file, get_zip_name(output_file))

    return output_file
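A hypothetical invocation of convert (the paths, bbox, and task uid are invented for illustration), clipping a raster to a bounding box and writing a GeoPackage:

out = convert(
    boundary=(-10.0, -10.0, 10.0, 10.0),  # bbox used as a cutline
    input_file="/tmp/in.tif",
    output_file="/tmp/out.gpkg",
    driver="gpkg",
    task_uid="1234",
)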
Example #42
class SshLocationContainer(object):
    """Run a Docker container to serve as an SSH location."""

    NAME = 'backuppy_test'
    PORT = 22
    USERNAME = '******'
    PASSWORD = '******'
    IDENTITY = os.path.join(RESOURCE_PATH, 'id_rsa')
    PATH = '/backuppy/'

    def __init__(self, mount_point=None):
        """Initialize a new instance."""
        self._started = False
        self._ip = None
        self._fingerprint = None
        self._known_hosts = None
        self._mount_point = mount_point

    def _ensure_started(self):
        """Ensure the container has been started."""
        if not self._started:
            raise RuntimeError('This container has not been started yet.')

    def start(self):
        """Start the container."""
        docker_args = []
        if self._mount_point is not None:
            docker_args += ['-v', '%s:%s' % (self._mount_point, self.PATH)]
        self.stop()
        subprocess.check_call(['docker', 'run', '-d', '--name', self.NAME] +
                              docker_args + ['backuppy_ssh_location'])
        self._started = True
        self.await_()
        subprocess.check_call([
            'sshpass', '-p', self.PASSWORD, 'scp', '-o',
            'UserKnownHostsFile=%s' % self.known_hosts().name,
            '%s.pub' % self.IDENTITY,
            '%s@%s:~/.ssh/authorized_keys' % (self.USERNAME, self.ip)
        ])

    def stop(self):
        """Stop the container."""
        if not self._started:
            return
        self._started = False
        subprocess.check_call(['docker', 'stop', self.NAME])
        subprocess.check_call(['docker', 'container', 'rm', self.NAME])
        self._known_hosts.close()

    @property
    def ip(self):
        """Get the container's IP address.

        :return: str
        """
        self._ensure_started()

        if not self._ip:
            self._ip = str(
                subprocess.check_output([
                    'docker', 'inspect', '-f',
                    '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
                    self.NAME
                ]).strip().decode('utf-8'))

        return self._ip

    @property
    def fingerprint(self):
        """Get the container's SSH host key fingerprint.

        :return: str
        """
        self._ensure_started()

        if not self._fingerprint:
            self._fingerprint = str(
                subprocess.check_output(['ssh-keyscan', '-t', 'rsa',
                                         self.ip]).decode('utf-8'))

        return self._fingerprint

    def known_hosts(self):
        """Get an SSH known_hosts file containing just this container.

        :return: File
        """
        if self._known_hosts:
            return self._known_hosts

        self._known_hosts = NamedTemporaryFile(mode='r+')
        self._known_hosts.write(self.fingerprint)
        self._known_hosts.flush()
        return self._known_hosts

    def await_(self):
        """Wait until the container is ready (``await`` is reserved in Python 3.7+, hence the trailing underscore)."""
        subprocess.check_call(
            ['./vendor/bin/wait-for-it',
             '%s:%d' % (self.ip, self.PORT)])

    def source(self, configuration):
        """Get the back-up source to this container.

        :return: backuppy.location.Source
        """
        return SshSource(configuration.notifier,
                         self.USERNAME,
                         self.ip,
                         self.PATH,
                         identity=self.IDENTITY,
                         host_keys=self.known_hosts().name)

    def target(self, configuration):
        """Get the back-up target to this container.

        :return: backuppy.location.Target
        """
        return SshTarget(configuration.notifier,
                         self.USERNAME,
                         self.ip,
                         self.PATH,
                         identity=self.IDENTITY,
                         host_keys=self.known_hosts().name)
Example #43
0
 def make_file(lines):
     f = NamedTemporaryFile()
     f.writelines(lines)
     f.flush()
     return f
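Note that writelines() adds no newline characters and, on Python 3, the default binary mode means the lines must already be bytes. A usage sketch:

f = make_file([b'line one\n', b'line two\n'])
print(open(f.name, 'rb').read())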
Example #44
    def run(self, set_atoms=False):
        # !TODO: split this function
        """Method which explicitly runs LAMMPS."""
        pbc = self.atoms.get_pbc()
        if all(pbc):
            cell = self.atoms.get_cell()
        elif not any(pbc):
            # large enough cell for non-periodic calculation -
            # LAMMPS shrink-wraps automatically via input command
            #       "periodic s s s"
            # below
            cell = 2 * np.max(np.abs(self.atoms.get_positions())) * np.eye(3)
        else:
            warnings.warn(
                "semi-periodic ASE cell detected - translation "
                + "to proper LAMMPS input cell might fail"
            )
            cell = self.atoms.get_cell()
        self.prism = Prism(cell)

        self.set_missing_parameters()
        self.calls += 1

        # change into subdirectory for LAMMPS calculations
        cwd = os.getcwd()
        os.chdir(self.parameters.tmp_dir)

        # setup file names for LAMMPS calculation
        label = "{0}{1:>06}".format(self.label, self.calls)
        lammps_in = uns_mktemp(
            prefix="in_" + label, dir=self.parameters.tmp_dir
        )
        lammps_log = uns_mktemp(
            prefix="log_" + label, dir=self.parameters.tmp_dir
        )
        lammps_trj_fd = NamedTemporaryFile(
            prefix="trj_" + label,
            suffix=(".bin" if self.parameters.binary_dump else ""),
            dir=self.parameters.tmp_dir,
            delete=(not self.parameters.keep_tmp_files),
        )
        lammps_trj = lammps_trj_fd.name
        if self.parameters.no_data_file:
            lammps_data = None
        else:
            lammps_data_fd = NamedTemporaryFile(
                prefix="data_" + label,
                dir=self.parameters.tmp_dir,
                delete=(not self.parameters.keep_tmp_files),
                mode='w',
                encoding='ascii'
            )
            write_lammps_data(
                lammps_data_fd,
                self.atoms,
                specorder=self.parameters.specorder,
                force_skew=self.parameters.always_triclinic,
                velocities=self.parameters.write_velocities,
                prismobj=self.prism,
                units=self.parameters.units,
                atom_style=self.parameters.atom_style
            )
            lammps_data = lammps_data_fd.name
            lammps_data_fd.flush()

        # see to it that LAMMPS is started
        if not self._lmp_alive():
            command = self.get_lammps_command()
            # Attempt to (re)start lammps
            self._lmp_handle = Popen(
                shlex.split(command, posix=(os.name == "posix")),
                stdin=PIPE,
                stdout=PIPE,
            )
        lmp_handle = self._lmp_handle

        # Create thread reading lammps stdout (for reference, if requested,
        # also create lammps_log, although it is never used)
        if self.parameters.keep_tmp_files:
            lammps_log_fd = open(lammps_log, "wb")
            fd = SpecialTee(lmp_handle.stdout, lammps_log_fd)
        else:
            fd = lmp_handle.stdout
        thr_read_log = Thread(target=self.read_lammps_log, args=(fd,))
        thr_read_log.start()

        # write LAMMPS input (for reference, also create the file lammps_in,
        # although it is never used)
        if self.parameters.keep_tmp_files:
            lammps_in_fd = open(lammps_in, "wb")
            fd = SpecialTee(lmp_handle.stdin, lammps_in_fd)
        else:
            fd = lmp_handle.stdin
        write_lammps_in(
            lammps_in=fd,
            parameters=self.parameters,
            atoms=self.atoms,
            prismobj=self.prism,
            lammps_trj=lammps_trj,
            lammps_data=lammps_data,
        )

        if self.parameters.keep_tmp_files:
            lammps_in_fd.close()

        # Wait for log output to be read (i.e., for LAMMPS to finish)
        # and close the log file if there is one
        thr_read_log.join()
        if self.parameters.keep_tmp_files:
            lammps_log_fd.close()

        if not self.parameters.keep_alive:
            self._lmp_end()

        exitcode = lmp_handle.poll()
        # do not reuse the name "cwd" here; it still holds the directory we
        # have to change back to below
        if exitcode:  # None (still running) and 0 (success) are both falsy
            raise RuntimeError(
                "LAMMPS exited in {} with exit code: {}."
                "".format(os.getcwd(), exitcode)
            )

        # A few sanity checks
        if len(self.thermo_content) == 0:
            raise RuntimeError("Failed to retrieve any thermo_style-output")
        if int(self.thermo_content[-1]["atoms"]) != len(self.atoms):
            # This obviously shouldn't happen, but if prism.fold_...() fails,
            # it could
            raise RuntimeError("Atoms have gone missing")

        trj_atoms = read_lammps_dump(
            infileobj=lammps_trj,
            order=False,
            index=-1,
            prismobj=self.prism,
            specorder=self.parameters.specorder,
        )

        if set_atoms:
            self.atoms = trj_atoms.copy()

        self.forces = trj_atoms.get_forces()
        # !TODO: trj_atoms is only the last snapshot of the system; is it
        #        desirable to also save the in-between steps?
        if self.parameters.trajectory_out is not None:
            # !TODO: is it advisable to create temporary atoms objects here?
            self.trajectory_out.write(trj_atoms)

        tc = self.thermo_content[-1]
        self.results["energy"] = convert(
            tc["pe"], "energy", self.parameters["units"], "ASE"
        )
        self.results["free_energy"] = self.results["energy"]
        self.results["forces"] = self.forces.copy()
        stress = np.array(
            [-tc[i] for i in ("pxx", "pyy", "pzz", "pyz", "pxz", "pxy")]
        )

        # Rotate the stress tensor from the LAMMPS cell frame back into the
        # ASE frame:
        xx, yy, zz, yz, xz, xy = stress
        stress_tensor = np.array([[xx, xy, xz],
                                  [xy, yy, yz],
                                  [xz, yz, zz]])
        R = self.prism.rot_mat
        stress_atoms = np.dot(R, stress_tensor)
        stress_atoms = np.dot(stress_atoms, R.T)
        stress_atoms = stress_atoms[[0, 1, 2, 1, 0, 0],
                                    [0, 1, 2, 2, 2, 1]]
        stress = stress_atoms

        self.results["stress"] = convert(
            stress, "pressure", self.parameters["units"], "ASE"
        )

        lammps_trj_fd.close()
        if not self.parameters.no_data_file:
            lammps_data_fd.close()

        os.chdir(cwd)
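For reference, the frame rotation applied to the stress above can be exercised in isolation. A minimal sketch, assuming any orthogonal rotation matrix; R here is a stand-in for self.prism.rot_mat, which is not reproduced:

import numpy as np

# Stand-in rotation matrix (90 degrees about z); the calculator above uses
# self.prism.rot_mat instead.
R = np.array([[0.0, -1.0, 0.0],
              [1.0,  0.0, 0.0],
              [0.0,  0.0, 1.0]])

# Voigt-ordered stress components (xx, yy, zz, yz, xz, xy), as read from
# the thermo output above.
xx, yy, zz, yz, xz, xy = 1.0, 2.0, 3.0, 0.1, 0.2, 0.3
tensor = np.array([[xx, xy, xz],
                   [xy, yy, yz],
                   [xz, yz, zz]])

# Similarity transform into the rotated frame, then back to Voigt order via
# the same fancy-indexing trick as above.
rotated = np.dot(np.dot(R, tensor), R.T)
voigt = rotated[[0, 1, 2, 1, 0, 0], [0, 1, 2, 2, 2, 1]]
print(voigt)  # [ 2.   1.   3.   0.2 -0.1 -0.3]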
Example #45
0
    def test_loglevel(self):
        """Test the loglevel default setting"""
        def _loglevel(lvl, msg):
            lvl_int = topt.log.getEffectiveLevel()
            if is_py3():
                lvl_name = logging.getLevelName(lvl_int)
            else:
                lvl_name = [
                    k for k, v in logging._levelNames.items() if v == lvl_int
                ][0]
            self.assertEqual(lvl_int,
                             fancylogger.getLevelInt(lvl),
                             msg="%s (expected %s got %s)" %
                             (msg, lvl, lvl_name))

        topt = TestOption1(
            go_args=['--ext-optional=REALVALUE'],
            go_nosystemexit=True,
        )
        _loglevel(topt.DEFAULT_LOGLEVEL.upper(), 'Test default loglevel')

        topt = TestOption1(
            go_args=['--debug'],
            go_nosystemexit=True,
        )
        _loglevel('DEBUG', '--debug gives DEBUG')

        topt = TestOption1(
            go_args=['--info'],
            go_nosystemexit=True,
        )
        _loglevel('INFO', '--info gives INFO')

        topt = TestOption1(
            go_args=['--quiet'],
            go_nosystemexit=True,
        )
        _loglevel('WARNING', '--quiet gives WARNING')

        # last one wins
        topt = TestOption1(
            go_args=['--debug', '--info', '--quiet'],
            go_nosystemexit=True,
        )
        _loglevel('WARNING', 'last wins: --debug --info --quiet gives WARNING')

        CONFIGFILE1 = b"""
[base]
debug=1
"""
        tmp1 = NamedTemporaryFile()
        tmp1.write(CONFIGFILE1)
        tmp1.flush()  # flush, otherwise empty
        envvar = 'logactionoptiontest'.upper()
        topt = TestOption1(go_configfiles=[tmp1.name],
                           go_args=[],
                           go_nosystemexit=True,
                           envvar_prefix=envvar)
        _loglevel('DEBUG', 'DEBUG set via configfile')

        # set via environment; environment wins over cfg file
        os.environ['%s_INFO' % envvar] = '1'
        topt = TestOption1(go_configfiles=[tmp1.name],
                           go_args=[],
                           go_nosystemexit=True,
                           envvar_prefix=envvar)
        _loglevel('INFO',
                  'env wins: debug in configfile and _INFO in env gives INFO')

        # commandline always wins
        topt = TestOption1(go_configfiles=[tmp1.name],
                           go_args=['--quiet'],
                           go_nosystemexit=True,
                           envvar_prefix=envvar)
        _loglevel(
            'WARNING',
            'commandline wins: debug in configfile, _INFO in env and --quiet gives WARNING'
        )

        # clean up: unset the env var and remove tmp1
        del os.environ['%s_INFO' % envvar]
        tmp1.close()
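The precedence this test walks through (command line over environment over config file over default) can be summarised in a library-agnostic sketch; resolve_loglevel and its parameters are illustrative names, not part of the generaloption API:

def resolve_loglevel(cli=None, env=None, cfg=None, default='WARNING'):
    # the first source that set a level wins, in decreasing priority
    for source in (cli, env, cfg):
        if source is not None:
            return source
    return default

assert resolve_loglevel(cfg='DEBUG') == 'DEBUG'
assert resolve_loglevel(env='INFO', cfg='DEBUG') == 'INFO'
assert resolve_loglevel(cli='WARNING', env='INFO', cfg='DEBUG') == 'WARNING'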
Example #46
0
    def test_configfiles(self):
        """Test configfiles (base section for empty prefix from auto_section_name)"""
        CONFIGFILE1 = b"""
[base]
store=ok
longbase=1
store-with-dash=XX

[level]
prefix-and-dash=YY

[ext]
extend=one,two,three
strtuple=a,b
strlist=x,y

[remainder]
opt1=value1

"""
        tmp1 = NamedTemporaryFile()
        tmp1.write(CONFIGFILE1)
        tmp1.flush()  # flush, otherwise empty

        topt = TestOption1(go_configfiles=[tmp1.name], go_args=[])

        # nothing passed by commandline
        self.assertEqual(topt.options.configfiles, _init_configfiles)
        self.assertEqual(topt.configfiles, [tmp1.name] + _init_configfiles)

        self.assertEqual(topt.options.store, 'ok')
        self.assertEqual(topt.options.longbase, True)
        self.assertEqual(topt.options.justatest, True)
        self.assertEqual(topt.options.store_with_dash, 'XX')
        self.assertEqual(topt.options.level_prefix_and_dash, 'YY')
        self.assertEqual(topt.options.ext_extend, ['one', 'two', 'three'])
        self.assertEqual(topt.options.ext_strtuple, ('a', 'b'))
        self.assertEqual(topt.options.ext_strlist, ['x', 'y'])

        self.assertTrue('remainder' in topt.configfile_remainder)
        self.assertFalse('base' in topt.configfile_remainder)
        self.assertEqual(topt.configfile_remainder['remainder'],
                         {'opt1': 'value1'})

        topt1b = TestOption1(go_configfiles=[tmp1.name],
                             go_args=['--store=notok'])

        self.assertEqual(topt1b.options.store, 'notok')

        self.assertEqual(topt1b.options.configfiles, _init_configfiles)
        self.assertEqual(topt1b.configfiles, [tmp1.name] + _init_configfiles)

        CONFIGFILE2 = b"""
[base]
store=notok2
longbase=0
justatest=0
debug=1

"""
        tmp2 = NamedTemporaryFile()
        tmp2.write(CONFIGFILE2)
        tmp2.flush()  # flush, otherwise empty

        # multiple config files, last one wins
        # cmdline wins always
        topt2 = TestOption1(go_configfiles=[tmp1.name, tmp2.name],
                            go_args=['--store=notok3'])

        self.assertEqual(topt2.options.configfiles, _init_configfiles)
        self.assertEqual(topt2.configfiles,
                         [tmp1.name, tmp2.name] + _init_configfiles)

        self.assertEqual(topt2.options.store, 'notok3')
        self.assertEqual(topt2.options.justatest, False)
        self.assertEqual(topt2.options.longbase, False)
        self.assertEqual(topt2.options.debug, True)

        # add test for _action_taken
        for dest in ['ext_strlist', 'longbase', 'store']:
            self.assertTrue(topt2.options._action_taken.get(dest, None))

        for dest in ['level_longlevel']:
            self.assertFalse(dest in topt2.options._action_taken)

        # This works because we manipulate DEFAULT and use an all-uppercase name
        CONFIGFILE3 = b"""
[base]
store=%(FROMINIT)s
"""
        tmp3 = NamedTemporaryFile()
        tmp3.write(CONFIGFILE3)
        tmp3.flush()  # flush, otherwise empty

        initenv = {'DEFAULT': {'FROMINIT': 'woohoo'}}
        topt3 = TestOption1(go_configfiles=[tmp3.name, tmp2.name],
                            go_args=['--ignoreconfigfiles=%s' % tmp2.name],
                            go_configfiles_initenv=initenv)

        self.assertEqual(topt3.options.configfiles, _init_configfiles)
        self.assertEqual(topt3.configfiles,
                         [tmp3.name, tmp2.name] + _init_configfiles)
        self.assertEqual(topt3.options.ignoreconfigfiles, [tmp2.name])

        self.assertEqual(topt3.options.store, 'woohoo')

        # remove files
        tmp1.close()
        tmp2.close()
        tmp3.close()
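The FROMINIT substitution in CONFIGFILE3 relies on ini-style interpolation from the DEFAULT section. A minimal stand-alone sketch with the standard library (the test itself goes through TestOption1, not through ConfigParser directly):

from configparser import ConfigParser

parser = ConfigParser()
parser['DEFAULT'] = {'FROMINIT': 'woohoo'}
parser.read_string("[base]\nstore=%(FROMINIT)s\n")
assert parser.get('base', 'store') == 'woohoo'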
Example #47
0
    def _make_pyplate(self, contents):
        contents = dedent(contents)
        # open in text mode, so that writing a str also works on Python 3
        pyplate = NamedTemporaryFile(mode='w+')
        pyplate.write(contents)
        pyplate.flush()
        return pyplate
Example #48
0
class _FileLogger:
    """File logging class wrapper.

    Class wrapping is needed manly for safety of log file removal
    after Blender is shut down.

    Registering fuction for atexit module makes sure than,
    file is deleted if Blender is closed normally.

    However file is not deleted if process is killed in Linux.
    On Windows, on the other hand, file gets deleted even if Blender
    is closed from Task Manager -> End Task/Process
    """
    __log_file = None

    def __init__(self):

        self.__log_file = NamedTemporaryFile(mode="w+",
                                             suffix=".log.txt",
                                             delete=True)

        # instead of a destructor we are using the delete method,
        # to close, and consequently delete, the log file
        atexit.register(self.delete)

    def delete(self):
        """Closes file and consiquentally deletes it as log file was created in that fashion.
        """

        # close file only if it's still exists in class variable
        if self.__log_file is not None:
            self.__log_file.close()
            self.__log_file = None

    def write(self, msg_object):
        """Writes message to the log file.

        :param msg_object: message to be written to file
        :type msg_object: object
        """

        self.__log_file.write(msg_object)

    def flush(self):
        """Flushes written content to file on disk."""

        self.__log_file.flush()

    def get_log(self):
        """Gets current content of temporary SCS BT log file,
        which was created at startup and is having log of BT session.

        :return: current content of log file as string
        :rtype: str
        """

        # first move to the start of the file
        self.__log_file.seek(0)

        log = ""
        for line in self.__log_file.readlines():
            log += line.replace(
                "\t   ", "\t\t   "
            )  # replace for Blender text editor to be aligned the same as in console

        return log
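A minimal usage sketch of the wrapper above (assuming it is instantiated as in the source add-on; atexit closes, and thereby deletes, the file on normal interpreter exit):

logger = _FileLogger()
logger.write("1.0\t   first message\n")
logger.flush()
print(logger.get_log())  # "1.0\t\t   first message\n" after the re-alignment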
Example #49
0
    def configure_and_load(self, config_element, tool_data_path, from_shed_config=False, url_timeout=10):
        """
        Configure and load table from an XML element.
        """
        self.separator = config_element.get('separator', '\t')
        self.comment_char = config_element.get('comment_char', '#')
        # Configure columns
        self.parse_column_spec(config_element)

        # store repo info if available:
        repo_elem = config_element.find('tool_shed_repository')
        if repo_elem is not None:
            repo_info = dict(tool_shed=repo_elem.find('tool_shed').text, name=repo_elem.find('repository_name').text,
                             owner=repo_elem.find('repository_owner').text, installed_changeset_revision=repo_elem.find('installed_changeset_revision').text)
        else:
            repo_info = None
        # Read every file
        for file_element in config_element.findall('file'):
            tmp_file = None
            filename = file_element.get('path', None)
            if filename is None:
                # Handle URLs as files
                filename = file_element.get('url', None)
                if filename:
                    # text mode, since requests' .text is a str
                    tmp_file = NamedTemporaryFile(mode='w', prefix='TTDT_URL_%s-' % self.name)
                    try:
                        tmp_file.write(requests.get(filename, timeout=url_timeout).text)
                    except Exception as e:
                        log.error('Error loading Data Table URL "%s": %s', filename, e)
                        continue
                    tmp_file.flush()
                    log.debug('Loading Data Table URL "%s" as filename "%s".', filename, tmp_file.name)
                    filename = tmp_file.name
            filename = file_path = expand_here_template(filename, here=self.here)
            found = False
            if file_path is None:
                log.debug("Encountered a file element (%s) that does not contain a path value when loading tool data table '%s'.", util.xml_to_string(file_element), self.name)
                continue

            # FIXME: splitting on and merging paths from a configuration file when loading is wonky
            # Data should exist on disk in the state needed, i.e. the xml configuration should
            # point directly to the desired file to load. Munging of the tool_data_tables_conf.xml.sample
            # can be done during installing / testing / metadata resetting with the creation of a proper
            # tool_data_tables_conf.xml file, containing correct <file path=> attributes. Allowing a
            # path.join with a different root should be allowed, but splitting should not be necessary.
            if tool_data_path and from_shed_config:
                # Must identify with from_shed_config as well, because the
                # regular galaxy app has and uses tool_data_path.
                # We're loading a tool in the tool shed, so we cannot use the Galaxy tool-data
                # directory which is hard-coded into the tool_data_table_conf.xml entries.
                filename = os.path.split(file_path)[1]
                filename = os.path.join(tool_data_path, filename)
            if self.tool_data_path_files.exists(filename):
                found = True
            elif self.tool_data_path_files.exists("%s.sample" % filename) and not from_shed_config:
                log.info("Could not find tool data %s, reading sample" % filename)
                filename = "%s.sample" % filename
                found = True
            else:
                # Since the path attribute can include a hard-coded path to a specific directory
                # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value
                # as self.tool_data_path, we'll parse the path to get the filename and see if it is
                # in self.tool_data_path.
                file_path, file_name = os.path.split(filename)
                if file_path and file_path != self.tool_data_path:
                    corrected_filename = os.path.join(self.tool_data_path, file_name)
                    if self.tool_data_path_files.exists(corrected_filename):
                        filename = corrected_filename
                        found = True

            errors = []
            if found:
                self.extend_data_with(filename, errors=errors)
                self._update_version()
            else:
                self.missing_index_file = filename
                log.warning("Cannot find index file '%s' for tool data table '%s'" % (filename, self.name))

            if filename not in self.filenames or not self.filenames[filename]['found']:
                self.filenames[filename] = dict(found=found, filename=filename, from_shed_config=from_shed_config, tool_data_path=tool_data_path,
                                                config_element=config_element, tool_shed_repository=repo_info, errors=errors)
            else:
                log.debug("Filename '%s' already exists in filenames (%s), not adding", filename, list(self.filenames.keys()))
            # Remove URL tmp file
            if tmp_file is not None:
                tmp_file.close()
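Condensed, the filename resolution above tries the configured path, then its ".sample" twin, then the basename relocated under the Galaxy tool-data directory. A sketch of that order; resolve_index_file is an illustrative helper, not Galaxy API, and it omits the from_shed_config special cases:

import os

def resolve_index_file(filename, tool_data_path, exists=os.path.exists):
    if exists(filename):
        return filename
    if exists(filename + ".sample"):
        return filename + ".sample"  # fall back to the shipped sample
    candidate = os.path.join(tool_data_path, os.path.basename(filename))
    if exists(candidate):
        return candidate  # hard-coded path, but present under tool_data_path
    return None  # recorded as a missing index file by the caller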
Example #50
0
class RNAfoldApp(LocalApp):
    """
    Compute the minimum free energy secondary structure of a ribonucleic
    acid sequence using *ViennaRNA's* *RNAfold* software.

    Internally this creates a :class:`Popen` instance, which handles
    the execution.

    Parameters
    ----------
    sequence : NucleotideSequence
        The RNA sequence.
    temperature : int, optional
        The temperature (°C) to be assumed for the energy parameters.
    bin_path : str, optional
        Path of the *RNAfold* binary.

    Examples
    --------

    >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC")
    >>> app = RNAfoldApp(sequence)
    >>> app.start()
    >>> app.join()
    >>> print(app.get_mfe())
    -1.3
    >>> print(app.get_dot_bracket())
    (((.((((.......)).)))))....
    """
    def __init__(self, sequence, temperature=37, bin_path="RNAfold"):
        super().__init__(bin_path)
        self._sequence = sequence
        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
        self._temperature = str(temperature)

    def run(self):
        in_file = FastaFile()
        set_sequence(in_file, self._sequence)
        in_file.write(self._in_file)
        self._in_file.flush()
        self.set_arguments(
            [self._in_file.name, "--noPS", "-T", self._temperature])
        super().run()

    def evaluate(self):
        super().evaluate()
        lines = self.get_stdout().split("\n")
        content = lines[2]
        dotbracket, mfe = content.split(" ", maxsplit=1)
        mfe = float(mfe[1:-1])

        self._mfe = mfe
        self._dotbracket = dotbracket

    def clean_up(self):
        super().clean_up()
        cleanup_tempfile(self._in_file)

    @requires_state(AppState.CREATED)
    def set_temperature(self, temperature):
        """
        Adjust the energy parameters according to a temperature in
        degrees Celsius.

        Parameters
        ----------
        temperature : int
            The temperature.
        """
        self._temperature = str(temperature)

    @requires_state(AppState.JOINED)
    def get_mfe(self):
        """
        Get the minimum free energy of the input sequence.

        Returns
        -------
        mfe : float
            The minimum free energy.

        Examples
        --------

        >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC")
        >>> app = RNAfoldApp(sequence)
        >>> app.start()
        >>> app.join()
        >>> print(app.get_mfe())
        -1.3
        """
        return self._mfe

    @requires_state(AppState.JOINED)
    def get_dot_bracket(self):
        """
        Get the minimum free energy secondary structure of the input
        sequence in dot bracket notation.

        Returns
        -------
        dotbracket : str
            The secondary structure in dot bracket notation.

        Examples
        --------

        >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC")
        >>> app = RNAfoldApp(sequence)
        >>> app.start()
        >>> app.join()
        >>> print(app.get_dot_bracket())
        (((.((((.......)).)))))....
        """
        return self._dotbracket

    @requires_state(AppState.JOINED)
    def get_base_pairs(self):
        """
        Get the base pairs from the minimum free energy secondary
        structure of the input sequence.

        Returns
        -------
        base_pairs : ndarray, shape=(n,2)
            Each row corresponds to the positions of the bases in the
            sequence.

        Examples
        --------

        >>> sequence = NucleotideSequence("CGACGTAGATGCTAGCTGACTCGATGC")
        >>> app = RNAfoldApp(sequence)
        >>> app.start()
        >>> app.join()
        >>> print(app.get_base_pairs())
            [[ 0 22]
             [ 1 21]
             [ 2 20]
             [ 4 19]
             [ 5 18]
             [ 6 16]
             [ 7 15]]

        For reference, the corresponding dot bracket notation can be
        displayed as below.

        >>> print(app.get_dot_bracket())
        (((.((((.......)).)))))....
        """
        return base_pairs_from_dot_bracket(self._dotbracket)

    @staticmethod
    def compute_secondary_structure(sequence, bin_path="RNAfold"):
        """
        Compute the minimum free energy secondary structure of a 
        ribonucleic acid sequence using *ViennaRNA's* *RNAfold* software.

        This is a convenience function, that wraps the
        :class:`RNAfoldApp` execution.

        Parameters
        ----------
        sequence : NucleotideSequence
            The RNA sequence.
        bin_path : str, optional
            Path of the *RNAfold* binary.

        Returns
        -------
        dotbracket : str
            The secondary structure in dot bracket notation.
        mfe : float
            The minimum free energy.
        """
        app = RNAfoldApp(sequence, bin_path=bin_path)
        app.start()
        app.join()
        return app.get_dot_bracket(), app.get_mfe()
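For reference, base pairs can be recovered from dot-bracket notation with a simple stack. This sketches the idea behind base_pairs_from_dot_bracket, not biotite's actual implementation (pseudoknots and other bracket types are ignored):

def dot_bracket_pairs(dotbracket):
    stack, pairs = [], []
    for i, char in enumerate(dotbracket):
        if char == "(":
            stack.append(i)       # remember the opening position
        elif char == ")":
            pairs.append((stack.pop(), i))  # close the innermost open pair
    return sorted(pairs)

print(dot_bracket_pairs("(((.((((.......)).)))))...."))
# [(0, 22), (1, 21), (2, 20), (4, 19), (5, 18), (6, 16), (7, 15)]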
Example #51
0
    def _iternocache(self, source, key, reverse):
        debug('iterate without cache')
        self._clearcache()
        it = iter(source)

        flds = it.next()
        yield tuple(flds)

        if key is not None:
            # convert field selection into field indices
            indices = asindices(flds, key)
        else:
            indices = range(len(flds))
        # now use field indices to construct a _getkey function
        # N.B., this will probably raise an exception on short rows
        getkey = sortable_itemgetter(*indices)

        # initialise the first chunk
        rows = list(itertools.islice(it, 0, self.buffersize))
        rows.sort(key=getkey, reverse=reverse)

        # have we exhausted the source iterator?
        if self.buffersize is None or len(rows) < self.buffersize:

            if self.cache:
                debug('caching mem')
                self._fldcache = flds
                self._memcache = rows
                self._getkey = getkey # actually not needed to iterate from memcache

            for row in rows:
                yield tuple(row)

        else:

            chunkfiles = []

            while rows:

                # dump the chunk
                f = NamedTemporaryFile(dir=self.tempdir)
                for row in rows:
                    pickle.dump(row, f, protocol=-1)
                f.flush()
                # N.B., do not close the file! Closing will delete
                # the file, and we might want to keep it around
                # if it can be cached. We'll let garbage collection
                # deal with this, i.e., when no references to the
                # chunk files exist any more, garbage collection
                # should be an implicit close, which will cause file
                # deletion.
                chunkfiles.append(f)

                # grab the next chunk
                rows = list(itertools.islice(it, 0, self.buffersize))
                rows.sort(key=getkey, reverse=reverse)

            if self.cache:
                debug('caching files %r', chunkfiles)
                self._fldcache = flds
                self._filecache = chunkfiles
                self._getkey = getkey

            chunkiters = [iterchunk(f) for f in chunkfiles]
            for row in _mergesorted(getkey, reverse, *chunkiters):
                yield tuple(row)
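The chunking above is an external merge sort; on Python 3.5+ the final k-way merge (here _mergesorted) can be expressed with the standard library. A sketch with the same argument shape:

import heapq

def merge_sorted_chunks(getkey, reverse, *chunkiters):
    # heapq.merge lazily merges iterables that are already sorted
    return heapq.merge(*chunkiters, key=getkey, reverse=reverse)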
Example #52
0
    def run_stats(self):
        """Main function which do the process."""

        # Get the common fields.
        self.admin_layer = self.cbx_aggregation_layer.currentLayer()

        selected_indicators = self.indicators_list()

        if not self.name_field:
            self.name_field = self.le_new_column.placeholderText()

        # Output.
        self.output_file_path = self.le_output_filepath.text()

        try:
            self.button_box_ok.setDisabled(True)
            # noinspection PyArgumentList
            QApplication.setOverrideCursor(Qt.WaitCursor)
            # noinspection PyArgumentList
            QApplication.processEvents()

            if not self.admin_layer:
                raise NoLayerProvidedException

            if not self.admin_layer and self.use_point_layer:
                raise NoLayerProvidedException

            crs_admin_layer = self.admin_layer.crs()

            if not self.use_point_layer and not self.use_area:
                if not self.cbx_list_indicators:
                    raise FieldException(
                        field_1='List of indicators should not be empty')

            # Output
            if not self.output_file_path:
                temp_file = NamedTemporaryFile(delete=False,
                                               suffix='-geopublichealth.shp')
                self.output_file_path = temp_file.name
                temp_file.flush()
                temp_file.close()

            admin_layer_provider = self.admin_layer.dataProvider()
            fields = self.admin_layer.fields()

            if admin_layer_provider.fields().indexFromName(
                    self.name_field) != -1:
                raise FieldExistingException(field=self.name_field)

            for indicator_selected in selected_indicators:
                fields.append(
                    QgsField("Z" + indicator_selected[0], QVariant.Double))

            fields.append(QgsField(self.name_field, QVariant.Double))

            file_writer = QgsVectorFileWriter(self.output_file_path, 'utf-8',
                                              fields, QgsWkbTypes.Polygon,
                                              self.admin_layer.crs(),
                                              'ESRI Shapefile')

            count = self.admin_layer.featureCount()
            stats = {}

            for indicator_selected in selected_indicators:
                values = []
                indicator_selected_name = str(indicator_selected[0])

                for i, feature in enumerate(self.admin_layer.getFeatures()):
                    index = self.admin_layer.fields().indexFromName(
                        indicator_selected_name)

                    if feature[index]:
                        value = float(feature[index])
                    else:
                        value = 0.0
                    values.append(value)

                stats[indicator_selected_name] = Stats(values)

            for i, feature in enumerate(self.admin_layer.getFeatures()):
                attributes = feature.attributes()

                composite_index_value = 0.0
                for indicator_selected in selected_indicators:
                    indicator_selected_name = str(indicator_selected[0])
                    index = self.admin_layer.fields().indexFromName(
                        indicator_selected_name)

                    if feature[index]:
                        value = float(feature[index])
                    else:
                        value = 0.0

                    ind_stats = stats[indicator_selected_name]
                    zscore = ((value - ind_stats.average())
                              / ind_stats.standard_deviation())
                    attributes.append(float(zscore))

                    if indicator_selected[1] == '+':
                        composite_index_value -= zscore
                    else:
                        composite_index_value += zscore

                attributes.append(float(composite_index_value))
                new_feature = QgsFeature()
                new_geom = QgsGeometry(feature.geometry())
                new_feature.setAttributes(attributes)
                new_feature.setGeometry(new_geom)
                file_writer.addFeature(new_feature)

            del file_writer

            self.output_layer = QgsVectorLayer(self.output_file_path,
                                               self.name_field, 'ogr')
            QgsProject.instance().addMapLayer(self.output_layer)

            if self.symbology.isChecked():
                self.add_symbology()

            self.signalStatus.emit(3, tr('Successful process'))

        except GeoPublicHealthException as e:
            display_message_bar(msg=e.msg, level=e.level, duration=e.duration)

        finally:
            self.button_box_ok.setDisabled(False)
            # noinspection PyArgumentList
            QApplication.restoreOverrideCursor()
            # noinspection PyArgumentList
            QApplication.processEvents()
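For reference, the per-indicator transform above is a plain z-score. A stand-alone sketch (using the population standard deviation, as one plausible reading of the Stats helper):

import statistics

def zscores(values):
    mean = statistics.mean(values)
    stdev = statistics.pstdev(values)  # population standard deviation
    return [(v - mean) / stdev for v in values]

print(zscores([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]))
# [-1.5, -0.5, -0.5, -0.5, 0.0, 0.0, 1.0, 2.0]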
Example #53
0
def download(urls, name, ext, live=False):
    url = urls[0]
    m3u8 = ext == 'm3u8'
    m3u8_crypto = False
    audio = subtitle = None
    # for live video, always use ffmpeg to rebuild timeline.
    if not live and m3u8:
        live = live_m3u8(url)
    internal = not live and m3u8_internal
    if m3u8:
        m3u8_crypto = crypto_m3u8(url)
        # rebuild m3u8 urls when using the internal downloader,
        # and change the ext to the segment's ext (default "ts");
        # otherwise change the ext to "flv" or "mp4".
        if internal:
            urls, audio, subtitle = load_m3u8(url)
            ext = urlparse(urls[0])[2].split('.')[-1]
            if ext not in ['ts', 'm4s', 'mp4', 'm4a']:
                ext = 'ts'
        elif live:
            ext = 'flv'
        else:
            ext = 'mp4'
    elif ext == 'mpd':
        # very slow, and it still has many problems
        # TODO: implement internal download/merge process
        internal = False
        ext = 'mp4'

    # OK check internal
    if not internal:
        launch_ffmpeg_download(url, name + '.' + ext, allow_all_ext=m3u8_crypto)
    else:
        if save_urls(urls, name, ext, jobs=args.jobs,
                     fail_confirm=not args.no_fail_confirm,
                     fail_retry_eta=args.fail_retry_eta):
            length = len(urls)
            if (m3u8 or length > 1) and not args.no_merge:
                fix_sa_name(name, ext, length)
                if m3u8_crypto:
                    # use ffmpeg to merge the internally downloaded m3u8:
                    # build a local m3u8 (headers cannot be set then)
                    lm3u8 = NamedTemporaryFile(mode='w+t', suffix='.m3u8',
                                               dir='.', encoding='utf-8')
                    lkeys = []  # temp keys' references
                    m = _load_m3u8(url)
                    for k in m.keys + m.session_keys:
                        if k and k.uri:
                            key = NamedTemporaryFile(mode='w+b', suffix='.key',
                                                     dir='.')
                            key.write(http.get_response(k.absolute_uri).content)
                            key.flush()
                            k.uri = os.path.basename(key.name)
                            lkeys.append(key)
                    for i, seg in enumerate(m.segments):
                        seg.uri = '%s_%d.%s' % (name, i, ext)
                    lm3u8.write(m.dumps())
                    lm3u8.flush()
                    launch_ffmpeg_download(lm3u8.name, name + '.mp4', False, True)
                else:
                    launch_ffmpeg_merge(name, ext, length)
                clean_slices(name, ext, length)
        else:
            logger.critical('{}> download failed'.format(name))
        if audio:
            ext = 'm4a'
            length = len(audio)
            if save_urls(audio, name, ext, jobs=args.jobs,
                         fail_confirm=not args.no_fail_confirm,
                         fail_retry_eta=args.fail_retry_eta):
                if (m3u8 or length > 1) and not args.no_merge:
                    fix_sa_name(name, ext, length)
                    launch_ffmpeg_merge(name, ext, length)
                    clean_slices(name, ext, length)
            else:
                logger.critical('{}> HLS audio download failed'.format(name))
        if subtitle:
            ext = 'srt'
            if not save_urls(subtitle[:1], name, ext, jobs=args.jobs,
                             fail_confirm=not args.no_fail_confirm,
                             fail_retry_eta=args.fail_retry_eta):
                logger.critical('{}> HLS subtitle download failed'.format(name))
Example #54
0
    def consensusCalling(self, spot, bam, reference, args):
        """
        Make a consensus of all the reads in the region and identify all of the SVs in the region
        """
        #
        MAXNUMREADS = 100  #I don't think we'll need more than this many reads
        MAXATTEMPTS = MAXNUMREADS / 2  #I don't feel like trying 100 times
        SPANBUFFER = 100  #number of bases I want a read to span

        chrom, start, end = spot.chrom, spot.start, spot.end
        buffer = args.buffer

        supportReads = []
        spanReads = []
        #Fetch reads and trim
        totCnt = 0
        for read in bam.fetch(chrom, max(0, start - buffer - SPANBUFFER),
                              end + buffer + SPANBUFFER):
            if read.qname not in spot.varReads:
                continue
            seq, qual = self.readTrim(read, start - buffer, end + buffer)
            if read.pos < start - SPANBUFFER and read.aend > end + SPANBUFFER:
                spanReads.append((len(seq), seq, qual))
            else:
                supportReads.append((seq, qual))
            totCnt += 1

        if len(spanReads) == 0:
            logging.debug("noone spans - consensus aborted. %s" % (str(spot)))
            spot.tags["noSpan"] = True
            return [spot]

        spanReads.sort(reverse=True)
        if len(spanReads) > MAXNUMREADS:
            origSupportReads = [(x[1], x[2]) for x in spanReads[:MAXNUMREADS]]
        elif len(spanReads) + len(supportReads) > MAXNUMREADS:
            origSupportReads = [(x[1], x[2]) for x in spanReads
                                ] + supportReads[:MAXNUMREADS - len(spanReads)]
        else:
            origSupportReads = [(x[1], x[2]) for x in spanReads] + supportReads
        logging.debug("Alt reads: %d total, %d extra support" %
                      (totCnt, len(origSupportReads)))

        mySpots = []
        refReadId = 0
        haveVar = False

        #Attempt each spanRead until we get one that passes
        #while refReadId < len(spanReads) and not haveVar and refReadId < MAXATTEMPTS:
        #refread = spanReads[refReadId]
        #supportReads = origSupportReads[:refReadId] + origSupportReads[refReadId+1:]
        refReadId += 1

        #read that spans most of the region goes first
        #use the rest for cleaning

        #building consensus sequence
        foutreads = NamedTemporaryFile(suffix=".fasta")
        qoutreads = open(foutreads.name + '.qual', 'w')
        for id, i in enumerate(origSupportReads):
            foutreads.write(">%d\n%s\n" % (id, i[0]))
            qoutreads.write(">%d\n%s\n" %
                            (id, " ".join(str(ord(j) - 33) for j in i[1])))
        foutreads.flush()
        qoutreads.flush()

        #foutref = NamedTemporaryFile(suffix=".fasta")
        #foutref.write(">%s:%d-%d\n%s" % (spot.chrom, start, end, refread[1]))
        #foutref.flush()

        logging.debug("Making the contig....")
        #run it through phrap
        #make out.fasta and out.fasta.qual
        #run phrap
        #if asm -- consensus only
        r, o, e = exe("phrap %s -minmatch 6 -minscore 20" % (foutreads.name),
                      timeout=3)

        if r != 0:  #failed
            logging.warning('phrap failed ' + self.name)
            logging.warning(o)
            logging.warning(e)
            return [
            ]  #here is where I'd like to add just the no-consensus spot

        results = mergeFastaQual(foutreads.name + ".contigs",
                                 foutreads.name + ".contigs.qual")
        if len(results) == 0:
            logging.warning('no asm made ' + self.name)
            return [
            ]  #here is where I'd like to add just the no-consensus spot
        logging.info('%d contigs made %s' % (len(results), self.name))

        #then run it through consensus
        logging.debug("Polishing contigs")

        alignOut = NamedTemporaryFile(suffix=".m5")
        blasr(foutreads.name,
              foutreads.name + ".contigs",
              format="-m 5",
              nproc=1,
              outname=alignOut.name)
        # elif no asm and consensus only (faster)

        if args.polish == "pbbanana":
            aligns = M5File(alignOut.name)
            con = ">con\n%s\n" % consensus(aligns).sequence
            conName = "pbbanana"
        elif args.polish == "pbdagcon":
            logging.debug("pbdagcon is running")
            #using minErrReads - 1 because one of them is already being used as seed!
            r, con, e = exe("pbdagcon -c %d -t 0 %s" %
                            (max(0, args.minErrReads - 1), alignOut.name),
                            timeout=1)
            #r, con, e = exe("pbdagcon %s" % (alignOut.name), timeout=2)
            logging.debug("back from pbdagcon")
            logging.debug((r, e))
            #raw_input("press ent")
            if con is not None:
                con = con[con.index("\n") + 1:]
            else:
                con = ""
            conName = "pbdagcon"
        alignOut.close()
        #foutref.close()
        foutreads.close()
        #we don't have a consensus - retry
        if len(con) == 0:
            logging.debug("Trying another seed read for consensus")
            con = results.values()[0].seq
        logging.debug("%s %d bp seq" % (conName, len(con.split('\n')[1])))

        #try improving consensus
        conOut = NamedTemporaryFile(suffix=".fasta")
        conOut.write(con)
        #conOut.close()
        conOut.flush()

        refOut = NamedTemporaryFile(suffix=".fasta")
        #j = reference.fetch(chrom, max(0, start-buffer), end+buffer)
        #fout = open("f****e.ref.fasta",'w')
        #fout.write(j)
        #fout.close()
        refOut.write(">%s:%d-%d\n%s\n" % (chrom, start, end, \
                    reference.fetch(chrom, max(0, start-buffer), end+buffer)))
        refOut.flush()

        #map consensus to refregion
        varSam = NamedTemporaryFile(suffix=".sam")
        blasr(conOut.name, refOut.name, format="--sam", outname=varSam.name)
        #consensus=False) -- would this help?
        #or what if I fed it through leftalign?

        sam = pysam.Samfile(varSam.name)

        matches = 0.0
        bases = 0.0
        nReads = 0
        mySpots = []
        for read in sam:
            nReads += 1
            spot.tags["consensusCreated"] = True
            for svstart, svsize, svtype, altseq in expandCigar(
                    read, args.minIndelSize, CONFIRMCOLLAPSE, True):
                newspot = copy.deepcopy(spot)

                if spot.svtype == svtype and svtype == "INS":
                    haveVar = True
                    newspot.start = svstart + start - buffer
                    newspot.end = svstart + start - buffer
                    newspot.tags["seq"] = altseq
                    newspot.size = svsize
                    gt, gq = genotype(newspot)
                    newspot.tags["GT"] = gt
                    newspot.tags["GQ"] = gq
                    mySpots.append(newspot)

                elif spot.svtype == svtype and svtype == "DEL":
                    haveVar = True
                    newspot.start = svstart + start - buffer
                    newspot.end = svstart + svsize + start - buffer
                    newspot.size = -svsize
                    gt, gq = genotype(newspot)
                    newspot.tags["GT"] = gt
                    newspot.tags["GQ"] = gq
                    newspot.tags["seq"] = reference.fetch(
                        chrom, newspot.start, newspot.end)
                    mySpots.append(newspot)
        #identity = matches/bases
        #If no var, nothing is returned.
        #for newspot in mySpots:
        #newspot.tags["alnIdentityEstimate"] = identity
        #Keep reporting the actual contigs out until we
        #find a reason to need it (and also we can get quals...)
        #vbam.reset()
        #for id, read in enumerate(vbam):
        #newspot.tags["contigSeq%d" % (id)] = read.seq
        #newspot.tags["contigQual%d" % (id)] = read.qual

        #vbam.close()
        #varBam.close()
        refOut.close()

        logging.debug("%d consensus reads created %d spots" %
                      (nReads, len(mySpots)))

        return mySpots
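A sketch of the kind of CIGAR walk expandCigar performs above: report insertions and deletions of at least min_size from a pysam alignment. iter_indels is illustrative only; the real helper also returns alt sequences and handles collapse confirmation:

def iter_indels(read, min_size):
    # read is a pysam AlignedSegment; positions are relative to the reference
    ref_pos = read.reference_start
    for op, length in read.cigartuples:
        if op == 1 and length >= min_size:    # I: bases in read, not reference
            yield ref_pos, length, "INS"
        elif op == 2 and length >= min_size:  # D: bases in reference, not read
            yield ref_pos, length, "DEL"
        if op in (0, 2, 3, 7, 8):             # ops that consume the reference
            ref_pos += length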
Example #55
0
def ShellCommandResults(CmdLine, Opt):
    """ Execute the command, returning the output content """
    file_list = NamedTemporaryFile(delete=False)
    filename = file_list.name
    Results = []

    returnValue = 0
    try:
        subprocess.check_call(args=shlex.split(CmdLine), stderr=subprocess.STDOUT, stdout=file_list)
    except subprocess.CalledProcessError as err_val:
        file_list.close()
        if not Opt.silent:
            sys.stderr.write("ERROR : %d : %s\n" % (err_val.returncode, err_val.__str__()))
            if os.path.exists(filename):
                sys.stderr.write("      : Partial results may be in this file: %s\n" % filename)
            sys.stderr.flush()
        returnValue = err_val.returncode

    except IOError as err_val:
        (errno, strerror) = err_val.args
        file_list.close()
        if not Opt.silent:
            sys.stderr.write("I/O ERROR : %s : %s\n" % (str(errno), strerror))
            sys.stderr.write("ERROR : this command failed : %s\n" % CmdLine)
            if os.path.exists(filename):
                sys.stderr.write("      : Partial results may be in this file: %s\n" % filename)
            sys.stderr.flush()
        returnValue = errno

    except OSError as err_val:
        (errno, strerror) = err_val.args
        file_list.close()
        if not Opt.silent:
            sys.stderr.write("OS ERROR : %s : %s\n" % (str(errno), strerror))
            sys.stderr.write("ERROR : this command failed : %s\n" % CmdLine)
            if os.path.exists(filename):
                sys.stderr.write("      : Partial results may be in this file: %s\n" % filename)
            sys.stderr.flush()
        returnValue = errno

    except KeyboardInterrupt:
        file_list.close()
        if not Opt.silent:
            sys.stderr.write("ERROR : Command terminated by user : %s\n" % CmdLine)
            if os.path.exists(filename):
                sys.stderr.write("      : Partial results may be in this file: %s\n" % filename)
            sys.stderr.flush()
        returnValue = 1

    finally:
        if not file_list.closed:
            file_list.flush()
            os.fsync(file_list.fileno())
            file_list.close()

    if os.path.exists(filename):
        fd_ = open(filename, 'r')
        Results = fd_.readlines()
        fd_.close()
        os.unlink(filename)

    if returnValue > 0:
        return returnValue

    return Results
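On Python 3.5+ the same capture-to-lines behaviour is available without a temporary file. A minimal sketch (error handling elided, names illustrative):

import shlex
import subprocess

def shell_command_results(cmd_line):
    proc = subprocess.run(shlex.split(cmd_line), stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT, universal_newlines=True)
    if proc.returncode > 0:
        return proc.returncode
    return proc.stdout.splitlines(True)  # keep line endings, like readlines()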
Example #56
0
def build_summary(doc, rouge_settings):

    params = build_rouge_params(rouge_settings)
    match_pattern = "X ROUGE-{} Eval".format(rouge_settings["order"])

    ref_file = get_reference_file(doc)

    input_sentences = get_input_sentences(doc, 25)
    n_inputs = len(input_sentences)
    input_ids = [i for i in xrange(n_inputs)]

    candidate_files = [NamedTemporaryFile("w", delete=False) 
                       for i in xrange(n_inputs)]
    config_lines = ["{} {}".format(cf.name, ref_file.name)
                    for cf in candidate_files]

    config_file = NamedTemporaryFile("w", delete=False)
    
    greedy_summary = ""
    greedy_score = 0

    for z in range(n_inputs):

        cfg_text = "\n".join(config_lines)
        config_file.seek(0)
        config_file.write(cfg_text)
        config_file.truncate()
        config_file.flush()

        for i in xrange(len(config_lines)):
            input_id = input_ids[i]
            sum = "{}{}\n".format(greedy_summary, input_sentences[input_id])
            cf = candidate_files[i]
            cf.truncate(len(sum))
            cf.seek(0)
            cf.write(sum)
            cf.flush()

        output = subprocess.check_output(params + [config_file.name])
        i = 0
        max_score = greedy_score
        max_id = None
        for line in output.split("\n"):
            if line.startswith(match_pattern):

                score = float(line.split()[4][2:])
                if score > max_score:
                    max_score = score
                    max_id = i
                i += 1
        if max_id is not None:
            greedy_score = max_score
            greedy_summary = "{}{}\n".format(
                greedy_summary, input_sentences[max_id])
            input_ids.pop(max_id)
            config_lines = config_lines[:-1]
        else:
            break
    
    for cf in candidate_files:
        cf.close()
        os.remove(cf.name)
    os.remove(ref_file.name)
    config_file.close()
    os.remove(config_file.name)

    return greedy_summary.strip()
Example #57
0
    def from_file_using_temporary_files(cls,
                                        file,
                                        format=None,
                                        codec=None,
                                        parameters=None,
                                        **kwargs):
        orig_file = file
        file, close_file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False)

        if format:
            format = format.lower()
            format = AUDIO_FILE_EXT_ALIASES.get(format, format)

        def is_format(f):
            f = f.lower()
            if format == f:
                return True
            if isinstance(orig_file, basestring):
                return orig_file.lower().endswith(".{0}".format(f))
            if isinstance(orig_file, bytes):
                return orig_file.lower().endswith(
                    (".{0}".format(f)).encode('utf8'))
            return False

        if is_format("wav"):
            try:
                obj = cls._from_safe_wav(file)
                if close_file:
                    file.close()
                return obj
            except Exception:
                file.seek(0)
        elif is_format("raw") or is_format("pcm"):
            sample_width = kwargs['sample_width']
            frame_rate = kwargs['frame_rate']
            channels = kwargs['channels']
            metadata = {
                'sample_width': sample_width,
                'frame_rate': frame_rate,
                'channels': channels,
                'frame_width': channels * sample_width
            }
            obj = cls(data=file.read(), metadata=metadata)
            if close_file:
                file.close()
            return obj

        input_file = NamedTemporaryFile(mode='wb', delete=False)
        try:
            input_file.write(file.read())
        except OSError:
            input_file.flush()
            input_file.close()
            input_file = NamedTemporaryFile(mode='wb',
                                            delete=False,
                                            buffering=2**31 - 1)
            if close_file:
                file.close()
            close_file = True
            file = open(orig_file, buffering=2**13 - 1, mode='rb')
            reader = file.read(2**31 - 1)
            while reader:
                input_file.write(reader)
                reader = file.read(2**31 - 1)
        input_file.flush()
        if close_file:
            file.close()

        output = NamedTemporaryFile(mode="rb", delete=False)

        conversion_command = [
            cls.converter,
            '-y',  # always overwrite existing files
        ]

        # If format is not defined
        # ffmpeg/avconv will detect it automatically
        if format:
            conversion_command += ["-f", format]

        if codec:
            # force audio decoder
            conversion_command += ["-acodec", codec]

        conversion_command += [
            "-i",
            input_file.name,  # input_file options (filename last)
            "-vn",  # Drop any video streams if there are any
            "-f",
            "wav",  # output options (filename last)
            output.name
        ]

        if parameters is not None:
            # extend arguments with arbitrary set
            conversion_command.extend(parameters)

        log_conversion(conversion_command)

        with open(os.devnull, 'rb') as devnull:
            p = subprocess.Popen(conversion_command,
                                 stdin=devnull,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        p_out, p_err = p.communicate()

        log_subprocess_output(p_out)
        log_subprocess_output(p_err)

        try:
            if p.returncode != 0:
                raise CouldntDecodeError(
                    "Decoding failed. ffmpeg returned error code: {0}\n\nOutput from ffmpeg/avlib:\n\n{1}"
                    .format(p.returncode, p_err))
            obj = cls._from_safe_wav(output)
        finally:
            input_file.close()
            output.close()
            os.unlink(input_file.name)
            os.unlink(output.name)

        return obj
Example #58
0
    def test_split_mates(self):
        'It tests the detection of oligos in sequence files'

        mate_fhand = NamedTemporaryFile(suffix='.fasta')
        linker = TITANIUM_LINKER

        # a complete linker
        seq5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        seq3 = 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT'

        mate_fhand.write('>seq1\n' + seq5 + linker + seq3 + '\n')
        # no linker
        mate_fhand.write('>seq2\n' + seq5 + '\n')
        # a partial linker
        mate_fhand.write('>seq3\n' + seq5 + linker[2:25] + seq3 + '\n')
        # the linker is 5 prime
        mate_fhand.write('>seq4\n' + linker[10:] + seq3 + '\n')
        # two linkers
        mate_fhand.write('>seq5\n' + linker + seq3 + FLX_LINKER + seq5 + '\n')
        # reverse linker
        rev_linker = get_setting('TITANIUM_LINKER_REV')
        mate_fhand.write('>seq6\n' + seq5 + rev_linker + seq3 + '\n')
        mate_fhand.flush()

        splitter = MatePairSplitter()
        new_seqs = []
        for packet in read_seq_packets([mate_fhand], 2):
            new_seqs.append(splitter(packet))

        out_fhand = StringIO()
        write_seq_packets(out_fhand, new_seqs, file_format='fasta')

        result = out_fhand.getvalue()
        xpect = r'>seq1\1'
        xpect += '\n'
        xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC\n'
        xpect += r'>seq1\2'
        xpect += '\n'
        xpect += 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n'
        xpect += '>seq2\n'
        xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC\n'
        xpect += '>seq3_pl.part1\n'
        xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTG\n'
        xpect += '>seq3_pl.part2\n'
        xpect += 'GTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n'
        xpect += '>seq4\n'
        xpect += 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n'
        xpect += '>seq5_mlc.part1\n'
        xpect += 'TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCGATCATGTTGTAT'
        xpect += 'TG'
        xpect += 'TGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n'
        xpect += '>seq5_mlc.part2\n'
        xpect += 'ACCTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        xpect += '\n'
        xpect += r'>seq6\1'
        xpect += '\n'
        xpect += 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC\n'
        xpect += r'>seq6\2'
        xpect += '\n'
        xpect += 'ATCGATCATGTTGTATTGTGTACTATACACACACGTAGGTCGACTATCGTAGCTAGT\n'
        assert xpect == result

        # with a short linker in 3 prime
        mate_fhand = NamedTemporaryFile(suffix='.fasta')
        seq = ">seq1\nCATCAATGACATCACAAATGACATCAACAAACTCAAA"
        seq += "CTCACATACACTGCTGTACCGTAC"
        mate_fhand.write(seq)
        mate_fhand.flush()
        splitter = MatePairSplitter()
        new_seqs = []
        for packet in read_seq_packets([mate_fhand], 1):
            new_seqs.append(splitter(packet))
        out_fhand = StringIO()
        write_seq_packets(out_fhand, new_seqs, file_format='fasta')
        result = ">seq1\nCATCAATGACATCACAAATGACATCAACAAACTCAAACTCACATACA\n"
        assert result == out_fhand.getvalue()
Example #59
0
    def test_mate_pair_unorderer_checker():
        'It tests the mate pair function'
        # with equal seqs except the last ones
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fhand = NamedTemporaryFile()
        fhand.write(open(file1).read())
        fhand.write(open(file2).read())
        fhand.flush()

        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'
        match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand,
                              out_format)

        output = out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with the first seqs different
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend3.sfastq')
        fhand = NamedTemporaryFile()
        fhand.write(open(file1).read())
        fhand.write(open(file2).read())
        fhand.flush()
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'
        match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand,
                              out_format)

        output = out_fhand.getvalue()
        assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
        assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        file1 = os.path.join(TEST_DATA_DIR, 'pairend4.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fhand = NamedTemporaryFile()
        fhand.write(open(file1).read())
        fhand.write(open(file2).read())
        fhand.flush()
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand,
                              out_format)

        output = out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # unordered file
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2_unordered.sfastq')
        fhand = NamedTemporaryFile()
        fhand.write(open(file1).read())
        fhand.write(open(file2).read())
        fhand.flush()
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand,
                              out_format)
        output = out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with reads with no direction
        file1 = os.path.join(TEST_DATA_DIR, 'pairend7.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fhand = NamedTemporaryFile()
        fhand.write(open(file1).read())
        fhand.write(open(file2).read())
        fhand.flush()

        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand,
                              out_format)
        output = out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output

        orp = orphan_out_fhand.getvalue()
        assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
        assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp
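A sketch of the pairing idea these assertions exercise: group read titles by the part before the direction token, emit complete pairs, and route singletons to the orphan output. match_pairs below is illustrative, not seq_crumbs' implementation:

from collections import defaultdict

def match_pairs(titles):
    groups = defaultdict(list)
    for title in titles:
        groups[title.split(' ')[0]].append(title)  # key: name before direction
    pairs = [group for group in groups.values() if len(group) == 2]
    orphans = [t for group in groups.values() if len(group) != 2 for t in group]
    return pairs, orphans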
Example #60
0
    def tmpfile_with_content(self, value):
        tmpf = NamedTemporaryFile(mode='wt')
        tmpf.write(value)
        tmpf.flush()
        return tmpf