예제 #1
0
    def setup_run(self, cluster, params=None):
        """Resolve the input file and the parameters for a command-line run.

        Args:
            cluster: either a path (str) to an existing FASTA file, or an
                iterable of read-like objects exposing ``id`` and ``seq``.
            params: parameters for the run. Falls back to ``self.params``
                when not provided (or falsy).

        Returns:
            (run_input_path, params): path of the file to hand to the tool and
            the parameters to use. When reads are given, the path points to a
            freshly written temporary ``.fa`` file created with
            ``delete=False``; it is not removed here, so the caller owns it.

        Raises:
            ValueError: if ``cluster`` is a string that is not an existing path.
            AttributeError: if the reads do not expose ``id`` / ``seq``.
        """
        if not params:
            params = self.params

        invalid_input_msg = (
            'Input to {} is not valid, please provide either a list of read objects or a valid path to fasta file'
        )

        if isinstance(cluster, str):  # input path provided
            if not os.path.exists(cluster):
                raise ValueError(invalid_input_msg.format(self.get_name()))
            return cluster, params

        # Reads provided: write them to a temporary FASTA file.
        try:
            # Build the FASTA text before creating the file so that invalid
            # reads do not leave an orphaned temp file behind.
            fasta = fasta_from_seq(*zip(*[(x.id, x.seq) for x in cluster]))
            # The context manager flushes and closes the handle; without it
            # the data may still sit in the write buffer when the external
            # tool opens the path. delete=False keeps the file on disk.
            with tempfile.NamedTemporaryFile(suffix='.fa', delete=False) as f:
                f.write(fasta)
        except AttributeError:
            log.error(invalid_input_msg.format(self.get_name()))
            raise
        return f.name, params
예제 #2
0
    def ava(self, reads=None, params=None):
        """Runs minimap with query == target for all-to-all mapping

		Args:
			reads: 					iterable of read-like objects
			params:					mapping parameters. Overrides self.params
		Returns:
			result: 				result of self.run with params using self.paf_parser
		Raises:
			ValueError:				if 'ava-pb' not in params (or self.params if params not provided).
		"""
        if not params:
            params = self.params
        elif not params:
            params = 'ava-pb'
        elif 'ava' not in params:
            raise ValueError(
                '''This MinimapWrapper has parameter configurations that do not included all-to-all mapping. 
								Please change the params attribute or manually overide the parameters when calling MinimapWrapper.ava'''
            )

        reads_file = self.create_temp_file(
            fasta_from_seq(*zip(*[(x.id, x.seq) for x in reads])))

        return self.run(
            reads_file.name, reads_file.name, params=params
        )  ## NOTE / TODO using tempfile.name twice MAY NOT work on some systems
예제 #3
0
    def call_cluster(self, consensus_seq):
        """Map a consensus sequence against the allele database with minimap.

        The sequence is written as a single FASTA record named 'consensus'
        to a temporary file, which is removed when the ``with`` block exits.

        Args:
            consensus_seq: sequence passed to ``fasta_from_seq`` as the
                record body.

        Returns:
            None. NOTE(review): the mapping output is computed but not
            returned in the code visible here -- confirm whether the method
            is expected to return or post-process it.
        """
        with tempfile.NamedTemporaryFile(delete=True) as f:
            f.write(fasta_from_seq('consensus', consensus_seq))
            # Push the buffered record to disk; the mapper opens the file by
            # name and would otherwise risk seeing an empty file.
            f.flush()

            command = [self.minimap.src, '-cx map-pb', f.name, self.allele_db]

            mapping_output = self.minimap.run(command)
예제 #4
0
    def call_cluster(self,
                     cluster,
                     filter_function=None,
                     result_filter=None,
                     temp_file_path=None):
        """Call a cluster by mapping its consensus against the allele database.

        Args:
            cluster: one of
                * an object supporting ``__getitem__`` (e.g. a list of
                  read-like objects with ``id`` / ``seq``); a consensus is
                  generated with ``self.consensus_builder``. Note that plain
                  strings also support ``__getitem__`` and therefore take
                  this route.
                * any other string type that is an existing path; used
                  directly as the blasr input.
                * an object with ``seq`` and ``id`` attributes; used as the
                  consensus itself.
            filter_function: sort key applied to the mapping output; the
                first element after sorting becomes the call. Defaults to
                ``self.filter_function``.
            result_filter: callable applied to the selected mapping.
                Defaults to ``self.result_filter``.
            temp_file_path: optional path used for the intermediate FASTA
                files instead of an anonymous temporary file.

        Returns:
            The filtered call when ``cluster`` cannot store it (it has no
            ``set_call`` / does not accept attributes); otherwise None, with
            the call, candidates and method recorded on ``cluster``. None is
            also returned when the cluster has a single read, no consensus
            could be generated, or blasr returned no mapping.

        Raises:
            ValueError: if the input is invalid, no consensus builder is
                available for a list input, or the mapping cannot be sorted.
        """
        import tempfile

        if len(cluster) == 1:
            # Plain containers have no ``id``; do not crash just to log.
            log.warn('Cluster {} has single read, not calling'.format(
                getattr(cluster, 'id', None)))
            try:
                cluster.consensus_seq = None
                cluster.consensus_builder = None
                cluster.set_call(None)
                cluster.candidates = None
                cluster.candidates_method = str(self)
            except AttributeError:
                # Plain containers cannot carry the descriptive attributes.
                pass
            return None

        consensus_seq = None
        consensus_seq_id = None
        cons_path = None
        if hasattr(cluster, '__getitem__'
                   ):  # assumed to be list of sequences, get consensus
            if getattr(self, 'consensus_builder', None) is None:
                # A missing builder surfaces as AttributeError, which the
                # TypeError handler below would never translate.
                raise ValueError(
                    'Cluster calling: list of cluster sequences provided but no consensus builder instantiated.'
                )
            try:
                if temp_file_path:
                    with open(temp_file_path, 'wb') as reads_file:
                        reads_file.write(
                            fasta_from_seq(*zip(*[(x.id, x.seq)
                                                  for x in cluster])))
                consensus_seq = self.consensus_builder.generate_consensus(
                    temp_file_path if temp_file_path else cluster)
                if not consensus_seq:
                    try:
                        cluster.consensus = None
                        cluster.candidates_method = str(self)
                    except AttributeError:
                        pass
                    return None
                consensus_seq_id = 'cons'
                log.info('Generated consensus with:\n{}'.format(
                    str(self.consensus_builder)))
                log.debug('Output:\n{}'.format(consensus_seq))

                try:
                    cluster.consensus = consensus_seq
                    cluster.consensus_method = str(self.consensus_builder)
                except AttributeError:
                    pass
            except TypeError:
                raise ValueError(
                    'Cluster calling: list of cluster sequences provided but no consensus builder instantiated.'
                )
        else:
            if isinstance(cluster, basestring):  # input is path
                if os.path.exists(cluster):
                    cons_path = cluster
                else:
                    raise ValueError(
                        'Cluster calling input invalid. String provided but is not valid path. If trying to cast as Bio.Seq.Seq-like object'
                    )
            else:  # input is consensus seq
                consensus_seq = cluster.seq
                consensus_seq_id = cluster.id

        ## save blasr target in all cases except path as input
        if consensus_seq:
            try:
                if temp_file_path:
                    cons_file = open(temp_file_path, 'wb+')
                else:
                    # delete=False: blasr opens the file by name after it
                    # has been closed here.
                    cons_file = tempfile.NamedTemporaryFile(delete=False)
                with cons_file:
                    cons_file.write(
                        str(fasta_from_seq(consensus_seq_id, consensus_seq)))
                cons_path = cons_file.name
            except AttributeError:
                raise ValueError(
                    'Cluster calling input invalid. Provide iterable of cluster sequences, path to cluster consensus or Bio.Seq.Seq-like object to call'
                )

        if cons_path is None:
            # e.g. a sequence-like object with an empty ``seq``: there is
            # nothing to map, so fail with a clear error instead of a
            # NameError on the unbound path.
            raise ValueError(
                'Cluster calling input invalid. Provide iterable of cluster sequences, path to cluster consensus or Bio.Seq.Seq-like object to call'
            )

        ## run blasr mapping of consensus_seq against allele database
        command = [self.blasr.src, '', self.allele_db, cons_path]

        try:
            mapping_output = self.blasr.run(*command)
        except ValueError:
            log.warn('Blasr returned no mapping')
            try:
                cluster.set_call(None)
                cluster.candidates = None
                cluster.candidates_method = str(self)
            except AttributeError:
                pass
            return None

        ## select from mapping the desired result as the call
        if not filter_function:
            filter_function = self.filter_function

        try:
            mapping_output = sorted(mapping_output, key=filter_function)
            cluster_call = mapping_output[0]
        except ValueError:
            log.error('Invalid blasr mapping value')
            log.debug('\n'.join([str(x) for x in mapping_output]))
            raise

        if not result_filter:
            result_filter = self.result_filter
        result = result_filter(cluster_call)

        try:
            cluster.set_call([result])
            cluster.candidates = list(mapping_output)
            cluster.candidates_method = str(self)
        except AttributeError:
            # ``cluster`` cannot store the call; hand it back instead.
            return result
        return None
예제 #5
0
    def run(self,
            query,
            target,
            src=None,
            params=None,
            parser=None,
            output_path=None):
        """Run minimap as a subprocess and parse its standard output.

        Args:
            query: path to a FASTA file, or an iterable of read-like objects.
                Unlike ``target``, a single read-like object is not accepted.
            target: path to a FASTA file, an iterable of read-like objects,
                or a single read-like object.
            src (str): path to the minimap executable. ``self.src`` if None.
            params (str): minimap parameters. ``self.params`` if None.
            parser (func(x)): parser for the minimap stdout.
                ``MinimapWrapper.paf_parser`` if None.
            output_path (str): path to cache the raw mapping output to.
                ``self.output_path`` if None.

        Note:
            Read-like objects require ``id`` and ``seq`` attributes. An
            invalid input path is only logged; the run is still attempted.

        Returns:
            The result of ``parser`` applied to the stripped stdout.

        Raises:
            ValueError: if minimap produced no output.
            AttributeError: if ``target`` objects lack ``id`` / ``seq``.
        """
        from subprocess import Popen, PIPE
        from os.path import exists

        query_file = None
        target_file = None
        try:
            ## Check type(query), make temp file and write query seqs as needed
            if isinstance(query, basestring):
                if not exists(query):
                    log.error(
                        'Provided query path is invalid, please provide a path as a string or Bio.SeqIO-like objects'
                    )
                query_path = query
            else:
                query_file = self.create_temp_file(
                    write_data=fasta_from_seq(*zip(*[(x.id, x.seq)
                                                     for x in query])))
                query_path = query_file.name

            ## Check type(target), make temp file and write target seqs as needed
            if isinstance(target, basestring):
                if not exists(target):
                    log.error(
                        'Provided target path is invalid, please provide a path as a string or Bio.SeqIO-like objects'
                    )
                target_path = target
            else:
                try:
                    try:
                        target_file = self.create_temp_file(
                            write_data=fasta_from_seq(*zip(
                                *[(x.id, x.seq) for x in target])))
                    except TypeError:  # is not iterable
                        target_file = self.create_temp_file(
                            write_data=fasta_from_seq(target.id, target.seq))
                except AttributeError:
                    log.error(
                        'Provided target input is invalid, please provide a path as a string or Bio.SeqIO-like objects'
                    )
                    raise
                target_path = target_file.name

            if not src:
                src = self.src
            if not params:
                params = self.params
            if not output_path:
                output_path = self.output_path
            if not parser:
                parser = MinimapWrapper.paf_parser

            # Only src and params are split on whitespace; the input paths
            # are passed as single arguments so paths containing spaces
            # survive intact.
            command = src.split() + (params or '').split() + [
                target_path, query_path
            ]
            log.debug('Running minimap:\n{}'.format(' '.join(command)))
            process = Popen(command, stdout=PIPE, stderr=PIPE)
            stdout, stderr = process.communicate()

            ## save / cache output if needed
            if output_path:
                try:
                    with open(output_path, 'wb') as f:
                        f.write(stdout)
                except (IOError, OSError):
                    # open() raises IOError on Python 2, which is not an
                    # OSError subclass there; catch both.
                    log.error(
                        'Provided minimap output path is not valid, output will be discarded'
                    )

            if not stdout.strip():
                log.error('Minimap returned no mapping')
                log.debug(stderr)
                log.debug(stdout)
                # Best-effort dump of the inputs; an unreadable input must
                # not mask the ValueError raised below.
                for input_path in (query_path, target_path):
                    try:
                        with open(input_path, 'r') as f:
                            log.debug(f.readlines())
                    except (IOError, OSError):
                        log.debug(
                            'Could not read {} for debugging'.format(
                                input_path))
                raise ValueError('Minimap returned no mapping')

            return parser(stdout.strip())
        finally:
            # Close the temp files this call created on every exit path,
            # not only on success.
            if query_file is not None:
                query_file.close()
            if target_file is not None:
                target_file.close()