Python SamTools 예제들, ReadsAlignmentUtils.core.sam_tools.SamTools Python 예제들

예제 #1

0

파일 보기

    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.config = config
        self.__LOGGER = logging.getLogger('KBaseRNASeq')
        if 'log_level' in config:
            self.__LOGGER.setLevel(config['log_level'])
        else:
            self.__LOGGER.setLevel(logging.INFO)
        streamHandler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            "%(asctime)s - %(filename)s - %(lineno)d - \
                                       %(levelname)s - %(message)s")
        formatter.converter = time.gmtime
        streamHandler.setFormatter(formatter)
        self.__LOGGER.addHandler(streamHandler)
        self.__LOGGER.info("Logger was set")

        script_utils.check_sys_stat(self.__LOGGER)

        self.scratch = config['scratch']
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.ws_url = config['workspace-url']
        self.dfu = DataFileUtil(self.callback_url)
        self.samtools = SamTools(config)
        #END_CONSTRUCTOR
        pass

예제 #2

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_invalid_validate(self):

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        rval = samt.validate(ifile='accepted_hits_invalid.sam',
                             ipath='/kb/module/test/data/samtools')

        self.assertEquals(1, rval)

예제 #3

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_valid_validate(self):

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        rval = samt.validate(ifile='accepted_hits.sam',
                             ipath='/kb/module/test/data/samtools',
                             ignore=['MISSING_READ_GROUP'])

        self.assertEquals(0, rval)

예제 #4

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test__is_valid(self):
        result = '\n' + \
                 ' \n' + \
                 '## HISTOGRAM	java.lang.String\n' + \
                 'Error Type	Count\n' + \
                 'ERROR:MISSING_READ_GROUP	1\n' + \
                 'WARNING:RECORD_MISSING_READ_GROUP	19498\n'
        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        self.assertFalse(samt._is_valid(result, None))
        self.assertTrue(samt._is_valid(result, ['MISSING_READ_GROUP']))

예제 #5

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_get_stats(self):

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        stats = samt.get_stats(ifile='accepted_hits.sam',
                               ipath='/kb/module/test/data/samtools')

        self.assertEquals(stats['unmapped_reads'], 285)
        self.assertEquals(stats['mapped_reads'], 19213)
        self.assertEquals(stats['singletons'], 0)
        self.assertEquals(stats['total_reads'], 19498)

예제 #6

0

파일 보기

    def _validate(self, params):
        samt = SamTools(self.config, self.__LOGGER)
        if 'ignore' in params:
            path, file = os.path.split(params['file_path'])
            rval = samt.validate(ifile=file,
                                 ipath=path,
                                 ignore=params['ignore'])
        else:
            path, file = os.path.split(params['file_path'])
            rval = samt.validate(ifile=file, ipath=path)

        return rval

예제 #7

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_invalid_convert_bam_to_sam(self):
        opath = '/kb/module/work/'
        ofile = 'accepted_hits_invalid.sam'

        if os.path.exists(opath + ofile):
            os.remove(opath + ofile)

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        result = samt.convert_bam_to_sam(ifile='accepted_hits_invalid.bam',
                                         ipath='/kb/module/test/data/samtools',
                                         ofile=ofile,
                                         opath=opath,
                                         validate=True)

        self.assertEquals(result, 1)

예제 #8

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_valid_convert_bam_to_sam(self):
        opath = '/kb/module/work/'
        ofile = 'accepted_hits_valid_test_output.sam'

        if os.path.exists(opath + ofile):
            os.remove(opath + ofile)

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        result = samt.convert_bam_to_sam(ifile='accepted_hits_sorted.bam',
                                         ipath='/kb/module/test/data/samtools',
                                         ofile=ofile,
                                         opath=opath)

        self.assertEquals(result, 0)
        self.assertTrue(os.path.exists(opath + ofile))
        self.assertEquals(
            hashlib.md5(open(opath + ofile, 'rb').read()).hexdigest(),
            'e8fd0e3d115bef90a520c831a0fbf478')

예제 #9

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_valid_convert_sam_to_bam(self):
        opath = '/kb/module/work/'
        ofile = 'accepted_hits_valid_test_output.bam'

        if os.path.exists(opath + ofile):
            os.remove(opath + ofile)

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        result = samt.convert_sam_to_sorted_bam(
            ifile='accepted_hits.sam',
            ipath='/kb/module/test/data/samtools',
            ofile=ofile,
            opath=opath)

        self.assertEquals(result, 0)
        self.assertTrue(os.path.exists(opath + ofile))
        self.assertEquals(
            hashlib.md5(open(opath + ofile, 'rb').read()).hexdigest(),
            '96c59589b0ed7338ff27de1881cf40b3')

예제 #10

0

파일 보기

파일: sam_tools_test.py 프로젝트: scanon/ReadsAlignmentUtils

    def test_valid_create_bai_from_bam(self):
        opath = '/kb/module/work/'
        ofile = 'accepted_hits_valid_test_output.bai'

        if os.path.exists(opath + ofile):
            os.remove(opath + ofile)

        samt = SamTools(self.__class__.cfg, self.__class__.__LOGGER)

        result = samt.create_bai_from_bam(
            ifile='accepted_hits_sorted.bam',
            ipath='/kb/module/test/data/samtools',
            ofile=ofile,
            opath=opath)

        self.assertEquals(result, 0)
        self.assertTrue(os.path.exists(opath + ofile))
        self.assertEquals(
            hashlib.md5(open(opath + ofile, 'rb').read()).hexdigest(),
            '479a05f10c62e47c68501b7551d44593')

예제 #11

0

파일 보기

class ReadsAlignmentUtils:
    '''
    Module Name:
    ReadsAlignmentUtils

    Module Description:
    A KBase module: ReadsAlignmentUtils

This module is intended for use by Aligners and Assemblers to upload and download alignment files.
The alignment may be uploaded as a sam or bam file. If a sam file is given, it is converted to
the sorted bam format and saved. Upon downloading, optional parameters may be provided to get files
in sam and bai formats from the downloaded bam file. This utility also generates stats from the
stored alignment.
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "0.3.6"
    GIT_URL = "https://github.com/kbaseapps/ReadsAlignmentUtils.git"
    GIT_COMMIT_HASH = "75ef2c24694c056dfca71859d6f344ccff7d4725"

    #BEGIN_CLASS_HEADER

    PARAM_IN_FILE = 'file_path'
    PARAM_IN_SRC_REF = 'source_ref'
    PARAM_IN_DST_REF = 'destination_ref'
    PARAM_IN_CONDITION = 'condition'
    PARAM_IN_READ_LIB_REF = 'read_library_ref'
    PARAM_IN_ASM_GEN_REF = 'assembly_or_genome_ref'

    PARAM_IN_ALIGNED_USING = 'aligned_using'
    PARAM_IN_ALIGNER_VER = 'aligner_version'
    PARAM_IN_ALIGNER_OPTS = 'aligner_opts'
    PARAM_IN_REPLICATE_ID = 'replicate_id'
    PARAM_IN_PLATFORM = 'platform'
    PARAM_IN_BOWTIE2_INDEX = 'bowtie2_index'
    PARAM_IN_SAMPLESET_REF = 'sampleset_ref'
    PARAM_IN_MAPPED_SAMPLE_ID = 'mapped_sample_id'

    PARAM_IN_DOWNLOAD_SAM = 'downloadSAM'
    PARAM_IN_DOWNLOAD_BAI = 'downloadBAI'
    PARAM_IN_VALIDATE = 'validate'

    INVALID_WS_OBJ_NAME_RE = re.compile('[^\\w\\|._-]')
    INVALID_WS_NAME_RE = re.compile('[^\\w:._-]')

    def _get_file_path_info(self, file_path):
        """
        Given a file path, returns the directory, file name, file base and file extension
        """
        dir, file_name = os.path.split(file_path)
        file_base, file_ext = os.path.splitext(file_name)

        return dir, file_name, file_base, file_ext

    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def _check_required_param(self, in_params, param_list):
        """
        Checks if each of the params in the list are in the input params
        """
        for param in param_list:
            if (param not in in_params or not in_params[param]):
                raise ValueError('{} parameter is required'.format(param))

    def _proc_ws_obj_params(self, ctx, params):
        """
        Checks the validity of workspace and object params and returns them
        """
        dst_ref = params.get(self.PARAM_IN_DST_REF)

        ws_name_id, obj_name_id = os.path.split(dst_ref)

        if not bool(ws_name_id.strip()) or ws_name_id == '/':
            raise ValueError("Workspace name or id is required in " +
                             self.PARAM_IN_DST_REF)

        if not bool(obj_name_id.strip()):
            raise ValueError("Object name or id is required in " +
                             self.PARAM_IN_DST_REF)

        if not isinstance(ws_name_id, int):

            try:
                ws_name_id = self.dfu.ws_name_to_id(ws_name_id)
            except DFUError as se:
                prefix = se.message.split('.')[0]
                raise ValueError(prefix)

        self.__LOGGER.info('Obtained workspace name/id ' + str(ws_name_id))

        return ws_name_id, obj_name_id

    def _get_ws_info(self, obj_ref):

        ws = Workspace(self.ws_url)
        try:
            info = ws.get_object_info_new({'objects': [{'ref': obj_ref}]})[0]
        except WorkspaceError as wse:
            self.__LOGGER.error('Logging workspace exception')
            self.__LOGGER.error(str(wse))
            raise
        return info

    def _proc_upload_alignment_params(self, ctx, params):
        """
        Checks the presence and validity of upload alignment params
        """
        self._check_required_param(params, [
            self.PARAM_IN_DST_REF, self.PARAM_IN_FILE, self.PARAM_IN_CONDITION,
            self.PARAM_IN_READ_LIB_REF, self.PARAM_IN_ASM_GEN_REF
        ])

        ws_name_id, obj_name_id = self._proc_ws_obj_params(ctx, params)

        file_path = params.get(self.PARAM_IN_FILE)

        if not (os.path.isfile(file_path)):
            raise ValueError('File does not exist: ' + file_path)

        lib_type = self._get_ws_info(params.get(self.PARAM_IN_READ_LIB_REF))[2]
        if lib_type.startswith('KBaseFile.SingleEndLibrary') or \
           lib_type.startswith('KBaseFile.PairedEndLibrary') or \
           lib_type.startswith('KBaseAssembly.SingleEndLibrary') or \
           lib_type.startswith('KBaseAssembly.PairedEndLibrary'):
            pass
        else:
            raise ValueError(self.PARAM_IN_READ_LIB_REF +
                             ' parameter should be of type' +
                             ' KBaseFile.SingleEndLibrary or' +
                             ' KBaseFile.PairedEndLibrary or' +
                             ' KBaseAssembly.SingleEndLibrary or' +
                             ' KBaseAssembly.PairedEndLibrary')

        obj_type = self._get_ws_info(params.get(self.PARAM_IN_ASM_GEN_REF))[2]
        if obj_type.startswith('KBaseGenomes.Genome') or \
           obj_type.startswith('KBaseGenomeAnnotations.Assembly') or \
           obj_type.startswith('KBaseGenomes.ContigSet'):
            pass
        else:
            raise ValueError(self.PARAM_IN_ASM_GEN_REF +
                             ' parameter should be of type' +
                             ' KBaseGenomes.Genome or' +
                             ' KBaseGenomeAnnotations.Assembly or' +
                             ' KBaseGenomes.ContigSet')
        return ws_name_id, obj_name_id, file_path, lib_type

    def _get_aligner_stats(self, bam_file):
        """
        Gets the aligner stats from BAM file

        How we compute this stats:

        For each segment (line) in SAM/BAM file:
            we take the first element as `reads_id`
                    the second element as `flag`

            if the last bit (0x1) of flag is `1`:
                we treat this segment as paired end reads
            otherwise:
                we treat this segment as single end reads

            For single end reads:
                if the 3rd last bit (0x8) of flag is `1`:
                    we increment unmapped_reads_count
                else:
                    we treat this `reads_id` as mapped

                for all mapped `reads_ids`"
                    if it appears only once:
                        we treat this `reads_id` as `singletons`
                    else:
                        we treat this `reads_id` as `multiple_alignments`

                lastly, total_reads = unmapped_reads_count + identical mapped `reads_id`

            For paired end reads:
                if the 7th last bit (0x40) of flag is `1`:
                    if the 3rd last bit (0x8) of flag is `1`:
                        we increment unmapped_left_reads_count
                    else:
                        we treat this `reads_id` as mapped

                if the 8th last bit ( 0x80) of flag is `1`:
                    if the 3rd last bit (0x8) of flag is `1`:
                        we increment unmapped_right_reads_count
                    else:
                        we treat this `reads_id` as mapped

                for all mapped `reads_ids`"
                    if it appears only once:
                        we treat this `reads_id` as `singletons`
                    else:
                        we treat this `reads_id` as `multiple_alignments`

                lastly, total_reads = unmapped_left_reads_count + unmapped_right_reads_count + identical mapped `reads_id`
        """
        path, file = os.path.split(bam_file)

        self.__LOGGER.info('Start to generate aligner stats')
        start_time = time.time()

        infile = pysam.AlignmentFile(bam_file, 'r')

        properly_paired = 0
        unmapped_reads_count = 0
        unmapped_left_reads_count = 0
        unmapped_right_reads_count = 0
        mapped_reads_ids = []
        mapped_left_reads_ids = []
        mapped_right_reads_ids = []
        paired = False
        for alignment in infile:
            seg = alignment.to_string().split('\t')
            reads_id = seg[0]
            flag = "0000000" + "{0:b}".format(int(seg[1]))

            if flag[-1] == '1':
                paired = True

            if paired:  # process paired end sequence

                if flag[-7] == '1':  # first sequence of a pair
                    if flag[-3] == '1':
                        unmapped_left_reads_count += 1
                    else:
                        mapped_left_reads_ids.append(reads_id)

                if flag[-8] == '1':  # second sequence of a pair
                    if flag[-3] == '1':
                        unmapped_right_reads_count += 1
                    else:
                        mapped_right_reads_ids.append(reads_id)

                if flag[-2] == '1':
                    properly_paired += 1
            else:  # process single end sequence
                if flag[-3] == '1':
                    unmapped_reads_count += 1
                else:
                    mapped_reads_ids.append(reads_id)

                if flag[-2] == '1':
                    properly_paired += 1

        infile.close()

        if paired:
            mapped_reads_ids = mapped_left_reads_ids + mapped_right_reads_ids
            unmapped_reads_count = unmapped_left_reads_count + unmapped_right_reads_count

            mapped_reads_ids_counter = Counter(mapped_reads_ids)
            mapped_reads_count = len(list(mapped_reads_ids_counter))

            singletons = list(mapped_reads_ids_counter.values()).count(1)
            multiple_alignments = mapped_reads_count - singletons

            total_reads = unmapped_reads_count + mapped_reads_count

            properly_paired = properly_paired // 2

        else:
            mapped_reads_ids_counter = Counter(mapped_reads_ids)
            mapped_reads_count = len(list(mapped_reads_ids_counter))

            singletons = list(mapped_reads_ids_counter.values()).count(1)
            multiple_alignments = mapped_reads_count - singletons

            total_reads = unmapped_reads_count + mapped_reads_count

        try:
            alignment_rate = round(
                float(mapped_reads_count) / total_reads * 100, 3)
        except ZeroDivisionError:
            alignment_rate = 0

        if alignment_rate > 100:
            alignment_rate = 100.0

        elapsed_time = time.time() - start_time
        self.__LOGGER.info('Used: {}'.format(
            time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

        stats_data = {
            "alignment_rate": alignment_rate,
            "mapped_reads": mapped_reads_count,
            "multiple_alignments": multiple_alignments,
            "properly_paired": properly_paired,
            "singletons": singletons,
            "total_reads": total_reads,
            "unmapped_reads": unmapped_reads_count
        }
        return stats_data

    def _validate(self, params):
        samt = SamTools(self.config, self.__LOGGER)
        if 'ignore' in params:
            path, file = os.path.split(params['file_path'])
            rval = samt.validate(ifile=file,
                                 ipath=path,
                                 ignore=params['ignore'])
        else:
            path, file = os.path.split(params['file_path'])
            rval = samt.validate(ifile=file, ipath=path)

        return rval

    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.config = config
        self.__LOGGER = logging.getLogger('KBaseRNASeq')
        if 'log_level' in config:
            self.__LOGGER.setLevel(config['log_level'])
        else:
            self.__LOGGER.setLevel(logging.INFO)
        streamHandler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            "%(asctime)s - %(filename)s - %(lineno)d - \
                                       %(levelname)s - %(message)s")
        formatter.converter = time.gmtime
        streamHandler.setFormatter(formatter)
        self.__LOGGER.addHandler(streamHandler)
        self.__LOGGER.info("Logger was set")

        script_utils.check_sys_stat(self.__LOGGER)

        self.scratch = config['scratch']
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.ws_url = config['workspace-url']
        self.dfu = DataFileUtil(self.callback_url)
        self.samtools = SamTools(config)
        #END_CONSTRUCTOR
        pass

    def validate_alignment(self, ctx, params):
        """
        :param params: instance of type "ValidateAlignmentParams" (* Input
           parameters for validating a reads alignment. For validation errors
           to ignore, see
           http://broadinstitute.github.io/picard/command-line-overview.html#V
           alidateSamFile) -> structure: parameter "file_path" of String,
           parameter "ignore" of list of String
        :returns: instance of type "ValidateAlignmentOutput" (* Results from
           validate alignment *) -> structure: parameter "validated" of type
           "boolean" (A boolean - 0 for false, 1 for true. @range (0, 1))
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN validate_alignment

        rval = self._validate(params)

        if rval == 0:
            returnVal = {'validated': True}
        else:
            returnVal = {'validated': False}

        #END validate_alignment

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method validate_alignment return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def upload_alignment(self, ctx, params):
        """
        Validates and uploads the reads alignment
                How we compute BAM stats:
                For each segment (line) in SAM/BAM file:
                    we take the first element as `reads_id`
                            the second element as `flag`
                    if the last bit (0x1) of flag is `1`:
                        we treat this segment as paired end reads
                    otherwise:
                        we treat this segment as single end reads
                    For single end reads:
                        if the 3rd last bit (0x8) of flag is `1`:
                            we increment unmapped_reads_count
                        else:
                            we treat this `reads_id` as mapped
                        for all mapped `reads_ids`"
                            if it appears only once:
                                we treat this `reads_id` as `singletons`
                            else:
                                we treat this `reads_id` as `multiple_alignments`
                        lastly, total_reads = unmapped_reads_count + identical mapped `reads_id`
                    For paired end reads:
                        if the 7th last bit (0x40) of flag is `1`:
                            if the 3rd last bit (0x8) of flag is `1`:
                                we increment unmapped_left_reads_count
                            else:
                                we treat this `reads_id` as mapped
                        if the 8th last bit ( 0x80) of flag is `1`:
                            if the 3rd last bit (0x8) of flag is `1`:
                                we increment unmapped_right_reads_count
                            else:
                                we treat this `reads_id` as mapped
                        for all mapped `reads_ids`"
                            if it appears only once:
                                we treat this `reads_id` as `singletons`
                            else:
                                we treat this `reads_id` as `multiple_alignments`
                        lastly, total_reads = unmapped_left_reads_count + unmapped_right_reads_count + identical mapped `reads_id`
        :param params: instance of type "UploadAlignmentParams" (* Required
           input parameters for uploading a reads alignment string
           destination_ref -  object reference of alignment destination. The
           object ref is 'ws_name_or_id/obj_name_or_id' where ws_name_or_id
           is the workspace name or id and obj_name_or_id is the object name
           or id file_path              -  File with the path of the sam or
           bam file to be uploaded. If a sam file is provided, it will be
           converted to the sorted bam format before being saved
           read_library_ref       -  workspace object ref of the read sample
           used to make the alignment file condition              -
           assembly_or_genome_ref -  workspace object ref of genome assembly
           or genome object that was used to build the alignment *) ->
           structure: parameter "destination_ref" of String, parameter
           "file_path" of String, parameter "read_library_ref" of String,
           parameter "condition" of String, parameter
           "assembly_or_genome_ref" of String, parameter "aligned_using" of
           String, parameter "aligner_version" of String, parameter
           "aligner_opts" of mapping from String to String, parameter
           "replicate_id" of String, parameter "platform" of String,
           parameter "bowtie2_index" of type "ws_bowtieIndex_id", parameter
           "sampleset_ref" of type "ws_Sampleset_ref", parameter
           "mapped_sample_id" of mapping from String to mapping from String
           to String, parameter "validate" of type "boolean" (A boolean - 0
           for false, 1 for true. @range (0, 1)), parameter "ignore" of list
           of String
        :returns: instance of type "UploadAlignmentOutput" (*  Output from
           uploading a reads alignment  *) -> structure: parameter "obj_ref"
           of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN upload_alignment

        self.__LOGGER.info(
            'Starting upload Reads Alignment, parsing parameters ')
        pprint(params)

        ws_name_id, obj_name_id, file_path, lib_type = self._proc_upload_alignment_params(
            ctx, params)

        dir, file_name, file_base, file_ext = self._get_file_path_info(
            file_path)

        if self.PARAM_IN_VALIDATE in params and params[
                self.PARAM_IN_VALIDATE] is True:
            if self._validate(params) == 1:
                raise Exception('{0} failed validation'.format(file_path))

        bam_file = file_path
        if file_ext.lower() == '.sam':
            bam_file = os.path.join(dir, file_base + '.bam')
            self.samtools.convert_sam_to_sorted_bam(ifile=file_name,
                                                    ipath=dir,
                                                    ofile=bam_file)

        uploaded_file = self.dfu.file_to_shock({
            'file_path': bam_file,
            'make_handle': 1
        })
        file_handle = uploaded_file['handle']
        file_size = uploaded_file['size']

        aligner_stats = self._get_aligner_stats(file_path)
        aligner_data = {
            'file': file_handle,
            'size': file_size,
            'condition': params.get(self.PARAM_IN_CONDITION),
            'read_sample_id': params.get(self.PARAM_IN_READ_LIB_REF),
            'library_type': lib_type,
            'genome_id': params.get(self.PARAM_IN_ASM_GEN_REF),
            'alignment_stats': aligner_stats
        }
        optional_params = [
            self.PARAM_IN_ALIGNED_USING, self.PARAM_IN_ALIGNER_VER,
            self.PARAM_IN_ALIGNER_OPTS, self.PARAM_IN_REPLICATE_ID,
            self.PARAM_IN_PLATFORM, self.PARAM_IN_BOWTIE2_INDEX,
            self.PARAM_IN_SAMPLESET_REF, self.PARAM_IN_MAPPED_SAMPLE_ID
        ]
        for opt_param in optional_params:
            if opt_param in params and params[opt_param] is not None:
                aligner_data[opt_param] = params[opt_param]

        self.__LOGGER.info('=========  Adding extra_provenance_refs')
        self.__LOGGER.info(params.get(self.PARAM_IN_READ_LIB_REF))
        self.__LOGGER.info(params.get(self.PARAM_IN_ASM_GEN_REF))
        self.__LOGGER.info('=======================================')

        res = self.dfu.save_objects({
            "id":
            ws_name_id,
            "objects": [{
                "type":
                "KBaseRNASeq.RNASeqAlignment",
                "data":
                aligner_data,
                "name":
                obj_name_id,
                "extra_provenance_input_refs": [
                    params.get(self.PARAM_IN_READ_LIB_REF),
                    params.get(self.PARAM_IN_ASM_GEN_REF)
                ]
            }]
        })[0]
        self.__LOGGER.info('save complete')

        returnVal = {
            'obj_ref': str(res[6]) + '/' + str(res[0]) + '/' + str(res[4])
        }

        self.__LOGGER.info('Uploaded object: ')
        self.__LOGGER.info(returnVal)

        #END upload_alignment

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method upload_alignment return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def download_alignment(self, ctx, params):
        """
        Downloads alignment files in .bam, .sam and .bai formats. Also downloads alignment stats *
        :param params: instance of type "DownloadAlignmentParams" (* Required
           input parameters for downloading a reads alignment string
           source_ref -  object reference of alignment source. The object ref
           is 'ws_name_or_id/obj_name_or_id' where ws_name_or_id is the
           workspace name or id and obj_name_or_id is the object name or id
           *) -> structure: parameter "source_ref" of String, parameter
           "downloadSAM" of type "boolean" (A boolean - 0 for false, 1 for
           true. @range (0, 1)), parameter "downloadBAI" of type "boolean" (A
           boolean - 0 for false, 1 for true. @range (0, 1)), parameter
           "validate" of type "boolean" (A boolean - 0 for false, 1 for true.
           @range (0, 1)), parameter "ignore" of list of String
        :returns: instance of type "DownloadAlignmentOutput" (*  The output
           of the download method.  *) -> structure: parameter
           "destination_dir" of String, parameter "stats" of type
           "AlignmentStats" -> structure: parameter "properly_paired" of
           Long, parameter "multiple_alignments" of Long, parameter
           "singletons" of Long, parameter "alignment_rate" of Double,
           parameter "unmapped_reads" of Long, parameter "mapped_reads" of
           Long, parameter "total_reads" of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN download_alignment

        self.__LOGGER.info('Running download_alignment with params:\n' +
                           pformat(params))

        inref = params.get(self.PARAM_IN_SRC_REF)
        if not inref:
            raise ValueError('{} parameter is required'.format(
                self.PARAM_IN_SRC_REF))

        try:
            alignment = self.dfu.get_objects({'object_refs': [inref]})['data']
        except DFUError as e:
            self.__LOGGER.error(
                'Logging stacktrace from workspace exception:\n' + e.data)
            raise

        # set the output dir
        uuid_str = str(uuid.uuid4())
        output_dir = os.path.join(self.scratch, 'download_' + uuid_str)
        self._mkdir_p(output_dir)

        file_ret = self.dfu.shock_to_file({
            'shock_id':
            alignment[0]['data']['file']['id'],
            'file_path':
            output_dir
        })
        if zipfile.is_zipfile(file_ret.get('file_path')):
            with zipfile.ZipFile(file_ret.get('file_path')) as z:
                z.extractall(output_dir)

        for f in glob.glob(output_dir + '/*.zip'):
            os.remove(f)

        bam_files = glob.glob(output_dir + '/*.bam')

        if len(bam_files) == 0:
            raise ValueError("Alignment object does not contain a bam file")

        for bam_file_path in bam_files:
            dir, file_name, file_base, file_ext = self._get_file_path_info(
                bam_file_path)
            if params.get(self.PARAM_IN_VALIDATE, False):
                validate_params = {'file_path': bam_file_path}
                if self._validate(validate_params) == 1:
                    raise Exception(
                        '{0} failed validation'.format(bam_file_path))

            if params.get(self.PARAM_IN_DOWNLOAD_BAI, False):
                bai_file = file_base + '.bai'
                bai_file_path = os.path.join(output_dir, bai_file)
                self.samtools.create_bai_from_bam(ifile=file_name,
                                                  ipath=output_dir,
                                                  ofile=bai_file)
                if not os.path.isfile(bai_file_path):
                    raise ValueError('Error creating {}'.format(bai_file_path))

            if params.get(self.PARAM_IN_DOWNLOAD_SAM, False):
                sam_file = file_base + '.sam'
                sam_file_path = os.path.join(output_dir, sam_file)
                self.samtools.convert_bam_to_sam(ifile=file_name,
                                                 ipath=output_dir,
                                                 ofile=sam_file)
                if not os.path.isfile(sam_file_path):
                    raise ValueError('Error creating {}'.format(sam_file_path))

        returnVal = {
            'destination_dir': output_dir,
            'stats': alignment[0]['data']['alignment_stats']
        }

        #END download_alignment

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method download_alignment return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def export_alignment(self, ctx, params):
        """
        Wrapper function for use by in-narrative downloaders to download alignments from shock *
        :param params: instance of type "ExportParams" (* Required input
           parameters for exporting a reads alignment string source_ref -
           object reference of alignment source. The object ref is
           'ws_name_or_id/obj_name_or_id' where ws_name_or_id is the
           workspace name or id and obj_name_or_id is the object name or id
           *) -> structure: parameter "source_ref" of String, parameter
           "exportSAM" of type "boolean" (A boolean - 0 for false, 1 for
           true. @range (0, 1)), parameter "exportBAI" of type "boolean" (A
           boolean - 0 for false, 1 for true. @range (0, 1)), parameter
           "validate" of type "boolean" (A boolean - 0 for false, 1 for true.
           @range (0, 1)), parameter "ignore" of list of String
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN export_alignment

        inref = params.get(self.PARAM_IN_SRC_REF)
        if not inref:
            raise ValueError('{} parameter is required'.format(
                self.PARAM_IN_SRC_REF))

        if params.get(self.PARAM_IN_VALIDATE, False) or \
           params.get('exportBAI', False) or \
           params.get('exportSAM', False):
            """
            Need to validate or convert files. Use download_alignment
            """
            download_params = {}
            for key, val in params.items():
                download_params[key.replace('export', 'download')] = val

            download_retVal = self.download_alignment(ctx, download_params)[0]

            export_dir = download_retVal['destination_dir']

            # package and load to shock
            ret = self.dfu.package_for_download({
                'file_path': export_dir,
                'ws_refs': [inref]
            })
            output = {'shock_id': ret['shock_id']}
        else:
            """
            return shock id from the object
            """
            try:
                alignment = self.dfu.get_objects({'object_refs':
                                                  [inref]})['data']
            except DFUError as e:
                self.__LOGGER.error(
                    'Logging stacktrace from workspace exception:\n' + e.data)
                raise
            output = {'shock_id': alignment[0]['data']['file']['id']}

        #END export_alignment

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method export_alignment return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def status(self, ctx):
        #BEGIN_STATUS
        returnVal = {
            'state': "OK",
            'message': "",
            'version': self.VERSION,
            'git_url': self.GIT_URL,
            'git_commit_hash': self.GIT_COMMIT_HASH
        }
        #END_STATUS
        return [returnVal]