Esempio n. 1
0
    def ExtractPromotersFromFeatureSetandDiscoverMotifs(self, ctx, params):
        """
        Build a SequenceSet from a FeatureSet, dump it to FASTA, and hand it
        to find_motifs.

        :param params: instance of type "extract_input" -> structure:
           parameter "workspace_name" of String, parameter "genome_ref" of
           String, parameter "featureSet_ref" of String, parameter
           "promoter_length" of Long, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long. The body also reads
           params['obj_name'].
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN ExtractPromotersFromFeatureSetandDiscoverMotifs
        seq_set_utils = SequenceSetUtils(self.callback_url)
        workspace = params['workspace_name']

        # Step 1: create the SequenceSet and keep only its reference.
        seq_set_ref = seq_set_utils.buildFromFeatureSet({
            'ws_name': workspace,
            'FeatureSet_ref': params['featureSet_ref'],
            'genome_ref': params['genome_ref'],
            'upstream_length': params['promoter_length'],
        })['SequenceSet_ref']

        # Step 2: write the SequenceSet to a FASTA file; the call's return
        # value is not needed.
        raw_fasta = '/kb/module/work/tmp/tmpSeqSet.fa'
        self.BuildFastaFromSequenceSet(ctx, {
            'workspace_name': workspace,
            'SequenceSetRef': seq_set_ref,
            'fasta_outpath': raw_fasta,
        })

        # Step 3: run RemoveRepeats from the raw FASTA into a second file.
        # NOTE(review): the RemoveRepeats output path is not forwarded below;
        # find_motifs still receives the raw FASTA path -- confirm intended.
        repeats_removed_fasta = '/kb/module/work/tmp/SeqSet.fa'
        FastaUtils().RemoveRepeats(raw_fasta, repeats_removed_fasta)

        # Step 4: delegate motif discovery and unwrap the single-item result.
        output = self.find_motifs(ctx, {
            'workspace_name': workspace,
            'fastapath': raw_fasta,
            'motif_min_length': params['motif_min_length'],
            'motif_max_length': params['motif_max_length'],
            'SS_ref': seq_set_ref,
            'obj_name': params['obj_name'],
        })[0]
        #END ExtractPromotersFromFeatureSetandDiscoverMotifs

        # At some point might do deeper type checking...
        if isinstance(output, dict):
            # return the results
            return [output]
        raise ValueError('Method ExtractPromotersFromFeatureSetandDiscoverMotifs return value '
                         'output is not type dict as required.')
 def __init__(self, config):
     """
     Store the callback URL and scratch folder, then create the helper
     objects used by the module.

     :param config: mapping that must contain the key 'scratch'.
     """
     #BEGIN_CONSTRUCTOR
     self.callback_url = os.environ['SDK_CALLBACK_URL']
     self.shared_folder = config['scratch']

     # Every client below is pointed at the same SDK callback URL.
     callback_url = self.callback_url
     self.SSU = SequenceSetUtils(callback_url)
     self.MOU = MotifUtils(callback_url)
     self.MEU = MemeUtil(self.shared_folder)
     self.dfu = DataFileUtil(callback_url)
     self.GR = GenerateReport()
     #END_CONSTRUCTOR
class MotifFinderMEME:
    '''
    Module Name:
    MotifFinderMEME

    Module Description:
    A KBase module: MotifFinderMEME
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "0.0.1"
    GIT_URL = "https://github.com/kbasecollaborations/MotifFinderMEME.git"
    GIT_COMMIT_HASH = "ed2e967cf8aa213388e4166525cd8ce2918d4fae"

    #BEGIN_CLASS_HEADER
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        """
        Store the callback URL and scratch folder and create the helper
        objects used by the methods below.

        :param config: mapping that must contain the key 'scratch'.
        """
        #BEGIN_CONSTRUCTOR
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.shared_folder = config['scratch']
        self.SSU = SequenceSetUtils(self.callback_url)
        self.MOU = MotifUtils(self.callback_url)
        self.MEU = MemeUtil(self.shared_folder)
        self.dfu = DataFileUtil(self.callback_url)
        self.GR = GenerateReport()
        #END_CONSTRUCTOR
        pass

    def find_motifs(self, ctx, params):
        """
        Run MEME on a SequenceSet, save the resulting motif set, and create
        an extended report that links the generated HTML directory.

        :param params: instance of type "find_motifs_params" (SS_ref -
           optional, used for exact genome locations if possible) ->
           structure: parameter "workspace_name" of String, parameter
           "fastapath" of String, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long, parameter "SS_ref" of
           String, parameter "obj_name" of String, parameter "background" of
           Long. "motif_min_length" / "motif_max_length" default to 8 / 16
           and "background" defaults to 0 when absent. "fastapath" is not
           read by this implementation; the FASTA comes from
           SSU.SeqSetToFasta.
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        :raises ValueError: if uploading the HTML directory fails.
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN find_motifs
        # setdefault keeps the original side effect of writing the defaults
        # back into the caller's params.
        motMin = params.setdefault('motif_min_length', 8)
        motMax = params.setdefault('motif_max_length', 16)
        # 'background' is optional: not every caller supplies it, and 0 is
        # the value DiscoverMotifsFromSequenceSet uses for "not requested".
        background = params.get('background', 0)

        promoterFastaFilePath = self.SSU.SeqSetToFasta({
            'ws_name': params['workspace_name'],
            'SS_ref': params['SS_ref']
        })['path']

        MEMEMotifCommand = self.MEU.build_meme_command(promoterFastaFilePath,
                                                       motMin, motMax,
                                                       background)
        meme_out_path = self.MEU.run_meme_command(MEMEMotifCommand)

        meme_params = {
            'ws_name': params['workspace_name'],
            'format': 'MEME',
            'file': {
                'path': meme_out_path
            },
            'obj_name': params['obj_name'],
            'seq_set_ref': params['SS_ref']
        }

        # MOU.parseMotifSet with the same parameters will
        # return a dictionary of the motifset object that you save on
        # your own
        #
        # MOU.saveMotifSet will save the object with DataFileUtils to
        # whatever workspace you specify in ws_name
        #
        # This function will also download the sequence set as a fasta to
        # unique (uuid4) file name in the scratch directory

        obj_ref = self.MOU.saveMotifSet(meme_params)

        # Milliseconds since the epoch, used to name the HTML directory.
        timestamp = str(
            int((datetime.utcnow() -
                 datetime.utcfromtimestamp(0)).total_seconds() * 1000))
        htmlDir = self.shared_folder + '/html' + timestamp
        os.mkdir(htmlDir)

        # Read the saved motif set back and render the HTML report from it.
        get_obj_params = {'object_refs': [obj_ref]}
        memeMotifSet = self.dfu.get_objects(get_obj_params)['data'][0]['data']
        self.GR.MakeMotifReport(htmlDir, memeMotifSet)

        try:
            html_upload_ret = self.dfu.file_to_shock({
                'file_path': htmlDir,
                'make_handle': 0,
                'pack': 'zip'
            })
        except Exception as err:
            # Chain the original error so the root cause stays in the
            # traceback.
            raise ValueError('Error uploading HTML file: ' + str(htmlDir) +
                             ' to shock') from err

        reportname = 'MEMEMotifFinder_report_' + str(uuid.uuid4())

        reportobj = {
            'objects_created': [{
                'ref': obj_ref,
                'description': 'Motif Set generated by MEME'
            }],
            'message': '',
            'direct_html': '',
            'direct_html_link_index': 0,
            'file_links': [],
            'html_links': [{
                'shock_id': html_upload_ret['shock_id'],
                'name': 'index.html',
                'label': 'Save promoter_download.zip'
            }],
            'html_window_height': 220,
            'workspace_name': params['workspace_name'],
            'report_object_name': reportname
        }

        report = KBaseReport(self.callback_url, token=ctx['token'])
        report_info = report.create_extended_report(reportobj)
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
        #END find_motifs

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method find_motifs return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def ExtractPromotersFromFeatureSetandDiscoverMotifs(self, ctx, params):
        """
        Build a SequenceSet from a FeatureSet and run find_motifs on it.

        :param params: instance of type "extract_input" -> structure:
           parameter "workspace_name" of String, parameter "genome_ref" of
           String, parameter "featureSet_ref" of String, parameter
           "promoter_length" of Long, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long, parameter "obj_name" of
           String
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN ExtractPromotersFromFeatureSetandDiscoverMotifs
        BuildParams = {
            'ws_name': params['workspace_name'],
            'FeatureSet_ref': params['featureSet_ref'],
            'genome_ref': params['genome_ref'],
            'upstream_length': params['promoter_length']
        }

        SSref = self.SSU.buildFromFeatureSet(BuildParams)['SequenceSet_ref']

        # No FASTA file is prepared here: find_motifs exports the
        # SequenceSet itself through SSU.SeqSetToFasta and never reads a
        # 'fastapath' entry. The former call to
        # self.BuildFastaFromSequenceSet referenced a method this class does
        # not define and therefore failed with AttributeError.
        findmotifsparams = {
            'workspace_name': params['workspace_name'],
            'motif_min_length': params['motif_min_length'],
            'motif_max_length': params['motif_max_length'],
            'SS_ref': SSref,
            'obj_name': params['obj_name'],
            # This entry point has no background option; 0 matches the
            # "not requested" value used by DiscoverMotifsFromSequenceSet.
            'background': 0
        }

        output = self.find_motifs(ctx, findmotifsparams)[0]

        #END ExtractPromotersFromFeatureSetandDiscoverMotifs
        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError(
                'Method ExtractPromotersFromFeatureSetandDiscoverMotifs return value '
                + 'output is not type dict as required.')
        # return the results
        return [output]

    def DiscoverMotifsFromFasta(self, ctx, params):
        """
        Placeholder entry point; always raises NotImplementedError.

        :param params: instance of type "discover_fasta_input" -> structure:
           parameter "workspace_name" of String, parameter "fasta_path" of
           String
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        :raises NotImplementedError: always.
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN DiscoverMotifsFromFasta
        raise NotImplementedError(
            'Discovery of motifs from a FASTA file is not yet implemented.')
        #END DiscoverMotifsFromFasta

        # The boilerplate below is unreachable until the method is
        # implemented; it is kept to match the other entry points.
        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method DiscoverMotifsFromFasta return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def DiscoverMotifsFromSequenceSet(self, ctx, params):
        """
        Run find_motifs on an existing SequenceSet.

        :param params: instance of type "discover_seq_input" -> structure:
           parameter "workspace_name" of String, parameter "genome_ref" of
           String, parameter "SS_ref" of String, parameter "promoter_length"
           of Long, parameter "motif_min_length" of Long, parameter
           "motif_max_length" of Long, parameter "obj_name" of String,
           parameter "background" of Long, parameter "mask_repeats" of Long,
           parameter "background_group" of mapping from String to String.
           A missing or None "background_group" is treated as
           {'background': 0}.
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN DiscoverMotifsFromSequenceSet

        # Normalise an absent/None group in place, as the original did for
        # None, so later readers of params see a mapping.
        if params.get('background_group') is None:
            params['background_group'] = {'background': 0}

        # Only the exact value 1 switches the background option on.
        use_background = params['background_group'].get('background') == 1

        findmotifsparams = {
            'workspace_name': params['workspace_name'],
            'motif_min_length': params['motif_min_length'],
            'motif_max_length': params['motif_max_length'],
            'SS_ref': params['SS_ref'],
            'obj_name': params['obj_name'],
            'background': 1 if use_background else 0
        }

        output = self.find_motifs(ctx, findmotifsparams)[0]

        #END DiscoverMotifsFromSequenceSet
        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError(
                'Method DiscoverMotifsFromSequenceSet return value ' +
                'output is not type dict as required.')
        # return the results
        return [output]

    def status(self, ctx):
        """Return a one-element list with the module state and version info."""
        #BEGIN_STATUS
        returnVal = {
            'state': "OK",
            'message': "",
            'version': self.VERSION,
            'git_url': self.GIT_URL,
            'git_commit_hash': self.GIT_COMMIT_HASH
        }
        #END_STATUS
        return [returnVal]
Esempio n. 4
0
 def __init__(self, config):
     """
     Remember the scratch directory and create the service clients.

     :param config: mapping that must contain the key 'scratch'.
     """
     self.scratch = config['scratch']

     # All three clients are constructed with the same SDK callback URL.
     callback_url = os.environ['SDK_CALLBACK_URL']
     self.GFU = GenomeFileUtil(callback_url)
     self.DFU = DataFileUtil(callback_url)
     self.SSU = SequenceSetUtils(callback_url)
Esempio n. 5
0
class MotifUtil:
    """Helpers that turn parsed motif records into a motif-set dictionary."""

    def __init__(self, config):
        """
        Remember the scratch directory and create the service clients.

        :param config: mapping that must contain the key 'scratch'.
        """
        self.scratch = config['scratch']
        callback_url = os.environ['SDK_CALLBACK_URL']
        self.GFU = GenomeFileUtil(callback_url)
        self.DFU = DataFileUtil(callback_url)
        self.SSU = SequenceSetUtils(callback_url)

    def build_sequence_fasta(self, params):
        """
        Export the sequence set to FASTA and remember its path.

        The path returned by SSU.SeqSetToFasta is stored in self.seq_file.

        :param params: mapping with the keys 'ws_name' and 'seq_set_ref'.
        :returns: True when the exported file exists.
        :raises FileNotFoundError: if the reported path does not exist.
        """
        self.seq_file = self.SSU.SeqSetToFasta({
            'ws_name': params['ws_name'],
            'SS_ref': params['seq_set_ref']
        })['path']

        if not os.path.exists(self.seq_file):
            raise FileNotFoundError(
                f'Sequence file: {self.seq_file} does not exist')

        return True

    def parse_motif_list(self, motiflist, params):
        """
        Assemble the motif-set dictionary for a list of motifs.

        :param motiflist: iterable of motif dicts accepted by ConvertMotif.
        :param params: mapping with the keys 'ws_name' and 'seq_set_ref'.
        :returns: dict with the keys 'Background', 'Condition',
            'SequenceSet_ref', 'Motifs' and 'Alphabet'.
        """
        self.build_sequence_fasta(params)

        MSO = {
            'Background': self.GetBackground(self.seq_file),
            'Condition': 'Temp',
            'SequenceSet_ref': params['seq_set_ref'],
            'Motifs': [],
            'Alphabet': ['A', 'C', 'G', 'T'],
        }

        for motif in motiflist:
            MSO['Motifs'].append(
                deepcopy(self.ConvertMotif(motif, MSO, self.seq_file)))

        return MSO

    def GetBackground(self, seqfile):
        """
        Compute A/C/G/T frequencies over the sequence lines of a FASTA file.

        Header lines (starting with '>') and blank lines are skipped. Only
        upper-case A, C, G and T are counted.

        :param seqfile: path of the FASTA file.
        :returns: dict mapping 'A', 'C', 'G', 'T' to their fraction of the
            counted bases.
        :raises ValueError: if the file contains no A/C/G/T characters.
        """
        counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
        # The context manager guarantees the handle is closed.
        with open(seqfile) as handle:
            for line in handle:
                text = line.strip()
                # Headers may be written as '>id' or '> id'; neither may
                # contribute to the base counts.
                if not text or text.startswith('>'):
                    continue
                for base in counts:
                    counts[base] += text.count(base)

        total = sum(counts.values())
        if total == 0:
            raise ValueError(
                f'Sequence file: {seqfile} contains no A/C/G/T bases')

        return {base: float(n) / total for base, n in counts.items()}

    def BuildSetDict(self, seqfile):
        """
        Read a FASTA file into a dict of sequence id -> sequence string.

        The id is the header line with every '>' removed and surrounding
        whitespace stripped. Consecutive sequence lines of one record are
        concatenated. If an id occurs more than once, the last record that
        has sequence lines wins. Sequence lines before any header are
        stored under the empty-string id.

        :param seqfile: path of the FASTA file.
        :returns: dict mapping sequence ids to sequences.
        """
        parts_by_id = {}
        current_id = ''
        # True until the first sequence line of the current record is seen,
        # so a repeated id replaces the earlier record instead of extending
        # it.
        start_record = True
        with open(seqfile) as handle:
            for line in handle:
                text = line.strip()
                if not text:
                    continue
                if text.startswith('>'):
                    current_id = text.replace('>', '').strip()
                    start_record = True
                elif start_record:
                    parts_by_id[current_id] = [text]
                    start_record = False
                else:
                    parts_by_id[current_id].append(text)

        return {seq_id: ''.join(parts)
                for seq_id, parts in parts_by_id.items()}

    def ConvertMotif(self, motif, MotifSet, seqfile):
        """
        Convert one parsed motif into the motif-set entry format.

        :param motif: dict with 'Locations' (rows of sequence id, start,
            end, orientation), 'Iupac_signature' and 'pwm' (rows of
            (letter, value) pairs).
        :param MotifSet: dict whose 'Alphabet' lists the PWM/PFM letters.
        :param seqfile: unused; kept so existing callers keep working.
        :returns: dict with 'Motif_Locations', 'Iupac_sequence', 'PWM' and
            'PFM'. 'PFM' holds an empty list per letter.
        """
        locations = []
        for loc in motif['Locations']:
            locations.append({
                # 'Feature_id' was previously considered for loc[0].
                'sequence_id': loc[0],
                'start': int(loc[1]),
                'end': int(loc[2]),
                'orientation': loc[3],
                # NOTE(review): the original extracted the site sequence
                # from the FASTA and then immediately overwrote it with ''.
                # The emitted value is kept as ''; the discarded extraction
                # (and the per-motif re-read of the FASTA it needed) is
                # dropped. Use ExtractSequence here if the sequence should
                # be stored.
                'sequence': '',
            })

        alphabet = MotifSet['Alphabet']
        newMotif = {
            'Motif_Locations': locations,
            'Iupac_sequence': motif['Iupac_signature'],
            'PWM': {letter: [] for letter in alphabet},
            'PFM': {letter: [] for letter in alphabet},
        }

        # Transpose the row-wise PWM into one value list per letter.
        for row in motif['pwm']:
            for letter, value in row:
                newMotif['PWM'][letter].append(value)

        return newMotif

    def ExtractSequence(self, start, end, orientation, id, SeqDict):
        """
        Return SeqDict[id][start:end], reverse-complemented unless the
        orientation is '+'.

        :param start: slice start index.
        :param end: slice end index (exclusive).
        :param orientation: '+' returns the slice as is; any other value
            returns its reverse complement.
        :param id: key of the sequence in SeqDict.
        :param SeqDict: dict of sequence id -> sequence string.
        :raises KeyError: if id is not in SeqDict, or the slice contains a
            character other than A, C, G, T, N on the reverse path.
        """
        segment = SeqDict[id][start:end]
        if orientation == '+':
            return segment

        complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'}
        return ''.join(complement[base] for base in reversed(segment))