Ejemplo n.º 1
0
    def __call__(self, seqs, all_seqs):
        """Run Weeder on `seqs`, then score the resulting motifs with MAST.

        The items of `seqs` are written to a temporary FASTA file that is
        handed to ``weeder.run_weeder``. A second FASTA file, built from
        `all_seqs`, is passed to MAST as its sequence database.

        Args:
            seqs: mapping whose items are written to the Weeder input file and
                whose keys are passed to ``meme.read_mast_output``.
            all_seqs: mapping of feature id to a value whose element ``[1]``
                is used as the sequence in the MAST database file.

        Returns:
            A ``meme.MemeRunResult`` built from the MAST output, or an empty
            ``meme.MemeRunResult([], {}, [])`` if running MAST or reading its
            output raises an exception.
        """
        # delete=False: the file name is still used after the `with` block
        # has closed the file.
        with tempfile.NamedTemporaryFile(prefix='weeder.fasta',
                                         delete=False) as outfile:
            filename = outfile.name
            logging.info("Run Weeder on FASTA file: '%s'", filename)
            st.write_sequences_to_fasta_file(outfile, seqs.items())

        pssms = weeder.run_weeder(filename)
        meme_outfile = '%s.meme' % filename
        dbfile = self.meme_suite.make_sequence_file(
            [(feature_id, locseq[1])
             for feature_id, locseq in all_seqs.items()])
        logging.info("# PSSMS created: %d", len(pssms))
        logging.info("run MAST on '%s'", meme_outfile)

        # Motif numbers are 1-based.
        motif_infos = [
            meme.MemeMotifInfo(pssm.values(), motif_num,
                               pssm.sequence_length(),
                               len(pssm.sites()),
                               None, pssm.evalue(),
                               pssm.sites())
            for motif_num, pssm in enumerate(pssms, 1)]

        try:
            mast_out = self.meme_suite.mast(
                meme_outfile, dbfile,
                self.meme_suite.global_background_file())
            pe_values, annotations = meme.read_mast_output(mast_out,
                                                           seqs.keys())
            return meme.MemeRunResult(pe_values, annotations, motif_infos)
        except Exception:
            # Best effort: a failed MAST run yields an empty result, but the
            # failure is logged instead of being silently discarded.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.exception("MAST run on '%s' failed, returning empty result",
                              meme_outfile)
            return meme.MemeRunResult([], {}, [])
Ejemplo n.º 2
0
 def test_write_sequences_to_fasta_file(self):
     """Round trip: sequences written to a FASTA file read back unchanged"""
     seqs = st.read_sequences_from_fasta_file('testdata/fasta_test.fa')
     with open('/tmp/fasta_tmp.fa', 'w') as outputfile:
         st.write_sequences_to_fasta_file(outputfile, seqs)
     seqs2 = st.read_sequences_from_fasta_file('/tmp/fasta_tmp.fa')
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(seqs, seqs2)
 def test_write_sequences_to_fasta_file(self):
     """Writes the test sequences to a FASTA file and checks that reading
     the file back yields the same sequences"""
     seqs = st.read_sequences_from_fasta_file('testdata/fasta_test.fa')
     with open('/tmp/fasta_tmp.fa', 'w') as outputfile:
         st.write_sequences_to_fasta_file(outputfile, seqs)
     seqs2 = st.read_sequences_from_fasta_file('/tmp/fasta_tmp.fa')
     # assertEquals is a deprecated alias; use assertEqual
     self.assertEqual(seqs, seqs2)
Ejemplo n.º 4
0
 def make_sequence_file(self, seqs):
     """Writes a list of (feature_id, sequence) pairs to a new temporary
     FASTA file and returns the name of that file"""
     # delete=False: the file stays on disk after it has been closed
     tmpfile = tempfile.NamedTemporaryFile(prefix='memeseqs', delete=False)
     with tmpfile as fasta_out:
         st.write_sequences_to_fasta_file(fasta_out, seqs)
         return fasta_out.name
Ejemplo n.º 5
0
 def make_sequence_file(self, seqs):
     """Dumps (feature_id, sequence) pairs into a temporary FASTA file.

     The file is not deleted when it is closed; its name is returned."""
     fasta_out = tempfile.NamedTemporaryFile(prefix='memeseqs', delete=False)
     path = fasta_out.name
     with fasta_out:
         st.write_sequences_to_fasta_file(fasta_out, seqs)
     return path
Ejemplo n.º 6
0
    def __call__(self, params):
        """Run Weeder on ``params.seqs`` and score the found motifs with MAST.

        Args:
            params: object providing ``seqs`` (mapping written to the Weeder
                input FASTA file; its keys are passed to
                ``read_mast_output``) and ``used_seqs`` (mapping of feature
                id to a value whose element ``[1]`` is written to the MAST
                database file). It is also forwarded to
                ``weeder.run_weeder``.

        Returns:
            A ``meme.MemeRunResult``. It is empty (``[], {}, []``) when Weeder
            generated no PSSMs.

        Raises:
            Exception: any error raised while running Weeder or MAST is
                logged with its traceback and re-raised. The temporary files
                are removed before it propagates if the remove-tempfiles flag
                is set.
        """
        # delete=False: the file name is still used after the `with` block
        # has closed the file.
        with tempfile.NamedTemporaryFile(prefix='weeder.fasta',
                                         delete=False) as outfile:
            filename = outfile.name
            logging.debug("Run Weeder on FASTA file: '%s'", filename)
            st.write_sequences_to_fasta_file(outfile, params.seqs.iteritems())

        # Bound before the try so that the finally clause can always test it.
        dbfile = None
        try:
            meme_outfile, pssms = weeder.run_weeder(filename, params, self.config_params,
                                                    self.meme_suite.bgmodel)
            if len(pssms) == 0:
                logging.debug('no PSSMS generated, skipping cluster')
                return meme.MemeRunResult([], {}, [])

            dbfile = self.meme_suite.make_sequence_file(
                [(feature_id, locseq[1])
                 for feature_id, locseq in params.used_seqs.iteritems()])
            logging.debug("# PSSMS created: %d %s", len(pssms),
                          str([i.consensus_motif() for i in pssms]))
            logging.debug("run MAST on '%s', dbfile: '%s'", meme_outfile, dbfile)

            # Motif numbers are 1-based.
            motif_infos = [
                meme.MemeMotifInfo(pssm.values, motif_num,
                                   pssm.sequence_length(),
                                   len(pssm.sites),
                                   None, pssm.e_value,
                                   pssm.sites)
                for motif_num, pssm in enumerate(pssms, 1)]
            mast_out = self.meme_suite.mast(meme_outfile, dbfile,
                                            self.meme_suite.global_background_file())
            if 'keep_mastout' in self.config_params['debug']:
                with open('%s.mast' % meme_outfile, 'w') as mast_file:
                    mast_file.write(mast_out)
            pe_values, annotations = self.meme_suite.read_mast_output(mast_out,
                                                                      params.seqs.keys())
            return meme.MemeRunResult(pe_values, annotations, motif_infos)
        except Exception:
            # Log the full traceback (the old debug print showed only the
            # exception type) and let the caller see the error.
            logging.exception("Weeder/MAST run on '%s' failed", filename)
            raise
        finally:
            if self.__remove_tempfiles:
                # The input file itself plus the suffixes listed here.
                for extension in ('', '.wee', '.mix', '.html', '.meme',
                                  '.1.f1', '.1.f2', '.2.f1', '.2.f2'):
                    tmp_name = filename + extension
                    if os.path.exists(tmp_name):
                        try:
                            os.remove(tmp_name)
                        except OSError:
                            logging.warning("could not remove tmp file:'%s'", tmp_name)
                if dbfile:
                    try:
                        os.remove(dbfile)
                    except OSError:
                        logging.warning("could not remove tmp file:'%s'", dbfile)
Ejemplo n.º 7
0
 def test_write_sequences_to_fasta_file_empty_seqs(self):
     """Ensures that only non-empty sequences are written to FASTA"""
     seqs = [['seq1', 'TATATA'], ['seq2', '']]
     with open('/tmp/fasta_tmp.fa', 'w') as outputfile:
         st.write_sequences_to_fasta_file(outputfile, seqs)
     seqs2 = st.read_sequences_from_fasta_file('/tmp/fasta_tmp.fa')
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(1, len(seqs2))
     self.assertEqual(seqs[0][0], seqs2[0][0])
     self.assertEqual(seqs[0][1], seqs2[0][1])
 def test_write_sequences_to_fasta_file_empty_seqs(self):
     """Writes one non-empty and one empty sequence and checks that only
     the non-empty one is read back from the FASTA file"""
     seqs = [['seq1', 'TATATA'], ['seq2', '']]
     with open('/tmp/fasta_tmp.fa', 'w') as outputfile:
         st.write_sequences_to_fasta_file(outputfile, seqs)
     seqs2 = st.read_sequences_from_fasta_file('/tmp/fasta_tmp.fa')
     # assertEquals is a deprecated alias; use assertEqual
     self.assertEqual(1, len(seqs2))
     self.assertEqual(seqs[0][0], seqs2[0][0])
     self.assertEqual(seqs[0][1], seqs2[0][1])
Ejemplo n.º 9
0
    def __call__(self, params):
        """Run Weeder on ``params.seqs`` and score the found motifs with MAST.

        Args:
            params: object providing ``seqs`` (mapping written to the Weeder
                input FASTA file; its keys are passed to
                ``meme.read_mast_output``) and ``used_seqs`` (mapping of
                feature id to a value whose element ``[1]`` is written to the
                MAST database file).

        Returns:
            A ``meme.MemeRunResult`` built from the MAST output, or an empty
            ``meme.MemeRunResult([], {}, [])`` if running MAST or reading its
            output raises an exception.
        """
        # delete=False: the file name is still used after the `with` block
        # has closed the file.
        with tempfile.NamedTemporaryFile(prefix='weeder.fasta',
                                         delete=False) as outfile:
            filename = outfile.name
            logging.info("Run Weeder on FASTA file: '%s'", filename)
            st.write_sequences_to_fasta_file(outfile, params.seqs.items())

        pssms = weeder.run_weeder(filename)
        meme_outfile = '%s.meme' % filename
        dbfile = self.meme_suite.make_sequence_file([
            (feature_id, locseq[1])
            for feature_id, locseq in params.used_seqs.items()
        ])
        logging.info("# PSSMS created: %d %s", len(pssms),
                     str([i.consensus_motif() for i in pssms]))
        logging.info("run MAST on '%s'", meme_outfile)

        # Motif numbers are 1-based.
        motif_infos = [
            meme.MemeMotifInfo(pssm.values, motif_num, pssm.sequence_length(),
                               len(pssm.sites), None, pssm.e_value,
                               pssm.sites)
            for motif_num, pssm in enumerate(pssms, 1)
        ]

        try:
            mast_out = self.meme_suite.mast(
                meme_outfile, dbfile, self.meme_suite.global_background_file())
            pe_values, annotations = meme.read_mast_output(
                mast_out, params.seqs.keys())
            return meme.MemeRunResult(pe_values, annotations, motif_infos)
        except Exception:
            # Best effort: a failed MAST run yields an empty result, but the
            # failure is logged instead of being silently discarded.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.exception("MAST run on '%s' failed, returning empty result",
                              meme_outfile)
            return meme.MemeRunResult([], {}, [])
        finally:
            if self.__remove_tempfiles:
                # The input file itself plus the suffixes listed here.
                for extension in ('', '.wee', '.mix', '.html', '.meme',
                                  '.1.f1', '.1.f2', '.2.f1', '.2.f2'):
                    tmp_name = filename + extension
                    if os.path.exists(tmp_name):
                        try:
                            os.remove(tmp_name)
                        except OSError:
                            logging.warning("could not remove tmp file:'%s'",
                                            tmp_name)
            # NOTE(review): the MAST database file is removed regardless of
            # the remove-tempfiles flag - confirm this is intended.
            try:
                os.remove(dbfile)
            except OSError:
                logging.warning("could not remove tmp file:'%s'", dbfile)
Ejemplo n.º 10
0
    def __call__(self, params):
        """Run Weeder on ``params.seqs`` and score the found motifs with MAST.

        The frequency file name handed to Weeder is chosen from the sequence
        type: 'upstream' selects 'HS', 'p3utr' selects 'HS3P'.

        Args:
            params: object providing ``seqs`` (mapping written to the Weeder
                input FASTA file; its keys are passed to
                ``meme.read_mast_output``) and ``used_seqs`` (mapping of
                feature id to a value whose element ``[1]`` is written to the
                MAST database file).

        Returns:
            A ``meme.MemeRunResult`` built from the MAST output, or an empty
            ``meme.MemeRunResult([], {}, [])`` if running MAST or reading its
            output raises an exception.

        Raises:
            ValueError: if the sequence type is neither 'upstream' nor
                'p3utr'.
        """
        # delete=False: the file name is still used after the `with` block
        # has closed the file.
        with tempfile.NamedTemporaryFile(prefix='weeder.fasta',
                                         delete=False) as outfile:
            filename = outfile.name
            logging.info("Run Weeder on FASTA file: '%s'", filename)
            st.write_sequences_to_fasta_file(outfile, params.seqs.items())

        # NOTE(review): `seqtype` is neither a parameter nor a local of this
        # method, so it must resolve as a global - confirm where it is set.
        if seqtype == 'upstream':
            freqfile = 'HS'
        elif seqtype == 'p3utr':
            freqfile = 'HS3P'
        else:
            # Previously `freqfile` was left unbound here and the call below
            # failed with an UnboundLocalError.
            raise ValueError("unsupported sequence type: %r" % (seqtype,))
        pssms = weeder.run_weeder(filename, freqfile)
        meme_outfile = '%s.meme' % filename
        dbfile = self.meme_suite.make_sequence_file(
            [(feature_id, locseq[1])
             for feature_id, locseq in params.used_seqs.items()])
        logging.info("# PSSMS created: %d %s", len(pssms),
                     str([i.consensus_motif() for i in pssms]))
        logging.info("run MAST on '%s'", meme_outfile)

        # Motif numbers are 1-based.
        motif_infos = [
            meme.MemeMotifInfo(pssm.values, motif_num,
                               pssm.sequence_length(),
                               len(pssm.sites),
                               None, pssm.e_value,
                               pssm.sites)
            for motif_num, pssm in enumerate(pssms, 1)]

        try:
            mast_out = self.meme_suite.mast(
                meme_outfile, dbfile,
                self.meme_suite.global_background_file())
            pe_values, annotations = meme.read_mast_output(mast_out,
                                                           params.seqs.keys())
            return meme.MemeRunResult(pe_values, annotations, motif_infos)
        except Exception:
            # Best effort: a failed MAST run yields an empty result, but the
            # failure is logged instead of being silently discarded.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.exception("MAST run on '%s' failed, returning empty result",
                              meme_outfile)
            return meme.MemeRunResult([], {}, [])
        finally:
            if self.__remove_tempfiles:
                # The input file itself plus the suffixes listed here.
                for extension in ('', '.wee', '.mix', '.html', '.meme',
                                  '.1.f1', '.1.f2', '.2.f1', '.2.f2'):
                    tmp_name = filename + extension
                    if os.path.exists(tmp_name):
                        try:
                            os.remove(tmp_name)
                        except OSError:
                            logging.warning("could not remove tmp file:'%s'", tmp_name)
            # NOTE(review): the MAST database file is removed regardless of
            # the remove-tempfiles flag - confirm this is intended.
            try:
                os.remove(dbfile)
            except OSError:
                logging.warning("could not remove tmp file:'%s'", dbfile)
Ejemplo n.º 11
0
def make_sequences(genome_fasta_file, gene_features_file,
                   outfile='sequences.csv',
                   distance={'upstream': 300, 'downstream': 100},
                   from_end=False, fasta=False):
    """Extract one sequence per gene feature and write them all to `outfile`.

    Args:
        genome_fasta_file: FASTA file with the contig sequences.
        gene_features_file: file read with ``st.read_features_from_file``.
        outfile: path of the file to write.
        distance: dict with the keys 'upstream' and 'downstream'. The default
            dict is only read, never modified.
        from_end: if true, sequences are taken with
            ``st.extract_downstream``, otherwise with ``st.extract_upstream``.
        fasta: if true, write FASTA via ``st.write_sequences_to_fasta_file``;
            otherwise write one "id,sequence" line per feature.
    """
    if from_end:
        span = (distance['upstream'], distance['downstream'])
    else:
        # WARNING: as of 2012-03-22, the st.extract functions used flipped
        # distances! e.g. distance[1] is the UPSTREAM distance and
        # distance[0] is the DOWNSTREAM. CHECK YOUR SEQUENCES after running
        # this! Also, a negative number is expected for DOWNSTREAM. So,
        # (-100,300) must be passed to st.extract_upstream in order to get
        # a sequence from 300 upstream to 100 downstream. WEIRD!
        span = (-1 * distance['downstream'], distance['upstream'])

    # read_sequences_from_fasta_file returns a list of (name, sequence)
    # tuples; index them by contig name.
    contig_dict = dict(st.read_sequences_from_fasta_file(genome_fasta_file))
    print('loaded %i contigs' % len(contig_dict))
    print(','.join(['%s: %ibp' % (name, len(seq))
                    for name, seq in contig_dict.items()]))

    features = st.read_features_from_file(gene_features_file)
    print('loaded %i features' % len(features))

    extract = st.extract_downstream if from_end else st.extract_upstream
    sequences = []
    for feature in features.values():
        location = feature.location()
        # Element [1] of the extraction result is kept as the sequence.
        sequences.append(
            (feature.id(),
             extract(contig_dict[location.contig], location, span)[1]))

    # `with` closes the output file even if writing fails.
    with open(outfile, 'w') as outf:
        if fasta:
            st.write_sequences_to_fasta_file(outf, sequences)
        else:
            sep = ','
            for seq_id, seq in sequences:
                outf.write('%s%s%s\n' % (seq_id, sep, seq))
Ejemplo n.º 12
0
def make_sequences(genome_fasta_file, gene_features_file,
                   outfile='sequences.csv',
                   distance={'upstream': 300, 'downstream': 100},
                   from_end=False, fasta=False):
    """Extract one sequence per gene feature and write them all to `outfile`.

    Args:
        genome_fasta_file: FASTA file with the contig sequences.
        gene_features_file: file read with ``st.read_features_from_file``.
        outfile: path of the file to write.
        distance: dict with the keys 'upstream' and 'downstream'. The default
            dict is only read, never modified.
        from_end: if true, sequences are taken with
            ``st.extract_downstream``, otherwise with ``st.extract_upstream``.
        fasta: if true, write FASTA via ``st.write_sequences_to_fasta_file``;
            otherwise write one "id,sequence" line per feature.
    """
    if from_end:
        span = (distance['upstream'], distance['downstream'])
    else:
        # WARNING: as of 2012-03-22, the st.extract functions used flipped
        # distances! e.g. distance[1] is the UPSTREAM distance and
        # distance[0] is the DOWNSTREAM. CHECK YOUR SEQUENCES after running
        # this! Also, a negative number is expected for DOWNSTREAM. So,
        # (-100,300) must be passed to st.extract_upstream in order to get
        # a sequence from 300 upstream to 100 downstream. WEIRD!
        span = (-1 * distance['downstream'], distance['upstream'])

    # read_sequences_from_fasta_file returns a list of (name, sequence)
    # tuples; index them by contig name.
    contig_dict = dict(st.read_sequences_from_fasta_file(genome_fasta_file))
    print('loaded %i contigs' % len(contig_dict))
    print(','.join(['%s: %ibp' % (name, len(seq))
                    for name, seq in contig_dict.items()]))

    features = st.read_features_from_file(gene_features_file)
    print('loaded %i features' % len(features))

    extract = st.extract_downstream if from_end else st.extract_upstream
    sequences = []
    for feature in features.values():
        # `location` and `id` are plain attributes of the feature here.
        location = feature.location
        # Element [1] of the extraction result is kept as the sequence.
        sequences.append(
            (feature.id,
             extract(contig_dict[location.contig], location, span)[1]))

    # `with` closes the output file even if writing fails.
    with open(outfile, 'w') as outf:
        if fasta:
            st.write_sequences_to_fasta_file(outf, sequences)
        else:
            sep = ','
            for seq_id, seq in sequences:
                outf.write('%s%s%s\n' % (seq_id, sep, seq))