Example #1
0
    def append_pfm_to_database(self, meme_db, trim_frac=0.08, freqs=[0.27, 0.23, 0.23, 0.27]):
        """Append this pattern's trimmed PFM to a MEME-format motif file.

        The motif is stored under the name ``PWM_<short_name>``. If a motif
        with exactly that name is already listed in ``meme_db``, a message is
        printed and nothing is written. When ``meme_db`` is missing or empty,
        the MEME header (including the background frequencies) is written
        before the motif.

        Args:
            meme_db: path of the MEME file to create or extend
            trim_frac: forwarded as ``frac`` to ``trim_pssm_idx`` to choose the
                trimming boundaries
            freqs: background frequencies in A, C, G, T order; only used when
                the header is written (the default list is never mutated)

        Returns:
            None
        """
        motif_name = "PWM_{}".format(self.short_name)
        if os.path.exists(meme_db) and os.path.getsize(meme_db) > 0:
            # Compare against the whole "MOTIF <name>" entry. A plain substring
            # test would report e.g. "PWM_p1" as present once "PWM_p10" exists.
            motif_header = "MOTIF {}".format(motif_name)
            with open(meme_db, "r") as fp:
                for line in fp:
                    entry = line.rstrip("\r\n")
                    # the second form tolerates extra fields after the name
                    if entry == motif_header or entry.startswith(motif_header + " "):
                        print("{} already exists".format(motif_name))
                        return None
        else:
            # New (or empty) database: start it with the MEME header.
            with open(meme_db, "w") as fp:
                fp.write("MEME version 4\n\nALPHABET= ACGT\n\nstrands: + -\n\nBackground letter frequencies\n\n")
                fp.write("A {0} C {1} G {2} T {3}\n\n".format(freqs[0], freqs[1], freqs[2], freqs[3]))

        i, j = trim_pssm_idx(self.get_seq_ic(), frac=trim_frac)
        trimmed_pattern = self.trim(i, j)
        pfm = trimmed_pattern.seq
        with open(meme_db, 'a') as fp:
            fp.write("MOTIF {}\n".format(motif_name))
            # w= is the number of rows (positions) of the trimmed matrix
            fp.write("letter-probability matrix: alength= 4 w= {} nsites= 20 E= 0e+0\n".format(pfm.shape[0]))
            for row in pfm:
                fp.write('%.5f %.5f %.5f %.5f\n' % tuple(row))
            fp.write("\n")
        return None
Example #2
0
    def fetch_tomtom_matches(self, background=[0.27, 0.23, 0.23, 0.27],
                             tomtom_exec_path='tomtom',
                             motifs_db='HOCOMOCOv11_full_HUMAN_mono_meme_format.meme',
                             save_report=False,
                             report_dir='./',
                             temp_dir='./',
                             trim_frac=0.08):
        """Fetch matches for this pattern from a motifs database using TomTom.

        The pattern is trimmed, written to ``<temp_dir>/query_file`` in MEME
        format, and compared against ``motifs_db`` by running TomTom. The
        query file is removed afterwards, also when TomTom fails.

        Args:
            background: list with ACGT background probabilities
            tomtom_exec_path: path to TomTom executable
            motifs_db: path to motifs database in meme format
            save_report: if True, TomTom writes its report into ``report_dir``
                and the matches are read from ``tomtom.tsv`` there; otherwise
                TomTom runs in text mode and its standard output is parsed
            report_dir: output directory handed to TomTom when ``save_report``
                is True
            temp_dir: directory for storing temp files
            trim_frac: forwarded as ``frac`` to ``trim_pssm_idx`` to choose the
                trimming boundaries
        Returns:
            list: one dictionary per match reported by TomTom, with keys
                'Target ID', 'p-value', 'E-value', 'q-value'
        Raises:
            subprocess.CalledProcessError: if TomTom exits with a non-zero status
        """
        fname = os.path.join(temp_dir, 'query_file')
        # trim and prepare meme file
        i, j = trim_pssm_idx(self.get_seq_ic(), frac=trim_frac)
        trimmed_pattern = self.trim(i, j)
        trimmed_pattern.write_meme_file(background, fname)

        try:
            # run tomtom
            if save_report:
                cmd = '{0} -no-ssc -oc {1} -verbosity 1 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10.0 {2} {3}'.format(tomtom_exec_path, report_dir, fname, motifs_db)
                print(cmd)
                subprocess.check_output(cmd, shell=True)
                df = pd.read_table(os.path.join(report_dir, "tomtom.tsv"))
                df = df[['Target_ID', 'p-value', 'E-value', 'q-value']]
                # tomtom.tsv ends with '#' comment lines; they parse as rows
                # without a p-value and must not be reported as matches.
                df = df.dropna(subset=['p-value'])
                schema = list(df.columns)
                # DataFrame.get_values() was removed in pandas 1.0
                dat = df.values
            else:
                cmd = "{0} -no-ssc -oc . -verbosity 1 -text -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10.0 {1} {2}".format(tomtom_exec_path, fname, motifs_db)
                print(cmd)
                out = subprocess.check_output(cmd, shell=True)
                dat = [x.split('\t') for x in out.strip().decode("utf-8").split('\n')]
                schema = dat[0]
                dat = dat[1:]
        finally:
            # Best-effort cleanup of the query file, mirroring the previous
            # `rm` call, which also did not abort on failure.
            try:
                os.remove(fname)
            except OSError:
                pass

        tget_idx = schema.index('Target_ID')
        pval_idx = schema.index('p-value')
        eval_idx = schema.index('E-value')
        qval_idx = schema.index('q-value')
        r = []
        for t in dat:
            # a short row marks the end of the result table in text output
            if len(t) < 4:
                break
            r.append({
                'Target ID': t[tget_idx],
                'p-value': float(t[pval_idx]),
                'E-value': float(t[eval_idx]),
                'q-value': float(t[qval_idx]),
            })

        return r
Example #3
0
    def append_pwm_to_database(self, pwm_db, trim_frac=0.08):
        """Append this pattern's trimmed PWM to a database used by motif search tools.

        The entry is stored under a ``>PWM_<short_name>`` header line followed
        by one line of four values per row of the trimmed ``get_seq_ic()``
        matrix. If an entry with exactly that name already exists in
        ``pwm_db``, a message is printed and nothing is written.

        Args:
            pwm_db: path of the database file to create or extend
            trim_frac: forwarded as ``frac`` to ``trim_pssm_idx`` to choose the
                trimming boundaries

        Returns:
            None
        """
        pwm_name = "PWM_{}".format(self.short_name)
        if os.path.exists(pwm_db):
            # Compare whole header lines. A plain substring test would report
            # e.g. "PWM_p1" as present once "PWM_p10" has been stored.
            entry_header = ">{}".format(pwm_name)
            with open(pwm_db, 'r') as fp:
                if any(line.rstrip("\r\n") == entry_header for line in fp):
                    print("{} already exists".format(pwm_name))
                    return None

        i, j = trim_pssm_idx(self.get_seq_ic(), frac=trim_frac)
        trimmed_pattern = self.trim(i, j)
        pssm = trimmed_pattern.get_seq_ic()
        with open(pwm_db, 'a') as fp:
            fp.write(">{}\n".format(pwm_name))
            for row in pssm:
                fp.write('%.5f %.5f %.5f %.5f\n' % tuple(row))

        return None
Example #4
0
 def get_trim_idx(self, pattern):
     """Return the trimming indices for `pattern`.

     `pattern` is split on "/" and the pieces are passed as positional
     arguments to `self.mr.get_pssm`; the result is handed to
     `trim_pssm_idx` together with `frac=self.trim_frac`.
     """
     fetch_pssm = self.mr.get_pssm
     pssm = fetch_pssm(*pattern.split("/"))
     return trim_pssm_idx(pssm, frac=self.trim_frac)
Example #5
0
 def _trim_seq_ic_ij(self, trim_frac=0.0):
     """Return the result of `trim_pssm_idx` applied to `self.get_seq_ic()`.

     `trim_frac` is forwarded as the `frac` keyword argument.
     """
     seq_ic = self.get_seq_ic()
     return trim_pssm_idx(seq_ic, frac=trim_frac)