Python validate_profile_infoの例

プログラミング言語: Python

名前空間/パッケージ名: sortseq_tools.qc

メソッド/関数: validate_profile_info

hotexamples.comのコード掲載数: 2

Python validate_profile_info - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsortseq_tools.qc.validate_profile_infoの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test_profile_info.py プロジェクト: irelandb/sortseq_tools

    def test_profile_info(self):
        """ Test the ability of sortseq_tools.profile_info to compute mutation rates based on total count values
        """

        print '\nIn test_profile_info...'
        file_names = glob.glob(self.input_dir+'dataset_*.txt')
        for err in [True,False]:
            for file_name in file_names:
                print '\t%s, err=%s ='%(file_name,str(err)),
                description = file_name.split('_')[-1].split('.')[0]
                executable = lambda: \
                    profile_info.main(io.load_dataset(file_name),err=err)

                # If good, then profile_info.main should produce a valid df
                if '_good' in file_name:
                    try:
                        df = executable()
                        qc.validate_profile_info(df)
                        out_file = self.output_dir+\
                            'profile_info_%s_err_%s.txt'%(description,str(err))
                        io.write(df,out_file)
                        io.load_profile_info(out_file)
                        print 'good.'
                    except:
                        print 'bad (ERROR).'
                        raise

                # If bad, then profile_info.main should raise SortSeqError
                elif '_bad' in file_name:
                    try:
                        self.assertRaises(SortSeqError,executable)
                        print 'badtype.'
                    except:
                        print 'good (ERROR).'
                        raise

                # There are no other options
                else:
                    raise SortSeqError('Unrecognized class of file_name.')

コード例 #2

ファイルを表示

ファイル: profile_info.py プロジェクト: irelandb/sortseq_tools

def main(dataset_df, err=False, method='naive',
    pseudocount=1.0, start=0, end=None):
    """
    Computes the mutual information (in bits), at each position, between the
    character and the bin number.

    Arguments:
        dataset_df (pd.DataFrame): A dataframe containing a valid dataset.
        err (bool): If True, also record the error estimate returned by
            info.estimate_mutualinfo in an 'info_err' column.
        method (str): Which method to use to estimate mutual information
            (passed through to info.estimate_mutualinfo).
        pseudocount (float): Pseudocount passed through to
            info.estimate_mutualinfo.
        start (int): An integer specifying the sequence start position
            (inclusive).
        end (int): An integer specifying the sequence end position
            (exclusive). None means the full sequence length.

    Returns:
        info_df (pd.DataFrame): A dataframe containing results ('pos',
            'info' and, if err is True, 'info_err'), as returned by
            qc.validate_profile_info(..., fix=True).

    Raises:
        SortSeqError: If there are fewer than 2 count columns, if there is
            not exactly one sequence column, or if start/end do not
            describe a non-empty range inside the sequence.
    """

    # Validate dataset_df
    qc.validate_dataset(dataset_df)

    # Get number of bins
    bin_cols = [c for c in dataset_df.columns if qc.is_col_type(c,'ct_')]
    if len(bin_cols) < 2:
        raise SortSeqError('Information profile requires at least 2 bins.')
    bins = [int(c.split('_')[1]) for c in bin_cols]
    num_bins = len(bins)

    # Get number of characters
    seq_cols = [c for c in dataset_df.columns if qc.is_col_type(c,'seqs')]
    if len(seq_cols) != 1:
        raise SortSeqError('Must be only one seq column.')
    seq_col = seq_cols[0]
    seqtype = qc.colname_to_seqtype_dict[seq_col]
    alphabet = qc.seqtype_to_alphabet_dict[seqtype]
    ct_cols = ['ct_'+a for a in alphabet]
    num_chars = len(alphabet)

    # Get sequence length and check start, end numbers.
    # .iloc[0] reads the first row by position, so this does not depend on
    # the dataframe's index containing the label 0.
    num_pos = len(dataset_df[seq_col].iloc[0])
    if not (0 <= start < num_pos):
        raise SortSeqError('Invalid start==%d, num_pos==%d'%(start,num_pos))
    if end is None:
        end = num_pos
    elif end > num_pos:
        raise SortSeqError('Invalid end==%d, num_pos==%d'%(end,num_pos))
    elif end <= start:
        raise SortSeqError('Invalid: start==%d >= end==%d'%(start,end))

    # Record positions in new dataframe.
    # .loc slices by label and includes both endpoints, hence end-1.
    # NOTE(review): assumes profile_ct.main returns rows labelled by
    # position (0..num_pos-1) -- confirm against profile_ct.
    counts_df = profile_ct.main(dataset_df)
    info_df = counts_df.loc[start:(end-1),['pos']].copy() # rows from start:end
    info_df['info'] = 0.0
    if err:
        info_df['info_err'] = 0.0

    # Fill in 3D array of counts, indexed as [position, character, bin]
    ct_3d_array = np.zeros([end-start, num_chars, num_bins])
    for i, bin_num in enumerate(bins):

        # Compute counts
        counts_df = profile_ct.main(dataset_df, bin=bin_num)

        # Fill in counts table
        ct_3d_array[:,:,i] = counts_df.loc[start:(end-1),ct_cols].astype(float)

    # Compute mutual information for each position
    for i in range(end-start): # i only from start:end

        # Get 2D counts (characters x bins) for this position
        nxy = ct_3d_array[i,:,:]

        # Compute mutual information; rows of info_df are labelled by
        # absolute position, hence the i+start offset
        if err:
            mi, mi_err = info.estimate_mutualinfo(nxy,err=True,
                method=method,pseudocount=pseudocount)
            info_df.loc[i+start,'info'] = mi
            info_df.loc[i+start,'info_err'] = mi_err
        else:
            mi = info.estimate_mutualinfo(nxy,err=False,
                method=method,pseudocount=pseudocount)
            info_df.loc[i+start,'info'] = mi

    # Validate info dataframe
    info_df = qc.validate_profile_info(info_df,fix=True)
    return info_df