Python computePredictedProfile Examples

Programming Language: Python

Namespace/Package Name: predictor.model

Method/Function: computePredictedProfile

Examples at hotexamples.com: 3

Python computePredictedProfile - 3 examples found. These are the top-rated real-world Python examples of predictor.model.computePredictedProfile extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: predict.py Project: zhaijj/SelfTarget

def predictMutations(theta_file, target_seq, pam_idx, add_null=True):
    """Predict the mutation profile for a single target sequence.

    Runs the external indel generator on ``target_seq``, computes features
    for the generated indels, and applies the model loaded from
    ``theta_file`` to obtain a predicted profile.

    Args:
        theta_file: Passed to ``readTheta`` to load the model parameters and
            the names of the feature columns they apply to.
        target_seq: Target sequence string; also embedded in the names of
            the temporary files written to the current directory.
        pam_idx: Integer index handed to the indel generator; features are
            computed with ``pam_idx - 3`` (presumably the cut site --
            TODO confirm).
        add_null: When true, a ``'-'`` entry is added to both returned
            mappings (``p_predict['-'] = 1000`` and the target sequence from
            ``left_trim`` onward in ``rep_reads``).

    Returns:
        Tuple ``(p_predict, rep_reads, in_frame_perc)``.

    Raises:
        subprocess.CalledProcessError: If the indel generator exits non-zero.
        Exception: If the feature names stored with the model are not all
            present among the computed features.
    """
    theta, _, theta_feature_columns = readTheta(theta_file)

    #generate indels
    left_trim = 0
    tmp_genindels_file = 'tmp_genindels_%s_%d.txt' % (target_seq, random.randint(0,100000))
    cmd = INDELGENTARGET_EXE + ' %s %d %s' % (target_seq, pam_idx, tmp_genindels_file)
    try:
        print(cmd)
        subprocess.check_call(cmd.split())
        rep_reads = fetchRepReads(tmp_genindels_file)
        # Entry with the smallest tokFullIndel(x)[1]; ties fall back to
        # comparing the indel strings themselves.
        if len(rep_reads) > 0:
            isize, smallest_indel = min([(tokFullIndel(x)[1], x) for x in rep_reads])
        else:
            isize, smallest_indel = 0, '-'
        if isize > 0:
            left_trim = target_seq.find(rep_reads[smallest_indel][:10])

        #compute features for all generated indels
        tmp_features_file = 'tmp_features_%s_%d.txt' % (target_seq, random.randint(0,100000))
        try:
            calculateFeaturesForGenIndelFile(tmp_genindels_file, target_seq, pam_idx-3, tmp_features_file)
            feature_data, feature_columns = readFeaturesData(tmp_features_file)
        finally:
            # Clean up even when feature computation or parsing fails.
            if os.path.exists(tmp_features_file):
                os.remove(tmp_features_file)
    finally:
        # Clean up even when the generator or any later step fails.
        if os.path.exists(tmp_genindels_file):
            os.remove(tmp_genindels_file)

    if len(set(theta_feature_columns).difference(set(feature_columns))) != 0:
        raise Exception('Stored feature names associated with model thetas are not contained in those computed')

    # More features were computed than the model uses: keep only the model's
    # columns (plus the 'Indel' column).
    if len(set(theta_feature_columns).union(set(feature_columns))) != len(theta_feature_columns):
        feature_data = feature_data[['Indel'] + theta_feature_columns]

    #Predict the profile
    p_predict, _ = computePredictedProfile(feature_data, theta, theta_feature_columns)
    in_frame, out_frame, _ = fetchIndelSizeCounts(p_predict)
    # NOTE(review): raises ZeroDivisionError if both counts are zero.
    in_frame_perc = in_frame*100.0/(in_frame + out_frame)
    if add_null:
        # NOTE(review): 1000 looks like a placeholder weight for the
        # unmodified ('-') entry -- confirm its intended meaning.
        p_predict['-'] = 1000
        rep_reads['-'] = target_seq[left_trim:]
    return p_predict, rep_reads, in_frame_perc

Example #2

Show file

def computeAndComparePredicted(theta_file,
                               selected_id=None,
                               out_dir='.',
                               start_count=0,
                               end_count=10000):
    """Compare measured old/new library profiles with the predicted profile.

    For every validation pair, loads the old, new, combined and
    per-replicate profiles, predicts a profile from the stored model, and
    writes one tab-separated row of mutated read counts, in-frame
    percentages and symmetric KL values to
    ``<out_dir>/old_new_kl_predicted_summaries_<start_count>_<end_count>.txt``.

    Args:
        theta_file: Passed to ``readTheta`` to load the model parameters,
            the training set ids and the feature column names.
        selected_id: When not None, only the pair whose old id equals this
            value is processed, and its profiles are also plotted.
        out_dir: Directory in which the summary file is written.
        start_count: Used in the output file name.
        end_count: Used in the output file name.

    Raises:
        Exception: If either id of a validation pair is in the training set.
    """
    theta, train_set, feature_columns = readTheta(theta_file)

    new_sep_labels = 'New 2x800x', 'New 1600x'
    old_sep_labels = 'Old 2x800x', 'Old 1600x'

    #Note: here old refers to conventional scaffold library, new refers to improved scaffold library
    # The counts are embedded in the file name; the previous literal had no
    # conversion specifiers, so applying '%' to it raised TypeError.
    # NOTE(review): start_count/end_count only affect the file name here --
    # confirm whether id_pairs was also meant to be restricted by them.
    out_path = out_dir + '/old_new_kl_predicted_summaries_%d_%d.txt' % (
        start_count, end_count)

    # 'with' guarantees the file is closed when the training-set check (or
    # any helper) raises part-way through.
    with io.open(out_path, 'w') as fout:
        fout.write(
            u'Old Oligo Id\tNew Oligo Id\tOld Mut Reads\tNew Mut Reads\tCombined Mut Reads\t'
        )
        fout.write(u'\t'.join('%s Mut Reads' % x.split('/')[-1]
                              for x in new_sep_labels + old_sep_labels))
        fout.write(
            u'\tOld In Frame Perc\tNew In Frame Perc\tCombined in Frame Perc\tPredicted In Frame Per\t'
        )
        fout.write(u'\t'.join('%s In Frame Perc' % x.split('/')[-1]
                              for x in new_sep_labels + old_sep_labels))
        fout.write(
            u'\tOld v New KL\tOld v Predicted KL\tNew v Predicted KL\tCombined v Predicted KL\t'
        )
        fout.write(u'\t'.join('%s vs Predicted KL' % x.split('/')[-1]
                              for x in new_sep_labels + old_sep_labels) + '\t')
        fout.write(u'\t'.join([
            '%s vs %s KL' % (x.split('/')[-1], y.split('/')[-1])
            for x, y in (getCombs(new_sep_labels) + getCombs(old_sep_labels))
        ]) + '\n')

        id_pairs = loadValidationPairs()
        for (old_id, new_id) in id_pairs:
            if old_id in train_set or new_id in train_set:
                raise Exception('Bad!!! Testing on Training data: %s %s' %
                                (old_id, new_id))

            if selected_id is not None and selected_id != old_id:
                continue  #Guide pair selected for plotting

            #Load Old and new profiles, and produce combined profile from the two
            p_old, p_new, mut_reads_old, mut_reads_new = loadProfilePair(
                old_id, new_id)
            p_comb, mut_reads_comb = combineProfiles(p_old, p_new,
                                                     mut_reads_old,
                                                     mut_reads_new)

            #Predict the profile (old and new will be the same so just do one)
            feature_data = loadOligoFeaturesAndReadCounts(new_id, [])
            p_predict, _ = computePredictedProfile(feature_data, theta,
                                                   feature_columns)

            #Load separate profiles too
            p_old_sep, p_new_sep, old_sep_mr, new_sep_mr = loadProfilesSeparately(
                old_id, new_id)

            #Compute in frame percentages
            old_if_perc = getInFramePerc(p_old)
            new_if_perc = getInFramePerc(p_new)
            comb_if_perc = getInFramePerc(p_comb)
            pred_if_perc = getInFramePerc(p_predict)
            # Profiles with at most one entry are reported as -1 instead of
            # being passed to getInFramePerc.
            new_sep_if_percs = [
                getInFramePerc(profile) if len(profile) > 1 else -1
                for profile in p_new_sep
            ]
            old_sep_if_percs = [
                getInFramePerc(profile) if len(profile) > 1 else -1
                for profile in p_old_sep
            ]

            #Plot the comparison
            if selected_id is not None:
                rrds = loadRepReads(new_id)
                plotProfiles([p_new_sep[0], p_new_sep[1], p_predict],
                             [rrds, rrds, rrds], [56, 56, 56],
                             [False, False, False],
                             ['Replicate 1', 'Replicate 2', 'Predicted'],
                             title='%s (KL=%.2f, KL=%.2f)' %
                             (new_id, symmetricKL(p_new_sep[0], p_new_sep[1]),
                              symmetricKL(p_new, p_predict)))

            # KL columns, in header order: the four headline comparisons,
            # each separate profile vs predicted, then pairwise within the
            # new and old separate profiles.
            str_args = (symmetricKL(p_old, p_new),
                        symmetricKL(p_old, p_predict),
                        symmetricKL(p_new, p_predict),
                        symmetricKL(p_comb, p_predict))
            kl_str = u'\t%.5f\t%.5f\t%.5f\t%.5f\t' % str_args
            kl_str += u'\t'.join([
                '%.5f' % symmetricKL(p_predict, x)
                for x in p_new_sep + p_old_sep
            ])
            kl_str += u'\t' + u'\t'.join([
                '%.5f' % symmetricKL(x, y)
                for (x, y) in (getCombs(p_new_sep) + getCombs(p_old_sep))
            ])
            if_str = u'\t'.join(
                ['%.3f' % x for x in new_sep_if_percs + old_sep_if_percs])
            mut_str = u'\t'.join(['%d' % x for x in new_sep_mr + old_sep_mr])
            fout.write(
                u'%s\t%s\t%d\t%d\t%d\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%s%s\n' %
                (old_id, new_id, mut_reads_old, mut_reads_new,
                 mut_reads_comb, mut_str, old_if_perc, new_if_perc,
                 comb_if_perc, pred_if_perc, if_str, kl_str))
            # Flush per row so partial results are on disk during long runs.
            fout.flush()

Example #3

Show file

def computeAndComparePredicted(theta_file, selected_id=None, out_dir='.'):
    """Compare measured old/new library profiles with the predicted profile.

    For every validation pair, loads the old, new and combined profiles,
    predicts a profile from the stored model, and writes one tab-separated
    row of mutated read counts, in-frame percentages and symmetric KL
    values to ``<out_dir>/old_new_kl_predicted_summaries.txt``.

    Args:
        theta_file: Passed to ``readTheta`` to load the model parameters,
            the training set ids and the feature column names.
        selected_id: When not None, only the pair whose old id equals this
            value is processed, and its profiles are also plotted.
        out_dir: Directory in which the summary file is written.

    Raises:
        Exception: If either id of a validation pair is in the training set.
    """
    theta, train_set, feature_columns = readTheta(theta_file)

    #Note: here old refers to conventional scaffold library, new refers to improved scaffold library
    # 'with' guarantees the file is closed when the training-set check (or
    # any helper) raises part-way through.
    with io.open(out_dir + '/old_new_kl_predicted_summaries.txt', 'w') as fout:
        fout.write(
            u'Old Oligo Id\tNew Oligo Id\tOld Mut Reads\tNew Mut Reads\tCombined Mut Reads\tOld In Frame Perc\tNew In Frame Perc\tCombined in Frame Perc\tPredicted In Frame Per'
        )
        fout.write(
            u'\tOld v New KL\tOld v Predicted KL\tNew v Predicted KL\tCombined v Predicted KL\n'
        )

        id_pairs = loadValidationPairs()
        for (old_id, new_id) in id_pairs:
            if old_id in train_set or new_id in train_set:
                raise Exception('Bad!!! Testing on Training data: %s %s' %
                                (old_id, new_id))

            if selected_id is not None and selected_id != old_id:
                continue  #Guide pair selected for plotting

            #Load Old and new profiles, and produce combined profile from the two
            p_old, p_new, mut_reads_old, mut_reads_new = loadProfilePair(
                old_id, new_id)
            p_comb, mut_reads_comb = combineProfiles(p_old, p_new,
                                                     mut_reads_old,
                                                     mut_reads_new)

            #Predict the profile (old and new will be the same so just do one)
            feature_data = loadOligoFeaturesAndReadCounts(new_id, [])
            p_predict, _ = computePredictedProfile(feature_data, theta,
                                                   feature_columns)

            #Compute in frame percentages
            # NOTE(review): each division raises ZeroDivisionError when both
            # counts for a profile are zero.
            old_if, old_of, _ = fetchIndelSizeCounts(p_old)
            new_if, new_of, _ = fetchIndelSizeCounts(p_new)
            comb_if, comb_of, _ = fetchIndelSizeCounts(p_comb)
            pred_if, pred_of, _ = fetchIndelSizeCounts(p_predict)
            old_if_perc = old_if * 100.0 / (old_if + old_of)
            new_if_perc = new_if * 100.0 / (new_if + new_of)
            comb_if_perc = comb_if * 100.0 / (comb_if + comb_of)
            pred_if_perc = pred_if * 100.0 / (pred_if + pred_of)

            #Plot the comparison
            if selected_id is not None:
                rrds = loadRepReads(new_id)
                plotProfiles([p_old, p_new, p_predict], [rrds, rrds, rrds],
                             [42, 42, 42], [False, False, False],
                             ['Replicate 1', 'Replicate 2', 'Predicted'],
                             title='%s (KL=%.2f, KL=%.2f)' %
                             (new_id, symmetricKL(p_old, p_new),
                              symmetricKL(p_comb, p_predict)))

            # KL columns in header order: old v new, old v predicted,
            # new v predicted, combined v predicted.
            str_args = (symmetricKL(p_old, p_new),
                        symmetricKL(p_old, p_predict),
                        symmetricKL(p_new, p_predict),
                        symmetricKL(p_comb, p_predict))
            kl_str = u'\t%.5f\t%.5f\t%.5f\t%.5f' % str_args
            fout.write(
                u'%s\t%s\t%d\t%d\t%d\t%.3f\t%.3f\t%.3f\t%.3f%s\n' %
                (old_id, new_id, mut_reads_old, mut_reads_new, mut_reads_comb,
                 old_if_perc, new_if_perc, comb_if_perc, pred_if_perc, kl_str))
            # Flush per row so partial results are on disk during long runs.
            fout.flush()