Python readFeaturesData Examples

Programming Language: Python

Namespace/Package Name: predictor.features

Method/Function: readFeaturesData

Examples at hotexamples.com: 2

Python readFeaturesData - 2 examples found. These are the top-rated real-world Python examples of predictor.features.readFeaturesData, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

def loadOligoFeaturesAndReadCounts(oligo_id, sample_names):
    """Load the generated-indel features for an oligo, optionally with read fractions.

    The features are read from ``<FEATURES_DIR>/<subdir>/<oligo_id>_gen_indel_features.txt``
    via ``readFeaturesData``. When ``sample_names`` is non-empty, the matching
    reads file under ``READS_DIR`` is loaded as well, the named sample columns
    are summed per indel (plus a 0.5 pseudocount), the 'All Mutated' row is
    dropped, and each indel's share of the remaining total is merged onto the
    feature rows as 'Frac Sample Reads'.

    Args:
        oligo_id: Identifier of the oligo; used to locate both input files.
        sample_names: Column names in the reads file to sum over. When empty,
            no reads are loaded.

    Returns:
        A DataFrame of indel features with an 'Indel' column. When
        ``sample_names`` is non-empty it also carries 'Frac Sample Reads' and
        only contains indels present in both files (inner join).

    Raises:
        Exception: if the summed reads over all retained indels is zero.
    """
    oligo_idx = getOligoIdxFromId(oligo_id)
    oligo_subdir, _ = getFileForOligoIdx(oligo_idx, ext='')

    features_file = FEATURES_DIR + '/' + oligo_subdir + '/%s_gen_indel_features.txt' % oligo_id
    reads_file = READS_DIR + '/' + oligo_subdir + '/%s_gen_indel_reads.txt' % oligo_id

    # NOTE(review): the returned cut site is not used here; the call is kept
    # in case it validates the features file -- confirm before removing.
    getCutSite(features_file)
    indel_feature_data, _ = readFeaturesData(features_file)

    # No samples requested: expose the index as an 'Indel' column and stop.
    if len(sample_names) == 0:
        indel_feature_data['Indel'] = indel_feature_data.index
        return indel_feature_data

    # The first line of the reads file is skipped; the rest is tab-separated.
    read_data = pd.read_csv(reads_file, skiprows=1, sep='\t')
    read_data['Sum Sample Reads'] = read_data[sample_names].sum(axis=1) + 0.5

    # Take an explicit copy of the filtered rows so that adding a column below
    # writes to an independent frame instead of a slice of the original
    # (avoids pandas' SettingWithCopyWarning).
    read_data = read_data.loc[read_data['Indel'] != 'All Mutated'].copy()

    total_mut_reads = read_data['Sum Sample Reads'].sum()
    if total_mut_reads == 0:
        raise Exception('No Mutated Reads in %s' % reads_file)
    read_data['Frac Sample Reads'] = read_data['Sum Sample Reads'] / total_mut_reads

    # Feature rows are keyed by their index; read rows by the 'Indel' column.
    return pd.merge(indel_feature_data,
                    read_data[['Indel', 'Frac Sample Reads']],
                    left_index=True,
                    right_on='Indel',
                    how='inner')

Example #2

Show file

File: predict.py Project: zhaijj/SelfTarget

def predictMutations(theta_file, target_seq, pam_idx, add_null=True):
    """Predict the indel profile for a target sequence using stored model thetas.

    Runs the external ``INDELGENTARGET_EXE`` program to generate candidate
    indels for ``target_seq``, computes their features, and feeds them to
    ``computePredictedProfile`` together with the thetas read from
    ``theta_file``. Intermediate results go through two temporary files in the
    current working directory, which are removed before returning, including
    when an intermediate step fails.

    Args:
        theta_file: Path passed to ``readTheta`` to load the model parameters
            and the feature names they were trained on.
        target_seq: Target sequence; passed to the indel generator and to the
            feature calculation.
        pam_idx: PAM index passed to the indel generator; ``pam_idx - 3`` is
            passed to the feature calculation.
        add_null: When true, add a '-' entry to both the predicted profile
            (value 1000) and the representative reads (the target sequence
            from ``left_trim`` onward).

    Returns:
        Tuple ``(p_predict, rep_reads, in_frame_perc)``: the predicted profile,
        the representative reads per indel, and the in-frame percentage
        computed from ``fetchIndelSizeCounts``.

    Raises:
        Exception: if any feature name stored with the thetas is missing from
            the computed features.
        subprocess.CalledProcessError: if the indel generator exits non-zero.
    """
    theta, train_set, theta_feature_columns = readTheta(theta_file)

    left_trim = 0
    tmp_genindels_file = 'tmp_genindels_%s_%d.txt' % (target_seq, random.randint(0,100000))
    tmp_features_file = 'tmp_features_%s_%d.txt' % (target_seq, random.randint(0,100000))

    try:
        #generate indels
        cmd = INDELGENTARGET_EXE + ' %s %d %s' % (target_seq, pam_idx, tmp_genindels_file)
        print(cmd)
        subprocess.check_call(cmd.split())
        rep_reads = fetchRepReads(tmp_genindels_file)

        # Pick the indel with the smallest size (ties broken by indel string).
        if len(rep_reads) > 0:
            isize, smallest_indel = min([(tokFullIndel(x)[1], x) for x in rep_reads])
        else:
            isize, smallest_indel = 0, '-'
        if isize > 0:
            # str.find returns -1 when the read prefix is absent from the
            # target; keep the default of 0 rather than slicing from the end.
            prefix_pos = target_seq.find(rep_reads[smallest_indel][:10])
            if prefix_pos >= 0:
                left_trim = prefix_pos

        #compute features for all generated indels
        calculateFeaturesForGenIndelFile(tmp_genindels_file, target_seq, pam_idx-3, tmp_features_file)
        feature_data, feature_columns = readFeaturesData(tmp_features_file)
    finally:
        # Always clean up the temporary files, even if a step above raised.
        for tmp_file in (tmp_genindels_file, tmp_features_file):
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    if len(set(theta_feature_columns).difference(set(feature_columns))) != 0:
        raise Exception('Stored feature names associated with model thetas are not contained in those computed')

    # Extra computed features beyond those the thetas know about are dropped.
    if len(set(theta_feature_columns).union(set(feature_columns))) != len(theta_feature_columns):
        feature_data = feature_data[['Indel'] + theta_feature_columns]

    #Predict the profile
    p_predict, _ = computePredictedProfile(feature_data, theta, theta_feature_columns)
    in_frame, out_frame, _ = fetchIndelSizeCounts(p_predict)
    in_frame_perc = in_frame*100.0/(in_frame + out_frame)
    if add_null:
        p_predict['-'] = 1000
        rep_reads['-'] = target_seq[left_trim:]
    return p_predict, rep_reads, in_frame_perc