Example #1
0
def main(args):
    """Gather ``delta_mean`` values per reference position and pickle them.

    For every non-empty file matching the glob pattern ``args.files``,
    ``cull_motif_features4`` is called with the combined motif and null site
    lists, the strand from ``args.strand`` and ``feature_set="mean"``.  The
    ``delta_mean`` of each returned row is appended to a list keyed by the
    string form of that row's ``ref_pos``.  The resulting dict is written
    with ``cPickle`` to the path ``args.out``.

    :param args: ignored; it is immediately replaced by the result of
        ``parse_args()`` (kept in the signature for existing callers).
    """
    # NOTE(review): the incoming ``args`` is discarded here, so callers cannot
    # inject their own arguments -- confirm whether that is intentional.
    args = parse_args()

    # motif sites
    motifs = [80, 148, 289, 354, 363, 525, 626, 653, 747, 755, 796, 813, 874]
    # null sites
    nulls = [11, 62, 87, 218, 295, 371, 383, 457, 518, 740, 785, 805, 842, 866]
    all_sites = motifs + nulls

    # get the files, skipping zero-byte ones
    tsvs = [x for x in glob.glob(args.files) if os.stat(x).st_size != 0]
    # container for stats: str(ref_pos) -> list of delta_mean values
    stats = {}
    strand = args.strand
    for tsv in tsvs:
        motif_table = cull_motif_features4(motif=all_sites, tsv=tsv, strand=strand, feature_set="mean")
        # The helper can hand back a literal False instead of a table
        # (presumably when nothing usable was found -- confirm in lib.utils).
        if motif_table is False:
            continue
        for _, row in motif_table.iterrows():
            idx = str(row['ref_pos'])
            d_mean = row['delta_mean']
            # Create the list on first sight of this position, then append.
            stats.setdefault(idx, []).append(d_mean)

    # Use a context manager so the output file is flushed and closed even if
    # pickling fails.  The original text mode ('w') is kept unchanged.
    # NOTE(review): a binary pickle protocol would need 'wb' instead.
    with open(args.out, 'w') as out_handle:
        cPickle.dump(stats, out_handle)
Example #2
0
import sys
sys.path.append("../")
from lib.utils import cull_motif_features4, collect_data_vectors2

# Ad-hoc check of cull_motif_features4 on a single alignment TSV: fetch the
# feature table and print the rows (and their delta_mean) at ref_pos 300.

# Path of the single alignment TSV passed to cull_motif_features4 below.
tsv_t = "../../marginAlign/cPecan/tests/temp/tempFiles_alignment/" \
        "makeson_PC_MA_286_R7.3_ZYMO_C_1_09_11_15_1714_1_ch1_file1_strand.vl.forward.tsv"

# Glob patterns for alignment TSVs; not used by the statements in this section.
aln = "../marginAlign/cPecan/tests/temp/tempFiles_alignment/*.tsv"

aln2 = "../marginAlign/cPecan/tests/test_alignments/newf_conditional_model/C/tempFiles_alignment/*.forward.tsv"

# Arguments for the call below: positions, feature set name and strand.
m = [300, 747]
dst = "all"
strand = "t"

features = cull_motif_features4(m, tsv_t, strand, feature_set=dst, kmer_length=6)

# Select the rows at reference position 300 once and reuse the selection.
# ``.loc`` with a boolean mask replaces the removed ``.ix`` indexer.
rows_at_300 = features.loc[features['ref_pos'] == 300]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(rows_at_300)
print(rows_at_300['delta_mean'])

#events = []
#for strand in ["t", "c"]:
#    events += list(chain(
#                         *features.ix[(features['ref_pos'] == 300) & (features['strand'] == strand)]
#                         .drop(['ref_pos', 'strand'], 1)[:1].values.tolist()))

'''
tr, xtr, ts = collect_data_vectors2(events_per_pos=1,
                                    label=0,
                                    portion=0.5,
                                    files=aln,
                                    strand=strand,