def main(args):
    """Collect per-position ``delta_mean`` values from alignment TSVs and pickle them.

    For every non-empty file matching the ``args.files`` glob pattern, the
    motif/null reference positions listed below are passed to
    ``cull_motif_features4`` (with ``feature_set="mean"``).  Each row of the
    returned table contributes its ``delta_mean`` value to a list keyed by
    the stringified ``ref_pos``.  The resulting ``{ref_pos: [delta_mean, ...]}``
    dict is pickled to ``args.out``.

    :param args: ignored; it is immediately replaced by the result of
        ``parse_args()`` (behavior kept from the original code).
    :return: None.  The only output is the pickle file written to ``args.out``.
    :raises KeyError: if a row lacks the ``ref_pos`` or ``delta_mean`` field.
    """
    # The incoming ``args`` is deliberately overwritten by the parsed command
    # line, exactly as the original implementation did.
    args = parse_args()

    # motif sites
    motifs = [80, 148, 289, 354, 363, 525, 626, 653, 747, 755, 796, 813, 874]
    # null sites
    nulls = [11, 62, 87, 218, 295, 371, 383, 457, 518, 740, 785, 805, 842, 866]
    all_sites = motifs + nulls

    # get the files, ignoring zero-byte ones
    tsvs = [x for x in glob.glob(args.files) if os.stat(x).st_size != 0]

    # container for stats: str(ref_pos) -> list of delta_mean values
    stats = {}
    strand = args.strand

    for tsv in tsvs:
        motif_table = cull_motif_features4(motif=all_sites,
                                           tsv=tsv,
                                           strand=strand,
                                           feature_set="mean")
        # The helper signals "nothing usable" by returning the literal False;
        # an identity check is required because the normal return value is a
        # table that must not be truth-tested.
        if motif_table is False:
            continue

        for _, row in motif_table.iterrows():
            idx = str(row['ref_pos'])
            d_mean = row['delta_mean']
            # setdefault replaces the former try/except KeyError branch pair
            # that duplicated the same three statements in both arms.
            stats.setdefault(idx, []).append(d_mean)

    # Use a context manager so the file is flushed and closed deterministically
    # instead of relying on garbage collection of an anonymous file object.
    # NOTE(review): text mode 'w' is kept from the original; it is only safe
    # for the default ASCII pickle protocol - switch to 'wb' if a binary
    # protocol is ever requested.
    with open(args.out, 'w') as out_file:
        cPickle.dump(stats, out_file)
import sys sys.path.append("../") from lib.utils import cull_motif_features4, collect_data_vectors2 tsv_t = "../../marginAlign/cPecan/tests/temp/tempFiles_alignment/" \ "makeson_PC_MA_286_R7.3_ZYMO_C_1_09_11_15_1714_1_ch1_file1_strand.vl.forward.tsv" aln = "../marginAlign/cPecan/tests/temp/tempFiles_alignment/*.tsv" aln2 = "../marginAlign/cPecan/tests/test_alignments/newf_conditional_model/C/tempFiles_alignment/*.forward.tsv" m = [300, 747] dst = "all" strand = "t" features = cull_motif_features4(m, tsv_t, strand, feature_set=dst, kmer_length=6) print features.ix[features['ref_pos'] == 300] print features.ix[features['ref_pos'] == 300]['delta_mean'] #events = [] #for strand in ["t", "c"]: # events += list(chain( # *features.ix[(features['ref_pos'] == 300) & (features['strand'] == strand)] # .drop(['ref_pos', 'strand'], 1)[:1].values.tolist())) ''' tr, xtr, ts = collect_data_vectors2(events_per_pos=1, label=0, portion=0.5, files=aln, strand=strand,