import sys
import cPickle

from sequence_utils import trim_inactive
from build_models import filter_criteria, log_series

if __name__ == "__main__":
    # Command line: INPATH GOODPATH REJPATH
    inpath = sys.argv[1]
    # NOTE(review): goodpath and rejpath are taken from the command line but
    # nothing in this part of the file writes to them -- confirm whether the
    # output step lives elsewhere or is missing.
    goodpath = sys.argv[2]
    rejpath = sys.argv[3]

    good_records = []
    rej_records = []

    # Pickle data is binary: open with 'rb' so the bytes are never subject to
    # text-mode newline translation.
    # NOTE: unpickling can execute arbitrary code; only use trusted input.
    with open(inpath, 'rb') as datafile:
        data = cPickle.load(datafile)

    # window_size is not used below; the lookup is kept so a file without
    # this key still fails here, as before.
    window_size = data['window_size']
    records = data['records']

    for record in records:
        trimmed = trim_inactive(record['relays_out'])
        # Only the trimmed outgoing series is carried over; 'relays_in' is
        # explicitly set to None in the new record.
        new_rec = {
            'ident': record['ident'],
            'create': record['create'],
            'destroy': record['destroy'],
            'relays_in': None,
            'relays_out': trimmed,
        }
        if filter_criteria(log_series(trimmed)):
            good_records.append(new_rec)
        elif len(trimmed) > 0:
            rej_records.append(new_rec)
        # Records that fail the filter and have nothing left after trimming
        # land in neither list; they are counted as n_gone below.

    n_gone = len(records) - len(good_records) - len(rej_records)

    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("%i good records" % len(good_records))
    print("%i reject records" % len(rej_records))
    print("%i len 0 after trimming" % n_gone)
def preprocess(series):
    """
    Apply trim_inactive and then log_series to every element of series.

    @param series: iterable of individual series
    @return: list of the transformed series, in input order
    """
    return [log_series(trim_inactive(raw)) for raw in series]
def do_summarize(records, direc_key):
    """
    Display summary histograms for the series in records.

    One figure with a 4x2 grid of log-scaled histograms is built: mean,
    median, min, max and standard deviation of cells per window, the pooled
    single-window cell counts, and the circuit length.

    Records whose series under direc_key is None or empty are skipped,
    because none of the per-window statistics are defined for them.

    @param records: the circuit records
    @param direc_key: 'relays_in' for incoming relays, 'relays_out' for
        outgoing
    """
    circ_len_aggr = []
    mean_cells_per_window_aggr = []
    min_cells_per_window_aggr = []
    max_cells_per_window_aggr = []
    median_cells_per_window_aggr = []
    stddev_cells_per_window_aggr = []
    inst_counts_aggr = []
    percent_active_aggr = []

    for record in records:
        relays = record[direc_key]
        # len() rather than truthiness so array-like series work as well.
        if relays is None or len(relays) == 0:
            continue
        n_windows = len(relays)

        # Divided by 1000.0 and plotted as seconds, so 'create'/'destroy'
        # are presumably millisecond timestamps -- TODO confirm.
        circ_len_aggr.append((record['destroy'] - record['create'])/1000.0)
        mean_cells_per_window_aggr.append(1.0*sum(relays)/n_windows)
        median_cells_per_window_aggr.append(median(relays))
        min_cells_per_window_aggr.append(min(relays))
        max_cells_per_window_aggr.append(max(relays))
        stddev_cells_per_window_aggr.append(std(relays))
        inst_counts_aggr += relays

        # NOTE(review): percent_active_aggr only feeds the "Percent of Time
        # in Active State" panel (grid slot 428), which is currently
        # disabled. It is still collected so the panel can be switched back
        # on by adding an entry to `panels` below.
        time_active = len(trim_inactive(relays))
        percent_active_aggr.append(100.0*time_active/n_windows)

    fig = plt.figure()
    summarize(max_cells_per_window_aggr, "Max")

    # (subplot position, title and x-axis label, data) for each panel.
    panels = [
        (421, "Mean Cells/Window", mean_cells_per_window_aggr),
        (422, "Median Cells/Window", median_cells_per_window_aggr),
        (423, "Min Cells/Window", min_cells_per_window_aggr),
        (424, "Max Cells/Window", max_cells_per_window_aggr),
        (425, "Std Dev. of Cells/Window", stddev_cells_per_window_aggr),
        (426, "Single Window Cell Count", inst_counts_aggr),
        (427, "Circuit Length (seconds)", circ_len_aggr),
    ]
    for position, label, values in panels:
        axes = fig.add_subplot(position)
        # Axes methods instead of pyplot state-machine calls: the labels and
        # scale always land on the subplot just created, whichever figure
        # pyplot considers current.
        axes.set_title(label)
        axes.set_xlabel(label)
        axes.set_ylabel("Frequency")
        axes.set_yscale('log')
        axes.hist(values, bins=N_HIST_BINS)

    fig.tight_layout()