コード例 #1
0
ファイル: trim_series.py プロジェクト: japplebaum/thesis
from sequence_utils import trim_inactive
from build_models import filter_criteria, log_series
import sys, cPickle

if __name__ == "__main__":
	# Usage: trim_series.py INPATH GOODPATH REJPATH
	#
	# Loads the pickled circuit records at INPATH, trims each record's
	# outgoing series with trim_inactive, and sorts the trimmed records into
	# "good" (accepted by filter_criteria) and "reject" lists, then prints
	# how many records fell into each group.
	inpath = sys.argv[1]
	# NOTE(review): goodpath, rejpath and window_size are read but not used
	# anywhere in the visible part of this script -- confirm whether the
	# code that writes the two output files is missing.
	goodpath = sys.argv[2]
	rejpath = sys.argv[3]
	good_records = []
	rej_records = []
	# Pickle data is a byte stream, so the file is opened in binary mode;
	# text mode can corrupt binary-protocol pickles on platforms that
	# translate line endings.
	# NOTE: unpickling can execute arbitrary code -- only run this on
	# trusted input files.
	with open(inpath, 'rb') as datafile:
		data = cPickle.load(datafile)
	# Everything is in memory now, so the file is already closed here.
	window_size = data['window_size']
	records = data['records']
	for record in records:
		trimmed = trim_inactive(record['relays_out'])
		new_rec = {
			'ident': record['ident'],
			'create': record['create'],
			'destroy': record['destroy'],
			# The incoming series is not carried over to the output record.
			'relays_in': None,
			'relays_out': trimmed,
		}
		if filter_criteria(log_series(trimmed)):
			good_records.append(new_rec)
		elif len(trimmed) > 0:
			rej_records.append(new_rec)
		# Records that fail the filter AND are empty after trimming land in
		# neither list; they are counted as n_gone below.
	n_gone = len(records) - len(good_records) - len(rej_records)
	# Single-argument parenthesised print yields the same output under the
	# Python 2 print statement and the Python 3 print function.
	print("%i good records" % len(good_records))
	print("%i reject records" % len(rej_records))
	print("%i len 0 after trimming" % n_gone)
コード例 #2
0
ファイル: build_models.py プロジェクト: shalisawong/j-thesis
def preprocess(series):
	"""
	Apply trim_inactive and then log_series to every element of series.
	@param series: an iterable of series
	@return: the result of mapping the two-step transform over series
	"""
	def _trim_then_log(single):
		# Trim first, then pass the trimmed series to log_series.
		return log_series(trim_inactive(single))

	return map(_trim_then_log, series)
コード例 #3
0
ファイル: exploratory.py プロジェクト: japplebaum/thesis
def _add_log_hist(fig, position, label, values):
	"""
	Add one histogram with a log-scaled frequency axis to fig.
	@param fig: the figure that receives the new subplot
	@param position: subplot position code handed to fig.add_subplot
	@param label: text used for both the subplot title and the x-axis label
	@param values: the values to histogram
	"""
	axes = fig.add_subplot(position)
	# Labels are set on the Axes object itself rather than through pyplot's
	# implicit "current axes", so they always land on this subplot.
	axes.set_title(label)
	axes.set_xlabel(label)
	axes.set_ylabel("Frequency")
	# The log scale is applied before the bars are drawn.
	axes.set_yscale('log')
	axes.hist(values, bins=N_HIST_BINS)


def do_summarize(records, direc_key):
	"""
	Display summary histograms for the series in records.
	Records whose series is empty are skipped, because the per-window
	statistics (mean, min, max, percent active) are undefined for them.
	@param records: the circuit records
	@param direc_key: 'relays_in' for incoming relays, 'relays_out' for
		outgoing
	"""
	circ_len_aggr = []
	mean_cells_per_window_aggr = []
	min_cells_per_window_aggr = []
	max_cells_per_window_aggr = []
	median_cells_per_window_aggr = []
	stddev_cells_per_window_aggr = []
	inst_counts_aggr = []
	# Collected for every record but not plotted at present.
	percent_active_aggr = []
	for record in records:
		relays = record[direc_key]
		n_windows = len(relays)
		# len() is used instead of truthiness so the check also works for
		# sequence types without a defined truth value.
		if n_windows == 0:
			continue
		# Dividing by 1000.0 matches the "(seconds)" plot label below;
		# presumably create/destroy are millisecond timestamps -- TODO confirm.
		circ_len_aggr.append((record['destroy'] - record['create'])/1000.0)
		mean_cells_per_window_aggr.append(1.0*sum(relays)/n_windows)
		median_cells_per_window_aggr.append(median(relays))
		min_cells_per_window_aggr.append(min(relays))
		max_cells_per_window_aggr.append(max(relays))
		stddev_cells_per_window_aggr.append(std(relays))
		# Pool every individual window count across all records.
		inst_counts_aggr.extend(relays)
		time_active = len(trim_inactive(relays))
		percent_active_aggr.append(100.0*time_active/n_windows)
	fig = plt.figure()
	summarize(max_cells_per_window_aggr, "Max")

	# (subplot position in a 4x2 grid, title/x-label, data) for each panel.
	panels = (
		(421, "Mean Cells/Window", mean_cells_per_window_aggr),
		(422, "Median Cells/Window", median_cells_per_window_aggr),
		(423, "Min Cells/Window", min_cells_per_window_aggr),
		(424, "Max Cells/Window", max_cells_per_window_aggr),
		(425, "Std Dev. of Cells/Window", stddev_cells_per_window_aggr),
		(426, "Single Window Cell Count", inst_counts_aggr),
		(427, "Circuit Length (seconds)", circ_len_aggr),
	)
	for position, label, values in panels:
		_add_log_hist(fig, position, label, values)
	fig.tight_layout()