def make_int_to_action(file):
    ita_data = CsvDataset(file)
    ita = [0 for i in range(0, len(ita_data.feature_vectors))]
    for fv in ita_data.feature_vectors:
        idx = int(fv[0])
        ita[idx] = fv[1]
    return ita
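# Example usage (hypothetical file contents, inferred from the parsing above:
# column 0 holds the integer code, column 1 the action name):
#
#   0,NORTH
#   1,SOUTH
#
# with open("int_to_action.csv") as f:   # hypothetical filename
#     ita = make_int_to_action(f)
#     # ita == ["NORTH", "SOUTH"]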
def summarize(dir):
    Nagents = 2
    summary = Summary()
    for w in os.listdir(dir):
        wpath = os.path.join(dir, w)
        if os.path.isdir(wpath):
            print(wpath)
            try:
                wfile = open(os.path.join(wpath, "result.csv"))
            except IOError:
                print("! No 'result.csv' in '" + wpath + "'")
                continue
            results = CsvDataset(wfile)
            wfile.close()
            try:
                wfile = open(os.path.join(wpath, "game-log.csv"))
            except IOError:
                print("! No 'game-log.csv' in '" + wpath + "'")
                continue
            log = CsvDataset(wfile)
            wfile.close()
            # Column indices of each agent's policy in the game log
            log_ai = [log.attribute_index("a0"), log.attribute_index("a1")]
            for fv in results.feature_vectors:
                if fv[0] == "winner":
                    summary.win_counts[int(fv[1])] += 1
                elif fv[0] == "score":
                    summary.score.add(float(fv[1]))
            # Count how many times each agent switched policies during the game
            policies = [None, None]
            switches = [0, 0]
            for fv in log.feature_vectors:
                for i in range(0, Nagents):
                    if policies[i] is None:
                        policies[i] = fv[log_ai[i]]
                    elif policies[i] != fv[log_ai[i]]:
                        switches[i] += 1
                        policies[i] = fv[log_ai[i]]
            for i in range(0, Nagents):
                summary.switches[i].add(switches[i])
    return summary
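# The code above assumes a Summary accumulator whose shape is only implied by
# usage (win_counts indexed by player, and 'score' / 'switches' objects with
# an add() method). This is a minimal sketch consistent with that usage, not
# the actual class; RunningStat is a hypothetical helper.
class RunningStat:
    """Accumulates count, mean, and variance online (Welford's algorithm)."""
    def __init__(self):
        self.n = 0
        self.mean = 0.0
        self._m2 = 0.0

    def add(self, x):
        self.n += 1
        d = x - self.mean
        self.mean += d / self.n
        self._m2 += d * (x - self.mean)

    def variance(self):
        return self._m2 / (self.n - 1) if self.n > 1 else 0.0

class Summary:
    def __init__(self, Nagents=2):
        self.win_counts = [0] * Nagents
        self.score = RunningStat()
        self.switches = [RunningStat() for _ in range(Nagents)]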
def process_data(data):
    global acc, headers, master, Ngames_idx, options
    if master is None:
        master = CsvDataset(attributes=data.attributes[:], feature_vectors=[])
        Ngames_idx = master.attribute_index("Ngames")
        mean_attributes = [master.attribute_index(a) for a in [
            "mean", "state_branching_mean", "action_branching_mean",
            "tree_depth_mean", "steps_mean"]]
        var_attributes = [master.attribute_index(a) for a in [
            "var", "state_branching_var", "action_branching_var",
            "tree_depth_var", "steps_var"]]
        min_attributes = [master.attribute_index(a) for a in ["steps_min"]]
        max_attributes = [master.attribute_index(a) for a in ["steps_max"]]
    else:
        if len(master.attributes) != len(data.attributes):
            on_error("Unequal column count")
        for i in range(0, len(master.attributes)):
            if master.attributes[i].name != data.attributes[i].name:
                on_error("Different headers")
    hidx = None
    if options.combine is not None:
        hidx = [data.attribute_index(name) for name in eval(options.combine)]
        # print( eval( options.combine ) )
    for fv in data.feature_vectors:
        key = len(acc)
        if hidx is not None:
            key = tuple(fv[i] for i in hidx)
        try:
            exemplar = acc[key]
            acc[key] = combine(exemplar, fv)
        except KeyError:
            acc[key] = fv[:]
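# Illustrative example of the grouping behavior above (column names are from
# the datasets elsewhere in these scripts): invoking the merge with
# --combine '["Domain","Algorithm"]' keys rows by their (Domain, Algorithm)
# pair, folding each group into one row via combine(); with no --combine
# option, every row gets a unique key and is kept as-is.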
# ----------------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------------

cl_parser = OptionParser(usage="%prog [options] file")
cl_parser.add_option("-o", dest="output_file", type="string", default=None,
                     help="The file to write the output to (default: stdout)")
(options, args) = cl_parser.parse_args()

if len(args) == 0:
    cl_parser.error("No input file")
elif len(args) > 1:
    print("WARNING: Multiple input files; ignoring all but the first")
arff_file = open(args[0], "r")

if options.output_file is None:
    # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
    output_file = sys.stdout
else:
    output_file = open(options.output_file, "w")

arff_dataset = ArffDataset(arff_file)
csv_dataset = CsvDataset.from_arff_dataset(arff_dataset)
output_file.write(repr(csv_dataset))
output_file.close()
else:
    output_file = open(options.output_file, "w")

out_attributes = [
    "Domain", "Algorithm", "domain_params", "criterion", "ss.abstraction",
    "par.priority", "par.classifier", "random_abstraction.k",
    "ss.budget_type", "ss.budget", "V_mean", "V_var", "V_conf",
    "ss.width", "ss.depth", "seed.world", "seed.sim"
]
# out_attributes = [
#     "domain", "criterion", "ss.abstraction", "par.subtree_refinement_order",
#     "par.classifier", "random_partition.k", "ss.budget_type", "ss.budget",
#     "V_mean", "V_var", "V_conf", "ss.width", "ss.depth", "seed"
# ]
out_data = CsvDataset(attributes=[CsvAttribute(a) for a in out_attributes],
                      feature_vectors=[])

with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

# [jhostetler:20160804] The Saving domain had an extra header in its output,
# causing everything to be shifted by 1.
Nepisodes_idx = len(in_data.attributes)
try:
    Nepisodes_idx = in_data.attribute_index("Nepisodes")
except KeyError:
    # Exception indicates that the problematic field was not present
    pass
if Nepisodes_idx != len(in_data.attributes):
    print("WARNING: Found 'Nepisodes' header; correcting dataset")

sailing_p = in_data.attribute_index("sailing.p")
cl_parser.add_option("--attribute", dest="attribute", type="string", default=None, help="The attribute to match") cl_parser.add_option("--value", dest="value", type="string", default=None, help="The value to match") (options, args) = cl_parser.parse_args() def open_output(options): if options.output_file == "-": # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this return sys.stdout else: return open(options.output_file, "w") with open(args[0], "r") as input_file: in_data = CsvDataset(input_file) out_data = CsvDataset(attributes=in_data.attributes, feature_vectors=[]) idx = in_data.attribute_index(options.attribute) for fv in in_data.feature_vectors: if fv[idx] == options.value: out_data.feature_vectors.append(fv[:]) with open_output(options) as output_file: output_file.write(repr(out_data))
cl_parser.add_option( "--loose", dest="loose", action="store_true", default=False, help="If specified, files are not checked for header or column count equality." ) cl_parser.add_option( "--no-headers", dest="no_headers", action="store_true", default=False, help="If specified, indicates that the files have no header rows." ) cl_parser.add_option( "--combine", dest="combine", type="string", default=None, help="""A python list of header names. If specified, results will be combined for all rows that have the same value in all specified columns.""" ) (options, args) = cl_parser.parse_args(); if options.output_file == "-": # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this output_file = sys.stdout else: output_file = open( options.output_file, "w" ) for file in args: print( file ) input_file = open( file, "r" ) in_data = CsvDataset( input_file ) process_data( in_data ) input_file.close() conf_idx = master.attribute_index( "conf" ) var_idx = master.attribute_index( "var" ) Ngames_idx = master.attribute_index( "Ngames" ) for fv in acc.values(): fv[conf_idx] = 1.96 * math.sqrt( float(fv[var_idx]) ) / math.sqrt( float(fv[Ngames_idx]) ) master.feature_vectors = [map(str, v) for v in acc.values()] output_file.write( repr(master) ) output_file.close()
dest="complete", action="store_true", default=False, help= "If specified, only parameterizations for which results are available for all budgets are considered." ) (options, args) = cl_parser.parse_args() if options.output_file == "-": # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this output_file = sys.stdout else: output_file = open(options.output_file, "w") with open(args[0], "r") as input_file: in_data = CsvDataset(input_file) domain_kb = KeyBuilder(in_data, ["domain_params"]) # par_kb = KeyBuilder( in_data, ["ss.abstraction", "par.priority", "par.classifier"] ) par_kb = KeyBuilder(in_data, ["Algorithm"]) values = dict() # The "blocks" are Domain x Budget block_kb = KeyBuilder(in_data, ["domain_params", "ss.budget"]) column_kb = KeyBuilder(in_data, ["Algorithm"]) friedman = dict() alg_set = set() for fv in in_data.feature_vectors: if not fv[in_data.attribute_index("Algorithm")].startswith( "PAR"): # != "par": continue print("par: " + fv[in_data.attribute_index("Algorithm")])
# Set output
if options.output_file is None:
    output_file = sys.stdout
else:
    output_file = open(options.output_file, "w")

out_fields = [
    "experiment", "algorithm", "faults", "N", "Nblackout",
    "V", "V_stdev", "V_median", "V_min", "V_max",
    "R_end", "R_end_stdev", "R_end_median", "R_end_min", "R_end_max",
    "t_blackout", "t_blackout_stdev", "t_blackout_median", "t_blackout_min", "t_blackout_max",
    "NLoadShed", "NLoadShed_stdev", "NLoadShed_median", "NLoadShed_min", "NLoadShed_max",
    "NIsland", "NIsland_stdev", "NIsland_median", "NIsland_min", "NIsland_max"
]
out_data = CsvDataset(attributes=list(map(CsvAttribute, out_fields)),
                      feature_vectors=[])

missing = []
# for filename in glob.iglob( args[0] ):
for filename in args:
    # experiment = os.path.dirname( filename )
    experiment = filename
    print("'" + experiment + "'")
    with open(experiment + ".csv") as fparams:
        params = CsvDataset(fparams)
    try:
        f = open(os.path.join(filename, "rewards.csv"))
    except IOError as ex:
        print("WARNING: Skipping " + filename)
        print(str(ex))
        missing.append(
# header_file.close()

for rewards_filename in args:
    missing = None
    with open(rewards_filename, "r+") as rewards_file:
        lines = sum(1 for line in rewards_file)
        if lines == 2:
            # Assumed good
            print(rewards_filename)
        if False:  # if lines == 1:
            print("Missing header: " + rewards_filename)
            rewards_file.seek(0, 0)
            bad_dataset = CsvDataset(rewards_file, headers=False)
            bad_dataset.attributes = header_dataset.attributes[:]
            params_filename = os.path.dirname(rewards_filename) + ".csv"
            print(params_filename)
            with open(params_filename) as params_file:
                params_dataset = CsvDataset(params_file)
            params = params_dataset.feature_vectors[0]
            # Fix the feature vectors
            fv = bad_dataset.feature_vectors[0]
            fv[0] = str(0)  # Filled with NUL for some reason
            for i in range(0, 3):
                # Copy missing parameters
                attr_name = header_dataset.attributes[i].name
                attr_idx = params_dataset.attribute_index(attr_name)
                print("\tInsert " + attr_name + " = " + params[attr_idx])
                fv.insert(i, params[attr_idx])
dest="fix_fault_time", default=False, help= "Enable correction for early experiments that activated the fault one step later" ) (options, args) = cl_parser.parse_args() if options.output_file == "-": output_file = sys.stdout else: output_file = open(options.output_file, "w") out_fields = [ "Tstable", "Tepisode", "Vu", "Ru_end", "Nfaults", "fault0", "fault1" ] out_data = CsvDataset(attributes=map(CsvAttribute, out_fields), feature_vectors=[]) for filename in args: with open(filename) as f: data = CsvDataset(f) [iTstable, iTepisode, iNfaults, ifault0, ifault1] = map(data.attribute_index, ["Tstable", "Tepisode", "Nfaults", "fault0", "fault1"]) for fv in data.feature_vectors: Tstable = int(fv[iTstable]) Tepisode = int(fv[iTepisode]) T = Tstable + Tepisode r = 0.0 Rend = 0 for t in range(0, T): i = data.attribute_index("t" + str(t))
cl_parser.add_option( "--delim", dest="delim", type="string", default=",", help="""The delimiter string (default: ","). """ ) cl_parser.add_option( "--loose", dest="loose", action="store_true", default=False, help="If specified, files are not checked for header or column count equality." ) (options, args) = cl_parser.parse_args(); if options.output_file == "-": output_file = sys.stdout else: output_file = open( options.output_file, "w" ) # Find the largest set of headers headers = HeaderAccumulator() for file in args: input_file = open( file, "r" ) in_data = CsvDataset( input_file ) print( file + ": " + str(len(in_data.attributes)) ) headers.add( in_data.attributes ) input_file.close() print( headers.attributes ) # Treat the largest header set as canonical master = CsvDataset( attributes=headers.attributes[:], feature_vectors=[] ) for file in args: input_file = open( file, "r" ) in_data = CsvDataset( input_file ) process_data( master, in_data ) input_file.close() output_file.write( repr(master) ) output_file.close()
default="-", help="The file to write the output to (default: stdout)") # cl_parser.add_option( "--complete", dest="complete", action="store_true", default=False, # help="If specified, only parameterizations for which results are available for all budgets are considered." ) (options, args) = cl_parser.parse_args() if options.output_file == "-": # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this output_file = sys.stdout rank_file = sys.stdout else: output_file = open(options.output_file, "w") rank_file = open("rank-" + options.output_file, "w") with open(args[0], "r") as input_file: in_data = CsvDataset(input_file) # The "blocks" are Domain x Budget block_kb = KeyBuilder(in_data, ["domain_params", "ss.budget"]) # The "treatments" are the different PARSS variants column_kb = KeyBuilder(in_data, ["Algorithm"]) # This will be a two-level map Block -> Column -> V_mean friedman = dict() alg_set = set([ "PAR(bf; DT)", "PAR(bf; random)", "PAR(uniform; DT)", "PAR(uniform; random)", "PAR(variance; DT)", "PAR(variance; random)" ]) # Gather results into 'friedman' table for fv in in_data.feature_vectors: alg = fv[in_data.attribute_index("Algorithm")] if alg not in alg_set:
cl_parser.add_argument("-N", type=int, default=0, help="Size of each subset") cl_parser.add_argument("-k", type=int, default=1, help="Number of subsets for cross-validation") cl_parser.add_argument("--seed", type=int, default=0, help="RNG seed (default: no particular seed)") args = cl_parser.parse_args() if args.N == 0 and args.k != 1: raise RuntimeError("If args.N == 0, then args.k must be 1") with open(args.input_file[0]) as input_file: data = CsvDataset(input_file) t_blackout = data.attribute_index("t_blackout") faults = data.attribute_index("faults") recoverable_faults = [] for fv in data.feature_vectors: t = int(fv[t_blackout]) if t > 10 and t < 311: recoverable_faults.append(fv[faults]) if args.N * args.k > len(recoverable_faults): raise AssertionError("Not enough faults for requested dataset size") rng = random.Random() if args.seed != 0: rng.seed(args.seed) rng.shuffle(recoverable_faults) next = 0 for i in range(0, args.k):
def make_action_to_int(file):
    ati_data = CsvDataset(file)
    ati = dict()
    for fv in ati_data.feature_vectors:
        ati[fv[1]] = int(fv[0])
    return ati
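# make_action_to_int inverts make_int_to_action: for a well-formed mapping
# file (contiguous indices, no duplicate actions), ati[ita[i]] == i for every
# valid index i.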
cl_parser = OptionParser(usage="%prog [options] file")
cl_parser.add_option("--subdivide", dest="subdivide", type="string", default=None,
                     help="Attribute to subdivide.")
cl_parser.add_option("--subdivisions", dest="subdivisions", type="int", default=1,
                     help="Number of subdivisions.")
(options, args) = cl_parser.parse_args()

csv_file = open(args[0], "r")
csv_dataset = CsvDataset(csv_file)

count = 0
for fv in csv_dataset.feature_vectors:
    print(fv)
    exploded = CsvDataset(attributes=csv_dataset.attributes[:],
                          feature_vectors=[fv[:]])
    print(exploded.feature_vectors[0])
    if options.subdivide is not None:
        idx = exploded.attribute_index(options.subdivide)
        n = int(exploded.feature_vectors[0][idx])
        # Integer division; '//' keeps this correct under Python 3
        q = n // options.subdivisions
        r = n % options.subdivisions
        sub = [q] * options.subdivisions
        for i in range(0, r):
            sub[i] += 1
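# The q/r arithmetic above distributes n as evenly as possible across the
# subdivisions: e.g. n = 10, subdivisions = 3 gives sub = [4, 3, 3], and
# sum(sub) == n always holds.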
import statistics

cl_parser = argparse.ArgumentParser(
    description="Computes statistics of reward changes over fixed-size intervals")
cl_parser.add_argument("input_file", type=str, nargs=1,
                       help="A 'rewards.csv' input file")
cl_parser.add_argument("-d", type=int, default=60,
                       help="Size of intervals for analysis")
args = cl_parser.parse_args()

with open(args.input_file[0]) as fin:
    data = CsvDataset(fin)

intervals = []
for fv in data.feature_vectors:
    i = data.attribute_index("r1")
    j = i + args.d
    while j < len(data.attributes):
        ri = float(fv[i])
        rj = float(fv[j])
        intervals.append(abs(rj - ri))
        i += 2
        j += 2

print("n: " + str(len(intervals)))
print("mean: " + str(statistics.mean(intervals)))
print("stddev: " + str(statistics.stdev(intervals)))
print("min: " + str(min(intervals)))
help="If specified, only parameterizations for which results are available for all budgets are considered." ) (options, args) = cl_parser.parse_args(); if options.output_file == "-": # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this output_file = sys.stdout else: output_file = open( options.output_file, "w" ) fault_str = "faults" out_attributes = [ "Domain", "domain_parameters", fault_str, "Algorithm", "algorithm_parameters", "budget_type", "budget", "Vc", "Vu", "dV", "Rc_end", "Ru_end", "dR", "NLoadShed", "NIsland" ] out_data = CsvDataset( attributes=[CsvAttribute( a ) for a in out_attributes], feature_vectors=[] ) with open( args[0], "r" ) as input_file: c_data = CsvDataset( input_file ) with open( args[1], "r" ) as input_file: u_data = CsvDataset( input_file ) # Index map for fault sets in baseline data u_idx = dict() for i in range(0, len(u_data.feature_vectors)): u_fv = u_data.feature_vectors[i] u_idx[u_fv[u_data.attribute_index(fault_str)]] = i for c_fv in c_data.feature_vectors: faults = c_fv[c_data.attribute_index(fault_str)] Vc = float( c_fv[c_data.attribute_index("V")] )