Example #1
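The excerpts under this heading appear to come from experiment post-processing scripts (supporting imports and helpers such as os, argparse, statistics, CsvDataset, Summary, combine, and on_error live in the surrounding files and are not shown). process_data merges rows from several CSV files into a global accumulator, optionally keyed on a user-supplied list of columns; summarize walks a directory of trial results and tallies win counts, scores, and policy switches; the final script measures how much rewards change across fixed-size column intervals.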
def process_data(data):
    global acc, headers, master, Ngames_idx, options
    if master is None:
        # First file seen: adopt its schema as the master schema
        master = CsvDataset(attributes=data.attributes[:], feature_vectors=[])
        Ngames_idx = master.attribute_index("Ngames")
        mean_attributes = [master.attribute_index(a) for a in [
            "mean", "state_branching_mean", "action_branching_mean",
            "tree_depth_mean", "steps_mean"]]
        var_attributes = [master.attribute_index(a) for a in [
            "var", "state_branching_var", "action_branching_var",
            "tree_depth_var", "steps_var"]]
        min_attributes = [master.attribute_index(a) for a in ["steps_min"]]
        max_attributes = [master.attribute_index(a) for a in ["steps_max"]]
    else:
        # Every subsequent file must match the master schema exactly
        if len(master.attributes) != len(data.attributes):
            on_error("Unequal column count")
        for i in range(len(master.attributes)):
            if master.attributes[i].name != data.attributes[i].name:
                on_error("Different headers")
    hidx = None
    if options.combine is not None:
        # 'options.combine' is expected to hold a Python list literal
        # naming the key columns
        hidx = [data.attribute_index(name) for name in eval(options.combine)]
    for fv in data.feature_vectors:
        # Default key is unique per row, so nothing gets combined
        key = len(acc)
        if hidx is not None:
            key = tuple(fv[i] for i in hidx)
        try:
            exemplar = acc[key]
            acc[key] = combine(exemplar, fv)
        except KeyError:
            acc[key] = fv[:]
def summarize(root):
    Nagents = 2
    summary = Summary()
    for w in os.listdir(root):
        wpath = os.path.join(root, w)
        if os.path.isdir(wpath):
            print(wpath)
            try:
                wfile = open(os.path.join(wpath, "result.csv"))
            except IOError:
                print("! No 'result.csv' in '" + wpath + "'")
                continue
            results = CsvDataset(wfile)
            wfile.close()
            try:
                wfile = open(os.path.join(wpath, "game-log.csv"))
            except IOError:
                print("! No 'game-log.csv' in '" + wpath + "'")
                continue
            log = CsvDataset(wfile)
            wfile.close()

            # Commit
            log_ai = [log.attribute_index("a0"), log.attribute_index("a1")]
            for fv in results.feature_vectors:
                if fv[0] == "winner":
                    summary.win_counts[int(fv[1])] += 1
                elif fv[0] == "score":
                    summary.score.add(float(fv[1]))
            policies = [None, None]
            switches = [0, 0]
            for fv in log.feature_vectors:
                for i in range(0, Nagents):
                    if policies[i] is None:
                        policies[i] = fv[log_ai[i]]
                    elif policies[i] != fv[log_ai[i]]:
                        switches[i] += 1
                        policies[i] = fv[log_ai[i]]
            for i in range(0, Nagents):
                summary.switches[i].add(switches[i])
    return summary
import argparse
import statistics

cl_parser = argparse.ArgumentParser(
    description="Computes statistics of reward changes over fixed-size intervals")
cl_parser.add_argument("input_file",
                       type=str,
                       nargs=1,
                       help="A 'rewards.csv' input file")
cl_parser.add_argument("-d",
                       type=int,
                       default=60,
                       help="Size of intervals for analysis")
args = cl_parser.parse_args()

with open(args.input_file[0]) as fin:
    data = CsvDataset(fin)
    intervals = []
    # The reward columns start at 'r1'; compare values args.d columns
    # apart, advancing the window two columns at a time
    r1_idx = data.attribute_index("r1")
    for fv in data.feature_vectors:
        i = r1_idx
        j = i + args.d
        while j < len(data.attributes):
            ri = float(fv[i])
            rj = float(fv[j])
            intervals.append(abs(rj - ri))
            i += 2
            j += 2

print("n:      " + str(len(intervals)))
print("mean:   " + str(statistics.mean(intervals)))
print("stddev: " + str(statistics.stdev(intervals)))
print("min:    " + str(min(intervals)))
print("max:    " + str(max(intervals)))
Example #4
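This excerpt appears to repair a known data problem: output from the Saving domain contained an extra 'Nepisodes' header that shifted every column by one, so the script detects the stray header, deletes the resulting empty 'sailing.p' column from every row, and then removes the header itself.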
# out_attributes = [
# "domain", "criterion", "ss.abstraction", "par.subtree_refinement_order", "par.classifier", "random_partition.k", "ss.budget_type", "ss.budget",
# "V_mean", "V_var", "V_conf", "ss.width", "ss.depth", "seed"
# ]

out_data = CsvDataset(attributes=[CsvAttribute(a) for a in out_attributes],
                      feature_vectors=[])

with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

# [jhostetler:20160804] The Saving domain had an extra header in its output,
# causing everything to be shifted by 1.
Nepisodes_idx = len(in_data.attributes)
try:
    Nepisodes_idx = in_data.attribute_index("Nepisodes")
except KeyError:
    # Exception indicates that problematic field was not present
    pass
if Nepisodes_idx != len(in_data.attributes):
    print("WARNING: Found 'Nepisodes' header; correcting dataset")
    sailing_p = in_data.attribute_index("sailing.p")
    for fv in in_data.feature_vectors:
        assert str(fv[sailing_p]) == ""
        del fv[sailing_p]
    # Delete the header last so that indexing is less confusing
    del in_data.attributes[Nepisodes_idx]
assert len(in_data.attributes) == len(in_data.feature_vectors[0])
# /End hack

# Skip index < 4 because they are not found in the summary file
Example #5
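This excerpt appears to iterate over experiment directories, pairing each experiment's parameter file with its rewards.csv; experiments whose rewards file is missing are skipped, and their fault parameter is recorded in the missing list.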
missing = []
for filename in args:
    # for filename in glob.iglob( args[0] ):
    # experiment = os.path.dirname( filename )
    experiment = filename
    print("'" + experiment + "'")
    with open(experiment + ".csv") as fparams:
        params = CsvDataset(fparams)
        try:
            f = open(os.path.join(filename, "rewards.csv"))
        except IOError as ex:
            print("WARNING: Skipping " + filename)
            print(str(ex))
            missing.append(
                params.feature_vectors[0][params.attribute_index("fault")])
        else:
            with f:
                data = CsvDataset(f)
                # assert( len(params.feature_vectors) == len(data.feature_vectors) )
                [iTstable, iTepisode] = map(params.attribute_index,
                                            ["Tstable", "Tepisode"])
                (sV, sR_end, st_blackout, sNLoadShed, sNIsland) = ([], [], [],
                                                                   [], [])
                Nblackout = 0
                params_fv = params.feature_vectors[0]
                Tstable = int(params_fv[iTstable])
                Tepisode = int(params_fv[iTepisode])

                if options.algorithm is not None:
                    algorithm = options.algorithm
Example #6
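This excerpt appears to 'explode' each row of a dataset into its own single-row CSV file, optionally subdividing an integer column into near-equal parts (10 split three ways becomes 4, 3, 3) so that each part gets its own file.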
                     type="int",
                     default=1,
                     help="Number of subdivisions.")
(options, args) = cl_parser.parse_args()

csv_file = open(args[0], "r")
csv_dataset = CsvDataset(csv_file)

count = 0
for fv in csv_dataset.feature_vectors:
    print(fv)
    exploded = CsvDataset(attributes=csv_dataset.attributes[:],
                          feature_vectors=[fv[:]])
    print(exploded.feature_vectors[0])
    if options.subdivide is not None:
        idx = exploded.attribute_index(options.subdivide)
        n = int(exploded.feature_vectors[0][idx])
        # Split n into 'subdivisions' near-equal integer parts
        q, r = divmod(n, options.subdivisions)
        sub = [q] * options.subdivisions
        for i in range(r):
            sub[i] += 1
        for i in range(len(sub)):
            subdiv_fv = exploded.feature_vectors[0][:]
            subdiv_fv[idx] = sub[i]
            subdivided = CsvDataset(attributes=exploded.attributes[:],
                                    feature_vectors=[subdiv_fv])
            name = os.path.splitext(
                args[0])[0] + "_" + str(count) + "_" + str(i) + ".csv"
            with open(name, "w") as out:
                out.write(repr(subdivided))
Example #7
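This excerpt appears to flatten per-timestep reward columns (t0, t1, ...) into one summary row per episode, recording the total return and the final-step reward, with a correction for early runs in which the fault occurred one step late.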
                      feature_vectors=[])

for filename in args:
    with open(filename) as f:
        data = CsvDataset(f)
        [iTstable, iTepisode, iNfaults, ifault0,
         ifault1] = map(data.attribute_index,
                        ["Tstable", "Tepisode", "Nfaults", "fault0", "fault1"])
        for fv in data.feature_vectors:
            Tstable = int(fv[iTstable])
            Tepisode = int(fv[iTepisode])
            T = Tstable + Tepisode
            r = 0.0
            Rend = 0
            for t in range(0, T):
                i = data.attribute_index("t" + str(t))
                r += float(fv[i])
                if t == T - 1:
                    Rend = float(fv[i])
            # Special case to fix early results that had the fault occur one
            # step later: Duplicate last time step reward to compensate
            if opt.fix_fault_time:
                r -= float(fv[data.attribute_index("t8")])
                r += Rend
            out_fv = list(map(str, [
                Tstable, Tepisode, r, Rend, fv[iNfaults], fv[ifault0],
                fv[ifault1]
            ]))
            assert len(out_fv) == len(out_data.attributes)
            out_data.feature_vectors.append(out_fv)
Example #8
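This excerpt appears to group rows of a results file with KeyBuilder keys, by domain parameters and by algorithm, collecting values for the PAR* algorithm variants into a two-level map.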
    output_file = open(options.output_file, "w")

with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

domain_kb = KeyBuilder(in_data, ["domain_params"])
# par_kb = KeyBuilder( in_data, ["ss.abstraction", "par.priority", "par.classifier"] )
par_kb = KeyBuilder(in_data, ["Algorithm"])
values = dict()
# The "blocks" are Domain x Budget
block_kb = KeyBuilder(in_data, ["domain_params", "ss.budget"])
column_kb = KeyBuilder(in_data, ["Algorithm"])
friedman = dict()
alg_set = set()
for fv in in_data.feature_vectors:
    if not fv[in_data.attribute_index("Algorithm")].startswith(
            "PAR"):  # != "par":
        continue
    print("par: " + fv[in_data.attribute_index("Algorithm")])
    d = domain_kb.key(fv)
    print("d: " + str(d))
    try:
        dmap = values[d]
    except KeyError:
        dmap = dict()
        values[d] = dmap
    p = par_kb.key(fv)
    print("p: " + str(p))
    try:
        v = dmap[p]
    except KeyError:
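Example #9

This excerpt appears to repair rewards.csv files that were written without a header row: it grafts the attribute list from a known-good header_dataset onto the damaged file, copies the missing leading parameter values from the experiment's parameter file, and rewrites the file in place.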
    with open(rewards_filename, "r+") as rewards_file:
        lines = sum(1 for line in rewards_file)

        if lines == 2:
            # Assumed good
            print(rewards_filename)

        if False:
            # if lines == 1:
            print("Missing header: " + rewards_filename)
            rewards_file.seek(0, 0)
            bad_dataset = CsvDataset(rewards_file, headers=False)
            bad_dataset.attributes = header_dataset.attributes[:]
            params_filename = os.path.dirname(rewards_filename) + ".csv"
            print(params_filename)
            with open(params_filename) as params_file:
                params_dataset = CsvDataset(params_file)
            params = params_dataset.feature_vectors[0]
            # Fix the feature vectors
            fv = bad_dataset.feature_vectors[0]
            fv[0] = str(0)  # Filled with NUL for some reason
            for i in range(0, 3):
                # Copy missing parameters
                attr_name = header_dataset.attributes[i].name
                attr_idx = params_dataset.attribute_index(attr_name)
                print("\tInsert " + attr_name + " = " + params[attr_idx])
                fv.insert(i, params[attr_idx])
            # Commit changes
            rewards_file.seek(0, 0)
            rewards_file.write(repr(bad_dataset))
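Examples #8 and #10 also rely on a KeyBuilder class that never appears on this page. A plausible minimal version, again inferred only from how it is called (constructed from a dataset and a list of attribute names, with a key(fv) method whose result is used as a dict key):

class KeyBuilder:
    def __init__(self, dataset, attribute_names):
        # Precompute the column indices of the key attributes
        self.indices = [dataset.attribute_index(n) for n in attribute_names]

    def key(self, fv):
        # A tuple of the selected column values: hashable, so it can
        # serve directly as a dict key
        return tuple(fv[i] for i in self.indices)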
Example #10
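This excerpt appears to assemble the table for a Friedman test: the blocks are Domain x Budget combinations, the treatments are six PARSS variants, and each cell holds that variant's V_mean.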
with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

# The "blocks" are Domain x Budget
block_kb = KeyBuilder(in_data, ["domain_params", "ss.budget"])
# The "treatments" are the different PARSS variants
column_kb = KeyBuilder(in_data, ["Algorithm"])
# This will be a two-level map Block -> Column -> V_mean
friedman = dict()
alg_set = set([
    "PAR(bf; DT)", "PAR(bf; random)", "PAR(uniform; DT)",
    "PAR(uniform; random)", "PAR(variance; DT)", "PAR(variance; random)"
])
# Gather results into 'friedman' table
for fv in in_data.feature_vectors:
    alg = fv[in_data.attribute_index("Algorithm")]
    if alg not in alg_set:
        continue

    V_mean = float(fv[in_data.attribute_index("V_mean")])
    # Stuff for Friedman's test
    b = block_kb.key(fv)
    try:
        block = friedman[b]
    except KeyError:
        block = dict()
        friedman[b] = block
    c = alg
    block[c] = V_mean

# Assign sequential indices to algorithms
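The excerpt ends before the test statistic is computed. As a purely hypothetical continuation, the finished friedman table could be fed to scipy.stats.friedmanchisquare, assuming every block contains a value for every algorithm:

from scipy.stats import friedmanchisquare

alg_list = sorted(alg_set)
# One sample per algorithm, ordered consistently across blocks
samples = [[friedman[b][alg] for b in sorted(friedman)] for alg in alg_list]
stat, p = friedmanchisquare(*samples)
print("Friedman chi-square = " + str(stat) + ", p = " + str(p))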
Example #11
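This excerpt appears to create cross-validation subsets of faults: it keeps the faults whose blackout time falls in a fixed 'recoverable' window, shuffles them (optionally with a fixed seed), and splits them across k output files.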
cl_parser.add_argument("-k",
                       type=int,
                       default=1,
                       help="Number of subsets for cross-validation")
cl_parser.add_argument("--seed",
                       type=int,
                       default=0,
                       help="RNG seed (default: no particular seed)")
args = cl_parser.parse_args()

if args.N == 0 and args.k != 1:
    raise RuntimeError("If args.N == 0, then args.k must be 1")

with open(args.input_file[0]) as input_file:
    data = CsvDataset(input_file)
    t_blackout = data.attribute_index("t_blackout")
    faults = data.attribute_index("faults")
    recoverable_faults = []
    for fv in data.feature_vectors:
        t = int(fv[t_blackout])
        # A fault counts as recoverable if the blackout happens inside this window
        if 10 < t < 311:
            recoverable_faults.append(fv[faults])
    if args.N * args.k > len(recoverable_faults):
        raise AssertionError("Not enough faults for requested dataset size")
    rng = random.Random()
    if args.seed != 0:
        rng.seed(args.seed)
    rng.shuffle(recoverable_faults)
    next = 0
    for i in range(0, args.k):
        filename = args.name + str(i) + ".txt"
Example #12
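This excerpt appears to filter a CSV file, keeping only rows whose value in a chosen attribute matches a given string, and writing the result either to a file or to stdout.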
cl_parser.add_option("--attribute",
                     dest="attribute",
                     type="string",
                     default=None,
                     help="The attribute to match")
cl_parser.add_option("--value",
                     dest="value",
                     type="string",
                     default=None,
                     help="The value to match")
(options, args) = cl_parser.parse_args()


def open_output(options):
    if options.output_file == "-":
        # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
        # Note: the 'with open_output(...)' block below closes the returned
        # handle, so sys.stdout is closed on exit; harmless here because the
        # script ends immediately afterward
        return sys.stdout
    else:
        return open(options.output_file, "w")


with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)
    out_data = CsvDataset(attributes=in_data.attributes, feature_vectors=[])
    idx = in_data.attribute_index(options.attribute)
    for fv in in_data.feature_vectors:
        if fv[idx] == options.value:
            out_data.feature_vectors.append(fv[:])
with open_output(options) as output_file:
    output_file.write(repr(out_data))
Example #13
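This excerpt appears to join controlled and uncontrolled experiment results on their fault set, emitting one row per fault that compares the value estimates (Vc vs. Vu) and final rewards (Rc_end vs. Ru_end).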
"Domain", "domain_parameters", fault_str, "Algorithm", "algorithm_parameters", "budget_type", "budget",
"Vc", "Vu", "dV", "Rc_end", "Ru_end", "dR", "NLoadShed", "NIsland"
]

out_data = CsvDataset( attributes=[CsvAttribute( a ) for a in out_attributes], feature_vectors=[] )
	
with open( args[0], "r" ) as input_file:
	c_data = CsvDataset( input_file )
with open( args[1], "r" ) as input_file:
	u_data = CsvDataset( input_file )
	
# Index map for fault sets in baseline data
u_idx = dict()
for i in range(0, len(u_data.feature_vectors)):
	u_fv = u_data.feature_vectors[i]
	u_idx[u_fv[u_data.attribute_index(fault_str)]] = i
	
for c_fv in c_data.feature_vectors:
	faults = c_fv[c_data.attribute_index(fault_str)]
	Vc = float( c_fv[c_data.attribute_index("V")] )
	Rc_end = float( c_fv[c_data.attribute_index("R_end")] )
	ui = u_idx[faults]
	u_fv = u_data.feature_vectors[ui]
	Vu = float( u_fv[u_data.attribute_index("V")] )
	Ru_end = float( u_fv[u_data.attribute_index("R_end")] )
	
	out_fv = [""] * len(out_attributes)
	out_fv[0] = c_fv[c_data.attribute_index("experiment")]
	out_fv[1] = ""			# TODO domain_params
	out_fv[2] = faults
	out_fv[3] = "pr"		# TODO algorithm