def get_battery_data(datadir):
    files = get_paths_from_dir(datadir, file_matchers=['ttimes_pc_'])
    data = {}
    print("Opening JSONs, found {}".format(len(files)))
    for file in files:
        with open(file, "r") as f:
            # The test name is the part of the filename between
            # 'breakdown_' and '155'
            name = file.split('breakdown_')[1].split('155')[0]
            data[name] = json.load(f)['trial-times']
    print(data)
    return data
def get_battery_data(datadir):
    files = get_paths_from_dir(datadir)
    data = []
    print("Opening JSONs, found {}".format(len(files)))
    for file in files:
        with open(file, "r") as f:
            data.append(json.load(f))

    # Flatten the per-file JSON entries into parallel, time-sorted lists
    fmt_data = {}
    fmt_data["level"] = []
    fmt_data["Charge counter"] = []
    fmt_data["times"] = []

    # Key the entries by timestamp so they can be read out in sorted order
    newdata = {}
    for entry in data:
        newdata[entry["timestamp"]] = entry
    for key in sorted(newdata):
        fmt_data["level"].append(int(newdata[key]["level"]))
        fmt_data["Charge counter"].append(int(newdata[key]["Charge counter"]))
        fmt_data["times"].append(float(key))
    return fmt_data
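# Hypothetical usage sketch (not part of the original module): plot the
# battery level over time from the dict returned by get_battery_data().
# Assumes matplotlib is installed; "battery-data/" is a placeholder path.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    battery = get_battery_data("battery-data/")
    plt.plot(battery["times"], battery["level"])
    plt.xlabel("timestamp")
    plt.ylabel("battery level")
    plt.show()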
    {
        'name': 'logit_downsampled',
        'estimator': "SMLogit()",
        'share_zeros_keep': share_zeros_tenth,
        'share_ones_keep': share_ones_full,
    },
    {
        'name': 'rf_downsampled',
        'estimator': "pipe_rf_500",
        'share_zeros_keep': share_zeros_tenth,
        'share_ones_keep': share_ones_full,
    }
]

# load what we need from each model
paths_models = utils.get_paths_from_dir(dir_models, ".json")
wanted_kws = ["name", "name_base", "lhs", "rhs",
              "train_start", "train_end",
              "sim_start", "sim_end",
              "loa", "runtype", "period", "outcome_extension"]
renames = [
    {'old': 'lhs', 'new': 'outcome'},
    {'old': 'rhs', 'new': 'features'},
    {'old': 'sim_start', 'new': 'forecast_start'},
    {'old': 'sim_end', 'new': 'forecast_end'},
]

# subset the info we need from each of the raw models
models = []
def main_prep():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch", type=str,
                        help="temp directory in which to save data")
    parser.add_argument("--dir_input", type=str,
                        help="directory to read data from")
    args = parser.parse_args()

    dir_scratch = args.dir_scratch
    dir_input = args.dir_input

    path_params = dir_scratch + "params.json"
    params = load_params(path_params)
    params_data = params['data']
    groupvar = params_data['groupvar']
    timevar = params_data['timevar']
    allvars = get_allvars(params)

    # @TODO: This is stupid, remove
    if groupvar == "pg_id" and timevar == "month_id":
        dir_input += "pgm/"
    elif groupvar == "country_id" and timevar == "month_id":
        dir_input += "cm/"
    elif groupvar == "country_id" and timevar == "year_id":
        dir_input += "cy/"
    elif groupvar == "gwno" and timevar == "year":
        dir_input += "gwy/"

    dir_input_data = dir_input + "data/"
    dir_input_spatial = dir_input + "spatial/"
    dir_data = dir_scratch + "data/"
    dir_spatial = dir_scratch + "spatial/"
    dir_spatial_shapes = dir_spatial + "shapes/"

    paths_input_data = get_paths_from_dir(dir_input_data, extension=".hdf5")
    print("found:")
    for p in paths_input_data:
        print("\t", p)
    if len(paths_input_data) == 0:
        raise FileNotFoundError(
            "Didn't find any input files, did you specify --dir_input correctly?"
        )

    jobs_prep = []
    for path in paths_input_data:
        filename = path.split("/")[-1]
        job = {
            'path_input': path,
            'path_output': dir_data + filename,
            'timevar': timevar,
            'groupvar': groupvar,
            'allvars': allvars,
            'params': params
        }
        jobs_prep.append(job)

    for job in jobs_prep:
        worker_prep(job)

    # Simply copy all files from input/spatial/ to rundir/spatial/shapes/
    paths_input_shapes = get_paths_from_dir(dir_input_spatial)
    for path_input in paths_input_shapes:
        filename = path_input.split("/")[-1]
        destination = dir_spatial_shapes + filename
        shutil.copyfile(path_input, destination)
        print("Prep copied", path_input, "to", destination)

    print("Prep finished")
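# Hypothetical wiring (the rest of the file isn't shown, so the entry point
# and script name below are assumptions), e.g.:
#
#   python prep.py --dir_scratch /tmp/run1/ --dir_input ./input/
if __name__ == "__main__":
    main_prep()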
def get_srumutil_files(datadir):
    files = get_paths_from_dir(datadir, file_matchers=['srumutil'])
    return files
def get_wpa_data(testdir, apps, excluded_apps, testtime):
    print("Getting WPA data...")
    files = get_paths_from_dir(os.path.join(testdir, 'etl-data'),
                               file_matchers=KNOWN_TABLES)

    # Find the per-process summary table, which defines the test window
    for file in files:
        command_file = pattern_find(
            file, ['Processes_Summary_Table_Lifetime_By_Process'])
        if command_file:
            command_file = file
            break
    files = list(set(files) - set([command_file]))

    starttime, endtime = get_borders(command_file, testtime)
    print("Start time: {}, End time: {}".format(str(starttime), str(endtime)))

    currdata = {}
    for i, file in enumerate(files):
        print("Processing {}...".format(str(file)))
        header, data = open_wpa_csv(file)

        name = ''
        for table in KNOWN_TABLES:
            if pattern_find(file, [table]):
                name = table
                break

        # Expecting times as the first column, and
        # data as the second column
        times = [
            float(t[0, 0].replace(',', ''))
            for t in np.asmatrix(data)[:, 0]
        ]
        data = [
            float(d[0, 0].replace(',', ''))
            for d in np.asmatrix(data)[:, 1]
        ]

        # Drop samples recorded before the test window starts
        if times[0] < starttime:
            first_ind = 0
            for i, t in enumerate(times):
                if t < starttime:
                    continue
                else:
                    first_ind = i
                    break
            times = times[first_ind:]
            data = data[first_ind:]

        # Drop samples recorded after the test window ends
        if times[-1] > endtime:
            last_ind = len(times) - 1
            for i, t in enumerate(times):
                if t < endtime:
                    continue
                else:
                    last_ind = i
                    break
            times = times[:last_ind + 1]
            data = data[:last_ind + 1]

        # Interpolate onto a uniform grid (60 samples per time unit)
        # spanning the test window
        xvals = np.arange(0, endtime - starttime, 1 / 60)
        currdata[name] = {
            'times': xvals,
            'data': list(np.interp(xvals, times, data)),
            'srate': 60
        }

    print("Total datapoints found: %s" % len(list(currdata.keys())))
    return header, currdata
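# Hypothetical self-contained sketch (not from the original module) of the
# resampling idea used in get_wpa_data: interpolate irregular samples onto a
# uniform 60-samples-per-unit grid with np.interp. The data below is made up.
def _resample_example():
    import numpy as np

    times = [0.0, 0.5, 1.2, 2.0]      # irregular sample times
    values = [10.0, 12.0, 9.0, 11.0]  # measurements at those times

    xvals = np.arange(0, times[-1] - times[0], 1 / 60)
    return xvals, np.interp(xvals, times, values)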
    with open(path_output, 'w') as f:
        f.write(paramfile)
    print("Wrote", path_output)


times = t.times_nested

dir_source = "./source/"
dir_paramfiles = "./output/ds/paramfiles/"
dir_runfiles = "./output/ds/runfiles/"
dir_publishes = "./output/ds/publishes/"

loas = ["pgm", "cm"]
runtypes = ["eval", "fcast"]
periods = ["calib", "test"]

paths_models = utils.get_paths_from_dir(dir="./output/models/",
                                        extension=".json")

# load all models
models = []
for path in paths_models:
    with open(path, 'r') as f:
        model = json.load(f)
        models.append(model)

# base ds runs, without time
runs_bases = get_ds_baseruns(models)

# make the runs with a name
runs = []
for run_base in runs_bases:
    for runtype in runtypes:
def get_batteryreport_files(datadir):
    files = get_paths_from_dir(datadir, file_matchers=['batteryreport'])
    return files