def train_model(self, x_train, y_train, x_val, y_val, batch_size,
                normalize=False, log_stats=True):
    self.train_cnn_model(self.cnn_model, x_train, y_train, x_val, y_val,
                         batch_size, normalize=normalize)
    layer_dict = dict([(layer.name, layer) for layer in self.cnn_model.layers])
    # x = layer_dict['flatten'].output
    x = self.cnn_model.layers[-2].output
    self.cropped_model = Model(self.cnn_model.input, x)
    cropped_model_train_output = self.cropped_model.predict(x_train)
    for sklearn_model, _ in self.classifiers:
        sklearn_model.fit(cropped_model_train_output, y_train)
    ensemble_model_predictions = self.predict(x_val)
    if log_stats:
        print("Ensemble predicted proba: ", ensemble_model_predictions)
        print("Validation ensemble Model result: ")
        stats(y_val, ensemble_model_predictions, 'Ensemble Method')

def _train(build_model, train_model, X_train, y_train, X_val, y_val, X_test,
           y_test, batch_size, gray_scale_model, normalize=False,
           cnn_file_results_path='./histories/cnn_norm.pickle'):
    model = build_model(gray_scale_model)
    train_model(model, X_train, y_train, X_val, y_val, batch_size, normalize)
    y_pred = model.predict(X_test).ravel()
    path_p = Path(cnn_file_results_path)
    if not path_p.exists():
        print("Initialize log file")
        with open(cnn_file_results_path, 'xb') as file:
            pickle.dump([], file)
    stats(y_test, y_pred, "CNN", cnn_file_results_path)
    return model

def _trainSK(build_model, train_model, X_train, y_train, X_val, y_val, X_test,
             y_test, sklearn_model, sklearn_model_name, batch_size,
             gray_scale_model, normalize,
             cnn_file_results_path='./histories/norm_'):
    print("TrainingModelSK")
    model = build_model(gray_scale_model)
    sklearn_model, cropped_model = train_model(model, X_train, y_train, X_val,
                                               y_val, sklearn_model,
                                               sklearn_model_name, batch_size,
                                               normalize)
    y_pred = sklearn_model.predict_proba(cropped_model.predict(X_test))[:, 1]
    path = cnn_file_results_path + sklearn_model_name + ".pickle"
    path_p = Path(path)
    if not path_p.exists():
        print("Initialize log file")
        with open(path, 'xb') as file:
            pickle.dump([], file)
    stats(y_test, y_pred, "CNN + " + sklearn_model_name, path)

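# NOTE: the training helpers above (and _trainV2/_train_ensemble below) assume
# a `stats(y_true, y_pred, model_name, results_path)` logging helper that prints
# evaluation metrics and appends them to the pickle log initialised with an
# empty list. Its real implementation is not part of this listing; the function
# below is only a minimal sketch under that assumption (hypothetical name and
# metric choice).
import pickle

import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score


def stats_log_sketch(y_true, y_pred, model_name, results_path=None):
    """Print basic metrics and optionally append them to a pickle log."""
    preds = (np.asarray(y_pred) > 0.5).astype(int)
    entry = {
        'model': model_name,
        'auc': roc_auc_score(y_true, y_pred),
        'accuracy': accuracy_score(y_true, preds),
    }
    print(model_name, entry)
    if results_path is not None:
        with open(results_path, 'rb') as f:
            history = pickle.load(f)  # list created by the callers above
        history.append(entry)
        with open(results_path, 'wb') as f:
            pickle.dump(history, f)
    return entry
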
def analyse_regionsm(region, rd=False):
    wpE_reg = wpE.loc[states_reg[region]].wp.unstack().transpose().sum(axis=1)
    wpE_GWA_reg = wpE_GWA.loc[states_reg[region]].wp.unstack().transpose().sum(axis=1)
    wpM_reg = wpM.loc[states_reg[region]].wp.unstack().transpose().sum(axis=1)
    wpM_GWA_reg = wpM_GWA.loc[states_reg[region]].wp.unstack().transpose().sum(axis=1)
    prod_regm = prod_USAm[region].dropna()
    # merge data
    wp_reg = pd.concat([wpE_reg, wpE_GWA_reg, wpM_reg, wpM_GWA_reg],
                       axis=1).tz_localize('UTC').tz_convert('US/Central')
    wp_reg.columns = ['ERA5', 'ERA5_GWA', 'MERRA2', 'MERRA2_GWA']
    # aggregate monthly
    wp_regm = wp_reg.resample('M').sum()
    # combine data and calculate capacity factors
    cf_regm = pd.concat([
        wp_regm.div(caps_reg[region], axis=0),
        (prod_regm.resample('M').sum() * 10**6 / caps_reg[region])
    ], axis=1).dropna()
    cf_regm.columns = np.append(wp_regm.columns, 'wp_obs')
    # Analyse
    stats_regm = pd.DataFrame(
        {
            'ERA5': stats(cf_regm.ERA5, cf_regm.wp_obs, rd),
            'ERA5_GWA': stats(cf_regm.ERA5_GWA, cf_regm.wp_obs, rd),
            'MERRA2': stats(cf_regm.MERRA2, cf_regm.wp_obs, rd),
            'MERRA2_GWA': stats(cf_regm.MERRA2_GWA, cf_regm.wp_obs, rd),
            'obs': [np.nan, np.nan, np.nan, cf_regm.wp_obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg'])
    stats_regm.index = pd.MultiIndex.from_product(
        [[region], stats_regm.index.values], names=['state', 'param'])
    return stats_regm

def analyse_statesm(state, rd=False):
    # merge data
    wp_st = pd.concat([
        wpE.wp.loc[state], wpE_GWA.wp.loc[state], wpM.wp.loc[state],
        wpM_GWA.wp.loc[state]
    ], axis=1).tz_localize('UTC').tz_convert('US/Central')
    wp_st.columns = ['ERA5', 'ERA5_GWA', 'MERRA2', 'MERRA2_GWA']
    # aggregate monthly
    wp_stm = wp_st.resample('M').sum()
    # combine data and calculate capacity factors
    cf_stm = pd.concat([
        wp_stm.div(cap_statesm[state], axis=0),
        (prod_USAm[state].dropna().resample('M').sum() * 10**6 /
         (cap_statesm[state]))
    ], axis=1).replace(np.inf, np.nan).dropna()[1:]
    cf_stm.columns = np.append(wp_stm.columns, 'wp_obs')
    # Analyse
    stats_stm = pd.DataFrame(
        {
            'ERA5': stats(cf_stm.ERA5, cf_stm.wp_obs, rd),
            'ERA5_GWA': stats(cf_stm.ERA5_GWA, cf_stm.wp_obs, rd),
            'MERRA2': stats(cf_stm.MERRA2, cf_stm.wp_obs, rd),
            'MERRA2_GWA': stats(cf_stm.MERRA2_GWA, cf_stm.wp_obs, rd),
            'obs': [np.nan, np.nan, np.nan, cf_stm.wp_obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg'])
    stats_stm.index = pd.MultiIndex.from_product(
        [[state], stats_stm.index.values], names=['state', 'param'])
    return stats_stm

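# NOTE: analyse_regionsm/analyse_statesm above (and the analyse_ZAF*/analyse_NZ*
# functions later in this listing) assume a `stats(sim, obs, rd)` helper that
# returns the four values filed under 'cor', 'rmse', 'mbe' and 'avg'. That helper
# is not included here; the following is only a sketch consistent with that usage,
# under the assumption that `rd` means "round to two decimals" (hypothetical name).
import numpy as np


def cf_stats_sketch(sim, obs, rd=True):
    """Return [correlation, RMSE, mean bias error, mean] for two pandas Series."""
    cor = sim.corr(obs)                        # Pearson correlation
    rmse = np.sqrt(((sim - obs) ** 2).mean())  # root mean square error
    mbe = (sim - obs).mean()                   # mean bias error
    avg = sim.mean()                           # mean simulated capacity factor
    out = [cor, rmse, mbe, avg]
    return [round(v, 2) for v in out] if rd else out
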
def _trainV2(build_model, train_model, X_train_paths, y_train, X_val_paths,
             y_val, X_test_paths, y_test, batch_size, gray_scale_model,
             normalize=False,
             cnn_file_results_path='./histories/cnn_norm.pickle'):
    dict_path_image = load_images(X_train_paths + X_val_paths + X_test_paths,
                                  y_train + y_val + y_test, gray_scale_model,
                                  normalize)
    X_train_ = [value for path, value in dict_path_image.items()
                if path in X_train_paths]
    X_val_ = [value for path, value in dict_path_image.items()
              if path in X_val_paths]
    X_test_ = [value for path, value in dict_path_image.items()
               if path in X_test_paths]
    del dict_path_image
    # Each value is an (image, label) pair. np.array(map(...)) does not
    # materialise the map object in Python 3, so use list comprehensions.
    X_train = np.array([x[0] for x in X_train_])
    y_train = np.array([x[1] for x in X_train_])
    del X_train_
    X_val = np.array([x[0] for x in X_val_])
    y_val = np.array([x[1] for x in X_val_])
    del X_val_
    X_test = np.array([x[0] for x in X_test_])
    y_test = np.array([x[1] for x in X_test_])
    del X_test_
    model = build_model(gray_scale_model)
    train_model(model, X_train, y_train, X_val, y_val, batch_size, normalize)
    y_pred = model.predict(X_test).ravel()
    path_p = Path(cnn_file_results_path)
    if not path_p.exists():
        print("Initialize log file")
        with open(cnn_file_results_path, 'xb') as file:
            pickle.dump([], file)
    stats(y_test, y_pred, "CNN", cnn_file_results_path)

def print_stats(self):
    print('Train stats:')
    utils.stats(tokens=self.train_tokens, lemmas=self.train_lemmas,
                known=self.preprocessor.known_tokens)
    print('Test stats:')
    utils.stats(tokens=self.test_tokens, lemmas=self.test_lemmas,
                known=self.preprocessor.known_tokens)

def ratings(name=""):
    name = str(name)
    mydb = db.db()
    q = utils.get_questions()
    me, all = mydb.getAllRatingsFor(name)
    mystats = utils.stats(me)
    allstats = utils.stats(all)
    ans = json.dumps({"questions": q, "me": mystats, "all": allstats})
    return ans

def allratings():
    mydb = db.db()
    users = userdb.get_users()
    emails = users.keys()
    ratings = []
    for e in emails:
        me, all = mydb.getAllRatingsFor(e)
        mystats = utils.stats(me)
        allstats = utils.stats(all)
        ratings.append({"email": e, "last": users[e]["last"],
                        "first": users[e]["first"], "stats": mystats})
    # allstats from the last loop iteration is reused for the page-wide column
    return render_template("allratings.html", ratings=ratings, all=allstats)

def run_code(arg, input_data, attachment, lang_id, channel_id):
    code = get_code(arg, attachment)
    print('Running code: ', code.decode())
    submission = api.submission.submit(client, code, lang_id,
                                       stdin=input_data.encode())
    status = submission.status
    output = submission.stdout
    errors = submission.stderr
    compile_output = submission.compile_output
    if output:
        output = output.decode()
    if errors:
        errors = errors.decode()
    if compile_output:
        compile_output = compile_output.decode()
    message = 'Status: ' + status['description'] + '\n'
    if output:
        message += 'Output: ```\n' + output + '\n```'
    else:
        message += 'No output sent.\n'
    if errors:
        message += 'Errors: ```\n' + errors + '\n```'
    if compile_output:
        message += ('Compiler output: ```\n' + compile_output + '\n```\n')
    message += stats(submission.time, submission.memory)
    send_message(channel_id, message)

def buildTree(data):
    if len(data) <= 0:
        return node()
    currentEnt = entropy(data)
    bestGain = 0.0
    bestCriteria = None
    bestSets = None
    dimension = len(data[0]) - 1
    for feature in range(dimension):
        # collect the distinct values of this feature
        feature_values = {}
        for item in data:
            feature_values[item[feature]] = 1
        for value in feature_values.keys():
            set1, set2 = split(data, feature, value)
            # p is the fraction of rows that fall into set1
            p = len(set1) / len(data)
            infoGain = currentEnt - p * entropy(set1) - (1 - p) * entropy(set2)
            if infoGain > bestGain and len(set1) > 0 and len(set2) > 0:
                bestGain = infoGain
                bestCriteria = (feature, value)
                bestSets = (set1, set2)
    if bestGain > 0:
        leftBranch = buildTree(bestSets[0])
        rightBranch = buildTree(bestSets[1])
        return node(feature=bestCriteria[0], threshold=bestCriteria[1],
                    left=leftBranch, right=rightBranch)
    else:
        return node(results=stats(data))

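# NOTE: buildTree relies on `node`, `split`, `entropy` and `stats` helpers that
# are defined elsewhere in its project. The definitions below are only a sketch
# of plausible implementations consistent with how buildTree uses them; they are
# not the original code.
import math
from collections import Counter


class node:
    def __init__(self, feature=-1, threshold=None, left=None, right=None,
                 results=None):
        self.feature = feature      # column index used for the split
        self.threshold = threshold  # value compared against that column
        self.left = left            # subtree for rows matching the split
        self.right = right          # subtree for the remaining rows
        self.results = results      # class counts at a leaf, else None


def split(data, feature, value):
    """Split rows on a feature: numeric -> >= value, otherwise equality."""
    if isinstance(value, (int, float)):
        match = lambda row: row[feature] >= value
    else:
        match = lambda row: row[feature] == value
    set1 = [row for row in data if match(row)]
    set2 = [row for row in data if not match(row)]
    return set1, set2


def stats(data):
    """Count class labels (assumed to be stored in the last column)."""
    return dict(Counter(row[-1] for row in data))


def entropy(data):
    """Shannon entropy of the class labels."""
    total = len(data)
    counts = stats(data)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())
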
def container_list():
    res = utils.stats()
    # if res["status"] != "error":
    #     return jsonify({"status": "success", "message": res})
    # else:
    #     return jsonify(res)
    return jsonify(res)

def _train_ensemble(classifiers, X_train, y_train, X_val, y_val, X_test,
                    y_test, batch_size, gray_scale_model, normalize=False,
                    mode='single',
                    cnn_file_results_path='./histories/ensemble_norm.pickle'):
    model = EnsembleClassifier(build_model,
                               train_model_with_Keras_ImageDataGenerator,
                               classifiers)
    model.build_model(gray_scale_model)
    model.train_model(X_train, y_train, X_val, y_val, batch_size,
                      normalize=normalize)
    if mode == 'single':
        y_pred = model.predict(X_test)
        path_p = Path(cnn_file_results_path)
        if not path_p.exists():
            print("Initialize log file")
            with open(cnn_file_results_path, 'xb') as file:
                pickle.dump([], file)
        stats(y_test, y_pred, "Ensemble", cnn_file_results_path)
    else:
        for y_pred, model_name in model.get_predictions(X_test):
            results_path = '/histories/' + 'norm_' + model_name + '.pickle'
            path_p = Path(results_path)
            if not path_p.exists():
                print("Initialize log file")
                with open(results_path, 'xb') as file:
                    pickle.dump([], file)
            stats(y_test, y_pred, model_name, str(path_p))

def analyse_ZAFd(region):
    '''
    analyse daily wind power generation for a region
    '''
    mask = (ZAFh[region].notna() * capdfH[region].notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': ZAFm[region].tz_convert('Africa/Johannesburg') * mask,
        'ERA5': ZAFe[region].tz_convert('Africa/Johannesburg') * mask,
        'MERRA2_GWA2': ZAFmg2[region].tz_convert('Africa/Johannesburg') * mask,
        'ERA5_GWA2': ZAFeg2[region].tz_convert('Africa/Johannesburg') * mask,
        'MERRA2_GWA3': ZAFmg3[region].tz_convert('Africa/Johannesburg') * mask,
        'ERA5_GWA3': ZAFeg3[region].tz_convert('Africa/Johannesburg') * mask
    })
    # get capacities and mask
    caph = capdfH[region].tz_convert('Africa/Johannesburg') * mask
    # aggregate daily
    capd = caph.resample('D').sum()
    compd = comph.resample('D').sum()
    # calculate capacity factors
    cfd = compd.div(capd, axis=0)
    # add observed CFs
    cfd['obs'] = cfd.index.map((ZAFh[region] * mask).resample('D').mean())
    # remove capacity factors > 1 and missing data
    cfd = cfd.mask(cfd > 1).dropna()
    stat_d = pd.DataFrame(
        {
            'ERA5': stats(cfd.ERA5, cfd.obs, False),
            'ERA5_GWA2': stats(cfd.ERA5_GWA2, cfd.obs, False),
            'ERA5_GWA3': stats(cfd.ERA5_GWA3, cfd.obs, False),
            'MERRA2': stats(cfd.MERRA2, cfd.obs, False),
            'MERRA2_GWA2': stats(cfd.MERRA2_GWA2, cfd.obs, False),
            'MERRA2_GWA3': stats(cfd.MERRA2_GWA3, cfd.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfd.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_d.columns = ['param', 'dataset', region]
    return stat_d.set_index(['param', 'dataset']).transpose()

def analyse_NZh():
    '''
    analyse hourly wind power generation for NZ
    '''
    mask = (prod_NZh.notna() * capdfH.notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': (NZm * mask).sum(axis=1),
        'ERA5': (NZe * mask).sum(axis=1),
        'MERRA2_GWA2': (NZmg2 * mask).sum(axis=1),
        'ERA5_GWA2': (NZeg2 * mask).sum(axis=1),
        'MERRA2_GWA3': (NZmg3 * mask).sum(axis=1),
        'ERA5_GWA3': (NZeg3 * mask).sum(axis=1)
    })
    comph['obs'] = comph.index.map((prod_NZh * mask).sum(axis=1)) / 1000
    # get capacities
    caph = (capdfH * mask).sum(axis=1)
    # calculate capacity factors
    cfh = comph.div(caph, axis=0)
    # remove capacity factors > 1
    cfh = cfh.mask(cfh > 1).dropna()
    stat_h = pd.DataFrame(
        {
            'ERA5': stats(cfh.ERA5, cfh.obs, False),
            'ERA5_GWA2': stats(cfh.ERA5_GWA2, cfh.obs, False),
            'ERA5_GWA3': stats(cfh.ERA5_GWA3, cfh.obs, False),
            'MERRA2': stats(cfh.MERRA2, cfh.obs, False),
            'MERRA2_GWA2': stats(cfh.MERRA2_GWA2, cfh.obs, False),
            'MERRA2_GWA3': stats(cfh.MERRA2_GWA3, cfh.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfh.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_h.columns = ['param', 'dataset', 'NZ']
    return stat_h.set_index(['param', 'dataset']).transpose()

def analyse_NZparkh(park):
    '''
    analyse hourly wind power generation for one park
    '''
    comph = pd.DataFrame({
        'MERRA2': NZm[park],
        'ERA5': NZe[park],
        'MERRA2_GWA2': NZmg2[park],
        'ERA5_GWA2': NZeg2[park],
        'MERRA2_GWA3': NZmg3[park],
        'ERA5_GWA3': NZeg3[park]
    })
    comph['obs'] = comph.index.map(prod_NZh[park]) / 1000
    # get capacities
    caph = capdfH[park]
    # calculate capacity factors
    cfh = comph.div(caph, axis=0)
    # remove capacity factors > 1
    cfh = cfh.mask(cfh > 1).dropna()
    stat_h = pd.DataFrame(
        {
            'ERA5': stats(cfh.ERA5, cfh.obs, False),
            'ERA5_GWA2': stats(cfh.ERA5_GWA2, cfh.obs, False),
            'ERA5_GWA3': stats(cfh.ERA5_GWA3, cfh.obs, False),
            'MERRA2': stats(cfh.MERRA2, cfh.obs, False),
            'MERRA2_GWA2': stats(cfh.MERRA2_GWA2, cfh.obs, False),
            'MERRA2_GWA3': stats(cfh.MERRA2_GWA3, cfh.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfh.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_h.columns = ['param', 'dataset', park]
    return stat_h.set_index(['param', 'dataset']).transpose()

def analyse_ZAFm(region):
    '''
    analyse monthly wind power generation for a region
    '''
    # mask for masking simulated data and capacities
    # (to only use timespans where also observed data are available)
    mask = (ZAFh[region].notna() * capdfH[region].notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': ZAFm[region].tz_convert('Africa/Johannesburg') * mask,
        'ERA5': ZAFe[region].tz_convert('Africa/Johannesburg') * mask,
        'MERRA2_GWA2': ZAFmg2[region].tz_convert('Africa/Johannesburg') * mask,
        'ERA5_GWA2': ZAFeg2[region].tz_convert('Africa/Johannesburg') * mask,
        'MERRA2_GWA3': ZAFmg3[region].tz_convert('Africa/Johannesburg') * mask,
        'ERA5_GWA3': ZAFeg3[region].tz_convert('Africa/Johannesburg') * mask
    })
    # get capacities and mask
    caph = capdfH[region].tz_convert('Africa/Johannesburg') * mask
    # aggregate monthly
    capm = caph.resample('M').sum()
    compm = comph.resample('M').sum()
    # calculate capacity factors
    cfm = compm.div(capm, axis=0)
    # add observed data
    cfm['obs'] = cfm.index.map((ZAFh[region] * mask).resample('M').mean())
    # remove capacity factors > 1 and missing data
    cfm = cfm.mask(cfm > 1).dropna()
    stat_m = pd.DataFrame(
        {
            'ERA5': stats(cfm.ERA5, cfm.obs, False),
            'ERA5_GWA2': stats(cfm.ERA5_GWA2, cfm.obs, False),
            'ERA5_GWA3': stats(cfm.ERA5_GWA3, cfm.obs, False),
            'MERRA2': stats(cfm.MERRA2, cfm.obs, False),
            'MERRA2_GWA2': stats(cfm.MERRA2_GWA2, cfm.obs, False),
            'MERRA2_GWA3': stats(cfm.MERRA2_GWA3, cfm.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfm.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_m.columns = ['param', 'dataset', region]
    return stat_m.set_index(['param', 'dataset']).transpose()

def analyse_ZAFh(region):
    '''
    analyse hourly wind power generation for a region
    '''
    comph = pd.DataFrame({
        'MERRA2': ZAFm[region],
        'ERA5': ZAFe[region],
        'MERRA2_GWA2': ZAFmg2[region],
        'ERA5_GWA2': ZAFeg2[region],
        'MERRA2_GWA3': ZAFmg3[region],
        'ERA5_GWA3': ZAFeg3[region]
    })
    # get capacities
    caph = capdfH[region]
    # calculate capacity factors
    cfh = comph.div(caph, axis=0).tz_convert('Africa/Johannesburg')
    # add observed data
    cfh['obs'] = cfh.index.map(ZAFh[region])
    # remove capacity factors > 1 and lines with missing data
    cfh = cfh.mask(cfh > 1).dropna()
    stat_h = pd.DataFrame(
        {
            'ERA5': stats(cfh.ERA5, cfh.obs, False),
            'ERA5_GWA2': stats(cfh.ERA5_GWA2, cfh.obs, False),
            'ERA5_GWA3': stats(cfh.ERA5_GWA3, cfh.obs, False),
            'MERRA2': stats(cfh.MERRA2, cfh.obs, False),
            'MERRA2_GWA2': stats(cfh.MERRA2_GWA2, cfh.obs, False),
            'MERRA2_GWA3': stats(cfh.MERRA2_GWA3, cfh.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfh.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_h.columns = ['param', 'dataset', region]
    return stat_h.set_index(['param', 'dataset']).transpose()

def test_code(arg, attachment, lang_id, problem_name, channel_id):
    code = get_code(arg, attachment)
    print('Running test code: ', code.decode())
    cases = get_problem(problem_name).cases
    message = ''
    for stdin in cases:
        expected_output = cases[stdin]
        submission = api.submission.submit(
            client, code, lang_id, stdin=stdin.encode(),
            expected_output=expected_output.encode())
        status = submission.status
        message += 'Status: ' + status['description'] + '\n'
        message += stats(submission.time, submission.memory)
    send_message(channel_id, message)

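# NOTE: run_code/test_code append the result of `stats(submission.time,
# submission.memory)` to the reply message, so that helper must return a string.
# It is not shown in this listing; a minimal sketch (the units of seconds and
# kilobytes are an assumption) could be:
def judge_stats_sketch(time, memory):
    """Format the judge's reported execution time and memory usage."""
    return 'Time: {} s, Memory: {} KB\n'.format(time, memory)
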
def train_model_sklearn(model: models.Sequential, x_train, y_train, x_val,
                        y_val, sklearn_model, sklearn_model_name, batch_size,
                        normalize=False):
    model = train_model_with_Keras_ImageDataGenerator(model, x_train, y_train,
                                                      x_val, y_val, batch_size,
                                                      normalize=normalize)
    layer_dict = dict([(layer.name, layer) for layer in model.layers])
    # x = layer_dict['flatten'].output
    x = model.layers[-2].output
    from tensorflow.keras.models import Model
    cropped_model = Model(model.input, x)
    del model
    cropped_model_train_output = cropped_model.predict(x_train)
    del x_train
    sklearn_model.fit(cropped_model_train_output, y_train)
    cropped_model_val_output = cropped_model.predict(x_val)
    sklearn_model_predicted = sklearn_model.predict_proba(
        cropped_model_val_output)[:, 1]
    print("SKlearn predicted proba: ", sklearn_model_predicted)
    print("Validation sklearn Model result: ")
    stats(y_val, sklearn_model_predicted, 'CNN + ' + sklearn_model_name)
    return sklearn_model, cropped_model

def histogram(metrics, figure_num):
    global min, max
    # Calculate data needed to plot the normal distribution graph
    stats = utils.stats(metrics)
    dist = scipy.stats.norm(stats['mean'], stats['std_dev'])
    minimum = int(min(metrics))
    maximum = int(max(metrics))
    # guard against a zero step when the value range is narrower than 100
    step = max(1, (maximum - minimum) // 100)
    values = [value for value in range(minimum, maximum, step)]
    probabilities = [dist.pdf(value) for value in values]
    # Plot
    plt.figure(figure_num)
    plt.hist(metrics, bins=50, density=True)
    plt.plot(values, probabilities)
    plt.show()

def _calculate(self):
    """
    Private method used to calculate the baseline from the base slots.
    The base slots are not used as the baseline directly because doing so
    would require additional computation on every is_alerting call.
    """
    bl = [[[] for _ in range(DAY_LEN)] for _ in range(WEEK_LEN)]
    self.baseline = [[None for _ in range(DAY_LEN)] for _ in range(WEEK_LEN)]
    # Build collection slot arrays
    for base_slot in self.base_slots:
        for day in range(WEEK_LEN):
            for hour in range(DAY_LEN):
                bl[day][hour].append(base_slot.get_item(day, hour))
    # Build baseline object
    for day in range(WEEK_LEN):
        for hour in range(DAY_LEN):
            stats = utils.stats(bl[day][hour])
            self.baseline[day][hour] = CollectionSlot(stats['mean'],
                                                      stats['std_dev'])

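# NOTE: both histogram() and _calculate() index the result of utils.stats with
# 'mean' and 'std_dev'. The helper itself is not part of this listing; a minimal
# sketch consistent with that usage (hypothetical name; population vs. sample
# standard deviation is an assumption) is:
import statistics


def stats_dict_sketch(values):
    """Return the mean and standard deviation of a sequence of numbers."""
    return {
        'mean': statistics.mean(values),
        'std_dev': statistics.pstdev(values),
    }
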
def analyse_NZparkm(park):
    '''
    analyse monthly wind power generation for one park
    '''
    # mask for masking simulated data and capacities
    # (to only use timespans where also observed data are available)
    mask = (prod_NZh[park].notna() * capdfH[park].notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': NZm[park] * mask,
        'ERA5': NZe[park] * mask,
        'MERRA2_GWA2': NZmg2[park] * mask,
        'ERA5_GWA2': NZeg2[park] * mask,
        'MERRA2_GWA3': NZmg3[park] * mask,
        'ERA5_GWA3': NZeg3[park] * mask
    })
    comph['obs'] = comph.index.map(prod_NZh[park] * mask) / 1000
    # get capacities and mask
    caph = capdfH[park] * mask
    # aggregate monthly
    capm = caph.resample('M').sum()
    compm = comph.resample('M').sum()
    # calculate capacity factors
    cfm = compm.div(capm, axis=0)
    # remove capacity factors > 1
    cfm = cfm.mask(cfm > 1).dropna()
    stat_m = pd.DataFrame(
        {
            'ERA5': stats(cfm.ERA5, cfm.obs, False),
            'ERA5_GWA2': stats(cfm.ERA5_GWA2, cfm.obs, False),
            'ERA5_GWA3': stats(cfm.ERA5_GWA3, cfm.obs, False),
            'MERRA2': stats(cfm.MERRA2, cfm.obs, False),
            'MERRA2_GWA2': stats(cfm.MERRA2_GWA2, cfm.obs, False),
            'MERRA2_GWA3': stats(cfm.MERRA2_GWA3, cfm.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfm.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_m.columns = ['param', 'dataset', park]
    return stat_m.set_index(['param', 'dataset']).transpose()

def analyse_NZm():
    '''
    analyse monthly wind power generation for NZ
    '''
    # mask for masking simulated data and capacities
    # (to only use timespans where also observed data are available)
    mask = (prod_NZh.notna() * capdfH.notna()).replace(0, np.nan)
    # mask and aggregate simulated data
    comph = pd.DataFrame({
        'MERRA2': (NZm * mask).sum(axis=1),
        'ERA5': (NZe * mask).sum(axis=1),
        'MERRA2_GWA2': (NZmg2 * mask).sum(axis=1),
        'ERA5_GWA2': (NZeg2 * mask).sum(axis=1),
        'MERRA2_GWA3': (NZmg3 * mask).sum(axis=1),
        'ERA5_GWA3': (NZeg3 * mask).sum(axis=1)
    })
    comph['obs'] = comph.index.map(prod_NZh.sum(axis=1)) / 1000
    # mask and aggregate capacities
    caph = (capdfH * mask).sum(axis=1)
    # aggregate monthly
    compm = comph.resample('M').sum()
    capm = caph.resample('M').sum()
    # calculate capacity factors
    cfmu = compm.div(capm, axis=0).dropna()
    cfm = cfmu.mask(cfmu > 1).dropna()
    stat_m = pd.DataFrame(
        {
            'ERA5': stats(cfm.ERA5, cfm.obs, False),
            'ERA5_GWA2': stats(cfm.ERA5_GWA2, cfm.obs, False),
            'ERA5_GWA3': stats(cfm.ERA5_GWA3, cfm.obs, False),
            'MERRA2': stats(cfm.MERRA2, cfm.obs, False),
            'MERRA2_GWA2': stats(cfm.MERRA2_GWA2, cfm.obs, False),
            'MERRA2_GWA3': stats(cfm.MERRA2_GWA3, cfm.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfm.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_m.columns = ['param', 'dataset', 'NZ']
    return stat_m.set_index(['param', 'dataset']).transpose()

def analyse_NZparkd(park):
    '''
    analyse daily wind power generation for one park
    '''
    mask = (prod_NZh[park].notna() * capdfH[park].notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': NZm[park] * mask,
        'ERA5': NZe[park] * mask,
        'MERRA2_GWA2': NZmg2[park] * mask,
        'ERA5_GWA2': NZeg2[park] * mask,
        'MERRA2_GWA3': NZmg3[park] * mask,
        'ERA5_GWA3': NZeg3[park] * mask
    })
    comph['obs'] = comph.index.map(prod_NZh[park] * mask) / 1000
    # get capacities and mask
    caph = capdfH[park] * mask
    # aggregate daily
    capd = caph.resample('D').sum()
    compd = comph.resample('D').sum()
    # calculate capacity factors
    cfd = compd.div(capd, axis=0)
    # remove capacity factors > 1
    cfd = cfd.mask(cfd > 1).dropna()
    stat_d = pd.DataFrame(
        {
            'ERA5': stats(cfd.ERA5, cfd.obs, False),
            'ERA5_GWA2': stats(cfd.ERA5_GWA2, cfd.obs, False),
            'ERA5_GWA3': stats(cfd.ERA5_GWA3, cfd.obs, False),
            'MERRA2': stats(cfd.MERRA2, cfd.obs, False),
            'MERRA2_GWA2': stats(cfd.MERRA2_GWA2, cfd.obs, False),
            'MERRA2_GWA3': stats(cfd.MERRA2_GWA3, cfd.obs, False),
            'obs': [np.nan, np.nan, np.nan, cfd.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg']).reset_index().melt(
            id_vars=['index']).dropna()
    stat_d.columns = ['param', 'dataset', park]
    return stat_d.set_index(['param', 'dataset']).transpose()

env.run()
# test_cache(jobList, hierarchy)
logger.info("Simulation Ends")
logger.info("Collecting Statistics...")
print("Simulation Ends")
print("Collecting Statistics...")
# utils.cacheinfo2(hierarchy)
stats = {}
stats["rn"] = sim_req_num
stats["clist"] = sim_req_comp
stats["sim_end"] = sim_end
res_file = config.get('Simulation', 'res_file')
fd = open(res_file, "a")
fd.write("\n\n")
fd.write("-------------RESULTS---------------------\n")
fd.write("Date:" + time + "\n")
utils.stats(hierarchy, config, fd, stats)
utils.missCost(hierarchy, config, fd)
fd.close()
# shadow.set_cache_size(hierarchy, env)
# shadow.reset_counters(hierarchy, nodeNum)
# shadow.set_cache_size(hierarchy, env)

                  axis=1).tz_localize('UTC').tz_convert('US/Central')
wp_USA.columns = ['ERA5', 'ERA5_GWA', 'MERRA2', 'MERRA2_GWA']
# aggregate monthly
wp_USAm = wp_USA.resample('M').sum()
# combine data and calculate capacity factors
cf_USAm = pd.concat([
    wp_USAm.div(cap_usam, axis=0),
    (prod_USAm['USA'].dropna().resample('M').sum() * 10**6 /
     (cap_usaIm * 10**3))
], axis=1).dropna()
cf_USAm.columns = np.append(wp_USAm.columns, 'wp_obs')
# Analyse
stats_USAm = pd.DataFrame(
    {
        'ERA5': stats(cf_USAm.ERA5, cf_USAm.wp_obs, False),
        'ERA5_GWA': stats(cf_USAm.ERA5_GWA, cf_USAm.wp_obs, False),
        'MERRA2': stats(cf_USAm.MERRA2, cf_USAm.wp_obs, False),
        'MERRA2_GWA': stats(cf_USAm.MERRA2_GWA, cf_USAm.wp_obs, False),
        'obs': [np.nan, np.nan, np.nan, cf_USAm.wp_obs.mean()]
    },
    index=['cor', 'rmse', 'mbe', 'avg'])
stats_USAm_r = pd.DataFrame(
    {
        'ERA5': stats(cf_USAm.ERA5, cf_USAm.wp_obs),
        'ERA5_GWA': stats(cf_USAm.ERA5_GWA, cf_USAm.wp_obs),
        'MERRA2': stats(cf_USAm.MERRA2, cf_USAm.wp_obs),
        'MERRA2_GWA': stats(cf_USAm.MERRA2_GWA, cf_USAm.wp_obs),
        'obs': [np.nan, np.nan, np.nan, round(cf_USAm.wp_obs.mean(), 2)]
params = dict(read_csv(opts['-p']))
params = {k: v[0] for k, v in params.items()}
n_cols, weeks, y, X = load_data(filename=opts['-i'])
try:
    model.pca
except AttributeError:
    pass
else:
    pca = PCA(n_components=params['n_components'])
    del params['n_components']
    X = pca.fit(X).transform(X)
model = model(**params)
scores, mean, std_dev = stats(X, y, model)
if '--predict' not in opts:
    print(mean)
else:
    modelname = opts['--model']
    predict = opts['--predict']
    print_stats(scores, mean, std_dev, 'Stats of ' + modelname)
    results_filename = predict + '/pred-' + modelname + '.csv'
    results_plot_filename = predict + '/pred-' + modelname + '.eps'
    model.fit(X, y)
    y_true, y_pred = y, model.predict(X)
    save_data(results_filename, weeks, y_true, y_pred)

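# NOTE: the script above unpacks `scores, mean, std_dev = stats(X, y, model)`.
# That helper is not included in this listing; the sketch below assumes it runs
# k-fold cross-validation with scikit-learn (hypothetical name and fold count).
from sklearn.model_selection import cross_val_score


def cv_stats_sketch(X, y, model, cv=10):
    """Cross-validate `model` and return the fold scores plus their mean/std."""
    scores = cross_val_score(model, X, y, cv=cv)
    return scores, scores.mean(), scores.std()
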
ITERS = 100
ALPHA = 0.01
EPSI = 0.1
CW_ITERS = 100
BIN_STEPS = 20
NORM = 'l2'  # l0/l2/linf

(train_features, train_labels), (test_features, test_labels) = cifar10.load_data()
(test_features, test_labels) = agu(test_features, test_labels)
train_features = train_features.astype('float32')
test_features = test_features.astype('float32')
train_features /= 255
test_features /= 255
test_labels = np_utils.to_categorical(test_labels, 10)

if len(sys.argv) < 2 or sys.argv[1] == "fgsm_it":
    perturbed_accuracy = fgsm_it(test_features[:100], ITERS, EPSI, ALPHA,
                                 build_network, loss_func, evaluation,
                                 './tmp/original_cifar_model-8')
    print([s[0] for s in perturbed_accuracy])
    stats([s[1] for s in perturbed_accuracy], test_labels[:1000])
    heat([s[1] for s in perturbed_accuracy], test_labels[:1000],
         "cifar10_fgsm_")
else:
    perturbed_norms, was = cw(test_features[:100], test_labels[:100],
                              CW_ITERS, BIN_STEPS, build_network, loss_func,
                              evaluation, NORM, './tmp/original_cifar_model-8')
    stats_cari(perturbed_norms, was)

def stream_stats():
    data = dict()
    data["stream"] = True
    res = utils.stats(**data)
    # return jsonify({"message": "success", "containers": res})
    return render_template('index.html', **locals())

def multilevel_uniform(prop,
                       x_sample,
                       sigma=1.,
                       rho=0.1,
                       count_particles=1000,
                       count_mh_steps=100,
                       debug=True,
                       stats=False):
    # Calculate the mean of the normal distribution in logit space
    # We transform the input from [x_min, x_max] to [epsilon, 1 - epsilon],
    # then to [logit(epsilon), logit(1 - epsilon)]
    # Then we can do the sampling on (-inf, inf)
    prior = dist.Uniform(
        low=torch.max(x_sample - sigma * (x_max - x_min).view(3, 1, 1),
                      x_min.view(3, 1, 1)),
        high=torch.min(x_sample + sigma * (x_max - x_min).view(3, 1, 1),
                       x_max.view(3, 1, 1)))
    # print((x_sample-sigma).size())
    # raise Exception()

    # Parameters
    if CUDA:
        width_proposal = sigma * torch.ones(count_particles).cuda() / 30
    else:
        width_proposal = sigma * torch.ones(count_particles) / 30
    count_max_levels = 500
    target_acc_ratio = 0.9
    max_width_proposal = 0.1
    min_width_proposal = 1e-8
    width_inc = 1.02
    width_dec = 0.5

    # Sample the initial particles
    # Implements parallel batched accept-reject sampling.
    x = prior.sample(torch.Size([count_particles]))
    # print(x.size())
    # raise Exception()

    L_prev = -math.inf
    L = -math.inf
    l_inf_min = math.inf
    lg_p = 0
    levels = []

    # print('Inside valid bounds', x_min, x_max)
    # utils.stats(x[0])
    # print((x >= x_min).all(dim=1) & (x <= x_max).all(dim=1))
    # raise Exception()

    # Loop over levels
    for level_idx in range(count_max_levels):
        if CUDA:
            acc_ratio = torch.zeros(count_particles).cuda()
        else:
            acc_ratio = torch.zeros(count_particles)

        if L >= 0:
            break

        # Calculate current level
        s_x = prop(x).squeeze(-1)
        s_sorted, s_idx = torch.sort(s_x)
        L = min(s_sorted[math.floor((1 - rho) * count_particles)].item(), 0)
        if L == L_prev:
            L = 0
        levels.append(L)
        where_keep = s_x >= L
        where_kill = s_x < L
        count_kill = (where_kill).sum()
        count_keep = count_particles - count_kill

        # Print level
        if debug:
            print(f'Level {level_idx+1} = {L}')

        # Terminate if change in level is below some threshold
        if count_keep == 0:
            return -math.inf, None, x, levels

        lg_p += torch.log(count_keep.float()).item() - math.log(count_particles)

        # Early termination
        if lg_p < -90:
            return -90., None, x, levels

        # If the level is 0 then don't do last MH steps (speeds things up!)
        if L >= 0:
            break

        # Uniformly resample killed particles below the level
        new_idx = torch.randint(low=0, high=count_keep, size=(count_kill,),
                                dtype=torch.long)
        x = x[where_keep]
        x = torch.cat((x, x[new_idx]), dim=0)
        width_proposal = width_proposal[where_keep]
        width_proposal = torch.cat((width_proposal, width_proposal[new_idx]),
                                   dim=0)

        # acc_ratio = torch.zeros(count_kill).cuda()
        # x_temp = x
        # while acc_ratio.mean() < 0.2:
        #     x = x_temp
        if CUDA:
            acc_ratio = torch.zeros(count_particles).cuda()
        else:
            acc_ratio = torch.zeros(count_particles)

        for mh_idx in range(count_mh_steps):
            # Propose new sample
            g_bottom = dist.Uniform(
                low=torch.max(
                    x - width_proposal.view(-1, 1, 1, 1) *
                    (x_max - x_min).view(3, 1, 1), prior.low),
                high=torch.min(
                    x + width_proposal.view(-1, 1, 1, 1) *
                    (x_max - x_min).view(3, 1, 1), prior.high))
            x_maybe = g_bottom.sample()
            s_x = prop(x_maybe).squeeze(-1)

            # Calculate log-acceptance ratio
            g_top = dist.Uniform(
                low=torch.max(
                    x_maybe - width_proposal.view(-1, 1, 1, 1) *
                    (x_max - x_min).view(3, 1, 1), prior.low),
                high=torch.min(
                    x_maybe + width_proposal.view(-1, 1, 1, 1) *
                    (x_max - x_min).view(3, 1, 1), prior.high))
            lg_alpha = (prior.log_prob(x_maybe) + g_top.log_prob(x) -
                        prior.log_prob(x) - g_bottom.log_prob(x_maybe)).view(
                            count_particles, -1).sum(dim=1)
            acceptance = torch.min(lg_alpha, torch.zeros_like(lg_alpha))

            # Work out which ones to accept
            log_u = torch.log(torch.rand_like(acceptance))
            acc_idx = (log_u <= acceptance) & (s_x >= L)
            acc_ratio += acc_idx.float()
            x = torch.where(acc_idx.view(-1, 1, 1, 1), x_maybe, x)

        # Adapt the width proposal *for each chain individually*
        acc_ratio /= count_mh_steps

        # DEBUG: See what acceptance ratios are doing
        if stats:
            utils.stats(acc_ratio)
            # input()

        # print(acc_ratio.size())
        width_proposal = torch.where(acc_ratio > 0.124,
                                     width_proposal * width_inc,
                                     width_proposal)
        width_proposal = torch.where(acc_ratio < 0.124,
                                     width_proposal * width_dec,
                                     width_proposal)
        L_prev = L
        # input()

    return lg_p, None, x, levels

def subtest():
    '''
    test net using patches of slide.
    compare outputs of net and targets and print result.
    '''
    # the bare os.path.isdir call had no effect; make the check explicit
    assert os.path.isdir('checkpoint'), 'checkpoint directory not found'
    checkpoint = torch.load('./checkpoint/ckpt.t7')
    net = checkpoint['net']
    threshold = checkpoint['threshold']
    net.eval()

    outputs_list = np.array([])
    targets_list = np.array([])
    test_loss = 0
    total = 0
    correct = 0

    for batch_idx, (inputs, targets) in enumerate(subtestloader):
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()
        batch_size = targets.shape[0]
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        total += targets.size(0)
        outputs = torch.squeeze(outputs)
        loss = criterion(outputs, targets)
        test_loss += loss.data[0]

        _outputs = np.array(outputs.data).astype(float)
        _targets = np.array(targets.data).astype(float)
        outputs_list = np.append(outputs_list, _outputs)
        targets_list = np.append(targets_list, _targets)

        outputs += Variable((torch.ones(batch_size) * (1 - threshold)).cuda())
        outputs = torch.floor(outputs)
        correct += int(outputs.eq(targets).cpu().sum())

        progress_bar(
            batch_idx, len(subtestloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (test_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))

    correct, tp, tn, fp, fn, recall, precision, specificity, f1_score, auc, threshold = stats(
        outputs_list, targets_list)
    acc = correct / total
    print(
        'Acc: %.3f, Recall: %.3f, Prec: %.3f, Spec: %.3f, F1: %.3f, Thres: %.3f, AUC: %.3f'
        % (acc, recall, precision, specificity, f1_score, threshold, auc))
    print('%17s %12s\n%-11s %-8d %-8d\n%-11s %-8d %-8d' %
          ('Tumor', 'Normal', 'pos', tp, fp, 'neg', fn, tn))
    print("lr: ", args.lr * (0.5**(LR_DECAY)), " chance:", LR_CHANCE)

def valid(epoch):
    '''
    valid net using patches of slide.
    Save checkpoint if AUC score is higher than saved checkpoint's.

    Args:
        epoch (int): current epoch
    '''
    global BEST_AUC
    global THRESHOLD
    global LR_CHANCE
    global CK_CHANCE
    global LR_DECAY

    net.eval()
    valid_loss = 0
    total = 0
    correct = 0
    outputs_list = np.array([])
    targets_list = np.array([])

    for batch_idx, (inputs, targets) in enumerate(valloader):
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()
        batch_size = targets.shape[0]
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        total += targets.size(0)
        outputs = torch.squeeze(outputs)
        loss = criterion(outputs, targets)
        valid_loss += loss.data[0]

        _outputs = np.array(outputs.data).astype(float)
        _targets = np.array(targets.data).astype(float)
        outputs_list = np.append(outputs_list, _outputs)
        targets_list = np.append(targets_list, _targets)

        outputs += Variable((torch.ones(batch_size) * (1 - THRESHOLD)).cuda())
        outputs = torch.floor(outputs)
        correct += int(outputs.eq(targets).cpu().sum())

        progress_bar(
            batch_idx, len(valloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (valid_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))

    correct, tp, tn, fp, fn, recall, precision, specificity, f1_score, auc, threshold = stats(
        outputs_list, targets_list)
    acc = correct / total
    THRESHOLD = threshold
    print(
        'Acc: %.3f, Recall: %.3f, Prec: %.3f, Spec: %.3f, F1: %.3f, Thres: %.3f, AUC: %.3f'
        % (acc, recall, precision, specificity, f1_score, threshold, auc))
    print('%17s %12s\n%-11s %-8d %-8d\n%-11s %-8d %-8d' %
          ('Tumor', 'Normal', 'pos', tp, fp, 'neg', fn, tn))
    print("lr: ", args.lr * (0.5**(LR_DECAY)), "lr chance:", LR_CHANCE)

    # plot data
    CUR_EPOCH.append(epoch)
    CUR_VAL_ACC.append(acc)
    CUR_LOSS.append(valid_loss / (batch_idx + 1))
    CUR_LR.append(args.lr * (0.5**(LR_DECAY)))

    # Save checkpoint.
    if auc > BEST_AUC:
        print('saving...')
        BEST_AUC = auc
        state = {
            'net': net if USE_CUDA else net,
            'acc': acc,
            'loss': valid_loss,
            'recall': recall,
            'specificity': specificity,
            'precision': precision,
            'f1_score': f1_score,
            'auc': auc,
            'epoch': epoch,
            'lr': args.lr * (0.5**(LR_DECAY)),
            'threshold': threshold
        }
        torch.save(state, './checkpoint/ckpt.t7')

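# NOTE: subtest() and valid() unpack eleven values from stats(outputs_list,
# targets_list). The original helper is not part of this listing; the sketch
# below only illustrates one way to produce compatible values (threshold
# selection via Youden's J on the ROC curve is an assumption, as is the name).
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve


def classification_stats_sketch(outputs, targets):
    """Return confusion-matrix counts, derived metrics, AUC and a threshold."""
    auc = roc_auc_score(targets, outputs)
    fpr, tpr, thresholds = roc_curve(targets, outputs)
    threshold = thresholds[np.argmax(tpr - fpr)]  # Youden's J statistic
    preds = (outputs >= threshold).astype(float)
    tp = int(((preds == 1) & (targets == 1)).sum())
    tn = int(((preds == 0) & (targets == 0)).sum())
    fp = int(((preds == 1) & (targets == 0)).sum())
    fn = int(((preds == 0) & (targets == 1)).sum())
    correct = tp + tn
    recall = tp / (tp + fn) if tp + fn else 0.0
    precision = tp / (tp + fp) if tp + fp else 0.0
    specificity = tn / (tn + fp) if tn + fp else 0.0
    f1_score = (2 * precision * recall / (precision + recall)
                if precision + recall else 0.0)
    return (correct, tp, tn, fp, fn, recall, precision, specificity,
            f1_score, auc, threshold)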