def iterative_tests():
    resfile_name = get_free_filename('iterative-hybrid', '.', suffix='.pkl')
    outdir = get_free_filename('iterative-hybrid', '/home/centos/results')
    suffix = 'hybrid'
    iterative = False
    # clf = RuleBased(filter_method='take_max', num_rules=6)
    clf = Hybrid(freq_threshold=2, pass_freq_to_vw=True, probability=False,
                 vw_args='-b 26 --learning_rate 1.5',
                 suffix=suffix, iterative=iterative,
                 use_temp_files=True)
    # clf = Hybrid(freq_threshold=2, pass_freq_to_vw=True,
    #              suffix=suffix,
    #              probability=True, tqdm=True)
    # Get single-app dirty changesets
    with (PROJECT_ROOT / 'iterative_chunks.p').open('rb') as f:
        it_chunks = pickle.load(f)
    logging.info("Prediction pickle is %s", resfile_name)
    resfile = open(resfile_name, 'wb')
    results = []
    for i in range(3):
        # Rotate the three chunk groups so each serves once as the test set.
        i1 = i % 3
        i2 = (i + 1) % 3
        i3 = (i + 2) % 3
        X_train = []
        y_train = []
        X_test = []
        y_test = []
        clf.refresh()
        for idx, inner_chunks in enumerate(it_chunks):
            logging.info('In iteration %d', idx)
            features, labels = parse_csids(inner_chunks[i1], iterative=True)
            if iterative:
                # Iterative mode: train only on the newest chunk.
                X_train = features
                y_train = labels
            else:
                X_train += features
                y_train += labels
            features, labels = parse_csids(inner_chunks[i2], iterative=True)
            X_train += features
            y_train += labels
            features, labels = parse_csids(inner_chunks[i3], iterative=True)
            X_test += features
            y_test += labels
            results.append(get_scores(clf, X_train, y_train, X_test, y_test))
            # Rewind so the next dump overwrites the checkpoint with the
            # fuller results list.
            pickle.dump(results, resfile)
            resfile.seek(0)
    resfile.close()
    print_results(resfile_name, outdir, args=clf.get_args(),
                  n_strats=len(it_chunks), iterative=True)
def multiapp_trainw_dirty():
    resfile_name = get_free_filename('result-timing', '.', suffix='.pkl')
    outdir = get_free_filename('timing-multiapp', '/home/centos/results')
    suffix = 'timing'
    # clf = RuleBased(filter_method='take_max', num_rules=6)
    clf = Hybrid(freq_threshold=2, pass_freq_to_vw=True, probability=True,
                 vw_args='-b 26 --learning_rate 1.5 --passes 10',
                 suffix=suffix, use_temp_files=True)
    # Get multi-app changesets
    with (PROJECT_ROOT / 'changeset_sets' / 'multilabel_chunks.p').open('rb') as f:
        multilabel_chunks = pickle.load(f)
    # Get single-app dirty changesets
    with (PROJECT_ROOT / 'changeset_sets' / 'threek_dirty_chunks.p').open('rb') as f:
        threeks = pickle.load(f)
    logging.info("Prediction pickle is %s", resfile_name)
    resfile = open(resfile_name, 'wb')
    results = []
    for ml_idx, ml_chunk in enumerate(multilabel_chunks):
        logging.info('Test set is %d', ml_idx)
        # Train on the two chunks that are not the current test chunk.
        ml_train_idx = [0, 1, 2]
        ml_train_idx.remove(ml_idx)
        X_train, y_train = parse_csids(multilabel_chunks[ml_train_idx[0]],
                                       multilabel=True)
        features, labels = parse_csids(multilabel_chunks[ml_train_idx[1]],
                                       multilabel=True)
        X_train += features
        y_train += labels
        X_test, y_test = parse_csids(ml_chunk, multilabel=True)
        results.append(get_multilabel_scores(clf, X_train, y_train, X_test, y_test))
        # Rewind so the next dump overwrites the checkpoint.
        pickle.dump(results, resfile)
        resfile.seek(0)
    # Keep extending the last fold's training set with dirty single-app
    # chunks and re-score against the same test set.
    for idx, chunk in tqdm(enumerate(threeks)):
        logging.info('Extra training set is %d', idx)
        features, labels = parse_csids(chunk, multilabel=True)
        X_train += features
        y_train += labels
        results.append(get_multilabel_scores(clf, X_train, y_train, X_test, y_test))
        pickle.dump(results, resfile)
        resfile.seek(0)
    resfile.close()
    print_multilabel_results(resfile_name, outdir, args=clf.get_args(), n_strats=4)
def create_optimizer(model, lr, rho_adam, etam, etad, weight_decay):
    return Hybrid(model.parameters(), lr, etam=etam, etad=etad,
                  rho_adam=rho_adam, nesterov=True, weight_decay=weight_decay)
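# A minimal usage sketch for create_optimizer. It assumes only that the
# Hybrid optimizer follows the standard torch.optim.Optimizer interface
# (zero_grad / step), as its use in train_s1 below suggests; the model,
# batch, and hyperparameter values here are placeholders, not taken from
# this repository.
import torch
import torch.nn as nn

net = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = create_optimizer(net, lr=1e-2, rho_adam=0.9,
                             etam=0.8, etad=0.8, weight_decay=1e-4)

for x, y in [(torch.randn(4, 10), torch.randn(4, 1))]:  # stand-in batch
    optimizer.zero_grad()          # clear accumulated gradients
    loss = criterion(net(x), y)    # forward pass
    loss.backward()                # backpropagate
    optimizer.step()               # apply the hybrid update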
################################## IMPORTS ##################################
from Utils.Evaluator import EvaluatorHoldout
from Utils.DataSplitter import DataSplitter
from Utils.DataReader import DataReader
from hybrid import Hybrid
from tqdm import tqdm

################################# READ DATA #################################
reader = DataReader()
splitter = DataSplitter()
urm = reader.load_urm()
ICM = reader.load_icm()
targets = reader.load_target()
URM_train, URM_val, URM_test = splitter.split(urm, validation=0, testing=0)

###################### INSTANTIATE AND FIT THE HYBRID #######################
recommender = Hybrid(URM_train, ICM)
recommender.fit()

################################ PRODUCE CSV ################################
f = open("submission.csv", "w+")
f.write("user_id,item_list\n")
for t in tqdm(targets):
    recommended_items = recommender.recommend(t, cutoff=10, remove_seen_flag=True)
    well_formatted = " ".join([str(x) for x in recommended_items])
    f.write(f"{t}, {well_formatted}\n")
f.close()  # flush the submission file to disk
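# EvaluatorHoldout is imported above but never used, since the split keeps
# no validation or test data. If a test split were held out, a sketch like
# the following could score the hybrid before writing the submission. The
# testing=0.2 fraction and the cutoff_list / evaluateRecommender interface
# are assumptions about DataSplitter and EvaluatorHoldout, not taken from
# this script.
URM_train, URM_val, URM_test = splitter.split(urm, validation=0, testing=0.2)
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
result_df, result_string = evaluator_test.evaluateRecommender(recommender)
print(result_string)  # MAP@10 and related metrics for the held-out users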
x += ") " if source.cells: x += "4cell " else: x += "3cell " x += (str(source.charged_voltage) + ' ') return x return ("Chg: " + str(source.charger_state) + ' ' + str(source.shutdown) + ' ' + 'i' + str(source.charger_info) + ' ' + 's' + str(source.cells) + ' ' + 'v' + str(source.charged_voltage) + ' ') def _get_temp(source): return ("Tmp: " + str(source.t1)[:4] + ' ' + str(source.t2)[:4] + ' ') controller = Hybrid() args = _parse_commandline() pot = Charge_Controller() # Charger Setup controller.shutdown = args.on #if args.iLim is not controller.__charger.current: # controller.__charger.current = args.iLim # Battery Setup controller.cells = args.cells controller.charged_voltage = args.chem #pot.current = 1.0 # Turn on/off switches if args.h2: controller.h2_on()
def cross_validation():
    data = pd.read_csv('data/Demand_for_California_hourly_UTC_time.csv',
                       header=0, infer_datetime_format=True,
                       parse_dates=[0], index_col=[0])
    data = data.reindex(index=data.index[::-1])
    data.index.freq = 'H'  # Hourly data.
    data_without_leap_year = pd.read_csv(
        'data/Demand_for_California_hourly_UTC_time_without_leap_year.csv',
        header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
    data_without_leap_year = data_without_leap_year.reindex(
        index=data_without_leap_year.index[::-1])

    horizon = 7 * 24  # One-week forecast horizon, in hours.

    # Expanding windows: each fold extends the series by one month,
    # from 2019-02-01 through 2019-11-01.
    end_dates = [f'2019-{month:02d}-01' for month in range(2, 12)]
    time_series = [data.loc['2016-01-01':end].to_numpy() for end in end_dates]
    time_series_without_leap_year = [
        data_without_leap_year.loc['2016-01-01':end].to_numpy()
        for end in end_dates
    ]

    sum_mae_error_add = 0
    sum_mae_error_mul = 0
    sum_mae_error_week = 0
    sum_mae_error_year = 0
    sum_mae_error_year_without_leap_year = 0
    sum_mae_error_hybrid = 0
    sum_rmse_error_add = 0
    sum_rmse_error_mul = 0
    sum_rmse_error_week = 0
    sum_rmse_error_year = 0
    sum_rmse_error_year_without_leap_year = 0
    sum_rmse_error_hybrid = 0
    do_training = True

    for i in range(len(time_series)):
        print("Iteration: " + str(i))
        ts = time_series[i]
        hw = HoltWinters(ts[:-horizon], horizon)
        x = np.linspace(0, horizon, horizon)

        forecast_additive, residuals_additive = hw.holt_winters_additive_predict(
            do_training)
        # Recompute residuals against the held-out week.
        residuals_additive = ts[-horizon:] - forecast_additive
        mae_error_add = mae(ts[-horizon:], forecast_additive)
        sum_mae_error_add += mae_error_add
        rmse_error_add = rmse(ts[-horizon:], forecast_additive)
        sum_rmse_error_add += rmse_error_add
        sns.lineplot(x=x, y=forecast_additive.flatten())
        sns.lineplot(x=x, y=ts[-horizon:].flatten())
        print("Additive MAE: " + str(mae_error_add))
        print("Additive RMSE: " + str(rmse_error_add))

        forecast_multiplicative, residuals_multiplicative = \
            hw.holt_winters_multiplicative_predict(do_training)
        residuals_multiplicative = ts[-horizon:] - forecast_multiplicative
        mae_error_mul = mae(ts[-horizon:], forecast_multiplicative)
        sum_mae_error_mul += mae_error_mul
        rmse_error_mul = rmse(ts[-horizon:], forecast_multiplicative)
        sum_rmse_error_mul += rmse_error_mul
        sns.lineplot(x=x, y=forecast_multiplicative.flatten())
        sns.lineplot(x=x, y=ts[-horizon:].flatten())
        print("Multiplicative MAE: " + str(mae_error_mul))
        print("Multiplicative RMSE: " + str(rmse_error_mul))

        forecast_week_extended, residuals_week_extended = \
            hw.holt_winters_multiplicative_week_extended_predict(do_training)
        residuals_week = ts[-horizon:] - forecast_week_extended
        mae_error_week = mae(ts[-horizon:], forecast_week_extended)
        sum_mae_error_week += mae_error_week
        rmse_error_week = rmse(ts[-horizon:], forecast_week_extended)
        sum_rmse_error_week += rmse_error_week
        sns.set(rc={'figure.figsize': (16, 4)})
        sns.lineplot(x=x, y=forecast_week_extended.flatten(), color="r")
        sns.lineplot(x=x, y=ts[-horizon:].flatten(), color="b")
        plt.title("The HW week extended's week-long forecast plotted in red "
                  "and the real time series in blue.")
        print("Extended week MAE: " + str(mae_error_week))
        print("Extended week RMSE: " + str(rmse_error_week))
        plt.xlabel("Time [h]")
        plt.ylabel("Electricity demand [MWh]")

        forecast_year_extended, residuals_year_extended = \
            hw.holt_winters_multiplicative_year_extended_predict(do_training)
        residuals_year = ts[-horizon:] - forecast_year_extended
        mae_error_year = mae(ts[-horizon:], forecast_year_extended)
        sum_mae_error_year += mae_error_year
        rmse_error_year = rmse(ts[-horizon:], forecast_year_extended)
        sum_rmse_error_year += rmse_error_year
        sns.set(rc={'figure.figsize': (16, 4)})
        sns.lineplot(x=x, y=forecast_year_extended.flatten(), color="r")
        sns.lineplot(x=x, y=ts[-horizon:].flatten(), color="b")
        print("Extended year MAE: " + str(mae_error_year))
        print("Extended year RMSE: " + str(rmse_error_year))
        plt.title("The HW year extended's week-long forecast plotted in red "
                  "and the real time series in blue.")
        plt.xlabel("Time [h]")
        plt.ylabel("Electricity demand [MWh]")

        ts_without_leap_year = time_series_without_leap_year[i]
        hw_without_leap_year = HoltWintersWithoutLeapYear(
            ts_without_leap_year[:-horizon], horizon)
        forecast_year_extended_without_leap_year, residuals_year_extended_without_leap_year = \
            hw_without_leap_year.holt_winters_multiplicative_year_extended_predict(
                do_training)
        residuals_without_leap_year = (
            ts[-horizon:] - forecast_year_extended_without_leap_year)
        mae_error_year_without_leap_year = mae(
            ts_without_leap_year[-horizon:],
            forecast_year_extended_without_leap_year)
        sum_mae_error_year_without_leap_year += mae_error_year_without_leap_year
        rmse_error_year_without_leap_year = rmse(
            ts_without_leap_year[-horizon:],
            forecast_year_extended_without_leap_year)
        sum_rmse_error_year_without_leap_year += rmse_error_year_without_leap_year
        sns.lineplot(x=x, y=forecast_year_extended_without_leap_year.flatten())
        sns.lineplot(x=x, y=ts[-horizon:].flatten())
        print("Extended year without leap MAE: " +
              str(mae_error_year_without_leap_year))
        print("Extended year without leap RMSE: " +
              str(rmse_error_year_without_leap_year))

        hybrid = Hybrid(ts[:-horizon], horizon)
        forecast_hybrid = hybrid.forecast()
        #residuals_hybrid = hybrid.get_residuals()
        mae_error_hybrid = mae(ts[-horizon:], forecast_hybrid)
        sum_mae_error_hybrid += mae_error_hybrid
        rmse_error_hybrid = rmse(ts[-horizon:], forecast_hybrid)
        sum_rmse_error_hybrid += rmse_error_hybrid
        sns.set(rc={'figure.figsize': (16, 4)})
        sns.lineplot(x=x, y=forecast_hybrid.flatten(), color="r")
        sns.lineplot(x=x, y=ts[-horizon:].flatten(), color="b")
        plt.title("The hybrid model's week-long forecast plotted in red "
                  "and the real time series in blue.")
        plt.xlabel("Time [h]")
        plt.ylabel("Electricity demand [MWh]")
        print("Hybrid MAE: " + str(mae_error_hybrid))
        print("Hybrid RMSE: " + str(rmse_error_hybrid))

    print("Sum additive MAE: " + str(sum_mae_error_add))
    print("Sum multiplicative MAE: " + str(sum_mae_error_mul))
    print("Sum extended week MAE: " + str(sum_mae_error_week))
    print("Sum extended year MAE: " + str(sum_mae_error_year))
    print("Sum extended year without leap MAE: " +
          str(sum_mae_error_year_without_leap_year))
    print("Sum hybrid MAE: " + str(sum_mae_error_hybrid))
    print("Sum additive RMSE: " + str(sum_rmse_error_add))
    print("Sum multiplicative RMSE: " + str(sum_rmse_error_mul))
    print("Sum extended week RMSE: " + str(sum_rmse_error_week))
    print("Sum extended year RMSE: " + str(sum_rmse_error_year))
    print("Sum extended year without leap RMSE: " +
          str(sum_rmse_error_year_without_leap_year))
    print("Sum hybrid RMSE: " + str(sum_rmse_error_hybrid))
df_collab = json.load(f)
df_content = pd.read_csv('content_dataset.csv')
df_hybrid = pd.read_csv('hybrid_dataset.csv', sep=';')
df_abtest = pd.read_csv('ab_dataset.csv')

print('Association rule with apriori: ')
apr = Apriori(support=0.1, confidence=0.1)
apr.main(df_apriori)
apr.result()

print('Collaborative Filtering: ')
collab = Collaborative(df_collab)
recommendation, score = collab.user_recommendations('ANI')
print('user recommendation for ANI with score: ', score)
print('user correlation score between ANI and Dpv: ',
      collab.person_correlation('ANI', 'Dpv'))
print('users who have similarities with ANI in genre film: ',
      collab.most_similar_users('ANI', 1))

print('Recommendation with content based: ')
content = ContentBased(df_content)
print(content.predict('Avenger'))

print('Recommendation for ANI with hybrid algorithm:')
hybrid = Hybrid(film=df_hybrid, dataset=df_collab)
print(hybrid.predict())

print('AB testing:')
ab = ABTesting(0.05)
ab.fit(df_abtest)
ab.predict()
def train_s1(fold=0, disable_progress=False):
    directory = './data/'
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    train_df = pd.read_csv('folds.csv')
    train_path = os.path.join(directory, 'train')
    train_ids = train_df[train_df.fold != fold]['id'].values
    val_ids = train_df[train_df.fold == fold]['id'].values
    dataset = TGSSaltDataset(train_path, train_ids, augment=True)
    dataset_val = TGSSaltDataset(train_path, val_ids)

    model = UnetModel()
    model.train()
    model.to(device)

    epoch = 30
    learning_rate = 1e-2
    # loss_fn = FocalLoss(2)
    loss_fn = nn.BCEWithLogitsLoss()
    # loss_fn = LovaszLoss()
    empty_loss_fn = nn.BCEWithLogitsLoss()
    deep_loss_fn = nn.BCEWithLogitsLoss()  # (unused; deep_sup_loss is applied instead)
    # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
    #                             momentum=0.9, nesterov=True, weight_decay=1e-4)
    optimizer = Hybrid(model.parameters(), lr=learning_rate, etad=0.8, etam=0.8)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=20)
    #load_checkpoint(f'tgs-{num_filters}-best.pth', model, optimizer)

    best_iou = 0
    for e in range(epoch):
        train_loss = []
        for sample in tqdm(data.DataLoader(dataset, batch_size=16, shuffle=True),
                           disable=disable_progress):
            image, mask = sample['image'], sample['mask']
            image = image.type(torch.float).to(device)
            mask = mask.to(device)
            y_pred, y_pred_empty, y_pred_deep = model(image)
            # Combined objective: segmentation loss plus lightly weighted
            # empty-tile classification and deep-supervision terms.
            seg_loss = loss_fn(y_pred, mask)
            class_loss = empty_loss_fn(y_pred_empty, empty_mask(mask))
            deep_loss = deep_sup_loss(y_pred_deep, mask)
            loss = seg_loss + class_loss * 0.05 + deep_loss * 0.10

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss.append(seg_loss.item())

        # Validation on the held-out fold; gradients are not needed here.
        val_loss = []
        val_iou = []
        with torch.no_grad():
            for sample in data.DataLoader(dataset_val, batch_size=30, shuffle=False):
                image, mask = sample['image'], sample['mask']
                image = image.to(device)
                y_pred, _, _ = model(image)
                loss = loss_fn(y_pred, mask.to(device))
                val_loss.append(loss.item())
                iou = iou_pytorch((y_pred > 0).int(), mask.int().to(device)).cpu()
                val_iou.append(iou)

        avg_iou = np.mean(val_iou)
        # scheduler.step(np.mean(val_loss))
        print("Epoch: %d, Train: %.3f, Val: %.3f, IoU: %.3f"
              % (e, np.mean(train_loss), np.mean(val_loss), avg_iou))
        if avg_iou > best_iou:
            print('saving new best')
            save_checkpoint(f'unet-fold{fold}-best.pth', model, optimizer)
            best_iou = avg_iou
    print('Best IoU: %.3f' % best_iou)
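# `empty_mask` is not defined in this excerpt. A plausible sketch, assuming
# ground-truth masks are (N, 1, H, W) float tensors and the empty-tile head
# emits one logit per image, might be (both shapes are assumptions):
import torch

def empty_mask(mask: torch.Tensor) -> torch.Tensor:
    """1.0 where the mask contains any salt pixels, else 0.0, shape (N, 1)."""
    return (mask.view(mask.size(0), -1).sum(dim=1) > 0).float().unsqueeze(1)

# With the helpers in place, all folds listed in folds.csv could then be
# trained in sequence, e.g.:
#     for fold in range(5):
#         train_s1(fold)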