def summarize_res(sname, datasize):
    print(sname)
    res = []
    times = []
    for i in range(100):
        PATH = ROOT_PATH + "/MMR_IVs/results/zoo/" + sname + "/"
        filename = os.path.join(PATH, str(date.today()),
                                'LMO_errs_{}_nystr_prodkern_{}.npy'.format(i, datasize))
        if os.path.exists(filename):
            tmp_res = np.load(filename, allow_pickle=True)
            if tmp_res[-1] is not None:
                res += [tmp_res[-1]]
        # no leading '/' on the last component: os.path.join would otherwise
        # treat it as an absolute path and discard PATH and the date directory
        time_path = os.path.join(PATH, str(date.today()),
                                 'LMO_errs_{}_nystr_prodkern_{}_time.npy'.format(i, datasize))
        if os.path.exists(time_path):
            t = np.load(time_path)
            times += [t]
    res = np.array(res)
    times = np.array(times)
    res = remove_outliers(res)
    times = np.sort(times)[:80]  # keep the 80 fastest runs
    print(times)
    print('mean, std: ', np.mean(res), np.std(res))
    print('time: ', np.mean(times), np.std(times))
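remove_outliers is not defined in this snippet; a minimal sketch, assuming the common convention of dropping values outside 1.5x the interquartile range:

import numpy as np

# Hypothetical stand-in for the undefined remove_outliers helper:
# keep only values within [Q1 - k*IQR, Q3 + k*IQR].
def remove_outliers(arr, k=1.5):
    arr = np.asarray(arr, dtype=float)
    q1, q3 = np.percentile(arr, [25, 75])
    iqr = q3 - q1
    return arr[(arr >= q1 - k * iqr) & (arr <= q3 + k * iqr)]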
def compute_features_from_endpoint(model, class_property_uris, min_num_of_objects, update_func):
    """
    :param model:
    :param class_property_uris:
    :param min_num_of_objects:
    :param update_func:
    :return: list of (cluster, feature) pairs
    """
    update_model_state(model=model, new_progress=0,
                       new_notes="extracting values from gathered class/property")
    percentage_of_numerical = 0.5
    idx = 0
    total_num_of_queries = 0
    # calculate the required num of queries
    for class_uri in class_property_uris.keys():
        total_num_of_queries += len(class_property_uris[class_uri])
    for class_uri in class_property_uris.keys():
        logger.info("\nproperties for %s are %d" % (class_uri, len(class_property_uris[class_uri])))
        for property_uri in class_property_uris[class_uri]:
            logger.debug("getting objects for: %s" % property_uri)
            raw_col = easysparql.get_objects(endpoint=model.knowledge_graph,
                                             class_uri=class_uri,
                                             property_uri=property_uri)
            if len(raw_col) > min_num_of_objects:
                col = get_numericals(column=raw_col)
                if len(col) > percentage_of_numerical * len(raw_col):
                    col = remove_outliers(col)
                    if len(col) > min_num_of_objects:
                        logger.debug("success: %s" % property_uri)
                        # features_vector = features.compute_features(col, [features.mean, features.std,
                        #                                                    features.q1, features.q3])
                        features_vector = features.compute_curr_features(col)
                        cluster_name = "%s %s" % (class_uri, property_uri)
                        logger.debug("cluster: %s" % cluster_name)
                        modeling.add_cluster_to_model(model=model, name=cluster_name,
                                                      features=features_vector)
                    else:
                        logger.debug("\n***%s only has %d values" % (property_uri, len(col)))
                else:
                    # report counts as numerical/total, matching the message
                    logger.debug("%s only has %d/%d numerical values" % (property_uri, len(col), len(raw_col)))
            update_func(int(idx * 1.0 / total_num_of_queries * 100))
            idx += 1
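get_numericals is used but not shown; a plausible sketch, assuming it simply keeps the entries of the raw column that parse as floats:

# Hypothetical sketch of get_numericals: filter a raw SPARQL column
# down to the values that can be interpreted as numbers.
def get_numericals(column):
    numericals = []
    for v in column:
        try:
            numericals.append(float(v))
        except (TypeError, ValueError):
            continue
    return numericals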
def summarize_res(sname, datasize):
    print(sname)
    res = []
    times = []
    for i in range(100):
        PATH = ROOT_PATH + "/our_methods/results/mendelian/" + sname + "/"
        filename = PATH + 'LMO_errs_{}_nystr_{}.npy'.format(i, datasize)
        if os.path.exists(filename):
            tmp_res = np.load(filename, allow_pickle=True)
            if tmp_res[-1] is not None:
                res += [tmp_res[-1]]
        time_path = PATH + 'LMO_errs_{}_nystr_{}_time.npy'.format(i, datasize)
        if os.path.exists(time_path):
            t = np.load(time_path)
            times += [t]
    res = np.array(res)
    res = remove_outliers(res)
    print('mean, std: ', np.mean(res), np.std(res))
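A possible driver for this summary; the scenario name and data sizes below are placeholders, not values taken from the source:

if __name__ == '__main__':
    for datasize in [200, 2000]:              # hypothetical data sizes
        summarize_res('mendelian', datasize)  # hypothetical scenario name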
gamma = 0.95
batch_size = 20
eval_batch_size = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device", device)

path = sys.argv[1]
print("dataset", path)
train_data = np.load(path)[:, :NUM_FEAT].astype(np.float32)

# normalize
train_data -= train_data.mean(axis=0, keepdims=True)
train_data /= train_data.std(axis=0, keepdims=True)
train_data = remove_outliers(train_data)

trivial_loss = np.mean((train_data[1:] - train_data[:-1])**2)
print(f"Trivial loss (predicting no changes): {trivial_loss}")

all_data = torch.tensor(train_data).float().to(device)
n_split = len(all_data) // 2
train_data = all_data[:n_split]
val_data = all_data[-n_split:]
test_data = train_data


def batchify(data, bsz):
    # Divide the dataset into bsz parts.
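The definition of batchify is cut off above. A sketch of how it typically continues, following the PyTorch language-model tutorial pattern (an assumption, not the script's actual body):

def batchify(data, bsz):
    # Divide the dataset into bsz parts (sketch of the truncated body).
    nbatch = data.size(0) // bsz
    data = data[:nbatch * bsz]  # trim off any remainder
    # reshape to (seq_len, bsz, features) so each batch column is one
    # contiguous part of the original sequence
    return data.view(bsz, nbatch, -1).transpose(0, 1).contiguous()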
import numpy as np
from datetime import datetime  # needed for datetime.today() below
# util, dbdao and modelutil are project-local modules (imports not shown here)

today = datetime.today()
print("Today", today)
dates_list = util.create_date_list(today)
strql = util.convert_date_list_to_string(dates_list)
interval = 2
timelist = util.create_timelist(interval, today)
df = dbdao.retrieve_database(strql)
dataarray = util.create_dataarray_from_dataframe(df, timelist, today)
mydf = util.remove_outliers(dataarray)
training_set = mydf.values
training_set_scaled = modelutil.scale_training_set(training_set)
X_train, y_train = modelutil.create_timesteps(training_set_scaled)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
regressor = modelutil.buid_model(X_train)
regressor.fit(X_train, y_train, epochs=100, batch_size=32)
dbdao.save_model_in_database(regressor)
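modelutil.create_timesteps is not shown; a plausible sketch, assuming the standard sliding-window preparation for an LSTM, with a hypothetical window of 60 steps:

import numpy as np

# Hypothetical sketch of modelutil.create_timesteps: each sample is the
# previous `lookback` scaled values and the label is the value that follows.
def create_timesteps(training_set_scaled, lookback=60):
    X_train, y_train = [], []
    for i in range(lookback, len(training_set_scaled)):
        X_train.append(training_set_scaled[i - lookback:i, 0])
        y_train.append(training_set_scaled[i, 0])
    return np.array(X_train), np.array(y_train)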
                   jac=True,
                   options={'maxiter': 5000, 'disp': True, 'ftol': 0},
                   callback=callback0)
    PATH = ROOT_PATH + "/MMR_IVs/results/" + sname + "/"
    os.makedirs(PATH, exist_ok=True)
    np.save(PATH + 'LMO_errs_{}_nystr.npy'.format(seed),
            [opt_params, prev_norm, opt_test_err])


if __name__ == '__main__':
    snames = ['mnist_z', 'mnist_x', 'mnist_xz']
    for sname in snames:
        for seed in range(100):
            experiment(sname, seed)

        PATH = ROOT_PATH + "/MMR_IVs/results/" + sname + "/"
        ress = []
        for seed in range(100):
            filename = PATH + 'LMO_errs_{}_nystr.npy'.format(seed)
            if os.path.exists(filename):
                res = np.load(filename, allow_pickle=True)
                if res[-1] is not None:
                    ress += [res[-1]]
        ress = np.array(ress)
        ress = remove_outliers(ress)
        print(np.nanmean(ress), np.nanstd(ress))
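The np.nanmean/np.nanstd calls suggest that remove_outliers here may mask outliers as NaN rather than dropping them; a sketch under that assumption:

import numpy as np

# Hypothetical NaN-masking variant of remove_outliers, consistent with the
# nan-aware aggregation above: outliers are replaced by NaN, not removed.
def remove_outliers(arr, k=1.5):
    arr = np.asarray(arr, dtype=float)
    q1, q3 = np.nanpercentile(arr, [25, 75])
    iqr = q3 - q1
    out = arr.copy()
    out[(out < q1 - k * iqr) | (out > q3 + k * iqr)] = np.nan
    return out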
g = 1


# Get the running times t_i for m_i^x for the messages m_i in M
def measure_times(M, x, n, number_of_runs_per_message=1):
    return np.array([rt3x_average(powmod, m, x, n, number_of_runs_per_message)
                     for m in M])


TT = measure_times(M, d, n, averages)   # These are the T_i's
tt0 = measure_times(M, g, n, averages)  # These are the t_i's when g = 0b1
g = set_bit_n(g, 1)                     # set g to 0b11
tt1 = measure_times(M, g, n, averages)  # these are the t_i's when g is 0b11

D0 = remove_outliers(TT - tt0)
D1 = remove_outliers(TT - tt1)

# Compute the standard deviations for the time differences
(sd0, sd1) = map(std, (D0, D1))
print("\nStandard deviation of time differences: ", sd0, sd1)
print("\nIn binary, d = ", bin(d))

num_bins = 32
nn, bins, patches = plt.hist(D0, num_bins, facecolor='blue', alpha=0.5,
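set_bit_n and powmod are used but not defined here; minimal sketches consistent with their usage above (both are assumptions):

# Hypothetical helpers matching how they are called in the snippet.
def set_bit_n(x, k):
    return x | (1 << k)   # set bit k of x (bit 0 is the least significant)

def powmod(m, x, n):
    return pow(m, x, n)   # modular exponentiation m**x mod n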
                  c=dataset['target'], cmap=plt.cm.Set1, edgecolor='k')
axs[3, 1].scatter(dataset.iloc[:, 3], dataset.iloc[:, 1],
                  c=dataset['target'], cmap=plt.cm.Set1, edgecolor='k')
axs[3, 2].scatter(dataset.iloc[:, 3], dataset.iloc[:, 2],
                  c=dataset['target'], cmap=plt.cm.Set1, edgecolor='k')

# remove outliers
new_dataset = util.remove_outliers(dataset)

# look at skew and mean of new dataset without outliers
f.write("\nAfter removing outliers\n")
util.get_data_stats(new_dataset, f)

plt.figure(2)
scatter = plt.scatter(new_dataset.iloc[:, 0], new_dataset.iloc[:, 1],
                      c=new_dataset['target'], cmap=plt.cm.Set1, edgecolor='k')
plt.legend(*scatter.legend_elements(), loc="upper right", title="Class")
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
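util.remove_outliers is not shown; a plausible sketch for a labeled DataFrame like this one, keeping rows whose feature z-scores all stay below a hypothetical threshold of 3:

import numpy as np

# Hypothetical sketch of util.remove_outliers for a DataFrame with a
# 'target' column: drop rows where any feature lies more than z_thresh
# standard deviations from its column mean.
def remove_outliers(dataset, z_thresh=3.0):
    feats = dataset.drop(columns=['target'])
    z = (feats - feats.mean()) / feats.std()
    return dataset[(np.abs(z) < z_thresh).all(axis=1)]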