def _str_to_bool(value):
    """Parse a command-line boolean such as ``true``/``false`` or ``1``/``0``.

    ``type=bool`` is not usable with argparse because ``bool("False")`` is
    ``True``; any non-empty string would enable the flag.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    # The empty string is kept as False, matching the old ``bool("")`` result.
    if normalized in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


def main():
    """Run K-fold training/evaluation of the models and optionally save results.

    Reads the profile and comment CSV files, builds one graph from them, and
    for each of the ``K`` stratified folds trains the models returned by
    ``utils.get_models`` and collects their histories and test metrics.
    Histories are divided by ``K`` after the last fold and metrics are passed
    through ``utils.calculate_statistics``. Both are written as JSON when
    ``--write_output`` is true.

    Command-line arguments:
        --n_hidden_units (int, default 64)
        --n_hidden_layers (int, default 1)
        --train_epochs (int, default 100)
        --write_output (bool, default True; accepts true/false, yes/no, 1/0)
    """
    parser = argparse.ArgumentParser(description="Available Parameters:")
    parser.add_argument("--n_hidden_units", default=64, type=int)
    parser.add_argument("--n_hidden_layers", default=1, type=int)
    parser.add_argument("--train_epochs", default=100, type=int)
    parser.add_argument("--write_output", default=True, type=_str_to_bool)
    args = parser.parse_args()

    # Fixed seeds so repeated runs are comparable.
    torch.manual_seed(0)
    np.random.seed(0)

    profiles = pd.read_csv("../data/new_profiles_200t.csv")
    comments = pd.read_csv("../data/new_comments_200t.csv")
    comments = comments.drop_duplicates()

    profiles = preprocessing.categorical_to_numerical(profiles, col="category_1")
    authors = profiles.profile_username.values
    features = preprocessing.scale(
        profiles.drop(columns=["category_1", "profile_username"]).values
    )
    # Pair each username with its own feature row. Iterating a set here would
    # pair names with rows in arbitrary hash order.
    name_to_record = dict(zip(authors, features))
    input_dim = features.shape[1]
    # NOTE(review): the "+ 1" is kept from the original; its purpose is not
    # visible here -- confirm against the label encoding.
    output_dim = len(profiles.category_1.unique()) + 1
    user_to_label = {
        user: category
        for user, category in profiles[["profile_username", "category_1"]].values
    }

    K = 5
    skf = StratifiedKFold(n_splits=K)
    models_metrics, models_histories = defaultdict(dict), defaultdict(list)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    folds = skf.split(authors, profiles.category_1.values)
    for kth_fold, (train_idx, test_idx) in enumerate(folds, start=1):
        print("Starting {}th Fold".format(kth_fold))

        username_to_index = utils.get_users_indices(authors)
        interactions = utils.get_interactions(comments, username_to_index)
        edge_index = utils.get_edge_index(interactions)

        x = utils.get_x(authors, name_to_record, input_dim=input_dim)
        y = utils.get_y(user_to_label, authors)
        assert len(x) == len(y), "Train Input and Output tensor do not have the same dimensions"

        # Boolean node masks as plain lists (same type as before), built in
        # one vectorised pass instead of a per-node membership scan.
        node_ids = np.arange(len(x))
        train_mask = np.isin(node_ids, train_idx).tolist()
        test_mask = np.isin(node_ids, test_idx).tolist()

        graph = Data(
            x=x,
            y=y,
            edge_index=edge_index,
            train_mask=train_mask,
            test_mask=test_mask,
        ).to(device)

        models = utils.get_models(
            graph.num_nodes,
            input_dim,
            output_dim,
            args.n_hidden_units,
            args.n_hidden_layers,
            device=device,
            lr=0.005,
        )
        histories = utils.train(graph, models, epochs=args.train_epochs)
        models_histories = utils.update_histories(models_histories, histories)
        current_metrics = utils.test(graph, models)
        utils.update_metrics_dict(models_metrics, current_metrics)
        print('\n')

    # Get mean traces
    models_histories = {
        model: list(history / K) for model, history in models_histories.items()
    }
    models_metrics = utils.calculate_statistics(models_metrics)

    if args.write_output:
        run_tag = "{}e_{}l_{}u".format(
            args.train_epochs, args.n_hidden_layers, args.n_hidden_units
        )
        utils.write_json(
            "../data/results/models_metrics_{}.json".format(run_tag),
            models_metrics,
        )
        utils.write_json(
            "../data/results/models_histories_{}.json".format(run_tag),
            models_histories,
        )
def plot_time_evolve_step(N, step, f, T, dt):
    """Plot an initial profile and three later snapshots of its stepped evolution.

    Args:
        N: Passed to ``get_x`` and ``step``.
        step: Callable ``step(N, V, dt)`` returning an object that is applied
            to the state with ``@`` once per time step.
        f: Callable evaluated on the output of ``get_x(N)`` to give the
            initial profile.
        T: Total time; the number of steps is ``int(T / dt)``.
        dt: Time-step size.
    """
    grid = get_x(N)
    initial = f(grid)
    state = np.copy(initial)

    print("Making step")
    propagator = step(N, V, dt)
    n_steps = int(T / dt)
    print("walking {} steps".format(n_steps))

    fig, ax = plt.subplots()
    ax.plot(grid, initial)

    # Advance a third of the steps at a time and draw a snapshot after each.
    steps_per_snapshot = int(n_steps / 3)
    for _ in range(3):
        for _ in range(steps_per_snapshot):
            state = propagator @ state
        ax.plot(grid, state)
    plt.show()
def plot_H_eff_vecs(N, Vrs):
    """Compare eigenvectors of ``get_H_eff`` with those from ``get_eig``.

    For every ``Vr`` in ``Vrs`` the two eigenvectors of ``get_H_eff(N, V, Vr)``
    (sorted by eigenvalue) are drawn as black crosses at x = 0.25 and 0.75.
    The squared magnitudes of the first two ``get_eig`` eigenvectors are drawn
    on a twin axis. Row 0 of the figure shows state 0, row 1 shows state 1.

    Args:
        N: Passed to ``get_H_eff``, ``get_eig`` and ``get_x``.
        Vrs: Sequence of ``Vr`` values to sweep over.
    """
    count = len(Vrs)
    fig, axes = plt.subplots(2)

    for idx, Vr in enumerate(Vrs):
        H_eff = get_H_eff(N, V, Vr)
        eff_vals, eff_vecs = eig(H_eff)
        # Reorder the eigenvector columns by ascending eigenvalue.
        eff_vecs = eff_vecs[:, np.argsort(eff_vals)]

        _, full_vecs = get_eig(N, lambda x: V(x, Vr), 2)
        grid = get_x(N)

        for state in (0, 1):
            axes[state].plot([0.25, 0.75], abs(eff_vecs[:, state])**2, "kx")
            twin = axes[state].twinx()
            twin.plot(grid, abs(full_vecs[:, state])**2, color=color(idx, count))

    plt.show()
def plot_superpos(N):
    """Plot an equal superposition of the first two eigenstates at t=0 and t=T.

    The potential ``V(x, V0)`` is drawn dashed on the primary axis. On the
    twin axis the superposition returned by ``time_evolve`` is drawn at time
    0, and its real and imaginary parts at ``T = pi / (l[0] - l[1])``. The
    figure is saved to ``FIG_PATH + "super_pos.pdf"``.

    Args:
        N: Passed to ``get_eig`` and ``get_x``; also shown in the title.
    """
    l, v = get_eig(N, lambda x: V(x, V0), 2)
    # Equal weights on the two eigenvectors, scaled by 1/sqrt(2).
    alpha = np.array([1, 1]) / sqrt(2)
    x = get_x(N)

    fig, ax = plt.subplots()
    ax.plot(x, V(x, V0), "k--")
    # Raw strings keep the LaTeX backslashes without relying on invalid
    # escape sequences (a SyntaxWarning on recent Python versions).
    ax.set_ylabel(r"$E / [2mL/\hbar^2]$")
    ax.set_xlabel("$x / [L]$")
    ax.set_title("$N={}$".format(N))

    ax2 = ax.twinx()
    ax2.set_ylabel(r"$\Psi / [1]$")
    ax2.plot(x, time_evolve(v, l, 0, alpha), label=r"$\Psi(x, 0) \in \bfR$")

    T = pi / (l[0] - l[1])
    # Evaluate the evolved state once and reuse it for both components.
    psi_T = time_evolve(v, l, T, alpha)
    ax2.plot(x, psi_T.real, label=r"$\Re(\Psi(x, T))$")
    ax2.plot(x, psi_T.imag, label=r"$\Im(\Psi(x, T))$")
    ax2.legend()

    plt.tight_layout()
    plt.savefig(FIG_PATH + "super_pos.pdf")
def plot_vecs(N, Vr):
    """Print potential matrix elements and plot the first two eigenvectors.

    Prints ``inner(a, V(x, Vr, 0) * b)`` for the four combinations of the
    first two eigenvectors, then plots both eigenvectors and, on a twin axis
    with symmetric limits, the potential ``V(x, Vr)`` as a dashed line.

    Args:
        N: Passed to ``get_eig`` and ``get_x``.
        Vr: Second argument forwarded to ``V``.
    """
    _, eigvecs = get_eig(N, lambda x: V(x, Vr), 2)
    grid = get_x(N)
    fig, ax = plt.subplots()

    first, second = eigvecs[:, 0], eigvecs[:, 1]
    # Same print order as before: <1|V|1>, <2|V|1>, <2|V|2>, <1|V|2>.
    pairs = (
        (first, first),
        (second, first),
        (second, second),
        (first, second),
    )
    for bra, ket in pairs:
        print(inner(bra, V(grid, Vr, 0) * ket))

    ax.plot(grid, first)
    ax.plot(grid, second)

    twin = ax.twinx()
    twin.plot(grid, V(grid, Vr), "k--")

    # Peak eigenvector amplitude; only needed if the disabled primary-axis
    # limits below are switched back on.
    vec_peak = np.max(abs(eigvecs))
    pot_peak = np.max(abs(V(grid, Vr)))
    # ax.set_ylim(-vec_peak*1.1, vec_peak*1.1)
    twin.set_ylim(-pot_peak*1.1, pot_peak*1.1)

    plt.plot()
    plt.show()
def plot_time_evolve(N):
    """Plot |Psi|^2 of a two-state superposition at n + 1 evenly spaced times.

    The potential ``V(x, V0)`` is drawn dashed on the primary axis. On the
    twin axis the squared magnitude of ``time_evolve(v, l, t, alpha)`` is
    drawn for ``t = i * T`` with ``i = 0..n``, where
    ``T = pi / (l[0] - l[1]) / n`` and ``n = 5``. The figure is saved to
    ``FIG_PATH + "time_evolve.pdf"``.

    Args:
        N: Passed to ``get_eig`` and ``get_x``; also shown in the title.
    """
    l, v = get_eig(N, lambda x: V(x, V0), 2)
    # Equal weights on the two eigenvectors, scaled by 1/sqrt(2).
    alpha = np.array([1, 1]) / sqrt(2)
    x = get_x(N)

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(x, V(x, V0), "k--")
    # Raw strings keep the LaTeX backslashes without relying on invalid
    # escape sequences (a SyntaxWarning on recent Python versions).
    ax.set_ylabel(r"$E / [2mL/\hbar^2]$")
    ax.set_xlabel("$x / [L]$")
    ax.set_title("$N={}$".format(N))

    ax2 = ax.twinx()
    ax2.set_ylabel(r"$|\Psi|^2 / [1]$")

    n = 5
    # Here T is the time between consecutive snapshots.
    T = pi / (l[0] - l[1]) / n
    for i in range(n + 1):
        v_new = time_evolve(v, l, T * i, alpha)
        label = r"$|\Psi(x, {}T/{})|^2$".format(i, n)
        ax2.plot(x, abs(v_new)**2, label=label, color=color(i, n))
    ax2.legend()

    plt.tight_layout()
    plt.savefig(FIG_PATH + "time_evolve.pdf")
def test_get_x(self):
    # get_x(k) is expected to return a NumPy array of shape (1, k).
    num_of_features = 5
    x = get_x(num_of_features)
    # Dedicated assertions report the actual type/shape when they fail,
    # unlike assertTrue, which only reports "False is not true".
    self.assertIsInstance(x, np.ndarray)
    self.assertEqual(x.shape, (1, num_of_features))
'clear', 'partly_cloudy', 'haze', 'cloudy', 'primary', 'agriculture', 'road', 'water', 'cultivation', 'habitation', 'bare_ground', 'selective_logging', 'artisinal_mine', 'blooming', 'slash_burn', 'conventional_mine', 'blow_down' ] label_map = {l: i for i, l in enumerate(train_tags)} inv_label_map = {i: l for l, i in label_map.items()} file_all = train_set['image_name'].values y_train = utils.get_y(train_set['tags'].values, label_map) test_file_all = test_set['image_name'].values tr_dir = 'E:/new_data/kaggle/planet/train-jpg/' ts_dir = 'E:/new_data/kaggle/planet/test-jpg/' # 获取带有sift描述子的数据集 X_train = utils.get_x(ts_dir) X_test = utils.get_x(ts_dir) # 训练 import lightgbm as lgb from sklearn.model_selection import StratifiedKFold # 分层交叉验证 from sklearn.metrics import fbeta_score p_tr = np.zeros((X_train.shape[0], 17)) y_ts = np.zeros((X_test.shape[0], 17)) params = { 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'binary_logloss', 'num_leaves': 31, 'min_data_in_leaf': 20,