# Imports required by this script; `preprocessing` and `utils` are local project
# modules, and Data(x=..., edge_index=...) is assumed to be torch_geometric.data.Data.
import argparse
from collections import defaultdict

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import StratifiedKFold
from torch_geometric.data import Data

import preprocessing
import utils


def main():
    parser = argparse.ArgumentParser(description="Available Parameters:")
    parser.add_argument("--n_hidden_units", default=64, type=int)
    parser.add_argument("--n_hidden_layers", default=1, type=int)
    parser.add_argument("--train_epochs", default=100, type=int)
    # NOTE: argparse's type=bool treats any non-empty string (including "False") as True,
    # so the flag is parsed with an explicit string-to-bool conversion instead.
    parser.add_argument("--write_output", default=True,
                        type=lambda s: str(s).lower() in ("true", "1", "yes"))
    args = parser.parse_args()

    torch.manual_seed(0)
    np.random.seed(0)

    profiles = pd.read_csv("../data/new_profiles_200t.csv")
    comments = pd.read_csv("../data/new_comments_200t.csv")
    comments = comments.drop_duplicates()

    profiles = preprocessing.categorical_to_numerical(profiles, col="category_1")
    all_users = set(profiles.profile_username.values)
    data = preprocessing.scale(profiles.drop(columns=["category_1", "profile_username"]).values)
    # Keep usernames and scaled records aligned; iterating over the set `all_users`
    # would not preserve the row order of `data`.
    name_to_record = {name: record for name, record in zip(profiles.profile_username.values, data)}
    input_dim, output_dim = data.shape[1], len(profiles.category_1.unique()) + 1
    user_to_label = {user: category for user, category in profiles[["profile_username", "category_1"]].values}

    K = 5
    skf = StratifiedKFold(n_splits=K)
    models_metrics, models_histories = defaultdict(dict), defaultdict(list)

    for kth_fold, (train_idx, test_idx) in enumerate(
            skf.split(profiles.profile_username.values, profiles.category_1.values), start=1):
        print("Starting fold {}".format(kth_fold))
        authors = profiles.profile_username.values
        username_to_index = utils.get_users_indices(authors)
        interactions = utils.get_interactions(comments, username_to_index)
        edge_index = utils.get_edge_index(interactions)
        x = utils.get_x(authors, name_to_record, input_dim=input_dim)
        y = utils.get_y(user_to_label, authors)
        train_mask = [i in train_idx for i in range(len(x))]
        test_mask = [i in test_idx for i in range(len(x))]
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        data = Data(x=x, y=y, edge_index=edge_index, train_mask=train_mask, test_mask=test_mask).to(device)
        assert len(x) == len(y), "Input and output tensors do not have the same length"
        models = utils.get_models(data.num_nodes, input_dim, output_dim, args.n_hidden_units,
                                  args.n_hidden_layers, device=device, lr=0.005)
        histories = utils.train(data, models, epochs=args.train_epochs)
        models_histories = utils.update_histories(models_histories, histories)
        current_metrics = utils.test(data, models)
        utils.update_metrics_dict(models_metrics, current_metrics)
        print('\n')

    models_histories = {model: list(history / K) for model, history in models_histories.items()}  # Get mean traces
    models_metrics = utils.calculate_statistics(models_metrics)

    if args.write_output:
        utils.write_json("../data/results/models_metrics_{}e_{}l_{}u.json".format(
            args.train_epochs, args.n_hidden_layers, args.n_hidden_units), models_metrics)
        utils.write_json("../data/results/models_histories_{}e_{}l_{}u.json".format(
            args.train_epochs, args.n_hidden_layers, args.n_hidden_units), models_histories)
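
# Entry-point sketch (assumed; the original excerpt does not show the bottom of the file):
#   python <script>.py --n_hidden_units 128 --n_hidden_layers 2 --train_epochs 200
if __name__ == "__main__":
    main()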
import utils
from sklearn import svm, metrics


def classify(X):
    """Train an SVM on the first half of X and score it on the second half."""
    n = len(X)
    y = utils.get_y()
    classifier = svm.SVC(gamma=0.001)
    classifier.fit(X[:n // 2], y[:n // 2])
    y_true = y[n // 2:]
    y_predict = classifier.predict(X[n // 2:])
    score = metrics.accuracy_score(y_true, y_predict)
    # print("Classification accuracy = {}".format(float(score)))
    return float(score)
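
# Hypothetical usage sketch (names assumed): X must be a 2-D feature matrix whose rows
# line up, in order, with the labels returned by utils.get_y(); classify() then trains
# on the first half and reports accuracy on the held-out second half.
# accuracy = classify(X)
# print("held-out accuracy:", accuracy)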
import numpy as np

import utils


def calculate():
    """Calculate the velocity, and return it as a list."""
    # Define some constants
    g = 9.81  # Gravity constant
    c = 0.5   # Mass distribution constant

    # Retrieve the starting y position
    first_y = utils.get_y()[0]

    # Calculate the speed at every position. Energy conservation for a body whose
    # rotational energy is c times its translational energy gives
    # m*g*y_diff = (1 + c) * m*v**2 / 2, i.e. v = sqrt(2*g*y_diff / (1 + c)).
    v = []
    for current_y in utils.get_y():
        y_diff = first_y - current_y  # Drop from the first y position to this one
        value = np.sqrt(2 * g * y_diff / (1 + c))  # Speed at this position
        v.append(value)
    return v
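
# Quick numeric illustration (not part of the original module): part of the potential
# energy goes into rotation, so the speed stays below the free-fall value.
#   sqrt(2 * 9.81 * 1.0 / (1 + 0.5)) ≈ 3.62 m/s   (drop of 1 m, c = 0.5)
#   sqrt(2 * 9.81 * 1.0)             ≈ 4.43 m/s   (free fall, c = 0)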
import utils


def calculate():
    """Calculate the curvature, and return it as a list."""
    # Retrieve y, the derivative and the 2nd derivative of y
    y = utils.get_y()
    dy = utils.get_dy()
    d2y = utils.get_d2y()

    k = []  # The curvature
    for i in range(len(y)):  # Iterate over all y elements with i as the index
        value = d2y[i] / (1 + dy[i]**2)**(3 / 2)  # Signed curvature of the graph of y(x)
        k.append(value)
    return k
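
# Illustrative check of the formula above (not from the original module): on the upper
# half of a circle of radius R, y = sqrt(R^2 - x^2), the signed curvature
# d2y / (1 + dy^2)^(3/2) is -1/R at every point.
import numpy as np

R = 2.0
x = np.linspace(-1.0, 1.0, 5)
y = np.sqrt(R**2 - x**2)
dy = -x / np.sqrt(R**2 - x**2)
d2y = -R**2 / (R**2 - x**2)**1.5
assert np.allclose(d2y / (1 + dy**2)**1.5, -1.0 / R)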
import numpy as np
from sklearn import metrics
from sklearn.cluster import KMeans

import utils


def clustering(X):
    labels = utils.get_y()
    n_clusters = len(np.unique(labels))
    kmeans = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    kmeans.fit(X)
    vmeasure = metrics.v_measure_score(labels, kmeans.labels_)
    # mutual_info = metrics.adjusted_mutual_info_score(labels, kmeans.labels_)
    silhouette = metrics.silhouette_score(X, kmeans.labels_, metric='euclidean', sample_size=300)
    # print("Clustering measures: vmeasure = {}, silhouette = {}"
    #       .format(float(vmeasure), float(silhouette)))
    return (float(vmeasure), float(silhouette))
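
# Hypothetical usage sketch (X is assumed to be the feature matrix whose rows are
# aligned with the labels returned by utils.get_y()). V-measure lies in [0, 1] and
# the silhouette score in [-1, 1]; higher is better for both.
# vmeasure, silhouette = clustering(X)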
import json
import time

import numpy as np

import utils


def run_send_to_client(ws):
    """
    Main loop of the thread that reads the subscribed data, turns it into a JSON
    object and sends it back to the client.
    The returned message is the dataframe of the `/tsnex/do_embedding` route.
    """
    print("[PUBSUB] Thread to read subscribed data is starting ... ")
    while True:
        fixed_data = utils.get_from_db(key='fixed_points')
        fixed_ids = []
        if fixed_data:
            fixed_points = json.loads(fixed_data)
            fixed_ids = [int(id) for id in fixed_points.keys()]

        subscribedData = utils.get_subscribed_data()
        if subscribedData is not None:
            if not ws.closed:
                # Pause the server and wait until the client receives the new data.
                # If the user does not pause the client, a `continous` command is
                # sent automatically to resume the server.
                utils.pause_server()

                # Prepare the `embedding` field in subscribedData;
                # the other fields are left untouched.
                X_embedded = subscribedData['embedding']
                zInfo = subscribedData['z_info']
                idx = np.argsort(zInfo)[::-1]
                y = utils.get_y()
                labels = json.loads(utils.get_from_db(key='labels'))

                raw_points = [{
                    'id': str(i),
                    'x': float(X_embedded[i][0]),
                    'y': float(X_embedded[i][1]),
                    'z': float(zInfo[i]),
                    'text': labels[i],
                    'label': str(y[i]),
                    'fixed': i in fixed_ids
                } for i in idx]

                subscribedData['embedding'] = raw_points
                ws.send(json.dumps(subscribedData))

        status = utils.get_server_status(['tick_frequence', 'stop'])
        if status['stop']:
            break
        else:
            time.sleep(status['tick_frequence'])
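
# Shape of one entry of the `embedding` list as sent to the client, with illustrative
# values (the field names come from the dict comprehension above):
# {"id": "3", "x": 0.12, "y": -1.5, "z": 0.87, "text": "seven", "label": "7", "fixed": false}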
import numpy as np
import matplotlib.pyplot as plt

from utils import X, Y, get_y, get_a

# Add Gaussian noise to the observations
Gnoise = np.random.normal(0.0, 0.1, len(Y))
Ynoisy = np.matrix([Y[i].item(0) + Gnoise[i] for i in range(len(Y))]).transpose()

# Find a and b
A = get_a(X, Ynoisy)
print(A)

# Plot the noisy data together with the fitted line on [0, 1]
plt.scatter(np.asarray(X[:, 0]), np.asarray(Ynoisy))
x = [0, 1]
y = get_y(x, A)
plt.plot(x, y, color='r')
plt.show()
del matches_hard[("edeba23f215dcc702220", "51a11cbc498e4083823909f1")]

# Extra matches
matches_hard_test = utils.read_matches("matches_test_hard.csv")
matches_easy = utils.read_matches("matches_train.csv")
matches_easy_test = utils.read_matches("matches_test.csv")

# Compiling data sets
try:
    f = open("working/locu_classifier.cache", 'rb')
    (X_tot, y_tot) = cPickle.load(f)
    sys.stderr.write("Loading data from cache.")
except IOError:
    sys.stderr.write("Featurizing easy dataset...")
    (X_easy, index_easy) = utils.featurize(locu_easy, four_easy, utils.sim)
    y_easy = utils.get_y(index_easy, matches_easy)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing easy test dataset...")
    (X_easy_test, index_easy_test) = utils.featurize(locu_easy_test, four_easy_test, utils.sim)
    y_easy_test = utils.get_y(index_easy_test, matches_easy_test)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing hard dataset...")
    (X, index) = utils.featurize(locu, four, utils.sim)
    y = utils.get_y(index, matches_hard)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing hard test dataset...")
    (X_hard_test, index_hard_test) = utils.featurize(locu_test, four_test, utils.sim)
    y_hard_test = utils.get_y(index_hard_test, matches_hard_test)
imp.reload(utils)

wd = 'E:/new_data/kaggle/planet/'
train_set = pd.read_csv(wd + 'train_v2.csv')
train_set['tags'] = train_set['tags'].apply(lambda x: x.split(' '))
test_set = pd.read_csv(wd + 'sample_submission_v2.csv')

train_tags = [
    'clear', 'partly_cloudy', 'haze', 'cloudy', 'primary', 'agriculture',
    'road', 'water', 'cultivation', 'habitation', 'bare_ground',
    'selective_logging', 'artisinal_mine', 'blooming', 'slash_burn',
    'conventional_mine', 'blow_down'
]
label_map = {l: i for i, l in enumerate(train_tags)}
inv_label_map = {i: l for l, i in label_map.items()}

file_all = train_set['image_name'].values
y_train = utils.get_y(train_set['tags'].values, label_map)
test_file_all = test_set['image_name'].values

tr_dir = 'E:/new_data/kaggle/planet/train-jpg/'
ts_dir = 'E:/new_data/kaggle/planet/test-jpg/'

# Build the data sets with SIFT descriptors
X_train = utils.get_x(tr_dir)  # the original passed ts_dir here, which would featurize test images for training
X_test = utils.get_x(ts_dir)

# Training
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold  # stratified cross-validation
from sklearn.metrics import fbeta_score

p_tr = np.zeros((X_train.shape[0], 17))
matches_hard = utils.read_matches("matches_train_hard.csv")

# Remove crappy data from gold standard
del matches_hard[("5f3fd107090d0ddc658b", "51ce011a498ed8dfb15381bb")]
del matches_hard[("c170270283ef870d546b", "51eb7eed498e401ec51196b6")]
del matches_hard[("493f5e2798de851ec3b2", "51f119e7498e9716f71f4413")]
del matches_hard[("212dffb393f745df801a", "51e869ac498e7e485cabcdeb")]
del matches_hard[("e3f9d84c0c989f2e7928", "51e25e57498e535de72f03e7")]
del matches_hard[("66ef54d76ff989a91d52", "51c9e1dd498e33ecd8670892")]
del matches_hard[("edeba23f215dcc702220", "51a11cbc498e4083823909f1")]

matches_hard_test = utils.read_matches("matches_test_hard.csv")

sys.stderr.write("Featurizing hard dataset...")
(X, index) = utils.featurize(locu, four, utils.sim)
y = utils.get_y(index, matches_hard)
sys.stderr.write("done.\n")

sys.stderr.write("Featurizing hard test dataset...")
(X_hard_test, index_hard_test) = utils.featurize(locu_test, four_test, utils.sim)
y_hard_test = utils.get_y(index_hard_test, matches_hard_test)

X_tot = X + X_hard_test
y_tot = y + y_hard_test

# Load in model
classifier_model_file = sys.argv[1]
matcher_model_file = sys.argv[2]
model = joblib.load(classifier_model_file)
f_tr = {
    x_ph_bin: xtr[:, 0:len(binfeats)],
    x_ph_cont: xtr[:, len(binfeats):],
    t_ph: ttr
}
f_va = {
    x_ph_bin: xva[:, 0:len(binfeats)],
    x_ph_cont: xva[:, len(binfeats):],
    t_ph: tva
}
f_te = {
    x_ph_bin: xte[:, 0:len(binfeats)],
    x_ph_cont: xte[:, len(binfeats):],
    t_ph: tte
}

y_tr = get_y(sess, y_post, f_tr, shape=ytr.shape, L=100)
y_va = get_y(sess, y_post, f_va, shape=yva.shape, L=100)
y_te = get_y(sess, y_post, f_te, shape=yte.shape, L=100)

# Un-normalize predictions and targets, then flatten for the RMSE computation
y_tr, y_va, y_te = y_tr * ys + ym, y_va * ys + ym, y_te * ys + ym
ytr, yva = ytr * ys + ym, yva * ys + ym
y_tr, y_va, y_te = y_tr.flatten(), y_va.flatten(), y_te.flatten()
ytr, yva, yte = ytr.flatten(), yva.flatten(), yte.flatten()

rmses[i][0], rmses[i][1], rmses[i][2] = rms(y_tr, ytr), rms(y_va, yva), rms(y_te, yte)
print('rmse_tr: {:0.3f}, rmse_va: {:0.3f}, rmse_te: {:0.3f}'.format(
    rmses[i][0], rmses[i][1], rmses[i][2]))
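
# For reference, a root-mean-square-error helper consistent with how `rms` is used
# above; the actual definition is assumed to live elsewhere in this script.
# def rms(y_pred, y_true):
#     return np.sqrt(np.mean(np.square(y_pred - y_true)))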
def forward(self, itr, input_data):
    print("--step#%d feed forward--" % itr)
    is_input_layer = True
    prev_layer = None
    current_time = 1
    TIME_OVER = 200
    INTERVAL = 1
    is_done = False
    queue = Queue()

    while True:
        if current_time > TIME_OVER or is_done:
            break
        else:
            i_layer = 0
            for layer in self.layer_list:
                if self.layer_list.index(layer) == 0:
                    # First layer --> just feed the input data.
                    self.layer_list[0].neurons = input_data
                    prev_layer = self.layer_list[0]
                    is_input_layer = False
                    # current_time = utils.min_natural_number(prev_layer.neurons) + INTERVAL
                else:
                    # Do the calculation for all neurons in this layer.
                    i_neuron = 0  # index of the neuron
                    for neuron in layer.neurons:
                        if neuron > 0:
                            # Time is over or the current neuron has already fired.
                            i_neuron = i_neuron + 1
                            break
                        else:
                            # t_mask = utils.mask(current_time, prev_layer.neurons, self.n_terminals, self.delay)
                            y, w = utils.get_incoming_connections(layer.connections, i_neuron)
                            # Convert y and w into 1-dimensional vectors.
                            y = utils.get_y(utils.flatten(y), current_time, prev_layer.neurons,
                                            self.delay, self.tau, self.n_terminals)
                            w = utils.flatten(w)
                            masked_inner_connections = y * w
                            x = masked_inner_connections.sum()  # get its membrane potential
                            if x >= self.theta:
                                # The membrane potential crossed the threshold theta.
                                self.layer_list[i_layer].neurons[i_neuron] = current_time
                                # Update y
                                utils.update_connections(self.layer_list[i_layer].connections, y, w, i_neuron)
                            i_neuron = i_neuron + 1  # increase the index of the neuron
                i_layer = i_layer + 1  # increase the index of the layer
            current_time += INTERVAL  # increase the current time
def backward(self, itr, output_data):
    print("--step#%d backward--" % itr)
    error = 0
    # t_a = self.layer_list[-1].neurons
    # t_d = output_data[itr]
    #
    # if self.loss_function == 'mse':
    #     error = utils.mse_loss(t_a, t_d)

    prev_delta = []
    temp_prev_delta = []
    for layer in reversed(self.layer_list):
        i_current_layer = self.layer_list.index(layer)  # index of the current layer
        if i_current_layer == 0:
            # The current layer is the first layer (input layer).
            break
        prev_layer = self.layer_list[i_current_layer - 1]  # previous layer

        i_neuron = 0
        for neuron in layer.neurons:
            if i_current_layer == (len(self.layer_list) - 1):
                # Output layer
                layer.neurons = utils.convert_not_fired(layer.neurons, 50)
                delta = utils.get_delta(i_neuron=i_neuron,
                                        l_connections=[layer.connections],
                                        t_d=output_data[i_neuron],
                                        t_a=neuron,
                                        t_i=prev_layer.neurons,
                                        tau=self.tau,
                                        d=self.delay,
                                        n_terminals=self.n_terminals,
                                        is_output_layer=True,
                                        prev_delta=None)
                # if neuron < 0:
                #     neuron = 40
                y, w = utils.get_incoming_connections(layer.connections, i_neuron)
                y = utils.get_y(y, neuron, prev_layer.neurons, self.delay, self.tau, self.n_terminals)
                delta_w = (self.lr * y * delta)
                w = w + delta_w  # update weights
                utils.update_connections(layer.connections, y, w, i_neuron)
                temp_prev_delta.append(delta)
                i_neuron = i_neuron + 1
            else:
                # Hidden layer (generalized case)
                if i_current_layer == len(self.layer_list):
                    # First layer --> end point of the backward pass
                    break
                next_layer = self.layer_list[i_current_layer + 1]  # layer J
                delta = utils.get_delta(i_neuron=i_neuron,
                                        l_connections=[next_layer.connections, layer.connections],
                                        t_j=self.layer_list[i_current_layer + 1].neurons,
                                        t_i=neuron,
                                        t_h=self.layer_list[i_current_layer - 1].neurons,
                                        tau=self.tau,
                                        d=self.delay,
                                        n_terminals=self.n_terminals,
                                        is_output_layer=False,
                                        prev_delta=prev_delta)
                y, w = utils.get_incoming_connections(layer.connections, i_neuron)
                y = utils.get_y(y, neuron, prev_layer.neurons, self.delay, self.tau, self.n_terminals)
                delta_w = -(self.lr * y * delta)
                w = w + delta_w  # update weights
                utils.update_connections(layer.connections, y, w, i_neuron)
                temp_prev_delta.append(delta)
                i_neuron = i_neuron + 1

        prev_delta.clear()
        prev_delta = temp_prev_delta.copy()
        temp_prev_delta.clear()

    return None