Example #1
import argparse
from collections import defaultdict

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import StratifiedKFold
from torch_geometric.data import Data

import preprocessing
import utils


def main():
    parser = argparse.ArgumentParser(description="Available Parameters:")
    parser.add_argument("--n_hidden_units", default=64, type=int)
    parser.add_argument("--n_hidden_layers", default=1, type=int)
    parser.add_argument("--train_epochs", default=100, type=int)
    parser.add_argument("--write_output", default=True, type=bool)
    args = parser.parse_args()

    torch.manual_seed(0)
    np.random.seed(0)

    profiles = pd.read_csv("../data/new_profiles_200t.csv")
    comments = pd.read_csv("../data/new_comments_200t.csv")

    comments = comments.drop_duplicates()
    profiles = preprocessing.categorical_to_numerical(profiles, col="category_1")
    all_users = set(profiles.profile_username.values)

    data = preprocessing.scale(profiles.drop(columns=["category_1", "profile_username"]).values)
    # Zip usernames in DataFrame order; iterating the set would pair
    # names with the wrong rows, since set order is arbitrary
    usernames = profiles.profile_username.values
    name_to_record = {name: record for name, record in zip(usernames, data)}

    input_dim, output_dim = data.shape[1], len(profiles.category_1.unique()) + 1
    user_to_label = {user: category for user, category in profiles[["profile_username", "category_1"]].values}

    K = 5
    skf = StratifiedKFold(n_splits=K)
    models_metrics, models_histories = defaultdict(dict), defaultdict(list)
    for kth_fold, (train_idx, test_idx) in enumerate(skf.split(profiles.profile_username.values, profiles.category_1.values), start=1):
        print("Starting {}th Fold".format(kth_fold))

        authors = profiles.profile_username.values
        username_to_index = utils.get_users_indices(authors)
        interactions = utils.get_interactions(comments, username_to_index)
        edge_index = utils.get_edge_index(interactions)
        
        x = utils.get_x(authors, name_to_record, input_dim=input_dim)
        y = utils.get_y(user_to_label, authors)

        train_mask = [i in train_idx for i in range(len(x))]
        test_mask = [i in test_idx for i in range(len(x))]
        assert len(x) == len(y), "Input and output tensors do not have the same length"

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        data = Data(x=x, y=y, edge_index=edge_index, train_mask=train_mask, test_mask=test_mask).to(device)

        models = utils.get_models(data.num_nodes, input_dim, output_dim, args.n_hidden_units, args.n_hidden_layers, device=device, lr=0.005)
        histories = utils.train(data, models, epochs=args.train_epochs)
        models_histories = utils.update_histories(models_histories, histories)

        current_metrics = utils.test(data, models)
        utils.update_metrics_dict(models_metrics, current_metrics)

        print('\n')
        
    models_histories = {model: list(history/K) for model, history in models_histories.items()} # Get mean traces
    models_metrics = utils.calculate_statistics(models_metrics)

    if args.write_output:
        utils.write_json("../data/results/models_metrics_{}e_{}l_{}u.json".format(args.train_epochs, args.n_hidden_layers, args.n_hidden_units), models_metrics)
        utils.write_json("../data/results/models_histories_{}e_{}l_{}u.json".format(args.train_epochs, args.n_hidden_layers, args.n_hidden_units), models_histories)
Example #2
from sklearn import metrics, svm

import utils


def classify(X):
    n = len(X)
    y = utils.get_y()
    classifier = svm.SVC(gamma=0.001)
    classifier.fit(X[:n // 2], y[:n // 2])
    y_true = y[n // 2:]
    y_predict = classifier.predict(X[n // 2:])
    score = metrics.accuracy_score(y_true, y_predict)
    # print("Classification accuracy = {}".format(float(score)))
    return float(score)
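
The half-and-half split and SVC(gamma=0.001) mirror scikit-learn's classic digits example, so a plausible self-contained usage looks like the sketch below (load_digits stands in for the unseen utils.get_y):

from sklearn import datasets, metrics, svm

digits = datasets.load_digits()
X = digits.images.reshape((len(digits.images), -1))  # flatten the 8x8 images
y = digits.target

n = len(X)
clf = svm.SVC(gamma=0.001)
clf.fit(X[:n // 2], y[:n // 2])                      # train on the first half
score = metrics.accuracy_score(y[n // 2:], clf.predict(X[n // 2:]))
print("Held-out accuracy: {:.3f}".format(score))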
Example #3
import numpy as np

import utils


def calculate():
    """Calculate the velocity, and return it as a list"""

    # Define some constants
    g = 9.81  # Gravity constant
    c = 0.5  # Mass distribution constant

    # Retrieve the y positions once, then take the starting position
    y_positions = utils.get_y()
    first_y = y_positions[0]

    # Calculate speed at every position
    v = []
    for current_y in y_positions:
        y_diff = first_y - current_y  # Difference from this y to first y
        value = np.sqrt(2 * g * y_diff /
                        (1 + c))  # Value for speed in this position
        v.append(value)

    return v
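
The formula comes from energy conservation for a rolling body: m g Δy = ½ m v² (1 + c), where c = I/(m r²) is the moment-of-inertia ratio (c = 0.5 matches a solid cylinder), so v = sqrt(2 g Δy / (1 + c)). A vectorized equivalent of the loop:

import numpy as np

y = np.asarray(utils.get_y())
v = np.sqrt(2 * 9.81 * (y[0] - y) / (1 + 0.5))  # same values as the loop above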
Example #4
import utils


def calculate():
    """Calculate the curvature, and return it as a list"""

    # Retrieve y, derivative and 2nd derivative of y
    y = utils.get_y()
    dy = utils.get_dy()
    d2y = utils.get_d2y()

    k = []  # The curvature
    for i in range(len(y)):  # Iterate over all y elements with i as the index
        value = d2y[i] / (1 + dy[i]**2)**(3 / 2)
        k.append(value)

    return k
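
The loop evaluates the standard curvature of a plane curve given as a graph, k = y'' / (1 + y'²)^(3/2), point by point; a vectorized NumPy equivalent:

import numpy as np

dy = np.asarray(utils.get_dy())
d2y = np.asarray(utils.get_d2y())
k = d2y / (1 + dy**2) ** 1.5   # same values as the element-wise loop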
Example #5
import numpy as np
from sklearn import metrics
from sklearn.cluster import KMeans

import utils


def clustering(X):
    labels = utils.get_y()
    n_clusters = len(np.unique(labels))
    kmeans = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    kmeans.fit(X)
    vmeasure = metrics.v_measure_score(labels, kmeans.labels_)
    # mutualInfo = metrics.adjusted_mutual_info_score(labels,  kmeans.labels_)
    silhouette = metrics.silhouette_score(X,
                                          kmeans.labels_,
                                          metric='euclidean',
                                          sample_size=300)

    # print("Clustering measures: vmeasure = {}, silhouette = {}"
    #       .format(float(vmeasure), float(silhouette)))
    return (float(vmeasure), float(silhouette))
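
A self-contained usage sketch on synthetic data; make_blobs stands in for the project's real features, and the generated labels replace utils.get_y():

import numpy as np
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, labels = make_blobs(n_samples=500, centers=4, random_state=0)
kmeans = KMeans(init='k-means++', n_clusters=len(np.unique(labels)), n_init=10)
kmeans.fit(X)
print("v-measure:", metrics.v_measure_score(labels, kmeans.labels_))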
Example #6
import json
import time

import numpy as np

import utils


def run_send_to_client(ws):
    """ Main loop of the thread that reads the subscribed data,
        turns it into a JSON object, and sends it back to the client.
        The returned message is a dataframe in the `/tsnex/do_embedding` route
    """
    print("[PUBSUB] Thread to read subscribed data is starting ... ")
    while True:
        fixed_data = utils.get_from_db(key='fixed_points')
        fixed_ids = []
        if fixed_data:
            fixed_points = json.loads(fixed_data)
            fixed_ids = [int(id) for id in fixed_points.keys()]

        subscribedData = utils.get_subscribed_data()
        if subscribedData is not None:
            if not ws.closed:
                # pause the server and wait until the client receives new data;
                # if the user does not pause the client, a `continuous` command
                # will be sent automatically to resume the server
                utils.pause_server()

                # rewrite only the `embedding` field of subscribedData;
                # the other fields do not need to be touched
                X_embedded = subscribedData['embedding']
                zInfo = subscribedData['z_info']
                idx = np.argsort(zInfo)[::-1]
                y = utils.get_y()
                labels = json.loads(utils.get_from_db(key='labels'))
                raw_points = [{
                    'id': str(i),
                    'x': float(X_embedded[i][0]),
                    'y': float(X_embedded[i][1]),
                    'z': float(zInfo[i]),
                    'text': labels[i],
                    'label': str(y[i]),
                    'fixed': i in fixed_ids
                } for i in idx]
                subscribedData['embedding'] = raw_points
                ws.send(json.dumps(subscribedData))

        status = utils.get_server_status(['tick_frequence', 'stop'])
        if status['stop']:
            break
        else:
            time.sleep(status['tick_frequence'])
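
utils.get_from_db and the other storage helpers are not shown here; a minimal sketch of what a key-value backed get_from_db could look like, assuming a Redis store (the host, port, and encoding are all assumptions):

import redis

_db = redis.StrictRedis(host='localhost', port=6379, db=0)

def get_from_db(key):
    value = _db.get(key)   # bytes, or None if the key is absent
    return value.decode('utf-8') if value is not None else None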
Example #7
import numpy as np
import matplotlib.pyplot as plt
from utils import X, Y, get_y, get_a

Gnoise = np.random.normal(0.0, 0.1, len(Y))
Ynoisy = np.matrix([Y[i].item(0) + Gnoise[i]
                    for i in range(len(Y))]).transpose()

# Find a and b
A = get_a(X, Ynoisy)
print(A)

x = [0, 1]
y = get_y(x, A)

plt.scatter(np.asarray(X[:, 0]), np.asarray(Ynoisy))
plt.plot(x, y, color='r')

plt.show()
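
get_a and get_y come from the exercise's utils module; from the usage (fit a line through the noisy Y, then evaluate it at x = 0 and x = 1) they presumably solve and evaluate a least-squares line. A sketch of the equivalent NumPy calls under that assumption:

import numpy as np

# Degree-1 least-squares fit, y ~ a*x + b (what get_a presumably computes)
a, b = np.polyfit(np.asarray(X[:, 0]).ravel(), np.asarray(Ynoisy).ravel(), 1)
y_line = [b, a + b]   # the fitted line evaluated at x = 0 and x = 1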
Example #8
del matches_hard[("edeba23f215dcc702220", "51a11cbc498e4083823909f1")]

# Extra matches
matches_hard_test = utils.read_matches("matches_test_hard.csv")
matches_easy = utils.read_matches("matches_train.csv")
matches_easy_test = utils.read_matches("matches_test.csv")

# Compiling data sets
try:
    f = open("working/locu_classifier.cache", 'rb')
    (X_tot, y_tot) = cPickle.load(f)
    sys.stderr.write("Loading data from cache.")
except IOError:
    sys.stderr.write( "Featurizing easy dataset..." )
    (X_easy, index_easy) = utils.featurize(locu_easy, four_easy, utils.sim)
    y_easy = utils.get_y(index_easy, matches_easy)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing easy test dataset...")
    (X_easy_test, index_easy_test) = utils.featurize(locu_easy_test, four_easy_test, utils.sim)
    y_easy_test = utils.get_y(index_easy_test, matches_easy_test)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing hard dataset...")
    (X, index) = utils.featurize(locu, four, utils.sim)
    y = utils.get_y(index, matches_hard)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing hard test dataset...")
    (X_hard_test, index_hard_test) = utils.featurize(locu_test, four_test, utils.sim)
    y_hard_test = utils.get_y(index_hard_test, matches_hard_test)
Example #9
import imp

import numpy as np
import pandas as pd

import utils

imp.reload(utils)

wd = 'E:/new_data/kaggle/planet/'
train_set = pd.read_csv(wd + 'train_v2.csv')
train_set['tags'] = train_set['tags'].apply(lambda x: x.split(' '))
test_set = pd.read_csv(wd + 'sample_submission_v2.csv')
train_tags = [
    'clear', 'partly_cloudy', 'haze', 'cloudy', 'primary', 'agriculture',
    'road', 'water', 'cultivation', 'habitation', 'bare_ground',
    'selective_logging', 'artisinal_mine', 'blooming', 'slash_burn',
    'conventional_mine', 'blow_down'
]
label_map = {l: i for i, l in enumerate(train_tags)}
inv_label_map = {i: l for l, i in label_map.items()}
file_all = train_set['image_name'].values
y_train = utils.get_y(train_set['tags'].values, label_map)
test_file_all = test_set['image_name'].values

tr_dir = 'E:/new_data/kaggle/planet/train-jpg/'
ts_dir = 'E:/new_data/kaggle/planet/test-jpg/'

# Build the train/test feature sets with SIFT descriptors
X_train = utils.get_x(tr_dir)
X_test = utils.get_x(ts_dir)

# Training
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold  # stratified cross-validation
from sklearn.metrics import fbeta_score

p_tr = np.zeros((X_train.shape[0], 17))
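
p_tr is sized to hold out-of-fold probabilities for the 17 tags; the Planet competition is scored with F2, so a typical evaluation, using a hypothetical 0.2 threshold that is not in the original, would be:

# Hypothetical F2 evaluation of out-of-fold predictions (threshold assumed)
score = fbeta_score(y_train, (p_tr > 0.2).astype(int), beta=2, average='samples')
print(score)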
Example #10
matches_hard = utils.read_matches("matches_train_hard.csv")

# Remove crappy data from gold standard
del matches_hard[("5f3fd107090d0ddc658b", "51ce011a498ed8dfb15381bb")]
del matches_hard[("c170270283ef870d546b", "51eb7eed498e401ec51196b6")]
del matches_hard[("493f5e2798de851ec3b2", "51f119e7498e9716f71f4413")]
del matches_hard[("212dffb393f745df801a", "51e869ac498e7e485cabcdeb")]
del matches_hard[("e3f9d84c0c989f2e7928", "51e25e57498e535de72f03e7")]
del matches_hard[("66ef54d76ff989a91d52", "51c9e1dd498e33ecd8670892")]
del matches_hard[("edeba23f215dcc702220", "51a11cbc498e4083823909f1")]

matches_hard_test = utils.read_matches("matches_test_hard.csv")

sys.stderr.write( "Featurizing hard dataset..." )
(X, index) = utils.featurize(locu, four, utils.sim)
y = utils.get_y(index, matches_hard) 
sys.stderr.write( "done.\n" )

sys.stderr.write( "Featurizing hard test dataset..." )
(X_hard_test, index_hard_test) = utils.featurize(locu_test, four_test, utils.sim)
y_hard_test = utils.get_y(index_hard_test, matches_hard_test) 

X_tot = X + X_hard_test
y_tot = y + y_hard_test

# Load in model
classifier_model_file = sys.argv[1]
matcher_model_file = sys.argv[2]

model = joblib.load(classifier_model_file)
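
joblib.load returns the pickled estimator, so scoring the combined hard set is direct; a usage sketch, assuming the saved model is a standard scikit-learn classifier:

from sklearn import metrics

y_pred = model.predict(X_tot)            # hypothetical follow-up usage
print(metrics.accuracy_score(y_tot, y_pred))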
Example #11
        f_tr = {
            x_ph_bin: xtr[:, 0:len(binfeats)],
            x_ph_cont: xtr[:, len(binfeats):],
            t_ph: ttr
        }
        f_va = {
            x_ph_bin: xva[:, 0:len(binfeats)],
            x_ph_cont: xva[:, len(binfeats):],
            t_ph: tva
        }
        f_te = {
            x_ph_bin: xte[:, 0:len(binfeats)],
            x_ph_cont: xte[:, len(binfeats):],
            t_ph: tte
        }

        y_tr = get_y(sess, y_post, f_tr, shape=ytr.shape, L=100)
        y_va = get_y(sess, y_post, f_va, shape=yva.shape, L=100)
        y_te = get_y(sess, y_post, f_te, shape=yte.shape, L=100)

        y_tr, y_va, y_te = y_tr * ys + ym, y_va * ys + ym, y_te * ys + ym
        ytr, yva = ytr * ys + ym, yva * ys + ym  # un-normalize

        y_tr, y_va, y_te = y_tr.flatten(), y_va.flatten(), y_te.flatten()
        ytr, yva, yte = ytr.flatten(), yva.flatten(), yte.flatten()

        rmses[i][0] = rms(y_tr, ytr)
        rmses[i][1] = rms(y_va, yva)
        rmses[i][2] = rms(y_te, yte)

        print('rmse_tr: {:0.3f}, rmse_va: {:0.3f}, rmse_te: {:0.3f}'.format(
            rmses[i][0], rmses[i][1], rmses[i][2]))
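
rms is defined elsewhere in this script; from how it is called, it is presumably the root-mean-squared error, roughly:

import numpy as np

def rms(a, b):
    """Root-mean-squared error between two arrays (assumed definition)."""
    return np.sqrt(np.mean(np.square(a - b)))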
Example #12
    def forward(self, itr, input_data):
        print("--step#%d feed forward--" % itr)
        is_input_layer = True
        prev_layer = None
        current_time = 1
        TIME_OVER = 200
        INTERVAL = 1
        is_done = False
        queue = Queue()

        while True:
            if current_time > TIME_OVER or is_done:
                break
            else:
                i_layer = 0
                for layer in self.layer_list:
                    if i_layer == 0:
                        # first layer --> just feed input data
                        self.layer_list[0].neurons = input_data
                        prev_layer = self.layer_list[0]
                        is_input_layer = False
                        # current_time = utils.min_natural_number(prev_layer.neurons) + INTERVAL
                    else:
                        # do calculation for all neurons in this layer.
                        i_neuron = 0  # index of neuron

                        for neuron in layer.neurons:
                            if neuron > 0:
                                # this neuron has already fired (it stores its fire time)
                                i_neuron = i_neuron + 1
                                break
                            else:
                                # t_mask = utils.mask(current_time, prev_layer.neurons, self.n_terminals, self.delay)
                                y, w = utils.get_incoming_connections(
                                    layer.connections, i_neuron)
                                # evaluate the incoming spike responses at
                                # current_time as a 1-dimensional vector
                                y = utils.get_y(
                                    utils.flatten(y), current_time,
                                    prev_layer.neurons, self.delay, self.tau,
                                    self.n_terminals)
                                # flatten w into a 1-dimensional vector
                                w = utils.flatten(w)

                                masked_inner_connections = y * w
                                # membrane potential: weighted sum of responses
                                x = masked_inner_connections.sum()

                                if x >= self.theta:
                                    # the membrane potential crossed the
                                    # threshold theta: record the fire time
                                    self.layer_list[i_layer].neurons[i_neuron] = current_time

                                # update y
                                utils.update_connections(
                                    self.layer_list[i_layer].connections, y, w,
                                    i_neuron)

                                i_neuron = i_neuron + 1  # increase the index of neuron.

                    i_layer = i_layer + 1  # increase the index of layer.

            current_time += INTERVAL  # increase the current time.
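
utils.get_y is not shown; given the tau, delay, and n_terminals arguments, it presumably evaluates the standard SpikeProp spike-response kernel for each incoming terminal. A sketch of that kernel under the usual definition (an assumption about the unseen utils):

import numpy as np

def spike_response(s, tau):
    """SpikeProp kernel eps(s) = (s/tau) * exp(1 - s/tau) for s > 0, else 0.

    s is the time since the presynaptic spike arrived (fire time + delay);
    the response peaks at 1 when s == tau and decays afterwards.
    """
    s = np.asarray(s, dtype=float)
    return np.where(s > 0, (s / tau) * np.exp(1.0 - s / tau), 0.0)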
Example #13
    def backward(self, itr, output_data):
        print("--step#%d backward--" % itr)

        error = 0

        # t_a = self.layer_list[-1].neurons
        # t_d = output_data[itr]
        #
        # if self.loss_function == 'mse':
        #     error = utils.mse_loss(t_a, t_d)

        prev_delta = []
        temp_prev_delta = []
        for layer in reversed(self.layer_list):
            i_current_layer = self.layer_list.index(layer)  # index of current layer

            if i_current_layer == 0:
                # current layer is the first layer(input layer).
                break

            prev_layer = self.layer_list[i_current_layer - 1]  # previous layer

            i_neuron = 0
            for neuron in layer.neurons:
                if i_current_layer == (len(self.layer_list) - 1):
                    # for output layer
                    layer.neurons = utils.convert_not_fired(layer.neurons, 50)
                    delta = utils.get_delta(i_neuron=i_neuron,
                                            l_connections=[layer.connections],
                                            t_d=output_data[i_neuron],
                                            t_a=neuron,
                                            t_i=prev_layer.neurons,
                                            tau=self.tau,
                                            d=self.delay,
                                            n_terminals=self.n_terminals,
                                            is_output_layer=True,
                                            prev_delta=None)

                    # if neuron < 0:
                    #     neuron = 40
                    y, w = utils.get_incoming_connections(
                        layer.connections, i_neuron)
                    y = utils.get_y(y, neuron, prev_layer.neurons, self.delay,
                                    self.tau, self.n_terminals)
                    delta_w = (self.lr * y * delta)
                    w = w + delta_w  # update weights
                    utils.update_connections(layer.connections, y, w, i_neuron)
                    temp_prev_delta.append(delta)
                    i_neuron = i_neuron + 1
                else:
                    # for hidden layers (generalized case)
                    if i_current_layer == len(self.layer_list):
                        # first layer --> end point of the backward pass
                        break

                    next_layer = self.layer_list[i_current_layer + 1]  # layer J
                    delta = utils.get_delta(
                        i_neuron=i_neuron,
                        l_connections=[
                            next_layer.connections, layer.connections
                        ],
                        t_j=self.layer_list[i_current_layer + 1].neurons,
                        t_i=neuron,
                        t_h=self.layer_list[i_current_layer - 1].neurons,
                        tau=self.tau,
                        d=self.delay,
                        n_terminals=self.n_terminals,
                        is_output_layer=False,
                        prev_delta=prev_delta)

                    y, w = utils.get_incoming_connections(
                        layer.connections, i_neuron)
                    y = utils.get_y(y, neuron, prev_layer.neurons, self.delay,
                                    self.tau, self.n_terminals)
                    delta_w = -(self.lr * y * delta)
                    w = w + delta_w  # update weights
                    utils.update_connections(layer.connections, y, w, i_neuron)
                    temp_prev_delta.append(delta)
                    i_neuron = i_neuron + 1

            prev_delta.clear()
            prev_delta = temp_prev_delta.copy()
            temp_prev_delta.clear()

        return None
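
The two branches differ only in the sign of the weight update: +lr * y * delta at the output layer and -lr * y * delta at hidden layers, which presumably follows the sign conventions baked into utils.get_delta for SpikeProp-style gradient descent on spike times. A toy numeric check of the hidden-layer rule (all values illustrative):

lr, y, delta = 0.01, 0.8, -0.5     # hypothetical rate, response, and delta
delta_w = -(lr * y * delta)        # = +0.004, so this incoming weight grows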