Example #1
def TestCTRNN(angs, model, criterion, device):
    dataProcessor = DataPreprocessor(angs, sample_length=700, normalize=True)
    initdir = torch.from_numpy(
        dataProcessor.GetInitialInput()).float().to(device)
    input = torch.from_numpy(
        dataProcessor.GetTrainingInputs()).float().to(device)
    output = torch.from_numpy(
        dataProcessor.GetTrainingOutputs()).float().to(device)

    pred, h = model(initdir, input)
    loss = criterion(pred, output)
    print(f"Loss on real data: {loss.item():.4f}")
    pred = np.transpose(pred.detach().cpu().numpy(), (2, 1, 0))
    pred = np.reshape(pred, (pred.shape[0], -1))
    radsOut = np.unwrap(np.arctan2(pred[0], pred[1]))

    plt.plot(angs[1], label='ground truth')
    plt.plot(radsOut, label='predicted')
    plt.xlabel('Timestep (ms)')
    plt.ylabel('Angle (rad)')
    plt.title('Prediction Visualization')
    plt.legend()
    plt.savefig('performance_fakeAngs.png')

    del initdir
    del input
    del output
def TestCTRNN(angs, model, criterion, device, training_outputs):
    dataProcessor = DataPreprocessor(angs, sample_length=700, normalize=True) 
    initdir = torch.from_numpy(dataProcessor.GetInitialInput()).float().to(device)
    input = torch.from_numpy(dataProcessor.GetTrainingInputs()).float().to(device)
    output = torch.from_numpy(dataProcessor.GetTrainingOutputs()).float().to(device)
    
    pred = model(initdir, input)
    loss = criterion(pred, output)
    print(f"Loss on real data: {loss.item():.4f}")
    pred = np.transpose(pred.detach().cpu().numpy(), (2, 1, 0))
    pred = np.reshape(pred, (pred.shape[0], -1))
    radsOut = np.unwrap(np.arctan2(pred[0], pred[1]))

    print("Graphing test performance to performance.png...")
    for i, output in enumerate(training_outputs):
        out = np.transpose(output.detach().cpu().numpy(), (2, 1, 0))
        out = np.reshape(out, (out.shape[0], -1))
        plt.plot(np.unwrap(np.arctan2(out[0], out[1])), label=f'fake batch {i}')
    plt.plot(angs[1], label='ground truth', color='blue')
    plt.plot(radsOut, label='predicted', color='orange')
    plt.xlabel('Timestep (ms)')
    plt.ylabel('Angle (rad)')
    plt.title('Prediction Visualization')
    plt.legend(prop={'size': 6})
    plt.savefig('performance.png')
Example #3
def build_model():
    '''
    request body{
        name: model name,
        path: path to model
    }
    '''
    jsondata = request.get_json()
    name = jsondata["name"]
    if name not in model.keys():
        _model = support_vector_machine()
        model[name] = _model

    if name not in preprocessor.keys():
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor

    dataX, datay = DataPreprocessor().getData()
    trainX, testX, trainy, testy = train_test_split(dataX,
                                                    datay,
                                                    test_size=0.2)

    model[name].train(trainX, trainy)

    score = model[name].score(testX, testy)

    # payload = {"test R square": str(score),
    #            "result": "success",
    #            }
    return str(score)
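
The docstring above describes the JSON body this endpoint expects. As a rough usage sketch (assuming build_model is registered as a Flask POST route; the host, port, route, and values below are hypothetical):

import requests

# Hypothetical endpoint; point this at wherever build_model is actually routed.
payload = {"name": "svm_demo", "path": "/tmp/models/svm_demo"}
resp = requests.post("http://localhost:5000/build_model", json=payload)
print(resp.text)  # build_model returns the test score as a plain string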
Example #4
def main():
    realAngs = np.load('angs_smooth.npy') - 2 * np.pi
    plt.plot(np.unwrap(realAngs[1]), label='real angs')
    diffs = realAngs[1][1:] - realAngs[1][:-1]
    print("realAngs avg diff: ", np.mean(diffs))
    print("realAngs stddev: ", np.std(diffs))
    print("realAngs range: ", np.min(realAngs[1]), np.max(realAngs[1]))

    for i in range(10):
        datagen = AVDataGenerator(T=realAngs.shape[1],
                                  dt=25,
                                  mean=np.mean(diffs) / 100,
                                  sigma=np.std(diffs) / 10,
                                  momentum=0)
        # if i == 0:
        #     fakeAngs = realAngs
        # else:
        fakeAngs = datagen.GenerateAngs()
        dataProcessor = DataPreprocessor(fakeAngs,
                                         sample_length=700,
                                         normalize=False)
        fakeOutputs = dataProcessor.GetTrainingOutputs()
        fakeOutputs = np.transpose(fakeOutputs, (2, 1, 0))
        fakeOutputs = np.reshape(fakeOutputs, (fakeOutputs.shape[0], -1))
        fakeOutputs = np.unwrap(np.arctan2(fakeOutputs[0], fakeOutputs[1]))
        plt.plot(fakeOutputs, label='fake angs')

    # fakeAngs = datagen.GenerateAngs()
    # plt.plot(fakeAngs[1], label='fake angs 2')

    plt.xlabel('Timestep (ms)')
    plt.ylabel('Angle (rad)')
    plt.title('AVDataGenerator Visualization')
    plt.legend(prop={'size': 6})
    plt.savefig('datagen.png')
Example #5
def getData(CSVFile):

    smoother = DataPreprocessor()
    data = read_csv(CSVFile)
    data = data[::-1]  # reverse
    ohclv_data = np.c_[data['Open'], data['High'], data['Low'], data['Close'],
                       data['Volume']]
    smoothened_ohclv_data = smoother.PandaSmoother(ohclv_data)
    return smoothened_ohclv_data, np.array(data["Close"]), list(data["Date"])
def write_counts_of_each_phrase(csv_reader, csv_writer, patterns):
    print(patterns)
    data_preprocessor = DataPreprocessor()
    for row in csv_reader:
        cleaned_row = data_preprocessor.remove_punctuation(''.join(row))
        resulting_row = []
        for pattern in patterns:
            number_of_occurrences = len(re.findall(pattern, cleaned_row))
            resulting_row.append(number_of_occurrences)
        csv_writer.writerow(resulting_row)
Example #7
def _count_bigrams_by_frequency(file_name):
    with open(file_name, 'r', encoding='mac-roman',
              newline='\r\n') as file:
        bigrams = {}
        unigrams = {}
        data_preprocessor = DataPreprocessor()
        for row in file.readlines():
            row = data_preprocessor.clean_row(row)
            for index in range(len(row) - 1):
                unigrams[row[index]] = unigrams.get(row[index], 0) + 1
                bigrams[(row[index], row[index + 1])] = bigrams.get(
                    (row[index], row[index + 1]), 0) + 1
        return bigrams
    def find_instances_near_two_class_centroid(self):
        data = DataPreprocessor().select_all_features()
        # data = data.drop(['target'], axis=1)

        mean_class_0 = data.loc[data['target'] == 0].mean()
        mean_class_1 = data.loc[data['target'] == 1].mean()
        means = pd.concat([mean_class_0, mean_class_1], axis=1)
        means = means.T
        data = pd.concat([data, means], ignore_index=True)  # add the two class-centroid rows
        class_0 = data.loc[data['target'] == 0]
        class_1 = data.loc[data['target'] == 1]

        dist_condensed_class_0 = pdist(class_0.values)
        distance_mat_0 = pd.DataFrame(squareform(dist_condensed_class_0),
                                      index=class_0.index,
                                      columns=class_0.index)
        distance_class_0 = distance_mat_0.iloc[(distance_mat_0.shape[0] - 1)]
        distance_class_0.sort_values(ascending=True, inplace=True)
        distance_class_0.drop(labels=[distance_class_0.index[0]], inplace=True)
        sort_by_distance_class_0 = distance_class_0.index
        nearest_to_centroid_class_0 = sort_by_distance_class_0[
            0:len(sort_by_distance_class_0) // 2]
        not_near_to_centroid_0 = set(
            class_0.index) - set(nearest_to_centroid_class_0)

        dist_condensed_class_1 = pdist(class_1.values, metric='euclidean')
        distance_mat_1 = pd.DataFrame(squareform(dist_condensed_class_1),
                                      index=class_1.index,
                                      columns=class_1.index)
        distance_class_1 = distance_mat_1.iloc[(distance_mat_1.shape[0] - 1)]
        distance_class_1.sort_values(ascending=True, inplace=True)
        distance_class_1.drop(labels=[distance_class_1.index[0]], inplace=True)
        sort_by_distance_class_1 = distance_class_1.index
        nearest_to_centroid_class_1 = sort_by_distance_class_1[
            0:len(sort_by_distance_class_1) // 2]
        not_near_to_centroid_1 = set(
            class_1.index) - set(nearest_to_centroid_class_1)

        new_data_class_0 = data.loc[list(not_near_to_centroid_0)]
        new_data_class_1 = data.loc[nearest_to_centroid_class_1]
        useful_data = pd.concat([new_data_class_0, new_data_class_1])

        useful_data_index = np.concatenate([
            np.array(list(not_near_to_centroid_0)), nearest_to_centroid_class_1
        ])
        remain_data_index = set(data.index) - set(useful_data_index)
        remain_data = data.loc[list(remain_data_index)]

        return useful_data, remain_data
Example #9
    def get_clean_ad_tweet_data(self, tweet_file, ad_file):
        annotations_data = pd.read_csv(ad_file, index_col=0)
        annotations_data['Keywords'] = annotations_data['Brand Name']\
                                        .str.cat(annotations_data['Ad Name'], sep=" ")\
                                        .str.cat(annotations_data['KeyTerms_Edited'], sep=" ")
        df = annotations_data.drop_duplicates()
        print(df.shape)
        man_ann_data = pd.read_csv(tweet_file)

        annotations_data['keywords_clean'] = annotations_data[
            'Keywords'].apply(lambda ad: DataPreprocessor.cleanTweet(ad))
        man_ann_data['tweet_clean'] = man_ann_data['tweet_text'].apply(
            lambda twt: DataPreprocessor.cleanTweet(twt))

        return annotations_data, man_ann_data
Example #10
def init_model(workflow, client):
    global train_start_time
    global train_end_time
    initReq = workflow + "#" + client

    name, _, __ = requestHandler.parseReq(initReq, "fwf")
    print("whrkflow name : " + name)
    if name not in model.keys():
        _model = support_vector_machine()
        model[name] = _model

    if name not in preprocessor.keys():
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
    train_start_time = time.time()
    dataX, datay = preprocessor[name].getData(workflow + "_" + client)
    trainX, testX, trainy, testy = train_test_split(dataX,
                                                    datay,
                                                    test_size=0.2)

    model[name].train(trainX, trainy)
    train_end_time = time.time()
    score = model[name].score(testX, testy)

    print("test score : " + str(score))
Example #11
def GenerateFakeAngs():
    realAngs = np.load('angs_smooth.npy') - 2 * np.pi
    diffs = realAngs[1][1:] - realAngs[1][:-1]

    sigmas = [
        np.std(diffs) / 30,
        np.std(diffs) / 10,
        np.std(diffs) / 6,
        np.std(diffs) / 4,
        np.std(diffs) / 3,
        np.std(diffs) / 2,
        np.std(diffs)
    ]

    for sigma in sigmas:
        timesteps = 100000
        datagen = AVDataGenerator(T=timesteps,
                                  dt=25,
                                  mean=np.mean(diffs) / 100,
                                  sigma=sigma,
                                  momentum=0)
        generatedAngs = datagen.GenerateAngs()
        plt.plot(generatedAngs[1])
        print("Generated angs shape: ", generatedAngs.shape)
        trainsize = int(0.8 * timesteps)
        dataProcessor = DataPreprocessor(generatedAngs[:, :trainsize],
                                         sample_length=700,
                                         normalize=True)
        # torch.from_numpy(dataProcessor.GetInitialInput()).float().to(device)
        angs.append(generatedAngs[1, :trainsize])
        initdirs.append(dataProcessor.GetInitialInput())
        inputs.append(dataProcessor.GetTrainingInputs())
        outputs.append(dataProcessor.GetTrainingOutputs())

        testDataProcessor = DataPreprocessor(generatedAngs[:, trainsize:],
                                             sample_length=700,
                                             normalize=True)
        # torch.from_numpy(dataProcessor.GetInitialInput()).float().to(device)
        test_angs.append(generatedAngs[1, trainsize:])
        test_initdirs.append(testDataProcessor.GetInitialInput())
        test_inputs.append(testDataProcessor.GetTrainingInputs())
        test_outputs.append(testDataProcessor.GetTrainingOutputs())

    plt.savefig('fakeangs.png')
    plt.clf()
Example #12
def build_model():
    '''
    request body{
        name: model name,
        path: path to model
    }
    '''
    global model
    global preprocessor
    global requestHandler
    global train_start_time
    global train_end_time
    jsondata = request.get_json()
    app.logger.info("json data : " + str(jsondata))
    name, _, __ = requestHandler.parseReq(jsondata, "nwf")
    # name, _, __ = requestHandler.parseReq(initReq, "fwf")
    # name = jsondata["workflow"]+"#"+jsondata["client_name"]
    _client = jsondata["client_name"]
    _workflow = jsondata["workflow"]
    client = _workflow + "_" + _client
    if name not in model.keys():
        _model = support_vector_machine()
        model[name] = _model
        app.logger.info("name key error ")
    if name not in preprocessor.keys():
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
        app.logger.info("name key error ")
    train_start_time = time.time()
    app.logger.info("model start training")
    dataX, datay = DataPreprocessor().getData(client)
    trainX, testX, trainy, testy = train_test_split(dataX,
                                                    datay,
                                                    test_size=0.2)

    model[name].train(trainX, trainy)
    app.logger.info("training success")
    score = model[name].score(testX, testy)
    train_end_time = time.time()
    # payload = {"test R square": str(score),
    #            "result": "success",
    #            }
    res = {"start_time": train_start_time, "end_time": train_end_time}
    return jsonify(res)
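
The docstring again lists name/path, but this variant actually reads the "workflow" and "client_name" keys from the JSON and returns the training start/end times. A minimal client sketch under the same Flask-route assumption (route and values hypothetical):

import requests

payload = {"workflow": "wf1", "client_name": "clientA"}
resp = requests.post("http://localhost:5000/build_model", json=payload)
print(resp.json())  # e.g. {"start_time": ..., "end_time": ...}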
Example #13
    def calculate_total_loss(self, x, y):
        loss = 0

        for i in range(len(y)):
            if i % (len(y) / 4) == 0:
                print("(" + str(i) + "/" + str(len(y)) + ") ", end='')
                sys.stdout.flush()
            # One-hot encode each sequence as we go so that we don't run out of memory
            xoh = DataPreprocessor.one_hot_vector(x[i], self.input_dim)
            yoh = DataPreprocessor.one_hot_vector(y[i], self.input_dim)
            o, _ = self.forward_propogation(xoh)

            yoh = np.array(yoh)

            correct_characters_predicted = o[np.arange(len(yoh)),
                                             np.argmax(yoh, axis=1)]

            loss += self.cross_entropy_sum(correct_characters_predicted)

        return loss
    def lemma_token_pos(self, text, allowed_pos):
        text = text.lower()  # lowercase so capitalized words still match the GloVe vocabulary
        doc = nlp(text)
        lemma_list = []
        for token in doc:
            if token.is_stop is False:
                #if (token.pos_ == 'NOUN' or token.pos_ == 'VERB' or token.pos_ == 'ADJ' or token.pos_ == 'adv'):
                if (token.pos_ in allowed_pos):
                    token_preprocessed = DataPreprocessor.preprocessor(
                        token.lemma_)
                    if token_preprocessed != '':
                        lemma_list.append(token_preprocessed)

        return lemma_list
Example #15
    def bptt(self, x, y):
        T = len(y)
        xoh = DataPreprocessor.one_hot_vector(x, self.input_dim)
        yoh = DataPreprocessor.one_hot_vector(y, self.input_dim)

        o, s = self.forward_propogation(x)
        dLdU = np.zeros(self.U.shape)
        dLdV = np.zeros(self.V.shape)
        dLdW = np.zeros(self.W.shape)
        delta_o = o
        delta_o[np.arange(len(yoh)), np.argmax(yoh, axis=1)] -= 1

        # Go backwards through time (::-1 is the reverse of time)
        for t in np.arange(T)[::-1]:
            dLdV += np.outer(delta_o[t], s[t].T)

            delta_t = self.V.T.dot(delta_o[t]) * (1 - (s[t]**2))

            for bptt_step in np.arange(max(0, t - self.bptt_truncate),
                                       t + 1)[::-1]:
                dLdW += np.outer(delta_t, s[bptt_step - 1])
                dLdU[:, np.argmax(xoh[bptt_step])] += delta_t
                delta_t = self.W.T.dot(delta_t) * (1 - s[bptt_step - 1]**2)
        return [dLdU, dLdV, dLdW]
    def lemma_token(self, text):
        text = text.lower()  # lowercase so capitalized words still match the GloVe vocabulary
        tokenizer = nlp.Defaults.create_tokenizer(nlp)
        tokens = tokenizer(text)
        token_list = []
        lemma_list = []
        for token in tokens:
            if token.is_stop is False:
                token_preprocessed = DataPreprocessor.preprocessor(
                    token.lemma_)
                if token_preprocessed != '':
                    lemma_list.append(token_preprocessed)
                    token_list.append(token.text)

        return lemma_list
Example #17
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Get the data 
    angs = np.load('angs_smooth.npy') 
    angs[1] -= 2 * np.pi

    # Preprocess the data
    dataProcessor = DataPreprocessor(angs, sample_length=700, normalize=True)
    initdir = torch.from_numpy(dataProcessor.GetInitialInput()).float().to(device)
    input = torch.from_numpy(dataProcessor.GetTrainingInputs()).float().to(device)
    output = torch.from_numpy(dataProcessor.GetTrainingOutputs()).float().to(device)

    # Define the model and optimizer
    model = SingleLayerCTRNN(store_h=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=.005, weight_decay=1e-6)
    criterion = nn.MSELoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    # Train
    losses = []
    hidden_states = None
    for epoch in range(NUM_EPOCHS):
        optimizer.zero_grad()

        pred, h = model(initdir, input)
        loss = criterion(pred, output)
        hidden_states = np.array(h.cpu().detach().numpy())

        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}')
        losses.append(loss.item())

        loss.backward()
        optimizer.step()
    
    # Graph the losses
    print(f"Losses: {losses}")
    plt.plot(losses)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.savefig('loss_ctrnn.png')
    plt.clf()

    # Test
    testAngs = np.load('angs_smooth.npy') - 2 * np.pi
    TestCTRNN(testAngs, model, criterion, device)

    from scipy.stats import binned_statistic, binned_statistic_2d
    seqlen = hidden_states.shape[0] * hidden_states.shape[1]
    velocities = input.cpu().detach().numpy().reshape(seqlen, -1)[:, 0]
    headdirs = testAngs[1][:seqlen] % (2 * np.pi)
    print("Hidden states shape: ", hidden_states.shape)
    print("Inputs[i].shape: ", velocities.shape)
    print("Angs[i].shape: ", headdirs.shape)

    _, ax = plt.subplots(10, 10, figsize=(20,15))
    for cell in range(100):
        x,y = np.divmod(cell,10)
        curr_ax = ax[x, y]

        activations = torch.relu(
            torch.tanh(
                torch.from_numpy(hidden_states.reshape(seqlen, -1)[:, cell])))
        # activations = hidden_states.reshape(seqlen, -1)[:, cell]
        # print("No nonlin")
        
        print(f"Doing it for cell={cell}")
        bs = binned_statistic_2d(headdirs, velocities, activations, bins=[30, 30])
        curr_ax.pcolormesh(bs[1], bs[2], bs[0])
        curr_ax.set_yticks([])
        curr_ax.set_xticks([])
    
    plt.savefig(f"activations_realAngs.png")
    plt.clf()

    _, ax = plt.subplots(10, 10, figsize=(20,15))
    for cell in range(100):
        x,y = np.divmod(cell,10)
        curr_ax = ax[x, y]

        activations = torch.relu(
            torch.tanh(
                torch.from_numpy(hidden_states.reshape(seqlen, -1)[:, cell])))
        # activations = hidden_states.reshape(seqlen, -1)[:, cell]
        
        # import pdb; pdb.set_trace()
        print(f"Doing it for cell={cell}, no bins")
        bs = binned_statistic(headdirs, activations)
        curr_ax.plot( (bs[1][1:] + bs[1][:-1]) / 2, bs[0])
        curr_ax.set_yticks([])
        curr_ax.set_xticks([])
    
    plt.savefig(f"activations_realAngs_headdirs.png")
    plt.clf()

    _, ax = plt.subplots(10, 10, figsize=(20,15))
    for cell in range(100):
        x,y = np.divmod(cell,10)
        curr_ax = ax[x, y]

        activations = torch.relu(
            torch.tanh(
                torch.from_numpy(hidden_states.reshape(seqlen, -1)[:, cell])))
        # activations = hidden_states.reshape(seqlen, -1)[:, cell]
        # print("No nonlin")
        
        # import pdb; pdb.set_trace()
        print(f"Doing it for cell={cell}")
        bs = binned_statistic(velocities, activations)
        curr_ax.plot( (bs[1][1:] + bs[1][:-1]) / 2, bs[0])
        curr_ax.set_yticks([])
        curr_ax.set_xticks([])
    
    plt.savefig(f"activations_realAngs_vels.png")
    plt.clf()
    
    del initdir
    del input
    del output
Example #18
from FreshRNN import RNN
from DataPreprocessor import DataPreprocessor
import numpy as np

datasetsPath = 'data'

vocab = 256
sequence_length = 100
trainx = []
trainy = []
testx = []
testy = []

if __name__ == "__main__":
    dataset = []
    DataPreprocessor.get_dataset(dataset, datasetsPath, clean=True)
    del dataset[0]

    trainx, trainy = DataPreprocessor.data_targets(dataset, sequence_length)
    trainx = np.array(trainx)
    trainy = np.array(trainy)

    print(trainx[0])
    print(trainy[0])

    model = RNN(vocab)

    losses = model.train_with_sgd(model,
                                  trainx[:500],
                                  trainy[:500],
                                  nepoch=5)  # further kwargs omitted; closing the call here is an assumption
Example #19
def main():
    dataprep = DataPreprocessor()
    dataprep.preprocess()
    runLogisticRegression(dataprep)
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print("Processing training data...")
    realAngs = np.load('angs_smooth.npy') - 2 * np.pi
    diffs = realAngs[1][1:] - realAngs[1][:-1]
    
    initdirs = []
    inputs = []
    outputs = []

    datameanmean = np.mean(diffs)/100
    datameansigma = np.std(diffs)/10
    
    for i in range(TRAINING_BATCHES):
        # if i == 0:
        #     dataProcessor = DataPreprocessor(realAngs, sample_length=700, normalize=True)
        # else:
        datagen = AVDataGenerator(T=realAngs.shape[1], dt=25, mean=np.random.uniform(datameanmean/2, datameanmean*1.5), \
            sigma=np.random.uniform(datameansigma/2, datameansigma*1.5), momentum=0)
        dataProcessor = DataPreprocessor(datagen.GenerateAngs(), sample_length=700, normalize=True)
        initdirs.append(torch.from_numpy(dataProcessor.GetInitialInput()).float())
        inputs.append(torch.from_numpy(dataProcessor.GetTrainingInputs()).float())
        outputs.append(torch.from_numpy(dataProcessor.GetTrainingOutputs()).float())
        print(f"Sample initdirs for fake batch {i}: ", initdirs[i][0][0])

    initdirs = torch.stack(initdirs).to(device)
    inputs = torch.stack(inputs).to(device)
    outputs = torch.stack(outputs).to(device)

    print("Defining the model...")
    model = SingleLayerCTRNN(input_dim=2)
    optimizer = torch.optim.Adam(model.parameters(), lr=.005, weight_decay=1e-6)
    criterion = nn.MSELoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    print("Training...")
    losses = []
    for epoch in range(NUM_EPOCHS):
        for batch in range(TRAINING_BATCHES):
            optimizer.zero_grad()

            pred = model(initdirs[batch], inputs[batch])
            loss = criterion(pred, outputs[batch])

            print(f'Epoch [{epoch+1}/{NUM_EPOCHS}] Batch [{batch+1}/{TRAINING_BATCHES}] Loss: {loss.item():.4f}')
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
    
    print("Graphing the losses to loss_ctrnn.py...")
    print(f"Losses: {losses}")
    plt.plot(losses)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.savefig('loss_ctrnn.png')
    plt.clf()

    print("Testing the model...")
    testAngs = np.load('angs_smooth.npy') - 2 * np.pi
    # datagen = AVDataGenerator(T=realAngs.shape[1], dt=25, mean=datameanmean, \
    #         sigma=datameansigma, momentum=0)
    # testAngs = datagen.GenerateAngs()
    TestCTRNN(testAngs, model, criterion, device, outputs)
Example #21
import json
from TextProcessor import TextProcessor
from DataPreprocessor import DataPreprocessor
import configparser
import sys

from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer

if __name__ == '__main__':
    data_preprocessor = DataPreprocessor()
    text_processor = TextProcessor()

    data = data_preprocessor.readFile('neberitrubku_output.csv')
    data = data_preprocessor.cleanData(data)

    sse = {}
    for k in range(1, 15):
        kmeans = text_processor.make_clusters(data, k)
        #data["clusters"] = kmeans.labels_
        #print(data["clusters"])
        sse[k] = kmeans.inertia_  # Inertia: Sum of distances of samples to their closest cluster center
    plt.figure()
    plt.plot(list(sse.keys()), list(sse.values()))
    plt.xlabel("Number of cluster")
    plt.ylabel("SSE")
Example #22
def login():
    message = None
    print("is called")
    if request.method == 'POST':
        print("is posted")
        datafromjs = request.form['mydata']
        print(datafromjs)
        vars = datafromjs.split(',')
        print(vars)
        vec_start = vars[0:-4]
        vec_start = [int(i) for i in vec_start]
        time = vars[-4]
        vec_end = vars[-3:-1]
        vec_end = [int(i) for i in vec_end]
        print("vec start: " + str(vec_start))
        print("vec end: " + str(vec_end))
        rel_path = vars[-1].strip()
        processor = DataPreprocessor()
        # #outputs = processor.preprocess(processor, datafromjs)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(dir_path, '../website/' + rel_path + '.csv')
        path = os.path.abspath(os.path.realpath(file_path))
        print(path)
        model_input = []
        with open(path) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            line_count = 0
            for row in csv_reader:
                vector = []
                row = [int(i) for i in row]
                vector += vec_start
                vector.append(time)
                vector += row
                vector += vec_end
                vector.append(1)
                vector.append(1)
                print("vector: " + str(len(vector)))
                model_input.append(vector)

        # print("length: " + str(len(model_input)))
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(dir_path, '../models/severity.h5')
        path = os.path.abspath(os.path.realpath(file_path))
        model = load_model(path)
        outputs = []
        processor.preprocess(data=model_input)

        X, y = processor.getDataForSeverity()

        for inputvec in X:
            print(inputvec.shape)
            inputvec = inputvec.reshape(1, 98)
            outputs.append(model.predict(inputvec, steps=1))
        print("size outputs " + str(len(outputs)))

        print(outputs[0])

        results = []
        for r in outputs:
            o = list(r[0])
            print(o)
            print(max(o))
            res = o.index(max(o))
            results.append(res)
        print(str(results))
        # if res == 0:
        #     result = 'Low'
        # if res == 1:
        #     result = 'Mid'
        # else:
        #     result = 'High'

        resultsStr = str(results)

        finalResult = resultsStr[1:-1]
        print(finalResult)

        resp = make_response(finalResult)
        resp.headers['Content-Type'] = "application/json"
        return resp
Example #23
from DataPreprocessor import DataPreprocessor
from TrainTestPipeline import TrainTestPipeline
import pandas as pd
import os

# if not os.path.exists('MicrotracMinMax'):
#     os.mkdir('MicrotracMinMax')

data_folder = './MicrotracDataFilesPT'
flow_values_excel = './TrueFlowValues_.xlsx'
dp = DataPreprocessor(data_folder, flow_values_excel, root_folder_='.')

success = dp.prepare_df(preproc_type='yeo-johnson')
assert success

# # Pearson-correlation, full dataset with augmented features
x_filt, columns = dp.get_feature_selection_x(method='pearson', threshold = 0.8, \
                                            heldout_cols = ['Density'])

y_regr = dp.get_regression_y()
all_samples = dp.get_samples()

pipeline = TrainTestPipeline(x_data=x_filt,
                             y_data=y_regr,
                             all_samples=all_samples,
                             model_name='RandomForestRegressor',
                             heldout_samples='random',
                             num_heldout=4)

tr_test_ = pipeline.do_train_test(cv=False)
Example #24
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Get the data
    print("Processing training data...")
    realAngs = np.load('angs_smooth.npy') - 2 * np.pi
    diffs = realAngs[1][1:] - realAngs[1][:-1]
    datameanmean = np.mean(diffs) / 100
    datameansigma = np.std(diffs) / 10
    datagen = AVDataGenerator(T=realAngs.shape[1],
                              dt=25,
                              mean=datameanmean,
                              sigma=datameansigma,
                              momentum=0)

    initdirs = []
    inputs = []
    outputs = []

    for i in range(TRAINING_BATCHES):
        dataProcessor = DataPreprocessor(datagen.GenerateAngs(),
                                         sample_length=700,
                                         normalize=True)
        initdirs.append(
            torch.from_numpy(dataProcessor.GetInitialInput()).float())
        inputs.append(
            torch.from_numpy(dataProcessor.GetTrainingInputs()).float())
        outputs.append(
            torch.from_numpy(dataProcessor.GetTrainingOutputs()).float())
        print(f"Sample initdirs for fake batch {i}: ", initdirs[i][0][0])

    initdirs = torch.stack(initdirs).to(device)
    inputs = torch.stack(inputs).to(device)
    outputs = torch.stack(outputs).to(device)

    # Define the model and optimizer
    model = SingleLayerCTRNN(store_h=True)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=.005,
                                 weight_decay=1e-6)
    criterion = nn.MSELoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    # Train
    print("Training...")
    losses = []
    hidden_states = None
    for epoch in range(NUM_EPOCHS):
        for batch in range(TRAINING_BATCHES):
            optimizer.zero_grad()

            pred, h = model(initdirs[batch], inputs[batch])
            loss = criterion(pred, outputs[batch])
            hidden_states = np.array(h.cpu().detach().numpy())

            print(
                f'Epoch [{epoch+1}/{NUM_EPOCHS}] Batch [{batch+1}/{TRAINING_BATCHES}] Loss: {loss.item():.4f}'
            )
            losses.append(loss.item())

            loss.backward()
            optimizer.step()

    print(f"Losses: {losses}")

    # Test
    testAngs = np.load('angs_smooth.npy') - 2 * np.pi
    datagen = AVDataGenerator(T=realAngs.shape[1], dt=25, mean=datameanmean, \
            sigma=datameansigma, momentum=0)
    testAngs = datagen.GenerateAngs()

    TestCTRNN(testAngs, model, criterion, device)

    from scipy.stats import binned_statistic, binned_statistic_2d
    seqlen = hidden_states.shape[0] * hidden_states.shape[1]
    velocities = inputs[0].cpu().detach().numpy().reshape(seqlen, -1)[:, 0]

    import pdb
    pdb.set_trace()
    headdirs = np.unwrap(np.arctan2(outputs[0][0],
                                    outputs[0][1]))[:seqlen] % (2 * np.pi)
    print("Hidden states shape: ", hidden_states.shape)
    print("Inputs[i].shape: ", velocities.shape)
    print("Angs[i].shape: ", headdirs.shape)

    _, ax = plt.subplots(10, 10, figsize=(20, 15))
    for cell in range(100):
        x, y = np.divmod(cell, 10)
        curr_ax = ax[x, y]

        activations = torch.relu(
            torch.tanh(
                torch.from_numpy(hidden_states.reshape(seqlen, -1)[:, cell])))
        # activations = hidden_states.reshape(seqlen, -1)[:, cell]
        # print("No nonlin")

        print(f"Doing it for cell={cell}")
        bs = binned_statistic_2d(headdirs,
                                 velocities,
                                 activations,
                                 bins=[30, 30])
        curr_ax.pcolormesh(bs[1], bs[2], bs[0])
        curr_ax.set_yticks([])
        curr_ax.set_xticks([])

    plt.savefig(f"activations_fakeAngs.png")
    plt.clf()

    _, ax = plt.subplots(10, 10, figsize=(20, 15))
    for cell in range(100):
        x, y = np.divmod(cell, 10)
        curr_ax = ax[x, y]

        activations = torch.relu(
            torch.tanh(
                torch.from_numpy(hidden_states.reshape(seqlen, -1)[:, cell])))
        # activations = hidden_states.reshape(seqlen, -1)[:, cell]

        # import pdb; pdb.set_trace()
        print(f"Doing it for cell={cell}, no bins")
        bs = binned_statistic(headdirs, activations)
        curr_ax.plot((bs[1][1:] + bs[1][:-1]) / 2, bs[0])
        curr_ax.set_yticks([])
        curr_ax.set_xticks([])

    plt.savefig(f"activations_fakeAngs_headdirs.png")
    plt.clf()

    _, ax = plt.subplots(10, 10, figsize=(20, 15))
    for cell in range(100):
        x, y = np.divmod(cell, 10)
        curr_ax = ax[x, y]

        activations = torch.relu(
            torch.tanh(
                torch.from_numpy(hidden_states.reshape(seqlen, -1)[:, cell])))
        # activations = hidden_states.reshape(seqlen, -1)[:, cell]
        # print("No nonlin")

        # import pdb; pdb.set_trace()
        print(f"Doing it for cell={cell}")
        bs = binned_statistic(velocities, activations)
        curr_ax.plot((bs[1][1:] + bs[1][:-1]) / 2, bs[0])
        curr_ax.set_yticks([])
        curr_ax.set_xticks([])

    plt.savefig(f"activations_fakeAngs_vels.png")
    plt.clf()

    del initdirs
    del inputs
    del outputs
Example #25
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from DataPreprocessor import DataPreprocessor
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer
from scipy.stats import uniform
from FeatureSelector import *
from sklearn.utils import shuffle

preprocessor = DataPreprocessor()


def mse_error(y_true, y_pred):
    error = y_true - y_pred
    return np.sum(error**2) / (len(y_pred))


X_train, y_train = preprocessor.get_train_test_data(norm=True, test=0)
BClass = MLPRegressor(max_iter=10000,
                      hidden_layer_sizes=[50, 50, 50, 50],
                      early_stopping=True,
                      validation_fraction=.1,
                      n_iter_no_change=300,
                      activation='tanh',
                      alpha=0.00001,
                      learning_rate='adaptive',
                      momentum=0.3)  # further kwargs omitted; closing the constructor here is an assumption
Example #26
from LoadSplit import LoadSplit
from DataPreprocessor import DataPreprocessor
from UserParams import UserParams

# load and split the data
user_params = UserParams(
    dataset_path="data\\santander_customer_transaction_prediction_target.csv",
    target_name="target",
    train_test_path="train_test_splited\\")

load_splitter = LoadSplit(user_params)
X_train, X_test, Y_train, Y_test = load_splitter.load_and_split()

# preprocess the data
data_preprocessor = DataPreprocessor(user_params, X_train, X_test, Y_train,
                                     Y_test)
X_train, X_test = data_preprocessor.fit_transform()
Example #27
    def get_features(self):
        self.data = DataPreprocessor().select_all_features()
        # self.data = getData()
        self.cluster_data_before_classify(self.data)