import numpy as np
import pandas as pd
import preprocessing as prep


def load_three_datasets_features(paths,
                                 timespan=[1000, 5000],
                                 segment=[1000, 2400],
                                 window_size=100,
                                 zone_size=200,
                                 history=3,
                                 var=2,
                                 mpr=100,
                                 mode="density",
                                 boundary=[50]):

    Features = np.full((0, history * 2, 100, var), 0.0, dtype="float")
    Targets = np.full(0, 0.0, dtype="float")
    all_qvks = {}
    for path in paths:
        us101_data = pd.read_csv(path)
        us101 = prep.dataset(data=us101_data, vehicles=[])
        Infos, qvk, all_vehicles = get_multiple_snapshots(
            us101_data, timespan, segment, window_size, zone_size, mpr)
        IInfos = get_multiple_infos(Infos)
        IIInfos = clean_multiple_infos(IInfos, var)
        features, targets = get_features(IIInfos,
                                         qvk,
                                         history=history,
                                         var=var,
                                         boundary=boundary,
                                         mode=mode)
        # features, targets = get_features(IIInfos, qvk, history = 3, var = 7, boundary = [19,37.5], mode = "speed")
        # features, targets = get_features(IIInfos, qvk, history = 3, var = 7, boundary = [], mode = "dual")
        Features = np.append(Features, features, axis=0)
        Targets = np.append(Targets, targets, axis=0)
        all_qvks[path] = qvk
    return Features, Targets, all_qvks
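

# Example usage (a minimal sketch; the CSV path below is a placeholder for
# whichever NGSIM trajectory files are available locally):
# paths = [r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv"]
# Features, Targets, all_qvks = load_three_datasets_features(paths, mpr=100, mode="density")
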
def get_features_from_single_dataset(path,
                                     timespan=[1000, 5000],
                                     segment=[1000, 2400],
                                     window_size=100,
                                     zone_size=200,
                                     mpr=100,
                                     boundary=[40, 60],
                                     mode="density",
                                     history=3):
    path = r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv"
    us101_data = pd.read_csv(path)
    us101 = prep.dataset(data=us101_data, vehicles=[])
    Infos, qvk, wzveh = get_multiple_snapshots(us101_data, timespan, segment,
                                               window_size, zone_size, mpr)

    IInfos = get_multiple_infos(Infos)
    IIInfos = clean_multiple_infos(IInfos, var=4)
    features, targets, labels = get_features_300x8(IIInfos,
                                                   qvk,
                                                   history,
                                                   var=4,
                                                   boundary=boundary,
                                                   mode=mode)
    return features, targets
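

# Example usage (sketch; same placeholder CSV path as above, default density mode):
# features, targets = get_features_from_single_dataset(
#     r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv",
#     boundary=[40, 60], mode="density")
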
Example #3
    return doc_topic_list


if __name__ == '__main__':
    # Define the dataset and the arguments
    df = pd.read_csv(opt.dataset)
    articles = df['content']

    # Generate the document-term matrix and the vectorizer
    processed_articles = articles.apply(tokenizer)
    tfidf, dtm = document_term_matrix(processed_articles, opt.vectorizer, opt.min_df, opt.max_df)

    # Generate the bag-of-words, the dictionary, and the word2vec model trained on the dataset
    # (`tfidf` is assumed here to be the fitted vectorizer returned by document_term_matrix)
    bow, dictionary, w2v = get_dictionary(tfidf, articles, opt.min_df, opt.size, opt.sg)

    # Create the train loader
    train_loader = dataset(dtm, opt.batch_size)

    # Define the models and the optimizers
    vocab_size = dtm.shape[1]
    encoder = Encoder(vocab_size, opt.hidden_size, opt.num_topics, opt.batch_size).to(device)
    generator = Generator(vocab_size, opt.hidden_size, opt.num_topics, opt.batch_size).to(device)
    discriminator = Discriminator(vocab_size, opt.hidden_size, opt.num_topics, opt.batch_size).to(device)

    optimizer_e = optim.Adam(encoder.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_g = optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_d = optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

    # Train the model
    train_model(discriminator, generator, encoder, optimizer_d, optimizer_g, optimizer_e, opt.epochs, opt.num_topics, opt.n_critic, device)

    # Create the list of lists of the top 10 words of each topic
Example #4
File: train.py  Project: nextValue123/NLP
            cnn.input_s1 : s1,
            cnn.input_s2 : s2,
            cnn.input_y : score,
            cnn.dropout_keep_prob : 1.0
        }
        step, summaries, loss, pearson = sess.run(
            [globals_step, test_summary_op, cnn.loss, cnn.pearson],
            feed_dict
        )
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, pearson))
        if writer:
            writer.add_summary(summaries, step)

    # Generate batches
    STS_train = preprocessing.dataset(s1=s1_train, s2=s2_train, label=score_train)
    # Training loop for every batch

    for i in range(num_epochs):
        batch_train = STS_train.next_batch(batch_size)

        train_step(batch_train[0], batch_train[1], batch_train[2])
        current_step = tf.train.global_step(sess, globals_step)
        if current_step % evaluate_every == 0:
            print("\n evaluation:")
            test_step(s1_test, s2_test, score_test, writer=test_summary_writer)
            print("")
        if current_step % num_checkpoints == 0:
            path = saver.save(sess, checkpoint_prefix, global_step=current_step)
            print("Saved model checkpoint to {}\n".format(path))
Example #5
        sigma_i, sigma_j, sigma_n = torch.exp(self.log_sigma(words_i)), \
                                    torch.exp(self.log_sigma_c(words_j)), \
                                    torch.exp(self.log_sigma_c(words_n))

        return torch.mean(F.relu(self.ob - self.kl_energy(mu_i, mu_j, sigma_i, sigma_j) + self.kl_energy(mu_i, mu_n, sigma_i, sigma_n)), dim=0)

    def nn(self, word, k):
        embedding = self.mu.weight.data.cpu() # [dict, embed_size]
        vector = embedding[self.dset.stoi[word], :].view(-1, 1) # [embed_size, 1]
        # cosine similarity between `word` and every embedding in the vocabulary
        distance = torch.mm(embedding, vector).squeeze() / torch.norm(embedding, 2, 1)
        distance = distance / torch.norm(vector, 2, 0)[0]
        distance = distance.numpy()
        # k most similar words: argsort is ascending, so take the tail and reverse it
        index = np.argsort(distance)[-k:][::-1]
        return [self.dset.itos[x] for x in index]

args.dset = dataset(args)
g_emb = GaussianEmbedding(args)
g_emb = g_emb.cuda()
optimizer = torch.optim.Adagrad(g_emb.parameters(), lr=0.05)

global_step = 0
for epoch in range(args.epochs):
    step = 0
    for (words_i, words_j) in tqdm(g_emb.dset.dsetIter):
        optimizer.zero_grad()
        words_i = Variable(words_i).cuda()
        words_j = Variable(words_j).cuda()
        loss = g_emb(words_i, words_j)
        loss.backward()
        optimizer.step()
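
# Example nearest-neighbour query once training has finished (sketch; "good" is a
# placeholder word assumed to be in the training vocabulary):
# print(g_emb.nn("good", 10))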
#import collections
import preprocessing as prep
import congestion_monitoring_system as cms
import time
import HMM
import HMM2
import HMM_VSW
from hmm7 import hmm7
from copy import deepcopy
import pandas as pd

if __name__ == "__main__":
    path = r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv"
    us101_data = pd.read_csv(path)
    us101_data_vehicles = list(set(us101_data["Vehicle_ID"]))
    us101 = prep.dataset(data=us101_data, vehicles=[])
    us101_vehicles = us101.data_by_vehicle()
    #us101_lanes = us101.data_by_lane()
    us101.read_vehicle(5)

    Convoy464 = us101.get_preceding_convoy(464,
                                           200, [2150, 2200],
                                           by_lane=True)

    System464 = cms.CM_System(convoy=Convoy464, thres_v=10)
    System464.__Proceed__(2200)

    for W in System464.waves:
        print(System464.waves[W].loc)
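
    # Brief summary after the per-wave printout above (sketch; assumes `waves` behaves
    # like a dict keyed by wave id, as the loop suggests):
    print("vehicle 464: {} wave(s) detected".format(len(System464.waves)))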

    Convoy484 = us101.get_preceding_convoy(484,