def load_three_datasets_features(paths, timespan=[1000, 5000], segment=[1000, 2400],
                                 window_size=100, zone_size=200, history=3, var=2,
                                 mpr=100, mode="density", boundary=[50]):
    """Build feature/target arrays from several trajectory CSV files."""
    # Empty containers; features from each dataset are stacked along axis 0.
    Features = np.full((0, history * 2, 100, var), 0.0, dtype="float")
    Targets = np.full(0, 0.0, dtype="float")
    all_qvks = {}
    for path in paths:
        us101_data = pd.read_csv(path)
        us101 = prep.dataset(data=us101_data, vehicles=[])
        # Slice the trajectory data into time/space snapshots at the given MPR.
        Infos, qvk, all_vehicles = get_multiple_snapshots(
            us101_data, timespan, segment, window_size, zone_size, mpr)
        IInfos = get_multiple_infos(Infos)
        IIInfos = clean_multiple_infos(IInfos, var)
        features, targets = get_features(IIInfos, qvk, history=history, var=var,
                                         boundary=boundary, mode=mode)
        # Alternative labelling schemes:
        # features, targets = get_features(IIInfos, qvk, history=3, var=7, boundary=[19, 37.5], mode="speed")
        # features, targets = get_features(IIInfos, qvk, history=3, var=7, boundary=[], mode="dual")
        Features = np.append(Features, features, axis=0)
        Targets = np.append(Targets, targets, axis=0)
        all_qvks[path] = qvk
    return Features, Targets, all_qvks
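# Usage sketch (not part of the original module): the path below is the example file
# referenced elsewhere in this project; the other entries are placeholders for the
# remaining trajectory CSVs.
#
#   paths = [
#       r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv",
#       # ... further trajectory files ...
#   ]
#   Features, Targets, all_qvks = load_three_datasets_features(paths, history=3, var=2, mode="density")
#   print(Features.shape, Targets.shape)   # (N, history * 2, 100, var), (N,)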
def get_features_from_single_dataset(path, timespan=[1000, 5000], segment=[1000, 2400],
                                     window_size=100, zone_size=200, mpr=100,
                                     boundary=[40, 60], mode="density", history=3):
    """Build feature/target arrays from a single trajectory CSV file."""
    # Note: the original hard-coded the path below, which silently overrode the
    # `path` argument; it is kept only as an example input.
    # path = r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv"
    us101_data = pd.read_csv(path)
    us101 = prep.dataset(data=us101_data, vehicles=[])
    Infos, qvk, wzveh = get_multiple_snapshots(us101_data, timespan, segment,
                                               window_size, zone_size, mpr)
    IInfos = get_multiple_infos(Infos)
    IIInfos = clean_multiple_infos(IInfos, var=4)
    features, targets, labels = get_features_300x8(IIInfos, qvk, history, var=4,
                                                   boundary=boundary, mode=mode)
    return features, targets
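# Usage sketch (not part of the original module): extract features from one dataset,
# using the same example path that the original hard-coded inside the function.
#
#   path = r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv"
#   features, targets = get_features_from_single_dataset(path, mpr=100,
#                                                        boundary=[40, 60], mode="density")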
    return doc_topic_list


if __name__ == '__main__':
    # Define the dataset and the arguments
    df = pd.read_csv(opt.dataset)
    articles = df['content']

    # Generate the document-term matrix and the vectorizer
    processed_articles = articles.apply(tokenizer)
    tfidf, dtm = document_term_matrix(processed_articles, opt.vectorizer, opt.min_df, opt.max_df)

    # Generate the bag-of-words, the dictionary, and the word2vec model trained on the dataset
    # (the fitted vectorizer from document_term_matrix is assumed here; the original passed an undefined `cv`)
    bow, dictionary, w2v = get_dictionary(tfidf, articles, opt.min_df, opt.size, opt.sg)

    # Create the train loader
    train_loader = dataset(dtm, opt.batch_size)

    # Define the models and the optimizers
    vocab_size = dtm.shape[1]
    encoder = Encoder(vocab_size, opt.hidden_size, opt.num_topics, opt.batch_size).to(device)
    generator = Generator(vocab_size, opt.hidden_size, opt.num_topics, opt.batch_size).to(device)
    discriminator = Discriminator(vocab_size, opt.hidden_size, opt.num_topics, opt.batch_size).to(device)
    optimizer_e = optim.Adam(encoder.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_g = optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_d = optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

    # Train the model
    train_model(discriminator, generator, encoder, optimizer_d, optimizer_g, optimizer_e,
                opt.epochs, opt.num_topics, opt.n_critic, device)

    # Create the list of lists of the top 10 words of each topic
def test_step(s1, s2, score, writer=None):
    """Evaluate the model on the given sentence pairs; signature inferred from the call in the training loop below."""
    feed_dict = {
        cnn.input_s1: s1,
        cnn.input_s2: s2,
        cnn.input_y: score,
        cnn.dropout_keep_prob: 1.0
    }
    step, summaries, loss, pearson = sess.run(
        [globals_step, test_summary_op, cnn.loss, cnn.pearson],
        feed_dict)
    time_str = datetime.datetime.now().isoformat()
    print("{}: step {}, loss {:g}, pearson {:g}".format(time_str, step, loss, pearson))
    if writer:
        writer.add_summary(summaries, step)

# Generate batches
STS_train = preprocessing.dataset(s1=s1_train, s2=s2_train, label=score_train)

# Training loop for every batch
for i in range(num_epochs):
    batch_train = STS_train.next_batch(batch_size)
    train_step(batch_train[0], batch_train[1], batch_train[2])
    current_step = tf.train.global_step(sess, globals_step)
    if current_step % evaluate_every == 0:
        print("\n evaluation:")
        test_step(s1_test, s2_test, score_test, writer=test_summary_writer)
        print("")
    if current_step % num_checkpoints == 0:
        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
        print("Saved model checkpoint to {}\n".format(path))
        sigma_i, sigma_j, sigma_n = torch.exp(self.log_sigma(words_i)), \
                                    torch.exp(self.log_sigma_c(words_j)), \
                                    torch.exp(self.log_sigma_c(words_n))
        # Max-margin objective: the positive-pair energy should exceed the
        # negative-pair energy by at least the margin self.ob.
        return torch.mean(F.relu(self.ob - self.kl_energy(mu_i, mu_j, sigma_i, sigma_j)
                                 + self.kl_energy(mu_i, mu_n, sigma_i, sigma_n)), dim=0)

    def nn(self, word, k):
        """Return the k words whose mean embeddings have the highest cosine similarity to `word`."""
        embedding = self.mu.weight.data.cpu()                    # [dict, embed_size]
        vector = embedding[self.dset.stoi[word], :].view(-1, 1)  # [embed_size, 1]
        distance = torch.mm(embedding, vector).squeeze() / torch.norm(embedding, 2, 1)
        distance = distance / torch.norm(vector, 2, 0)[0]
        distance = distance.numpy()
        index = np.argsort(distance)[::-1][:k]  # indices of the k most similar words
        return [self.dset.itos[x] for x in index]


args.dset = dataset(args)
g_emb = GaussianEmbedding(args)
g_emb = g_emb.cuda()
optimizer = torch.optim.Adagrad(g_emb.parameters(), lr=0.05)
global_step = 0
for epoch in range(args.epochs):
    step = 0
    for (words_i, words_j) in tqdm(g_emb.dset.dsetIter):
        optimizer.zero_grad()
        words_i = Variable(words_i).cuda()
        words_j = Variable(words_j).cuda()
        loss = g_emb(words_i, words_j)
        loss.backward()
        optimizer.step()
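# Usage sketch (not part of the original script): after training, nn() returns the k words
# whose mean vectors are most similar to the query. The query word is an arbitrary example
# and must exist in the dataset vocabulary (g_emb.dset.stoi).
#
#   print(g_emb.nn("good", k=10))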
# import collections
import preprocessing as prep
import congestion_monitoring_system as cms
import time
import HMM
import HMM2
import HMM_VSW
from hmm7 import hmm7
from copy import deepcopy
import pandas as pd

if __name__ == "__main__":
    path = r"vehicle-trajectory-data\0805am-0820am\trajectories-0805am-0820am.csv"
    us101_data = pd.read_csv(path)
    us101_data_vehicles = list(set(us101_data["Vehicle_ID"]))
    us101 = prep.dataset(data=us101_data, vehicles=[])
    us101_vehicles = us101.data_by_vehicle()
    # us101_lanes = us101.data_by_lane()
    us101.read_vehicle(5)

    # Build the convoy preceding vehicle 464 and run the congestion-monitoring system on it.
    Convoy464 = us101.get_preceding_convoy(464, 200, [2150, 2200], by_lane=True)
    System464 = cms.CM_System(convoy=Convoy464, thres_v=10)
    System464.__Proceed__(2200)
    for W in System464.waves:
        print(System464.waves[W].loc)

    Convoy484 = us101.get_preceding_convoy(484,