def main():
    def eval_input_fn():
        return input_fn(*data[config['data']],
                        batch_size=config['batch_size'],
                        shuffle=False)

    # load the data and build the estimator
    data = inputs.load_data(config['n_examples_for_train'],
                            config['n_examples_for_cv'])
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params=config,
                                       model_dir=config['model_dir'])

    # evaluate, timing the run and logging the results
    with mu.Timer() as timer:
        result = estimator.evaluate(eval_input_fn)
    result['data'] = config['data']
    logger.info('Done in %.fs', timer.eclipsed)
    logger.info('\n%s\n%s%s%s\n', data, '*' * 10, result, '*' * 10)
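# `input_fn` itself is not shown in this section; below is a minimal sketch
# consistent with both call sites (eval above, train below), built on
# `tf.data` from in-memory feature/label arrays. The signature and body are
# assumptions, not the repo's actual definition:
def input_fn(features, labels, batch_size, n_epochs=1, shuffle=False):
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=10000)
    return dataset.repeat(n_epochs).batch(batch_size)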
def extract_statistical_features():
    # feature 1: [mean history speed, std, max, min]
    # feature 2: [mean history car number, std, max, min]
    if os.path.exists('temp/sta_feature_dict.pkl'):
        with open('temp/sta_feature_dict.pkl', 'rb') as fin:
            sta_feature_dict = pkl.load(fin)
        return sta_feature_dict

    # build training data: accumulate per-link speed and car-number histories
    date = 20190701
    k = 20
    features_dict = {}
    for i in range(k):
        date_star = date + i
        traffic_data_list = inputs.load_data('%s' % str(date_star))
        for traffic_data in traffic_data_list:
            link_id = traffic_data.link_id
            if link_id not in features_dict:
                features_dict[link_id] = {'h_speed': [], 'car_num': []}
            for his_road_state in traffic_data.his_road_state_list:
                for h_road in his_road_state:
                    features_dict[link_id]['h_speed'].append(h_road[1])
                    features_dict[link_id]['car_num'].append(h_road[4])

    # reduce each link's histories to an 8-dim statistical feature vector
    sta_feature_dict = {}
    for link_id in features_dict:
        speeds = features_dict[link_id]['h_speed']
        car_nums = features_dict[link_id]['car_num']
        sta_feature_dict[link_id] = [
            np.mean(speeds), np.std(speeds), np.max(speeds), np.min(speeds),
            np.mean(car_nums), np.std(car_nums),
            np.max(car_nums), np.min(car_nums),
        ]

    with open('temp/sta_feature_dict.pkl', 'wb') as fout:
        pkl.dump(sta_feature_dict, fout)
    return sta_feature_dict
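# A hypothetical usage sketch: each value is the 8-dim vector
# [mean_speed, std_speed, max_speed, min_speed,
#  mean_car_num, std_car_num, max_car_num, min_car_num].
sta_feature_dict = extract_statistical_features()
link_id = next(iter(sta_feature_dict))  # any known link id, for illustration
mean_speed = sta_feature_dict[link_id][0]
mean_car_num = sta_feature_dict[link_id][4]
print('link %s: mean speed %.2f, mean car number %.2f'
      % (link_id, mean_speed, mean_car_num))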
def main():
    def train_input_fn():
        return input_fn(*data['train'],
                        batch_size=config['batch_size'],
                        n_epochs=config['n_epochs'],
                        shuffle=True)

    # load the data
    data = inputs.load_data(config['n_examples_for_train'],
                            config['n_examples_for_cv'])
    logger.info('\n%s\n', data)

    if config['delete']:
        logger.info('Deleting existing checkpoint files...')
        mu.delete_if_exists(config['model_dir'])

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params=config,
                                       model_dir=config['model_dir'])
    estimator.train(train_input_fn)
def run(args):
    """Load the data, train, evaluate, and export the model for serving.

    Args:
        args: experiment parameters.
    """
    cuda_availability = torch.cuda.is_available()
    if cuda_availability:
        device = torch.device('cuda:{}'.format(torch.cuda.current_device()))
    else:
        device = 'cpu'
    print('\n*************************')
    print('`cuda` available: {}'.format(cuda_availability))
    print('Current Device: {}'.format(device))
    print('*************************\n')

    torch.manual_seed(args.seed)

    # Open our dataset
    train_loader, test_loader, eval_loader = inputs.load_data(args, device)

    # Create the model, loss function, and optimizer
    sequential_model, criterion, optimizer = model.create(args, device)

    # Train / Test the model
    for epoch in range(1, args.num_epochs + 1):
        train(sequential_model, train_loader, criterion, optimizer, epoch)
        test(sequential_model, test_loader, criterion)

    # Evaluate the model on the held-out evaluation dataset
    print('Evaluate the model using the evaluation dataset')
    test(sequential_model, eval_loader, criterion)

    # Export the trained model
    torch.save(sequential_model.state_dict(), args.model_name)

    # Save the model to GCS
    if args.job_dir:
        inputs.save_model(args)
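# `train` and `test` are defined elsewhere in the repo; below is a minimal
# sketch of what `train` could look like under standard PyTorch conventions.
# Only the call signature comes from `run` above; the body is an assumption:
def train(model_, loader, criterion, optimizer, epoch):
    model_.train()  # enable dropout/batch-norm training behaviour
    for batch_idx, (features, target) in enumerate(loader):
        optimizer.zero_grad()
        loss = criterion(model_(features), target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('epoch {} batch {} loss {:.4f}'.format(
                epoch, batch_idx, loss.item()))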
def __init__(self, transform=None, target_transform=None):
    self.data = load_data()[:2]  # (train_x, train_y)
    self.transform = transform
    self.target_transform = target_transform
def __init__(self, transform=None):
    self.data = load_data()[2]  # test_x
    self.transform = transform
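# The two `__init__` methods above read like `torch.utils.data.Dataset`
# subclasses; below is a sketch of the companion methods the train-side
# class would typically need inside the same class body. These are
# hypothetical, not taken from the original source:
def __len__(self):
    return len(self.data[0])

def __getitem__(self, idx):
    x, y = self.data[0][idx], self.data[1][idx]
    if self.transform:
        x = self.transform(x)
    if self.target_transform:
        y = self.target_transform(y)
    return x, y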
def extract_features1(sta_feature_dict):
    # Extract neighbor statistical features.
    # feature 1: [mean history speed of all neighbors, std, max, min]
    # feature 2: [mean history car number of all neighbors, std, max, min]
    if os.path.exists('temp/nb_sta_features.pkl'):
        with open('temp/nb_sta_features.pkl', 'rb') as fin:
            [trainX, valX, testX] = pkl.load(fin)
        return trainX, valX, testX
    if os.path.exists('temp/nb_sta_features_train.txt'):
        trainX, valX, testX = load_feature_from_txt('nb_sta_features')
        return trainX, valX, testX

    # graph and mean_sta_feature_vec are presumably consumed by
    # extract_nb_feature_func elsewhere in the module
    topo_file = 'traffic/topo.txt'
    graph = inputs.load_topo(topo_file)

    if os.path.exists('temp/mean_sta_features.pkl'):
        with open('temp/mean_sta_features.pkl', 'rb') as fin:
            mean_sta_feature_vec = pkl.load(fin)
    else:
        features = np.array(list(sta_feature_dict.values()))
        mean_sta_feature_vec = np.mean(features, axis=0)

    # build training data
    date = 20190701
    k = 20
    trainX = []
    for i in range(k):
        date_star = date + i
        traffic_data_list = inputs.load_data('%s' % str(date_star))
        trainX += extract_nb_feature_func(traffic_data_list, sta_feature_dict)
        print('process file %s END!!' % str(date_star))

    # build validation data
    date = 20190721
    k = 5
    valX = []
    for i in range(k):
        date_star = date + i
        traffic_data_list = inputs.load_data('%s' % str(date_star))
        valX += extract_nb_feature_func(traffic_data_list, sta_feature_dict)
        print('process file %s END!!' % str(date_star))

    # build test data
    date = 20190726
    k = 5
    testX = []
    for i in range(k):
        date_star = date + i
        traffic_data_list = inputs.load_data('%s' % str(date_star))
        testX += extract_nb_feature_func(traffic_data_list, sta_feature_dict)
        print('process file %s END!!' % str(date_star))

    # with open('temp/nb_sta_features.pkl', 'wb') as fout:
    #     pkl.dump([trainX, valX, testX], fout)
    save_feature_to_txt(trainX, valX, testX, 'nb_sta_features')
    return trainX, valX, testX
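# `save_feature_to_txt` / `load_feature_from_txt` are helpers assumed above;
# a minimal sketch consistent with the `temp/nb_sta_features_train.txt`
# existence check, assuming each feature is a flat numeric vector stored one
# row per line (names and format are assumptions):
def save_feature_to_txt(trainX, valX, testX, name):
    for split, feats in zip(('train', 'val', 'test'), (trainX, valX, testX)):
        np.savetxt('temp/%s_%s.txt' % (name, split), np.asarray(feats))

def load_feature_from_txt(name):
    return tuple(np.loadtxt('temp/%s_%s.txt' % (name, split)).tolist()
                 for split in ('train', 'val', 'test'))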
# graph statistics
# node_num = graph.number_of_nodes()
# edges_num = graph.number_of_edges()
# items = list(graph.degree())
# degrees = [item[1] for item in items]
# print("total node %d, total edge %d, max degree %d, min degree %d, mean degree %f"
#       % (node_num, edges_num, np.max(degrees), np.min(degrees), np.mean(degrees)))

date = 20190701
k = 30
for i in range(k):
    date_star = date + i
    # label distribution
    traffic_data_list = inputs.load_data('./traffic/%s.txt' % str(date_star))
    label_data = inputs.collect_label_data_from_traffic_data_list(
        traffic_data_list)
    # record label distribution
    # d = {}
    # for l in label_data:
    #     if l not in d:
    #         d[l] = 0.
    #     else:
    #         d[l] += 1
    # print(d)
    # sys.exit(0)
    # date_star = 'test'
    # load data
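# If the commented-out histogram inside the per-day loop is revived, note it
# starts new labels at 0. instead of 1; `collections.Counter` gives the
# intended counts in one line:
from collections import Counter

label_counts = Counter(label_data)  # label -> frequency for the day
print(dict(label_counts))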