# Names of the eleven source-file columns, in positional order; the number
# after each entry is that column's index.
txt_col_name = [
    "day",               # 0
    "time",              # 1
    "direction",         # 2
    "road_type",         # 3
    "linkid",            # 4
    "length",            # 5
    "travel_time",       # 6
    "volumn",            # 7
    "speed",             # 8
    "occupancy",         # 9
    "congestion_level",  # 10
]

# NOTE(review): check_file_and_pause is defined in dm_common and is not visible
# here; from its name it presumably checks the path and pauses before the
# script continues -- confirm there.
dmc.check_file_and_pause(dmfp.pp1_train_data_path)
verbose = 1
train_data = []
# Comma-separated sources: read columns 0-10 without a header row, then pass
# the frame to process_source_file together with the running result, which is
# replaced by the helper's return value on every iteration.
# NOTE(review): the index list 0-9 and the single index 10 look like the
# data columns and the label column -- confirm against dm_preprocess_fun.
for file in sd.train_0707:
    data_csv = pd.read_csv(file, header=None, sep=',',
                           usecols=list(range(11)))
    train_data = ppf.process_source_file(
        data_csv, train_data, file, list(range(10)), 10, verbose)
# Second batch of sources: the same header-less read of columns 0-10 as for
# sd.train_0707, except that these files are tab-separated.
for file in sd.train_0715:
    data_csv = pd.read_csv(file,
                           header=None,
                           sep='\t',
                           usecols=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
예제 #2
0
import dm_source_data as sd
import pandas as pd
import dm_preprocess_fun as ppf
import dm_csv as dmcsv
import os
import dm_filepath as dmfp
import dm_common as dmc
import pickle
import time
from sklearn import tree
from sklearn.model_selection import cross_val_score

# NOTE(review): check_file_and_pause is defined in dm_common and is not visible
# here; presumably it verifies the path before the script continues -- confirm.
dmc.check_file_and_pause(dmfp.pp4_format_train_path)

print("Reading Training data " + dmfp.pp4_format_train_path)
traincsv = pd.read_csv(dmfp.pp4_format_train_path, sep=',')

print("Extract data and label")
# Count rows with len() on the frame itself; going through `.values` may
# materialize the whole table as a NumPy array just to read its length.
item_nr = len(traincsv)
# Every row is kept: columns 0-47 become `data`, columns 48-53 become `label`.
data = traincsv.iloc[:, 0:48]
label = traincsv.iloc[:, 48:54]

# using decision tree
print("Using Decision Tree Module")
# Alternative DecisionTreeClassifier configurations, all commented out in
# these lines: the default constructor, the entropy criterion, and several
# min_samples_split / max_depth / min_samples_leaf settings.
#clf = tree.DecisionTreeClassifier()
#clf = tree.DecisionTreeClassifier(criterion="entropy")
#clf = tree.DecisionTreeClassifier(min_samples_split=10)
#clf = tree.DecisionTreeClassifier(min_samples_split=20)
#clf = tree.DecisionTreeClassifier(max_depth=30)
#clf = tree.DecisionTreeClassifier(min_samples_split=20, max_depth=30)
#clf = tree.DecisionTreeClassifier(min_samples_leaf=10)
예제 #3
0
import dm_source_data as sd
import pandas as pd
import dm_preprocess_fun as ppf
import dm_csv as dmcsv
import os
import dm_filepath as dmfp
import dm_common as dmc
import pickle
import dm_prediction_func as funs

# NOTE(review): check_file_and_pause is defined in dm_common and is not visible
# here; presumably it verifies each path before the script goes on -- confirm.
dmc.check_file_and_pause(dmfp.pp4_format_test_without_label_path)
dmc.check_file_and_pause(dmfp.pp2_linkid_map_path)

# Read the comma-separated linkid map and convert it with pp2_read_tcid_csv.
linkid_map_csv = pd.read_csv(dmfp.pp2_linkid_map_path, sep=',')
linkid_tcid = ppf.pp2_read_tcid_csv(linkid_map_csv)

verbose = 1

# Load the pickled classifiers found in the training-modules folder into a
# six-slot list; each file's slot is the integer in the second dot-separated
# field of its name.
files = os.listdir(dmfp.training_modules_floder_path)
clfs = [None] * 6
for file in files:
    # Skip macOS ".DS_Store" metadata files.
    if "DS_Store" in file:
        continue
    words = file.split('.')  # [0] modulename [1] predict_id [2] suffix
    # load module
    clf_file = os.path.join(dmfp.training_modules_floder_path, file)
    # SECURITY: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source.
    # The context manager closes the handle even if unpickling raises.
    with open(clf_file, "rb") as fd:
        clfs[int(words[1])] = pickle.load(fd)

# read prediction data
예제 #4
0
# Names of the nine columns that are kept. Each trailing comment reads
# "<index in this list> <- <index in the source layout>"; the right-hand
# numbers are exactly the entries of col_idx below.
train_col_name = [
    "day",               # 0 <- 0
    "time",              # 1 <- 1
    "direction",         # 2 <- 2
    "linkid",            # 3 <- 4
    "travel_time",       # 4 <- 6
    "volumn",            # 5 <- 7
    "speed",             # 6 <- 8
    "occupancy",         # 7 <- 9
    "congestion_level",  # 8 <- 10
]

# Source-layout indices of the kept columns, in the same order as above.
col_idx = [0, 1, 2, 4, 6, 7, 8, 9, 10]

# NOTE(review): check_file_and_pause is defined in dm_common and is not visible
# here; presumably it verifies each path before the script goes on -- confirm.
dmc.check_file_and_pause( dmfp.pp2_linkid_map_path )
dmc.check_file_and_pause( dmfp.pp2_direction_map_path )

# Read the linkid map once and build two lookups from the same frame.
# NOTE(review): the printed label runs straight into the path (no separator).
print("reading linkid map" + dmfp.pp2_linkid_map_path )
linkid_map_csv = pd.read_csv( dmfp.pp2_linkid_map_path, sep=',')
linkid_tcid = ppf.pp2_read_tcid_csv( linkid_map_csv )
linkid_dict = ppf.pp2_read_dict_csv( linkid_map_csv )

# Same two lookups for the direction map.
print("reading dirction map" + dmfp.pp2_direction_map_path )
direction_map_csv = pd.read_csv( dmfp.pp2_direction_map_path, sep=',')
direction_tcid = ppf.pp2_read_tcid_csv( direction_map_csv )
direction_dict = ppf.pp2_read_dict_csv( direction_map_csv )

dmc.check_file_and_pause( dmfp.all_in_one_file_path_old )

# Comma-separated read with pandas' default header handling (first row is
# taken as the column names).
half_hour_data_old_csv=pd.read_csv(dmfp.all_in_one_file_path_old, sep=",")
import dm_source_data as sd
import pandas as pd
import dm_preprocess_fun as ppf
import dm_csv as dmcsv
import os
import dm_filepath as dmfp
import dm_common as dmc

# NOTE(review): check_file_and_pause is defined in dm_common and is not visible
# here; presumably it verifies each path before the script goes on -- confirm.
dmc.check_file_and_pause(dmfp.pp3_train_data_folder)
dmc.check_file_and_pause(dmfp.pp3_test_data_folder)

# Read the comma-separated linkid map and turn it into a lookup with
# pp2_read_dict_csv.
dmc.check_file_and_pause(dmfp.pp2_linkid_map_path)
linkid_map_csv = pd.read_csv(dmfp.pp2_linkid_map_path, sep=',')
linkid_dict = ppf.pp2_read_dict_csv(linkid_map_csv)

# Names of the nine training-data columns, in positional order; the number
# after each entry is that column's index.
train_col_name = [
    "day",               # 0
    "time",              # 1
    "direction",         # 2
    "linkid",            # 3
    "travel_time",       # 4
    "volumn",            # 5
    "speed",             # 6
    "occupancy",         # 7
    "congestion_level",  # 8
]

# Column indices 0-7 are the data columns; index 8 is the label column.
# NOTE(review): the two *_nr values look like record counts per sample for
# the data and label parts -- confirm against the code that consumes them.
format_data_cols = list(range(8))
format_label_col = 8
format_data_nr = 6
format_label_nr = 6
예제 #6
0
import dm_source_data as sd
import pandas as pd
import dm_preprocess_fun as ppf
import dm_csv as dmcsv
import os
import dm_filepath as dmfp
import dm_common as dmc
import pickle
import dm_prediction_func as funs

# NOTE(review): check_file_and_pause is defined in dm_common and is not visible
# here; presumably it verifies the path before the script goes on -- confirm.
dmc.check_file_and_pause(dmfp.prediction_result_floder_path)

# Process the result files in sorted (lexicographic) name order.
res_files = os.listdir(dmfp.prediction_result_floder_path)
res_files = sorted(res_files)

for file in res_files:
    # Skip hidden entries (names starting with a dot).
    if file[0] == '.':
        continue
    # The third dot-separated field of the file name is parsed as an integer.
    start_time = int(file.split('.')[2])

    # NOTE(review): the name `csv` would shadow the stdlib csv module if that
    # module were imported in this script.
    csv = pd.read_csv(os.path.join(dmfp.prediction_result_floder_path, file),
                      sep=",")
    # Column layout of the result file:
    # [0] linkid
    # [1] linkid's tag
    # [2] predicted value 1
    # [3] predicted value 2
    # [4] predicted value 3
    # [5] predicted value 4
    # [6] predicted value 5
    # [7] predicted value 6
예제 #7
0
import dm_source_data as sd
import pandas as pd
import dm_preprocess_fun as ppf
import dm_csv as dmcsv
import os
import dm_filepath as dmfp
import dm_common as dmc

# Run dm_common.check_file_and_pause on each of the five paths below.
# NOTE(review): the helper is not visible here; presumably it verifies the
# path before the script goes on -- confirm in dm_common.
dmc.check_file_and_pause(dmfp.sta_unknown_percentage_path)
dmc.check_file_and_pause(dmfp.pp2_train_data_path)
dmc.check_file_and_pause(dmfp.pp2_test_data_path)
dmc.check_file_and_pause(dmfp.pp2_direction_map_path)
dmc.check_file_and_pause(dmfp.pp2_linkid_map_path)

# Names of the nine training-data columns, in positional order; the number
# after each entry is that column's index.
train_col_name = [
    "day",               # 0
    "time",              # 1
    "direction",         # 2
    "linkid",            # 3
    "travel_time",       # 4
    "volumn",            # 5
    "speed",             # 6
    "occupancy",         # 7
    "congestion_level",  # 8
]

# Positions of 'direction' and 'linkid' in train_col_name.
direction_col = 2
linkid_col = 3

# Read the comma-separated direction map and turn it into a lookup with
# pp2_read_dict_csv.
direction_map_csv = pd.read_csv(dmfp.pp2_direction_map_path, sep=',')
direction_dict = ppf.pp2_read_dict_csv(direction_map_csv)