def predict(file):
    """Run every model in `Model` over the frames of `file`.

    For each model, only feature categories whose upper-cased name appears
    in the model key are fed to it; a frame whose sample the model rejects
    (ValueError) contributes a 0 score. RFR results are then summarized via
    process_rfr_result().
    """
    data = _G.load_data(file)
    print(f"Predicting {file}")
    for mod_name, model in Model.items():
        print(f"=== {mod_name} ===")
        scores = defaultdict(list)
        for frame in data:
            for category, values in frame.items():
                # Skip ignored categories and ones this model wasn't trained on.
                if category in _G.IgnoredCategories or category.upper() not in mod_name:
                    continue
                if "RFR" not in mod_name:
                    # Only random-forest regressors are supported so far;
                    # give up on this frame's remaining categories.
                    print("WARNING: Unknown estimator", mod_name)
                    break
                train_n = model.best_estimator_.n_features_
                sample = preprocessing(mod_name, train_n, values)
                try:
                    scores[mod_name].append(model.predict(sample)[0])
                except ValueError:
                    # Sample didn't fit the model's input — score it as 0.
                    scores[mod_name].append(0)
        # All frames of this model processed; report per-model results.
        for key in scores:
            if "RFR" in key:
                process_rfr_result(scores[key])
def build_playback_archive():
    """Build the plot-playback archive, or load it if already on disk.

    Scans every plot file, records each plot window's start/end positions
    in a PlotPlaybackRecord stamped at _G.TimeWindowSize intervals, dumps
    the list to _G.PlotPlaybackFilename and returns it.

    Returns:
        list[PlotPlaybackRecord]: the (possibly cached) archive records.
    """
    filename = _G.PlotPlaybackFilename
    if os.path.exists(filename):
        # Fixed: the f-string had no placeholder, so the message never said
        # which archive was found.
        print(f"Archive '{filename}' already exists")
        return _G.load_data(filename)
    print(f"Building archive playback for '{filename}'")
    files = sorted(glob_plots(filename))
    total = len(files)
    data = []
    cur_timestamp = 0
    # start=1 so progress reads 1/N .. N/N instead of 0/N .. N-1/N.
    for i, file in enumerate(files, start=1):
        print(f"Processing {i}/{total}")
        record = PlotPlaybackRecord(cur_timestamp)
        # sx/ex are the plot window's start/end as returned by the helper.
        record.sx, record.ex = find_plot_window_length(file)
        data.append(record)
        cur_timestamp += _G.TimeWindowSize
    _G.dump_data(data, filename)
    print("Archive dumped")  # typo fix: was "Archived dumped"
    return data
data = _G.all_data_files() x_train = [] y_train = [] base = 0 incom_idx = [] max_nsize = 0 for file in data: parts = re.split(r"\\|\/", file) if len(parts[2].split('_')[-1]) < 3: continue labels = load_postive_label(parts) dat = _G.load_data(file) twlen = len(dat) for key in labels: tmp_y = [1 if i in labels[key] else 0 for i in range(twlen)] base = len(y_train) y_train.extend(tmp_y) for idx, frame in enumerate(dat): # if frame[Category].shape != dat[0][Category].shape: # print(f"Frame#{base+idx} is incomplete, discard") # incom_idx.append(base+idx) # continue infidx = np.where(np.isinf(frame[Category])) if len(infidx[0]) > 0: print( f"WARNING: INF value in frame#{idx} of {Category} in {file}"
import re
from collections import defaultdict
from pprint import pprint
from threading import Thread
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import (GridSearchCV, StratifiedKFold,
                                     cross_val_score)
import _G
import argv_parse

# Pre-trained models keyed as "<ESTIMATOR>_<CATEGORY>"; the category half of
# the key is matched against upper-cased feature-category names elsewhere.
Model = {
    #"RFR_MFCC": _G.load_data("rfr_mfcc.mod"),
    "RFR_ROLLOFF": _G.load_data("rfr_rolloff.mod"),
}

def getframe_timestamp_period(idx):
    # [start, end] timestamps of frame `idx`, in units of _G.TimeWindowSize.
    return [idx * _G.TimeWindowSize, (idx+1) * _G.TimeWindowSize]

def process_rfr_result(result):
    # Print frame indices ranked by their regression score, highest first.
    # `result` is a sequence of per-frame scores indexed by frame number.
    score_dict = {}
    for i, v in enumerate(result):
        score_dict[i] = v
    # Rebuild the dict ordered by descending score (insertion order is kept).
    score_dict = {k: v for k, v in sorted(score_dict.items(), key=lambda p: p[1], reverse=True)}
    print("Sorted result:")
    for i,v in score_dict.items():
        print(i, v)

def preprocessing(category, train_n, values):
    """Prepare `values` as model input for `category` with `train_n` features
    — body continues beyond this chunk; TODO confirm behavior there."""
sam_datas = json.load(fp) for sdata in sam_datas: slug = sdata['slug'] st = sdata['start_t'] dur = sdata['duration'] ed = (st + int(dur)) // _G.TimeWindowSize st = int(st) // _G.TimeWindowSize ret[slug] = [] for i in range(st,ed+1): ret[slug].append(i) return ret[next(iter(ret))] data = _G.all_data_files() models = { "SVM": _G.load_data("svm_rolloff.mod"), "KNN": _G.load_data("knn_rolloff.mod"), "RFR": _G.load_data("rfr_rolloff.mod") } for mod_name, model in models.items(): print(f"=== {mod_name} ===") ok_cnt = 0 nonok_cnt = 0 total_frame = 0 real_ok_cnt = 0 for file in data: parts = re.split(r"\\|\/",file) if len(parts[2].split('_')[-1]) < 3: continue labels = load_postive_label(parts)
for sdata in sam_datas: slug = sdata['slug'] st = sdata['start_t'] dur = sdata['duration'] ed = (st + int(dur)) // _G.TimeWindowSize st = int(st) // _G.TimeWindowSize ret[slug] = [] for i in range(st, ed + 1): ret[slug].append(i) return ret[next(iter(ret))] data = _G.all_data_files() models = { "SVM": _G.load_data("svm_zcr.mod"), #"KNN": _G.load_data("knn_zcr.mod"), "RFR": _G.load_data("rfr_zcr.mod") } for mod_name, model in models.items(): print(f"=== {mod_name} ===") ok_cnt = 0 nonok_cnt = 0 total_frame = 0 real_ok_cnt = 0 for file in data: parts = re.split(r"\\|\/", file) if len(parts[2].split('_')[-1]) < 3: continue labels = load_postive_label(parts)
st = sdata['start_t'] dur = sdata['duration'] ed = (st + int(dur)) // _G.TimeWindowSize st = int(st) // _G.TimeWindowSize ret[slug] = [] for i in range(st, ed + 1): ret[slug].append(i) return ret[next(iter(ret))] data = _G.all_data_files() models = { # "SVM": _G.load_data("svm_mfcc.mod"), # "KNN": _G.load_data("knn_mfcc.mod") "RFR": _G.load_data("rfr_mfcc.mod") } for mod_name, model in models.items(): print(f"=== {mod_name} ===") ok_cnt = 0 nonok_cnt = 0 total_frame = 0 real_ok_cnt = 0 for file in data: parts = re.split(r"\\|\/", file) if len(parts[2].split('_')[-1]) < 3: continue labels = load_postive_label(parts) dat = _G.load_data(file) twlen = len(dat)
data = _G.all_data_files() x_train = [] y_train = [] base = 0 incom_idx = [] max_nsize = 0 cnt = 0 for file in data: parts = re.split(r"\\|\/", file) if len(parts[2].split('_')[-1]) < 3: continue labels = load_postive_label(parts) dat = _G.load_data(file) frame_len = len(dat) print(frame_len, file) for key in labels: tmp_y = [1 if i in labels[key] else 0 for i in range(frame_len)] base = len(y_train) y_train.extend(tmp_y) for idx, frame in enumerate(dat): cnt += 1 for idx, frame in enumerate(dat): infidx = np.where(np.isinf(frame[Category])) if len(infidx[0]) > 0: print( f"WARNING: INF value in frame#{idx} of {Category} in {file}" ) print("INF val idx: ", infidx)
import os

from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
from collections import defaultdict
from pprint import pprint

import _G
import argv_parse

if __name__ == "__main__":
    # Visualize the tuned random forest: dump the first (up to) 50 trees to
    # visualization/tree_<i>.dot and render each to PNG with graphviz `dot`.
    model = _G.load_data("rfr_rolloff.mod")
    model = model.best_estimator_
    print(model)
    print(model.min_samples_split)
    print(model.max_samples)
    # `dot` won't create the output directory itself.
    os.makedirs("visualization", exist_ok=True)
    # Was a hard-coded range(50): IndexError on forests with fewer trees.
    for i, tree in enumerate(model.estimators_[:50]):
        export_graphviz(tree, out_file=f"visualization/tree_{i}.dot")
        # Fixed: `os` was used here but never imported (NameError at runtime).
        os.system(
            f"dot -Tpng visualization/tree_{i}.dot -o visualization/tree_{i}.png"
        )