def output_split(all: List[Preprocessed], train: List[Preprocessed], io: IO):
    """Write the training/testing split configuration as JSON.

    The ``file_name`` of every item in ``train`` is stored under
    ``"training_set"``; the ``file_name`` of every item of ``all`` that is
    not in ``train`` is stored under ``"test_set"``. The object is written to
    the path found under the ``"output_train"`` key of ``io``.

    The parent directory of the output path is created when it does not
    exist yet, so writing to a fresh output location does not fail.

    Note: the parameter name ``all`` shadows the builtin; it is kept because
    it is part of the signature callers may use.
    """
    # Local import: the file's top-level import block is not visible here.
    import os

    out_path = io.get("output_train")
    obj = {
        "training_set": [item.file_name for item in train],
        "test_set": [item.file_name for item in all if item not in train],
    }

    # A bare file name has no directory component; nothing to create then.
    parent_dir = os.path.dirname(out_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(out_path, "w+") as out_file:
        json.dump(obj, out_file)
    logline("wrote training/testing config to {}".format(out_path))
def output_split(all: List[Preprocessed], train: List[Preprocessed], io: IO):
    """Dump the names of the training and testing files to a JSON config.

    ``"training_set"`` holds the ``file_name`` of each entry in ``train``;
    ``"test_set"`` holds the ``file_name`` of each entry of ``all`` that is
    not contained in ``train``. The destination is the ``"output_train"``
    value of ``io``; its parent directory is created if necessary.
    """
    training_names = [entry.file_name for entry in train]
    test_names = [entry.file_name for entry in all if entry not in train]
    split_config = {
        "training_set": training_names,
        "test_set": test_names,
    }

    destination = io.get("output_train")
    parent = pathlib.Path(os.path.dirname(destination))
    parent.mkdir(parents=True, exist_ok=True)

    with open(destination, "w+") as out_file:
        json.dump(split_config, out_file)

    message = "wrote training/testing config to {}".format(destination)
    logline(message)
def start_server(io: IO):
    """Run the HTTP server until it is interrupted.

    Stores the ``"interval"`` value of ``io`` in the module-level
    ``interval`` global, then listens on the ``"port"`` value of ``io`` on
    all interfaces. Requests are handled by ``WebServer``, constructed with
    ``directory`` set to the ``public`` folder under ``CUR_DIR``.

    A ``KeyboardInterrupt`` stops the server quietly. The server socket is
    closed and the log group is exited even when ``serve_forever`` raises a
    different exception; that exception then propagates to the caller.
    """
    global interval
    interval = io.get("interval")
    port = io.get("port")

    handler = partial(WebServer, directory=os.path.join(CUR_DIR, "public"))
    httpd = HTTPServer(("", port), handler)

    logline("listening at port", port)
    enter_group()
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the server.
        pass
    finally:
        # Always release the socket and restore the log nesting level.
        httpd.server_close()
        exit_group()
    logline("stopped listening")
def match_files(io: IO, input_paths: List[str]):
    """Match found files to analysis file contents

    Loads the analysis file named by the ``"analysis"`` value of ``io`` and
    maps each input path to the first analysed track whose name occurs
    (case-insensitively) in the file's name without directory and extension.

    The file stem is derived with ``os.path`` so that names containing dots
    (e.g. ``"01. Song.mp3"``) are not truncated at the first dot and the
    platform's path separators are honoured.

    Every unmapped input file and every unmapped analysed track is reported
    through ``warn``. When anything is unmapped the user is asked to confirm.

    Returns:
        A ``(analysis, mapped)`` tuple, where ``mapped`` maps input paths to
        track names, or ``None`` when the user answers ``n`` or the prompt is
        aborted (Ctrl+C or end of input).
    """
    # Local import: the file's top-level import block is not visible here.
    import os

    analysis_file = io.get("analysis")
    logline(analysis_file)
    analysis = AnalysisFile(analysis_file)

    mapped: Dict[str, str] = {}
    matched_tracks = set()
    for in_path in input_paths:
        stem = os.path.splitext(os.path.basename(in_path))[0].lower()
        for track_analysis in analysis.tracks:
            if track_analysis.name.lower() in stem:
                mapped[in_path] = track_analysis.name
                matched_tracks.add(track_analysis.name)
                # Only the first matching track is used for a file.
                break

    logline("came up with the following mapping:")
    logline("")
    for in_path, track_name in mapped.items():
        logline('"{}" -> "{}"'.format(in_path, track_name))

    unmapped_amount: int = 0
    for in_path in input_paths:
        if in_path not in mapped:
            warn('input file "{}" not mapped'.format(in_path))
            unmapped_amount += 1
    for track_analysis in analysis.tracks:
        if track_analysis.name not in matched_tracks:
            warn('analysed file "{}" not mapped'.format(track_analysis.name))
            unmapped_amount += 1

    logline("")
    if unmapped_amount > 0:
        try:
            correct = input("is this correct? Y/n")
        except (KeyboardInterrupt, EOFError):
            # No answer can be obtained (aborted or non-interactive stdin):
            # treat it like a rejection instead of crashing.
            return None
        if correct.lower() == "n":
            return None

    return analysis, mapped
def fit_model(io: IO, model: Sequential, preprocessed: List[Preprocessed]):
    """Train ``model`` one epoch at a time on freshly generated parameters.

    The preprocessed data is split once with ``gen_split``. For each of the
    ``"epochs"`` configured in ``io`` new fit parameters are generated and
    trimmed, the model is fitted for a single epoch without shuffling, and
    its states are reset afterwards. When the ``"profile"`` value of ``io``
    is truthy, a TensorBoard callback writing to a timestamped directory
    under ``logs/`` is attached to the fit call.
    """
    epochs = io.get("epochs")
    model.reset_states()

    split_message = "splitting into training set and testing set ({}%)"
    logline(split_message.format(io.get("split")))
    split = gen_split(preprocessed, io)

    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "logs/" + timestamp

    for epoch_index in range(epochs):
        epoch_number = epoch_index + 1

        logline("generating input and expected data for epoch {}/{}".format(epoch_number, epochs))
        fit_params = gen_fit_params(split)
        train_x, train_y = trim_params(fit_params, io)

        logline("training epoch {}/{}".format(epoch_number, epochs))

        # A new callback list (and TensorBoard callback) is built every epoch.
        callbacks = []
        if io.get("profile"):
            debug("profiling")
            tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
            callbacks.append(tensorboard)

        model.fit(
            train_x,
            train_y,
            batch_size=io.get("batch_size"),
            epochs=1,
            shuffle=False,
            callbacks=callbacks,
        )
        model.reset_states()
def run_tests(io: IO, model: Sequential, test_files: List[Preprocessed]):
    """Predict every test file, log accuracy figures and write the results.

    For each file the model predicts on the file's test parameters with a
    batch size of 1 and is reset afterwards. A prediction counts as correct
    when ``is_in_range`` accepts the absolute difference between the first
    expected and first predicted value. The summed difference and the mean
    of the per-sample mean squared errors are logged as well. The object
    returned by ``predictions_to_out_file`` is written as
    ``<file_name>.json`` inside the ``"output_annotated"`` directory of
    ``io``, which is created when missing.
    """
    model.reset_states()

    for test_file in test_files:
        logline("creating test params for {}".format(test_file.file_name))
        test_x, test_y = get_test_params(test_file)

        logline("making predictions")
        predictions: List[List[float]] = model.predict(test_x, batch_size=1, verbose=1)
        model.reset_states()

        errors: List[float] = []
        in_range_count = 0
        diff_score = 0
        for index, prediction in enumerate(predictions):
            actual: List[float] = test_y[index]
            diff = abs(actual[0] - prediction[0])
            diff_score += diff
            if is_in_range(diff):
                in_range_count += 1
            errors.append(mean_squared_error(actual, prediction))

        total = len(predictions)
        percentage = round(in_range_count / total * 100, 2)
        mean_error = round(sum(errors) / total, 4)
        summary = "predicted {}/{} within range ({}%) correct, score was {}/{}, mse was {}"
        logline(summary.format(in_range_count, total, percentage, diff_score, total, mean_error))

        out_obj = predictions_to_out_file(predictions, io)

        annotated_dir = pathlib.Path(io.get("output_annotated"))
        annotated_dir.mkdir(parents=True, exist_ok=True)

        json_name = "{}.json".format(test_file.file_name)
        out_path = os.path.join(io.get("output_annotated"), json_name)
        with open(out_path, "w+") as out_file:
            json.dump(out_obj, out_file)
        logline("wrote object to {}".format(out_path))
def fit_model(io: IO, model: Sequential, preprocessed: List[Preprocessed]):
    """Train ``model`` one epoch at a time on freshly generated parameters.

    The preprocessed data is split once with ``gen_split``. For each of the
    ``"epochs"`` configured in ``io`` new fit parameters are generated and
    trimmed, the model is fitted for a single epoch without shuffling, and
    its states are reset afterwards.
    """
    epochs = io.get("epochs")
    model.reset_states()

    split_message = "splitting into training set and testing set ({}%)"
    logline(split_message.format(io.get("split")))
    split = gen_split(preprocessed, io)

    for epoch_index in range(epochs):
        epoch_number = epoch_index + 1

        logline("generating input and expected data for epoch {}/{}".format(epoch_number, epochs))
        fit_params = gen_fit_params(split)
        train_x, train_y = trim_params(fit_params, io)

        logline("training epoch {}/{}".format(epoch_number, epochs))
        model.fit(
            train_x,
            train_y,
            batch_size=io.get("batch_size"),
            epochs=1,
            shuffle=False,
        )
        model.reset_states()
def mode_realtime_test():
    """The main realtime test entrypoint

    Builds a model with ``create_model(1)``, applies the learned weights to
    it, stores it in the module-level ``model`` global and then runs
    ``start_server`` with the loaded IO configuration.

    The log group opened here is always closed again, also when setting up
    the model or running the server raises, so the log nesting stays
    balanced like in the other mode entrypoints.
    """
    global model

    io = get_io()
    logline("realtime test")
    enter_group()
    try:
        logline("reconstructing model")
        model = create_model(1)

        logline("applying learned weights")
        model = apply_weights(model, io)

        start_server(io)
    finally:
        exit_group()
def run_tests(io: IO, model: Sequential, test_files: List[Preprocessed]):
    """Predict every test file, log accuracy figures and write the results.

    For each file the model predicts on the file's test parameters with a
    batch size of 1 and is reset afterwards. A prediction counts as correct
    when the first expected value equals ``is_positive_beat`` of the first
    predicted value and the second expected value equals
    ``is_positive_melody`` of the second predicted value. The mean of the
    per-sample mean squared errors is logged alongside the accuracy.

    The object returned by ``predictions_to_out_file`` is written as
    ``<file_name>.json`` inside the ``"output_annotated"`` directory of
    ``io``. That directory is created up front when it does not exist, so
    writing the first result does not fail on a fresh output location.
    """
    model.reset_states()

    out_dir = io.get("output_annotated")
    # An empty value means "current directory"; there is nothing to create.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for file in test_files:
        logline("creating test params for {}".format(file.file_name))
        test_x, test_y = get_test_params(file)

        logline("making predictions")
        predictions = model.predict(test_x, batch_size=1, verbose=1)
        model.reset_states()

        mse_total = []
        correct = 0
        for i, prediction in enumerate(predictions):
            actual = test_y[i]
            beat_matches = actual[0] == is_positive_beat(prediction[0])
            if beat_matches and actual[1] == is_positive_melody(prediction[1]):
                correct += 1
            mse_total.append(mean_squared_error(actual, prediction))

        total = len(predictions)
        logline("predicted {}/{} ({}%) correct, mse was {}".format(
            correct,
            total,
            round(correct / total * 100, 2),
            round(sum(mse_total) / total, 4),
        ))

        out_obj = predictions_to_out_file(predictions, io)
        out_path = os.path.join(out_dir, "{}.json".format(file.file_name))
        with open(out_path, "w+") as out_file:
            json.dump(out_obj, out_file)
        logline("wrote object to {}".format(out_path))
def mode_test():
    """The main testing mode entrypoint

    Builds a model with ``create_model(1)``, applies the learned weights,
    reads the test files and runs ``run_tests`` on them. The total runtime
    is logged at the end.
    """
    start_time = time.time()
    io = get_io()

    logline("test")
    enter_group()

    logline("reconstructing model")
    model = create_model(1)

    logline("applying learned weights")
    model = apply_weights(model, io)

    logline("reading testing files")
    test_files = read_test_files(io)

    logline("running testing data")
    enter_group()
    run_tests(io, model, test_files)
    exit_group()

    exit_group()
    # This is the testing mode, so the summary must not claim "training".
    logline("done testing, runtime is {}".format(
        Timer.stringify_time(Timer.format_time(time.time() - start_time))))
def mode_train():
    """The main training mode entrypoint

    Loads the preprocessed data, creates a model sized for the configured
    ``"batch_size"``, fits it via ``fit_model`` and exports it via
    ``export_model``. GPU availability and the total runtime are logged.
    """
    start_time = time.time()
    io = get_io()

    gpu_available = tf.test.is_gpu_available()
    logline("using GPU?", gpu_available)

    logline("train")
    enter_group()

    logline("loading preprocessed data")
    preprocessed = load_preprocessed(io)

    logline("creating models")
    batch_size = io.get("batch_size")
    train_model = create_model(batch_size=batch_size)

    logline("fitting model")
    enter_group()
    fit_model(io, train_model, preprocessed)
    exit_group()

    logline("exporting model")
    export_model(train_model, io)

    exit_group()

    elapsed = time.time() - start_time
    runtime = Timer.stringify_time(Timer.format_time(elapsed))
    logline("done training, runtime is {}".format(runtime))
def export_model(model: Sequential, io: IO):
    """Save the model's weights to the ``"output_weights"`` path of ``io``.

    The success message is logged only after ``save_weights`` has returned,
    so a failed save is never reported as written.
    """
    weights_path = io.get("output_weights")
    model.save_weights(weights_path)
    logline('wrote weights to file "{}"'.format(weights_path))
def run_mode(
    mode: Union[Literal["preprocess"], Literal["train"], Literal["test"], Literal["realtime_test"]]
) -> int:
    """Run the entrypoint belonging to ``mode`` and return its exit code.

    ``"preprocess"`` returns whatever ``mode_preprocess`` returns. The other
    known modes return their entrypoint's result, or 0 when that result is
    falsy. For any other value a usage overview is logged and 1 is returned.
    """
    if mode == "preprocess":
        return mode_preprocess()
    if mode == "train":
        return mode_train() or 0
    if mode == "test":
        return mode_test() or 0
    if mode == "realtime_test":
        return mode_realtime_test() or 0

    # Not a known mode: explain what went wrong and list the valid choices.
    if mode == "":
        headline = "No mode supplied. Choose one of:"
    else:
        headline = "Unknown mode. Choose one of:"

    usage_lines = (
        headline,
        "",
        "\tpreprocess - preprocess and extract features",
        "\ttrain - train on given features",
        "\ttest - test trained model",
        "\trealtime_test - do a realtime test by listening to music",
    )
    for usage_line in usage_lines:
        logline(usage_line)
    return 1
def mode_preprocess() -> int:
    """The main preprocessing entrypoint

    Collects the input paths, matches them against the analysis file,
    generates features and outputs for every file yielded by ``get_files``
    and pickles the collected results to the ``"output_file"`` path of the
    IO configuration (creating its parent directory when needed).

    Each file is closed even when generating its features or outputs fails,
    and the open log groups are closed before every early return so the log
    nesting stays balanced.

    Returns:
        0 on success or when ``match_files`` returns ``None``; 1 when
        ``get_files`` yields a falsy entry.
    """
    start_time = time.time()
    preprocessed = []
    io = get_io()

    logline("preprocessing")
    enter_group()

    logline("reading input paths")
    enter_group()
    input_paths = collect_input_paths(io)
    for input_path in input_paths:
        logline('found path: "{}"'.format(input_path))
    exit_group()

    logline("matching")
    enter_group()
    matching = match_files(io, input_paths)
    if matching is None:
        # Leave both the "matching" and the "preprocessing" group.
        exit_group()
        exit_group()
        return 0
    analysis, mapping = matching
    exit_group()

    logline("iterating files")
    enter_group()
    for file in get_files(input_paths, analysis, mapping):
        if not file:
            error("no files")
            # Leave both the "iterating files" and the "preprocessing" group.
            exit_group()
            exit_group()
            return 1

        try:
            features = gen_features(file)
            outputs = gen_outputs(file, io)

            feature_arr = [feature.to_arr() for feature in features]
            output_arr = [output.to_arr() for output in outputs]

            # Sanity checks on the width of the generated vectors.
            assert np.array(feature_arr).shape[1] == Features.length()
            assert np.array(output_arr).shape[1] == OUT_VEC_SIZE

            preprocessed.append({
                "file_name": file.name,
                "features": feature_arr,
                "outputs": output_arr,
            })
            logline('done with file: "{}"'.format(file.name))
        finally:
            # Release the file even when generation or a check above fails.
            file.close()
    exit_group()
    logline("done iterating files")

    output_path = io.get("output_file")
    pathlib.Path(os.path.dirname(output_path)).mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb+") as out_file:
        pickle.dump(preprocessed, out_file)
    logline("wrote output to file: {}".format(output_path))

    exit_group()
    logline(
        "done preprocessing, runtime is {}".format(Timer.stringify_time(Timer.format_time(time.time() - start_time)))
    )
    return 0