import cv2
import numpy as np
import tensorflow as tf  # uses the TF 1.x graph/session API

import ctc_utils

# read_vocab, split_score and parse_description are project helpers defined elsewhere
def main(ms_file_name, line_freq, output_file):
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    # Load the vocabulary that maps class indices to semantic tokens
    int2word = read_vocab("models/vocabulary_semantic.txt")

    # Restore the trained weights
    model = "models/semantic_model.meta"
    saver = tf.train.import_meta_graph(model)
    saver.restore(sess, model[:-5])

    graph = tf.get_default_graph()
    model_input = graph.get_tensor_by_name("model_input:0")
    seq_len = graph.get_tensor_by_name("seq_lengths:0")
    rnn_keep_prob = graph.get_tensor_by_name("keep_prob:0")
    height_tensor = graph.get_tensor_by_name("input_height:0")
    width_reduction_tensor = graph.get_tensor_by_name("width_reduction:0")
    logits = tf.get_collection("logits")[0]

    # Constants that are saved inside the model itself
    WIDTH_REDUCTION, HEIGHT = sess.run([width_reduction_tensor, height_tensor])
    decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

    # Split the music score into staff lines
    print(f"Processing {ms_file_name}\n")
    lines = split_score(ms_file_name, line_freq)
    output = open(output_file, "w")

    # Run the model on each line and write the decoded symbols to the output file
    for idx, line in enumerate(lines):
        # Save the line image to the samples directory for inspection
        print(f"./samples/sample{idx}.png\n")
        cv2.imwrite(f"./samples/sample{idx}.png", line)

        # Preprocess: grayscale, resize to the model's input height, normalize
        gray = cv2.cvtColor(line, cv2.COLOR_BGR2GRAY)
        image = ctc_utils.resize(gray, HEIGHT)
        image = ctc_utils.normalize(image)
        image = np.asarray(image).reshape(1, image.shape[0], -1, 1)
        seq_lengths = [image.shape[2] / WIDTH_REDUCTION]

        prediction = sess.run(decoded, feed_dict={
            model_input: image,
            seq_len: seq_lengths,
            rnn_keep_prob: 1.0,
        })
        str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)

        # Translate each predicted class index back into a semantic token
        for w in str_predictions[0]:
            description = int2word[w]
            notation, v1, v2 = parse_description(description)
            if v1 != "tie":
                if notation == "barline":
                    output.write("### ----------------\n")
                elif notation in ("note", "gracenote"):
                    output.write(f'- ["{notation}", "{v1}", "{v2}"]\n')
                elif notation == "rest":
                    output.write(f'- ["rest", "{v1}"]\n')

    output.close()
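main() leans on a few project helpers that are not shown here. The sketch below is only a guess at their shape, inferred from how they are called above: read_vocab() is assumed to return a dict from class index to semantic token, and parse_description() is assumed to split a token such as note-C4_quarter into (notation, v1, v2), with tie tokens surfaced through v1 so the v1 != "tie" check can skip them. split_score() (segmenting the page image into staff lines) is omitted. The actual helpers may differ.

def read_vocab(vocab_path):
    # Map each line number in the vocabulary file to its semantic token
    with open(vocab_path) as f:
        return {idx: word for idx, word in enumerate(f.read().splitlines())}

def parse_description(description):
    # 'note-C4_quarter' -> ('note', 'C4', 'quarter')
    # 'rest-quarter'    -> ('rest', 'quarter', '')
    # 'barline'         -> ('barline', '', '')
    if description == 'tie':
        return '', 'tie', ''  # guessed handling of tie tokens
    notation, _, rest = description.partition('-')
    v1, _, v2 = rest.partition('_')
    return notation, v1, v2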
# Fragment: assumes the restored session, the graph tensors (input, seq_len,
# rnn_keep_prob), the decoded op, int2word, HEIGHT and WIDTH_REDUCTION from the
# model-loading code are already in scope.
image = cv2.imread(imgpath, 0)
image = ctc_utils.resize(image, HEIGHT)
image = ctc_utils.normalize(image)
image = np.asarray(image).reshape(1, image.shape[0], image.shape[1], 1)
seq_lengths = [image.shape[2] / WIDTH_REDUCTION]

prediction = sess.run(decoded, feed_dict={
    input: image,
    seq_len: seq_lengths,
    rnn_keep_prob: 1.0,
})
str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)

# Join the predicted tokens into a tab-separated string
output = ""
for w in str_predictions[0]:
    output += int2word[w]
    output += '\t'
output = output.rstrip()

# Read the ground-truth transcription and drop all whitespace from it
with open(f'{corpus}/{x_in}/{x_in}.txt', "r") as f:
    inputt = f.read()
res2 = ""
for x in inputt:
    res2 += x.strip()
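The fragment ends with two strings prepared for comparison: output, the tab-separated prediction, and the whitespace-stripped ground truth. The comparison itself is not shown; below is a minimal sketch of one way to score it as a token-level symbol error rate. The inline levenshtein() helper and the assumption that the ground-truth file contains whitespace-separated tokens are additions here, not part of the original code.

def levenshtein(a, b):
    # Edit distance between two token sequences
    dp = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        prev, dp[0] = dp[0], i
        for j, cb in enumerate(b, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1, dp[j - 1] + 1, prev + (ca != cb))
    return dp[-1]

predicted = output.split('\t')
expected = inputt.split()
ser = levenshtein(predicted, expected) / max(len(expected), 1)
print(f"Symbol error rate: {ser:.2%}")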
def predict(image_path):
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    voc_file = 'vocabulary_agnostic.txt'
    model = './Models/model.hdf5-69000.meta'

    # Read the vocabulary that maps class indices to agnostic tokens
    with open(voc_file, "r") as dict_file:
        dict_list = dict_file.read().splitlines()
    int2word = dict()
    for word in dict_list:
        word_idx = len(int2word)
        int2word[word_idx] = word

    # Restore weights
    saver = tf.train.import_meta_graph(model)
    saver.restore(sess, model[:-5])

    graph = tf.get_default_graph()
    model_input = graph.get_tensor_by_name("model_input:0")
    seq_len = graph.get_tensor_by_name("seq_lengths:0")
    rnn_keep_prob = graph.get_tensor_by_name("keep_prob:0")
    height_tensor = graph.get_tensor_by_name("input_height:0")
    width_reduction_tensor = graph.get_tensor_by_name("width_reduction:0")
    logits = tf.get_collection("logits")[0]

    # Constants that are saved inside the model itself
    WIDTH_REDUCTION, HEIGHT = sess.run([width_reduction_tensor, height_tensor])
    decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

    # Preprocess the input image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = ctc_utils.resize(image, HEIGHT)
    image = ctc_utils.normalize(image)
    image = np.asarray(image).reshape(1, image.shape[0], image.shape[1], 1)
    seq_lengths = [image.shape[2] / WIDTH_REDUCTION]

    prediction = sess.run(
        decoded,
        feed_dict={model_input: image, seq_len: seq_lengths, rnn_keep_prob: 1.0},
    )
    str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)

    # Convert agnostic tokens of the form 'note.<length>-<position>' into
    # (length, pitch) pairs; notes_dict (defined elsewhere) maps the staff
    # position to a pitch name
    notes = []
    for w in str_predictions[0]:
        temp = int2word[w].split('.')
        print(temp)
        if len(temp) != 2:
            continue
        symbol, des = temp
        if symbol == 'note':
            length, note = des.split('-', 1)
            if 'beamed' in length:
                length = 'eigth'
            notes.append((length, notes_dict[note]))
        elif symbol == 'rest':
            length, _ = des.split('-', 1)
            notes.append((length, 'rest'))

    return notes
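A possible way to call predict(); the file name and the printed pairs below are illustrative, and notes_dict must already be defined for the function to run.

notes = predict('sample_line.png')   # hypothetical input image of one staff line
for length, pitch in notes:
    print(length, pitch)             # e.g. quarter C4, or half rest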