Example #1
    def getValidation(self, params):
        if self.validation_dict is None:
            images = []
            labels = []

            # Read files
            for sample_filepath in self.validation_list:
                sample_fullpath = self.corpus_dirpath + '/' + sample_filepath + '/' + sample_filepath

                # IMAGE
                sample_img = cv2.imread(
                    sample_fullpath + '.png',
                    cv2.IMREAD_GRAYSCALE)  # Grayscale is assumed!
                height = params['img_height']
                sample_img = ctc_utils.resize(sample_img, height)
                images.append(ctc_utils.normalize(sample_img))

                # GROUND TRUTH
                if self.semantic:
                    sample_full_filepath = sample_fullpath + '.semantic'
                else:
                    sample_full_filepath = sample_fullpath + '.agnostic'

                with open(sample_full_filepath, 'r') as sample_gt_file:
                    sample_gt_plain = sample_gt_file.readline().rstrip().split(
                        ctc_utils.word_separator())

                labels.append([self.word2int[lab] for lab in sample_gt_plain])

            # Transform to batch
            image_widths = [img.shape[1] for img in images]
            max_image_width = max(image_widths)

            batch_images = np.ones(
                shape=[len(self.validation_list), params['img_height'],
                       max_image_width, params['img_channels']],
                dtype=np.float32) * self.PAD_COLUMN

            for i, img in enumerate(images):
                batch_images[i, 0:img.shape[0], 0:img.shape[1], 0] = img

            # LENGTH
            width_reduction = 1
            for i in range(params['conv_blocks']):
                width_reduction = width_reduction * params['conv_pooling_size'][i][1]

            # Integer division: CTC sequence lengths must be ints
            lengths = [batch_images.shape[2] // width_reduction] * batch_images.shape[0]

            self.validation_dict = {
                'inputs': batch_images,
                'seq_lengths': np.asarray(lengths),
                'targets': labels,
            }

        return self.validation_dict, len(self.validation_list)
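
A minimal sketch of how this cached validation set might be consumed (hypothetical call site: primus is an instance of the loader class, and sess, decoded, inputs, seq_len, and rnn_keep_prob are the graph tensors used elsewhere in these examples):

    # The dict is built on first call and reused on every later call.
    validation_batch, validation_size = primus.getValidation(params)
    predictions = sess.run(decoded,
                           feed_dict={
                               inputs: validation_batch['inputs'],
                               seq_len: validation_batch['seq_lengths'],
                               rnn_keep_prob: 1.0,
                           })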
Example #2
    def nextBatch(self, params):
        images = []
        labels = []

        # Read files (fixed batch size of 16)
        for i in range(16):
            temp_filepath = self.training_data[self.curr_idx]
            full_path = self.data_dirpath + '/' + temp_filepath + '/' + temp_filepath

            # IMAGE
            sample_img = cv2.imread(full_path + '.png', cv2.IMREAD_GRAYSCALE)
            height = 128
            sample_img = ctc_utils.resize(sample_img, height)
            images.append(ctc_utils.normalize(sample_img))

            # GROUND TRUTH
            sample_full_filepath = full_path + '.semantic'

            with open(sample_full_filepath, 'r') as gt_file:
                gt_list = gt_file.readline().rstrip().split(ctc_utils.word_separator())

            labels.append([self.word2int[lab] for lab in gt_list])

            self.curr_idx = (self.curr_idx + 1) % len(self.training_data)

        # Transform to batch
        image_widths = [img.shape[1] for img in images]
        max_width = max(image_widths)

        batch_images = np.ones(shape=[16, 128, max_width, 1],
                               dtype=np.float32) * self.PAD_COLUMN

        for i, img in enumerate(images):
            batch_images[i, 0:img.shape[0], 0:img.shape[1], 0] = img

        # LENGTH
        # conv_pool is assumed to be defined at module level as the list of
        # pooling sizes per convolutional block, e.g. [(2, 2)] * 4.
        width_reduction = 1
        for i in range(4):
            width_reduction = width_reduction * conv_pool[i][1]

        # Integer division: CTC sequence lengths must be ints
        lengths = [batch_images.shape[2] // width_reduction] * batch_images.shape[0]

        return {
            'inputs': batch_images,
            'seq_lengths': np.asarray(lengths),
            'targets': labels,
        }
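
For reference, a minimal training-loop sketch around this batch provider (hypothetical wiring: loader is an instance of this class, and train_op, model_input, seq_len, targets, and rnn_keep_prob come from the model graph; ctc_utils.sparse_tuple_from is assumed to convert the label lists into the sparse tuple CTC loss expects):

    # Hypothetical driver: one optimizer step per 16-sample batch.
    for step in range(training_steps):
        batch = loader.nextBatch(params)
        sess.run(train_op,
                 feed_dict={
                     model_input: batch['inputs'],
                     seq_len: batch['seq_lengths'],
                     targets: ctc_utils.sparse_tuple_from(batch['targets']),
                     rnn_keep_prob: 0.5,
                 })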
Example #3
    # Constants that are saved inside the model itself
    WIDTH_REDUCTION, HEIGHT = sess.run([width_reduction_tensor, height_tensor])

    decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

    results = []
    minres = 10
    maxres = -1

    for x_in in inputs:
        imgpath = f'{corpus}/{x_in}/{x_in}.jpg'

        image = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
        image = ctc_utils.resize(image, HEIGHT)
        image = ctc_utils.normalize(image)
        image = np.asarray(image).reshape(1, image.shape[0], image.shape[1], 1)

        seq_lengths = [image.shape[2] // WIDTH_REDUCTION]

        prediction = sess.run(decoded,
                              feed_dict={
                                  input: image,
                                  seq_len: seq_lengths,
                                  rnn_keep_prob: 1.0,
                              })

        str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)
        output = ""
        for w in str_predictions[0]:
            output += str(int2word[w])
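
The loop above concatenates decoded symbols with no delimiter; a small variant that keeps them readable (assuming ctc_utils.word_separator() returns the same token delimiter used when reading the ground truth):

        # Hypothetical: join decoded words with the vocabulary separator.
        output = ctc_utils.word_separator().join(
            str(int2word[w]) for w in str_predictions[0])
        results.append(output)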
Example #4
    def nextBatch(self, params, mode='Train'):
        images = []
        labels = []

        # Read files
        for _ in range(params['batch_size']):
            if mode == 'Train':
                sample_filepath = self.training_list[self.current_idx]
                sample_fullpath = self.corpus_dirpath + '/' + sample_filepath + '/' + sample_filepath
            elif mode == 'Validation':
                sample_filepath = self.validation_list[self.current_val_idx]
                sample_fullpath = self.corpus_dirpath + '/' + sample_filepath + '/' + sample_filepath
               
            # IMAGE
            if self.distortions:
                sample_img = cv2.imread(sample_fullpath + '_distorted.jpg', cv2.IMREAD_GRAYSCALE)
            else:
                sample_img = cv2.imread(sample_fullpath + '.png', cv2.IMREAD_GRAYSCALE)

            height = params['img_height']
            sample_img = ctc_utils.resize(sample_img, height)
            images.append(ctc_utils.normalize(sample_img))

            # GROUND TRUTH
            if self.semantic:
                sample_full_filepath = sample_fullpath + '.semantic'
            else:
                sample_full_filepath = sample_fullpath + '.agnostic'
            
            with open(sample_full_filepath, 'r') as sample_gt_file:
                sample_gt_plain = sample_gt_file.readline().rstrip().split(ctc_utils.word_separator())

            labels.append([self.word2int[lab] for lab in sample_gt_plain])

            if mode == 'Train':
                self.current_idx = (self.current_idx + 1) % len(self.training_list)
            elif mode == 'Validation':
                self.current_val_idx = (self.current_val_idx + 1) % len(self.validation_list)

        # Transform to batch
        image_widths = [img.shape[1] for img in images]
        max_image_width = max(image_widths)

        batch_images = np.ones(
            shape=[params['batch_size'], params['img_height'],
                   max_image_width, params['img_channels']],
            dtype=np.float32) * self.PAD_COLUMN

        for i, img in enumerate(images):
            batch_images[i, 0:img.shape[0], 0:img.shape[1], 0] = img

        # LENGTH
        width_reduction = 1
        for i in range(params['conv_blocks']):
            width_reduction = width_reduction * params['conv_pooling_size'][i][1]

        # Integer division: CTC sequence lengths must be ints
        lengths = [batch_images.shape[2] // width_reduction] * batch_images.shape[0]

        return {
            'inputs': batch_images,
            'seq_lengths': np.asarray(lengths),
            'targets': labels,
        }
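
Validation reuses the same provider with mode='Validation' and dropout disabled; a minimal sketch (loader, sess, and the graph tensors are assumed as in the other examples):

    # Hypothetical: evaluate on one validation batch without dropout.
    val_batch = loader.nextBatch(params, mode='Validation')
    val_predictions = sess.run(decoded,
                               feed_dict={
                                   model_input: val_batch['inputs'],
                                   seq_len: val_batch['seq_lengths'],
                                   rnn_keep_prob: 1.0,
                               })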
Example #5
def main(ms_file_name, line_freq, output_file):
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    # load vocabulary
    int2word = read_vocab("models/vocabulary_semantic.txt")

    # Restore weights
    model = "models/semantic_model.meta"
    saver = tf.train.import_meta_graph(model)
    saver.restore(sess, model[:-5])

    graph = tf.get_default_graph()

    model_input = graph.get_tensor_by_name("model_input:0")
    seq_len = graph.get_tensor_by_name("seq_lengths:0")
    rnn_keep_prob = graph.get_tensor_by_name("keep_prob:0")
    height_tensor = graph.get_tensor_by_name("input_height:0")
    width_reduction_tensor = graph.get_tensor_by_name("width_reduction:0")
    logits = tf.get_collection("logits")[0]

    # Constants that are saved inside the model itself
    WIDTH_REDUCTION, HEIGHT = sess.run([width_reduction_tensor, height_tensor])

    decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

    # split the music score into lines
    print(f"Process {ms_file_name}\n")
    lines = split_score(ms_file_name, line_freq)

    output = open(output_file, "w")
    # Process each line and save the decoded symbols
    for idx, line in enumerate(lines):
        # write the file to sample directory for sampling
        print(f"./samples/sample{idx}.png\n")
        cv2.imwrite(f"./samples/sample{idx}.png", line)

        gray = cv2.cvtColor(line, cv2.COLOR_BGR2GRAY)
        image = ctc_utils.resize(gray, HEIGHT)
        image = ctc_utils.normalize(image)
        image = np.asarray(image).reshape(1, image.shape[0], -1, 1)

        seq_lengths = [image.shape[2] // WIDTH_REDUCTION]

        prediction = sess.run(decoded,
                              feed_dict={
                                  model_input: image,
                                  seq_len: seq_lengths,
                                  rnn_keep_prob: 1.0,
                              })

        str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)

        for w in str_predictions[0]:
            description = int2word[w]
            notation, v1, v2 = parse_description(description)
            if v1 != "tie":
                if notation == "barline":
                    output.write("### ----------------\n")
                elif notation == "note" or notation == "gracenote":
                    output.write(f'- ["{notation}", "{v1}", "{v2}"]\n')
                elif notation == "rest":
                    output.write(f'- ["rest", "{v1}"]\n')

    output.close()
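
A possible entry point for main (hypothetical CLI wiring; the argument order mirrors main's signature, and the argument types are assumptions):

if __name__ == "__main__":
    import sys

    # Usage sketch: python transcribe.py <score_image> <line_freq> <output_file>
    main(sys.argv[1], int(sys.argv[2]), sys.argv[3])
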
def predict(image):
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    voc_file = 'vocabulary_agnostic.txt'
    model = './Models/model.hdf5-69000.meta'

    # Read the vocabulary: one word per line, its index is the line number
    with open(voc_file, "r") as dict_file:
        dict_list = dict_file.read().splitlines()
    int2word = {word_idx: word for word_idx, word in enumerate(dict_list)}

    # Restore weights
    saver = tf.train.import_meta_graph(model)
    saver.restore(sess, model[:-5])

    graph = tf.get_default_graph()

    model_input = graph.get_tensor_by_name("model_input:0")  # renamed from 'input' to avoid shadowing the built-in
    seq_len = graph.get_tensor_by_name("seq_lengths:0")
    rnn_keep_prob = graph.get_tensor_by_name("keep_prob:0")
    height_tensor = graph.get_tensor_by_name("input_height:0")
    width_reduction_tensor = graph.get_tensor_by_name("width_reduction:0")
    logits = tf.get_collection("logits")[0]

    # Constants that are saved inside the model itself
    WIDTH_REDUCTION, HEIGHT = sess.run([width_reduction_tensor, height_tensor])

    decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

    image = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
    image = ctc_utils.resize(image, HEIGHT)
    image = ctc_utils.normalize(image)
    image = np.asarray(image).reshape(1, image.shape[0], image.shape[1], 1)

    seq_lengths = [image.shape[2] // WIDTH_REDUCTION]

    prediction = sess.run(
        decoded,
        feed_dict={model_input: image, seq_len: seq_lengths, rnn_keep_prob: 1.0},
    )

    str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)
    notes = []
    for w in str_predictions[0]:
        temp = int2word[w].split('.')
        print(temp)
        if len(temp) != 2:
            continue
        symbol, des = temp
        if symbol == 'note':
            length, note = des.split('-', 1)
            if 'beamed' in length:
                length = 'eighth'  # treat beamed notes as eighth notes
            notes.append((length, notes_dict[note]))
        elif symbol == 'rest':
            length, _ = des.split('-', 1)
            notes.append((length, 'rest'))

    return notes
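
A minimal call sketch for predict (the image path is illustrative; notes_dict is assumed to map pitch descriptions to note names elsewhere in this module):

    # Hypothetical usage: decode one staff image into (duration, pitch) pairs.
    for length, pitch in predict('samples/sample0.png'):
        print(length, pitch)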