Code example #1
File: parser_utils.py Project: RayTang88/NLP
def minibatches(data, batch_size):
    x = np.array([d[0] for d in data])
    y = np.array([d[2] for d in data])
    one_hot = np.zeros((y.size, 3))
    one_hot[np.arange(y.size), y] = 1

    return get_minibatches([x, one_hot], batch_size)
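All of these snippets call a shared get_minibatches helper whose exact implementation varies by project. A minimal sketch in the spirit of the CS224n starter code (an assumption for illustration, not code from any project listed here) is below; note too that the fancy-indexing assignment in example #1, one_hot[np.arange(y.size), y] = 1, is the standard vectorized way to build a one-hot matrix.

import numpy as np

def get_minibatches(data, minibatch_size, shuffle=True):
    # data is either a single array-like, or a list of array-likes of equal
    # length (e.g. [inputs, labels]); batches come back in the same shape.
    is_list = isinstance(data, list) and isinstance(data[0], (list, np.ndarray))
    data_size = len(data[0]) if is_list else len(data)
    indices = np.arange(data_size)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, data_size, minibatch_size):
        batch = indices[start:start + minibatch_size]
        pick = lambda d: d[batch] if isinstance(d, np.ndarray) else [d[i] for i in batch]
        yield [pick(d) for d in data] if is_list else pick(data)

Keyword arguments seen in later examples (shuffle=False, is_multi_feature_input=True) are project-specific extensions of the same idea.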
Code example #2
    def run_valid_epoch(self, sess, dataset, valid_writer, merged):
        print "\nEvaluating on dev set",
        total_correct = 0
        total_loss = []
        predicted_ids = []
        target_ids = []

        for i, (valid_x, valid_y) in enumerate(
                get_minibatches([dataset.valid_inputs, dataset.valid_targets],
                                self.config.test_batch_size,
                                is_multi_feature_input=True)):
            batch_feed = self.create_feed_dict(valid_x,
                                               labels_batch=valid_y,
                                               is_training=False)
            batch_predictions, batch_loss, batch_accuracy = sess.run(
                [self.pred, self.loss, self.accuracy], feed_dict=batch_feed)

            total_correct += (batch_accuracy * len(valid_y))
            total_loss.append(batch_loss)

            predicted_ids.extend(list(np.argmax(batch_predictions, axis=1)))
            target_ids.extend(list(np.argmax(valid_y, axis=1)))

        valid_loss = sum(total_loss) / float(len(total_loss))
        valid_accuracy = float(total_correct) / len(dataset.valid_targets)

        valid_f1_score = get_weighted_f1_score(predicted_ids, target_ids)

        return valid_loss, valid_accuracy, valid_f1_score
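get_weighted_f1_score above is a project utility. Assuming it computes the support-weighted F1 over the predicted and target class ids, scikit-learn's f1_score is an equivalent stand-in:

from sklearn.metrics import f1_score

def get_weighted_f1_score(predicted_ids, target_ids):
    # Support-weighted F1 across classes; a sketch of what the helper
    # above is assumed to compute.
    return f1_score(target_ids, predicted_ids, average="weighted")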
Code example #3
    def run_epoch(self, sess, inputs, labels, input_masks, label_masks,
                  train_writer, step_i):
        """Runs an epoch of training.

				Args:
						sess: tf.Session() object
						inputs: np.ndarray of shape (n_samples, n_features)
						labels: np.ndarray of shape (n_samples, n_classes)
						input_masks: boolean np.ndarray of shape (max_num_frames,)
						label_masks: boolean np.ndarray of shape (max_num_frames,)
						train_writer: a tf.summary.FileWriter object
						step_i: The global number of steps taken so far (i.e., batches we've done a full forward
										and backward pass on) 
				Returns:
						average_loss: scalar. Average minibatch loss of model on epoch.
				"""
        n_minibatches, total_loss = 0, 0
        for input_batch, labels_batch, input_masks_batch, label_masks_batch in \
              get_minibatches([inputs, labels, input_masks, label_masks], self.config.batch_size):
            n_minibatches += 1
            batch_loss, summary, feed = self.train_on_batch(
                sess, input_batch, input_masks_batch, labels_batch,
                label_masks_batch)
            total_loss += batch_loss

            train_writer.add_summary(summary, step_i)
            #print "step_i: ", step_i
            step_i += 1

        return total_loss / n_minibatches, step_i, feed
Code example #4
    def run_epoch(self, sess, inputs, labels, train_writer, step_i):
        """Runs an epoch of training.

        Args:
                sess: tf.Session() object
                inputs: A list of length num_examples with float np.ndarray entries of shape (max_num_frames, num_mfcc_coeffs)
                labels: A list of length num_examples with float np.ndarray entries of shape (max_num_frames, num_mfcc_coeffs)
                train_writer: a tf.summary.FileWriter object
                step_i: The global number of steps taken so far (i.e., batches we've done a full forward
                                and backward pass on)
        Returns:
                average_loss: scalar. Average minibatch loss of model on epoch.
                step_i: The global number of steps taken so far (i.e., batches we've done a full forward
                                and backward pass on)
        """
        n_minibatches, total_loss = 0, 0
        for input_batch, labels_batch in get_minibatches(
            [inputs, labels], self.config.batch_size):
            batch_loss, summary = self.train_on_batch(sess, input_batch,
                                                      labels_batch)
            total_loss += batch_loss
            n_minibatches += 1
            train_writer.add_summary(summary, step_i)
            step_i += 1

        return total_loss / n_minibatches, step_i
Code example #5
 def predict(self, sess, inputs, ids):
     labels = []
     for inputs_batch in get_minibatches(inputs, self.config.batch_size):
         raw_preds = self.predict_on_batch(sess, inputs_batch)
         masks = self.convert_to_mask(raw_preds, 0.0)
         labels += masks
     return ids, labels
Code example #6
    def run_epoch(self, inputs, labels):
        """Runs an epoch of training.

        Args:
            inputs: np.ndarray of shape (n_samples, n_features)
            labels: np.ndarray of shape (n_samples, n_classes)
        Returns:
            average_loss: scalar. Average minibatch loss of model on epoch.
        """
        config = self.config
        n_minibatches, total_loss = 0, 0
        for input_batch, labels_batch in get_minibatches([inputs, labels], config.batch_size):
            n_minibatches += 1
            dy.renew_cg()
            '''Compute the loss of a batch'''
            # loss = []
            # for i in xrange(config.batch_size):
            #     input_t, labels_t = input_batch[i].reshape(1, config.n_features), labels_batch[i].reshape(1, config.n_classes)
            #     loss_t = self.train_on_batch(input_t, labels_t)
            #     loss.append(loss_t)
            # loss = dy.esum(loss) / config.batch_size
            loss = self.train_on_batch(input_batch, labels_batch) / config.batch_size

            loss.forward()
            loss.backward()
            self.trainer.update()

            total_loss += loss.value()
        return total_loss / n_minibatches
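This DyNet example (and example #20 below) follows the library's explicit graph lifecycle: dy.renew_cg() starts a fresh computation graph, forward() evaluates the loss expression, backward() computes gradients, and trainer.update() applies them. A self-contained toy version of that loop, assuming DyNet 2.x (the single linear layer is a stand-in, not the classifier above):

import dynet as dy
import numpy as np

pc = dy.ParameterCollection()
W = pc.add_parameters((3, 10))  # stand-in model: 10 features -> 3 classes
b = pc.add_parameters(3)
trainer = dy.SimpleSGDTrainer(pc)

x, y = np.random.rand(10), 1    # one toy example
dy.renew_cg()                   # fresh graph for this step
logits = W * dy.inputTensor(x) + b
loss = dy.pickneglogsoftmax(logits, y)
loss.forward()                  # evaluate the graph
loss.backward()                 # compute gradients
trainer.update()                # apply the update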
Code example #7
    def run_epoch(self, sess, train_examples, dev_set):
        """Runs an epoch of training.

        Args:
            train_examples: a list of np.arrays of the form [inputs, labels].
            dev_set: a list of np.arrays of the form [inputs, labels].
        """
        n_minibatches, total_loss = 0, 0
        mask_batch = np.full((self.config.batch_size, self.config.max_length),
                             fill_value=True)
        for inputs_batch, labels_batch in get_minibatches(
                train_examples, self.config.batch_size):
            # train_examples is a list of (sentence, label, mask) tuples.
            # Each sentence/label/mask is itself a list of integers, or booleans in the case of the mask.
            assert mask_batch.shape == inputs_batch.shape, "mask_batch size doesn't match with inputs_batch."
            n_minibatches += 1
            total_loss += self.train_on_batch(sess, inputs_batch, labels_batch,
                                              mask_batch)
            '''
            preds= self.predict_on_batch(sess, *batches)
            for pred in preds:
                print [self.helper.id2tok.get(np.argmax(t)) for t in pred]
            '''

            # batches = [input_batch, labels_batch, mask_batch].

        train_loss = total_loss / n_minibatches
        dev_loss = self.evaluate(sess, dev_set)

        return train_loss, dev_loss
Code example #8
 def evaluate(self, sess, examples):
     n_minibatches, total_loss = 0, 0
     for input_batch, labels_batch in get_minibatches(
             examples, self.config.batch_size):
         n_minibatches += 1
         total_loss += self.loss_on_batch(sess, input_batch, labels_batch)
     return total_loss / n_minibatches
Code example #9
def minibatches(data, batch_size, pad_instance, config):
    data_dict = {"features": []}
    sent = []
    ch = []
    ch_len = []
    n_gram = []
    n_gram_len = []
    sent_len = []
    seq_len = []
    arc_pair_x = []
    arc_y = []
    pos_y = []
    dep_y = []

    for d in data:
        instances = d["instances"][:config.seq_len]
        ex = d["ex"]
        word_list = ex["word"][:config.sent_len]
        pos_list = ex["pos"][:config.sent_len]
        ch_list = ex["ch"][:config.sent_len]
        n_gram_list = ex["n_gram"][:config.sent_len]

        n_words = min(len(ex["word"]), config.sent_len)
        data_dict["features"].append([i[0] for i in instances])

        sent += [word_list + [config.pad_id] * (config.sent_len - n_words)]
        ch += [[
            i[:config.n_char] + [config.pad_id] * (config.n_char - len(i))
            for i in ch_list
        ] + [[config.pad_id] * config.n_char] * (config.sent_len - n_words)]
        n_gram += [[
            i[:config.n_char] + [config.pad_id] * (config.n_char - len(i))
            for i in n_gram_list
        ] + [[config.pad_id] * config.n_char] * (config.sent_len - n_words)]

        arc_y.append([int(i[2] / config.n_dep_classes) for i in instances])
        dep_y.append([int(i[2] % config.n_dep_classes) for i in instances])
        pos_y.append([i - config.pos_offset
                      for i in pos_list] + [0] * (config.sent_len - n_words))

        seq_len += [n_words * 2]
        sent_len += [n_words]
        ch_len += [[min(len(i), config.n_char)
                    for i in ch_list] + [0] * (config.sent_len - n_words)]
        n_gram_len += [[min(len(i), config.n_char) for i in n_gram_list] +
                       [0] * (config.sent_len - n_words)]

    data_dict["sent"] = sent
    data_dict["char"] = ch
    data_dict["n_gram"] = n_gram
    data_dict["char_len"] = ch_len
    data_dict["n_gram_len"] = n_gram_len
    data_dict["seq_len"] = seq_len
    data_dict["sent_len"] = sent_len
    data_dict["arc_y"] = arc_y
    data_dict["dep_y"] = dep_y
    data_dict["pos_y"] = pos_y

    return get_minibatches(data_dict, batch_size, pad_instance, config)
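Example #9 repeats one pad-or-truncate idiom for words, characters, and n-grams. Pulled out as a hypothetical helper for clarity:

def pad_or_truncate(seq, target_len, pad_id):
    # Clip seq to target_len, then right-pad with pad_id up to target_len.
    seq = seq[:target_len]
    return seq + [pad_id] * (target_len - len(seq))

# pad_or_truncate([4, 7, 9], 5, 0) -> [4, 7, 9, 0, 0]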
Code example #10
 def run_train_epoch(self, sess, train_inputs, train_labels):
     # Iterate through the train inputs, and train the weights
     prog = Progbar(target=1 + len(train_labels) // self.config.batch_size)
     iterator = get_minibatches([train_inputs, train_labels],
                                self.config.batch_size)
     for i, (train_x, train_y) in enumerate(iterator):
         loss = self.train_on_batch(sess, train_x, train_y)
         prog.update(i + 1, [("train loss", loss)])
Code example #11
    def run_epoch(self, sess, inputs, labels):
        """Runs an epoch of training.

        """
        n_minibatches, total_loss = 0, 0
        # Note to self: get_minibatches randomly selects up to batch_size of the
        # total training samples; this implements SGD.
        for input_batch, labels_batch in get_minibatches([inputs, labels], self.config.batch_size):
            n_minibatches += 1
            total_loss += self.train_on_batch(sess, input_batch, labels_batch)
        return total_loss / n_minibatches
Code example #12
 def dev_model(self, sess, inputs, labels, seqs, mask):
     n_minibatches, total_loss = 0.0, 0.0
     for input_batch, labels_batch, seq_batch, mask_batch in get_minibatches(
         [inputs, labels, seqs, mask], self.config.batch_size):
         n_minibatches += 1
         batchloss = self.loss_on_batch(sess, input_batch, labels_batch,
                                        seq_batch, mask_batch)
         total_loss += batchloss
     return total_loss / n_minibatches
Code example #13
    def run_epoch(self, sess, config, dataset, train_writer, merged):  # run one epoch in minibatches
        prog = Progbar(target=1 + len(dataset.train_inputs[0]) // config.batch_size)
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs, dataset.train_targets],
                                                               config.batch_size, is_multi_feature_input=True)):
            # print "input, outout: {}, {}".format(np.array(train_x).shape, np.array(train_y).shape)

            summary, loss = self.train_on_batch(sess, train_x, train_y, merged)  # main training call
            prog.update(i + 1, [("train loss", loss)])
            # train_writer.add_summary(summary, global_step=i)
        return summary, loss  # Last batch
Code example #14
 def test_model(self, sess, inputs, labels, seqs, mask):
     n_minibatches, total_loss, total_correct = 0.0, 0.0, 0.0
     pred = None
     lbls = None
     for input_batch, labels_batch, seq_batch, mask_batch in get_minibatches(
         [inputs, labels, seqs, mask], self.config.batch_size):
         n_minibatches += 1
         lbls, pred, batchloss, batchcorrect = self.test_on_batch(
             sess, input_batch, labels_batch, seq_batch, mask_batch)
         total_loss += batchloss
         total_correct += batchcorrect
     return lbls, pred, total_loss / n_minibatches, total_correct / n_minibatches
Code example #15
    def evaluate_answer(self, session, dataset, sample=100, log=False):
        """
        Evaluate the model's performance using the harmonic mean of F1 and Exact Match (EM)
        against the set of true answer labels.

        This step takes quite some time, so we only sample 100 examples from
        either the training or testing set.

        :param session: session should always be centrally managed in train.py
        :param dataset: a representation of our data; in some implementations you can
                        pass in multiple components (arguments) of one dataset to this
                        function. Here the signature is [vq, vc, va].
        :param sample: how many examples in the dataset we look at
        :param log: whether we print to the stdout stream
        :return:
        """
        for dataset_minix in get_minibatches(dataset, sample):
            dataset_feed = self.prepare_feed_input(dataset_minix)
            ind_out = self.answer_indices(session, dataset_feed)  #batch, 2
            answers = dataset_minix[2]
            gold = np.array(answers)  #batch, 2
            preds = np.array(ind_out).T  #batch, 2
            gold_lengths = gold[:, 1] - gold[:, 0] + 1  #batch
            preds_lengths = preds[:, 1] - preds[:, 0] + 1  #batch
            correct_mask = preds[:, 1] >= preds[:, 0]  #batch
            correct_end_indices = np.where(gold[:, 1] < preds[:, 1],
                                           gold[:, 1], preds[:, 1])
            correct_start_indices = np.where(gold[:, 0] > preds[:, 0],
                                             gold[:, 0], preds[:, 0])

            correct_lengths = np.clip(
                correct_end_indices - correct_start_indices + 1, 0, None)
            correct_lengths = correct_mask * correct_lengths
            preds_lengths = preds_lengths * correct_mask
            gl = np.sum(gold_lengths)
            pl = np.sum(preds_lengths)
            exact_preds = np.logical_and((correct_lengths == gold_lengths),
                                         (correct_lengths == preds_lengths))
            cl = np.sum(correct_lengths)
            p = 0.
            if pl != 0:
                p = cl / pl
            r = cl / gl
            f1 = 0.
            if (p + r) != 0.:
                f1 = 2 * p * r / (p + r)
            em = np.sum(exact_preds)
            if log:
                logging.info("F1: {}, EM: {}, for {} samples".format(
                    f1, em, sample))
            break
        return f1, em
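Example #15 scores answer spans by overlap: the intersection of a gold and a predicted span runs from the later start to the earlier end, and precision/recall are the overlap length divided by the predicted/gold span lengths. A small worked instance of that arithmetic (toy spans, inclusive indices as above):

import numpy as np

gold = np.array([[3, 7]])                    # inclusive [start, end]: length 5
pred = np.array([[5, 9]])                    # length 5
start = np.maximum(gold[:, 0], pred[:, 0])   # 5
end = np.minimum(gold[:, 1], pred[:, 1])     # 7
overlap = np.clip(end - start + 1, 0, None)  # 3 tokens (5, 6, 7)
p = overlap / (pred[:, 1] - pred[:, 0] + 1)  # precision = 3/5
r = overlap / (gold[:, 1] - gold[:, 0] + 1)  # recall = 3/5
f1 = 2 * p * r / (p + r)                     # 0.6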
Code example #16
 def run_dev_epoch(self, sess, dev_inputs, dev_labels):
     # Iterate through the dev inputs and print the accuracy
     prf = []
     print "Evaluating on dev set"
     prog = Progbar(target=1 + len(dev_labels) / self.config.batch_size)
     iterator = get_minibatches([dev_inputs, dev_labels],
                                self.config.batch_size)
     for i, (train_x, train_y) in enumerate(iterator):
         prf.append(self.evaluate_on_batch(sess, train_x, train_y))
         prog.update(i + 1)
     prf = np.mean(np.array(prf), axis=0)
     print "Precision={:.2f}, Recall={:.2f}, F1={:.2f}".format(
         prf[0], prf[1], prf[2])
     return prf[2]
Code example #17
 def run_epoch(self, sess, inputs, labels):
     """Runs an epoch of training.
     Args:
         sess: tf.Session() object
         inputs: np.ndarray of shape (n_samples, n_features)
         labels: np.ndarray of shape (n_samples, n_classes)
     Returns:
         average_loss: scalar. Average minibatch loss of model on epoch.
     """
     n_minibatches, total_loss = 0, 0
     for input_batch, labels_batch in get_minibatches([inputs, labels], self.config.batch_size):
         n_minibatches += 1
         total_loss += self.train_on_batch(sess, input_batch, labels_batch)
     return total_loss / n_minibatches
Code example #18
    def run_test_epoch(self, sess, dataset):
        print "\nEvaluating on Test set",
        total_correct = 0
        total_loss = []
        predicted_ids = []
        target_ids = []

        incorrect_predictions_path = os.path.join(
            DataConfig.data_dir_path, DataConfig.model_dir,
            DataConfig.test_incorrect_predictions_file)
        predictions_path = os.path.join(DataConfig.data_dir_path,
                                        DataConfig.model_dir,
                                        DataConfig.test_predictions_file)

        if os.path.exists(predictions_path):
            os.remove(predictions_path)
        if os.path.exists(incorrect_predictions_path):
            os.remove(incorrect_predictions_path)

        offset = 0

        for i, (test_x, test_y) in enumerate(
                get_minibatches([dataset.test_inputs, dataset.test_targets],
                                self.config.test_batch_size,
                                shuffle=False,
                                is_multi_feature_input=True)):
            batch_feed = self.create_feed_dict(test_x,
                                               labels_batch=test_y,
                                               is_training=False)
            batch_predictions, batch_loss, batch_accuracy = sess.run(
                [self.pred, self.loss, self.accuracy], feed_dict=batch_feed)

            total_correct += (batch_accuracy * len(test_y))
            total_loss.append(batch_loss)

            predicted_ids.extend(list(np.argmax(batch_predictions, axis=1)))
            target_ids.extend(list(np.argmax(test_y, axis=1)))

            self.dump_incorrect_predictions(batch_predictions, test_y, offset,
                                            incorrect_predictions_path,
                                            predictions_path)
            offset += len(test_y)

        test_loss = sum(total_loss) / float(len(total_loss))
        test_accuracy = float(total_correct) / len(dataset.test_targets)

        test_f1_score = get_weighted_f1_score(predicted_ids, target_ids)

        return test_loss, test_accuracy, test_f1_score
Code example #19
File: q1_classifier.py Project: ziyaochen/CS224n
    def run_epoch(self, sess, inputs, labels):
        """Runs an epoch of training.

        Args:
            sess: tf.Session() object
            inputs: np.ndarray of shape (n_samples, n_features)
            labels: np.ndarray of shape (n_samples, n_classes)
        Returns:
            average_loss: scalar. Average minibatch loss of model on epoch.
        """
        n_minibatches, total_loss = 0, 0
        for input_batch, labels_batch in get_minibatches([inputs, labels], self.config.batch_size):
            n_minibatches += 1
            total_loss += self.train_on_batch(sess, input_batch, labels_batch)
        return total_loss / n_minibatches
Code example #20
    def run_epoch(self, inputs, labels):
        config = self.config
        n_minibatches, total_loss = 0, 0
        for input_batch, labels_batch in get_minibatches([inputs, labels],
                                                         config.batch_size):
            n_minibatches += 1
            dy.renew_cg()
            loss = self.train_on_batch(input_batch,
                                       labels_batch) / config.batch_size

            loss.forward()
            loss.backward()
            self.trainer.update()

            total_loss += loss.value()
        return total_loss / n_minibatches
Code example #21
    def run_epoch(self, sess, examples):
        """Runs an epoch of training.

        Args:
            sess: tf.Session() object
            examples: a list [inputs, labels], where inputs is an np.ndarray of
                shape (n_samples, n_features) and labels is an np.ndarray of
                shape (n_samples, n_classes)
        Returns:
            average_loss: scalar. Average minibatch loss of model on epoch.
        """
        n_minibatches, total_loss = 0, 0
        # Note to self: get_minibatches randomly selects up to batch_size of the
        # total training samples; this implements SGD.
        for input_batch, labels_batch in get_minibatches(
                examples, self.config.batch_size):
            n_minibatches += 1
            total_loss += self.train_on_batch(sess, input_batch, labels_batch)
        return total_loss / n_minibatches
Code example #22
    def run_epoch(self, dataset, sess):
        '''dataset is a list [q,c,a, c_sent_masks]'''

        n_minibatches = 0.
        total_loss = 0.

        for dataset_mini in get_minibatches(dataset, self.minibatch_size):
            n_minibatches += 1
            dataset_feed = self.prepare_feed_input(dataset_mini)
            feed_dict = self.feed_dict(dataset_feed)

            output = [self.optimizer_op, self.loss, self.global_norm]
            _, loss, global_norm = sess.run(output, feed_dict)
            if not n_minibatches % 100:
                print("n_minibatch = {}".format(n_minibatches),
                      "loss: {}".format(loss),
                      "global_norm{}".format(global_norm))
            total_loss += loss
        return total_loss / n_minibatches
Code example #23
 def run_epoch(self, dataset, sess):
     '''dataset is a list [q,c,a]'''
     
     n_minibatches = 0.
     total_loss = 0.
     
     for dataset_mini in get_minibatches(dataset, self.minibatch_size):
         dat_pad, dat_pad_masks = self.pad(dataset_mini)
         dat_pad_masks = dat_pad_masks[:-1]
         n_minibatches += 1
         dataset_i = [dat_pad, dat_pad_masks]
         feed_dict = self.feed_dict(dataset_i)
         
         output = [self.optimizer_op, self.loss, self.global_norm]
         _, loss, global_norm = sess.run(output, feed_dict)
         if not n_minibatches % 100:
             print("n_minibatch = {}".format(n_minibatches), "loss: {}".format(loss), "global_norm: {}".format(global_norm))
         total_loss += loss
     return total_loss / n_minibatches
Code example #24
    def evaluate_answer(self, session, dataset, sample=100, log=False):
        """
        Evaluate the model's performance using the harmonic mean of F1 and Exact Match (EM)
        against the set of true answer labels.

        This step takes quite some time, so we only sample 100 examples from
        either the training or testing set.

        :param session: session should always be centrally managed in train.py
        :param dataset: a representation of our data; in some implementations you can
                        pass in multiple components (arguments) of one dataset to this
                        function. Here the signature is [vq, vc, va].
        :param sample: how many examples in the dataset we look at
        :param log: whether we print to the stdout stream
        :return:
        """
        for dataset_s in get_minibatches(dataset, sample):
            data_pad, data_pad_masks = self.pad(dataset_s)
            q, c, a = data_pad
            q_masks, c_masks = data_pad_masks[:2]
            dataset_a = [[q, c, a], [q_masks, c_masks]]
            
            ind_out, answers = np.array(self.answer_indices(session, dataset_a))  # batch, max_length

            gold = np.array(answers)  # batch, max_length
            pp = np.sum(ind_out)
            gp = np.sum(gold)
            correct_predictions = np.logical_and(ind_out, gold)
            exact_preds = ind_out == gold
            cp = np.sum(correct_predictions)
            p = 0.
            if pp != 0.:
                p = cp/pp
            r = cp/gp
            f1 = 0.
            if (p + r) != 0.:
                f1 = 2*p*r/(p+r)
            em = np.sum(np.all(exact_preds, axis=1)) / sample
            if log:
                logging.info("F1: {}, EM: {}, for {} samples".format(f1, em, sample))
            break
        return f1, em
Code example #25
    def run_epoch(self, sess, inputs, labels):
        """Runs an epoch of training.

        Args:
            sess: tf.Session() object
            inputs: np.ndarray of shape (n_samples, n_features)
            labels: np.ndarray of shape (n_samples, n_classes)
        Returns:
            average_loss: scalar. Average minibatch loss of model on epoch.
        """
        n_minibatches, total_loss = 0, 0
        inputs_shape = inputs.shape
        prog = Progbar(target=1 + inputs_shape[0] // self.config.batch_size)
        for input_batch, labels_batch in get_minibatches(
            [inputs, labels], self.config.batch_size):
            n_minibatches += 1
            loss = self.train_on_batch(sess, input_batch, labels_batch)
            total_loss += loss
            prog.update(n_minibatches, [("train loss", loss)])
        return total_loss / n_minibatches
Code example #26
    def evaluate(self, sess, inputs, labels, metric=None):
        from utils.data_utils import Precision
        from utils.data_utils import get_iou_vector
        IoUs = []
        IoUs2 = []
        Precisions = []

        for input_batch, labels_batch in get_minibatches(
            [inputs, labels], self.config.batch_size):
            raw_preds = self.predict_on_batch(sess, input_batch)
            #IoUbatch = get_iou_vector(labels_batch.astype(int), (raw_preds>0.5).astype(int))
            #IoUs = IoUs + IoUbatch
            pBatch, IoUbatch = Precision((raw_preds > 0.5).astype(int),
                                         labels_batch)
            Precisions = Precisions + pBatch.tolist()
            IoUs = IoUs + IoUbatch.tolist()
            IoUs2 = IoUs2 + get_iou_vector(labels_batch,
                                           (raw_preds > 0.5).astype(int))
        return np.mean(IoUs), np.mean(IoUs2), np.mean(Precisions)
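Precision and get_iou_vector here are project utilities. Assuming they score binary masks, the underlying intersection-over-union computation is roughly:

import numpy as np

def iou(pred_mask, true_mask):
    # IoU for one pair of binary masks; a sketch of what the helpers
    # above are assumed to compute per example.
    pred_mask, true_mask = pred_mask.astype(bool), true_mask.astype(bool)
    intersection = np.logical_and(pred_mask, true_mask).sum()
    union = np.logical_or(pred_mask, true_mask).sum()
    return 1.0 if union == 0 else intersection / union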
Code example #27
    def run_epoch(self, sess, config, dataset, train_writer, merged):
        prog = Progbar(target=1 +
                       len(dataset.train_inputs[0]) // config.batch_size)
        for i, (train_x, train_y) in enumerate(
                get_minibatches([dataset.train_inputs, dataset.train_targets],
                                config.batch_size,
                                is_multi_feature_input=True)):
            print "word input, char input, outout: {}, {}, {}".format(
                np.array(train_x[0]).shape,
                np.array(train_x[1]).shape,
                np.array(train_y).shape)

            summary, loss = self.train_on_batch(sess, train_x, train_y, merged)
            prog.update(i + 1, [("train loss", loss)])

        # feed = self.create_feed_dict(dataset.train_inputs, labels_batch=dataset.train_targets,
        #                              keep_prob_word=self.config.keep_prob, keep_prob_fc=self.config.keep_prob_fc,
        #                              is_training=False)
        # train_accuracy = sess.run(self.accuracy, feed_dict=feed)
        # print "- train Accuracy: {:.2f}".format(train_accuracy * 100.0)

        return summary, loss  # returns for Last batch
Code example #28
    def run_epoch(self, sess, inputs, input_masks, labels, label_masks,
                  train_writer, step_i, should_output_wavefiles):
        """Runs an epoch of training.

				Args:
						sess: tf.Session() object
						inputs: A list of length num_examples with float np.ndarray entries of shape (max_num_frames, num_mfcc_coeffs) 
						input_masks: A list of length num_examples with boolean np.darray entries of shape (max_num_frames,)
						labels: A list of length num_examples with float np.ndarray entries of shape (max_num_frames, num_mfcc_coeffs)	
						label_masks: A list of length num_examples with boolean np.darray entries of shape (max_num_frames,)
						train_writer: a tf.summary.FileWriter object
						step_i: The global number of steps taken so far (i.e., batches we've done a full forward
										and backward pass on) 
				Returns:
						average_loss: scalar. Average minibatch loss of model on epoch.
						step_i: The global number of steps taken so far (i.e., batches we've done a full forward
										and backward pass on)
				"""
        n_minibatches, total_loss = 0, 0
        for input_batch, input_masks_batch, labels_batch, label_masks_batch in \
              get_minibatches([inputs, input_masks, labels, label_masks], self.config.batch_size):

            # We only evaluate and output wavefiles on the first batch of the epoch
            should_output_wavefiles_batch = False
            if n_minibatches == 0:
                should_output_wavefiles_batch = True
            batch_loss, summary = self.train_on_batch(
                sess, input_batch, input_masks_batch, labels_batch,
                label_masks_batch, should_output_wavefiles
                and should_output_wavefiles_batch)
            total_loss += batch_loss

            n_minibatches += 1
            train_writer.add_summary(summary, step_i)
            step_i += 1

        return total_loss / n_minibatches, step_i
Code example #29
    def predict(self, sess, inputs, ids):
        from utils.data_utils import rle_encode
        import pandas as pd

        labels = {}
        for ix, inputs_batch in enumerate(
                get_minibatches(inputs, self.config.batch_size,
                                shuffle=False)):
            raw_preds = self.predict_on_batch(sess, inputs_batch)
            masks = self.convert_to_mask(raw_preds, 0.5)
            for kx, mask in enumerate(masks):
                # im = Image.fromarray(np.uint8(mask * 255))
                # im.save("/results/preds/" + str(ids[ix + kx]) + ".png", "PNG")
                labels[ids[ix * self.config.batch_size + kx]] = rle_encode(mask)

        df = pd.DataFrame.from_dict(labels, orient="index")
        df.index.names = ['id']
        df.columns = ['rle_mask']
        df.to_csv("../submission/results.csv")

        return ids, labels
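rle_encode is imported from the project's utils. A common Kaggle-style run-length encoder consistent with how it is used above (an assumption, not necessarily the project's implementation):

import numpy as np

def rle_encode(mask):
    # Encode a binary mask as '<start> <length> <start> <length> ...',
    # 1-indexed, scanning column-major as Kaggle submissions expect.
    pixels = mask.flatten(order="F")
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return " ".join(str(x) for x in runs)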
Code example #30
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        #new_saver = tf.train.Saver()
        #new_saver.restore(sess, './models/regularized_RNN_100d-1-29')
        saver = tf.train.Saver()
        for epoch in range(n_epochs):
            print("Epoch: ", epoch)
            #n_minibatches = 1 + len(train_embeddings) / batch_size
            #prog = tf.keras.utils.Progbar(target=n_minibatches)

            for i, (s1, s2, l, m1, m2) in enumerate(
                    get_minibatches([[ex[0] for ex in train_input],
                                     [ex[1] for ex in train_input],
                                     [ex[2] for ex in train_input],
                                     [ex[3] for ex in train_input],
                                     [ex[4] for ex in train_input]],
                                    batch_size)):
                feed_dict = {
                    input_placeholder_s1: s1,
                    input_placeholder_s2: s2,
                    mask_placeholder_s1: m1,
                    mask_placeholder_s2: m2,
                    labels_placeholder: l,
                    dropout_placeholder: dropout
                }
                _, batch_loss = sess.run([train_op, loss], feed_dict=feed_dict)
                # print("Gradient:", global_norm.eval())
Code example #31
def main():
    print "Starting matlab ... type in your password if prompted"
    eng = matlab.engine.start_matlab()
    eng.addpath('../invMFCCs_new')
    print "Done starting matlab"

    batch_size = 32
    n_epochs = 50
    lr = 1e-3
    max_num_frames = 706
    num_mfcc_coeffs = 25
    sample_rate = 16000.0
    num_features = max_num_frames * num_mfcc_coeffs
    state_size_1 = 25
    state_size_2 = 50
    state_size_3 = 50
    state_size_4 = 25

    inputs, labels = preprocess_data(num_mfcc_coeffs, max_num_frames)
    logs_path = "tensorboard/" + strftime("%Y_%m_%d_%H_%M_%S", gmtime())

    input_placeholder = tf.placeholder(tf.float32,
                                       (None, max_num_frames, num_mfcc_coeffs))
    labels_placeholder = tf.placeholder(
        tf.float32, (None, max_num_frames, num_mfcc_coeffs))

    xavier = tf.contrib.layers.xavier_initializer()
    W1 = tf.get_variable("W1",
                         shape=(num_features, state_size_1),
                         initializer=xavier)
    b1 = tf.get_variable("b1", shape=(1, state_size_1))

    W2 = tf.get_variable("W2",
                         shape=(state_size_1, state_size_2),
                         initializer=xavier)
    b2 = tf.get_variable("b2", shape=(1, state_size_2))

    W3 = tf.get_variable("W3",
                         shape=(state_size_2, state_size_3),
                         initializer=xavier)
    b3 = tf.get_variable("b3", shape=(1, state_size_3))

    W4 = tf.get_variable("W4",
                         shape=(state_size_3, state_size_4),
                         initializer=xavier)
    b4 = tf.get_variable("b4", shape=(1, state_size_4))

    W5 = tf.get_variable("W5",
                         shape=(state_size_4, num_features),
                         initializer=xavier)
    b5 = tf.get_variable("b5", shape=(1, num_features))

    # forward propagation
    mfcc_preds = forward_prop(W1, W2, W3, W4, W5, b1, b2, b3, b4, b5,
                              input_placeholder, max_num_frames,
                              num_mfcc_coeffs)

    # backward propagation
    loss = tf.reduce_mean(tf.squared_difference(mfcc_preds,
                                                labels_placeholder))
    updates = tf.train.GradientDescentOptimizer(lr).minimize(loss)

    # run SGD
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    train_writer = tf.summary.FileWriter(logs_path + '/train', sess.graph)

    for epoch in range(n_epochs):
        start_time = time()
        # train with each batch
        n_minibatches = 0
        for input_batch, labels_batch in get_minibatches([inputs, labels],
                                                         batch_size):
            n_minibatches += 1
            feed = {
                input_placeholder: input_batch,
                labels_placeholder: labels_batch
            }
            sess.run(updates, feed_dict=feed)

            #train_writer.add_summary(summary, epoch_count)

        duration = time() - start_time
        print('Epoch ' + str(epoch) + ' : loss = ' +
              str(loss.eval(session=sess, feed_dict=feed)) +
              ' (' + str(duration) + ' sec)')

        predicted_mfccs_batch = mfcc_preds.eval(session=sess, feed_dict=feed)
        for i in range(predicted_mfccs_batch.shape[0]):
            predicted_mfccs_transposed = np.transpose(
                predicted_mfccs_batch[i, :, :])

            # MFCC features need to be a numpy array of shape (num_coefficients x num_frames) in order to be passed to the invmelfcc function
            inverted_wav_data = eng.invmelfcc(
                matlab.double(predicted_mfccs_transposed.tolist()),
                sample_rate, num_mfcc_coeffs)

            # eng.soundsc(inverted_wav_data, sample_rate, nargout=0)
            inverted_wav_data = np.squeeze(np.array(inverted_wav_data))

            # Scales the waveform to be between -1 and 1
            maxVec = np.max(inverted_wav_data)
            minVec = np.min(inverted_wav_data)
            inverted_wav_data = ((inverted_wav_data - minVec) /
                                 (maxVec - minVec) - 0.5) * 2

            wav.write('learned_wav' + str(i) + '.wav', sample_rate,
                      inverted_wav_data)

    sess.close()