Example #1
def predict_loop(model, data, batch_size=128, callbacks=[], log=print, f=None):
    if f is None:
        f = model._predict
    ins = [data[name] for name in model.input_order]
    nb_sample = len(ins[0])
    outs = []
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, len(batches))
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)

        batch_outs = f(*ins_batch)
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]

        if batch_index == 0:
            for batch_out in batch_outs:
                shape = (nb_sample, ) + batch_out.shape[1:]
                outs.append(np.zeros(shape))

        for i, batch_out in enumerate(batch_outs):
            outs[i][batch_start:batch_end] = batch_out

    return dict(zip(model.output_order, outs))
Example #2
def predict_loop(model, data, batch_size=128, callbacks=[], log=print, f=None):
    if f is None:
        f = model._predict
    ins = [data[name] for name in model.input_order]
    nb_sample = len(ins[0])
    outs = []
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, len(batches))
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)

        batch_outs = f(*ins_batch)
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]

        if batch_index == 0:
            for batch_out in batch_outs:
                shape = (nb_sample,) + batch_out.shape[1:]
                outs.append(np.zeros(shape))

        for i, batch_out in enumerate(batch_outs):
            outs[i][batch_start:batch_end] = batch_out

    return dict(zip(model.output_order, outs))
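
Both predict_loop variants above rely on two small helpers from the surrounding module (imported as km): make_batches, which turns a sample count into a list of (start, end) index pairs, and slice_X, which applies the same row selection to every input array. Their implementations are not shown here; the following is a minimal sketch of what such helpers typically look like, so the loops above can be read in isolation.

import numpy as np


def make_batches(size, batch_size):
    # split `size` samples into consecutive (start, end) index pairs;
    # the last batch may be smaller than `batch_size`
    nb_batch = int(np.ceil(size / float(batch_size)))
    return [(i * batch_size, min(size, (i + 1) * batch_size))
            for i in range(nb_batch)]


def slice_X(X, ids):
    # select the same rows from every array in a list of model inputs
    if isinstance(X, list):
        return [x[ids] for x in X]
    return X[ids]
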
Example #3
def predict_on_batch(model, X, normalize=None, batch_size=32, shuffle=False, verbose=0):
    predictions = []

    nb_samples = X.shape[0]

    if shuffle:
        X = shuffle_data(X)

    # predict
    batches = make_batches(nb_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_logs = {}
        batch_logs['batch'] = batch_index
        batch_logs['size'] = batch_end - batch_start

        # load the actual images; X only contains paths
        X_batch = load_samples(X[batch_start:batch_end], batch_end - batch_start)
        X_batch = X_batch.astype("float32") / 255
        if normalize:
            X_batch = X_batch - normalize[0] # mean
            X_batch /= normalize[1] # std

        predictions += [model.predict_classes(X_batch, verbose=verbose).tolist()]

    predictions = np.hstack(predictions).tolist()

    return predictions
Example #4
def predict_on_batch(model,
                     X,
                     normalize=None,
                     batch_size=32,
                     shuffle=False,
                     verbose=0):
    predictions = []

    nb_samples = X.shape[0]

    if shuffle:
        X = shuffle_data(X)

    # predict
    batches = make_batches(nb_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_logs = {}
        batch_logs['batch'] = batch_index
        batch_logs['size'] = batch_end - batch_start

        # load the actual images; X only contains paths
        X_batch = load_samples(X[batch_start:batch_end],
                               batch_end - batch_start)
        X_batch = X_batch.astype("float32") / 255
        if normalize:
            X_batch = X_batch - normalize[0]  # mean
            X_batch /= normalize[1]  # std

        predictions += [
            model.predict_classes(X_batch, verbose=verbose).tolist()
        ]

    predictions = np.hstack(predictions).tolist()

    return predictions
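
predict_on_batch (and the train/test loops below) assumes a few project-specific helpers that are not part of Keras: load_samples reads the images behind a batch of file paths into an array, shuffle_data permutes the samples, and normalize is expected to be a (mean, std) pair computed on the training set. A rough, hypothetical sketch of those assumptions:

import numpy as np
from PIL import Image  # assumed image backend; the original project may use another


def shuffle_data(X, y=None):
    # random permutation of the samples; keeps X and y aligned
    perm = np.random.permutation(len(X))
    if y is None:
        return X[perm]
    return X[perm], y[perm]


def load_samples(paths, size):
    # hypothetical helper: read `size` images given their file paths and
    # stack them into a single (size, height, width, channels) array
    return np.stack([np.asarray(Image.open(p)) for p in paths[:size]])

With helpers along those lines, a call such as predict_on_batch(model, np.array(image_paths), normalize=(mean, std)) returns one predicted class per image.
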
Example #5
def train_on_batch(model, X, y, nb_classes,
                   callbacks=None, normalize=None, batch_size=32, class_weight=None, class_acc=True, shuffle=False):
    loss = []
    acc = []
    size = []

    nb_samples = X.shape[0]
    out_labels = ['loss', 'acc']

    if shuffle:
        X, y = shuffle_data(X, y)

    # batch train
    batches = make_batches(nb_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_logs = {}
        batch_logs['batch'] = batch_index
        batch_logs['size'] = batch_end - batch_start

        if callbacks:
            callbacks.on_batch_begin(batch_index, batch_logs)

        # load the actual images; X only contains paths
        X_batch = load_samples(X[batch_start:batch_end], batch_end - batch_start)
        X_batch = X_batch.astype("float32") / 255

        y_batch = y[batch_start:batch_end]
        y_batch = to_categorical(y_batch, nb_classes)

        if normalize:
            X_batch = X_batch - normalize[0] # mean
            X_batch /= normalize[1] # std

        # calculates the overall loss and accuracy
        outs = model.train_on_batch(X_batch, y_batch, accuracy=True, class_weight=class_weight)

        if not isinstance(outs, list):
            outs = [outs]
        for l, o in zip(out_labels, outs):
            batch_logs[l] = o

        # collect the per-batch statistics that are returned to the caller
        loss += [float(batch_logs['loss'])]
        acc += [float(batch_logs['acc'])]
        size += [batch_end - batch_start]

        # calculates the accuracy per class
        if class_acc:
            result = calc_class_acc(model, X[batch_start:batch_end], y[batch_start:batch_end], nb_classes,
                                    normalize=normalize,
                                    batch_size=batch_size,
                                    keys=['acc'])
            batch_logs['class_acc'] = result['acc']

        if callbacks:
            callbacks.on_batch_end(batch_index, batch_logs)

    return loss, acc, size
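
The callbacks argument of train_on_batch is expected to behave like a Keras CallbackList: an object exposing on_batch_begin(batch, logs) and on_batch_end(batch, logs). If you only want to capture the per-batch logs rather than wire up real Keras callbacks, a minimal stand-in could look like this (hypothetical, not part of the original project):

class BatchLogger(object):
    """Minimal callbacks object: records the logs dict of every batch."""

    def __init__(self):
        self.history = []

    def on_batch_begin(self, batch, logs=None):
        pass  # nothing to set up before a batch in this sketch

    def on_batch_end(self, batch, logs=None):
        self.history.append(dict(logs or {}))


# hypothetical usage:
# logger = BatchLogger()
# train_on_batch(model, X_paths, y_labels, nb_classes, callbacks=logger)
# print(logger.history[0]['loss'], logger.history[0].get('class_acc'))
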
Example #6
def write_loop(ins, fun, write_fun, batch_size=128, callbacks=[], log=print):
    nb_sample = len(ins[0])
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, len(batches))
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)

        batch_outs = fun(*ins_batch)
        write_fun(batch_outs, batch_start, batch_end)
Example #7
def write_loop(ins, fun, write_fun, batch_size=128, callbacks=[], log=print):
    nb_sample = len(ins[0])
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, len(batches))
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)

        batch_outs = fun(*ins_batch)
        write_fun(batch_outs, batch_start, batch_end)
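
write_loop leaves storage of the results to the caller: write_fun receives the raw batch outputs together with the batch boundaries, which makes it easy to stream predictions into a pre-allocated array (or an HDF5 dataset) instead of holding every batch in memory. A usage sketch, assuming ins, model and nb_classes are defined as in the earlier examples and that the prediction function returns a single output per batch:

import numpy as np

nb_sample = len(ins[0])
out = np.zeros((nb_sample, nb_classes), dtype=np.float32)


def write_batch(batch_outs, batch_start, batch_end):
    # the prediction function is assumed to return one array (or a one-element list)
    if isinstance(batch_outs, list):
        batch_outs = batch_outs[0]
    out[batch_start:batch_end] = batch_outs


write_loop(ins, model._predict, write_batch, batch_size=256)
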
Example #8
def test_on_batch(model,
                  X,
                  y,
                  nb_classes,
                  normalize=None,
                  batch_size=32,
                  shuffle=False):
    loss = []
    acc = []
    size = []

    nb_samples = X.shape[0]

    if shuffle:
        X, y = shuffle_data(X, y)

    # batch test
    batches = make_batches(nb_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_logs = {}
        batch_logs['batch'] = batch_index
        batch_logs['size'] = batch_end - batch_start

        # load the actual images; X only contains paths
        X_batch = load_samples(X[batch_start:batch_end],
                               batch_end - batch_start)
        X_batch = X_batch.astype("float32") / 255

        y_batch = y[batch_start:batch_end]
        y_batch = to_categorical(y_batch, nb_classes)

        if normalize:
            X_batch = X_batch - normalize[0]  # mean
            X_batch /= normalize[1]  # std

        outs = model.test_on_batch(X_batch, y_batch, accuracy=True)

        # logging of the loss, acc and batch_size
        loss += [float(outs[0])]
        acc += [float(outs[1])]
        size += [batch_end - batch_start]

    return loss, acc, size
Example #9
def test_on_batch(model, X, y, nb_classes, normalize=None, batch_size=32, shuffle=False):
    loss = []
    acc = []
    size = []

    nb_samples = X.shape[0]

    if shuffle:
        X, y = shuffle_data(X, y)

    # batch test
    batches = make_batches(nb_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_logs = {}
        batch_logs['batch'] = batch_index
        batch_logs['size'] = batch_end - batch_start

        # load the actual images; X only contains paths
        X_batch = load_samples(X[batch_start:batch_end], batch_end - batch_start)
        X_batch = X_batch.astype("float32") / 255

        y_batch = y[batch_start:batch_end]
        y_batch = to_categorical(y_batch, nb_classes)

        if normalize:
            X_batch = X_batch - normalize[0] # mean
            X_batch /= normalize[1] # std

        outs = model.test_on_batch(X_batch, y_batch, accuracy=True)

        # logging of the loss, acc and batch_size
        loss += [float(outs[0])]
        acc += [float(outs[1])]
        size += [batch_end - batch_start]

    return loss, acc, size
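
test_on_batch returns the per-batch losses, accuracies and batch sizes rather than a single figure. Because the last batch is usually smaller than the others, averaging the batch values directly would bias the result; weighting by batch size gives the metric over the whole test set. A short aggregation sketch, assuming model, X_test, y_test, nb_classes and the (mean, std) statistics already exist:

import numpy as np

loss, acc, size = test_on_batch(model, X_test, y_test, nb_classes,
                                normalize=(mean, std), batch_size=64)

size = np.asarray(size, dtype=np.float64)
mean_loss = float(np.sum(np.asarray(loss) * size) / np.sum(size))
mean_acc = float(np.sum(np.asarray(acc) * size) / np.sum(size))
print("test loss: {:.4f}, test acc: {:.4f}".format(mean_loss, mean_acc))
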
def batch_generator(word_crop, max_len, batch_size, doc_ids, words, word_metas, pos_tags, dependencies, parsetrees, rel_ids, rel_parts, rel_types, rel_senses, words2id, words2id_size, skipgram_offsets, pos_tags2id, pos_tags2id_size, rel_types2id, rel_types2id_size, rel_senses2id, rel_senses2id_size, rel_marking2id, rel_marking2id_size):
    """Batch generator where each sample represents a different discourse relation."""

    rel_ids = list(rel_ids)  # copy list
    while True:
        # shuffle relations on each epoch
        random.shuffle(rel_ids)

        for batch_start, batch_end in make_batches(len(rel_ids), batch_size):

            # prepare batch data
            _rel_id = []
            _token_start = []
            _token_end = []
            x_words_pad = []
            x_words_rand = []
            x_skipgram = []
            x_pos_tags = []
            x_rel_types = []
            x_rel_senses = []
            x_rel_focus = []
            x_rel_marking = []
            x_rel_types_one = []
            x_rel_senses_one = []
            for rel_id in rel_ids[batch_start:batch_end]:
                doc_id = rel_parts[rel_id]['DocID']
                words_len = len(words[doc_id])
                token_min = rel_parts[rel_id]['TokenMin']
                token_max = rel_parts[rel_id]['TokenMax']
                token_start, token_end = token_boundary_random(token_min, token_max, word_crop, words_len)
                _rel_id.append(rel_id)
                _token_start.append(token_start)
                _token_end.append(token_end)

                x1_words_pad, x1_words_rand, x1_skipgram, x1_pos_tags, x1_rel_types, x1_rel_senses, x1_rel_focus, x1_rel_marking, x1_rel_types_one, x1_rel_senses_one = relation_sample(rel_id, token_start, token_end, max_len, doc_ids, words, word_metas, pos_tags, dependencies, parsetrees, rel_ids, rel_parts, rel_types, rel_senses, words2id, words2id_size, skipgram_offsets, pos_tags2id, pos_tags2id_size, rel_types2id, rel_types2id_size, rel_senses2id, rel_senses2id_size, rel_marking2id, rel_marking2id_size)
                x_words_pad.append(x1_words_pad)
                x_words_rand.append(x1_words_rand)
                x_skipgram.append(x1_skipgram)
                x_pos_tags.append(x1_pos_tags)
                x_rel_types.append(x1_rel_types)
                x_rel_senses.append(x1_rel_senses)
                x_rel_focus.append(x1_rel_focus)
                x_rel_marking.append(x1_rel_marking)
                x_rel_types_one.append(x1_rel_types_one)
                x_rel_senses_one.append(x1_rel_senses_one)

            # yield batch
            yield {
                '_rel_id': _rel_id,
                '_token_start': _token_start,
                '_token_end': _token_end,
                'x_words_pad': np.asarray(x_words_pad, dtype=np.int32),
                'x_words_rand': np.asarray(x_words_rand, dtype=np.int32),
                'x_rel_focus': np.asarray(x_rel_focus, dtype=np.int8),
                'x_rel_marking': np.asarray(x_rel_marking, dtype=np.int8),
                'x_skipgram': np.asarray(x_skipgram, dtype=np.int8),
                'x_pos_tags': np.asarray(x_pos_tags, dtype=np.int8),
                'x_rel_types': np.asarray(x_rel_types, dtype=np.float32),
                'x_rel_senses': np.asarray(x_rel_senses, dtype=np.float32),
                'x_rel_types_one': np.asarray(x_rel_types_one, dtype=np.int8),
                'x_rel_senses_one': np.asarray(x_rel_senses_one, dtype=np.int8),
                'x_rel_focus_out': np.asarray(x_rel_focus, dtype=np.int8),
            }
def batch_generator(word_crop, max_len, batch_size, doc_ids, words, word_metas,
                    pos_tags, dependencies, parsetrees, rel_ids, rel_parts,
                    rel_types, rel_senses, words2id, words2id_size,
                    skipgram_offsets, pos_tags2id, pos_tags2id_size,
                    rel_types2id, rel_types2id_size, rel_senses2id,
                    rel_senses2id_size, rel_marking2id, rel_marking2id_size):
    """Batch generator where each sample represents a different discourse relation."""

    rel_ids = list(rel_ids)  # copy list
    while True:
        # shuffle relations on each epoch
        random.shuffle(rel_ids)

        for batch_start, batch_end in make_batches(len(rel_ids), batch_size):

            # prepare batch data
            _rel_id = []
            _token_start = []
            _token_end = []
            x_words_pad = []
            x_words_rand = []
            x_skipgram = []
            x_pos_tags = []
            x_rel_types = []
            x_rel_senses = []
            x_rel_focus = []
            x_rel_marking = []
            x_rel_types_one = []
            x_rel_senses_one = []
            for rel_id in rel_ids[batch_start:batch_end]:
                doc_id = rel_parts[rel_id]['DocID']
                words_len = len(words[doc_id])
                token_min = rel_parts[rel_id]['TokenMin']
                token_max = rel_parts[rel_id]['TokenMax']
                token_start, token_end = token_boundary_random(
                    token_min, token_max, word_crop, words_len)
                _rel_id.append(rel_id)
                _token_start.append(token_start)
                _token_end.append(token_end)

                x1_words_pad, x1_words_rand, x1_skipgram, x1_pos_tags, x1_rel_types, x1_rel_senses, x1_rel_focus, x1_rel_marking, x1_rel_types_one, x1_rel_senses_one = relation_sample(
                    rel_id, token_start, token_end, max_len, doc_ids, words,
                    word_metas, pos_tags, dependencies, parsetrees, rel_ids,
                    rel_parts, rel_types, rel_senses, words2id, words2id_size,
                    skipgram_offsets, pos_tags2id, pos_tags2id_size,
                    rel_types2id, rel_types2id_size, rel_senses2id,
                    rel_senses2id_size, rel_marking2id, rel_marking2id_size)
                x_words_pad.append(x1_words_pad)
                x_words_rand.append(x1_words_rand)
                x_skipgram.append(x1_skipgram)
                x_pos_tags.append(x1_pos_tags)
                x_rel_types.append(x1_rel_types)
                x_rel_senses.append(x1_rel_senses)
                x_rel_focus.append(x1_rel_focus)
                x_rel_marking.append(x1_rel_marking)
                x_rel_types_one.append(x1_rel_types_one)
                x_rel_senses_one.append(x1_rel_senses_one)

            # yield batch
            yield {
                '_rel_id': _rel_id,
                '_token_start': _token_start,
                '_token_end': _token_end,
                'x_words_pad': np.asarray(x_words_pad, dtype=np.int32),
                'x_words_rand': np.asarray(x_words_rand, dtype=np.int32),
                'x_rel_focus': np.asarray(x_rel_focus, dtype=np.int8),
                'x_rel_marking': np.asarray(x_rel_marking, dtype=np.int8),
                'x_skipgram': np.asarray(x_skipgram, dtype=np.int8),
                'x_pos_tags': np.asarray(x_pos_tags, dtype=np.int8),
                'x_rel_types': np.asarray(x_rel_types, dtype=np.float32),
                'x_rel_senses': np.asarray(x_rel_senses, dtype=np.float32),
                'x_rel_types_one': np.asarray(x_rel_types_one, dtype=np.int8),
                'x_rel_senses_one': np.asarray(x_rel_senses_one,
                                               dtype=np.int8),
                'x_rel_focus_out': np.asarray(x_rel_focus, dtype=np.int8),
            }
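
Both batch_generator variants yield one dict per batch: the underscore-prefixed keys (_rel_id, _token_start, _token_end) are bookkeeping for mapping predictions back to discourse relations, and the x_* keys are the arrays to feed the model by name. A consumption sketch, assuming the corpus structures and vocabularies passed as arguments are already loaded:

gen = batch_generator(word_crop, max_len, 16, doc_ids, words, word_metas,
                      pos_tags, dependencies, parsetrees, rel_ids, rel_parts,
                      rel_types, rel_senses, words2id, words2id_size,
                      skipgram_offsets, pos_tags2id, pos_tags2id_size,
                      rel_types2id, rel_types2id_size, rel_senses2id,
                      rel_senses2id_size, rel_marking2id, rel_marking2id_size)

batch = next(gen)
meta = {k: v for k, v in batch.items() if k.startswith('_')}      # bookkeeping
data = {k: v for k, v in batch.items() if not k.startswith('_')}  # model inputs/targets
print(meta['_rel_id'][:3], data['x_words_pad'].shape)
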
Example #12
        loss_best = loss_avg
        loss_min = float(stats.history[-1]['loss_min'])
        loss_max = float(stats.history[-1]['loss_max'])
        log.info("  continue from epoch {}, loss avg: {:.4f}, min: {:.4f}, max: {:.4f}".format(epoch_i, loss_avg, loss_min, loss_max))

    # train model
    while epoch_i < epochs:
        epoch_i += 1
        time_0 = time.time()
        log.info("train epoch {}/{} ({} docs)".format(epoch_i, epochs, len(train_doc_ids)))

        # one document per batch update
        loss_avg = 0.
        loss_min = np.inf
        loss_max = -np.inf
        batches = make_batches(len(train_doc_ids), batch_size)
        for batch_i, (batch_start, batch_end) in enumerate(batches):
            # prepare batch data
            doc_ids = train_doc_ids[batch_start:batch_end]

            x_word_pad, x_word_rand = build_x_word(doc_ids, train_words, word2id, word_crop, max_len)
            #print("x_word_pad:", x_word_pad.shape); pprint(x_word_pad)
            #print("x_word_rand:", x_word_rand.shape); pprint(x_word_rand)

            y_skipgram = build_y_skipgram(x_word_pad, skipgram_offsets, max_len)
            #print("y_skipgram:", y_skipgram.shape); pprint(y_skipgram)

            y_pos = build_y_pos(doc_ids, train_words, pos2id, word_crop, max_len)
            #print("y_pos:", y_pos.shape); pprint(y_pos)

            y_pdtbpair = build_y_pdtbpair(doc_ids, train_words, pdtbpair_offsets, pdtbpair2id, pdtbpair2id_weights, word_crop, max_len)
Example #13
        log.info(
            "  continue from epoch {}, loss avg: {:.4f}, min: {:.4f}, max: {:.4f}"
            .format(epoch_i, loss_avg, loss_min, loss_max))

    # train model
    while epoch_i < epochs:
        epoch_i += 1
        time_0 = time.time()
        log.info("train epoch {}/{} ({} docs)".format(epoch_i, epochs,
                                                      len(train_doc_ids)))

        # one document per batch update
        loss_avg = 0.
        loss_min = np.inf
        loss_max = -np.inf
        batches = make_batches(len(train_doc_ids), batch_size)
        for batch_i, (batch_start, batch_end) in enumerate(batches):
            # prepare batch data
            doc_ids = train_doc_ids[batch_start:batch_end]

            x_word_pad, x_word_rand = build_x_word(doc_ids, train_words,
                                                   word2id, word_crop, max_len)
            #print("x_word_pad:", x_word_pad.shape); pprint(x_word_pad)
            #print("x_word_rand:", x_word_rand.shape); pprint(x_word_rand)

            y_skipgram = build_y_skipgram(x_word_pad, skipgram_offsets,
                                          max_len)
            #print("y_skipgram:", y_skipgram.shape); pprint(y_skipgram)

            y_pos = build_y_pos(doc_ids, train_words, pos2id, word_crop,
                                max_len)
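
Both training-loop excerpts initialise loss_avg, loss_min and loss_max before iterating over the document batches, but they are cut off before the update step. A hypothetical continuation of the loop body (the remainder is not shown in these excerpts) would fold each batch loss into the running statistics, roughly like this:

            # hypothetical continuation inside the batch loop, once the update
            # step has produced a scalar `batch_loss` for this batch
            batch_loss = float(batch_loss)
            loss_avg += (batch_loss - loss_avg) / (batch_i + 1)  # running mean
            loss_min = min(loss_min, batch_loss)
            loss_max = max(loss_max, batch_loss)
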
Example #14
def train_on_batch(model,
                   X,
                   y,
                   nb_classes,
                   callbacks=None,
                   normalize=None,
                   batch_size=32,
                   class_weight=None,
                   class_acc=True,
                   shuffle=False):
    loss = []
    acc = []
    size = []

    nb_samples = X.shape[0]
    out_labels = ['loss', 'acc']

    if shuffle:
        X, y = shuffle_data(X, y)

    # batch train
    batches = make_batches(nb_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_logs = {}
        batch_logs['batch'] = batch_index
        batch_logs['size'] = batch_end - batch_start

        if callbacks:
            callbacks.on_batch_begin(batch_index, batch_logs)

        # load the actual images; X only contains paths
        X_batch = load_samples(X[batch_start:batch_end],
                               batch_end - batch_start)
        X_batch = X_batch.astype("float32") / 255

        y_batch = y[batch_start:batch_end]
        y_batch = to_categorical(y_batch, nb_classes)

        if normalize:
            X_batch = X_batch - normalize[0]  # mean
            X_batch /= normalize[1]  # std

        # calculates the overall loss and accuracy
        outs = model.train_on_batch(X_batch,
                                    y_batch,
                                    accuracy=True,
                                    class_weight=class_weight)

        if not isinstance(outs, list):
            outs = [outs]
        for l, o in zip(out_labels, outs):
            batch_logs[l] = o

        # collect the per-batch statistics that are returned to the caller
        loss += [float(batch_logs['loss'])]
        acc += [float(batch_logs['acc'])]
        size += [batch_end - batch_start]

        # calculates the accuracy per class
        if class_acc:
            result = calc_class_acc(model,
                                    X[batch_start:batch_end],
                                    y[batch_start:batch_end],
                                    nb_classes,
                                    normalize=normalize,
                                    batch_size=batch_size,
                                    keys=['acc'])
            batch_logs['class_acc'] = result['acc']

        if callbacks:
            callbacks.on_batch_end(batch_index, batch_logs)

    return loss, acc, size
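
Taken together, these helpers form a small path-based training pipeline: train_on_batch streams image paths through the model epoch by epoch, test_on_batch reports per-batch metrics, and predict_on_batch produces class predictions. A rough usage sketch, assuming model, X_train/y_train, X_val/y_val, nb_classes, nb_epoch and the training-set (mean, std) statistics are defined elsewhere:

for epoch in range(nb_epoch):
    # shuffle each epoch; normalization statistics come from the training set
    train_on_batch(model, X_train, y_train, nb_classes,
                   normalize=(mean, std), batch_size=32, shuffle=True)

    loss, acc, size = test_on_batch(model, X_val, y_val, nb_classes,
                                    normalize=(mean, std), batch_size=32)
    print("epoch {}: val loss {:.4f}".format(epoch, sum(loss) / len(loss)))

predictions = predict_on_batch(model, X_val, normalize=(mean, std), batch_size=32)
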