Example 1
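# Convert a TensorFlow SavedModel to a TFLite flatbuffer. When quantize is set,
# a small sample of training images is wired in as the representative dataset and
# the converter is configured for default optimizations with float16 support.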
def compress(saved_model_path,
             tflite_model_path,
             img_size,
             quantize=None,
             device=None):
    converter = lite.TFLiteConverter.from_saved_model(saved_model_path)

    if quantize:
        sample_dataset = DataGenerator(get_train_data(), 10, img_size).sample()
        sample_images = sample_dataset[0]

        def representative_dataset_gen():
            for index in range(sample_images.shape[0] - 1):
                yield [sample_images[index:index + 1]]

        converter.representative_dataset = tf.lite.RepresentativeDataset(
            representative_dataset_gen)

        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.target_spec.supported_types = [tf.float16]

    tflite_model = converter.convert()
    with open(tflite_model_path, "wb") as f:
        bytes_written = f.write(tflite_model)
    print(bytes_written)
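# A hypothetical invocation (paths and image size are placeholders, not from the source):
# compress("./saved_model", "model.tflite", img_size=224, quantize=True)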
Example 2
#   Editor      : VIM
#   File name   : test.py
#   Author      : YunYang1994
#   Created date: 2019-10-23 23:14:38
#   Description :
#
#================================================================

import numpy as np
import tensorflow as tf

from fcn8s import FCN8s
from utils import visual_result, DataGenerator

model = FCN8s(n_class=21)
TestSet = DataGenerator("./data/test_image.txt", "./data/test_labels", 1)

## Load weights and test the model after training.
## To test the model, first build it by calling "model(data)" once,
## and then load the trained weights.
data = np.ones(shape=[1, 224, 224, 3], dtype=np.float32)
model(data)
model.load_weights("FCN8s.h5")

for idx, (x, y) in enumerate(TestSet):
    result = model(x)
    pred_label = tf.argmax(result, axis=-1)
    result = visual_result(x[0], pred_label[0].numpy())
    save_file = "./data/prediction/%d.jpg" % idx
    print("=> saving prediction result into ", save_file)
    result.save(save_file)
Example 3
                          args.train_size,
                          args.aux_inputs,
                          seed=args.seed,
                          drop_lowq=args.drop_lowq)
X_train, X_test, y_train, y_test = data

if args.scale_targets:  # scale targets to std == 1
    scales = y_train.std(axis=0)
    y_train /= scales
    y_test /= scales

# create data generators for on-the-fly augmentation
dg_train = DataGenerator(X_train,
                         y_train,
                         batch_size=args.batch_size,
                         seed=args.seed,
                         augment=args.augment_train,
                         im_size=args.im_size,
                         n_channels=len(args.colors),
                         y_shape=(len(args.targets), ))
dg_test = DataGenerator(X_test,
                        y_test,
                        batch_size=args.batch_size,
                        im_size=args.im_size,
                        y_shape=(len(args.targets), ),
                        shuffle=False,
                        n_channels=len(args.colors))

###############################################################################
# train and save model
n_steps = args.steps_per_epoch if args.steps_per_epoch else dg_train.n_steps
sdecay = partial(step_decay,
Example 4
def model(batch_size=128, nb_epoch=100):
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))

    # pre_model_path = DATA_ROOT + 'pre_model_weights_' + FUNCTION + '.pkl'
    model_path = DATA_ROOT + 'model_' + FUNCTION + '.h5'
    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')

    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    # model = get_model()
    # model.fit_generator(
    #     train_generator,
    #     samples_per_epoch=len(train_data[0]),
    #     nb_epoch=nb_epoch,
    #     validation_data=valid_generator,
    #     nb_val_samples=len(val_data[0]),
    #     max_q_size=batch_size,
    #     callbacks=[checkpointer, earlystopper])

    logging.info('Loading best model')
    model = load_model(model_path)

    logging.info('Predicting')
    preds = model.predict_generator(test_generator,
                                    val_samples=len(test_data[0]))
    # incon = 0
    # for i in range(len(test_data)):
    #     for j in range(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    logging.info('Computing performance')
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
Example 5
sent_mention = Concatenate()([sentence_label_emb, mention_emb, sent_mention])
sent_mention = Dense(dim, activation='relu')(sent_mention)

pt = Dense(1, activation='sigmoid')(sent_mention)

train_model = Model([sentence_in, mention_in, left_in, right_in, y_in, t_in],
                    [left_p, right_p, pt])

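# Build the training loss by hand: masked binary cross-entropy for the left/right
# boundary outputs (normalised by the sentence mask) plus a plain binary
# cross-entropy term for pt, attached with add_loss so compile() takes no loss argument.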
left_in = K.expand_dims(left_in, 2)
right_in = K.expand_dims(right_in, 2)

left_loss = K.binary_crossentropy(left_in, left_p)
left_loss = K.sum(left_loss * sentence_mask) / K.sum(sentence_mask)

right_loss = K.binary_crossentropy(right_in, right_p)
right_loss = K.sum(right_loss * sentence_mask) / K.sum(sentence_mask)

pt_loss = K.mean(K.binary_crossentropy(t_in, pt))

loss = left_loss + right_loss + pt_loss

train_model.add_loss(loss)
train_model.compile(optimizer=Adam(1e-3))
train_model.summary()

train_D = DataGenerator(train_data, char2id, kb2id, id2kb)

train_model.fit_generator(train_D.__iter__(),
                          steps_per_epoch=len(train_D),
                          epochs=40)
Example 6
def main(args):
    main_start = time.time()

    tf.set_random_seed(2019)
    random.seed(2019)
    np.random.seed(2019)

    if len(args) != 1:
        raise Exception('Problem with flags: %s' % args)

    # Correcting a few flags for test/eval mode.
    if FLAGS.mode != 'train':
        FLAGS.batch_size = FLAGS.beam_size
        FLAGS.bs_dec_steps = FLAGS.dec_steps

        if FLAGS.model.lower() != "tx":
            FLAGS.dec_steps = 1

    assert FLAGS.mode == 'train' or FLAGS.batch_size == FLAGS.beam_size, \
        "In test mode, batch size should be equal to beam size."

    assert FLAGS.mode == 'train' or FLAGS.dec_steps == 1 or FLAGS.model.lower() == "tx", \
        "In test mode, no. of decoder steps should be one."

    os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
        str(gpu_id) for gpu_id in FLAGS.GPUs)

    if not os.path.exists(FLAGS.PathToCheckpoint):
        os.makedirs(FLAGS.PathToCheckpoint)

    if FLAGS.mode == "test" and not os.path.exists(FLAGS.PathToResults):
        os.makedirs(FLAGS.PathToResults)
        os.makedirs(FLAGS.PathToResults + 'predictions')
        os.makedirs(FLAGS.PathToResults + 'groundtruths')

    if FLAGS.mode == 'eval':
        eval_model(FLAGS.PathToResults)
    else:
        start = time.time()
        vocab = Vocab(max_vocab_size=FLAGS.vocab_size,
                      emb_dim=FLAGS.dim,
                      dataset_path=FLAGS.PathToDataset,
                      glove_path=FLAGS.PathToGlove,
                      vocab_path=FLAGS.PathToVocab,
                      lookup_path=FLAGS.PathToLookups)

        if FLAGS.model.lower() == "plain":
            print("Setting up the plain model.\n")
            data = DataGenerator(path_to_dataset=FLAGS.PathToDataset,
                                 max_inp_seq_len=FLAGS.enc_steps,
                                 max_out_seq_len=FLAGS.dec_steps,
                                 vocab=vocab,
                                 use_pgen=FLAGS.use_pgen,
                                 use_sample=FLAGS.sample)
            summarizer = SummarizationModel(vocab, data)

        elif FLAGS.model.lower() == "hier":
            print("Setting up the hier model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps,
                vocab=vocab,
                use_pgen=FLAGS.use_pgen,
                use_sample=FLAGS.sample)
            summarizer = SummarizationModelHier(vocab, data)

        elif FLAGS.model.lower() == "rlhier":
            print("Setting up the Hier RL model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps,
                vocab=vocab,
                use_pgen=FLAGS.use_pgen,
                use_sample=FLAGS.sample)
            summarizer = SummarizationModelHierSC(vocab, data)

        else:
            raise ValueError(
                "model flag should be either of plain/hier/bayesian/shared!! \n"
            )

        end = time.time()
        print(
            "Setting up vocab, data and model took {:.2f} sec.".format(end -
                                                                       start))

        summarizer.build_graph()

        if FLAGS.mode == 'train':
            summarizer.train()
        elif FLAGS.mode == "test":
            summarizer.test()
        else:
            raise ValueError("mode should be either train/test!! \n")

        main_end = time.time()
        print("Total time elapsed: %.2f \n" % (main_end - main_start))
Example 7
def model(params, batch_size=128, nb_epoch=6, is_train=True):
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))

    model_path = (DATA_ROOT + 'models/model_' + FUNCTION + '.h5')
    # '-' + str(params['embedding_dims']) +
    # '-' + str(params['nb_filter']) +
    # '-' + str(params['nb_conv']) +
    # '-' + str(params['nb_dense']) + '.h5')
    checkpointer = ModelCheckpoint(
        filepath=model_path,
        verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')

    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    if is_train:
        model = get_model(params)
        model.fit_generator(
            train_generator,
            samples_per_epoch=len(train_data[0]),
            nb_epoch=nb_epoch,
            validation_data=valid_generator,
            nb_val_samples=len(val_data[0]),
            max_q_size=batch_size,
            callbacks=[checkpointer, earlystopper])
    logging.info('Loading best model')
    start_time = time.time()
    model = load_model(model_path)
    logging.info('Loading time: %d' % (time.time() - start_time))
    # orgs = ['9606', '10090', '10116', '7227', '7955',
    #         '559292', '3702', '284812', '6239',
    #         '83333', '83332', '224308', '208964']
    # for org in orgs:
    #     logging.info('Predicting for %s' % (org,))
    #     train, val, test, train_df, valid_df, test_df = load_data(org=org)
    #     test_data, test_labels = test
    #     test_gos = test_df['gos'].values
    #     test_generator = DataGenerator(batch_size, nb_classes)
    #     test_generator.fit(test_data, test_labels)
    start_time = time.time()
    preds = model.predict_generator(
        test_generator, val_samples=len(test_data[0]))
    running_time = time.time() - start_time
    logging.info('Running time: %d %d' % (running_time, len(test_data[0])))
    logging.info('Computing performance')
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print(('%.3f & %.3f & %.3f & %.3f & %.3f' % (
        f, p, r, roc_auc, mcc)))
    # return f
    # logging.info('Inconsistent predictions: %d' % incon)
    # logging.info('Saving the predictions')
    proteins = test_df['proteins']
    predictions = list()
    for i in range(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame(
        {
            'proteins': proteins, 'predictions': predictions,
            'gos': test_df['gos'], 'labels': test_df['labels']})
    df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-preds.pkl')
Example 8
def handler(context):
    print('Start train handler.')
    if not isinstance(context, dict):
        message = 'Error: Only "abeja/all-cpu:19.04" or "abeja/all-gpu:19.04" is supported.'
        print(message)
        raise Exception(message)

    try:
        dataset_alias = context['datasets']
        id2index, _ = set_categories(dataset_alias.values())
        num_classes = len(id2index)
        dataset_item_ids = get_dataset_item_ids(dataset_alias.values())
        random.shuffle(dataset_item_ids)

        test_size = int(len(dataset_item_ids) * EARLY_STOPPING_TEST_SIZE)
        if test_size:
            train_ids, test_ids = dataset_item_ids[
                test_size:], dataset_item_ids[:test_size]
        else:
            raise Exception(
                "Dataset size is too small. Please add more dataset.")
        input_shape = (IMG_ROWS, IMG_COLS, NB_CHANNELS)
        print('num classes:', num_classes)
        print('input shape:', input_shape)
        print(len(train_ids), 'train samples')
        print(len(test_ids), 'test samples')
        print('parameters:', utils.parameters)

        model = create_model(num_classes, input_shape)
        tensorboard = TensorBoard(log_dir=log_path,
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=False)
        statistics = Statistics()
        early = EarlyStopping(monitor='val_acc',
                              min_delta=0,
                              patience=EARLY_STOPPING_PATIENCE,
                              verbose=1,
                              mode='auto')
        # Do you want to add `checkpoint` to callback as well?
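        # For example (hypothetical path; ModelCheckpoint is the standard Keras callback,
        # which would also need to be added to the callbacks list below):
        # checkpoint = ModelCheckpoint(
        #     os.path.join(ABEJA_TRAINING_RESULT_DIR, 'best_model.h5'),
        #     monitor='val_acc', save_best_only=True)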
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=Adam(lr=LEARNING_RATE,
                                     beta_1=ADAM_BETA_1,
                                     beta_2=ADAM_BETA_2,
                                     epsilon=ADAM_EPSILON,
                                     decay=ADAM_DECAY),
                      metrics=['accuracy'])

        # fit_generator
        train_gen = DataGenerator(train_ids, id2index, is_train=True)
        test_gen = DataGenerator(test_ids, id2index, is_train=False)

        # fit_generator
        model.fit_generator(train_gen,
                            epochs=EPOCHS,
                            verbose=1,
                            validation_data=test_gen,
                            callbacks=[tensorboard, statistics, early])
        score = model.evaluate_generator(test_gen)
        model.save(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.h5'))
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
    except Exception as e:
        print(str(e))
        print(traceback.format_exc())
        raise e
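The DataGenerator classes used throughout these examples are project-specific. As a rough sketch of the common pattern they share (assumed for illustration, not taken from any snippet above), a keras.utils.Sequence-based generator might look like this:

import numpy as np
from tensorflow import keras

class MinimalDataGenerator(keras.utils.Sequence):
    """Yields (x, y) batches from NumPy arrays and reshuffles indices every epoch."""

    def __init__(self, x, y, batch_size=32, shuffle=True):
        self.x, self.y = x, y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(x))

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        # slice out the idx-th batch of indices and return the corresponding samples
        batch = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        return self.x[batch], self.y[batch]

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)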
Example 9
def main(feature_type: str, language: str, domain: str, main_dir: str, seq_len: int,
         batch_size: int, lstm_dim: int, character_level: bool = False):
    """
    Parameters
    ----------
    feature_type: the name of the feature
    language: language of the text.
    domain: text domain (optional); appended to output file names when given.
    main_dir: base directory
    seq_len: sequence length
    batch_size: batch size
    lstm_dim: lstm hidden dimension
    character_level: whether tokenizer should be on character level.
    """

    texts = get_texts(main_dir, language, feature_type, character_level, domain)

    tokenizer = Tokenizer(texts.values(), character_level=character_level)

    samples = {}

    for book in texts:
        print(len(texts[book]))
        len_text = len(texts[book]) if character_level else len(texts[book].split())

        if len_text < seq_len:
            logger.warning(f"Requested seq_len larger than text length: {len_text} / {seq_len} "
                           f"for {book} and feature type {feature_type}.")
            continue
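        # Draw batch_size random window start positions; the corresponding seq_len-long
        # snippets are encoded below (characters or whitespace tokens, depending on character_level).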
        rand_idx = np.random.randint(0, len_text - seq_len, batch_size)

        if character_level:
            samples[book] = tokenizer.encode([texts[book][i: i + seq_len] for i in rand_idx])

        else:
            split_text = texts[book].split()
            samples[book] = tokenizer.encode(
                [" ".join(split_text[i: i + seq_len]) for i in rand_idx]
            )

    test_generator = DataGenerator(tokenizer,
                                   tokenizer.full_text,
                                   seq_len=seq_len,
                                   batch_size=batch_size,
                                   with_embedding=True,
                                   train=False)

    sample_batch = next(iter(test_generator))

    logger.info(f"X batch shape: {sample_batch[0].shape}, y batch shape: {sample_batch[1].shape}")
    logger.info(f"Sample batch text: {tokenizer.decode(sample_batch[0][0])}")

    file_path = os.path.join(main_dir, 'models',
                             f'{feature_type}_{language}_lstm_{lstm_dim}')

    if domain:
        file_path += '_' + domain

    if character_level:
        file_path += '_character_level'

    file_path += '.h5'

    logger.info(f"Loading {file_path}")

    prediction_model = lstm_model(num_words=tokenizer.num_words,
                                  lstm_dim=lstm_dim,
                                  seq_len=1,
                                  batch_size=batch_size,
                                  stateful=True,
                                  return_state=True)

    prediction_model.load_weights(file_path)

    hiddens = {}
    seeds = {}
    predictions = {}

    for book in samples:
        seed = np.stack(samples[book])
        print(seed.shape)
        hf, preds = generate_text(prediction_model, tokenizer, seed, get_hidden=True)
        print(hf.shape)
        hiddens[book] = hf
        seeds[book] = seed
        preds = [tokenizer.ix_to_word[pred] for pred in preds]
        predictions[book] = preds

    file_name = f'{feature_type}_{language}_lstm_{lstm_dim}_seq_len_{seq_len}'

    if domain:
        file_name += '_' + domain

    if character_level:
        file_name += '_character-level'
    file_name += '.pkl'

    path_out = os.path.join('data', 'hidden_states', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(hiddens, f)

    logger.info(f"Succesfully saved hidden dimensions to {path_out}")

    path_out = os.path.join('data', 'seeds', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(seeds, f)
    logger.info(f"Succesfully saved seeds to {path_out}")

    path_out = os.path.join('data', 'predictions', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(predictions, f)

    logger.info(f"Succesfully saved predictions to {path_out}")
Example 10
    parser.add_argument('-hps_path', default='./hps/giga.json')
    parser.add_argument('-dataset_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/giga_40_10.h5')
    parser.add_argument('--pretrain_wordvec', action='store_true')
    parser.add_argument('-npy_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/glove.npy')
    parser.add_argument('-log_file_path', default='./log.txt')
    parser.add_argument('-write_model_path', default='./model/model.ckpt')
    parser.add_argument('--load_model')
    parser.add_argument('-read_model_path', default='./model/model.ckpt')
    parser.add_argument('-vocab_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/vocab.pkl')
    args = parser.parse_args()
    # get hps
    hps = Hps()
    hps.load(args.hps_path)
    hps_tuple = hps.get_tuple()
    print(hps_tuple)
    vocab = Vocab(args.vocab_path, args.dataset_path + '.unk.json')
    data_generator = DataGenerator(args.dataset_path)
    model = PointerModel(hps_tuple, vocab)
    if args.pretrain_wordvec:
        model.init(npy_path=args.npy_path)
    else:
        model.init()
    if args.load_model:
        model.load_model(args.read_model_path)
    train(
        model=model,
        data_generator=data_generator,
        log_file_path=args.log_file_path, 
        model_path=args.write_model_path,
    )
Example 11
def model():
    # set parameters:
    batch_size = 128
    nb_epoch = 100
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, test_df = load_data()
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data))
    logging.info("Validation data size: %d" % len(val_data))
    logging.info("Test data size: %d" % len(test_data))
    logging.info("Building the model")
    inputs = Input(shape=(MAXLEN, ), dtype='int32', name='input1')
    feature_model = get_feature_model()(inputs)
    layers = get_layers(feature_model)
    output_models = []
    for i in range(len(functions)):
        output_models.append(layers[functions[i]]['output'])
    net = merge(output_models, mode='concat', concat_axis=1)
    # net = Dense(nb_classes * 2, activation='relu')(feature_model)
    # net = Dense(nb_classes, activation='sigmoid')(net)
    # net = Activation('sigmoid')(net)
    model = Model(input=inputs, output=net)
    logging.info('Model built in %d sec' % (time.time() - start_time))
    logging.info('Saving the model')
    model_json = model.to_json()
    with open(DATA_ROOT + 'model_seq_' + FUNCTION + '.json', 'w') as f:
        f.write(model_json)
    logging.info('Compiling the model')
    optimizer = RMSprop()

    model.compile(optimizer=optimizer, loss='binary_crossentropy')

    pre_model_path = DATA_ROOT + 'pre_model_seq_weights_' + FUNCTION + '.pkl'
    model_path = DATA_ROOT + 'model_seq_weights_' + FUNCTION + '.pkl'
    checkpointer = MyCheckpoint(filepath=model_path,
                                verbose=1,
                                save_best_only=True,
                                save_weights_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    logging.info('Compilation finished in %d sec' % (time.time() - start_time))

    # logging.info('Loading pretrained weights')
    # load_model_weights(model, pre_model_path)

    logging.info('Starting training the model')

    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)
    # model.fit_generator(
    #     train_generator,
    #     samples_per_epoch=len(train_data),
    #     nb_epoch=nb_epoch,
    #     validation_data=valid_generator,
    #     nb_val_samples=len(val_data),
    #     max_q_size=batch_size,
    #     callbacks=[checkpointer, earlystopper])

    logging.info('Loading weights')
    load_model_weights(model, model_path)

    # model.save(DATA_ROOT + 'model_%s.h5' % FUNCTION)

    preds = model.predict_generator(test_generator, val_samples=len(test_data))

    logging.info(preds.shape)
    incon = 0
    # for i in xrange(len(test_data)):
    #     for j in xrange(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    f, p, r, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    logging.info('Fmax measure: \t %f %f %f' % (f, p, r))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('Inconsistent predictions: %d' % incon)
    logging.info('Saving the predictions')
    proteins = test_df['proteins']
    predictions = list()
    for i in xrange(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame({
        'proteins': proteins,
        'predictions': predictions,
        'gos': test_df['gos'],
        'labels': test_df['labels']
    })
    df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-preds-seq.pkl')
    logging.info('Done in %d sec' % (time.time() - start_time))

    function_centric_performance(functions, preds.T, test_labels.T)
Example 12
import os
import time

import numpy as np
from tqdm.auto import tqdm

import cv2
from PIL import Image
from matplotlib import pyplot as plt

from utils import DataGenerator, face_plot

root_path = os.getcwd()
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
driver_path = 'chromedriver.exe'
data_generator = DataGenerator(root_path, face_cascade, driver_path)

data_generator.get_idol_faces('鬼娃恰吉')
data_generator.get_idol_faces('王世堅')
print('OK')

from dataset import ImageFolder
from model import CNN_MODEL

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms

TRAIN_SIZE = 0.8
Example 13

# ### Create the data generator to load batches of data

# In[20]:


import utils; reload(utils)
from utils import DataGenerator

NUM_TRAIN_PAIRS = 150000
NUM_VAL_PAIRS = 10000
BATCH_SIZE = 128
datagen = DataGenerator(X_train, y_train, num_train_pairs = NUM_TRAIN_PAIRS,
                        num_val_pairs = NUM_VAL_PAIRS, X_val = X_val[val_train],
                        train_alphabet_to_index = train_alphabet_to_index,
                        val_alphabet_to_index = val_train_index,
                        y_val = y_val[val_train], batch_sz = BATCH_SIZE, verbose = True)
datagen.create_data_transformer(rotation_range=10, width_shift_range=0.01, 
                              height_shift_range=0.01, shear_range=0.01)

STEPS_PER_EPOCH = NUM_TRAIN_PAIRS // BATCH_SIZE
VALIDATION_STEPS = NUM_VAL_PAIRS // BATCH_SIZE 

from keras.optimizers import Adam
learning_rate = 5e-5
adam = Adam(learning_rate)
scheduler = LearningRateScheduler(lambda epoch : learning_rate * pow(0.985, epoch))
siamese_net.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
siamese_net.load_weights(INIT_WEIGHTS)
Example 14
            for result in all_result:
                for word_idx in result:
                    f_out.write('{} '.format(word_idx))
                f_out.write('\n')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-hps_path', default='./hps/cd_v3.json')
    parser.add_argument('-vocab_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/vocab.pkl')
    parser.add_argument('-model_path', default='./model/model.ckpt-2999')
    parser.add_argument('-dataset_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/giga_40_10.h5')
    parser.add_argument('-dataset_type', default='valid')
    parser.add_argument('-output_path', default='result.txt')
    args = parser.parse_args()
    hps = Hps()
    hps.load(args.hps_path)
    hps_tuple = hps.get_tuple()
    print(hps_tuple)
    vocab = Vocab(args.vocab_path, args.dataset_path + '.unk.json')
    data_generator = DataGenerator(args.dataset_path)
    model = PointerModel(hps_tuple, vocab)
    model.load_model(args.model_path)
    dg = DataGenerator(args.dataset_path)
    iterator = dg.iterator(
        batch_size=hps_tuple.batch_size, 
        dataset_type=args.dataset_type, 
        infinite=False, 
        shuffle=False
    )
    predict(model, iterator, args.output_path)
Example 15
def main(feature_type: str,
         language: str,
         domain: str,
         main_dir: str,
         seq_len: int,
         batch_size: int,
         test_batch_size: int,
         lstm_dim: int,
         character_level: bool = False):
    """
    Parameters
    ----------
    feature_type: the name of the feature
    main_dir: base directory
    language: language of corpus
    domain: text domain (optional); appended to output file names when given.
    seq_len: sequence length
    batch_size: batch size
    test_batch_size: test batch size
    lstm_dim: lstm hidden dimension
    character_level: whether tokenizer should be on character level.
    """

    texts = get_texts(main_dir, language, feature_type, character_level,
                      domain)

    tokenizer = Tokenizer(texts.values(), character_level=character_level)

    train_generator = DataGenerator(tokenizer,
                                    tokenizer.full_text,
                                    seq_len=seq_len,
                                    batch_size=batch_size,
                                    with_embedding=True,
                                    train=True)

    test_generator = DataGenerator(tokenizer,
                                   tokenizer.full_text,
                                   seq_len=seq_len,
                                   batch_size=test_batch_size,
                                   with_embedding=True,
                                   train=False)

    sample_batch = next(iter(train_generator))

    logger.info(
        f"X batch shape: {sample_batch[0].shape}, y batch shape: {sample_batch[1].shape}"
    )
    logger.info(f"Sample batch text: {tokenizer.decode(sample_batch[0][0])}")

    training_model = lstm_model(num_words=tokenizer.num_words,
                                seq_len=seq_len,
                                lstm_dim=lstm_dim,
                                stateful=False)

    file_path = os.path.join(main_dir, 'models',
                             f'{feature_type}_{language}_lstm_{lstm_dim}')

    if domain:
        file_path += '_' + domain

    if character_level:
        file_path += '_character_level'

    file_path += '.h5'

    training_model.save_weights(file_path)

    checkpoint = tf.keras.callbacks.ModelCheckpoint(file_path,
                                                    monitor='val_loss',
                                                    save_best_only=True)
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=2)

    generate_text = GenerateText(test_generator, tokenizer, file_path,
                                 lstm_dim)
    callbacks_list = [checkpoint, early_stopping, generate_text]

    training_model.fit_generator(train_generator,
                                 validation_data=test_generator,
                                 callbacks=callbacks_list,
                                 epochs=256)
Example 16
import os
from builtins import enumerate

import pandas as pd
import numpy as np
from utils import DataGenerator
import pickle

cwd = "/home/go96bix/projects/epitop_pred"
directory = os.path.join(cwd, "data_generator")
classes = 0
num_samples = []
all_samples = []
classic = False
embedding = False
elmo_embedder = DataGenerator.Elmo_embedder()
slicesize = 49
use_old_test_set = True

if use_old_test_set:
	test_df_old = pd.read_csv(
		"/home/le86qiz/Documents/Konrad/general_epitope_analyses/bepipred_evaluation/deepipred_results/test_samples.csv",
		sep=",", header=None, index_col=None)
	test_df_old_y = test_df_old[2].values
	test_df_old = test_df_old[1].values
else:
	test_df_old = []

for root, dirs, files in os.walk(directory):
	for file in files:
		if file.endswith(".csv"):
Example 17
with tables.open_file(os.path.join('data', dataset_name + '.h5'),
                      'r') as dataset:

    # split into train and test sets
    total_imgs = dataset.root.imgs.shape[0]
    sample_weights = sample_weights[0:total_imgs] if use_sample_weights else []
    all_inds = list(range(0, total_imgs))
    np.random.shuffle(all_inds)
    train_inds = all_inds[0:int(total_imgs * (1 - test_set_portion))]
    test_inds = all_inds[int(total_imgs * (1 - test_set_portion)):]

    # create model and data generators
    train_generator = DataGenerator(
        train_inds,
        dataset,
        batch_size=batch_size,
        shuffle=True,
        sample_weights=sample_weights,
        num_loss_fcns=2 if network_structure == 'stacked_hourglass' else 1)
    test_generator = DataGenerator(
        test_inds,
        dataset,
        batch_size=batch_size,
        shuffle=False,
        sample_weights=sample_weights,
        num_loss_fcns=2 if network_structure == 'stacked_hourglass' else 1)

    model = models_dict(network_structure)(
        (train_generator.img_dims[0], train_generator.img_dims[1], 1),
        train_generator.channels,
        first_layer_filters,
Example 18
def fTrainInner(cnn,
                modelName,
                X_train=None,
                y_train=None,
                Y_segMasks_train=None,
                X_valid=None,
                y_valid=None,
                Y_segMasks_valid=None,
                X_test=None,
                y_test=None,
                Y_segMasks_test=None,
                sOutPath=None,
                patchSize=0,
                batchSize=None,
                learningRate=None,
                iEpochs=None,
                usingClassification=False,
                dlnetwork=None,
                data=None):
    print('Training CNN')
    print('with lr = ' + str(learningRate) + ' , batchSize = ' +
          str(batchSize))

    # sio.savemat('D:med_data/' + 'checkdata_voxel_and_mask.mat',
    #             {'mask_train': Y_segMasks_train,
    #              'voxel_train': X_train,
    #              'mask_test': Y_segMasks_test,
    #              'voxel_test': X_test})

    # save names
    _, sPath = os.path.splitdrive(sOutPath)
    sPath, sFilename = os.path.split(sPath)
    sFilename, sExt = os.path.splitext(sFilename)

    model_name = sOutPath + os.sep + sFilename
    weight_name = model_name + '_weights.h5'
    model_json = model_name + '.json'
    model_all = model_name + '_model.h5'
    model_mat = model_name + '.mat'

    if (os.path.isfile(model_mat)):  # no training if output file exists
        print('------- already trained -> go to next')
        return

    # create optimizer
    if dlnetwork is not None:
        if dlnetwork.optimizer == 'SGD':
            opti = keras.optimizers.SGD(lr=learningRate,
                                        momentum=dlnetwork.momentum,
                                        decay=dlnetwork.weightdecay,
                                        nesterov=dlnetwork.nesterov)

        elif dlnetwork.optimizer == 'RMSPROP':
            opti = keras.optimizers.RMSprop(lr=learningRate,
                                            decay=dlnetwork.weightdecay)

        elif dlnetwork.optimizer == 'ADAGRAD':
            opti = keras.optimizers.Adagrad(lr=learningRate,
                                            epsilon=None,
                                            decay=dlnetwork.weightdecay)

        elif dlnetwork.optimizer == 'ADADELTA':
            opti = keras.optimizers.Adadelta(lr=learningRate,
                                             rho=0.95,
                                             epsilon=None,
                                             decay=dlnetwork.weightdecay)

        elif dlnetwork.optimizer == 'ADAM':
            opti = keras.optimizers.Adam(lr=learningRate,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=None,
                                         decay=dlnetwork.weightdecay)
        else:
            raise ValueError("Unknown Optimizer!")
    else:
        # opti = SGD(lr=learningRate, momentum=1e-8, decay=0.1, nesterov=True);#Adag(lr=0.01, epsilon=1e-06)
        opti = keras.optimizers.Adam(lr=learningRate,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=1e-08,
                                     decay=0.0)

    cnn.summary()

    # compile model
    if usingClassification:
        cnn.compile(loss={
            'segmentation_output': dice_coef_loss,
            'classification_output': 'categorical_crossentropy'
        },
                    optimizer=opti,
                    metrics={
                        'segmentation_output': dice_coef,
                        'classification_output': 'accuracy'
                    })
    else:
        cnn.compile(loss=dice_coef_loss, optimizer=opti, metrics=[dice_coef])

    # callbacks
    #callback_earlyStopping = EarlyStopping(monitor='val_loss', patience=12, verbose=1)

    # callback_tensorBoard = keras.callbacks.TensorBoard(log_dir=dlart_handle.getLearningOutputPath() + '/logs',
    # histogram_freq=2,
    # batch_size=batchSize,
    # write_graph=True,
    # write_grads=True,
    # write_images=True,
    # embeddings_freq=0,
    # embeddings_layer_names=None,
    #  embeddings_metadata=None)

    #callbacks = [callback_earlyStopping]
    callbacks = []
    #callbacks.append(
    #   ModelCheckpoint(sOutPath + os.sep + 'checkpoints' + os.sep + 'checker.hdf5', monitor='val_acc', verbose=0,
    #                  period=1, save_best_only=True))  # overrides the last checkpoint, its just for security
    # callbacks.append(ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=1e-4, verbose=1))
    callbacks.append(LearningRateScheduler(schedule=step_decay, verbose=1))
    #callbacks.append(LivePlotCallback(dlart_handle))

    print('Start training')

    # TODO: add here data augmentation via ImageDataGenerator from utils/image_preprocessing
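    # A rough sketch of that TODO for the ARRAY training mode, assuming the project module
    # mirrors the standard Keras ImageDataGenerator API (the exact interface of
    # utils/image_preprocessing is not shown here):
    # from utils.image_preprocessing import ImageDataGenerator
    # augmenter = ImageDataGenerator(rotation_range=10, width_shift_range=0.05,
    #                                height_shift_range=0.05, horizontal_flip=True)
    # train_flow = augmenter.flow(X_train, Y_segMasks_train, batch_size=batchSize)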
    if dlnetwork.trainMode == 'GENERATOR':
        # prepare data generators
        if os.path.exists(X_train):  # splitting was already done
            train_gen = DataGenerator(X_train,
                                      batch_size=batchSize,
                                      dim=patchSize,
                                      usingClassification=usingClassification)
            val_gen = DataGenerator(X_valid,
                                    batch_size=batchSize,
                                    dim=patchSize,
                                    usingClassification=usingClassification)
            test_gen = DataGenerator(X_test,
                                     batch_size=batchSize,
                                     dim=patchSize,
                                     usingClassification=usingClassification)
        else:  # splitting needs to be done
            datapath = os.path.dirname(X_train)
            datafiles = [
                f for f in os.listdir(datapath)
                if (os.path.isfile(os.path.join(datapath, f))
                    and f.endswith('.hdf5'))
            ]
            train_files, val_files, test_files = fSplitSegmentationDataset_generator(
                datafiles,
                data.allPats,
                data.allTestPats,
                data.splittingMode,
                testTrainingDatasetRatio=data.trainTestDatasetRatio,
                validationTrainRatio=data.trainValidationRatio,
                nfolds=data.nfolds,
                isRandomShuffle=data.isRandomShuffle)
            train_gen = DataGenerator(datapath,
                                      batch_size=batchSize,
                                      dim=patchSize,
                                      usingClassification=usingClassification,
                                      list_IDs=train_files)
            val_gen = DataGenerator(datapath,
                                    batch_size=batchSize,
                                    dim=patchSize,
                                    usingClassification=usingClassification,
                                    list_IDs=val_files)
            test_gen = DataGenerator(datapath,
                                     batch_size=batchSize,
                                     dim=patchSize,
                                     usingClassification=usingClassification,
                                     list_IDs=test_files)
        existing_validation = len(val_gen.list_IDs) > 0
    else:  # ARRAY
        existing_validation = (X_valid != 0 and X_valid is not None)

    if existing_validation:
        # a validation set exists: use it for validation
        if usingClassification:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train, {
                    'segmentation_output': Y_segMasks_train,
                    'classification_output': y_train
                },
                                 validation_data=(X_valid, {
                                     'segmentation_output':
                                     Y_segMasks_valid,
                                     'classification_output':
                                     y_valid
                                 }),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=val_gen,
                                           epochs=iEpochs,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)
        else:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train,
                                 Y_segMasks_train,
                                 validation_data=(X_valid, Y_segMasks_valid),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=val_gen,
                                           epochs=iEpochs,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)
    else:
        # no validation set: fall back to the test set for validation
        if usingClassification:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train, {
                    'segmentation_output': Y_segMasks_train,
                    'classification_output': y_train
                },
                                 validation_data=(X_test, {
                                     'segmentation_output':
                                     Y_segMasks_test,
                                     'classification_output':
                                     y_test
                                 }),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=test_gen,
                                           epochs=iEpochs,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)
        else:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train,
                                 Y_segMasks_train,
                                 validation_data=(X_test, Y_segMasks_test),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=test_gen,
                                           epochs=iEpochs,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)

    # return the loss value and metrics values for the model in test mode
    if dlnetwork.trainMode == 'ARRAY':
        if usingClassification:
            model_metrics = cnn.metrics_names
            loss_test, segmentation_output_loss_test, classification_output_loss_test, segmentation_output_dice_coef_test, classification_output_acc_test \
                = cnn.evaluate(X_test, {'segmentation_output': Y_segMasks_test, 'classification_output': y_test}, batch_size=batchSize, verbose=1)
        else:
            score_test, dice_coef_test = cnn.evaluate(X_test,
                                                      Y_segMasks_test,
                                                      batch_size=batchSize,
                                                      verbose=1)

        prob_test = cnn.predict(X_test, batchSize, 0)
    else:
        if usingClassification:
            model_metrics = cnn.metrics_names
            loss_test, segmentation_output_loss_test, classification_output_loss_test, segmentation_output_dice_coef_test, classification_output_acc_test \
                = cnn.evaluate_generator(test_gen, verbose=1)
        else:
            score_test, dice_coef_test = cnn.evaluate_generator(
                test_gen, verbose=1)

        prob_test = cnn.predict_generator(test_gen, verbose=0)

    # save model
    json_string = cnn.to_json()
    with open(model_json, 'w') as jsonFile:
        jsonFile.write(json_string)

    # wei = cnn.get_weights()
    cnn.save_weights(weight_name, overwrite=True)
    # cnn.save(model_all) # keras > v0.7

    if not usingClassification:
        # matlab
        dice_coef_training = result.history['dice_coef']
        training_loss = result.history['loss']
        if X_valid != 0:
            val_dice_coef = result.history['val_dice_coef']
            val_loss = result.history['val_loss']
        else:
            val_dice_coef = 0
            val_loss = 0

        print('Saving results: ' + model_name)

        sio.savemat(
            model_name, {
                'model_settings': model_json,
                'model': model_all,
                'weights': weight_name,
                'dice_coef': dice_coef_training,
                'training_loss': training_loss,
                'val_dice_coef': val_dice_coef,
                'val_loss': val_loss,
                'score_test': score_test,
                'dice_coef_test': dice_coef_test,
                'prob_test': prob_test
            })
    else:
        # matlab
        segmentation_output_loss_training = result.history[
            'segmentation_output_loss']
        classification_output_loss_training = result.history[
            'classification_output_loss']
        segmentation_output_dice_coef_training = result.history[
            'segmentation_output_dice_coef']
        classification_output_acc_training = result.history[
            'classification_output_acc']

        if X_valid != 0:
            val_segmentation_output_loss = result.history[
                'val_segmentation_output_loss']
            val_classification_output_loss = result.history[
                'val_classification_output_loss']
            val_segmentation_output_dice_coef = result.history[
                'val_segmentation_output_dice_coef']
            val_classification_output_acc = result.history[
                'val_classification_output_acc']
        else:
            val_segmentation_output_loss = 0
            val_classification_output_loss = 0
            val_segmentation_output_dice_coef = 0
            val_classification_output_acc = 0

        print('Saving results: ' + model_name)

        sio.savemat(
            model_name, {
                'model_settings': model_json,
                'model': model_all,
                'weights': weight_name,
                'segmentation_output_loss_training':
                segmentation_output_loss_training,
                'classification_output_loss_training':
                classification_output_loss_training,
                'segmentation_output_dice_coef_training':
                segmentation_output_dice_coef_training,
                'classification_output_acc_training':
                classification_output_acc_training,
                'segmentation_output_loss_val': val_segmentation_output_loss,
                'classification_output_loss_val':
                val_classification_output_loss,
                'segmentation_output_dice_coef_val':
                val_segmentation_output_dice_coef,
                'classification_output_acc_val': val_classification_output_acc,
                'loss_test': loss_test,
                'segmentation_output_loss_test': segmentation_output_loss_test,
                'classification_output_loss_test':
                classification_output_loss_test,
                'segmentation_output_dice_coef_test':
                segmentation_output_dice_coef_test,
                'classification_output_acc_test':
                classification_output_acc_test,
                'segmentation_predictions': prob_test[0],
                'classification_predictions': prob_test[1]
            })
Example 19
def model(params, batch_size=b_size, nb_epoch=n_epoch, is_train=True):
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    print len(test_df)
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    print len(test_labels)
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))

    model_path = (DATA_ROOT + 'models/model_' + FUNCTION + '.h5')
    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')
    print train_data
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)
    is_train = True
    if is_train:
        model = get_model(params)
        model.fit_generator(train_generator,
                            samples_per_epoch=len(train_data[0]),
                            nb_epoch=nb_epoch,
                            validation_data=valid_generator,
                            nb_val_samples=len(val_data[0]),
                            max_q_size=batch_size,
                            callbacks=[checkpointer, earlystopper])
    logging.info('Loading best model')
    start_time = time.time()
    model = load_model(model_path)
    logging.info('Loading time: %d' % (time.time() - start_time))
    start_time = time.time()
    preds = model.predict_generator(test_generator,
                                    val_samples=len(test_data[0]))
    running_time = time.time() - start_time
    logging.info('Running time: %d %d' % (running_time, len(test_data[0])))
    logging.info('Computing performance')
    # pred_file="pred"+FUNCTION+".txt"
    # test_file ="test"+FUNCTION+".txt"
    # gos_file = "test"+FUNCTION+"_goc.txt"
    # write_file(pred_file,preds)
    # write_file(test_file,test_labels)
    # write_file(gos_file,test_gos)
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print('f :%.3f & p: %.3f & r: %.3f & roc_auc: %.3f & mcc: %.3f' %
          (f, p, r, roc_auc, mcc))
    write_results([f, p, r, roc_auc, mcc])
    proteins = test_df['proteins']
    predictions = list()
    for i in xrange(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame({
        'proteins': proteins,
        'predictions': predictions,
        'gos': test_df['gos'],
        'labels': test_df['labels']
    })
    print df
    df.to_pickle('test' + FUNCTION + 'preds.pkl')
Example 20
test_list_IDs = list_IDs[int(train_test_ratio*length_data):]
test_list_xmls = list_xmls[int(train_test_ratio*length_data):]


# In[3]:


def yolo_loss_(y_true, y_pred):
    return y_pred


# In[4]:


train_generator = DataGenerator(train_list_IDs, train_list_xmls, num_class, cls2id, anchors, batch_size, image_size, max_boxes=max_boxes, is_training=True)
val_generator = DataGenerator(test_list_IDs, test_list_xmls, num_class, cls2id, anchors, batch_size, image_size, max_boxes=max_boxes, is_training=False)

model = yolov3_model(num_class, anchors, max_boxes, image_size, batch_size, is_training=True)
# model = yolov3_model(num_class, anchors, max_boxes, image_size, is_training=True)

if finetune:
    
    lr = lr*0.5
    yolov3_filepath = './models/yolov3_weights.h5'
    
    model.load_weights(yolov3_filepath, by_name=True)
    
    
# myyolo_loss = partial(yolo_loss, anchors=anchors, num_classes=num_class, image_size=(416,416), ignore_thresh=0.5)
# myyolo_loss.__name__ = 'myyolo_loss'
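# The identity yolo_loss_ defined above is a common Keras YOLOv3 trick: the
# real loss is computed inside the graph (typically by a Lambda layer) and
# emitted as the model's output, so the compile-time "loss" only has to pass
# that tensor through. A hedged compile/fit sketch under that assumption;
# the Adam optimizer, the epochs value, and relying on the generator being a
# keras Sequence (so steps_per_epoch can be omitted) are all illustrative
# assumptions, not taken from this snippet.
from keras.optimizers import Adam

model.compile(optimizer=Adam(lr), loss=yolo_loss_)
model.fit_generator(train_generator,
                    validation_data=val_generator,
                    epochs=10)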
def model():
    # set parameters:
    batch_size = 128
    nb_epoch = 100
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))

    pre_model_path = DATA_ROOT + 'pre_model_weights_' + FUNCTION + '.pkl'
    model_path = DATA_ROOT + 'model_weights_' + FUNCTION + '.pkl'
    last_model_path = DATA_ROOT + 'model_weights_' + FUNCTION + '.last.pkl'
    checkpointer = MyCheckpoint(filepath=model_path,
                                verbose=1,
                                save_best_only=True,
                                save_weights_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    model = get_model()
    # logging.info('Loading pretrained weights')
    # load_model_weights(model, pre_model_path)

    logging.info('Starting training the model')

    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    # model.fit_generator(
    #     train_generator,
    #     samples_per_epoch=len(train_data[0]),
    #     nb_epoch=nb_epoch,
    #     validation_data=valid_generator,
    #     nb_val_samples=len(val_data[0]),
    #     max_q_size=batch_size,
    #     callbacks=[checkpointer, earlystopper])

    logging.info('Loading weights')
    load_model_weights(model, model_path)
    model.save(DATA_ROOT + 'model_%s.h5' % FUNCTION)
    logging.info('Predicting')
    preds = model.predict_generator(test_generator,
                                    val_samples=len(test_data[0]))
    # incon = 0
    # for i in xrange(len(test_data)):
    #     for j in xrange(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    logging.info('Computing performance')
    f, p, r, preds_max = compute_performance(preds, test_labels, test_gos)
    # roc_auc = compute_roc(preds, test_labels)
    # logging.info('Fmax measure: \t %f %f %f' % (f, p, r))
    # logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    # logging.info('Inconsistent predictions: %d' % incon)
    logging.info('Saving the predictions')
    proteins = test_df['proteins']
    predictions = list()
    for i in range(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame({
        'proteins': proteins,
        'predictions': predictions,
        'gos': test_df['gos'],
        'labels': test_df['labels']
    })
    df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-predictions.pkl')
    logging.info('Done in %d sec' % (time.time() - start_time))
Example No. 22
0
def main(args):
    contents = [
        PointToTargetContent, ChangeDetectionContent, OddOneOutContent,
        VisualSearchContent, MultipleObjectTrackingContent,
        RandomDotMotionDiscriminationContent
    ]
    content = contents[args.content - 1]()
    dg = DataGenerator(content, retina=args.retina)
    print('egocentric images: {} episode, {} length'.format(
        args.episode, args.length))
    print('allocentric images: {} scene'.format(args.scene))
    print('image shape: {} height, {} width, {} channel'.format(128, 128, 3))
    print('collecting egocentric images...')
    dg.generate_egocentric_images(episode=args.episode,
                                  length=args.length,
                                  inplace=True)
    e_path = dg.save_egocentric_images(dirname='images',
                                       prefix='egocentric_images')
    dg.reset_egocentric_images()
    print('save {}'.format(str(e_path)))
    print('collecting allocentric images...')
    dg.generate_allocentric_images(scene=args.scene, inplace=True)
    a_path = dg.save_allocentric_images(dirname='images',
                                        prefix='allocentric_images')
    dg.reset_allocentric_images()
    print('save {}'.format(str(a_path)))
Example No. 23
0
output_dim = 185
# Display frequency (print/#batch)
display_step = 400

writer_path = "visualization"
checkpoint_path = "checkpoints"

if os.path.exists(writer_path):
    shutil.rmtree(writer_path)
os.makedirs(writer_path)
'''main part of training'''
# Place data loading and pre-processing on cpu
with tf.device('/cpu:0'):
    train_data = DataGenerator(train_list,
                               image_size[0],
                               output_dim,
                               mode='training',
                               batch_size=batch_size,
                               shuffle=True)
    val_data = DataGenerator(val_list,
                             image_size[0],
                             output_dim,
                             mode='inference',
                             batch_size=batch_size,
                             shuffle=False)

    # Create a reinitializable iterator given the data structure
    iterator = Iterator.from_structure(train_data.data.output_types,
                                       train_data.data.output_shapes)
    next_batch = iterator.get_next()

# Ops for initializing the two different iterators
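# A hedged sketch of those initializer ops, assuming train_data.data and
# val_data.data are tf.data.Dataset objects (TF1-style reinitializable
# iterator API); running one of these ops in a tf.Session re-points the
# shared iterator at the corresponding dataset:
training_init_op = iterator.make_initializer(train_data.data)
validation_init_op = iterator.make_initializer(val_data.data)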
if not use_loaded_model:
    print("Pre-Training")
    if pred_weight != 0:
        H = F.predict([x1[train_idx], x2[train_idx]], batch_size=batch_size)
        set_trainability(K, True)
        K.fit(H,
              leaky_features[train_idx],
              epochs=10,
              batch_size=batch_size,
              sample_weight=sample_weight[train_idx])

    data_generator = DataGenerator(x1[train_idx],
                                   x2[train_idx],
                                   y[train_idx],
                                   leaky_features[train_idx],
                                   leaky_features_adv[train_idx],
                                   sample_weight[train_idx],
                                   batch_size=batch_size,
                                   shuffle=True,
                                   data_gen_mode=data_gen_mode)
    y_loss = []
    l_loss = []
    l_adv_loss = []

    lr_adn = lr
    lr_k = lr
    val_best = -1
    num_no_improv = 0
    best_epoch = -1
    for epoch in range(num_epochs):
        y_loss_batch = []
Example No. 25
0
    tensorboard_cb = TensorBoard(log_dir=log_dir)

    callbacks_list = [
        checkpoint_cb,
        # lr_cb,
        #earlystopping_cb,
        tensorboard_cb
    ]

    # Generate data
    image_shape = (args.image_size, ) * 3
    #FAIL: (144,144,144) #(160,160,144) #(192,192,144) #(208,208,144) #(240,240,144)
    gen_factor = 1
    train_gen = DataGenerator(train_ids,
                              src_dir,
                              n_samples=n_train * gen_factor,
                              rotation_range=0.4,
                              batch_size=args.batch_size,
                              image_shape=image_shape)
    valid_gen = DataGenerator(valid_ids,
                              src_dir,
                              n_samples=n_val * gen_factor,
                              rotation_range=0.4,
                              batch_size=args.batch_size,
                              image_shape=image_shape)
    test_gen = DataGenerator(test_ids,
                             src_dir,
                             n_samples=n_test * gen_factor,
                             rotation_range=0.4,
                             batch_size=args.batch_size,
                             image_shape=image_shape)
    train_steps = len(train_ids) * gen_factor // args.batch_size
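    # A hedged continuation sketch: how these generators and step counts are
    # typically handed to Keras. The `model` object, the epochs value, and the
    # validation-step formula are assumed placeholders, not part of this snippet.
    valid_steps = len(valid_ids) * gen_factor // args.batch_size
    model.fit_generator(train_gen,
                        steps_per_epoch=train_steps,
                        validation_data=valid_gen,
                        validation_steps=valid_steps,
                        epochs=100,
                        callbacks=callbacks_list)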
Example No. 26
0
def train_model(batch_size=128,
                epochs=100,
                is_train=True,
                model_path='data/model.h5'):
    # set parameters:
    start_time = time.time()
    logging.info("Loading Data")
    train, valid, test = load_data()
    train_data, train_labels = train
    valid_data, valid_labels = valid
    test_data, test_labels = test

    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % train_data.shape[0])
    logging.info("Validation data size: %d" % valid_data.shape[0])
    logging.info("Test data size: %d" % test_data.shape[0])

    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')

    train_generator = DataGenerator(batch_size)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size)
    valid_generator.fit(valid_data, valid_labels)
    test_generator = DataGenerator(batch_size)
    test_generator.fit(test_data, test_labels)

    if is_train:
        valid_steps = int(math.ceil(valid_data.shape[0] / batch_size))
        train_steps = int(math.ceil(train_data.shape[0] / batch_size))
        model = get_model()
        model.fit_generator(train_generator,
                            steps_per_epoch=train_steps,
                            epochs=epochs,
                            validation_data=valid_generator,
                            validation_steps=valid_steps,
                            max_queue_size=batch_size,
                            workers=12,
                            callbacks=[checkpointer, earlystopper])

    logging.info('Loading best model')
    model = load_model(model_path)

    logging.info('Predicting')
    test_steps = int(math.ceil(test_data.shape[0] / batch_size))
    preds = model.predict_generator(test_generator,
                                    steps=test_steps,
                                    verbose=1)

    logging.info('Computing performance')
    test_labels = test_labels.toarray()
    f, p, r, t, preds_max = compute_performance(preds, test_labels)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print('%.3f & %.3f & %.3f & %.3f & %.3f' % (f, p, r, roc_auc, mcc))
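# A hedged sketch of the Fmax-style threshold sweep that compute_performance
# implements in spirit: sweep a global decision threshold, compute
# micro-averaged precision and recall, and keep the threshold that maximizes
# F1. The project's real function may differ (e.g. per-protein averaging or
# GO-aware propagation); this is only an illustration.
def fmax_sketch(preds, labels):
    best_f, best_p, best_r, best_t = 0.0, 0.0, 0.0, 0.0
    for t in [x / 100.0 for x in range(1, 100)]:
        pred_bin = (preds >= t).astype('int32')
        tp = float((pred_bin * labels).sum())
        fp = float((pred_bin * (1 - labels)).sum())
        fn = float(((1 - pred_bin) * labels).sum())
        if tp == 0:
            continue
        p = tp / (tp + fp)
        r = tp / (tp + fn)
        f = 2 * p * r / (p + r)
        if f > best_f:
            best_f, best_p, best_r, best_t = f, p, r, t
    return best_f, best_p, best_r, best_t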
Example No. 27
0
import bunch
import numpy as np
import tensorflow as tf

from transform import PCA
from utils import DataGenerator

config = bunch.Bunch({
    'amplitude': 1.0,
    'length_scale': 10.0,
    'n_features': 500,
    'n_components': 10,
    'n_iterations': 1000,
    'learning_rate': 0.01
})

train_file = '../data/train.npy'
observed_data = DataGenerator(train_file, config.n_features)

with tf.name_scope('data'):
    X = tf.placeholder(dtype=tf.float64,
                       shape=[None, config.n_features],
                       name='features')
    y = tf.placeholder(dtype=tf.float64, shape=[None], name='targets')

with tf.name_scope('PCA'):
    pca = PCA(config.n_components)
    Xt = pca.fit_transform(X)

with tf.name_scope('hyperparameters'):
    sigma = tf.Variable(initial_value=config.amplitude,
                        name='sigma',
                        dtype=np.float64)
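    # A hedged sketch: a second hyperparameter and the squared-exponential
    # (RBF) kernel that amplitude/length_scale usually parameterize. Both the
    # length_scale variable and the helper below are illustrative assumptions;
    # the original snippet is cut off before any kernel is defined.
    length_scale = tf.Variable(initial_value=config.length_scale,
                               name='length_scale',
                               dtype=np.float64)


def rbf_kernel(a, b):
    # k(a, b) = sigma^2 * exp(-||a - b||^2 / (2 * length_scale^2))
    sq_dists = tf.reduce_sum(
        (tf.expand_dims(a, 1) - tf.expand_dims(b, 0)) ** 2, axis=-1)
    return sigma ** 2 * tf.exp(-sq_dists / (2.0 * length_scale ** 2))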
Example No. 28
0
    test_historical = historical_hot(test_codes_x, len(code_map))

    visit_rnn_dims = [200]
    hyper_params = {
        'code_dims': [32, 32, 32, 32],
        'patient_dim': 16,
        'word_dim': 16,
        'patient_hidden_dims': [32],
        'code_hidden_dims': [64, 128],
        'visit_rnn_dims': visit_rnn_dims,
        'visit_attention_dim': 32,
        'note_attention_dim': visit_rnn_dims[-1]
    }

    test_codes_gen = DataGenerator(
        [test_codes_x, test_visit_lens, test_note_x, test_note_lens],
        shuffle=False)

    def lr_schedule_fn(epoch, lr):
        if epoch < 20:
            lr = 0.01
        elif epoch < 100:
            lr = 0.001
        elif epoch < 200:
            lr = 0.0001
        else:
            lr = 0.00001
        return lr

    lr_scheduler = LearningRateScheduler(lr_schedule_fn)
    test_callback = EvaluateCodesCallBack(test_codes_gen,
Example No. 29
0
def model(batch_size=128, nb_epoch=100, is_train=True):
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data))
    logging.info("Validation data size: %d" % len(val_data))
    logging.info("Test data size: %d" % len(test_data))

    model_path = DATA_ROOT + 'models/model_seq_' + FUNCTION + '.h5'
    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')

    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    if is_train:
        model = get_model()
        model.fit_generator(train_generator,
                            samples_per_epoch=len(train_data),
                            nb_epoch=nb_epoch,
                            validation_data=valid_generator,
                            nb_val_samples=len(val_data),
                            max_q_size=batch_size,
                            callbacks=[checkpointer, earlystopper])

    logging.info('Loading best model')
    model = load_model(model_path)

    # Extract the embedded sequence sub-model and inspect its output shape.
    model = model.layers[1]
    output = model.predict_generator(test_generator,
                                     val_samples=len(test_data))
    print(output.shape)
    # NOTE: early return below; the evaluation code that follows never runs.
    return
    logging.info('Predicting')
    preds = model.predict_generator(test_generator, val_samples=len(test_data))
    # incon = 0
    # for i in range(len(test_data)):
    #     for j in range(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    logging.info('Computing performance')
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print('%.3f & %.3f & %.3f & %.3f & %.3f' % (f, p, r, roc_auc, mcc))
    # logging.info('Inconsistent predictions: %d' % incon)
    # logging.info('Saving the predictions')
    # proteins = test_df['proteins']
    # predictions = list()
    # for i in range(preds_max.shape[0]):
    #     predictions.append(preds_max[i])
    # df = pd.DataFrame(
    #     {
    #         'proteins': proteins, 'predictions': predictions,
    #         'gos': test_df['gos'], 'labels': test_df['labels']})
    # df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-predictions.pkl')
    # logging.info('Done in %d sec' % (time.time() - start_time))

    function_centric_performance(functions, preds.T, test_labels.T)
Example No. 30
0

if __name__ == "__main__":

    import numpy as np
    import torch.optim as optim
    from torch.utils.data import Dataset, DataLoader
    from utils import sampling, DataGenerator

    args = parameters.get_config()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    #data = sampling(50, 100, "sin")
    #data2 = sampling(50, 100, "cos", phase=0.5*np.pi)
    #data = np.concatenate([data, data2], 0)
    data, noised_data = sampling(100, 100, "sin", noise=True)
    generator = DataGenerator(noised_data, data)
    inputs = DataLoader(generator, batch_size=args.batch_size)
    trainer = Trainer(args)
    #trainer.build_model(RNN)
    trainer.build_model(LSTM)
    #trainer.train(inputs)

    #rnn = trainer.model
    #rnn.eval()
    #output_log = []
    #for i in range(99):
    #    if i == 0:
    #        #cur_input = torch.zeros([1,2])
    #        cur_input = torch.zeros([1, 2])
    #        cur_input.data.numpy()[0, 0] = -0.75
    #        state = None