    def load_dataset(self,
                     data_subset_id,
                     size=160,
                     sample='Normal',
                     res=0.5,
                     rating_scale='none',
                     configuration=None,
                     dataset_type='Clean'):
        # prepare test data
        self.images_test, self.labels_test, self.classes_test, self.masks_test, self.meta_test, _ = \
            prepare_data_direct(
                load_nodule_dataset(size=size, res=res, sample=sample,
                                    dataset_type=dataset_type, configuration=configuration)[data_subset_id],
                size=size,
                return_meta=True, objective="rating", rating_scale=rating_scale, verbose=1,
                balanced=False)

        print("Data ready: images({}), labels({})".format(
            self.images_test[0].shape, self.labels_test.shape))
        print("Range = [{:.2f},{:.2f}]".format(np.min(self.images_test[0]),
                                               np.max(self.images_test[0])))

        self.images_test = np.array([
            crop_center(im, msk, size=self.in_size)[0]
            for im, msk in zip(self.images_test, self.masks_test)
        ])

        print("Image size changed to {}".format(self.images_test.shape))
        print('Mask not updated')

    def predict_malignancy(self,
                           weights_file,
                           out_filename,
                           data_subset_id,
                           dataset_type='Clean',
                           configuration=None):

        input_shape = (self.model_size, self.model_size, 1)

        # prepare model
        model = DirectArch(miniXception_loader,
                           input_shape,
                           objective="malignancy",
                           pooling=self.pooling,
                           output_size=self.out_size,
                           normalize=True)
        if weights_file is not None:
            model.load_weights(weights_file)
            print('Load from: {}'.format(weights_file))
        else:
            print('Model without weights')

        # prepare test data
        images_test, labels_test, classes_test, masks_test, meta_test = \
            prepare_data_direct(
                load_nodule_dataset(size=self.data_size, res=self.res, sample=self.sample,
                                    dataset_type=dataset_type, configuration=configuration)[data_subset_id],
                size=self.model_size, return_meta=True, objective="malignancy", verbose=1, balanced=False)

        print("Data ready: images({}), labels({})".format(
            images_test[0].shape, labels_test.shape))
        print("Range = [{:.2f},{:.2f}]".format(np.min(images_test[0]),
                                               np.max(images_test[0])))

        images_test = np.array([
            crop_center(im, msk, size=self.model_size)[0]
            for im, msk in zip(images_test, masks_test)
        ])

        print("Image size changed to {}".format(images_test.shape))
        print('Mask not updated')

        # eval
        print("Begin Predicting...")
        pred = model.predict(images_test, round=False)
        print("Predication Ready")
        print("\tshape = {}\n\trange [{}, {}]".format(pred.shape, np.min(pred),
                                                      np.max(pred)))

        pickle.dump((images_test, pred, meta_test, labels_test, masks_test),
                    open(out_filename, 'bw'))
        print("Saved to: {}".format(out_filename))

        return (images_test, pred, meta_test, labels_test,
                masks_test), out_filename
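
# A call sketch for the two methods above. The owning class is not shown on
# this page, so `evaluator` stands for an already-constructed instance, and
# the file paths are illustrative only.
evaluator.load_dataset(data_subset_id=2, size=160, res=0.5, sample='Normal')
results, out_file = evaluator.predict_malignancy(
    weights_file='Weights/w_dir300c0_60.h5',      # illustrative checkpoint path
    out_filename='output/pred_malig_dir300c0.p',  # illustrative output path
    data_subset_id=2)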
Example #3
    def get_flat_data(self, dataset):
        images, labels, classes, masks, meta, conf, nodule_size, rating_weights, z = \
            prepare_data(dataset, rating_format='raw', verbose=True, reshuffle=False)
        if self.model_size != self.data_size:
            if self.seq_model:
                images = format_data_as_sequence(images, embed_size=self.model_size)
            else:
                images = np.array([
                    crop_center(im, msk, size=self.model_size)[0]
                    for im, msk in zip(images, masks)
                ])
        return images, labels, classes, masks, meta, conf, nodule_size, rating_weights, z
Example #4
    def prepare_data(self, data_subset_id, dataset_type='Clean', configuration=None):

        images, labels, classes, masks, meta, conf = \
            prepare_data(load_nodule_dataset(size=self.data_size, res=self.data_res,
                                             sample=self.data_sample, dataset_type=dataset_type,
                                             configuration=configuration)[data_subset_id],
                         reshuffle=False,
                         return_meta=True,
                         verbose=1)
        self.images = np.array([crop_center(im, msk, size=self.net_in_size)[0]
                                for im, msk in zip(images, masks)])
        self.meta   = meta
        self.labels = labels
        self.masks  = masks
        print("Image size changed to {}".format(self.images.shape))
        print('Mask not updated')
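
# The crop-and-stack idiom above recurs in nearly every snippet on this page.
# A minimal helper capturing it (a sketch: crop_center comes from the
# project's utilities and is assumed to return a (cropped_image, cropped_mask)
# pair, as its uses here imply):
import numpy as np

def crop_images(images, masks, size):
    # keep only the image half of each (image, mask) pair
    return np.array([crop_center(im, msk, size=size)[0]
                     for im, msk in zip(images, masks)])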
Example #5
from functools import reduce  # used below; import added for completeness


def process_data(data):
    # `epoch`, `c` (config index) and `input_size` are free variables taken
    # from the enclosing scope of the original script.
    embed, epochs, meta, images, classes, labels, masks, z = data
    embed = np.squeeze(embed[np.array(epochs) == epoch])
    print('Loaded embedding: {} for config #{}'.format(embed.shape, c))

    nodule_ids = [reduce(lambda x, y: x + y, [m[0]] + m[-1]) for m in meta]
    unique_ids, id_map = np.unique(nodule_ids, return_inverse=True)

    dataset = []
    for i, uid in enumerate(unique_ids):
        curr_roi = np.array([int(j) for j in np.argwhere(id_map == i)])
        new_order = np.argsort(
            np.array([z[idx] for idx in curr_roi]).flatten())
        curr_roi = curr_roi[new_order]

        roi_volume = {}
        roi_volume['embed'] = np.moveaxis(
            np.array([embed[idx] for idx in curr_roi]), 0, 2)
        roi_volume['patch'] = np.array([images[idx] for idx in curr_roi
                                        ]).swapaxes(0, -1).squeeze(axis=0)
        roi_volume['mask'] = np.array([
            crop_center(image=None, mask=masks[idx], size=input_size)[1]
            for idx in curr_roi
        ]).swapaxes(0, -1).squeeze(axis=0)

        roi_volume['rating'] = labels[curr_roi[0]]
        roi_volume['label'] = classes[curr_roi[0]]
        roi_volume['info'] = meta[curr_roi[0]]

        roi_volume['z'] = np.array([z[idx] for idx in curr_roi])
        roi_volume['size'] = np.count_nonzero(roi_volume['mask'])
        roi_volume['weights'] = None

        dataset.append(roi_volume)

    return dataset
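
# A usage sketch for process_data. It assumes the ten-field pickle layout
# written by the embed() method further down this page; the file name, the
# free-variable values, and the repacking to eight fields are illustrative.
import pickle

epoch, c, input_size = 70, 0, 128  # free variables process_data expects in scope

with open('output/embed_dirR813c0_Valid.p', 'rb') as f:  # hypothetical file
    embedding, epochs_done, meta, images, classes, labels, masks, conf, rating_weights, z = pickle.load(f)

volumes = process_data((embedding, epochs_done, meta, images, classes, labels, masks, z))
print('{} nodule volumes, first embed shape: {}'.format(len(volumes), volumes[0]['embed'].shape))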
Example #6
def run(choose_model="DIR",
        epochs=200,
        config=0,
        skip_validation=False,
        no_training=False):

    np.random.seed(1337)
    random.seed(1337)
    tf.set_random_seed(1234)
    K.set_session(tf.Session(graph=tf.get_default_graph()))

    ## --------------------------------------- ##
    ## ------- General Setup ----------------- ##
    ## --------------------------------------- ##

    #data
    dataset_type = 'Primary'
    data_size = 160
    if no_training:
        data_size = 160
    res = 0.5  # 'Legacy' #0.7 #0.5 #'0.5I'
    sample = 'Normal'  # 'UniformNC' #'Normal' #'Uniform'
    use_gen = True
    #model
    model_size = 128
    input_shape = (model_size, model_size, 1)
    normalize = True
    out_size = 128
    do_augment = True
    if no_training:
        do_augment = False
    preload_weight = None

    print("-" * 30)
    print("Running {} for --** {} **-- model, with #{} configuration".format(
        "training" if not no_training else "validation", choose_model, config))
    print(
        "\tdata_size = {},\n\tmodel_size = {},\n\tres = {},\n\tdo_augment = {}"
        .format(data_size, model_size, res, do_augment))
    print("\tdataset_type = {}".format(dataset_type))
    print("-" * 30)

    model = None

    data_augment_params = {
        'max_angle': 30,
        'flip_ratio': 0.5,
        'crop_stdev': 0.15,
        'epoch': 0
    }

    data_loader = build_loader(size=data_size,
                               res=res,
                               sample=sample,
                               dataset_type=dataset_type,
                               configuration=config)

    ## --------------------------------------- ##
    ## ------- Prepare Direct Architecture ------- ##
    ## --------------------------------------- ##

    if choose_model is "DIR":
        # run = '300'  # SPIE avg-pool (data-aug, balanced=False,class_weight=True)
        # run = '301'  # SPIE max-pool (data-aug, balanced=False,class_weight=True)
        # run = '302'  # SPIE rmac-pool (data-aug, balanced=False,class_weight=True)

        # run = 'zzz'

        model = DirectArch(miniXception_loader,
                           input_shape,
                           output_size=out_size,
                           normalize=normalize,
                           pooling='msrmac')
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)
        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                balanced=False,
                data_size=data_size,
                model_size=model_size,
                batch_size=32,
                do_augment=do_augment,
                augment=data_augment_params,
                use_class_weight=True,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample)
            images_train, labels_train, class_train, masks_train, _ = prepare_data_direct(
                dataset[2], num_of_classes=2)
            images_valid, labels_valid, class_valid, masks_valid, _ = prepare_data_direct(
                dataset[1], num_of_classes=2)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=32)

    if choose_model is "DIR_RATING":

        ### CLEAN SET
        # run = '800'  # rmac conf:size
        # run = '801'  # rmac conf:none
        # run = '802'  # rmac conf:rating-std
        # run = '803'  # max conf:none

        ### PRIMARY SET
        # run = '810'  # rmac conf:size
        # run = '811'  # rmac conf:none
        # run = '812'  # rmac conf:rating-std
        run = '813'  # max conf:none

        # run = 'zzz'

        obj = 'rating'  # 'distance-matrix' 'rating' 'rating-size'

        rating_scale = 'none'
        reg_loss = None  # {'SampleCorrelation': 0.0}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'
        batch_size = 32

        epoch_pre = 20
        preload_weight = None  # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=epoch_pre)

        model = DirectArch(miniXception_loader,
                           input_shape,
                           output_size=out_size,
                           objective=obj,
                           separated_prediction=False,
                           normalize=normalize,
                           pooling='max',
                           l1_regularization=None,
                           regularization_loss=reg_loss,
                           batch_size=batch_size)
        model.model.summary()

        if preload_weight is not None:
            model.load_core_weights(preload_weight)

        # scheduale 02
        should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size')
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 20, 'weights': [0.4, 0.6]},
                 {'epoch': 40, 'weights': [0.6, 0.4]},
                 {'epoch': 60, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []

        loss = 'logcosh' if obj != 'distance-matrix' else pearson_correlation
        model.compile(
            learning_rate=1e-3, decay=0, loss=loss, scheduale=sched
        )  # mean_squared_logarithmic_error, binary_crossentropy, logcosh

        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                data_size=data_size,
                model_size=model_size,
                batch_size=batch_size,
                objective=obj,
                rating_scale=rating_scale,
                weighted_rating=(obj == 'distance-matrix'),
                balanced=False,
                do_augment=do_augment,
                augment=data_augment_params,
                use_class_weight=False,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample,
                                          dataset_type=dataset_type)
            images_train, labels_train, masks_train = prepare_data_direct(
                dataset[2], objective='rating', rating_scale=rating_scale)
            images_valid, labels_valid, masks_valid = prepare_data_direct(
                dataset[1], objective='rating', rating_scale=rating_scale)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=batch_size)

    ## --------------------------------------- ##
    ## ------- Prepare Siamese Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model is "SIAM":
        # run = '300'  # l1, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '301'  # l1, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '302'  # l1, rmac-pool (data-aug, balanced=True, class_weight=False)
        # run = '310'  # l2, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '311'  # l2, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '312'  # l2, rmac-pool (data-aug, balanced=True, class_weight=False)
        # run = '320'  # cos, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '321'  # cos, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '322b'  # cos, rmac-pool (data-aug, balanced=True, class_weight=False)

        # b/c - changed margin-loss params
        # run = '313c'  # l2, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '314c'  # l2, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '323c'  # cos, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '324c'  # cos, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)

        # run = 'zzz'

        batch_size = 64 if local else 128  # 'local' is a module-level flag in the original script

        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=True,
                                      objective="malignancy",
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         batch_size=batch_size,
                         distance='l2',
                         normalize=normalize,
                         pooling='msrmac')
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)
        if use_gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    if choose_model is "SIAM_RATING":
        ### clean set
        # run = '400'  # l2-rmac no-conf
        # run = '401'  # cosine-rmac no-conf
        # run = '402'  # l2-rmac conf
        # run = '403'  # cosine-rmac conf
        # run = '404'  # l2-max no-conf
        # run = '405'  # cosine-max no-conf

        ### primary set
        # run = '410'  # l2-rmac no-conf
        # run = '411'  # cosine-rmac no-conf
        # run = '412'  # l2-rmac conf
        # run = '413'  # cosine-rmac conf
        # run = '414'  # l2-max no-conf
        # run = '415'  # cosine-max no-conf

        # run = 'zzz'

        obj = 'rating'  # rating / size / rating_size
        batch_size = 16 if local else 64
        reg_loss = None  # {'SampleCorrelation': 0.1}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'

        epoch_pre = 60
        preload_weight = None  # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=70)

        should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size')
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 30, 'weights': [0.4, 0.6]},
                 {'epoch': 60, 'weights': [0.6, 0.4]},
                 {'epoch': 80, 'weights': [0.9, 0.1]},
                 {'epoch': 100, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 20, 'weights': [0.4, 0.6]},
                 {'epoch': 30, 'weights': [0.6, 0.4]},
                 {'epoch': 50, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      train_factor=2,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=False,
                                      objective=obj,
                                      weighted_rating=True,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         objective=obj,
                         batch_size=batch_size,
                         distance='cosine',
                         normalize=normalize,
                         pooling='rmac',
                         regularization_loss=reg_loss,
                         l1_regularization=False)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3,
                      decay=0,
                      loss='logcosh',
                      scheduale=sched)  # mean_squared_error, logcosh
        model.load_generator(generator)

    ## --------------------------------------- ##
    ## ------- Prepare Triplet Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model is "TRIPLET":
        #run = 'trip011XXX'  # mrg-loss, decay(0.01), max-pool
        #run = 'trip012X'  # mrg-loss, decay(0.05), rmac-pool
        #run = 'trip013'  # cosine
        #run = 'trip014' # ortogonal initialization
        #run = 'trip015X'  # objective rating
        #run = 'trip016XXXX'  # softplus-loss
        #run = 'trip017'  # softplus-loss, no decay
        #run = 'trip018'  # binary
        #run = 'trip019'  # categorize
        #run = 'trip020X'  # rating-conf-tryout

        #run = 'trip021' # pretrained
        #run = 'trip022XXX'  # pretrained rmac
        #run = 'trip023X'  # pretrained categorize
        #run = 'trip024'  # pretrained confidence
        #run = 'trip025'  # pretrained cat,conf
        #run = 'trip026Z'  # class_weight='rating_distance', cat

        #run = 'trip027'  # obj:malig, rmac, categorize, no-decay
        #run = 'trip028'  # obj:malig, max, categorize, no-decay

        run = 'trip_100'  # obj:malig, msrmac, softplus-loss
        #run = 'trip101'  # obj:malig, msrmac, rank-loss

        objective = 'malignancy'
        use_rank_loss = False

        gen = True
        preload_weight = None  #'./Weights/w_dirR011X_50.h5'

        # model
        model = TripArch(miniXception_loader,
                         input_shape,
                         objective=objective,
                         output_size=out_size,
                         distance='l2',
                         normalize=True,
                         pooling='msrmac',
                         categorize=use_rank_loss)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)  #0.05

        generator = DataGeneratorTrip(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=16,
                                      objective=objective,
                                      balanced=(objective == 'malignancy'),
                                      categorize=True,
                                      val_factor=0 if skip_validation else 3,
                                      train_factor=1,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)
        if gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    ## --------------------------------------- ##
    ## -------      RUN             ------ ##
    ## --------------------------------------- ##

    print('Current Run: {}{}c{}'.format('', run, config))

    if no_training:
        model.last_epoch = epochs
        model.run = '{}{}c{}'.format('', run, config)
    else:
        model.train(run='{}{}c{}'.format('', run, config),
                    epoch=(0 if preload_weight is None else epoch_pre),
                    n_epoch=epochs,
                    gen=use_gen,
                    do_graph=False)

    return model
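
# Call sketches for this entry point; the argument values mirror the
# signature defaults and are illustrative.
model = run(choose_model="DIR_RATING", epochs=100, config=0)  # full training run
model = run(choose_model="DIR_RATING", epochs=100, config=0,
            no_training=True)                                 # restore-only pass, no training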
Example #7
def crop_dataset(dataset, size):
    for entry in dataset:
        patch, mask = crop_center(entry['patch'], entry['mask'], size=size)
        entry['patch'] = patch
        entry['mask'] = mask
    return dataset


def map_size_to_bins(size_arr, steps):
    # The original header of this fragment was lost when the page was
    # extracted; this function name and signature are reconstructed guesses.
    mapping = np.zeros(len(size_arr))
    print(steps)
    for step in steps:
        mapping = mapping + (np.array(size_arr) > step).astype('uint')
    return mapping
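
# Each threshold in `steps` that a size exceeds raises the bin index by one,
# so the result is a per-nodule size class. A worked example with
# illustrative values (using the reconstructed name from above):
size_arr = [3, 12, 45, 80]
steps = [10, 40]                          # two thresholds -> bins 0, 1, 2
print(map_size_to_bins(size_arr, steps))  # [0. 1. 2. 2.]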


def scale(im):
    # mean_ and std_ are normalization constants defined elsewhere in the script
    return (255 * (im * std_ + mean_ + 1000.0) / 1400.0).astype('int')


filename = 'LIDC/NodulePatches144-0.5-IByMalignancy.p'
M, B, U = pickle.load(open(filename, 'br'))

raw_dataset = load_nodule_raw_dataset(size=144, res=0.5, sample='Normal')[0]
dataset = [(crop_center(entry['patch'] * (1.0 + 0.0 * entry['mask']),
                        entry['mask'], 128)[0], np.mean(entry['rating'],
                                                        axis=0),
            entry['label'], entry['info'], entry['size'])
           for entry in raw_dataset]
#dataset += [(normalize(entry['patch'], mean_, std_, [-1000, 400])*(1.0+0.0*entry['mask']), np.mean(entry['rating'], axis=0), -1, entry['info'], entry['size']) for entry in U[:len(U)//5]]

images = [scale(entry[0]) for entry in dataset]
rating = [entry[1] for entry in dataset]
n_rating = [rating_normalize(entry[1], 'Norm') for entry in dataset]
malig_map = [entry[2] for entry in dataset]
meta_data = [entry[3] for entry in dataset]
size_arr = np.array([entry[-1] for entry in dataset])

print(np.min(n_rating, axis=0))
print(np.max(n_rating, axis=0))
    def embed(self, epochs, data='Valid', use_core=True, seq_model=False):

        Weights, Embed = self.config_filenames(self.net_type, use_core)

        # get data from generator
        data_loader = self.get_data_loader(data)
        images, labels, classes, masks, meta, conf, size, rating_weights, z = data_loader(resize=False)

        cropped_images = np.array([crop_center(im, msk, size=self.input_shape[0])[0]
                                   for im, msk in zip(images, masks)])

        if self.net_type == 'dirS' and not use_core:
            labels = size

        start = timer()
        if self.net_type == 'dirRS' and not use_core:
            embedding = [], []
        else:
            embedding = []
        epochs_done = []

        if use_core:
            embed_model = self.extract_core(repool=False)
        else:
            embed_model = self.model

        for epch in epochs:
            # load weights
            try:
                w = None
                w = Weights(run=self.run, epoch=epch)
                assert(w is not None)
            except:
                print("Skipping. {} not found (w {})".format(epch, w))
                continue

            try:
                if use_core:
                    self.load_core_weights(w)
                else:
                    self.load_weights(w)
                # predict
                if seq_model:
                    pred = np.vstack([embed_model.predict(np.expand_dims(im, axis=0), batch_size=1) for im in cropped_images])
                else:
                    pred = embed_model.predict(cropped_images, batch_size=1)
            except:
                print("Epoch {} failed ({})".format(epch, w))
                continue

            if self.net_type == 'dirRS' and not use_core:
                embedding[0].append(np.expand_dims(pred[0], axis=0))
                embedding[1].append(np.expand_dims(pred[1], axis=0))
            else:
                embedding.append(np.expand_dims(pred, axis=0))
            epochs_done.append(epch)

        if self.net_type == 'dirRS' and not use_core:
            embedding = np.concatenate(embedding[0], axis=0), np.concatenate(embedding[1], axis=0)
        else:
            embedding = np.concatenate(embedding, axis=0)
        total_time = (timer() - start) / 60 / 60
        print("Total training time is {:.1f} hours".format(total_time))

        # dump to Embed file
        if self.net_type == 'dirRS' and not use_core:
            out_filenameR = Embed['R'](self.run, data)
            out_filenameS = Embed['S'](self.run, data)
            pickle.dump((embedding[0], epochs_done, meta, images, classes, labels, masks, conf, rating_weights, z), open(out_filenameR, 'bw'))
            pickle.dump((embedding[1], epochs_done, meta, images, classes, size,   masks, conf, rating_weights, z), open(out_filenameS, 'bw'))
            print("Saved embedding of shape {} to: {}".format(embedding[0].shape, out_filenameR))
            print("Saved embedding of shape {} to: {}".format(embedding[1].shape, out_filenameS))
        else:
            out_filename = Embed(self.run, data)
            pickle.dump((embedding, epochs_done, meta, images, classes, labels, masks, conf, rating_weights, z), open(out_filename, 'bw'))
            print("Saved embedding of shape {} to: {}".format(embedding.shape, out_filename))
def run(choose_model="DIR",
        epochs=200,
        config=0,
        skip_validation=False,
        no_training=False,
        config_name='LEGACY',
        load_data_from_predictions=False):

    np.random.seed(1337)
    random.seed(1337)
    tf.set_random_seed(1234)
    K.set_session(tf.Session(graph=tf.get_default_graph()))

    ## --------------------------------------- ##
    ## ------- General Setup ----------------- ##
    ## --------------------------------------- ##

    #data
    dataset_type = 'Primary'
    data_size = 160
    if no_training:
        data_size = 160
    res = 0.5  # 'Legacy' #0.7 #0.5 #'0.5I'
    sample = 'Normal'  # 'UniformNC' #'Normal' #'Uniform'
    data_run = '813'
    data_epoch = 70
    return_predicted_ratings = not no_training
    use_gen = True
    #model
    model_size = 128
    input_shape = (model_size, model_size, 1)
    normalize = True
    out_size = 128
    do_augment = True
    if no_training:
        do_augment = False
    preload_weight = None

    print("-" * 30)
    print("Running {} for --** {} **-- model, with #{} configuration".format(
        "training" if not no_training else "validation", choose_model, config))
    if load_data_from_predictions:
        print(
            "\tdata_run = {}, \n\tdata_epoch = {}, return_predicted_ratings = {}"
            .format(data_run, data_epoch, return_predicted_ratings))
    else:
        print(
            "\tdata_size = {},\n\tmodel_size = {},\n\tres = {},\n\tdo_augment = {}"
            .format(data_size, model_size, res, do_augment))
        print("\tdataset_type = {}".format(dataset_type))
    print("-" * 30)

    model = None

    data_augment_params = {
        'max_angle': 30,
        'flip_ratio': 0.5,
        'crop_stdev': 0.15,
        'epoch': 0
    }

    data_loader = build_loader(
        size=data_size,
        res=res,
        sample=sample,
        dataset_type=dataset_type,
        config_name=config_name,
        configuration=config,
        run=data_run,
        epoch=data_epoch,
        load_data_from_predictions=load_data_from_predictions,
        return_predicted_ratings=return_predicted_ratings)

    ## --------------------------------------- ##
    ## ------- Prepare Direct Architecture ------- ##
    ## --------------------------------------- ##

    if choose_model is "DIR":
        # run = '300'  # SPIE avg-pool (data-aug, balanced=False,class_weight=True)
        # run = '301'  # SPIE max-pool (data-aug, balanced=False,class_weight=True)
        # run = '302'  # SPIE rmac-pool (data-aug, balanced=False,class_weight=True)

        # run = 'zzz'

        model = DirectArch(miniXception_loader,
                           input_shape,
                           output_size=out_size,
                           normalize=normalize,
                           pooling='msrmac')
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)
        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                balanced=False,
                data_size=data_size,
                model_size=model_size,
                batch_size=32,
                do_augment=do_augment,
                augment=data_augment_params,
                use_class_weight=True,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample)
            images_train, labels_train, class_train, masks_train, _ = prepare_data_direct(
                dataset[2], num_of_classes=2)
            images_valid, labels_valid, class_valid, masks_valid, _ = prepare_data_direct(
                dataset[1], num_of_classes=2)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=32)

    if choose_model is "DIR_RATING":

        ### CLEAN SET
        # run = '800'  # rmac conf:size
        # run = '801'  # rmac conf:none
        # run = '802'  # rmac conf:rating-std
        # run = '803'  # max conf:none

        ### PRIMARY SET
        # run = '810'  # rmac conf:size
        # run = '811'  # rmac conf:none
        # run = '812'  # rmac conf:rating-std
        # run = '813'  # max conf:none
        # run = '814'  # max separated_prediction

        # run = '820'  # dirD, max, logcoh-loss
        # run = '821'  # dirD, max, pearson-loss
        # run = '822'  # dirD, max, KL-rank-loss
        # run = '823'  # dirD, max, poisson-rank-loss
        # run = '824'  # dirD, max, categorical-cross-entropy-loss
        # run = '825'  # dirD, max, ranked-pearson-loss
        # run = '826'  # dirD, max, KL-normalized-rank-loss
        # run = '827'  # dirD, max, KL-normalized-rank-loss (local-scaled) softmax
        # run = '828'  # dirD, max, KL-normalized-rank-loss (local-scaled) l2
        # run = '829'  # dirD, max, ranked-pearson-loss (local-scaled)

        # run = '830'  # dirD, rmac, logcoh-loss
        # run = '831'  # dirD, rmac, pearson-loss
        # run = '832'  # dirD, rmac, KL-rank-loss
        # run = '833'  # dirD, rmac, poisson-rank-loss
        # run = '834'  # dirD, rmac, categorical-cross-entropy-loss
        # run = '835'  # dirD, rmac, ranked-pearson-loss
        # run = '836'  # dirD, rmac, KL-normalized-rank-loss

        # run = '841'  # dirD, max, pearson-loss    pre:dirR813-50
        # run = '842b'  # dirD, max, KL-rank-loss    pre:dirR813-50  (b:lr-4)
        # run = '846'  # dirD, max, KL-norm-loss    pre:dirR813-50

        # run = '851'  # dirD, rmac, pearson-loss   pre:dirR813-50
        # run = '852'  # dirD, rmac, KL-rank-loss   pre:dirR813-50
        # run = '856'  # dirD, rmac, KL-norm-loss   pre:dirR813-50

        # run = '860'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:7)
        # run = '861'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:17)
        # run = '862'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:28)
        # run = '863'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:39)

        # run = '870'  # dirRD, max, KL-loss    schd: 00
        # run = '871'  # dirRD, max, KL-loss    schd: 01
        # run = '872'  # dirRD, max, KL-loss    schd: 02
        # run = '873'  # dirRD, max, KL-loss    schd: 03
        # run = '874'  # dirRD, max, KL-loss    schd: 04
        # run = '875'  # dirRD, max, KL-loss    schd: 05
        # run = '876'  # dirRD, max, KL-loss    schd: 06
        # run = '877b'  # dirRD, max, KL-loss    schd: 07b
        # run = '878'  # dirRD, max, KL-loss    schd: 08
        # run = '879'  # dirRD, max, KL-loss    schd: 09

        # run = '888'  # dirRD, max, KL-loss    schd: 08, on partial data SUP
        # run = '882'  # dirRD, max, KL-loss    schd:

        run = '898b'  # dirRD, max, KL-loss    schd: 08, on partial data UNSUP
        # run = '890b'  # dirR
        # run = '892b'  # dirRD, max, KL-loss

        # run = 'ccc'

        obj = 'rating_distance-matrix'  # 'distance-matrix' 'rating' 'rating-size'

        rating_scale = 'none'
        reg_loss = None  # {'SampleCorrelation': 0.0}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'
        batch_size = 32

        epoch_pre = 50
        preload_weight = None
        # FileManager.Weights('dirR', output_dir=input_dir).name(run='813c{}'.format(config), epoch=epoch_pre)
        # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=epoch_pre)

        model = DirectArch(miniXception_loader,
                           input_shape,
                           output_size=out_size,
                           objective=obj,
                           separated_prediction=False,
                           normalize=normalize,
                           pooling='max',
                           l1_regularization=None,
                           regularization_loss=reg_loss,
                           batch_size=batch_size)

        if preload_weight is not None:
            model.load_core_weights(preload_weight, 39)
            # 7:    freeze 1 blocks
            # 17:   freeze 2 blocks
            # 28:   freeze 3 blocks
            # 39:   freeze 4 blocks

        model.model.summary()

        should_use_scheduale = (reg_loss is not None) or (obj in [
            'rating_size', 'rating_distance-matrix'
        ])

        # scheduale 00:     870
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 80, 'weights': [0.1, 0.9]}] \
        #    if should_use_scheduale else []

        # scheduale 01:     871
        # sched = [{'epoch': 00, 'weights': [1.0, 0.0]},
        #         {'epoch': 50, 'weights': [0.0, 1.0]}] \
        #    if should_use_scheduale else []

        # scheduale 02:     872
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #       {'epoch': 50, 'weights': [0.1, 0.9]}] \
        #   if should_use_scheduale else []

        # scheduale 03:     873
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #        {'epoch': 50, 'weights': [0.5, 0.5]},
        #         {'epoch': 100, 'weights': [0.1, 0.9]}] \
        #    if should_use_scheduale else []

        # scheduale 04:     874
        # sched = [{'epoch': 00, 'weights': [1.0, 0.0]},
        #        {'epoch': 50, 'weights': [0.0, 0.1]}] \
        #   if should_use_scheduale else []

        # scheduale 05:     875
        # sched = [{'epoch': 00, 'weights': [1.0, 0.0]},
        #        {'epoch': 50, 'weights': [0.0, 1.0]},
        #         {'epoch': 100, 'weights': [0.0, 0.1]}] \
        #    if should_use_scheduale else []

        # scheduale 06:     876
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 60, 'weights': [0.1, 0.1]},
        #         {'epoch': 80, 'weights': [0.0, 0.1]},
        #         {'epoch': 100, 'weights': [0.0, 0.05]}] \
        #    if should_use_scheduale else []

        # scheduale 07b:     877b
        # sched = [{'epoch': 00,  'weights': [1.0, 0.0]},
        #         {'epoch': 50,  'weights': [0.0, 1.0]},
        #         {'epoch': 80,  'weights': [0.0, 0.1]},
        #         {'epoch': 100, 'weights': [0.0, 0.05]}] \
        #    if should_use_scheduale else []

        # scheduale 08b:     878
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 80, 'weights': [0.0, 0.1]}] \
        #    if should_use_scheduale else []

        # scheduale 09:     879
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 20, 'weights': [0.7, 0.3]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 60, 'weights': [0.3, 0.3]},
        #         {'epoch': 80, 'weights': [0.0, 0.1]}] \
        #    if should_use_scheduale else []

        # scheduale      892/882
        sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [0.5, 0.5]},
                 {'epoch': 120, 'weights': [0.0, 0.1]}] \
            if should_use_scheduale else []

        loss = dict()
        loss['predictions'] = 'logcosh'
        loss['predictions_size'] = 'logcosh'
        loss['distance_matrix'] = distance_matrix_rank_loss_adapter(
            K_losses.kullback_leibler_divergence, 'KL')
        # distance_matrix_logcosh
        # pearson_correlation
        # distance_matrix_rank_loss_adapter(K_losses.kullback_leibler_divergence, 'KL')
        # distance_matrix_rank_loss_adapter(K_losses.poisson, 'poisson')
        # distance_matrix_rank_loss_adapter(K_losses.categorical_crossentropy, 'entropy')
        model.compile(
            learning_rate=1e-3 if (preload_weight is None) else 1e-4,
            loss=loss,
            scheduale=sched
        )  # mean_squared_logarithmic_error, binary_crossentropy, logcosh

        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                data_size=data_size,
                model_size=model_size,
                batch_size=batch_size,
                objective=obj,
                rating_scale=rating_scale,
                weighted_rating=('distance-matrix' in obj),
                balanced=False,
                do_augment=do_augment,
                augment=data_augment_params,
                use_class_weight=False,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample,
                                          dataset_type=dataset_type)
            images_train, labels_train, masks_train = prepare_data_direct(
                dataset[2], objective='rating', rating_scale=rating_scale)
            images_valid, labels_valid, masks_valid = prepare_data_direct(
                dataset[1], objective='rating', rating_scale=rating_scale)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=batch_size)

    ## --------------------------------------- ##
    ## ------- Prepare Siamese Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model is "SIAM":
        # run = '300'  # l1, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '301'  # l1, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '302'  # l1, rmac-pool (data-aug, balanced=True, class_weight=False)
        # run = '310'  # l2, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '311'  # l2, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '312'  # l2, rmac-pool (data-aug, balanced=True, class_weight=False)
        # run = '320'  # cos, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '321'  # cos, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '322b'  # cos, rmac-pool (data-aug, balanced=True, class_weight=False)

        # b/c - changed margin-loss params
        # run = '313c'  # l2, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '314c'  # l2, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '323c'  # cos, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '324c'  # cos, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)

        # run = 'zzz'

        batch_size = 64 if local else 128  # 'local' is a module-level flag in the original script

        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=True,
                                      objective="malignancy",
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         batch_size=batch_size,
                         distance='l2',
                         normalize=normalize,
                         pooling='msrmac')
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)
        if use_gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    if choose_model is "SIAM_RATING":
        ### clean set
        # run = '400'  # l2-rmac no-conf
        # run = '401'  # cosine-rmac no-conf
        # run = '402'  # l2-rmac conf
        # run = '403'  # cosine-rmac conf
        # run = '404'  # l2-max no-conf
        # run = '405'  # cosine-max no-conf

        ### primary set
        # run = '410'  # l2-rmac no-conf
        # run = '411'  # cosine-rmac no-conf
        # run = '412'  # l2-rmac conf
        # run = '413'  # cosine-rmac conf
        # run = '414'  # l2-max no-conf
        # run = '415'  # cosine-max no-conf

        run = 'zzz'

        obj = 'rating'  # rating / size / rating_size
        batch_size = 16 if local else 64
        reg_loss = None  # {'SampleCorrelation': 0.1}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'

        epoch_pre = 60
        preload_weight = None  # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=70)

        should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size')
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 30, 'weights': [0.4, 0.6]},
                 {'epoch': 60, 'weights': [0.6, 0.4]},
                 {'epoch': 80, 'weights': [0.9, 0.1]},
                 {'epoch': 100, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 20, 'weights': [0.4, 0.6]},
                 {'epoch': 30, 'weights': [0.6, 0.4]},
                 {'epoch': 50, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      train_factor=2,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=False,
                                      objective=obj,
                                      weighted_rating=True,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         objective=obj,
                         batch_size=batch_size,
                         distance='cosine',
                         normalize=normalize,
                         pooling='rmac',
                         regularization_loss=reg_loss,
                         l1_regularization=False)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3,
                      decay=0,
                      loss='logcosh',
                      scheduale=sched)  # mean_squared_error, logcosh
        model.load_generator(generator)

    ## --------------------------------------- ##
    ## ------- Prepare Triplet Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model is "TRIPLET":

        # run = '000'  # rmac softplus, b16
        # run = '001'  # rmac hinge, b16, pre:dirR813-50
        # run = '002'  # rmac hinge, b32, pre:dirR813-50
        # run = '003'  # rmac hinge, b64, pre:dirR813-50
        # run = '004'  # rmac hinge, b128, pre:dirR813-50
        # run = '005'  # rmac hinge, b64, pre:dirR813-50
        run = '006'  # rmac rank, b64, pre:dirR813-50

        # run = 'zzz'

        objective = 'rating'
        use_rank_loss = True

        batch_size = 16 if local else 64

        gen = True
        epoch_pre = 50
        preload_weight = FileManager.Weights(
            'dirR', output_dir=input_dir).name(run='813c{}'.format(config),
                                               epoch=epoch_pre)  # input_dir: module-level path in the original script

        # model
        model = TripArch(miniXception_loader,
                         input_shape,
                         objective=objective,
                         output_size=out_size,
                         distance='l2',
                         normalize=True,
                         pooling='msrmac',
                         categorize=use_rank_loss)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)

        generator = DataGeneratorTrip(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      objective=objective,
                                      balanced=(objective == 'malignancy'),
                                      categorize=use_rank_loss,
                                      val_factor=0 if skip_validation else 1,
                                      train_factor=2,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)
        if gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    ## --------------------------------------- ##
    ## -------      RUN             ------ ##
    ## --------------------------------------- ##

    cnf_id = config if config_name == 'LEGACY' \
        else CrossValidationManager(config_name).get_run_id(config)
    run_name = '{}{}c{}'.format('', run, cnf_id)
    print('Current Run: {}'.format(run_name))
    if no_training:
        model.last_epoch = epochs
        model.run = run_name
    else:
        model.train(run=run_name,
                    epoch=(0 if preload_weight is None else epoch_pre),
                    n_epoch=epochs,
                    gen=use_gen,
                    do_graph=False)

    return model
    def next(self, set, is_training=False):
        verbose = 1
        epoch = 0
        while True:
            print('Run Gen {}: {}'.format(
                epoch, 'Training' if is_training else 'Validation'))
            size = self.data_size if self.do_augment else self.model_size
            #images, labels, masks, confidence = \
            images, labels, classes, masks = \
                prepare_data_direct(set, objective=self.objective, classes=2, size=self.model_size, verbose=verbose)
            #prepare_data(set, classes=2, verbose=verbose, reshuffle=True)
            Nb = np.count_nonzero(1 - classes)
            Nm = np.count_nonzero(classes)
            N = np.minimum(Nb, Nm)
            if verbose:
                print("Benign: {}, Malignant: {}".format(Nb, Nm))
            if self.balanced and is_training:
                new_order = np.random.permutation(2 * N)
                labels_ = np.argmax(classes, axis=1)
                images = self.select_balanced(images, labels_, N, new_order)
                labels = self.select_balanced(labels, labels_, N, new_order)
                classes = self.select_balanced(classes, labels_, N, new_order)
                masks = self.select_balanced(masks, labels_, N, new_order)
                if verbose:
                    Nb = np.count_nonzero(1 - np.argmax(classes, axis=1))
                    Nm = np.count_nonzero(np.argmax(classes, axis=1))
                    print("Balanced - Benign: {}, Malignant: {}".format(
                        Nb, Nm))
            if self.do_augment and is_training and (epoch >=
                                                    self.augment['epoch']):
                if epoch == self.augment['epoch']:
                    print("Begin augmenting")
                images = self.augment_all(images, masks)
            else:
                images = np.array([
                    crop_center(im, msk, size=self.model_size)[0]
                    for im, msk in zip(images, masks)
                ])
            if verbose:
                print("images after augment/crop: {}".format(images[0].shape))

            #if self.use_class_weight:
            #    class_weight = get_class_weight(confidence, method=self.class_weight_method)

            # split into batches
            split_idx = [
                b for b in range(self.batch_sz, images.shape[0], self.batch_sz)
            ]
            images = np.array_split(images, split_idx)
            labels = np.array_split(labels, split_idx)
            classes = np.array_split(classes, split_idx)
            masks = np.array_split(masks, split_idx)
            #confidence = np.array_split(confidence,  split_idx)

            if verbose == 1:
                print("batch size:{}, sets:{}".format(images[0].shape[0],
                                                      len(images[0])))

            # if last batch smaller than batch_sz, discard it
            if images[-1].shape[0] < self.batch_sz:
                images = images[:-1]
                labels = labels[:-1]
                classes = classes[:-1]
                masks = masks[:-1]
                #if self.use_class_weight:
                #    confidence = confidence[:-1]
                if verbose == 1:
                    print("discard last unfull batch -> sets:{}".format(
                        len(images)))

            if epoch == 0:
                if is_training:
                    assert (len(images) == self.trainN)
                else:
                    assert ((self.val_factor * len(images)) == self.valN)

            #for im, lbl, msk, cnf in zip(images, labels, masks, confidence):
            for im, lbl, msk in zip(images, labels, masks):
                yield (im, lbl)
                #if self.use_class_weight:
                #    assert(False)
                #    w = get_sample_weight(cnf,  wD=class_weight['D'],
                #                                wSB=class_weight['SB'],
                #                                wSM=class_weight['SM']
                #                          )
                #    if verbose == 1:
                #        print([(li, np.round(10*wi, 2).astype('uint')) for li, wi in zip(lbl, w)])
                #    verbose = 0
                #    yield (im, lbl, w)
                #else:
                #    yield (im, lbl)
            epoch = epoch + 1
            verbose = 0
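
The batch-splitting logic above is easy to misread, so here is a minimal, self-contained sketch of the same np.array_split pattern, including the discard of a trailing partial batch (the array sizes are illustrative, not from the source):

import numpy as np

batch_sz = 16
data = np.zeros((50, 128, 128, 1))  # 50 samples -> 3 full batches + 1 partial

split_idx = list(range(batch_sz, data.shape[0], batch_sz))  # [16, 32, 48]
batches = np.array_split(data, split_idx)

# discard the last batch if it is smaller than batch_sz, as the generator does
if batches[-1].shape[0] < batch_sz:
    batches = batches[:-1]

assert all(b.shape[0] == batch_sz for b in batches)  # 3 batches of 16
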
Example #12
        else:
            print('Model without weights')

        pred_all = []
        labels_test_all = []
        meta_all_0 = []
        meta_all_1 = []
        for i in range(1):
            # prepare test data
            images_test, labels_test, masks_test, confidence, meta = \
                prepare_data_siamese(load_nodule_dataset(size=size, res=res, sample=sample)[DataSubSet], size=size,
                                     return_meta=True, verbose=1, balanced=True)
            print("Data ready: images({}), labels({})".format(images_test[0].shape, labels_test.shape))
            print("Range = [{:.2f},{:.2f}]".format(np.min(images_test[0]), np.max(images_test[0])))

            images_test = (np.array([crop_center(im, msk, size=in_size)[0]
                                     for im, msk in zip(images_test[0], masks_test[0])]),
                           np.array([crop_center(im, msk, size=in_size)[0]
                                     for im, msk in zip(images_test[1], masks_test[1])]))
            print("Image size changed to {}".format(images_test[0].shape))
            print('Mask not updated')

            # eval
            print("Begin Predicting...")
            pred = model.predict(images_test, round=False)
            print("Predication Ready")

            pred_all.append(pred)
            labels_test_all.append(labels_test)
            meta_all_0 += meta[0]
            meta_all_1 += meta[1]
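
The two-branch cropping above repeats the same comprehension once per side of the siamese pair; a hedged refactoring sketch (crop_pair is a hypothetical helper, not part of the original Network utilities):

def crop_pair(images_pair, masks_pair, size):
    # apply crop_center to each branch of a siamese (left, right) tuple
    return tuple(
        np.array([crop_center(im, msk, size=size)[0]
                  for im, msk in zip(ims, msks)])
        for ims, msks in zip(images_pair, masks_pair))

# usage: images_test = crop_pair(images_test, masks_test, in_size)
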
Example #13

wEpchs = [24]

run = wRuns[0]
epoch = wEpchs[0]

# Load Data
# =================

images, labels, masks, meta = \
                    prepare_data(load_nodule_dataset(size=inp_size, res=res, sample=sample)[DataSubSet],
                                 categorize=False,
                                 reshuffle=False,
                                 return_meta=True,
                                 verbose=1)

images = np.array([crop_center(im, msk, size=net_size)[0] for im, msk in zip(images, masks)])

# Run
# =================

siam_model = SiamArch(miniXception_loader, input_shape, distance='l2', output_size=out_size, normalize=True)
embed_model = siam_model.extract_core(weights=Weights(run=run, epoch=epoch))
embed_model.layers[1].summary()

layer_names = ['block13_sepconv2_bn'] #['block1_conv1', 'block1_conv1_bn', 'block1_conv1_act']
layers = [embed_model.layers[1].get_layer(name).output for name in layer_names]
intermediate_layer_model = Model(inputs=embed_model.layers[1].layers[0].input, outputs=layers)

intermediate_outputs = intermediate_layer_model.predict(images)
if not isinstance(intermediate_outputs, list):
    intermediate_outputs = [intermediate_outputs]
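
The intermediate-layer probe above is the standard Keras pattern of building a second Model over an existing graph; a self-contained sketch on a toy network (the toy layers are placeholders, not the project's miniXception core):

import numpy as np
from keras.layers import Input, Conv2D
from keras.models import Model

inp = Input(shape=(32, 32, 1))
x = Conv2D(8, 3, name='conv_a')(inp)
net = Model(inputs=inp, outputs=Conv2D(4, 3, name='conv_b')(x))

# probe any named layer, exactly as done with 'block13_sepconv2_bn' above
probe = Model(inputs=net.input, outputs=net.get_layer('conv_a').output)
feats = probe.predict(np.zeros((2, 32, 32, 1)))
print(feats.shape)  # (2, 30, 30, 8)
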
Example #14
                                     batch_sz=32,
                                     val_factor=1,
                                     balanced=False,
                                     do_augment=False,
                                     augment=data_augment_params,
                                     use_class_weight=True,
                                     class_weight='balanced')
        model.load_generator(generator)
    else:
        dataset = load_nodule_dataset(size=data_size, res=res, sample=sample)
        images_train, labels_train, masks_train = prepare_data_direct(
            dataset[2], classes=2, size=model_size)
        images_valid, labels_valid, masks_valid = prepare_data_direct(
            dataset[1], classes=2, size=model_size)
        images_train = np.array([
            crop_center(im, msk, size=model_size)[0]
            for im, msk in zip(images_train, masks_train)
        ])
        images_valid = np.array([
            crop_center(im, msk, size=model_size)[0]
            for im, msk in zip(images_valid, masks_valid)
        ])
        model.load_data(images_train,
                        labels_train,
                        images_valid,
                        labels_valid,
                        batch_sz=32)

    model.train(label=run, n_epoch=epochs, gen=use_gen)

if choose_model == "DIR_RATING":
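
crop_center is used throughout these snippets to recenter each nodule patch on its mask before feeding the network. The real utility lives in the project's Network package; a hypothetical re-implementation (illustration only, the actual behavior may differ) conveys the idea:

import numpy as np

def crop_center_sketch(image, mask, size):
    # crop a size x size window centered on the mask's center of mass,
    # clamped so the window stays inside the image
    # (assumes a non-empty 2D mask and image dims >= size)
    cy, cx = (int(round(c)) for c in np.argwhere(mask).mean(axis=0)[:2])
    y0 = min(max(cy - size // 2, 0), image.shape[0] - size)
    x0 = min(max(cx - size // 2, 0), image.shape[1] - size)
    return image[y0:y0 + size, x0:x0 + size], mask[y0:y0 + size, x0:x0 + size]
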
Example #15
def run(choose_model="DIR",
        epochs=200,
        config=0,
        skip_validation=False,
        no_training=False):

    np.random.seed(1337)
    random.seed(1337)
    tf.set_random_seed(1234)
    K.set_session(tf.Session(graph=tf.get_default_graph()))

    ## --------------------------------------- ##
    ## ------- General Setup ----------------- ##
    ## --------------------------------------- ##

    net_type = 'flat'  # 'flat', 'rmac'

    #data
    dataset_type = '3d'
    res = 0.5  # 'Legacy' #0.7 #0.5 #'0.5I'
    sample = 'Normal'  # 'UniformNC' #'Normal' #'Uniform'
    use_gen = True
    data_size = 160

    data_loader = build_loader_3d(configuration=config,
                                  net_type='dirR',
                                  run='251',
                                  epoch=60)

    # model
    out_size = 128
    if net_type == 'flat':
        model_size = 8 * 8 * 128
    elif net_type == 'rmac':
        model_size = 128
    else:
        assert False
    input_shape = (None, model_size)
    do_augment = False
    normalize = True

    print("-" * 30)
    print("Running Sequence {} for --** {} **-- model, with #{} configuration".
          format("training" if not no_training else "validation", choose_model,
                 config))
    print(
        "\tdata_size = {},\n\tmodel_size = {},\n\tres = {},\n\tdo_augment = {}"
        .format(data_size, model_size, res, do_augment))
    print("-" * 30)

    model = None

    ## --------------------------------------- ##
    ## ------- Prepare Direct Architecture ------- ##
    ## --------------------------------------- ##

    if choose_model == "DIR_RATING":

        #run = '0004'
        run = '0005'  # new dataset (Train-Valid-Test)
        obj = 'rating'  # 'distance-matrix' 'rating' 'rating-size'

        rating_scale = 'none'
        reg_loss = None  # {'SampleCorrelation': 0.0}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'
        batch_size = 16

        epoch_pre = 20
        preload_weight = None  # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=epoch_pre)

        model = DirectArch(gru3d_loader,
                           input_shape,
                           output_size=out_size,
                           objective=obj,
                           separated_prediction=False,
                           normalize=normalize,
                           pooling='msrmac',
                           l1_regularization=None,
                           regularization_loss=reg_loss,
                           batch_size=batch_size)
        model.model.summary()

        if preload_weight is not None:
            model.load_core_weights(preload_weight)

        # schedule 02
        should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size')
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 20, 'weights': [0.4, 0.6]},
                 {'epoch': 40, 'weights': [0.6, 0.4]},
                 {'epoch': 60, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []

        loss = 'logcosh' if obj != 'distance-matrix' else pearson_correlation
        model.compile(
            learning_rate=1e-3,
            decay=0,
            loss=loss,
            scheduale=sched,
            temporal_weights=False
        )  # mean_squared_logarithmic_error, binary_crossentropy, logcosh

        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                data_size=data_size,
                model_size=model_size,
                batch_size=batch_size,
                objective=obj,
                rating_scale=rating_scale,
                weighted_rating=False,
                seq_model=True,
                balanced=False,
                do_augment=do_augment,
                use_class_weight=False,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample,
                                          dataset_type=dataset_type)
            images_train, labels_train, masks_train = prepare_data_direct(
                dataset[2], objective='rating', rating_scale=rating_scale)
            images_valid, labels_valid, masks_valid = prepare_data_direct(
                dataset[1], objective='rating', rating_scale=rating_scale)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=batch_size)

    ## --------------------------------------- ##
    ## ------- Prepare Siamese Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model == "SIAM_RATING":
        #run = 'siamR001'  # mse-loss, rating-scaled
        #run = 'siamR002'  # mse-loss, rating-scaled, repeated-epochs
        #run = 'siamR003'  # mse-loss, 0.25*rating-scaled, repeated-epochs(3)
        #run = 'siamR004X'  # mse-loss, 0.25*rating-scaled, repeated-epochs(5)
        #run = 'siamR005'  # mse-loss, 0.25*rating-scaled, repeated-epochs(1)
        #run = 'siamR006XX'  # rmac, mse-loss, 0.25*rating-scaled, repeated-epochs(1)
        #run = 'siamR007'  # rmac, logcosh-loss, 0.25*rating-scaled, repeated-epochs(1)
        #run = 'siamR008X'  # data-aug
        #run = 'siamR009'  # cosine
        #run = '100c'  # l2, max-pooling, train_factor=1
        #run = '101b'  # l2, max-pooling, train_factor=2
        #run = '102'  # l2, max-pooling, train_factor=3
        #run = '103'  # l2, max-pooling, train_factor=1, mse
        #run = '110'  # l2, max-pooling, train_factor=3
        #run = '112'  # l2, msrmac-pooling, train_factor=3
        #run = '122'  # l2, msrmac-pooling, train_factor=2, data-aug
        #run = '132'  # l2, msrmac-pooling, train_factor=2, data-aug, primary
        #run = '142'  # l2, msrmac-pooling, train_factor=2, out=64
        #run = '152'  # l2, msrmac-pooling, train_factor=2, out=32
        #run = '162'  # l2, msrmac-pooling, train_factor=2, out=8
        #run = '172'  # l2, msrmac-pooling, train_factor=2, out=256
        #run = '135'  # l2, msrmac-pooling, train_factor=2, data-aug, primary
        #run = '180'  # baseline, b64
        #run = '181'  # baseline, FeatCorr.1
        #run = '182'  # baseline, SampCorr.1

        run = '200'  # pretrain with dirR251-70

        #run = '300'   # obj: size
        #run = '311'  # obj: rating-size

        #run = 'zzz'  #

        dataset_type = 'Primary'
        obj = 'rating'  # rating / size / rating_size
        batch_size = 16 if local else 64
        reg_loss = None  # {'SampleCorrelation': 0.1}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'

        epoch_pre = 60
        preload_weight = None  # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=70)

        should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size')
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 30, 'weights': [0.4, 0.6]},
                 {'epoch': 60, 'weights': [0.6, 0.4]},
                 {'epoch': 80, 'weights': [0.9, 0.1]},
                 {'epoch': 100, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 20, 'weights': [0.4, 0.6]},
                 {'epoch': 30, 'weights': [0.6, 0.4]},
                 {'epoch': 50, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      train_factor=2,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=False,
                                      objective=obj,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         objective=obj,
                         batch_size=batch_size,
                         distance='l2',
                         normalize=normalize,
                         pooling='msrmac',
                         regularization_loss=reg_loss,
                         l1_regularization=False)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3,
                      decay=0,
                      loss='logcosh',
                      scheduale=sched)  # mean_squared_error, logcosh
        model.load_generator(generator)

    ## --------------------------------------- ##
    ## ------- Prepare Triplet Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model == "TRIPLET":
        #run = 'trip011XXX'  # mrg-loss, decay(0.01), max-pool
        #run = 'trip012X'  # mrg-loss, decay(0.05), rmac-pool
        #run = 'trip013'  # cosine
        #run = 'trip014' # ortogonal initialization
        #run = 'trip015X'  # objective rating
        #run = 'trip016XXXX'  # softplus-loss
        #run = 'trip017'  # softplus-loss, no decay
        #run = 'trip018'  # binary
        #run = 'trip019'  # categorize
        #run = 'trip020X'  # rating-conf-tryout

        #run = 'trip021' # pretrained
        #run = 'trip022XXX'  # pretrained rmac
        #run = 'trip023X'  # pretrained categorize
        #run = 'trip024'  # pretrained confidence
        #run = 'trip025'  # pretrained cat,conf
        #run = 'trip026Z'  # class_weight='rating_distance', cat

        #run = 'trip027'  # obj:malig, rmac, categorize, no-decay
        #run = 'trip028'  # obj:malig, max, categorize, no-decay

        run = 'trip_100'  # obj:malig, msrmac, softplus-loss
        #run = 'trip101'  # obj:malig, msrmac, rank-loss

        dataset_type = 'Primary'
        objective = 'malignancy'
        use_rank_loss = False

        gen = True
        preload_weight = None  #'./Weights/w_dirR011X_50.h5'

        # model
        model = TripArch(miniXception_loader,
                         input_shape,
                         objective=objective,
                         output_size=out_size,
                         distance='l2',
                         normalize=True,
                         pooling='msrmac',
                         categorize=use_rank_loss)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)  #0.05

        generator = DataGeneratorTrip(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=16,
                                      objective=objective,
                                      balanced=(objective == 'malignancy'),
                                      categorize=True,
                                      val_factor=0 if skip_validation else 3,
                                      train_factor=1,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)
        if gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    ## --------------------------------------- ##
    ## -------      RUN             ------ ##
    ## --------------------------------------- ##

    if no_training:
        model.last_epoch = epochs
        model.run = '{}{}c{}'.format('', run, config)
    else:
        model.train(run='{}{}c{}'.format('', run, config),
                    epoch=(0 if preload_weight is None else epoch_pre),
                    n_epoch=epochs,
                    gen=use_gen,
                    do_graph=False)

    return model
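
A hypothetical entry point showing how run() above might be invoked (the argument values are illustrative):

if __name__ == '__main__':
    model = run(choose_model="DIR_RATING",
                epochs=100,
                config=0,
                skip_validation=False,
                no_training=False)
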
Example #16

                    open(filename, 'br'))
            except:
                from Network.data import load_nodule_dataset, load_nodule_raw_dataset, prepare_data
                from Network.model import miniXception_loader
                from Network.siameseArch import siamArch
                from Network.directArch import directArch

                # prepare test data
                images, labels, masks, meta = \
                    prepare_data(load_nodule_dataset(size=size, res=res, sample=sample)[DataSubSet],
                                 categorize=False,
                                 reshuffle=False,
                                 return_meta=True,
                                 verbose=1)
                images = np.array([
                    crop_center(im, msk, size=in_size)[0]
                    for im, msk in zip(images, masks)
                ])
                print("Image size changed to {}".format(images.shape))
                print('Mask not updated')
                if network == 'dir':
                    model = directArch(miniXception_loader,
                                       input_shape,
                                       objective="malignancy",
                                       output_size=out_size,
                                       normalize=normalize,
                                       pooling='max')
                elif network == 'siam':
                    model = siamArch(miniXception_loader,
                                     input_shape,
                                     2,
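
The fragment above follows a cache-or-recompute pattern: unpickle previously saved predictions if the file is readable, otherwise rebuild the model and predict. A self-contained sketch of that pattern (filename and compute_predictions are illustrative placeholders, and the exceptions are narrowed from the bare except used above):

import pickle

def load_or_compute(filename, compute_predictions):
    try:
        with open(filename, 'rb') as f:
            return pickle.load(f)
    except (IOError, OSError, EOFError):
        result = compute_predictions()
        with open(filename, 'wb') as f:
            pickle.dump(result, f)
        return result
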
Example #17
    def next(self, set, is_training=False):
        verbose = 1
        epoch = 0
        while 1:
            print('Run Gen: {}'.format('Training' if is_training else 'Validation'))
            size = self.data_size if self.do_augment else self.model_size
            images, labels, masks, confidence = \
                prepare_data_siamese(set, size=size, balanced=(self.balanced and is_training),
                                     objective=self.objective, verbose=verbose)
            if self.do_augment and is_training and (epoch >= self.augment['epoch']):
                if epoch == self.augment['epoch']:
                    print("Begin augmenting")
                images = (self.augment_all(images[0], masks[0]),
                          self.augment_all(images[1], masks[1]))
            else:
                images = (np.array([crop_center(im, msk, size=self.model_size)[0]
                                    for im, msk in zip(images[0], masks[0])]),
                          np.array([crop_center(im, msk, size=self.model_size)[0]
                                    for im, msk in zip(images[1], masks[1])]))

            if verbose:
                print("images after augment/crop: {}".format(images[0].shape))

            if self.use_class_weight:
                class_weight = get_class_weight(confidence, method=self.class_weight_method)

            # split into batches
            split_idx = [b for b in range(self.batch_sz, images[0].shape[0], self.batch_sz)]
            images = (  np.array_split(images[0], split_idx),
                        np.array_split(images[1], split_idx) )
            labels = np.array_split(labels,  split_idx)
            masks  = (  np.array_split(masks[0], split_idx),
                        np.array_split(masks[1], split_idx) )
            confidence = np.array_split(confidence,  split_idx)

            if verbose == 1:
                print("batch size:{}, sets:{}".format(images[0][0].shape[0], len(images[0])))

            # if last batch smaller than batch_sz, discard it
            if images[0][-1].shape[0] < self.batch_sz:
                images = (images[0][:-1], images[1][:-1])
                labels = labels[:-1]
                masks  = (masks[0][:-1], masks[1][:-1])
                if self.use_class_weight:
                    confidence = confidence[:-1]
                if verbose == 1:
                    print("discard last unfull batch -> sets:{}".format(len(images[0])))

            if is_training:
                assert(len(images[0]) == self.trainN)
            else:
                assert( (self.val_factor*len(images[0])) == self.valN)

            for im0, im1, lbl, msk0, msk1, cnf in zip(images[0], images[1], labels, masks[0], masks[1], confidence):
                if self.use_class_weight:
                    w = get_sample_weight(cnf,  wD=class_weight['D'],
                                                wSB=class_weight['SB'],
                                                wSM=class_weight['SM']
                                          )
                    if verbose == 1:
                        print([(li, np.round(10*wi, 2).astype('uint')) for li, wi in zip(lbl, w)])
                    verbose = 0
                    yield ([im0, im1], lbl, w)
                else:
                    yield ([im0, im1], lbl)
            epoch = epoch + 1
            verbose = 0
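
Keras consumes generators like the one above via fit_generator; a hedged sketch of the wiring (siam_model, train_set, valid_set, and the epoch count are placeholders — in the project this plumbing is wrapped inside the model classes):

siam_model.fit_generator(generator.next(train_set, is_training=True),
                         steps_per_epoch=generator.trainN,
                         validation_data=generator.next(valid_set),
                         validation_steps=generator.valN,
                         epochs=100)
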