def train_top_model():
    # Load the bottleneck features and labels
    train_features = np.load(
        open(output_dir + 'bottleneck_features_train.npy', 'rb'))
    train_labels = np.load(
        open(output_dir + 'bottleneck_labels_train.npy', 'rb'))
    validation_features = np.load(
        open(output_dir + 'bottleneck_features_validation.npy', 'rb'))
    validation_labels = np.load(
        open(output_dir + 'bottleneck_labels_validation.npy', 'rb'))

    # Create the top model for the Inception V3 network: a single Dense layer
    # with softmax activation.
    top_input = Input(shape=train_features.shape[1:])
    top_output = Dense(5, activation='softmax')(top_input)
    model = Model(top_input, top_output)

    # Train the model using the bottleneck features and save the weights.
    model.compile(optimizer=SGD(lr=1e-4, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    csv_logger = CSVLogger(output_dir + 'top_model_training.csv')
    model.fit(train_features,
              train_labels,
              epochs=top_epochs,
              batch_size=batch_size,
              validation_data=(validation_features, validation_labels),
              callbacks=[csv_logger])
    model.save_weights(top_model_weights_path)
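
# train_top_model() assumes the bottleneck feature files already exist. A
# hedged sketch of how they are typically produced (`datagen`,
# `train_data_dir`, and `nb_train_samples` are assumptions, not part of the
# original source):
def save_bottleneck_features():
    base_model = inception_v3.InceptionV3(include_top=False,
                                          weights='imagenet',
                                          pooling='avg')
    generator = datagen.flow_from_directory(train_data_dir,
                                            target_size=(img_height, img_width),
                                            batch_size=batch_size,
                                            class_mode=None,
                                            shuffle=False)
    features = base_model.predict_generator(
        generator, nb_train_samples // batch_size)
    np.save(open(output_dir + 'bottleneck_features_train.npy', 'wb'), features)
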
class SiameseModel:
    def __init__(self, use_cudnn_lstm=True, plot_model_architecture=False):
        n_hidden = 50
        input_dim = 300

        # unit_forget_bias: Boolean. If True, add 1 to the bias of the forget
        # gate at initialization (this also forces bias_initializer="zeros"),
        # as recommended in Jozefowicz et al.
        # he_normal: Gaussian initialization scaled by fan_in (He et al., 2015)
        if use_cudnn_lstm:
            # Use CuDNNLSTM instead of LSTM, because it is faster
            lstm = layers.CuDNNLSTM(n_hidden,
                                    unit_forget_bias=True,
                                    kernel_initializer='he_normal',
                                    kernel_regularizer='l2',
                                    name='lstm_layer')
        else:
            lstm = layers.LSTM(n_hidden,
                               unit_forget_bias=True,
                               kernel_initializer='he_normal',
                               kernel_regularizer='l2',
                               name='lstm_layer')

        # Build the left branch of the model: inputs are variable-length sequences of vectors of size 300 (input_dim).
        left_input = Input(shape=(None, input_dim), name='input_1')
        # left_masked_input = layers.Masking(mask_value=0)(left_input)
        left_output = lstm(left_input)

        # Build the right branch of the model: calling an existing layer instance reuses its weights.
        right_input = Input(shape=(None, input_dim), name='input_2')
        # right_masked_input = layers.Masking(mask_value=0)(right_input)
        right_output = lstm(right_input)

        # Build the classifier on top. Note: despite the name, l1_norm computes
        # an elementwise similarity 1 - |a - b|, not a true L1 norm.
        l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
        merged = layers.Lambda(function=l1_norm,
                               output_shape=lambda x: x[0],
                               name='L1_distance')([left_output, right_output])
        predictions = layers.Dense(1,
                                   activation='tanh',
                                   name='Similarity_layer')(merged)  # 'sigmoid' is a common alternative

        # Instantiate the model: because the two branches share the same LSTM
        # layer, its weights are updated based on both inputs during training.
        self.model = Model([left_input, right_input], predictions)

        self.__compile()
        print(self.model.summary())

        if plot_model_architecture:
            from tensorflow.keras.utils import plot_model
            plot_model(self.model, to_file='siamese_architecture.png')

    def __compile(self):
        # Note: gradient clipping is not available in the Keras Adadelta
        # implementation.
        optimizer = Adadelta()
        # optimizer = 'adam'
        self.model.compile(loss='mse',
                           optimizer=optimizer,
                           metrics=[pearson_correlation])

    def fit(self,
            left_data,
            right_data,
            targets,
            validation_data,
            epochs=5,
            batch_size=128):
        # The paper employs early stopping on a validation set, but does not
        # mention the parameters used.
        early_stopping_monitor = EarlyStopping(
            monitor='val_pearson_correlation', mode='max', patience=20)
        # Early stopping is currently disabled; add early_stopping_monitor to
        # the list below to enable it.
        callbacks = []
        history = self.model.fit([left_data, right_data],
                                 targets,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 callbacks=callbacks)

        self.visualize_metric(history.history, 'loss')
        self.visualize_metric(history.history, 'pearson_correlation')
        self.load_activation_model()

    def visualize_metric(self, history_dic, metric_name):
        plt.plot(history_dic[metric_name])
        legend = ['train']
        if 'val_' + metric_name in history_dic:
            plt.plot(history_dic['val_' + metric_name])
            legend.append('validation')
        plt.title('model ' + metric_name)
        plt.ylabel(metric_name)
        plt.xlabel('epoch')
        plt.legend(legend, loc='upper left')
        plt.show()

    def predict(self, left_data, right_data):
        return self.model.predict([left_data, right_data])

    def evaluate(self, left_data, right_data, targets, batch_size=128):
        return self.model.evaluate([left_data, right_data],
                                   targets,
                                   batch_size=batch_size)

    def load_activation_model(self):
        self.activation_model = Model(
            inputs=self.model.input[0],
            outputs=self.model.get_layer('lstm_layer').output)

    def visualize_activation(self, data):
        activations = self.activation_model.predict(data)
        plt.figure(figsize=(10, 100), dpi=80)
        plt.imshow(activations, cmap='Blues')
        plt.grid()
        plt.xticks(ticks=range(0, 50))
        plt.yticks(ticks=range(0, data.shape[0]))
        plt.show()

    def visualize_specific_activation(self, data, dimension_idx):
        activations = self.activation_model.predict(data)
        if dimension_idx >= activations.shape[1]:
            raise ValueError('dimension_idx must be less than %d' %
                             activations.shape[1])
        fig = plt.figure(figsize=(10, 1), dpi=80)
        ax = fig.add_subplot(111)
        plt.title('dimension_idx = %d' % dimension_idx)
        weights = activations[:, dimension_idx]
        plt.yticks(ticks=[0, 1])
        plt.plot(weights, np.zeros_like(weights), 'o')
        for i, txt in enumerate(weights):
            ax.annotate((i + 1), (weights[i], 0))
        plt.show()

    def save(self, model_folder='./model/'):
        # serialize model to JSON
        model_json = self.model.to_json()
        with open(model_folder + 'model.json', 'w') as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        self.model.save_weights(model_folder + 'model.h5')
        print('Saved model to disk')

    def save_pretrained_weights(
            self, model_weights_path='./model/pretrained_weights.h5'):
        self.model.save_weights(model_weights_path)
        print('Saved pretrained weights to disk')

    def load(self, model_folder='./model/'):
        # load json and create model
        json_file = open(model_folder + 'model.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights(model_folder + 'model.h5')
        print('Loaded model from disk')

        self.model = loaded_model
        # loaded model should be compiled
        self.__compile()
        self.load_activation_model()

    def load_pretrained_weights(
            self, model_weights_path='./model/pretrained_weights.h5'):
        # load weights into the existing model, then recompile
        self.model.load_weights(model_weights_path)
        print('Loaded pretrained weights from disk')
        self.__compile()
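
# The SiameseModel above references a pearson_correlation metric that is not
# defined in this excerpt. A minimal Keras-backend sketch of such a metric
# (an assumption about the intended implementation, not the original source):
def pearson_correlation(y_true, y_pred):
    x = y_true - K.mean(y_true)
    y = y_pred - K.mean(y_pred)
    return K.sum(x * y) / (K.sqrt(K.sum(K.square(x)) * K.sum(K.square(y)))
                           + K.epsilon())
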
Example #3
class BaseModel(object):
    """Base Model Interface

    Methods
    ----------
    fit(train_data, valid_data, epochs, batchsize, **kwargs)
    predict(X)
    evaluate(X, y)

    Examples
    ----------
    >>> model = BaseModel("example", inference, "model.h5")
    >>> model.fit([X_train, y_train], [X_val, y_val])
    """
    def __init__(self, name, fn, model_path):
        """Constructor for BaseModel

        Parameters
        ----------
        name : str
            Name of this model

        fn : function
            Inference function, y = fn(X)

        model_path : str
            Path to a model.h5
        """
        X = Input(shape=[28, 28, 1])
        y = fn(X)

        self.model = Model(X, y, name=name)
        self.model.compile("adam", "categorical_crossentropy", ["accuracy"])
        self.model.summary()

        self.path = model_path
        self.name = name
        # self.load()

    def fit(self, train_data, valid_data, epochs=10, batchsize=128, **kwargs):
        """Training function

        Evaluate at each epoch against validation data
        Save the best model according to the validation loss

        Parameters
        ----------
        train_data : tuple, (X_train, y_train)
            X_train.shape == (N, H, W, C)
            y_train.shape == (N, N_classes)

        valid_data : tuple
            (X_val, y_val)

        epochs : int
            Number of epochs to train

        batchsize : int
            Minibatch size

        **kwargs
            Keywords arguments for `fit_generator`
        """
        callback_best_only = ModelCheckpoint(self.path, save_best_only=True)
        train_gen, val_gen = train_generator()

        X_train, y_train = train_data
        X_val, y_val = valid_data

        N = X_train.shape[0]
        print("[DEBUG] N -> {}", X_train.shape)
        N_val = X_val.shape[0]

        self.model.fit_generator(train_gen.flow(X_train, y_train, batchsize),
                                 steps_per_epoch=N // batchsize,
                                 validation_data=val_gen.flow(
                                     X_val, y_val, batchsize),
                                 validation_steps=N_val // batchsize,
                                 epochs=epochs,
                                 callbacks=[callback_best_only],
                                 **kwargs)

    def save(self):
        """Save weights

        Should not be used manually
        """
        self.model.save_weights(self.path)

    def freeze(self, export_dir):
        """Export the model as a TensorFlow SavedModel."""
        tf.saved_model.simple_save(
            K.get_session(),
            os.path.join(export_dir, str(int(time.time()))),
            inputs={'inputs': self.model.input},
            outputs={t.name: t
                     for t in self.model.outputs})

    def load(self):
        """Load weights from self.path """
        if os.path.isfile(self.path):
            self.model.load_weights(self.path)
            print("Model loaded")
        else:
            print("No model is found")

    def predict(self, X):
        """Return probabilities for each classes

        Parameters
        ----------
        X : array-like (N, H, W, C)

        Returns
        ----------
        y : array-like (N, N_classes)
            Probability array
        """
        return self.model.predict(X)

    def evaluate(self, X, y):
        """Return an accuracy

        Parameters
        ----------
        X : array-like (N, H, W, C)
        y : array-like (N, N_classes)

        Returns
        ----------
        acc : float
            Accuracy
        """
        return self.model.evaluate(X, y)
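
# A hedged sketch of how BaseModel is meant to be used; the small `inference`
# network below is an assumption, not part of the original source:
from tensorflow.keras.layers import Conv2D, Dense, Flatten

def inference(X):
    h = Conv2D(32, 3, activation='relu')(X)
    h = Flatten()(h)
    return Dense(10, activation='softmax')(h)

# model = BaseModel("example", inference, "model.h5")
# model.fit((X_train, y_train), (X_val, y_val), epochs=10, batchsize=128)
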
Example #4
def main(cvset=0,
         n_features=5000,
         batch_size=1000,
         p_drop=0.5,
         latent_dim=2,
         n_epoch=5000,
         run_iter=0,
         exp_name='nagent',
         model_id='nagent_model'):
    train_dict, val_dict, full_dict, dir_pth = dataIO(cvset=cvset,
                                                      n_features=n_features,
                                                      exp_name=exp_name,
                                                      train_size=25000)

    #Architecture parameters ------------------------------
    input_dim = train_dict['X'].shape[1]
    print(input_dim)
    fc_dim = 50

    fileid = model_id + \
        '_cv_' + str(cvset) + \
        '_ng_' + str(n_features) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    fileid = fileid.replace('.', '-')
    print(fileid)

    n_agents = 1
    #Model definition -----------------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(input_dim, ), name='in_ae')
    M['mask_ae'] = Input(shape=(input_dim, ), name='mask_ae')
    for i in range(n_agents):

        M['dr_ae_' + str(i)] = Dropout(p_drop,
                                       name='dr_ae_' + str(i))(M['in_ae'])
        M['fc01_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc01_ae_' + str(i))(M['dr_ae_' +
                                                                   str(i)])
        M['fc02_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc02_ae_' + str(i))(M['fc01_ae_' +
                                                                   str(i)])
        M['fc03_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc03_ae_' + str(i))(M['fc02_ae_' +
                                                                   str(i)])
        M['fc04_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc04_ae_' + str(i))(M['fc03_ae_' +
                                                                   str(i)])
        M['fc05_ae_' + str(i)] = Dense(latent_dim,
                                       activation='linear',
                                       name='fc05_ae_' + str(i))(M['fc04_ae_' +
                                                                   str(i)])
        M['ld_ae_' + str(i)] = BatchNormalization(scale=False,
                                                  center=False,
                                                  epsilon=1e-10,
                                                  momentum=0.,
                                                  name='ld_ae_' + str(i))(
                                                      M['fc05_ae_' + str(i)])

        M['fc06_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc06_ae_' + str(i))(M['ld_ae_' +
                                                                   str(i)])
        M['fc07_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc07_ae_' + str(i))(M['fc06_ae_' +
                                                                   str(i)])
        M['fc08_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc08_ae_' + str(i))(
                                           M['fc07_ae_' + str(i)])
        M['fc09_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc09_ae_' + str(i))(M['fc08_ae_' +
                                                                   str(i)])
        M['ou_ae_' + str(i)] = Dense(input_dim,
                                     activation='linear',
                                     name='ou_ae_' + str(i))(M['fc09_ae_' +
                                                               str(i)])

    AE = Model(inputs=[M['in_ae'], M['mask_ae']],
               outputs=[M['ou_ae_' + str(i)] for i in range(n_agents)])

    def masked_mse(X, Y, mask):
        # Keras losses only receive (y_true, y_pred), so the input, output,
        # and mask tensors are captured from the graph via this closure.
        loss_val = tf.reduce_mean(
            tf.multiply(tf.math.squared_difference(X, Y), mask))

        def masked_loss(y_true, y_pred):
            # y_true and y_pred are ignored; the loss is the graph tensor above
            return loss_val

        return masked_loss

    #Create loss dictionary
    loss_dict = {
        'ou_ae_' + str(i): masked_mse(M['in_ae'], M['ou_ae_' + str(i)],
                                      M['mask_ae'])
        for i in range(n_agents)
    }

    #Loss weights dictionary
    loss_wt_dict = {'ou_ae_' + str(i): 1.0 for i in range(n_agents)}

    #Add loss definitions to the model
    AE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Custom logging
    cb_obj = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    train_input_dict = {
        'in_ae': train_dict['X'],
        'mask_ae': train_dict['mask']
    }
    train_output_dict = {
        'ou_ae_' + str(i): train_dict['X']
        for i in range(n_agents)
    }

    val_input_dict = {'in_ae': val_dict['X'], 'mask_ae': val_dict['mask']}
    val_output_dict = {
        'ou_ae_' + str(i): val_dict['X']
        for i in range(n_agents)
    }

    #Model training
    start_time = timeit.default_timer()
    AE.fit(train_input_dict,
           train_output_dict,
           batch_size=batch_size,
           initial_epoch=0,
           epochs=n_epoch,
           validation_data=(val_input_dict, val_output_dict),
           verbose=2,
           callbacks=[cb_obj])

    elapsed = timeit.default_timer() - start_time

    print('-------------------------------')
    print('Training time:', elapsed)
    print('-------------------------------')

    #Save weights
    AE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    #Generate summaries
    summary = {}
    for i in range(n_agents):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        # note: with n_agents > 1 this overwrites 'z' on every iteration
        summary['z'] = encoder.predict(full_dict['X'])

    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', summary)
    return
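
# A self-contained check of the masked-MSE idea in eager TensorFlow (all
# names below are local to this sketch):
import tensorflow as tf

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
y = tf.constant([[1.5, 2.0], [0.0, 4.0]])
mask = tf.constant([[1.0, 1.0], [0.0, 1.0]])  # zero out the (1, 0) entry
masked_val = tf.reduce_mean(
    tf.multiply(tf.math.squared_difference(x, y), mask))
# squared differences are [[0.25, 0], [9, 0]]; the mask drops the 9, so the
# mean over all four entries is 0.0625
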
Example #5
    # NOTE: this example is truncated; the call below is reconstructed from
    # context and assumes `model` and `n_gpus` are defined earlier.
    parallel_model = multi_gpu_model(model,
                                     gpus=n_gpus,
                                     cpu_relocation=False,
                                     cpu_merge=True)
    parallel_model.compile(optimizer=Adam(lr=1e-3),
                           loss='mean_squared_error',
                           metrics=[r_squared])
    history = parallel_model.fit_generator(
        generator=training_generator,
        validation_data=validation_generator,
        epochs=epochs,
        use_multiprocessing=False,
        callbacks=[tensorboard],
        workers=4)
    # check weights (`original_weights` is assumed to have been captured from
    # the template model before training)
    # https://github.com/keras-team/keras/issues/11313
    weights = keras.backend.batch_get_value(model.weights)
    parallel_weights = keras.backend.batch_get_value(parallel_model.weights)

    if all([np.all(w == ow) for w, ow in zip(weights, original_weights)]):
        print('Weights in the template model have not changed')
    else:
        print('Weights in the template model have changed')

    if all([np.all(w == pw) for w, pw in zip(weights, parallel_weights)]):
        print('Weights in the template and parallel model are equal')
    else:
        print('Weights in the template and parallel model are different')

# save weights
os.makedirs(weights_dir, exist_ok=True)
model.save_weights(os.path.join(weights_dir, 'pretrained_weight.h5'))
Example #6
def train_frcnn(options):
    if options.parser == 'pascal_voc':
        from utils import voc_parser as get_data
    elif options.parser == 'simple':
        from utils import simple_parser as get_data
    else:
        raise ValueError(
            "Command line option parser must be one of 'pascal_voc' or 'simple'"
        )

    # pass the settings from the command line, and persist them in the config object
    C = Config()

    C.use_horizontal_flips = bool(options.horizontal_flips)
    C.use_vertical_flips = bool(options.vertical_flips)
    C.rot_90 = bool(options.rot_90)

    C.model_path = options.output_weight_path.format(options.network)
    C.num_rois = int(options.num_rois)

    if options.network == 'resnet50':
        C.network = 'resnet50'
        from utils import rpn_res as rpn
        from utils import classifier_res as classifier_func
        from utils import get_img_output_length_res as get_img_output_length
        from utils import nn_base_res as nn_base
    elif options.network == 'vgg':
        C.network = 'vgg'
        from utils import rpn_vgg as rpn
        from utils import classifier_vgg as classifier_func
        from utils import get_img_output_length_vgg as get_img_output_length
        from utils import nn_base_vgg as nn_base
    else:
        raise ValueError(
            "Command line option network must be one of 'resnet50' or 'vgg'")

    # check if weight path was passed via command line
    if options.input_weight_path:
        C.base_net_weights = options.input_weight_path
    else:
        # set the path to weights based on backend and model
        C.base_net_weights = get_weight_path(options.network)

    all_imgs, classes_count, class_mapping = get_data(options.path)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    config_output_filename = options.config_filename

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))
    random.shuffle(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = get_anchor_gt(train_imgs,
                                   classes_count,
                                   C,
                                   get_img_output_length,
                                   K.backend(),
                                   mode='train')
    data_gen_val = get_anchor_gt(val_imgs,
                                 classes_count,
                                 C,
                                 get_img_output_length,
                                 K.backend(),
                                 mode='val')

    if K.backend() == "theano":
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers; bind the result to a new name
    # to avoid shadowing the imported rpn function
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = rpn(shared_layers, num_anchors)

    classifier = classifier_func(shared_layers,
                                 roi_input,
                                 C.num_rois,
                                 nb_classes=len(classes_count),
                                 trainable=True)

    model_rpn = Model(img_input, rpn_layers[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn_layers[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights + "rpn.h5", by_name=True)
        model_classifier.load_weights(C.base_net_weights + "classifier.h5",
                                      by_name=True)
    except Exception as e:
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
        print('Exception: {}'.format(e))

    optimizer = Adam(lr=1e-5, decay=2e-7)
    optimizer_classifier = Adam(lr=1e-5, decay=2e-7)

    model_rpn.compile(
        optimizer=optimizer,
        loss=[rpn_loss_cls(num_anchors),
              rpn_loss_regr(num_anchors)])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[class_loss_cls,
              class_loss_regr(len(classes_count) - 1)],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = options.epoch_length
    num_epochs = int(options.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. '
                            'Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = rpn_to_roi(P_rpn[0],
                               P_rpn[1],
                               C,
                               K.backend(),
                               use_regr=True,
                               overlap_thresh=0.7,
                               max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = calc_iou(R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # np.where returns a tuple of index arrays; keep the array itself
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        # not enough negatives to sample without replacement
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(selected_neg_samples)
                    else:
                        sel_samples = random.choice(selected_pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                progbar.update(iter_num + 1,
                               [('rpn_cls', losses[iter_num, 0]),
                                ('rpn_regr', losses[iter_num, 1]),
                                ('detector_cls', losses[iter_num, 2]),
                                ('detector_regr', losses[iter_num, 3])])

                iter_num += 1

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                f'Total loss decreased from {best_loss:.3f} to {curr_loss:.3f}, saving weights to '
                                f'{C.model_path}')
                        best_loss = curr_loss
                        model_classifier.save_weights(C.model_path +
                                                      "classifier.h5")
                        model_rpn.save_weights(C.model_path + "rpn.h5")
                    break
            except Exception as e:
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
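
# A self-contained numpy sketch of the balanced ROI sampling used in the
# training loop above (names are local to this sketch; assumes neg_samples is
# non-empty, as in the code above):
import numpy as np

def sample_rois(pos_samples, neg_samples, num_rois):
    n_pos = min(len(pos_samples), num_rois // 2)
    sel_pos = np.random.choice(pos_samples, n_pos, replace=False).tolist()
    n_neg = num_rois - len(sel_pos)
    # allow repeats when there are too few negatives to draw without replacement
    sel_neg = np.random.choice(neg_samples, n_neg,
                               replace=len(neg_samples) < n_neg).tolist()
    return sel_pos + sel_neg
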
Example #7
class textgenrnn:
    META_TOKEN = '<s>'
    config = {
        'rnn_layers': 2,
        'rnn_size': 128,
        'rnn_bidirectional': False,
        'max_length': 40,
        'max_words': 10000,
        'dim_embeddings': 100,
        'word_level': False,
        'single_text': False
    }
    default_config = config.copy()

    def __init__(self,
                 weights_path=None,
                 vocab_path=None,
                 config_path=None,
                 name="textgenrnn_tf"):

        if weights_path is None:
            weights_path = resource_filename(__name__,
                                             'textgenrnn_weights.hdf5')

        if vocab_path is None:
            vocab_path = resource_filename(__name__, 'textgenrnn_vocab.json')

        if config_path is not None:
            with open(config_path, 'r', encoding='utf8',
                      errors='ignore') as json_file:
                self.config = json.load(json_file)

        self.config.update({'name': name})
        self.default_config.update({'name': name})

        with open(vocab_path, 'r', encoding='utf8',
                  errors='ignore') as json_file:
            self.vocab = json.load(json_file)

        self.tokenizer = Tokenizer(filters='', lower=False, char_level=True)
        self.tokenizer.word_index = self.vocab
        self.num_classes = len(self.vocab) + 1
        self.model = textgenrnn_model(self.num_classes,
                                      cfg=self.config,
                                      weights_path=weights_path)
        self.indices_char = dict((self.vocab[c], c) for c in self.vocab)

    def generate(self,
                 n=1,
                 return_as_list=False,
                 prefix=None,
                 temperature=[1.0, 0.5, 0.2, 0.2],
                 max_gen_length=300,
                 interactive=False,
                 top_n=3,
                 progress=True):
        gen_texts = []
        iterable = trange(n) if progress and n > 1 else range(n)
        for _ in iterable:
            gen_text, _ = textgenrnn_generate(
                self.model, self.vocab, self.indices_char, temperature,
                self.config['max_length'],
                self.META_TOKEN, self.config['word_level'],
                self.config.get('single_text', False), max_gen_length,
                interactive, top_n, prefix)
            if not return_as_list:
                print("{}\n".format(gen_text))
            gen_texts.append(gen_text)
        if return_as_list:
            return gen_texts

    def generate_samples(self, n=3, temperatures=[0.2, 0.5, 1.0], **kwargs):
        for temperature in temperatures:
            print('#' * 20 + '\nTemperature: {}\n'.format(temperature) +
                  '#' * 20)
            self.generate(n, temperature=temperature, progress=False, **kwargs)

    def train_on_texts(self,
                       texts,
                       context_labels=None,
                       batch_size=128,
                       num_epochs=50,
                       verbose=1,
                       new_model=False,
                       gen_epochs=1,
                       train_size=1.0,
                       max_gen_length=300,
                       validation=True,
                       dropout=0.0,
                       via_new_model=False,
                       save_epochs=0,
                       multi_gpu=False,
                       **kwargs):

        if new_model and not via_new_model:
            self.train_new_model(texts,
                                 context_labels=context_labels,
                                 num_epochs=num_epochs,
                                 gen_epochs=gen_epochs,
                                 train_size=train_size,
                                 batch_size=batch_size,
                                 dropout=dropout,
                                 validation=validation,
                                 save_epochs=save_epochs,
                                 multi_gpu=multi_gpu,
                                 **kwargs)
            return

        if context_labels:
            context_labels = LabelBinarizer().fit_transform(context_labels)

        if 'prop_keep' in kwargs:
            train_size = kwargs['prop_keep']

        if self.config['word_level']:
            texts = [text_to_word_sequence(text, filters='') for text in texts]

        # calculate all combinations of text indices + token indices
        indices_list = [
            np.meshgrid(np.array(i), np.arange(len(text) + 1))
            for i, text in enumerate(texts)
        ]
        indices_list = np.block(indices_list)

        # If a single text, there will be 2 extra indices, so remove them
        # Also remove first sequences which use padding
        if self.config['single_text']:
            indices_list = indices_list[self.config['max_length']:-2, :]

        indices_mask = np.random.rand(indices_list.shape[0]) < train_size

        if multi_gpu:
            num_gpus = len(K.tensorflow_backend._get_available_gpus())
            batch_size = batch_size * num_gpus

        gen_val = None
        val_steps = None
        if train_size < 1.0 and validation:
            indices_list_val = indices_list[~indices_mask, :]
            gen_val = generate_sequences_from_texts(texts, indices_list_val,
                                                    self, context_labels,
                                                    batch_size)
            val_steps = max(
                int(np.floor(indices_list_val.shape[0] / batch_size)), 1)

        indices_list = indices_list[indices_mask, :]

        num_tokens = indices_list.shape[0]
        assert num_tokens >= batch_size, "Fewer tokens than batch_size."

        level = 'word' if self.config['word_level'] else 'character'
        print("Training on {:,} {} sequences.".format(num_tokens, level))

        steps_per_epoch = max(int(np.floor(num_tokens / batch_size)), 1)

        gen = generate_sequences_from_texts(texts, indices_list, self,
                                            context_labels, batch_size)

        base_lr = 4e-3

        # scheduler function must be defined inline.
        def lr_linear_decay(epoch):
            return (base_lr * (1 - (epoch / num_epochs)))

        if context_labels is not None:
            if new_model:
                weights_path = None
            else:
                weights_path = "{}_weights.hdf5".format(self.config['name'])
                self.save(weights_path)

            self.model = textgenrnn_model(self.num_classes,
                                          dropout=dropout,
                                          cfg=self.config,
                                          context_size=context_labels.shape[1],
                                          weights_path=weights_path)

        model_t = self.model

        if multi_gpu:
            # Do not locate model/merge on CPU since sample sizes are small.
            parallel_model = multi_gpu_model(self.model,
                                             gpus=num_gpus,
                                             cpu_merge=False)
            parallel_model.compile(loss='categorical_crossentropy',
                                   optimizer=RMSprop(lr=4e-3, rho=0.99))

            model_t = parallel_model
            print("Training on {} GPUs.".format(num_gpus))

        model_t.fit_generator(gen,
                              steps_per_epoch=steps_per_epoch,
                              epochs=num_epochs,
                              callbacks=[
                                  LearningRateScheduler(lr_linear_decay),
                                  generate_after_epoch(self, gen_epochs,
                                                       max_gen_length),
                                  save_model_weights(self, num_epochs,
                                                     save_epochs)
                              ],
                              verbose=verbose,
                              max_queue_size=10,
                              validation_data=gen_val,
                              validation_steps=val_steps)

        # Keep the text-only version of the model if using context labels
        if context_labels is not None:
            self.model = Model(inputs=self.model.input[0],
                               outputs=self.model.output[1])

    def train_new_model(self,
                        texts,
                        context_labels=None,
                        num_epochs=50,
                        gen_epochs=1,
                        batch_size=128,
                        dropout=0.0,
                        train_size=1.0,
                        validation=True,
                        save_epochs=0,
                        multi_gpu=False,
                        **kwargs):
        self.config = self.default_config.copy()
        self.config.update(**kwargs)

        print("Training new model w/ {}-layer, {}-cell {}LSTMs".format(
            self.config['rnn_layers'], self.config['rnn_size'],
            'Bidirectional ' if self.config['rnn_bidirectional'] else ''))

        # If training word level, must add spaces around each punctuation.
        # https://stackoverflow.com/a/3645946/9314418

        if self.config['word_level']:
            punct = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~\\n\\t\'‘’“”’–—'
            for i in range(len(texts)):
                texts[i] = re.sub('([{}])'.format(punct), r' \1 ', texts[i])
                texts[i] = re.sub(' {2,}', ' ', texts[i])

        # Create text vocabulary for new texts
        # if word-level, lowercase; if char-level, uppercase
        self.tokenizer = Tokenizer(filters='',
                                   lower=self.config['word_level'],
                                   char_level=(not self.config['word_level']))
        self.tokenizer.fit_on_texts(texts)

        # Limit vocab to max_words
        max_words = self.config['max_words']
        self.tokenizer.word_index = {
            k: v
            for (k, v) in self.tokenizer.word_index.items() if v <= max_words
        }

        if not self.config.get('single_text', False):
            self.tokenizer.word_index[self.META_TOKEN] = len(
                self.tokenizer.word_index) + 1
        self.vocab = self.tokenizer.word_index
        self.num_classes = len(self.vocab) + 1
        self.indices_char = dict((self.vocab[c], c) for c in self.vocab)

        # Create a new, blank model w/ given params
        self.model = textgenrnn_model(self.num_classes,
                                      dropout=dropout,
                                      cfg=self.config)

        # Save the files needed to recreate the model
        with open('{}_vocab.json'.format(self.config['name']),
                  'w',
                  encoding='utf8') as outfile:
            json.dump(self.tokenizer.word_index, outfile, ensure_ascii=False)

        with open('{}_config.json'.format(self.config['name']),
                  'w',
                  encoding='utf8') as outfile:
            json.dump(self.config, outfile, ensure_ascii=False)

        self.train_on_texts(texts,
                            new_model=True,
                            via_new_model=True,
                            context_labels=context_labels,
                            num_epochs=num_epochs,
                            gen_epochs=gen_epochs,
                            train_size=train_size,
                            batch_size=batch_size,
                            dropout=dropout,
                            validation=validation,
                            save_epochs=save_epochs,
                            multi_gpu=multi_gpu,
                            **kwargs)

    def save(self, weights_path="textgenrnn_weights_saved.hdf5"):
        self.model.save_weights(weights_path)

    def load(self, weights_path):
        self.model = textgenrnn_model(self.num_classes,
                                      cfg=self.config,
                                      weights_path=weights_path)

    def reset(self):
        self.config = self.default_config.copy()
        self.__init__(name=self.config['name'])

    def train_from_file(self,
                        file_path,
                        header=True,
                        delim="\n",
                        new_model=False,
                        context=None,
                        is_csv=False,
                        **kwargs):

        context_labels = None
        if context:
            texts, context_labels = textgenrnn_texts_from_file_context(
                file_path)
        else:
            texts = textgenrnn_texts_from_file(file_path, header, delim,
                                               is_csv)

        print("{:,} texts collected.".format(len(texts)))
        if new_model:
            self.train_new_model(texts,
                                 context_labels=context_labels,
                                 **kwargs)
        else:
            self.train_on_texts(texts, context_labels=context_labels, **kwargs)

    def train_from_largetext_file(self, file_path, new_model=True, **kwargs):
        with open(file_path, 'r', encoding='utf8', errors='ignore') as f:
            texts = [f.read()]

        if new_model:
            self.train_new_model(texts, single_text=True, **kwargs)
        else:
            self.train_on_texts(texts, single_text=True, **kwargs)

    def generate_to_file(self, destination_path, **kwargs):
        texts = self.generate(return_as_list=True, **kwargs)
        with open(destination_path, 'w') as f:
            for text in texts:
                f.write("{}\n".format(text))

    def encode_text_vectors(self,
                            texts,
                            pca_dims=50,
                            tsne_dims=None,
                            tsne_seed=None,
                            return_pca=False,
                            return_tsne=False):

        # if a single text, force it into a list:
        if isinstance(texts, str):
            texts = [texts]

        vector_output = Model(inputs=self.model.input,
                              outputs=self.model.get_layer('attention').output)
        encoded_vectors = []
        maxlen = self.config['max_length']
        for text in texts:
            if self.config['word_level']:
                text = text_to_word_sequence(text, filters='')
            text_aug = [self.META_TOKEN] + list(text[0:maxlen])
            encoded_text = textgenrnn_encode_sequence(text_aug, self.vocab,
                                                      maxlen)
            encoded_vector = vector_output.predict(encoded_text)
            encoded_vectors.append(encoded_vector)

        encoded_vectors = np.squeeze(np.array(encoded_vectors), axis=1)
        if pca_dims is not None:
            assert len(texts) > 1, "Must use more than 1 text for PCA"
            pca = PCA(pca_dims)
            encoded_vectors = pca.fit_transform(encoded_vectors)

        if tsne_dims is not None:
            tsne = TSNE(tsne_dims, random_state=tsne_seed)
            encoded_vectors = tsne.fit_transform(encoded_vectors)

        return_objects = encoded_vectors
        if return_pca or return_tsne:
            return_objects = [return_objects]
        if return_pca:
            return_objects.append(pca)
        if return_tsne:
            return_objects.append(tsne)

        return return_objects

    def similarity(self, text, texts, use_pca=True):
        text_encoded = self.encode_text_vectors(text, pca_dims=None)
        if use_pca:
            texts_encoded, pca = self.encode_text_vectors(texts,
                                                          return_pca=True)
            text_encoded = pca.transform(text_encoded)
        else:
            texts_encoded = self.encode_text_vectors(texts, pca_dims=None)

        cos_similarity = cosine_similarity(text_encoded, texts_encoded)[0]
        text_sim_pairs = list(zip(texts, cos_similarity))
        text_sim_pairs = sorted(text_sim_pairs, key=lambda x: -x[1])
        return text_sim_pairs
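
# A minimal sketch of typical textgenrnn usage ('texts.txt' and the weights
# file name are assumptions):
# textgen = textgenrnn()
# textgen.train_from_file('texts.txt', num_epochs=2)
# textgen.generate_samples(n=3)
# textgen.save('my_weights.hdf5')
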
    print("Error trying to load checkpoint.")
    print(error)

x_data = {'encoder_input': encoder_input_data,
          'decoder_input': decoder_input_data}
y_data = {'decoder_output': decoder_output_data}
model_train.fit(x=x_data,
                y=y_data,
                batch_size=512,
                validation_split=0.005,
                callbacks=callbacks)
modelname1 = 'MachineTranslationTrain'
modelname2 = 'MachineTranslationEncoder'
modelname3 = 'MachineTranslationDecoder'
model_train.save('{}.keras'.format(modelname1))
model_encoder.save('{}.keras'.format(modelname2))
model_decoder.save('{}.keras'.format(modelname3))
with open('model_encoder.json', 'w', encoding='utf8') as f:
    f.write(model_encoder.to_json())
model_encoder.save_weights('model_encoder_weights.h5')
with open('model_decoder.json', 'w', encoding='utf8') as f:
    f.write(model_decoder.to_json())
model_decoder.save_weights('model_decoder_weights.h5')
with open('model_train.json', 'w', encoding='utf8') as f:
    f.write(model_train.to_json())
model_train.save_weights('model_train_weights.h5')
# Translate texts

def translate(input_text, true_output_text=None):
    input_tokens = tokenizer_src.text_to_tokens(text=input_text,
                                                reverse=True,
                                                padding=True)
    initial_state = model_encoder.predict(input_tokens)
    max_tokens = tokenizer_dest.max_tokens
    shape = (1, max_tokens)
    # np.int was removed in recent NumPy; use the builtin int dtype instead
    decoder_input_data = np.zeros(shape=shape, dtype=int)
    token_int = token_start
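
# translate() is cut off above. A hedged sketch of how the greedy decoding
# loop typically continues (`token_end`, the 'decoder_initial_state' input
# name, and tokens_to_string are assumptions based on the surrounding code):
#     count_tokens = 0
#     while token_int != token_end and count_tokens < max_tokens:
#         decoder_input_data[0, count_tokens] = token_int
#         x_data = {'decoder_initial_state': initial_state,
#                   'decoder_input': decoder_input_data}
#         decoder_output = model_decoder.predict(x_data)
#         token_onehot = decoder_output[0, count_tokens, :]
#         token_int = np.argmax(token_onehot)
#         count_tokens += 1
#     output_text = tokenizer_dest.tokens_to_string(decoder_input_data[0])
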
def tune_model():
    # Build the Inception V3 network.
    base_model = inception_v3.InceptionV3(include_top=False,
                                          weights='imagenet',
                                          pooling='avg')
    print('Model loaded.')

    # build a classifier model to put on top of the convolutional model
    top_input = Input(shape=base_model.output_shape[1:])
    top_output = Dense(5, activation='softmax')(top_input)
    top_model = Model(top_input, top_output)

    # Note that it is necessary to start with a fully-trained classifier,
    # including the top classifier, in order to successfully do fine-tuning.
    top_model.load_weights(top_model_weights_path)

    # add the model on top of the convolutional base
    model = Model(inputs=base_model.inputs,
                  outputs=top_model(base_model.outputs))

    # Set all layers up to 'mixed8' to non-trainable (weights will not be updated)
    last_train_layer = model.get_layer(name='mixed8')
    for layer in model.layers[:model.layers.index(last_train_layer)]:
        layer.trainable = False

    # Compile the model with an SGD/momentum optimizer and a very slow learning rate.
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=1e-4, momentum=0.9),
                  metrics=['accuracy'])

    # Prepare data augmentation configuration
    train_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

    test_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    loss = model.evaluate_generator(validation_generator,
                                    nb_validation_samples // batch_size)
    print('Model validation performance before fine-tuning:', loss)

    csv_logger = CSVLogger(output_dir + 'model_tuning.csv')
    # fine-tune the model
    model.fit_generator(train_generator,
                        steps_per_epoch=nb_train_samples // batch_size,
                        epochs=tune_epochs,
                        validation_data=validation_generator,
                        validation_steps=nb_validation_samples // batch_size,
                        workers=4,
                        callbacks=[csv_logger])
    model.save_weights(tuned_weights_path)
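
# train_top_model() and tune_model() both rely on module-level configuration
# that is not shown in this excerpt. Plausible values (all assumptions):
# output_dir = './output/'
# train_data_dir = './data/train/'
# validation_data_dir = './data/validation/'
# img_height, img_width = 299, 299  # InceptionV3 default input size
# batch_size = 32
# top_epochs = tune_epochs = 50
# nb_train_samples, nb_validation_samples = 2000, 800
# top_model_weights_path = output_dir + 'top_model_weights.h5'
# tuned_weights_path = output_dir + 'tuned_weights.h5'
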
Example #10
        # NOTE: this example is truncated; the call below is reconstructed to
        # mirror the 'y_52' block that follows (the filter count of 256 is an
        # assumption).
        x, fmap_26 = YOLOBlock(256,
                               3 * (5 + n_cls),
                               'y_26',
                               trainable=True,
                               restore_output_weights=restore_output_weights)(
                                   x, **kwargs)
        x = Upsample(128, trainable=True)([x, x_52], **kwargs)
        x, fmap_52 = YOLOBlock(128,
                               3 * (5 + n_cls),
                               'y_52',
                               trainable=True,
                               restore_output_weights=restore_output_weights)(
                                   x, **kwargs)

        return fmap_52, fmap_26, fmap_13

    return call


if __name__ == '__main__':
    inputs = Input(shape=(416, 416, 3))
    # model = yolo_v3(inputs)
    yolo_v3_net = YOLOv3Net(n_cls=80,
                            restore_weights=True,
                            trainable_backbone=True,
                            use_spp=True,
                            restore_output_weights=True)
    model = Model(inputs, outputs=yolo_v3_net(inputs))
    model.save_weights('coco_init_weights_spp.h5')
    print(
        f'Restored {_weight_loader.cnt} of {len(_weight_loader.weights)} weights.'
    )
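
# A hedged sketch of restoring the weights saved above into a fresh network
# (mirrors the __main__ construction above; not part of the original source):
# inputs = Input(shape=(416, 416, 3))
# model = Model(inputs, outputs=YOLOv3Net(n_cls=80, use_spp=True)(inputs))
# model.load_weights('coco_init_weights_spp.h5')
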
Example #11
def main(batch_size=100, n_paired_per_batch=100, cvset=0,
         p_dropT=0.5, p_dropE=0.1, stdE=0.05,
         fc_dimT=[50, 50, 50, 50], fc_dimE=[60, 60, 60, 60], latent_dim=3,
         recon_strT=1.0, recon_strE=0.1, cpl_str=10.0,
         n_epoch=2000, steps_per_epoch=500,
         run_iter=0, model_id='crossval_noadaptloss', exp_name='patchseq_v2_noadapt'):
         
    train_dat, val_dat, train_ind_T, train_ind_E, val_ind, dir_pth = dataset_50fold(exp_name=exp_name,cvset=cvset)
    train_generator = DatagenTE(dataset=train_dat, batch_size=batch_size, n_paired_per_batch=n_paired_per_batch, steps_per_epoch=steps_per_epoch)
    chkpt_save_period = 1e7
    
    #Architecture parameters ------------------------------
    input_dim  = [train_dat['T'].shape[1],train_dat['E'].shape[1]]

    #'_fcT_' +  '-'.join(map(str, fc_dimT)) + \
    #'_fcE_' +  '-'.join(map(str, fc_dimE)) + \
    fileid = model_id + \
        '_rT_' + str(recon_strT) + \
        '_rE_'  + str(recon_strE) + \
        '_cs_'  + str(cpl_str) + \
        '_pdT_' + str(p_dropT) + \
        '_pdE_' + str(p_dropE) + \
        '_sdE_' + str(stdE) + \
        '_bs_'  + str(batch_size) + \
        '_np_'  + str(n_paired_per_batch) + \
        '_se_'  + str(steps_per_epoch) +\
        '_ne_'  + str(n_epoch) + \
        '_cv_'  + str(cvset) + \
        '_ri_'  + str(run_iter)
    fileid = fileid.replace('.', '-')
    
    print(fileid)
    out_actfcn = ['elu','linear']

    def add_gauss_noise(x):
        '''Injects additive Gaussian noise independently into each element of input x'''
        x_noisy = x + tf.random.normal(shape=tf.shape(x), mean=0., stddev=stdE, dtype=tf.float32)
        return tf.keras.backend.in_train_phase(x_noisy, x)
    
    #Model inputs -----------------------------------------
    M = {}
    M['in_ae_0']   = Input(shape=(input_dim[0],), name='in_ae_0')
    M['in_ae_1']   = Input(shape=(input_dim[1],), name='in_ae_1')

    M['ispaired_ae_0'] = Input(shape=(1,), name='ispaired_ae_0')
    M['ispaired_ae_1'] = Input(shape=(1,), name='ispaired_ae_1')

    #Transcriptomics arm---------------------------------------------------------------------------------
    M['dr_ae_0'] = Dropout(p_dropT, name='dr_ae_0')(M['in_ae_0'])
    X = 'dr_ae_0'

    for j, units in enumerate(fc_dimT):
        Y = 'fc'+ format(j,'02d') +'_ae_0'
        M[Y] = Dense(units, activation='elu', name=Y)(M[X])
        X = Y

    M['ldx_ae_0'] = Dense(latent_dim, activation='linear', name='ldx_ae_0')(M[X])
    M['ld_ae_0'] = BatchNormalization(scale=False, center=False, epsilon=1e-10, momentum=0.99, name='ld_ae_0')(M['ldx_ae_0'])
    X = 'ld_ae_0'

    for j, units in enumerate(reversed(fc_dimT)):
        Y = 'fc'+ format(j+len(fc_dimT),'02d') +'_ae_0'
        M[Y] = Dense(units, activation='elu', name=Y)(M[X])
        X = Y
    
    M['ou_ae_0']  = Dense(input_dim[0], activation=out_actfcn[0], name='ou_ae_0')(M[X])

    #Electrophysiology arm--------------------------------------------------------------------------------
    M['no_ae_1']  = Lambda(add_gauss_noise,name='no_ae_1')(M['in_ae_1'])
    M['dr_ae_1']  = Dropout(p_dropE, name='dr_ae_1')(M['no_ae_1'])
    X = 'dr_ae_1'
    for j, units in enumerate(fc_dimE):
        Y = 'fc'+ format(j,'02d') +'_ae_1'
        M[Y] = Dense(units, activation='elu', name=Y)(M[X])
        X = Y
    
    M['ldx_ae_1'] = Dense(latent_dim, activation='linear',name='ldx_ae_1')(M[X])
    M['ld_ae_1']  = BatchNormalization(scale=False, center=False, epsilon=1e-10, momentum=0.99, name='ld_ae_1')(M['ldx_ae_1'])
    X = 'ld_ae_1'

    for j, units in enumerate(reversed(fc_dimE)):
        Y = 'fc'+ format(j+len(fc_dimE),'02d') +'_ae_1'
        M[Y] = Dense(units, activation='elu', name=Y)(M[X])
        X = Y

    M['ou_ae_1']  = Dense(input_dim[1], activation=out_actfcn[1], name='ou_ae_1')(M[X])

    cplAE = Model(inputs=[M['in_ae_0'], M['in_ae_1'], M['ispaired_ae_0'], M['ispaired_ae_1']],
                  outputs=[M['ou_ae_0'], M['ou_ae_1'],M['ld_ae_0'], M['ld_ae_1']])
    
    def coupling_loss(zi, pairedi, zj, pairedj):
        '''Minimum-singular-value-based coupling loss.
        The SVD is computed over all datapoints;
        the MSE is computed over only the `paired` datapoints.'''
        batch_size = tf.shape(zi)[0]

        paired_i = tf.reshape(pairedi, [tf.shape(pairedi)[0],])
        paired_j = tf.reshape(pairedj, [tf.shape(pairedj)[0],])
        zi_paired = tf.boolean_mask(zi, tf.equal(paired_i, 1.0))
        zj_paired = tf.boolean_mask(zj, tf.equal(paired_j, 1.0))

        vars_j_ = tf.square(tf.linalg.svd(zj - tf.reduce_mean(zj, axis=0), compute_uv=False))/tf.cast(batch_size - 1, tf.float32)
        vars_j  = tf.where(tf.math.is_nan(vars_j_), tf.zeros_like(vars_j_) + tf.cast(1e-1,dtype=tf.float32), vars_j_)
        L_ij    = tf.compat.v1.losses.mean_squared_error(zi_paired, zj_paired)/tf.maximum(tf.reduce_min(vars_j, axis=None),tf.cast(1e-2,dtype=tf.float32))

        def loss(y_true, y_pred):
            #Adaptive version:#tf.multiply(tf.stop_gradient(L_ij), L_ij)
            return L_ij
        return loss
        
    #Create loss dictionary
    loss_dict = {'ou_ae_0': mse, 'ou_ae_1': mse,
                 'ld_ae_0': coupling_loss(zi=M['ld_ae_0'], pairedi=M['ispaired_ae_0'],zj=M['ld_ae_1'], pairedj=M['ispaired_ae_1']),
                 'ld_ae_1': coupling_loss(zi=M['ld_ae_1'], pairedi=M['ispaired_ae_1'],zj=M['ld_ae_0'], pairedj=M['ispaired_ae_0'])}

    #Loss weights dictionary
    loss_wt_dict = {'ou_ae_0': recon_strT,
                    'ou_ae_1': recon_strE,
                    'ld_ae_0': cpl_str,
                    'ld_ae_1': cpl_str}

    #Add loss definitions to the model
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Checkpoint function definitions
    checkpoint_cb = ModelCheckpoint(filepath=(dir_pth['checkpoint']+fileid + '-checkpoint-' + '{epoch:04d}' + '.h5'),
                                      verbose=1, save_best_only=False, save_weights_only=True,
                                      mode='auto', period=chkpt_save_period)

    val_in = {'in_ae_0': val_dat['T'],
              'in_ae_1': val_dat['E'],
              'ispaired_ae_0': val_dat['T_ispaired'],
              'ispaired_ae_1': val_dat['E_ispaired']}

    val_out = {'ou_ae_0': val_dat['T'],
               'ou_ae_1': val_dat['E'],
               'ld_ae_0': np.zeros((val_dat['T'].shape[0], latent_dim)),
               'ld_ae_1': np.zeros((val_dat['E'].shape[0], latent_dim))}
    
    #Custom callback object
    log_cb = CSVLogger(filename=dir_pth['logs']+fileid+'.csv')

    last_checkpoint_epoch = 0
    start_time = timeit.default_timer()
    cplAE.fit_generator(train_generator,
                        validation_data=(val_in,val_out),
                        epochs=n_epoch,
                        max_queue_size=100,
                        use_multiprocessing=False, workers=1,
                        initial_epoch=last_checkpoint_epoch,
                        verbose=2, callbacks=[checkpoint_cb,log_cb])
    elapsed = timeit.default_timer() - start_time        
    print('-------------------------------')
    print('Training time:',elapsed)
    print('-------------------------------')

    #Saving weights
    cplAE.save_weights(dir_pth['result']+fileid+'-modelweights'+'.h5')
    
    matsummary = {}
    matsummary['cvset']       = cvset
    matsummary['val_ind']     = val_ind
    matsummary['train_ind_T'] = train_ind_T
    matsummary['train_ind_E'] = train_ind_E
    
    #Trained model predictions
    i = 0
    encoder = Model(inputs=M['in_ae_'+str(i)], outputs=M['ld_ae_'+str(i)])
    matsummary['z_val_'+str(i)]   = encoder.predict({'in_ae_'+str(i): val_dat['T']})
    matsummary['z_train_'+str(i)] = encoder.predict({'in_ae_'+str(i): train_dat['T']})

    i = 1
    encoder = Model(inputs=M['in_ae_'+str(i)], outputs=M['ld_ae_'+str(i)])
    matsummary['z_val_'+str(i)]   = encoder.predict({'in_ae_'+str(i): val_dat['E']})
    matsummary['z_train_'+str(i)] = encoder.predict({'in_ae_'+str(i): train_dat['E']})

    sio.savemat(dir_pth['result']+fileid+'-summary', matsummary)
    return
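
A hedged driver sketch for the example above; the argument values simply echo main()'s defaults.

if __name__ == '__main__':
    # Hypothetical entry point: run a single cross-validation fold with defaults.
    main(batch_size=100, n_paired_per_batch=100, cvset=0, run_iter=0)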
Example #12
    y_data = {'decoder_output': decoder_output_data}
    validation_split = 10000 / len(encoder_input_data)
    print(validation_split)

    model_train.fit(x=x_data,
                    y=y_data,
                    batch_size=512,
                    epochs=10,
                    validation_split=validation_split,
                    )
mark_start = 'starttt'
mark_end = 'enddd'
token_start = tokenizer_vitn.word_index[mark_start.strip()]
token_end = tokenizer_vitn.word_index[mark_end.strip()]

# Save the weights and the full model under distinct file names, so the full
# model save does not overwrite the weights file.
model_train.save_weights('nmt_train_model_weights.h5')
model_train.save('nmt_train_model.h5')

def translate(input_text, true_output_text=None):
    input_tokens = tokenizer_eng.text_to_tokens(text=input_text,reverse=True,padding=True)

    initial_state = model_encoder.predict(input_tokens)
    max_tokens = tokenizer_vitn.max_tokens

    shape = (1, max_tokens)
    decoder_input_data = np.zeros(shape=shape, dtype=int)  # np.int is deprecated in newer numpy
    token_int = token_start
    output_text = ''
    count_tokens = 0
    while token_int != token_end and count_tokens < max_tokens:
        decoder_input_data[0, count_tokens] = token_int
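        # Hedged completion of the decode loop (the original example is cut off
        # here): model_decoder and tokenizer_vitn.token_to_word are assumptions
        # inferred from the surrounding code, following the usual Keras NMT pattern.
        x_data = {'decoder_initial_state': initial_state,
                  'decoder_input': decoder_input_data}
        decoder_output = model_decoder.predict(x_data)
        token_int = np.argmax(decoder_output[0, count_tokens, :])
        output_text += ' ' + tokenizer_vitn.token_to_word(token_int)
        count_tokens += 1
    print('Translated text:', output_text.replace(mark_end, '').strip())
    if true_output_text is not None:
        print('True output text:', true_output_text)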
Example #13
class JointEmbeddingModel:
    def __init__(self, config):
        self.data_dir = config.data_dir
        self.model_name = config.model_name
        self.methname_len = config.methname_len  # the max length of method name
        self.apiseq_len = config.apiseq_len
        self.tokens_len = config.tokens_len
        self.desc_len = config.desc_len

        self.vocab_size = config.n_words  # the size of vocab
        self.embed_dims = config.embed_dims
        self.lstm_dims = config.lstm_dims
        self.hidden_dims = config.hidden_dims

        self.margin = 0.05

        self.init_embed_weights_methodname = config.init_embed_weights_methodname
        self.init_embed_weights_tokens = config.init_embed_weights_tokens
        self.init_embed_weights_desc = config.init_embed_weights_desc

        self.methodname = Input(shape=(self.methname_len, ),
                                dtype='int32',
                                name='methodname')
        self.apiseq = Input(shape=(self.apiseq_len, ),
                            dtype='int32',
                            name='apiseq')
        self.tokens = Input(shape=(self.tokens_len, ),
                            dtype='int32',
                            name='tokens')
        self.desc_good = Input(shape=(self.desc_len, ),
                               dtype='int32',
                               name='desc_good')
        self.desc_bad = Input(shape=(self.desc_len, ),
                              dtype='int32',
                              name='desc_bad')

        # create path to store model Info
        if not os.path.exists(self.data_dir + 'model/' + self.model_name):
            os.makedirs(self.data_dir + 'model/' + self.model_name)

    def build(self):

        # 1 -- CodeNN
        methodname = Input(shape=(self.methname_len, ),
                           dtype='int32',
                           name='methodname')
        apiseq = Input(shape=(self.apiseq_len, ), dtype='int32', name='apiseq')
        tokens = Input(shape=(self.tokens_len, ), dtype='int32', name='tokens')

        # methodname
        # embedding layer
        init_emd_weights = None
        if self.init_embed_weights_methodname is not None:
            init_emd_weights = [
                np.load(self.data_dir + self.init_embed_weights_methodname)
            ]

        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              weights=init_emd_weights,
                              mask_zero=False,
                              name='embedding_methodname')

        methodname_embedding = embedding(methodname)

        # dropout
        dropout = Dropout(0.25, name='dropout_methodname_embed')
        methodname_dropout = dropout(methodname_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      name='lstm_methodname_fw')

        # backward rnn
        bw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      go_backwards=True,
                      name='lstm_methodname_bw')

        methodname_fw = fw_rnn(methodname_dropout)
        methodname_bw = bw_rnn(methodname_dropout)

        dropout = Dropout(0.25, name='dropout_methodname_rnn')
        methodname_fw_dropout = dropout(methodname_fw)
        methodname_bw_dropout = dropout(methodname_bw)

        # max pooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_methodname')
        methodname_pool = Concatenate(name='concat_methodname_lstm')(
            [maxpool(methodname_fw_dropout),
             maxpool(methodname_bw_dropout)])
        activation = Activation('tanh', name='active_methodname')
        methodname_repr = activation(methodname_pool)

        # apiseq
        # embedding layer
        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              mask_zero=False,
                              name='embedding_apiseq')

        apiseq_embedding = embedding(apiseq)

        # dropout
        dropout = Dropout(0.25, name='dropout_apiseq_embed')
        apiseq_dropout = dropout(apiseq_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims,
                      return_sequences=True,
                      recurrent_dropout=0.2,
                      name='lstm_apiseq_fw')

        # backward rnn
        bw_rnn = LSTM(self.lstm_dims,
                      return_sequences=True,
                      recurrent_dropout=0.2,
                      go_backwards=True,
                      name='lstm_apiseq_bw')

        apiseq_fw = fw_rnn(apiseq_dropout)
        apiseq_bw = bw_rnn(apiseq_dropout)

        dropout = Dropout(0.25, name='dropout_apiseq_rnn')
        apiseq_fw_dropout = dropout(apiseq_fw)
        apiseq_bw_dropout = dropout(apiseq_bw)

        # max pooling

        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_apiseq')
        apiseq_pool = Concatenate(name='concat_apiseq_lstm')(
            [maxpool(apiseq_fw_dropout),
             maxpool(apiseq_bw_dropout)])
        activation = Activation('tanh', name='active_apiseq')
        apiseq_repr = activation(apiseq_pool)

        # tokens
        # embedding layer
        init_emd_weights = None
        if self.init_embed_weights_tokens is not None:
            init_emd_weights = [
                np.load(self.data_dir + self.init_embed_weights_tokens)
            ]

        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              weights=init_emd_weights,
                              mask_zero=False,
                              name='embedding_tokens')

        tokens_embedding = embedding(tokens)

        # dropout
        dropout = Dropout(0.25, name='dropout_tokens_embed')
        tokens_dropout = dropout(tokens_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      name='lstm_tokens_fw')

        # backward rnn
        bw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      go_backwards=True,
                      name='lstm_tokens_bw')

        tokens_fw = fw_rnn(tokens_dropout)
        tokens_bw = bw_rnn(tokens_dropout)

        dropout = Dropout(0.25, name='dropout_tokens_rnn')
        tokens_fw_dropout = dropout(tokens_fw)
        tokens_bw_dropout = dropout(tokens_bw)

        # max pooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_tokens')
        tokens_pool = Concatenate(name='concat_tokens_lstm')(
            [maxpool(tokens_fw_dropout),
             maxpool(tokens_bw_dropout)])
        activation = Activation('tanh', name='active_tokens')
        tokens_repr = activation(tokens_pool)

        # fusion methodname, apiseq, tokens
        merge_methname_api = Concatenate(name='merge_methname_api')(
            [methodname_repr, apiseq_repr])
        merge_code_repr = Concatenate(name='merge_code_repr')(
            [merge_methname_api, tokens_repr])

        code_repr = Dense(self.hidden_dims,
                          activation='tanh',
                          name='dense_coderepr')(merge_code_repr)

        self.code_repr_model = Model(inputs=[methodname, apiseq, tokens],
                                     outputs=[code_repr],
                                     name='code_repr_model')
        self.code_repr_model.summary()

        #  2 -- description
        desc = Input(shape=(self.desc_len, ), dtype='int32', name='desc')

        # desc
        # embedding layer
        init_emd_weights = None
        if self.init_embed_weights_desc is not None:
            init_emd_weights = [
                np.load(self.data_dir + self.init_embed_weights_desc)
            ]

        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              weights=init_emd_weights,
                              mask_zero=False,
                              name='embedding_desc')

        desc_embedding = embedding(desc)

        # dropout
        dropout = Dropout(0.25, name='dropout_desc_embed')
        desc_dropout = dropout(desc_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      name='lstm_desc_fw')

        # backward rnn
        bw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      go_backwards=True,
                      name='lstm_desc_bw')

        desc_fw = fw_rnn(desc_dropout)
        desc_bw = bw_rnn(desc_dropout)

        dropout = Dropout(0.25, name='dropout_desc_rnn')
        desc_fw_dropout = dropout(desc_fw)
        desc_bw_dropout = dropout(desc_bw)

        # max pooling

        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_desc')
        desc_pool = Concatenate(name='concat_desc_lstm')(
            [maxpool(desc_fw_dropout),
             maxpool(desc_bw_dropout)])
        activation = Activation('tanh', name='active_desc')
        desc_repr = activation(desc_pool)

        self.desc_repr_model = Model(inputs=[desc],
                                     outputs=[desc_repr],
                                     name='desc_repr_model')
        self.desc_repr_model.summary()

        #  3 -- cosine similarity
        code_repr = self.code_repr_model([methodname, apiseq, tokens])
        desc_repr = self.desc_repr_model([desc])

        cos_sim = Dot(axes=1, normalize=True,
                      name='cos_sim')([code_repr, desc_repr])

        sim_model = Model(inputs=[methodname, apiseq, tokens, desc],
                          outputs=[cos_sim],
                          name='sim_model')

        self.sim_model = sim_model

        self.sim_model.summary()

        #  4 -- build training model
        good_sim = sim_model(
            [self.methodname, self.apiseq, self.tokens, self.desc_good])
        bad_sim = sim_model(
            [self.methodname, self.apiseq, self.tokens, self.desc_bad])

        loss = Lambda(lambda x: K.maximum(1e-6, self.margin - x[0] + x[1]),
                      output_shape=lambda x: x[0],
                      name='loss')([good_sim, bad_sim])

        self.training_model = Model(inputs=[
            self.methodname, self.apiseq, self.tokens, self.desc_good,
            self.desc_bad
        ],
                                    outputs=[loss],
                                    name='training_model')

        self.training_model.summary()

    def compile(self, optimizer, **kwargs):
        self.code_repr_model.compile(loss='cosine_proximity',
                                     optimizer=optimizer,
                                     **kwargs)
        self.desc_repr_model.compile(loss='cosine_proximity',
                                     optimizer=optimizer,
                                     **kwargs)
        # y_true is a dummy zero tensor supplied by fit(); the Lambda 'loss'
        # layer already computes the ranking loss, so this loss just passes
        # y_pred through (+y_true-y_true keeps Keras's gradient plumbing happy).
        self.training_model.compile(
            loss=lambda y_true, y_pred: y_pred + y_true - y_true,
            optimizer=optimizer,
            **kwargs)
        self.sim_model.compile(loss='binary_crossentropy',
                               optimizer=optimizer,
                               **kwargs)

    def fit(self, x, **kwargs):
        y = np.zeros(shape=x[0].shape[:1], dtype=np.float32)
        return self.training_model.fit(x, y, **kwargs)

    def repr_code(self, x, **kwargs):
        return self.code_repr_model.predict(x, **kwargs)

    def repr_desc(self, x, **kwargs):
        return self.desc_repr_model.predict(x, **kwargs)

    def predict(self, x, **kwargs):
        return self.sim_model.predict(x, **kwargs)

    def save(self, code_model_file, desc_model_file, **kwargs):
        self.code_repr_model.save_weights(code_model_file, **kwargs)
        self.desc_repr_model.save_weights(desc_model_file, **kwargs)

    def load(self, code_model_file, desc_model_file, **kwargs):
        self.code_repr_model.load_weights(code_model_file, **kwargs)
        self.desc_repr_model.load_weights(desc_model_file, **kwargs)
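
A hedged usage sketch for the joint-embedding model above; the five input arrays are placeholders for whatever data pipeline feeds it.

# Hypothetical usage; methname/apiseq/tokens/desc_good/desc_bad are int32
# arrays shaped like the Inputs declared in __init__, and config is the same
# configuration object the constructor expects.
model = JointEmbeddingModel(config)
model.build()
model.compile(optimizer='adam')
# fit() generates the dummy zero targets itself: the Lambda 'loss' layer
# already holds the margin ranking objective.
model.fit([methname, apiseq, tokens, desc_good, desc_bad], batch_size=128, epochs=10)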
class Neural:

    def __init__(self, size_window_left, size_window_right, number_samples, threshold, number_epochs,
                      learning_patterns_per_id, optimizer_function, loss_function, dense_layers,
                      output_evolution_error_figures):

        self.size_window_left = size_window_left
        self.size_window_right = size_window_right
        self.number_samples = number_samples
        self.threshold = threshold
        self.number_epochs = number_epochs
        self.learning_patterns_per_id = learning_patterns_per_id
        self.optimizer_function = optimizer_function
        self.loss_function = loss_function
        self.output_evolution_error_figures = output_evolution_error_figures
        self.neural_network = None
        self.dense_layers = dense_layers

    def create_neural_network(self):

        input_size = Input(shape=(self.size_window_left + self.size_window_right + 1,))
        # Please do not change this layer
        self.neural_network = Dense(20, )(input_size)
        self.neural_network = Dropout(0.2)(self.neural_network)

        for i in range(self.dense_layers - 1):

            self.neural_network = Dense(20)(self.neural_network)
            self.neural_network = Dropout(0.5)(self.neural_network)

        # Please do not change this layer
        self.neural_network = Dense(1, activation='sigmoid')(self.neural_network)
        self.neural_network = Model(input_size, self.neural_network)
        self.neural_network.summary()
        self.neural_network.compile(optimizer=self.optimizer_function, loss=self.loss_function,
                                    metrics=['mean_squared_error'])

    def fit(self, x, y, x_validation, y_validation):

        first_test_training = self.neural_network.evaluate(x, y)
        first_test_validation = self.neural_network.evaluate(x_validation, y_validation)
        history = self.neural_network.fit(x, y, epochs=self.number_epochs,
                                          validation_data=(x_validation, y_validation), )
        self.plotter_error_evaluate(history.history['mean_squared_error'], history.history['val_mean_squared_error'],
                                    first_test_training, first_test_validation)

    def plotter_error_evaluate(self, mean_square_error_training, mean_square_error_evaluate, first_error_training,
                               first_error_evaluate):

        mean_square_error_training.insert(0, first_error_training[1])
        mean_square_error_evaluate.insert(0, first_error_evaluate[1])
        matplotlib.pyplot.plot(mean_square_error_training, 'b', marker='^', label="Training")
        matplotlib.pyplot.plot(mean_square_error_evaluate, 'g', marker='o', label="Validation")
        matplotlib.pyplot.legend(loc="upper right")
        matplotlib.pyplot.xlabel('Number of epochs')
        matplotlib.pyplot.ylabel('Mean error')
        matplotlib.pyplot.savefig(
            self.output_evolution_error_figures + "fig_Mean_square_error_" + str(datetime.datetime.now()) + ".pdf")

    def predict_values(self, x):

        return self.neural_network.predict(x)

    def save_models(self, model_architecture_file, model_weights_file):

        model_json = self.neural_network.to_json()

        with open(model_architecture_file, "w") as json_file:

            json_file.write(model_json)

        self.neural_network.save_weights(model_weights_file)
        print("Saved model {} {}".format(model_architecture_file, model_weights_file))

    def load_models(self, model_architecture_file, model_weights_file):

        json_file = open(model_architecture_file, 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.neural_network = model_from_json(loaded_model_json)
        self.neural_network.load_weights(model_weights_file)
        print("Loaded model {} {}".format(model_architecture_file, model_weights_file))

    @staticmethod
    def get_samples_vectorized(sample):

        sample_vectorized = []

        for i in range(len(sample)):

            sample_vectorized.append(float(sample[i][2]))

        return sample_vectorized, sample[5][2]

    def predict(self, x):

        x_axis = []
        y_axis = []
        results_predicted = []

        for i in range(len(x)):

            x_temp, y_temp = self.get_samples_vectorized(x[i])
            x_axis.append(x_temp)
            y_axis.append(y_temp)

        predicted = self.neural_network.predict(np.array(x_axis))

        for i in range(len(predicted)):

            if predicted[i] > self.threshold or y_axis[i] > 0.8:

                results_predicted.append(x[i][5])

        return results_predicted
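
A hedged construction sketch for the Neural class above; every argument is a placeholder value.

# Hypothetical usage; window sizes, epochs and paths are illustrative only.
net = Neural(size_window_left=5, size_window_right=5, number_samples=1000,
             threshold=0.9, number_epochs=50, learning_patterns_per_id=10,
             optimizer_function='adam', loss_function='binary_crossentropy',
             dense_layers=3, output_evolution_error_figures='figures/')
net.create_neural_network()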
Example #15
class A2C(Agent):
    """Advantage Actor-Critic (A2C)
	A2C is a synchronous version of A3C which gives equal or better performance.
	For more information on A2C refer to the OpenAI blog post: https://blog.openai.com/baselines-acktr-a2c/.
	The A3C algorithm is described in "Asynchronous Methods for Deep Reinforcement Learning" (Mnih et al., 2016)
	Since this algorithm is on-policy, it can and should be trained with multiple simultaneous environment instances.
	The parallelism decorrelates the agents' data into a more stationary process which aids learning.
	"""
    def __init__(self,
                 model,
                 actions,
                 optimizer=None,
                 policy=None,
                 test_policy=None,
                 gamma=0.99,
                 instances=8,
                 nsteps=1,
                 value_loss=0.5,
                 entropy_loss=0.01):
        """
		TODO: Describe parameters
		"""
        self.actions = actions
        self.optimizer = Adam(lr=3e-3) if optimizer is None else optimizer
        self.memory = memory.OnPolicy(steps=nsteps, instances=instances)

        if policy is None:
            # Create one policy per instance, with varying exploration parameters
            self.policy = [Greedy()] + [
                GaussianEpsGreedy(eps, 0.1)
                for eps in np.arange(0, 1, 1 / (instances - 1))
            ]
        else:
            self.policy = policy
        self.test_policy = Greedy() if test_policy is None else test_policy

        self.gamma = gamma
        self.instances = instances
        self.nsteps = nsteps
        self.value_loss = value_loss
        self.entropy_loss = entropy_loss
        self.training = True

        # Create output model layers based on number of actions
        raw_output = model.layers[-1].output
        actor = Dense(actions, activation='softmax')(
            raw_output)  # Actor (Policy Network)
        critic = Dense(1, activation='linear')(
            raw_output)  # Critic (Value Network)
        output_layer = Concatenate()([actor, critic])
        self.model = Model(inputs=model.input, outputs=output_layer)

        def a2c_loss(targets_actions, y_pred):
            # Unpack the target returns and the one-hot action mask
            targets, actions = targets_actions[:, 0], targets_actions[:, 1:]
            probs, values = y_pred[:, :-1], y_pred[:, -1]
            # Compute advantages and logprobabilities
            adv = targets - values
            logprob = tf.math.log(
                tf.reduce_sum(probs * actions, axis=1, keepdims=False) + 1e-10)
            # Compute composite loss
            loss_policy = -adv * logprob
            loss_value = self.value_loss * tf.square(adv)
            entropy = self.entropy_loss * tf.reduce_sum(
                probs * tf.math.log(probs + 1e-10), axis=1, keepdims=False)
            return tf.reduce_mean(loss_policy + loss_value + entropy)

        self.model.compile(optimizer=self.optimizer, loss=a2c_loss)

    def save(self, filename, overwrite=False):
        """Saves the model parameters to the specified file."""
        self.model.save_weights(filename, overwrite=overwrite)

    def act(self, state, instance=0):
        """Returns the action to be taken given a state."""
        qvals = self.model.predict(np.array([state]))[0][:-1]
        if self.training:
            return self.policy[instance].act(qvals) if isinstance(
                self.policy, list) else self.policy.act(qvals)
        else:
            return self.test_policy[instance].act(qvals) if isinstance(
                self.test_policy, list) else self.test_policy.act(qvals)

    def push(self, transition, instance=0):
        """Stores the transition in memory."""
        self.memory.put(transition, instance)

    def train(self, step):
        """Trains the agent for one step."""
        if len(self.memory) < self.instances:
            return

        state_batch, action_batch, reward_batches, end_state_batch, not_done_mask = self.memory.get(
        )

        # Compute the value of the last next states
        target_qvals = np.zeros(self.instances)
        non_final_last_next_states = [
            es for es in end_state_batch if es is not None
        ]
        if len(non_final_last_next_states) > 0:
            non_final_mask = list(map(lambda s: s is not None,
                                      end_state_batch))
            target_qvals[non_final_mask] = self.model.predict_on_batch(
                np.array(non_final_last_next_states))[:, -1].squeeze()

        # Compute n-step discounted return
        # If episode ended within any sampled nstep trace - zero out remaining rewards
        for n in reversed(range(self.nsteps)):
            rewards = np.array([b[n] for b in reward_batches])
            target_qvals *= np.array([t[n] for t in not_done_mask])
            target_qvals = rewards + (self.gamma * target_qvals)

        # Prepare loss data: target Q-values and actions taken (as a mask)
        ran = np.arange(self.instances)
        targets_actions = np.zeros((self.instances, self.actions + 1))
        targets_actions[ran, 0] = target_qvals
        targets_actions[ran, np.array(action_batch) + 1] = 1

        self.model.train_on_batch(np.array(state_batch), targets_actions)
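
A hedged usage sketch for the A2C agent above, assuming the usual Keras imports (Sequential, Dense) and numpy.

# Hypothetical usage; `base_model` is any Keras feature extractor whose last
# layer's output feeds the actor/critic heads built in __init__.
base_model = Sequential([Dense(64, activation='relu', input_shape=(4,))])
agent = A2C(base_model, actions=2, instances=8, nsteps=1)
action = agent.act(np.zeros(4))  # greedy policy for instance 0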
class GAN():
    def __init__(self, model_yaml, train_yaml):
        """

        Args:
            model_yaml: dictionnary with the model parameters
            train_yaml: dictionnary the tran parameters
        """
        self.sigma_val = 0
        self.model_yaml = model_yaml
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        if "dict_band_x" not in train_yaml:
            self.dict_band_X = None
            self.dict_band_label = None
            self.dict_rescale_type = None
        else:
            self.dict_band_X = train_yaml["dict_band_x"]
            self.dict_band_label = train_yaml["dict_band_label"]
            self.dict_rescale_type = train_yaml["dict_rescale_type"]
        self.s1bands = train_yaml["s1bands"]
        self.s2bands = train_yaml["s2bands"]
        # self.latent_dim = 100
        # PATH
        self.model_name = model_yaml["model_name"]
        self.model_dir = train_yaml["training_dir"] + self.model_name + "/"
        self.this_training_dir = self.model_dir + "training_{}/".format(
            train_yaml["training_number"])
        self.saving_image_path = self.this_training_dir + "saved_training_images/"
        self.saving_logs_path = self.this_training_dir + "logs/"
        self.checkpoint_dir = self.this_training_dir + "checkpoints/"
        self.previous_checkpoint = train_yaml["load_model"]
        # TRAIN PARAMETER
        self.normalization = train_yaml["normalization"]
        self.epoch = train_yaml["epoch"]
        self.batch_size = train_yaml["batch_size"]
        # self.sess = sess
        self.learning_rate = train_yaml["lr"]
        self.fact_g_lr = train_yaml["fact_g_lr"]
        self.beta1 = train_yaml["beta1"]
        self.val_directory = train_yaml["val_directory"]
        self.fact_s2 = train_yaml["s2_scale"]
        self.fact_s1 = train_yaml["s1_scale"]
        self.data_X, self.data_y, self.scale_dict_train = load_data(
            train_yaml["train_directory"],
            x_shape=model_yaml["input_shape"],
            label_shape=model_yaml["dim_gt_image"],
            normalization=self.normalization,
            dict_band_X=self.dict_band_X,
            dict_band_label=self.dict_band_label,
            dict_rescale_type=self.dict_rescale_type,
            fact_s2=self.fact_s2,
            fact_s1=self.fact_s1,
            s2_bands=self.s2bands,
            s1_bands=self.s1bands,
            lim=train_yaml["lim_train_tile"])
        self.val_X, self.val_Y, scale_dict_val = load_data(
            self.val_directory,
            x_shape=model_yaml["input_shape"],
            label_shape=model_yaml["dim_gt_image"],
            normalization=self.normalization,
            dict_band_X=self.dict_band_X,
            dict_band_label=self.dict_band_label,
            dict_rescale_type=self.dict_rescale_type,
            dict_scale=self.scale_dict_train,
            fact_s2=self.fact_s2,
            fact_s1=self.fact_s1,
            s2_bands=self.s2bands,
            s1_bands=self.s1bands,
            lim=train_yaml["lim_val_tile"])
        print("Loading the data done dataX {} dataY {}".format(
            self.data_X.shape, self.data_y.shape))
        self.gpu = train_yaml["n_gpu"]
        self.num_batches = self.data_X.shape[0] // self.batch_size
        self.model_yaml = model_yaml
        self.im_saving_step = train_yaml["im_saving_step"]
        self.w_saving_step = train_yaml["weights_saving_step"]
        self.val_metric_step = train_yaml["metric_step"]
        # REDUCE THE DISCRIMINATOR PERFORMANCE
        self.val_lambda = train_yaml["lambda"]
        self.real_label_smoothing = tuple(train_yaml["real_label_smoothing"])
        self.fake_label_smoothing = tuple(train_yaml["fake_label_smoothing"])
        self.sigma_init = train_yaml["sigma_init"]
        self.sigma_step = train_yaml['sigma_step']
        self.sigma_decay = train_yaml["sigma_decay"]
        self.ite_train_g = train_yaml["train_g_multiple_time"]

        self.max_im = 10
        self.strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(
            self.strategy.num_replicas_in_sync))
        self.buffer_size = self.data_X.shape[0]

        self.global_batch_size = self.batch_size * self.strategy.num_replicas_in_sync
        with self.strategy.scope():
            self.d_optimizer = Adam(self.learning_rate, self.beta1)
            self.g_optimizer = Adam(self.learning_rate * self.fact_g_lr,
                                    self.beta1)

            self.build_model()

        self.model_writer = tf.summary.create_file_writer(
            self.saving_logs_path)
        #self.strategy = tf.distribute.MirroredStrategy()

    def build_model(self):
        # strategy = tf.distribute.MirroredStrategy()
        # print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

        # We use the discriminator
        self.discriminator = self.build_discriminator(self.model_yaml)
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=self.d_optimizer,
                                   metrics=['accuracy'])
        self.generator = self.build_generator(self.model_yaml,
                                              is_training=True)
        print("Input G")
        g_input = Input(shape=(self.data_X.shape[1], self.data_X.shape[2],
                               self.data_X.shape[3]),
                        name="g_build_model_input_data")
        G = self.generator(g_input)
        print("G", G)
        # For the combined model we will only train the generator
        self.discriminator.trainable = False
        D_input = tf.concat([G, g_input], axis=-1)
        print("INPUT DISCRI ", D_input)
        # The discriminator takes generated images as input and determines validity
        D_output_fake = self.discriminator(D_input)
        # print(D_output)
        # The combined model  (stacked generator and discriminator)
        # TO TRAIN WITH MULTIPLE GPU

        self.combined = Model(g_input, [D_output_fake, G],
                              name="Combined_model")
        self.combined.compile(loss=['binary_crossentropy', L1_loss],
                              loss_weights=[1, self.val_lambda],
                              optimizer=self.g_optimizer)
        print("[INFO] combined model loss are : ".format(
            self.combined.metrics_names))

    def build_generator(self, model_yaml, is_training=True):
        def build_resnet_block(input, id=0):
            """Define the ResNet block"""
            x = Conv2D(model_yaml["dim_resnet"],
                       model_yaml["k_resnet"],
                       padding=model_yaml["padding"],
                       strides=tuple(model_yaml["stride"]),
                       name="g_block_{}_conv1".format(id))(input)
            x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                   trainable=is_training,
                                   name="g_block_{}_bn1".format(id))(x)
            x = ReLU(name="g_block_{}_relu1".format(id))(x)
            x = Dropout(rate=model_yaml["do_rate"],
                        name="g_block_{}_do".format(id))(x)
            x = Conv2D(model_yaml["dim_resnet"],
                       model_yaml["k_resnet"],
                       padding=model_yaml["padding"],
                       strides=tuple(model_yaml["stride"]),
                       name="g_block_{}_conv2".format(id))(x)
            x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                   trainable=is_training,
                                   name="g_block_{}_bn2".format(id))(x)
            x = Add(name="g_block_{}_add".format(id))([x, input])
            x = ReLU(name="g_block_{}_relu2".format(id))(x)
            return x

        img_input = Input(shape=(self.data_X.shape[1], self.data_X.shape[2],
                                 self.data_X.shape[3]),
                          name="g_input_data")

        if model_yaml["last_activation"] == "tanh":
            print("use tanh keras")
            last_activ = lambda x: tf.keras.activations.tanh(x)
        else:
            last_activ = model_yaml["last_activation"]
        x = img_input

        for i, param_lay in enumerate(
                model_yaml["param_before_resnet"]
        ):  # build the blocks before the Resnet Blocks
            x = Conv2D(param_lay[0],
                       param_lay[1],
                       strides=tuple(model_yaml["stride"]),
                       padding=model_yaml["padding"],
                       name="g_conv{}".format(i))(x)
            x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                   trainable=is_training,
                                   name="g_{}_bn".format(i))(x)
            x = ReLU(name="g_{}_lay_relu".format(i))(x)

        for j in range(model_yaml["nb_resnet_blocs"]):  # add the Resnet blocks
            x = build_resnet_block(x, id=j)

        for i, param_lay in enumerate(model_yaml["param_after_resnet"]):
            x = Conv2D(param_lay[0],
                       param_lay[1],
                       strides=tuple(model_yaml["stride"]),
                       padding=model_yaml["padding"],
                       name="g_conv_after_resnetblock{}".format(i))(x)
            x = BatchNormalization(
                momentum=model_yaml["bn_momentum"],
                trainable=is_training,
                name="g_after_resnetblock{}_bn2".format(i))(x)
            x = ReLU(name="g_after_resnetblock_relu_{}".format(i))(x)
        # The last layer
        x = Conv2D(model_yaml["last_layer"][0],
                   model_yaml["last_layer"][1],
                   strides=tuple(model_yaml["stride"]),
                   padding=model_yaml["padding"],
                   name="g_final_conv",
                   activation=last_activ)(x)
        model_gene = Model(img_input, x, name="Generator")
        model_gene.summary()
        return model_gene

    def build_discriminator(self, model_yaml, is_training=True):

        discri_input = Input(shape=tuple([256, 256, 12]), name="d_input")
        if model_yaml["d_activation"] == "lrelu":
            d_activation = lambda x: tf.nn.leaky_relu(
                x, alpha=model_yaml["lrelu_alpha"])
        else:
            d_activation = model_yaml["d_activation"]

        if model_yaml["add_discri_noise"]:
            x = GaussianNoise(self.sigma_val,
                              input_shape=self.model_yaml["dim_gt_image"],
                              name="d_GaussianNoise")(discri_input)
        else:
            x = discri_input
        for i, layer_index in enumerate(model_yaml["dict_discri_archi"]):
            layer_val = model_yaml["dict_discri_archi"][layer_index]
            layer_key = model_yaml["layer_key"]
            layer_param = dict(zip(layer_key, layer_val))
            pad = layer_param["padding"]
            vpadding = tf.constant([[0, 0], [pad, pad], [pad, pad],
                                    [0, 0]])  # the last dimension is 12
            x = tf.pad(
                x,
                vpadding,
                model_yaml["discri_opt_padding"],
                name="{}_padding_{}".format(
                    model_yaml["discri_opt_padding"],
                    layer_index))  # the padding type is defined in the yaml;
            # more information: https://www.tensorflow.org/api_docs/python/tf/pad
            # x = ZeroPadding2D(
            #   padding=(layer_param["padding"], layer_param["padding"]), name="d_pad_{}".format(layer_index))(x)
            x = Conv2D(layer_param["nfilter"],
                       layer_param["kernel"],
                       padding="valid",
                       activation=d_activation,
                       strides=(layer_param["stride"], layer_param["stride"]),
                       name="d_conv{}".format(layer_index))(x)
            if i > 0:
                x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                       trainable=is_training,
                                       name="d_bn{}".format(layer_index))(x)

        # x = Flatten(name="flatten")(x)
        # for i, dlayer_idx in enumerate(model_yaml["discri_dense_archi"]):
        #    dense_layer = model_yaml["discri_dense_archi"][dlayer_idx]
        #    x = Dense(dense_layer, activation=d_activation, name="dense_{}".format(dlayer_idx))(x)

        if model_yaml["d_last_activ"] == "sigmoid":
            x_final = tf.keras.layers.Activation('sigmoid',
                                                 name="d_last_activ")(x)
        else:
            x_final = x
        model_discri = Model(discri_input, x_final, name="discriminator")
        model_discri.summary()
        return model_discri

    def produce_noisy_input(self, input, sigma_val):
        if self.model_yaml["add_discri_white_noise"]:
            # print("[INFO] On each batch GT label we add Gaussian Noise before training discri on labelled image")
            new_gt = GaussianNoise(sigma_val,
                                   input_shape=self.model_yaml["dim_gt_image"],
                                   name="d_inputGN")(input)
            if self.model_yaml["add_relu_after_noise"]:
                new_gt = tf.keras.layers.Activation(
                    lambda x: tf.keras.activations.tanh(x),
                    name="d_before_activ")(new_gt)
        else:
            new_gt = input
        return new_gt

    def define_callback(self):
        # Define Tensorboard callbacks
        self.g_tensorboard_callback = TensorBoard(
            log_dir=self.saving_logs_path,
            histogram_freq=0,
            batch_size=self.batch_size,
            write_graph=True,
            write_grads=True)
        self.g_tensorboard_callback.set_model(self.combined)

    def train_gpu(self):
        valid = np.ones(
            (self.batch_size, 30, 30, 1))  # matches the discriminator's 30x30x1 patch output
        fake = np.zeros((self.batch_size, 30, 30, 1))

        print("valid shape {}".format(valid.shape))
        if self.previous_checkpoint is not None:
            print("LOADING the model from step {}".format(
                self.previous_checkpoint))
            start_epoch = int(self.previous_checkpoint) + 1
            self.load_from_checkpoint(self.previous_checkpoint)
        else:
            # create_safe_directory(self.saving_logs_path)
            create_safe_directory(self.saving_image_path)
        train_dataset = tf.data.Dataset.from_tensor_slices(
            (self.data_X, self.data_y)).shuffle(self.batch_size).batch(
                self.global_batch_size)
        train_dist_dataset = self.strategy.experimental_distribute_dataset(
            train_dataset)

    def train(self):
        # Adversarial ground truths

        valid = np.ones((self.global_batch_size, 30, 30,
                         1))  # matches the discriminator's 30x30x1 patch output
        fake = np.zeros((self.global_batch_size, 30, 30, 1))
        #print("valid shape {}".format(valid.shape))
        if self.previous_checkpoint is not None:
            print("LOADING the model from step {}".format(
                self.previous_checkpoint))
            start_epoch = int(self.previous_checkpoint) + 1
            self.load_from_checkpoint(self.previous_checkpoint)
        else:
            # create_safe_directory(self.saving_logs_path)
            create_safe_directory(self.saving_image_path)
            start_epoch = 0

        train_dataset = tf.data.Dataset.from_tensor_slices(
            (self.data_X, self.data_y)).shuffle(self.batch_size).batch(
                self.global_batch_size)
        # loop for epoch
        sigma_val = self.sigma_init
        # dict_metric={"epoch":[],"d_loss_real":[],"d_loss_fake":[],"d_loss":[],"g_loss":[]}
        d_loss_real = [100, 100]  # init losses
        d_loss_fake = [100, 100]
        d_loss = [100, 100]
        l_val_name_metrics, l_val_value_metrics = [], []
        start_time = time.time()
        for epoch in range(start_epoch, self.epoch):

            # print("starting epoch {}".format(epoch))
            for idx, (batch_input, batch_gt) in enumerate(train_dataset):

                #print(batch_input)
                ##  TRAIN THE DISCRIMINATOR

                d_noise_real = random.uniform(
                    self.real_label_smoothing[0],
                    self.real_label_smoothing[1])  # label smoothing for the real targets
                d_noise_fake = random.uniform(
                    self.fake_label_smoothing[0],
                    self.fake_label_smoothing[1])  # label smoothing for the fake targets

                # Create a noisy gt images
                batch_new_gt = self.produce_noisy_input(batch_gt, sigma_val)
                # Generate a batch of new images
                # print("Make a prediction")
                gen_imgs = self.generator.predict(
                    batch_input)  # .astype(np.float32)
                D_input_real = tf.concat([batch_new_gt, batch_input], axis=-1)
                D_input_fake = tf.concat([gen_imgs, batch_input], axis=-1)
                print("shape d train")
                print(valid.shape, D_input_fake.shape)
                d_loss_real = self.discriminator.train_on_batch(
                    D_input_real, d_noise_real * valid)

                d_loss_fake = self.discriminator.train_on_batch(
                    D_input_fake, d_noise_fake * fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

                g_loss = self.combined.train_on_batch(batch_input,
                                                      [valid, batch_gt])

                # Plot the progress
                print("%d iter %d [D loss: %f, acc.: %.2f%%] [G loss: %f %f]" %
                      (epoch, self.num_batches * epoch + idx, d_loss[0],
                       100 * d_loss[1], g_loss[0], g_loss[1]))

                if epoch % self.im_saving_step == 0 and idx < self.max_im:  # to save some generated_images
                    gen_imgs = self.generator.predict(batch_input)
                    save_images(gen_imgs, self.saving_image_path, ite=idx)
                # LOGS to print in Tensorboard
                if epoch % self.val_metric_step == 0:
                    l_val_name_metrics, l_val_value_metrics = self.val_metric()
                    name_val_metric = [
                        "val_{}".format(name) for name in l_val_name_metrics
                    ]
                    name_logs = self.combined.metrics_names + [
                        "g_loss_tot", "d_loss_real", "d_loss_fake",
                        "d_loss_tot", "d_acc_real", "d_acc_fake", "d_acc_tot"
                    ]
                    val_logs = g_loss + [
                        g_loss[0] + 100 * g_loss[1], d_loss_real[0],
                        d_loss_fake[0], d_loss[0], d_loss_real[1],
                        d_loss_fake[1], d_loss[1]
                    ]
                    # The metrics
                    #print(type(batch_gt),type(gen_imgs))
                    l_name_metrics, l_value_metrics = compute_metric(
                        batch_gt.numpy(), gen_imgs)
                    assert len(val_logs) == len(
                        name_logs
                    ), "The names and values of the logs do not have the same length: {} vs {}".format(
                        name_logs, val_logs)
                    write_log_tf2(
                        self.model_writer, name_logs + l_name_metrics +
                        name_val_metric + ["time_in_sec"],
                        val_logs + l_value_metrics + l_val_value_metrics +
                        [time.time() - start_time], epoch)

            if epoch % self.sigma_step == 0:  # decay the discriminator noise sigma
                sigma_val = sigma_val * self.sigma_decay
            # save the models
            if epoch % self.w_saving_step == 0:
                self.save_model(epoch)

    def save_model(self, step):
        print("Saving model at {} step {}".format(self.checkpoint_dir, step))
        checkpoint_dir = self.checkpoint_dir
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if not os.path.isfile("{}model_generator.yaml".format(
                self.checkpoint_dir)):
            gene_yaml = self.generator.to_yaml()
            with open("{}model_generator.yaml".format(self.checkpoint_dir),
                      "w") as yaml_file:
                yaml_file.write(gene_yaml)
        if not os.path.isfile("{}model_combined.yaml".format(
                self.checkpoint_dir)):
            comb_yaml = self.combined.to_yaml()
            with open("{}model_combined.yaml".format(self.checkpoint_dir),
                      "w") as yaml_file:
                yaml_file.write(comb_yaml)
        if not os.path.isfile("{}model_discri.yaml".format(
                self.checkpoint_dir)):
            discri_yaml = self.discriminator.to_yaml()
            with open("{}model_discri.yaml".format(self.checkpoint_dir),
                      "w") as yaml_file:
                yaml_file.write(discri_yaml)
        self.generator.save_weights("{}model_gene_i{}.h5".format(
            self.checkpoint_dir, step))
        self.discriminator.save_weights("{}model_discri_i{}.h5".format(
            self.checkpoint_dir, step))
        self.combined.save_weights("{}model_combined_i{}.h5".format(
            self.checkpoint_dir, step))

    def load_from_checkpoint(self, step):
        assert os.path.isfile("{}model_discri_i{}.h5".format(
            self.checkpoint_dir,
            step)), "No file at {}".format("{}model_discri_i{}.h5".format(
                self.checkpoint_dir, step))
        self.discriminator.load_weights("{}model_discri_i{}.h5".format(
            self.checkpoint_dir, step))
        self.generator.load_weights("{}model_gene_i{}.h5".format(
            self.checkpoint_dir, step))
        self.combined.load_weights("{}model_combined_i{}.h5".format(
            self.checkpoint_dir, step))

    def load_generator(self, path_yaml, path_weight):
        # load YAML and create model
        yaml_file = open(path_yaml, 'r')
        loaded_model_yaml = yaml_file.read()
        yaml_file.close()
        loaded_model = model_from_yaml(loaded_model_yaml)
        # load weights into new model
        loaded_model.load_weights(path_weight)
        print("Loaded model from disk")
        return loaded_model

    def val_metric(self):
        test_dataset = tf.data.Dataset.from_tensor_slices(
            (self.val_X, self.val_Y)).batch(self.val_X.shape[0])
        #test_dist_dataset = self.strategy.experimental_distribute_dataset(test_dataset)
        for i, (x, y) in enumerate(test_dataset):
            #print("eval on {}".format(i))

            val_pred = self.generator.predict(x)
            #print("type  {} {}".format(type(y),type(val_pred)))
            label = y
        return compute_metric(label.numpy(), val_pred)

    def predict_on_iter(self,
                        batch,
                        path_save,
                        l_image_id=None,
                        un_rescale=True):
        """given an iter load the model at this iteration, returns the a predicted_batch but check if image have been saved at this directory
        :param dataset:
        :param batch could be a string : path to the dataset  or an array corresponding to the batch we are going to predict on
        """
        if isinstance(batch, str):  # the param is a path: load the batch from that directory
            #print("We load our data from {}".format(batch))

            l_image_id = find_image_indir(batch + XDIR, "npy")
            batch, _ = load_data(batch,
                                 x_shape=self.model_yaml["input_shape"],
                                 label_shape=self.model_yaml["dim_gt_image"],
                                 normalization=self.normalization,
                                 dict_band_X=self.dict_band_X,
                                 dict_band_label=self.dict_band_label,
                                 dict_rescale_type=self.dict_rescale_type,
                                 dict_scale=self.scale_dict_train,
                                 fact_s2=self.fact_s2,
                                 fact_s1=self.fact_s1,
                                 s2_bands=self.s2bands,
                                 s1_bands=self.s1bands,
                                 clip_s2=False)
        else:
            if l_image_id is None:
                print("We defined our own index for image name")
                l_image_id = [i for i in range(batch.shape[0])]
        assert len(l_image_id) == batch.shape[
            0], "Got {} image ids but the batch contains {} images".format(
                len(l_image_id), batch.shape[0])
        if os.path.isdir(path_save):
            print(
                "[INFO] the directory where to store the image already exists")
            data_array, path_tile, _ = load_from_dir(
                path_save, self.model_yaml["dim_gt_image"])
            return data_array
        else:
            create_safe_directory(path_save)
            batch_res = self.generator.predict(batch)
            # if un_rescale:  # remove the normalization made on the data

            # _, batch_res, _ = rescale_array(batch, batch_res, dict_group_band_X=self.dict_band_X,
            #                                 dict_group_band_label=self.dict_band_label,
            #                                 dict_rescale_type=self.dict_rescale_type,
            #                                 dict_scale=self.scale_dict_train, invert=True, fact_scale2=self.fact_s2,
            #                                 fact_scale1=self.fact_s1,clip_s2=False)
            assert batch_res.shape[0] == batch.shape[
                0], "Prediction has shape {} but the input batch has shape {}".format(
                    batch_res.shape, batch.shape)
            if path_save is not None:
                # we store the data at path_save
                for i in range(batch_res.shape[0]):
                    np.save(
                        "{}_image_{}".format(path_save,
                                             l_image_id[i].split("/")[-1]),
                        batch_res[i, :, :, :])
        return batch_res
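
A hedged driver sketch for the GAN class above; model_yaml and train_yaml would normally be parsed from the YAML config files this class expects.

# Hypothetical usage, e.g. after yaml.safe_load on the two config files.
gan = GAN(model_yaml, train_yaml)
gan.train()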
    try:
        os.mkdir(os.getcwd() + os.sep + 'out' + os.sep + model_name)
    except FileExistsError:  # avoid a bare except; only ignore an existing directory
        pass

    # save features and labels
    h5f_data = h5py.File(features_path, 'w')
    h5f_data.create_dataset('dataset_1', data=np.array(features))

    h5f_label = h5py.File(labels_path, 'w')
    h5f_label.create_dataset('dataset_1', data=np.array(le_labels))

    h5f_data.close()
    h5f_label.close()

    # save model and weights
    model_json = model.to_json()
    with open(model_path + str(test_size) + ".json", "w") as json_file:
        json_file.write(model_json)

    # save weights
    model.save_weights(model_path + str(test_size) + ".h5")
    print("saved model and weights to disk..")

    print("features and labels saved..")

    # end time
    end = time.time()
    print("end time - {}".format(
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
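
    # Hedged read-back sketch for the arrays saved above (h5py's standard API):
    with h5py.File(features_path, 'r') as h5f_data:
        features = np.array(h5f_data['dataset_1'])
    with h5py.File(labels_path, 'r') as h5f_label:
        le_labels = np.array(h5f_label['dataset_1'])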
class Seq2SeqAtt(object):
    model_name = 'seq2seq-qa-glove'

    def __init__(self):
        self.model = None
        self.encoder_model = None
        self.decoder_model = None
        self.target_word2idx = None
        self.target_idx2word = None
        self.max_decoder_seq_length = None
        self.max_encoder_seq_length = None
        self.num_decoder_tokens = None
        self.glove_model = GloveModel()

    @staticmethod
    def get_architecture_file_path(model_dir_path):
        return os.path.join(model_dir_path, Seq2SeqAtt.model_name + '-architecture.json')

    @staticmethod
    def get_weight_file_path(model_dir_path):
        return os.path.join(model_dir_path, Seq2SeqAtt.model_name + '-weights.h5')

    def load_glove_model(self, data_dir_path):
        self.glove_model.load_model(data_dir_path)

    def load_model(self, model_dir_path):
        # allow_pickle=True is required to load dict payloads on NumPy >= 1.16.3
        self.target_word2idx = np.load(
            model_dir_path + '/' + Seq2SeqAtt.model_name + '-target-word2idx.npy',
            allow_pickle=True).item()
        self.target_idx2word = np.load(
            model_dir_path + '/' + Seq2SeqAtt.model_name + '-target-idx2word.npy',
            allow_pickle=True).item()
        context = np.load(model_dir_path + '/' + Seq2SeqAtt.model_name + '-config.npy',
                          allow_pickle=True).item()
        self.max_encoder_seq_length = context['input_max_seq_length']
        self.max_decoder_seq_length = context['target_max_seq_length']
        self.num_decoder_tokens = context['num_target_tokens']

        self.create_model()
        self.model.load_weights(Seq2SeqAtt.get_weight_file_path(model_dir_path))

    def create_model(self):
        resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
        tf.contrib.distribute.initialize_tpu_system(resolver)
        strategy = tf.contrib.distribute.TPUStrategy(resolver)

        with strategy.scope():
            hidden_size = 256
            enc_timesteps = self.max_encoder_seq_length
            # timesteps = self.max_encoder_seq_length  # alternative: tie the timestep count to the max sequence length
            dec_timesteps = self.max_decoder_seq_length
            print(f"embedding size: {self.glove_model.embedding_size}")
            # encoder_inputs = Input(shape=(None, self.glove_model.embedding_size), name='encoder_inputs')
            # decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs')
            encoder_inputs = Input(shape=(enc_timesteps, self.glove_model.embedding_size), name='encoder_inputs')
            decoder_inputs = Input(shape=(dec_timesteps, self.num_decoder_tokens), name='decoder_inputs')
            
            # Encoder GRU
            encoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True, return_state=True, name='encoder_gru'), name='bidirectional_encoder')
            encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)

            # Set up the decoder GRU, initialised with the concatenated
            # forward/backward encoder states (hence hidden_size * 2 units).
            decoder_gru = GRU(hidden_size*2, return_sequences=True, return_state=True, name='decoder_gru')
            decoder_out, decoder_state = decoder_gru(
                decoder_inputs, initial_state=Concatenate(axis=-1)([encoder_fwd_state, encoder_back_state])
            )

            # Attention layer
            attn_layer = AttentionLayer(name='attention_layer')
            attn_out, attn_states = attn_layer([encoder_out, decoder_out])

            # Concat attention input and decoder GRU output
            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

            # Dense layer
            dense = Dense(self.num_decoder_tokens, activation='softmax', name='softmax_layer')
            dense_time = TimeDistributed(dense, name='time_distributed_layer')
            decoder_pred = dense_time(decoder_concat_input)

            # Full model
            self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
            self.model.compile(optimizer=tf.train.RMSPropOptimizer(learning_rate=0.01), loss='categorical_crossentropy')

            self.model.summary()

            """ Inference model """
            batch_size = 1

            """ Encoder (Inference) model """
            encoder_inf_inputs = Input(batch_shape=(batch_size, enc_timesteps, self.glove_model.embedding_size), name='encoder_inf_inputs')
            encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
            self.encoder_model = Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])

            """ Decoder (Inference) model """
            decoder_inf_inputs = Input(batch_shape=(batch_size, 1, self.num_decoder_tokens), name='decoder_word_inputs')
            encoder_inf_states = Input(batch_shape=(batch_size, enc_timesteps, 2*hidden_size), name='encoder_inf_states')  # encoder outputs span enc_timesteps, not dec_timesteps
            decoder_init_state = Input(batch_shape=(batch_size, 2*hidden_size), name='decoder_init')

            decoder_inf_out, decoder_inf_state = decoder_gru(
                decoder_inf_inputs, initial_state=decoder_init_state)
            attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
            decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
            decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
            self.decoder_model = Model(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
                                outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    def fit(self, data_set, model_dir_path, epochs=None, batch_size=None, test_size=None, random_state=None,
            save_best_only=False, max_target_vocab_size=None):
        if batch_size is None:
            batch_size = 64
        if epochs is None:
            epochs = 100
        if test_size is None:
            test_size = 0.2
        if random_state is None:
            random_state = 42
        if max_target_vocab_size is None:
            max_target_vocab_size = 5000

        data_set_seq2seq = SQuADSeq2SeqEmbTupleSamples(data_set, self.glove_model.word2em,
                                                       self.glove_model.embedding_size,
                                                       max_target_vocab_size=max_target_vocab_size)
        data_set_seq2seq.save(model_dir_path, 'qa-glove-att')

        x_train, x_test, y_train, y_test = data_set_seq2seq.split(test_size=test_size, random_state=random_state)

        print('training samples:', len(x_train))
        print('test samples:', len(x_test))

        self.max_encoder_seq_length = data_set_seq2seq.input_max_seq_length
        self.max_decoder_seq_length = data_set_seq2seq.target_max_seq_length
        self.num_decoder_tokens = data_set_seq2seq.num_target_tokens
        print(f'max_encoder_seq_length: {self.max_encoder_seq_length}')
        print(f'max_decoder_seq_length: {self.max_decoder_seq_length}')
        print(f'num_decoder_tokens: {self.num_decoder_tokens}')

        weight_file_path = self.get_weight_file_path(model_dir_path)
        architecture_file_path = self.get_architecture_file_path(model_dir_path)

        self.create_model()

        with open(architecture_file_path, 'w') as f:
            f.write(self.model.to_json())

        train_gen = generate_batch(data_set_seq2seq, x_train, y_train, batch_size)
        test_gen = generate_batch(data_set_seq2seq, x_test, y_test, batch_size)

        train_num_batches = len(x_train) // batch_size
        test_num_batches = len(x_test) // batch_size

        checkpoint = ModelCheckpoint(filepath=weight_file_path, save_best_only=save_best_only)

#########COLAB##########
        #TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
        #tensorflow.logging.set_verbosity(tensorflow.logging.INFO)

        #self.model = tensorflow.contrib.tpu.keras_to_tpu_model(
        #    self.model,
        #    strategy=tensorflow.contrib.tpu.TPUDistributionStrategy(
        #        tensorflow.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)))
#######################

        history = self.model.fit_generator(generator=train_gen, steps_per_epoch=train_num_batches,
                                           epochs=epochs,
                                           verbose=1, validation_data=test_gen, validation_steps=test_num_batches,
                                           callbacks=[checkpoint])

        self.model.save_weights(weight_file_path)

        np.save(os.path.join(model_dir_path, Seq2SeqAtt.model_name + '-history.npy'), history.history)

        return history

    def reply(self, paragraph, question):
        input_seq = []
        input_emb = []
        input_text = paragraph.lower() + ' question ' + question.lower()
        for word in nltk.word_tokenize(input_text):
            if not in_white_list(word):
                continue
            emb = self.glove_model.encode_word(word)
            input_emb.append(emb)
        input_seq.append(input_emb)
        input_seq = pad_sequences(input_seq, maxlen=self.max_encoder_seq_length,
                                  dtype='float32')  # default dtype is int32, which would truncate the embeddings
        # The bidirectional encoder returns its output sequence plus the forward
        # and backward states; the decoder is seeded with their concatenation.
        encoder_out, encoder_fwd_state, encoder_back_state = self.encoder_model.predict(input_seq)
        decoder_state = np.concatenate([encoder_fwd_state, encoder_back_state], axis=-1)
        target_seq = np.zeros((1, 1, self.num_decoder_tokens))
        target_seq[0, 0, self.target_word2idx['START']] = 1
        target_text = ''
        target_text_len = 0
        terminated = False
        while not terminated:
            output_tokens, _, decoder_state = self.decoder_model.predict(
                [encoder_out, decoder_state, target_seq])

            sample_token_idx = np.argmax(output_tokens[0, -1, :])
            sample_word = self.target_idx2word[sample_token_idx]
            target_text_len += 1

            if sample_word != 'START' and sample_word != 'END':
                target_text += ' ' + sample_word

            if sample_word == 'END' or target_text_len >= self.max_decoder_seq_length:
                terminated = True

            target_seq = np.zeros((1, 1, self.num_decoder_tokens))
            target_seq[0, 0, sample_token_idx] = 1
        return target_text.strip()

    def test_run(self, ds, index=None):
        if index is None:
            index = 0
        paragraph, question, actual_answer = ds.get_data(index)
        predicted_answer = self.reply(paragraph, question)
        print({'predict': predicted_answer, 'actual': actual_answer})
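# Hypothetical usage sketch (directory paths are placeholders, not from the
# original source): load GloVe vectors, restore a trained model, and answer.
if __name__ == '__main__':
    qa = Seq2SeqAtt()
    qa.load_glove_model('./data/glove')  # placeholder GloVe directory
    qa.load_model('./models')            # placeholder model directory
    print(qa.reply('some paragraph of context text', 'What is being asked?'))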
class Pix2Pix():
    def __init__(self):
        # Input shape
        self.img_rows = 256
        self.img_cols = 256
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        # Configure data loader
        self.dataset_name = 'facades'
        self.data_loader = DataLoader(dataset_name=self.dataset_name,
                                      img_res=(self.img_rows, self.img_cols))


        # Calculate output shape of D (PatchGAN)
        patch = int(self.img_rows / 2**4)
        self.disc_patch = (patch, patch, 1)
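        # The discriminator below stacks four stride-2 convolutions, each
        # halving the spatial dimensions, so a 256x256 input yields
        # 256 / 2**4 = 16x16 patch predictions; the `valid`/`fake` targets
        # in train() use this shape.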

        # Number of filters in the first layer of G and D
        self.gf = 64
        self.df = 64

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='mse',
            optimizer=optimizer,
            metrics=['accuracy'])

        #-------------------------
        # Construct Computational
        #   Graph of Generator
        #-------------------------

        # Build the generator
        self.generator = self.build_generator()

        # Input images and their conditioning images
        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # By conditioning on B generate a fake version of A
        fake_A = self.generator(img_B)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # Discriminators determines validity of translated images / condition pairs
        valid = self.discriminator([fake_A, img_B])

        self.combined = Model(inputs=[img_A, img_B], outputs=[valid, fake_A])
        self.combined.compile(loss=['mse', 'mae'],
                              loss_weights=[1, 100],
                              optimizer=optimizer)

        #self.combined.load_weights("Weights/199.h5")

    def build_generator(self):

        layer_per_block = [4, 4, 4, 4, 4, 15, 4, 4, 4, 4, 4]

        tiramisu = Tiramisu(layer_per_block)
        tiramisu.summary()

        #d0 = Input(shape=self.img_shape)

        return tiramisu

    def build_discriminator(self):

        def d_layer(layer_input, filters, f_size=4, bn=True):
            """Discriminator layer"""
            d = Conv2D(filters, kernel_size=f_size, strides=2, padding='same')(layer_input)
            d = LeakyReLU(alpha=0.2)(d)
            if bn:
                d = BatchNormalization(momentum=0.8)(d)
            return d

        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # Concatenate image and conditioning image by channels to produce input
        combined_imgs = Concatenate(axis=-1)([img_A, img_B])

        d1 = d_layer(combined_imgs, self.df, bn=False)
        d2 = d_layer(d1, self.df*2)
        d3 = d_layer(d2, self.df*4)
        d4 = d_layer(d3, self.df*8)

        validity = Conv2D(1, kernel_size=4, strides=1, padding='same')(d4)

        return Model([img_A, img_B], validity)

    def train(self, epochs, batch_size=1, sample_interval=50):

        start_time = datetime.datetime.now()

        # Adversarial loss ground truths
        valid = np.ones((batch_size,) + self.disc_patch)
        fake = np.zeros((batch_size,) + self.disc_patch)

        for epoch in range(epochs):
            for batch_i, (imgs_A, imgs_B) in enumerate(self.data_loader.load_batch(batch_size)):

                # ---------------------
                #  Train Discriminator
                # ---------------------

                # Condition on B and generate a translated version
                fake_A = self.generator.predict(imgs_B)

                # Train the discriminators (original images = real / generated = Fake)
                d_loss_real = self.discriminator.train_on_batch([imgs_A, imgs_B], valid)
                d_loss_fake = self.discriminator.train_on_batch([fake_A, imgs_B], fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

                # -----------------
                #  Train Generator
                # -----------------

                # Train the generators
                g_loss = self.combined.train_on_batch([imgs_A, imgs_B], [valid, imgs_A])

                elapsed_time = datetime.datetime.now() - start_time
                # Plot the progress
                print ("[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %3d%%] [G loss: %f] time: %s" % (epoch, epochs,
                                                                        batch_i, self.data_loader.n_batches,
                                                                        d_loss[0], 100*d_loss[1],
                                                                        g_loss[0],
                                                                        elapsed_time))

                # If at save interval => save generated image samples
                if batch_i % sample_interval == 0:
                    self.sample_images(epoch, batch_i)


            self.combined.save_weights("Weights/" + str(epoch) + ".h5")

    def img_to_frame(self, imgA, imgB, fakeA):
        no_images = imgA.shape[0]
        img_height = imgA.shape[1]
        img_width = imgA.shape[2]
        pad = 20
        title_pad = 20
        pad_top = pad + title_pad
        frame = np.zeros((no_images * (img_height + pad_top),
                          no_images * (img_width + pad), 3))
        count = 0
        gen_imgs = np.concatenate([imgB, fakeA, imgA])
        gen_imgs = 0.5 * gen_imgs + 0.5
        titles = ['Condition', 'Generated', 'Original']
        for r in range(no_images):
            for c in range(no_images):
                im = gen_imgs[count]
                count += 1
                y0 = r * (img_height + pad_top) + pad // 2
                x0 = c * (img_width + pad) + pad // 2
                frame[y0:y0 + img_height, x0:x0 + img_width, :] = im * 255
                frame = cv2.putText(frame, titles[r], (x0, y0 - title_pad // 4),
                                    cv2.FONT_HERSHEY_COMPLEX, .5,
                                    (255, 255, 255))
        return frame

    def sample_images(self, epoch, batch_i):
        os.makedirs('images/%s' % self.dataset_name, exist_ok=True)
        os.makedirs('images/dehazed', exist_ok=True)
        os.makedirs('images/haze', exist_ok=True)
        os.makedirs('images/original', exist_ok=True)
        r, c = 3, 3

        imgs_A, imgs_B, or_A, or_B = self.data_loader.load_data(batch_size=3,
                                                                is_testing=True)

        fake_A = self.generator.predict(imgs_B)

        cv2.imwrite("images/dehazed/Img:%d_%d.jpg" % (epoch, batch_i),
                    (fake_A[0] * 0.5 + 0.5) * 255)
        cv2.imwrite("images/haze/Img:%d_%d.jpg" % (epoch, batch_i),
                    (or_B[0] * 0.5 + 0.5) * 255)
        cv2.imwrite("images/original/Img:%d_%d.jpg" % (epoch, batch_i),
                    (or_A[0] * 0.5 + 0.5) * 255)

        frame = self.img_to_frame(imgs_A, imgs_B, fake_A)

        cv2.imwrite("images/%s/Img:%d_%d.png" % (self.dataset_name, epoch, batch_i),
                    frame)
class DeepVelocity(object):
    def __init__(self,
                 lr=0.00017654,
                 lat_input_shape=(64, ),
                 screen_input_shape=(
                     64,
                     64,
                 ),
                 structured_input_shape=(2, ),
                 verbose=False):
        """
        https://keras.io/getting-started/functional-api-guide/#multi-input-and-multi-output-models
        https://keras.io/getting-started/functional-api-guide/#shared-layers
        https://blog.keras.io/building-autoencoders-in-keras.html
        """
        # Gross hack, change later?
        self.lr = lr

        if verbose:
            # Print the shape parameters; the input tensors are not built yet
            # at this point, so we cannot call get_shape() on them.
            print("Network structured input shape is", structured_input_shape)
            print("Network screen input shape is", screen_input_shape)
            print("Network latent input shape is", lat_input_shape)

        # Create the two state encoding legs
        structured_input_a = Input(shape=structured_input_shape)
        lat_input_a = Input(shape=lat_input_shape)
        screen_input_a = Input(shape=screen_input_shape)

        structured_input_b = Input(shape=structured_input_shape)
        lat_input_b = Input(shape=lat_input_shape)
        screen_input_b = Input(shape=screen_input_shape)

        eng_state_a = [structured_input_a, lat_input_a, screen_input_a]
        eng_state_b = [structured_input_b, lat_input_b, screen_input_b]

        # We want to broadcast the structured input (x, y) into their own
        # channels, each with the same dimension as the screen input
        # We can then concatenate, then convolve over the whole tensor
        x = RepeatVector(64 * 64)(structured_input_a)
        x = Reshape((64, 64, 2))(x)
        structured_output_a = x

        x = RepeatVector(64 * 64)(structured_input_b)
        x = Reshape((64, 64, 2))(x)
        structured_output_b = x

        # Similar with the latent vector, except it will simply be repeated
        # column wise
        x = RepeatVector(64)(lat_input_a)
        x = Reshape((64, 64, 1))(x)
        lat_output_a = x

        x = RepeatVector(64)(lat_input_b)
        x = Reshape((64, 64, 1))(x)
        lat_output_b = x

        # The screen is the correct shape, just add a channel dimension
        x = Reshape((64, 64, 1))(screen_input_a)
        screen_output_a = x

        x = Reshape((64, 64, 1))(screen_input_b)
        screen_output_b = x

        x = concatenate([
            screen_output_a, structured_output_a, lat_output_a,
            screen_output_b, structured_output_b, lat_output_b
        ], axis=-1)
        print("concatenated input", x.shape)
        x = Conv2D(16, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        print("1", x.shape)

        x = Conv2D(32, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(2)(x)
        print("2", x.shape)

        x = Conv2D(64, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        print("3", x.shape)

        x = Conv2D(128, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(2)(x)
        print("4", x.shape)

        x = Conv2D(256, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        print("5", x.shape)

        x = Conv2D(512, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(2)(x)
        print("6", x.shape)

        x = Conv2D(1024, (3, 3))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        print("7", x.shape)

        x = Conv2D(2, (1, 1))(x)
        x = Activation('linear')(x)
        x = AveragePooling2D()(x)
        print("8", x.shape)

        x = Activation("softmax")(x)
        print("9", x.shape)

        prob_output = Reshape((2, ))(x)
        print("10", prob_output.shape)

        self.probabilityNetwork = Model(inputs=eng_state_a + eng_state_b,
                                        outputs=[prob_output])

    def compile(self):
        # print("LR: ",self.lr)
        # self.lr = 10**np.random.uniform(-2.2, -3.8)
        optimizer = Nadam(lr=self.lr,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          schedule_decay=0.004)
        # optimizer = SGD()
        # self.probabilityNetwork = make_parallel(self.probabilityNetwork, 2)
        self.probabilityNetwork.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=['acc', 'mse', 'categorical_crossentropy'])

    def save_weights(self, path):
        self.probabilityNetwork.save_weights(path)

    def load(self, path):
        loc = os.path.join(self.path(), path)
        print("Loading weights", loc)
        self.probabilityNetwork.load_weights(loc)
        return self

    def save_model(self, path):
        self.probabilityNetwork.save(path)

    def load_model(self, path):
        # loc = os.path.join(self.path(), path)
        # print("Loading model", path)
        self.probabilityNetwork = load_model(path)
        return self

    def path(self):
        return os.path.dirname(os.path.realpath(__file__))
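# Hypothetical usage sketch (weight filename is a placeholder, not from the
# original source): build, compile and checkpoint the probability network.
if __name__ == '__main__':
    dv = DeepVelocity()
    dv.compile()
    dv.save_weights('deep_velocity_weights.h5')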
autoencoder.compile( optimizer = 'adadelta', loss = 'binary_crossentropy' )

autoencoder.fit( x_train, x_train,
                    epochs = 50,
                    batch_size = 256,
                    shuffle = True,
                    validation_data = (x_test,x_test) )

encoded_imgs    = encoder.predict( x_test )
decoded_imgs    = decoder.predict( encoded_imgs )

# Save model
model_name  = logs_path + "/ae" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
with open( model_name+".yaml", "w"  ) as model_yaml:
    model_yaml.write( autoencoder.to_yaml() )
autoencoder.save_weights( model_name+".h5" )
print "Model saved as '" + model_name + "'"



# n = 10
# plt.figure( figsize=(20,4) )
# for i in range(n):
#     ax  = plt.subplot( 2, n, i+1 )
#     plt.imshow( x_test[i].reshape(28,28) )
#     plt.gray()
#     ax.get_xaxis().set_visible( False )
#     ax.get_yaxis().set_visible( False )

#     ax  = plt.subplot(2, n, i+1+n )
#     plt.imshow( decoded_imgs[i].reshape(28,28) )
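
# A runnable version of the visualization sketched above, assuming
# matplotlib.pyplot is imported as plt and x_test / decoded_imgs are defined.
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # originals on the top row
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # reconstructions on the bottom row
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()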
    #     vae.load_weights(args.weights)
    # else:
    # train the autoencoder

    total_records = len(gen_records)

    num_train = 0
    num_val = 0

    for key, _record in gen_records.items():
        if _record['train']:
            num_train += 1
        else:
            num_val += 1

    print("train: %d, val: %d" % (num_train, num_val))
    print('total records: %d' % (total_records))

    steps_per_epoch = num_train // cfg.BATCH_SIZE
    val_steps = num_val // cfg.BATCH_SIZE
    # validation_data for fit_generator should be a generator, paired with
    # validation_steps (val_steps was computed above but never used).
    vae.fit_generator(train_gen,
                      epochs=epochs,
                      steps_per_epoch=steps_per_epoch,
                      validation_data=val_gen,
                      validation_steps=val_steps)
    # vae.fit(x_train,
    #         epochs=epochs,
    #         batch_size=batch_size,
    #         validation_data=(x_test, None))
    vae.save_weights('vae_cnn_mnist.h5')

    plot_results(models, data, batch_size=batch_size, model_name="vae_cnn")
def main(batch_size=150,
         p_drop=0.4,
         latent_dim=2,
         cpl_fn='minvar',
         cpl_str=1e-3,
         n_epoch=500,
         run_iter=0,
         model_id='cnn',
         exp_name='MNIST'):


    fileid = model_id + \
        '_cf_' + cpl_fn + \
        '_cs_' + str(cpl_str) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)

    fileid = fileid.replace('.', '-')
    train_dat, train_lbl, val_dat, val_lbl, dir_pth = dataIO(exp_name=exp_name)

    #Architecture parameters ------------------------------
    input_dim = train_dat.shape[1]
    n_arms = 2
    fc_dim = 49

    #Model definition -------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(28, 28, 1), name='in_ae')
    for i in range(n_arms):
        M['co1_ae_' + str(i)] = Conv2D(10, (3, 3),
                                       activation='relu',
                                       padding='same',
                                       name='co1_ae_' + str(i))(M['in_ae'])
        M['mp1_ae_' + str(i)] = MaxPooling2D(
            (2, 2), padding='same',
            name='mp1_ae_' + str(i))(M['co1_ae_' + str(i)])
        M['dr1_ae_' + str(i)] = Dropout(rate=p_drop, name='dr1_ae_' + str(i))(
            M['mp1_ae_' + str(i)])
        M['fl1_ae_' + str(i)] = Flatten(name='fl1_ae_' + str(i))(M['dr1_ae_' +
                                                                   str(i)])
        M['fc01_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc01_ae_' + str(i))(M['fl1_ae_' +
                                                                   str(i)])
        M['fc02_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc02_ae_' + str(i))(M['fc01_ae_' +
                                                                   str(i)])
        M['fc03_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc03_ae_' + str(i))(M['fc02_ae_' +
                                                                   str(i)])

        if cpl_fn in ['mse']:
            M['ld_ae_' + str(i)] = Dense(latent_dim,
                                         activation='linear',
                                         name='ld_ae_' + str(i))(M['fc03_ae_' +
                                                                   str(i)])
        elif cpl_fn in ['mseBN', 'fullcov', 'minvar']:
            M['fc04_ae_' + str(i)] = Dense(latent_dim,
                                           activation='linear',
                                           name='fc04_ae_' + str(i))(
                                               M['fc03_ae_' + str(i)])
            M['ld_ae_' + str(i)] = BatchNormalization(
                scale=False,
                center=False,
                epsilon=1e-10,
                momentum=0.99,
                name='ld_ae_' + str(i))(M['fc04_ae_' + str(i)])

        M['fc05_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc05_ae_' + str(i))(M['ld_ae_' +
                                                                   str(i)])
        M['fc06_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc06_ae_' + str(i))(M['fc05_ae_' +
                                                                   str(i)])
        M['fc07_ae_' + str(i)] = Dense(fc_dim * 4,
                                       activation='relu',
                                       name='fc07_ae_' + str(i))(M['fc06_ae_' +
                                                                   str(i)])
        M['re1_ae_' + str(i)] = Reshape(
            (14, 14, 1), name='re1_ae_' + str(i))(M['fc07_ae_' + str(i)])
        M['us1_ae_' + str(i)] = UpSampling2D(
            (2, 2), name='us1_ae_' + str(i))(M['re1_ae_' + str(i)])
        M['co2_ae_' + str(i)] = Conv2D(10, (3, 3),
                                       activation='relu',
                                       padding='same',
                                       name='co2_ae_' + str(i))(M['us1_ae_' +
                                                                  str(i)])
        M['ou_ae_' + str(i)] = Conv2D(1, (3, 3),
                                      activation='sigmoid',
                                      padding='same',
                                      name='ou_ae_' + str(i))(M['co2_ae_' +
                                                                str(i)])

    cplAE = Model(inputs=M['in_ae'],
                  outputs=[M['ou_ae_' + str(i)] for i in range(n_arms)] +
                  [M['ld_ae_' + str(i)] for i in range(n_arms)])

    if cpl_fn in ['mse', 'mseBN']:
        cpl_fn_loss = mse
    elif cpl_fn == 'fullcov':
        cpl_fn_loss = fullcov
    elif cpl_fn == 'minvar':
        cpl_fn_loss = minvar

    assert cpl_fn in ('mse', 'mseBN', 'fullcov', 'minvar'), 'unknown cpl_fn: {}'.format(cpl_fn)
    #Create loss dictionary
    loss_dict = {
        'ou_ae_0': mse(M['in_ae'], M['ou_ae_0']),
        'ou_ae_1': mse(M['in_ae'], M['ou_ae_1']),
        'ld_ae_0': cpl_fn_loss(M['ld_ae_0'], M['ld_ae_1']),
        'ld_ae_1': cpl_fn_loss(M['ld_ae_1'], M['ld_ae_0'])
    }

    #Loss weights dictionary
    loss_wt_dict = {
        'ou_ae_0': 1.0,
        'ou_ae_1': 1.0,
        'ld_ae_0': cpl_str,
        'ld_ae_1': cpl_str
    }

    #Add loss definitions to the model
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Data feed
    train_input_dict = {'in_ae': train_dat}
    val_input_dict = {'in_ae': val_dat}
    train_output_dict = {
        'ou_ae_0': train_dat,
        'ou_ae_1': train_dat,
        'ld_ae_0': np.empty((train_dat.shape[0], latent_dim)),
        'ld_ae_1': np.empty((train_dat.shape[0], latent_dim))
    }
    val_output_dict = {
        'ou_ae_0': val_dat,
        'ou_ae_1': val_dat,
        'ld_ae_0': np.empty((val_dat.shape[0], latent_dim)),
        'ld_ae_1': np.empty((val_dat.shape[0], latent_dim))
    }

    log_cb = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    #Train model
    cplAE.fit(train_input_dict,
              train_output_dict,
              validation_data=(val_input_dict, val_output_dict),
              batch_size=batch_size,
              initial_epoch=0,
              epochs=n_epoch,
              verbose=2,
              shuffle=True,
              callbacks=[log_cb])

    #Saving weights
    cplAE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    matsummary = {}
    #Trained model prediction
    for i in range(n_arms):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        matsummary['z_val_' + str(i)] = encoder.predict({'in_ae': val_dat})
        matsummary['z_train_' + str(i)] = encoder.predict({'in_ae': train_dat})
    matsummary['train_lbl'] = train_lbl
    matsummary['val_lbl'] = val_lbl
    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', matsummary)
    return
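
# A small follow-up sketch (not in the original source): the summary saved
# above can be read back with scipy.io.loadmat; MATLAB bookkeeping keys
# (__header__, __version__, __globals__) are dropped.
def load_summary(mat_path):
    summary = sio.loadmat(mat_path)
    return {k: v for k, v in summary.items() if not k.startswith('__')}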

def calc_steps(data_len, batchsize):
    return (data_len + batchsize - 1) // batchsize
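
# calc_steps is ceiling division: it rounds up so a final partial batch still
# gets its own step. Illustrative check (values are examples, not from the run):
assert calc_steps(1000, 8) == 125  # exact multiple of the batch size
assert calc_steps(1001, 8) == 126  # one extra step covers the remainder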


# Calculate the steps per epoch
train_steps = calc_steps(len(train_path), 8)
val_steps = calc_steps(len(val_path), 8)

checkpointer = ModelCheckpoint('cp-{epoch:02d}-{val_loss:.4f}-od-resnet50.h5',
                               verbose=1)
# Train the model
history = model.fit_generator(
    traingen,
    steps_per_epoch=train_steps,
    epochs=20,  # Change this to a larger number to train for longer
    validation_data=valgen,
    validation_steps=val_steps,
    verbose=1,
    max_queue_size=5  # Change this number based on memory restrictions
)

model.save('outlier_detector_resnet50.h5')

model.save_weights('model_weights.h5')

# Save the model architecture
with open('model_architecture.json', 'w') as f:
    f.write(model.to_json())
class CartoonGAN():
    def __init__(self, args):
        self.model_name = 'CartoonGAN'
        self.batch_size = args.batch_size
        self.epochs = args.epochs
        self.gpu = args.gpu_num
        self.image_channels = args.image_channels
        self.image_size = args.image_size
        self.init_epoch = args.init_epoch
        self.log_dir = args.log_dir
        self.lr = args.lr
        self.model_dir = args.model_dir
        self.weight = args.weight

    # method for generator
    def generator(self):
        input_shape = [self.image_size, self.image_size, self.image_channels]
        input_img = Input(shape=input_shape, name="input")

        # first block
        x = ReflectionPadding2D(3)(input_img)
        x = Conv2D(64, (7, 7),
                   strides=1,
                   use_bias=True,
                   padding='valid',
                   name="conv1")(x)
        x = InstanceNormalization(name="norm1")(x)
        x = Activation("relu")(x)

        # down-convolution
        channel = 128
        for i in range(2):
            x = Conv2D(channel, (3, 3),
                       strides=2,
                       use_bias=True,
                       padding='same',
                       name="conv{}_1".format(i + 2))(x)
            x = Conv2D(channel, (3, 3),
                       strides=1,
                       use_bias=True,
                       padding='same',
                       name="conv{}_2".format(i + 2))(x)
            x = InstanceNormalization(name="norm{}".format(i + 2))(x)
            x = Activation("relu")(x)
            channel = channel * 2

        # residual blocks
        x_res = x
        for i in range(8):
            x = ReflectionPadding2D(1)(x)
            x = Conv2D(256, (3, 3),
                       strides=1,
                       use_bias=True,
                       padding='valid',
                       name="conv{}_1".format(i + 4))(x)
            x = InstanceNormalization(name="norm{}_1".format(i + 4))(x)
            x = Activation("relu")(x)
            x = ReflectionPadding2D(1)(x)
            x = Conv2D(256, (3, 3),
                       strides=1,
                       use_bias=True,
                       padding='valid',
                       name="conv{}_2".format(i + 4))(x)
            x = InstanceNormalization(name="norm{}_2".format(i + 4))(x)
            x = Add()([x, x_res])
            x_res = x

        # up-convolution
        for i in range(2):
            x = Conv2DTranspose(channel // 2,
                                3,
                                2,
                                padding="same",
                                output_padding=1,
                                name="deconv{}_1".format(i + 1))(x)
            x = Conv2D(channel // 2, (3, 3),
                       strides=1,
                       use_bias=True,
                       padding="same",
                       name="deconv{}_2".format(i + 1))(x)
            x = InstanceNormalization(name="norm_deconv" + str(i + 1))(x)
            x = Activation("relu")(x)
            channel = channel // 2

        # last block
        x = ReflectionPadding2D(3)(x)
        x = Conv2D(3, (7, 7),
                   strides=1,
                   use_bias=True,
                   padding="valid",
                   name="deconv3")(x)
        x = Activation("tanh")(x)

        model = Model(input_img, x, name='Cartoon_Generator')

        return model

    # method for discriminator
    def discriminator(self):
        input_shape = [self.image_size, self.image_size, self.image_channels]
        input_img = Input(shape=input_shape, name="input")

        # first block
        x = Conv2D(32, (3, 3),
                   strides=1,
                   use_bias=True,
                   padding='same',
                   name="conv1")(input_img)
        x = LeakyReLU(alpha=0.2)(x)

        # block loop
        channel = 64
        for i in range(2):
            x = Conv2D(channel, (3, 3),
                       strides=2,
                       use_bias=True,
                       padding='same',
                       name="conv{}_1".format(i + 2))(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = Conv2D(channel * 2, (3, 3),
                       strides=1,
                       use_bias=True,
                       padding='same',
                       name="conv{}_2".format(i + 2))(x)
            x = InstanceNormalization()(x)
            x = LeakyReLU(alpha=0.2)(x)
            channel = channel * 2

        # last block
        x = Conv2D(256, (3, 3),
                   strides=1,
                   use_bias=True,
                   padding='same',
                   name="conv4")(x)
        x = InstanceNormalization()(x)
        x = LeakyReLU(alpha=0.2)(x)

        x = Conv2D(1, (3, 3),
                   strides=1,
                   use_bias=True,
                   padding='same',
                   activation='sigmoid',
                   name="conv5")(x)

        model = Model(input_img, x, name='Cartoon_Discriminator')

        return model

    # vgg loss function
    def vgg_loss(self, y_true, y_pred):
        # get vgg model
        input_shape = [self.image_size, self.image_size, self.image_channels]
        img_input = Input(shape=input_shape, name="vgg_input")
        vgg19 = tf.keras.applications.vgg19.VGG19(weights='imagenet')
        vggmodel = Model(inputs=vgg19.input,
                         outputs=vgg19.get_layer('block4_conv4').output)
        x = vggmodel(img_input)
        vgg = Model(img_input, x, name='VGG_for_Feature_Extraction')

        # get l1 loss for the content loss
        y_true = vgg(y_true)
        y_pred = vgg(y_pred)
        content_loss = tf.losses.absolute_difference(y_true, y_pred)

        return content_loss

    # compile each model
    def compile_model(self):
        # init summary writer for tensorboard
        self.callback1 = TensorBoard(self.log_dir + '/discriminator')
        self.callback2 = TensorBoard(self.log_dir + '/generator')
        self.callback3 = TensorBoard(self.log_dir + '/generated_images')

        # model stuff
        input_shape = [self.image_size, self.image_size, self.image_channels]
        adam1 = Adam(lr=self.lr)
        adam2 = Adam(lr=self.lr * 2)

        # init and add multi-gpu support
        try:
            self.discriminator = multi_gpu_model(self.discriminator(),
                                                 gpus=self.gpu)
        except ValueError:  # fall back to single-GPU/CPU
            self.discriminator = self.discriminator()
        try:
            self.generator = multi_gpu_model(self.generator(), gpus=self.gpu)
        except ValueError:
            self.generator = self.generator()

        # compile discriminator
        self.discriminator.compile(loss='binary_crossentropy', optimizer=adam1)

        # compile generator
        input_tensor = Input(shape=input_shape)
        generated_cartoon_tensor = self.generator(input_tensor)
        self.discriminator.trainable = False  # here we only train the generator
        discriminator_output = self.discriminator(generated_cartoon_tensor)
        self.train_generator = Model(
            input_tensor,
            outputs=[generated_cartoon_tensor, discriminator_output])
        # add multi-gpu support
        try:
            self.train_generator = multi_gpu_model(self.train_generator,
                                                   gpus=self.gpu)
        except ValueError:
            pass
        self.train_generator.compile(
            loss=[self.vgg_loss, 'binary_crossentropy'],
            loss_weights=[float(self.weight), 1.0],
            optimizer=adam2)

        # set callback model
        self.callback1.set_model(self.discriminator)
        self.callback2.set_model(self.train_generator)
        self.callback3.set_model(self.train_generator)

    # method for training process
    def train(self):

        # start training
        flip = False
        variance = 1 / 127.5
        start_time = time.time()
        for epoch in range(1, self.epochs + 1):

            # create batch generator at each epoch
            batch_generator = DataGenerator(image_size=self.image_size,
                                            batch_size=self.batch_size)
            batch_end = len(batch_generator)
            print('Epoch {}'.format(epoch))

            # start training for each batch
            for idx, (photo, cartoon, smooth_cartoon,
                      index) in enumerate(batch_generator):

                # these two tensors measure the output of generator and discriminator
                real = np.ones((self.batch_size, ) + (64, 64, 1))
                fake = np.zeros((self.batch_size, ) + (64, 64, 1))

                # check if it is the end of an epoch
                if index + 1 == batch_end:
                    break

                # initial training or start training
                if epoch < self.init_epoch:
                    g_loss = self.train_generator.train_on_batch(
                        photo, [photo, real])
                    generated_img = self.generator.predict(photo)
                    print(
                        "Batch %d (initial training for generator), g_loss: %.5f, with time: %4.4f"
                        % (idx, g_loss[2], time.time() - start_time))
                    start_time = time.time()
                    write_log(self.callback2, 'g_loss', g_loss[2],
                              idx + (epoch + 1) * len(batch_generator))
                    if idx % 20 == 0:
                        write_images(self.callback3, generated_img,
                                     'generated_imgs',
                                     idx + (epoch + 1) * len(batch_generator))

                    if epoch % 20 == 0 and K.eval(
                            self.train_generator.optimizer.lr) > 0.0001:
                        K.set_value(
                            self.train_generator.optimizer.lr,
                            K.eval(self.train_generator.optimizer.lr) * 0.99)

                else:

                    # add noise to the input of discriminator
                    if variance > 0.00001:
                        variance = variance * 0.9999
                        gaussian = np.random.normal(
                            0, variance, (cartoon.shape[1], cartoon.shape[2]))
                        cartoon[:, :, :, 0] = cartoon[:, :, :, 0] + gaussian
                        cartoon[:, :, :, 1] = cartoon[:, :, :, 1] + gaussian
                        cartoon[:, :, :, 2] = cartoon[:, :, :, 2] + gaussian
                        gaussian = np.random.normal(
                            0, variance, (cartoon.shape[1], cartoon.shape[2]))
                        smooth_cartoon[:, :, :,
                                       0] = smooth_cartoon[:, :, :,
                                                           0] + gaussian
                        smooth_cartoon[:, :, :,
                                       1] = smooth_cartoon[:, :, :,
                                                           1] + gaussian
                        smooth_cartoon[:, :, :,
                                       2] = smooth_cartoon[:, :, :,
                                                           2] + gaussian

                    # generate cartoonized images
                    generated_img = self.generator.predict(photo)

                    # to certain probability: flip the label of discriminator
                    if idx % 9 == 0 or np.random.uniform(0, 1) < 0.05:
                        real = fake
                        fake = fake + 1
                        flip = True

                    # train discriminator and adversarial loss
                    real_loss = self.discriminator.train_on_batch(
                        cartoon, real)
                    smooth_loss = self.discriminator.train_on_batch(
                        smooth_cartoon, fake)
                    fake_loss = self.discriminator.train_on_batch(
                        generated_img, fake)
                    d_loss = (real_loss + smooth_loss + fake_loss) / 3

                    # train generator
                    if flip:
                        real = fake
                        fake = fake - 1
                        flip = False

                    g_loss = self.train_generator.train_on_batch(
                        photo, [photo, real])
                    print(
                        "Batch %d, d_loss: %.5f, g_loss: %.5f, with time: %4.4f"
                        % (idx, d_loss, g_loss[2], time.time() - start_time))
                    start_time = time.time()

                    # add losses to writer
                    write_log(self.callback1, 'd_loss', d_loss,
                              idx + (epoch + 1) * len(batch_generator))
                    write_log(self.callback2, 'g_loss', g_loss[2],
                              idx + (epoch + 1) * len(batch_generator))
                    if idx % 20 == 0:
                        write_images(self.callback3, generated_img,
                                     'generated_imgs',
                                     idx + (epoch + 1) * len(batch_generator))

                    # change learning rate
                    if epoch % 20 == 0 and K.eval(
                            self.discriminator.optimizer.lr) > 0.0001:
                        K.set_value(
                            self.discriminator.optimizer.lr,
                            K.eval(self.discriminator.optimizer.lr) * 0.95)
                    if epoch % 20 == 0 and K.eval(
                            self.train_generator.optimizer.lr) > 0.0001:
                        K.set_value(
                            self.train_generator.optimizer.lr,
                            K.eval(self.train_generator.optimizer.lr) * 0.95)

                # save model
                if epoch % 50 == 0:
                    self.generator.save_weights(
                        self.model_dir + '/' +
                        'CartoonGan_generator_epoch_{}.h5'.format(epoch))
                    self.discriminator.save_weights(
                        self.model_dir + '/' +
                        'CartoonGan_discriminator_epoch_{}.h5'.format(epoch))
                    self.train_generator.save_weights(
                        self.model_dir + '/' +
                        'CartoonGan_train_generator_epoch_{}.h5'.format(epoch))

        print('Done!')
        self.generator.save('CartoonGan_generator.h5')
class PerceptualModel(NNInterface):
    def __init__(self):
        super().__init__()
        self.__model = vgg16.VGG16(weights='imagenet')
        self.ref_model = self.get_dropout_model(0)
        self.tar_model = self.get_dropout_model(0)
        print(self.tar_model.summary())

    def get_features_model(self, layer_name):
        layer = self.__model.get_layer(layer_name).output
        model = Model(self.__model.input, outputs=layer)
        return model

    def call(self, x, training=True, ref=True):
        x = vgg16.preprocess_input(x)
        if ref:
            return self.ref_model(x, training=training)
        else:
            return self.tar_model(x, training=training)

    def compute_output_shape(self, input_shape):
        return self.__model.compute_output_shape(input_shape)

    def freeze_layers(self, freeze_idx):

        for i, layer in enumerate(self.__model.layers):
            if freeze_idx > i:
                layer.trainable = False

        for i, layer in enumerate(self.__model.layers):
            print("layer {} is trainable {}".format(layer.name,
                                                    layer.trainable))

    def add_dropout(self):
        # Store the fully connected layers
        fc1 = self.__model.layers[-3]
        fc2 = self.__model.layers[-2]
        predictions = self.__model.layers[-1]

        # Create the dropout layers
        dropout1 = Dropout(0.5)
        dropout2 = Dropout(0.5)

        # Reconnect the layers
        x = dropout1(fc1.output)
        x = fc2(x)
        # x = dropout2(x)
        predictors = predictions(x)
        input = self.__model.input

        # Create a new model
        self.__model = Model(input, predictors)
        # self.__model.summary()

    def get_dropout_model(self, dropout_num):
        model = tf.keras.Sequential()

        dropout1 = Dropout(0.5)
        dropout2 = Dropout(0.5)

        for layer in self.__model.layers:
            model.add(layer)
            if layer.name == "fc1" and dropout_num > 0:
                model.add(dropout1)
            if layer.name == "fc2" and dropout_num > 1:
                model.add(dropout2)
        return model

    def save_model(self, iter_num, output_path):
        output_path = os.path.join(output_path, "ckpts")
        checkpoint_path = "weights_after_{}_iterations".format(iter_num)
        self.__model.save_weights(os.path.join(output_path, checkpoint_path))

    def load_model(self, ckpt_path):
        self.__model.load_weights(ckpt_path)
                                 callbacks=[],
                                 verbose=1)
# ---------------------------------------------------------------------------------------------------------------------

# --------------------------------------
# EXPORT MODEL ARCHITECTURE AND WEIGHTS |
# --------------------------------------
# export model structure to json file:
model_struct_json = model.to_json()
filename = filepattern('model_allfreeze_', '.json')
with open(filename, 'w') as f:
    f.write(model_struct_json)

# export weights to an hdf5 file:
w_filename = filepattern('weights_allfreeze_', '.h5')
model.save_weights(w_filename)
# ---------------------------------------------------------------------------------------------------------------------

# -------------------------------------------------------------
# VISUALIZE BASE ARCHITECTURE TO DECIDE WHICH LAYERS TO FREEZE |
# -------------------------------------------------------------
# Put a debugger breakpoint directly on the next command to view the
# architecture at runtime.
print(list(show_architecture(base)))
# ---------------------------------------------------------------------------------------------------------------------

# ------------------------
# STOP NEPTUNE EXPERIMENT |
# ------------------------
npt.stop()
def main():
    # Counting Dataset
    counting_dataset_path = 'counting_data_UCF'
    counting_dataset = list()
    train_labels = {}
    val_labels = {}
    for im_path in glob.glob(os.path.join(counting_dataset_path, '*.jpg')):
        counting_dataset.append(im_path)
        img = image.load_img(im_path)
        gt_file = im_path.replace('.jpg', '_ann.mat')
        h, w = img.size  # note: PIL's Image.size is (width, height)
        dmap, crowd_number = load_gt_from_mat(gt_file, (w, h))
        train_labels[im_path] = dmap
        val_labels[im_path] = crowd_number
    counting_dataset_pyramid, train_labels_pyramid = multiscale_pyramid(
        counting_dataset, train_labels)

    # Ranking Dataset
    ranking_dataset_path = 'ranking_data'
    ranking_dataset = list()
    for im_path in glob.glob(os.path.join(ranking_dataset_path, '*.jpg')):
        ranking_dataset.append(im_path)

    # randomize the order of images before splitting
    np.random.shuffle(counting_dataset)

    split_size = int(round(len(counting_dataset) / 5))
    splits_list = list()
    for t in range(5):
        splits_list.append(counting_dataset[t * split_size:t * split_size +
                                            split_size])

    split_val_labels = {}

    mae_sum = 0.0
    mse_sum = 0.0

    # create folder to save results
    date = str(datetime.datetime.now())
    d = date.split()
    d1 = d[0]
    d2 = d[1].split(':')
    results_folder = 'Results-' + d1 + '-' + d2[0] + '.' + d2[1]
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)

    # 5-fold cross validation
    epochs = int(round(iterations / iterations_per_epoch))
    n_fold = 5

    for f in range(0, n_fold):
        print('\nFold ' + str(f))

        # Model
        model = VGG16(include_top=False, weights='imagenet')
        transfer_layer = model.get_layer('block5_conv3')
        conv_model = Model(inputs=[model.input],
                           outputs=[transfer_layer.output],
                           name='vgg_partial')

        counting_input = Input(shape=(224, 224, 3),
                               dtype='float32',
                               name='counting_input')
        ranking_input = Input(shape=(224, 224, 3),
                              dtype='float32',
                              name='ranking_input')
        x = conv_model([counting_input, ranking_input])
        counting_output = Conv2D(1, (3, 3),
                                 strides=(1, 1),
                                 padding='same',
                                 data_format=None,
                                 dilation_rate=(1, 1),
                                 activation='relu',
                                 use_bias=True,
                                 kernel_initializer='glorot_uniform',
                                 bias_initializer='zeros',
                                 kernel_regularizer=None,
                                 bias_regularizer=None,
                                 activity_regularizer=None,
                                 kernel_constraint=None,
                                 bias_constraint=None,
                                 name='counting_output')(x)

        # The ranking output is computed using SUM pool. Here I use
        # GlobalAveragePooling2D followed by a multiplication by 14^2 to do
        # this.
        ranking_output = Lambda(
            lambda i: 14.0 * 14.0 * i,
            name='ranking_output')(GlobalAveragePooling2D(
                name='global_average_pooling2d')(counting_output))
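        # Sanity note: GlobalAveragePooling2D returns the mean over the
        # 14x14 = 196 spatial positions, so multiplying by 14.0 * 14.0
        # recovers the spatial SUM of the predicted density map.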
        train_model = Model(inputs=[counting_input, ranking_input],
                            outputs=[counting_output, ranking_output])
        train_model.summary()

        # l2 weight decay
        for layer in train_model.layers:
            if hasattr(layer, 'kernel_regularizer'):
                layer.kernel_regularizer = regularizers.l2(5e-4)
            elif layer.name == 'vgg_partial':
                for l in layer.layers:
                    if hasattr(l, 'kernel_regularizer'):
                        l.kernel_regularizer = regularizers.l2(5e-4)

        optimizer = SGD(lr=0.0, decay=0.0, momentum=0.9, nesterov=False)
        loss = {
            'counting_output': euclideanDistanceCountingLoss,
            'ranking_output': pairwiseRankingHingeLoss
        }
        loss_weights = [1.0, 0.0]
        train_model.compile(optimizer=optimizer,
                            loss=loss,
                            loss_weights=loss_weights)

        splits_list_tmp = splits_list.copy()

        # counting validation split
        split_val = splits_list_tmp[f]

        del splits_list_tmp[f]
        flat = itertools.chain.from_iterable(splits_list_tmp)

        # counting train split
        split_train = list(flat)

        # counting validation split labels
        split_val_labels = {k: val_labels[k] for k in split_val}

        counting_dataset_pyramid_split = []
        train_labels_pyramid_split = []
        for key in split_train:
            # Each training image contributes a 5-level pyramid of patches
            # and matching labels.
            counting_dataset_pyramid_split.extend(
                counting_dataset_pyramid[key][:5])
            train_labels_pyramid_split.extend(train_labels_pyramid[key][:5])

        index_shuf = np.arange(len(counting_dataset_pyramid_split))
        np.random.shuffle(index_shuf)
        counting_dataset_pyramid_split_shuf = []
        train_labels_pyramid_split_shuf = []
        for i in index_shuf:
            counting_dataset_pyramid_split_shuf.append(
                counting_dataset_pyramid_split[i])
            train_labels_pyramid_split_shuf.append(
                train_labels_pyramid_split[i])

        train_generator = DataGenerator(counting_dataset_pyramid_split_shuf,
                                        train_labels_pyramid_split_shuf,
                                        ranking_dataset, **params)
        lrate = LearningRateScheduler(step_decay)
        callbacks_list = [lrate]
        train_model.fit_generator(generator=train_generator,
                                  epochs=epochs,
                                  callbacks=callbacks_list)
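        # Note: newer Keras/TensorFlow releases deprecate fit_generator in
        # favour of Model.fit, which accepts generators directly; it is kept
        # here for the Keras version this code targets.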

        # Build a test-time model for variable-sized images: reuse the trained
        # backbone and counting head, and sum the predicted density map over
        # the spatial dimensions to obtain the count.
        tmp_model = train_model.get_layer('vgg_partial')
        test_input = Input(shape=(None, None, 3),
                           dtype='float32',
                           name='test_input')
        new_input = tmp_model(test_input)
        co = train_model.get_layer('counting_output')(new_input)
        test_output = Lambda(lambda i: K.sum(i, axis=(1, 2)),
                             name='test_output')(co)
        test_model = Model(inputs=[test_input], outputs=[test_output])

        predictions = np.empty((len(split_val), 1))
        y_validation = np.empty((len(split_val), 1))
        for i in range(len(split_val)):
            img = image.load_img(split_val[i], target_size=(224, 224))
            img_to_array = image.img_to_array(img)
            img_to_array = preprocess_input(img_to_array)
            img_to_array = np.expand_dims(img_to_array, axis=0)

            pred_test = test_model.predict(img_to_array)
            predictions[i] = pred_test
            y_validation[i] = split_val_labels[split_val[i]]

        mean_abs_err = mae(predictions, y_validation)
        mean_sqr_err = mse(predictions, y_validation)

        # serialize model to JSON
        model_json = test_model.to_json()
        model_json_name = "test_model_" + str(f) + ".json"
        with open(model_json_name, "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model_h5_name = "test_model_" + str(f) + ".h5"
        test_model.save_weights(model_h5_name)
        print("Saved model to disk")

        print('\n######################')
        print('Results on TEST SPLIT:')
        print(' MAE: {}'.format(mean_abs_err))
        print(' MSE: {}'.format(mean_sqr_err))
        print("Took %f seconds" % (time.time() - s))
        path1 = results_folder + '/test_split_results_fold-' + str(f) + '.txt'
        with open(path1, 'w') as fout:  # 'fout', not 'f': f is the fold index
            fout.write('mae: %f,\nmse: %f,\nTook %f seconds' %
                       (mean_abs_err, mean_sqr_err, time.time() - s))

        mae_sum = mae_sum + mean_abs_err
        mse_sum = mse_sum + mean_sqr_err

    print('\n################################')
    print('Average Results on TEST SPLIT:')
    print(' AVE MAE: {}'.format(mae_sum / n_fold))
    print(' AVE MSE: {}'.format(mse_sum / n_fold))
    print("Took %f seconds" % (time.time() - s))
    path2 = results_folder + '/test_split_results_avg.txt'
    with open(path2, 'w') as fout:
        fout.write('avg_mae: %f,\navg_mse: %f,\nTook %f seconds' %
                   (mae_sum / n_fold, mse_sum / n_fold, time.time() - s))
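
# The LearningRateScheduler above needs a schedule function; `step_decay` is
# defined elsewhere in this file. For reference, a minimal sketch of a typical
# step schedule (base rate, drop factor, and step size below are illustrative
# assumptions, not the original hyperparameters):
import numpy as np

def step_decay_sketch(epoch):
    # Halve the learning rate every 20 epochs, starting from 1e-5.
    base_lr, drop, epochs_drop = 1e-5, 0.5, 20.0
    return base_lr * drop ** np.floor(epoch / epochs_drop)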
class DEC(object):
    def __init__(self, dims, n_clusters=10, alpha=1.0, init='glorot_uniform'):

        super(DEC, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.alpha = alpha
        self.encoder = autoencoder(self.dims, init=init)

        # prepare DEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(
            self.encoder.output)
        self.model = Model(inputs=self.encoder.input, outputs=clustering_layer)

    def load_weights(self, weights):  # load weights of DEC model
        self.model.load_weights(weights)

    def extract_features(self, x):
        return self.encoder.predict(x)

    def predict(self, x):
        # Predict cluster labels from the clustering layer's soft assignments.
        q = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        # DEC's auxiliary target: p_ij is proportional to q_ij^2 / f_j with
        # f_j = sum_i q_ij, renormalized per sample. Squaring sharpens
        # confident assignments; dividing by the soft cluster frequency f_j
        # keeps large clusters from dominating.
        weight = q**2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def compile(self, optimizer='sgd', loss='kld'):
        # DEC minimizes KL(P || Q) between the auxiliary target distribution
        # P and the soft assignments Q, hence the 'kld' loss.
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit(self,
            x,
            y=None,
            maxiter=2e4,
            batch_size=256,
            tol=1e-3,
            update_interval=140,
            save_dir='./results/temp'):

        print('Update interval', update_interval)
        save_interval = int(x.shape[0] / batch_size) * 5  # 5 epochs
        print('Save interval', save_interval)

        # Step 1: initialize cluster centers using k-means
        t1 = time()
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(y_pred)
        self.model.get_layer(name='clustering').set_weights(
            [kmeans.cluster_centers_])
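        # The 'clustering' layer stores the cluster centers as its weights.
        # Assuming ClusteringLayer implements the Student's t-kernel from the
        # DEC paper, it maps an embedding z_i to soft assignments
        # q_ij ~ (1 + ||z_i - mu_j||^2 / alpha) ** (-(alpha + 1) / 2).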

        # Step 2: deep clustering
        # logging file
        import csv
        import os
        os.makedirs(save_dir, exist_ok=True)  # ensure the save/log dir exists
        logfile = open(save_dir + '/dec_log.csv', 'w')
        logwriter = csv.DictWriter(
            logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'loss'])
        logwriter.writeheader()

        loss = 0
        index = 0
        index_array = np.arange(x.shape[0])
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q = self.model.predict(x, verbose=0)
                p = self.target_distribution(
                    q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(metrics.acc(y, y_pred), 5)
                    nmi = np.round(metrics.nmi(y, y_pred), 5)
                    ari = np.round(metrics.ari(y, y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite,
                                   acc=acc,
                                   nmi=nmi,
                                   ari=ari,
                                   loss=loss)
                    logwriter.writerow(logdict)
                    print(
                        'Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' %
                        (ite, acc, nmi, ari), ' ; loss=', loss)

                # check stop criterion
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = np.copy(y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    logfile.close()
                    break

            # train on batch
            # if index == 0:
            #     np.random.shuffle(index_array)
            idx = index_array[index * batch_size:min((index + 1) *
                                                     batch_size, x.shape[0])]
            loss = self.model.train_on_batch(x=x[idx], y=p[idx])
            index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

            # save intermediate model
            if ite % save_interval == 0:
                print('saving model to:',
                      save_dir + '/DEC_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/DEC_model_' + str(ite) +
                                        '.h5')


        # save the trained model
        logfile.close()
        print('saving model to:', save_dir + '/DEC_model_final.h5')
        self.model.save_weights(save_dir + '/DEC_model_final.h5')

        return y_pred
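
# Hypothetical usage sketch for DEC (not part of the original file): the call
# sequence below assumes MNIST-like 784-d inputs and relies on the
# `autoencoder` and `ClusteringLayer` helpers defined elsewhere in this file.
def dec_usage_sketch(x, y=None):
    dec = DEC(dims=[784, 500, 500, 2000, 10], n_clusters=10)
    dec.compile(optimizer='sgd', loss='kld')
    return dec.fit(x, y=y, maxiter=2e4, batch_size=256,
                   update_interval=140, save_dir='./results/temp')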
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(shape=(3, ),
                                        dtype='int32',
                                        buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1, ),
                          dtype='int64',
                          buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1, ),
                          dtype='int32',
                          buffer=weights_file.read(4))
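    # Darknet weight files begin with a header of three int32 values
    # (major, minor, revision) followed by the number of images seen during
    # training, stored as int64 from Darknet 0.2 onward and int32 before.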
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0
    out_index = []
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.prod(weights_shape)  # np.product was removed in NumPy 2.0

            print('conv2d', 'bn' if batch_normalize else '  ', activation,
                  weights_shape)

            conv_bias = np.ndarray(shape=(filters, ),
                                   dtype='float32',
                                   buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(shape=(3, filters),
                                        dtype='float32',
                                        buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(shape=darknet_w_shape,
                                      dtype='float32',
                                      buffer=weights_file.read(weights_size *
                                                               4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
            conv_layer = (Conv2D(filters, (size, size),
                                 strides=(stride, stride),
                                 kernel_regularizer=l2(weight_decay),
                                 use_bias=not batch_normalize,
                                 weights=conv_weights,
                                 activation=act_fn,
                                 padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            # Darknet route indices may be negative (relative to the current
            # layer); Python's negative list indexing handles this directly.
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(pool_size=(size, size),
                             strides=(stride, stride),
                             padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            # A [yolo] head marks the preceding layer as a model output;
            # append a placeholder so layer indices stay aligned.
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)
    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    model.summary()  # summary() prints itself; wrapping it in print() adds a stray 'None'
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
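
# Typical invocation of this converter (assuming the argparse definitions for
# config_path, weights_path, output_path, --weights_only and --plot_model
# earlier in the file):
#
#   python convert.py yolov3.cfg yolov3.weights yolo.h5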