def get_model(feature_extractor, rpn_model, anchors, hyper_params, mode="training"):
    """Build the Faster R-CNN model that extends an already generated RPN model.

    inputs:
        feature_extractor = feature extractor layer from the base model,
            its output is the tensor handed to RoI pooling
        rpn_model = tf.keras.model generated rpn model, its output unpacks
            into (regression predictions, classification predictions)
        anchors = (total_anchors, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        hyper_params = dictionary, "total_labels" is always read,
            "anchor_count" only in training and "variances" only otherwise
        mode = "training" builds the graph with the four loss layers,
            every other value builds the decoding (inference) graph

    outputs:
        frcnn_model = tf.keras.model
    """
    input_img = rpn_model.input
    rpn_reg_predictions, rpn_cls_predictions = rpn_model.output

    # Region proposals and their pooled features
    rpn_outputs = [rpn_reg_predictions, rpn_cls_predictions]
    roi_bboxes = RoIBBox(anchors, mode, hyper_params, name="roi_bboxes")(rpn_outputs)
    roi_pooled = RoIPooling(hyper_params, name="roi_pooling")(
        [feature_extractor.output, roi_bboxes])

    # Detection head: flatten, then two fully connected + dropout stages
    head = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
    stage_names = (("frcnn_fc1", "frcnn_dropout1"), ("frcnn_fc2", "frcnn_dropout2"))
    for fc_name, dropout_name in stage_names:
        head = TimeDistributed(Dense(4096, activation="relu"), name=fc_name)(head)
        head = TimeDistributed(Dropout(0.5), name=dropout_name)(head)
    frcnn_cls_predictions = TimeDistributed(
        Dense(hyper_params["total_labels"], activation="softmax"),
        name="frcnn_cls")(head)
    frcnn_reg_predictions = TimeDistributed(
        Dense(hyper_params["total_labels"] * 4, activation="linear"),
        name="frcnn_reg")(head)

    if mode != "training":
        # Inference graph: decode the head predictions into final detections
        decoder = Decoder(hyper_params["variances"],
                          hyper_params["total_labels"],
                          name="faster_rcnn_decoder")
        bboxes, labels, scores = decoder(
            [roi_bboxes, frcnn_reg_predictions, frcnn_cls_predictions])
        return Model(inputs=input_img, outputs=[bboxes, labels, scores])

    # Training graph: ground truth enters through extra inputs
    input_gt_boxes = Input(shape=(None, 4), name="input_gt_boxes", dtype=tf.float32)
    input_gt_labels = Input(shape=(None, ), name="input_gt_labels", dtype=tf.int32)
    rpn_cls_actuals = Input(shape=(None, None, hyper_params["anchor_count"]),
                            name="input_rpn_cls_actuals", dtype=tf.float32)
    rpn_reg_actuals = Input(shape=(None, 4), name="input_rpn_reg_actuals", dtype=tf.float32)
    frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(hyper_params, name="roi_deltas")(
        [roi_bboxes, input_gt_boxes, input_gt_labels])

    # (layer name, loss function, target tensor, predicted tensor)
    loss_specs = [
        ("rpn_reg_loss", train_utils.reg_loss, rpn_reg_actuals, rpn_reg_predictions),
        ("rpn_cls_loss", train_utils.rpn_cls_loss, rpn_cls_actuals, rpn_cls_predictions),
        ("frcnn_reg_loss", train_utils.reg_loss, frcnn_reg_actuals, frcnn_reg_predictions),
        ("frcnn_cls_loss", train_utils.frcnn_cls_loss, frcnn_cls_actuals, frcnn_cls_predictions),
    ]
    loss_outputs = [
        Lambda(loss_fn, name=loss_name)([actual, predicted])
        for loss_name, loss_fn, actual, predicted in loss_specs
    ]

    frcnn_model = Model(
        inputs=[input_img, input_gt_boxes, input_gt_labels,
                rpn_reg_actuals, rpn_cls_actuals],
        outputs=[roi_bboxes, rpn_reg_predictions, rpn_cls_predictions,
                 frcnn_reg_predictions, frcnn_cls_predictions] + loss_outputs)

    # Every loss layer output is both optimised and reported as a mean metric
    for loss_name, _, _, _ in loss_specs:
        loss_layer = frcnn_model.get_layer(loss_name)
        frcnn_model.add_loss(loss_layer.output)
        frcnn_model.add_metric(loss_layer.output, name=loss_name, aggregation="mean")

    return frcnn_model
def create_implExModel(num_nodes, num_edges, embed_size=50, n3_reg=1e-3, learning_rate=5e-1, num_negs=50, alpha=1., beta=1.):
    """Create and compile the 'implEx' scoring model.

    The model takes a dict of five scalar inputs ('subject', 'object',
    'relation', 'count', 'label'), embeds subject/object with a shared node
    embedding and the relation with an edge embedding, and scores the triple
    with ``ComplExDotScore(n3_reg)``.

    The training loss is attached with ``add_loss``: the weighted
    cross-entropy between 'label' and the score (positive weight ``num_negs``,
    divided by ``num_negs``), multiplied by the confidence
    ``1 + alpha * log(1 + count / beta)`` and summed.

    Returns the model compiled with ``Adagrad(learning_rate)``.
    """
    # Build complEx Model
    inputs = {
        key: Input(shape=(), name=key)
        for key in ('subject', 'object', 'relation', 'count', 'label')
    }

    def embedding_table(size, layer_name):
        # One embedding table with a fresh Glorot-uniform initializer
        return Embedding(input_dim=size,
                         output_dim=embed_size,
                         embeddings_initializer=GlorotUniform(),
                         name=layer_name)

    node_layer = embedding_table(num_nodes, 'node_embedding')
    edge_layer = embedding_table(num_edges, 'edge_embedding')

    triple = [
        node_layer(inputs['subject']),
        edge_layer(inputs['relation']),
        node_layer(inputs['object']),
    ]
    outputs = ComplExDotScore(n3_reg)(triple)
    model = Model(inputs, outputs, name='implEx')

    # Compile implEx Model
    wbce_loss = tf.nn.weighted_cross_entropy_with_logits(
        inputs['label'], outputs, num_negs) / num_negs
    confidence = 1 + alpha * tf.math.log(1 + inputs['count'] / beta)
    model.add_loss(K.sum(confidence * wbce_loss))
    model.add_metric(K.mean(wbce_loss), 'weighted_binarycrossentropy')
    model.compile(optimizer=Adagrad(learning_rate))
    return model
def model(train_ds, val_ds, bdir, movie_num, epochs, batch_size, learning_rate, patience):
    """Build the network, train it with early stopping and save the loss plot.

    Args:
        train_ds: training data handed to ``fit``.
        val_ds: validation data handed to ``fit``.
        bdir: base directory prefix. File names are appended to it by plain
            string concatenation, so it must end with a path separator.
        movie_num: width of the "Input", "Target" and "Weight" inputs and of
            the "Output" layer.
        epochs: number of training epochs.
        batch_size: divides the 'Train' length read from
            ``Dataset/file_length.dat`` to obtain ``steps_per_epoch``.
        learning_rate: learning rate of the Adam optimizer.
        patience: patience of the early-stopping callback.

    Side effects:
        Removes a stale ``log.csv`` under ``bdir`` and writes ``log.csv``,
        ``model.h5`` and ``loss.png`` there.

    Raises:
        SystemExit: when the power of two derived from ``movie_num`` is
            below 512.
    """
    # Remove the previous log without going through a shell: building an
    # "rm ..." command by concatenation breaks on paths containing spaces or
    # shell metacharacters and would execute whatever such a path spells.
    log_path = bdir + "log.csv"
    try:
        os.remove(log_path)
    except FileNotFoundError:
        pass  # first run, nothing to clean up
    except OSError as err:
        # Stay best-effort like the old "rm" call, but say what happened.
        print("--> Could not remove {}: {}".format(log_path, err))

    filelendf = pd.read_csv(bdir + 'Dataset/file_length.dat', engine = 'python', sep = ':', index_col = 0)
    print ("--> Starting Training with learning rate = {learning_rate} for epochs = {epochs} with patience = {patience}".format(learning_rate = learning_rate, epochs = epochs, patience = patience))

    adam = Adam(learning_rate = learning_rate)
    Ip = Input(shape = (movie_num, ), name = "Input")
    Op = Input(shape = (movie_num, ), name = "Target")
    Weight = Input(shape = (movie_num, ), name = "Weight")
    Count = Input(shape = (1, ), name = "Count")

    # Power of two derived from movie_num; it sizes the hidden stack.
    # NOTE(review): log(x)/log(2) is a floating point quotient, so an exact
    # power of two may round down one step - confirm this is acceptable.
    n = pow(2, floor(log(movie_num)/log(2)))
    if n < 512:
        print ("Insufficient number of movies for a good model")
        # Same effect as the interactive-only exit() helper, without relying
        # on the site module having injected it.
        raise SystemExit
    x = layer_creator(n, Ip)
    Output = Dense(movie_num, activation = "relu", name = "Output")(x)

    # Target, weight and count are model inputs because loss and metric are
    # attached to the graph instead of being passed to compile().
    net = Model(inputs = [Ip, Op, Weight, Count], outputs = Output)
    net.add_loss(rmse(Op, Output, Weight, Count))
    net.add_metric(mae(Op, Output, Weight, Count), aggregation = 'mean', name = 'mae')
    net.compile(optimizer = adam, loss = None, metrics = None)

    es = EarlyStopping(monitor = 'val_loss', mode = 'min', verbose = 1, patience = patience)
    cl = CSVLogger(log_path, append = True, separator = ',')
    mc = ModelCheckpoint(bdir + 'model.h5', monitor = 'val_loss', verbose = 1, save_best_only = True)
    history = net.fit(train_ds,
                      epochs = epochs,
                      steps_per_epoch = (filelendf.loc['Train']['Length'] // batch_size),
                      validation_data = val_ds,
                      callbacks = [es, cl, mc])

    print ("--> Plotting Loss")
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.savefig(bdir + 'loss.png', bbox_inches = 'tight')
def build_vae(input_shape):
    '''
    Build a convolutional variational autoencoder.

    Arguments:
        input_shape(tuple): the shape of input images (H, W, C)

    Returns:
        encoder, decoder, autoencoder models. The encoder outputs
        [z, mean, logvar]; the autoencoder is compiled with its loss
        (summed binary cross-entropy + KL term) attached through add_loss.
    '''
    latent_dim = 512

    def build_encoder(input_shape, latent_dim):
        # Three stride-2 convolutions, then two dense heads for mean / logvar
        image = Input(input_shape)
        features = image
        for filters in (256, 128, 64):
            features = Conv2D(filters, 4, strides=2, padding='same',
                              activation='relu')(features)
        features = Flatten()(features)
        mean = Dense(latent_dim)(features)
        logvar = Dense(latent_dim)(features)
        # Reparameterisation: z = mean + sigma * epsilon
        noise = K.random_normal(K.shape(mean))
        z = mean + K.exp(0.5 * logvar) * noise
        return Model(image, [z, mean, logvar], name='encoder')

    def build_decoder(latent_dim):
        # Dense projection to a 4x4x64 map, then three stride-2 upsamplings
        stack = [
            Dense(4 * 4 * 64, activation='relu', input_shape=(latent_dim,)),
            Reshape((4, 4, 64)),
        ]
        stack.extend(
            Conv2DTranspose(filters, 4, strides=2, padding='same', activation='relu')
            for filters in (64, 32))
        stack.append(
            Conv2DTranspose(3, 4, strides=2, padding='same', activation='sigmoid'))
        return Sequential(stack, name='decoder')

    encoder = build_encoder(input_shape, latent_dim)
    decoder = build_decoder(latent_dim)

    inputs = Input(input_shape)
    z, mean, logvar = encoder(inputs)
    decoder_out = decoder(z)
    autoencoder = Model(inputs, decoder_out)

    # Reconstruction term summed over axes 1 and 2, KL term over the latent axis
    bce_loss = K.sum(binary_crossentropy(inputs, decoder_out), axis=[1, 2])
    kl_loss = -0.5 * K.sum(1 + logvar - K.square(mean) - K.exp(logvar), axis=-1)
    autoencoder.add_loss(K.mean(bce_loss + kl_loss))

    autoencoder.add_metric(tf.reduce_mean(bce_loss),
                           name='bce_sum', aggregation='mean')
    autoencoder.add_metric(tf.reduce_mean(bce_loss) / input_shape[0] / input_shape[1],
                           name='bce', aggregation='mean')
    autoencoder.add_metric(tf.reduce_mean(kl_loss),
                           name='KL', aggregation='mean')
    autoencoder.compile(Adam(1e-3, decay=5e-4))

    return encoder, decoder, autoencoder
def create_model_and_compile(self):
    """Build the 'FEModel' network, attach its losses/metrics and compile it.

    Reads ``self.in_shape``, ``self.num_classes``, ``self.fe_weight``,
    ``self.cs_weight``, ``self.optim`` and ``self.LR``; stores the backbone in
    ``self.base_model`` and the bias-free dense head in ``self.cs_layer``.

    The total loss is the weighted sum of the output of
    ``self.triplet_loss_all_combinations`` (on the backbone output) and of
    ``self.manual_CS_loss`` (on the dense head output and the 'in_qpny' input).

    Returns:
        (model, optimizer) tuple.

    Raises:
        ValueError: if ``self.optim`` contains neither 'adm' nor 'ranger'.
    """
    input_shape = self.in_shape
    self.base_model = self.base_network()
    self.cs_layer = Dense(self.num_classes, use_bias=False, name='CS_layer')

    qpn_in = Input(shape=input_shape, name='in_qpn')
    qpny = Input(shape=(self.num_classes, ), name='in_qpny')
    qpny_label = Input(shape=(1, ), name='in_qpny_label')

    qpn_out = self.base_model(qpn_in)
    qpn_cls_sig = self.cs_layer(qpn_out)

    # Losses and accuracies are computed inside the graph by Lambda layers
    fe_loss = Lambda(self.triplet_loss_all_combinations,
                     name='triplet_loss_FE')(qpn_out) * self.fe_weight
    fe_accuracy = Lambda(self.FE_accuracy, name='FE_accuracy_metric')(qpn_out)
    cs_loss = Lambda(self.manual_CS_loss, name='CS_loss_calc')(
        [qpn_cls_sig, qpny]) * self.cs_weight
    cs_accuracy = Lambda(self.CS_accuracy, name='CS_Acc')([qpn_cls_sig, qpny])
    total_loss = fe_loss + cs_loss

    model = Model(inputs=[qpn_in, qpny, qpny_label],
                  outputs=[qpn_cls_sig],
                  name='FEModel')

    if 'adm' in self.optim:
        optm = Adam(lr=self.LR)
    elif 'ranger' in self.optim:
        # option to use a newer optimizer
        radam = tfa.optimizers.RectifiedAdam(lr=self.LR, min_lr=1e-7)
        optm = tfa.optimizers.Lookahead(radam, sync_period=6, slow_step_size=0.5)
    else:
        # Without this branch an unknown setting left `optm` unbound and the
        # compile() call below failed with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported optimizer setting {!r}: expected a value containing "
            "'adm' or 'ranger'.".format(self.optim))

    model.add_loss(total_loss)
    model.compile(optimizer=optm)

    # Metrics to track the accuracy and loss progression
    model.add_metric(fe_accuracy, name='fe_a', aggregation='mean')
    model.add_metric(cs_accuracy, name='cs_a', aggregation='mean')
    model.add_metric(fe_loss, name='fe_loss', aggregation='mean')
    model.add_metric(cs_loss, name='cs_loss_out', aggregation='mean')

    return model, optm
def build_triplet_distances_model(extractor_model, dist_type='eucl', alpha=1.0, add_loss=False, input_shape=(224, 224, 3)):
    """Wrap a feature extractor into a three-input triplet model.

    Args:
        extractor_model: callable model applied, with shared weights, to the
            anchor, positive and negative inputs.
        dist_type: 'cos' selects ``CosineDistance``; any other value selects
            ``EuclidianDistanceSquared``.
        alpha: forwarded to ``TripletLoss(alpha=...)``.
        add_loss: when true the triplet output is attached as the model loss;
            otherwise it is only reported as the 'triplet_loss_mean' metric.
        input_shape: shape of each of the three inputs. Defaults to the
            previously hard-coded ``(224, 224, 3)``.

    Returns:
        Model with inputs [anchor_in, pos_in, neg_in] and outputs
        [triplet, pos_dist, neg_dist], compiled with ``Adamax()`` and no
        compile-time loss.
    """
    anchor_in = Input(shape=input_shape, name="anchor_in")
    anchor_out = extractor_model(anchor_in)
    pos_in = Input(shape=input_shape, name="pos_in")
    pos_out = extractor_model(pos_in)
    neg_in = Input(shape=input_shape, name="neg_in")
    neg_out = extractor_model(neg_in)

    # Both distances always use the same layer type, so pick it once
    distance_layer = CosineDistance if dist_type == 'cos' else EuclidianDistanceSquared
    pos_dist = distance_layer(name="pos_dist")([anchor_out, pos_out])
    neg_dist = distance_layer(name="neg_dist")([anchor_out, neg_out])

    triplet = TripletLoss(alpha=alpha)([pos_dist, neg_dist])
    triplet_model = Model([anchor_in, pos_in, neg_in],
                          [triplet, pos_dist, neg_dist])

    triplet_model.add_metric(pos_dist, aggregation='mean', name="pos_dist_mean")
    triplet_model.add_metric(neg_dist, aggregation='mean', name="neg_dist_mean")
    if add_loss:
        triplet_model.add_loss(triplet)
    else:
        triplet_model.add_metric(triplet, aggregation='mean',
                                 name="triplet_loss_mean")

    triplet_model.compile(optimizer=Adamax(), loss=None)
    return triplet_model
class MLMTextClassifyModel(AbstractTextClassifyModel, TFBasedModel):
    """Text classifier that fills a ``[MASK]`` span of a prompt with a label word.

    The prompt (``pattern``) is prepended to the text; the tokens predicted at
    the mask span are joined into a word that ``word2label`` maps to a label.
    """

    def __init__(self, config):
        super().__init__(config=config)
        self.tgt_token_ids = [
            self.tokenizer.token2id(t) for t in self.tgt_tokens
        ]
        # Vocabulary-sized 0/1 mask marking the ids of label-word tokens.
        # The set keeps the membership test O(1) per vocabulary entry.
        tgt_token_id_set = set(self.tgt_token_ids)
        self.pred_mask = [
            1 if idx in tgt_token_id_set else 0
            for idx in range(self.vocab_size)
        ]

    def _load_config(self, config):
        """Read max_len, the word-to-label mapping and the prompt pattern from the task config."""
        super()._load_config(config)
        self.max_len = self.task_config["max_len"]
        self.word2label = jload(self.task_config['token2label_path'])
        self.label2word = inverse_dict(self.word2label, overwrite=False)
        self.pattern = self.task_config["pattern"]
        # Label words are split into their individual characters
        self.tgt_tokens = flat([list(w) for w in self.word2label])
        self.label_num = len(set(self.word2label.values()))

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    pos_weight=1.,
                    bilstm_dim_list=None,
                    transformer_kwargs=None,
                    h5_file=None):
        """Create the masked-language-model network and register it as the "test" model.

        Args:
            pretrained_model_path: forwarded to ``get_mlm_model``.
            pretrained_model_tag: forwarded to ``get_mlm_model``.
            pos_weight: accepted for interface compatibility, not used here.
            bilstm_dim_list: accepted for interface compatibility, not used here.
            transformer_kwargs: extra keyword dict forwarded to
                ``get_mlm_model``; ``None`` means an empty dict.
            h5_file: forwarded to ``get_mlm_model``.

        Returns:
            The created network (also stored in ``self.nn_model``).
        """
        # None instead of a shared mutable default object
        if transformer_kwargs is None:
            transformer_kwargs = {}
        with self.get_scope():
            # Forward the caller's tag; it used to be overridden by a
            # hard-coded "bert", silently ignoring the parameter.
            self.nn_model = get_mlm_model(
                pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                transformer_kwargs=transformer_kwargs,
                h5_file=h5_file)
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        """Wrap ``nn_model`` into a training model with in-graph loss/accuracy and compile it.

        The training model takes an extra int32 ``token_output`` input; the
        positions where it is non-zero form the mask passed to the loss and
        accuracy layers. ``rdrop_alpha`` is accepted but not used here.
        """
        logger.info("compiling model...")
        # NOTE(review): the whole construction is kept inside the scope so
        # that optimizer and compile share it with the model - confirm the
        # intended extent of the scope.
        with self.get_scope():
            token_output = Input(shape=(None, ),
                                 name='token_output',
                                 dtype=tf.int32)
            self.train_model = Model(self.nn_model.inputs + [token_output],
                                     self.nn_model.output,
                                     name="train_model")
            output = self.train_model.output
            loss_mask = Lambda(
                function=lambda x: tf.cast(tf.not_equal(x, 0), tf.float32),
                name="pred_mask")(token_output)
            loss_layer = build_classify_loss_layer(multi_label=False,
                                                   with_mask=True)
            loss = loss_layer([token_output, output, loss_mask])
            self.train_model.add_loss(loss)

            accuracy_func = masked_sparse_categorical_accuracy
            metric_layer = MetricLayer(accuracy_func, name="metric_layer")
            accuracy = metric_layer([token_output, output, loss_mask])
            self.train_model.add_metric(accuracy,
                                        aggregation="mean",
                                        name="accuracy")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            self.train_model.compile(optimizer=optimizer)
        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextClassifyExample) -> Dict:
        """Tokenize ``pattern + text`` and locate its single ``[MASK]`` span.

        For labeled examples the label names are added under "labels".
        Only ``example.text`` is used; any extra text is not considered.
        """
        text = self.pattern + example.text
        feature = self.tokenizer.do_tokenize(text=text)
        mask_spans = find_span(feature["tokens"], "[MASK]")
        assert len(mask_spans) == 1
        feature["mask_span"] = mask_spans[0]
        if isinstance(example, LabeledTextClassifyExample):
            if isinstance(example.label, list):
                labels = [e.name for e in example.label]
            else:
                labels = [example.label.name]
            feature.update(labels=labels)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        """Turn one feature into a single-element list of records.

        In "train" mode a label and one of its words are drawn at random and
        written into the mask span, producing ``target_tokens``,
        ``tgt_token_ids`` and ``token_output`` (zero outside the span).

        Raises:
            ValueError: in "train" mode when the feature has no "labels".
        """
        record = dict(idx=idx, **feature)
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        if mode == "train":
            labels = feature.get("labels")
            if labels is None:
                raise ValueError("no labels given in train mode!")
            label = random.choice(labels)
            tgt_word = random.choice(self.label2word[label])
            tokened = self.tokenizer.do_tokenize(tgt_word)
            # Drop the first and last token of the tokenized word
            # (presumably the start/end markers - confirm with the tokenizer)
            tgt_token_span = tokened["tokens"][1:-1]
            tgt_token_span_id = tokened["token_ids"][1:-1]
            mask_start, mask_end = record["mask_span"]
            assert len(tgt_token_span) == mask_end - mask_start

            tgt_tokens = copy.copy(record["tokens"])
            tgt_token_ids = copy.copy(record["token_ids"])
            token_output = [0] * len(tgt_token_ids)
            tgt_tokens[mask_start:mask_end] = tgt_token_span
            tgt_token_ids[mask_start:mask_end] = tgt_token_span_id
            token_output[mask_start:mask_end] = tgt_token_span_id
            record.update(target_tokens=tgt_tokens,
                          tgt_token_ids=tgt_token_ids,
                          token_output=token_output)
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[LabelOrLabels]:
        """Convert each prediction tensor into a ``Label``.

        ``show_detail`` and ``threshold`` are accepted but not used here.
        """

        def _tensor2output(feature, pred_tensor) -> LabelOrLabels:
            mask_idx_start, mask_idx_end = feature["mask_span"]
            # Keep only the mask positions and zero out non-label tokens
            pred_tensor = pred_tensor[mask_idx_start:mask_idx_end]
            pred_tensor = pred_tensor * self.pred_mask
            # Joint probability: product of the best score per position
            probs = np.max(pred_tensor, axis=-1)
            prob = np.prod(probs)
            pred_tensor = np.argmax(pred_tensor, axis=-1)
            word = "".join(self.tokenizer.id2token(e) for e in pred_tensor)
            # NOTE(review): raises KeyError when the per-position argmax
            # spells a word that is not a key of word2label - confirm.
            label = self.word2label[word]
            return Label(name=label, prob=prob)

        preds = [_tensor2output(f, t) for f, t in zip(features, pred_tensors)]
        return preds
def createModel(max_len, features, dimsEmbed, lr, two_layer=False, bidir=False, cells=32, regularization_base=2e-6, locality_term=False, batch_size=None, locality_power=1, **kwargs):
    """Build the LSTM encoder/decoder and return (model, encoder, decoder).

    Args:
        max_len: length used for the sequence inputs, the embeddings'
            ``input_length`` and the ``RepeatVector`` in the decoder.
        features: size of the softmax output; the first embedding has
            ``features + 1`` entries.
        dimsEmbed: output dimension of the first embedding.
        lr: first positional argument of both ``Adam`` optimizers.
        two_layer: adds a second LSTM to the encoder and to the decoder.
        bidir: wraps the encoder LSTMs in ``Bidirectional``.
        cells: units of the LSTMs and of the h/c dense layers, and output
            dimension of the second (frozen) embedding.
        regularization_base: base factor of the l1_l2 regularizers.
        locality_term: adds the "inputs3" input to model/encoder and the
            locality loss and metric to the model.
        batch_size: both dimensions of the "inputs3" input shape.
        locality_power: factor applied to the locality loss.
        **kwargs: optional switches read here: 'regularization_base_latent'
            (defaults to regularization_base), 'timeWindowsConstant',
            'decoderInpDenses', 'inpHZeros', 'inpCZeros',
            'outAdditionalDense' (each defaults to False).

    Returns:
        model: full network, its output is ``decoder([h, c, embed2])``.
        encoder: maps the model inputs to ``[h, c, embed2]``.
        decoder: maps (h, c, previous input) to the softmax sequence.
        ``model`` and ``decoder`` are compiled; ``encoder`` is not.
    """
    print(tf.executing_eagerly())
    inp = Input(shape=(max_len, ), name="inputs1")
    inp2 = Input(shape=(max_len, ), name="inputs2")
    inp3 = Input(shape=(batch_size, batch_size), name="inputs3")
    if 'regularization_base_latent' in kwargs:
        regularization_base_latent = kwargs['regularization_base_latent']
    else:
        regularization_base_latent = regularization_base
    embedLayer = Embedding(features + 1,
                           dimsEmbed,
                           input_length=max_len,
                           embeddings_initializer=he_uniform(2),
                           mask_zero=True,
                           name='inpEmbed1')
    prev_layer = embedLayer(inp)
    embed2Layer = Embedding(2,
                            cells,
                            input_length=max_len,
                            embeddings_initializer=he_uniform(3),
                            mask_zero=True,
                            trainable=False,
                            name='inpEmbed2')
    embed2 = embed2Layer(inp2)
    # Overwrite the frozen embedding: row 0 becomes all zeros, row 1 all ones.
    # NOTE(review): set_weights receives an array with a leading axis of
    # length 1 rather than a list - presumably it is iterated as a
    # one-element weight list; confirm.
    embed2Layer.set_weights(
        np.expand_dims(np.stack([
            np.zeros_like(embed2Layer.get_weights()[0][0]),
            np.ones_like(embed2Layer.get_weights()[0][0])
        ],
                                axis=0),
                       axis=0))
    if two_layer:
        # Optional first encoder LSTM that keeps the full sequence
        lstmEncSecLayer = LSTM(cells,
                               activation='relu',
                               name='encoder2Layer',
                               return_sequences=True,
                               kernel_initializer=he_normal(1),
                               kernel_regularizer=l1_l2(
                                   regularization_base, regularization_base),
                               bias_regularizer=l1_l2(2 * regularization_base,
                                                      2 * regularization_base))
        if bidir:
            lstmEncSecLayer = Bidirectional(lstmEncSecLayer)
        prev_layer = lstmEncSecLayer(prev_layer)
    lstmEncLayer = LSTM(cells,
                        activation='relu',
                        return_sequences=False,
                        kernel_initializer=he_normal(5),
                        name='encoderLayer',
                        return_state=True,
                        recurrent_regularizer=l1_l2(regularization_base / 20,
                                                    regularization_base / 20),
                        kernel_regularizer=l1_l2(regularization_base,
                                                 regularization_base),
                        bias_regularizer=l1_l2(regularization_base * 2,
                                               regularization_base * 2))
    if bidir:
        # The wrapped layer returns four state tensors besides its output
        lstmEncLayer = Bidirectional(lstmEncLayer)
        enc, h1, h2, h3, h4 = lstmEncLayer(prev_layer)
        concat = Concatenate()([h1, h2, h3, h4])
    else:
        enc, h1, c1 = lstmEncLayer(prev_layer)
        concat = Concatenate()([h1, c1])
    bn1 = BatchNormalization(name='bn1')(concat)
    # Two dense heads on the normalised states: h (tanh) and c (linear)
    hLayer = Dense(cells,
                   activation='tanh',
                   use_bias=True,
                   kernel_initializer=he_normal(10),
                   name='hDense',
                   activity_regularizer=l1_l2(regularization_base_latent / 2,
                                              regularization_base_latent / 2),
                   bias_regularizer=l1_l2(regularization_base_latent * 2.5,
                                          regularization_base_latent * 2.5))
    h = hLayer(bn1)
    cLayer = Dense(cells,
                   activation='linear',
                   use_bias=True,
                   kernel_initializer=he_normal(11),
                   name='cDense',
                   activity_regularizer=l1_l2(regularization_base_latent / 2,
                                              regularization_base_latent / 2),
                   bias_regularizer=l1_l2(regularization_base_latent * 2.5,
                                          regularization_base_latent * 2.5))
    c = cLayer(bn1)
    if locality_term:
        locality1 = Lambda(locality1_op)([h, c])
        locality2 = Lambda(locality2_op)(inp3)
        locality_layer = Lambda(locality_term_op)([locality1, locality2])

    # ---- Decoder: a separate model with its own inputs ----
    decoderInpH = Input((cells, ))
    decoderInpC = Input((cells, ))
    decoderPrevInput = Input(((max_len, cells)))
    print("model kwargs: ", kwargs)
    # Optional switches, each False unless given in kwargs
    timeWindowsConstant = False if 'timeWindowsConstant' not in kwargs else kwargs[
        'timeWindowsConstant']
    decoderInpDenses = False if 'decoderInpDenses' not in kwargs else kwargs[
        'decoderInpDenses']
    inpHZeros = False if 'inpHZeros' not in kwargs else kwargs['inpHZeros']
    inpCZeros = False if 'inpCZeros' not in kwargs else kwargs['inpCZeros']
    outAdditionalDense = False if 'outAdditionalDense' not in kwargs else kwargs[
        'outAdditionalDense']
    decoderInpH_topass = decoderInpH
    decoderInpC_topass = decoderInpC
    if timeWindowsConstant:
        rep = decoderPrevInput
    else:
        rep = RepeatVector(max_len)(decoderInpH)
    # mult = Multiply()([decoderPrevInput, decoderPrevInput])
    if decoderInpDenses:
        # Three dense layers in front of each initial state (h and c)
        decoderInpHDense1 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32678),
            name='decInpHDense1',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpH)
        decoderInpHDense2 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32679),
            name='decInpHDense2',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpHDense1)
        decoderInpH_topass = Dense(
            cells,
            activation='tanh',
            kernel_initializer=he_normal(72679),
            name='decInpHDense3',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpHDense2)
        decoderInpCDense1 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32618),
            name='decInpCDense1',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpC)
        decoderInpCDense2 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32628),
            name='decInpCDense2',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpCDense1)
        decoderInpC_topass = Dense(
            cells,
            activation='linear',
            kernel_initializer=he_normal(32619),
            name='decInpCDense3',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpCDense2)
    if inpHZeros:
        # Replace the initial h state by zeros of the same shape
        decoderInpH_topass = Lambda(lambda x: x * 0.0)(decoderInpH_topass)
    if inpCZeros:
        # Replace the initial c state by zeros of the same shape
        decoderInpC_topass = Lambda(lambda x: x * 0.0)(decoderInpC_topass)
    # if not timeWindowsConstant:
    mult = Multiply()([rep, decoderPrevInput])
    mask = Masking(0.0)(mult)
    prev_layer = LSTM(cells,
                      activation='relu',
                      return_sequences=True,
                      kernel_initializer=he_normal(21),
                      name='decoder1',
                      kernel_regularizer=l1_l2(regularization_base * 1.5,
                                               regularization_base * 1.5),
                      bias_regularizer=l1_l2(regularization_base * 2.5,
                                             regularization_base * 2.5))(
                                                 mask,
                                                 initial_state=[
                                                     decoderInpH_topass,
                                                     decoderInpC_topass
                                                 ])
    if two_layer:
        prev_layer = LSTM(
            cells,
            activation='relu',
            return_sequences=True,
            kernel_initializer=he_normal(35),
            name='decoder2',
            kernel_regularizer=l1_l2(regularization_base * 1.5,
                                     regularization_base * 1.5),
            bias_regularizer=l1_l2(regularization_base * 2.5,
                                   regularization_base * 2.5))(prev_layer)
    # to comment
    if outAdditionalDense:
        outPrev = TimeDistributed(Dense(
            32,
            activation='relu',
            kernel_initializer=he_normal(836),
            name='densePrevOut',
            kernel_regularizer=l1_l2(regularization_base * 2.0,
                                     regularization_base * 2.0),
            bias_regularizer=l1_l2(regularization_base * 2.5,
                                   regularization_base * 2.5)),
                                  name='densePrevOut')(prev_layer)
        out = TimeDistributed(Dense(features,
                                    activation='softmax',
                                    kernel_initializer=he_normal(100),
                                    name='denseOut'),
                              name='denseOut')(outPrev)
    else:
        out = TimeDistributed(Dense(features,
                                    activation='softmax',
                                    kernel_initializer=he_normal(100),
                                    name='denseOut'),
                              name='denseOut')(prev_layer)
    decoder = Model(inputs=[decoderInpH, decoderInpC, decoderPrevInput],
                    outputs=out)
    # The full model feeds the encoder states and the frozen embedding of
    # inputs2 into the decoder
    if locality_term:
        model = Model(inputs=[inp, inp2, inp3],
                      outputs=decoder([h, c, embed2]))
    else:
        model = Model(inputs=[inp, inp2], outputs=decoder([h, c, embed2]))
    model.summary()
    decoder.summary()
    if locality_term:
        print("Using locality term! Locality power: ", locality_power)
        # NOTE(review): with the default integer locality_power this builds
        # an integer constant - confirm it multiplies cleanly with the dtype
        # of locality_layer.
        locality_loss = (1 - locality_layer) * tf.constant(locality_power)
        model.add_loss(locality_loss)
        model.add_metric(locality_loss, name='locality', aggregation='mean')
        # model.add_metric(get_gradient_norm(model), name='locality', aggregation='mean')
        # model.add_metric(locality_loss, name='localityS', aggregation='sum')
    model.compile(optimizer=Adam(lr, clipnorm=1.0, clipvalue=0.5),
                  loss='categorical_crossentropy')
    decoder.compile(optimizer=Adam(lr, clipnorm=1.0, clipvalue=0.5),
                    loss='categorical_crossentropy')
    # model.metrics_tensors = []
    if locality_term:
        encoder = Model(inputs=[inp, inp2, inp3], outputs=[h, c, embed2])
    else:
        encoder = Model(inputs=[inp, inp2], outputs=[h, c, embed2])
    return model, encoder, decoder
def init_model(backbone_model_name, freeze_backbone_for_N_epochs, input_shape,
               region_num, attribute_name_to_label_encoder_dict,
               kernel_regularization_factor, bias_regularization_factor,
               gamma_regularization_factor, beta_regularization_factor,
               pooling_mode, min_value, max_value, use_horizontal_flipping):
    """Build the inference model and the compiled training model.

    Args:
        backbone_model_name: key into the mapping returned by
            ``Applications().get_model_name_to_model_function()``.
        freeze_backbone_for_N_epochs: when positive, ``specify_trainable`` is
            called with ``trainable=False`` for the backbone block names
            before compiling.
        input_shape: shape of the image input.
        region_num: number of slices taken along axis 1 of the regional
            branch output; 0 disables the regional branch.
        attribute_name_to_label_encoder_dict: its "identity_ID" entry
            provides ``classes_``, whose length sizes the softmax layers.
        kernel_regularization_factor, bias_regularization_factor,
        gamma_regularization_factor, beta_regularization_factor: forwarded to
            ``specify_regularizers`` for the inference and classification
            models.
        pooling_mode: "Average", "Max" or "GeM".
        min_value, max_value: when both are not None, pooled features are
            clipped to this range.
        use_horizontal_flipping: also feeds a left-right flipped copy of the
            input, averages both feature sets and adds a flipping loss.

    Returns:
        (training_model, inference_model). The training model outputs the
        feature tensors followed by the classification tensors; its compile
        arguments are also stored in ``training_model.compile_kwargs``.
    """

    def _add_pooling_module(input_tensor):
        # Add a global pooling layer
        output_tensor = input_tensor
        # Only rank-4 tensors are pooled; other ranks pass through unchanged
        if len(K.int_shape(output_tensor)) == 4:
            if pooling_mode == "Average":
                output_tensor = GlobalAveragePooling2D()(output_tensor)
            elif pooling_mode == "Max":
                output_tensor = GlobalMaxPooling2D()(output_tensor)
            elif pooling_mode == "GeM":
                output_tensor = GlobalGeMPooling2D()(output_tensor)
            else:
                assert False, "{} is an invalid argument!".format(pooling_mode)

        # Add the clipping operation
        if min_value is not None and max_value is not None:
            output_tensor = Lambda(lambda x: K.clip(
                x, min_value=min_value, max_value=max_value))(output_tensor)

        return output_tensor

    def _add_classification_module(input_tensor):
        # Add a batch normalization layer
        output_tensor = input_tensor
        output_tensor = BatchNormalization(epsilon=2e-5)(output_tensor)

        # Add a dense layer with softmax activation
        label_encoder = attribute_name_to_label_encoder_dict["identity_ID"]
        class_num = len(label_encoder.classes_)
        output_tensor = Dense(units=class_num,
                              use_bias=False,
                              kernel_initializer=RandomNormal(
                                  mean=0.0, stddev=0.001))(output_tensor)
        output_tensor = Activation("softmax")(output_tensor)

        return output_tensor

    def _triplet_hermans_loss(y_true,
                              y_pred,
                              metric="euclidean",
                              margin="soft"):
        # Create the loss in two steps:
        # 1. Compute all pairwise distances according to the specified metric.
        # 2. For each anchor along the first dimension, compute its loss.
        dists = cdist(y_pred, y_pred, metric=metric)
        # Identities are recovered from y_true with an argmax over the last axis
        loss = batch_hard(dists=dists,
                          pids=tf.argmax(y_true, axis=-1),
                          margin=margin)
        return loss

    # Initiation
    miscellaneous_output_tensor_list = []

    # Initiate the early blocks
    applications_instance = Applications()
    model_name_to_model_function = applications_instance.get_model_name_to_model_function(
    )
    assert backbone_model_name in model_name_to_model_function.keys(
    ), "Backbone {} is not supported.".format(backbone_model_name)
    model_function = model_name_to_model_function[backbone_model_name]
    blocks = applications_instance.get_model_in_blocks(
        model_function=model_function, include_top=False)
    vanilla_input_tensor = Input(shape=input_shape)
    intermediate_output_tensor = vanilla_input_tensor
    # All blocks except the last are chained and shared by both branches
    for block in blocks[:-1]:
        block = Applications.wrap_block(block, intermediate_output_tensor)
        intermediate_output_tensor = block(intermediate_output_tensor)

    # Initiate the last blocks
    # The last block is replicated once per branch
    last_block = Applications.wrap_block(blocks[-1],
                                         intermediate_output_tensor)
    last_block_for_global_branch_model = replicate_model(
        model=last_block, suffix="global_branch")
    last_block_for_regional_branch_model = replicate_model(
        model=last_block, suffix="regional_branch")

    # Add the global branch
    miscellaneous_output_tensor = _add_pooling_module(
        input_tensor=last_block_for_global_branch_model(
            intermediate_output_tensor))
    miscellaneous_output_tensor_list.append(miscellaneous_output_tensor)

    # Add the regional branch
    if region_num > 0:
        # Process each region
        regional_branch_output_tensor = last_block_for_regional_branch_model(
            intermediate_output_tensor)
        total_height = K.int_shape(regional_branch_output_tensor)[1]
        region_size = total_height // region_num
        for region_index in np.arange(region_num):
            # Get a slice of feature maps
            start_index = region_index * region_size
            end_index = (region_index + 1) * region_size
            # The last region absorbs the remainder rows
            if region_index == region_num - 1:
                end_index = total_height
            # Default arguments bind the current indices to this lambda
            sliced_regional_branch_output_tensor = Lambda(
                lambda x, start_index=start_index, end_index=end_index:
                x[:, start_index:end_index])(regional_branch_output_tensor)

            # Downsampling
            sliced_regional_branch_output_tensor = Conv2D(
                filters=K.int_shape(sliced_regional_branch_output_tensor)[-1]
                // region_num,
                kernel_size=3,
                padding="same")(sliced_regional_branch_output_tensor)
            sliced_regional_branch_output_tensor = Activation("relu")(
                sliced_regional_branch_output_tensor)

            # Add the regional branch
            miscellaneous_output_tensor = _add_pooling_module(
                input_tensor=sliced_regional_branch_output_tensor)
            miscellaneous_output_tensor_list.append(
                miscellaneous_output_tensor)

    # Define the model used in inference
    inference_model = Model(inputs=[vanilla_input_tensor],
                            outputs=miscellaneous_output_tensor_list,
                            name="inference_model")
    specify_regularizers(inference_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in classification
    # One input and one classification head per feature tensor
    classification_input_tensor_list = [
        Input(shape=K.int_shape(item)[1:])
        for item in miscellaneous_output_tensor_list
    ]
    classification_output_tensor_list = []
    for classification_input_tensor in classification_input_tensor_list:
        classification_output_tensor = _add_classification_module(
            input_tensor=classification_input_tensor)
        classification_output_tensor_list.append(classification_output_tensor)
    classification_model = Model(inputs=classification_input_tensor_list,
                                 outputs=classification_output_tensor_list,
                                 name="classification_model")
    specify_regularizers(classification_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in training
    # A model call may return a single tensor; normalise it to a list
    expand = lambda x: x if isinstance(x, list) else [x]
    vanilla_input_tensor = Input(shape=K.int_shape(inference_model.input)[1:])
    vanilla_feature_tensor_list = expand(inference_model(vanilla_input_tensor))
    if use_horizontal_flipping:
        flipped_input_tensor = tf.image.flip_left_right(vanilla_input_tensor)
        flipped_feature_tensor_list = expand(
            inference_model(flipped_input_tensor))
        # Average the features of the original and the flipped input
        merged_feature_tensor_list = [
            sum(item_tuple) / 2 for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
    else:
        merged_feature_tensor_list = vanilla_feature_tensor_list
    miscellaneous_output_tensor_list = merged_feature_tensor_list
    classification_output_tensor_list = expand(
        classification_model(merged_feature_tensor_list))
    training_model = Model(inputs=[vanilla_input_tensor],
                           outputs=miscellaneous_output_tensor_list +
                           classification_output_tensor_list,
                           name="training_model")

    # Add the flipping loss
    if use_horizontal_flipping:
        # Mean squared error between original and flipped features,
        # summed over all feature tensors
        flipping_loss_list = [
            K.mean(mean_squared_error(*item_tuple)) for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
        flipping_loss = sum(flipping_loss_list)
        training_model.add_metric(flipping_loss,
                                  name="flipping",
                                  aggregation="mean")
        training_model.add_loss(1.0 * flipping_loss)

    # Compile the model
    # Feature outputs use the triplet loss, classification outputs use
    # categorical cross-entropy; the list order matches the model outputs
    triplet_hermans_loss_function = lambda y_true, y_pred: 1.0 * _triplet_hermans_loss(
        y_true, y_pred)
    miscellaneous_loss_function_list = [
        triplet_hermans_loss_function
    ] * len(miscellaneous_output_tensor_list)
    categorical_crossentropy_loss_function = lambda y_true, y_pred: 1.0 * categorical_crossentropy(
        y_true, y_pred, from_logits=False, label_smoothing=0.0)
    classification_loss_function_list = [
        categorical_crossentropy_loss_function
    ] * len(classification_output_tensor_list)
    # The compile arguments are kept on the model object
    # (presumably so a caller can compile again later - confirm)
    training_model.compile_kwargs = {
        "optimizer":
            Adam(),
        "loss":
            miscellaneous_loss_function_list +
            classification_loss_function_list
    }
    if freeze_backbone_for_N_epochs > 0:
        specify_trainable(model=training_model,
                          trainable=False,
                          keywords=[block.name for block in blocks])
    training_model.compile(**training_model.compile_kwargs)

    # Print the summary of the training model
    summarize_model(training_model)

    return training_model, inference_model
class GlobalPointerModel(AbstractTextSpanClassifyModelAIConfig, TFBasedModel):
    """Text-span classifier that puts a ``GlobalPointer`` head on a sequence encoder.

    The network output is indexed as ``[label, token_start, token_end]`` (see how
    ``_feature2records`` fills ``classify_output`` and how ``_post_predict``
    reads the prediction tensor).
    """

    def _load_config(self, config):
        """Read ``max_len`` and the label vocabulary from ``self.task_config``."""
        super()._load_config(config)
        self.max_len = self.task_config['max_len']
        self.labels = load_lines(self.task_config['label_file_path'])
        self.label2id, self.id2label = seq2dict(self.labels)
        self.label_num = len(self.label2id)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    bilstm_dim_list=None,
                    head_size=64,
                    pos_weight=1,
                    **kwargs):
        """
        Build the inference network.

        Args:
            head_size: embedding size of the GlobalPointer layer
            pretrained_model_path: location of the pretrained model
            pretrained_model_tag: kind of pretrained model (bert/...)
            bilstm_dim_list: dimension of each BiLSTM layer to stack during
                sequence encoding; ``None`` or an empty list means none
            pos_weight: weight of positive examples (applied as an exponent on
                the GlobalPointer output)
            **kwargs: forwarded to ``get_sequence_encoder_model``
        Returns:
            the Keras model, also stored on ``self.nn_model``
        """
        # ``None`` replaces the former shared ``[]`` default; the encoder still
        # receives a list.
        bilstm_dim_list = [] if bilstm_dim_list is None else bilstm_dim_list
        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                **kwargs)
            sequence_embedding = encoder_model.output
            output = GlobalPointer(self.label_num,
                                   head_size)(sequence_embedding)
            output = Lambda(lambda x: x**pos_weight,
                            name="pos_weight_layer")(output)
            self.nn_model = Model(inputs=encoder_model.inputs,
                                  outputs=[output],
                                  name="token_classify_model")
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
            self._update_model_dict("test", self.nn_model)
            return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name: str, optimizer_args: dict):
        """Wrap ``self.nn_model`` in a training model with loss/metric attached.

        The training model takes an extra ``classify_output`` input holding the
        targets; loss and F1 metric are added via ``add_loss``/``add_metric``
        so ``compile`` only needs the optimizer.
        """
        logger.info(
            f"compile model with optimizer_name:{optimizer_name}, optimizer_args:{optimizer_args}"
        )
        with self.get_scope():
            classify_output = Input(shape=(self.label_num, None, None),
                                    dtype=tf.float32,
                                    name='classify_output')
            token_ids, segment_ids = self.nn_model.inputs
            output = self.nn_model([token_ids, segment_ids])
            self.train_model = Model(
                inputs=[token_ids, segment_ids, classify_output],
                outputs=[output])

            loss_layer = LossLayer(loss_func=global_pointer_crossentropy,
                                   name="loss_layer")
            loss = loss_layer([classify_output, output])
            self.train_model.add_loss(loss)

            accuracy_func = global_pointer_f1_score
            metric_layer = MetricLayer(accuracy_func, name="metric_layer")
            metric = metric_layer([classify_output, output])
            self.train_model.add_metric(metric,
                                        aggregation="mean",
                                        name="global_pointer_f1_score")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            self.train_model.compile(optimizer=optimizer)
            logger.info("training model's summary:")
            self.train_model.summary(print_fn=logger.info)
            self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextSpanClassifyExample) -> Dict:
        """Tokenize ``example.text``; labeled examples also carry their spans."""
        feature = self.tokenizer.do_tokenize(text=example.text, store_map=True)
        if isinstance(example, LabeledTextSpanClassifyExample):
            feature.update(text_spans=[
                e.dict(exclude_none=True) for e in example.text_spans
            ])
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[dict]:
        """Turn one feature into a single-record list.

        In ``"train"`` mode a ``(label_num, token_len, token_len)`` target
        array is added with a 1 at ``[label, start_token, end_token]`` for each
        span.

        Raises:
            ValueError: in train mode when the feature has no ``text_spans``.
        """
        record = dict(idx=idx, **feature)
        if mode == "train":
            text_spans = feature.get("text_spans")
            if text_spans is None:
                raise ValueError("not text_spans key found in train mode!")
            text_spans: TextSpans = [TextSpan(**e) for e in text_spans]
            char2token = record["char2token"]
            token_len = len(record["tokens"])
            classify_output = np.zeros(shape=(self.label_num, token_len,
                                              token_len))
            for text_span in text_spans:
                label_id = self.label2id[text_span.label]
                token_start = char2token[text_span.span[0]]
                # span[1] is used as an exclusive char end, hence the -1
                token_end = char2token[text_span.span[1] - 1]
                classify_output[label_id][token_start][token_end] = 1
            record.update(classify_output=classify_output)
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self, features, pred_tensors,
                      show_detail) -> List[TextSpans]:
        """Decode prediction tensors into lists of ``TextSpan``.

        Every ``[label, start, end]`` cell with a positive score and
        ``end >= start`` becomes one span; its probability is the sigmoid of
        the score.
        """

        def _tensor2output(feature, pred_tensor) -> TextSpans:
            text_spans = []
            prob_tensor = tf.math.sigmoid(pred_tensor)
            for label_id, start, end in zip(*np.where(pred_tensor > 0)):
                if end < start:
                    # np.where yields indices in row-major order, so a
                    # reversed pair says nothing about later entries: skip
                    # just this one (``break`` would drop every remaining
                    # valid span).
                    continue
                label = self.id2label[label_id]
                char_start = feature["token2char"][start][0]
                char_end = feature["token2char"][end][1]
                text = feature["text"][char_start:char_end]
                prob = prob_tensor[label_id][start][end]
                text_span = TextSpan(text=text,
                                     label=label,
                                     span=(char_start, char_end),
                                     prob=prob)
                text_spans.append(text_span)
            return text_spans

        preds = [_tensor2output(f, p) for f, p in zip(features, pred_tensors)]
        return preds
class SeqLabelingModel(AbstractTextSpanClassifyModelAIConfig, TFBasedModel):
    """Token-level sequence-labeling model for text-span classification.

    A dense classifier (sigmoid when ``multi_label`` else softmax) sits on top
    of a sequence encoder, optionally followed by a CRF layer.
    """

    def _load_config(self, config):
        """Read labeling strategy, ``max_len``, ``multi_label`` and labels."""
        super()._load_config(config)
        self.seq_label_strategy: SeqLabelStrategy = SeqLabelStrategy[
            self.task_config['seq_label_strategy']]
        self.max_len = self.task_config['max_len']
        self.multi_label = self.task_config.get("multi_label", False)
        self.label_list = read_seq_label_file(
            self.task_config['label_file_path'], self.seq_label_strategy)
        self.label2id, self.id2label = seq2dict(self.label_list)
        self.label_num = len(self.label2id)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    bilstm_dim_list=None,
                    use_crf=False,
                    crf_lr_multiplier=100,
                    pos_weight=1,
                    **kwargs):
        """
        Build the inference network.

        Args:
            pretrained_model_path: location of the pretrained model
            pretrained_model_tag: kind of pretrained model (bert/...)
            bilstm_dim_list: dimension of each BiLSTM layer to stack during
                sequence encoding; ``None`` or an empty list means none
            use_crf: whether to append a CRF layer
            crf_lr_multiplier: learning-rate multiplier of the CRF layer,
                see https://kexue.fm/archives/7196
            pos_weight: weight of positive examples (only applied when
                ``multi_label`` is set, as an exponent on the output)
            **kwargs: forwarded to ``get_sequence_encoder_model``.
                NOTE(review): the original docstring listed ``dense_dim_list``
                (sizes of dense layers after encoding, relu by default, empty
                list = none) and ``hidden_dropout_prob`` (dropout after
                encoding); they are not parameters here, so presumably they
                travel through ``**kwargs`` - confirm against the encoder.
        Returns:
            the Keras model, also stored on ``self.nn_model``
        """
        # ``None`` replaces the former shared ``[]`` default; the encoder still
        # receives a list.
        bilstm_dim_list = [] if bilstm_dim_list is None else bilstm_dim_list
        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                **kwargs)
            sequence_embedding = encoder_model.output

            classify_activation = sigmoid if self.multi_label else softmax
            classifier_layer = Dense(
                self.label_num,
                name="token_classifier",
                activation=classify_activation,
                kernel_initializer=TruncatedNormal(stddev=0.02))
            prob_vec_output = classifier_layer(sequence_embedding)

            if use_crf:
                classifier_layer = CRF(lr_multiplier=crf_lr_multiplier,
                                       name="crf_layer")
                prob_vec_output = classifier_layer(prob_vec_output)

            if self.multi_label:
                prob_vec_output = Lambda(
                    lambda x: x**pos_weight,
                    name="pos_weight_layer")(prob_vec_output)

            self.nn_model = Model(inputs=encoder_model.inputs,
                                  outputs=[prob_vec_output],
                                  name="token_classify_model")
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
            self._update_model_dict("test", self.nn_model)
            return self.nn_model

    def compile_model(self, optimizer_name: str, optimizer_args: dict,
                      **kwargs):
        """Wrap ``self.nn_model`` in a training model with masked loss/metric.

        The training model takes an extra ``classify_labels`` input. Positions
        whose token id is 0 are masked out of both loss and accuracy.
        """
        logger.info(
            f"compile model with optimizer_name:{optimizer_name}, optimizer_args:{optimizer_args}"
        )
        with self.get_scope():
            classify_labels = Input(
                shape=(None, self.label_num) if self.multi_label else (None, ),
                name='classify_labels',
                dtype=tf.int32)
            token_ids, segment_ids = self.nn_model.inputs
            output = self.nn_model([token_ids, segment_ids])
            self.train_model = Model(
                inputs=[token_ids, segment_ids, classify_labels],
                outputs=[output])

            loss_mask = Lambda(
                function=lambda x: tf.cast(tf.not_equal(x, 0), tf.float32),
                name="pred_mask")(token_ids)

            # when computing the loss, filter out the loss of pad tokens
            loss_layer = build_classify_loss_layer(
                multi_label=self.multi_label, with_mask=True)
            loss = loss_layer([classify_labels, output, loss_mask])
            self.train_model.add_loss(loss)

            # when computing accuracy, filter out the accuracy of pad tokens
            masked_accuracy_func = masked_binary_accuracy if self.multi_label else masked_sparse_categorical_accuracy
            metric_layer = MetricLayer(masked_accuracy_func)
            masked_accuracy = metric_layer(
                [classify_labels, output, loss_mask])
            self.train_model.add_metric(masked_accuracy,
                                        aggregation="mean",
                                        name="accuracy")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            self.train_model.compile(optimizer=optimizer)
            logger.info("training model's summary:")
            self.train_model.summary(print_fn=logger.info)
            self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextSpanClassifyExample) -> Dict:
        """Tokenize ``example.text``; labeled examples also carry their spans."""
        feature = self.tokenizer.do_tokenize(text=example.text, store_map=True)
        if isinstance(example, LabeledTextSpanClassifyExample):
            feature.update(text_spans=[
                e.dict(exclude_none=True) for e in example.text_spans
            ])
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        """Turn one feature into a single-record list.

        In ``"train"`` mode the per-token label sequence and its model-input
        encoding (``classify_labels``) are added.

        Raises:
            ValueError: in train mode when the feature has no ``text_spans``.
        """
        record = dict(idx=idx, **feature)
        if mode == "train":
            text_spans = feature.get("text_spans")
            if text_spans is None:
                raise ValueError("not text_spans key found in train mode!")
            text_spans = [TextSpan(**e) for e in text_spans]
            token_label_func = get_overlap_token_label_sequence if self.multi_label else get_token_label_sequence
            target_token_label_sequence = token_label_func(
                feature["tokens"], text_spans, feature["char2token"],
                self.seq_label_strategy)
            classify_labels = token_label2classify_label_input(
                target_token_label_sequence, self.multi_label, self.label2id)
            record.update(
                target_token_label_sequence=target_token_label_sequence,
                classify_labels=classify_labels)
        truncate_record(
            record=record,
            max_len=self.max_len,
            keys=["token_ids", "segment_ids", "tokens", "classify_labels"])
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail,
                      threshold=0.5) -> List[TextSpans]:
        """Decode prediction tensors into lists of text spans.

        Predicted labels are cut to the number of tokens and then decoded with
        the strategy-specific decoder; ``show_detail`` logs the non-empty
        per-token predictions.
        """

        def _tensor2output(feature, pred_tensor) -> TextSpans:
            pred_labels = tensor2labels(pred_tensor,
                                        self.multi_label,
                                        self.id2label,
                                        threshold=threshold)
            tokens = feature["tokens"]
            pred_labels = pred_labels[:len(tokens)]
            if show_detail:
                logger.info(f"tokens:{tokens}")
                for idx, (token,
                          pred_label) in enumerate(zip(tokens, pred_labels)):
                    if pred_label and pred_label != self.seq_label_strategy.empty:
                        logger.info(
                            f"idx:{idx}, token:{token}, pred:{pred_label}")
            decode_func = decode_overlap_label_sequence if self.multi_label else decode_label_sequence
            text_spans = decode_func(feature, pred_labels,
                                     self.seq_label_strategy)
            return text_spans

        preds = [_tensor2output(f, p) for f, p in zip(features, pred_tensors)]
        return preds
class TransformerMLMModel(AbstractMLMClassifyModel, TFBasedModel):
    """Masked-language-model wrapper around ``get_mlm_model``."""

    def _load_config(self, config):
        """Read ``max_len`` and ``mask_percent`` (default 0.15) from the task config."""
        super()._load_config(config)
        self.max_len = self.task_config["max_len"]
        self.mask_percent = self.task_config.get("mask_percent", 0.15)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    pos_weight=1.,
                    bilstm_dim_list=None,
                    transformer_kwargs=None,
                    h5_file=None):
        """Build the MLM network and store it on ``self.nn_model``.

        Args:
            pretrained_model_path: location of the pretrained model
            pretrained_model_tag: kind of pretrained model, forwarded to
                ``get_mlm_model`` (previously ignored in favour of a
                hard-coded ``"bert"``)
            pos_weight: accepted for signature parity; not used here
            bilstm_dim_list: accepted for signature parity; not used here
            transformer_kwargs: extra keyword arguments for the transformer;
                the caller's dict is not modified
            h5_file: forwarded to ``get_mlm_model``
        Returns:
            the Keras model
        """
        # Work on a private copy: the original updated the shared ``{}``
        # default (and any caller-supplied dict) in place.
        transformer_kwargs = dict(transformer_kwargs or {})
        with self.get_scope():
            if hasattr(self, 'keep_token_ids'):
                transformer_kwargs.update(keep_tokens=self.keep_token_ids)
            self.nn_model = get_mlm_model(
                pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                transformer_kwargs=transformer_kwargs,
                h5_file=h5_file)
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
            self._update_model_dict("test", self.nn_model)
            return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        """Wrap ``self.nn_model`` in a training model with masked loss/metric.

        The extra ``token_output`` input holds target token ids; positions with
        id 0 are masked out of loss and accuracy. ``rdrop_alpha`` is accepted
        but not used by this implementation.
        """
        logger.info("compiling model...")
        with self.get_scope():
            token_output = Input(shape=(None, ),
                                 name='token_output',
                                 dtype=tf.int32)
            self.train_model = Model(self.nn_model.inputs + [token_output],
                                     self.nn_model.output,
                                     name="train_model")
            output = self.train_model.output

            loss_mask = Lambda(
                function=lambda x: tf.cast(tf.not_equal(x, 0), tf.float32),
                name="pred_mask")(token_output)

            loss_layer = build_classify_loss_layer(multi_label=False,
                                                   with_mask=True)
            loss = loss_layer([token_output, output, loss_mask])
            self.train_model.add_loss(loss)

            accuracy_func = masked_sparse_categorical_accuracy
            metric_layer = MetricLayer(accuracy_func, name="metric_layer")
            accuracy = metric_layer([token_output, output, loss_mask])
            self.train_model.add_metric(accuracy,
                                        aggregation="mean",
                                        name="accuracy")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            self.train_model.compile(optimizer=optimizer)
            logger.info("training model's summary:")
            self.train_model.summary(print_fn=logger.info)
            self._update_model_dict("train", self.train_model)

    def example2feature(self, example: MLMExample) -> Dict:
        """Tokenize the text and record where ``MASK`` tokens sit.

        ``feature["masks"]`` holds ``(position, token)`` pairs for every MASK
        token. When the example supplies ``masked_tokens`` they are paired
        with those positions in order.
        """
        feature = self.tokenizer.do_tokenize(text=example.text)
        tokens = feature["tokens"]
        masks = [e for e in enumerate(tokens) if e[1] == MASK]
        feature["masks"] = masks
        if example.masked_tokens:
            assert len(masks) == len(example.masked_tokens)
            feature["masked_tokens"] = [
                (m[0], t) for m, t in zip(masks, example.masked_tokens)
            ]
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        """Turn one feature into a single-record list.

        In ``"train"`` mode, if no masked tokens were supplied, a
        ``mask_percent`` share of the non-special tokens is sampled. For each
        selected position the target id goes to ``token_output`` and, unless
        the input already shows MASK there, the input token is replaced by
        MASK (r <= 0.8), a random vocabulary entry (0.8 < r <= 0.9) or left
        unchanged.
        """
        record = dict(idx=idx, **feature)
        if mode == "train":
            masked_tokens = feature.get("masked_tokens")
            if not masked_tokens:
                token_infos = [
                    e for e in enumerate(feature["tokens"])
                    if e[1] not in self.tokenizer.special_tokens
                ]
                masked_tokens = random.sample(
                    token_infos, int(len(token_infos) * self.mask_percent))
            token_output = [0] * len(feature["tokens"])
            tokens = copy.copy(feature["tokens"])
            token_ids = copy.copy(feature["token_ids"])
            # ``pos`` rather than ``idx`` so the record index parameter is not
            # shadowed.
            for pos, token in masked_tokens:
                token_output[pos] = self.tokenizer.token2id(token)
                if tokens[pos] != MASK:
                    r = random.random()
                    if r <= 0.8:
                        t = MASK
                    elif r <= 0.9:
                        t = random.choice(self.tokenizer.vocabs)
                    else:
                        t = token
                    tokens[pos] = t
                    token_ids[pos] = self.tokenizer.token2id(t)
            record.update(token_output=token_output,
                          masked_tokens=masked_tokens,
                          tokens=tokens,
                          token_ids=token_ids)
        truncate_record(
            record=record,
            max_len=self.max_len,
            keys=["token_ids", "segment_ids", "tokens", "token_output"])
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[List[str]]:
        """Return, per feature, the predicted token for every MASK position.

        Mask positions at or beyond the prediction length are skipped.
        ``show_detail`` and ``threshold`` are accepted but unused here.
        """

        def _tensor2output(feature, pred_tensor):
            masks = feature["masks"]
            # best-scoring token id at every position
            pred_ids = np.argmax(pred_tensor, axis=-1)
            masked_token_ids = [
                pred_ids[e[0]] for e in masks if e[0] < len(pred_ids)
            ]
            return [self.tokenizer.id2token(i) for i in masked_token_ids]

        preds = [_tensor2output(f, t) for f, t in zip(features, pred_tensors)]
        return preds
class VAE:
    '''
    VAE Model
    Variational autoencoder modeling of the Ising spin configurations
    '''

    def __init__(self,
                 input_shape=(81, 81, 1),
                 scaled=False,
                 padded=False,
                 conv_number=4,
                 filter_base_length=3,
                 filter_base_stride=3,
                 filter_base=32,
                 filter_length=3,
                 filter_stride=3,
                 filter_factor=1,
                 dropout=False,
                 z_dim=2,
                 kl_anneal=False,
                 alpha=1.0,
                 beta=8.0,
                 lamb=1.0,
                 krnl_init='lecun_normal',
                 act='selu',
                 opt='nadam',
                 lr=1e-5,
                 batch_size=128,
                 dataset_size=1115136):
        ''' initialize model parameters and build the encoder/decoder/VAE '''
        self.eps = 1e-8
        self.scaled = scaled
        self.padded = padded
        self.padding = 'same' if self.padded else 'valid'
        # convolutional parameters
        # number of convolutions
        self.conv_number = conv_number
        # number of filters for first convolution
        self.filter_base = filter_base
        # multiplicative factor for filters in subsequent convolutions
        self.filter_factor = filter_factor
        # filter side length
        self.filter_base_length = filter_base_length
        self.filter_length = filter_length
        # filter stride
        self.filter_base_stride = filter_base_stride
        self.filter_stride = filter_stride
        # convolutional input and output shapes
        self.input_shape = input_shape
        self.n_feat = np.prod(self.input_shape)
        self.final_conv_shape = get_final_conv_shape(
            self.input_shape, self.conv_number, self.filter_base_length,
            self.filter_length, self.filter_base_stride, self.filter_stride,
            self.filter_base, self.filter_factor, self.padded)
        self.dropout = dropout
        # latent dimension
        self.z_dim = z_dim
        # whether fit() uses the cyclic KL annealing schedule
        self.kl_anneal_b = kl_anneal
        # total correlation weights
        self.alpha, self.beta, self.lamb = alpha, beta, lamb
        # kernel initializer and activation
        self.krnl_init = krnl_init
        self.act = act
        # decoder output activation depends on whether inputs are scaled
        self.dec_out_act = 'sigmoid' if self.scaled else 'tanh'
        self.out_init = 'glorot_uniform'
        # optimizer name
        self.vae_opt_n = opt
        # learning rate
        self.lr = lr
        # batch size, dataset size, and log importance weight
        self.batch_size = batch_size
        self.dataset_size = dataset_size
        self._set_log_importance_weight()
        self._set_prior_params()
        # loss history
        self.vae_loss_history = []
        self.tc_loss_history = []
        self.rc_loss_history = []
        # past epochs (changes if loading past trained model)
        self.past_epochs = 0
        # checkpoint managers
        self.vae_mngr = None
        # build full model
        self._build_model()

    def get_file_prefix(self):
        ''' gets the filename string prefix encoding the hyperparameters '''
        params = (self.conv_number, self.filter_base_length,
                  self.filter_base_stride, self.filter_base,
                  self.filter_length, self.filter_stride, self.filter_factor,
                  self.dropout, self.z_dim, self.kl_anneal_b, self.alpha,
                  self.beta, self.lamb, self.krnl_init, self.act,
                  self.vae_opt_n, self.lr, self.batch_size)
        file_name = 'btcvae.{}.{}.{}.{}.{}.{}.{}.{:d}.{}.{:d}.{:.0e}.{:.0e}.{:.0e}.{}.{}.{}.{:.0e}.{}'.format(
            *params)
        return file_name

    def _file_stem(self, name, lattice_sites, interval, num_samples, scaled,
                   seed):
        ''' path stem (cwd + run parameters + model prefix) shared by the
        weight, loss and checkpoint files '''
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        return os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix()

    def scale_configurations(self, x):
        ''' maps x linearly via (x + 1) / 2, e.g. -1 -> 0 and 1 -> 1 '''
        return (x + 1) / 2

    def _set_log_importance_weight(self):
        ''' logarithmic importance weights for minibatch stratified sampling '''
        n, m = self.dataset_size, self.batch_size - 1
        strw = np.float32(n - m) / np.float32(n * m)
        w = np.ones((self.batch_size, self.batch_size), dtype=np.float32) / m
        # the flat views write through to w: every (m+1)-th entry is the
        # diagonal, the entry after each diagonal gets the stratified weight
        w.reshape(-1)[::m + 1] = 1. / n
        w.reshape(-1)[1::m + 1] = strw
        w[m - 1, 0] = strw
        self.log_importance_weight = K.log(K.constant(w, dtype=tf.float32))
        return

    def _set_prior_params(self):
        ''' sets the prior parameters: mu = 0, stdv = 1 => log(var) = 0 '''
        self.mu_prior = K.constant(np.zeros(shape=(self.batch_size,
                                                   self.z_dim)),
                                   dtype=tf.float32)
        self.logvar_prior = K.constant(np.zeros(shape=(self.batch_size,
                                                       self.z_dim)),
                                       dtype=tf.float32)
        return

    def sample_gaussian(self, beta):
        ''' samples a point in a multivariate gaussian distribution
        given beta = (mu, log(var)) '''
        mu, logvar = beta
        return mu + K.exp(0.5 * logvar) * K.random_normal(
            shape=(self.batch_size, self.z_dim))

    def sample_logistic(self, shape):
        ''' samples log(u) - log(1 - u) for uniform u, stabilized by eps '''
        u = K.random_uniform(shape, 0.0, 1.0)
        l = K.log(u + self.eps) - K.log(1 - u + self.eps)
        return l

    def sample_bernoulli(self, p):
        ''' steep-sigmoid relaxation of a bernoulli draw with logit p '''
        logp = tf.math.log_sigmoid(p)
        logq = tf.math.log_sigmoid(-p)
        l = self.sample_logistic(K.int_shape(p))
        z = logp - logq + l
        return 1. / (1. + K.exp(-100 * z))

    def gauss_log_density(self, z, beta=None):
        ''' logarithmic probability density for multivariate gaussian
        distribution given samples z and parameters beta = (mu, log(var));
        the prior parameters are used when beta is None '''
        if beta is None:
            mu, logvar = self.mu_prior, self.logvar_prior
        else:
            mu, logvar = beta
        norm = K.log(2 * np.pi)
        zsc = (z - mu) * K.exp(-0.5 * logvar)
        return -0.5 * (zsc**2 + logvar + norm)

    def log_sum_exp(self, z):
        ''' numerically stable logarithmic sum of exponentials over axis 1 '''
        m = K.max(z, axis=1, keepdims=True)
        u = z - m
        m = K.squeeze(m, 1)
        return m + K.log(K.sum(K.exp(u), axis=1, keepdims=False))

    def total_correlation_loss(self):
        ''' decomposed KL term: mutual information, total correlation and
        dimension-wise KLD weighted by alpha, beta and lamb '''
        # log p(z)
        logpz = K.sum(K.reshape(self.gauss_log_density(self.z),
                                shape=(self.batch_size, -1)),
                      axis=1)
        # log q(z|x)
        logqz_x = K.sum(K.reshape(self.gauss_log_density(
            self.z, (self.mu, self.logvar)),
                                  shape=(self.batch_size, -1)),
                        axis=1)
        # log q(z) ~ log (1/MN) sum_m q(z|x_m) = -log(MN)+log(sum_m(exp(q(z|x_m))))
        _logqz = self.gauss_log_density(
            K.reshape(self.z, shape=(self.batch_size, 1, self.z_dim)),
            (K.reshape(self.mu, shape=(1, self.batch_size, self.z_dim)),
             K.reshape(self.logvar, shape=(1, self.batch_size, self.z_dim))))
        logqz_prodmarginals = K.sum(self.log_sum_exp(
            K.reshape(self.log_importance_weight,
                      shape=(self.batch_size, self.batch_size, 1)) + _logqz),
                                    axis=1)
        logqz = self.log_sum_exp(self.log_importance_weight +
                                 K.sum(_logqz, axis=2))
        # alpha controls index-code mutual information
        # beta controls total correlation
        # lamb controls dimension-wise kld
        melbo = -self.alpha * (logqz_x - logqz) - self.beta * (
            logqz - logqz_prodmarginals) - self.lamb * (logqz_prodmarginals -
                                                        logpz)
        return -self.kl_anneal * melbo / self.z_dim

    def kullback_leibler_divergence_loss(self):
        ''' annealed, beta-weighted closed-form gaussian KL divergence '''
        return -0.5 * self.kl_anneal * self.beta * K.sum(
            1. + self.logvar - K.square(self.mu) - K.exp(self.logvar),
            axis=-1)

    def reconstruction_loss(self):
        ''' per-sample binary cross-entropy between input and reconstruction,
        averaged over features; unscaled data is first mapped with
        scale_configurations '''
        if not self.scaled:
            x = self.scale_configurations(self.enc_x_input)
            x_hat = self.scale_configurations(self.x_output)
        else:
            x = self.enc_x_input
            x_hat = self.x_output
        return -K.sum(K.reshape(x * K.log(x_hat + self.eps) +
                                (1. - x) * K.log(1. - x_hat + self.eps),
                                shape=(self.batch_size, -1)),
                      axis=-1) / self.n_feat

    def _build_model(self):
        ''' builds each component of the VAE model '''
        self._build_encoder()
        self._build_decoder()
        self._build_vae()

    def _build_encoder(self):
        ''' builds encoder model '''
        # takes sample (real or fake) as input
        self.enc_x_input = Input(batch_shape=(self.batch_size, ) +
                                 self.input_shape,
                                 name='enc_x_input')
        conv = self.enc_x_input
        # iterative convolutions over input
        for i in range(self.conv_number):
            filter_number = get_filter_number(i, self.filter_base,
                                              self.filter_factor)
            filter_length, filter_stride = get_filter_length_stride(
                i, self.filter_base_length, self.filter_base_stride,
                self.filter_length, self.filter_stride)
            conv = Conv2D(filters=filter_number,
                          kernel_size=filter_length,
                          kernel_initializer=self.krnl_init,
                          padding=self.padding,
                          strides=filter_stride,
                          name='enc_conv_{}'.format(i))(conv)
            if self.act == 'lrelu':
                conv = LeakyReLU(alpha=0.1,
                                 name='enc_conv_lrelu_{}'.format(i))(conv)
                conv = BatchNormalization(
                    name='enc_conv_batchnorm_{}'.format(i))(conv)
                if self.dropout:
                    conv = SpatialDropout2D(
                        rate=0.5, name='enc_conv_drop_{}'.format(i))(conv)
            elif self.act == 'selu':
                conv = Activation(activation='selu',
                                  name='enc_conv_selu_{}'.format(i))(conv)
                if self.dropout:
                    conv = AlphaDropout(
                        rate=0.5,
                        noise_shape=(self.batch_size, 1, 1, filter_number),
                        name='enc_conv_drop_{}'.format(i))(conv)
        # flatten final convolutional layer
        x = Flatten(name='enc_fltn_0')(conv)
        if np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            # mean (layer names, typos included, are kept: saved weights and
            # checkpoints may refer to them)
            self.mu = Dense(units=self.z_dim,
                            kernel_initializer=self.out_init,
                            activation='linear',
                            name='enc_mu_ouput')(x)
            # logarithmic variance
            self.logvar = Dense(units=self.z_dim,
                                kernel_initializer=self.out_init,
                                activation='linear',
                                name='enc_logvar_ouput')(x)
            # latent space
            self.z = Lambda(self.sample_gaussian,
                            output_shape=(self.z_dim, ),
                            name='enc_z_output')([self.mu, self.logvar])
            # build encoder
            self.encoder = Model(inputs=[self.enc_x_input],
                                 outputs=[self.mu, self.logvar, self.z],
                                 name='encoder')
        else:
            # latent space (plain autoencoder, no sampling)
            self.z = Dense(self.z_dim,
                           kernel_initializer=self.out_init,
                           activation='sigmoid',
                           name='enc_z_ouput')(x)
            # build encoder
            self.encoder = Model(inputs=[self.enc_x_input],
                                 outputs=[self.z],
                                 name='encoder')

    def _build_decoder(self):
        ''' builds decoder model '''
        # latent input
        self.dec_z_input = Input(batch_shape=(self.batch_size, self.z_dim),
                                 name='dec_z_input')
        x = self.dec_z_input
        # dense layer with same feature count as final convolution
        x = Dense(units=np.prod(self.final_conv_shape),
                  kernel_initializer=self.krnl_init,
                  name='dec_dense_{}'.format(0))(x)
        if self.act == 'lrelu':
            x = LeakyReLU(alpha=0.1, name='dec_dense_lrelu_{}'.format(0))(x)
            x = BatchNormalization(name='dec_dense_batchnorm_{}'.format(0))(x)
        elif self.act == 'selu':
            x = Activation(activation='selu',
                           name='dec_dense_selu_{}'.format(0))(x)
        # reshape to final convolution shape
        convt = Reshape(target_shape=self.final_conv_shape,
                        name='dec_rshp_0')(x)
        if self.dropout:
            if self.act == 'lrelu':
                convt = SpatialDropout2D(rate=0.5,
                                         name='dec_rshp_drop_0')(convt)
            elif self.act == 'selu':
                convt = AlphaDropout(rate=0.5,
                                     noise_shape=(self.batch_size, 1, 1,
                                                  self.final_conv_shape[-1]),
                                     name='dec_rshp_drop_0')(convt)
        # transform to sample shape with transposed convolutions;
        # u numbers the layers 0, 1, ... while i walks the encoder in reverse
        for u, i in enumerate(range(self.conv_number - 1, 0, -1)):
            filter_number = get_filter_number(i - 1, self.filter_base,
                                              self.filter_factor)
            convt = Conv2DTranspose(filters=filter_number,
                                    kernel_size=self.filter_length,
                                    kernel_initializer=self.krnl_init,
                                    padding=self.padding,
                                    strides=self.filter_stride,
                                    name='dec_convt_{}'.format(u))(convt)
            if self.act == 'lrelu':
                convt = LeakyReLU(alpha=0.1,
                                  name='dec_convt_lrelu_{}'.format(u))(convt)
                convt = BatchNormalization(
                    name='dec_convt_batchnorm_{}'.format(u))(convt)
                if self.dropout:
                    convt = SpatialDropout2D(
                        rate=0.5, name='dec_convt_drop_{}'.format(u))(convt)
            elif self.act == 'selu':
                convt = Activation(activation='selu',
                                   name='dec_convt_selu_{}'.format(u))(convt)
                if self.dropout:
                    convt = AlphaDropout(
                        rate=0.5,
                        noise_shape=(self.batch_size, 1, 1, filter_number),
                        name='dec_convt_drop_{}'.format(u))(convt)
        self.dec_x_output = Conv2DTranspose(
            filters=1,
            kernel_size=self.filter_base_length,
            kernel_initializer=self.out_init,
            activation=self.dec_out_act,
            padding=self.padding,
            strides=self.filter_base_stride,
            name='dec_x_output')(convt)
        # build decoder
        self.decoder = Model(inputs=[self.dec_z_input],
                             outputs=[self.dec_x_output],
                             name='decoder')

    def _make_optimizer(self):
        ''' instantiates the optimizer named by self.vae_opt_n;
        raises ValueError for an unknown name '''
        factories = {
            'sgd':
            lambda: SGD(learning_rate=self.lr),
            'sgdm':
            lambda: SGD(learning_rate=self.lr, momentum=0.5),
            'nsgd':
            lambda: SGD(learning_rate=self.lr, momentum=0.5, nesterov=True),
            'rmsprop':
            lambda: RMSprop(learning_rate=self.lr),
            'rmsprop_cent':
            lambda: RMSprop(learning_rate=self.lr, centered=True),
            'adam':
            lambda: Adam(learning_rate=self.lr, beta_1=0.5),
            'adam_ams':
            lambda: Adam(learning_rate=self.lr, beta_1=0.5, amsgrad=True),
            'adamax':
            lambda: Adamax(learning_rate=self.lr, beta_1=0.5),
            # NOTE(review): Keras Adamax does not document an `amsgrad`
            # argument; this option may raise at construction - confirm.
            'adamax_ams':
            lambda: Adamax(learning_rate=self.lr, beta_1=0.5, amsgrad=True),
            'nadam':
            lambda: Nadam(learning_rate=self.lr, beta_1=0.5),
        }
        try:
            factory = factories[self.vae_opt_n]
        except KeyError:
            raise ValueError('unknown optimizer: {!r}'.format(
                self.vae_opt_n)) from None
        return factory()

    def _build_vae(self):
        ''' builds variational autoencoder network '''
        self.kl_anneal = Input(batch_shape=(self.batch_size, ),
                               name='kl_anneal')
        weights = np.array([self.alpha, self.beta, self.lamb])
        # build VAE
        if np.all(weights == 0):
            # plain autoencoder: reconstruction loss only
            self.x_output = self.decoder(self.encoder(self.enc_x_input))
            self.vae = Model(inputs=[self.enc_x_input],
                             outputs=[self.x_output],
                             name='variational_autoencoder')
        elif self.alpha == self.beta == self.lamb:
            # equal weights: closed-form KL divergence
            self.x_output = self.decoder(self.encoder(self.enc_x_input)[2])
            self.vae = Model(inputs=[self.enc_x_input, self.kl_anneal],
                             outputs=[self.x_output],
                             name='variational_autoencoder')
            # NOTE(review): the loss method already multiplies by kl_anneal,
            # so the anneal factor enters squared - confirm this is intended.
            tc_loss = self.kl_anneal * self.kullback_leibler_divergence_loss()
            self.vae.add_loss(tc_loss)
            self.vae.add_metric(tc_loss, name='tc_loss', aggregation='mean')
        elif np.any(weights > 0):
            # general case: decomposed total-correlation loss
            self.x_output = self.decoder(self.encoder(self.enc_x_input)[2])
            self.vae = Model(inputs=[self.enc_x_input, self.kl_anneal],
                             outputs=[self.x_output],
                             name='variational_autoencoder')
            # NOTE(review): same doubled kl_anneal factor as above.
            tc_loss = self.kl_anneal * self.total_correlation_loss()
            self.vae.add_loss(tc_loss)
            self.vae.add_metric(tc_loss, name='tc_loss', aggregation='mean')
        else:
            # previously fell through and failed later with AttributeError
            raise ValueError(
                'unsupported alpha/beta/lamb combination: {}'.format(
                    (self.alpha, self.beta, self.lamb)))
        # define VAE optimizer
        self.vae_opt = self._make_optimizer()
        # compile VAE (tc_loss is registered before rc_loss, which fixes the
        # order of the values train_on_batch returns)
        rc_loss = self.reconstruction_loss()
        self.vae.add_loss(rc_loss)
        self.vae.add_metric(rc_loss, name='rc_loss', aggregation='mean')
        self.vae.compile(optimizer=self.vae_opt)

    def encode(self, x_batch, verbose=False):
        ''' encoder input configurations '''
        return self.encoder.predict(x_batch,
                                    batch_size=self.batch_size,
                                    verbose=verbose)

    def generate(self, beta_batch, verbose=False):
        ''' generate new configurations using samples from the latent
        distribution '''
        # sample latent space
        if np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            if len(beta_batch) == 2:
                # NOTE(review): this calls a module-level `sample_gaussian`,
                # not self.sample_gaussian - confirm such a function exists
                # in this file.
                z_batch = sample_gaussian(beta_batch)
            else:
                z_batch = beta_batch
        else:
            z_batch = beta_batch
        # generate configurations
        return self.decoder.predict(z_batch,
                                    batch_size=self.batch_size,
                                    verbose=verbose)

    def model_summaries(self):
        ''' print model summaries '''
        self.encoder.summary()
        self.decoder.summary()
        self.vae.summary()

    def save_weights(self, name, lattice_sites, interval, num_samples, scaled,
                     seed):
        ''' save weights to file '''
        file_name = self._file_stem(name, lattice_sites, interval,
                                    num_samples, scaled, seed) + '.weights.h5'
        self.vae.save_weights(file_name)

    def load_weights(self, name, lattice_sites, interval, num_samples, scaled,
                     seed):
        ''' load weights from file '''
        file_name = self._file_stem(name, lattice_sites, interval,
                                    num_samples, scaled, seed) + '.weights.h5'
        self.vae.load_weights(file_name)

    def get_losses(self):
        ''' retrieve loss histories reshaped into (epochs, batches) '''
        vae_loss = np.array(self.vae_loss_history).reshape(
            -1, self.num_batches)
        tc_loss = np.array(self.tc_loss_history).reshape(-1, self.num_batches)
        rc_loss = np.array(self.rc_loss_history).reshape(-1, self.num_batches)
        return vae_loss, tc_loss, rc_loss

    def save_losses(self, name, lattice_sites, interval, num_samples, scaled,
                    seed):
        ''' save loss histories to file, stacked on the last axis '''
        losses = self.get_losses()
        file_name = self._file_stem(name, lattice_sites, interval,
                                    num_samples, scaled, seed) + '.loss.npy'
        np.save(file_name, np.stack(losses, axis=-1))

    def load_losses(self, name, lattice_sites, interval, num_samples, scaled,
                    seed):
        ''' load loss histories from file '''
        file_name = self._file_stem(name, lattice_sites, interval,
                                    num_samples, scaled, seed) + '.loss.npy'
        losses = np.load(file_name)
        # set past epochs
        self.past_epochs = losses.shape[0]
        self.num_batches = losses.shape[1]
        # change loss histories into lists
        self.vae_loss_history = list(losses[:, :, 0].reshape(-1))
        self.tc_loss_history = list(losses[:, :, 1].reshape(-1))
        self.rc_loss_history = list(losses[:, :, 2].reshape(-1))

    def initialize_checkpoint_managers(self, name, lattice_sites, interval,
                                       num_samples, scaled, seed):
        ''' initialize training checkpoint managers '''
        # initialize checkpoints
        self.vae_ckpt = Checkpoint(step=tf.Variable(0),
                                   optimizer=self.vae_opt,
                                   net=self.vae)
        directory = self._file_stem(name, lattice_sites, interval,
                                    num_samples, scaled, seed) + '.ckpts'
        # initialize checkpoint managers
        self.vae_mngr = CheckpointManager(self.vae_ckpt,
                                          directory + '/vae/',
                                          max_to_keep=4)

    def load_latest_checkpoint(self, name, lattice_sites, interval,
                               num_samples, scaled, seed):
        ''' load latest training checkpoint from file '''
        # initialize checkpoint managers
        self.initialize_checkpoint_managers(name, lattice_sites, interval,
                                            num_samples, scaled, seed)
        self.load_losses(name, lattice_sites, interval, num_samples, scaled,
                         seed)
        # restore checkpoints
        self.vae_ckpt.restore(
            self.vae_mngr.latest_checkpoint).assert_consumed()

    def train_vae(self, x_batch, kl_anneal):
        ''' train VAE on one batch and append to the loss histories '''
        if np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            vae_loss, tc_loss, rc_loss = self.vae.train_on_batch(
                [x_batch, kl_anneal])
            self.vae_loss_history.append(vae_loss.mean())
            self.tc_loss_history.append(tc_loss)
            self.rc_loss_history.append(rc_loss)
        else:
            vae_loss, rc_loss = self.vae.train_on_batch(x_batch)
            self.vae_loss_history.append(vae_loss.mean())
            self.tc_loss_history.append(0)
            self.rc_loss_history.append(rc_loss)

    def rolling_loss_average(self, epoch, batch):
        ''' calculate rolling loss averages over batches during training;
        epoch is counted relative to this run (past_epochs is added here) '''
        epoch = epoch + self.past_epochs
        # catch case where there are no calculated losses yet
        if batch == 0:
            vae_loss = 0
            tc_loss = 0
            rc_loss = 0
        # calculate rolling average
        else:
            # start index for current epoch
            start = self.num_batches * epoch
            # stop index for current batch (given epoch)
            stop = self.num_batches * epoch + batch + 1
            # average loss histories
            vae_loss = np.mean(self.vae_loss_history[start:stop])
            tc_loss = np.mean(self.tc_loss_history[start:stop])
            rc_loss = np.mean(self.rc_loss_history[start:stop])
        return vae_loss, tc_loss, rc_loss

    @staticmethod
    def _cyclic_kl_anneal(num_epochs, num_batches, n_cycles=4):
        ''' (num_epochs, num_batches) schedule of n_cycles cycles, each a
        linear 0 -> 1 ramp followed by an equally long plateau at 1 '''
        total = num_epochs * num_batches
        half = total // (2 * n_cycles)
        cycle = np.concatenate((np.linspace(0., 1., half), np.ones(half)))
        schedule = np.tile(cycle, n_cycles)
        # when total is not a multiple of 2 * n_cycles the tiled schedule is
        # short; hold the final steps at full weight so the reshape succeeds
        if schedule.size < total:
            schedule = np.concatenate(
                (schedule, np.ones(total - schedule.size)))
        return schedule.reshape(num_epochs, num_batches)

    def fit(self,
            x_train,
            num_epochs=4,
            save_step=4,
            random_sampling=False,
            verbose=False):
        ''' fit model for num_epochs further epochs; when checkpoint managers
        are initialized a checkpoint is saved every save_step steps '''
        self.num_temp_x, self.num_temp_y, self.num_samples, _, _, = x_train.shape
        self.num_batches = (self.num_temp_x * self.num_temp_y *
                            self.num_samples) // self.batch_size
        if random_sampling:
            x_train = x_train.reshape(
                self.num_temp_x * self.num_temp_y * self.num_samples,
                *self.input_shape)
        else:
            x_train = reorder_training_data(x_train, self.num_temp_x,
                                            self.num_temp_y, self.num_samples,
                                            self.input_shape, self.batch_size)
        num_epochs += self.past_epochs
        if np.all(np.array([self.alpha, self.beta, self.lamb]) == 0):
            kl_anneal = np.zeros((num_epochs, self.num_batches))
        elif not self.kl_anneal_b:
            kl_anneal = np.ones((num_epochs, self.num_batches))
        else:
            kl_anneal = self._cyclic_kl_anneal(num_epochs, self.num_batches)
        lr_factor = np.ones((num_epochs, self.num_batches))
        # loop through epochs
        for i in range(self.past_epochs, num_epochs):
            # construct progress bar for current epoch
            if random_sampling:
                batch_range = trange(self.num_batches,
                                     desc='',
                                     disable=not verbose)
            else:
                b = np.arange(self.num_batches)
                np.random.shuffle(b)
                batch_range = tqdm(b, desc='', disable=not verbose)
            # loop through batches
            u = 0
            for j in batch_range:
                # set batch loss description; rolling_loss_average adds
                # past_epochs itself, so hand it the run-relative epoch
                # (passing i made resumed runs average an empty slice)
                batch_loss = self.rolling_loss_average(i - self.past_epochs, u)
                batch_acc = np.exp(-batch_loss[-1])
                desc = 'Epoch: {}/{} LR Fctr: {:.4f} KL Anl: {:.4f} VAE Lss: {:.4f} TCKLD Lss: {:.4f} RCNST Lss: {:.4f} RCNST Acc: {:.4f}'.format(
                    i + 1, num_epochs, lr_factor[i, u], kl_anneal[i, u],
                    *batch_loss, batch_acc)
                batch_range.set_description(desc)
                # fetch batch
                if random_sampling:
                    x_batch = draw_random_batch(x_train, self.batch_size)
                else:
                    x_batch = draw_indexed_batch(x_train, self.batch_size, j)
                # train VAE
                self.vae_opt.learning_rate = lr_factor[i, u] * self.lr
                self.train_vae(x_batch=x_batch,
                               kl_anneal=kl_anneal[i, u] *
                               np.ones(self.batch_size))
                u += 1
            # if checkpoint managers are initialized
            if self.vae_mngr is not None:
                # increment checkpoint
                self.vae_ckpt.step.assign_add(1)
                # if save step is reached
                if np.int32(self.vae_ckpt.step) % save_step == 0:
                    # save model checkpoint
                    vae_save_path = self.vae_mngr.save()
                    print('Checkpoint DSC: {}'.format(vae_save_path))
elif 'rednet' in model_type: output = model_rednet(input_blur) loss = custom_loss_others(input_sharp) custom_psnr = custom_psnr_others(input_sharp) # Define the model model = Model(inputs=[input_sharp, input_blur], outputs=output) # x_unwrap = generator(input_blur) # model = Model(inputs=[input_sharp, input_blur], outputs=x_unwrap) # Add custom loss and metric model.add_loss(loss) # Since training happens on batch of images we will use the mean of SSIM values of all the images in the batch as the # loss value -> batch_mean(mean_scales_mse) model.add_metric(custom_psnr, name='mean_scales_psnr', aggregation='mean') # name = 'psnr' # Compile the model OPTIMIZER = Adam(lr=initial_lr) model.compile(optimizer=OPTIMIZER) # Print the summary print(model.summary()) # Callbacks tensorboard_callback = TensorBoard(log_dir=log_dir) # , histogram_freq=1, profile_batch='1') save_weights_only = False # PolynomialDecay definition if 'reds' in task: data_size = train_sharp_generator.samples // batch_size
class RelationTokenClassifyModel(AbstractRelationClassifyModel, TFBasedModel):
    """Relation classifier that marks the two entity spans with special tokens.

    The subject span is wrapped in ``[S:<label>]`` ... ``[/S]`` and the object
    span in ``[O:<label>]`` ... ``[/O]``.  Depending on ``embedding_strategy``
    the classifier reads either the first position of the encoder output or
    the positions of those marker tokens.
    """

    custom_objects = dict(TokenExtractLayer=TokenExtractLayer)

    def _load_config(self, config):
        """Read the task settings (labels, max length, embedding strategy) from the config."""
        super()._load_config(config)
        self.max_len = self.task_config['max_len']
        self.multi_label = self.task_config["multi_label"]
        self.text_span_labels = load_lines(self.task_config["text_span_label_path"])
        self.labels = load_lines(self.task_config['label_path'])
        # single-label tasks use sparse (index) targets
        self.sparse_label = not self.multi_label
        self.label2id, self.id2label = seq2dict(self.labels)
        self.label_num = len(self.label2id)
        self.embedding_strategy: EmbeddingStrategy = EmbeddingStrategy[
            self.task_config["embedding_strategy"].upper()]

    def _init_tokenizer(self):
        """Build the tokenizer after registering the span marker tokens in the vocabulary."""
        logger.info("initializing tokenizer")
        # shallow copy so the configured tokenizer args are not modified in place
        tokenizer_args = copy.copy(self.tokenizer_config["tokenizer_args"])
        vocab_path = tokenizer_args["vocabs"]
        vocabs = load_lines(vocab_path)
        special_tokens = flat([[f"[S:{label}]", f"[O:{label}]"] for label in self.text_span_labels])
        special_tokens.extend(["[/S]", "[/O]"])
        vocabs = replace_unused_tokens(vocabs, special_tokens)
        logger.info(f"replacing special tokens:{special_tokens} to vocabs")
        tokenizer_args.update(vocabs=vocabs)
        self.tokenizer = build_tokenizer(self.tokenizer_config["tokenizer_name"], tokenizer_args)
        self.vocab_size = self.tokenizer.vocab_size

    def build_model(self, pretrained_model_path=None, pretrained_model_tag="bert", pos_weight=1.,
                    bilstm_dim_list=None, transformer_kwargs=None, **kwargs):
        """Build the inference model and store it in ``self.nn_model``.

        Args:
            pretrained_model_path: forwarded to ``get_sequence_encoder_model``.
            pretrained_model_tag: forwarded to ``get_sequence_encoder_model``.
            pos_weight: exponent applied to the sigmoid output in multi-label mode.
            bilstm_dim_list: forwarded to the encoder builder; ``None`` means ``[]``.
            transformer_kwargs: forwarded to the encoder builder; ``None`` means ``{}``.
            **kwargs: accepted and ignored.

        Returns:
            The Keras model taking the encoder inputs plus ``span_idxs``.
        """
        # Fresh containers per call: the previous ``[]`` / ``{}`` defaults were
        # shared between calls and handed to an external helper.
        bilstm_dim_list = [] if bilstm_dim_list is None else bilstm_dim_list
        transformer_kwargs = {} if transformer_kwargs is None else transformer_kwargs

        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                transformer_kwargs=transformer_kwargs)
            # order matches example2feature: [S-start, O-start, S-end, O-end]
            span_idxs = Input(name="span_idxs", shape=(4,), dtype=tf.int32)
            sequence_embedding = encoder_model.output
            if self.embedding_strategy != EmbeddingStrategy.CLS:
                # NOTE(review): token_idxs stays None for any strategy other than
                # the two handled below - confirm no such member exists.
                token_idxs = None
                if self.embedding_strategy == EmbeddingStrategy.ENTITY_START_END:
                    token_idxs = span_idxs
                if self.embedding_strategy == EmbeddingStrategy.ENTITY_START:
                    token_idxs = span_idxs[:, :2]
                token_extract_layer = TokenExtractLayer(name="token_extract_layer")
                class_embedding = token_extract_layer([sequence_embedding, token_idxs])
            else:
                class_embedding = Lambda(lambda x: x[:, 0], name="get_cls_layer")(sequence_embedding)

            classify_activation = sigmoid if self.multi_label else softmax
            classifier_layer = Dense(
                self.label_num,
                name="classify_layer",
                activation=classify_activation
            )
            output = classifier_layer(class_embedding)
            if self.multi_label:
                output = Lambda(lambda x: x ** pos_weight, name="pos_weight_layer")(output)
            self.nn_model = Model(inputs=encoder_model.inputs + [span_idxs], outputs=[output])
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
            self._update_model_dict("test", self.nn_model)
            return self.nn_model

    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        """Wrap ``self.nn_model`` into a training model with in-graph loss and accuracy.

        Args:
            optimizer_name: name handed to ``OptimizerFactory.create``.
            optimizer_args: arguments handed to ``OptimizerFactory.create``.
            rdrop_alpha: when truthy, the network is applied a second time, both
                outputs are passed to the loss layer and their average becomes
                the training model's output.
        """
        logger.info("compiling model...")
        with self.get_scope():
            classify_output = Input(
                shape=(self.label_num,) if self.multi_label else (),
                name='classify_output', dtype=tf.float32)
            inputs = self.nn_model.inputs
            output = self.nn_model.output
            loss_input = [classify_output, output]
            if rdrop_alpha:
                output1 = self.nn_model(inputs)
                loss_input.append(output1)
                output = Lambda(function=lambda x: sum(x) / len(x), name="avg_pool_layer")([output, output1])
            self.train_model = Model(inputs + [classify_output], output, name="train_model")

            loss_layer = build_classify_loss_layer(multi_label=self.multi_label, rdrop_alpha=rdrop_alpha)
            loss = loss_layer(loss_input)
            self.train_model.add_loss(loss)

            accuracy_func = binary_accuracy if self.multi_label else sparse_categorical_accuracy
            metric_layer = MetricLayer(accuracy_func, name="metric_layer")
            accuracy = metric_layer([classify_output, output])
            self.train_model.add_metric(accuracy, aggregation="mean", name="accuracy")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            # the loss is attached through add_loss, so compile only needs the optimizer
            self.train_model.compile(optimizer=optimizer)
            logger.info("training model's summary:")
            self.train_model.summary(print_fn=logger.info)
            self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionRelationClassifyExample) -> Dict:
        """Insert the span markers into the text, tokenize it and locate the markers.

        Returns the tokenizer feature dict extended with ``span_idxs`` (token
        positions of ``[S:..]``, ``[O:..]``, ``[/S]``, ``[/O]`` in that order) and,
        for labeled examples, ``labels`` (list of label names).

        Raises:
            ValueError: from ``list.index`` if a marker is absent from the tokens.
        """
        idx_infos = [
            (f"[S:{example.text_span1.label}]", example.text_span1.span[0]),
            (f"[O:{example.text_span2.label}]", example.text_span2.span[0]),
            ("[/S]", example.text_span1.span[1]),
            ("[/O]", example.text_span2.span[1]),
        ]
        text = example.text
        # insert from the right-most offset first so the remaining offsets stay valid
        for token, idx in sorted(idx_infos, key=lambda x: x[1], reverse=True):
            text = text[:idx] + token + text[idx:]
        feature = self.tokenizer.do_tokenize(text)
        tokens = feature["tokens"]
        span_idxs = [tokens.index(marker) for marker, _ in idx_infos]
        feature["span_idxs"] = span_idxs

        if isinstance(example, LabeledRelationClassifyExample):
            if isinstance(example.label, list):
                labels = [e.name for e in example.label]
            else:
                labels = [example.label.name]
            feature.update(labels=labels)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        """Turn one feature into a single truncated record; adds the target in train mode.

        Raises:
            ValueError: if ``mode`` is ``"train"`` and the feature has no labels.
        """
        record = dict(idx=idx, **feature)
        truncate_record(record=record, max_len=self.max_len, keys=["token_ids", "segment_ids", "tokens"])
        if mode == "train":
            labels = feature.get("labels")
            if labels is None:
                raise ValueError("no labels given in train mode!")
            classify_output = get_classify_output(labels, self.label2id, self.sparse_label)
            record.update(classify_output=classify_output)
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self, pred_tensors, show_detail=False, threshold=0.5) -> List[LabelOrLabels]:
        """Convert prediction tensors into ``Label`` objects.

        Multi-label mode returns a list of labels per tensor (those passing
        ``threshold``); single-label mode returns the arg-max label.
        """
        def _tensor2output(pred_tensor) -> LabelOrLabels:
            if self.multi_label:
                if show_detail:
                    logger.info("pred tensor")
                    logger.info(pred_tensor)
                hard_pred_tensor = apply_threshold(pred_tensor, threshold)
                label_data = [int(e.numpy()) for e in n_hot2idx_tensor(hard_pred_tensor)]
                return [Label(name=self.id2label[label_id], prob=pred_tensor[label_id])
                        for label_id in label_data]
            else:
                label_id = tf.argmax(pred_tensor, axis=-1).numpy()
                label = self.id2label[label_id]
                prob = pred_tensor[label_id]
                return Label(prob=prob, name=label)

        preds = [_tensor2output(t) for t in pred_tensors]
        return preds
def create_model(input_shape=(256, 256, 3), coef=1., alpha=1):
    """Build the AdaIN style-transfer training model with its losses attached.

    A VGG19 truncated at ``block4_conv1`` is cloned layer by layer ('valid'
    convolutions preceded by ``Padding``) and applied to both inputs.  The
    AdaIN output is decoded, the decoded result is re-encoded with the same
    cloned layers, and content/style losses are added to the returned model.

    Args:
        input_shape: shape of the content and style inputs.
        coef: multiplier applied to the style loss.
        alpha: forwarded to the ``AdaIN`` layer.

    Returns:
        Keras model with inputs ``[content_input, style_input]``; only layers
        whose name ends with ``'decoded'`` are trainable.
    """
    backbone = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone = Model(inputs=backbone.inputs,
                     outputs=backbone.get_layer('block4_conv1').output,
                     name='vgg')

    content_input = Input(shape=input_shape, name='content_input')
    style_input = Input(shape=input_shape, name='style_input')

    # --- Encoder: clone every backbone layer after the input layer ---
    encoder_stack = []
    style_feats = []
    content_t, style_t = content_input, style_input
    for src in backbone.layers[1:]:
        if 'conv' in src.name:
            pad = Padding()
            encoder_stack.append(pad)
            content_t = pad(content_t)
            style_t = pad(style_t)
            clone = Conv2D(filters=src.filters,
                           kernel_size=src.kernel_size,
                           activation=src.activation,
                           padding='valid',
                           name=src.name)
        elif 'pool' in src.name:
            clone = MaxPooling2D((2, 2), strides=(2, 2), name=src.name)
        else:
            assert False
        encoder_stack.append(clone)
        content_t = clone(content_t)
        style_t = clone(style_t)
        # weights can only be copied once the clone has been built by a call
        clone.set_weights(src.get_weights())
        if 'conv1' in style_t.name:
            style_feats.append(style_t)

    adain = AdaIN(alpha=alpha, name='adain')([content_t, style_t])

    # --- Decoder: (filters, name, activation, upsample afterwards) ---
    decoder_spec = (
        # Block 4
        (256, 'block4_conv1_decoded', 'relu', True),
        # Block 3
        (256, 'block3_conv4_decoded', 'relu', False),
        (256, 'block3_conv3_decoded', 'relu', False),
        (256, 'block3_conv2_decoded', 'relu', False),
        (128, 'block3_conv1_decoded', 'relu', True),
        # Block 2
        (128, 'block2_conv2_decoded', 'relu', False),
        (64, 'block2_conv1_decoded', 'relu', True),
        # Block 1
        (64, 'block1_conv2_decoded', 'relu', False),
        (3, 'block1_conv1_decoded', None, False),
    )
    decoder_stack = []
    for n_filters, layer_name, act, upsample in decoder_spec:
        decoder_stack.append(Padding())
        decoder_stack.append(
            Conv2D(n_filters, (3, 3), activation=act, padding='valid', name=layer_name))
        if upsample:
            decoder_stack.append(UpSampling2D())
    decoder_stack.append(PostProcess(name="decoded"))

    tensor = adain
    for step in decoder_stack:
        tensor = step(tensor)

    # --- Re-encode the decoded result to obtain the tensors used by the losses ---
    recon_feats = []
    for step in encoder_stack:
        tensor = step(tensor)
        if 'conv1' in tensor.name:
            recon_feats.append(tensor)

    loss_model = Model(inputs=[content_input, style_input], outputs=tensor)

    # Content loss: distance between the AdaIN target and the re-encoded output
    content_loss = tf.reduce_mean(tf.square(adain - tensor), axis=(1, 2, 3))
    loss_model.add_loss(content_loss)

    # Style loss: match per-channel mean and standard deviation
    mean_term = tf.constant(0.)
    std_term = tf.constant(0.)
    for recon_f, style_f in zip(recon_feats, style_feats):
        mean_r, var_r = tf.nn.moments(recon_f, [1, 2])
        mean_s, var_s = tf.nn.moments(style_f, [1, 2])
        std_r = tf.sqrt(var_r)
        std_s = tf.sqrt(var_s)
        mean_term += tf.reduce_mean(K.square(mean_r - mean_s), axis=1)
        std_term += tf.reduce_mean(K.square(std_r - std_s), axis=1)
    style_loss = mean_term + std_term
    loss_model.add_loss(coef * style_loss)

    loss_model.add_metric(content_loss, name="Lc")
    loss_model.add_metric(style_loss, name="Ls")

    # Freeze everything except the decoder layers
    for member in loss_model.layers:
        member.trainable = member.name.endswith('decoded')

    return loss_model
def get_model(base_model, rpn_model, anchors, hyper_params, mode="training"):
    """Generating the Faster R-CNN model on top of a backbone and its rpn model.
    inputs:
        base_model = tf.keras.model pretrained backbone, only VGG16 available for now
        rpn_model = tf.keras.model generated rpn model
        anchors = anchors handed to the RoIBBox layer
        hyper_params = dictionary, "total_labels" and "anchor_count" are read here
        mode = "training" or "inference" (any value other than "training"
            builds the inference model)

    outputs:
        frcnn_model = tf.keras.model
    """
    input_img = base_model.input
    rpn_reg_predictions, rpn_cls_predictions = rpn_model.output
    #
    roi_bboxes = RoIBBox(anchors, hyper_params, name="roi_bboxes")(
        [rpn_reg_predictions, rpn_cls_predictions])
    #
    roi_pooled = RoIPooling(hyper_params, name="roi_pooling")(
        [base_model.output, roi_bboxes])
    #
    features = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
    # two identical dense -> batch norm -> dropout stages
    head_stages = (
        (4096, "frcnn_fc1", "frcnn_batch_norm1", "frcnn_dropout1"),
        (2048, "frcnn_fc2", "frcnn_batch_norm2", "frcnn_dropout2"),
    )
    for units, fc_name, bn_name, drop_name in head_stages:
        features = TimeDistributed(Dense(units, activation="relu"), name=fc_name)(features)
        features = TimeDistributed(BatchNormalization(), name=bn_name)(features)
        features = TimeDistributed(Dropout(0.2), name=drop_name)(features)

    cls_head = Dense(hyper_params["total_labels"], activation="softmax")
    frcnn_cls_predictions = TimeDistributed(cls_head, name="frcnn_cls")(features)
    reg_head = Dense(hyper_params["total_labels"] * 4, activation="linear")
    frcnn_reg_predictions = TimeDistributed(reg_head, name="frcnn_reg")(features)

    prediction_outputs = [
        roi_bboxes, rpn_reg_predictions, rpn_cls_predictions,
        frcnn_reg_predictions, frcnn_cls_predictions
    ]
    #
    if mode != "training":
        # inference model: image in, predictions out, no loss layers
        return Model(inputs=[input_img], outputs=prediction_outputs)
    #
    input_gt_boxes = Input(shape=(None, 4), name="input_gt_boxes", dtype=tf.float32)
    input_gt_labels = Input(shape=(None, ), name="input_gt_labels", dtype=tf.int32)
    rpn_cls_actuals = Input(shape=(None, None, hyper_params["anchor_count"]),
                            name="input_rpn_cls_actuals", dtype=tf.float32)
    rpn_reg_actuals = Input(shape=(None, 4), name="input_rpn_reg_actuals", dtype=tf.float32)
    frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(hyper_params, name="roi_deltas")(
        [roi_bboxes, input_gt_boxes, input_gt_labels])
    #
    loss_names = ["rpn_reg_loss", "rpn_cls_loss", "frcnn_reg_loss", "frcnn_cls_loss"]
    loss_functions = [
        helpers.reg_loss, helpers.rpn_cls_loss, helpers.reg_loss, helpers.frcnn_cls_loss
    ]
    loss_operands = [
        [rpn_reg_actuals, rpn_reg_predictions],
        [rpn_cls_actuals, rpn_cls_predictions],
        [frcnn_reg_actuals, frcnn_reg_predictions],
        [frcnn_cls_actuals, frcnn_cls_predictions],
    ]
    loss_outputs = [
        Lambda(loss_fn, name=loss_name)(operands)
        for loss_name, loss_fn, operands in zip(loss_names, loss_functions, loss_operands)
    ]
    #
    frcnn_model = Model(
        inputs=[input_img, input_gt_boxes, input_gt_labels, rpn_reg_actuals, rpn_cls_actuals],
        outputs=prediction_outputs + loss_outputs)
    # register every loss layer both as a loss and as a mean-aggregated metric
    for loss_name in loss_names:
        loss_layer = frcnn_model.get_layer(loss_name)
        frcnn_model.add_loss(loss_layer.output)
        frcnn_model.add_metric(loss_layer.output, name=loss_name, aggregation="mean")
    #
    return frcnn_model
class VariationalAutoencoder():
    """Convolutional variational autoencoder built with Keras.

    The constructor stores the architecture parameters and immediately builds
    ``self.encoder``, ``self.decoder`` and the end-to-end ``self.model``.
    """

    def __init__(self,
                 input_dim,
                 encoder_conv_filters,
                 encoder_conv_kernel_size,
                 encoder_conv_strides,
                 decoder_conv_t_filters,
                 decoder_conv_t_kernel_size,
                 decoder_conv_t_strides,
                 z_dim,
                 use_batch_norm=False,
                 use_dropout=False):
        """Store the hyper-parameters and build the models.

        The ``encoder_conv_*`` / ``decoder_conv_t_*`` arguments are indexed per
        layer; the number of layers is taken from the length of the filter lists.
        """
        self.name = 'variational_autoencoder'

        self.input_dim = input_dim
        self.encoder_conv_filters = encoder_conv_filters
        self.encoder_conv_kernel_size = encoder_conv_kernel_size
        self.encoder_conv_strides = encoder_conv_strides
        self.decoder_conv_t_filters = decoder_conv_t_filters
        self.decoder_conv_t_kernel_size = decoder_conv_t_kernel_size
        self.decoder_conv_t_strides = decoder_conv_t_strides
        self.z_dim = z_dim

        self.use_batch_norm = use_batch_norm
        self.use_dropout = use_dropout

        self.n_layers_encoder = len(encoder_conv_filters)
        self.n_layers_decoder = len(decoder_conv_t_filters)

        self._build()

    def _build(self):
        """Create the encoder, the decoder and the combined model."""
        ### THE ENCODER
        encoder_input = Input(shape=self.input_dim, name='encoder_input')

        x = encoder_input

        for i in range(self.n_layers_encoder):
            conv_layer = Conv2D(filters=self.encoder_conv_filters[i],
                                kernel_size=self.encoder_conv_kernel_size[i],
                                strides=self.encoder_conv_strides[i],
                                padding='same',
                                name='encoder_conv_' + str(i))

            x = conv_layer(x)

            if self.use_batch_norm:
                x = BatchNormalization()(x)

            x = LeakyReLU()(x)

            if self.use_dropout:
                x = Dropout(rate=0.25)(x)

        # remembered so the decoder can reshape back to the same feature-map shape
        shape_before_flattening = K.int_shape(x)[1:]

        x = Flatten()(x)
        self.mu = Dense(self.z_dim, name='mu')(x)
        self.log_var = Dense(self.z_dim, name='log_var')(x)

        self.encoder_mu_log_var = Model(encoder_input, (self.mu, self.log_var))

        def sampling(args):
            # reparameterization: mu + sigma * epsilon with sigma = exp(log_var / 2)
            mu, log_var = args
            epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
            return mu + K.exp(log_var / 2) * epsilon

        encoder_output = Lambda(sampling, name='encoder_output')([self.mu, self.log_var])

        self.encoder = Model(encoder_input, encoder_output)

        ### THE DECODER
        decoder_input = Input(shape=(self.z_dim, ), name='decoder_input')

        x = Dense(np.prod(shape_before_flattening))(decoder_input)
        x = Reshape(shape_before_flattening)(x)

        for i in range(self.n_layers_decoder):
            conv_t_layer = Conv2DTranspose(
                filters=self.decoder_conv_t_filters[i],
                kernel_size=self.decoder_conv_t_kernel_size[i],
                strides=self.decoder_conv_t_strides[i],
                padding='same',
                name='decoder_conv_t_' + str(i))

            x = conv_t_layer(x)

            if i < self.n_layers_decoder - 1:
                if self.use_batch_norm:
                    x = BatchNormalization()(x)
                x = LeakyReLU()(x)
                if self.use_dropout:
                    x = Dropout(rate=0.25)(x)
            else:
                # last layer: sigmoid instead of the LeakyReLU block
                x = Activation('sigmoid')(x)

        decoder_output = x

        self.decoder = Model(decoder_input, decoder_output)

        ### THE FULL VAE
        model_input = encoder_input
        model_output = self.decoder(encoder_output)

        self.model = Model(model_input, model_output)

    def compile(self, learning_rate, r_loss_factor):
        """Attach the VAE loss (weighted reconstruction + KL) and compile with Adam.

        Args:
            learning_rate: learning rate of the Adam optimizer; also stored on
                the instance for the learning-rate schedule used in training.
            r_loss_factor: weight applied to the mean squared reconstruction error.
        """
        self.learning_rate = learning_rate

        ### COMPILATION
        def vae_r_loss(y_true, y_pred):
            r_loss = K.mean(K.square(y_true - y_pred), axis=[1, 2, 3])
            return r_loss_factor * r_loss

        def vae_kl_loss(y_true, y_pred):
            # arguments are unused; the KL term only depends on mu / log_var
            kl_loss = -0.5 * K.sum(
                1 + self.log_var - K.square(self.mu) - K.exp(self.log_var),
                axis=1)
            return kl_loss

        def vae_loss(y_true, y_pred):
            r_loss = vae_r_loss(y_true, y_pred)
            kl_loss = vae_kl_loss(y_true, y_pred)
            return r_loss + kl_loss

        optimizer = Adam(learning_rate=learning_rate)
        self.model.add_loss(vae_loss(self.model.input, self.model.output))
        self.model.add_metric(vae_kl_loss(self.model.input, self.model.output),
                              name='vae_kl_loss')
        self.model.compile(optimizer=optimizer, loss=None, metrics=[vae_r_loss])

    def save(self, folder):
        """Write the constructor parameters to ``params.pkl`` and plot the models.

        The ``viz``, ``weights`` and ``images`` sub-folders are created when
        missing, including when ``folder`` itself already exists.
        """
        for sub_folder in ('viz', 'weights', 'images'):
            # exist_ok: also creates `folder` itself and tolerates repeated saves
            os.makedirs(os.path.join(folder, sub_folder), exist_ok=True)

        with open(os.path.join(folder, 'params.pkl'), 'wb') as f:
            pickle.dump([
                self.input_dim, self.encoder_conv_filters,
                self.encoder_conv_kernel_size, self.encoder_conv_strides,
                self.decoder_conv_t_filters, self.decoder_conv_t_kernel_size,
                self.decoder_conv_t_strides, self.z_dim, self.use_batch_norm,
                self.use_dropout
            ], f)

        self.plot_model(folder)

    def load_weights(self, filepath):
        """Load weights for the full model from ``filepath``."""
        self.model.load_weights(filepath)

    def _training_callbacks(self, run_folder, print_every_n_batches, initial_epoch, lr_decay):
        """Build the callback list shared by ``train`` and ``train_with_generator``."""
        custom_callback = CustomCallback(run_folder, print_every_n_batches,
                                         initial_epoch, self)
        lr_sched = step_decay_schedule(initial_lr=self.learning_rate,
                                       decay_factor=lr_decay,
                                       step_size=1)

        checkpoint_filepath = os.path.join(
            run_folder, "weights/weights-{epoch:03d}-{loss:.2f}.h5")
        checkpoint1 = ModelCheckpoint(checkpoint_filepath,
                                      save_weights_only=True,
                                      verbose=1)
        checkpoint2 = ModelCheckpoint(os.path.join(run_folder, 'weights/weights.h5'),
                                      save_weights_only=True,
                                      verbose=1)

        return [checkpoint1, checkpoint2, custom_callback, lr_sched]

    def train(self,
              x_train,
              batch_size,
              epochs,
              run_folder,
              print_every_n_batches=100,
              initial_epoch=0,
              lr_decay=1):
        """Fit the model on an in-memory array, using the input as its own target."""
        callbacks_list = self._training_callbacks(
            run_folder, print_every_n_batches, initial_epoch, lr_decay)

        self.model.fit(x_train,
                       x_train,
                       batch_size=batch_size,
                       shuffle=True,
                       epochs=epochs,
                       initial_epoch=initial_epoch,
                       callbacks=callbacks_list)

    def train_with_generator(
        self,
        data_flow,
        epochs,
        steps_per_epoch,
        run_folder,
        print_every_n_batches=100,
        initial_epoch=0,
        lr_decay=1,
    ):
        """Fit the model from ``data_flow``; the current weights are saved first."""
        callbacks_list = self._training_callbacks(
            run_folder, print_every_n_batches, initial_epoch, lr_decay)

        self.model.save_weights(os.path.join(run_folder, 'weights/weights.h5'))

        self.model.fit(data_flow,
                       shuffle=True,
                       epochs=epochs,
                       initial_epoch=initial_epoch,
                       callbacks=callbacks_list,
                       steps_per_epoch=steps_per_epoch)

    def plot_model(self, run_folder):
        """Render the full model, the encoder and the decoder into ``<run_folder>/viz``."""
        plot_model(self.model,
                   to_file=os.path.join(run_folder, 'viz/model.png'),
                   show_shapes=True,
                   show_layer_names=True)
        plot_model(self.encoder,
                   to_file=os.path.join(run_folder, 'viz/encoder.png'),
                   show_shapes=True,
                   show_layer_names=True)
        plot_model(self.decoder,
                   to_file=os.path.join(run_folder, 'viz/decoder.png'),
                   show_shapes=True,
                   show_layer_names=True)
class VAE():
    '''
    A class representing the variational autoencoder used to encode the bar arrays.
    The VAE encodes a batch of bars of shape input_shape into a vector with latent_dim dimensions.

    Parameters
    ----------
    latent_dim : int
        The dimensionality of the latent space.
    input_shape : list of int with length 2
        The shape of the input space without batch size. The first element specifies
        the number of timesteps in a bar, the second one the number of instruments.
    weights : str, optional
        Path to a weights file that was previously saved by Keras. If specified,
        the weights are loaded. If None, the weights are initialized by Keras.
    debug : bool, optional
        If true print debug information, such as model summaries during class construction.
    '''

    def __init__(self, latent_dim, input_shape, weights=None, debug=True):
        # set main class attributes
        self.input_shape = input_shape
        self.latent_dim = latent_dim
        self.debug = debug
        self.weightdir = 'weights/'
        # saves the necessary data to be viewed by the TensorBoard application
        self.callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs')]

        # create input tensors and instantiate the encoder and decoder models
        bar_input = Input(shape=self.input_shape, name='encoder_input')
        latent_input = Input(shape=self.latent_dim, name='latent_input')
        self.encoder = self.make_encoder(bar_input)
        self.decoder = self.make_decoder(latent_input)

        # create the output tensors; index 2 of the encoder output is the sampled z
        encoder_output = self.encoder(bar_input)
        vae_output = self.decoder(encoder_output[2])

        # instantiate the VAE model
        self.VAE = Model(bar_input, vae_output, name='VAE_DNN')

        # calculate the loss functions and add them to the model
        z_mean, z_log_var, z = encoder_output
        kl_loss = -.5 * tf.math.reduce_sum(
            1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis=-1)
        recon_loss = mean_squared_error(tf.reshape(bar_input, [-1]), tf.reshape(vae_output, [-1]))
        recon_loss *= np.prod(self.input_shape, dtype=float)
        vae_loss = tf.math.reduce_mean(0.1 * kl_loss + recon_loss)
        self.VAE.add_loss(vae_loss)
        # add the reconstruction loss as an additional viewable metric for performance analysis
        self.VAE.add_metric(recon_loss, name='recon_loss', aggregation='mean')

        # compile the model and load the weights if specified
        self.VAE.compile(optimizer='adam')
        if self.debug:
            self.VAE.summary()
        if weights:
            self.VAE.load_weights(weights)

    def make_encoder(self, bar_input, n_conv=4):
        '''
        Helper function to instantiate the encoder model.

        Parameters
        ----------
        bar_input : Tensorflow symbolic tensor
            The symbolic tensor which represents the encoder input.
        n_conv : int, optional
            The number of convolution and pooling layers applied.

        Returns
        -------
        Keras model
            The created encoder model. Its outputs are [z_mean, z_log_var, z].
        '''
        x = bar_input

        # add the specified number of convolution/pooling layers
        for i in range(n_conv):
            x = Conv1D(64 * 2 ** i, 3, activation='relu', padding='valid')(x)
            x = MaxPooling1D(2)(x)

        x = Flatten(name='flatten')(x)
        #x = Dense(self.latent_dim * 2, activation = 'relu') (x) # intermediate dense layer
        z_mean = Dense(self.latent_dim, name='z_mean')(x)
        z_log_var = Dense(self.latent_dim, name='z_log_var')(x)

        # function to sample from a standard normal distribution, which is wrapped as a Lambda layer
        normal_sample_f = lambda y: tf.random.normal(tf.shape(y))
        eps = Lambda(normal_sample_f)(z_log_var)

        # Reparameterisation trick, rescale the sampled value to the actual distribution
        z = z_mean + tf.math.exp(0.5 * z_log_var) * eps

        # model returns the mean and logarithm of the variance as well as the sampled value
        encoder = Model(bar_input, [z_mean, z_log_var, z], name='encoder')
        if self.debug:
            encoder.summary()
        return encoder

    def make_decoder(self, latent_input, n_deconv=4):
        '''
        Helper function to instantiate the decoder model.

        Parameters
        ----------
        latent_input : Tensorflow symbolic tensor
            The symbolic tensor which represents the latent input.
        n_deconv : int, optional
            The number of upsampling and convolution layers applied.

        Returns
        -------
        Keras model
            The created decoder model.
        '''
        x = latent_input
        #x = Dense(self.latent_dim * 2, activation = 'relu') (x)
        x = Dense(4 * 128, activation='relu')(x)
        x = Reshape((4, 128))(x)

        # The filter count is written in terms of `i`, so the loop variable has to be
        # named `i`; it used to be `_`, leaving `i` undefined in this method.
        # NOTE(review): confirm that 64 * 2 ** i (growing filters) is the intended
        # progression for existing weight files.
        for i in range(n_deconv - 1):
            x = UpSampling1D(2)(x)
            x = Conv1D(64 * 2 ** i, 3, activation='relu', padding='same')(x)

        x = UpSampling1D(2)(x)
        x = Conv1D(22, 5, activation='relu', padding='same')(x)

        decoder = Model(latent_input, x, name='decoder')
        if self.debug:
            decoder.summary()
        return decoder

    def train(self, train_data, epochs, validation_split=0.0, **kwargs):
        '''
        Train the VAE model on a given dataset for a number of epochs.
        The weights are saved in the weightdir folder with a timestamp after training is completed.

        Parameters
        ----------
        train_data : array_like
            An array with shape (batch, *input_shape) which contains data to train the VAE on.
        epochs : int
            The number of epochs to train the model.
        validation_split : float, optional
            A number in the range [0,1]. It corresponds to the fraction of the samples
            withheld from the training data set to use for validation purposes.
        **kwargs
            Further keyword arguments to be passed to the fit method of the Keras model.

        Returns
        -------
        The history object returned by the Keras fit call.
        '''
        history = self.VAE.fit(train_data, epochs=epochs, validation_split=validation_split,
                               callbacks=self.callbacks, **kwargs)
        name = strftime("%Y-%m-%d_%H:%M:%S", gmtime())
        self.VAE.save_weights(self.weightdir + name)
        return history

    def interpolate(self, bar1, bar2, n_steps=10):
        '''
        Interpolate between the specified bars.
        To achieve this, each bar is encoded as a latent vector. Between those latent vectors,
        the specified number of intermediate vectors are generated. The entire sequence is
        then decoded into a sequence of bars again.

        Parameters
        ----------
        bar1, bar2 : array_like
            Arrays of shape input_shape which represent the bars to be interpolated.
        n_steps : int, optional
            The number of steps in the interpolation including the two original bars.
            With n_steps = 1 only the first bar is encoded and decoded.

        Returns
        -------
        array_like
            Array of shape (n_steps, *input_shape) representing the generated sequence.

        Raises
        ------
        ValueError
            If n_steps is smaller than 1.
        '''
        if n_steps < 1:
            raise ValueError("n_steps must be at least 1")

        lv1 = self.encoder.predict(bar1[None, :, :])[2]
        lv2 = self.encoder.predict(bar2[None, :, :])[2]
        # avoid dividing by zero (NaN latents) when a single step is requested
        last_step = max(n_steps - 1, 1)
        lvs = np.concatenate([lv1 + (lv2 - lv1) * i / last_step for i in range(n_steps)])
        bars_pred = self.decoder.predict(lvs)
        return bars_pred

    def sample(self, bar, n_samples):
        '''
        Sample from the distribution of a given input bar.

        Parameters
        ----------
        bar : array_like
            Array of shape input_shape around which the distribution is to be sampled.
        n_samples : int
            The number of copies of the bar passed through the VAE.

        Returns
        -------
        array_like
            Array of shape (n_samples, *input_shape) which contains the samples.
        '''
        return self.VAE.predict(np.array([bar] * n_samples))
class CLSTokenClassifyModel(AbstractTextClassifyModel, TFBasedModel):
    """Text classifier that feeds the first position of the encoder output to a dense layer.

    Supports single-label (softmax, sparse targets) and multi-label (sigmoid)
    classification, selected by the ``multi_label`` task setting.
    """

    def _load_config(self, config):
        """Read the task settings (labels, max length, multi-label flag) from the config."""
        super()._load_config(config)
        self.multi_label = self.task_config["multi_label"]
        self.max_len = self.task_config["max_len"]
        self.labels = load_lines(self.task_config['label_path'])
        # single-label tasks use sparse (index) targets
        self.sparse_label = not self.multi_label
        self.label2id, self.id2label = seq2dict(self.labels)
        self.label_num = len(self.label2id)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    pos_weight=1.,
                    bilstm_dim_list=None,
                    transformer_kwargs=None):
        """Build the inference model and store it in ``self.nn_model``.

        Args:
            pretrained_model_path: forwarded to ``get_sequence_encoder_model``.
            pretrained_model_tag: forwarded to ``get_sequence_encoder_model``.
            pos_weight: exponent applied to the sigmoid output in multi-label mode.
            bilstm_dim_list: forwarded to the encoder builder; ``None`` means ``[]``.
            transformer_kwargs: forwarded to the encoder builder; ``None`` means ``{}``.

        Returns:
            The Keras model mapping the encoder inputs to class probabilities.
        """
        # Fresh containers per call: the previous ``[]`` / ``{}`` defaults were
        # shared between calls and handed to an external helper.
        bilstm_dim_list = [] if bilstm_dim_list is None else bilstm_dim_list
        transformer_kwargs = {} if transformer_kwargs is None else transformer_kwargs

        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                transformer_kwargs=transformer_kwargs)
            sequence_embedding = encoder_model.output
            # keep only the first position of the sequence
            class_embedding = Lambda(function=lambda x: x[:, 0],
                                     name="cls_layer")(sequence_embedding)
            classify_activation = sigmoid if self.multi_label else softmax
            classifier_layer = Dense(
                self.label_num,
                name="classifier",
                activation=classify_activation,
                kernel_initializer=TruncatedNormal(stddev=0.02))
            output = classifier_layer(class_embedding)
            if self.multi_label:
                output = Lambda(lambda x: x**pos_weight,
                                name="pos_weight_layer")(output)

            self.nn_model = Model(inputs=encoder_model.inputs,
                                  outputs=[output],
                                  name="nn_model")
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
            self._update_model_dict("test", self.nn_model)
            return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        """Wrap ``self.nn_model`` into a training model with in-graph loss and accuracy.

        Args:
            optimizer_name: name handed to ``OptimizerFactory.create``.
            optimizer_args: arguments handed to ``OptimizerFactory.create``.
            rdrop_alpha: when truthy, the network is applied a second time, both
                outputs are passed to the loss layer and their average becomes
                the training model's output.
        """
        logger.info("compiling model...")
        with self.get_scope():
            classify_output = Input(
                shape=(self.label_num, ) if self.multi_label else (),
                name='classify_output',
                dtype=tf.int32)
            inputs = self.nn_model.inputs
            output = self.nn_model(inputs)
            loss_input = [classify_output, output]
            if rdrop_alpha:
                output1 = self.nn_model(inputs)
                loss_input.append(output1)
                output = Lambda(function=lambda x: sum(x) / len(x),
                                name="avg_pool_layer")([output, output1])
            self.train_model = Model(inputs + [classify_output], [output],
                                     name="train_model")

            loss_layer = build_classify_loss_layer(multi_label=self.multi_label,
                                                   rdrop_alpha=rdrop_alpha)
            loss = loss_layer(loss_input)
            self.train_model.add_loss(loss)

            accuracy_func = binary_accuracy if self.multi_label else sparse_categorical_accuracy
            metric_layer = MetricLayer(accuracy_func, name="metric_layer")
            accuracy = metric_layer([classify_output, output])
            self.train_model.add_metric(accuracy,
                                        aggregation="mean",
                                        name="accuracy")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            # the loss is attached through add_loss, so compile only needs the optimizer
            self.train_model.compile(optimizer=optimizer)
            logger.info("training model's summary:")
            self.train_model.summary(print_fn=logger.info)
            self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextClassifyExample) -> Dict:
        """Tokenize an example; labeled examples also get ``labels`` (list of label names)."""
        feature = self.tokenizer.do_tokenize(text=example.text,
                                             extra_text=example.extra_text)
        if isinstance(example, LabeledTextClassifyExample):
            if isinstance(example.label, list):
                labels = [e.name for e in example.label]
            else:
                labels = [example.label.name]
            feature.update(labels=labels)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        """Turn one feature into a single truncated record; adds the target in train mode.

        Raises:
            ValueError: if ``mode`` is ``"train"`` and the feature has no labels.
        """
        record = dict(idx=idx, **feature)
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        if mode == "train":
            labels = feature.get("labels")
            if labels is None:
                raise ValueError("no labels given in train mode!")
            classify_output = get_classify_output(labels, self.label2id,
                                                  self.sparse_label)
            record.update(classify_output=classify_output)
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[LabelOrLabels]:
        """Convert prediction tensors into ``Label`` objects.

        Multi-label mode returns a list of labels per tensor (those passing
        ``threshold``); single-label mode returns the arg-max label.
        """
        def _tensor2output(pred_tensor) -> LabelOrLabels:
            if self.multi_label:
                if show_detail:
                    logger.info("pred tensor")
                    logger.info(pred_tensor)
                hard_pred_tensor = apply_threshold(pred_tensor, threshold)
                label_data = [
                    int(e.numpy())
                    for e in n_hot2idx_tensor(hard_pred_tensor)
                ]
                return [
                    Label(name=self.id2label[label_id],
                          prob=pred_tensor[label_id])
                    for label_id in label_data
                ]
            else:
                label_id = tf.argmax(pred_tensor, axis=-1).numpy()
                label = self.id2label[label_id]
                prob = pred_tensor[label_id]
                return Label(prob=prob, name=label)

        preds = [_tensor2output(t) for t in pred_tensors]
        return preds