def evaluate(self, gold_path, predict_path):
    """Compare a predicted segmentation file against the gold file and print P/R/F.

    Reads both files in lockstep, converts each instance to per-character tags,
    derives word-span coordinates, and accumulates gold / predicted / correct
    word counts for precision, recall and F1.

    :param gold_path: path of the gold-standard segmented file
    :param predict_path: path of the predicted segmented file
    """
    gold_ite = DatasetHandler.read_dev_data(gold_path)
    predict_ite = DatasetHandler.read_dev_data(predict_path)
    nr_processing = 0        # number of predicted words
    nr_gold = 0              # number of gold words
    nr_processing_right = 0  # number of correctly predicted words
    nr_line = 0
    while True:
        try:
            # use the next() builtin instead of the Python-2-only .next() method
            gold_instance = next(gold_ite)
            predict_instance = next(predict_ite)
        except StopIteration:
            break
        nr_line += 1
        gold_unigrams, gold_tags = \
            self._processing_one_segmented_WSAtom_instance2unigrams_and_tags(gold_instance)
        predict_unigrams, predict_tags = \
            self._processing_one_segmented_WSAtom_instance2unigrams_and_tags(predict_instance)
        gold_coor_seq = \
            self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(gold_tags)
        predict_coor_seq = \
            self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(predict_tags)
        cur_nr_gold, cur_nr_processing, cur_nr_processing_right = (
            self.__innerfunc_4evaluate_get_nr_gold_and_processing_and_processing_right(
                gold_coor_seq, predict_coor_seq))
        nr_gold += cur_nr_gold
        nr_processing += cur_nr_processing
        nr_processing_right += cur_nr_processing_right
    p, r, f = self.__innerfunc_4evaluate_calculate_prf(
        nr_gold, nr_processing, nr_processing_right)
    # format the message first, then print it; the original applied '%' to the
    # return value of print(), which raises TypeError on Python 3
    print(("Eval result :\np : %.2f%% r : %.2f%% f : %.2f%%\n"
           "line num : %d total word num : %d total predict word num : %d predict right num : %d ")
          % (p * 100, r * 100, f * 100, nr_line, nr_gold, nr_processing,
             nr_processing_right))
def plot_roc():
    # Plot and save ROC curves for every (domain, model, init, preprocess)
    # combination, pooling the stage-5 model's predictions across all
    # cross-validation folds.
    # NOTE(review): domains, model_names, init_opts, preprocess_opts, folds,
    # model_loader, K, np, plt, roc_curve, auc and savemat are resolved from
    # module scope -- confirm against the full file.
    for domain in domains:
        for model_name in model_names:
            for init_opt in init_opts:
                for preprocess_opt in preprocess_opts:
                    scores = []  # per-fold predicted scores
                    labels = []  # per-fold ground-truth labels
                    for ind_fold, fold in enumerate(folds):
                        # release the previous fold's graph/GPU memory
                        K.clear_session()
                        dh = DatasetHandler(domain)
                        dataset_fold = dh.get_fold(ind_fold, preprocess_opt)
                        model_path = os.path.join('log', domain, model_name,
                                                  init_opt, preprocess_opt,
                                                  fold, 'stage_5.h5')
                        model = model_loader.load_full_model(model_name,
                                                             no_cats=2)
                        model.load_weights(model_path)
                        scores.append(
                            model.predict(dataset_fold['test_data'],
                                          batch_size=10))
                        labels.append(dataset_fold['test_labels'])
                    # column 1 holds the positive class (2-class softmax /
                    # one-hot labels)
                    scores = np.concatenate(scores)[:, 1]
                    labels = np.concatenate(labels)[:, 1]
                    fpr, tpr, _ = roc_curve(labels, scores)
                    roc_auc = auc(fpr, tpr)
                    # persist the raw curve data as a .mat for later analysis
                    savemat(
                        os.path.join(
                            'log', 'roc', domain + '_' + model_name + '_' +
                            init_opt + '_' + preprocess_opt + '.mat'), {
                                'fpr': fpr,
                                'tpr': tpr,
                                'roc_auc': roc_auc
                            })
                    plt.plot(fpr, tpr, color='darkorange', lw=2,
                             label='ROC curve (area = %0.2f)' % roc_auc)
                    # diagonal chance line
                    plt.plot([0, 1], [0, 1], color='navy', lw=2,
                             linestyle='--')
                    plt.xlim([0.0, 1.0])
                    plt.ylim([0.0, 1.05])
                    plt.xlabel('False Positive Rate')
                    plt.ylabel('True Positive Rate')
                    plt.title('Receiver operating characteristics curve')
                    plt.legend(loc="lower right")
                    plt.savefig(
                        os.path.join(
                            'log', 'roc', domain + '_' + model_name + '_' +
                            init_opt + '_' + preprocess_opt))
                    plt.close()
def seg_eval(args):
    """Validate that both input files are readable, then run evaluation."""
    gold_file = args.gold_file
    predict_file = args.predict_file
    if not DatasetHandler.is_readable(gold_file):
        logging.error("path '%s' open failed !" % (gold_file))
        logging.error('Exit!')
        exit(1)
    if not DatasetHandler.is_readable(predict_file):
        logging.error("path '%s' open failed ! predict file open error ." %
                      (predict_file))
        logging.error("Exit!")
        exit(1)
    Segmentor().evaluate(gold_file, predict_file)
def main():
    # Generate guided-backpropagation saliency masks for every test image of
    # every (domain, model, init, preprocess, fold) combination and save each
    # mask as a grayscale PNG.
    # NOTE(review): domains, model_names, init_opts, preprocess_opts, folds,
    # log_path_main, model_loader, K, np, Image and GuidedBackprop come from
    # module scope -- confirm against the full file.
    for domain in domains:
        for model_name in model_names:
            for init_opt in init_opts:
                for preprocess_opt in preprocess_opts:
                    log_path = os.path.join(log_path_main, domain, model_name,
                                            init_opt)
                    if not os.path.exists(log_path):
                        os.makedirs(log_path)
                    for ind_fold, fold in enumerate(folds):
                        # release the previous fold's graph/GPU memory
                        K.clear_session()
                        dh = DatasetHandler(domain)
                        dataset_fold = dh.get_fold(ind_fold, preprocess_opt)
                        model_path = os.path.join('log', domain, model_name,
                                                  init_opt, preprocess_opt,
                                                  fold, 'stage_5.h5')
                        model = model_loader.load_full_model(model_name,
                                                             no_cats=2)
                        model.load_weights(model_path)
                        model.compile(optimizer='adam',
                                      loss='categorical_crossentropy',
                                      metrics=['accuracy'])
                        guided_bprop = GuidedBackprop(model)
                        for ind_image, image in enumerate(
                                dataset_fold['test_data']):
                            if domain == 'VL':
                                # collapse RGB to luminance, then replicate it
                                # back to 3 identical channels
                                image = np.dot(image[..., :3],
                                               [0.299, 0.587, 0.114])
                                image = image[:, :, np.newaxis]
                                image = np.repeat(image, 3, axis=2)
                            mask = guided_bprop.get_mask(image)
                            # per-pixel L2 norm across the channel axis
                            mask = np.power(mask, 2)
                            mask = np.sum(mask, axis=2)
                            mask = np.sqrt(mask)
                            # min-max normalize to [0, 255] for image output;
                            # guard against an all-zero (constant) mask
                            mask -= np.min(mask)
                            norm_max = np.max(mask)
                            if norm_max == 0:
                                norm_max = 1
                            mask /= norm_max
                            mask *= 255
                            mask = np.uint8(mask)
                            img = Image.fromarray(mask, 'L')
                            # name the PNG after the source image, sans extension
                            im_name = dataset_fold['test_image_names'][
                                ind_image].split('.')[0]
                            img.save(
                                os.path.join(log_path, im_name + '.png'),
                                'PNG')
def _4training_evaluate_processing(self, dev_path):
    """Evaluate the current model on the dev set during training; return F1.

    Decodes every dev instance with the current extractor/model/constraints,
    accumulates gold / predicted / correct word counts, reports P/R/F on
    stderr and returns the F score.

    :param dev_path: path of the development corpus
    :return: F1 score (float)
    """
    nr_processing_right = 0
    nr_gold = 0
    nr_processing = 0
    for instance in DatasetHandler.read_dev_data(dev_path):
        unigrams, gold_tags = Segmentor._processing_one_segmented_WSAtom_instance2unigrams_and_tags(
            instance)
        predict_tags = Decoder.decode_for_predict(self.extractor, self.model,
                                                  self.constrain, unigrams)
        gold_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(
            gold_tags)
        predict_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(
            predict_tags)
        cur_nr_gold, cur_nr_processing, cur_nr_processing_right = (
            self.__innerfunc_4evaluate_get_nr_gold_and_processing_and_processing_right(
                gold_coor_seq, predict_coor_seq))
        nr_gold += cur_nr_gold
        nr_processing += cur_nr_processing
        nr_processing_right += cur_nr_processing_right
    p, r, f = self.__innerfunc_4evaluate_calculate_prf(
        nr_gold, nr_processing, nr_processing_right)
    # 'print >> sys.stderr' is Python-2-only syntax (SyntaxError on Python 3);
    # write the message to stderr directly, which works on both versions
    msg = ("Eval result :\np : %.2f%% r : %.2f%% f : %.2f%%\n"
           "total word num : %d total predict word num : %d predict right num : %d "
           ) % (p * 100, r * 100, f * 100, nr_gold, nr_processing,
                nr_processing_right)
    sys.stderr.write(msg + "\n")
    return f
def seg_train(args):
    """Check the CLI-supplied paths, then run segmentor training.

    Training and developing files must be readable; the model-saving path
    must be writeable. Any failure is logged and terminates the process.
    """
    checks = (
        (args.training_file, DatasetHandler.is_readable),
        (args.developing_file, DatasetHandler.is_readable),
        (args.model_saving, DatasetHandler.is_writeable),
    )
    for path, is_ok in checks:
        if not is_ok(path):
            logging.error("path '%s' open failed !" % (path))
            logging.error('Exit!')
            exit(1)
    Segmentor().train(args.training_file, args.developing_file,
                      args.model_saving, args.max_iter)
def seg_predict(args):
    """Validate the predict/model/output paths, then run prediction."""
    if not DatasetHandler.is_readable(args.predict_file):
        logging.error("path '%s' open failed !" % (args.predict_file))
        logging.error('Exit!')
        exit(1)
    if not DatasetHandler.is_readable(args.model_loading):
        logging.error("path '%s' open failed ! Model load Error ." %
                      (args.model_loading))
        logging.error("Exit!")
        exit(1)
    # keep the original evaluation order: writeability is probed first, and
    # the literal "stdout" target is exempt from the check
    output_writeable = DatasetHandler.is_writeable(args.output_path)
    if not output_writeable and args.output_path != "stdout":
        logging.error("path '%s' open failed !" % (args.output_path))
        logging.error('Exit!')
        exit(1)
    Segmentor().predict(args.model_loading, args.predict_file,
                        args.output_path)
def train(self, training_path, dev_path, model_saving_path, max_iter=None):
    """Run the full training pipeline and save the resulting model.

    :param training_path: path of the segmented training corpus
    :param dev_path: path of the development corpus
    :param model_saving_path: where the trained model is written
    :param max_iter: maximum training iterations; None keeps the default
    """
    self._set_max_iter(max_iter)
    self.raw_training_data = DatasetHandler.read_training_data(training_path)
    self._build_inner_lexicon(threshold=0.9)
    self._processing_raw_training_data2unigrams_and_tags()
    # assemble the model components in dependency order
    for build_step in (self._build_extractor, self._build_constrain,
                       self._build_decoder, self._build_training_model):
        build_step()
    self._training_processing(model_saving_path, dev_path)
def train(self, training_path, dev_path, model_saving_path, max_iter=None):
    """Train the segmentor end to end and save the model.

    Loads the training corpus, builds the lexicon, feature extractor,
    constraint, decoder and training model in order, then runs the training
    loop against the dev set.

    :param training_path: path of the segmented training corpus
    :param dev_path: path of the development corpus
    :param model_saving_path: where the trained model is written
    :param max_iter: maximum training iterations; None keeps the default
    """
    self._set_max_iter(max_iter)
    self.raw_training_data = DatasetHandler.read_training_data(
        training_path)
    # 0.9 is the lexicon frequency threshold -- presumably a coverage cutoff;
    # confirm in _build_inner_lexicon
    self._build_inner_lexicon(threshold=0.9)
    self._processing_raw_training_data2unigrams_and_tags()
    # build order matters: later components depend on earlier ones
    self._build_extractor()
    self._build_constrain()
    self._build_decoder()
    self._build_training_model()
    self._training_processing(model_saving_path, dev_path)
def evaluate(self, gold_path, predict_path):
    """Evaluate a predicted segmentation file against the gold file.

    Iterates both files line by line, converts each instance to tags,
    derives word-span coordinates, accumulates gold / predicted / correct
    word counts, and prints precision, recall, F1 and the raw counts.

    :param gold_path: path of the gold-standard segmented file
    :param predict_path: path of the predicted segmented file
    """
    gold_ite = DatasetHandler.read_dev_data(gold_path)
    predict_ite = DatasetHandler.read_dev_data(predict_path)
    nr_processing = 0        # number of predicted words
    nr_gold = 0              # number of gold words
    nr_processing_right = 0  # number of correctly predicted words
    nr_line = 0
    while True:
        try:
            # next() builtin works on Python 2 and 3; .next() is Py2-only
            gold_instance = next(gold_ite)
            predict_instance = next(predict_ite)
        except StopIteration:
            break
        nr_line += 1
        gold_unigrams, gold_tags = self._processing_one_segmented_WSAtom_instance2unigrams_and_tags(
            gold_instance)
        predict_unigrams, predict_tags = self._processing_one_segmented_WSAtom_instance2unigrams_and_tags(
            predict_instance)
        gold_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(
            gold_tags)
        predict_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(
            predict_tags)
        cur_nr_gold, cur_nr_processing, cur_nr_processing_right = (
            self.__innerfunc_4evaluate_get_nr_gold_and_processing_and_processing_right(
                gold_coor_seq, predict_coor_seq))
        nr_gold += cur_nr_gold
        nr_processing += cur_nr_processing
        nr_processing_right += cur_nr_processing_right
    p, r, f = self.__innerfunc_4evaluate_calculate_prf(
        nr_gold, nr_processing, nr_processing_right)
    # format the string first, then print it; the original applied '%' to
    # print()'s return value (None), a TypeError on Python 3
    print((
        "Eval result :\np : %.2f%% r : %.2f%% f : %.2f%%\n"
        "line num : %d total word num : %d total predict word num : %d predict right num : %d "
    ) % (p * 100, r * 100, f * 100, nr_line, nr_gold, nr_processing,
         nr_processing_right))
def _predict_processing(self, predict_path, output_path):
    """Predict segmentation for every instance and write the segmented lines.

    :param predict_path: path of the raw input to segment
    :param output_path: an already-open writable file object, the literal
        string "stdout", or a filesystem path to create
    """
    # duck-type the file check: the Python-2 builtin 'file' no longer exists
    # on Python 3, and this also accepts any file-like object (e.g. StringIO)
    if hasattr(output_path, "write"):
        output_f = output_path
    elif output_path == "stdout":
        output_f = sys.stdout
    else:
        output_f = open(output_path, "w")
    logging.info("set output %s " % (output_f.name))
    logging.info("reading instance from %s . predicting ." % (predict_path))
    for instance, separator_data in DatasetHandler.read_predict_data(predict_path):
        # the constraint data encodes pre-existing separators for this line
        self.constrain.set_constrain_data(separator_data)
        predict_tags = Decoder.decode_for_predict(self.extractor, self.model,
                                                  self.constrain, instance)
        segmented_line = self._processing_unigrams_and_tags2segmented_line(
            instance, predict_tags)
        output_f.write("%s" % ("".join([segmented_line, os.linesep])))
    # never close stdout; close anything we (or the caller) opened as a file
    if output_f is not sys.stdout:
        output_f.close()
    logging.info("predicting done.")
def _4training_evaluate_processing(self, dev_path):
    """Evaluate the model on the dev set during training and return F1.

    Decodes each dev instance, accumulates gold / predicted / correct word
    counts, reports P/R/F on stderr, and returns the F score so the trainer
    can track the best iteration.

    :param dev_path: path of the development corpus
    :return: F1 score (float)
    """
    nr_processing_right = 0
    nr_gold = 0
    nr_processing = 0
    for instance in DatasetHandler.read_dev_data(dev_path):
        unigrams, gold_tags = Segmentor._processing_one_segmented_WSAtom_instance2unigrams_and_tags(
            instance)
        predict_tags = Decoder.decode_for_predict(self.extractor, self.model,
                                                  self.constrain, unigrams)
        gold_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(
            gold_tags)
        predict_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(
            predict_tags)
        cur_nr_gold, cur_nr_processing, cur_nr_processing_right = (
            self.__innerfunc_4evaluate_get_nr_gold_and_processing_and_processing_right(
                gold_coor_seq, predict_coor_seq))
        nr_gold += cur_nr_gold
        nr_processing += cur_nr_processing
        nr_processing_right += cur_nr_processing_right
    p, r, f = self.__innerfunc_4evaluate_calculate_prf(
        nr_gold, nr_processing, nr_processing_right)
    # 'print >>sys.stderr' is Python-2-only syntax (SyntaxError on Python 3);
    # writing to stderr directly behaves identically on both versions
    msg = ("Eval result :\np : %.2f%% r : %.2f%% f : %.2f%%\n"
           "total word num : %d total predict word num : %d predict right num : %d "
           ) % (p * 100, r * 100, f * 100, nr_gold, nr_processing,
                nr_processing_right)
    sys.stderr.write(msg + "\n")
    return f
def _predict_processing(self, predict_path, output_path):
    """Segment every instance of predict_path and write results to output_path.

    :param predict_path: path of the raw input to segment
    :param output_path: an open writable file object, the literal string
        "stdout", or a filesystem path to create
    """
    # the Python-2 builtin 'file' does not exist on Python 3; duck-type the
    # check instead, which also admits any file-like object
    if hasattr(output_path, "write"):
        output_f = output_path
    elif output_path == "stdout":
        output_f = sys.stdout
    else:
        output_f = open(output_path, "w")
    logging.info("set output %s " % (output_f.name))
    logging.info("reading instance from %s . predicting ." % (predict_path))
    for instance, separator_data in DatasetHandler.read_predict_data(
            predict_path):
        # separator data constrains decoding around pre-existing boundaries
        self.constrain.set_constrain_data(separator_data)
        predict_tags = Decoder.decode_for_predict(self.extractor, self.model,
                                                  self.constrain, instance)
        segmented_line = self._processing_unigrams_and_tags2segmented_line(
            instance, predict_tags)
        output_f.write("%s" % ("".join([segmented_line, os.linesep])))
    # close only what is not stdout
    if output_f is not sys.stdout:
        output_f.close()
    logging.info("predicting done.")
def main():
    # Extract intermediate-layer features from ResNet50/VGG19 at five network
    # stages for every (domain, preprocess, init) combination, reduce them
    # with PCA (or uniform sampling), and dump everything into one .mat file.
    # NOTE(review): keras, np, PCA, savemat, DatasetHandler and model_loader
    # are resolved from module scope -- confirm against the full file.
    domains = ['IR', 'VL']
    preprocess_methods = ['mean_subtraction', 'scaling']
    init_methods = ['random', 'ImageNet']
    model_names = ['ResNet50', 'VGG19']
    stages = ['stage_1', 'stage_2', 'stage_3', 'stage_4', 'stage_5']
    # output layer per stage, one list per architecture (index-aligned with
    # 'stages')
    ResNet50_layer_names = [
        'max_pooling2d_1', 'activation_10', 'activation_22', 'activation_40',
        'activation_49'
    ]
    VGG19_layer_names = [
        'block1_pool', 'block2_pool', 'block3_pool', 'block4_pool',
        'block5_pool'
    ]
    out_dict = {}
    sampling_method = 'pca'  # dimensionality reduction: 'pca' or 'uniform'
    for domain in domains:
        for preprocess_method in preprocess_methods:
            # load dataset
            dh = DatasetHandler(domain)
            dataset_all = dh.get_all(preprocess_method)
            for init_method in init_methods:
                for model_name in model_names:
                    for ind_stage, stage in enumerate(stages):
                        # load model
                        keras.backend.clear_session()
                        if init_method == 'random':
                            model = model_loader.load_full_model(
                                model_name,
                                random_weights=True,
                                no_cats=2,
                                weight_decay=0.001)
                        elif init_method == 'ImageNet':
                            model = model_loader.load_full_model(
                                model_name,
                                random_weights=False,
                                no_cats=2,
                                weight_decay=0.001)
                        # strip layers
                        if model_name == 'ResNet50':
                            end_layer = ResNet50_layer_names[ind_stage]
                        elif model_name == 'VGG19':
                            end_layer = VGG19_layer_names[ind_stage]
                        # truncate the network at the chosen stage's output
                        model = keras.models.Model(
                            inputs=model.input,
                            outputs=model.get_layer(end_layer).output)
                        feats = model.predict(dataset_all['data'])
                        # flatten per-image activations to one row each
                        feats = np.reshape(feats,
                                           (dataset_all['data'].shape[0], -1))
                        if sampling_method == 'pca':
                            pca = PCA(1024)
                            feats = pca.fit_transform(feats)
                        elif sampling_method == 'uniform':
                            # uniformly subsample at most 16384 feature columns
                            if feats.shape[1] > 16384:
                                sample_indices = np.round(
                                    np.linspace(0, feats.shape[1] - 1, 16384))
                                feats = feats[:, sample_indices.astype(int)]
                        name = domain + '_' + preprocess_method + '_' + init_method + '_' + model_name + '_' + stage
                        out_dict[name + '_feats'] = feats
                        out_dict[name + '_labels'] = dataset_all['labels']
    # one compressed .mat holding every combination's features and labels
    savemat('feats_' + sampling_method + '.mat', out_dict, do_compression=True)
def main():
    # Fine-tune a pretrained CNN on one cross-validation fold in two phases
    # (final layer only, then stage 5 onwards) and log accuracies plus the
    # misclassified image names.
    # NOTE(review): domain, ind_fold, model_name, init_method,
    # preprocess_method, log_path, model_loader and trainer are resolved from
    # module scope -- confirm against the full file.
    # initialize log file
    file_log = open(os.path.join(log_path, 'log.txt'), 'w')
    file_log.write(domain + ' - ' + str(ind_fold) + '\n')
    file_log.write(model_name + '\n')
    file_log.write(init_method + '\n')
    file_log.write(preprocess_method + '\n')
    # read dataset
    dh = DatasetHandler(domain)
    dataset_fold = dh.get_fold(ind_fold, preprocess_method)
    if init_method == 'random':
        model = model_loader.load_full_model(model_name,
                                             random_weights=True,
                                             no_cats=2,
                                             weight_decay=0.001)
    elif init_method == 'ImageNet':
        model = model_loader.load_full_model(model_name,
                                             random_weights=False,
                                             no_cats=2,
                                             weight_decay=0.001)
    # train the last layer
    accs = []          # one accuracy per training phase
    false_images = []  # one list of misclassified image names per phase
    model = model_loader.set_trainable_layers(model, model_name, 'final')
    learning_rate = 0.1
    # 5 rounds, halving the learning rate each round
    for ind_iter in range(5):
        model = trainer.train_model(model, dataset_fold, learning_rate)
        learning_rate /= 2
    acc, false_image = trainer.test_model(model, dataset_fold)
    accs.append(acc)
    false_images.append(false_image)
    model.save_weights(os.path.join(log_path, 'final_layer.h5'))
    # fine-tune stage 5 and onwards
    model = model_loader.set_trainable_layers(model, model_name, '5')
    learning_rate = 0.01
    for ind_iter in range(5):
        model = trainer.train_model(model, dataset_fold, learning_rate)
        learning_rate /= 2
    acc, false_image = trainer.test_model(model, dataset_fold)
    accs.append(acc)
    false_images.append(false_image)
    model.save_weights(os.path.join(log_path, 'stage_5.h5'))
    # record accuracies
    file_log.write('Final layer\n')
    file_log.write(str(accs[0]) + '\n')
    file_log.write('Stage 5\n')
    file_log.write(str(accs[1]) + '\n')
    # record falsely classified images
    file_log.write('Final layer\n')
    for fi in false_images[0]:
        file_log.write(fi + '\n')
    file_log.write('Stage 5\n')
    for fi in false_images[1]:
        file_log.write(fi + '\n')
    file_log.close()