def view_tongue_data_augment():
    """Smoke-test the image-augmentation pipeline.

    Loads the tongue image dataset, tensorizes it, runs it through the
    augmentation generator and prints the shapes of the augmented tensors.
    """
    patient_tongue_dir = config['root_path'] + \
        config['original_path'] + 'tongue_9585'
    tongue_zhiliao_path = config['root_path'] + \
        config['original_path'] + 'tongue_zhiliao.list'
    yaopin_path = config['root_path'] + \
        config['original_path'] + 'yaopin.vocab'

    tongue_ids, tongue_image_arrays, tongue_yaofangs, tongue_image_shape = \
        patient_tongue_generator.loadDatafromFile(
            patient_tongue_dir, tongue_zhiliao_path,
            image_normal_size=(224, 224))

    # fetch max(id) in yaopin.vocab as nb_yao
    # (iterate the file directly instead of materializing readlines())
    with open(yaopin_path, 'r') as yaopin_file:
        nb_yao = max(int(line.split(' ')[0]) for line in yaopin_file)

    total_tongue_x, total_y = tongue2text_gen.data_tensorization(
        tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao)

    datagen = image_augment.image_augment_gen()
    augmented_x, augmented_y = image_augment.data_tensoration_augment(
        datagen, total_tongue_x, total_y)
    print(np.shape(augmented_x))
    print(np.shape(augmented_y))
def tfidf_weights_test():
    """Compute TF-IDF weights over the prescription (yaofang) corpus and
    print the smallest strictly-positive weight.

    Useful to choose a lower threshold for tf-idf based filtering.
    """
    patient_tongue_dir = config['root_path'] + \
        config['original_path'] + 'tongue_9585'
    tongue_zhiliao_path = config['root_path'] + \
        config['original_path'] + 'tongue_zhiliao.list'

    tongue_ids, tongue_image_arrays, tongue_yaofangs, tongue_image_shape = \
        patient_tongue_generator.loadDatafromFile(
            patient_tongue_dir, tongue_zhiliao_path,
            image_normal_size=(224, 224))

    yaofangs_corpus = tfidf.list2corpus(tongue_yaofangs)
    word, weight = tfidf.get_tf_idf(yaofangs_corpus)
    weight = list(weight)

    # Smallest strictly-positive tf-idf value; fall back to the original
    # sentinel 999999 when no positive weight exists.  Avoids shadowing
    # the builtin `min` and the hand-rolled nested scan.
    min_weight = min(
        (w for row in weight for w in row if w > 0.0),
        default=999999)
    print(min_weight)
def train_predict_tongue2text_basic_gen(train_new=True):
    """Train (optionally) and evaluate the basic tongue->prescription generator.

    @param train_new: flag of train a new model and replace the model on disk
    """
    patient_tongue_dir = config['root_path'] + \
        config['original_path'] + 'tongue_9585'
    tongue_zhiliao_path = config['root_path'] + \
        config['original_path'] + 'tongue_zhiliao.list'
    yaopin_path = config['root_path'] + \
        config['original_path'] + 'yaopin.vocab'

    # tongue_ids: [01012045534615_1_4_7, ...]
    # tongue_image_arrays: [np.array(pixels matrix of image), ...]
    # tongue_yaofangs: [[0, 1, 2, 3], [4, 5, 6, 7], ...]
    tongue_ids, tongue_image_arrays, tongue_yaofangs, tongue_image_shape = \
        patient_tongue_generator.loadDatafromFile(
            patient_tongue_dir, tongue_zhiliao_path,
            image_normal_size=(224, 224))

    # fetch max(id) in yaopin.vocab as nb_yao
    with open(yaopin_path, 'r') as yaopin_file:
        nb_yao = max(int(line.split(' ')[0]) for line in yaopin_file)

    # image data augment can only be used on server 225 with big memory
    _use_data_augment = False

    # store keras layers_framework (optional, gen_frame_path == None or not)
    frame_name = 'tongue2text_cnnmlp_9585_act(bi)_t3_100it.json'
    gen_frame_path = config['root_path'] + \
        config['cache_path'] + 'keras/' + frame_name

    train_on_batch = False  # switch train_on_batch or not
    if train_new:
        # train a new gen_model and store it on disk; the new one
        # overwrites the old one
        _ = patient_tongue_generator.tongue_basic_gen_trainer(
            tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao,
            gen_model_path=gen_frame_path, train_on_batch=train_on_batch,
            use_data_augment=_use_data_augment)

    # reload the trained gen_model from disk so we can eval/predict
    # directly, without retraining (time saving)
    trained_gen_model = tongue2text_gen.loadStoredModel(
        gen_frame_path,
        gen_frame_path.replace('.json', '.h5'),
        compile_info={
            'recompile': True,
            'aux_output': False,
            'use_tfidf_tensor': False})

    # test
    # gen_output: [[0.8, 0.4, ...], [...], [...], ...]
    gen_output = patient_tongue_generator.basic_gen_predictor_test(
        tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao,
        trained_gen_model)

    # yaopin_dict: {0:'麻黄',1:'桂枝',...}
    yaopin_dict = patient_tongue_generator.load_yaopin_dict(yaopin_path)

    # evaluate on the last 500 samples (the held-out tail of the dataset)
    test_tongue_ids = tongue_ids[-500:]
    test_yaofangs = tongue_yaofangs[-500:]

    '''the evaluation criterion '''
    precisions = []
    recalls = []
    errors = []
    for i, output in enumerate(gen_output):
        # print test data label info:
        print('%d. \npatient tongue id: %s' % (i, test_tongue_ids[i]))
        print('label yaofang:')
        yaofang_label = patient_tongue_generator.sample_yaofang(
            test_yaofangs[i], yaopin_dict)
        print(' '.join(yaofang_label))

        output_index = patient_tongue_generator.threshold_outputfilter(output)
        yaofang_output = patient_tongue_generator.sample_yaofang(
            output_index, yaopin_dict)
        print('predicted yaofang:')
        print(' '.join(yaofang_output) + '\n')

        precision, recall, error = generator_eval.evaluator(
            test_yaofangs[i], output_index)
        precisions.append(precision)
        recalls.append(recall)
        errors.append(error)
        print('------Score: precision: %f, recall: %f, error: %f' %
              (precision, recall, error))
    print(
        '------Average Score: average precision: %f, average recall: %f, error: %f' %
        (np.average(precisions), np.average(recalls), np.average(errors)))
def train_predict_tongue2text_cnn2_withlda_gen(train_new=True):
    """Train (optionally) and evaluate the CNN2 tongue->prescription
    generator with an auxiliary LDA topic output.

    @param train_new: flag of train a new model and replace the model on disk
    """
    patient_tongue_dir = config['root_path'] + \
        config['original_path'] + 'tongue_9585'
    tongue_zhiliao_path = config['root_path'] + \
        config['original_path'] + 'tongue_zhiliao.list'
    yaopin_path = config['root_path'] + \
        config['original_path'] + 'yaopin.vocab'

    tongue_ids, tongue_image_arrays, tongue_yaofangs, tongue_image_shape = \
        patient_tongue_generator.loadDatafromFile(
            patient_tongue_dir, tongue_zhiliao_path,
            image_normal_size=(224, 224))

    # fetch max(id) in yaopin.vocab as nb_yao
    with open(yaopin_path, 'r') as yaopin_file:
        nb_yao = max(int(line.split(' ')[0]) for line in yaopin_file)

    # set True to use tfidf_tensor
    _use_tfidf_tensor = False
    # image data augment can only be used on server 225 with big memory
    _use_data_augment = False

    # the gensim LDA topic model used for the auxiliary output
    lda_model_name = 'tongue_9585_gensim_lda.topic'
    lda_model_path = config['root_path'] + \
        config['cache_path'] + 'nlp/' + lda_model_name
    # first time is True (rebuild the LDA model), otherwise False
    _lda_replace = False

    if _use_tfidf_tensor:
        frame_name = 'tongue2text_cnn2passmlp_lda_9585_act(tfidf)_t3_100it.json'
    else:
        frame_name = 'tongue2text_cnn2passmlp_lda_9585_act(bi)_t3_100it.json'
    gen_frame_path = config['root_path'] + \
        config['cache_path'] + 'keras/' + frame_name

    if train_new:
        # train a new gen_model with LDA and store it on disk; the new
        # one overwrites the old one
        _ = patient_tongue_generator.tongue_gen_withlda_trainer(
            tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao,
            lda_model_path, gen_model_path=gen_frame_path,
            lda_replace=_lda_replace, use_tfidf_tensor=_use_tfidf_tensor,
            use_data_augment=_use_data_augment)

    # reload the trained gen_model from disk so we can eval/predict
    # directly, without retraining (time saving)
    trained_gen_model = tongue2text_gen.loadStoredModel(
        gen_frame_path,
        gen_frame_path.replace('.json', '.h5'),
        compile_info={
            'recompile': True,
            'aux_output': True,
            'use_tfidf_tensor': _use_tfidf_tensor})

    # test
    # gen_output: [[0.8, 0.4, ...], [...], [...], ...]
    gen_output_list = patient_tongue_generator.gen_withlda_predictor_test(
        tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao,
        trained_gen_model, lda_model_path,
        use_tfidf_tensor=_use_tfidf_tensor)
    # just keep the main gen_output, drop the aux_output
    gen_output = gen_output_list[0]
    del gen_output_list

    # yaopin_dict: {0:'麻黄',1:'桂枝',...}
    yaopin_dict = patient_tongue_generator.load_yaopin_dict(yaopin_path)

    # evaluate on the last 500 samples (the held-out tail of the dataset)
    test_tongue_ids = tongue_ids[-500:]
    test_yaofangs = tongue_yaofangs[-500:]

    '''the evaluation criterion '''
    precisions = []
    recalls = []
    errors = []
    for i, output in enumerate(gen_output):
        # print test data label info:
        print('%d. \npatient tongue id: %s' % (i, test_tongue_ids[i]))
        print('label yaofang:')
        yaofang_label = patient_tongue_generator.sample_yaofang(
            test_yaofangs[i], yaopin_dict)
        print(' '.join(yaofang_label))

        output_index = patient_tongue_generator.threshold_outputfilter(output)
        yaofang_output = patient_tongue_generator.sample_yaofang(
            output_index, yaopin_dict)
        print('predicted yaofang:')
        print(' '.join(yaofang_output) + '\n')

        precision, recall, error = generator_eval.evaluator(
            test_yaofangs[i], output_index)
        precisions.append(precision)
        recalls.append(recall)
        errors.append(error)
        print('------Score: precision: %f, recall: %f, error: %f' %
              (precision, recall, error))
    print(
        '------Average Score: average precision: %f, average recall: %f, error: %f' %
        (np.average(precisions), np.average(recalls), np.average(errors)))
def train_predict_tongue2text_sklearn_gen(step=0):
    """Two-phase trainer/evaluator for the keras+sklearn generator.

    @param step: 0: keras train (store the feature-extractor model only),
                 1: load keras model, train sk_classifier and test

    BUG FIX: the evaluation section uses names (`trained_tongue_gen_model`,
    `trained_tongue_gen_classifier`) that are only bound in the step-1
    branch, so calling with step=0 previously crashed with NameError right
    after training.  Step 0 now returns once the keras model is stored.
    """
    patient_tongue_dir = config['root_path'] + \
        config['original_path'] + 'tongue_9585'
    tongue_zhiliao_path = config['root_path'] + \
        config['original_path'] + 'tongue_zhiliao.list'
    yaopin_path = config['root_path'] + \
        config['original_path'] + 'yaopin.vocab'

    # tongue_ids: [01012045534615_1_4_7, ...]
    # tongue_image_arrays: [np.array(pixels matrix of image), ...]
    # tongue_yaofangs: [[0, 1, 2, 3], [4, 5, 6, 7], ...]
    tongue_ids, tongue_image_arrays, tongue_yaofangs, tongue_image_shape = \
        patient_tongue_generator.loadDatafromFile(
            patient_tongue_dir, tongue_zhiliao_path,
            image_normal_size=(224, 224))

    # fetch max(id) in yaopin.vocab as nb_yao
    with open(yaopin_path, 'r') as yaopin_file:
        nb_yao = max(int(line.split(' ')[0]) for line in yaopin_file)

    frame_name = 'test_tongue2text_cnn2mlp_9585_act(bi)_t3_100it.json'
    gen_frame_path = config['root_path'] + \
        config['cache_path'] + 'keras/' + frame_name

    if step == 0:
        # train a new sklearn_gen_model
        # @todo: need to storage keras feature scratch and sklearn generator
        # on disk together (with same type names)
        trained_gen_model = \
            patient_tongue_generator.tongue_sklearn_gen_keras_trainer(
                tongue_image_arrays, tongue_yaofangs, tongue_image_shape,
                nb_yao)
        # store keras layers_framework (optional)
        tongue2text_sklearn_gen.storageKerasModel(
            model=trained_gen_model, frame_path=gen_frame_path)
        # evaluation needs the sklearn classifier produced by step 1
        return

    if step != 1:
        return

    # load keras model from disk first and train the sklearn classifier
    gen_record_path = gen_frame_path.replace('.json', '.h5')
    print('load keras model from disk...')
    trained_tongue_gen_model = tongue2text_sklearn_gen.loadStoredKerasModel(
        gen_frame_path, gen_record_path, recompile=True)
    trained_tongue_gen_classifier = \
        patient_tongue_generator.tongue_sklearn_gen_sk_trainer(
            tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao,
            trained_tongue_gen_model)

    # test the trained sklearn classifier-generator
    # set the output_type of sklearn classifier-generator (proba or not)
    _proba_predict = True
    gen_output = patient_tongue_generator.sklearn_gen_predictor_test(
        tongue_image_arrays, tongue_yaofangs, tongue_image_shape, nb_yao,
        trained_tongue_gen_model, trained_tongue_gen_classifier,
        proba_predict=_proba_predict)
    print(gen_output[0])

    # yaopin_dict: {0:'麻黄',1:'桂枝',...}
    yaopin_dict = patient_tongue_generator.load_yaopin_dict(yaopin_path)

    test_tongue_ids = tongue_ids[:200]
    test_yaofangs = tongue_yaofangs[:200]

    '''the evaluation criterion '''
    precisions = []
    recalls = []
    errors = []
    for i, output in enumerate(gen_output):
        # print test data label info:
        print('%d. \npatient tongue id: %s' % (i, test_tongue_ids[i]))
        print('label yaofang:')
        yaofang_label = patient_tongue_generator.sample_yaofang(
            test_yaofangs[i], yaopin_dict)
        print(' '.join(yaofang_label))

        if not _proba_predict:
            output_index = patient_tongue_generator.label_outputfilter(output)
        else:
            output_index = patient_tongue_generator.threshold_outputfilter(
                output)
        yaofang_output = patient_tongue_generator.sample_yaofang(
            output_index, yaopin_dict)
        print('predicted yaofang:')
        print(' '.join(yaofang_output) + '\n')

        precision, recall, error = generator_eval.evaluator(
            test_yaofangs[i], output_index)
        precisions.append(precision)
        recalls.append(recall)
        errors.append(error)
        print('------Score: precision: %f, recall: %f, error: %f' %
              (precision, recall, error))
    print('------Average Score: average precision: %f, average recall: %f, error: %f' %
          (np.average(precisions), np.average(recalls), np.average(errors)))