def test():
    """Evaluate the restored text CNN on the pre-segmented test batches.

    Builds a ``Test_CNN_NET`` from ``Text_train.config``, restores the
    checkpoint recorded under ``./old_model/`` and then, for each of
    ``config.test_steps`` batches, reads ``./jieba_treat_test/<i>.txt``
    (content) and ``./jieba_treat_test/<i>_l.txt`` (labels), converts them
    with the ``raw`` helpers and prints the accuracy for that batch.
    """
    # Construct a Test_CNN_NET obj.
    text_cnn = Test_CNN_NET(Text_train.config)
    with tf.Session() as sess:
        # Load the model.
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./old_model/')
        saver.restore(sess, ckpt.model_checkpoint_path)

        # Get sentences and convert them into vectors.
        for i in range(text_cnn.config.test_steps):
            batch_path = './jieba_treat_test/' + str(i)
            # Context managers guarantee both files are closed even when a
            # helper or the session run raises.
            with open(batch_path + '.txt', 'r') as content_file, \
                    open(batch_path + '_l.txt', 'r') as label_file:
                # Load test data and labels.
                all_sentence_words = raw.get_all_words(content_file)
                embeddings = np.array(raw.get_embeddings(all_sentence_words))
                labels = np.array(raw.get_labels(label_file))

                # Feed the data into the network.
                # NOTE(review): the configured keep_prob is fed at evaluation
                # time; confirm the test config sets it to 1.0.
                feed_dict = {
                    text_cnn.input_x: embeddings,
                    text_cnn.input_label: labels,
                    text_cnn.keep_prob: text_cnn.config.keep_prob,
                }

                # Compute the accuracy.
                acc = sess.run(text_cnn.accuracy, feed_dict)
                print("step {}, acc {:g}".format(i, acc))
def train_model(model, max_len, get_cross_validation=False, non_zero=False):
    """Train and evaluate a model on the 0/1 frame segmentation task.

    Loads the feature tensor and the per-frame 0/1 labels, builds the
    network, fits it, evaluates it, and predicts frame labels.

    Args:
        model: model constructor. Only ``ED_TCN`` is instantiated here; any
            other value is used as passed in.
        max_len: the number of frames for each video.
        get_cross_validation: whether to cross validate (4 folds, each
            filling 50 rows of the prediction array) instead of using a
            single 80/20 train/test split.
        non_zero: whether to keep only the non-zero data (filtered through
            ``get_data.non_zero_data``). Only honoured when
            ``get_cross_validation`` is false.

    Returns:
        loss_mean: loss for this model (mean over folds when cross
            validating).
        acc_mean: accuracy for this model (mean over folds when cross
            validating).
        classes: predictions. When cross validating these cover all the
            videos.
        y_test: test ground truth. Equal to all labels if using cross
            validation.
    """
    import sys  # local import: only needed for the print threshold below

    n_folds = 4
    fold_size = 50
    n_videos = 200

    x = get_data.get_feature_tensor(feature_dir, feature_name, max_len)
    y = get_data.get_frame_01_labels(feature_dir, feature_name, max_len)
    y_video = get_data.get_labels(label_dir, label_name)
    y = np.array(y)
    # Single-argument print calls emit the same text on Python 2 and 3.
    print('x {} y {}'.format(x.shape, y.shape))
    # threshold='nan' is rejected by current NumPy; sys.maxsize is the
    # documented way to always print full arrays.
    np.set_printoptions(threshold=sys.maxsize)

    if model == ED_TCN:
        n_nodes = [512, 512]  # , 1024]
        pool_sizes = [2, 2]  # , 2]
        conv_lens = [10, 10]  # , 10]
        causal = False
        model = ED_TCN(n_nodes, pool_sizes, conv_lens, 2, 512, max_len,
                       causal=causal, activation='norm_relu',
                       optimizer='rmsprop')

    model.summary()

    loss = np.zeros(n_folds)
    acc = np.zeros(n_folds)
    classes = np.zeros((n_videos, max_len, 2))

    if not get_cross_validation:
        if non_zero:
            x, _, y = get_data.non_zero_data(x, y_video, max_len, y,
                                             use_y_frame=True)
        y_cat = np_utils.to_categorical(y, num_classes=2)
        y_cat = np.reshape(y_cat, (-1, max_len, 2))
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y_cat, test_size=0.2, random_state=1)
        model.fit(x_train, y_train, validation_data=[x_test, y_test],
                  epochs=5)
        loss_and_metrics = model.evaluate(x_test, y_test)
        loss_mean = loss_and_metrics[0]
        acc_mean = loss_and_metrics[1]
        classes = model.predict(x_test)
    else:
        y_cat = np_utils.to_categorical(y, num_classes=2)
        y_cat = np.reshape(y_cat, (n_videos, max_len, 2))
        x_train_cro, y_train_cro, x_test_cro, y_test_cro = \
            train.set_cross_validation(x, y_cat)
        for i in range(n_folds):
            print(i)
            model.fit(x_train_cro[i], y_train_cro[i], batch_size=20)
            loss_and_metrics = model.evaluate(x_test_cro[i], y_test_cro[i])
            loss[i] = loss_and_metrics[0]
            acc[i] = loss_and_metrics[1]
            # Each fold fills its own slice of the prediction array.
            classes[i * fold_size:(i + 1) * fold_size] = model.predict(
                x_test_cro[i])
        loss_mean = np.mean(loss)
        acc_mean = np.mean(acc)
        y_test = y_cat

    print('loss_mean:  {}   acc_mean:  {}'.format(loss_mean, acc_mean))
    return loss_mean, acc_mean, classes, y_test
def set_eval_data(self):
    """Load the evaluation split into ``self.eval_data`` / ``self.eval_labels``.

    The split covers the indices produced by
    ``self.generate_indices(self.TRAIN_SIZE, self.SAMPLES)``. When
    ``args.verbose`` is set, the sample count and the per-class counts are
    printed.
    """
    eval_indices = self.generate_indices(self.TRAIN_SIZE, self.SAMPLES)
    self.eval_data = gd.get_melspectrograms(eval_indices)
    self.eval_labels = np.asarray(gd.get_labels(eval_indices))
    if args.verbose:
        # Report on the labels just stored on this instance rather than on
        # the module-level ``NN`` object.
        unique, counts = np.unique(self.eval_labels, return_counts=True)
        print('Eval samples: {}, classes: {}'.format(
            self.eval_labels.shape[0], dict(zip(unique, counts))))
def set_training_data(self):
    """Load the training split into ``self.train_data`` / ``self.train_labels``.

    The split covers the indices produced by
    ``self.generate_indices(0, self.TRAIN_SIZE)``. When ``args.verbose`` is
    set, the sample count and the per-class counts are printed.
    """
    train_indices = self.generate_indices(0, self.TRAIN_SIZE)
    self.train_data = gd.get_melspectrograms(train_indices)
    self.train_labels = np.asarray(gd.get_labels(train_indices))
    if args.verbose:
        # Report on the labels just stored on this instance rather than on
        # the module-level ``NN`` object.
        unique, counts = np.unique(self.train_labels, return_counts=True)
        print('Train samples: {}, classes: {}'.format(
            self.train_labels.shape[0], dict(zip(unique, counts))))
def get_error_list(indices):
    """Return the ``path`` entries of ``./labels.csv`` for misclassified samples.

    Each index in ``indices`` is evaluated on its own with ``NN.evaluate``;
    an index counts as an error when the reported ``accuracy`` is exactly
    ``0.0``.

    Args:
        indices: iterable of sample indices to check.

    Returns:
        List of ``path`` values, in the order of ``indices``, for the samples
        that were evaluated with zero accuracy.
    """
    import pandas

    label_table = pandas.read_csv('./labels.csv', header=0)

    def _is_misclassified(sample_idx):
        # Evaluate a single-sample batch so accuracy is either 0.0 or 1.0.
        features = gd.get_melspectrograms([sample_idx])
        targets = np.asarray(gd.get_labels([sample_idx]))
        outcome = NN.evaluate(features, targets)
        return outcome['accuracy'] == 0.0

    return [label_table['path'][sample_idx]
            for sample_idx in indices
            if _is_misclassified(sample_idx)]
def train_frame_model(model, y_categorical, max_len, get_cross_validation):
    """Load data, fit, evaluate the frame-label model, and predict labels.

    Args:
        model: model name. Currently ignored: the function always builds
            ``frame_labels_classification(6, max_len)``.
        y_categorical: whether to use the original label or one-hot label.
            True for classification models. False for regression models.
        max_len: the number of frames for each video.
        get_cross_validation: whether to cross validate (4 folds, each
            filling 50 rows of the prediction array) instead of using a
            single 80/20 train/test split.

    Returns:
        classes: predictions. When cross validating these cover all the
            videos.
        y_test: test ground truth. Equal to all labels if using cross
            validation.
    """
    n_folds = 4
    fold_size = 50
    n_videos = 200
    n_classes = 6

    x = get_data.get_frame_labels(feature_dir, feature_name, max_len)
    y = get_data.get_labels(label_dir, label_name)
    y = np.array(y)
    if y_categorical:
        y = np_utils.to_categorical(y)
    # Single-argument print call emits the same text on Python 2 and 3.
    print('{} {}'.format(x.shape, y.shape))

    model = frame_labels_classification(n_classes, max_len)

    if get_cross_validation:
        loss = np.zeros(n_folds)
        acc = np.zeros(n_folds)
        classes = np.zeros((n_videos, n_classes))
        x_train_cro, y_train_cro, x_test_cro, y_test_cro = \
            train.set_cross_validation(x, y)
        # Visit every fold: stopping one short left the last slot of
        # loss/acc at zero (skewing the means) and the last fold_size rows
        # of ``classes`` unpredicted.
        for i in range(n_folds):
            model.fit(x_train_cro[i], y_train_cro[i],
                      validation_data=[x_test_cro[i], y_test_cro[i]],
                      epochs=5)
            loss_and_metrics = model.evaluate(x_test_cro[i], y_test_cro[i])
            loss[i] = loss_and_metrics[0]
            acc[i] = loss_and_metrics[1]
            classes[i * fold_size:(i + 1) * fold_size] = model.predict(
                x_test_cro[i])
        loss_mean = np.mean(loss)
        acc_mean = np.mean(acc)
        y_test = y
    else:
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y, test_size=0.2, random_state=1)
        model.fit(x_train, y_train, validation_data=[x_test, y_test],
                  epochs=5)
        loss_mean, acc_mean = model.evaluate(x_test, y_test)
        classes = model.predict(x_test)

    return classes, y_test
def train():
    """Train the text CNN and save a checkpoint after every epoch.

    Builds a ``CNN_NET`` from the module-level ``config``, then for each of
    ``config.num_epochs`` epochs iterates over ``config.steps`` batches. Each
    batch is read from ``./jieba_treat/<j>.txt`` (content) and
    ``./jieba_treat/<j>_l.txt`` (labels) and converted with the ``raw``
    helpers. The per-step loss and accuracy are appended to the module-level
    ``loss_file`` / ``acc_file`` and printed.
    """
    # Construct a CNN_NET obj.
    text_cnn = CNN_NET(config.word_embedding_length, config.sentence_length,
                       config.learning_rate, config.filter_size,
                       config.num_class, config.regularization_rate)
    with tf.Session() as sess:
        # Define saver and constrain the number of models kept on disk.
        saver = tf.train.Saver(max_to_keep=5)
        # Initialize all of the parameters.
        sess.run(tf.global_variables_initializer())

        for i in range(config.num_epochs):
            print('epoch {}'.format(i))
            for j in range(config.steps):
                batch_path = './jieba_treat/' + str(j)
                # Context managers guarantee both files are closed even when
                # a helper or the session run raises.
                with open(batch_path + '.txt', 'r') as content_file, \
                        open(batch_path + '_l.txt', 'r') as label_file:
                    # Load training data and labels.
                    all_sentence_words = raw.get_all_words(content_file)
                    embeddings = np.array(
                        raw.get_embeddings(all_sentence_words))
                    labels = np.array(raw.get_labels(label_file))

                    # Feed the data into the network.
                    feed_dict = {
                        text_cnn.input_x: embeddings,
                        text_cnn.input_label: labels,
                        text_cnn.keep_prob: config.keep_prob,
                    }

                    # Run one optimisation step and fetch loss and accuracy.
                    loss, _, acc = sess.run(
                        [text_cnn.loss, text_cnn.train_op, text_cnn.accuracy],
                        feed_dict)
                    loss_file.write(str(loss) + '\n')
                    acc_file.write(str(acc) + '\n')
                    print("step {}, loss {:g}, acc {:g}".format(j, loss, acc))

            # Save one checkpoint per epoch.
            saver.save(sess, MODELSAVEPATH + 'epoch_' + str(i) + '.ckpt')
from itertools import cycle

import matplotlib.pyplot as plt
import numpy as np
import feature_extraction
import get_data
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import label_binarize
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve

# setup plot details
colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

# Fit an MLP on the first 5240 tag vectors and the training labels.
X = feature_extraction.get_tags()[:5240]
y = get_data.get_labels()

classifier = MLPClassifier()
classifier.fit(X, y)
y_score = classifier.predict(X)

# from sklearn.metrics import average_precision_score
# average_precision = average_precision_score(get_data.get_validation_labels(), y_score)
#
# print('Average precision-recall score: {0:0.2f}'.format(
#     average_precision))

# Plot the precision-recall curve on the validation split. The labels getter
# must be called: passing the function object itself is not a label array.
# NOTE(review): plot_precision_recall_curve was removed in scikit-learn 1.2;
# newer versions offer PrecisionRecallDisplay.from_estimator instead.
disp = plot_precision_recall_curve(classifier,
                                   get_data.get_validation_features(),
                                   get_data.get_validation_labels())
# disp.ax_.set_title('2-class Precision-Recall curve: '
#                    'AP={0:0.2f}'.format(average_precision))
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
from sklearn.preprocessing import StandardScaler
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import get_data

pd.set_option('display.max_rows', None)

df_features = get_data.get_features()
df_labels = get_data.get_labels()
df_features_validate = get_data.get_validation_features()
df_labels_validate = get_data.get_validation_labels()
df_features_test = get_data.get_test_features()

# Concatenate datasets
dataset_whole = pd.concat(
    [df_features, df_features_validate, df_features_test])

# Fix errors
dataset_whole['title'] = dataset_whole['title'].fillna('N/A')


def remove_errors(self):
    """Replace a non-numeric string with 0; return any other value unchanged.

    Args:
        self: a single cell value. Despite the name this is not an instance;
            the function is applied element-wise to the ``year`` column.

    Returns:
        ``0`` when the value is a string that is not made up solely of
        digits, otherwise the value itself.
    """
    # isinstance also covers str subclasses, which an exact type comparison
    # would let through to the int() conversion below.
    if isinstance(self, str) and not self.isdigit():
        return 0
    return self


dataset_whole['year'] = dataset_whole['year'].apply(remove_errors).apply(int)
import os
import datetime
import keras
import pandas as pd
from get_data import get_generators, get_labels, get_class_weights
from model import model
from utils import show_batch, confusion_matrix_callback
import consts as C

if __name__ == "__main__":
    labels_columns = get_labels().columns
    train_generator, validation_generator = get_generators()
    # imgs, labels = train_generator[0]
    # show_batch(imgs,labels,labels_columns)
    print("start training")

    # -- callbacks --
    # One log directory per run: timestamp followed by the model name.
    log_dir = r"C:\Users\User\PycharmProjects\PlantPathology\logs\fit\\" + datetime.datetime.now(
    ).strftime("%Y%m%d-%H%M%S") + C.MODEL
    # makedirs also creates a missing parent directory, where os.mkdir
    # would fail on a fresh checkout.
    os.makedirs(log_dir)
    csv_logger = keras.callbacks.CSVLogger(
        os.path.join(log_dir, 'training.log'))
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir,
                                                       histogram_freq=0)
    # Halve the learning rate after 3 epochs without val_loss improvement,
    # never going below C.MINIMUM_LR.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                  factor=0.5,
                                                  patience=3,
                                                  min_lr=C.MINIMUM_LR,
                                                  verbose=True)
def train_model(model, y_categorical, max_len, get_cross_validation=False,
                non_zero=False):
    """Load data, compile, fit, evaluate model, and predict labels.

    Args:
        model: model constructor. One of ``TK_TCN_regression``,
            ``TK_TCN_resnet`` or ``TCN_V1`` .. ``TCN_V5``; any other value is
            compiled and used as passed in.
        y_categorical: whether to use the original label or one-hot label.
            True for classification models. False for regression models.
        max_len: the number of frames for each video.
        get_cross_validation: whether to cross validate (4 folds, each
            filling 50 rows of the prediction array) instead of using a
            single 80/20 train/test split.
        non_zero: whether to keep only the non-zero data (filtered through
            ``get_data.non_zero_data``).

    Returns:
        loss_mean: loss for this model (mean over folds when cross
            validating).
        acc_mean: accuracy for classification model (mean over folds when
            cross validating).
        classes: predictions. When cross validating these cover all the
            videos.
        y_test: test ground truth. Equal to all labels if using cross
            validation.
    """
    # for label_numer = 'OPR', labels are [0,1,2,3,4,5]
    n_classes = 6
    n_folds = 4
    fold_size = 50
    n_videos = 200

    x = get_data.get_feature_tensor(feature_dir, feature_name, max_len)
    y = get_data.get_labels(label_dir, label_name)
    if non_zero:
        x, y = get_data.non_zero_data(x, y, max_len, y)
    if y_categorical:
        y = np_utils.to_categorical(y)
    y = np.array(y)
    # Single-argument print call emits the same text on Python 2 and 3.
    print('x {} y {}'.format(x.shape, y.shape))

    # choose model
    if model == TK_TCN_regression:
        model = TK_TCN_regression(n_classes=n_classes, feat_dim=512,
                                  max_len=max_len)
        model.compile(loss='mean_absolute_error', optimizer='sgd',
                      metrics=['accuracy'])
    else:
        # All classification builders share one signature; the first that
        # matches is instantiated.
        classification_builders = (TK_TCN_resnet, TCN_V1, TCN_V2, TCN_V3,
                                   TCN_V4, TCN_V5)
        for builder in classification_builders:
            if model == builder:
                model = builder(n_classes=n_classes, feat_dim=512,
                                max_len=max_len)
                break
        # compile model
        optimizer = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                         decay=0.0)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                      metrics=['categorical_accuracy'])
        # model.compile(loss='mean_absolute_error', optimizer=optimizer,metrics=['categorical_accuracy'])

    # visualize
    # model.summary()

    if get_cross_validation:
        loss = np.zeros(n_folds)
        acc = np.zeros(n_folds)
        classes = np.zeros((n_videos, n_classes))
        x_train_cro, y_train_cro, x_test_cro, y_test_cro = \
            set_cross_validation(x, y)
        # Visit every fold: stopping one short left the last slot of
        # loss/acc at zero (skewing the means) and the last fold_size rows
        # of ``classes`` unpredicted.
        for i in range(n_folds):
            model.fit(x_train_cro[i], y_train_cro[i],
                      validation_data=[x_test_cro[i], y_test_cro[i]],
                      epochs=5)
            loss_and_metrics = model.evaluate(x_test_cro[i], y_test_cro[i])
            loss[i] = loss_and_metrics[0]
            acc[i] = loss_and_metrics[1]
            classes[i * fold_size:(i + 1) * fold_size] = model.predict(
                x_test_cro[i])
        loss_mean = np.mean(loss)
        acc_mean = np.mean(acc)
        y_test = y
    else:
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y, test_size=0.2, random_state=1)
        model.fit(x_train, y_train, validation_data=[x_test, y_test],
                  epochs=5)
        loss_mean, acc_mean = model.evaluate(x_test, y_test)
        classes = model.predict(x_test)

    return loss_mean, acc_mean, classes, y_test