def q5b2():
    """Compare the Q4 optimal (3-layer) and Q5 (4-layer) runs.

    Tabulates convergence epochs and the training_result summary for
    validation accuracy/loss of both runs, then writes the table to
    result/AQ5_result.csv.
    """
    logger.log('Analyzing Q5(b2)...')
    three_layer = histories['Q4']['optimal']
    four_layer = histories['Q5']
    summary = {
        'acc_converge': {
            3: acc_converge_epoch(three_layer['val_accuracy']),
            4: acc_converge_epoch(four_layer['val_accuracy'])
        },
        'loss_converge': {
            3: loss_converge_epoch(three_layer['val_loss']),
            4: loss_converge_epoch(four_layer['val_loss'])
        },
        'final_acc': {
            3: training_result(three_layer['val_accuracy']),
            4: training_result(four_layer['val_accuracy'])
        },
        'final_loss': {
            3: training_result(three_layer['val_loss'], mode='loss'),
            4: training_result(four_layer['val_loss'], mode='loss')
        }
    }
    pd.DataFrame(summary).to_csv('result/AQ5_result.csv')
    logger.log('Saved result to \"result/AQ5_result.csv\"')
def compare():
    """Compare validation MSE across feature subsets and pick one (BQ2).

    Plots the val-MSE curves of the all-features model (hist1) against the
    best 6-feature and best 5-feature RFE runs (hist2), writes summary
    CSVs, and records the winning subset's removed-feature indices back
    into the hyperparameters file.
    """
    logger.log('Compare all features, 6 features and 5 features')
    mse_all = hist1['Q1']['a']['val_mse']
    # RFE run keys are comma-joined removed-feature indices, so one index
    # removed presumably leaves 6 features and two leave 5 — TODO confirm
    # the original feature count against the data-loading code.
    best_6 = val_mse[[c for c in df.columns if len(c.split(",")) == 1]].idxmin()
    mse_6 = hist2['Q2'][best_6]['val_mse']
    best_5 = val_mse[[c for c in df.columns if len(c.split(",")) == 2]].idxmin()
    mse_5 = hist2['Q2'][best_5]['val_mse']
    f, ax = plt.subplots(figsize=(10, 5))
    plt.plot(mse_all, label='all features')
    plt.plot(mse_6, label='6 features')
    plt.plot(mse_5, label='5 features')
    plt.xlabel('epoch', fontsize=18)
    plt.ylabel('mean squared error', fontsize=18)
    plt.legend(loc='upper right', fontsize=18)
    plt.title('MSE for Different Numbers of Input Features', fontsize=20, pad=20)
    # Zoom in on the late-training region of the curves.
    plt.xlim(1000, 5000)
    plt.ylim(0.0058, 0.010)
    plt.xticks(fontsize=12, wrap=True)
    plt.yticks(fontsize=12, wrap=True)
    plt.tight_layout()
    plt.savefig('result/BQ2_compare.png')
    logger.log('Saved result to \"result/BQ2_compare.png\"')
    final = {
        'mse_converge': {
            "all_features": loss_converge_epoch(mse_all),
            "6_features": loss_converge_epoch(mse_6),
            "5_features": loss_converge_epoch(mse_5)
        },
        'final_mse': {
            "all_features": training_result(mse_all, mode='loss'),
            "6_features": training_result(mse_6, mode='loss'),
            "5_features": training_result(mse_5, mode='loss')
        }
    }
    final_df = pd.DataFrame(final)
    final_df.to_csv('result/BQ2_result.csv')
    logger.log('Saved result to \"result/BQ2_result.csv\"')
    # update hyperparameter
    # The winner is the subset with the lowest final MSE; `result` is
    # always one of the three keys below, so `removed` is always bound.
    result = min(final['final_mse'].keys(), key=lambda x: final['final_mse'][x])
    if result == 'all_features':
        removed = []
    elif result == '6_features':
        removed = list(map(int, best_6.split(",")))
    elif result == '5_features':
        removed = list(map(int, best_5.split(",")))
    hyperparameters['input_shape'] = (len(columns) - len(removed), )
    hyperparameters['removed'] = removed
    write_dict_to_json(hyperparameters, args.params)
def compare():
    """Plot validation MSE curves for every Q3 model and save summaries.

    Writes the curve figure to result/BQ3_compare.png, the per-model
    training_result table to result/BQ3_compare.csv and the convergence
    epoch table to result/BQ3_converge_epoch.csv.
    """
    hist_df = pd.DataFrame(histories['Q3'])
    # Explode the per-epoch lists in the 'val_mse' row into long series,
    # one column per model.
    mse_curves = (hist_df[hist_df.index == 'val_mse']
                  .apply(lambda col: col.explode())
                  .reset_index(drop=True))
    f, ax = plt.subplots(1, 1, figsize=(15, 5))
    mse_curves.plot()
    plt.xlabel('epoch', fontsize=15)
    plt.ylabel('mean squared error', fontsize=15)
    plt.legend(loc='upper right', fontsize=12)
    plt.title('MSE for Different Models', fontsize=15, pad=20)
    plt.xlim(500, 10000)
    plt.ylim(0.0055, 0.0115)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.tight_layout()
    plt.savefig('result/BQ3_compare.png')
    logger.log('Saved result to \"result/BQ3_compare.png\"')
    final_table = hist_df.applymap(lambda x: training_result(x, mode='loss'))
    final_table.to_csv('result/BQ3_compare.csv')
    logger.log('Saved result to \"result/BQ3_compare.csv\"')
    converge_table = hist_df.applymap(lambda x: loss_converge_epoch(x))
    converge_table.to_csv('result/BQ3_converge_epoch.csv')
    logger.log('Saved result to \"result/BQ3_converge_epoch.csv\"')
def rfe(n_to_remove, removed=None):
    """Greedy recursive feature elimination by feature index.

    Each round trains one model per candidate feature removal, keeps the
    removal whose run has the lowest training_result of validation MSE,
    logs every run into the module-level ``histories['Q2']`` dict (keyed
    by the comma-joined removed indices), and recurses until
    ``n_to_remove`` reaches zero.

    Args:
        n_to_remove: how many more features to eliminate.
        removed: feature indices already eliminated in earlier rounds.
            Defaults to no removals. (``None`` sentinel instead of a
            mutable ``[]`` default — the classic shared-list pitfall;
            behavior is unchanged since the list was never mutated.)
    """
    if removed is None:
        removed = []
    if n_to_remove == 0:
        return
    logger.log('RFE %s' % (n_to_remove))
    results = dict()
    for i in range(X_train.shape[1]):
        if i in removed:
            continue
        logger.log('Removed %s' % (removed + [i]))
        # One-hidden-layer regressor over the remaining features.
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(
                input_shape=(X_train.shape[1] - len(removed) - 1, )),
            tf.keras.layers.Dense(
                num_neurons,
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta)),
            tf.keras.layers.Dense(
                1,
                activation='sigmoid',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta))
        ])
        model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                      loss=tf.keras.losses.MeanSquaredError(),
                      metrics=['mse'])
        with tqdm(total=epochs) as pbar:
            # Progress bar ticks once per epoch.
            update = tf.keras.callbacks.LambdaCallback(
                on_epoch_end=lambda batch, logs: pbar.update(1))
            hist = model.fit(x=remove_features(X_train, removed + [i]),
                             y=y_train,
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_data=(remove_features(
                                 X_test, removed + [i]), y_test),
                             verbose=0,
                             callbacks=[update])
        key = ",".join(map(str, removed + [i]))
        histories['Q2'][key] = {
            'mse': hist.history['mse'],
            'val_mse': hist.history['val_mse'],
            'loss': hist.history['loss'],
            'val_loss': hist.history['val_loss']
        }
        results[i] = training_result(hist.history['val_mse'], mode='loss')
    # Best candidate = lowest validation MSE summary.
    i = min(results.keys(), key=lambda x: results[x])
    logger.log('Feature %s is removed!' % (i))
    rfe(n_to_remove - 1, removed + [i])
def Q2(x_train, y_train, x_test, y_test):
    """Grid-search the (C1, C2) channel counts for make_model (Q2).

    Trains one SGD model per channel pair with no dropout, streaming each
    run's metrics to ./logs/ and saving each model to ./models/, then
    writes the training_result summary table to
    ./results/c1c2_test_accuracy.csv.

    Returns:
        (opt_c1, opt_c2): the channel pair whose summary validation
        accuracy is highest.
    """
    logger.log('Start Q2')
    num_ch_c1 = [10, 30, 50, 70, 90]
    num_ch_c2 = [20, 40, 60, 80, 100]
    epochs = EPOCHS
    batch_size = BATCH_SIZE
    learning_rate = LR
    use_dropout = False
    # histories[c1][c2] -> per-epoch val_accuracy list.
    histories = {c1: {c2: None for c2 in num_ch_c2} for c1 in num_ch_c1}
    for c1 in num_ch_c1:
        for c2 in num_ch_c2:
            logger.log(f'Train C1={c1} C2={c2}')
            model = make_model(c1, c2, use_dropout)
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
            optimizer_ = 'SGD'  # tag used in the log/model file names
            optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
            model.compile(optimizer=optimizer, loss=loss, metrics='accuracy')
            with tqdm(total=epochs) as pbar:
                # Progress bar ticks once per epoch.
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda batch, logs: pbar.update(1)
                )
                logs = tf.keras.callbacks.CSVLogger(
                    f'./logs/{c1}_{c2}_{optimizer_}_no_dropout',
                    separator=',',
                    append=False
                )
                history = model.fit(
                    x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    validation_data=(x_test, y_test),
                    verbose=0,
                    callbacks=[update, logs]
                )
            model.save(f'./models/{c1}_{c2}_{optimizer_}_no_dropout.h5')
            histories[c1][c2] = history.history['val_accuracy']
    hist_df = pd.DataFrame(histories)
    hist_df = hist_df.applymap(lambda x: training_result(x, mode='acc'))
    hist_df.to_csv('./results/c1c2_test_accuracy.csv', header=True, index=True)
    # Column-wise max then row lookup picks the best (c1, c2) cell.
    opt_c1 = hist_df.max().idxmax()
    opt_c2 = hist_df[opt_c1].idxmax()
    logger.log(f'Best C1={opt_c1} C2={opt_c2}')
    logger.end('Done Q2')
    return opt_c1, opt_c2
# NOTE(review): the next line closes a parser.add_argument(...) call whose
# opening lies above this excerpt.
                    required=True)
args = parser.parse_args()
logger = Logger()
logger.log('Starting q2_analyze.py...')
# Two result files are expected: hist1 (Q1 runs) and hist2 (Q2/RFE runs).
logger.log('Loading \"' + args.data[0] + '\" and \"' + args.data[1] + '\"')
hist1 = read_json_to_dict(args.data[0])
hist2 = read_json_to_dict(args.data[1])
# Hyperparameters
hyperparameters = read_json_to_dict(args.params)
# Setup data to be used
# df: one row per metric, one column per RFE run; each cell reduced to its
# training_result summary (mode='loss').
df = pd.DataFrame(
    hist2['Q2']).applymap(lambda x: training_result(x, mode='loss'))
columns = hist2['columns']


def decode_col_name(col):
    """Translate an RFE key (comma-joined removed indices) into the
    comma-separated names of the features that remain.

    Note: building the remainder via set() loses the original column
    order — presumably acceptable for display purposes.
    """
    idx = list(map(int, col.split(",")))
    removed_col = [columns[i] for i in idx]
    remaining_cols = list(set(columns) - set(removed_col))
    return ", ".join(remaining_cols)


# Series of summary val_mse values, indexed by RFE run key.
val_mse = df[df.index == 'val_mse'].squeeze()


def corr_coef():
    # NOTE(review): function body appears truncated in this excerpt.
    logger.log('Plotting correlation coefficient of features...')
from utils.dict_json import read_json_to_dict
from utils.acc_loss import acc_converge_epoch, loss_converge_epoch, training_result

# Command-line interface: path to the cross-validation result JSON.
parser = argparse.ArgumentParser()
parser.add_argument('-D', '--data', help='Path to result json file',
                    required=True)
args = parser.parse_args()
logger = Logger()
logger.log('Starting q2_analyze.py...')
logger.log('Loading \"' + args.data + '\"')
histories = read_json_to_dict(args.data)
# One column per batch size; each cell is a per-fold list of per-epoch
# values (accuracy curves / epoch timings).
batch_epoch = pd.DataFrame(histories['Q2']['cv']['accuracy'])
time_per_epoch = pd.DataFrame(histories['Q2']['cv']['time'])
epoch_to_converge = batch_epoch.applymap(lambda x: acc_converge_epoch(x))
cv_accuracy = batch_epoch.applymap(lambda x: training_result(x))
# Mean (over folds) of epochs-to-converge x time-per-epoch.
total_time_to_converge = (epoch_to_converge * time_per_epoch).mean()
print(epoch_to_converge)
epoch_to_converge = epoch_to_converge.mean()
print(cv_accuracy)
cv_accuracy = cv_accuracy.mean()
batch_size = histories['Q2']['optimal']['optimal_batch']


def q2a1():
    # Plot per-fold accuracy curves, one subplot per batch size.
    # The [1:] slice drops the first epoch from each curve.
    logger.log('Analyzing Q2(a1)...')
    f, ax = plt.subplots(5, 1, figsize=(10, 25))
    for i, batch in enumerate(batch_epoch.columns, start=0):
        ax[i].plot(batch_epoch[batch][0][1:], label='Fold 1')
        ax[i].plot(batch_epoch[batch][1][1:], label='Fold 2')
        # NOTE(review): function body appears truncated in this excerpt.
def cross_validation():
    """K-fold cross-validation over candidate batch sizes (Q2).

    Trains a one-hidden-layer softmax classifier for every fold/batch-size
    combination, recording the per-epoch validation-accuracy curve and the
    mean wall-time per epoch in histories['Q2']['cv']. The batch size that
    maximises a combined accuracy/time score is written back to the
    hyperparameters file.
    """
    input_shape = hyperparameters['input_shape']
    num_classes = hyperparameters['num_classes']
    epochs = 500
    batch_sizes = [4, 8, 16, 32, 64]
    num_neurons = hyperparameters['num_neurons']
    alpha = hyperparameters['alpha']
    histories['Q2'] = {
        'cv': {
            'accuracy': {batch: [] for batch in batch_sizes},
            'time': {batch: [] for batch in batch_sizes},
        },
        'optimal': dict()
    }
    logger.log('Starting cross validation...')
    X, y = dataset.get_train()
    X_test, y_test = dataset.get_test()  # NOTE(review): unused in this function
    for fold, (train_index, valid_index) in enumerate(dataset.get_kfold(), start=1):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        for batch in batch_sizes:
            logger.log('Fold %s Batch Size %s' % (fold, batch))
            with tqdm(total=epochs, desc='Fold %s Batch Size %s' % (fold, batch)) as pbar:
                # Progress bar ticks once per epoch. (The lambda's `batch`
                # parameter shadows the loop variable; it is unused.)
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda batch, logs: pbar.update(1)
                )
                model = tf.keras.Sequential([
                    tf.keras.layers.InputLayer(input_shape=input_shape),
                    tf.keras.layers.Dense(num_neurons, activation='relu'),
                    tf.keras.layers.Dense(num_classes, activation='softmax')
                ])
                model.compile(
                    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                    metrics=['accuracy']
                )
                start = time.time()
                history = model.fit(
                    x=X_train, y=y_train,
                    batch_size=batch,
                    epochs=epochs,
                    validation_data=(X_valid, y_valid),
                    verbose=0,
                    callbacks=[update]
                )
                end = time.time()
                histories['Q2']['cv']['accuracy'][batch].append(history.history['val_accuracy'])
                # Mean wall-time per epoch for this run.
                histories['Q2']['cv']['time'][batch].append((end - start) / epochs)
    batch_epoch = pd.DataFrame(histories['Q2']['cv']['accuracy'])
    time_per_epoch = pd.DataFrame(histories['Q2']['cv']['time'])
    epoch_to_converge = batch_epoch.applymap(lambda x: acc_converge_epoch(x))
    cv_accuracy = batch_epoch.applymap(lambda x: training_result(x)).mean()
    total_time_to_converge = (epoch_to_converge * time_per_epoch).mean()

    def standardize(arr):
        # Z-score so time and accuracy are on comparable scales.
        return (arr - arr.mean()) / arr.std()

    def deciding_factor(total_time, acc):
        # Multiplicative score: high accuracy and low time-to-converge
        # both push the score up; idxmax picks the winning batch size.
        time_score = np.exp(-standardize(total_time))
        acc_score = np.exp(standardize(acc))
        print(acc_score * time_score)
        return int((acc_score * time_score).idxmax())

    hyperparameters['batch_size'] = deciding_factor(total_time_to_converge, cv_accuracy)
    logger.log('Optimal batch size: %s' % hyperparameters['batch_size'])
    write_dict_to_json(hyperparameters, args.params)
    logger.log('Done cross validation')
def cross_validation():
    """K-fold cross-validation over L2 weight-decay values (Q4).

    For each fold and each decay value b, trains a one-hidden-layer
    softmax classifier with L2 regularisation on both layers, recording
    the per-epoch validation-accuracy curve and mean wall-time per epoch
    in histories['Q4']['cv']. The decay with the highest mean CV accuracy
    summary is written back to the hyperparameters file.
    """
    input_shape = hyperparameters['input_shape']
    num_classes = hyperparameters['num_classes']
    epochs = 500
    batch_size = hyperparameters['batch_size']
    num_neurons = hyperparameters['num_neurons']
    alpha = hyperparameters['alpha']
    beta = [0, 1e-3, 1e-6, 1e-9, 1e-12]
    histories['Q4'] = {
        'cv': {
            'accuracy': {b: [] for b in beta},
            'time': {b: [] for b in beta},
        },
        'optimal': dict()
    }
    logger.log('Starting cross validation...')
    X, y = dataset.get_train()
    X_test, y_test = dataset.get_test()  # NOTE(review): unused in this function
    for fold, (train_index, valid_index) in enumerate(dataset.get_kfold(), start=1):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        for b in beta:
            logger.log('Fold %s Decay %s' % (fold, b))
            with tqdm(total=epochs, desc='Fold %s Decay %s' % (fold, b)) as pbar:
                # Progress bar ticks once per epoch.
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda batch, logs: pbar.update(1)
                )
                model = tf.keras.Sequential([
                    tf.keras.layers.InputLayer(input_shape=input_shape),
                    tf.keras.layers.Dense(num_neurons, activation='relu',
                                          kernel_regularizer=tf.keras.regularizers.l2(l=b)),
                    tf.keras.layers.Dense(num_classes, activation='softmax',
                                          kernel_regularizer=tf.keras.regularizers.l2(l=b))
                ])
                model.compile(
                    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                    metrics=['accuracy']
                )
                start = time.time()
                history = model.fit(
                    x=X_train, y=y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_valid, y_valid),
                    verbose=0,
                    callbacks=[update]
                )
                end = time.time()
                histories['Q4']['cv']['accuracy'][b].append(history.history['val_accuracy'])
                # Mean wall-time per epoch for this run.
                histories['Q4']['cv']['time'][b].append((end - start) / epochs)
    beta_epoch = pd.DataFrame(histories['Q4']['cv']['accuracy'])
    cv_accuracy = beta_epoch.applymap(lambda x: training_result(x)).mean()
    # Best decay = column with the highest mean CV accuracy summary.
    hyperparameters['beta'] = float(cv_accuracy.idxmax())
    write_dict_to_json(hyperparameters, args.params)
    logger.log('Done cross validation')
def rfe(n_features, features_left=None):
    """Greedy recursive feature elimination by column name.

    Each round trains one model per candidate single-column removal, keeps
    the candidate set with the lowest training_result of validation MSE,
    logs every run into the module-level ``histories['Q2']`` dict (keyed
    by the ", "-joined remaining column names), and recurses until only
    ``n_features`` columns remain.

    Args:
        n_features: target number of features to keep.
        features_left: column names still in play. Defaults to an empty
            set of features, which makes the call a no-op — same as the
            original ``[]`` default, but via a ``None`` sentinel to avoid
            the mutable-default-argument pitfall.
    """
    if features_left is None:
        features_left = []
    if len(features_left) <= n_features:
        return
    logger.log('RFE %s' % (len(features_left) - 1))
    results = dict()
    for i, col in enumerate(features_left):
        # Candidate set: every current feature except the i-th one.
        columns = list(features_left[0:i]) + list(
            features_left[i + 1:len(features_left)])
        dataset = PreprocessDataset(df=df,
                                    feature_columns=columns,
                                    label_column=df.columns[-1],
                                    test_ratio=0.3,
                                    fold=5)
        X_train, y_train = dataset.get_train()
        X_test, y_test = dataset.get_test()
        logger.log('Features %s' % (", ".join(columns)))
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(len(columns), )),
            tf.keras.layers.Dense(
                num_neurons,
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta)),
            tf.keras.layers.Dense(
                1,
                activation='sigmoid',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta))
        ])
        model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                      loss=tf.keras.losses.MeanSquaredError(),
                      metrics=['mse'])
        with tqdm(total=epochs) as pbar:
            # Progress bar ticks once per epoch.
            update = tf.keras.callbacks.LambdaCallback(
                on_epoch_end=lambda batch, logs: pbar.update(1))
            hist = model.fit(x=X_train,
                             y=y_train,
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_data=(X_test, y_test),
                             verbose=0,
                             callbacks=[update])
        key = ", ".join(columns)
        histories['Q2'][key] = {
            'mse': hist.history['mse'],
            'val_mse': hist.history['val_mse'],
            'loss': hist.history['loss'],
            'val_loss': hist.history['val_loss']
        }
        results[key] = training_result(hist.history['val_mse'], mode='loss')
    cols = min(results.keys(), key=lambda x: results[x])
    logger.log('Features left %s' % (cols))
    # NOTE(review): splitting on ", " mirrors the join above but assumes
    # no column name itself contains ", ".
    rfe(n_features, cols.split(", "))
def Q5():
    """Produce the Q5 comparison figures from saved training logs.

    Reads per-model CSV logs from ./logs/ and the timing table from
    ./results/time.csv, then saves bar charts of per-epoch time and of
    the accuracy/loss summaries (dropout vs no-dropout), plus per-epoch
    accuracy/loss curves, into ./results/.
    """
    # Log-file name -> human-readable label.
    models = {
        'char_cnn_Adam_no_dropout': 'Char CNN No Dropout',
        'char_gru_Adam_no_dropout': 'Char GRU No Dropout',
        'word_cnn_Adam_no_dropout': 'Word CNN No Dropout',
        'word_gru_Adam_no_dropout': 'Word GRU No Dropout',
        'char_cnn_Adam_dropout': 'Char CNN Dropout',
        'char_gru_Adam_dropout': 'Char GRU Dropout',
        'word_cnn_Adam_dropout': 'Word CNN Dropout',
        'word_gru_Adam_dropout': 'Word GRU Dropout'
    }
    # NOTE(review): this local name shadows the stdlib `time` module within
    # the function. The /250 presumably converts total time to per-epoch
    # time (250 epochs?) — confirm against how time.csv is written.
    time = pd.read_csv('./results/time.csv', index_col=0)
    time = time.div(250)
    f, ax = plt.subplots()
    time.plot(kind='barh', ax=ax)
    ax.set_title('Model Timing')
    ax.set_xlabel('time per epoch(s)')
    ax.set_ylabel('model')
    # Pad the x-axis by 20% of the data range so bar labels fit.
    x_extra = (max(time.max()) - min(time.min())) * 0.2
    ax.set_xlim((max(0, min(time.min()) - x_extra), max(time.max()) + x_extra))
    for p in ax.patches:
        # Annotate each bar with its value, just past the bar end.
        ax.annotate("{:.2f}".format(p.get_width()),
                    (p.get_width() + 0.1, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig(f'./results/Q5_time.png')
    plt.close()
    results = {
        model: pd.read_csv('./logs/' + model)
        for model in models.keys()
    }
    # Rows = model family, columns = Dropout / No Dropout; cells hold the
    # full per-epoch series until reduced by training_result below.
    acc_df = pd.DataFrame({
        'No Dropout': {
            models[model].replace(' No Dropout', ''):
            results[model]['val_accuracy']
            for model in models if 'no_dropout' in model
        },
        'Dropout': {
            models[model].replace(' Dropout', ''):
            results[model]['val_accuracy']
            for model in models
            if 'dropout' in model and 'no_dropout' not in model
        }
    })
    loss_df = pd.DataFrame({
        'No Dropout': {
            models[model].replace(' No Dropout', ''):
            results[model]['val_loss']
            for model in models if 'no_dropout' in model
        },
        'Dropout': {
            models[model].replace(' Dropout', ''):
            results[model]['val_loss']
            for model in models
            if 'dropout' in model and 'no_dropout' not in model
        }
    })
    acc_df = acc_df.applymap(lambda x: training_result(x, mode='acc'))
    f, ax = plt.subplots()
    acc_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Accuracies Comparison')
    ax.set_xlabel('accuracy')
    ax.set_ylabel('model')
    x_extra = (max(acc_df.max()) - min(acc_df.min())) * 0.2
    ax.set_xlim(
        (max(0, min(acc_df.min()) - x_extra),
         max(acc_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.005, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig(f'./results/Q5_accuracy_comparison.png')
    plt.close()
    loss_df = loss_df.applymap(lambda x: training_result(x, mode='loss'))
    f, ax = plt.subplots()
    loss_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Loss Comparison')
    ax.set_xlabel('loss')
    ax.set_ylabel('model')
    x_extra = (max(loss_df.max()) - min(loss_df.min())) * 0.2
    ax.set_xlim(
        (max(0, min(loss_df.min()) - x_extra),
         max(loss_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.05, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig(f'./results/Q5_loss_comparison.png')
    plt.close()
    # Per-epoch curves, all models on one axis.
    acc_results = pd.DataFrame(
        {models[model]: results[model]['val_accuracy'] for model in results})
    acc_results.plot()
    plt.title('Model Test Accuracies')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./results/Q5_accuracy_epoch.png')
    plt.close()
    loss_results = pd.DataFrame(
        {models[model]: results[model]['val_loss'] for model in results})
    loss_results.plot()
    plt.title('Model Test Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./results/Q5_loss_epoch.png')
    plt.close()
def Q6():
    """Produce the Q6 RNN-architecture comparison figures.

    Reads per-model CSV logs from ./logs/, draws per-model plots via
    LossAccPlot, then saves char-vs-word bar charts of the accuracy/loss
    summaries and per-epoch accuracy/loss curves into ./results/.
    """
    # Log-file name -> human-readable label.
    models = {
        'char_gru_Adam_no_dropout': 'Char GRU',
        'char_vanilla_Adam': 'Char Vanilla',
        'char_lstm_Adam': 'Char LSTM',
        'char_gru_2_layers_Adam': 'Char 2-Layer GRU',
        'char_gru_Adam_gradient_clipping': 'Char GRU with\nGradient Clipping',
        'word_gru_Adam_no_dropout': 'Word GRU',
        'word_vanilla_Adam': 'Word Vanilla',
        'word_lstm_Adam': 'Word LSTM',
        'word_gru_2_layers_Adam': 'Word 2-Layer GRU',
        'word_gru_Adam_gradient_clipping': 'Word GRU with\nGradient Clipping'
    }
    for model in models:
        LossAccPlot('./logs/' + model, models[model])
    results = {
        model: pd.read_csv('./logs/' + model)
        for model in models.keys()
    }
    # Rows = architecture, columns = Char / Word; cells hold the full
    # per-epoch series until reduced by training_result below.
    acc_df = pd.DataFrame({
        'Char': {
            models[model].replace('Char ', ''): results[model]['val_accuracy']
            for model in models if 'char' in model
        },
        'Word': {
            models[model].replace('Word ', ''): results[model]['val_accuracy']
            for model in models if 'word' in model
        }
    })
    loss_df = pd.DataFrame({
        'Char': {
            models[model].replace('Char ', ''): results[model]['val_loss']
            for model in models if 'char' in model
        },
        'Word': {
            models[model].replace('Word ', ''): results[model]['val_loss']
            for model in models if 'word' in model
        }
    })
    acc_df = acc_df.applymap(lambda x: training_result(x, mode='acc'))
    f, ax = plt.subplots(figsize=(7.4, 5.8))
    acc_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Accuracies Comparison')
    ax.set_xlabel('accuracy')
    ax.set_ylabel('model')
    # Pad x-limits by 20% of the data range so bar labels fit.
    x_extra = (max(acc_df.max()) - min(acc_df.min())) * 0.2
    ax.set_xlim(
        (max(0, min(acc_df.min()) - x_extra),
         max(acc_df.max()) + x_extra))
    for p in ax.patches:
        # Annotate each bar with its value, just past the bar end.
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.005, p.get_y() + p.get_height() / 2),
                    va='center')
    ax.legend(loc='right', bbox_to_anchor=(1.0, 0.3))
    plt.tight_layout()
    plt.savefig(f'./results/Q6_accuracy_comparison.png')
    plt.close()
    loss_df = loss_df.applymap(lambda x: training_result(x, mode='loss'))
    f, ax = plt.subplots(figsize=(7.4, 5.8))
    loss_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Loss Comparison')
    ax.set_xlabel('loss')
    ax.set_ylabel('model')
    x_extra = (max(loss_df.max()) - min(loss_df.min())) * 0.2
    ax.set_xlim(
        (max(0, min(loss_df.min()) - x_extra),
         max(loss_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.05, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig(f'./results/Q6_loss_comparison.png')
    plt.close()
    # Per-epoch curves, split into char- and word-level model families.
    char_acc_results = pd.DataFrame({
        models[model].replace('Char ', ''): results[model]['val_accuracy']
        for model in results if 'char' in model
    })
    char_acc_results.plot()
    plt.title('Char Model Test Accuracies')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./results/Q6_char_accuracy_epoch.png')
    plt.close()
    char_loss_results = pd.DataFrame({
        models[model].replace('Char ', ''): results[model]['val_loss']
        for model in results if 'char' in model
    })
    char_loss_results.plot()
    plt.title('Char Model Test Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./results/Q6_char_loss_epoch.png')
    plt.close()
    word_acc_results = pd.DataFrame({
        models[model].replace('Word ', ''): results[model]['val_accuracy']
        for model in results if 'word' in model
    })
    word_acc_results.plot()
    plt.title('Word Model Test Accuracies')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./results/Q6_word_accuracy_epoch.png')
    plt.close()
    word_loss_results = pd.DataFrame({
        models[model].replace('Word ', ''): results[model]['val_loss']
        for model in results if 'word' in model
    })
    word_loss_results.plot()
    plt.title('Word Model Test Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./results/Q6_word_loss_epoch.png')
    plt.close()
import matplotlib.pyplot as plt
from utils.logger import Logger
from utils.dict_json import read_json_to_dict
from utils.acc_loss import acc_converge_epoch, loss_converge_epoch, smooth_curve, training_result

# Command-line interface: path to the Q3 cross-validation result JSON.
parser = argparse.ArgumentParser()
parser.add_argument('-D', '--data', help='Path to result json file',
                    required=True)
args = parser.parse_args()
logger = Logger()
logger.log('Starting q3_analyze.py...')
logger.log('Loading \"' + args.data + '\"')
histories = read_json_to_dict(args.data)
# One column per hidden-layer size; each cell is a per-fold list of
# per-epoch validation accuracies.
neuron_epoch = pd.DataFrame(histories['Q3']['cv']['accuracy'])
cv_accuracy = neuron_epoch.applymap(lambda x: training_result(x))
print(cv_accuracy)
cv_accuracy = cv_accuracy.mean()
num_neurons = histories['Q3']['optimal']['optimal_num']


def q3a1():
    # Plot per-fold accuracy curves, one subplot per neuron count.
    # The [1:] slice drops the first epoch from each curve.
    logger.log('Analyzing Q3(a1)...')
    f, ax = plt.subplots(5, 1, figsize=(10, 25))
    for i, num in enumerate(neuron_epoch.columns, start=0):
        ax[i].plot(neuron_epoch[num][0][1:], label='Fold 1')
        ax[i].plot(neuron_epoch[num][1][1:], label='Fold 2')
        ax[i].plot(neuron_epoch[num][2][1:], label='Fold 3')
        ax[i].plot(neuron_epoch[num][3][1:], label='Fold 4')
        ax[i].plot(neuron_epoch[num][4][1:], label='Fold 5')
        # NOTE(review): function body appears truncated in this excerpt.