def load_train_val_data(parser):
    print("load_train_val_data ... ")

    # Load the raw records and fit the preprocessor on the training split.
    train = load.load_dataset("data/train.json")
    val = load.load_dataset("data/validation.json")
    preproc = load.preproc(*train)
    train_x, train_y = preproc.process(*train)
    val_x, val_y = preproc.process(*val)
    print("train size : {}, {}".format(len(train_x), len(train_y)))
    print("val size : {}, {}".format(len(val_x), len(val_y)))

    args = parser.parse_args()

    model = architecture.build_model()
    # print(model.summary())

    # Checkpointing, early stopping, and learning-rate scheduling callbacks.
    save_dir = make_save_dir("data/", "model")
    file_name = get_filename_for_saving(save_dir)
    check_pointer = keras.callbacks.ModelCheckpoint(filepath=file_name, save_best_only=False)
    stopping = keras.callbacks.EarlyStopping(patience=10)
    # default_lr is assumed to be the model's initial learning rate, defined at module scope.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, min_lr=default_lr * 0.001)

    model.fit(train_x, train_y,
              batch_size=int(args.batchsize),
              epochs=int(args.epochs),
              validation_data=(val_x, val_y),
              callbacks=[check_pointer, stopping, reduce_lr])
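# A minimal sketch (an assumption, not part of the original source) of the argparse
# parser these entry points expect: load_train_val_data() reads args.batchsize and
# args.epochs, while predict() below reads args.model and args.file_name.
import argparse

def build_arg_parser():
    parser = argparse.ArgumentParser(description="Train / evaluate the model")
    parser.add_argument("--batchsize", default=32, help="mini-batch size passed to model.fit")
    parser.add_argument("--epochs", default=30, help="number of training epochs")
    parser.add_argument("--model", help="path to saved weights, used by predict()")
    parser.add_argument("--file_name", help="CSV path for the bootstrap report written by predict()")
    return parser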
def validation(global_weight=None):
    # val_x, val_y, val_labels, prior, preproc and current_round are expected to be
    # defined at module scope before this is called.
    if global_weight is not None:
        model = architecture.build_model()
        model.set_weights(global_weight)

        print("===> validation start")
        m_probs = model.predict(val_x)

        # One label per record: the committee label is repeated along the time axis.
        committee_labels = np.argmax(val_y, axis=2)
        committee_labels = committee_labels[:, 0]
        print("===================")

        # Divide out the class prior, then take the per-record majority vote.
        temp = []
        preds = np.argmax(m_probs / prior, axis=2)
        for i, j in zip(preds, val_labels):
            t = sst.mode(i[:len(j) - 1])[0][0]
            temp.append(t)
            print(i[:len(j) - 1])
        preds = temp
        print("preds : \n", preds)

        report = skm.classification_report(committee_labels, preds, target_names=preproc.classes, digits=3)
        scores = skm.precision_recall_fscore_support(committee_labels, preds, average=None)
        print("report : \n", report)
        # print("scores : ", scores)

        cm = confusion_matrix(committee_labels, preds)
        print("confusion matrix : \n", cm)

        f1 = f1_score(committee_labels, preds, average='micro')
        print("f1_score : ", f1)

        # *** roc_auc_score - m_probs ***
        m_probs = np.sum(m_probs, axis=1)
        m_probs = m_probs / 71  # one data set max size (element count) -> normalization
        # print(ground_truth.shape, m_probs.shape)
        ovo_auroc = roc_auc_score(committee_labels, m_probs, multi_class='ovo')
        ovr_auroc = roc_auc_score(committee_labels, m_probs, multi_class='ovr')
        print("ovr_auroc : ", ovr_auroc)
        print("ovo_auroc : ", ovo_auroc)

        result = {}
        save_result(model, current_round, result)

        print("===> validation end")
def local_train():
    save_dir = make_save_dir("data/", "model")
    file_name = get_filename_for_saving(save_dir)
    check_pointer = keras.callbacks.ModelCheckpoint(filepath=file_name, save_best_only=False)
    stopping = keras.callbacks.EarlyStopping(patience=10)

    model = architecture.build_model()
    model.fit(train_x, train_y,
              batch_size=32,
              epochs=30,
              validation_data=(val_x, val_y),
              callbacks=[check_pointer, stopping])

    validation(model.get_weights())
def fl_task():
    print("fl task")
    global current_round

    global_round = request_current_round()
    if global_round == current_round:
        # Pull the latest global weights and evaluate them before training locally.
        global_weight = request_global_weight()
        if global_weight is not None:
            validation(global_weight)

        model = architecture.build_model()
        if global_weight is not None:
            model.set_weights(global_weight)

        print("==> local training start")
        # Local training on this client's data runs between these markers (cf. local_train()).
        print("==> local training end")

        update_local_weight(model.get_weights())
        delay_compare_weight()
        current_round += 1
    else:
        # Round has not advanced yet; poll again later.
        delay_compare_weight()
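# Hypothetical sketch (not from the original source) of the polling helper used
# above: wait a fixed interval, then re-enter fl_task() to check whether the
# server has advanced to the next round. The 10-second interval is illustrative.
import threading

def delay_compare_weight():
    threading.Timer(10.0, fl_task).start()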
replay_files = []
# r = root, d = directories, f = files
for r, d, f in os.walk(PATH):
    for file in f:
        if '.json' in file:
            replay_files.append(os.path.join(r, file))

for f in replay_files:
    print(f)

queue = [Queue(32)]
queue_m_sizes = [32]
batch_size = len(replay_files)
# exit(0)

build_model()

frames_node = tf.get_collection('frames')[0]
# can_afford_node = tf.get_collection('can_afford')[0]
turns_left_node = tf.get_collection('turns_left')[0]
my_ships_node = tf.get_collection('my_ships')[0]
moves_node = tf.get_collection('moves')[0]
spawn_node = tf.get_collection('spawn')[0]
loss_node = tf.get_collection('loss')[0]
optimizer_node = tf.get_collection('optimizer')[0]

saver = tf.train.Saver(max_to_keep=1)

# path = random.choice(replay_files)
#
# # Load all training data
# game = utils.Halite()
# # path = '1068739.json'
# game.load_replay(path)
def predict(parser):
    val = load.load_dataset("data/validation_2.json")
    preproc = load.preproc(*val)

    args = parser.parse_args()
    print("args model : ", args.model)

    model = architecture.build_model()
    model.load_weights(args.model)

    # Per-record label sequences straight from the json file.
    with open("data/validation_2.json", "rb") as fid:
        val_labels = [json.loads(l)['labels'] for l in fid]

    # Build a smoothed class prior from the label counts.
    counts = collections.Counter(preproc.class_to_int[l[0]] for l in val_labels)
    counts = sorted(counts.most_common(), key=lambda x: x[0])
    counts = list(zip(*counts))[1]
    print("counts : ", counts)
    smooth = 500
    counts = np.array(counts)[None, None, :]
    total = np.sum(counts) + counts.shape[1]
    print("total : ", total)
    prior = (counts + smooth) / float(total)  # additive smoothing over the label counts
    print("prior : ", prior)

    ecgs, committee_labels = preproc.process(*val)
    m_probs = model.predict(ecgs)

    committee_labels = np.argmax(committee_labels, axis=2)
    committee_labels = committee_labels[:, 0]
    print("===================")

    # Divide out the prior, then take the per-record majority vote.
    temp = []
    preds = np.argmax(m_probs / prior, axis=2)
    for i, j in zip(preds, val_labels):
        t = sst.mode(i[:len(j) - 1])[0][0]
        temp.append(t)
        # print(i[:len(j) - 1])
    preds = temp
    # print("preds : \n", preds)

    report = skm.classification_report(committee_labels, preds, target_names=preproc.classes, digits=3)
    scores = skm.precision_recall_fscore_support(committee_labels, preds, average=None)
    print("report : \n", report)

    cm = confusion_matrix(committee_labels, preds)
    print("confusion matrix : \n", cm)

    f1 = f1_score(committee_labels, preds, average='micro')
    # print("f1_score : ", f1)

    # *** roc_auc_score - m_probs ***
    s_probs = np.sum(m_probs, axis=1)
    s_probs = s_probs / 71  # one data set max size (element count) -> normalization
    # ovo_auroc = roc_auc_score(committee_labels, s_probs, multi_class='ovo')
    ovr_auroc = roc_auc_score(committee_labels, s_probs, multi_class='ovr')
    print("ovr_auroc : ", ovr_auroc)
    # print("ovo_auroc : ", ovo_auroc)

    # --- Bootstrapping: confidence intervals for precision / recall / f1 / AUROC ---
    n_bootstraps = 100
    np.random.seed(3033)

    total_precision = []
    total_recall = []
    total_f1 = []
    total_auroc = []
    precision = []
    recall = []
    f1 = []
    total = []

    for j in range(n_bootstraps):
        # Resample 100 records with replacement.
        indices = np.random.randint(0, len(m_probs), 100)
        # print("indices : ", len(indices))
        # Skip resamples that contain fewer than two classes (AUROC is undefined there).
        if len(np.unique(committee_labels[indices])) < 2:
            continue

        sub_labels = []
        sub_result = []
        sub_probs = []
        # print(indices)
        for i in indices:
            sub_labels.append(committee_labels[i])
            sub_result.append(preds[i])
            sub_probs.append(m_probs[i])

        s_scores = precision_recall_fscore_support(sub_labels, sub_result, labels=[0, 1, 2, 3], average=None)

        # *** roc_auc_score - m_probs ***
        s_p = np.sum(sub_probs, axis=1)
        s_p = s_p / 71  # one data set max size (element count) -> normalization
        # print(sub_labels)
        # print(s_p)
        try:
            s_auroc = roc_auc_score(sub_labels, s_p, multi_class='ovr')
        except ValueError:
            s_auroc = -1
        # print(s_scores)

        precision.append(np.array(s_scores[0]))
        recall.append(np.array(s_scores[1]))
        f1.append(np.array(s_scores[2]))
        # auroc.append(s_auroc)

        total_precision.append(np.average(s_scores[0]))
        total_recall.append(np.average(s_scores[1]))
        total_f1.append(np.average(s_scores[2]))
        total_auroc.append(s_auroc)

    total_precision.sort()
    total_recall.sort()
    total_f1.sort()
    total_auroc.sort()
    # Drop the sentinel values from resamples where AUROC could not be computed.
    while -1 in total_auroc:
        total_auroc.remove(-1)
    # print(total_auroc)

    # Some classes may be missing from a bootstrap resample; treat their zero scores
    # as missing values so they do not drag the class-wise means down.
    precision = np.array(precision)
    precision[precision == .0] = np.nan
    recall = np.array(recall)
    recall[recall == .0] = np.nan
    f1 = np.array(f1)
    f1[f1 == .0] = np.nan
    # print(total_auroc)

    # Per-class 95% intervals (2.5th to 97.5th percentile of the sorted scores).
    for i in range(4):
        pre = precision[:, i]
        pre.sort()
        rec = recall[:, i]
        rec.sort()
        f = f1[:, i]
        f.sort()

        pre = np.round(pre[int(len(pre) * 0.025):int(len(pre) * 0.975)], 3)
        rec = np.round(rec[int(len(rec) * 0.025):int(len(rec) * 0.975)], 3)
        f = np.round(f[int(len(f) * 0.025):int(len(f) * 0.975)], 3)

        item = [
            i,
            "{0} ({1}, {2})".format(np.round(np.nanmean(pre), 3), round(np.nanmin(pre), 3), round(np.nanmax(pre), 3)),
            "{0} ({1}, {2})".format(np.round(np.nanmean(rec), 3), round(np.nanmin(rec), 3), round(np.nanmax(rec), 3)),
            "{0} ({1}, {2})".format(np.round(np.nanmean(f), 3), round(np.nanmin(f), 3), round(np.nanmax(f), 3)),
        ]
        total.append(item)

    total_auroc = np.round(total_auroc[int(len(total_auroc) * 0.025):int(len(total_auroc) * 0.975)], 3)
    total_precision = np.round(total_precision[int(len(total_precision) * 0.025):int(len(total_precision) * 0.975)], 3)
    total_recall = np.round(total_recall[int(len(total_recall) * .025):int(len(total_recall) * .975)], 3)
    total_f1 = np.round(total_f1[int(len(total_f1) * .025):int(len(total_f1) * .975)], 3)

    # Write the overall row followed by one row per class.
    with open(args.file_name, "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["", "precision", "recall", "f1-score", "auroc"])
        writer.writerow([
            "",
            "{0} ({1}, {2})".format(np.round(np.average(scores[0]), 3), total_precision[0], total_precision[-1]),
            "{0} ({1}, {2})".format(np.round(np.average(scores[1]), 3), total_recall[0], total_recall[-1]),
            "{0} ({1}, {2})".format(np.round(np.average(scores[2]), 3), total_f1[0], total_f1[-1]),
            "{0} ({1}, {2})".format(np.round(ovr_auroc, 3), total_auroc[0], total_auroc[-1]),
        ])
        for i in total:
            writer.writerow(i)
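# Hypothetical entry point (an assumption; the original dispatch code is not shown),
# using the build_arg_parser() sketch above: evaluate saved weights and write the
# bootstrap report, e.g.
#   python predict.py --model <weights.hdf5> --file_name <report.csv>
if __name__ == "__main__":
    predict(build_arg_parser())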
filepath_loss = "./data/Multi_task_model.loss"
filepath_model = "./data/checkpoint/best_model.h5"

# Integer encoding for nucleotides (N = unknown base).
d_nucl = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}

# k-mer features, labels and positions for the training and validation splits.
f_matrix, f_labels, f_pos = get_kmer_from_50mer(filepath_train)
f_matrix_val, f_labels_val, f_pos_val = get_kmer_from_50mer(filepath_val)

params = get_params_50mer()
d_weights = get_learning_weights(filepath_weights)

training_generator = DataGenerator_from_50mer(f_matrix, f_labels, f_pos, **params)
val_generator = DataGenerator_from_50mer(f_matrix_val, f_labels_val, f_pos_val, **params)

model = build_model()
print(model.summary())

# Two softmax heads, each trained with categorical cross-entropy.
model.compile(optimizer='adam',
              loss={'output1': 'categorical_crossentropy',
                    'output2': 'categorical_crossentropy'},
              metrics=['accuracy'])

CallBacks = [
    EarlyStopping(monitor='val_loss', patience=5),
    ModelCheckpoint(filepath=filepath_model, monitor='val_loss', save_best_only=True)
]
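# A minimal sketch (an assumption, not part of the original source) of the training
# call these objects are being prepared for: fit from the two generators with the
# callbacks above, then persist the loss history to filepath_loss. The epoch count
# is illustrative, and older Keras versions would use fit_generator() instead.
import pickle

history = model.fit(training_generator,
                    validation_data=val_generator,
                    epochs=50,
                    callbacks=CallBacks)

with open(filepath_loss, "wb") as f:
    pickle.dump(history.history, f)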
# Autoencoder / clustering hyper-parameters.
params['AE epochs'] = 30
params['image res'] = 256
# params['kernel size'] = 3
params['n clusters'] = 3
params['n features'] = 64
params['output dir'] = r'/home/dykuang/UMI-SEG/output/'
params['input channels'] = 1
params['en spec'] = [8, 16, 32]  # layer parameters for the U-net used as encoder
params['de spec'] = [8, 8, 8]    # layer parameters for the decoder
# params['training data list'] = ['vol_1_slice_{}.npy'.format(i) for i in range(10, 121)]

AE, _ = build_model(input_size=(params['image res'], params['image res'], params['input channels']),
                    en_spec=params['en spec'],
                    de_spec=params['de spec'],
                    n_features=params['n features'],
                    n_clusters=params['n clusters'])
print(AE.summary())
print(AE.layers[-2].summary())
print(AE.layers[-1].summary())


def datagen(datapath, datalist, batchsize):
    # Batch generator over the .npy slices named in datalist.
    x = np.zeros([batchsize,
                  params['image res'],
                  params['image res'],
                  params['input channels']])
    size = len(datalist)
    n_batches = size // batchsize
    index = 0
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,  # assumed: checkpoint path variable defined earlier in the script
        load_weights_on_restart=False,
        save_best_only=False,
        save_freq=100,
        monitor='val_loss',
        verbose=1
    ),
    tf.keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,   # How often to log histogram visualizations
        embeddings_freq=0,  # How often to log embedding visualizations
        update_freq=32
    )
]

model = architecture.build_model(bert_path, max_seq_length)
print(model.summary())

# Instantiate variables
initialize_vars(sess)

model.fit(
    train_X, train_y,
    validation_data=(test_X, test_y),
    epochs=1,
    callbacks=callbacks,
    batch_size=32
)

model.save('BertModel.h5')