logs = model.train(train_input_sequences, train_output_sequences,
                   test_input_sequences, test_output_sequences,
                   epochs=epochs, minibatch_size=minibatch_size,
                   learning_rate=learning_rates, momentum=momentums,
                   weight_decay=0.001)

test_results = model.test(test_input_sequences, test_output_sequences, evaluate_per_length=True)
train_results = model.test(train_input_sequences, train_output_sequences, evaluate_per_length=True)

plot.plot_train_results(logs)
plot.plot_results_per_input_length(groups=range(60),
                                   results_tuples=[test_results, train_results],
                                   plot_opts={'fmt': '.'},
                                   group_plot_opts=[TEST_SCATTER_STYLE, TRAIN_SCATTER_STYLE])
# Style order must match the results order (train first, then test); the original
# reused [TEST, TRAIN] here, which styled train points as test.
plot.plot_results_per_input_length(results_tuples=[train_results, test_results],
                                   group_plot_opts=[TRAIN_SCATTER_STYLE, TEST_SCATTER_STYLE])
plot.plot_embeddings(model.embedder.layers[0].embeddings.get_value(), vocab)

# Alternative model configurations (kept for reference):
# model = models.recurrent_model(vocab_size, embedding_size, context_vector_size, D=2)
# model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)
# model = models.recurrent_model(vocab_size, embedding_size, context_vector_size, D=4)
# model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)
# model = models.gru_model(vocab_size, embedding_size, context_vector_size, D=1)
# model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)
# model = models.gru_model(vocab_size, embedding_size, context_vector_size, D=2)
# model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)
# model = models.gru_model(vocab_size, embedding_size, context_vector_size, D=4)
# model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)
def evaluate_training(df_tile, embeds_cols, savepath=None, verbose=False):
    if verbose:
        print('Start evaluating best features')
    if not os.path.isdir(savepath):
        os.makedirs(savepath)

    # ----------- Well level -----------
    # Create well-collapsed dataframe
    df_well_with_dmso = collapse_well_level(df_tile.copy(), remove_dmso=False)
    df_save_well = df_well_with_dmso.copy()

    # Plot embeddings with ground-truth labels and assigned labels
    moa_unique_list = sorted(unique(list(df_well_with_dmso['moa'])))
    pca_well, pca_tsne_well, tsne_well, umap_well = plot_embeddings(
        df_well_with_dmso, embeds_cols, moa_unique_list, savepath)

    # Save well-level projections
    df_save_well['PCA1'] = pca_well[:, 0]
    df_save_well['PCA2'] = pca_well[:, 1]
    df_save_well['TSNE1'] = tsne_well[:, 0]
    df_save_well['TSNE2'] = tsne_well[:, 1]
    df_save_well['PCA_TSNE1'] = pca_tsne_well[:, 0]
    df_save_well['PCA_TSNE2'] = pca_tsne_well[:, 1]
    df_save_well['UMAP1'] = umap_well[:, 0]
    df_save_well['UMAP2'] = umap_well[:, 1]

    # Create DMSO-only dataframes
    df_tile_dmso = df_tile.loc[df_tile['compound'] == 'DMSO', :].copy()
    df_tile_dmso = df_tile_dmso.reset_index(drop=True)
    df_well_dmso = df_well_with_dmso.loc[df_well_with_dmso['compound'] == 'DMSO', :].copy()
    df_well_dmso = df_well_dmso.reset_index(drop=True)

    # Plot DMSO embeddings
    batch_unique_list = sorted(unique(list(df_well_with_dmso['batch'])))
    plot_dmso_pca(df_tile_dmso, df_well_dmso, embeds_cols, batch_unique_list, savepath)
    plot_distance_heatmaps(df_tile_dmso, df_well_dmso, embeds_cols, savepath)
    plot_DMSO_3PCA(df_tile_dmso, embeds_cols, savepath)

    # Cluster wells
    df_well = collapse_well_level(df_tile.copy(), remove_dmso=True)
    predictions, n_clusters, pca_tsne_image = assign_clusters(
        df_well, embeds_cols, min_cluster_size=10, min_samples=3)
    plot_cluster_assignment(pca_tsne_image, predictions, list(df_well['moa']),
                            savepath, prefix="Well_")

    # Save cluster assignment
    df_well['cluster_nr'] = predictions
    df_well['PCA_TSNE1'] = pca_tsne_image[:, 0]
    df_well['PCA_TSNE2'] = pca_tsne_image[:, 1]

    # Plot consistency matrix
    create_consistency_matrix(df_well, predictions, savepath)

    # ----------- Treatment level -----------
    # Average per treatment per plate, then median per treatment per batch
    avg_df = collapse_plate_level(df_tile.copy(), do_median=False)
    df_treatment = collapse_treatment_level(avg_df, do_median=True, remove_dmso=True)

    # Cluster treatments
    predictions2, n_clusters2, pca_tsne_image2 = assign_clusters(
        df_treatment, embeds_cols, min_cluster_size=5, min_samples=3)
    plot_cluster_assignment(pca_tsne_image2, predictions2, list(df_treatment['moa']),
                            savepath, prefix="Treatment_")

    # Save cluster assignment
    df_treatment['cluster_nr'] = predictions2
    df_treatment['PCA_TSNE1'] = pca_tsne_image2[:, 0]
    df_treatment['PCA_TSNE2'] = pca_tsne_image2[:, 1]

    # Labeled evaluation
    df_treatment = df_treatment[df_treatment['moa'] != 'undefined'].copy()
    df_treatment = df_treatment.reset_index(drop=True)
    plot_clustermap(df_treatment, embeds_cols, savepath)

    # NSC and NSCB
    NSC_k_NN(df_treatment, embeds_cols, plot_conf=True, savepath=savepath)
    NSB_k_NN(df_treatment, embeds_cols, plot_conf=True, savepath=savepath)

    return df_save_well
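# Hedged usage sketch (assumption, not from the original source): evaluate_training
# expects a tile-level dataframe with 'compound', 'moa', and 'batch' columns plus the
# embedding columns named in embeds_cols. The column prefix and savepath below are
# illustrative placeholders only.
#
# embeds_cols = [c for c in df_tile.columns if c.startswith('embed_')]
# df_save_well = evaluate_training(df_tile, embeds_cols, savepath='results/eval', verbose=True)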
from dataset import get_hero_dict
from model import build_model
from plot import plot_embeddings
from evaluation import most_similar

# Use the trained model weights to generate the dota2vec embedding
# visualization and to test hero similarity.

# Load the trained weights
model = build_model()
model.load_weights("dota2vec.h5")
embeddings = model.get_weights()[0]

# Load the hero-name dictionary
hero_dict = get_hero_dict()

# Plot a visualization of the embedding layer
plot_embeddings(embeddings, hero_dict)

# Test hero similarity
hero_name = '水晶室女'  # Crystal Maiden
print(most_similar(hero_name, embeddings, hero_dict))
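# Hedged sketch (assumption, not the repo's actual evaluation.most_similar): a typical
# implementation ranks heroes by cosine similarity in the embedding space. The direction
# of hero_dict (name -> row index) and the top_n parameter are illustrative assumptions.
import numpy as np

def most_similar_sketch(hero_name, embeddings, hero_dict, top_n=10):
    # hero_dict is assumed to map hero name -> row index of the embedding matrix
    idx = hero_dict[hero_name]
    query = embeddings[idx]
    # Cosine similarity of the query against every embedding row
    norms = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query)
    sims = embeddings @ query / np.maximum(norms, 1e-12)
    order = np.argsort(-sims)
    # Reverse lookup: row index -> hero name
    names = {i: name for name, i in hero_dict.items()}
    return [(names[i], float(sims[i])) for i in order if i != idx][:top_n]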
# NOTE: the original open(...) call is truncated here; record_history_file is an
# assumed placeholder for whatever pickle path the script used.
with open(record_history_file, 'rb') as pkl_file:
    record_history = pickle.load(pkl_file)
# The context manager closes the file, so the original explicit close() was redundant.

model.load_state_dict(torch.load(trained_weight_file))
plot_history(experiment_folder, record_history)

if embed_dim > 2:
    train_embeddings_baseline, train_labels_baseline, train_all_images = extract_embeddings_high_dim(
        train_loader, model, embed_dim)
    val_embeddings_baseline, val_labels_baseline, val_all_images = extract_embeddings_high_dim(
        test_loader, model, embed_dim)
else:
    train_embeddings_baseline, train_labels_baseline = extract_embeddings(train_loader, model)
    val_embeddings_baseline, val_labels_baseline = extract_embeddings(test_loader, model)

plot_embeddings(experiment_folder, 'train', train_embeddings_baseline, train_labels_baseline)
plot_embeddings(experiment_folder, 'test', val_embeddings_baseline, val_labels_baseline)

# Evaluate embedding quality with a k-NN classifier fit on the train embeddings
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(train_embeddings_baseline, train_labels_baseline)
score = knn.score(val_embeddings_baseline, val_labels_baseline)
print(score)
def main():
    """Main training entry point."""
    writer = SummaryWriter(log_dir='runs/' + args.log_dir)  # TensorBoard

    # Hyper-parameter settings
    lr = args.lr
    k = args.K
    amount = args.amount
    n_epochs = args.n_epochs
    log_interval = 100
    batch_size = args.batch_size
    pretrained = args.pretrained
    method = args.method
    n_K = args.n_K
    margin = args.margin
    shuffle_interval = args.shuffle_interval
    opt = args.optimizer
    step_size = args.step_size
    global_loss = args.global_loss
    triplet_loss_p = args.triplet_loss_p
    network = args.network
    embedding_len = args.embedding_len
    batch_n_classes = args.batch_n_classes
    batch_n_num = args.batch_n_num
    use_sampler = args.use_sampler
    rm_zero = args.rm_zero
    center_sigma = args.center_sigma
    gamma = args.gamma
    weight_decay = args.weight_decay
    data_augmentation = args.data_augmentation
    save_model_path = args.save_model_path
    log_dir = args.log_dir
    freeze_parameter = args.freeze_parameter
    use_cross_entropy = args.use_cross_entropy

    # Load data
    dataset = SpecificDataset(args.dataset, data_augmentation)
    n_classes = dataset.n_classes
    classes = dataset.classes
    channels = dataset.channels
    width, height = dataset.width, dataset.height
    gap = dataset.gap

    train_dataset = SampledDataset(dataset.train_dataset, channels, amount)
    print('Train data has {}'.format(len(train_dataset)))
    test_dataset = dataset.test_dataset
    print('Validation data has {}'.format(len(test_dataset)))
    test_dataset_fc = dataset.test_dataset_fc

    kwargs = {'num_workers': 8, 'pin_memory': False}
    train_shuffle = (shuffle_interval == 0)

    batch_sampler = BatchSampler(train_dataset, n_classes=batch_n_classes, n_num=batch_n_num)
    sampler_train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=batch_sampler, **kwargs)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               shuffle=train_shuffle, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                              shuffle=False, **kwargs)
    test_fc_loader = torch.utils.data.DataLoader(test_dataset_fc, batch_size=batch_size,
                                                 shuffle=False, **kwargs) if test_dataset_fc is not None else None

    embedding_net = EmbeddingNet(network=network, pretrained=pretrained, embedding_len=embedding_len,
                                 gap=gap, freeze_parameter=freeze_parameter)
    if method == 'classification':
        # model = resnet.resnet32().cuda()
        model = ClassificationNet(embedding_net, n_classes=n_classes, embedding_len=embedding_len).cuda()
    elif method in ['kTriplet', 'batchHardTriplet', 'batchAllTriplet', 'batchSemiHardTriplet']:
        model = embedding_net.cuda()
    else:
        print('Unknown method: {}'.format(method))
        sys.exit(-1)

    optimizer = get_optimizer(opt, model, lr, weight_decay)
    if opt == 'SGD':
        # if args.dataset == 'SD198':
        #     scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[200, 500, 950], gamma=0.5, last_epoch=-1)
        # else:
        scheduler = lr_scheduler.StepLR(optimizer, step_size, gamma=0.5, last_epoch=-1)
    else:
        scheduler = None

    # Add the model graph to TensorBoard
    # dummy_input = torch.zeros(size=(batch_size, channels, height, width)).cuda()
    # writer.add_graph(model, dummy_input)
    # del dummy_input

    if method == 'classification':
        loss_fn = nn.CrossEntropyLoss().cuda()
        fit_classification(train_loader, test_loader, test_fc_loader, model, loss_fn, optimizer,
                           scheduler, n_epochs, writer=writer, n_classes=n_classes,
                           data_augmentation=data_augmentation)
    elif method in ['kTriplet', 'batchHardTriplet', 'batchAllTriplet', 'batchSemiHardTriplet']:
        loss_fn = nn.TripletMarginLoss(margin=margin, p=triplet_loss_p, reduction='none').cuda()
        fit(train_loader, sampler_train_loader, test_loader, test_fc_loader, model, loss_fn,
            optimizer, scheduler, n_epochs, k, n_K, log_interval, shuffle_interval,
            global_loss=global_loss, writer=writer, n_classes=n_classes, gamma=gamma,
            center_sigma=center_sigma, use_sampler=use_sampler, rm_zero=rm_zero,
            method=method, data_augmentation=data_augmentation,
            freeze_parameter=freeze_parameter, use_cross_entropy=use_cross_entropy)

    # Save the model
    save_model_path = os.path.join(save_model_path, log_dir)
    torch.save(model.state_dict(), save_model_path)
    print('save model in {}'.format(save_model_path))

    # Plot embeddings in TensorBoard
    train_embeddings_tl, train_labels_tl = extract_embeddings(train_loader, model, embedding_len)
    plot_embeddings(train_embeddings_tl, train_labels_tl, classes, writer, tag='train_embeddings')
    val_embeddings_tl, val_labels_tl = extract_embeddings(test_loader, model, embedding_len)
    plot_embeddings(val_embeddings_tl, val_labels_tl, classes, writer, tag='val_embeddings')
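# Hedged sketch (assumption): extract_embeddings is not defined in this file. A typical
# implementation runs the model over a loader in eval mode and collects embeddings and
# labels into numpy arrays; the cuda() placement mirrors the usage above, and the
# assumption that the model returns the embedding directly is illustrative.
import numpy as np
import torch

def extract_embeddings_sketch(loader, model, embedding_len):
    model.eval()
    embeddings = np.zeros((len(loader.dataset), embedding_len))
    labels = np.zeros(len(loader.dataset))
    pos = 0
    with torch.no_grad():
        for images, targets in loader:
            images = images.cuda()
            out = model(images)  # assumes the forward pass returns the embedding
            embeddings[pos:pos + len(images)] = out.cpu().numpy()
            labels[pos:pos + len(images)] = targets.numpy()
            pos += len(images)
    return embeddings, labels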