Example no. 1
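    # Train with schedule lists: learning_rates / momentums appear to supply
    # one value per epoch; weight_decay adds L2 regularization.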
    logs = model.train(train_input_sequences, train_output_sequences, 
                        test_input_sequences, test_output_sequences, 
                        epochs=epochs, minibatch_size=minibatch_size,
                        learning_rate=learning_rates, momentum=momentums,
                        weight_decay=0.001)

    test_results  = model.test(test_input_sequences, test_output_sequences, evaluate_per_length=True)
    train_results = model.test(train_input_sequences, train_output_sequences, evaluate_per_length=True)

    plot.plot_train_results(logs)

    plot.plot_results_per_input_length(groups=range(60), results_tuples=[test_results, train_results],
                                       plot_opts={'fmt': '.'},
                                       group_plot_opts=[TEST_SCATTER_STYLE, TRAIN_SCATTER_STYLE])
    plot.plot_results_per_input_length(results_tuples=[test_results, train_results],
                                       group_plot_opts=[TEST_SCATTER_STYLE, TRAIN_SCATTER_STYLE])


    plot.plot_embeddings(model.embedder.layers[0].embeddings.get_value(), vocab)

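    # Alternative model configurations, left commented out for comparison: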
    # model = models.recurrent_model(vocab_size, embedding_size, context_vector_size, D=2)
    # model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)

    # model = models.recurrent_model(vocab_size, embedding_size, context_vector_size, D=4)
    # model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)

    # model = models.gru_model(vocab_size, embedding_size, context_vector_size, D=1)
    # model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)

    # model = models.gru_model(vocab_size, embedding_size, context_vector_size, D=2)
    # model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)

    # model = models.gru_model(vocab_size, embedding_size, context_vector_size, D=4)
    # model.train(input_sequences, output_sequences, epochs=epochs, minibatch_size=minibatch_size, learning_rate=learning_rates, momentum=momentums)
Example no. 2
def evaluate_training(df_tile, embeds_cols, savepath=None, verbose=False):
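    """Evaluate learned embeddings at the well and treatment level.

    Collapses tile-level embeddings, plots 2-D projections (PCA, t-SNE,
    PCA+t-SNE, UMAP), clusters wells and treatments, and computes NSC / NSB
    scores. Returns the well-level dataframe with projection coordinates.
    """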
    if verbose:
        print('Start evaluating best features')

    if savepath is not None:
        os.makedirs(savepath, exist_ok=True)

    # ----------- Well level -----------
    # Create well collapse dataframe
    df_well_with_dmso = collapse_well_level(df_tile.copy(), remove_dmso=False)
    df_save_well = df_well_with_dmso.copy()

    # Plot embeddings with ground-truth and assigned labels
    moa_unique_list = sorted(unique(list(df_well_with_dmso['moa'])))
    pca_well, pca_tsne_well, tsne_well, umap_well = plot_embeddings(df_well_with_dmso, embeds_cols, moa_unique_list,
                                                                    savepath)

    # Save well values
    df_save_well['PCA1'] = pca_well[:, 0]
    df_save_well['PCA2'] = pca_well[:, 1]
    df_save_well['TSNE1'] = tsne_well[:, 0]
    df_save_well['TSNE2'] = tsne_well[:, 1]
    df_save_well['PCA_TSNE1'] = pca_tsne_well[:, 0]
    df_save_well['PCA_TSNE2'] = pca_tsne_well[:, 1]
    df_save_well['UMAP1'] = umap_well[:, 0]
    df_save_well['UMAP2'] = umap_well[:, 1]

    # Create well DMSO dataframe
    df_tile_dmso = df_tile.loc[(df_tile['compound'] == 'DMSO'), :].copy()
    df_tile_dmso = df_tile_dmso.reset_index(drop=True)
    df_well_dmso = df_well_with_dmso.loc[(df_well_with_dmso['compound'] == 'DMSO'), :].copy()
    df_well_dmso = df_well_dmso.reset_index(drop=True)

    # Plot DMSO embeddings
    batch_unique_list = sorted(unique(list(df_well_with_dmso['batch'])))
    plot_dmso_pca(df_tile_dmso, df_well_dmso, embeds_cols, batch_unique_list, savepath)
    plot_distance_heatmaps(df_tile_dmso, df_well_dmso, embeds_cols, savepath)
    plot_DMSO_3PCA(df_tile_dmso, embeds_cols, savepath)

    # Cluster wells
    df_well = collapse_well_level(df_tile.copy(), remove_dmso=True)
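    # min_cluster_size / min_samples mirror HDBSCAN's parameters, suggesting a
    # density-based clusterer inside assign_clusters.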
    predictions, n_clusters, pca_tsne_image = assign_clusters(df_well, embeds_cols, min_cluster_size=10, min_samples=3)
    plot_cluster_assignment(pca_tsne_image, predictions, list(df_well['moa']), savepath, prefix="Well_")

    # Save clustering assignment
    df_well['cluster_nr'] = predictions
    df_well['PCA_TSNE1'] = pca_tsne_image[:, 0]
    df_well['PCA_TSNE2'] = pca_tsne_image[:, 1]

    # Create and plot the consistency matrix
    create_consistency_matrix(df_well, predictions, savepath)

    # ----------- Treatment level -----------
    # Average per treatment per plate and median per treatment per batch
    avg_df = collapse_plate_level(df_tile.copy(), do_median=False)
    df_treatment = collapse_treatment_level(avg_df, do_median=True, remove_dmso=True)

    # Cluster treatments
    predictions2, n_clusters2, pca_tsne_image2 = assign_clusters(df_treatment, embeds_cols, min_cluster_size=5, min_samples=3)
    plot_cluster_assignment(pca_tsne_image2, predictions2, list(df_treatment['moa']), savepath, prefix="Treatment_")

    # Save clustering assignment
    df_treatment['cluster_nr'] = predictions2
    df_treatment['PCA_TSNE1'] = pca_tsne_image2[:, 0]
    df_treatment['PCA_TSNE2'] = pca_tsne_image2[:, 1]

    # Labeled evaluation: keep only treatments with a defined MoA
    df_treatment = df_treatment[df_treatment['moa'] != 'undefined'].copy()
    df_treatment = df_treatment.reset_index(drop=True)
    plot_clustermap(df_treatment, embeds_cols, savepath)

    # NSC and NSB scores
    NSC_k_NN(df_treatment, embeds_cols, plot_conf=True, savepath=savepath)
    NSB_k_NN(df_treatment, embeds_cols, plot_conf=True, savepath=savepath)
    return df_save_well
Example no. 3
from dataset import get_hero_dict
from model import build_model
from plot import plot_embeddings
from evaluation import most_similar

# Use the trained model weights to generate the dota2vec visualization and to test hero similarity.

# Load the trained weights
model = build_model()
model.load_weights("dota2vec.h5")
embeddings = model.get_weights()[0]

# Load the hero-name dictionary
hero_dict = get_hero_dict()

# Plot the embedding-layer visualization
plot_embeddings(embeddings, hero_dict)

# Test hero similarity
hero_name = '水晶室女'  # Crystal Maiden
print(most_similar(hero_name, embeddings, hero_dict))
Example no. 4
    # NOTE: this snippet begins mid-statement; the opening of the `with` block is
    # reconstructed, and `record_history_file` is a hypothetical name for the
    # elided pickle path.
    with open(record_history_file, 'rb') as pkl_file:
        record_history = pickle.load(pkl_file)  # file is closed automatically by the with-block

    model.load_state_dict(torch.load(trained_weight_file))

plot_history(experiment_folder, record_history)

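# Embeddings with more than two dimensions need the high-dimensional extractor.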
if embed_dim > 2:
    train_embeddings_baseline, train_labels_baseline, train_all_images = extract_embeddings_high_dim(
        train_loader, model, embed_dim)
    val_embeddings_baseline, val_labels_baseline, val_all_images = extract_embeddings_high_dim(
        test_loader, model, embed_dim)
else:
    train_embeddings_baseline, train_labels_baseline = extract_embeddings(
        train_loader, model)
    val_embeddings_baseline, val_labels_baseline = extract_embeddings(
        test_loader, model)
plot_embeddings(experiment_folder, 'train', train_embeddings_baseline,
                train_labels_baseline)
plot_embeddings(experiment_folder, 'test', val_embeddings_baseline,
                val_labels_baseline)

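# Sanity-check embedding quality with a 15-nearest-neighbour classifier
# fit on train embeddings and scored on validation embeddings.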
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(train_embeddings_baseline, train_labels_baseline)

score = knn.score(val_embeddings_baseline, val_labels_baseline)
print('k-NN validation accuracy: {}'.format(score))

Example no. 5
def main():
    """main function"""

    writer = SummaryWriter(log_dir='runs/' + args.log_dir)  # tensorboard

    # Hyperparameter settings
    lr = args.lr
    k = args.K
    amount = args.amount
    n_epochs = args.n_epochs
    log_interval = 100
    batch_size = args.batch_size
    pretrained = args.pretrained
    method = args.method
    n_K = args.n_K
    margin = args.margin
    shuffle_interval = args.shuffle_interval
    opt = args.optimizer
    step_size = args.step_size
    global_loss = args.global_loss
    triplet_loss_p = args.triplet_loss_p
    network = args.network
    embedding_len = args.embedding_len
    batch_n_classes = args.batch_n_classes
    batch_n_num = args.batch_n_num
    use_sampler = args.use_sampler
    rm_zero = args.rm_zero
    center_sigma = args.center_sigma
    gamma = args.gamma
    weight_decay = args.weight_decay
    data_augmentation = args.data_augmentation
    save_model_path = args.save_model_path
    log_dir = args.log_dir
    freeze_parameter = args.freeze_parameter
    use_cross_entropy = args.use_cross_entropy

    # load data
    dataset = SpecificDataset(args.dataset, data_augmentation)
    n_classes = dataset.n_classes
    classes = dataset.classes
    channels = dataset.channels
    width, height = dataset.width, dataset.height
    gap = dataset.gap

    train_dataset = SampledDataset(dataset.train_dataset, channels, amount)
    print('Train data has {} samples'.format(len(train_dataset)))

    test_dataset = dataset.test_dataset
    print('Validation data has {} samples'.format(len(test_dataset)))

    test_dataset_fc = dataset.test_dataset_fc  # may be None
    kwargs = {'num_workers': 8, 'pin_memory': False}
    # Shuffle in the loader only when no manual shuffle interval is set.
    train_shuffle = (shuffle_interval == 0)

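    # BatchSampler presumably yields class-balanced batches:
    # batch_n_classes classes with batch_n_num samples each.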
    batch_sampler = BatchSampler(train_dataset, n_classes=batch_n_classes, n_num=batch_n_num)
    sampler_train_loader = torch.utils.data.DataLoader(train_dataset,
                                                       batch_sampler=batch_sampler, **kwargs)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=train_shuffle, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size, shuffle=False, **kwargs)
    test_fc_loader = torch.utils.data.DataLoader(test_dataset_fc,
                                                 batch_size=batch_size, shuffle=False,
                                                 **kwargs) if test_dataset_fc is not None else None

    embedding_net = EmbeddingNet(network=network, pretrained=pretrained,
                                 embedding_len=embedding_len, gap=gap, freeze_parameter=freeze_parameter)

    if method == 'classification':
        # model = resnet.resnet32().cuda()
        model = ClassificationNet(embedding_net, n_classes=n_classes, embedding_len=embedding_len).cuda()
    elif method in ['kTriplet', 'batchHardTriplet', 'batchAllTriplet', 'batchSemiHardTriplet']:
        model = embedding_net.cuda()
    else:
        print('Unsupported method: {}'.format(method))
        sys.exit(-1)


    optimizer = get_optimizer(opt, model, lr, weight_decay)

    if opt == 'SGD':
        #if args.dataset == 'SD198':
            #scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[200, 500, 950], gamma=0.5, last_epoch=-1)
        #else:
        scheduler = lr_scheduler.StepLR(optimizer, step_size, gamma=0.5, last_epoch=-1)
    else:
        scheduler = None

    # Add the model graph to TensorBoard (left disabled)
    #dummy_input = torch.zeros(size=(batch_size, channels, height, width)).cuda()
    #writer.add_graph(model, dummy_input)
    #del dummy_input

    if method == 'classification':
        loss_fn = nn.CrossEntropyLoss().cuda()
        fit_classification(train_loader, test_loader, test_fc_loader, model, loss_fn, optimizer, scheduler, n_epochs,
                           writer=writer, n_classes=n_classes, data_augmentation=data_augmentation)

    elif method in ['kTriplet', 'batchHardTriplet', 'batchAllTriplet', 'batchSemiHardTriplet']:
        loss_fn = nn.TripletMarginLoss(margin=margin, p=triplet_loss_p, reduction='none').cuda()
        fit(train_loader, sampler_train_loader, test_loader, test_fc_loader, model, loss_fn, optimizer, scheduler,
            n_epochs, k, n_K, log_interval, shuffle_interval, global_loss=global_loss, writer=writer,
            n_classes=n_classes, gamma=gamma, center_sigma=center_sigma, use_sampler=use_sampler, rm_zero=rm_zero,
            method=method, data_augmentation=data_augmentation, freeze_parameter=freeze_parameter, use_cross_entropy=use_cross_entropy)

    # save model
    save_model_path = os.path.join(save_model_path, log_dir)
    os.makedirs(os.path.dirname(save_model_path), exist_ok=True)
    torch.save(model.state_dict(), save_model_path)
    print('Saved model to {}'.format(save_model_path))

    # Plot embeddings in TensorBoard
    train_embeddings_tl, train_labels_tl = extract_embeddings(train_loader, model, embedding_len)
    plot_embeddings(train_embeddings_tl, train_labels_tl, classes, writer, tag='train_embeddings')
    val_embeddings_tl, val_labels_tl = extract_embeddings(test_loader, model, embedding_len)
    plot_embeddings(val_embeddings_tl, val_labels_tl, classes, writer, tag='val_embeddings')