from keras.applications.resnet50 import ResNet50, preprocess_input

def getfeatures(folder, target_size, fraction, model_name='ResNet50'):
    # Only the ResNet50 backbone is supported here; `fraction` is the
    # train/test split ratio used below.
    if model_name != 'ResNet50':
        raise ValueError("Unsupported backbone: " + model_name)
    model = ResNet50(weights="imagenet", include_top=False)
    images = getFilesInDir(folder)
    images_train, images_test = test_train_split(images, fraction)
    tensors_train = []
    tensors_test = []
    for file_paths in images_train.values():
        for file_path in file_paths:
            tensors_train.append(img_to_tensor(file_path, target_size=target_size))
    for file_paths in images_test.values():
        for file_path in file_paths:
            tensors_test.append(img_to_tensor(file_path, target_size=target_size))
    preprocessed_tensors_train = [preprocess_input(t) for t in tensors_train]
    preprocessed_tensors_test = [preprocess_input(t) for t in tensors_test]
    print("Total training tensors created: " + str(len(tensors_train)))
    print("Total testing tensors created: " + str(len(tensors_test)))
    labels_train = create_labels(images_train, output_classes=21)
    labels_test = create_labels(images_test, output_classes=21)
    print("Total training labels created: " + str(len(labels_train)))
    print("Total testing labels created: " + str(len(labels_test)))
    # Run each preprocessed tensor through the backbone to get its features.
    features_list_train = [model.predict(x) for x in preprocessed_tensors_train]
    features_list_test = [model.predict(x) for x in preprocessed_tensors_test]
    return (features_list_train, labels_train, features_list_test, labels_test)
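The `img_to_tensor` helper is not shown in this example; a minimal sketch of what it plausibly does, using the standard Keras image utilities (the name and behavior are inferred from the call sites above):

import numpy as np
from keras.preprocessing import image

def img_to_tensor(img_path, target_size):
    # Inferred helper (an assumption, not the original): load and resize an
    # image, then add a batch axis so model.predict() accepts it directly.
    img = image.load_img(img_path, target_size=target_size)
    x = image.img_to_array(img)       # (H, W, 3) float array
    return np.expand_dims(x, axis=0)  # (1, H, W, 3)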
def find_best_acc_and_thresh(labels_csv: Path,
                             inference_folder: Path, classes: List[str]) -> \
        Dict[str, float]:
    """
    Find the best accuracy and threshold for the given images.

    Args:
        labels_csv: CSV file containing the ground truth labels.
        inference_folder: Folder containing the predicted labels. 
        classes: Names of the classes in the dataset.

    Returns:
        A dictionary mapping class names to the best thresholds.
    """
    gt_labels = create_labels(csv_path=labels_csv)
    prediction_csv_paths = get_csv_paths(folder=inference_folder)
    best_acc = 0
    best_thresholds = None
    best_csv = None
    for prediction_csv_path in prediction_csv_paths:
        prediction_labels = create_labels(csv_path=prediction_csv_path)
        avg_class_acc, conf_matrix = get_scores(
            gt_labels=gt_labels,
            prediction_labels=prediction_labels,
            classes=classes)
        print(f"thresholds {parse_thresholds(csv_path=prediction_csv_path)} "
              f"has average class accuracy {avg_class_acc:.5f}")
        if best_acc < avg_class_acc:
            best_acc = avg_class_acc
            best_csv = prediction_csv_path
            best_thresholds = parse_thresholds(csv_path=prediction_csv_path)
    print(f"view these predictions in {best_csv}")
    return best_thresholds
def print_final_test_results(labels_csv: Path, inference_folder: Path,
                             classes: List[str]) -> None:
    """
    Print the final accuracy and confusion matrix.

    Args:
        labels_csv: CSV file containing the ground truth labels.
        inference_folder: Folder containing the predicted labels.
        classes: Names of the classes in the dataset.
    """
    gt_labels = create_labels(csv_path=labels_csv)
    prediction_csv_paths = get_csv_paths(folder=inference_folder)
    for prediction_csv_path in prediction_csv_paths:
        prediction_labels = create_labels(csv_path=prediction_csv_path)
        avg_class_acc, conf_matrix = get_scores(
            gt_labels=gt_labels,
            prediction_labels=prediction_labels,
            classes=classes)
        print(f"test set has final avg class acc: {avg_class_acc:.5f}"
              f"\n{conf_matrix}")
Example #4
def inference():

    # Inference Path #
    make_dirs(config.inference_path)

    # Prepare Data Loader #
    test_loader = get_celeba_loader('test', config.batch_size,
                                    config.selected_attrs)

    # Prepare Generator #
    G = Generator(num_classes=len(config.selected_attrs)).to(device)
    G.load_state_dict(
        torch.load(
            os.path.join(
                config.weights_path,
                'StarGAN_Generator_Epoch_{}.pkl'.format(config.num_epochs))))

    # Test #
    print("StarGAN | Generating Aligned CelebA Images started...")
    for i, (image, label) in enumerate(test_loader):

        # Prepare Data #
        image = image.to(device)
        fixed_labels = create_labels(label,
                                     selected_attrs=config.selected_attrs)

        # Generate Fake Images #
        x_fake_list = [image]

        for c_fixed in fixed_labels:
            x_fake_list.append(G(image, c_fixed))
        x_concat = torch.cat(x_fake_list, dim=3)

        # Save Images #
        save_image(denorm(x_concat.data.cpu()),
                   os.path.join(
                       config.inference_path,
                       'StarGAN_Aligned_CelebA_Results_%04d.png' % (i + 1)),
                   nrow=1,
                   padding=0)

    make_gifs_test("StarGAN", config.inference_path)
Example #5
    def flow(self):
        i = 0

        while True:
            images_batch = []
            labels_batch = []
            for b in range(self.batch_size):
                if i == len(self.images):
                    i = 0

                image = cv2.imread(self.images[i])
                image = resize(image, resize=self.resize)
                image = crop(image, ROI=self.ROI)

                label_img = cv2.imread(self.masks[i], cv2.IMREAD_GRAYSCALE)
                label_img = resize(label_img, resize=self.resize)
                label_img = crop(label_img, ROI=self.ROI)

                images_batch.append(norm_data(image))
                labels_batch.append(create_labels(label_img, self.classes))

                i += 1

            yield np.array(images_batch, dtype=np.float32), np.array(labels_batch, dtype=np.float32)
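Here `create_labels` presumably one-hot encodes the grayscale mask into one binary channel per class. A minimal sketch, assuming `self.classes` is a list of the pixel values that encode each class:

import numpy as np

def create_labels(label_img, classes):
    # Assumed behavior: build an (H, W, num_classes) float mask with one
    # binary channel per class pixel value.
    h, w = label_img.shape[:2]
    labels = np.zeros((h, w, len(classes)), dtype=np.float32)
    for idx, class_value in enumerate(classes):
        labels[..., idx] = (label_img == class_value)
    return labels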
                clip_list_temp.extend(all_selec)

    # create variable buttons
    num_vars_prev = num_vars
    clip_list, num_vars = check_and_create(clip_list_temp,
                                           clip_list_prev,
                                           frame,
                                           1,
                                           num_vars,
                                           '',
                                           button_font)

    # create function buttons
    num_funcs_prev = num_funcs
    func_clip_list, num_funcs = check_and_create(clip_list_func_temp,
                                                 clip_list_func_prev,
                                                 frame,
                                                 2,
                                                 num_funcs,
                                                 'Control',
                                                 button_font)

    # create labels
    create_labels([num_vars, num_funcs],
                  [num_vars_prev, num_funcs_prev],
                  frame,
                  button_font)

    # update tk
    root.update_idletasks()
    root.update()

    for i in test_string:
        if ord(i) < 32 or ord(i) > 122:
            print(i, ord(i), chr(ord(i)), train_string.find(i))

    X_train = create_input_array(train_string)
    print(X_train.shape)

    X_test = create_input_array(test_string)
    print(X_test.shape)

    print(X_train[0])

    Y_train = create_labels(train_string, hashmap)
    print(Y_train.shape)

    Y_test = create_labels(test_string, hashmap)
    print(Y_test.shape)

    encrypter = Sequential()
    encrypter.add(layers.Dense(91, input_shape=(91, )))
    encrypter.add(layers.LeakyReLU())
                 Later becomes a zero vector in which the indices of the words present are set to one
        - bag_of_words: similar to naive, but unknown words are ignored.
                 Later becomes a zero vector in which the indices of the words present are set to one or more (if a word is present X times, the value will be X)
        - tfidf: similar to bag_of_words, but instead of discrete values, computes the importance of each word using TF-IDF (widely used in Information Extraction)
        - word_embeddings: list of indices from word_to_index_we. Later, these indices will be mapped to the embeddings of index_we_to_emb
        - sentence_embeddings: vector of 600 dimensions representing the sentence embedding.
    '''

    X_naive = transform_for_naive(data, word_to_index)
    X_bow = transform_for_bag_of_words(data, word_to_index)
    X_tfidf = transform_for_tfidf(data)
    X_we = transform_for_word_embeddings(data, word_to_index_we,
                                         index_we_to_emb)
    X_se = transform_for_sentence_embeddings(data)
    X_topics = transform_for_topics(data)
    Y = create_labels(data)
    '''
    #Visualize using only one kind of features
    visualize_tsne(X_naive, Y, 'naive')
    visualize_tsne(X_bow, Y, 'bow')
    visualize_tsne(X_tfidf, Y, 'tfidf')
    visualize_tsne(X_we, Y, 'word_emb', index_we_to_emb)
    visualize_tsne(X_se, Y, 'sent_emb')
    visualize_tsne(X_topics, Y, 'topics')
    #'''

    X_bow_se = np.concatenate([X_bow, X_se], axis=1)
    X_bow_topics = np.concatenate([X_bow, X_topics], axis=1)
    X_tfidf_se = np.concatenate([X_tfidf, X_se], axis=1)
    X_tfidf_topics = np.concatenate([X_tfidf, X_topics], axis=1)
    X_se_topics = np.concatenate([X_se, X_topics], axis=1)
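Any off-the-shelf classifier can then be scored on these concatenated representations; a hypothetical illustration with scikit-learn (not part of the original snippet):

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

clf = LogisticRegression(max_iter=1000)
scores = cross_val_score(clf, X_tfidf_se, Y, cv=5)  # illustration only
print("tfidf + sentence embeddings:", scores.mean())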
def train():
    data = np.load("../dataset/dev.npy")
    labels = np.load("../dataset/dev_transcripts.npy")

    # temporary dataset: keep only the first two samples for a quick smoke test
    data = data[0:2]
    labels = labels[0:2]
    # temporary dataset

    vocab = create_vocab(labels)

    labels = create_labels(labels, vocab)

    shuffle_index = np.arange(len(data))
    shuffle(shuffle_index)

    batch_size = cfg.BATCH_SIZE
    learning_rate = cfg.LEARNING_RATE

    # my_listener = Listener(40, 256, 0.0)
    # my_speller  = Speller(33, 512, 512, 256, 3)

    if isfile("../weights/listener.pt"):
        with open("../weights/listener.pt", 'rb') as fl:
            my_listener = torch.load(fl)
        with open("../weights/speller.pt", 'rb') as fs:
            my_speller = torch.load(fs)
        print("model loading completed.")
    else:
        my_listener = Listener(40, 256, 0.0)
        my_speller = Speller(33, 512, 512, 256, 3)

    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')  # keep per-element losses for masking
    my_optimizer = torch.optim.Adam([{
        'params': my_speller.parameters()
    }, {
        'params': my_listener.parameters()
    }],
                                    lr=cfg.LEARNING_RATE)

    start_index = 0
    for epoch in range(cfg.EPOCH):
        losses = 0.0
        start_index = 0
        while (start_index + batch_size <= len(data)):
            batch_data = data[shuffle_index[start_index:start_index +
                                            batch_size]]
            batch_labels = labels[shuffle_index[start_index:start_index +
                                                batch_size]]
            batch_data, batch_labels, batch_lengths, batch_label_lengths = preprocess(
                batch_data, batch_labels)
            one_hot_batch_labels = OneHot(batch_labels, 33)
            listener_output = my_listener(batch_data, batch_lengths)

            speller_output = my_speller(batch_labels.size(1), listener_output,
                                        one_hot_batch_labels)

            batch_loss = loss_fn(
                speller_output[0].contiguous().view(-1, 33),
                torch.autograd.Variable(batch_labels).view(-1, ))
            batch_loss = batch_loss.view(speller_output[0].size(0),
                                         speller_output[0].size(1))
            mask = torch.zeros(batch_loss.size())
            for i in range(batch_label_lengths.size(0)):
                mask[i, :batch_label_lengths[i]] = 1.0
            batch_loss = torch.mul(batch_loss, torch.autograd.Variable(mask))
            batch_loss = torch.sum(batch_loss) / torch.sum(mask)
            print("epoch {} batch_loss == {:.5f}".format(
                epoch, batch_loss.data[0]))
            batch_loss.backward()
            losses += batch_loss.data.cpu().numpy()
            my_optimizer.step()

            start_index += batch_size
            # break
        if (epoch % 3 == 0):
            with open("../weights/listener.pt", 'wb') as fl:
                torch.save(my_listener, fl)
            with open("../weights/speller.pt", 'wb') as fs:
                torch.save(my_speller, fs)
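The `OneHot` helper is not shown; assuming `batch_labels` is a (batch, seq) LongTensor of character indices, a plausible sketch is:

def OneHot(labels, num_classes):
    # Assumed helper: expand a (batch, seq) index tensor into a
    # (batch, seq, num_classes) one-hot encoding.
    one_hot = torch.zeros(labels.size(0), labels.size(1), num_classes)
    one_hot.scatter_(2, labels.unsqueeze(2), 1.0)
    return one_hot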
Example #10
def cross_validation(k, X, y, params, regression):
    """
    Perform regression using k-fold cross-validation.

    This function is used to evaluate a model, given data, a regression
    function, and a set of parameters.

    Args:
        k (int): number of folds for cross-validation
        X (np.ndarray): training samples of shape N x D
        y (np.ndarray): training labels of shape N
        params (dict): dictionary of training hyperparameters
        regression (function): regression function

    Returns:
        float: mean loss on the validation datasets
        float: mean accuracy on the validation datasets

    Raises:
        ValueError: if the regression function raises an error
    """

    # Cross-validation
    k_indices = build_k_indices(y, k)
    accuracies = []
    losses = []

    # print(f"(max_iters: {params['max_iters']}, gamma: {params['gamma']}, lambda: {params['lambda_']})")
    # each iteration for each split of training and validation
    for k_iteration in range(k):
        # split the data accordingly into training and validation
        X_train, Y_train, X_val, Y_val = cross_validation_iter(
            y, X, k_indices, k_iteration)
        # initial weights, one entry per feature dimension
        W_init = np.random.rand(X.shape[1])
        # initialize dictionary for the training regression model
        args_train = {
            "tx": X_train,
            "y": Y_train,
            "initial_w": W_init,
            "max_iters": params["max_iters"],
            "gamma": params["gamma"],
            "lambda_": params["lambda_"]
        }
        # try to train the model; if it diverges, report and return early
        try:
            W, loss_tr = regression(**args_train)
        except ValueError:
            print("Regression diverged with these parameters.")
            return None, None

        if "Logistic" in f_name:
            prediction_val_regression = sigmoid(X_val @ W)
        else:
            prediction_val_regression = X_val @ W
        # calculate prediction for the validation dataset
        prediction_val = create_labels(prediction_val_regression)
        # calculate corresponding loss and accuracy
        loss_val = calculate_mse_loss(Y_val, prediction_val)
        acc_val = calculate_acc(Y_val, prediction_val)
        losses.append(loss_val)
        accuracies.append(acc_val)
    # finally, generate the means
    mean_loss_val = np.array(losses).mean()
    mean_acc_val = np.array(accuracies).mean()

    return mean_loss_val, mean_acc_val
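For completeness, `create_labels` here is assumed to binarize the continuous model output into hard class labels; one plausible sketch (the cutoff would be 0.5 for sigmoid outputs and 0 for raw linear predictions):

import numpy as np

def create_labels(predictions, threshold=0.5):
    # Assumed behavior: map continuous predictions to hard {-1, 1} labels
    # around the given threshold.
    return np.where(predictions >= threshold, 1, -1)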