Example #1
def save_images(image_links, query):
    count = 0
    l = len(image_links)
    ppb(0, l, prefix='Progress:', suffix='Complete', length=50)
    for i, (url, ext) in enumerate(image_links.items()):
        if not ext:
            ext = 'jpg'  # default to jpg when no extension was detected
        try:
            filename = f"{query.strip()}_{count}.{ext}"
            urllib.request.urlretrieve(url, f"images/{query.replace(' ', '_')}/{filename}")
        except Exception:
            pass  # skip images that fail to download
        ppb(i + 1, l, prefix='Progress:', suffix='Complete', length=50)
        count += 1
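These snippets rely on a handful of imports and a ppb progress-bar helper that are not shown. Below is a minimal sketch of what they might look like; the exact signature of ppb and the "import dataset as d" alias are assumptions inferred from the call sites and comments.

# Assumed imports for the snippets in this example (not shown in the original).
import os
import urllib.request
from os import listdir
from os.path import isfile, join
from shutil import copyfile

import numpy as np
from PIL import Image

# import dataset as d  # dataset.py helpers, referenced as d.* in the prediction functions (assumed import name)


def ppb(iteration, total, prefix='', suffix='', length=50, fill='#'):
    # Assumed implementation: simple in-place console progress bar matching the call sites above.
    filled = int(length * iteration // max(total, 1))
    bar = fill * filled + '-' * (length - filled)
    print(f'\r{prefix} |{bar}| {100 * iteration / max(total, 1):.1f}% {suffix}', end='\r')
    if iteration == total:
        print()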
def reset_images(image_dest, image_base):

    # delete all images in the image_dest folder -------------------------------------------------------------------------------
    filenames = [f for f in listdir(image_dest) if isfile(join(image_dest, f))]
    if (len(filenames) != 0):
        ppb(0,
            len(filenames),
            prefix='Deleting current dataset:  ',
            suffix='Complete')  # print progress
        for i in range(len(filenames)):
            os.remove(join(image_dest, filenames[i]))
            ppb(i + 1,
                len(filenames),
                prefix='Deleting current dataset:  ',
                suffix='Complete')  # print progress

    # copy all images from image_base to image_dest ----------------------------------------------------------------------------
    filenames_bu = [
        f for f in listdir(image_base) if isfile(join(image_base, f))
    ]

    ppb(0,
        len(filenames_bu),
        prefix='Copying original images:   ',
        suffix='Complete')  # print progress
    for i in range(len(filenames_bu)):
        copyfile(join(image_base, filenames_bu[i]),
                 join(image_dest, filenames_bu[i]))
        ppb(i + 1,
            len(filenames_bu),
            prefix='Copying original images:   ',
            suffix='Complete')  # print progress
def train_and_predict_with_best_CNN(img_folder, base_folder, image_width, x_new, y_raw, test_pct, n_cats, n_iterations):

    filepaths = [img_folder+'/'+f for f in listdir(img_folder) if isfile(join(img_folder, f))] # get prediction image paths

    # use functions from dataset.py to preprocess new prediction images --------------------------------------------------------
    d.reset_images(img_folder, base_folder)                         # delete preprocessed images & recopy
    d.resize_images(img_folder, image_width)                       # resize images for prediction
    x_tuple = []                                                  #
    for fp in filepaths:                                         # get pixel data from every image
        x_tuple.append(list(Image.open(fp, 'r').getdata()))     #
    image_data = d.get_lists_from_tuples(x_tuple, image_width) # format pixel data properly for prediction
    image_data = np.array(image_data).astype('float32') / 255 # turn image_data into array and normalize

    # format training data and select the chosen CNN parameter set -----------------------------------------------------------
    x_train, y_train, x_test, y_test = split_format_CNN(x_new, y_raw, test_pct, n_cats) # format training data for CNN
    cnn_params, num_params = get_CNN_params([2])
    cnn_params = cnn_params[2][0]

    n = 0 # count required to print progress bar
    ppb(0, n_iterations, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100) # print initial progress bar

    best_acc = 0
    results = {}
    iteration_accs  = [] # stores accuracy results for each iteration
    iteration_times = [] # stores total execution times for each iteration

    # train and evaluate the CNN for the given number of iterations ----------------------------------------------------------
    for j in range(n_iterations):

        model, time = train_CNN(x_train, y_train, cnn_params, 2, 5) # store model and time elapsed
        acc = model.evaluate(x_test, y_test)[1] # evaluate using test data

        iteration_accs.append(acc)    # store accuracy of current iteration in results list
        iteration_times.append(time) # store execution time of current iteration in times list

        if (acc > best_acc):
            best_cnn = model
            best_acc = acc

        n+=1 # progress bar stuff
        ppb(n, n_iterations, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

    results['avg_accuracy'] = np.mean(iteration_accs) # add accuracy key/value pair to dictionary
    results['avg_time'] = np.mean(iteration_times)   # add time key/value pair to dictionary

    predictions = np.argmax(best_cnn.predict(image_data), axis=-1) # predict the image's class

    return filepaths, results, predictions
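split_format_CNN, get_CNN_params, and train_CNN come from the accompanying training module and are not shown here. Since accuracy is read via model.evaluate(x_test, y_test)[1], the models are presumably Keras; the sketch below is only an assumed, simplified stand-in for train_CNN (layer sizes, the 'epochs' key, and the handling of the phase argument are guesses).

from time import perf_counter
from tensorflow import keras


def train_CNN(x_train, y_train, params, phase, n_conv_layers=2):
    # Assumed sketch: build and fit a small Keras CNN, return (model, elapsed training time).
    # `phase` selects which hyper-parameter `params` varies in the original code; it is ignored here.
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                                  input_shape=x_train.shape[1:]))
    for _ in range(n_conv_layers - 1):
        model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(y_train.shape[1], activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    start = perf_counter()
    model.fit(x_train, y_train, epochs=params.get('epochs', 10), verbose=0)
    return model, perf_counter() - start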
def run_CNN_suite(x_raw, y_raw, test_pct, n_cats, iterations, phases, n_phases):

    all_params_and_results = []
    x_train, y_train, x_test, y_test = split_format_CNN(x_raw, y_raw, test_pct, n_cats) # format training data for CNN
    cnn_params, num_params = get_CNN_params(phases)

    n = 0 # count required to print progress bar
    l = len(phases) * num_params * iterations # number of times a CNN is trained in total
    ppb(0, l, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100) # print initial progress bar


    # loop through every phase from 0-4 ----------------------------------------------------------------------------------------
    # all_params_and_results appends a list of parameter/result dicts if the phase is in the given list, otherwise appends None
    for phase in range(n_phases):

        if phase in phases: # if the current phase (0-4) is one of the phases provided in main, train MLP accordingly ----------

            phase_params_and_results = [] # will store the parameters and results dictionary associated with current phase

            # for every parameter value, train the CNN "iterations" times and average results --------------------------------
            for i in range(len(cnn_params[phase])):

                iteration_accs  = [] # stores accuracy results for each iteration
                iteration_times = [] # stores total execution times for each iteration

                # train the CNN for the given number of iterations --------------------------------------------------------------
                for j in range(iterations):

                    model, time = train_CNN(x_train, y_train, cnn_params[phase][i], phase) # store model and time elapsed
                    acc = model.evaluate(x_test, y_test)[1] # evaluate using test data

                    iteration_accs.append(acc)    # store accuracy of current iteration in results list
                    iteration_times.append(time) # store execution time of current iteration in times list

                    n+=1 # progress bar stuff
                    ppb(n, l, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

                cnn_params[phase][i]['avg_accuracy'] = np.mean(iteration_accs) # add accuracy key/value pair to dictionary
                cnn_params[phase][i]['avg_time'] = np.mean(iteration_times)   # add time key/value pair to dictionary
                phase_params_and_results.append(cnn_params[phase][i])        # update results of current phase

            all_params_and_results.append(phase_params_and_results) # add all phase results to list of all results

        else:
            all_params_and_results.append(None)

    return all_params_and_results
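A hypothetical call of run_CNN_suite: the data below is random, the shapes and test percentage are placeholders, and split_format_CNN / get_CNN_params still have to come from the accompanying training module.

import numpy as np

x_raw = np.random.rand(200, 32, 32, 3).astype('float32')  # 200 dummy 32x32 RGB images (shape is a placeholder)
y_raw = np.random.randint(0, 34, size=200)                 # integer class labels; 34 categories as elsewhere in this code

results = run_CNN_suite(x_raw, y_raw, test_pct=0.2, n_cats=34,
                        iterations=1, phases=[2], n_phases=5)
for phase, phase_results in enumerate(results):
    if phase_results is not None:
        for params in phase_results:
            print(f"phase {phase}: acc={params['avg_accuracy']:.3f}, time={params['avg_time']:.1f}s")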
def resize_images(image_dest, nw):
    filenames = [f for f in listdir(image_dest) if isfile(join(image_dest, f))]
    ppb(0,
        len(filenames),
        prefix='Resizing multiplied images:',
        suffix='Complete')  # print progress

    for i in range(len(filenames)):
        im = Image.open(join(image_dest, filenames[i]))  # open the image
        resized_im = im.resize(
            (nw, nw))  # resize every image to "nw" pixels wide & tall
        resized_im.save(join(image_dest,
                             filenames[i]))  # save the resized image
        ppb(i + 1,
            len(filenames),
            prefix='Resizing multiplied images:',
            suffix='Complete')  # print progress
def multiply_images(img_folder):
    global l  # progress-bar total, also read by split_filename when updating the bar
    p = 1
    filenames = [f for f in listdir(img_folder) if isfile(join(img_folder, f))]
    l = 31 * len(filenames)  # 3 contrast + 4 blur + 24 exposure filter operations per source image
    ppb(0,
        l,
        prefix='Creating filtered images:  ',
        suffix='Complete',
        length=100)  # print initial progress bar

    # For each of the source images (now 1 per COA), create 3 copies with different levels of contrast -------------------------
    for fn in filenames:
        create_new_contrast(img_folder + '/' + fn, 0.2, "-ct",
                            p)  # filename-ct.png: ct = contrast tiny
        p += 1
        create_new_contrast(img_folder + '/' + fn, 0.4, "-cl",
                            p)  # filename-cl.png: cl = contrast low
        p += 1
        create_new_contrast(img_folder + '/' + fn, 1.5, "-ch",
                            p)  # filename-ch.png: ch = contrast high
        p += 1

    # For each of the source images (now 4 per COA), create a copy that is blurry ----------------------------------------------
    filenames = [
        f for f in listdir(img_folder) if isfile(join(img_folder, f))
    ]  # update filenames list
    for fn in filenames:
        create_new_blur(img_folder + '/' + fn, '-b',
                        p)  # filename-cl-b.png: b = blurred
        p += 1

    # For each of the source images (now 8 per COA), create 3  copies with different levels of exposure ------------------------
    filenames = [
        f for f in listdir(img_folder) if isfile(join(img_folder, f))
    ]  # update filenames list
    for fn in filenames:
        create_new_exposure(img_folder + '/' + fn, 0.2, '-et',
                            p)  # filename-cl-bb-et.png: et = exposure tiny
        p += 1
        create_new_exposure(img_folder + '/' + fn, 0.5, '-el',
                            p)  # filename-cl-bb-el.png: el = exposure low
        p += 1
        create_new_exposure(img_folder + '/' + fn, 1.5, '-eh',
                            p)  # filename-cl-bb-eh.png: eh = exposure high
        p += 1
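Taken together, reset_images, multiply_images, and resize_images form the preprocessing pipeline; a hypothetical call sequence (folder names and target width are placeholders):

reset_images('images/working', 'images/originals')  # restore working copies from the untouched base set
multiply_images('images/working')                    # augment with contrast / blur / exposure variants
resize_images('images/working', 32)                  # square-resize everything for the network input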
def run_CNN_suite_layers(x_raw, y_raw, test_pct, n_cats, iterations, nns):

    all_params_and_results = []
    x_train, y_train, x_test, y_test = split_format_CNN(x_raw, y_raw, test_pct, n_cats) # format training data for CNN
    cnn_params, num_params = get_CNN_params([2])
    cnn_params = cnn_params[2][0]

    n = 0 # count required to print progress bar
    l = len(nns) * iterations # number of times a CNN is trained in total
    ppb(0, l, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100) # print initial progress bar

    nn_results = []
    for nn in nns:
        nn_results.append(cnn_params.copy())

    # loop through every layer count in nns ------------------------------------------------------------------------------------
    # all_params_and_results appends one parameters/results dict per entry of nns
    for idx, nn in enumerate(nns):

        iteration_accs  = [] # stores accuracy results for each iteration
        iteration_times = [] # stores total execution times for each iteration

        # train the CNN for the given number of iterations --------------------------------------------------------------
        for j in range(iterations):

            model, time = train_CNN(x_train, y_train, cnn_params, 2, nn) # store model and time elapsed
            acc = model.evaluate(x_test, y_test)[1] # evaluate using test data

            iteration_accs.append(acc)    # store accuracy of current iteration in results list
            iteration_times.append(time) # store execution time of current iteration in times list

            n+=1 # progress bar stuff
            ppb(n, l, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

        nn_results[idx]['avg_accuracy'] = np.mean(iteration_accs) # add accuracy key/value pair to dictionary
        nn_results[idx]['avg_time'] = np.mean(iteration_times)   # add time key/value pair to dictionary

        all_params_and_results.append(nn_results[idx]) # add results for the current layer count to the list of all results

    return all_params_and_results
def split_filename(filename, p):
    ppb(p, l, prefix='Creating filtered images:  ',
        suffix='Complete')  # update progress bar (l is the module-level total set in multiply_images)
    name, ext = os.path.splitext(filename)  # split filename
    return name, ext  # return both
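create_new_contrast, create_new_blur, and create_new_exposure are not shown. Given the PIL usage elsewhere, they most likely wrap ImageEnhance and ImageFilter; the sketch below is an assumption, reusing split_filename for the suffixed output names.

from PIL import Image, ImageEnhance, ImageFilter


def create_new_contrast(filepath, factor, suffix, p):
    # Assumed sketch: save a copy of the image with adjusted contrast (factor < 1 reduces contrast).
    name, ext = split_filename(filepath, p)
    ImageEnhance.Contrast(Image.open(filepath)).enhance(factor).save(name + suffix + ext)


def create_new_blur(filepath, suffix, p):
    # Assumed sketch: save a Gaussian-blurred copy of the image.
    name, ext = split_filename(filepath, p)
    Image.open(filepath).filter(ImageFilter.GaussianBlur(radius=2)).save(name + suffix + ext)


def create_new_exposure(filepath, factor, suffix, p):
    # Assumed sketch: approximate an exposure change by adjusting brightness.
    name, ext = split_filename(filepath, p)
    ImageEnhance.Brightness(Image.open(filepath)).enhance(factor).save(name + suffix + ext)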
Example #9
def train_and_predict_with_best_MLP(img_folder, base_folder, image_width,
                                    x_new, y_raw, test_pct, n_cats,
                                    n_iterations):

    filepaths = [
        img_folder + '/' + f for f in listdir(img_folder)
        if isfile(join(img_folder, f))
    ]

    # use functions from dataset.py to preprocess new prediction images --------------------------------------------------------
    d.reset_images(img_folder,
                   base_folder)  # delete preprocessed images & recopy
    d.resize_images(img_folder, image_width)  # resize images for prediction
    x_tuple = []  #
    for fp in filepaths:  # get pixel data from every image
        x_tuple.append(list(Image.open(fp, 'r').getdata()))  #
    image_data = d.get_lists_from_tuples(
        x_tuple, image_width)  # format pixel data properly for prediction
    image_data = np.array(image_data).astype(
        'float32') / 255  # turn image_data into array and normalize

    # flatten image data for use with MLP prediction
    flat_image_data = []
    for i in range(len(image_data)):
        flat_image = []
        for j in range(len(image_data[i])):
            for k in range(len(image_data[i][j])):
                for l in range(len(image_data[i][j][k])):
                    flat_image.append(image_data[i][j][k][l])
        flat_image_data.append(flat_image)
    flat_image_data = np.array(flat_image_data)

    # format training data and select the chosen MLP parameter set -----------------------------------------------------------
    x_formatted, y_all_34 = format_MLP(
        x_new, y_raw, n_cats)  # format data to use 34 classifiers
    mlp_params, num_params = get_MLP_params([4])
    mlp_params = mlp_params[4][0]

    n = 0  # count required to print progress bar
    l = n_iterations * n_cats
    ppb(0,
        l,
        prefix='Training and testing MLPs: ',
        suffix='Complete',
        length=100)  # print initial progress bar

    best_acc = 0
    results = {}
    iteration_accs = []  # stores accuracy results for each iteration
    iteration_times = []  # stores total execution times for each iteration

    # run all classifiers for given number of iterations -----------------------------------------------------------
    for j in range(n_iterations):

        it_acc = 0  # used to find mean accuracy of current iteration
        it_time = 0  # used to sum total execution time of current iteration
        models_34 = []

        # run all 34 1-vs-all classifiers -------------------------------------------------------------------------
        for y_train in y_all_34:

            x_train, y_train, x_test, y_test = split_MLP(
                x_formatted, y_train, test_pct)  # split data for MLP

            model, time = train_MLP(
                x_train, y_train,
                mlp_params)  # train model & store training time
            acc = model.score(x_test,
                              y_test)  # store accuracy of current model
            models_34.append(model)

            it_acc += acc / n_cats  # update accuracy of current iteration with weight 1/n_categories to find mean
            it_time += time  # update execution time of current iteration

            n += 1  # progress bar stuff
            ppb(n,
                l,
                prefix='Training and testing MLPs: ',
                suffix='Complete',
                length=100)

        iteration_accs.append(
            it_acc)  # store accuracy of current iteration in results list
        iteration_times.append(
            it_time)  # store execution time of current iteration in times list

        if (it_acc > best_acc):
            best_mlp = models_34
            best_acc = it_acc

    results['avg_accuracy'] = np.mean(
        iteration_accs)  # add accuracy key/value pair to dictionary
    results['avg_time'] = np.mean(
        iteration_times)  # add time key/value pair to dictionary

    predictions = []
    for model in best_mlp:  # predict with the best-performing set of 34 classifiers
        predictions.append(model.predict(flat_image_data))

    return filepaths, results, predictions
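format_MLP, get_MLP_params, split_MLP, and train_MLP are likewise external. Because accuracy is read with model.score(x_test, y_test), the classifiers are presumably scikit-learn estimators; the following is an assumed sketch of compatible split_MLP and train_MLP helpers (parameter keys are guesses).

from time import perf_counter
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier


def split_MLP(x_formatted, y, test_pct):
    # Assumed sketch: hold out test_pct of the data, returning it in the order used above.
    x_train, x_test, y_train, y_test = train_test_split(x_formatted, y, test_size=test_pct)
    return x_train, y_train, x_test, y_test


def train_MLP(x_train, y_train, params):
    # Assumed sketch: fit a scikit-learn MLP and return (model, elapsed training time).
    model = MLPClassifier(hidden_layer_sizes=params.get('hidden_layer_sizes', (100,)),
                          max_iter=params.get('max_iter', 200))
    start = perf_counter()
    model.fit(x_train, y_train)
    return model, perf_counter() - start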
Example #10
def run_MLP_suite(x_raw, y_raw, test_pct, n_cats, iterations, phases,
                  n_phases):

    all_params_and_results = []
    x_formatted, y_all_34 = format_MLP(
        x_raw, y_raw, n_cats)  # format data to use 34 classifiers
    mlp_params, num_params = get_MLP_params(phases)

    l = len(
        phases
    ) * num_params * n_cats * iterations  # number of times an MLP is trained in total
    ppb(0,
        l,
        prefix='Training and testing MLPs: ',
        suffix='Complete',
        length=100)  # print initial progress bar
    n = 0  # count required to print progress bar

    # loop through every phase from 0-4 ----------------------------------------------------------------------------------------
    # all_params_and_results appends a list of parameter/result dicts if the phase is in the given list, otherwise appends None
    for phase in range(n_phases):

        if phase in phases:  # if the current phase (0-4) is one of the phases provided in main, train MLP accordingly ----------

            phase_params_and_results = [
            ]  # will store the parameters and results dictionary associated with current phase

            # for every parameter value, run all 34 1-vs-all classifiers "iterations" times and average results ----------------
            for i in range(len(mlp_params[phase])):

                iteration_accs = [
                ]  # stores accuracy results for each iteration
                iteration_times = [
                ]  # stores total execution times for each iteration

                # run all classifiers for given number of iterations -----------------------------------------------------------
                for j in range(iterations):

                    it_acc = 0  # used to find mean accuracy of current iteration
                    it_time = 0  # used to sum total execution time of current iteration

                    # run all 34 1-vs-all classifiers -------------------------------------------------------------------------
                    for y_train in y_all_34:

                        x_train, y_train, x_test, y_test = split_MLP(
                            x_formatted, y_train,
                            test_pct)  # split data for MLP

                        model, time = train_MLP(
                            x_train, y_train, mlp_params[phase]
                            [i])  # train model & store training time
                        acc = model.score(
                            x_test, y_test)  # store accuracy of current model

                        it_acc += acc / n_cats  # update accuracy of current iteration with weight 1/n_categories to find mean
                        it_time += time  # update execution time of current iteration

                        n += 1  # progress bar stuff
                        ppb(n,
                            l,
                            prefix='Training and testing MLPs: ',
                            suffix='Complete',
                            length=100)

                    iteration_accs.append(
                        it_acc
                    )  # store accuracy of current iteration in results list
                    iteration_times.append(
                        it_time
                    )  # store execution time of current iteration in times list

                mlp_params[phase][i]['avg_accuracy'] = np.mean(
                    iteration_accs
                )  # add accuracy key/value pair to dictionary
                mlp_params[phase][i]['avg_time'] = np.mean(
                    iteration_times)  # add time key/value pair to dictionary
                phase_params_and_results.append(
                    mlp_params[phase][i])  # update results of current phase

            all_params_and_results.append(
                phase_params_and_results
            )  # add all phase results to list of all results

        else:
            all_params_and_results.append(None)

    return all_params_and_results