Beispiel #1
0
def sat4_mc_test(label, num_corruptions, channel):
    """Build and save a test set of progressively attenuated copies of one sample.

    The first training example whose label equals ``label`` is read from the
    hard-coded input ``.npz`` file. Copy ``i`` (``i`` in
    ``0 .. num_corruptions``) is that sample with ``channel`` multiplied by
    ``1.0 - 0.1 * i``, so copy 0 is the unmodified sample. The stacked copies
    are saved under the key ``X_test`` in
    ``./datasets/sat4-sat6/sat4_label_<label>_corrupted_<num_corruptions>_channel_<channel>.npz``.

    Args:
        label: class label used to pick the sample.
        num_corruptions: number of attenuated copies generated in addition to
            the clean one.
        channel: index along the sample's first axis that is attenuated.
    """

    inFilename = './datasets/sat4-sat6/sat4_scaled_ZCA_retain_False_epsilon_0.001_rastered.npz'
    train_set, _, _ = load_data_npz(inFilename)

    # take first example of given label
    sample = train_set[0][train_set[1] == label][0]
    print('taking sample shape as:{}'.format(sample.shape))

    corruptions = np.zeros((num_corruptions + 1,) + tuple(sample.shape))
    for i in range(num_corruptions + 1):
        # Derive the factor from the index instead of repeatedly subtracting
        # 0.1, which accumulates floating-point error.
        scaling = 1.0 - 0.1 * i
        print(scaling)
        # Each copy starts from the pristine sample and is scaled inside the
        # float output array. Scaling an alias of ``sample`` in place would
        # compound the factors from one iteration to the next.
        corruptions[i] = sample
        corruptions[i, channel] *= scaling

    print('dataset shape is:{}'.format(corruptions.shape))
    outFilename = './datasets/sat4-sat6/sat4_label_{0}_corrupted_{1}_channel_{2}.npz'.format(
        label, num_corruptions, channel)
    # The with-statement closes the file; no explicit close() is needed.
    with open(outFilename, 'wb') as f:
        np.savez(f, X_test=corruptions)
def main(argv):
    """Command-line entry point: load a dataset and hand it to ``apply_rbm_sgd``.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset passed to ``load_data_npz`` (required)
        -r, --results_dir PATH  forwarded unchanged to ``apply_rbm_sgd``
                                (``None`` when not given)

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage = 'usage: softmax.py -d <data_file> -r <results_dir>'

    data_file = None
    results_dir = None

    try:
        opts, _ = getopt.getopt(argv, "hd:r:",
                                ["data_file=", "results_dir="])
    except getopt.GetoptError:
        print('incorrect usage')
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg

    if data_file is None:
        print('data_file was not given')
        print(usage)
        sys.exit(2)

    try:
        train_set, validation_set, test_set = load_data_npz(data_file)
    except IOError:
        # Nothing below can run without the data, so stop here instead of
        # failing on an unbound variable a few lines later.
        print('cannot open data_file {}'.format(data_file))
        sys.exit(1)

    # Only the inputs (element 0 of each split) are used; labels are ignored.
    X_train = toShared_x(train_set[0])
    X_test = toShared_x(test_set[0])

    # Hyper-parameters forwarded verbatim to apply_rbm_sgd.
    nChannels = 4
    dimHidden = 100
    epochs = 15
    miniBatchSize = 20
    alpha = 0.001
    nChains = 20
    n_plotting_samples = 10
    k = 15
    persistent_bool = True

    apply_rbm_sgd(X_train, X_test, nChannels, dimHidden, epochs, miniBatchSize,
                  alpha, nChains, n_plotting_samples, persistent_bool, k,
                  results_dir)
def plot_channel_hists(filename,
                       channel,
                       class_labels,
                       class_label_descriptions,
                       zorders,
                       colors,
                       scale_to_unit_interval=False,
                       rastered_input=False):
    """Overlay, for one channel, a 100-bin histogram per requested class.

    Training data is read with ``load_data_npz(filename)``. For every entry
    of ``class_labels`` the values of ``channel`` belonging to that class are
    drawn with the matching description (legend text), z-order and colour.
    With ``rastered_input`` the data is first averaged over axes 2 and 3;
    with ``scale_to_unit_interval`` it is first passed through ``unit_scale``.
    The x-axis is fixed to [0, 1] and the figure is shown immediately.
    """

    training, _val_unused, _test_unused = load_data_npz(filename)
    features, targets = training[0], training[1]

    # The four per-class sequences must line up one-to-one.
    per_class_lengths = {
        len(class_labels),
        len(class_label_descriptions),
        len(zorders),
        len(colors),
    }
    assert len(per_class_lengths) == 1
    # Every requested label has to lie inside the label range of the data.
    assert max(class_labels) <= targets.max()
    assert min(class_labels) >= targets.min()

    if scale_to_unit_interval:
        features = unit_scale(features)

    intensities = features
    axis_caption = 'Channel intensity (normalised)'
    if rastered_input:
        # features must be of shape [nSamples, nChannels, rows, cols]
        assert len(features.shape) == 4
        # average over raster
        raster_mean = np.apply_over_axes(np.mean, features, [2, 3])
        intensities = np.squeeze(raster_mean)
        axis_caption = 'Average channel intensity (normalised)'

    plt.figure()
    per_class = zip(class_labels, class_label_descriptions, zorders, colors)
    for wanted, caption, depth, shade in per_class:
        selected = intensities[targets == wanted, channel]
        plt.hist(selected, bins=100, label=caption, zorder=depth, color=shade)
    plt.xlim(0.0, 1.0)
    plt.xlabel(axis_caption, fontsize=14)
    plt.ylabel('number of samples', fontsize=14)
    plt.legend()
    plt.show()
Beispiel #4
0
def main(argv):
    """Command-line entry point: load a dataset and hand it to ``apply_dA_sgd``.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset passed to ``load_data_npz`` (required)
        -r, --results_dir PATH  forwarded unchanged to ``apply_dA_sgd``
                                (``None`` when not given)

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage = 'usage: softmax.py -d <data_file> -r <results_dir>'

    data_file = None
    results_dir = None

    try:
        opts, _ = getopt.getopt(argv, "hd:r:",
                                ["data_file=", "results_dir="])
    except getopt.GetoptError:
        print('incorrect usage')
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg

    if data_file is None:
        print('data_file was not given')
        print(usage)
        sys.exit(2)

    try:
        train_set, validation_set, test_set = load_data_npz(data_file)
    except IOError:
        # Nothing below can run without the data, so stop here instead of
        # failing on an unbound variable a few lines later.
        print('cannot open data_file {}'.format(data_file))
        sys.exit(1)

    # Only the training inputs are used; labels and other splits are ignored.
    X = toShared_x(train_set[0])

    # Hyper-parameters forwarded verbatim to apply_dA_sgd.
    nChannels = 4
    dimHidden = 100
    epochs = 15
    miniBatchSize = 20
    alpha = 0.001
    momentum = None

    apply_dA_sgd(X, nChannels, dimHidden, epochs, miniBatchSize, alpha,
                 results_dir, momentum)
def plot_covariance(filename, labels, rastered=False):
    """Plot the channel-by-channel second-moment matrix of the training data.

    The matrix shown is ``X^T X / n_rows``. The data is not mean-centred
    here, so this equals the covariance only when the input is already
    zero-mean.

    Args:
        filename: path handed to ``load_data_npz``; only the training inputs
            are used.
        labels: one tick label per channel (any sequence).
        rastered: when true, the 4-D input is flattened to one row per pixel
            and one column per channel before the matrix is computed.
    """

    train_set, _, _ = load_data_npz(filename)
    X_train = train_set[0].astype(float)

    if rastered:
        # assumes rastered input is [nSamples, nChannels, rows, cols] - TODO confirm
        # Reshape the float copy (not the raw array) so an integer dtype
        # cannot overflow in the product below, and take the channel count
        # from the data rather than hard-coding it.
        n_channels = X_train.shape[1]
        X_train = X_train.swapaxes(3, 1).reshape(-1, n_channels)

    labels = list(labels)  # allow tuples/arrays; a list is needed for the concatenation below
    assert len(labels) == X_train.shape[1]

    cov = np.dot(np.transpose(X_train), X_train) / X_train.shape[0]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cov, interpolation='nearest')
    fig.colorbar(cax)

    # The leading '' is for the first tick, presumably one that the default
    # locator places before the first matrix cell.
    ax.set_xticklabels([''] + labels)
    ax.set_yticklabels([''] + labels)

    plt.show()
def plot_spec_profile(filename,
                      class_label,
                      channel_descriptions,
                      zorders,
                      colors,
                      scale_to_unit_interval=False,
                      rastered_input=False):
    """Overlay, for one class, a 100-bin histogram per channel.

    Training data is read with ``load_data_npz(filename)``. Every column of
    the (optionally raster-averaged) data gets its own histogram restricted
    to the samples labelled ``class_label``, drawn with the description,
    z-order and colour found at the channel's index. With
    ``scale_to_unit_interval`` the data is first passed through
    ``unit_scale``. The figure is shown immediately.
    """

    training, _val_unused, _test_unused = load_data_npz(filename)
    features, targets = training[0], training[1]
    assert len(zorders) == len(colors)

    if scale_to_unit_interval:
        features = unit_scale(features)

    intensities = features
    axis_caption = 'Channel intensity (normalised)'
    if rastered_input:
        # features must be of shape [nSamples, nChannels, rows, cols]
        assert len(features.shape) == 4
        # average over raster
        raster_mean = np.apply_over_axes(np.mean, features, [2, 3])
        intensities = np.squeeze(raster_mean)
        axis_caption = 'Average channel intensity (normalised)'

    plt.figure()
    in_class = targets == class_label
    n_channels = intensities.shape[1]
    for idx in range(n_channels):
        plt.hist(intensities[in_class, idx],
                 bins=100,
                 label=channel_descriptions[idx],
                 zorder=zorders[idx],
                 color=colors[idx])
    plt.xlabel(axis_caption, fontsize=14)
    plt.ylabel('number of samples', fontsize=14)
    plt.legend()
    plt.show()
Beispiel #7
0
def main(argv):
    """Command-line entry point: run ``apply_sdA_sgd`` or test a saved model.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset passed to ``load_data_npz`` (required)
        -r, --results_dir PATH  call ``apply_sdA_sgd`` with this directory
        -m, --model_file PATH   call ``test_saved_model`` with this file

    ``-r`` takes precedence when both ``-r`` and ``-m`` are given.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage1 = 'usage1: softmax.py -d <data_file> -r <results_dir>'
    usage2 = 'usage2: softmax.py -d <data_file> -m <saved_model_file>'

    data_file = None
    results_dir = None
    model_file = None

    try:
        opts, _ = getopt.getopt(
            argv, "hd:r:m:", ["data_file=", "results_dir=", "model_file="])
    except getopt.GetoptError:
        print('incorrect usage')
        print(usage1)
        print(usage2)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage1)
            print(usage2)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg
        elif opt in ("-m", "--model_file"):
            model_file = arg

    if data_file is None:
        print('data_file was not given')
        print(usage1)
        print(usage2)
        sys.exit(2)

    if results_dir is None and model_file is None:
        # No mode selected: report it before spending time loading the data.
        print('exactly 2 arguments must be passed on command line')
        print(usage1)
        print(usage2)
        return

    try:
        print('loading in data...')
        train_set, validation_set, test_set = load_data_npz(data_file)
    except IOError:
        # Nothing below can run without the data, so stop here instead of
        # failing on an unbound variable a few lines later.
        print('cannot open data_file {}'.format(data_file))
        sys.exit(1)

    # unit scale if using cross entropy cost and not scaled already
    #print 'unit scaling data'
    #train_set[0], validation_set[0], test_set[0] = unit_scale([train_set[0],validation_set[0],test_set[0]])

    train_set, validation_set, test_set = [
        toShared_xy(train_set),
        toShared_xy(validation_set),
        toShared_xy(test_set)
    ]

    # Hyper-parameters forwarded verbatim to the calls below.
    nChannels = 4
    corruptionLevels = [0.1, 0.2, 0.3]
    dropout_rates = [0.0, 0.0, 0.0, 0.0]
    dimHiddenLayers = [100, 100, 100]
    nLabels = 4
    sgd_opts = {
        'epochs_pre': 1,
        'min_epochs': 10,
        'max_epochs': 10,
        'alpha_pre': 0.001,
        'alpha_init': 0.1,
        'gamma': 0.0001,
        'p': 0.75,
        'monitor_frequency': 1000
    }
    momentum = 0.9
    miniBatchSize = 20
    monitoring_to_file = True

    if results_dir is not None:
        # train a new model
        print('setting up model...')
        apply_sdA_sgd(train_set, validation_set, test_set, nChannels,
                      dimHiddenLayers, nLabels, miniBatchSize, sgd_opts,
                      dropout_rates, corruptionLevels, results_dir, momentum,
                      monitoring_to_file)
    else:
        # test a saved model
        # this option will be ignored if results_dir is not None
        print('testing saved model...')
        test_saved_model(test_set, nLabels, model_file)
def main(argv):
    """Command-line entry point: run ``apply_mlp_sgd`` or test a saved model.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset passed to ``load_data_npz`` (required)
        -r, --results_dir PATH  call ``apply_mlp_sgd`` with this directory
        -m, --model_file PATH   call ``test_saved_model`` with this file

    ``-r`` takes precedence when both ``-r`` and ``-m`` are given.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage1 = 'usage1: mlp.py -d <data_file> -r <results_dir>'
    usage2 = 'usage2: mlp.py -d <data_file> -m <saved_model_file>'

    data_file = None
    results_dir = None
    model_file = None

    try:
        # "model_file=" must be declared here, otherwise --model_file is
        # rejected by getopt even though it is handled below.
        opts, _ = getopt.getopt(
            argv, "hd:r:m:", ["data_file=", "results_dir=", "model_file="])
    except getopt.GetoptError:
        print('incorrect usage')
        print('usage: mlp.py -d <data_file> -r <results_dir>')
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage1)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg
        elif opt in ("-m", "--model_file"):
            model_file = arg

    if data_file is None:
        print('data_file was not given')
        print(usage1)
        print(usage2)
        sys.exit(2)

    if results_dir is not None:

        # train a new model

        dimHiddenLayers = [100, 100, 100]
        nLabels = 4
        miniBatchSize = 20
        sgd_opts = {
            'min_epochs': 10,
            'max_epochs': 10,
            'alpha_init': 0.1,
            'gamma': 0.0001,
            'p': 0.75,
            'monitor_frequency': 1000
        }
        lmbda = 0.0
        dropout_rates = [0.0, 0.0, 0.0, 0.5]
        activations = [relu, relu, relu]
        momentum = 0.9
        monitoring_to_file = True

        # Only referenced by the commented-out plot_bar call below.
        label_descriptions = [
            'rapeseed', 'water', 'built up', 'bare soil', 'wheat', 'grass',
            'clouds', 'cloud shadows'
        ]

        try:
            print('loading in data...')
            data = load_data_npz(data_file)
            #plot_bar(np.bincount(train_set[1]), xlabel='landcover class', ylabel='number of samples', label_descriptions=label_descriptions)

            # landsat 2 remove class that corresponds to border
            # for i in xrange(len(data)):
            #     keep = data[i][1]!=13
            #     data[i][0]=data[i][0][keep]
            #     data[i][1]=data[i][1][keep]

            #     data[i][1][data[i][1]==14]=13
            #     data[i][1][data[i][1]==15]=14
            #     data[i][1][data[i][1]==16]=15
            #     print np.unique(data[i][1])

            train_set, validation_set, test_set = data

        except IOError:
            # Nothing below can run without the data, so stop here instead
            # of failing on an unbound variable a few lines later.
            print('cannot open data_file {}'.format(data_file))
            sys.exit(1)

        train_set, validation_set, test_set = [
            toShared_xy(train_set),
            toShared_xy(validation_set),
            toShared_xy(test_set)
        ]
        print('setting up model...')
        apply_mlp_sgd(train_set, validation_set, test_set, dimHiddenLayers,
                      nLabels, miniBatchSize, sgd_opts, lmbda, dropout_rates,
                      activations, results_dir, momentum, monitoring_to_file)

    elif model_file is not None:

        # Test a saved model.

        # rapideye
        imageShape = (5000, 5000)
        zoomParams = {
            'zoom': 10,
            'x1': 2400,
            'x2': 2600,
            'y1': 2400,
            'y2': 2600
        }  # origin top left
        label_descriptions = [
            'rapeseed', 'water', 'built up', 'other', 'wheat', 'grass',
            'clouds', 'cloud shadows'
        ]

        #landsat2
        # imageShape = (8191,8081)
        # zoomParams = {'zoom':30, 'x1':5100, 'x2':5200,
        #               'y1':3100, 'y2':3200} # origin top left
        # label_descriptions = ['rapeseed', 'wheat', 'grass 1', 'grass 2', 'grass 3', 'built up 1', 'built up 2', 'fallow', 'bare soil 1', 'barley', 'built up 3', 'potatoes', 'shallow water', 'bare soil 2', 'clouds', 'cloud shadows']

        miniBatchSize = 1000

        try:
            test_set = load_data_npz(data_file, only_test=True)

            # landsat 2 remove class that corresponds to border
            # keep = test_set[1]!=13
            # test_set[0]=test_set[0][keep]
            # test_set[1]=test_set[1][keep]

            # test_set[1][test_set[1]==14]=13
            # test_set[1][test_set[1]==15]=14
            # test_set[1][test_set[1]==16]=15
            # print np.unique(test_set[1])

        except IOError:
            # Same reasoning as in the training branch: stop cleanly.
            print('cannot open data_file {}'.format(data_file))
            sys.exit(1)

        X_test, y_test = toShared_xy(test_set)
        print('testing saved model...')
        test_saved_model(X_test, model_file, label_descriptions, imageShape,
                         miniBatchSize, y_test, zoomParams)

    else:
        print('exactly 2 arguments must be passed on command line')
        print(usage1)
        print(usage2)
Beispiel #9
0
def main(argv):
    """Command-line entry point: run ``apply_softmax_sgd`` or test a saved model.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset file (required); read with
                                ``load_data_pickled`` when training and with
                                ``load_data_npz`` when testing
        -r, --results_dir PATH  call ``apply_softmax_sgd`` with this directory
        -m, --model_file PATH   call ``test_saved_model`` with this file

    ``-r`` takes precedence when both ``-r`` and ``-m`` are given.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage1 = 'usage1: softmax.py -d <data_file> -r <results_dir>'
    usage2 = 'usage2: softmax.py -d <data_file> -m <saved_model_file>'

    data_file = None
    results_dir = None
    model_file = None

    try:
        opts, _ = getopt.getopt(
            argv, "hd:r:m:",
            ["data_file=", "results_dir=", "model_file="])
    except getopt.GetoptError:
        print('incorrect usage')
        print(usage1)
        print(usage2)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage1)
            print(usage2)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg
        elif opt in ("-m", "--model_file"):
            model_file = arg

    if data_file is None:
        print('data_file was not given')
        print(usage1)
        print(usage2)
        sys.exit(2)

    if results_dir is not None:

        # Train a new model.

        nLabels = 10
        sgd_opts = {'min_epochs': 10, 'max_epochs': 10, 'alpha_init': 0.1,
                    'gamma': 0.0001, 'p': 0.75}
        miniBatchSize = 600
        lmbda = 0.0

        try:
            train_set, validation_set, test_set = load_data_pickled(data_file)
        except IOError:
            # Nothing below can run without the data, so stop here instead
            # of failing on an unbound variable a few lines later.
            print('cannot open data_file {}'.format(data_file))
            sys.exit(1)

        train_set, validation_set, test_set = [toShared_xy(train_set),
                                               toShared_xy(validation_set),
                                               toShared_xy(test_set)]
        apply_softmax_sgd(
            train_set, validation_set, test_set, nLabels,
            miniBatchSize, sgd_opts, lmbda, results_dir)

    elif model_file is not None:

        # Test a saved model.

        nLabels = 4
        try:
            _, _, test_set = load_data_npz(data_file)
        except IOError:
            # Same reasoning as in the training branch: stop cleanly.
            print('cannot open data_file {}'.format(data_file))
            sys.exit(1)

        test_set = toShared_xy(test_set)
        test_saved_model(test_set, nLabels, model_file)

    else:
        print('exactly 2 arguments must be passed on command line')
        print(usage1)
        print(usage2)
Beispiel #10
0
def main(argv):
    """Command-line entry point: run ``apply_dbn_sgd`` or test a saved model.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset passed to ``load_data_npz`` (required)
        -r, --results_dir PATH  call ``apply_dbn_sgd`` with this directory
        -m, --model_file PATH   call ``test_saved_model`` with this file

    ``-r`` takes precedence when both ``-r`` and ``-m`` are given.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage1 = 'usage1: softmax.py -d <data_file> -r <results_dir>'
    usage2 = 'usage2: softmax.py -d <data_file> -m <saved_model_file>'

    data_file = None
    results_dir = None
    model_file = None

    try:
        opts, _ = getopt.getopt(
            argv, "hd:r:m:", ["data_file=", "results_dir=", "model_file="])
    except getopt.GetoptError:
        print('incorrect usage')
        print(usage1)
        print(usage2)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage1)
            print(usage2)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg
        elif opt in ("-m", "--model_file"):
            model_file = arg

    if data_file is None:
        print('data_file was not given')
        print(usage1)
        print(usage2)
        sys.exit(2)

    if results_dir is None and model_file is None:
        # No mode selected: report it before spending time loading the data.
        print('exactly 2 arguments must be passed on command line')
        print(usage1)
        print(usage2)
        return

    try:
        print('loading in data...')
        train_set, validation_set, test_set = load_data_npz(data_file)
    except IOError:
        # Nothing below can run without the data, so stop here instead of
        # failing on an unbound variable a few lines later.
        print('cannot open data_file {}'.format(data_file))
        sys.exit(1)

    train_set, validation_set, test_set = [
        toShared_xy(train_set),
        toShared_xy(validation_set),
        toShared_xy(test_set)
    ]

    # Hyper-parameters forwarded verbatim to the calls below.
    nChannels = 4
    dimHiddenLayers = [1000, 1000, 1000]
    nLabels = 4
    sgd_opts = {
        'epochs_pre': 15,
        'min_epochs': 15,
        'max_epochs': 15,
        'alpha_pre': 0.001,
        'alpha_init': 0.1,
        'gamma': 0.0001,
        'p': 0.75
    }
    miniBatchSize = 20
    persistent_bool = True
    k = 1

    if results_dir is not None:
        # train a new model
        print('setting up model...')
        apply_dbn_sgd(train_set, validation_set, test_set, nChannels,
                      dimHiddenLayers, nLabels, miniBatchSize, sgd_opts,
                      persistent_bool, k, results_dir)
    else:
        # test a saved model
        # this option will be ignored if results_dir is not None
        print('testing saved model...')
        test_saved_model(test_set, nLabels, model_file)
Beispiel #11
0
def main(argv):
    """Command-line entry point: run ``apply_LeNet_sgd`` or test a saved model.

    Recognised options:
        -h                      print usage and exit with status 0
        -d, --data_file PATH    dataset passed to ``load_data_npz`` (required)
        -r, --results_dir PATH  call ``apply_LeNet_sgd`` with this directory
        -m, --model_file PATH   call ``test_saved_LeNet`` with this file

    ``-r`` takes precedence when both ``-r`` and ``-m`` are given.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and status 1 when the data file
    cannot be opened.
    """
    usage1 = 'usage1: softmax.py -d <data_file> -r <results_dir>'
    usage2 = 'usage2: softmax.py -d <data_file> -m <saved_model_file>'

    data_file = None
    results_dir = None
    model_file = None

    try:
        opts, _ = getopt.getopt(
            argv, "hd:r:m:",
            ["data_file=", "results_dir=", "model_file="])
    except getopt.GetoptError:
        print('incorrect usage')
        print(usage1)
        print(usage2)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            # Help was requested: show usage and stop, rather than falling
            # through to the "data_file was not given" error path.
            print(usage1)
            print(usage2)
            sys.exit(0)
        elif opt in ("-d", "--data_file"):
            data_file = arg
        elif opt in ("-r", "--results_dir"):
            # getopt reports the long option as "--results_dir"
            # (single underscore), matching the declared "results_dir=".
            results_dir = arg
        elif opt in ("-m", "--model_file"):
            model_file = arg

    if data_file is None:
        print('data_file was not given')
        print(usage1)
        print(usage2)
        sys.exit(2)

    if results_dir is not None:

        # Train a new model.

        numFilters = [6, 12]
        nLabels = 4
        dimHiddenSig = 128
        filterDim = 5
        poolShape = (3, 3)
        sgd_opts = {'min_epochs': 50, 'max_epochs': 50, 'alpha_init': 0.01,
                    'gamma': 0.0001, 'p': 0.75, 'monitor_frequency': 1000}
        miniBatchSize = 20
        lmbda = 0.0
        dropout_rates = [0.0, 0.0, 0.0, 0.0, 0.0]
        activations = [relu, relu, relu, relu]
        momentum = 0.9
        mc_samples = None
        monitoring_to_file = True

        try:
            print('loading in data...')
            train_set, validation_set, test_set = load_data_npz(data_file)
        except IOError:
            # Nothing below can run without the data, so stop here instead
            # of failing on an unbound variable a few lines later.
            print('cannot open data_file {}'.format(data_file))
            sys.exit(1)

        train_set, validation_set, test_set = [toShared_xy(train_set),
                                               toShared_xy(validation_set),
                                               toShared_xy(test_set)]
        print('setting up model...')
        apply_LeNet_sgd(
            train_set, validation_set, test_set, numFilters, nLabels,
            dimHiddenSig, filterDim, poolShape, miniBatchSize, sgd_opts,
            lmbda, dropout_rates, activations, results_dir, momentum,
            mc_samples, monitoring_to_file)

    elif model_file is not None:

        # Test a saved model.

        nLabels = 4
        plot_shape = (100, 100)
        label_descriptions = ['barren', 'trees', 'grassland', 'other']

        try:
            test_set = load_data_npz(data_file, only_test=True)
        except IOError:
            # Same reasoning as in the training branch: stop cleanly.
            print('cannot open data_file {}'.format(data_file))
            sys.exit(1)

        # toShared_xy rather than toShared_x: the result is unpacked into an
        # (X, y) pair, as the other entry point that loads with
        # only_test=True does.
        # NOTE(review): confirm against the helper definitions.
        X_test, _ = toShared_xy(test_set)
        print('testing saved model...')
        test_saved_LeNet(X_test, nLabels, model_file,
                         label_descriptions, plot_shape,
                         mc_dropout=True)

    else:
        print('exactly 2 arguments must be passed on command line')
        print(usage1)
        print(usage2)