コード例 #1
0
ファイル: evaluator.py プロジェクト: olavvatne/CNN
    def _build(self, batch_size, init):
        """Build the model graph and compile the Theano functions used by the evaluator.

        Populates ``self.test_model``, ``self.validate_model``,
        ``self.get_training_loss``, ``self.train_model`` and ``self.tester``.

        :param batch_size: minibatch size passed to the model builder and to
            every compiled function.
        :param init: forwarded to ``self.model.build`` as ``init_params``.
        """
        print_section('Building model')

        float_type = theano.config.floatX

        # Symbolic inputs shared by all compiled functions.
        batch_index = T.lscalar()  # which [mini]batch to evaluate
        images = T.matrix('x')  # the data is presented as rasterized images
        labels = T.imatrix('y')  # label data
        # Drop switch: only training should drop units. Testing and validation
        # use all units (but with rescaled output).
        drop_switch = T.iscalar('drop')
        step_size = T.scalar('learning_rate', dtype=float_type)
        loss_mix = T.scalar('factor', dtype=float_type)

        self.model.build(images, drop_switch, batch_size, init_params=init)
        error_expr = self.model.get_output_layer().errors(labels)

        def compile_func(set_name, inputs, output, **options):
            # Every function shares the dataset, symbolic inputs and batch size.
            return create_theano_func(
                set_name, self.data, images, labels, drop_switch, inputs, output,
                batch_size, **options
            )

        # Evaluation functions: all report the output layer's error expression.
        self.test_model = compile_func('test', [batch_index], error_expr)
        self.validate_model = compile_func('validation', [batch_index], error_expr)
        self.get_training_loss = compile_func(
            'train', [batch_index], error_expr, prefix="_loss"
        )

        # Training objective: model cost plus the weighted L2 term.
        model_cost = self.model.get_cost(labels, loss_mix)
        l2_penalty = self.params.l2_reg * self.model.getL2()
        objective = model_cost + l2_penalty

        optimizer = Backpropagation.create(self.model.params)
        gradients = T.grad(objective, self.model.params)
        param_updates = optimizer.updates(
            self.model.params, gradients, step_size, self.params.momentum
        )

        self.train_model = compile_func(
            'train', [batch_index, step_size, loss_mix], objective,
            updates=param_updates, dropping=True
        )

        self.tester = create_profiler_func(
            self.data, images, labels, drop_switch, [batch_index, loss_mix],
            self.model.get_output_layer(), objective, batch_size
        )
コード例 #2
0
ファイル: evaluator.py プロジェクト: hexiangquan/CNN
    def _build(self, batch_size, init):
        """Assemble the symbolic graph and compile every Theano function needed.

        After this call the evaluator has ``test_model``, ``validate_model``,
        ``get_training_loss``, ``train_model`` and ``tester`` attributes.

        :param batch_size: minibatch size used when building the model and
            compiling the functions.
        :param init: passed on to ``self.model.build`` as ``init_params``.
        """
        print_section('Building model')

        floatx = theano.config.floatX

        idx = T.lscalar()  # index to a [mini]batch
        sym_x = T.matrix('x')  # the data is presented as rasterized images
        sym_y = T.imatrix('y')  # label data
        # Drop switch. Only train should drop units; testing and validation
        # should use all units (but output rescaled).
        sym_drop = T.iscalar('drop')
        sym_lr = T.scalar('learning_rate', dtype=floatx)
        sym_factor = T.scalar('factor', dtype=floatx)

        self.model.build(sym_x, sym_drop, batch_size, init_params=init)
        err = self.model.get_output_layer().errors(sym_y)

        def make(subset, givens, out, **kwargs):
            # Thin wrapper so the shared positional arguments are written once.
            return create_theano_func(subset, self.data, sym_x, sym_y, sym_drop,
                                      givens, out, batch_size, **kwargs)

        only_index = [idx]
        self.test_model = make('test', only_index, err)
        self.validate_model = make('validation', only_index, err)
        self.get_training_loss = make('train', only_index, err, prefix="_loss")

        # Cost = model cost (mixed by the factor) + L2 regularisation term.
        base_cost = self.model.get_cost(sym_y, sym_factor)
        regulariser = self.params.l2_reg * self.model.getL2()
        total_cost = base_cost + regulariser

        backprop = Backpropagation.create(self.model.params)
        grad_exprs = T.grad(total_cost, self.model.params)
        update_rules = backprop.updates(self.model.params, grad_exprs, sym_lr,
                                        self.params.momentum)

        self.train_model = make('train', [idx, sym_lr, sym_factor], total_cost,
                                updates=update_rules, dropping=True)

        self.tester = create_profiler_func(self.data, sym_x, sym_y, sym_drop,
                                           [idx, sym_factor],
                                           self.model.get_output_layer(),
                                           total_cost, batch_size)
コード例 #3
0
ファイル: data.py プロジェクト: hexiangquan/CNN
    def load(self, dataset_path, params, batch_size=1):
        """Sample the aerial patch dataset and wrap it in shared variables.

        Patches are created through ``Creator``, checked against the batch size,
        and the training set is split into chunks. Only the first training chunk
        becomes the active shared dataset; all chunks are kept in
        ``self.all_training``.

        :param dataset_path: location handed to ``Creator``.
        :param params: dataset settings object (reads ``dataset_std``,
            ``chunk_size``, dimensions, rotation/preprocessing/noise flags and
            the ``reduce_*`` values).
        :param batch_size: minibatch size used for the size checks and chunking.
        :return: ``True`` once loading has finished.
        """
        print_section('Creating aerial image dataset')

        self.std = params.dataset_std
        chunk_size = params.chunk_size

        #TODO: ensure that the dataset is as expected.
        creator_options = dict(
            dim=(params.input_dim, params.output_dim),
            rotation=params.use_rotation,
            preproccessing=params.use_preprocessing,
            std=self.std,
            only_mixed=params.only_mixed_labels,
            reduce_testing=params.reduce_testing,
            reduce_training=params.reduce_training,
            reduce_validation=params.reduce_validation,
        )
        patch_creator = Creator(dataset_path, **creator_options)
        train, valid, test = patch_creator.dynamically_create(
            params.samples_per_image,
            enable_label_noise=params.use_label_noise,
            label_noise=params.label_noise,
            only_mixed=params.only_mixed_labels)

        #Testing dataset size requirements
        named_sets = (('train', train), ('valid', valid), ('test', test))
        for set_name, subset in named_sets:
            AerialDataset.dataset_check(set_name, subset, batch_size)

        data_shape, label_shape = train[0].shape, train[1].shape
        AerialDataset.dataset_shared_stats(data_shape, label_shape, chunk_size)

        self.set_nr_examples(train, valid, test)

        chunk_count = AerialDataset.dataset_sizes(train, valid, test,
                                                  chunk_size)
        training_chunks = self._chunkify(train, chunk_count, batch_size)

        first_chunk, last_chunk = training_chunks[0], training_chunks[-1]
        AerialDataset.dataset_chunk_stats(len(training_chunks),
                                          len(first_chunk[0]),
                                          len(last_chunk[0]))

        # The active training set keeps uncast labels; the cast to int32 is
        # applied symbolically when the 'train' entry is registered.
        self.active = self.shared_dataset(first_chunk, cast_to_int=False)
        active_data, active_labels = self.active[0], self.active[1]
        self.set['train'] = active_data, T.cast(active_labels, 'int32')
        self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
        self.set['test'] = self.shared_dataset(test, cast_to_int=True)

        #Not stored on the GPU, unlike the shared variables defined above.
        self.all_training = training_chunks
        return True
コード例 #4
0
ファイル: data.py プロジェクト: olavvatne/CNN
    def load(self, dataset_path, params, batch_size=1):
        """Create the aerial image dataset and register its train/validation/test sets.

        The training examples are chunked; the first chunk is made the active
        shared dataset while the full list of chunks is retained in
        ``self.all_training``.

        :param dataset_path: dataset location passed to ``Creator``.
        :param params: dataset configuration object whose attributes drive patch
            creation and chunking.
        :param batch_size: minibatch size for the dataset checks and chunking.
        :return: always ``True``.
        """
        print_section('Creating aerial image dataset')

        self.std = params.dataset_std
        examples_per_chunk = params.chunk_size

        #TODO: ensure that the dataset is as expected.
        patch_dims = (params.input_dim, params.output_dim)
        sampler = Creator(
            dataset_path,
            dim=patch_dims,
            rotation=params.use_rotation,
            preproccessing=params.use_preprocessing,
            std=self.std,
            only_mixed=params.only_mixed_labels,
            reduce_testing=params.reduce_testing,
            reduce_training=params.reduce_training,
            reduce_validation=params.reduce_validation,
        )
        train, valid, test = sampler.dynamically_create(
            params.samples_per_image,
            enable_label_noise=params.use_label_noise,
            label_noise=params.label_noise,
            only_mixed=params.only_mixed_labels,
        )

        #Testing dataset size requirements
        for label, examples in (('train', train), ('valid', valid), ('test', test)):
            AerialDataset.dataset_check(label, examples, batch_size)

        AerialDataset.dataset_shared_stats(
            train[0].shape, train[1].shape, examples_per_chunk
        )

        self.set_nr_examples(train, valid, test)

        total_chunks = AerialDataset.dataset_sizes(train, valid, test, examples_per_chunk)
        training_chunks = self._chunkify(train, total_chunks, batch_size)

        head_chunk = training_chunks[0]
        tail_chunk = training_chunks[-1]
        AerialDataset.dataset_chunk_stats(
            len(training_chunks), len(head_chunk[0]), len(tail_chunk[0])
        )

        # Training labels are cast to int32 symbolically; validation and test
        # labels are cast when their shared datasets are created.
        self.active = self.shared_dataset(head_chunk, cast_to_int=False)
        self.set['train'] = (self.active[0], T.cast(self.active[1], 'int32'))
        for key, examples in (('validation', valid), ('test', test)):
            self.set[key] = self.shared_dataset(examples, cast_to_int=True)

        #Not stored on the GPU, unlike the shared variables defined above.
        self.all_training = training_chunks
        return True
コード例 #5
0
ファイル: data.py プロジェクト: hexiangquan/CNN
    def load(self, dataset_path, params, batch_size=1):
        """Load stage 0 of a pre-generated curriculum dataset plus validation and test sets.

        Records the stage directory and the number of entries in it so later
        stages can be loaded, then chunks the training data and makes the first
        chunk the active shared dataset.

        :param dataset_path: root directory containing "train", "valid" and
            "test" sets.
        :param params: dataset configuration (reads ``chunk_size`` and
            ``dataset_std``).
        :param batch_size: minibatch size for the size checks and chunking.
        :return: always ``True``.
        """
        print_section('Loading aerial curriculum dataset')
        chunk_size = params.chunk_size
        self.std = params.dataset_std  #Need for debug

        #For later stage loading
        self.stage = 0
        train_dir = os.path.join(dataset_path, "train")
        self.stage_path = train_dir
        self.nr_of_stages = len(os.listdir(train_dir))

        stage_name = "stage{}".format(self.stage)
        train = self.load_set(dataset_path, "train", stage=stage_name)
        valid = self.load_set(dataset_path, "valid")
        test = self.load_set(dataset_path, "test")

        #Testing dataset size requirements
        named_sets = (('train', train), ('valid', valid), ('test', test))
        for set_name, subset in named_sets:
            AerialCurriculumDataset.dataset_check(set_name, subset, batch_size)

        data_shape, label_shape = train[0].shape, train[1].shape
        AerialCurriculumDataset.dataset_shared_stats(data_shape, label_shape,
                                                     chunk_size)

        self.set_nr_examples(train, valid, test)

        chunk_count = AerialCurriculumDataset.dataset_sizes(
            train, valid, test, chunk_size)
        training_chunks = self._chunkify(train, chunk_count, batch_size)

        first_chunk, last_chunk = training_chunks[0], training_chunks[-1]
        AerialCurriculumDataset.dataset_chunk_stats(
            len(training_chunks), len(first_chunk[0]), len(last_chunk[0]))

        # Training labels are cast to int32 symbolically when registered.
        self.active = self.shared_dataset(first_chunk, cast_to_int=False)
        active_data, active_labels = self.active[0], self.active[1]
        self.set['train'] = active_data, T.cast(active_labels, 'int32')
        self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
        self.set['test'] = self.shared_dataset(test, cast_to_int=True)

        #Not stored on the GPU, unlike the shared variables defined above.
        self.all_training = training_chunks
        return True
コード例 #6
0
ファイル: data.py プロジェクト: olavvatne/CNN
    def load(self, dataset_path, params, batch_size=1):
        """Read the first curriculum stage and the validation/test sets from disk.

        Stores the stage bookkeeping (``stage``, ``stage_path``,
        ``nr_of_stages``), splits the training data into chunks and registers
        the train/validation/test entries in ``self.set``.

        :param dataset_path: directory holding the "train", "valid" and "test"
            sets.
        :param params: dataset configuration (``chunk_size``, ``dataset_std``).
        :param batch_size: minibatch size used by the checks and the chunking.
        :return: always ``True``.
        """
        print_section('Loading aerial curriculum dataset')
        examples_per_chunk = params.chunk_size
        self.std = params.dataset_std  #Need for debug

        #For later stage loading
        self.stage = 0
        stage_root = os.path.join(dataset_path, "train")
        self.stage_path = stage_root
        self.nr_of_stages = len(os.listdir(stage_root))

        train = self.load_set(
            dataset_path, "train", stage="stage{}".format(self.stage)
        )
        valid = self.load_set(dataset_path, "valid")
        test = self.load_set(dataset_path, "test")

        #Testing dataset size requirements
        for label, examples in (('train', train), ('valid', valid), ('test', test)):
            AerialCurriculumDataset.dataset_check(label, examples, batch_size)

        AerialCurriculumDataset.dataset_shared_stats(
            train[0].shape, train[1].shape, examples_per_chunk
        )

        self.set_nr_examples(train, valid, test)

        total_chunks = AerialCurriculumDataset.dataset_sizes(
            train, valid, test, examples_per_chunk
        )
        training_chunks = self._chunkify(train, total_chunks, batch_size)

        head_chunk = training_chunks[0]
        tail_chunk = training_chunks[-1]
        AerialCurriculumDataset.dataset_chunk_stats(
            len(training_chunks), len(head_chunk[0]), len(tail_chunk[0])
        )

        # Training labels get a symbolic int32 cast; validation and test labels
        # are cast when their shared datasets are created.
        self.active = self.shared_dataset(head_chunk, cast_to_int=False)
        self.set['train'] = (self.active[0], T.cast(self.active[1], 'int32'))
        for key, examples in (('validation', valid), ('test', test)):
            self.set[key] = self.shared_dataset(examples, cast_to_int=True)

        #Not stored on the GPU, unlike the shared variables defined above.
        self.all_training = training_chunks
        return True
コード例 #7
0
ファイル: evaluator.py プロジェクト: hexiangquan/CNN
    def _train(self, batch_size, max_epochs):
        """Train the model with learning-rate decay, loss mixing and early stopping.

        The loop runs epochs -> training chunks -> minibatches. Every
        ``validation_frequency`` iterations the validation, test and training
        scores are computed, appended to ``self.events`` and, when the GUI is
        enabled, pushed to the interface server. Training ends when
        ``max_epochs`` is reached, when patience runs out, or when the GUI
        requests a stop. A ``KeyboardInterrupt`` also ends training, and the
        result is still recorded through ``self.set_result``.

        :param batch_size: number of examples per minibatch; used to derive the
            number of batches in each chunk.
        :param max_epochs: upper bound on the number of epochs.
        :raises Exception: any error other than ``KeyboardInterrupt`` is
            reported and re-raised; ``self.set_result`` is not called then.
        """
        print_section('Training model')

        # Training continues at least until `iteration` reaches `patience`.
        patience = self.params.initial_patience
        # Factor applied to the iteration count when a new best is found.
        patience_increase = self.params.patience_increase
        # A relative improvement of this much is considered significant.
        improvement_threshold = self.params.improvement_threshold

        learning_rate = self.params.learning_rate
        learning_adjustment = self.params.learning_adjustment
        learning_decrease = self.params.learning_decrease
        nr_learning_adjustments = 0
        print('---- Initial learning rate {}'.format(learning_rate))

        max_factor = self.params.factor_rate
        factor_adjustment = self.params.factor_adjustment
        factor_decrease = self.params.factor_decrease
        factor_minimum = self.params.factor_minimum
        print('---- Initial loss mixture ratio {}'.format(max_factor))

        curriculum = self.params.curriculum_enable
        curriculum_start = self.params.curriculum_start
        curriculum_adjustment = self.params.curriculum_adjustment

        gui_frequency = 500
        # Go through this many minibatches before checking the network on the
        # validation set. Floor division keeps the count integral regardless of
        # the interpreter's `/` semantics.
        validation_frequency = min(self.nr_train_batches, patience // 2)
        best_validation_loss = np.inf
        best_iter = 0
        self.start_time = timeit.default_timer()

        storage = ParamStorage()

        nr_chunks = self.data.get_chunk_number()
        epoch = 0
        done_looping = False
        # Named `iteration` rather than `iter` so the builtin is not shadowed.
        iteration = 0

        #==== INITIAL PERFORMANCE ====
        # `//` because the batch count is later fed to range().
        chunk_batches = self.data.get_elements(0) // batch_size
        validation_score = self._get_validation_score(batch_size, epoch, 0)
        test_score = self._get_test_score(batch_size)
        training_score = self._get_training_score(chunk_batches)

        #==== UPDATE GUI ====
        if visual_params.gui_enabled:
            interface.server.append_job_update(epoch, training_score,
                                               validation_score, test_score,
                                               learning_rate)

        try:
            while (epoch < max_epochs) and (not done_looping):
                epoch = epoch + 1
                if epoch % learning_adjustment == 0:
                    learning_rate *= learning_decrease
                    nr_learning_adjustments += 1
                    #Temp
                    learning_adjustment = max(10, int(learning_adjustment / 2))
                    print('---- New learning rate {}'.format(learning_rate))

                if epoch > factor_adjustment:
                    max_factor = max(max_factor * factor_decrease,
                                     factor_minimum)
                    print('---- New convex combination {}'.format(max_factor))

                if epoch % 20 == 0:
                    print('---- Storing temp model')
                    storage.store_params(self.model.params, id=str(epoch))

                if (curriculum and epoch % curriculum_adjustment == 0
                        and epoch >= curriculum_start):
                    print(
                        "---- Mixing examples from next stage with training data"
                    )
                    self.data.mix_in_next_stage()

                #For current examples chunk in GPU memory
                for chunk_index in range(nr_chunks):
                    self.data.switch_active_training_set(chunk_index)
                    nr_elements = self.data.get_elements(chunk_index)
                    chunk_batches = nr_elements // batch_size

                    #Each chunk contains a certain number of batches.
                    for minibatch_index in range(chunk_batches):
                        cost_ij = self.train_model(minibatch_index,
                                                   learning_rate, max_factor)
                        if iteration % 1000 == 0:
                            print(
                                '---- Training @ iter = {}. Patience = {}. Loss = {}'
                                .format(iteration, patience, cost_ij))

                        if visual_params.gui_enabled and iteration % gui_frequency == 0:
                            interface.server.get_command_status()

                        if visual_params.gui_enabled and interface.server.is_testing():
                            self._debug(batch_size, chunk_batches, max_factor)

                        #==== EVALUATE ====
                        if (iteration + 1) % validation_frequency == 0:

                            #==== CURRENT PERFORMANCE ====
                            validation_score = self._get_validation_score(
                                batch_size, epoch, minibatch_index)
                            test_score = self._get_test_score(batch_size)
                            # No other purpose than charting.
                            train_score = self._get_training_score(
                                chunk_batches)

                            #==== UPDATE GUI ====
                            if visual_params.gui_enabled:
                                interface.server.append_job_update(
                                    epoch, train_score, validation_score,
                                    test_score, learning_rate)
                            self.events.append({
                                "epoch": epoch,
                                "training_loss": train_score,
                                "validation_loss": validation_score,
                                "test_loss": test_score,
                                "training_rate": learning_rate
                            })

                            #==== EARLY STOPPING ====
                            if validation_score < best_validation_loss:

                                # Extend patience if the improvement is good enough.
                                if validation_score < best_validation_loss * improvement_threshold:
                                    patience = max(patience,
                                                   iteration * patience_increase)
                                    print(
                                        "---- New best validation loss. Patience increased to {}"
                                        .format(patience))

                                # Save best validation score and iteration number.
                                best_validation_loss = validation_score
                                best_iter = iteration

                        if patience <= iteration:
                            done_looping = True
                            break
                        if visual_params.gui_enabled and interface.server.stop:
                            done_looping = True

                        # Increment after each batch has been processed.
                        iteration += 1

                    if done_looping:
                        # Without this, every remaining chunk would still be
                        # swapped in and trained for one more batch after the
                        # stop condition was reached.
                        break

        except KeyboardInterrupt:
            # The result is recorded by the set_result call below.
            print(
                "Inpterupted by user. Current model params will be saved now.")
        except Exception:
            # Function-call form works on both Python 2 and Python 3.
            print("Unexpected error: {}".format(sys.exc_info()[0]))
            raise
        self.set_result(best_iter, iteration, best_validation_loss, test_score,
                        nr_learning_adjustments, epoch)
コード例 #8
0
ファイル: run.py プロジェクト: hexiangquan/CNN
from storage import ParamStorage
from precisionrecall import PrecisionRecallCurve
from interface.server import send_precision_recall_data
from interface.command import get_command
'''
This tool creates the datapoints necessary for a precision and recall curve figure. The tool samples a patch dataset
from the test and validation set (-data), and creates predictions using a trained model (-model). These predictions are
thresholded at several values. The binarized predictions and the label are then used to calculate the precision as well
as the recall. These values, together with the threshold amount, constitute a data point. Supplying an experiment id
(-store_gui) will store the datapoints in the web GUI.

It's worth noting that the measurements are relaxed. Relaxed precision and relaxed recall. This is implemented by the
image processing operation, dilation. The slack variable is set to 3 pixels.
'''

# Announce the tool and list the command-line flags it reads.
print_section("TOOLS: Measure precision and recall of model")
# The flag parsed below is '-store_gui'; the usage text previously advertised
# it as '-store', which did not match.
print("-data: path to dataset | -store_gui: job_gui id to store curve in GUI "
      "| -store_path: store results locally | -model: stored model to use")

#====== Arguments ===============================================
# Each get_command call is unpacked into a pair; presumably (flag supplied?,
# value or default) - confirm against interface.command.
is_dataset_path, dataset_path = get_command(
    '-data', default='/home/olav/Pictures/Mass_roads_alpha')
store_gui, job_id = get_command('-store_gui', default='-1')
is_store_path, store_path = get_command('-store_path',
                                        default='./pr_data.json')
is_model, model_path = get_command('-model', default='./results/params.pkl')
#==============================================================

# Restore the stored model; the batch size comes from its saved
# 'optimization' settings.
store = ParamStorage()
data = store.load_params(path=model_path)
batch_size = data['optimization'].batch_size
コード例 #9
0
import matplotlib.pyplot as plt

sys.path.append(os.path.abspath("./"))
from interface.command import get_command
from printing import print_section, print_action

from storage import ParamStorage
from config import filename_params, dataset_params, pr_path, dataset_path
from augmenter.aerial import Creator
from data import AerialCurriculumDataset
import tools.util as util
'''
Create histograms of difference between prediction and label for dataset.
Allow finetuning of curriculum strategy.
'''
# Announce the tool, then read its command-line options.
print_section(
    'Generating plot of diff distribution between label and prediction')
#====== Arguments ===============================================
# Each get_command call is unpacked into a pair; presumably (flag supplied?,
# value or default) - confirm against interface.command.
# -samples: converted to int below.
is_samples, samples = get_command('-samples', default="100")
samples = int(samples)

# -teacher: defaults to the curriculum teacher configured in filename_params.
is_teacher_location, teacher_location = get_command(
    '-teacher', default=filename_params.curriculum_teacher)

# -verify: the value is turned into a stage name such as "stage0".
verify, stage = get_command('-verify', default="0")
stage = "stage" + stage

# -tradeoff: converted to float below.
is_tradeoff, tradeoff = get_command('-tradeoff', default="0.5")
tradeoff = float(tradeoff)

#Dataset path. Config used if not supplied
is_alt_dataset, alt_dataset = get_command('-dataset')
コード例 #10
0
ファイル: cnn.py プロジェクト: olavvatne/CNN
def run_cnn(model_params, optimization_params, dataset_path, dataset_params, filename_params, visual_params, epochs, verbose=False):
    """Train a ConvModel, store its parameters and evaluate precision/recall.

    Command-line flags read through ``interface.command.get_command`` adjust
    the run: ``-config`` (loss function and label noise for batch runs),
    ``-curriculum`` (alternative dataset path), ``-batch`` (batch id used in the
    output file names) and ``-params`` (initial weights to load).

    :param model_params: model settings; ``loss`` may be overwritten by -config.
    :param optimization_params: optimization settings; ``batch_size`` is read.
    :param dataset_path: dataset location unless -curriculum overrides it.
    :param dataset_params: dataset settings; ``label_noise`` may be overwritten.
    :param filename_params: provides ``results`` and ``network_save_name``.
    :param visual_params: ``gui_enabled`` toggles reporting to the GUI server.
    :param epochs: number of epochs passed to the evaluator.
    :param verbose: forwarded to ``Evaluator.run``.
    """
    print(filename_params)
    results_dir = filename_params.results
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    read_flag = interface.command.get_command
    is_config, config_values = read_flag("-config")
    is_curriculum, curriculum_set = read_flag("-curriculum")
    is_batch_run, batch_index = read_flag("-batch", default="0")
    is_init_params, param_path = read_flag("-params")

    if is_config:
        # Assume config is specifically for running bootstrapping batches.
        # NOTE(review): eval() executes whatever is passed on the command line;
        # consider a literal-only parser if this can ever see untrusted input.
        config_arr = eval(config_values)
        if len(config_arr) == 2:
            loss_function, raw_noise = config_arr[0], config_arr[1]
            label_noise = float(raw_noise)
            dataset_params.label_noise = label_noise
            model_params.loss = loss_function
            batch_index = "-".join((loss_function, str(label_noise), batch_index))
            print(batch_index)

    if is_curriculum:
        dataset_path = curriculum_set

    weights = None
    if is_init_params:
        store = ParamStorage()
        weights_file = param_path or "./results/params.pkl"
        weights = store.load_params(path=weights_file)['params']

    # Input stage
    dataset = DataLoader.create()
    dataset.load(dataset_path, dataset_params, optimization_params.batch_size)
    # Create network stage
    model = ConvModel(model_params, verbose=True)

    evaluator = Evaluator(model, dataset, optimization_params, dataset_path)
    evaluator.run(epochs=epochs, verbose=verbose, init=weights)
    report = evaluator.get_result()

    # Default output locations; batch runs get per-batch file names instead.
    network_store_path = filename_params.network_save_name
    result_path = results_dir + "/results.json"
    if is_batch_run:
        batch_stem = results_dir + "/batch" + batch_index
        network_store_path = batch_stem + ".pkl"
        result_path = batch_stem + ".json"

    storage = ParamStorage(path=network_store_path)
    storage.store_params(model.params)

    dataset.destroy()

    gui_enabled = visual_params.gui_enabled
    if gui_enabled:
        interface.server.stop_job(report)

    printing.print_section('Evaluation precision and recall')

    prc = PrecisionRecallCurve(pr_path, model.params, model_params, dataset_params)
    eval_batch_size = optimization_params.batch_size
    test_datapoints = prc.get_curves_datapoints(eval_batch_size, set_name="test")
    valid_datapoints = prc.get_curves_datapoints(eval_batch_size, set_name="valid")

    # Stores the results alongside the model params saved above.
    printing.print_section('Storing model parameters')

    if visual_params.gui_enabled:
        interface.server.send_precision_recall_data(test_datapoints, valid_datapoints)
    storage.store_result(result_path, evaluator.events, test_datapoints, valid_datapoints)
コード例 #11
0
ファイル: run.py プロジェクト: olavvatne/CNN
from precisionrecall import PrecisionRecallCurve
from interface.server import send_precision_recall_data
from interface.command import get_command

"""
This tool creates the datapoints necessary for a precision and recall curve figure. The tool samples a patch dataset
from the test and validation set (-data), and creates predictions using a trained model (-model). These predictions are
thresholded at several values. The binarized predictions and the label are then used to calculate the precision as well
as the recall. These values, together with the threshold amount, constitute a data point. Supplying an experiment id
(-store_gui) will store the datapoints in the web GUI.

It's worth noting that the measurements are relaxed. Relaxed precision and relaxed recall. This is implemented by the
image processing operation, dilation. The slack variable is set to 3 pixels.
"""

# Announce the tool and list the command-line flags it reads.
print_section("TOOLS: Measure precision and recall of model")
# The flag parsed below is "-store_gui"; the usage text previously advertised
# it as "-store", which did not match.
print(
    "-data: path to dataset | -store_gui: job_gui id to store curve in GUI "
    "| -store_path: store results locally | -model: stored model to use"
)

# ====== Arguments ===============================================
# Each get_command call is unpacked into a pair; presumably (flag supplied?,
# value or default) - confirm against interface.command.
is_dataset_path, dataset_path = get_command("-data", default="/home/olav/Pictures/Mass_roads_alpha")
store_gui, job_id = get_command("-store_gui", default="-1")
is_store_path, store_path = get_command("-store_path", default="./pr_data.json")
is_model, model_path = get_command("-model", default="./results/params.pkl")
# ==============================================================

# Restore the stored model; the batch size comes from its saved
# "optimization" settings.
store = ParamStorage()
data = store.load_params(path=model_path)
batch_size = data["optimization"].batch_size
コード例 #12
0
ファイル: run.py プロジェクト: olavvatne/CNN
The tool creates and saves the prediction stitch image, as well as a hit and miss image. This image shows where the
predictions are correct (green), where they are missing (red) and where they should not be according to the label (blue).
'''

def store_image(image, job_id, store_gui, name="image"):
    """Save ``image`` under ./tools/visualize/, show it, and optionally upload it.

    :param image: image object providing ``save`` and ``show``.
    :param job_id: identifier passed to ``send_result_image`` when uploading.
    :param store_gui: when truthy, a JPEG encoding of the resized image is sent
        through ``send_result_image``.
    :param name: file name (without extension) used for the local JPEG.
    """
    resized = Image.resize(image, 1.0)

    if store_gui:
        # Encode the resized copy in memory and upload the raw JPEG bytes.
        jpeg_buffer = StringIO.StringIO()
        resized.save(jpeg_buffer, format='JPEG')
        send_result_image(job_id, jpeg_buffer.getvalue())

    # The original (not the resized copy) is what gets written and displayed.
    target_path = ''.join(('./tools/visualize/', name, '.jpg'))
    image.save(target_path)
    image.show()

# Announce the tool and print the supported command-line flags.
# NOTE: the usage string uses a backslash continuation inside the literal, so
# the leading spaces of its second line are part of the printed text.
print_section('TOOLS: Visualize result from model')
print("-data: Path to image in dataset you want visualization of | -store_gui: Upload images to exp with supplied id | \
      -tradeoff: Threshold value associated with precision recall breakeven |-storeimage: Include aerial image")

# Each get_command call is unpacked into a pair; presumably (flag supplied?,
# value or default) - confirm against interface.command.
is_image_path, image_path = get_command('-data', default='/home/olav/Pictures/Mass_roads/test/data/10378780_15.tiff')

# Only the first element is of interest here; the second is bound to `temp`.
store_data_image, temp = get_command('-storeimage')

# NOTE(review): the default job id is the string "None", not the None object.
store_gui, job_id = get_command('-store_gui', default="None")

# -tradeoff: converted to float below.
is_tradeoff, bto = get_command('-tradeoff', default="0.5")
bto = float(bto)

# Load the stored model selected by -model.
is_model, model_path = get_command('-model', default="./results/params.pkl")
store = ParamStorage()
data = store.load_params(path=model_path)
コード例 #13
0
'''
This tool pre-generates a patch dataset. The tool is especially necessary for curriculum learning. The reason for not
doing this every time the network is trained is that a previously trained model needs to be loaded in order to do
difficulty estimation. There are several properties that can be set when using the command line.
-baseline: No difficulty estimation
-stages: Array with floats, setting difficulty threshold per stage. Please refrain from using space inside array.
-tradeoff: Previously trained curriculum teacher's best precision and recall tradeoff. (threshold value)
-dataset: Path to dataset. IE
-initsamples: Samples per image for first stage
-currsamples: Samples per image for remaining stages
-teacher: Curriculum teacher model
-save: Path to where pre-generated patch dataset should be stored.

REMEMBER: The patch creator is initialized using the config.py file.
'''
# Announce the tool, then read its command-line options.
print_section("TOOLS: Creating curriculum learning dataset")

# Baseline will create a curriculum with no example ordering, but the same amount of examples.
# This prevents curriculum-learning results from being caused by the model just having seen more examples.
is_baseline, baseline = get_command('-baseline')

# -stages: textual array of per-stage thresholds, turned into a numpy array.
# NOTE(review): eval() executes whatever is passed on the command line;
# consider a literal-only parser if this can ever see untrusted input.
is_stages, stages = get_command('-stages', default="[0.1, 1.0]")
stages = np.array(eval(stages))

#Precision recall breakeven point.
# NOTE(review): no default is passed here and the value is only converted to
# float when the flag is supplied; if 0.5 is meant to be the default it must be
# applied elsewhere - confirm.
is_tradeoff, tradeoff = get_command('-tradeoff')
if is_tradeoff:
    tradeoff = float(tradeoff)

#Dataset path. Config used if not supplied
is_alt_dataset, alt_dataset = get_command('-dataset')
コード例 #14
0
ファイル: run.py プロジェクト: hexiangquan/CNN
'''


def store_image(image, job_id, store_gui, name="image"):
    """Write ``image`` to ./tools/visualize/<name>.jpg, display it, and optionally upload it.

    :param image: image object providing ``save`` and ``show``.
    :param job_id: identifier handed to ``send_result_image`` on upload.
    :param store_gui: when truthy, the resized image is JPEG-encoded in memory
        and sent through ``send_result_image``.
    :param name: base name of the local JPEG file.
    """
    scaled = Image.resize(image, 1.0)

    if store_gui:
        # Upload the in-memory JPEG bytes of the resized copy.
        payload = StringIO.StringIO()
        scaled.save(payload, format='JPEG')
        send_result_image(job_id, payload.getvalue())

    # The unscaled input is what is saved locally and shown.
    output_file = ''.join(['./tools/visualize/', name, '.jpg'])
    image.save(output_file)
    image.show()


# Announce the tool and print the supported command-line flags.
# NOTE: the usage string uses a backslash continuation inside the literal, so
# the leading spaces of its second line are part of the printed text.
print_section('TOOLS: Visualize result from model')
print(
    "-data: Path to image in dataset you want visualization of | -store_gui: Upload images to exp with supplied id | \
      -tradeoff: Threshold value associated with precision recall breakeven |-storeimage: Include aerial image"
)

# Each get_command call is unpacked into a pair; presumably (flag supplied?,
# value or default) - confirm against interface.command.
is_image_path, image_path = get_command(
    '-data',
    default='/home/olav/Pictures/Mass_roads/test/data/10378780_15.tiff')

# Only the first element is of interest here; the second is bound to `temp`.
store_data_image, temp = get_command('-storeimage')

# NOTE(review): the default job id is the string "None", not the None object.
store_gui, job_id = get_command('-store_gui', default="None")

# -tradeoff: converted to float below.
is_tradeoff, bto = get_command('-tradeoff', default="0.5")
bto = float(bto)
コード例 #15
0
ファイル: curriculum_diff.py プロジェクト: olavvatne/CNN
from interface.command import get_command
from printing import print_section, print_action

from storage import ParamStorage
from config import filename_params, dataset_params, pr_path, dataset_path
from augmenter.aerial import Creator
from data import AerialCurriculumDataset
import tools.util as util



'''
Create histograms of difference between prediction and label for dataset.
Allow finetuning of curriculum strategy.
'''
print_section('Generating plot of diff distribution between label and prediction')
#====== Arguments ===============================================
# Number of samples, parsed as an integer (default 100).
is_samples, samples = get_command('-samples', default="100")
samples = int(samples)

# Location of the teacher model; falls back to the curriculum_teacher entry of the config.
is_teacher_location, teacher_location = get_command('-teacher', default=filename_params.curriculum_teacher)

# -verify takes a stage number; it is turned into a name such as "stage0".
verify, stage = get_command('-verify', default="0")
stage = "stage" + stage

# Tradeoff value, parsed as a float (default 0.5).
# NOTE(review): presumably the precision recall breakeven threshold, as in the sibling tools — confirm.
is_tradeoff, tradeoff = get_command('-tradeoff', default="0.5")
tradeoff = float(tradeoff)

# Dataset path. The config value is used if the option is not supplied.
is_alt_dataset, alt_dataset = get_command('-dataset')
if is_alt_dataset:
コード例 #16
0
ファイル: cnn.py プロジェクト: hexiangquan/CNN
def run_cnn(model_params,
            optimization_params,
            dataset_path,
            dataset_params,
            filename_params,
            visual_params,
            epochs,
            verbose=False):
    """Train a ConvModel, store its parameters and record precision/recall results.

    Command line options read through ``interface.command.get_command``:
      -config      list literal ``[loss_function, label_noise]``; overrides
                   ``model_params.loss`` and ``dataset_params.label_noise`` and
                   is prefixed to the batch index.
      -curriculum  replaces ``dataset_path``.
      -batch       batch index (default "0"); when given, parameters and results
                   are written to ``<results>/batch<index>.pkl`` / ``.json``.
      -params      path of initial parameters; "./results/params.pkl" when the
                   option is present without a value.

    Note that ``model_params`` and ``dataset_params`` are modified in place when
    -config is supplied. Nothing is returned.
    """
    print(filename_params)
    results_dir = filename_params.results
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    read_option = interface.command.get_command
    has_config, config_text = read_option("-config")
    has_curriculum, curriculum_path = read_option("-curriculum")
    is_batch_run, batch_index = read_option("-batch", default="0")
    has_init_params, param_path = read_option("-params")

    if has_config:
        # The config is assumed to be specifically for running bootstrapping batches.
        # NOTE(review): eval() runs whatever expression is passed on the command line.
        config = eval(config_text)
        if len(config) == 2:
            loss_function = config[0]
            label_noise = float(config[1])
            dataset_params.label_noise = label_noise
            model_params.loss = loss_function
            batch_index = loss_function + "-" + str(label_noise) + "-" + batch_index
            print(batch_index)

    if has_curriculum:
        dataset_path = curriculum_path

    # Optional warm start from previously stored parameters.
    weights = None
    if has_init_params:
        store = ParamStorage()
        weights = store.load_params(path=param_path or "./results/params.pkl")['params']

    # Input stage
    dataset = DataLoader.create()
    dataset.load(dataset_path, dataset_params, optimization_params.batch_size)

    # Create network stage
    model = ConvModel(model_params, verbose=True)

    evaluator = Evaluator(model, dataset, optimization_params, dataset_path)
    evaluator.run(epochs=epochs, verbose=verbose, init=weights)
    report = evaluator.get_result()

    # Default output locations; a batch run gets its own pair of files.
    network_store_path = filename_params.network_save_name
    result_path = filename_params.results + "/results.json"
    if is_batch_run:
        batch_prefix = filename_params.results + "/batch" + batch_index
        network_store_path = batch_prefix + ".pkl"
        result_path = batch_prefix + ".json"

    # Persist the trained parameters so the model can be restored later.
    storage = ParamStorage(path=network_store_path)
    storage.store_params(model.params)

    dataset.destroy()

    gui_enabled = visual_params.gui_enabled
    if gui_enabled:
        interface.server.stop_job(report)

    printing.print_section('Evaluation precision and recall')

    curve = PrecisionRecallCurve(pr_path, model.params, model_params, dataset_params)
    batch_size = optimization_params.batch_size
    test_datapoints = curve.get_curves_datapoints(batch_size, set_name="test")
    valid_datapoints = curve.get_curves_datapoints(batch_size, set_name="valid")

    printing.print_section('Storing model parameters')

    if visual_params.gui_enabled:
        interface.server.send_precision_recall_data(test_datapoints, valid_datapoints)
    storage.store_result(result_path, evaluator.events, test_datapoints, valid_datapoints)
コード例 #17
0
ファイル: evaluator.py プロジェクト: olavvatne/CNN
    def _train(self, batch_size, max_epochs):
        """Train the model until patience runs out, a stop is requested or max_epochs is hit.

        At the start of every epoch the schedule may:
          * multiply the learning rate by ``learning_decrease`` (every
            ``learning_adjustment`` epochs; the interval is then halved, but
            never below 10),
          * decay the loss mixture factor towards ``factor_minimum`` once the
            epoch number exceeds ``factor_adjustment``,
          * store a temporary copy of the parameters (every 20 epochs),
          * mix examples from the next curriculum stage into the training data.

        Every ``validation_frequency`` iterations the validation, test and
        training scores are computed, appended to ``self.events`` and used for
        patience-based early stopping.

        :param batch_size: number of examples per minibatch.
        :param max_epochs: upper bound on the number of epochs to run.

        Nothing is returned; the outcome is recorded through ``self.set_result``.
        A KeyboardInterrupt is absorbed (the result is still recorded), any other
        exception is reported and re-raised.
        """
        print_section('Training model')

        patience = self.params.initial_patience  # look at this many examples regardless
        patience_increase = self.params.patience_increase  # wait this much longer when a new best is found
        improvement_threshold = self.params.improvement_threshold  # a relative improvement of this much is considered significant

        learning_rate = self.params.learning_rate
        learning_adjustment = self.params.learning_adjustment
        learning_decrease = self.params.learning_decrease
        nr_learning_adjustments = 0
        print('---- Initial learning rate {}'.format(learning_rate))

        max_factor = self.params.factor_rate
        factor_adjustment = self.params.factor_adjustment
        factor_decrease = self.params.factor_decrease
        factor_minimum = self.params.factor_minimum
        print('---- Initial loss mixture ratio {}'.format(max_factor))

        curriculum = self.params.curriculum_enable
        curriculum_start = self.params.curriculum_start
        curriculum_adjustment = self.params.curriculum_adjustment

        gui_frequency = 500
        # Go through this many minibatches before checking the network on the
        # validation set. Floor division keeps the value integral on Python 3 too.
        validation_frequency = min(self.nr_train_batches, patience // 2)
        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.
        self.start_time = timeit.default_timer()

        storage = ParamStorage()

        nr_chunks = self.data.get_chunk_number()
        epoch = 0
        done_looping = False
        iteration = 0  # number of minibatches processed so far

        #==== INITIAL PERFORMANCE ====
        # Floor division: the batch count is passed on as an integer count.
        chunk_batches = self.data.get_elements(0) // batch_size
        validation_score = self._get_validation_score(batch_size, epoch, 0)
        test_score = self._get_test_score(batch_size)
        training_score = self._get_training_score(chunk_batches)

        #==== UPDATE GUI ====
        if visual_params.gui_enabled:
            interface.server.append_job_update(epoch, training_score, validation_score, test_score, learning_rate)

        try:
            while (epoch < max_epochs) and (not done_looping):
                epoch = epoch + 1
                if epoch % learning_adjustment == 0:
                    learning_rate *= learning_decrease
                    nr_learning_adjustments += 1
                    # Temp: adjust more often as training progresses, at most every 10 epochs.
                    learning_adjustment = max(10, int(learning_adjustment / 2))
                    print('---- New learning rate {}'.format(learning_rate))

                if epoch > factor_adjustment:
                    max_factor = max(max_factor * factor_decrease, factor_minimum)
                    print('---- New convex combination {}'.format(max_factor))

                if epoch % 20 == 0:
                    print('---- Storing temp model')
                    storage.store_params(self.model.params, id=str(epoch))

                if curriculum and epoch % curriculum_adjustment == 0 and epoch >= curriculum_start:
                    print("---- Mixing examples from next stage with training data")
                    self.data.mix_in_next_stage()

                # For each chunk of examples; the chunk is made the active training set first.
                for chunk_index in range(nr_chunks):
                    self.data.switch_active_training_set(chunk_index)
                    nr_elements = self.data.get_elements(chunk_index)
                    # range() below needs an integer, hence floor division.
                    chunk_batches = nr_elements // batch_size

                    # Each chunk contains a certain number of batches.
                    for minibatch_index in range(chunk_batches):
                        cost_ij = self.train_model(minibatch_index, learning_rate, max_factor)
                        if iteration % 1000 == 0:
                            print('---- Training @ iter = {}. Patience = {}. Loss = {}'.format(iteration, patience, cost_ij))

                        if visual_params.gui_enabled and iteration % gui_frequency == 0:
                            interface.server.get_command_status()

                        if visual_params.gui_enabled and interface.server.is_testing():
                            self._debug(batch_size, chunk_batches, max_factor)

                        #==== EVALUATE ====
                        if (iteration + 1) % validation_frequency == 0:

                            #==== CURRENT PERFORMANCE ====
                            validation_score = self._get_validation_score(batch_size, epoch, minibatch_index)
                            test_score = self._get_test_score(batch_size)
                            train_score = self._get_training_score(chunk_batches)  # No other purpose than charting

                            #==== UPDATE GUI ====
                            if visual_params.gui_enabled:
                                interface.server.append_job_update(
                                    epoch,
                                    train_score,
                                    validation_score,
                                    test_score,
                                    learning_rate)
                            self.events.append({
                                "epoch": epoch,
                                "training_loss": train_score,
                                "validation_loss": validation_score,
                                "test_loss": test_score,
                                "training_rate": learning_rate
                            })

                            #==== EARLY STOPPING ====
                            if validation_score < best_validation_loss:

                                # Improve patience if the loss improvement is good enough.
                                if validation_score < best_validation_loss * improvement_threshold:
                                    patience = max(patience, iteration * patience_increase)
                                    print("---- New best validation loss. Patience increased to {}".format(patience))

                                # Save best validation score and iteration number.
                                best_validation_loss = validation_score
                                best_iter = iteration

                        if patience <= iteration:
                            done_looping = True
                            break
                        # NOTE(review): interface.server.stop is read as a flag attribute — confirm it is not a callable.
                        if visual_params.gui_enabled and interface.server.stop:
                            # Honour the stop request right away instead of finishing the epoch.
                            done_looping = True
                            break

                        iteration += 1  # Increment iteration after each batch has been processed.

                    # The inner break only leaves the minibatch loop; without this
                    # check every remaining chunk would still train one more batch.
                    if done_looping:
                        break

        except KeyboardInterrupt:
            # NOTE(review): set_result is called again after the try block; kept as in the original flow.
            self.set_result(best_iter, iteration, best_validation_loss, test_score, nr_learning_adjustments, epoch)
            print("Inpterupted by user. Current model params will be saved now.")
        except Exception:
            # Report the exception type, then let the error propagate to the caller.
            print("Unexpected error: {}".format(sys.exc_info()[0]))
            raise
        self.set_result(best_iter, iteration, best_validation_loss, test_score, nr_learning_adjustments, epoch)