def analyze_classes(path):
    try:
        y_train = try_pickle_load(path + 'y_train.bin')
        y_test = try_pickle_load(path + 'y_test.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    n_classes = int(max(y_train.max(), y_test.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)
    image_shape = (y_train.shape[-2], y_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    y = np.concatenate((y_train, y_test), axis=0).astype('int8').reshape(-1)
    print y.shape
    yy = np.bincount(y)
    ii = np.nonzero(yy)[0]
    counts = np.vstack((ii, yy[ii])).T
    print "Counts\n", counts

    total = counts[:, 1].sum(dtype='float32')
    print "Total %10.0f" % total
    percents = counts[:, 1] / total
    print "Percents\n", "\n".join([("%7.2f" % (x * 100.)) for x in percents])
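
# Hedged usage sketch for analyze_classes: 'data/my_dataset/' is a
# placeholder path whose folder must hold the pickled 'y_train.bin' and
# 'y_test.bin' files. The trailing separator matters, since the function
# concatenates strings instead of os.path.join()-ing them.
def _demo_analyze_classes():
    analyze_classes('data/my_dataset/')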

def main(x_path, y_path):
    x = try_pickle_load(x_path)
    y = try_pickle_load(y_path)
    print "Shape of loaded x data is", x.shape
    print "Shape of loaded y data is", y.shape
    assert x.shape[0] == y.shape[0]

    test_size = int(x.shape[0] * TEST_SIZE)
    train_size = x.shape[0] - test_size
    assert train_size + test_size == x.shape[0]
    print "Train size", train_size
    print "Test size", test_size

    indices = np.arange(x.shape[0])
    np.random.shuffle(indices)
    train_ind = indices[:train_size]
    test_ind = indices[train_size:]

    train_set_x = x[train_ind]
    test_set_x = x[test_ind]
    train_set_y = y[train_ind]
    test_set_y = y[test_ind]

    folder_name = os.path.split(x_path)[0]
    print "Folder to save", folder_name
    try_pickle_dump(train_set_x, os.path.join(folder_name, "x_train.bin"))
    try_pickle_dump(test_set_x, os.path.join(folder_name, "x_test.bin"))
    try_pickle_dump(train_set_y, os.path.join(folder_name, "y_train.bin"))
    try_pickle_dump(test_set_y, os.path.join(folder_name, "y_test.bin"))
    print "Done"
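
# Usage sketch, assuming TEST_SIZE is the module-level test fraction
# (e.g. 0.2) and both .bin files were produced by try_pickle_dump. The
# split files land next to the x file:
#
#     main('data/x.bin', 'data/y.bin')
#     # -> writes x_train.bin, x_test.bin, y_train.bin, y_test.bin to data/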

def faces(fold):
    """
    Retrieves a list of face images. Images are numpy arrays of
    (img_height, img_width, RGB) shape. The images represent the
    clipped and masked face images from the given fold of the
    FDDB database.

    :param fold: int indicating which fold is desired.
        In [1, 10] range.
    """
    log.info("Retrieving face images for fold %s", str(fold))

    # generate file name in which this fold's face images are stored
    faces_file_name = os.path.join(
        FACE_ONLY_ROOT, "fddb_facesonly_fold_{:02d}.zip".format(fold))

    # try to load and return pickled data
    face_images = util.try_pickle_load(faces_file_name, zip=False)
    if face_images is not None:
        return face_images

    # resulting face images
    # each image is a numpy array of RGB components of
    # (img_height, img_width, 3) shape
    face_images = []

    # go through all the photos in the fold
    # and their FDDB ellipse info (face annotations)
    for photo_path, (masks, bboxes) in image_face_masks_bboxes(fold).items():
        log.info("Processing photo %s", photo_path)

        # load photo
        log.debug("Loading photo")
        photo_RGB = cv2.imread(photo_path, 1)

        # for each ellipse info get mask and bbox
        for mask, bbox in zip(masks, bboxes):
            # apply the bounding box
            log.debug("Applying mask and bounds")
            face_img = np.array(photo_RGB[bbox[0][0]:bbox[1][0],
                                          bbox[0][1]:bbox[1][1], :])

            # apply the mask
            face_mask = mask[bbox[0][0]:bbox[1][0], bbox[0][1]:bbox[1][1]]
            face_img[np.logical_not(face_mask), :] = 0

            # store the image
            face_images.append(face_img)

    # store image data for subsequent usage
    if not util.try_pickle_dump(face_images, faces_file_name, zip=False):
        raise IOError("Failed to pickle face images")

    return face_images
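
# A quick sanity-check sketch for faces(); fold 1 is an arbitrary choice
# and matplotlib is an assumed extra dependency. cv2.imread loads images
# as BGR, so channels are reversed for display.
def _show_first_face(fold=1):
    import matplotlib.pyplot as plt
    face_imgs = faces(fold)
    plt.imshow(face_imgs[0][:, :, ::-1])  # BGR -> RGB
    plt.axis('off')
    plt.show()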

def image_face_masks_bboxes(fold):
    """
    Returns a dictionary in which keys are file paths of images
    belonging to the fold. Values are tuples (masks, bboxes) where
    "masks" are lists of face-ellipse boolean masks for that image
    and "bboxes" are bounding box info for that image.

    The returned dictionary is ordered the same way the ellipse
    info file is.
    """
    log.info("Retrieving image masks for fold %s", str(fold))

    # file name of the cached version
    masks_file_name = os.path.join(
        FACE_MASK_ROOT, "fddb_face_masks_fold{:02d}.zip".format(fold))

    # try to load and return pickled data
    masks = util.try_pickle_load(masks_file_name, zip=False)
    if masks is not None:
        return masks

    # there is no pickled version, we need to create the masks
    masks_dict = collections.OrderedDict()
    for photo_path, elipses in image_elipses(fold).items():
        log.info("Processing photo %s", photo_path)

        # load photo
        log.debug("Loading photo")
        photo_RGB = cv2.imread(photo_path, 1)
        photo_shape = photo_RGB.shape[:2]

        # for each ellipse info get mask and bbox, and store them
        # first prepare the numpy arrays in which they are stored
        masks = np.zeros(
            (len(elipses), photo_shape[0], photo_shape[1]), dtype=np.bool)
        bboxes = np.zeros((len(elipses), 2, 2), dtype=np.int32)
        # then put those arrays into the dict
        masks_dict[photo_path] = (masks, bboxes)
        # and then fill up the arrays with real data
        for elipse_ind, elipse in enumerate(elipses):
            log.debug("Calculating mask and bounds")
            mask, bbox = __elipsis_mask_and_box(photo_shape, elipse)
            masks[elipse_ind] = mask
            bboxes[elipse_ind] = bbox

    # store image data for subsequent usage
    if not util.try_pickle_dump(masks_dict, masks_file_name, zip=False):
        raise IOError("Failed to pickle face masks")

    return masks_dict
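
# Consumption sketch: each bbox is a 2x2 array of corner coordinates in
# (row, col) order, matching the slicing done in faces() above
# (photo[bbox[0][0]:bbox[1][0], bbox[0][1]:bbox[1][1]]).
def _count_fold_faces(fold=1):
    total = 0
    for photo_path, (masks, bboxes) in image_face_masks_bboxes(fold).items():
        total += len(bboxes)
    return total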

def load(subset=None, min_occ=1, min_files=1):
    """
    Loads the raw text data that constitutes the Microsoft
    Sentence Completion Challenge (stored in ./data/).
    Processes the data, tokenizes and parses it, and returns
    the results.

    Returned is a tuple (train_sents, question_groups, answers,
    feature_sizes). The 'train_sents' object is a numpy array of shape
    (token_count, feature_count). Feature columns are at first textual
    (orth, lemma, lemma_4), then syntactic (pos, dependency-type).
    The [-2] column contains syntactic-parent-indices, and the [-1]
    column denotes to which sentence the token belongs. The
    'question_groups' object is an iterable of question groups. Each
    group consists of 5 sentences (one of which is correct). Each
    sentence is a parsed-text-array as described above. The 'answers'
    object is a numpy array of shape (question_group_count, ) that
    contains the indices of the correct sentences in question groups.
    The 'feature_sizes' object is a numpy array containing the
    dimensionality of each feature.

    :param subset: The number of training files to process.
        If None (default), all of the files are processed.
    :param min_occ: Minimum required number of occurrences of a token
        (word) required for it to be included in the vocabulary.
        Default value (1) uses all words that occurred in the trainset.
    :param min_files: Minimum required number of files a term has to
        occur in for it to be included in the vocabulary.
    """
    dir = os.path.join("data", "processed")
    if not os.path.exists(dir):
        os.makedirs(dir)
    name_base = "subset_%r-min_occ_%r-min_files_%r" % (
        subset, min_occ, min_files)

    # look for the cached processed data, return if present
    file_name = os.path.join(dir, name_base + ".pkl")
    data = util.try_pickle_load(file_name)
    if data is not None:
        return data

    # did not find cached data, will have to process it
    # log the loading process also to a file
    log_name = os.path.join(dir, name_base + ".log")
    log.addHandler(logging.FileHandler(log_name))

    # process the data, cache it and return
    data = _load(subset, min_occ, min_files)
    util.try_pickle_dump(data, file_name)
    return data
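
# Usage sketch for load(); the unpacking mirrors the docstring. subset=50,
# min_occ=5 and min_files=2 are arbitrary example values for a quick run:
def _demo_load():
    train_sents, question_groups, answers, feature_sizes = load(
        subset=50, min_occ=5, min_files=2)
    print "Token count:", train_sents.shape[0]
    print "Question groups:", len(question_groups)
    print "Feature sizes:", feature_sizes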

def evaluate_conv(conf, net_weights=None):
    """ Evaluates Farabet-like conv network

    conf: dictionary
        network configuration
    """
    ################
    # LOADING DATA #
    ################
    logger.info("... loading data")
    logger.debug("Theano.config.floatX is %s" % theano.config.floatX)

    path = conf['data']['location']
    batch_size = conf['evaluation']['batch-size']
    assert type(batch_size) is int
    logger.info('Batch size %d' % (batch_size))

    try:
        x_train_allscales = try_pickle_load(path + 'x_train.bin')
        x_train = x_train_allscales[0]  # first scale
        y_train = try_pickle_load(path + 'y_train.bin')
        x_test_allscales = try_pickle_load(path + 'x_test.bin')
        x_test = x_test_allscales[0]
        y_test = try_pickle_load(path + 'y_test.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    n_classes = int(max(y_train.max(), y_test.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)

    image_shape = (x_train.shape[-2], x_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    logger.info('Train set has %d images' % x_train.shape[0])
    logger.info('Input train set has shape of %s ', x_train.shape)
    logger.info('Test set has %d images' % x_test.shape[0])
    logger.info('Input test set has shape of %s ', x_test.shape)

    # compute number of minibatches for training, validation and testing
    n_train_batches = x_train.shape[0] // batch_size
    n_test_batches = x_test.shape[0] // batch_size
    logger.info("Number of train batches %d" % n_train_batches)
    logger.info("Number of test batches %d" % n_test_batches)

    logger.info("... building network")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # input is presented as (batch, channel, x, y)
    x0 = T.tensor4('x')
    x2 = T.tensor4('x')
    x4 = T.tensor4('x')
    # matrix row - batch index, column label of pixel
    # every column is a list of pixel labels (image matrix reshaped to list)
    y = T.imatrix('y')

    # create all layers
    builder_name = conf['network']['builder-name']
    layers, out_shape, conv_out = get_net_builder(builder_name)(
        x0, x2, x4, y, batch_size, classes=n_classes,
        image_shape=image_shape,
        nkerns=conf['network']['layers'][:3],
        seed=conf['network']['seed'],
        activation=lReLU, bias=0.001, sparse=False)
    logger.info("Image out shape is %s", out_shape)

    # last layer, log reg
    log_reg_layer = layers[0]
    y_flat = y.flatten(1)

    y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1])
    y_test_shape = (y_test.shape[0], out_shape[0], out_shape[1])

    # resize marked images to out_size of the network
    y_test_downscaled = np.empty(y_test_shape)
    for i in xrange(y_test.shape[0]):
        y_test_downscaled[i] = resize_marked_image(y_test[i], out_shape)

    x_train_shared, y_train_shared = \
        shared_dataset((np.zeros_like(x_train), np.zeros(y_train_shape)))
    x2_train_shared = theano.shared(
        np.zeros_like(x_train_allscales[1]), borrow=True)
    x4_train_shared = theano.shared(
        np.zeros_like(x_train_allscales[2]), borrow=True)
    x_test_shared, y_test_shared = \
        shared_dataset((x_test, y_test_downscaled))
    x2_test_shared = theano.shared(x_test_allscales[1], borrow=True)
    x4_test_shared = theano.shared(x_test_allscales[2], borrow=True)

    # When storing data on the GPU it has to be stored as floats
    # therefore we will store the labels as ``floatX`` as well
    # (``shared_y`` does exactly that). But during our computations
    # we need them as ints (we use labels as index, and if they are
    # floats it doesn't make sense) therefore instead of returning
    # ``shared_y`` we will have to cast it to int. This little hack
    # lets us get around this issue
    y_train_shared_i32 = T.cast(y_train_shared, 'int32')
    y_test_shared_i32 = T.cast(y_test_shared, 'int32')

    ###############
    # BUILD MODEL #
    ###############
    logger.info("... building model")

    class_freqs = calc_class_freqs(np.concatenate([y_train, y_test], axis=0))
    care_classes = build_care_classes(n_classes, conf['data'])

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        [log_reg_layer.errors(y_flat),
         build_loss(log_reg_layer, conf['network']['loss'],
                    y_flat, class_freqs, care_classes)] +
        list(log_reg_layer.accurate_pixels_class(y_flat)),
        givens={
            x0: x_test_shared[index * batch_size: (index + 1) * batch_size],
            x2: x2_test_shared[index * batch_size: (index + 1) * batch_size],
            x4: x4_test_shared[index * batch_size: (index + 1) * batch_size],
            y: y_test_shared_i32[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    layers_w_weights = filter(lambda l: l.params is not None, layers)
    params = [p for l in layers_w_weights for p in l.params]
    # list of Ws through all layers
    weights = [l.params[0] for l in layers_w_weights]
    assert len(weights) == len(params) / 2

    # the cost we minimize during training is the NLL of the model
    # and L2 regularization (lambda * L2-norm)
    # L2-norm is sum of squared params (using only W, not b)
    # params has Ws on even locations
    cost = build_loss(log_reg_layer, conf['network']['loss'], y_flat,
                      class_freqs, care_classes)\
        + 10**-5 * T.sum([T.sum(w ** 2) for w in weights])

    # train_model is a function that updates the model parameters
    update_params = build_weight_updates(conf['training'], cost, params)
    train_model = theano.function(
        [index],
        cost,
        updates=update_params.updates,
        givens={
            x0: x_train_shared[index * batch_size: (index + 1) * batch_size],
            x2: x2_train_shared[index * batch_size: (index + 1) * batch_size],
            x4: x4_train_shared[index * batch_size: (index + 1) * batch_size],
            y: y_train_shared_i32[index * batch_size: (index + 1) * batch_size]
        }
    )
    pre_fn = lambda: change_train_set_multiscale(
        [x_train_shared, x2_train_shared, x4_train_shared],
        [x_train_allscales[0], x_train_allscales[1], x_train_allscales[2]],
        y_train_shared, y_train, out_shape)

    # set loaded weights
    if net_weights is not None:
        try:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            best_params = net_weights
            net_weights = None
        except:
            logger.error("Incompatible network to load weights in")

    ###############
    # TRAIN MODEL #
    ###############
    logger.info("... training model")
    start_time = time.clock()
    best_validation_loss, best_iter, best_params = eval_model(
        conf['training'], train_model, test_model,
        n_train_batches, n_test_batches, layers, pre_fn, update_params)
    end_time = time.clock()

    logger.info('Best validation score of %f %% obtained at iteration %i, ' %
                (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The code for file %s ran for %.2fm' %
                          (os.path.split(__file__)[1],
                           (end_time - start_time) / 60.))

    # set best weights
    for net_weight, layer in zip(best_params, layers):
        layer.set_weights(net_weight)

    logger.info('Starting second step, with Dropout hidden layers')
    layers, new_layers = net_builders.extend_net_w1l_drop(
        conv_out, conf['network']['layers'][-2] * 3, layers, n_classes,
        nkerns=conf['network']['layers'][-1:],
        activation=lReLU, bias=0.001)

    # create a function to compute the mistakes that are made by the model
    test_model2 = theano.function(
        [index],
        [layers[0].errors(y_flat),
         build_loss(layers[0], conf['network']['loss'],
                    y_flat, class_freqs, care_classes)] +
        list(layers[0].accurate_pixels_class(y_flat)),
        givens={
            x0: x_test_shared[index * batch_size: (index + 1) * batch_size],
            x2: x2_test_shared[index * batch_size: (index + 1) * batch_size],
            x4: x4_test_shared[index * batch_size: (index + 1) * batch_size],
            y: y_test_shared_i32[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    layers_w_weights = filter(lambda l: l.params is not None, new_layers)
    params2 = [p for l in layers_w_weights for p in l.params]
    # list of Ws through all layers
    weights2 = [l.params[0] for l in layers_w_weights]
    assert len(weights2) == len(params2) / 2

    cost2 = build_loss(layers[0], conf['network']['loss'], y_flat,
                       class_freqs, care_classes)
    # + 10**-3 * T.sum([T.sum(w ** 2) for w in weights2])

    # train_model is a function that updates the model parameters
    update_params2 = build_weight_updates(conf['training2'], cost2, params2)
    train_model2 = theano.function(
        [index],
        cost2,
        updates=update_params2.updates,
        givens={
            x0: x_train_shared[index * batch_size: (index + 1) * batch_size],
            x2: x2_train_shared[index * batch_size: (index + 1) * batch_size],
            x4: x4_train_shared[index * batch_size: (index + 1) * batch_size],
            y: y_train_shared_i32[index * batch_size: (index + 1) * batch_size]
        }
    )

    # try to load weights in second stage
    if net_weights is not None:
        try:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
        except:
            logger.error("Incompatible network to load weights in")

    # evaluate model2
    start_time = time.clock()
    best_validation_loss, best_iter, best_params = eval_model(
        conf['training2'], train_model2, test_model2,
        n_train_batches, n_test_batches, layers, pre_fn, update_params2)
    end_time = time.clock()

    logger.info('Best validation score of %f %% obtained at iteration %i, ' %
                (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The code for file %s ran for %.2fm' %
                          (os.path.split(__file__)[1],
                           (end_time - start_time) / 60.))

def histograms(fold):
    """
    Generates YIQ component histograms for face and not-face
    parts of the images of the given fold(s) of the FDDB database.
    Returns a tuple (hist_face, hist_noface).

    :type fold: int or iterable of ints
    :param fold: When int: number of the fold of the FDDB database.
        When iterable: a number of folds for the FDDB database.
    """
    if not isinstance(fold, int):
        # fold param is an iterable
        # get individual fold histograms
        hists_face, hists_noface = zip(*[histograms(f) for f in fold])

        # sum them up and return
        fold_count = len(hists_face)
        hist_face = sum(hists_face[1:], hists_face[0]) / fold_count
        hist_noface = sum(hists_noface[1:], hists_noface[0]) / fold_count
        return (hist_face, hist_noface)

    # generate file name in which this fold's histograms are stored
    hist_file_name = os.path.join(
        HIST_ROOT, "fddb_YIQ_histogram_fold_{:02d}.pkl".format(fold))

    # try to load and return pickled histogram data
    pickled_hist = util.try_pickle_load(hist_file_name)
    if pickled_hist is not None:
        return pickled_hist

    # failed to load pickled data, create histograms
    # prepare histograms
    # first dimension indicates Y, I or Q,
    # second dimension are bins
    hist_face = np.zeros((3, 256), np.int)
    hist_noface = np.zeros((3, 256), np.int)

    # go through all the photos in the fold
    # and their FDDB ellipse info (face annotations)
    for photo_path, (masks, bboxes) in image_face_masks_bboxes(fold).items():
        log.info("Processing photo %s", photo_path)

        # load photo, convert to YIQ
        log.debug("Loading photo")
        photo_RGB = cv2.imread(photo_path, 1)
        log.debug("Converting to YIQ")
        photo_YIQ = util.rgb_to_yiq(photo_RGB)

        # create masks from ellipses and OR them into one mask
        log.debug("Creating faces mask")
        mask_face = masks.any(axis=0)
        mask_noface = np.logical_not(mask_face)

        # add current image histograms to total histograms
        log.debug("Histogramming")
        for component_ind in range(3):
            hist_face[component_ind, :] += np.histogram(
                photo_YIQ[mask_face, component_ind],
                __bin_edges[component_ind])[0]
            hist_noface[component_ind, :] += np.histogram(
                photo_YIQ[mask_noface, component_ind],
                __bin_edges[component_ind])[0]

    # normalize histograms
    hist_face = hist_face.astype(np.float) / hist_face[1, :].sum()
    hist_noface = hist_noface.astype(np.float) / hist_noface[1, :].sum()

    # store histogram data for subsequent usage
    if not util.try_pickle_dump((hist_face, hist_noface), hist_file_name):
        raise IOError("Failed to pickle histograms")

    return (hist_face, hist_noface)
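
# Usage sketch: a single fold returns normalized histograms, while an
# iterable of folds averages the per-fold histograms:
def _demo_histograms():
    hist_face, hist_noface = histograms(1)        # fold 1 only
    hist_face_avg, _ = histograms(range(1, 6))    # average over folds 1-5
    print hist_face.shape    # (3, 256): one row each for Y, I and Q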

def evaluate_conv(conf, net_weights=None):
    """ Evaluates Farabet-like conv network

    conf: dictionary
        network configuration
    """
    ################
    # LOADING DATA #
    ################
    logger.info("... loading data")
    logger.debug("Theano.config.floatX is %s" % theano.config.floatX)

    path = conf['data']['location']
    batch_size = conf['evaluation']['batch-size']
    assert type(batch_size) is int
    logger.info('Batch size %d' % (batch_size))

    try:
        x_train_allscales = try_pickle_load(path + 'x_train.bin')
        x_train = x_train_allscales[0]  # first scale
        y_train = try_pickle_load(path + 'y_train.bin')
        x_test_allscales = try_pickle_load(path + 'x_test.bin')
        x_test = x_test_allscales[0]
        y_test = try_pickle_load(path + 'y_test.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    n_classes = int(max(y_train.max(), y_test.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)

    image_shape = (x_train.shape[-2], x_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    logger.info('Train set has %d images' % x_train.shape[0])
    logger.info('Input train set has shape of %s ', x_train.shape)
    logger.info('Test set has %d images' % x_test.shape[0])
    logger.info('Input test set has shape of %s ', x_test.shape)

    # compute number of minibatches for training, validation and testing
    n_train_batches = x_train.shape[0] // batch_size
    n_test_batches = x_test.shape[0] // batch_size
    logger.info("Number of train batches %d" % n_train_batches)
    logger.info("Number of test batches %d" % n_test_batches)

    logger.info("... building network")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # input is presented as (batch, channel, x, y)
    x = T.tensor4('x')
    # matrix row - batch index, column label of pixel
    # every column is a list of pixel labels (image matrix reshaped to list)
    y = T.imatrix('y')

    # create all layers
    '''
    layers, out_shape = build_net(x, y, batch_size, classes=NCLASSES,
                                  image_shape=image_shape,
                                  nkerns=[16, 64, 256],
                                  sparse=True)
    '''
    layers, out_shape = build_net2(x, y, batch_size, classes=n_classes,
                                   image_shape=image_shape,
                                   nkerns=[32, 128, 256, 256],
                                   sparse=True,
                                   activation=ReLU, bias=0.001)
    logger.info("Image out shape is %s", out_shape)

    # last layer, log reg
    log_reg_layer = layers[0]
    y_flat = y.flatten(1)

    y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1])
    y_test_shape = (y_test.shape[0], out_shape[0], out_shape[1])

    # resize marked images to out_size of the network
    y_test_downscaled = np.empty(y_test_shape)
    for i in xrange(y_test.shape[0]):
        y_test_downscaled[i] = resize_marked_image(y_test[i], out_shape)

    x_train_shared, y_train_shared = \
        shared_dataset((np.zeros_like(x_train), np.zeros(y_train_shape)))
    x_test_shared, y_test_shared = \
        shared_dataset((x_test, y_test_downscaled))

    # When storing data on the GPU it has to be stored as floats
    # therefore we will store the labels as ``floatX`` as well
    # (``shared_y`` does exactly that). But during our computations
    # we need them as ints (we use labels as index, and if they are
    # floats it doesn't make sense) therefore instead of returning
    # ``shared_y`` we will have to cast it to int. This little hack
    # lets us get around this issue
    y_train_shared_i32 = T.cast(y_train_shared, 'int32')
    y_test_shared_i32 = T.cast(y_test_shared, 'int32')

    ###############
    # BUILD MODEL #
    ###############
    logger.info("... building model")

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        [log_reg_layer.errors(y_flat),
         log_reg_layer.negative_log_likelihood(y_flat)] +
        list(log_reg_layer.accurate_pixels_class(y_flat)),
        givens={
            x: x_test_shared[index * batch_size:(index + 1) * batch_size],
            y: y_test_shared_i32[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = [p for l in layers for p in l.params]
    # list of Ws through all layers
    weights = [l.params[0] for l in layers]
    assert len(weights) == len(params) / 2

    # the cost we minimize during training is the NLL of the model
    # and L2 regularization (lambda * L2-norm)
    # L2-norm is sum of squared params (using only W, not b)
    # params has Ws on even locations
    cost = log_reg_layer.negative_log_likelihood(y_flat)\
        + 10**-5 * T.sum([T.sum(w ** 2) for w in weights])

    # train_model is a function that updates the model parameters
    update_params = build_weight_updates(conf['training'], cost, params)
    train_model = theano.function(
        [index],
        cost,
        updates=update_params.updates,
        givens={
            x: x_train_shared[index * batch_size:(index + 1) * batch_size],
            y: y_train_shared_i32[index * batch_size:(index + 1) * batch_size]
        })
    pre_fn = lambda: change_train_set(x_train_shared, x_train,
                                      y_train_shared, y_train,
                                      out_shape)

    # set loaded weights
    if net_weights is not None:
        try:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
        except:
            logger.error("Incompatible network to load weights in")

    ###############
    # TRAIN MODEL #
    ###############
    logger.info("... training model")
    start_time = time.clock()
    best_validation_loss, best_iter, best_params = eval_model(
        conf['training'], train_model, test_model,
        n_train_batches, n_test_batches, layers, pre_fn, update_params)
    end_time = time.clock()

    logger.info('Best validation score of %f %% obtained at iteration %i, ' %
                (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The code for file %s ran for %.2fm' %
                          (os.path.split(__file__)[1],
                           (end_time - start_time) / 60.))

    logger.info('Starting second step, with Dropout hidden layers')
    layers, new_layers = extend_net1(layers, NCLASSES, nkerns=[1000],
                                     activation=ReLU, bias=0.001)

    # create a function to compute the mistakes that are made by the model
    test_model2 = theano.function(
        [index],
        [layers[0].errors(y_flat), layers[0].bayesian_nll(y_flat)] +
        list(layers[0].accurate_pixels_class(y_flat)),
        givens={
            x: x_test_shared[index * batch_size:(index + 1) * batch_size],
            y: y_test_shared_i32[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params2 = [p for l in new_layers for p in l.params]
    # list of Ws through all layers
    weights2 = [l.params[0] for l in new_layers]
    assert len(weights2) == len(params2) / 2

    # the cost we minimize during training is the NLL of the model
    # and L2 regularization (lambda * L2-norm)
    # L2-norm is sum of squared params (using only W, not b)
    # params has Ws on even locations
    cost2 = layers[0].negative_log_likelihood(y_flat)\
        + 10**-3 * T.sum([T.sum(w ** 2) for w in weights2])

    # train_model is a function that updates the model parameters
    update_params2 = build_weight_updates(conf['training2'], cost2, params2)
    train_model2 = theano.function(
        [index],
        cost2,
        updates=update_params2.updates,
        givens={
            x: x_train_shared[index * batch_size:(index + 1) * batch_size],
            y: y_train_shared_i32[index * batch_size:(index + 1) * batch_size]
        })

    # try to load weights in second stage
    if net_weights is not None:
        try:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
        except:
            logger.error("Incompatible network to load weights in")

    # evaluate model2
    start_time = time.clock()
    best_validation_loss, best_iter, best_params = eval_model(
        conf['training2'], train_model2, test_model2,
        n_train_batches, n_test_batches, layers, pre_fn, update_params2)
    end_time = time.clock()

    logger.info('Best validation score of %f %% obtained at iteration %i, ' %
                (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The code for file %s ran for %.2fm' %
                          (os.path.split(__file__)[1],
                           (end_time - start_time) / 60.))
""" logging.basicConfig(level=logging.INFO) # create a file handler handler = logging.FileHandler('output.log') handler.setLevel(logging.INFO) formatter = logging.Formatter('%(asctime)s %(message)s') handler.setFormatter(formatter) # add the handler to the root logger logging.getLogger('').addHandler(handler) argc = len(sys.argv) if argc == 3: net_config_path = sys.argv[1] params = try_pickle_load(sys.argv[2]) if params is None: exit(1) elif argc == 2: net_config_path = sys.argv[1] params = None else: logger.error("Too few arguments") exit(1) conf = load_config(net_config_path) if conf is None: exit(1) # run evaluation evaluate_conv(conf, net_weights=params)

        conf['run-dataset'], postproc=oversegment,
        postproc_params=best_params, show=False)


if __name__ == '__main__':
    """
    Examples of usage:
    python validate.py network.conf network-12-34.bin [train/validation/test]
        validates network
    """
    logging.basicConfig(level=logging.INFO)

    argc = len(sys.argv)
    if argc == 4:
        net_config_path = sys.argv[1]
        params = try_pickle_load(sys.argv[2])
        dataset = sys.argv[3]
        if dataset not in ['train', 'validation', 'test']:
            print "Wrong dataset type: train/validation/test"
            exit(1)
        if params is None:
            exit(1)
    else:
        logger.error("Too few arguments")
        exit(1)

    conf = load_config(net_config_path)
    if conf is None:
        exit(1)

    conf['run-dataset'] = dataset

def validate(conf, net_weights):
    logger.info("... loading data")
    logger.debug("Theano.config.floatX is %s" % theano.config.floatX)

    path = conf['data']['location']
    batch_size = 1
    assert type(batch_size) is int
    logger.info('Batch size %d' % (batch_size))

    try:
        x_train_allscales = try_pickle_load(path + 'x_train.bin')
        x_train = x_train_allscales[0]  # first scale
        y_train = try_pickle_load(path + 'y_train.bin')
        x_test_allscales = try_pickle_load(path + 'x_test.bin')
        x_test = x_test_allscales[0]
        y_test = try_pickle_load(path + 'y_test.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    n_classes = int(max(y_train.max(), y_test.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)

    image_shape = (x_train.shape[-2], x_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    logger.info('Train set has %d images' % x_train.shape[0])
    logger.info('Input train set has shape of %s ', x_train.shape)
    logger.info('Test set has %d images' % x_test.shape[0])
    logger.info('Input test set has shape of %s ', x_test.shape)

    # compute number of minibatches for training, validation and testing
    n_train_batches = x_train.shape[0] // batch_size
    n_test_batches = x_test.shape[0] // batch_size
    logger.info("Number of train batches %d" % n_train_batches)
    logger.info("Number of test batches %d" % n_test_batches)

    logger.info("... building network")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # input is presented as (batch, channel, x, y)
    x0 = T.tensor4('x')
    x2 = T.tensor4('x')
    x4 = T.tensor4('x')
    # matrix row - batch index, column label of pixel
    # every column is a list of pixel labels (image matrix reshaped to list)
    y = T.imatrix('y')

    # create all layers
    layers, out_shape, conv_out = build_multiscale(
        x0, x2, x4, y, batch_size,
        classes=n_classes,
        image_shape=image_shape,
        nkerns=[16, 64, 256],
        activation=lReLU, bias=0.001,
        sparse=False)
    logger.info("Image out shape is %s", out_shape)

    # last layer, log reg
    y_flat = y.flatten(1)

    y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1])
    y_test_shape = (y_test.shape[0], out_shape[0], out_shape[1])

    # resize marked images to out_size of the network
    y_train_downscaled = np.empty(y_train_shape)
    for i in xrange(y_train.shape[0]):
        y_train_downscaled[i] = resize_marked_image(y_train[i], out_shape)

    # resize marked images to out_size of the network
    y_test_downscaled = np.empty(y_test_shape)
    for i in xrange(y_test.shape[0]):
        y_test_downscaled[i] = resize_marked_image(y_test[i], out_shape)

    x_train_shared, y_train_shared = \
        shared_dataset((x_train, y_train_downscaled))
    x2_train_shared = theano.shared(x_train_allscales[1], borrow=True)
    x4_train_shared = theano.shared(x_train_allscales[2], borrow=True)
    x_test_shared, y_test_shared = \
        shared_dataset((x_test, y_test_downscaled))
    x2_test_shared = theano.shared(x_test_allscales[1], borrow=True)
    x4_test_shared = theano.shared(x_test_allscales[2], borrow=True)

    # When storing data on the GPU it has to be stored as floats
    # therefore we will store the labels as ``floatX`` as well
    # (``shared_y`` does exactly that). But during our computations
    # we need them as ints (we use labels as index, and if they are
    # floats it doesn't make sense) therefore instead of returning
    # ``shared_y`` we will have to cast it to int. This little hack
    # lets us get around this issue
    y_train_shared_i32 = T.cast(y_train_shared, 'int32')
    y_test_shared_i32 = T.cast(y_test_shared, 'int32')

    ###############
    # BUILD MODEL #
    ###############
    logger.info("... building model")

    layers, new_layers = extend_net_w1l_drop(
        conv_out, layers, n_classes, nkerns=[1000],
        activation=lReLU, bias=0.001)

    test_model_trainset = theano.function(
        [index],
        [layers[0].errors(y_flat),
         layers[0].negative_log_likelihood(y_flat)] +
        list(layers[0].accurate_pixels_class(y_flat)),
        givens={
            x0: x_train_shared[index * batch_size:(index + 1) * batch_size],
            x2: x2_train_shared[index * batch_size:(index + 1) * batch_size],
            x4: x4_train_shared[index * batch_size:(index + 1) * batch_size],
            y: y_train_shared_i32[index * batch_size:(index + 1) * batch_size]
        })

    test_model_testset = theano.function(
        [index],
        [layers[0].errors(y_flat),
         layers[0].negative_log_likelihood(y_flat)] +
        list(layers[0].accurate_pixels_class(y_flat)),
        givens={
            x0: x_test_shared[index * batch_size:(index + 1) * batch_size],
            x2: x2_test_shared[index * batch_size:(index + 1) * batch_size],
            x4: x4_test_shared[index * batch_size:(index + 1) * batch_size],
            y: y_test_shared_i32[index * batch_size:(index + 1) * batch_size]
        })

    # try to load weights in second stage
    try:
        if net_weights is not None:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
    except:
        logger.error("Incompatible network to load weights in")
        exit(1)

    set_layers_training_mode(layers, 0)

    logger.info("---> Train set")
    start_time = time.clock()
    validation = [test_model_trainset(i) for i in xrange(n_train_batches)]
    end_time = time.clock()
    logger.info("Validated %d images in %.2f seconds",
                n_train_batches, end_time - start_time)
    print_stats(validation, layers[0].n_classes)

    logger.info("---> Test set")
    start_time = time.clock()
    validation = [test_model_testset(i) for i in xrange(n_test_batches)]
    end_time = time.clock()
    logger.info("Validated %d images in %.2f seconds",
                n_test_batches, end_time - start_time)
    print_stats(validation, layers[0].n_classes)

def evaluate_conv(conf, net_weights=None):
    """ Evaluates conv network

    conf: dictionary
        network configuration
    """
    ################
    # LOADING DATA #
    ################
    logger.info("... loading data")
    logger.debug("Theano.config.floatX is %s" % theano.config.floatX)

    path = conf['data']['location']
    batch_size = conf['evaluation']['batch-size']
    assert type(batch_size) is int
    logger.info('Batch size %d' % (batch_size))

    try:
        x_train_allscales = try_pickle_load(path + 'x_train.bin')
        x_train = x_train_allscales[0]  # first scale
        y_train = try_pickle_load(path + 'y_train.bin')
        x_test_allscales = try_pickle_load(path + 'x_test.bin')
        x_test = x_test_allscales[0]
        y_test = try_pickle_load(path + 'y_test.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    n_classes = int(max(y_train.max(), y_test.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)

    image_shape = (x_train.shape[-2], x_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    logger.info('Train set has %d images' % x_train.shape[0])
    logger.info('Input train set has shape of %s ', x_train.shape)
    logger.info('Test set has %d images' % x_test.shape[0])
    logger.info('Input test set has shape of %s ', x_test.shape)

    # compute number of minibatches for training, validation and testing
    n_train_batches = x_train.shape[0] // batch_size
    n_test_batches = x_test.shape[0] // batch_size
    logger.info("Number of train batches %d" % n_train_batches)
    logger.info("Number of test batches %d" % n_test_batches)

    logger.info("... building network")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # input is presented as (batch, channel, x, y)
    x = T.tensor4('x')
    # matrix row - batch index, column label of pixel
    # every column is a list of pixel labels (image matrix reshaped to list)
    y = T.imatrix('y')

    # create all layers
    layers, out_shape = build_net(x, y, batch_size, classes=n_classes,
                                  image_shape=image_shape)
    logger.info("Image out shape is %s", out_shape)

    # last layer, log reg
    log_reg_layer = layers[0]
    y_flat = y.flatten(1)

    y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1])
    y_test_shape = (y_test.shape[0], out_shape[0], out_shape[1])

    # resize marked images to out_size of the network
    y_test_downscaled = np.empty(y_test_shape)
    for i in xrange(y_test.shape[0]):
        y_test_downscaled[i] = resize_marked_image(y_test[i], out_shape)

    x_train_shared, y_train_shared = \
        shared_dataset((np.zeros_like(x_train), np.zeros(y_train_shape)))
    x_test_shared, y_test_shared = \
        shared_dataset((x_test, y_test_downscaled))

    # When storing data on the GPU it has to be stored as floats
    # therefore we will store the labels as ``floatX`` as well
    # (``shared_y`` does exactly that). But during our computations
    # we need them as ints (we use labels as index, and if they are
    # floats it doesn't make sense) therefore instead of returning
    # ``shared_y`` we will have to cast it to int. This little hack
    # lets us get around this issue
    y_train_shared_i32 = T.cast(y_train_shared, 'int32')
    y_test_shared_i32 = T.cast(y_test_shared, 'int32')

    ###############
    # BUILD MODEL #
    ###############
    logger.info("... building model")

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        [log_reg_layer.errors(y_flat),
         log_reg_layer.negative_log_likelihood(y_flat)] +
        list(log_reg_layer.accurate_pixels_class(y_flat)),
        givens={
            x: x_test_shared[index * batch_size: (index + 1) * batch_size],
            y: y_test_shared_i32[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    layers_w_weights = filter(lambda l: l.params is not None, layers)
    params = [p for l in layers_w_weights for p in l.params]
    # list of Ws through all layers
    weights = [l.params[0] for l in layers_w_weights]
    assert len(weights) == len(params) / 2

    # the cost we minimize during training is the NLL of the model
    cost = log_reg_layer.negative_log_likelihood(y_flat)

    # train_model is a function that updates the model parameters
    update_params = build_weight_updates(conf['training'], cost, params)
    train_model = theano.function(
        [index],
        cost,
        updates=update_params.updates,
        givens={
            x: x_train_shared[index * batch_size: (index + 1) * batch_size],
            y: y_train_shared_i32[index * batch_size: (index + 1) * batch_size]
        }
    )
    pre_fn = lambda: change_train_set(
        x_train_shared, x_train, y_train_shared, y_train, out_shape)

    # set loaded weights
    if net_weights is not None:
        try:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
        except:
            logger.error("Incompatible network to load weights in")

    ###############
    # TRAIN MODEL #
    ###############
    logger.info("... training model")
    start_time = time.clock()
    best_validation_loss, best_iter, best_params = eval_model(
        conf['training'], train_model, test_model,
        n_train_batches, n_test_batches, layers, pre_fn, update_params)
    end_time = time.clock()

    logger.info('Best validation score of %f %% obtained at iteration %i, ' %
                (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The code for file %s ran for %.2fm' %
                          (os.path.split(__file__)[1],
                           (end_time - start_time) / 60.))

def validate(conf, net_weights):
    logger.info("... loading data")
    logger.debug("Theano.config.floatX is %s" % theano.config.floatX)

    path = conf['data']['location']
    batch_size = 1
    assert type(batch_size) is int
    logger.info('Batch size %d' % (batch_size))

    try:
        x_train_allscales = try_pickle_load(
            path + 'x_' + conf['run-dataset'] + '.bin')
        x_train = x_train_allscales[0]  # first scale
        y_train = try_pickle_load(
            path + 'y_' + conf['run-dataset'] + '.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    y_valid = try_pickle_load(path + 'y_validation.bin')
    print path + 'y_validation.bin'
    n_classes = int(max(y_train.max(), y_valid.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)

    image_shape = (x_train.shape[-2], x_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    logger.info('Dataset has %d images' % x_train.shape[0])
    logger.info('Input data has shape of %s ', x_train.shape)

    # compute number of minibatches
    n_train_batches = x_train.shape[0] // batch_size
    logger.info("Number of train batches %d" % n_train_batches)

    logger.info("... building network")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # input is presented as (batch, channel, x, y)
    x0 = T.tensor4('x')
    x2 = T.tensor4('x')
    x4 = T.tensor4('x')
    # matrix row - batch index, column label of pixel
    # every column is a list of pixel labels (image matrix reshaped to list)
    y = T.imatrix('y')

    # create all layers
    builder_name = conf['network']['builder-name']
    layers, out_shape, conv_out = get_net_builder(builder_name)(
        x0, x2, x4, y, batch_size, classes=n_classes,
        image_shape=image_shape,
        nkerns=conf['network']['layers'][:3],
        seed=conf['network']['seed'],
        activation=lReLU, bias=0.001, sparse=False)
    logger.info("Image out shape is %s", out_shape)

    y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1])

    # resize marked images to out_size of the network
    y_train_downscaled = np.empty(y_train_shape)
    # for i in xrange(y_train.shape[0]):
    #     y_train_downscaled[i] = resize_marked_image(y_train[i], out_shape)

    x_train_shared, y_train_shared = \
        shared_dataset((x_train, y_train_downscaled))
    x2_train_shared = theano.shared(x_train_allscales[1], borrow=True)
    x4_train_shared = theano.shared(x_train_allscales[2], borrow=True)

    ###############
    # BUILD MODEL #
    ###############
    logger.info("... building model")

    layers, new_layers = extend_net_w1l_drop(
        conv_out, conf['network']['layers'][-2] * 3, layers, n_classes,
        nkerns=conf['network']['layers'][-1:],
        seed=conf['network']['seed'],
        activation=lReLU, bias=0.001)

    test_model = theano.function(
        [index],
        [layers[0].y_pred],
        givens={
            x0: x_train_shared[index * batch_size:(index + 1) * batch_size],
            x2: x2_train_shared[index * batch_size:(index + 1) * batch_size],
            x4: x4_train_shared[index * batch_size:(index + 1) * batch_size]
        })

    # try to load weights
    try:
        if net_weights is not None:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
    except:
        logger.error("Incompatible network to load weights in")
        exit(1)

    set_layers_training_mode(layers, 0)

    logger.info("---> Results - no postprocessing")
    start_time = time.clock()
    validation = [test_model(i)[0].reshape(NET_OUT_SHAPE)
                  for i in xrange(n_train_batches)]
    end_time = time.clock()
    logfiles_path = conf['data']['location'] +\
        'samples_' + conf['run-dataset'] + '.log'
    logger.info("Validated %d images in %.2f seconds",
                n_train_batches, end_time - start_time)
    get_stats(validation, y_train, layers[0].n_classes,
              conf['data']['dont-care-classes'], logfiles_path,
              conf['run-dataset'])

    logger.info("---> Results - superpixels")
    stats_func = lambda p: get_stats(
        validation, y_train, layers[0].n_classes,
        conf['data']['dont-care-classes'], logfiles_path,
        conf['run-dataset'], postproc=oversegment, postproc_params=p,
        show=False, log=False)
    start_time = time.clock()
    best_params = find_best_superpixel_params(stats_func)
    end_time = time.clock()
    logger.info("Done in %.2f seconds", end_time - start_time)
    logger.info("Best params are %s", best_params)

    # run one more time with best params, log output this time
    get_stats(validation, y_train, layers[0].n_classes,
              conf['data']['dont-care-classes'], logfiles_path,
              conf['run-dataset'], postproc=oversegment,
              postproc_params=best_params, show=False)

def main(path, marked_path=None):
    # images multiscale
    imgs_mscale = try_pickle_load(path)
    n_scales = len(imgs_mscale)
    imgs_s0 = imgs_mscale[0]  # scale 1
    image_shape = (imgs_s0.shape[2], imgs_s0.shape[3])

    images_to_show = min(IMAGES_TO_SHOW, len(imgs_s0))

    print "Images shape", imgs_s0.shape
    print "Number of images to show", images_to_show
    print "Number of scales", n_scales
    print "Requested image shape will be", image_shape
    n_rows = (1 + n_scales) * 2

    perturbed_imgs = [np.empty((images_to_show, imgs.shape[1],
                                imgs.shape[2], imgs.shape[3]))
                      for imgs in imgs_mscale]
    perturbed_marks = None
    if marked_path is not None:
        marked_imgs = try_pickle_load(marked_path)
        perturbed_marks = np.empty((images_to_show, marked_imgs.shape[1],
                                    marked_imgs.shape[2]))

    for i in xrange(images_to_show):
        imgs_to_perturb = [img[i] for img in imgs_mscale]
        # if we loaded markings, add marked image to list of imgs to perturb
        if perturbed_marks is not None:
            imgs_to_perturb.append(marked_imgs[i])

        ret_list = perturb_image(imgs_to_perturb, image_shape)
        for n_scale in range(n_scales):
            perturbed_imgs[n_scale][i] = ret_list[n_scale]

        if perturbed_marks is not None:
            perturbed_marks[i] = ret_list[n_scales]

    # show original images, scale by scale
    for i, imgs in enumerate(imgs_mscale):
        for j in xrange(images_to_show):
            pylab.subplot(n_rows, images_to_show, i * images_to_show + j + 1)
            pylab.axis('off')
            pylab.imshow(imgs[j, CHANNEL, :, :])
            pylab.gray()  # set colormap

    # show perturbed images, scale by scale
    for ind, imgs in enumerate(perturbed_imgs):
        i = n_scales + ind
        for j in xrange(images_to_show):
            pylab.subplot(n_rows, images_to_show, i * images_to_show + j + 1)
            pylab.axis('off')
            pylab.imshow(imgs[j, CHANNEL, :, :])
            pylab.gray()

    # show original and perturbed markings in the last two rows
    if perturbed_marks is not None:
        for j in xrange(images_to_show):
            pylab.subplot(n_rows, images_to_show,
                          (2 * n_scales + 0) * images_to_show + j + 1)
            pylab.axis('off')
            pylab.imshow(marked_imgs[j, :, :])
            pylab.jet()

            pylab.subplot(n_rows, images_to_show,
                          (2 * n_scales + 1) * images_to_show + j + 1)
            pylab.axis('off')
            pylab.imshow(perturbed_marks[j, :, :])
            pylab.jet()

    pylab.show()
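
# Hedged invocation sketch; the .bin paths are placeholders for pickled
# multiscale images and, optionally, their per-pixel markings:
#
#     main('data/x_train.bin')                       # images only
#     main('data/x_train.bin', 'data/y_train.bin')   # images + markings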

def load_ngrams(n, features_use, tree, subset=None, min_occ=1, min_files=1,
                remove_subst_tokens=False):
    """
    Loads the dataset for the Microsoft sentence completion challenge,
    processed into ngrams.

    The raw dataset is loaded and processed using the 'load' function,
    to which 'subset', 'min_occ' and 'min_files' are forwarded. The
    resulting dataset is then processed into ngrams using the 'ngrams'
    function, to which the 'n' and 'tree' parameters are forwarded.
    This is then cached on the disk for subsequent usage. The resulting
    ngrams are pruned of unwanted features as indicated by the
    'features_use' parameter.

    Returns a tuple (sents, q_groups, answers, feature_sizes).
    This reflects the value returned by the 'load' function, except
    that 'sents' and 'q_groups' are now not just features extracted
    from text, but ngrams built from those features.
    """
    features_use = np.array(features_use, dtype=bool)
    log.info("Loading %d-grams, %s, features_use: %s", n,
             "tree" if tree else "linear",
             "".join([str(int(i)) for i in features_use]))

    dir = os.path.join("data", "processed")
    if not os.path.exists(dir):
        os.makedirs(dir)
    name_base = "%s-%d_grams-subset_%r-min_occ_%r-min_files_%r" % (
        "tree" if tree else "linear", n, subset, min_occ, min_files)

    # tree-grams can all be seen as a feature-subset of 4-grams
    if tree and n < 4:
        ngrams_all = load_ngrams(4, np.ones(features_use.size, dtype=bool),
                                 tree, subset, min_occ, min_files,
                                 remove_subst_tokens)
    else:
        # look for the cached ngrams with all the features
        file_name = os.path.join(dir, name_base + ".pkl")
        ngrams_all = util.try_pickle_load(file_name)
        # it is possible that sentences are split
        # in order to avoid a Python bug with storing large arrays
        if ngrams_all is not None and isinstance(ngrams_all[0], list):
            sents = np.vstack(ngrams_all[0])
            ngrams_all = (sents, ) + ngrams_all[1:]

    # if unable to load cached data, create it
    if ngrams_all is None:
        # load data
        tokens, q_groups, answers, ftr_sizes = load(
            subset, min_occ, min_files)

        # tokens that should not be present in ngrams
        # the purpose is to remove ngrams containing tokens that are
        # substitutes for removed ones
        invalid_tokens = None
        if remove_subst_tokens:
            invalid_tokens = dict(zip(range(3), ftr_sizes[:3] - 1))
            log.info("Invalid tokens: %r", invalid_tokens)

        # define a function for generating ngrams, and process
        # trainset and questions
        _ngrams = lambda tokens: ngrams(n, tree, tokens, invalid_tokens)
        sent_ngrams = _ngrams(tokens)
        q_ngrams = [map(_ngrams, qg) for qg in q_groups]

        # store the processed data for subsequent usage
        # split sent ngrams to avoid a Python bug with pickling large arrays
        util.try_pickle_dump(
            (np.vsplit(sent_ngrams,
                       np.arange(1, 10) * (len(sent_ngrams) / 10)),
             q_ngrams, answers, ftr_sizes),
            file_name)
        ngrams_all = (sent_ngrams, q_ngrams, answers, ftr_sizes)

    # remove unwanted features from ngrams_all
    used_ftr = np.arange(ngrams_all[0].shape[1])[np.tile(features_use, n)]
    sents = ngrams_all[0][:, used_ftr]
    q_groups = [[q[:, used_ftr] for q in qg] for qg in ngrams_all[1]]

    return (sents, q_groups) + ngrams_all[2:]
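
# Usage sketch; the five features_use flags (orth, lemma, lemma_4, pos,
# dependency-type) are an assumption inferred from load()'s docstring,
# not a documented contract of this codebase:
def _demo_load_ngrams():
    sents, q_groups, answers, ftr_sizes = load_ngrams(
        3, [True, False, False, True, True], tree=False, subset=50)
    print "ngram matrix shape:", sents.shape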