Ejemplo n.º 1
0
def make_submission_xception(name, name_ext, dropout_p):
    """Predict on the test images with a fine-tuned Xception and save a CSV.

    The Xception base (global-average pooled, no top) is combined with the
    classifier returned by ``_top_classifier``, the weights stored at
    ``MODELS_DIR/MODEL_FILE.format(name, name_ext)`` are loaded, and the
    predicted probabilities for the images under ``data_info['dir_te']`` are
    passed to ``create_submission_file``.

    Args:
        name: Identifier used to look up the organized data info and the
            embeddings, and formatted into the weights and submission file
            names.
        name_ext: Second token formatted into the weights file name.
        dropout_p: Forwarded as ``dropout_p`` to ``_top_classifier``.
    """
    batch_size = 32

    data_info = load_organized_data_info(imgs_dim=HEIGHT, name=name)
    # Only the last of the six returned values (the test names) is used.
    _, _, _, _, _, te_names = create_embeddings(name)

    # shuffle=False so predictions come out in the iterator's file order;
    # presumably te_names follows the same order -- TODO confirm.
    test_flow = ImageDataGenerator(
        preprocessing_function=preprocess_input
    ).flow_from_directory(directory=data_info['dir_te'],
                          target_size=(HEIGHT, WIDTH),
                          class_mode=None,
                          batch_size=batch_size,
                          shuffle=False)

    base = Xception(weights='imagenet', include_top=False, pooling='avg')
    top_classifier = _top_classifier(l2_reg=0,
                                     dropout_p=dropout_p,
                                     input_shape=(2048, ))
    model = Model(inputs=base.input, outputs=top_classifier(base.output))
    model.load_weights(join(MODELS_DIR, MODEL_FILE.format(name, name_ext)))

    # Integer ceiling division: always an int and never drops the final
    # partial batch, whatever division semantics or `ceil` flavour is in
    # scope (assumes data_info['num_te'] is an integer count).
    steps = int(-(-data_info['num_te'] // batch_size))
    probs_pred = model.predict_generator(generator=test_flow, steps=steps)

    submission_file = 'xception_fine_tuned_{:s}.csv'.format(name)
    create_submission_file(image_names=te_names,
                           probs=probs_pred,
                           file_name=join(SUBMISSIONS_DIR, submission_file))
Ejemplo n.º 2
0
# Restore the trained weights stored under "<root_dir>weights/".
model.load_weights(''.join([root_dir, 'weights/', weight_name]))

# Every file under <root_dir>/test/audio whose name ends in "wav".
test_paths = glob(os.path.join(root_dir, 'test/audio/*wav'))


def test_generator(test_batch_size):
    """Yield batches of processed test clips, cycling over them forever.

    Each pass walks ``test_paths`` in order in chunks of ``test_batch_size``;
    the final chunk of a pass may be shorter. Every path is run through
    ``process_wav_file(..., phase='TEST')`` and the results of one chunk are
    stacked with ``np.array``.
    """
    while True:
        for offset in range(0, len(test_paths), test_batch_size):
            # Slicing past the end simply truncates, giving the short tail.
            chunk = test_paths[offset:offset + test_batch_size]
            yield np.array(
                [process_wav_file(path, phase='TEST') for path in chunk])

# Run ceil(len(test_paths) / batch_size) generator steps and take the
# highest-scoring class index for every prediction row.
num_steps = int(np.ceil(len(test_paths)/batch_size))
predictions = model.predict_generator(test_generator(batch_size), num_steps)
classes = np.argmax(predictions, axis=1)

# Map each test file name to its predicted label name. Keying by file name
# keeps a single entry per file.
submission = {}
for idx, path in enumerate(test_paths):
    submission[os.path.basename(path)] = id2name[classes[idx]]


# Write the submission as a two-column CSV next to root_dir.
with open(root_dir + weight_name + '.csv', 'w') as fout:
    fout.write('fname,label\n')
    fout.writelines('{},{}\n'.format(fname, label)
                    for fname, label in submission.items())
Ejemplo n.º 3
0
class TestSetAnalysis(object):
    """Evaluate a Keras image classifier on a test set.

    Predictions are produced either from a directory of images
    (``predict_gen``) or from in-memory arrays (``predict_array``). Both
    store the predicted probabilities, the predicted and true labels and the
    confusion matrix on the instance. For two-class problems the ROC curve,
    its AUC and the sample indices of true/false positives/negatives are
    stored as well.
    """

    def __init__(self, model='vgg19', show=True):
        """Load the network to analyse.

        Args:
            model: ``'vgg19'``, ``'vgg16'``, ``'inceptionv3'``,
                ``'resnet50'`` or ``'xception'`` (loaded with ImageNet
                weights and the classification top), or a path ending in
                ``.hdf5`` that is handed to ``load_model``.
            show: When true, print the model summary.

        Raises:
            ValueError: If ``model`` is none of the accepted values.
        """
        firstlayer_index = 0
        if model == 'vgg19':
            from keras.applications.vgg19 import VGG19
            self.model = VGG19(weights='imagenet', include_top=True)
        elif model == 'vgg16':
            from keras.applications.vgg16 import VGG16
            self.model = VGG16(weights='imagenet', include_top=True)
        elif model == 'inceptionv3':
            from keras.applications.inception_v3 import InceptionV3
            self.model = InceptionV3(weights='imagenet', include_top=True)
        elif model == 'resnet50':
            from keras.applications.resnet50 import ResNet50
            self.model = ResNet50(weights='imagenet', include_top=True)
        elif model == 'xception':
            from keras.applications.xception import Xception
            self.model = Xception(weights='imagenet', include_top=True)
        elif model.endswith('.hdf5'):
            self.model = load_model(model)
            # NOTE(review): saved models read their input shape from the
            # second layer -- presumably the first is a wrapper; confirm.
            firstlayer_index = 1
        else:
            # Raise instead of calling exit(): a constructor should not
            # terminate the interpreter of whoever imported this class.
            raise ValueError(
                "Unknown model %r. Valid models are: vgg19, vgg16, "
                "inceptionv3, resnet50, xception (xception/inceptionv3 "
                "model is only available in tf backend). Or provide path "
                "to a saved model in .hdf5 format" % (model,))
        if show:
            # summary() prints by itself; wrapping it in print() only adds
            # a stray "None" line.
            self.model.summary()
        self.inputshape = self.model.layers[firstlayer_index].output_shape[1:]

    @staticmethod
    def _num_steps(samples, batchsize):
        """Return the integer number of batches needed to cover ``samples``."""
        return int(-(-samples // batchsize))

    def _compute_metrics(self):
        """Store the confusion matrix and, for two classes, the ROC data."""
        self.confusion_matrix = confusion_matrix(self.true_labels,
                                                 self.predict_labels)
        if self.nb_class == 2:
            # Column 1 holds the score of the class labelled 1.
            positive_scores = self.predictions[:, 1]
            self.FPR, self.TPR, _ = roc_curve(self.true_labels,
                                              positive_scores)
            self.roc_auc = roc_auc_score(self.true_labels, positive_scores)
            self.get_cm_index()

    def predict_gen(self, data_dir, batchsize=32, rescale=1.0/255):
        """Predict on every image found under ``data_dir``.

        Args:
            data_dir: Directory passed to ``flow_from_directory``.
            batchsize: Batch size of the image iterator.
            rescale: Factor passed to ``ImageDataGenerator(rescale=...)``.
        """
        self.data_dir = data_dir
        datagen = ImageDataGenerator(rescale=rescale)
        self.generator = datagen.flow_from_directory(
            self.data_dir,
            target_size=self.inputshape[:2],
            batch_size=batchsize,
            class_mode='categorical',
            shuffle=False)

        samples = self.generator.samples
        # class_indices has one entry per class, so its length is the class
        # count whichever name the iterator uses for that attribute.
        self.nb_class = len(self.generator.class_indices)
        self.predictions = self.model.predict_generator(
            self.generator, self._num_steps(samples, batchsize))
        self.predictions = self.predictions[:samples, :]
        self.predict_labels = np.argmax(self.predictions, axis=1)
        # Take the labels from the iterator itself: with shuffle=False they
        # line up with the predictions, whereas counting files per folder
        # via glob depends on directory listing order and on stray files.
        self.true_labels = np.asarray(self.generator.classes).tolist()
        self._compute_metrics()

    def predict_array(self, xdata, ydata, batchsize=32, rescale=1.0/255):
        """Predict on in-memory data.

        Args:
            xdata: Model inputs; multiplied by ``rescale`` before predicting.
            ydata: Per-sample label scores; the true label is the argmax
                along axis 1.
            batchsize: Batch size passed to ``model.predict``.
            rescale: Multiplicative factor applied to ``xdata``.
        """
        self.predictions = self.model.predict(xdata*rescale,
                                              batch_size=batchsize)
        self.predict_labels = np.argmax(self.predictions, axis=1)
        self.true_labels = np.argmax(ydata, axis=1)
        # Set the class count here too so plot_all() works after this call
        # and ROC data is only computed for two-class output.
        self.nb_class = self.predictions.shape[1]
        # Sample indices now refer to rows of xdata, not to files of an
        # earlier predict_gen() run, so drop any stale iterator.
        self.generator = None
        self._compute_metrics()

    def get_information_dictionary(self):
        """Return the stored results as a dict.

        The ROC entries (``FPR``, ``TPR``, ``roc_auc``) are ``None`` when no
        ROC data has been computed.
        """
        return {
            "FPR": getattr(self, 'FPR', None),
            "TPR": getattr(self, 'TPR', None),
            "predictions": self.predictions,
            "true_labels": self.true_labels,
            "predict_labels": self.predict_labels,
            "roc_auc": getattr(self, 'roc_auc', None),
            "confusion_matrix": self.confusion_matrix,
        }

    def plot_confusion_matrix(self, cmap='Blues',
                              save=False, savename='cm.png'):
        """Plot the row-normalised confusion matrix and return it.

        Args:
            cmap: Colormap passed to ``plt.imshow``.
            save: Save the figure to ``savename`` instead of showing it.
            savename: Output path used when ``save`` is true.

        Returns:
            The confusion matrix with each row divided by its sum; rows that
            sum to zero are left as zeros.
        """
        plt.figure(figsize=(8, 8))
        counts = np.asarray(self.confusion_matrix, dtype=float)
        row_totals = counts.sum(axis=1, keepdims=True)
        # Skip the division for empty rows to avoid NaN cells and warnings.
        matrix = np.divide(counts, row_totals,
                           out=np.zeros_like(counts),
                           where=row_totals != 0)
        plt.imshow(matrix, cmap=cmap)
        plt.xticks([], [])
        plt.yticks([], [])
        plt.clim(0, 1)
        if save:
            print("Now saving confusion matrix figure")
            plt.savefig(savename)
        else:
            plt.show()
        return matrix

    def plot_roc_curve(self, save=False, savename='roc.png'):
        """Plot the stored ROC curve with the diagonal as reference.

        Args:
            save: Save the figure to ``savename`` instead of showing it.
            savename: Output path used when ``save`` is true.
        """
        plt.figure(figsize=(8, 8))
        plt.plot(self.FPR, self.TPR, 'k', lw=2)
        plt.plot(self.FPR, self.FPR, 'k', lw=0.5)
        plt.axhline(y=1, color='k', ls=':', lw=0.5)
        plt.axvline(x=0, color='k', ls=':', lw=0.5)
        plt.xlim(-0.01, 1)
        plt.ylim(0, 1.01)
        # Raw strings: "\m" is not a valid escape sequence.
        plt.xlabel(r'$\mathtt{FalsePositiveRate}$', fontsize=22)
        plt.ylabel(r'$\mathtt{TruePositiveRate}$', fontsize=22)
        if save:
            plt.savefig(savename)
        else:
            plt.show()

    def plot_samples(self, ind_arr, title, N=100, ncol=15,
                     save=False, savefigname='samples.eps'):
        """Show a grid of randomly chosen images from the given indices.

        Args:
            ind_arr: Indices into the image iterator's ``filenames``.
            title: Figure title.
            N: Maximum number of images to draw; clamped to the number of
                available indices and rounded down to full rows.
            ncol: Number of grid columns; reduced when fewer images exist.
            save: Save the figure to ``savefigname`` instead of showing it.
            savefigname: Output path used when ``save`` is true.

        Raises:
            AttributeError: If no image iterator from ``predict_gen`` is
                available.
        """
        import os

        generator = getattr(self, 'generator', None)
        if generator is None:
            raise AttributeError(
                "plot_samples needs the file names collected by predict_gen")

        N = min(N, len(ind_arr))
        if N == 0:
            print("No samples to plot for %s" % title)
            return
        ind_arr = np.random.choice(ind_arr, size=N, replace=False)
        names = np.array(generator.filenames)[ind_arr]
        ncol = min(ncol, N)
        N = N - N % ncol
        print(N)
        # Floor division keeps the row count an int for subplots()/range().
        nrow = N // ncol
        # squeeze=False keeps axarr two-dimensional even for a single row.
        f, axarr = plt.subplots(nrow, ncol, sharex=True, sharey=True,
                                figsize=(ncol, nrow), squeeze=False)
        f.subplots_adjust(wspace=0.0, hspace=0)
        f.suptitle(r"$\mathtt{%s}$" % title, fontsize=22)

        for i in range(nrow):
            for j in range(ncol):
                ax = axarr[i, j]
                img = cv2.imread(os.path.join(self.data_dir,
                                              names[i*ncol + j]))
                if img is not None:
                    # OpenCV loads BGR; reverse the channels for matplotlib.
                    ax.imshow(img[..., ::-1])
                ax.set_xticks([])
                ax.set_yticks([])
        if save:
            f.savefig(savefigname)
        else:
            plt.show()

    def get_cm_index(self):
        """Collect sample indices of TP, TN, FP and FN for labels 0/1.

        Results are stored as lists in ``self.tp``, ``self.tn``, ``self.fp``
        and ``self.fn``; samples with any other label value are ignored.
        """
        self.tp = []
        self.tn = []
        self.fp = []
        self.fn = []
        # (true label, predicted label) -> destination list
        buckets = {
            (1, 1): self.tp,
            (0, 0): self.tn,
            (0, 1): self.fp,
            (1, 0): self.fn,
        }
        pairs = zip(self.true_labels, self.predict_labels)
        for i, (truth, guess) in enumerate(pairs):
            bucket = buckets.get((int(truth), int(guess)))
            if bucket is not None:
                bucket.append(i)

    def plot_all(self):
        """Show the confusion matrix and, for two classes, ROC and samples.

        Sample grids are only drawn when an image iterator from
        ``predict_gen`` is available.
        """
        self.plot_confusion_matrix()
        if getattr(self, 'nb_class', None) == 2:
            self.plot_roc_curve()
            if getattr(self, 'generator', None) is not None:
                self.plot_samples(self.tp, 'TruePositive')
                self.plot_samples(self.fp, 'FalsePositive')
                self.plot_samples(self.tn, 'TrueNegative')
                self.plot_samples(self.fn, 'FalseNegative')
Ejemplo n.º 4
0
# Pull the ids, image names and target column the generator needs from
# x_train.
train_item_ids, train_image_ids, train_labels = (
    x_train.index,
    x_train.image,
    x_train.deal_probability,
)

# shuffle=False: presumably keeps generator output aligned with
# train_image_ids -- confirm against ImageGenerator.
train_gen = ImageGenerator(
    image_dir,
    train_item_ids,
    train_image_ids,
    train_labels,
    dim=(224, 224),
    shuffle=False,
)

# Run the model over the generator and keep the image ids alongside.
features = model.predict_generator(train_gen, verbose=1)
ids = train_image_ids
print('getting img names')
# In[5]:

# img_names = [i for i in os.walk(image_dir)][0][2]
# print(f'size of images: {len(img_names)}')

# features = []
# ids = []
# for img_name in tqdm(img_names):

#     img_path = image_dir + img_name
#     try:
#         img = image.load_img(img_path, target_size=(224, 224))
#     except OSError: