Esempio n. 1
0
    def __init__(self, constel_file, empty=False):
        """Build a constellation from a whitespace-separated text file.

        Args:
            constel_file: Path of the file to read. Every line is split on
                whitespace and the resulting token list is handed to
                ``constel_point``.
            empty: When true the file is not opened and the object is
                built from zero points.

        ``self.ignore`` is set to True when fewer than
        ``settings.min_objects`` lines were read; in that case neither
        ``self.constellation`` nor ``self.descriptors`` is assigned.
        Otherwise ``self.ignore`` is False and both attributes are filled.
        """
        if empty:
            point_from_objs = []
        else:
            # The context manager closes the file as soon as it is consumed.
            with open(constel_file) as f:
                point_from_objs = [line.split() for line in f]

        too_few_objects = len(point_from_objs) < settings.min_objects
        if too_few_objects:
            self.ignore = True
            return

        self.ignore = False
        # One constellation point per line read from the file.
        self.constellation = list(map(constel_point, point_from_objs))
        # Descriptors are computed from the full list of points.
        self.descriptors = Descriptors(self.constellation)
Esempio n. 2
0
 def getDescriptors(self):
     """Build a ``config_properties_descriptor`` element from the descriptors.

     A ``Descriptors`` instance is serialised with ``toxml()``. When the
     result is non-empty, the line that starts with the opening
     ``<configuration_descriptor`` tag is replaced by a bare
     ``<configuration_descriptor>`` (dropping the xsd attributes), the text
     is parsed and the parsed element is appended to the new element.
     If no line starts with that tag the text is parsed unchanged.
     Parse failures (``SyntaxError``) are logged and otherwise ignored.

     NOTE(review): no ``return`` of ``descriptors_xml`` is visible in this
     excerpt -- confirm how callers obtain the element.
     """
     descriptors_xml = etree.Element('config_properties_descriptor')
     descriptors = Descriptors()
     raw_desc = descriptors.toxml()
     if raw_desc:
         # FIXME: stripping the xsd attributes by rewriting the opening-tag
         # line is a workaround so they do not show up later.
         # next(..., None) avoids an IndexError when no line matches.
         rep = next((line for line in raw_desc.split('\n')
                     if line.startswith('<configuration_descriptor')), None)
         if rep is not None:
             raw_desc = raw_desc.replace(rep, '<configuration_descriptor>')
         try:
             descriptors_namespace_fix = etree.fromstring(raw_desc)
             descriptors_xml.append(descriptors_namespace_fix)
         except SyntaxError as ex:
             # "as" is accepted by Python 2.6+ and Python 3 alike.
             logger.error('Error with xml from descriptors:\n %s' % str(ex))
Esempio n. 3
0
 def __init__(self):
     """Print a banner and set every attribute to its default value."""
     print('Face Spoofing Class')

     # Feature extraction settings.
     self._color_space = 0
     self._descriptor = Descriptors()
     self._gaussian_var = 2.0   # sigma passed to the Gaussian blur
     self._kernel_size = 7      # kernel size of the smoothing filters
     self._size = (640, 360)    # (width, height) used when resizing
     self._vr_height = 1
     self._vr_width = 30

     # Accumulated data.
     self._dictionary = {}
     self._features = []
     self._fourier = []
     self._images = []
     self._labels = []
     self._paths = []

     # Model state; the string 'None' (not the None object) means "unset".
     self._models = None
     self._neg_label = 'None'
     self._pos_label = 'None'
     self._type = 'None'
Esempio n. 4
0
 def getDescriptors(self):
     """Build a ``config_properties_descriptor`` element from the descriptors.

     The XML text produced by ``Descriptors().toxml()`` is parsed and
     appended to a fresh ``config_properties_descriptor`` element. Before
     parsing, the line starting with the opening
     ``<configuration_descriptor`` tag is replaced by a bare
     ``<configuration_descriptor>`` so its xsd attributes are dropped; when
     no such line exists the text is parsed as-is. A ``SyntaxError`` raised
     while parsing is logged and swallowed.

     NOTE(review): no ``return`` of ``descriptors_xml`` is visible in this
     excerpt -- confirm how callers obtain the element.
     """
     descriptors_xml = etree.Element('config_properties_descriptor')
     descriptors = Descriptors()
     raw_desc = descriptors.toxml()
     if raw_desc:
         # FIXME: rewriting the opening-tag line is a workaround to keep the
         # xsd attributes out of the parsed element.
         # A default of None replaces the IndexError the old "[...][0]"
         # lookup raised when no line matched.
         rep = next(
             (x for x in raw_desc.split('\n')
              if x.startswith('<configuration_descriptor')),
             None)
         if rep is not None:
             raw_desc = raw_desc.replace(rep, '<configuration_descriptor>')
         try:
             descriptors_namespace_fix = etree.fromstring(raw_desc)
             descriptors_xml.append(descriptors_namespace_fix)
         except SyntaxError as ex:
             # "except ... as" works on Python 2.6+ and on Python 3.
             logger.error('Error with xml from descriptors:\n %s' % str(ex))
Esempio n. 5
0
def obtain_video_features(folder_path,
                          dataset_tuple,
                          frame_drop=1,
                          size=(400, 300),
                          file_name='video_features.npy',
                          saveCopy=False,
                          show=False,
                          verbose=False):
    """Extract per-frame features from videos and save them to ``file_name``.

    For every ``(path, label)`` pair whose path has not been seen yet, each
    ``frame_drop``-th frame is resized, converted to grey, turned into a
    residual-noise Fourier spectrum, and described by a HOG feature (grey
    image) concatenated with a GLCM feature (spectrum). Features that
    contain NaN are discarded.

    Args:
        folder_path: Directory joined with each relative video path.
        dataset_tuple: Iterable of ``(path, label)`` pairs.
        frame_drop: Only frames whose index is a multiple of this value are
            processed.
        size: ``(width, height)`` passed to ``cv.resize`` and to the copy
            writers.
        file_name: ``.npy`` file holding features, labels and paths. When it
            already exists its content is loaded first, so an interrupted
            run can resume.
        saveCopy: When true, write ``*_spec.avi`` and ``*_tiny.avi`` copies
            next to each input (the name is derived by replacing ``.mov``).
        show: When true, display the frame and spectrum while processing.
        verbose: When true, print progress for every video.

    Returns:
        None. Results are written to ``file_name`` every 100 processed
        videos and once more at the end.
    """
    descriptor = Descriptors()
    feature_list = list()
    label_list = list()
    path_list = list()

    if os.path.isfile(file_name):
        print('>> Importing', file_name)
        # The file stores Python objects, so NumPy must unpickle it.
        # SECURITY: unpickling can execute arbitrary code; only load files
        # that this program wrote itself.
        feature_list, label_list, path_list = np.load(file_name,
                                                      allow_pickle=True)
        feature_list = list(feature_list)
        label_list = list(label_list)
        path_list = list(path_list)

    def save_progress():
        # Features are arrays while labels/paths are scalars, so the rows
        # are ragged; current NumPy only accepts that with dtype=object.
        np.save(
            file_name,
            np.array([feature_list, label_list, path_list], dtype=object))

    inner_counter = overall_counter = 0
    for (path, label) in dataset_tuple:
        if path not in path_list:
            if verbose:
                print(overall_counter + 1, inner_counter + 1, path, label)
            frame_counter = 0
            read_path = os.path.join(folder_path, path)
            read_video = cv.VideoCapture(read_path)
            spec_video = tiny_video = None
            try:
                if saveCopy:
                    probe_fourcc = cv.VideoWriter_fourcc(*'MP42')
                    # NOTE(review): if read_path does not contain '.mov'
                    # the writer target equals the input path -- confirm
                    # that inputs are always .mov files.
                    spec_video = cv.VideoWriter(
                        read_path.replace('.mov', '_spec.avi'),
                        probe_fourcc,
                        20.0,
                        size,
                        isColor=False)
                    tiny_video = cv.VideoWriter(
                        read_path.replace('.mov', '_tiny.avi'),
                        probe_fourcc,
                        20.0,
                        size,
                        isColor=True)
                while read_video.isOpened():
                    ret, read_frame = read_video.read()
                    if not ret:
                        break
                    if frame_counter % frame_drop == 0:
                        read_color = cv.resize(read_frame,
                                               (size[0], size[1]),
                                               interpolation=cv.INTER_AREA)
                        read_greyd = cv.cvtColor(read_color,
                                                 cv.COLOR_BGR2GRAY)
                        read_noise = get_residual_noise(
                            read_greyd, filter_type='median')
                        read_spect = get_fourier_spectrum(
                            noise_img=read_noise)
                        read_featA = descriptor.get_hog_feature(
                            image=read_greyd,
                            pixel4cell=(64, 64),
                            cell4block=(1, 1),
                            orientation=8)
                        read_featB = descriptor.get_glcm_feature(
                            image=read_spect, dists=[1, 2], shades=20)
                        read_feats = np.concatenate(
                            (read_featA, read_featB), axis=0)
                        # Rescale the spectrum to 0..255 for writing/display.
                        read_spect = (read_spect / np.max(read_spect)) * 255
                        if saveCopy:
                            spec_video.write(read_spect.astype('uint8'))
                            tiny_video.write(read_color)
                        if show:
                            cv.imshow('face', read_color)
                            cv.imshow(
                                'spec',
                                cv.normalize(read_spect, 0, 255,
                                             cv.NORM_MINMAX))
                            cv.waitKey(1)
                        if not np.any(np.isnan(read_feats)):
                            feature_list.append(read_feats)
                            label_list.append(label)
                            path_list.append(path)
                    frame_counter += 1
            finally:
                # Release the handles of THIS video; previously only the
                # last video was released, and a run that skipped every
                # video failed on the unbound names.
                read_video.release()
                if spec_video is not None:
                    spec_video.release()
                if tiny_video is not None:
                    tiny_video.release()
            inner_counter += 1
            if inner_counter % 100 == 0:
                save_progress()
        else:
            if verbose:
                print(overall_counter + 1, inner_counter + 1, path, label,
                      'WARNING: feature previously extracted!')
        overall_counter += 1
    save_progress()
Esempio n. 6
0
from phonetics import Phonetics
from stemmer import Stemmer
from advas import Advas

# Global settings: the text encoding passed to every Entry below.
encoding = "utf-8"

# --- preprocessing ---

# Planned steps that have no code in this excerpt:
# - load vocabulary
# - load topic dependencies

# - prepare search entries
# Keyword descriptors shared by the first four entries. The mapping is
# keyword -> number (presumably a weight -- confirm against Descriptors).
keywordList = {'Essen': 0.5, 'Literatur': 0.4}
keywords = Descriptors(keywordList)

# Classification descriptors built from an empty mapping.
classificationList = {}
classification = Descriptors(classificationList)

# Each Entry receives a list of text lines, the encoding and the two
# Descriptors objects defined above.
searchEntry1 = Entry(["Stattdessen gab es Eintopf."], encoding, keywords,
                     classification)
searchEntry2 = Entry(["Das Krankenhaus am Rande der Grossstadt"], encoding,
                     keywords, classification)
searchEntry3 = Entry(["was haben wir gelacht,\n", "als wir an dich gedacht"],
                     encoding, keywords, classification)
searchEntry4 = Entry(["das ist ein test, nicht wahr.\n", "Oder doch?"],
                     encoding, keywords, classification)

# Fifth sample: a file name and its own keyword descriptors; how they are
# combined is not shown in this excerpt.
filename5 = "lausanne.txt"
keywords5 = Descriptors({'Lausanne': 0.9, 'Vaud': 0.9})
Esempio n. 7
0
class FaceSpoofing:
    def __init__(self):
        """Print a banner and set every attribute to its default value."""
        print('Face Spoofing Class')

        # Feature extraction settings.
        self._color_space = 0
        self._descriptor = Descriptors()
        self._gaussian_var = 2.0   # sigma passed to the Gaussian blur
        self._kernel_size = 7      # kernel size of the smoothing filters
        self._size = (640, 360)    # (width, height) used when resizing
        self._vr_height = 1
        self._vr_width = 30

        # Accumulated data.
        self._dictionary = {}
        self._features = []
        self._fourier = []
        self._images = []
        self._labels = []
        self._paths = []

        # Model state; the string 'None' (not the None object) means
        # "unset".
        self._models = None
        self._neg_label = 'None'
        self._pos_label = 'None'
        self._type = 'None'

    def __build_dictionary(self):
        lab_dict = {
            label: number
            for (number, label
                 ) in zip(range(self.get_num_classes()), self.get_classes())
        }
        num_dict = {
            number: label
            for (number, label
                 ) in zip(range(self.get_num_classes()), self.get_classes())
        }
        self._dictionary = dict(
            list(lab_dict.items()) + list(num_dict.items()))
        print(self._dictionary)

    def __channel_swap(self, image):
        spare = copy.copy(image)
        image[:, :, 0] = spare[:, :, 2]
        image[:, :, 2] = spare[:, :, 0]
        return image

    def __feature_sampling(self, num_samples=100):
        rand_features = list()
        rand_labels = list()
        for cat in self.get_classes():
            cat_indices = [
                index for (index, value) in enumerate(self._labels)
                if value == cat
            ]
            cat_sampled = random.sample(cat_indices, num_samples)
            cat_features = [self._features[index] for index in cat_sampled]
            cat_labels = [self._labels[index] for index in cat_sampled]
            rand_features.extend(cat_features)
            rand_labels.extend(cat_labels)
        return rand_features, rand_labels

    def __manage_bagging_results(self, dictionary, neg_list, pos_list):
        pos_value = sum(pos_list) / len(pos_list)
        neg_value = sum(neg_list) / len(neg_list)
        if self._pos_label in dictionary:
            dictionary[self._pos_label].append(pos_value)
        else:
            dictionary[self._pos_label] = [pos_value]
        if self._neg_label in dictionary:
            dictionary[self._neg_label].append(neg_value)
        else:
            dictionary[self._neg_label] = [neg_value]
        return dictionary

    def __manage_results(self, dictionary, score_list):
        for (label, result) in score_list:
            if label in dictionary:
                dictionary[label].append(result)
            else:
                dictionary[label] = [result]
        for label in self.get_classes():
            if label not in dictionary:
                dictionary[label] = [0.0]
        return dictionary

    def __mean_and_return(self, dictionary):
        new_list = {
            key: float(np.mean(value))
            for (key, value) in dictionary.items()
        }
        return new_list

    def __mean_and_sort(self, dictionary):
        new_list = [(key, float(np.mean(value)))
                    for (key, value) in dictionary.items()]
        new_list.sort(key=lambda tup: tup[1], reverse=True)
        return new_list

    def get_gray_image(self, color_img):
        """Convert ``color_img`` with ``cv.COLOR_BGR2GRAY`` and return it."""
        gray_img = cv.cvtColor(color_img, cv.COLOR_BGR2GRAY)
        return gray_img

    def get_residual_noise(self, gray_img, filter_type='median'):
        if filter_type == 'median':
            blurred = cv.medianBlur(src=gray_img, ksize=self._kernel_size)
            noise = cv.subtract(gray_img, blurred)
        elif filter_type == 'gaussian':
            blurred = cv.GaussianBlur(src=gray_img,
                                      ksize=(self._kernel_size,
                                             self._kernel_size),
                                      sigmaX=self._gaussian_var,
                                      sigmaY=0.0)
            noise = cv.subtract(gray_img, blurred)
        else:
            raise ValueError(
                'ERROR: Two smoothing methods available: median and gaussian')
        return noise

    def get_classes(self):
        categories = set(self._labels)
        return list(categories)

    def get_fourier_spectrum(self, noise_img):
        fft_img = np.fft.fft2(noise_img)
        fts_img = np.fft.fftshift(fft_img)
        mag_img = np.abs(fts_img)
        log_img = np.log(1 + mag_img)
        return log_img

    def get_num_classes(self):
        categories = set(self._labels)
        return len(categories)

    def gray2spec_pipeline(self, sample_image, show=False):
        """Turn a colour image into the spectrum of its residual noise.

        The image goes through ``get_gray_image``, ``get_residual_noise``
        (median filter) and ``get_fourier_spectrum``. When ``show`` is true
        the spectrum is also displayed in a window named ``'spectrum'``.

        Returns:
            The spectrum returned by ``get_fourier_spectrum``.
        """
        gray = self.get_gray_image(color_img=sample_image)
        noise = self.get_residual_noise(gray_img=gray, filter_type='median')
        spectrum = self.get_fourier_spectrum(noise_img=noise)
        if not show:
            return spectrum
        preview = cv.normalize(spectrum, 0, 255, cv.NORM_MINMAX)
        cv.imshow('spectrum', preview)
        cv.waitKey(1)
        return spectrum

    def gray2feat_pipeline(self, sample_image, show=False):
        """Compute the concatenated feature vector of a BGR image.

        Four descriptors are concatenated along axis 0, in this order:
        HOG of the grey image, LBP of the HSV image, LBP of the YCrCb image
        and GLCM of the residual-noise Fourier spectrum. When ``show`` is
        true the spectrum is also displayed in a window named
        ``'spectrum'``.
        """
        gray = cv.cvtColor(sample_image, cv.COLOR_BGR2GRAY)
        hsv = cv.cvtColor(sample_image, cv.COLOR_BGR2HSV)
        ycrcb = cv.cvtColor(sample_image, cv.COLOR_BGR2YCrCb)

        noise = self.get_residual_noise(gray_img=gray, filter_type='median')
        spectrum = self.get_fourier_spectrum(noise_img=noise)

        parts = []
        parts.append(
            self._descriptor.get_hog_feature(image=gray,
                                             pixel4cell=(96, 96),
                                             cell4block=(1, 1),
                                             orientation=8))
        # NOTE(review): bins=265 may be a typo for 256 -- confirm before
        # changing it.
        for color_image in (hsv, ycrcb):
            parts.append(
                self._descriptor.get_lbp_ch_feature(image=color_image,
                                                    bins=265,
                                                    points=8,
                                                    radius=1))
        parts.append(
            self._descriptor.get_glcm_feature(image=spectrum,
                                              dists=[1, 2],
                                              shades=20))
        sample_feature = np.concatenate(tuple(parts), axis=0)

        if show:
            preview = cv.normalize(spectrum, 0, 255, cv.NORM_MINMAX)
            cv.imshow('spectrum', preview)
            cv.waitKey(1)
        return sample_feature

    def import_features(self, feature_dict):
        for ((label, path), features) in feature_dict.items():
            print('Imported Features: ', (label, path), len(features),
                  len(features[0]))
            for feat in features:
                self._features.append(feat)
                self._labels.append(label)
                self._paths.append(path)

    def load_model(self, file_name='saves/model.npy'):
        self._labels, self._models, self._type = np.load(file_name)

    def obtain_image_features(self,
                              folder_path,
                              dataset_tuple,
                              new_size=None,
                              file_name='saves/image_features.npy'):
        """Extract one feature vector per image and save all stored features.

        Args:
            folder_path: Directory joined with each relative image path.
            dataset_tuple: Iterable of ``(path, label)`` pairs.
            new_size: Optional ``(width, height)`` that replaces
                ``self._size`` before resizing.
            file_name: Target of ``np.save``; receives ``self._features``
                and ``self._labels`` once all images are processed.

        Feature vectors that contain NaN are skipped.
        """
        if new_size is not None:
            self._size = new_size
        for (path, label) in dataset_tuple:
            sample_path = os.path.join(folder_path, path)
            # NOTE(review): cv.imread presumably yields None for an
            # unreadable file, which would make cv.resize fail -- confirm
            # whether such files should be skipped instead.
            sample_image = cv.imread(sample_path, cv.IMREAD_COLOR)
            scaled_image = cv.resize(sample_image,
                                     (self._size[0], self._size[1]),
                                     interpolation=cv.INTER_AREA)
            feature = self.gray2feat_pipeline(scaled_image)
            if not np.any(np.isnan(feature)):
                self._features.append(feature)
                self._labels.append(label)
        # Feature vectors and labels form ragged rows; current NumPy only
        # builds such an array when dtype=object is given explicitly.
        np.save(file_name,
                np.array([self._features, self._labels], dtype=object))

    def obtain_video_features(self,
                              folder_path,
                              dataset_tuple,
                              frame_drop=1,
                              max_frames=60,
                              new_size=None,
                              file_name='saves/video_features.npy',
                              verbose=False):
        """Extract per-frame features from videos, saving after each video.

        Args:
            folder_path: Directory joined with each relative video path.
            dataset_tuple: Iterable of ``(path, label)`` pairs.
            frame_drop: Only frames whose index is a multiple of this value
                are processed.
            max_frames: Reading stops once the frame index exceeds this
                value (indices ``0..max_frames`` are considered).
            new_size: Optional ``(width, height)`` that replaces
                ``self._size`` before resizing.
            file_name: Target of ``np.save``; rewritten after every video
                with all features and labels stored so far.
            verbose: When true, print a progress line per video.

        Feature vectors that contain NaN are skipped.
        """
        if new_size is not None:
            self._size = new_size
        for video_number, (path, label) in enumerate(dataset_tuple, start=1):
            if verbose:
                print(video_number, path, label)
            sample_path = os.path.join(folder_path, path)
            sample_video = cv.VideoCapture(sample_path)
            try:
                frame_counter = 0
                while sample_video.isOpened():
                    ret, sample_frame = sample_video.read()
                    if not ret or frame_counter > max_frames:
                        break
                    if frame_counter % frame_drop == 0:
                        scaled_frame = cv.resize(
                            sample_frame, (self._size[0], self._size[1]),
                            interpolation=cv.INTER_AREA)
                        feature = self.gray2feat_pipeline(scaled_frame)
                        if not np.any(np.isnan(feature)):
                            self._features.append(feature)
                            self._labels.append(label)
                    frame_counter += 1
            finally:
                # The capture was never released before, leaking one
                # handle per video.
                sample_video.release()
            # Feature vectors and labels form ragged rows; current NumPy
            # only builds such an array when dtype=object is given.
            np.save(file_name,
                    np.array([self._features, self._labels], dtype=object))

    def predict_feature_oaa(self, probe_features, drop_frame, threshold):
        class_dict = dict()
        for (index, feature) in enumerate(probe_features):
            if index % drop_frame == 0:
                results = [
                    float(model[0].predict(np.array([feature])))
                    for model in self._models
                ]
                labels = [model[1] for model in self._models]
                scores = list(
                    map(lambda left, right: (left, right), labels, results))
                class_dict = self.__manage_results(class_dict, scores)
        class_list = self.__mean_and_sort(class_dict)
        best_label, best_score = class_list[0]
        return (best_label, best_score)

    def predict_feature_bag(self, probe_features, drop_frame, threshold):
        mean_list = list()
        for (index, feature) in enumerate(probe_features):
            if index % drop_frame == 0:
                results = [
                    float(model.predict(np.array([feature])))
                    for model in self._models
                ]
                binnary = [+1.0 if result > 0.0 else 0.0 for result in results]
                mean_list.append(sum(binnary) / len(binnary))
        score = np.mean(mean_list)
        label = self._pos_label if score >= threshold else self._neg_label
        return (label, score)

    def predict_feature_cnn(self, probe_features, drop_frame, threshold):
        class_dict = dict()
        for (index, feature) in enumerate(probe_features):
            if index % drop_frame == 0:
                results = self._models.predict(feature).ravel()
                labels = [
                    self._dictionary[index] for index in range(len(results))
                ]
                scores = list(
                    map(lambda left, right: (left, right), labels, results))
                class_dict = self.__manage_results(class_dict, scores)
        class_list = self.__mean_and_sort(class_dict)
        best_label, best_score = class_list[0]
        return (best_label, best_score)

    def predict_feature(self, probe_features, drop_frame=10, threshold=0.50):
        if self._type == 'OAAPLS' or self._type == 'OAASVM':
            return self.predict_feature_oaa(probe_features, drop_frame,
                                            threshold)
        elif self._type == 'EPLS' or self._type == 'ESVM':
            return self.predict_feature_bag(probe_features, drop_frame,
                                            threshold)
        elif self._type == 'CNN':
            return self.predict_feature_cnn(probe_features, drop_frame,
                                            threshold)
        else:
            return (None, None, None)

    def predict_image(self, probe_image):
        if self._type == 'OAAPLS' or self._type == 'OAASVM':
            class_dict = dict()
            scaled_image = cv.resize(probe_image,
                                     (self._size[0], self._size[1]),
                                     interpolation=cv.INTER_AREA)
            feature = self.gray2feat_pipeline(scaled_image)
            results = [
                float(model[0].predict(np.array([feature])))
                for model in self._models
            ]
            labels = [model[1] for model in self._models]
            scores = list(
                map(lambda left, right: (left, right), labels, results))
            class_dict = self.__manage_results(class_dict, scores)
            return self.__mean_and_sort(class_dict)
        elif self._type == 'CNN':
            pass
        else:
            raise ValueError('Error predicting probe image')

    def predict_video(self, probe_video, drop_frame=10, threshold=0.50):
        frame_counter = 0
        class_dict = dict()
        probe_features = list()
        while (probe_video.isOpened()):
            ret, probe_frame = probe_video.read()
            if ret:
                if frame_counter % drop_frame == 0:
                    scaled_frame = cv.resize(probe_frame,
                                             (self._size[0], self._size[1]),
                                             interpolation=cv.INTER_AREA)
                    feature = self.gray2feat_pipeline(scaled_frame)
                    probe_features.append(feature)
                frame_counter += 1
            else:
                break
        if self._type == 'OAAPLS' or self._type == 'OAASVM':
            return self.predict_feature_oaa(probe_features,
                                            drop_frame=1,
                                            threshold=threshold)
        elif self._type == 'EPLS' or self._type == 'ESVM':
            return self.predict_feature_bag(probe_features,
                                            drop_frame=1,
                                            threshold=threshold)
        elif self._type == 'CNN':
            return self.predict_feature_cnn(probe_features,
                                            drop_frame=1,
                                            threshold=threshold)
        else:
            return (None, None, None)

    def save_features(self, file_name='saves/train_feats.py'):
        np.save(file_name, [self._features, self._labels])

    def save_model(self, file_name='saves/model.npy'):
        np.save(file_name, [self._labels, self._models, self._type])

    def trainPLS(self, components=10, iterations=500):
        """Train one PLS regressor per class (one-against-all) and save.

        For each class a ``PLSRegression`` is fitted on all stored features
        with target ``+1.0`` for samples of that class and ``-1.0`` for the
        rest. The ``(model, label)`` pairs replace ``self._models``, the
        type becomes ``'OAAPLS'`` and the result is written to
        ``'saves/pls_model.npy'``.

        Args:
            components: ``n_components`` of each regressor.
            iterations: ``max_iter`` of each regressor.
        """
        from sklearn.cross_decomposition import PLSRegression
        self._models = []
        self._type = 'OAAPLS'
        print('Training One-Against-All PLS classifiers')
        for label in self.get_classes():
            regressor = PLSRegression(n_components=components,
                                      max_iter=iterations)
            targets = []
            for lab in self._labels:
                targets.append(+1.0 if label == lab else -1.0)
            fitted = regressor.fit(np.array(self._features),
                                   np.array(targets))
            self._models.append((fitted, label))
        self.save_model(file_name='saves/pls_model.npy')

    def trainSVM(self,
                 cpar=1.0,
                 mode='libsvm',
                 kernel_type='linear',
                 iterations=5000,
                 verbose=False):
        from sklearn.svm import LinearSVR, SVR, NuSVR
        self._type = 'OAASVM'
        self._models = list()
        print('Training One-Against-All SVM classifiers')
        for label in self.get_classes():
            if mode == 'libsvm':
                classifier = SVR(C=cpar, kernel=kernel_type, verbose=verbose)
            elif mode == 'liblinear':
                classifier = LinearSVR(C=cpar,
                                       max_iter=iterations,
                                       verbose=verbose)
            elif mode == 'libsvm-nu':
                classifier = NuSVR(nu=0.5,
                                   C=cpar,
                                   kernel=kernel_type,
                                   verbose=verbose)
            boolean_label = [label == lab for lab in self._labels]
            model = classifier.fit(np.array(self._features),
                                   np.array(boolean_label))
            self._models.append((model, label))
        self.save_model(file_name='saves/svm_model.npy')

    def trainEPLS(self,
                  models=50,
                  samples4model=50,
                  pos_label='live',
                  neg_label='spoof',
                  components=10,
                  iterations=500):
        """Train an ensemble of PLS regressors on random samples and save.

        Each of the ``models`` regressors is fitted on ``samples4model``
        randomly drawn samples per class, with target ``+1.0`` for
        ``pos_label`` and ``-1.0`` for everything else. The fitted models
        replace ``self._models``, the type becomes ``'EPLS'`` and the result
        is written to ``'saves/epls_model.npy'``.

        Args:
            models: Number of regressors in the ensemble.
            samples4model: Samples drawn per class for each regressor.
            pos_label: Label treated as the positive class.
            neg_label: Label reported for the negative class.
            components: ``n_components`` of each regressor.
            iterations: ``max_iter`` of each regressor.
        """
        from sklearn.cross_decomposition import PLSRegression
        self._models = []
        self._neg_label = neg_label
        self._pos_label = pos_label
        self._type = 'EPLS'
        print('Training an Embedding of PLS classifiers')
        for model_number in range(1, models + 1):
            regressor = PLSRegression(n_components=components,
                                      max_iter=iterations)
            rand_features, rand_labels = self.__feature_sampling(
                num_samples=samples4model)
            targets = []
            for lab in rand_labels:
                targets.append(+1.0 if self._pos_label == lab else -1.0)
            fitted = regressor.fit(np.array(rand_features),
                                   np.array(targets))
            self._models.append(fitted)
            print(' -> Training model %3d with %d random samples' %
                  (model_number, samples4model))
        self.save_model(file_name='saves/epls_model.npy')

    def trainESVM(self,
                  models=50,
                  samples4model=50,
                  pos_label='live',
                  neg_label='spoof',
                  cpar=1.0,
                  mode='libsvm',
                  kernel_type='linear',
                  iterations=5000,
                  verbose=False):
        from sklearn.svm import LinearSVR, SVR, NuSVR
        self._models = list()
        self._neg_label = neg_label
        self._pos_label = pos_label
        self._type = 'ESVM'
        print('Training an Embedding of SVM classifiers')
        for index in range(models):
            if mode == 'libsvm':
                classifier = SVR(C=cpar, kernel=kernel_type, verbose=verbose)
            elif mode == 'liblinear':
                classifier = LinearSVR(C=cpar,
                                       max_iter=iterations,
                                       verbose=verbose)
            elif mode == 'libsvm-nu':
                classifier = NuSVR(nu=0.5,
                                   C=cpar,
                                   kernel=kernel_type,
                                   verbose=verbose)
            rand_features, rand_labels = self.__feature_sampling(
                num_samples=samples4model)
            boolean_label = [
                +1.0 if self._pos_label == lab else -1.0 for lab in rand_labels
            ]
            model = classifier.fit(np.array(rand_features),
                                   np.array(boolean_label))
            self._models.append(model)
            print(' -> Training model %3d with %d random samples' %
                  (index + 1, samples4model))
        self.save_model(file_name='saves/esvm_model.npy')