Example #1
    def predict(self):

        filepath = self.file.file.file.name
        extension = self.file.name.split('.')[1:][0]

        if extension == 'wav':
            # read() returns (sample_rate, data); only the samples are used
            wav_data = read(filepath)
            features = numpy.array(wav_data[1], dtype=int)

        elif extension == 'png':
            # resize() returns a new image, so it has to be reassigned
            img = Image.open(filepath)
            img = img.resize((128, 128))
            img.save(filepath)

            features = numpy.array([cv2.imread(filepath)])

            # first order difference, computed over librosa's default 9-step window
            features[0, :, :, 1] = librosa.feature.delta(features[0, :, :, 0])

            # second order difference (delta of delta), giving the three
            # channels expected by ResNet-style models
            features[0, :, :, 2] = librosa.feature.delta(features[0, :, :, 1])

            features = numpy.transpose(features, (0, 2, 1, 3))
            features = reformat_data(features)
        else:
            raise ValidationError(
                'File not in supported format. Supported formats are png and wav.'
            )

        with open(self.training.model_path, 'rb') as f:
            my_model = pickle.load(f)

        y_pred = my_model.predict(features)

        self.result = y_pred[0]
        self.save()
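
A minimal standalone sketch of the PNG branch above, with a synthetic array standing in for cv2.imread and the project-specific reformat_data helper left out (the synthetic data is an illustrative assumption, not part of the original code):

import numpy
import librosa

# synthetic stand-in for cv2.imread(filepath): one 128x128, 3-channel image
features = numpy.zeros((1, 128, 128, 3), dtype=float)
features[0, :, :, 0] = numpy.random.rand(128, 128)

# first order difference over librosa's default 9-step window
features[0, :, :, 1] = librosa.feature.delta(features[0, :, :, 0])
# second order difference (delta of delta) fills the third channel
features[0, :, :, 2] = librosa.feature.delta(features[0, :, :, 1])

# swap the two spatial axes to match the layout the model expects
features = numpy.transpose(features, (0, 2, 1, 3))
print(features.shape)  # (1, 128, 128, 3)
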
Example #2
	def predict(self):
		filepath = self.file.file.file.name
		input_data_type = self.file.name.split('.')[1:][0]

		if self.training:
			with open(self.training.model_path, 'rb') as f:
				my_model = pickle.load(f)
		else:
			# No training given: pick the best matching one for this machine
			# and input data type instead.
			try:
				# all preprocessors linked to this machine that handle this data type
				machine_preprocessors = FilePreprocessor.objects.filter(
					input_data_type=input_data_type, machine_id=self.machine_id)

				# the most accurate successful training built on one of those preprocessors
				training = AutoMlTraining.objects.filter(
					preprocessing_object__in=machine_preprocessors,
					status='success',
					task_type=self.task_type,
					validator__scoring_strategy='accuracy').order_by(
						'-validator__score').first()

				with open(training.model_path, 'rb') as f:
					my_model = pickle.load(f)

				self.training = training
			except Exception:
				raise ValidationError("Either training or machine id has to be provided!")

		if input_data_type == 'wav':
			# read() returns (sample_rate, data); only the samples are used
			wav_data = read(filepath)
			features = numpy.array(wav_data[1], dtype=int)

		elif input_data_type == 'png':
			# resize() returns a new image, so it has to be reassigned
			img = Image.open(filepath)
			img = img.resize((128, 128))
			img.save(filepath)

			features = numpy.array([cv2.imread(filepath)])

			# first order difference, computed over librosa's default 9-step window
			features[0, :, :, 1] = librosa.feature.delta(features[0, :, :, 0])

			# second order difference (delta of delta), giving the three
			# channels expected by ResNet-style models
			features[0, :, :, 2] = librosa.feature.delta(features[0, :, :, 1])

			features = numpy.transpose(features, (0, 2, 1, 3))
			features = reformat_data(features)
		else:
			raise ValidationError('File not in supported format. Supported formats are png and wav.')

		y_pred = my_model.predict(features)

		self.result = y_pred[0]
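
The fallback branch above selects the most accurate successful training for the machine; .first() returns None when nothing matches, which is what the except clause guards against. A plain-Python sketch of that selection logic, with stand-in dictionaries instead of the Django querysets (the values are made up):

trainings = [
    {'status': 'success', 'score': 0.87, 'model_path': 'models/a.dump'},
    {'status': 'success', 'score': 0.91, 'model_path': 'models/b.dump'},
    {'status': 'fail', 'score': 0.99, 'model_path': 'models/c.dump'},
]

# keep only successful runs, ordered by descending score
successful = sorted(
    (t for t in trainings if t['status'] == 'success'),
    key=lambda t: t['score'],
    reverse=True,
)
best = successful[0] if successful else None

if best is None:
    raise ValueError('Either training or machine id has to be provided!')
print(best['model_path'])  # models/b.dump
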
Example #3
    def train(self):
        print('in tpot training')
        try:
            # Store the save location for the fitted pipeline
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
            dump_file = os.path.join(AUTO_ML_MODELS_PATH, 'tpot_' + timestamp + '.dump')

            x = numpy.load(os.path.join(AUTO_ML_DATA_PATH, self.training_data_filename))
            y = numpy.load(os.path.join(AUTO_ML_DATA_PATH, self.training_labels_filename))

            if self.preprocessing_object.input_data_type == 'png':
                x = reformat_data(x)

            # training the models; a quick local configuration used previously:
            # verbosity=2, max_time_mins=90, max_eval_time_mins=5,
            # config_dict='TPOT light', population_size=4, generations=3, n_jobs=1
            model = TPOTClassifier(
                generations=self.generations,
                population_size=self.population_size,
                offspring_size=self.offspring_size,
                mutation_rate=self.mutation_rate,
                crossover_rate=self.crossover_rate,
                scoring=self.scoring,
                cv=self.cv,
                subsample=self.subsample,
                n_jobs=self.n_jobs,
                # TPOT takes its time limits in minutes, while most other
                # frameworks take them in seconds.
                max_time_mins=self.max_time_mins,
                max_eval_time_mins=self.max_eval_time_mins,
                random_state=self.random_state,
                config_dict=self.config_dict,
                warm_start=self.warm_start,
                memory=self.memory,
                use_dask=self.use_dask,
                early_stop=self.early_stop,
                verbosity=self.verbosity,
                disable_update_check=self.disable_update_check
            )
            start = time.time()
            model.fit(x, y)
            end = time.time()
            print('training finished')

            with open(dump_file, 'wb') as f:
                print('about to save!')
                pickle.dump(model.fitted_pipeline_, f)
                print('model saved')

            self.training_time = round(end - start, 2)
            self.model_path = dump_file
            self.status = 'success'
            self.additional_remarks = str(model.fitted_pipeline_)
            self.save()

        except Exception as e:
            end = time.time()
            if 'start' in locals():
                self.training_time = round(end - start, 2)

            self.status = 'fail'
            self.additional_remarks = str(e)
            self.save()
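
The dump file written above holds model.fitted_pipeline_, which is a plain scikit-learn pipeline, so the predict() methods can later reload it with pickle alone. A sketch of that round trip, using a stand-in scikit-learn pipeline instead of an actual TPOT run (file name and estimators are illustrative):

import pickle
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

x, y = load_iris(return_X_y=True)
pipeline = make_pipeline(StandardScaler(), LogisticRegression(max_iter=200)).fit(x, y)

# serialize the fitted pipeline, as train() does with model.fitted_pipeline_
with open('tpot_sketch.dump', 'wb') as f:
    pickle.dump(pipeline, f)

# reload and predict, as the predict() methods do
with open('tpot_sketch.dump', 'rb') as f:
    reloaded = pickle.load(f)

print(reloaded.predict(x[:3]))
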
Example #4
    def train(self):

        self.status = 'in_progress'
        self.save()

        print('in autosklearn training')

        try:

            # Store the save location for the fitted model
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
            dump_file = os.path.join(AUTO_ML_MODELS_PATH,
                                     'auto_sklearn_' + timestamp + '.dump')

            print('loading training data for ' + self.training_labels_filename)
            x = numpy.load(
                os.path.join(AUTO_ML_DATA_PATH, self.training_data_filename))
            y = numpy.load(
                os.path.join(AUTO_ML_DATA_PATH, self.training_labels_filename))

            if self.preprocessing_object.input_data_type == 'png':
                print('reformatting png input data')
                x = reformat_data(x)

            model = autosklearn.classification.AutoSklearnClassifier(
                time_left_for_this_task=self.run_time,
                per_run_time_limit=self.per_instance_runtime,
                initial_configurations_via_metalearning=self.initial_configurations_via_metalearning,
                ml_memory_limit=self.memory_limit,
                ensemble_size=self.ensemble_size,
                ensemble_nbest=self.ensemble_nbest,
                seed=self.seed,
                include_estimators=self.include_estimators,
                exclude_estimators=self.exclude_estimators,
                include_preprocessors=self.include_preprocessors,
                exclude_preprocessors=self.exclude_preprocessors,
                resampling_strategy=self.resampling_strategy,
                tmp_folder=self.tmp_folder,
                output_folder=self.output_folder,
                delete_tmp_folder_after_terminate=self.delete_tmp_folder_after_terminate,
                delete_output_folder_after_terminate=self.delete_output_folder_after_terminate,
                shared_mode=self.shared_mode,
                smac_scenario_args=self.smac_scenario_args,
                logging_config=self.logging_config,
            )
            print('before training start')
            start = time.time()
            model.fit(x, y)
            end = time.time()
            print(model.show_models())

            # store the fitted ensemble so it can be reloaded for prediction
            with open(dump_file, 'wb') as f:
                pickle.dump(model, f)

            self.training_time = round(end - start, 2)
            self.status = 'success'
            self.model_path = dump_file
            self.additional_remarks = model.show_models()
            self.save()
            print('Status final ' + self.status)

        except Exception as e:
            end = time.time()
            if 'start' in locals():
                print('failed after: ' + str(end - start))
                self.training_time = round(end - start, 2)

            self.status = 'fail'
            self.additional_remarks = str(e)
            self.save()
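
Both train() methods use the same failure-path bookkeeping: the timer variable start only exists in locals() if the exception was raised after training began, so the elapsed time is recorded only in that case. A small sketch of the pattern with a hypothetical run_training helper (not part of the original code):

import time

def run_training(fail_before_fit):
    try:
        if fail_before_fit:
            raise RuntimeError('failed while preparing the data')
        start = time.time()
        time.sleep(0.1)  # stand-in for model.fit(x, y)
        raise RuntimeError('failed during training')
    except Exception as e:
        end = time.time()
        # 'start' is only defined if the failure happened after the timer started
        training_time = round(end - start, 2) if 'start' in locals() else None
        return training_time, str(e)

print(run_training(fail_before_fit=True))   # (None, 'failed while preparing the data')
print(run_training(fail_before_fit=False))  # (~0.1, 'failed during training')
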
Example #5
    def predict(self):
        try:
            if self.model.framework == 'auto_sklearn' or self.model.framework == 'tpot':
                with open(self.model.model_path, 'rb') as f:
                    my_model = pickle.load(f)

                x = numpy.load(
                    os.path.join(AUTO_ML_DATA_PATH,
                                 self.model.validation_data_filename))
                y = numpy.load(
                    os.path.join(AUTO_ML_DATA_PATH,
                                 self.model.validation_labels_filename))

                if self.model.preprocessing_object.input_data_type == 'png':
                    x = reformat_data(x)

            elif self.model.framework == 'auto_keras':
                my_model = pickle_from_file(self.model.model_path)
                x, y = load_ml_data(self.model.validation_data_filename,
                                    self.model.validation_labels_filename,
                                    False,
                                    self.model.label_one_hot_encoding_binary)
            else:
                raise NotImplementedError(
                    'Framework ' + str(self.model.framework) + ' is not supported for validation.')

            y_pred = my_model.predict(x)

            if self.scoring_strategy == 'accuracy':
                score = sklearn.metrics.accuracy_score(y, y_pred)
            elif self.scoring_strategy == 'precision':
                score = sklearn.metrics.average_precision_score(y, y_pred)
            elif self.scoring_strategy == 'roc_auc':
                score = sklearn.metrics.roc_auc_score(y, y_pred)
            else:
                score = 0
                print('unknown scoring strategy, defaulting score to 0')

            print(numpy.unique(y))

            cnf_matrix = confusion_matrix(y, y_pred)

            target_names = []
            if len(numpy.unique(y)) == 2:
                target_names = numpy.unique(y)
            else:
                # strip dataset-specific prefixes/suffixes from the class labels
                for label in numpy.unique(y):
                    target_names.append(
                        label.replace('_behavior', '')
                             .replace('_rod(0.5mm)', '')
                             .replace('_condition', '')
                             .replace('_element', '')
                             .replace('force_', ''))

            self.plot_confusion_matrix(cm=cnf_matrix,
                                       normalize=False,
                                       target_names=target_names,
                                       title="Confusion Matrix")

            self.status = 'success'
            self.score = str(round(score, 4))
            self.save()

        except Exception as e:
            self.status = 'fail'
            self.additional_remarks = str(e)
            self.save()
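
A short standalone sketch of the scoring step above, with made-up string labels in place of the loaded validation data:

import numpy
import sklearn.metrics
from sklearn.metrics import confusion_matrix

y = numpy.array(['bend_condition', 'break_condition', 'bend_condition', 'break_condition'])
y_pred = numpy.array(['bend_condition', 'bend_condition', 'bend_condition', 'break_condition'])

score = sklearn.metrics.accuracy_score(y, y_pred)
cnf_matrix = confusion_matrix(y, y_pred)

# strip dataset-specific suffixes from the class labels, as the label loop above does
target_names = [label.replace('_condition', '') for label in numpy.unique(y)]

print(round(score, 4))  # 0.75
print(target_names)     # ['bend', 'break']
print(cnf_matrix)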