def _test(train_set, validate_set):
    """
    Fit a fresh classifier on the training data and run it on the validation data.

    :param train_set: Training set
    :param validate_set: Validation set
    :return (class_actuals, class_predictions): tuple holding the actual
        classes and the predicted classes, as returned by
        ``Classifier.classify``
    """
    model = Classifier()
    model.train(train_set)
    # classify() is expected to yield exactly two values; unpacking enforces it.
    actuals, predictions = model.classify(validate_set)
    return (actuals, predictions)
def __init__(self, comm=None):
    """
    Set up the Leap controller, the feature transformer and the classifier.

    :param comm: optional communication handle, stored unchanged on
        ``self.comm`` (per the original note: used with
        stream_multiprocess.py)
    """
    self.controller = Leap.Controller()
    self.comm = comm

    # The transformer is, per the original note, used with stream_serial.py.
    config_path = './tests/config.json'
    mapping_path = './tests/mapping.txt'
    self.T = Transformer(config_file=config_path, feature_map=mapping_path)

    # Alternative model files that were tried:
    #   './signpy/streaming/my_model_convlstm.h5'  (tensorflow 0.12)
    #   './signpy/streaming/1_normalized.h5'
    #   './signpy/streaming/0.h5'
    model_path = './signpy/streaming/0_normalized.h5'
    self.classifier = Classifier(model_path)
def evaluate_SVM(self, type="standard"):
    """
    Run ``Testing.evaluate_classifier`` with an SVM classifier.

    The data frame, feature representation, fold quantity, test size and
    number of training files are all taken from this instance.

    :param type: evaluation type forwarded unchanged to
        ``Testing.evaluate_classifier`` (name kept for caller compatibility
        even though it shadows the builtin)
    :return: ``(average_classifier, classifier_metrics)`` as produced by
        ``Testing.evaluate_classifier``
    """
    frame = self.dataFrame
    svm = Classifier(Algorithm.SVM, "SVM")
    average, metrics = Testing.evaluate_classifier(
        frame,
        classifier=svm,
        feature_representation=self.f_vector,
        fold_quantity=self.fold_quantity,
        test_size=self.test_size,
        type=type,
        number_of_files_for_training=self.number_of_files_to_test,
    )
    return average, metrics
def classifierThreadFunction():
    """
    Thread body: classify pending data every 30 minutes, forever.

    Each cycle takes the module-level ``lock``, builds a ``Classifier`` from
    the module-level ``counter``, classifies the unclassified data, shuts the
    classifier down and displays statistics.

    The lock is released in a ``finally`` clause so that an exception raised
    during classification cannot leave it held and block every other thread
    that waits on it. The exception itself still propagates.
    """
    global lock, counter
    while True:
        lock.acquire()
        try:
            print("Lock given to classifier thread")
            classifier = Classifier(counter)
            classifier.classifyUnclassifedData()
            classifier.end()
            print("Lock Released by Classifier Thread")
            displayStatistics()
        finally:
            lock.release()
        # Sleep outside the critical section: 1800 s = 30 minutes.
        time.sleep(1800)
def __init__(self, input_electrodes, number_of_training_samples,
             signal_duration, signal_timestep, simulation_timestep, subject):
    """
    Wire together the encoder, the reservoir and the classifier.

    The current working directory is captured once and handed to both the
    encoder and the reservoir.

    :param input_electrodes: sized collection of electrodes; its length is
        passed to the encoder
    :param number_of_training_samples: stored and forwarded to the encoder
    :param signal_duration: forwarded to the encoder
    :param signal_timestep: forwarded to the encoder
    :param simulation_timestep: forwarded to the reservoir
    :param subject: forwarded to the encoder
    """
    self.path = os.getcwd()
    self.input_electrodes = input_electrodes
    self.number_of_training_samples = number_of_training_samples

    electrode_count = len(input_electrodes)
    self.encoder = Encoder(
        self.path,
        electrode_count,
        number_of_training_samples,
        signal_duration,
        signal_timestep,
        subject,
    )
    self.reservoir = NeuCubeReservoir(self.path, simulation_timestep)
    self.classifier = Classifier()
def train():
    """
    Build a classifier from the file pairs under data/orginal and data/result.

    Steps: load the file pairs, extract samples from each pair, preprocess
    them into training data, then construct a ``Classifier`` from the first
    100000 training entries.
    """
    loader = FileLoader("data/orginal", "data/result")
    pairs = loader.getFilePairs()

    print("Sample extracting")
    samples = []
    for pair in pairs:
        extracter = SampleExtracter(pair[0], pair[1], 10)
        samples.extend(extracter.getSamples())

    print("Preprocessing")
    samples = Preprocessor(samples).getTrainingData()

    # The classifier is constructed but not used further in this function.
    c = Classifier(samples[:100000], 10)
def testGoat(self):
    """Check that 2 goats mixed into a flock of 200 sheep are detected."""
    print("Test if classifier can detect goats amidst group of sheep")

    flock = AnimalCreator.create_sheep(200)
    # Merge the goats into the sheep collection.
    flock.update(AnimalCreator.create_goat(2))

    manhattan = self.kda.getDetector("Manhattan")
    counts = Classifier(flock, manhattan, True).classifyUser()
    sheepIDs, lambsIDs, goatsIDs, wolvesIDs = counts

    self.assertEqual(sheepIDs, 200)
    # The lamb count cannot be guaranteed, so it is not asserted.
    self.assertEqual(goatsIDs, 2)
def train_NL_and_evaluate(
    self,
    dfs,
    batch_size,
    params_generator,
    fraction,
    class_labels,
    reg_dense=0.005,
    reg_out=0.005,
    nums_of_unfrozen_layers=[5, 5, 6, 7],
    lrs=[1e-3, 1e-4, 5e-5, 1e-5],
    epochs=[5, 5, 20, 25],
    verbose_epoch=0,
    verbose_cycle=1,
):
    """
    Trains and evaluates a nonlinear classifier on top of the base_model.

    Five independent attempts are made. Each attempt builds a new
    ``Classifier`` on ``self.base_model``, trains it and evaluates it on the
    test data. The attempt with the highest accuracy is reported on stdout.

    :param dfs: mapping of data frames; ``dfs["test"]`` is used for the
        final evaluation
    :param batch_size: forwarded to ``Classifier.get_generators``
    :param params_generator: mapping forwarded to ``get_generators``; its
        ``"num_classes"`` entry sizes the classifier
    :param fraction: forwarded to ``get_generators`` and ``train``
    :param class_labels: forwarded to ``evaluate_on_test``
    :param reg_dense: forwarded to the ``Classifier`` constructor
    :param reg_out: forwarded to the ``Classifier`` constructor
    :param nums_of_unfrozen_layers: forwarded to ``Classifier.train``
    :param lrs: forwarded to ``Classifier.train``
    :param epochs: forwarded to ``Classifier.train``
    :param verbose_epoch: forwarded to ``Classifier.train``
    :param verbose_cycle: forwarded to ``Classifier.train``; when truthy the
        attempt number is also printed here
    :return: None; the best attempt and its report are printed

    NOTE: the list defaults are shared between calls. This function never
    mutates them; they are kept as lists for interface compatibility.
    """
    # Start empty so the first attempt is always recorded. Seeding the best
    # accuracy with 0 left "report"/"attempt" unset (KeyError below) whenever
    # no attempt scored strictly above 0.
    results = {}
    for i in range(5):
        if verbose_cycle:
            print(f"Learning attempt {i+1}")

        classifier = Classifier(
            base_model=self.base_model,
            num_classes=params_generator["num_classes"],
            reg_dense=reg_dense,
            reg_out=reg_out,
        )
        data_train, data_val, data_test = classifier.get_generators(
            dfs, fraction, batch_size, params_generator)
        classifier.train(
            data_train,
            data_val,
            fraction,
            nums_of_unfrozen_layers,
            lrs,
            epochs,
            verbose_epoch,
            verbose_cycle,
        )
        acc, report = classifier.evaluate_on_test(dfs["test"], data_test,
                                                  class_labels)

        if not results or results["acc"] < acc:
            results["acc"] = acc
            results["report"] = report
            results["attempt"] = i + 1

    print("Best result from attempt", str(results["attempt"]))
    print(results["report"])
def build():
    """
    Build the Naive Bayes classifier from the train-set CSV.

    Reads the structure file, loads the training CSV into the module-level
    ``train_set``, and aborts with an error dialog when it is empty.
    Otherwise creates the module-level ``classifier``, pre-processes the data
    with the bin count typed into the discretization entry, and shows a
    completion dialog.
    """
    global train_set, classifier

    read_structure_file(folder_path)
    train_set = read_csv(trainFile)
    if train_set.empty:
        tkMessageBox.showerror("Naive Bayes Classifier",
                               "'train.csv' file is empty")
        return

    classifier = Classifier(features_list, train_set)
    # The bin count is parsed after the classifier has been created.
    bin_count = int(discretization_bins_entry.get())
    classifier.pre_process_data(bin_count)
    tkMessageBox.showinfo("Naive Bayes Classifier",
                          "Building classifier using train-set is done!")
def unit_test(x_train, y_train, nb_iter=1):
    """
    Cross-validate a default ``Classifier`` with stratified shuffle splits.

    :param x_train: feature data passed as ``X`` to ``cross_val_score``
    :param y_train: labels; also passed to ``StratifiedShuffleSplit``
    :param nb_iter: second positional argument of ``StratifiedShuffleSplit``
    :return: the accuracy scores returned by ``cross_val_score``
    """
    # 20% of the data is held out per split; the seed is fixed at 15.
    splitter = StratifiedShuffleSplit(y_train, nb_iter, test_size=0.2,
                                      random_state=15)
    return cross_val_score(Classifier(), X=x_train, y=y_train,
                           scoring='accuracy', cv=splitter)
def load_checkpoint(filepath):
    """
    Restore a classifier and its companion model from a saved checkpoint.

    The checkpoint must provide the keys ``input_size``, ``output_size``,
    ``cl_hidden_layers``, ``state_dict``, ``min_validation_loss`` and
    ``model_arch``.

    :param filepath: location handed to ``torch.load``
    :return: ``(classifier, model, min_validation_loss)``
    """
    saved = torch.load(filepath)

    # Rebuild the classifier with the saved dimensions, then restore weights.
    classifier = Classifier(saved['input_size'],
                            saved['output_size'],
                            saved['cl_hidden_layers'])
    classifier.load_state_dict(saved['state_dict'])

    # Validation loss recorded when the checkpoint was written.
    best_loss = saved['min_validation_loss']

    # Architecture of the pre-trained model, resolved by name.
    backbone = get_model(saved['model_arch'])
    return classifier, backbone, best_loss
def main():
    """
    Train the classifier on the training data and report on the test data.

    :return: ``None`` on success, ``5`` when any exception was raised (the
        exception is printed first)
    """
    try:
        trainingData, tuningData, testData, priorSpam = buildDataSets()
        nbc = Classifier(priorSpam, COUNT_THRESHOLD, SMOOTHING_FACTOR,
                         DEFAULT_PROBABILITY)
        # nbc = Classifier2(priorSpam, 0, .01, None)
        nbc.train(trainingData)
        nbc.classify(testData)
        report(testData)
    except Exception as e:
        # Function-call form of print: valid on Python 3, and prints the same
        # text on Python 2 for a single argument.
        print(e)
        return 5
def run():
    """
    Train fully connected networks for N = 1, 2, 3 and plot their losses.

    For each N a ``Classifier`` wrapping ``Networks.FullyConnectedNet(N, 10)``
    is trained with an SGD optimizer. The test and train loss curves are then
    shown in their own figure.
    """
    for N in (1, 2, 3):
        net = Networks.FullyConnectedNet(N, 10)
        model = Classifier(net)

        sgd = optimizers.SGD()
        sgd.setup(model)
        train_losses, test_losses = trainNetwork(model, sgd)

        # Test curve is drawn first, matching the legend order.
        plt.plot(test_losses, label='Test Loss')
        plt.plot(train_losses, label='Train Loss')
        plt.legend()
        plt.ylabel("Loss")
        plt.xlabel("Epoch")
        plt.title("Loss as a function of epochs, N=%s" % N)
        plt.show()
def transformer_pipe(T, cr):
    """
    Endlessly receive data, transform it, and print the model's prediction.

    :param T: object whose ``transform(data)`` returns a pair; the first
        element is sliced as a 2-D array
    :param cr: receiver whose ``recv_json()`` returns a mapping with a
        ``'data'`` entry
    """
    classifier = Classifier('./tests/my_model_convlstm.h5')
    while True:
        payload = cr.recv_json()['data']
        started = time.time()

        features, _labels = T.transform(payload)
        # Only the columns from index 186 onward are fed to the model.
        features = features[:, 186:]
        prediction = classifier.predict(features)  # TODO: INSERT NN

        print('Shape of data: {}'.format(features.shape))
        print('Prediction: {}'.format(prediction))
        finished = time.time()
        print('Time elapsed for processing: {}'.format(finished - started))
def classify(self, classifier):
    """
    Switch the view to classification mode with the given classifier.

    Any regressor or density estimator is cleared, a new ``Classifier`` is
    built from the current parameters and feature space, and the feature
    space computations are re-run. Any exception raised while doing so is
    shown to the user in a warning dialog instead of propagating.

    :param classifier: classifier selector forwarded to ``Classifier``
    """
    self.regressor = None
    self.densityEstimator = None
    self.classifier = Classifier(classifier, self.classifierParameters,
                                 self.featurespace)
    try:
        self.classifier.initialize()
        self.runFeatureSpaceComputations()
        self.operationStack.add(Operation(self, "dummy", None))
    except Exception as error:
        ok_button = QtWidgets.QMessageBox.Ok
        QtWidgets.QMessageBox.warning(self, 'Error', str(error),
                                      ok_button, ok_button)
async def hello(self):
    """
    Generate a model, classify one target recording and print its accuracy.

    The predictions for ``target_3.wav`` are printed, then compared against
    the hard-coded character sequence and the top-5 accuracy is printed.
    """
    clf = await self.model_generator.generate_model(
        "../samples/k260_train_test_accuracy", "../samples/model_1")
    # await self.model_generator.estimateAccuracy()

    classifier = Classifier(clf, self.config)
    predictions = await classifier.classify(
        '../samples/k260_target/target_3.wav')
    print(predictions)

    # Expected output, one character per prediction step.
    truth = list(
        "one_ring_to_rule_them_all_"
        "one_ring_to_find_them_"
        "one_ring_to_bring_them_all_"
        "and_in_the_darkness_bind_them"
    )
    top_n = 5
    accuracy = await classifier.check_accuracy(predictions, top_n, truth)
    print('Top {} accuracy: {}'.format(top_n, accuracy))
def test_thresholdClassify():
    """
    Smoke-test ``Classifier.thresholdClassify`` on two small dictionaries.

    The result is only printed, not asserted.
    """
    D1 = {"1__2__3___1__2__3/D": 3, "1__2__3___1__2__4/D": 4}
    D2 = {
        "1__2__3___1__2__3/D": 3,
        "1__2__3___1__2__4/D": 4,
        "1__2__3___1__3__4/D": 4
    }
    D = [D1, D2]
    cls = [6, 7]
    th = 2
    cf = Classifier()
    # Function-call form of print: the statement form is a SyntaxError on
    # Python 3.
    print(cf.thresholdClassify(D, cls, th))
def main():
    """
    Run the pipeline: split the data, apply features, fit and predict.

    The word-based and syntactic feature classes are invoked on the
    statistics data frame for the "train" split. A ``Classifier`` is then
    built from that frame and its predictions on the same frame are printed.
    """
    # Split the data; only the type of the training part is reported.
    training, validation, test = DataPreprocessing().split_data()
    print(type(training))

    df = DataPreprocessing().prepare_data_for_statistics("train")

    # Word-based features.
    WordBasedFeatures(df).features()

    # Syntactic features, followed by their output step.
    SyntacticFeatures(df).features()
    SyntacticFeatures(df).outputter()

    # Train the model and print its predictions on the same frame.
    model = Classifier(df)
    print(model.predict(df))
def run_task():
    """
    The main function that runs the task.

    Scores every pair and draws the score histogram. Then, for each training
    set size (in steps of TRAIN_SIZE_STEP up to TRAIN_PROPORTION of all
    pairs), it trains a linear classifier at a range of label thresholds and
    records the mean F1 score. Finally the quality-vs-train-size graph is
    drawn. The results file stays open for the whole run.
    """
    with open(results_path, "w+") as results_file:
        pairs = get_pair_samples()
        pair_count = len(pairs)
        scores_all_pairs = calculate_scores(pairs)

        # Histogram of scores across all pairs.
        draw_histogram(scores_all_pairs)

        # Scores in ascending order.
        sorted_scores = sorted(scores_all_pairs)

        # Largest train size: TRAIN_PROPORTION of the total number of pairs.
        max_train_size = math.ceil(TRAIN_PROPORTION * pair_count)
        all_data_indices = list(range(pair_count))

        train_sizes = []
        mean_f1_scores = []
        for train_size in range(TRAIN_SIZE_STEP, max_train_size + 1,
                                TRAIN_SIZE_STEP):
            print("train size: ", train_size)
            train_sizes.append(train_size)

            # Randomly sample the training set examples.
            train_indices = random.sample(all_data_indices, train_size)
            train_data, test_data = get_train_and_test_data(
                train_indices, sorted_scores)

            f1_vals = []
            # Walk the thresholds from the top score downwards.
            for idx in range(len(sorted_scores) - 1, 0, -THRESHOLD_STEP):
                train_labels, test_labels = set_true_labels_per_threshold(
                    sorted_scores, train_data, test_data, idx)

                # The classifier needs both classes present in the labels.
                if not (0 in train_labels and 1 in train_labels):
                    continue

                lin_classifier = Classifier(train_data, train_labels,
                                            test_data, test_labels)
                lin_classifier.train_classifier()
                f1_vals.append(lin_classifier.evaluate_classifier())

            mean_f1_scores = add_avg_f1(np.mean(f1_vals), results_file,
                                        mean_f1_scores, train_size)

        # Display the final graph.
        draw_quality_vs_train_size_graph(train_sizes, mean_f1_scores)
def write_sigs(signal):
    """
    Append the best discovery significance and exclusion limit for a signal.

    A classifier is built from ``signal.mass_combination_tuple``. Both
    figures of merit are scanned over the values -10 to 10 in steps of 0.1,
    and the maxima are appended to ``self.filename`` as one CSV row
    ``higgsino_mass,bino_mass,max_disc,max_excl``.

    ``self`` and ``pbar`` are not parameters; they are taken from the
    enclosing scope. The progress bar advances only when the row was written.

    This stays best-effort: a failing signal is skipped rather than aborting
    the batch. The failure is now logged instead of silently discarded, and
    KeyboardInterrupt/SystemExit are no longer swallowed.

    :param signal: object providing ``mass_combination_tuple``,
        ``higgsino_mass`` and ``bino_mass``
    """
    import logging

    try:
        classifier = Classifier(signal.mass_combination_tuple)
        cut_values = np.arange(-10, 10, 0.1)
        # Compute both maxima before touching the output file.
        best_disc = max(get_disc_sig(signal, classifier, x)
                        for x in cut_values)
        best_excl = max(get_excl_lim(signal, classifier, x)
                        for x in cut_values)
        with open(self.filename, 'a') as f:
            f.write("{},{},{},{}\n".format(signal.higgsino_mass,
                                           signal.bino_mass,
                                           best_disc, best_excl))
        pbar.update(1)
    except Exception:
        logging.getLogger(__name__).exception(
            "write_sigs failed; skipping signal")
def main():
    """
    Run the application.

    Restricts the classifier to the labels dog, person and cat, registers
    ``found_item_callback``, and classifies the live stream until
    interrupted with Ctrl-C.
    """
    c = Classifier()
    try:
        c.set_allowed_labels(['dog', 'person', 'cat'])
        c.set_callback_item_found(found_item_callback)
        c.classify_from_live_stream()
        # c.classify_video_from_file('./data/test.m4v', './data/test.avi')
    except KeyboardInterrupt:
        print('Exiting...')
def test_wolf_and_lamb(self):
    """
    Check detection of lambs and wolves among 400 sheep and 5 goats.

    The sheep count is allowed to fall anywhere from 400 to 406; the other
    three counts must match exactly.
    """
    print("Test if classifier can detect lambs and wolves")

    population = AnimalCreator.create_lambs_and_wolves(2)
    sheep = AnimalCreator.create_sheep(400)
    goats = AnimalCreator.create_goat(5)
    # Merge sheep first, then goats, into the lambs-and-wolves collection.
    for group in (sheep, goats):
        population.update(group)

    manhattan = self.kda.getDetector("Manhattan")
    counts = Classifier(population, manhattan, True).classifyUser()
    sheepIDs, lambsIDs, goatsIDs, wolvesIDs = counts

    self.assertTrue(400 <= sheepIDs <= 406)
    self.assertEqual(lambsIDs, 4)
    self.assertEqual(goatsIDs, 5)
    self.assertEqual(wolvesIDs, 2)
def __init__(self):
    """
    Load the product and project file classifiers, then start the service.

    ``self.models`` always gets two entries, product first and project
    second. An entry is ``None`` when its model file does not exist. The
    polling interval is read from the ``BatchJobService`` config section in
    minutes and stored in seconds. ``self.run()`` is invoked at the end of
    construction.
    """
    self.models = []
    model_files = (G.productFileClassifierModel,
                   G.projectFileClassifierModel)
    for path in model_files:
        loaded = None
        if os.path.exists(path):
            loaded = Classifier(model_file=path)
            loaded.dbUpdCategories()
        self.models.append(loaded)

    minutes = G.cfg.getfloat('BatchJobService', 'IntervalMinutes')
    self.interval = minutes * 60
    self.db = None
    self.cursor = None
    self.run()
def runClassifier(self, classifierName: str, trainingSet, testSet) -> EvaluationInfo:
    """
    Build the named classifier on the training set and evaluate it.

    :param classifierName: name handed to the ``Classifier`` constructor
    :param trainingSet: data passed to ``buildClassifier``
    :param testSet: data passed to ``evaluateClassifier``
    :return: the evaluation info, or ``None`` when anything failed (the
        failure is reported through ``Logger.Error``)
    """
    try:
        model = Classifier(classifierName)
        model.buildClassifier(trainingSet)
        # Earlier approach, kept for reference:
        #   evaluation = new Evaluation(trainingSet);
        #   evaluation.evaluateModel(classifier, testSet)
        return model.evaluateClassifier(testSet)
    except Exception as ex:
        Logger.Error("problem running classifier " + str(ex))
        return None
def main_func(conf_dict):
    """
    Run one load, vectorize, classify cycle and collect the results.

    :param conf_dict: configuration mapping; forwarded to ``Loader``,
        ``DataVectorizer`` and ``Classifier``, and copied into the result
        (except the ``"num_features"`` key)
    :return: dict with the copied configuration plus the accuracy,
        classification report, macro F1 score, label/class information and
        feature names; ``None`` when any step raised

    Only ``Exception`` subclasses are caught, so Ctrl-C and ``SystemExit``
    propagate instead of being reported as a failed run. The error line
    includes the exception message as well as its class.
    """
    try:
        print(conf_dict)
        loader = Loader(conf_dict)
        df = loader.dff

        # Data vectorizer trials
        dv = DataVectorizer(df, conf_dict)
        # The return value is unused; the call is kept in case it has
        # side effects on dv.
        dv.make_df_classify()
        X, y, people = dv.create_Xy()

        # Classifier
        estimator = Classifier(X, y, people, dv.df_features,
                               dv.feature_names, conf_dict)
        # Hyperparameter tuning according to the scoring function of the
        # classifier type (usually mean accuracy on the given test data and
        # labels) is currently disabled:
        # estimator.do_hyperparameter_tuning()
        estimator.k_fold_per_user_classify()
        # estimator.k_fold_classify(3)

        # Saving data
        d = {key: value for key, value in conf_dict.items()
             if key != "num_features"}
        d["accuracy"] = estimator.acc
        d["cl_report"] = estimator.cl_report
        d["f1_macro"] = estimator.f1_score
        d["labels_numeric"] = dv.labels_numeric
        d["class_samples"] = dv.class_samples.to_dict()
        d["feature_list"] = estimator.feature_names
        d["feature_number"] = len(estimator.feature_names)
        d["chosen_feature_names"] = estimator.chosen_feature_names
        d["len_chosen_feature_names"] = len(estimator.chosen_feature_names)
        print("F1 macro:", d["f1_macro"], "\n")
        return d
    except Exception as exc:
        print("Error: ", type(exc), exc)
        return None
def classifyWithParameters(self, classifier):
    """
    Let the user edit the classifier parameters, then classify with them.

    Any regressor or density estimator is cleared and the parameter dialog
    is opened on the tab for ``classifier``. Nothing further happens unless
    the dialog is accepted. An ``AssertionError`` raised while initialising
    or computing is shown in a warning dialog; other exceptions propagate.

    :param classifier: classifier selector; used as the dialog tab and
        forwarded to ``Classifier``
    """
    self.regressor = None
    self.densityEstimator = None
    self.classifierParameters.setTab(classifier)

    outcome = self.classifierParameters.exec_()
    if outcome != QtWidgets.QDialog.Accepted:
        return

    self.classifier = Classifier(classifier, self.classifierParameters,
                                 self.featurespace)
    try:
        self.classifier.initialize()
        self.runFeatureSpaceComputations()
        self.operationStack.add(Operation(self, "dummy", None))
    except AssertionError as error:
        ok_button = QtWidgets.QMessageBox.Ok
        QtWidgets.QMessageBox.warning(self, 'Error', str(error),
                                      ok_button, ok_button)
def analyze_file(file_to_analyze):
    """
    Predict whether a file is malware using the dumped classifier.

    :param file_to_analyze: object whose ``get_ml_output()`` supplies the
        feature row for the prediction
    :return: ``"probably malware"`` when the first prediction equals 1,
        otherwise ``"probably not malware"``
    """
    # Load the previously dumped model through a fresh Classifier wrapper.
    model = Classifier().load_dumped_classifier()

    # The model is given a single-row batch.
    batch = [file_to_analyze.get_ml_output()]
    verdict = model.predict(batch)[0]

    return "probably malware" if verdict == 1 else "probably not malware"
def action():
    """
    Classify the image uploaded in a POST request and return its label.

    The saved classifier is loaded, the request image is fetched at 250x250,
    and the index of the highest score in the first prediction row is looked
    up in the pickled ``label_dict`` file. The first key whose value equals
    that index is returned. ``'rick'`` is the fallback when no key matches.

    For any method other than POST the function falls through and returns
    ``None``, as before.

    Fixes: the label comparison used ``=`` (a syntax error) instead of
    ``==``, and the pickle file was opened without ever being closed.
    """
    if request.method == 'POST':
        clf = Classifier()
        clf.load('saved')
        image = Process.get_image_from_request(request, size=(250, 250))
        pred = np.argmax(clf.predict(image)[0])

        # NOTE: pickle must only be used on trusted files; 'label_dict' is a
        # local artefact of this application.
        with open('label_dict', 'rb') as label_file:
            label_dict = pickle.load(label_file)

        prediction = 'rick'
        for key, index in label_dict.items():
            if index == pred:
                prediction = key
                break

        clf.kill()
        return rep(str(prediction))
def main():
    """
    Train and evaluate five classifier variants on the vowel data.

    The raw data is loaded from 'samples/vowdata.dat' and normalized. A
    fixed number of samples per group goes into the training set, which is
    plotted. A second ``Dataset`` built from the same raw data serves as the
    testing set. For every variant the false classification rate (as a
    whole-number percentage) and the confusion matrix are printed.
    """
    # Load and normalize the raw data.
    raw_data = get_raw_data.from_dat_file('samples/vowdata.dat',
                                          duration_mode=2)
    raw_data = get_raw_data.normalize(raw_data)

    # Training set: a given amount of samples per group.
    per_group = {'m': 25, 'w': 25, 'b': 15, 'g': 11}
    training_set = Dataset(raw_data, samples_to_take_for_each_group=per_group)
    training_set.plot()

    # Testing set: built from the raw data after the training set took its
    # share.
    testing_set = Dataset(raw_data)

    # (variant number, n_components, covariance_type)
    classifier_variants = [[1, 1, 'diag'], [2, 1, 'full'], [3, 2, 'full'],
                           [4, 3, 'full'], [5, 4, 'full']]
    for variant in classifier_variants:
        number, components, covariance = variant[0], variant[1], variant[2]

        classifier = Classifier(n_components=components,
                                covariance_type=covariance)
        classifier.train(training_set.data)
        error_rate, confusion = classifier.classify(testing_set.data)

        print('Variant ' + str(number))
        print('False classification rate:', int(100 * error_rate), '%')
        print('Confusion matrix:\n', confusion, '\n')
def hyperparameter_optim(X_train, y_train, params, nb_iter=10, cv=3):
    """
    Randomized hyper-parameter search for ``Classifier``, scored on accuracy.

    Prints the best parameter set, then one line per sampled candidate with
    its mean test score and twice its standard deviation.

    :param X_train: training features passed to ``fit``
    :param y_train: training labels passed to ``fit``
    :param params: parameter distributions to sample from
    :param nb_iter: number of sampled candidates (``n_iter``)
    :param cv: cross-validation setting forwarded to the search
    :return: the fitted ``RandomizedSearchCV`` object
    """
    search = RandomizedSearchCV(estimator=Classifier(),
                                param_distributions=params,
                                n_iter=nb_iter,
                                cv=cv,
                                scoring='accuracy')
    search.fit(X_train, y_train)

    print("Best parameters set found:")
    print(search.best_params_)
    print()

    print("Grid scores:")
    results = search.cv_results_
    rows = zip(results['mean_test_score'],
               results['std_test_score'],
               results['params'])
    for mean, std, candidate in rows:
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, candidate))
    print()
    return search