def listen(winlen, tostdout, model):
    clf = Classifier(model)
    while True:
        t = audio.recv(winlen).as_float_vec()
        r = octave.analyze_row_vec(t)
        if len(r) > 0:
            print("XX", len(t), len(r))  # debug output
            r = octave.reduce_features_row_vec(r)
            c = "-"
            if len(r) == 1:
                print(len(r[0]))
                c = clf.predict(r[0])
            target.set(c)
            print(c)
        if tostdout:
            print("\n".join(map(str, t)))
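# The loop above assumes a Classifier wrapper that loads a trained model and
# maps one feature row to a printable label. A minimal sketch of such a
# wrapper, assuming the model file is a pickled scikit-learn-style estimator
# (the file format and predict() semantics are assumptions, not taken from
# the snippet itself):
import pickle

class Classifier:
    """Hypothetical wrapper: load a pickled estimator, predict one label."""

    def __init__(self, model_path):
        # Assumes the model file holds a scikit-learn-style estimator.
        with open(model_path, "rb") as fh:
            self._model = pickle.load(fh)

    def predict(self, feature_row):
        # Estimators expect a 2-D array; wrap the row and unwrap the result.
        return self._model.predict([feature_row])[0]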
def _show_ground_truth(self, list_segments, len_segments, labels, start_time):
    """Paint only the wrongly classified segments and show a ground-truth confusion matrix.

    Parameters
    ----------
    list_segments : list of int
        List of segment indexes.
    len_segments : list of int
        List of segment sizes.
    labels : list of str
        Predicted class name for each segment.
    start_time : float
        Start time of the classification.
    """
    classes = sorted(set(labels))
    n_segments = len(labels)
    # np.int was removed from NumPy; use the builtin int dtype instead.
    spx_matrix = np.zeros((len(classes), len(classes)), int)
    px_matrix = np.zeros((len(classes), len(classes)), int)

    # Build both confusion matrices and paint each wrongly classified segment.
    for idx_segment in list_segments:
        if self._gt_segments[idx_segment] is not None:
            gt_class = classes.index(self._gt_segments[idx_segment])
            predicted_class = classes.index(labels[idx_segment])
            spx_matrix[gt_class][predicted_class] += 1
            px_matrix[gt_class][predicted_class] += len_segments[idx_segment]

            if gt_class != predicted_class:
                self._image, _ = self.segmenter.paint_segment(
                    self._image,
                    self.get_class_by_name(labels[idx_segment])["color"].value,
                    idx_segment=[idx_segment], border=False)

    # Create a popup with the classification results.
    popup_info = "%s\n" % str(self.classifier.get_summary_config())
    popup_info += Classifier.confusion_matrix(classes, spx_matrix, "Superpixels")
    popup_info += Classifier.confusion_matrix(classes, px_matrix, "PixelSum")

    self.tk.refresh_image(self._image)
    self.tk.popup(popup_info)

    end_time = TimeUtils.get_time()
    self.tk.append_log("\nClassification finished")
    self.tk.append_log("Time elapsed: %0.3f seconds", (end_time - start_time))
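# The static Classifier.confusion_matrix helper above evidently renders a
# count matrix as text for the popup. A minimal sketch of such a formatter,
# assuming only the call signature visible here (class names, a square count
# matrix, a title); the real implementation's layout is unknown:
def confusion_matrix(classes, matrix, title):
    """Hypothetical formatter: render a square count matrix as plain text."""
    width = max(len(c) for c in classes) + 2
    lines = ["\n%s (rows = ground truth, columns = predicted)" % title]
    lines.append(" " * width + "".join(c.rjust(width) for c in classes))
    for name, row in zip(classes, matrix):
        lines.append(name.rjust(width) + "".join(str(v).rjust(width) for v in row))
    return "\n".join(lines) + "\n"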
def __init__(self, host, port):
    """Create the listening socket and initialize the classifier."""
    socketserver.TCPServer.__init__(self, (host, port), TCPHandler)

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    retraining_delay = int(config.get('Classification', 'retraining_delay'))

    self.classifier = Classifier(CLASSIFICATION_DATA_DIR, TS_DATA_DIR,
                                 retraining_delay=retraining_delay,
                                 upload_data=upload_data)
def main():
    # Read the arguments and prepare the output directories.
    args = u.read_args()
    u.create_directories(args)

    # Create the classification model.
    c = Classifier(args)

    # If the training flag is set, build the model and train it.
    if args['train']:
        model = c.build()
        plot_model(model, to_file=args['exp_dir'] + 'modelimage' + '.png',
                   show_layer_names=False, show_shapes=False)
        operator = Train(model, args)
        operator.train()
        operator.validate()

    # If the test flag is set, load the best model and test it.
    if args['test']:
        # Load the data only, without creating a model.
        operator = Train(None, args)
        operator.validate()
        true, predicted = operator.test()

        # Plot the confusion matrix.
        class_names = ['0', '1']
        cf = confusion_matrix(true, predicted)
        plt.figure()
        u.plot_confusion_matrix(
            cf,
            classes=class_names,
            normalize=False,
            title='Confusion matrix, without normalization')
class SettingsScreen(Screen):
    filepath = StringProperty()
    hiphop = StringProperty()
    jazz = StringProperty()
    rock = StringProperty()
    pop = StringProperty()
    metal = StringProperty()
    classifier = object  # placeholder; replaced with a Classifier instance in __init__

    def __init__(self, **kwargs):
        super(SettingsScreen, self).__init__(**kwargs)
        self.filepath = ""
        self.classifier = Classifier()

    def on_enter(self):
        global filePath
        self.filepath = os.path.basename(filePath)
        self.hiphop = "Hiphop"
        self.jazz = "Jazz"
        self.metal = "Metal"
        self.pop = "Pop"
        self.rock = "Rock"
        self.classifier.setMusic(filePath)
        print("Music Set")

    def _show_results(self, results):
        # All four classifiers report their scores in the same genre order.
        self.hiphop = "Hiphop: %s " % results[0]
        self.jazz = "Jazz: %s " % results[1]
        self.metal = "Metal: %s " % results[2]
        self.pop = "Pop: %s " % results[3]
        self.rock = "Rock: %s " % results[4]

    def classifyKnn(self):
        self._show_results(self.classifier.kNN())

    def classifySvm(self):
        self._show_results(self.classifier.svma())

    def classifyNn(self):
        self._show_results(self.classifier.nn())

    def classifyNb(self):
        self._show_results(self.classifier.bnb())
                    kw_queue=keyword_queue, limit_queue=lim_queue,
                    message_queue=mess_queue)
text_processor = TextProcessor(name='Text Processor', database=db,
                               tp_queue=text_processor_queue, dictionary=d)
annotator = Annotator(name='Annotator', database=db, train_event=te,
                      annotation_response=annot_resp, socket=socketio,
                      train_threshold=n_before_train,
                      message_queue=mess_queue)
classifier = Classifier(name='Classifier', database=db, model=model_queue,
                        dictionary=d)
monitor = Monitor(name='Monitor', database=db, socket=socketio,
                  most_important_features=mif, stream=streamer,
                  limit_queue=lim_queue, clf=classifier, annot=annotator,
                  message_queue=mess_queue)
trainer = Trainer(name='Trainer',
                  clf=SGDClassifier(loss='log', penalty='elasticnet'),
                  database=db, model=model_queue, train_trigger=te,
    blacklist.update(tokenized_blacklist)
    if len_blacklist == len(blacklist):
        break
blacklist = list(blacklist)

##
# Model tuning

hyperparams = {
    'vectorizer__preprocessor': preprocess,
    'vectorizer__tokenizer': tokenize,
    'vectorizer__sublinear_tf': True
}

# Prepare the tuning tools.
pipeline = Classifier(params=hyperparams, stop_words=blacklist).pipeline
cross_validation = model_selection.StratifiedKFold(shuffle=True, n_splits=3)
param_grid = {
    'vectorizer__max_df': (0.25, 0.5, 0.75, 1.0),
    'vectorizer__min_df': (1, 2, 3),
    'vectorizer__binary': (True, False),
    'classifier__loss': ('hinge', 'log', 'modified_huber', 'squared_hinge',
                         'perceptron'),
    'classifier__penalty': ('l2', 'l1', 'elasticnet'),
    'classifier__alpha': (1e-2, 1e-3, 1e-4),
    'classifier__tol': (None, 1e-2, 1e-3, 1e-4),
    'classifier__class_weight': (None, 'balanced')
}

# Run the tuning routine.
grid = model_selection.GridSearchCV(pipeline,
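# The snippet cuts off inside the GridSearchCV call. Given the param_grid and
# cross_validation objects already defined above, a plausible completion and a
# short run might look like the following; the scoring metric, n_jobs, and the
# use of training_dataset (seen in the related snippets) are assumptions:
grid = model_selection.GridSearchCV(pipeline, param_grid,
                                    cv=cross_validation,
                                    scoring='f1_macro',  # assumed metric
                                    n_jobs=-1, verbose=1)
grid.fit(training_dataset.data, training_dataset.target)
print('best score: %0.3f' % grid.best_score_)
print('best params:', grid.best_params_)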
X, y, X_real_test = load_data("train.csv", "test.csv")

# Preprocessing
print("Beginning preprocessing")
X_processed, X_test_processed = preprocessing(X, X_real_test)
# Check that train and test have the same number of features.
assert X_processed.shape[1] == X_test_processed.shape[1]

# Classification
print("Beginning prediction")
clf = Classifier(C=50000)
X_train = X_processed.tocsr()
y_train = y
clf.fit(X_train, y_train)
# Only training labels are available here, so report the training score
# (the original referenced undefined X_test/y_test variables).
print("Score: %f" % clf.score(X_train, y_train))

print("Saving prediction to file")
y_pred = clf.predict(X_test_processed)

# Save predictions to file.
np.savetxt('y_pred.txt', y_pred, fmt='%s')
from rocchio import Rocchio


def print_rank(class_list):
    for id_class, keywords in class_list.items():
        sorted_list = sorted(keywords.items(), key=lambda x: x[1], reverse=True)
        print(id_class, sorted_list[:10])


if __name__ == '__main__':
    training_folder_path = "../resource/ohsumed/training/"
    test_folder_path = "../resource/ohsumed/test/"

    # r = Rocchio()
    # r.train(training_folder_path)
    # r.test(test_folder_path)

    # KeywordExtraction, KeywordRanking and Classifier are assumed to be
    # imported elsewhere in this module.
    print("EXTRACTING")
    ke = KeywordExtraction()
    ke.create_class(training_folder_path)
    keywords_by_class = ke.extract_keywords(3, training_folder_path)

    print("RANKING")
    kr = KeywordRanking()
    keywords_by_class = kr.rank_keywords(training_folder_path, keywords_by_class)
    print_rank(keywords_by_class)

    print("CLASSIFYING")
    c = Classifier(keywords_by_class)
    c.classify_all(test_folder_path)
    c.get_microF()
##
# Preparing the preprocessing and classification tools

prep = Preprocessor()

# Preprocess the stopwords (blacklist) up front: scikit-learn removes stopwords
# AFTER tokenization, and the tokens themselves are preprocessed during
# tokenization, so the blacklist must match that preprocessed form.
# Source: https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/feature_extraction/text.py#L265
blacklist = [
    prep.stem(prep.strip_accents(prep.lowercase(token)))
    for token in blacklist
]

domain_params = {
    'vectorizer__tokenizer': prep.build_tokenizer(),
    'classifier__random_state': appconfig['random_state']
}
classifier = Classifier({**classifier_params, **domain_params}, blacklist)

##
# Classifying
print('classifying contratos')

classifier.train(training_dataset.data, training_dataset.target)

ids, corpus = zip(*to_predict)
predictions = classifier.predict(corpus)
results = zip(ids, predictions)

##
# Persisting
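# The 'vectorizer__...' and 'classifier__...' keys in these snippets imply
# that Classifier wraps a two-step scikit-learn Pipeline. A minimal sketch of
# such a wrapper, assuming a TfidfVectorizer front end (suggested by
# sublinear_tf) and an SGDClassifier back end (suggested by the loss/penalty
# grids); the real class surely differs in detail:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline


class Classifier:
    """Hypothetical sketch of the pipeline the snippets appear to assume."""

    def __init__(self, params=None, stop_words=None):
        self.pipeline = Pipeline([
            ('vectorizer', TfidfVectorizer(stop_words=stop_words)),
            ('classifier', SGDClassifier()),
        ])
        if params:
            # 'vectorizer__...'/'classifier__...' keys route to the two steps.
            self.pipeline.set_params(**params)

    def train(self, data, target):
        self.pipeline.fit(data, target)

    def predict(self, corpus):
        return self.pipeline.predict(corpus)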
print()
args.classification_name = '{}_lr{}_bs{}'.format(args.classification_model,
                                                 args.class_lr,
                                                 args.class_train_batch_size)

# -------- Classification & Attack -------- #
if args.use_reconstructed_dataset:
    for recon_type in reconstruction_type_list:
        args.classification_path = os.path.join(
            args.recon_output_path, 'classification', args.classification_name,
            recon_type, 'repeat{}'.format(repeat_idx))
        print(args.classification_path)

        if args.train_classifier or args.test_classifier or args.extract_classifier_features:
            classifier = Classifier(args)
            try:
                if args.early_stop_recon:
                    reconstructed_data_path = os.path.join(
                        args.reconstruction_path,
                        'recon_{}.pt'.format(recon_type))
                else:
                    reconstructed_data_path = os.path.join(
                        args.reconstruction_path,
                        'recon_{}{:03d}.pt'.format(recon_type, args.epochs))
                recon_datasets = utils.build_reconstructed_datasets(
                    reconstructed_data_path)
                class_datasets['train'] = recon_datasets['train']
            except FileNotFoundError:
    utils.preprocess_spiral(tmp['face'][idx], args.seq_length[idx],
                            tmp['vertices'][idx],
                            args.dilation[idx]).to(device)
    for idx in range(len(tmp['face']) - 1)
]
down_transform_list = [
    utils.to_sparse(down_transform).to(device)
    for down_transform in tmp['down_transform']
]
up_transform_list = [
    utils.to_sparse(up_transform).to(device)
    for up_transform in tmp['up_transform']
]

model = Classifier(args.in_channels, args.out_channels, args.latent_channels,
                   spiral_indices_list, down_transform_list,
                   up_transform_list).to(device)
print('Number of parameters: {}'.format(utils.count_parameters(model)))
print(model)

optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                            args.decay_step,
                                            gamma=args.lr_decay)

t = time.time()
run(model, train_loader, test_loader, args.epochs, optimizer, scheduler,
    writer, device)
t_duration = time.time() - t
print('total time : {:.3f}s'.format(t_duration))
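# run() is called above with the model, both loaders, an epoch count, the
# optimizer/scheduler pair, a summary writer, and a device. A plausible sketch
# of such a loop, assuming a standard cross-entropy classification setup and a
# TensorBoard-style writer; the real run() is not shown in the snippet:
import torch
import torch.nn.functional as F


def run(model, train_loader, test_loader, epochs, optimizer, scheduler,
        writer, device):
    """Hypothetical training loop matching the call signature above."""
    for epoch in range(1, epochs + 1):
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = F.cross_entropy(model(x), y)
            loss.backward()
            optimizer.step()
        scheduler.step()

        # Evaluate once per epoch and log accuracy to the summary writer.
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for x, y in test_loader:
                x, y = x.to(device), y.to(device)
                correct += (model(x).argmax(dim=1) == y).sum().item()
                total += y.size(0)
        writer.add_scalar('test/accuracy', correct / total, epoch)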
                    format='%(asctime)s (%(threadName)s) %(message)s',
                    filename='debug.log')
logging.info('\n' * 5)
logging.info('*' * 10 + 'ACTIVE STREAM' + '*' * 10)
logging.info('Starting Application...')
logging.getLogger('socketio').setLevel(logging.ERROR)
logging.getLogger('werkzeug').setLevel(logging.ERROR)

# Initialize the threads.
streamer = Streamer(credentials_track=credentials['coll_1'],
                    credentials_sample=credentials['main_account'],
                    data=data)
text_processor = TextProcessor(data)
annotator = Annotator(train_threshold=n_before_train, data=data)
classifier = Classifier(data)
monitor = Monitor(streamer=streamer, classifier=classifier,
                  annotator=annotator, data=data)
trainer = Trainer(data=data, streamer=streamer,
                  clf=SGDClassifier(loss='log', penalty='l1', alpha=0.001))

threads = [
    streamer, text_processor, monitor, classifier, trainer, annotator
]

socketio.run(app, debug=False)
    tokenized_blacklist = tokenize(' '.join(blacklist))
    blacklist.update(tokenized_blacklist)
    if len_blacklist == len(blacklist):
        break
blacklist = list(blacklist)

##
# Train the model

hyperparams = {
    'vectorizer__preprocessor': preprocess,
    'vectorizer__tokenizer': tokenize,
    **get_tunning_params()
}
classifier = Classifier(hyperparams, blacklist)
classifier.train(training_dataset.data, training_dataset.target)

##
# Predict classes

ids, corpus = zip(*to_predict)
predictions = classifier.predict(corpus)
results = zip(ids, predictions)

##
# Get keywords

classes_keywords = [(classe,
from data_acquisition import *
from classification import Classifier, plot_training

plt.rc("font", family="serif", size=14)
plt.rc("text", usetex=True)
plt.rc('xtick', labelsize=14)
plt.rc('ytick', labelsize=14)
plt.rc('axes', labelsize=14)

CNN = True
L = 40
T_critical = 2 / np.log(1 + np.sqrt(2))

ising_classifier = Classifier(CNN=CNN)
# plot_training(*ising_classifier.train())  # new training
ising_classifier.params = jnp.load('data/params.npy', allow_pickle=True)  # use the already trained model

############################################
# Plot classification of train+test data
############################################
temperatures = np.linspace(1.0, 4.0, 7)
mean = []
std = []

plt.figure()
plt.title('Classification results')
plt.xlabel(r'$T$')
plt.ylabel('Prediction')
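# The snippet sets up temperatures, mean, and std and then stops. A plausible
# continuation, assuming the classifier exposes a batched predict over spin
# configurations loaded per temperature; load_configurations and the predict
# call are hypothetical names, not from the source:
for T in temperatures:
    # load_configurations: hypothetical helper returning LxL samples at temperature T.
    samples = load_configurations(L, T)
    predictions = ising_classifier.predict(samples)
    mean.append(np.mean(predictions))
    std.append(np.std(predictions))

plt.errorbar(temperatures, mean, yerr=std, fmt='o', capsize=3)
plt.axvline(T_critical, linestyle='--', color='gray',
            label=r'$T_c = 2/\ln(1+\sqrt{2})$')
plt.legend()
plt.tight_layout()
plt.savefig('data/classification_results.pdf')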