Example #1
def listen(winlen, tostdout, model):
    """Continuously read audio windows, extract features, and classify them."""
    clf = Classifier(model)
    while True:
        # Grab the next audio window and extract features via Octave.
        t = audio.recv(winlen).as_float_vec()
        r = octave.analyze_row_vec(t)
        if len(r) > 0:
            print("XX", len(t), len(r))  # debug: window and feature lengths
            r = octave.reduce_features_row_vec(r)
            c = "-"  # placeholder label when no single feature row is produced
            if len(r) == 1:
                print(len(r[0]))
                c = clf.predict(r[0])
            target.set(c)
            print(c)
        if tostdout:
            # Dump the raw window, one sample per line.
            print("\n".join(map(str, t)))
Example #2
    def _show_ground_truth(self, list_segments, len_segments, labels, start_time):
        """Paint only wrong classified segments and show ground truth confusion matrix.
        
        Parameters
        ----------
        list_segments : list of int
            Indexes of the segments to evaluate.
        len_segments : list of int
            Size, in pixels, of each segment.
        labels : list of str
            Predicted class name for each segment.
        start_time : float
            Start time of the classification run.
        """
        classes = sorted(set(labels))
        
        n_segments = len(labels)
        spx_matrix = np.zeros((len(classes), len(classes)), int)  # per-segment counts
        px_matrix = np.zeros((len(classes), len(classes)), int)   # per-pixel counts

        # Build the confusion matrices and paint each wrongly classified segment.
        for idx_segment in list_segments:
            if self._gt_segments[idx_segment] is not None:
                gt_class = classes.index(self._gt_segments[idx_segment])
                predicted_class = classes.index(labels[idx_segment])
                
                spx_matrix[gt_class][predicted_class] += 1
                px_matrix[gt_class][predicted_class] += len_segments[idx_segment]
        
                if gt_class != predicted_class:
                    self._image, _ = self.segmenter.paint_segment(
                        self._image,
                        self.get_class_by_name(labels[idx_segment])["color"].value,
                        idx_segment=[idx_segment],
                        border=False)
        
        # Create a popup with results of classification.
        popup_info = "%s\n" % str(self.classifier.get_summary_config())
        popup_info += Classifier.confusion_matrix(classes, spx_matrix, "Superpixels")
        popup_info += Classifier.confusion_matrix(classes, px_matrix, "PixelSum")
        
        self.tk.refresh_image(self._image)
        self.tk.popup(popup_info)

        end_time = TimeUtils.get_time()
            
        self.tk.append_log("\nClassification finished")
        self.tk.append_log("Time elapsed: %0.3f seconds", (end_time - start_time))
Example #3
    def __init__(self, host, port):
        """Create listening socket and initialize classifier"""

        socketserver.TCPServer.__init__(self, (host, port), TCPHandler)

        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        retraining_delay = int(config.get('Classification',
                                          'retraining_delay'))
        self.classifier = Classifier(CLASSIFICATION_DATA_DIR,
                                     TS_DATA_DIR,
                                     retraining_delay=retraining_delay,
                                     upload_data=upload_data)
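
For reference, a minimal INI sketch of CONFIG_FILE that would satisfy the config.get('Classification', 'retraining_delay') lookup above; the value shown is an assumption.

[Classification]
retraining_delay = 60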
Example #4
def main():
    # read args
    args = u.read_args()
    u.create_directories(args)

    # create the classification model
    c = Classifier(args)

    # if the training flag is set, build the model and train it
    if args['train']:

        model = c.build()
        plot_model(model,
                   to_file=args['exp_dir'] + 'modelimage' + '.png',
                   show_layer_names=False,
                   show_shapes=False)
        operator = Train(model, args)
        operator.train()
        operator.validate()

    # if the test flag is set, load the best model and test it
    if args['test']:
        # load data only, without creating a model
        operator = Train(None, args)
        operator.validate()
        true, predicted = operator.test()

        # plot the confusion matrix
        class_names = ['0', '1']
        cf = confusion_matrix(true, predicted)
        plt.figure()
        u.plot_confusion_matrix(
            cf,
            classes=class_names,
            normalize=False,
            title='Confusion matrix, without normalization')
        plt.show()  # display the figure (assumes plot_confusion_matrix does not show it itself)
Example #5
class SettingsScreen(Screen):
    filepath = StringProperty()
    hiphop = StringProperty()
    jazz = StringProperty()
    rock = StringProperty()
    pop = StringProperty()
    metal = StringProperty()
    classifier = object  # placeholder; replaced with a Classifier instance in __init__

    def __init__(self, **kwargs):
        super(SettingsScreen, self).__init__(**kwargs)
        self.filepath = ""
        self.classifier = Classifier()

    def on_enter(self):
        global filePath
        self.filepath = os.path.basename(filePath)
        self.hiphop = "Hiphop"
        self.jazz = "Jazz"
        self.metal = "Metal"
        self.pop = "Pop"
        self.rock = "Rock"
        self.classifier.setMusic(filePath)
        print("Music Set")

    def classifyKnn(self):
        results = self.classifier.kNN()
        self.hiphop = "Hiphop: %s " % results[0]
        self.jazz = "Jazz: %s " % results[1]
        self.metal = "Metal: %s " % results[2]
        self.pop = "Pop: %s " % results[3]
        self.rock = "Rock: %s " % results[4]

    def classifySvm(self):
        results = self.classifier.svma()
        self.hiphop = "Hiphop: %s " % results[0]
        self.jazz = "Jazz: %s " % results[1]
        self.metal = "Metal: %s " % results[2]
        self.pop = "Pop: %s " % results[3]
        self.rock = "Rock: %s " % results[4]

    def classifyNn(self):
        results = self.classifier.nn()
        self.hiphop = "Hiphop: %s " % results[0]
        self.jazz = "Jazz: %s " % results[1]
        self.metal = "Metal: %s " % results[2]
        self.pop = "Pop: %s " % results[3]
        self.rock = "Rock: %s " % results[4]

    def classifyNb(self):
        results = self.classifier.bnb()
        self.hiphop = "Hiphop: %s " % results[0]
        self.jazz = "Jazz: %s " % results[1]
        self.metal = "Metal: %s " % results[2]
        self.pop = "Pop: %s " % results[3]
        self.rock = "Rock: %s " % results[4]
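
The four classify* methods above differ only in which classifier routine they call; a minimal DRY refactor under that observation might look like the sketch below (the _show_results helper name is hypothetical).

    def _show_results(self, results):
        # Hypothetical helper: format the five genre scores in one place.
        self.hiphop = "Hiphop: %s " % results[0]
        self.jazz = "Jazz: %s " % results[1]
        self.metal = "Metal: %s " % results[2]
        self.pop = "Pop: %s " % results[3]
        self.rock = "Rock: %s " % results[4]

    def classifyKnn(self):
        self._show_results(self.classifier.kNN())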
Example #6
                     kw_queue=keyword_queue,
                     limit_queue=lim_queue,
                     message_queue=mess_queue)
text_processor = TextProcessor(name='Text Processor',
                               database=db,
                               tp_queue=text_processor_queue,
                               dictionary=d)
annotator = Annotator(name='Annotator',
                      database=db,
                      train_event=te,
                      annotation_response=annot_resp,
                      socket=socketio,
                      train_threshold=n_before_train,
                      message_queue=mess_queue)
classifier = Classifier(name='Classifier',
                        database=db,
                        model=model_queue,
                        dictionary=d)
monitor = Monitor(name='Monitor',
                  database=db,
                  socket=socketio,
                  most_important_features=mif,
                  stream=streamer,
                  limit_queue=lim_queue,
                  clf=classifier,
                  annot=annotator,
                  message_queue=mess_queue)
trainer = Trainer(name='Trainer',
                  clf=SGDClassifier(loss='log', penalty='elasticnet'),
                  database=db,
                  model=model_queue,
                  train_trigger=te,
Example #7
    blacklist.update(tokenized_blacklist)
    if len_blacklist == len(blacklist):
        break
blacklist = list(blacklist)

##
# Model tuning

hyperparams = {
    'vectorizer__preprocessor': preprocess,
    'vectorizer__tokenizer': tokenize,
    'vectorizer__sublinear_tf': True
}

# prepare tuning tools
pipeline = Classifier(params=hyperparams, stop_words=blacklist).pipeline
cross_validation = model_selection.StratifiedKFold(shuffle=True, n_splits=3)
param_grid = {
    'vectorizer__max_df': (0.25, 0.5, 0.75, 1.0),
    'vectorizer__min_df': (1, 2, 3),
    'vectorizer__binary': (True, False),
    'classifier__loss':
    ('hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'),
    'classifier__penalty': ('l2', 'l1', 'elasticnet'),
    'classifier__alpha': (1e-2, 1e-3, 1e-4),
    'classifier__tol': (None, 1e-2, 1e-3, 1e-4),
    'classifier__class_weight': (None, 'balanced')
}

# run tuning routine
grid = model_selection.GridSearchCV(pipeline,
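
The call above is cut off by the excerpt; a typical completion of this tuning routine would be the sketch below, where the training data names X_train and y_train are assumptions.

grid = model_selection.GridSearchCV(pipeline,
                                    param_grid,
                                    cv=cross_validation,
                                    n_jobs=-1)
grid.fit(X_train, y_train)  # training data names are assumptions
print(grid.best_score_, grid.best_params_)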
Example #8
    X, y, X_real_test = load_data("train.csv", "test.csv")

    # Preprocessing

    print("Beginning preprocessing")

    X_processed, X_test_processed = preprocessing(X, X_real_test)

    # check that train and test have the same number of features
    assert X_processed.shape[1] == X_test_processed.shape[1]

    # Classification

    print("Beginning prediction")

    clf = Classifier(C=50000)

    X_train = X_processed.tocsr()
    y_train = y
    clf.fit(X_train, y_train)

    print("Score: %f" % clf.score(X_test, y_test))

    print("Saving prediction to file")

    y_pred = clf.predict(X_test_processed)

    # Save predictions to file

    np.savetxt('y_pred.txt', y_pred, fmt='%s')
Example #9
from rocchio import Rocchio


def print_rank(class_list):
    for id_class, keywords in class_list.items():
        sorted_list = sorted(keywords.items(),
                             key=lambda x: x[1],
                             reverse=True)
        print(id_class, sorted_list[:10])


if __name__ == '__main__':
    training_folder_path = "../resource/ohsumed/training/"
    test_folder_path = "../resource/ohsumed/test/"
    #r = Rocchio()
    #r.train(training_folder_path)
    #r.test(test_folder_path)

    print("EXTRACTING")
    ke = KeywordExtraction()
    ke.create_class(training_folder_path)
    keywords_by_class = ke.extract_keywords(3, training_folder_path)
    print("RANKING")
    kr = KeywordRanking()
    keywords_by_class = kr.rank_keywords(training_folder_path,
                                         keywords_by_class)
    print_rank(keywords_by_class)
    print("CLASSIFYING")
    c = Classifier(keywords_by_class)
    c.classify_all(test_folder_path)
    c.get_microF()
Example #10
    def __init__(self, **kwargs):
        super(SettingsScreen, self).__init__(**kwargs)
        self.filepath = ""
        self.classifier = Classifier()
Example #11
##
# preparing preprocessing and classification tools

prep = Preprocessor()

# Preprocess the stopwords (blacklist) up front: scikit-learn removes stop
# words AFTER tokenization, and the tokens are preprocessed during
# tokenization, so the blacklist must be preprocessed the same way.
# source: https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/feature_extraction/text.py#L265
blacklist = [
    prep.stem(prep.strip_accents(prep.lowercase(token))) for token in blacklist
]

domain_params = {
    'vectorizer__tokenizer': prep.build_tokenizer(),
    'classifier__random_state': appconfig['random_state']
}
classifier = Classifier({**classifier_params, **domain_params}, blacklist)

##
# classifying

print('classifying contratos')

classifier.train(training_dataset.data, training_dataset.target)

ids, corpus = zip(*to_predict)
predictions = classifier.predict(corpus)
results = zip(ids, predictions)

##
# persisting
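
The excerpt ends at the persisting header; a minimal sketch of what could follow, assuming the (id, prediction) pairs should land in a CSV file (the file name is hypothetical):

import csv

with open('predictions.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'prediction'])  # header row
    writer.writerows(results)              # (id, prediction) pairs from above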
Example #12
File: main.py Project: heonseok/MPMLD
    print()

    args.classification_name = '{}_lr{}_bs{}'.format(
        args.classification_model, args.class_lr, args.class_train_batch_size)

    # -------- Classification & Attack -------- #
    if args.use_reconstructed_dataset:
        for recon_type in reconstruction_type_list:
            args.classification_path = os.path.join(
                args.recon_output_path, 'classification',
                args.classification_name, recon_type,
                'repeat{}'.format(repeat_idx))
            print(args.classification_path)
            if args.train_classifier or args.test_classifier or args.extract_classifier_features:
                classifier = Classifier(args)

                try:
                    if args.early_stop_recon:
                        reconstructed_data_path = os.path.join(
                            args.reconstruction_path,
                            'recon_{}.pt'.format(recon_type))
                    else:
                        reconstructed_data_path = os.path.join(
                            args.reconstruction_path,
                            'recon_{}{:03d}.pt'.format(recon_type,
                                                       args.epochs))
                    recon_datasets = utils.build_reconstructed_datasets(
                        reconstructed_data_path)
                    class_datasets['train'] = recon_datasets['train']
                except FileNotFoundError:
Example #13
    utils.preprocess_spiral(tmp['face'][idx], args.seq_length[idx],
                            tmp['vertices'][idx],
                            args.dilation[idx]).to(device)
    for idx in range(len(tmp['face']) - 1)
]
down_transform_list = [
    utils.to_sparse(down_transform).to(device)
    for down_transform in tmp['down_transform']
]
up_transform_list = [
    utils.to_sparse(up_transform).to(device)
    for up_transform in tmp['up_transform']
]

model = Classifier(args.in_channels, args.out_channels, args.latent_channels,
                   spiral_indices_list, down_transform_list,
                   up_transform_list).to(device)
print('Number of parameters: {}'.format(utils.count_parameters(model)))
print(model)

optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                            args.decay_step,
                                            gamma=args.lr_decay)
t = time.time()
run(model, train_loader, test_loader, args.epochs, optimizer, scheduler,
    writer, device)
t_duration = time.time() - t
print('total time : {:.3f}s'.format(t_duration))
Example #14
                        format='%(asctime)s (%(threadName)s) %(message)s',
                        filename='debug.log')

    logging.info('\n' * 5)
    logging.info('*' * 10 + 'ACTIVE STREAM' + '*' * 10)
    logging.info('Starting Application...')

    logging.getLogger('socketio').setLevel(logging.ERROR)
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    # Initialize Threads
    streamer = Streamer(credentials_track=credentials['coll_1'],
                        credentials_sample=credentials['main_account'],
                        data=data)
    text_processor = TextProcessor(data)
    annotator = Annotator(train_threshold=n_before_train, data=data)
    classifier = Classifier(data)
    monitor = Monitor(streamer=streamer,
                      classifier=classifier,
                      annotator=annotator,
                      data=data)
    trainer = Trainer(data=data,
                      streamer=streamer,
                      clf=SGDClassifier(loss='log', penalty='l1', alpha=0.001))

    threads = [
        streamer, text_processor, monitor, classifier, trainer, annotator
    ]
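
    # Assumption: the worker threads are presumably started before the server
    # runs, but the excerpt omits that step. A minimal sketch:
    for thread in threads:
        thread.start()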

    socketio.run(app, debug=False)
Example #15
    tokenized_blacklist = tokenize(' '.join(blacklist))
    blacklist.update(tokenized_blacklist)
    if len_blacklist == len(blacklist):
        break
blacklist = list(blacklist)

##
# Train the model

hyperparams = {
    'vectorizer__preprocessor': preprocess,
    'vectorizer__tokenizer': tokenize,
    **get_tunning_params()
}

classifier = Classifier(hyperparams, blacklist)
classifier.train(training_dataset.data, training_dataset.target)

##
# Predict classes

ids, corpus = zip(*to_predict)

predictions = classifier.predict(corpus)

results = zip(ids, predictions)

##
# Get Keywords

classes_keywords = [(classe,
Example #16
import numpy as np
import matplotlib.pyplot as plt
import jax.numpy as jnp

from data_acquisition import *
from classification import Classifier, plot_training


plt.rc("font", family="serif", size=14)
plt.rc("text", usetex=True)
plt.rc('xtick', labelsize=14)
plt.rc('ytick', labelsize=14)
plt.rc('axes', labelsize=14)

CNN = True
L = 40
T_critical = 2 / np.log(1+np.sqrt(2))

ising_classifier = Classifier(CNN=CNN)
#plot_training(*ising_classifier.train()) # new training
ising_classifier.params = jnp.load('data/params.npy', allow_pickle=True) # use already trained model


############################################
# Plot classification of train+test data
############################################
temperatures = np.linspace(1.0, 4.0, 7)
mean = []
std = []

plt.figure()
plt.title('Classification results')
plt.xlabel(r'$T$')
plt.ylabel('Prediction')
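
The plotting snippet is cut off before the data loop; a minimal sketch of how it might continue, assuming a hypothetical generate_samples(T, L) helper from data_acquisition and that ising_classifier.predict returns per-sample phase predictions (both are assumptions, not the confirmed API).

for T in temperatures:
    samples = generate_samples(T, L)           # hypothetical data helper
    preds = ising_classifier.predict(samples)  # assumed Classifier API
    mean.append(np.mean(preds))
    std.append(np.std(preds))

plt.errorbar(temperatures, mean, yerr=std, fmt='o')
plt.axvline(T_critical, linestyle='--', label=r'$T_c$')
plt.legend()
plt.show()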