Beispiel #1
0
    def do_cross_validation():
        """Cross-validate every pipeline/classifier pair over all targets.

        For each pair, one ``CrossValidationScoreTask`` is run per target and
        the ``score`` attribute of its result is printed. When at least one
        score was collected, a summary from ``get_score_summary`` is printed
        and stored together with the mean score. ``print_results`` is called
        once per pipeline with everything accumulated so far.
        """
        collected = []
        for pipeline in pipelines:
            for clf, clf_name in classifiers:
                print('Using pipeline %s with classifier %s' % (pipeline.get_name(), clf_name))
                target_scores = []
                for target in targets:
                    print('Processing %s (classifier %s)' % (target, clf_name))

                    core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir,
                                    target=target, pipeline=pipeline,
                                    classifier_name=clf_name, classifier=clf,
                                    normalize=should_normalize(clf), gen_preictal=pipeline.gen_preictal,
                                    cv_ratio=cv_ratio)

                    cv_score = CrossValidationScoreTask(core).run().score
                    target_scores.append(cv_score)

                    print('%.3f' % cv_score)

                # Nothing to summarise when no target produced a score.
                if not target_scores:
                    continue

                label = pipeline.get_name() + '_' + clf_name
                text = get_score_summary(label, target_scores)
                collected.append((text, np.mean(target_scores)))
                print(text)

            # Reported after each pipeline, so the listing grows as the run progresses.
            print_results(collected)
    def train_full_model(make_predictions):
        """Train or predict with every pipeline/classifier pair on all targets.

        When ``make_predictions`` is true, a ``MakePredictionsTask`` is run per
        target and the collected ``data`` strings are written, below a
        ``clip,seizure,early`` header, to a CSV file inside ``submission_dir``.
        Otherwise a ``TrainClassifierTask`` is run per target and the
        ``.pickle`` paths (relative to ``cache_dir``) are printed.
        """
        for pipeline in pipelines:
            for clf, clf_name in classifiers:
                print('Using pipeline %s with classifier %s' % (pipeline.get_name(), clf_name))
                csv_rows = ['clip,seizure,early']
                trained_names = []
                for target in targets:
                    core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir,
                                    target=target, pipeline=pipeline,
                                    classifier_name=clf_name, classifier=clf,
                                    normalize=should_normalize(clf), gen_ictal=pipeline.gen_ictal,
                                    cv_ratio=cv_ratio)

                    if not make_predictions:
                        trainer = TrainClassifierTask(core)
                        trainer.run()
                        trained_names.append(trainer.filename())
                    else:
                        outcome = MakePredictionsTask(core).run()
                        csv_rows.append(outcome.data)

                if not make_predictions:
                    print('Trained classifiers ready in %s' % cache_dir)
                    for stem in trained_names:
                        print(os.path.join(cache_dir, stem + '.pickle'))
                    continue

                out_name = 'submission%d-%s_%s.csv' % (ts, clf_name, pipeline.get_name())
                out_path = os.path.join(submission_dir, out_name)
                with open(out_path, 'w') as f:
                    # One row per line, terminated by a single trailing newline.
                    f.write('\n'.join(csv_rows) + '\n')
                print(' '.join(('wrote', out_path)))
Beispiel #3
0
    def do_cross_validation():
        """Run cross-validation per target and print the seizure/early AUCs.

        For each pipeline/classifier pair, a ``CrossValidationScoreTask`` is
        run for every target and the ``S_auc`` and ``E_auc`` attributes of its
        result are printed next to the target.
        """
        for pipeline in pipelines:
            for clf, clf_name in classifiers:
                print('Using pipeline %s with classifier %s' % (
                    pipeline.get_name(), clf_name))
                # NOTE(review): the scores are gathered but not consumed in the
                # visible part of this function - confirm whether a summary
                # step is missing.
                collected = []
                for target in targets:
                    print('Processing %s (classifier %s)' % (target, clf_name))

                    core = TaskCore(
                        cached_data_loader=cached_data_loader,
                        data_dir=data_dir,
                        target=target,
                        pipeline=pipeline,
                        classifier_name=clf_name,
                        classifier=clf,
                        normalize=should_normalize(clf),
                        gen_ictal=pipeline.gen_ictal,
                        cv_ratio=cv_ratio)

                    result = CrossValidationScoreTask(core).run()
                    collected.append(result.score)

                    fields = (target, 'Seizure_AUC=', result.S_auc, 'Early_AUC=', result.E_auc)
                    print(' '.join(str(field) for field in fields))
Beispiel #4
0
    def do_cross_validation():
        """Cross-validate every pipeline/classifier pair and summarise scores.

        Per target, the overall ``score`` plus the ``S_auc`` and ``E_auc``
        attributes of the ``CrossValidationScoreTask`` result are printed and
        collected. After all targets, a summary is printed for each of the
        three series. Only the overall summary and its mean are kept for
        ``print_results``, which is called once per pipeline.
        """
        summaries = []
        for pipeline in pipelines:
            for clf, clf_name in classifiers:
                print('Using pipeline %s with classifier %s' %
                      (pipeline.get_name(), clf_name))
                overall, seizure_aucs, early_aucs = [], [], []
                for target in targets:
                    print('Processing %s (classifier %s)' %
                          (target, clf_name))

                    core = TaskCore(
                        cached_data_loader=cached_data_loader,
                        data_dir=data_dir,
                        target=target,
                        pipeline=pipeline,
                        classifier_name=clf_name,
                        classifier=clf,
                        normalize=should_normalize(clf),
                        gen_ictal=pipeline.gen_ictal,
                        cv_ratio=cv_ratio)

                    result = CrossValidationScoreTask(core).run()
                    cv_score = result.score
                    overall.append(cv_score)

                    print('%.3f' % cv_score, 'S=%.4f' % result.S_auc,
                          'E=%.4f' % result.E_auc)
                    seizure_aucs.append(result.S_auc)
                    early_aucs.append(result.E_auc)

                # A tag of None marks the overall series: it is the only one
                # recorded in `summaries`, and it is printed without a prefix.
                series = ((None, overall), ('S', seizure_aucs), ('E', early_aucs))
                for tag, values in series:
                    if not values:
                        continue
                    label = pipeline.get_name() + '_' + clf_name
                    text = get_score_summary(label, values)
                    if tag is None:
                        summaries.append((text, np.mean(values)))
                        print(text)
                    else:
                        print(tag, text)

            print_results(summaries)
Beispiel #5
0
    def train_full_model(make_predictions):
        """Train or predict with every pipeline/classifier pair on all targets.

        A ``PdfPages`` file named after the timestamp, classifier and pipeline
        is opened in ``figure_dir`` for each pair and handed to every
        ``TaskCore`` as ``plot2file``. It is closed when the pair is finished,
        including when one of the tasks raises.

        Args:
            make_predictions: When true, run ``MakePredictionsTask`` per target
                and write the collected ``data`` strings, below a
                ``clip,preictal`` header, to a CSV file in ``submission_dir``.
                When false, run ``TrainClassifierTask`` per target and print
                the ``.pickle`` paths (relative to ``cache_dir``).
        """
        for pipeline in pipelines:
            for (classifier, classifier_name) in classifiers:
                print('Using pipeline %s with classifier %s' %
                      (pipeline.get_name(), classifier_name))
                guesses = ['clip,preictal']
                classifier_filenames = []
                figure_name = 'figure%d-_%s_%s_.pdf' % (
                    ts, classifier_name, pipeline.get_name())
                plot2file = PdfPages(os.path.join(figure_dir, figure_name))
                # try/finally guarantees the PDF is finalised even if a task or
                # the submission write fails; otherwise the handle would leak.
                try:
                    for target in targets:
                        task_core = TaskCore(
                            cached_data_loader=cached_data_loader,
                            data_dir=data_dir,
                            target=target,
                            pipeline=pipeline,
                            classifier_name=classifier_name,
                            classifier=classifier,
                            normalize=should_normalize(classifier),
                            gen_preictal=pipeline.gen_preictal,
                            cv_ratio=cv_ratio,
                            plot2file=plot2file)

                        if make_predictions:
                            predictions = MakePredictionsTask(task_core).run()
                            guesses.append(predictions.data)
                        else:
                            task = TrainClassifierTask(task_core)
                            task.run()
                            classifier_filenames.append(task.filename())

                    if make_predictions:
                        filename = 'submission%d-%s_%s.csv' % (
                            ts, classifier_name, pipeline.get_name())
                        filename = os.path.join(submission_dir, filename)
                        with open(filename, 'w') as f:
                            print('\n'.join(guesses), file=f)
                        print('wrote', filename)
                    else:
                        print('Trained classifiers ready in %s' % cache_dir)
                        for filename in classifier_filenames:
                            print(os.path.join(cache_dir, filename + '.pickle'))
                finally:
                    plot2file.close()
    def train_full_model(make_predictions):
        """Produce a submission file for every pipeline/classifier pair.

        When ``make_predictions`` is true, a ``MakePredictionsTask`` is run per
        target and the collected ``data`` strings are written, below a
        ``File,Class`` header, to a CSV file inside ``submission_dir``.

        The training-only path is not implemented: it prints
        ``not implemented`` for each target and then the "ready" message with
        no classifier files listed.
        """
        for pipeline in pipelines:
            for clf in classifiers:
                print('Using pipeline %s with classifier %s' % (
                    pipeline.get_name(), clf))
                csv_rows = ['File,Class']
                # Stays empty until the training branch below is implemented.
                trained_names = []
                for target in targets:
                    core = TaskCore(
                        cached_data_loader=cached_data_loader,
                        data_dir=data_dir,
                        target=target,
                        pipeline=pipeline,
                        classifier=clf,
                        normalize=should_normalize(clf),
                        gen_preictal=pipeline.gen_preictal,
                        cv_ratio=cv_ratio,
                        bin_size=bin_size)

                    if not make_predictions:
                        # TODO: run a TrainClassifierTask here and record its
                        # filename() once training is supported.
                        print('not implemented')
                    else:
                        outcome = MakePredictionsTask(core).run()
                        csv_rows.append(outcome.data)

                if not make_predictions:
                    print('Trained classifiers ready in %s' % cache_dir)
                    for stem in trained_names:
                        print(os.path.join(cache_dir, stem + '.pickle'))
                    continue

                out_name = 'submission%d-%s_%s.csv' % (ts, clf,
                                                       pipeline.get_name())
                out_path = os.path.join(submission_dir, out_name)
                with open(out_path, 'w') as f:
                    # One row per line, terminated by a single trailing newline.
                    f.write('\n'.join(csv_rows) + '\n')
                print(' '.join(('wrote', out_path)))
Beispiel #7
0
    def predict_all(make_predictions):
        """Fit one cross-subject model per pipeline/classifier and write a CSV.

        For every pipeline/classifier pair:

        1. ``GetCrossSubjectDataTask`` is run for each target and the
           resulting ``X_train`` / ``y_train`` / ``X_test`` arrays are stacked
           along axis 0.
        2. The classifier is fitted on the stacked training data, and a
           ``LogisticRegression`` is fitted on the classifier's own training
           probabilities to calibrate its outputs.
        3. Calibrated test probabilities are averaged over groups of 12
           consecutive rows, and one ``<target>_test_segment_NNNN.mat,<p>``
           line is emitted per group below a ``clip,preictal`` header.
        4. The lines are written to a CSV file inside ``submission_dir``.

        Args:
            make_predictions: Not used by this function.

        Raises:
            ValueError: If ``targets`` yields no entries, so there is nothing
                to train on.
        """
        # Number of consecutive test rows averaged into one output line.
        # NOTE(review): presumably the number of windows cut from each test
        # segment - confirm against the pipeline that produces X_test.
        samples_per_segment = 12

        for pipeline in pipelines:
            for (classifier, classifier_name) in classifiers:
                print('Using pipeline %s with classifier %s' %
                      (pipeline.get_name(), classifier_name))
                lines = ['clip,preictal']

                # Independent lists: a chained `a = b = []` would make every
                # name refer to the same list object.
                train_parts = []
                label_parts = []
                test_parts = []
                test_size = []
                for target in targets:
                    task_core = TaskCore(
                        cached_data_loader=cached_data_loader,
                        data_dir=data_dir,
                        target=target,
                        pipeline=pipeline,
                        classifier_name=classifier_name,
                        classifier=classifier,
                        normalize=should_normalize(classifier),
                        gen_preictal=pipeline.gen_preictal,
                        cv_ratio=cv_ratio)

                    data = GetCrossSubjectDataTask(task_core).run()
                    test_size.append(np.shape(data.X_test)[0])
                    train_parts.append(data.X_train)
                    label_parts.append(data.y_train)
                    test_parts.append(data.X_test)

                if not test_size:
                    raise ValueError('predict_all needs at least one target')

                # Stack once after the loop instead of re-copying the growing
                # arrays on every iteration.
                X_train = np.concatenate(train_parts, axis=0)
                y_train = np.concatenate(label_parts, axis=0)
                X_test = np.concatenate(test_parts, axis=0)

                # Training
                # ceil(0.1 * y) maps 0 to 0 and any label in (0, 10] to 1.
                # astype() returns a new array, so the result must be assigned.
                y_train = np.ceil(0.1 * y_train).astype(int)
                if should_normalize(classifier):
                    # NOTE(review): only X_train is passed to normalize_data,
                    # so X_test is used as-is further down - confirm whether
                    # the test set should be scaled here as well.
                    X_train, temp = normalize_data(X_train, X_train)

                print("Training ...")
                print('Dim', np.shape(X_train), np.shape(y_train))
                # `time` must be a module that provides get_seconds(); the
                # standard-library time module does not.
                start = time.get_seconds()
                classifier.fit(X_train, y_train)
                elapsedSecs = time.get_seconds() - start
                print("t=%ds" % int(elapsedSecs))

                # Calibrate: fit a logistic regression on the classifier's
                # training probabilities, then apply it to the test ones.
                y_estimate = classifier.predict_proba(X_train)
                lr = LogisticRegression(random_state=0)
                lr.fit(y_estimate, y_train)
                predictions_proba = classifier.predict_proba(X_test)
                predictions_calibrated = lr.predict_proba(predictions_proba)

                # Output
                start_idx = 0
                for target, n_rows in zip(targets, test_size):
                    newline = None
                    # Floor division keeps range() working on Python 3, where
                    # `/` yields a float.
                    for i in range(n_rows // samples_per_segment):
                        base = start_idx + i * samples_per_segment
                        preictal_mean = 0
                        for k in range(samples_per_segment):
                            p = predictions_calibrated[base + k]
                            preictal = translate_prediction(p)
                            # float() guards against integer truncation on
                            # Python 2 should `preictal` be an int.
                            preictal_mean += preictal / float(samples_per_segment)

                        # Segment numbers are 1-based and zero-padded to four
                        # digits.
                        newline = '%s_test_segment_%04d.mat,%.15f' % (
                            target, i + 1, preictal_mean)
                        lines.append(newline)

                    # Progress output: the last line produced for this target,
                    # skipped when the target had no complete segment.
                    if newline is not None:
                        print(newline)
                    start_idx += n_rows

                filename = 'submission%d-%s_%s.csv' % (ts, classifier_name,
                                                       pipeline.get_name())
                filename = os.path.join(submission_dir, filename)
                with open(filename, 'w') as f:
                    print('\n'.join(lines), file=f)
                print('wrote', filename)