Ejemplo n.º 1
0
 def test_init_file(self):
     log = Diary(self.TXT_PATH, async=False)
     log.info(self.INFO)
     log.close()
     with open(self.TXT_PATH) as f:
         line = f.readline()
         self.assertTrue(self.INFO in line)
Ejemplo n.º 2
0
 def test_init_does_not_exist(self):
     log = Diary(self.NO_EXIST_PATH, async=False)
     log.info(self.INFO)
     log.close()
     with open(self.NO_EXIST_PATH) as f:
         line = f.readline()
         self.assertTrue(self.INFO in line)
Ejemplo n.º 3
0
 def test_warn_log_trace(self):
     log = Diary(self.WARNINGS_LOG_PATH, async=False)
     log.warn(self.INFO, log_trace=True)
     log.close()
     with open(log.log_file.name) as f:
         self.assertTrue(
             "logged(event, *args, **kwargs)" in f.read())
Ejemplo n.º 4
0
class TestDiary(TestCase):
    """Tests for a ``Diary`` constructed from ``data.json``."""

    def setUp(self):
        """Build a reference student with two subjects and the diary under test."""
        self.EPSILON = 0.01
        sub1 = Subject("Biologia",
                       [[3.5, 2.5, 5.0], [1, 1, 1, 1, 1, 1, 1, 1, 0]])
        sub2 = Subject("Matematyka",
                       [[2.5, 3.0, 2.5], [1, 1, 0, 1, 0, 1, 0, 1, 1]])
        self.subjects = [sub1, sub2]
        self.andrzej = Student("Andrzej Abacki", self.subjects)
        self.diary = Diary("AGH", 2016, "data.json")

    def test_init(self):
        """The diary's "Andrzej Abacki" matches the reference name and subject count."""
        loaded = self.diary.get_student("Andrzej Abacki")
        self.assertEqual(loaded.name, self.andrzej.name)
        self.assertEqual(len(loaded.subjects), len(self.andrzej.subjects))

    def test_franciszek_compute_total_average(self):
        """Total average of "Franciszek Fabacki" is 3.83 to two decimal places."""
        # assertAlmostEquals is a deprecated alias that was removed in
        # Python 3.12; assertAlmostEqual exists on every unittest version.
        self.assertAlmostEqual(
            self.diary.get_student(
                "Franciszek Fabacki").compute_total_average(), 3.83, places=2)

    def test_get_not_existing_student(self):
        """Looking up an unknown student name returns None."""
        self.assertIsNone(self.diary.get_student("Klima Nima"))
Ejemplo n.º 5
0
    def test_unicode_PY2_DB_error(self):
        if not _PY2:
            return

        unicode_str =  u"\u3002"
        log = Diary(self.INIT_DIR, async=False, file_name="unicode_test.log", db_name="unicode.db")
        with self.assertRaises(ValueError, msg="diary does not support logging unicode strings into a database in python2"):
            log.log(unicode_str)
Ejemplo n.º 6
0
 def setUp(self):
     """Create the fixtures: two subjects, a student holding them, and the diary."""
     biology_data = [[3.5, 2.5, 5.0], [1, 1, 1, 1, 1, 1, 1, 1, 0]]
     maths_data = [[2.5, 3.0, 2.5], [1, 1, 0, 1, 0, 1, 0, 1, 1]]

     self.EPSILON = 0.01
     self.subjects = [
         Subject("Biologia", biology_data),
         Subject("Matematyka", maths_data),
     ]
     # The student receives the very same list object stored on the test case.
     self.andrzej = Student("Andrzej Abacki", self.subjects)
     self.diary = Diary("AGH", 2016, "data.json")
Ejemplo n.º 7
0
    def test_log_event_instance(self):
        mock_level = "CRITICAL"
        log = Diary(self.INIT_DIR, db_name="events.db", async=False)
        e = Event(self.INFO, level=mock_level)
        log.log(e)
        self.assertEquals(e.level, mock_level)

        log.close()
        with DiaryDB(log.db_file.name) as db:
            db.assert_event_logged(self.INFO, level=mock_level)
Ejemplo n.º 8
0
    def test_custom_format_init(self):
        """A ``log_format`` callable given to Diary formats the line written to the file."""
        logger = Diary(self.API_DIR, log_format=emergency_format,
                       file_name="EmergencyLogs2.log", db_name="EmergencyDB2.db")

        logger.log(self.INFO)
        logger.close()

        expected_line = emergency_format(logger.last_logged_event) + '\n'
        with open(logger.log_file.name) as f:
            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(f.readline(), expected_line)

        with DiaryDB(logger.db_file.name) as db:
            db.assert_event_logged(self.INFO)
Ejemplo n.º 9
0
    def test_queue_join(self):
        trials = 10
        log = Diary(self.INIT_DIR, async=True, db_name="QUEUE_TEST.db")
        for i in range(trials):
            log.log(i)

        log.close()
        self.assertFalse(log.thread.is_alive())
        with DiaryDB(log.db_file.name) as db:
            entries = db.cursor.execute("SELECT * FROM logs")
            self.assertEquals(len(entries.fetchall()), trials)
Ejemplo n.º 10
0
    def test_log_event_formatted(self):
        log = Diary(self.INIT_DIR, file_name="formatted.log", async=False)
        e = Event(self.INFO, "LEVEL")
        e.set_formatter("{info}|{level}")
        log.log(e)
        log.close()

        with open(log.log_file.name) as f:
            self.assertEquals("{info}|{level}\n".format(info=self.INFO, level="LEVEL"), f.readline())

        e.set_formatter(None) # Set Event formatter back to None to not upset other tests
Ejemplo n.º 11
0
    def test_diary_print(self):
        """With ``also_print=True`` the entry is still written to the log file."""
        log = Diary(self.INIT_DIR, file_name="printing.log", also_print=True)

        info_to_log = "hello there world!!!"

        log.info(info_to_log)

        log.close()

        # Nothing bad happened and stdout is hard to capture reliably,
        # so only the file output is checked.
        with open(log.log_file.name) as f:
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn(info_to_log, f.readline())
Ejemplo n.º 12
0
    def test_custom_format_event(self):
        class FormattedEvent(Event):
            formatter = "|{dt}|{info}|{level_str}|"

        logger = Diary(self.API_DIR, file_name="formatted.txt", db_name="formattedDB.db", event=FormattedEvent, async=False)
        logger.log(self.INFO)
        logger.close()

        with open(logger.log_file.name) as f:
            self.assertEquals(f.readline(), logger.last_logged_event.formatted() + '\n')

        with DiaryDB(logger.db_file.name) as db:
            db.assert_event_logged(self.INFO, "INFO")
Ejemplo n.º 13
0
    def test_log_event_in_init(self):
        class PrettyEvent(Event):
            formatter = "{info}|{level_str}"

        log = Diary(self.INIT_DIR, file_name="pretty.log", db_name="prettyevents.db", async=False, event=PrettyEvent)
        log.log(self.INFO)
        log.close()

        with DiaryDB(log.db_file.name) as db:
            db.assert_event_logged(self.INFO)

        with open(log.log_file.name) as f:
            self.assertEquals("{info}|{level}\n".format(info=self.INFO, level="INFO"), f.readline())
Ejemplo n.º 14
0
    def test_unicode_PY2(self):
        if not _PY2:
            return

        unicode_str = u"\u3002"
        log = Diary(os.path.join(self.INIT_DIR, "unicode_test.log"), async=False, encoding="utf-8")

        log.log(unicode_str)

        log.close()

        with codecs.open(log.log_file.name, encoding=log.encoding) as f:
            line = f.readline()
            self.assertTrue(unicode_str in line)
Ejemplo n.º 15
0
    def test_unicode_PY3(self):
        if _PY2:
            return

        unicode_str = u"\u3002"
        log = Diary(self.INIT_DIR, file_name="unicode_test.log", async=False)

        log.log(unicode_str)

        log.close()

        with codecs.open(log.log_file.name, encoding=log.encoding) as f:
            line = f.readline()
            self.assertTrue(unicode_str in line)
Ejemplo n.º 16
0
    def test_unicode_event_formatted(self):
        class PrettyEvent(Event):
            formatter = "{info}|{level_str}"

        unicode_str = u"\u3002"
        log = Diary(os.path.join(self.INIT_DIR, "unicode_test.log"), async=False, encoding="utf-8", event=PrettyEvent)

        log.log(unicode_str)

        log.close()

        with codecs.open(log.log_file.name, encoding=log.encoding) as f:
            line = f.readline()
            self.assertTrue(unicode_str in line)
Ejemplo n.º 17
0
    def do_it(self, ev):
        """Publish the chosen text file to the diary.

        Reads login, password, diary id, file name and split mode from the
        form widgets.  If login, diary id or file name is empty, a message
        box is shown and nothing else happens.  Otherwise the file is loaded,
        its characters are fixed and its header is separated from the text.

        With split mode ``1`` the text becomes one post plus comments; any
        other value splits it into several posts.  Each piece is stored next
        to the source file before it is sent.  Any exception raised along the
        way is shown in a message box.

        ``ev`` is not used by this handler.
        """
        login = self.login_edit.get()
        password = self.password_edit.get()
        diary_id = self.diary_id_edit.get()
        filename = self.filename_edit.get()
        split_type = self.split_type.get()

        # Mandatory fields in the order they are checked; first empty one aborts.
        mandatory = (
            (login, "Логин не задан"),
            (diary_id, "Адрес сообщества не задан"),
            (filename, "Путь к файлу не задан"),
        )
        for field_value, complaint in mandatory:
            if not field_value:
                messagebox.showinfo("Error", complaint)
                return

        api = Diary()
        try:
            api.login(login, password)
            raw_text = util.load(filename)
            prefix = os.path.splitext(filename)[0]
            header, text = find_header(util.fix_characters(raw_text))

            if split_type != 1:
                posts = split_text_with_posts(header, text)
                for number, post in enumerate(posts, 1):
                    util.store(prefix + "_post_%d.txt" % number, post)

                # Send to diary
                for post in posts:
                    api.new_post(post, diary_id)
                messagebox.showinfo("Info", "Посты успешно опубликованы. Тексты продублированы в файлы *post_N.txt")
            else:
                post, comments = split_text_with_comments(header, text)
                util.store(prefix + "_post.txt", post)
                for number, comment in enumerate(comments, 1):
                    util.store(prefix + "_comment_%d.txt" % number, comment)

                # Send to diary
                post_id = api.new_post(post, diary_id)
                for comment in comments:
                    api.add_comment(post_id, comment)

                if len(comments) == 0:
                    messagebox.showinfo("Info", "Пост успешно опубликован")
                else:
                    messagebox.showinfo("Info", "Пост успешно опубликован, тексты комментариев ищите в файлах *comment_N.txt")
        except Exception as e:
            # GUI boundary: report the failure to the user.
            messagebox.showinfo("Error", str(e))
Ejemplo n.º 18
0
    def test_log(self):
        FILE_NAME = "test_log.txt"
        log = Diary(self.INIT_DIR, async=False, file_name=FILE_NAME)
        self.assertTrue(exists_with_ext(os.path.join(
            self.INIT_DIR,
            FILE_NAME
            ), '.txt')
        )

        log.log(self.INFO)
        log.logdb.assert_event_logged(self.INFO, level="INFO", limit=1)
        log.close()

        self.assertEquals(os.path.split(log.log_file.name)[-1], FILE_NAME)

        with open(os.path.join(self.INIT_DIR, FILE_NAME)) as f:
            self.assertTrue(self.INFO in f.readline())
Ejemplo n.º 19
0
Archivo: main.py Proyecto: bchmnn/pydy
    def __init__(self):
        """Build the main 'Diary' window and wire its widgets together.

        Creates the diary model and the header, search bar, side box and text
        view widgets, hands them all to a ``Linker``, gives the linker back to
        the header, side box and search bar, and finally packs everything into
        a box that becomes the window's child.
        """

        Gtk.Window.__init__(self, title='Diary')
        self.set_default_size(800, 600)
        # Closing the window ends the GTK main loop.
        self.connect('destroy', Gtk.main_quit)

        main_box = Gtk.Box()
        size_group = Gtk.SizeGroup(Gtk.SizeGroupMode.HORIZONTAL)

        # initializing all relevant classes
        # NOTE(review): the user name is hard-coded here.
        diary = Diary("Test User")
        textview = Textview()
        header = Header()
        searchbar = Searchbar()
        sidebox = Sidebox()

        # transfer classes to linker
        linker = Linker(diary, header, sidebox, searchbar, textview)

        # transfer linker to header and sidebox
        header.set_connection_linker(linker)
        sidebox.set_connection_linker(linker)
        # NOTE(review): update_year() runs after the linker is attached;
        # presumably it needs the linker -- confirm before reordering.
        sidebox.update_year()
        searchbar.set_connection_linker(linker)
        searchbar.set_revealer_signal()

        # connect size_group to header and sidebox
        header.set_size_group(size_group)
        sidebox.set_size_group(size_group)

        sidebox.set_revealer_signal()

        # setup relevant buttons in header
        header.set_backbutton()
        header.set_forwardbutton()
        header.set_searchbutton()
        header.set_addbutton()
        header.set_editbutton()
        # The header is used as the window's title bar.
        self.set_titlebar(header)

        # create new side_box to add searchbar and sidebox
        # and add it to beginning of mainbox
        side_box = Gtk.VBox()
        side_box.pack_start(searchbar, False, False, 0)
        side_box.pack_start(sidebox, True, True, 0)
        side_box.set_hexpand(False)
        main_box.pack_start(side_box, False, False, 0)

        # add separator between side_box and textview
        # NOTE(review): an HSeparator is used between widgets packed side by
        # side; verify that a vertical separator was not intended.
        separator = Gtk.HSeparator()
        separator.set_size_request(1, 0)
        main_box.pack_start(separator, False, False, 0)

        # add textview to end of mainbox
        main_box.pack_start(textview, False, True, 0)

        self.add(main_box)
Ejemplo n.º 20
0
def _retrieve_from_diary(year, number):
    """Build a ``Diary`` for ``year``/``number`` and scrape every section into it.

    The base URL is composed from ``URL_BASE``, ``year`` and ``number``.  For
    each key of ``SECTIONS``, in sorted order, the mapped section is added to
    the diary and the section's page is passed to ``WScrap.scrap_page``
    together with the diary.

    Returns the ``Diary`` after all sections have been processed.
    """
    url_base = compose_url(URL_BASE, year, number)

    dia = Diary(year, number, url_base)

    # Parenthesised single-argument form: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print("Retrieving contents from: %s" % url_base)

    for section in sorted(SECTIONS):
        url = compose_url(url_base, section)

        dia.add_section(SECTIONS[section])

        WScrap.scrap_page(url, dia)

    return dia
Ejemplo n.º 21
0
def _retrieve_from_diary(year, number):
    """Build a ``Diary`` for ``year``/``number`` and scrape every section into it.

    The base URL is composed from ``URL_BASE``, ``year`` and ``number``.  For
    each key of ``SECTIONS``, in sorted order, the mapped section is added to
    the diary and the section's page is passed to ``WScrap.scrap_page``
    together with the diary.

    Returns the ``Diary`` after all sections have been processed.
    """
    url_base = compose_url(URL_BASE, year, number)

    dia = Diary(year, number, url_base)

    # Parenthesised single-argument form: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print("Retrieving contents from: %s" % url_base)

    for section in sorted(SECTIONS):
        url = compose_url(url_base, section)

        dia.add_section(SECTIONS[section])

        WScrap.scrap_page(url, dia)

    return dia
Ejemplo n.º 22
0
    def test_custom_everything(self):
        """Custom event class, custom DB class and explicit level all work together.

        Logs a ``UserEvent`` with ``level=critical`` and checks that the first
        line of the file is the event's formatted text and that the matching
        ``user_activity`` row holds its dt, level, info and user name.
        """
        logger = Diary(self.API_DIR, file_name="withlevel.txt", db_name="level_user_events.db",
                       db=UserActivityDB, event=UserEvent)
        event_to_log = UserEvent(self.INFO, user_name="super")
        logger.log(event_to_log, level=critical)
        logger.close()
        with open(logger.log_file.name) as f:
            # This used to be assertTrue(expected, actual): the second argument
            # is only the failure message there, so the check always passed.
            self.assertEqual(event_to_log.formatted() + '\n', f.readline())

        with UserActivityDB(logger.db_file.name) as db:
            entries = db.cursor.execute("""SELECT * FROM user_activity WHERE
                                        log=(?) AND level LIKE (?) AND user=(?)""",
                              (event_to_log.info, event_to_log.level_str, event_to_log.user_name))
            entry = entries.fetchone()

            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(entry[0], event_to_log.dt)
            self.assertEqual(entry[1], event_to_log.level_str)
            self.assertEqual(entry[2], event_to_log.info)
            self.assertEqual(entry[3], event_to_log.user_name)
Ejemplo n.º 23
0
    def test_write(self):
        FILE_NAME = "test_write.txt"
        log = Diary(self.INIT_DIR, async=False, file_name=FILE_NAME)
        simple_event = Event(self.INFO, "LEVEL")

        self.assertTrue(exists_with_ext(os.path.join(
            self.INIT_DIR,
            FILE_NAME
            ), '.txt')
        )

        log._write(simple_event)
        log.logdb.assert_event_logged(self.INFO, level="LEVEL")
        log.close()

        self.assertEquals(os.path.split(log.log_file.name)[-1], FILE_NAME)
        self.assertIs(log.last_logged_event, simple_event)

        with open(os.path.join(self.INIT_DIR, FILE_NAME)) as f:
            self.assertTrue(self.INFO in f.readline())
Ejemplo n.º 24
0
def initialize_diary():
    """Create the 'digits_vs_letters' diary with verbose training and validation notebooks."""
    settings = {
        'name': 'digits_vs_letters',
        'path': 'results',
        'overwrite': False,
        'fig_format': 'svg',
    }
    diary = Diary(**settings)
    for notebook_name in ('training', 'validation'):
        diary.add_notebook(notebook_name, verbose=True)
    return diary
Ejemplo n.º 25
0
    def test_custom_event(self):
        """Plain strings and ready-made ``UserEvent`` objects can both be logged."""
        logger = Diary(self.API_DIR, file_name="UserEvents.txt", event=UserEvent)
        logger.log("Start logging")
        logger.info(UserEvent(self.INFO, user_name="admin"))  # Directly log events
        logger.close()

        with open(logger.log_file.name) as f:
            contents = f.read()
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn("Start logging", contents)
            self.assertIn(logger.last_logged_event.formatted(), contents)

        with DiaryDB(logger.db_file.name) as db:
            db.assert_event_logged(self.INFO, "INFO")
Ejemplo n.º 26
0
def initialize_diary():
    """Create a ``Diary`` filled with random scores and random attendance.

    ``NUMBER_OF_SCORES`` scores are added, each for a randomly chosen class,
    student and score value.  Then, for every (class, date, student)
    combination, attendance is recorded with probability one half.

    Returns the filled diary.
    """
    diary = Diary(STUDENTS, CLASSES, DATES)
    # range instead of xrange: works on both Python 2 and Python 3.
    for _ in range(NUMBER_OF_SCORES):
        diary.add_score(random.choice(CLASSES), random.choice(STUDENTS),
                        random.choice(SCORES))

    for clazz in CLASSES:
        for date in DATES:
            for student in STUDENTS:
                if random.choice([True, False]):
                    diary.add_attendance(clazz, student, date)
    return diary
Ejemplo n.º 27
0
    def test_custom_db_formatted_event(self):
        """A custom DB class stores ``UserEvent`` fields; the file holds both logged entries."""
        logger = Diary(self.API_DIR, file_name="withdb.txt", db_name="user_events.db",
                       db=UserActivityDB, event=UserEvent)

        logger.log("Starting app")
        event_to_log = UserEvent("Super user logged in", user_name="super")
        logger.debug(event_to_log)
        logger.close()
        with open(logger.log_file.name) as f:
            contents = f.read()
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn("Starting app", contents)
            self.assertIn(logger.last_logged_event.formatted(), contents)

        with UserActivityDB(logger.db_file.name) as db:
            entries = db.cursor.execute("""SELECT * FROM user_activity WHERE
                                        log=(?) AND level LIKE (?) AND user=(?)""",
                              (event_to_log.info, event_to_log.level_str, event_to_log.user_name))
            entry = entries.fetchone()

            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(entry[0], event_to_log.dt)
            self.assertEqual(entry[1], event_to_log.level_str)
            self.assertEqual(entry[2], event_to_log.info)
            self.assertEqual(entry[3], event_to_log.user_name)
Ejemplo n.º 28
0
    def test_levels_setting_levels(self):
        log = Diary(self.INIT_DIR, db_name="levels.db", async=False)
        e = Event(self.INFO, level="")
        log.info(e)
        self.assertIs(e.level, levels.info)
        log.warn(e)
        self.assertIs(e.level, levels.warn)
        log.error(e)
        self.assertIs(e.level, levels.error)
        log.debug(e)
        self.assertIs(e.level, levels.debug)

        log.close()

        with DiaryDB(log.db_file.name) as db:
            db.assert_event_logged(self.INFO, level="INFO", limit=4)
            db.assert_event_logged(self.INFO, level="WARN", limit=4)
            db.assert_event_logged(self.INFO, level="ERROR", limit=4)
            db.assert_event_logged(self.INFO, level="DEBUG", limit=4)
Ejemplo n.º 29
0
def main():
    """Compare six per-class scoring models across datasets and export a summary.

    For every dataset in ``data.datasets`` the experiment is repeated
    ``mc_iterations`` times, each time with a fresh shuffled stratified
    ``n_folds`` split.  Inside each fold every class is taken in turn as the
    "actual" class; models are trained on the training samples of that class
    only and scored on the whole test fold with ROC AUC, where the positive
    label is "test sample belongs to the actual class".

    One row per (dataset, iteration, fold, class, model) is collected in a
    DataFrame.  Afterwards the AUCs are weighted by the class prior,
    summed over classes, averaged per dataset and model, pivoted into a
    table and handed to ``export_results``.
    """
    dataset_names = ['diabetes', 'ecoli', 'glass', 'heart-statlog',
                     'ionosphere', 'iris', 'letter', 'mfeat-karhunen',
                     'mfeat-morphological', 'mfeat-zernike', 'optdigits',
                     'pendigits', 'sonar', 'vehicle', 'waveform-5000']

    data = Data(dataset_names=dataset_names)

    # NOTE(review): `diary` is only referenced by the commented-out plotting
    # calls further down.
    diary = Diary(name='hempstalk', path='results', overwrite=False,
                  fig_format='svg')
    diary.add_notebook('cross_validation')

    # Columns for the DataFrame
    columns=['Dataset', 'MC iteration', 'N-fold id', 'Actual class', 'Model',
            'AUC', 'Prior']
    # Create a DataFrame to record all intermediate results
    df = pd.DataFrame(columns=columns)

    mc_iterations = 10
    n_folds = 10

    # NOTE(review): `gammas` is never read in this function; the OneClassSVM
    # below uses a fixed gamma=0.5.
    gammas = {"diabetes":0.00005, "ecoli":0.1, "glass":0.005,
              "heart-statlog":0.0001, "ionosphere":0.00005, "iris":0.0005,
              "letter":0.000005, "mfeat-karhunen":0.0001,
              "mfeat-morphological":0.0000001, "mfeat-zernike":0.000001,
              "optdigits":0.00005, "pendigits":0.000001, "sonar":0.001,
              "vehicle":0.00005, "waveform-5000":0.001}

    # NOTE(review): dict.iteritems() exists only on Python 2.
    for i, (name, dataset) in enumerate(data.datasets.iteritems()):
        print('Dataset number {}'.format(i))

        data.sumarize_datasets(name)
        for mc in np.arange(mc_iterations):
            # A new shuffled split is drawn for every Monte Carlo iteration.
            skf = StratifiedKFold(dataset.target, n_folds=n_folds, shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                        dataset.data, dataset.target, test_fold, test_folds)
                n_training = np.alen(y_train)
                # NOTE(review): the three w_auc_fold_* accumulators are never
                # read, and prior_sum is only incremented, never used.
                w_auc_fold_dens = 0
                w_auc_fold_bag = 0
                w_auc_fold_com = 0
                prior_sum = 0
                for actual_class in dataset.classes:
                    # Training samples of the current class only.
                    tr_class = x_train[y_train == actual_class, :]
                    # Drop the columns that hold a single distinct value in
                    # this class, from both the training and the test data.
                    tr_class_unique_values = [np.unique(tr_class[:,column]).shape[0] for column in
                                              range(tr_class.shape[1])]
                    cols_keep = np.where(np.not_equal(tr_class_unique_values,1))[0]
                    tr_class = tr_class[:,cols_keep]
                    x_test_cleaned = x_test[:,cols_keep]
                    # 1 where the test sample belongs to the current class.
                    t_labels = (y_test == actual_class).astype(int)
                    # NOTE(review): both operands are integer counts; this is
                    # only a fraction under true division (Python 3 or
                    # `from __future__ import division`) -- confirm.
                    prior = np.alen(tr_class) / n_training
                    # Skip classes with fewer than two training samples or
                    # without any positive sample in the test fold.
                    if np.alen(tr_class) > 1 and not all(t_labels == 0):
                        prior_sum += prior
                        # NOTE(review): n_c is computed but not used below.
                        n_c = tr_class.shape[1]
                        if n_c > np.alen(tr_class):
                            n_c = np.alen(tr_class)


                        # Train a Density estimator
                        model_gmm = GMM(n_components=1, covariance_type='diag')
                        model_gmm.fit(tr_class)

                        # Background check wrapped around a one-class SVM.
                        sv = OneClassSVM(nu=0.1, gamma=0.5)
                        bc = BackgroundCheck(estimator=sv)
                        bc.fit(tr_class)
                        svm_scores = bc.predict_proba(x_test_cleaned)[:, 1]
                        # Generate artificial data
                        new_data = model_gmm.sample(np.alen(tr_class))

                        # Train a Bag of Trees
                        bag = BaggingClassifier(
                            base_estimator=DecisionTreeClassifier(),
                            n_estimators=10)

                        # Real samples are labelled 1, sampled ones 0.
                        new_data = np.vstack((tr_class, new_data))
                        y = np.zeros(np.alen(new_data))
                        y[:np.alen(tr_class)] = 1

                        bag.fit(new_data, y)

                        # Combine the results
                        probs = bag.predict_proba(x_test_cleaned)[:, 1]
                        scores = model_gmm.score(x_test_cleaned)

                        # Odds from the bag times the density score shifted to
                        # start at zero; the clip avoids division by zero.
                        com_scores = (probs / np.clip(1.0 - probs, np.float32(1e-32), 1.0)) * (scores-scores.min())


                        # Generate our new data
                        # FIXME solve problem with #samples < #features
                        pca=True
                        if tr_class.shape[0] < tr_class.shape[1]:
                            pca=False
                        our_new_data = reject.create_reject_data(
                                            tr_class, proportion=1,
                                            method='uniform_hsphere', pca=pca,
                                            pca_variance=0.99, pca_components=0,
                                            hshape_cov=0, hshape_prop_in=0.99,
                                            hshape_multiplier=1.5)
                        # Real samples are labelled 1, generated ones 0.
                        our_new_data = np.vstack((tr_class, our_new_data))
                        y = np.zeros(np.alen(our_new_data))
                        y[:np.alen(tr_class)] = 1

                        # Train Our Bag of Trees
                        our_bag = BaggingClassifier(
                            base_estimator=DecisionTreeClassifier(),
                            n_estimators=10)
                        our_bag.fit(our_new_data, y)
                        # Combine the results
                        our_probs = our_bag.predict_proba(x_test_cleaned)[:, 1]

                        our_comb_scores = (our_probs / np.clip(1.0 - our_probs,
                                np.float32(1e-32), 1.0)) * (scores-scores.min())

                        # Scores for the Density estimator
                        auc_dens = roc_auc_score(t_labels, scores)
                        # Scores for the Bag of trees
                        auc_bag = roc_auc_score(t_labels, probs)
                        # Scores for the Combined model
                        auc_com = roc_auc_score(t_labels, com_scores)
                        # Scores for our Bag of trees (trained on our data)
                        auc_our_bag = roc_auc_score(t_labels, our_probs)
                        # Scores for our combined model (our bag and the density)
                        auc_our_comb = roc_auc_score(t_labels, our_comb_scores)
                        # Scores for the Background Check with SVM
                        auc_svm = roc_auc_score(t_labels, svm_scores)

                        # Create a new DataFrame to append to the original one
                        dfaux = pd.DataFrame([[name, mc, test_fold, actual_class,
                                             'Combined', auc_com, prior],
                                            [name, mc, test_fold, actual_class,
                                             'P(T$|$X)', auc_bag, prior],
                                            [name, mc, test_fold, actual_class,
                                             'P(X$|$A)', auc_dens, prior],
                                            [name, mc, test_fold, actual_class,
                                             'Our Bagg', auc_our_bag, prior],
                                            [name, mc, test_fold, actual_class,
                                             'Our Combined', auc_our_comb, prior],
                                            [name, mc, test_fold, actual_class,
                                             'SVM_BC', auc_svm, prior]],
                                             columns=columns)
                        df = df.append(dfaux, ignore_index=True)

                        # generate_and_save_plots(t_labels, scores, diary, name, mc, test_fold,
                        #                         actual_class, 'P(X$|$A)')
                        # generate_and_save_plots(t_labels, probs, diary, name, mc, test_fold,
                        #                         actual_class, 'P(T$|$X)')
                        # generate_and_save_plots(t_labels, com_scores, diary, name, mc, test_fold,
                        #                         actual_class, 'Combined')
                        # generate_and_save_plots(t_labels, our_probs, diary, name, mc, test_fold,
                        #                         actual_class, 'Our_Bagg')
                        # generate_and_save_plots(t_labels, our_comb_scores, diary, name, mc, test_fold,
                        #                         actual_class, 'Our_Combined')
                        # generate_and_save_plots(t_labels, svm_scores, diary,
                        #                         name, mc, test_fold,
                        #                         actual_class, 'SVM_BC')



    # Convert values to numeric
    df = df.convert_objects(convert_numeric=True)

    # Group everything except classes
    dfgroup_classes = df.groupby(by=['Dataset', 'MC iteration', 'N-fold id',
                                     'Model'])
    # Compute the Prior sum for each dataset, iteration and fold
    df['Prior_sum'] = dfgroup_classes['Prior'].transform(np.sum)
    # Compute the individual weighted AUC per each class and experiment
    df['wAUC'] = df.Prior * df.AUC / df.Prior_sum

    # Sum the weighted AUC of each class per each experiment
    series_wAUC = dfgroup_classes['wAUC'].sum()

    # Transform the series to a DataFrame
    df_wAUC = series_wAUC.reset_index(inplace=False)
    # Compute mean and standard deviation of wAUC per Dataset and model
    final_results = df_wAUC.groupby(['Dataset', 'Model'])['wAUC'].agg([np.mean,
        np.std])
    # Turn the group keys back into ordinary columns
    final_results.reset_index(inplace=True)

    # Represent the results in a table format
    final_table =  final_results.pivot_table(values=['mean', 'std'],
                                             index=['Dataset'], columns=['Model'])

    # Export the results in a csv and LaTeX file
    export_results(final_table)
Ejemplo n.º 30
0
    y = np.hstack((np.ones(np.alen(x)), np.zeros(np.alen(r)))).T
    model_rej.fit(xr, y)

    return model_rej


def train_classifier_model(x, y):
    """Fit an ``svm.SVC`` with probability estimates on ``(x, y)``.

    Returns whatever ``fit`` returns.
    """
    # Alternative estimator: tree.DecisionTreeClassifier(max_depth=3)
    classifier = svm.SVC(probability=True)
    return classifier.fit(x, y)


if __name__ == "__main__":
    diary = Diary(name='test_rgrpg',
                  path='results',
                  overwrite=False,
                  fig_format='svg')
    diary.add_notebook('training')
    diary.add_notebook('validation')

    # for i in  [6]: #range(1,4):
    n_iterations = 1
    n_thresholds = 100
    accuracies = np.empty((n_iterations, n_thresholds))
    recalls = np.empty((n_iterations, n_thresholds))
    for example in [2, 3, 4, 5, 6, 7, 8, 9]:
        np.random.seed(42)
        print('Runing example = {}'.format(example))
        for iteration in range(n_iterations):

            #####################################################
def sgd_optimization_gauss(learning_rate=0.13, n_epochs=1000,
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model, tracking its calibration with Isotonic Regression.

    The function builds a ``LogisticRegression`` classifier on the data
    returned by ``generate_opposite_cs_data``, trains it with minibatch SGD
    and patience-based early stopping and, after every epoch, refits an
    ``IsotonicRegression`` on the training scores, saves several figures
    through the ``Diary`` and records error/accuracy entries.  Nothing is
    returned: results are printed, plotted and stored in the diary.

    NOTE(review): the body relies on Python 2 constructs (``print``
    statements, ``xrange``, and ``/`` producing integer batch counts).

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of samples in each minibatch

    """
    #datasets = load_data(dataset)

    # Experiment log: one notebook per kind of entry recorded below.
    diary = Diary(name='experiment', path='results')
    diary.add_notebook('training')
    diary.add_notebook('validation')

    diary.add_notebook('data')
    samples=[4000,10000]
    diary.add_entry('data', ['samples', samples])
    diary.add_entry('data', ['num_classes', len(samples)])
    diary.add_entry('data', ['batch_size', batch_size])

    #means=[[0,0],[5,5]]
    #cov=[[[1,0],[0,1]],[[3,0],[0,3]]]
    #diary.add_entry('data', ['means', means])
    #diary.add_entry('data', ['covariance', cov])
    #datasets = generate_gaussian_data(means, cov, samples)
    datasets = generate_opposite_cs_data(samples)

    # datasets holds (x, y) pairs for the train, validation and test splits.
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    diary.add_entry('data', ['train_size', len(train_set_y.eval())])
    diary.add_entry('data', ['valid_size', len(valid_set_y.eval())])
    diary.add_entry('data', ['test_size', len(test_set_y.eval())])

    pt = PresentationTier()
    pt.plot_samples(train_set_x.eval(), train_set_y.eval())

    # compute number of minibatches for training, validation and testing
    # NOTE(review): these counts are later passed to xrange/range, so they
    # must be integers; that holds for `/` only under Python 2.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    # Regular delta x delta grid spanning the range of the first two training
    # features; it is scored every epoch to draw the contour lines.
    delta = 20
    x_min = numpy.min(train_set_x.eval(),axis=0)
    x_max = numpy.max(train_set_x.eval(),axis=0)
    x1_lin = numpy.linspace(x_min[0], x_max[0], delta)
    x2_lin = numpy.linspace(x_min[1], x_max[1], delta)

    MX1, MX2 = numpy.meshgrid(x1_lin, x2_lin)
    x_grid = numpy.asarray([MX1.flatten(),MX2.flatten()]).T
    grid_set_x = theano.shared(numpy.asarray(x_grid,
                                             dtype=theano.config.floatX),
                               borrow=True)
    # n_grid_batches is computed but not read again in this function.
    n_grid_batches = grid_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # symbolic input matrix (a minibatch of samples)
    y = T.ivector('y')  # the labels are presented as 1D vector of
                           # [int] labels

    # construct the logistic regression class
    # n_in is the size of the last axis of the training data and n_out is
    # the largest training label plus one.
    n_in = train_set_x.eval().shape[-1]
    n_out = max(train_set_y.eval()) + 1
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Scores
    # The grid is scored in one call (no minibatch index); the training and
    # validation scores are computed one minibatch at a time.
    grid_scores_model = theano.function(inputs=[],
            outputs=classifier.scores(),
            givens={
                x: grid_set_x})

    training_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    # compute the gradient of cost with respect to theta = (W,b)
    g_w = T.grad(cost=cost, wrt=classifier.w)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.w, classifier.w - learning_rate * g_w),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # Accuracy
    validation_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # Loss
    # NOTE(review): these two functions output classifier.errors(y), the same
    # expression used by validate_model/test_model above.
    training_error_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    validation_error_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    print('Creating error and accuracy vectors')
    # One slot per epoch; epochs are counted from 1, so index 0 stays unused.
    error_train  = numpy.zeros(n_epochs+1)
    error_val = numpy.zeros(n_epochs+1)
    accuracy_train = numpy.zeros(n_epochs+1)
    accuracy_val = numpy.zeros(n_epochs+1)
    # Results for Isotonic Regression
    error_train_ir  = numpy.zeros(n_epochs+1)
    error_val_ir = numpy.zeros(n_epochs+1)
    accuracy_train_ir = numpy.zeros(n_epochs+1)
    accuracy_val_ir = numpy.zeros(n_epochs+1)

    # best_params and CS (below) are assigned but never read in this function.
    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                            y_min=0, y_max=1)
    done_looping = False
    epoch = 0
    CS = None
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            # (the name `iter` shadows the builtin inside this function)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                       ' model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            # Out of patience: stop after finishing this epoch's reporting.
            if patience <= iter:
                done_looping = True
                break

        # ---- per-epoch reporting (runs once after every epoch) ----
        scores_grid = grid_scores_model()
        fig = pt.update_contourline(grid_set_x.eval(), scores_grid, delta)
        diary.save_figure(fig, filename='contour_lines', extension='svg')
        scores_train = numpy.asarray([training_scores_model(i) for i
                                    in range(n_train_batches)]).flatten()
        scores_val = numpy.asarray([validation_scores_model(i) for i
                                  in range(n_valid_batches)]).flatten()

        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(scores_train, train_set_y.eval())
        scores_train_ir = ir.predict(scores_train)
        print('IR predict validation probabilities')
        scores_val_ir  = ir.predict(scores_val)

        # Raw and isotonic-calibrated scores, paired with matching labels.
        scores_set = (scores_train, scores_val, scores_train_ir,
                      scores_val_ir)
        labels_set = (train_set_y.eval(), valid_set_y.eval(),
                      train_set_y.eval(), valid_set_y.eval())
        legend = ['train', 'valid', 'iso. train', 'iso. valid']
        fig = pt.plot_reliability_diagram(scores_set, labels_set, legend)
        diary.save_figure(fig, filename='reliability_diagram', extension='svg')

        # TODO add reliability map
        # NOTE(review): the parentheses below do not build tuples, so
        # scores_set is rebound to the bare scores_train array, prob_set is
        # never read, and labels_set/legend still hold four entries. Confirm
        # what plot_reliability_map is meant to receive.
        scores_set = (scores_train)
        prob_set = (train_set_y.eval())
        fig = pt.plot_reliability_map(scores_set, labels_set, legend)
        diary.save_figure(fig, filename='reliability_map', extension='svg')

        fig = pt.plot_histogram_scores(scores_set)
        diary.save_figure(fig, filename='histogram_scores', extension='svg')

        # Performance
        # Mean over minibatches of the per-batch accuracy / error.
        accuracy_train[epoch] = numpy.asarray([training_accuracy_model(i) for i
                                in range(n_train_batches)]).flatten().mean()
        accuracy_val[epoch] = numpy.asarray([validation_accuracy_model(i) for i
                                in range(n_valid_batches)]).flatten().mean()
        error_train[epoch] = numpy.asarray([training_error_model(i) for i
                                in range(n_train_batches)]).flatten().mean()
        error_val[epoch] = numpy.asarray([validation_error_model(i) for i
                               in range(n_valid_batches)]).flatten().mean()

        # Same metrics for the isotonic-regression outputs.
        accuracy_train_ir[epoch] = compute_accuracy(scores_train_ir, train_set_y.eval())
        accuracy_val_ir[epoch] = compute_accuracy(scores_val_ir, valid_set_y.eval())
        error_train_ir[epoch]  = compute_cross_entropy(scores_train_ir, train_set_y.eval())
        error_val_ir[epoch]  = compute_cross_entropy(scores_val_ir, valid_set_y.eval())

        diary.add_entry('training', [error_train[epoch], accuracy_train[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

        # NOTE(review): the slices [1:epoch] stop before index `epoch`, so the
        # value just computed for the current epoch is not plotted. Confirm
        # whether [1:epoch + 1] was intended.
        accuracy_set = (accuracy_train[1:epoch], accuracy_val[1:epoch],
                        accuracy_train_ir[1:epoch], accuracy_val_ir[1:epoch])
        fig = pt.plot_accuracy(accuracy_set, legend)
        diary.save_figure(fig, filename='accuracy', extension='svg')

        error_set = (error_train[1:epoch], error_val[1:epoch],
                     error_train_ir[1:epoch], error_val_ir[1:epoch])
        fig = pt.plot_error(error_set, legend, 'cross-entropy')
        diary.save_figure(fig, filename='error', extension='svg')


    # NOTE(review): scores_grid is only assigned inside the loop above, so this
    # call fails with an unbound name when the loop body never runs
    # (n_epochs <= 0).
    pt.update_contourline(grid_set_x.eval(), scores_grid, delta,
            clabel=True)
    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
                 (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
Ejemplo n.º 32
0
 def test_debug(self):
     DB_NAME = 'levels.db'
     log = Diary(self.INIT_DIR, async=False, db_name=DB_NAME)
     log.debug(self.INFO)
     log.logdb.assert_event_logged(self.INFO, "DEBUG", 1)
     log.close()
Ejemplo n.º 33
0
from diary import Diary

if __name__ == '__main__':
    # Small manual demo: read, write one dated entry, then read again.
    diary = Diary()
    diary.file_read()
    # NOTE(review): presumably `file_safe` is the "save" method of Diary;
    # confirm the spelling against the diary module.
    diary.file_safe('2001-10-01', 'Hahaha nothing')
    diary.file_read()
Ejemplo n.º 34
0
# Experiment configuration.
num_epochs = 30
batch_size = 5000
inner_batch_size = 5000
nb_classes = 2
noise_proportion = 0.25
score_lin = np.linspace(0, 1, 100)
minibatch_method = 'lineal'  # 'random', 'lineal'
n_hidden = [25, 25]
output_activation = 'sigmoid'  # 'isotonic_regression' # sigmoid

# The loss name follows from the number of classes.
if nb_classes == 2:
    loss = 'binary_crossentropy'
else:
    loss = 'categorical_crossentropy'

# Record every hyperparameter of the run in its own notebook.
diary = Diary(name='experiment', path='results')
diary.add_notebook('hyperparameters')
diary.add_entry('hyperparameters', ['train_size', train_size])
diary.add_entry('hyperparameters', ['num_classes', nb_classes])
diary.add_entry('hyperparameters', ['batch_size', batch_size])
diary.add_entry('hyperparameters', ['inner_batch_size', inner_batch_size])
diary.add_entry('hyperparameters', ['minibatch_method', minibatch_method])
diary.add_entry('hyperparameters', ['output_activation', output_activation])
diary.add_entry('hyperparameters', ['loss', loss])
diary.add_entry('hyperparameters', ['optimizer', optimizer.get_config()['name']])
# items() exists on both Python 2 and 3 dictionaries, whereas iteritems()
# was removed in Python 3.
for key, value in optimizer.get_config().items():
    diary.add_entry('hyperparameters', [key, value])
diary.add_entry('hyperparameters', ['binarize', binarize])
diary.add_entry('hyperparameters', ['add_noise', add_noise])
diary.add_entry('hyperparameters', ['noise', noise_proportion])
diary.add_notebook('training')
Ejemplo n.º 35
0
def main(dataset_names=None,
         estimator_type="kernel",
         mc_iterations=1,
         n_folds=10,
         seed_num=42):
    """Compare background check against O-norm and T-norm decompositions.

    For each dataset the function draws ``mc_iterations`` shuffled
    stratified ``n_folds``-fold splits.  On every fold it passes the test
    split through ``generate_outliers`` and measures the accuracy of three
    ``OcDecomposition`` variants, labelled 'BC', 'O-norm' and 'T-norm'.
    Each result is written to the 'validation' notebook of the diary and
    appended to a data frame that is finally handed to ``export_summary``.

    :param dataset_names: names of the datasets to load; defaults to
        ``['glass', 'hepatitis', 'ionosphere', 'vowel']``
    :param estimator_type: density estimator used by the background check,
        one of ``"svm"``, ``"gmm"``, ``"gmm3"`` or ``"kernel"``
    :param mc_iterations: number of repetitions of the cross-validation
    :param n_folds: number of folds of each cross-validation
    :param seed_num: seed given to ``np.random.seed`` before each dataset
    :raises ValueError: if ``estimator_type`` is not a supported value
        (raised when the first fold is processed)
    """
    if dataset_names is None:
        dataset_names = ['glass', 'hepatitis', 'ionosphere', 'vowel']

    # Per-dataset tuned values.  They are only referenced by the disabled
    # code further down and are kept as a record of the tuning.
    bandwidths_o_norm = {
        'glass': 0.09,
        'hepatitis': 0.105,
        'ionosphere': 0.039,
        'vowel': 0.075
    }

    bandwidths_bc = {
        'glass': 0.09,
        'hepatitis': 0.105,
        'ionosphere': 0.039,
        'vowel': 0.0145
    }

    bandwidths_t_norm = {
        'glass': 0.336,
        'hepatitis': 0.015,
        'ionosphere': 0.0385,
        'vowel': 0.0145
    }

    tuned_mus = {
        'glass': [0.094, 0.095, 0.2, 0.0, 0.0, 0.1],
        'vowel': [0.0, 0.0, 0.5, 0.5, 0.5, 0.0]
    }

    tuned_ms = {
        'glass': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        'vowel': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    }

    # Bandwidths actually used for every dataset.
    bandwidth_o_norm = 0.05
    bandwidth_t_norm = 0.05
    bandwidth_bc = 0.05

    # Diary to save the partial and final results
    diary = Diary(name='results_Tax2008',
                  path='results',
                  overwrite=False,
                  fig_format='svg')
    # Hyperparameters for this experiment (folds, iterations, seed)
    diary.add_notebook('parameters', verbose=True)
    # Summary for each dataset
    diary.add_notebook('datasets', verbose=False)
    # Partial results for validation
    diary.add_notebook('validation', verbose=True)
    # Final results
    diary.add_notebook('summary', verbose=True)

    columns = ['dataset', 'method', 'mc', 'test_fold', 'acc']
    df = MyDataFrame(columns=columns)

    diary.add_entry('parameters', [
        'seed', seed_num, 'mc_it', mc_iterations, 'n_folds', n_folds,
        'estimator_type', estimator_type, 'bw_o', bandwidth_o_norm, 'bw_t',
        bandwidth_t_norm, 'bw_bc', bandwidth_bc
    ])
    data = Data(dataset_names=dataset_names)
    # NOTE(review): iteritems() exists only on Python 2 dictionaries; it is
    # kept because the type of data.datasets is not visible here.
    for name, dataset in data.datasets.iteritems():
        if name in ['letter', 'shuttle']:
            dataset.reduce_number_instances(0.1)
    export_datasets_description_to_latex(data, path=diary.path)

    for name, dataset in data.datasets.iteritems():
        # Same seed for every dataset, so the splits do not depend on the
        # order in which the datasets are visited.
        np.random.seed(seed_num)
        dataset.print_summary()
        diary.add_entry('datasets', [dataset.__str__()])
        # accuracies_tuned = np.zeros(mc_iterations * n_folds)
        # if name in bandwidths_o_norm.keys():
        #     bandwidth_o_norm = bandwidths_o_norm[name]
        #     bandwidth_t_norm = bandwidths_t_norm[name]
        #     bandwidth_bc = bandwidths_bc[name]
        # else:
        #     bandwidth_o_norm = np.mean(bandwidths_o_norm.values())
        #     bandwidth_t_norm = np.mean(bandwidths_t_norm.values())
        #     bandwidth_bc = np.mean(bandwidths_bc.values())
        for mc in np.arange(mc_iterations):
            skf = StratifiedKFold(dataset.target,
                                  n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                    dataset.data, dataset.target, test_fold, test_folds)

                # if name in ['glass', 'hepatitis', 'ionosphere', 'thyroid',
                #             'iris', 'heart-statlog', 'diabetes', 'abalone',
                #             'mushroom', 'spambase']:
                x_test, y_test = generate_outliers(x_test, y_test)
                # elif name == 'vowel':
                #     x_train = x_train[y_train <= 5]
                #     y_train = y_train[y_train <= 5]
                #     y_test[y_test > 5] = 6
                # elif dataset.n_classes > 2:
                #     x_train = x_train[y_train <= dataset.n_classes/2]
                #     y_train = y_train[y_train <= dataset.n_classes/2]
                #     y_test[y_test > dataset.n_classes/2] = dataset.n_classes+1
                # else:
                #     continue

                if estimator_type == "svm":
                    est = OneClassSVM(nu=0.5, gamma=1.0 / x_train.shape[1])
                elif estimator_type == "gmm":
                    est = GMM(n_components=1)
                elif estimator_type == "gmm3":
                    est = GMM(n_components=3)
                elif estimator_type == "kernel":
                    est = MyMultivariateKernelDensity(kernel='gaussian',
                                                      bandwidth=bandwidth_bc)
                else:
                    # Without this branch `est` stays unbound and the failure
                    # surfaces later as an unrelated UnboundLocalError.
                    raise ValueError(
                        "Unknown estimator_type: {!r}".format(estimator_type))
                # With kernel estimators the per-class models are fitted once
                # and shared by the three methods below.
                estimators = None
                bcs = None
                if estimator_type == "kernel":
                    estimators, bcs = fit_estimators(
                        MyMultivariateKernelDensity(kernel='gaussian',
                                                    bandwidth=bandwidth_bc),
                        x_train, y_train)

                # Untuned background check
                bc = BackgroundCheck(estimator=est, mu=0.0, m=1.0)
                oc = OcDecomposition(base_estimator=bc)
                if estimators is None:
                    oc.fit(x_train, y_train)
                else:
                    oc.set_estimators(bcs, x_train, y_train)
                accuracy = oc.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'BC', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy
                ])
                df = df.append_rows([[name, 'BC', mc, test_fold, accuracy]])

                # O-norm decomposition
                e = MyMultivariateKernelDensity(kernel='gaussian',
                                                bandwidth=bandwidth_o_norm)
                oc_o_norm = OcDecomposition(base_estimator=e,
                                            normalization="O-norm")
                if estimators is None:
                    oc_o_norm.fit(x_train, y_train)
                else:
                    oc_o_norm.set_estimators(estimators, x_train, y_train)
                accuracy_o_norm = oc_o_norm.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'O-norm', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy_o_norm
                ])
                df = df.append_rows(
                    [[name, 'O-norm', mc, test_fold, accuracy_o_norm]])

                # T-norm decomposition
                e = MyMultivariateKernelDensity(kernel='gaussian',
                                                bandwidth=bandwidth_t_norm)
                oc_t_norm = OcDecomposition(base_estimator=e,
                                            normalization="T-norm")
                if estimators is None:
                    oc_t_norm.fit(x_train, y_train)
                else:
                    oc_t_norm.set_estimators(estimators, x_train, y_train)
                accuracy_t_norm = oc_t_norm.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'T-norm', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy_t_norm
                ])
                df = df.append_rows(
                    [[name, 'T-norm', mc, test_fold, accuracy_t_norm]])

                # Tuned background check
                # if name in tuned_mus.keys():
                #     mus = tuned_mus[name]
                #     ms = tuned_ms[name]
                # else:
                #     mus = None
                #     ms = None
                # bc = BackgroundCheck(estimator=est, mu=0.0, m=1.0)
                # oc_tuned = OcDecomposition(base_estimator=bc)
                # oc_tuned.fit(x_train, y_train, mus=mus, ms=ms)
                # accuracy_tuned = oc_tuned.accuracy(x_test, y_test, mus=mus,
                #                                    ms=ms)
                # accuracies_tuned[mc * n_folds + test_fold] = accuracy_tuned
                # diary.add_entry('validation', ['dataset', name,
                #                                'method', 'BC-tuned',
                #                                'mc', mc,
                #                                'test_fold', test_fold,
                #                                'acc', accuracy_tuned])
                # df = df.append_rows([[name, 'BC-tuned', mc, test_fold,
                #                       accuracy_tuned]])
    export_summary(df, diary)
Ejemplo n.º 36
0
 def test_init_new_db(self):
     log = Diary(self.NEW_DB_PATH, async=False)
     log.info(self.INFO)
     log.close()
     with DiaryDB(self.NEW_DB_PATH) as db:
         db.assert_event_logged(self.INFO, level="INFO", limit=1)
Ejemplo n.º 37
0
# Startup checks: every failure is reported through call_exit().
# NOTE(review): call_exit presumably terminates the program; confirm.

# 1. The access key must grant both the 'manage' and 'messages' permissions.
try:
    perms = [
        perm['name']
        for perm in vk.method('groups.getTokenPermissions')['permissions']
    ]
    if 'manage' not in perms or 'messages' not in perms:
        # Message: "The key has insufficient permissions"
        call_exit('У ключа недостаточно прав')
except ApiError:
    # Message: "Invalid access key"
    call_exit('Неверный ключ доступа')

# 2. The configured group must answer the online-status request.
try:
    vk.method('groups.getOnlineStatus', {'group_id': parser['Vk']['group_id']})
except Exception:
    # Message: "Messages are disabled in the group settings or the group id
    # is wrong"
    call_exit('В настройках группы отключены сообщения или неверный id группы')

# 3. Log in to the electronic diary with the configured credentials.
d = Diary(parser['Diary']['diary_login'], parser['Diary']['diary_password'],
          session)
try:
    d.auth()
except ValueError:
    # Message: "Wrong login or password"
    call_exit('Неверный логин или пароль')
except requests.exceptions.HTTPError:
    # Message: "The electronic diary is not working. Try starting later"
    call_exit('Электронный дневник не работает. Попробуйте запустить позже')

# 4. Long Poll settings sent for the group ('message_new' set to 1).
payload = {
    'group_id': parser['Vk']['group_id'],
    'enabled': 1,
    'api_version': '5.92',
    'message_new': 1
}
# NOTE(review): the handler of this try block is outside the excerpt.
try:
    vk.method('groups.setLongPollSettings', payload)
Ejemplo n.º 38
0
from diary import Diary
import os
import time

def is_down(website, timeout=10):
    """Return True when a single ping to *website* does not succeed.

    The command is passed to ``ping`` as an argument list instead of being
    interpolated into a shell string, so shell metacharacters in *website*
    or *timeout* can no longer change the command or its exit status.

    :param website: host name or address handed to ``ping``
    :param timeout: value given to ping's ``-w`` option
    :return: ``False`` if ``ping`` exits with status 0, ``True`` otherwise
    """
    # Local import keeps this function self-contained.
    import subprocess

    command = ['ping', '-c', '1', '-w', str(timeout), str(website)]
    try:
        exit_status = subprocess.call(command)
    except OSError:
        # ping could not be started at all.  The shell-based version
        # reported a non-zero status here, which also meant "down".
        return True
    return exit_status != 0

# Logger that writes to an output file.
logger = Diary("google_status.txt")

# To log into a database instead, give the logger a db file:
# logger = Diary("status.db")

# Poll forever, recording the current status every five seconds.
while True:
    if not is_down("google.com"):
        logger.log("Google is up.")
    else:
        logger.error("GOOGLE IS DOWN!")

    time.sleep(5)
Ejemplo n.º 39
0
 def test_custom_level(self):
     """An event logged with an explicit level is written in standard format."""
     logger = Diary(os.path.join(self.API_DIR))
     logger.log("URGENT ATTENTION NEEDED", level=critical)
     # Close before reading the file back.
     logger.close()
     with open(logger.log_file.name) as f:
         # assertEqual replaces the deprecated assertEquals alias, which was
         # removed in Python 3.12.
         self.assertEqual(f.readline(),
                          formats.standard(logger.last_logged_event) + '\n')
Ejemplo n.º 40
0
 def test_set_db_exc(self):
     """set_db() must raise ValueError on a diary that has no db file."""
     text_only_log = Diary(self.TXT_PATH)
     self.assertIsNone(text_only_log.db_file)
     expect_value_error = self.assertRaises(
         ValueError, msg="Cannot set a database without a database file")
     with expect_value_error:
         text_only_log.set_db()
Ejemplo n.º 41
0
def main(dataset_names=None,
         estimator_type="gmm",
         mc_iterations=20,
         n_folds=5,
         n_ensemble=100,
         seed_num=42):
    """Compare an ensemble of confident classifiers with the Li2014 ensemble.

    For each dataset the function draws ``mc_iterations`` shuffled
    stratified ``n_folds``-fold splits.  On every fold it fits an
    ``Ensemble`` of ``ConfidentClassifier`` models (method 'our') and a
    second ``Ensemble`` built with ``lambd=1e-8`` on the same bootstrap
    samples (method 'Li2014').  Accuracy and log-loss of both are written
    to the 'validation' notebook of the diary and appended to a data frame
    that is finally handed to ``export_summary``.

    :param dataset_names: names of the datasets to load; defaults to the
        union of the lists defined below
    :param estimator_type: density estimator for the background check, one
        of ``"svm"``, ``"gmm"``, ``"gmm3"`` or ``"mymvn"``
    :param mc_iterations: number of repetitions of the cross-validation
    :param n_folds: number of folds of each cross-validation
    :param n_ensemble: value passed as ``n_ensemble`` to both ensembles
    :param seed_num: seed given to ``np.random.seed`` before each dataset
    :raises ValueError: if ``estimator_type`` is not a supported value
        (raised when the first fold is processed)
    """
    if dataset_names is None:
        # All the datasets used in Li2014
        datasets_li2014 = [
            'abalone', 'balance-scale', 'credit-approval', 'dermatology',
            'ecoli', 'german', 'heart-statlog', 'hepatitis', 'horse',
            'ionosphere', 'lung-cancer', 'libras-movement', 'mushroom',
            'diabetes', 'landsat-satellite', 'segment', 'spambase', 'wdbc',
            'wpbc', 'yeast'
        ]

        datasets_hempstalk2008 = [
            'diabetes', 'ecoli', 'glass', 'heart-statlog', 'ionosphere',
            'iris', 'letter', 'mfeat-karhunen', 'mfeat-morphological',
            'mfeat-zernike', 'optdigits', 'pendigits', 'sonar', 'vehicle',
            'waveform-5000'
        ]

        datasets_others = [
            'diabetes', 'ecoli', 'glass', 'heart-statlog', 'ionosphere',
            'iris', 'letter', 'mfeat-karhunen', 'mfeat-morphological',
            'mfeat-zernike', 'optdigits', 'pendigits', 'sonar', 'vehicle',
            'waveform-5000', 'scene-classification', 'tic-tac', 'autos', 'car',
            'cleveland', 'dermatology', 'flare', 'page-blocks', 'segment',
            'shuttle', 'vowel', 'zoo', 'abalone', 'balance-scale',
            'credit-approval', 'german', 'hepatitis', 'lung-cancer'
        ]

        # Datasets that we can add but need to be reduced: 'MNIST'

        # The set removes the names shared by several lists.
        dataset_names = list(
            set(datasets_li2014 + datasets_hempstalk2008 + datasets_others))

    # Diary to save the partial and final results
    diary = Diary(name='results_Li2014',
                  path='results',
                  overwrite=False,
                  fig_format='svg')
    # Hyperparameters for this experiment (folds, iterations, seed)
    diary.add_notebook('parameters', verbose=True)
    # Summary for each dataset
    diary.add_notebook('datasets', verbose=False)
    # Partial results for validation
    diary.add_notebook('validation', verbose=True)
    # Final results
    diary.add_notebook('summary', verbose=True)

    columns = ['dataset', 'method', 'mc', 'test_fold', 'acc', 'logloss']
    df = MyDataFrame(columns=columns)

    diary.add_entry('parameters', [
        'seed', seed_num, 'mc_it', mc_iterations, 'n_folds', n_folds,
        'n_ensemble', n_ensemble, 'estimator_type', estimator_type
    ])
    data = Data(dataset_names=dataset_names)
    # NOTE(review): iteritems() exists only on Python 2 dictionaries; it is
    # kept because the type of data.datasets is not visible here.
    for name, dataset in data.datasets.iteritems():
        if name in ['letter', 'shuttle']:
            dataset.reduce_number_instances(0.1)
    export_datasets_description_to_latex(data, path=diary.path)

    for name, dataset in data.datasets.iteritems():
        # Same seed for every dataset, so the splits do not depend on the
        # order in which the datasets are visited.
        np.random.seed(seed_num)
        dataset.print_summary()
        diary.add_entry('datasets', [dataset.__str__()])
        for mc in np.arange(mc_iterations):
            skf = StratifiedKFold(dataset.target,
                                  n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                    dataset.data, dataset.target, test_fold, test_folds)

                # Binary discriminative classifier
                sv = SVC(kernel='linear', probability=True)
                # Density estimator for the background check
                if estimator_type == "svm":
                    gamma = 1.0 / x_train.shape[1]
                    est = OneClassSVM(nu=0.1, gamma=gamma)
                elif estimator_type == "gmm":
                    est = GMM(n_components=1)
                elif estimator_type == "gmm3":
                    est = GMM(n_components=3)
                elif estimator_type == "mymvn":
                    est = MyMultivariateNormal()
                else:
                    # Without this branch `est` stays unbound and the failure
                    # surfaces later as an unrelated UnboundLocalError.
                    raise ValueError(
                        "Unknown estimator_type: {!r}".format(estimator_type))
                # Multiclass discriminative model with one-vs-one binary class.
                ovo = OvoClassifier(base_classifier=sv)
                classifier = ConfidentClassifier(classifier=ovo,
                                                 estimator=est,
                                                 mu=0.5,
                                                 m=0.5)
                ensemble = Ensemble(base_classifier=classifier,
                                    n_ensemble=n_ensemble)
                # classifier = ConfidentClassifier(classifier=sv,
                #                                  estimator=est, mu=0.5,
                #                                  m=0.5)
                # ovo = OvoClassifier(base_classifier=classifier)
                # ensemble = Ensemble(base_classifier=ovo,
                #                     n_ensemble=n_ensemble)
                # The values returned by fit are reused below so that both
                # ensembles are trained on the same samples.
                xs_bootstrap, ys_bootstrap = ensemble.fit(x_train, y_train)
                accuracy = ensemble.accuracy(x_test, y_test)

                log_loss = ensemble.log_loss(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'our', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy, 'logloss', log_loss
                ])
                df = df.append_rows(
                    [[name, 'our', mc, test_fold, accuracy, log_loss]])

                # Li2014: EP-CC model
                # The classification confidence is used in learning the weights
                # of the base classifier as well as in weighted voting.
                ensemble_li = Ensemble(n_ensemble=n_ensemble, lambd=1e-8)
                ensemble_li.fit(x_train,
                                y_train,
                                xs=xs_bootstrap,
                                ys=ys_bootstrap)

                accuracy_li = ensemble_li.accuracy(x_test, y_test)
                log_loss_li = ensemble_li.log_loss(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'Li2014', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy_li, 'logloss', log_loss_li
                ])
                df = df.append_rows(
                    [[name, 'Li2014', mc, test_fold, accuracy_li,
                      log_loss_li]])

    export_summary(df, diary)
Ejemplo n.º 42
0
# - get students average score in class
# - hold students name and surname
# - Count total attendance of student
# The default interface for interaction should be python interpreter.
# Please, use your imagination and create more functionalities.
# Your project should be able to handle entire school.
# If you have enough courage and time, try storing (reading/writing)
# data in text files (YAML, JSON).
# If you have even more courage, try implementing user interface.
#
#Try to expand your implementation as best as you can.
#Think of as many features as you can, and try implementing them.
#Make intelligent use of Python's syntactic sugar (overloading, iterators, generators, etc.)
#Most of all: CREATE GOOD, RELIABLE, READABLE CODE.
#The goal of this task is for you to SHOW YOUR BEST python programming skills.
#Impress everyone with your skills, show off with your code.
#
#Your program must be runnable with command "python task.py".
#Show some use cases of your library in the code (print some things)
#
#When you are done, upload this code to your GitHub repository.
#
#Delete these comments before commit!
#Good luck.

from diary import Diary, Student, SchoolClass

# Demo objects built from the classes imported from the ``diary`` module.
# Diary is constructed with its default arguments here.
diary = Diary()
# NOTE(review): "biology" is presumably the class/subject name -- confirm
# against SchoolClass.__init__.
schoolclass = SchoolClass("biology")
# NOTE(review): the two positional arguments are presumably the student's
# name and surname -- confirm against Student.__init__.
student = Student("majlosz", "ef")
# Disabled: registering the student with the class.
#schoolclass.add_students([student])
Ejemplo n.º 43
0
 def test_init_no_ext(self):
     """Log one info entry to ``NO_EXT_PATH`` and check it reaches the file.

     The test opens a Diary on ``self.NO_EXT_PATH``, logs ``self.INFO``,
     closes the diary, and then asserts that the first line of the file
     contains the logged text.
     """
     # ``async`` is a reserved keyword since Python 3.7, so spelling it as a
     # literal keyword argument is a SyntaxError there. Unpacking a dict
     # passes exactly the same keyword argument and parses on every version.
     log = Diary(self.NO_EXT_PATH, **{"async": False})
     log.info(self.INFO)
     # Close before reading so the entry is not left in an open handle.
     log.close()
     with open(self.NO_EXT_PATH) as f:
         # assertIn reports both operands on failure, unlike assertTrue.
         self.assertIn(self.INFO, f.readline())
Ejemplo n.º 44
0
def _make_estimator(estimator_type):
    """Return a fresh base density estimator for ``estimator_type``.

    Raises:
        ValueError: if ``estimator_type`` is neither ``"svm"`` nor ``"gmm"``.
    """
    if estimator_type == "svm":
        return OneClassSVM(nu=0.5, gamma=0.5)
    if estimator_type == "gmm":
        return GMM(n_components=3)
    # Previously an unknown type left the estimator unbound and failed later
    # with an unrelated NameError/UnboundLocalError.
    raise ValueError("Unknown estimator_type: %r" % (estimator_type,))


def main(dataset_names=None):
    """Cross-validate the one-class decomposition on several datasets.

    For every dataset, ``mc_iterations`` shuffled stratified splits into
    ``n_folds`` folds are drawn; on each fold an ``OcDecomposition`` wrapping
    a ``BackgroundCheck`` estimator is fitted on the training part and its
    accuracy on the test part is recorded. Every partial result is written to
    the 'validation' notebook of a ``Diary`` and the mean/std accuracy per
    dataset and method is written to its 'summary' notebook.

    Args:
        dataset_names: names of the datasets to load through ``Data``.
            When ``None`` a built-in default list is used.

    Raises:
        ValueError: if the configured estimator type is not supported.
    """
    if dataset_names is None:
        dataset_names = [
            'autos',
            'car',
            'cleveland',
            'dermatology',
            'ecoli',
            'flare',
            'glass',
            'led7digit',
            'lymphography',
            'nursery',
            'page-blocks',
            'pendigits',
            'satimage',
            'segment',
            #'shuttle',
            'vehicle',
            'vowel',
            'yeast',
            'zoo',
            'auslan'
        ]

    seed_num = 42
    mc_iterations = 5
    n_folds = 2
    estimator_type = "svm"

    # Diary to save the partial and final results
    diary = Diary(name='results_Krawczyk2015',
                  path='results',
                  overwrite=False,
                  fig_format='svg')
    # Hyperparameters for this experiment (folds, iterations, seed)
    diary.add_notebook('parameters', verbose=True)
    # Summary for each dataset
    diary.add_notebook('datasets', verbose=False)
    # Partial results for validation
    diary.add_notebook('validation', verbose=True)
    # Final results
    diary.add_notebook('summary', verbose=True)

    columns = ['dataset', 'method', 'mc', 'test_fold', 'acc']
    df = MyDataFrame(columns=columns)

    diary.add_entry('parameters', [
        'seed', seed_num, 'mc_it', mc_iterations, 'n_folds', n_folds,
        'estimator_type', estimator_type
    ])
    data = Data(dataset_names=dataset_names)
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` is Python 2 only.
    for name, dataset in data.datasets.items():
        # Re-seed per dataset so each dataset sees the same random splits
        # regardless of which other datasets were processed before it.
        np.random.seed(seed_num)
        dataset.print_summary()
        diary.add_entry('datasets', [str(dataset)])
        for mc in np.arange(mc_iterations):
            # NOTE(review): this is the legacy scikit-learn cross_validation
            # StratifiedKFold signature (labels first, ``n_folds=``) -- confirm
            # the pinned scikit-learn version before upgrading.
            skf = StratifiedKFold(dataset.target,
                                  n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                    dataset.data, dataset.target, test_fold, test_folds)

                est = _make_estimator(estimator_type)
                bc = BackgroundCheck(estimator=est)
                oc = OcDecomposition(base_estimator=bc)
                oc.fit(x_train, y_train)
                accuracy = oc.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'our', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy
                ])
                df = df.append_rows([[name, 'our', mc, test_fold, accuracy]])
    # NOTE(review): ``convert_objects`` is deprecated in recent pandas --
    # confirm the pinned pandas version before replacing it.
    df = df.convert_objects(convert_numeric=True)
    table = df.pivot_table(values=['acc'],
                           index=['dataset'],
                           columns=['method'],
                           aggfunc=[np.mean, np.std])
    diary.add_entry('summary', [table])
Ejemplo n.º 45
0
class FileProcessDB(DiaryDB):
    """DiaryDB subclass that stores events as rows of a ``files`` table."""

    def create_tables(self):
        """Create the ``files`` table if it does not exist yet."""
        schema_sql = '''
            CREATE TABLE IF NOT EXISTS files
            (inputDT TIMESTAMP, level TEXT, info TEXT, path TEXT, success INT)
                            '''
        self.cursor.execute(schema_sql)

    def log(self, event):
        """Insert one row built from ``event`` into the ``files`` table.

        The row is taken from the event's ``dt``, ``level_str``, ``info``,
        ``path`` and ``success`` attributes, in that column order.
        """
        insert_sql = '''
                INSERT INTO files(inputDT, level, info, path, success)
                VALUES(?, ?, ?, ?, ?)'''
        # NOTE(review): ``self.conn`` is presumably a sqlite3 connection whose
        # context manager commits on success -- confirm against DiaryDB.
        with self.conn:
            row = (
                event.dt,
                event.level_str,
                event.info,
                event.path,
                event.success,
            )
            self.cursor.execute(insert_sql, row)

# Diary that writes its log file and database under "file_info" and stores
# events through the FileProcessDB class.
logger = Diary(
    "file_info",
    file_name="file_processes.log",
    db=FileProcessDB,
    db_name="file_processes.db",
)

target_dir = "data"
# A real run would list the target directory instead:
# files_to_process = os.listdir(target_dir)
# For this demo the integers 0 through 9 stand in for files.
files_to_process = range(10)


for f in files_to_process:
    if process_file(f) == 1:
        e = FileProcessEvent("Success!", 1, f)
        logger.info(e)
    elif process_file(f) == 2:
        e = FileProcessEvent("The goal was not achieved", 2, f)
        logger.warn(e)
    elif process_file(f) == 3:
        e = FileProcessEvent("An error occurred", 3, f)