Beispiel #1
0
 def parse_directory(self):
     """Parse every ``.bz2`` log in ``self.log_dir`` not yet in ``parsed_logs``.

     Files whose base name is already present in the ``parsed_logs`` table
     are skipped. A file whose parsing raises an ordinary exception is
     reported and is still recorded in ``parsed_logs``, so it will not be
     retried on a later run. A keyboard interrupt commits what has been
     written so far and terminates the process.
     """
     self.parser = ScienceLogParser(database=self)
     self._delete_indexes()  # Takes too long while parsing.
     paths = [os.path.join(self.log_dir, name) for name in
         sorted(os.listdir(unicode(self.log_dir))) if
         name.endswith(".bz2")]
     total = len(paths)
     for counter, path in enumerate(paths):
         sys.stdout.flush()
         # Computed once per file; used for the lookup, the progress line
         # and the bookkeeping insert.
         log_name = os.path.basename(path)
         percentage = (counter + 1.) / total * 100
         if self.con.execute(
             "select log_name from parsed_logs where parsed_logs.log_name=?",
             (log_name, )).fetchone() is not None:
             print("(%d/%d) %1.1f%% %s already parsed" %
                   (counter + 1, total, percentage, log_name))
             continue
         print("(%d/%d) %1.1f%% %s" %
               (counter + 1, total, percentage, log_name))
         try:
             self.parser.parse(path)
         except KeyboardInterrupt:
             print("Interrupted!")
             self.con.commit()
             # sys.exit() is always available; the bare exit() helper is
             # only installed by the site module.
             sys.exit()
         except Exception:
             # Ordinary failures only: SystemExit and similar must propagate
             # instead of being reported as an unreadable file.
             print("Can't open file, ignoring.")
         self.con.execute("insert into parsed_logs(log_name) values(?)",
                          (log_name, ))
     self.con.commit()
     self._create_indexes()
Beispiel #2
0
 def test_restored_1(self):
     # Import restored_1.txt and check the single repetition row it yields.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "restored_1.txt")
     ScienceLogParser(db).parse(log_path)

     def count_events(event_type):
         # Number of log rows carrying the given event type.
         cursor = db.con.execute(
             "select count() from log where event_type=?",
             (event_type, ))
         return cursor.fetchone()[0]

     assert count_events(EventTypes.ADDED_CARD) == 1
     assert count_events(EventTypes.REPETITION) == 1
     row = db.con.execute(
         "select * from log where event_type=?",
         (EventTypes.REPETITION, )).fetchone()
     # (column index, expected value) pairs, checked in this order.
     # NOTE(review): the indices presumably follow the column order of the
     # log table -- confirm against the schema.
     expected_columns = (
         (4, 1),
         (5, 2.36),
         (6, 23),
         (7, 8),
         (8, 2),
         (9, 0),
         (10, 0),
         (11, 89 * 24 * 60 * 60),
         (12, 0),  # No last rep data.
     )
     for index, value in expected_columns:
         assert row[index] == value
     assert row[14] - row[2] == 0
     assert row[13] == 5
Beispiel #3
0
 def parse_directory(self):
     """Parse all ``.bz2`` logs in ``self.log_dir`` that were not parsed before.

     A log counts as parsed when its base name is found in the
     ``parsed_logs`` table. Logs that raise an ordinary exception while
     being parsed are reported and nevertheless added to ``parsed_logs``.
     On a keyboard interrupt the pending changes are committed and the
     process exits.
     """
     self.parser = ScienceLogParser(database=self)
     self._delete_indexes()  # Takes too long while parsing.
     log_paths = [os.path.join(self.log_dir, entry) for entry in
         sorted(os.listdir(unicode(self.log_dir))) if
         entry.endswith(".bz2")]
     log_count = len(log_paths)
     for counter, log_path in enumerate(log_paths):
         sys.stdout.flush()
         # Shared by the lookup, the progress output and the final insert.
         log_name = os.path.basename(log_path)
         percentage = (counter + 1.) / log_count * 100
         known = self.con.execute(
             "select log_name from parsed_logs where parsed_logs.log_name=?",
             (log_name, )).fetchone()
         if known is not None:
             print("(%d/%d) %1.1f%% %s already parsed" %
                   (counter + 1, log_count, percentage, log_name))
             continue
         print("(%d/%d) %1.1f%% %s" %
               (counter + 1, log_count, percentage, log_name))
         try:
             self.parser.parse(log_path)
         except KeyboardInterrupt:
             print("Interrupted!")
             self.con.commit()
             # Use sys.exit(): the plain exit() name only exists when the
             # site module has been loaded.
             sys.exit()
         except Exception:
             # Do not catch SystemExit and other non-Exception signals here;
             # they are not "unreadable file" conditions.
             print("Can't open file, ignoring.")
         self.con.execute("insert into parsed_logs(log_name) values(?)",
             (log_name, ))
     self.con.commit()
     self._create_indexes()
Beispiel #4
0
 def test_logs_new_5(self):
     # Parse new_5.txt, then compare the counters of the first returned and
     # of the most recent repetition row of its single card.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "new_5.txt")
     ScienceLogParser(db).parse(log_path)

     def scalar(sql, event_type):
         # First column of the first row the query yields.
         return db.con.execute(sql, (event_type, )).fetchone()[0]

     count_sql = "select count() from log where event_type=?"
     assert scalar(count_sql, EventTypes.ADDED_CARD) == 1
     assert scalar(count_sql, EventTypes.REPETITION) == 2

     # First repetition row returned for the card.
     assert scalar(
         "select acq_reps from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'",
         EventTypes.REPETITION) == 1
     assert scalar(
         "select ret_reps from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'",
         EventTypes.REPETITION) == 0
     assert scalar(
         "select acq_reps_since_lapse from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'",
         EventTypes.REPETITION) == 1

     # Repetition row with the highest _id.
     assert scalar(
         """select acq_reps from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'
          order by _id desc limit 1""",
         EventTypes.REPETITION) == 2
     assert scalar(
         """select ret_reps from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'
          order by _id desc limit 1""",
         EventTypes.REPETITION) == 0
     assert scalar(
         """select acq_reps_since_lapse from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'
         order by _id desc limit 1""",
         EventTypes.REPETITION) == 2

     assert scalar(
         """select object_id from log where event_type=?""",
         EventTypes.STARTED_SCHEDULER) == "SM2 Mnemosyne"
Beispiel #5
0
 def test_logs_imported_1(self):
     # Parse imported_1.txt, then compare the counters of the first returned
     # and of the most recent repetition row of card 'f5d9bbe7'.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "imported_1.txt")
     ScienceLogParser(db).parse(log_path)

     def scalar(sql, event_type):
         # First column of the first row the query yields.
         return db.con.execute(sql, (event_type, )).fetchone()[0]

     count_sql = "select count() from log where event_type=?"
     assert scalar(count_sql, EventTypes.ADDED_CARD) == 1
     assert scalar(count_sql, EventTypes.REPETITION) == 3

     # First repetition row returned for the card.
     assert scalar(
         "select acq_reps from log where event_type=? and object_id='f5d9bbe7'",
         EventTypes.REPETITION) == 1
     assert scalar(
         "select ret_reps from log where event_type=? and object_id='f5d9bbe7'",
         EventTypes.REPETITION) == 0
     assert scalar(
         "select acq_reps_since_lapse from log where event_type=? and object_id='f5d9bbe7'",
         EventTypes.REPETITION) == 1

     # Repetition row with the highest _id.
     assert scalar(
         """select acq_reps from log where event_type=? and object_id='f5d9bbe7'
          order by _id desc limit 1""",
         EventTypes.REPETITION) == 1
     assert scalar(
         """select ret_reps from log where event_type=? and object_id='f5d9bbe7'
          order by _id desc limit 1""",
         EventTypes.REPETITION) == 2
     assert scalar(
         """select acq_reps_since_lapse from log where event_type=? and object_id='f5d9bbe7'
         order by _id desc limit 1""",
         EventTypes.REPETITION) == 1
Beispiel #6
0
 def test_restored_2(self):
     # Parsing restored_2.txt must log exactly one ADDED_CARD event.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "restored_2.txt")
     ScienceLogParser(db).parse(log_path)
     cursor = db.con.execute(
         "select count() from log where event_type=?",
         (EventTypes.ADDED_CARD, ))
     added_count = cursor.fetchone()[0]
     assert added_count == 1
 def test_past_schedule(self):
     # After parsing schedule_1.txt, query the scheduled-card count for
     # 2009-08-15 (expressed as a negative day offset from today) and for
     # yesterday.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "schedule_1.txt")
     ScienceLogParser(db).parse(log_path)
     elapsed = datetime.date.today() - datetime.date(2009, 8, 15)
     days_back = elapsed.days
     scheduler = self.scheduler()
     assert scheduler.card_count_scheduled_n_days_from_now(-days_back) == 124
     assert scheduler.card_count_scheduled_n_days_from_now(-1) == 0
Beispiel #8
0
 def test_logs_act_interval(self):
     # The most recent repetition of card 'f1300e5a' in actinterval_1.txt
     # must be logged with an actual_interval of 5.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "actinterval_1.txt")
     ScienceLogParser(db).parse(log_path)
     latest_row = db.con.execute(
         """select actual_interval from log where event_type=? and object_id='f1300e5a'
         order by _id desc limit 1""",
         (EventTypes.REPETITION, )).fetchone()
     assert latest_row[0] == 5
Beispiel #9
0
 def import_logs(self, filename):
     """Import the science logs found next to ``filename`` into the database.

     Looks for a ``history`` directory beside ``filename``; if it does not
     exist the user is informed and nothing is imported. Otherwise every
     ``.bz2`` file in it (in sorted order) plus ``log.txt`` beside
     ``filename`` is fed to a ``ScienceLogParser``. Files that raise an
     ordinary exception while being parsed are collected and reported to
     the user at the end.
     """
     w = self.main_widget()
     db = self.database()
     w.set_progress_text(_("Importing history..."))
     base_dir = os.path.dirname(filename)
     log_dir = os.path.join(base_dir, "history")
     if not os.path.exists(log_dir):
         w.close_progress()
         w.show_information(_("No history found to import."))
         return
     # The events that we import from the science logs obviously should not
     # be reexported to these logs (this is true for both the archived logs
     # and log.txt). So, before the import, we flush the SQL logs to the
     # science logs, and after the import we edit the partnership index to
     # skip these entries.
     db.dump_to_science_log()
     # Manage database indexes.
     db.before_1x_log_import()
     log_files = [os.path.join(log_dir, logname) for logname in
         sorted(os.listdir(unicode(log_dir))) if logname.endswith(".bz2")]
     # log.txt can also contain data we need to import, especially on the
     # initial upgrade from 1.x. 'ids_to_parse' will make sure we only pick
     # up the relevant events. (If we do the importing after having used
     # 2.x for a while, there could be duplicate load events, etc, but these
     # don't matter.)
     log_files.append(os.path.join(base_dir, "log.txt"))
     w.set_progress_range(len(log_files))
     ignored_files = []
     parser = ScienceLogParser(self.database(),
         ids_to_parse=self.items_by_id,
         machine_id=self.config().machine_id())
     # A separate loop name keeps the 'filename' argument intact.
     for log_file in log_files:
         try:
             parser.parse(log_file)
         except Exception:
             # Skip files that fail to parse, but let KeyboardInterrupt and
             # SystemExit through instead of listing them as bad files.
             ignored_files.append(log_file)
         w.increase_progress(1)
     if ignored_files:
         w.show_information(_("Ignoring unparsable files:<br/>") +
             '<br/>'.join(ignored_files))
     # Manage database indexes.
     db.after_1x_log_import()
     db.skip_science_log()
Beispiel #10
0
 def import_logs(self, filename):
     """Import the science logs stored alongside ``filename``.

     The logs are the ``.bz2`` files of the ``history`` directory located
     in the same directory as ``filename`` (processed in sorted order),
     followed by ``log.txt`` from that same directory. When there is no
     ``history`` directory the user is told so and the method returns
     without importing. Files whose parsing raises an ordinary exception
     are skipped and listed in a message once the import is done.
     """
     w = self.main_widget()
     db = self.database()
     w.set_progress_text(_("Importing history..."))
     source_dir = os.path.dirname(filename)
     log_dir = os.path.join(source_dir, "history")
     if not os.path.exists(log_dir):
         w.close_progress()
         w.show_information(_("No history found to import."))
         return
     # The events that we import from the science logs obviously should not
     # be reexported to these logs (this is true for both the archived logs
     # and log.txt). So, before the import, we flush the SQL logs to the
     # science logs, and after the import we edit the partnership index to
     # skip these entries.
     db.dump_to_science_log()
     # Manage database indexes.
     db.before_1x_log_import()
     pending = [os.path.join(log_dir, logname) for logname in
         sorted(os.listdir(log_dir)) if logname.endswith(".bz2")]
     # log.txt can also contain data we need to import, especially on the
     # initial upgrade from 1.x. 'ids_to_parse' will make sure we only pick
     # up the relevant events. (If we do the importing after having used
     # 2.x for a while, there could be duplicate load events, etc, but these
     # don't matter.)
     pending.append(os.path.join(source_dir, "log.txt"))
     w.set_progress_range(len(pending))
     ignored_files = []
     parser = ScienceLogParser(self.database(),
                               ids_to_parse=self.items_by_id,
                               machine_id=self.config().machine_id())
     # The loop variable deliberately does not reuse the name 'filename'.
     for pending_file in pending:
         try:
             parser.parse(pending_file)
         except Exception:
             # Only genuine parse failures are ignored; interpreter-level
             # signals such as KeyboardInterrupt propagate.
             ignored_files.append(pending_file)
         w.increase_progress(1)
     if ignored_files:
         w.show_information(_("Ignoring unparsable files:<br/>") +
             '<br/>'.join(ignored_files))
     # Manage database indexes.
     db.after_1x_log_import()
     db.skip_science_log()
Beispiel #11
0
 def test_logs_corrupt_2(self): # Wrong data, isolated deletion event.
     # Nothing from corrupt_2.txt may end up in the log table: no ADDED_CARD
     # events and no rows at all for object '4b59b830'.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "corrupt_2.txt")
     ScienceLogParser(db).parse(log_path)
     added_cards = db.con.execute(
         "select count() from log where event_type=?",
         (EventTypes.ADDED_CARD, )).fetchone()[0]
     assert added_cards == 0
     rows_for_object = db.con.execute(
         "select count() from log where object_id=?",
         ("4b59b830", )).fetchone()[0]
     assert rows_for_object == 0
Beispiel #12
0
 def test_logs_new_1(self):
     # Parse new_1.txt and check event counts, the repetition data of card
     # '9525224f', and the statistics stored with the latest database
     # load/save events.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "new_1.txt")
     ScienceLogParser(db).parse(log_path)

     def scalar(sql, event_type):
         # First column of the first row the query yields.
         return db.con.execute(sql, (event_type, )).fetchone()[0]

     count_sql = "select count() from log where event_type=?"
     assert scalar(count_sql, EventTypes.ADDED_CARD) == 1
     assert scalar(count_sql, EventTypes.REPETITION) == 10

     # First repetition row returned for the card.
     assert scalar(
         "select acq_reps from log where event_type=? and object_id='9525224f'",
         EventTypes.REPETITION) == 1
     assert scalar(
         "select acq_reps_since_lapse from log where event_type=? and object_id='9525224f'",
         EventTypes.REPETITION) == 1

     # Repetition row with the highest _id.
     assert scalar(
         """select scheduled_interval from log where event_type=? and object_id='9525224f'
         order by _id desc limit 1""",
         EventTypes.REPETITION) == (6)*60*60*24
     assert scalar(
         """select actual_interval from log where event_type=? and object_id='9525224f'
         order by _id desc limit 1""",
         EventTypes.REPETITION) == 0 # This is an artificial log.
     timestamp = scalar(
         """select timestamp from log where event_type=? and object_id='9525224f'
         order by _id desc limit 1""",
         EventTypes.REPETITION)
     next_rep = scalar(
         """select next_rep from log where event_type=? and object_id='9525224f'
         order by _id desc limit 1""",
         EventTypes.REPETITION)
     assert next_rep - timestamp == (14-3)*60*60*24

     total_rows = db.con.execute(
         "select count() from log").fetchone()[0]
     assert total_rows == 25

     # Values stored in the acq_reps / ret_reps / lapses columns of the
     # latest LOADED_DATABASE and SAVED_DATABASE rows.
     latest_sql = \
         "select %s from log where event_type=? order by _id desc limit 1"
     expected_latest = (
         (EventTypes.LOADED_DATABASE, "acq_reps", 0),
         (EventTypes.LOADED_DATABASE, "ret_reps", 7),
         (EventTypes.LOADED_DATABASE, "lapses", 336),
         (EventTypes.SAVED_DATABASE, "acq_reps", 0),
         (EventTypes.SAVED_DATABASE, "ret_reps", 12),
         (EventTypes.SAVED_DATABASE, "lapses", 341),
     )
     for event_type, column, value in expected_latest:
         assert scalar(latest_sql % column, event_type) == value
Beispiel #13
0
 def test_score(self):
     # Check the retention score for 2009-08-17 and for today after parsing
     # score_1.txt, then run the statistics page for variants 1 to 5.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "score_1.txt")
     ScienceLogParser(db).parse(log_path)
     elapsed = datetime.date.today() - datetime.date(2009, 8, 17)
     score_then = db.retention_score_n_days_ago(elapsed.days)
     assert score_then == 5/7.*100
     score_today = db.retention_score_n_days_ago(0)
     assert score_today == 0
     from mnemosyne.libmnemosyne.statistics_pages.retention_score import RetentionScore
     page = RetentionScore(self.mnemosyne.component_manager)
     variant = 1
     while variant < 6:
         page.prepare_statistics(variant)
         variant += 1
Beispiel #14
0
 def test_logs_new_2(self):
     # new_2.txt must produce one added card and one repetition whose
     # acquisition counters are both 1.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "new_2.txt")
     ScienceLogParser(db).parse(log_path)

     def scalar(sql, event_type):
         # First column of the first row the query yields.
         return db.con.execute(sql, (event_type, )).fetchone()[0]

     count_sql = "select count() from log where event_type=?"
     assert scalar(count_sql, EventTypes.ADDED_CARD) == 1
     assert scalar(count_sql, EventTypes.REPETITION) == 1
     assert scalar(
         "select acq_reps from log where event_type=? and object_id='8da62cfb'",
         EventTypes.REPETITION) == 1
     assert scalar(
         "select acq_reps_since_lapse from log where event_type=? and object_id='8da62cfb'",
         EventTypes.REPETITION) == 1
Beispiel #15
0
 def test_logs_new_6(self):
     # Parse new_6.txt and compare the full first-returned and most recent
     # repetition rows of its card against known values.
     db = self.database()
     db.update_card_after_log_import = (lambda x, y, z: 0)
     db.before_1x_log_import()
     log_path = os.path.join(os.getcwd(), "tests", "files", "new_6.txt")
     ScienceLogParser(db).parse(log_path)

     def count_events(event_type):
         # Number of log rows carrying the given event type.
         return db.con.execute(
             "select count() from log where event_type=?",
             (event_type, )).fetchone()[0]

     def check_row(row, columns_4_to_12, delta_14_minus_2, column_13):
         # Columns 4..12 are compared one by one, in order, followed by the
         # difference between columns 14 and 2 and finally column 13.
         # NOTE(review): the indices presumably follow the column order of
         # the log table -- confirm against the schema.
         for offset, value in enumerate(columns_4_to_12):
             assert row[4 + offset] == value
         assert row[14] - row[2] == delta_14_minus_2
         assert row[13] == column_13

     assert count_events(EventTypes.ADDED_CARD) == 1
     assert count_events(EventTypes.REPETITION) == 2

     first_row = db.con.execute(
         "select * from log where event_type=? and object_id='4c53e29a-f9e9-498b-8beb-d3a494f61bca.1.1'",
         (EventTypes.REPETITION, )).fetchone()
     check_row(first_row, (5, 2.5, 1, 0, 0, 1, 0, 0, 0), 345600, 0)

     latest_row = db.con.execute(
         """select * from log where event_type=? and object_id='4c53e29a-f9e9-498b-8beb-d3a494f61bca.1.1'
         order by _id desc limit 1""",
         (EventTypes.REPETITION, )).fetchone()
     check_row(latest_row, (2, 2.5, 1, 1, 0, 1, 1, 302986, 10), 475774, 1)
Beispiel #16
0
class LogDatabase(object):
    """Buffer parsed science-log data in memory and save it in bulk.

    The instance hands itself to ``ScienceLogParser`` as ``database``; the
    ``log_*`` and card-related methods below are presumably the callbacks
    the parser invokes (verify against the parser). Only
    ``log_repetition`` stores anything: it appends a ``Log`` object to
    ``self.log``. Every other callback is a no-op in this backend.
    """

    # Number of successfully parsed files after which the buffers are saved.
    MAX_BEFORE_COMMIT = 100

    def __init__(self, log_dir):
        """Remember ``log_dir`` and create the parser and empty buffers."""
        self.log_dir = log_dir
        self.parser = ScienceLogParser(database=self)

        self.parsed_logs = []  # ParsedLogs objects waiting for commit().
        self.log = []  # Log objects waiting for commit().

    def parse_directory(self):
        """Parse every ``.bz2`` file in ``self.log_dir`` not parsed before.

        A file is skipped when a ``ParsedLogs`` row with its base name
        exists. Files that raise an ordinary exception are reported and
        still queued as parsed. The buffers are committed after every
        ``MAX_BEFORE_COMMIT`` successfully parsed files and once more at
        the end. If the parser returns a true value or Ctrl-C is pressed,
        the process exits without committing the buffered data.
        """
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in
                     sorted(os.listdir(str(self.log_dir)))
                     if filename.endswith(".bz2")]
        filenames_count = len(filenames)
        parsed_since_commit = 0
        for counter, filename in enumerate(filenames):
            log_name = os.path.basename(filename)
            percentage = (counter + 1.) / filenames_count * 100
            entry = ParsedLogs.objects.filter(log_name=log_name).first()
            if entry is not None:
                print("(%d/%d) %1.1f%% %s already parsed" %
                      (counter + 1, filenames_count, percentage, log_name))
                continue
            print("(%d/%d) %1.1f%% %s" %
                  (counter + 1, filenames_count, percentage, log_name))

            interrupted = False
            try:
                interrupted = self.parser.parse(filename)
                parsed_since_commit += 1
            except KeyboardInterrupt:
                interrupted = True
            except Exception:
                # Ordinary failures only; SystemExit and similar propagate
                # rather than being reported as an unreadable file.
                print("Can't open file, ignoring.")
            if interrupted:
                print("Interrupted!")
                # No commit here: whatever is still buffered is dropped.
                exit()

            self.parsed_logs.append(ParsedLogs(log_name=log_name))
            if parsed_since_commit >= self.MAX_BEFORE_COMMIT:
                print("Committing...", flush=True, end=' ')
                self.commit()
                print("Done!")
                parsed_since_commit = 0
        self.commit()
        self._create_indexes()

    def commit(self):
        """Bulk-save both buffers in one transaction, then empty them.

        A keyboard interrupt during the save rolls back and exits the
        process.
        """
        try:
            with transaction.atomic():
                ParsedLogs.objects.bulk_create(self.parsed_logs)
                Log.objects.bulk_create(self.log)
                self.parsed_logs = []
                self.log = []
        except KeyboardInterrupt:
            # NOTE(review): atomic() presumably has already rolled back when
            # the exception leaves the block -- confirm that this explicit
            # rollback is still wanted.
            transaction.rollback()
            print("Interrupted! Rolling back")
            exit()

    def _delete_indexes(self):
        """No-op in this backend: no indexes are dropped before parsing."""
        pass

    def _create_indexes(self):
        """No-op in this backend: no indexes are created after parsing."""
        pass

    def log_started_program(self, timestamp, program_name_version):
        """Ignored: program start events are not stored."""
        pass

    def log_stopped_program(self, timestamp):
        """Ignored: program stop events are not stored."""
        pass

    def log_started_scheduler(self, timestamp, scheduler_name):
        """Ignored: scheduler start events are not stored."""
        pass

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
                            non_memorised_count, active_count):
        """Ignored: database load events are not stored."""
        pass

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
                           non_memorised_count, active_count):
        """Ignored: database save events are not stored."""
        pass

    def log_added_card(self, timestamp, card_id):
        """Ignored: card creation events are not stored."""
        pass

    def log_deleted_card(self, timestamp, card_id):
        """Ignored: card deletion events are not stored."""
        pass

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
                       ret_reps, lapses, acq_reps_since_lapse,
                       ret_reps_since_lapse, scheduled_interval,
                       actual_interval, thinking_time, next_rep,
                       scheduler_data):
        """Queue one repetition event as a ``Log`` object in ``self.log``.

        ``timestamp`` and ``thinking_time`` are truncated with ``int()``;
        ``scheduler_data`` is accepted but not stored. The user id is taken
        from ``self.parser.user_id``.
        """
        self.log.append(
            Log(user_id=self.parser.user_id,
                event=EventTypes.REPETITION,
                timestamp=int(timestamp),
                object_id=card_id,
                grade=grade,
                easiness=easiness,
                acq_reps=acq_reps,
                ret_reps=ret_reps,
                lapses=lapses,
                acq_reps_since_lapse=acq_reps_since_lapse,
                ret_reps_since_lapse=ret_reps_since_lapse,
                scheduled_interval=scheduled_interval,
                actual_interval=actual_interval,
                thinking_time=int(thinking_time),
                next_rep=next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        """Ignored: per-card offset / last repetition data is not stored."""
        pass

    def offset_last_rep(self, card_id):
        """Return ``None``: no per-card offset / last repetition is kept."""
        pass

    def update_card_after_log_import(self, id, creation_time, offset):
        """Ignored: cards are not updated by this backend."""
        pass
Beispiel #17
0
class LogDatabase(object):
    def __init__(self, log_dir):
        self.log_dir = log_dir
        self._connection = None
        db_name = os.path.join(self.log_dir, "logs.db")
        initialisation_needed = not os.path.exists(db_name)
        self.con = sqlite3.connect(db_name,
                                   timeout=0.1,
                                   isolation_level="EXCLUSIVE")
        self.con.row_factory = sqlite3.Row
        if initialisation_needed:
            self.con.executescript(SCHEMA)

    def parse_directory(self):
        self.parser = ScienceLogParser(database=self)
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in \
            sorted(os.listdir(unicode(self.log_dir))) if \
            filename.endswith(".bz2")]
        filenames_count = len(filenames)
        for counter, filename in enumerate(filenames):
            sys.stdout.flush()
            if self.con.execute(\
                "select log_name from parsed_logs where parsed_logs.log_name=?",
                (os.path.basename(filename), )).fetchone() is not None:
                print "(%d/%d) %1.1f%% %s already parsed" % \
                      (counter + 1, filenames_count,
                      (counter + 1.) / filenames_count * 100, \
                      os.path.basename(filename))
                continue
            print "(%d/%d) %1.1f%% %s" % (counter + 1, filenames_count,
                (counter + 1.) / filenames_count * 100, \
                os.path.basename(filename))
            try:
                self.parser.parse(filename)
            except KeyboardInterrupt:
                print "Interrupted!"
                self.con.commit()
                exit()
            except:
                print "Can't open file, ignoring."
            self.con.execute("insert into parsed_logs(log_name) values(?)",
                             (os.path.basename(filename), ))
        self.con.commit()
        self._create_indexes()

    def _delete_indexes(self):
        self.con.execute("drop index if exists i_log_timestamp;")
        self.con.execute("drop index if exists i_log_user_id;")
        self.con.execute("drop index if exists i_log_object_id;")

    def _create_indexes(self):
        self.con.execute("create index i_log_timestamp on log (timestamp);")
        self.con.execute("create index i_log_user_id on log (user_id);")
        self.con.execute("create index i_log_object_id on log (object_id);")

    def log_started_program(self, timestamp, program_name_version):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_PROGRAM, int(timestamp),
             program_name_version))

    def log_stopped_program(self, timestamp):
        self.con.execute(\
            "insert into log(user_id, event, timestamp) values(?,?,?)",
            (self.parser.user_id, EventTypes.STOPPED_PROGRAM, int(timestamp)))

    def log_started_scheduler(self, timestamp, scheduler_name):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_SCHEDULER, int(timestamp),
            scheduler_name))

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
                            non_memorised_count, active_count):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.LOADED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
                           non_memorised_count, active_count):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.SAVED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_added_card(self, timestamp, card_id):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.ADDED_CARD, int(timestamp), card_id))

    def log_deleted_card(self, timestamp, card_id):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.DELETED_CARD, int(timestamp), card_id))

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
                       ret_reps, lapses, acq_reps_since_lapse,
                       ret_reps_since_lapse, scheduled_interval,
                       actual_interval, thinking_time, next_rep,
                       scheduler_data):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, grade,
            easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            thinking_time, next_rep)
            values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.REPETITION, int(timestamp), card_id,
            grade, easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            int(thinking_time), next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        self.con.execute(\
            """insert or replace into _cards(id, offset, last_rep)
            values(?,?,?)""",
            (card_id + self.parser.user_id, offset, int(last_rep)))

    def offset_last_rep(self, card_id):
        sql_result = self.con.execute(
            """select offset, last_rep
           from _cards where _cards.id=?""",
            (card_id + self.parser.user_id, )).fetchone()
        return sql_result["offset"], sql_result["last_rep"]

    def update_card_after_log_import(self, id, creation_time, offset):
        pass

    def dump_reps_to_txt_file(self, filename):
        f = file(filename, "w")
        for cursor in self.con.execute("select * from log"):
            print >> f, cursor["user_id"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["timestamp"])), \
                cursor["object_id"], cursor["grade"], \
                cursor["easiness"], cursor["acq_reps"], \
                cursor["ret_reps"], cursor["lapses"], \
                cursor["acq_reps_since_lapse"], \
                cursor["ret_reps_since_lapse"], \
                cursor["scheduled_interval"], cursor["actual_interval"], \
                cursor["thinking_time"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["next_rep"])), \
                cursor["event"]
Beispiel #18
0
class LogDatabase(object):

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self._connection = None
        db_name = os.path.join(self.log_dir, "logs.db")
        initialisation_needed = not os.path.exists(db_name)
        self.con = sqlite3.connect(db_name, timeout=0.1,
                                   isolation_level="EXCLUSIVE")
        self.con.row_factory = sqlite3.Row
        if initialisation_needed:
            self.con.executescript(SCHEMA)

    def parse_directory(self):
        self.parser = ScienceLogParser(database=self)
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in \
            sorted(os.listdir(unicode(self.log_dir))) if \
            filename.endswith(".bz2")]
        filenames_count = len(filenames)
        for counter, filename in enumerate(filenames):
            sys.stdout.flush()
            if self.con.execute(\
                "select log_name from parsed_logs where parsed_logs.log_name=?",
                (os.path.basename(filename), )).fetchone() is not None:
                print "(%d/%d) %1.1f%% %s already parsed" % \
                      (counter + 1, filenames_count,
                      (counter + 1.) / filenames_count * 100, \
                      os.path.basename(filename))
                continue
            print "(%d/%d) %1.1f%% %s" % (counter + 1, filenames_count,
                (counter + 1.) / filenames_count * 100, \
                os.path.basename(filename))
            try:
                self.parser.parse(filename)
            except KeyboardInterrupt:
                print "Interrupted!"
                self.con.commit()
                exit()
            except:
                print "Can't open file, ignoring."
            self.con.execute("insert into parsed_logs(log_name) values(?)",
                (os.path.basename(filename), ))
        self.con.commit()
        self._create_indexes()

    def _delete_indexes(self):
        self.con.execute("drop index if exists i_log_timestamp;")
        self.con.execute("drop index if exists i_log_user_id;")
        self.con.execute("drop index if exists i_log_object_id;")

    def _create_indexes(self):
        self.con.execute("create index i_log_timestamp on log (timestamp);")
        self.con.execute("create index i_log_user_id on log (user_id);")
        self.con.execute("create index i_log_object_id on log (object_id);")

    def log_started_program(self, timestamp, program_name_version):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_PROGRAM, int(timestamp),
             program_name_version))

    def log_stopped_program(self, timestamp):
        self.con.execute(\
            "insert into log(user_id, event, timestamp) values(?,?,?)",
            (self.parser.user_id, EventTypes.STOPPED_PROGRAM, int(timestamp)))

    def log_started_scheduler(self, timestamp, scheduler_name):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_SCHEDULER, int(timestamp),
            scheduler_name))

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
        non_memorised_count, active_count):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.LOADED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
        non_memorised_count, active_count):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.SAVED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_added_card(self, timestamp, card_id):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.ADDED_CARD, int(timestamp), card_id))

    def log_deleted_card(self, timestamp, card_id):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.DELETED_CARD, int(timestamp), card_id))

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
        ret_reps, lapses, acq_reps_since_lapse, ret_reps_since_lapse,
        scheduled_interval, actual_interval, thinking_time,
        next_rep, scheduler_data):
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, grade,
            easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            thinking_time, next_rep)
            values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.REPETITION, int(timestamp), card_id,
            grade, easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            int(thinking_time), next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        self.con.execute(\
            """insert or replace into _cards(id, offset, last_rep)
            values(?,?,?)""",
            (card_id + self.parser.user_id, offset, int(last_rep)))

    def offset_last_rep(self, card_id):
        sql_result = self.con.execute("""select offset, last_rep
           from _cards where _cards.id=?""",
           (card_id + self.parser.user_id, )).fetchone()
        return sql_result["offset"], sql_result["last_rep"]

    def update_card_after_log_import(self, id, creation_time, offset):
        pass

    def dump_reps_to_txt_file(self, filename):
        f = file(filename, "w")
        for cursor in self.con.execute("select * from log"):
            print >> f, cursor["user_id"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["timestamp"])), \
                cursor["object_id"], cursor["grade"], \
                cursor["easiness"], cursor["acq_reps"], \
                cursor["ret_reps"], cursor["lapses"], \
                cursor["acq_reps_since_lapse"], \
                cursor["ret_reps_since_lapse"], \
                cursor["scheduled_interval"], cursor["actual_interval"], \
                cursor["thinking_time"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["next_rep"])), \
                cursor["event"]
Beispiel #19
0
    def __init__(self, log_dir):
        self.log_dir = log_dir
        self.parser = ScienceLogParser(database=self)

        self.parsed_logs = []
        self.log = []