def parse_directory(self):
    """Parse every .bz2 science log in self.log_dir into the database.

    Files already recorded in the 'parsed_logs' table are skipped, so an
    interrupted run can be resumed later.  Progress is printed per file.
    """
    self.parser = ScienceLogParser(database=self)
    self._delete_indexes()  # Takes too long while parsing.
    filenames = [os.path.join(self.log_dir, filename) for filename in \
        sorted(os.listdir(unicode(self.log_dir))) if \
        filename.endswith(".bz2")]
    filenames_count = len(filenames)
    for counter, filename in enumerate(filenames):
        sys.stdout.flush()
        # Skip logs that were fully parsed in a previous run.
        if self.con.execute(\
            "select log_name from parsed_logs where parsed_logs.log_name=?",
            (os.path.basename(filename), )).fetchone() is not None:
            print "(%d/%d) %1.1f%% %s already parsed" % \
                (counter + 1, filenames_count,
                (counter + 1.) / filenames_count * 100, \
                os.path.basename(filename))
            continue
        print "(%d/%d) %1.1f%% %s" % (counter + 1, filenames_count,
            (counter + 1.) / filenames_count * 100, \
            os.path.basename(filename))
        try:
            self.parser.parse(filename)
        except KeyboardInterrupt:
            # Commit what we have so far so the run can be resumed.
            print "Interrupted!"
            self.con.commit()
            exit()
        except:
            # NOTE(review): bare except swallows parse errors too, not just
            # unopenable files, and the file is still marked as parsed below.
            print "Can't open file, ignoring."
        self.con.execute("insert into parsed_logs(log_name) values(?)",
            (os.path.basename(filename), ))
        self.con.commit()
    self._create_indexes()
def test_restored_1(self):
    """Parse restored_1.txt: one added card and one repetition with stats.

    The numeric indexes into sql_res follow the log table's column order
    -- presumably [2]=timestamp, [4]=grade, [5]=easiness, [6]=acq_reps,
    [7]=ret_reps, [8]=lapses, [9]=acq_reps_since_lapse,
    [10]=ret_reps_since_lapse, [11]=scheduled_interval,
    [12]=actual_interval, [13]=thinking_time, [14]=next_rep; TODO confirm
    against the log table schema.
    """
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    filename = os.path.join(os.getcwd(), "tests", "files", "restored_1.txt")
    ScienceLogParser(self.database()).parse(filename)
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    sql_res = self.database().con.execute(\
        "select * from log where event_type=?",
        (EventTypes.REPETITION, )).fetchone()
    assert sql_res[4] == 1
    assert sql_res[5] == 2.36
    assert sql_res[6] == 23
    assert sql_res[7] == 8
    assert sql_res[8] == 2
    assert sql_res[9] == 0
    assert sql_res[10] == 0
    assert sql_res[11] == 89 * 24 * 60 * 60  # 89-day scheduled interval.
    assert sql_res[12] == 0 # No last rep data.
    assert sql_res[14] - sql_res[2] == 0  # next_rep equals the timestamp.
    assert sql_res[13] == 5
def test_logs_new_5(self):
    """Parse new_5.txt: one added card with two repetitions.

    Queries without an 'order by' clause pick the earliest matching row;
    'order by _id desc limit 1' picks the latest one.
    """
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    filename = os.path.join(os.getcwd(), "tests", "files", "new_5.txt")
    ScienceLogParser(self.database()).parse(filename)
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.REPETITION, )).fetchone()[0] == 2
    # First repetition.
    assert self.database().con.execute(\
        "select acq_reps from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select ret_reps from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'",
        (EventTypes.REPETITION, )).fetchone()[0] == 0
    assert self.database().con.execute(\
        "select acq_reps_since_lapse from log where event_type=? and object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    # Second (latest) repetition.
    assert self.database().con.execute(\
        """select acq_reps from log where event_type=? and
        object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'
        order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 2
    assert self.database().con.execute(\
        """select ret_reps from log where event_type=? and
        object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'
        order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 0
    assert self.database().con.execute(\
        """select acq_reps_since_lapse from log where event_type=? and
        object_id='9c8ce28e-1a4b-4148-8287-b8a7790d86d0.1.1'
        order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 2
    # The scheduler name is logged as the object_id of the event.
    assert self.database().con.execute(\
        """select object_id from log where event_type=?""",
        (EventTypes.STARTED_SCHEDULER, )).fetchone()[0] == "SM2 Mnemosyne"
def test_logs_imported_1(self):
    """Parse imported_1.txt: one added card with three repetitions.

    Queries without 'order by' pick the earliest repetition row;
    'order by _id desc limit 1' picks the latest one.
    """
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    filename = os.path.join(os.getcwd(), "tests", "files", "imported_1.txt")
    ScienceLogParser(self.database()).parse(filename)
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.REPETITION, )).fetchone()[0] == 3
    # Earliest repetition.
    assert self.database().con.execute(\
        "select acq_reps from log where event_type=? and object_id='f5d9bbe7'",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select ret_reps from log where event_type=? and object_id='f5d9bbe7'",
        (EventTypes.REPETITION, )).fetchone()[0] == 0
    assert self.database().con.execute(\
        "select acq_reps_since_lapse from log where event_type=? and object_id='f5d9bbe7'",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    # Latest repetition.
    assert self.database().con.execute(\
        """select acq_reps from log where event_type=? and
        object_id='f5d9bbe7' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        """select ret_reps from log where event_type=? and
        object_id='f5d9bbe7' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 2
    assert self.database().con.execute(\
        """select acq_reps_since_lapse from log where event_type=? and
        object_id='f5d9bbe7' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
def test_restored_2(self):
    """Parsing restored_2.txt logs exactly one ADDED_CARD event."""
    db = self.database()
    db.update_card_after_log_import = (lambda x, y, z: 0)
    db.before_1x_log_import()
    log_path = os.path.join(os.getcwd(), "tests", "files", "restored_2.txt")
    ScienceLogParser(db).parse(log_path)
    added_count = db.con.execute(
        "select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )).fetchone()[0]
    assert added_count == 1
def test_past_schedule(self):
    """Cards from schedule_1.txt are all scheduled on 2009-08-15, none later."""
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    log_path = os.path.join(os.getcwd(), "tests", "files", "schedule_1.txt")
    ScienceLogParser(self.database()).parse(log_path)
    elapsed_days = (datetime.date.today() - datetime.date(2009, 8, 15)).days
    scheduler = self.scheduler()
    # All 124 cards fell due on the reference date ...
    assert scheduler.card_count_scheduled_n_days_from_now(-elapsed_days) == 124
    # ... and none yesterday.
    assert scheduler.card_count_scheduled_n_days_from_now(-1) == 0
def test_logs_act_interval(self):
    """The last repetition in actinterval_1.txt has an actual interval of 5s."""
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    log_path = os.path.join(os.getcwd(), "tests", "files",
                            "actinterval_1.txt")
    ScienceLogParser(self.database()).parse(log_path)
    latest_interval = self.database().con.execute(
        """select actual_interval from log where event_type=? and
        object_id='f1300e5a' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0]
    assert latest_interval == 5
def import_logs(self, filename):
    """Import 1.x history logs (history/*.bz2 plus log.txt) next to filename.

    Shows progress in the main widget; unparsable files are reported but
    do not abort the import.
    """
    w = self.main_widget()
    db = self.database()
    w.set_progress_text(_("Importing history..."))
    log_dir = os.path.join(os.path.dirname(filename), "history")
    if not os.path.exists(log_dir):
        w.close_progress()
        w.show_information(_("No history found to import."))
        return
    # The events that we import from the science logs obviously should not
    # be reexported to these logs (this is true for both the archived logs
    # and log.txt). So, before the import, we flush the SQL logs to the
    # science logs, and after the import we edit the partnership index to
    # skip these entries.
    db.dump_to_science_log()
    # Manage database indexes.
    db.before_1x_log_import()
    filenames = [os.path.join(log_dir, logname) for logname in \
        sorted(os.listdir(unicode(log_dir))) if logname.endswith(".bz2")]
    # log.txt can also contain data we need to import, especially on the
    # initial upgrade from 1.x. 'ids_to_parse' will make sure we only pick
    # up the relevant events. (If we do the importing after having used
    # 2.x for a while, there could be duplicate load events, etc, but these
    # don't matter.)
    filenames.append(os.path.join(os.path.dirname(filename), "log.txt"))
    w.set_progress_range(len(filenames))
    ignored_files = []
    parser = ScienceLogParser(self.database(),
        ids_to_parse=self.items_by_id,
        machine_id=self.config().machine_id())
    for filename in filenames:  # NOTE(review): shadows the parameter.
        try:
            parser.parse(filename)
        except:
            # Best effort: unparsable files are collected and reported below.
            ignored_files.append(filename)
        w.increase_progress(1)
    if ignored_files:
        w.show_information(_("Ignoring unparsable files:<br/>") +\
            '<br/>'.join(ignored_files))
    # Manage database indexes.
    db.after_1x_log_import()
    db.skip_science_log()
def import_logs(self, filename):
    """Import 1.x history logs (history/*.bz2 plus log.txt) next to filename.

    Shows progress in the main widget; unparsable files are reported but
    do not abort the import.
    """
    widget = self.main_widget()
    database = self.database()
    widget.set_progress_text(_("Importing history..."))
    history_dir = os.path.join(os.path.dirname(filename), "history")
    if not os.path.exists(history_dir):
        widget.close_progress()
        widget.show_information(_("No history found to import."))
        return
    # The events that we import from the science logs obviously should not
    # be reexported to these logs (this is true for both the archived logs
    # and log.txt).  So, before the import, we flush the SQL logs to the
    # science logs, and after the import we edit the partnership index to
    # skip these entries.
    database.dump_to_science_log()
    # Manage database indexes.
    database.before_1x_log_import()
    archive_names = sorted(name for name in os.listdir(history_dir)
                           if name.endswith(".bz2"))
    log_files = [os.path.join(history_dir, name) for name in archive_names]
    # log.txt can also contain data we need to import, especially on the
    # initial upgrade from 1.x.  'ids_to_parse' will make sure we only pick
    # up the relevant events.  (If we do the importing after having used
    # 2.x for a while, there could be duplicate load events, etc, but these
    # don't matter.)
    log_files.append(os.path.join(os.path.dirname(filename), "log.txt"))
    widget.set_progress_range(len(log_files))
    unparsable_files = []
    parser = ScienceLogParser(self.database(),
        ids_to_parse=self.items_by_id,
        machine_id=self.config().machine_id())
    for log_file in log_files:
        try:
            parser.parse(log_file)
        except:
            unparsable_files.append(log_file)
        widget.increase_progress(1)
    if unparsable_files:
        widget.show_information(_("Ignoring unparsable files:<br/>") +
            '<br/>'.join(unparsable_files))
    # Manage database indexes.
    database.after_1x_log_import()
    database.skip_science_log()
def test_logs_corrupt_2(self):
    """Wrong data / isolated deletion event: nothing must reach the log."""
    db = self.database()
    db.update_card_after_log_import = (lambda x, y, z: 0)
    db.before_1x_log_import()
    log_path = os.path.join(os.getcwd(), "tests", "files", "corrupt_2.txt")
    ScienceLogParser(db).parse(log_path)

    def row_count(sql, args):
        # Single-value convenience wrapper around the raw connection.
        return db.con.execute(sql, args).fetchone()[0]

    assert row_count("select count() from log where event_type=?",
                     (EventTypes.ADDED_CARD, )) == 0
    assert row_count("select count() from log where object_id=?",
                     ("4b59b830", )) == 0
def test_logs_new_1(self):
    """Parse new_1.txt: one added card, ten repetitions, 25 events total.

    For LOADED_DATABASE / SAVED_DATABASE events the acq_reps, ret_reps
    and lapses columns are reused to hold the scheduled, non-memorised
    and active card counts (see LogDatabase.log_loaded_database).
    """
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    filename = os.path.join(os.getcwd(), "tests", "files", "new_1.txt")
    ScienceLogParser(self.database()).parse(filename)
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.REPETITION, )).fetchone()[0] == 10
    assert self.database().con.execute(\
        "select acq_reps from log where event_type=? and object_id='9525224f'",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select acq_reps_since_lapse from log where event_type=? and object_id='9525224f'",
        (EventTypes.REPETITION, )).fetchone()[0] == 1
    # Latest repetition of the card.
    assert self.database().con.execute(\
        """select scheduled_interval from log where event_type=? and
        object_id='9525224f' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == (6)*60*60*24
    assert self.database().con.execute(\
        """select actual_interval from log where event_type=? and
        object_id='9525224f' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0] == 0 # This is an artificial log.
    timestamp = self.database().con.execute(\
        """select timestamp from log where event_type=? and
        object_id='9525224f' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0]
    next_rep = self.database().con.execute(\
        """select next_rep from log where event_type=? and
        object_id='9525224f' order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()[0]
    assert next_rep - timestamp == (14-3)*60*60*24
    assert self.database().con.execute(\
        "select count() from log").fetchone()[0] == 25
    # Database load/save statistics (see docstring for the column reuse).
    assert self.database().con.execute(\
        "select acq_reps from log where event_type=? "
        "order by _id desc limit 1",
        (EventTypes.LOADED_DATABASE, )).fetchone()[0] == 0
    assert self.database().con.execute(\
        "select ret_reps from log where event_type=? order by _id desc limit 1",
        (EventTypes.LOADED_DATABASE, )).fetchone()[0] == 7
    assert self.database().con.execute(\
        "select lapses from log where event_type=? order by _id desc limit 1",
        (EventTypes.LOADED_DATABASE, )).fetchone()[0] == 336
    assert self.database().con.execute(\
        "select acq_reps from log where event_type=? order by _id desc limit 1",
        (EventTypes.SAVED_DATABASE, )).fetchone()[0] == 0
    assert self.database().con.execute(\
        "select ret_reps from log where event_type=? order by _id desc limit 1",
        (EventTypes.SAVED_DATABASE, )).fetchone()[0] == 12
    assert self.database().con.execute(\
        "select lapses from log where event_type=? order by _id desc limit 1",
        (EventTypes.SAVED_DATABASE, )).fetchone()[0] == 341
def test_score(self):
    """Retention score from score_1.txt is 5/7 on 2009-08-17, 0 today.

    Also smoke-tests the RetentionScore statistics page across all of
    its variants.
    """
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    filename = os.path.join(os.getcwd(), "tests", "files", "score_1.txt")
    ScienceLogParser(self.database()).parse(filename)
    days_elapsed = datetime.date.today() - datetime.date(2009, 8, 17)
    assert self.database().retention_score_n_days_ago(days_elapsed.days) \
        == 5/7.*100
    # Presumably no scheduled repetitions today -- verify against fixture.
    assert self.database().retention_score_n_days_ago(0) == 0
    from mnemosyne.libmnemosyne.statistics_pages.retention_score import RetentionScore
    page = RetentionScore(self.mnemosyne.component_manager)
    for i in range(1, 6):
        page.prepare_statistics(i)
def test_logs_new_2(self):
    """Parse new_2.txt: one added card with a single first-time repetition."""
    database = self.database()
    database.update_card_after_log_import = (lambda x, y, z: 0)
    database.before_1x_log_import()
    log_path = os.path.join(os.getcwd(), "tests", "files", "new_2.txt")
    ScienceLogParser(database).parse(log_path)

    def single_value(sql, args):
        # Single-value convenience wrapper around the raw connection.
        return database.con.execute(sql, args).fetchone()[0]

    assert single_value("select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )) == 1
    assert single_value("select count() from log where event_type=?",
        (EventTypes.REPETITION, )) == 1
    assert single_value(
        "select acq_reps from log where event_type=? and object_id='8da62cfb'",
        (EventTypes.REPETITION, )) == 1
    assert single_value(
        "select acq_reps_since_lapse from log where event_type=? and object_id='8da62cfb'",
        (EventTypes.REPETITION, )) == 1
def test_logs_new_6(self):
    """Parse new_6.txt: one added card with two repetitions, all columns checked.

    The numeric indexes into sql_res follow the log table's column order
    -- presumably [2]=timestamp, [4]=grade, [5]=easiness, [6]=acq_reps,
    [7]=ret_reps, [8]=lapses, [9]=acq_reps_since_lapse,
    [10]=ret_reps_since_lapse, [11]=scheduled_interval,
    [12]=actual_interval, [13]=thinking_time, [14]=next_rep; TODO confirm
    against the log table schema.
    """
    self.database().update_card_after_log_import = (lambda x, y, z: 0)
    self.database().before_1x_log_import()
    filename = os.path.join(os.getcwd(), "tests", "files", "new_6.txt")
    ScienceLogParser(self.database()).parse(filename)
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.ADDED_CARD, )).fetchone()[0] == 1
    assert self.database().con.execute(\
        "select count() from log where event_type=?",
        (EventTypes.REPETITION, )).fetchone()[0] == 2
    # First repetition.
    sql_res = self.database().con.execute(\
        "select * from log where event_type=? and object_id='4c53e29a-f9e9-498b-8beb-d3a494f61bca.1.1'",
        (EventTypes.REPETITION, )).fetchone()
    assert sql_res[4] == 5
    assert sql_res[5] == 2.5
    assert sql_res[6] == 1
    assert sql_res[7] == 0
    assert sql_res[8] == 0
    assert sql_res[9] == 1
    assert sql_res[10] == 0
    assert sql_res[11] == 0
    assert sql_res[12] == 0
    assert sql_res[14] - sql_res[2] == 345600  # next_rep 4 days later.
    assert sql_res[13] == 0
    # Second (latest) repetition.
    sql_res = self.database().con.execute(\
        """select * from log where event_type=? and
        object_id='4c53e29a-f9e9-498b-8beb-d3a494f61bca.1.1'
        order by _id desc limit 1""",
        (EventTypes.REPETITION, )).fetchone()
    assert sql_res[4] == 2
    assert sql_res[5] == 2.5
    assert sql_res[6] == 1
    assert sql_res[7] == 1
    assert sql_res[8] == 0
    assert sql_res[9] == 1
    assert sql_res[10] == 1
    assert sql_res[11] == 302986
    assert sql_res[12] == 10
    assert sql_res[14] - sql_res[2] == 475774
    assert sql_res[13] == 1
class LogDatabase(object):

    """Django-ORM-backed store for parsed Mnemosyne science logs.

    Acts as the 'database' callback object for ScienceLogParser: the
    parser calls the log_* methods below for every event it reads.  Rows
    are buffered in memory (self.parsed_logs, self.log) and flushed in
    bulk every MAX_BEFORE_COMMIT successfully parsed files.  Most log_*
    callbacks are intentional no-ops: only repetitions are stored.
    """

    # Number of successfully parsed files between bulk commits.
    MAX_BEFORE_COMMIT = 100

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self.parser = ScienceLogParser(database=self)
        self.parsed_logs = []  # Buffered, uncommitted ParsedLogs rows.
        self.log = []          # Buffered, uncommitted Log rows.

    def parse_directory(self):
        """Parse every .bz2 log in self.log_dir into the database.

        Files already recorded in ParsedLogs are skipped, so an
        interrupted run can be resumed later.
        """
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in
                     sorted(os.listdir(str(self.log_dir)))
                     if filename.endswith(".bz2")]
        filenames_count = len(filenames)
        parsed_since_commit = 0
        for counter, filename in enumerate(filenames):
            log_name = os.path.basename(filename)
            progress = "(%d/%d) %1.1f%%" % (counter + 1, filenames_count,
                (counter + 1.) / filenames_count * 100)
            # .exists() avoids fetching the row just to test membership.
            if ParsedLogs.objects.filter(log_name=log_name).exists():
                print("%s %s already parsed" % (progress, log_name))
                continue
            print("%s %s" % (progress, log_name))
            try:
                # NOTE(review): assumes parse() signals problems by raising,
                # as in the other LogDatabase variants.
                self.parser.parse(filename)
                parsed_since_commit += 1
            except KeyboardInterrupt:
                print("Interrupted!")
                exit()
            except Exception:
                # Best effort: the file is still marked as parsed below so
                # that it is not retried forever.
                print("Can't open file, ignoring.")
            self.parsed_logs.append(ParsedLogs(log_name=log_name))
            if parsed_since_commit >= self.MAX_BEFORE_COMMIT:
                print("Committing...", flush=True, end=' ')
                self.commit()
                print("Done!")
                parsed_since_commit = 0
        self.commit()  # Flush the final partial batch.
        self._create_indexes()

    def commit(self):
        """Bulk-insert the buffered rows atomically and clear the buffers.

        Bug fix: the previous version called transaction.rollback() from
        the KeyboardInterrupt handler, but transaction.atomic() has
        already rolled back by the time the handler runs, and an explicit
        rollback() under autocommit raises TransactionManagementError.
        """
        try:
            with transaction.atomic():
                ParsedLogs.objects.bulk_create(self.parsed_logs)
                Log.objects.bulk_create(self.log)
            self.parsed_logs = []
            self.log = []
        except KeyboardInterrupt:
            print("Interrupted! Rolling back")
            exit()

    def _delete_indexes(self):
        """No-op: index management is left to the Django schema."""
        pass

    def _create_indexes(self):
        """No-op: index management is left to the Django schema."""
        pass

    def log_started_program(self, timestamp, program_name_version):
        """Ignored: program start events are not stored in this variant."""
        pass

    def log_stopped_program(self, timestamp):
        """Ignored: program stop events are not stored in this variant."""
        pass

    def log_started_scheduler(self, timestamp, scheduler_name):
        """Ignored: scheduler events are not stored in this variant."""
        pass

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
                            non_memorised_count, active_count):
        """Ignored: database load events are not stored in this variant."""
        pass

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
                           non_memorised_count, active_count):
        """Ignored: database save events are not stored in this variant."""
        pass

    def log_added_card(self, timestamp, card_id):
        """Ignored: card additions are not stored in this variant."""
        pass

    def log_deleted_card(self, timestamp, card_id):
        """Ignored: card deletions are not stored in this variant."""
        pass

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
                       ret_reps, lapses, acq_reps_since_lapse,
                       ret_reps_since_lapse, scheduled_interval,
                       actual_interval, thinking_time, next_rep,
                       scheduler_data):
        """Buffer one repetition row; flushed to the database by commit()."""
        self.log.append(
            Log(user_id=self.parser.user_id,
                event=EventTypes.REPETITION,
                timestamp=int(timestamp),
                object_id=card_id,
                grade=grade,
                easiness=easiness,
                acq_reps=acq_reps,
                ret_reps=ret_reps,
                lapses=lapses,
                acq_reps_since_lapse=acq_reps_since_lapse,
                ret_reps_since_lapse=ret_reps_since_lapse,
                scheduled_interval=scheduled_interval,
                actual_interval=actual_interval,
                thinking_time=int(thinking_time),
                next_rep=next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        """Ignored in this variant."""
        pass

    def offset_last_rep(self, card_id):
        """Ignored in this variant; returns None."""
        pass

    def update_card_after_log_import(self, id, creation_time, offset):
        """Ignored in this variant."""
        pass
class LogDatabase(object):

    """SQLite-backed store for parsed Mnemosyne science logs.

    Acts as the 'database' callback object for ScienceLogParser: the
    parser calls the log_* methods below for every event it reads.
    """

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self._connection = None
        db_name = os.path.join(self.log_dir, "logs.db")
        # Only run the schema script for a database that does not exist yet.
        initialisation_needed = not os.path.exists(db_name)
        self.con = sqlite3.connect(db_name, timeout=0.1,
            isolation_level="EXCLUSIVE")
        self.con.row_factory = sqlite3.Row
        if initialisation_needed:
            self.con.executescript(SCHEMA)

    def parse_directory(self):
        """Parse every .bz2 log in self.log_dir, resuming past runs.

        Files already recorded in the 'parsed_logs' table are skipped.
        """
        self.parser = ScienceLogParser(database=self)
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in \
            sorted(os.listdir(unicode(self.log_dir))) if \
            filename.endswith(".bz2")]
        filenames_count = len(filenames)
        for counter, filename in enumerate(filenames):
            sys.stdout.flush()
            # Skip logs that were fully parsed in a previous run.
            if self.con.execute(\
                "select log_name from parsed_logs where parsed_logs.log_name=?",
                (os.path.basename(filename), )).fetchone() is not None:
                print "(%d/%d) %1.1f%% %s already parsed" % \
                    (counter + 1, filenames_count,
                    (counter + 1.) / filenames_count * 100, \
                    os.path.basename(filename))
                continue
            print "(%d/%d) %1.1f%% %s" % (counter + 1, filenames_count,
                (counter + 1.) / filenames_count * 100, \
                os.path.basename(filename))
            try:
                self.parser.parse(filename)
            except KeyboardInterrupt:
                # Commit what we have so far so the run can be resumed.
                print "Interrupted!"
                self.con.commit()
                exit()
            except:
                # NOTE(review): bare except swallows parse errors too, and
                # the file is still marked as parsed below.
                print "Can't open file, ignoring."
            self.con.execute("insert into parsed_logs(log_name) values(?)",
                (os.path.basename(filename), ))
            self.con.commit()
        self._create_indexes()

    def _delete_indexes(self):
        # Dropped during bulk import; recreated by _create_indexes().
        self.con.execute("drop index if exists i_log_timestamp;")
        self.con.execute("drop index if exists i_log_user_id;")
        self.con.execute("drop index if exists i_log_object_id;")

    def _create_indexes(self):
        self.con.execute("create index i_log_timestamp on log (timestamp);")
        self.con.execute("create index i_log_user_id on log (user_id);")
        self.con.execute("create index i_log_object_id on log (object_id);")

    def log_started_program(self, timestamp, program_name_version):
        """Store a program start event; the version goes into object_id."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_PROGRAM, int(timestamp),
            program_name_version))

    def log_stopped_program(self, timestamp):
        """Store a program stop event."""
        self.con.execute(\
            "insert into log(user_id, event, timestamp) values(?,?,?)",
            (self.parser.user_id, EventTypes.STOPPED_PROGRAM, int(timestamp)))

    def log_started_scheduler(self, timestamp, scheduler_name):
        """Store a scheduler start event; the name goes into object_id."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_SCHEDULER, int(timestamp),
            scheduler_name))

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
        non_memorised_count, active_count):
        """Store a database load event.

        The acq_reps, ret_reps and lapses columns are reused to hold the
        scheduled, non-memorised and active card counts.
        """
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.LOADED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
        non_memorised_count, active_count):
        """Store a database save event (same column reuse as above)."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.SAVED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_added_card(self, timestamp, card_id):
        """Store a card creation event."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.ADDED_CARD, int(timestamp), card_id))

    def log_deleted_card(self, timestamp, card_id):
        """Store a card deletion event."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.DELETED_CARD, int(timestamp), card_id))

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
        ret_reps, lapses, acq_reps_since_lapse, ret_reps_since_lapse,
        scheduled_interval, actual_interval, thinking_time, next_rep,
        scheduler_data):
        """Store a full repetition event.  scheduler_data is not persisted."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, grade,
            easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            thinking_time, next_rep)
            values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.REPETITION, int(timestamp), card_id,
            grade, easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            int(thinking_time), next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        # The _cards key is card_id concatenated with the user id --
        # presumably to keep ids unique across users; TODO confirm.
        self.con.execute(\
            """insert or replace into _cards(id, offset, last_rep)
            values(?,?,?)""",
            (card_id + self.parser.user_id, offset, int(last_rep)))

    def offset_last_rep(self, card_id):
        """Return the (offset, last_rep) pair stored for this card/user."""
        sql_result = self.con.execute(
            """select offset, last_rep
            from _cards where _cards.id=?""",
            (card_id + self.parser.user_id, )).fetchone()
        return sql_result["offset"], sql_result["last_rep"]

    def update_card_after_log_import(self, id, creation_time, offset):
        # Intentional no-op in this variant.
        pass

    def dump_reps_to_txt_file(self, filename):
        """Dump every log row to a whitespace-separated text file."""
        f = file(filename, "w")
        for cursor in self.con.execute("select * from log"):
            print >> f, cursor["user_id"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["timestamp"])), \
                cursor["object_id"], cursor["grade"], \
                cursor["easiness"], cursor["acq_reps"], \
                cursor["ret_reps"], cursor["lapses"], \
                cursor["acq_reps_since_lapse"], \
                cursor["ret_reps_since_lapse"], \
                cursor["scheduled_interval"], cursor["actual_interval"], \
                cursor["thinking_time"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["next_rep"])), \
                cursor["event"]
class LogDatabase(object):

    """SQLite-backed store for parsed Mnemosyne science logs.

    Duplicate of the variant earlier in this file (modulo formatting);
    acts as the 'database' callback object for ScienceLogParser.
    """

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self._connection = None
        db_name = os.path.join(self.log_dir, "logs.db")
        # Only run the schema script for a database that does not exist yet.
        initialisation_needed = not os.path.exists(db_name)
        self.con = sqlite3.connect(db_name, timeout=0.1,
            isolation_level="EXCLUSIVE")
        self.con.row_factory = sqlite3.Row
        if initialisation_needed:
            self.con.executescript(SCHEMA)

    def parse_directory(self):
        """Parse every .bz2 log in self.log_dir, skipping already parsed files."""
        self.parser = ScienceLogParser(database=self)
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in \
            sorted(os.listdir(unicode(self.log_dir))) if \
            filename.endswith(".bz2")]
        filenames_count = len(filenames)
        for counter, filename in enumerate(filenames):
            sys.stdout.flush()
            if self.con.execute(\
                "select log_name from parsed_logs where parsed_logs.log_name=?",
                (os.path.basename(filename), )).fetchone() is not None:
                print "(%d/%d) %1.1f%% %s already parsed" % \
                    (counter + 1, filenames_count,
                    (counter + 1.) / filenames_count * 100, \
                    os.path.basename(filename))
                continue
            print "(%d/%d) %1.1f%% %s" % (counter + 1, filenames_count,
                (counter + 1.) / filenames_count * 100, \
                os.path.basename(filename))
            try:
                self.parser.parse(filename)
            except KeyboardInterrupt:
                # Commit what we have so far so the run can be resumed.
                print "Interrupted!"
                self.con.commit()
                exit()
            except:
                # NOTE(review): bare except swallows parse errors too.
                print "Can't open file, ignoring."
            self.con.execute("insert into parsed_logs(log_name) values(?)",
                (os.path.basename(filename), ))
            self.con.commit()
        self._create_indexes()

    def _delete_indexes(self):
        # Dropped during bulk import; recreated by _create_indexes().
        self.con.execute("drop index if exists i_log_timestamp;")
        self.con.execute("drop index if exists i_log_user_id;")
        self.con.execute("drop index if exists i_log_object_id;")

    def _create_indexes(self):
        self.con.execute("create index i_log_timestamp on log (timestamp);")
        self.con.execute("create index i_log_user_id on log (user_id);")
        self.con.execute("create index i_log_object_id on log (object_id);")

    def log_started_program(self, timestamp, program_name_version):
        """Store a program start event; the version goes into object_id."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_PROGRAM, int(timestamp),
            program_name_version))

    def log_stopped_program(self, timestamp):
        """Store a program stop event."""
        self.con.execute(\
            "insert into log(user_id, event, timestamp) values(?,?,?)",
            (self.parser.user_id, EventTypes.STOPPED_PROGRAM, int(timestamp)))

    def log_started_scheduler(self, timestamp, scheduler_name):
        """Store a scheduler start event; the name goes into object_id."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_SCHEDULER, int(timestamp),
            scheduler_name))

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
        non_memorised_count, active_count):
        """Store a database load event; acq_reps/ret_reps/lapses columns are
        reused for the scheduled, non-memorised and active card counts."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.LOADED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
        non_memorised_count, active_count):
        """Store a database save event (same column reuse as above)."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.SAVED_DATABASE, int(timestamp),
            machine_id, scheduled_count, non_memorised_count, active_count))

    def log_added_card(self, timestamp, card_id):
        """Store a card creation event."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.ADDED_CARD, int(timestamp), card_id))

    def log_deleted_card(self, timestamp, card_id):
        """Store a card deletion event."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.DELETED_CARD, int(timestamp), card_id))

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
        ret_reps, lapses, acq_reps_since_lapse, ret_reps_since_lapse,
        scheduled_interval, actual_interval, thinking_time, next_rep,
        scheduler_data):
        """Store a full repetition event.  scheduler_data is not persisted."""
        self.con.execute(\
            """insert into log(user_id, event, timestamp, object_id, grade,
            easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            thinking_time, next_rep)
            values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.REPETITION, int(timestamp), card_id,
            grade, easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            int(thinking_time), next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        # The _cards key is card_id concatenated with the user id --
        # presumably to keep ids unique across users; TODO confirm.
        self.con.execute(\
            """insert or replace into _cards(id, offset, last_rep)
            values(?,?,?)""",
            (card_id + self.parser.user_id, offset, int(last_rep)))

    def offset_last_rep(self, card_id):
        """Return the (offset, last_rep) pair stored for this card/user."""
        sql_result = self.con.execute("""select offset, last_rep
            from _cards where _cards.id=?""",
            (card_id + self.parser.user_id, )).fetchone()
        return sql_result["offset"], sql_result["last_rep"]

    def update_card_after_log_import(self, id, creation_time, offset):
        # Intentional no-op in this variant.
        pass

    def dump_reps_to_txt_file(self, filename):
        """Dump every log row to a whitespace-separated text file."""
        f = file(filename, "w")
        for cursor in self.con.execute("select * from log"):
            print >> f, cursor["user_id"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["timestamp"])), \
                cursor["object_id"], cursor["grade"], \
                cursor["easiness"], cursor["acq_reps"], \
                cursor["ret_reps"], cursor["lapses"], \
                cursor["acq_reps_since_lapse"], \
                cursor["ret_reps_since_lapse"], \
                cursor["scheduled_interval"], cursor["actual_interval"], \
                cursor["thinking_time"], \
                time.strftime("%Y-%m-%d %H:%M:%S", \
                time.localtime(cursor["next_rep"])), \
                cursor["event"]
def __init__(self, log_dir):
    """Remember the log directory and set up the parser and write buffers."""
    self.log_dir = log_dir
    # Buffers for rows accumulated while parsing, flushed in bulk later.
    self.parsed_logs = []
    self.log = []
    # The parser calls back into this object for every parsed event.
    self.parser = ScienceLogParser(database=self)