def import_logs(self, filename):
    w = self.main_widget()
    db = self.database()
    w.set_progress_text(_("Importing history..."))
    log_dir = os.path.join(os.path.dirname(filename), "history")
    if not os.path.exists(log_dir):
        w.close_progress()
        w.show_information(_("No history found to import."))
        return
    # The events that we import from the science logs obviously should
    # not be re-exported to these logs (this is true for both the
    # archived logs and log.txt). So, before the import, we flush the
    # SQL logs to the science logs, and after the import we edit the
    # partnership index to skip these entries.
    db.dump_to_science_log()
    # Manage database indexes.
    db.before_1x_log_import()
    filenames = [os.path.join(log_dir, logname) for logname in
                 sorted(os.listdir(unicode(log_dir)))
                 if logname.endswith(".bz2")]
    # log.txt can also contain data we need to import, especially on the
    # initial upgrade from 1.x. 'ids_to_parse' will make sure we only
    # pick up the relevant events. (If we do the importing after having
    # used 2.x for a while, there could be duplicate load events, etc.,
    # but these don't matter.)
    filenames.append(os.path.join(os.path.dirname(filename), "log.txt"))
    w.set_progress_range(len(filenames))
    ignored_files = []
    parser = ScienceLogParser(self.database(),
                              ids_to_parse=self.items_by_id,
                              machine_id=self.config().machine_id())
    for filename in filenames:
        try:
            parser.parse(filename)
        except:
            ignored_files.append(filename)
        w.increase_progress(1)
    if ignored_files:
        w.show_information(_("Ignoring unparsable files:<br/>") +
                           '<br/>'.join(ignored_files))
    # Manage database indexes.
    db.after_1x_log_import()
    db.skip_science_log()
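
# The 'ids_to_parse' mechanism above is why duplicate load events in
# log.txt are harmless: the parser only replays events whose object id
# belongs to the items being imported.  A minimal sketch of that
# filtering idea follows; the event tuple format and this helper are
# hypothetical illustrations, not the actual ScienceLogParser internals.

def _filter_events_sketch(events, ids_to_parse):
    # 'events' is assumed to be an iterable of (event_type, object_id,
    # payload) tuples; 'ids_to_parse' is a set or dict of imported ids.
    for event_type, object_id, payload in events:
        if object_id in ids_to_parse:
            yield (event_type, object_id, payload)

# Example: events for ids we did not import are silently dropped.
#   list(_filter_events_sketch([("R", "a", 1), ("R", "b", 2)], {"a"}))
#   -> [("R", "a", 1)]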
# Imports needed by this excerpt; ScienceLogParser, EventTypes and the
# SCHEMA string come from elsewhere in the project.
import os
import sqlite3
import sys
import time


class LogDatabase(object):

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self._connection = None
        db_name = os.path.join(self.log_dir, "logs.db")
        initialisation_needed = not os.path.exists(db_name)
        self.con = sqlite3.connect(db_name, timeout=0.1,
                                   isolation_level="EXCLUSIVE")
        self.con.row_factory = sqlite3.Row
        if initialisation_needed:
            self.con.executescript(SCHEMA)

    def parse_directory(self):
        self.parser = ScienceLogParser(database=self)
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in
                     sorted(os.listdir(unicode(self.log_dir)))
                     if filename.endswith(".bz2")]
        filenames_count = len(filenames)
        for counter, filename in enumerate(filenames):
            sys.stdout.flush()
            # Skip logs that a previous run already parsed.
            if self.con.execute(
                "select log_name from parsed_logs where parsed_logs.log_name=?",
                (os.path.basename(filename), )).fetchone() is not None:
                print "(%d/%d) %1.1f%% %s already parsed" % \
                    (counter + 1, filenames_count,
                     (counter + 1.) / filenames_count * 100,
                     os.path.basename(filename))
                continue
            print "(%d/%d) %1.1f%% %s" % \
                (counter + 1, filenames_count,
                 (counter + 1.) / filenames_count * 100,
                 os.path.basename(filename))
            try:
                self.parser.parse(filename)
            except KeyboardInterrupt:
                print "Interrupted!"
                self.con.commit()
                exit()
            except:
                print "Can't open file, ignoring."
            self.con.execute("insert into parsed_logs(log_name) values(?)",
                             (os.path.basename(filename), ))
            self.con.commit()
        self._create_indexes()

    def _delete_indexes(self):
        self.con.execute("drop index if exists i_log_timestamp;")
        self.con.execute("drop index if exists i_log_user_id;")
        self.con.execute("drop index if exists i_log_object_id;")

    def _create_indexes(self):
        self.con.execute("create index i_log_timestamp on log (timestamp);")
        self.con.execute("create index i_log_user_id on log (user_id);")
        self.con.execute("create index i_log_object_id on log (object_id);")

    def log_started_program(self, timestamp, program_name_version):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_PROGRAM, int(timestamp),
             program_name_version))

    def log_stopped_program(self, timestamp):
        self.con.execute(
            "insert into log(user_id, event, timestamp) values(?,?,?)",
            (self.parser.user_id, EventTypes.STOPPED_PROGRAM, int(timestamp)))

    def log_started_scheduler(self, timestamp, scheduler_name):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.STARTED_SCHEDULER, int(timestamp),
             scheduler_name))

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
                            non_memorised_count, active_count):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.LOADED_DATABASE, int(timestamp),
             machine_id, scheduled_count, non_memorised_count, active_count))

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
                           non_memorised_count, active_count):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id, acq_reps,
            ret_reps, lapses) values(?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.SAVED_DATABASE, int(timestamp),
             machine_id, scheduled_count, non_memorised_count, active_count))

    def log_added_card(self, timestamp, card_id):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.ADDED_CARD, int(timestamp),
             card_id))

    def log_deleted_card(self, timestamp, card_id):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id)
            values(?,?,?,?)""",
            (self.parser.user_id, EventTypes.DELETED_CARD, int(timestamp),
             card_id))

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
                       ret_reps, lapses, acq_reps_since_lapse,
                       ret_reps_since_lapse, scheduled_interval,
                       actual_interval, thinking_time, next_rep,
                       scheduler_data):
        self.con.execute(
            """insert into log(user_id, event, timestamp, object_id, grade,
            easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
            ret_reps_since_lapse, scheduled_interval, actual_interval,
            thinking_time, next_rep)
            values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (self.parser.user_id, EventTypes.REPETITION, int(timestamp),
             card_id, grade, easiness, acq_reps, ret_reps, lapses,
             acq_reps_since_lapse, ret_reps_since_lapse, scheduled_interval,
             actual_interval, int(thinking_time), next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        self.con.execute(
            """insert or replace into _cards(id, offset, last_rep)
            values(?,?,?)""",
            (card_id + self.parser.user_id, offset, int(last_rep)))

    def offset_last_rep(self, card_id):
        sql_result = self.con.execute(
            """select offset, last_rep from _cards where _cards.id=?""",
            (card_id + self.parser.user_id, )).fetchone()
        return sql_result["offset"], sql_result["last_rep"]

    def update_card_after_log_import(self, id, creation_time, offset):
        pass

    def dump_reps_to_txt_file(self, filename):
        f = open(filename, "w")
        for cursor in self.con.execute("select * from log"):
            print >> f, cursor["user_id"], \
                time.strftime("%Y-%m-%d %H:%M:%S",
                              time.localtime(cursor["timestamp"])), \
                cursor["object_id"], cursor["grade"], \
                cursor["easiness"], cursor["acq_reps"], \
                cursor["ret_reps"], cursor["lapses"], \
                cursor["acq_reps_since_lapse"], \
                cursor["ret_reps_since_lapse"], \
                cursor["scheduled_interval"], cursor["actual_interval"], \
                cursor["thinking_time"], \
                time.strftime("%Y-%m-%d %H:%M:%S",
                              time.localtime(cursor["next_rep"])), \
                cursor["event"]
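
# The class above executes a SCHEMA script that is not part of this
# excerpt.  A plausible reconstruction from the insert and select
# statements it issues; the table and column names are fixed by those
# queries, but the column types here are guesses.
SCHEMA_SKETCH = """
    create table log(
        user_id text,
        event integer,
        timestamp integer,
        object_id text,
        grade integer,
        easiness real,
        acq_reps integer,
        ret_reps integer,
        lapses integer,
        acq_reps_since_lapse integer,
        ret_reps_since_lapse integer,
        scheduled_interval integer,
        actual_interval integer,
        thinking_time integer,
        next_rep integer
    );
    create table parsed_logs(
        log_name text
    );
    create table _cards(
        id text primary key,
        offset integer,
        last_rep integer
    );
"""
# Note that _cards.id needs a primary key (or unique) constraint for
# 'insert or replace' in set_offset_last_rep() to behave as an upsert.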
# Imports assumed by this Django-based rewrite; ScienceLogParser and
# EventTypes come from the surrounding project.
import os

from django.db import transaction

from .models import Log, ParsedLogs  # assumed module path


class LogDatabase(object):

    MAX_BEFORE_COMMIT = 100

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self.parser = ScienceLogParser(database=self)
        # Rows are buffered here and bulk-inserted in commit().
        self.parsed_logs = []
        self.log = []

    def parse_directory(self):
        self._delete_indexes()  # Takes too long while parsing.
        filenames = [os.path.join(self.log_dir, filename) for filename in
                     sorted(os.listdir(str(self.log_dir)))
                     if filename.endswith(".bz2")]
        filenames_count = len(filenames)
        i = 0
        for counter, filename in enumerate(filenames):
            log_name = os.path.basename(filename)
            entry = ParsedLogs.objects.filter(log_name=log_name).first()
            if entry is not None:
                print("(%d/%d) %1.1f%% %s already parsed" %
                      (counter + 1, filenames_count,
                       (counter + 1.) / filenames_count * 100,
                       os.path.basename(filename)))
                continue
            print("(%d/%d) %1.1f%% %s" %
                  (counter + 1, filenames_count,
                   (counter + 1.) / filenames_count * 100,
                   os.path.basename(filename)))
            interrupted = False
            try:
                interrupted = self.parser.parse(filename)
                i += 1
            except KeyboardInterrupt:
                interrupted = True
            except:
                print("Can't open file, ignoring.")
            if interrupted:
                print("Interrupted!")
                # self.commit()
                exit()
            self.parsed_logs.append(ParsedLogs(log_name=log_name))
            if i >= self.MAX_BEFORE_COMMIT:
                print("Committing...", flush=True, end=' ')
                self.commit()
                print("Done!")
                i = 0
        self.commit()
        self._create_indexes()

    def commit(self):
        try:
            with transaction.atomic():
                ParsedLogs.objects.bulk_create(self.parsed_logs)
                Log.objects.bulk_create(self.log)
                self.parsed_logs = []
                self.log = []
        except KeyboardInterrupt:
            # transaction.atomic() has already rolled back by the time the
            # exception reaches us; an explicit transaction.rollback() here
            # would raise a TransactionManagementError.
            print("Interrupted! Rolling back")
            exit()

    def _delete_indexes(self):
        pass
        # self.con.execute("drop index if exists i_log_timestamp;")
        # self.con.execute("drop index if exists i_log_user_id;")
        # self.con.execute("drop index if exists i_log_object_id;")

    def _create_indexes(self):
        pass
        # self.con.execute("create index i_log_timestamp on log (timestamp);")
        # self.con.execute("create index i_log_user_id on log (user_id);")
        # self.con.execute("create index i_log_object_id on log (object_id);")

    def log_started_program(self, timestamp, program_name_version):
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id)
        #     values(?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.STARTED_PROGRAM,
        #      int(timestamp), program_name_version))
        pass

    def log_stopped_program(self, timestamp):
        # self.log.append(
        #     Log(user_id=self.parser.user_id,
        #         event=EventTypes.STOPPED_PROGRAM,
        #         timestamp=int(timestamp)))
        # self.con.execute(
        #     "insert into log(user_id, event, timestamp) values(?,?,?)",
        #     (self.parser.user_id, EventTypes.STOPPED_PROGRAM,
        #      int(timestamp)))
        pass

    def log_started_scheduler(self, timestamp, scheduler_name):
        # self.log.append(
        #     Log(user_id=self.parser.user_id,
        #         event=EventTypes.STARTED_SCHEDULER,
        #         timestamp=int(timestamp),
        #         object_id=scheduler_name))
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id)
        #     values(?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.STARTED_SCHEDULER,
        #      int(timestamp), scheduler_name))
        pass

    def log_loaded_database(self, timestamp, machine_id, scheduled_count,
                            non_memorised_count, active_count):
        pass
        # self.log.append(
        #     Log(user_id=self.parser.user_id,
        #         event=EventTypes.LOADED_DATABASE,
        #         timestamp=int(timestamp),
        #         object_id=machine_id,
        #         acq_reps=scheduled_count,
        #         ret_reps=non_memorised_count,
        #         lapses=active_count))
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id,
        #     acq_reps, ret_reps, lapses) values(?,?,?,?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.LOADED_DATABASE,
        #      int(timestamp), machine_id, scheduled_count,
        #      non_memorised_count, active_count))

    def log_saved_database(self, timestamp, machine_id, scheduled_count,
                           non_memorised_count, active_count):
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id,
        #     acq_reps, ret_reps, lapses) values(?,?,?,?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.SAVED_DATABASE,
        #      int(timestamp), machine_id, scheduled_count,
        #      non_memorised_count, active_count))
        pass

    def log_added_card(self, timestamp, card_id):
        pass
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id)
        #     values(?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.ADDED_CARD, int(timestamp),
        #      card_id))

    def log_deleted_card(self, timestamp, card_id):
        pass
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id)
        #     values(?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.DELETED_CARD, int(timestamp),
        #      card_id))

    def log_repetition(self, timestamp, card_id, grade, easiness, acq_reps,
                       ret_reps, lapses, acq_reps_since_lapse,
                       ret_reps_since_lapse, scheduled_interval,
                       actual_interval, thinking_time, next_rep,
                       scheduler_data):
        # self.con.execute(
        #     """insert into log(user_id, event, timestamp, object_id, grade,
        #     easiness, acq_reps, ret_reps, lapses, acq_reps_since_lapse,
        #     ret_reps_since_lapse, scheduled_interval, actual_interval,
        #     thinking_time, next_rep)
        #     values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
        #     (self.parser.user_id, EventTypes.REPETITION, int(timestamp),
        #      card_id, grade, easiness, acq_reps, ret_reps, lapses,
        #      acq_reps_since_lapse, ret_reps_since_lapse,
        #      scheduled_interval, actual_interval, int(thinking_time),
        #      next_rep))
        self.log.append(
            Log(user_id=self.parser.user_id,
                event=EventTypes.REPETITION,
                timestamp=int(timestamp),
                object_id=card_id,
                grade=grade,
                easiness=easiness,
                acq_reps=acq_reps,
                ret_reps=ret_reps,
                lapses=lapses,
                acq_reps_since_lapse=acq_reps_since_lapse,
                ret_reps_since_lapse=ret_reps_since_lapse,
                scheduled_interval=scheduled_interval,
                actual_interval=actual_interval,
                thinking_time=int(thinking_time),
                next_rep=next_rep))

    def set_offset_last_rep(self, card_id, offset, last_rep):
        pass
        # self.con.execute(
        #     """insert or replace into _cards(id, offset, last_rep)
        #     values(?,?,?)""",
        #     (card_id + self.parser.user_id, offset, int(last_rep)))

    def offset_last_rep(self, card_id):
        pass
        # sql_result = self.con.execute("""select offset, last_rep
        #     from _cards where _cards.id=?""",
        #     (card_id + self.parser.user_id, )).fetchone()
        # return sql_result["offset"], sql_result["last_rep"]

    def update_card_after_log_import(self, id, creation_time, offset):
        pass
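
# The Django rewrite above depends on ParsedLogs and Log models defined
# elsewhere.  A minimal sketch reconstructed from the fields used in
# parse_directory() and log_repetition(); the module path, field types
# and null-ability are assumptions.
from django.db import models


class ParsedLogs(models.Model):
    log_name = models.TextField()


class Log(models.Model):
    user_id = models.TextField()
    event = models.IntegerField()
    timestamp = models.IntegerField()
    object_id = models.TextField(null=True)
    grade = models.IntegerField(null=True)
    easiness = models.FloatField(null=True)
    acq_reps = models.IntegerField(null=True)
    ret_reps = models.IntegerField(null=True)
    lapses = models.IntegerField(null=True)
    acq_reps_since_lapse = models.IntegerField(null=True)
    ret_reps_since_lapse = models.IntegerField(null=True)
    scheduled_interval = models.IntegerField(null=True)
    actual_interval = models.IntegerField(null=True)
    thinking_time = models.IntegerField(null=True)
    next_rep = models.IntegerField(null=True)

# With MAX_BEFORE_COMMIT = 100, each commit() bulk-inserts roughly a
# hundred files' worth of rows in a single transaction, which is why
# events are buffered in self.log rather than written one row at a time.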