def test_bad_exists_properties(self):
    test = {
        "data": [{"~e~": 1}, {"~e~": 1}],
        "query": {
            "from": TEST_TABLE,
            "select": [{"name": "count", "aggregate": "count"}],
        },
        "expecting_list": {
            "meta": {"format": "value"},
            "data": {"count": 2}
        }
    }
    subtest = wrap(test)
    cont = self.utils.fill_container(subtest, typed=False)

    db = Sqlite(filename="metadata.localhost.sqlite")
    try:
        with db.transaction() as t:
            t.execute(
                "INSERT INTO " + quote_column("meta.columns") +
                "(name, es_type, jx_type, nested_path, es_column, es_index, last_updated) VALUES " +
                quote_set([".", "object", "exists", '["."]', ".", cont.alias, Date.now()])
            )
    except Exception:
        # tolerate failures (the row may already exist)
        pass

    try:
        with db.transaction() as t:
            t.execute(
                "INSERT INTO " + quote_column("meta.columns") +
                "(name, es_type, jx_type, nested_path, es_column, es_index, last_updated) VALUES " +
                quote_set(["~e~", "long", "exists", '["."]', "~e~", cont.alias, Date.now()])
            )
    except Exception:
        # tolerate failures (the row may already exist)
        pass

    self.utils.send_queries(subtest)
def add_cset_entries(self, ordered_rev_list, timestamp=False, number_forward=True):
    '''
    Adds a list of revisions to the table. Assumes ordered_rev_list is
    ordered based on how changesets are found in the changelog. Going
    forwards or backwards is dealt with by flipping the list.

    :param ordered_rev_list: Order given from changeset log searching.
    :param timestamp: If False, records are kept indefinitely, but if holes
        exist: (delete, None, delete, None), those deletes with Nones
        around them will not be deleted.
    :param number_forward: If True, this function will number the revision
        list going forward from max(revnum); otherwise it goes backwards
        from min(revnum), then adds X to all revnums and self.next_revnum,
        where X is the length of ordered_rev_list.
    :return:
    '''
    with self.conn.transaction() as t:
        current_min = t.get_one("SELECT min(revnum) FROM csetLog")[0]
        current_max = t.get_one("SELECT max(revnum) FROM csetLog")[0]
        if not current_min or not current_max:
            current_min = 0
            current_max = 0

        direction = -1
        start = current_min - 1
        if number_forward:
            direction = 1
            start = current_max + 1
            ordered_rev_list = ordered_rev_list[::-1]

        insert_list = [
            (start + direction * count, rev, int(time.time()) if timestamp else -1)
            for count, rev in enumerate(ordered_rev_list)
        ]

        # Drop revisions that already exist, in case of overlapping requests
        fmt_insert_list = []
        for cset_entry in insert_list:
            tmp = self._get_one_revision(t, cset_entry)
            if not tmp:
                fmt_insert_list.append(cset_entry)

        for _, tmp_insert_list in jx.groupby(fmt_insert_list, size=SQL_CSET_BATCH_SIZE):
            t.execute(
                "INSERT INTO csetLog (revnum, revision, timestamp) VALUES " +
                sql_list(
                    quote_set((revnum, revision, timestamp))
                    for revnum, revision, timestamp in tmp_insert_list
                )
            )

        # Move the revision numbers forward if needed
        self.recompute_table_revnums()

    # Start a maintenance run if needed
    if self.check_for_maintenance():
        Log.note("Scheduling maintenance run on clogger.")
        self.maintenance_signal.go()
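# A minimal sketch of the revnum assignment above (illustration only, not part
# of the service): backwards numbering counts down from min(revnum) - 1, while
# forward numbering flips the newest-first list and counts up from
# max(revnum) + 1. All values here are made up.
def _example_revnum_assignment():
    current_min, current_max = 5, 9
    ordered_rev_list = ["aaa", "bbb", "ccc"]  # newest-first, as found in the changelog

    # number_forward=False: count down from current_min - 1
    start, direction = current_min - 1, -1
    backwards = [(start + direction * i, rev) for i, rev in enumerate(ordered_rev_list)]
    assert backwards == [(4, "aaa"), (3, "bbb"), (2, "ccc")]

    # number_forward=True: flip the list, then count up from current_max + 1
    start, direction = current_max + 1, 1
    forwards = [(start + direction * i, rev) for i, rev in enumerate(ordered_rev_list[::-1])]
    assert forwards == [(10, "ccc"), (11, "bbb"), (12, "aaa")]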
def insert_into_db_chunked(transaction, data, cmd, sql_chunk_size=500):
    # For the `cmd` string, we expect something like (don't forget the
    # whitespace at the end):
    # "INSERT INTO temporal (tuid, file, revision, line) VALUES "
    #
    # `data` must be a list of tuples.
    for _, inserts_list in jx.groupby(data, size=sql_chunk_size):
        transaction.execute(
            cmd + sql_list(quote_set(entry) for entry in inserts_list)
        )
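# A hypothetical usage sketch for insert_into_db_chunked. The file, revision,
# and tuid values are made up; `t` is assumed to be an open transaction (e.g.
# from self.conn.transaction()), and the `cmd` string, with its trailing
# space, is the one the comment above expects.
def _example_chunked_insert(t):
    rows = [
        (1, "dom/base/nsDocument.cpp", "58eb13b394f4", 10),
        (2, "dom/base/nsDocument.cpp", "58eb13b394f4", 11),
        (3, "dom/base/nsDocument.cpp", "58eb13b394f4", 12),
    ]
    insert_into_db_chunked(
        t,
        rows,
        "INSERT INTO temporal (tuid, file, revision, line) VALUES ",
        sql_chunk_size=500,
    )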
def initialize_to_range(self, old_rev, new_rev, delete_old=True):
    '''
    Used in service testing to get to very old changesets quickly.

    :param old_rev: The oldest revision to keep
    :param new_rev: The revision to start searching from
    :param delete_old: If True, delete existing csetLog entries first
    :return:
    '''
    old_settings = [
        self.disable_tipfilling,
        self.disable_backfilling,
        self.disable_maintenance,
        self.disable_deletion
    ]
    self.disable_tipfilling = True
    self.disable_backfilling = True
    self.disable_maintenance = True
    self.disable_deletion = True

    old_rev = old_rev[:12]
    new_rev = new_rev[:12]

    with self.working_locker:
        if delete_old:
            with self.conn.transaction() as t:
                t.execute("DELETE FROM csetLog")
        with self.conn.transaction() as t:
            t.execute(
                "INSERT INTO csetLog (revision, timestamp) VALUES " +
                quote_set((new_rev, -1))
            )
        self._fill_in_range(old_rev, new_rev, timestamp=True, number_forward=False)

    self.disable_tipfilling = old_settings[0]
    self.disable_backfilling = old_settings[1]
    self.disable_maintenance = old_settings[2]
    self.disable_deletion = old_settings[3]
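# A hypothetical call for the helper above, as used in service testing;
# `clogger` is an assumed Clogger instance and both hashes are made up
# (they are truncated to 12 characters inside the method).
def _example_initialize_to_range(clogger):
    # Keep everything back to old_rev, searching backwards from new_rev.
    clogger.initialize_to_range(
        old_rev="0123456789abcdef0123",
        new_rev="58eb13b394f4",
        delete_old=True,
    )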
def csetLog_deleter(self, please_stop=None):
    '''
    Deletes changesets from the csetLog table, along with entries in the
    annotation table whose revisions match the given changesets. Accepts
    lists of csets from self.deletions_todo.

    :param please_stop:
    :return:
    '''
    while not please_stop:
        try:
            request = self.deletions_todo.pop(till=please_stop)
            if please_stop:
                break

            # If deletion is disabled, ignore the current
            # request - it will need to be re-requested.
            if self.disable_deletion:
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                continue

            with self.working_locker:
                first_cset = request

                # Since we are deleting and moving stuff around in the
                # TUID tables, we need everything to be contained in
                # one transaction with no interruptions.
                with self.conn.transaction() as t:
                    revnum = self._get_one_revnum(t, first_cset)[0]
                    csets_to_del = t.get(
                        "SELECT revnum, revision FROM csetLog WHERE revnum <= ?",
                        (revnum,)
                    )
                    csets_to_del = [cset for _, cset in csets_to_del]
                    existing_frontiers = t.query(
                        "SELECT revision FROM latestFileMod WHERE revision IN " +
                        quote_set(csets_to_del)
                    ).data
                    existing_frontiers = [row[0] for row in existing_frontiers]

                    Log.note(
                        "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                        csets=csets_to_del
                    )

                    if len(existing_frontiers) > 0:
                        # This handles files which no longer exist in
                        # the main branch.
                        Log.note(
                            "Deleting existing frontiers for revisions: {{revisions}}",
                            revisions=existing_frontiers
                        )
                        t.execute(
                            "DELETE FROM latestFileMod WHERE revision IN " +
                            quote_set(existing_frontiers)
                        )

                    Log.note("Deleting annotations...")
                    t.execute(
                        "DELETE FROM annotations WHERE revision IN " +
                        quote_set(csets_to_del)
                    )

                    Log.note(
                        "Deleting {{num_entries}} csetLog entries...",
                        num_entries=len(csets_to_del)
                    )
                    t.execute(
                        "DELETE FROM csetLog WHERE revision IN " +
                        quote_set(csets_to_del)
                    )

                # Recalculate the revnums
                self.recompute_table_revnums()
        except Exception as e:
            Log.warning("Unexpected error occurred while deleting from csetLog:", cause=e)
            Till(seconds=CSET_DELETION_WAIT_TIME).wait()
    return
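# A hedged sketch of how a deletion request reaches csetLog_deleter: a caller
# pushes a single starting revision onto the deletions_todo queue, and the
# worker then deletes that revision and everything with a lower revnum, plus
# the matching annotations and frontiers. `clogger` and the revision string
# are assumptions for illustration.
def _example_request_deletion(clogger):
    clogger.deletions_todo.add("deadbeef0123")  # hypothetical 12-char revision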
def csetLog_maintenance(self, please_stop=None):
    '''
    Handles deleting old csetLog entries, and timestamps revisions once
    they pass the permanent-storage length so they can be deleted later.

    :param please_stop:
    :return:
    '''
    while not please_stop:
        try:
            # Wait until something signals the maintenance cycle
            # to begin (or end).
            (self.maintenance_signal | please_stop).wait()

            if please_stop:
                break
            if self.disable_maintenance:
                continue

            Log.warning(
                "Starting clog maintenance. Since this doesn't start often, "
                "we need to explicitly see when it's started with this warning."
            )

            # Reset the signal so we don't request
            # maintenance infinitely.
            with self.maintenance_signal.lock:
                self.maintenance_signal._go = False

            with self.working_locker:
                all_data = None
                with self.conn.transaction() as t:
                    all_data = sorted(
                        t.get("SELECT revnum, revision, timestamp FROM csetLog"),
                        key=lambda x: int(x[0])
                    )

                # Restore maximum permanents (if overflowing)
                new_data = []
                modified = False
                for count, (revnum, revision, timestamp) in enumerate(all_data[::-1]):
                    if count < MINIMUM_PERMANENT_CSETS:
                        if timestamp != -1:
                            modified = True
                            new_data.append((revnum, revision, -1))
                        else:
                            new_data.append((revnum, revision, timestamp))
                    elif type(timestamp) != int or timestamp == -1:
                        modified = True
                        new_data.append((revnum, revision, int(time.time())))
                    else:
                        new_data.append((revnum, revision, timestamp))

                # Delete annotations at revisions with timestamps
                # that are too old. The csetLog entries will have
                # their timestamps reset here.
                new_data1 = []
                annrevs_to_del = []
                current_time = time.time()
                for count, (revnum, revision, timestamp) in enumerate(new_data[::-1]):
                    new_timestamp = timestamp
                    if timestamp != -1:
                        if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                            modified = True
                            new_timestamp = current_time
                            annrevs_to_del.append(revision)
                    new_data1.append((revnum, revision, new_timestamp))

                if len(annrevs_to_del) > 0:
                    # Delete any latestFileMod and annotation entries
                    # that are too old.
                    Log.note(
                        "Deleting annotations and latestFileMod entries for revisions "
                        "older than {{oldest}}: {{revisions}}",
                        oldest=TIME_TO_KEEP_ANNOTATIONS,
                        revisions=annrevs_to_del
                    )
                    with self.conn.transaction() as t:
                        t.execute(
                            "DELETE FROM latestFileMod WHERE revision IN " +
                            quote_set(annrevs_to_del)
                        )
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(annrevs_to_del)
                        )

                # Delete any overflowing entries
                new_data2 = new_data1
                reved_all_data = all_data[::-1]
                deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                delete_overflowing_revstart = None
                if len(deleted_data) > 0:
                    _, delete_overflowing_revstart, _ = deleted_data[0]
                    new_data2 = set(all_data) - set(deleted_data)

                    # Update old frontiers if requested, otherwise
                    # they will all get deleted by the csetLog_deleter
                    # worker
                    if UPDATE_VERY_OLD_FRONTIERS:
                        _, max_revision, _ = all_data[-1]
                        for _, revision, _ in deleted_data:
                            with self.conn.transaction() as t:
                                old_files = t.get(
                                    "SELECT file FROM latestFileMod WHERE revision=?",
                                    (revision,)
                                )
                            if old_files is None or len(old_files) <= 0:
                                continue

                            self.tuid_service.get_tuids_from_files(
                                old_files,
                                max_revision,
                                going_forward=True,
                            )

                            still_exist = True
                            while still_exist and not please_stop:
                                Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision,)
                                    )
                                if old_files is None or len(old_files) <= 0:
                                    still_exist = False

                # Update table and schedule a deletion
                if modified:
                    with self.conn.transaction() as t:
                        insert_into_db_chunked(
                            t,
                            new_data2,
                            "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                        )
                if not deleted_data:
                    continue

                Log.note("Scheduling {{num_csets}} for deletion", num_csets=len(deleted_data))
                self.deletions_todo.add(delete_overflowing_revstart)
        except Exception as e:
            Log.warning("Unexpected error occurred while maintaining csetLog, continuing to try:", cause=e)
    return
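# The maintenance loop above only runs when its signal fires. A minimal sketch
# of how other workers kick it, mirroring the end of add_cset_entries;
# `clogger` is an assumed Clogger instance.
def _example_trigger_maintenance(clogger):
    if clogger.check_for_maintenance():
        Log.note("Scheduling maintenance run on clogger.")
        clogger.maintenance_signal.go()  # wakes (self.maintenance_signal | please_stop).wait()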
def create_and_insert_tuids(self, revision):
    self.replace_line_with_tuidline()

    line_origins = []
    all_new_lines = []
    for line_obj in self.lines:
        line_entry = (line_obj.filename, revision, line_obj.line)
        if not line_obj.tuid or line_obj.is_new_line:
            all_new_lines.append(line_obj.line)
        line_origins.append(line_entry)

    with self.tuid_service.conn.transaction() as t:
        # Get the new lines, excluding those that have existing tuids
        existing_tuids = {}
        if len(all_new_lines) > 0:
            try:
                existing_tuids = {
                    line: tuid
                    for tuid, file, revision, line in t.query(
                        "SELECT tuid, file, revision, line FROM temporal"
                        " WHERE file = " + quote_value(self.filename) +
                        " AND revision = " + quote_value(revision) +
                        " AND line IN " + quote_set(all_new_lines)
                    ).data
                }
            except Exception as e:
                # Log takes out important output, use print instead
                self.failed_file = True
                print("Trying to find new lines: " + str(all_new_lines))
                Log.error("Error encountered:", cause=e)

        insert_entries = []
        insert_lines = set(all_new_lines) - set(existing_tuids.keys())
        if len(insert_lines) > 0:
            try:
                insert_entries = [
                    (self.tuid_service.tuid(),) + line_origins[linenum - 1]
                    for linenum in insert_lines
                ]
                insert_into_db_chunked(
                    t,
                    insert_entries,
                    "INSERT INTO temporal (tuid, file, revision, line) VALUES "
                )
            except Exception as e:
                Log.note(
                    "Failed to insert new tuids (likely due to merge conflict) on {{file}}: {{cause}}",
                    file=self.filename,
                    cause=e
                )
                self.failed_file = True
                return

        fmt_inserted_lines = {line: tuid for tuid, _, _, line in insert_entries}
        for line_obj in self.lines:
            # If a tuid already exists for this line, use it;
            # otherwise, use the newly created one.
            if line_obj.line in existing_tuids:
                line_obj.tuid = existing_tuids[line_obj.line]
            elif line_obj.line in fmt_inserted_lines:
                line_obj.tuid = fmt_inserted_lines[line_obj.line]

            if not line_obj.tuid:
                Log.warning(
                    "Cannot find TUID at {{file}} and {{rev}} for: {{line}}",
                    file=self.filename,
                    rev=revision,
                    line=str(line_obj)
                )
                self.failed_file = True
                return
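# The insert above relies on `self.lines` being ordered by line number
# starting at 1, so `line_origins[linenum - 1]` is the (filename, revision,
# line) tuple for a given new line. A small illustration with made-up file,
# revision, and TUID values (standalone, not part of the service):
def _example_line_origin_lookup():
    line_origins = [
        ("widget/foo.cpp", "58eb13b394f4", 1),
        ("widget/foo.cpp", "58eb13b394f4", 2),
        ("widget/foo.cpp", "58eb13b394f4", 3),
    ]
    insert_lines = {2, 3}  # new lines that need fresh TUIDs
    entries = [
        (9001 + n,) + line_origins[linenum - 1]  # fake TUIDs stand in for tuid_service.tuid()
        for n, linenum in enumerate(sorted(insert_lines))
    ]
    assert entries == [
        (9001, "widget/foo.cpp", "58eb13b394f4", 2),
        (9002, "widget/foo.cpp", "58eb13b394f4", 3),
    ]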
def test_annotation_memory(service):
    import psutil
    import os
    import gc
    import pprint

    gc.set_debug(gc.DEBUG_SAVEALL)

    with open('resources/stressfiles.json', 'r') as f:
        files = json.load(f)

    total_trials = 1000
    total_files = 1
    files_to_get = files[:total_files]
    test_rev = "58eb13b394f4"

    all_end_mems = [None] * total_trials
    all_percents = [None] * total_trials
    process = psutil.Process(os.getpid())
    start_mem = -1

    for i in range(total_trials):
        # Randomize files
        # files_to_get = [random.choice(files) for _ in range(total_files)]
        with service.conn.transaction() as t:
            t.execute("DELETE FROM temporal WHERE file IN " + quote_set(files_to_get))
            t.execute("DELETE FROM annotations WHERE file IN " + quote_set(files_to_get))
            t.execute("DELETE FROM latestFileMod WHERE file IN " + quote_set(files_to_get))

        if start_mem == -1:
            start_mem = round(process.memory_info().rss / (1000 * 1000), 2)

        service.get_tuids(files_to_get, test_rev)

        end_mem = round(process.memory_info().rss / (1000 * 1000), 2)
        pc_used = service.statsdaemon.get_used_memory_percent()
        Log.note("GC get_count: {{getc}}", getc=gc.get_count())
        Log.note("GC collect: {{getc}}", getc=gc.collect())
        Log.note(
            "Started with {{mem}}, finished with {{endmem}}. Percent currently used is {{pc}}",
            mem=start_mem,
            endmem=end_mem,
            pc=pc_used
        )
        Log.note("Used {{mem}} Mb since first get_tuids call.", mem=str(end_mem - start_mem))

        if GC_DEBUG:
            Log.note("Uncollected garbage: ")
            pprint.pprint(gc.garbage)
            import time
            time.sleep(10)

        all_end_mems[i] = end_mem
        all_percents[i] = pc_used

    from matplotlib import pyplot as plt
    plt.figure()
    plt.plot(all_end_mems)
    plt.title("Memory usage over time.")
    plt.xlabel("Trial count")
    plt.ylabel("Memory usage (Mb)")

    plt.figure()
    plt.plot(all_percents)
    plt.title("Percent of memory used over time.")
    plt.xlabel("Trial count")
    plt.ylabel("Memory usage (%)")
    plt.show(block=True)