def scrub(self):
    """Perform a thorough scrub and cleanup of the database.

    Finds duplicate Reason rows (rows identical except for their primary
    key), repoints the reports_entries_interactions rows that reference a
    duplicate at one canonical Reason, deletes the duplicates, and then
    prunes orphaned Reason and Entries records.
    """
    # Currently only reasons are a problem
    try:
        start_count = Reason.objects.count()
    except Exception:
        e = sys.exc_info()[1]
        self.log.error("Failed to load reason objects: %s" % e)
        return

    dup_reasons = []        # [duplicate_id] rows for the DELETE statement
    cmp_reasons = dict()    # data digest -> canonical (first seen) reason id
    batch_update = []       # [canonical_id, duplicate_id] rows for the UPDATE
    for reason in BatchFetch(Reason.objects):
        # Hash the reason's data with the primary key blanked out so that
        # rows differing only in id produce the same key.  This gives us a
        # fast hash lookup for the duplicate comparison.
        # (Renamed from `id` to avoid shadowing the builtin.)
        orig_id = reason.id
        reason.id = None
        key = md5(pickle.dumps(reason)).hexdigest()
        reason.id = orig_id
        if key in cmp_reasons:
            self.log.debug("Update interactions from %d to %d" \
                           % (reason.id, cmp_reasons[key]))
            dup_reasons.append([reason.id])
            batch_update.append([cmp_reasons[key], reason.id])
        else:
            cmp_reasons[key] = reason.id
            self.log.debug("key %d" % reason.id)

    self.log.debug("Done with updates, deleting dupes")
    try:
        cursor = connection.cursor()
        cursor.executemany(
            'update reports_entries_interactions set reason_id=%s where reason_id=%s',
            batch_update)
        cursor.executemany('delete from reports_reason where id = %s',
                           dup_reasons)
        transaction.set_dirty()
    except Exception:
        ex = sys.exc_info()[1]
        self.log.error("Failed to delete reasons: %s" % ex)
        raise

    self.log.info("Found %d dupes out of %d" % (len(dup_reasons),
                                                start_count))

    # Cleanup orphans
    start_count = Reason.objects.count()
    Reason.prune_orphans()
    self.log.info("Pruned %d Reason records" %
                  (start_count - Reason.objects.count()))

    start_count = Entries.objects.count()
    Entries.prune_orphans()
    self.log.info("Pruned %d Entries records" %
                  (start_count - Entries.objects.count()))
def scrub(self):
    """Scrub and de-duplicate the reports database.

    Identifies Reason rows whose data (ignoring the primary key) is
    identical, rewrites interaction links to point at a single surviving
    row, removes the duplicates, then prunes orphaned Reason and Entries
    records.

    NOTE(review): an identical scrub() definition appears earlier in this
    file; in Python the later definition silently replaces the earlier
    one — confirm the duplication is intentional.
    """
    # Currently only reasons are a problem
    try:
        start_count = Reason.objects.count()
    except Exception:
        e = sys.exc_info()[1]
        self.log.error("Failed to load reason objects: %s" % e)
        return

    dup_reasons = []      # ids to delete, one-element rows for executemany
    cmp_reasons = {}      # content digest -> id of the row we keep
    batch_update = []     # [keep_id, dupe_id] pairs for the UPDATE pass
    for reason in BatchFetch(Reason.objects):
        # Blank the primary key before pickling so equal data hashes
        # equally; restore it immediately afterwards.  `saved_id` avoids
        # shadowing the `id` builtin.
        saved_id = reason.id
        reason.id = None
        key = md5(pickle.dumps(reason)).hexdigest()
        reason.id = saved_id
        if key in cmp_reasons:
            self.log.debug("Update interactions from %d to %d" \
                           % (reason.id, cmp_reasons[key]))
            dup_reasons.append([reason.id])
            batch_update.append([cmp_reasons[key], reason.id])
        else:
            cmp_reasons[key] = reason.id
            self.log.debug("key %d" % reason.id)

    self.log.debug("Done with updates, deleting dupes")
    try:
        cursor = connection.cursor()
        cursor.executemany(
            'update reports_entries_interactions set reason_id=%s where reason_id=%s',
            batch_update)
        cursor.executemany('delete from reports_reason where id = %s',
                           dup_reasons)
        transaction.set_dirty()
    except Exception:
        ex = sys.exc_info()[1]
        self.log.error("Failed to delete reasons: %s" % ex)
        raise

    self.log.info("Found %d dupes out of %d" % (len(dup_reasons),
                                                start_count))

    # Cleanup orphans
    start_count = Reason.objects.count()
    Reason.prune_orphans()
    self.log.info("Pruned %d Reason records" %
                  (start_count - Reason.objects.count()))

    start_count = Entries.objects.count()
    Entries.prune_orphans()
    self.log.info("Pruned %d Entries records" %
                  (start_count - Entries.objects.count()))
def load_stats(cdata, sdata, vlevel, quick=False, location=''):
    """Import client statistics data into the reports database.

    :param cdata: parsed clients XML tree (``Client`` elements carrying a
        ``pingable`` attribute)
    :param sdata: parsed statistics XML tree (``Node`` elements with
        nested ``Statistics``)
    :param vlevel: verbosity; > 0 prints per-record progress, > 1 prints
        phase banners
    :param quick: when True, skip duplicate lookups for Reason and
        Performance rows and always insert fresh ones
    :param location: stored as the ``server`` field of new Interactions
    """
    cursor = connection.cursor()
    # Build a name -> primary key map of clients already in the database.
    clients = {}
    cursor.execute("SELECT name, id from reports_client;")
    # NOTE: list comprehension used purely for its side effect.
    [clients.__setitem__(a, b) for a, b in cursor.fetchall()]

    # Pass 1: make sure every Node in the statistics data has a client row.
    for node in sdata.findall('Node'):
        name = node.get('name')
        if not name in clients:
            cursor.execute(
                "INSERT INTO reports_client VALUES (NULL, %s, %s, NULL, NULL)",
                [datetime.now(), name])
            clients[name] = cursor.lastrowid
            if vlevel > 0:
                print("Client %s added to db" % name)
        else:
            if vlevel > 0:
                print("Client %s already exists in db" % name)

    # Pingability per client name, defaulting to 'N' (not pingable).
    pingability = {}
    [pingability.__setitem__(n.get('name'), n.get('pingable', default='N'))
        for n in cdata.findall('Client')]

    # Pass 2: import each Statistics run as an Interaction with its
    # Bad/Extra/Modified entries and OpStamps performance metrics.
    for node in sdata.findall('Node'):
        name = node.get('name')
        c_inst = Client.objects.filter(id=clients[name])[0]
        try:
            pingability[name]
        except KeyError:
            pingability[name] = 'N'
        for statistics in node.findall('Statistics'):
            t = strptime(statistics.get('time'))
            # Maybe replace with django.core.db typecasts typecast_timestamp()?
            # import from django.backends util
            timestamp = datetime(t[0], t[1], t[2], t[3], t[4], t[5])
            ilist = Interaction.objects.filter(client=c_inst,
                                               timestamp=timestamp)
            if ilist:
                # Already imported; skip the whole Statistics element.
                current_interaction = ilist[0]
                if vlevel > 0:
                    print("Interaction for %s at %s with id %s already exists" % (clients[name], datetime(t[0], t[1], t[2], t[3], t[4], t[5]), current_interaction.id))
                continue
            else:
                newint = Interaction(client=c_inst,
                                     timestamp=timestamp,
                                     state=statistics.get('state', default="unknown"),
                                     repo_rev_code=statistics.get('revision', default="unknown"),
                                     client_version=statistics.get('client_version', default="unknown"),
                                     goodcount=statistics.get('good', default="0"),
                                     totalcount=statistics.get('total', default="0"),
                                     server=location)
                newint.save()
                current_interaction = newint
                if vlevel > 0:
                    print("Interaction for %s at %s with id %s INSERTED in to db" % (clients[name], timestamp, current_interaction.id))

            # Map entry-state xpaths to their TYPE_CHOICES pair.
            pattern = [('Bad/*', TYPE_CHOICES[0]),
                       ('Extra/*', TYPE_CHOICES[2]),
                       ('Modified/*', TYPE_CHOICES[1]), ]
            # NOTE(review): loop variable `type` shadows the builtin.
            for (xpath, type) in pattern:
                for x in statistics.findall(xpath):
                    kargs = build_reason_kwargs(x)
                    # In quick mode we skip the dedup lookup and always
                    # create a fresh Reason row.
                    if not quick:
                        rls = Reason.objects.filter(**kargs)
                    else:
                        rls = []
                    if rls:
                        rr = rls[0]
                        if vlevel > 0:
                            print "Reason exists: %s" % (rr.id)
                    else:
                        rr = Reason(**kargs)
                        rr.save()
                        if vlevel > 0:
                            print "Created reason: %s" % rr.id
                    # Reuse or create the Entries row for this name/kind.
                    links = Entries.objects.filter(name=x.get('name'),
                                                   kind=x.tag)
                    if links:
                        entry = links[0]
                    else:
                        entry = Entries(name=x.get('name'), kind=x.tag)
                        entry.save()
                    interaction = Entries_interactions(entry=entry, reason=rr,
                                                       interaction=current_interaction,
                                                       type=type[0])
                    interaction.save()
                    if vlevel > 0:
                        print "%s interaction created with reason id %s and entry %s" % (xpath, rr.id, entry.id)

            # Performance metrics for this run.
            for times in statistics.findall('OpStamps'):
                for metric, value in times.items():
                    if not quick:
                        mmatch = Performance.objects.filter(metric=metric,
                                                            value=value)
                    else:
                        mmatch = []
                    if mmatch:
                        item_id = mmatch[0].id
                    else:
                        mperf = Performance(metric=metric, value=value)
                        mperf.save()
                        item_id = mperf.id
                    try:
                        cursor.execute("INSERT INTO reports_performance_interaction VALUES (NULL, %s, %s);", [item_id, current_interaction.id])
                    except:
                        # Best-effort insert; presumably guards duplicate
                        # link rows — TODO confirm which errors occur here.
                        pass

    if vlevel > 1:
        print("----------------INTERACTIONS SYNCED----------------")
    # Point each client at its most recent interaction.
    cursor.execute("select reports_interaction.id, x.client_id from (select client_id, MAX(timestamp) as timer from reports_interaction Group BY client_id) x, reports_interaction where reports_interaction.client_id = x.client_id AND reports_interaction.timestamp = x.timer")
    for row in cursor.fetchall():
        cursor.execute("UPDATE reports_client SET current_interaction_id = %s where reports_client.id = %s", [row[0], row[1]])
    if vlevel > 1:
        print("------------LATEST INTERACTION SET----------------")

    # Pass 3: record ping status for every known client.
    for key in pingability.keys():
        if key not in clients:
            #print "Ping Save Problem with client %s" % name
            continue
        cmatch = Client.objects.filter(id=clients[key])[0]
        pmatch = Ping.objects.filter(client=cmatch).order_by('-endtime')
        if pmatch:
            if pmatch[0].status == pingability[key]:
                # NOTE(review): each pmatch[0] indexing on an unevaluated
                # queryset can re-query and yield a distinct instance, so
                # the endtime set here may not be the object saved on the
                # next line — verify.
                pmatch[0].endtime = datetime.now()
                pmatch[0].save()
            else:
                # Status changed: record a new ping interval.
                newp = Ping(client=cmatch, status=pingability[key],
                            starttime=datetime.now(),
                            endtime=datetime.now())
                newp.save()
        else:
            # First ping record for this client.
            newp = Ping(client=cmatch, status=pingability[key],
                        starttime=datetime.now(), endtime=datetime.now())
            newp.save()

    if vlevel > 1:
        print "---------------PINGDATA SYNCED---------------------"
    # Commits via the private backend hook rather than the public API.
    connection._commit()
def purge(self, client=None, maxdate=None, state=None):
    """Purge historical data from the database.

    :param client: client name; restrict the purge to this client.  If it
        is the only argument the client record itself is deleted too.
    :param maxdate: datetime.datetime; delete interactions and pings
        older than this.
    :param state: one of 'dirty', 'clean', 'modified'; delete only
        interactions in this state.
    :raises TypeError: if maxdate is not a datetime or state is invalid
    :raises SystemExit: if the named client does not exist
    """
    filtered = False  # indicates whether or not a client should be deleted
    if not client and not maxdate and not state:
        self.errExit("Reports.prune: Refusing to prune all data")

    ipurge = Interaction.objects
    if client:
        try:
            cobj = Client.objects.get(name=client)
            ipurge = ipurge.filter(client=cobj)
        except Client.DoesNotExist:
            self.log.error("Client %s not in database" % client)
            raise SystemExit(-1)
        self.log.debug("Filtering by client: %s" % client)

    if maxdate:
        filtered = True
        if not isinstance(maxdate, datetime.datetime):
            raise TypeError("maxdate is not a DateTime object")
        self.log.debug("Filtering by maxdate: %s" % maxdate)
        ipurge = ipurge.filter(timestamp__lt=maxdate)

        # Handle ping data as well
        ping = Ping.objects.filter(endtime__lt=maxdate)
        if client:
            ping = ping.filter(client=cobj)
        ping.delete()

    if state:
        filtered = True
        if state not in ('dirty', 'clean', 'modified'):
            raise TypeError("state is not one of the following values " + \
                            "('dirty','clean','modified')")
        self.log.debug("Filtering by state: %s" % state)
        ipurge = ipurge.filter(state=state)

    # Delete interactions in batches of 1000 to bound memory use; each
    # pass re-slices the queryset, which no longer contains deleted rows.
    count = ipurge.count()
    rnum = 0
    try:
        while rnum < count:
            grp = list(ipurge[:1000].values("id"))
            # just in case...
            if not grp:
                break
            Interaction.objects.filter(id__in=[x['id'] for x in grp]).delete()
            rnum += len(grp)
            self.log.debug("Deleted %s of %s" % (rnum, count))
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # are not swallowed.
        self.log.error("Failed to remove interactions")
        (a, b, c) = sys.exc_info()
        msg = traceback.format_exception(a, b, c, limit=2)[-1][:-1]
        del a, b, c
        self.log.error(msg)

    # bulk operations bypass the Interaction.delete method
    self.log.debug("Pruning orphan Performance objects")
    Performance.prune_orphans()
    self.log.debug("Pruning orphan Reason objects")
    Reason.prune_orphans()

    if client and not filtered:
        # Delete the client; ping data goes with it automatically.
        try:
            self.log.debug("Purging client %s" % client)
            cobj.delete()
        except Exception:
            self.log.error("Failed to delete client %s" % client)
            (a, b, c) = sys.exc_info()
            msg = traceback.format_exception(a, b, c, limit=2)[-1][:-1]
            del a, b, c
            self.log.error(msg)
# --- continuation: tail of a scrub() pass whose enclosing def (and the
# --- setup of cursor/batch_update/dup_reasons/start_count) lies outside
# --- this view.  Python 2 except syntax preserved as found.
self.log.debug("Done with updates, deleting dupes")
try:
    cursor = connection.cursor()
    # Repoint interaction links at the canonical reason, then drop dupes.
    cursor.executemany('update reports_entries_interactions set reason_id=%s where reason_id=%s', batch_update)
    cursor.executemany('delete from reports_reason where id = %s', dup_reasons)
    transaction.set_dirty()
except Exception, ex:
    self.log.error("Failed to delete reasons: %s" % ex)
    raise
self.log.info("Found %d dupes out of %d" % (len(dup_reasons), start_count))
# Cleanup orphans
start_count = Reason.objects.count()
Reason.prune_orphans()
self.log.info("Pruned %d Reason records" % (start_count - Reason.objects.count()))
start_count = Entries.objects.count()
Entries.prune_orphans()
self.log.info("Pruned %d Entries records" % (start_count - Entries.objects.count()))

def django_command_proxy(self, command):
    '''Call a django command'''
    # 'sqlall' requires the app label argument; all other commands run bare.
    if command == 'sqlall':
        django.core.management.call_command(command, 'reports')
    else:
        django.core.management.call_command(command)

# --- truncated: body of load_stats continues beyond this view.
def load_stats(self, stats_file=None, clientspath=None, verb=0, quick=False):
    '''Load statistics data into the database'''