def purge_data(cutoff=None, verbose=False):
    """Purge expired feedback data per our data retention policy.

    Rows of ResponseEmail, ResponseContext and ResponsePI are selected
    with ``opinion__created__lte=cutoff`` and deleted, in that order.
    The per-table counts are logged through ``j_info`` and the affected
    opinion ids are handed to ``index_chunk`` for re-indexing.

    :arg cutoff: datetime used as the upper bound for the filter;
        defaults to 180 days before now when ``None``
    :arg verbose: accepted by the signature but not read by this body
    """
    if cutoff is None:
        # No explicit cutoff: go back 180 days, which is roughly 6 months.
        cutoff = datetime.now() - timedelta(days=180)

    # Each model to purge, paired with the piece of the log message that
    # reports its count. The tuple order is both the deletion order and
    # the order of the pieces in the final message.
    purge_plan = (
        (ResponseEmail, 'feedback_responseemail: %d, '),
        (ResponseContext, 'feedback_responsecontext: %d, '),
        (ResponsePI, 'feedback_responsepi: %d'),
    )

    stale_opinion_ids = set()
    fragments = []
    for model, fragment in purge_plan:
        doomed = model.objects.filter(opinion__created__lte=cutoff)
        # The ids have to be collected before the rows are deleted.
        stale_opinion_ids.update(doomed.values_list('opinion_id', flat=True))
        purged = doomed.count()
        doomed.delete()
        fragments.append(fragment % (purged, ))

    j_info(app='feedback', src='purge_data', action='purge_data',
           msg=''.join(fragments))

    if stale_opinion_ids:
        index_chunk(ResponseMappingType, list(stale_opinion_ids))
def index_chunk_task(index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    The record identified by ``rec_id`` is marked in-progress, the chunk
    is indexed, and the record is then marked as succeeded. If anything
    raises, the record (when it was loaded) is marked as failed and the
    exception is re-raised.

    :arg index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
        (not read by this body)
    :arg rec_id: the id for the record for this task
    :arg chunk: a (cls_path, id_list) of things to index

    :raises Exception: whatever the lookup or indexing raised

    """
    cls_path, id_list = chunk
    cls = from_class_path(cls_path)
    # Stays None until the record is loaded, so the error handler can
    # tell whether there is a record to mark as failed.
    rec = None
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = u'Reindexing into %s' % index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list)
        rec.mark_success()

    except Exception:
        if rec is not None:
            # sys.exc_info() reports the exception being handled in this
            # thread; the older sys.exc_type / sys.exc_value globals are
            # deprecated and not thread-safe.
            exc_type, exc_value = sys.exc_info()[:2]
            rec.mark_fail(u'Errored out %s %s' % (exc_type, exc_value))
        raise

    finally:
        unpin_this_thread()
def purge_data(cutoff=None, verbose=False):
    """Purge expired feedback data per our data retention policy.

    Rows of ResponseEmail and ResponseContext are selected with
    ``opinion__created__lte=cutoff`` and deleted, in that order. The
    affected opinion ids are then handed to ``index_chunk`` for
    re-indexing.

    :arg cutoff: datetime used as the upper bound for the filter;
        defaults to 180 days before now when ``None``
    :arg verbose: when true, print the per-table counts and the number
        of responses to re-index
    """
    if cutoff is None:
        # No explicit cutoff: go back 180 days, which is roughly 6 months.
        cutoff = datetime.now() - timedelta(days=180)

    # Each model to purge, paired with the progress line printed for it
    # in verbose mode. The tuple order is the deletion order.
    purge_plan = (
        (ResponseEmail, 'Purged %d feedback_responseemail records'),
        (ResponseContext, 'Purged %d feedback_responsecontext records'),
    )

    stale_opinion_ids = set()
    for model, report in purge_plan:
        doomed = model.objects.filter(opinion__created__lte=cutoff)
        # The ids have to be collected before the rows are deleted.
        stale_opinion_ids.update(doomed.values_list('opinion_id', flat=True))
        purged = doomed.count()
        doomed.delete()
        if verbose:
            # Single-argument parenthesised print: same output as the
            # print statement.
            print(report % purged)

    if not stale_opinion_ids:
        return

    if verbose:
        print('%d responses to re-index' % len(stale_opinion_ids))
    index_chunk(ResponseMappingType, list(stale_opinion_ids))
def index_chunk_task(index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    The record identified by ``rec_id`` is marked in-progress, the chunk
    is indexed with ``reraise=True``, and the record is then marked as
    succeeded. If anything raises, the record (when it was loaded) is
    marked as failed and the exception is re-raised.

    :arg index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
        (not read by this body)
    :arg rec_id: the id for the record for this task
    :arg chunk: a (class, id_list) of things to index

    :raises Exception: whatever the lookup or indexing raised

    """
    cls, id_list = chunk

    # Bound before the try block: if pinning or the record lookup fails,
    # the handler below must not trip over an unbound name and mask the
    # real error.
    rec = None
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = u'Reindexing into %s' % index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list, reraise=True)
        rec.mark_success()

    except Exception:
        if rec is not None:
            # sys.exc_info() reports the exception being handled in this
            # thread; the older sys.exc_type / sys.exc_value globals are
            # deprecated and not thread-safe.
            exc_type, exc_value = sys.exc_info()[:2]
            rec.mark_fail(u'Errored out %s %s' % (exc_type, exc_value))
        raise

    finally:
        unpin_this_thread()
def purge_data():
    """Purges feedback data

    * ResponseEmail >= 180 days
    * ResponseContext >= 180 days
    * ResponsePI >= 180 days

    Rows are selected with ``opinion__created__lte`` against a cutoff of
    180 days before now. The affected opinion ids are handed to
    ``index_chunk`` for re-indexing.

    :returns: a string with the count recorded for each table

    """
    cutoff = datetime.now() - timedelta(days=180)

    # Each model to purge, paired with the piece of the summary that
    # reports its count. The tuple order is both the deletion order and
    # the order of the pieces in the returned message.
    purge_plan = (
        (ResponseEmail, 'feedback_responseemail: %d, '),
        (ResponseContext, 'feedback_responsecontext: %d, '),
        (ResponsePI, 'feedback_responsepi: %d'),
    )

    stale_opinion_ids = set()
    fragments = []
    for model, fragment in purge_plan:
        doomed = model.objects.filter(opinion__created__lte=cutoff)
        # The ids have to be collected before the rows are deleted.
        stale_opinion_ids.update(doomed.values_list('opinion_id', flat=True))
        purged = doomed.count()
        doomed.delete()
        fragments.append(fragment % (purged, ))

    if stale_opinion_ids:
        index_chunk(ResponseDocType, list(stale_opinion_ids))

    return ''.join(fragments)
def purge_data(cutoff=None, verbose=False):
    """Purge expired feedback data per our data retention policy.

    Rows of ResponseEmail, ResponseContext and ResponsePI are selected
    with ``opinion__created__lte=cutoff`` and deleted, in that order.
    The per-table counts are logged through ``j_info`` and the affected
    opinion ids are handed to ``index_chunk`` for re-indexing.

    :arg cutoff: datetime used as the upper bound for the filter;
        defaults to 180 days before now when ``None``
    :arg verbose: accepted by the signature but not read by this body
    """
    if cutoff is None:
        # No explicit cutoff: go back 180 days, which is roughly 6 months.
        cutoff = datetime.now() - timedelta(days=180)

    opinion_ids = set()

    def _purge(model):
        """Delete the expired rows of ``model``; return the row count."""
        expired = model.objects.filter(opinion__created__lte=cutoff)
        # Remember which opinions are affected before the rows are gone.
        opinion_ids.update(expired.values_list('opinion_id', flat=True))
        total = expired.count()
        expired.delete()
        return total

    # The calls below run top to bottom, so the tables are purged in the
    # order email, context, pi.
    email_part = 'feedback_responseemail: %d, ' % (_purge(ResponseEmail), )
    context_part = 'feedback_responsecontext: %d, ' % (
        _purge(ResponseContext), )
    pi_part = 'feedback_responsepi: %d' % (_purge(ResponsePI), )

    j_info(app='feedback', src='purge_data', action='purge_data',
           msg=email_part + context_part + pi_part)

    if opinion_ids:
        index_chunk(ResponseMappingType, list(opinion_ids))
def purge_data():
    """Purges feedback data

    * ResponseEmail >= 180 days
    * ResponseContext >= 180 days
    * ResponsePI >= 180 days

    Rows are selected with ``opinion__created__lte`` against a cutoff of
    180 days before now. The affected opinion ids are handed to
    ``index_chunk`` for re-indexing.

    :returns: a string with the count recorded for each table

    """
    cutoff = datetime.now() - timedelta(days=180)
    opinion_ids = set()

    def _purge(model):
        """Delete the expired rows of ``model``; return the row count."""
        expired = model.objects.filter(opinion__created__lte=cutoff)
        # Remember which opinions are affected before the rows are gone.
        opinion_ids.update(expired.values_list('opinion_id', flat=True))
        total = expired.count()
        expired.delete()
        return total

    # The calls below run top to bottom, so the tables are purged in the
    # order email, context, pi.
    email_part = 'feedback_responseemail: %d, ' % (_purge(ResponseEmail), )
    context_part = 'feedback_responsecontext: %d, ' % (
        _purge(ResponseContext), )
    pi_part = 'feedback_responsepi: %d' % (_purge(ResponsePI), )

    if opinion_ids:
        index_chunk(ResponseDocType, list(opinion_ids))

    return email_part + context_part + pi_part