def sync_status(limit=100000):
    # Stream through all samples.
    sample_count = Sample.objects.count()
    for index in range(0, sample_count, limit):
        vals = (Sample.objects
                .order_by('id', '-statuses__status_code')
                .distinct('id')
                .values_list('id', 'status_code', 'statuses__id',
                             'statuses__status_code')[index:index + limit])

        # Pull all mismatching values.
        ids = []
        status_codes = []
        # status_ids = []
        for sample_id, status_code, status_id, latest_status_code in vals:
            if status_code != latest_status_code:
                ids.append(sample_id)
                status_codes.append(latest_status_code)
                # status_ids.append(status_id)

        # Sync using a bulk update.
        if ids:
            pg_bulk_update(Sample, 'id', 'status_code',
                           list(ids), list(status_codes))
            # pg_bulk_update(Sample, 'id', 'status_id',
            #                list(ids), list(status_ids))
        print 'Synced %s out of %s samples at %s' % (len(ids), limit, index)
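The function above leans on pg_bulk_update to push every corrected status code to Postgres in one statement rather than one query per row. The helper itself isn't shown in this section; the sketch below is only an assumption about what such a helper could look like, matching the (model, filter column, update column, filter values, update values) call signature used above, and built on a single UPDATE ... FROM (VALUES ...) statement.

from django.db import connection

def pg_bulk_update(model, filter_column, update_column,
                   filter_values, update_values):
    """Illustrative sketch: update many rows in one round trip.

    Column and table names come from the model definition, not user
    input, so interpolating them with format() is acceptable here.
    """
    if not filter_values:
        return
    table = model._meta.db_table
    # One "(key, new_value)" pair of placeholders per row to update.
    values_sql = ', '.join(['(%s, %s)'] * len(filter_values))
    params = [v for pair in zip(filter_values, update_values) for v in pair]
    sql = (
        'UPDATE {table} SET {update_column} = vals.new_value '
        'FROM (VALUES {values}) AS vals (key, new_value) '
        'WHERE {table}.{filter_column} = vals.key'
    ).format(table=table, update_column=update_column,
             values=values_sql, filter_column=filter_column)
    with connection.cursor() as cursor:
        cursor.execute(sql, params)

Whatever the real implementation looks like, the point is the same: the database joins the target table against an inlined VALUES list, so a chunk of 100,000 corrections costs one query instead of 100,000.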
def make_fake_data(samples_to_make=1000, batch_threshold=1000,
                   delete_existing=True, make_statuses=True, years=1):
    """Makes mock data for testing performance. Optionally, resets db."""
    if delete_existing:
        Sample.objects.all().delete()
        print "Deleted existing"

    # Make up a set of samples; all but roughly the most recent week's
    # worth (per year) will later get a full status history.
    offset = samples_to_make - samples_to_make / 52 / years

    # Create all the samples.
    samples = []
    for barcode in range(samples_to_make):
        sample = Sample()
        sample.barcode = str(barcode)
        sample.created = now()
        sample.production = True
        samples.append(sample)
        if len(samples) >= batch_threshold:
            Sample.objects.bulk_create(samples)
            del samples[:]
            print "Made %s samples." % Sample.objects.count()
    if samples:
        Sample.objects.bulk_create(samples)
    print "Finished making %s samples." % Sample.objects.count()

    if not make_statuses:
        return

    # Pull all ids for samples.
    sample_ids = Sample.objects.values_list('id', flat=True)

    # Create all the statuses. Samples before `offset` get the full
    # RECEIVED -> LAB -> COMPLETE history; the rest stop at LAB.
    offset = len(sample_ids) - len(sample_ids) / 52 / years
    statuses = []
    for sample in sample_ids[:offset]:
        statuses.append(SampleStatus(sample_id=sample, status_code=SampleStatus.RECEIVED, created=now()))
        statuses.append(SampleStatus(sample_id=sample, status_code=SampleStatus.LAB, created=now()))
        statuses.append(SampleStatus(sample_id=sample, status_code=SampleStatus.COMPLETE, created=now()))
        if len(statuses) >= batch_threshold:
            SampleStatus.objects.bulk_create(statuses)
            del statuses[:]
    for sample in sample_ids[offset:]:
        statuses.append(SampleStatus(sample_id=sample, status_code=SampleStatus.RECEIVED, created=now()))
        statuses.append(SampleStatus(sample_id=sample, status_code=SampleStatus.LAB, created=now()))
        if len(statuses) >= batch_threshold:
            SampleStatus.objects.bulk_create(statuses)
            del statuses[:]
            print "Made %s statuses." % SampleStatus.objects.count()
    if statuses:
        SampleStatus.objects.bulk_create(statuses)
    print "Finished making %s statuses." % SampleStatus.objects.count()

    # Set all the statuses to lab.
    vals = (Sample.objects
            .filter(statuses__status_code=SampleStatus.LAB)
            .values_list('id', 'statuses__id'))
    sample_ids, sample_status_ids = zip(*vals)
    pg_bulk_update(Sample, 'id', 'latest_status',
                   list(sample_ids), list(sample_status_ids))

    # Set all the statuses to completed.
    vals = (Sample.objects
            .filter(statuses__status_code=SampleStatus.COMPLETE)
            .values_list('id', 'statuses__id'))
    sample_ids, sample_status_ids = zip(*vals)
    pg_bulk_update(Sample, 'id', 'latest_status',
                   list(sample_ids), list(sample_status_ids))
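Putting the two pieces together, an assumed run from a Django shell (python manage.py shell) might look like the following; the sample count and chunk size are illustrative, not recommendations.

# Build a test dataset, then reconcile Sample.status_code in chunks.
make_fake_data(samples_to_make=50000)
sync_status(limit=10000)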