def _load_catalog_rows(name):
    """Return ``(fieldnames, rows)`` for the CSV stored under ``name``.

    Raises:
        IOError: If ``name`` does not exist in ``default_storage``.
    """
    if not default_storage.exists(name):
        raise IOError('Catalog file not found in storage: {}'.format(name))
    # Read everything inside the ``with`` so the storage handle is closed
    # as soon as the rows are in memory.
    with default_storage.open(name) as source:
        reader = csv.DictReader(source)
        # ``fieldnames`` is None for an empty file; normalise to a list.
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)
    return fieldnames, rows


def task_join_catalogs(job_id):
    """Join the two catalogs of a ``JoinCatalogs`` job and store the result.

    Loads the job with primary key ``job_id``, marks it ``'started'``, reads
    its left and right tables as CSV from ``default_storage``, passes the
    rows to ``join`` together with ``job.fk_field``, writes the rows it
    returns to ``catalogs/<results_label>.csv`` under the media root,
    registers that file as a new ``Catalog`` and marks the job complete.

    Args:
        job_id: Primary key of the ``JoinCatalogs`` job to run.

    Returns:
        ``True`` once the result file is written and the job is saved.

    Raises:
        IOError: If either input table is missing from storage (previously
            this surfaced as an unbound-variable error).
    """
    job = JoinCatalogs.objects.get(pk=job_id)
    job.job_status = 'started'
    job.save()

    left_fields, left_rows = _load_catalog_rows(job.left_table.handle.name)
    right_fields, right_rows = _load_catalog_rows(job.right_table.handle.name)

    # Sorted so the CSV header order is deterministic; iterating a bare set
    # gives an arbitrary column order from run to run.
    keys = sorted(set(left_fields + right_fields))

    # The second value returned by ``join`` is not used for the header.
    joined_rows, _columns = join(left_rows, right_rows, job.fk_field)

    results_handle = 'catalogs/{}.csv'.format(job.results_label)
    path = os.path.join(
        BASE_DIR, MEDIA_ROOT, 'catalogs', '{}.csv'.format(job.results_label)
    )
    # Close the output explicitly so the data is flushed to disk before the
    # Catalog row pointing at it is created.
    with open(path, 'w') as output:
        writer = csv.DictWriter(output, fieldnames=keys)
        writer.writeheader()
        writer.writerows(joined_rows)

    job.completed = 1
    job.results_handle = results_handle

    result = Catalog(name=job.results_label, handle=results_handle)
    result.save()

    # NOTE(review): both ``completed`` (above) and ``complete`` are set, as
    # in the original; confirm which one is the real model field.
    job.complete = True
    job.completed_date = timezone.now()
    job.job_status = 'complete'
    job.save()
    return True
def post(self, request):
    """Concatenate the two catalogs chosen in the submitted form.

    On a valid form, reads both tables as CSV from ``default_storage``,
    appends the right rows after the left rows, writes them to
    ``catalogs/<results_label>.csv`` under the media root with a sorted
    header, stores the result as a new ``Catalog`` and returns the rows as
    indented JSON in a ``text/plain`` response. On an invalid form the
    template is re-rendered with the bound form.

    Args:
        request: The incoming POST request.

    Returns:
        An ``HttpResponse`` whose body is the JSON dump of the combined
        rows, or the rendered form template when validation fails.
    """
    form = self.form_class(request.POST)
    if not form.is_valid():
        return render(request, self.template_name, {'form': form})

    job = form.save(commit=False)

    # Context managers ensure both storage handles are closed once the rows
    # have been read into memory.
    with default_storage.open(job.left_table.handle.name) as left_file, \
            default_storage.open(job.right_table.handle.name) as right_file:
        left = csv.DictReader(left_file)
        right = csv.DictReader(right_file)

        # NOTE: ``keys`` is the left reader's own fieldnames list, so the
        # appends below also make the left reader fill right-only columns
        # with ``None`` in every left row. Preserved so the JSON payload
        # does not change.
        keys = left.fieldnames
        for key in right.fieldnames:
            if key not in keys:
                keys.append(key)
        keys = sorted(keys)

        union = list(left) + list(right)

    results_handle = 'catalogs/{}.csv'.format(job.results_label)
    path = os.path.join(
        BASE_DIR, MEDIA_ROOT, 'catalogs', '{}.csv'.format(job.results_label)
    )
    # Close the output explicitly so the CSV is fully flushed before the
    # Catalog row that references it is saved.
    with open(path, 'w') as output:
        writer = csv.DictWriter(output, fieldnames=keys)
        writer.writeheader()
        writer.writerows(union)

    job.msg_json = union
    job.job_status = "complete"
    job.completed = True
    job.results_handle = results_handle

    response = HttpResponse()
    response['content-type'] = 'text/plain'

    data = json.dumps(union, indent=2)
    result = Catalog(
        name=job.results_label,
        json=data,
        handle=results_handle,
    )
    result.save()

    response.content = data
    job.save()
    return response