def explore_data_context(request, pk):
    """Fetch the SuppliedData identified by *pk* and build the explore-page context.

    Returns a ``(context, data, error_response)`` triple: on success
    ``error_response`` is ``None``; on failure ``context`` is ``{}``,
    ``data`` is ``None`` and ``error_response`` is a rendered 404 page.

    Fix: corrected the typo "suplied" -> "supplied" in the user-facing
    deleted-data message.
    """
    try:
        data = SuppliedData.objects.get(pk=pk)
    except (SuppliedData.DoesNotExist, ValueError):
        # Catches primary key does not exist and badly formed UUID
        return {}, None, render(request, 'error.html', {
            'sub_title': _('Sorry, the page you are looking for is not available'),
            'link': 'index',
            'link_text': _('Go to Home page'),
            'msg': _("We don't seem to be able to find the data you requested.")
        }, status=404)

    try:
        # Touch the underlying file purely to detect whether it still exists
        # on disk; uploads are purged after 7 days (see message below).
        data.original_file.file.name
    except FileNotFoundError:
        return {}, None, render(request, 'error.html', {
            'sub_title': _('Sorry, the page you are looking for is not available'),
            'link': 'index',
            'link_text': _('Go to Home page'),
            'msg': _('The data you were hoping to explore no longer exists.\n\nThis is because all '
                     'data supplied to this website is automatically deleted after 7 days, and therefore '
                     'the analysis of that data is no longer available.')
        }, status=404)

    file_type = get_file_type(data.original_file)
    context = {
        'original_file': {
            'url': data.original_file.url,
            'size': data.original_file.size
        },
        'file_type': file_type,
        'data_uuid': pk,
        'current_url': request.build_absolute_uri(),
        'source_url': data.source_url,
        'form_name': data.form_name,
        'created_datetime': data.created.strftime('%A, %d %B %Y %I:%M%p %Z'),
        'created_date': data.created.strftime('%A, %d %B %Y'),
    }

    return (context, data, None)
def iati_json_output(output_dir, file):
    """Run IATI checks on *file* and return the API-transformed results context.

    Non-XML (spreadsheet) input is first converted to XML against the IATI
    activity schema and the converted XML is sorted in place; intermediate
    conversion artefacts are then removed from *output_dir*.

    Fixes: removed a dead ``context = {}`` assignment and a redundant
    ``file_type = context['file_type']`` round-trip present in the original.
    """
    file_type = get_file_type(file)
    context = {"file_type": file_type}

    if file_type != 'xml':
        # Convert spreadsheet input to XML using the IATI activity schema.
        schema_iati = SchemaIATI()
        context.update(
            convert_spreadsheet(output_dir, '', file, file_type,
                                schema_iati.activity_schema, xml=True, cache=False))
        data_file = context['converted_path']
        # Sort the converted XML in place (source and destination are the same file).
        sort_iati_xml_file(context['converted_path'], context['converted_path'])
    else:
        data_file = file

    context = context_api_transform(
        common_checks_context_iati(context, output_dir, data_file, file_type, api=True))

    if file_type != 'xml':
        # Remove unwanted files in the output
        # TODO: can we do this by not writing the files in the first place?
        os.remove(os.path.join(output_dir, 'heading_source_map.json'))
        os.remove(os.path.join(output_dir, 'cell_source_map.json'))
        if file_type == 'csv':
            shutil.rmtree(os.path.join(output_dir, 'csv_dir'))

    # The ruleset output directory is not part of the API response; drop it if present.
    ruleset_dir = os.path.join(output_dir, 'ruleset')
    if os.path.exists(ruleset_dir):
        shutil.rmtree(ruleset_dir)

    return context
def _raise_for_invalid_ocds_version(schema_ocds):
    """Raise APIException listing the accepted schema versions if the data's version is invalid."""
    if schema_ocds.invalid_version_data:
        msg = '\033[1;31mThe schema version in your data is not valid. Accepted values: {}\033[1;m'
        raise APIException(
            msg.format(str(list(schema_ocds.version_choices.keys()))))


def ocds_json_output(output_dir, file, schema_version, convert):
    """Run OCDS checks on *file* and return the API-transformed results context.

    JSON input is validated directly (optionally flattened when *convert* is
    truthy); spreadsheet input is converted to JSON first, using the schema
    version declared in its metatab. Raises APIException for invalid JSON or
    an unsupported schema version.

    Fixes: removed a dead ``context = {}`` assignment and factored the
    duplicated invalid-version check into ``_raise_for_invalid_ocds_version``.
    """
    file_type = get_file_type(file)
    context = {"file_type": file_type}

    if file_type == 'json':
        with open(file, encoding='utf-8') as fp:
            try:
                json_data = json.load(fp)
            except ValueError:
                raise APIException('The file looks like invalid json')

        schema_ocds = SchemaOCDS(schema_version, json_data)
        _raise_for_invalid_ocds_version(schema_ocds)
        if schema_ocds.extensions:
            schema_ocds.create_extended_release_schema_file(output_dir, "")
        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url

        if convert:
            context.update(
                convert_json(output_dir, '', file, schema_url=url,
                             flatten=True, cache=False))
    else:
        # Spreadsheet input: read the metatab (with a fixed 1.1 package schema)
        # to discover the declared version, then convert to JSON against the
        # (possibly extended) release schema.
        metatab_schema_url = SchemaOCDS(select_version='1.1').release_pkg_schema_url
        metatab_data = get_spreadsheet_meta_data(output_dir, file, metatab_schema_url,
                                                 file_type=file_type)
        schema_ocds = SchemaOCDS(schema_version, release_data=metatab_data)
        _raise_for_invalid_ocds_version(schema_ocds)
        if schema_ocds.extensions:
            schema_ocds.create_extended_release_schema_file(output_dir, '')
        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
        pkg_url = schema_ocds.release_pkg_schema_url

        context.update(
            convert_spreadsheet(output_dir, '', file, file_type, schema_url=url,
                                pkg_schema_url=pkg_url, cache=False))
        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp)

    context = context_api_transform(
        common_checks_ocds(context, output_dir, json_data, schema_ocds,
                           api=True, cache=False))

    if file_type == 'xlsx':
        # Remove unwanted files in the output
        # TODO: can we do this by not writing the files in the first place?
        os.remove(os.path.join(output_dir, 'heading_source_map.json'))
        os.remove(os.path.join(output_dir, 'cell_source_map.json'))

    return context
def test_get_file_unrecognised_file_type():
    """An upload whose name and content match no known format raises UnrecognisedFileType."""
    upload = SimpleUploadedFile('test', b'test')
    with pytest.raises(UnrecognisedFileType):
        get_file_type(upload)
def test_get_file_type_json_noextension():
    """JSON content is detected even when the filename carries no extension."""
    upload = SimpleUploadedFile('test', b'{}')
    assert get_file_type(upload) == 'json'
def test_get_file_type_json():
    """A .json extension is detected as the json file type."""
    upload = SimpleUploadedFile('test.json', b'{}')
    assert get_file_type(upload) == 'json'
def test_get_file_type_csv(file_name):
    """CSV content is detected for each parametrised file name."""
    upload = SimpleUploadedFile(file_name, b'a,b')
    assert get_file_type(upload) == 'csv'
def test_get_file_type_xlsx(file_name):
    """xlsx detection works for each parametrised file name.

    Fix: the fixture is now opened in binary mode ('rb') — .xlsx files are
    zip archives, and opening them as text can raise UnicodeDecodeError if
    the file object is ever read.
    """
    fixture_path = os.path.join('cove', 'fixtures', 'basic.xlsx')
    with open(fixture_path, 'rb') as fp:
        assert get_file_type(UploadedFile(fp, file_name)) == 'xlsx'