def metadata_conversion(old_sql, new_sql, user, user_email):
    """
    Conversion of an existing metadata file to a newer version.

    Parameters
    ----------
    old_sql: str
        The path to the file containing the old metadata.
    new_sql: str
        The filename of the new metadata file.
    user: str
        The name of the user for the 'contributions' section.
    user_email: str
        The email address of the user.

    Returns
    -------
    """
    parser = JSONParser_1_3()
    metadata = parser.parse_from_file(old_sql)
    metadata.contributors.append(
        structure.Contribution(
            title=user,
            email=user_email,
            date=datetime.now(),  # call now(); the bare method would store a function object instead of a timestamp
            obj=None,
            comment="Update metadata to v1.3 using metadata conversion tool",
        )
    )
    compiler = JSONCompiler()
    # Open the target file for writing and serialise the compiled metadata,
    # since the compiler returns a dict rather than a string.
    with open(new_sql, "w") as out_file:
        out_file.write(json.dumps(compiler.visit(metadata)))
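# A minimal usage sketch for the conversion helper above, assuming the parser,
# compiler, and structure classes are importable (e.g. from the omi package;
# the exact module paths are an assumption) and that the hypothetical input
# file exists. This is an illustration, not part of the original module.
if __name__ == "__main__":
    metadata_conversion(
        old_sql="metadata_v13.json",        # hypothetical input path
        new_sql="metadata_v14.json",        # hypothetical output path
        user="Jane Doe",                    # hypothetical contributor name
        user_email="jane.doe@example.org",  # hypothetical contributor email
    )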
def test_roundtrip_with_missing_fields():
    json_compiler = JSONCompiler()
    json_parser = JSONParser_1_4()
    with open("tests/data/metadata_v14.json", "r") as _input_file:
        input_string = _input_file.read()
    for expected_json in __mask(json.loads(input_string), keep=["id", "metaMetadata"]):
        assert_equal(
            json_compiler.visit(json_parser.parse(expected_json)), expected_json
        )
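# The `__mask` helper used in the test above is not shown in this excerpt.
# Below is only a sketch of what such a generator could look like, under the
# assumption that it yields copies of the metadata dictionary with one
# removable top-level field dropped at a time, always preserving the fields
# listed in `keep`. The real implementation may differ.
import copy

def __mask(metadata_dict, keep=()):
    for key in list(metadata_dict):
        if key in keep:
            continue  # never drop mandatory fields such as "id" or "metaMetadata"
        masked = copy.deepcopy(metadata_dict)
        del masked[key]  # yield one variant per optional top-level key
        yield masked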
def assert_roundtrip(expected_json, input_string):
    json_compiler = JSONCompiler()
    json_parser = JSONParser_1_4()
    rdf_compiler = RDFCompiler()
    rdf_p = RDFParser()

    # Step 1: Parse JSON to internal structure
    internal_metadata = json_parser.parse_from_string(input_string)

    # Step 2: Translate to rdf
    _ = rdf_compiler.visit(internal_metadata)

    # Step 3: Parse rdf string
    internal_metadata2 = rdf_p.parse(rdf_compiler.graph)

    # Step 4: Translate to JSON
    result_json = json_compiler.visit(internal_metadata2)

    # Final step: Compare
    assert_equal(expected_json, result_json, disregard_ordering=True)
def test_roundtrip():
    json_compiler = JSONCompiler()
    json_parser = JSONParser_1_4()
    rdf_compiler = RDFCompiler()
    rdf_p = RDFParser()
    with open("tests/data/metadata_v14.json", "r") as _input_file:
        input_string = _input_file.read()
    expected_json = json.loads(input_string)

    # Step 1: Parse JSON to internal structure
    internal_metadata = json_parser.parse_from_string(input_string)

    # Step 2: Translate to rdf
    _ = rdf_compiler.visit(internal_metadata)

    # Step 3: Parse rdf string
    internal_metadata2 = rdf_p.parse(rdf_compiler.graph)

    # Step 4: Translate to JSON
    result_json = json_compiler.visit(internal_metadata2)

    # Final step: Compare
    assert_equal(expected_json, result_json, disregard_ordering=True)
def post(self, request, schema, table):
    table_obj = actions._get_table(schema=schema, table=table)
    raw_input = request.data
    metadata, error = actions.try_parse_metadata(raw_input)
    if metadata is not None:
        compiler = JSONCompiler()
        table_obj.comment = json.dumps(compiler.visit(metadata))
        cursor = actions.load_cursor_from_context(request.data)
        # Surprisingly, SQLAlchemy does not seem to escape comment strings
        # properly. Certain strings cause database errors.
        # This MAY be a security issue. Therefore, we do not use
        # SQLAlchemy's compiler here but do it manually.
        sql = "COMMENT ON TABLE {schema}.{table} IS %s".format(
            schema=table_obj.schema, table=table_obj.name
        )
        cursor.execute(sql, (table_obj.comment,))
        return JsonResponse(raw_input)
    else:
        raise APIError(error)
def test_translation_1_3_to_1_4():
    parser = JSONParser_1_3()
    compiler = JSONCompiler()
    with open("tests/data/metadata_v13_minimal.json", "r") as _input_file:
        input_string = _input_file.read()

    # Step 1: Parse JSON to internal structure
    internal_metadata = parser.parse_from_string(input_string)

    # Step 2: Translate to version 1_4
    result_json = compiler.visit(internal_metadata)

    expected_json = OrderedDict(
        json.loads('''{
            "metaMetadata": {
                "metadataVersion": "OEP-1.4.0",
                "metadataLicense": {
                    "name": "CC0-1.0",
                    "title": "Creative Commons Zero v1.0 Universal",
                    "path": "https://creativecommons.org/publicdomain/zero/1.0/"}}}''')
    )

    assert_equal(expected_json, result_json)
def test_roundtrip():
    json_compiler = JSONCompiler()
    json_parser = JSONParser_1_4()
    json_renderer = JSONRenderer()
    rdf_compiler = RDFCompiler()
    rdf_p = RDFParser()
    with open("tests/data/metadata_v14.ttl", "r") as _input_file:
        input_string = _input_file.read()
    expected_graph = Graph()
    expected_graph.parse(data=input_string, format="ttl")

    # Step 1: Parse Turtle to internal structure
    internal_metadata = rdf_p.parse_from_string(input_string)

    # Step 2: Translate to json string
    json_metadata = json_renderer.render(json_compiler.visit(internal_metadata))

    # Step 3: Parse json string
    internal_metadata2 = json_parser.parse_from_string(json_metadata)

    # Step 4: Translate to Turtle
    _ = rdf_compiler.visit(internal_metadata2)

    # Final step: Compare
    for (t1, t2) in _squashed_graphs_triples(expected_graph, rdf_compiler.graph):
        assert t1 == t2
    assert isomorphic(expected_graph, rdf_compiler.graph)
def test_compiler_v1_4():
    compiler = JSONCompiler()
    with open("tests/data/metadata_v14.json", "r") as _input_file:
        expected_result = json.load(_input_file)
    result = compiler.visit(metadata_v_1_4)
    assert_equal(expected_result, result)
def get(self, request, schema, table, row_id=None):
    schema, table = actions.get_table_name(schema, table, restrict_schemas=False)

    columns = request.GET.getlist("column")

    where = request.GET.getlist("where")
    if row_id and where:
        raise actions.APIError(
            "Where clauses and row id are not allowed in the same query")

    orderby = request.GET.getlist("orderby")
    if row_id and orderby:
        raise actions.APIError(
            "Order by clauses and row id are not allowed in the same query")

    limit = request.GET.get("limit")
    if row_id and limit:
        raise actions.APIError(
            "Limit clauses and row id are not allowed in the same query")

    offset = request.GET.get("offset")
    if row_id and offset:
        raise actions.APIError(
            "Offset clauses and row id are not allowed in the same query")

    format = request.GET.get("form")

    if offset is not None and not offset.isdigit():
        raise actions.APIError("Offset must be integer")
    if limit is not None and not limit.isdigit():
        raise actions.APIError("Limit must be integer")
    if not all(parser.is_pg_qual(c) for c in columns):
        raise actions.APIError("Columns are not valid postgres qualifiers")
    if not all(parser.is_pg_qual(c) for c in orderby):
        raise actions.APIError(
            "Columns in orderby-clause are not valid postgres qualifiers")

    # OPERATORS could be EQUALS, GREATER, LOWER, NOTEQUAL, NOTGREATER, NOTLOWER
    # CONNECTORS could be AND, OR
    # Note: two values connected with a '+' are decoded with the '+' replaced
    # by a space.
    where_clauses = self.__read_where_clause(where)

    if row_id:
        clause = {
            "operands": [{"type": "column", "column": "id"}, row_id],
            "operator": "EQUALS",
            "type": "operator",
        }
        if where_clauses:
            where_clauses = conjunction(clause, where_clauses)
        else:
            where_clauses = clause

    # TODO: Validate where_clauses. Should not be vulnerable
    data = {
        "schema": schema,
        "table": table,
        "columns": columns,
        "where": where_clauses,
        "orderby": orderby,
        "limit": limit,
        "offset": offset,
    }

    return_obj = self.__get_rows(request, data)
    session = (
        sessions.load_session_from_context(return_obj.pop("context"))
        if "context" in return_obj
        else None
    )

    # Extract column names from description
    if "description" in return_obj:
        cols = [col[0] for col in return_obj["description"]]
    else:
        cols = []
        return_obj["data"] = []
        return_obj["rowcount"] = 0

    if format == "csv":
        pseudo_buffer = Echo()
        writer = csv.writer(pseudo_buffer, quoting=csv.QUOTE_ALL)
        response = OEPStream(
            (writer.writerow(x) for x in itertools.chain([cols], return_obj["data"])),
            content_type="text/csv",
            session=session,
        )
        response["Content-Disposition"] = (
            'attachment; filename="{schema}__{table}.csv"'.format(
                schema=schema, table=table))
        return response
    elif format == "datapackage":
        pseudo_buffer = Echo()
        writer = csv.writer(pseudo_buffer, quoting=csv.QUOTE_ALL)
        zf = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED)
        csv_name = "{schema}__{table}.csv".format(schema=schema, table=table)
        zf.write_iter(
            csv_name,
            (writer.writerow(x).encode("utf-8")
             for x in itertools.chain([cols], return_obj["data"])))
        table_obj = actions._get_table(schema=schema, table=table)
        if table_obj.comment:
            zf.writestr("datapackage.json", table_obj.comment.encode("utf-8"))
        else:
            # Fall back to an empty metadata document if the table has no comment
            zf.writestr(
                "datapackage.json",
                json.dumps(JSONCompiler().visit(OEPMetadata())).encode("utf-8"))
        response = OEPStream(
            (chunk for chunk in zf),
            content_type="application/zip",
            session=session,
        )
        response["Content-Disposition"] = (
            'attachment; filename="{schema}__{table}.zip"'.format(
                schema=schema, table=table))
        return response
    else:
        if row_id:
            dict_list = [dict(zip(cols, row)) for row in return_obj["data"]]
            if dict_list:
                dict_list = dict_list[0]
            else:
                raise Http404
            # TODO: Figure out what JsonResponse does differently.
            return JsonResponse(dict_list, safe=False)
        return stream(
            (dict(zip(cols, row)) for row in return_obj["data"]),
            session=session,
        )