def test_write_big_csv(self):
    """Convert the packaged gzipped XML dump to CSV and check the line count.

    The dump "Wikidata-20131129161111.xml.gz" is located relative to this
    module via ``resource_filename``, parsed with ``XmlReader.read_xml`` and
    written with ``CsvWriter.write_csv`` into an in-memory buffer, which is
    expected to contain exactly 3679 lines.
    """
    out = StringIO()
    f = resource_filename(__name__, "Wikidata-20131129161111.xml.gz")
    # Close the archive deterministically instead of leaking the handle.
    # The conversion stays inside the ``with`` block because read_xml may
    # consume its input lazily (not visible from here).
    with gzip.open(f) as dump:
        xml = XmlReader.read_xml(dump)
        CsvWriter.write_csv(xml, out)
    out.seek(0)
    self.assertThat(len(out.readlines()), Equals(3679))
def test_write_big_csv(self):
    """Convert the packaged gzipped XML dump to CSV and check the line count.

    The dump "Wikidata-20131129161111.xml.gz" is located relative to this
    module via ``resource_filename``, parsed with ``XmlReader.read_xml`` and
    written with ``CsvWriter.write_csv`` into an in-memory buffer, which is
    expected to contain exactly 5627 lines.
    """
    out = StringIO()
    f = resource_filename(__name__, "Wikidata-20131129161111.xml.gz")
    # Close the archive deterministically instead of leaking the handle.
    # The conversion stays inside the ``with`` block because read_xml may
    # consume its input lazily (not visible from here).
    with gzip.open(f) as dump:
        xml = XmlReader.read_xml(dump)
        CsvWriter.write_csv(xml, out)
    out.seek(0)
    self.assertThat(len(out.readlines()), Equals(5627))
def test_write_csv(self):
    """Write ``test_data`` as CSV and verify the two emitted rows in order."""
    out = StringIO()
    CsvWriter.write_csv(test_data, out)
    out.seek(0)

    expected_rows = (
        "Q51,31,wikibase-entityid,Q5107",
        "Q51,373,string,Europe",
    )
    # Rows are compared one at a time, ignoring surrounding whitespace
    # (including the line terminator).
    for expected in expected_rows:
        row = out.readline()
        self.assertThat(row.strip(), Equals(expected))

    # Nothing may follow the expected rows.
    remainder = out.read()
    self.assertThat(remainder, Equals(""))
def test_write_csv(self):
    """Write ``test_data`` as CSV and verify every emitted row in order.

    The output is expected to hold two claim rows, then one qualifier row
    and one reference row, with nothing after them.
    """
    out = StringIO()
    CsvWriter.write_csv(test_data, out)
    out.seek(0)

    expected_rows = (
        'Q51,claim,31,wikibase-entityid,Q5107',
        'Q51,claim,373,string,Europe',
        'Q51,qualifier,1,string,qual',
        'Q51,reference,2,string,ref',
    )
    # Compare row by row, ignoring surrounding whitespace (including the
    # line terminator).
    for expected in expected_rows:
        row = out.readline()
        assert expected == row.strip()

    # Nothing may follow the expected rows.
    remainder = out.read()
    assert '' == remainder
import argparse
import logging
import sys
import time

from propertysuggester.parser import JsonReader, CsvWriter
from propertysuggester.utils.CompressedFileType import CompressedFileType


def main():
    """Convert a Wikidata JSON dump to CSV and log the total run time.

    Command-line arguments:
        input:  the JSON input file (a wikidata dump), opened through
                ``CompressedFileType('r')``.
        output: optional CSV output file, opened through
                ``CompressedFileType('wb')``; defaults to ``sys.stdout``.
    """
    logging.basicConfig(level=logging.INFO)  # Print logging.info

    parser = argparse.ArgumentParser(
        description="this program converts wikidata JSON dumps to CSV data.")
    parser.add_argument("input", help="The JSON input file (a wikidata dump)",
                        type=CompressedFileType('r'))
    parser.add_argument("output",
                        help="The CSV output file (default=sys.stdout)",
                        default=sys.stdout, nargs='?',
                        type=CompressedFileType('wb'))
    args = parser.parse_args()

    start = time.time()
    try:
        CsvWriter.write_csv(JsonReader.read_json(args.input), args.output)
    finally:
        # Close the streams explicitly so buffered (possibly compressed)
        # output is flushed before the interpreter exits. The process-wide
        # standard streams are left open.
        # NOTE(review): assumes CompressedFileType yields file-like objects
        # exposing close() -- confirm against its implementation.
        for stream in (args.input, args.output):
            if stream is not sys.stdin and stream is not sys.stdout:
                stream.close()

    # Lazy %-style arguments: formatting is deferred to the logging module.
    logging.info("total time: %.2fs", time.time() - start)


if __name__ == "__main__":
    main()