コード例 #1
0
class TestStackExchangeFileReader(unittest.TestCase):
  """Exercises StackExchangeFileReader.next_values() on a small in-memory dump."""

  def setUp(self):
    # Four input lines: an XML declaration, the opening <posts> tag and two
    # <row> elements. Only the second row carries a ParentId attribute.
    xml_lines = [
      '<?xml version="1.0" encoding="utf-8"?>',
      '<posts>',
      '<row Id="1" PostTypeId="1" AcceptedAnswerId="7" />',
      '<row Id="2" PostTypeId="2" ParentId="1" />'
    ]
    self.reader = StackExchangeFileReader(
      lines=xml_lines,
      attrib_names=['Id', 'ParentId'])

  def test_extract(self):
    """One item is produced per input line; lines that are not rows give None."""
    rows = self.reader.next_values()

    # The XML declaration and the <posts> tag are expected to produce None.
    for _ in range(2):
      self.assertIsNone(next(rows))

    # Each <row> is expected to produce exactly the two requested attributes,
    # with None standing in for an attribute that is absent from the element.
    expected_rows = [
      {'Id': '1', 'ParentId': None},
      {'Id': '2', 'ParentId': '1'},
    ]
    for expected in expected_rows:
      row = next(rows)
      self.assertEqual(2, len(row))
      self.assertEqual(expected['Id'], row['Id'])
      self.assertEqual(expected['ParentId'], row['ParentId'])

    # After the last input line the generator must be exhausted.
    with self.assertRaises(StopIteration):
      next(rows)
コード例 #2
0
import sys
import argparse
import csv
from stack_io.extract import StackExchangeFileReader


# Command-line entry point: reads a StackExchange XML dump from STDIN, pulls
# the requested attributes out of it and writes them as CSV, either to the
# file named by --out or to STDOUT.
parser = argparse.ArgumentParser(
  description='Reads specific values from a XML StackExchange dump file from STDIN and writes them in a CSV format')
parser.add_argument('--out', help='output CSV file name')
parser.add_argument('names', nargs='+', help='one or more attributes to be extracted from the XML.')
args = parser.parse_args()

reader = StackExchangeFileReader(lines=sys.stdin, attrib_names=args.names)

if args.out:
  # The csv module does its own line-ending handling, so the file has to be
  # opened with newline='' to avoid doubled carriage returns on Windows.
  csvout = open(args.out, 'w', newline='')
else:
  csvout = sys.stdout

try:
  # The positional attribute names double as the CSV column headers.
  writer = csv.DictWriter(csvout, fieldnames=args.names)
  writer.writeheader()

  for value in reader.next_values():
    # The reader yields a falsy value for items it has nothing to report on
    # (the unit test shows None for non-row lines); those are skipped.
    if value:
      writer.writerow(value)
finally:
  # Close (and thereby flush) a file we opened ourselves, even on error;
  # STDOUT belongs to the interpreter and is left open.
  if csvout is not sys.stdout:
    csvout.close()