def run_xml2sql():
    """Command-line entry point: convert an XML file to a SQL dump.

    Prints a banner, parses the command-line options, hands them to the
    ``xml2sql`` converter, and prints a summary built from the ``num`` and
    ``num_insert`` entries of the value returned by ``convert``.
    """
    # Single-argument print(...) produces the same output whether it is parsed
    # as the Python 2 print statement or as the Python 3 print function.
    print("""xml2sql by Kailash Nadh (http://nadh.in) --help for help """)

    # parse arguments
    parser = argparse.ArgumentParser(description='Convert an xml file to sql.')
    # argparse.FileType replaces the Python 2-only `file` builtin: the converter
    # still receives an open file object, but a missing or unreadable path is
    # now reported as a normal argparse usage error instead of a traceback.
    parser.add_argument('--input', type=argparse.FileType('rb'),
                        dest='input_file', required=True,
                        help='input xml filename')
    parser.add_argument('--output', dest='output_file', required=True,
                        help='output sql filename')
    parser.add_argument('--tag', dest='tag', required=True,
                        help='the record tag. eg: item')
    parser.add_argument('--table', dest='table', required=True,
                        help='table name')
    # NOTE(review): the default is an empty string while values given on the
    # command line arrive as a list -- confirm convert() treats both alike.
    parser.add_argument('--ignore', dest='ignore', default='', nargs='+',
                        help='list of tags to ignore')
    parser.add_argument('--encoding', dest='encoding', default='utf-8',
                        help='character encoding (default=utf-8)')
    parser.add_argument('--limit', type=int, dest='limit', default=-1,
                        help='maximum number of records to process')
    # Plain (non-raw) help string: the raw string with a backslash line
    # continuation leaked literal backslashes into the --help output.
    parser.add_argument('--packet', type=float, dest='packet', default=8.0,
                        help="maximum size of an insert query in MB. "
                             "see MySQL's max_allowed_packet (default=8)")

    args = parser.parse_args()

    try:
        converter = xml2sql(args.input_file, args.output_file, args.encoding)
        num = converter.convert(tag=args.tag, table=args.table,
                                ignore=args.ignore, limit=args.limit,
                                packet=args.packet)
    finally:
        # argparse opened the input file; release it once conversion is over.
        args.input_file.close()

    # Spacing (double space before "(" and the space before ")") matches what
    # the original comma-separated print statement emitted.
    print('\n\nWrote %s records to %s  (INSERT queries = %s )'
          % (num['num'], args.output_file, num['num_insert']))
def run_xml2sql():
    """Command-line entry point: convert an XML file to a SQL dump.

    Prints a banner, parses the command-line options, hands them to the
    ``xml2sql`` converter, and prints a summary built from the ``num`` and
    ``num_insert`` entries of the value returned by ``convert``.
    """
    # Single-argument print(...) produces the same output whether it is parsed
    # as the Python 2 print statement or as the Python 3 print function.
    print("""xml2sql by Kailash Nadh (http://nadh.in) --help for help """)

    # parse arguments
    parser = argparse.ArgumentParser(description="Convert an xml file to sql.")
    # argparse.FileType replaces the Python 2-only `file` builtin: the converter
    # still receives an open file object, but a missing or unreadable path is
    # now reported as a normal argparse usage error instead of a traceback.
    parser.add_argument(
        "--input",
        type=argparse.FileType("rb"),
        dest="input_file",
        required=True,
        help="input xml filename",
    )
    parser.add_argument("--output", dest="output_file", required=True, help="output sql filename")
    parser.add_argument("--tag", dest="tag", required=True, help="the record tag. eg: item")
    parser.add_argument("--table", dest="table", required=True, help="table name")
    # NOTE(review): the default is an empty string while values given on the
    # command line arrive as a list -- confirm convert() treats both alike.
    parser.add_argument("--ignore", dest="ignore", default="", nargs="+", help="list of tags to ignore")
    parser.add_argument(
        "--encoding", dest="encoding", default="utf-8", help="character encoding (default=utf-8)"
    )
    parser.add_argument(
        "--limit", type=int, dest="limit", default=-1, help="maximum number of records to process"
    )
    # Plain (non-raw) help string: the raw string with a backslash line
    # continuation leaked literal backslashes into the --help output.
    parser.add_argument(
        "--packet",
        type=float,
        dest="packet",
        default=8.0,
        help="maximum size of an insert query in MB. "
        "see MySQL's max_allowed_packet (default=8)",
    )

    args = parser.parse_args()

    try:
        converter = xml2sql(args.input_file, args.output_file, args.encoding)
        num = converter.convert(
            tag=args.tag,
            table=args.table,
            ignore=args.ignore,
            limit=args.limit,
            packet=args.packet,
        )
    finally:
        # argparse opened the input file; release it once conversion is over.
        args.input_file.close()

    # Spacing (double space before "(" and the space before ")") matches what
    # the original comma-separated print statement emitted.
    print(
        "\n\nWrote %s records to %s  (INSERT queries = %s )"
        % (num["num"], args.output_file, num["num_insert"])
    )
def run_xml2sql():
    """Command-line entry point: convert an XML file to a SQL dump.

    Prints a banner, parses the command-line options, hands them to the
    ``xml2sql`` converter, and prints a summary built from the ``num`` and
    ``num_insert`` entries of the value returned by ``convert``.
    """
    # Single-argument print(...) produces the same output whether it is parsed
    # as the Python 2 print statement or as the Python 3 print function.
    print("""xml2sql by Kailash Nadh (http://nadh.in) --help for help """)

    # parse arguments
    parser = argparse.ArgumentParser(description='Convert an xml file to sql.')
    # argparse.FileType replaces the Python 2-only `file` builtin: the converter
    # still receives an open file object, but a missing or unreadable path is
    # now reported as a normal argparse usage error instead of a traceback.
    parser.add_argument('--input', type=argparse.FileType('rb'),
                        dest='input_file', required=True,
                        help='input xml filename')
    parser.add_argument('--output', dest='output_file', required=True,
                        help='output sql filename')
    parser.add_argument('--tag', dest='tag', required=True,
                        help='the record tag. eg: item')
    parser.add_argument('--table', dest='table', required=True,
                        help='table name')
    # NOTE(review): the default is an empty string while values given on the
    # command line arrive as a list -- confirm convert() treats both alike.
    parser.add_argument('--ignore', dest='ignore', default='', nargs='+',
                        help='list of tags to ignore')
    parser.add_argument('--encoding', dest='encoding', default='utf-8',
                        help='character encoding (default=utf-8)')
    parser.add_argument('--limit', type=int, dest='limit', default=-1,
                        help='maximum number of records to process')
    # Plain (non-raw) help string: the raw string with a backslash line
    # continuation leaked literal backslashes into the --help output.
    parser.add_argument('--packet', type=float, dest='packet', default=8.0,
                        help="maximum size of an insert query in MB. "
                             "see MySQL's max_allowed_packet (default=8)")

    args = parser.parse_args()

    try:
        converter = xml2sql(args.input_file, args.output_file, args.encoding)
        num = converter.convert(tag=args.tag, table=args.table,
                                ignore=args.ignore, limit=args.limit,
                                packet=args.packet)
    finally:
        # argparse opened the input file; release it once conversion is over.
        args.input_file.close()

    # Spacing (double space before "(" and the space before ")") matches what
    # the original comma-separated print statement emitted.
    print('\n\nWrote %s records to %s  (INSERT queries = %s )'
          % (num['num'], args.output_file, num['num_insert']))
# Result: We get 16446 rows, including some duplicates. TIWIS: Per line below, I am doing new tests in draftone-altxml.py to see if I can just make a nice manual importer of some sort.
# NEXT TEST is to see whether the FederalSiteIdentifier ATTRIBUTE is a true "site" uid. This will require a different XML module.
# Result: _______
# Next test is to keep cycling in fields to see whether solely non-numeric fields are ignored.
# (I have tried fields up through and including Classification_Name so far.)
# (If we can even get a basic numeric site identifier field, we may be able to scrape other data manually from the XML using other libraries.)

# Build the space-separated list of tags the converter should ignore: a fixed
# set of always-ignored tags, followed by every tag from fulltaglist that is
# not listed in tagstokeep.
ignore_parts = ["ReportingOrganization PlannedCompletionDateStep7 PlannedCompletionDateStep8 PlannedCompletionDateStep9 "]
for tag in fulltaglist:
    # A plain membership test replaces the index()/ValueError round trip.
    if tag in tagstokeep:
        print("using " + tag)
    else:
        print("discarding " + tag)
        ignore_parts.append(tag + " ")
# Join once, then strip the trailing separator left by the last piece.
ignorestring = "".join(ignore_parts).strip()

siteconverter = xml2sql("fcsi-rscf.xml", "sites.sql")
siteconverter.convert(table="sites", tag="Site", ignore=ignorestring)

# In converting sites to a .sql dump, we'll want to ignore most tags, since they create all sorts of problems.
# I'll then need to break up that dump into individual transactions.
# (30,000+ inserts in a single transaction is a bad idea in SQLite. The "lite" is there for a reason.)

# Now let's create a very crude sqlite database (mainly using TEXT types for now) and import the smaller table.
#conn = sqlite3.connect('crude.db')
#c = conn.cursor()
#c.execute('''CREATE TABLE IF NOT EXISTS organizations
#             (Code TEXT, EN TEXT, FR TEXT)''')
#c.execute('DELETE FROM organizations')