def parse_xml(ratings):
    '''Parse ratings from an XML string as formatted by fide.com.

    Args:
        ratings: XML text handed unchanged to
            ``xmldataset.parse_using_profile``.

    Returns:
        A pandas ``DataFrame`` built from the ``players`` dataset that the
        profile below collects; its columns are the field names listed in
        the profile.

    Raises:
        KeyError: if the parsed result contains no ``players`` dataset.
    '''
    # xmldataset profiles express the XML hierarchy through indentation:
    # every ``name = dataset:players`` line is a leaf copied into the
    # ``players`` dataset.
    # NOTE(review): the nesting (playerslist > player > fields) was restored
    # from a whitespace-collapsed source - confirm against a real FIDE file.
    profile = '''
    playerslist
        player
            fideid = dataset:players
            name = dataset:players
            country = dataset:players
            sex = dataset:players
            title = dataset:players
            w_title = dataset:players
            o_title = dataset:players
            foa_title = dataset:players
            rating = dataset:players
            games = dataset:players
            k = dataset:players
            rapid_rating = dataset:players
            rapid_games = dataset:players
            rapid_k = dataset:players
            blitz_rating = dataset:players
            blitz_games = dataset:players
            blitz_k = dataset:players
            birthday = dataset:players
            flag = dataset:players
    '''
    records = xmldataset.parse_using_profile(ratings, profile)
    df = pd.DataFrame.from_records(records['players'])
    # Lazy %-style arguments: the message is only formatted when the INFO
    # level is actually enabled; the emitted text is unchanged.
    logger.info('parsed %d ratings from XML', len(df))
    return df
<publish_date>2000-11-17</publish_date> <description>After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society.</description> </book> <book id="bk104"> <author>Corets, Eva</author> <title>Oberon's Legacy</title> <genre>Fantasy</genre> <price>5.95</price> <publish_date>2001-03-10</publish_date> <description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description> </book> </shop> </catalog>""" profile = """ catalog shop book author = dataset:title_and_author title = dataset:title_and_author dataset:title_and_genre genre = dataset:title_and_genre""" # Pretty Print the output output = xmldataset.parse_using_profile(xml, profile) pp(output)
<url>http://minwook-shin.github.io/python-html-sanitization-text-linkification-using-bleach/</url> <publish_date>2019-05-22T00:00:00+00:00</publish_date> </article> <article id="https://minwook-shin.github.io/"> <author>minwook-shin</author> <url>http://minwook-shin.github.io/python-iterating-searching-modifying-html-using-beautifulsoup/</url> <publish_date>2019-05-21T00:00:00+00:00</publish_date> </article> </blog> </site>""" profile = """ site blog number = external_dataset:blog_information article id = dataset:blog_article,prefix:blog_article_ author = dataset:blog_article,prefix:blog_article_ url = dataset:blog_article,prefix:blog_article_ publish_date = dataset:blog_article,name:date,prefix:blog_article_ __EXTERNAL_VALUE__ = blog_information:number:blog_article""" result = xmldataset.parse_using_profile(xml, profile) import pprint pp = pprint.PrettyPrinter(indent=4).pprint pp(result) print(type(result))
detail, with attention to XML DOM interfaces, XSLT processing, SAX and more.</description> </book> <book id="bk112"> <author>Galos, Mike</author> <title>Visual Studio 7: A Comprehensive Guide</title> <genre>Computer</genre> <price>49.95</price> <publish_date>2001-04-16</publish_date> <description>Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment.</description> </book> <specificafter> <specificvalue>123</specificvalue> </specificafter> </lowest> </catalog>""" # ------------------------------------------------------------------------------ # Setup Pretty Printing # ------------------------------------------------------------------------------ ppsetup = pprint.PrettyPrinter(indent=4) pp = ppsetup.pprint # ------------------------------------------------------------------------------ # Call parse_using_profile # ------------------------------------------------------------------------------ print(parse_using_profile(xml, profile))
</book> <book id="bk104"> <author>Corets, Eva</author> <title>Oberon's Legacy</title> <genre>Fantasy</genre> <price>5.95</price> <publish_date>2001-03-10</publish_date> <description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description> </book> </shop> </catalog>""" profile=""" catalog shop number = external_dataset:shop_information book __NEW_DATASET__ = title_and_author title_and_genre id = dataset:title_and_author dataset:title_and_genre author = dataset:title_and_author title = dataset:title_and_author dataset:title_and_genre genre = dataset:title_and_genre,name:style __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre""" # Pretty Print the output output = xmldataset.parse_using_profile(xml,profile) pp(output)
Ascendant.</description> </book> </shop> </catalog>""" profile = """ catalog shop number = external_dataset:shop_information book __NEW_DATASET__ = title_and_author title_and_genre id = dataset:title_and_author dataset:title_and_genre author = dataset:title_and_author title = dataset:title_and_author dataset:title_and_genre genre = dataset:title_and_genre,name:style __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre""" def print_dataset(value): pp(value) # Pretty Print the output output = xmldataset.parse_using_profile( xml, profile, dispatch={'__generic__': { 'counter': 2, 'coderef': print_dataset }})
<author>Corets, Eva</author> <title>Oberon's Legacy</title> <genre>Fantasy</genre> <price>5.95</price> <publish_date>2001-03-10</publish_date> <description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description> </book> </shop> </catalog>""" def to_upper(value): return value.upper() profile=""" catalog shop number = external_dataset:shop_information book id = dataset:title_and_author dataset:title_and_genre author = dataset:title_and_author,process:to_upper title = dataset:title_and_author dataset:title_and_genre genre = dataset:title_and_genre,name:style __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre""" # Pretty Print the output output = xmldataset.parse_using_profile(xml,profile, process = { 'to_upper' : to_upper }) pp(output)
<description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description> </book> </shop> </catalog>""" profile=""" catalog shop number = external_dataset:shop_information book __NEW_DATASET__ = title_and_author title_and_genre id = dataset:title_and_author dataset:title_and_genre author = dataset:title_and_author title = dataset:title_and_author dataset:title_and_genre genre = dataset:title_and_genre,name:style __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre""" def print_dataset(value): pp(value) # Pretty Print the output output = xmldataset.parse_using_profile(xml,profile, dispatch = { '__generic__' : { 'counter' : 2, 'coderef' : print_dataset } })
</book> </shop> </catalog>""" profile=""" catalog shop number = external_dataset:shop_information book __NEW_DATASET__ = title_and_author title_and_genre id = dataset:title_and_author dataset:title_and_genre author = dataset:title_and_author title = dataset:title_and_author dataset:title_and_genre genre = dataset:title_and_genre,name:style __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre""" def print_dataset(value): pp(value) # Pretty Print the output output = xmldataset.parse_using_profile(xml,profile, dispatch = { 'title_and_author' : { 'counter' : 2, 'coderef' : print_dataset }, 'title_and_genre' : { 'counter' : 3, 'coderef' : print_dataset } })
# Fetch the raw XML documents. Each query result is read from row 0 of its
# ``data`` column. The connection is closed in ``finally`` so it is released
# even when one of the queries raises.
try:
    raw_shop = pandas.read_sql(query_shop, myconn)['data'][0]
    raw_quest = pandas.read_sql(query_quest, myconn)['data'][0]
finally:
    myconn.close()

#%% xmldataset setup
# xmldataset profiles express the XML hierarchy through indentation; each
# ``field = dataset:<name>`` line copies that element into the named dataset.
# NOTE(review): nesting restored from a whitespace-collapsed source - confirm
# against the actual XML documents.
profile_shop = """
ShopItemConfigList
    ShopItemConfigData
        id = dataset:ShopItemConfigList
        name = dataset:ShopItemConfigList"""

profile_quest = """
QuestConfigList
    QuestConfigData
        id = dataset:QuestConfigList
        name = dataset:QuestConfigList
        repeatable = dataset:QuestConfigList
        questGiver = dataset:QuestConfigList
        type = dataset:QuestConfigList
        minLevel = dataset:QuestConfigList
        questRegion = dataset:QuestConfigList
        questLevel = dataset:QuestConfigList"""

#%% Parse XML
# parse_using_profile returns a mapping keyed by dataset name; the records
# stored under each key are turned into a DataFrame.
shop_datasets = xmldataset.parse_using_profile(raw_shop, profile_shop)
df_shop = DataFrame(shop_datasets['ShopItemConfigList'])

quest_datasets = xmldataset.parse_using_profile(raw_quest, profile_quest)
df_quest = DataFrame(quest_datasets['QuestConfigList'])
# xmldataset profiles express the XML hierarchy through indentation.
# ``number`` is stored in the external dataset ``shop_information`` and
# referenced from both book datasets via ``__EXTERNAL_VALUE__``; the ``genre``
# field is recorded under the name ``style``.
# NOTE(review): nesting restored from a whitespace-collapsed source following
# the layout used in the xmldataset documentation - confirm.
profile = """
catalog
    shop
        number = external_dataset:shop_information
        book
            __NEW_DATASET__ = title_and_author title_and_genre
            id = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author
            title = dataset:title_and_author dataset:title_and_genre
            genre = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""


def print_dataset(value):
    """Pretty-print *value*; used as the dispatch callback below."""
    pp(value)


# Pretty Print the output
# Each dataset gets its own dispatch entry: ``coderef`` is the callback and
# ``counter`` the batch size (per the xmldataset docs the callback is invoked
# once that many records have been collected - verify against the installed
# version).
output = xmldataset.parse_using_profile(
    xml,
    profile,
    dispatch={
        'title_and_author': {
            'counter': 2,
            'coderef': print_dataset,
        },
        'title_and_genre': {
            'counter': 3,
            'coderef': print_dataset,
        },
    },
)
def new_prog(self, filex):
    """Load ``<filex>.xml``, flatten it and append it to ``symdev_test``.

    The XML is parsed with an xmldataset profile into a single ``mydata``
    dataset, converted to a DataFrame with a fixed column order, missing
    values are filled with defaults, and the rows are appended to the
    ``symdev_test`` SQL table.

    Args:
        filex: File name without the ``.xml`` extension (for example
            ``'symdev1918'``). The same value is used to fill the
            ``filename`` column.

    Returns:
        None.
    """
    filename = filex + '.xml'
    # Context manager so the file handle is closed even if reading fails.
    with open(filename) as xml_file:
        xml_data = xml_file.read()

    # xmldataset profiles express the XML hierarchy through indentation;
    # ``name:<alias>`` stores a field under a different column name so the
    # repeated ``status``/``dev_name``/``wwn``/``state`` tags do not collide.
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source (containers with no fields between them
    # are assumed to be parent/child, the remaining sections are assumed to
    # be siblings under Device / RDF) - confirm against a real input file.
    profile = """
    SymCLI_ML
        Symmetrix
            Symm_Info
                symid = dataset:mydata
            Device
                Dev_Info
                    dev_name = dataset:mydata
                    configuration = dataset:mydata
                    device_group = dataset:mydata
                    ld_name = dataset:mydata
                    status = dataset:mydata,name:dev_status
                    attached_bcv = dataset:mydata
                    snapvx_source = dataset:mydata
                    snapvx_target = dataset:mydata
                    thin_pool_name = dataset:mydata
                    SRP_name = dataset:mydata
                Product
                    wwn = dataset:mydata
                Flags
                    snap_save_device = dataset:mydata
                    gatekeeper = dataset:mydata
                    meta = dataset:mydata
                Capacity
                    megabytes = dataset:mydata
                    cylinders = dataset:mydata
                    blocks = dataset:mydata
                    kilobytes = dataset:mydata
                RDF
                    RDF_Info
                        pair_state = dataset:mydata
                        consistency_state = dataset:mydata
                    Mode
                        mode = dataset:mydata
                        adaptive_copy = dataset:mydata
                        adaptive_copy_write_pending = dataset:mydata
                    Status
                        link = dataset:mydata,name:link_status
                        link_status_change_time = dataset:mydata
                    Local
                        ra_group_num = dataset:mydata,name:local_ra_group_num
                    Remote
                        dev_name = dataset:mydata,name:remote_dev_name
                        remote_symid = dataset:mydata
                        wwn = dataset:mydata,name:remote_wwn
                        state = dataset:mydata,name:remote_dev_state
    """
    datasets = xmldataset.parse_using_profile(xml_data, profile)

    # Fixed column order for the target table. ``vdev_tgt`` and ``filename``
    # are not produced by the profile, so they start out empty and receive
    # their values from the fill step below.
    df = pd.DataFrame.from_records(
        datasets['mydata'],
        columns=[
            'symid',
            'dev_name',
            'configuration',
            'device_group',
            'ld_name',
            'attached_bcv',
            'dev_status',
            'snapvx_source',
            'snapvx_target',
            'thin_pool_name',
            'SRP_name',
            'vdev_tgt',
            'wwn',
            'snap_save_device',
            'gatekeeper',
            'meta',
            'megabytes',
            'cylinders',
            'blocks',
            'kilobytes',
            'pair_state',
            'consistency_state',
            'mode',
            'adaptive_copy',
            'adaptive_copy_write_pending',
            'link_status',
            'link_status_change_time',
            'local_ra_group_num',
            'remote_dev_name',
            'remote_symid',
            'remote_wwn',
            'remote_dev_state',
            'filename',
        ])

    # Per-column replacements for missing values: rows without a symid
    # inherit the one from the first row, ``filename`` gets the input name,
    # and the listed optional columns get the placeholder 'N/A'. Columns not
    # listed here keep their missing values.
    fill_values = {
        'symid': df['symid'][0],
        'device_group': 'N/A',
        'ld_name': 'N/A',
        'snapvx_source': 'N/A',
        'snapvx_target': 'N/A',
        'vdev_tgt': 'N/A',
        'pair_state': 'N/A',
        'consistency_state': 'N/A',
        'mode': 'N/A',
        'adaptive_copy': 'N/A',
        'adaptive_copy_write_pending': 'N/A',
        'link_status': 'N/A',
        'link_status_change_time': 'N/A',
        'local_ra_group_num': 'N/A',
        'remote_dev_name': 'N/A',
        'remote_symid': 'N/A',
        'remote_wwn': 'N/A',
        'remote_dev_state': 'N/A',
        'filename': filex,
    }
    filled = df.fillna(value=fill_values)

    engine = create_engine('mysql://*****:*****@localhost/mysql')
    # The connection and the transaction are both context-managed, so the
    # append happens inside ``conn.begin()`` and the connection is released
    # afterwards.
    with engine.connect() as conn, conn.begin():
        filled.to_sql('symdev_test', conn, if_exists='append', index=False)
# xmldataset profiles express the XML hierarchy through indentation; each
# ``field = dataset:QuestConfigList`` line copies that element into the
# ``QuestConfigList`` dataset.
# NOTE(review): nesting restored from a whitespace-collapsed source - confirm
# against the actual XML document.
profile_quest = """
QuestConfigList
    QuestConfigData
        id = dataset:QuestConfigList
        name = dataset:QuestConfigList
        repeatable = dataset:QuestConfigList
        questGiver = dataset:QuestConfigList
        type = dataset:QuestConfigList
        minLevel = dataset:QuestConfigList
        questRegion = dataset:QuestConfigList
        questLevel = dataset:QuestConfigList"""

#%% Parse XML
# parse_using_profile returns a mapping keyed by dataset name; the records
# stored under each key are turned into a DataFrame.
shop_datasets = xmldataset.parse_using_profile(raw_shop, profile_shop)
df_shop = DataFrame(shop_datasets['ShopItemConfigList'])

quest_datasets = xmldataset.parse_using_profile(raw_quest, profile_quest)
df_quest = DataFrame(quest_datasets['QuestConfigList'])