Exemple #1
0
def parse_xml(ratings):
    """Convert a fide.com ratings XML string into a pandas DataFrame.

    ``ratings`` is the XML text; every listed child of ``playerslist/player``
    is routed into the single ``players`` dataset, which becomes the rows of
    the returned frame.
    """
    # xmldataset profile: indentation mirrors the XML nesting and each leaf
    # is assigned to the "players" dataset.
    profile = '''
    playerslist
      player
        fideid = dataset:players
        name = dataset:players
        country = dataset:players
        sex = dataset:players
        title = dataset:players
        w_title = dataset:players
        o_title = dataset:players
        foa_title = dataset:players
        rating = dataset:players
        games = dataset:players
        k = dataset:players
        rapid_rating = dataset:players
        rapid_games = dataset:players
        rapid_k = dataset:players
        blitz_rating = dataset:players
        blitz_games = dataset:players
        blitz_k = dataset:players
        birthday = dataset:players
        flag = dataset:players
    '''
    players = pd.DataFrame.from_records(
        xmldataset.parse_using_profile(ratings, profile)['players']
    )
    logger.info(f'parsed {len(players)} ratings from XML')
    return players
Exemple #2
0
           <publish_date>2000-11-17</publish_date>
           <description>After the collapse of a nanotechnology
           society in England, the young survivors lay the
           foundation for a new society.</description>
        </book>
        <book id="bk104">
           <author>Corets, Eva</author>
           <title>Oberon's Legacy</title>
           <genre>Fantasy</genre>
           <price>5.95</price>
           <publish_date>2001-03-10</publish_date>
           <description>In post-apocalypse England, the mysterious
           agent known only as Oberon helps to create a new life
           for the inhabitants of London. Sequel to Maeve
           Ascendant.</description>
        </book>
     </shop>
  </catalog>"""

# xmldataset profile: the indentation mirrors the catalog > shop > book
# nesting of the XML, and each leaf names the dataset(s) it is stored in.
# "title" lists two datasets, so it is captured in both of them.
profile = """
catalog
    shop
        book
            author = dataset:title_and_author
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre"""

# Pretty Print the output
# `xml` and `pp` are defined earlier in the script (outside this excerpt).
output = xmldataset.parse_using_profile(xml, profile)
pp(output)
Exemple #3
0
           <url>http://minwook-shin.github.io/python-html-sanitization-text-linkification-using-bleach/</url>
           <publish_date>2019-05-22T00:00:00+00:00</publish_date>
        </article>
        <article id="https://minwook-shin.github.io/">
           <author>minwook-shin</author>
           <url>http://minwook-shin.github.io/python-iterating-searching-modifying-html-using-beautifulsoup/</url>
           <publish_date>2019-05-21T00:00:00+00:00</publish_date>
        </article>
     </blog>
  </site>"""

# xmldataset profile for the site > blog > article XML.
# - "number" is kept as external data under the name "blog_information".
# - Each article field goes to the "blog_article" dataset; per the xmldataset
#   docs, "prefix:" prepends the given text to the stored key and "name:"
#   renames it (publish_date -> date) -- confirm against the installed version.
# - __EXTERNAL_VALUE__ copies blog_information's "number" into blog_article.
profile = """
site
    blog
        number     = external_dataset:blog_information
        article
            id     = dataset:blog_article,prefix:blog_article_
            author = dataset:blog_article,prefix:blog_article_
            url    = dataset:blog_article,prefix:blog_article_
            publish_date  = dataset:blog_article,name:date,prefix:blog_article_
            __EXTERNAL_VALUE__ = blog_information:number:blog_article"""

result = xmldataset.parse_using_profile(xml, profile)

# NOTE(review): this import sits mid-script; PEP 8 places imports at the top
# of the file.
import pprint

# Bind the pretty-printer's bound method so it can be called like a function.
pp = pprint.PrettyPrinter(indent=4).pprint

pp(result)

# Show the container type returned by parse_using_profile.
print(type(result))
         detail, with attention to XML DOM interfaces, XSLT processing,
         SAX and more.</description>
      </book>
      <book id="bk112">
         <author>Galos, Mike</author>
         <title>Visual Studio 7: A Comprehensive Guide</title>
         <genre>Computer</genre>
         <price>49.95</price>
         <publish_date>2001-04-16</publish_date>
         <description>Microsoft Visual Studio 7 is explored in depth,
         looking at how Visual Basic, Visual C++, C#, and ASP+ are
         integrated into a comprehensive development
         environment.</description>
      </book>
      <specificafter>
         <specificvalue>123</specificvalue>
      </specificafter>
   </lowest>
</catalog>"""

# ------------------------------------------------------------------------------
#    Setup Pretty Printing
# ------------------------------------------------------------------------------
# `pp` is a shortcut to the 4-space-indent pretty printer's pprint method.
ppsetup = pprint.PrettyPrinter(indent=4)
pp = ppsetup.pprint

# ------------------------------------------------------------------------------
#    Call parse_using_profile
# ------------------------------------------------------------------------------
# NOTE(review): `pp` is configured above but the result is emitted with the
# plain print(); confirm whether pretty-printed output was intended.
# `parse_using_profile`, `xml` and `profile` are defined outside this excerpt.
print(parse_using_profile(xml, profile))
Exemple #5
0
        </book>
        <book id="bk104">
           <author>Corets, Eva</author>
           <title>Oberon's Legacy</title>
           <genre>Fantasy</genre>
           <price>5.95</price>
           <publish_date>2001-03-10</publish_date>
           <description>In post-apocalypse England, the mysterious
           agent known only as Oberon helps to create a new life
           for the inhabitants of London. Sequel to Maeve
           Ascendant.</description>
        </book>
     </shop>
  </catalog>"""

# xmldataset profile for the catalog > shop > book XML.
# - "number" is kept as external data under "shop_information".
# - __NEW_DATASET__ names the datasets for which each <book> starts a new
#   record (per the xmldataset docs -- confirm against the installed version).
# - "id" and "title" are stored in both datasets; "genre" is stored under the
#   key "style" via name:.
# - __EXTERNAL_VALUE__ copies shop_information's "number" into both datasets.
profile="""
catalog
    shop
        number     = external_dataset:shop_information
        book
            __NEW_DATASET__ = title_and_author title_and_genre
            id     = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""

# Pretty Print the output
# `xml` and `pp` are defined earlier in the script (outside this excerpt).
output = xmldataset.parse_using_profile(xml,profile)
pp(output)
Exemple #6
0
           Ascendant.</description>
        </book>
     </shop>
  </catalog>"""

# xmldataset profile for the catalog > shop > book XML.
# - "number" is kept as external data under "shop_information".
# - __NEW_DATASET__ names the datasets for which each <book> starts a new
#   record (per the xmldataset docs -- confirm against the installed version).
# - "genre" is stored under the key "style" via name:.
# - __EXTERNAL_VALUE__ copies shop_information's "number" into both datasets.
profile = """
catalog
    shop
        number     = external_dataset:shop_information
        book
            __NEW_DATASET__ = title_and_author title_and_genre
            id     = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""


def print_dataset(value):
    """Pretty-print ``value``; registered below as the dispatch callback."""
    pp(value)


# Pretty Print the output
# The dispatch table registers print_dataset under '__generic__' with a
# counter of 2. Presumably the callback fires for any dataset once two records
# have accumulated -- verify against the xmldataset dispatch documentation.
output = xmldataset.parse_using_profile(
    xml,
    profile,
    dispatch={'__generic__': {
        'counter': 2,
        'coderef': print_dataset
    }})
Exemple #7
0
           <author>Corets, Eva</author>
           <title>Oberon's Legacy</title>
           <genre>Fantasy</genre>
           <price>5.95</price>
           <publish_date>2001-03-10</publish_date>
           <description>In post-apocalypse England, the mysterious
           agent known only as Oberon helps to create a new life
           for the inhabitants of London. Sequel to Maeve
           Ascendant.</description>
        </book>
     </shop>
  </catalog>"""

def to_upper(value):
    """Return ``value`` upper-cased via its own ``upper()`` method."""
    uppercased = value.upper()
    return uppercased

# xmldataset profile for the catalog > shop > book XML.
# - "number" is kept as external data under "shop_information".
# - "author" carries process:to_upper, which refers to the callable registered
#   under that name in the `process` mapping passed to parse_using_profile.
# - "genre" is stored under the key "style" via name:.
# - __EXTERNAL_VALUE__ copies shop_information's "number" into both datasets.
profile="""
catalog
    shop
        number     = external_dataset:shop_information
        book
            id     = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author,process:to_upper
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""

# Pretty Print the output
# The `process` dict maps the name used in the profile to the Python function.
output = xmldataset.parse_using_profile(xml,profile, process = { 'to_upper' : to_upper })
pp(output)
Exemple #8
0
           <description>In post-apocalypse England, the mysterious
           agent known only as Oberon helps to create a new life
           for the inhabitants of London. Sequel to Maeve
           Ascendant.</description>
        </book>
     </shop>
  </catalog>"""

# xmldataset profile for the catalog > shop > book XML.
# - "number" is kept as external data under "shop_information".
# - __NEW_DATASET__ names the datasets for which each <book> starts a new
#   record (per the xmldataset docs -- confirm against the installed version).
# - "genre" is stored under the key "style" via name:.
# - __EXTERNAL_VALUE__ copies shop_information's "number" into both datasets.
profile="""
catalog
    shop
        number     = external_dataset:shop_information
        book
            __NEW_DATASET__ = title_and_author title_and_genre
            id     = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""

def print_dataset(value):
    """Pretty-print ``value``; registered below as the dispatch callback."""
    pp(value)

# Pretty Print the output
# The dispatch table registers print_dataset under '__generic__' with a
# counter of 2. Presumably the callback fires for any dataset once two records
# have accumulated -- verify against the xmldataset dispatch documentation.
output = xmldataset.parse_using_profile(xml,profile, dispatch = { 
        '__generic__' : { 
                'counter' : 2, 
                'coderef' : print_dataset 
        } 
})
        </book>
     </shop>
  </catalog>"""

# xmldataset profile for the catalog > shop > book XML.
# - "number" is kept as external data under "shop_information".
# - __NEW_DATASET__ names the datasets for which each <book> starts a new
#   record (per the xmldataset docs -- confirm against the installed version).
# - "genre" is stored under the key "style" via name:.
# - __EXTERNAL_VALUE__ copies shop_information's "number" into both datasets.
profile="""
catalog
    shop
        number     = external_dataset:shop_information
        book
            __NEW_DATASET__ = title_and_author title_and_genre
            id     = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""

def print_dataset(value):
    """Pretty-print ``value``; registered below as the dispatch callback."""
    pp(value)

# Pretty Print the output
# Each dataset gets its own dispatch entry: title_and_author uses a counter
# of 2, title_and_genre a counter of 3, both calling print_dataset.
# Presumably the counter is the number of records collected before the
# callback fires -- verify against the xmldataset dispatch documentation.
output = xmldataset.parse_using_profile(xml,profile, dispatch = { 
        'title_and_author' : { 
                'counter' : 2, 
                'coderef' : print_dataset 
        }, 
        'title_and_genre' : { 
                'counter' : 3, 
                'coderef' : print_dataset 
        } 
})
# Fetch the first value of the 'data' column for each query. The try/finally
# guarantees the connection is closed even when one of the reads raises.
try:
    raw_shop = pandas.io.sql.read_sql(query_shop, myconn)['data'][0]
    raw_quest = pandas.io.sql.read_sql(query_quest, myconn)['data'][0]
finally:
    myconn.close()

#%% xmldataset setup
# Each profile mirrors the XML nesting (list element > item element) and
# routes the listed leaves into a dataset named after the list element.
profile_shop = """
ShopItemConfigList
    ShopItemConfigData
        id = dataset:ShopItemConfigList
        name = dataset:ShopItemConfigList"""

profile_quest = """
QuestConfigList
    QuestConfigData
        id = dataset:QuestConfigList
        name = dataset:QuestConfigList
        repeatable = dataset:QuestConfigList
        questGiver = dataset:QuestConfigList
        type = dataset:QuestConfigList
        minLevel = dataset:QuestConfigList
        questRegion = dataset:QuestConfigList
        questLevel = dataset:QuestConfigList"""

#%% Parse XML
# Parse each XML blob and wrap the matching dataset directly in a DataFrame.
df_shop = DataFrame(
    xmldataset.parse_using_profile(raw_shop, profile_shop)['ShopItemConfigList']
)

df_quest = DataFrame(
    xmldataset.parse_using_profile(raw_quest, profile_quest)['QuestConfigList']
)
Exemple #11
0
# xmldataset profile for a catalog > shop > book XML document.
# - "number" is kept as external data under "shop_information".
# - __NEW_DATASET__ names the datasets for which each <book> starts a new
#   record (per the xmldataset docs -- confirm against the installed version).
# - "genre" is stored under the key "style" via name:.
# - __EXTERNAL_VALUE__ copies shop_information's "number" into both datasets.
profile = """
catalog
    shop
        number     = external_dataset:shop_information
        book
            __NEW_DATASET__ = title_and_author title_and_genre
            id     = dataset:title_and_author dataset:title_and_genre
            author = dataset:title_and_author
            title  = dataset:title_and_author dataset:title_and_genre
            genre  = dataset:title_and_genre,name:style
            __EXTERNAL_VALUE__ = shop_information:number:title_and_author shop_information:number:title_and_genre"""


def print_dataset(value):
    """Pretty-print ``value``; registered below as the dispatch callback."""
    pp(value)


# Pretty Print the output
# Each dataset gets its own dispatch entry: title_and_author uses a counter
# of 2, title_and_genre a counter of 3, both calling print_dataset.
# Presumably the counter is the number of records collected before the
# callback fires -- verify against the xmldataset dispatch documentation.
output = xmldataset.parse_using_profile(xml,
                                        profile,
                                        dispatch={
                                            'title_and_author': {
                                                'counter': 2,
                                                'coderef': print_dataset
                                            },
                                            'title_and_genre': {
                                                'counter': 3,
                                                'coderef': print_dataset
                                            }
                                        })
Exemple #12
0
    def new_prog(self, filex):
        """Parse ``<filex>.xml`` with xmldataset and append it to MySQL.

        The XML is flattened into the single ``mydata`` dataset, loaded into
        a DataFrame with a fixed column order, its missing values are
        replaced by placeholders, and the result is appended to the
        ``symdev_test`` table.

        Args:
            filex: Path of the XML file without the ``.xml`` extension
                (e.g. ``'symdev1918'``). Also used to fill the ``filename``
                column.

        Raises:
            OSError: If the XML file cannot be opened or read.
        """
        filename = filex + '.xml'
        # Read through a context manager so the handle is closed right away
        # instead of being left for the garbage collector.
        with open(filename) as xml_file:
            xml_data = xml_file.read()

        # xmldataset profile: indentation mirrors the XML nesting; "name:"
        # renames fields whose tag name is reused in several sections.
        profile = """
            SymCLI_ML
                Symmetrix
                    Symm_Info
                        symid = dataset:mydata
                    Device
                        Dev_Info
                            dev_name = dataset:mydata
                            configuration = dataset:mydata
                            device_group = dataset:mydata
                            ld_name = dataset:mydata
                            status = dataset:mydata,name:dev_status
                            attached_bcv = dataset:mydata
                            snapvx_source = dataset:mydata
                            snapvx_target = dataset:mydata
                            thin_pool_name = dataset:mydata
                            SRP_name = dataset:mydata
                        Product
                            wwn = dataset:mydata
                        Flags
                            snap_save_device = dataset:mydata
                            gatekeeper = dataset:mydata
                            meta = dataset:mydata
                        Capacity
                            megabytes = dataset:mydata
                            cylinders = dataset:mydata
                            blocks = dataset:mydata
                            kilobytes = dataset:mydata
                        RDF
                            RDF_Info
                                pair_state = dataset:mydata
                                consistency_state = dataset:mydata
                            Mode
                                mode = dataset:mydata
                                adaptive_copy = dataset:mydata
                                adaptive_copy_write_pending = dataset:mydata
                            Status
                                link = dataset:mydata,name:link_status
                                link_status_change_time = dataset:mydata
                            Local
                                ra_group_num = dataset:mydata,name:local_ra_group_num
                            Remote
                                dev_name = dataset:mydata,name:remote_dev_name 
                                remote_symid = dataset:mydata
                                wwn  = dataset:mydata,name:remote_wwn
                                state = dataset:mydata,name:remote_dev_state
                 """
        output = xmldataset.parse_using_profile(xml_data, profile)

        # Fix the column order explicitly. 'vdev_tgt' and 'filename' are not
        # produced by the profile above, so they start out empty and are
        # filled in below.
        df = pd.DataFrame.from_records(
            output['mydata'],
            columns=[
                'symid', 'dev_name', 'configuration', 'device_group',
                'ld_name', 'attached_bcv', 'dev_status', 'snapvx_source',
                'snapvx_target', 'thin_pool_name', 'SRP_name', 'vdev_tgt',
                'wwn', 'snap_save_device', 'gatekeeper', 'meta', 'megabytes',
                'cylinders', 'blocks', 'kilobytes', 'pair_state',
                'consistency_state', 'mode', 'adaptive_copy',
                'adaptive_copy_write_pending', 'link_status',
                'link_status_change_time', 'local_ra_group_num',
                'remote_dev_name', 'remote_symid', 'remote_wwn',
                'remote_dev_state', 'filename'
            ])

        # Per-column replacements for missing values: rows without a symid
        # take the first record's symid, 'filename' takes the input name, and
        # the remaining listed columns get the 'N/A' placeholder.
        manipulating_values = {
            'symid': df['symid'][0],
            'device_group': 'N/A',
            'ld_name': 'N/A',
            'snapvx_source': 'N/A',
            'snapvx_target': 'N/A',
            'vdev_tgt': 'N/A',
            'pair_state': 'N/A',
            'consistency_state': 'N/A',
            'mode': 'N/A',
            'adaptive_copy': 'N/A',
            'adaptive_copy_write_pending': 'N/A',
            'link_status': 'N/A',
            'link_status_change_time': 'N/A',
            'local_ra_group_num': 'N/A',
            'remote_dev_name': 'N/A',
            'remote_symid': 'N/A',
            'remote_wwn': 'N/A',
            'remote_dev_state': 'N/A',
            'filename': filex
        }

        fillsymid = df.fillna(value=manipulating_values)
        # NOTE(review): the connection URL (credentials masked here) is
        # hard-coded; consider loading it from configuration.
        engine = create_engine('mysql://*****:*****@localhost/mysql')
        # conn.begin() wraps the append in a transaction that is committed
        # when the block exits normally and rolled back on error.
        with engine.connect() as conn, conn.begin():
            fillsymid.to_sql('symdev_test',
                             conn,
                             if_exists='append',
                             index=False)
# Profile for the quest configuration XML: every listed leaf of
# QuestConfigList > QuestConfigData is routed into the "QuestConfigList"
# dataset.
profile_quest = """
QuestConfigList
    QuestConfigData
        id = dataset:QuestConfigList
        name = dataset:QuestConfigList
        repeatable = dataset:QuestConfigList
        questGiver = dataset:QuestConfigList
        type = dataset:QuestConfigList
        minLevel = dataset:QuestConfigList
        questRegion = dataset:QuestConfigList
        questLevel = dataset:QuestConfigList"""


#%% Parse XML
# Parse each XML blob and wrap the matching dataset directly in a DataFrame.
df_shop = DataFrame(
    xmldataset.parse_using_profile(raw_shop, profile_shop)['ShopItemConfigList']
)

df_quest = DataFrame(
    xmldataset.parse_using_profile(raw_quest, profile_quest)['QuestConfigList']
)