Python jumpTo Beispiele, libhtml.jumpTo Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: extract-equipment-weapons.py Projekt: deurk/pathfinderfr-data

                l['Source'] = source
            found = True
    #if not found:
    #    print("- une description existe pour '" + name + "' mais pas le sommaire!");


for URLDET in URLDETS:

    print(URLDET)

    if MOCK_WD:
        content = BeautifulSoup(open(MOCK_WD),features="lxml").body
    else:
        content = BeautifulSoup(urllib.request.urlopen(URLDET).read(),features="lxml").body

    section = jumpTo(content, 'h1',{'class':'separator'}, u"À mains nues")
    if not section: 
        section = jumpTo(content, 'h1',{'class':'separator'}, u"Description individuelle des armes orientales")
        
    newObj = True
    name = ""
    descr = ""
    source = None
    sourceNext = None
    for s in section:
        if s.name == 'div':
            for e in s.children:
                if e.name == 'h2' or e.name == 'b':
                    if not newObj:
                        addInfos(liste, name, sourceNext)

Beispiel #2

0

Datei anzeigen

Datei: extract-equipment-armors.py Projekt: deurk/pathfinderfr-data

            l['Description'] = descr.strip()
            if not source is None:
                l['Source'] = source
            found = True
    if not found:
        print("- une description existe pour '" + name +
              "' mais pas le sommaire!")


# Load the page body: from the local mock file when MOCK_WD is set,
# otherwise by downloading URLDET.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_WD:
    with open(MOCK_WD) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URLDET) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h1', {'class': 'separator'}, "Armures classiques")

# Initial state for the parsing loop that follows.
newObj = True
name = ""
descr = ""
source = None
sourceNext = None
for s in section:
    if s.name == 'div':
        for e in s.children:
            if e.name == 'h2' or e.name == 'b':
                if not newObj:
                    addInfos(liste, name, sourceNext)
                sourceNext = source

                if e.name == 'h2':

Beispiel #3

0

Datei anzeigen

Datei: extract-classfeatures-astuces.py Projekt: deurk/pathfinderfr-data

    'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto',
    'Description', 'Référence'
]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (astuces)...")

# Load the page body: from the local mock file when MOCK_ASTUCE is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_ASTUCE:
    with open(MOCK_ASTUCE) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL).read() if False else urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2', {'class': 'separator'},
                 "Description des astuces de ninja")

# Initial state for the parsing loop that follows; LVL is the level
# recorded on each entry and is raised by the loop when it reaches the
# "maître" heading.
LVL = 2
astuce = {'Niveau': LVL}
newObj = False
descr = ""
source = 'AG'
for s in section:
    if s.name == 'h2' and "Description des astuces de maître" in s.text:
        LVL = 10
    elif s.name == "table":
        for td in s.find_all('td'):
            for el in td.children:
                if el.name == "h3":
                    nom = cleanSectionName(el.text)
                    reference = URL + el.find_next("a")['href']

Beispiel #4

0

Datei anzeigen

        # add infos to existing weapon in list
        found = False
        for l in liste:
            for n in names:
                if l['Nom'].lower() == n.lower() or l['Nom'].lower(
                ).startswith(n.lower()):
                    l['Complete'] = True
                    l['Description'] = descr.strip()
                    if not source is None:
                        l['Source'] = source
                    found = True
        if not found:
            print("- une description existe pour '" + name +
                  "' mais pas le sommaire!")

    section = jumpTo(content, 'h2', {'class': 'separator'}, u"Descriptions")
    if not section:
        section = jumpTo(content, 'h2', {'class': 'separator'},
                         data["category"])
    if not section:
        print("No descriptions found for %s" % data["category"])
        exit(1)

    newObj = True
    name = ""
    descr = ""
    source = None
    sourceNext = None
    for e in section:
        if e.name == 'h3':
            if not newObj:

Beispiel #5

0

Datei anzeigen

FIELDS = ['Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (exploitations)...")


source = 'MCA'

# Load the page body: from the local mock file when MOCK_EXPLOITATION is
# set, otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_EXPLOITATION:
    with open(MOCK_EXPLOITATION) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2',{'class':'separator'}, u"Exploitations d'arcaniste")

# 0 means no "article_2col" block has been seen yet; the loop below
# derives the entry level from it.
level = 0
for s in section:
 
    if s.name == 'div' and s.has_attr('class') and "article_2col" in s['class']:
        level = 2 if level == 0 else 11

        exploitation = {'Source':source,'Niveau':level}
        newObj = False
        brCount = 0
        descr = ""
        for e in s.children:
            if e.name == 'h3':
                if newObj:
                    exploitation['Classe'] = 'Arcaniste'

Beispiel #6

0

Datei anzeigen

#MOCK_MALEFICE = "mocks/malefices.html"       # uncomment to test against the pre-downloaded page

URL = "http://www.pathfinder-fr.org/Wiki/Pathfinder-RPG.mal%c3%a9fices.ashx"
FIELDS = ['Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (maléfices)...")

# Load the page body: from the local mock file when MOCK_MALEFICE is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_MALEFICE:
    with open(MOCK_MALEFICE) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2',{'class':'separator'}, u"Maléfices")

# Initial state for the parsing loop that follows; LVL is raised by the
# loop when it reaches the "majeurs" / "Grands" headings.
LVL = 1
malefice = {'Niveau':LVL}
newObj = False
descr = ""
source = 'MJRA'
for s in section:
    if s.name == 'h2' and "Maléfices majeurs" in s.text:
        LVL = 10
    elif s.name == 'h2' and "Grands maléfices" in s.text:
        LVL = 18
    elif s.name == "table":
        for td in s.find_all('td'):
            for el in td.children:
                if el.name == "h3":

Beispiel #7

0

Datei anzeigen

            if el.name == "a" or el.name == "div":
              src = extractSource(el)
              if src:
                source = src
    
    benediction = {}
    benediction['Nom'] = name
    benediction['Classe'] = 'Oracle'
    benediction['Niveau'] = 1
    benediction['Auto'] = False
    benediction['Description'] = cleanDescription(description)
    benediction['Source'] = source
    benediction['Référence'] = u
    liste.append(benediction)

    section = jumpTo(content, 'h2', {'class':'separator'}, 'Révélations')
    
    if not section:
        print('Aucune section "Révélations" trouvée!');
        exit(1)
        
    descr = ""
    for el in section:
    
        if el.name == "b":
            benedictionName = cleanLabel(el.text)
            benediction = {}
            benediction['Nom'] = name + ": " + benedictionName
            benediction['Classe'] = 'Oracle'
            benediction['Niveau'] = 1
            benediction['Auto'] = False

Beispiel #8

0

Datei anzeigen

    'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto',
    'Description', 'Référence'
]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (talents)...")

# Load the page body: from the local mock file when MOCK_TALENT is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_TALENT:
    with open(MOCK_TALENT) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2', {'class': 'separator'},
                 u"Description des talents de roublard")

# 0 means no "article_2col" block has been seen yet; the loop below
# derives the entry level from it.
level = 0
for s in section:

    if s.name == 'div' and s.has_attr(
            'class') and "article_2col" in s['class']:
        level = 2 if level == 0 else 10

        talent = {'Source': 'MJ', 'Niveau': level}
        newObj = False
        brCount = 0
        descr = ""
        for e in s.children:
            if e.name == 'h3':
                if newObj:

Beispiel #9

0

Datei anzeigen

            exit(1)
    return "MJ"


liste = []
listePouvoirs = []

print("Extraction des aptitude (lignages)...")

# Load the page body: from the local mock file when MOCK_LIGNAGE is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_LIGNAGE:
    with open(MOCK_LIGNAGE) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2', {'class': 'separator'},
                 "Les lignages d'ensorceleurs¶")

for s in section:
    if s.name == "div" and "navmenu" in s.attrs['class']:
        lignages = s.find_all("li")
        for l in lignages:
            link = l.find("a")
            if link is None:
                continue

            lignage = {}
            lignage['Nom'] = "Lignage: " + link.text
            lignage['Classe'] = "Ensorceleur"
            lignage['Source'] = extractSource(l.text)
            lignage['Niveau'] = 1
            lignage['Description'] = ""

Beispiel #10

0

Datei anzeigen

    link = data['Référence']

    if data['Nom'] == "Duergar" or data['Nom'] == "Suli":
        print("Ignore race %s saisie manuellement" % data['Nom'])
        continue

    print("Extraction des traits alternatifs de %s" % data['Nom'])
    pageURL = link

    if MOCK_RACE:
        content = BeautifulSoup(open(MOCK_RACE),features="lxml").body
    else:
        content = BeautifulSoup(urllib.request.urlopen(pageURL).read(),features="lxml").body

    # traits alternatifs
    section = jumpTo(content, 'h2',{'class':'separator'}, u"Traits raciaux alternatifs")
    for s in section:
        if s.name == 'h2':
            if not found:
                print("Aucun trait racial alternatif trouvé!")

            break; # avoid jumping to other sections
        if s.name == 'div' and 'class' in s.attrs and "row" in s.attrs['class']:
            for attr in s.find_all('li'):
                descr = ""
                remplaceText = ""
                modifieText = ""
                for el in attr.children:
                    if el.name == 'b':
                        name = el.text.strip()
                        if name.endswith('.'):

Beispiel #11

0

Datei anzeigen

    'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto',
    'Description', 'Référence'
]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (ordres)...")

# Load the page body: from the local mock file when MOCK_ORDRE is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_ORDRE:
    with open(MOCK_ORDRE) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2', {'class': 'separator'}, u"Ordres de chevalier")

# Initial state for the parsing loop that follows.
ordre = {'Niveau': 1}
newObj = False
descr = ""
source = 'MJRA'
for el in section:
    if el.name == "h2":
        break

    if el.name == "h3":
        nom = cleanSectionName(el.text)
        reference = URL + el.find_next("a")['href']

        if newObj:
            ordre['Classe'] = 'Chevalier'

Beispiel #12

0

Datei anzeigen

Datei: extract-conditions.py Projekt: deurk/pathfinderfr-data

URL = "http://www.pathfinder-fr.org/Wiki/Pathfinder-RPG.%c3%89tats%20pr%c3%a9judiciables.ashx"
MOCK_CF = None
#MOCK_CF = "mocks/conditions.html"       # uncomment to test against the pre-downloaded page

FIELDS = ['Nom', 'Source', 'Description', 'Référence']
MATCH = ['Nom']

liste = []

# Load the page body: from the local mock file when MOCK_CF is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_CF:
    with open(MOCK_CF) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2', {'class': 'separator'},
                 "Liste des états préjudiciables")

# Source code recorded on each entry; the loop below switches it when it
# reaches the next h2 heading.
SOURCE = "MJ"

# Initial state for the parsing loop that follows.
condition = {'Source': SOURCE}
newObj = False
advantage = False
descr = ""

for s in section:
    if s.name == 'h2':
        condition['Description'] = descr.strip()
        liste.append(condition)

        # avantages
        SOURCE = "AM"

Beispiel #13

0

Datei anzeigen

    domain['Classe'] = "Prêtre"
    domain['Source'] = "MJ"
    domain['Niveau'] = 1
    domain['Description'] = ""
    domain['Référence'] = "http://www.pathfinder-fr.org/Wiki/" + link["href"]

    print("Traitement: " + link["href"])
    if MOCK_DOMAINE:
        domainHTML = BeautifulSoup(open(MOCK_DOMAINE_PAGE),
                                   features="lxml").body
    else:
        domainHTML = BeautifulSoup(urllib.request.urlopen(
            domain['Référence']).read(),
                                   features="lxml").body

    pouvoirs = jumpTo(domainHTML, 'h2', {'class': 'separator'},
                      "Pouvoirs accordés")
    if pouvoirs is None:
        pouvoirs = jumpTo(domainHTML, 'b', {}, "Pouvoirs accordés")
    if pouvoirs is None:
        print("NOT FOUND!!")
        continue
    for p in pouvoirs:
        if (p.name == 'h2'):
            break
        else:
            domain['Description'] += html2text(p)

    liste.append(domain)

#exit(1)

Beispiel #14

0

Datei anzeigen

Datei: extract-classfeatures-rages.py Projekt: deurk/pathfinderfr-data

    'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto',
    'Description', 'Référence'
]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (rages)...")

# Load the page body: from the local mock file when MOCK_RAGE is set,
# otherwise by downloading URL.  The context managers make sure the
# file handle / HTTP response is closed once the document is parsed.
if MOCK_RAGE:
    with open(MOCK_RAGE) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

section = jumpTo(content, 'h2', {'class': 'separator'},
                 u"Description des pouvoirs de rage")

# Initial state for the parsing loop that follows.
source = None
sourceNext = None
for s in section:
    if s.name == 'div':
        rage = {'Source': 'MJ', 'Niveau': 1}
        newObj = False
        brCount = 0
        descr = ""
        for e in s.children:
            if e.name == 'h3':
                if newObj:
                    rage['Classe'] = 'Barbare'
                    rage['Description'] = descr.strip()
                    if not sourceNext is None:

Beispiel #15

0

Datei anzeigen

Datei: extract-races.py Projekt: deurk/pathfinderfr-data

    # titre
    name = content.find_next('h1', {'class': 'pagetitle'}).string.strip()
    if name.startswith('Les '):
        name = name[4:-1].title()

    race['Nom'] = data['name']

    # source
    race['Source'] = data['source']

    # référence
    race['Référence'] = link

    # traits
    race['Traits'] = []
    section = jumpTo(content, 'h2', {'class': 'separator'},
                     "Traits raciaux standards")
    for s in section:
        if s.name == 'div' and 'class' in s.attrs and "arrondi" in s.attrs[
                'class']:
            first = True
            for attr in s.find_all('li'):
                trait = {}
                descr = ""
                for el in attr.children:
                    if el.name == 'b':
                        name = el.text.strip()
                        if first:
                            trait['Nom'] = "Caractéristiques"
                            descr = name
                        else:
                            if name.endswith('.'):

Beispiel #16

0

Datei anzeigen

    if not ptsComp:
        print("Points de compétence non-trouvé pour classe: %s" % name)
        exit(1)
    m = re.search('(\d) \\+ modificateur d[\'’]Intelligence', ptsComp)
    if not m:
        print("Points de compétence n'a pas pu être extrait!")
        exit(1)
    cl['RangsParNiveau'] = int(m.group(1))

    # compétences de classe
    cl['CompétencesDeClasse'] = []

    sectionNames = ["Compétences de classe", "Compétences de la classe"]
    section = None
    for s in sectionNames:
        section = jumpTo(content, 'h2', {'class': 'separator'}, s)
        if section:
            break

    if not section:
        print("- Compétences de la classe %s n'a pas être trouvée!!!" %
              cl['Nom'])
        continue

    for s in section:
        if s.name == 'a' and (len(s.text) > 3 or s.text.lower() == "vol"):
            value = s.text
            if value == u"Connaissances":
                idx = s.next_sibling.index(')')
                if idx > 0:
                    value += s.next_sibling[0:idx + 1].lower()