Python get_text Examples, pfsrd2.universal.get_text Python Examples

Example #1

0

Show file

def trait_class_pass(struct, filename):
    """Record on the trait in ``struct`` which trait classes list it.

    The last two dot-separated components of ``filename`` are dropped and the
    remainder is handed to ``parse_universal``. The first parsed entry is
    treated as the list of traits without a class; every following section
    must have "Traits" in its name and contributes one class.

    The trait returned by ``find_trait(struct)`` gets a ``'classes'`` list
    when at least one class section links to it.
    """
    source_name = ".".join(filename.split(".")[:-2])
    details = parse_universal(source_name, max_title=4)
    unclassified = details.pop(0)

    # Lower-cased trait name -> list of class names (or None if unclassified).
    trait_classes = {}
    for section in details:
        title = section['name']
        assert "Traits" in title, struct
        class_name = title.replace("Traits", "").strip().lower().replace(
            " ", "_")
        for anchor in BeautifulSoup(
                section['text'], "html.parser").find_all("a"):
            trait_classes.setdefault(
                get_text(anchor).lower(), []).append(class_name)

    for anchor in BeautifulSoup(unclassified, "html.parser").find_all("a"):
        trait = get_text(anchor)
        # NOTE(review): keys are lower-cased but ``trait`` is not, so this
        # check only fires for link text that is already lower case; the
        # message reports the last class name seen in the loop above.
        assert trait not in trait_classes, "%s: %s" % (class_name, trait)
        trait_classes[trait.lower()] = None

    found = find_trait(struct)
    key = found['name'].lower()
    assert key in trait_classes, found
    classes = trait_classes[key]
    if classes:
        found['classes'] = classes

Example #2

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def parse_spell(html):
		"""Parse the HTML of one spell entry into a spell dict.

		The result always has 'type', 'subtype', 'links' (one entry per
		<a> tag, possibly empty) and 'name'. When the text contains a
		parenthetical, the raw parenthetical is stored as 'count_text' and
		an "xN" token inside it (separated by ";" or ",") becomes 'count'.
		Without a parenthetical, 'count' is 1.

		Raises AssertionError if the text has more than one "(" or more
		than one "xN" token.
		"""
		spell = {"type": "stat_block_section", "subtype": "spell"}
		bsh = BeautifulSoup(html, 'html.parser')
		spell['links'] = [extract_link(a)[1] for a in bsh.find_all("a")]
		text = get_text(bsh)
		if text.find("(") == -1:
			spell['name'] = text
			spell['count'] = 1
			return spell

		parts = [t.strip() for t in text.split("(")]
		assert len(parts) == 2, "Failed to parse spell: %s" % (html)
		spell['name'] = parts.pop(0)
		count_text = parts.pop().replace(")", "")
		spell["count_text"] = count_text
		count = None
		for separator in [";", ","]:
			remainder = []
			for part in count_text.split(separator):
				token = part.strip()
				# Require at least one digit: a bare "x" used to match and
				# then blow up in int("") with a ValueError.
				if re.match(r"^x\d+$", token):
					assert count is None, "Failed to parse spell: %s" % (html)
					count = int(token[1:])
				else:
					remainder.append(part)
			# What is left over is re-split with the next separator.
			count_text = separator.join(remainder)
		if count:
			spell["count"] = count
		return spell

Example #3

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def process_items(section):
	"""Turn the "Items" stat block line into a list of item dicts.

	``section`` is indexed as (name, html, link): the name must be "Items"
	and the third element must be None. Each split part yields one dict
	with 'type', 'subtype' and 'name', plus 'modifiers' and 'links' when
	present.
	"""
	assert section[0] == "Items"
	assert section[2] is None
	items = []
	for part in rebuilt_split_modifiers(split_stat_block_line(section[1])):
		text, modifier = extract_modifier(part)
		bs = unwrap_formatting(BeautifulSoup(text, 'html.parser'))
		item = {
			'type': 'stat_block_section',
			'subtype': 'item',
			'name': get_text(bs).strip()}
		if modifier:
			modifier_objects = build_objects(
				'stat_block_section', 'modifier', modifier.split(","))
			item['modifiers'] = link_modifiers(modifier_objects)
		# Consume anchors one at a time: record the link, then unwrap the
		# tag so the next lookup finds the following anchor.
		links = []
		while bs.a:
			_, link = extract_link(bs.a)
			links.append(link)
			bs.a.unwrap()
		if links:
			item['links'] = links
		items.append(item)
	return items

Example #4

0

Show file

def trait_cleanup_pass(struct):
    """Fold ``struct['trait']`` into ``struct`` and remove the 'trait' key.

    A leading <i> element is unwrapped, and emptied first when it contains
    "Note from Nethys:". The remaining HTML, if any, is converted with
    ``html2markdown`` and stored as ``struct['text']``. 'classes' and
    'links' are copied over when set.

    Raises AssertionError when ``struct`` already has 'sections', 'text' or
    'links' that would be overwritten, or when the trait has sub-sections.
    """
    assert 'sections' not in struct, struct  # Right now no traits have other sections
    trait = struct['trait']
    if len(trait['sections']) == 0:
        del trait['sections']
    else:
        assert False, struct
    soup = BeautifulSoup(trait['text'], "html.parser")
    children = list(soup.children)
    # Empty text parses to no children; indexing [0] unconditionally raised
    # IndexError even though empty text is handled further down.
    if children and children[0].name == "i":
        first = children[0]
        if get_text(first).find("Note from Nethys:") > -1:
            first.clear()
        first.unwrap()
    trait['text'] = str(soup).strip()
    if trait['text'] != "":
        assert 'text' not in struct, struct
        struct['text'] = html2markdown.convert(trait['text'])
    # Only reachable when assertions are disabled and sections were kept.
    if len(trait.get('sections', [])) > 0:
        assert 'sections' not in struct, struct
        struct['sections'] = trait['sections']
    if trait.get('classes'):
        struct['classes'] = trait['classes']
    if trait.get('links'):
        assert 'links' not in struct, struct
        struct['links'] = trait['links']
    del struct['trait']

Example #5

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def link_abilities(abilities):
	"""Split links out of each ability's HTML name, in place.

	For every ability whose 'name' yields links via ``get_links``, 'name'
	is replaced by its plain text and the links are stored under 'links'.
	Abilities without links are left untouched. Returns ``abilities``.
	"""
	for ability in abilities:
		soup = BeautifulSoup(ability['name'], 'html.parser')
		found = get_links(soup)
		if not found:
			continue
		ability['name'] = get_text(soup)
		ability['links'] = found
	return abilities

Example #6

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def link_modifiers(modifiers):
	"""Split links out of each modifier's HTML name, in place.

	For every modifier whose 'name' yields links via ``get_links``, 'name'
	is replaced by its plain text and the links are stored under 'links'.
	Modifiers without links are left untouched. Returns ``modifiers``.
	"""
	for modifier in modifiers:
		soup = BeautifulSoup(modifier['name'], 'html.parser')
		found = get_links(soup)
		if not found:
			continue
		modifier['name'] = get_text(soup)
		modifier['links'] = found
	return modifiers

Example #7

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def parse_offensive_ability(parent_section):
		"""Build an offensive ability dict and store it on the parent.

		'text', 'action' and 'traits' are moved off ``parent_section``.
		Links are collected and the anchors unwrapped. Top-level nodes
		that follow a <b> label are gathered under that label (lower-cased,
		spaces replaced by underscores); nodes before any label become
		'text'. An ability named "Planar Incarnation" keeps all of its
		HTML as 'text'. The result goes to ``parent_section['ability']``.

		Raises AssertionError for a <b> label that is not a known addon.
		"""
		def _oa_html_reduction(data):
			# Join the fragments and drop a single trailing <br>.
			bs = BeautifulSoup(''.join(data).strip(), 'html.parser')
			children = list(bs.children)
			# Whitespace-only fragments parse to no children; indexing [-1]
			# without this check raised IndexError.
			if children and children[-1].name == 'br':
				children[-1].unwrap()
			return str(bs)

		text = parent_section['text']
		del parent_section['text']
		section = {
			'type': "stat_block_section", "subtype": "ability",
			'name': parent_section['name'], "ability_type": "offensive"
		}
		for key in ('action', 'traits'):
			if key in parent_section:
				section[key] = parent_section.pop(key)
		bs = BeautifulSoup(text, 'html.parser')
		links = get_links(bs)
		if len(links) > 0:
			section['links'] = links
		while bs.a:
			bs.a.unwrap()

		children = list(bs)
		addons = {}
		parts = []
		addon_names = ["Frequency", "Trigger", "Effect", "Duration",
			"Requirement", "Requirements", "Prerequisite", "Critical Success",
			"Success", "Failure", "Critical Failure", "Range"]
		if section['name'] == "Planar Incarnation":
			parts = [str(c) for c in children]
		else:
			current = None
			for child in children:
				if child.name == 'b':
					current = get_text(child).strip()
					# Plural labels are stored under the singular key.
					if current == "Requirements":
						current = "Requirement"
					if current == "Prerequisites":
						current = "Prerequisite"
				elif current:
					assert current in addon_names, "%s, %s" % (current, text)
					addon_text = str(child)
					if addon_text.strip().endswith(";"):
						addon_text = addon_text.strip()[:-1]
					addons.setdefault(current.lower().replace(" ", "_"), [])\
						.append(addon_text)
				else:
					parts.append(str(child))
		for k, v in addons.items():
			section[k] = _oa_html_reduction(v)
		if len(parts) > 0:
			section['text'] = _oa_html_reduction(parts)
		parent_section['ability'] = section

Example #8

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def parse_affliction(parent_section):
		"""Build an affliction dict and store it on the parent.

		'text', 'action' and 'traits' are moved off ``parent_section``.
		After anchors are unwrapped the HTML is split on ";". Pieces with
		a <b> label fill 'saving_throw', 'onset', 'maximum_duration' or
		are appended to 'stages'; pieces without a label are joined with
		"; " into 'text'. The result goes to
		``parent_section['affliction']``.

		Raises AssertionError on a repeated single-valued label or an
		unknown label.
		"""
		text = parent_section.pop('text')
		section = {
			'type': "stat_block_section", "subtype": "affliction",
			'name': parent_section['name']
		}
		for key in ('action', 'traits'):
			if key in parent_section:
				section[key] = parent_section.pop(key)
		bs = BeautifulSoup(text, 'html.parser')
		section['links'] = get_links(bs)
		while bs.a:
			bs.a.unwrap()
		text = str(bs)

		# Labels that may appear at most once, mapped to their output key.
		single_fields = {
			'Saving Throw': 'saving_throw',
			'Onset': 'onset',
			'Maximum Duration': 'maximum_duration'}
		for piece in [p.strip() for p in text.split(";")]:
			piece_bs = BeautifulSoup(piece, 'html.parser')
			if not piece_bs.b:
				section.setdefault('text', []).append(get_text(piece_bs))
				continue
			title = get_text(piece_bs.b.extract()).strip()
			newtext = get_text(piece_bs).strip()
			if title in single_fields:
				field = single_fields[title]
				assert field not in section, text
				section[field] = newtext
			elif title.startswith('Stage'):
				section.setdefault("stages", []).append(newtext)
			else:
				assert False, text
		if 'text' in section:
			section['text'] = '; '.join(section['text'])
		parent_section['affliction'] = section

Example #9

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def parse_spell_list(part):
		"""Parse one spell-level chunk into a spell_list dict.

		Leading <b> labels are consumed in order: an optional "Constant",
		an optional "Cantrips", then the level label (for example "5th" or
		"(5th)"). The remaining HTML is split on "," outside parentheses
		and each piece is parsed with ``parse_spell``.

		Raises AssertionError when the level label is not recognised.
		"""
		spell_list = {"type": "stat_block_section", "subtype": "spell_list"}
		bs = BeautifulSoup(part, 'html.parser')
		level_text = get_text(bs.b.extract())
		if level_text == "Constant":
			spell_list["constant"] = True
			level_text = get_text(bs.b.extract())
		if level_text == "Cantrips":
			spell_list["cantrips"] = True
			level_text = get_text(bs.b.extract())
		# \d+ rather than \d*: a label with no digits now fails the
		# assertion below instead of raising ValueError from int("").
		m = re.match(r"^\(?(\d+)[snrt][tdh]\)?$", level_text)
		assert m, "Failed to parse spells: %s" % (part)
		spell_list["level"] = int(m.group(1))
		spell_list["level_text"] = level_text
		spell_list["spells"] = [
			parse_spell(html)
			for html in split_maintain_parens(str(bs), ",")]
		return spell_list

Example #10

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def link_objects(objects):
	"""Split a single link out of each object's HTML name, in place.

	For every object whose 'name' yields links via ``get_links``, 'name'
	is replaced by its plain text and the first link is stored under
	'link'. Returns ``objects``.

	Raises AssertionError when a name carries more than one link.
	"""
	for obj in objects:
		soup = BeautifulSoup(obj['name'], 'html.parser')
		found = get_links(soup)
		if not found:
			continue
		obj['name'] = get_text(soup)
		obj['link'] = found[0]
		# TODO: fix []
		assert len(found) <= 1, objects
	return objects

Example #11

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def creature_stat_block_pass(struct):
	"""Split the stat block HTML into three sections of (key, html, link).

	The stat block's top-level nodes are walked in order: trait <span>s
	are parsed onto the stat block, <b> starts a new keyed entry, <br>
	closes the current entry, and <hr> closes the current section.
	The sections are then passed to ``process_stat_block``.

	Raises AssertionError unless exactly three sections are found.
	"""
	def add_to_data(key, value, data, link):
		# Flush the pending entry (if one is open) and reset the state.
		if key:
			data.append((key, ''.join([str(v) for v in value]).strip(), link))
			key = None
			value = []
			link = None
		return key, value, data, link

	def add_remnants(value, data):
		# Append unkeyed trailing nodes to the most recent entry. The
		# entry's own link is kept; it used to be replaced by the outer
		# ``link`` variable, which is None whenever this is called.
		k, v, entry_link = data.pop()
		newvalue = [v]
		newvalue.extend(value)
		data.append(
			(k, ''.join([str(n) for n in newvalue]).strip(), entry_link))
		return [], data

	sb = find_stat_block(struct)
	bs = BeautifulSoup(sb["text"], 'html.parser')
	objs = list(bs.children)
	sections = []
	data = []
	key = None
	value = []
	link = None
	for obj in objs:
		if obj.name == 'span' and is_trait(obj):
			trait = trait_parse(obj)
			sb.setdefault('traits', []).append(trait)
		elif obj.name == "br":
			value.append(obj)
			key, value, data, link = add_to_data(key, value, data, link)
		elif obj.name == 'hr':
			key, value, data, link = add_to_data(key, value, data, link)
			if len(value) > 0:
				assert link == None
				value, data = add_remnants(value, data)
			data = strip_br(data)
			sections.append(data)
			data = []
		elif obj.name == "b":
			key, value, data, link = add_to_data(key, value, data, link)
			key = get_text(obj)
			if obj.a:
				_, link = extract_link(obj.a)
		else:
			value.append(obj)
	# Flush whatever entry is still open after the last node.
	if key:
		key, value, data, link = add_to_data(key, value, data, link)
	data = strip_br(data)
	sections.append(data)
	assert len(sections) == 3, sections
	process_stat_block(sb, sections)

Example #12

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def break_out_movement(movement):
		"""Fill in structured speed fields on ``movement``, in place.

		``movement['name']`` is parsed. A trailing parenthetical becomes
		either 'from' (when it holds exactly one link) or 'modifiers', and
		is removed from 'name'. The remaining text sets 'movement_type'
		and, for "<n> feet" / "<type> <n> feet", 'value'. A name that is
		a single link sets 'from' and the plain-text name.

		Raises AssertionError when the text matches none of these forms.
		"""
		data = movement['name']
		parenthesized = re.match(r"^([a-zA-Z0-9 ]*) \((.*)\)$", data)
		if parenthesized:
			# climb 30 feet (<a aonid="299" game-obj="Spells"><i>spider climb</i></a>)
			# burrow 20 feet (snow only)
			data, content = parenthesized.groups()
			content = content.replace("from ", "")
			links = get_links(BeautifulSoup(content, 'html.parser'))
			if links:
				assert len(links) == 1, movement
				movement['from'] = links[0]
			else:
				#TODO: fix []
				modifier_objects = build_objects(
					'stat_block_section', 'modifier', [content])
				movement['modifiers'] = link_modifiers(modifier_objects)
		movement['name'] = data

		if data == "can't move":
			# can't move
			movement['movement_type'] = data
			return

		plain = re.match(r"^(\d*) feet$", data)
		if plain:
			# 30 feet
			speed = int(plain.group(1))
			movement['movement_type'] = 'walk'
			movement['value'] = speed
			return

		typed = re.match(r"^([a-zA-Z ]*) (\d*) feet$", data)
		if typed:
			# fly 30 feet
			mtype, distance = typed.groups()
			speed = int(distance)
			movement['movement_type'] = mtype
			movement['value'] = speed
			return

		# Last recognised form: the whole name is a single link.
		bs = BeautifulSoup(data, 'html.parser')
		if bs.i:
			bs.i.unwrap()
		nodes = list(bs.children)
		if len(nodes) == 1 and nodes[0].name == "a":
			links = get_links(bs)
			movement['name'] = get_text(bs)
			assert len(links) == 1, movement
			movement['from'] = links[0]
			return
		log_element("speed.log")(data)
		assert False, data

Example #13

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def parse_attack_effect(parts):
		"""Build an attack_damage dict for a non-damage effect.

		``parts`` are joined with spaces and parsed as HTML. The plain
		text, stripped, is stored as 'effect'; links from any <a> tags are
		stored as 'links' when there are any.
		"""
		bs = BeautifulSoup(' '.join(parts), 'html.parser')
		links = [extract_link(a)[1] for a in bs.find_all("a")]
		effect = {
			"type": "stat_block_section", "subtype": "attack_damage"
		}
		if links:
			effect["links"] = links
		effect["effect"] = get_text(bs).strip()
		return effect

Example #14

0

Show file

 def _extract_source(section):
     """Pull a leading "Source" entry out of ``section['text']``.

     When the text starts with ``<b>Source</b>``, the following node is
     passed to ``extract_source``. An optional <sup> supplies 'errata' and
     one optional <br> is dropped. The rest of the HTML is written back to
     ``section['text']`` and a one-element list holding the source is
     returned. Otherwise the section is left alone and None is returned.
     """
     if 'text' not in section:
         return None
     bs = BeautifulSoup(section['text'], 'html.parser')
     children = list(bs.children)
     # Empty text has no children to inspect.
     if not children:
         return None
     if children[0].name == "b" and get_text(children[0]) == "Source":
         children.pop(0)
         book = children.pop(0)
         source = extract_source(book)
         # Each look-ahead checks that nodes remain: a text holding only
         # the source line used to raise IndexError here.
         if children and children[0].name == "sup":
             assert 'errata' not in source, "Should be no more than one errata."
             _, source['errata'] = extract_link(
                 children.pop(0).find("a"))
         if children and children[0].name == "br":
             children.pop(0)
         assert not children or children[0].name != "a", section
         section['text'] = ''.join([str(c) for c in children])
         return [source]
     return None

Example #15

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

	def parse_attack_damage(text):
		"""Parse the text after "Damage" into a list of attack_damage dicts.

		The text is split on " plus " and " and ". A piece whose first
		word looks like a dice formula or a plain number becomes a damage
		dict with 'formula' and 'damage_type', plus 'notes', 'links',
		'persistent' and 'splash' when applicable. Any other piece is
		handed to ``parse_attack_effect``.
		"""
		damages = []
		for entry in split_list(text.strip(), [" plus ", " and "]):
			words = entry.split(" ")
			dice = words.pop(0).strip()
			is_damage = (
				re.match(r"^\d*d\d*.?[0-9]*?$", dice)
				or re.match(r"^\d*$", dice))
			if not is_damage:
				# effect
				words.insert(0, dice)
				damages.append(parse_attack_effect(words))
				continue

			# damage
			damage = {
				"type": "stat_block_section", "subtype": "attack_damage"
			}
			damage["formula"] = dice.replace('–', '-')
			damage_type = ' '.join(words)
			if "(" in damage_type:
				pieces = damage_type.split("(")
				damage_type = pieces.pop(0).strip()
				notes = pieces.pop(0).replace(")", "").strip()
				assert len(pieces) == 0, "Failed to parse damage: %s" % (text)
				damage["notes"] = notes
			if "damage" in damage_type:
				# e.g. "5d8+18 positive or negative damage plus 1d6 lawful"
				damage_type = damage_type.replace(" damage", "")
			bs = BeautifulSoup(damage_type, 'html.parser')
			links = [extract_link(a)[1] for a in bs.find_all("a")]
			if links:
				damage["links"] = links
			damage_type = get_text(bs).strip()
			if damage_type.startswith("persistent"):
				damage_type = damage_type.replace("persistent ", "")
				damage["persistent"] = True
			if "splash" in damage_type:
				damage_type = damage_type.replace("splash", "").strip()
				damage["splash"] = True
			damage["damage_type"] = damage_type
			damages.append(damage)
		return damages

Example #16

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def get_attacks(sb):
	"""Move attack sections out of ``sb['sections']`` and return them.

	Sections rejected by ``is_attack`` stay in ``sb['sections']``. A
	section whose name ends with "Spells" or "Rituals" is split on <br />:
	the first piece stays on the original section and every further
	non-empty piece becomes a copy named after its leading <b> label.
	"""
	def is_attack(section):
		# Sidebars and "All Monsters" / "Variant" sections are not attacks.
		text = section['text']
		children = list(BeautifulSoup(text.strip(), 'html.parser').children)
		test = children.pop(0)
		if test.name == "img":
			if test['alt'].startswith("Sidebar"):
				return False
		return not section['name'].startswith(("All Monsters", "Variant"))

	newsections = []
	attacks = []
	for section in sb['sections']:
		if not is_attack(section):
			newsections.append(section)
			continue
		if not section['name'].endswith(("Spells", "Rituals")):
			attacks.append(section)
			continue
		text = section['text']
		if not BeautifulSoup(text.strip(), 'html.parser').br:
			# Nothing to split. Such sections used to be added to neither
			# list and were silently dropped.
			attacks.append(section)
			continue
		parts = re.split(r" *?\<br ?\/\> *?", text)
		section['text'] = parts.pop(0)
		attacks.append(section)
		for part in parts:
			if part.strip() == "":
				continue
			# Shallow copy: shares nested values with the original except
			# for the keys overwritten below.
			newsection = section.copy()
			bs = BeautifulSoup(part, 'html.parser')
			name = get_text(bs.b.extract())
			newsection["sections"] = []
			newsection["name"] = name
			newsection["text"] = str(bs)
			attacks.append(newsection)
	sb['sections'] = newsections
	return attacks

Example #17

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def process_offensive_action(section):
	"""Classify an offensive action section and parse it in place.

	Leading action and trait markup are extracted from the text, then the
	section is dispatched on its name: "Melee"/"Ranged" are attacks;
	names containing "Spells" or ending in "Rituals"/"Formulas" are
	spells; text whose first <b> label is "Saving Throw" is an
	affliction; everything else is an ability. The parsed data is stored
	under 'attack', 'spells', 'affliction' or 'ability'. Returns
	``section``.
	"""
	def remove_html_weapon(text, section):
		# Strip a leading <i> and all anchors from the weapon name,
		# recording the anchors as links on ``section``.
		bs = BeautifulSoup(text, 'html.parser')
		children = list(bs.children)
		# An empty weapon name has no children to inspect.
		if children and children[0].name == "i":
			bs.i.unwrap()
		while bs.a:
			_, link = extract_link(bs.a)
			section.setdefault("links", []).append(link)
			bs.a.unwrap()
		return str(bs)

	def parse_attack_action(parent_section, attack_type):
		"""Parse a Melee/Ranged line into ``parent_section['attack']``."""
		# tentacle +16 [<a aonid="322" game-obj="Rules"><u>+12/+8</u></a>] (<a aonid="170" game-obj="Traits"><u>agile</u></a>, <a aonid="103" game-obj="Traits"><u>magical</u></a>, <a aonid="192" game-obj="Traits"><u>reach 15 feet</u></a>), <b>Damage</b> 2d8+10 bludgeoning plus slime
		# trident +10 [<a aonid="322" game-obj="Rules"><u>+5/+0</u></a>], <b>Damage</b> 1d8+4 piercing
		# trident +7 [<a aonid="322" game-obj="Rules"><u>+2/-3</u></a>] (<a aonid="195" game-obj="Traits"><u>thrown 20 feet</u></a>), <b>Damage</b> 1d8+3 piercing
		# Sphere of Oblivion +37 [<a aonid="322" game-obj="Rules"><u>+32/+27</u></a>] (<a aonid="103" game-obj="Traits"><u>magical</u></a>), <b>Effect</b> see Sphere of Oblivion
		# piercing hymn +17 [<a aonid="322" game-obj="Rules"><u>+12/+7</u></a>] (<a aonid="83" game-obj="Traits"><u>good</u></a>, <a aonid="103" game-obj="Traits"><u>magical</u></a>, <a aonid="248" game-obj="Traits"><u>range 90 feet</u></a>, <a aonid="147" game-obj="Traits"><u>sonic</u></a>), <b>Damage</b> 4d6 sonic damage plus 1d6 good and deafening aria
		# crossbow +14 [<a aonid="322" game-obj="Rules"><u>+9/+4</u></a>] (<a aonid="248" game-obj="Traits"><u>range increment 120 feet</u></a>, <a aonid=\"254\" game-obj="Traits"><u>reload 1</u></a>), <b>Damage</b> 1d8+2 piercing plus crossbow precision
		text = parent_section.pop('text')
		section = {
			'type': "stat_block_section", "subtype": "attack",
			'attack_type': attack_type, 'name': parent_section['name']
		}
		for key in ('action', 'traits'):
			if key in parent_section:
				section[key] = parent_section.pop(key)

		# Groups: weapon, first bonus, bracketed bonuses, [traits], damage.
		m = re.search(r"^(.*) ([+-]\d*) \[(.*)\] \((.*)\), (.*)$", text)
		if not m:
			m = re.search(r"^(.*) ([+-]\d*) \[(.*)\], (.*)$", text)
		assert m, "Failed to parse: %s" % (text)
		attack_data = list(m.groups())
		section['weapon'] = remove_html_weapon(attack_data.pop(0), section)
		attacks = [attack_data.pop(0)]
		bs = BeautifulSoup(attack_data.pop(0), 'html.parser')
		children = list(bs.children)
		assert len(children) == 1, "Failed to parse: %s" % (text)
		data, link = extract_link(children[0])
		attacks.extend(data.split("/"))
		attacks = [int(a) for a in attacks]
		section['bonus'] = {
			"type": "stat_block_section", "subtype": "attack_bonus",
			"link": link, "bonuses": attacks
		}

		# The first word of the last group is the <b> label; drop it.
		damage = attack_data.pop().split(" ")
		_ = damage.pop(0)
		section['damage'] = parse_attack_damage(" ".join(damage).strip())

		if len(attack_data) > 0:
			_, traits = extract_starting_traits("(%s)" %(attack_data.pop()))
			assert 'traits' not in section
			section['traits'] = traits
		assert len(attack_data) == 0, "Failed to parse: %s" % (text)
		parent_section['attack'] = section

	def parse_attack_damage(text):
		"""Parse the text after "Damage" into attack_damage dicts.

		The text is split on " plus " and " and ". A piece whose first
		word looks like a dice formula or a plain number becomes a damage
		dict; any other piece is handed to ``parse_attack_effect``.
		"""
		damages = []
		for entry in split_list(text.strip(), [" plus ", " and "]):
			words = entry.split(" ")
			dice = words.pop(0).strip()
			is_damage = (
				re.match(r"^\d*d\d*.?[0-9]*?$", dice)
				or re.match(r"^\d*$", dice))
			if not is_damage:
				# effect
				words.insert(0, dice)
				damages.append(parse_attack_effect(words))
				continue

			# damage
			damage = {
				"type": "stat_block_section", "subtype": "attack_damage"
			}
			damage["formula"] = dice.replace('–', '-')
			damage_type = ' '.join(words)
			if "(" in damage_type:
				pieces = damage_type.split("(")
				damage_type = pieces.pop(0).strip()
				notes = pieces.pop(0).replace(")", "").strip()
				assert len(pieces) == 0, "Failed to parse damage: %s" % (text)
				damage["notes"] = notes
			if "damage" in damage_type:
				# e.g. "5d8+18 positive or negative damage plus 1d6 lawful"
				damage_type = damage_type.replace(" damage", "")
			bs = BeautifulSoup(damage_type, 'html.parser')
			links = [extract_link(a)[1] for a in bs.find_all("a")]
			if links:
				damage["links"] = links
			damage_type = get_text(bs).strip()
			if damage_type.startswith("persistent"):
				damage_type = damage_type.replace("persistent ", "")
				damage["persistent"] = True
			if "splash" in damage_type:
				damage_type = damage_type.replace("splash", "").strip()
				damage["splash"] = True
			damage["damage_type"] = damage_type
			damages.append(damage)
		return damages

	def parse_attack_effect(parts):
		"""Build an attack_damage dict for a non-damage effect."""
		bs = BeautifulSoup(' '.join(parts), 'html.parser')
		links = [extract_link(a)[1] for a in bs.find_all("a")]
		effect = {
			"type": "stat_block_section", "subtype": "attack_damage"
		}
		if links:
			effect["links"] = links
		effect["effect"] = get_text(bs).strip()
		return effect

	def parse_spells(parent_section):
		"""Parse a spells/rituals/formulas line into ``parent_section['spells']``."""
		text = parent_section.pop('text')
		section = {
			'type': "stat_block_section", "subtype": "spells",
			'name': parent_section['name']
		}
		for key in ('action', 'traits'):
			if key in parent_section:
				section[key] = parent_section.pop(key)

		# Unless the name ends in "Formulas", its first word is the
		# tradition and the rest is the spell type.
		name_parts = section['name'].split(" ")
		if name_parts[-1] != "Formulas":
			section["spell_tradition"] = name_parts.pop(0)
		section["spell_type"] = " ".join(name_parts)
		parts = split_maintain_parens(text, ";")
		tt_parts = split_maintain_parens(parts.pop(0), ",")
		remains = []
		for tt in tt_parts:
			if tt == '':
				continue
			chunks = tt.split(" ")
			if tt.startswith("DC"):
				section["spell_dc"] = int(chunks.pop())
			elif tt.startswith("attack") or tt.startswith("spell attack"):
				section["spell_attack"] = int(chunks.pop())
			elif tt.endswith("Focus Points"):
				section["focus_points"] = int(tt.replace(" Focus Points", "").strip())
			elif tt.endswith("Focus Point"):
				section["focus_points"] = int(tt.replace(" Focus Point", "").strip())
			else:
				remains.append(tt)
		# Some pieces recognised: the leftovers are notes. None
		# recognised: the first chunk is put back as a spell list.
		if len(remains) > 0 and remains != tt_parts:
			section['notes'] = remains
			remains = []
		if len(remains) > 0:
			parts.insert(0, ', '.join(remains))
		assert len(parts) > 0, section
		section['spell_list'] = [parse_spell_list(p) for p in parts]
		parent_section['spells'] = section

	def parse_spell_list(part):
		"""Parse one spell-level chunk into a spell_list dict.

		Raises AssertionError when the level label is not recognised.
		"""
		spell_list = {"type": "stat_block_section", "subtype": "spell_list"}
		bs = BeautifulSoup(part, 'html.parser')
		level_text = get_text(bs.b.extract())
		if level_text == "Constant":
			spell_list["constant"] = True
			level_text = get_text(bs.b.extract())
		if level_text == "Cantrips":
			spell_list["cantrips"] = True
			level_text = get_text(bs.b.extract())
		# \d+ rather than \d*: a label with no digits now fails the
		# assertion below instead of raising ValueError from int("").
		m = re.match(r"^\(?(\d+)[snrt][tdh]\)?$", level_text)
		assert m, "Failed to parse spells: %s" % (part)
		spell_list["level"] = int(m.group(1))
		spell_list["level_text"] = level_text
		spell_list["spells"] = [
			parse_spell(html)
			for html in split_maintain_parens(str(bs), ",")]
		return spell_list

	def parse_spell(html):
		"""Parse the HTML of one spell entry into a spell dict.

		Raises AssertionError if the text has more than one "(" or more
		than one "xN" token.
		"""
		spell = {"type": "stat_block_section", "subtype": "spell"}
		bsh = BeautifulSoup(html, 'html.parser')
		spell['links'] = [extract_link(a)[1] for a in bsh.find_all("a")]
		text = get_text(bsh)
		if text.find("(") == -1:
			spell['name'] = text
			spell['count'] = 1
			return spell

		parts = [t.strip() for t in text.split("(")]
		assert len(parts) == 2, "Failed to parse spell: %s" % (html)
		spell['name'] = parts.pop(0)
		count_text = parts.pop().replace(")", "")
		spell["count_text"] = count_text
		count = None
		for separator in [";", ","]:
			remainder = []
			for part in count_text.split(separator):
				token = part.strip()
				# Require at least one digit: a bare "x" used to match and
				# then blow up in int("") with a ValueError.
				if re.match(r"^x\d+$", token):
					assert count is None, "Failed to parse spell: %s" % (html)
					count = int(token[1:])
				else:
					remainder.append(part)
			# What is left over is re-split with the next separator.
			count_text = separator.join(remainder)
		if count:
			spell["count"] = count
		return spell

	def parse_affliction(parent_section):
		"""Parse an affliction into ``parent_section['affliction']``.

		Raises AssertionError on a repeated single-valued label or an
		unknown label.
		"""
		text = parent_section.pop('text')
		section = {
			'type': "stat_block_section", "subtype": "affliction",
			'name': parent_section['name']
		}
		for key in ('action', 'traits'):
			if key in parent_section:
				section[key] = parent_section.pop(key)
		bs = BeautifulSoup(text, 'html.parser')
		section['links'] = get_links(bs)
		while bs.a:
			bs.a.unwrap()
		text = str(bs)

		# Labels that may appear at most once, mapped to their output key.
		single_fields = {
			'Saving Throw': 'saving_throw',
			'Onset': 'onset',
			'Maximum Duration': 'maximum_duration'}
		for piece in [p.strip() for p in text.split(";")]:
			piece_bs = BeautifulSoup(piece, 'html.parser')
			if not piece_bs.b:
				section.setdefault('text', []).append(get_text(piece_bs))
				continue
			title = get_text(piece_bs.b.extract()).strip()
			newtext = get_text(piece_bs).strip()
			if title in single_fields:
				field = single_fields[title]
				assert field not in section, text
				section[field] = newtext
			elif title.startswith('Stage'):
				section.setdefault("stages", []).append(newtext)
			else:
				assert False, text
		if 'text' in section:
			section['text'] = '; '.join(section['text'])
		parent_section['affliction'] = section

	def parse_offensive_ability(parent_section):
		"""Parse an ability into ``parent_section['ability']``.

		Raises AssertionError for a <b> label that is not a known addon.
		"""
		def _oa_html_reduction(data):
			# Join the fragments and drop a single trailing <br>.
			bs = BeautifulSoup(''.join(data).strip(), 'html.parser')
			children = list(bs.children)
			# Whitespace-only fragments parse to no children; indexing [-1]
			# without this check raised IndexError.
			if children and children[-1].name == 'br':
				children[-1].unwrap()
			return str(bs)

		text = parent_section['text']
		del parent_section['text']
		section = {
			'type': "stat_block_section", "subtype": "ability",
			'name': parent_section['name'], "ability_type": "offensive"
		}
		for key in ('action', 'traits'):
			if key in parent_section:
				section[key] = parent_section.pop(key)
		bs = BeautifulSoup(text, 'html.parser')
		links = get_links(bs)
		if len(links) > 0:
			section['links'] = links
		while bs.a:
			bs.a.unwrap()

		children = list(bs)
		addons = {}
		parts = []
		addon_names = ["Frequency", "Trigger", "Effect", "Duration",
			"Requirement", "Requirements", "Prerequisite", "Critical Success",
			"Success", "Failure", "Critical Failure", "Range"]
		if section['name'] == "Planar Incarnation":
			parts = [str(c) for c in children]
		else:
			current = None
			for child in children:
				if child.name == 'b':
					current = get_text(child).strip()
					# Plural labels are stored under the singular key.
					if current == "Requirements":
						current = "Requirement"
					if current == "Prerequisites":
						current = "Prerequisite"
				elif current:
					assert current in addon_names, "%s, %s" % (current, text)
					addon_text = str(child)
					if addon_text.strip().endswith(";"):
						addon_text = addon_text.strip()[:-1]
					addons.setdefault(current.lower().replace(" ", "_"), [])\
						.append(addon_text)
				else:
					parts.append(str(child))
		for k, v in addons.items():
			section[k] = _oa_html_reduction(v)
		if len(parts) > 0:
			section['text'] = _oa_html_reduction(parts)
		parent_section['ability'] = section

	if len(section['sections']) == 0:
		del section['sections']
	section['type'] = 'stat_block_section'
	section['subtype'] = 'offensive_action'
	text = section['text'].strip()
	text, action = extract_action(text)
	if action:
		section['action'] = action
	text, traits = extract_starting_traits(text)
	if len(traits) > 0:
		section['traits'] = traits
	section['text'] = text.strip()

	name = section['name']
	if name in ["Melee", "Ranged"]:
		section['offensive_action_type'] = "attack"
		parse_attack_action(section, name.lower())
	elif name.find("Spells") > -1 \
			or name.endswith("Rituals") \
			or name.endswith("Formulas"):
		section['offensive_action_type'] = "spells"
		parse_spells(section)
	else:
		# An affliction is recognised by its first <b> label; everything
		# else, labelled or not, is treated as an ability.
		bs = BeautifulSoup(section['text'], 'html.parser')
		title = get_text(bs.b) if bs.b else None
		if title is not None and title.strip() in ['Saving Throw']:
			section['offensive_action_type'] = "affliction"
			parse_affliction(section)
		else:
			section['offensive_action_type'] = "ability"
			parse_offensive_ability(section)
	return section

Example #18

0

Show file

File: creatures.py Project: devonjones/PFSRD2-Parser

def process_languages(section):
	"""Parse the "Languages" stat block line into a languages object.

	``section`` is indexed as (name, html, link): the name must be
	"Languages" and the third element must be None. Text before the first
	";" is the language list; text after it holds communication abilities
	and/or modifiers for the whole list. Each language becomes a dict
	with 'name', 'type' and 'subtype', plus 'link' and 'modifiers' when
	present. Returns the object built by ``build_object``.
	"""
	# 1, Unseen Servant
	#  <b>Languages</b> - (understands its creator)
	# 2, Alghollthu Master
	#  <b>Languages</b> <a href="Languages.aspx?ID=13"><u>Aklo</a></u>, <a href="Languages.aspx?ID=24"><u>Alghollthu</a></u>, <a href="Languages.aspx?ID=14"><u>Aquan</a></u>, <a href="Languages.aspx?ID=1"><u>Common</a></u>, <a href="Languages.aspx?ID=11"><u>Undercommon</a></u>
	# 204
	#  <b>Languages</b> pidgin of <a style="text-decoration:underline" href="Languages.aspx?ID=6">Goblin</a>, <a style="text-decoration:underline" href="Languages.aspx?ID=8">Jotun</a>, and <a style="text-decoration:underline" href="Languages.aspx?ID=9">Orcish</a>
	# 211
	#  <b>Languages</b> <a href="Languages.aspx?ID=1"><u>Common</a></u>; one elemental language (Aquan, Auran, Ignan, or Terran), one planar language (Abyssal, Celestial, or Infernal); telepathy 100 feet
	# 343, Quelaunt
	#  <b>Languages</b> <a href="Languages.aspx?ID=13"><u>Aklo</a></u>; (can't speak any language); telepathy 100 feet
	# 639, Drainberry Bush
	#  <b>Languages</b> <a href="Languages.aspx?ID=13"><u>Aklo</a></u>, <a href="Languages.aspx?ID=1"><u>Common</a></u>, <a href="Languages.aspx?ID=10"><u>Sylvan</a></u>; <a style="text-decoration:underline" href="Spells.aspx?ID=340"><i>tongues</i></a>
	# 98, Succubus
	#  <b>Languages</b> <a href="Languages.aspx?ID=12"><u>Abyssal</a></u>, <a href="Languages.aspx?ID=16"><u>Celestial</a></u>, <a href="Languages.aspx?ID=1"><u>Common</a></u>, <a href="Languages.aspx?ID=2"><u>Draconic</a></u>; three additional mortal languages; telepathy 100 feet, <a style="text-decoration:underline" href="Spells.aspx?ID=340"><i>tongues</i></a>

	assert section[0] == "Languages"
	assert section[2] == None
	text = section[1]
	languages = build_object(
		'stat_block_section', 'languages', 'Languages', {'languages': []})
	if text.find(";") > -1:
		parts = text.split(";")
		text = parts.pop(0)
		assert len(parts) in [1,2], parts
		parts = rebuilt_split_modifiers(split_stat_block_line(";".join(parts)))
		abilities = []
		for part in parts:
			newtext, modifier = extract_modifier(part.strip())
			if newtext.strip() == "":
				# A bare parenthetical modifies the language list itself.
				languages['modifiers'] = link_modifiers(
					build_objects(
						'stat_block_section', 'modifier',
						[m.strip() for m in modifier.split(",")]))
			else:
				bs = BeautifulSoup(newtext, 'html.parser')
				link = None
				if bs.a:
					newtext, link = extract_link(bs.a)
				ability = build_object(
				'stat_block_section', 'ability', newtext, {
					'ability_type': 'communication'})
				if link:
					#TODO: fix []
					ability['links'] = [link]
				if(modifier):
					#TODO: fix []
					ability['modifiers'] = link_modifiers(
						build_objects(
							'stat_block_section', 'modifier', [
								modifier.strip()]))
				abilities.append(ability)
		if len(abilities) > 0:
			languages['communication_abilities'] = abilities
	parts = rebuilt_split_modifiers(split_stat_block_line(text))
	for part in parts:
		part, modifier = extract_modifier(part)
		bs = BeautifulSoup(part, 'html.parser')
		c = list(bs.children)
		# Reset per language. Without this, an entry with several nodes but
		# no anchor raised NameError on the first language or reused the
		# previous language's link on later ones.
		link = None

		if len(c) > 1:
			# Mixed text and anchors, e.g. "pidgin of <a>Goblin</a>":
			# concatenate the text nodes and anchor names.
			name_parts = []
			for child in c:
				if child.name == "a":
					name, link = extract_link(child)
					name_parts.append(name)
				elif isinstance(child, str):
					name_parts.append(child)
			language = {
				'name': ''.join(name_parts),
				'type': 'stat_block_section',
				'subtype': 'language'}
			if link is not None:
				language['link'] = link
		else:
			assert len(c) == 1
			if c[0].name == 'a':
				name, link = extract_link(c[0])
				language = {
					'name': get_text(bs),
					'type': 'stat_block_section',
					'subtype': 'language',
					'link': link}
			else:
				language = {
					'name': get_text(bs),
					'type': 'stat_block_section',
					'subtype': 'language'}
		if modifier:
			#TODO: fix []
			language['modifiers'] = link_modifiers(
				build_objects(
					'stat_block_section', 'modifier', [modifier]))
		languages['languages'].append(language)
	return languages