Esempi in Python per Fandom, esempi in Python per store.Fandom

Esempio n. 1

0

Mostra file

def importFic(fdata):
	"""Insert one fic, its metadata links and its chapters from a dumped dict.

	fdata is read for the keys 'fandoms', 'characters', 'genres', 'tags' and
	'chapters'; a copy of it is passed through inflateObject (with the
	module-level ficImportRename table) to obtain the fields that are written
	straight into the new Fic's __dict__. Progress is printed to stdout.

	Chapter content is loaded from
	./content/<type>/<localId>/<chapterId>/content.html when that file exists.
	"""
	# ficImportRename is only read here, so no global declaration is needed
	inflated = inflateObject(fdata.copy(), ficImportRename)

	fic = Fic.new()
	for name in inflated:
		value = inflated[name]
		print('setting "{}" to "{}"'.format(name, value))
		fic.__dict__[name] = value

	# convert both dates with util.parseDateAsUnix, passing the current unix
	# time as its second argument
	fic.published = util.parseDateAsUnix(fic.published, int(time.time()))
	fic.updated = util.parseDateAsUnix(fic.updated, int(time.time()))
	for name in ('published', 'updated'):
		print('setting "{}" to "{}"'.format(name, getattr(fic, name)))

	print('adding "{}" ({}/{})'.format(fic.title, fic.type, fic.localId))

	# the fic is inserted before anything is attached to it
	fic.insert()

	for fandomName in fdata['fandoms']:
		print('  adding fandom "{}"'.format(fandomName))
		fic.add(Fandom.define(fandomName))
	for entry in fdata['characters']:
		charName, charFandom = entry['name'], entry['fandom']
		print(
			'  adding character "{}" from fandom "{}"'.format(charName, charFandom)
		)
		fic.add(Character.define(Fandom.define(charFandom), charName))
	for genreName in fdata['genres']:
		print('  adding genre "{}"'.format(genreName))
		fic.add(Genre.define(genreName))
	for tagName in fdata['tags']:
		print('  adding tag "{}"'.format(tagName))
		fic.add(Tag.define(tagName))

	# chapter keys are strings in fdata; walk them in numeric order
	chapters = fdata['chapters']
	for cid in sorted(int(key) for key in chapters):
		print('  adding chapter {}'.format(cid))
		stored = chapters[str(cid)]

		chapter = FicChapter.new()
		chapter.fic = fic
		chapter.ficId = fic.id
		chapter.chapterId = cid
		# stored fields are applied last, so they override the values above
		for name in stored:
			chapter.__dict__[name] = stored[name]

		contentPath = './content/{}/{}/{}/content.html'.format(
			fic.type, fic.localId, cid
		)
		if os.path.isfile(contentPath):
			with open(contentPath, 'r') as f:
				html = f.read()
			print('    has content: {}'.format(len(html)))
			chapter.setHtml(html)
		chapter.insert()

Esempio n. 2

0

Mostra file

	def handleFandom(self, fic: Fic, fandom: str) -> List[Fandom]:
		"""Return the Fandom records for a raw fandom name.

		The raw name is always defined with sourceId=self.ftype and returned
		first. When the name is a key of ffNetFandomMap, the mapped name is
		defined too and appended; otherwise a message naming the fandom and
		fic.url is logged and only the raw record is returned.
		"""
		# the raw/messy fandom is recorded regardless of whether we can map it
		result = [Fandom.define(fandom, sourceId=self.ftype)]

		if fandom in ffNetFandomMap:
			result.append(Fandom.define(ffNetFandomMap[fandom]))
			return result

		util.logMessage('unknown fandom: {} (from {})'.format(fandom, fic.url))
		return result

Esempio n. 3

0

Mostra file

def importDB(data):
	"""Define all metadata in data, then import each fic in sorted key order.

	data is read for the keys 'fandoms', 'characters', 'genres', 'tags' and
	'fics'. Each character entry supplies 'name' and 'fandom'. Every value of
	data['fics'] is handed to importFic.
	"""
	for fandomName in data['fandoms']:
		Fandom.define(fandomName)
	for entry in data['characters']:
		owner = Fandom.define(entry['fandom'])
		Character.define(owner, entry['name'])
	for genreName in data['genres']:
		Genre.define(genreName)
	for tagName in data['tags']:
		Tag.define(tagName)

	# import fics in sorted key order
	fics = data['fics']
	for key in sorted(fics):
		importFic(fics[key])

Esempio n. 4

0

Mostra file

File: xenForoAdapter.py Progetto: FanFicDev/hermes

    def updateTitle(self, fic: Fic) -> None:
        """Strip status markers from the fic title and persist the result.

        Markers are built by wrapping a word in the two halves of each entry
        of self.containers and are searched for in the lower-cased title; only
        the first occurrence of a marker is cut out. A 'complete'/'completed'
        marker sets fic.ficStatus to FicStatus.complete, an 'nsfw' marker sets
        fic.ageRating to 'M'. The remaining title is passed to
        self.cleanTitle, whose result supplies the final title (index 0),
        fandom names (index 1) and tag names (index 2) to attach. The fic is
        then upserted. Does nothing when the title is None.
        """
        if fic.title is None:
            return

        # look for a Complete marker in the title
        for cont in self.containers:
            prefix, suffix = cont[0], cont[1]
            for word in ('complete', 'completed'):
                marker = prefix + word + suffix
                at = fic.title.lower().find(marker)
                if at >= 0:
                    fic.title = fic.title[:at] + fic.title[at + len(marker):]
                    fic.ficStatus = FicStatus.complete
                # whitespace is tidied after every attempt, match or not
                fic.title = fic.title.strip().replace('  ', ' ')

        # strip '[nsfw]'-style markers from anywhere in the title
        for cont in self.containers:
            marker = cont[0] + 'nsfw' + cont[1]
            at = fic.title.lower().find(marker)
            if at >= 0:
                fic.title = fic.title[:at] + fic.title[at + len(marker):]
                fic.ageRating = 'M'  # TODO?
            fic.title = fic.title.strip().replace('  ', ' ')

        cleaned = self.cleanTitle(fic.title)
        fic.title = cleaned[0]
        for fandomName in cleaned[1]:
            fic.add(Fandom.define(fandomName))
        for tagName in cleaned[2]:
            fic.add(Tag.define(tagName))
        fic.upsert()

Esempio n. 5

0

Mostra file

	def handleCrossoverFandom(
		self, fic: Fic, fandom: str, fIds: List[int], href: str
	) -> List[Fandom]:
		"""Return the Fandom records for a crossover fandom.

		The raw crossover name is always defined with sourceId=self.ftype and
		returned first. The ids in fIds are translated through
		ffNetFandomIdMap to messy names and then through ffNetFandomMap to
		clean names; every non-empty clean name is defined and appended.

		Any validation failure (unknown id, unknown messy name, fewer than two
		ids, or a crossover name that does not match
		'<first>_and_<second>_Crossovers') is logged via util.logMessage and
		only the raw record is returned.
		"""
		# save raw/messy fandom
		fandoms = [Fandom.define(fandom, sourceId=self.ftype)]

		# ensure fandom ids are in our map
		missingIds = [fId for fId in fIds if fId not in ffNetFandomIdMap]
		if missingIds:
			util.logMessage(
				'unknown fandom ids: {} from {} in {}'.format(
					missingIds, href, fic.url
				)
			)
			return fandoms

		# translate to messy
		messys = [ffNetFandomIdMap[fId] for fId in fIds]
		# check for missing messy
		missingMessy = [m for m in messys if m not in ffNetFandomMap]
		if missingMessy:
			util.logMessage(
				'unknown messy fandom: {} from {}'.format(missingMessy, href)
			)
			return fandoms

		# the expected name below is built from the first two messy names;
		# with fewer than two, indexing would raise IndexError, so fail the
		# same way the other checks do: log and keep only the raw fandom
		if len(messys) < 2:
			util.logMessage(
				'crossover has fewer than two fandom ids: {} from {} in {}'.format(
					fIds, href, fic.url
				)
			)
			return fandoms

		# check crossover value
		expected = '{}_and_{}_Crossovers'.format(messys[0], messys[1])
		if expected != fandom:
			util.logMessage(
				'crossover got "{}" expected "{}"'.format(fandom, expected)
			)
			return fandoms

		# map messy to clean; an empty clean name contributes no record
		for messy in messys:
			clean = ffNetFandomMap[messy]
			if clean:
				fandoms.append(Fandom.define(clean))
		return fandoms

Esempio n. 6

0

Mostra file

File: wavesArisenAdapter.py Progetto: FanFicDev/hermes

    def parseInfoInto(self, fic: Fic, html: str) -> Fic:
        from bs4 import BeautifulSoup
        html = html.replace('\r\n', '\n')
        soup = BeautifulSoup(html, 'html.parser')

        # wooh hardcoding
        fic.fetched = OilTimestamp.now()
        fic.languageId = Language.getId("English")

        fic.title = 'The Waves Arisen'
        fic.ageRating = 'M'

        self.setAuthor(fic, 'wertifloke', 'https://wertifloke.wordpress.com/',
                       str(2))

        # taken from https://www.parahumans.net/about/
        fic.description = '''
A young Naruto found refuge in the village library, and grew up smart, but by blood he is Ninja, and what place is there for curiosity and calculation in this brutal world of warring states?

The Waves Arisen is a complete novel-length work of Rationalist Naruto Fanfiction. No prior knowledge of the Naruto universe is necessary to follow along. '''

        chapterUrls = self.getChapterUrls(html)
        oldChapterCount = fic.chapterCount
        fic.chapterCount = len(chapterUrls)

        # TODO?
        fic.reviewCount = 0
        fic.favoriteCount = 0
        fic.followCount = 0

        if fic.ficStatus is None or fic.ficStatus == FicStatus.broken:
            fic.ficStatus = FicStatus.ongoing

        fic.published = self.getChapterPublishDate(chapterUrls[0])
        fic.updated = self.getChapterPublishDate(chapterUrls[-1])

        if oldChapterCount is None or fic.chapterCount > oldChapterCount:
            fic.wordCount = 0
        if fic.wordCount == 0:
            fic.upsert()
            for cid in range(1, fic.chapterCount + 1):
                c = fic.chapter(cid)
                c.cache()
                chtml = c.html()
                if chtml is not None:
                    fic.wordCount += len(chtml.split())

        fic.add(Fandom.define('Naruto'))
        # TODO: chars/relationship?

        return fic

Esempio n. 7

0

Mostra file

def dumpDB():
	"""Dump fandoms, characters, genres, tags and fics into one dict.

	The returned dict has the keys 'fandoms', 'characters', 'genres', 'tags'
	(lists of names; characters are {'name', 'fandom'} dicts) and 'fics',
	which maps '<type>/<localId>' to the deflated fic plus its own 'fandoms',
	'characters', 'tags', 'genres' and 'chapters' entries.

	Side effect: for every chapter whose raw attribute is not None,
	chapter.content() is written to
	./content/<type>/<localId>/<chapterId>/content.html, creating the
	directory when needed.
	"""
	fandomMap = {f.id: f for f in Fandom.select()}
	characterMap = {c.id: c for c in Character.select()}
	genreMap = {g.id: g for g in Genre.select()}
	tagMap = {t.id: t for t in Tag.select()}

	data = {
		'fandoms': [f.name for f in fandomMap.values()],
		'characters': [
			{
				'name': c.name,
				'fandom': fandomMap[c.fandom_id].name
			} for c in characterMap.values()
		],
		'genres': [g.name for g in genreMap.values()],
		'tags': [t.name for t in tagMap.values()],
		'fics': {},
	}

	# rename tables and defaults handed to deflateObject
	frename = {'id': None, 'chapters': 'chapterCount'}
	crename = {
		'id': None,
		'ficId': None,
		'cid': None,
		'raw': None,
		'fic': None,
		'lastLine': None
	}
	cdefaults = {
		'line': 0,
		'subLine': 0,
		'notes': None,
		'status': Status.ongoing,
		'fetched': None,
		'url': None
	}

	for fic in Fic.select():
		ficKey = '{}/{}'.format(fic.type, fic.localId)
		dumped = deflateObject(fic.__dict__.copy(), frename)

		dumped['fandoms'] = [f.name for f in fic.fandoms()]
		dumped['characters'] = [
			{
				'name': c.name,
				'fandom': fandomMap[c.fandom_id].name
			} for c in fic.characters()
		]
		dumped['tags'] = [t.name for t in fic.tags()]
		dumped['genres'] = [g.name for g in fic.genres()]

		chapterDump = {}
		for chapter in FicChapter.select({'ficId': fic.id}):
			fields = chapter.__dict__.copy()
			# the url and lastModified defaults are per chapter, so the shared
			# defaults dict is refreshed before each deflate
			cdefaults['url'] = 'https://www.fanfiction.net/s/{}/{}/{}'.format(
				fic.localId, chapter.chapterId, util.urlTitle(fic.title)
			)
			cdefaults['lastModified'] = fields['fetched']
			chapterDump[chapter.chapterId] = deflateObject(
				fields, crename, cdefaults
			)

			# only chapters with raw content get a content file on disk
			if chapter.raw is not None:
				contentDir = './content/{}/{}/{}/'.format(
					fic.type, fic.localId, chapter.chapterId
				)
				if not os.path.isdir(contentDir):
					os.makedirs(contentDir)
				with open(contentDir + 'content.html', 'w') as f:
					f.write(chapter.content())

		dumped['chapters'] = chapterDump
		data['fics'][ficKey] = dumped

	return data

Esempio n. 8

0

Mostra file

File: sugarQuillAdapter.py Progetto: FanFicDev/hermes

	def parseInfoInto(self, fic: Fic, html: str) -> Fic:
		"""Populate fic from a story page and the author's profile page.

		From html (the story page) this reads the author link, the title and
		the chapter dropdown inside the single 'info2_pane' cell. It then
		fetches the author's profile via scrape.scrape(authorUrl)['raw'],
		finds the story table whose 'read.php?storyid=' link matches
		fic.localId, and reads the description, review count and last-updated
		date from that table's three rows.

		The fic is upserted, every chapter is cached and upserted with its
		title from the dropdown, and fic.wordCount is accumulated from the
		whitespace-separated tokens of each chapter's HTML. The 'Harry Potter'
		fandom is added.

		Returns the same fic object.
		Raises Exception when the info pane, author, title, story table,
		review count or last-updated date cannot be located.
		"""
		from bs4 import BeautifulSoup
		html = html.replace('\r\n', '\n')
		soup = BeautifulSoup(html, 'html.parser')

		fic.fetched = OilTimestamp.now()
		fic.languageId = Language.getId("English")  # TODO: don't hard code?

		infoPane = soup.findAll('td', {'class': 'info2_pane'})
		if len(infoPane) != 1:
			raise Exception('unable to find info2_pane: {}'.format(fic.url))
		infoPane = infoPane[0]

		authorHrefPrefix = 'index.php?action=profile&id='
		authorUrl = None
		for authorLink in infoPane.findAll('a'):
			# an anchor without an href yields None from .get(); treat it as a
			# non-matching link instead of failing on None.startswith
			authorHref = authorLink.get('href') or ''
			if not authorHref.startswith(authorHrefPrefix):
				continue

			authorUrl = self.baseUrl + '/' + authorHref
			author = authorLink.getText()
			authorLocalId = authorHref[len(authorHrefPrefix):]

			self.setAuthor(fic, author, authorUrl, authorLocalId)
			break
		else:
			raise Exception('unable to find author: {}'.format(fic.url))

		titleMatch = re.search(
			'<b>Story</b>:((.|\r|\n)*)<b>Chapter</b>:', str(infoPane), re.MULTILINE
		)
		if titleMatch is None:
			edumpContent(str(infoPane), 'sugarquill_title')
			raise Exception('could not locate title')

		fic.title = titleMatch.group(1).replace('&nbsp;', ' ').strip()

		# the dropdown's option values are chapter ids, its texts are titles
		chapterOptions = infoPane.findAll('option')
		chapterTitles = {}
		for chapterOption in chapterOptions:
			cid = int(chapterOption.get('value'))
			chapterTitles[cid] = chapterOption.getText().strip()
		fic.chapterCount = len(chapterOptions)

		fic.ageRating = '<unkown>'  # TODO
		fic.favoriteCount = 0
		fic.followCount = 0

		fic.ficStatus = FicStatus.ongoing  # TODO: no uniform way to detect?

		authorProfileHtml = scrape.scrape(authorUrl)['raw']
		authorProfileHtml = authorProfileHtml.replace('\r', '')
		authorSoup = BeautifulSoup(authorProfileHtml, 'html5lib')

		storyHrefPrefix = 'read.php?storyid='
		storyTables = authorSoup.findAll('table', {'width': '90%'})
		ourStoryTable = None
		for storyTable in storyTables:
			storyId = None
			for a in storyTable.findAll('a'):
				storyHref = a.get('href') or ''
				if not storyHref.startswith(storyHrefPrefix):
					continue
				# keep only the id, dropping any trailing '&...' parameters;
				# partition() leaves the id whole when there is no '&', where
				# slicing at find() == -1 would cut off its last character
				storyId = storyHref[len(storyHrefPrefix):].partition('&')[0]
				storyId = str(int(storyId))
			if storyId is None:
				continue
			if storyId != str(fic.localId):
				continue
			ourStoryTable = storyTable
		if ourStoryTable is None:
			raise Exception(f'unable to find story table: {fic.localId} {authorUrl}')

		trs = ourStoryTable.findAll('tr')
		if len(trs) != 3:
			raise Exception(
				f'ourStoryTable does not have 3 trs: {fic.localId} {authorUrl}'
			)

		fic.description = trs[1].find('td').getText().strip()

		# raw strings keep the regex escapes out of Python's own escape
		# handling; the compiled patterns are unchanged
		reviewsMatch = re.search(
			r'\( Reviews: <a[^>]*>(\d+)</a> \)</td>', str(trs[0]), re.MULTILINE
		)
		if reviewsMatch is None:
			edumpContent(str(trs[0]), 'sugarquill_reviews')
			raise Exception('could not locate reviews')

		fic.reviewCount = int(reviewsMatch.group(1).strip())

		updatedMatch = re.search(r'Last updated (\d+/\d+/\d+)', str(trs[2]))
		if updatedMatch is None:
			edumpContent(str(trs[2]), 'sugarquill_updated')
			raise Exception('could not locate last updated')

		fic.updated = OilTimestamp(
			util.parseDateAsUnix(updatedMatch.group(1), fic.fetched)
		)
		# only the last-updated date is read here, so it doubles as the
		# publish date when none is set yet
		if fic.published is None:
			fic.published = fic.updated

		fic.wordCount = 0
		# the fic is upserted before its chapters are fetched and upserted
		fic.upsert()

		for cid in range(1, fic.chapterCount + 1):
			ch = fic.chapter(cid)
			ch.localChapterId = str(cid)
			ch.title = chapterTitles[cid]
			ch.cache()
			ch.upsert()
			chtml = ch.html()
			if chtml is not None:
				fic.wordCount += len(chtml.split())

		fic.add(Fandom.define('Harry Potter'))
		# TODO: chars/relationship?

		return fic

Esempio n. 9

0

Mostra file

	def parseInfoInto(self, fic: Fic, html: str) -> Fic:
		from bs4 import BeautifulSoup
		soup = BeautifulSoup(html, 'html.parser')

		fic.fetched = OilTimestamp.now()
		fic.languageId = Language.getId("English")  # TODO: don't hard code?

		titleHeadings = soup.findAll('h2', {'class': 'title heading'})
		if len(titleHeadings) != 1:
			raise Exception('unable to find ao3 title {}'.format(fic.url))
		fic.title = titleHeadings[0].get_text().strip()

		summaryModules = soup.findAll('div', {'class': 'summary module'})
		if len(summaryModules) != 1:
			prefaceGroups = soup.findAll('div', {'class': 'preface group'})
			if len(prefaceGroups) == 1:
				summaryModules = prefaceGroups[0].findAll(
					'div', {'class': 'summary module'}
				)

		if len(summaryModules) == 1:
			summaryBq = summaryModules[0].find('blockquote')
			fic.description = summaryBq.decode_contents(formatter='html').strip()
		elif fic.description is None:
			fic.description = "{no summary}"
			# raise Exception('unable to find ao3 summary {}'.format(fic.localId))

		fic.ageRating = '<unkown>'

		# TODO: error handling
		cText = ' '.join(soup.find('dd', {'class': 'chapters'}).contents).strip()
		ps = cText.split('/')
		completedChapters = int(ps[0])
		totalChapters = None if ps[1] == '?' else int(ps[1])
		fic.chapterCount = completedChapters

		wText = ' '.join(soup.find('dd', {'class': 'words'}).contents).strip()
		fic.wordCount = int(wText)

		fic.reviewCount = 0

		fic.favoriteCount = 0
		kDefinition = soup.find('dd', {'class': 'kudos'})
		if kDefinition is not None:
			kText = ' '.join(kDefinition.contents).strip()
			fic.favoriteCount = int(kText)

		fic.followCount = 0

		pText = ' '.join(soup.find('dd', {'class': 'published'}).contents).strip()
		publishedUts = util.parseDateAsUnix(pText, fic.fetched)
		fic.published = OilTimestamp(publishedUts)

		if fic.updated is None:
			fic.updated = fic.published
		if fic.updated is not None:
			updatedUts = util.parseDateAsUnix(fic.updated, fic.fetched)
			fic.updated = OilTimestamp(updatedUts)

		fic.ficStatus = FicStatus.ongoing  # TODO chapter/chapters?

		if totalChapters is None or completedChapters < totalChapters:
			fic.ficStatus = FicStatus.ongoing

		statusDt = soup.find('dt', {'class': 'status'})
		if statusDt is not None:
			if statusDt.contents[0] == 'Completed:':
				fic.ficStatus = FicStatus.complete
				cText = ' '.join(soup.find('dd', {'class': 'status'}).contents).strip()
				updatedUts = util.parseDateAsUnix(cText, fic.fetched)
				fic.updated = OilTimestamp(updatedUts)
			elif statusDt.contents[0] == 'Updated:':
				fic.ficStatus = FicStatus.ongoing
				uText = ' '.join(soup.find('dd', {'class': 'status'}).contents).strip()
				updatedUts = util.parseDateAsUnix(uText, fic.fetched)
				fic.updated = OilTimestamp(updatedUts)
			else:
				raise Exception('unkown status: {}'.format(statusDt.contents[0]))

		byline = soup.find('h3', {'class': 'byline heading'})
		authorLink = byline.find('a')
		if authorLink is None:
			if fic.authorId is not None and len(fic.getAuthorName()) > 0:
				pass  # updated author to anon, don't make changes
			else:
				# first loaded after it was already set to anonymous
				authorUrl = ''
				author = 'Anonymous'
				authorId = 'Anonymous'
				self.setAuthor(fic, author, authorUrl, authorId)
		else:
			authorUrl = authorLink.get('href')
			author = ' '.join(byline.find('a').contents)
			authorId = author  # map pseudo to real?
			self.setAuthor(fic, author, authorUrl, authorId)

		if fic.chapterCount > 1:
			fic.upsert()
			localChapterIdSelect = soup.find(id='selected_id').findAll('option')
			# note: ao3 sometimes says there are less chapters than there really
			# are, possibly due to caching on their end. We just ensure there's _at
			# least_ chapterCount chapters, then fetch whatever the dropdown tells
			# us to
			if len(localChapterIdSelect) > fic.chapterCount:
				fic.chapterCount = len(localChapterIdSelect)
				fic.upsert()
			if len(localChapterIdSelect) != fic.chapterCount:
				raise Exception('mismatching localChapterId count?')

			for cid in range(1, fic.chapterCount + 1):
				chap = fic.chapter(cid)
				chap.url = '{}{}/chapters/{}?view_adult=true'.format(
					self.baseUrl, fic.localId, localChapterIdSelect[cid - 1].get('value')
				)
				chap.localChapterId = localChapterIdSelect[cid - 1].get('value')
				chap.title = localChapterIdSelect[cid - 1].getText().strip()
				if chap.title is not None:
					chap.title = util.cleanChapterTitle(chap.title, cid)
				chap.upsert()

		fandomDd = soup.find('dd', {'class': 'fandom tags'})
		if fandomDd is not None:
			fandomTags = fandomDd.findAll('a', {'class': 'tag'})
			for ft in fandomTags:
				originalF = ft.contents[0].strip()
				f = originalF.lower()
				# TODO: this seriously needs reworked
				if (
					(f.startswith("harry potter ") and f.endswith("rowling"))
					or f == 'harry potter - fandom'
					or f == 'fantastic beasts and where to find them (movies)'
					or f == 'harry potter next generation - fandom'
				):
					fic.add(Fandom.define('Harry Potter'))
				elif (
					f == 'sherlock - fandom' or f == 'sherlock (tv)'
					or f == 'sherlock holmes & related fandoms'
					or f == 'sherlock holmes - arthur conan doyle'
					or f == 'sherlock holmes (downey films)'
				):
					fic.add(Fandom.define('Sherlock Holmes'))
				elif f == 'furry (fandom)' or f == 'harry - fandom':
					continue  # skip
				elif f == 'fleurmione - fandom':
					continue  # skip
				elif f == 'skyfall (2012) - fandom':
					fic.add(Fandom.define('James Bond'))
				elif f == 'orphan black (tv)':
					fic.add(Fandom.define('Orphan Black'))
				elif (
					f == 'naruto' or f == 'naruto shippuden'
					or f == 'naruto shippuuden - fandom'
				):
					fic.add(Fandom.define('Naruto'))
				elif f == 'naruto/harry potter':
					fic.add(Fandom.define('Naruto'))
					fic.add(Fandom.define('Harry Potter'))
				elif f == 'bleach':
					fic.add(Fandom.define('Bleach'))
				elif (
					f == 'iron man (movies)' or f == 'iron man - all media types'
					or f == 'iron man (comic)' or f == 'iron man - fandom'
					or f == 'iron man (comics)'
				):
					fic.add(Fandom.define('Iron Man'))
				elif (
					f == 'the avengers (marvel) - all media types'
					or f == 'the avengers (marvel movies)'
					or f == 'the avengers - ambiguous fandom'
					or f == 'the avengers (2012)' or f == 'the avengers'
					or f == 'avengers (marvel) - all media types'
					or f == 'marvel avengers movies universe' or f == 'avengers'
				):
					fic.add(Fandom.define('Avengers'))
				elif f == 'marvel 616':
					fic.add(Fandom.define('Marvel'))
					fic.add(Fandom.define('Marvel 616'))
				elif f == 'thor (movies)' or f == 'thor - all media types':
					fic.add(Fandom.define('Thor'))
				elif (
					f == 'captain america (movies)'
					or f == 'captain america - all media types'
					or f == 'captain america (comics)'
				):
					fic.add(Fandom.define('Captain America'))
				elif (
					f == 'avatar: the last airbender' or f == 'avatar: legend of korra'
					or f == 'avatar the last airbender - fandom'
				):
					fic.add(Fandom.define('Avatar'))
				elif f == 'original work':
					fic.add(Fandom.define('Original Work'))
				elif f == 'stargate atlantis':
					fic.add(Fandom.define('Stargate Atlantis'))
				elif f == 'stargate sg-1':
					fic.add(Fandom.define('Stargate SG-1'))
				elif f == 'stargate - all series':
					fic.add(Fandom.define('Stargate Atlantis'))
					fic.add(Fandom.define('Stargate SG-1'))
				elif f == 'agents of s.h.i.e.l.d. (tv)':
					fic.add(Fandom.define('Avengers'))
				elif f == 'supernatural':
					fic.add(Fandom.define('Supernatural'))
				elif f == 'teen wolf (tv)':
					fic.add(Fandom.define('Teen Wolf'))
				elif f == 'grimm (tv)':
					fic.add(Fandom.define('Grimm'))
				elif (
					f == 'the amazing spider-man (movies - webb)'
					or f == 'spider-man - all media types'
					or f == 'spider-man: homecoming (2017)'
				):
					fic.add(Fandom.define('Spiderman'))
				elif (
					f == 'x-men - all media types' or f == 'x-men (movieverse)'
					or f == 'x-men (comicverse)'
				):
					fic.add(Fandom.define('X-Men'))
				elif (
					f == 'lord of the rings - j. r. r. tolkien'
					or f == 'the lord of the rings - j. r. r. tolkien'
				):
					fic.add(Fandom.define('Lord of the Rings'))
				elif (
					f == 'crisis core: final fantasy vii'
					or f == 'compilation of final fantasy vii' or f == 'final fantasy vii'
				):
					fic.add(Fandom.define('Final Fantasy VII'))
					fic.add(Fandom.define('Final Fantasy'))
				elif f == 'sen to chihiro no kamikakushi | spirited away':
					fic.add(Fandom.define('Spirited Away'))
				elif f == 'howl no ugoku shiro | howl\'s moving castle':
					fic.add(Fandom.define('Howl\'s Moving Castle'))
				elif f == 'rise of the guardians (2012)':
					fic.add(Fandom.define('Rise of the Guardians'))
				elif (
					f == 'doctor who' or f == 'doctor who (2005)'
					or f == 'doctor who & related fandoms'
				):
					fic.add(Fandom.define('Doctor Who'))
				elif f == 'daredevil (tv)' or f == 'daredevil (comics)':
					fic.add(Fandom.define('DareDevil'))
				elif f == 'labyrinth (1986)':
					fic.add(Fandom.define('Labyrinth'))
				elif f == 'gravity falls':
					fic.add(Fandom.define('Gravity Falls'))
				elif f == 'once upon a time (tv)':
					fic.add(Fandom.define('Once Upon a Time'))
				elif f == 'doctor strange (comics)':
					fic.add(Fandom.define('Doctor Strange'))
				elif f == 'the sentinel':
					fic.add(Fandom.define('The Sentinel'))
				elif f == 'teen titans (animated series)':
					fic.add(Fandom.define('Teen Titans'))
				elif (
					f == 'dcu' or f == 'dcu animated' or f == 'dcu (comics)'
					or f == 'dc extended universe' or f == 'dc animated universe'
				):
					fic.add(Fandom.define('DC'))
				elif f == 'vampire hunter d':
					fic.add(Fandom.define('Vampire Hunter D'))
				elif f == 'homestuck':
					fic.add(Fandom.define('Homestuck'))
				elif f == 'one piece':
					fic.add(Fandom.define('One Piece'))
				elif f == 'batman (movies - nolan)':
					fic.add(Fandom.define('Batman'))
				elif f == 'die hard (movies)':
					fic.add(Fandom.define('Die Hard'))
				elif f == 'discworld - terry pratchett':
					fic.add(Fandom.define('Discworld'))
				elif f == 'gossip girl':
					fic.add(Fandom.define('Gossip Girl'))
				elif (
					f == 'a song of ice and fire - george r. r. martin'
					or f == 'a song of ice and fire & related fandoms'
				):
					fic.add(Fandom.define('A Song of Ice and Fire'))
				elif f == 'supergirl (tv 2015)':
					fic.add(Fandom.define('Supergirl'))
				elif f == 'merlin (tv)':
					fic.add(Fandom.define('Merlin'))
				elif f == 'star trek':
					fic.add(Fandom.define('Star Trek'))
				elif f == 'steven universe (cartoon)':
					fic.add(Fandom.define('Steven Universe'))
				elif f == 'hellsing':
					fic.add(Fandom.define('Hellsing'))
				elif f == 'the breaker':
					fic.add(Fandom.define('The Breaker'))
				elif f == 'smallville':
					fic.add(Fandom.define('Smallville'))
				elif f == '베리타스 | veritas (manhwa)':
					fic.add(Fandom.define('Veritas (manhwa)'))
				elif f == 'guardians of childhood - william joyce':
					fic.add(Fandom.define('Guardians of Childhood'))
				elif f == 'person of interest (tv)':
					fic.add(Fandom.define('Person of Interest'))
				elif f == 'james bond (craig movies)':
					fic.add(Fandom.define('James Bond'))
				elif f == 'the bourne legacy (2012)':
					fic.add(Fandom.define('Jason Bourne'))
				elif f == 'numb3rs':
					fic.add(Fandom.define('Numb3rs'))
				elif f == 'temeraire - naomi novik':
					fic.add(Fandom.define('Temeraire'))
				elif f == 'twilight series - stephenie meyer':
					fic.add(Fandom.define('Twilight'))
				elif f == 'dungeons and dragons - fandom':
					fic.add(Fandom.define('Dungeons and Dragons'))
				elif f == 'american horror story' or f == 'american horror story: cult':
					fic.add(Fandom.define('American Horror Story'))
				elif (
					f == 'worm (web serial novel)' or f == 'worm - wildbow'
					or f == 'parahumans series - wildbow'
					or f == 'worm (web serial) | wildbow' or f == 'worm - fandom'
					or f == 'parahumans - fandom' or f == 'worm (parahumans)'
					or f == 'worm (web serial)' or f == 'worm | parahumans'
					or f == 'worm (web novel)'
				):
					fic.add(Fandom.define('Worm'))
				elif f == 'toaru kagaku no railgun | a certain scientific railgun':
					fic.add(Fandom.define('A Certain Scientific Railgun'))
				elif f == 'toaru majutsu no index | a certain magical index':
					fic.add(Fandom.define('A Certain Magical Index'))
				elif f == 'cthulhu mythos - h. p. lovecraft':
					fic.add(Fandom.define('Cthulhu'))
				elif f == 'transformers - all media types':
					fic.add(Fandom.define('Transformers'))
				elif f == 'destiny (video game)':
					fic.add(Fandom.define('Destiny'))
				elif f == 'fandom - fandom' or f == 'meta - fandom':
					pass  # >_>
				elif f == 'house m.d.':
					fic.add(Fandom.define('House, M.D.'))
				elif f == 'the hobbit (jackson movies)':
					fic.add(Fandom.define('The Hobbit'))
				elif f == 'doctor strange (2016)':
					fic.add(Fandom.define('Doctor Strange'))
				elif f == 'arrow (tv 2012)':
					fic.add(Fandom.define('Arrow'))
				elif f == 'the flash (tv 2014)':
					fic.add(Fandom.define('Flash'))
				elif f == 'senki zesshou symphogear':
					fic.add(Fandom.define('Symphogear'))
				elif (
					f == 'fullmetal alchemist: brotherhood & manga'
					or f == 'fullmetal alchemist - all media types'
					or f == 'fullmetal alchemist (anime 2003)'
				):
					fic.add(Fandom.define('Fullmetal Alchemist'))
				elif (
					f == 'star wars - all media types'
					or f == 'star wars episode vii: the force awakens (2015)'
					or f == 'star wars prequel trilogy'
				):
					fic.add(Fandom.define('Star Wars'))
				elif (
					f == 'guardians of the galaxy (2014)'
					or f == 'guardians of the galaxy - all media types'
					or f == 'guardians of the galaxy (movies)'
				):
					fic.add(Fandom.define('Guardians of the Galaxy'))
				elif f == 'ant man (2015)' or f == 'ant-man (movies)':
					fic.add(Fandom.define('Ant Man'))
				elif f == 'the defenders (marvel tv)':
					fic.add(Fandom.define('The Defenders'))
				elif f == 'elementary (tv)':
					fic.add(Fandom.define('Elementary'))
				elif f == 'good omens - neil gaiman & terry pratchett':
					fic.add(Fandom.define('Good Omens'))
				elif f == 'danny phantom':
					fic.add(Fandom.define('Danny Phantom'))
				elif f == 'katekyou hitman reborn!':
					fic.add(Fandom.define('Katekyo Hitman Reborn!'))
				elif f == 'welcome to night vale':
					fic.add(Fandom.define('Welcome to Night Vale'))
				elif f == 'ncis':
					fic.add(Fandom.define('NCIS'))
				elif f == 'torchwood':
					fic.add(Fandom.define('Torchwood'))
				elif f == 'magic: the gathering':
					fic.add(Fandom.define('Magic: The Gathering'))
				elif f == 'overwatch (video game)':
					fic.add(Fandom.define('Overwatch'))
				elif f == 'detroit: become human (video game)':
					fic.add(Fandom.define('Detroit: Become Human'))
				elif f == 'greek and roman mythology':
					pass
				elif f == 'life is strange (video game)':
					fic.add(Fandom.define('life is strange (video game)'))
				elif f == 'akatsuki no yona | yona of the dawn':
					fic.add(Fandom.define('Yona of the Dawn'))
				elif f == '僕のヒーローアカデミア | boku no hero academia | my hero academia':
					fic.add(Fandom.define('My Hero Academia'))
				elif f == 'voltron: legendary defender':
					fic.add(Fandom.define('Voltron'))
				elif f == 'selfie (tv)':
					fic.add(Fandom.define('Selfie'))
				elif f == 'suits (tv)':
					fic.add(Fandom.define('Suits'))
				elif f == 'fruits basket':
					fic.add(Fandom.define('Fruits Basket'))
				elif f == 'hetalia: axis powers':
					fic.add(Fandom.define('Hetalia: Axis Powers'))
				elif f == 'carmilla (web series)':
					fic.add(Fandom.define('Carmilla'))
				elif f == 'the dresden files - jim butcher':
					fic.add(Fandom.define('Dresden Files'))
				elif f == 'girl genius':
					fic.add(Fandom.define('Girl Genius'))
				elif f == 'unspecified fandom':
					pass  # TODO?
				elif f == 'nightwing (comics)':
					fic.add(Fandom.define('Nightwing'))
				elif f == 'books of the raksura - martha wells':
					fic.add(Fandom.define('Books of the Raksura'))
				elif f == 'fall of ile-rien - martha wells':
					fic.add(Fandom.define('Fall of Ile-Rien'))
				elif f == 'vorkosigan saga - lois mcmaster bujold':
					fic.add(Fandom.define('Vorkosigan Saga'))
				elif (
					f == 'highlander: the series' or f == 'highlander - all media types'
				):
					fic.add(Fandom.define('Highlander'))
				elif f == 'yoroiden samurai troopers | ronin warriors':
					fic.add(Fandom.define('Ronin Warriors'))
				elif f == 'hockey rpf':
					fic.add(Fandom.define('Hockey RPF'))
				elif f == 'pacific rim (2013)':
					fic.add(Fandom.define('Pacific Rim'))
				elif f == 'enchanted forest chronicles - patricia wrede':
					fic.add(Fandom.define('Enchanted Forest Chronicles'))
				elif f == 'tortall - tamora pierce':
					fic.add(Fandom.define('Tortall'))
				elif f == 'protector of the small - tamora pierce':
					fic.add(Fandom.define('Protector of the Small'))
				elif f == 'leverage':
					fic.add(Fandom.define('Leverage'))
				elif f == 'valdemar series - mercedes lackey':
					fic.add(Fandom.define('Valdemar Series'))
				elif (
					f == 'b.p.r.d.' or f == 'bureau for paranormal research and defense'
				):
					fic.add(Fandom.define('B.P.R.D.'))
				elif f == 'hellboy (comic)':
					fic.add(Fandom.define('Hellboy'))
				elif f == 'sga/avatar':
					fic.add(Fandom.define('Stargate Atlantis'))
					fic.add(Fandom.define('Avatar'))
				elif f == 'annihilation (2018 garland)':
					fic.add(Fandom.define('Annihilation'))
				elif f == 'craft sequence - max gladstone':
					fic.add(Fandom.define('Craft Sequence'))
				elif f == 'the good place (tv)':
					fic.add(Fandom.define('The Good Place'))
				elif f == 'jessica jones (tv)':
					fic.add(Fandom.define('Jessica Jones'))
				elif f == 'mad max series (movies)':
					fic.add(Fandom.define('Mad Max'))
				elif f == 'american gods (tv)':
					fic.add(Fandom.define('American Gods'))
				elif f == 'terminator: the sarah connor chronicles':
					fic.add(Fandom.define('Terminator: The Sarah Connor Chronicles'))
					fic.add(Fandom.define('Terminator'))
				elif f == 'wolf 359 (radio)':
					fic.add(Fandom.define('Wolf 359'))
				elif f == 'shadowrun: dragonfall':
					fic.add(Fandom.define('Shadowrun'))
				elif f == 'ars paradoxica (podcast)':
					fic.add(Fandom.define('Ars Paradoxica'))
				elif f == 'love is strange - fandom':
					fic.add(Fandom.define('Love is Strange'))
				elif f == 'dune - all media types':
					fic.add(Fandom.define('Dune'))
				elif f == 'dragon age: origins':
					fic.add(Fandom.define('Dragon Age: Origins'))
				elif f == 'game of thrones (tv)':
					fic.add(Fandom.define('Game of Thrones'))
				elif f == 'chronicles of amber - roger zelazny':
					fic.add(Fandom.define('Chronicles of Amber'))
				elif f == 'the southern reach trilogy - jeff vandermeer':
					fic.add(Fandom.define('The Southern Reach Trilogy'))
				elif f == 'continuum (tv)':
					fic.add(Fandom.define('Continuum'))
				elif f == 'mage: the ascension':
					fic.add(Fandom.define('Mage: The Ascension'))
				elif f == 'the good wife (tv)' or f == 'good wife (tv)':
					fic.add(Fandom.define('The Good Wife'))
				elif f == 'alliance-union - c. j. cherryh':
					fic.add(Fandom.define('Alliance-Union'))
				elif f == 'indexing - seanan mcguire':
					fic.add(Fandom.define('Indexing'))
				elif f == 'ultraviolet (tv)':
					fic.add(Fandom.define('Ultraviolet'))
				elif f == 'veronica mars (tv)':
					fic.add(Fandom.define('Veronica Mars'))
				elif f == 'secret circle (tv)':
					fic.add(Fandom.define('Secret Circle'))
				elif f == 'mahou shoujo madoka magika | puella magi madoka magica':
					fic.add(Fandom.define('Madoka Magica'))
				elif f == 'agent carter (tv)':
					fic.add(Fandom.define('Agent Carter'))
				elif f == 'dracula & related fandoms':
					fic.add(Fandom.define('Dracula'))
				elif f == 'dragon ball':
					fic.add(Fandom.define('Dragon Ball'))
				elif f == 'mass effect - all media types':
					fic.add(Fandom.define('Mass Effect'))
				elif f == 'firefly' or f == 'serenity (2005)':
					fic.add(Fandom.define('Firefly'))
				else:
					anyHere = False
					global ao3FandomsMap
					for fm in ao3FandomsMap:
						here = False
						for uf in fm[0]:
							if f == uf.lower().strip():
								here = True
								break
						if not here:
							continue
						anyHere = True
						for mf in fm[1]:
							fic.add(Fandom.define(mf))
					if not anyHere:
						util.logMessage(f'ao3|unknown fandom|{fic.url}|{originalF}')
						#raise Exception('unknown fandom: {} "{}"'.format(fic.url, originalF))

		ourDoms = fic.fandoms()
		# we have a canonical fandom, try to find our characters
		if len(ourDoms) == 1:
			relationshipDd = soup.find('dd', {'class': 'relationship tags'})
			if relationshipDd is not None:
				relationshipTags = relationshipDd.findAll('a', {'class': 'tag'})
				for rt in relationshipTags:
					r = rt.contents[0]
					chars = r.split('/')
					if len(chars) > 8:  # TODO: sometimes more?
						raise Exception('unable to parse relationship: {}'.format(r))
					for char in chars:
						fic.add(Character.defineInFandom(ourDoms[0], char, self.ftype))

		return fic

Esempio n. 10

0

Mostra file

File: fanficsMeAdapter.py Progetto: FanFicDev/hermes

	def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
		"""Populate `fic` from a story info page and upsert its chapters.

		This parser is currently disabled: it raises on entry because the site
		format changed. The code after the raise is the previous parser, kept
		as the starting point for an update.

		Args:
			fic: record to fill in; mutated in place.
			wwwHtml: html of the story info page.

		Returns:
			The same `fic` that was passed in.

		Raises:
			Exception: always, until the parser is updated. The previous parser
				also raised on unknown fandoms, statuses or metadata rows.
		"""
		raise Exception('FIXME TODO fanfics me format has changed')
		from bs4 import BeautifulSoup  # type: ignore
		soup = BeautifulSoup(wwwHtml, 'html5lib')

		ficHead = soup.find('div', {'class': 'FicHead'})

		titleH1 = ficHead.find('h1')
		fic.title = titleH1.getText().strip()

		fandoms: List[str] = []
		trs = ficHead.findAll('div', {'class': 'tr'})
		author = None
		# each row is a (label, value) pair; labels are the site's Russian text
		for tr in trs:
			divTitle = tr.find('div', {'class': 'title'})
			divContent = tr.find('div', {'class': 'content'})

			t = str(divTitle.getText()).strip()
			v = str(divContent.getText()).strip()

			if t == 'Автор:':  # author
				author = v
			elif t == 'Фандом:':  # fandom
				if v == 'Harry Potter' or v == 'Harry Potter - J. K. Rowling':
					fandoms += ['Harry Potter']
				else:
					raise Exception('unknown fandom: ' + v)
			elif t == 'Статус:':  # status
				if v == 'В процессе':  # in progress
					fic.ficStatus = FicStatus.ongoing
				elif v == 'Закончен':  # finished
					fic.ficStatus = FicStatus.complete
				else:
					raise Exception('unknown write status: ' + v)
			elif t == 'Опубликован:':  # published
				fic.published = self.parseRussianDate(v)
			elif t == 'Изменен:':  # changed
				fic.updated = self.parseRussianDate(v)
			elif t == 'Ссылка:':  # link
				# source archive url; recognised but not stored anywhere
				pass
			elif t == 'Читателей:':  # readers
				fic.followCount = int(v)
			elif t == 'Персонажи:':  # characters
				# characters, parse relationship?
				pass
			elif t == 'Рейтинг:':  # rating
				fic.ageRating = v
			elif t == 'Предупреждения:':  # warnings
				# warnings?
				pass
			else:
				raise Exception('unknown metadata: ' + t)

		# TODO?
		assert (author is not None)
		authorUrl = author
		authorId = author
		self.setAuthor(fic, author, authorUrl, authorId)

		fic.fetched = OilTimestamp.now()
		fic.languageId = Language.getId("English")  # TODO: don't hard code?

		if fic.url is None:
			fic.url = self.constructUrl(fic.localId)

		summaryTextDiv = soup.find('div', {'class': 'summary_text'})
		if summaryTextDiv is None:
			summaryTextDiv = soup.find('div', {'class': 'summary_text_fic3'})
		fic.description = summaryTextDiv.getText()

		# default optional fields
		fic.reviewCount = 0
		fic.favoriteCount = 0
		if fic.followCount is None:
			fic.followCount = 0

		# only fall back to 'M' when no rating is known, so a rating parsed
		# from the page above is not thrown away
		if fic.ageRating is None:
			fic.ageRating = 'M'

		ficContentsUl = soup.find('ul', {'class': 'FicContents'})
		chapterLinks = ficContentsUl.findAll('li', {'class': 't-b-dotted'})
		fic.chapterCount = len(chapterLinks)

		if fic.wordCount is None:
			fic.wordCount = 0
		fic.upsert()

		wordCount = 0
		for cid in range(1, fic.chapterCount + 1):
			chapter = fic.chapter(cid)
			chapter.localChapterId = str(cid)
			chapter.url = self.constructUrl(fic.localId, cid)

			# chapter divs are numbered from zero: chapter N lives in #c{N-1}
			divId = 'c{}'.format(cid - 1)
			inlineDiv = soup.find('div', {'id': divId})

			# try to get it out of current blob first
			if chapter.html() is None and inlineDiv is not None:
				chapter.setHtml(
					'<div class="ReadContent">' + str(inlineDiv) + '</div>'
				)

			if (chapter.title is None
					or len(chapter.title) < 1) and inlineDiv is not None:
				chapterTitle = inlineDiv.previous_sibling
				if chapterTitle is not None and chapterTitle.name == 'h2':
					chapter.title = chapterTitle.getText()

			# fallback to scraping it directly
			if chapter.html() is None:
				cdata = scrape.softScrape(chapter.url)
				assert (cdata is not None)
				chapter.setHtml(self.extractContent(fic, cdata))
				csoup = BeautifulSoup(cdata, 'html5lib')
				contentDiv = csoup.find('div', {'id': divId})
				# the scraped page may lack the div; skip the title then
				if contentDiv is not None:
					chapterTitle = contentDiv.previous_sibling
					if chapterTitle is not None and chapterTitle.name == 'h2':
						chapter.title = chapterTitle.getText()

			if chapter.title is not None and len(chapter.title) > 0:
				chapter.title = util.cleanChapterTitle(chapter.title, cid)

			chapter.upsert()
			wordCount += len(chapter.cachedContent().split())

		fic.wordCount = wordCount

		for fandom in fandoms:
			fic.add(Fandom.define(fandom))

		return fic

Esempio n. 11

0

Mostra file

File: siyeAdapter.py Progetto: FanFicDev/hermes

    def parseInfoInto(self, fic: Fic, html: str) -> Fic:
        """Populate `fic` with metadata scraped from a story info page.

        Sets title, author, description, age rating, chapter count, status,
        published/updated timestamps and word count, then tags the fic with
        the 'Harry Potter' fandom.

        Args:
            fic: record to fill in; mutated in place.
            html: raw html of the story page.

        Returns:
            The same `fic` that was passed in.

        Raises:
            Exception: if the page does not contain exactly three tables with
                width 95%, or the author link or summary cannot be located.
        """
        from bs4 import BeautifulSoup
        html = html.replace('\r\n', '\n')
        soup = BeautifulSoup(html, 'html.parser')

        fic.fetched = OilTimestamp.now()
        fic.languageId = Language.getId("English")  # TODO: don't hard code?

        w95tables = soup.findAll('table', {'width': '95%'})
        if len(w95tables) != 3:
            raise Exception('wrong number of w95 tables: {}'.format(
                len(w95tables)))

        ficInfoTable = w95tables[0]
        ficTitleH3 = ficInfoTable.find('h3')
        fic.title = ficTitleH3.get_text().strip()

        authorUrlMatch = re.search(r'"viewuser.php\?uid=(\d+)">([^<]*)<', html)
        if authorUrlMatch is None:
            raise Exception('could not locate author url')

        author = authorUrlMatch.group(2)
        authorId = authorUrlMatch.group(1)
        authorUrl = self.baseUrl + '/viewuser.php?uid=' + authorId

        self.setAuthor(fic, author, authorUrl, authorId)

        # TODO: this may miss multiline summaries :(
        summaryMatch = re.search(
            '<b>Summary:</b>((.|\r|\n)*)<b>Hitcount: </b>', html, re.MULTILINE)
        if summaryMatch is None:
            edumpContent(html, 'siye_summary')
            raise Exception('could not locate summary')
        # alternatively: fic.description = "{no summary}" ?

        fic.description = summaryMatch.group(1).strip()

        fic.ageRating = '<unkown>'

        ageRatingMatch = re.search('<b>Rating:</b>(.*)<br>', html)
        if ageRatingMatch is not None:
            fic.ageRating = ageRatingMatch.group(1).strip()

        # the chapter count is the highest chapter number linked from the page
        maxChapter = 0
        baseChapterHref = 'viewstory.php?sid={}&chapter='.format(fic.localId)
        # single chapter stories link to "chapter=Array" instead of a number
        singleChapterHref = baseChapterHref + 'Array'
        isSingleChapterFic = False
        for a in soup.find_all('a'):
            href = a.get('href')
            if href is None or not href.startswith(baseChapterHref):
                continue
            if href.startswith(singleChapterHref):
                isSingleChapterFic = True
                maxChapter = max(1, maxChapter)
                continue
            cid = int(href[len(baseChapterHref):])
            maxChapter = max(cid, maxChapter)

        fic.chapterCount = maxChapter

        fic.reviewCount = 0
        fic.favoriteCount = 0
        fic.followCount = 0

        # str.find returns -1 (truthy) on a miss, so test membership instead;
        # otherwise nearly every story is flagged complete
        fic.ficStatus = FicStatus.ongoing
        if 'Story is Complete' in html:
            fic.ficStatus = FicStatus.complete

        # published/updated are the earliest/latest "updated on" dates listed
        updatedOnPattern = re.compile(r'updated on (\d+).(\d+).(\d+)')
        minUpdate = util.parseDateAsUnix(int(time.time()), fic.fetched)
        maxUpdate = util.parseDateAsUnix('1970/01/01', fic.fetched)
        for (year, month, day) in updatedOnPattern.findall(html):
            date = '{}/{}/{}'.format(year, month, day)
            dt = util.parseDateAsUnix(date, fic.fetched)

            minUpdate = min(minUpdate, dt)
            maxUpdate = max(maxUpdate, dt)

        if fic.published is None or fic.published.toUTS() > minUpdate:
            fic.published = OilTimestamp(minUpdate)
        if fic.updated is None or fic.updated.toUTS() < maxUpdate:
            fic.updated = OilTimestamp(maxUpdate)
        if fic.updated < fic.published:
            fic.updated = fic.published

        # total word count is the sum of every "<n> words" figure on the page
        fic.wordCount = 0
        for words in re.findall(r'(\d+) words', html):
            fic.wordCount += int(words)

        if fic.wordCount == 0 and isSingleChapterFic:
            # best effort: no word figures were listed, so count the
            # whitespace separated tokens of chapter 1's html instead
            try:
                fic.upsert()
                ch1 = fic.chapter(1)
                ch1.cache()
                chtml = ch1.html()
                if chtml is not None:
                    fic.wordCount = len(chtml.split())
            except Exception:
                # deliberately ignored: the word count simply stays 0; unlike
                # a bare except this lets KeyboardInterrupt/SystemExit through
                pass

        fic.add(Fandom.define('Harry Potter'))
        # TODO: chars/relationship?

        return fic

Esempio n. 12

0

Mostra file

    def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
        """Populate `fic` from a story page, its chapters and the site search.

        Title, author and the chapter list come from `wwwHtml`. Every chapter
        url is then passed to `scrape.softScrape` and stored, and finally the
        archive's search page is scraped for dates, counts, status,
        description, tags and fandoms.

        Args:
            fic: record to fill in; `fic.localId` must look like
                "<archive>/<story number>". Mutated in place.
            wwwHtml: html of the story page.

        Returns:
            The same `fic` that was passed in.

        Raises:
            Exception: if the title link, author link or chapter dropdown is
                missing, or the story cannot be found in the search results.
        """
        from bs4 import BeautifulSoup
        from urllib.parse import quote_plus

        localIdParts = fic.localId.split('/')
        archive = localIdParts[0]
        storyNo = localIdParts[1]

        soup = BeautifulSoup(wwwHtml, 'html5lib')

        titleLink = soup.find('a', {'href': '/story.php?no={}'.format(storyNo)})
        if titleLink is None:
            raise Exception('unable to find title')
        fic.title = str(titleLink.getText())

        membersUrl = 'http://members.adult-fanfiction.org/profile.php?no='
        memberLink = soup.find(
            lambda t: t.name == 'a' and
            (t.get('href') or '').startswith(membersUrl))
        if memberLink is None:
            raise Exception('unable to find author')

        author = memberLink.getText()
        authorUrl = memberLink.get('href')
        authorId = authorUrl[len(membersUrl):]
        self.setAuthor(fic, author, authorUrl, authorId)

        # TODO
        fic.ficStatus = FicStatus.ongoing

        fic.fetched = OilTimestamp.now()
        fic.languageId = Language.getId("English")  # TODO: don't hard code?

        fic.url = self.constructUrl(fic.localId, 1)

        # TODO: description is on search page
        if fic.description is None:
            fic.description = 'TODO: on the search page?'

        # default optional fields
        fic.reviewCount = 0
        fic.favoriteCount = 0
        fic.followCount = 0

        fic.ageRating = 'M'

        # TODO
        if fic.published is None:
            fic.published = OilTimestamp.now()
        if fic.updated is None:
            fic.updated = fic.published

        chapterDropdown = soup.find('div', {'class': 'dropdown-content'})
        if chapterDropdown is None:
            raise Exception('unable to find chapter dropdown')
        chapterLinks = chapterDropdown.findAll('a')
        oldChapterCount = fic.chapterCount
        fic.chapterCount = len(chapterLinks)

        if fic.wordCount is None:
            fic.wordCount = 0
        fic.upsert()

        wordCount = 0
        for cid in range(1, fic.chapterCount + 1):
            chapterUrl = self.constructUrl(fic.localId, cid)
            chapterContent = scrape.softScrape(chapterUrl)
            chapter = fic.chapter(cid)
            if chapterContent is not None:
                chapter.setHtml(chapterContent)
            chapter.localChapterId = str(cid)
            chapter.url = chapterUrl

            # the dropdown entry text doubles as the chapter title
            chapter.title = util.cleanChapterTitle(
                chapterLinks[cid - 1].getText().strip(), cid)

            chapter.upsert()
            if chapterContent is not None:
                wordCount += len(chapterContent.split())

        fic.wordCount = wordCount

        if oldChapterCount is not None and oldChapterCount < fic.chapterCount:
            fic.updated = OilTimestamp.now()  # TODO
        fic.upsert()

        storyUrl = self.constructUrl(fic.localId, chapterId=None)

        # more metadata from search page; author and title are url-encoded so
        # characters such as '&', '#' or spaces cannot corrupt the query
        searchFmt = ('http://{}.adult-fanfiction.org/search.php?' +
                     'auth={}&title={}&summary=&tags=&cats=0&search=Search')
        authorQuery = quote_plus(author)
        searchUrl = searchFmt.format(archive, authorQuery,
                                     quote_plus(fic.title))
        data = scrape.scrape(searchUrl)['raw']

        metas = self.extractSearchMetadata(data)

        # fallback to pure author search
        if storyUrl not in metas:
            searchUrl = searchFmt.format(archive, authorQuery, '')
            data = scrape.scrape(searchUrl)['raw']
            metas = self.extractSearchMetadata(data)

        if storyUrl not in metas:
            raise Exception('cannot find search metadata')

        meta = metas[storyUrl]

        assert (meta.published is not None and meta.updated is not None)
        fic.published = OilTimestamp(meta.published)
        fic.updated = OilTimestamp(meta.updated)

        fic.reviewCount = meta.reviewCount
        fic.favoriteCount = meta.views  # TODO

        fic.ficStatus = meta.ficStatus

        assert (meta.description is not None)
        fic.description = meta.description
        assert (fic.description is not None)
        if len(meta.tags) > 0:
            fic.description += '\n<hr />\nContent Tags: ' + ' '.join(meta.tags)

        for fan in meta.fandoms:
            fic.add(Fandom.define(fan))

        return fic

Esempio n. 13

0

Mostra file

File: hpFanficArchiveAdapter.py Progetto: FanFicDev/hermes

    def parseInfoInto(self, fic: Fic, html: str) -> Fic:
        """Populate `fic` with metadata parsed from a story page.

        Reads title and author from the `#pagetitle` links, the description
        from the text between the SUMMARY START/END html comments, and counts,
        dates and completion status from the "Story Information" block.

        Args:
            fic: record to fill in; mutated in place.
            html: raw html of the story page.

        Returns:
            The same `fic` that was passed in.

        Raises:
            Exception: if the title, author or "Story Information" block is
                missing, or the "Completed:" value is neither Yes nor No.
        """
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')

        fic.fetched = OilTimestamp.now()
        fic.languageId = Language.getId("English")  # TODO: don't hard code?

        userHrefPrefix = 'viewuser.php?uid='
        pagetitle = soup.find(id='pagetitle')
        # a missing #pagetitle falls through to the explicit errors below
        aTags = [] if pagetitle is None else pagetitle.findAll('a')
        author = None
        for a in aTags:
            href = a.get('href')
            if href is None:
                continue  # anchors without a target carry no information
            if href.startswith('viewstory'):
                fic.title = a.contents[0].strip()
            elif href.startswith(userHrefPrefix):
                author = a.contents[0]
                authorUrl = self.baseUrl + href
                # int() round trip validates that the id is numeric
                authorId = str(int(href[len(userHrefPrefix):]))
                self.setAuthor(fic, author, authorUrl, authorId)

        if fic.title is None:
            raise Exception('unable to find title')
        if author is None:
            raise Exception('unable to find author')

        # break the page so every tag starts a line, then keep the lines from
        # the SUMMARY START comment up to (not including) SUMMARY END
        lines = html.replace('\r', '\n').replace('<', '\n<').split('\n')
        inDescription = False
        descriptionLines = []
        for line in lines:
            cur = line.strip()
            if '!-- SUMMARY START --' in cur:
                inDescription = True
            elif '!-- SUMMARY END --' in cur:
                inDescription = False

            if inDescription:
                descriptionLines.append(cur + '\n')

        fic.description = ''.join(descriptionLines)

        fic.ageRating = '<unkown>'

        infoBlock = None
        infoText = None
        for block in soup.findAll('div', {'class': 'block'}):
            title = block.find('div', {'class': 'title'})
            if title is None:
                continue
            if title.contents[0] != 'Story Information':
                continue
            infoBlock = block
            infoText = block.get_text()
            break
        else:
            raise Exception('unable to find info text')

        matcher = RegexMatcher(
            infoText, {
                'chapterCount': (r'Chapters:\s+(\d+)', int),
                'wordCount': (r'Word count:\s+(\S+)', int),
            })
        matcher.matchAll(fic)

        # review count defaults to 0 when the sort bar or figure is absent
        fic.reviewCount = 0
        sortDiv = soup.find(id='sort')
        if sortDiv is not None:
            match = re.search(r'Reviews\s*-\s*([^\]]+)', sortDiv.get_text())
            if match is not None:
                fic.reviewCount = int(match.group(1).replace(',', ''))

        fic.favoriteCount = 0
        fic.followCount = 0

        infoBlockHtml = str(infoBlock)
        match = re.search(
            '<!-- PUBLISHED START -->([^<]*)<!-- PUBLISHED END -->',
            infoBlockHtml)
        if match is not None:
            publishedUts = util.parseDateAsUnix(match.group(1), fic.fetched)
            fic.published = OilTimestamp(publishedUts)

        match = re.search('<!-- UPDATED START -->([^<]*)<!-- UPDATED END -->',
                          infoBlockHtml)
        if match is not None:
            updatedUts = util.parseDateAsUnix(match.group(1), fic.fetched)
            fic.updated = OilTimestamp(updatedUts)

        if fic.updated is None:
            fic.updated = fic.published

        match = re.search(r'Completed:\s+(\S+)', infoText)
        if match is not None:
            complete = match.group(1)
            if complete == 'No':
                fic.ficStatus = FicStatus.ongoing
            elif complete == 'Yes':
                fic.ficStatus = FicStatus.complete
            else:
                raise Exception('unknown complete value: {}'.format(complete))

        # crossovers are left untagged for now:
        # raise Exception('Found unknown crossover in {0}: {1}'.format(fic.id, fic.url))
        if 'Crossovers' not in infoText:
            # otherwise not a crossover and just harry potter
            fic.add(Fandom.define('Harry Potter'))

        return fic

Esempio n. 14

0

Mostra file

    def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
        """Populate `fic` with metadata parsed from a story page.

        Title comes from the story link (plain or behind the adult-content
        confirm() javascript), description from the `storysummary` table and
        everything else from the text of the `storymaininfo` table.

        Args:
            fic: record to fill in; mutated in place.
            wwwHtml: html of the story page.

        Returns:
            The same `fic` that was passed in.

        Raises:
            Exception: if the main info table, title link, summary table,
                write status or author link cannot be found.
        """
        from bs4 import BeautifulSoup  # type: ignore
        soup = BeautifulSoup(wwwHtml, 'html.parser')
        storyMainInfoTables = soup.findAll('table', {'class': 'storymaininfo'})
        if len(storyMainInfoTables) != 1:
            raise Exception('unable to find main story info')
        storyMainInfo = storyMainInfoTables[0]

        fic.fetched = OilTimestamp.now()
        fic.languageId = Language.getId("English")  # TODO: don't hard code?

        disclaimerJs = "javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid="
        plainHref = '?psid={}'.format(fic.localId)
        allLinks = soup.findAll('a')
        for a in allLinks:
            href = a.get('href')
            if href is None:
                continue  # anchors without href cannot be the story link
            if href.startswith(disclaimerJs) or href == plainHref:
                fic.title = a.getText()
                break
        else:
            raise Exception('error: unable to find title')

        fic.url = self.constructUrl(fic.localId)

        storySummaryTables = soup.findAll('table', {'class': 'storysummary'})
        if len(storySummaryTables) != 1:
            raise Exception('cannot find story summary table')
        fic.description = storySummaryTables[0].getText().strip()

        # default optional fields
        fic.reviewCount = 0
        fic.favoriteCount = 0
        fic.followCount = 0

        # normalise non-breaking spaces so the \s based patterns match
        text = storyMainInfo.getText().replace('\xa0', ' ')
        matcher = RegexMatcher(
            text, {
                'ageRating': (r'Rating:\s+(Mature|15\+|12\+)', str),
                'chapterCount': (r'Chapters:\s+(\d+)', int),
                'wordCount': (r'Words:\s+(\d+)', int),
                'reviewCount': (r'Story Reviews:\s*(\d+)', int),
                'favoriteCount': (r'Favorite Story Of:\s+(\d+) users', int),
                'updated': (r'Last Updated:\s+(\S+)', str),
                'published': (r'First Published:\s+(\S+)', str),
            })
        matcher.matchAll(fic)

        # published/updated are handed to the date parser and re-wrapped as
        # timestamps
        if fic.published is not None:
            publishedUts = util.parseDateAsUnix(fic.published, fic.fetched)
            fic.published = OilTimestamp(publishedUts)

        if fic.updated is None:
            fic.updated = fic.published
        else:
            updatedUts = util.parseDateAsUnix(fic.updated, fic.fetched)
            fic.updated = OilTimestamp(updatedUts)

        if fic.chapterCount is None:
            fic.chapterCount = 1

        match = re.search(r'Status:\s+(Completed|Work In Progress|Abandoned)',
                          text)
        if match is None:
            raise Exception('cannot find write status')

        status = match.group(1)
        if status == 'Completed':
            fic.ficStatus = FicStatus.complete
        elif status == 'Work In Progress':
            fic.ficStatus = FicStatus.ongoing  # should these be abandoned?
        elif status == 'Abandoned':
            fic.ficStatus = FicStatus.abandoned
        else:
            raise Exception('unknown status: {}'.format(status))

        userHrefPrefix = 'viewuser.php?showuid='
        for a in allLinks:
            a_href = a.get('href')
            if a_href is None or not a_href.startswith(userHrefPrefix):
                continue
            author = a.get_text()
            authorUrl = self.baseUrl + '/' + a_href
            authorId = a_href[len(userHrefPrefix):]
            self.setAuthor(fic, author, authorUrl, authorId)
            break
        else:
            raise Exception('unable to find author:\n{}'.format(text))

        # TODO: chars/pairings?
        fic.add(Fandom.define('Harry Potter'))
        return fic

Esempio n. 15

0

Mostra file

File: fictionPressAdapter.py Progetto: FanFicDev/hermes

	def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
		"""Populate `fic` and its chapter records from a story page.

		Reads title, description, counts, dates, status and author from the
		`#profile_top` element, adds a Fandom for each whitelisted genre link
		in `#pre_story_links`, and upserts one chapter record per chapter with
		titles taken from the `#chap_select` options when present.

		If `#profile_top` is missing and the page shows the "Story Not Found"
		warning, the fic is marked abandoned, upserted and returned early.

		Args:
			fic: record to fill in; mutated in place.
			wwwHtml: html of the story page.

		Returns:
			The same `fic` that was passed in.

		Raises:
			Exception: if `#profile_top` is missing on a page that is not the
				deleted-story page, if title, description, author or
				`#pre_story_links` cannot be found, or on an unknown status or
				category.
		"""
		from bs4 import BeautifulSoup  # type: ignore
		deletedFicText = 'Story Not FoundUnable to locate story. Code 1.'
		soup = BeautifulSoup(wwwHtml, 'html5lib')
		profile_top = soup.find(id='profile_top')
		# story might've been deleted
		if profile_top is None:
			for gui_warning in soup.find_all('span', {'class': 'gui_warning'}):
				if gui_warning.get_text() == deletedFicText:
					fic.ficStatus = FicStatus.abandoned
					fic.upsert()
					return fic
			# not the deleted-story page either: fail with a clear message
			# rather than an AttributeError on None below
			raise Exception('error: unable to find profile_top')

		text = profile_top.get_text()
		pt_str = str(profile_top)

		fic.fetched = OilTimestamp.now()
		fic.languageId = Language.getId("English")  # TODO: don't hard code?

		# the title is the <b> whose only class is xcontrast_txt
		for b in profile_top.find_all('b'):
			b_class = b.get('class') or []  # tags without a class yield None
			if len(b_class) == 1 and b_class[0] == 'xcontrast_txt':
				fic.title = b.get_text()
				break
		else:
			raise Exception('error: unable to find title:\n{}\n'.format(pt_str))

		fic.url = self.constructUrl(fic.localId, 1, fic.title)

		# the description is the similarly classed <div> with a 2px top margin
		for div in profile_top.find_all('div'):
			div_class = div.get('class') or []
			if (
				div.get('style') == 'margin-top:2px' and len(div_class) == 1
				and div_class[0] == 'xcontrast_txt'
			):
				fic.description = div.get_text()
				break
		else:
			raise Exception('error: unable to find description:\n{}\n'.format(pt_str))

		# default optional fields
		fic.reviewCount = 0
		fic.favoriteCount = 0
		fic.followCount = 0

		matcher = RegexMatcher(
			text, {
				'ageRating': (r'Rated:\s+Fiction\s*(\S+)', str),
				'chapterCount?': (r'Chapters:\s+(\d+)', int),
				'wordCount': (r'Words:\s+(\S+)', int),
				'reviewCount?': (r'Reviews:\s+(\S+)', int),
				'favoriteCount?': (r'Favs:\s+(\S+)', int),
				'followCount?': (r'Follows:\s+(\S+)', int),
				'updated?': (r'Updated:\s+(\S+)', str),
				'published': (r'Published:\s+(\S+)', str),
			}
		)
		matcher.matchAll(fic)

		# published/updated are handed to the date parser and re-wrapped as
		# timestamps
		if fic.published is not None:
			publishedUts = util.parseDateAsUnix(fic.published, fic.fetched)
			fic.published = OilTimestamp(publishedUts)

		if fic.updated is None:
			fic.updated = fic.published
		else:
			updatedUts = util.parseDateAsUnix(fic.updated, fic.fetched)
			fic.updated = OilTimestamp(updatedUts)

		if fic.chapterCount is None:
			fic.chapterCount = 1

		# no "Status:" field at all is treated as ongoing
		match = re.search(r'Status:\s+(\S+)', text)
		if match is None:
			fic.ficStatus = FicStatus.ongoing
		else:
			status = match.group(1)
			if status == 'Complete':
				fic.ficStatus = FicStatus.complete
			else:
				raise Exception('unknown status: {}'.format(status))

		for a in profile_top.find_all('a'):
			a_href = a.get('href')
			if a_href is None or not a_href.startswith('/u/'):
				continue
			author = a.get_text()
			authorUrl = self.baseUrl + a_href
			authorId = a_href.split('/')[2]
			self.setAuthor(fic, author, authorUrl, authorId)
			break
		else:
			raise Exception('unable to find author:\n{}'.format(text))

		preStoryLinks = soup.find(id='pre_story_links')
		if preStoryLinks is None:
			raise Exception('error: unable to find pre_story_links')
		for a in preStoryLinks.find_all('a'):
			# a missing href becomes '' and ends up in the "unknown" log below
			href = a.get('href') or ''
			hrefParts = href.split('/')
			# only hrefs with a leading and a trailing slash are recognised
			slashWrapped = len(hrefParts[0]) == 0 and len(hrefParts[-1]) == 0

			# if it's a top level category: "/<category>/"
			if slashWrapped and len(hrefParts) == 3:
				cat = hrefParts[1]
				if cat in fictionPressCategories:
					continue  # skip categories
				raise Exception('unknown category: {}'.format(cat))

			# if it's a regular genre in some category: "/<category>/<genre>/"
			if slashWrapped and len(hrefParts) == 4:
				# ensure category is in our map
				if hrefParts[1] not in fictionPressCategories:
					raise Exception('unknown category: {}'.format(hrefParts[1]))

				# ensure it's in our whitelist
				if hrefParts[2] not in fictionPressGenres:
					util.logMessage(f'FictionPressAdapter: unknown genre {hrefParts[2]}')
					continue

				fic.add(Fandom.define(hrefParts[2]))
				continue

			util.logMessage(f'FictionPressAdapter: unknown genre {fic.id}: {href}')

		fic.upsert()

		chapterTitles = []
		if fic.chapterCount > 1:
			chapterSelect = soup.find(id='chap_select')
			if chapterSelect is not None:
				chapterTitles = [
					co.getText().strip() for co in chapterSelect.findAll('option')
				]

		for cid in range(fic.chapterCount):
			ch = fic.chapter(cid + 1)
			ch.localChapterId = str(cid + 1)
			if len(chapterTitles) > cid:
				ch.title = util.cleanChapterTitle(chapterTitles[cid], cid + 1)
			elif fic.chapterCount == 1 and cid == 0:
				# a single chapter story reuses the story title
				ch.title = fic.title
			ch.upsert()

		return fic