Example #1
	def createFromZList(self, fic: Fic, ts: int, data: str) -> Fic:
		fic.url = self.constructUrl(fic.localId, 1)

		fic = self.parseZListInfoInto(fic, ts, data)
		fic.upsert()

		return Fic.lookup((fic.id, ))
Example #2
	def create(self, fic: Fic) -> Fic:
		fic.url = self.constructUrl(fic.localId)
		data = scrape.softScrape(fic.url)
		if data is None:
			raise Exception('unable to scrape? FIXME')

		fic = self.parseInfoInto(fic, data)
		fic.upsert()

		return Fic.lookup((fic.id, ))
Example #3
    def get(self, localId: str) -> Fic:
        existing = Fic.select({'sourceId': self.ftype, 'localId': localId})
        if len(existing) == 1:
            return existing[0]

        fic = Fic.new()
        fic.sourceId = self.ftype
        fic.localId = localId
        fic.created = OilTimestamp.now()
        return self.create(fic)
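
A minimal, self-contained sketch of the get-or-create pattern above, using a plain dict in place of the Fic table (every name here is illustrative, not hermes API):

# Hypothetical stand-in for the lookup-then-create flow in get().
_store: dict = {}  # plays the role of the Fic table, keyed by (sourceId, localId)

def get_or_create(sourceId: int, localId: str) -> dict:
    key = (sourceId, localId)
    if key in _store:  # Fic.select(...) returned exactly one row
        return _store[key]
    fic = {'sourceId': sourceId, 'localId': localId}  # Fic.new() plus field setup
    _store[key] = fic  # create() would scrape, parse, and upsert here
    return fic
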
Example #4
    def getCurrentInfo(self, fic: Fic) -> Fic:
        fic.url = self.constructUrl(fic.localId)
        url = self.tocUrl
        data = scrape.scrape(url)
        edumpContent('<!-- {} -->\n{}'.format(url, data['raw']),
                     'wavesarisen_ec')

        fic = self.parseInfoInto(fic, data['raw'])
        fic.upsert()
        return Fic.lookup((fic.id, ))
Example #5
	def getFromZList(self, localId: int, ts: int, html: str) -> Fic:
		fic = None
		existing = Fic.select({'sourceId': self.ftype, 'localId': str(localId)})
		if len(existing) != 1:
			fic = Fic.new()
			fic.sourceId = self.ftype
			fic.localId = str(localId)
			fic.created = OilTimestamp.now()
		else:
			fic = existing[0]
		return self.createFromZList(fic, ts, html)
Example #6
	def get(self, localId: str) -> Fic:
		existing = Fic.select({'sourceId': self.ftype, 'localId': localId})
		if len(existing) == 1:
			return existing[0]
		if not self.cacheable:
			raise Exception('cannot cache {}/{}'.format(localId, self.ftype))

		fic = Fic.new()
		fic.sourceId = self.ftype
		fic.localId = localId
		fic.created = OilTimestamp.now()
		return self.create(fic)
Example #7
	def create(self, fic: Fic) -> Fic:
		fic.url = self.constructUrl(fic.localId)

		# scrape fresh info
		data = scrape.scrape(fic.url)

		edumpContent(data['raw'], 'sugarquill')

		fic = self.parseInfoInto(fic, data['raw'])
		fic.upsert()

		return Fic.lookup((fic.id, ))
Example #8
File: wselect.py, Project: FanFicDev/hermes
    def __init__(self, parent: Optional['Hermes'], target: Optional[Fic] = None):
        self.parent = parent
        self.fics = Fic.list() if target is None else Fic.list(
            {'id': target.id})
        self.list = self.fics
        self.idx = 0
        self.filter = ''
        self.width, self.height = 80, 24
        self.msg: Optional[Tuple[int, str]] = None
        self.__refilter(target)
        self._userFicCache: Dict[int, UserFic] = {}
        self._rebuildUserFicCache()
Example #9
    def create(self, fic: Fic) -> Fic:
        fic.url = self.constructUrl(fic.localId)

        # scrape fresh info
        data = scrape.scrape(fic.url)
        time.sleep(self.baseDelay)

        edumpContent(data['raw'], 'hpffa')

        fic = self.parseInfoInto(fic, data['raw'])
        fic.upsert()

        return Fic.lookup((fic.id, ))
Example #10
    def create(self, fic: Fic) -> Fic:
        # TODO: should we try to get the actual url here, including the
        # url-safe version of the title before the lid? This needs to be done
        # elsewhere in this adapter as well.
        fic.url = self.baseUrl + 'threads/' + str(fic.localId)

        # scrape fresh info
        data = self.scrapeLike(fic.url)

        fic = self.parseInfoInto(fic, data)
        fic.upsert()

        return Fic.lookup((fic.id, ))
Example #11
    def create(self, fic: Fic) -> Fic:
        fic.url = self.constructUrl(fic.localId, 1)

        # scrape fresh info
        data = scrape.scrape(fic.url)

        fic = self.parseInfoInto(fic, data['raw'])
        fic.insert()

        chapter = fic.chapter(1)
        chapter.setHtml(data['raw'])
        chapter.upsert()

        return Fic.lookup((fic.id, ))
Example #12
	def tryParseUrl(self, url: str) -> Optional[FicId]:
		# by default, we simply try to look up the url in existing chapters or fics
		chaps = FicChapter.select({'url': url})
		if len(chaps) == 1:
			fic = Fic.get((chaps[0].ficId, ))
			if fic is not None:
				return FicId(
					FicType(fic.sourceId), fic.localId, chaps[0].chapterId, False
				)

		fics = Fic.select({'url': url})
		if len(fics) == 1:
			return FicId(FicType(fics[0].sourceId), fics[0].localId)

		raise NotImplementedError()
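
tryParseUrl acts as one link in a chain: an adapter returns a FicId when the url already maps to a known chapter or fic, and otherwise falls through (here, by raising NotImplementedError). A hedged sketch of a driver over several such adapters; the driver itself is an assumption, not hermes code:

# Illustrative driver: ask each adapter in turn until one yields a FicId.
def tryParseAny(adapters, url):
    for adapter in adapters:
        ficId = adapter.tryParseUrl(url)  # None means "not mine, keep going"
        if ficId is not None:
            return ficId
    return None
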
Example #13
	def getCurrentInfo(self, fic: Fic) -> Fic:
		fic.url = self.baseUrl + str(fic.localId)
		url = fic.url.split('?')[0] + '?view_adult=true'
		# scrape fresh info
		data = scrape.scrape(url)

		return self.parseInfoInto(fic, data['raw'])
Example #14
def v0_fic_all(urlId: str) -> Any:
    fics = Fic.select({'urlId': urlId})
    if len(fics) != 1:
        return Err.urlId_not_found.get()
    fic = fics[0]
    if fic.chapterCount is None:
        print(f'err: fic has no chapter count: {fic.id}')
        return Err.urlId_not_found.get()
    ficChapters = {
        fc.chapterId: fc
        for fc in FicChapter.select({'ficId': fic.id})
    }
    chapters = {}
    for cid in range(1, fic.chapterCount + 1):
        if cid not in ficChapters:
            return Err.cid_not_found.get({'arg': f'{fic.id}/{cid}'})
        chapter = ficChapters[cid]
        cres = chapter.toJSONable()
        try:
            content = cres['content']
            if content is not None:
                content = util.decompress(content)
                content = scrape.decodeRequest(content, f'{fic.id}/{cid}')
                content = cleanHtml(content)
                if content != cleanHtml(content):
                    print(
                        f'v0_fic_all: {fic.id}/{cid} did not round-trip through cleanHtml'
                    )
            cres['content'] = content
            chapters[cid] = cres
        except Exception:
            # skip chapters whose content fails to decompress or decode
            pass

    res = fic.toJSONable()
    return Err.ok({'info': res, 'chapters': chapters})
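
The comparison above checks that cleanHtml is idempotent: cleaning already-cleaned content must change nothing. A standalone illustration of the same invariant, with a whitespace-collapsing stand-in for cleanHtml:

import re

def clean(html: str) -> str:
    # Stand-in cleaner; the real cleanHtml does much more than collapse spaces.
    return re.sub(r'\s+', ' ', html).strip()

once = clean('<p>  hello\n world </p>')
assert clean(once) == once  # a second pass must be a no-op
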
Example #15
	def setAuthor(
		self, fic: Fic, author: str, authorUrl: str, authorLocalId: str
	) -> None:
		fic.authorId = Author.getId(author, self.ftype)
		AuthorSource.getId(
			fic.authorId, self.ftype, author, authorUrl, authorLocalId
		)
Example #16
def populateManualTemplate(url, chapterUrls, author):
	existingManual = Fic.select({'type': FicType.manual})
	lid = len(existingManual) + 1

	manRename = {'id': None}
	manDefaults = {
		'fandoms': [],
		'characters': [],
		'tags': [],
		'genres': [],
		'authorUrl': url,
		'author': author,
		'authorId': author,
		'ageRating': 'M',
		'language': 'English',
		'favorites': 0,
		'follows': 0,
		'reviews': 0,
		'url': url,
		'lastUrl': url,
		'type': FicType.manual,
		'lid': lid,
		'ficStatus': Status.complete,
		'wordCount': -1,
		'description': 'FILL IN MY DESCRIPTION',
		'title': 'FILL IN MY TITLE',
		'published': 'FILL IN MY PUBLISHED DATE',
		'updated': 'FILL IN MY UPDATED DATE',
		'added': int(time.time()),
		'fetched': int(time.time())
	}

	fic = Fic.new().__dict__
	fic = inflateObject(fic, manRename, manDefaults)

	fic['chapters'] = {}
	fic['chapterCount'] = len(chapterUrls)

	for cid in range(1, len(chapterUrls) + 1):
		fic['chapters'][cid] = {
			'lastModified': int(time.time()),
			'status': Status.ongoing,
			'fetched': int(time.time()),
			'url': chapterUrls[cid - 1],
		}

	return fic
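
populateManualTemplate returns a plain dict whose FILL IN placeholders are meant to be edited by hand before the result is handed to an importer such as importFic in example #21. A hedged usage sketch (url, chapter urls, and author are made up):

fic = populateManualTemplate(
    'https://example.com/story',
    ['https://example.com/story/1', 'https://example.com/story/2'],
    'some_author',
)
fic['title'] = 'An Actual Title'
fic['description'] = 'An actual description.'
# published/updated still hold their FILL IN placeholders at this point.
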
Example #17
File: wselect.py, Project: FanFicDev/hermes
    def refresh(self) -> None:
        self.fics = Fic.list()
        self._rebuildUserFicCache()
        target = None
        if self.idx < len(self.list):
            target = self.list[self.idx]
        self.__refilter(target)
        self.pushMessage('refreshed fic list')
Example #18
	def buildUrl(self, chapter: 'FicChapter') -> str:
		# TODO: do we need these 2 lines or will they always be handled by
		# whatever creates the FicChapter?
		if chapter.fic is None:
			chapter.fic = Fic.lookup((chapter.ficId, ))
		return self.constructUrl(
			chapter.fic.localId, chapter.chapterId, chapter.fic.title
		)
Example #19
	def create(self, fic: Fic) -> Fic:
		fic.url = self.baseUrl + str(fic.localId)

		# scrape fresh info
		url = fic.url.split('?')[0] + '?view_adult=true'
		data = scrape.scrape(url)

		edumpContent(data['raw'], 'ao3')

		fic = self.parseInfoInto(fic, data['raw'])
		fic.upsert()

		chapter = fic.chapter(1)
		chapter.setHtml(data['raw'])
		chapter.upsert()

		return Fic.lookup((fic.id, ))
Example #20
def populateFATemplate(author, storyAbbreviation, chapterCount):
	url = 'http://www.fictionalley.org/authors/{}/{}.html'.format(
		author, storyAbbreviation
	)
	lastUrl = url[:-5] + '01.html'
	if chapterCount == 1:
		lastUrl = url[:-5] + '01a.html'
	lid = 1

	faRename = {'id': None}
	faDefaults = {
		'fandoms': ['Harry Potter'],
		'characters': [],
		'tags': [],
		'genres': [],
		'authorUrl': 'http://www.fictionalley.org/authors/{}'.format(author),
		'author': author,
		'authorId': author,
		'ageRating': 'PG',
		'language': 'English',
		'favorites': 0,
		'follows': 0,
		'reviews': 0,
		'url': url,
		'lastUrl': lastUrl,
		'type': FicType.fictionalley,
		'lid': lid,
		'ficStatus': Status.complete,
		'wordCount': -1,
		'description': 'FILL IN MY DESCRIPTION',
		'title': 'FILL IN MY TITLE',
		'published': 'FILL IN MY PUBLISHED DATE',
		'updated': 'FILL IN MY UPDATED DATE',
		'added': int(time.time()),
		'fetched': int(time.time())
	}

	fic = Fic.new().__dict__
	fic = inflateObject(fic, faRename, faDefaults)

	fic['chapters'] = {}
	fic['chapterCount'] = chapterCount

	for cid in range(1, chapterCount + 1):
		chapterUrl = url[:-5] + '{:02}.html'.format(cid)
		if chapterCount == 1:
			chapterUrl = url[:-5] + '01a.html'
		fic['chapters'][cid] = {
			'lastModified': int(time.time()),
			'status': Status.ongoing,
			'fetched': int(time.time()),
			'url': chapterUrl
		}
		contentDir = './content/{}/{}/{}'.format(FicType.fictionalley, lid, cid)
		if not os.path.isdir(contentDir):
			os.makedirs(contentDir)

	return fic
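
The chapter urls swap the trailing '.html' for a zero-padded chapter suffix, with the special '01a.html' form reserved for single-chapter stories. Concretely (author and story abbreviation are made up):

url = 'http://www.fictionalley.org/authors/jdoe/story.html'
print(url[:-5] + '{:02}.html'.format(3))  # .../authors/jdoe/story03.html
print(url[:-5] + '01a.html')              # .../authors/jdoe/story01a.html
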
Example #21
def importFic(fdata):
	global ficImportRename
	ofic = inflateObject(fdata.copy(), ficImportRename)

	fic = Fic.new()
	for field in ofic:
		print('setting "{}" to "{}"'.format(field, ofic[field]))
		fic.__dict__[field] = ofic[field]

	fic.published = util.parseDateAsUnix(fic.published, int(time.time()))
	fic.updated = util.parseDateAsUnix(fic.updated, int(time.time()))
	print('setting "{}" to "{}"'.format('published', fic.published))
	print('setting "{}" to "{}"'.format('updated', fic.updated))

	print('adding "{}" ({}/{})'.format(fic.title, fic.type, fic.localId))

	fic.insert()

	for fandom in fdata['fandoms']:
		print('  adding fandom "{}"'.format(fandom))
		fic.add(Fandom.define(fandom))
	for character in fdata['characters']:
		print(
			'  adding character "{}" from fandom "{}"'.format(
				character['name'], character['fandom']
			)
		)
		fic.add(
			Character.define(Fandom.define(character['fandom']), character['name'])
		)
	for genre in fdata['genres']:
		print('  adding genre "{}"'.format(genre))
		fic.add(Genre.define(genre))
	for tag in fdata['tags']:
		print('  adding tag "{}"'.format(tag))
		fic.add(Tag.define(tag))

	cids = [int(cid) for cid in fdata['chapters']]
	cids.sort()
	for cid in cids:
		print('  adding chapter {}'.format(cid))
		ochap = fdata['chapters'][str(cid)]
		chapter = FicChapter.new()
		chapter.fic = fic
		chapter.ficId = fic.id
		chapter.chapterId = cid
		for field in ochap:
			chapter.__dict__[field] = ochap[field]
		contentPath = './content/{}/{}/{}/content.html'.format(
			fic.type, fic.localId, cid
		)
		if os.path.isfile(contentPath):
			html = None
			with open(contentPath, 'r') as f:
				html = f.read()
			print('    has content: {}'.format(len(html)))
			chapter.setHtml(html)
		chapter.insert()
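
importFic expects exactly the dict shape the template helpers above produce. A hedged sketch of wiring the two together; the filled-in values are placeholders of my own:

fic = populateFATemplate('jdoe', 'story', 2)
fic['title'] = 'An Actual Title'
fic['description'] = 'An actual description.'
fic['published'] = '2001-01-01'
fic['updated'] = '2001-02-01'
importFic(fic)
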
Example #22
    def create(self, fic: Fic) -> Fic:
        fic.url = self.constructUrl(fic.localId, 1)

        # scrape fresh info
        data = scrape.softScrape(fic.url)
        if data is None:
            raise Exception('unable to scrape? FIXME')

        fic = self.parseInfoInto(fic, data)
        fic.upsert()

        chapter = fic.chapter(1)
        chapter.setHtml(data)
        chapter.localChapterId = str(1)
        chapter.url = self.constructUrl(fic.localId, 1)
        chapter.upsert()

        return Fic.lookup((fic.id, ))
Example #23
    def buildUrl(self, chapter: 'FicChapter') -> str:
        # TODO: do we need these 2 lines or will they always be handled by
        # whatever creates the FicChapter?
        if chapter.fic is None:
            chapter.fic = Fic.lookup((chapter.ficId, ))
        if chapter.localChapterId is None:
            raise Exception('chapter missing localChapterId? FIXME')
        return self.constructUrl(chapter.fic.localId,
                                 int(chapter.localChapterId))
Example #24
	def tryParseUrl(self, url: str) -> Optional[FicId]:
		if not url.startswith(self.baseUrl):
			return None

		# by default, we simply try to look up the url in existing chapters or fics
		chaps = FicChapter.select({'url': url})
		if len(chaps) == 1:
			fic = Fic.get((chaps[0].ficId, ))
			if fic is not None:
				ftype = FicType(fic.sourceId)
				return FicId(ftype, fic.localId, chaps[0].chapterId, False)

		fics = Fic.select({'url': url})
		if len(fics) == 1:
			ftype = FicType(fics[0].sourceId)
			return FicId(ftype, fics[0].localId)

		leftover = url[len(self.baseUrl):]
		if not leftover.endswith('.html'):
			return None

		ps = leftover.split('/')
		if len(ps) != 3 or ps[0] != 'authors':
			return None

		author = ps[1]
		storyId = ps[2]
		suffixes = ['01a.html', '.html']
		for suffix in suffixes:
			if storyId.endswith(suffix):
				storyId = storyId[:-len(suffix)]

		# note: seems to be safe to lowercase these
		lid = (author + '/' + storyId).lower()
		#print(lid)
		# make lid author/story ?

		# TODO: we need some sort of local lid mapping...
		raise NotImplementedError()
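
The suffix stripping collapses both the chapter page and the bare story page to one id, so 'authors/jdoe/story01a.html' and 'authors/jdoe/story.html' yield the same lid. A standalone check (the author and story values are made up):

for leftover in ('authors/jdoe/story01a.html', 'authors/jdoe/story.html'):
    ps = leftover.split('/')
    storyId = ps[2]
    for suffix in ('01a.html', '.html'):
        if storyId.endswith(suffix):
            storyId = storyId[:-len(suffix)]
    print((ps[1] + '/' + storyId).lower())  # 'jdoe/story' both times
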
Example #25
    def getCurrentInfo(self, fic: Fic) -> Fic:
        # grab the content from disk
        info = self.getArchiveStoryInfo(int(fic.localId))
        spath = '{}/archive/{}/{}/summary.html.gz'.format(
            self.archivePath, info[1], info[2])
        data = self.slurp(spath)
        fic = self.parseInfoInto(fic, data)
        fic.upsert()

        chapterCount = fic.chapterCount or 1
        dCount = int(math.floor(math.log(chapterCount, 10) + 1))
        localChapterIdMap = self.getChapterIds(int(fic.localId))
        for cid in range(1, chapterCount + 1):
            pcid = str(cid).zfill(dCount)
            fpath = '{}/archive/{}/{}/chapters/chapter_{}.html.gz'.format(
                self.archivePath, info[1], info[2], pcid)
            data = self.slurp(fpath)
            chapter = fic.chapter(cid)
            chapter.localChapterId = localChapterIdMap[cid]
            chapter.setHtml(data)
            chapter.upsert()

        return Fic.lookup((fic.id, ))
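
dCount above is the number of decimal digits in chapterCount, used to zero-pad the chapter file names. Concretely:

import math

chapterCount = 12
dCount = int(math.floor(math.log(chapterCount, 10) + 1))  # 2 digits
print('chapter_{}.html.gz'.format(str(3).zfill(dCount)))  # chapter_03.html.gz
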
Example #26
    def getDeepAuthorPosts(self, fic: Fic) -> Dict[str, Any]:
        from bs4 import BeautifulSoup
        urls = self.getDeepPageUrls(fic)
        soups: Dict[str, Any] = {}
        for url in urls:
            pageContent = self.scrapeLike(url)
            pageSoup = BeautifulSoup(pageContent, 'html5lib')
            posts = pageSoup.find_all(self.postContainer, {
                'class': 'message',
                'data-author': fic.getAuthorName()
            })
            for post in posts:
                soups[post.get('id')] = post
        return soups
Example #27
	def getCurrentInfo(self, fic: Fic) -> Fic:
		# FIXME when fics are deleted they 404:
		# https://www.royalroad.com/fiction/38947/
		# 404
		# Page Not Found
		# The server has returned the following error:
		# This fiction has been deleted
		fic.url = self.constructUrl(fic.localId)

		data = self.scrape(fic.url)
		if 'raw' not in data:
			raise Exception('unable to scrape? FIXME')
		raw = data['raw']

		return self.parseInfoInto(fic, raw)
Example #28
def v0_cache(urlId: str) -> Any:
    fics = Fic.select({'urlId': urlId})
    if len(fics) != 1:
        return Err.urlId_not_found.get()
    fic = fics[0]
    if fic.chapterCount is None:
        print(f'err: fic has no chapter count: {fic.id}')
        return Err.urlId_not_found.get()
    for cid in range(1, fic.chapterCount + 1):
        try:
            chapter = fic.chapter(cid)
            chapter.cache()
        except Exception:
            return Err.failed_to_cache_cid.get({'arg': f'{fic.id}/{cid}'})

    return Err.ok(fic.toJSONable())
Example #29
    def getDeepAuthorPostUrls(self, fic: Fic) -> List[str]:
        urls = self.getDeepPageUrls(fic)
        util.logMessage(
            f'XenForo.getDeepAuthorPostUrls|deep page urls: {urls}')
        # TODO this should probably be more comprehensive...
        author = fic.getAuthorName()
        altAuthor = author.replace("'", '&#039;')
        postUrls: List[str] = []
        seenIdStubs = set()
        for url in urls:
            pageContent = self.scrapeLike(url)

            # See getReaderPostUrls for a fully parsed version
            for b in pageContent.split('<'):
                e = b.find('>')
                if e == -1:
                    continue
                s = b[:e]
                # TODO FIXME this is bad :(
                # looking for li or article (the post container)
                if not (b.startswith('li id=')
                        or b.startswith('article class=')):
                    continue
                # check for 'message' -- simulates checking for message class
                if 'message' not in s:
                    continue
                # to check the data-author we simply look for the author and hope
                # there aren't collisions
                if s.find(author) < 0 and s.find(altAuthor) < 0:
                    continue
                # loop over spaced tokens looking for an unspaced id attribute
                for sb in s.split():
                    if not sb.startswith('id="') or not sb.endswith('"'):
                        continue
                    idStub = sb[len('id="'):-1]
                    if idStub.startswith('js-'):
                        idStub = idStub[len('js-'):]
                    postUrl = url + '#' + idStub
                    if idStub not in seenIdStubs:
                        postUrls += [postUrl]
                    seenIdStubs |= {idStub}
        util.logMessage(f'XenForo.getDeepAuthorPostUrls|postUrls: {postUrls}')
        return postUrls
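
Per token, the id scan above reduces to the check below; example #26 shows the BeautifulSoup-parsed equivalent of the same author-post search. The tag text here is invented:

s = 'li id="js-post-1234" class="message" data-author="Someone"'
for sb in s.split():
    if sb.startswith('id="') and sb.endswith('"'):
        idStub = sb[len('id="'):-1]
        if idStub.startswith('js-'):
            idStub = idStub[len('js-'):]
        print(idStub)  # post-1234
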
Example #30
def v0_lookup() -> Any:
    q = request.args.get('q', '').strip()
    if len(q) < 1:
        return Err.no_query.get({'arg': q})

    print(f'v0_lookup: query: {q}')
    ficId = FicId.tryParse(q)
    if ficId is None:
        return Err.bad_query.get({'arg': q})

    print(f'v0_lookup: ficId: {ficId.__dict__}')
    try:
        fic = Fic.load(ficId)
        return v0_fic(fic.urlId)
    except Exception:
        print('v0_lookup: something went wrong in load:')
        traceback.print_exc()
    return Err.bad_ficId.get({'arg': ficId.__dict__})
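
v0_lookup reads its query from Flask's request args. Assuming the handler is routed at /v0/lookup (the route registration is not shown above), a client call might look like:

# Hypothetical client; the route and port are assumptions.
import requests

r = requests.get(
    'http://localhost:5000/v0/lookup',
    params={'q': 'https://www.royalroad.com/fiction/12345'},
)
print(r.json())
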