def lookup(query: str, timeout: float = 280.0) -> Dict[str, Any]:
	url = '/'.join([a.AX_LOOKUP_ENDPOINT, urllib.parse.quote(query, safe='')])
	meta = util.reqJson(url, timeout=timeout)
	# normalize an upstream 'error' key to the 'err' key used everywhere else
	if 'error' in meta and 'err' not in meta:
		meta['err'] = meta.pop('error', None)
	if 'err' not in meta:
		FicInfo.save(meta)
		try:
			# index the freshly saved FicInfo into elasticsearch for search()
			fis = FicInfo.select(meta['urlId'])
			es.save(fis[0])
		except Exception as e:
			traceback.print_exc()
			print(e)
			print('lookup: ^ something went wrong saving es data :/')
	return meta
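For reference, lookup builds its request URL by percent-encoding the entire query (with safe='', so slashes are encoded too) and appending it as a single path segment to a.AX_LOOKUP_ENDPOINT. A minimal sketch of that construction; the endpoint value below is a placeholder assumption, not the real configuration:

import urllib.parse

AX_LOOKUP_ENDPOINT = 'http://localhost:8000/lookup'  # placeholder assumption
query = 'https://example.com/fic/123'
url = '/'.join([AX_LOOKUP_ENDPOINT, urllib.parse.quote(query, safe='')])
print(url)  # http://localhost:8000/lookup/https%3A%2F%2Fexample.com%2Ffic%2F123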
def search(q: str, limit: int = 10) -> List[FicInfo]:
	try:
		es = Elasticsearch(hosts=["localhost"])
		res = es.search(index="fi", body={
				"query": {
					"multi_match": {
						"query": q,
						"analyzer": 'standard',
					},
				}
			}, size=limit)
		print(f"es.search({q}) => {res['hits']['total']['value']} hits")
		fis: List[FicInfo] = []
		for hit in res['hits']['hits']:
			if len(fis) >= limit:
				break
			fis += FicInfo.select(hit['_id'])
		return fis[:limit]
	except Exception as e:
		traceback.print_exc()
		print(e)
		print(f'fes.search({q}): ^ something went wrong searching es data :/')
		return []  # TODO
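search assumes each elasticsearch hit's _id is a FicInfo urlId, which implies es.save (called from lookup above) indexes documents under that id. A minimal sketch of what such a save helper could look like; the indexed field names are illustrative assumptions, not the project's actual mapping:

from elasticsearch import Elasticsearch

def save(fi: FicInfo) -> None:
	es = Elasticsearch(hosts=["localhost"])
	# index under the urlId so search() can round-trip hit['_id'] back
	# through FicInfo.select()
	es.index(index="fi", id=fi.id, body={
			"title": fi.title,    # field names here are assumptions
			"author": fi.author,
		})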
def fic_info(urlId: str) -> FlaskResponse:
	allInfo = FicInfo.select(urlId)
	if len(allInfo) < 1:
		# entirely unknown fic, 404
		return page_not_found(NotFound())
	ficInfo = allInfo[0]
	return redirect(url_for('index', q=ficInfo.source, id=ficInfo.id))
def get_cached_export(etype: str, urlId: str, fname: str) -> FlaskResponse:
	if etype not in ebook.EXPORT_TYPES:
		# if this is an unsupported export type, 404
		return page_not_found(NotFound())
	mimetype = ebook.EXPORT_MIMETYPES[etype]
	suff = ebook.EXPORT_SUFFIXES[etype]
	if not fname.endswith(suff):
		# we have a request for the wrong extension, 404
		return page_not_found(NotFound())
	if FicBlacklist.check(urlId):
		# blacklisted fic, 404
		return render_template('fic_info_blacklist.html'), 404

	fhash = request.args.get('h', None)
	fdir = ebook.buildExportPath(etype, urlId)
	if fhash is not None:
		# if the request is for a specific slug, try to serve it directly
		rname = fname
		fname = f'{fhash}{suff}'
		if os.path.isfile(os.path.join(fdir, fname)):
			return send_from_directory(fdir, fname, as_attachment=True,
					attachment_filename=rname, mimetype=mimetype,
					cache_timeout=(60 * 60 * 24 * 365))
		# fall through...

	# otherwise find the most recent export and give them that
	allInfo = FicInfo.select(urlId)
	if len(allInfo) < 1:
		# entirely unknown fic, 404
		return page_not_found(NotFound())
	ficInfo = allInfo[0]
	slug = ebook.buildFileSlug(ficInfo.title, ficInfo.author, urlId)

	rl = RequestLog.mostRecentByUrlId(etype, urlId)
	if rl is None:
		return page_not_found(NotFound())
	if not os.path.isfile(os.path.join(fdir, f'{rl.exportFileHash}{suff}')):
		# the most recent export is missing for some reason... regenerate it
		return get_cached_export_partial(etype, urlId)

	# redirect back to ourself with the correct filename
	return redirect(
		url_for('get_cached_export', etype=etype, urlId=urlId,
			fname=f'{slug}{suff}', h=rl.exportFileHash))
	if create and not os.path.isdir(fdir):
		os.makedirs(fdir)
	return fdir


def buildExportPath(etype: str, urlId: str, create: bool = False) -> str:
	urlId = urlId.lower()
	parts = [TARGET_CACHE_DIR, etype]
	for i in range(0, len(urlId), 3):
		parts.append(urlId[i:i + 3])
	parts.append(urlId)
	fdir = os.path.join(*parts)
	if create and not os.path.isdir(fdir):
		os.makedirs(fdir)
	return fdir


for fi in FicInfo.select():
	urlId = fi.id
	print(f'urlId: {urlId}')
	for etype in {'epub', 'html', 'mobi', 'pdf'}:
		odir = buildLegacyExportPath(etype, urlId)
		if not os.path.isdir(odir):
			continue
		tdir = buildExportPath(etype, urlId, create=True)
		for entry in os.scandir(odir):
			src = entry.path
			dst = os.path.join(tdir, entry.name)
			print(f' {src} => {dst}')
			shutil.copy2(src, dst)
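buildExportPath shards the lowercased urlId into three-character directory segments and then appends the full id, so no single cache directory has to hold every export. A worked example, assuming TARGET_CACHE_DIR is simply 'cache':

# assuming TARGET_CACHE_DIR = 'cache':
print(buildExportPath('epub', 'AbCdEfGh'))
# parts => ['cache', 'epub', 'abc', 'def', 'gh', 'abcdefgh']
# fdir  => 'cache/epub/abc/def/gh/abcdefgh'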
def generateFicInfo() -> Iterator[Dict[str, Any]]:
	for fi in FicInfo.select():
		if fi.sourceId == 19:
			continue
		yield handleFicInfo(fi)
def index_impl(urlId: str, legacy: bool) -> FlaskResponse:
	from_pw = request.args.get('from_pw', '').strip()
	blacklisted = False
	greylisted = False
	links = []
	ficInfo = None
	try:
		if legacy and len(urlId) > 1:
			fis = FicInfo.select(urlId)
			if len(fis) == 1:
				blacklisted = FicBlacklist.blacklisted(urlId)
				greylisted = FicBlacklist.greylisted(urlId)
				ficInfo = fis[0]
				epubRL = RequestLog.mostRecentByUrlId('epub', urlId)
				if epubRL is None:
					# we always generate the epub first, so if we don't have it
					# something went horribly wrong
					raise Exception("uh oh")
				slug = ebook.buildFileSlug(ficInfo.title, ficInfo.author, urlId)
				eh = epubRL.exportFileHash
				if eh is None:
					eh = 'unknown'
				epubUrl = url_for('get_cached_export', etype='epub', urlId=urlId,
						fname=f'{slug}.epub', h=eh)
				links = [('epub', True, epubUrl)]
				for etype in ebook.EXPORT_TYPES:
					if etype == 'epub':
						continue
					pe = ebook.findExistingExport(etype, urlId, eh)
					if pe is None:
						# for any etype that hasn't already been exported or is
						# out of date, create a (re)generate link
						link = url_for('get_cached_export_partial', etype=etype,
								urlId=urlId, cv=CACHE_BUSTER, eh=eh)
						links.append((etype, False, link))
					else:
						# otherwise build the direct link
						fname = slug + ebook.EXPORT_SUFFIXES[etype]
						fhash = pe[1]
						link = url_for('get_cached_export', etype=etype,
								urlId=urlId, fname=fname, h=fhash)
						links.append((etype, True, link))
	except:
		pass
	if greylisted:
		links = []
	resp = make_response(
		render_template('index.html', from_pw=from_pw, ficInfo=ficInfo,
			blacklisted=blacklisted, greylisted=greylisted, links=links))
	if legacy:
		resp.headers['X-Robots-Tag'] = 'noindex'
	return resp
def ensure_export(etype: str, query: str,
		urlId: Optional[str] = None) -> Dict[str, Any]:
	print(f'ensure_export: query: {query}')
	if etype not in ebook.EXPORT_TYPES:
		return getErr(WebError.invalid_etype, {
				'fn': 'ensure_export',
				'etype': etype
			})
	source = get_request_source()

	notes = []
	axAlive = ax.alive()
	if not axAlive:
		print('ensure_export: ax is not alive :(')
		if urlId is None or len(FicInfo.select(urlId)) != 1:
			return getErr(WebError.ax_dead)
		# otherwise fallthrough
		notes += ['backend api is down; results may be stale']

	initTimeMs = int(time.time() * 1000)
	meta = None
	lres = None
	try:
		if not axAlive:
			meta = FicInfo.select(urlId)[0]
		else:
			lres = ax.lookup(query)
			if 'err' in lres:
				endTimeMs = int(time.time() * 1000)
				RequestLog.insert(source, etype, query, endTimeMs - initTimeMs,
						None, json.dumps(lres), None, None, None, None)
				lres['upstream'] = True
				return lres
			meta = FicInfo.parse(lres)
	except Exception as e:
		traceback.print_exc()
		print(e)
		print('ensure_export: ^ something went wrong doing ax.lookup :/')
		return getErr(WebError.lookup_failed)

	metaDict = meta.toJson()
	infoTimeMs = int(time.time() * 1000)
	infoRequestMs = infoTimeMs - initTimeMs

	# attempt to find previous epub export if it exists...
	try:
		existingEpub = None
		if meta.contentHash is not None:
			existingEpub = ebook.findExistingExport('epub', meta.id, meta.contentHash)
		existingExport = None
		if etype == 'epub':
			existingExport = existingEpub
		elif existingEpub is not None:
			epub_fname, ehash = existingEpub
			existingExport = ebook.findExistingExport(etype, meta.id, ehash)
		if existingExport is not None:
			print(f'ensure_export({etype}, {query}): attempting to reuse previous export for {meta.id}')
			fname, fhash = existingExport
			metaString = ebook.metaDataString(meta)
			slug = ebook.buildFileSlug(meta.title, meta.author, meta.id)
			suff = ebook.EXPORT_SUFFIXES[etype]
			exportUrl = url_for('get_cached_export', etype=etype, urlId=meta.id,
					fname=f'{slug}{suff}', h=fhash)
			endTimeMs = int(time.time() * 1000)
			exportMs = endTimeMs - infoTimeMs
			RequestLog.insert(source, etype, query, infoRequestMs, meta.id,
					json.dumps(lres), exportMs, fname, fhash, exportUrl)
			print(f'ensure_export({etype}, {query}): reusing previous export for {meta.id}')
			return {
					'urlId': meta.id,
					'info': metaString,
					f'{etype}_fname': fname,
					'hash': fhash,
					'url': exportUrl,
					'meta': metaDict,
					'slug': slug,
					'hashes': {etype: fhash},
					'notes': notes
				}
	except Exception as e:
		traceback.print_exc()
		print(e)
		print('ensure_export: ^ something went wrong trying to reuse existing export :/')

	etext = None
	try:
		# TODO we could be timing this too...
		metaString = ebook.metaDataString(meta)
		chapters = ax.fetchChapters(meta)

		# actually do the export
		fname, fhash = None, None
		if etype == 'epub':
			fname, fhash = ebook.createEpub(meta, chapters)
		elif etype == 'html':
			fname, fhash = ebook.createHtmlBundle(meta, chapters)
		elif etype in ['mobi', 'pdf']:
			fname, fhash = ebook.convertEpub(meta, chapters, etype)
		else:
			raise InvalidEtypeException(f'err: unknown etype: {etype}')

		slug = ebook.buildFileSlug(meta.title, meta.author, meta.id)
		suff = ebook.EXPORT_SUFFIXES[etype]
		exportUrl = url_for('get_cached_export', etype=etype, urlId=meta.id,
				fname=f'{slug}{suff}', h=fhash)
		endTimeMs = int(time.time() * 1000)
		exportMs = endTimeMs - infoTimeMs
		RequestLog.insert(source, etype, query, infoRequestMs, meta.id,
				json.dumps(lres), exportMs, fname, fhash, exportUrl)
		return {
				'urlId': meta.id,
				'info': metaString,
				f'{etype}_fname': fname,
				'hash': fhash,
				'url': exportUrl,
				'meta': metaDict,
				'slug': slug,
				'hashes': {etype: fhash}
			}
	except Exception as e:
		endTimeMs = int(time.time() * 1000)
		exportMs = endTimeMs - infoTimeMs
		RequestLog.insert(source, etype, query, endTimeMs - initTimeMs, meta.id,
				json.dumps(lres), exportMs, None, None, None)
		if e.args is not None and len(e.args) > 0:
			if isinstance(e, ax.MissingChapterException):
				etext = e.args[0]
			elif isinstance(e, InvalidEtypeException):
				etext = e.args[0]
		traceback.print_exc()
		print(e)
		print('ensure_export: ^ something went wrong :/')
		return getErr(WebError.export_failed, {
				'msg': f'{etype} export failed\nplease try again in a few minutes, or report this on discord if the issue persists',
				'etext': etext,
				'meta': metaDict,
			})
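InvalidEtypeException is raised and special-cased in ensure_export but never defined in this excerpt; the minimal definition that satisfies this usage (its real definition may carry more) is a plain Exception subclass:

class InvalidEtypeException(Exception):
	pass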