Example #1
# `a` (endpoint constants), `util` (request helpers), `FicInfo`, and `es` are
# project-local modules (not shown here).
import traceback
import urllib.parse
from typing import Any, Dict


def lookup(query: str, timeout: float = 280.0) -> Dict[str, Any]:
    url = '/'.join([a.AX_LOOKUP_ENDPOINT, urllib.parse.quote(query, safe='')])
    meta = util.reqJson(url, timeout=timeout)
    # normalize the upstream 'error' key to the local 'err' convention
    if 'error' in meta and 'err' not in meta:
        meta['err'] = meta.pop('error', None)
    if 'err' not in meta:
        # persist the metadata locally, then index it in elasticsearch
        FicInfo.save(meta)
        try:
            fis = FicInfo.select(meta['urlId'])
            es.save(fis[0])
        except Exception as e:
            traceback.print_exc()
            print(e)
            print('lookup: ^ something went wrong saving es data :/')
    return meta
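
A minimal usage sketch (the query URL below is hypothetical; on success the
returned dict carries 'urlId', on failure 'err'):

meta = lookup('https://www.fanfiction.net/s/1234567')  # hypothetical query
if 'err' in meta:
    print('lookup failed:', meta['err'])
else:
    print('found fic:', meta['urlId'])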
Example #2
# `FicInfo` is a project-local class (not shown here).
import traceback
from typing import List

from elasticsearch import Elasticsearch


def search(q: str, limit: int = 10) -> List[FicInfo]:
    try:
        es = Elasticsearch(hosts=["localhost"])
        res = es.search(index="fi",
                        body={
                            "query": {
                                "multi_match": {
                                    "query": q,
                                    "analyzer": "standard",
                                },
                            }
                        },
                        size=limit)
        print(f"es.search({q}) => {res['hits']['total']['value']} hits")
        fis: List[FicInfo] = []
        for hit in res['hits']['hits']:
            if len(fis) >= limit:
                break
            # each hit's _id is a urlId; hydrate it from the local database
            fis += FicInfo.select(hit['_id'])
        return fis[:limit]
    except Exception as e:
        traceback.print_exc()
        print(e)
        print(f'fes.search({q}): ^ something went wrong searching es data :/')
        return []  # TODO
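
A quick usage sketch, assuming an elasticsearch instance on localhost and the
FicInfo model used throughout these examples:

for fi in search('time travel', limit=5):
    print(fi.id, fi.title)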
Example #3
# `FicInfo`, `page_not_found`, and the FlaskResponse alias are project-local.
from flask import redirect, url_for
from werkzeug.exceptions import NotFound


def fic_info(urlId: str) -> FlaskResponse:
    allInfo = FicInfo.select(urlId)
    if len(allInfo) < 1:
        # entirely unknown fic, 404
        return page_not_found(NotFound())
    ficInfo = allInfo[0]

    return redirect(url_for('index', q=ficInfo.source, id=ficInfo.id))
Example #4
# `ebook`, `FicInfo`, `FicBlacklist`, `RequestLog`, `page_not_found`,
# `get_cached_export_partial`, and the FlaskResponse alias come from the
# surrounding app module.
import os

from flask import redirect, render_template, request, send_from_directory, url_for
from werkzeug.exceptions import NotFound


def get_cached_export(etype: str, urlId: str, fname: str) -> FlaskResponse:
    if etype not in ebook.EXPORT_TYPES:
        # if this is an unsupported export type, 404
        return page_not_found(NotFound())

    mimetype = ebook.EXPORT_MIMETYPES[etype]
    suff = ebook.EXPORT_SUFFIXES[etype]
    if not fname.endswith(suff):
        # we have a request for the wrong extension, 404
        return page_not_found(NotFound())

    if FicBlacklist.check(urlId):
        # blacklisted fic, 404
        return render_template('fic_info_blacklist.html'), 404

    fhash = request.args.get('h', None)
    fdir = ebook.buildExportPath(etype, urlId)
    if fhash is not None:
        # if the request is for a specific slug, try to serve it directly
        rname = fname
        fname = f'{fhash}{suff}'
        if os.path.isfile(os.path.join(fdir, fname)):
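            # note: on Flask >= 2.0 these arguments are named download_name
            # and max_age instead of attachment_filename and cache_timeout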
            return send_from_directory(fdir,
                                       fname,
                                       as_attachment=True,
                                       attachment_filename=rname,
                                       mimetype=mimetype,
                                       cache_timeout=(60 * 60 * 24 * 365))
        # fall through...

    # otherwise find the most recent export and give them that
    allInfo = FicInfo.select(urlId)
    if len(allInfo) < 1:
        # entirely unknown fic, 404
        return page_not_found(NotFound())
    ficInfo = allInfo[0]
    slug = ebook.buildFileSlug(ficInfo.title, ficInfo.author, urlId)
    rl = RequestLog.mostRecentByUrlId(etype, urlId)
    if rl is None:
        return page_not_found(NotFound())

    if not os.path.isfile(os.path.join(fdir, f'{rl.exportFileHash}{suff}')):
        # the most recent export is missing for some reason... regenerate it
        return get_cached_export_partial(etype, urlId)

    # redirect back to ourself with the correct filename
    return redirect(
        url_for('get_cached_export',
                etype=etype,
                urlId=urlId,
                fname=f'{slug}{suff}',
                h=rl.exportFileHash))
Example #5
    # (tail of buildLegacyExportPath; the start of the function is truncated
    # in this snippet; os, shutil, TARGET_CACHE_DIR, and FicInfo come from
    # the surrounding module)
    if create and not os.path.isdir(fdir):
        os.makedirs(fdir)
    return fdir


def buildExportPath(etype: str, urlId: str, create: bool = False) -> str:
    urlId = urlId.lower()
    parts = [TARGET_CACHE_DIR, etype]
    for i in range(0, len(urlId), 3):
        parts.append(urlId[i:i + 3])
    parts.append(urlId)
    fdir = os.path.join(*parts)
    if create and not os.path.isdir(fdir):
        os.makedirs(fdir)
    return fdir


# one-off migration: copy every existing export from the legacy layout into
# the sharded layout produced by buildExportPath
for fi in FicInfo.select():
    urlId = fi.id
    print(f'urlId: {urlId}')
    for etype in {'epub', 'html', 'mobi', 'pdf'}:
        odir = buildLegacyExportPath(etype, urlId)
        if not os.path.isdir(odir):
            continue
        tdir = buildExportPath(etype, urlId, create=True)
        for entry in os.scandir(odir):
            src = entry.path
            dst = os.path.join(tdir, entry.name)
            print(f'  {src} => {dst}')
            shutil.copy2(src, dst)
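
buildExportPath shards the lowercased urlId into three-character path segments
so no single cache directory accumulates too many entries. A quick sketch of
the resulting layout (the TARGET_CACHE_DIR value here is an assumption):

# assuming TARGET_CACHE_DIR = 'cache'
print(buildExportPath('epub', 'ABCDEF12'))  # -> cache/epub/abc/def/12/abcdef12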
Example #6
# `FicInfo` and `handleFicInfo` are project-local (not shown here).
from typing import Any, Dict, Iterator


def generateFicInfo() -> Iterator[Dict[str, Any]]:
    for fi in FicInfo.select():
        if fi.sourceId == 19:
            continue  # source id 19 is deliberately excluded
        yield handleFicInfo(fi)
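
A sketch of consuming the generator, assuming handleFicInfo returns plain
JSON-serializable dicts as the return annotation suggests:

import json

for d in generateFicInfo():
    print(json.dumps(d))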
Example #7
# Flask view; `ebook`, `FicInfo`, `FicBlacklist`, `RequestLog`, `CACHE_BUSTER`,
# and the FlaskResponse alias come from the surrounding app module.
from flask import make_response, render_template, request, url_for


def index_impl(urlId: str, legacy: bool) -> FlaskResponse:
    from_pw = request.args.get('from_pw', '').strip()

    blacklisted = False
    greylisted = False
    links = []
    ficInfo = None
    try:
        if legacy and len(urlId) > 1:
            fis = FicInfo.select(urlId)
            if len(fis) == 1:
                blacklisted = FicBlacklist.blacklisted(urlId)
                greylisted = FicBlacklist.greylisted(urlId)
                ficInfo = fis[0]

                epubRL = RequestLog.mostRecentByUrlId('epub', urlId)
                if epubRL is None:
                    # we always generate the epub first, so if we don't have it something went
                    # horribly wrong
                    raise Exception("uh oh")

                slug = ebook.buildFileSlug(ficInfo.title, ficInfo.author,
                                           urlId)
                eh = epubRL.exportFileHash
                if eh is None:
                    eh = 'unknown'
                epubUrl = url_for('get_cached_export',
                                  etype='epub',
                                  urlId=urlId,
                                  fname=f'{slug}.epub',
                                  h=eh)

                links = [('epub', True, epubUrl)]
                for etype in ebook.EXPORT_TYPES:
                    if etype == 'epub':
                        continue
                    pe = ebook.findExistingExport(etype, urlId, eh)
                    if pe is None:
                        # for any etype that hasn't already been exported or is out of date,
                        # create a (re)generate link
                        link = url_for('get_cached_export_partial',
                                       etype=etype,
                                       urlId=urlId,
                                       cv=CACHE_BUSTER,
                                       eh=eh)
                        links.append((etype, False, link))
                    else:
                        # otherwise build the direct link
                        fname = slug + ebook.EXPORT_SUFFIXES[etype]
                        fhash = pe[1]
                        link = url_for('get_cached_export',
                                       etype=etype,
                                       urlId=urlId,
                                       fname=fname,
                                       h=fhash)
                        links.append((etype, True, link))
    except Exception:
        # best-effort: any failure falls back to rendering a bare index page
        pass

    if greylisted:
        links = []

    resp = make_response(
        render_template('index.html',
                        from_pw=from_pw,
                        ficInfo=ficInfo,
                        blacklisted=blacklisted,
                        greylisted=greylisted,
                        links=links))
    if legacy:
        resp.headers['X-Robots-Tag'] = 'noindex'
    return resp
Example #8
# `ax`, `ebook`, `FicInfo`, `RequestLog`, `getErr`, `WebError`,
# `get_request_source`, and `InvalidEtypeException` are project-local.
import json
import time
import traceback
from typing import Any, Dict, Optional

from flask import url_for


def ensure_export(etype: str,
                  query: str,
                  urlId: Optional[str] = None) -> Dict[str, Any]:
    print(f'ensure_export: query: {query}')
    if etype not in ebook.EXPORT_TYPES:
        return getErr(WebError.invalid_etype, {
            'fn': 'ensure_export',
            'etype': etype
        })
    source = get_request_source()

    notes = []
    axAlive = ax.alive()
    if not axAlive:
        print('ensure_export: ax is not alive :(')
        if urlId is None or len(FicInfo.select(urlId)) != 1:
            return getErr(WebError.ax_dead)
        # otherwise fallthrough
        notes += ['backend api is down; results may be stale']

    initTimeMs = int(time.time() * 1000)
    meta = None
    lres = None
    try:
        if not axAlive:
            meta = FicInfo.select(urlId)[0]
        else:
            lres = ax.lookup(query)
            if 'err' in lres:
                endTimeMs = int(time.time() * 1000)
                RequestLog.insert(source, etype,
                                  query, endTimeMs - initTimeMs, None,
                                  json.dumps(lres), None, None, None, None)
                lres['upstream'] = True
                return lres
            meta = FicInfo.parse(lres)
    except Exception as e:
        traceback.print_exc()
        print(e)
        print('ensure_export: ^ something went wrong doing ax.lookup :/')

        return getErr(WebError.lookup_failed)

    metaDict = meta.toJson()

    infoTimeMs = int(time.time() * 1000)
    infoRequestMs = infoTimeMs - initTimeMs

    # attempt to find previous epub export if it exists...
    try:
        existingEpub = None
        if meta.contentHash is not None:
            existingEpub = ebook.findExistingExport('epub', meta.id,
                                                    meta.contentHash)

        existingExport = None
        if etype == 'epub':
            existingExport = existingEpub
        elif existingEpub is not None:
            _, ehash = existingEpub  # only the epub's hash is needed here
            existingExport = ebook.findExistingExport(etype, meta.id, ehash)

        if existingExport is not None:
            print(
                f'ensure_export({etype}, {query}): attempting to reuse previous export for {meta.id}'
            )
            fname, fhash = existingExport
            metaString = ebook.metaDataString(meta)

            slug = ebook.buildFileSlug(meta.title, meta.author, meta.id)
            suff = ebook.EXPORT_SUFFIXES[etype]
            exportUrl = url_for('get_cached_export',
                                etype=etype,
                                urlId=meta.id,
                                fname=f'{slug}{suff}',
                                h=fhash)

            endTimeMs = int(time.time() * 1000)
            exportMs = endTimeMs - infoTimeMs

            RequestLog.insert(source, etype, query, infoRequestMs, meta.id,
                              json.dumps(lres), exportMs, fname, fhash,
                              exportUrl)

            print(
                f'ensure_export({etype}, {query}): reusing previous export for {meta.id}'
            )
            return {
                'urlId': meta.id,
                'info': metaString,
                f'{etype}_fname': fname,
                'hash': fhash,
                'url': exportUrl,
                'meta': metaDict,
                'slug': slug,
                'hashes': {
                    etype: fhash
                },
                'notes': notes
            }
    except Exception as e:
        traceback.print_exc()
        print(e)
        print(
            'ensure_export: ^ something went wrong trying to reuse existing export :/'
        )

    etext = None  # user-facing detail extracted from known exception types
    try:
        # TODO we could be timing this too...
        metaString = ebook.metaDataString(meta)
        chapters = ax.fetchChapters(meta)

        # actually do the export
        fname, fhash = None, None
        if etype == 'epub':
            fname, fhash = ebook.createEpub(meta, chapters)
        elif etype == 'html':
            fname, fhash = ebook.createHtmlBundle(meta, chapters)
        elif etype in ['mobi', 'pdf']:
            fname, fhash = ebook.convertEpub(meta, chapters, etype)
        else:
            raise InvalidEtypeException(f'err: unknown etype: {etype}')

        slug = ebook.buildFileSlug(meta.title, meta.author, meta.id)
        suff = ebook.EXPORT_SUFFIXES[etype]
        exportUrl = url_for('get_cached_export',
                            etype=etype,
                            urlId=meta.id,
                            fname=f'{slug}{suff}',
                            h=fhash)

        endTimeMs = int(time.time() * 1000)
        exportMs = endTimeMs - infoTimeMs

        RequestLog.insert(source, etype, query, infoRequestMs, meta.id,
                          json.dumps(lres), exportMs, fname, fhash, exportUrl)

        return {
            'urlId': meta.id,
            'info': metaString,
            f'{etype}_fname': fname,
            'hash': fhash,
            'url': exportUrl,
            'meta': metaDict,
            'slug': slug,
            'hashes': {
                etype: fhash
            }
        }
    except Exception as e:
        endTimeMs = int(time.time() * 1000)
        exportMs = endTimeMs - infoTimeMs
        RequestLog.insert(source, etype,
                          query, endTimeMs - initTimeMs, meta.id,
                          json.dumps(lres), exportMs, None, None, None)

        if e.args:
            if isinstance(e,
                          (ax.MissingChapterException, InvalidEtypeException)):
                etext = e.args[0]

        traceback.print_exc()
        print(e)
        print('ensure_export: ^ something went wrong :/')

    return getErr(
        WebError.export_failed, {
            'msg': f'{etype} export failed\nplease try again in a few minutes, '
                   'or report this on discord if the issue persists',
            'etext': etext,
            'meta': metaDict,
        })
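
A minimal calling sketch (the query URL is hypothetical, and this assumes the
error dicts returned by getErr carry an 'err' key like the lookup errors
checked above):

res = ensure_export('epub', 'https://www.fanfiction.net/s/1234567')
if 'err' in res:
    print('export failed:', res.get('msg'))
else:
    print('download ready at', res['url'])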