Ejemplo n.º 1
0
def getBooksV2(booknames, store):
    """Look up each book name in *booknames* against the given store.

    Results are served from the per-store cache unless ``flag_nocache`` is
    set. Returns a dict mapping each raw query string to a dict with the
    scraped books, a cache-hit flag, and any scraper error text.
    """
    print("[V2] Queries: " + str(booknames))
    result = {}
    for book in booknames:
        scraper.kirjat_scrape_err = ""
        bookname = book.replace("\r", "").replace("\n", "")

        # Each store keeps its own cache; "jam" is the default.
        cacheToUse = cache
        if store == "san":
            cacheToUse = cache_san
        usedCache = False
        if bookname not in cacheToUse.keys() or flag_nocache:
            print("[V2] \"" + bookname + "\" for the store \"" + store +
                  "\" not in cache, scraping...")
            if store == "jam":
                books = scrape_jam(bookname)
            elif store == "san":
                books = scrape_san(bookname)
            else:
                books = []
                print(f"[V2] Invalid store \"{store}\" specified")
            err = scraper.clean(scraper.kirjat_scrape_err)
            cacheToUse[bookname] = (books, err)
        else:
            usedCache = True
            print("[V2] \"" + bookname + "\" for the store \"" + store +
                  "\" in cache.")
            # BUG FIX: read from the store-specific cache, not always the
            # "jam" cache -- a cached "san" lookup previously read the wrong
            # cache (KeyError or the wrong store's prices).
            books, err = cacheToUse[bookname]
            scraper.kirjat_scrape_err = ""
        result[book] = {
            "books": booklistTodictList(books),
            "result_was_cached": usedCache,
            "errors": err
        }
    return result
Ejemplo n.º 2
0
def get_data_TOI(data_path):
    """Get training data from the TOI articles file.

    Reads ``toi.json`` under *data_path* and returns a Bunch whose ``data``
    holds the cleaned content of every article.
    """
    all_data = []
    print(CATEGORIES.keys())
    for path in glob(os.path.join(data_path, 'toi.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            for article in data.get('articles'):
                all_data.append(scraper.clean(article['content']))
    # NOTE: the old trailing jsonfile.close() was removed -- the "with" block
    # already closes the file, and the stray call raised NameError whenever
    # the glob matched nothing.
    return Bunch(categories=CATEGORIES.keys(), values=None, data=all_data)
def get_data(data_path):
    """Collect cleaned article bodies from every JSON file under *data_path*."""
    cleaned = []
    pattern = os.path.join(data_path, '*.json')
    for json_path in glob(pattern):
        with open(json_path, 'r') as fh:
            parsed = json.loads(fh.read())
        for article in parsed.get('articles'):
            cleaned.append(scraper.clean(article['content']))
    return Bunch(categories=scraper.CATEGORIES.keys(),
                 values=None,
                 data=cleaned)
Ejemplo n.º 4
0
def get_data(data_path):
    """Get training data from the articles folder."""
    all_data = []
    for path in glob(os.path.join(data_path, '*.json')):
        with open(path) as jsonfile:
            payload = json.loads(jsonfile.read())
            all_data += [
                scraper.clean(article['content'])
                for article in payload.get('articles')
            ]
    return Bunch(
        categories=scraper.CATEGORIES.keys(),
        values=None,
        data=all_data,
    )
Ejemplo n.º 5
0
def run():
    """Run the cleanup, but only after an interactive 'yes' confirmation."""
    answer = raw_input("Are you sure? Then write 'yes'")
    if answer == "yes":
        clean()
Ejemplo n.º 6
0
def query():
    print(request.form)
    if 'query' in request.form.keys():
        bookname = request.form.get('query')
        usedCache = False
        if not bookname in cache.keys() or flag_nocache:
            print("\"" + bookname + "\" not in cache, scraping...")
            books = scrape_jam(bookname)
            err = scraper.clean(scraper.kirjat_scrape_err)
            cache[bookname] = (books, err)
        else:
            usedCache = True
            print("\"" + bookname + "\" in cache.")
            books, err = cache[bookname]
        scraper.kirjat_scrape_err = ""
        return jsonify({
            "data": booklistTodictList(books),
            "cached_result": usedCache,
            "err": err,
            "query": bookname
        })
    if 'querysan' in request.form.keys():
        bookname = request.form.get('querysan')
        usedCache = False
        if not bookname in cache_san.keys() or flag_nocache:
            print("\"" + bookname + "\" not in cache, scraping...")
            books = scrape_san(bookname)
            err = scraper.clean(scraper.kirjat_scrape_err)
            cache_san[bookname] = (books, err)
        else:
            usedCache = True
            print("\"" + bookname + "\" in cache.")
            books, err = cache_san[bookname]
        scraper.kirjat_scrape_err = ""
        return jsonify({
            "data": booklistTodictList(books),
            "cached_result": usedCache,
            "err": err,
            "query": bookname
        })
    if 'querym' in request.form.keys():
        booknames = request.form.get('querym').split("\n")
        print("Queries: " + str(booknames))
        result = []
        query = []
        for book in booknames:
            scraper.kirjat_scrape_err = ""
            bookname = book.replace("\r", "").replace("\n", "")
            query.append(bookname)
            usedCache = False
            if not bookname in cache.keys() or flag_nocache:
                print("\"" + bookname + "\" not in cache, scraping...")
                books = scrape_jam(bookname)
                err = scraper.clean(scraper.kirjat_scrape_err)
                cache[bookname] = (books, err)
            else:
                usedCache = True
                print("\"" + bookname + "\" in cache.")
                books, err = cache[bookname]
                scraper.kirjat_scrape_err = ""
            result.append({
                "data": booklistTodictList(books),
                "cached_result": usedCache,
                "err": err,
                "query": query
            })
        return jsonify(result)
    if 'querymsan' in request.form.keys():
        booknames = request.form.get('querymsan').split("\n")
        print("Queries: " + str(booknames))
        result = []
        query = []
        for book in booknames:
            scraper.kirjat_scrape_err = ""
            bookname = book.replace("\r", "").replace("\n", "")
            query.append(bookname)
            usedCache = False
            if not bookname in cache_san.keys() or flag_nocache:
                print("\"" + bookname + "\" not in cache, scraping...")
                books = scrape_san(bookname)
                err = scraper.clean(scraper.kirjat_scrape_err)
                cache_san[bookname] = (books, err)
            else:
                usedCache = True
                print("\"" + bookname + "\" in cache.")
                books, err = cache_san[bookname]
                scraper.kirjat_scrape_err = ""
            result.append({
                "data": booklistTodictList(books),
                "cached_result": usedCache,
                "err": err,
                "query": query
            })
        return jsonify(result)
    return jsonify({
        "code": 400,
        "reason":
        "400: Query form must contain the key \"query\" or \"querym\"",
        "stacktrace": ""
    }), 400
Ejemplo n.º 7
0
 def handle(self, *args, **options):
     """Command entry point: run the cleanup once the user types 'yes'."""
     if raw_input("Are you sure? Then write 'yes'") != "yes":
         return
     clean()