def getBooksV2(booknames, store):
    """Look up each queried book title in the given store, using a per-store cache.

    Args:
        booknames: iterable of raw book-name strings (may contain "\r"/"\n").
        store: store identifier — "jam" or "san"; any other value scrapes
            nothing and yields an empty book list for that title.

    Returns:
        dict keyed by the ORIGINAL (uncleaned) book string, each value a dict:
        "books" (list of book dicts), "result_was_cached" (bool),
        "errors" (cleaned scraper error string).
    """
    print("[V2] Queries: " + str(booknames))
    result = {}
    for book in booknames:
        # Reset the module-level error accumulator before each scrape so
        # errors from the previous title do not leak into this one.
        scraper.kirjat_scrape_err = ""
        bookname = book.replace("\r", "").replace("\n", "")
        # Each store has its own cache: "san" uses cache_san, default is cache.
        cacheToUse = cache
        if store == "san":
            cacheToUse = cache_san
        usedCache = False
        if bookname not in cacheToUse or flag_nocache:
            print("[V2] \"" + bookname + "\" for the store \"" + store + "\" not in cache, scraping...")
            if store == "jam":
                books = scrape_jam(bookname)
            elif store == "san":
                books = scrape_san(bookname)
            else:
                books = []
                print(f"[V2] Invalid store \"{store}\" specified")
            err = scraper.clean(scraper.kirjat_scrape_err)
            cacheToUse[bookname] = (books, err)
        else:
            usedCache = True
            print("[V2] \"" + bookname + "\" for the store \"" + store + "\" in cache.")
            # BUG FIX: previously read `cache[bookname]` unconditionally, which
            # broke (KeyError) or returned wrong-store data for cached "san" hits.
            books, err = cacheToUse[bookname]
            scraper.kirjat_scrape_err = ""
        result[book] = {
            "books": booklistTodictList(books),
            "result_was_cached": usedCache,
            "errors": err,
        }
    return result
def get_data_TOI(data_path):
    """Get TOI training data from the articles folder.

    Reads data_path/toi.json, cleans each article's 'content' field with
    scraper.clean, and returns the cleaned strings in a Bunch.

    Args:
        data_path: directory containing the toi.json articles file.

    Returns:
        Bunch with `categories` (CATEGORIES keys), `values=None` and
        `data` (list of cleaned article-content strings).
    """
    all_data = []
    print(CATEGORIES.keys())
    for path in glob(os.path.join(data_path, 'toi.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            for article in data.get('articles'):
                # append the single cleaned string (was: extend([...])).
                all_data.append(scraper.clean(article['content']))
        # The redundant explicit jsonfile.close() was removed: the `with`
        # statement already closes the file.
    return Bunch(categories=CATEGORIES.keys(), values=None, data=all_data)
def get_data(data_path):
    """Get training data from all *.json article files under data_path.

    Args:
        data_path: directory containing the JSON articles files.

    Returns:
        Bunch with `categories` (scraper.CATEGORIES keys), `values=None`
        and `data` (list of cleaned article-content strings).
    """
    all_data = []
    for path in glob(os.path.join(data_path, '*.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            for article in data.get('articles'):
                # append the single cleaned string (was: extend([...])).
                all_data.append(scraper.clean(article['content']))
    return Bunch(categories=scraper.CATEGORIES.keys(), values=None, data=all_data)
def get_data(data_path):
    """Get training data from the articles folder.

    Args:
        data_path: directory containing the JSON articles files.

    Returns:
        Bunch with `categories` (scraper.CATEGORIES keys), `values=None`
        and `data` (list of cleaned article-content strings).
    """
    all_data = []
    for path in glob(os.path.join(data_path, '*.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            # extend with a generator of cleaned strings instead of the
            # original per-article extend([single_item]) detour.
            all_data.extend(
                scraper.clean(article['content'])
                for article in data.get('articles')
            )
    return Bunch(categories=scraper.CATEGORIES.keys(), values=None, data=all_data)
def run():
    """Ask for confirmation on stdin; run clean() only on an exact 'yes'."""
    # BUG FIX: raw_input() does not exist in Python 3 (this file already uses
    # print() and f-strings), so the old call raised NameError at runtime.
    if input("Are you sure? Then write 'yes'") == "yes":
        clean()
def query():
    """Flask view: look up book prices by form key.

    Form keys handled (first match wins):
      - 'query'     : single title, "jam" store/cache
      - 'querysan'  : single title, "san" store/cache
      - 'querym'    : newline-separated titles, "jam" store/cache
      - 'querymsan' : newline-separated titles, "san" store/cache
    Falls through to a 400 JSON response when none of the keys is present.
    """
    print(request.form)
    if 'query' in request.form.keys():
        bookname = request.form.get('query')
        usedCache = False
        # flag_nocache forces a fresh scrape even when the title is cached.
        if not bookname in cache.keys() or flag_nocache:
            print("\"" + bookname + "\" not in cache, scraping...")
            books = scrape_jam(bookname)
            # scrape_jam reports problems via the module-level accumulator;
            # clean it before caching alongside the scraped books.
            err = scraper.clean(scraper.kirjat_scrape_err)
            cache[bookname] = (books, err)
        else:
            usedCache = True
            print("\"" + bookname + "\" in cache.")
            books, err = cache[bookname]
            # Reset the accumulator so stale errors don't leak into later calls.
            scraper.kirjat_scrape_err = ""
        return jsonify({
            "data": booklistTodictList(books),
            "cached_result": usedCache,
            "err": err,
            "query": bookname
        })
    if 'querysan' in request.form.keys():
        # Same flow as 'query' above, but against the "san" scraper and cache.
        bookname = request.form.get('querysan')
        usedCache = False
        if not bookname in cache_san.keys() or flag_nocache:
            print("\"" + bookname + "\" not in cache, scraping...")
            books = scrape_san(bookname)
            err = scraper.clean(scraper.kirjat_scrape_err)
            cache_san[bookname] = (books, err)
        else:
            usedCache = True
            print("\"" + bookname + "\" in cache.")
            books, err = cache_san[bookname]
            scraper.kirjat_scrape_err = ""
        return jsonify({
            "data": booklistTodictList(books),
            "cached_result": usedCache,
            "err": err,
            "query": bookname
        })
    if 'querym' in request.form.keys():
        # Multi-title variant: one result entry per newline-separated title.
        booknames = request.form.get('querym').split("\n")
        print("Queries: " + str(booknames))
        result = []
        # NOTE: this local `query` list shadows the view function's own name.
        query = []
        for book in booknames:
            # Clear the error accumulator before each title's scrape.
            scraper.kirjat_scrape_err = ""
            bookname = book.replace("\r", "").replace("\n", "")
            query.append(bookname)
            usedCache = False
            if not bookname in cache.keys() or flag_nocache:
                print("\"" + bookname + "\" not in cache, scraping...")
                books = scrape_jam(bookname)
                err = scraper.clean(scraper.kirjat_scrape_err)
                cache[bookname] = (books, err)
            else:
                usedCache = True
                print("\"" + bookname + "\" in cache.")
                books, err = cache[bookname]
                scraper.kirjat_scrape_err = ""
            # NOTE(review): every entry's "query" value is the SAME growing
            # list object, so each serialized entry ends up carrying the full
            # query list — presumably intended; confirm with API consumers.
            result.append({
                "data": booklistTodictList(books),
                "cached_result": usedCache,
                "err": err,
                "query": query
            })
        return jsonify(result)
    if 'querymsan' in request.form.keys():
        # Multi-title variant against the "san" scraper and cache.
        booknames = request.form.get('querymsan').split("\n")
        print("Queries: " + str(booknames))
        result = []
        query = []
        for book in booknames:
            scraper.kirjat_scrape_err = ""
            bookname = book.replace("\r", "").replace("\n", "")
            query.append(bookname)
            usedCache = False
            if not bookname in cache_san.keys() or flag_nocache:
                print("\"" + bookname + "\" not in cache, scraping...")
                books = scrape_san(bookname)
                err = scraper.clean(scraper.kirjat_scrape_err)
                cache_san[bookname] = (books, err)
            else:
                usedCache = True
                print("\"" + bookname + "\" in cache.")
                books, err = cache_san[bookname]
                scraper.kirjat_scrape_err = ""
            result.append({
                "data": booklistTodictList(books),
                "cached_result": usedCache,
                "err": err,
                "query": query
            })
        return jsonify(result)
    # No recognized form key: reply with an explicit 400.
    return jsonify({
        "code": 400,
        "reason": "400: Query form must contain the key \"query\" or \"querym\"",
        "stacktrace": ""
    }), 400
def handle(self, *args, **options):
    """Management-command entry point: confirm on stdin, then run clean()."""
    # BUG FIX: raw_input() does not exist in Python 3 (this file already uses
    # print() and f-strings), so the old call raised NameError at runtime.
    if input("Are you sure? Then write 'yes'") == "yes":
        clean()