def search(query, file, num):
    """Search for *query*, save the SERP, and return the parsed results.

    A directory ``RawHTML/<file>/<num>`` is created (if needed) before the
    search is issued and is then handed to ``se.save_serp`` as ``save_dir``.

    Args:
        query: Search string passed to ``ws.SearchEngine.search``.
        file: Name of the sub-directory under ``RawHTML/`` for this run.
        num: Question number. Used (stringified) as the leaf directory name
            and attached to every result under the ``question_number`` key.

    Returns:
        A list with one dict per item produced by ``ws.parse_serp``, each
        extended with ``question_number=num``.
    """
    parent_dir = 'RawHTML/' + file
    path = os.path.join(parent_dir, str(num))
    # makedirs instead of mkdir: creates the missing ``RawHTML/<file>`` parent
    # on first use and does not raise FileExistsError when a question is
    # re-run into an already existing directory.
    os.makedirs(path, exist_ok=True)

    se = ws.SearchEngine()
    se.search(query)
    soup = ws.make_soup(se.html)
    parsed = ws.parse_serp(soup)
    se.save_serp(save_dir=path)

    # dict(item, key=value) copies each result instead of mutating it.
    return [dict(item, question_number=num) for item in parsed]
""" Test parse """ import argparse import pandas as pd import WebSearcher as ws parser = argparse.ArgumentParser() parser.add_argument("-f", "--filepath", help="The SERP html file") args = parser.parse_args() if not args.filepath: print('Must include -f arg') else: soup = ws.load_soup(args.filepath) parsed = ws.parse_serp(soup) results = pd.DataFrame(parsed) cmpts = ws.extract_components(soup)