def __init__(self, name=''):
    """Build the search index and load the pickled prediction artifacts.

    Reads the Amazon consumer-reviews CSV, passes the resulting DataFrame
    to ``Index.index_product`` and ``Index.index_review``, then unpickles
    the XGBoost model and the word/char vectorizers. All paths are
    relative, so they resolve against the current working directory.

    :param name: value stored on the instance as ``_name``.
    """
    self._name = name
    print('the app is initialized')
    # self.say_name()

    self.index = Index()

    csv_path = 'Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv'
    try:
        # ``error_bad_lines`` was deprecated in pandas 1.3 and removed in
        # 2.0; ``on_bad_lines='warn'`` is its replacement (malformed rows
        # are dropped with a warning instead of raising).
        self.df_amazon = pd.read_csv(
            csv_path, on_bad_lines='warn', encoding='utf-8-sig')
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``; use the old keyword.
        self.df_amazon = pd.read_csv(
            csv_path, error_bad_lines=False, encoding='utf-8-sig')

    num_files = self.index.index_product(self.df_amazon)
    print("indexed %d files" % num_files)
    self.index.index_review(self.df_amazon)

    # init predict
    # SECURITY: pickle.load can execute arbitrary code while
    # deserializing. Only load these files from a trusted source.
    # NOTE: the original also named "randomForest.pkl" in a local that
    # was never read; that unused local has been removed.
    model_path = "xgboost.pkl"
    word_vectorizer_path = "word_vectorize.pkl"
    char_vectorizer_path = "char_vectorize.pkl"

    with open(model_path, "rb") as model_file:
        self.xgboostModel = pickle.load(model_file)
    with open(word_vectorizer_path, "rb") as word_file:
        self.word_vectorizer = pickle.load(word_file)
    with open(char_vectorizer_path, "rb") as char_file:
        self.char_vectorizer = pickle.load(char_file)
# Base query class (parent of all query classes)
class Query(object):
    """Common parent for every query type; subclasses provide ``get_matches``."""

    def __init__(self):
        """Nothing to initialise on the base query."""

    def get_matches(self, index):
        """Not implemented yet: currently returns ``None``.

        :param index: the index to run the query against.
        """
        # TODO, return all documents IDs (as a set) from the index
        return None


# Query containing a single search term
class TermQuery(Query):
    """Query for a single search term, stored on ``self.term``."""

    def __init__(self, term):
        """Remember the search term.

        :param term: the term to look up.
        """
        self.term = term

    def get_matches(self, index):
        """Not implemented yet: currently returns ``None``.

        :param index: the index to run the query against.
        """
        # TODO, return all documents IDs (as a set) that contain the search term
        return None


# Load index
index = Index()
index.load_from_file("data/index.txt")

# TODO, construct the following (or similar) queries and get results
# - "states"
# - "NOT washington"
# - "united AND states"
# - "(us OR (united AND states)) AND NOT washington"
def main(args, loader=None, picker_cls=Picker):
    """Run the interactive curses picker and optionally print the selection.

    Lines are fed to the picker from piped stdin when stdin is not a TTY.
    Otherwise they come from ``loader`` if given, else from a
    ``FileLoader`` on ``args.input`` if set, else from a ``HistoryLoader``.
    File descriptor 0 is then re-pointed at ``/dev/tty`` so curses can read
    keystrokes, and the key loop runs until ``KeyboardInterrupt``,
    ``SystemExit`` or ``QuitException``.

    :param args: commandline arguments; ``args.debug`` and ``args.input``
        are read here and the whole object is passed to ``picker_cls``
    :param loader: optional loader, assigned to ``picker.loader`` as given
    :param picker_cls: picker class, instantiated as ``picker_cls(args=args)``
    :return: None
    """
    shorter_esc_delay()
    index = Index()
    picker = picker_cls(args=args)
    picker.index = index
    if args.debug:
        picker.do_debug = True

    if not sys.stdin.isatty():
        # readline() returns '' only at EOF; stop there instead of adding
        # that empty sentinel to the index as the original loop did.
        for stdin_line in iter(sys.stdin.readline, ''):
            picker.index.add(stdin_line)
    else:
        if loader:
            picker.loader = loader
        elif args.input:
            picker.loader = FileLoader(args.input)
        else:
            picker.loader = HistoryLoader()
        picker.load_lines()

    # Make the controlling terminal the process's stdin. The duplicated
    # descriptor 0 stays open after the temporary handle is closed.
    with open("/dev/tty") as tty:
        os.dup2(tty.fileno(), 0)

    picker.win = curses.initscr()
    try:
        # Setup runs inside the cleanup scope so a failure here still
        # restores the terminal.
        curses.noecho()
        curses.start_color()
        curses.init_pair(1, curses.COLOR_WHITE, curses.COLOR_BLUE)
        picker.win.timeout(-1)
        picker.win.keypad(1)

        max_y, _ = picker.get_max_viewport()
        picker.last_lines = picker.index.last_lines[0:max_y]
        logger.debug("lastlines %s", picker.last_lines)

        try:
            picker.refresh_window("")
            # thread.start_new_thread( picker.cursor_blink,())
            while True:
                char = picker.win.getch()
                picker.key_pressed(char)
        except (KeyboardInterrupt, SystemExit, QuitException):
            pass
    finally:
        picker.win.keypad(0)
        curses.nocbreak()
        curses.echo()
        curses.endwin()

    if picker.do_print:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(picker.last_lines[picker.selected_lineno][1])
def setUp(self):
    """Create the index under test from the result of ``sample_stop_words()``."""
    stop_words = sample_stop_words()
    self.index = Index(stop_words)