def add_books_by_title(self, bookdate):
    titlelist = self.get_orititle_list()
    # Log every Naver search to a timestamped file.
    f = open("Naver Book Search Log_{} {}h{}m{}s.txt".format(
        self.curt.date(), self.curt.hour, self.curt.minute, self.curt.second),
        "w", encoding='utf-8')
    self.searcher.f = f
    for idx in range(len(bookdate)):
        btlist = bookdate[idx][1]
        dt = bookdate[idx][0]
        for bt in btlist:
            if bt in titlelist:
                # Title already known: search again and replace the old entry.
                print('{} refreshed'.format(bt))
                bidx = titlelist.index(bt)
                b = book_data.BookData()
                self.searcher.book = b
                self.searcher.from_title(bt)
                self.book_list[bidx] = b
                dtitle = [x.ori_title for x in self.date_to_book[dt]]
                if bt in dtitle:
                    bdidx = dtitle.index(bt)
                    self.date_to_book[dt][bdidx] = self.book_list[bidx]
                else:
                    print('Why is {} in the book list but not in the list of books published on {}?'.format(bt, dt))
                    self.date_to_book[dt].append(self.book_list[bidx])
            else:
                # New title: search it and register it under its publish date.
                b = book_data.BookData()
                self.searcher.book = b
                self.searcher.from_title(bt)
                self.add_book(b, dt)
    self.searcher.search_finished()

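# Sketch of the structure `add_books_by_title` iterates over, inferred from the
# indexing in the method above (the date values are whatever keys `date_to_book`
# uses; the literals below are illustrative only):
#
#     bookdate = [
#         ("<publish date>", ["title A", "title B"]),
#         ("<publish date>", ["title C"]),
#     ]
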
def import_data(self):
    self.book_list = list()
    self.date_to_book = defaultdict(list)
    # Restore the flat book list.
    book_path = pathlib.Path('book_data.json')
    if book_path.exists():
        temp = book_path.read_text(encoding='utf-16')
        # The text is already decoded by read_text, so json.loads needs no encoding argument.
        book_list = json.loads(temp, strict=False)
        for book in book_list:
            new_book = book_data.BookData()
            new_book.from_json_dict(book)
            self.book_list.append(new_book)
    # Restore the date -> books mapping.
    book_path = pathlib.Path('date_to_book.json')
    if book_path.exists():
        temp = book_path.read_text(encoding='utf-16')
        book_dict = json.loads(temp, strict=False)
        for key in book_dict.keys():
            new_list = list()
            book_list = book_dict[key]
            for book in book_list:
                new_book = book_data.BookData()
                new_book.from_json_dict(book)
                new_list.append(new_book)
            self.date_to_book[key] = new_list

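# The two JSON files read by import_data are assumed to have roughly this shape
# (the exact keys inside each book dict are whatever BookData.from_json_dict
# expects; this is only a sketch):
#
#     book_data.json    : [ {...book fields...}, {...book fields...}, ... ]
#     date_to_book.json : { "<publish date>": [ {...book fields...}, ... ], ... }
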
'''
def add_by_tl_td(self, title_list, title_to_date):
    self.date_to_book = defaultdict(list)
    for idx in range(len(title_list)):
        title = title_list[idx]
        print("{}/{}".format(idx, len(title_list)))
        book = book_data.BookData()
        self.searcher.book = book
        self.searcher.from_title(title)
        self.book_list.append(book)
        self.date_to_book[title_to_date[book.ori_title]].append(book)
    self.searcher.search_finished()
'''

def add_by_tl_td(self, title_list, title_to_date):
    self.date_to_book = defaultdict(list)
    # Log every Naver search to a timestamped file.
    f = open("Naver Book Search Log_{} {}h{}m{}s.txt".format(
        self.curt.date(), self.curt.hour, self.curt.minute, self.curt.second),
        "w", encoding='utf-8')
    self.searcher.f = f
    for idx in range(len(title_list)):
        title = title_list[idx]
        print("{}/{}".format(idx, len(title_list)))
        book = book_data.BookData()
        self.searcher.book = book
        self.searcher.from_title(title)
        self.book_list.append(book)
        self.date_to_book[title_to_date[book.ori_title]].append(book)
    self.searcher.search_finished()

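# Sketch of the inputs `add_by_tl_td` expects, inferred from the body above:
# `title_list` is a list of title strings to search, and `title_to_date` maps a
# book's ori_title (as filled in by the searcher) to the date key used in
# `date_to_book`. The literal values below are illustrative only:
#
#     title_list    = ["title A", "title B"]
#     title_to_date = {"title A": "<publish date>", "title B": "<publish date>"}
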
def crawl_search_sample():
    book = book_data.BookData()
    sr = exins.get_instance().get_searcher_naver_instance()
    sr.book = book
    sr.from_title('시원찮은 그녀를 위한 육성방법 GS 2권')

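# `crawl_search_sample` is a one-off manual check: it asks the Naver searcher to
# fill a single BookData from one known title string. A hypothetical way to run
# it by hand:
#
#     crawl_search_sample()
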
ax.set_xlim(xmin=0.0, xmax=1.0)
ax.set_ylim(ymin=0.0, ymax=1.0)
ax.scatter([x[1] for x in ap_xy], [x[2] for x in ap_xy])
plt.show()


if __name__ == "__main__":
    training_set = []
    tra_path = pathlib.Path("../training_set.json")
    if tra_path.exists():
        temp = tra_path.read_text(encoding='utf-16')
        book_dict = json.loads(temp, strict=False)
        for dic in book_dict:
            new_book = book_data.BookData()
            new_book.from_json_dict(dic["book"])
            dic["book"] = new_book
        training_set = book_dict

    t_json = []
    for t in training_set:
        data = t["book"].title
        pos = json_file.list_to_json(
            [{"segment": n, "tag": t} for n, t in nlp_module.pos_Twitter(data)],
            json_file.data_to_json)
        t_json.append({"string": data, "pos": pos})
        data = t["book"].description