    def add_books_by_title(self, bookdate) :
        # bookdate: list of (publication date, [original title, ...]) pairs
        titlelist = self.get_orititle_list()
        # Open a timestamped log file and hand it to the searcher
        f = open("Naver Book Search Log_{} {}h{}m{}s.txt".
                 format(self.curt.date(), self.curt.hour, self.curt.minute, self.curt.second), "w", encoding='utf-8')
        self.searcher.f = f

        for dt, btlist in bookdate :
            for bt in btlist :
                if bt in titlelist :
                    # Already-known title: re-fetch its data and replace the stored entry
                    print('{} has been refreshed'.format(bt))
                    bidx = titlelist.index(bt)
                    b = book_data.BookData()
                    self.searcher.book = b
                    self.searcher.from_title(bt)
                    self.book_list[bidx] = b

                    dtitle = [x.ori_title for x in self.date_to_book[dt]]
                    if bt in dtitle :
                        bdidx = dtitle.index(bt)
                        self.date_to_book[dt][bdidx] = self.book_list[bidx]
                    else :
                        print('Why is {} in the book list but missing from the list of books published on {}?'.format(bt, dt))
                        self.date_to_book[dt].append(self.book_list[bidx])
                else :
                    # New title: fetch its data and register it under its publication date
                    b = book_data.BookData()
                    self.searcher.book = b
                    self.searcher.from_title(bt)
                    self.add_book(b, dt)

        self.searcher.search_finished()

    def import_data(self) :
        # Rebuild book_list and date_to_book from the JSON files on disk
        self.book_list = list()
        self.date_to_book = defaultdict(list)

        book_path = pathlib.Path('book_data.json')
        if book_path.exists() :
            temp = book_path.read_text(encoding='utf-16')
            # temp is already a decoded str, so json.loads() needs no encoding argument
            book_list = json.loads(temp, strict=False)

            for book in book_list :
                new_book = book_data.BookData()
                new_book.from_json_dict(book)
                self.book_list.append(new_book)

        book_path = pathlib.Path('date_to_book.json')
        if book_path.exists() :
            temp = book_path.read_text(encoding='utf-16')
            book_dict = json.loads(temp, strict=False)

            for key, book_list in book_dict.items():
                new_list = list()
                for book in book_list :
                    new_book = book_data.BookData()
                    new_book.from_json_dict(book)
                    new_list.append(new_book)

                self.date_to_book[key] = new_list

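    # A minimal sketch (not in the original code) of the export step that import_data
    # expects to read back. Hypothetical helper: it assumes BookData fields can be
    # flattened with vars(), which may not match the project's real serialisation.
    def export_data_sketch(self) :
        book_path = pathlib.Path('book_data.json')
        book_path.write_text(json.dumps([vars(b) for b in self.book_list],
                                        ensure_ascii=False), encoding='utf-16')

        date_path = pathlib.Path('date_to_book.json')
        date_path.write_text(json.dumps({k: [vars(b) for b in v]
                                         for k, v in self.date_to_book.items()},
                                        ensure_ascii=False), encoding='utf-16')
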
    def add_by_tl_td(self, title_list, title_to_date) :
        self.date_to_book = defaultdict(list)
        # Open a timestamped log file and hand it to the searcher
        f = open("Naver Book Search Log_{} {}h{}m{}s.txt".
                 format(self.curt.date(), self.curt.hour, self.curt.minute, self.curt.second), "w", encoding='utf-8')
        self.searcher.f = f

        for idx in range(len(title_list)) :
            title = title_list[idx]
            print("{}/{}".format(idx, len(title_list)))
            book = book_data.BookData()
            self.searcher.book = book
            self.searcher.from_title(title)
            self.book_list.append(book)
            self.date_to_book[title_to_date[book.ori_title]].append(book)

        self.searcher.search_finished()
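# Usage sketch (not from the original source): 'manager' stands for an instance of the
# enclosing class, and the titles and dates below are placeholders, not real data.
# title_to_date must map each original title to its publication date, because
# add_by_tl_td files each book under title_to_date[book.ori_title].
titles = ['sample title 1', 'sample title 2']
title_to_date = {'sample title 1': '2020-01-01', 'sample title 2': '2020-02-15'}
manager.add_by_tl_td(titles, title_to_date)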
Example #5
def crawl_search_sample():
    # Fetch metadata for a single title through the Naver Book searcher
    book = book_data.BookData()
    sr = exins.get_instance().get_searcher_naver_instance()
    sr.book = book
    sr.from_title('시원찮은 그녀를 위한 육성방법 GS 2권')
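# Note (a sketch, not in the original example): from_title() fills the BookData in place.
# To inspect the result, one hypothetical tweak is to return book at the end of the
# helper and then do, e.g.:
#
#     b = crawl_search_sample()
#     print(b.title, b.description)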
Example #6
            # Constrain the axes to the unit square and scatter-plot the (x, y) pairs from ap_xy
            ax.set_xlim(xmin=0.0, xmax=1.0)
            ax.set_ylim(ymin=0.0, ymax=1.0)
            ax.scatter([x[1] for x in ap_xy], [x[2] for x in ap_xy])

    plt.show()


if __name__ == "__main__":
    # Load the training set (a list of entries, each carrying a "book" dict) if the file exists
    training_set = []
    tra_path = pathlib.Path("../training_set.json")
    if tra_path.exists():
        temp = tra_path.read_text(encoding='utf-16')
        book_dict = json.loads(temp, strict=False)

        for dic in book_dict:
            # Turn the raw "book" dict into a BookData instance
            new_book = book_data.BookData()
            new_book.from_json_dict(dic["book"])
            dic["book"] = new_book

        training_set = book_dict

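    # Expected shape of training_set.json, inferred from the loading loop above (a sketch,
    # not the actual file): a JSON array whose entries each carry a "book" object that
    # BookData.from_json_dict() can consume; the exact field names inside "book" are an
    # assumption here.
    #
    #     [
    #         {"book": {"title": "...", "description": "...", ...}, ...},
    #         ...
    #     ]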
    t_json = []
    for t in training_set:
        data = t["book"].title
        pos = json_file.list_to_json([{
            "segment": n,
            "tag": t
        } for n, t in nlp_module.pos_Twitter(data)], json_file.data_to_json)
        t_json.append({"string": data, "pos": pos})

        data = t["book"].description