コード例 #1
0
ファイル: spider.py プロジェクト: PinkD/NovelSpider
 def start(self):
     """Crawl the category-11 listing and queue every novel not yet stored.

     Fetches the first listing page to read the highest page number from
     the ``go_page`` input, then walks every listing page from 1 up to and
     including that number. For each entry found on a page a ``Novel`` is
     built from the scraped title, author, description and id; entries for
     which ``self._db.check_novel_exists`` is true are reported and
     skipped, the rest are handed to ``self._pool.submit`` together with
     their URL.

     Raises:
         IndexError: if the page-count input is not found on the first
             page, or if a page yields fewer authors, descriptions, ids
             or urls than titles.
         ValueError: if the captured page count is empty.
     """
     first_page = self._opener.open(BASE_URL +
                                    '/wapsort/11_1.html').read().decode()
     max_page = int(
         re.findall(
             r'<input style="width: 50%;" type="number" name="page" value="" id="go_page" min="1" max="([0-9]*?)" />',
             first_page).pop())

     # Escape the base URL so that its dots only match literal dots, and
     # compile the patterns once instead of rebuilding them for every page.
     base = re.escape(BASE_URL)
     novel_link = base + r'/novel/[0-9]*\.html'
     id_re = re.compile(r'<h3><a href="' + base +
                        r'/novel/([0-9]*)\.html">.*?</a></h3>')
     title_re = re.compile(r'<h3><a href="' + novel_link +
                           r'">(.*?)</a></h3>')
     url_re = re.compile(r'<h3><a href="(' + novel_link + r')">')
     author_re = re.compile(r'<p>作者:<strong>(.*?)</strong></p>')
     description_re = re.compile(r'<span class="abstract"><a href="' +
                                 novel_link + r'">([\s\S]*?)</a></span>')

     # The upper bound is inclusive: ``max_page`` itself is a valid page.
     for page in range(1, max_page + 1):
         result = self._opener.open(BASE_URL + '/wapsort/11_%d.html' %
                                    page).read().decode()
         ids = id_re.findall(result)
         titles = title_re.findall(result)
         urls = url_re.findall(result)
         authors = author_re.findall(result)
         descriptions = description_re.findall(result)
         # The five lists are matched up by position; indexing (rather than
         # zip) keeps a length mismatch loud instead of silently dropping
         # entries.
         for j, title in enumerate(titles):
             novel = Novel(title,
                           authors[j],
                           descriptions[j],
                           id=int(ids[j]))
             if self._db.check_novel_exists(novel.id):
                 print("Skip existing: %s" % novel.title)
                 continue
             self._pool.submit(self.process_novel, self, novel, urls[j])
コード例 #2
0
def html_parse(html, fileHandle):
    """Parse reading-list HTML into ``Novel`` objects or a save file.

    fileHandle is a work-in-progress feature for creating a reading list
    offline.

    Args:
        html: markup of the reading-list page.
        fileHandle: when truthy, the parsed entries are returned; when
            falsy, they are written to 'readingList.txt' instead.

    Returns:
        A list of ``Novel`` objects when ``fileHandle`` is truthy,
        otherwise ``None``.

    Raises:
        SystemExit: if the number of chapter links is odd.
        IndexError: if there are fewer than two chapter links per title row.
    """
    soup = BeautifulSoup(html, 'html.parser')
    print("\t Parsing Reading List")
    print(" ====================================")
    title_names = soup.find_all("tr", attrs={"class": "rl_links"})
    chapters = soup.find_all("a", attrs={"class": "chp-release"})
    if (len(chapters) % 2) != 0:
        print("Error: Incorrect number of chapters")
        sys.exit()

    # Row i owns chapter links 2*i and 2*i + 1; the first is used as the
    # current chapter and the second as the latest one.
    entries = [(row.attrs["data-title"],
                chapters[2 * i].get_text(),
                chapters[2 * i + 1].get_text())
               for i, row in enumerate(title_names)]

    if fileHandle:
        # NOTE(review): Novel receives (title, latest, current) here while
        # fileReader passes (title, current, latest) - confirm which order
        # the Novel constructor expects.
        return [Novel(title, latest_chapter, current_chapter)
                for title, current_chapter, latest_chapter in entries]

    # The context manager guarantees the file is flushed and closed.
    with open("readingList.txt", "w") as save_file:
        for title, current_chapter, latest_chapter in entries:
            save_file.write(title + "," + current_chapter + "," +
                            latest_chapter + "\n")
    print("The save file,'readingList.txt' has been created...")
コード例 #3
0
ファイル: dealias.py プロジェクト: lingsond/novel2graph
    def analyze_text(self, book_folder, out_folder):
        """Run the alias-resolution pipeline on ``self.input_file``.

        Reads the book from ``book_folder``, then stores each intermediate
        result under ``out_folder/<input file name up to its first dot>/``.
        The processed ``Novel`` is kept on ``self.novel``.

        Args:
            book_folder: path prefix prepended to ``self.input_file``.
            out_folder: path prefix under which the result folder is created.
        """
        stem = self.input_file.split('.')[0]
        target_dir = f"{out_folder}{stem}/"
        if not os.path.exists(target_dir):
            os.makedirs(os.path.dirname(target_dir))

        book = Novel(book_folder + self.input_file)
        book.read()
        book.parse_persons()
        all_names_path = target_dir + self.all_names
        book.store(filename=all_names_path, data=book.persons)

        # Single occurrences must be dropped first, otherwise the eps
        # behaviour becomes unstable.
        min_occurrences = 2
        book.remove_less_than(occurrences=min_occurrences)
        frequent_names_path = (
            f"{target_dir}{stem}_names_more_than_{min_occurrences}.csv")
        book.store(filename=frequent_names_path, data=book.persons)

        book.cluster_aliases()
        book.associate_single_names()
        clusters_path = target_dir + self.clusters
        book.store(filename=clusters_path, data=book.cluster_repetitions)

        book.dealiases()
        dealiased_path = target_dir + self.output_file
        book.store(filename=dealiased_path,
                   data=book.dealiased_text,
                   type='txt')
        self.novel = book
コード例 #4
0
def search_novel():
    """Search for the submitted text and render the result page.

    Reads the 'content' form field, passes it to ``search`` and builds one
    ``Novel`` per position from the first six sequences of the result.
    """
    text = request.form.get('content')
    columns = search(text)
    # One Novel per index of the first sequence; the other five are read at
    # the same index.
    novels = [
        Novel(columns[0][row], columns[1][row], columns[2][row],
              columns[3][row], columns[4][row], columns[5][row])
        for row in range(len(columns[0]))
    ]
    return render_template('search_result.html', text=text, novel=novels)
コード例 #5
0
def scrape():
    """Collect the book at the requested URL and send the resulting file.

    Expects a JSON body with a "url" key. Returns a 400 response when the
    key is missing, or - outside DEBUG mode - when collecting fails. In
    DEBUG mode exceptions propagate to the caller.
    """
    try:
        url = request.json["url"]
    except KeyError:
        return "Missing url in request", 400

    def collect_and_send():
        # Each request gets its own directory named after the current time.
        workdir = f"{DATA}/{time()}"
        os.mkdir(workdir)
        produced = Novel(url).collect(workdir)
        return send_file(os.path.join(workdir, produced))

    if DEBUG:
        # Let errors surface untouched while debugging.
        return collect_and_send()

    try:
        return collect_and_send()
    except Exception as e:
        print(e)
        return "Failed to scrape book", 400
コード例 #6
0
def fileReader():
    """Load the reading list stored in 'readingList.txt'.

    Each non-blank line is split on commas and its first three fields are
    passed to ``Novel``.

    Returns:
        A list with one ``Novel`` per non-blank line of the save file.

    Raises:
        SystemExit: if 'readingList.txt' does not exist.
        IndexError: if a non-blank line has fewer than three fields.
    """
    if not os.path.isfile("readingList.txt"):
        print("Error: Save file, 'readingList.txt' is missing")
        sys.exit()
    readingList = []
    # The context manager closes the file even when a malformed line raises.
    with open("readingList.txt", "r") as saveFile:
        for line in saveFile:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing one at end of file.
                continue
            # Remove only the line terminator, so a final line without a
            # newline does not lose the last character of its third field.
            data = line.rstrip("\n").split(",")
            readingList.append(Novel(data[0], data[1], data[2]))
    return readingList
コード例 #7
0
ファイル: dealias.py プロジェクト: IDSIA/novel2graph
    def analyze_text(self, book_folder, out_folder):
        """Run alias resolution and coreference on ``self.input_file``.

        Reads the book from ``book_folder`` and stores each intermediate
        result under ``out_folder/<input file name up to its first dot>/``.
        The processed ``Novel`` is kept on ``self.novel``.

        Args:
            book_folder: path prefix prepended to ``self.input_file``.
            out_folder: path prefix under which the result folder is created.

        Returns:
            The ``cluster_repetitions_df`` attribute of the processed novel.
        """
        stem = self.input_file.split('.')[0]
        target_dir = f"{out_folder}{stem}/"
        if not os.path.exists(target_dir):
            os.makedirs(os.path.dirname(target_dir))

        book = Novel(book_folder + self.input_file)
        book.read()
        book.parse_persons()
        book.find_persons_title()
        all_names_path = target_dir + self.all_names
        book.store(filename=all_names_path, data=book.persons)

        # Single occurrences must be dropped first, otherwise the eps
        # behaviour becomes unstable.
        min_occurrences = 2
        book.remove_less_than(occurrences=min_occurrences)
        frequent_names_path = (
            f"{target_dir}{stem}_names_more_than_{min_occurrences}.csv")
        book.store(filename=frequent_names_path, data=book.persons)

        book.cluster_aliases()
        book.associate_simple_single_names()
        book.associate_single_names()
        clusters_path = target_dir + self.clusters
        book.store(filename=clusters_path, data=book.cluster_repetitions)

        book.create_cluster_repetitions_df()
        pickle_path = f"{target_dir}{stem}.pkl"
        book.cluster_repetitions_df.to_pickle(pickle_path)

        book.dealiases()
        dealiased_path = f"{target_dir}{stem}_dealiased.txt"
        book.store(filename=dealiased_path,
                   data=book.dealiased_text,
                   type='txt')

        # Coreference runs after de-aliasing: it sometimes writes a name
        # right after a separator, which would otherwise glue names together
        # (e.g. "Potter,Hermione").
        book.coreference()
        final_path = target_dir + self.output_file
        book.store(filename=final_path,
                   data=book.dealiased_text,
                   type='txt')

        self.novel = book
        return book.cluster_repetitions_df
コード例 #8
0
def grab_volume(url, output_dir, cover_path):
    """
    Grab one volume and generate its EPUB file.

    Args:
        url: A string representing the url which was input by the user
        output_dir: A string representing the path of the output EPUB file
        cover_path: A string representing the path of the EPUB cover

    Raises:
        Exception: any error raised while fetching the novel or generating
            the EPUB is re-raised after it has been reported.
    """
    try:
        print_info('Getting:' + url)
        novel = Novel(url=url, single_thread=SINGLE_THREAD)
        novel.get_novel_information()
        epub = Epub(output_dir=output_dir,
                    cover_path=cover_path,
                    **novel.novel_information())
        epub.generate_epub()

    except Exception as e:
        if HAS_QT:
            # Surface the failure through the Qt signals ('错误' is the
            # user-facing "Error" title).
            SENDER.sigWarningMessage.emit('错误', str(e) + '\nat:' + url)
            SENDER.sigButton.emit()
        print(url)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
コード例 #9
0
from novel import Novel
from printer import Printer
from librarian import Librarian

# Demo script: one Novel is shared by a Librarian and a Printer, then updated.
novel = Novel("1984", "George Orwell")

# Hand the same novel instance to the librarian...
librarian = Librarian()
librarian.setNovel(novel)

# ...and to the printer.
# NOTE(review): presumably both are notified of the updates below (observer
# pattern) - confirm against the Novel, Librarian and Printer classes.
printer = Printer()
printer.setNovel(novel)

# Update the year
novel.setYear(1949)

# Update the content
novel.setContent(
    "Freedom is the freedom to say that two plus two make four. If that is granted, all else follows."
)
コード例 #10
0
ファイル: test.py プロジェクト: midear8888/Three-body-roles
from novel import Novel

# Analyze "The Three-Body Problem": the first argument is the novel's text
# file, the second a text file named after its main characters.
novel = Novel("三体.txt", "三体主要人物.txt")
# The argument reads "number of occurrences of the main characters' names";
# NOTE(review): presumably used as the chart title or file name - confirm
# against Novel.draw_picture.
novel.draw_picture("三体主要人物名字出现次数")