def book_search(name, author, publisher, destination_folder, force):
    """Search Libgen for a book and download the first epub result.

    Builds a free-text query from name/author/publisher, scrapes the
    first matching epub download link and — unless ``force`` is True —
    asks the user for confirmation before downloading into
    ``destination_folder``.
    """
    # searching Libgen for a book
    libgen_url = mirror_checker()
    if libgen_url is not None:
        br = mechanize.Browser()
        br.set_handle_robots(False)  # ignore robots
        br.set_handle_refresh(False)
        # br.addheaders = [('User-agent', 'Firefox')]
        br.open(libgen_url)
        br.select_form('libgen')
        input_form = name + ' ' + author + ' ' + publisher
        br.form['req'] = input_form
        ac = br.submit()
        html_from_page = ac
        soup = Soup(html_from_page, 'html.parser')
        try:
            line_with_epub = epub_finder(soup)
            links_with_text = [a['href'] for a in
                               soup.find_all(title="libgen", href=True)
                               if a.text]
            Downloading_page = links_with_text[line_with_epub]
            print("\nDownloading Link: FOUND")
            print(Downloading_page)
            nameofbook = file_name(Downloading_page)
            if nameofbook is None:
                # Fall back to a name built from the search terms.
                nameofbook = (name.replace('\n', '')
                              + author.replace('\n', '') + '.epub')
            if force is not True:
                # BUG FIX: the original loop re-read input without
                # re-applying .capitalize(), so a lowercase "y"/"n" on a
                # second attempt could never terminate the loop.
                decision = ""
                while decision != "Y" and decision != "N":
                    decision = input(' * BookCut found "'+nameofbook+'" '
                                     '\nDo you want to download? [Y/n] ')
                    decision = decision.capitalize()
                if decision == "Y":
                    downloading(Downloading_page, name, author, nameofbook,
                                destination_folder)
                else:
                    print("\nDownload aborted, try with a different search!")
            else:
                # --force: download without asking.
                downloading(Downloading_page, name, author, nameofbook,
                            destination_folder)
        except IndexError:
            # epub_finder found nothing usable in the scraped links.
            print("\nDownloading Link: NOT FOUND")
        print("================================ \n")
        br.close()
    else:
        print('\nNo mirrors found or bad Internet connection.')
        print('Please,try again.')
def test_single_book_download():
    """Smoke-test Booksearch: search for a well-known book and fetch details."""
    title = "Iliad"
    author = "Homer"
    publisher = " "
    type_format = " "
    book = Booksearch(title, author, publisher, type_format, mirror_checker())
    result = book.search()
    extensions = result["extensions"]
    print("extensions: ", extensions)
    tb = result["table_data"]
    mirrors = result["mirrors"]
    assert mirrors[0].startswith("http"), "Not correct format of Mirror URL."
    # isinstance is the idiomatic type check (type(x) is list was used before).
    assert isinstance(extensions, list), "Wrong format of extension details."
    # Exercise give_result too; the unused 'file_details' binding was dropped —
    # we only care that the call completes without raising.
    book.give_result(extensions, tb, mirrors, extensions[0])
def book(book, author, publisher, destination, extension, forced, md5):
    """Entry point for the `bookcut book` command.

    Announces the search, then looks the book up either by md5 hash or
    by title/author/publisher via libgen_book_find.
    """
    has_title = book != " "
    if not has_title and md5 is None:
        print("Invalid Input! Check <bookcut book --help> for more.")
    elif has_title and author != " ":
        click.echo(
            f"\nSearching for {book.capitalize()} by {author.capitalize()}")
    elif has_title:
        click.echo(f"\nSearching for {book.capitalize()}")
    url = mirror_checker()
    if url is None:
        return
    if md5 is not None:
        # An md5 hash identifies the exact file; skip the fuzzy search.
        print("\nSearching for book with md5: ", md5)
        md5_search(md5, url, destination)
    else:
        libgen_book_find(book, author, publisher, destination,
                         extension, forced, url)
def search(self):
    """Query LibGen with the instance's title/author/publisher.

    Returns a dict with keys 'extensions', 'table_data' and 'mirrors'
    (parallel lists, one entry per result row), or None after printing
    an error message when no mirror is reachable.
    """
    url = mirror_checker()
    if url is not None:
        br = mechanize.Browser()
        br.set_handle_robots(False)  # ignore robots
        br.set_handle_refresh(False)
        # br.addheaders = [('User-agent', 'Firefox')]
        br.open(url)
        br.select_form('libgen')
        input_form = self.title + self.author + self.publisher
        br.form['req'] = input_form
        ac = br.submit()
        html_from_page = ac
        soup = Soup(html_from_page, 'html.parser')
        # Results live in the third table of the page.
        table = soup.find_all('table')[2]
        table_data = []
        mirrors = []
        extensions = []
        for i in table:
            j = 0
            try:
                td = i.find_all('td')
                for tr in td:
                    # scrape mirror links (column index 9)
                    if j == 9:
                        temp = tr.find('a', href=True)
                        mirrors.append(temp['href'])
                    j = j + 1
                row = [tr.text for tr in td]
                table_data.append(row)
                extensions.append(row[8])
            except (AttributeError, TypeError, IndexError):
                # Narrowed from a bare except: NavigableString children
                # have no find_all (AttributeError), tr.find may return
                # None (TypeError on subscription), and header/short rows
                # lack column 8 (IndexError). Anything else should surface.
                pass
        table_details = dict()
        table_details['extensions'] = extensions
        table_details['table_data'] = table_data
        table_details['mirrors'] = mirrors
        return table_details
    else:
        print('\nNo results found or bad Internet connection.')
        print('Please,try again.')
def choose_a_book(dataframe):
    """Print the results table and interactively download the chosen book.

    Returns None when the search is aborted (or results are empty /
    input is invalid) and False once a download has been started.
    """
    # asks the user which book to download from the printed DataFrame
    if dataframe.empty is False:
        dataframe.index += 1
        print(dataframe[["Author(s)", "Title", "Size", "Extension"]])
        urls = dataframe["Url"].to_list()
        titles = dataframe["Title"].to_list()
        extensions = dataframe["Extension"].to_list()
        # Accepted answers: every 1-based row number plus "C"/"c" to abort.
        choices = [str(n) for n in range(1, len(urls) + 1)] + ["C", "c"]
        try:
            while True:
                answer = str(
                    input("\n\nPlease enter a number from 1 to {number}"
                          ' to download a book or press "C" to abort'
                          " search: ".format(number=len(urls))))
                if answer not in choices:
                    continue
                if answer in ("C", "c"):
                    print("Aborted!")
                    return None
                idx = int(answer) - 1
                filename = titles[idx] + "." + extensions[idx]
                filename = filename_refubrished(filename)
                if urls[idx].startswith("https://export.arxiv.org/"):
                    # arXiv links are already direct download URLs.
                    search_downloader(filename, urls[idx])
                else:
                    mirror_used = mirror_checker(False)
                    link = mirror_used + urls[idx]
                    details = link_finder(link, mirror_used)
                    search_downloader(filename, details[1])
                return False
        except ValueError:
            print(RESULT_ERROR)
            print("Please,try again.")
            return None
    else:
        print(RESULT_ERROR)
def booklist_main(file, destination, forced, extension):
    """executes with the command --list"""
    entries = file_list(file)
    if entries is None:
        return
    print("List imported succesfully!")
    url = mirror_checker()
    if url is None:
        return
    counter = 1
    total = len(entries)
    for title in entries:
        # Blank lines are skipped and do not advance the counter.
        if title != "":
            print(f"~[{counter}/{total}] Searching for:", title)
            counter += 1
            libgen_book_find(title, "", "", destination,
                             extension, forced, url)
def search(term):
    """Search LibGen for *term* and let the user pick a result.

    Used by the `bookcut search -t "keyword"` command. Prints the
    scraped results table, prompts for a row number, and returns
    [mirror_url, extension] for the chosen row — or None on abort or
    when no results were found.
    """
    url = mirror_checker()
    if url is not None:
        br = mechanize.Browser()
        br.set_handle_robots(False)  # ignore robots
        br.set_handle_refresh(False)
        # br.addheaders = [("User-agent", "Firefox")]
        br.open(url)
        br.select_form("libgen")
        input_form = term
        br.form["req"] = input_form
        ac = br.submit()
        html_from_page = ac
        soup = Soup(html_from_page, "html.parser")
        # Results live in the third table of the page.
        table = soup.find_all("table")[2]
        table_data = []
        mirrors = []
        extensions = []
        for i in table:
            j = 0
            try:
                td = i.find_all("td")
                for tr in td:
                    # scrape mirror links (column index 9)
                    if j == 9:
                        temp = tr.find("a", href=True)
                        mirrors.append(temp["href"])
                    j = j + 1
                row = [tr.text for tr in td]
                table_data.append(row)
                extensions.append(row[8])
            except (AttributeError, TypeError, IndexError):
                # Narrowed from a bare except: skip non-Tag children and
                # rows without enough columns, but let real bugs surface.
                pass
        # Clean result page
        for j in table_data:
            j.pop(0)
            del j[8:15]
        headers = [
            "Author(s)",
            "Title",
            "Publisher",
            "Year",
            "Pages",
            "Language",
            "Size",
            "Extension",
        ]
        try:
            # Column assignment raises ValueError when the result table
            # is empty/malformed — handled below as "no results".
            tabular = pd.DataFrame(table_data)
            tabular.index += 1
            tabular.columns = headers
            print(tabular)
            choices = []
            temp = len(mirrors) + 1
            for i in range(1, temp):
                choices.append(str(i))
            choices.append("C")
            choices.append("c")
            while True:
                tell_me = str(
                    input("\n\nPlease enter a number from 1 to {number}"
                          ' to download a book or press "C" to abort'
                          " search: ".format(number=len(extensions))))
                if tell_me in choices:
                    if tell_me == "C" or tell_me == "c":
                        print("Aborted!")
                        return None
                    else:
                        c = int(tell_me) - 1
                        results = [mirrors[c], extensions[c]]
                        return results
        except ValueError:
            print("\nNo results found or bad Internet connection.")
            print("Please,try again.")
            return None
    else:
        print("\nNo results found or bad Internet connection.")
        print("Please,try again.")
def search(term):
    """Search LibGen for *term* and let the user pick a result.

    Used by the `bookcut search -t "keyword"` command. Prints the
    scraped results table, prompts for a row number, and returns
    [mirror_url, extension] for the chosen row — or None on abort or
    when no results were found.
    """
    url = mirror_checker()
    if url is not None:
        br = mechanize.Browser()
        br.set_handle_robots(False)  # ignore robots
        br.set_handle_refresh(False)
        # br.addheaders = [('User-agent', 'Firefox')]
        br.open(url)
        br.select_form('libgen')
        input_form = term
        br.form['req'] = input_form
        ac = br.submit()
        html_from_page = ac
        soup = Soup(html_from_page, 'html.parser')
        # Results live in the third table of the page.
        table = soup.find_all('table')[2]
        table_data = []
        mirrors = []
        extensions = []
        for i in table:
            j = 0
            try:
                td = i.find_all('td')
                for tr in td:
                    # scrape mirror links (column index 9)
                    if j == 9:
                        temp = tr.find('a', href=True)
                        mirrors.append(temp['href'])
                    j = j + 1
                row = [tr.text for tr in td]
                table_data.append(row)
                extensions.append(row[8])
            except (AttributeError, TypeError, IndexError):
                # Narrowed from a bare except: skip non-Tag children and
                # rows without enough columns, but let real bugs surface.
                pass
        # Clean result page
        for j in table_data:
            j.pop(0)
            del j[8:15]
        headers = [
            'Author(s)', 'Title', 'Publisher', 'Year', 'Pages',
            'Language', 'Size', 'Extension'
        ]
        try:
            # Column assignment raises ValueError when the result table
            # is empty/malformed — handled below as "no results".
            tabular = pd.DataFrame(table_data)
            tabular.index += 1
            tabular.columns = headers
            print(tabular)
            choices = []
            temp = len(mirrors) + 1
            for i in range(1, temp):
                choices.append(str(i))
            choices.append('C')
            choices.append('c')
            while True:
                tell_me = str(
                    input('\n\nPlease enter a number from 1 to {number}'
                          ' to download a book or press "C" to abort'
                          ' search: '.format(number=len(extensions))))
                if tell_me in choices:
                    if tell_me == 'C' or tell_me == 'c':
                        print("Aborted!")
                        return None
                    else:
                        c = int(tell_me) - 1
                        results = [mirrors[c], extensions[c]]
                        return results
        except ValueError:
            print('\nNo results found or bad Internet connection.')
            print('Please,try again.')
            return None
    else:
        print('\nNo results found or bad Internet connection.')
        print('Please,try again.')
def libgen_repo(term):
    """Search LibGen for *term* and return the results as a DataFrame.

    Columns are Author(s), Title, Publisher, Year, Pages, Language,
    Size, Extension plus a 'Url' column with the mirror link per row.
    Returns an empty DataFrame when the results cannot be parsed.
    """
    # Searching LibGen and returns results DataFrame
    try:
        url = mirror_checker()
        if url is not None:
            br = mechanize.Browser()
            br.set_handle_robots(False)  # ignore robots
            br.set_handle_refresh(False)
            # br.addheaders = [("User-agent", "Firefox")]
            br.open(url)
            br.select_form("libgen")
            input_form = term
            br.form["req"] = input_form
            ac = br.submit()
            html_from_page = ac
            html_soup = soup(html_from_page, "html.parser")
            # Results live in the third table of the page.
            table = html_soup.find_all("table")[2]
            table_data = []
            mirrors = []
            extensions = []
            for i in table:
                j = 0
                try:
                    td = i.find_all("td")
                    for tr in td:
                        # scrape mirror links (column index 9)
                        if j == 9:
                            temp = tr.find("a", href=True)
                            mirrors.append(temp["href"])
                        j = j + 1
                    row = [tr.text for tr in td]
                    table_data.append(row)
                    extensions.append(row[8])
                except (AttributeError, TypeError, IndexError):
                    # Narrowed from a bare except: skip non-Tag children
                    # and short rows; let unexpected errors surface.
                    pass
            # Clean result page
            for j in table_data:
                j.pop(0)
                del j[8:15]
            headers = [
                "Author(s)",
                "Title",
                "Publisher",
                "Year",
                "Pages",
                "Language",
                "Size",
                "Extension",
            ]
            tabular = pd.DataFrame(table_data)
            tabular.columns = headers
            tabular["Url"] = mirrors
            return tabular
    except ValueError:
        # Raised by the DataFrame column/Url assignment when no rows
        # were scraped — report "no results" as an empty frame.
        # create emptyDataframe
        df = pd.DataFrame()
        return df
def test_mirror_availability():
    """The mirror checker must return a usable LibGen base URL."""
    available_mirror = mirror_checker()
    # isinstance is the idiomatic type check (type(x) is str was used before).
    assert isinstance(available_mirror, str), "Not correct type of LibGen Url"
    assert available_mirror.startswith("http"), "Not correct LibGen Url."