def _loadFile(self):
    """Load books from the comma-separated text file ``self._fileName``.

    Each line holds ``id,title,author`` with an optional fourth
    ``description`` field. Reading stops at the first blank line or at end
    of file, and every parsed book is stored through ``BookRepository.add``.

    Raises:
        RepoError: if the file cannot be opened or a line cannot be parsed.
    """
    try:
        # The context manager closes the file on every path. The previous
        # ``finally: f.close()`` referenced an unbound ``f`` whenever
        # ``open`` itself failed, which replaced the intended RepoError.
        with open(self._fileName, "r") as f:
            s = f.readline()
            while len(s) > 1:
                tok = s.split(",")
                # Turn the comma-separated fields into a Book entity; the
                # last field still carries the line terminator.
                if len(tok) == 3:
                    book = Book(int(tok[0]), tok[1], tok[2].split("\n")[0])
                else:
                    book = Book(int(tok[0]), tok[1], tok[2],
                                tok[3].split("\n")[0])
                BookRepository.add(self, book)
                s = f.readline()
    except Exception as e:
        raise RepoError("Error reading input file: " + str(e))
def json_to_book(self, json_book):
    """Build a Book from a decoded JSON object.

    ``json_book`` must provide "id", "title" and "author"; the optional
    "description" entry is passed as a fourth constructor argument when
    present.
    """
    has_description = "description" in json_book
    book_args = [int(json_book["id"]), json_book["title"], json_book["author"]]
    if has_description:
        book_args.append(json_book["description"])
    return Book(*book_args)
def crawlerBook(url, imagePath):
    """Fetch ``url``, scrape it into a Book and store its cover image.

    Args:
        url: address of the book page to download.
        imagePath: prefix prepended to the generated cover file name.

    Returns:
        The populated Book. ``coverImageId`` is set, and the cover is handed
        to ``imageSaver.saveImageFile``, only when both the cover URL and
        the ISBN were found.
    """
    print("now :" + url)
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'}
    html = requests.get(url, timeout=(30.0, 30.0), headers=header).text
    soup = BeautifulSoup(html, "html.parser")
    book = Book(
        isbn=crawler.getIsbn(soup),
        name=crawler.getName(soup),
        name2=crawler.getName2(soup),
        author=crawler.getAuthor(soup),
        author2=crawler.getAuthor2(soup),
        translator=crawler.getTranslator(soup),
        publisher=crawler.getPublisher(soup),
        publicationDate=crawler.getPublicationDate(soup),
        language=crawler.getLanguage(soup),
        collection=crawler.getCollection(soup),
        specification=crawler.getSpecification(soup),
        publication=crawler.getPublication(soup),
        classification=crawler.getClassification(soup),
        coverImageUrl=crawler.getCoverImageUrl(soup),
        bookIntroduction=crawler.getBookIntroduction(soup),
        authorIntroduction=crawler.getAuthorIntroduction(soup),
        catalog=crawler.getCatalog(soup),
        preface=crawler.getPreface(soup),
        fromWhere="books",
    )
    book.bookUrl = url

    # Save the cover image. Identity checks against None replace the former
    # ``!= None`` comparisons, which is the recommended singleton test.
    if book.coverImageUrl is not None and book.isbn is not None:
        book.coverImageId = book.isbn + "-" + book.fromWhere + ".jpg"
        imageSaver.saveImageFile(imagePath + book.coverImageId, book.coverImageUrl)
    return book
def crawling(self):
    """Crawl the book at ``self.root_url`` into a fresh ``self.book``.

    Returns True and sets ``self.status`` to True once every chapter has
    been fetched and validated. Returns False, after logging the reason,
    when the root URL is unset, the general info is invalid, or any chapter
    fails validation.
    """
    self.book = Book()  # start from a clean Book so a retry sees no stale data
    log.info("Crawling function is call")

    if not self.root_url:
        log.error("not define root url")
        return False

    thumb, title, author, chapter_params = self.get_general_info()
    if not Book.general_validate(title, author, chapter_params):
        log.error("No validate general info")
        return False

    self.book.set_thumb(thumb)
    self.book.set_title(title)
    self.book.set_author(author)
    for param in chapter_params:
        fetched = self.get_chapter(param)
        if not Book.chapter_validate(fetched):
            log.error("Chapter: %s is fail crawled", fetched)
            return False
        self.book.add_chapter(fetched)

    self.status = True
    return True
def insertBook():
    """Insert a book for the store named in the request parameters.

    Reads "storeName", "bookName" and "price" from ``get_param()`` and
    returns a confirmation message dictionary.
    """
    request_params = get_param()
    target_store = BookStore.get(request_params['storeName'])
    new_book = Book(
        target_store.id,
        request_params["bookName"],
        request_params["price"],
    )
    new_book.insert()
    return {"message": "Insert done!"}
def create(book_dict):
    """
    Create a new Book record from a dictionary of book values.

    Does NOT create associated Author or BookCopy records.

    :param book_dict: dictionary of book values for the new record.
    :return: a dictionary representation of the created book.
    """
    print("BookDao.create()")
    record = Book(**book_dict)
    # The incoming publish date is re-parsed with ``parser.parse`` before
    # the record is stored.
    record.publish_date = parser.parse(record.publish_date)
    session = db.session
    session.add(record)
    session.commit()
    print("book_dao.create() ==> Complete")
    return record.to_dict()
def user_buy_book():
    """Record a purchase of a book by a user from a book store.

    Reads "book_store_name", "book_name" and "id" from ``get_param()``.
    Returns ``{'result': ...}`` holding either an error message or the
    dumped purchase-history record.
    """
    params = get_param()
    book_store_name = params.get('book_store_name', None)
    book_name = params.get('book_name', None)
    user_id = params.get('id', None)
    if any(value is None for value in (book_store_name, book_name, user_id)):
        return {'result': 'Error!'}

    store = BookStore.get(book_store_name)
    if store is None:
        return {'result': 'Book Store is not found!'}

    price = Book.get_price_by_store_name_book_name(store.storeName, book_name)
    if price is None:
        return {'result': 'Book is not found!'}

    buyer = User.get_by_id(user_id)
    if buyer is None:
        return {'result': 'User is not found!'}

    # Move the price from the buyer's balance to the store's balance.
    amount = price[0]
    buyer.cashBalance -= amount
    store.cashBalance += amount
    buyer.update()
    store.update()

    purchase = PurchaseHistory(user_id, book_name, book_store_name, amount, "today")
    purchase.insert()
    return {'result': purchage_history_schema.dump(purchase)}
def get(self):
    """Render the index page with the latest activities and books."""
    # Cache lookup is currently disabled: memcache.get("index")
    page = False
    if not page:
        logging.info("cache not hit(index)")
        user = users.get_current_user()
        if user:
            nickname = user.nickname()
            logout_url = users.create_logout_url("/")
            greeting = '%s : <a href="%s">logout</a>' % (nickname, logout_url)
        else:
            greeting = '<a href="%s">login</a>' % users.create_login_url("/")

        recent_activities = Activity.all().order('-created_at').fetch(5)
        recent_books = Book.all().order('-created_at').fetch(1000)
        template_values = {
            'greeting': greeting,
            'user': user,
            'activities': recent_activities,
            'books': recent_books,
        }
        view_path = os.path.join(os.path.dirname(__file__), '..', 'view', 'index.html')
        page = template.render(view_path, template_values)
        # Cache store is currently disabled: memcache.set("index", page, 600)
    self.response.out.write(page)
def test_Repo(self):
    """Exercise add, update, remove, list, find and search on the repository."""
    repo = self.__repo
    known_book = self.__book
    invalid_book = self.__nobook

    # Adding: the repository grows by one and rejects a duplicate.
    self.assertEqual(len(repo), 0)
    repo.add(known_book)
    self.assertEqual(len(repo), 1)
    with self.assertRaises(RepoError):
        repo.add(known_book)

    # Removing or updating a book that is not stored must fail.
    with self.assertRaises(RepoError):
        repo.remove(invalid_book)
    with self.assertRaises(RepoError):
        repo.update(invalid_book)
    repo.update(known_book)

    expected_listing = (
        "ID \t Title \t Author \t\t\t\t Description \n {}, {} by {}, described as follows: {}\n"
    ).format(self.__id, self.__title, self.__author, self.__description)
    self.assertEqual(repo.list(), expected_listing)

    # Listing an empty repository raises.
    repo.remove(known_book)
    with self.assertRaises(RepoError):
        repo.list()

    # Lookup and search helpers.
    repo.add(known_book)
    asc_book = Book(45, "ASC", "Vancea", "du-te la cursurile de ASC")
    repo.add(asc_book)
    self.assertEqual(repo.find(45), asc_book)
    self.assertEqual(repo.find(9), None)
    self.assertEqual(repo.exists_book(asc_book), True)
    self.assertNotEqual(len(repo.get_books()), 0)
    self.assertEqual(len(repo.search_id(45)), 1)
    self.assertEqual(len(repo.search_author("van")), 1)
    self.assertEqual(len(repo.search_description("ursuri")), 1)
    self.assertEqual(len(repo.search_title("asc")), 1)
def book1():
    """Return the sample Book with id 1 ('Old Man')."""
    return Book(
        book_id=1,
        title='Old Man',
        publish_date='1980',
        subject='Fiction',
        genre='Novel',
    )
def book2():
    """Return the sample Book with id 2 ('The Left Hand of Darkness')."""
    return Book(
        book_id=2,
        title='The Left Hand of Darkness',
        publish_date='1975',
        subject='Fiction',
        genre='Science Fiction',
    )
def get_item():
    """
    Render the item.html template with every book returned by Book.get_all().
    """
    with Book() as book_store:
        all_books, books_json = book_store.get_all()
        return render_template('item.html', books=all_books, json_data=books_json)
def createBook(self, bookList):
    """Build a Book from a positional record.

    ``bookList`` is indexed as: 0 author, 1 page count, 2 ISBN, 3 title,
    4 publisher, 5 year. The page count and year are converted with ``int``.
    """
    book_author = bookList[0]
    page_count = int(bookList[1])
    book_isbn = bookList[2]
    book_title = bookList[3]
    book_publisher = bookList[4]
    release_year = int(bookList[5])
    return Book(book_author, page_count, book_isbn, book_title,
                book_publisher, release_year)
def create_data():
    """
    Insert the request's JSON payload as a new document via Book.insertDocument.

    Responds with an empty ``req`` JSON body and HTTP status 200.
    """
    payload = request.get_json()
    with Book() as book_store:
        book_store.insertDocument(payload)
        response_body = jsonify({'req': ''})
        return make_response(response_body, 200)
def setUp(self):
    """Create a valid book, an invalid book, a validator and an empty repository."""
    unittest.TestCase.setUp(self)

    # Field values of the valid book.
    self.__id, self.__title = 23, "FP"
    self.__description = "work on your assignments daily"
    self.__author = "Arthur Molnar"
    self.__book = Book(self.__id, self.__title, self.__author, self.__description)

    # Field values of the book the tests expect to be rejected:
    # negative id, empty title and empty author.
    self.__noid, self.__notitle, self.__noauthor = -85, "", ""
    self.__nobook = Book(self.__noid, self.__notitle, self.__noauthor, self.__description)

    self.__validator = BookValidator()
    self.__repo = BookRepository()
def reandAdCreateBook(self) -> Book:
    """Prompt on stdin for the book fields and return the resulting Book.

    The prompts are asked in this order: title, author, publisher, ISBN,
    release year, page count. The year and page count are converted with
    ``int``.
    """
    book_title = input("Podaj tytuł: ")
    book_author = input("Autor: ")
    book_publisher = input("Wydawnictwo: ")
    book_isbn = input("ISBN: ")
    release_year = int(input("Rok wydania: "))
    page_count = int(input("Podaj liczbe stron: "))
    return Book(book_author, page_count, book_isbn, book_title,
                book_publisher, release_year)
def update_data():
    """
    Update the document identified by the payload's ``_id.$oid`` value.

    The whole JSON payload is passed to Book.updateDocument. Responds with an
    empty ``req`` JSON body and HTTP status 200.
    """
    payload = request.get_json()
    document_id = payload['_id']['$oid']
    with Book() as book_store:
        book_store.updateDocument(document_id, payload)
        response_body = jsonify({'req': ''})
        return make_response(response_body, 200)
def gridbooksCurrentCellChanged(self):
    """Make the book in the grid's current row the current book.

    Does nothing when ``getCurrentCell`` returns False.
    """
    current_cell = self.getCurrentCell()
    if current_cell is False:
        return

    row_index = current_cell.RowIndex
    trace.printlog('currentRowIndex:' + str(row_index))

    data_table = self.view.getattr('gridbooks', 'DataSource')
    row = data_table.Rows[row_index]
    selected = Book(row['Id'], row['BookName'], row['BuyPrice'],
                    row['BuyDate'], row['Flag'])
    self.setcurrbook(selected)
def index():
    """
    Render the homepage (index.html) with all books and the rendered item view.
    """
    with Book() as book_store:
        all_books, books_json = book_store.get_all()
        rendered_item = get_item()
        return render_template(
            'index.html',
            books=all_books,
            item=rendered_item,
            json_data=books_json,
        )
def parse_document_list(document_list):
    """
    Determine the type of each document in the list and add it to the library.

    A line without ":" starts a document and holds its type. The following
    lines hold its fields, each read by skipping a fixed-length label prefix.
    Documents are stored in ``settings.LIBRARY`` under the lower-cased key.
    Unrecognized types are reported on stdout.
    """

    def field(position, prefix_len):
        # Value of the line at ``position`` with its label prefix removed.
        return document_list[position][prefix_len:].strip()

    for index, line in enumerate(document_list):
        if ":" in line:
            continue  # a field line, not the start of a document

        doc_type = line.strip()
        doc_key = field(index + 1, 4)
        author_list = field(index + 2, 7).split(",")
        kind = doc_type.lower()

        if "book" in kind:
            settings.LIBRARY[doc_key.lower()] = Book(
                doc_key,               # book key
                author_list,           # authors
                field(index + 3, 7),   # title
                field(index + 4, 11),  # publisher
                field(index + 5, 6),   # date
                doc_type,              # document type
            )
        elif "journal" in kind:
            settings.LIBRARY[doc_key.lower()] = Journal(
                doc_key,               # journal key
                author_list,           # authors
                field(index + 3, 7),   # title
                field(index + 4, 9),   # journal
                field(index + 5, 11),  # publisher
                field(index + 6, 6),   # date
                field(index + 7, 8),   # volume
                field(index + 8, 8),   # number
                doc_type,              # document type
            )
        elif "conference" in kind:
            settings.LIBRARY[doc_key.lower()] = Conference(
                doc_key,               # conference key
                author_list,           # authors
                field(index + 3, 7),   # title
                field(index + 4, 12),  # conference
                field(index + 5, 6),   # date
                field(index + 6, 10),  # location
                field(index + 7, 7),   # pages
                doc_type,              # document type
            )
        else:
            print(
                f"One of the documents in the list is not a recognized type. Document type: {doc_type}; With key: {doc_key}"
            )
def delete(self, key):
    """Delete the book stored under key name ``"Book_" + key``.

    Only admins may delete; other callers return without any response body.
    The book's comments and stocks are deleted before the book itself, then
    a 'delete' Activity is recorded.
    """
    # FIXME: dirty
    if not users.is_current_user_admin():
        return
    book = Book.get_by_key_name("Book_" + key)
    # NOTE(review): the original source is collapsed onto one line, so the
    # extent of this ``if`` body is inferred. It is assumed that "ok" is
    # written whether or not the book exists; confirm.
    if book:
        for comment in book.comments:
            comment.delete()
        for stock in book.stocks:
            stock.delete()
        book.delete()
        # The Activity is created with the book that was just deleted.
        Activity(type='delete', book=book).put()
    self.response.out.write("ok")
    return
async def get_book_with_isbn(isbn: str):
    """Return a hard-coded sample Book; the ``isbn`` argument is not used."""
    sample_author = Author(name="name1", book=["book1", "book2"])
    return Book(
        name="mr kishan",
        isbn="124",
        author=sample_author,
        year=1292,
    )
def post(self):
    """Update the book named by the "id" form field, then redirect to /seeBooks.

    The fields "isbn", "type", "title", "author" and "avaliable" are copied
    from the request onto the book when it exists.
    """
    form = self.request
    book_id = int(form.get("id"))
    book_to_edit = Book.get_by_id(book_id)
    if book_to_edit:
        book_to_edit.isbn = int(form.get("isbn"))
        book_to_edit.type = form.get("type")
        book_to_edit.title = form.get("title")
        book_to_edit.author = form.get("author")
        book_to_edit.avaliable = int(form.get("avaliable"))
        book_mgt.update(book_to_edit)
    self.redirect("/seeBooks")
def __can_rent(self, bid, cid):
    """
    Check that both the book and the client exist, so a rental makes sense.

    Input: bid - positive integer
           cid - positive integer
    Output: True if the book identified by bid and the client identified by
            cid both exist, False otherwise
    """
    # Placeholder entities carrying only the ids are passed to the lookups.
    probe_book = Book(bid, None, None, None)
    probe_client = Client(cid, None)
    if not self.__book_repo.exists_book(probe_book):
        return False
    if not self.__client_repo.exists_client(probe_client):
        return False
    return True
def load_library():
    """Read ``static/books.csv`` and build a Book or Journal for every row.

    Rows whose first field is 'book' are read as
    ``book,author,title,copies,borrowed,short_term``. Every other row is
    read as ``<kind>,title,number,borrowed``.

    Raises:
        FileNotFoundError: if ``static/books.csv`` is not a file.
    """
    csv_path = 'static/books.csv'
    if not os.path.isfile(csv_path):
        raise FileNotFoundError

    with open(csv_path, 'r') as csv_file:
        for raw_line in csv_file:
            fields = raw_line.strip('\n').split(',')

            if fields[0] != 'book':
                journal_title = fields[1]
                journal_number = fields[2]
                journal = Journal(journal_title, journal_number)
                if fields[3] == 'y':
                    journal.is_borrowed = True
                continue

            book_author = fields[1]
            book_title = fields[2]
            book = Book(book_author, book_title)
            book.add_copy(int(fields[3]))
            for copy in book.copies:
                if fields[5] == 'y':
                    copy.is_short_term = True
            # The first ``fields[4]`` copies are marked as borrowed.
            for copy in book.copies[:int(fields[4])]:
                copy.is_borrowed = True
def to_model(transport):
    """Convert a transport object into a Book model.

    ``transport.id`` becomes ``book_id``; every other attribute is copied
    under its own name.
    """
    same_named_fields = (
        'title', 'synopsis', 'isbn10', 'isbn13', 'language', 'publisher',
        'edition', 'paperback_price', 'ebook_price', 'sold_amount',
        'current_amount', 'category', 'created_time', 'modified_time',
    )
    book_kwargs = {'book_id': transport.id}
    for field_name in same_named_fields:
        book_kwargs[field_name] = getattr(transport, field_name)
    return Book(**book_kwargs)
def get(self):
    """Reserve the book named by the "id" parameter for the signed-in user.

    A reservation is created only when a user is signed in and
    ``reserve_mgt.retrieve`` finds nothing for the book's ISBN. The book's
    ``avaliable`` count is then decreased by one. Always redirects to
    /seeBooks.
    """
    user = users.get_current_user()
    book_id = int(self.request.get("id"))
    book = Book.get_by_id(book_id)

    can_reserve = user and not reserve_mgt.retrieve(book.isbn)
    if can_reserve:
        reserve = reserve_mgt.create_empty_reserve()
        reserve.email = users.get_current_user().email()
        book.avaliable = book.avaliable - 1
        reserve.isbn = book.isbn
        book_mgt.update(book)
        reserve_mgt.update(reserve)

    self.redirect("/seeBooks")
def update_book(self, bid, title, author, description=None):
    """
    Create a book with the given attributes and use it to update the book
    with the same id already stored in the repository.

    Input: bid - positive integer (id of a book already in the repository)
           title, author - string
           description - string (defaults to None)
    """
    replacement = Book(bid, title, author, description)
    self.__book_valid.valid_book(replacement)
    # The repository's return value supplies the title, author and
    # description used below to build the undo call.
    previous = self.__book_repo.update(replacement)

    redo_call = FunctionCall(self.update_book, bid, title, author, description)
    undo_call = FunctionCall(
        self.update_book,
        bid,
        previous.get_title(),
        previous.get_author(),
        previous.get_description(),
    )
    self.__undoService.add(Operations(undo_call, redo_call))
def get(self):
    """Render showBook.html for the book named by the "id" parameter.

    Nothing is written to the response when no user is signed in.
    """
    user = users.get_current_user()
    if not user:
        return

    access_link = users.create_logout_url("/")
    book_id = int(self.request.get("id"))
    book = Book.get_by_id(book_id)
    template_values = {
        "book": book,
        "titleView": "Ver en detalle",
        "user": user.email(),
        "access_link": access_link,
    }
    renderer = jinja2.get_jinja2(app=self.app)
    page = renderer.render_template("showBook.html", **template_values)
    self.response.write(page)
def get_content(html):
    """Collect a Book for every ``div.bookkitem`` element found in the page.

    Each Book is built from, in order: title, genre, author, narrator text,
    description, and the cover link prefixed with HOST.
    """
    soup = BeautifulSoup(html, 'html.parser')
    books = []
    for card in soup.find_all('div', class_='bookkitem'):
        title = card.find('a', class_='bookkitem_name').get_text(strip=True)

        genre_tag = card.find('div', class_='bookkitem_genre')
        genre = genre_tag.get_text(strip=True).replace('\n', '')

        # The author element is optional; its label word is removed.
        author_tag = card.find('span', class_='bookkitem_author')
        if author_tag:
            author = author_tag.get_text(strip=True)
            author = author.replace('авторы', '').replace('автор', '')
        else:
            author = 'Автор неизвестен'

        # If the meta block text still contains 'минут' after the label is
        # removed, the narrator placeholder is used instead.
        meta_text = card.find('div', class_='bookkitem_meta_block').get_text(strip=True)
        meta_text = meta_text.replace('Читает', '').replace('Читают', '')
        if 'минут' not in meta_text:
            narrator = meta_text
        else:
            narrator = 'Исполнитель неизвестен'

        # Strip line breaks and slashes, and replace accented vowels with
        # their plain forms.
        about = card.find('div', class_='bookkitem_about').get_text(strip=True)
        for old, new in (
            ('\n', ''),
            ('\r', ''),
            ('/', ''),
            ('а́', 'а'),
            ('и́', 'и'),
            ('о́', 'о'),
        ):
            about = about.replace(old, new)

        cover_link = HOST + card.find('a', class_='bookkitem_cover').get('href')

        books.append(Book(title, genre, author, narrator, about, cover_link))
    return books
def post(self):
    """Store a comment on a book and write the rendered comment fragment.

    Returns without a response body when the book named by the "book"
    parameter does not exist or the "body" parameter is empty.
    """
    book_key = "Book_" + self.request.get('book')
    book = Book.get_by_key_name(book_key)
    if not book:
        return

    body = self.request.get('body')
    if not body:
        return

    comment = Comment(book=book, body=body)
    comment.put()
    Activity(type='comment', book=book).put()

    template_values = {
        'comment': comment,
        'user': users.get_current_user(),
    }
    view_path = os.path.join(os.path.dirname(__file__), '..', 'view', 'comment/index.html')
    self.response.out.write(template.render(view_path, template_values))
def setUp(self):
    """Create sample entities and wire the repositories, validators and services."""
    # Sample book.
    self.__id_b, self.__title = 23, "FP"
    self.__description = "work on your assignments daily"
    self.__author = "Arthur Molnar"
    self.__book = Book(self.__id_b, self.__title, self.__author, self.__description)

    # Sample client.
    self.__id_cl, self.__name = 3, "Corina"
    self.__client = Client(self.__id_cl, self.__name)

    # Sample rental.
    self.__rid, self.__bid, self.__cid = 4, 8, 2
    self.__rdate = "25.11.2018"
    self.__duedate = "2.12.2018"
    self.__returdate = "28.11.2018"
    self.__rental = Rental(
        self.__rid, self.__bid, self.__cid,
        self.__rdate, self.__duedate, self.__returdate,
    )

    # Validators and repositories.
    self.__bvalid = BookValidator()
    self.__cvalid = ClientValidator()
    self.__rvalid = RentalValidator()
    self.__brepo = BookRepository()
    self.__crepo = ClientRepository()
    self.__rrepo = RentalRepository()

    # Services share one undo service; the book and client services also
    # receive the rental service.
    self.__undoserv = UndoService()
    self.__rental_serv = RentalService(
        self.__rrepo,
        self.__rvalid,
        self.__brepo,
        self.__crepo,
        self.__undoserv,
    )
    self.__book_serv = BookService(
        self.__brepo, self.__bvalid, self.__undoserv, self.__rental_serv)
    self.__client_serv = ClientService(
        self.__crepo, self.__cvalid, self.__undoserv, self.__rental_serv)
def add_book(self, bid, title, author, description=None):
    """
    Create a book with the given attributes and add it to the repository.

    Input: bid - positive integer
           title, author - string
           description - string (defaults to None)
    """
    new_book = Book(bid, title, author, description)
    self.__book_valid.valid_book(new_book)
    self.__book_repo.add(new_book)

    # Reached only when validation and insertion succeeded: register the
    # undo/redo pair for this operation.
    redo_call = FunctionCall(self.add_book, bid, title, author, description)
    undo_call = FunctionCall(self.remove_book, bid)
    self.__undoService.add(Operations(undo_call, redo_call))
def get_book(self, book_id):
    """Fetch the book row with the given id and return it as a Book.

    Returns None when no row matches. The pooled connection is always
    returned to the pool.
    """
    found = None
    connection = self.db_connection_pool.getconn()
    try:
        with connection.cursor() as cursor:
            query = """ SELECT {columns} FROM book WHERE id=%s """.format(
                columns=BookRepository.columns)
            cursor.execute(query, (book_id,))
            row = cursor.fetchone()
            if row is not None:
                # The first fifteen columns map positionally onto Book().
                found = Book(*[row[position] for position in range(15)])
    finally:
        self.db_connection_pool.putconn(connection)
    return found
def post(self):
    """Register a stock of the book with the submitted ISBN.

    Requires a signed-in user. When the book has no title yet it is built
    from its ISBN; if that fails the request is redirected to "/". The stock
    is owned by the "owner" e-mail parameter when given, otherwise by the
    current user. An 'add' Activity is recorded and the request is
    redirected to the book's path.
    """
    # FIXME: need exception
    if not users.get_current_user():
        return
    book = Book.create_from_isbn(self.request.get('isbn'))
    if not book.title:
        try:
            book.build_from_isbn()
        except CantBuildBook:
            self.redirect('/')
            return
        # NOTE(review): the original source is collapsed onto one line, so
        # the nesting of this put() is inferred. It is assumed to belong to
        # the "no title yet" branch; confirm.
        book.put()
    if self.request.get('owner'):
        owner = users.User(email = self.request.get('owner'))
    else:
        owner = users.get_current_user()
    Stock(book=book, owner=owner).put()
    Activity(type='add', book=book).put()
    self.redirect(book.path())
def deepCopy(self, other):
    '''
    Copy every book, client and loan of another LibraryRepository into this one.

    New Book, Client and Loan objects are created, so the two repositories
    do not share entity objects. Loans are rebuilt from this repository's
    own copies, looked up by client CNP and book id.
    :param other: another LibraryRepository
    '''
    copied_books = []
    for source_book in other.getBooks():
        copied_books.append(Book(
            source_book.getId(),
            source_book.getTitle(),
            source_book.getDescription(),
            source_book.getAuthor(),
        ))
    self._books = copied_books

    copied_clients = []
    for source_client in other.getClients():
        copied_clients.append(Client(source_client.getCnp(), source_client.getName()))
    self._clients = copied_clients

    copied_loans = []
    for source_loan in other.getLoans():
        own_client = self.searchClient(source_loan.getClient().getCnp())
        own_book = self.searchBook(source_loan.getBook().getId())
        copied_loans.append(Loan(own_client, own_book))
    self._loans = copied_loans
def get(self, key):
    """Render the page of the book stored under ``"Book_" + key``.

    The key 'add' is delegated to ``post``. A missing book renders the
    not-found view instead.
    """
    if key == 'add':
        self.post()
        return

    # Cache lookup is currently disabled: memcache.get("index")
    cached_page = False
    if cached_page:
        self.response.out.write(cached_page)
        return

    user = users.get_current_user()
    if user:
        logout_url = users.create_logout_url("/")
        greeting = '<a href="%s">logout %s</a>' % (logout_url, user.nickname())
    else:
        greeting = '<a href="%s">login</a>' % users.create_login_url("/")
    logging.info("cache not hit()")

    book = Book.get_by_key_name("Book_" + key)
    template_values = {'user': user, 'greeting': greeting}
    if book:
        template_values['book'] = book
        view_name = 'book/index.html'
    else:
        template_values['key'] = key
        view_name = 'book/not_found.html'

    view_path = os.path.join(os.path.dirname(__file__), '..', 'view', view_name)
    page = template.render(view_path, template_values)
    # Cache store is currently disabled: memcache.set("index", page, 600)
    self.response.out.write(page)
def put(self, key):
    """Update the current user's stock description, or record a lend/return.

    Writes "deny" when nobody is signed in and "ng" when the book does not
    exist. With a "description" parameter the user's stock of the book is
    updated, or 'no stock' is written when there is none. Without it, the
    result of ``book.lent_or_return()`` is recorded as an Activity.
    """
    out = self.response.out
    if not users.get_current_user():
        out.write("deny")
        return

    book = Book.get_by_key_name("Book_" + key)
    if not book:
        out.write("ng")
        return

    if not self.request.get('description'):
        activity_type = book.lent_or_return()
        Activity(type=activity_type, book=book).put()
        return

    stock = book.mystock()
    if not stock:
        out.write('no stock')
        return

    stock.description = self.request.get('description')
    stock.put()
    # NOTE(review): this echoes the request's 'ok' parameter; possibly the
    # literal "ok" was intended. Confirm.
    out.write(self.request.get('ok'))
    return
def book_edit(request):
    """Book editing: show the edit form, or save a submitted book.

    On POST the form fields are copied onto a new Book. A positive "id"
    updates that book; otherwise a new id is created and the book is
    inserted. Validation failures re-render the form with the first message.
    On other methods the form is rendered, pre-filled when a positive "id"
    is given in the query string.
    """
    book_facade = BookFacade()

    # Form submission.
    if request.method == "POST":
        form = request.POST

        def split_names(field):
            # Split a "/"-separated field and strip each part.
            return map(lambda x: x.strip(), form.get(field, '').split('/'))

        book = Book()
        book_id = func.get_int_param_from_post(request, 'id')
        book.authors = split_names('authors')
        book.translators = split_names('translators')
        for field in (
            'authors_intro', 'binding', 'dir', 'spic', 'mpic', 'bpic',
            'isbn10', 'isbn13', 'pages', 'price', 'pubdate', 'publisher',
            'summary', 'title', 'sub_title',
        ):
            setattr(book, field, form.get(field, ''))
        book.tags = split_names('tags')

        # A positive id means modify; otherwise insert under a new id.
        is_update = book_id > 0
        if is_update:
            book.id = book_id
        else:
            book.id = func.create_new_id()

        messages = book.validate()
        if messages:
            output = {'message': messages[0]}
            return render_to_response('admin/book_edit.html', output)

        if is_update:
            book_facade.update(book)
        else:
            book_facade.insert(book)
        return HttpResponseRedirect('book_list')

    # Plain page request: optionally pre-fill the form.
    book_id = func.get_int_param_from_get(request, 'id')
    output = {}
    if book_id > 0:
        output['book'] = book_facade.get_data(book_id)
    return render_to_response('admin/book_edit.html', output)
class Crawler:
    """Crawl one book, chapter by chapter, starting from a root URL.

    ``book`` holds the Book being filled and ``status`` becomes True once a
    crawl completed successfully.

    NOTE(review): the original source is collapsed onto a few lines, so the
    nesting below is reconstructed from the statements themselves; confirm
    against the real file.
    """
    book = None
    status = None

    def __init__(self, root_url):
        """Remember the root URL and start with an empty Book."""
        self.root_url = root_url
        self.book = Book()
        self.status = False

    def set_root_url(self, url):
        """Replace the root URL used by the next crawl."""
        self.root_url = url

    def crawling(self):
        """Crawl the whole book into a fresh ``self.book``.

        Returns True (and sets ``self.status``) when the general info and
        every chapter validate; returns False after logging otherwise.
        """
        self.book = Book()  # Reinitialize Book object for retry
        log.info("Crawling function is call")
        if self.root_url:
            thumb, title, author, chapter_params = self.get_general_info()
            if Book.general_validate(title, author, chapter_params):
                self.book.set_thumb(thumb)
                self.book.set_title(title)
                self.book.set_author(author)
                for chapter_param in chapter_params:
                    chapter = self.get_chapter(chapter_param)
                    # A single invalid chapter aborts the whole crawl.
                    if not Book.chapter_validate(chapter):
                        log.error("Chapter: %s is fail crawled", chapter)
                        return False
                    self.book.add_chapter(chapter)
                self.status = True
                return True
            else:
                log.error("No validate general info")
                return False
        else:
            log.error("not define root url")
            return False

    def get_general_info(self):
        """Return ``(thumb, title, author, chapter_params)`` for the book.

        ``chapter_params`` collects the argument of each ``noidung1('...')``
        onclick handler found under the page's ``<acronym>`` elements. The
        thumb, title and author are read from the first chapter and stay
        None when unavailable.
        """
        log.info("get_general_info function is call")
        book_thumb = None
        book_title = None
        book_author = None
        chapter_params = []
        # site_rs = requests.get(self.root_url, verify=False)
        site_rs = self.try_request(self.root_url)
        # NOTE(review): try_request returns None once retries are exhausted,
        # in which case ``site_rs.content`` raises AttributeError.
        soup = BeautifulSoup(site_rs.content, 'html.parser')
        for acronym in soup.find_all('acronym'):
            chapter_link = acronym.li.get('onclick')
            if chapter_link:
                # NOTE(review): re.search returns None when the handler does
                # not match, which makes ``.group(1)`` raise AttributeError.
                chapter_re = re.search('noidung1\(\'((\w|\W)*)\'\)', chapter_link)
                chapter_param = chapter_re.group(1)
                chapter_params.append(chapter_param)
        if chapter_params:
            # The first chapter page also carries the book-level metadata.
            first_chapter = self.get_chapter(chapter_params[0])
            if first_chapter:
                if first_chapter.book_thumb:
                    book_thumb = first_chapter.book_thumb
                if first_chapter.book_title:
                    book_title = first_chapter.book_title
                if first_chapter.book_author:
                    book_author = first_chapter.book_author
        return book_thumb, book_title, book_author, chapter_params

    def get_chapter(self, chapter_param):
        """Fetch one chapter and return it as a Chapter, or None on failure.

        The response body is split on ``--!!tach_noi_dung!!--`` into a CSS
        part (book thumb), a description part (book title, author, chapter
        title) and a content part (chapter HTML).
        """
        log.info("get_chapter function is call")
        log.info(chapter_param)
        book_title = None
        book_thumb = None
        book_author = None
        chapter_title = None
        chapter_content = None
        upper_char = None
        url = system_cfg.CHAPTER_URL
        chapter_param = html_tool.decode_param_to_dict(chapter_param)
        site_rs = self.try_request(url, 'POST', data=chapter_param)
        if site_rs:
            content_list = site_rs.content.split('--!!tach_noi_dung!!--', 3)
            if len(content_list) >= 3:
                # Get book_thumb from the CSS part.
                css_soup = BeautifulSoup(content_list[0], 'html.parser')
                style_tag = css_soup.find('style')
                if style_tag:
                    thumb_re = re.search('background:url\((http://(\w|\W)*)\)', style_tag.string)
                    if thumb_re:
                        book_thumb = thumb_re.group(1)
                # Get book title, book author and chapter title from the
                # description part.
                desc_soup = BeautifulSoup(content_list[1], 'html.parser')
                book_title_tag = desc_soup.find('span', class_='chuto40')
                if book_title_tag:
                    book_title = book_title_tag.string.strip()
                tuade_tag = desc_soup.find('div', class_='tuade')
                if tuade_tag:
                    chutieude_tags = desc_soup.find_all('span', class_='chutieude')
                    chutieude_list = []
                    for chutieude_tag in chutieude_tags:
                        if chutieude_tag.string and chutieude_tag.string.strip():
                            chutieude_list.append(chutieude_tag.string.strip())
                    if len(chutieude_list) >= 2:
                        # With two or more headings the first one is the
                        # author; the rest are joined into the chapter title.
                        book_author = chutieude_list[0]
                        del chutieude_list[0]
                        for chutieude in chutieude_list:
                            if chapter_title:
                                chapter_title = chapter_title + chutieude + " "
                            else:
                                chapter_title = chutieude + " "
                    elif len(chutieude_list) == 1:
                        chapter_title = chutieude_list[0]
                else:
                    # Without a 'tuade' block the author comes from the
                    # 'tacgiaphai' span.
                    tac_gia_tag = desc_soup.find('span', class_='tacgiaphai')
                    if tac_gia_tag:
                        book_author = tac_gia_tag.string.strip()
                    chutieude_tags = desc_soup.find_all('span', class_='chutieude')
                    chutieude_list = []
                    for chutieude_tag in chutieude_tags:
                        if chutieude_tag.text and chutieude_tag.text.strip():
                            chutieude_list.append(chutieude_tag.text.strip())
                    if len(chutieude_list) == 2:
                        chapter_title = chutieude_list[0] + ": " + chutieude_list[1]
                    elif len(chutieude_list) == 1:
                        chapter_title = chutieude_list[0]
                # Get the chapter content (the chapter title is added to it
                # further down).
                content_soup = BeautifulSoup(content_list[2], 'html.parser')
                if content_soup:
                    chuhoain_tag = content_soup.find(id='chuhoain')
                    if chuhoain_tag:
                        chuinhoa_img = chuhoain_tag.img
                        if chuinhoa_img:
                            chuhoain_src = chuinhoa_img['src']
                            # NOTE(review): ``unicode`` exists only on
                            # Python 2. On Python 3 it is looked up, and
                            # raises NameError, only when the src is not a
                            # str.
                            if (isinstance(chuhoain_src, str) or isinstance(chuhoain_src, unicode)) and chuhoain_src.find(
                                    system_cfg.UPPER_CHAR_URL) != -1:
                                # Rewrite the image source from the
                                # UPPER_CHAR_URL prefix to UPPER_CHAR_PATH
                                # and remember the remaining part.
                                chuhoain = chuhoain_src.replace(system_cfg.UPPER_CHAR_URL, '')
                                chuinhoa_img['src'] = system_cfg.UPPER_CHAR_PATH + chuhoain
                                upper_char = chuhoain
                    chapter_content = content_soup.prettify()
                # Add chapter title to chapter_content
                if chapter_title and chapter_content:
                    chapter_content = '<div><h2 align=\'center\'>' + chapter_title + '</h2></div>' + chapter_content
                chapter = Chapter(title=chapter_title, content=chapter_content)
                if book_title:
                    chapter.set_book_title(book_title)
                if book_author:
                    chapter.set_book_author(book_author)
                if book_thumb:
                    chapter.set_book_thumb(book_thumb)
                if upper_char:
                    chapter.set_upper_char(upper_char)
                log.info("Crawler chapter: %s", chapter_title)
                return chapter
        else:
            log.error("Can't get content of this chapter")
            return None

    def try_request(self, url, post_type='GET', try_time=0, params=None, data=None):
        """Request ``url`` and retry on connection errors.

        Any ``post_type`` other than 'GET' first performs a session GET and
        then POSTs with a copy of that session's cookies. A falsy response
        is treated as a connection error. Returns the response, or None
        once ``system_cfg.MAX_RETRY_TIME`` attempts have failed.
        """
        try:
            if post_type == 'GET':
                headers = {'User-Agent': system_cfg.USER_AGENT}
                site_rs = requests.get(url=url, params=params, data=data, headers=headers, verify=False)
            else:
                headers = {'User-Agent': system_cfg.USER_AGENT}
                session = requests.Session()
                session.get(url, headers=headers)
                cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies))
                site_rs = requests.post(url=url, params=params, data=data, headers=headers, cookies=cookies, verify=False)
            # A requests.Response is falsy for 4xx/5xx status codes, so those
            # are retried as well.
            if not site_rs:
                raise requests.exceptions.ConnectionError
            return site_rs
        except requests.exceptions.ConnectionError:
            try_time += 1
            if try_time >= system_cfg.MAX_RETRY_TIME:
                return None
            else:
                # Wait, then retry recursively with the incremented counter.
                time.sleep(system_cfg.WAITING_TIME)
                log.warn("Retry url %s %r time" % (url, try_time))
                return self.try_request(url, post_type, try_time, params, data)
def testBook(self):
    '''
    Test the getters and the setters of the Book class.
    '''
    book = Book(1, "Introduction to algorithms", "The Bible", "Thomas H Cormen")

    # The id must be read from the book. The previous ``self.getId()``
    # called a method the test case does not have.
    self.assertEqual(book.getId(), 1)
    self.assertEqual(book.getTitle(), "Introduction to algorithms")
    self.assertEqual(book.getDescription(), "The Bible")
    self.assertEqual(book.getAuthor(), "Thomas H Cormen")

    book.setAuthor("Cosmin")
    self.assertEqual(book.getAuthor(), "Cosmin")
    book.setTitle("Title")
    self.assertEqual(book.getTitle(), "Title")
    book.setDescription("Descr")
    self.assertEqual(book.getDescription(), "Descr")
def __init__(self, root_url):
    """Store the root URL and start with an empty Book and a False status."""
    self.root_url, self.book, self.status = root_url, Book(), False