예제 #1
0
# Amazon product formats we accept, in order of preference when one product is
# priced in more than one of them.
_AMAZON_FORMATS = (
    "Paperback",
    "Mass Market Paperback",
    "Hardcover",
    "Perfect Paperback",
    "Pamphlet",
    "Plastic Comb",
    "Spiral-bound",
    "Print on Demand (Paperback)",
    "DVD",
    "Calendar",
    "Board book",
    "Audio Cassette",
    "Cards",
    "Audio CD",
    "Diary",
    "DVD-ROM",
    "Library Binding",
    "music",
    "Vinyl",
    "Health and Beauty",
    "Hardback",
)

# Timeout (seconds) for the scraping requests.
# NOTE(review): 0.1 s is carried over unchanged from the original code; it is
# very short for a remote HTTP request -- confirm this is intentional.
_SCRAPE_TIMEOUT = 0.1


def _lookup_via_isbnlib(isbn, price):
    """Build a result dict from isbnlib metadata; return {} if none is available."""
    metadata = None
    try:
        metadata = isbnlib.meta(str(isbn))
    except Exception:
        # Best-effort fallback: report the failure but treat it as "not found".
        traceback.print_exc()
    if not metadata:
        return {}
    return {
        "title": metadata["Title"],
        "authors": metadata["Authors"],
        "authors_as_string": ",".join(metadata["Authors"]),
        "categories_as_string": None,
        "list_price": price,
        "publisher": metadata["Publisher"],
        "isbn": isbn,
        "orig_isbn": isbn,
        "large_url": None,
        "med_url": None,
        "small_url": None,
        "format": None,
        "kind": "books",
        "known_title": False,
        "special_orders": [],
    }


def _known_title_result(known_title, isbn, price):
    """Build the result dict for a title that already exists in our database."""
    # NOTE(review): the argument-less .format() calls are kept from the
    # original; on str values they are a no-op unless the text contains braces.
    authors = [a.authorName.format() for a in known_title.author]
    categories = [c.categoryName.format() for c in known_title.categorys]

    if price == 0:
        # No price supplied: use the highest list price among our copies.
        book_prices = [b.listprice for b in known_title.books]
        list_price = max(book_prices) if book_prices else 0
    else:
        list_price = price

    special_orders = [
        pivot.id
        for pivot in Title.selectBy(isbn=isbn).throughTo.specialorder_pivots.filter(
            TitleSpecialOrder.q.orderStatus == "ON ORDER"
        )
    ]
    return {
        "title": known_title.booktitle.format(),
        "authors": authors,
        "authors_as_string": ", ".join(authors),
        "categories_as_string": ", ".join(categories),
        "list_price": list_price,
        "publisher": known_title.publisher.format(),
        "isbn": isbn,
        "orig_isbn": known_title.origIsbn.format(),
        "large_url": "",
        "med_url": "",
        "small_url": "",
        "format": known_title.type.format(),
        "kind": known_title.kind.kindName,
        "known_title": known_title,
        "special_order_pivots": special_orders,
    }


def _parse_amazon_authors(subtext):
    """Split the first "by ..." line of an Amazon result into author names."""
    # Strip only the leading "by "; str.replace would also eat "by " inside a
    # name (e.g. "Abby Smith").
    bylines = [line[len("by "):] for line in subtext if line.startswith("by ")]
    if not bylines:
        return []
    names = [
        name.strip()
        for chunk in bylines[0].split(" and ")
        for name in chunk.split(", ")
    ]
    merged = []
    for name in names:
        # We assume any full name shorter than five characters is a suffix
        # like 'Jr.', so it is appended to the previous author's name.
        if len(name) < 5 and merged:
            merged[-1] = ", ".join((merged[-1], name))
        else:
            merged.append(name)
    return merged


def _lookup_via_amazon_ecs(isbn, price):
    """Look *isbn* up through the Amazon search client, falling back to isbnlib.

    Returns a result dict, ``{}`` when Amazon has products but none in an
    accepted format, or ``[]`` for internal ISBNs (kept for backward
    compatibility with existing callers).
    """
    id_type = ""
    if len(isbn) == 12:
        id_type = "UPC"
    elif len(isbn) == 13:
        if isbn.startswith(internal_isbn_prefix):
            # Internal ISBNs are never searched for; checking this first
            # avoids the throttling sleep below.
            return []
        id_type = "ISBN" if isbn.startswith(("978", "979")) else "EAN"

    sleep(1)  # so amazon doesn't get huffy
    ecs.setLicenseKey(amazon_license_key)
    ecs.setSecretAccessKey(amazon_secret_key)
    ecs.setAssociateTag(amazon_associate_tag)

    # Pre-bind so a failed search is handled as "nothing found" instead of
    # raising NameError further down.
    amazon_prods = None
    try:
        print("searching amazon for ", isbn, id_type, file=sys.stderr)
        amazon_prods = AmzSear(isbn)
        print(amazon_prods, file=sys.stderr)
    except (ecs.InvalidParameterValue, HTTPError):
        traceback.print_exc()

    if not amazon_prods:
        print("using isbnlib from ecs", file=sys.stderr)
        return _lookup_via_isbnlib(isbn, price)

    for product in amazon_prods.values():
        price_by_format = product["prices"]
        accepted = [fmt for fmt in _AMAZON_FORMATS if fmt in price_by_format]
        if not accepted:
            continue
        print(product, file=sys.stderr)
        authors = _parse_amazon_authors(product["subtext"])
        image_url = product["image_url"]
        # The first product offered in an accepted format wins.
        return {
            "title": product["title"],
            "authors": authors,
            "authors_as_string": ",".join(authors),
            "categories_as_string": "",
            "list_price": max(price_by_format.values()),
            "publisher": "",
            "isbn": isbn,
            "orig_isbn": isbn,
            "large_url": image_url,
            "med_url": image_url,
            "small_url": image_url,
            "format": accepted[0],
            "kind": "books",
            "known_title": False,
            "special_orders": [],
        }
    # Products were found, but none in a format we accept.
    return {}


def _scrape_bookfinder_price(isbn, headers):
    """Return the list price text scraped from bookfinder4u, or 0.0 on failure."""
    url = "http://www.bookfinder4u.com/IsbnSearch.aspx?isbn='%s'&mode=direct" % isbn
    try:
        with requests.Session() as session:
            response = session.get(url, headers=headers, timeout=_SCRAPE_TIMEOUT)
            page = BeautifulSoup(response.content, "lxml")
        match = re.search(
            r"List\sprice:\s(\w{2,4})\s(\d+(.\d+)?)", page.text, re.I
        )
    except Exception:
        traceback.print_exc()
        return 0.0
    # Group 2 is the numeric amount (group 1 is the currency code).
    return match.group(2) if match else 0.0


def _parse_product_details(detail_lines):
    """Return ``(publisher, format)`` parsed from product-detail text lines.

    Either value is ``""`` when the corresponding line is absent or malformed.
    """
    publisher = ""
    book_format = ""
    for line in detail_lines:
        if publisher and book_format:
            break
        if line.endswith("pages"):
            # The format name is whatever precedes the colon on the
            # "<format>: <n> pages" line.
            head, sep, _ = line.partition(":")
            if sep:
                book_format = head
        elif line.startswith("Publisher: "):
            match = re.match(r"Publisher: ([^;^(]*)\s?([^(]*)?\W(.*)\W", line)
            if match:
                publisher = match.group(1)
    return publisher, book_format


def _scrape_amazon(isbn, price):
    """Scrape the Amazon product page for *isbn*, falling back to isbnlib.

    Returns ``{}`` for a 13-character value that is not a valid ISBN-13.
    isbnlib is used whenever no ISBN-10 is available, the page cannot be
    fetched or parsed, or no product title is found on it.
    """
    import ast  # local import: only needed to parse the scraped image data

    print("scraping amazon", file=sys.stderr)
    headers = {"User-Agent": random.sample(user_agents, 1).pop()}

    isbn10 = None
    if len(isbn) == 13:
        if not isbnlib.is_isbn13(isbn):
            return {}
        isbn10 = isbnlib.to_isbn10(isbn)

    if isbn10:
        try:
            with requests.Session() as session:
                print("getting amazon")
                page_response = session.get(
                    "http://www.amazon.com/dp/%s/" % isbn10,
                    headers=headers,
                    timeout=_SCRAPE_TIMEOUT,
                )
                print("got response")
            page_content = BeautifulSoup(page_response.content, "lxml")
            print("got parsed content")

            try:
                booktitle = page_content.select("#productTitle").pop().text
            except Exception:
                traceback.print_exc()
                booktitle = ""

            # Links inside the author pop-over duplicate the visible author
            # links, so they are excluded from the author list.
            popover_preload = [
                a.text
                for a in page_content.select(
                    ".author.notFaded .a-popover-preload a.a-link-normal"
                )
            ]
            author_name = [
                a.text
                for a in page_content.select(".author.notFaded a.a-link-normal")
                if a.text not in popover_preload
            ]

            try:
                listprice = page_content.select(".a-text-strike").pop().text
            except IndexError:
                print("using bookfinder4u")
                listprice = _scrape_bookfinder_price(isbn, headers)

            try:
                matches = re.findall(
                    r"(?<=imageGalleryData'\s:\s\[)\{.*?\}",
                    page_content.contents[1].text,
                )
                # SECURITY: this text comes from a remote web page. The
                # original passed it to eval(); literal_eval accepts the same
                # literal dicts but cannot execute code.
                image_url_dict = ast.literal_eval(matches[0])
            except Exception:
                traceback.print_exc()
                image_url_dict = {"mainUrl": "", "thumbUrl": ""}

            category_items = [
                a.text for a in page_content.select(".zg_hrsr_ladder a")
            ]

            try:
                detail_lines = (
                    page_content.select("#productDetailsTable")
                    .pop()
                    .text.splitlines()
                )
                publisher, book_format = _parse_product_details(detail_lines)
            except Exception:
                traceback.print_exc()
                publisher = book_format = ""

            if booktitle:
                return {
                    "title": booktitle,
                    "authors": author_name,
                    "authors_as_string": ",".join(author_name),
                    "categories_as_string": ",".join(category_items),
                    "list_price": listprice,
                    "publisher": publisher,
                    "isbn": isbn,
                    "orig_isbn": isbn,
                    "large_url": image_url_dict["mainUrl"],
                    "med_url": image_url_dict["mainUrl"],
                    "small_url": image_url_dict["thumbUrl"],
                    "format": book_format,
                    "kind": "books",
                    "known_title": False,
                    "special_orders": [],
                }
        except Exception:
            traceback.print_exc()

    # No ISBN-10 to build the product URL from, the scrape failed, or the page
    # carried no title.
    print("using isbnlib from scraper", file=sys.stderr)
    return _lookup_via_isbnlib(isbn, price)


def lookup_by_isbn(number, forceUpdate=False):
    """Look up bibliographic data for an ISBN/UPC/EAN.

    The local ``Title`` table is consulted first. If the title is unknown, or
    ``forceUpdate`` is true, the data is fetched remotely: through the Amazon
    search client when ``use_amazon_ecs`` is set, otherwise by scraping the
    Amazon product page. Both remote paths fall back to isbnlib.

    Args:
        number: Raw identifier; it is normalised by ``_process_isbn`` into an
            ``(isbn, price)`` pair.
        forceUpdate: When true, skip the local database result and always
            query the remote sources.

    Returns:
        A dict with the keys ``title``, ``authors``, ``authors_as_string``,
        ``categories_as_string``, ``list_price``, ``publisher``, ``isbn``,
        ``orig_isbn``, ``large_url``, ``med_url``, ``small_url``, ``format``,
        ``kind`` and ``known_title``. Results from the local database also
        carry ``special_order_pivots``; remote results carry
        ``special_orders`` instead. An empty dict is returned when nothing is
        found or the ISBN is blank or a placeholder. An empty list is returned
        for internal ISBNs on the Amazon search path.
    """
    isbn, price = _process_isbn(number)
    print("Looking up isbn", isbn, "with price", price)

    # Reject empty values and placeholders such as "n/a", "n a", "na", "none".
    if len(isbn) == 0 or re.match(r"^n(\s|/){0,1}a|none", isbn, re.I):
        return {}

    # First we check our own database.
    the_titles = list(Title.select(Title.q.isbn == isbn))
    if the_titles and not forceUpdate:
        return _known_title_result(the_titles[0], isbn, price)

    # We don't have it yet (or a refresh was requested).
    if use_amazon_ecs:
        return _lookup_via_amazon_ecs(isbn, price)
    return _scrape_amazon(isbn, price)
예제 #2
0
    finally:
        readline.set_startup_hook()


# Interactive prompt loop: keep asking for an ISBN (or title) until the user
# types "quit" or "q".
should_quit = False
while should_quit != True:
    isbn = raw_input('isbn or title >> ')
    if isbn.lower().strip() == 'quit' or isbn.lower().strip() == 'q':
        should_quit = True
        continue
    # Input made of exactly 13 or exactly 18 digits is handled as a barcode;
    # the first 13 digits are taken as the ISBN.
    if re.match('^[0-9]{13}$|^[0-9]{18}$', isbn):
        try:
            # NOTE(review): `isbn[13, -1]` indexes a string with a tuple, which
            # raises TypeError, so the except branch below always runs and
            # price is always 0.00. A slice of the trailing digits was
            # presumably intended -- confirm the expected price encoding.
            isbn, price = isbn[0:13], float(isbn[13, -1])
        except:
            isbn, price = isbn[0:13], 0.00
        titles = Title.selectBy(isbn=isbn)
        book = None
        if list(titles):
            for t1 in titles:
                # Ask for our selling price, pre-filled with the parsed price.
                # rlinput is defined elsewhere in this file.
                ourprice = rlinput("price >> ", prefill=price)
                try:
                    float(ourprice)
                except:
                    # Not a number: skip this title.
                    continue
                # Find in-stock copies of this title at the entered price.
                books = Book.selectBy(titleID=t1.id,
                                      ourprice=float(ourprice),
                                      status='STOCK')
                if list(books):
                    # Remember the first matching copy and its prices.
                    ourprice = books[0].ourprice
                    listprice = books[0].listprice
                    book = books[0]