Beispiel #1
0
def getNullPubdatePageData(node, minPrice, maxPrice):
    """Collect the items of a price band that carry no publication date.

    Runs the power search ``not pubdate:after 1000`` (the original author
    notes that comics in particular often have no publication date).  When
    the first response comes back with status 1 -- logged here as
    "over flow" -- the query is delegated to ``getYomiganaPageData``;
    otherwise the remaining result pages are fetched one at a time.

    Args:
        node: Browse node id passed through to ``amazonItemSearch``.
        minPrice: Lower price bound passed through to the search.
        maxPrice: Upper price bound passed through to the search.

    Returns:
        list: Every ``dic["item"]`` entry gathered from the responses.
    """
    amazon = Amazon.Amazon()
    items = []

    # Start at page 1 like every sibling fetcher does.  The previous version
    # issued the first request for page 0 and then requested page 1 again in
    # the paging loop below.
    pageCnt = 1

    # Fetch content that has no publication date set.
    power = "not pubdate:after 1000"
    print("min:" + str(minPrice) + " max:" + str(maxPrice) + " page:" +
          str(pageCnt) + "power: " + power)
    dic, status = amazon.amazonItemSearch(pageCnt, node, "Books", minPrice,
                                          maxPrice, power)
    items.extend(dic["item"])
    totalPage = int(dic["totalpages"])

    if status == 1:
        # str() keeps the log line from raising if totalresults is not a str.
        print("node is over flow by not after pubData:" + power + " min:" +
              str(minPrice) + " max:" + str(maxPrice) + " totalPage:" +
              str(totalPage) + " totalresults:" + str(dic["totalresults"]))
        items.extend(getYomiganaPageData(node, minPrice, maxPrice, power))
    else:
        while totalPage > pageCnt:
            time.sleep(sleepTime)  # throttle between consecutive requests
            pageCnt += 1
            print("page:" + str(pageCnt) + " totalPage:" + str(totalPage))
            dic, status = amazon.amazonItemSearch(pageCnt, node, "Books",
                                                  minPrice, maxPrice, power)
            items.extend(dic["item"])

    return items
Beispiel #2
0
def call(search):
    """Query every shop for ``search`` and print the combined result list.

    The Amazon result list is extended in place with the ajio, ebay and
    snapdeal results, in that order, and then printed.
    """
    merged = Amazon.amazon(search)
    # All shops are queried before anything is merged.
    other_results = [
        ajio.ajio(search),
        ebay.ebay(search),
        snapdeal.snapdeal(search),
    ]
    for shop_results in other_results:
        merged.extend(shop_results)
    print(merged)
Beispiel #3
0
def mainSingleCategory(nodes):
    """Run ``getNodeItem`` for each browse node of a comma-separated list.

    For every node id the browse node is looked up first (its
    ``parent_name`` is only used for the start/end log lines), then the
    function sleeps for ``sleepTime`` and fetches the node's items.

    Args:
        nodes: Browse node ids separated by commas.
    """
    client = Amazon.Amazon()

    for node_id in nodes.split(","):
        node_info = client.amazonBrowseNode(node_id)
        time.sleep(sleepTime)

        node_label = "Node:" + node_id + " NodeName:" + node_info["parent_name"]
        print("getNodeItem start " + node_label)
        getNodeItem(node_id)
        print("getNodeItem end " + node_label)
Beispiel #4
0
def main(node):
    """Fetch the items of every category listed under a browse node.

    Looks up ``node``, prints its ``parent_name`` and then calls
    ``getNodeItem`` for each entry of its ``category`` list, logging the
    start and end of each one.

    Args:
        node: Browse node id passed to ``amazonBrowseNode``.
    """
    client = Amazon.Amazon()
    node_info = client.amazonBrowseNode(node)
    time.sleep(sleepTime)

    print(node_info["parent_name"])
    for category in node_info["category"]:
        child_id = category["browsenode_id"]
        child_label = ("Node:" + child_id + " NodeName:" +
                       category["browsenode_name"])

        print("getNodeItem start " + child_label)
        getNodeItem(child_id)
        print("getNodeItem end " + child_label)
        print("============================================")
Beispiel #5
0
 def createUnitToBuy(self):
     """Rebuild ``self.uniteToBuy`` with one new instance per unit class.

     The instances are created in the order listed below and the resulting
     list is then passed to ``shuffle``.
     """
     unit_classes = (
         Amazon,
         Dwarf,
         Elf,
         Giant,
         Hobbit,
         Humans,
         Mago,
         Orc,
         Rats,
         Skeletton,
         Triton,
         Troll,
         Wizzard,
         Zombie,
     )
     self.uniteToBuy = [unit_class() for unit_class in unit_classes]
     shuffle(self.uniteToBuy)
Beispiel #6
0
def getNodeItem(node):
    """Fetch the "Books" items of one browse node and pass them to ``setOutputeDB``.

    The first result page is requested without a price filter.  When that
    response has status 1 (logged as "10page over") the node is re-crawled
    in price bands through ``getCutPageData``, starting with bands of
    2000 // 10 up to a limit of 99999; otherwise the remaining result pages
    are fetched directly.  Whatever was gathered in this function -- in the
    status-1 case only the first page -- is written with ``setOutputeDB``.

    Args:
        node: Browse node id.  It is concatenated into log messages, so it
            must be a string.
    """
    amazon = Amazon.Amazon()
    totalDic = {"item": []}

    pageCnt = 1
    minPrice = 0
    maxPrice = 2000
    # Floor division keeps the band width an int (200).  True division gave
    # 200.0, which leaked float bounds such as 201.0 into the price
    # arguments, whereas getCutPageData's own recursion passes ints.
    priceRange = (maxPrice - minPrice) // 10

    print("node:" + node + " start")

    dic, status = amazon.amazonItemSearch(pageCnt, node, "Books")
    totalDic["item"].extend(dic["item"])
    totalPage = int(dic["totalpages"])
    print("page:" + str(pageCnt) + " totalPage:" + str(totalPage))

    if status == 1:
        # Too many results came back: fetch the data per price range instead.
        print("node is 10page over:" + node)
        getCutPageData(node, minPrice, priceRange, 99999)
    else:
        while totalPage > pageCnt:
            time.sleep(sleepTime)  # throttle between consecutive requests
            pageCnt += 1
            print("page:" + str(pageCnt) + " totalPage:" + str(totalPage))
            dic, status = amazon.amazonItemSearch(pageCnt, node, "Books")
            totalDic["item"].extend(dic["item"])

    print("total:" + str(len(totalDic["item"])))
    setOutputeDB(totalDic)

    print("node:" + node + " end")
def main():
    """Refresh the stored data of every ISBN listed in ``BooksDatabase.txt``.

    Each non-blank line of the file is treated as one ISBN.  The ``book``
    table is queried by ``isbn13``; when exactly one row exists and
    ``time.time() - lastVisited`` is still below its ``refreshPeriod`` the
    ISBN is skipped (this assumes ``lastVisited`` holds epoch seconds --
    TODO confirm against the schema).  Otherwise Goodreads is queried and,
    unless it returns nothing, Amazon, Flipkart, Infibeam and Snapdeal are
    queried as well and the combined fields are handed to ``Book.Book``.

    A failing SELECT is printed and treated as "needs refresh", as before.
    """
    # Read the whole list first so the file handle is closed before the slow
    # scraping starts.  strip() removes the line terminator; the previous
    # code discarded the result of strip() and sliced isbn[:-1] instead,
    # which sent the newline to SQL and truncated a last line without one.
    with open("BooksDatabase.txt", "r") as file:
        isbns = [line.strip() for line in file]

    sql = "SELECT * FROM book WHERE isbn13 = %(value)s"

    for isbn in isbns:
        if not isbn:
            continue  # blank line

        rows = []
        mydb = mysql.connector.connect(user='******',
                                       password='******',
                                       host='127.0.0.1',
                                       database='project')
        try:
            # dictionary=True so the columns can be read by name below.
            mycursor = mydb.cursor(dictionary=True)
            try:
                mycursor.execute(sql, {'value': isbn})
                # Fetch explicitly instead of trusting rowcount, which is
                # not populated for an unbuffered SELECT before fetching.
                rows = mycursor.fetchall()
            except Exception as e:
                print(e)
            finally:
                mycursor.close()
        finally:
            mydb.close()

        # The decision is made per ISBN.  Previously a single fresh book
        # cleared a flag that was never reset, skipping all later ISBNs.
        if len(rows) == 1:
            row = rows[0]
            if time.time() - row['lastVisited'] < row['refreshPeriod']:
                continue

        goodreads_data = Goodreads.get_goodreads(isbn)
        if not goodreads_data:
            continue
        amazon_data = Amazon.get_amazon(isbn)
        flipkart_data = Flipkart.get_flipkart(isbn)
        infibeam_data = Infibeam.get_infibeam(isbn)
        snapdeal_data = Snapdeal.get_snapdeal(isbn)
        Book.Book(
            goodreads_data['isbn'], isbn, goodreads_data['title'],
            goodreads_data['image'], goodreads_data['authorID'],
            goodreads_data['rating'], goodreads_data['genre1'],
            goodreads_data['genre2'], goodreads_data['genre3'],
            amazon_data['amazon_url'], amazon_data['amazon_price'],
            flipkart_data['flipkart_url'], flipkart_data['flipkart_price'],
            infibeam_data['infibeam_url'], infibeam_data['infibeam_price'],
            snapdeal_data['snapdeal_url'], snapdeal_data['snapdeal_price'])
Beispiel #8
0
def getSortPricePageData(node, minPrice, maxPrice, powerIn, sortIn):
    """Fetch result pages 1 to 10 of a sorted power search.

    Unlike the other fetchers this one does not read the page count from
    the response: it always requests ten pages, sleeping ``sleepTime``
    before each request.

    Args:
        node: Browse node id passed through to ``amazonItemSearch``.
        minPrice: Lower price bound passed through to the search.
        maxPrice: Upper price bound passed through to the search.
        powerIn: Power-search expression (a string).
        sortIn: Value for the ``sort`` keyword of the search (a string).

    Returns:
        list: Every ``dic["item"]`` entry gathered from the ten responses.
    """
    client = Amazon.Amazon()
    last_page = 10
    gathered = []

    # The header line is printed before any page has been requested.
    page = 0
    print("min:" + str(minPrice) + " max:" + str(maxPrice) + " page:" +
          str(page) + " power: " + powerIn + " sort:" + sortIn)

    for page in range(1, last_page + 1):
        time.sleep(sleepTime)
        print("page:" + str(page) + " totalPage:" + str(last_page))
        response, _status = client.amazonItemSearch(page,
                                                    node,
                                                    "Books",
                                                    minPrice,
                                                    maxPrice,
                                                    powerIn,
                                                    sort=sortIn)
        gathered.extend(response["item"])

    return gathered
Beispiel #9
0
def _collectPriceBandItems(amazon, node, minPrice, maxPrice, yyyy):
    """Return the items found for one price band of a browse node.

    Requests the first result page for ``minPrice``..``maxPrice``.  When the
    response has status 1 (logged as "over flow") the band is split further:
    if a tenth of the band is below 1 the publication-date fetchers are
    used, otherwise ``getCutPageData`` is called with a tenth of the band as
    step -- that call stores its own results, so nothing from it is added
    here.  Without overflow the remaining result pages are fetched directly.
    """
    items = []
    pageCnt = 1

    print("min:" + str(minPrice) + " max:" + str(maxPrice) + " page:" +
          str(pageCnt))
    dic, status = amazon.amazonItemSearch(pageCnt, node, "Books", minPrice,
                                          maxPrice)
    items.extend(dic["item"])
    totalPage = int(dic["totalpages"])

    if status == 1:
        print("node is over flow by Price:" + node + " min:" +
              str(minPrice) + " max:" + str(maxPrice) + " totalPage:" +
              str(totalPage) + " totalresults:" + dic["totalresults"])
        # Keep splitting the range.
        subRange = int((maxPrice - minPrice) / 10)
        if subRange < 1:
            items.extend(
                getPubdatePageData(node, minPrice, maxPrice, yyyy, 0))
            items.extend(getNullPubdatePageData(node, minPrice, maxPrice))
        else:
            getCutPageData(node, minPrice, subRange, maxPrice)
    else:
        while totalPage > pageCnt:
            time.sleep(sleepTime)  # throttle between consecutive requests
            pageCnt += 1
            print("page:" + str(pageCnt) + " totalPage:" + str(totalPage))
            dic, status = amazon.amazonItemSearch(pageCnt, node, "Books",
                                                  minPrice, maxPrice)
            items.extend(dic["item"])

    return items


def getCutPageData(node, minPrice, priceRange, limit):
    """Crawl a browse node in price bands and store the items found.

    Ten consecutive bands are searched, starting at ``minPrice`` and each
    ``priceRange`` wide.  If the last band ends below ``limit`` one more
    band covering the rest up to ``limit`` is searched.  The gathered items
    are written with ``setOutputeDB``; afterwards the ten largest
    ``tracemalloc`` statistics are printed.

    NOTE(review): the first page of an overflowing band is kept here while
    the recursive call stores that band again on its own, so the output may
    contain duplicates -- confirm that ``setOutputeDB`` tolerates this.

    Args:
        node: Browse node id (a string; it is concatenated into log lines).
        minPrice: Lower bound of the first band.
        priceRange: Width of each band.
        limit: Upper bound that the final catch-up band extends to.
    """
    totalDic = {"item": []}
    amazon = Amazon.Amazon()
    maxPrice = minPrice + priceRange

    tracemalloc.start()

    # Current year, used as the starting year of the publication-date split.
    yyyy = datetime.date.today().year

    for i in range(10):
        time.sleep(sleepTime)
        if i != 0:
            # Move on to the band directly above the previous one.
            minPrice = maxPrice + 1
            maxPrice = maxPrice + priceRange
        totalDic["item"].extend(
            _collectPriceBandItems(amazon, node, minPrice, maxPrice, yyyy))

    # If the loop ended before reaching the limit, fetch the rest up to it.
    if maxPrice < limit:
        minPrice = maxPrice + 1
        maxPrice = limit
        totalDic["item"].extend(
            _collectPriceBandItems(amazon, node, minPrice, maxPrice, yyyy))

    setOutputeDB(totalDic)
    del totalDic
    gc.collect()

    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics('lineno')
    print("[ Top 10 ]")
    for stat in top_stats[:10]:
        print(stat)
Beispiel #10
0
def getPubdatePageData(node, minPrice, maxPrice, pubDate, month):
    """Collect the items of a price band, split by publication date.

    With ``month == 0`` five searches are run, one per year, starting at
    ``pubDate`` and going backwards, followed by one
    ``pubdate:before <year>`` search for everything older.  With a non-zero
    ``month`` twelve searches are run for the year ``pubDate``, starting at
    ``month`` and counting down.

    A search that comes back with status 1 (logged as "over flow") is split
    further: a year is split into months through a recursive call, a month
    is delegated to ``getYomiganaPageData``, and the ``before`` search is
    either repeated for earlier years or, below 1900, delegated to
    ``getYomiganaPageData``.

    Args:
        node: Browse node id passed through to ``amazonItemSearch``.
        minPrice: Lower price bound passed through to the search.
        maxPrice: Upper price bound passed through to the search.
        pubDate: Year to start from.
        month: 0 for the per-year mode, otherwise the month to start from.

    Returns:
        list: Every ``dic["item"]`` entry gathered, including those returned
        by the delegated calls.
    """
    client = Amazon.Amazon()
    gathered = []
    first_page = 1

    by_month = month != 0
    rounds = 12 if by_month else 5
    price_span = "min:" + str(minPrice) + " max:" + str(maxPrice)

    def run_search(page, query):
        return client.amazonItemSearch(page, node, "Books", minPrice,
                                       maxPrice, query)

    def fetch_following_pages(last_page, query):
        # Pages after the first one, throttled by sleepTime.
        for page in range(first_page + 1, last_page + 1):
            time.sleep(sleepTime)
            print("page:" + str(page) + " totalPage:" + str(last_page))
            reply, _status = run_search(page, query)
            gathered.extend(reply["item"])

    for _ in range(rounds):
        if month == 0:
            power = "pubdate:during " + str(pubDate)
        else:
            power = "pubdate:during " + str(month) + "-" + str(pubDate)

        print(price_span + " page:" + str(first_page) + "power: " + power)
        time.sleep(sleepTime)
        reply, status = run_search(first_page, power)
        gathered.extend(reply["item"])
        last_page = int(reply["totalpages"])
        print("totalPage: " + str(last_page))

        if status != 1:
            fetch_following_pages(last_page, power)
        else:
            overflow_tail = (" " + price_span + " totalPage:" +
                             str(last_page) + " totalresults:" +
                             reply["totalresults"])
            print("node is over flow by pubData:" + power + overflow_tail)
            if by_month:
                print("node is over flow by pubDataMonth:" + power +
                      overflow_tail)
                gathered.extend(
                    getYomiganaPageData(node, minPrice, maxPrice, power))
            else:
                # Split the year further, month by month.
                gathered.extend(
                    getPubdatePageData(node, minPrice, maxPrice, pubDate, 12))

        # Step back one month or one year, depending on the mode.
        if by_month:
            month -= 1
        else:
            pubDate -= 1

    if not by_month:
        # Everything older than the years covered above, in one search.
        power = "pubdate:before " + str(pubDate)
        print(price_span + " page:" + str(first_page) + "power: " + power)
        reply, status = run_search(first_page, power)
        gathered.extend(reply["item"])
        last_page = int(reply["totalpages"])

        if status != 1:
            fetch_following_pages(last_page, power)
        else:
            overflow_tail = (" " + price_span + " totalPage:" +
                             str(last_page) + " totalresults:" +
                             reply["totalresults"])
            if pubDate < 1900:
                print("node is over flow by pubDataMinmum:" + str(pubDate) +
                      overflow_tail)
                gathered.extend(
                    getYomiganaPageData(node, minPrice, maxPrice, power))
            else:
                print("node is over flow by before pubData:" + str(pubDate) +
                      overflow_tail)
                gathered.extend(
                    getPubdatePageData(node, minPrice, maxPrice, pubDate, 0))

    return gathered
Beispiel #11
0
def getYomiganaPageData(node, minPrice, maxPrice, powerIn):
    """Collect the items of a query, split by the leading title characters.

    For every character of the first ``moji_list`` result the query
    ``powerIn + " and title-begins:" + <char>`` is searched.  A search that
    comes back with status 1 (logged as "over flow") is split again by
    appending each second character plus ``blackListTitle``; if such a
    two-character search still overflows, ``getSortPricePageData`` is used
    with sort ``"-price"`` and then ``"price"``.

    Args:
        node: Browse node id passed through to ``amazonItemSearch``.
        minPrice: Lower price bound passed through to the search.
        maxPrice: Upper price bound passed through to the search.
        powerIn: Power-search expression to refine (a string).

    Returns:
        list: Every ``dic["item"]`` entry gathered, including those returned
        by ``getSortPricePageData``.
    """
    client = Amazon.Amazon()
    gathered = []
    first_page = 1
    price_span = "min:" + str(minPrice) + " max:" + str(maxPrice)

    # Katakana lists (per the original comment); presumably moji_list turns
    # a code point range into characters -- verify against its definition.
    lead_chars = moji_list((12449, 12532 + 1))
    follow_chars = moji_list((12449, 12532 + 1))
    follow_chars.extend(moji_list((12532 + 8, 12532 + 9)))

    def run_search(page, query):
        return client.amazonItemSearch(page, node, "Books", minPrice,
                                       maxPrice, query)

    def fetch_following_pages(last_page, query):
        # Pages after the first one, throttled by sleepTime.
        for page in range(first_page + 1, last_page + 1):
            time.sleep(sleepTime)
            print("page:" + str(page) + " totalPage:" + str(last_page))
            reply, _status = run_search(page, query)
            gathered.extend(reply["item"])

    for lead in lead_chars:
        power = powerIn + " and title-begins:" + lead
        print(price_span + " page:" + str(first_page) + "power: " + power)
        time.sleep(sleepTime)
        reply, status = run_search(first_page, power)

        gathered.extend(reply["item"])
        last_page = int(reply["totalpages"])
        print("totalPage: " + str(last_page))

        if status != 1:
            fetch_following_pages(last_page, power)
            continue

        print("node is over flow by power:" + power + " " + price_span +
              " totalPage:" + str(last_page) + " totalresults:" +
              reply["totalresults"])

        for follow in follow_chars:
            powertwo = power + follow + blackListTitle
            print(price_span + " page:" + str(first_page) + "powertwo: " +
                  powertwo)
            time.sleep(sleepTime)
            reply, status = run_search(first_page, powertwo)
            gathered.extend(reply["item"])
            last_page = int(reply["totalpages"])
            print("totalPage: " + str(last_page))

            if status == 1:
                print("can't node is over flow by powertwo:" + powertwo +
                      " " + price_span + " totalPage:" + str(last_page) +
                      " totalresults:" + reply["totalresults"])
                # Fetch a fixed number of pages in both sort directions.
                for sort_key in ("-price", "price"):
                    gathered.extend(
                        getSortPricePageData(node, minPrice, maxPrice,
                                             powertwo, sort_key))
            else:
                fetch_following_pages(last_page, powertwo)

    return gathered
Beispiel #12
0
    # Check whether to run in category-crawl mode ('cate' as first argument).
    # NOTE(review): categoryUP and node are read below but not assigned
    # unconditionally in this fragment; presumably they get their defaults
    # in code before it -- verify.
    args = sys.argv
    if (len(args) > 1 and args[1] == 'cate'):
        categoryUP = True

    if (categoryUP is True):
        print("カテゴリ巡回モードです")
        # Start of main processing; targets the content registered for Kindle reading

        # If a node is given as second argument, start the search from that node
        if (len(args) > 2 and len(args[2]) > 0):
            node = args[2]

        # Kindle Unlimited node
        dic = {}
        amazon = Amazon.Amazon()
        dic = amazon.amazonBrowseNode(node)
        # main() is called once per child category of the node.
        for caegoryLine in dic["category"]:
            print("getNodeItem start Node:" + caegoryLine["browsenode_id"] +
                  " NodeName:" + caegoryLine["browsenode_name"])
            main(caegoryLine["browsenode_id"])
            #main("3198378051")
            print("getNodeItem end Node:" + caegoryLine["browsenode_id"] +
                  " NodeName:" + caegoryLine["browsenode_name"])
            print("============================================")

    else:
        print("全件取得モードです")
        # If a category code is given, fetch every item within that category;
        # without one this branch does nothing beyond the message above.
        if (len(args) > 2 and len(args[2]) > 0):
            mainSingleCategory(args[2])
Beispiel #13
0
def scrap(given_name: str, given_url, given_model_no=None):
    """Run the scraper that belongs to ``given_url``.

    ``url[given_url]`` is expected to hold, by index, the URL to scrape (0),
    the filter level (1) and the numeric id of the scraper (2).  If the
    lookup fails the process exits with the exception as message.

    Args:
        given_name: Product name handed to the scraper's ``run``.
        given_url: Key into the ``url`` table.
        given_model_no: Optional model number; only passed to ``run`` when
            it is not ``None``.

    Returns:
        tuple: ``(scrape_data, get_filter_level)``; ``scrape_data`` stays an
        empty list when the id matches none of the known scrapers.
    """
    try:
        selected = url[given_url]
    except Exception as e:
        sys.exit(e)

    url_id = selected[2]
    scrape_url = selected[0]
    get_filter_level = selected[1]
    scrape_data = []
    print(f'Scraping data from {given_url}')

    # The model number is an optional third positional argument of run().
    run_args = [given_name, scrape_url]
    if given_model_no is not None:
        run_args.append(given_model_no)

    scrapers_by_id = (
        (1, Amazon),
        (2, HarveyNorman),
        (3, TheGoodGuys),
        (4, Becex),
        (5, Catch),
        (6, MobileCiti),
        (7, Ebay),
        (8, JbHiFi),
        (9, OfficeWorks),
        (10, BingLee),
        (11, Kogan),
        (12, DickSmith),
    )
    for known_id, scraper in scrapers_by_id:
        if url_id == known_id:
            scrape_data = scraper.run(*run_args)

    return scrape_data, get_filter_level