Example #1
import json

from bs4 import BeautifulSoup


def get_kaos(page=1):
    endpoint = '/c/fashion-wanita/kaos'
    # `request` is the project's own HTTP helper (see the sketch below), not
    # the requests library.
    response = request.get(endpoint, page)

    # Save the raw HTML so the markup can be inspected if Bukalapak changes
    # its page structure.
    try:
        with open("html/bukalapak.html", "w") as f:
            f.write(response.text)
    except OSError:
        print('failed to save html/bukalapak.html')

    html = BeautifulSoup(response.content, "html.parser")

    # Each matching <div> carries its data as JSON in the data-product attribute.
    products = html.select('div[class="product-detail-track"]')

    result = []
    for product in products:
        data_product = json.loads(product['data-product'])
        category = data_product['category']

        result.append({
            'nama': data_product['name'],
            'harga': data_product['price'],
            'tipe': category['structure'][1]
        })

    return result
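get_kaos() calls request.get(endpoint, page), a project-local wrapper whose implementation is not shown here. The following is only a minimal sketch of what such a wrapper could look like, assuming the standard requests library, the base URL mentioned in Example #3, and a 'page' query parameter (the parameter name is an assumption).

import requests

BASE_URL = 'https://www.bukalapak.com'


def get(endpoint, page=1):
    # Append the category endpoint to the base URL and pass the page number
    # as a query parameter (the parameter name 'page' is assumed).
    return requests.get(BASE_URL + endpoint, params={'page': page})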
Example #2
def get_handuk(page=1):
    endpoint = '/c/rumah-tangga/kamar-mandi?from=omnisearch&from_keyword_history=false&search%5Bkeywords%5D=handuk&search_source=omnisearch_category&source=navbar'
    response = request.get(endpoint, page)

    # Save the raw HTML so the markup can be inspected if Bukalapak changes
    # its page structure.
    try:
        with open("html/bukalapak.html", "w") as f:
            f.write(response.text)
    except OSError:
        print('failed to save html/bukalapak.html')

    html = BeautifulSoup(response.content, "html.parser")

    products = html.select('div[class="product-detail-track"]')

    result = []
    for product in products:
        data_product = json.loads(product['data-product'])
        category = data_product['category']

        result.append({
            'nama': data_product['name'],
            'harga': data_product['price'],
            'tipe': category['structure'][1]
        })

    return result
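The parsing step is the same in every example: each product <div> carries its data as a JSON string in the data-product attribute. The following self-contained sketch runs that parsing against a hard-coded HTML fragment; the fragment and its values are made up for illustration and are not real Bukalapak markup.

import json

from bs4 import BeautifulSoup

SAMPLE_HTML = '''
<div class="product-detail-track"
     data-product='{"name": "Handuk Mandi", "price": 50000,
                    "category": {"structure": ["Rumah Tangga", "Kamar Mandi"]}}'>
</div>
'''

html = BeautifulSoup(SAMPLE_HTML, "html.parser")
for product in html.select('div[class="product-detail-track"]'):
    # The attribute value is a JSON string; decode it to a dict.
    data_product = json.loads(product['data-product'])
    print(data_product['name'], data_product['price'],
          data_product['category']['structure'][1])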
Example #3
def get_tas(page=1):
    '''
    Access the Bukalapak site at the URL https://www.bukalapak.com/c/fashion-wanita.
    '''
    endpoint = '/c/fashion-wanita/tas-wanita'
    response = request.get(endpoint, page)
    # Because the Bukalapak site changes its structure from time to time, the
    # HTML page is saved to a file first so it can be inspected whenever the
    # HTML structure changes.
    try:
        with open("html/bukalapak.html", "w") as f:
            f.write(response.text)
    except OSError:
        print('failed to save html/bukalapak.html')
    # Parse the response with BeautifulSoup so the HTML elements can be accessed.
    html = BeautifulSoup(response.content, "html.parser")
    # Select the HTML elements that hold the data we want. They were found by
    # inspecting Bukalapak's HTML source directly and looking for the element
    # that carries the target data; the saved source is in the html folder.
    products = html.select('div[class="product-detail-track"]')
    # Access the attributes of the element that hold the data we are after:
    #   the product name is in data_product['name']
    #   the price is in data_product['price']
    #   the type is in category['structure'][2]

    result = []
    for product in products:
        data_product = json.loads(product['data-product'])
        category = data_product['category']

        result.append({
            'nama': data_product['name'],
            'harga': data_product['price'],
            'tipe': category['structure'][2]
        })

    return result
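A hypothetical usage sketch: fetch the first few pages of bag listings and print each entry. It assumes get_tas() above is importable and that Bukalapak still serves the markup its selector expects.

if __name__ == '__main__':
    for page in range(1, 4):
        # Each call returns a list of dicts with the keys 'nama', 'harga', 'tipe'.
        for item in get_tas(page=page):
            print(item['nama'], item['harga'], item['tipe'])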