Example 1
# Imports not shown in the original snippet (Selenium 3 style API):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Browser and Jscrapper are project-local helpers; a rough sketch of Browser follows this example.


def main():

    # First, we are going to create a browser
    google_chrome_driver_path = '/home/hamid/Desktop/javad/research/VENV/JTest/chromedriver'
    google_chrome_object = Browser(google_chrome_driver_path)
    google_chrome = google_chrome_object.create_chrome_tab()

    # Then we are going to start our browser to crawl the webpage
    url_for_scrapping = 'https://marketplace.atlassian.com/search?category=Time%20tracking&product=jira'
    google_chrome.get(url_for_scrapping)

    # Then we use WebDriverWait().until() to make sure the webpage loads successfully
    try:

        wait_time = 50  # WebDriverWait timeout is in seconds
        element = WebDriverWait(google_chrome, wait_time).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 "//h3[@class='sc-ghsgMZ giyMKw sc-kkbgRg hkWXqv']")))

        javad_spider = Jscrapper(google_chrome)

        # Calculate how many clicks on the "More Results" button are needed
        # (e.g. 73 results at 24 per page -> 73 // 24 = 3 clicks)
        number_per_page = 24
        search_result_count = javad_spider.search_result_count_finder()
        number_for_click_on_button = search_result_count // number_per_page

        for num in range(number_for_click_on_button):

            # Click the button to reveal more plugins
            javad_spider.more_result()

            # Wait for the page to finish loading
            javad_spider.wait_for_loading()

        # Create CSV file
        javad_spider.csv_maker()

        # Collect information about all the plugins
        plugins = javad_spider.plugin_finder()
        javad_spider.fetch_data(plugins)

    finally:
        google_chrome.quit()
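
The Browser helper used above is project-local and not shown in the snippet. As a rough sketch, assuming it only wraps the Selenium 3 Chrome driver around the chromedriver path it is given, create_chrome_tab() could look like this (the class body is an assumption for illustration, not the project's actual code):

from selenium import webdriver


class Browser:
    # Hypothetical sketch of the project-local Browser helper used in the examples.
    def __init__(self, driver_path):
        # Path to the chromedriver binary, e.g. the paths shown in the examples
        self.driver_path = driver_path

    def create_chrome_tab(self):
        # Selenium 3 accepts executable_path directly; newer Selenium versions
        # pass the driver path through a Service object instead.
        return webdriver.Chrome(executable_path=self.driver_path)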
Example 2
# Imports not shown in the original snippet (Selenium 3 style API):
import csv

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Browser, Jscrapper and PriceMiner are project-local helpers.


def main():
    # First, we are going to create a browser
    google_chrome_driver_path = '/home/hamid/Desktop/javad/research/VENV/Jscrapper/chromedriver'
    google_chrome_object = Browser(google_chrome_driver_path)
    google_chrome = google_chrome_object.create_chrome_tab()

    # Then we are going to start our browser to crawl the webpage
    url_for_scrapping = 'https://marketplace.atlassian.com/search?category=Time%20tracking&product=jira'
    google_chrome.get(url_for_scrapping)

    # Then we use WebDriverWait().until() to make sure the webpage loads successfully
    try:

        wait_time = 50  # WebDriverWait timeout is in seconds
        element = WebDriverWait(google_chrome, wait_time).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 "//h3[@class='sc-ghsgMZ giyMKw sc-kkbgRg hkWXqv']")))

        javad_spider = Jscrapper(google_chrome)

        # Calculate how many clicks on the "More Results" button are needed
        # (e.g. 73 results at 24 per page -> 73 // 24 = 3 clicks)
        number_per_page = 24
        search_result_count = javad_spider.search_result_count_finder()
        number_for_click_on_button = search_result_count // number_per_page

        for num in range(number_for_click_on_button):

            # Click the button to reveal more plugins
            javad_spider.more_result()

            # Wait for the page to finish loading
            javad_spider.wait_for_loading()

        # Create the price miner object
        price_miner = PriceMiner(google_chrome)

        # Create the CSV output file
        price_miner.csv_maker()

        plugins = price_miner.plugin_finder()
        href = price_miner.fetch_data(plugins)

        services = {
            'cloud': "//*[@class='pup-pricing-block-amount']",
            'server': "//*[@class='amount']"
        }

        for key in href:

            href[key] = href[key].replace('overview', 'pricing')
            google_chrome.get(href[key])
            google_chrome.implicitly_wait(8)
            active_installs = google_chrome.find_elements_by_xpath(
                "//*[@class='plugin-active-installs-total']")

            if len(active_installs) != 0:
                active_installs = float(active_installs[0].text.replace(
                    ',', ''))
            else:
                active_installs = 0

            print(href[key])
            # Value of the first query parameter in the pricing URL
            # (either 'cloud' or 'server')
            query_fragment = href[key][href[key].find('?'):href[key].find('&')]
            service_type = query_fragment.split('=')[1]
            is_finished = False
            line = [key, active_installs]

            while not is_finished:

                if service_type == 'cloud':
                    print('im in cloud')
                    google_chrome.get(href[key])
                    google_chrome.implicitly_wait(8)

                    # Check whether the app is free or not
                    is_free = google_chrome.find_elements_by_xpath(
                        "//*[@class='free-addon-text']")
                    if len(is_free) == 0:
                        cloud_prices = google_chrome.find_elements_by_xpath(
                            services[service_type])

                        for price in cloud_prices:
                            price = price.text.replace(',', '')
                            price = float(price.split('$')[1])
                            line.append(price)

                        # Switch to the server pricing page for the same plugin
                        href[key] = href[key].replace('cloud', 'server')
                        google_chrome.get(href[key])
                        google_chrome.implicitly_wait(8)

                        # Collect the server prices (at most the first five tiers)
                        server_prices = google_chrome.find_elements_by_xpath(
                            services['server'])
                        if len(server_prices) != 0:
                            i = 1
                            for price in server_prices:
                                if i <= 5:
                                    print(price.text, i)
                                    price = price.text.replace(',', '')
                                    price = float(price.split('$')[1])
                                    line.append(price)
                                    i += 1

                if service_type == 'server':
                    print('im in server')
                    is_free = google_chrome.find_elements_by_xpath(
                        "//*[@class='free-addon-text']")
                    if len(is_free) == 0:
                        server_prices = google_chrome.find_elements_by_xpath(
                            services[service_type])
                        for price in server_prices:
                            price = price.text.replace(',', '')
                            price = float(price.split('$')[1])
                            line.append(price)

                # Write the collected row to the CSV file
                with open('price.csv', 'a') as out_csv:
                    writer = csv.writer(out_csv)
                    writer.writerow(line)

                is_finished = True

    finally:
        # Always close the browser, even if scraping fails part-way through
        google_chrome.quit()
        print('Done!!')
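
For reference, the string slicing that derives service_type in Example 2 simply takes the value of the first query parameter in the plugin's pricing URL. Below is a small standalone sketch of the same extraction; the parameter name 'hosting' in the sample URL is an assumption used purely for illustration:

from urllib.parse import urlparse


def service_type_from_url(pricing_url):
    # Take the first query parameter and return its value ('cloud' or 'server'),
    # mirroring the find('?') / find('&') slicing used in Example 2.
    query = urlparse(pricing_url).query      # e.g. 'hosting=cloud&tab=overview'
    first_param = query.split('&')[0]        # 'hosting=cloud'
    return first_param.split('=')[1]         # 'cloud'


print(service_type_from_url(
    'https://marketplace.atlassian.com/apps/1/example/pricing?hosting=cloud&tab=overview'))
# -> cloud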