Beispiel #1
0
    def scrap_item_details(self, current_win_handle):
        # Visit every browser window other than `current_win_handle` and read
        # the item's name, phone, e-mail and street address from the page
        # shown there.
        #
        # NOTE: this excerpt is truncated. The outer `try:` below has no
        # visible handler, and `locality` / `country` are initialised but
        # never filled in the part of the method that is shown.
        # The `except Exception, e` clauses are Python 2 syntax.
        logger.info('Scraping item details')
        all_win_handles = self.driver.window_handles
        try:
            for win_handle in all_win_handles:
                # Skip the window the caller is already working in.
                if win_handle != current_win_handle:
                    logger.info('switching window handle')
                    self.driver.switch_to.window(win_handle)
                    # Fixed pause after the switch -- presumably to let the
                    # page finish loading; the explicit waits below add up to
                    # 10 more seconds per field.
                    sleep(7)
                    logger.info('collecting item details')
                    # Defaults used when a field cannot be scraped.
                    name = ''
                    phone = ''
                    street = ''
                    locality = ''
                    country = ''
                    email = ''

                    # Each field is scraped best-effort: any error is silently
                    # swallowed and the variable keeps whatever value it held
                    # when the error occurred ('' if the wait itself failed).
                    try:
                        # Item name: the page heading.
                        name = WebDriverWait(
                            self.driver,
                            10).until(lambda x: x.find_element_by_css_selector(
                                'h1#HEADING.heading_title'))
                        name = name.text
                        name = name.encode('latin-1')
                    except Exception, e:
                        pass

                    try:
                        # Phone number: second span of the contact-info block.
                        phone = WebDriverWait(
                            self.driver, 10
                        ).until(lambda x: x.find_element_by_css_selector(
                            'div.phone.directContactInfo span:nth-child(2)'))
                        phone = phone.text
                        phone = phone.encode('latin-1')
                    except Exception, e:
                        pass

                    try:
                        # E-mail: taken from the href of the first matching
                        # link, with the "mailto:" prefix removed.
                        email = WebDriverWait(
                            self.driver, 10
                        ).until(lambda x: x.find_element_by_css_selector(
                            'div.details_tab div.additional_info div.content ul.detailsContent li a'
                        ))
                        email = email.get_attribute("href").encode(
                            'utf-8').replace('mailto:', '')
                        # NOTE(review): `email` is already a UTF-8 byte string
                        # here, so under Python 2 this second encode first
                        # decodes it as ASCII and fails for non-ASCII
                        # addresses; the handler below hides that failure.
                        email = email.encode('latin-1')
                    except Exception, e:
                        pass

                    try:
                        # Street: uses the plural finder, so the wait yields a
                        # list; only its first element is used.
                        street = WebDriverWait(self.driver, 10).until(
                            lambda x: x.find_elements_by_css_selector(
                                'div.address span.street-address'))
                        street = street[0].text.encode('latin-1')
                    except Exception, e:
                        pass
Beispiel #2
0
def Results_Search(driver):
    """Open every researcher link on the current results page and crawl it.

    The links are the anchors inside ``td.summary_data`` cells; the
    elements with class ``authorSetsNum`` supply a number that is only
    used as a label in the log output.  Each researcher page is opened
    in a window named ``new_window``, handed to ``Work_Search`` and then
    the driver is switched back to the first window.

    A failure while crawling one researcher is logged and the loop moves
    on to the next one.

    Args:
        driver: Selenium WebDriver positioned on the search-results page.

    Returns:
        None.
    """
    # Bind both collections up front so that a failed wait (which is only
    # logged) can no longer surface later as an UnboundLocalError.
    title_list = []
    authorSetsNums = []

    try:
        # The expected condition returns the located elements itself, so a
        # second lookup after the wait is unnecessary.
        title_list = WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located(
                (By.XPATH, '//td[@class="summary_data"]//a')))
    except Exception:
        logger.error('連結抓取錯誤')
        # Without links there is nothing to crawl.
        return

    try:
        authorSetsNums = WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located(
                (By.CLASS_NAME, 'authorSetsNum')))
    except Exception:
        # The numbers are only log labels, so crawling continues without
        # them.
        logger.error('編號抓取錯誤')

    namelist = []
    nameUrlist = []
    for index, title in enumerate(title_list):
        nameUrlist.append('http://www.researcherid.com/rid/' +
                          title.get_attribute('title'))
        # Tolerate fewer numbers than links instead of raising IndexError.
        if index < len(authorSetsNums):
            number = authorSetsNums[index].text
        else:
            number = ''
        namelist.append(number + " : " + title.text)

    separator = '--------------------------------------------------------------------------'

    for name, url in zip(namelist, nameUrlist):
        # Re-coded form of the name used for the console log lines.
        display_name = name.encode("utf8").decode("cp950", "ignore")
        logger.info(separator)
        logger.info("NAME : " + display_name)
        try:
            script = "window.open('" + url + "', 'new_window')"
            logger.info(script)
            driver.execute_script(script)
            driver.switch_to.window(driver.window_handles[1])
            Work_Search(url, driver)
            time.sleep(3)
            driver.switch_to.window(driver.window_handles[0])
        except Exception as e:
            # Return to the results window so the next researcher can be
            # opened, then record what went wrong.
            driver.switch_to.window(driver.window_handles[0])
            logger.info("NAME : " + name + ' Crawl Failed ' + str(e))
            # Local is named `tb` so it does not shadow the stdlib module.
            tb = sys.exc_info()[2]
            logger.error(sys.exc_info())
            logger.error(tb.tb_lineno)
            logger.error(e)
            continue
        logger.info("NAME : " + display_name + ' Crawl Over')
        logger.info(separator)
    def test_platform(self):
        """Check the platform label shown when editing a newly created campaign.

        Drives the page objects through: choosing the product, choosing the
        platform, filling in and adding an ad, creating the campaign and
        opening it for editing.  It then waits for the platform label and
        compares its text with the expected value.
        """
        advertise = ThatAdvertise(self.driver)
        advertise.set_product()

        platform = Platform(self.driver)
        platform.set_platform()

        create_ads = CreateAds(self.driver)
        create_ads.set_title('test')
        create_ads.set_text('test')
        create_ads.set_image_small(self.IMAGE_SMALL)
        create_ads.set_image_big(self.IMAGE_BIG)
        create_ads.set_link('http://www.odnoklassniki.ru/event/ID')
        create_ads.add()

        create = CreateCompany(self.driver)
        create.click()

        edit = Edit(self.driver)
        edit.click_edit()

        # Poll every 0.1 s for up to 30 s; `until` keeps polling while the
        # label text is empty, so a non-empty string comes back.
        platform_result = WebDriverWait(self.driver, 30, 0.1).until(
            lambda d: d.find_element_by_css_selector('.base-setting__pads-item__label').text
        )

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('Одноклассники: мобильная версия', platform_result.encode('utf-8'))
Beispiel #4
0
    def add_data_to_list(driver):
        """Scrape one complaint page into a flat row.

        The summary tables of the page are read first.  The PRINT control is
        then clicked and the detail view is scraped in one of two ways:

        * table layout (the branch the original comments call "Marathi
          complaint"): category, sub-category and both addresses are read
          from fixed table cells;
        * fallback (the "English complaints" branch), taken when anything in
          the table branch fails: the driver switches into the second iframe
          under ``#content`` and the fields are recovered by searching the
          text divs of the embedded viewer for their labels.

        NOTE(review): the driver is left inside that iframe when the fallback
        branch returns -- confirm that the caller switches back.
        NOTE(review): comparing encoded text with plain string literals only
        works when ``str`` is a byte string, i.e. under Python 2 -- confirm
        the target interpreter.

        Args:
            driver: Selenium WebDriver positioned on a complaint page.

        Returns:
            A tuple ``(row, used_table_layout, Date_of_Application)``.
            ``row`` is the list ``[complaint_number, Date_of_Application,
            date, Category, sub_cat, Landmark, Complaint_description,
            Department_name, Status, location, applicant_address]``;
            ``used_table_layout`` is 1 for the table branch and 0 for the
            fallback.
        """

        def cell_text(xpath):
            # Text of the element at `xpath`, UTF-8 encoded and stripped.
            return driver.find_element_by_xpath(xpath).text.encode(
                'utf-8').strip()

        def viewer_html(index):
            # innerHTML of the index-th text div of the embedded viewer,
            # waiting up to 5 seconds for it to be present.
            element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     "//div[@id='viewer']/div/div[2]/div[%d]" % index)))
            return element.get_attribute('innerHTML')

        def drop_labels(values, labels):
            # Remove the first occurrence of every label that is present.
            for label in labels:
                try:
                    values.remove(label)
                except ValueError:
                    pass
            return values

        sub_cat = ''
        Category = ''

        summary = "//div[@id='content']//form/table[1]/tbody/"
        status_table = "//div[@id='content']//form/table[2]/tbody/"

        complaint_number = cell_text(summary + "tr[1]/td[2]/b")
        Date_of_Application = cell_text(summary + "tr[1]/td[4]")

        # Per the original notes: English pages keep the description in
        # tr[3]/td[2] (tr[2]/td[2] is the complainant name there), Hindi
        # pages keep it in tr[2]/td[2].
        try:
            Complaint_description = cell_text(summary + "tr[3]/td[2]")
        except NoSuchElementException:
            Complaint_description = cell_text(summary + "tr[2]/td[2]")

        Department_name = cell_text(status_table + "tr[2]/td[2]")
        Status = cell_text(status_table + "tr[3]/td[2]")

        temp = [
            complaint_number,
            Date_of_Application,
            Complaint_description,
            Department_name,
            Status,
        ]

        # Table layout ("Marathi complaint").
        try:
            print_control = driver.find_element_by_id('PRINT')
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            hover = ActionChains(driver).move_to_element(print_control)
            hover.click(print_control).perform()
            driver.implicitly_wait(5)

            # Probe only: the value is not used, but a missing complaint
            # number cell means this is not the table layout and sends
            # control to the fallback branch.
            driver.find_element_by_xpath(
                "/html/body/form/div/table[1]/tbody/tr[3]/td[2]/b").text

            # Category and sub_cat are assigned as soon as they are read, so
            # a later failure in this branch still hands them on to the
            # fallback branch.
            Category = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     "//div[@id='content']//table[1]/tbody/tr[4]/td[2]"))
            ).text.encode('utf-8').strip()
            sub_cat = cell_text(
                "//div[@id='content']//table[1]/tbody/tr[5]/td[2]")

            # Location of complaint: house name, house no, street 1/2,
            # area 1/2, city, pincode -- as (row, column) of table 5.
            location_cells = ((2, 2), (2, 4), (3, 2), (3, 4),
                              (4, 2), (4, 4), (5, 2), (5, 4))
            location_parts = [
                cell_text(
                    "//div[@id='content']//table[5]/tbody/tr[%d]/td[%d]" % cell)
                for cell in location_cells
            ]

            # Address of applicant: house no, house name, street 1/2,
            # area 1/2, city, then state and country.
            applicant_cells = ((10, 2), (10, 4), (11, 2), (11, 4),
                               (12, 2), (12, 4), (13, 2), (14, 2), (14, 4))
            applicant_parts = [
                cell_text(
                    "/html/body/form/div/table[5]/tbody/tr[%d]/td[%d]" % cell)
                for cell in applicant_cells
            ]
            # No pincode is read in this layout; a blank keeps its slot
            # between city and state.
            applicant_parts.insert(7, ' ')

            # '######' stands in for the date and ' ' for the landmark,
            # neither of which is read in this layout.
            temp[2:2] = ['######', Category, sub_cat, ' ']
            temp.append(','.join(location_parts))
            temp.append(','.join(applicant_parts))

            driver.switch_to.default_content()
            return temp, 1, Date_of_Application

        # Fallback ("English complaints").  Exception rather than a bare
        # except, so KeyboardInterrupt / SystemExit are no longer swallowed.
        except Exception:
            # Switch to the frame that holds the viewer.
            viewer_frame = driver.find_element_by_xpath(
                "//div[@id='content']//iframe[2]")
            driver.switch_to.frame(viewer_frame)

            # Sub-category: div 11 with surrounding ':' stripped and its
            # first two characters dropped; div 12 is appended unless it is
            # already the 'Description' label.
            sub_cat = viewer_html(11).strip(':')[2:]
            continuation = viewer_html(12).encode('utf-8').strip()
            if continuation != 'Description':
                sub_cat = sub_cat + ' ' + continuation

            # Collect divs 22..84 (stopping at the first one that cannot be
            # fetched); the remaining fields are found by searching this
            # list for their labels.
            location = []
            for index in range(22, 85):
                try:
                    location.append(
                        viewer_html(index).encode('utf-8').strip(' '))
                except Exception:
                    break

            # Location of complaint: everything from 'House  Name' up to and
            # including the entry after 'Pincode', minus the labels.
            start = location.index('House  Name')
            end = location.index('Pincode')
            List1 = drop_labels(location[start:end + 2], [
                'House  Name', 'House  No.', 'Street1', 'Street2', 'Area1',
                'Area2', 'City', 'Pincode'
            ])

            # Landmark is optional.
            try:
                landmark_at = location.index('Landmark')
                Landmark = location[landmark_at + 1].strip(':')[2:]
            except (ValueError, IndexError):
                Landmark = ' '

            # Address of applicant: entries between its heading and
            # 'Telephone(O)', minus the labels.
            start = location.index('Address  of  Applicant  :')
            end = location.index('Telephone(O)')
            List2 = drop_labels(location[start + 1:end], [
                'House  No', 'House  Name', 'Street1', 'Street2', 'Area1',
                'Area  2', 'City', 'Pin  Code', 'State', 'Country'
            ])

            # Date: taken from the entry after 'Responsible  :', starting
            # 7 characters past the word 'Date'.  list.index raises when the
            # label is absent, so no -1 check is needed.
            try:
                date_entry = location[location.index('Responsible  :') + 1]
                date = date_entry[date_entry.index('Date') + 7:]
            except (ValueError, IndexError):
                date = '######'

            temp[2:2] = [date, Category, sub_cat, Landmark]
            temp.append(','.join(List1))
            temp.append(','.join(List2))

            return temp, 0, Date_of_Application