コード例 #1
0
            '''
            for j in range(1, 4):
                movie['star'].append(star_list[j].findAll('td')[1].find('a').text.strip())
            #extract poster url
            movie['poster'] = movie_details.find('div', {'class' : 'poster'}).find('img')['src']
            #extrcat rating
            movie['rating'] = movie_details.find('span', {'itemprop' : 'ratingValue'}).text
            #extract release dates of each movie
            dates_url = movie_details.find('a', {'title' : 'See more release dates'})['href']
            dates_response = driver.request('GET', 'https://www.imdb.com/' + dates_url)
            dates_details = bs(dates_response.text, 'lxml')
            movie['date'] = {}
            dates_list = dates_details.findAll('tr', {'class': 'ipl-zebra-list__item release-date-item'})
            for date in dates_list:
                date_info = date.findAll('td')
                if(date_info[0].text.strip() not in movie['date'].keys()):
                    movie['date'][date_info[0].text.strip()] = date_info[1].text
            data['movie'].append(movie)

# Create the scraper object and initialise it with the listing url.
scraper = Scarper(imdb_url)
try:
    # Send the request to the given url.
    scraper.load_url()
    # Collect the link of each movie.
    scraper.get_movie_links()
    # Fetch the details of each movie (results are accumulated in ``data``).
    scraper.get_movie_details()
    # Persist the collected data; only reached when every step succeeded.
    with open('movies.json', 'w') as outfile:
        json.dump(data, outfile)
finally:
    # Always release the browser, even when one of the scraping steps
    # raises, so no browser window is left behind.
    driver.close()
コード例 #2
0
ファイル: stream.py プロジェクト: bmsiegel/Band-Streaming
# Open the "create live" page of the configured band and give it time to load.
browser.get(f'https://band.us/band/{JBS_BAND_ID}/create-live')
time.sleep(5)

# Click through the two setup buttons in order. The absolute XPaths are tied
# to the current page layout.
for xpath in (
    '/html/body/div[1]/section/div/div[1]/div/div[2]/button[1]',
    '/html/body/div[1]/section/div/div[2]/div[3]/div[4]/div/button',
):
    browser.find_element_by_xpath(xpath).click()

# Hover over the next button before clicking it.
# NOTE(review): presumably this button copies the stream key to the
# clipboard (the key is read from the clipboard below) - confirm.
hover_target_xpath = '/html/body/div[1]/section/div/div[2]/div[3]/div[3]/div/button'
button = browser.find_element_by_xpath(hover_target_xpath)
action = ActionChains(browser).move_to_element(button)
action.perform()
time.sleep(2.5)

# The element is looked up again for the click, then the alert that follows
# is accepted.
browser.find_element_by_xpath(hover_target_xpath).click()
time.sleep(2.5)
browser.switch_to.alert.accept()

# Read the stream key from the clipboard.
streamKey = clipboard.paste()
print(f'Stream Key: {streamKey}')

# Launch the streaming helper script in the background with the relay
# endpoint and the stream key as its arguments.
args = [
    './stream.sh',
    'rtmp://global-rtmp.lip2.navercorp.com:8080/relay',
    streamKey,
]
print(args)
subprocess.Popen(args, cwd=os.getcwd())

browser.close()
コード例 #3
0
ファイル: request_law.py プロジェクト: taozhijiang/dust_repos
	print(ri)

	pass

if __name__ == "__main__":
	# Endpoint that is queried once per result page.
	url_prefix = "http://www.hshfy.sh.cn/shfy/gweb/flws_list_content.jsp"
	page_id = 1

	try:
		while True:
			print("Current page: %d" % page_id)
			url = url_prefix

			# A fresh browser is started for every page, as before.
			webdriver = Firefox()
			try:
				# Request the page that matches the loop counter. The page
				# number used to be hard-coded to '2', so every iteration
				# fetched the same page.
				response = webdriver.request(
					'POST', url,
					data={'fydm': '200', 'ajlb': '%E6%B0%91%E4%BA%8B', 'pagesnum': str(page_id)})
				r_read = response.text

				# NOTE(review): no parser is named, so BeautifulSoup picks
				# whichever one is installed - consider pinning it.
				soup = BeautifulSoup(r_read)
				info_soup = soup.findAll('tr', attrs={"style": "cursor:hand"})
				if info_soup:
					print("LEN: %d" % (len(info_soup)))
					for item in info_soup:
						track_info(item)
			finally:
				# Close the browser even when the request or parsing fails.
				webdriver.close()

			page_id += 1
			if page_id > 234:
				break
	finally:
		# Release the database connection on success and on failure alike.
		db_conn.close()
	print("Done!")
コード例 #4
0
class AlgebraixSession(object):
    """Browse Algebraix messages in Firefox and save them to disk.

    The methods are meant to be called per message: the ``set_*`` methods
    read the currently displayed message from the page, then
    ``create_download_directory`` and ``download_files`` store its text and
    attachments.
    """

    # Matches attachment link text such as "report.pdf (5K)" or
    # "notes.docx (1.5M)"; group 1 is the file name. The size accepts a
    # single digit as well as an optional decimal part.
    _ATTACHMENT_RE = re.compile(r"(.+\.\w{3,4}) \(\d+(?:\.\d+)?[KM]\)")

    def __init__(self):
        """Initialise the session by opening the web browser."""
        self.browser = Firefox()
        self.browser.get("https://c1-summit.algebraix.com/")
        self.regex = self._ATTACHMENT_RE

    def set_names(self):
        """
        Find and set the names shown on the current message.

        The first name found is stored as the sender's name.

        Raises: IndexError if the page shows no name at all.
        """
        self.names = [
            name.text for name in self.browser.find_elements_by_class_name(
                "material-card__text--primary")
        ]
        self.sender_name = self.names[0]

    def replace_sender_name(self, names):
        """
        Check if parent's name can be substituted with student's.

        Inputs: names, a dictionary mapping a student name to a sequence
        whose second item is searched for the sender's name.
        """
        for student, v in names.items():
            if self.sender_name in v[1]:
                self.sender_name = student

    def set_group(self, names):
        """
        Check student's group if possible.

        The group is the first item stored under the sender's name, or an
        empty string when the sender is not in the dictionary.

        Inputs: names, a dictionary of various data types.
        """
        self.group = names.get(self.sender_name, [""])[0]

    def set_bodies(self):
        """
        Find and set current message's body text.

        Each body gets a blank-line separator appended; the last one is
        trimmed. When no body is found the list is simply left empty.
        """
        self.bodies = [
            item.text + "\n\n"
            for item in self.browser.find_elements_by_class_name(
                "material-card__body--paragraph." +
                "material-card__body--respect-lines.text-break")
        ]
        if self.bodies:
            # NOTE(review): three characters are removed although only two
            # newlines were appended - confirm that dropping the last
            # character of the text is intended.
            self.bodies[-1] = self.bodies[-1][:-3]

    def set_dates(self):
        """Find and set date and time for each message."""
        self.dates = [
            date.text for date in self.browser.find_elements_by_class_name(
                "material-card__body--title-secondary")
        ]

    def set_attachments(self):
        """Set a list of attachments for current message."""
        self.attachments = [
            link for link in self.browser.find_elements_by_tag_name("a")
            if self.regex.search(link.text)
        ]

    def create_download_directory(self):
        """Create download directory for current sender."""
        self.targetPath = os.path.expanduser(
            os.path.join(
                "~", "Downloads", "AlgebraixInbox",
                f"{self.group}{self.sender_name.title().replace(' ', '')}"))
        os.makedirs(self.targetPath, exist_ok=True)

    def download_files(self):
        """
        Download and save current body text and attachments.

        The text goes to the first unused "NN.txt" in the target directory;
        every attachment is saved next to it as "NN_<file name>".

        Raises: whatever ``raise_for_status`` raises for a failed download.
        """
        # Find the first message number that has no text file yet.
        n = 1
        while os.path.isfile(os.path.join(self.targetPath, f"{n:02}.txt")):
            n += 1

        # ``with`` guarantees the files are closed even if a write fails.
        text_path = os.path.join(self.targetPath, f"{n:02}.txt")
        with open(text_path, "w") as text_file:
            for name, date, body in zip(self.names, self.dates, self.bodies):
                text_file.write(name.title() + "\n" + date + "\n" + body)

        for link in self.attachments:
            res = self.browser.request("GET", link.get_attribute("href"))
            res.raise_for_status()
            attachment_path = os.path.join(
                self.targetPath,
                f"{n:02}_{self.regex.search(link.text).group(1)}")
            with open(attachment_path, "wb") as attachment_file:
                for chunk in res.iter_content(10000):
                    attachment_file.write(chunk)

    def find_next(self):
        """
        Find and returns the link to the next message.

        Returns False if it is the last message.

        Returns: a Selenium object or a bool.
        """
        links = self.browser.find_elements_by_class_name("action-item")
        for link in links:
            if link.get_attribute("data-original-title") == "Next":
                return link
        return False

    def browser_close(self):
        """Close the web browser."""
        self.browser.close()
コード例 #5
0
ファイル: sunny.py プロジェクト: rethore/kunst
class Sunny(object):
    """Firefox session on the Sunny portal used to download power data.

    The browser runs inside a ``Display`` started by ``start_display``. The
    class relies on module-level names defined elsewhere in this file
    (``houses``, ``current_dir``, ``TIME_DELAY``, the ``id_*`` element ids
    and ``url_data_graph``).
    """

    def __init__(self, login, password):
        """Start the display and the browser, then log in.

        Parameters
        ----------
        login: str
            The login credential to sunnyportal

        password: str
            The password credential of sunnyportal
        """
        # Stored up front so login() can fall back on them later.
        self._login = login
        self._password = password
        self.driver = None

        self.start_display()
        try:
            profile = webdriver.FirefoxProfile()
            profile.set_preference('browser.download.folderList', 2) # custom location
            profile.set_preference('browser.download.manager.showWhenStarting', False)
            profile.set_preference('browser.download.dir', current_dir)
            profile.set_preference('browser.helperApps.neverAsk.saveToDisk', "text/csv,application/vnd.ms-excel")

            self.driver = Firefox(profile)
            self.login(login, password)
        except Exception:
            # Do not leave the display (and browser) running when the
            # set-up fails half way.
            self.close()
            raise

    def start_display(self):
        """Start the display the browser is rendered in."""
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()

    def close(self):
        """Close the browser (if one was started) and stop the display."""
        try:
            if getattr(self, 'driver', None) is not None:
                self.driver.close()
        finally:
            # The display is stopped even if closing the browser fails.
            self.display.stop()

    def login(self, login=None, password=None):
        """Login on the Sunny portal website using the credentials

        When no login is given, the credentials stored on the instance are
        used for both the login and the password.

        Parameters
        ----------

        login: str
            The login credential to sunnyportal

        password: str
            The password credential of sunnyportal
        """
        if not login:
            login = self._login
            password = self._password

        self.driver.get("https://www.sunnyportal.com/Templates/Start.aspx?ReturnUrl=%2f")
        self.driver.find_element_by_id("txtUserName").clear()
        self.driver.find_element_by_id("txtUserName").send_keys(login)
        self.driver.find_element_by_id("txtPassword").clear()
        self.driver.find_element_by_id("txtPassword").send_keys(password)
        self.driver.find_element_by_id("ctl00_ContentPlaceHolder1_Logincontrol1_LoginBtn").click()

    def wait_n_get(self, element_type, value):
        """ Wait for an element to be present and get it

        Parameters
        ----------
        element_type: By.ID | By.LINK_TEXT...
            The type of value to identify the element to get
        value: str
            the value describing the element to get

        Returns
        -------
        el: element
            The driver element requested
        """
        return WebDriverWait(self.driver, TIME_DELAY).until(EC.presence_of_element_located((element_type, value)))

    def goto(self, n_house):
        """Go to the page of a house given its number, from the plant list page

        Parameters
        ----------

        n_house: int
            The number of the house to go to
        """
        el = self.wait_n_get(By.LINK_TEXT, houses[n_house])
        el.click()

    def goto_2(self, n_house):
        """Go to a house from the plant panel on the Dashboard page

        Parameters
        ----------

        n_house: int
            The number of the house to go to

        """
        self.wait_n_get(By.CLASS_NAME, 'plantselect').click()
        self.wait_n_get(By.LINK_TEXT, houses[n_house]).click()

    def hover_over(self, id):
        """Hover over an element of the page given its id

        Parameters
        ----------

        id: str
            The id of the element to hover over
        """
        el = self.wait_n_get(By.ID, id)
        hover = ActionChains(self.driver).move_to_element(el)
        hover.perform()

    def click(self, id):
        """Click on an element of the page given its id

        Parameters
        ----------

        id: str
            The id of the element to click on
        """
        el = self.wait_n_get(By.ID, id)
        el.click()

    def select_date(self, day, month, year, max_retries=10):
        """Set the date picker to the given date and refresh the graph

        The date is written into the text box as month/day/year, then the
        "previous" and "next" buttons are clicked one after the other.

        Parameters
        ----------
        day: int
        month: int
        year: int
            The date to select
        max_retries: int
            How many times to start over when a click fails with
            "Element is not clickable at point"

        Raises
        ------
        Exception
            The last "not clickable" error once max_retries is exhausted.
            Any other error is printed and the selection is abandoned.
        """
        id_date =    'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1__datePicker_textBox'
        id_before =  'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_prev'
        id_after =   'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_next'
        # A bounded loop replaces the former unbounded recursive retry.
        for attempt in range(max_retries + 1):
            try:
                # Only waits for the text box; the value is set through jQuery.
                self.wait_n_get(By.ID, id_date)
                self.driver.execute_script('$("#%s").val("%d/%d/%d")'%(id_date, month, day, year))
                sleep(0.2)
                self.click(id_before)
                sleep(0.2)
                self.click(id_after)
                sleep(0.2)
                return
            except Exception as e:
                if "Element is not clickable at point" not in str(e):
                    # Still best effort, but no longer silent.
                    print(e)
                    return
                if attempt == max_retries:
                    raise
                print(e)
                print('trying again!')

    def download(self, day=None, month=None, year=None):
        """Download the CSV file

        Parameters
        ----------
        day: int, optional
        month: int, optional
        year: int, optional
            The date to select first; the date is left untouched when day
            is not given

        Returns
        -------
        res: response | None
            The response of the data request, or None when the page
            reports that no data is available for that day

        Raises
        ------
        Exception
            When the data request does not answer with status 200, or when
            the download button cannot be used and the info icon cannot be
            located
        """
        # Make sure we see the "Day" panel
        tabactive = self.wait_n_get(By.CLASS_NAME, 'tabactive')
        if tabactive.text != 'Day':
            self.click(id_day)

        # Select the right day
        if day:
            self.select_date(day, month, year)

        # Hover over the download button
        try:
            self.hover_over(id_hover)
            self.click(id_click)
        except Exception as e_1:
            # Check if the data is available for that day by looking for the info bubble
            try:
                el = self.wait_n_get(By.ID, id_info)
                if 'info.png' in el.get_attribute('src'):
                    print('no data available for this day')
                    return None
                else:
                    # Not sure what just happened there
                    # NOTE(review): this raise is caught by the handler
                    # right below, so e_1 only propagates when its message
                    # contains 'Unable to locate element' - confirm whether
                    # that is intended.
                    raise e_1
            except Exception as e_2:
                if 'Unable to locate element' in str(e_2):
                    # The info icon isn't available
                    print(e_2)
                    raise e_1
                else:
                    # Not sure what just happened there; report both errors
                    # and still try to fetch the data below
                    print(e_1)
                    print(e_2)

        # Download the data for the day
        res = self.driver.request('GET', url_data_graph)
        if res.status_code == 200:
            print('sucess')
        else:
            raise Exception('Error:', res.text)
        return res

    def download_house(self, n, day=None, month=None, year=None):
        """ Download the house power production of the day

        Parameters
        ----------
        n: int
            The number of the house to go to
        day: int, optional
        month: int, optional
        year: int, optional
            The date to download; the currently shown date is used when
            day is not given

        Returns
        -------
        df: pandas.DataFrame | None
            A dataframe containing the house day power production, or None if there isn't any data available

        Raises
        ------
        Exception
            The original error when something went wrong and the portal
            does not report a temporary login block
        """

        try:
            # Check what is the starting point
            if 'Start.aspx' in self.driver.current_url:
                # We are on the login screen, we first need to login
                print('-- login in main screen')
                self.login()
                print('-- accessing house', n)
                self.goto(n)
            elif 'sunnyportal.com/Plants' in self.driver.current_url:
                # We are on the plant list, go straight to the house
                self.goto(n)
            elif 'sunnyportal.com/FixedPages/Dashboard.aspx' in self.driver.current_url:
                # We are on a dashboard, so we should be able to click on the left hand panel to go to the new house
                self.goto_2(n)
            else:
                # No idea where we are
                raise Exception('I dont know where we are:', self.driver.current_url)
            print('-- downloading house', n, 'power data')
            res = self.download(day, month, year)
            self.date = self.wait_n_get(By.ID, id_date).get_attribute('value')
            if day:
                if self.date != "%d/%d/%d"%(month, day, year):
                    print('Error the date wasnt fixed correctly: '+self.date)

            if res:
                # There seems to be a positive response, so let's put it in a pandas dataframe
                df = pd.read_csv(StringIO(res.text), sep=';', names=['power', 'avg'], skiprows=1)
                print('-- download sucessful')
                return df
            else:
                print('-- download failed')
                # No response, we return a None object
                return res

        except Exception as e_1:
            # Something went wrong
            try:
                # Check if sunny portal has banned us for some time
                text = self.wait_n_get(By.ID, 'ctl00_ContentPlaceHolder1_Logincontrol1_DivLogin').text
                if 'Login failed! Login will be blocked for' in text:
                    # It does seem like we have been banned for some time
                    print(text)
                    n_sec = int(text.split('for')[1].split(' seconds')[0])
                    print('going to sleep for %d sec'%(n_sec))
                    time.sleep(n_sec)
                    print('retrying this house')
                    return self.download_house(n, day, month, year)
            except Exception as e_2:
                # I don't know what went wrong
                print(e_1)
                print(e_2)
                raise e_1
            # The login box is shown without a block message: the failure
            # has another cause, so report it instead of silently
            # returning None.
            print(e_1)
            raise e_1

    def img(self):
        """A simple screenshot function to show on the notebook"""
        return Image(self.driver.get_screenshot_as_png())

    def download_all(self, day=None, month=None, year=None):
        """Download the power data of every house and save it to a CSV file

        Houses without data are left out. The file is named after the
        date read from the page during the download.

        Parameters
        ----------
        day: int, optional
        month: int, optional
        year: int, optional
            The date to download

        Returns
        -------
        data: pandas.DataFrame
            One 'House <number>' column of power values per house that
            returned data
        """
        df_dict = {}
        last_df = None
        for k in houses:
            print(k)
            df = self.download_house(k, day, month, year)
            if isinstance(df, pd.DataFrame):
                df_dict['House %d'%(k)] = df
                last_df = df

        # Save the data into a DataFrame. The index comes from the last
        # house that actually returned data, so a trailing house without
        # data (or no house at all) no longer breaks the call.
        index = last_df.index if last_df is not None else None
        self.data = pd.DataFrame({name: frame.power for name, frame in df_dict.items()}, index=index)

        # Save the data into a file
        m,d,y = self.date.split('/')
        self.data.to_csv('svalin_%s_%s_%s.csv'%(d,m,y))
        return self.data