def StartScraping(self):
    # Log in to the Amazon Associates account
    username = self.txtusername.text()
    password = self.txtpassword.text()
    web = Browser()
    web.go_to('https://affiliate-program.amazon.com/home/promohub/promocodes?ac-ms-src=nav&type=mpc&active_date_range=0')
    web.type(username, into='Username', id='ap_email')
    web.type(password, into='Password', id='ap_password')
    web.click(classname='a-button-inner')
    # Logged in; give the page (and any captcha/2FA prompt) time to settle
    time.sleep(60)
    # end login

    # Read the CSV file: open the input file in read mode and the output file in write mode
    with open('product-data-input.csv', 'r') as read_obj, \
            open('product-data-output.csv', 'w', newline='') as write_obj:
        csv_reader = reader(read_obj)   # csv.reader over the input file object
        csv_writer = writer(write_obj)  # csv.writer over the output file object
        line_count = 0
        # Read each row of the input csv file as a list
        for row in csv_reader:
            if line_count == 0:
                # Header row: append the new column name
                row.append("Full URL")
                csv_writer.writerow(row)
                line_count += 1
            else:
                self.lblcurrent.setText(str(line_count))
                web.go_to(row[4])
                # time.sleep(5)
                content = web.get_page_source()
                soup = BeautifulSoup(content, 'html.parser')
                samples = soup.find_all("a", {"class": "a-size-base-plus a-link-normal titleLink"})
                link = ""
                for tag in samples:
                    # Follow the first product link only
                    link = "https://www.amazon.com" + tag['href']
                    web.go_to(link)
                    break
                time.sleep(5)
                web.click("Text", 'a')
                time.sleep(3)
                web.click("Full Link", 'span')
                content = web.get_page_source()
                soup = BeautifulSoup(content, 'html.parser')
                samples = soup.find_all("textarea", {"class": "amzn-ss-text-fulllink-textarea"})
                for textarea in samples:
                    row.append(textarea.text)
                # Add the updated row to the output file
                csv_writer.writerow(row)
                line_count += 1
def get_profile(user_data, out: str):
    """
    Gets a user profile from eBird.

    Uses webbot to log in to ebird.org and searches a sample checklist
    page for the user's profile address.

    :param user_data: pair of (checklist URL, user name), extracted from the users dataframe
    :param out: path to the output text file
    :return: None
    """
    user_name = user_data[1].split('/')[0].strip()
    checklist_url = user_data[0]
    web = Browser(showWindow=False)
    web.go_to("https://secure.birds.cornell.edu/cassso/login")
    web.type('birds_of_a_feather', into='Username')
    web.type('y&2m#9B3B2NGGzp', into='Password')
    web.press(web.Key.ENTER)
    web.go_to(checklist_url)
    source = web.get_page_source()
    soup = BeautifulSoup(source, 'lxml')
    try:
        url = soup.find('a', {'title': f'Profile page for {user_name}'})['href']
        profile = f"https://ebird.org/{url}"
        with open(out, 'a') as src:
            src.write(f"{checklist_url}_{profile}\n")
    except TypeError:
        # soup.find returned None: the page has no profile link for this user
        with open(out, 'a') as src:
            src.write(f"{checklist_url}_None\n")
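# A minimal usage sketch for get_profile (not part of the original source).
# It assumes the users dataframe has 'checklist_url' and 'user_name' columns;
# both column names and the input file are hypothetical.
import pandas as pd

users = pd.read_csv('users.csv')  # hypothetical input file
for row in users[['checklist_url', 'user_name']].itertuples(index=False):
    get_profile((row.checklist_url, row.user_name), out='profiles.txt')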
def getPage():
    global queries
    global soup
    print("Downloading the page")
    web = Browser()
    web.go_to(url_login)
    web.type(email, into='E-mail')
    web.type(password, into='Password')
    web.click('ACCEDI', classname='submit_access')
    time.sleep(delay)
    web.click('OFFERTE DI LAVORO')
    time.sleep(delay)
    page = web.get_page_source()
    web.close_current_tab()
    soup = BeautifulSoup(str(page), 'html.parser')
    print("Looking for the listings box")
    soup = soup.find("div", id="js-grid-blog-posts")
    print("Filtering the results by region")
    global regioni_cercate
    for reg in regioni_cercate:
        print("Filtering by: " + reg)
        filter(soup, reg)
    print("Finished")
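# The filter(...) helper called above is user-defined and not shown in the
# source (note that the name shadows Python's builtin filter). A minimal
# sketch, under the assumption that each listing in the js-grid-blog-posts
# div is a link whose text mentions the region being searched for:
def filter(listings_soup, region):
    for post in listings_soup.find_all("a"):
        if region.lower() in post.get_text().lower():
            print(post.get('href'))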
def run(self):
    """@brief A thread that reads stats from the LB2120 4G modem"""
    web = Browser()
    web.go_to('http://{}/index.html'.format(self._options.address))
    web.click(id='session_password')
    web.type(self._password)
    #web.type('QtV6Dq4s')
    web.click('Sign In')
    web.click(id='session_password')
    startTime = time()
    while True:
        web.go_to("http://{}/index.html#settings/network/status".format(
            self._options.address))
        content = web.get_page_source()
        now = time()
        elapsedTime = now - startTime
        startTime = now
        self._pollSeconds = elapsedTime
        soup = BeautifulSoup(content, 'html.parser')
        item = soup.body.find('dd', attrs={'class': 'm_wwan_signalStrength_rsrp'})
        self._lock.acquire()
        self._rxp = float(item.string)
        self._uio.debug("4G RXP (dBm): {}".format(self._rxp))
        self._lock.release()
        sleep(LB2120.POLL_DELAY_SECONDS)
def webot(user, passwd):
    web = Browser(False)  # showWindow=False: run without a visible window
    web.go_to("https://midas.unioeste.br/login/#/")
    time.sleep(5)
    web.type(user, id="login-username")
    web.type(passwd, id="login-password")
    web.press(web.Key.ENTER)
    time.sleep(5)
    web.click('Academus')
    time.sleep(5)
    web.click('Matrículas')
    time.sleep(3)
    data = web.get_page_source()
    web.close_current_tab()
    return data
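# A minimal usage sketch for webot (not part of the original source); the
# credential prompting shown here is an assumption.
from getpass import getpass

if __name__ == "__main__":
    html = webot(input("User: "), getpass("Password: "))
    page = BeautifulSoup(html, 'html.parser')  # parse the enrolment page
    print(page.title.string if page.title else "no title found")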
def getJapaTalkCalendar(userName, password, useCache=False):
    if useCache and os.path.exists(CACHED_FILE_FN):
        with io.open(CACHED_FILE_FN, 'r', encoding="utf-8") as fd:
            pageSource = fd.read()
    else:
        web = Browser()
        web.go_to('https://www.japatalk.com/login_form.php')
        web.click(id="wID")
        web.type(userName)
        web.click(id="wPasswd")
        web.type(password)
        web.click(classname="btn-next")
        #web.click(classname="from-cal")
        web.go_to('https://www.japatalk.com/reservation_calendar.php')
        pageSource = web.get_page_source()
        if useCache:
            with io.open(CACHED_FILE_FN, 'w', encoding="utf-8") as fd:
                fd.write(pageSource)
    return pageSource
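# A minimal usage sketch (not part of the original source). CACHED_FILE_FN is
# referenced by the function but its value is not shown in the source, so the
# path below is hypothetical.
CACHED_FILE_FN = 'japatalk_calendar.html'  # hypothetical cache path

source = getJapaTalkCalendar('myUser', 'myPassword', useCache=True)
calendar = BeautifulSoup(source, 'html.parser')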
def download_file_rj(music_or_video, file_type, regex_file, ch_actions):
    context.bot.send_message(chat_id=chat_id, text="Please wait a moment...")
    if res != "inv":
        web = Browser()
        web.go_to(url)
        s = web.get_page_source()
        web.close_current_tab()
        soup = BeautifulSoup(s, 'html.parser')
        # find the mp3 link
        file_name = str(re.findall(fr"{regex_file}", str(soup)))
        file_name = file_name.replace("['", "")
        file_name = file_name.replace("']", "")
        file_url = f"https://host2.rj-mw1.com/{file_type}{file_name}.mp{music_or_video}"
        req = urllib.request.Request(file_url)
        with urllib.request.urlopen(req) as response:
            the_file_url_page = str(response.read())
        if the_file_url_page != "b'Not found'":
            wget.download(file_url, f'{file_name}.mp{music_or_video}')
        else:
            try:
                os.remove(f"{file_name}.mp{music_or_video}")
            except OSError:
                pass
            # fall back to the mirror host
            file_url = f"https://host1.rj-mw1.com/{file_type}{file_name}.mp{music_or_video}"
            wget.download(file_url, f'{file_name}.mp{music_or_video}')
        # fix the caption: replace dashes with spaces
        file_caption = str(file_name).replace("-", " ")
        if str(file_name) == "[]":
            context.bot.send_chat_action(chat_id, ChatAction.TYPING)
            context.bot.send_message(chat_id=chat_id,
                                     text="The link is invalid.\n\nPlease send the link of the song or music video from Radio Javan.")
        else:
            if ch_actions == "music":
                context.bot.send_chat_action(chat_id, ChatAction.UPLOAD_AUDIO)
                context.bot.send_audio(chat_id=chat_id,
                                       audio=open(f"./{file_name}.mp{music_or_video}", "rb"),
                                       caption=f"{file_caption}")
            elif ch_actions == "video":
                context.bot.send_chat_action(chat_id, ChatAction.UPLOAD_VIDEO)
                context.bot.send_video(chat_id=chat_id,
                                       video=open(f"./{file_name}.mp{music_or_video}", "rb"),
                                       caption=f"{file_caption}")
            if os.path.exists(f"{file_name}.mp{music_or_video}"):
                os.remove(f"{file_name}.mp{music_or_video}")
import time
from webbot import Browser

user = input("user ")
passw = input("password ")

web = Browser()
link = "https://lz95.instructure.com/login/canvas"
password_field_id = "pseudonym_session_password"  # renamed: was 'id', which shadows the builtin
web.go_to(link)
web.type(user)                    # the username field has focus on page load
web.click(id=password_field_id)   # move focus to the password field
web.type(passw)
web.press(web.Key.ENTER)

sauce = web.get_page_source()
print(sauce.count("Course card color region"))
l = sauce.split("Course card color region")
n = sauce.count("Course card color region")
links = []
for a in range(n):
    links.append(l[a + 1].split('href="')[1].split('"')[0])
s = "https://lz95.instructure.com/"
print(links)
for link in links:
    site = s + link
    web.go_to(site)
web.quit()
print("finished")
time.sleep(1)
web.go_to(target)
id = 'sc-alm-buy-box-ptc-button-VUZHIFdob2xlIEZvb2Rz'
web.click(id=id, tag='input')
#id = 'a-autoid-0'
#web.click(id=id, tag='input')
pyautogui.click(1007, 257)  # hard-coded screen coordinates
#print(pyautogui.position())
id = 'subsContinueButton'
web.click(id=id, tag='input')
time.sleep(15.0)
html = web.get_page_source()

findSpot = False
while not findSpot:
    #if 'No delivery' in html:
    if not available(html):
        print('Not Available yet')
        pyautogui.click(154, 77)  # hard-coded screen coordinates
        time.sleep(15.0)
        html = web.get_page_source()
    else:
        client = boto3.client('sns')
        msg = "A slot is available, go get it"
        pn = "+1"
        client.publish(PhoneNumber=pn, Message=msg)
        print(msg)
        findSpot = True  # stop polling once a slot is found
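# available() is called above but not defined in this snippet. A minimal
# sketch, under the assumption (suggested by the commented-out check) that
# the page shows a "No delivery" banner while no slot is open:
def available(html):
    return 'No delivery' not in html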
async def update_database():
    my_file = 'classes.csv'
    # Delete any existing file before scraping a fresh copy
    if os.path.exists(my_file):
        os.remove(my_file)
        print(f'An existing {my_file} was found and has been deleted.')
    else:
        print(f'The file {my_file} does not exist. Creating the file now...')

    web = Browser()
    web.go_to('http://aisis.ateneo.edu/')
    web.click('click here')
    mirror = web.get_current_url().split('j_aisis')

    # enter credentials into the site
    ID_NUM = os.getenv('ID_NUM')
    PASSWORD = os.getenv('PASSWORD')
    web.type(ID_NUM, into='userName')
    web.type(PASSWORD, into='password')
    web.click('Sign in')
    # successfully signed into AISIS

    # web.click('CLASS SCHEDULE')
    web.go_to(f'{mirror[0]}j_aisis/J_VCSC.do')

    # html parsing
    page_soup = soup(web.get_page_source(), "html.parser")

    filename = 'subjects.csv'
    f = open(filename, 'w')
    headers = 'subject_code,section,course_title,units,time,room,instructor,max_no,lang,level,free_slots,remarks,s,p\n'
    f.write(headers)

    # grab the department <option> elements from the deptCode dropdown
    departments = page_soup.findAll(
        lambda t: t.name == 'option' and t.parent.attrs.get('name') == 'deptCode'
    )

    subjects = []
    subject_info = []
    i = 1
    for dept in departments:
        web.click('Display Class Schedule')
        page_soup = soup(web.get_page_source(), "html.parser")
        raw_data = page_soup.findAll('td', {'class': 'text02'})  # cells of the subjects table
        j = 0  # column counter
        for data in raw_data:
            if j == 14:
                # a complete 14-column row has been read; start the next one
                j = 0
                subjects.append(subject_info)
                subject_info = []
                f.write('\n')
            f.write(data.text.replace(',', '|') + ',')
            subject_info.append(data.text)
            j += 1
        f.write('\n')
        if i < len(departments):
            web.click(departments[i].text)
            i += 1
    f.close()

    databasefile = 'subjects.csv'
    # rearranges the csv file for more optimal searching
    db = pd.read_csv(databasefile, skip_blank_lines=True,
                     names=['subject_code', 'section', 'course_title', 'units',
                            'time', 'room', 'instructor', 'max_no', 'lang',
                            'level', 'free_slots', 'remarks', 's', 'p'])
    # removes spaces in subject codes
    db['subject_code'] = db['subject_code'].apply(remove_space)
    db.sort_values(by=['subject_code'], ascending=True).to_csv(databasefile, index=False)
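# remove_space() is applied above but not defined in this snippet. A one-line
# sketch of the assumed behaviour (strip whitespace out of a subject code):
def remove_space(code):
    return str(code).replace(' ', '')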
from webbot import Browser
import re

web = Browser()
num = str(16044 // 2)  # integer division: the challenge starts at 8022
url = 'http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing='
web.go_to(url + num)
pattern = re.compile(r'and the next nothing is (\d+)')
while True:
    tmp = web.get_page_source()
    match = pattern.search(tmp)
    if match is None:
        # stop when the page no longer points at a next number
        break
    new = match.group(1)
    web.go_to(url + new)
import smtplib
import requests
import bs4
from webbot import Browser
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

web = Browser()  # open a new window
web.go_to('https://www.tucan.tu-darmstadt.de')  # go to the url
web.type('your username', into='usrname', id='field_user')  # enter username
web.click('NEXT', tag='span')  # tab to the password field
web.type('your password', into='pass', id='field_pass')  # enter password
web.click('Anmelden')  # log in
web.click('Prüfungen')  # go to the Prüfungen tab
web.click('Leistungsspiegel')  # go to the Leistungsspiegel tab
htmlsource = web.get_page_source()  # download the page source

myList = []   # a list to collect elements
cmpList = []  # a list to compare myList against, to check for changes
available = False
with open('tucan.txt') as file:  # read the previously saved .txt file
    data = file.read()

soup = bs4.BeautifulSoup(htmlsource, features="lxml")  # parse the page source
# find all <td> tags; they contain all the information from the Leistungsspiegel
everything = soup.find_all("td")
def extractData(url):
    try:
        print("TIMESTAMP", url[28:-43])
        web = Browser()
        web.go_to(url)
        time.sleep(7)
        data = web.get_page_source()
        soup = BeautifulSoup(data, features="lxml")
        year = soup.find("li", class_="ranking").a.contents
        print("YEAR", year[0][:4])
        table = soup.select("#list-table-body")
        # Scroll until all 30 rows have lazy-loaded, giving up after 8 attempts
        scroll_counter = 8
        while len(table[0].findAll('tr')) < 30:
            web.scrolly(1000)
            time.sleep(2)
            data = web.get_page_source()
            soup = BeautifulSoup(data, features="lxml")
            table = soup.select("#list-table-body")
            scroll_counter -= 1
            if scroll_counter == 0:
                web.quit()
                return []
        web.quit()

        rows = table[0].findAll('tr')
        ret_list = []
        for r in rows:
            cols = r.findAll('td')
            if 'ad' in cols[0].get("class"):
                continue  # skip advertisement rows
            row_data = {}
            row_data['year'] = year[0][:4]
            row_data['timestamp'] = url[28:-43]
            row_data['rank'] = cols[1].contents[0][1:]                   # current rank
            row_data['name'] = cols[2].a.contents[0]                     # team name
            row_data['val'] = clean_numbers(cols[3].contents[0])         # current value
            row_data['oneyear'] = clean_percentage(cols[4].contents[0])  # one-year value change (%)
            row_data['debt'] = clean_percentage(cols[5].contents[0])     # debt value
            row_data['revenue'] = clean_numbers(cols[6].contents[0])     # revenue
            row_data['income'] = clean_numbers(cols[7].contents[0])      # income
            ret_list.append(row_data)
        return ret_list
    except Exception:
        web.quit()
        return []
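# clean_numbers() and clean_percentage() are used above but not defined in
# this snippet. Minimal sketches of the assumed behaviour (strip currency
# symbols, thousands separators, and percent signs from the table cells):
def clean_numbers(text):
    return str(text).replace('$', '').replace(',', '').strip()

def clean_percentage(text):
    return str(text).replace('%', '').strip()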
from bs4 import BeautifulSoup
import requests
from webbot import Browser

debug = True
quoteString = 'XOM'
webLink = 'https://finance.yahoo.com/quote/' + quoteString + '/sustainability'
browser = Browser(debug)
browser.go_to(webLink)
source = browser.get_page_source()
#source = requests.get(webLink).text
soup = BeautifulSoup(source, features="html.parser")
print(soup.prettify())
sus = soup.find_all("div", {"data-reactid": '20'})
print(sus)
from urllib import request
import time
import re
import os
from bs4 import BeautifulSoup, Tag, CData
from webbot import Browser

while True:
    web = Browser()
    web.go_to("https://my.unsw.edu.au/active/studentClassEnrol/courses.xml")
    web.click('Sign on')
    web.type('z5151465', into='USER ID')
    web.type("Sword450869241_", into='Password', id='passwordFieldId')
    web.click('Agree and sign on')
    web.click('Sign on')
    web.click('My Student Profile')
    web.click('Update Your Enrolment')
    web.click('Update Enrolment')
    web.click('Term 2 2019')
    content = web.get_page_source()
    if "Course GSOE9340 is full." in content:
        os.system('say "WOW WOW WOW."')
    web.quit()
    time.sleep(5)
# NOTE: the login portion of this snippet was redacted ('******') in the
# source; only the prompt and the scraping loop below are recoverable.
print('Enter your sti password:')
# ... redacted: the script reads the password and signs in to stinet ...
for link in soup.find_all('a'):
    if "/ICS/Academics/CIS" in link.get('href'):
        coursework_pages.append(link.get('href'))

# first attempt at getting assignments
for page in coursework_pages:
    web.go_to(root_url + page + "Coursework.jnz")
    soup = BeautifulSoup(web.get_page_source(), "html5lib")
    for data in soup.select('.sidebar-link-title a'):
        print(data.text)
    print(soup.find(id='pg0_V__dueNext__rptDueNext_ctl00__hypAssign'))
    print(soup.find(id='pg0_V__dueNext__rptDueNext_ctl01__hypAssign'))

# pyforms web (yes)
# - - - - - - - - - - Set-up - - - - - - - - - -
time_start = time.time()
time_load_search = 3
time_load_article = 2
date_today = dt.date.today()

# data frame & browser
db = pd.DataFrame(columns=["anime_title", "anime_link", "anime_op_list", "anime_ed_list"])
link_base = 'https://myanimelist.net/topanime.php?type=airing'

# - - - - - - - - - - Webbot Navigation - - - - - - - - - -
web = Browser()
web.go_to(link_base)
time.sleep(time_load_search)
content = web.get_page_source()  # pull the page's source code
soup = BeautifulSoup(content, 'html.parser')  # tell soup the content is HTML
# The "tr" tag and "ranking-list" class come from the page itself;
# each match is one ranked anime, which is exactly what we are after.
ranking_lists = soup.find_all("tr", {"class": "ranking-list"})

for anime in ranking_lists:
    # title, link
    anime_title = anime.find("div", {"class": "di-ib"}).getText()
    anime_link = anime.find("div", {"class": "di-ib"}).find("a")["href"]  # href holds the link target
    # - - - anime page
repo = g.get_repo(repository)
prTitle = repo.get_pull(int(prNum))
fullPath = 'https://github.com/' + repository + '/pull/' + prNum
web = Browser()
#web.driver.set_window_position(-10000, 0)
web.go_to('https://github.com')
web.click('Sign in')
web.type(user, into='Username or email address')
web.click('NEXT', tag='span')
web.type(password, into='Password')
web.click('Sign in', tag='span')
print(prNum)
web.go_to(fullPath)
a = web.get_page_source()
b = str(a)
if b.find("Subscribe to our newsletter") != -1:
    # this means it's a 404; theoretically this marker text could change
    notify(message='PR %s does not exist' % prNum)
    sys.exit(0)
while True:
    web.go_to(fullPath)
    a = web.get_page_source()
    b = str(a)
    web.driver.minimize_window()
    if b.find("Some checks haven’t completed yet") != -1:
def run(self):
    """@brief A thread that reads stats from the LB2120 4G modem"""
    web = Browser()
    web.go_to('http://{}/index.html'.format(self._options.address))
    web.click(id='session_password')
    web.type(self._password)
    #web.type('QtV6Dq4s')
    web.click('Sign In')
    web.click(id='session_password')
    startTime = time()
    lastTotalData = -1
    lastDataRX = -1
    lastDataTX = -1
    self.running = True
    while self.running:
        try:
            web.go_to("http://{}/api/model.json?internalapi=1&x=11228".format(
                self._options.address))
            content = web.get_page_source()
            now = time()
            elapsedTime = now - startTime
            startTime = now
            # Remove the html wrapper the browser adds around the JSON response
            jsonContent = content.replace(
                "<html xmlns=\"http://www.w3.org/1999/xhtml\"><head></head><body><pre style=\"word-wrap: break-word; white-space: pre-wrap;\">",
                "")
            jsonContent = jsonContent.replace("</pre></body></html>", "")
            # Convert the json text to a dict
            data = json.loads(jsonContent)
            # Grab the values associated with throughput
            dataRX = int(data['wwan']['dataTransferredRx'])
            dataTX = int(data['wwan']['dataTransferredTx'])
            tempC = float(data['general']['devTemperature'])
            devTempCritical = data['power']['deviceTempCritical']
            if lastDataRX != -1:
                if dataRX < lastDataRX:
                    print("<<<<<<<<<< dataRX: {} < {}".format(dataRX, lastDataRX))
                if dataTX < lastDataTX:
                    print("<<<<<<<<<< dataTX: {} < {}".format(dataTX, lastDataTX))
                deltaDataRX = dataRX - lastDataRX
                deltaDataTX = dataTX - lastDataTX
                downLoadBps = (deltaDataRX / elapsedTime) * 8
                upLoadBps = (deltaDataTX / elapsedTime) * 8
                downLoadMBps = float(downLoadBps) / 1E6
                upLoadMBps = float(upLoadBps) / 1E6
                lb2120Stats = LB2120Stats()
                lb2120Stats.downMbps = downLoadMBps
                lb2120Stats.upMbps = upLoadMBps
                lb2120Stats.tempC = tempC
                lb2120Stats.tempCritical = devTempCritical
                lb2120Stats.sampleTime = datetime.datetime.now()
                self._queue.put(lb2120Stats)
            # Save the last results for use next time around
            lastDataRX = dataRX
            lastDataTX = dataTX
        except Exception:
            lines = traceback.format_exc().split('\n')
            for l in lines:
                self._uio.error(l)
        sleep(self._options.psec)
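# A minimal sketch of a consumer for the stats produced above (not part of
# the original source). It assumes self._queue is a queue.Queue shared with
# the reader thread; the surrounding class is not shown in the source.
import queue

def report_stats(stats_queue):
    while True:
        try:
            stats = stats_queue.get(timeout=5)  # block until a sample arrives
        except queue.Empty:
            continue
        print("{}: down {:.2f} Mbps, up {:.2f} Mbps, {} C".format(
            stats.sampleTime, stats.downMbps, stats.upMbps, stats.tempC))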
import csv

# credentials: these are not real credentials, as this code is published
username = "******"
password = "******"

# log into the admin page
web = Browser()
web.go_to('https://www.zumostraining.co.uk/Login.aspx')
web.type(username)
web.click(id='ContentPlaceHolder1_txtPassword')
web.type(password)
web.press(web.Key.ENTER)
# class="btn btn-primary btn-block top-margin-sm text-left"

# navigate to the titles page, create soup and select values
web.go_to('https://www.zumostraining.co.uk/Zumos_Admin/Titles.aspx')
soup = BeautifulSoup(web.get_page_source(), 'lxml')
values = soup.find_all('input', {'class': 'btn btn-primary btn-block top-margin-sm text-left'})
# print(values)  # test - success

links = ['nothing'] * len(values)
i = 0
print("beginning the loop into every button...")
for value in values:  # loop over every button
    web.click(id=value['id'])
    # Needed so the browser reaches the new page before the source is read.
    # It would be a nice improvement to have webbot do this itself, but there
    # is no such function.
    time.sleep(1)
    rawpage = web.get_page_source()
    # print(rawpage)
    titlePage = BeautifulSoup(rawpage, 'lxml')
def input_url(update: Update, context: CallbackContext):
    while True:
        chat_id = update.message.chat_id
        # input url
        url = update.message.text

        # --------------------
        # work out which kind of Radio Javan link this is
        url_check_regex = re.findall(r"(www\.radiojavan\.com/mp3s/mp3/)", url)
        url_check_regex_app = re.findall(r"(rj\.app/m/)", url)
        url_check_regex_podcast_app = re.findall(r"rj\.app/p/", url)
        url_check_regex_podcast = re.findall(r"www\.radiojavan\.com/podcasts/podcast/", url)
        url_check_regex_video = re.findall(r"www\.radiojavan\.com/videos/video/", url)
        url_check_regex_video_app = re.findall(r"rj\.app/v/", url)
        url_check_regex_playlist = re.findall(r"www\.radiojavan\.com/playlists/playlist/", url)
        url_check_regex_playlist_app = re.findall(r"rj\.app/pm/", url)
        list_url = [
            url_check_regex, url_check_regex_app, url_check_regex_podcast,
            url_check_regex_podcast_app, url_check_regex_video,
            url_check_regex_video_app, url_check_regex_playlist,
            url_check_regex_playlist_app
        ]
        what_is_link_type = ""
        count = 0
        for check_url_link_list in list_url:
            if check_url_link_list:
                what_is_link_type = check_url_link_list
            else:
                count += 1
        res = ""
        if count == len(list_url):  # no pattern matched: not a Radio Javan link
            context.bot.send_chat_action(chat_id, ChatAction.TYPING)
            context.bot.send_message(chat_id=chat_id,
                                     text="The link is invalid.\n\nPlease send the link of the song or music video from Radio Javan.")
            res = "inv"
            break

        # download locations per media type
        files_url = {
            "music": "media/mp3/mp3-256/",
            "podcast": "media/podcast/mp3-192/",
            "video_lq": "media/music_video/lq/",
            "video_hd": "media/music_video/hd/",
            "video_hq": "media/music_video/hq/",
            "video_4k": "media/music_video/4k/"
        }
        regex_music_and_video = {
            "music": r"RJ\.currentMP3Perm = '(.*)';",
            "video": r"RJ\.videoPermlink = '(.*)';"
        }

        def download_file_rj(music_or_video, file_type, regex_file, ch_actions):
            context.bot.send_message(chat_id=chat_id, text="Please wait a moment...")
            if res != "inv":
                web = Browser()
                web.go_to(url)
                s = web.get_page_source()
                web.close_current_tab()
                soup = BeautifulSoup(s, 'html.parser')
                # find the mp3 link
                file_name = str(re.findall(fr"{regex_file}", str(soup)))
                file_name = file_name.replace("['", "")
                file_name = file_name.replace("']", "")
                file_url = f"https://host2.rj-mw1.com/{file_type}{file_name}.mp{music_or_video}"
                req = urllib.request.Request(file_url)
                with urllib.request.urlopen(req) as response:
                    the_file_url_page = str(response.read())
                if the_file_url_page != "b'Not found'":
                    wget.download(file_url, f'{file_name}.mp{music_or_video}')
                else:
                    try:
                        os.remove(f"{file_name}.mp{music_or_video}")
                    except OSError:
                        pass
                    # fall back to the mirror host
                    file_url = f"https://host1.rj-mw1.com/{file_type}{file_name}.mp{music_or_video}"
                    wget.download(file_url, f'{file_name}.mp{music_or_video}')
                # fix the caption: replace dashes with spaces
                file_caption = str(file_name).replace("-", " ")
                if str(file_name) == "[]":
                    context.bot.send_chat_action(chat_id, ChatAction.TYPING)
                    context.bot.send_message(chat_id=chat_id,
                                             text="The link is invalid.\n\nPlease send the link of the song or music video from Radio Javan.")
                else:
                    if ch_actions == "music":
                        context.bot.send_chat_action(chat_id, ChatAction.UPLOAD_AUDIO)
                        context.bot.send_audio(chat_id=chat_id,
                                               audio=open(f"./{file_name}.mp{music_or_video}", "rb"),
                                               caption=f"{file_caption}")
                    elif ch_actions == "video":
                        context.bot.send_chat_action(chat_id, ChatAction.UPLOAD_VIDEO)
                        context.bot.send_video(chat_id=chat_id,
                                               video=open(f"./{file_name}.mp{music_or_video}", "rb"),
                                               caption=f"{file_caption}")
                    if os.path.exists(f"{file_name}.mp{music_or_video}"):
                        os.remove(f"{file_name}.mp{music_or_video}")

        if what_is_link_type == url_check_regex_podcast:
            context.bot.send_message(chat_id=chat_id,
                                     text="Because Telegram limits the file size bots may send, podcasts cannot be delivered...")
        elif what_is_link_type == url_check_regex_podcast_app:
            context.bot.send_message(chat_id=chat_id,
                                     text="Because Telegram limits the file size bots may send, podcasts cannot be delivered...")
        elif what_is_link_type == url_check_regex:
            download_file_rj("3", files_url["music"], regex_music_and_video["music"], "music")
        elif what_is_link_type == url_check_regex_app:
            download_file_rj("3", files_url["music"], regex_music_and_video["music"], "music")
        elif what_is_link_type == url_check_regex_playlist:
            web = Browser()
            web.go_to(url)
            play_list_source_page = web.get_page_source()
            web.close_current_tab()
            soup_playlist = BeautifulSoup(play_list_source_page, "html.parser")
            list_artists_playlist = soup_playlist.findAll("span", {"class": "artist"})
            list_songs_playlist = soup_playlist.findAll("span", {"class": "song"})
            playlist_count = 0
            context.bot.send_message(chat_id=chat_id,
                                     text="🔴 Note 🔴:\n\nSome songs in the playlist may arrive incomplete, or not arrive at all and show the 'invalid link' message. If needed, once the playlist download finishes and the :) message appears, send the link of any missing or broken song from the Radio Javan app or website. 🌷🌹")
            for artists in list_artists_playlist:
                re_artists = re.findall(r"(?=>).*(?=<)", str(artists))
                re_songs = re.findall(r"(?=>).*(?=<)", str(list_songs_playlist[playlist_count]))
                url = f"www.radiojavan.com/mp3s/mp3/{re_artists[0]}-{re_songs[0]}"
                playlist_count += 1
                url = url.replace(" ", "-")
                url = url.replace("['>", "")
                url = url.replace("']", "")
                url = url.replace(">", "")
                download_file_rj("3", files_url["music"], regex_music_and_video["music"], "music")
        elif what_is_link_type == url_check_regex_playlist_app:
            web = Browser()
            web.go_to(url)
            play_list_source_page = web.get_page_source()
            web.close_current_tab()
            soup_playlist = BeautifulSoup(play_list_source_page, "html.parser")
            list_artists_playlist = soup_playlist.findAll("span", {"class": "artist"})
            list_songs_playlist = soup_playlist.findAll("span", {"class": "song"})
            playlist_count = 0
            context.bot.send_message(chat_id=chat_id,
                                     text="🔴 Note 🔴:\n\nSome songs in the playlist may arrive incomplete, or not arrive at all and show the 'invalid link' message. If needed, once the playlist download finishes and the :) message appears, send the link of any missing or broken song from the Radio Javan app or website. 🌷🌹")
            for artists in list_artists_playlist:
                re_artists = re.findall(r"(?=>).*(?=<)", str(artists))
                re_songs = re.findall(r"(?=>).*(?=<)", str(list_songs_playlist[playlist_count]))
                url = f"www.radiojavan.com/mp3s/mp3/{re_artists[0]}-{re_songs[0]}"
                playlist_count += 1
                url = url.replace(" ", "-")
                url = url.replace("['>", "")
                url = url.replace("']", "")
                url = url.replace(">", "")
                download_file_rj("3", files_url["music"], regex_music_and_video["music"], "music")
        elif what_is_link_type == url_check_regex_video_app:
            try:
                context.bot.send_message(chat_id=chat_id,
                                         text="Unfortunately, because Telegram limits the upload size for bots, the music video will only be uploaded in 480p LQ quality.\n\nPlease wait for the music video...")
                download_file_rj("4", files_url["video_lq"], regex_music_and_video["video"], "video")
            except Exception:
                pass
        elif what_is_link_type == url_check_regex_video:
            try:
                context.bot.send_message(chat_id=chat_id,
                                         text="Unfortunately, because Telegram limits the upload size for bots, the music video will only be uploaded in 480p LQ quality.\n\nPlease wait for the music video...")
                download_file_rj("4", files_url["video_lq"], regex_music_and_video["video"], "video")
            except Exception:
                pass
        context.bot.send_message(chat_id=chat_id, text=":)")
        break
def coursework(user):
    root_url = 'https://stinet.southeasttech.edu'
    coursework_pages = []
    coursework = []
    userName = user.get('user')
    password = user.get('password')
    web = Browser()
    web.fullscreen_window()
    web.go_to(root_url)

    # login
    web.type(userName, id="userName")
    web.type(password, id="password")
    web.click(id="siteNavBar_btnLogin")

    # get the html and hand it to the parser
    soup = BeautifulSoup(web.get_page_source(), "html5lib")
    mycourses = soup.find(id='myCourses')
    for link in mycourses.find_all('a'):
        if "/ICS/Academics/" in link.get('href'):
            coursework_pages.append(link.get('href'))

    # first attempt at getting assignments
    for page in coursework_pages:
        className = None
        web.go_to(root_url + page + "Coursework.jnz")
        soup = BeautifulSoup(web.get_page_source(), "html5lib")
        for data in soup.select('.sidebar-link-title a'):
            className = data.text
        assignment1 = try_find(soup, 'pg0_V__dueNext__rptDueNext_ctl00__hypAssign')
        assignment2 = try_find(soup, 'pg0_V__dueNext__rptDueNext_ctl01__hypAssign')
        course = {
            'courseName': className,
            'courseUrl': root_url + page + "Coursework.jnz",
        }
        if assignment1 is not None:
            course['assignment1'] = assignment1.text
            course['assignment1Desc'] = assignment1.get('aria-label')
            course['assignment1Link'] = root_url + assignment1.get('href')
            course['assignment1Progress'] = soup.find(
                id='pg0_V__dueNext__rptDueNext_ctl00__lblInfo').text
        else:
            course['assignment1'] = ''
            course['assignment1Desc'] = ''
            course['assignment1Link'] = ''
            course['assignment1Progress'] = ''
        if assignment2 is not None:
            course['assignment2'] = assignment2.text
            course['assignment2Desc'] = assignment2.get('aria-label')
            course['assignment2Link'] = root_url + assignment2.get('href')
            course['assignment2Progress'] = soup.find(
                id='pg0_V__dueNext__rptDueNext_ctl01__lblInfo').text
        else:
            course['assignment2'] = ''
            course['assignment2Desc'] = ''
            course['assignment2Link'] = ''
            course['assignment2Progress'] = ''
        coursework.append(course.copy())
    web.close_current_tab()  # fixed typo: was close_current_tag()
    return coursework
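# try_find() is used above but not defined in this snippet. A minimal sketch
# of the assumed behaviour (a find-by-id that swallows lookup errors and
# returns None when the element is missing):
def try_find(soup, element_id):
    try:
        return soup.find(id=element_id)
    except AttributeError:
        return None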