class pa:
    """Parse an HTML page containing a ``<table class="data">`` and look up
    rows by a search key (e.g. a course code).

    The table's header (<th>) and data (<td>) cells are flattened into
    comma-separated text lists; lookups walk the flat data-cell list.
    """

    def __init__(self, html):
        # Full document soup.
        self.soup_main = BeautifulSoup(html, 'html.parser')
        # Re-parse the stringified results to progressively narrow the soup:
        # main data table -> its rows -> header cells / data cells.
        table_main = BeautifulSoup(
            str(self.soup_main.findAll('table', class_='data')), 'html.parser')
        # Table rows (<tr>).
        self.data_table = BeautifulSoup(
            str(table_main.findAll('tr')), 'html.parser')
        # Header cells (<th>).
        self.thead = BeautifulSoup(
            str(self.data_table.findAll('th')), 'html.parser')
        # Data cells (<td>).
        self.data = BeautifulSoup(
            str(self.data_table.findAll('td')), 'html.parser')
        # Results of the most recent lookup; None until a match is found
        # (previously these were unset, so get_course()/get_name() raised
        # AttributeError when the search key was absent).
        self.course = None
        self.name = None

    def __logic(self, search):
        """Flatten the header/data soups into cell lists and run the search."""
        # "[a, b, c]" -> ["a", "b", "c"]
        self.thead = self.thead.text.replace('[', '')
        self.thead = self.thead.replace(']', '')
        self.thead = self.thead.rsplit(', ')
        # Column offsets relative to the matched cell:
        # +4 -> course cell, +1 -> name cell.  TODO confirm against markup.
        self.__SEARCH = 4
        self.__NAME = 1
        # Same flattening for the data cells.
        self.data = self.data.text.replace('[', '')
        self.data = self.data.replace(']', '')
        self.data = self.data.rsplit(', ')
        return self.__finde_course(search)

    def __finde_course(self, search):
        """Scan the flat cell list for `search`; record course and name."""
        for index, cell in enumerate(self.data):
            if cell == search:
                self.course = self.data[self.__SEARCH + index]
                self.name = self.data[self.__NAME + index]
                break
        return self.course

    def find_by_code(self, code):
        """Public entry point: search the table by course code.

        Returns the matched course cell, or None when not found.
        """
        return self.__logic(code)

    def get_course(self):
        """Return the course cell from the last lookup (None if no match)."""
        return self.course

    def get_name(self):
        """Return the name cell from the last lookup (None if no match)."""
        return self.name
def getAtv(self):
    """Scrape activity names and descriptions from the activities table.

    Reads the <td> cells under headers 'atividades_data' and
    'descricao-atividade' from the page currently loaded in the Selenium
    driver (self.chrome), strips all tab/newline characters from each
    cell's text, and appends the cleaned strings to self.att and
    self.desc respectively.  Prints both lists when done.
    """
    def _clean(cell):
        # Extract the cell's text and drop every tab and newline
        # (the original split-then-join chain did exactly this).
        text = BeautifulSoup(cell.get_attribute('innerHTML'),
                             'html.parser').text
        return text.replace('\t', '').replace('\n', '')

    # Activity names.
    for cell in self.chrome.find_elements_by_xpath(
            "//td[@headers='atividades_data']"):
        self.att.append(_clean(cell))
    # Activity descriptions.
    for cell in self.chrome.find_elements_by_xpath(
            "//td[@headers='descricao-atividade']"):
        self.desc.append(_clean(cell))
    print(self.att)
    print(self.desc)
def get_talks(schedule_path, talk_root):
    """Fetch the conference schedule and return a list of Talk objects.

    Downloads the schedule JSON from `schedule_path`, then fetches each
    scheduled talk's detail JSON from `talk_root` + slug.  Tutorials and
    empty slots are skipped; keynotes get special handling because their
    schedule entries carry the data inline.

    Raises AssertionError when any HTTP request does not return 200.
    """
    response = requests.get(schedule_path)
    assert response.status_code == 200
    schedule = response.json()
    talks = []
    for day in schedule['days']:
        for entry in day['entries']:
            if 'talks' in entry.keys():
                # BUGFIX: dict.iteritems() is Python 2 only; .items()
                # works on both and is required on Python 3.
                for room, slug in entry['talks'].items():
                    # Filter out tutorials, and empty slugs (no talk scheduled).
                    if slug and not room == 'tutorial':
                        response = requests.get(talk_root + slug + '.json')
                        assert response.status_code == 200
                        response_json = response.json()
                        # Strip HTML, then drop the speaker bio and the
                        # trailing line from the description.
                        description = BeautifulSoup(
                            response_json['description'],
                            'html.parser').get_text()
                        description = description.split('Bio')[0]
                        description = description.rsplit('\n', 1)[0]
                        talk = Talk(
                            **{
                                'date': response_json.get('date'),
                                'room': room,
                                'slug': slug,
                                'title': response_json.get('title'),
                                'speakers': response_json.get('speakers'),
                                'start_time': response_json.get('start_time'),
                                'description': description
                            })
                        talks.append(talk)
            elif 'keynote' in entry['title'].lower():
                # Keynotes are not in the same format as other talks:
                # their data lives directly in the schedule entry.
                response_json = Talk(
                    **{
                        'date': day['date'],
                        'room': '1-067',
                        'slug': entry['link'],
                        'title': entry['title'],
                        'speakers': entry['content'],
                        'start_time': entry['start_time'],
                        'description': ''  # keynotes do not have descriptions
                    })
                talks.append(response_json)
    return talks
def cmd_post(self, data):
    """Download the image at the URL given in the command message and
    post it to the mumble channel chat via the PGUI system.
    """
    all_data = data.message.strip().split(' ', 1)
    # Guard: the command must include a URL argument, otherwise
    # all_data[1] raises IndexError.
    if len(all_data) != 2:
        gs.gui_service.quick_gui(
            "Invalid command: an image URL must be provided.",
            text_type='header',
            text_align='left',
            box_align='left')
        return
    img_url = all_data[1]
    # Strip any markup the chat client wrapped around the URL.
    img_url = BeautifulSoup(img_url, 'html.parser').get_text()
    # Download image
    try:
        IH.download_image_stream(img_url)
    except exceptions.HTTPError:
        gs.gui_service.quick_gui(
            "Encountered an HTTP Error while trying to retrieve the image.",
            text_type='header',
            text_align='left',
            box_align='left')
        return
    except exceptions.InvalidSchema:
        gs.gui_service.quick_gui(
            "Encountered an Invalid Schema Error while trying to retrieve the image.",
            text_type='header',
            text_align='left',
            box_align='left')
        return
    except exceptions.RequestException:
        gs.gui_service.quick_gui(
            "Encountered a Request Error while trying to retrieve the image.",
            text_type='header',
            text_align='left',
            box_align='left')
        return
    # Guard: an extension is required to format the temporary image;
    # rsplit('.', 1)[1] would raise IndexError on an extension-less URL.
    url_parts = img_url.rsplit('.', 1)
    if len(url_parts) != 2:
        gs.gui_service.quick_gui(
            "Could not determine the image extension from the URL.",
            text_type='header',
            text_align='left',
            box_align='left')
        return
    img_ext = url_parts[1]
    formatted_string = IH.format_image(
        "_image", img_ext,
        f'{dir_utils.get_temp_med_dir()}/internal/images')
    rprint("Posting an image to the mumble channel chat.")
    # Display image with PGUI system
    gs.gui_service.quick_gui_img(
        f"{dir_utils.get_temp_med_dir()}/internal/images",
        formatted_string,
        bgcolor=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_COL],
        cellspacing=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_SIZE],
        format_img=False)
    log(INFO, f"Posted an image to the mumble channel chat from: {img_url}.")
def buildings():
    """Supplement the previous UW building scrape with additional data.

    Scrapes the registrar's building list page and returns a list of
    (abbreviation, name) tuples.
    """
    results = []
    page = BeautifulSoup(
        requests.get(
            'https://www.washington.edu/students/reg/buildings.html').text,
        features='lxml')
    # Keep only the markup between the listing heading and the footer.
    body = str(page.html).split(
        '<h2>Code - Building Name (Map Grid)</h2>', 1)[-1]
    body = body.rsplit('<div class="uw-footer">', 1)[0]
    listing = BeautifulSoup(body, features='lxml')
    # Each <p> groups several buildings, separated by <br/>.
    for group in listing.find_all('p'):
        for entry in str(group).split('<br/>'):
            if 'a href' not in entry:
                continue
            entry_soup = BeautifulSoup(entry, features='lxml')
            code = entry_soup.find('code').text
            full_name = entry_soup.find('a').text
            results.append((code, full_name.split('\n', 1)[0].strip()))
    return results
def states():
    """Yield (state name, per-keyword price lists) for every state option.

    NOTE(review): this function references `self` and `keywords` without
    defining them, so it presumably runs as a closure inside a method of
    the scraper class — confirm against the enclosing scope.
    """
    state_select = self.get_state_select()
    # Skip the first option (placeholder / "select a state").
    state_select_option_values = [
        '%s' % o.get_attribute('value') for o in state_select.options[1:]
    ]
    for value in state_select_option_values:
        state_select = self.select_state_option(value)
        # BUGFIX: removed the bare `self.driver.page_source` expression
        # statement — it did nothing; the value is fetched right below.
        text = BeautifulSoup(self.driver.page_source,
                             "html.parser").get_text()
        meta_prices = []
        for keyword in keywords:
            prices = []
            # Each occurrence of the keyword is followed on the same line
            # by a price; rsplit with an increasing maxsplit walks the
            # occurrences from the end of the page text.
            counter = text.count(keyword)
            for z in range(counter):
                prices.append(
                    text.rsplit(keyword, z + 1)[1].splitlines()[0])
            meta_prices.append([float(price) for price in prices])
        yield (state_select.first_selected_option.text, meta_prices)
# Top-level export loop: writes the root README plus, for each RCW title,
# a title folder with its own README and one AsciiDoc file per chapter.
# Depends on names defined elsewhere in the file: `rm` (root README file
# object), `titles` (parsed title/chapter/section data), `root` (Path),
# `pad_number`, `filename_friendly`, and the stdlib `string` module.
rm.write("Welcome to the git version of the Revised Code of Washington (RCW). It is an *unofficial* copy derived from http://apps.leg.wa.gov/rcw/[the official website]. This root commit will stay the same but all others may change if/when we import historical changes. Tags will be redone as things change so they should be stable.\n\n")
for title in titles:
    info = titles[title]
    # Folder name: zero-padded title number + sanitized title text.
    title_folder_name = pad_number(title, 2) + "_" + filename_friendly(info["title"])
    title_folder = root / title_folder_name
    title_folder.mkdir(exist_ok=True)
    title_readme = title_folder / "README.adoc"
    # Link to this title's folder from the root README.
    rm.write("* link:" + str(title_folder_name) + "[" + title + " - " + info["title"] + "]\n")
    with title_readme.open("w") as tf:
        tf.write("= ")
        tf.write(title + " " + info["title"])
        tf.write("\n\n")
        # Widest chapter-number suffix (letters stripped) determines the
        # zero-padding so chapter files sort correctly.
        max_len = 0
        for chapter in info["chapters"]:
            max_len = max(max_len, len(chapter.rsplit(".", maxsplit=1)[-1].strip(string.ascii_uppercase)))
        for chapter in info["chapters"]:
            chapter_info = info["chapters"][chapter]
            chapter_name = pad_number(chapter, max_len) + "_" + filename_friendly(chapter_info["title"]) + ".adoc"
            chapter_path = title_folder / chapter_name
            link_path = str(chapter_name)
            # Link to the chapter file from the title README.
            tf.write("* link:" + link_path + "[" + chapter + " - " + chapter_info["title"] + "]\n")
            with chapter_path.open("w") as f:
                f.write("= " + chapter + " - " + chapter_info["title"] + "\n")
                f.write(":toc:\n\n")
                # One "==" heading per section.  NOTE(review): the visible
                # chunk ends mid-loop (`section_info` is assigned but not
                # yet used) — the remainder continues past this view.
                for section in chapter_info["sections"]:
                    section_info = chapter_info["sections"][section]
                    f.write("== ")
                    f.write(section)
                    f.write(" - ")
def load_hearing_response(fname, split_on=' Present:'):
    """Read a saved hearing-transcript HTML file and return its text
    after the last occurrence of `split_on`.
    """
    # BUGFIX: the 'U' (universal newlines) open-mode flag was removed in
    # Python 3.11; text mode already translates newlines.
    with open(fname, 'r') as inf:
        html = inf.read()
    # Name the parser explicitly: bare BeautifulSoup(html) emits a
    # warning and can pick different parsers on different machines.
    txt = BeautifulSoup(html, 'html.parser').get_text()
    # Return everything after the last occurrence of split_on.
    return txt.rsplit(split_on, 1)[-1]
def cmd_post(self, data):
    """Download the image at the URL given in the command message and
    post it to the mumble channel chat via the PGUI system.

    Logs and shows a GUI error message on invalid usage or any
    requests-level failure during the download.
    """
    all_data = data.message.strip().split(' ', 1)
    # Guard: the command must include a URL argument.
    if len(all_data) != 2:
        log(ERROR, CMD_INVALID_POST,
            origin=L_COMMAND, error_type=CMD_INVALID_ERR,
            print_mode=PrintMode.VERBOSE_PRINT.value)
        gs.gui_service.quick_gui(CMD_INVALID_POST, text_type='header', box_align='left')
        return
    # Download image from the provided URL.
    img_url = all_data[1].strip()
    # Strip any markup the chat client wrapped around the URL.
    img_url = BeautifulSoup(img_url, 'html.parser').get_text()
    try:
        # Download the image in 1024 byte chunks and save it as a temporary image.
        IH.download_image_stream(img_url)
    except exceptions.HTTPError:
        log(ERROR, GEN_HTTP_ERROR,
            origin=L_COMMAND, error_type=CMD_PROCESS_ERR,
            print_mode=PrintMode.VERBOSE_PRINT.value)
        gs.gui_service.quick_gui(GEN_HTTP_ERROR, text_type='header', box_align='left')
        return
    except exceptions.InvalidSchema:
        log(ERROR, GEN_INVALID_SCHEMA_ERROR,
            origin=L_COMMAND, error_type=CMD_PROCESS_ERR,
            print_mode=PrintMode.VERBOSE_PRINT.value)
        gs.gui_service.quick_gui(GEN_INVALID_SCHEMA_ERROR, text_type='header', box_align='left')
        return
    except exceptions.RequestException:
        log(ERROR, GEN_REQUESTS_ERROR,
            origin=L_COMMAND, error_type=CMD_PROCESS_ERR,
            print_mode=PrintMode.VERBOSE_PRINT.value)
        gs.gui_service.quick_gui(GEN_REQUESTS_ERROR, text_type='header', box_align='left')
        return
    # Guard: an extension is required to format the temporary image;
    # rsplit('.', 1)[1] would raise IndexError on an extension-less URL.
    url_parts = img_url.rsplit('.', 1)
    if len(url_parts) != 2:
        log(ERROR, CMD_INVALID_POST,
            origin=L_COMMAND, error_type=CMD_PROCESS_ERR,
            print_mode=PrintMode.VERBOSE_PRINT.value)
        gs.gui_service.quick_gui(CMD_INVALID_POST, text_type='header', box_align='left')
        return
    # Format the retrieved image into the b64 variant for mumble usage.
    img_ext = url_parts[1]
    formatted_string = IH.format_image(
        T_TEMP_IMG_NAME, img_ext,
        f'{dir_utils.get_temp_med_dir()}/internal/images')
    # Display image with PGUI system and log the event.
    gs.gui_service.quick_gui_img(
        f"{dir_utils.get_temp_med_dir()}/internal/images",
        formatted_string,
        bgcolor=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_COL],
        cellspacing=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_SIZE],
        format_img=False)
    log(INFO, INFO_POSTED_IMAGE,
        origin=L_COMMAND, print_mode=PrintMode.VERBOSE_PRINT.value)