Example #1
from bs4 import BeautifulSoup


class pa:
    def __init__(self, html):
        self.soup_main = BeautifulSoup(html, 'html.parser')
        # Re-parse the stringified tag lists to flatten the table step by step.
        table_main = BeautifulSoup(str(self.soup_main.find_all('table', class_='data')), 'html.parser')  # main table soup
        self.data_table = BeautifulSoup(str(table_main.find_all('tr')), 'html.parser')  # table rows (<tr>)
        self.thead = BeautifulSoup(str(self.data_table.find_all('th')), 'html.parser')  # header cells (<th>)
        self.data = BeautifulSoup(str(self.data_table.find_all('td')), 'html.parser')  # data cells (<td>)
        self.course = None
        self.name = None

    def __logic(self, search):
        # Turn the header soup into a plain list of column names.
        self.thead = self.thead.text.replace('[', '').replace(']', '').rsplit(', ')

        # Offsets (in cells) from the matched code cell to the course and name cells.
        self.__SEARCH = 4
        self.__NAME = 1

        # Turn the data soup into a plain list of cell values.
        self.data = self.data.text.replace('[', '').replace(']', '').rsplit(', ')
        return self.__finde_course(search)

    def __finde_course(self, search):
        for count, x in enumerate(self.data):
            if x == search:
                self.course = self.data[self.__SEARCH + count]
                self.name = self.data[self.__NAME + count]
                break
        return self.course

    def find_by_code(self, code):
        return self.__logic(code)

    def get_course(self):
        return self.course

    def get_name(self):
        return self.name
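
A short usage sketch for the class above. The table layout here is hypothetical, chosen so the hard-coded offsets line up (name one cell after the matched code, course four cells after); note the bracket/comma flattening breaks if a cell's text itself contains ', '.

html = """
<table class="data">
  <tr><th>Code</th><th>Name</th><th>Credits</th><th>Semester</th><th>Course</th></tr>
  <tr><td>CS101</td><td>Algorithms</td><td>4</td><td>Fall</td><td>CSE 101</td></tr>
</table>
"""

parser = pa(html)
parser.find_by_code('CS101')
print(parser.get_name())    # -> Algorithms
print(parser.get_course())  # -> CSE 101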
Example #2
 def getAtv(self):
     # Collect activity dates and their descriptions from the rendered table.
     path_att = self.chrome.find_elements_by_xpath(
         "//td[@headers='atividades_data']")
     path_desc_att = self.chrome.find_elements_by_xpath(
         "//td[@headers='descricao-atividade']")

     def clean(cell):
         # Cell text with embedded tabs and newlines stripped out.
         text = BeautifulSoup(cell.get_attribute('innerHTML'),
                              'html.parser').text
         return ''.join(''.join(text.rsplit('\t')).rsplit('\n'))

     for i in path_att:
         self.att.append(clean(i))
     for i in path_desc_att:
         self.desc.append(clean(i))
     print(self.att)
     print(self.desc)
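
The rsplit('\t')/join followed by rsplit('\n')/join above simply deletes tabs and newlines. A standalone sketch of the idiom next to a one-pass equivalent using str.translate:

text = 'Activity\t1\nSubmit report\n'

# The pattern used above: split on the character, then glue the pieces back.
cleaned = ''.join(''.join(text.rsplit('\t')).rsplit('\n'))

# Same result in one pass.
assert cleaned == text.translate(str.maketrans('', '', '\t\n'))
print(cleaned)  # -> Activity1Submit report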
Example #3
import requests
from bs4 import BeautifulSoup


def get_talks(schedule_path, talk_root):
    response = requests.get(schedule_path)
    assert response.status_code == 200
    schedule = response.json()

    talks = []

    for day in schedule['days']:
        for entry in day['entries']:
            if 'talks' in entry.keys():
                for room, slug in entry['talks'].items():
                    # Skip tutorials and empty slugs (no talk scheduled).
                    if slug and room != 'tutorial':
                        response = requests.get(talk_root + slug + '.json')
                        assert response.status_code == 200

                        response_json = response.json()
                        description = BeautifulSoup(
                            response_json['description'],
                            'html.parser').get_text()
                        description = description.split('Bio')[0]
                        description = description.rsplit('\n', 1)[0]

                        talk = Talk(
                            **{
                                'date': response_json.get('date'),
                                'room': room,
                                'slug': slug,
                                'title': response_json.get('title'),
                                'speakers': response_json.get('speakers'),
                                'start_time': response_json.get('start_time'),
                                'description': description
                            })
                        talks.append(talk)

            elif 'keynote' in entry['title'].lower():
                # Keynotes need special handling: they are not in the same
                # format as the other talks.
                keynote = Talk(
                    **{
                        'date': day['date'],
                        'room': '1-067',
                        'slug': entry['link'],
                        'title': entry['title'],
                        'speakers': entry['content'],
                        'start_time': entry['start_time'],
                        'description': ''  # keynotes do not have descriptions
                    })
                talks.append(keynote)

    return talks
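
The Talk class is not shown in this snippet; a namedtuple with the keyword fields used above would satisfy both call sites (an assumption about the original class, not its actual definition):

from collections import namedtuple

# Hypothetical stand-in for the project's Talk class.
Talk = namedtuple(
    'Talk',
    ['date', 'room', 'slug', 'title', 'speakers', 'start_time', 'description'])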
Example #4
 def cmd_post(self, data):
     all_data = data.message.strip().split(' ', 1)
     if len(all_data) != 2:
         # No image URL was provided with the command.
         return
     # Strip any markup from the URL, then download the image.
     img_url = BeautifulSoup(all_data[1], 'html.parser').get_text()
     try:
         IH.download_image_stream(img_url)
     except exceptions.HTTPError:
         gs.gui_service.quick_gui(
             "Encountered an HTTP Error while trying to retrieve the image.",
             text_type='header',
             text_align='left',
             box_align='left')
         return
     except exceptions.InvalidSchema:
         gs.gui_service.quick_gui(
             "Encountered an Invalid Schema Error while trying to retrieve the image.",
             text_type='header',
             text_align='left',
             box_align='left')
         return
     except exceptions.RequestException:
         gs.gui_service.quick_gui(
             "Encountered a Request Error while trying to retrieve the image.",
             text_type='header',
             text_align='left',
             box_align='left')
         return
     img_ext = img_url.rsplit('.', 1)[1]
     formatted_string = IH.format_image(
         "_image", img_ext,
         f'{dir_utils.get_temp_med_dir()}/internal/images')
     rprint("Posting an image to the mumble channel chat.")
     # Display image with PGUI system
     gs.gui_service.quick_gui_img(
         f"{dir_utils.get_temp_med_dir()}/internal/images",
         formatted_string,
         bgcolor=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_COL],
         cellspacing=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_SIZE],
         format_img=False)
     log(INFO,
         f"Posted an image to the mumble channel chat from: {img_url}.")
Example #5
import requests
from bs4 import BeautifulSoup


def buildings():
    # Supplement the previous UW building scrape with additional data.
    names = []
    buildings = BeautifulSoup(requests.get(
        'https://www.washington.edu/students/reg/buildings.html').text,
                              features='lxml')
    # Keep only the markup between the listing heading and the page footer.
    buildings = str(buildings.html).split(
        '<h2>Code - Building Name (Map Grid)</h2>', 1)[-1]
    buildings = BeautifulSoup(buildings.rsplit('<div class="uw-footer">',
                                               1)[0],
                              features='lxml')
    # Each <p> groups buildings; entries within a group are separated by <br/>.
    for building_group in buildings.find_all('p'):
        for building in str(building_group).split('<br/>'):
            if 'a href' in building:
                building = BeautifulSoup(building, features='lxml')
                abbreviation = building.find('code').text
                name = building.find('a').text
                names.append((abbreviation, name.split('\n', 1)[0].strip()))
    return names
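
A usage sketch; the output depends on the live page, but entries come back as (abbreviation, name) pairs such as ('KNE', 'Kane Hall'):

for abbreviation, name in buildings()[:5]:
    print(abbreviation, '-', name)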
Example #6
        def states():
            state_select = self.get_state_select()
            state_select_option_values = [
                '%s' % o.get_attribute('value')
                for o in state_select.options[1:]
            ]

            # `keywords` is assumed to come from the enclosing scope.
            for v in state_select_option_values:
                state_select = self.select_state_option(v)
                text = BeautifulSoup(self.driver.page_source,
                                     "html.parser").get_text()
                meta_prices = []
                for keyword in keywords:
                    prices = []
                    counter = text.count(keyword)
                    # Grab the rest of the line just after each occurrence
                    # of the keyword, working backwards from the end.
                    for z in range(counter):
                        prices.append(
                            text.rsplit(keyword, z + 1)[1].splitlines()[0])
                    prices = [float(price) for price in prices]
                    meta_prices.append(prices)
                yield (state_select.first_selected_option.text, meta_prices)
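
The extraction above leans on rsplit's maxsplit argument: text.rsplit(keyword, z + 1)[1] is the text immediately after the (z + 1)-th occurrence of keyword counted from the end of the string, and splitlines()[0] keeps only the remainder of that line. A standalone sketch with made-up page text (the real text comes from the rendered page):

text = 'Regular: 3.10\nMidgrade: 3.35\nRegular: 3.25\n'
keyword = 'Regular: '

prices = []
for z in range(text.count(keyword)):
    prices.append(text.rsplit(keyword, z + 1)[1].splitlines()[0])

print([float(p) for p in prices])  # -> [3.25, 3.1], last occurrence first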
Example #8
    rm.write("Welcome to the git version of the Revised Code of Washington (RCW). It is an *unofficial* copy derived from http://apps.leg.wa.gov/rcw/[the official website]. This root commit will stay the same but all others may change if/when we import historical changes. Tags will be redone as things change so they should be stable.\n\n")
    for title in titles:
        info = titles[title]
        title_folder_name = pad_number(title, 2) + "_" + filename_friendly(info["title"])
        title_folder = root / title_folder_name
        title_folder.mkdir(exist_ok=True)
        title_readme = title_folder / "README.adoc"
        rm.write("* link:" + str(title_folder_name) + "[" + title + " - " + info["title"] + "]\n")
        with title_readme.open("w") as tf:
            tf.write("= ")
            tf.write(title + " " + info["title"])
            tf.write("\n\n")

            max_len = 0
            for chapter in info["chapters"]:
                max_len = max(max_len, len(chapter.rsplit(".", maxsplit=1)[-1].strip(string.ascii_uppercase)))
            for chapter in info["chapters"]:
                chapter_info = info["chapters"][chapter]
                chapter_name = pad_number(chapter, max_len) + "_" + filename_friendly(chapter_info["title"]) + ".adoc"
                chapter_path = title_folder / chapter_name
                link_path = str(chapter_name)
                tf.write("* link:" + link_path + "[" + chapter + " - " + chapter_info["title"] + "]\n")
                with chapter_path.open("w") as f:
                    f.write("= " + chapter + " - " + chapter_info["title"] + "\n")
                    f.write(":toc:\n\n")

                    for section in chapter_info["sections"]:
                        section_info = chapter_info["sections"][section]
                        f.write("== ")
                        f.write(section)
                        f.write(" - ")
Example #9
from bs4 import BeautifulSoup


def load_hearing_response(fname, split_on='    Present:'):
    with open(fname, 'r') as inf:
        html = inf.read()
    txt = BeautifulSoup(html, 'html.parser').get_text()
    # Return everything after the last occurrence of split_on.
    return txt.rsplit(split_on, 1)[-1]
Example #10
    def cmd_post(self, data):
        all_data = data.message.strip().split(' ', 1)
        if len(all_data) != 2:
            log(ERROR,
                CMD_INVALID_POST,
                origin=L_COMMAND,
                error_type=CMD_INVALID_ERR,
                print_mode=PrintMode.VERBOSE_PRINT.value)
            gs.gui_service.quick_gui(CMD_INVALID_POST,
                                     text_type='header',
                                     box_align='left')
            return

        # Download image from the provided URL.
        img_url = all_data[1].strip()
        img_url = BeautifulSoup(img_url, 'html.parser').get_text()
        try:
            # Download the image in 1024 byte chunks and save it as a temporary image.
            IH.download_image_stream(img_url)
        except exceptions.HTTPError:
            log(ERROR,
                GEN_HTTP_ERROR,
                origin=L_COMMAND,
                error_type=CMD_PROCESS_ERR,
                print_mode=PrintMode.VERBOSE_PRINT.value)
            gs.gui_service.quick_gui(GEN_HTTP_ERROR,
                                     text_type='header',
                                     box_align='left')
            return
        except exceptions.InvalidSchema:
            log(ERROR,
                GEN_INVALID_SCHEMA_ERROR,
                origin=L_COMMAND,
                error_type=CMD_PROCESS_ERR,
                print_mode=PrintMode.VERBOSE_PRINT.value)
            gs.gui_service.quick_gui(GEN_INVALID_SCHEMA_ERROR,
                                     text_type='header',
                                     box_align='left')
            return
        except exceptions.RequestException:
            log(ERROR,
                GEN_REQUESTS_ERROR,
                origin=L_COMMAND,
                error_type=CMD_PROCESS_ERR,
                print_mode=PrintMode.VERBOSE_PRINT.value)
            gs.gui_service.quick_gui(GEN_REQUESTS_ERROR,
                                     text_type='header',
                                     box_align='left')
            return

        # Format the retrieved image into the b64 variant for mumble usage.
        img_ext = img_url.rsplit('.', 1)[1]
        formatted_string = IH.format_image(
            T_TEMP_IMG_NAME, img_ext,
            f'{dir_utils.get_temp_med_dir()}/internal/images')
        # Display image with PGUI system and log the event.
        gs.gui_service.quick_gui_img(
            f"{dir_utils.get_temp_med_dir()}/internal/images",
            formatted_string,
            bgcolor=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_COL],
            cellspacing=self.metadata[C_PLUGIN_SETTINGS][P_FRAME_SIZE],
            format_img=False)
        log(INFO,
            INFO_POSTED_IMAGE,
            origin=L_COMMAND,
            print_mode=PrintMode.VERBOSE_PRINT.value)