Exemple #1
0
            EC.presence_of_all_elements_located((By.XPATH, f'{THE_XPATH}')))
        value = browser.find_element_by_xpath(f'{THE_XPATH}').text
        course_data['Career_Outcomes'] = value
    except (AttributeError, TimeoutException, NoSuchElementException,
            ElementNotInteractableException):
        print('cant extract outcomes')

    # DURATION
    # Read the full-time duration text through the browser and store the
    # parsed amount/unit in course_data['Duration'] / ['Duration_Time'].
    try:
        # XPath: the element immediately following a node whose text is
        # exactly 'FULL TIME'; the outer [1] keeps only the first such match.
        THE_XPATH = "(//*[text()='FULL TIME'][1]/following::*[1])[1]"
        # Block until the target is present in the DOM; `delay` is the
        # WebDriverWait timeout. A timeout raises TimeoutException.
        WebDriverWait(browser, delay).until(
            EC.presence_of_all_elements_located((By.XPATH, f'{THE_XPATH}')))
        value = browser.find_element_by_xpath(f'{THE_XPATH}').text
        course_data['Full_Time'] = 'Yes'

        # Normalise wording before parsing. convert_duration is defined
        # elsewhere; presumably it returns (amount, unit) -- the code below
        # relies on [0] being comparable to 2 and [1] being a string.
        # TODO confirm against convert_duration.
        duration = convert_duration(
            value.replace('trimester', 'semester').replace('yrs', 'years'))
        course_data['Duration'] = duration[0]
        course_data['Duration_Time'] = duration[1]
        # Canonicalise the unit label: singular 'Month'/'Year' when the amount
        # is below 2, and 'Weeks' for any week-based unit. The repeated
        # course_data['Duration'] assignments are redundant with the one above.
        if duration[0] < 2 and 'month' in duration[1].lower():
            course_data['Duration'] = duration[0]
            course_data['Duration_Time'] = 'Month'
        if duration[0] < 2 and 'year' in duration[1].lower():
            course_data['Duration'] = duration[0]
            course_data['Duration_Time'] = 'Year'
        if 'week' in duration[1].lower():
            course_data['Duration'] = duration[0]
            course_data['Duration_Time'] = 'Weeks'

    # Best-effort: any lookup/parse failure is only reported, not re-raised.
    # NOTE(review): `e` is bound but never used.
    # NOTE(review): this tuple already contains NoSuchElementException, and
    # Python uses the first matching handler, so the
    # `except NoSuchElementException` clause below can never run.
    except (AttributeError, TypeError, TimeoutException,
            NoSuchElementException, ElementNotInteractableException) as e:
        print('cant extract full time duration')
    # Fallback that parses the raw page source with BeautifulSoup instead of
    # going through Selenium. Currently unreachable (see note above); it looks
    # like it was meant to run when the XPath lookup fails -- confirm the
    # intent before reordering the handlers, since errors raised in here
    # (e.g. from convert_duration) would not be caught by the clause above.
    except NoSuchElementException:
        html_ = browser.page_source
        print('got duration source')
        soup_ = bs4.BeautifulSoup(html_, 'lxml')
        # Look for the <li id="course-overview-duration"> element.
        duration_tag = soup_.find('li', {'id': 'course-overview-duration'})
        if duration_tag:
            # tag_text is defined elsewhere; presumably returns the tag's
            # text as a str (it is lower-cased and .replace()d below).
            duration = tag_text(duration_tag)
            print(f'duration so far: {duration}')
            # Flag the study mode(s) mentioned in the text; both may be set.
            if 'part time' in duration.lower(
            ) or 'part-time' in duration.lower():
                course_data['Part_Time'] = 'Yes'
            if 'full time' in duration.lower(
            ) or 'full-time' in duration.lower():
                course_data['Full_Time'] = 'Yes'

            # `duration` is rebound from the raw text to the parsed result.
            # Unlike the Selenium path above, 'yrs' is not rewritten to
            # 'years' here.
            duration = convert_duration(
                duration.replace('trimester', 'semester'))
            course_data['Duration'] = duration[0]
            course_data['Duration_Time'] = duration[1]
            # Same unit-label canonicalisation as in the try body.
            if duration[0] < 2 and 'month' in duration[1].lower():
                course_data['Duration'] = duration[0]
                course_data['Duration_Time'] = 'Month'
            if duration[0] < 2 and 'year' in duration[1].lower():
                course_data['Duration'] = duration[0]
                course_data['Duration_Time'] = 'Year'
            if 'week' in duration[1].lower():
                course_data['Duration'] = duration[0]
                course_data['Duration_Time'] = 'Weeks'
    else:
        html_ = browser.page_source
        print('got duration source')
        soup_ = bs4.BeautifulSoup(html_, 'lxml')