コード例 #1
0
def getCoursesCards(soup, isSetCategories=False):
    """Extract the course cards found on a course-listing page.

    Args:
        soup: Parsed page exposing ``select`` (presumably a BeautifulSoup
            document -- TODO confirm against ``Scraper.html_soup``).
        isSetCategories: When true, every category page reported by
            ``getCategoriesURLs(soup)`` is fetched, and each course in the
            result whose ``'publicId'`` also appears on that page gets the
            category name appended to its ``'categories'`` list.

    Returns:
        A list with one ``extractCourseCard`` result per card element, in
        page order.
    """
    data = [
        extractCourseCard(card)
        for card in soup.select('div.row.course-list.list > div')
    ]

    # Nothing to tag: skip the category crawl entirely. This spares one
    # request per category when the listing page holds no cards.
    if not isSetCategories or not data:
        return data

    s = Scraper()
    for category in getCategoriesURLs(soup):
        url = category.get('url')
        if not url:
            # A category entry without a link cannot be fetched.
            continue
        name = category.get('name', '')

        s.get(url)
        categoryCards = getCoursesCards(s.html_soup())
        # Set lookup keeps the matching pass linear in the course count
        # (assumes 'publicId' values are hashable, e.g. strings).
        ids = {c['publicId'] for c in categoryCards}

        for course in data:
            if course['publicId'] not in ids:
                continue
            if course.get('categories', None):
                course['categories'].append(name)
            else:
                course['categories'] = [name]

    return data
コード例 #2
0
def extraBlogData(url):
    """Fetch a blog post page and collect the extra fields it provides.

    Args:
        url: Address of the blog post. The fifth ``/``-separated piece of
            the string is used as the post id (for ``scheme://host/a/b``
            that is ``b``).

    Returns:
        A dict with ``'publicId'`` and ``'thumbnail'``. ``'thumbnail'`` is
        the ``src`` of the first element matching ``.post-content img``, or
        ``None`` when there is no such element or it has no ``src``.

    Raises:
        IndexError: If ``url`` has fewer than five ``/``-separated pieces.
    """
    s = Scraper()
    s.get(url)
    soup = s.html_soup()

    data = {
        'publicId': url.split('/')[4],
    }

    thumbnailElm = soup.select_one('.post-content img')
    # ``get`` instead of ``[...]``: an <img> lacking ``src`` then yields
    # None -- the value already used when no image is found -- rather than
    # raising KeyError (assumes a BeautifulSoup Tag -- TODO confirm).
    data['thumbnail'] = thumbnailElm.get('src') if thumbnailElm else None

    return data
コード例 #3
0
def getTheGoodZoneDataAndMyPathes():
    """Scrape the site and pair every scraped data set with its paths.

    The ``/courses`` and ``/blog`` pages are fetched once each and handed
    to the individual extractors.

    Returns:
        A list of dicts. Each has a ``'setPath'`` and the scraped
        ``'items'``; all but the course-description entry also carry a
        ``'delPath'``.
    """
    s = Scraper()

    s.get(f'{protocol}://{domain}/courses')
    coursesSoup = s.html_soup()

    s.get(f'{protocol}://{domain}/blog')
    blogsSoup = s.html_soup()

    courses = getCourses()
    # One call feeds four entries below instead of one call per entry
    # (assumes getCoaches gives equivalent data for the same soup on every
    # call -- TODO confirm).
    coaching = getCoaches(coursesSoup)

    return [
        {
            'delPath': '/delete/courses/',
            'setPath': '/set/course/',
            'items': courses,
        },
        {
            'delPath': '/delete/coaches/',
            'setPath': '/set/coach/',
            'items': coaching['coach'],
        },
        {
            'delPath': '/delete/instructors/',
            'setPath': '/set/instructor/',
            'items': coaching['course'],
        },
        {
            'delPath': '/delete/live-events/',
            'setPath': '/set/live-event/',
            'items': coaching['live event'],
        },
        {
            'setPath': '/set-course-description/',
            'items': coaching['description'],
        },
        {
            'delPath': '/delete/blogs/',
            'setPath': '/set/blog/',
            'items': getBlogs(blogsSoup),
        },
    ]
コード例 #4
0
def getCoachingCards(soup):
    """Collect coaching cards from a listing page, grouped by card type.

    Each card element is converted with ``extractCoachingCard``. The page
    behind every card's ``'url'`` is then fetched; when its
    ``.course-block.custom_html`` text contains one of the known type
    words, the card is enriched with ``parseCoaching`` and
    ``handleCoachingCardData`` and filed under the ``'type'`` the latter
    reports. Cards without such text are dropped.

    Args:
        soup: Parsed listing page exposing ``select``.

    Returns:
        A dict with the keys ``'coach'``, ``'live event'``, ``'course'`` and
        ``'description'``, each mapping to a list of card entries.
    """
    typeWords = ('description', 'course', 'coach', 'live event')

    elements = soup.select('div.row.services-row.list > div')
    grouped = {
        'total': [extractCoachingCard(elm) for elm in elements],
        'coach': [],
        'live event': [],
        'course': [],
        'description': [],
    }

    scraper = Scraper()

    for entry in grouped['total']:
        scraper.get(entry.get('url'))
        detailSoup = scraper.html_soup()

        block = detailSoup.select_one('.course-block.custom_html')
        if not block:
            continue

        text = block.text
        if not any(word in text for word in typeWords):
            continue

        entry.update(parseCoaching(text))

        finished = handleCoachingCardData(entry)
        kind = finished.pop('type')
        grouped[kind].append(finished)

    # 'total' is only a work list and is not part of the result.
    del grouped['total']
    return grouped
コード例 #5
0
def getCourses():
    """Walk the paginated course listing and gather every course.

    Pages are requested from ``/courses?page=1`` upwards, each one parsed
    with ``getCoursesCards(..., isSetCategories=True)``. The walk stops at
    the first page that yields no cards.

    Returns:
        A list of the entries from all non-empty pages, in page order.
    """
    collected = []
    scraper = Scraper()

    page = 1
    while True:
        scraper.get(f'{protocol}://{domain}/courses?page={page}')
        pageCourses = getCoursesCards(
            scraper.html_soup(), isSetCategories=True)

        if not pageCourses:
            # An empty page marks the end of the listing.
            break

        collected.extend(pageCourses)
        page += 1

    return collected