コード例 #1
0
    def parseCourse(self, course, dow):
        """Turn one raw HTML timetable cell into a JSON-formatted course.

        The markup is removed with two regex substitutions, the remaining
        text is split on newline / tab / carriage-return runs, and the
        individual values are then read from fixed positions (2, 4, 6 and 8)
        of the resulting list.

        :param course: raw HTML fragment describing a single course
        :param dow: day-of-week value; it becomes the only entry of the
            course's ``days`` list
        :return: whatever ``self.toJSONFormatCourse`` returns
        :raises IndexError: if the split text has fewer than nine pieces
        """
        # A closing anchor followed on the next line by a `br ... ">` run is
        # turned into ", " (presumably the separator between linked
        # instructor names -- TODO confirm against the scraped page).
        text = re.sub('</a.*\n.*br.*">', ', ', course)
        # Drop every remaining tag.
        text = re.sub('<t.*">|<.*.*>', '', text)
        # NOTE(review): the capturing group makes re.split() return the
        # separators as well, and the pattern can match the empty string;
        # the fixed positions used below depend on that exact behaviour.
        fields = re.split('(\n|\t|\r)*', text)

        times = ParserUtil.parseTime(fields[2], ":")
        startDate = ParserUtil.parseStartDate(fields[8], "/")

        # Position 4 is the course title; level, genre and type are all
        # derived from it.
        title = fields[4]
        originalLevel = self.parseOriginalLevel(title)
        level = self.parseLevel(title)
        genre = ParserUtil.parseGenre(title)
        courseType = ParserUtil.parseType(title) or "partnerwork"

        instructors = re.split(',', fields[6])

        # Argument order: name, description, school, level, originalLevel,
        # days, genre, type, classCount, instructors, startDate, startTime,
        # endTime, duration.
        # Position 8 is used both as the description and as the start-date
        # source.
        return self.toJSONFormatCourse(
            title,
            fields[8],
            self.SCHOOL_NAME,
            level,
            originalLevel,
            [dow],
            genre,
            courseType,
            self.DEFAULT_COURSE_DAYS_NUM,
            instructors,
            startDate,
            times[0],
            times[1],
            ParserUtil.getDuration(times[1], times[0]))
コード例 #2
0
    def parseCourse(self, course, week):
        """Parse one "<br />"-separated timetable entry into a JSON course.

        Pieces read from the entry after splitting on "<br />":
          [0] time line: a leading day digit, the hours start at column 3
          [1] course name (also searched for type, level and genre)
          [2] start date (also searched for type and level)
          [3] fallback start date when [2] yields none
          [3], [4] description of zouk entries

        :param course: raw HTML of a single entry
        :param week: when falsy, the day index taken from the time line is
            shifted by 5 (presumably the weekend table -- TODO confirm)
        :return: the result of ``self.toJSONFormatCourse``, or ``None`` when
            the time line does not start with a digit or when a zouk entry
            lists no date that is today or later
        """
        courseSplit = re.split("<br />", course)

        timeLine = courseSplit[0]
        # Slicing (instead of indexing) keeps an empty time line from
        # raising IndexError; it is skipped like any other non-numeric line.
        if not timeLine[:1].isdigit():
            return None
        dow = int(timeLine[0]) - 1
        if not week:
            dow += 5

        name = courseSplit[1].strip()
        times = ParserUtil.parseTime(timeLine[3:].strip(), '.')
        startDate = ParserUtil.parseStartDate(courseSplit[2].strip(), ".")
        if startDate == '' and len(courseSplit) > 3:
            startDate = ParserUtil.parseStartDate(courseSplit[3], ".")

        instructors = self.getInstructors(courseSplit)

        # Look for the type in the name first, then in the date line.
        courseType = ParserUtil.parseType(courseSplit[1])
        if not courseType:
            courseType = ParserUtil.parseType(courseSplit[2])
        if not courseType:
            courseType = "partnerwork"

        level = self.parseLevel(courseSplit[1])
        if level == "":
            level = self.parseLevel(courseSplit[2])
        if level == "" and courseType == TYPE_PARTNER:
            # No explicit level: decide from the start date, with a
            # threshold of 84 days (12 weeks).
            if ParserUtil.getDiffInDaysFromNow(startDate) > 84:
                level = LEVEL_INTERMEDIATE
            else:
                level = LEVEL_BASIC
        if level == "" and courseType == TYPE_SOLO:
            level = LEVEL_OPEN

        genre = ParserUtil.parseGenre(courseSplit[1])

        description = ''
        courseCount = self.DEFAULT_COURSE_DAYS_NUM

        if genre and genre[0] == GENRE_ZOUK:
            courseCount = 2
            # Joining a slice tolerates entries with fewer than five parts.
            description = " ".join(part.strip() for part in courseSplit[3:5])

            # Capture day and month separately so that single-digit days
            # ("5.03") parse correctly, and escape the dot so that plain
            # digit runs such as "2012" are not mistaken for a date.
            # NOTE(review): only month and day are compared, so dates on the
            # far side of a year boundary are misjudged.
            today = (date.today().month, date.today().day)
            upcoming = any(
                (int(month), int(day)) >= today
                for day, month in re.findall(r"(\d{1,2})\.(\d{2})", description))
            if not upcoming:
                return None

        originalLevel = level
        # Argument order: name, description, school, level, originalLevel,
        # days, genre, type, classCount, instructors, startDate, startTime,
        # endTime, duration.
        return self.toJSONFormatCourse(
            name,
            description,
            self.SCHOOL_NAME,
            level,
            originalLevel,
            [dow],
            genre,
            courseType,
            courseCount,
            instructors,
            startDate,
            times[0],
            times[1],
            ParserUtil.getDuration(times[1], times[0]))
コード例 #3
0
    def extract(self, soup):
        """Collect the evening courses found in a parsed timetable page.

        Every ``div.dzien`` (day) element is scanned for the hour slots
        17 to 21 (``div`` with class ``godzina godzina<hour>``). Each
        ``div.technika`` entry in a slot whose genre maps to a non-empty
        value other than a party becomes one JSON-formatted course lasting
        one hour, with a class count of 4.

        Optional parts of a slot fall back to neutral values when they are
        missing instead of raising ``KeyError``: level and originalLevel
        default to ``""``, instructors to ``[]`` and startDate to ``''``.

        NOTE: relies on Python 2 string semantics (``unicode``, and encoded
        byte strings concatenated with ``str`` literals).

        :param soup: parsed page exposing ``findAll`` / ``find``
        :return: list of ``self.toJSONFormatCourse`` results
        :raises KeyError: if a genre, type or day name is not present in
            ``genresLofty``, ``typesLofty`` or ``weekdays``
        """
        eveningHours = range(17, 22)
        courses = []

        for day in soup.findAll('div', 'dzien'):
            dayName = day.find('strong', 'dayname').text.encode('utf-8')

            for hour in eveningHours:
                for course in day.findAll('div', 'godzina godzina%s' % hour):
                    for style in course.findAll('div', 'technika'):
                        if not style:
                            continue

                        genre = style.find('strong').text
                        parsedGenre = genresLofty[genre.encode('utf-8')]
                        if not parsedGenre or parsedGenre[0] == GENRE_PARTY:
                            continue

                        # Whatever text remains next to the genre label is
                        # the type label.
                        parsedType = style.text.replace(genre, '').encode('utf-8')

                        courseName = genre.encode('utf-8')
                        if parsedType:
                            courseName = courseName + " (" + parsedType + ")"
                        courseName = courseName.replace('&amp;', "and").replace("ż", "z").replace("ń", "n").replace("ę", "e")

                        if parsedType != "":
                            parsedType = typesLofty[parsedType]
                        # Also reached when the lookup above returned "".
                        if parsedType == "":
                            parsedType = typesLofty[style.text.encode('utf-8')]

                        # The first key of `levels` that matches the class
                        # of a div inside the slot decides the level.
                        level = ""
                        for pattern in levels:
                            if course.find('div', {'class': re.compile(pattern)}):
                                level = levels[pattern]
                                break

                        instructors = []
                        instructorsTag = course.find('div', 'instruktorzy')
                        if instructorsTag:
                            instructors = instructorsTag.text.replace(unicode('\xc5\x81', 'utf-8'), 'L').split(',')

                        startDate = ''
                        startTag = course.find('div', {'class': re.compile('start')})
                        if startTag:
                            # The date is the last whitespace-separated
                            # token of the element's text.
                            startDate = ParserUtil.parseStartDate(startTag.text.split()[-1], '.')

                        courses.append(self.toJSONFormatCourse(
                            courseName,
                            "",
                            self.SCHOOL_NAME,
                            level,
                            level,
                            [weekdays[dayName]],
                            parsedGenre,
                            parsedType,
                            4,
                            instructors,
                            startDate,
                            hour * 100,
                            (hour + 1) * 100,
                            60))

        return courses
コード例 #4
0
    def get_courses(self, location, fields, year=2012):
        """Build JSON-formatted courses from the scraped rows of one location.

        :param location: iterable of day rows. Each row is indexed as
            ``[0]`` the row date ("DD.MM"), ``[1]`` the weekday name and
            ``[2:]`` the course texts of that day.
        :param fields: mapping of field name to an object with ``findall``
            (e.g. a compiled regex). Each is applied to a course text and
            the first match is kept. ``hours`` ("HH:MM-HH:MM"), ``name`` and
            ``progress`` ("<current>/<total>") are read; ``level`` and
            ``genre`` are optional.
        :param year: calendar year the row dates belong to. Defaults to
            2012, the value that was previously hard-coded.
        :return: list of ``self.toJSONFormatCourse`` results
        :raises KeyError: if a course text has ``hours`` but lacks ``name``
            or ``progress``, or a value is missing from the lookup tables
        """
        courses = []
        for row in location:
            for text in row[2:]:
                d = {}
                for key, expression in fields.items():
                    matches = expression.findall(text)
                    if matches:
                        d[key] = matches[0]

                # Texts without a recognisable time range are not courses.
                if 'hours' not in d:
                    continue

                hourParts = d['hours'].split('-')
                start, stop = hourParts[0], hourParts[1]
                startHour, startMinute = start.split(':')
                stopHour, stopMinute = stop.split(':')
                duration = ((int(stopHour) * 60 + int(stopMinute))
                            - (int(startHour) * 60 + int(startMinute)))
                # Times are passed on as HHMM integers.
                startTime = int(start.replace(':', ''))
                endTime = int(stop.replace(':', ''))

                # d['name'] is joined element-wise, so the name expression is
                # expected to yield a tuple of groups -- TODO confirm.
                name = re.sub(r'\s+', ' ', ' '.join(d['name'])).strip()
                progressParts = d['progress'].split('/')
                progress, classCount = progressParts[0], progressParts[1]
                wday = weekdays[row[1].encode('utf-8')]

                # A running course (progress past the first class) that is
                # already listed only contributes its weekday to that entry.
                duplicate = False
                if progress != '1':
                    for existing in courses:
                        if existing['name'] == name:
                            duplicate = True
                            if wday not in existing['days']:
                                existing['days'].append(wday)
                if duplicate:
                    continue

                if 'level' in d:
                    level = levels[d['level']]
                # `in` also catches a keyword at the very start of the name,
                # which the former `find(...) > 0` test missed.
                elif "zaawan" in name:
                    level = LEVEL_ADVANCED
                elif "podstawowy" in name:
                    level = LEVEL_BASIC
                else:
                    level = LEVEL_OPEN

                # A missing genre is looked up under the key None.
                genre = [genres[d.get('genre')]]

                day, month = row[0].replace('.', '/').split('/')
                firstClass = datetime.date(int(year), int(month), int(day))
                if progress != '1':
                    # The row date is that of the current class; step back
                    # one week per class already held.
                    firstClass -= datetime.timedelta(days=(int(progress) - 1) * 7)
                startDate = firstClass.strftime('%Y-%m-%d')

                courseType = ParserUtil.parseTypeCustomMap(name, typesViva)
                courses.append(self.toJSONFormatCourse(
                    name,
                    "",
                    self.SCHOOL_NAME,
                    level,
                    level,
                    [wday],
                    genre,
                    courseType,
                    classCount,
                    [],
                    startDate,
                    startTime,
                    endTime,
                    duration))

        return courses