def filter(self, el):
    """Return the start time found in an event's schedule text.

    The text of ``el[1]`` (or ``el[0]`` when ``el`` holds a single item) is
    run through ``CleanText`` and split on ``' - '``. The time is searched
    in the second-to-last segment, written as ``20h30`` or ``20h``.

    :param el: sequence of elements; only index 0 or 1 is read.
    :returns: ``time(hours, minutes)``; minutes default to 0 when absent.
    :raises IndexError: if the cleaned text has fewer than two segments.
    :raises ValueError: if no ``<hours>h[<minutes>]`` pattern is found
        (and, presumably, from ``time()`` itself when the values are out
        of range -- assumes ``time`` is ``datetime.time``).
    """
    index = 1 if len(el) > 1 else 0
    # NOTE(review): ['HORAIRES'] is presumably the list of symbols CleanText
    # strips from the text -- confirm against the CleanText signature.
    content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
    a_time = content.split(' - ')[-2]

    # The 'h' separator is mandatory and the minutes are optional. With the
    # previous pattern ("h?" followed by mandatory digits) an input such as
    # "20h" backtracked to hours=2, minutes=0 and "9h" did not match at all.
    # A plain raw string is used because the ur'' prefix is not valid
    # Python 3 syntax.
    m = re.search(r'(?P<hh>\d+)\s*[hH]\s*(?P<mm>\d+)?', a_time)
    if m is None:
        raise ValueError('no time found in %r' % a_time)

    return time(int(m.group('hh')), int(m.group('mm') or 0))
def filter(self, el):
    """Return the price found in the last segment of an event's schedule text.

    The text of ``el[1]`` (or ``el[0]`` when ``el`` holds a single item) is
    run through ``CleanText`` and split on ``' - '``. The first number of
    the last segment is returned, with a decimal comma accepted
    (``"12,50"`` gives ``12.5``).

    :param el: sequence of elements; only index 0 or 1 is read.
    :returns: the price as a ``float``, or ``0.0`` when the segment
        contains no number.
    """
    index = 1 if len(el) > 1 else 0
    # NOTE(review): ['HORAIRES'] is presumably the list of symbols CleanText
    # strips from the text -- confirm against the CleanText signature.
    content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
    a_price = content.split(' - ')[-1]

    # Search the segment itself. The previous " ".join(a_price) inserted a
    # space between every character of the string, so "12,50" was read as
    # the single digit "1" and the decimal-comma branch could never match.
    m = re.search(r"\d*\,\d+|\d+", a_price)
    if m is None:
        return 0.0
    return float(m.group(0).replace(',', '.'))
def filter(self, el):
    """Return the event date parsed from the text before the first ``' - '``.

    The cleaned text of ``el[0]`` is cut at its first ``' - '``, the
    patterns of ``date_util.DATE_TRANSLATE_FR`` are applied, and the result
    is parsed as ``"%A %d %B %Y"``. When the text has no year, one is
    appended: the current year, or the next year when the parsed month is
    earlier than the current month.

    :param el: sequence of elements; only ``el[0]`` is read.
    :returns: a ``datetime`` at midnight of the parsed day.
    :raises ValueError: if the text contains no ``' - '`` or the date
        cannot be parsed.
    """
    content = CleanText.clean(CleanText(CleanHTML('.'), ['*'])(el[0]))
    a_date = content[0:content.index(' - ')]

    # Each entry is a (compiled pattern, replacement) pair.
    # NOTE(review): presumably French day/month names mapped to English so
    # that strptime can read them under an English/C locale -- confirm.
    for fr, en in date_util.DATE_TRANSLATE_FR:
        a_date = fr.sub(en, a_date)

    # Read the clock once so that the month comparison and the chosen year
    # cannot disagree when the call straddles a year boundary.
    now = datetime.now()

    try:
        # Probe the month against a fixed leap year: a year-less
        # datetime.strptime defaults to 1900 and therefore rejects
        # "29 February" outright. The probe year is discarded.
        month = datetime.strptime(a_date + u' 2000', "%A %d %B %Y").month
    except ValueError:
        # Not a year-less "weekday day month" string (for instance it
        # already carries a year): leave it for the final parse to
        # accept or reject.
        pass
    else:
        year = now.year + 1 if now.month > month else now.year
        a_date += u' %i' % year

    return datetime.strptime(a_date, "%A %d %B %Y")
def next_page(self):
    """Build the request for the next results page, if there is one.

    The pagination form (``//form[@id="paginationForm"]``) is looked up on
    the current page and its cleaned text is searched for a
    ``"<current> / <last>"`` counter.

    :returns: ``form.request`` with the ``page`` field set to the next
        page number, or ``None`` when the form is missing, no counter is
        found, or the current page is already the last one.
    """
    try:
        form = self.page.get_form('//form[@id="paginationForm"]')
    except FormNotFound:
        return None

    text = CleanText.clean(form.el)
    # Raw string: "\d" in a non-raw literal is an invalid escape sequence
    # on Python 3.
    m = re.search(r"(\d+) / (\d+)", text or "", flags=re.MULTILINE)
    if not m:
        return None

    cur = int(m.group(1))
    last = int(m.group(2))
    # ">=" rather than "==": a counter reporting a page beyond the last one
    # must also stop the pagination instead of requesting further pages.
    if cur >= last:
        return None

    form["page"] = str(cur + 1)
    return form.request