def parse_hours(self, store_hours):
    """Build an opening-hours string from ``"<day> <open>-<close>"`` entries.

    :param store_hours: iterable of strings, each holding a day and an
        ``open-close`` time range separated by whitespace, or ``None``.
    :return: result of ``OpeningHours.as_opening_hours()``, or ``None`` when
        ``store_hours`` is ``None``.
    """
    if store_hours is None:
        return None

    opening_hours = OpeningHours()
    for store_day in store_hours:
        try:
            day, time_range = store_day.split()
            open_time, close_time = time_range.split('-')
        except ValueError:
            # Malformed entry: report it and skip it rather than handing
            # empty strings to add_range().
            print("Error in store_day: {}".format(store_day))
            continue

        # Midnight written as 24:00 is not valid for '%H:%M'.
        open_time = open_time.replace('24:00', '00:00')
        close_time = close_time.replace('24:00', '00:00')
        if not open_time or not close_time:
            continue

        opening_hours.add_range(day=day,
                                open_time=open_time,
                                close_time=close_time,
                                time_format='%H:%M')

    return opening_hours.as_opening_hours()
def parse_hours(self, hours):
    """Parse ``<br>``-separated ``"<days>: <open> - <close>"`` lines.

    :param hours: sequence whose first element is a mapping with a ``"value"``
        string of ``<br>``-separated lines; may be empty or ``None``.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()
    if not hours:
        return opening_hours.as_opening_hours()

    def to_24h(value):
        # Times appear either as "9:30 AM" or as "9 AM".
        fmt = '%I:%M %p' if ":" in value else '%I %p'
        return datetime.strptime(value, fmt).strftime('%H:%M')

    for hour in hours[0].get("value").split("<br>"):
        match = re.search(r'(.*?):\s(.*?)\s-\s(.*?)$', hour)
        if match is None:  # closed
            continue
        day, open_time, close_time = match.groups()
        open_time = to_24h(open_time)
        close_time = to_24h(close_time)

        if '-' in day:
            start_day, end_day = (part.strip() for part in day.split('-'))
            first = DAY_MAPPING.index(start_day[:2])
            last = DAY_MAPPING.index(end_day[:2])
            days = DAY_MAPPING[first:last + 1]
        else:
            # A single day used to be dropped silently; keep it when its
            # two-letter prefix is a known day.
            abbreviation = day.strip()[:2]
            days = [abbreviation] if abbreviation in DAY_MAPPING else []

        for d in days:
            opening_hours.add_range(day=d,
                                    open_time=open_time,
                                    close_time=close_time,
                                    time_format='%H:%M')

    return opening_hours.as_opening_hours()
def parse_hours(self, store_hours):
    """Parse a ``"<day>-<day> <from>-<to> Uhr"`` string into opening hours.

    Times may use ``.`` or ``:`` between hours and minutes (``9.30``,
    ``9:30``) or be bare hours (``9``).

    :param store_hours: the raw opening-hours text.
    :return: result of ``OpeningHours.as_opening_hours()``; empty when the
        text does not match the expected layout.
    """
    opening_hours = OpeningHours()

    def to_clock(text):
        # "9.30" means 9 hours 30 minutes, not 9.3 hours: split the parts
        # instead of going through float arithmetic.
        hours, _, minutes = text.replace(':', '.').partition('.')
        return "%d:%02d" % (int(hours), int(minutes or 0))

    match = re.match(r'(.+?)-(.+?) +(\d.*?)-(.+?) Uhr', store_hours)
    if match:
        from_day = match.group(1).strip()
        to_day = match.group(2).strip()
        fmt_from_time = to_clock(match.group(3).strip())
        fmt_to_time = to_clock(match.group(4).strip())

        # DAY_MAPPING is used both ways here: day name -> index for the
        # range bounds, and index -> day for add_range().
        for day in range(DAY_MAPPING[from_day], DAY_MAPPING[to_day] + 1):
            opening_hours.add_range(
                day=DAY_MAPPING[day],
                open_time=fmt_from_time,
                close_time=fmt_to_time,
                time_format='%H:%M'
            )

    return opening_hours.as_opening_hours()
def parse_hours(self, hours):
    """Turn textual ``"<days> <open>-<close>"`` groups into opening hours.

    Groups containing ``closed`` and groups without a time range are skipped;
    ``open 24 hours`` groups are recorded as 00:00:00-23:59:00.

    :param hours: iterable of strings, one per day or day range.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    result = OpeningHours()

    for entry in hours:
        if 'closed' in entry:
            continue

        if 'open 24 hours' in entry:
            days = re.search(r'([a-zA-Z\-]+)\s+open 24 hours', entry).groups()[0]
            opens, closes = '00:00:00', '23:59:00'
        else:
            found = re.search(
                r'([a-zA-Z\-]+)\s+([\d:\sapm]+)-([\d:\sapm]+)', entry)
            if found is None:
                continue  # no hours listed, just day
            days, opens, closes = found.groups()

        # Exactly one dash means a day range; anything else is one day.
        pieces = days.split('-')
        if len(pieces) == 2:
            first_day, last_day = pieces
        else:
            first_day = last_day = days

        lower = DAYS.index(first_day)
        upper = DAYS.index(last_day) + 1
        for day in DAYS[lower:upper]:
            # Drop the am/pm suffix and append seconds for '%H:%M:%S'.
            if 'm' in opens:
                opens = opens.strip(' apm') + ":00"
            if 'm' in closes:
                closes = closes.strip(' apm') + ":00"
            result.add_range(day=day,
                             open_time=opens.strip(),
                             close_time=closes.strip(),
                             time_format='%H:%M:%S')

    return result.as_opening_hours()
def parse_hours(self, restaurant_item):
    """Parse the ``openingHours`` microdata of a restaurant element.

    Opening hours specifications may look like either of the following:

        Open Daily: 11:00 AM - 9:00 PM
        Mon-Thur: 11:00 AM - 8:00 PM,Fri-Sat: 11:00 AM - 9:00 PM,Sunday: 11:00 AM - 8:00 PM

    :param restaurant_item: selector for one restaurant.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()
    opening_hours_str = restaurant_item.xpath(
        './/*[@itemprop="openingHours"]/@content').get()
    if not opening_hours_str:
        return opening_hours.as_opening_hours()

    # [AP] rather than [A|P]: inside a character class '|' is a literal.
    regex = re.compile(
        r'(.+)\:\s*(\d{1,2}:\d{2} [AP]M)\s*-\s*(\d{1,2}:\d{2} [AP]M)',
        flags=re.IGNORECASE)

    for hours in opening_hours_str.split(','):
        match = regex.search(hours.strip())
        if not match:
            continue
        day_range, open_time, close_time = match.groups()
        # Closing at midnight is recorded as the last minute of the day.
        # The captured text is 12-hour with AM/PM, so the check has to be
        # made in that form; a comparison with "00:00" can never match.
        if close_time.upper() == "12:00 AM":
            close_time = "11:59 PM"
        for day in self.get_days(day_range):
            opening_hours.add_range(day,
                                    open_time,
                                    close_time,
                                    time_format="%I:%M %p")

    return opening_hours.as_opening_hours()
def parse_store(self, response):
    """Yield one ``GeojsonPointItem`` built from the page's ld+json block.

    Opening-hours specifications lacking any of ``dayOfWeek``, ``opens`` or
    ``closes`` are ignored.
    """
    raw_json = response.css('script[type="application/ld+json"]::text').get()
    ldjson = json.loads(raw_json)

    required_keys = {"dayOfWeek", "opens", "closes"}
    opening_hours = OpeningHours()
    for spec in ldjson["openingHoursSpecification"]:
        if spec.keys() >= required_keys:
            for day in spec["dayOfWeek"]:
                opening_hours.add_range(day[:2], spec["opens"], spec["closes"])

    yield GeojsonPointItem(
        website=response.url,
        ref=ldjson["branchCode"],
        lat=ldjson["geo"]["latitude"],
        lon=ldjson["geo"]["longitude"],
        name=ldjson["name"],
        phone=ldjson["telephone"],
        addr_full=ldjson["address"]["streetAddress"],
        city=ldjson["address"]["addressLocality"],
        state=ldjson["address"]["addressRegion"],
        postcode=ldjson["address"]["postalCode"],
        opening_hours=opening_hours.as_opening_hours(),
    )
def store_hours(self, response):
    """Convert a ``{day: "<open>-<close>"}`` mapping into opening hours.

    Keys containing ``closedDays``, ``textColor`` or ``timeZone`` are
    ignored, as are values containing ``CLOSED`` and values that cannot be
    parsed as 12-hour times.

    :param response: mapping of day keys to hour strings.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    def as_24h(text):
        # "9:30AM" and "9AM" are both accepted.
        fmt = '%I:%M%p' if ':' in text else '%I%p'
        return datetime.datetime.strptime(text, fmt).strftime('%H:%M')

    non_day_markers = ('closedDays', 'textColor', 'timeZone')
    opening_hours = OpeningHours()

    for day, hrs in response.items():
        if any(marker in day for marker in non_day_markers):
            continue
        if 'CLOSED' in hrs:
            continue

        try:
            start, end = (part.strip() for part in hrs.split('-'))
            open_time = as_24h(start)
            close_time = as_24h(end)
        except ValueError:
            continue

        opening_hours.add_range(DAY_MAPPING[day],
                                open_time=open_time,
                                close_time=close_time,
                                time_format='%H:%M')

    return opening_hours.as_opening_hours()
def parse_location(self, location):
    """Build a ``GeojsonPointItem`` from a location page.

    Hours come from the JSON in the ``js-hours-config`` script; each
    interval's ``start``/``end`` is an integer in HHMM form.
    """
    def first(query):
        return location.xpath(query).extract_first()

    def hhmm(value):
        hour, minute = divmod(value, 100)
        return f'{hour:02}:{minute:02}'

    hours = json.loads(first(
        '//script[@type="text/data"][@class="js-hours-config"]/text()'
    ))

    opening_hours = OpeningHours()
    for row in hours['hours']:
        day = row['day'][:2].capitalize()
        for interval in row['intervals']:
            start_time = hhmm(interval['start'])
            end_time = hhmm(interval['end'])
            opening_hours.add_range(day, start_time, end_time)

    props = {
        'addr_full': first('//meta[@itemprop="streetAddress"]/@content'),
        'lon': float(first('//meta[@itemprop="longitude"]/@content')),
        'lat': float(first('//meta[@itemprop="latitude"]/@content')),
        'city': location.css('span.Address-city::text').extract_first(),
        'postcode': first('//span[@itemprop="postalCode"]/text()'),
        'state': first('//abbr[@itemprop="addressRegion"]/text()'),
        'phone': first('//div[@itemprop="telephone"]/text()'),
        'ref': location.url,
        'website': location.url,
        'opening_hours': opening_hours.as_opening_hours()
    }

    return GeojsonPointItem(**props)
def process_trading_hours(self, store_hours):
    """Collect opening hours, skipping days whose times contain ``NULL``.

    :param store_hours: iterable of mappings with ``DayOfWeek``,
        ``OpeningTime`` and ``ClosingTime`` keys.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()
    for entry in store_hours:
        if 'NULL' in entry['OpeningTime'] or 'NULL' in entry['ClosingTime']:
            continue
        collected.add_range(DAYS[entry['DayOfWeek']],
                            entry['OpeningTime'],
                            entry['ClosingTime'])

    return collected.as_opening_hours()
def parse_store(self, response):
    """Yield a ``GeojsonPointItem`` from the ``js-map-config`` JSON profile.

    Pages without that script yield nothing.
    """
    json_text = response.xpath(
        '//script[@class="js-map-config"]/text()').get()
    if json_text is None:
        # These stores are "opening soon"
        return

    profile = json.loads(json_text)["entities"][0]["profile"]

    def hhmm(value):
        # Interval bounds are integers in HHMM form, e.g. 930 -> "09:30".
        hour, minute = divmod(value, 100)
        return "{:02}:{:02}".format(hour, minute)

    opening_hours = OpeningHours()
    for row in profile["hours"]["normalHours"]:
        day = row["day"][:2].capitalize()
        for interval in row["intervals"]:
            start_time = hhmm(interval["start"])
            end_time = hhmm(interval["end"])
            opening_hours.add_range(day, start_time, end_time)

    address = profile["address"]
    properties = {
        "name": profile["name"],
        "addr_full": address["line1"],
        "ref": profile["meta"]["id"],
        "website": response.url,
        "city": address["city"],
        "state": address["region"],
        "postcode": address["postalCode"],
        "country": address["countryCode"],
        "opening_hours": opening_hours.as_opening_hours(),
        "phone": profile["mainPhone"]["number"],
        "lat": response.xpath('//meta[@itemprop="latitude"]/@content').get(),
        "lon": response.xpath('//meta[@itemprop="longitude"]/@content').get(),
    }

    yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a ``GeojsonPointItem`` for a store page.

    Hours are read from ``<time itemprop="openingHours">`` elements whose
    ``datetime`` attribute looks like ``"<day> <open>-<close>"``. Pages
    without a store number yield nothing, because the number is the item's
    ``ref``.
    """
    o = OpeningHours()
    for d in response.xpath('//time[@itemprop="openingHours"]/@datetime').extract():
        day, times = d.split(' ', 1)
        s, f = times.split('-')

        # They seem to have a bug where they put down 24:00 when they mean noon
        if s == '24:00':
            s = '12:00'

        o.add_range(day, s, f)

    store_number_results = response.xpath('//dt[@class="lsp_number"]/text()')
    if not store_number_results:
        # `ref` would otherwise be unbound when the item is built.
        return
    ref = store_number_results[-1].extract().strip()

    yield GeojsonPointItem(
        lat=response.xpath('//meta[@itemprop="latitude"]/@content').extract_first(),
        lon=response.xpath('//meta[@itemprop="longitude"]/@content').extract_first(),
        phone=response.xpath('//p[@itemprop="telephone"]/text()').extract_first(),
        addr_full=response.xpath('//p[@itemprop="streetAddress"]/text()').extract_first(),
        city=response.xpath('//p[@itemprop="addressLocality"]/text()').extract_first(),
        state=response.xpath('//p[@itemprop="addressRegion"]/text()').extract_first(),
        postcode=response.xpath('//p[@itemprop="postalCode"]/text()').extract_first(),
        website=response.url,
        ref=ref,
        opening_hours=o.as_opening_hours(),
    )
def parse_hours(self, hours):
    """Convert a ``{day: "<open>-<close>"}`` mapping into opening hours.

    Values are 12-hour times such as ``9:00AM``, separated by ``-`` or,
    failing that, by a space. Days marked closed are skipped.

    :param hours: mapping of day to hours text.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    def as_24h(text):
        return datetime.datetime.strptime(text, '%I:%M%p').strftime('%H:%M')

    opening_hours = OpeningHours()

    for day, times in hours.items():
        if times in ("CLOSED", "closed", "Closed"):
            continue

        separator = "-" if "-" in times else " "
        parts = times.split(separator)
        raw_open = parts[0].replace(" ", "")
        raw_close = parts[1].replace(" ", "")

        opening_hours.add_range(day=day,
                                open_time=as_24h(raw_open),
                                close_time=as_24h(raw_close),
                                time_format='%H:%M')

    return opening_hours.as_opening_hours()
def parse_store(self, response):
    """Yield a ``GeojsonPointItem`` from the page's ``ComputerStore`` ld+json.

    :raises ValueError: when no ld+json block has ``@type`` ``ComputerStore``.
    """
    blocks = response.xpath(
        '//script[@type="application/ld+json"]/text()'
    ).extract()
    # Parse lazily so later blocks are not decoded once a match is found.
    parsed = (json.loads(text) for text in blocks)
    data = next(
        (item for item in parsed if item["@type"] == "ComputerStore"), None)
    if data is None:
        raise ValueError

    opening_hours = OpeningHours()
    for spec in data["openingHoursSpecification"]:
        opening_hours.add_range(
            spec["dayOfWeek"][:2],
            spec["opens"] + ":00",
            spec["closes"] + ":00",
        )

    address = data["address"]
    properties = {
        "ref": response.url,
        "name": data["name"],
        "website": response.url,
        "phone": data["telephone"],
        "addr_full": address["streetAddress"],
        "city": address["addressLocality"],
        "state": address["addressRegion"],
        "postcode": address["postalCode"],
        "country": address["addressCountry"],
        "lat": data["geo"]["latitude"],
        "lon": data["geo"]["longitude"],
        "opening_hours": opening_hours.as_opening_hours(),
    }
    yield GeojsonPointItem(**properties)
def parse_hours(self, hours):
    """Parse ``;``-separated entries such as ``Sun - Sat 7:00 AM - 10:00 PM``.

    An entry equal to ``Sun - Sat open 24 hrs`` short-circuits to ``"24/7"``.
    Entries matching neither the day-range nor the single-day layout are
    ignored.

    :param hours: the raw hours text.
    :return: ``"24/7"`` or the result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()

    for entry in [piece.strip() for piece in hours.split(';')]:
        if entry == "Sun - Sat open 24 hrs":
            return "24/7"

        found = re.search(
            r'([A-Za-z]{3})\s-\s([A-Za-z]{3})\s([\d:\sAMP]+)\s-\s([\d:\sAMP]+)',
            entry)
        if found:
            first_day, last_day, opens, closes = found.groups()
        else:
            found = re.search(
                r'([A-Za-z]{3})\s([\d:\sAMP]+)\s-\s([\d:\sAMP]+)', entry)
            if not found:
                continue
            first_day, opens, closes = found.groups()
            last_day = first_day

        lower = DAYS.index(first_day)
        upper = DAYS.index(last_day) + 1
        for day in DAYS[lower:upper]:
            opening_hours.add_range(day=DAY_MAPPING[day],
                                    open_time=opens.strip(),
                                    close_time=closes.strip(),
                                    time_format='%I:%M %p')

    return opening_hours.as_opening_hours()
def store_hours(self, store_hours):
    """Convert a ``{weekday: "<open>-<close>"}`` mapping into opening hours.

    Values are 12-hour times such as ``9:00AM-5:00PM``; a space before the
    AM/PM marker is accepted. Missing, empty or unparsable days are skipped.

    :param store_hours: mapping keyed by lowercase weekday name.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    o = OpeningHours()
    pattern = re.compile(
        r'((1[0-2]|0?[1-9]):([0-5][0-9]) ?([AaPp][Mm]))-((1[0-2]|0?[1-9]):([0-5][0-9]) ?([AaPp][Mm]))')

    def as_24h(text):
        # The pattern allows "9:00 AM", but '%I:%M%p' does not accept the
        # space, so remove it before parsing.
        compact = text.replace(' ', '')
        return datetime.datetime.strptime(compact, '%I:%M%p').strftime('%H:%M')

    for day in ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'):
        hours = store_hours.get(day)
        if not hours:
            continue

        m = pattern.match(hours)
        if m is None:
            continue

        try:
            open_time = as_24h(m.group(1))
            close_time = as_24h(m.group(5))
        except ValueError:
            continue

        o.add_range(day[:2].title(),
                    open_time=open_time,
                    close_time=close_time,
                    time_format='%H:%M')

    return o.as_opening_hours()
def parse_hours(self, data):
    """
    Eg:
    {
        "MonHours": "4:00PM-10:00PM",
        "TueHours": "4:00PM-10:00PM",
        "WedHours": "4:00PM-10:00PM",
        "ThuHours": "4:00PM-10:00PM",
        "FriHours": "4:00PM-11:00PM",
        "SatHours": "11:00AM-11:00PM",
        "SunHours": "11:00AM-10:00PM"
    }

    :param data: mapping holding one ``"<open>-<close>"`` string per key of
        ``DAY_MAPPING``.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    hours = OpeningHours()

    for k, v in DAY_MAPPING.items():
        # partition() never raises, so a missing, empty or dash-less value
        # falls through to the emptiness check instead of blowing up on
        # unpacking.
        open_time, _, close_time = (data.get(k) or '').partition('-')
        if not open_time or not close_time:
            continue
        hours.add_range(day=v,
                        open_time=self.convert_to_24hr(open_time),
                        close_time=self.convert_to_24hr(close_time))

    return hours.as_opening_hours()
def add_details(self, response):
    """Add phone and opening hours to the pending item and yield it.

    The response body is JSON whose ``d`` field holds an HTML fragment with
    ``.hours`` table cells and a ``.phone`` element. Hour cells hold two
    whitespace-separated times lacking the trailing ``M`` of AM/PM.
    """
    properties = response.meta['item_properties']
    opening_hours = OpeningHours()

    fragment = json.loads(response.body_as_unicode()).get('d')
    details = scrapy.Selector(text=fragment)
    day_cells = details.css('.hours th::text').getall()
    hour_cells = details.css('.hours td::text').getall()

    for day_name, span in zip(day_cells, hour_cells):
        opens, closes = span.split()
        opening_hours.add_range(day=day_name[0:2],
                                open_time=opens + "M",
                                close_time=closes + "M",
                                time_format='%I:%M%p')

    phone = details.css('.phone::text').get().replace('Phone: ', '')
    hours_text = opening_hours.as_opening_hours()
    properties.update(phone=phone, opening_hours=hours_text)

    yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one ``GeojsonPointItem`` per location in the JSON response.

    Every key ending in ``_Hours`` is read as a day; its value is either
    ``24 hours`` or ``"<open> to <close>"`` in ``%I %p`` form.
    """
    for location in json.loads(response.body_as_unicode()):
        opening_hours = OpeningHours()

        for key, val in location.items():
            if key.endswith('_Hours'):
                if val == '24 hours':
                    opens = closes = '12 AM'
                else:
                    opens, closes = val.split(' to ')
                opening_hours.add_range(
                    key[:2].capitalize(), opens, closes, '%I %p')

        store_id = location['Store_ID']
        yield GeojsonPointItem(
            ref=store_id,
            lon=location['Longitude'],
            lat=location['Latitude'],
            # name as shown on the Sunoco site
            name=f"Sunoco #{location['Store_ID']}",
            addr_full=location['Street_Address'],
            city=location['City'],
            state=location['State'],
            postcode=location['Postalcode'],
            country='US',
            phone=location['Phone'],
            opening_hours=opening_hours.as_opening_hours(),
            extras={
                'amenity:fuel': True,
                'atm': location['ATM'] == 'Y',
                'car_wash': location['CarWash'],
                'fuel:diesel': location['HasDiesel'] == 'Y',
                'fuel:kerosene': location['HasKero'] == 'Y'
            }
        )
def parse_hours(self, hours):
    """Extract every ``"<day> <open> - <close>"`` run from free text.

    :param hours: text containing day names from ``DAYS`` followed by a
        time range.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()
    matches = re.findall(rf'(({"|".join(DAYS)}) \S+ - \S+)', hours)

    # Each match is (whole text, day); the whole text is split again so the
    # day and both times come from the same four tokens.
    for whole, _day in matches:
        day, opens, _dash, closes = whole.split()
        collected.add_range(day, opens, closes)

    return collected.as_opening_hours()
def parse_hours(self, data):
    """
    Eg:
    {
        "MonHours": "4:00PM-10:00PM",
        "TueHours": "4:00PM-10:00PM",
        "WedHours": "4:00PM-10:00PM",
        "ThuHours": "4:00PM-10:00PM",
        "FriHours": "4:00PM-11:00PM",
        "SatHours": "11:00AM-11:00PM",
        "SunHours": "11:00AM-10:00PM"
    }

    :param data: mapping holding one ``"<open>-<close>"`` string per key of
        ``DAY_MAPPING``.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    hours = OpeningHours()

    for k, v in DAY_MAPPING.items():
        # partition() never raises, so a missing, empty or dash-less value
        # falls through to the emptiness check instead of blowing up on
        # unpacking.
        open_time, _, close_time = (data.get(k) or '').partition('-')
        if not open_time or not close_time:
            continue
        hours.add_range(day=v,
                        open_time=self.convert_to_24hr(open_time),
                        close_time=self.convert_to_24hr(close_time))

    return hours.as_opening_hours()
def parse_hours(self, hours):
    """Parse ``"<day>: <open> - <close>"`` lines into opening hours.

    The ``Hours of Operation:`` heading and lines that do not match the
    layout are skipped. Dots are removed from the times (``a.m.`` -> ``am``)
    before they are read as 12-hour times.

    :param hours: iterable of text lines.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    def as_24h(text):
        fmt = '%I:%M %p' if ":" in text else '%I %p'
        return datetime.strptime(text, fmt).strftime('%H:%M')

    opening_hours = OpeningHours()

    for line in hours:
        line = line.strip()
        if line == "Hours of Operation:":
            continue

        found = re.search(r'(.*?):\s(.*?)\s-\s(.*?)$', line)
        if found is None:  # closed
            continue
        day, opens, closes = found.groups()

        opens = as_24h(opens.replace('.', ''))
        closes = as_24h(closes.replace('.', ''))

        opening_hours.add_range(day=day[:2],
                                open_time=opens,
                                close_time=closes,
                                time_format='%H:%M')

    return opening_hours.as_opening_hours()
def parse_store(self, response):
    """Yield a ``GeojsonPointItem`` for a store page.

    Address and coordinates come from the first entry of the page's ld+json
    list; hours come from the ``span.daypart`` elements. Pages whose ld+json
    is empty or a single object yield nothing.
    """
    data = json.loads(
        response.xpath('//script[@type="application/ld+json"]/text()').extract_first())
    if not data or isinstance(data, dict):
        return

    data = data[0]

    o = OpeningHours()
    for h in response.xpath('//span[@class="daypart"]'):
        d = h.xpath('@data-daypart').extract_first()[:2]
        # The leading '.' keeps the lookup inside this daypart. A bare '//'
        # searches the whole document and would return the first day's
        # times for every day.
        open_time = h.xpath(
            './/span[@class="time-open"]/text()').extract_first()
        close_time = h.xpath(
            './/span[@class="time-close"]/text()').extract_first()
        o.add_range(d, open_time, close_time, '%I:%M%p')

    canonical_url = response.xpath(
        '//head/link[@rel="canonical"]/@href').extract_first()
    address = data.get('address', {})

    yield GeojsonPointItem(
        lat=float(data['geo']['latitude']),
        lon=float(data['geo']['longitude']),
        website=canonical_url,
        ref=canonical_url,
        opening_hours=o.as_opening_hours(),
        addr_full=address.get('streetAddress'),
        city=address.get('addressLocality'),
        state=address.get('addressRegion'),
        postcode=address.get('postalCode'),
        country=address.get('addressCountry'),
        phone=address.get('telephone'),
    )
def parse_hours(self, hours):
    """Parse ``"<day>|<open>|<close>"`` entries into opening hours.

    :param hours: iterable of pipe-separated strings.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for entry in hours:
        day_name, opens, closes = entry.split('|')
        collected.add_range(day=day_name[:2],
                            open_time=opens,
                            close_time=closes)

    return collected.as_opening_hours()
def parse_store(self, response):
    """Yield a ``GeojsonPointItem`` for each ``GroceryStore`` ld+json block."""
    scripts = response.xpath(
        '//script[@type="application/ld+json"]/text()'
    ).extract()

    for raw in scripts:
        data = json.loads(raw)
        if data["@type"] == "GroceryStore":
            opening_hours = OpeningHours()
            for spec in data["openingHoursSpecification"]:
                day = spec["dayOfWeek"][:2]
                opening_hours.add_range(day, spec["opens"], spec["closes"])

            geo = data["geo"]
            address = data["address"]
            yield GeojsonPointItem(
                ref=response.url,
                website=response.url,
                name=data["name"],
                phone=data["telephone"],
                lat=geo["latitude"],
                lon=geo["longitude"],
                addr_full=address["streetAddress"],
                city=address["addressLocality"],
                postcode=address["postalCode"],
                country=address["addressCountry"],
                opening_hours=opening_hours.as_opening_hours(),
            )
def parse(self, response):
    """Yield one ``GeojsonPointItem`` per location in the JSON response.

    ``opening_hours`` is only set when the location's ``open_hours`` JSON is
    non-empty.
    """
    for data in json.loads(response.body_as_unicode()):
        location = data['location']
        properties = {
            'ref': location['id'],
            'name': location['name'],
            'lat': location['latitude'],
            'lon': location['longitude'],
            'addr_full': location['street_address'],
            'city': location['locality'],
            'state': location['region'],
            'postcode': location['postal_code'],
            'phone': location['phone'],
            'website': 'https://www.potbelly.com/stores/%s' % location['id'],
        }

        hours = json.loads(location['meta']['open_hours'])
        if hours:
            oh = OpeningHours()
            for day_name, ranges in hours.items():
                for span in ranges:
                    oh.add_range(day_name[:2],
                                 span['opens_at'],
                                 span['closes_at'],
                                 '%H:%M:%S')
            properties['opening_hours'] = oh.as_opening_hours()

        yield GeojsonPointItem(**properties)
def parse_hours(self, elem):
    """Read opening hours from ``<dt>`` day labels and ``<dd>`` hour texts.

    Hour texts that are not exactly ``<open>-<close>`` are skipped. Times
    are 12-hour, with or without minutes (``9:30AM`` or ``9AM``).

    :param elem: selector containing the ``dt``/``dd`` pairs.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    def as_24h(text):
        fmt = '%I:%M%p' if ':' in text else '%I%p'
        return datetime.datetime.strptime(text, fmt).strftime('%H:%M')

    day_labels = elem.xpath('.//dt/text()').extract()
    hour_texts = elem.xpath('.//dd/text()').extract()

    opening_hours = OpeningHours()
    for label, text in zip(day_labels, hour_texts):
        day = DAY_MAPPING[label.replace(':', '')]
        pieces = text.split('-')
        if len(pieces) != 2:
            continue
        opens = as_24h(pieces[0])
        closes = as_24h(pieces[1])
        opening_hours.add_range(day=day, open_time=opens, close_time=closes)

    return opening_hours.as_opening_hours()
def get_opening_hours(self, store):
    """Build opening hours from ``store["coreOpeningHours"]``.

    A closing time of ``24:00`` is recorded as ``23:59``.

    :param store: mapping with a ``coreOpeningHours`` list of
        ``{"day", "open", "close"}`` entries.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    hours = OpeningHours()
    for entry in store["coreOpeningHours"]:
        short_day = entry["day"][:2]
        opens = entry["open"]
        closes = entry["close"].replace("24:00", "23:59")
        hours.add_range(short_day, opens, closes)
    return hours.as_opening_hours()
def parse_ca_store(self, response):
    """Attach opening hours to the pending properties and yield the item.

    The response body is JSON whose ``data`` field is an HTML fragment with
    ``span.hours`` lines of the form ``"<day>: <open>-<close>"`` or
    ``"<day>: Closed"``. Lines that cannot be parsed are skipped.
    """
    properties = response.meta["properties"]
    data = json.loads(response.body_as_unicode())["data"]

    hours = scrapy.Selector(
        text=data).xpath('//span[@class="hours"]/text()').extract()
    pattern = re.compile(r'([a-z]+)\s*:\s*(.*)', re.IGNORECASE)

    opening_hours = OpeningHours()
    for hour in hours:
        try:
            day, open_close = pattern.search(hour.strip()).groups()
            if open_close == "Closed":
                continue
            open_time, close_time = open_close.split('-')
            opening_hours.add_range(day=day[:2],
                                    open_time=open_time,
                                    close_time=close_time,
                                    time_format='%I:%M%p')
        except Exception:
            # Best effort: skip unparsable lines, but let KeyboardInterrupt
            # and SystemExit through, which a bare `except:` would swallow.
            continue

    hours_text = opening_hours.as_opening_hours()
    if hours_text:
        properties["opening_hours"] = hours_text

    yield GeojsonPointItem(**properties)
def parse_store(self, response, city, state, address, phone):
    """Yield a ``GeojsonPointItem`` for a store page.

    Hours are read from the workspace paragraphs after the first two, each
    of the form ``"<day>:<open>-<close>"``. ``Sun`` lines and ``Closed``
    days are not recorded. Coordinates are cut out of the inline script
    that assigns ``lat``.
    """
    opening_hours = OpeningHours()
    paragraphs = response.xpath(
        '//div[@class="workspacearea"]/div/div/p/text()').extract()

    for line in paragraphs[2:]:
        day, span = line.strip().split(':')
        if day == 'Sun':
            continue
        bounds = span.split('-')
        if bounds[0] == 'Closed':
            continue
        opening_hours.add_range(
            day=day[:2],
            open_time=bounds[0].strip() + ":00",
            close_time=bounds[1].strip() + ":00")

    coordinate_source = response.xpath('//script/text()').re_first(
        'lat .*[\n].*')
    store_coordinates = coordinate_source.split(';')[:2]

    yield GeojsonPointItem(**{
        'addr_full': address,
        'city': city,
        'phone': phone,
        'state': state,
        'lat': store_coordinates[0].split('"')[1],
        'lon': store_coordinates[1].split('"')[1],
        'opening_hours': opening_hours.as_opening_hours(),
        'ref': re.search(r'.+/(.+?)/?(?:\.inc|$)', response.url).group(1)
    })
def parse_hours(self, response):
    """Read ``openingHours`` elements inside ``div.Core-hours``.

    A closing time of ``00:00`` is recorded as ``23:59``.

    :return: result of ``OpeningHours.as_opening_hours()``; empty when the
        page has no ``schema.org/Store`` element.
    """
    opening_hours = OpeningHours()

    store = response.xpath('//*[@itemtype="http://schema.org/Store"]')
    if not store:
        return opening_hours.as_opening_hours()

    # If holiday hours are currently in effect, you may also see holiday
    # openingHours specified in the HTML, but they should appear outside of
    # the <div class="Core-hours" />.
    all_hours = store.xpath(
        '//*/div[@class="Core-hours"]//*[@itemprop="openingHours"]')
    regex = re.compile(
        r'(Su|Mo|Tu|We|Th|Fr|Sa)\s+(\d{2}:\d{2})\s*-(\d{2}:\d{2})')

    for element in all_hours:
        match = re.search(regex, element.get().strip())
        if not match:
            continue
        day_of_week, open_time, close_time = match.groups()
        if close_time == "00:00":
            close_time = "23:59"
        opening_hours.add_range(day=day_of_week,
                                open_time=open_time,
                                close_time=close_time)

    return opening_hours.as_opening_hours()
def store_hours(self, hours):
    """Parse alternating day-range / time-range strings into opening hours.

    ``hours[i]`` holds comma-separated days or ``"<start> - <end>"`` day
    ranges; ``hours[i + 1]`` holds ``"<open> - <close>"`` with 12-hour times
    (``Noon`` is accepted) or ``Closed``. Pairs that cannot be parsed are
    skipped.

    :param hours: flat sequence of alternating day and time strings.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()

    for i in range(0, len(hours), 2):
        try:
            day_ranges = hours[i]
            times = hours[i + 1].replace("Noon", "12:00 p.m.").replace('.', '')
            if times == 'Closed':
                continue

            open_time, close_time = re.search(
                r'([\d:]+\s[apm]+)\s-\s([\d:]+\s[apm]+)', times).groups()

            for day_range in day_ranges.split(','):
                if '-' in day_range:
                    start_day, end_day = day_range.split(' - ')
                else:
                    start_day, end_day = day_range, day_range
                start_day = start_day.strip(' :')
                end_day = end_day.strip(' :')

                first = DAY_MAPPING.index(start_day)
                last = DAY_MAPPING.index(end_day)
                # Slice ends are exclusive: without the + 1 the end day is
                # left out and a single day yields nothing at all.
                for day in DAY_MAPPING[first:last + 1]:
                    opening_hours.add_range(day=day[:2],
                                            open_time=open_time,
                                            close_time=close_time,
                                            time_format='%I:%M %p')
        except Exception:
            # Best effort: skip pairs that do not fit the expected layout.
            continue

    return opening_hours.as_opening_hours()
def parse_hours(self, hours):
    """Build opening hours from ``dayOfWeek``/``opens``/``closes`` entries.

    :param hours: iterable of mappings with those three keys.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for spec in hours:
        day = DAY_MAPPING[spec["dayOfWeek"]]
        collected.add_range(day=day,
                            open_time=spec["opens"],
                            close_time=spec["closes"])

    return collected.as_opening_hours()
def parse_hours(self, hours):
    """Parse ``"<two-letter day> <open>-<close>"`` entries.

    Entries whose time part is ``Closed`` are skipped.

    :param hours: iterable of strings.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for entry in hours:
        found = re.search(r'([a-z]{2})\s(.*)', entry, re.IGNORECASE)
        day, span = found.groups()
        if span != 'Closed':
            opens, closes = span.split('-')
            collected.add_range(day, open_time=opens, close_time=closes)

    return collected.as_opening_hours()
def parse_hours(self, elements):
    """Parse ``"<day>: <open> - <close>"`` strings with 12-hour times.

    :param elements: iterable of strings such as ``"Mon: 9:00 AM - 5:00 PM"``.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    def as_24h(text):
        return datetime.datetime.strptime(text, '%I:%M %p').strftime('%H:%M')

    opening_hours = OpeningHours()

    for item in elements:
        found = re.search(
            r'([a-z]{3}):.([0-9:\sAPM]+)\s-\s([0-9:\sAPM]+)',
            item,
            flags=re.IGNORECASE)
        day, opens, closes = found.groups()
        opening_hours.add_range(day=day[0:2],
                                open_time=as_24h(opens),
                                close_time=as_24h(closes))

    return opening_hours.as_opening_hours()
def parse_hours(self, hours):
    """Parse a flat list alternating day names and ``<open>-<close>`` texts.

    :param hours: sequence where even positions are day names and odd
        positions are 12-hour time ranges such as ``9:00 AM-5:00 PM``.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for position in range(0, len(hours), 2):
        day_name = hours[position]
        opens, closes = (
            part.strip() for part in hours[position + 1].split('-'))
        collected.add_range(day=day_name[:2],
                            open_time=opens,
                            close_time=closes,
                            time_format='%I:%M %p')

    return collected.as_opening_hours()
def test_twentyfour_seven():
    """Seven days of 0:00-23:59 collapse to ``24/7``."""
    o = OpeningHours()
    for day in ('Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su'):
        o.add_range(day, '0:00', '23:59')

    assert o.as_opening_hours() == '24/7'
def parse_hours(self, response):
    """Read opening hours from the ``business:hours:*`` meta tags.

    The day, start and end lists are paired positionally.

    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    def contents(prop):
        return response.xpath(prop).extract()

    days = contents('//meta[@property="business:hours:day"]/@content')
    start_times = contents('//meta[@property="business:hours:start"]/@content')
    end_times = contents('//meta[@property="business:hours:end"]/@content')

    opening_hours = OpeningHours()
    for day, opens, closes in zip(days, start_times, end_times):
        opening_hours.add_range(day=DAY_MAPPING[day],
                                open_time=opens,
                                close_time=closes)

    return opening_hours.as_opening_hours()
def parse_hours(self, hours):
    """Parse ``"<day number>,<HHMM>,<HHMM>;"`` records into opening hours.

    Example input:
    ``1,1030,2200;2,1030,2200;3,1030,2200;4,1030,2200;5,1030,2300;6,1030,2300;7,1030,2200;``

    :param hours: the raw record string; may be empty.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()

    if hours:
        # filter(None, ...) drops the empty piece after the trailing ';'.
        for record in filter(None, hours.split(';')):
            day_number, from_hr, to_hr = record.split(',')
            opening_hours.add_range(day=NUMBER_DAY[day_number],
                                    open_time=from_hr,
                                    close_time=to_hr,
                                    time_format='%H%M')

    return opening_hours.as_opening_hours()
def parse_hours(self, days):
    """Build opening hours from a JSON list of per-day interval records.

    Only the first interval of each day is used; days without intervals are
    skipped.

    :param days: JSON text of a list of ``{"day", "intervals"}`` objects,
        where each interval has ``start`` and ``end`` in HHMM form.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()

    for day in json.loads(days):
        if not day["intervals"]:
            continue
        first = day["intervals"][0]
        # Pad to four digits so '%H%M' reads 30 as 00:30 and 0 as 00:00.
        # Unpadded, "30" parses as 03:00 and "0" does not parse at all.
        open_time = str(first["start"]).zfill(4)
        close_time = str(first["end"]).zfill(4)
        opening_hours.add_range(day=DAY_MAPPING[day["day"]],
                                open_time=open_time,
                                close_time=close_time,
                                time_format="%H%M")

    return opening_hours.as_opening_hours()
def normalize_hours(self, hours):
    """Build opening hours from per-day interval records.

    Records whose ``holidayHoursIsRegular`` equals ``False`` are skipped.
    Interval bounds are numbers in HHMM form and are zero-padded to four
    digits.

    :param hours: iterable of ``{"day", "intervals", ...}`` mappings.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for record in hours:
        if record.get('holidayHoursIsRegular') == False:
            continue

        short_day = record['day'].title()[:2]
        for interval in record['intervals']:
            opens = '%04d' % interval['start']
            closes = '%04d' % interval['end']
            collected.add_range(short_day, opens, closes, '%H%M')

    return collected.as_opening_hours()
def parse_store(self, response):
    """Yield a ``GeojsonPointItem`` for a hospital location page.

    Coordinates come from the ``google.maps.LatLng(...)`` call in the inline
    script; address fields come from the vcard block. ``opening_hours`` is
    only set when hours were found and are not the bare ``'Mo-Su '`` text.
    """
    elem = response.xpath('//div[contains(@class, "our-hospitals-location")]')

    def first(query):
        return elem.xpath(query).extract_first()

    script_body = ' '.join(elem.xpath('.//script/text()').extract())
    coordinates = re.search(
        r'.*google.maps.LatLng\(([0-9.-]+),\s([0-9.-]+)\)', script_body)
    lat, lon = coordinates.groups()

    # use last 3 elements of the store url as unique identifier (store number does not appear to be unique)
    url_path = urllib.parse.urlsplit(response.url).path
    ref = "_".join(url_path.split('/')[-3:])

    number_text = first('//div[@class="vcard"]/p[@id="hospitalAddressHospitalNumber"]/text()')
    number = re.search(r'Hospital\sNumber:\s+(\d+)', number_text).group(1)

    properties = {
        'name': first('//div[@class="vcard"]/p[@class="fn"]/text()'),
        'addr_full': first('//div[@class="vcard"]/span[@class="street-address"]/text()'),
        'phone': first('//div[@class="vcard"]/p[@id="hospitalAddressPhone"]/text()'),
        'city': first('//div[@class="vcard"]/span[@class="region"]/text()'),
        'state': first('//div[@class="vcard"]/span[@class="state"]/text()'),
        'postcode': first('//div[@class="vcard"]/span[@class="postal-code"]/text()'),
        'ref': ref,
        'website': response.url,
        'lat': lat,
        'lon': lon,
        'extras': {
            'number': number
        }
    }

    opening_hours = OpeningHours()
    day_texts = elem.xpath('//div[@class="hours"]/div[contains(@class, "day")]/@content').extract()
    for text in day_texts:
        found = re.search(r'([A-Za-z]{2})\s([\d:]+)-([\d:]+)', text)
        if not found:
            continue
        day, opens, closes = found.groups()
        opening_hours.add_range(day=day, open_time=opens, close_time=closes)

    hours = opening_hours.as_opening_hours()
    if hours and hours != 'Mo-Su ':
        properties['opening_hours'] = hours

    yield GeojsonPointItem(**properties)
def parse_hours(self, hours):
    """Build opening hours from per-day records with all-day flags.

    ``IsOpenedAllDay`` records become 0:00-23:59, ``IsClosedAllDay`` records
    are skipped, and other records use ``Range['Open']``/``Range['Close']``
    when both are present.

    :param hours: iterable of mappings with ``DayNumber``, ``Range``,
        ``IsOpenedAllDay`` and ``IsClosedAllDay`` keys.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for record in hours:
        day = DAY_MAPPING[record['DayNumber']]
        opens = record['Range'].get('Open')
        closes = record['Range'].get('Close')

        if record['IsOpenedAllDay']:
            opens, closes = '0:00', '23:59'
        elif record['IsClosedAllDay']:
            continue

        if not (opens and closes):
            continue
        collected.add_range(day=day, open_time=opens, close_time=closes)

    return collected.as_opening_hours()
def test_mixed_ranges():
    """Consecutive days with equal hours are grouped; others stand alone."""
    schedule = (
        ('Mo', '08:00', '17:00'),
        ('Tu', '08:00', '17:00'),
        ('We', '09:00', '18:00'),
        ('Th', '09:00', '18:00'),
        ('Fr', '07:00', '17:00'),
        ('Su', '09:00', '17:00'),
    )

    o = OpeningHours()
    for day, opens, closes in schedule:
        o.add_range(day, opens, closes)

    assert o.as_opening_hours() == "Mo-Tu 08:00-17:00; We-Th 09:00-18:00; Fr 07:00-17:00; Su 09:00-17:00"
def test_closed_sunday():
    """Monday to Saturday with equal hours collapse into one range."""
    o = OpeningHours()
    for day in ('Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa'):
        o.add_range(day, '07:00', '17:00')

    assert o.as_opening_hours() == "Mo-Sa 07:00-17:00"
def test_two_ranges():
    """A gap on Thursday splits the week into two ranges."""
    o = OpeningHours()
    for day in ('Mo', 'Tu', 'We'):
        o.add_range(day, '07:00', '17:00')
    for day in ('Fr', 'Sa'):
        o.add_range(day, '08:00', '17:00')

    assert o.as_opening_hours() == "Mo-We 07:00-17:00; Fr-Sa 08:00-17:00"
def test_closed_tuesday():
    """A missing Tuesday leaves Monday alone and groups the rest."""
    o = OpeningHours()
    for day in ('Mo', 'We', 'Th', 'Fr', 'Sa', 'Su'):
        o.add_range(day, '07:00', '17:00')

    assert o.as_opening_hours() == "Mo 07:00-17:00; We-Su 07:00-17:00"
def parse_hours(self, elements):
    """Read opening hours from ``dayOfWeek``/``opens``/``closes`` microdata.

    The page labels the current day ``TODAY``; its real name is taken to be
    the one key of ``day_mapping`` missing from the listed days, and it is
    put at the front of the day list.

    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()

    days = elements.xpath('//span[@itemprop="dayOfWeek"]/text()').extract()
    missing_days = set(day_mapping) - set(days)
    today = missing_days.pop()
    days.remove('TODAY')
    days.insert(0, today)

    open_hours = elements.xpath('//div[@class="store-hours"]/time[@itemprop="opens"]/@content').extract()
    close_hours = elements.xpath('//div[@class="store-hours"]/time[@itemprop="closes"]/@content').extract()

    store_hours = {
        name: [opens, closes]
        for name, opens, closes in zip(days, open_hours, close_hours)
    }

    for day, (opens, closes) in store_hours.items():
        if 'CLOSED' in (opens, closes):
            continue
        opening_hours.add_range(day=day_mapping[day],
                                open_time=convert_24hour(opens),
                                close_time=convert_24hour(closes))

    return opening_hours.as_opening_hours()
def parse_hours(self, items):
    """Build opening hours from ``Day``/``Open``/``Close`` records.

    ``Closed`` days and days closing with ``Open for Special Events`` are
    skipped; ``Open 24 Hrs`` days are recorded as 12:00 AM to 12:00 AM.

    :param items: iterable of mappings with ``Day``, ``Open`` and ``Close``.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    collected = OpeningHours()

    for record in items:
        opens = record["Open"]
        closes = record["Close"]

        if 'Closed' in (closes, opens):
            continue
        if 'Open 24 Hrs' in (closes, opens):
            opens = closes = '12:00 AM'
        elif closes == 'Open for Special Events':
            continue

        collected.add_range(day=record["Day"][:2],
                            open_time=opens,
                            close_time=closes,
                            time_format='%I:%M %p')

    return collected.as_opening_hours()
def parse(self, response):
    """Yield one ``GeojsonPointItem`` per entry of the JSON ``records`` list.

    ``hoursOfOperation`` maps a day name to a list of ``[open, close]``
    pairs; ``geo`` is read as ``[lon, lat]``.
    """
    payload = json.loads(response.text)

    for shop in payload['records']:
        hours = OpeningHours()
        for day_name, ranges in shop['hoursOfOperation'].items():
            for span in ranges:
                hours.add_range(day_name[:2], span[0], span[1])

        yield GeojsonPointItem(
            website=shop['website'],
            ref=shop['branch'],
            opening_hours=hours.as_opening_hours(),
            phone=shop['phone'],
            addr_full=shop['address'],
            postcode=shop['postalCode'],
            city=shop['city'],
            state=shop['province'],
            country=shop['country'],
            lat=shop['geo'][1],
            lon=shop['geo'][0],
        )
def parse_hours(self, days):
    """Parse ``"<weekday>: <open>-<close>"`` lines into opening hours.

    The times carry no AM/PM marker: ``AM`` is appended to the opening time
    and ``PM`` to the closing time. Blank lines, ``Closed`` days, days
    closing at 24:00 or 23:59, and lines that cannot be parsed are skipped.

    :param days: iterable of text lines.
    :return: result of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()

    for day in days:
        day = day.strip('\n ')
        if not day:
            continue

        # Split on the first colon only: the times contain colons too.
        weekday, _, hours = day.partition(':')
        try:
            open_time, close_time = (part.strip() for part in hours.split('-'))
            if not open_time or open_time == 'Closed':
                continue
            if close_time in ('24:00', '23:59'):  # two oddball banks
                continue

            opening_hours.add_range(day=weekday[:2],
                                    open_time="{} AM".format(open_time),
                                    close_time="{} PM".format(close_time),
                                    time_format="%I:%M %p")
        except Exception:
            # Best effort: skip unparsable lines, but let KeyboardInterrupt
            # and SystemExit through, which a bare `except:` would swallow.
            continue

    return opening_hours.as_opening_hours()
def parse_store(self, response):
    """Scrape a single store page and yield it as a ``GeojsonPointItem``.

    Location, contact and address fields are read with XPath queries; the
    page URL is used as ``ref``.  Opening hours come from the ``li.h-day``
    rows under ``#LocalMapAreaOpenHourBanner``, each expected to hold a day
    name and an ``"<open> - <close>"`` range in 12-hour format.
    """
    def first(query):
        # First match of an XPath query against the page, or None.
        return response.xpath(query).extract_first()

    properties = {
        'ref': response.url,
        'lat': first('//div[@class="map-container"]/div/@data-latitude'),
        'lon': first('//div[@class="map-container"]/div/@data-longitude'),
        'phone': first('//a[@class="phone-link"]/span/text()'),
        'addr_full': first('//span[@itemprop="streetAddress"]/text()').strip(),
        'name': first('//meta[@itemprop="name legalName"]/@content'),
        # The last character of the locality text is dropped.
        'city': first('//span[@itemprop="addressLocality"]/text()')[:-1],
        'state': first('//span[@itemprop="addressRegion"]/text()').strip(),
        'postcode': first('//span[@itemprop="postalCode"]/text()').strip(),
    }

    hours = OpeningHours()
    for row in response.css('#LocalMapAreaOpenHourBanner li.h-day'):
        weekday = row.xpath('em/span/text()').extract_first().strip()[:2]
        span = row.xpath('em/text()').extract_first().strip(':').strip()
        opens, closes = span.split(' - ')
        hours.add_range(weekday, opens, closes, '%I:%M %p')

    properties['opening_hours'] = hours.as_opening_hours()
    yield GeojsonPointItem(**properties)
def parse_hours(self, elements):
    """Build an opening-hours string from the page's ``data-days`` JSON.

    The ``data-days`` attribute of the location-hours wrapper is parsed as a
    JSON list of weekdays, each with a ``day`` key and a list of
    ``intervals`` holding ``start`` / ``end`` clock values.  Every interval of
    a day is added, so split-day hours are kept.  A close value of midnight
    (``0``, ``0000`` or ``2400``) is recorded as ``23:59``.

    Returns the value of ``OpeningHours.as_opening_hours()``; this is the
    serialization of an empty ``OpeningHours`` when the attribute is absent.
    """
    opening_hours = OpeningHours()

    raw_days = elements.xpath('//div[@class="c-location-hours-details-wrapper js-location-hours"]/@data-days').extract_first()
    if not raw_days:
        # No hours block on the page: nothing to parse.
        return opening_hours.as_opening_hours()

    def as_clock(value):
        # Values may be unpadded (e.g. 900 or 0 -- presumably integers in the
        # JSON), so left-pad to four digits before slicing into HH:MM.
        digits = str(value).zfill(4)
        return digits[0:2] + ":" + digits[2:4]

    for weekday in json.loads(raw_days):
        for interval in weekday['intervals'] or []:
            open_time = as_clock(interval['start'])

            if str(interval['end']).zfill(4) in {'0000', '2400'}:
                close_time = "23:59"
            else:
                close_time = as_clock(interval['end'])

            opening_hours.add_range(day=day_mapping[weekday['day']],
                                    open_time=open_time,
                                    close_time=close_time)

    return opening_hours.as_opening_hours()
def parse_hours(self, hours):
    """Convert a ``';'``-separated hours string into opening-hours notation.

    Example segment: ``"Sun - Sat 7:00 AM - 10:00 PM"``.  A segment may name
    a day range (``"Sun - Sat ..."``) or a single day (``"Sun ..."``);
    segments matching neither form are ignored.  As soon as a segment equal
    to ``"Sun - Sat open 24 hrs"`` is seen, ``"24/7"`` is returned.

    Otherwise returns the value of ``OpeningHours.as_opening_hours()``.
    """
    result = OpeningHours()

    for segment in hours.split(';'):
        segment = segment.strip()

        if segment == "Sun - Sat open 24 hrs":
            return "24/7"

        found = re.search(r'([A-Za-z]{3})\s-\s([A-Za-z]{3})\s([\d:\sAMP]+)\s-\s([\d:\sAMP]+)', segment)
        if found is not None:
            first_day, last_day, opens, closes = found.groups()
        else:
            found = re.search(r'([A-Za-z]{3})\s([\d:\sAMP]+)\s-\s([\d:\sAMP]+)', segment)
            if found is None:
                continue
            first_day, opens, closes = found.groups()
            last_day = first_day

        begin = DAYS.index(first_day)
        end = DAYS.index(last_day) + 1
        opens, closes = opens.strip(), closes.strip()

        for name in DAYS[begin:end]:
            result.add_range(day=DAY_MAPPING[name],
                             open_time=opens,
                             close_time=closes,
                             time_format='%I:%M %p')

    return result.as_opening_hours()
def parse_hours(self, elem):
    """Build an opening-hours string from a ``<dt>``/``<dd>`` hours list.

    ``<dt>`` texts are day names (any ``':'`` removed before the
    ``DAY_MAPPING`` lookup) and ``<dd>`` texts are ``"<open>-<close>"`` ranges
    in 12-hour notation, with or without minutes (``%I:%M%p`` or ``%I%p``).
    Rows whose hours text does not split into exactly two parts on ``'-'``
    are skipped.

    Returns the value of ``OpeningHours.as_opening_hours()``.
    """
    def to_24h(text):
        # Choose the 12-hour format by whether minutes are present.
        fmt = '%I:%M%p' if ':' in text else '%I%p'
        return datetime.datetime.strptime(text, fmt).strftime('%H:%M')

    labels = elem.xpath('.//dt/text()').extract()
    spans = elem.xpath('.//dd/text()').extract()

    result = OpeningHours()
    for label, span in zip(labels, spans):
        weekday = DAY_MAPPING[label.replace(':', '')]

        pieces = span.split('-')
        if len(pieces) != 2:
            continue
        opens, closes = pieces

        result.add_range(day=weekday,
                         open_time=to_24h(opens),
                         close_time=to_24h(closes))

    return result.as_opening_hours()
def parse_hours(self, elem):
    """Build an opening-hours string from a ``<dt>``/``<dd>`` hours list.

    ``<dt>`` texts name the day(s) and ``<dd>`` texts give
    ``"<open> - <close>"`` in 12-hour format, where ``Noon`` stands for
    ``12:00 PM`` and ``Closed`` rows are skipped.  A day label may be:

    * a range, ``"<start> - <end>"``: every ``DAY_MAPPING`` key from start to
      end (in the mapping's key order) is added;
    * a comma-separated list: exactly the listed days are added;
    * a single day name.

    Returns the value of ``OpeningHours.as_opening_hours()``.
    """
    opening_hours = OpeningHours()
    day_labels = elem.xpath('.//dt/text()').extract()
    hour_labels = elem.xpath('.//dd/text()').extract()
    ordered_days = list(DAY_MAPPING.keys())

    for label, times in zip(day_labels, hour_labels):
        if times == "Closed":
            continue

        start_time, end_time = times.split(' - ')
        if start_time == 'Noon':
            start_time = '12:00 PM'
        if end_time == 'Noon':
            end_time = '12:00 PM'

        if '-' in label:
            start_day, end_day = label.split(' - ')
            first = ordered_days.index(start_day)
            last = ordered_days.index(end_day)
            names = ordered_days[first:last + 1]
        elif ',' in label:
            # A comma separates individual days, not the ends of a range:
            # add only the days actually listed (any number of them).
            names = [name.strip() for name in label.split(',')]
        else:
            names = [label]

        for name in names:
            opening_hours.add_range(day=DAY_MAPPING[name],
                                    open_time=start_time,
                                    close_time=end_time,
                                    time_format='%I:%M %p')

    return opening_hours.as_opening_hours()
def parse_hours(self, elements):
    """Build an opening-hours string from location-hours table rows.

    Each element of *elements* is a row holding a day cell and an intervals
    cell.  Rows whose intervals text is ``"Closed"`` are skipped, rows marked
    ``"Open 24 hours"`` are recorded as ``0:00``-``23:59``, and all other
    rows use the first open/close span pair, given as 12-hour times with an
    AM/PM suffix.

    Returns the value of ``OpeningHours.as_opening_hours()``.
    """
    def to_24h(text):
        # %p only influences the parsed hour when combined with %I; with %H
        # the AM/PM marker is ignored and "9:00 PM" would come out as 09:00.
        try:
            parsed = datetime.datetime.strptime(text, '%I:%M %p')
        except ValueError:
            # Hours outside the 12-hour clock (e.g. "13:00 PM") are parsed
            # with the previous format so they are not newly rejected.
            parsed = datetime.datetime.strptime(text, '%H:%M %p')
        return parsed.strftime('%H:%M')

    opening_hours = OpeningHours()

    for elem in elements:
        day = elem.xpath('.//td[@class="c-location-hours-details-row-day"]/text()').extract_first()
        intervals = elem.xpath('.//td[@class="c-location-hours-details-row-intervals"]')

        if intervals.xpath('./text()').extract_first() == "Closed":
            continue

        if intervals.xpath('./span/text()').extract_first() == "Open 24 hours":
            opening_hours.add_range(day=DAY_MAPPING[day],
                                    open_time='0:00',
                                    close_time='23:59')
            continue

        start_time = elem.xpath(
            './/span[@class="c-location-hours-details-row-intervals-instance-open"]/text()').extract_first()
        end_time = elem.xpath(
            './/span[@class="c-location-hours-details-row-intervals-instance-close"]/text()').extract_first()
        opening_hours.add_range(day=DAY_MAPPING[day],
                                open_time=to_24h(start_time),
                                close_time=to_24h(end_time))

    return opening_hours.as_opening_hours()
def test_no_opening_hours():
    """An OpeningHours with no ranges added serializes to the empty string."""
    assert OpeningHours().as_opening_hours() == ''