Beispiel #1
0
    def process_schedule(self, meal, dayStr):
        """Build one schedule entry from a stop's markup and save it.

        Args:
            meal: Selector for a single stop; it must support
                ``xpath(...).extract()``.
            dayStr: Key into ``DAYS_MAP`` identifying the day of the stop.

        Returns:
            None. A stop without any ``<address>`` text is skipped silently.

        Raises:
            IndexError: If the hours element is missing, the hours text has
                no "-" separator, or the last address part has no second
                whitespace-separated token to use as the ZIP.
            KeyError: If ``dayStr`` is not a key of ``DAYS_MAP``.
        """
        # Evaluate the address XPath once and reuse the result.
        addressTexts = meal.xpath('.//address/text()').extract()
        if not addressTexts:
            return

        address = addressTexts[0].strip()
        hoursStr = meal.xpath(
            './/div[@id="hours"]/text()').extract()[0].strip()

        # The hours text is "<start>-<end>"; split it only once.
        hoursParts = hoursStr.split("-")
        startTimeStr = hoursParts[0].lower().strip()
        endTimeStr = hoursParts[1].lower().strip()
        if "am" not in startTimeStr and "pm" not in startTimeStr:
            # The start carries no am/pm marker of its own, so append the
            # last two characters of the end time to it.
            startTimeStr += endTimeStr[-2:]
        startTime = self.time_in_seconds(startTimeStr)
        endTime = self.time_in_seconds(endTimeStr)

        item = SchedulesItem()
        item['day'] = DAYS_MAP[dayStr]
        item['address'] = utility.formalize_address(address)
        item['start_time'] = startTime
        item['end_time'] = endTime

        addressParts = item['address'].split(",")
        # Deliberately len() - 2 rather than [-2]: for an address without any
        # comma this wraps around to the only part instead of raising.
        item['city'] = addressParts[len(addressParts) - 2].strip()
        item['state'] = "CA"
        # Second whitespace-separated token of the last comma-separated part.
        item['zip'] = addressParts[-1].strip().split()[1]

        coordinates = utility.get_coordinate_by_address(item['address'])
        item['latitude'] = coordinates['latitude']
        item['longitude'] = coordinates['longitude']
        item['truck_id'] = 4

        schedule = utility.convert_to_pojo(item)
        schedule.save()

        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(item)
Beispiel #2
0
    def parse(self, response):
        """Replace the stored schedules of truck 7 with the scraped ones.

        Existing rows with ``truck_id == 7`` are deleted and one row is saved
        per day of every listed day range, all inside one transaction. On any
        ``Exception`` the transaction is rolled back and the error is logged;
        the commit happens only when the whole page was processed.

        Args:
            response: Page object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the spider class).
        """
        import logging

        # transaction begins
        get_db().begin()
        try:
            Schedule.delete().where(Schedule.truck_id == 7).execute()
            for block in response.xpath(
                    '//div[@class="rounded-corners clearfix grpelem"]'):
                locationAndTime = block.xpath(
                    './/div[@class="content-brown clearfix colelem"]')
                if len(locationAndTime) < 2:
                    continue

                # First content div: address pieces, one per <span>.
                locations = locationAndTime[0].xpath('.//span/text()')
                address = ", ".join(
                    loc.extract().replace('.', '')
                    for loc in locations).strip()

                # Second content div: day range, then time range.
                dateTime = locationAndTime[1].xpath('.//span/text()')
                dayStr = dateTime[0].extract()
                timeStr = dateTime[1].extract()

                timestamps = utility.parse_times_pair_string(timeStr)

                dayParts = dayStr.split('-')
                startDay = utility.DAYS_MAP[dayParts[0].lower().strip()]
                endDay = utility.DAYS_MAP[dayParts[1].lower().strip()]

                # These depend only on the address, so compute them once per
                # stop instead of once per day.
                zipCode = address.split(' ')[-1].strip()
                coordinates = utility.get_coordinate_by_address(address)

                for day in range(startDay, endDay + 1):
                    item = SchedulesItem()
                    item['day'] = day
                    item['address'] = address
                    item['start_time'] = timestamps[0]
                    item['end_time'] = timestamps[1]

                    item['city'] = 'San Francisco'
                    item['state'] = "CA"
                    item['zip'] = zipCode

                    item['latitude'] = coordinates['latitude']
                    item['longitude'] = coordinates['longitude']
                    item['truck_id'] = 7

                    print(item)

                    record = utility.convert_to_pojo(item)
                    record.save()

        except Exception:
            # Undo the delete and any partial inserts, and report the failure
            # instead of swallowing it silently.
            get_db().rollback()
            logging.getLogger(__name__).exception(
                "Updating schedules for truck_id 7 failed; rolled back")
        else:
            get_db().commit()
Beispiel #3
0
    def parse(self, response):
        """Replace the stored schedules of truck 6 with the scraped ones.

        Existing rows with ``truck_id == 6`` are deleted and one row is saved
        for every time entry containing "-" in each ``div.slide``, all inside
        one transaction. On any ``Exception`` the transaction is rolled back
        and the error is logged; the commit happens only on success.

        Args:
            response: Page object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the spider class).
        """
        import logging

        # transaction begins
        get_db().begin()
        try:
            Schedule.delete().where(Schedule.truck_id == 6).execute()

            for slide in response.xpath('//div[@class="slide"]'):
                dates = slide.xpath('.//h2/text()').extract()[0].strip()
                if not dates:
                    continue

                # The day name is the second space-separated word of the
                # heading.
                day = DAYS_MAP[dates.split(" ")[1].lower().strip()]
                times = slide.xpath(
                    './/strong[@class="title"]/text()').extract()
                addresses = slide.xpath('.//address/text()').extract()

                # Times and addresses are paired by position.
                for idx, timeText in enumerate(times):
                    if '-' not in timeText:
                        continue

                    timePair = self.process_times(timeText)
                    startTime = timePair[0]
                    endTime = timePair[1]

                    address = utility.formalize_address(addresses[idx])
                    coordinates = utility.get_coordinate_by_address(address)

                    item = SchedulesItem()
                    item['truck_id'] = 6
                    item['day'] = day
                    item['address'] = address
                    item['start_time'] = startTime
                    item['end_time'] = endTime
                    item['latitude'] = coordinates['latitude']
                    item['longitude'] = coordinates['longitude']
                    item['zip'] = coordinates['zip']

                    addressParts = address.split(",")
                    # len() - 2 (not [-2]) so an address without a comma
                    # falls back to its only part instead of raising.
                    item['city'] = addressParts[len(addressParts) - 2].strip()
                    item['state'] = "CA"

                    record = utility.convert_to_pojo(item)
                    record.save()
        except Exception:
            # Undo the delete and any partial inserts, and report the failure
            # instead of swallowing it silently.
            get_db().rollback()
            logging.getLogger(__name__).exception(
                "Updating schedules for truck_id 6 failed; rolled back")
        else:
            get_db().commit()
Beispiel #4
0
    def parse(self, response):
        """Replace the stored schedules of ``TRUCK_ID`` with the scraped ones.

        Existing rows for ``TRUCK_ID`` are deleted and one row is saved per
        ``<p>`` under ``div#locations``, all inside one transaction. On any
        ``Exception`` the transaction is rolled back and the error is
        printed; the commit happens only on success.

        Args:
            response: Page object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the spider class).
        """
        # transaction begins
        get_db().begin()
        try:
            Schedule.delete().where(Schedule.truck_id == TRUCK_ID).execute()

            for entry in response.xpath('//div[@id="locations"]/p'):
                timeDateStr = entry.xpath('.//strong/text()')[0].extract()

                # Day name: everything before the last comma.
                dayStrSearch = re.search('(.*),.*', timeDateStr, re.IGNORECASE)
                dayStr = dayStrSearch.group(1).strip().lower()
                day = utility.FULL_DAYS_MAP[dayStr]

                # Start time: everything after the last "@".
                timeStrSearch = re.search('.*@(.*)', timeDateStr,
                                          re.IGNORECASE)
                timeStr = timeStrSearch.group(1).strip().lower()
                startTime = utility.parse_times_string(timeStr)

                # the website doesn't specify an end time, so put 1pm as a
                # placeholder
                endTime = 13 * 3600

                addrString = entry.xpath('.//a/text()')[0].extract()
                addrString = addrString.replace('near', 'and')
                addrString += ", San Francisco"

                print(addrString)

                coords = utility.get_coordinate_by_address(addrString)

                item = SchedulesItem()
                item['day'] = day
                item['latitude'] = coords['latitude']
                item['longitude'] = coords['longitude']
                item['zip'] = coords['zip']
                item['address'] = coords['address']
                item['city'] = 'San Francisco'
                item['state'] = 'CA'
                item['truck_id'] = TRUCK_ID
                item['start_time'] = startTime
                item['end_time'] = endTime

                record = utility.convert_to_pojo(item)
                record.save()

        except Exception as e:
            # Roll back before reporting so the rollback always happens.
            get_db().rollback()
            # str() is required: concatenating a str with the exception
            # object itself raises TypeError.
            print("ERROR: " + str(e))
        else:
            get_db().commit()
Beispiel #5
0
    def parse(self, response):
        """Replace the stored schedules of truck 1 with the scraped ones.

        Existing rows with ``truck_id == 1`` are deleted and one row is saved
        per ``dl.schedule-grid`` whose time text splits into exactly two
        parts, all inside one transaction. On any ``Exception`` the
        transaction is rolled back and the error is logged; the commit
        happens only on success.

        Args:
            response: Page object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the spider class).
        """
        import logging

        time_delimiter = "-"
        city = "San Francisco"
        state = "CA"

        # transaction begins
        get_db().begin()

        try:
            Schedule.delete().where(Schedule.truck_id == 1).execute()
            for entry in response.xpath('//dl[@class="schedule-grid"]'):
                day = entry.xpath(
                    'dt[@class="date"]/span[@class="day"]/text()').extract()[0]
                times = entry.xpath('dd[@class="time"]/text()').extract()[0]
                times = times.split(time_delimiter)
                address = entry.xpath(
                    'dd[@class="place"]/text()').extract()[0]

                # Only "<start>-<end>" entries can be stored.
                if len(times) != 2:
                    continue

                day_num = DAYS_MAP[day.lower()]
                start_time_str = times[0].strip()
                end_time_str = times[1].strip()

                # Original note: if start_time is after the end_time, the
                # time is in 12-hour format (presumably resolved inside
                # time_in_seconds -- confirm there).
                times_pair = self.time_in_seconds(start_time_str,
                                                  end_time_str)

                item = SchedulesItem()
                item['start_time'] = times_pair[0]
                item['end_time'] = times_pair[1]
                item['address'] = address + ", " + city + ", " + state
                item['city'] = city
                item['state'] = state
                item['day'] = day_num

                # The lookup uses the bare street text, without city/state.
                coordinates = utility.get_coordinate_by_address(address)
                item['latitude'] = coordinates['latitude']
                item['longitude'] = coordinates['longitude']
                item['truck_id'] = 1
                item['zip'] = coordinates['zip']
                item['address'] += " " + item['zip']

                record = utility.convert_to_pojo(item)
                record.save()
        except Exception:
            # Undo the delete and any partial inserts, and report the failure
            # instead of swallowing it silently.
            get_db().rollback()
            logging.getLogger(__name__).exception(
                "Updating schedules for truck_id 1 failed; rolled back")
        else:
            get_db().commit()
Beispiel #6
0
    def parse(self, response):
        """Replace the stored schedules of truck 2 with the scraped ones.

        Existing rows with ``truck_id == 2`` are deleted and one row is saved
        per matching ``<section>`` with a non-empty date text, all inside one
        transaction. On any ``Exception`` the transaction is rolled back and
        the error is logged; the commit happens only on success.

        Only sections whose ``data-wcal-date`` attribute contains "2017" are
        selected. NOTE(review): the year is hard-coded in the XPath --
        confirm whether it should follow the current year.

        Args:
            response: Page object exposing ``xpath()`` (presumably a Scrapy
                response -- confirm against the spider class).
        """
        import logging

        # transaction begins
        get_db().begin()
        try:
            Schedule.delete().where(Schedule.truck_id == 2).execute()

            for section in response.xpath(
                    '//div[@id="loc-wrap"]/section[contains(@data-wcal-date, "2017")]'
            ):
                day_str = section.xpath(
                    'div[@class="row map-row"]/div[@class="span2 date"]/text()'
                ).extract()[0].strip().lower()

                if not day_str:
                    continue
                address = section.xpath(
                    'div[@class="row map-row"]/div[@class="span4 location"]/a/@data-ext-map-link'
                ).extract()[0]
                times = section.xpath(
                    'div[@class="row map-row"]/div[@class="span1 time"]/span/text()'
                ).extract()

                item = SchedulesItem()
                item['day'] = DAYS_MAP[day_str]
                item['address'] = address
                # The first two time spans are passed as start and end.
                timeInSeconds = self.get_start_and_end_time(times[0], times[1])
                item['start_time'] = timeInSeconds[0]
                item['end_time'] = timeInSeconds[1]

                addrItems = address.split(",")
                # len() - 2 (not [-2]) so an address without a comma falls
                # back to its only part instead of raising.
                item['city'] = addrItems[len(addrItems) - 2].strip()
                item['state'] = "CA"
                # Second whitespace-separated token of the last part.
                item['zip'] = addrItems[-1].strip().split()[1]

                coordinates = utility.get_coordinate_by_address(address)
                item['latitude'] = coordinates['latitude']
                item['longitude'] = coordinates['longitude']
                item['truck_id'] = 2

                record = utility.convert_to_pojo(item)
                record.save()

        except Exception:
            # Undo the delete and any partial inserts, and report the failure
            # instead of swallowing it silently.
            get_db().rollback()
            logging.getLogger(__name__).exception(
                "Updating schedules for truck_id 2 failed; rolled back")
        else:
            get_db().commit()
Beispiel #7
0
        endTime = item['end']['dateTime']
        endTimeArr = endTime.split('-')[0:len(endTime.split('-')) - 1]
        endTime = '-'.join(endTimeArr)

        starttimeObj = datetime.datetime.strptime(startTime,
                                                  '%Y-%m-%dT%H:%M:%S')
        endtimeObj = datetime.datetime.strptime(endTime, '%Y-%m-%dT%H:%M:%S')

        location = item['summary'] + " , San Francisco, CA"

        if 'location' in item:
            location = item['summary']

        address = utility.formalize_address(location)
        coordinates = utility.get_coordinate_by_address(address)
        city = address.split(',')[len(address.split(',')) - 2]

        schedule = Schedule(day=starttimeObj.weekday(),
                            truck_id=5,
                            address=address.strip(),
                            latitude=coordinates['latitude'],
                            longitude=coordinates['longitude'],
                            city=city.strip(),
                            state="CA",
                            zip=coordinates['zip'],
                            start_time=get_time(starttimeObj),
                            end_time=get_time(endtimeObj))

        schedule.save()