def parse_epg(self, response):
    """Yield one EpgItem per schedule row of a daily programme page.

    The day string is the last path segment of ``response.url``. A row whose
    time text compares (as a string) below "06:00" is stamped with the value
    returned by ``next_day(self.formats, day)``; every other row is stamped
    with the page's own day. ``endtime`` is always left empty.

    A row without meta text or without a synopsis paragraph still produces
    an item (the missing part is treated as empty) instead of aborting the
    whole page with an IndexError. A row without a time or a title still
    raises IndexError, since no meaningful item can be built from it.

    Raises:
        CloseSpider: if the page contains no schedule rows at all.
    """
    rows = response.xpath(
        "//div[@class='schedule']/div[@class='outer-container']/"
        "div[@class='inner-container']/div[contains(@class,'schedule-row')]")
    if not rows:
        raise CloseSpider('no epg info!')

    day = response.url.split("/")[-1]
    nextday = next_day(self.formats, day)
    stamp_format = self.formats + "%H:%M"
    # Loop-invariant, so compiled once: captures the text inside the <h4>,
    # skipping an optional <a href=...> wrapper around it.
    title_re = re.compile(
        r'\<h4.*?\>\s*(?:\<a href.*?\>)*(.*?)(?:\</a\>)*\s*\</h4\>', re.S)

    for row in rows:
        clock = row.xpath(
            ".//div[contains(@class,'program-time')]/text()").extract()[0].strip()
        # Plain string comparison; relies on the page using zero-padded HH:MM.
        base_day = nextday if clock < "06:00" else day
        starttime = trans_format(base_day + clock, stamp_format)

        heading = row.xpath(
            ".//div[contains(@class,'program-info')]/h4").extract()[0].strip()
        title = title_re.search(heading).group(1)

        # Only the part of the meta line before the first ":" is kept.
        meta_texts = row.xpath(
            ".//div[contains(@class,'program-info')]/p/text()").extract()
        if meta_texts:
            name = "%s %s" % (title, meta_texts[0].split(":")[0].strip())
        else:
            name = title

        synopsis_texts = row.xpath(
            ".//div[contains(@class,'program-info')]"
            "/div[contains(@class,'program-synopsis')]/p/text()").extract()
        desc = synopsis_texts[0].strip() if synopsis_texts else ''

        item = EpgItem()
        item['name'] = name
        item['desc'] = desc
        item['starttime'] = starttime
        item['endtime'] = ''
        yield item
def parse_epg(self, response):
    """Yield an EpgItem for every programme of the "si4a" channel.

    ``response.body`` is parsed as JSON and iterated as a sequence of channel
    records. Records whose ``channelName`` is not "si4a" are skipped. For the
    matching ones, each entry under ``channels`` becomes one item: the name is
    the entry's ``genre``, and the start/end stamps are built from ``date``
    plus ``start_time`` / ``end_time`` and passed through ``trans_format``.
    ``desc`` is always left empty.
    """
    stamp_format = "%m/%d/%Y %H:%M:%S"
    for record in json.loads(response.body):
        if record['channelName'] != "si4a":
            continue
        for entry in record['channels']:
            label = entry['genre']
            day = entry['date']
            raw_start = day + " " + entry['start_time']
            raw_end = day + " " + entry['end_time']
            begins = trans_format(raw_start, stamp_format)
            ends = trans_format(raw_end, stamp_format)

            item = EpgItem()
            item['name'] = label
            item['starttime'] = begins
            item['endtime'] = ends
            item['desc'] = ''
            yield item
def parse(self, response):
    """Yield an EpgItem for each programme listed under each date section.

    For every date wrapper, the last 10 characters of its header text are
    converted with ``trans_format`` from "%d-%m-%Y" to "%Y.%m.%d". For every
    programme in that section, the text before the first "/" in the timing
    element is trimmed to 7 characters, stripped, has ":" replaced by ".",
    and is passed to ``time12to24``. The resulting stamp is then moved
    forward by 8 hours (presumably a timezone adjustment; confirm against the
    source site). ``endtime`` and ``desc`` are left empty.
    """
    stamp_layout = "%Y.%m.%d %H:%M:%S"
    shift = datetime.timedelta(hours=8)
    sections = response.xpath(
        "//div[@class='box-container-wrapper']/div[contains(@class,'date-program-wrapper')]")
    for section in sections:
        header_text = section.xpath(
            "./div[@class='box-inner-container-header']/h2/text()").extract()[0]
        date = trans_format(header_text[-10:], "%d-%m-%Y", "%Y.%m.%d")

        for entry in section.xpath("./div[@class='box-inner-container-wrapper']/div"):
            name = entry.xpath("./div[@class='title']/h2/text()").extract()[0]
            raw_time = entry.xpath("./div[@class='timing']/time/text()").extract()[0]
            clock = time12to24(raw_time.split("/")[0][0:7].strip().replace(":", "."))

            unshifted = trans_format("%s %s" % (date, clock), "%Y.%m.%d %H:%M")
            shifted = datetime.datetime.strptime(unshifted, stamp_layout) + shift

            item = EpgItem()
            item['name'] = name
            item['starttime'] = shifted.strftime(stamp_layout)
            item['endtime'] = ''
            item['desc'] = ''
            yield item
def parse_epg(self, response):
    """Yield an EpgItem for each schedule grid cell on the page.

    The date string is the last 8 characters of ``response.url``. For every
    cell, the time text has runs of two or more whitespace characters and all
    remaining spaces removed before being passed to ``time12to24``; the
    result is joined to the date and converted with ``trans_format`` using
    ``self.formats + " %H:%M"``. The item name is the stripped title followed
    by the stripped subtitle (empty when the title link has no ``<span>``),
    with whitespace runs removed. ``endtime`` and ``desc`` are left empty.
    """
    day_stamp = response.url[-8:]
    cells = response.xpath("//div[@class='schedule_grid ']")
    blank_runs = re.compile(r"\s\s+")
    for cell in cells:
        details = cell.xpath("./div[@class='schedule_details']")
        raw_time = details.xpath("./p[@class='info']/text()").extract()[0]
        title = details.xpath("./p[@class='title']/a/text()").extract()[0]
        try:
            subtitle = details.xpath("./p[@class='title']/a/span/text()").extract()[0]
        except (IndexError, ValueError):
            # No subtitle span on this entry.
            subtitle = ""

        clock = time12to24(blank_runs.sub("", raw_time).replace(" ", ""))
        starttime = trans_format("%s %s" % (day_stamp, clock), self.formats + " %H:%M")

        item = EpgItem()
        item["name"] = blank_runs.sub("", title.strip() + subtitle.strip())
        item["starttime"] = starttime
        item["endtime"] = ""
        item["desc"] = ""
        yield item