def parseSeries(self, data):
    serieslist = []
    for line in json.loads(data):
        id = line['id']
        idname = line['value']
        splog(id, idname)
        if not idname.endswith("/person"):
            serieslist.append((id, idname))
    serieslist.reverse()
    return serieslist
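# Illustrative only: parseSeries above consumes the JSON suggest payload of
# the Wunschliste search, a list of objects with 'id' and 'value' keys;
# entries whose value ends in "/person" are skipped and the remaining
# (id, value) pairs are returned in reverse order. The payload below is
# invented sample data, not real endpoint output:
#
#   data = '[{"id": 1001, "value": "Beispielserie"}, {"id": 1002, "value": "Jane Doe/person"}]'
#   self.parseSeries(data)  # -> [(1001, u'Beispielserie')]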
def getSeries(self, name):
    #url = SERIESLISTURL + urlencode({ 'q' : re.sub("[^a-zA-Z0-9-*]", " ", name) })
    url = SERIESLISTURL + urlencode({'q': name})
    data = self.getPage(url)

    if data and isinstance(data, basestring):
        data = self.parseSeries(data)
        self.doCacheList(url, data)

    if data and isinstance(data, list):
        splog("WunschlisteFeed ids", data)
        return self.filterKnownIds(data)
def getSeries(self, name):
    parameter = urlencode({'term': re.sub("[^a-zA-Z0-9*]", " ", name)})
    url = SERIESLISTURL + parameter
    data = self.getPage(url, Headers)

    if data and isinstance(data, basestring):
        data = self.parseSeries(data)
        self.doCacheList(url, data)

    if data and isinstance(data, list):
        splog("Fernsehserien ids", data)
        return self.filterKnownIds(data)
def parseSeries(self, data):
    serieslist = []
    for line in data.splitlines():
        values = line.split("|")
        if len(values) == 4:
            idname, countryyear, id, temp = values
            splog(id, idname)
            serieslist.append((id, idname))
        else:
            splog("WunschlisteFeed: ParseError: " + str(line))
    serieslist.reverse()
    return serieslist
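# Illustrative only: this variant consumes a plain-text suggest response with
# one candidate per line and four pipe-separated fields
# (idname|countryyear|id|temp). The line below is invented sample data, not
# real endpoint output:
#
#   data = "Beispielserie|D 2010|12345|x"
#   self.parseSeries(data)  # -> [("12345", "Beispielserie")]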
def getSeries(self, name):
    parameter = urlencode({'term': re.sub("[^a-zA-Z0-9*]", " ", name)})
    url = SERIESLISTURL + parameter
    data = self.getPage(url)

    if data and isinstance(data, basestring):
        data = self.parseSeries(data)
        self.doCacheList(url, data)

    if data and isinstance(data, list):
        splog("Fernsehserien ids", data)
        return self.filterKnownIds(data)
def getEpisode(self, name, begin, end=None, service=None):
    # On Success: Return a single (season, episode, title) tuple
    # On Failure: Return an empty list, a string or None
    self.begin = begin
    self.end = end
    self.service = service
    self.knownids = []
    self.returnvalue = None

    # Check preconditions
    if not name:
        splog(_("Skip Wunschliste: No show name specified"))
        return _("Skip Wunschliste: No show name specified")
    if not begin:
        splog(_("Skip Wunschliste: No begin timestamp specified"))
        return _("Skip Wunschliste: No begin timestamp specified")

    splog("WunschlisteFeed getEpisode")

    while name:
        ids = self.getSeries(name)
        while ids:
            idserie = ids.pop()
            if idserie and len(idserie) == 2:
                id, idname = idserie
                # Handle encodings
                self.series = str_to_utf8(idname)
                result = self.getNextPage(id)
                if result:
                    return result
        else:
            name = self.getAlternativeSeries(name)
    else:
        return (self.returnvalue or _("No matching series found"))
def str_to_utf8(s):
    # Convert a byte string with unicode escaped characters
    splog("WL: str_to_utf8: s: ", repr(s))
    #unicode_str = s.decode('unicode-escape')
    #splog("WL: str_to_utf8: s: ", repr(unicode_str))
    ## Python 2.x can't convert the special chars natively
    #utf8_str = utf8_encoder(unicode_str)[0]
    #splog("WL: str_to_utf8: s: ", repr(utf8_str))
    #return utf8_str  #.decode("utf-8").encode("ascii", "ignore")
    if type(s) == unicode:
        # Default should be here
        try:
            s = s.encode('utf-8')
            splog("WL: str_to_utf8 encode utf8: s: ", repr(s))
        except:
            s = s.encode('utf-8', 'ignore')
            splog("WL: str_to_utf8 except encode utf8 ignore: s: ", repr(s))
    else:
        try:
            # Validate only and keep the byte string: rebinding the decoded
            # unicode here would break the byte replacements below
            s.decode('utf-8')
            splog("WL: str_to_utf8 decode utf8: s: ", repr(s))
        except:
            try:
                s = unicode(s, 'ISO-8859-1')
                s = s.encode('utf-8')
                splog("WL: str_to_utf8 decode ISO-8859-1: s: ", repr(s))
            except:
                try:
                    s = unicode(s, 'cp1252')
                    s = s.encode('utf-8')
                    splog("WL: str_to_utf8 decode cp1252: s: ", repr(s))
                except:
                    s = unicode(s, 'ISO-8859-1', 'ignore')
                    s = s.encode('utf-8')
                    splog("WL: str_to_utf8 decode ISO-8859-1 ignore: s: ", repr(s))
    s = s.replace('\xe2\x80\x93', '-').replace('\xe2\x80\x99', "'").replace('\xc3\x9f', 'ß')
    return CompiledRegexpNonASCII.sub('', s)
def parseNextPage(self, data):
    trs = []
    #parser = FSParser()
    #parser.feed(data)
    #return parser.list

    # Handle malformed HTML issues
    data = data.replace('\\"', '"')   # target=\"_blank\"
    data = data.replace('\'+\'', '')  # document.write('<scr'+'ipt

    soup = BeautifulSoup(data)

    div = soup.find('div', 'gray-bar-header nicht-nochmal')
    if div and div.string:
        year = div.string[-4:]
        splog("FS year by div", year)
    else:
        year = self.year
        splog("FS year not found", year)

    table = soup.find('table', 'sendetermine')
    if table:
        for trnode in table.find_all('tr'):
            # TODO skip first header row
            tdnodes = trnode and trnode.find_all('td')
            if tdnodes:
                # Filter for known rows
                if len(tdnodes) >= 11:  # >= 6 and tdnodes[COL_DATE].string and len(tdnodes[COL_DATE].string) >= 10:
                    tds = []
                    for idx, tdnode in enumerate(tdnodes):
                        if idx == COL_TIME:
                            tds.append(tdnode.string[0:5])
                        elif idx == COL_DATE:
                            tds.append(tdnode.string[0:11])
                        elif idx == COL_CHANNEL:
                            #tds[COL_CHANNEL] = tdnode[COL_CHANNEL]['title']
                            spans = tdnode.find('span')
                            if spans:
                                splog("spans", len(spans), spans)
                                tds.append(spans.get('title', ''))
                            else:
                                tds.append(tdnode.string or "")
                        else:
                            tds.append(tdnode.string or "")
                    if tds[COL_TIME].find('\xc2\xa0') != -1:
                        splog("tdnodes xc2xa0", len(tdnodes), tdnodes)
                        continue
                    if tds[COL_DATE].find('\xc2\xa0') != -1:
                        splog("tdnodes xc2xa0", len(tdnodes), tdnodes)
                        continue
                    tds.append(year)
                    splog("FS table tds", tds)
                    trs.append(tds)
                # This row belongs to the previous
                #TODO
                #elif trs and len(tdnodes) == 5:
                #    trs[-1][5] += ' ' + (tdnodes[3].string or "")
                #    trs[-1][6] += ' ' + (tdnodes[4].string or "")
                #else:
                #    splog( "tdnodes", len(tdnodes), tdnodes)
            #else:
            #    splog( "tdnodes", tdnodes )

        #http://www.fernsehserien.de/weisst-du-eigentlich-wie-lieb-ich-dich-hab/sendetermine/-1
        for idx, tds in enumerate(trs):
            if tds[COL_TIME] == " ":
                if idx > 0:
                    tds[COL_TIME] = trs[idx - 1][COL_TIME]
            if tds[COL_DATE] == " ":
                if idx > 0:
                    tds[COL_DATE] = trs[idx - 1][COL_DATE]
    else:
        splog("FS table not found")
    #splog(trs)
    return trs
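# parseNextPage and the COL_*-based getNextPage below index row cells through
# COL_* constants that are defined elsewhere in the module. The assignment
# below is an assumption, reconstructed from the numeric-index variant of
# getNextPage further down (xdate, xbegin = tds[1:3]; channel = tds[3];
# season/episode/title = tds[7], tds[8], tds[10:]); it is kept commented out
# as a reading aid only:
#
#COL_DATE = 1
#COL_TIME = 2
#COL_CHANNEL = 3
#COL_SEASON = 7
#COL_EPISODE = 8
#COL_TITLE = 10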
def getNextPage(self, id):
    url = EPISODEIDURL % (id, self.page)
    data = self.getPage(url, Headers)

    if data and isinstance(data, basestring):
        splog("getNextPage: basestring")
        data = self.parseNextPage(data)
        self.doCacheList(url, data)

    if data and isinstance(data, list):
        splog("getNextPage: list")
        trs = data
        yepisode = None
        ydelta = maxint

        #first = trs[0][2]
        #last = trs[-1][2]
        #print first[0:5]
        #print last[6:11]

        # trs[0] first line [2] second element = timestamps [a:b] use first time
        cust_date = trs[0][COL_TIME] + trs[0][COL_DATE]
        if len(cust_date) == 11:
            cust_date += trs[0][-1]
        splog(cust_date)
        if len(cust_date) != 15:
            return
        first = datetime.strptime(cust_date, "%H:%M%d.%m.%Y")

        # trs[-1] last line [2] second element = timestamps [a:b] use second time
        cust_date = trs[-1][COL_TIME] + trs[-1][COL_DATE]
        if len(cust_date) == 11:
            cust_date += trs[-1][-1]
        splog(cust_date)
        if len(cust_date) != 15:
            return
        last = datetime.strptime(cust_date, "%H:%M%d.%m.%Y")

        #first = first - self.td_max_time_drift
        #last = last + self.td_max_time_drift

        if self.page != 0:
            new_page = (self.first != first or self.last != last)
            splog("getNextPage: first_on_prev_page, first, last_on_prev_page, last, if: ", self.first, first, self.last, last, new_page)
            self.first = first
            self.last = last
        else:
            new_page = True

        if new_page:
            test_future_timespan = ((first - self.td_max_time_drift) <= self.begin and self.begin <= (last + self.td_max_time_drift))
            test_past_timespan = ((first + self.td_max_time_drift) >= self.begin and self.begin >= (last - self.td_max_time_drift))
            splog("first_on_page, self.begin, last_on_page, if, if:", first, self.begin, last, test_future_timespan, test_past_timespan)
            if (test_future_timespan or test_past_timespan):
                #search in page for matching datetime
                for tds in trs:
                    if tds and len(tds) >= 11:
                        # Grey's Anatomy
                        #OLD [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
                        #
                        # Gute Zeiten
                        #OLD [None, u'20.11.2012', u'06:40\u201307:20 Uhr', u'NDR', None, u'4187', u'Folge 4187']
                        #OLD [None, u'\xa0', None, u'5132', u'Folge 5132']
                        # Wahnfried
                        #OLD [u'Sa', u'26.12.1987', u'\u2013', u'So', u'27.12.1987', u'1Plus', None]

                        # First part: date, times, channel
                        xdate = tds[COL_DATE]
                        xbegin = tds[COL_TIME]
                        #splog( "tds", tds )
                        #xend = xbegin[6:11]
                        #xbegin = xbegin[0:5]
                        cust_date = xbegin + xdate
                        if len(cust_date) == 11:
                            cust_date += tds[-1]
                        splog(cust_date)
                        if len(cust_date) != 15:
                            continue
                        xbegin = datetime.strptime(cust_date, "%H:%M%d.%m.%Y")
                        #xend = datetime.strptime( xend+xdate, "%H:%M%d.%m.%Y" )
                        #print "xbegin", xbegin

                        #Py2.6
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        #Py2.7 delta = abs(self.begin - xbegin).total_seconds()
                        splog(self.begin, xbegin, delta, self.max_time_drift)

                        if delta <= self.max_time_drift:
                            if self.compareChannels(self.service, tds[COL_CHANNEL]):
                                if delta < ydelta:
                                    splog("tds", len(tds), tds)
                                    if len(tds) >= 10:
                                        # Second part: s1e1, s1e2,
                                        xseason = tds[COL_SEASON] or "1"
                                        xepisode = tds[COL_EPISODE]
                                        xtitle = tds[COL_TITLE]
                                    elif len(tds) >= 7:
                                        #TODO
                                        # Second part: s1e1, s1e2,
                                        xseason = tds[4]
                                        xepisode = tds[5]
                                        if xseason and xseason.find(".") != -1:
                                            xseason = xseason[:-1]
                                            xtitle = tds[6]
                                        else:
                                            xseason = "1"
                                            xtitle = tds[6]
                                    elif len(tds) == 6:
                                        xseason = "0"
                                        xepisode = "0"
                                        xtitle = tds[5]

                                    if xseason and xepisode and xtitle and self.series:
                                        # Handle encodings
                                        xtitle = str_to_utf8(xtitle)
                                        yepisode = (xseason, xepisode, xtitle, self.series)
                                        ydelta = delta
                                else:  #if delta >= ydelta:
                                    break
                            else:
                                self.returnvalue = _("Check the channel name")
                    elif yepisode:
                        break

        if yepisode:
            return (yepisode)
        else:
            # TODO calculate next page : use firstrow lastrow datetime
            if not self.future:
                if first > self.begin:
                    self.page -= 1
                    return
            else:
                if self.begin > last:
                    self.page += 1
                    return

    self.page = None
    return
def str_to_utf8(s):
    # Convert a byte string with unicode escaped characters
    splog("WL: str_to_utf8: s: ", repr(s))
    #unicode_str = s.decode('unicode-escape')
    #splog("WL: str_to_utf8: s: ", repr(unicode_str))
    ## Python 2.x can't convert the special chars natively
    #utf8_str = utf8_encoder(unicode_str)[0]
    #splog("WL: str_to_utf8: s: ", repr(utf8_str))
    #return utf8_str  #.decode("utf-8").encode("ascii", "ignore")
    if type(s) != unicode:
        # Default should be here
        try:
            s = s.decode('ISO-8859-1')
            splog("WL: str_to_utf8 decode ISO-8859-1: s: ", repr(s))
        except:
            try:
                s = unicode(s, 'utf-8')
                s = s.encode('ISO-8859-1')
                splog("WL: str_to_utf8 decode utf-8: s: ", repr(s))
            except:
                try:
                    s = unicode(s, 'cp1252')
                    s = s.encode('ISO-8859-1')
                    splog("WL: str_to_utf8 decode cp1252: s: ", repr(s))
                except:
                    s = unicode(s, 'utf-8', 'ignore')
                    s = s.encode('ISO-8859-1')
                    splog("WL: str_to_utf8 decode utf-8 ignore: s: ", repr(s))
    else:
        try:
            s = s.encode('ISO-8859-1')
            splog("WL: str_to_utf8 encode ISO-8859-1: s: ", repr(s))
        except:
            s = s.encode('ISO-8859-1', 'ignore')
            splog("WL: str_to_utf8 except encode ISO-8859-1 ignore: s: ", repr(s))
    return s
def getNextPage(self, id): splog("WunschlisteFeed getNextPage") url = EPISODEIDURLATOM + urlencode({ 's' : id }) data = self.getPage( url ) if data and isinstance(data, basestring): data = self.parseNextPage(data) self.doCacheList(url, data) if data and isinstance(data, list): trs = data yepisode = None ydelta = maxint for tds in trs: if tds and len(tds) == 2: xtitle, xupdated = tds if xtitle is not None and xupdated is not None: #import iso8601 #http://code.google.com/p/pyiso8601/ xbegin = parse_date(xupdated) xbegin = xbegin.replace(tzinfo=None) #"2014-11-10T20:15:00+01:00" #xbegin = datetime.strptime(xupdated[0:-6], "%Y-%m-%dT%H:%M:%S"); #Py2.6 delta = abs(self.begin - xbegin) delta = delta.seconds + delta.days * 24 * 3600 #Py2.7 delta = abs(self.begin - xbegin).total_seconds() splog(self.begin, xbegin, delta, self.max_time_drift) if delta <= self.max_time_drift: result = CompiledRegexpAtomChannel.search(xtitle) if result and len(result.groups()) >= 1: if self.compareChannels(self.service, result.group(1)): if delta < ydelta: # Slice string to remove channel xtitle = xtitle[:result.start()] result = CompiledRegexpAtomDate.search(xtitle) if result and len(result.groups()) >= 1: # Slice string to remove date xtitle = xtitle[:result.start()] result = CompiledRegexpAtomEpisode.search(xtitle) if result and len(result.groups()) >= 1: # Extract season and episode xepisode = result.group(1) # Slice string to remove season and episode xtitle = xtitle[:result.start()] result = CompiledRegexpEpisode.search(xepisode) if result and len(result.groups()) >= 3: xseason = result and result.group(2) or "1" xepisode = result and result.group(3) or "0" else: splog("WunschlisteFeed wrong episode format", xepisode) xseason = "1" xepisode = "0" else: splog("WunschlisteFeed wrong title format", xtitle) xseason = "0" xepisode = "0" result = CompiledRegexpAtomTitle.search(xtitle) if result and len(result.groups()) >= 1: # Extract episode title xtitle = result.group(1) # Handle encodings xtitle = str_to_utf8(xtitle) yepisode = (xseason, xepisode, xtitle, self.series) ydelta = delta else: #if delta >= ydelta: break else: self.returnvalue = _("Check the channel name") elif yepisode: break if yepisode: return ( yepisode )
def getEpisode(self, name, begin, end=None, service=None):
    # On Success: Return a single (season, episode, title) tuple
    # On Failure: Return an empty list, a string or None
    self.begin = begin
    self.year = begin.year
    self.end = end
    self.service = service
    self.series = ""
    self.first = None
    self.last = None
    self.page = 0
    self.td_max_time_drift = timedelta(seconds=self.max_time_drift)
    self.knownids = []
    self.returnvalue = None

    # Check preconditions
    if not name:
        splog(_("Skip Fernsehserien: No show name specified"))
        return _("Skip Fernsehserien: No show name specified")
    if not begin:
        splog(_("Skip Fernsehserien: No begin timestamp specified"))
        return _("Skip Fernsehserien: No begin timestamp specified")

    if self.begin > datetime.now():
        self.future = True
    else:
        self.future = False
    splog("Fernsehserien getEpisode future", self.future)

    while name:
        ids = self.getSeries(name)
        while ids:
            idserie = ids.pop()
            if idserie and len(idserie) == 2:
                id, idname = idserie
                # Handle encodings
                self.series = str_to_utf8(idname)

                #self.page = 0
                if self.future:
                    self.page = 0
                else:
                    if self.actual_year == self.year:
                        #if self.begin > self.now-timedelta(seconds=3600):
                        self.page = 0
                        #else:
                        #    self.page = -1
                    else:
                        self.page = 0
                        year_base_url = EPISODEIDURL % (id, '')
                        splog("year_base_url: ", year_base_url)
                        # Increment the year by one, because we want to start at the end of the year
                        year_url = year_base_url + "jahr-" + str(self.year + 1)
                        splog("year_url: ", year_url)  #/sendetermine/jahr-2014
                        response = urlopen(year_url)
                        #redirecturl = http://www.fernsehserien.de/criminal-intent-verbrechen-im-visier/sendetermine/-14
                        redirect_url = response.geturl()
                        splog("redirect_url: ", redirect_url)
                        try:
                            self.page = int(redirect_url.replace(year_base_url, ''))
                        except:
                            self.page = 0

                self.first = None
                self.last = None
                while self.page is not None:
                    result = self.getNextPage(id)
                    if result:
                        return result
        else:
            name = self.getAlternativeSeries(name)
    else:
        return (self.returnvalue or _("No matching series found"))
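# How the year redirect above works: requesting year_base_url + "jahr-<year>"
# makes fernsehserien.de answer with a redirect to a relative page index, e.g.
#   http://www.fernsehserien.de/criminal-intent-verbrechen-im-visier/sendetermine/-14
# (the example URL from the comment above), so stripping year_base_url from
# response.geturl() leaves a string like "-14" and int() turns it into the
# start page; any non-numeric remainder falls back to page 0 via the except.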
def getNextPage(self, id): splog("WunschlistePrint getNextPage") url = EPISODEIDURLPRINT + urlencode({ 's' : id }) data = self.getPage( url, Headers ) if data and isinstance(data, basestring): data = self.parseNextPage(data) self.doCacheList(url, data) if data and isinstance(data, list): trs = data yepisode = None ydelta = maxint actual_year = self.actual_year for tds in trs: if tds and len(tds) >= 5: #print tds xchannel, xday, xdate, xbegin, xend = tds[:5] xtitle = "".join(tds[4:]) if self.actual_month == 12 and xdate.endswith(".01."): year = str(self.actual_year+1) else: year = str(self.actual_year) xbegin = datetime.strptime( xdate+year+" "+xbegin, "%d.%m.%Y %H.%M Uhr" ) #xend = datetime.strptime( xdate+xend, "%d.%m.%Y%H.%M Uhr" ) #splog(xchannel, xdate, xbegin, xend, xtitle) #splog(datebegin, xbegin, abs((datebegin - xbegin))) #Py2.6 delta = abs(self.begin - xbegin) delta = delta.seconds + delta.days * 24 * 3600 #Py2.7 delta = abs(self.begin - xbegin).total_seconds() splog(self.begin, xbegin, delta, self.max_time_drift) if delta <= self.max_time_drift: if self.compareChannels(self.service, xchannel): if delta < ydelta: print len(tds), tds if len(tds) >= 7: xepisode, xtitle = tds[5:7] if xepisode: result = CompiledRegexpEpisode.search(xepisode) if result and len(result.groups()) >= 3: xseason = result and result.group(2) or "1" xepisode = result and result.group(3) or "0" else: xseason = "1" xepisode = "0" else: xseason = "1" xepisode = "0" elif len(tds) == 6: xtitle = tds[5] xseason = "0" xepisode = "0" # Handle encodings xtitle = str_to_utf8(xtitle) yepisode = (xseason, xepisode, xtitle, self.series) ydelta = delta else: #if delta >= ydelta: break else: self.returnvalue = _("Check the channel name") elif yepisode: break if yepisode: return ( yepisode )
def getNextPage(self, id):
    url = EPISODEIDURL % (id, self.page)
    data = self.getPage(url, Headers)

    if data and isinstance(data, basestring):
        splog("getNextPage: basestring")
        data = self.parseNextPage(data)
        self.doCacheList(url, data)

    if data and isinstance(data, list):
        splog("getNextPage: list")
        trs = data
        # trs[x] = [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
        yepisode = None
        ydelta = maxint

        # first = trs[0][2]
        # last = trs[-1][2]
        # print first[0:5]
        # print last[6:11]

        # trs[0] first line [2] second element = timestamps [a:b] use first time
        first = datetime.strptime(trs[0][2][0:5] + trs[0][1], "%H:%M%d.%m.%Y")
        # trs[-1] last line [2] second element = timestamps [a:b] use second time
        # last = datetime.strptime( trs[-1][2][6:11] + trs[-1][1], "%H:%M%d.%m.%Y" )
        # Problem with wrap around use also start time
        # Sa 30.11.2013 23:35 - 01:30 Uhr ProSieben 46 3. 13 Showdown 3
        last = datetime.strptime(trs[-1][2][0:5] + trs[-1][1], "%H:%M%d.%m.%Y")

        # first = first - self.td_max_time_drift
        # last = last + self.td_max_time_drift

        if self.page != 0:
            new_page = self.first != first or self.last != last
            splog("getNextPage: first_on_prev_page, first, last_on_prev_page, last, if: ", self.first, first, self.last, last, new_page)
            self.first = first
            self.last = last
        else:
            new_page = True

        if new_page:
            test_future_timespan = (first - self.td_max_time_drift) <= self.begin and self.begin <= (last + self.td_max_time_drift)
            test_past_timespan = (first + self.td_max_time_drift) >= self.begin and self.begin >= (last - self.td_max_time_drift)
            splog("first_on_page, self.begin, last_on_page, if, if:", first, self.begin, last, test_future_timespan, test_past_timespan)
            if test_future_timespan or test_past_timespan:
                # search in page for matching datetime
                for tds in trs:
                    if tds and len(tds) >= 6:  # 7:
                        # Grey's Anatomy
                        # [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
                        #
                        # Gute Zeiten
                        # [None, u'20.11.2012', u'06:40\u201307:20 Uhr', u'NDR', None, u'4187', u'Folge 4187']
                        # [None, u'01.12.2012', u'10:45\u201313:15 Uhr', u'RTL', None, u'5131', u'Folge 5131']
                        # [None, u'\xa0', None, u'5132', u'Folge 5132']
                        # [None, u'\xa0', None, u'5133', u'Folge 5133']
                        # [None, u'\xa0', None, u'5134', u'Folge 5134']
                        # [None, u'\xa0', None, u'5135', u'Folge 5135']
                        # Wahnfried
                        # [u'Sa', u'26.12.1987', u'\u2013', u'So', u'27.12.1987', u'1Plus', None]

                        # First part: date, times, channel
                        xdate, xbegin = tds[1:3]
                        # splog( "tds", tds )
                        # xend = xbegin[6:11]
                        xbegin = xbegin[0:5]
                        xbegin = datetime.strptime(xbegin + xdate, "%H:%M%d.%m.%Y")
                        # xend = datetime.strptime( xend+xdate, "%H:%M%d.%m.%Y" )
                        # print "xbegin", xbegin

                        # Py2.6
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        # Py2.7 delta = abs(self.begin - xbegin).total_seconds()
                        splog(self.begin, xbegin, delta, self.max_time_drift)

                        if delta <= self.max_time_drift:
                            if self.compareChannels(self.service, tds[3]):
                                if delta < ydelta:
                                    splog("tds", len(tds), tds)
                                    if len(tds) >= 10:
                                        # Second part: s1e1, s1e2,
                                        xseason = tds[7] or "1"
                                        xepisode = tds[8]
                                        xtitle = " ".join(tds[10:])  # Use all available titles
                                    elif len(tds) >= 7:
                                        # Second part: s1e1, s1e2,
                                        xseason = tds[4]
                                        xepisode = tds[5]
                                        if xseason and xseason.find(".") != -1:
                                            xseason = xseason[:-1]
                                            xtitle = " ".join(tds[6:])  # Use all available titles
                                        else:
                                            xseason = "1"
                                            xtitle = " ".join(tds[6:])  # Use all available titles
                                    elif len(tds) == 6:
                                        xseason = "0"
                                        xepisode = "0"
                                        xtitle = tds[5]

                                    if xseason and xepisode and xtitle and self.series:
                                        # Handle encodings
                                        xtitle = str_to_utf8(xtitle)
                                        yepisode = (xseason, xepisode, xtitle, self.series)
                                        ydelta = delta
                                else:  # if delta >= ydelta:
                                    break
                            else:
                                self.returnvalue = _("Check the channel name")
                    elif yepisode:
                        break

        if yepisode:
            return yepisode
        else:
            # TODO calculate next page : use firstrow lastrow datetime
            if not self.future:
                if first > self.begin:
                    self.page -= 1
                    return
            else:
                if self.begin > last:
                    self.page += 1
                    return

    self.page = None
    return
def getNextPage(self, id): splog("WunschlisteFeed getNextPage") url = EPISODEIDURLATOM + urlencode({'s': id}) data = self.getPage(url) if data and isinstance(data, basestring): data = self.parseNextPage(data) self.doCacheList(url, data) if data and isinstance(data, list): trs = data yepisode = None ydelta = maxint for tds in trs: if tds and len(tds) == 2: xtitle, xupdated = tds if xtitle is not None and xupdated is not None: #import iso8601 #http://code.google.com/p/pyiso8601/ xbegin = parse_date(xupdated) xbegin = xbegin.replace(tzinfo=None) #"2014-11-10T20:15:00+01:00" #xbegin = datetime.strptime(xupdated[0:-6], "%Y-%m-%dT%H:%M:%S"); #Py2.6 delta = abs(self.begin - xbegin) delta = delta.seconds + delta.days * 24 * 3600 #Py2.7 delta = abs(self.begin - xbegin).total_seconds() splog(self.begin, xbegin, delta, self.max_time_drift) if delta <= self.max_time_drift: result = CompiledRegexpAtomChannel.search(xtitle) if result and len(result.groups()) >= 1: if self.compareChannels( self.service, result.group(1)): if delta < ydelta: # Slice string to remove channel xtitle = xtitle[:result.start()] result = CompiledRegexpAtomDate.search( xtitle) if result and len( result.groups()) >= 1: # Slice string to remove date xtitle = xtitle[:result.start()] result = CompiledRegexpAtomEpisode.search( xtitle) if result and len( result.groups()) >= 1: # Extract season and episode xepisode = result.group(1) # Slice string to remove season and episode xtitle = xtitle[:result.start( )] result = CompiledRegexpEpisode.search( xepisode) if result and len( result.groups()) >= 3: xseason = result and result.group( 2) or "1" xepisode = result and result.group( 3) or "0" else: splog( "WunschlisteFeed wrong episode format", xepisode) xseason = "1" xepisode = "0" else: splog( "WunschlisteFeed wrong title format", xtitle) xseason = "0" xepisode = "0" result = CompiledRegexpAtomTitle.search( xtitle) if result and len( result.groups()) >= 1: # Extract episode title xtitle = result.group(1) # Handle encodings xtitle = str_to_utf8(xtitle) yepisode = (xseason, xepisode, xtitle, self.series) ydelta = delta else: #if delta >= ydelta: break else: self.returnvalue = _( "Check the channel name") elif yepisode: break if yepisode: return (yepisode)
def getEpisode(self, name, begin, end=None, service=None):
    # On Success: Return a single (season, episode, title) tuple
    # On Failure: Return an empty list, a string or None
    self.begin = begin
    self.year = begin.year
    self.end = end
    self.service = service
    self.series = ""
    self.first = None
    self.last = None
    self.page = 0
    self.td_max_time_drift = timedelta(seconds=self.max_time_drift)
    self.knownids = []
    self.returnvalue = None

    # Check preconditions
    if not name:
        splog(_("Skip Fernsehserien: No show name specified"))
        return _("Skip Fernsehserien: No show name specified")
    if not begin:
        splog(_("Skip Fernsehserien: No begin timestamp specified"))
        return _("Skip Fernsehserien: No begin timestamp specified")

    if self.begin > datetime.now():
        self.future = True
    else:
        self.future = False
    splog("Fernsehserien getEpisode future", self.future)

    while name:
        ids = self.getSeries(name)
        while ids:
            idserie = ids.pop()
            if idserie and len(idserie) == 2:
                id, idname = idserie
                # Handle encodings
                self.series = str_to_utf8(idname)

                #self.page = 0
                if self.future:
                    self.page = 0
                else:
                    if self.actual_year == self.year:
                        #if self.begin > self.now-timedelta(seconds=3600):
                        self.page = 0
                        #else:
                        #    self.page = -1
                    else:
                        self.page = 0
                        year_base_url = EPISODEIDURL % (id, '')
                        splog("year_base_url: ", year_base_url)
                        # Increment the year by one, because we want to start at the end of the year
                        year_url = year_base_url + "jahr-" + str(self.year + 1)
                        splog("year_url: ", year_url)  #/sendetermine/jahr-2014
                        from plugin import PROXY
                        response = urlopen(PROXY + year_url)
                        #redirecturl = http://www.fernsehserien.de/criminal-intent-verbrechen-im-visier/sendetermine/-14
                        redirect_url = response.geturl()
                        splog("redirect_url: ", redirect_url)
                        try:
                            self.page = int(redirect_url.replace(year_base_url, ''))
                        except:
                            self.page = 0

                self.first = None
                self.last = None
                while self.page is not None:
                    result = self.getNextPage(id)
                    if result:
                        return result
        else:
            name = self.getAlternativeSeries(name)
    else:
        return (self.returnvalue or _("No matching series found"))
def getNextPage(self, id):
    url = EPISODEIDURL % (id, self.page)
    data = self.getPage(url)

    if data and isinstance(data, basestring):
        splog("getNextPage: basestring")
        data = self.parseNextPage(data)
        self.doCacheList(url, data)

    if data and isinstance(data, list):
        splog("getNextPage: list")
        trs = data
        yepisode = None
        ydelta = maxint

        #first = trs[0][2]
        #last = trs[-1][2]
        #print first[0:5]
        #print last[6:11]

        # trs[0] first line [2] second element = timestamps [a:b] use first time
        cust_date = trs[0][COL_TIME] + trs[0][COL_DATE]
        if len(cust_date) == 11:
            cust_date += trs[0][-1]
        splog(cust_date)
        if len(cust_date) != 15:
            return
        first = datetime.strptime(cust_date, "%H:%M%d.%m.%Y")

        # trs[-1] last line [2] second element = timestamps [a:b] use second time
        cust_date = trs[-1][COL_TIME] + trs[-1][COL_DATE]
        if len(cust_date) == 11:
            cust_date += trs[-1][-1]
        splog(cust_date)
        if len(cust_date) != 15:
            return
        last = datetime.strptime(cust_date, "%H:%M%d.%m.%Y")

        #first = first - self.td_max_time_drift
        #last = last + self.td_max_time_drift

        if self.page != 0:
            new_page = (self.first != first or self.last != last)
            splog("getNextPage: first_on_prev_page, first, last_on_prev_page, last, if: ", self.first, first, self.last, last, new_page)
            self.first = first
            self.last = last
        else:
            new_page = True

        if new_page:
            test_future_timespan = ((first - self.td_max_time_drift) <= self.begin and self.begin <= (last + self.td_max_time_drift))
            test_past_timespan = ((first + self.td_max_time_drift) >= self.begin and self.begin >= (last - self.td_max_time_drift))
            splog("first_on_page, self.begin, last_on_page, if, if:", first, self.begin, last, test_future_timespan, test_past_timespan)
            if (test_future_timespan or test_past_timespan):
                #search in page for matching datetime
                for tds in trs:
                    if tds and len(tds) >= 11:
                        # Grey's Anatomy
                        #OLD [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
                        #
                        # Gute Zeiten
                        #OLD [None, u'20.11.2012', u'06:40\u201307:20 Uhr', u'NDR', None, u'4187', u'Folge 4187']
                        #OLD [None, u'\xa0', None, u'5132', u'Folge 5132']
                        # Wahnfried
                        #OLD [u'Sa', u'26.12.1987', u'\u2013', u'So', u'27.12.1987', u'1Plus', None]

                        # First part: date, times, channel
                        xdate = tds[COL_DATE]
                        xbegin = tds[COL_TIME]
                        #splog( "tds", tds )
                        #xend = xbegin[6:11]
                        #xbegin = xbegin[0:5]
                        cust_date = xbegin + xdate
                        if len(cust_date) == 11:
                            cust_date += tds[-1]
                        splog(cust_date)
                        if len(cust_date) != 15:
                            continue
                        xbegin = datetime.strptime(cust_date, "%H:%M%d.%m.%Y")
                        #xend = datetime.strptime( xend+xdate, "%H:%M%d.%m.%Y" )
                        #print "xbegin", xbegin

                        #Py2.6
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        #Py2.7 delta = abs(self.begin - xbegin).total_seconds()
                        splog(self.begin, xbegin, delta, self.max_time_drift)

                        if delta <= self.max_time_drift:
                            if self.compareChannels(self.service, tds[COL_CHANNEL]):
                                if delta < ydelta:
                                    splog("tds", len(tds), tds)
                                    if len(tds) >= 10:
                                        # Second part: s1e1, s1e2,
                                        xseason = tds[COL_SEASON] or "1"
                                        xepisode = tds[COL_EPISODE]
                                        xtitle = tds[COL_TITLE]
                                    elif len(tds) >= 7:
                                        #TODO
                                        # Second part: s1e1, s1e2,
                                        xseason = tds[4]
                                        xepisode = tds[5]
                                        if xseason and xseason.find(".") != -1:
                                            xseason = xseason[:-1]
                                            xtitle = tds[6]
                                        else:
                                            xseason = "1"
                                            xtitle = tds[6]
                                    elif len(tds) == 6:
                                        xseason = "0"
                                        xepisode = "0"
                                        xtitle = tds[5]

                                    if xseason and xepisode and xtitle and self.series:
                                        # Handle encodings
                                        xtitle = str_to_utf8(xtitle)
                                        yepisode = (xseason, xepisode, xtitle, self.series)
                                        ydelta = delta
                                else:  #if delta >= ydelta:
                                    break
                            else:
                                self.returnvalue = _("Check the channel name")
                    elif yepisode:
                        break

        if yepisode:
            return (yepisode)
        else:
            # TODO calculate next page : use firstrow lastrow datetime
            if not self.future:
                if first > self.begin:
                    self.page -= 1
                    return
            else:
                if self.begin > last:
                    self.page += 1
                    return

    self.page = None
    return
def getNextPage(self, id): splog("WunschlistePrint getNextPage") url = EPISODEIDURLPRINT + urlencode({ 's' : id }) data = self.getPage( url ) if data and isinstance(data, basestring): data = self.parseNextPage(data) self.doCacheList(url, data) if data and isinstance(data, list): trs = data yepisode = None ydelta = maxint actual_year = self.actual_year for tds in trs: if tds and len(tds) >= 5: #print tds xchannel, xday, xdate, xbegin, xend = tds[:5] xtitle = "".join(tds[4:]) if self.actual_month == 12 and xdate.endswith(".01."): year = str(self.actual_year+1) else: year = str(self.actual_year) xbegin = datetime.strptime( xdate+year+" "+xbegin, "%d.%m.%Y %H.%M Uhr" ) #xend = datetime.strptime( xdate+xend, "%d.%m.%Y%H.%M Uhr" ) #splog(xchannel, xdate, xbegin, xend, xtitle) #splog(datebegin, xbegin, abs((datebegin - xbegin))) #Py2.6 delta = abs(self.begin - xbegin) delta = delta.seconds + delta.days * 24 * 3600 #Py2.7 delta = abs(self.begin - xbegin).total_seconds() splog(self.begin, xbegin, delta, self.max_time_drift) if delta <= self.max_time_drift: if self.compareChannels(self.service, xchannel): if delta < ydelta: print len(tds), tds if len(tds) >= 7: xepisode, xtitle = tds[5:7] if xepisode: result = CompiledRegexpEpisode.search(xepisode) if result and len(result.groups()) >= 3: xseason = result and result.group(2) or "1" xepisode = result and result.group(3) or "0" else: xseason = "1" xepisode = "0" else: xseason = "1" xepisode = "0" elif len(tds) == 6: xtitle = tds[5] xseason = "0" xepisode = "0" # Handle encodings xtitle = str_to_utf8(xtitle) yepisode = (xseason, xepisode, xtitle, self.series) ydelta = delta else: #if delta >= ydelta: break else: self.returnvalue = _("Check the channel name") elif yepisode: break if yepisode: return ( yepisode )