def parseItempage(self):
    if not self.hasnext:
        log("%s: no more items" % self.site)
        return []
    # Build the search URL for the current result page.
    url = self.get('URL_SEARCH') % (self.query, self.pagecount)
    if DEBUG:
        print >>sys.stderr, "web2: gs: Retrieving url", url
    try:
        pageconn = urllib.urlopen(url)
    except IOError:
        # Network failure: report no items rather than crashing the search.
        return []
    page = pageconn.read().replace('\n', '')
    pageconn.close()
    if DEBUG:
        print >>sys.stderr, 'web2: gs: Regexp: %s' % self.get('RE_SEARCHITEM')
    items = re.findall(self.get('RE_SEARCHITEM'), page, re.S)
    self.pagecount += 1
    # If the site config defines a "has next page" pattern and it does not
    # match, this was the last result page.
    RE_RESULTS_HASNEXT = self.get('RE_RESULTS_HASNEXT')
    if RE_RESULTS_HASNEXT:
        if len(re.findall(RE_RESULTS_HASNEXT, page)) == 0:
            self.hasnext = False
    if DEBUG:
        print >>sys.stderr, 'web2: gs: Items found: %s' % str(items)
    return items
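# A hedged sketch of the per-site configuration that parseItempage() reads
# through self.get(). The keys are the ones used above; every value below is
# a hypothetical placeholder, not a real site's settings.
EXAMPLE_SITE_CONFIG = {
    # search url template, filled with (query, page number)
    'URL_SEARCH': 'http://video.example.com/search?q=%s&p=%d',
    # one match per result item, here capturing (id, thumbnail url)
    'RE_SEARCHITEM': r'href="/watch\?v=(\w+)".*?img src="(\S+)"',
    # matches on every page except the last one; None disables the check
    'RE_RESULTS_HASNEXT': r'class="next-page"',
}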
def update(self, subject, item):
    self.lock.acquire()
    try:
        if item is None:
            # A None item means this subject (sub-search) is exhausted.
            log("CompoundSearch: update -> received None")
            subject.detach(self)
            subject.quit()
            self.searches.remove(subject)
            if len(self.searches) == 0:
                log("CompoundSearch: no more items")
                self.notify(None)
        else:
            self.total += 1
            log("CompoundSearch: update -> wanted:" + str(self.wanted) +
                ", update -> new total:" + str(self.total))
            if self.total == self.wanted:
                # Target reached: tell every sub-search to stop fetching.
                log("CompoundSearch: update -> Enough")
                self.giveSearchFeedback(True, self.total)
                for search in self.searches:
                    search.enough()
            if self.total < self.wanted:
                self.giveSearchFeedback(False, self.total)
            if self.total <= self.wanted:
                if DEBUG:
                    print >>sys.stderr, "web2: db: CompoundSearch: returning an item"
                self.notify(item)
            else:
                # Over quota: queue the item instead of forwarding it.
                self.items.append(item)
    finally:
        # Release even if an observer raises, so the search cannot deadlock.
        self.lock.release()
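# A hedged sketch (not the actual Tribler classes) of the subject-side
# contract that CompoundSearch.update() relies on: a sub-search pushes each
# parsed item to its observer and pushes None exactly once when it is done.
class ExampleSubSearch:
    def __init__(self):
        self.observers = []

    def attach(self, observer):
        self.observers.append(observer)

    def detach(self, observer):
        self.observers.remove(observer)

    def quit(self):
        pass  # a real search would stop its worker thread here

    def enough(self):
        pass  # a real search would stop fetching further result pages

    def push(self, items):
        # Deliver every item, then the end-of-stream marker.
        for it in items:
            for o in list(self.observers):
                o.update(self, it)
        for o in list(self.observers):
            o.update(self, None)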
def VideoTranscode(input, output):
    # Transcode into a hidden temp file in the target directory, then move it
    # into place only on success, so readers never see a partial file.
    tmpout = os.path.join(os.path.dirname(output), "." + os.path.basename(output))
    cmd = copy.deepcopy(settings.VIDDECODE_CMD)
    cmd[settings.VIDDECODE_CMD_IO[0]] = input
    cmd[settings.VIDDECODE_CMD_IO[1]] = tmpout
    log("Video transcoder: doing: " + str(cmd))
    proc = subprocess.Popen(cmd)
    rcode = proc.wait()
    if rcode == 0 and os.path.exists(tmpout):
        shutil.move(tmpout, output)
    else:
        # The transcoder may have failed before creating the temp file.
        if os.path.exists(tmpout):
            os.remove(tmpout)
        raise RuntimeError("Transcoding failed")
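# A hedged example of how settings.VIDDECODE_CMD and VIDDECODE_CMD_IO might
# be laid out for VideoTranscode(): an argv list with two placeholder slots,
# plus the indices where the input and output paths get patched in. The
# ffmpeg invocation is illustrative, not the project's actual command.
EXAMPLE_VIDDECODE_CMD = ['ffmpeg', '-y', '-i', 'INPUT', 'OUTPUT']
EXAMPLE_VIDDECODE_CMD_IO = (3, 4)  # cmd[3] <- input path, cmd[4] <- tmp output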
def run(self):
    if self.rate is not None:
        # Rating already known: notify observers without hitting the network.
        self.notify(self.rate)
        return
    try:
        url = utilsettings.RATINGGET % (self.flatid, settings.INSTALL_ID)
        log(url)
        conn = urllib.urlopen(url)
        response = conn.read()
        conn.close()
        log(response)
        # Response is two space-separated fields: rating and vote count.
        rating = re.findall("([^ ]*) ([^ ]*)", response)[0]
        log(str(rating))
        self.notify((int(rating[0]), int(rating[1])))
    except:
        # Network or parse failure: silently skip the rating update.
        pass
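# The rating server's reply is assumed (judging from the regexp above) to be
# two space-separated integers, e.g. "4 123" for rating 4 from 123 voters.
# A standalone check of that parse:
#
#   >>> re.findall("([^ ]*) ([^ ]*)", "4 123")[0]
#   ('4', '123')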
def parseItem(self, workitem):
    if DEBUG:
        print >>sys.stderr, 'web2: gs: parseItem called with %s' % str(workitem)
    item = {}
    id = workitem[0]
    item['infohash'] = id
    url = self.get('URL_WATCH') % id
    if DEBUG:
        print >>sys.stderr, "web2: gs: getting URL", url
    conn = urllib.urlopen(url)
    itempage = conn.read().replace('\n', '')
    srcpage = itempage
    conn.close()

    # Liveleak needs a separate page to get the video url
    url_for_src = self.get('URL_SRC')
    if url_for_src:
        url_for_src = url_for_src % id
        if DEBUG:
            print >>sys.stderr, "web2: gs: getting URL", url_for_src
        conn = urllib.urlopen(url_for_src)
        srcpage = conn.read().replace('\n', '')
        conn.close()

    # Either get the video link by formatting the id ...
    if self.get('VIDEO_URL'):
        src = self.get('VIDEO_URL') % id
    else:
        # ... or by trying the configured regexps RE_VIDEO_URL0, RE_VIDEO_URL1, ...
        trynum = 0
        success = False
        src = None
        while True:
            regexp = self.get('RE_VIDEO_URL%d' % trynum)
            if not regexp:
                break
            src = re.findall(regexp, srcpage, re.S | re.I)
            if len(src) == 1:
                success = True
                src = src[0]
                break
            trynum += 1
        if not success:
            if DEBUG:
                print >>sys.stderr, 'web2: gs: Error, src=%s' % src
            return None

    # Youtube needs the id embedded in a download url
    id2url = self.get('URL_DL_VIDEO')
    if id2url:
        src = id2url % src
    unquote_url = self.get('UNQUOTE')
    if DEBUG:
        print >>sys.stderr, 'web2: gs: unquote: %s' % unquote_url
    if unquote_url:
        src = urllib.unquote(src)
    if DEBUG:
        print >>sys.stderr, 'web2: gs: Got video url: %s' % src

    name = re.findall(self.get('RE_NAME'), itempage)
    if len(name) == 0:
        if DEBUG:
            print >>sys.stderr, 'web2: gs: Cannot find item name, error'
        return None
    ENCODING = self.get('ENCODING')
    name = unicode(name[0], ENCODING)
    name = codec.decodehtml(name)
    item['content_name'] = name

    # Category extraction is disabled for now:
    # category = re.findall(RE_CAT, itempage)
    # if len(category) == 0:
    #     if DEBUG:
    #         print >>sys.stderr, 'web2: gs: Youtube: Category error'
    #     return None
    # category = unicode(category[0], ENCODING)
    # category = codec.decodehtml(category)
    # item['category'] = category

    tags = re.findall(self.get('RE_TAG'), itempage)
    if len(tags) == 1:
        tags = re.findall(self.get('RE_TAG2'), tags[0])
    else:
        tags = []
    unicodetags = []
    for tag in tags:
        unicodetags.append(unicode(tag, ENCODING))
    # Store the decoded tags (the raw byte strings were stored by mistake).
    item['tags'] = unicodetags
    #item = video.VideoItem((site, id), name, YoutubeDownload, unicodetags, category)

    if workitem[1].lower().startswith('http'):
        # workitem[1] is the thumbnail url; fetch it as preview data.
        conn = urllib.urlopen(workitem[1])
        item['preview'] = conn.read()
        conn.close()
    log("Generic search: returning " + item['content_name'])

    try:
        desc = re.findall(self.get('RE_DESC'), itempage, re.S)[0]
        if desc.strip() == '':
            raise RuntimeError()
        item['description'] = "from %s: %s" % (self.site, desc)
    except:
        item['description'] = "from %s" % self.site
    item['description'] = codec.decodehtml(unicode(item['description'], ENCODING))
    item['web2'] = True
    assert type(src) == str, \
        "Url of video was not string, but %s (site:%s, name:%s)" % (repr(src), self.site, name)
    item['url'] = src

    item['views'] = 'unknown'
    if self.get('RE_VIEWS'):
        views = re.findall(self.get('RE_VIEWS'), itempage)
        if views:
            # Keep only the digits, e.g. "1,234 views" -> 1234.
            item['views'] = int(filter(lambda x: x.isdigit(), views[0]))
    if self.get('RE_DATE'):
        date = re.findall(self.get('RE_DATE'), itempage, re.S)
        if date:
            d = GenericDateParser(date[0], self)
            if d is None:
                # Fall back to the raw date string if parsing failed.
                d = date[0]
            item['info'] = {'creation date': d}
            item['date'] = item['info']['creation date']
    item['status'] = 'good'
    item['seeder'] = 1
    item['leecher'] = 1
    try:
        item['length'] = workitem[2]
    except IndexError:
        item['length'] = 'unknown'
    return item
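# A hedged sketch of how parseItempage() and parseItem() combine in a caller;
# the function and its names are illustrative, not the actual driver code.
def example_collect(search, wanted):
    results = []
    while search.hasnext and len(results) < wanted:
        for workitem in search.parseItempage():
            item = search.parseItem(workitem)
            if item is not None:
                results.append(item)
            if len(results) >= wanted:
                break
    return results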