def get_wiki_preview(self, project, page, data):
    """Return the server-rendered HTML preview of some wiki text.

    When the browser is not already on the edit page of ``project`` /
    ``page``, that page is opened first. The parameters returned by
    ``self.get_submit()`` are then sent to the preview URL with
    ``content[text]`` replaced by ``data``.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name (URL-quoted before being inserted)
    :param data: wiki text to preview
    :return: result of ``lxml.html.tostring()`` on the preview fragment,
        wrapped in a ``div`` and stripped of its ``fieldset``/``legend``
    """
    quoted_page = quote(page)
    on_edit_page = (self.wiki_edit.is_here()
                    and self.page.params['project'] == project
                    and self.page.params['page'] == page)
    if not on_edit_page:
        edit_url = self.absurl(
            'projects/%s/wiki/%s/edit' % (project, quoted_page), True)
        self.location(edit_url)

    preview_url = self.absurl(
        'projects/%s/wiki/%s/preview' % (project, quoted_page), True)
    params = self.get_submit()
    params['content[text]'] = data

    preview_html = lxml.html.fragment_fromstring(
        self.open(preview_url, data=params), create_parent='div')

    # find() returns None when the element is absent; guard so that an
    # unexpected response does not end in an AttributeError.
    fieldset = preview_html.find("fieldset")
    if fieldset is not None:
        fieldset.drop_tag()
    # Looked up after the fieldset was unwrapped, as find() only sees
    # direct children of the wrapping div.
    legend = preview_html.find("legend")
    if legend is not None:
        legend.drop_tree()
    return lxml.html.tostring(preview_html)
def get_wiki_source(self, project, page, version=None):
    """Open the edit page of a wiki page and return its source.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name; encoded as UTF-8 and URL-quoted
    :param version: optional version appended as ``?version=``; ignored
        when falsy
    :return: whatever ``self.page.get_source()`` returns once loaded
    """
    quoted_page = quote(page.encode('utf-8'))
    parts = ['%s/projects/%s/wiki/%s/edit' % (self.BASEPATH, project, quoted_page)]
    if version:
        parts.append('?version=%s' % version)
    self.location(''.join(parts))
    return self.page.get_source()
def get_wiki_source(self, project, page, version=None):
    """Open the edit page of a wiki page and return its source.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name (URL-quoted before being inserted)
    :param version: optional version appended as ``?version=``; ignored
        when falsy
    :return: whatever ``self.page.get_source()`` returns once loaded
    """
    edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
    target = self.absurl(edit_path, True)
    if version:
        target = target + '?version=%s' % version
    self.location(target)
    return self.page.get_source()
def search_videos(self, pattern):
    """Search videos matching ``pattern`` and return the results iterator.

    Opens page ``'1'`` of ``self.search`` with ``self.lang['site']`` as the
    language and the URL-quoted pattern.

    :param pattern: search terms
    :return: whatever ``iter_videos()`` of the opened page returns
    """
    # The former webservice-based query that followed the return statement
    # could never run and has been removed.
    results_page = self.search.go(lang=self.lang['site'],
                                  pattern=quote(pattern),
                                  page='1')
    return results_page.iter_videos()
def get_wiki_preview(self, project, page, data):
    """Return the server-rendered HTML preview of some wiki text.

    When the browser is not already on the edit page of ``project`` /
    ``page``, that page is opened first. The parameters returned by
    ``self.get_submit()`` are then sent to the preview URL with
    ``content[text]`` replaced by ``data``.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name (URL-quoted before being inserted)
    :param data: wiki text to preview
    :return: result of ``lxml.html.tostring()`` on the preview fragment,
        wrapped in a ``div`` and stripped of its ``fieldset``/``legend``
    """
    quoted_page = quote(page)
    already_there = (self.wiki_edit.is_here()
                     and self.page.params['project'] == project
                     and self.page.params['page'] == page)
    if not already_there:
        self.location(self.absurl(
            'projects/%s/wiki/%s/edit' % (project, quoted_page), True))

    url = self.absurl(
        'projects/%s/wiki/%s/preview' % (project, quoted_page), True)
    params = self.get_submit()
    params['content[text]'] = data

    preview_html = lxml.html.fragment_fromstring(
        self.open(url, data=params), create_parent='div')

    # find() yields None for a missing element; skip the clean-up step in
    # that case instead of failing with an AttributeError.
    fieldset = preview_html.find("fieldset")
    if fieldset is not None:
        fieldset.drop_tag()
    # Searched only after the fieldset was unwrapped: find() looks at the
    # direct children of the wrapping div.
    legend = preview_html.find("legend")
    if legend is not None:
        legend.drop_tree()
    return lxml.html.tostring(preview_html)
def get_wiki_preview(self, project, page, data):
    """Return the server-rendered HTML preview of some wiki text.

    When the browser is not already on the ``WikiEditPage`` of ``project``
    / ``page``, that page is opened first. The text and the authenticity
    token read from the current page are then sent to the preview URL.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name; encoded as UTF-8 and URL-quoted
    :param data: wiki text to preview; encoded as UTF-8 before sending
    :return: result of ``lxml.html.tostring()`` on the preview fragment,
        wrapped in a ``div`` and stripped of its ``fieldset``/``legend``
    """
    quoted_page = quote(page.encode('utf-8'))
    on_edit_page = (self.is_on_page(WikiEditPage)
                    and self.page.groups[0] == project
                    and self.page.groups[1] == page)
    if not on_edit_page:
        self.location('%s/projects/%s/wiki/%s/edit'
                      % (self.BASEPATH, project, quoted_page))

    url = '%s/projects/%s/wiki/%s/preview' % (self.BASEPATH, project, quoted_page)
    params = {}
    params['content[text]'] = data.encode('utf-8')
    params['authenticity_token'] = "%s" % self.page.get_authenticity_token()

    preview_html = lxml.html.fragment_fromstring(
        self.readurl(url, urlencode(params)), create_parent='div')

    # find() yields None for a missing element; skip the clean-up step in
    # that case instead of failing with an AttributeError.
    fieldset = preview_html.find("fieldset")
    if fieldset is not None:
        fieldset.drop_tag()
    # Searched only after the fieldset was unwrapped: find() looks at the
    # direct children of the wrapping div.
    legend = preview_html.find("legend")
    if legend is not None:
        legend.drop_tree()
    return lxml.html.tostring(preview_html)
def search_videos(self, pattern):
    """Search videos matching ``pattern`` and return the results iterator.

    Opens page ``'1'`` of ``self.search`` with ``self.lang['site']`` as the
    language and the URL-quoted pattern.

    :param pattern: search terms
    :return: whatever ``iter_videos()`` of the opened page returns
    """
    # The statements that used to follow the return (a webservice-based
    # query) were unreachable and have been dropped.
    page = self.search.go(
        lang=self.lang['site'],
        pattern=quote(pattern),
        page='1',
    )
    return page.iter_videos()
def params_from_js(self, text):
    """Build the URL and POST arguments described by a JavaScript call.

    Every single-quoted string found in ``text`` is collected. The first
    one becomes the last path component of the returned URL; the following
    ones are assigned, in order, to the keys listed in ``self.ARGS``.

    :param text: JavaScript snippet containing single-quoted arguments
    :return: ``(url, args)``, or ``(None, None)`` when ``text`` holds at
        most one quoted string
    :raises IndexError: when fewer quoted strings remain than ``self.ARGS``
        has keys
    """
    quoted = re.findall("'([^']*)'", text)
    if len(quoted) <= 1:
        # For accounts that have no history
        return None, None

    url = ('/vos-comptes/IPT/appmanager/transac/' + self.browser.account_type
           + '?_nfpb=true&_windowLabel=portletInstance_18&_pageLabel=page_synthese_v1'
           + '&_cdnCltUrl=' + "/transacClippe/" + quote(quoted.pop(0)))

    # Start from the inputs of the "detail" form...
    args = {}
    for field in self.doc.xpath('//form[@name="detail"]/input'):
        args[field.attrib['name']] = field.attrib.get('value', '')

    # ...then override with the positional JavaScript arguments.
    for position, key in enumerate(self.ARGS):
        args[key] = unicode(quoted[position]).encode(self.browser.ENCODING)

    args['PageDemandee'] = 1
    args['PagePrecedente'] = 1
    return url, args
def search_videos(self, pattern):
    """Open the search URL for ``pattern`` and iterate over its videos.

    :param pattern: search terms; encoded as UTF-8 and URL-quoted
    :return: whatever ``self.page.iter_videos()`` returns
    """
    encoded_query = quote(pattern.encode('utf-8'))
    search_path = '/videolist/searchmodevideo/query%s/' % encoded_query
    self.location(search_path)
    assert self.is_on_page(ResultsPage)
    return self.page.iter_videos()
def obj_id(self):
    """Return the ``Field('name')`` value of this item, URL-quoted.

    The value is encoded as UTF-8 before being quoted.
    """
    name = Field('name')(self)
    encoded_name = name.encode('utf-8')
    return quote(encoded_name)
def params_from_js(self, text):
    """Build the URL and POST arguments described by a JavaScript call.

    Every single-quoted string found in ``text`` is collected. The first
    one becomes the last path component of the returned URL; the following
    ones are assigned, in order, to the keys listed in ``self.ARGS``.

    :param text: JavaScript snippet containing single-quoted arguments
    :return: ``(url, args)``, or ``(None, None)`` when ``text`` holds at
        most one quoted string
    :raises IndexError: when fewer quoted strings remain than ``self.ARGS``
        has keys
    """
    js_args = re.findall("'([^']*)'", text)
    if len(js_args) <= 1:
        # For accounts that have no history
        return None, None

    url = ('/vos-comptes/IPT/appmanager/transac/' + self.browser.account_type
           + '?_nfpb=true&_windowLabel=portletInstance_18&_pageLabel=page_synthese_v1'
           + '&_cdnCltUrl=' + "/transacClippe/" + quote(js_args.pop(0)))

    # Seed the arguments with the inputs of the "detail" form.
    args = {}
    for form_input in self.doc.xpath('//form[@name="detail"]/input'):
        args[form_input.attrib['name']] = form_input.attrib.get('value', '')

    # The remaining JavaScript arguments map positionally onto self.ARGS.
    for index, key in enumerate(self.ARGS):
        args[key] = unicode(js_args[index]).encode(self.browser.ENCODING)

    args['PageDemandee'] = 1
    args['PagePrecedente'] = 1
    return url, args
def set_wiki_source(self, project, page, data, message):
    """Open the edit page of a wiki page and submit new content.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name (URL-quoted before being inserted)
    :param data: new source, passed to ``self.page.set_source()``
    :param message: second argument passed to ``self.page.set_source()``
    """
    edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
    edit_url = self.absurl(edit_path, True)
    self.location(edit_url)
    self.page.set_source(data, message)
def obj_id(self):
    """Return the ``Field('name')`` value of this item, URL-quoted."""
    name = Field('name')(self)
    return quote(name)
def set_wiki_source(self, project, page, data, message):
    """Open the edit page of a wiki page and submit new content.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name; encoded as UTF-8 and URL-quoted
    :param data: new source, passed to ``self.page.set_source()``
    :param message: second argument passed to ``self.page.set_source()``
    """
    quoted_page = quote(page.encode('utf-8'))
    edit_url = '%s/projects/%s/wiki/%s/edit' % (self.BASEPATH, project, quoted_page)
    self.location(edit_url)
    self.page.set_source(data, message)
def get_video(self, video=None):
    """Fill a video object from the current page and return it.

    Three page layouts are handled, numbered as in the comments below:

    * type 1 or 2 (no iframe): extension and URL are read from the
      arguments of the ``SWFObject`` script;
    * type 3 (iframe on ``/play/mediaProxy.php``): PDF slides;
    * type 4 (iframe player with an ``xmlURL`` parameter): the XML config
      file is downloaded to get the rtmp URL, the date and the duration.

    :param video: object to fill; when ``None`` a ``GDCVaultVideo`` is
        created from ``self.group_dict['id']``
    :return: the filled video, or ``None`` when no media could be located
    :raises BrokenPageError: when the ``akamaihost`` element of the config
        is empty, or when the smil file lists no video with a positive
        bitrate
    """
    # check for slides id variant
    want_slides = False
    if re.match(r'.*#slides', self.url):
        want_slides = True
        # not sure it's safe
        self.group_dict['id'] += '#slides'

    if video is None:
        video = GDCVaultVideo(self.group_dict['id'])

    # the config file has it too, but in CDATA and only for type 4
    obj = self.parser.select(self.document.getroot(), 'title')
    title = None
    if len(obj) > 0:
        try:
            title = unicode(obj[0].text)
        except UnicodeDecodeError:
            title = None

    if title is None:
        obj = self.parser.select(self.document.getroot(), 'meta[name=title]')
        if len(obj) > 0 and 'content' in obj[0].attrib:
            try:
                # FIXME: 1013483 has a bogus (latin1) title;
                # for now it is passed as-is
                title = obj[0].attrib['content']
            except UnicodeDecodeError:
                # the codec name used to be misspelled ('iso-5589-15'),
                # which could only ever raise LookupError
                title = obj[0].attrib['content'].decode('iso-8859-15')

    if title is not None:
        title = title.strip()
        m = re.match(r'GDC Vault\s+-\s+(.*)', title)
        if m:
            title = m.group(1)
        video.title = title

    # TODO: POST back the title to /search.php and filter == id to get
    # cleaner (JSON) data... (though it'd be much slower)

    # try to find an iframe (type 3 and 4)
    obj = self.parser.select(self.document.getroot(), 'iframe')
    if len(obj) == 0:
        # type 1 or 2 (swf+js)
        # find which script element contains the swf args
        for script in self.parser.select(self.document.getroot(), 'script'):
            script_text = unicode(script.text)
            m = re.match(r".*new SWFObject.*addVariable\('type', '(.*)'\).*",
                         script_text, re.DOTALL)
            if m:
                video.ext = m.group(1)

            m = re.match(r'.*new SWFObject.*addVariable\("file", encodeURIComponent\("(.*)"\)\).*',
                         script_text, re.DOTALL)
            if not m:
                continue

            video.url = "http://gdcvault.com%s" % (m.group(1))
            # TODO: for non-free (like 769),
            # must be logged to use /mediaProxy.php

            # FIXME: doesn't seem to work yet, we get 2 bytes as html
            # 769 should give:
            # http://twvideo01.ubm-us.net/o1/gdcradio-net/2007/gdc/GDC07-4889.mp3
            # HACK: we use mechanize directly here for now... FIXME
            self.browser.set_handle_redirect(False)
            try:
                self.browser.open_novisit(video.url)
            except HTTPError as e:
                # the real location is carried by the 302 answer
                if e.getcode() == 302 and hasattr(e, 'hdrs'):
                    video.url = unicode(e.hdrs['Location'])
            finally:
                # restore redirect handling even on an unexpected error
                self.browser.set_handle_redirect(True)

            video.set_empty_fields(NotAvailable)
            return video

        # XXX: raise error?
        return None

    obj = obj[0]
    if obj is None:
        return None
    # type 3 or 4 (iframe)
    # get the config file for the rest
    iframe_url = obj.attrib['src']

    # 1015020 has a bogus url
    m = re.match(r'http:/event(.+)', iframe_url)
    if m:
        iframe_url = 'http://event' + m.group(1)

    # 1013798 has player169.html
    # 1012186 has player16x9.html
    # some other have /somethingplayer.html...
    # 1441 has a space in the xml filename, which we must not strip
    m = re.match(r'(http:.*/)[^/]*player[0-9a-z]*\.html\?.*xmlURL=([^&]+\.xml).*\&token=([^& ]+)',
                 iframe_url)

    if not m:
        m = re.match(r'/play/mediaProxy\.php\?sid=(\d+)', iframe_url)
        if m is None:
            return None
        # TODO: must be logged to use /mediaProxy.php
        # type 3 (pdf slides)
        video.ext = u'pdf'
        video.url = "http://gdcvault.com%s" % (unicode(iframe_url))

        # HACK: we use mechanize directly here for now... FIXME
        self.browser.set_handle_redirect(False)
        try:
            self.browser.open_novisit(video.url)
        except HTTPError as e:
            if e.getcode() == 302 and hasattr(e, 'hdrs'):
                video.url = unicode(e.hdrs['Location'])
        finally:
            # restore redirect handling even on an unexpected error
            self.browser.set_handle_redirect(True)

        video.set_empty_fields(NotAvailable)
        return video

    # type 4 (dual screen video)

    # token doesn't actually seem required
    # 1441 has a space in the xml filename
    xml_filename = quote(m.group(2))
    config_url = m.group(1) + xml_filename + '?token=' + m.group(3)

    # TODO: fix for 1015021 & others (forbidden)
    config = self.browser.get_document(self.browser.openurl(config_url))

    obj = self.parser.select(config.getroot(), 'akamaihost', 1)
    host = obj.text
    if host is None:
        raise BrokenPageError('Missing tag in xml config file')

    if host == "smil":
        # the rtmp URL is described in a smil file,
        # with several available bitrates
        obj = self.parser.select(config.getroot(), 'speakervideo', 1)
        smil = self.browser.get_document(self.browser.openurl(obj.text))
        obj = self.parser.select(smil.getroot(), 'meta', 1)
        # TODO: error checking
        base = obj.attrib.get('base', '')

        # choose the best bitrate
        best_bitrate = 0
        path = None
        for candidate in self.parser.select(smil.getroot(), 'video'):
            rate = int(candidate.attrib.get('system-bitrate', 0))
            if rate > best_bitrate:
                # remember the rate so that only a better one replaces it
                best_bitrate = rate
                path = candidate.attrib.get('src', '')
        if path is None:
            raise BrokenPageError('Missing video in smil file')
        video.url = unicode(base + '/' + path)

    else:
        # not smil, the rtmp url is directly here as host + path
        # for id 1373 host is missing '/ondemand'
        # only add it when only a domain is specified without path
        if re.match(r'^[^\/]+$', host):
            host += "/ondemand"

        videos = {}

        obj = self.parser.select(config.getroot(), 'speakervideo', 1)
        if obj.text is not None:
            videos['speaker'] = 'rtmp://' + host + '/' + quote(obj.text)

        obj = self.parser.select(config.getroot(), 'slidevideo', 1)
        if obj.text is not None:
            videos['slides'] = 'rtmp://' + host + '/' + quote(obj.text)

        # XXX
        if 'speaker' in videos:
            video.url = unicode(videos['speaker'])
        elif 'slides' in videos:
            # 1016627 only has slides, so fallback to them
            video.url = unicode(videos['slides'])

        if want_slides and 'slides' in videos:
            video.url = unicode(videos['slides'])
        # if video.url is none: raise ? XXX

    obj = self.parser.select(config.getroot(), 'date', 1)
    if obj.text is not None:
        # 1016634 has "Invalid Date"
        try:
            video.date = parse_dt(obj.text)
        except ValueError:
            video.date = NotAvailable

    obj = self.parser.select(config.getroot(), 'duration', 1)
    # an empty <duration> has text None, which re.match() rejects
    m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text or '')
    if m:
        video.duration = datetime.timedelta(hours=int(m.group(1)),
                                            minutes=int(m.group(2)),
                                            seconds=int(m.group(3)))

    # NOTE(review): the result is unused; the call is kept in case
    # select(..., 1) raises on a missing element -- confirm and drop.
    obj = self.parser.select(config.getroot(), 'speaker', 1)

    video.set_empty_fields(NotAvailable)
    return video
def set_wiki_source(self, project, page, data, message):
    """Open the edit page of a wiki page and submit new content.

    :param project: project identifier inserted in the URL path
    :param page: wiki page name (URL-quoted before being inserted)
    :param data: new source, passed to ``self.page.set_source()``
    :param message: second argument passed to ``self.page.set_source()``
    """
    quoted_page = quote(page)
    target = self.absurl('projects/%s/wiki/%s/edit' % (project, quoted_page), True)
    self.location(target)
    self.page.set_source(data, message)
def get_video(self, video=None):
    """Fill a video object from the current page and return it.

    Three page layouts are handled, numbered as in the comments below:

    * type 1 or 2 (no iframe): extension and URL are read from the
      arguments of the ``SWFObject`` script;
    * type 3 (iframe on ``/play/mediaProxy.php``): PDF slides;
    * type 4 (iframe player with an ``xmlURL`` parameter): the XML config
      file is downloaded to get the rtmp URL, the date and the duration.

    :param video: object to fill; when ``None`` a ``GDCVaultVideo`` is
        created from ``self.group_dict['id']``
    :return: the filled video, or ``None`` when no media could be located
    :raises BrokenPageError: when the ``akamaihost`` element of the config
        is empty, or when the smil file lists no video with a positive
        bitrate
    """
    # check for slides id variant
    want_slides = False
    if re.match(r'.*#slides', self.url):
        want_slides = True
        # not sure it's safe
        self.group_dict['id'] += '#slides'

    if video is None:
        video = GDCVaultVideo(self.group_dict['id'])

    # the config file has it too, but in CDATA and only for type 4
    obj = self.parser.select(self.document.getroot(), 'title')
    title = None
    if len(obj) > 0:
        try:
            title = unicode(obj[0].text)
        except UnicodeDecodeError:
            title = None

    if title is None:
        obj = self.parser.select(self.document.getroot(), 'meta[name=title]')
        if len(obj) > 0 and 'content' in obj[0].attrib:
            try:
                # FIXME: 1013483 has a bogus (latin1) title;
                # for now it is passed as-is
                title = obj[0].attrib['content']
            except UnicodeDecodeError:
                # the codec name used to be misspelled ('iso-5589-15'),
                # which could only ever raise LookupError
                title = obj[0].attrib['content'].decode('iso-8859-15')

    if title is not None:
        title = title.strip()
        m = re.match(r'GDC Vault\s+-\s+(.*)', title)
        if m:
            title = m.group(1)
        video.title = title

    # TODO: POST back the title to /search.php and filter == id to get
    # cleaner (JSON) data... (though it'd be much slower)

    # try to find an iframe (type 3 and 4)
    obj = self.parser.select(self.document.getroot(), 'iframe')
    if len(obj) == 0:
        # type 1 or 2 (swf+js)
        # find which script element contains the swf args
        for script in self.parser.select(self.document.getroot(), 'script'):
            script_text = unicode(script.text)
            m = re.match(r".*new SWFObject.*addVariable\('type', '(.*)'\).*",
                         script_text, re.DOTALL)
            if m:
                video.ext = m.group(1)

            m = re.match(r'.*new SWFObject.*addVariable\("file", encodeURIComponent\("(.*)"\)\).*',
                         script_text, re.DOTALL)
            if not m:
                continue

            video.url = "http://gdcvault.com%s" % (m.group(1))
            # TODO: for non-free (like 769),
            # must be logged to use /mediaProxy.php

            # FIXME: doesn't seem to work yet, we get 2 bytes as html
            # 769 should give:
            # http://twvideo01.ubm-us.net/o1/gdcradio-net/2007/gdc/GDC07-4889.mp3
            # HACK: we use mechanize directly here for now... FIXME
            self.browser.set_handle_redirect(False)
            try:
                self.browser.open_novisit(video.url)
            except HTTPError as e:
                # the real location is carried by the 302 answer
                if e.getcode() == 302 and hasattr(e, 'hdrs'):
                    video.url = unicode(e.hdrs['Location'])
            finally:
                # restore redirect handling even on an unexpected error
                self.browser.set_handle_redirect(True)

            video.set_empty_fields(NotAvailable)
            return video

        # XXX: raise error?
        return None

    obj = obj[0]
    if obj is None:
        return None
    # type 3 or 4 (iframe)
    # get the config file for the rest
    iframe_url = obj.attrib['src']

    # 1015020 has a bogus url
    m = re.match(r'http:/event(.+)', iframe_url)
    if m:
        iframe_url = 'http://event' + m.group(1)

    # 1013798 has player169.html
    # 1012186 has player16x9.html
    # some other have /somethingplayer.html...
    # 1441 has a space in the xml filename, which we must not strip
    m = re.match(r'(http:.*/)[^/]*player[0-9a-z]*\.html\?.*xmlURL=([^&]+\.xml).*\&token=([^& ]+)',
                 iframe_url)

    if not m:
        m = re.match(r'/play/mediaProxy\.php\?sid=(\d+)', iframe_url)
        if m is None:
            return None
        # TODO: must be logged to use /mediaProxy.php
        # type 3 (pdf slides)
        video.ext = u'pdf'
        video.url = "http://gdcvault.com%s" % (unicode(iframe_url))

        # HACK: we use mechanize directly here for now... FIXME
        self.browser.set_handle_redirect(False)
        try:
            self.browser.open_novisit(video.url)
        except HTTPError as e:
            if e.getcode() == 302 and hasattr(e, 'hdrs'):
                video.url = unicode(e.hdrs['Location'])
        finally:
            # restore redirect handling even on an unexpected error
            self.browser.set_handle_redirect(True)

        video.set_empty_fields(NotAvailable)
        return video

    # type 4 (dual screen video)

    # token doesn't actually seem required
    # 1441 has a space in the xml filename
    xml_filename = quote(m.group(2))
    config_url = m.group(1) + xml_filename + '?token=' + m.group(3)

    # TODO: fix for 1015021 & others (forbidden)
    config = self.browser.get_document(self.browser.openurl(config_url))

    obj = self.parser.select(config.getroot(), 'akamaihost', 1)
    host = obj.text
    if host is None:
        raise BrokenPageError('Missing tag in xml config file')

    if host == "smil":
        # the rtmp URL is described in a smil file,
        # with several available bitrates
        obj = self.parser.select(config.getroot(), 'speakervideo', 1)
        smil = self.browser.get_document(self.browser.openurl(obj.text))
        obj = self.parser.select(smil.getroot(), 'meta', 1)
        # TODO: error checking
        base = obj.attrib.get('base', '')

        # choose the best bitrate
        best_bitrate = 0
        path = None
        for candidate in self.parser.select(smil.getroot(), 'video'):
            rate = int(candidate.attrib.get('system-bitrate', 0))
            if rate > best_bitrate:
                # remember the rate so that only a better one replaces it
                best_bitrate = rate
                path = candidate.attrib.get('src', '')
        if path is None:
            raise BrokenPageError('Missing video in smil file')
        video.url = unicode(base + '/' + path)

    else:
        # not smil, the rtmp url is directly here as host + path
        # for id 1373 host is missing '/ondemand'
        # only add it when only a domain is specified without path
        if re.match(r'^[^\/]+$', host):
            host += "/ondemand"

        videos = {}

        obj = self.parser.select(config.getroot(), 'speakervideo', 1)
        if obj.text is not None:
            videos['speaker'] = 'rtmp://' + host + '/' + quote(obj.text)

        obj = self.parser.select(config.getroot(), 'slidevideo', 1)
        if obj.text is not None:
            videos['slides'] = 'rtmp://' + host + '/' + quote(obj.text)

        # XXX
        if 'speaker' in videos:
            video.url = unicode(videos['speaker'])
        elif 'slides' in videos:
            # 1016627 only has slides, so fallback to them
            video.url = unicode(videos['slides'])

        if want_slides and 'slides' in videos:
            video.url = unicode(videos['slides'])
        # if video.url is none: raise ? XXX

    obj = self.parser.select(config.getroot(), 'date', 1)
    if obj.text is not None:
        # 1016634 has "Invalid Date"
        try:
            video.date = parse_dt(obj.text)
        except ValueError:
            video.date = NotAvailable

    obj = self.parser.select(config.getroot(), 'duration', 1)
    # an empty <duration> has text None, which re.match() rejects
    m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text or '')
    if m:
        video.duration = datetime.timedelta(hours=int(m.group(1)),
                                            minutes=int(m.group(2)),
                                            seconds=int(m.group(3)))

    # NOTE(review): the result is unused; the call is kept in case
    # select(..., 1) raises on a missing element -- confirm and drop.
    obj = self.parser.select(config.getroot(), 'speaker', 1)

    video.set_empty_fields(NotAvailable)
    return video