Exemple #1
0
 def get_wiki_preview(self, project, page, data):
     """Return the HTML preview of *data* for a wiki page.

     If the browser is not already on the edit page of this exact
     project/page, that edit page is opened first. The text is then
     submitted to the page's ``preview`` URL using the form values returned
     by ``self.get_submit()``.

     :param project: project identifier used in the URL
     :param page: wiki page name (URL-quoted before use)
     :param data: wiki text to preview
     :returns: serialized HTML of the preview, wrapped in a ``div``
     """
     already_editing = (self.wiki_edit.is_here()
                        and self.page.params['project'] == project
                        and self.page.params['page'] == page)
     if not already_editing:
         edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
         self.location(self.absurl(edit_path, True))

     preview_path = 'projects/%s/wiki/%s/preview' % (project, quote(page))
     preview_url = self.absurl(preview_path, True)

     form = self.get_submit()
     form['content[text]'] = data
     response = self.open(preview_url, data=form)

     fragment = lxml.html.fragment_fromstring(response, create_parent='div')
     # Unwrap the fieldset (its children are kept) and discard the legend.
     fragment.find("fieldset").drop_tag()
     fragment.find("legend").drop_tree()
     return lxml.html.tostring(fragment)
Exemple #2
0
 def get_wiki_source(self, project, page, version=None):
     """Open the edit page of a wiki page and return its source.

     :param project: project identifier used in the URL
     :param page: wiki page name; UTF-8 encoded and URL-quoted before use
     :param version: optional version, appended as ``?version=...`` when
         truthy
     :returns: the value of ``self.page.get_source()`` after navigation
     """
     target = '%s/projects/%s/wiki/%s/edit' % (
         self.BASEPATH, project, quote(page.encode('utf-8')))
     if version:
         target = target + '?version=%s' % version
     self.location(target)

     source = self.page.get_source()
     return source
Exemple #3
0
 def get_wiki_source(self, project, page, version=None):
     """Open the edit page of a wiki page and return its source.

     :param project: project identifier used in the URL
     :param page: wiki page name (URL-quoted before use)
     :param version: optional version, appended as ``?version=...`` when
         truthy
     :returns: the value of ``self.page.get_source()`` after navigation
     """
     edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
     target = self.absurl(edit_path, True)
     if version:
         target = target + '?version=%s' % version
     self.location(target)

     source = self.page.get_source()
     return source
Exemple #4
0
    def search_videos(self, pattern):
        """Search videos matching *pattern* through the site's search page.

        The pattern is URL-quoted and the first result page (``page='1'``)
        is requested for the language stored in ``self.lang['site']``.

        :param pattern: search terms
        :returns: the result of ``iter_videos()`` on the page returned by
            ``self.search.go(...)``
        """
        results_page = self.search.go(lang=self.lang['site'],
                                      pattern=quote(pattern),
                                      page='1')
        return results_page.iter_videos()
Exemple #5
0
 def get_wiki_preview(self, project, page, data):
     """Return the HTML preview of *data* for a wiki page.

     If the browser is not already on the edit page of this exact
     project/page, that edit page is opened first. The text is then
     submitted to the page's ``preview`` URL using the form values returned
     by ``self.get_submit()``.

     :param project: project identifier used in the URL
     :param page: wiki page name (URL-quoted before use)
     :param data: wiki text to preview
     :returns: serialized HTML of the preview, wrapped in a ``div``
     """
     already_editing = (self.wiki_edit.is_here()
                        and self.page.params['project'] == project
                        and self.page.params['page'] == page)
     if not already_editing:
         edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
         self.location(self.absurl(edit_path, True))

     preview_path = 'projects/%s/wiki/%s/preview' % (project, quote(page))
     preview_url = self.absurl(preview_path, True)

     form = self.get_submit()
     form['content[text]'] = data
     response = self.open(preview_url, data=form)

     fragment = lxml.html.fragment_fromstring(response, create_parent='div')
     # Unwrap the fieldset (its children are kept) and discard the legend.
     fragment.find("fieldset").drop_tag()
     fragment.find("legend").drop_tree()
     return lxml.html.tostring(fragment)
Exemple #6
0
 def get_wiki_preview(self, project, page, data):
     """Return the HTML preview of *data* for a wiki page.

     If the browser is not already on the WikiEditPage of this exact
     project/page, that edit page is opened first. The text and the page's
     authenticity token are then posted, URL-encoded, to the ``preview``
     URL.

     :param project: project identifier used in the URL
     :param page: wiki page name; UTF-8 encoded and URL-quoted before use
     :param data: wiki text to preview; UTF-8 encoded before posting
     :returns: serialized HTML of the preview, wrapped in a ``div``
     """
     already_editing = (self.is_on_page(WikiEditPage)
                        and self.page.groups[0] == project
                        and self.page.groups[1] == page)
     if not already_editing:
         edit_url = '%s/projects/%s/wiki/%s/edit' % (
             self.BASEPATH, project, quote(page.encode('utf-8')))
         self.location(edit_url)

     preview_url = '%s/projects/%s/wiki/%s/preview' % (
         self.BASEPATH, project, quote(page.encode('utf-8')))
     form = {
         'content[text]': data.encode('utf-8'),
         'authenticity_token': "%s" % self.page.get_authenticity_token(),
     }
     response = self.readurl(preview_url, urlencode(form))

     fragment = lxml.html.fragment_fromstring(response, create_parent='div')
     # Unwrap the fieldset (its children are kept) and discard the legend.
     fragment.find("fieldset").drop_tag()
     fragment.find("legend").drop_tree()
     return lxml.html.tostring(fragment)
Exemple #7
0
    def search_videos(self, pattern):
        """Search videos matching *pattern* through the site's search page.

        The pattern is URL-quoted and the first result page (``page='1'``)
        is requested for the language stored in ``self.lang['site']``.

        :param pattern: search terms
        :returns: the result of ``iter_videos()`` on the page returned by
            ``self.search.go(...)``
        """
        results_page = self.search.go(lang=self.lang['site'],
                                      pattern=quote(pattern),
                                      page='1')
        return results_page.iter_videos()
Exemple #8
0
    def params_from_js(self, text):
        """Turn the arguments of a JavaScript call into a URL and POST args.

        Every single-quoted string found in *text* is collected. The first
        one is URL-quoted and appended to the transaction URL; the remaining
        ones are mapped onto the names listed in ``self.ARGS`` (by position)
        and merged over the values of the ``input`` elements found directly
        under the form named ``detail``.

        :param text: JavaScript snippet containing single-quoted arguments
        :returns: ``(url, args)``, or ``(None, None)`` when at most one
            quoted argument is present
        """
        quoted = list(re.findall("'([^']*)'", text))

        # Accounts without history provide no usable arguments.
        if len(quoted) <= 1:
            return None, None

        prefix = '/vos-comptes/IPT/appmanager/transac/' + self.browser.account_type
        query = '?_nfpb=true&_windowLabel=portletInstance_18&_pageLabel=page_synthese_v1' + '&_cdnCltUrl=' + "/transacClippe/"
        url = prefix + query + quote(quoted.pop(0))

        args = {}
        for field in self.doc.xpath('//form[@name="detail"]/input'):
            args[field.attrib['name']] = field.attrib.get('value', '')

        # Positions refer to the list *after* the first argument was popped.
        for key in self.ARGS:
            position = self.ARGS.index(key)
            args[key] = unicode(quoted[position]).encode(self.browser.ENCODING)

        args.update(PageDemandee=1, PagePrecedente=1)
        return url, args
Exemple #9
0
 def search_videos(self, pattern):
     """Open the search URL for *pattern* and iterate over the results.

     :param pattern: search terms; UTF-8 encoded and URL-quoted before use
     :returns: the result of ``self.page.iter_videos()``
     :raises AssertionError: if the browser did not land on a ResultsPage
     """
     encoded_pattern = quote(pattern.encode('utf-8'))
     search_path = '/videolist/searchmodevideo/query%s/' % encoded_pattern
     self.location(search_path)
     assert self.is_on_page(ResultsPage)
     videos = self.page.iter_videos()
     return videos
Exemple #10
0
 def obj_id(self):
     """Return the ``name`` field, UTF-8 encoded and URL-quoted."""
     name_filter = Field('name')
     raw_name = name_filter(self)
     return quote(raw_name.encode('utf-8'))
Exemple #11
0
    def params_from_js(self, text):
        """Turn the arguments of a JavaScript call into a URL and POST args.

        Every single-quoted string found in *text* is collected. The first
        one is URL-quoted and appended to the transaction URL; the remaining
        ones are mapped onto the names listed in ``self.ARGS`` (by position)
        and merged over the values of the ``input`` elements found directly
        under the form named ``detail``.

        :param text: JavaScript snippet containing single-quoted arguments
        :returns: ``(url, args)``, or ``(None, None)`` when at most one
            quoted argument is present
        """
        quoted = list(re.findall("'([^']*)'", text))

        # Accounts without history provide no usable arguments.
        if len(quoted) <= 1:
            return None, None

        prefix = '/vos-comptes/IPT/appmanager/transac/' + self.browser.account_type
        query = '?_nfpb=true&_windowLabel=portletInstance_18&_pageLabel=page_synthese_v1' + '&_cdnCltUrl=' + "/transacClippe/"
        url = prefix + query + quote(quoted.pop(0))

        args = {}
        for field in self.doc.xpath('//form[@name="detail"]/input'):
            args[field.attrib['name']] = field.attrib.get('value', '')

        # Positions refer to the list *after* the first argument was popped.
        for key in self.ARGS:
            position = self.ARGS.index(key)
            args[key] = unicode(quoted[position]).encode(self.browser.ENCODING)

        args.update(PageDemandee=1, PagePrecedente=1)
        return url, args
Exemple #12
0
 def set_wiki_source(self, project, page, data, message):
     """Open the edit page of a wiki page and submit new source text.

     :param project: project identifier used in the URL
     :param page: wiki page name (URL-quoted before use)
     :param data: new source, passed to ``self.page.set_source``
     :param message: accompanying message, passed to
         ``self.page.set_source``
     """
     edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
     edit_url = self.absurl(edit_path, True)
     self.location(edit_url)
     self.page.set_source(data, message)
Exemple #13
0
 def get_wiki_source(self, project, page, version=None):
     """Open the edit page of a wiki page and return its source.

     :param project: project identifier used in the URL
     :param page: wiki page name (URL-quoted before use)
     :param version: optional version, appended as ``?version=...`` when
         truthy
     :returns: the value of ``self.page.get_source()`` after navigation
     """
     edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
     target = self.absurl(edit_path, True)
     if version:
         target = target + '?version=%s' % version
     self.location(target)

     source = self.page.get_source()
     return source
Exemple #14
0
 def obj_id(self):
     """Return the ``name`` field, URL-quoted."""
     name_filter = Field('name')
     raw_name = name_filter(self)
     return quote(raw_name)
Exemple #15
0
 def set_wiki_source(self, project, page, data, message):
     """Open the edit page of a wiki page and submit new source text.

     :param project: project identifier used in the URL
     :param page: wiki page name; UTF-8 encoded and URL-quoted before use
     :param data: new source, passed to ``self.page.set_source``
     :param message: accompanying message, passed to
         ``self.page.set_source``
     """
     edit_url = '%s/projects/%s/wiki/%s/edit' % (
         self.BASEPATH, project, quote(page.encode('utf-8')))
     self.location(edit_url)
     self.page.set_source(data, message)
Exemple #16
0
    def get_video(self, video=None):
        """Build (or complete) a GDCVaultVideo from the current page.

        The page is handled according to how it embeds its media:

        * type 1 or 2: no iframe; an ``SWFObject`` script block carries the
          media type and the file path;
        * type 3: an iframe pointing at ``/play/mediaProxy.php`` (pdf
          slides);
        * type 4: an iframe pointing at a player page whose ``xmlURL``
          parameter names an xml config file (dual screen video).

        If the page URL matches ``.*#slides``, ``'#slides'`` is appended to
        ``self.group_dict['id']`` and, for type 4, the slides stream is
        preferred when the config provides one.

        :param video: existing video object to fill; when None, a new
            GDCVaultVideo is created from ``self.group_dict['id']``
        :returns: the filled video, or None when no supported player could
            be identified on the page
        :raises BrokenPageError: if the xml config has an empty
            ``akamaihost`` element, or if the smil file lists no video
        """
        # check for slides id variant
        want_slides = False
        if re.match(r'.*#slides', self.url):
            want_slides = True
            # not sure it's safe
            self.group_dict['id'] += '#slides'

        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        # the config file has it too, but in CDATA and only for type 4
        obj = self.parser.select(self.document.getroot(), 'title')
        title = None
        if len(obj) > 0:
            try:
                title = unicode(obj[0].text)
            except UnicodeDecodeError:
                title = None

        if title is None:
            obj = self.parser.select(self.document.getroot(), 'meta[name=title]')
            if len(obj) > 0 and 'content' in obj[0].attrib:
                try:
                    # FIXME: 1013483 has a buggy title (latin1)
                    # for now we just pass it as-is
                    title = obj[0].attrib['content']
                except UnicodeDecodeError:
                    # XXX: it is unclear whether this fallback is ever
                    # reached; the codec name used to be misspelled.
                    title = obj[0].attrib['content'].decode('iso-8859-15')

        if title is not None:
            title = title.strip()
            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
            if m:
                title = m.group(1)
            video.title = title

        # TODO: POST back the title to /search.php and filter == id to get
        # cleaner (JSON) data... (though it'd be much slower)

        # try to find an iframe (type 3 and 4)
        obj = self.parser.select(self.document.getroot(), 'iframe')
        if len(obj) == 0:
            # type 1 or 2 (swf+js)
            # find which script element contains the swf args
            for script in self.parser.select(self.document.getroot(), 'script'):
                script_text = unicode(script.text)
                m = re.match(r".*new SWFObject.*addVariable\('type', '(.*)'\).*",
                             script_text, re.DOTALL)
                if m:
                    video.ext = m.group(1)

                m = re.match(r'.*new SWFObject.*addVariable\("file", encodeURIComponent\("(.*)"\)\).*',
                             script_text, re.DOTALL)
                if m:
                    video.url = "http://gdcvault.com%s" % (m.group(1))
                    # TODO: for non-free (like 769),
                    # must be logged to use /mediaProxy.php

                    # FIXME: doesn't seem to work yet, we get 2 bytes as html
                    # 769 should give:
                    # http://twvideo01.ubm-us.net/o1/gdcradio-net/2007/gdc/GDC07-4889.mp3
                    # HACK: we use mechanize directly here for now... FIXME
                    self.browser.set_handle_redirect(False)
                    try:
                        self.browser.open_novisit(video.url)
                    except HTTPError as e:
                        # With redirects disabled, the 302 surfaces as an
                        # error carrying the real location.
                        if e.getcode() == 302 and hasattr(e, 'hdrs'):
                            video.url = unicode(e.hdrs['Location'])
                    finally:
                        # Always restore redirect handling, even when an
                        # unexpected exception propagates.
                        self.browser.set_handle_redirect(True)

                    video.set_empty_fields(NotAvailable)
                    return video

            # XXX: raise error?
            return None

        obj = obj[0]
        if obj is None:
            return None
        # type 3 or 4 (iframe)
        # get the config file for the rest
        iframe_url = obj.attrib['src']

        # 1015020 has a bogus url
        m = re.match(r'http:/event(.+)', iframe_url)
        if m:
            iframe_url = 'http://event' + m.group(1)

        # 1013798 has player169.html
        # 1012186 has player16x9.html
        # some other have /somethingplayer.html...
        # 1441 has a space in the xml filename, which we must not strip
        m = re.match(r'(http:.*/)[^/]*player[0-9a-z]*\.html\?.*xmlURL=([^&]+\.xml).*\&token=([^& ]+)', iframe_url)

        if not m:
            m = re.match(r'/play/mediaProxy\.php\?sid=(\d+)', iframe_url)
            if m is None:
                return None
            # TODO: must be logged to use /mediaProxy.php
            # type 3 (pdf slides)
            video.ext = u'pdf'
            video.url = "http://gdcvault.com%s" % (unicode(iframe_url))

            # HACK: we use mechanize directly here for now... FIXME
            self.browser.set_handle_redirect(False)
            try:
                self.browser.open_novisit(video.url)
            except HTTPError as e:
                if e.getcode() == 302 and hasattr(e, 'hdrs'):
                    video.url = unicode(e.hdrs['Location'])
            finally:
                self.browser.set_handle_redirect(True)

            video.set_empty_fields(NotAvailable)
            return video

        # type 4 (dual screen video)

        # token doesn't actually seem required
        # 1441 has a space in the xml filename
        xml_filename = quote(m.group(2))
        config_url = m.group(1) + xml_filename + '?token=' + m.group(3)

        # TODO: fix for 1015021 & others (forbidden)
        config = self.browser.get_document(self.browser.openurl(config_url))

        obj = self.parser.select(config.getroot(), 'akamaihost', 1)
        host = obj.text
        if host is None:
            raise BrokenPageError('Missing tag in xml config file')

        if host == "smil":
            # the rtmp URL is described in a smil file,
            # with several available bitrates
            obj = self.parser.select(config.getroot(), 'speakervideo', 1)
            smil = self.browser.get_document(self.browser.openurl(obj.text))
            obj = self.parser.select(smil.getroot(), 'meta', 1)
            # TODO: error checking
            base = obj.attrib.get('base', '')
            # Start below zero so a stream without a 'system-bitrate'
            # attribute (counted as 0) can still be selected.
            best_bitrate = -1
            path = None
            # choose the best bitrate
            for o in self.parser.select(smil.getroot(), 'video'):
                rate = int(o.attrib.get('system-bitrate', 0))
                if rate > best_bitrate:
                    best_bitrate = rate
                    path = o.attrib.get('src', '')
            if path is None:
                raise BrokenPageError('No video stream found in smil file')
            video.url = unicode(base + '/' + path)

        else:
            # not smil, the rtmp url is directly here as host + path
            # for id 1373 host is missing '/ondemand'
            # only add it when only a domain is specified without path
            if re.match(r'^[^\/]+$', host):
                host += "/ondemand"

            videos = {}

            obj = self.parser.select(config.getroot(), 'speakervideo', 1)
            if obj.text is not None:
                videos['speaker'] = 'rtmp://' + host + '/' + quote(obj.text)

            obj = self.parser.select(config.getroot(), 'slidevideo', 1)
            if obj.text is not None:
                videos['slides'] = 'rtmp://' + host + '/' + quote(obj.text)

            # XXX
            if 'speaker' in videos:
                video.url = unicode(videos['speaker'])
            elif 'slides' in videos:
                # 1016627 only has slides, so fallback to them
                video.url = unicode(videos['slides'])

            if want_slides and 'slides' in videos:
                video.url = unicode(videos['slides'])
            # if video.url is none: raise ? XXX

        obj = self.parser.select(config.getroot(), 'date', 1)
        if obj.text is not None:
            # 1016634 has "Invalid Date"
            try:
                video.date = parse_dt(obj.text)
            except ValueError:
                video.date = NotAvailable

        obj = self.parser.select(config.getroot(), 'duration', 1)
        # An empty <duration/> has no text; skip it instead of crashing.
        if obj.text is not None:
            m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text)
            if m:
                video.duration = datetime.timedelta(hours=int(m.group(1)),
                                                    minutes=int(m.group(2)),
                                                    seconds=int(m.group(3)))

        # NOTE(review): the result is unused, but the lookup is kept because
        # select() with an expected count presumably raises when the element
        # is missing -- confirm before removing.
        self.parser.select(config.getroot(), 'speaker', 1)

        video.set_empty_fields(NotAvailable)
        return video
Exemple #17
0
 def set_wiki_source(self, project, page, data, message):
     """Open the edit page of a wiki page and submit new source text.

     :param project: project identifier used in the URL
     :param page: wiki page name (URL-quoted before use)
     :param data: new source, passed to ``self.page.set_source``
     :param message: accompanying message, passed to
         ``self.page.set_source``
     """
     edit_path = 'projects/%s/wiki/%s/edit' % (project, quote(page))
     edit_url = self.absurl(edit_path, True)
     self.location(edit_url)
     self.page.set_source(data, message)
Exemple #18
0
 def obj_id(self):
     """Return the ``name`` field, URL-quoted."""
     name_filter = Field('name')
     raw_name = name_filter(self)
     return quote(raw_name)
Exemple #19
0
 def obj_id(self):
     """Return the ``name`` field, UTF-8 encoded and URL-quoted."""
     name_filter = Field('name')
     raw_name = name_filter(self)
     return quote(raw_name.encode('utf-8'))
Exemple #20
0
    def get_video(self, video=None):
        """Build (or complete) a GDCVaultVideo from the current page.

        The page is handled according to how it embeds its media:

        * type 1 or 2: no iframe; an ``SWFObject`` script block carries the
          media type and the file path;
        * type 3: an iframe pointing at ``/play/mediaProxy.php`` (pdf
          slides);
        * type 4: an iframe pointing at a player page whose ``xmlURL``
          parameter names an xml config file (dual screen video).

        If the page URL matches ``.*#slides``, ``'#slides'`` is appended to
        ``self.group_dict['id']`` and, for type 4, the slides stream is
        preferred when the config provides one.

        :param video: existing video object to fill; when None, a new
            GDCVaultVideo is created from ``self.group_dict['id']``
        :returns: the filled video, or None when no supported player could
            be identified on the page
        :raises BrokenPageError: if the xml config has an empty
            ``akamaihost`` element, or if the smil file lists no video
        """
        # check for slides id variant
        want_slides = False
        if re.match(r'.*#slides', self.url):
            want_slides = True
            # not sure it's safe
            self.group_dict['id'] += '#slides'

        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        # the config file has it too, but in CDATA and only for type 4
        obj = self.parser.select(self.document.getroot(), 'title')
        title = None
        if len(obj) > 0:
            try:
                title = unicode(obj[0].text)
            except UnicodeDecodeError:
                title = None

        if title is None:
            obj = self.parser.select(self.document.getroot(),
                                     'meta[name=title]')
            if len(obj) > 0 and 'content' in obj[0].attrib:
                try:
                    # FIXME: 1013483 has a buggy title (latin1)
                    # for now we just pass it as-is
                    title = obj[0].attrib['content']
                except UnicodeDecodeError:
                    # XXX: it is unclear whether this fallback is ever
                    # reached; the codec name used to be misspelled.
                    title = obj[0].attrib['content'].decode('iso-8859-15')

        if title is not None:
            title = title.strip()
            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
            if m:
                title = m.group(1)
            video.title = title

        # TODO: POST back the title to /search.php and filter == id to get
        # cleaner (JSON) data... (though it'd be much slower)

        # try to find an iframe (type 3 and 4)
        obj = self.parser.select(self.document.getroot(), 'iframe')
        if len(obj) == 0:
            # type 1 or 2 (swf+js)
            # find which script element contains the swf args
            for script in self.parser.select(self.document.getroot(),
                                             'script'):
                script_text = unicode(script.text)
                m = re.match(
                    r".*new SWFObject.*addVariable\('type', '(.*)'\).*",
                    script_text, re.DOTALL)
                if m:
                    video.ext = m.group(1)

                m = re.match(
                    r'.*new SWFObject.*addVariable\("file", encodeURIComponent\("(.*)"\)\).*',
                    script_text, re.DOTALL)
                if m:
                    video.url = "http://gdcvault.com%s" % (m.group(1))
                    # TODO: for non-free (like 769),
                    # must be logged to use /mediaProxy.php

                    # FIXME: doesn't seem to work yet, we get 2 bytes as html
                    # 769 should give:
                    # http://twvideo01.ubm-us.net/o1/gdcradio-net/2007/gdc/GDC07-4889.mp3
                    # HACK: we use mechanize directly here for now... FIXME
                    self.browser.set_handle_redirect(False)
                    try:
                        self.browser.open_novisit(video.url)
                    except HTTPError as e:
                        # With redirects disabled, the 302 surfaces as an
                        # error carrying the real location.
                        if e.getcode() == 302 and hasattr(e, 'hdrs'):
                            video.url = unicode(e.hdrs['Location'])
                    finally:
                        # Always restore redirect handling, even when an
                        # unexpected exception propagates.
                        self.browser.set_handle_redirect(True)

                    video.set_empty_fields(NotAvailable)
                    return video

            # XXX: raise error?
            return None

        obj = obj[0]
        if obj is None:
            return None
        # type 3 or 4 (iframe)
        # get the config file for the rest
        iframe_url = obj.attrib['src']

        # 1015020 has a bogus url
        m = re.match(r'http:/event(.+)', iframe_url)
        if m:
            iframe_url = 'http://event' + m.group(1)

        # 1013798 has player169.html
        # 1012186 has player16x9.html
        # some other have /somethingplayer.html...
        # 1441 has a space in the xml filename, which we must not strip
        m = re.match(
            r'(http:.*/)[^/]*player[0-9a-z]*\.html\?.*xmlURL=([^&]+\.xml).*\&token=([^& ]+)',
            iframe_url)

        if not m:
            m = re.match(r'/play/mediaProxy\.php\?sid=(\d+)', iframe_url)
            if m is None:
                return None
            # TODO: must be logged to use /mediaProxy.php
            # type 3 (pdf slides)
            video.ext = u'pdf'
            video.url = "http://gdcvault.com%s" % (unicode(iframe_url))

            # HACK: we use mechanize directly here for now... FIXME
            self.browser.set_handle_redirect(False)
            try:
                self.browser.open_novisit(video.url)
            except HTTPError as e:
                if e.getcode() == 302 and hasattr(e, 'hdrs'):
                    video.url = unicode(e.hdrs['Location'])
            finally:
                self.browser.set_handle_redirect(True)

            video.set_empty_fields(NotAvailable)
            return video

        # type 4 (dual screen video)

        # token doesn't actually seem required
        # 1441 has a space in the xml filename
        xml_filename = quote(m.group(2))
        config_url = m.group(1) + xml_filename + '?token=' + m.group(3)

        # TODO: fix for 1015021 & others (forbidden)
        config = self.browser.get_document(self.browser.openurl(config_url))

        obj = self.parser.select(config.getroot(), 'akamaihost', 1)
        host = obj.text
        if host is None:
            raise BrokenPageError('Missing tag in xml config file')

        if host == "smil":
            # the rtmp URL is described in a smil file,
            # with several available bitrates
            obj = self.parser.select(config.getroot(), 'speakervideo', 1)
            smil = self.browser.get_document(self.browser.openurl(obj.text))
            obj = self.parser.select(smil.getroot(), 'meta', 1)
            # TODO: error checking
            base = obj.attrib.get('base', '')
            # Start below zero so a stream without a 'system-bitrate'
            # attribute (counted as 0) can still be selected.
            best_bitrate = -1
            path = None
            # choose the best bitrate
            for o in self.parser.select(smil.getroot(), 'video'):
                rate = int(o.attrib.get('system-bitrate', 0))
                if rate > best_bitrate:
                    best_bitrate = rate
                    path = o.attrib.get('src', '')
            if path is None:
                raise BrokenPageError('No video stream found in smil file')
            video.url = unicode(base + '/' + path)

        else:
            # not smil, the rtmp url is directly here as host + path
            # for id 1373 host is missing '/ondemand'
            # only add it when only a domain is specified without path
            if re.match(r'^[^\/]+$', host):
                host += "/ondemand"

            videos = {}

            obj = self.parser.select(config.getroot(), 'speakervideo', 1)
            if obj.text is not None:
                videos['speaker'] = 'rtmp://' + host + '/' + quote(obj.text)

            obj = self.parser.select(config.getroot(), 'slidevideo', 1)
            if obj.text is not None:
                videos['slides'] = 'rtmp://' + host + '/' + quote(obj.text)

            # XXX
            if 'speaker' in videos:
                video.url = unicode(videos['speaker'])
            elif 'slides' in videos:
                # 1016627 only has slides, so fallback to them
                video.url = unicode(videos['slides'])

            if want_slides and 'slides' in videos:
                video.url = unicode(videos['slides'])
            # if video.url is none: raise ? XXX

        obj = self.parser.select(config.getroot(), 'date', 1)
        if obj.text is not None:
            # 1016634 has "Invalid Date"
            try:
                video.date = parse_dt(obj.text)
            except ValueError:
                video.date = NotAvailable

        obj = self.parser.select(config.getroot(), 'duration', 1)
        # An empty <duration/> has no text; skip it instead of crashing.
        if obj.text is not None:
            m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text)
            if m:
                video.duration = datetime.timedelta(hours=int(m.group(1)),
                                                    minutes=int(m.group(2)),
                                                    seconds=int(m.group(3)))

        # NOTE(review): the result is unused, but the lookup is kept because
        # select() with an expected count presumably raises when the element
        # is missing -- confirm before removing.
        self.parser.select(config.getroot(), 'speaker', 1)

        video.set_empty_fields(NotAvailable)
        return video