def getDLurl(self, url):
		try:
			content = self.getUrl(url)
			match = re.findall('flashvars.playlist = \'(.*?)\';', content)
			if match:
				for url in match:
					url = 'http://ua.canna.to/canna/'+url
					content = self.getUrl(url)
					match = re.findall('<location>(.*?)</location>', content)
					if match:
						for url in match:
							req = mechanize.Request('http://ua.canna.to/canna/single.php')
							response = mechanize.urlopen(req)
							url = 'http://ua.canna.to/canna/'+url
							req = mechanize.Request(url)
							req.add_header('User-Agent', canna_agent)
							response = mechanize.urlopen(req)
							response.close()
							code=response.info().getheader('Content-Location')
							url='http://ua.canna.to/canna/avzt/'+code
							return url

		except urllib2.HTTPError, error:
			printl(error,self,"E")
			message = self.session.open(MessageBoxExt, (_("Error: %s") % error), MessageBoxExt.TYPE_INFO, timeout=3)
			return False
Example #2
    def test_gzip(self):
        p = HTTPGzipProcessor()
        url = "https://www.example.com/"
        req = p.https_request(mechanize.Request(url))
        self.assertIsNone(req.get_header('Accept-Encoding'))
        p.request_gzip = True
        req = p.https_request(mechanize.Request(url))
        self.assertEqual(req.get_header('Accept-Encoding'), 'gzip')
        req = mechanize.Request(url)
        req.add_header('Accept-Encoding', 'moo, *')
        req = p.https_request(req)
        self.assertEqual(req.get_header('Accept-Encoding'), 'moo, *, gzip')
        data = os.urandom(1024 * 1024)
        cdata = b''.join(compress_readable_output(BytesIO(data)))
        r = MockResponse(
            url,
            data=cdata,
            info={
                'Content-Encoding': 'gzip',
                'Content-Length': str(len(cdata))
            })
        r = p.https_response(req, r)
        self.assertEqual(r.read(), data)
        h = r.info()
        self.assertFalse(h.getheaders('content-encoding'))
        self.assertFalse(h.getheaders('content-length'))
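
The compress_readable_output helper used in the test above comes from the surrounding test utilities; a minimal sketch of what such a helper might look like, gzip-compressing a readable object chunk by chunk (the name, signature and chunk size are assumptions):

import gzip
import io

def compress_readable_output(readable, chunk_size=64 * 1024):
    # Hypothetical helper: read a file-like object and yield its contents
    # as gzip-compressed bytes.
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
        while True:
            chunk = readable.read(chunk_size)
            if not chunk:
                break
            gz.write(chunk)
    yield buf.getvalue()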
Example #3
    def getDLurl(self, url):
        try:
            content = self.getUrl(url)
            match = re.findall('flashvars.playlist = \'(.*?)\';', content)
            if match:
                for url in match:
                    url = 'http://ua.canna.to/canna/' + url
                    content = self.getUrl(url)
                    match = re.findall('<location>(.*?)</location>', content)
                    if match:
                        for url in match:
                            url = 'http://ua.canna.to/canna/' + url
                            req = mechanize.Request(
                                'http://ua.canna.to/canna/single.php')
                            response = mechanize.urlopen(req)
                            req = mechanize.Request(url)
                            req.add_header(
                                'User-Agent',
                                ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
                            )
                            response = mechanize.urlopen(req)
                            response.close()
                            code = response.info().getheader(
                                'Content-Location')
                            url = 'http://ua.canna.to/canna/avzt/' + code
                            print url
                            return url

        except urllib2.HTTPError, error:
            printl(error, self, "E")
            message = self.session.open(MessageBox, ("Fehler: %s" % error),
                                        MessageBox.TYPE_INFO,
                                        timeout=3)
            return False
Example #4
def ieee_get_csv(keyword):
    import urllib
    import mechanize
    # Keyword processing for ieee
    keyword = keyword.replace('"', '.QT.')
    # Crawl intelligent browser
    br = crawler_browser()
    # Fake request to mimic normal user
    # TODO: Play with their analytics requests too for even more real request.
    URL = 'http://ieeexplore.ieee.org/search/searchresult.jsp?queryText=' + urllib.quote_plus(
        keyword) + '&newsearch=true'
    fake = br.open(URL)
    # Search request as browser
    br.set_header('Referer', URL)
    data = '{"queryText":"' + keyword + '","newsearch":"true"}'
    search = br.open(
        mechanize.Request('http://ieeexplore.ieee.org/rest/search',
                          data=data,
                          headers={"Content-type": "application/json"}))
    # Export as csv request
    params = {'bulkSetSize': 2000}
    data = urllib.urlencode(params)
    csv_request = br.open(
        mechanize.Request('http://ieeexplore.ieee.org/search/searchExport.jsp',
                          data=data))
    csv_data = csv_request.read()
    return csv_data
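
The crawler_browser() factory used by ieee_get_csv above is not shown; a minimal sketch, assuming it returns a mechanize.Browser subclass with a set_header convenience (the class name and User-Agent value are assumptions):

import mechanize

class CrawlerBrowser(mechanize.Browser):
    # Hypothetical wrapper: lets callers replace a single default header by
    # name, as ieee_get_csv does with 'Referer'.
    def set_header(self, name, value):
        self.addheaders = [(k, v) for k, v in self.addheaders
                           if k.lower() != name.lower()]
        self.addheaders.append((name, value))

def crawler_browser():
    br = CrawlerBrowser()
    br.set_handle_robots(False)   # ignore robots.txt
    br.set_handle_refresh(False)  # do not follow meta-refresh redirects
    br.set_header('User-Agent',
                  'Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/60.0')
    return br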
    def __callRequest(self):
        cookieJar = mechanize.LWPCookieJar()
        try:  # TODO: maybe do without the try
            cookieJar.load(self._cookiePath, self.__bIgnoreDiscard, self.__bIgnoreExpired)
        except Exception as e:
            logger.info(e)
        sParameters = urllib.urlencode(self.__aParameters)

        opener = mechanize.build_opener(SmartRedirectHandler,
                                        mechanize.HTTPEquivProcessor,
                                        mechanize.HTTPRefreshProcessor)
        if (len(sParameters) > 0):
            oRequest = mechanize.Request(self.__sUrl, sParameters)
        else:
            oRequest = mechanize.Request(self.__sUrl)

        for aHeader in self.__aHeaderEntries:
            for sHeaderKey, sHeaderValue in aHeader.items():
                oRequest.add_header(sHeaderKey, sHeaderValue)
        cookieJar.add_cookie_header(oRequest)
        
        if self.caching and self.cacheTime > 0:
            sContent = self.readCache(self.getRequestUri())
            if sContent:
                return sContent
        try:
            oResponse = opener.open(oRequest, timeout=60)
        except mechanize.HTTPError, e:
            if not self.ignoreErrors:
                xbmcgui.Dialog().ok('xStream', 'Error retrieving the URL:', self.__sUrl, str(e))
                logger.error("HTTPError "+str(e)+" Url: "+self.__sUrl)
                return ''
            else:
                oResponse = e                 
def rtnHTMLformat(tmpddGenrcgenPresent, sppPrefx, pthwcod, ouPthwpng):
    inpx = '\n'.join(tmpddGenrcgenPresent)  # inpx="ALDH2 color \nALDH3A1	color"
    request = mechanize.Request(
        "http://www.genome.jp/kegg/tool/map_pathway2.html")
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form["unclassified"] = inpx
    form["org"] = sppPrefx
    request2 = form.click()
    response2 = mechanize.urlopen(request2)
    a = str(response2.read()).split('href="/kegg-bin/show_pathway?')[1]
    code = a.split('/')[0]  # response2.read()
    request = mechanize.Request(
        "http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args" % (code, pthwcod))  # request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%('13171478854246','hsa00410'))
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[1]
    status = ' NOT '
    try:
        imgf = str(forms[1]).split('/mark_pathway')[1].split('/')[0]
        os.system("wget --quiet http://www.genome.jp/tmp/mark_pathway%s/%s.png -O %s" % (imgf, pthwcod, ouPthwpng))
        status = ' '
    except:
        pass
    return 'A pathway image was%ssuccessfully produced...' % status
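
A hypothetical call to rtnHTMLformat, based on the comments in the function above (the gene/colour pairs, the "hsa" species prefix and the hsa00410 pathway code are example values):

msg = rtnHTMLformat(["ALDH2 red", "ALDH3A1 blue"], "hsa", "hsa00410",
                    "pathway_hsa00410.png")
print(msg)  # 'A pathway image was successfully produced...' on success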
Example #7
    def login(self):
        headers = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0',
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.coned.com/',
            'Content-Type': 'application/json',
            'Origin': 'https://www.coned.com'
        }
        data = json.dumps({
            "LoginEmail": self.user,
            "LoginPassword": self.password,
            "LoginRememberMe": False,
            "ReturnUrl": "",
            "OpenIdRelayState": ""
        })
        request = mechanize.Request(
            "https://www.coned.com/sitecore/api/ssc/ConEd-Cms-Services-Controllers-Okta/User/0/Login",
            data, headers)
        self.browser.open(request)

        totp = pyotp.TOTP(self.totp)
        thing = json.dumps({
            "MFACode": totp.now(),
            "ReturnUrl": "",
            "OpenIdRelayState": ""
        })
        request = mechanize.Request(
            'https://www.coned.com/sitecore/api/ssc/ConEd-Cms-Services-Controllers-Okta/User/0/VerifyFactor',
            thing, headers)
        response = self.browser.open(request)
        redirect_url = json.loads(response.read())["authRedirectUrl"]
        response = self.browser.open(redirect_url)
Example #8
    def __get_csv(self, letter='a', now=False):

        #open the url
        current_url = self.overview_url + '1111&b=' + letter
        overview_req = mechanize.Request(current_url)
        overview_res = mechanize.urlopen(overview_req)

        #find the list of entries to post
        py_query = PyQuery(overview_res.read())
        titlelist = py_query("input[name='titelnrliste']").val()

        #create the post request
        post_data = {
            'url': current_url,
            'download': '[Download]',
            'titelnrliste': titlelist
        }

        if (now):
            #find the checked box (the current quartal)
            default_quartal = py_query(".quartal input:checked").attr('name')
            post_data[str(default_quartal)] = 'ON'
        else:
            #enable all quartal's checkbox
            quartals = [1, 2, 3, 4]
            for i in quartals:
                if i in range(1, 5):
                    post_data[str(self.year) + str(i)] = 'ON'

        #send the post request
        csv_req = mechanize.Request(current_url, urllib.urlencode(post_data))
        csv_res = mechanize.urlopen(csv_req)
        self.csv_parser.process_result(response=csv_res)
Example #9
    def __callRequest(self):
        sParameters = urllib.urlencode(self.__aParamaters)

        if (self.__cType == cRequestHandler.REQUEST_TYPE_GET):
            if (len(sParameters) > 0):
                if (self.__sUrl.find('?') == -1):
                    self.__sUrl = self.__sUrl + '?' + str(sParameters)
                    sParameters = ''
                else:
                    self.__sUrl = self.__sUrl + '&' + str(sParameters)
                    sParameters = ''

        if (len(sParameters) > 0):
            oRequest = mechanize.Request(self.__sUrl, sParameters)
        else:
            oRequest = mechanize.Request(self.__sUrl)

        for aHeader in self.__aHeaderEntries:
            for sHeaderKey, sHeaderValue in aHeader.items():
                oRequest.add_header(sHeaderKey, sHeaderValue)

        try:
            oResponse = mechanize.urlopen(oRequest)
        except urllib2.HTTPError, e:
            xbmcgui.Dialog().ok('xStream', 'Error retrieving the URL:',
                                self.__sUrl, str(e))
            logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
            return ''
    def fanboxGetPostsFromArtist(self, artist_id, next_url=""):
        ''' get all posts from the supported user from https://www.pixiv.net/ajax/fanbox/creator?userId=15521131 '''
        if next_url is None or next_url == "":
            url = "https://www.pixiv.net/ajax/fanbox/creator?userId={0}".format(
                artist_id)
        elif next_url.startswith("https://"):
            url = next_url
        else:
            url = "https://www.pixiv.net" + next_url

        # Fix #494
        PixivHelper.print_and_log('info', 'Getting posts from ' + url)
        referer = "https://www.pixiv.net/fanbox/creator/{0}".format(artist_id)
        req = mechanize.Request(url)
        req.add_header('Accept', 'application/json, text/plain, */*')
        req.add_header('Referer', referer)
        req.add_header('Origin', 'https://www.pixiv.net')
        req.add_header('User-Agent', self._config.useragent)

        res = self.open_with_retry(req)
        response = res.read()
        PixivHelper.get_logger().debug(response.decode('utf8'))
        res.close()
        # Issue #420
        _tzInfo = None
        if self._config.useLocalTimezone:
            _tzInfo = PixivHelper.LocalUTCOffsetTimezone()
        result = FanboxArtist(artist_id, response, tzInfo=_tzInfo)

        pixivArtist = PixivArtist(artist_id)
        self.getMemberInfoWhitecube(artist_id, pixivArtist)
        result.artistName = pixivArtist.artistName
        result.artistToken = pixivArtist.artistToken

        for post in result.posts:
            # https://fanbox.pixiv.net/api/post.info?postId=279561
            # https://www.pixiv.net/fanbox/creator/104409/post/279561
            p_url = "https://fanbox.pixiv.net/api/post.info?postId={0}".format(
                post.imageId)
            p_referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(
                artist_id, post.imageId)
            PixivHelper.get_logger().debug('Getting post detail from %s',
                                           p_url)
            p_req = mechanize.Request(p_url)
            p_req.add_header('Accept', 'application/json, text/plain, */*')
            p_req.add_header('Referer', p_referer)
            p_req.add_header('Origin', 'https://www.pixiv.net')
            p_req.add_header('User-Agent', self._config.useragent)

            p_res = self.open_with_retry(p_req)
            p_response = p_res.read()
            PixivHelper.get_logger().debug(p_response.decode('utf8'))
            p_res.close()
            js = demjson.decode(p_response)
            post.parsePost(js["body"])

        return result
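
A hypothetical usage sketch for the method above, assuming an authenticated browser object (here called br) that exposes it; the attribute names are taken from the code above:

result = br.fanboxGetPostsFromArtist(15521131)
print(result.artistName, result.artistToken)
for post in result.posts:
    print(post.imageId)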
Example #11
    def resolve(self, url, cookie_jar, user_agent):
        headers = {'User-agent': user_agent, 'Referer': url}

        try:
            cookie_jar.load(ignore_discard=True)
        except Exception as e:
            logger.info(e)

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(cookie_jar))

        request = mechanize.Request(url)
        for key in headers:
            request.add_header(key, headers[key])

        try:
            response = opener.open(request)
        except mechanize.HTTPError as e:
            response = e

        body = response.read()

        cookie_jar.extract_cookies(response, request)
        cookie_helper.check_cookies(cookie_jar)

        parsed_url = urlparse(url)
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme,
                                                      parsed_url.netloc)

        params = {}

        try:
            params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"',
                                           body).group(1)
            params["pass"] = re.search(r'name="pass" value="(.+?)"',
                                       body).group(1)

            js = self._extract_js(body)
        except mechanize.HTTPError as e:
            return None

        params["jschl_answer"] = str(js + len(parsed_url.netloc))

        sParameters = urllib.urlencode(params, True)

        request = mechanize.Request("%s?%s" % (submit_url, sParameters))
        for key in headers:
            request.add_header(key, headers[key])

        sleep(5)

        try:
            response = opener.open(request)
        except mechanize.HTTPError as e:
            response = e

        return response
Example #12
    def __callRequest(self):
        if self.caching and self.cacheTime > 0:
            sContent = self.readCache(self.getRequestUri())
            if sContent:
                return sContent

        cookieJar = mechanize.LWPCookieJar(filename=self._cookiePath)
        try:  # TODO: maybe do without the try
            cookieJar.load(ignore_discard=self.__bIgnoreDiscard,
                           ignore_expires=self.__bIgnoreExpired)
        except Exception as e:
            logger.info(e)

        sParameters = urllib.urlencode(self.__aParameters, True)

        handlers = [
            SmartRedirectHandler, mechanize.HTTPEquivProcessor,
            mechanize.HTTPRefreshProcessor
        ]
        if sys.version_info >= (2, 7, 9) and sys.version_info < (2, 7, 11):
            handlers.append(newHTTPSHandler)
        opener = mechanize.build_opener(*handlers)
        if (len(sParameters) > 0):
            oRequest = mechanize.Request(self.__sUrl, sParameters)
        else:
            oRequest = mechanize.Request(self.__sUrl)

        for key, value in self.__headerEntries.items():
            oRequest.add_header(key, value)
        cookieJar.add_cookie_header(oRequest)

        user_agent = self.__headerEntries.get(
            'User-Agent',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
        )

        try:
            oResponse = opener.open(oRequest, timeout=self.requestTimeout)
        except mechanize.HTTPError, e:
            if e.code == 503 and e.headers.get("Server") == 'cloudflare-nginx':
                html = e.read()
                oResponse = self.__check_protection(html, user_agent,
                                                    cookieJar)
                if not oResponse:
                    logger.error("Failed to get CF-Cookie for Url: " +
                                 self.__sUrl)
                    return ''
            elif not self.ignoreErrors:
                xbmcgui.Dialog().ok('xStream', 'Error retrieving the URL:',
                                    self.__sUrl, str(e))
                logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
                return ''
            else:
                oResponse = e
Example #13
    def test_set_handled_schemes(self):
        class MockHandlerClass(make_mock_handler()):
            def __call__(self):
                return self

        class BlahHandlerClass(MockHandlerClass):
            pass

        class BlahProcessorClass(MockHandlerClass):
            pass

        BlahHandler = BlahHandlerClass([("blah_open", None)])
        BlahProcessor = BlahProcessorClass([("blah_request", None)])

        class TestUserAgent(mechanize.UserAgent):
            default_schemes = ["http"]
            default_others = []
            default_features = []
            handler_classes = mechanize.UserAgent.handler_classes.copy()
            handler_classes.update({
                "blah": BlahHandler,
                "_blah": BlahProcessor
            })

        ua = TestUserAgent()

        self.assertEqual(list(h.__class__.__name__ for h in ua.handlers),
                         ["HTTPHandler"])
        ua.set_handled_schemes(["http", "file"])
        self.assertEqual(sorted(h.__class__.__name__ for h in ua.handlers),
                         ["FileHandler", "HTTPHandler"])
        self.assertRaises(ValueError, ua.set_handled_schemes,
                          ["blah", "non-existent"])
        self.assertRaises(ValueError, ua.set_handled_schemes,
                          ["blah", "_blah"])
        ua.set_handled_schemes(["blah"])

        req = mechanize.Request("blah://example.com/")
        ua.open(req)
        exp_calls = [("blah_open", (req, ), {})]
        assert len(ua.calls) == len(exp_calls)
        for got, expect in zip(ua.calls, exp_calls):
            self.assertEqual(expect, got[1:])

        ua.calls = []
        req = mechanize.Request("blah://example.com/")
        ua._set_handler("_blah", True)
        ua.open(req)
        exp_calls = [("blah_request", (req, ), {}), ("blah_open", (req, ), {})]
        assert len(ua.calls) == len(exp_calls)
        for got, expect in zip(ua.calls, exp_calls):
            self.assertEqual(expect, got[1:])
        ua._set_handler("_blah", True)
Example #14
    def login(self, username, password):
        try:
            PixivHelper.print_and_log('info', 'Logging in...')
            # url = "https://accounts.pixiv.net/login"
            # get the post key
            # res = self.open_with_retry(url)
            # parsed = BeautifulSoup(res, features="html5lib")
            # js_init_config = self._getInitConfig(parsed)

            data = {}
            data['pixiv_id'] = username
            data['password'] = password
            data['captcha'] = ''
            data['g_recaptcha_response'] = ''
            data['return_to'] = 'https://www.pixiv.net'
            data['lang'] = 'en'
            # data['post_key'] = js_init_config["pixivAccount.postKey"]
            data['source'] = "accounts"
            data['ref'] = ''

            request = mechanize.Request("https://accounts.pixiv.net/api/login?lang=en", urllib.parse.urlencode(data))
            response = self.open_with_retry(request)

            return self.processLoginResult(response, username, password)
        except BaseException:
            traceback.print_exc()
            PixivHelper.print_and_log('error', 'Error at login(): {0}'.format(sys.exc_info()))
            raise
def get_vorlage(session_id, url):
    try:
        response = mechanize.urlopen(mechanize.Request(url))
        pprint.pprint(response)
    except URLError:
        return
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    for form in forms:
        # All forms are iterated. Might not all be attachment-related.
        for control in form.controls:
            if control.name == 'DT':
                print control.name, control.value
                request2 = form.click()
                try:
                    response2 = mechanize.urlopen(request2)
                    form_url = response2.geturl()
                    if "getfile.asp" in form_url:
                        #print "ERFOLG:", response2.info()
                        pdf = response2.read()
                        md5 = hashlib.md5(pdf).hexdigest()
                        scraperwiki.sqlite.save(
                            unique_keys=['session_id', 'dt', 'md5', 'size'],
                            data={
                                'session_id': session_id,
                                'dt': control.value,
                                'md5': md5,
                                'size': len(pdf)
                            })
                        continue
                except mechanize.HTTPError, response2:
                    print "HTTP-FEHLER :("
                except URLError:
                    pass
def RA_do_search(request):
    campings = pd.DataFrame()
    searchString1 = 'currentPage='
    searchString2 = '&paging=true&facilityType=all&agencyKey=&facilityAvailable=show_all&viewType=view_list&selectedLetter=ALL&owner=&hiddenFilters=false'
    r = prep_header_req(request)
    soup = BeautifulSoup(r.read(), "html.parser")
    # print soup
    pages_str = soup.find_all(
        "div",
        {"class": "usearch_results_label"})[0].contents[0].encode('ascii')
    m = re.match(r"Search Results: (\d+)-(\d+) of (\d+)", pages_str)
    pages = int(m.group(3)) / (int(m.group(2)) - int(m.group(1)) + 1)
    campings = collect_data(soup, campings)
    for page in range(1, pages):
        searchResultURL = r.geturl() + '?' + searchString1 + str(
            page) + searchString2
        req2 = mechanize.Request(searchResultURL)
        r2 = prep_header_req(req2)
        soup = BeautifulSoup(r2.read(), "html.parser")
        print page + 1,
        # f = open('/Users/hillenr/tmp/sample_mech.html', 'w')
        # f.write(r2.read())
        # f.close()
        campings = collect_data(soup, campings)
    print
    return campings
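
The prep_header_req and collect_data helpers used by RA_do_search above are defined elsewhere; a minimal sketch of prep_header_req, assuming it only attaches browser-like headers and opens the request with mechanize (the User-Agent value is an assumption):

import mechanize

def prep_header_req(req):
    # Hypothetical helper: accept a URL or a mechanize.Request, attach a
    # browser-like User-Agent and return the opened response.
    if not isinstance(req, mechanize.Request):
        req = mechanize.Request(req)
    req.add_header('User-Agent',
                   'Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/60.0')
    return mechanize.urlopen(req)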
Example #17
    def api_call(self, apiUrl, apiMethod='GET', apiBody=''):
        br = self._get_browser()
        if not self.logged_in:
            self._login()

        cookiejar = br.cookiejar
        ajaxkey = None
        for cookie in cookiejar:
            if 'afg' == cookie.name:
                ajaxkey = cookie.value
        log.debug("ajaxkey is %s", ajaxkey)
        apiCall = mechanize.Request('https://www.alarm.com/web/api/' + apiUrl,
                                    data=apiBody,
                                    method=apiMethod)
        apiCall.add_header('ajaxrequestuniquekey', ajaxkey)
        apiCall.add_header('Accept', 'application/vnd.api+json')
        apiCall.add_header('Content-Type', 'application/json; charset=UTF-8')
        result = None
        try:
            response = br.open(apiCall)
            content = response.read()
            log.debug("Post command JSON is  %s", content)
            result = json.loads(content)
            log.debug(result)
        except:
            e = sys.exc_info()[0]
            log.debug("got an error %s", e)
        return result
Example #18
    def set_video_metadata(self, video):

        # The player html code with all the required information is loaded
        # after the main page using javascript and a special XmlHttpRequest
        # we emulate this behaviour
        from_request = self.group_dict['from']

        query = urllib.urlencode({
            'from_request': from_request,
            'request': '/video/%s?get_video=1' % video.id
        })

        request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
        # This header is mandatory to have the correct answer from dailymotion
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        player_html = self.browser.readurl(request)

        try:
            m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"',
                          player_html)
            flashvars = urlparse.parse_qs(m.group('flashvars'))
            info = json.loads(flashvars['sequence'][0])

            # The video parameters seem to be always located at the same place
            # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
            #   [0]['layerList'][0]['param']['extraParams'])
            #
            # but to be more tolerant to future changes in the structure, we
            # prefer to look for the parameters everywhere in the structure

            def find_video_params(data):
                if isinstance(data, dict):
                    if 'param' in data and 'extraParams' in data['param']:
                        return data['param']['extraParams']
                    data = data.values()

                if not isinstance(data, list):
                    return None

                for item in data:
                    ret = find_video_params(item)
                    if ret:
                        return ret

                return None

            params = find_video_params(info['sequence'])

            video.title = unicode(params['videoTitle'])
            video.author = unicode(params['videoOwnerLogin'])
            video.description = unicode(params['videoDescription'])
            video.thumbnail = BaseImage(params['videoPreviewURL'])
            video.thumbnail.url = unicode(params['videoPreviewURL'])
            video.duration = datetime.timedelta(
                seconds=params['mediaDuration'])

        except:
            # If anything goes wrong, we prefer to return normally, this will
            # allow video download to work even if we don't have the metadata
            pass
Example #19
def support(x_cord, y_cord, browser):

    branfrage = mechanize.Request(
        "http://de101.die-staemme.de/game.php?village=5512&screen=place")

    response = browser.open(branfrage)
    forms = ParseResponse(response)
    form = forms[0]
    #print form

    control = form.find_control(name="support", type="submit")
    #print control.name, control.value, control.type

    form["sword"] = "180"
    form["x"] = str(x_cord)
    form["y"] = str(y_cord)

    oeffnen = form.click(control.name)

    antwort = browser.open(oeffnen)
    forms2 = ParseResponse(antwort)
    form2 = forms2[0]
    #print form2

    control2 = form2.find_control(type="submit")
    #print control2.name, control2.value, control2.type

    oeffnen = form2.click(control2.type)
    browser.open(oeffnen)
Example #20
def load_more_elements_to_process(browser, used_asins, offset):
    """
    Emulates the addNextBook Javascript function called when the user approaches the bottom of the Kindle highlights page
    This function generates HTML on the backend (why is it being built on the backend???), then sends it to the frontend which will drop it into the DOM
    We hit the same endpoint to get the new piece of HTML that should be inserted, then pull out the new highlight tags with Beautiful Soup
    This is necessary because not all books are shown on pageload
    Return - triple of (new BeautifulSoup tags loaded, ASIN of new book, new offset to use)
    """
    params = {
        "current_offset": offset,
        "used_asins[]": used_asins,
        "upcoming_asins[]": ""  # Unused, as far as I can tell
    }
    encoded_params = urllib.urlencode(params,
                                      True)  # Amazon uses the doseq style
    request = mechanize.Request(KINDLE_HIGHLIGHTS_URL + "/next_book?" +
                                encoded_params)
    request.add_header("Referer", KINDLE_HIGHLIGHTS_URL)
    response = browser.open(request)
    response_data = response.get_data()
    if len(response_data.strip()) == 0:
        return ([], used_asins, offset)  # No more books
    soup = BeautifulSoup(response.read())
    """
    def filter_func(tag): 
        tag_classes = tag["class"]
        return tag.name == "div" and (BOOK_DIV_CLASS in tag_classes or HIGHLIGHT_DIV_CLASS in tag_classes)
    """
    new_elements = soup.select(
        "> div")  # Get top-level divs which will be the nodes we want
    new_book_tag = soup.select("div." + BOOK_DIV_CLASS)[0]
    new_book_asin, new_offset = new_book_tag["id"].split("_")
    return (new_elements, new_book_asin, new_offset)
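
A hypothetical driver loop for the function above, repeatedly loading books until the endpoint returns no more HTML (the browser is assumed to have already opened KINDLE_HIGHLIGHTS_URL):

def load_all_elements(browser):
    # Keep calling load_more_elements_to_process until no new tags come back.
    all_elements, used_asins, offset = [], [], 0
    while True:
        new_elements, new_asin, offset = load_more_elements_to_process(
            browser, used_asins, offset)
        if not new_elements:
            return all_elements
        all_elements.extend(new_elements)
        used_asins.append(new_asin)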
Example #21
  def _scrapeUrl(self, url):
      """scrape a generic url
      """
      #grab the data -- go internets!
      request3 = mechanize.Request(url)
      self.cj.add_cookie_header(request3)
      response3 = mechanize.urlopen(request3)
      maincontent = response3.read()
      #make the soup
      soup = BeautifulSoup(maincontent)
      
      #parse the soup
      #This thing is a beast

      # date/times and games are intersperced
      # The first thing should be a date
      # then all games following are on that date
      # So - we find all dates and games with our query and handle them
      # as they happen in order
      date=None
      tags = soup.findAll(**{'class':["schedules-list-date", 'schedules-list-hd pre', 'schedules-list-hd post']})
      print "found %s tags" %len(tags)
      for tag in tags:
        #we got a date!
        if tag['class']=='schedules-list-date':
          #we've found a new date
          gameDateStr = str(tag.find('span').text)
          monthStr, date = gameDateStr.split(',')[1].strip().split()
          monthNum = self.MONTH_MAP[str(monthStr)]
          if monthNum in (1,2):
            year = self.year+1
          else:
            year = self.year
          dateInt = int(''.join([x for x in date if x.isdigit()]))
          date = datetime.date(year, monthNum, dateInt) 
        else:
          #we've got a new game -parse out home and away team
          home = str(tag.find(**{'class':['team-name home ', 'team-name home lost']}).text)
          away = str(tag.find(**{'class':['team-name away ', 'team-name away lost']}).text)
          #need to get the time as well
          time = str(tag.find(**{'class':'time'}).text)
          if time=='FINAL':
            print "CANNOT GET VALID TIME FOR GAMES that are in the past" 
            hr=0
            minute=0
          else:
            hr, minute = time.split(':')
            amPm = str(tag.find(**{'class':['am', 'pm']}).text).strip()
            hr = int(hr)
            minute=int(minute)
            #adjust times to take into account am/pm  
            if amPm=="PM" and hr <12:      
              hr+=12
            if amPm=="AM" and hr==12:
              hr=0
          d={'week':self.week,
             'home':self.TEAM_MAP[home],
             'away':self.TEAM_MAP[away],
             'kickoff':datetime.datetime(date.year, date.month, date.day, hr, minute, tzinfo=self.EASTERN_TIME_ZONE)}
          self.games.append(d)
Example #22
    def test_sending_headers(self):
        handler = self._make_request_handler([(200, [], "we don't care")])

        req = mechanize.Request("http://localhost:%s/" % handler.port,
                                headers={'Range': 'bytes=20-39'})
        mechanize.urlopen(req)
        self.assertEqual(handler.received_headers['Range'], 'bytes=20-39')
	def getUrl(self,url):
		req = mechanize.Request(url)
		req.add_header('User-Agent', canna_agent)
		response = mechanize.urlopen(req)
		link = response.read()
		response.close()
		return link
    def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True, enable_cache=True):
        ''' get page from pixiv and return as parsed BeautifulSoup object or response object.

            throw PixivException as server error
        '''
        url = self.fixUrl(url)
        while True:
            req = mechanize.Request(url)
            req.add_header('Referer', referer)

            read_page = self._get_from_cache(url)
            if read_page is None:
                try:
                    temp = self.open_with_retry(req)
                    read_page = temp.read()
                    read_page = read_page.decode('utf8')
                    if enable_cache:
                        self._put_to_cache(url, read_page)
                    temp.close()
                except urllib.error.HTTPError as ex:
                    if ex.code in [403, 404, 503]:
                        read_page = ex.read()
                        raise PixivException("Failed to get page: {0} => {1}".format(
                            url, ex), errorCode=PixivException.SERVER_ERROR)
                    else:
                        PixivHelper.print_and_log(
                            'error', 'Error at getPixivPage(): {0}'.format(str(sys.exc_info())))
                        raise PixivException("Failed to get page: {0}".format(
                            url), errorCode=PixivException.SERVER_ERROR)

            if returnParsed:
                parsedPage = BeautifulSoup(read_page, features="html5lib")
                return parsedPage
            return read_page
Example #25
def mechanize_cookie(config, log):
    """Returns a new Intel Ingress cookie via mechanize."""
    import mechanize

    log.info("Logging into Facebook using mechanize")
    browser = mechanize.Browser()

    if log.level <= 10:
        browser.set_debug_http(True)
        browser.set_debug_responses(True)
        browser.set_debug_redirects(True)

    browser.set_handle_robots(False)
    cookies = mechanize.CookieJar()
    browser.set_cookiejar(cookies)
    browser.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.41 Safari/534.7'
    )]
    browser.set_handle_refresh(False)
    log.info("Everything set - Let's go")

    url = 'https://www.facebook.com/v3.2/dialog/oauth?client_id=449856365443419&redirect_uri=https%3A%2F%2Fintel.ingress.com%2F'
    browser.open(url)
    log.info("Opened Facebook Login Page")
    log.debug(browser.geturl())

    # sometimes you have to fill in the form multiple times for whatever reason
    tries = 0
    while "https://intel.ingress.com/" not in browser.geturl() and tries < 5:
        tries += 1
        log.info(f"Trying to log into Intel: Try {tries}/5")
        browser.select_form(nr=0)
        try:
            browser.form['email'] = config.ingress_user
            browser.form['pass'] = config.ingress_password
        except:
            pass
        response = browser.submit()
        time.sleep(2)
        log.debug(browser.geturl())

    if "https://intel.ingress.com/" in response.geturl() and response.getcode(
    ) == 200:
        log.info("Got through. Now getting that cookie")
        log.debug(browser.geturl())

        # this is magic
        req = mechanize.Request(browser.geturl())
        cookie_list = browser._ua_handlers['_cookies'].cookiejar.make_cookies(
            response, req)

        final_cookie = _write_cookie(log,
                                     {c.name: c.value
                                      for c in cookie_list})
        return final_cookie
    else:
        log.error("Failed to login into Intel")
        log.info(browser.geturl())
        return ""
Example #26
    def fanboxGetPostsFromArtist(self, artist_id, next_url=""):
        ''' get all posts from the supported user from https://www.pixiv.net/ajax/fanbox/creator?userId=15521131 '''
        if next_url is None or next_url == "":
            url = "https://www.pixiv.net/ajax/fanbox/creator?userId={0}".format(
                artist_id)
        elif next_url.startswith("https://"):
            url = next_url
        else:
            url = "https://www.pixiv.net" + next_url

        # Fix #494
        PixivHelper.print_and_log('info', 'Getting posts from ' + url)
        referer = "https://www.pixiv.net/fanbox/creator/{0}".format(artist_id)
        req = mechanize.Request(url)
        req.add_header('Accept', 'application/json, text/plain, */*')
        req.add_header('Referer', referer)
        req.add_header('Origin', 'https://www.pixiv.net')
        req.add_header('User-Agent', self._config.useragent)

        response = self.open_with_retry(req).read()
        # Issue #420
        _tzInfo = None
        if self._config.useLocalTimezone:
            _tzInfo = PixivHelper.LocalUTCOffsetTimezone()
        result = FanboxArtist(artist_id, response, tzInfo=_tzInfo)

        pixivArtist = PixivArtist(artist_id)
        self.getMemberInfoWhitecube(artist_id, pixivArtist)
        result.artistName = pixivArtist.artistName
        result.artistToken = pixivArtist.artistToken

        return result
def getPropertyPins(streetName):

    url = r'https://taxcommissioner.dekalbcountyga.gov/TaxCommissioner/TCSearch.asp'
    request = mechanize.Request(url)
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    response.close()

    form = forms[0]

    form['StreetName'] = streetName
    propertyList = mechanize.urlopen(form.click()).read()

    tree = html.fromstring(propertyList)
    pins = tree.xpath('//tr/td[1]/a/@href')
    addresses = tree.xpath('//tr/td[1]/a/text()')

    pinList = []
    i = 0
    for pin in pins:
        #print pin
        newpin = pin.split('=')
        pinList.append([newpin[3], addresses[i]])
        print newpin[3] + '\t' + addresses[i]
        i = i + 1

    return pinList
Example #28
    def requestGetQuestao(self, id_questao):
        ''' Fetches data from the API for a given question ID. '''

        url_api = "http://intranet.unicesumar.edu.br/sistemas/bancoDeQuestoes/action/questaoAction.php"
        payload = {
            "action": "filtrar",
            "data[filtroJSON][idQuestao]": id_questao,
            "data[filtroJSON][temaAleatorio]": 0,
            "data[filtroJSON][tagAndOr]": "tagAnd",
            "data[filtroJSON][destinoAndOr]": "destinoAnd",
            "data[filtroJSON][tipoRequest]": "questaoListRequest"
        }
        try:
            data = parse.urlencode(payload)
            request_form_questao = mechanize.Request(url_api, data)
            response = self.br.open(request_form_questao)
            dados_questao = response.get_data().decode("latin1")
            resp = "Retorno: " + str(
                self.br.response().getcode()) + " -> " + str(
                    self.br.response().geturl())
            logf = open(dir_path + "log.txt", "a+")
            logf.write(datetime.today().strftime("%d/%m/%Y, %H:%M:%S") +
                       " - " + str(resp) + "\n")
            logf.close()
            return dados_questao
        except Exception as e:
            now = datetime.now()
            logf = open(dir_path + "log.txt", "a+")
            logf.write(
                now.strftime("%d/%m/%Y, %H:%M:%S") + " - " + str(e) + "\n")
            logf.close()
Example #29
    def test_auth(self):
        """
        Test Authentication Headers
        """
        # Setup
        port = 8001
        handler = ThreadingSimpleServer(('localhost', port), AuthHandler)
        with handler.obtain():
            url = "http://localhost:%s/" % (port, )
            username = '******'
            password = '******'
            b = mechanize.Browser()
            passman = mechanize.HTTPPasswordMgrWithDefaultRealm()
            passman.add_password(None, url, username, password)

            # other authentication handlers
            auth_digest = urllib_request.HTTPDigestAuthHandler(passman)
            auth_basic = urllib_request.HTTPBasicAuthHandler(passman)

            b.set_handle_robots(False)  # pylint: disable=no-member
            b.add_handler(auth_digest)  # pylint: disable=no-member
            b.add_handler(auth_basic)  # pylint: disable=no-member
            req = mechanize.Request(url)
            # Exercise
            b.open(req)
            # Verify
            assert b.response().code == 200
Example #30
	def getUrl(self,url):
		req = mechanize.Request(url)
		req.add_header('User-Agent', ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
		response = mechanize.urlopen(req)
		link = response.read()
		response.close()
		return link