def handle_data(self, data):
    """Consume the text nodes that make up one clue (number, then text).

    Behaviour depends on ``self._state``:

    * ``FOUND_CLUE``: *data* must start with ``<digits>:``. The digits are
      checked against the number already stored in ``self._currentClue[0]``
      and the state advances to ``FOUND_CLUE_NUMBER``.
    * ``FOUND_CLUE_NUMBER``: *data* is the clue text. It is stored, with
      surrounding whitespace removed, in ``self._currentClue[1]``; the
      completed clue is appended to ``self._clueList`` as a tuple and the
      state advances to ``FOUND_CLUE_TEXT``.

    In any other state the data is ignored.

    Raises:
        HTMLParser.HTMLParseError: if the expected number or text is
            missing, or the number disagrees with the stored one.
    """
    if self._state == self.states.FOUND_CLUE:
        # The number should already be in _currentClue (captured from the
        # tag id); this state only cross-checks it, for robustness.
        number = re.compile(r"(\d+):\s*")
        match = re.match(number, data)
        if match:
            if match.group(1) != self._currentClue[0]:
                raise HTMLParser.HTMLParseError(
                    'Inconsistent clue numbers')
        else:
            raise HTMLParser.HTMLParseError('Expected clue number')
        self._state = self.states.FOUND_CLUE_NUMBER
    elif self._state == self.states.FOUND_CLUE_NUMBER:
        clue = re.compile(r"\s*(.+)\s*")
        match = re.match(clue, data)
        if match:
            # The greedy ``.+`` swallows trailing blanks before the final
            # ``\s*`` can see them, so strip the right-hand side explicitly.
            self._currentClue[1] = match.group(1).rstrip()
            self._clueList.append(tuple(self._currentClue))
            self._state = self.states.FOUND_CLUE_TEXT
        else:
            raise HTMLParser.HTMLParseError('Expected clue')
def handle_starttag(self, tag, attrs):
    """Track the ``<div>`` tags that open the clue list and each clue.

    * ``NEUTRAL``: a ``<div id="<clueType>Clues">`` moves the parser to
      ``IN_CLUE_CLASS``.
    * ``IN_CLUE_CLASS``: for a ``<div>``, each ``id`` attribute is matched
      against ``divClue<number>``. On a match the number (as a string) is
      stored in ``self._currentClue[0]`` and the state becomes
      ``FOUND_CLUE``.

    Args:
        tag: tag name as passed by the HTML parser.
        attrs: list of ``(name, value)`` attribute pairs.

    Raises:
        HTMLParser.HTMLParseError: if, inside the clue list, a ``<div>``
            carries an ``id`` that does not start with ``divClue<digits>``.
    """
    if self._state == self.states.NEUTRAL:
        # look for <div id="<clueType>Clues">
        if tag == 'div' and ('id', self._clueType + 'Clues') in attrs:
            self._state = self.states.IN_CLUE_CLASS
    elif self._state == self.states.IN_CLUE_CLASS:
        # look for <div id="divClue<clueNumber>">
        if tag == 'div':
            # The id only has a known prefix; capture the trailing number.
            clueID = re.compile(r"divClue(\d+)")
            for attrib in attrs:
                if attrib[0] == 'id':
                    match = re.match(clueID, attrib[1])
                    if match:
                        # we have the number now, so add it to currentClue
                        self._currentClue[0] = match.group(1)
                        self._state = self.states.FOUND_CLUE
                        return
                    else:
                        raise HTMLParser.HTMLParseError(
                            'Expected clue number')
def error(self, message):
    """Try to recover from a parse error by restarting further down.

    The current position is printed to stderr. If more than ten errors
    have already been counted, or ``self.started`` is truthy, the error is
    raised as ``HTMLParser.HTMLParseError``. Otherwise ``self.rawdata`` is
    rebuilt from the lines of ``self.html`` starting at index
    ``getpos()[0]``, the error counter is incremented and parsing is
    resumed with ``goahead(1)``.
    """
    print >> sys.stderr, self.getpos()
    give_up = self.error_count > 10 or self.started
    if give_up:
        raise HTMLParser.HTMLParseError(message, self.getpos())
    source_lines = self.html.split('\n')
    resume_from = self.getpos()[0]
    # skip one line
    self.rawdata = '\n'.join(source_lines[resume_from:])
    self.error_count += 1
    self.goahead(1)
def sacar_link_a_foto_instagram(self, url_completa):
    """Return the ``og:image`` URL advertised by the page at *url_completa*.

    The page is downloaded, parsed, and the ``content`` attribute of the
    first ``<meta property="og:image">`` tag is returned.

    Args:
        url_completa: full URL of the page to fetch.

    Returns:
        The value of the tag's ``content`` attribute.

    Raises:
        HTMLParser.HTMLParseError: if the page has no ``og:image`` meta
            tag, or the tag has no ``content`` attribute.
    """
    # NOTE(review): verify=False turns off TLS certificate verification
    # for this request - confirm this is still required.
    fichero = requests.get(url_completa, params=None, verify=False)
    html_source = fichero.content
    soup = BeautifulSoup(html_source)
    meta_tag = soup.findAll('meta', {'property': 'og:image'})
    if not meta_tag:
        raise HTMLParser.HTMLParseError(
            "No se ha podido parsear el html para sacar la url de la foto")
    # Use .get() so a tag without ``content`` is reported as a parse
    # error instead of leaking a KeyError to the caller.
    img_url = meta_tag[0].get('content')
    if img_url is None:
        raise HTMLParser.HTMLParseError(
            "No se ha podido parsear el html para sacar la url de la foto")
    return img_url
def error(self, message):
    """Override the superclass ``error()`` so parse errors are tolerated.

    The message is extended with ``self._location()``, logged at debug
    level, and remembered in ``self.errmsg`` if no earlier message is
    stored. Once the error counter exceeds ten, the error is raised as
    ``HTMLParser.HTMLParseError``.
    """
    # build the full message, including where the problem occurred
    message = message + (', ' + self._location())
    logger.debug('problem parsing html: %s', message)

    # only the first problem is kept
    if self.errmsg is None:
        self.errmsg = message

    self.errcount += 1
    if self.errcount <= 10:
        return
    raise HTMLParser.HTMLParseError(message, self.getpos())
def obtener_url_foto_de_tweet(self, tweet):
    """Return the photo URL attached to *tweet*, or ``None`` if it has none.

    A tweet for which ``tiene_foto`` is true yields the ``media_url`` of
    its first media entity. Otherwise, if ``tiene_foto_externa`` is true,
    the first expanded URL is resolved through
    ``sacar_link_a_foto_instagram``.

    Raises:
        exceptions.ConnectionError: re-raised with an explanatory message
            when the lookup could not connect.
        HTMLParser.HTMLParseError: re-raised when the page could not be
            parsed.
    """
    try:
        if self.tiene_foto(tweet):
            # twimg
            return tweet['entities']['media'][0]['media_url']
        if self.tiene_foto_externa(tweet):
            # instagram
            primera_url = tweet['entities']['urls'][0]
            url_completa = primera_url['expanded_url']
            return self.sacar_link_a_foto_instagram(url_completa)
        return None
    except exceptions.ConnectionError as conerr:
        mensaje = (
            u'No se ha podido conectar para obtener la url de la foto. {0}'
            .format(conerr.message))
        raise exceptions.ConnectionError(mensaje)
    except HTMLParser.HTMLParseError as prserr:
        raise HTMLParser.HTMLParseError(prserr.message)
def handle_starttag(self, tag, attrs):
    """Record an opening tag and emit the matching output markup.

    The lower-cased tag is pushed on ``self.stack``. When output is
    enabled, inline tags append a font escape to ``self.data`` and block
    tags start a new block via ``self.newblock``.

    Raises:
        HTMLParser.HTMLParseError: if a ``<p>`` opens while another ``<p>``
            is still on the stack.
    """
    if DEBUG:
        print("starttag:", tag, attrs)
    tag = tag.lower()
    if tag == "p" and "p" in self.stack:
        raise HTMLParser.HTMLParseError("nested <p> found")
    self.stack.append(tag)
    if not self.enabled:
        return

    # block-level tags that open a new block with a specific command
    block_commands = {"dt": ".br", "dd": ".RS", "pre": ".nf"}
    if tag in ("b", "strong", "code"):
        self.data += "\\fB"
    elif tag in ("i", "em"):
        self.data += "\\fI"
    elif tag in block_commands:
        self.newblock(cmd=block_commands[tag])
    elif tag in ("h2", "h3"):
        self.newblock()
def hit_pager(self):
    """Extract the three pager numbers from the results header cell.

    The targeted markup looks like
    "显示<b>56</b>个问题中的 <b>1</b> 到 <b>50</b>"
    (i.e. "Showing <b>1</b> to <b>50</b> of <b>56</b> issues").

    When the current path ends with ``td.jiraformheader`` and the node has
    exactly three ``<b>`` children, ``self.pager`` is set to the integer
    parsed from the first ``text_data`` item of each child, and this
    handler removes itself from ``self._hits``.

    Returns:
        True.

    Raises:
        HTMLParser.HTMLParseError: if any of the three values cannot be
            converted (``ValueError`` or ``AttributeError``).
    """
    # the node currently on top of the node stack
    nod = self._nodes[-1]
    if self._path.endswith(u'td.jiraformheader'):
        # NOTE(review): assumes every child node exposes ``tagName`` - confirm
        bs = [chld for chld in nod.childNodes if chld.tagName == u'b']
        if len(bs) != 3:
            # not the pager header we are looking for; leave state untouched
            return True
        try:
            self.pager = map(lambda chld: int(chld.text_data[0]), bs)
        except (ValueError, AttributeError):
            raise HTMLParser.HTMLParseError(
                u"App detected: failed to parse N1, N2 and N3 in "
                u"'显示<b>N1</b>个问题中的 <b>N2</b> 到 <b>N3</b>'", nod.pos)
        # Is it finished: disable this hit
        self._hits.remove(self.hit_pager)
    return True
def handle_endtag(self, tag):
    """Pop a closing tag off the stack and emit the matching output markup.

    The lower-cased tag must equal the top of ``self.stack``; it is then
    removed. When output is enabled, inline tags reset the font in
    ``self.data``, block tags close their block via ``self.newblock`` and
    headings are finished with ``self.endheading``.

    Raises:
        HTMLParser.HTMLParseError: if the stack is empty or its top entry
            differs from *tag*.
    """
    if DEBUG:
        print("endtag:", tag)
    tag = tag.lower()
    if not self.stack or self.stack[-1] != tag:
        raise HTMLParser.HTMLParseError("line %s: start/end tag mismatch\nstack is %r, got %r" % (self.getpos()[0], self.stack, tag))
    self.stack.pop()
    if not self.enabled:
        return

    if tag == "p":
        self.newblock(cmd=".PP")
    elif tag in ("b", "strong", "code", "i", "em"):
        self.data += "\\fR"
    elif tag == "dd":
        self.newblock(cmd=".RE")
        # extra paragraph break once no <dd> remains open
        if self.listspace and "dd" not in self.stack:
            self.f.write(".PP\n")
    elif tag == "pre":
        self.newblock(cmd=".fi")
    elif tag == "h2":
        self.endheading("Header")
    elif tag == "h3":
        self.endheading("Subsection", 4)
def handle_data(self, data):
    """Handle text nodes: the crossword title and white-square numbers.

    * ``NEUTRAL`` / ``ACCEPTING``: text starting with ``Crossword for``
      followed by at least one character becomes ``self._grid.name``.
    * ``WHITE_SQUARE``: the first run of digits after any non-digits (an
      empty string if there is none) becomes the number of a new blank
      ``Square`` at ``(self._row, self._col)``, and the state returns to
      ``IN_ROW``.

    Raises:
        HTMLParser.HTMLParseError: if the white-square pattern fails to
            match.
    """
    state = self._state
    if state in (self.states.NEUTRAL, self.states.ACCEPTING):
        if re.match(r"Crossword for.+", data):
            self._grid.name = data
        return

    if state != self.states.WHITE_SQUARE:
        return

    found = re.match(r"\D*(\d*)", data)
    if not found:
        raise HTMLParser.HTMLParseError('Expected number or blank')
    square = Square(self._grid, self._row, self._col,
                    letter='', number=found.group(1))
    self._grid.setSquare(self._row, self._col, square)
    self._state = self.states.IN_ROW
def handle_endtag(self, tag):
    """Close the current node, auto-closing unbalanced tags where possible.

    If *tag* does not match the node on top of ``self._nodes``, the stack
    is searched downwards for a node with that tag name. When one is
    found, every node above it is closed first by calling this method
    recursively. When none is found, a stray ``tbody`` end tag is dropped
    silently and any other tag raises.

    After that, ``self.check_hits()`` runs, the last ``/``-separated
    component is removed from ``self._path`` and the top node is popped.

    Raises:
        HTMLParser.HTMLParseError: if no open node matches *tag* and *tag*
            is not ``tbody``.
    """
    nod = self._nodes[-1]
    if nod.tagName != tag:
        try:
            # JIRA HTML page is really bad!! try auto merge
            top = len(self._nodes)-1
            # search from just below the top node down to the root
            for i in xrange(top-1, -1, -1):
                if self._nodes[i].tagName == tag:
                    break
            else:
                # loop ended without break: no open node has this tag name
                raise HTMLParser.HTMLParseError(u"App detected: Nested tags!"
                    u"(The opening tag is '%s', but close '%s', path='%s')" % (nod.tagName, tag, self._path), self.getpos())
            # close every node sitting above the matching one, innermost first
            for j in xrange(top, i, -1):
                _tag = self._nodes[-1].tagName
                if DEBUG:
                    print >>sys.stderr, u"Malform HTML page detected! " \
                        u"Auto close tag '%s', path='%s'" \
                        u"(current tag is '%s' line %d, column %d)" \
                        % (_tag, self._path, tag, self.getpos()[0], self.getpos()[1])
                self.handle_endtag(_tag)
        except HTMLParser.HTMLParseError:
            if tag == 'tbody':
                if DEBUG:
                    print >>sys.stderr, u"Malform HTML page detected! " \
                        u"Drop 'tbody' tag, path='%s'" \
                        u"(line %d, column %d)" \
                        % (self._path, self.getpos()[0], self.getpos()[1])
                # ignore the unmatched end tag without touching the stack
                return
            else:
                raise
    self.check_hits()
    self._path = self._path.rsplit(u'/', 1)[0]
    self._nodes.pop()
class TestConfig(unittest.TestCase):
    """Unit tests for config parsing, helpers, events, auth and rewards."""

    # file object used when stdout is redirected; None while not redirected
    fsock = None
    # fixed timestamp string used as the page content in test_dump
    mockdate = "2017-09-06 00:44:47.7"

    def _redirectOut(self):  # pragma: no cover
        """Redirect ``sys.stdout`` to ``out.log`` (opened in append mode)."""
        self.fsock = open('out.log', 'a+')
        sys.stdout = self.fsock

    def tearDown(self):  # pragma: no cover
        """Close the redirect file, if any, and restore ``sys.stdout``."""
        if self.fsock is not None:
            self.fsock.close()
            self.fsock = None
            sys.stdout = sys.__stdout__

    def setUp(self):
        """Build a ``Config`` parsed from the module-level ``XMLString``."""
        self.config = Config()
        self.configXMLString = XMLString
        self.config.parseFromString(self.configXMLString)
        self.configFBXML = FBXML

    def test_timestamp(self):
        """Check that ``helpers.getLoggingTime`` contains a YYYY-MM-DD date."""
        stamp = helpers.getLoggingTime()
        self.assertRegexpMatches(stamp, "\d{4}-\d{2}-\d{2}", "should have time stamp,\n" + stamp)

    def test_createdir(self):
        """Check that ``createResultsDir`` leaves ``RESULTS_DIR`` in place."""
        helpers.createResultsDir("none")
        self.assertEqual(os.path.isdir(helpers.RESULTS_DIR), True, "missing directory " + helpers.RESULTS_DIR)

    @patch('os.path.dirname', new=Mock(side_effect=OSError("fail to mock a write", errno.EACCES)))
    def test_createdir_raise(self):
        """Check that an ``OSError`` from ``os.path.dirname`` propagates."""
        self.assertRaisesRegexp(OSError, "fail to mock", helpers.createResultsDir, "none")

    @patch('os.path.dirname', new=Mock(side_effect=OSError("fail to mock a write", errno.EEXIST)))
    def test_createdir_fail(self):
        """Same as ``test_createdir_raise`` but with ``errno.EEXIST``."""
        self.assertRaisesRegexp(OSError, "fail to mock", helpers.createResultsDir, "none")

    def test_dump_none(self):
        """Check that dumping ``None`` raises ``TypeError``."""
        self.assertRaisesRegexp(TypeError, "None", helpers.dumpErrorPage, None)

    def test_dump(self):
        """Dump a page and check the first line of the written file."""
        filename = helpers.dumpErrorPage(self.mockdate)
        output = ""
        with open("result/" + filename, "r") as fd:
            output += fd.readline()
        self.assertRegexpMatches(output, "\d{4}-\d{2}-\d{2}", "should have time stamp,\n" + output)

    def test_errorontext(self):
        """Check that ``helpers.errorOnText`` raises only when text is found."""
        err = 'Authentication has not been passed: Invalid password'
        # not found so no assertion
        output = helpers.errorOnText("", 'That password is incorrect.', err)
        # should raise if it sees an assertion
        self.assertRaisesRegexp(helpers.BingAccountError, "Invalid", helpers.errorOnText,
                                'That password is incorrect.', 'That password is incorrect.', err)

    def test_node(self):
        """Check that ``getXmlChildNodes`` returns something for the root."""
        import xml.etree.ElementTree as ET
        root = ET.fromstring(self.configXMLString)
        node = helpers.getXmlChildNodes(root)
        self.assertIsNotNone(node, "should not be null " + str(node))

    @patch('sys.version_info')
    def test_node_fail(self, mockver):
        """Repeat ``test_node`` with ``sys.version_info`` set to ``[2, 1]``."""
        sys.version_info = [2, 1]
        import xml.etree.ElementTree as ET
        root = ET.fromstring(self.configXMLString)
        node = helpers.getXmlChildNodes(root)
        self.assertIsNotNone(node, "should not be null " + str(node))

    def test_accounts(self):
        """Check that the parsed config holds exactly the expected account."""
        self.assertIsNotNone(self.config.accounts)
        self.assertEqual(len(self.config.accounts), 1)
        accounts = dict()
        acc = Config.Account()
        acc.accountLogin = "******"
        acc.password = "******"
        acc.accountType = "Live"
        acc.disabled = False
        acc.ua_desktop = "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10136"
        acc.ua_mobile = "mozilla/5.0 (iphone; cpu iphone os 7_0_2 like mac os x) applewebkit/537.51.1 (khtml, like gecko) version/7.0 mobile/11a501 safari/9537.53"
        accounts[acc.getRef()] = acc
        self.assertEqual(accounts, self.config.accounts)

    def test_history(self):
        """Feed several page fragments to ``bingHistory.parse``."""
        self.assertRaisesRegexp(TypeError, "None", bingHistory.parse, None)
        output = bingHistory.parse("")
        self.assertIsNotNone(output, "missing output " + str(output))
        page = '<span class="query_t">'
        page += '<div id="results_area"></div><div id="sidebar"></div>'
        output = bingHistory.parse(page)
        self.assertIsNotNone(output, "missing output " + str(output))
        page = '<span class="sh_item_qu_query">'
        page += '<ul class="sh_dayul"></ul>'
        page += ' value == 0'
        page += '</span>'
        output = bingHistory.parse(page)
        self.assertIsNotNone(output, "missing output " + str(output))
        page = '<ul class="sh_dayul"> </ul>'
        output = bingHistory.parse(page)
        self.assertIsNotNone(output, "missing output " + str(output))
        output = bingHistory.getBingHistoryTodayURL()
        self.assertRegexpMatches(output, "https", "missing url " + str(output))

    # NOTE(review): a second method named ``test_auth_url`` is defined later
    # in this class body; the later definition replaces this one, so this
    # test is never collected. Confirm whether it should be renamed.
    @patch('helpers.getResponseBody', return_value='"WindowsLiveId":"" "WindowsLiveId":""')
    @patch('time.sleep', return_value='')
    def test_auth_url(self, timemock, helpmock):  # pragma: no cover
        """Check that ``run`` raises ``ValueError`` for an unknown url type."""
        self.assertRaisesRegexp(ValueError, "unknown url type", run, self.config)

    @patch('bingAuth.BingAuth.authenticate', new=Mock(side_effect=SocketError(errno.ECONNREFUSED, "errno.ECONNREFUSED")))
    def test_auth_exceptionSock(self):
        """A connection-refused socket error must propagate out of ``run``."""
        self.assertRaisesRegexp(SocketError, "", run, self.config)

    @patch('bingAuth.BingAuth.authenticate', new=Mock(side_effect=SocketError(errno.ECONNRESET, "errno.ECONNRESET")))
    def test_auth_exceptionSockReset(self):
        """A connection-reset socket error must not escape ``run``."""
        self.assertIsNone(run(self.config), "should not return anything")

    @patch('bingAuth.BingAuth.authenticate', new=Mock(side_effect=helpers.BingAccountError(None)))
    def test_auth_exceptionBing(self):
        """A ``BingAccountError`` must not escape ``run``."""
        self.assertIsNone(run(self.config), "should not return anything")

    @patch('bingAuth.BingAuth.authenticate', new=Mock(side_effect=urllib2.URLError("")))
    def test_auth_exceptionURL(self):
        """A ``URLError`` must not escape ``run``."""
        self.assertIsNone(run(self.config), "should not return anything")

    @patch('bingAuth.BingAuth.authenticate', new=Mock(side_effect=HTMLParser.HTMLParseError("error")))
    def test_auth_exceptionParser(self):
        """An ``HTMLParseError`` must not escape ``run``."""
        self.assertIsNone(run(self.config), "should not return anything")

    # NOTE(review): the decorator argument opens the file "tmp" when the
    # class body is executed, and nothing here closes it.
    @patch('bingAuth.BingAuth.authenticate', new=Mock(side_effect=urllib2.HTTPError("", "", "", "", open("tmp", "a+"))))
    def test_auth_exceptionHTTP(self):
        """An ``HTTPError`` must not escape ``run``."""
        self.assertIsNone(run(self.config), "should not return anything")

    def test_stringify(self):
        """Check that ``stringify`` rejects a negative second argument."""
        self.assertRaisesRegexp(ValueError, "too small", stringify, None, -1)

    @patch('urllib2.Request', return_value="")
    @patch('helpers.getResponseBody', return_value="")
    @patch('urllib2.Request.add_header',
           return_value=urllib2.Request(bingCommon.BING_URL, bingCommon.HEADERS))
    def test_auth_url(self, headermock, helpmock, urlmock):
        """Check ``BingAuth`` construction with and without an opener.

        :param headermock: mock for ``urllib2.Request.add_header``
        :param helpmock: mock for ``helpers.getResponseBody``
        :param urlmock: mock for ``urllib2.Request``
        """
        self.assertRaisesRegexp(TypeError, "opener is not", bingAuth.BingAuth, bingCommon.HEADERS, None)
        auth = bingAuth.BingAuth(bingCommon.HEADERS, urllib2.OpenerDirector())
        self.assertIsNotNone(auth, "should return class")

    def test_config(self):
        """Exercise the ``Config`` classes and their argument validation."""
        configobj = Config()
        self.assertIsNotNone(configobj, "should return class")
        self.assertIsNotNone(Config.General(), "should return class")
        self.assertIsNotNone(ConfigError("ok"), "should return exception")
        self.assertIsNotNone(Config.Proxy(), "should return class")
        self.assertIsNotNone(Config.EventAccount(), "should return class")
        self.assertIsNotNone(Config.Event.Notify(), "should return class")
        ifs = Config.Event.IfStatement()
        ifs.op = lambda x, y: x
        ifs.lhs = lambda x: x
        ifs.rhs = "b"
        self.assertIsNotNone(str(ifs), "should return class")
        self.assertRaisesRegexp(ValueError, "None", ifs.evaluate, None)
        self.assertRaisesRegexp(TypeError, "is not of", ifs.evaluate, [])
        self.assertIsNotNone(ifs.evaluate(BingRewardsReportItem()))
        spec = Config.Event.Specifier()
        self.assertIsNotNone(spec, "should return class")
        self.assertRaisesRegexp(ValueError, "is None", spec.evaluate, None, BingRewardsReportItem())
        self.assertRaisesRegexp(TypeError, "list", spec.evaluate, [], BingRewardsReportItem())
        self.assertRaisesRegexp(ValueError, "is None", spec.evaluate, [], None)
        self.assertRaisesRegexp(TypeError, "not of BingRewardsReportItem type", spec.evaluate, [], self.config)
        self.assertIsNotNone(spec.evaluate("%a", BingRewardsReportItem()), "should return string")
        dist = os.path.join(os.path.dirname(__file__), "..", "config.xml")
        self.assertIsNone(configobj.parseFromFile(dist), "should be none")
        self.assertRaisesRegexp(ValueError, "_configFile_ is None", configobj.parseFromFile, None)
        self.assertRaisesRegexp(ValueError, "is None", self.config.parseFromString, None)
        self.assertRaisesRegexp(ConfigError, "Invalid subnode", configobj.parseFromString, InvalidXML)
        self.assertRaisesRegexp(ConfigError, "is not found", configobj.parseFromString, LOGINXML)
        self.assertRaisesRegexp(ConfigError, "is not found", configobj.parseFromString, PWDXML)
        self.assertRaisesRegexp(ConfigError, "should be either set", self.config.parseFromString, PROXYLOGINXML)
        self.assertRaisesRegexp(KeyError, "_specifier_ is not", validateSpecifier, "%not")
        self.assertRaisesRegexp(ConfigError, "Invalid subnode", self.config.parseFromString, FBXML)

    def test_config_attr(self):
        """Check the errors raised for invalid numeric attribute values."""
        self.assertRaisesRegexp(ConfigError, "MUST", self.config.parseFromString, FLOAT)
        self.assertRaisesRegexp(ConfigError, "MUST", self.config.parseFromString, INT)
        self.assertRaisesRegexp(ConfigError, "must", self.config.parseFromString, NONFLOAT)
        self.assertRaisesRegexp(ConfigError, "must", self.config.parseFromString, NONINT)

    def test_config_notify(self):
        """Check the errors raised for invalid notify/event definitions."""
        self.assertRaisesRegexp(ConfigError, "is not found", self.config.parseFromString, NONREF)
        self.assertRaisesRegexp(ConfigError, "is not found", self.config.parseFromString, NONACCREF)
        self.assertRaisesRegexp(ConfigError, "not supported", self.config.parseFromString, NONEV)

    def test_config_retry(self):
        """Check the errors raised for invalid retry settings."""
        self.assertRaisesRegexp(ConfigError, "is not found", self.config.parseFromString, RETRY)
        self.assertRaisesRegexp(ConfigError, "must be", self.config.parseFromString, INVRETRY)
        self.assertRaisesRegexp(ConfigError, "MUST BE", self.config.parseFromString, NEGRETRY)
        self.assertRaisesRegexp(ConfigError, "is not found", self.config.parseFromString, RETRYCNT)
        self.assertRaisesRegexp(ConfigError, "must be", self.config.parseFromString, INVRETRYCNT)
        self.assertRaisesRegexp(ConfigError, "MUST BE", self.config.parseFromString, NEGRETRYCNT)

    def test_config_if(self):
        """Check the errors raised for invalid ``if`` statements."""
        self.assertRaisesRegexp(ConfigError, "is invalid", self.config.parseFromString, NONIF2)
        self.assertRaisesRegexp(ConfigError, "is invalid", self.config.parseFromString, NONIFRHS)
        self.assertRaisesRegexp(ConfigError, "is invalid", self.config.parseFromString, NONIFOP)

    def test_event(self):
        """Exercise ``EventsProcessor`` with the default and EVENTLESS configs."""
        self.assertIsNone(EventsProcessor.onScriptFailure(self.config, Exception()), "should be none")
        self.assertIsNone(EventsProcessor.onScriptComplete(self.config), "should be none")
        self.assertRaisesRegexp(ConfigError, "not found", self.config.parseFromString, EVENT)
        self.config.parseFromString(EVENTLESS)
        self.assertRaisesRegexp(Exception, ".*", EventsProcessor.onScriptFailure, self.config, Exception())
        self.assertIsNone(EventsProcessor.onScriptComplete(self.config), "should be none")
        ep = EventsProcessor(self.config, BingRewardsReportItem())
        self.assertIsNotNone(ep.processReportItem(), "should not be none and be done")

    @patch('main.earnRewards', return_value=None)
    @patch('eventsProcessor.EventsProcessor.processReportItem', return_value=(-1, None))
    def test_event_dontcare(self, mockep, mockmain):
        """Check ``processAccount`` when ``processReportItem`` yields -1."""
        # not retry nor ok with -1
        self.assertIsNone(processAccount(self.config), "should return nothing")

    def test_event_getEvent_returnsEvent(self):
        """Check the ``onScriptFailure`` event parsed from the XML string."""
        event = self.config.getEvent(Config.Event.onScriptFailure)
        self.assertIsNotNone(event)
        self.assertTrue(len(event.notifies) == 1)
        self.assertEqual(event.notifies[0].cmd, "echo")

    def test_event_getEvent_returnsNoneIfEventDoesntExist(self):
        """Check ``getEvent`` for an unknown name and for ``None``."""
        self.assertIsNone(self.config.getEvent("does_not_exist"))
        self.assertRaisesRegexp(ValueError, "None", self.config.getEvent, None)

    def test_reward_bfp_hit(self):
        """Run ``_rewards_hit`` against both reward classes."""
        self._rewards_hit(bfp.RewardV1())
        self._rewards_hit(bdp.Reward())

    @patch('helpers.getResponseBody')
    def _rewards_hit(self, classobj, helpmock):
        """Exercise ``BingRewards`` using *classobj* as the reward object.

        :param classobj: reward instance to mutate and pass to ``process``
        :param helpmock: mock for ``helpers.getResponseBody``
        """
        self.config.proxy = False
        reward = BingRewards(bingCommon.HEADERS, "", self.config)
        page = '"WindowsLiveId":"" "WindowsLiveId":"" '
        page += 'action="0" value="0" '
        page += 'value= "0" NAP value="0" '
        page += 'ANON value="0" '
        page += 'id="t" value="0" '
        page += '<div> 999 livetime points</div> '
        helpmock.return_value = page
        # if not login should have not found error for url
        # NOTE(review): getLifetimeCredits is referenced without being
        # called here - confirm whether a call was intended.
        self.assertIsNotNone(reward.getLifetimeCredits, "Should return int")
        page = "t.innerHTML='100'"
        helpmock.return_value = page
        self.assertIsNotNone(reward.getRewardsPoints(), "should not be None")
        self.assertRaisesRegexp(TypeError, "not an instance", reward.process, None, True)
        # NONE case
        newbfp = classobj
        newbfp.tp = None
        rewards = [newbfp]
        self.assertIsNotNone(reward.process(rewards, True), "handle not none")
        # HIT case
        newbfp.tp = mock.Mock()
        newbfp.tp = [0, 1, 2, 3, bfp.RewardV1.Type.Action.HIT]
        # SEARCH case
        newbfp.tp = mock.Mock()
        newbfp.tp = [0, 1, 2, 3, bfp.RewardV1.Type.Action.SEARCH]
        newbfp.progressCurrent = 100
        rewards = [newbfp]
        self.assertIsNotNone(reward.process(rewards, True), "should return res")
        self.assertRaisesRegexp(TypeError, "not an instance", reward.printResults, None, True)
        result = mock.Mock()
        result.action = bfp.RewardV1.Type.Action.SEARCH
        result.isError = True
        result.o = newbfp
        result.message = "done"
        newbfp.progressCurrent = 1
        newbfp.progressMax = 100
        newbfp.url = "http:0.0.0.0"
        self.assertIsNone(reward.printResults([result], True), "should return None")
        self.assertRaisesRegexp(TypeError, "rewards is not", reward.printRewards, None)
        rewards[0].isDone = True
        self.assertIsNone(reward.printRewards(rewards), "should return None")
        self.assertRaisesRegexp(TypeError, "reward is not", reward.RewardResult, None)
        self.assertIsNotNone(reward.RewardResult(newbfp), "should return class")
        proxy = mock.Mock()
        proxy.login = True
        proxy.password = "******"
        proxy.url = "http://127.0.0.1"
        proxy.protocols = "http"
        self.config.proxy = proxy
        self.assertIsNotNone(BingRewards(bingCommon.HEADERS, "", self.config), "should return class")
        proxy.login = False
        self.config.proxy = proxy
        self.assertIsNotNone(BingRewards(bingCommon.HEADERS, "", self.config), "should return class")
        self.assertRaisesRegexp(ConfigError, "not found", self.config.parseFromString, PROTXML)
        self.assertRaisesRegexp(ConfigError, "not found", self.config.parseFromString, URLXML)