def test_008(self):
    """
    Pull two items from an IceFire iterator over 'books' and, for each,
    log the type and 'url' of the first and last entry of its leading
    element.

    Nothing is asserted; the test only fails if one of the calls raises.
    """
    books = IceFire(config=Configuration.instance().config,
                    type0='books',
                    logger=self.logger)
    # The iterator returned here is not used; the items are pulled from
    # the IceFire object itself.
    iter(books)

    for _ in range(2):
        leading = list(next(books))[0]
        for position in (0, -1):
            entry = leading[position]
            self.logger.info(type(entry))
            self.logger.info(entry['url'])
def test_002(self):
    """
    mkUrl(page=1, pageSize=10) on a 'characters' IceFire must return a
    non-empty string; the value is logged afterwards.
    """
    settings = Configuration.instance().config
    characters = IceFire(config=settings, type0='characters')

    url = characters.mkUrl(page=1, pageSize=10)

    self.assertIsNotNone(url)
    self.assertIsInstance(url, str)
    self.assertTrue(len(url) > 0)
    self.logger.info(url)
def test_006(self):
    """
    Call index() on a 'characters' IceFire and log its return value,
    the index0 attribute, and the pages / pageSize attributes.

    Nothing is asserted; the test only fails if one of the calls raises.
    """
    settings = Configuration.instance().config
    characters = IceFire(config=settings, type0='characters')

    outcome = characters.index()
    self.logger.info(outcome)
    self.logger.info(characters.index0)

    figures = (str(characters.pages), str(characters.pageSize))
    self.logger.info("; ".join(figures))
def test_017(self):
    """
    Houses use the Link response header to get the number of pages and
    the page size.

    Indexes the houses endpoint through IceFire, logs the last entry of
    cpage and the index0 attribute, then parses the first index0 entry
    as a URL and logs its query string together with what
    Configuration.qparts makes of it.
    """
    houses_url = "https://www.anapioficeandfire.com/api/houses"
    ice = IceFire(logger=self.logger,
                  config=Configuration.instance().config)
    ice.index(url=houses_url)

    current_page = ice.cpage
    self.logger.info(current_page[-1])
    self.logger.info(ice.index0)

    parsed = urlparse(ice.index0[0])
    self.assertIsNotNone(parsed)
    self.logger.info(parsed.query)

    query_parts = Configuration.instance().qparts(parsed.query)
    self.logger.info(str(query_parts))
def _refs0(self, l0):
    """
    Extract valid URLs from a string.

    The string is split on commas, angle brackets are removed from each
    piece, and each piece is split again on semi-colons. Every resulting
    token is stripped, logged together with its validation result, and
    kept when Configuration.isvalid0 accepts it.

    l0 -- the text to scan; None or an empty string yields an empty list.

    Returns the accepted tokens as a list, in the order they were found.
    """
    if not l0:
        # Nothing to scan; also avoids calling .split on None.
        return []

    # Look the validator up once and call it once per token (it used to
    # be resolved and invoked twice: once for the log line, once for the
    # test).
    isvalid = Configuration.instance().isvalid0

    parts = []
    for chunk in l0.split(','):
        cleaned = re.sub(r'[<>]', '', chunk).strip()
        for piece in cleaned.split(';'):
            token = piece.strip()
            accepted = isvalid(token)
            self.logger.info(str(accepted) + "; " + token)
            if accepted:
                parts.append(token)
    return parts
def test_000(self):
    """
    Read a saved header value from 'hdrs.json', split it into candidate
    tokens and log the ones Configuration.isvalid0 accepts.

    'hdrs.json' is written by test_015. When the file cannot be opened
    or read the test returns without checking anything. Nothing is
    asserted; the results are only logged.
    """
    try:
        with open('hdrs.json') as f0:
            l0 = f0.read()
    except OSError:
        # Only a missing/unreadable fixture is tolerated; any other
        # error (including KeyboardInterrupt) now propagates instead of
        # being silently swallowed.
        return
    self.logger.info(l0)

    # Resolve the validator once and call it once per token.
    isvalid = Configuration.instance().isvalid0

    parts = []
    for chunk in l0.split(','):
        # Split on commas, drop angle brackets, then split on
        # semi-colons.
        cleaned = re.sub(r'[<>]', '', chunk).strip()
        for piece in cleaned.split(';'):
            token = piece.strip()
            accepted = isvalid(token)
            self.logger.info(str(accepted) + "; " + token)
            if accepted:
                parts.append(token)
    self.logger.info(parts)
def test_009(self):
    """
    Load 'blkswn.cfg' through Configuration, check that it has sections
    and a truthy 'fetcher-proxy' section, log that section's host, port
    and type, and build a Fetcher from the configuration.
    """
    config = Configuration.instance(file='blkswn.cfg').config

    section_names = config.sections()
    self.logger.info(str(section_names))
    self.assertIsNotNone(section_names)

    self.assertTrue(config['fetcher-proxy'])
    for option in ('host', 'port', 'type'):
        self.logger.info(config['fetcher-proxy'][option])

    fetcher = Fetcher(logger=self.logger, config=config)
    self.assertIsNotNone(fetcher)
    self.logger.info(fetcher)
def test_013(self):
    """
    This uses the singleton constructed above.

    Fetches the characters endpoint once through a Fetcher, checks that
    a response object came back, and logs its headers and body.
    """
    target = "http://www.anapioficeandfire.com/api/characters"
    fetcher = Fetcher(logger=self.logger,
                      config=Configuration.instance().config)

    attempts = 1
    for _ in range(attempts):
        response = fetcher.fetch(url=target)
        self.assertIsNotNone(response)
        self.logger.info(str(response.info()))
        self.logger.info(response.read())
def extract(self, **kwargs):
    """
    Utility method to extract information.

    Only the first matching keyword is acted on, checked in this order:

    index    -- text to scan; returns whatever self._refs0 returns for it.
    list     -- a bytes payload; returns the decoded data structure.
    pages    -- an iterable whose items are passed to
                Configuration.qparts(..., fconv=int); returns a list of
                each result's 'page' value.
    pageSize -- same as 'pages' but collects the 'pageSize' values into
                a set.

    Returns None when none of these keywords is supplied.

    Raises ValueError or SyntaxError when a 'list' payload is neither
    JSON nor a Python literal.
    """
    # Local import so the block does not depend on a module-level import.
    import json

    if 'index' in kwargs:
        return self._refs0(kwargs['index'])

    if 'list' in kwargs:
        text = kwargs['list'].decode()
        try:
            # JSON is tried first: ast.literal_eval alone rejects the
            # JSON constants true / false / null.
            return json.loads(text)
        except ValueError:
            # Not strict JSON (e.g. single-quoted keys): keep accepting
            # Python-literal payloads as before.
            return ast.literal_eval(text)

    if 'pages' in kwargs:
        qparts = Configuration.instance().qparts
        return [qparts(item, fconv=int)['page']
                for item in kwargs['pages']]

    if 'pageSize' in kwargs:
        # returns a set
        qparts = Configuration.instance().qparts
        return {qparts(item, fconv=int)['pageSize']
                for item in kwargs['pageSize']}

    return None
def test_000(self):
    """
    Currently disabled: the method returns immediately, so none of the
    statements after the first line run.

    The disabled body pulls two items from an IceFireR iterator over
    'books' and logs each item's type, length, keys and 'url'.
    """
    # NOTE(review): the early return switches this test off while still
    # reporting it as passed; consider an explicit skip instead.
    return

    books = IceFireR(config=Configuration.instance().config,
                     type0='books',
                     logger=self.logger)
    iter(books)

    for _ in range(2):
        record = next(books)
        summary = "dict: {type0} {cnt} {keys}".format(
            type0=type(record),
            cnt=len(record),
            keys=", ".join(record.keys()))
        self.logger.info(summary)
        self.logger.info("dict: {url}".format(url=record['url']))
def test_015(self):
    """
    Houses use the Link response header to get the number of pages and
    the page size.

    Fetches the houses endpoint, checks that a 'Link' header is present,
    saves the raw body to 'houses.bytes' and the header value to
    'hdrs.json', then evaluates the decoded body as a literal and checks
    that the result is not empty.
    """
    houses_url = "https://www.anapioficeandfire.com/api/houses"
    fetcher = Fetcher(logger=self.logger,
                      config=Configuration.instance().config)

    response = fetcher.fetch(url=houses_url)
    headers = response.info()
    body = response.read()

    self.assertTrue('Link' in headers)
    self.logger.info(headers['Link'])

    with open('houses.bytes', 'wb') as raw_out:
        raw_out.write(body)
    with open('hdrs.json', 'w') as link_out:
        link_out.write(headers['Link'])

    records = ast.literal_eval(body.decode())
    self.logger.info(type(records))
    self.assertTrue(len(records) > 0)
    self.logger.info(records[-1])
def setUpClass(cls):
    """
    Expose the module-level logger as cls.logger and create the
    Configuration singleton from 'blkswn.cfg'.

    NOTE(review): unittest invokes setUpClass on the class itself, so
    confirm that a @classmethod decorator sits above this definition.
    """
    # The module-level name is only read here, so no `global`
    # declaration is required.
    cls.logger = logger
    Configuration.instance(file='blkswn.cfg')  # singleton