def listFromQuery(site, queryresult):
    """Generate a list of pages from an API query result.

    queryresult is the list of pages from a list or generator query
    e.g. - for a list=categorymembers query, use result['query']['categorymembers']
    for a generator query, use result['query']['pages']
    """
    def _toPage(item):
        # pageid stays False (not None) when absent, matching the old behavior.
        pageid = item.get('pageid', False)
        # Dispatch on namespace: 14 = Category, 6 = File, anything else = Page.
        if item['ns'] == 14:
            cls = category.Category
        elif item['ns'] == 6:
            cls = wikifile.File
        else:
            cls = page.Page
        return cls(site, title=item['title'], check=False,
                   followRedir=False, pageid=pageid)

    if isinstance(queryresult, list):
        items = queryresult
    else:
        # Generator queries return a mapping keyed by pageid; only the
        # values (page info dicts) are needed.
        items = queryresult.values()
    return [_toPage(item) for item in items]
def errorPage():
    """Render the error page, with user details when someone is logged in."""
    if current_user.is_authenticated():
        p = page.Page("Oops!", False)
        userID = current_user.get_id()
        # Primary-key lookup; consistent with the sibling views (feedback,
        # createUser) which use User.query.get rather than filter_by().one().
        u = User.query.get(userID)
        thisUser = userPageUser.userPageUser(u.firstName, u.lastName, u.id)
    else:
        thisUser = None
        p = page.Page("Oops!", True)
    return render_template("error.html", page=p, user=thisUser)
def feedback():
    """Render the feedback page for anonymous and logged-in users alike."""
    thisUser = None
    if current_user.is_authenticated():
        p = page.Page("Feedback!", False)
        u = User.query.get(current_user.get_id())
        thisUser = userPageUser.userPageUser(u.firstName, u.lastName, u.id)
    else:
        p = page.Page("Feedback!", True)
    return render_template("feedback.html", page=p, user=thisUser)
def readFile(self, fileName):
    """Parse a plain-text source file into paragraphs and pages.

    Blank lines end a paragraph; lines starting with '---' force a manual
    page break; '# ' / '## ' headings start a new page and section; lines
    starting with '%author: ' / '%date: ' set document metadata. All other
    lines accumulate into the current paragraph.

    NOTE(review): a paragraph still pending at end-of-file is never flushed
    via addPara (only the page is added) — confirm that is intended.
    """
    # Read data from file
    with open(fileName, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    paraStr = ''          # text of the paragraph being accumulated
    pg = page.Page()      # page currently being filled
    for line in lines:
        line = line.strip()
        # Para break: a blank line flushes the pending paragraph.
        if '' == line:
            if '' != paraStr:
                self.addPara(paraStr, pg)
                paraStr = ''
        # Page break <- Manually ('---' marker)
        elif line.startswith('---'):
            logging.info('Page break')
            if 0 != len(pg.paras):
                self.addPage(pg)
            # NOTE(review): a non-empty paraStr is discarded here, not
            # flushed — confirm that is intended.
            paraStr = ''
            pg = page.Page()
        # Page break <- Automatically (markdown-style heading)
        elif line.startswith('# ') or line.startswith('## '):
            logging.info('Page break')
            if 0 != len(pg.paras):
                self.addPage(pg)
            self.addSec()
            pg = page.Page()
            # The heading itself becomes the first paragraph of the new page.
            self.addPara(line, pg)
            paraStr = ''
        # Author metadata line: '%author: <name>'
        elif line.startswith(r'%author: '):
            line = line[9:]
            logging.info('Found author: ' + line)
            self.author = line
        # Date metadata line: '%date: <date>'
        elif line.startswith(r'%date: '):
            line = line[7:]
            logging.info('Found date: ' + line)
            self.date = line
        else:
            # Continuation of the current paragraph; join lines with '\n'.
            if '' != paraStr:
                paraStr += '\n'
            paraStr += line
    # Flush the final page if it holds any paragraphs.
    if 0 != len(pg.paras):
        self.addPage(pg)
def navigate_to(start, end, path=None, rel=None, debug=False):
    """Recursively walk from *start* to *end*, following the most relevant links.

    Returns (path, rel): the list of visited links and the per-step
    relevance scores. Fixed: mutable default arguments (path=None/rel=None
    sentinels instead of a shared list) and `== None` comparisons.
    """
    # Path / relevance initialization (fresh lists per top-level call).
    if path is None:
        path = [start.link]
    if rel is None:
        rel = [0]
    # Base cases
    if start == end:
        return path, rel
    if end.link in str(start.html):
        # Target is directly linked from the current page.
        return navigate_to(end, end, path + [end.link], rel + [1.0])
    if debug:
        print(path[-1], rel[-1])
    indices = start.sort_by_relevance_to(end)
    for i in indices:
        # Check if the selected chunk contains links.
        if len(start.links[i]) > 0:
            # TODO: support multi-depth relevance checks. Following links to
            # depth d raises complexity from O(b) to O(b^d), so pruning (e.g.
            # keep only the b most relevant pages per level via tf-idf) would
            # be needed; keeping a per-level list of alternatives would also
            # aid backtracking, giving roughly O(b*d).
            relevant_links = start.links[i]
            rels = np.array([start.max_relevance(page.Page(link[1]))
                             for link in relevant_links])
            # NOTE(review): argsort is ascending, so the loop below starts at
            # the LOWEST score — confirm whether rels is a distance metric.
            sorted_idx = rels.argsort()
            # Make sure we're not looping back onto a visited page.
            for idx in sorted_idx:
                link = relevant_links[idx][1]
                if link not in path:
                    return navigate_to(page.Page(link), end,
                                       path + [link], rel + [rels[idx]])
    # Backtracking in case of any errors with finding links on the selected page.
    return navigate_to(page.Page(path[-1]), end, path[:-1], rel[:-1])
def get_logs(OOOOOO0O00OO00OO0, O00OOO0O0OO0O00OO):
    """Return paginated log rows of this module's type plus SSH logins.

    NOTE(review): this function is machine-obfuscated. By calling
    convention the first parameter appears to be ``self`` and the second
    the request object (``get``) — confirm against the original source.
    """
    import page
    # Rebinds the local name 'page' from the module to a pager instance.
    page = page.Page()
    # Total number of matching rows.
    # NOTE(review): '__O0O0O0O00O0O0OO00' has two leading underscores, so it
    # is name-mangled if this def sits inside a class body — confirm.
    OOOO0O000OO0OO00O = public.M('logs').where(
        'type=? or type=?',
        (OOOOOO0O00OO00OO0.__O0O0O0O00O0O0OO00, u'SSH登录')).count()
    OOOOOO000O0O0OO0O = 12  # rows per page
    O0O00000O000O0OO0 = {}  # pagination settings
    O0O00000O000O0OO0['count'] = OOOO0O000OO0OO00O
    O0O00000O000O0OO0['row'] = OOOOOO000O0O0OO0O
    O0O00000O000O0OO0['p'] = 1  # current page, defaults to the first
    if hasattr(O00OOO0O0OO0O00OO, 'p'):
        O0O00000O000O0OO0['p'] = int(O00OOO0O0OO0O00OO['p'])
    O0O00000O000O0OO0['uri'] = {}
    O0O00000O000O0OO0['return_js'] = ''
    if hasattr(O00OOO0O0OO0O00OO, 'tojs'):
        O0O00000O000O0OO0['return_js'] = O00OOO0O0OO0O00OO.tojs
    OOO0O0000000OOO00 = {}  # response payload
    # Pagination markup, then the rows for the current SHIFT/ROW window.
    OOO0O0000000OOO00['page'] = page.GetPage(O0O00000O000O0OO0, '1,2,3,4,5')
    OOO0O0000000OOO00['data'] = public.M('logs').where(
        'type=? or type=?',
        (OOOOOO0O00OO00OO0.__O0O0O0O00O0O0OO00, u'SSH登录')).order(
        'id desc').limit(str(page.SHIFT) + ',' + str(page.ROW)).field('log,addtime').select()
    return OOO0O0000000OOO00
def __init__(self, site, name, check=True):
    """
    wiki - A wiki object
    name - The username, as a string
    check - Checks for existence, normalizes name
    """
    self.site = site
    self.name = name
    if not isinstance(self.name, unicode):
        self.name = unicode(self.name, 'utf8')
    # Optimistic defaults until setUserInfo() says otherwise.
    self.exists = True
    # None means "haven't checked", distinct from blocked/not blocked.
    self.blocked = None
    self.editcount = -1
    self.groups = []
    if check:
        self.setUserInfo()
    # Detect IPv4-address "usernames": round-trip through inet_aton/ntoa
    # and compare; anything non-parseable just falls through.
    self.isIP = False
    try:
        packed = socket.inet_aton(self.name.replace(' ', '_'))
        if socket.inet_ntoa(packed) == self.name:
            self.isIP = True
            self.exists = False
    except:
        pass
    user_title = ':'.join([self.site.namespaces[2]['*'], self.name])
    self.page = page.Page(self.site, user_title, check=check,
                          followRedir=False)
def POST(self, path):
    """Store each POSTed config key/value and render a change summary.

    Redirects to /login when no valid session cookie exists.
    """
    cookies = web.cookies()
    clientId = self._ensureClientId(cookies)
    if not clientId:
        web.seeother('/login')
        return False
    # Collect POST data. Renamed from 'input', which shadowed the builtin.
    form_data = web.input(_method='post')
    # Do stuff based on POST data
    for key, val in form_data.iteritems():
        self.config.setVar(key, val)
    # Create page
    myPage = page.Page()
    # NOTE(review): title says 'Login' on the config page — confirm intended.
    myPage.setTitle('Appliance Webgui - Login')
    myPage.setCSS('/static/styles.css')
    myPage.setBody(self._top())
    myPage.setBody(self._navMenu(clientId))
    myPage.setBody('<div id="main">')
    if self.config.needCommit:
        myPage.setBody(
            'Settings have changed. Be sure to commit new configuration to disk.<br>'
        )
    # Echo back what was stored.
    for key, val in form_data.iteritems():
        myPage.setBody('%s: %s<br>' % (key, val))
    myPage.setBody('<a href="/">Go to the main page</a>')
    myPage.setBody('</div>')
    web.output(myPage.output())
def GET(self, path):
    """Render the appliance status page (hostname, time zone, VM stats)."""
    cookies = web.cookies()
    clientId = self._ensureClientId(cookies)
    if not clientId:
        # No valid session: bounce to the login page.
        web.seeother('/login')
        return False
    # Build the page
    myPage = page.Page()
    web.header("Content-Type", myPage.getContentType())
    myPage.setTitle(u'Appliance Webgui - Status')
    myPage.setCSS('/static/styles.css')
    myPage.setBody(self._top())
    myPage.setBody(self._navMenu(clientId))
    myPage.setBody(u'<div id="main">')
    myPage.setBody(u'<p>Appliance Status</p>')
    myPage.setBody(u'<p>Hostname: %s.%s</p>' %
                   (self.config.getVar('sys.hostname'),
                    self.config.getVar('net.domain')))
    myPage.setBody(u'<p>Time zone: %s</p>' % self.config.getVar('sys.tz'))
    # sysctl output is escaped before being rendered as HTML.
    myPage.setBody(
        u'<p>Virtual Memory: %s</p>' %
        escape(appliance.sysctl('vm.vmtotal')).replace('\n', '<br>'))
    myPage.setBody('clientId: %s' % clientId)
    myPage.setBody(u'</div>')
    web.output(myPage.output())
def POST(self, path):
    """Handle the shutdown form: reboot/halt the appliance or report misuse.

    Valid downMethod values are 'reboot' and 'halt'; anything else is
    logged to syslog and the user is redirected back to the front page.
    """
    cookies = web.cookies()
    clientId = self._ensureClientId(cookies)
    if not clientId:
        web.seeother('/login')
        return False
    # Collect POST data. Renamed from 'input', which shadowed the builtin.
    form_data = web.input(_method='post')
    myPage = page.Page()
    web.header("Content-Type", myPage.getContentType())
    myPage.setTitle(u'Appliance Webgui - Shutdown')
    myPage.setCSS('/static/styles.css')
    myPage.setBody(self._top())
    myPage.setBody(self._navMenu(clientId))
    myPage.setBody(u'<div id="main">')
    # Do stuff based on POST data
    if form_data['downMethod'] == 'reboot' or form_data['downMethod'] == 'halt':
        # Give the appliance ~70s before the browser reloads the front page.
        myPage.setHead(u'<meta http-equiv="refresh" content="70;url=/">')
        myPage.setBody(u'Performing action %s. Please wait.' %
                       form_data['downMethod'])
        # NOTE(review): shutdown() is evaluated eagerly here and its RESULT
        # is passed to web.background — confirm this is what the
        # web.background API expects (vs. passing a callable).
        web.background(self.appliance.shutdown(form_data['downMethod']))
    else:
        myPage.setHead(u'<meta http-equiv="refresh" content="5;url=/">')
        myPage.setBody(
            u'downMethod "%s" is not valid. Redirecting to front page.' %
            form_data['downMethod'])
        syslog.syslog(
            syslog.LOG_ERR,
            'invalid shutdown option "%s" passed to web gui' %
            form_data['downMethod'])
    myPage.setBody(u'</div>')
    web.output(myPage.output())
def GET(self, path):
    """Render the reboot/shutdown selection form."""
    cookies = web.cookies()
    clientId = self._ensureClientId(cookies)
    if not clientId:
        web.seeother('/login')
        return False
    myPage = page.Page()
    web.header("Content-Type", myPage.getContentType())
    myPage.setTitle(u'Appliance Webgui - Shutdown')
    myPage.setCSS('/static/styles.css')
    myPage.setBody(self._top())
    myPage.setBody(self._navMenu(clientId))
    # Assemble the body as a sequence of HTML fragments.
    fragments = [
        u'<div id="main">',
        u'<form method="post">',
        u'<select name="downMethod">',
        u'<option value="reboot" selected>Reboot</option>',
        u'<option value="halt">Shutdown</option>',
        u'</select> the appliance.',
        u' <input type="submit" value="Go >">',
        u'</form>',
        'clientId: %s' % clientId,
        u'</div>',
    ]
    for fragment in fragments:
        myPage.setBody(fragment)
    web.output(myPage.output())
def GET(self, logfile):
    """Serve one of the whitelisted log files as escaped HTML.

    Only '/messages' and '/access' are allowed; anything else gets a 404,
    which also guards against path traversal via *logfile*.
    """
    cookies = web.cookies()
    clientId = self._ensureClientId(cookies)
    if not clientId:
        web.seeother('/login')
        return False
    allowed = ('/messages', '/access')
    myPage = page.Page()
    web.header('Content-Type', myPage.getContentType())
    myPage.setTitle(u'Appliance Webgui')
    myPage.setCSS(u'/static/styles.css')
    if logfile in allowed:
        myPage.setBody(self._top())
        myPage.setBody(self._navMenu(clientId))
        myPage.setBody(u'<div id="main">')
        myPage.setBody(u'<div id="logtext">')
        # Remap the access log to lighttpd's actual file name.
        # NOTE(review): the whitelist entries carry a leading '/', so
        # logfile == 'access' may never match here — confirm what the
        # routing layer passes in.
        if logfile == 'access':
            logfile = 'lighttpd.access.log'
        proc = subprocess.Popen(['cat', '/var/log/%s' % logfile],
                                shell=False,
                                stdin=None,
                                stdout=subprocess.PIPE,
                                stderr=None)
        # Escape the raw log text so it renders safely as HTML.
        logOutput = escape(proc.communicate()[0])
        myPage.setBody(logOutput.replace('\n', '<br>'))
        myPage.setBody(u'</div>')
        myPage.setBody(u'</div>')
        web.output(myPage.output())
    else:
        web.notfound()
def GET(self, path):
    """Render the login page, or a logged-in notice when a session exists."""
    # Handle cookies
    cookies = web.cookies()
    clientId = cookies.get('clientId')
    # Make the page
    myPage = page.Page()
    web.header("Content-Type", myPage.getContentType())
    myPage.setTitle(u'Appliance Webgui - Login')
    myPage.setCSS(u'/static/styles.css')
    myPage.setBody(self._top())
    myPage.setBody(u'<div id="login">')
    # 'in' replaces the removed dict.has_key(); same semantics on Python 2.
    if clientId in clientMap:
        myPage.setBody(u'You are already logged in with ID %s.<br>' % clientId)
        myPage.setBody(
            u'Do you want to <a href="/logout">log out</a>?<br>')
        myPage.setBody(
            u'Do you want to <a href="javascript:history.back()">go back</a>?'
        )
    else:
        myPage.setBody(u'<form method="post">')
        myPage.setBody(
            u'Username: <input type="text" name="username"><br>')
        myPage.setBody(
            u'Password: <input type="password" name="password"><br>')
        myPage.setBody(u'<input type="submit" value="Submit">')
        myPage.setBody(u'</form>')
    myPage.setBody(u'</div>')
    web.output(myPage.output())
def GET(self, path):
    """Render the configuration-dump method selection form."""
    cookies = web.cookies()
    clientId = self._ensureClientId(cookies)
    if not clientId:
        web.seeother('/login')
        return False
    # Make the page
    myPage = page.Page()
    web.header("Content-Type", myPage.getContentType())
    myPage.setTitle(u'Appliance Webgui - Dump Configuration')
    myPage.setCSS(u'/static/styles.css')
    myPage.setBody(self._top())
    myPage.setBody(self._navMenu(clientId))
    # Assemble the form as a sequence of HTML fragments.
    fragments = [
        u'<div id="main">',
        u'How do you want to dump the configuration database?<br>',
        u'<form method="post">',
        u'<select name="dumpMethod">',
        u'<option value="display">Display config database in browser</option>',
        u'<option value="download">Download a dump file of config database</option>',
        u'</select>',
        '<input type="submit" value="Go >">',
        '</form>',
        '</div>',
    ]
    for fragment in fragments:
        myPage.setBody(fragment)
    web.output(myPage.output())
def loadPages(self, element):
    """Rebuild self.pagesList from the XML children of *element*.

    'Page' children are collected into pagesList; 'CommonWidgets' children
    are loaded and shown immediately but not tracked. The duplicated
    construct-and-load code of the two branches has been factored together.
    """
    self.pagesList = []
    pageNode = element.firstChild()
    while not pageNode.isNull():
        pageElement = pageNode.toElement()
        if not pageElement.isNull():
            tag = pageElement.tagName()
            if tag in ("Page", "CommonWidgets"):
                # Both kinds are built and configured identically.
                newPage = page.Page(self)
                newPage.loadXMLConfiguration(pageElement)
                if tag == "Page":
                    # Regular pages are collected; display is handled later
                    # by processPageChanged().
                    self.pagesList.append(newPage)
                else:
                    # Common-widget pages are shown immediately, not tracked.
                    newPage.show()
        pageNode = pageNode.nextSibling()
    self.processPageChanged()
def get_replace_logs(self, get):
    """Return a paginated listing of the panel search/replace log."""
    import page
    pager = page.Page()
    count = public.M('panel_search_log').order('id desc').count()
    # Pagination settings: 12 rows per page, page number and js callback
    # taken from the request when supplied.
    info = {
        'count': count,
        'row': 12,
        'p': int(get['p']) if hasattr(get, 'p') else 1,
        'uri': get,
        'return_js': get.tojs if hasattr(get, 'tojs') else '',
    }
    data = {'page': pager.GetPage(info, '1,2,3,4,5,8')}
    window = str(pager.SHIFT) + ',' + str(pager.ROW)
    data['data'] = public.M('panel_search_log').field(
        'id,rtext,exts,path,mode,isword,iscase,noword,backup_path,time'
    ).order('id desc').limit(window).select()
    # A string result means the query failed.
    if isinstance(data['data'], str):
        return public.returnMsg(False, [])
    # Flag whether each row's backup file still exists on disk.
    for row in data['data']:
        if not isinstance(row, dict):
            continue
        if 'backup_path' in row:
            row['is_path_status'] = os.path.exists(row['backup_path'])
    return public.returnMsg(True, data)
def get_page(count, p=1, rows=12, callback='', result='1,2,3,4,5,8'):
    """Build pagination markup plus the SHIFT/ROW window for a query."""
    import page
    from BTPanel import request
    pager = page.Page()
    info = {
        'count': count,
        'row': rows,
        'p': p,
        'return_js': callback,
        'uri': request.full_path,
    }
    return {
        'page': pager.GetPage(info, result),
        'shift': str(pager.SHIFT),
        'row': str(pager.ROW),
    }
def next(self):
    """Return the next list entry wrapped in the class matching its namespace."""
    info = List.next(self, full=True)
    ns = info['ns']
    # Namespace 14 = category, 6 = image/file, anything else = plain page.
    if ns == 14:
        return Category(self.site, u'', info)
    if ns == 6:
        return page.Image(self.site, u'', info)
    return page.Page(self.site, u'', info)
def get_ssh_login_logs(O00OO0O0O000O0OO0, O0O0000OO0OOOO0OO):
    """Return paginated SSH-login log rows.

    NOTE(review): machine-obfuscated code. By calling convention the first
    parameter appears to be ``self`` (unused here) and the second the
    request object (``get``) — confirm against the original source.
    """
    import page
    # Rebinds the local name 'page' from the module to a pager instance.
    page = page.Page()
    # Total number of SSH-login rows.
    O00O0OOOOOO0O0O0O = public.M('logs').where('type=?', (u'SSH登录', )).count()
    OO000O0OOO0O000O0 = 12  # rows per page
    OO0O000O0O000OOOO = {}  # pagination settings
    OO0O000O0O000OOOO['count'] = O00O0OOOOOO0O0O0O
    OO0O000O0O000OOOO['row'] = OO000O0OOO0O000O0
    OO0O000O0O000OOOO['p'] = 1  # current page, defaults to the first
    if hasattr(O0O0000OO0OOOO0OO, 'p'):
        OO0O000O0O000OOOO['p'] = int(O0O0000OO0OOOO0OO['p'])
    OO0O000O0O000OOOO['uri'] = {}
    OO0O000O0O000OOOO['return_js'] = ''
    if hasattr(O0O0000OO0OOOO0OO, 'tojs'):
        OO0O000O0O000OOOO['return_js'] = O0O0000OO0OOOO0OO.tojs
    OOOO0O00O0O00O00O = {}  # response payload
    # Pagination markup, then the rows for the current SHIFT/ROW window.
    OOOO0O00O0O00O00O['page'] = page.GetPage(OO0O000O0O000OOOO, '1,2,3,4,5')
    OOOO0O00O0O00O00O['data'] = public.M('logs').where(
        'type=?', (u'SSH登录', )).order('id desc').limit(
        str(page.SHIFT) + ',' + str(page.ROW)).field('log,addtime').select()
    return OOOO0O00O0O00O00O
def morph_page_distribution(fname, page_sampler, outdir): """Morph original page to look as it comes from the specified distribution. Parameters ---------- fname : str File name of the HTML file of the original page. page_sampler : sampling.PageSampler Page sampler. outdir : str Destination directory. """ original = page.Page(fname) html_size = original.html['size'] sizes = original.get_sizes() number = len(sizes) # Number of objects. if not sizes: min_objs = 0 else: min_objs = min(sizes) target_html_size, target_sizes = page_sampler.sample_page( min_count=number, min_html=html_size, min_objs=min_objs) # Try to morph. If it doesn't work (some sizes where too # small), notify and try again. try: morph_page(original, target_html_size, target_sizes, outdir) except NotImplementedError as e: raise e except: print "Couldn't morph {} with {}".format(sizes, target_sizes) morph_page_distribution(fname, page_sampler, outdir)
def makePage(key, result, site):
    """Build the page/category/file object for one API query result.

    key is used as the pageid. Returns False for invalid titles; marks
    item.exists False for missing pages.

    Bug fixed: the original set item = False on 'invalid' and then called
    item.setNamespace(...) whenever 'ns' was also present, raising
    AttributeError. Invalid results now return False immediately.
    """
    if 'invalid' in result:
        return False
    title = result.get('title', False)
    ns = result.get('ns')
    # Namespace 14 = category, 6 = file, anything else = plain page.
    if ns == 14:
        item = category.Category(site, title=title, check=False,
                                 followRedir=False, pageid=key)
    elif ns == 6:
        item = wikifile.File(site, title=title, check=False,
                             followRedir=False, pageid=key)
    else:
        item = page.Page(site, title=title, check=False,
                         followRedir=False, pageid=key)
    if 'missing' in result:
        item.exists = False
    if ns is not None:
        item.setNamespace(int(ns))
    return item
def GetPage(self,data,get):
    """Paginate an in-memory list: return markup plus the current window.

    data - full result list; get - request object carrying optional 'p'
    (page number) and 'tojs' (js callback name).
    """
    # Pull in the pagination helper module.
    import page
    # Instantiate the pagination class (rebinds the local name 'page').
    page = page.Page();
    info = {}
    info['count'] = len(data)
    info['row'] = 15  # rows per page
    info['p'] = 1  # current page, defaults to the first
    if hasattr(get,'p'):
        info['p'] = int(get['p'])
    info['uri'] = {}
    info['return_js'] = ''
    if hasattr(get,'tojs'):
        info['return_js'] = get.tojs
    # Build the pagination markup.
    result = {}
    result['page'] = page.GetPage(info)
    # Slice out the rows for the current page window by hand.
    # NOTE(review): the 'n > page.ROW' check runs BEFORE the append, so up
    # to ROW+1 items are returned per page — confirm whether that
    # off-by-one is intended.
    n = 0;
    result['data'] = [];
    for i in range(info['count']):
        if n > page.ROW: break;
        if i < page.SHIFT: continue;
        n += 1;
        result['data'].append(data[i]);
    return result;
def main():
    """Create *count* blank Confluence pages and write their URLs to a CSV.

    Returns 0 on success, 1 on any failure (login, page creation, rename).
    NOTE(review): a span of this function was redacted in this copy (the
    '******' below). It originally appears to prompt for the password, log
    in, and print "Confluence login failed." / return 1 on failure — the
    exact code is not recoverable from here and must be restored from the
    original source.
    """
    count = getint("Number of new pages: ")
    # --- redacted span begins (not valid Python as preserved) ---
    pwd = getpass.getpass("Confluence password: "******"Confluence login failed."
    return 1
    # --- redacted span ends ---
    stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
    fname = "hwdb_blanks_" + stamp + ".csv"
    outfile = open(fname, mode="w")
    print "Writing URLs to file: %s" % fname
    outfile.write("Long URL, Short URL\n")
    blank_page = page.Page()
    for i in xrange(count):
        # Create the page using a title based on process ID and current date+time,
        # to ensure uniqueness. No page ID is available until after creation.
        stamp = str(os.getpid()) + datetime.datetime.utcnow().isoformat()
        title = page.BLANK_TITLE_TEMPLATE.substitute(pageId=stamp)
        cid = cli.addPage(title, parentTitle=page.BLANK_PARENT,
                          content=blank_page.getMarkup())
        if not cid:
            return 1
        # Rename the page to a title with the true page ID.
        if not cli.renamePage(page.BLANK_TITLE_TEMPLATE.substitute(pageId=cid),
                              cid=cid):
            return 1
        long_url = page.PAGE_URL_TEMPLATE.substitute(cid=cid)
        short_url = page.extractPageShortURL(long_url)
        print "%s, %s" % (long_url, short_url)
        outfile.write("%s, %s\n" % (long_url, short_url))
    return 0
def get(self, name, info = ()):
    """Return the page/category/image object for *name* in this namespace.

    Non-zero namespaces prefix the title with the namespace name; for the
    main namespace (0) the class is guessed from the title itself.
    """
    if self.namespace != 0:
        title = self.site.namespaces[self.namespace] + ':' + name
        ns = self.namespace
    else:
        # Guessing page class
        title = name
        ns = self.guess_namespace(name)
    if ns == 14:
        return Category(self.site, title, info)
    if ns == 6:
        return page.Image(self.site, title, info)
    return page.Page(self.site, title, info)
def get_exception_logs(self, get):
    """Return today's pushed-message log entries, paginated."""
    import page
    pager = page.Page()
    # Match rows of the push-message type whose addtime falls on today.
    where_expr = "type=? and strftime('%m-%d','now','localtime') = strftime('%m-%d',addtime)"
    count = public.M('logs').where(where_expr, (u'消息推送', )).count()
    # Pagination settings: 12 rows per page, page number and js callback
    # taken from the request when supplied.
    info = {
        'count': count,
        'row': 12,
        'p': int(get['p']) if hasattr(get, 'p') else 1,
        'uri': get,
        'return_js': get.tojs if hasattr(get, 'tojs') else '',
    }
    data = {}
    # Pagination markup first, then the rows of the current window.
    data['page'] = pager.GetPage(info, '1,2,3,4,5,8')
    data['data'] = public.M('logs').where(where_expr, (u'消息推送',))\
        .order('id desc').limit(str(pager.SHIFT) + ',' + str(pager.ROW)).field('log,addtime').select()
    return data
def GetSql(self,get,result = '1,2,3,4,5,8'):
    """Generic paginated table query driven by the request object *get*.

    Honors optional request attributes: order, limit, result, search,
    list (return all rows unpaginated), p, tojs.
    """
    # Pick up ordering/limit/result overrides supplied by the front end.
    order = "id desc"
    if hasattr(get,'order'):
        order = get.order
    limit = 20
    if hasattr(get,'limit'):
        limit = int(get.limit)
    if hasattr(get,'result'):
        result = get.result;
    data = {}
    # Build the query condition.
    where = ''
    if hasattr(get,'search'):
        where = self.GetWhere(get.table,get.search);
        if get.table == 'backup':
            # NOTE(review): get.type is concatenated straight into the SQL
            # string — SQL injection risk if it comes from the client;
            # should be bound as a parameter. Flagged, not changed here.
            where += " and type='" + get.type+"'";
    field = self.GetField(get.table)
    # Instantiate the database object.
    SQL = db.Sql();
    # When 'list' is requested, return the full result set unpaginated.
    if hasattr(get,'list'):
        data = SQL.table(get.table).where(where,()).field(field).order(order).select()
        return data
    # Total row count for the pagination widget.
    count = SQL.table(get.table).where(where,()).count();
    #get.uri = get
    # Pagination helper (rebinds the local name 'page' to an instance).
    import page
    page = page.Page();
    # Drop upload-related attributes before using get as the uri payload.
    del(get.data)
    del(get.zunfile)
    info = {}
    info['count'] = count
    info['row'] = limit
    info['p'] = 1  # current page, defaults to the first
    if hasattr(get,'p'):
        info['p'] = int(get['p'])
    info['uri'] = get
    info['return_js'] = ''
    if hasattr(get,'tojs'):
        info['return_js'] = get.tojs
    data['where'] = where;
    # Pagination markup.
    data['page'] = page.GetPage(info,result)
    # Rows for the current SHIFT/ROW window.
    # NOTE(review): bytes(...) here behaves like str() only on Python 2 —
    # confirm the runtime; on Python 3 this produces b'...' and breaks.
    data['data'] = SQL.table(get.table).where(where,()).order(order).field(field).limit(bytes(page.SHIFT)+','+bytes(page.ROW)).select()
    return data;
def createUser():
    """Render the signup page, attaching user info when a session exists."""
    p = page.Page("Sign Up!", True)
    thisUser = None
    try:
        # Any failure (no session, unknown id, missing row) just leaves
        # thisUser as None.
        u = User.query.get(current_user.get_id())
        thisUser = userPageUser.userPageUser(u.firstName, u.lastName, 0)
    except Exception:
        thisUser = None
    return render_template("signup.html", page=p, user=thisUser)
def select(self, ind):
    """Fetch and parse the page for search result *ind* (1-based).

    Falls back to a fresh API query when the index lies beyond the cached
    results; returns None (after printing a notice) when even that query
    yields nothing. The duplicated fetch/parse tail of the two branches
    has been factored together.
    """
    if ind >= len(self.search_results):
        # Not cached locally: ask the API for exactly one more hit.
        (_, results) = wiki_api.searchQuery(self.page_name, ind - 1, 1)
        if len(results) == 0:
            print('Your request cannot be satisfied')
            return
        link = results[0]['link']
    else:
        link = self.search_results[ind - 1]['link']
    # response = main.http.request('GET', link)
    response = requests.get(link)
    parser = BeautifulSoup(response.text, 'html.parser')
    return page.Page(parser)
def _update_link(self, url, depth, pool):
    """Queue *url* for crawling when it matches the URL pattern.

    Args:
        url: candidate page link.
        depth: depth of the link that produced *url*; nothing is queued
            once self.conf.max_depth has been reached.
    """
    # Short-circuit keeps the regex match from running past max depth.
    if depth < self.conf.max_depth and self.prog.match(url):
        pool.put(page.Page(url, depth + 1))
def handle(self, task):
    """Fetching routine: download task.url, queue child tasks, record image URLs.

    Args:
        task: Task object with .url and .depth.
    Return:
        None
    Raises:
        SpiderException: when task.depth exceeds the configured maximum.
    """
    # Deduplicate against the shared fetch history under the global lock.
    history_lock = lock.Lock.retrive_lock()
    history_lock.acquire()
    if task.url in self.__history:
        logging.info("Url has been fetched: {url}".format(url=task.url))
        history_lock.release()
        return
    self.__history.append(task.url)
    history_lock.release()
    if task.depth > self.__max_dep:
        raise SpiderException(
            "Not a valid task: {task}".format(task=str(task)))
    # Politeness delay between fetches.
    time.sleep(self.__frequency)
    # NOTE(review): the literal 5 looks like a timeout for Page — confirm
    # against page.Page's signature.
    fetched_page = page.Page(task.url, 5)
    try:
        fetched_page.hydrate()
    except urllib2.HTTPError as e:
        logging.error("HTTP ERROR {url}: {error}".format(url=task.url,
                                                         error=str(e)))
        return
    except urllib2.URLError as e:
        logging.error("Url ERROR {url}: {error}".format(url=task.url,
                                                        error=str(e)))
        return
    # Only spawn child tasks while still below the depth limit.
    if task.depth < self.__max_dep:
        self.__add_task(fetched_page, task.depth)
    imgs = fetched_page.get_resource_url(self.__image_suffix)
    if len(imgs) == 0:
        return
    # One output file per fetched page, named after its URL.
    path = os.path.join(utils.realpath(self.__output_dir),
                        utils.url2path(fetched_page.url))
    try:
        # Create the file, or append when it already exists.
        if not os.path.isfile(path):
            output_file = open(path, "w")
        else:
            output_file = open(path, "a")
    except IOError as e:
        # NOTE(review): e[1] (exception indexing) is Python-2-only — confirm
        # the runtime; on Python 3 this itself raises.
        logging.error("Can't open file {path}: {error}".format(path=path,
                                                               error=e[1]))
        return
    # NOTE(review): the file is not closed if a write raises — consider a
    # with-block when touching this code.
    for img in imgs:
        url = self.__fix_up_url(img, fetched_page)
        output_file.write(url + "\n")
    output_file.close()