def get_Crawl_Delay(url):
    """Return the robots.txt crawl delay (in seconds) for url's host.

    Looks up the cached robots parser for the URL's base host; if the host
    is not cached yet, fetches/parses it via add_To_Cache() and retries.
    Returns 0 when the robots.txt declares no crawl delay for our agent.
    """
    global cache, cache_MAX_SIZE, agent
    baseURL = utils.getBaseUrl(url)
    # `in` replaces the deprecated dict.has_key(); the redundant
    # `cache = cache` self-assignment from the original is dropped.
    if baseURL in cache:
        # Compute the delay once instead of calling get_crawl_delay twice.
        delay = cache.get(baseURL).get_crawl_delay(agent)
        return delay if delay is not None else 0
    # Host not cached yet: populate the cache, then resolve via recursion.
    add_To_Cache(baseURL)
    return get_Crawl_Delay(url)
def is_Valid(url):
    """Return True if robots.txt permits our agent to fetch *url*.

    Best-effort check: any failure (malformed URL, robots fetch error)
    is treated as "not allowed" and returns False rather than raising.
    """
    global cache, cache_MAX_SIZE, agent
    try:
        baseURL = utils.getBaseUrl(url)
        # `in` replaces the deprecated dict.has_key(); populate the cache
        # on a miss so the single return below serves both branches.
        if baseURL not in cache:
            add_To_Cache(baseURL)
        return cache.get(baseURL).is_allowed(agent, url)
    except Exception:
        # Deliberate best-effort: deny the URL on any error.
        return False
def initiate():
    """Main crawl loop: drain the URL frontier, fetch pages, extract links.

    For each URL (up to search_max_depth) it records a summary tuple of
    (status, headers, storage-ref, link-count, body-size, depth) via
    infologger, and enqueues newly discovered in-scope links.
    """
    global count, search_max_depth
    # Keep pulling (url, depth) pairs until the frontier is exhausted.
    while(URLProvider.isIncomplete()):
        curr_url = URLProvider.get_URL()
        # curr_url[0] is the URL string, curr_url[1] its crawl depth.
        if(int(curr_url[1]) <= int(search_max_depth) ):
            #Print can be removed
            print curr_url, count
            # Default summary: robots.txt denied access (overwritten below
            # when the URL is fetchable).
            my_tuple=('00000',{'customError':'NO ACCESS'},None,None,0,0)
            if(SiteHandler.is_Valid(curr_url[0])):
                #Process further
                response_data=URLFetcher.getURL(curr_url[0])
                # URLFetcher.getURL signals a fetch failure by returning 1.
                if(response_data==1):
                    my_tuple=('2222',{'customError':'Unable to Fetch Correctly'}, None, None,0, curr_url[1])
                else:
                    myMime =response_data.headers.get('Content-Type')
                    if(valid_mime_type(myMime)):
                        base_url=utils.getBaseUrl(curr_url[0])
                        mystring = response_data.read()
                        links = AttributeExtraction.extract_Links(base_url,mystring)
                        # Persist the page body; stored_tup references it.
                        stored_tup = pagestoragehandler.write_log(curr_url[0],mystring)
                        my_tuple = (response_data.code, response_data.headers, stored_tup, len(links), len(mystring),curr_url[1])
                        #links or None
                        # Deduplicate links; enqueue only unseen URLs with an
                        # accepted scheme (e.g. http/https -- see
                        # valid_protocol_request).
                        for link in set(links):
                            if(valid_protocol_request(urlsplit(link)[0])):
                                if(infologger.not_in_list(link)):
                                    URLProvider.add_URL(link)
                    else:
                        #We need to keep a log of Data-Stored. If a log of data downloaded is required
                        #We can put the response object's data to find its size.
                        #Ideally headers do provide the size in headers, but we found some cases
                        #it was not provided. So implemented it in this fashion.
                        my_tuple=('1111',{'customError':'Unsupported Mime'}, None, None,0, curr_url[1])
            infologger.write_summary(curr_url[0], my_tuple )
            count += 1
    # Force writes and commits
    pagestoragehandler.store_log()
    infologger.store_log()
def addMedia(self, seqId):
    """Store the media PDF URL built from *seqId* (last character dropped)
    into self.media_dict and log it at debug level."""
    pdf_url = '{}{}.pdf'.format(getBaseUrl(), seqId[:-1])
    self.media_dict["url"] = pdf_url
    current_app.logger.debug(self.media_dict["url"])
def constructHeadline(self, pageId, pageNum, paperName):
    """Return an HTML anchor whose href is the base URL joined with
    *pageId* and whose text is *paperName*.

    NOTE(review): *pageNum* is accepted but unused -- kept for
    signature compatibility with existing callers.
    """
    href = "{}{}".format(getBaseUrl(), pageId)
    return "<a href='{}'>{}</a>".format(href, paperName)
# Kodi (XBMC) add-on module header: platform imports and module constants.
import subprocess
import xbmc
import xbmcgui
import requests
import ipcheck
import kill

# NOTE(review): `utils` and `os` are referenced below but not imported in
# this chunk -- presumably imported earlier in the file; confirm.
PROFILE = utils.PROFILE
ADDONID = utils.ADDONID

# Pattern for one <server .../> entry in the provider's server list.
REGEX = 'server name="(.+?)" capacity="(.+?)" city="(.+?)" country="(.+?)" icon="(.+?)" ip="(.+?)" status="(.+?)" visible="(.+?)"'

# Country code -> display name.  NOTE(review): 'Belguim' is a typo for
# 'Belgium' -- left unchanged in case the exact string is matched elsewhere.
COUNTRIES = {'AU':'Australia', 'AT':'Austria', 'BE':'Belguim', 'BR':'Brazil', 'DK':'Denmark', 'DE':'Germany', 'ES':'Spain', 'FR':'France', 'HU':'Hungary', 'JP':'Japan', 'KR':'South Korea', 'NL':'Netherlands', 'PL':'Poland', 'SG':'Singapore', 'CH':'Switzerland', 'SE':'Sweden', 'UK':'United Kingdom', 'US':'United States'}

URL = utils.getBaseUrl()
ADDON = utils.ADDON
HOME = utils.HOME
# NOTE(review): duplicate assignment -- PROFILE was already set above.
PROFILE = utils.PROFILE
TITLE = utils.TITLE

# OpenVPN log file inside the add-on profile directory.
RESPONSE = os.path.join(PROFILE, 'openvpn.log')

# NOTE(review): mid-module import -- conventionally belongs with the
# import block at the top of the file.
import quicknet

LOGINURL = utils.LOGINURL
USERNAME = utils.GetSetting('USER')
PASSWORD = utils.GetSetting('PASS')


class MyVPN():