def get_resp():
    """Handle a metadata request: read the ``url`` query parameter and
    return its metadata as a JSON response.

    Responds with ``{"error": "invalid_url"}`` when the parameter is
    missing or empty.
    """
    raw = request.args.get("url")
    if raw:
        parsed = URL(raw)
        return json_resp(get_meta_data_json(str(parsed), parsed))
    return json_resp({"error": "invalid_url"})
def setUpClass(self): self.utility = Utility() # CHANGE THE LOG FILE NAME IN THE NEXT LINE****************************************************************************************** self.log = open(self.utility.logpath + "/WV-00.txt", "a+") self.suite_start_time = time.time() self.log.write("Suite started at {}\n".format( str(time.ctime(int(self.suite_start_time))))) self.url = URL() self.loginPageStaticTexts = LoginPageStaticText() self.loginPageTestData = LoginPageTestData() self.configTestCase = configparser.RawConfigParser() # CHANGE THE CONFIG PROPERTY FILE NAME IN THE NEXT LINE****************************************************************************************** self.configTestCase.read( os.path.dirname(os.getcwd()) + '/TestCases/WV_00_Config.properties') self.configECG = configparser.RawConfigParser() self.configECG.read( os.path.dirname(os.getcwd()) + '/Scripts/ECGRelatedData.properties') self.configDevice = configparser.RawConfigParser() self.configDevice.read( os.path.dirname(os.getcwd()) + '/Scripts/DeviceRelatedData.properties') self.sendECG = SendECG() yield self.suite_end_time = time.time() self.total_time_taken_suite = self.suite_end_time - self.suite_start_time self.log.write("Suite ended at {}\n".format( str(time.ctime(int(self.suite_end_time))))) self.log.write( "Total time taken by Test Suite to finish: {} seconds\n".format( self.total_time_taken_suite)) self.log.close()
def processURL(s, raw_url):
    """Wrap ``raw_url['expanded_url']`` in a URL object and collect it on
    ``s.urls``, skipping links that point back into twitter.com itself.
    """
    expanded = raw_url['expanded_url']
    candidate = URL(s.tweet_id, expanded)
    # only keep external URLs (links into twitter.com are ignored)
    if not expanded.startswith('https://twitter.com/'):
        s.urls.append(candidate)
def cannonicalize(self):
    """Normalize ``self.url`` in place and return it, or None when the
    rebuilt URL comes back empty.

    NOTE(review): the name is a misspelling of "canonicalize"; kept as-is
    because external callers may depend on it.
    """
    # Split into (scheme, netloc, path, query, fragment) and rebuild via
    # the project URL helper. Presumably URL() accepts the 5-tuple slice
    # of a SplitResult and make() re-assembles it — TODO confirm.
    parsed_url = urlsplit(self.url)
    url = URL(parsed_url[:])
    self.url = url.make()
    if self.url == "":
        return None
    # Force the plain-http scheme so https/http duplicates collapse.
    self.url = self.url.replace("https://", "http://")
    # Strip any fragment ("#..."), which never reaches the server.
    self.url = re.sub('#.*', "", self.url)
    return self.url
def __init__(self, url, method='GET', headers=None, cookies=None, referer=None, data=None, user_agent=DEFAULT_USER_AGENT, **kwargs):
    """Build an HTTP request description.

    ``url`` may be a ready-made URL instance or anything ``URL()``
    accepts. Cookie / Referer / User-Agent values, when supplied, are
    folded into the header map alongside any caller-provided ``headers``.
    """
    self._url = url if isinstance(url, URL) else URL(url)
    self._method = method
    self.id = uuid.uuid1()
    self._headers = {}
    if headers:
        self._headers.update(headers)
    self._cookies = cookies
    self._referer = referer
    self._user_agent = user_agent
    # Fold the optional per-field values into the header map.
    for header_name, header_value in (("Cookie", self._cookies),
                                      ("Referer", self._referer),
                                      ("User-Agent", self._user_agent)):
        if header_value:
            self._headers[header_name] = header_value
    self._get_data = self._url.get_querystring()
    self._post_data = data or ""
def setup(request, setUpClass):
    """Class-scoped fixture: launch Chrome on the UAT web viewer, expose
    driver/helper objects on the requesting test class, yield the driver,
    then close the browser and the log file on teardown.
    """
    print("initiating chrome driver")  # fixed typo: was "driverd"
    driver = Browser().getbrowser("chrome")
    url = URL()
    driver.get(url.webViewerUAT)
    utility = Utility()
    # utility.createLogFolder()
    log = open(utility.logpath + "/WV-00.txt", "a+")
    driverUtility = DriverUtility(driver, log)
    loginPageObject = LoginPageObject(driverUtility, log)
    request.cls.driver = driver
    request.cls.url1 = url
    request.cls.utility = utility
    request.cls.driverUtility = driverUtility
    request.cls.loginPageObject = loginPageObject
    print("setup ended")
    yield driver
    # Teardown: release the browser and the log file handle (the original
    # leaked the open file descriptor).
    driver.close()
    log.close()
import collections
import csv  # was missing: csv.reader below raised NameError without it
import httplib2
from URL import URL
import urllib.request
from SrcubOrigUrls import scrub_orig_urls

# url -> set of associated values, filled straight from the CSV rows.
url_data = collections.defaultdict(set)
# URL objects built from url_data.
urls = []

# Read the source data; "with" guarantees the file handle is closed
# (the original left it open for the life of the process).
print("Getting data from file...")
with open('data.csv', 'r') as url_data_file:
    my_reader = csv.reader(url_data_file)
    for row in my_reader:
        url_data[row[0]].add(row[1])
print("Done getting data from file!")

# Form objects from data and put into list
print("Putting objects in list...")
for url in url_data:
    urls.append(URL(url, url_data[url]))
print("Done putting objects in list!")

# Drop the first entry — presumably the CSV header row; TODO confirm.
del urls[0]

scrub_orig_urls(urls)
import sys
from DB import DB
from URL import URL

# Open the link database and make sure the schema exists.
db = DB('citeseerx.db')
db.create_tables()
# db.del_all()
# Example input: http://citeseerx.ist.psu.edu/viewdoc/summary?cid=16057

if len(sys.argv) == 2:
    # Fetch the URL given on the command line and record its DOI/target.
    url = URL(sys.argv[1])
    url.open()
    db.insert('link', {'doi': url.get_doi(), 'url': url.get_url()})
else:
    # Parenthesized so the script parses under both Python 2 and 3 (the
    # original used a Python-2-only print statement).
    print('Please supply proper URL.')
from URL import URL
from BSOUP import BSOUP
from time import sleep
import re

# change hosts path according to your OS
hosts_path = r"C:\Windows\System32\drivers\etc\hosts"
# localhost's IP
redirect = "127.0.0.1"

# Shared helpers and the accumulated list of blocked hostnames.
u = URL()
b = BSOUP()
urlis = []


class CWBP:
    # Content/keyword-based website blocker that works by mapping matching
    # hostnames to localhost in the OS hosts file.

    def cwblocker(self, lis):
        """Scan the current page's keywords against ``lis`` and, on a
        match, redirect the page's hostname to localhost.

        NOTE(review): reconstructed from whitespace-mangled source — the
        nesting of the ``with`` block relative to the keyword loop is
        ambiguous in the original; confirm against version control.
        """
        global urlis
        self.ur = u.giveurl()
        # First '/' separates hostname from path in the URL string.
        self.a = re.search('/', self.ur)
        self.kw = b.keyword("https://www." + self.ur)
        for i in lis:
            if i in self.kw:
                urlis.append("www." + self.ur[:self.a.start()])
                with open(hosts_path, 'r+') as file:
                    self.content = file.read()
                    if self.ur in self.content:
                        pass  # already blocked — nothing to do
                    else:
                        # mapping hostnames to your localhost IP address
                        file.write(redirect + " " + "www." + self.ur[:self.a.start()] + "\n")

    def unblocker(self):
        # NOTE(review): this method is truncated here — its body continues
        # beyond the visible chunk.
        global urlis
        with open(hosts_path, 'r+') as file:
def setup(request, setUpClass):
    """Class-scoped fixture: launch Chrome on the UAT web viewer, expose
    driver/helper objects on the requesting test class, yield the driver,
    then close the browser and the log file on teardown.
    """
    print("initiating chrome driver")  # fixed typo: was "driverd"
    driver = Browser().getbrowser("chrome")
    url = URL()
    driver.get(url.webViewerUAT)
    utility = Utility()
    # utility.createLogFolder()
    log = open(utility.logpath + "/WV-00.txt", "a+")
    driverUtility = DriverUtility(driver, log)
    loginPageObject = LoginPageObject(driverUtility, log)
    request.cls.driver = driver
    request.cls.url1 = url
    request.cls.utility = utility
    request.cls.driverUtility = driverUtility
    request.cls.loginPageObject = loginPageObject
    print("setup ended")
    yield driver
    # Teardown: release the browser and the log file handle (the original
    # leaked the open file descriptor).
    driver.close()
    log.close()

# NOTE(review): several superseded, commented-out fixture/logging/config
# experiments that followed this function were removed as dead code;
# recover them from version control if still needed.
from URL import URL
from DB import DB
from bs4 import BeautifulSoup

# Work through every unprocessed link in the local CiteSeerX database,
# following redirects and scraping title/abstract metadata from the page.
# NOTE(review): Python 2 syntax ("print url") — runs only under Python 2.
db = DB('citeseerx.db')
count = 0
while db.count_unpr():
    # url = URL('http://citeseerx.ist.psu.edu/viewdoc/summary?cid=4320')
    count = count + 1
    url = db.get_unpr()
    print url
    url = URL(url)
    url.open()
    # Presumably flags this link's status as "processed" (2) — TODO confirm
    # against DB.update_link.
    db.update_link(url.get_doi(), 2)
    if (not db.exists('link', url.get_doi()) and url.redirect_occured()):
        # Record the redirect target under the same DOI.
        db.insert('link', {
            'doi': url.get_doi(),
            'url': url.get_redirect_url()
        })
    if (not db.exists('metadata', url.get_doi())):
        html = url.fetch()
        # extract abstract
        soup = BeautifulSoup(html, "html.parser")
        title = soup.find('h2').findAll(text=True)[0]
        abstract_div = soup.find("div", {"id": "abstract"})
        for tag in abstract_div:
            if tag.name == 'p':
                abstract = tag.findAll(text=True)
def __init__(self, url1, url2):
    """Parse both URL strings and precompute the combined parameter set."""
    self.ut1, self.ut2 = URL(url1), URL(url2)
    self.params = self._allparams()
        # NOTE(review): continuation of a method whose opening lines fall
        # outside this view — it builds a human-readable diff of the two
        # parsed URLs; ``add`` and ``msg`` are defined in the missing
        # prefix (presumably ``msg`` is a list and ``add`` appends to it).
        if self.ut1.getBaseUrl() != self.ut2.getBaseUrl():
            add("baseUrls are different")
        p1 = self.ut1.getParamMap()
        p2 = self.ut2.getParamMap()
        for p in self._allparams():
            # has_key() is Python-2-only dict API, consistent with the
            # print statements below.
            if not p1.has_key(p):
                add("'%s' is not defined in 1" % p)
            elif not p2.has_key(p):
                add("'%s' is not defined in 2" % p)
            elif p1[p] != p2[p]:
                add("different values for '%s'" % p)
                add("\t1 - %s\n\t2 - %s" % (p1[p], p2[p]))
        if not msg:
            return "no diff"
        else:
            return '\n'.join(msg)


def cmpUrls(url1, url2):
    # Print a per-URL report for both inputs, then their diff (Python 2).
    ct = UrlComparator(url1, url2)
    ct.ut1.report("URL 1")
    ct.ut2.report("URL 2")
    print "\nDiff:"
    print ct.diff()


if __name__ == "__main__":
    # ssDoc is presumably a sample URL string defined elsewhere in the
    # file — confirm before running standalone.
    ut = URL(ssDoc)
    print ut.getTuple()