Example #1
0
def get_resp():
    """Return the JSON metadata response for the ``url`` query parameter.

    The value is read from ``request.args``. When it is missing or empty the
    response carries ``{"error": "invalid_url"}``; otherwise the value is
    wrapped in ``URL`` and passed, together with its string form, to
    ``get_meta_data_json``.
    """
    raw_url = request.args.get("url")
    if raw_url:
        parsed = URL(raw_url)
        # The helper takes both the string form and the URL object itself.
        return json_resp(get_meta_data_json(str(parsed), parsed))
    return json_resp({"error": "invalid_url"})
Example #2
0
 def setUpClass(self):
     """Prepare suite-wide state, yield to the tests, then log the duration.

     Before the ``yield`` this opens the suite log in append mode, records
     the start time, creates the URL / login-page helper objects, loads the
     three ``.properties`` files found relative to the parent of the
     current working directory, and creates the ``SendECG`` helper.

     After the ``yield`` it writes the end time and the elapsed seconds to
     the log and closes it.

     NOTE(review): the ``yield`` suggests this is registered as a pytest
     fixture; the decorator is not visible here -- confirm.
     """
     self.utility = Utility()

     # NOTE: change the log file name on the next line for each suite.
     log_file = self.utility.logpath + "/WV-00.txt"
     self.log = open(log_file, "a+")

     started = time.time()
     self.suite_start_time = started
     start_stamp = str(time.ctime(int(started)))
     self.log.write("Suite started at {}\n".format(start_stamp))

     self.url = URL()
     self.loginPageStaticTexts = LoginPageStaticText()
     self.loginPageTestData = LoginPageTestData()

     # NOTE: change the config property file name below for each suite.
     self.configTestCase = configparser.RawConfigParser()
     test_case_props = (os.path.dirname(os.getcwd()) +
                        '/TestCases/WV_00_Config.properties')
     self.configTestCase.read(test_case_props)

     self.configECG = configparser.RawConfigParser()
     ecg_props = (os.path.dirname(os.getcwd()) +
                  '/Scripts/ECGRelatedData.properties')
     self.configECG.read(ecg_props)

     self.configDevice = configparser.RawConfigParser()
     device_props = (os.path.dirname(os.getcwd()) +
                     '/Scripts/DeviceRelatedData.properties')
     self.configDevice.read(device_props)

     self.sendECG = SendECG()

     # Everything below runs once the generator is resumed.
     yield

     finished = time.time()
     self.suite_end_time = finished
     self.total_time_taken_suite = finished - self.suite_start_time
     end_stamp = str(time.ctime(int(finished)))
     self.log.write("Suite ended at {}\n".format(end_stamp))
     summary = "Total time taken by Test Suite to finish: {} seconds\n"
     self.log.write(summary.format(self.total_time_taken_suite))
     self.log.close()
Example #3
0
 def processURL(s, raw_url):
     """Record a URL entity on ``s`` unless it points back at Twitter.

     A ``URL`` is built from ``s.tweet_id`` and ``raw_url['expanded_url']``.
     It is appended to ``s.urls`` only when the expanded URL does not start
     with ``https://twitter.com/``.
     """
     owner_id = s.tweet_id
     expanded = raw_url['expanded_url']
     candidate = URL(owner_id, expanded)
     # Internal Twitter links are not collected.
     if expanded.startswith('https://twitter.com/'):
         return
     s.urls.append(candidate)
Example #4
0
    def cannonicalize(self):
        """Rewrite ``self.url`` into its canonical form and return it.

        The current URL is split with ``urlsplit`` and rebuilt through
        ``URL(...).make()``. If the rebuilt value is the empty string,
        ``self.url`` is left empty and ``None`` is returned. Otherwise every
        ``https://`` is replaced with ``http://``, any ``#...`` fragment is
        stripped, and the result is stored on ``self.url`` and returned.
        """
        pieces = urlsplit(self.url)
        rebuilt = URL(pieces[:]).make()
        self.url = rebuilt

        # Compare against "" explicitly: only an empty string short-circuits.
        if rebuilt == "":
            return None

        downgraded = rebuilt.replace("https://", "http://")
        self.url = re.sub('#.*', "", downgraded)
        return self.url
Example #5
0
    def __init__(self,
                 url,
                 method='GET',
                 headers=None,
                 cookies=None,
                 referer=None,
                 data=None,
                 user_agent=DEFAULT_USER_AGENT,
                 **kwargs):
        """Store the request target, method, headers and payload.

        :param url: a ``URL`` instance, or a value passed to ``URL(...)``.
        :param method: request method name, stored as given.
        :param headers: optional mapping copied into the header dict.
        :param cookies: optional value sent as the ``Cookie`` header.
        :param referer: optional value sent as the ``Referer`` header.
        :param data: optional body; any falsy value is stored as ``""``.
        :param user_agent: value sent as the ``User-Agent`` header.
        :param kwargs: accepted but not used by this constructor.

        ``Cookie``, ``Referer`` and ``User-Agent`` are written after the
        caller's ``headers``, so a truthy argument overrides a same-named
        entry in ``headers``.
        """
        self._url = url if isinstance(url, URL) else URL(url)
        self._method = method
        self.id = uuid.uuid1()

        self._cookies = cookies
        self._referer = referer
        self._user_agent = user_agent

        # Start from a private copy so the caller's mapping is never mutated.
        self._headers = {}
        if headers:
            self._headers.update(headers)

        derived_headers = (
            ("Cookie", cookies),
            ("Referer", referer),
            ("User-Agent", user_agent),
        )
        for header_name, header_value in derived_headers:
            if header_value:
                self._headers[header_name] = header_value

        self._get_data = self._url.get_querystring()
        self._post_data = data or ""
Example #6
0
def setup(request, setUpClass):
    """Start Chrome on the web viewer page and attach helpers to the test class.

    Opens ``url.webViewerUAT`` in a Chrome driver, opens the suite log in
    append mode, builds the driver utility and login page object, and stores
    them (with the driver, URL and utility objects) on ``request.cls``. The
    driver is yielded to the tests.

    On teardown the browser window is closed and the log file is closed. Both
    steps run even if setup or the other teardown step raises, so neither the
    browser nor the file handle is leaked.

    :param request: object whose ``cls`` attribute receives the helpers
        (presumably pytest's ``request`` fixture -- confirm).
    :param setUpClass: not used in the body; presumably requested only so the
        suite-level setup runs first -- confirm.
    """
    print("initiating chrome driverd")
    driver = Browser().getbrowser("chrome")
    log = None
    try:
        url = URL()
        driver.get(url.webViewerUAT)
        utility = Utility()
        # utility.createLogFolder()
        log = open(utility.logpath + "/WV-00.txt", "a+")
        driverUtility = DriverUtility(driver, log)
        loginPageObject = LoginPageObject(driverUtility, log)

        request.cls.driver = driver
        request.cls.url1 = url
        request.cls.utility = utility
        request.cls.driverUtility = driverUtility
        request.cls.loginPageObject = loginPageObject

        print("setup ended")
        yield driver
    finally:
        try:
            driver.close()
        finally:
            # The log was previously never closed; release it last because
            # the helper objects write to it while the tests run.
            if log is not None:
                log.close()
Example #7
0
import collections
import csv
import urllib.request

import httplib2

from SrcubOrigUrls import scrub_orig_urls
from URL import URL

# Maps each first-column value of the CSV to the set of second-column values
# that appear on the same rows.
url_data = collections.defaultdict(set)

# Read the source data. The context manager closes the file even when a row
# is malformed, and newline='' is how the csv module expects its input file
# to be opened.
with open('data.csv', 'r', newline='') as url_data_file:
    my_reader = csv.reader(url_data_file)
    print("Getting data from file...")
    for row in my_reader:
        url_data[row[0]].add(row[1])

print("Done getting data from file!")

# Build one URL object per distinct first-column value.
print("Putting objects in list...")
urls = [URL(url, related) for url, related in url_data.items()]
print("Done putting objects in list!")

# Drop the first entry before scrubbing -- presumably the CSV header row;
# TODO confirm against data.csv.
del urls[0]
scrub_orig_urls(urls)
Example #8
0
import sys
from DB import DB
from URL import URL

# Open the crawl database and make sure its tables exist.
db = DB('citeseerx.db')
db.create_tables()
# db.del_all()

# Expects exactly one command-line argument, the URL to seed, e.g.
# http://citeseerx.ist.psu.edu/viewdoc/summary?cid=16057
if len(sys.argv) == 2:
    url = URL(sys.argv[1])
    url.open()
    db.insert('link', {'doi': url.get_doi(), 'url': url.get_url()})
else:
    # A parenthesised single-argument print behaves identically on Python 2
    # and is valid syntax on Python 3, unlike the bare print statement.
    print('Please supply proper URL.')
Example #9
0
from URL import URL
from BSOUP import BSOUP
from time import sleep
import re

# Hosts file that receives the blocking entries. This is the Windows
# location; change it to match your OS.
hosts_path = r"C:\Windows\System32\drivers\etc\hosts"
# Address blocked host names are redirected to (localhost).
redirect = "127.0.0.1"
# Shared helper instances used by CWBP.
u = URL()
b = BSOUP()
# Host names appended by CWBP.cwblocker.
urlis = []
class CWBP:
    def cwblocker(self, lis):
        """Block the current site in the hosts file if it matches a keyword.

        The current URL comes from ``u.giveurl()`` and its keywords from
        ``b.keyword("https://www." + url)``. For every item of ``lis`` found
        in those keywords, ``www.<host>`` is appended to the module-level
        ``urlis`` list and, unless the hosts file already maps that name, a
        ``<redirect> www.<host>`` line is appended to ``hosts_path``.

        :param lis: iterable of keywords to look for.

        NOTE(review): assumes ``u.giveurl()`` returns a URL without scheme or
        ``www.`` prefix, since both are prepended here -- confirm.
        """
        global urlis
        self.ur = u.giveurl()
        self.a = re.search('/', self.ur)
        self.kw = b.keyword("https://www." + self.ur)

        # Host part is everything before the first '/'. A URL without any
        # slash is already a bare host (this used to crash on a None match).
        host = self.ur[:self.a.start()] if self.a else self.ur
        blocked_name = "www." + host

        for i in lis:
            if i in self.kw:
                urlis.append(blocked_name)
                with open(hosts_path, 'r+') as file:
                    self.content = file.read()
                    # Look for the host name that is actually written, not the
                    # full URL (which never appears in a hosts file), so the
                    # same entry is not appended again and again.
                    already_blocked = any(
                        blocked_name in line.split()[1:]
                        for line in self.content.splitlines()
                        if not line.lstrip().startswith('#')
                    )
                    if not already_blocked:
                        # Keep the new entry on its own line even when the
                        # file does not end with a newline.
                        if self.content and not self.content.endswith("\n"):
                            file.write("\n")
                        # mapping hostnames to your localhost IP address
                        file.write(redirect + " " + blocked_name + "\n")
    def unblocker(self):
        global urlis
        with open(hosts_path, 'r+') as file: 
Example #10
0
def setup(request, setUpClass):
    """Start Chrome on the web viewer page and attach helpers to the test class.

    Opens ``url.webViewerUAT`` in a Chrome driver, opens the suite log in
    append mode, builds the driver utility and login page object, and stores
    them (with the driver, URL and utility objects) on ``request.cls``. The
    driver is yielded to the tests.

    On teardown the browser window is closed and the log file is closed. Both
    steps run even if setup or the other teardown step raises, so neither the
    browser nor the file handle is leaked.

    :param request: object whose ``cls`` attribute receives the helpers
        (presumably pytest's ``request`` fixture -- confirm).
    :param setUpClass: not used in the body; presumably requested only so the
        suite-level setup runs first -- confirm.
    """
    print("initiating chrome driverd")
    driver = Browser().getbrowser("chrome")
    log = None
    try:
        url = URL()
        driver.get(url.webViewerUAT)
        utility = Utility()
        # utility.createLogFolder()
        log = open(utility.logpath + "/WV-00.txt", "a+")
        driverUtility = DriverUtility(driver, log)
        loginPageObject = LoginPageObject(driverUtility, log)

        request.cls.driver = driver
        request.cls.url1 = url
        request.cls.utility = utility
        request.cls.driverUtility = driverUtility
        request.cls.loginPageObject = loginPageObject

        print("setup ended")
        yield driver
    finally:
        try:
            driver.close()
        finally:
            # The log was previously never closed; release it last because
            # the helper objects write to it while the tests run.
            if log is not None:
                log.close()


# from datetime import datetime

# def pytest_logger_config(logger_config):

#     logger_config.add_loggers(['foo', 'bar', 'baz'], stdout_level='debug')
#     logger_config.set_log_option_default('foo,bar')

# def pytest_logger_logdirlink(config):
# 	print("1")
# 	path = os.path.dirname(os.getcwd()) + '/Logs/'
# 	foldername = datetime.now().strftime("%Y%m%d-%H%M%S")
# 	logpath = path+foldername
# 	try:
# 		# return os.mkdir(logpath)
# 		return os.path.join(path, foldername)
# 		# return logpath
# 	except OSError as e:
# 		print("Creation of the directory failed")
# 		print(traceback.format_exc())
# 	else:
# 		print("Successfully created the directory")

# return os.path.join(os.path.dirname(__file__), 'mylogs')

# @pytest.yield_fixture(scope='session')
# def session_thing():
#     foo.debug('constructing session thing')
#     yield
#     foo.debug('destroying session thing')

# @pytest.yield_fixture
# def testcase_thing():
#     foo.debug('constructing testcase thing')
#     yield
#     foo.debug('destroying testcase thing')

# @pytest.fixture(scope="class")
# def setup(request):
#     print("initiating chrome driver")
#     driver = Browser().getbrowser("chrome") #if not added in PATH
#     url = URL()
#     utility = Utility()

#     # driver.maximize_window()
#     request.cls.d = driver
#     request.cls.u = utility
#     request.cls.url1 = url
#     yield
#     driver.close()

# import pytest
# from selenium import webdriver

# @pytest.fixture(scope="class")
# def setup(request):
#     print("initiating chrome driver")
#     driver = Browser().getbrowser("chrome") #if not added in PATH
#     url = URL()
#     utility = Utility()
#     # driver.maximize_window()
#     request.cls.d = driver
#     request.cls.u = utility
#     request.cls.url1 = url

#     yield driver
#     driver.close()

# @pytest.fixture(scope='session')
# def config():
# 	with open('WV_00_Config.json') as config_file:
# 		data = json.load(config_file)
# 		for r in data['Enabled']:
# 			print (r[b])
# 	return data
Example #11
0
from URL import URL
from DB import DB
from bs4 import BeautifulSoup

db = DB('citeseerx.db')

# Number of links handled so far (only incremented in the visible code).
count = 0
# Keep going for as long as the database reports unprocessed links.
while db.count_unpr():
    # url = URL('http://citeseerx.ist.psu.edu/viewdoc/summary?cid=4320')
    count = count + 1
    url = db.get_unpr()
    # A parenthesised single-argument print behaves identically on Python 2
    # and is valid syntax on Python 3, unlike the bare print statement.
    print(url)
    url = URL(url)
    url.open()
    # NOTE(review): status value 2 presumably marks the link as processed --
    # confirm against DB.update_link.
    db.update_link(url.get_doi(), 2)

    # Record the redirect target as a link of its own when it is new.
    if (not db.exists('link', url.get_doi()) and url.redirect_occured()):
        db.insert('link', {
            'doi': url.get_doi(),
            'url': url.get_redirect_url()
        })

    # Only fetch and parse the page when no metadata is stored for this DOI.
    if (not db.exists('metadata', url.get_doi())):
        html = url.fetch()
        # extract abstract
        soup = BeautifulSoup(html, "html.parser")
        # Title is the first text node of the first <h2>.
        title = soup.find('h2').findAll(text=True)[0]
        abstract_div = soup.find("div", {"id": "abstract"})
        # The abstract is the text of the <p> child(ren) of div#abstract; the
        # last <p> wins when there are several.
        # NOTE(review): title/abstract are not used in the visible code -- the
        # statement storing them is presumably outside this excerpt.
        for tag in abstract_div:
            if tag.name == 'p':
                abstract = tag.findAll(text=True)
Example #12
0
 def __init__(self, url1, url2):
     """Wrap both inputs in ``URL`` objects and store ``self._allparams()``.

     :param url1: value used to build ``self.ut1``.
     :param url2: value used to build ``self.ut2``.
     """
     self.ut1, self.ut2 = URL(url1), URL(url2)
     # Computed after both URL objects exist.
     self.params = self._allparams()
Example #13
0
        if self.ut1.getBaseUrl() != self.ut2.getBaseUrl():
            add("baseUrls are different")
        p1 = self.ut1.getParamMap()
        p2 = self.ut2.getParamMap()

        for p in self._allparams():
            if not p1.has_key(p):
                add("'%s' is not defined in 1" % p)
            elif not p2.has_key(p):
                add("'%s' is not defined in 2" % p)
            elif p1[p] != p2[p]:
                add("different values for '%s'" % p)
                add("\t1 - %s\n\t2 - %s" % (p1[p], p2[p]))
        if not msg:
            return "no diff"
        else:
            return '\n'.join(msg)


def cmpUrls(url1, url2):
    """Print a report for each URL followed by the differences between them.

    Builds a ``UrlComparator`` for the pair, calls ``report`` on each wrapped
    URL with the labels ``"URL 1"`` / ``"URL 2"``, then prints a ``Diff:``
    heading and the result of ``diff()``.

    :param url1: first URL to compare.
    :param url2: second URL to compare.
    """
    ct = UrlComparator(url1, url2)
    ct.ut1.report("URL 1")
    ct.ut2.report("URL 2")
    # Parenthesised single-argument prints behave identically on Python 2 and
    # are valid syntax on Python 3, unlike the bare print statement.
    print("\nDiff:")
    print(ct.diff())


if __name__ == "__main__":
    # Demo entry point: wrap ``ssDoc`` (defined outside this excerpt) in a URL
    # and show its tuple form. The parenthesised single-argument print is
    # valid on both Python 2 and Python 3.
    ut = URL(ssDoc)
    print(ut.getTuple())