Example #1
from urllib import request

def get_html_str(url):
    """Fetch a URL and return the response body as text."""
    res_obj = request.urlopen(url)
    html_byte = res_obj.read()
    # read() returns bytes; decode to str for the caller.
    if isinstance(html_byte, bytes):
        return html_byte.decode("utf-8")
    elif isinstance(html_byte, str):
        return html_byte
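A minimal usage sketch; the URL is a placeholder:

html = get_html_str("https://example.com")
print(html[:80])  # first few characters of the markup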
Example #2
    def update(self, rowid, new_values):
        url = self.endpoint_url + 'subscribers/' + self.list_id + '/subscribe.json?' + 'apikey=' + self.api_key
        raw_params = {
            "Name": new_values.get("Name"),
            "Email": new_values.get("Email"),
            "HasExternalDoubleOptIn": True,
            "CustomFields": [
                str(k) + "=" + str(new_values.get(k, None))
                for k in self.custom_fields
                if new_values.get(k, None) is not None
            ]
        }
        params = json.dumps(raw_params)
        log_to_postgres(params, DEBUG)
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        request = Request(url, data=params.encode("utf-8"), headers=headers)  # urlopen expects a bytes body on Python 3
        response = urlopen(request)
        result = json.loads(response.read())

        log_to_postgres(result, DEBUG)

        if result["Code"] != 0:
            log_to_postgres("MoosendFDW: " + result["Error"], ERROR)
            return None

        return {c: self.col(c, result["Context"]) for c in self.columns}
Example #3
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup

def get_page(reg_url):
    # Send a browser-like User-Agent so servers that block scripts respond.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2228.0 Safari/537.3'}
    req = Request(url=reg_url, headers=headers)
    html = urlopen(req).read()
    # Request() always returns an object, so the original None checks were
    # dead code (and could leave `page` unbound); parse the response directly.
    return BeautifulSoup(html, 'lxml')
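A hedged usage example; the URL is a placeholder, and lxml is assumed to be installed:

soup = get_page("https://example.com")
print(soup.title)  # the parsed <title> tag, if the page has one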
Example #4
    def fetch_page(self, page_num):
        response = urlopen(self.endpoint_url + 'lists/' + self.list_id +
                           '/subscribers/Subscribed.json?' + 'apikey=' +
                           self.api_key + '&Page=' + str(page_num) +
                           '&PageSize=' + str(self.page_size))
        results = json.loads(response.read())

        log_to_postgres(results, DEBUG)
        if results["Code"] != 0:
            log_to_postgres("MoosendFDW: " + results["Error"], ERROR)
            return (None, None)

        return (results["Context"],
                results["Context"]["Paging"]["TotalPageCount"])
Example #5
    def delete(self, rowid):
        url = self.endpoint_url + "subscribers/" + self.list_id + "/remove.json?apikey=" + self.api_key
        raw_params = {"Email": rowid}
        params = json.dumps(raw_params)
        log_to_postgres(params, DEBUG)
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        request = Request(url, data=params.encode("utf-8"), headers=headers)  # urlopen expects a bytes body on Python 3
        response = urlopen(request)
        results = json.loads(response.read())

        log_to_postgres(results, DEBUG)

        if results["Code"] != 0:
            log_to_postgres("MoosendFDW: " + results["Error"], ERROR)
Example #6
import json as j
from urllib.parse import urlencode
from urllib.request import Request, urlopen

# __version__ and Results are defined elsewhere in the module this
# function was excerpted from.


def query(query, useragent='python-duckduckgo ' + str(__version__), safesearch=True, html=False, meanings=True, **kwargs):
    """
    Query DuckDuckGo, returning a Results object.

    Here's a query that's unlikely to change:

    >>> result = query('1 + 1')
    >>> result.type
    'nothing'
    >>> result.answer.text
    '1 + 1 = 2'
    >>> result.answer.type
    'calc'

    Keyword arguments:
    useragent: User-Agent to use while querying. Default: "python-duckduckgo <version>" (str)
    safesearch: True for on, False for off. Default: True (bool)
    html: True to allow HTML in output. Default: False (bool)
    meanings: True to include disambiguations in results (bool)
    Any other keyword arguments are passed directly to DuckDuckGo as URL params.
    """

    # The API flags are inverted relative to the booleans:
    # kp=1 enables safe search, no_html=1 strips HTML, d=0 includes disambiguations.
    safesearch = '1' if safesearch else '-1'
    html = '0' if html else '1'
    meanings = '0' if meanings else '1'
    params = {
        'q': query,
        'o': 'json',
        'kp': safesearch,
        'no_redirect': '1',
        'no_html': html,
        'd': meanings,
        }
    params.update(kwargs)
    encparams = urlencode(params)
    url = 'http://api.duckduckgo.com/?' + encparams

    request = Request(url, headers={'User-Agent': useragent})
    response = urlopen(request)
    data = j.loads(response.read())  # renamed from `json` to avoid shadowing the module
    response.close()

    return Results(data)
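A minimal usage sketch, mirroring the doctest in the docstring above:

result = query('1 + 1')
print(result.answer.text)  # per the docstring: '1 + 1 = 2'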
Example #7
from timeit import default_timer as timer
from urllib.request import urlopen

# printResults() is defined elsewhere in the original script.


def main():

    sortby = 1          # Magnitude = 0, Place = 1, Distance = 2

    # Using data feed from the USGS
    # quakeData = "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"
    # quakeData = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_week.geojson"
    quakeData = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_month.geojson"

    # Open the URL and read the data, call printResults() to format and output
    try:
        webUrl = urlopen(quakeData)
    except OSError:  # URLError is a subclass of OSError; avoid a bare except
        print("Error opening: {}".format(quakeData))
    else:
        if webUrl.getcode() == 200:
            data = webUrl.read()
            start = timer()
            printResults(data, sortby)
            print("Processed data in {:2.3f} seconds".format(timer() - start))
        else:
            print("Error from USGS server, cannot retrieve data " + str(webUrl.getcode()))
Example #8
    def __create_remote_webdriver_from_config(self, testname=None):
        '''
        Creates a remote webdriver instance using settings from the config
        (browser capabilities and the remote hub URL).
        '''
        desired_capabilities = self._generate_desired_capabilities(testname)
        
        remote_url = self._config_reader.get(
            WebDriverFactory.REMOTE_URL_CONFIG)

        # Instantiate remote webdriver.
        driver = webdriver.Remote(
            desired_capabilities=desired_capabilities,
            command_executor=remote_url
        )

        # Log IP Address of node if configured, so it can be used to
        # troubleshoot issues if they occur.
        log_driver_props = \
            self._config_reader.get(
                WebDriverFactory.LOG_REMOTEDRIVER_PROPS, default_value=False
            ) in [True, "true", "TRUE", "True"]
        if "wd/hub" in remote_url and log_driver_props:
            try:
                grid_addr = remote_url[:remote_url.index("wd/hub")]
                info_request_response = urlopen(
                    grid_addr + "grid/api/testsession?session=" + driver.session_id, "", 5000)
                node_info = info_request_response.read()
                _wtflog.info(
                    u("RemoteWebdriver using node: ") + u(node_info).strip())
            except Exception:
                # Unable to get IP Address of remote webdriver.
                # This happens with many 3rd party grid providers as they don't want you accessing info on nodes on
                # their internal network.
                pass

        return driver
Example #9
# -*- coding: utf-8 -*-
"""
Created on Wed Aug  8 11:36:39 2018

@author: SilverDoe
"""

try:
    from urllib2 import urlopen  # Python 2 (the original 'urllib2.request' path never existed)
except ImportError:
    print("urllib2 doesn't exist")
    from urllib.request import urlopen  # Python 3

my_url = "https://wccftech.com"

fileurl = urlopen(my_url)

print(fileurl.read())
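read() returns bytes; a short sketch of decoding for display (UTF-8 is an assumption about the page's charset):

text = urlopen(my_url).read().decode("utf-8")
print(text[:200])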
Example #10
import numpy as np
from urllib.request import urlopen  # urllib2 was merged into urllib.request in Python 3
import matplotlib.pyplot as plt  # Visuals
import pandas as pd

np.set_printoptions(threshold=np.inf)  # np.nan is rejected as a threshold by current NumPy

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'
names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
    'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease'
]
heartDisease = pd.read_csv(urlopen(url), names=names)
heartDisease.head()
print(heartDisease.head())

# Delete COST
del heartDisease['ca']
del heartDisease['slope']
del heartDisease['thal']
del heartDisease['oldpeak']

heartDisease = heartDisease.replace('?', np.nan)
heartDisease.dtypes
print(heartDisease.dtypes)
heartDisease.columns

from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'), ('sex', 'trestbps'),
Example #11
# From Web Scraping with Python with modifications
__author__ = 'phil'

from urllib.request import urlopen  # Python 3 path; 'urllib2.request' never existed
from bs4 import BeautifulSoup

html = urlopen("http://dowjones.com")
bsObj = BeautifulSoup(html, "html.parser")  # name a parser to avoid bs4's warning
for link in bsObj.findAll("a"):
    if 'href' in link.attrs:
        print(link.attrs['href'])
Example #12
from urllib import request, parse  # in Python 3 these live under urllib, not urllib2

url = "http://httpbin.org/get"
parms = {'name1': 'value1', 'name2': 'value2'}
qrystr = parse.urlencode(parms)
u = request.urlopen(url + '?' + qrystr)
resp = u.read()
print(resp)
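For comparison, a hedged sketch of sending the same parameters in a POST body instead; httpbin.org/post echoes them back:

data = parse.urlencode(parms).encode('ascii')  # request bodies must be bytes
u = request.urlopen('http://httpbin.org/post', data=data)
print(u.read())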