Example #1
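# Print the name, price, and rating for each product container on an Amazon search-results page.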
def download_data(url):
    response = uopen(url)
    page_html = response.read()
    response.close()
    soup = BeautifulSoup(page_html, 'html.parser')
    containers = soup.findAll('div', {'class': 'a-section a-spacing-medium'})
    cnt = len(containers)
    for i in range(0, cnt):
        print(i + 1, '--------')
        try:
            print(containers[i].find(
                'span', {
                    'class': 'a-size-medium a-color-base a-text-normal'
                }).text)
        except AttributeError:
            print("", end='')
        try:
            print("Rs.", (containers[i].find('span', {
                'class': 'a-price-whole'
            }).text))
        except AttributeError:
            print("", end='')
        try:
            print('Rating:', (containers[i].find('span', {
                'class': 'a-icon-alt'
            }).text))
        except AttributeError:
            print("", end='')
Example #2
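    # Find your_team's match on the Cricbuzz home page and show its score (or a status message)
    # in a Qt system-tray notification.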
    def getScore(self, your_team):

        request = uopen("https://www.cricbuzz.com")
        html_code = request.read()
        request.close()
        soup_page = soup(html_code, "html.parser")

        matches = soup_page.findAll("div",
                                    {"class": "cb-col cb-col-25 cb-mtch-blk"})
        l = len(matches)
        c = c1 = 0
        for i in range(l):
            if (your_team in matches[i].a["title"]):
                team = matches[i].a["title"]
                str1 = matches[i].div.text
                c1 = 1
                for j in str1:
                    if j.isdigit():
                        msg = matches[i].div.text
                        c = 1
                        break
                if (c == 0):
                    msg = "Match not Started Yet!!!"
                    break
            if (c1 == 1):
                break

        if (c1 == 0):
            team = your_team
            msg = "Match Not Found!!!"
        app = Qt.QApplication(sys.argv)
        systemtray = Qt.QSystemTrayIcon(app)
        systemtray.show()
        systemtray.showMessage(team, msg)
Example #3
def getrangking(isbn):
    """
    Purpose: use a regular expression to pull down and return the current ranking.
    Steps: build the final Amazon URL from the ISBN, then call urllib.request.urlopen to open that address.

    :param isbn: the ISBN code
    :return: the ranking
    """
    # page = uopen('%s%s' %(AMAZN, isbn))
    url = '%s%s' % (AMZN, isbn)
    req = Request(
        url,
        headers={
            'Connection':
            'Keep-Alive',
            'Accept':
            'text/html, application/xhtml+xml, */*',
            'Accept-Language':
            'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
        })
    page = uopen(req)
    data = page.read()
    page.close()
    return REGEX.findall(data.decode())[0]
Example #4
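# Load a pattern's raw YAML, record its curators, and return a row dict for the overview table.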
def create_overview_table_row(content_file): 
    global pattern_list
    pattern_path = str(content_file.path)
    pattern_name = pattern_path.replace("src/patterns/","")
    raw_yaml = pattern_dir + pattern_name
    try:
      contents = uopen(raw_yaml).read()
      yaml_content = yaml.round_trip_load(contents,preserve_quotes=True)
    except:
      print(raw_yaml+ " could not be loaded!")
      return {"pattern": pattern_name, "done": "ERROR", "issue": None, "contr": 0}
    
    pname = get_pattern_name(pattern_name)
    gh_paths[pname] = pattern_path
    pattern_list[pname] = yaml_content
    if "contributors" in yaml_content.keys(): 
        contributors = yaml_content["contributors"]
    else:
        contributors = []
    pattern_curators[pname] = list(contributors)
    orcid = get_orcid()
    if orcid in contributors:
        processed = "Yes"
    else:
        processed = "No"
    issue = get_issue(pattern_name)
    return {"pattern": pattern_name, "done": processed, "issue": issue, "contr": len(list(contributors))}
Example #5
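# Fetch the Amazon page for an ISBN and extract its ranking with the precompiled REGEX.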
def getRanking(isbn):
    setdefaulttimeout(10)  # set the socket timeout

    # print(f"{AMZN}{isbn}")
    page = uopen(f"{AMZN}{isbn}")
    data = page.read()
    page.close()
    return str(REGEX.findall(data)[0], "utf-8")  # convert bytes to a Unicode string
Example #6
def getrangking(isbn):
    url = '%s%s' % (AMZN, isbn)
    req = Request(url, headers={
        'Connection': 'Keep-Alive',
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
    })
    with uopen(req) as page:
        return REGEX.findall(page.read().decode())[0]
Example #7
def getRanking(isbn):
    try:
        page = uopen("%s%s" % (AMIZ, isbn))  # or str.format
        data = page.read()
        return REGEX.findall(data)[0]
    except:
        return None
    finally:
        if 'page' in locals():
            page.close()
Example #8
def get_ranking(isbn):
    url = '{0}{1}'.format(amazon, isbn)
    page = uopen(url)
    data = page.read()
    page.close()
    # print(type(data))
    # 将data转换为str
    data1 = data.decode('utf-8')

    result = regex.search(data1)
    # print(result)
    return result.group(1)
Example #9
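# Save an entry's JSON and an HTML page built from templates (handling Python 2 and 3), then download its image.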
def download(num):
    data = get_json(num)

    # Save JSON
    if sys.version_info[0] >= 3:
        json_file = open('{0}.json'.format(num), 'w', encoding='utf-8')
        json_file.write(str(data))
    else:
        json_file = open('%d.json' % num, 'w')
        json_file.write(str(data).encode('utf-8'))

    # Create HTML from template
    update_meta(data)
    meta_labels = {
        'num': 'Number:',
        'date': 'Published:',
        'news': 'News:',
        'link': 'Link:'
    }

    # Write HTML and image to file
    if sys.version_info[0] >= 3:
        file = open('{0}.html'.format(num), 'w', encoding='utf-8')
        file.write(TEMPLATES['head'].substitute(data))
        for i in filter((lambda i: data[i] or False), meta_labels.keys()):
            file.write(TEMPLATES['entry'].substitute({
                'label':
                meta_labels[i],
                'value':
                cgi.escape(str(data[i]), quote=True)
            }))
        file.write(TEMPLATES['tail'].substitute(data))
        file.close()
    else:
        file = open('%d.html' % num, 'w')
        file.write((TEMPLATES['head'].substitute(data)).encode('utf-8'))
        for i in filter((lambda i: data[i] or False), meta_labels.keys()):
            file.write((TEMPLATES['entry'].substitute({
                'label':
                meta_labels[i],
                'value':
                cgi.escape(str(data[i]), quote=True)
            })).encode('utf-8'))
        file.write((TEMPLATES['tail'].substitute(data)).encode('utf-8'))
        file.close()

    image = uopen(data['img'])
    try:
        img = open('{0}.png'.format(num), 'wb')
    except AttributeError:
        img = open('%d.png' % num, 'wb')
    img.write(image.read())
Example #10
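    # Fetch a food's mobile page, parse its name, nutrition panel, and serving sizes, and return a Yemek object.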
    def getFoodInfo(url):

        try:
            newurl = FHandler.mobile_url + url
            tempdata = uopen(newurl).read()
            #tempdata = open('test_foodinfo.html','r').read()

            bsobj = bs(tempdata, 'html.parser')
            name = bsobj.body.find('div', attrs={
                'class': 'page-title'
            }).text.strip(' \r\t\n').lower()

        except URLError:
            RESULT(" stopped, no connection?")
            FHandler.__debugDump(tempdata,
                                 '/tmp/debug_foodinfo.no_connection.html')
            exit(-1)

        except AttributeError:
            RESULT(" stopped, page is being redirected?")
            FHandler.__debugDump(tempdata,
                                 '/tmp/debug_foodinfo.redirected.html')
            exit(-1)

        food_tagline = bsobj.find('div', attrs={
            'class': 'page-info-text'
        }).text.strip(' \r\t\n').lower()
        food_table = bsobj.find('div', attrs={'class': 'nutpanel'})

        food_info = FHandler.handleFoodInfo(food_table)

        name = ' '.join(name.split())

        yem = Yemek(name, food_info[0], food_info[1], food_info[2],
                    food_info[3], food_info[4], food_info[5])

        #import pdb; pdb.set_trace()
        section_titles = bsobj.body.find_all('div', {'class': 'section-title'})
        portion_data = [
            x for x in section_titles if x.text == "Common serving sizes"
        ]

        if len(portion_data) > 0:
            portion_data = portion_data[0]
            portion_info = FHandler.handlePortionData(
                portion_data.findNext('table'))

            if portion_info != -1:
                for key, val in portion_info:
                    yem.portions.insert(key, val)

        return yem
Example #11
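# Collect the scores from the livescore block on matchendirect.fr.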
def score():
    output_score = []
    my_url2 = uopen('https://www.matchendirect.fr/espagne')
    html_p2 = my_url2.read()
    my_url2.close()
    soup2 = BeautifulSoup(html_p2, "html.parser")
    container_score = soup2.findAll("div", {"id": "livescore"})
    for con in container_score:
        team_con = con.findAll("td", {"class": "lm3"})
        for x in team_con:
            score_c = x.findAll("span", {"class": "lm3_score"})
            score = score_c[0].text
            output_score.append(score)
    return output_score
Example #12
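# Collect each match's time (horaire) from the livescore block on matchendirect.fr.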
def horaire():
    output_horaire = []
    my_url3 = uopen('https://www.matchendirect.fr/espagne')
    html_p3 = my_url3.read()
    my_url3.close()
    soup3 = BeautifulSoup(html_p3, "html.parser")
    container_statut = soup3.findAll("div", {"id": "livescore"})
    for cont in container_statut:
        team_cont = cont.findAll("td", {"class": "lm2 lm2_0"})
        for x in team_cont:
            horaire = x.next_element.text.strip("-- :&nbsp--")
            output_horaire.append(horaire)
#            print(output_horaire)
    return output_horaire
Example #13
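# Flask view: search Flipkart for the posted query, open the first result, and render its reviews with result.html.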
def scrap():
    if request.method == "POST":
        searchString = request.form['content'].replace(" ", "")
        noc = int(request.form['numOfComments'])
        try:
            flipkart_search_url = r"https://www.flipkart.com/search?q=" + searchString
            page = uopen(flipkart_search_url)
            page_content = page.read()
            page.close()
            page_html = bs(page_content, "html.parser")
            boxes = page_html.findAll("div", {"class": "bhgxx2 col-12-12"})
            boxes = boxes[2:]
            box = boxes[0]
            product_url = r"https://www.flipkart.com" + box.div.div.div.a[
                'href']
            product = uopen(product_url)
            product_content = product.read()
            product.close()
            product_html = bs(product_content, "html.parser")
            reviews = product_html.findAll("div", {"class": "_3nrCtb"})
            reviews_df = []
            for i in range(noc):
                Heading = reviews[i].div.div.div.p.text
                comm = reviews[i].findAll("div", {"class": ""})
                Content = comm[0].div.text
                reviews_df.append([Heading, Content])

            reviews_df = pd.DataFrame(reviews_df,
                                      columns=["Heading", "Content"])
            reviews_df = reviews_df.to_dict("records")
            print(reviews_df)

            return (render_template("result.html", reviews_df=reviews_df))
        except Exception as e:
            return (str(e))
    else:
        return "not post"
Example #14
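# Collect each match's status text (with the time span removed) from the livescore block on matchendirect.fr.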
def statut():
    output_statut = []
    my_url3 = uopen('https://www.matchendirect.fr/espagne')
    html_p3 = my_url3.read()
    my_url3.close()
    soup3 = BeautifulSoup(html_p3, "html.parser")
    container_statut = soup3.findAll("div", {"id": "livescore"})
    for cont in container_statut:
        team_cont = cont.findAll("td", {"class": "lm2 lm2_0"})
        for x in team_cont:

            statut_c = x.span.decompose()
            statut = x.text
            output_statut.append(statut)
    return output_statut
Example #15
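# Download and parse a comic's JSON metadata, patching a garbled apostrophe in comic 971.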
def get_json(num):
    url = get_url(num)

    # Download JSON, retrying in case of an error
    while True:
        try:
            comic = uopen(url).read().decode()
            break
        except:
            raise

    # A crutch for a comic json with apparently an error in it
    if num == 971:
        comic = comic.replace("\u00e2\u0080\u0099", "'")

    # Open JSON file
    return json.loads(comic)
Example #16
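# Print names, prices, and short descriptions for each product container on a Flipkart listing page.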
def download_data(url):
    response = uopen(url)
    page_html = response.read()
    response.close()
    soup = BeautifulSoup(page_html, 'html.parser')
    containers = soup.findAll("div", {'class': "_13oc-S"})
    cnt = len(containers)
    for i in range(0, cnt):
        print(i + 1, '--------')
        try:
            print("Name:", containers[i].find("a", {'class': 's1Q9rs'}).text)
        except AttributeError:
            print("", end='')
        try:
            print("Name:", containers[i].find("div", {
                'class': '_4rR01T'
            }).text)
        except AttributeError:
            print("", end='')
        try:
            print("It's price is ", containers[i].find("div", {
                'class': '_30jeq3'
            }).text)
        except AttributeError:
            print("", end='')
        try:
            print("It's price is ",
                  containers[i].find("div", {
                      'class': '_30jeq3 _1_WHN1'
                  }).text)
        except AttributeError:
            print("", end='')
        try:
            print("Some description:",
                  containers[i].find("li", {
                      'class': 'rgWa7D'
                  }).text)
        except AttributeError:
            print("", end='')
        try:
            print("Some description:",
                  containers[i].find("div", {
                      'class': '_3Djpdu'
                  }).text)
        except AttributeError:
            print("")
Example #17
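# Extract the team names for each match from the livescore block on matchendirect.fr.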
def resMatch():
    output = []
    my_url = uopen('https://www.matchendirect.fr/espagne')
    html_p = my_url.read()
    my_url.close()
    soup = BeautifulSoup(html_p, "html.parser")
    container = soup.findAll("div", {"id": "livescore"})
    for c in container:
        team_c = c.findAll("td", {"class": "lm3"})
        for x in team_c:
            team = x.a["title"]
            regex_team = re.findall(r'[^Détail du match \:].*', team)
            score_c = x.findAll("span", {"class": "lm3_score"})
            score = score_c[0].text
            output.append("".join(regex_team))

    return output
Example #18
def get_json(num):
    url = get_url(num)

    # Download JSON, retrying in case of an error
    while True:
        try:
            comic = uopen(url).read().decode()
            break
        except:
            raise

    # A crutch for a comic json with apparently an error in it
    if num == 971:
        comic = comic.replace("\u00e2\u0080\u0099", "'")

    # Open JSON file
    return json.loads(comic)
Example #19
def download(num):
    data = get_json(num)

    # Save JSON
    if sys.version_info[0] >= 3:
        json_file = open("{0}.json".format(num), "w", encoding="utf-8")
        json_file.write(str(data))
    else:
        json_file = open("%d.json" % num, "w")
        json_file.write(str(data).encode("utf-8"))

    # Create HTML from template
    update_meta(data)
    meta_labels = {"num": "Number:", "date": "Published:", "news": "News:", "link": "Link:"}

    # Write HTML and image to file
    if sys.version_info[0] >= 3:
        file = open("{0}.html".format(num), "w", encoding="utf-8")
        file.write(TEMPLATES["head"].substitute(data))
        for i in filter((lambda i: data[i] or False), meta_labels.keys()):
            file.write(
                TEMPLATES["entry"].substitute({"label": meta_labels[i], "value": cgi.escape(str(data[i]), quote=True)})
            )
        file.write(TEMPLATES["tail"].substitute(data))
        file.close()
    else:
        file = open("%d.html" % num, "w")
        file.write((TEMPLATES["head"].substitute(data)).encode("utf-8"))
        for i in filter((lambda i: data[i] or False), meta_labels.keys()):
            file.write(
                (
                    TEMPLATES["entry"].substitute(
                        {"label": meta_labels[i], "value": cgi.escape(str(data[i]), quote=True)}
                    )
                ).encode("utf-8")
            )
        file.write((TEMPLATES["tail"].substitute(data)).encode("utf-8"))
        file.close()

    image = uopen(data["img"])
    try:
        img = open("{0}.png".format(num), "wb")
    except AttributeError:
        img = open("%d.png" % num, "wb")
    img.write(image.read())
Example #20
def get_ranking(isbn):
    url = '%s%s' % (AMZN, isbn)
    # headers = {'User-Agent': user_agent}
    # req = Request(url, headers=headers)
    print('request url:', url)
    try:
        # Python 3 needs the context argument, otherwise it raises <urlopen error [SSL:
        # CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:748)>
        page = uopen(url, timeout=30, context=context)  # or use str.format()
        print('Open', url, 'success')
        data = page.read()
        # On Python 3 this decode is required, otherwise: TypeError: cannot use a string pattern on a bytes-like object
        data = data.decode('utf-8')
        page.close()
        result = REGEX.findall(data)
        return result[0]
    except Exception as e:
        print(str(e))
    return 'unknown'
Example #21
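    # Look up the query on the food site, parse the result list, and either pick a result
    # or match the results against an existing food object.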
    def __init__(self, query, foodobj=0):
        INFO("\r\tChecking online...", end=' ')
        self.query = HTMLMethods.toHTMLChars(query)

        try:
            self.pagedata = uopen(FHandler.query_url + self.query).read()
        except URLError:
            print(" stopped, no connection?")
            exit(-1)

#		# offline saved
#		print(self.pagedata)
#		exit(0)
#		self.pagedata = open("test_sub.html").read()

        self.results = self.ParseResults()
        if foodobj == 0:
            self.found = self.resHandler()
        else:
            #Check current food obj against results list
            self.found = self.checkFoodHomology(foodobj)
Example #22
def download(num):
    data = get_json(num)

    # Save JSON
    if sys.version_info[0] >= 3:
        json_file = open('{0}.json'.format(num), 'w', encoding='utf-8')
        json_file.write(str(data))
    else:
        json_file = open('%d.json' % num, 'w')
        json_file.write(str(data).encode('utf-8'))

    # Create HTML from template
    update_meta(data)
    meta_labels = {'num': 'Number:', 'date': 'Published:', 'news': 'News:',
                'link': 'Link:'}

    # Write HTML and image to file
    if sys.version_info[0] >= 3:
        file = open('{0}.html'.format(num), 'w', encoding='utf-8')
        file.write(TEMPLATES['head'].substitute(data))
        for i in filter((lambda i: data[i] or False), meta_labels.keys()):
            file.write(TEMPLATES['entry'].substitute({'label': meta_labels[i],
                'value': cgi.escape(str(data[i]), quote=True)}))
        file.write(TEMPLATES['tail'].substitute(data))
        file.close()
    else:
        file = open('%d.html' % num, 'w')
        file.write((TEMPLATES['head'].substitute(data)).encode('utf-8'))
        for i in filter((lambda i: data[i] or False), meta_labels.keys()):
            file.write((TEMPLATES['entry'].substitute({'label': meta_labels[i],
                'value': cgi.escape(str(data[i]), quote=True)})).encode('utf-8'))
        file.write((TEMPLATES['tail'].substitute(data)).encode('utf-8'))
        file.close()

    image = uopen(data['img'])
    try:
        img = open('{0}.png'.format(num), 'wb')
    except AttributeError:
        img = open('%d.png' % num, 'wb')
    img.write(image.read())
Example #23
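# Parse a course-list table into section headers and (code, name, hours) info slots.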
def scrapeSyl(link=""):
    uclient = uopen(link)
    read_html = uclient.read()
    uclient.close()
    ssoup = soup(read_html, "html.parser")
    table = ssoup.find("table", {"class": "sc_courselist"})

    headers_soup = table.find_all("span",
                                  class_="courselistcomment areaheader")
    headers = []
    for i in headers_soup:
        i = cleanScrape(i)
        if i != "":
            headers.append(i)
    # print(headers)

    slots = []
    code_soup = table.find_all("a", class_="bubblelink code")
    codes = []
    for c in code_soup:
        n = c.findParent(class_="codecol").findNextSibling("td")
        h = n.findNextSibling(class_="hourscol")
        c = cleanScrape(c)
        n = cleanScrape(n)
        if h != "" and h != None:
            h = cleanScrape(h)
        if h == None or h == "":
            h = "N/A"
        if c != "":
            c = c.replace(u"\xa0", u" ")
        # debug print cnh
        # print(f"{c}: {n} - {h}")

        newSlot = infoSlot(c, n, h)
        slots.append(newSlot)
    # print(slots)

    return headers, slots
Example #24
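# Open an Amazon laptop search page (SSL certificate checks disabled) and collect the result containers.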
import ssl
from urllib.parse import urlparse as upar
from urllib.request import urlopen as uopen
from bs4 import BeautifulSoup as soup

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# using Amazon with keyword of laptop
my_url = "https://www.amazon.com/s/ref=nb_sb_noss_1?url=search-alias%3Daps&field-keywords=laptop"

try:
    #my_url = "https://www.newegg.com/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description=graphic+card&ignorear=0&N=-1&isNodeId=1"
    # opening up connection
    uClient = uopen(my_url, context=ctx)
    page_html = uClient.read()
    if uClient.getcode() != 200:
        print("Error on page", uClient.getcode())
    uClient.close()
except:
    print("Unable to retrieve or parse page")
    #continue

#html parsing
page_soup = soup(page_html, "html.parser")

#grabs each product: s-item-container is for Amazon,
# while for newegg, it will be item-container
#containers = page_soup.findAll("li",{"class":"s-result-item celwidget"})
containers = page_soup.findAll("div", {"class": "s-item-container"})
Example #25
def getRanking(isbn):
    page = uopen('%s%s' % (AMZN, isbn))
    data = page.read()
    page.close()
    return str(REGEX.findall(data)[0], 'utf-8')
Example #26
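# List the Air Max 95 products (name and link) from a Foot Locker UK category page.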
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uopen

my_url = 'https://www.footlocker.co.uk/en/men/_air-max-95?shoesize=7'

client = uopen(my_url)
page = client.read()
client.close()

page_soup = soup(page, "html.parser")

grab_crep = page_soup.findAll("div",
                              {"class": "fl-category--productlist--item"})

print("")

for crp in grab_crep:

    print(crp.a.span.string.replace(" - Men Shoes", ""))
    print(crp.a.get('href'))
    print("")
    print("")
Example #27
def getRanking(isbn):
    page = uopen('%s%s' % (AMZN, isbn)) # '{0}{1}'.format(AMZN, isbn)) for 2.6+
    data = page.read()
    page.close()
    # print(data)
    return str(REGEX.findall(data)[0], 'utf-8')
Example #28
from urllib.request import urlopen as uopen
from bs4 import BeautifulSoup as soup
import time

graphicurl = "https://www.newegg.com/p/pl?N=100007709%20600030348&page=1"
cpuurl = "https://www.newegg.com/Processors-Desktops/SubCategory/ID-343?Tid=7671"

#Opening GPU site, reading contents, closing site
graphic = uopen(graphicurl)
graphicread = graphic.read()
graphic.close()

#Parsing site to HTML
graphic_soup = soup(graphicread, "html.parser")

#Grabs Product
gcontainers = graphic_soup.findAll("div", {"class": "item-container"})
gcontainer = gcontainers[0]

#Names and Opens File
Gfilename = "GPU Products.csv"
gf = open(Gfilename, "w")

headers = "Brand, Product_name, Price, Shipping_Fee\n"

gf.write(headers)

for gcontainer in gcontainers:
    gbrand = gcontainer.div.div.a.img["title"]
    
    gtitle = gcontainer.findAll("a", {"class":"item-title"})
Example #29
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uopen
from urllib.request import Request as rq
import csv


with open('test','w') as new_file:
    test_writer = csv.writer(new_file,delimiter = ",")

    my_url = 'https://www.amazon.com/'
    req = rq(my_url, headers = {'User-Agent':'Mozilla/5.0'})
    uClient = uopen(req)
    page_html = uClient.read()
    uClient.close()
    page_soup = bs(page_html, "html.parser")
    containers = page_soup.findAll('div')
    print(containers)


Example #30
def get_ranking(is_bn):
    page = uopen('%s%s' % (AMZN, is_bn))
    data = page.read()
    page.close()
    return REGEX.findall(data)[0]
Example #31
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uopen

url = "https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38?Tpk=graphics%20card"
client = uopen(url)
page_html = client.read()
client.close()
page = soup(page_html, "html.parser")

containers = page.findAll("div", {"class": "item-container"})
len(containers)
container = containers[0]
print(container)

container.div.div.a.img["title"]

container.a.img["title"]

#prices=page.findAll("li",{"class":"price-ship"})
#price=prices[0]
shipping = container.findAll("li", {"class": "price-ship"})
shipping[0].text
#for removing leading and trailing whitespace such as \r\n
shipping[0].text.strip()

#loop over all product containers
for container in containers:
    brand = container.div.div.a.img["title"]
    brand_info = container.a.img["title"]
    shipping = container.findAll("li", {"class": "price-ship"})
Example #32
def getRanking(isbn):
    page = uopen('%s%s' % (AMZN, isbn)) # '{0}{1}'.format(AMZN, isbn)) for 2.6+
    data = page.read()
    page.close()
    return str(REGEX.findall(data)[0], 'utf-8')
Example #33
def getRanking(isbn):
    req = request.Request('%s%s' % (AMZN, isbn), {}, head)
    with uopen(req) as page:
        return str(REGEX.findall(page.read().decode('utf-8'))[0])
Example #34
def getRanking(isbn):
    page = uopen('{0}{1}'.format(AMZN, isbn))
    data = page.read()
    page.close()
    return REGEX.findall(data)[0]
Example #35
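# Save the article body from a Medical News Today page into a local text file.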
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uopen

my_url = 'https://www.medicalnewstoday.com/articles/321533.php'
# data taken from

# Opening URl and grabbing content
Page = uopen(my_url)
page_html = Page.read()
Page.close()

# HTML parsing
page_soup = soup(page_html,"html.parser")#parses html data


body = page_soup.find("div",{"class":"article_body"})#finds all div tags with class = article_body
f = open("Could gut bacteria cause joint pain?.txt", "w") #opens a file and writes content to it

f.write(body.text)


f.close()  # Close the file
Example #36
def getRanking(isbn):
    with uopen('{0}{1}'.format(AMZN, isbn)) as page:
        return str(REGEX.findall(page.read())[0], 'utf-8')
Example #37
def getRanking(isbn):
    with uopen('{0}{1}'.format(AMZN, isbn)) as page:
        return str(REGEX.findall(page.read())[0], 'utf-8')