def video_search(request):
    video_response = requests.get(request)
    bs_videos = BeautifulSoup(video_response.text, 'html.parser')
    meta, metaEnd = makeHTMLTags("meta")
    img_meta = meta.copy().setParseAction(
        withAttribute(('property', 'og:image')))
    for img in img_meta.searchString(bs_videos):
        content = img.content
        video_trailer_id = content.split("/")[-2]
        video_trailer_url = "http://videofarm.daum.net/controller/video/viewer/Video.html?vid={}&play_loc=daum_movie&autoplay=true".format(
            video_trailer_id)
        return video_trailer_url

def has_ssl_disabled(apphostconf_dest: str, exclude: list = None) -> bool:
    """
    Check if SSL is disabled in ``ApplicationHost.config``.

    Search for the access tag in the security section of an
    ``ApplicationHost.config`` source file or package.

    :param apphostconf_dest: Path to an ``ApplicationHost.config``
                             source file or package.
    :param exclude: Paths that contain any string from this list are ignored.
    """
    tk_tag_s, _ = makeXMLTags('security')
    tk_access, _ = makeXMLTags('access')
    tag_no_comm = tk_access.ignore(htmlComment)
    tk_access_none = copy(tag_no_comm)
    tk_access_none.setParseAction(withAttribute(sslFlags='None'))
    result = False
    try:
        sec_tag = lang.check_grammar(tk_tag_s, apphostconf_dest,
                                     LANGUAGE_SPECS, exclude)
        if not sec_tag:
            show_unknown('No files matched',
                         details=dict(code_dest=apphostconf_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=apphostconf_dest))
        return False
    access_tags = {}
    none_sslflags = {}
    for code_file, val in sec_tag.items():
        access_tags.update(
            lang.block_contains_grammar(tk_access, code_file, val['lines'],
                                        _get_block))
        none_sslflags.update(
            lang.block_contains_grammar(tk_access_none, code_file,
                                        val['lines'], _get_block))
    if not access_tags or none_sslflags:
        show_open('SSL is disabled',
                  details=dict(
                      matched=access_tags if access_tags else none_sslflags))
        result = True
    else:
        show_close('SSL is enabled',
                   details=dict(file=apphostconf_dest,
                                fingerprint=get_sha256(apphostconf_dest)))
    return result

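# Illustrative sketch, separate from the function above (the sample_config
# string and variable names here are assumptions, not fluidasserts code):
# this shows how the withAttribute(sslFlags='None') parse action singles out
# an <access> tag whose SSL flags are disabled.
from copy import copy
from pyparsing import htmlComment, makeXMLTags, withAttribute

sample_config = '''
<security>
    <access sslFlags="None" />
</security>
'''

access_tag, _ = makeXMLTags('access')
access_none = copy(access_tag.ignore(htmlComment))
access_none.setParseAction(withAttribute(sslFlags='None'))

# searchString yields a match only when the attribute value is exactly "None"
print(bool(access_none.searchString(sample_config)))
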
def is_header_x_powered_by_present(webconf_dest: str,
                                   exclude: list = None) -> bool:
    """
    Search for X-Powered-By headers in a Web.config source file or package.

    :param webconf_dest: Path to a Web.config source file or package.
    :param exclude: Paths that contain any string from this list are ignored.
    """
    tk_tag_s, _ = makeXMLTags('customHeaders')
    tk_add_tag, _ = makeXMLTags('add')
    tk_clear_tag, _ = makeXMLTags('clear')
    tk_remove_tag, _ = makeXMLTags('remove')
    tk_remove_tag.setParseAction(withAttribute(name='X-Powered-By'))
    tk_child_tag = MatchFirst(
        [Suppress(tk_add_tag), Suppress(tk_clear_tag), tk_remove_tag])
    result = False
    try:
        custom_headers = lang.check_grammar(tk_tag_s, webconf_dest,
                                            LANGUAGE_SPECS, exclude)
        if not custom_headers:
            show_unknown('No files matched',
                         details=dict(code_dest=webconf_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=webconf_dest))
        return False
    tk_rem = Suppress(tk_tag_s) + OneOrMore(tk_child_tag)
    vulns = {}
    for code_file, val in custom_headers.items():
        vulns.update(
            lang.block_contains_empty_grammar(tk_rem, code_file, val['lines'],
                                              _get_block))
    if vulns:
        show_open('Header "X-Powered-By" is present',
                  details=dict(matched=vulns,
                               total_lines=len(custom_headers)))
        result = True
    else:
        show_close('Header "X-Powered-By" is not present',
                   details=dict(file=webconf_dest,
                                fingerprint=get_sha256(webconf_dest)))
    return result

def has_debug_enabled(webconf_dest: str, exclude: list = None) -> bool:
    """
    Check if the debug flag is enabled in Web.config.

    Search for the debug attribute of the compilation tag in a Web.config
    source file or package.

    :param webconf_dest: Path to a Web.config source file or package.
    :param exclude: Paths that contain any string from this list are ignored.
    """
    tk_tag_s, _ = makeXMLTags('system.web')
    tk_compilation, _ = makeXMLTags('compilation')
    tag_no_comm = tk_compilation.ignore(htmlComment)
    tk_comp_debug = copy(tag_no_comm)
    tk_comp_debug.setParseAction(withAttribute(debug='true'))
    result = False
    try:
        sysweb_tag = lang.check_grammar(tk_tag_s, webconf_dest,
                                        LANGUAGE_SPECS, exclude)
        if not sysweb_tag:
            show_unknown('No files matched',
                         details=dict(code_dest=webconf_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=webconf_dest))
        return False
    debug_tags = {}
    for code_file, val in sysweb_tag.items():
        debug_tags.update(
            lang.block_contains_grammar(tk_comp_debug, code_file,
                                        val['lines'], _get_block))
    if debug_tags:
        show_open('Debug is enabled',
                  details=dict(matched=debug_tags,
                               total_lines=len(sysweb_tag)))
        result = True
    else:
        show_close('Debug is disabled',
                   details=dict(file=webconf_dest,
                                fingerprint=get_sha256(webconf_dest)))
    return result

def not_custom_errors(webconf_dest: str, exclude: list = None) -> bool:
    """
    Check if the customErrors flag is set to Off in Web.config.

    CWE-12: ASP.NET Misconfiguration: Missing Custom Error Page

    :param webconf_dest: Path to a Web.config source file or package.
    :param exclude: Paths that contain any string from this list are ignored.
    """
    tk_tag_s, _ = makeXMLTags('system.web')
    tk_custom_errors, _ = makeXMLTags('customErrors')
    tag_no_comm = tk_custom_errors.ignore(htmlComment)
    tk_comp_custom_errors = copy(tag_no_comm)
    tk_comp_custom_errors.setParseAction(withAttribute(mode='Off'))
    result = False
    try:
        sysweb_tag = lang.check_grammar(tk_tag_s, webconf_dest,
                                        LANGUAGE_SPECS, exclude)
        if not sysweb_tag:
            show_unknown('No files matched',
                         details=dict(code_dest=webconf_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=webconf_dest))
        return False
    vulns = {}
    for code_file, val in sysweb_tag.items():
        vulns.update(
            lang.block_contains_grammar(tk_comp_custom_errors, code_file,
                                        val['lines'], _get_block))
    if vulns:
        show_open('Custom errors are not enabled',
                  details=dict(matches=vulns,
                               total_lines=len(sysweb_tag)))
        result = True
    else:
        show_close('Custom errors are enabled',
                   details=dict(file=webconf_dest,
                                fingerprint=get_sha256(webconf_dest)))
    return result

import urllib.request

from pyparsing import Group, Regex, Suppress, makeHTMLTags, withAttribute

year = '2014'
conn = urllib.request.urlopen(
    'http://www.boxofficemojo.com/yearly/chart/?yr=' + year + '&p=.htm')
html = conn.read().decode('utf-8', errors='ignore')

"""
looking for this recurring pattern:
    <td valign="top" tdalign="center">00-03</td>
    <td valign="top">.50</td>
    <td valign="top">.50</td>

and want a dict with keys 0, 1, 2, and 3 all with values (.50,.50)
"""

td, tdend = makeHTMLTags("td")
keytd = td.copy().setParseAction(withAttribute(tdalign="center"))
td, tdend, keytd = map(Suppress, (td, tdend, keytd))

realnum = Regex(r'1?\.\d+').setParseAction(lambda t: float(t[0]))
integer = Regex(r'\d{1,3}').setParseAction(lambda t: int(t[0]))
DASH = Suppress('-')

# build up an expression matching the HTML bits above
entryExpr = (keytd + integer("start") + DASH + integer("end") + tdend +
             Group(2 * (td + realnum + tdend))("vals"))

# search the input HTML for matches to the entryExpr expression, and build up lookup dict
lookup = {}
for entry in entryExpr.searchString(html):
    for i in range(entry.start, entry.end + 1):
        lookup[i] = tuple(entry.vals)

import argparse
import jsonpickle
import json
import markdown
import re
from pygments import highlight
from pygments.lexers import guess_lexer
from pygments.formatters import HtmlFormatter
from pyparsing import makeHTMLTags, replaceWith, withAttribute

cod = "../frontend/node_modules/.bin/cod"

spanOpen, spanClose = makeHTMLTags("span")
emptySpans = spanOpen.copy().setParseAction(withAttribute(empty=True))
removeSpans = emptySpans | spanOpen + spanClose
removeSpans.setParseAction(replaceWith(" "))

extensions = ['.less', '.css', '.sass', '.scss']
markup_blocks = {}
formatter = HtmlFormatter(cssclass='source-highlight')


def highlight_source(source):
    if not source:
        return ''
    lexer = guess_lexer(source)
    return highlight(source, lexer, formatter)


def add_markup_block(block):

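# Hedged, self-contained usage sketch for the span-stripping expression
# defined above (sample_html is made up). makeHTMLTags start tags carry an
# "empty" result that is True for self-closed tags, so withAttribute(empty=True)
# matches <span/>, and the replaceWith(" ") action lets transformString
# collapse each matched span (or adjacent <span>...</span> pair) to a space.
from pyparsing import makeHTMLTags, replaceWith, withAttribute

span_open, span_close = makeHTMLTags("span")
empty_spans = span_open.copy().setParseAction(withAttribute(empty=True))
remove_spans = empty_spans | span_open + span_close
remove_spans.setParseAction(replaceWith(" "))

sample_html = 'keep <span/> this <span class="c"></span> text'
print(remove_spans.transformString(sample_html))
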
def data_scrape(master_list_of_links):
    prefix = 'http://www.yelp.com'
    big_list = []
    for i in range(len(master_list_of_links)):
        time_between_big_links = randint(between_big_links_lower_bound,
                                         between_big_links_upper_bound)
        big_link = prefix + master_list_of_links[i]
        print(big_link)
        print("Scrape initiated")
        soup = link_opener(big_link)
        street = soup.find_all("span", itemprop="streetAddress")
        locality = soup.find_all("span", itemprop="addressLocality")
        state = soup.find_all("span", itemprop="addressRegion")
        zip_code = soup.find_all("span", itemprop="postalCode")
        phone = soup.find_all("span", class_="biz-phone")
        suffix = '?start='
        # review_count specifies how many search pages of reviews you will
        # crawl through. This is set to go through at most 320 reviews
        review_count = ['0', '40', '80', '120', '160', '200', '240', '280',
                        '320']
        for j in review_count:
            time_between_review_pages = randint(
                between_review_pages_lower_bound,
                between_review_pages_upper_bound)
            print("processing...")
            new_link = big_link + suffix + j
            soup = link_opener(new_link)
            review_content = soup.find_all("div", class_="review-content")
            if not review_content:
                break
            meta_date = makeHTMLTags('meta')[0]
            meta_date.setParseAction(withAttribute(itemprop="datePublished"))
            meta_rating = makeHTMLTags('meta')[0]
            meta_rating.setParseAction(withAttribute(itemprop="ratingValue"))
            for k in review_content:
                indiv_list = [big_link]
                if not street:
                    indiv_list.append("Missing")
                else:
                    indiv_list.append(street[0].text)
                if not locality:
                    indiv_list.append("Missing")
                else:
                    indiv_list.append(locality[0].text)
                if not state:
                    indiv_list.append("DC")
                else:
                    indiv_list.append(state[0].text)
                if not zip_code:
                    indiv_list.append("Missing")
                else:
                    indiv_list.append(zip_code[0].text)
                if not phone:
                    indiv_list.append("Missing")
                else:
                    indiv_list.append(phone[0].text.strip())
                date = next(meta_date.scanString(k))[0]
                indiv_list.append(date.content)
                stars = next(meta_rating.scanString(k))[0]
                indiv_list.append(stars.content)
                indiv_list.append(k.p.text.encode("utf-8"))
                big_list.append(indiv_list)
            time.sleep(time_between_review_pages)
        print("Scrape complete!")
        time.sleep(time_between_big_links)
        print("")
    return big_list

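# Hedged sketch of the meta-tag extraction used in data_scrape, run on a
# made-up review fragment (the HTML below is illustrative, not real Yelp
# markup). pyparsing's scanString converts its argument to a string, which is
# why it can be handed a BeautifulSoup tag directly in the function above.
from pyparsing import makeHTMLTags, withAttribute

sample_review = '''
<div class="review-content">
  <meta itemprop="datePublished" content="2014-07-01">
  <meta itemprop="ratingValue" content="4.0">
  <p>Great food.</p>
</div>
'''

meta_date = makeHTMLTags('meta')[0]
meta_date.setParseAction(withAttribute(itemprop="datePublished"))
date_tokens = next(meta_date.scanString(sample_review))[0]
print(date_tokens.content)  # -> 2014-07-01
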
#
#  withAttribute.py
#  Copyright, 2007 - Paul McGuire
#
#  Simple example of using the withAttribute parse action helper
#  to define a parse action that matches only start tags having
#  particular attribute values
#
import pyparsing as pp

data = """\
<td align=right width=80><font size=2 face="New Times Roman,Times,Serif">&nbsp;49.950&nbsp;</font></td>
<td align=left width=80><font size=2 face="New Times Roman,Times,Serif">&nbsp;50.950&nbsp;</font></td>
<td align=right width=80><font size=2 face="New Times Roman,Times,Serif">&nbsp;51.950&nbsp;</font></td>
"""

td, tdEnd = pp.makeHTMLTags("TD")
font, fontEnd = pp.makeHTMLTags("FONT")
realNum = pp.pyparsing_common.real
NBSP = pp.Literal("&nbsp;")
patt = td + font + NBSP + realNum("value") + NBSP + fontEnd + tdEnd

# always use addParseAction when adding withAttribute as a parse action to a start tag
td.addParseAction(pp.withAttribute(align="right", width="80"))

for s in patt.searchString(data):
    print(s.value)

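# A small follow-on sketch (not part of the original example; it reuses the
# data string defined above): withAttribute also accepts the sentinel
# pp.withAttribute.ANY_VALUE when only the presence of an attribute matters,
# regardless of its value. Here every <TD> with an align attribute matches,
# not just the align="right" width="80" cells.
anyAlignTd, _ = pp.makeHTMLTags("TD")
anyAlignTd.addParseAction(pp.withAttribute(align=pp.withAttribute.ANY_VALUE))
for tok, _, _ in anyAlignTd.scanString(data):
    print(tok.align)
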
def movie_search(keyword, daum_id=None):
    r = requests.get(
        "https://apis.daum.net/contents/movie?apikey={}&q={}&output=json".
        format(settings.DAUM_API_KEY, keyword))
    movie_search = r.json()
    movies_search = []
    num_of_movies = movie_search.get("channel").get("totalCount")
    for num in range(num_of_movies):
        img_url = movie_search.get("channel").get("item")[int(num)].get(
            "thumbnail")[0].get("content")
        # image sizes (S, M, L)
        image_split = img_url.rsplit('/', 5)
        index = 4
        replacement = ['R200x0.q99', 'R500x0.q99', 'R700x0.q99']
        movie_img_url = []
        for nums in range(3):
            image_split[index] = replacement[nums]
            movie_img_url.append('/'.join(image_split))
        title_link = movie_search.get("channel").get("item")[int(num)].get(
            "title")[0].get("link")
        daum_id = re.findall(r'\d+', title_link)
        title_kor = movie_search.get("channel").get("item")[int(num)].get(
            "title")[0].get("content")
        title_eng = movie_search.get("channel").get("item")[int(num)].get(
            "eng_title")[0].get("content")
        created_year = movie_search.get("channel").get("item")[int(num)].get(
            "year")[0].get("content")
        run_time = movie_search.get("channel").get("item")[int(num)].get(
            "open_info")[2].get("content")
        grade = movie_search.get("channel").get("item")[int(num)].get(
            "open_info")[1].get("content")
        synopsis = movie_search.get("channel").get("item")[int(num)].get(
            "story")[0].get("content")
        photo_list = []
        count = 1
        while True:
            try:
                photos = movie_search.get("channel").get("item")[int(
                    num)].get("photo{}".format(count)).get("content")
                photo_list.append(photos)
                count += 1
            except:
                break
        resized_photo_url = []
        for image in photo_list:
            image_split = image.rsplit('/', 5)
            index = 4
            replacement = ['R200x0.q99', 'R500x0.q99', 'R700x0.q99']
            each_movie_photo_url = []
            for nums in range(3):
                image_split[index] = replacement[nums]
                each_movie_photo_url.append('/'.join(image_split))
            resized_photo_url.append(each_movie_photo_url)
        count = 0
        nation_list = []
        while True:
            try:
                nations = movie_search.get("channel").get("item")[int(
                    num)].get("nation")[count].get("content")
                nation_list.append(nations)
                count += 1
            except:
                break
        count = 0
        genre_list = []
        while True:
            try:
                genres = movie_search.get("channel").get("item")[int(
                    num)].get("genre")[count].get("content")
                genre_list.append(genres)
                count += 1
            except:
                break
        director_info = []
        actor_info = []
        try:
            title_link = movie_search.get("channel").get("item")[int(
                num)].get("title")[0].get("link")
            response = requests.get(title_link)
            bs = BeautifulSoup(response.text, "html.parser")
            count = 0
            while True:
                used_link = bs.select("ul.list_join li")[count]
                # role
                actor_role = used_link.select('span.txt_join')[0].text
                if "감독" in actor_role:
                    name_kor = used_link.select('em.emph_point')[0].text
                    name_kor_eng = used_link.select('strong.tit_join')[0].text
                    len_of_name_kor = len(name_kor) + 1
                    # English name
                    name_eng = name_kor_eng[len_of_name_kor:]
                    a_tag = used_link.findAll(
                        'a', attrs={'href': re.compile("/person/")})[0]
                    # person ID
                    actor_id = re.findall(r'\d+', a_tag['href'])
                    img_tag = used_link.select("img")[0]
                    # person photo
                    profile_url = img_tag['src']
                    director_info.append({
                        'daum_id': actor_id,
                        'name_eng': name_eng,
                        'name_kor': name_kor,
                        'profile_url': profile_url
                    })
                    count += 1
                else:
                    name_kor = used_link.select('em.emph_point')[0].text
                    name_kor_eng = used_link.select('strong.tit_join')[0].text
                    len_of_name_kor = len(name_kor) + 1
                    # English name
                    name_eng = name_kor_eng[len_of_name_kor:]
                    a_tag = used_link.findAll(
                        'a', attrs={'href': re.compile("/person/")})[0]
                    # actor ID
                    actor_id = re.findall(r'\d+', a_tag['href'])
                    img_tag = used_link.select("img")[0]
                    # actor photo
                    profile_url = img_tag['src']
                    actor_info.append({
                        'daum_id': actor_id,
                        'name_eng': name_eng,
                        'name_kor': name_kor,
                        'profile_url': profile_url,
                        'character_name': actor_role
                    })
                    count += 1
        except:
            pass
        video_list = []
        count = 0
        while True:
            try:
                videos = movie_search.get("channel").get("item")[int(
                    num)].get("video")[count].get("link")
                if videos:
                    response_videos = requests.get(videos)
                    bs_videos = BeautifulSoup(response_videos.text,
                                              "html.parser")
                    meta, metaEnd = makeHTMLTags("meta")
                    img_meta = meta.copy().setParseAction(
                        withAttribute(('property', 'og:image')))
                    img_ref = img_meta
                    for img in img_ref.searchString(bs_videos):
                        content = img.content
                        video_trailer_id = content.split("/")[-2]
                        video_trailer_url = "http://videofarm.daum.net/controller/video/viewer/Video.html?vid={}&play_loc=daum_movie&autoplay=true".format(
                            video_trailer_id)
                        video_list.append(video_trailer_url)
                count += 1
            except:
                break
        trailer_link = movie_search.get("channel").get("item")[int(num)].get(
            "trailer")[0].get("link")
        if trailer_link:
            response = requests.get(trailer_link)
            bs = BeautifulSoup(response.text, "html.parser")
            meta, metaEnd = makeHTMLTags("meta")
            img_meta = meta.copy().setParseAction(
                withAttribute(('property', 'og:image')))
            img_ref = img_meta
            for img in img_ref.searchString(bs):
                content = img.content
                trailer_id = content.split("/")[-2]
                trailer_url = "http://videofarm.daum.net/controller/video/viewer/Video.html?vid={}&play_loc=daum_movie&autoplay=true".format(
                    trailer_id)
        movies_search.append({
            'title_kor': title_kor,
            'title_eng': title_eng,
            'nation_list': nation_list,
            # 'created_year': created_year,
            'img_url': movie_img_url,
            'run_time': run_time,
            'grade': grade,
            'director_info': director_info,
            'actor_info': actor_info,
            'genre_list': genre_list,
            # 'synopsis': synopsis,
            # 'photo_list': photo_list,
            # 'video_list': video_list,
        })
        if daum_id:
            for genres in genre_list:
                try:
                    genre = Genre.objects.create(genre=genres)
                except:
                    genre = Genre.objects.filter(genre=genres)
            try:
                grade = Grade.objects.create(grade=grade)
            except:
                grade = Grade.objects.get(grade=grade)
            for nations in nation_list:
                try:
                    nation = MakingCountry.objects.create(
                        making_country=nations)
                except:
                    pass
                    nation = MakingCountry.objects.filter(
                        making_country=nations)
            movie = Movie.objects.create(
                daum_id=daum_id[0],
                title_kor=title_kor,
                title_eng=title_eng,
                created_year=created_year,
                synopsis=synopsis,
                grade=grade,
                run_time=run_time,
                img_url=movie_img_url,
            )
            for actor in actor_info:
                actors = Actor.objects.get_or_create(
                    daum_id=actor['daum_id'][0],
                    name_eng=actor['name_eng'],
                    name_kor=actor['name_kor'],
                    profile_url=actor['profile_url'])
                movie_actor = MovieActor.objects.get_or_create(
                    movie=movie,
                    actor=actors[0],
                    character_name=actor['character_name'])
            for directors in director_info:
                director = Director.objects.get_or_create(
                    daum_id=directors['daum_id'][0],
                    name_eng=directors['name_eng'],
                    name_kor=directors['name_kor'],
                    profile_url=directors['profile_url'])
            for photo in resized_photo_url:
                try:
                    movie_image = MovieImages.objects.create(
                        movie=movie, url=photo)
                except:
                    pass
            specific_movie = Movie.objects.get(daum_id=daum_id[0])
            for genre in genre_list:
                g, created = Genre.objects.get_or_create(genre=genre)
                specific_movie.genre.add(g)
            for nation in nation_list:
                n, created = MakingCountry.objects.get_or_create(
                    making_country=nation)
                specific_movie.making_country.add(n)
            for director in director_info:
                d, created = Director.objects.get_or_create(
                    daum_id=director['daum_id'][0],
                    name_eng=director['name_eng'],
                    name_kor=director['name_kor'],
                    profile_url=director['profile_url'])
                specific_movie.director.add(d)
    return movies_search

#!/usr/bin/python
from pyparsing import makeHTMLTags, withAttribute, Suppress, Regex, Group
import os.path

"""
looking for this recurring pattern:
    <td valign="top" bgcolor="#FFFFCC">0-3</td>
    <td valign="top">.50</td>
    <td valign="top">.50</td>

and want a dict with keys 0, 1, 2, and 3 all with values (.50,.50)
"""

td, tdend = makeHTMLTags("td")
keytd = td.copy().setParseAction(withAttribute(bgcolor="#FFFFCC"))
td, tdend, keytd = map(Suppress, (td, tdend, keytd))

realnum = Regex(r'1?\.\d+').setParseAction(lambda t: float(t[0]))
integer = Regex(r'\d{1,3}').setParseAction(lambda t: int(t[0]))
DASH = Suppress('-')

# build up an expression matching the HTML bits above
entryExpr = (keytd + integer("start") + DASH + integer("end") + tdend +
             Group(2 * (td + realnum + tdend))("vals"))

# search the input HTML for matches to the entryExpr expression, and build up lookup dict
lookup = {}
html = open(os.path.join(os.path.dirname(__file__), '..', '..',
                         'fide.html')).read()
for entry in entryExpr.searchString(html):
    for i in range(entry.start, entry.end + 1):
        lookup[i] = tuple(entry.vals)

# print the first column of the dictionary to a text file

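# A possible completion of the final comment above (the output file name and
# one-key-per-line format are assumptions): write the first column, i.e. the
# integer keys of the lookup dict, to a text file.
with open('first_column.txt', 'w') as out:
    for key in sorted(lookup):
        out.write('{}\n'.format(key))
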
def lr1():
    import urllib.request
    import urllib.parse
    # import requests
    from pyparsing import makeHTMLTags, SkipTo, withAttribute
    from prettytable import PrettyTable

    print("Parsing https://www.worldcoinindex.com/")
    url = 'https://www.worldcoinindex.com'
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    resp = urllib.request.urlopen(req)
    respData = str(resp.read())
    resp.close()

    tbody_Start, tbody_End = makeHTMLTags('tbody')
    tbody = tbody_Start + SkipTo(tbody_End)("body") + tbody_End
    tbody_string = ""
    for tokens, start, end in tbody.scanString(respData):
        tbody_string = tbody_string + tokens.body
    # print(tbody_string)

    # creating a list for bitcoin names
    btc = []
    # parsing bitcoin names
    h1_Start, h1_End = makeHTMLTags('h1')
    h1_body = h1_Start + SkipTo(h1_End)("body") + h1_End
    bitcoin_name = ""
    for tokens, start, end in h1_body.scanString(tbody_string):
        bitcoin_name = bitcoin_name + "\n" + tokens.body

    # getting rid of <span>
    span_start, span_end = makeHTMLTags("span")
    span_body = span_start + SkipTo(span_start | span_end)("body")
    for tokens, start, end in span_body.scanString(bitcoin_name):
        btc.append(tokens.body)

    # creating a list for bitcoin prices
    prices = []
    # parsing bitcoin prices
    price_start, price_end = makeHTMLTags('td')
    price_td = price_start.setParseAction(
        withAttribute(**{"class": "number pricekoers lastprice"}))
    price_body = price_td + SkipTo(price_start | price_end)("body")
    price_string = ""
    for tokens, start, end in price_body.scanString(respData):
        price_string = price_string + "\n" + tokens.body

    # getting rid of <span>
    span_class = span_start.setParseAction(withAttribute(**{"class": "span"}))
    span_body = span_class + SkipTo(span_class | span_end)("body")
    for tokens, start, end in span_body.scanString(price_string):
        prices.append(tokens.body)
    # print(prices)

    # generating PrettyTable
    t = PrettyTable()
    t.field_names = [" ", "Name", "Recent Price"]
    i = 0
    for x in btc:
        t.add_row([i + 1, x, prices[i]])
        i = i + 1
    t.align["Name"] = "c"
    t.align["Recent Price"] = "c"
    print(t)

    # saving data
    f = open('logs.txt', 'w')
    f.write(str(t))
    f.close()