Code example #1
def scrp_craigs(neighbor):
    neighbor = neighbor.replace(' ', '+')
    my_url = 'https://pittsburgh.craigslist.org/search/apa?query=' + neighbor + '&availabilityMode=0&sale_date=all+dates'
    # print(my_url)
    request = url(my_url)
    htmlscrap = request.read()
    request.close()
    page_soup = soup(htmlscrap, "html.parser")  # parsing as html
    container = page_soup.find('ul', {'class': 'rows'})
    result_rows = container.findAll('li', {'class': 'result-row'})
    names = [i.find('a', {'class': 'result-title hdrlnk'}) for i in result_rows[0:50]]
    prices = [i.find('span', {'class': 'result-price'}) for i in result_rows[0:50]]
    url_rows = [i.find('a').get('href') for i in result_rows[: 50]]
    result_name = []
    result_price = []
    for i in names:
        try:
            result_name.append(i.get_text())
        except AttributeError:
            result_name.append('N/A')
    for i in prices:
        try:
            result_price.append(i.get_text().lstrip())
        except AttributeError:
            result_price.append('N/A')

    # print(result_name)
    result_address = open_url(url_rows)
    result = []
    for i in range(len(result_name)):
        result.append([result_name[i], result_price[i], result_address[i], url_rows[i]])
    return result
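The function above assumes the urlopen/BeautifulSoup aliases shown in code example #13 below and the open_url() helper from code example #10. A minimal usage sketch under those assumptions (the neighbourhood name is illustrative):

from urllib.request import urlopen as url   # aliases assumed by scrp_craigs()
from bs4 import BeautifulSoup as soup

for name, price, address, link in scrp_craigs('squirrel hill'):
    print(name, price, address, link)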
Code example #2
def acceptNavigationRequest(self, frame, request, navigation_type):
    if (navigation_type == QWebPage.NavigationTypeLinkClicked
            and self.linkDelegationPolicy() == QWebPage.DontDelegateLinks
            and request.url().scheme() in ('sip', 'sips')):
        blink = QApplication.instance()
        contact, contact_uri = URIUtils.find_contact(request.url().toString())
        session_manager = SessionManager()
        session_manager.create_session(contact, contact_uri,
                                       [StreamDescription('audio')])
        blink.main_window.raise_()
        blink.main_window.activateWindow()
        return False
    return super(WebPage, self).acceptNavigationRequest(frame, request,
                                                        navigation_type)
Code example #3
File: web_scraping.py Project: JianhangYin/ToolBox
def scraping():
    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]) or '.')
    nutrition_info_path = os.path.join(
        script_dir, './resource/meal_problem/nutrition_info.csv')
    final_rating_data_path = os.path.join(
        script_dir, './resource/meal_problem/final_rating_data.csv')

    all_url = [
        'https://www.epicurious.com/recipes-menus/what-to-cook-this-weekend-february-22-24-gallery',
        'https://www.epicurious.com/recipes-menus/what-to-cook-this-weekend-february-8-10-gallery',
        "https://www.epicurious.com/ingredients/acorn-delicata-kabocha-spaghetti-squash-winter-recipes-gallery",
        'https://www.epicurious.com/recipes-menus/easy-dinner-recipes-for-cook90-gallery',
        'https://www.epicurious.com/recipes-menus/our-favorite-cook90-lunches-gallery',
        'https://www.epicurious.com/recipes-menus/make-ahead-weeknight-dinners-stew-soup-freezer-casserole-quick-easy'
        '-recipes-gallery',
    ]

    ep_urls = set()
    for i in all_url:
        initializer = url(i)
        res = bs(initializer.read(), "html.parser")
        for div in res.findAll(
                'div', {'class': 'gallery-slide-caption__dek-container'}):
            ep_urls.update([div.find('a')['href']])

    p = multiprocessing.Pool(4)
    output = p.map(Recipe, ep_urls)
    ar = []
    for i in output:
        ar.append(i.__dict__)
    df = pd.DataFrame(ar)
    df = df.dropna(axis=0)
    df = df[df['personal_rating'].map(len) > 9]
    df = df.sort_values(by=['title'])
    df.to_csv(nutrition_info_path)

    p_r = pd.DataFrame(columns=['title', 'user', 'rating'])
    count = 0
    for i in range(df.shape[0]):
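        # df.iloc[i, 6] appears to hold recipe i's list of (user, rating) pairs collected by Recipe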
        for j in df.iloc[i, 6]:
            p_r.loc[count] = [df.iloc[i, 0], j[0], j[1]]
            count += 1

    user_reviews2 = p_r['user'].value_counts()[
        p_r['user'].value_counts() > 0].index
    trun_recipes_user_review = p_r[p_r['user'].isin(user_reviews2)]
    trun_recipes_user_review = trun_recipes_user_review.drop_duplicates(
        ['user', 'title'])
    trun_recipes_user_review_matrix = trun_recipes_user_review.pivot(
        index='user', columns='title', values='rating')
    final_rating_data = pd.DataFrame(
        columns=trun_recipes_user_review_matrix.columns)
    for i in range(trun_recipes_user_review_matrix.shape[0] // 50):
        temp = trun_recipes_user_review_matrix.iloc[50 * i:50 * i + 50].mean(
            skipna=True, axis=0)
        temp.name = 'user' + str(i)
        final_rating_data.loc[i] = temp

    final_rating_data.to_csv(final_rating_data_path)
Code example #4
def sound(text):
    t = encode((('text', text), ('format', 'wav'), ('lang', 'ru-RU'),
                ('speaker',
                 'zahar'), ('key', '1e0146ea-0b80-425b-8057-fd2feb8ef590')))
    ans = url('https://tts.voicetech.yandex.net/generate?' + t).read()
    f = open('bereza.wav', 'wb')
    f.write(ans)
    play('bereza.wav', fn)
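The aliases encode, url, play and fn used here are the urlencode/urlopen/winsound imports shown in code examples #20 and #22 below; a one-line usage sketch (the phrase is illustrative):

sound('Привет, мир!')  # synthesizes the phrase, writes bereza.wav, then plays it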
Code example #5
def add_news(wiki_url):
    """
    input : a Wikipedia URL
    action : add the URL and its computed keywords to the database (bdd)
    """
    with url(wiki_url) as response:
        html = response.read()
        wiki_keywords = keywords(html).split('\n')
        wiki_keywords = wiki_keywords[:10]
        ID_news = set_news(html,wiki_keywords)
    return ID_news, wiki_keywords
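A minimal usage sketch, assuming keywords() and set_news() from the surrounding project are importable (the Wikipedia URL is illustrative):

news_id, news_keywords = add_news('https://fr.wikipedia.org/wiki/Python_(langage)')
print(news_id, news_keywords)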
Code example #6
    def createRequest(self, op, request, device):
        # prefer valid cache instead of network
        self.reqCount = self.reqCount + 1
        if re.match(".*?pubads\.g\.doubleclick\.net*", request.url().toString()):
            request.setUrl(QUrl("http://img6a.flixcart.com/www/prod/images/flipkart_india-e5f5aa9f.png"))
            # print "REQ :",self.reqCount," ",request.url()

        request.setAttribute(QNetworkRequest.CacheLoadControlAttribute, QNetworkRequest.PreferCache)

        reply = QNetworkAccessManager.createRequest(self, op, request, device)
        reply.error.connect(self.logNetworkErrors)

        return reply
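A wiring sketch, not taken from the original project: assuming createRequest() above lives on a QNetworkAccessManager subclass (the name AdBlockingManager is made up) in a PyQt4/QtWebKit application, it would be attached to a page roughly like this:

from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebView

app = QApplication([])
manager = AdBlockingManager()   # hypothetical subclass containing createRequest() above
view = QWebView()
view.page().setNetworkAccessManager(manager)
view.load(QUrl('http://www.flipkart.com/'))
view.show()
app.exec_()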
Code example #7
def seed():
    """
    action : add multiple URLs and their computed keywords to the database (bdd),
             showing a progress bar
    """
    urls = ["https://fr.wikipedia.org/wiki/Redis",
        "https://fr.wikipedia.org/wiki/C_(langage)",
        "https://fr.wikipedia.org/wiki/Licence_BSD",
        "https://fr.wikipedia.org/wiki/NoSQL"
       ]
    bar = Bar('seed of redis database', max=len(urls))
    for wiki_url in urls:
        with url(wiki_url) as response:
            html = response.read()
            wiki_keywords = keywords(html).split('\n')
            set_news(html,wiki_keywords[:10])
            bar.next()
    bar.finish()
Code example #8
    def createRequest(self, op, request, device):
        # prefer valid cache instead of network
        self.reqCount = self.reqCount + 1
        if re.match(".*?pubads\.g\.doubleclick\.net*",
                    request.url().toString()):
            request.setUrl(
                QUrl(
                    "http://img6a.flixcart.com/www/prod/images/flipkart_india-e5f5aa9f.png"
                ))
            # print "REQ :",self.reqCount," ",request.url()

        request.setAttribute(QNetworkRequest.CacheLoadControlAttribute,
                             QNetworkRequest.PreferCache)

        reply = QNetworkAccessManager.createRequest(self, op, request, device)
        reply.error.connect(self.logNetworkErrors)

        return reply
Code example #9
File: parseData.py Project: ctwplyp/Python
def getAllPages():
    pizza_url = "https://www.yelp.com/search?find_desc=pizza&find_loc=New+York+NY&ns=1"
    request = url(pizza_url)
    htmlscrap = request.read()
    request.close()
    page_soup = soup(htmlscrap, "html.parser")  #parsing as html
    #review_counter=i.findAll("li",{"class":"review-count responsive-small-display-inline-block"})

    body = page_soup.findAll("h3", {"class": "search-result-title"})
    pageRef = {}
    for i in range(1, len(body)):  #Skipping the first record
        body1 = body[i].findAll("a", {"class": "biz-name js-analytics-click"})
        name = body1[0].find("span")
        href = body1[0]["href"]
        #  print(body1)
        name_val = name.string
        #  name = name.text.replace("</span>","")
        #  print(href)
        #  print(name_val)
        pageRef[href] = name_val
        for p in pageRef:
            print(p)
Code example #10
def open_url(args):
    try:
        iter(args)
        result_ = []
        for i in args:
            # print(i)
            open_request = url(i)
            html_scrap = open_request.read()
            # open_request.close()
            house_page = soup(html_scrap, 'html.parser')
            try:
                address = house_page.find('div', {'class': 'mapaddress'}).get_text()
                result_.append(address)
            except AttributeError:
                result_.append('N/A')
            # print(i)
            # print(address)

        # print(i for i in result_)
        # result = [i.get_text() for i in result_]
        return result_
    except TypeError:
        return 'not data'
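open_url() is the address-lookup helper consumed by scrp_craigs() in code example #1; a short usage sketch with a placeholder listing URL:

addresses = open_url([
    'https://pittsburgh.craigslist.org/apa/d/placeholder-listing/0000000000.html',  # placeholder, not a real posting
])
print(addresses)  # entries fall back to 'N/A' when a page has no mapaddress block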
Code example #11
File: parseData.py Project: ctwplyp/Python
def parseReviews(review_count):
    x = 0
    filename = "datasets.csv"  #saving data as csv
    f = open(filename, "w")
    headers = "Name,Friend Count,Photo Count,Review Count,Elite Member,Funny Count,Cool Count,Useful Count,Review Length,Checkin Count\n"  #these are the features that are scraped
    f.write(headers)
    total_rev_rating = 0
    # regex could have been used here, but this simply steps through the reviews one by one
    for count in range(review_count):
        my_url = "https://www.yelp.com/biz/julianas-pizza-brooklyn-5?o?sort_by=date_desc"
        request = url(my_url)  # taking url as a parameter
        htmlscrap = request.read()
        request.close()
        page_soup = soup(htmlscrap, "html.parser")  #parsing as html
        body_t = page_soup.findAll("script", {"type": "application/ld+json"})
        body_text = body_t[0].text
        json1_data = json.loads(body_text)
        json_reviews1 = json1_data['review'][count]
        rating = json_reviews1['reviewRating']
        rating_value = rating['ratingValue']
        description = json_reviews1['description']
        date = json_reviews1['datePublished']
        reviewTemp = Review(description, rating_value, date)
        total_rev_rating = total_rev_rating + rating_value
        #print(rating_value)
    average = total_rev_rating / review_count
    print(average)
    # print(ratingValue)
    # print(description)
    # print(reviewTemp.descr)
    #urls = [el['url'] for el in json.loads(body_t.text)['itemListElement']]

    #cprint(urls)

    #print(type(body_t))
    f.close()
Code example #12
  def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse
    """


    if len(request.url()) >= _MAX_URL_LENGTH:
      logging.error('URL is too long: %s...' % request.url()[:50])
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    (protocol, host, path, query, fragment) = (
        urllib.parse.urlsplit(request.url().decode()))

    payload = None
    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
      method = 'GET'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
      method = 'POST'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
      method = 'HEAD'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
      method = 'PUT'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
      method = 'DELETE'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PATCH:
      method = 'PATCH'
      payload = request.payload()
    else:
      logging.error('Invalid method: %s', request.method())
      raise apiproxy_errors.ApplicationError(
        urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not (protocol == 'http' or protocol == 'https'):
      logging.error('Invalid protocol: %s', protocol)
      raise apiproxy_errors.ApplicationError(
        urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host:
      logging.error('Missing host.')
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
                              request.header_list())
    deadline = _API_CALL_DEADLINE
    if request.has_deadline():
      deadline = request.deadline()
    validate_certificate = _API_CALL_VALIDATE_CERTIFICATE_DEFAULT
    if request.has_mustvalidateservercertificate():
      validate_certificate = request.mustvalidateservercertificate()

    fetch_function = self._GetFetchFunction(request.url())
    fetch_function(request.url().decode(), payload, method,
                   request.header_list(), request, response,
                   follow_redirects=request.followredirects(),
                   deadline=deadline,
                   validate_certificate=validate_certificate)
Code example #13
from urllib.request import urlopen as url  # importing urllib for url request
from bs4 import BeautifulSoup as soup
x = 0
filename = "datasets.csv"  # saving data as csv
f = open(filename, "w")
# these are the features that are scraped
headers = "Name,Friend Count,Photo Count,Review Count,Elite Member,Funny Count,Cool Count,Useful Count,Review Length,Checkin Count\n"
f.write(headers)

for _ in range(4):  # regex could have been used here but this is to increment the url page (keeping it simple)

    my_url = "https://www.yelp.ca/search?find_desc=Indian&find_loc=Toronto%2C+ON" + \
        str(x)
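    # note: x is not incremented anywhere in this excerpt, so every iteration requests the same URL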
    request = url(my_url)  # taking url as a parameter
    htmlscrap = request.read()
    request.close()
    page_soup = soup(htmlscrap, "html.parser")  # parsing as html
    # the class name where all the features are contained
    container = page_soup.findAll("div",
                                  {"class": "review review--with-sidebar"})
    # print(len(container))

    for i in container:

        # print(containers)
        friend_counter = i.findAll(
            "li",
            {"class": "friend-count responsive-small-display-inline-block"})
        friend_count = friend_counter[0].b.text
Code example #14
 def _Dynamic_Fetch(self, request, unused_response):
   if request.url() == 'exception':
     raise IOError('the remote error')
   elif request.url() == 'application_error':
     raise apiproxy_errors.ApplicationError(23, 'details')
Code example #15
    def check_citation(self, citation):
        #NOTE: when calling this method, wrap it in a try/except and print any error together with the citation status

        try:
            pattern = re.compile("[ ][0-9]{4}")
            result = pattern.search(citation)
            self.year = result.group(0)[1:]
        except:
            raise Exception("Unable to find year in citation.")

        self.citation_status = MLACitationStatus.AUTHOR

        cursor = 0

        while True:
            ascii_value = ord(citation[cursor])

            # advance while the character is a space, apostrophe, comma, hyphen, period, an English letter, or a Latin-1 letter, and the ". " separator has not been reached
            if citation[cursor:cursor + 2] != ". " and (
                    ascii_value == 32 or ascii_value == 39
                    or 44 <= ascii_value <= 46 or 65 <= ascii_value <= 90
                    or 97 <= ascii_value <= 122 or 192 <= ascii_value <= 255):
                cursor += 1
            else:
                break

        if cursor != 0:
            author_section = ""
            if citation[cursor:cursor + 2] == ". ":
                author_section = citation[:cursor + 1]
            else:
                raise Exception(
                    "Bad formatting in the author section (unknown error).")

            # three or more authors
            if ", et al." in author_section:
                temp = author_section.replace(", et al", "")
                authors = temp.split(", ")
                filteredAuthor = [self.filter_latin(i) for i in authors]

                if re.match("^[A-Za-z][A-Za-z-' ]+$", filteredAuthor[0]) is not None \
                and re.match("^[A-Z][A-Za-z-'. ]+[.]$", filteredAuthor[1]) is not None:
                    self.authors.append(authors[0] + ", et al.")
                else:
                    raise Exception("Bad formatting in the author section: '" +
                                    author_section + "'")

            # two authors
            elif ", and " in author_section:
                authors = author_section.split(", and ")
                if ", " not in authors[0]:
                    raise Exception("Bad formatting in the author section: '" +
                                    author_section + "'")

                firstAuthor = authors[0].split(", ")
                filteredFirstAuthor = [
                    self.filter_latin(i) for i in firstAuthor
                ]

                if re.match("^[A-Za-z][A-Za-z-' ]+$", filteredFirstAuthor[0]) is not None \
                and re.match("^[A-Z][A-Za-z-'. ]+$", filteredFirstAuthor[1]) is not None:
                    self.authors.append(firstAuthor[0])
                else:
                    raise Exception("Bad formatting in the author section: '" +
                                    author_section + "'")

                if " " not in authors[1]:
                    raise Exception("Bad formatting in the author section: '" +
                                    author_section + "'")

                secondAuthor = authors[1].split(" ", 1)
                filteredSecondAuthor = [
                    self.filter_latin(i) for i in secondAuthor
                ]

                if re.match("^[A-Z][A-Za-z-']+$", filteredSecondAuthor[0]) is not None \
                and re.match("^[A-Za-z][A-Za-z-'. ]+[.]$", filteredSecondAuthor[1]) is not None:
                    self.authors.append(filteredSecondAuthor[1][:-1])

                elif re.match("^[A-Za-z][.]$",
                              filteredSecondAuthor[1]) is not None:
                    author_cursor = cursor + 2
                    actualSecondAuthor = ""

                    while citation[author_cursor:author_cursor + 2] != ". ":
                        actualSecondAuthor += citation[author_cursor]
                        author_cursor += 1

                    self.authors.append(actualSecondAuthor)

                else:
                    raise Exception("Bad formatting in the author section: '" +
                                    author_section + "'")

            # one author
            elif ", " in author_section:
                authors = author_section.split(", ")
                filteredAuthor = [self.filter_latin(i) for i in authors]

                if re.match("^[A-Za-z][A-Za-z-' ]+$", filteredAuthor[0]) is not None \
                and re.match("^[A-Z][A-Za-z-' ]+[.]$", filteredAuthor[1]) is not None:
                    self.authors.append(authors[0])
                else:
                    raise Exception("Bad formatting in the author section: '" +
                                    author_section + "'")

            elif "et. al." in author_section or "et.al." in author_section:
                raise Exception(
                    "'Et al.' should not have a period after the 'Et'.")
            # no match; bad formatting
            else:
                raise Exception("Bad formatting in the author section: '" +
                                author_section + "'")

        self.citation_status = MLACitationStatus.TITLE
        cursor += 1
        # check the title section
        if citation[cursor:cursor + 3] == "<i>":
            cursor += 3
        elif citation[cursor + 1:cursor + 4] == "<i>":
            cursor += 4
        elif citation[cursor + 1] == "\"":
            cursor += 2
        elif citation[cursor - 1:cursor + 1] == ".\"":
            raise Exception("Bad formatting in the title section.")

        title = ""

        while citation[cursor] != ".":
            title += citation[cursor]
            cursor += 1

        title = title.replace("\"", "")
        title = title.replace("</i>", "")

        if title[0] == " ":
            title = title[1:]

        if citation[cursor + 1] == "\"":
            cursor += 2
        else:
            cursor += 1
        #now cursor should be at the beginning of italics

        result = url("https://brettterpstra.com/titlecase/?title=" + title)
        title_cased_title = result.read().decode('utf-8')

        if title != title_cased_title:
            self.warnings.append(
                "the title might contain improper capitalization: '" + title +
                "'")

        self.title = title

        # check for url
        self.citation_status = MLACitationStatus.URL

        extractor = URLExtract()
        if extractor.has_urls(citation):
            urls = extractor.find_urls(citation)
            self.url = urls[0][:-1]
            if self.url + "." not in citation:
                raise Exception("Bad formatting in the URL section.")

            if citation[cursor:cursor +
                        3] != "<i>" and citation[cursor + 1:cursor +
                                                 4] != "<i>":
                self.warnings.append(
                    "the container may not exist or may not be italicized")

        elif citation[cursor:cursor +
                      3] == "<i>" and citation[cursor + 1:cursor + 4] == "<i>":
            self.warnings.append(
                "the container might exist when not necessary (if the citation is about a book), or the block immediately following the title may be improperly italicized."
            )

        if self.url != "":
            citation = citation.replace(self.url + ".", "")

        # check for other info
        # right now, it's too complex to validate the entire MLA citation without prior knowledge on what type of citation it is,
        # so the other info is just stored without checking
        self.citation_status = MLACitationStatus.OTHER_INFO

        remainingText = citation[cursor:]
        info = remainingText.split(", ")
        self.otherInfo = [i for i in info]
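A hypothetical driver for the method above; the MLACitation class name and the sample citation are assumptions for illustration only, since the enclosing class is not shown:

checker = MLACitation()   # hypothetical wrapper exposing check_citation(), authors, title, url, warnings
checker.check_citation('Smith, John. "An Example Article." <i>Journal of Examples</i>, 2019, www.example.com/article.')
print(checker.authors, checker.title, checker.warnings)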
Code example #16
File: web_scraping.py Project: JianhangYin/ToolBox
 def __init__(self, page):
     print('attempting to build from: ' + page)
     try:
         self.build_recipe(bs(url(page), 'html.parser'))
     except Exception as x:
         print('Could not build from %s, %s' % (page, x))
Code example #17
#Testerino

#Libs
from urllib.request import urlopen as url
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from util import parse_damage_stats
import csv
import MySQLdb

#Loading the page into bs from weapon page url
page_link = 'http://darksouls.wikidot.com/weapons'
page = url(page_link)
soup = BeautifulSoup(page, 'html.parser')

#Writing to local mysql db
conn = MySQLdb.connect(host='localhost', user='******', passwd='Fr0ntranger')
cursor = conn.cursor()
cursor.execute('USE dark_souls')
#
page_content = soup.findAll('div', {'id': 'page-content'})
weapon_table = page_content[0].table.tr.findAll('td')[1]
weapon_list = weapon_table.findAll('a')

print('There are ' + str(len(weapon_list)) + ' weapons in Dark Souls 1: ')

#Write weapon stats to a csv
with open('weapon_data.csv', 'w') as csv_file:

    writer = csv.writer(csv_file)
    header = ["Name", "Physical", "Magic", "Fire", "Lightning", "Total AR"]
Code example #18
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as url
file = open("comic.txt", "r")
name = input("Enter Name of Comic")
var = bs(file, "html.parser")
print("URL parsered to BS4")
list = var.findAll("img", {"rel": "noreferrer"})
index = 1
for images in list:
    file_loc = str(images["src"])
    temp = url(file_loc)
    output = open("Comics/file" + str(index) + ".jpg", "wb")  # writes into a local Comics/ folder
    output.write(temp.read())
    output.close()
    print("Downloaded file " + str(index) + " of " + str(len(list)))
    index = index + 1
    break  # stops after downloading only the first image
print("Download Complete")
Code example #19
def sound(text):
    t = encode((('text', text), ('format', 'wav'), ('lang', 'ru-RU'), ('speaker', 'zahar'), ('key', '1e0146ea-0b80-425b-8057-fd2feb8ef590')))
    ans = url('https://tts.voicetech.yandex.net/generate?' + t).read()
    f = open('bereza.wav', 'wb')
    f.write(ans)
    play('bereza.wav', fn)
Code example #20
from winsound import PlaySound as play
from winsound import SND_FILENAME as fn
from urllib.request import urlopen as url
from urllib.parse import urlencode as encode
URL = "http://baneks.ru/"
aneks = []

def sound(text):
    t = encode((('text', text), ('format', 'wav'), ('lang', 'ru-RU'), ('speaker', 'zahar'), ('key', '1e0146ea-0b80-425b-8057-fd2feb8ef590')))
    ans = url('https://tts.voicetech.yandex.net/generate?' + t).read()
    f = open('bereza.wav', 'wb')
    f.write(ans)
    play('bereza.wav', fn)
    
for i in range(1, int(input()) + 1):
    html = url(URL + str(i)).read().decode("utf8")
    start = html.find('<meta name="description" content="') + len('<meta name="description" content="')
    end = html.find('<meta name="keywords"') - 7
    aneks.append(html[start:end])
for i in range(0, len(aneks)):
    sound(aneks[i])
Code example #21
from ssl import SSLContext, PROTOCOL_TLSv1
from urllib.request import urlopen as url
import datetime
import cv2          # used below via cv2.cv2.face, cv2.CascadeClassifier, cv2.imdecode
import numpy as n   # used below via n.array and n.uint8

recognize = cv2.cv2.face.LBPHFaceRecognizer_create()
recognize.read('trainer/trainer.yml')
cascade = 'haarcascade_frontalface_default.xml'
faceClassifier = cv2.CascadeClassifier(cascade)
fontStyle = cv2.FONT_HERSHEY_SIMPLEX
webcamServerIP = 'https://192.168.1.93:8080/shot.jpg'

while True:

    now = datetime.datetime.now()
    contxt = SSLContext(PROTOCOL_TLSv1)
    inf = url(webcamServerIP, context=contxt).read()
    npImg = n.array(bytearray(inf), dtype=n.uint8)
    i = cv2.imdecode(npImg, -1)
    grayscale = cv2.cvtColor(i, cv2.COLOR_BGR2GRAY)
    face = faceClassifier.detectMultiScale(grayscale, 1.3, 5)
    unknownCounter = 0

    with open("personnel.log", "w") as f:

        for x, y, w, z in face:

            cv2.rectangle(i, (x - 20, y - 20), (x + w + 20, y + z + 20),
                          (0, 255, 0), 4)
            ID, person = recognize.predict(grayscale[y:y + z, x:x + w])
            log = []
Code example #22
from winsound import PlaySound as play
from winsound import SND_FILENAME as fn
from urllib.request import urlopen as url
from urllib.parse import urlencode as encode

URL = "http://baneks.ru/"
aneks = []


def sound(text):
    t = encode((('text', text), ('format', 'wav'), ('lang', 'ru-RU'),
                ('speaker',
                 'zahar'), ('key', '1e0146ea-0b80-425b-8057-fd2feb8ef590')))
    ans = url('https://tts.voicetech.yandex.net/generate?' + t).read()
    f = open('bereza.wav', 'wb')
    f.write(ans)
    play('bereza.wav', fn)


for i in range(1, int(input()) + 1):
    html = url(URL + str(i)).read().decode("utf8")
    start = html.find('<meta name="description" content="') + len(
        '<meta name="description" content="')
    end = html.find('<meta name="keywords"') - 7
    aneks.append(html[start:end])
for i in range(0, len(aneks)):
    sound(aneks[i])
Code example #23
  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL over network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL_ERROR in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
    """
    last_protocol = ''
    last_host = ''
    if isinstance(payload, str):
      payload = payload.encode('utf-8')

    for redirect_number in range(MAX_REDIRECTS + 1):
      parsed = urllib.parse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      port = urllib.parse.splitport(urllib.parse.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.error(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:

        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)




      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:


        adjusted_headers['Content-Length'] = str(len(payload))


      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        if header.key().decode().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value().decode(), adjusted_headers['User-Agent']))
        else:
          if header.key().decode().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().decode().title()] = (
              header.value().decode())

      if payload is not None:
        escaped_payload = repr(payload)  # keep the payload printable for the debug log below
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = http.client.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):

            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = http.client.HTTPSConnection
        else:

          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)



        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:


            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except _fancy_urllib_InvalidCertException as e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
      except _fancy_urllib_SSLError as e:
        app_error = (
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
            if 'timed out' in str(e) else
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
        raise apiproxy_errors.ApplicationError(app_error, str(e))
      except socket.timeout as e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (http.client.error, socket.error, IOError) as e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))

      if http_response.status in REDIRECT_STATUSES and follow_redirects:

        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
              error_msg)



        if (http_response.status != http.client.TEMPORARY_REDIRECT and
            method not in PRESERVE_ON_REDIRECT):
          logging.warn('Received a %s to a %s. Redirecting with a GET',
                       http_response.status, method)
          method = 'GET'
          payload = None
      else:
        response.set_statuscode(http_response.status)
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          gzip_stream = io.BytesIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()
        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])


        for header_key in list(http_response.msg.keys()):
          for header_value in http_response.msg.get_all(header_key):
            if (header_key.lower() == 'content-encoding' and
                header_value == 'gzip' and
                not passthrough_content_encoding):
              continue
            if header_key.lower() == 'content-length' and method != 'HEAD':
              header_value = str(len(response.content()))
            header_proto = response.add_header()
            header_proto.set_key(header_key.encode())
            header_proto.set_value(header_value.encode())

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)



        if request.url() != url:
          response.set_finalurl(url.encode())


        break
    else:
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
          error_msg)
Code example #24
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as url

apple = url('https://www.flipkart.com/search?q=apple+mobiles&as=on&as-show=on&otracker=AS_Query_OrganicAutoSuggest_1_5_na_na_na&otracker1=AS_Query_OrganicAutoSuggest_1_5_na_na_na&as-pos=1&as-type=RECENT&suggestionId=apple+mobiles&requestId=5e21fc5c-dff2-4485-8f4c-657dcba0ce2c&as-searchtext=apple')


# opening the connection
mobile_html = apple.read()
apple.close()

mobile_soup = soup(mobile_html, "html.parser")
# print(mobile_soup.head)
# print(mobile_soup.p)

# grabs the product
iphones = mobile_soup.findAll("div", {"class": "_3wU53n"})
print(len(iphones))  # total items

firstproduct = iphones[0]
tenthprdct = iphones[10]
print(firstproduct)
print(tenthprdct)

# no of ratings of mobiles
mob_rating = mobile_soup.findAll("span", {"class": "_38sUEc"})
print(len(mob_rating))
print(mob_rating[10].text)


# specs of mobiles
mob_specs = mobile_soup.findAll("ul", {"class": "vFw0gD"})
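A short follow-up sketch: class names such as _3wU53n and _38sUEc are Flipkart's generated CSS classes, so they are tied to the page version being scraped and may change without notice.

for phone in iphones:   # print every product title collected above
    print(phone.text)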
Code example #25
File: Task_2.py Project: Creinman/University-works
from urllib.request  import urlopen as url
from pathlib import Path as pth
import re
from scipy.special import spherical_jn as jn, spherical_yn as yn
import numpy as np
import matplotlib.pyplot as plt

variant = 9 
C=300000000
PI=3.1415926535

file = pth('./taskfile.txt')
if not file.exists():
  txt = url('https://jenyay.net/uploads/Student/Modelling/task_02.txt').read()
  f0 = file.open('wb')
  f0.write(txt)
  f0.close()

if file.exists():
  f1 = file.open()
lines = [x for x in f1]
p = re.compile(r'[0-9\.\-e]+')
m = p.findall(lines[variant-1])
print(m[1:])
f1.close()

D = float(m[1])
fmin = float(m[2])
fmax = float(m[3])
f = np.linspace(fmin, fmax, 400)
r = D/2
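A minimal continuation sketch (an assumption, not part of the original Task_2.py): the frequency grid would typically be converted to a wavenumber and size parameter before evaluating the spherical Bessel functions imported above.

lam = C / f          # wavelength for each frequency sample, m
k = 2 * PI / lam     # free-space wavenumber, rad/m
kr = k * r           # argument for spherical_jn / spherical_yn
print(kr[:5])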
Code example #26
File: scrape.py Project: nocount/dnd-gen
from random import randint

from bs4 import BeautifulSoup
from urllib.request import urlopen as url

# Loading the page into bs from weapon page url
page_link = 'http://dnd5e.wikidot.com/'
page = url(page_link)
soup = BeautifulSoup(page, 'html.parser')

page_content = soup.findAll('div', {'id': 'page-content'})


def scrape_classes():
    classes = {}
    class_links = {}
    class_categories = page_content[0].findAll('div', {'class': 'col-md-7'})
    for c in class_categories:
        single_class = c.find('h1')
        if single_class:
            class_name = single_class.find('a').text

            if c.find('p'):
                sub_classes_ref = c.p.findAll('a')
                sub_classes = []
                for i in sub_classes_ref:
                    sub_classes.append(i.text)
                    class_links[i.text] = i.get('href')

                classes[class_name] = sub_classes
Code example #27
 def _Dynamic_Fetch(self, request, unused_response):
     if request.url() == 'exception':
         raise IOError('the remote error')
     elif request.url() == 'application_error':
         raise apiproxy_errors.ApplicationError(23, 'details')