Example #1
 def test_visualize_network(self):
     rv = self.app.post("/models/visualize-network", data={"custom_network": self.CAFFE_NETWORK})
     s = BeautifulSoup(rv.data)
     body = s.select("body")
     assert rv.status_code == 200, "POST failed with %s\n\n%s" % (rv.status_code, body)
     image = s.select("img")
     assert image is not None, "didn't return an image"
Example #2
def get_Data(url):
    start = time.time()
    page = requests.get(url).content
    soup = BeautifulSoup(page, "html.parser", from_encoding="gb2312")
    title = soup.find_all("div", class_="report-title")[0]("h1")[0].text
    end = time.time()
    print "%s runs %0.2f seconds." % (title, (end - start))
Example #3
def getDelays(URL):
    r = requests.get(URL)
    soup = BeautifulSoup(r.text, "html")
    div = soup.find("div", "internal-box2-inner")
    delays = [p.string for p in div.findAll("p")]
    # last three <p></p> instances are junk
    return delays[:-3]
Example #4
 def get_img(self, url="http://www.mzitu.com/"):
     self.url = url
     html = self.get_html()
     soup = BeautifulSoup(html, "lxml")
     img_list = soup.find_all("img", class_="lazy")
     url_list = [i.get("data-original") for i in img_list if i]
     return set(url_list)
Example #5
def get_results(moss_url):
    resp = r.get(moss_url)
    soup = BeautifulSoup(resp.content.decode("utf-8"), "html5lib")

    ps = soup("p")
    name = None
    if len(ps) > 2:
        name = ps[2].text.strip()
    if not name:
        name = "moss_%s" % date_str()

    matches = []

    for row in soup.table("tr")[1:]:
        first, second, lines = map(lambda x: x.text, row("td"))
        first = parse_col(first)
        second = parse_col(second)
        lines = int(lines)
        url = row.a["href"]
        matches.append(Match(first, second, lines, url))

    fil = Filter()
    matches = list(filter(fil.include, matches))

    return Results(name, matches)
Example #6
    def postStory(self, title, link, text):
        if not self.loggedIn:
            raise LoginRequiredException("Not signed in!")

        f = open(readerutils.COOKIE, "rb")
        cookies = requests.utils.cookiejar_from_dict(pickle.load(f))
        f.close()

        if text != "":
            text = cgi.escape(text)

        try:
            r = self.session.get(readerutils.hnUrl("submit"))
        except:
            return False
        soup = BeautifulSoup(r.content)
        fnid = soup.find("input", {"name": "fnid"})["value"]
        fnop = soup.find("input", {"name": "fnop"})["value"]
        endpoint = soup.find("form", {"method": "post"})["action"]
        endpoint = endpoint.replace("/", "")

        params = {"fnid": fnid, "fnop": fnop, "title": title, "url": link, "text": text}
        try:
            r = self.session.post(readerutils.hnUrl(endpoint), data=params)
        except:
            return False

        if r.url == "https://news.ycombinator.com/newest":
            return True
        return False
Example #7
def downloadthread(fil):
    apps_links = read_from_file(fil)
    for link in apps_links[1:3]:
        #    link = apps_links[1] # dev version of for loop
        req = urllib2.Request("%s%s" % (base_url, link), None, headers)
        html_doc = urllib2.urlopen(req).read()
        ################        Title extract
        startstring = "<title>"
        start = html_doc.find(startstring) + len(startstring)
        endstring = "| AppBrain Android Market</title>"
        end = html_doc.find(endstring)
        Title = html_doc[start:end]
        ################        Description extract
        startstring = '<div class="app_descriptiontab">'
        start = html_doc.find(startstring) + len(startstring)
        endstring = '<div style="position: absolute; right: 0px; bottom: 0px">'
        end = html_doc.find(endstring)
        description = html_doc[start:end]
        ################
        description = description.strip()  # get rid of whitespace
        description = BeautifulSoup(description)
        description = description.get_text()  # get rid of html
        subdir = "\\fulldescriptions\\" + fil[1 : fil.find(".")] + "\\"
        path = basedir + subdir
        filename = path + Title + ".txt"
        if not os.path.exists(path):  # if folder does not exist create it.
            os.makedirs(path)
        with open(filename, "w") as txtfile:
            txtfile.write(description.encode("utf8"))
        print("link:%s done" % link)
Example #8
def list_qualities(M3UURL=None):
    exception = False
    video_url = common.args.url
    video_data = connection.getURL(video_url)
    if "link.theplatform.com" not in video_url:
        video_tree = BeautifulSoup(video_data, "html.parser")
        try:
            player_url = "http:" + video_tree.find("div", class_="video-player-wrapper").iframe["src"]
        except:
            player_url = "http:" + video_tree.find("div", id="pdk-player")["data-src"]
        player_data = connection.getURL(player_url)
        player_tree = BeautifulSoup(player_data, "html.parser")
        video_url = player_tree.find("link", type="application/smil+xml")["href"]
        video_url = video_url + "&format=SCRIPT"

        script_data = connection.getURL(video_url)
        script_menu = simplejson.loads(script_data)
        if script_menu["pl1$entitlement"] != "auth":
            bitrates, exception = smil_bitrates(video_url)
        else:
            captions = script_menu["captions"][0]["src"]
            id = re.compile("([0-9]+.[0-9]+.*).tt").findall(captions)[0]
            td = datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
            unow = int((td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6)
            master_url = M3UURL % (id, str(unow), str(unow + 60))
            bitrates = m3u_bitrates(master_url)
            return bitrates
            # need to set captions on player
    else:
        bitrates, exception = smil_bitrates(video_url)
    if not exception:
        return bitrates
    else:
        common.show_exception(video_tree.ref["title"], video_tree.ref["abstract"])
Example #9
def smil_bitrates(video_url):
    bitrates = []
    exception = False
    video_data = connection.getURL(video_url)
    video_tree = BeautifulSoup(video_data, "html.parser")
    video_rtmp = video_tree.meta
    playpath_url = None
    lplaypath_url = None
    try:
        base_url = video_rtmp["base"]
    except:
        base_url = None
    if base_url is not None:
        video_url2 = video_tree.switch.find_all("video")
        for video_index in video_url2:
            bitrate = int(video_index["system-bitrate"])
            bitrates.append((int(bitrate) / 1024, bitrate))
    else:
        video_data = connection.getURL(video_url + "&manifest=m3u&Tracking=true&Embedded=true&formats=F4M,MPEG4")
        video_tree = BeautifulSoup(video_data, "html.parser")
        if video_tree.find("param", attrs={"name": "isException", "value": "true"}) is None:
            video_url2 = video_tree.body.seq.video
            video_url3 = video_url2["src"]
            bitrates = m3u_bitrates(video_url3)
        else:
            exception = True
    return bitrates, exception
Example #10
    def getPriceAndStops(self):
        soup = BeautifulSoup(self.html)
        resultlist = []

        maxstay = time.strptime(self.maxStay, "%H:%M") if self.maxStay != None else None

        # Iterate over all results
        for htmlDiv in soup.find_all(id=re.compile("outer-[0-9]*", re.I)):

            result = SearchResult()
            price = htmlDiv.find("span", {"class": "price"}).text.encode("utf-8").strip()
            price = float(re.sub(r"[^\w|\.]", "", price.replace(".", "").replace(",", ".")))

            # Filter Max Price
            if self.maxPrice != None and price >= self.maxPrice:
                break
            result.setPrice(price)

            staydurationResult = self.getStayDurations(htmlDiv, maxstay)

            # Filter StayDuration
            if staydurationResult:
                result.setStay(staydurationResult)
                resultlist.append(result)

        return resultlist
Example #11
def parse_page(page_html):
    """Parse a page's HTML and process the listings concurrently"""
    doc = Soup(page_html, "html.parser")
    listing_paths = (link["href"] for link in doc.find_all("a", {"class": "title"}))
    with ThreadPool(max_workers=8) as pool:
        for path in listing_paths:
            yield pool.submit(listing, path)
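Since parse_page is a generator that yields futures, a caller typically drains it and then collects each result; a minimal usage sketch, assuming ThreadPool is concurrent.futures.ThreadPoolExecutor, listing(path) returns a parsed record, and page_html is a hypothetical HTML string:

from concurrent.futures import as_completed

futures = list(parse_page(page_html))  # exhausting the generator submits every listing() call
for future in as_completed(futures):
    record = future.result()  # re-raises any exception raised inside listing()
    print(record)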
Example #12
def getLinks(articleUrl):
    html = urlopen("http://en.wikipedia.org" + articleUrl)
    bsObj = BeautifulSoup(html, "html.parser")
    title = bsObj.find("h1").get_text()
    content = bsObj.find("div", {"id": "mw-content-text"}).find("p").get_text()
    store(title, content)
    return bsObj.find("div", {"id": "bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))
Example #13
 def __init__(self, login, password):
     """ Init: get authenticity token """
     with requests.session() as self.session:
         try:
             # ~ 1/ Get login token and authentify
             payload = {}
             log_soup = BeautifulSoup(self.session.get("https://127.0.0.1/users/login", verify=False).text)
             payload["utf8"] = log_soup.findAll("input", attrs={"name": "utf8"})[0].get("value")
             payload["authenticity_token"] = log_soup.findAll("input", attrs={"name": "authenticity_token"})[0].get(
                 "value"
             )
             if payload["authenticity_token"] == None:
                 raise requests.exceptions.RequestException("Bad catch of authenticity_token")
             payload["commit"] = "Login"
             payload["login[login]"] = login
             payload["login[password]"] = password
             # ~ 2/ Log in
             r = self.session.post("https://127.0.0.1/users/login", verify=False, data=payload)
             if r.status_code != 200:
                 raise requests.exceptions.RequestException("Bad login or password")
             # ~ Get token for host creation
             log_soup = BeautifulSoup(self.session.get("https://127.0.0.1/hosts/new", verify=False).text)
             self.authenticity_token = log_soup.findAll("input", attrs={"name": "authenticity_token"})[0].get(
                 "value"
             )
             if payload["authenticity_token"] == None:
                 raise requests.exceptions.RequestException("Bad catch of authenticity_token")
         except requests.exceptions.RequestException as e:
             print("Error connection Foreman to get a free ip")
             print(e)
             sys.exit(1)
     pass
Example #14
def getaqi(daerah):
    soup = BeautifulSoup(urllib2.urlopen("http://aqicn.org/city/indonesia/" + daerah))
    parse_data = soup.find_all("td", {"id": re.compile("^cur_")})
    data_clean = [tag.text for tag in soup.find_all("td")]
    if len(data_clean) <= 34:
        raise Exception("%s not exists on aqicn.org database!" % daerah)
    return data_clean
Example #15
def eztv_shows_by_letter(letter):
    import re
    import xbmc
    import xbmcgui
    from bs4 import BeautifulSoup
    from contextlib import nested, closing
    from itertools import izip, groupby
    from concurrent import futures
    from xbmctorrent.scrapers import ungenerate
    from xbmctorrent.utils import terminating, url_get, SafeDialogProgress
    from xbmctorrent import tvdb

    plugin.set_content("tvshows")

    with shelf("it.eztv.shows") as eztv_shows:
        if not eztv_shows:
            response = url_get("%s/showlist/" % BASE_URL, headers=HEADERS)
            soup = BeautifulSoup(response, "html5lib")
            nodes = soup.findAll("a", "thread_link")
            for node in nodes:
                show_id, show_named_id = node["href"].split("/")[2:4]
                show_name = node.text
                show_first_letter = show_name[0].lower()
                if re.match("\d+", show_first_letter):
                    show_first_letter = "0-9"
                eztv_shows.setdefault(show_first_letter, {}).update(
                    {show_id: {"id": show_id, "named_id": show_named_id, "name": node.text}}
                )

    shows_list = sorted(eztv_shows[letter.lower()].values(), key=lambda x: x["name"].lower())

    with closing(SafeDialogProgress(delay_close=0)) as dialog:
        dialog.create(plugin.name)
        dialog.update(percent=0, line1="Fetching series information...", line2="", line3="")

        state = {"done": 0}

        def on_serie(future):
            data = future.result()
            state["done"] += 1
            dialog.update(percent=int(state["done"] * 100.0 / len(shows_list)), line2=data and data["seriesname"] or "")

        with futures.ThreadPoolExecutor(max_workers=5) as pool_tvdb:
            tvdb_list = [pool_tvdb.submit(tvdb.search, show["name"], True) for show in shows_list]
            [future.add_done_callback(on_serie) for future in tvdb_list]
            while not all(job.done() for job in tvdb_list):
                if dialog.iscanceled():
                    return
                xbmc.sleep(100)

    tvdb_list = [job.result() for job in tvdb_list]
    for i, (eztv_show, tvdb_show) in enumerate(izip(shows_list, tvdb_list)):
        if tvdb_show:
            item = tvdb.get_list_item(tvdb_show)
            item.update(
                {"path": plugin.url_for("eztv_get_show_seasons", show_id=eztv_show["id"], tvdb_id=tvdb_show["id"])}
            )
            yield item
        else:
            yield {"label": eztv_show["name"], "path": plugin.url_for("eztv_get_show_seasons", show_id=eztv_show["id"])}
Example #16
def profile_handler(doc, name, url, path):
    filename = os.path.join(path, name + ".html")
    employee = Employee(name=name, url=url)

    # Only save the name and personal homepage; the resume file is saved separately to the current directory
    soup = BeautifulSoup(doc, Config.SOUP_PARSER)
    divs = soup.find_all(name="div", attrs={"class": "main"}, limit=1)
    if not divs or len(divs) == 0:
        div = soup
    else:
        div = divs[0]

    if not os.path.exists(filename):
        with open(filename, "wb") as fp:
            content = div.prettify()
            fp.write(content.encode("utf-8"))

    # Process as plain text
    lines = div.stripped_strings
    # text=div.get_text(strip=True)
    parser = ProfileParser(
        lines=lines, employee=employee, set_attr_hook=set_attr_hook, max_line=999, force_email=True, force_tel=False
    )
    return parser.parse()
Example #17
 def cleanText(self, text, printableOnly=True):
     soup = BeautifulSoup(text, "html.parser")
     text = soup.get_text()
     text = re.sub("( +|\n|\r|\t|\0|\x0b|\xa0|\xbb|\xab)+", " ", text).strip()
     if printableOnly:
         return filter(lambda x: x in string.printable, text)
     return text
Example #18
def get_program_2012(content):
    """
    author <div> class is different from get_program_since_2013()
    :param content:
    :return:
    """
    talks = []
    soup = BeautifulSoup(content, "html.parser")
    # print content
    all_talks = soup.find_all("div", {"class": "node-paper"})
    # print "number of talks", len(all_talks)
    for talk in all_talks:
        title = talk.find("h2", {"class": "node-title"}).find("a").text.strip()
        div_content = talk.find("div", {"class": "node-content"})
        if div_content is None:
            talks.append("Title:" + title)
            continue

        try:
            talk_type = (
                div_content.find("div", {"class": "field-name-field-presentation-label"})
                .find("div", {"class": "even"})
                .text.strip()
            )
        except:
            talk_type = "None"

        # speaker of this talk
        try:
            speakers = (
                div_content.find("div", {"class": "field-name-field-paper-people-text"})
                .find("div", {"class": "even"})
                .text.strip()
            )
        except:
            speakers = "None"

        try:
            description = div_content.find("div", {"class": "field-name-field-paper-description-long"}).text.strip()
        except:
            description = "None"

        # what resources they provide for this talk
        available_res = []
        if div_content.find("div", {"class": "pdf"}):
            available_res.append("pdf")
        if div_content.find("div", {"class": "slides"}):
            available_res.append("slides")
        if div_content.find("div", {"class": "vedio"}):
            available_res.append("vedio")
        if div_content.find("div", {"class": "audio"}):
            available_res.append("audio")
        if len(available_res) == 0:
            available_res.append("None")
        talks.append(
            "Title:{Title}\nType:{Type}\nSpeakers:{Speakers}\nDescription:{Description}\nResource:{Resource}\n".format(
                Title=title, Type=talk_type, Speakers=speakers, Description=description, Resource="/".join(available_res)
            )
        )
    return talks
Example #19
    def postComment(self, source, comment):
        if not self.loggedIn:
            raise LoginRequiredException("Not signed in!")

        f = open(readerutils.COOKIE, "rb")
        cookies = requests.utils.cookiejar_from_dict(pickle.load(f))
        f.close()

        comment = cgi.escape(comment)
        try:
            r = self.session.get(readerutils.hnUrl("item?id=" + source))
        except:
            print("error getting page")
            return False
        soup = BeautifulSoup(r.content)
        hmac = soup.find("input", {"name": "hmac"})["value"]
        endpoint = soup.find("form", {"method": "post"})["action"]
        params = {"hmac": hmac, "text": comment, "parent": source, "goto": "item?id=" + source}

        try:
            r = self.session.post(readerutils.hnUrl(endpoint), data=params)
        except Exception as e:
            print(e)
            return False
        if r.url == ("https://news.ycombinator.com/item?id=" + source):
            return True
        else:
            print(r.url)
        return False
Example #20
def get_program_between_2011_and_2008(year, content):
    """
    Get the talk title from <p class="techdesc"></p>, which contains the title and speaker information.

    The talk description is also wrapped in <p class="techdesc"></p>, but its content is only raw text,
    so we can use this characteristic to filter out the description paragraphs.

    In 2008, the talk MP3 resource is also wrapped in <p class="techdesc"></p>.

    :param content:
    :return:
    """
    soup = BeautifulSoup(content, "html.parser")
    techdesc = soup.find_all("p", {"class": "techdesc"})
    talks = []
    for talk in techdesc:
        # filter mp3 paragraph
        if "Listen in MP3 format" in talk.text:
            continue
        # filter description paragraph
        if talk.find("b") is None or talk.find("i") is None:
            continue
        title = talk.find("b").text
        if year == 2009 and ("p.m." in title or "a.m." in title):
            title = talk.find_all("b")[1].text
        corp = "/".join([a.text for a in talk.find_all("i")])
        talks.append("Title:{Title}\nCorporation:{Corporation}\n".format(Title=title, Corporation=corp))
    return talks
Example #21
    def get_hot(self, url="http://m.qiushibaike.com/hot"):
        self.url = url
        html = self.get_html()
        soup = BeautifulSoup(html, "lxml")
        article_tag_list = soup.find_all("div", class_="article block untagged mb15")
        res_list = []
        try:
            for article_tag in article_tag_list:
                author_a_tag = article_tag.find("div", class_="author")
                if author_a_tag:
                    author_tag = author_a_tag.find("a")
                    author = author_tag.text if author_tag else ""
                else:
                    author = ""
                content_tag = article_tag.find("div", class_="content")
                content = content_tag.text if content_tag else ""
                thumb_tag = article_tag.find("div", class_="thumb")
                img = thumb_tag.find("img").get("src") if thumb_tag else ""

                d = {"content": content, "img": img, "author": author}
                res_list.append(d)
        except:
            traceback.print_exc()

        return res_list
Example #22
    def _doSearch(self, searchString, show=None, season=None, french=None):

        if not self.login_done:
            self._doLogin(sickbeard.T411_USERNAME, sickbeard.T411_PASSWORD)

        results = []
        searchUrl = self.url + "/torrents/search/?" + searchString.replace("!", "")
        logger.log(u"Search string: " + searchUrl, logger.DEBUG)

        r = self.opener.open(searchUrl)
        soup = BeautifulSoup(r, "html.parser")
        resultsTable = soup.find("table", {"class": "results"})
        if resultsTable:
            rows = resultsTable.find("tbody").findAll("tr")

            for row in rows:
                link = row.find("a", title=True)
                title = link["title"]
                id = row.find_all("td")[2].find_all("a")[0]["href"][1:].replace("torrents/nfo/?id=", "")
                downloadURL = "http://www.t411.in/torrents/download/?id=%s" % id

                quality = Quality.nameQuality(title)
                if quality == Quality.UNKNOWN and title:
                    if "720p" not in title.lower() and "1080p" not in title.lower():
                        quality = Quality.SDTV
                if show and french == None:
                    results.append(
                        T411SearchResult(self.opener, link["title"], downloadURL, quality, str(show.audio_lang))
                    )
                elif show and french:
                    results.append(T411SearchResult(self.opener, link["title"], downloadURL, quality, "fr"))
                else:
                    results.append(T411SearchResult(self.opener, link["title"], downloadURL, quality))

        return results
Example #23
def menu():
    menuData = cache.get("menu")
    if menuData is None:
        html = session.get("http://teamportal/sites/admin/Culinary/Lists/Menu Items/Simplified.aspx").text
        soup = BeautifulSoup(html)
        today = datetime.today()
        today_str = " : {}/{}/{}".format(today.month, today.day, today.year)
        today_element = soup.find_all(text=today_str, limit=1)
        menu_tbody = today_element[0].parent.parent.parent.next_sibling
        menuData = {}
        for tr in menu_tbody.next_siblings:
            if not tr.contents:
                break
            if not tr.contents[0].string:
                continue
            category = tr.contents[0].string
            name = tr.contents[1].string
            price = tr.contents[2].string
            if category in menuData:
                menuData[category].append((name, price))
            else:
                menuData[category] = [(name, price)]
        menuData = json.dumps(menuData)
        cache.set("menu", menuData, timeout=14400)  # cache will last 4 hours
    return menuData
Example #24
    def collect_links(self, url, relative=True, name=True, **kwargs):
        """Collects links for given page URL.

        If name is True, then links will be collected for the whole page.
        Use the name argument to pass the tag name of an element.
        Use kwargs to pass the id of an element or its class name.
        Because 'class' is a reserved keyword in Python,
        you need to pass class as: **{'class': 'container row'}.

        Read more about searching elements with BeautifulSoup.
        See: http://goo.gl/85BuZ
        """

        # support for relative URLs
        if relative:
            url = "%s%s" % (self.base_url, url)

        # get the page and verify status code is OK
        r = requests.get(url)
        Assert.true(r.status_code == requests.codes.ok, u"{0.url} returned: {0.status_code} {0.reason}".format(r))

        # collect links
        parsed_html = BeautifulSoup(r.text)
        urls = [anchor["href"] for anchor in parsed_html.find(name, attrs=kwargs).findAll("a")]

        # prepend base_url to relative links
        return map(lambda u: u if u.startswith("http") else "%s%s" % (self.base_url, u), urls)
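A minimal usage sketch of the **{'class': ...} pattern described in the docstring, with a hypothetical page object and selector values, assuming base_url is already configured on the instance:

# Hypothetical call: collect links inside <div class="container row"> on /about;
# 'class' is passed via dict unpacking because it is a reserved keyword in Python.
links = page.collect_links("/about", name="div", **{"class": "container row"})
for href in links:
    print(href)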
Example #25
def UDD(keyword):
    if keyword is None:
        keyword = "pwned"
    if str(keyword) == "pag":
        keyword = "pwned"

    keyword = keyword.replace(" ", "+")
    url = "http://www.urbandictionary.com/define.php?term=" + str(keyword)
    print(url)  # temp log
    data = urllib2.urlopen(url, timeout=3).read()
    bs = BeautifulSoup(data)

    body = bs.find("td", {"id": "middle_column"})
    word = body.find("td", {"class": "word"})

    if word is None:
        s.send("PRIVMSG %s :No definition found.\r\n" % LOBBY)
        return
    else:
        word = word.find("span").get_text()
        definition = body.find("div", {"class": "definition"}).get_text()

        word = word.strip()
        definition = definition.strip()
        s.send("PRIVMSG %s :%s : %s\r\n" % (LOBBY, word, definition))
Example #26
 def get_domain_name(self, url, headers, count):
     ctest = 0
     while True:
         if ctest == count:
             return False
         else:
             if ctest != 0:
                 ctest += 1
                 print "Retry %s times" % ctest
             else:
                 ctest += 1
             try:
                 domain_data = requests.get(
                     url + "/console.portal?_nfpb=true&_pageLabel=CoreServerServerTablePage",
                     headers=headers,
                     cookies=self.cookies,
                     timeout=10,
                 )
                 domain_soup = BeautifulSoup(domain_data.text)
                 for name in domain_soup.find_all("a"):
                     if name.get("href") != None:
                         name = re.search(r"\:Name=[\w]*\,Type\=Domain", urllib.unquote(name.get("href")))
                         if name:
                             self.domain_name = name.group()
                             break
                 self.domain_name = re.search(r"Name=[\w]*", self.domain_name)
                 self.domain_name = self.domain_name.group()[5:]
                 print "DomainName:%s\r\n" % self.domain_name
                 return True
             except:
                 print "get_domain_name Error!\n"
                 if ctest == 3:
                     f = open("error.txt", "a")
                     f.write("get_domain_name Error! " + url + "\n")
                     f.close()
Example #27
def scrap_items():
    for itemlist in ITEMLIST:
        soup = BS(urllib2.urlopen("".join([LOLWIKI, itemlist])).read())
        item_table = soup.find("table", class_="stdt sortable")

        for tr in item_table.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 1:
                continue
            if tr.find("p") == None:
                continue

            item_name = tr.find("p").text.strip()
            item_url = tr.find("img")["src"]

            if item_url.split(":")[0] == "data":
                item_url = tr.find("img")["data-src"]

            if not HOOKED:
                continue

            # store item in database
            d_item = Item()
            d_item.name = item_name

            t_img = NamedTemporaryFile(delete=True)
            t_img.write(urllib2.urlopen(item_url).read())
            t_img.flush()
            t_img.name = ".".join([item_name, "jpg"])

            d_item.picture = File(t_img)
            d_item.save()
Example #28
 def getDownloadData(self):
     fl = self.downloadList()
     if fl.geturl() != self.course["lectures_url"]:
         self.login(fl.geturl())
         fl = self.downloadList()
         if fl.geturl() != self.course["lectures_url"]:
             raise Exception("File list could not be retrieved successfully. Got url: " + fl.geturl())
     html = BeautifulSoup(fl.read())
     topics = html.find_all("div", "course-item-list-header")
     contents = html.find_all("ul", "course-item-list-section-list")
     alldict = []
     for itr in range(0, len(contents)):
         title = topics[itr].find("h3").contents[1].strip().lower()
         headingsdict = []
         for content in contents[itr].find_all("li"):
             links = content.find_all("a")
             heading = links[0].contents[0].strip().lower()
             linksdict = dict()
             linktypes = ["txt", "srt", "pdf", "pptx", "mp4", "java", "sml", "zip"]
             for link in links[1:]:
                 for linktype in linktypes:
                     if linktype in link["href"]:
                         linksdict[linktype] = link["href"]
                         break
             headingsdict.append({"title": heading, "values": linksdict})
         alldict.append({"title": title, "values": headingsdict})
     return alldict
Example #29
def pingback_ping(source, target):
    """pingback.ping(sourceURI, targetURI) => 'Pingback message'

    Notifies the server that a link has been added to sourceURI,
    pointing to targetURI.

    See: http://hixie.ch/specs/pingback/pingback-1.0"""
    try:
        if source == target:
            return UNDEFINED_ERROR

        site = Site.objects.get_current()
        try:
            document = "".join(map(lambda byte_line: byte_line.decode("utf-8"), urlopen(source).readlines()))
        except (HTTPError, URLError):
            return SOURCE_DOES_NOT_EXIST

        if target not in document:
            return SOURCE_DOES_NOT_LINK

        scheme, netloc, path, query, fragment = urlsplit(target)
        if netloc != site.domain:
            return TARGET_DOES_NOT_EXIST

        try:
            view, args, kwargs = resolve(path)
        except Resolver404:
            return TARGET_DOES_NOT_EXIST

        try:
            entry = Entry.published.get(
                slug=kwargs["slug"],
                creation_date__year=kwargs["year"],
                creation_date__month=kwargs["month"],
                creation_date__day=kwargs["day"],
            )
            if not entry.pingbacks_are_open:
                return TARGET_IS_NOT_PINGABLE
        except (KeyError, Entry.DoesNotExist):
            return TARGET_IS_NOT_PINGABLE

        soup = BeautifulSoup(document)
        title = six.text_type(soup.find("title"))
        title = title and strip_tags(title) or _("No title")
        description = generate_pingback_content(soup, target, PINGBACK_CONTENT_LENGTH)

        pingback, created = comments.get_model().objects.get_or_create(
            content_type=ContentType.objects.get_for_model(Entry),
            object_pk=entry.pk,
            user_url=source,
            site=site,
            defaults={"comment": description, "user_name": title, "submit_date": timezone.now()},
        )
        if created:
            pingback.flags.create(user=get_user_flagger(), flag=PINGBACK)
            pingback_was_posted.send(pingback.__class__, pingback=pingback, entry=entry)
            return "Pingback from %s to %s registered." % (source, target)
        return PINGBACK_ALREADY_REGISTERED
    except:
        return UNDEFINED_ERROR
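On the client side, a pingback is just an XML-RPC call carrying the two URIs from the docstring; a minimal sketch, assuming a hypothetical blog whose XML-RPC endpoint is mounted at /xmlrpc/:

import xmlrpc.client

# Hypothetical endpoint and URIs; pingback.ping returns a confirmation string or an error code.
server = xmlrpc.client.ServerProxy("https://blog.example.com/xmlrpc/")
reply = server.pingback.ping("https://source.example.com/post/", "https://blog.example.com/2015/06/01/entry/")
print(reply)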
Example #30
def get_url_thumbnail(url):
    """Save url's image, if does not exist already locally."""
    # TODO optimization: get chunks of data until find the og:image
    # same to the script for suggesting the title.
    try:
        response = requests.get(url)
    except OSError:  # Host might now allow extenrnal requests
        return None
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        img_has_link = soup.find("meta", {"property": "og:image"})
        img_link = None
        if img_has_link:
            img_link = img_has_link.get("content")
        if img_link is not None:
            img_name = basename(img_link)
            destination = current_app.static_folder + "/img/" + img_name
            if not isfile(destination):
                img_response = requests.get(img_link, stream=True)
                if img_response.status_code == 200:
                    with open(destination, "wb") as fob:
                        for chunk in img_response:
                            fob.write(chunk)
                else:
                    # TODO if not accessible i should re-try to download
                    return None
            return img_name
    return None