コード例 #1
0
ファイル: utils.py プロジェクト: tomersa/OpenGraphScraper
def og_scrape_worker(canonical_url):
    """Scrape Open Graph data for ``canonical_url`` and record the outcome.

    The URL is fetched as ``http://<canonical_url>``. When the scraped data
    is valid it is handed to ``add_data_to_url_in_db`` and that call's result
    is returned. In every other case (invalid data, or any exception raised
    while scraping or storing) the URL is flagged through
    ``update_to_error_url_status_in_db`` and ``None`` is returned.
    """
    try:
        og_data = OpenGraph(url="http://%s" % canonical_url)
        if og_data.is_valid():
            return add_data_to_url_in_db(canonical_url, og_data)
        update_to_error_url_status_in_db(canonical_url)
    except Exception as e:
        # Deliberately broad: a failing worker must still mark the URL as
        # errored instead of leaving it in its previous state.
        # The parenthesised single-argument form prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print("Error occurred on scrap worker thread: %s" % e)
        update_to_error_url_status_in_db(canonical_url)
コード例 #2
0
ファイル: core.py プロジェクト: Makeystreet/makeystreet
 def __fetch_open_graph_details(self):
     """Fill empty ``title``/``description``/``image_url`` from Open Graph data.

     Fetches the Open Graph metadata for ``self.url``. Attributes that
     already hold a truthy value are left alone; the others are set from the
     ``title``, ``description`` and ``image`` properties when present.
     This is best effort: any exception is printed and swallowed.
     """
     try:
         og = OpenGraph(url=self.url)
         if og.is_valid():
             _json = json.loads(og.to_json())
             # Each field is handled on its own: og:description is optional
             # in the Open Graph protocol, so a missing property must not
             # prevent the remaining fields from being filled in.
             for attr, key in (('title', 'title'),
                               ('description', 'description'),
                               ('image_url', 'image')):
                 if not getattr(self, attr) and key in _json:
                     setattr(self, attr, _json[key])
     except Exception as e:
         print(e.__doc__)
         # Python 3 exceptions have no ``message`` attribute; reading it
         # directly would raise AttributeError from inside this handler.
         print(getattr(e, 'message', None) or e)
コード例 #3
0
ファイル: core.py プロジェクト: Makeystreet/makeystreet
    def __fetch_open_graph_details(self):
        """Set ``name`` and ``image`` from the Open Graph data of ``self.url``.

        When the fetched data is valid, ``self.name`` is overwritten with the
        ``title`` property, and a new ``Image`` (owned by ``self.user``,
        pointing at the ``image`` property) is saved and assigned to
        ``self.image``. This is best effort: any exception is printed and
        swallowed.
        """
        try:
            og = OpenGraph(url=self.url)
            if og.is_valid():
                _json = json.loads(og.to_json())
                self.name = _json['title']

                img = Image(user=self.user, large_url=_json['image'],
                            added_time=timezone.now())
                img.save()
                self.image = img
        except Exception as e:
            print(e.__doc__)
            # Python 3 exceptions have no ``message`` attribute; reading it
            # directly would raise AttributeError from inside this handler.
            print(getattr(e, 'message', None) or e)
コード例 #4
0
def get_url_data(url):
    """Return the Open Graph properties of ``url`` as a plain dict.

    Returns ``None`` when the page cannot be fetched (``HTTPError``,
    ``URLError``), when an ``AttributeError`` is raised along the way, or
    when the parsed Open Graph data is not valid.
    """
    try:
        raw = urlopen(url)

        # NOTE(review): urlopen is not documented to return None; this guard
        # is kept from the original, presumably for stubbed-out openers.
        if raw is None:
            return None

        # Always release the connection, even if reading fails part-way.
        try:
            html = raw.read()
        finally:
            raw.close()

        data = OpenGraph(html=html, scrape=True)
        return dict(data) if data.is_valid() else None

    except (HTTPError, URLError, AttributeError):
        return None
コード例 #5
0
def _parse_open_graph(article):
    """Fill empty fields of ``article`` from the Open Graph data in its HTML.

    Does nothing when ``article.html`` carries no valid Open Graph data.
    Fields that already hold a truthy value are kept; Open Graph only fills
    the gaps. ``images``, ``categories`` and ``authors`` receive a
    one-element list, and only when the matching property is present.

    Raises:
        NotImplementedError: if the Open Graph ``type`` is not ``"article"``.
    """
    og = OpenGraph(html=article.html)
    if not og.is_valid():
        return

    if og["type"] != "article":
        raise NotImplementedError("Cannot parse a OG type: %s" % og["type"])

    # The original called ``og.setdefault(None)`` here. On a dict that only
    # inserts a ``None: None`` entry; ``og.get`` already yields None for
    # missing keys, so the call is dropped.

    def _as_list(value):
        # Avoid handing back ``[None]`` when the property is absent.
        return [value] if value is not None else []

    article.title = article.title or og.get("title")
    article.summary = article.summary or og.get("description")
    article.images = article.images or _as_list(og.get("image")) or article.images
    article.meta_lang = article.meta_lang or og.get("locale")
    article.keywords = article.keywords or og.get("tag")
    article.categories = (article.categories or _as_list(og.get("category"))
                          or article.categories)
    article.authors = (article.authors or _as_list(og.get("author"))
                       or article.authors)
    # NOTE(review): the publication date is read from "modified_date" —
    # confirm this is the intended property.
    article.pub_date = article.pub_date or og.get("modified_date")