コード例 #1
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_scrape(self):
     """Scraping should supply a missing og:description from page metadata."""
     page = opengraph.OpenGraph(url='http://graingert.co.uk/', required_attrs=("description",), scrape=True)
     self.assertTrue(page.is_valid())
     self.assertTrue(page.items["description"])

     page = opengraph.OpenGraph(url='http://www.crummy.com/software/BeautifulSoup/bs3/documentation.html', required_attrs=("description",), scrape=True)
     self.assertEqual(page.items["description"], "Beautiful Soup Documentation")
コード例 #2
0
ファイル: unfurl.py プロジェクト: drivet/indieweb-utils
def fetch_og_result(url):
    """Fetch OpenGraph data for *url*.

    Returns the OpenGraph result (with image dimensions filled in) when it
    carries a title or description; otherwise returns an empty dict.
    """
    og = opengraph.OpenGraph(url=url)
    if not og:
        return {}
    if 'title' not in og and 'description' not in og:
        return {}
    fetch_image_dimensions(og)
    return og
コード例 #3
0
def fetch_og_metadata(user_agent, links):
    """Fetch OpenGraph metadata for every plain HTML page in *links*.

    ActivityPub actors, non-HTML responses and unparseable pages are
    skipped; parsed entries without an og:url are dropped.  Returns a
    list of OpenGraph dicts.
    """
    res = []
    for l in links:
        check_url(l)

        # Remove any AP actor from the list
        try:
            p = lookup(l)
            if p.has_type(ap.ACTOR_TYPES):
                continue
        except NotAnActivityError:
            pass

        r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
        r.raise_for_status()
        # The Content-Type header may be absent entirely; `.get(...)` would
        # then return None and `.startswith` would raise AttributeError.
        # Default to "" so a missing header is treated as non-HTML.
        if not r.headers.get("content-type", "").startswith("text/html"):
            logger.debug(f"skipping {l}")
            continue

        r.encoding = "UTF-8"
        html = r.text
        try:
            data = dict(opengraph.OpenGraph(html=html))
        except Exception:
            logger.exception(f"failed to parse {l}")
            continue
        if data.get("url"):
            res.append(data)

    return res
コード例 #4
0
ファイル: test.py プロジェクト: irk3n-dev/opengraph-1
 def test_no_json(self):
     """to_json() must degrade gracefully when the json module is unavailable."""
     if getattr(opengraph, 'import_json', None) is None:  # python3 module layout
         opengraph.opengraph.import_json = False
     else:  # python2 module layout
         opengraph.import_json = False
     og = opengraph.OpenGraph(url='http://www.ogp.me/')
     self.assertEqual(og.to_json(), "{'error':'there isn't json module'}")
コード例 #5
0
def parse_url(url):
    """Dispatch *url* to the valid/invalid parser based on its OpenGraph data."""
    result = opengraph.OpenGraph(url=url)

    if not result.is_valid():
        return parse_non_valid_url(url)
    return parse_valid_url(url, result)
コード例 #6
0
ファイル: tasks.py プロジェクト: andrzejkrecicki/sm8
def get_opengraph(self, post, url):
    """Fetch OpenGraph data for *url* and store it on *post*.

    Network failures (URLError/HTTPError) trigger a task retry.
    """
    try:
        og = opengraph.OpenGraph(url=url)
        if og.is_valid():
            post.opengraph = og
            post.save()
    # Fixed the Python-2-only "except (A, B), e" syntax; "as e" works on
    # both Python 2.6+ and Python 3.
    except (URLError, HTTPError) as e:
        raise self.retry(exc=e)
コード例 #7
0
def parse_html(html):
    """Dispatch *html* to the valid/invalid parser based on its OpenGraph data."""
    ogp_result = opengraph.OpenGraph(html=html)

    # Idiom fix: test truthiness instead of identity against True
    # ("is True" breaks if is_valid() ever returns a truthy non-bool).
    if ogp_result.is_valid():
        return parse_valid_html(html, ogp_result)

    return parse_non_valid_html(html)
コード例 #8
0
ファイル: views.py プロジェクト: meletakis/collato
def rendered_wall_posts( wall_posts ):
	"""Inline link previews into each wall post's HTML content.

	For every URL found in a post's content: direct image links become an
	<img> tag; YouTube/Vimeo video pages become embedded <iframe> players;
	article pages and other OpenGraph pages become a small preview table;
	URLs with no usable OpenGraph data become plain anchors.  Mutates and
	returns *wall_posts*.  Python 2 code (urlparse/httplib).
	"""
	for wall_post in wall_posts:
		title = ''
		desc = ''
		site_image = ''
		article_title = ''
		urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', wall_post.data['post_content'])
		for url in urls: 
			parse_obj = urlparse.urlparse(url)
			site = parse_obj.netloc
			path = parse_obj.path
			# HEAD request only — we just need the status and Content-Type
			conn = httplib.HTTPConnection(site)
			conn.request('HEAD',path)
			response = conn.getresponse()
			conn.close()
			ctype = response.getheader('Content-Type')
			if response.status < 400 and ctype.startswith('image'):
				wall_post.data['post_content'] = wall_post.data['post_content']+"<br/><a href='"+url+"' target='_blank'><img width=300 src='"+url+"' target = '_blank'/></a>"
			else:
				og = opengraph.OpenGraph(url)
				# NOTE(review): len == 2 presumably means only fallback keys
				# (no real OG tags) — confirm against the opengraph library.
				if not len(og.items()) == 2:
					for x,y in og.items():
						if x == 'type' and y == 'video':
							for k,l in og.items():
								if k == 'site_name' and l == 'YouTube':
							
									# video id comes from the ?v= query parameter
									url_data = urlparse.urlparse(url)
									query = urlparse.parse_qs(url_data.query)
									video = query["v"][0]
									wall_post.data['post_content'] = wall_post.data['post_content'].replace(url,"")+"<br/><iframe width='300' height='200' src='//www.youtube.com/embed/"+video+"' frameborder='0' allowfullscreen></iframe>"
								elif k == 'site_name' and l == 'Vimeo':
									# Vimeo embeds use the URL path as the video id
									url_data = urlparse.urlparse(url)
									video = url_data.path
									wall_post.data['post_content'] = wall_post.data['post_content'].replace(url,"")+"<br/><iframe src='//player.vimeo.com/video"+video+"' width='300' height='200' frameborder='0' webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe> <p></p>"
						elif x == 'type' and y == 'article':
							# collect article metadata, then append a preview table
							for k,l in og.items():
								if k == 'title':
									article_title = l
								elif k == 'site_name':
									title = l
								elif k=='description':
									desc = l
								elif k=='image':
									site_image = l
							wall_post.data['post_content'] = wall_post.data['post_content'] +"<br/><table><tr><td><img width='50' src='"+site_image+"'</td><td><a href='"+url+"' target='_blank'/>"+article_title+"</a><br/>"+title+"</td></td></table>"
						elif x=='type':
							# any other OG type: replace the raw URL with a preview table
							for k,l in og.items():
								if k == 'site_name':
									title = l
								elif k=='description':
									desc = l
								elif k=='image':
									site_image = l
							wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<table><tr><td><img width='50' src='"+site_image+"'</td><td><a href='"+url+"' target='_blank'/>"+title+"</a><br/>"+desc+"</td></td></table>")
				else:
					# no OG data at all: degrade to a plain clickable anchor
					wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<a href='"+url+"' target='_blank'>"+url+"</a>")	
	return wall_posts	
コード例 #9
0
 def otherThumb(self, url):
     """Return the og:image URL for *url*, or the string 'failed'."""
     try:
         site = opengraph.OpenGraph(url=url)
     # Narrowed from a bare "except:", which also swallowed SystemExit
     # and KeyboardInterrupt.
     except Exception:
         return 'failed'
     if site.is_valid():
         image = site.image
     else:
         return 'failed'
     return image
コード例 #10
0
ファイル: utils.py プロジェクト: ImgBotApp/website-18
def extract_ograph_title(text):
    """Strip the leading hashtag word, then resolve the first URL's og:title.

    Returns ``(url, title_bytes)`` when a URL is found in the remaining
    text, otherwise ``(None, remaining_text_bytes)``.
    """
    remainder = ' '.join(text.split(' ')[1:])
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]' \
                  + '|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    found = re.findall(url_pattern, remainder)
    if not found:
        return None, remainder.encode('utf-8')
    meta = opengraph.OpenGraph(url=found[0])
    resolved = meta.get('title', remainder)
    return found[0], resolved.encode('utf-8')
コード例 #11
0
def fetch_and_extract(url):
    """Build a Page populated from the OpenGraph metadata of *url*.

    Missing OpenGraph keys leave the corresponding Page field as None.
    """
    og = opengraph.OpenGraph(url=url)
    page = Page()
    # (Page attribute, OpenGraph key) pairs, assigned in order.
    for attr, key in (("title", "title"),
                      ("description", "description"),
                      ("canon_url", "url"),
                      ("image_url", "image"),
                      ("ogp_type", "type"),
                      ("fetch_url", "_url"),
                      ("site_name", "site_name")):
        setattr(page, attr, og.get(key, None))
    return page
コード例 #12
0
ファイル: minimioche.py プロジェクト: dubtran/upchoose
def getProductInfo(product_url):
    """Gather product data from the page's Open Graph tags.

    Returns a dict of OpenGraph fields plus a 'colors' entry, or the
    string 'na' when the page has no valid OpenGraph data.
    """
    prod_url = minimioche_url + product_url
    prod_site = opengraph.OpenGraph(url=prod_url)
    if prod_site.is_valid():
        product_data = json.loads(prod_site.to_json().encode('utf-8'))
        product_data['colors'] = getColors(prod_url)
        return product_data
    else:
        # Python-2 print statements replaced with the print() function so
        # the module is importable on Python 3.
        print(prod_url, ' didnt work ')
        return 'na'
コード例 #13
0
 def create(self, data):
     """Create a Post in the group identified by the view's pk kwarg.

     Raises a ValidationError when the group does not exist.
     """
     try:
         group = Group.objects.get(id=self.context['view'].kwargs.get("pk"))
     except Exception:
         raise serializers.ValidationError({'error': 'Group Not found'})
     current_user = self.context['request'].user
     data = self.context['request'].data
     # Pre-render the OpenGraph block only when a link was supplied.
     ogp = opengraph.OpenGraph(url=data["link"]).__str__() if 'link' in data else ""
     return Post.objects.create(ogp=ogp,
                                link=data.get("link", None),
                                picture=data.get("picture", None),
                                content=data["content"],
                                group=group,
                                creator=current_user)
コード例 #14
0
ファイル: twyt.py プロジェクト: kwiff/8DZoneUploader
    async def unwatch(self, ctx, url: str):
        """Remove a YouTube/Twitch channel from the watch queue.

		XXX Only works with YouTube channels right now."""

        if not url:
            await self.bot.responses.failure(
                title="No URL Specified", message="You need to give me a URL!")
            return

        found = False

        og = opengraph.OpenGraph(url=url)
        channel_url = og.get('url', '')
        if channel_url.startswith("https://www.youtube.com/channel/"):
            channel_id = channel_url.replace(
                "https://www.youtube.com/channel/", "")

            for i in range(len(self.checklist)):
                if self.checklist[i].channel_id == channel_id:
                    for j in range(len(self.checklist[i].discord_channels)):
                        if self.checklist[i].discord_channels[
                                j].channel == ctx.message.channel.id:
                            self.checklist[i].discord_channels.pop(j)
                            found = True
                    if len(self.checklist[i].discord_channels) == 0:
                        self.checklist.pop(i)
                    break
        elif False:
            pass
        else:
            await self.bot.responses.failure(
                title="Not a YouTube/Twitch Channel",
                message=
                "The URL you have given me is not a YouTube/Twitch channel!")
            return

        if found:
            await self.bot.responses.basic(
                message="This channel has been removed!")
        else:
            await self.bot.responses.failure(
                title="Channel Never Watched",
                message=
                "I was never watching this YouTube/Twitch channel in this Discord channel!"
            )

        self._save()
コード例 #15
0
def media(entity_id=None):
    """Render the media index, or a single media entity's annotation view.

    With no *entity_id*, lists every Entity whose description starts with
    'media:'.  Otherwise resolves the entity's most recent
    "opengraph_url" meta row and fetches its OpenGraph data for display
    alongside a Hypothesis grant token.
    """
    service_url = current_app.hypothesis_client.service
    # hypothesis_api_url = "https://hypothes.is/api/"
    hypothesis_api_url = service_url + '/api/'
    # NOTE(review): this literal looks scrubbed ("******") — presumably a
    # template like "acct:{username}@{authority}"; verify before relying
    # on the produced value.
    hypothesis_username = "******".format(
        username=current_user.username,
        authority=os.environ.get('HYPOTHESIS_AUTHORITY'))
    if entity_id is None:
        media_base_url = url_for("main.media", _external=True)
        entities_media = Entity.query.filter(
            Entity.description.like('media:%')).all()
        return render_template('main/media.html',
                               data=entities_media,
                               hypothesis_api_url=hypothesis_api_url,
                               hypothesis_username=hypothesis_username,
                               media_base_url=media_base_url)

    # The opengraph package ships under different names on py2 vs py3.
    if sys.version_info[0] < 3:
        import opengraph
    else:
        import opengraph_py3 as opengraph
    entity_meta = EntityMeta.query \
                    .filter_by(entity_id=entity_id) \
                    .filter(EntityMeta.type_.like("opengraph_url")) \
                    .all()
    if not entity_meta:
        data = None
    else:
        # Use the most recently added opengraph_url row.
        entity_meta = entity_meta[-1]
        url = entity_meta.description
        data = opengraph.OpenGraph(url=url)

    hypothesis_grant_token = current_app.hypothesis_client.grant_token(
        username=current_user.username)

    keyword = request.args.get('mark', None)
    return render_template(
        'main/display_media.html',
        data=data,
        entity_meta=entity_meta,
        hypothesis_api_url=hypothesis_api_url,
        hypothesis_grant_token=hypothesis_grant_token.decode(),
        service_url=service_url)
コード例 #16
0
 def create(self, data):
     """Create a Post (with sponsorship/announcement flags) in the group.

     Raises PermissionError when the group does not exist.
     """
     try:
         group = Group.objects.get(id=self.context['view'].kwargs.get("pk"))
     except Exception:
         raise PermissionError('Group Not found')
     current_user = self.context['request'].user
     data = self.context['request'].data

     def _flag(name):
         # Request flags arrive as strings; normalise to a real bool.
         return data.get(name, 'false').strip().lower() == 'true'

     ogp = opengraph.OpenGraph(url=data["link"]).__str__() if 'link' in data else ""
     return Post.objects.create(
         ogp=ogp,
         link=data.get("link", None),
         picture=data.get("picture", None),
         content=data["content"],
         group=group,
         creator=current_user,
         is_sponsored=_flag("is_sponsored"),
         is_announcement=_flag("is_announcement"))
コード例 #17
0
ファイル: app1.py プロジェクト: RAHUL04/ogparser
def results():
    """Render the OG-parse results page for the submitted keyword/URL.

    Falls back to an error message when the URL is unreachable or the
    page has no description meta tag.
    """
    try:
        want1 = request.form['keyword']

        header = "http://"
        url = header + want1
        # Python-2 print statements replaced with print() so the module
        # is importable on Python 3.
        print(url)
        f = urlopen(url).read()
        tree = etree.HTML(f)
        general = tree.xpath("//meta[@name='description']")[0].get("content")
        meta = opengraph.OpenGraph(url)
        print(meta)
        return render_template("results.html", data=meta, gog=general)
    except IOError:
        meta = {}
        general = "INvalid Url"
        return render_template("results.html", data=meta, gog=general)
    except Exception:
        meta = {}
        general = "these sites dont have a meta tags description they are THE BOSS!!!!!"
        return render_template("results.html", data=meta, gog=general)
コード例 #18
0
ファイル: twyt.py プロジェクト: kwiff/8DZoneUploader
    async def watch(self,
                    ctx,
                    url: str,
                    message="%(title)s by %(channelTitle)s just published!"):
        """Add a YouTube/Twitch channel to watch for new uploads.

		XXX Only works with YouTube channels right now.
		XXX Sort of works with livestreams on YouTube.  The bot announces new video uploads, and YouTube treats livestreams like videos.
		XXX Doesn't allow you to change the Discord channel."""

        if not url:
            await self.bot.responses.failure(
                title="No URL Specified", message="You need to give me a URL!")
            return

        og = opengraph.OpenGraph(url=url)
        channel_url = og.get('url', '')
        if channel_url.startswith("https://www.youtube.com/channel/"):
            self.checklist.append(
                YouTubeItem(
                    self.youtube,
                    channel_url.replace("https://www.youtube.com/channel/",
                                        ""),
                    DiscordChannel(self.bot, ctx.message.channel.id,
                                   message + "  %(url)s")))
            await self.bot.responses.basic(
                message="This YouTube channel has been added!")
        elif False:
            pass
        else:
            await self.bot.responses.failure(
                title="Not a YouTube/Twitch Channel",
                message=
                "The URL you have given me is not a YouTube/Twitch channel!")
            return

        self._save()
コード例 #19
0
ファイル: opengraph.py プロジェクト: dsblank/microblog.pub
def fetch_og_metadata(user_agent, col, remote_id):
    """Fetch OpenGraph metadata for every link in an activity's note.

    Looks up the activity by *remote_id* in collection *col*, downloads
    each linked page, and stores the parsed OpenGraph dicts under
    meta.og_metadata.  Returns the number of links processed (0 when the
    note has none).  Raises ValueError when the activity is unknown.
    """
    doc = col.find_one({'remote_id': remote_id})
    if not doc:
        raise ValueError
    note = doc['activity']['object']
    print(note)
    links = links_from_note(note)
    if not links:
        return 0
    # FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
    htmls = []
    for l in links:
        check_url(l)
        # Timeout added so a single unresponsive host cannot hang the
        # whole task (matches the 15s used by the sibling fetcher).
        r = requests.get(l, headers={'User-Agent': user_agent}, timeout=15)
        r.raise_for_status()
        htmls.append(r.text)
    links_og_metadata = [
        dict(opengraph.OpenGraph(html=html)) for html in htmls
    ]
    col.update_one({'remote_id': remote_id},
                   {'$set': {
                       'meta.og_metadata': links_og_metadata
                   }})
    return len(links)
コード例 #20
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_is_not_valid(self):
     """A page without OpenGraph metadata must fail validation."""
     graph = opengraph.OpenGraph(url='http://vdubmexico.com')
     self.assertFalse(graph.is_valid())
コード例 #21
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_required(self):
     """Scraping satisfies a required attribute missing from the OG tags."""
     graph = opengraph.OpenGraph(url='http://grooveshark.com', required_attrs=("description",), scrape=True)
     self.assertTrue(graph.is_valid())
コード例 #22
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_to_html(self):
     """Parsed OpenGraph data can be rendered back to HTML."""
     parsed = opengraph.OpenGraph(html=HTML)
     self.assertTrue(parsed.to_html())
コード例 #23
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_is_valid(self):
     """A page carrying proper OpenGraph metadata validates."""
     graph = opengraph.OpenGraph(url='http://grooveshark.com')
     self.assertTrue(graph.is_valid())
コード例 #24
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_isinstace(self):
     """A bare OpenGraph() call constructs an OpenGraph instance."""
     data = opengraph.OpenGraph()
     # assertIsInstance gives a precise failure message, unlike
     # assertTrue(isinstance(...)) which only reports "False is not true".
     self.assertIsInstance(data, opengraph.OpenGraph)
コード例 #25
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_url(self):
     """The fetched page's og:url is exposed via items['url']."""
     target = 'http://vimeo.com/896837'
     graph = opengraph.OpenGraph(url=target)
     self.assertEqual(graph.items['url'], target)
コード例 #26
0
ファイル: controller.py プロジェクト: oranmoshe/scrap_project
def scrap(url_):
    """Fetch OpenGraph data for *url_*, persist it, and log progress."""
    print('work!' + url_)
    graph = opengraph.OpenGraph(url=url_)
    db.update(url_, graph)
    print(graph)
コード例 #27
0
 def test_no_json(self):
     """to_json() reports an error dict when the json module is disabled."""
     opengraph.import_json = False
     graph = opengraph.OpenGraph(url='http://grooveshark.com')
     self.assertEqual(graph.to_json(), "{'error':'there isn't json module'}")
コード例 #28
0
ファイル: opengraph.py プロジェクト: sorpaas/microblogpub
def fetch_og_metadata(user_agent, links):
    """Fetch OpenGraph metadata for every fetchable HTML page in *links*.

    Pipeline per link: skip obvious media by file extension, skip
    ActivityPub objects, probe with HEAD (cheap content-type check),
    then GET and parse the OpenGraph tags.  Relative og:image/og:url
    values are made absolute against the fetched URL.  Entries without
    an og:url are dropped.  Returns a list of OpenGraph dicts.
    """
    res = []
    for l in links:
        # Try to skip media early
        mimetype, _ = mimetypes.guess_type(l)
        if mimetype and mimetype.split("/")[0] in ["image", "video", "audio"]:
            logger.info(f"skipping media link {l}")
            continue

        check_url(l)

        # Remove any AP objects
        try:
            lookup(l)
            continue
        except NotAnActivityError:
            pass
        except Exception:
            logger.exception(
                f"skipping {l} because of issues during AP lookup")
            continue

        # Cheap HEAD probe before committing to a full GET.
        try:
            h = requests.head(l,
                              headers={"User-Agent": user_agent},
                              timeout=3,
                              allow_redirects=True)
            h.raise_for_status()
        except requests.HTTPError as http_err:
            logger.debug(
                f"failed to HEAD {l}, got a {http_err.response.status_code}: {http_err.response.text}"
            )
            continue
        except requests.RequestException as err:
            logger.debug(f"failed to HEAD {l}: {err!r}")
            continue

        # A missing content-type on HEAD is tolerated; only an explicit
        # non-HTML type is rejected here.
        if h.headers.get("content-type") and not h.headers.get(
                "content-type").startswith("text/html"):
            logger.debug(f"skipping {l} for bad content type")
            continue

        try:
            r = requests.get(l,
                             headers={"User-Agent": user_agent},
                             timeout=5,
                             allow_redirects=True)
            r.raise_for_status()
        except requests.HTTPError as http_err:
            logger.debug(
                f"failed to GET {l}, got a {http_err.response.status_code}: {http_err.response.text}"
            )
            continue
        except requests.RequestException as err:
            logger.debug(f"failed to GET {l}: {err!r}")
            continue

        # FIXME(tsileo): check mimetype via the URL too (like we do for images)
        # Unlike the HEAD check, a missing content-type on GET is rejected.
        if not r.headers.get("content-type") or not r.headers.get(
                "content-type").startswith("text/html"):
            continue

        r.encoding = "UTF-8"
        html = r.text
        try:
            data = dict(opengraph.OpenGraph(html=html))
        except Exception:
            logger.exception(f"failed to parse {l}")
            continue

        # Keep track of the fetched URL as some crappy websites use relative URLs everywhere
        data["_input_url"] = l
        u = urlparse(l)

        # If it's a relative URL, build the absolute version
        if "image" in data and data["image"].startswith("/"):
            data["image"] = u._replace(path=data["image"],
                                       params="",
                                       query="",
                                       fragment="").geturl()

        if "url" in data and data["url"].startswith("/"):
            data["url"] = u._replace(path=data["url"],
                                     params="",
                                     query="",
                                     fragment="").geturl()

        if data.get("url"):
            res.append(data)

    return res
コード例 #29
0
ファイル: test.py プロジェクト: natanocr/opengraph
 def test_absolute(self):
     """Relative og:image URLs are resolved to absolute ones when scraping."""
     graph = opengraph.OpenGraph(url='http://www.crummy.com/software/BeautifulSoup/bs3/documentation.html', required_attrs=("image",), scrape=True)
     self.assertEqual(graph.items["image"], "http://www.crummy.com/software/BeautifulSoup/bs3/6.1.jpg")
コード例 #30
0
 def test_to_json(self):
     """to_json() yields a truthy str for a real page."""
     graph = opengraph.OpenGraph(
         url='http://www.youtube.com/watch?v=XAyNT2bTFuI')
     self.assertTrue(graph.to_json())
     self.assertTrue(isinstance(graph.to_json(), str))