Example #1
def collect_linter_previews():
    linters = megalinter.utils.list_all_linters()
    # Read file
    with open(LINKS_PREVIEW_FILE, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)
    updated = False
    # Collect info using web_preview
    for linter in linters:
        if (linter.linter_name not in data or megalinter.config.get(
                "REFRESH_LINTER_PREVIEWS", "false") == "true"):
            logging.info(
                f"Collecting link preview info for {linter.linter_name} at {linter.linter_url}"
            )
            title, description, image = web_preview(linter.linter_url,
                                                    parser="html.parser",
                                                    timeout=1000)
            item = {
                "title": megalinter.utils.decode_utf8(title),
                "description": megalinter.utils.decode_utf8(description),
                "image": image,
            }
            data[linter.linter_name] = item
            updated = True
    # Update file
    if updated:
        with open(LINKS_PREVIEW_FILE, "w", encoding="utf-8") as outfile:
            json.dump(data, outfile, indent=4, sort_keys=True)
Example #2
    async def codex(self, context):
        offset = len("!senpai codex")

        mod_name = context.message.content[offset + 1:].strip()

        # check if the user actually asked for an entry
        if not mod_name:
            await context.send("`Operator, what codex entry are you looking for?`")
            return

        # Capitalize each word and join with underscores to match the wiki's page names
        mod_name = "_".join(elem.capitalize() for elem in mod_name.split())

        mod_url = _WARFRAME_WIKIA_URL.format(mod_name)

        try:
            title, description, image_url = webpreview.web_preview(mod_url)
            embed_msg = discord.Embed(title=title, url=mod_url, color=COLOR)
            embed_msg.add_field(name="Description",
                                value=description,
                                inline=True)
            embed_msg.set_image(url=image_url)
            await context.send(embed=embed_msg)
        except Exception as e:
            print(repr(e))
            await context.send(
                "`Operator, my codex does not seem to have an entry for this`")
Example #3
def get_url_info(url):
    '''
    Capture Open Graph data from links.
    It tries to get everything from Facebook first.
    TODO: Use a default image when no image is found.
    '''
    token = settings.FACEBOOK_TOKEN
    fb_graph = GraphAPI(access_token=token, version='2.10')
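    # Fetch engagement stats and the page's Open Graph object in a single call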
    fb_info = fb_graph.get_object(
        id=quote(url),
        fields=['engagement,og_object{image,description,title,updated_time}'])
    if fb_info:
        try:
            return dict(thumbnail=fb_info['og_object']['image'][0]['url'],
                        facebook_shares=fb_info['engagement']['share_count'],
                        title=fb_info['og_object']['title'],
                        description=fb_info['og_object']['description'],
                        source=url.split('/')[2])
        except KeyError:
            from webpreview import web_preview
            metadata = web_preview(url)
            return dict(thumbnail=metadata[2],
                        facebook_shares=fb_info['engagement']['share_count'],
                        title=metadata[0],
                        description=metadata[1],
                        source=url.split('/')[2])
    else:
        return dict(thumbnail='',
                    facebook_shares=0,
                    title='',
                    description='',
                    source=url.split('/')[2])
Example #4
    def post(self, request, *args, **kwargs):
        content = request.POST['content']
        try:
            oembed_preview = providers.request(content)
        except ProviderNotFoundException:
            oembed_preview = {}

        # Always generate web_preview as fallback for oembed_preview
        try:
            title, description, image = web_preview(content,
                                                    parser='html.parser')
        except (requests.exceptions.InvalidURL, webpreview.excepts.EmptyURL):
            return JsonResponse({'preview': None, 'title': None})

        if 'html' in oembed_preview:
            preview_html = oembed_preview['html']
        elif image:
            preview_html = f'<img src="{image}" alt="Website Preview" />'
        else:
            preview_html = '<div class="unavailable">Preview not available for this URL</div>'

        if 'title' in oembed_preview:
            title = oembed_preview['title']

        return JsonResponse({'preview': preview_html, 'title': title})
Example #5
    async def yugioh(self, context):
        offset = len("!senpai yugioh")

        card_name = context.message.content[offset + 1:].strip()

        # check if the user actually provided a card name
        if not card_name:
            await context.send("`Usage: !senpai yugioh [card name]`")
            return

        # Join the words with underscores to match the wiki's page names
        card_name = "_".join(card_name.split())

        formatted_url = _YUGIOH_WIKIA_URL.format(card_name)

        try:
            title, description, image_url = webpreview.web_preview(
                formatted_url)
            embed_msg = discord.Embed(title=title,
                                      url=formatted_url,
                                      color=COLOR)
            embed_msg.add_field(name="Description",
                                value=description,
                                inline=True)
            embed_msg.set_image(url=image_url)
            await context.send(embed=embed_msg)
        except Exception as e:
            print(repr(e))
            await context.send(
                "`KaibaCorp does not have any information on this card`")
Example #6
def _get_site_title_and_description(url, content):
    try:
        logger.info('Getting metadata for {}'.format(url))
        title, description, _ = web_preview(url, content=content)
        # Truncate overly long descriptions
        if description and len(description) > 450:
            description = description[:450] + '...'
        return title, description
    except Exception:
        logger.exception('Could not get metadata for {}'.format(url))
        return '', ''
Example #7
def share(request):
    tags = [t[0] for t in Content.TAGS]
    if request.method == 'GET':
        return render(request, 'web/share.jinja2', {"tags": tags})
    if request.method == 'POST':
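        # request.limited is typically set by a rate-limiting decorator such as django-ratelimit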
        was_limited = getattr(request, 'limited', False)
        if was_limited:
            return render(
                request, 'web/share.jinja2', {
                    "error":
                    "Please don't spam, you can share 2 URL every 5 minute. Thanks!",
                    "tags": tags
                })

        url = request.POST.get('url')
        tag = request.POST.get('tag')

        if not is_valid_url(url):
            return render(request, 'web/share.jinja2', {
                "error": "Not a valid URL!",
                "tags": tags
            })
        if tag not in tags:
            return render(request, 'web/share.jinja2', {
                "error": "Enter valid tag!",
                "tags": tags
            })

        if not is_domain_in_list(url):
            return render(request, 'web/share.jinja2', {
                "error": "This is not a supported URL!",
                "tags": tags
            })

        title, desc, img_url = web_preview(url)
        if title and img_url:
            cntnt = Content(link=url,
                            title=title,
                            description=desc,
                            image=img_url,
                            tag=tag)
            cntnt.save()
            return render(request, 'web/share.jinja2', {
                "success": "URL added successfully",
                "tags": tags
            })

        return render(request, 'web/share.jinja2', {
            "error": "URL cannot be parsed, please try another URL.",
            "tags": tags
        })
Example #8
    async def Ladd(self, ctx, link, *tags):
        """
        Parameters:
        link -> the link to add
        tags -> up to three tags
        """
        msg = ""
        authID = ctx.author.id
        chanName = ctx.channel.name

        # Normalize all tags to lowercase
        tags = [tag.lower() for tag in tags]

        if val.url(link):
            # The link is valid: try to fetch a title and description
            title, description = "", ""
            if ".pdf" not in link:
                try:
                    ret = web_preview(link, timeout=1)
                    title, description = ret[0], ret[1]
                    if description:
                        description = description.replace("\"", "'")
                except Exception:
                    pass
            else:
                # For PDFs, use the file name as the title
                title, description = link.split("/")[-1], "Fichier PDF a télécharger."

            lienAjoute = mdb.addLien(link, chanName, "??", authID, title, description)
            if tags:
                msg = "Lien ajouté avec les tags :"
                for tag in tags:
                    # Use the canonical tag if a synonym is registered
                    tag_tmp = mdb.searchSynonymeByPrimKey(tag)
                    if tag_tmp:
                        tag = tag_tmp[0][2]

                    mdb.addTag(tag, "", authID)
                    mdb.addTagmap(link, tag)
                    msg += " " + tag
            else:
                msg = "Lien ajouté sans tag"
            if not lienAjoute:
                msg = "Le lien existe déjà dans la base de donnée ou une erreur a eu lieu."
        else:
            msg = "Le lien n'est pas conforme"

        await ctx.channel.send(msg)
Example #9
def find_artist(update: Update, context: CallbackContext) -> None:
    data = update.message.text
    logger.info(f"data from user {data}")

    try:
        web_prev = web_preview(data)
        full = web_prev[1]
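        # The og:description usually reads "<track>, a song by <artist> on Spotify"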
        artist = full[full.find("a song by") +
                      len("a song by "):full.find(" on Spotify")]
        wiki_info = wikipedia.summary(artist)
        update.message.reply_text(wiki_info)

    except Exception:
        update.message.reply_text(
            "Your link is incorrect! Please provide a track link (you can get it from the share button in Spotify)"
        )
Example #10
    def parse_using_web(self):
        # Define url_info before the try block so the except handler can use it
        url_info = {"parser": "html.parser"}

        try:
            title, description, image = web_preview(self.url,
                                                    parser="html.parser")

            url_info['title'] = title
            url_info['description'] = description
            url_info['image'] = image

            return url_info
        except Exception as e:
            url_info['error'] = e
            return url_info
Example #11
def save_images_from_rss():

    # Needed to prevent bozo_exception
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    # Delete all old image files
    file_list = os.listdir(app.root_path + '/img/')
    for file in file_list:
        os.remove(app.root_path + '/img/' + file)

    # Image naming doesn't really matter, so we'll name it a number
    image_number = 0

    # Loop through each rss feed
    for pub, feed_url in rss_feeds.items():
        feed = feedparser.parse(feed_url)
        for post in range(POSTS_FROM_EACH_FEED):
            try:
                # Get share image and save to server
                title, description, image = web_preview(
                    feed.entries[post]['link'])
                path = urlparse(image).path
                ext = os.path.splitext(path)[1]
                # If no extension save as jpg
                VALID_IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif"]
                if ext in VALID_IMAGE_EXTENSIONS:
                    urllib.request.urlretrieve(
                        image,
                        app.root_path + '/img/' + str(image_number) + ext)
                elif not ext:
                    urllib.request.urlretrieve(
                        image,
                        app.root_path + '/img/' + str(image_number) + '.jpg')
                image_number += 1
            except Exception as e:
                print('Could not get image:', repr(e))

    # Delete files that are not of a certain size
    file_list = os.listdir(app.root_path + '/img/')
    for file in file_list:
        if os.path.getsize(app.root_path + '/img/' +
                           file) < MIN_IMAGE_SIZE * 1024:
            os.remove(app.root_path + '/img/' + file)

    return
Example #12
def get_url_metadata(preview_link):
    title, description, image_url = web_preview(preview_link,
                                                parser='html.parser')
    favicon_url = get_favicon_url_from_url(preview_link)
    domain_url = get_url_domain(preview_link)
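    # Route preview and favicon images through the local image proxy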
    if image_url is not None:
        image_url = make_proxy_image_url(image_url)
    if favicon_url is not None:
        favicon_url = make_proxy_image_url(favicon_url)

    return {
        'title': title,
        'description': description,
        'image_url': image_url,
        'favicon_url': favicon_url,
        'domain_url': domain_url
    }
Example #13
def geturl(stringsubmitted):
    headers = {'User-Agent': 'Mozilla/5.0'}
    splits = stringsubmitted.split()
    value = ''
    urltitle = ''
    urldescription = ''
    urlimage = ''

    for f in splits:
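        # Scan each whitespace-separated token for the first usable URL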
        try:
            if len(f) > 5:
                if f.endswith(('.jpg', '.png', '.gif', '.jpeg', '.JPG',
                               '.webp')):
                    pass
                else:
                    if not f.lower().startswith(("http://", "https://")):
                        f = 'https://' + f

                    # Validate the candidate URL before fetching a preview
                    if validators.url(f) and checkers.is_url(f):
                        value = f
                        urltitle, urldescription, urlimage = web_preview(
                            value, headers=headers)
                        break
                    else:
                        value = ''
                        urltitle = ''
                        urldescription = ''
                        urlimage = ''
        except Exception:
            pass

    if value is None:
        value = ''
    if urltitle is None:
        urltitle = ''
    if urldescription is None:
        urldescription = ''
    if urlimage is None:
        urlimage = ''

    return value, urltitle, urldescription, urlimage
Example #14
def get_articles(request):
    limit = int(request.GET['limit'])
    articles = Press.objects.order_by('-id')[:limit]
    body = []

    for a in articles:
        art_dict = {}
        title = a.title
        description = a.description
        image = a.image
        site = a.site
        if not (title and description and image and site):
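            # Fetch missing metadata once and cache it on the model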
            title, description, image = web_preview(a.url)
            site = "/".join(a.url.split("/")[:3])
            try:
                # Prefix relative image paths with the site root
                if "http" not in image:
                    image = "{}/{}".format(site, image)
            except TypeError:
                # image may be None here
                pass
            a.title = title
            a.description = description
            a.site = site
            a.image = image
            a.save()

        art_dict['image'] = image
        art_dict['title'] = title
        art_dict['description'] = description
        art_dict['site'] = site.split("/")[-1]
        art_dict['url'] = a.url
        body.append(art_dict)

    data = {
        'headers': {
            'Content-Type': 'application/json',
            'Access-Control-Allow-Origin': '*',
        },
        'body': body
    }
    return HttpResponse(simplejson.dumps(data))
Example #15
    def _get_article_image(self, article_q, final_q):
        """ Gets article link and other info, then gets URL for that article image
        """
        try:
            work = True
            while work:
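                # A None item is the sentinel telling this worker to stop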
                data = article_q.get(block=True)
                if data is None:
                    work = False
                    final_q.put(None)
                    return
                else:
                    source = data["source"]
                    article_title = data["title"]
                    article_summary = data["summary"]
                    article_link = data["link"]
                    t = data["time"]

                    tc = TwitterCard(article_link, ["twitter:image"])
                    if tc.image is None:
                        _, _, image = web_preview(article_link, parser="html.parser")
                        if image is None:
                            # Fall back to a per-source default logo
                            if source == "standard_agrix":
                                img = "https://www.farmers.co.ke/assets/images/logo.png"
                            elif source == "standard_biz":
                                img = "https://www.standardmedia.co.ke/common/i/standard-digital-world-inner-page.png"
                            elif source == "business_daily":
                                img = "https://www.businessdailyafrica.com/image/view/-/3818190/medRes/1349497/-/3ijc6bz/-/logoNew.png"
                            else:
                                img = None
                        else:
                            img = image
                    else:
                        img = tc.image

                    fin_dict = {"title": article_title, "summary": article_summary, "link": article_link, "source": source, "image": img, "time": t}

                    final_q.put(fin_dict)
        except Exception as e:
            raise RuntimeError(e) 
Example #16
    def handle(self, *args, **options):
        with open(
                os.path.dirname(os.path.realpath(__file__)) + os.sep +
                "rss_url_list.txt", 'r') as r_f:
            for data in r_f:
                if not data or data == '\n':
                    continue
                url_data = re.split(r'\t+', data.rstrip())
                raw_data = reader.read(url_data[1])
                for line in raw_data:
                    link = unquote(line[2])
                    # Unwrap Google redirect URLs to get the real article link
                    m = re.search(
                        r'https://www\.google\.com/url\?.*url=('
                        r'https://.*)&ct=ga&cd=.*', link)
                    if m:
                        link = m.group(1)
                    try:
                        title, description, image = web_preview(link)
                        if image is None:
                            image = "https://www.freeiconspng.com/uploads/no-image-icon-6.png"
                        try:
                            tag = Tag.objects.get(tag_text=url_data[0])
                        except Tag.DoesNotExist:
                            tag = Tag(tag_text=url_data[0])
                            tag.save()
                        article = Article(pub_date=timezone.now(),
                                          title_text=line[0],
                                          summary_text=line[1],
                                          link_text=link,
                                          image_url=image,
                                          state=0)
                        article.save()
                        article.tags.add(tag)
                        self.stdout.write(
                            self.style.SUCCESS(
                                'Successfully saved article. title: "%s"' %
                                article.title_text))
                    except Exception as e:
                        print(e)
Example #17
# coding: utf-8

from webpreview import web_preview

url = "http://www.streetpress.com/sujet/1488190869-tribunal-de-l-armee"
title, description, image = web_preview(url)

# Fill a minimal HTML page with Twitter Card and Open Graph metadata
template = """<html><head><meta name="twitter:card" content="" />
<meta name="twitter:site" content="" />
<meta name="twitter:title" content="%s" />
<meta name="twitter:description" content="%s" />
<meta name="twitter:image" content="%s" />
<meta property="og:type" content="website">
<meta property="og:title" content="%s">
<meta property="og:description" content="%s">
<meta property="og:url" content="%s">
<meta property="og:image" content="%s">
</head><body></body></html>""" % (title, description, image, title, description, url, image)

with open('index.html', 'w') as f:
    f.write(template)

Example #18
def web_preview_link(link):
    return web_preview(link)
Example #19
from webpreview import web_preview
from pprint import pprint

url = 'auctorial.com'

meta = web_preview(url)

pprint(meta)
Example #20
    def resolve_new_activities(self, info, **kwargs):
        import json
        email = kwargs.get('email')

        if email and BoredUser.objects.filter(email=email).exists():
            import requests
            import random
            from webpreview import web_preview
            prob = random.random()
            user = BoredUser.objects.get(email=email)
            weights = user.category_weights.split(',')
            weights = [int(x) for x in weights]
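            # The favorite category is the one with the highest stored weight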
            fav_category = weights.index(max(weights))
            # print(fav_category)
            CATEGORIES = [
                "education", "recreational", "social", "diy", "charity",
                "cooking", "relaxation", "music", "busywork"
            ]
            from google import google

            if prob > 0.6:
                resp = requests.get("https://www.boredapi.com/api/activity/")
            else:
                resp = requests.get(
                    "https://www.boredapi.com/api/activity?type=" +
                    CATEGORIES[fav_category])
            resp = json.loads(resp.text)

            from googleapiclient.discovery import build
            API_KEY = "AIzaSyDIFF1lEqsuX-9vd-W8cUu1unZH5oeQe4s"
            CSE_ID = "010456897677353178205:0mffw7ezvwy"

            def google_search(search_term, api_key, cse_id, **kwargs):
                service = build("customsearch", "v1", developerKey=api_key)
                res = service.cse().list(q=search_term, cx=cse_id,
                                         **kwargs).execute()
                return res

            # result = google_search(resp['activity'], API_KEY, CSE_ID)
            num_page = 1
            result = google.search(resp['activity'], num_page)
            activities = []

            for search_results in result[:6]:
                activity = Activity()
                activity.name = resp['activity']
                activity.category = resp['type']
                activity.participants = int(resp['participants'])
                activity.key = resp['key']

                activity.title = search_results.name
                activity.link = search_results.link
                activity.description = search_results.description
                img_url = web_preview(activity.link)[2]
                # img_url = ""
                activity.thumb = img_url if img_url else ''
                activity.save()
                activities.append(activity)

            user.last_activity = activity
            user.save()
            return activities
Example #21
    if image is None:
        image = "https://cdn.crabber.net/img/avatar.jpg"

    return title, description, image


with Lock("fetch-cards") as lock:
    if lock:
        app.app_context().push()

        for card in Card.query_unready():
            try:
                metadata = web_preview(
                    # Redirect Twitter to Nitter (they've started requiring
                    # javascript... so dumb.)
                    card.url.replace(
                        "https://twitter.com", "https://nitter.actionsack.com"
                    ),
                    timeout=2,
                )
                if metadata:
                    card.title, card.description, card.image = metadata
                    if card.title is not None:
                        # Skip pages whose title is an HTTP error status
                        if not card.title.startswith(("404 ", "403 ", "500 ")):
                            card.ready = True
                            print(f"Fetched {card.url}")
            except (URLUnreachable, URLNotFound, RequestException, KeyError):