Example #1
def parse_feed_xml(source_feed, feed_content, output):

    ok = True
    changed = False

    # output.write(ret.content)
    try:

        _customize_sanitizer(parser)
        # need to start checking feed parser errors here
        f = parser.parse(feed_content)
        entries = f["entries"]
        if entries:
            # in case we start auto unsubscribing long dead feeds
            source_feed.last_success = timezone.now()
        else:
            source_feed.last_result = "Feed is empty"
            ok = False

    except Exception as ex:
        source_feed.last_result = "Feed Parse Error"
        entries = []
        ok = False

    if ok:
        try:
            source_feed.name = update_source_name(source_feed.name,
                                                  f.feed.title)
        except Exception:
            pass

        try:
            source_feed.site_url = f.feed.link
        except Exception:
            pass

        try:
            source_feed.image_url = f.feed.image.href
        except Exception:
            pass

        # either of these is fine; prefer description over summary
        # feedparser will also give us itunes:summary etc. if it's there
        try:
            source_feed.description = f.feed.summary
        except Exception:
            pass

        try:
            source_feed.description = f.feed.description
        except Exception:
            pass

        # output.write(entries)
        # Entries are typically in reverse chronological order - put them in the right order
        entries.reverse()
        for e in entries:
            # we are going to take the longest of the available body fields
            body = ""

            if hasattr(e, "content"):
                for c in e.content:
                    if len(c.value) > len(body):
                        body = c.value

            if hasattr(e, "summary"):
                if len(e.summary) > len(body):
                    body = e.summary

            if hasattr(e, "summary_detail"):
                if len(e.summary_detail.value) > len(body):
                    body = e.summary_detail.value

            if hasattr(e, "description"):
                if len(e.description) > len(body):
                    body = e.description

            body = fix_relative(body, source_feed.site_url)

            try:
                guid = e.guid
            except Exception as ex:
                try:
                    guid = e.link
                except Exception as ex:
                    m = hashlib.md5()
                    m.update(body.encode("utf-8"))
                    guid = m.hexdigest()

            try:
                p = Post.objects.filter(source=source_feed, guid=guid)[0]
                output.write("EXISTING " + guid + "\n")

            except Exception as ex:
                output.write("NEW " + guid + "\n")
                p = Post(index=0, body=" ")
                p.found = timezone.now()
                changed = True
                p.source = source_feed

            try:
                title = e.title
            except Exception:
                title = ""

            try:
                p.link = e.link
            except Exception:
                p.link = ""
            p.title = title

            try:
                p.image_url = e.image.href
            except Exception:
                pass

            try:
                # If there is no published_parsed entry, try updated_parsed
                if "published_parsed" in e:
                    time_struct = e.published_parsed
                else:
                    time_struct = e.updated_parsed

                p.created = datetime.datetime.fromtimestamp(
                    time.mktime(time_struct)
                ).replace(tzinfo=timezone.utc)

            except Exception:
                output.write("CREATED ERROR\n")

            p.guid = guid
            try:
                p.author = e.author
            except Exception as ex:
                p.author = ""

            try:
                p.save()
                # output.write(p.body)
            except Exception as ex:
                # import pdb; pdb.set_trace()
                output.write(str(ex))

            try:
                seen_files = []
                for ee in list(p.enclosures.all()):
                    # check existing enclosure is still there
                    found_enclosure = False
                    for pe in e["enclosures"]:

                        if pe["href"] == ee.href and ee.href not in seen_files:
                            found_enclosure = True

                            try:
                                ee.length = int(pe["length"])
                            except Exception:
                                ee.length = 0

                            try:
                                file_type = pe["type"]
                            except Exception:
                                # we are assuming podcasts here but that's probably not safe
                                file_type = "audio/mpeg"

                            ee.type = file_type
                            ee.save()
                            break
                    if not found_enclosure:
                        ee.delete()
                    seen_files.append(ee.href)

                for pe in e["enclosures"]:
                    try:
                        if pe["href"] not in seen_files:

                            try:
                                length = int(pe["length"])
                            except Exception:
                                length = 0

                            try:
                                file_type = pe["type"]
                            except Exception:
                                file_type = "audio/mpeg"

                            ee = Enclosure(post=p,
                                           href=pe["href"],
                                           length=length,
                                           type=file_type)
                            ee.save()
                    except Exception:
                        pass
            except Exception as ex:
                if output:
                    output.write("No enclosures - " + str(ex))

            try:
                p.body = body
                p.save()
                # output.write(p.body)
            except Exception as ex:
                output.write(str(ex))
                output.write(p.body)

            try:
                if "tags" in e:
                    for t in e.tags:
                        tag, created = Tag.objects.get_or_create(**t)
                        p.tags.add(tag)
                        print(f"Tag {tag} added to post {p}")
            except Exception as ex:
                # don't reference `tag` here - it may never have been assigned
                output.write(str(ex))
                output.write(f"couldn't add tags to post {p}")

    return (ok, changed)
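
A minimal sketch of how a refresh task might drive parse_feed_xml, assuming a Django SourceFeed model and the requests library; fetch_and_parse, the feed_url field, and the timeout value are illustrative assumptions, not part of the original code.

import sys

import requests


def fetch_and_parse(source_feed):
    # fetch the raw feed body and hand it to the parser above;
    # parse_feed_xml mutates source_feed (last_success, name, etc.),
    # so persist it once parsing is done
    resp = requests.get(source_feed.feed_url, timeout=30)  # feed_url is assumed
    ok, changed = parse_feed_xml(source_feed, resp.content, sys.stdout)
    source_feed.save()
    return (ok, changed)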
Example #2
def parse_feed_json(source_feed, feed_content, output):

    ok = True
    changed = False

    try:
        f = json.loads(feed_content)
        entries = f["items"]
        if entries:
            # in case we start auto unsubscribing long dead feeds
            source_feed.last_success = timezone.now()
        else:
            source_feed.last_result = "Feed is empty"
            source_feed.interval += 120
            ok = False

    except Exception as ex:
        source_feed.last_result = "Feed Parse Error"
        entries = []
        source_feed.interval += 120
        ok = False

    if ok:

        if "expired" in f and f["expired"]:
            # This feed says it is done
            # TODO: permanently disable
            # for now, push source_feed.interval to max
            source_feed.interval = 24 * 3 * 60
            source_feed.last_result = "This feed has expired"
            return (False, False)

        try:
            source_feed.site_url = f["home_page_url"]
            source_feed.name = update_source_name(source_feed.name, f["title"])
        except Exception as ex:
            pass

        if "description" in f:
            _customize_sanitizer(parser)
            source_feed.description = parser._sanitizeHTML(
                f["description"], "utf-8", "text/html")

        _customize_sanitizer(parser)
        source_feed.name = update_source_name(
            source_feed.name,
            parser._sanitizeHTML(source_feed.name, "utf-8", "text/html"),
        )

        if "icon" in f:
            source_feed.image_url = f["icon"]

        # output.write(entries)
        # Entries are typically in reverse chronological order - put them in the right order
        entries.reverse()
        for e in entries:
            body = " "
            if "content_text" in e:
                body = e["content_text"]
            if "content_html" in e:
                body = e["content_html"]  # prefer html over text

            body = fix_relative(body, source_feed.site_url)

            try:
                guid = e["id"]
            except Exception as ex:
                try:
                    guid = e["url"]
                except Exception as ex:
                    m = hashlib.md5()
                    m.update(body.encode("utf-8"))
                    guid = m.hexdigest()

            try:
                p = Post.objects.filter(source=source_feed, guid=guid)[0]
                output.write("EXISTING " + guid + "\n")

            except Exception as ex:
                output.write("NEW " + guid + "\n")
                p = Post(index=0, body=" ")
                p.found = timezone.now()
                changed = True
                p.source = source_feed

            try:
                title = e["title"]
            except Exception as ex:
                title = ""

            # borrow the RSS parser's sanitizer
            _customize_sanitizer(parser)
            body = parser._sanitizeHTML(
                body, "utf-8", "text/html")  # TODO: validate charset ??
            _customize_sanitizer(parser)
            title = parser._sanitizeHTML(
                title, "utf-8", "text/html")  # TODO: validate charset ??
            # no other fields are ever marked as |safe in the templates

            if "banner_image" in e:
                p.image_url = e["banner_image"]

            if "image" in e:
                p.image_url = e["image"]

            try:
                p.link = e["url"]
            except Exception as ex:
                p.link = ""

            p.title = title

            try:
                p.created = pyrfc3339.parse(e["date_published"])
            except Exception as ex:
                output.write("CREATED ERROR")
                p.created = timezone.now()

            p.guid = guid
            try:
                p.author = e["author"]
            except Exception as ex:
                p.author = ""

            p.save()

            try:
                seen_files = []
                for ee in list(p.enclosures.all()):
                    # check existing enclosure is still there
                    found_enclosure = False
                    if "attachments" in e:
                        for pe in e["attachments"]:

                            if pe["url"] == ee.href and ee.href not in seen_files:
                                found_enclosure = True

                                try:
                                    ee.length = int(pe["size_in_bytes"])
                                except Exception:
                                    ee.length = 0

                                try:
                                    file_type = pe["mime_type"]
                                except Exception:
                                    # we are assuming podcasts here but that's probably not safe
                                    file_type = "audio/mpeg"

                                ee.type = file_type
                                ee.save()
                                break
                    if not found_enclosure:
                        ee.delete()
                    seen_files.append(ee.href)

                if "attachments" in e:
                    for pe in e["attachments"]:

                        try:
                            if pe["url"] not in seen_files:

                                try:
                                    length = int(pe["size_in_bytes"])
                                except Exception:
                                    length = 0

                                try:
                                    filetype = pe["mime_type"]
                                except Exception:
                                    filetype = "audio/mpeg"

                                ee = Enclosure(post=p,
                                               href=pe["url"],
                                               length=length,
                                               type=filetype)
                                ee.save()
                        except Exception as ex:
                            pass
            except Exception as ex:
                if output:
                    output.write("No enclosures - " + str(ex))

            try:
                p.body = body
                p.save()
                # output.write(p.body)
            except Exception as ex:
                output.write(str(ex))
                output.write(p.body)

            try:
                if "tags" in e:
                    for t in e["tags"]:
                        tag, created = Tag.objects.get_or_create(**t)
                        p.tags.add(tag)
                        print(f"Tag {tag} added to post {p}")
            except Exception as ex:
                # don't reference `tag` here - it may never have been assigned
                output.write(str(ex))
                output.write(f"couldn't add tags to post {p}")

    return (ok, changed)
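
For reference, a minimal JSON Feed document that exercises the fields parse_feed_json reads (title, home_page_url, icon, and items with content_html, date_published, and attachments); the values here are illustrative only, not taken from a real feed.

import sys

sample_feed = """{
  "version": "https://jsonfeed.org/version/1",
  "title": "Example Podcast",
  "home_page_url": "https://example.com/",
  "icon": "https://example.com/icon.png",
  "items": [{
    "id": "urn:example:episode-1",
    "url": "https://example.com/episodes/1",
    "title": "Episode 1",
    "content_html": "<p>Show notes here.</p>",
    "date_published": "2023-01-01T00:00:00Z",
    "attachments": [{
      "url": "https://example.com/episodes/1.mp3",
      "mime_type": "audio/mpeg",
      "size_in_bytes": 123456
    }]
  }]
}"""

# ok, changed = parse_feed_json(source_feed, sample_feed, sys.stdout)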
Example #3
def parse_feed_json(source_feed, feed_content, output):

    ok = True
    changed = False

    try:
        f = json.loads(feed_content)
        entries = f['items']
        if entries:
            # in case we start auto unsubscribing long dead feeds
            source_feed.last_success = timezone.now()
        else:
            source_feed.last_result = "Feed is empty"
            source_feed.interval += 120
            ok = False

    except Exception as ex:
        source_feed.last_result = "Feed Parse Error"
        entries = []
        source_feed.interval += 120
        ok = False

    if ok:

        if "expired" in f and f["expired"]:
            # This feed says it is done
            # TODO: permanently disable
            # for now, push source_feed.interval to max
            source_feed.interval = 24 * 3 * 60
            source_feed.last_result = "This feed has expired"
            return (False, False)

        try:
            source_feed.site_url = f["home_page_url"]
            if not source_feed.name:
                source_feed.name = f["title"]
        except Exception as ex:
            pass

        if "description" in f:
            _customize_sanitizer(feedparser)
            source_feed.description = feedparser._sanitizeHTML(
                f["description"], "utf-8", 'text/html')

        _customize_sanitizer(feedparser)
        if source_feed.name:
            source_feed.name = feedparser._sanitizeHTML(
                source_feed.name, "utf-8", 'text/html')

        if "icon" in f:
            source_feed.image_url = f["icon"]

        # Entries are typically in reverse chronological order - put them in the right order
        entries.reverse()
        for e in entries:
            body = " "
            if "content_text" in e:
                body = e["content_text"]
            if "content_html" in e:
                body = e["content_html"]  # prefer html over text

            body = fix_relative(body, source_feed.site_url)

            try:
                guid = e["id"]
            except Exception as ex:
                try:
                    guid = e["url"]
                except Exception as ex:
                    m = hashlib.md5()
                    m.update(body.encode("utf-8"))
                    guid = m.hexdigest()

            try:
                p = Post.objects.filter(source=source_feed, guid=guid)[0]
                logging.info("EXISTING: %s", guid)
            except Exception as ex:
                logging.info("Creating new post %s.", guid)
                p = Post(index=0, body=' ')
                p.found = timezone.now()
                changed = True
                p.source = source_feed

            try:
                title = e["title"]
            except Exception as ex:
                title = ""

            # borrow the RSS parser's sanitizer
            _customize_sanitizer(feedparser)
            body = feedparser._sanitizeHTML(
                body, "utf-8", 'text/html')  # TODO: validate charset ??
            _customize_sanitizer(feedparser)
            title = feedparser._sanitizeHTML(
                title, "utf-8", 'text/html')  # TODO: validate charset ??
            # no other fields are ever marked as |safe in the templates

            if "banner_image" in e:
                p.image_url = e["banner_image"]

            if "image" in e:
                p.image_url = e["image"]

            try:
                p.link = e["url"]
            except Exception as ex:
                p.link = ''

            p.title = title

            try:
                p.created = pyrfc3339.parse(e["date_published"])
            except Exception as ex:
                logging.exception('Unable to parse published date.')
                p.created = timezone.now()

            p.guid = guid
            try:
                p.author = e["author"]
            except Exception as ex:
                p.author = ""

            p.save()

            try:
                seen_files = []
                for ee in list(p.enclosures.all()):
                    # check existing enclosure is still there
                    found_enclosure = False
                    if "attachments" in e:
                        for pe in e["attachments"]:
                            if pe["url"] == ee.href and ee.href not in seen_files:
                                found_enclosure = True
                                ee.length = int(pe.get("size_in_bytes") or 0)
                                ee.type = pe.get("mime_type") or "audio/mpeg"
                                ee.save()
                                break

                    # DANGEROUS! This deletes everything if a glitch in the feed removes enclosures.
                    # if not found_enclosure:
                    #     ee.delete()

                    seen_files.append(ee.href)

                if "attachments" in e:
                    for pe in e["attachments"]:
                        try:
                            # Many feeds embed ever-changing trackers in their enclosure URLs,
                            # yet almost always include only a single enclosure, so we only
                            # create a new enclosure for an unseen URL when no enclosure
                            # records exist yet. This is a more robust way of preventing
                            # logical duplicates caused by tracker URL changes than trying
                            # to predict and strip out every known tracker prefix.
                            if pe["url"] not in seen_files and not p.enclosures.all(
                            ).exists():
                                length = int(
                                    pe.get("size_in_bytes", None) or 0)
                                typ = pe.get("mime_type", None) or "audio/mpeg"
                                ee = Enclosure(post=p,
                                               href=pe["url"],
                                               length=length,
                                               type=typ)
                                ee.save()
                        except Exception as ex:
                            pass
            except Exception as ex:
                logging.exception("No enclosures")

            try:
                p.body = body
                p.save()
            except Exception as ex:
                logging.exception('Unable to save body A2.')

    return (ok, changed)
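
All three examples share the same GUID fallback chain: the feed-supplied id (guid/id), then the entry URL (link/url), then an MD5 of the body. A sketch of how that chain could be factored into a helper; make_guid is a hypothetical name, not part of the original code.

import hashlib


def make_guid(entry, body, id_key="id", url_key="url"):
    # hypothetical helper mirroring the fallback chain above: prefer the
    # feed-supplied id, then the entry URL, then a hash of the body so
    # repeated parses of the same entry produce a stable GUID.
    # For the XML parser, pass id_key="guid" and url_key="link".
    if entry.get(id_key):
        return entry[id_key]
    if entry.get(url_key):
        return entry[url_key]
    return hashlib.md5(body.encode("utf-8")).hexdigest()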