Example #1
0
def normalize_object_uri(uri):
    """Return ``uri`` in the form produced by ``canonical_url``."""
    normalized = canonical_url(uri)
    return normalized
Example #2
0
 def __call__(self, news_item):
     """Run ``self._match`` on the canonicalized source URL of ``news_item``.

     The URL is read from the item's ``'source_url'`` key; an empty
     string is used when the key is absent.  The result of
     ``self._match`` is returned unchanged.
     """
     raw_url = news_item.get('source_url', '')
     normalized = canonical_url(raw_url)
     return self._match(normalized)
Example #3
0
 def id_for_url(cls, url):
     """Return the ``melk_id`` of the lower-cased canonical form of ``url``."""
     # Canonicalize first, then lower-case the canonical form.
     return melk_id(canonical_url(url).lower())
Example #4
0
File: parse.py Project: jab/melkman
def parse_feed(content, feed_url):
    """Parse ``content`` as a feed and return a cleaned-up copy of the result.

    The parsed result is passed through ``dibjectify`` and then
    normalized in place:

    * the feed gets an ``id`` (the lower-cased canonical feed url) when it
      has none;
    * the feed gets a ``rel='self'`` link pointing at the canonical feed
      url when none of its links has that rel;
    * entries for which no id is present and ``find_best_entry_id``
      returns ``None`` are dropped;
    * every remaining entry gets a ``melk_id`` and a ``source`` holding a
      copy of the feed level information; a pre-existing ``source`` is
      preserved under ``original_source``.

    :param content: the raw feed document handed to ``feedparser.parse``.
    :param feed_url: the url the feed was obtained from; used as the
        ``content-location`` default header and as the basis for ids.
    :returns: the dibjectified parse result with ``entries`` replaced by
        the list of retained entries.
    :raises InvalidFeedError: if the dibjectified result is ``None`` or has
        no ``'feed'`` key.
    """
    # defaults handed to the parser in place of real response headers
    default_headers = {
        'content-location': feed_url,
        'content-type': 'text/xml; charset=utf-8',
    }

    # parse, then make a clean copy composed of built-in types
    parsed = dibjectify(
        feedparser.parse(content, header_defaults=default_headers))

    if parsed is None or 'feed' not in parsed:
        raise InvalidFeedError()

    #
    # cleanup of the feed level data
    #
    source_url = canonical_url(feed_url)

    # the feed must carry an id
    if 'id' not in parsed.feed:
        parsed.feed['id'] = source_url.lower()

    # the feed must carry a link that refers back to itself
    parsed.feed.setdefault('links', [])
    if not any(link.rel == 'self' for link in parsed.feed.links):
        parsed.feed.links.append(
            Dibject(rel='self', href=source_url, title=''))

    # gather the feed level fields that get copied into every entry
    feed_summary = Dibject()
    for field in ('id', 'title', 'title_detail', 'link', 'links', 'icon'):
        try:
            feed_summary[field] = deepcopy(parsed.feed[field])
        except KeyError:
            # the feed does not provide this field; leave it out
            continue

    kept = []
    for entry in parsed.get('entries', []):
        entry_id = entry.get('id', None)
        if entry_id is None:
            # no explicit id: try to derive one from the entry itself
            entry_id = find_best_entry_id(entry)
            if entry_id is None:
                # nothing recognizable (id, title, summary or content)
                # to build an id from, so the entry is discarded
                continue
            entry['id'] = entry_id

        # guid derived from the entry id together with the source url
        entry['melk_id'] = melk_id(entry_id, source_url.lower())

        # every entry gets a 'source' pointing back at this feed; one
        # that was already present is moved aside to 'original_source'
        if 'source' in entry:
            entry['original_source'] = entry.source

        entry.source = deepcopy(feed_summary)
        kept.append(entry)

    parsed['entries'] = kept

    return parsed