def __init__(self, *args, **kw):
    """Initialize the bucket, minting a random id when none is given.

    If no positional args are supplied and no explicit 'id' keyword
    is present, a fresh id is generated from a random nonce so the
    document is always addressable.
    """
    # idiomatic membership test ('id' not in kw) — behavior unchanged
    if not args and 'id' not in kw:
        args = [melk_id(nonce_str())]
    DocumentHelper.__init__(self, *args, **kw)
    # entries are loaded lazily on first access; _removed / _updated
    # track pending changes until they are flushed
    self._entries = None # lazy load
    self._removed = {}
    self._updated = {}
def id_for_url(cls, url):
    """Return the stable document id for *url*.

    The url is canonicalized and lowercased first, so equivalent
    spellings of the same URL always map to the same id.
    """
    normalized = canonical_url(url).lower()
    return melk_id(normalized)
def test_melk_id_unicode():
    """Smoke test: melk_id must accept a unicode string (with non-ASCII
    bytes embedded) without raising."""
    unicode_url = u'http://feeds.wired.com/wired/\xff\xff\xff\xffhttp://blog.wired.com/defense/2009/01/inside-israels.html'
    melk_id(unicode_url)
def random_id():
    """Return a freshly minted id based on a random nonce."""
    nonce = nonce_str()
    return melk_id(nonce)
def create(cls, context, *args, **kw):
    """Create a new bucket document, minting a random id when none is
    supplied.

    Without positional args or an explicit 'id' keyword, a fresh id
    is generated from a random nonce before delegating to the
    superclass factory.
    """
    # idiomatic membership test ('id' not in kw) — behavior unchanged
    if not args and 'id' not in kw:
        args = [melk_id(nonce_str())]
    return super(NewsBucket, cls).create(context, *args, **kw)
def parse_feed(content, feed_url):
    """Parse raw feed *content* fetched from *feed_url* into a cleaned-up
    structure of built-in types.

    The result is the feedparser output converted via dibjectify, with
    several normalizations applied: the feed is guaranteed an 'id' and a
    rel='self' link, every surviving entry is guaranteed an 'id' and a
    'melk_id', and each entry's 'source' is replaced with this feed's
    info (any pre-existing source is moved to 'original_source').

    Raises InvalidFeedError when the content cannot be parsed as a feed.
    """
    # present fake HTTP headers so feedparser knows the content
    # location and type even though we parse from a string
    fake_headers = {
        'content-location': feed_url,
        'content-type': 'text/xml; charset=utf-8',
    }
    ff = feedparser.parse(content, header_defaults=fake_headers)

    # make a clean copy composed of built-in types
    ff = dibjectify(ff)

    if ff is None or not 'feed' in ff:
        raise InvalidFeedError()

    #
    # perform some cleanup...
    #

    source_url = canonical_url(feed_url)

    # make sure the feed has an id...
    if not 'id' in ff.feed:
        ff.feed['id'] = source_url.lower()

    # make sure the feed has a self referential link
    has_self_ref = False
    ff.feed.setdefault('links', [])
    for link in ff.feed.links:
        if link.rel == 'self':
            has_self_ref = True
            break
    if not has_self_ref:
        ff.feed.links.append(Dibject(rel='self', href=source_url, title=''))

    # create a structure holding the appropriate source information
    # from the feed. This will be copied into each entry.
    source_info = Dibject()
    for k in ['id', 'title', 'title_detail', 'link', 'links', 'icon']:
        try:
            source_info[k] = deepcopy(ff.feed[k])
        except KeyError:
            # optional feed field absent; just skip it
            pass

    out_entries = []
    for e in ff.get('entries', []):
        # make sure it has an id
        eid = e.get('id', None)
        if eid is None:
            eid = find_best_entry_id(e)
            if eid is None:
                # throw this entry out, it has no
                # id, title, summary or content
                # that is recognizable...
                continue
            e['id'] = eid

        # assign a guid based on the id given and the source url
        e['melk_id'] = melk_id(eid, source_url.lower())

        # build a 'source' entry for each entry which points
        # back to this feed. if there is already a source
        # specified in the entry, we move it aside to
        # original_source.
        if 'source' in e:
            e['original_source'] = e.source
        e.source = deepcopy(source_info)

        out_entries.append(e)

    # keep only the entries that survived the id check above
    ff['entries'] = out_entries

    return ff