def _send_modified_event(self, **kw):
    """
    Send the modified event, attaching the subscription changes
    accumulated so far.

    The current contents of ``_added_subs`` and ``_removed_subs`` are
    reported as the ``new_subscriptions`` and ``removed_subscriptions``
    keyword arguments (replacing any values passed in under those names),
    and both sets are reset to empty before delegating to
    ``NewsBucket._send_modified_event``.
    """
    # Swap in fresh sets first; the old ones are what gets reported.
    added, removed = self._added_subs, self._removed_subs
    self._added_subs = set()
    self._removed_subs = set()

    payload = dict(
        kw,
        new_subscriptions=list(added),
        removed_subscriptions=list(removed),
    )
    NewsBucket._send_modified_event(self, **payload)
def save(self):
    """
    Save the bucket, then flush the pending updated news items.

    The flush happens whether or not ``NewsBucket.save`` succeeds; any
    exception it raised (ResourceConflict included) still propagates to
    the caller once the flush has completed.
    """
    try:
        NewsBucket.save(self)
    finally:
        # just best effort here, we assume conflicts indicate better
        # information arrived...
        pending = self._updated_news_items
        self._context.db.update(pending.values())
        self._updated_news_items = {}
def test_composite_subs_by_title(ctx):
    """
    Subscriptions of a composite come out of the by-title view in
    alphabetical title order, regardless of subscription order.
    """
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite, view_composite_subscriptions_by_title
    from random import shuffle

    cc = Composite.create(ctx)
    buckets = []
    for i in range(10):
        bucket = NewsBucket.create(ctx)
        bucket.title = 'bucket %d' % i
        bucket.save()
        buckets.append(bucket)

    # subscribe in a random order so the view has to do the sorting
    shuffled_buckets = list(buckets)
    shuffle(shuffled_buckets)
    for bucket in shuffled_buckets:
        cc.subscribe(bucket)
    cc.save()

    rows = list(view_composite_subscriptions_by_title(ctx.db,
                                                      startkey=[cc.id, None],
                                                      endkey=[cc.id, {}]))

    # Count the rows explicitly: a loop index would keep its value from
    # the bucket-creation loop above if the view returned nothing, letting
    # the test pass without checking anything.
    assert len(rows) == len(buckets)

    # should come out in alphabetical order
    for row, bucket in zip(rows, buckets):
        assert row.value['bucket_id'] == bucket.id
def get_initial_batch(id):
    """
    Look up the bucket ``id`` and return its first batch of latest entries.

    Calls ``abort(404)`` when no bucket is found. The result is a Dibject
    with ``entries`` and ``next`` taken from
    ``_bucket_latest_entries_batch``, called with the arguments produced
    by ``_get_batch_args()``.
    """
    bucket = NewsBucket.get(id, ctx)
    if bucket is None:
        abort(404)

    paging = _get_batch_args()
    entries, next_batch = _bucket_latest_entries_batch(bucket, **paging)
    return Dibject(entries=entries, next=next_batch)
def test_init_subscription(ctx):
    """
    Subscribing a composite to a bucket that already holds items should
    result in those items appearing in the composite once the aggregator
    has processed the subscription.
    """
    from eventlet import sleep, spawn
    from melkman.aggregator.worker import run_aggregator
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite

    agg = spawn(run_aggregator, ctx)
    try:
        c = Composite.create(ctx)
        c.save()

        entries = []
        bucket = NewsBucket.create(ctx)
        for i in range(5):
            eid = random_id()
            entries.append(eid)
            bucket.add_news_item(eid)
        bucket.save()
        sleep(.5)

        c.subscribe(bucket)
        c.save()
        sleep(.5)

        c.reload()
        for eid in entries:
            assert c.has_news_item(eid)
    finally:
        # always stop the aggregator, even when an assertion above fails,
        # so it does not keep running into later tests.
        agg.kill()
        agg.wait()
def get_rejected(self, db):
    """
    Return the bucket referenced by ``rejected_ref``, or None if there is
    no reference.

    A previously loaded bucket is reused as long as its id still matches
    ``rejected_ref``; otherwise it is loaded from ``db`` with
    ``NewsBucket.load`` and cached on ``_rejected``.
    """
    cached = self._rejected
    if cached is not None and cached.id == self.rejected_ref:
        return cached

    if not self.rejected_ref:
        return None

    self._rejected = NewsBucket.load(db, self.rejected_ref)
    return self._rejected
def test_composites_by_sub(ctx):
    """
    The composites-by-subscription view lists exactly the composites
    subscribed to a given bucket.
    """
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite, view_composites_by_subscription

    def subscribers_of(bucket):
        # rows of the view restricted to a single subscribed bucket id
        return view_composites_by_subscription(ctx.db,
                                               include_docs=True,
                                               startkey=bucket.id,
                                               endkey=bucket.id)

    c1 = Composite.create(ctx)
    c2 = Composite.create(ctx)

    created = []
    for _ in range(3):
        fresh = NewsBucket.create(ctx)
        fresh.save()
        created.append(fresh)
    bucket1, bucket2, bucket3 = created

    c1.subscribe(bucket1)
    c1.save()

    c2.subscribe(bucket1)
    c2.subscribe(bucket2)
    c2.save()

    # bucket1 is subscribed to by both composites
    ids = [Composite.wrap(row.doc).id for row in subscribers_of(bucket1)]
    assert len(ids) == 2
    assert c1.id in ids
    assert c2.id in ids

    # bucket2 only by the second one
    ids = [Composite.from_doc(row.doc, ctx).id for row in subscribers_of(bucket2)]
    assert len(ids) == 1
    assert c2.id in ids

    # bucket3 by nobody
    for row in subscribers_of(bucket3):
        assert False, 'unexpected subscription'
def get_feed_info(id):
    """
    Build the feed description for the bucket ``id``.

    Calls ``abort(404)`` when no bucket is found. Returns a Dibject with
    ``id``, ``title``, ``timestamp`` (the bucket's last modification date,
    or the current UTC time when that is unset) and ``entries``.

    The number of entries is taken from the ``limit`` request parameter,
    falls back to DEFAULT_FEED_SIZE when that is missing or cannot be
    parsed as an integer, and is capped at MAX_FEED_SIZE.
    """
    bucket = NewsBucket.get(id, ctx)
    if bucket is None:
        abort(404)

    feed = Dibject()
    feed.id = id
    feed.title = bucket.title
    feed.timestamp = bucket.last_modification_date or datetime.utcnow()

    try:
        limit = int(request.params.get('limit', DEFAULT_FEED_SIZE))
    except Exception:
        # Best effort: any ordinary failure reading/parsing the parameter
        # falls back to the default, but SystemExit, KeyboardInterrupt and
        # greenlet kills are no longer swallowed here.
        limit = DEFAULT_FEED_SIZE
    limit = min(limit, MAX_FEED_SIZE)

    # NOTE(review): other callers unpack this helper's result as an
    # ``entries, next`` pair, whereas the whole return value is stored
    # here -- confirm that is what feed consumers expect.
    feed.entries = _bucket_latest_entries_batch(bucket, limit=limit)
    return feed
def get_batch(self, id):
    """
    Serve one batch of latest entries for the bucket ``id``.

    Calls ``abort(404)`` when no bucket is found. For XHR requests the
    batch is returned through ``json_response`` with the entries' item ids
    (plus rendered html unless ``no_html`` is set); otherwise the batch,
    carrying the entries themselves, is handed to ``self._show_batch``.
    """
    bucket = NewsBucket.get(id, ctx)
    if bucket is None:
        abort(404)

    paging = _get_batch_args()
    entries, next_batch = _bucket_latest_entries_batch(bucket, **paging)
    batch = Dibject(next=next_batch)

    if not request.is_xhr:
        batch.entries = entries
        return self._show_batch(id, batch)

    # this html section can be optionally omitted by specifying
    # the query argument no_html=True
    skip_html = asbool(request.params.get('no_html', False))
    if not skip_html:
        batch.html = render_entries_html(entries)
    batch.entries = [entry.item_id for entry in entries]
    return json_response(batch)
def reload(self):
    """
    Reload via ``NewsBucket.reload`` and drop any pending entries in
    ``_updated_news_items``.
    """
    NewsBucket.reload(self)
    # start over with an empty mapping after the reload
    self._updated_news_items = {}
def __init__(self, *args, **kw):
    """
    Initialise through ``NewsBucket.__init__`` and set up local state:
    no cached rejected bucket, and empty sets of added and removed
    subscriptions.
    """
    NewsBucket.__init__(self, *args, **kw)
    self._rejected = None
    self._added_subs, self._removed_subs = set(), set()
def delete(self):
    """
    Delete through ``NewsBucket.delete``, then also delete the cached
    rejected bucket if one has been loaded.
    """
    NewsBucket.delete(self)

    rejected = self._rejected
    if rejected is None:
        return
    rejected.delete()
def test_modified_updates_composite(ctx):
    """
    Items added to a bucket should show up in every composite subscribed
    to it, directly or through another composite, and nowhere else.

    Layout: c[0] <- b[0], c[1] <- b[1], c[2] <- c[0] and c[1];
    b[2] has no subscribers.
    """
    from eventlet import sleep, spawn
    from melkman.aggregator.worker import run_aggregator
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite

    agg = spawn(run_aggregator, ctx)
    try:
        b = []
        c = []
        # make some buckets and composites.
        for i in range(3):
            bucket = NewsBucket.create(ctx)
            bucket.save()
            b.append(bucket)

            comp = Composite.create(ctx)
            comp.save()
            c.append(comp)

        # set up some subscriptions
        c[0].subscribe(b[0])
        c[1].subscribe(b[1])
        c[2].subscribe(c[0])
        c[2].subscribe(c[1])
        for i in range(3):
            c[i].save()

        id1 = random_id()
        b[0].add_news_item(id1)
        log.debug("updating bucket 0 (%s) with item %s..." % (b[0].id, id1))
        b[0].save()
        sleep(1)

        # refresh them from the db...
        for i in range(3):
            c[i].reload()

        assert c[0].has_news_item(id1)
        assert not c[1].has_news_item(id1)
        assert c[2].has_news_item(id1)

        id2 = random_id()
        b[1].add_news_item(id2)
        log.debug("updating bucket 1 (%s) with item %s..." % (b[1].id, id2))
        b[1].save()
        sleep(1)

        # refresh them from the db...
        for i in range(3):
            c[i].reload()

        assert not c[0].has_news_item(id2)
        assert c[1].has_news_item(id2)
        assert c[2].has_news_item(id2)

        id3 = random_id()
        b[2].add_news_item(id3)
        log.debug("updating bucket 2 (%s) with item %s..." % (b[2].id, id3))
        b[2].save()
        sleep(1)

        # refresh them from the db...
        for i in range(3):
            c[i].reload()

        assert not c[0].has_news_item(id3)
        assert not c[1].has_news_item(id3)
        assert not c[2].has_news_item(id3)
    finally:
        # always stop the aggregator, even when an assertion above fails,
        # so it does not keep running into later tests.
        agg.kill()
        agg.wait()
def save(self):
    """
    Save through ``NewsBucket.save``, then also save the cached rejected
    bucket if one has been loaded.
    """
    NewsBucket.save(self)
    if self._rejected is not None:
        # Bound call: the rejected bucket saves itself. Passing this
        # object as an extra positional argument would not match a
        # save(self) signature.
        self._rejected.save()
def __init__(self, *args, **kw):
    """
    Initialise through ``NewsBucket.__init__`` and start with an empty
    ``_updated_news_items`` mapping.
    """
    NewsBucket.__init__(self, *args, **kw)
    # nothing pending yet
    self._updated_news_items = {}
def _handle_new_subscriptions(message_data, message, context):
    """
    helper handler called when new subscriptions are added to
    a composite.

    ``message_data`` is expected to carry ``bucket_id`` (the id of the
    composite) and ``new_subscriptions`` (the ids that were added).
    ``message`` is not used by this handler. ``context`` is passed on to
    the document lookups and to ``request_feed_index``.

    For each new subscription still present on the composite, the
    subscription is initialised via ``composite.init_subscription``.
    Remote feeds with an empty update history are not initialised;
    an index request is issued for their url instead. Failures for a
    single subscription are logged and do not stop the others. Any other
    error is logged and re-raised.
    """
    try:
        new_subscriptions = message_data.get('new_subscriptions', [])
        # truthiness check also covers an explicit None value
        if not new_subscriptions:
            log.warn("Ignoring init_subscription with no new subscriptions...")
            return

        cid = message_data.get('bucket_id', None)
        if cid is None:
            log.error("Ignoring init_subscription with no bucket_id: %s" % message_data)
            return

        composite = Composite.get(cid, context)
        if composite is None or 'Composite' not in composite.document_types:
            log.error("Ignoring subscription update for non-existent composite %s" % cid)
            return

        new_feeds = []
        updates = 0
        for sub in new_subscriptions:
            if sub not in composite.subscriptions:
                log.warn("ignoring subscription %s -> %s, not in composite" % (sub, cid))
                continue

            bucket = NewsBucket.get(sub, context)
            if bucket is None:
                log.warn("Ignoring init subscription to unknown object (%s)" % composite.subscriptions[sub])
                continue

            # try 'casting' to a RemoteFeed
            if 'RemoteFeed' in bucket.document_types:
                rf = RemoteFeed.from_doc(bucket.unwrap(), context)
                # mark as needing immediate fetch if
                # there is no history for this feed.
                if len(rf.update_history) == 0:
                    new_feeds.append(rf.url)
                    continue

            try:
                log.debug("init subscription %s -> %s" % (sub, cid))
                updates += composite.init_subscription(sub)
                sleep(0)  # yield control
            except Exception:
                # Only ordinary errors are treated as a per-subscription
                # failure; a kill/interrupt delivered while yielding must
                # propagate instead of being logged and ignored.
                log.error("Error initializing subscription %s -> %s: %s" % (sub, cid, traceback.format_exc()))

        if updates > 0:
            try:
                composite.save()
            except ResourceConflict:
                # not a big deal in this case. This basically means
                # our timestamp did not become the latest -- we
                # have made no alterations other than adding items.
                # Our additions succeed/fail independently of this as they
                # are separate documents.
                pass

        # request that we start indexing anything new...
        for url in new_feeds:
            request_feed_index(url, context)
    except Exception:
        log.error("Error handling init_subscrition %s: %s" % (message_data, traceback.format_exc()))
        raise
def reload(self):
    """
    Reload via ``NewsBucket.reload`` and reset local state: forget the
    cached rejected bucket and clear the added / removed subscription
    sets.
    """
    NewsBucket.reload(self)
    self._rejected = None
    self._added_subs, self._removed_subs = set(), set()