def add_url(self, request):
    """Add a URL to the crawl queue and return an AddURLResponse.

    Stores a QueuedURL entity keyed by the URL itself (optionally as a
    child of the parent key encoded in ``request.parent``), then builds a
    QueuedURLMessage mirroring the stored entity.

    Args:
        request: message with at least ``url`` and optional ``parent``
            (a urlsafe-encoded ndb key string).

    Returns:
        AddURLResponse with ``added``, the new queue ``newcount`` and the
        populated ``queued_url`` message.
    """
    logging.info('========== ADD_URL ==========')
    if request.parent is not None:
        logging.info('URL has parent.')
        # BUG FIX: the original built ndb.Key(k, QueuedURL, ...) where `k`
        # was not yet assigned (NameError on this path) and never used the
        # decoded parent key `p`. The parent must be supplied through the
        # `parent=` keyword of ndb.Key.
        p = ndb.Key(urlsafe=request.parent)
        q = QueuedURL(key=ndb.Key(QueuedURL, request.url, parent=p),
                      url=request.url)
    else:
        logging.info('URL has NO parent.')
        q = QueuedURL(key=ndb.Key(QueuedURL, request.url), url=request.url)
    k = q.put()
    logging.info('QueuedURL key: ' + k.urlsafe())
    c = QueuedURL.query().count()
    logging.info('New URL count: ' + str(c))

    # Generate response message mirroring the stored entity.
    qu = QueuedURLMessage(key=k.urlsafe())
    qu.url = request.url
    qu.started = q.started
    if q.key.parent() is not None:
        qu.parent = q.key.parent().urlsafe()
        # NOTE(review): ndb.Key exposes .id(), not .id_or_name() (that is
        # the old db.Key API) — confirm this call works against the ndb
        # version in use.
        qu.parent_url = q.key.parent().id_or_name()
    qu.modified = str(q.modified)
    qu.created = str(q.created)
    if q.status_code:
        qu.status_code = q.status_code
    if q.task_id:
        qu.task_id = q.task_id
    return AddURLResponse(added=True, newcount=c, queued_url=qu)
def add_url(key_or_url, urlparent=None, start=False):
    """Create a QueuedURL entity and optionally enqueue a task to scrape.

    Args:
        key_or_url: either a URL string (a new entity is created), or an
            ndb.Key / QueuedURL entity (the existing entity is reused).
        urlparent: optional parent — an ndb.Key, an ndb.Model (its key is
            used), or a urlsafe-encoded key string.
        start: when True, record the created task's name on the entity.

    Returns:
        (task, entity) tuple; for an already-started existing entity the
        first element is the stored ``task_id`` instead of a task object.
    """
    # NOTE: `global get_task` removed — a declaration is only needed to
    # *assign* a module-level name, not to read it.
    if isinstance(key_or_url, basestring):
        url = key_or_url
        if urlparent is not None:
            if not isinstance(urlparent, (ndb.Model, ndb.Key)):
                # BUG FIX: a string parent is a urlsafe-encoded key;
                # ndb.Key(urlparent) would treat it as a *kind* name.
                urlparent = ndb.Key(urlsafe=urlparent)
            elif isinstance(urlparent, ndb.Model):
                # BUG FIX: an entity is not a key — use its key.
                urlparent = urlparent.key
            # BUG FIX: the parent belongs in the `parent=` keyword of
            # ndb.Key — passing it positionally builds an invalid key
            # path, and passing `parent=` to the model constructor
            # alongside `key=` raises BadArgumentError.
            q = QueuedURL(key=ndb.Key('QueuedURL', url, parent=urlparent),
                          url=url, started=False)
        else:
            q = QueuedURL(key=ndb.Key('QueuedURL', url), url=url,
                          started=False)
        qk = q.put()
        t = get_task(qk, start)
        if start:
            # BUG FIX: the original assigned t.name onto the Key object
            # (qk.task_id), which neither updates nor persists the entity.
            q.task_id = t.name
            q.put()
        return t, q
    elif isinstance(key_or_url, (ndb.Key, ndb.Model)):
        if isinstance(key_or_url, ndb.Key):
            key = key_or_url
            # BUG FIX: `key` is already an ndb.Key; ndb.Key(urlsafe=key)
            # expects a urlsafe *string* and would raise. Fetch directly.
            qu = key.get()
        else:
            key = key_or_url.key
            qu = key_or_url
        if not qu.started:
            t = get_task(qu.key, start)
            qu.task_id = t.name
            # Persist the task id so it survives beyond this call.
            qu.put()
            return t, qu
        else:
            return qu.task_id, qu