def emit_url(self, document, url):
    """Persist pending changes and hand ``url`` to ``ingest_url`` for a document.

    ``document`` may be a ``CrawlerMetadata`` instance; in that case a
    document is first created through ``self.create_document`` from its
    ``foreign_id`` and ``content_hash``, and the metadata's ``meta`` is
    merged into the new document's ``meta``.

    ``url`` is stored as ``source_url`` only when the document does not
    already have one; it is always forwarded to ``ingest_url.delay``.
    """
    if isinstance(document, CrawlerMetadata):
        crawler_meta = document
        document = self.create_document(
            foreign_id=crawler_meta.foreign_id,
            content_hash=crawler_meta.content_hash,
        )
        document.meta.update(crawler_meta.meta)

    # Never overwrite a source URL that is already set.
    if document.source_url is None:
        document.source_url = url

    # Commit happens before dispatch -- presumably so the consumer of
    # ``ingest_url`` can see the row and its id; confirm against the task.
    db.session.commit()
    ingest_url.delay(document.id, url)
    self.increment_count()
def emit_url(self, meta, url):
    """Dispatch ``url`` to ``ingest_url`` for this crawler's collection.

    Sends ``self.collection.id``, the ``data`` of a clone of ``meta``, and
    ``url`` to ``ingest_url.delay``.
    """
    dispatch = ingest_url.delay
    collection_id = self.collection.id
    # The payload is taken from ``meta.clone()`` rather than from ``meta``
    # itself.
    payload = meta.clone().data
    dispatch(collection_id, payload, url)
def emit_url(self, meta, url):
    """Dispatch ``url`` to ``ingest_url`` for this crawler's source.

    Sends ``self.source.id``, the ``data`` of a clone of ``meta``, and
    ``url`` to ``ingest_url.delay``.
    """
    dispatch = ingest_url.delay
    source_id = self.source.id
    # The payload is taken from ``meta.clone()`` rather than from ``meta``
    # itself.
    payload = meta.clone().data
    dispatch(source_id, payload, url)
def emit_url(self, source, meta, url):
    """Commit the database session, then dispatch ``url`` to ``ingest_url``.

    Sends ``source.id``, the ``data`` of a clone of ``meta``, and ``url`` to
    ``ingest_url.delay``. ``self`` is not read by this method.
    """
    # Commit happens before dispatch -- presumably so ``source`` is
    # persisted by the time the task runs; confirm against the task.
    db.session.commit()

    dispatch = ingest_url.delay
    source_id = source.id
    payload = meta.clone().data
    dispatch(source_id, payload, url)
def emit_url(self, meta, url):
    """Dispatch ``url`` to ``ingest_url`` and bump this crawler's counter.

    Sends ``self.collection.id``, the result of ``meta.to_attr_dict()``, and
    ``url`` to ``ingest_url.delay``, then calls ``self.increment_count()``.
    """
    dispatch = ingest_url.delay
    collection_id = self.collection.id
    attrs = meta.to_attr_dict()
    dispatch(collection_id, attrs, url)

    # The count is incremented only after the dispatch call has returned.
    self.increment_count()