Exemple #1
0
 def emit_url(self, document, url):
     """Commit the document and pass its id and ``url`` to ``ingest_url.delay``.

     If ``document`` is a ``CrawlerMetadata`` instance, it is swapped for the
     object returned by ``self.create_document`` (built from the same
     ``foreign_id`` and ``content_hash``), and that object's ``meta`` is
     updated from the original's ``meta``. A ``source_url`` of ``None`` is
     replaced by ``url``. After dispatching, ``self.increment_count()`` is
     called.
     """
     if isinstance(document, CrawlerMetadata):
         crawled = document
         document = self.create_document(
             foreign_id=crawled.foreign_id,
             content_hash=crawled.content_hash,
         )
         document.meta.update(crawled.meta)

     if document.source_url is None:
         document.source_url = url

     # Commit first; presumably so the id handed over below refers to a
     # persisted row -- confirm against the task implementation.
     db.session.commit()
     ingest_url.delay(document.id, url)
     self.increment_count()
Exemple #2
0
 def emit_url(self, meta, url):
     """Pass the collection id, a copy of the metadata and ``url`` to
     ``ingest_url.delay``.

     The metadata payload is the ``data`` attribute of ``meta.clone()``.
     """
     collection_id = self.collection.id
     payload = meta.clone().data
     ingest_url.delay(collection_id, payload, url)
Exemple #3
0
 def emit_url(self, meta, url):
     """Pass the source id, a copy of the metadata and ``url`` to
     ``ingest_url.delay``.

     The metadata payload is the ``data`` attribute of ``meta.clone()``.
     """
     source_id = self.source.id
     payload = meta.clone().data
     ingest_url.delay(source_id, payload, url)
Exemple #4
0
 def emit_url(self, source, meta, url):
     """Commit the database session, then pass ``source.id``, a copy of the
     metadata and ``url`` to ``ingest_url.delay``.

     The metadata payload is the ``data`` attribute of ``meta.clone()``.
     """
     # Commit before dispatching; presumably so ``source.id`` refers to a
     # persisted row by the time the task runs -- confirm.
     db.session.commit()
     source_id = source.id
     payload = meta.clone().data
     ingest_url.delay(source_id, payload, url)
Exemple #5
0
 def emit_url(self, meta, url):
     """Pass the collection id, ``meta.to_attr_dict()`` and ``url`` to
     ``ingest_url.delay``, then call ``self.increment_count()``.
     """
     collection_id = self.collection.id
     attributes = meta.to_attr_dict()
     ingest_url.delay(collection_id, attributes, url)
     self.increment_count()
Exemple #6
0
 def emit_url(self, meta, url):
     """Dispatch ``url`` through ``ingest_url.delay`` for this collection.

     Arguments handed over are ``self.collection.id``, the ``data`` attribute
     of a clone of ``meta``, and ``url`` itself.
     """
     target_id = self.collection.id
     meta_data = meta.clone().data
     ingest_url.delay(target_id, meta_data, url)
Exemple #7
0
 def emit_url(self, meta, url):
     """Dispatch ``url`` through ``ingest_url.delay`` and bump the counter.

     Arguments handed over are ``self.collection.id``, the result of
     ``meta.to_attr_dict()``, and ``url``; ``self.increment_count()`` runs
     afterwards.
     """
     target_id = self.collection.id
     meta_attrs = meta.to_attr_dict()
     ingest_url.delay(target_id, meta_attrs, url)
     self.increment_count()
Exemple #8
0
 def emit_url(self, source, meta, url):
     """Commit pending database changes and dispatch ``url`` through
     ``ingest_url.delay``.

     Arguments handed over are ``source.id``, the ``data`` attribute of a
     clone of ``meta``, and ``url``.
     """
     # The commit precedes the dispatch; presumably the task looks the
     # source up by id and needs it stored -- confirm.
     db.session.commit()
     target_id = source.id
     meta_data = meta.clone().data
     ingest_url.delay(target_id, meta_data, url)