def ingest_url(self, collection_id, metadata, url):
    """Download ``url`` into a temp file, archive it and dispatch for ingest.

    A 404 is treated as a permanent miss and skipped; other HTTP errors and
    I/O failures are retried via Celery. The temp file and DB session are
    always cleaned up.

    :param collection_id: target collection for the ingested document.
    :param metadata: raw metadata dict, parsed via ``Metadata.from_data``.
    :param url: the remote resource to fetch.
    """
    meta = Metadata.from_data(metadata)
    if meta.foreign_id is None:
        # Fall back to the URL itself as a stable foreign identifier.
        meta.foreign_id = url
    tmp_path = make_tempfile(meta.file_name, suffix=meta.extension)
    try:
        log.info("Ingesting URL: %s", url)
        res = requests.get(url, stream=True)
        if res.status_code == 404:
            log.info("HTTP not found: %s", url)
            return
        if res.status_code >= 399:
            # NOTE(review): 3600 ** retries grows explosively (retry 2 waits
            # ~150 days) — was 3600 * retries intended? TODO confirm.
            countdown = 3600 ** self.request.retries
            self.retry(countdown=countdown)
        # Fix: iter_content() yields bytes; the file must be opened in
        # binary mode ('wb'), not text mode ('w'), for arbitrary payloads.
        with open(tmp_path, 'wb') as fh:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    fh.write(chunk)
        if not meta.has('source_url'):
            # Record the final (post-redirect) URL when none was supplied.
            meta.source_url = res.url
        meta.headers = res.headers
        # move=True: archive_file consumes the temp file on success.
        meta = archive.archive_file(tmp_path, meta, move=True)
        Ingestor.dispatch(collection_id, meta)
    except IOError as ioe:
        log.info("IO Failure: %r", ioe)
        countdown = 3600 ** self.request.retries
        self.retry(countdown=countdown)
    except Exception as ex:
        Ingestor.handle_exception(meta, collection_id, ex)
    finally:
        db.session.remove()
        remove_tempfile(tmp_path)
def ingest_url(self, collection_id, metadata, url):
    """Download ``url`` into a temp file, archive it and dispatch for ingest.

    A 404 is treated as a permanent miss and skipped; other HTTP errors and
    I/O failures are retried via Celery. The temp file and DB session are
    always cleaned up.

    :param collection_id: target collection for the ingested document.
    :param metadata: raw metadata dict, parsed via ``Metadata.from_data``.
    :param url: the remote resource to fetch.
    """
    meta = Metadata.from_data(metadata)
    tmp_path = make_tempfile(meta.file_name, suffix=meta.extension)
    try:
        log.info("Ingesting URL: %s", url)
        res = requests.get(url, stream=True)
        if res.status_code == 404:
            log.info("HTTP not found: %s", url)
            return
        if res.status_code >= 399:
            # NOTE(review): 3600 ** retries grows explosively (retry 2 waits
            # ~150 days) — was 3600 * retries intended? TODO confirm.
            countdown = 3600 ** self.request.retries
            self.retry(countdown=countdown)
        # Fix: iter_content() yields bytes; open in binary mode ('wb'),
        # not text mode ('w'), so arbitrary payloads are written intact.
        with open(tmp_path, 'wb') as fh:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    fh.write(chunk)
        if not meta.has('source_url'):
            # Record the final (post-redirect) URL when none was supplied.
            meta.source_url = res.url
        meta.headers = res.headers
        # move=True: archive_file consumes the temp file on success.
        meta = get_archive().archive_file(tmp_path, meta, move=True)
        Ingestor.dispatch(collection_id, meta)
    except IOError as ioe:
        log.info("IO Failure: %r", ioe)
        countdown = 3600 ** self.request.retries
        self.retry(countdown=countdown)
    except Exception as ex:
        Ingestor.handle_exception(meta, collection_id, ex)
    finally:
        db.session.remove()
        remove_tempfile(tmp_path)
def ingest(source_id, metadata):
    """Log an ingest event (best-effort), then dispatch the metadata.

    Event logging must never block the actual ingest, so failures there
    are swallowed after being logged.
    """
    meta = Metadata(data=metadata)
    try:
        process.log(process.INGEST, component='ingest',
                    meta=meta, source_id=source_id)
    except Exception as exc:
        log.exception(exc)
    Ingestor.dispatch(source_id, meta)
def ingest(source_id, metadata):
    """Dispatch *metadata* for ingestion, recording the event first.

    The process log write is best-effort: any error is logged and the
    dispatch proceeds regardless.
    """
    meta = Metadata(data=metadata)
    try:
        process.log(process.INGEST,
                    component='ingest',
                    source_id=source_id,
                    meta=meta)
    except Exception as err:
        log.exception(err)
    Ingestor.dispatch(source_id, meta)
def ingest_url(source_id, metadata, url):
    """Fetch ``url`` to a temp file, archive it and dispatch for ingest.

    HTTP responses >= 400 are routed through ``Ingestor.handle_exception``
    rather than raised to the caller.

    :param source_id: target source for the ingested document.
    :param metadata: raw metadata dict, wrapped in ``Metadata``.
    :param url: the remote resource to fetch.
    """
    meta = Metadata(data=metadata)
    tmp_path = None
    try:
        fh, tmp_path = mkstemp()
        os.close(fh)
        log.info("Ingesting URL: %r", url)
        res = requests.get(url, stream=True)
        if res.status_code >= 400:
            raise Exception("HTTP Error %r: %r" % (url, res.status_code))
        # Fix: iter_content() yields bytes; open in binary mode ('wb'),
        # not text mode ('w'), so arbitrary payloads are written intact.
        with open(tmp_path, 'wb') as fh:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    fh.write(chunk)
        if not meta.has('source_url'):
            # Record the final (post-redirect) URL when none was supplied.
            meta.source_url = res.url
        meta.headers = res.headers
        # move=True: archive_file consumes the temp file on success.
        meta = get_archive().archive_file(tmp_path, meta, move=True)
        Ingestor.dispatch(source_id, meta)
    except Exception as ex:
        Ingestor.handle_exception(meta, source_id, ex)
    finally:
        # Fix: the original leaked the mkstemp() file whenever the download
        # or archiving failed; remove it here if it still exists.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
def ingest_url(collection_id, metadata, url):
    """Download ``url`` into a temp file, archive it and dispatch for ingest.

    HTTP responses >= 400 raise ``IngestorException``, which (like any other
    failure) is routed through ``Ingestor.handle_exception``. The temp file
    and DB session are always cleaned up.

    :param collection_id: target collection for the ingested document.
    :param metadata: raw metadata dict, parsed via ``Metadata.from_data``.
    :param url: the remote resource to fetch.
    """
    meta = Metadata.from_data(metadata)
    tmp_path = make_tempfile(meta.file_name, suffix=meta.extension)
    try:
        log.info("Ingesting URL: %r", url)
        res = requests.get(url, stream=True, timeout=120)
        if res.status_code >= 400:
            msg = "HTTP Error %r: %r" % (url, res.status_code)
            raise IngestorException(msg)
        # Fix: iter_content() yields bytes; open in binary mode ('wb'),
        # not text mode ('w'), so arbitrary payloads are written intact.
        with open(tmp_path, 'wb') as fh:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    fh.write(chunk)
        if not meta.has('source_url'):
            # Record the final (post-redirect) URL when none was supplied.
            meta.source_url = res.url
        meta.headers = res.headers
        # move=True: archive_file consumes the temp file on success.
        meta = get_archive().archive_file(tmp_path, meta, move=True)
        Ingestor.dispatch(collection_id, meta)
    except Exception as ex:
        Ingestor.handle_exception(meta, collection_id, ex)
    finally:
        db.session.remove()
        remove_tempfile(tmp_path)
def ingest_url(collection_id, metadata, url):
    """Download ``url`` into a temp file, archive it and dispatch for ingest.

    HTTP responses >= 400 raise ``IngestorException``, which (like any other
    failure) is routed through ``Ingestor.handle_exception``. The temp file
    and DB session are always cleaned up.

    :param collection_id: target collection for the ingested document.
    :param metadata: raw metadata dict, parsed via ``Metadata.from_data``.
    :param url: the remote resource to fetch.
    """
    meta = Metadata.from_data(metadata)
    tmp_path = make_tempfile(meta.file_name, suffix=meta.extension)
    try:
        log.info("Ingesting URL: %r", url)
        res = requests.get(url, stream=True, timeout=120)
        if res.status_code >= 400:
            msg = "HTTP Error %r: %r" % (url, res.status_code)
            raise IngestorException(msg)
        # Fix: iter_content() yields bytes; open in binary mode ('wb'),
        # not text mode ('w'), so arbitrary payloads are written intact.
        with open(tmp_path, 'wb') as fh:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    fh.write(chunk)
        if not meta.has('source_url'):
            # Record the final (post-redirect) URL when none was supplied.
            meta.source_url = res.url
        meta.headers = res.headers
        # move=True: archive_file consumes the temp file on success.
        meta = get_archive().archive_file(tmp_path, meta, move=True)
        Ingestor.dispatch(collection_id, meta)
    except Exception as ex:
        Ingestor.handle_exception(meta, collection_id, ex)
    finally:
        db.session.remove()
        remove_tempfile(tmp_path)
def ingest(collection_id, metadata):
    """Parse the raw metadata payload and hand it to the ingestor chain."""
    Ingestor.dispatch(collection_id, Metadata.from_data(metadata))
def ingest(source_id, metadata):
    """Wrap the raw metadata dict and dispatch it for ingestion."""
    wrapped = Metadata(data=metadata)
    Ingestor.dispatch(source_id, wrapped)
def ingest(collection_id, metadata):
    """Dispatch the given raw metadata dict to the ingestors."""
    Ingestor.dispatch(collection_id, Metadata(data=metadata))
def ingest(source_id, metadata):
    """Reset session state, then dispatch the metadata for ingestion."""
    # Start from a clean session before any ingest work happens.
    clear_session()
    parsed = Metadata(data=metadata)
    Ingestor.dispatch(source_id, parsed)
def ingest(collection_id, metadata):
    """Convert the metadata payload and feed it through the ingestors."""
    Ingestor.dispatch(collection_id, Metadata.from_data(metadata))