def download_file(url, output_file, chunk_size=1024):
    """Download a file to specified location.

    Issues a streaming GET request for ``url`` and, when the server
    answers with status 200, writes the response body to ``output_file``
    in pieces of ``chunk_size`` bytes.

    :param url: address to fetch.
    :param output_file: path of the file to write (opened in ``'wb'`` mode).
    :param chunk_size: size passed to ``iter_content`` for each chunk.
    :returns: ``output_file``. NOTE: the path is returned for any status
        code; when the status is not 200 nothing is written, so callers
        cannot assume the file exists.
    """
    from contextlib import closing

    from invenio_utils.url import make_user_agent_string

    headers = {
        "User-agent": make_user_agent_string("inspire"),
    }

    # With stream=True the connection is only handed back to the pool once
    # the body is fully consumed or the response is closed. Closing it
    # explicitly avoids leaking the connection on non-200 answers (body
    # never read) and when writing the file fails half-way.
    with closing(requests.get(url=url, headers=headers, stream=True)) as r:
        if r.status_code == 200:
            with open(output_file, 'wb') as f:
                for chunk in r.iter_content(chunk_size):
                    f.write(chunk)
    return output_file
def make_robotupload_marcxml(url, marcxml, mode, **kwargs):
    """Make a robotupload request.

    POSTs ``marcxml`` (passed through ``clean_xml`` and ``str``) to
    ``<base>/batchuploader/robotupload/<mode>`` with a MARCXML content type.

    :param url: base URL of the receiving site; when ``None`` the value of
        the ``CFG_ROBOTUPLOAD_SUBMISSION_BASEURL`` configuration key is
        used instead.
    :param marcxml: MARCXML payload to upload.
    :param mode: last path segment of the robotupload endpoint.
    :param kwargs: forwarded as query-string ``params`` of the request.
    :returns: the object returned by ``requests.post``.
    """
    import posixpath

    from invenio_utils.url import make_user_agent_string
    from inspire.utils.text import clean_xml
    from invenio_base.globals import cfg

    headers = {
        "User-agent": make_user_agent_string("inspire"),
        "Content-Type": "application/marcxml+xml",
    }
    if url is None:
        base_url = cfg.get("CFG_ROBOTUPLOAD_SUBMISSION_BASEURL")
    else:
        base_url = url

    # URLs always use "/" as separator. os.path.join follows the platform
    # convention (backslashes on Windows), so join with posixpath, which
    # behaves the same as os.path.join on POSIX systems.
    url = posixpath.join(base_url, "batchuploader/robotupload", mode)
    return requests.post(
        url=url,
        data=str(clean_xml(marcxml)),
        headers=headers,
        params=kwargs,
    )
def __init__(self, url, filename):
    """Initialize external file.

    Opens ``url`` and keeps the response in ``self._file``. The file name
    is taken from the ``filename=`` parameter of the ``Content-Disposition``
    response header when present and non-empty, otherwise from the
    ``filename`` argument.

    :param url: address of the remote file.
    :param filename: fallback name used when the server does not supply one.
    :raises UploadError: when the URL is unauthorized or cannot be opened,
        when ``Content-length`` is missing or not an integer, or when it
        exceeds ``cfg['DEPOSIT_MAX_UPLOAD_SIZE']``.
    """
    try:
        request = urllib2.Request(url)
        request.add_header('User-Agent', make_user_agent_string())
        self._file = urllib2.urlopen(request)
        self.filename = None

        info = self._file.info()
        content_disposition = info.getheader('Content-Disposition')
        if content_disposition:
            prefix = 'filename='
            for item in content_disposition.split(';'):
                item = item.strip()
                if not item.startswith(prefix):
                    continue
                value = item[len(prefix):].strip()
                # The value may or may not be quoted. Remove at most one
                # quote on each side instead of blindly slicing, which
                # used to eat the first and last character of unquoted
                # names (filename=a.pdf -> ".pd").
                if value.startswith('"'):
                    value = value[1:]
                if value.endswith('"'):
                    value = value[:-1]
                self.filename = value

        if not self.filename:
            self.filename = filename

        # getheader() returns None when the header is absent; report that
        # (and non-numeric values) as an upload error rather than letting
        # a TypeError/ValueError escape, since the size limit cannot be
        # enforced without it.
        try:
            size = int(info.getheader('Content-length'))
        except (TypeError, ValueError):
            raise UploadError("File size could not be determined")
        # NOTE(review): presumably the limit is expressed in bytes, like
        # Content-length -- confirm against the configuration.
        if size > cfg['DEPOSIT_MAX_UPLOAD_SIZE']:
            raise UploadError("File too big")
    except InvenioBibdocfileUnauthorizedURL as e:
        raise UploadError(str(e))
    except urllib2.URLError as e:
        raise UploadError('URL could not be opened: %s' % str(e))