def test_waterbutler_api_url_for(self):
    """Both an explicit base_url and the default build on WATERBUTLER_URL."""
    with self.app.test_request_context():
        explicit = waterbutler_api_url_for(
            'fakeid', 'provider', '/path', base_url=settings.WATERBUTLER_URL)
        # Guid, path, provider segment and host must all appear in the URL.
        for fragment in ('/fakeid/', '/path', '/providers/provider/', settings.WATERBUTLER_URL):
            assert_in(fragment, explicit)
    with self.app.test_request_context():
        implicit = waterbutler_api_url_for('fakeid', 'provider', '/path')
        assert_in(settings.WATERBUTLER_URL, implicit)
def generate_waterbutler_url(self, **kwargs):
    """Build the WaterButler API URL for this object's node, provider and
    path, forwarding any extra query kwargs."""
    node_guid = self.node._id
    return waterbutler_api_url_for(node_guid, self.provider, self.path, **kwargs)
def _get_fileobj_child_metadata(self, filenode, user, cookie=None, version=None):
    """Fetch WaterButler metadata for the children of *filenode*.

    Authenticates with *cookie* when given, otherwise with a cookie minted
    for *user*. Returns a list of attribute dicts (possibly empty); raises
    HTTPError on any non-200 WaterButler response.
    """
    from api.base.utils import waterbutler_api_url_for

    extra = {}
    if version:
        extra['version'] = version
    # Prefer the explicit cookie; fall back to minting one for the user.
    if cookie:
        extra['cookie'] = cookie
    elif user:
        extra['cookie'] = user.get_or_create_cookie()

    url = waterbutler_api_url_for(
        self.owner._id,
        self.config.short_name,
        path=filenode.get('path', '/'),
        user=user,
        view_only=True,
        _internal=True,
        base_url=self.owner.osfstorage_region.waterbutler_url,
        **extra
    )
    resp = requests.get(url)
    if resp.status_code != 200:
        raise HTTPError(resp.status_code, data={'error': resp.json()})
    # Crude throttle at ~5 requests/second.  TODO: better throttling?
    time.sleep(1.0 / 5.0)
    children = resp.json().get('data', None)
    return [child['attributes'] for child in children] if children else []
def test_get_files_has_links(self, app, user, url):
    """Quickfiles file links expose the expected WaterButler URL."""
    res = app.get(url, auth=user.auth)
    file_json = res.json['data'][0]
    links = file_json['links']
    quickfiles_node = quickfiles(user)
    expected_wb_url = utils.waterbutler_api_url_for(
        quickfiles_node._id, 'osfstorage', file_json['attributes']['path']
    )
    # delete/download/move/upload all resolve to the WaterButler file URL.
    for action in ('delete', 'download', 'move', 'upload'):
        assert action in links
        assert links[action] == expected_wb_url
    # info/self are present but have their own targets.
    assert 'info' in links
    assert 'self' in links
def archive_addon(addon_short_name, job_pk):
    """Archive an addon's contents by issuing a copy request to the
    WaterButler API.

    :param addon_short_name: AddonConfig.short_name of the addon to archive
    :param job_pk: primary key of the ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Archiving addon: {0} on node: {1}'.format(addon_short_name, src._id))

    params = {'cookie': user.get_or_create_cookie()}
    rename_suffix = ''
    # The dataverse API will not distinguish published from draft files
    # unless explicitly asked, so published and draft go to separate folders
    # in the resulting archive.  Running without the distinction also caused
    # a race condition that made archive jobs fail non-deterministically.
    if 'dataverse' in addon_short_name:
        is_draft = addon_short_name.split('-')[-1] == 'draft'
        params['revision'] = 'latest' if is_draft else 'latest-published'
        rename_suffix = ' (draft)' if is_draft else ' (published)'
        addon_short_name = 'dataverse'

    src_provider = src.get_addon(addon_short_name)
    rename = '{}{}'.format(src_provider.archive_folder_name, rename_suffix)
    url = waterbutler_api_url_for(src._id, addon_short_name, _internal=True, **params)
    data = make_waterbutler_payload(dst._id, rename)
    make_copy_request.delay(job_pk=job_pk, url=url, data=data)
def export_files(node, user, current_dir):
    """Create a ``files`` subdirectory of *current_dir* and download the
    node's OSFStorage contents into it as osfstorage-archive.zip, using
    WaterButler's zip download (a single request for the whole tree)."""
    files_dir = os.path.join(current_dir, 'files')
    os.mkdir(files_dir)
    download_url = waterbutler_api_url_for(
        node_id=node._id,
        _internal=True,
        provider='osfstorage',
        zip='',
        cookie=user.get_or_create_cookie(),
        base_url=node.osfstorage_region.waterbutler_url,
    )
    response = requests.get(url=download_url)
    if response.status_code != 200:
        ERRORS.append(
            'Error exporting files for node {}. Waterbutler responded with a {} status code. Response: {}'
            .format(node._id, response.status_code, response.json())
        )
        return
    with open(os.path.join(files_dir, 'osfstorage-archive.zip'), 'wb') as f:
        f.write(response.content)
def prepare_mock_wb_response(
        node=None,
        provider='github',
        files=None,
        folder=True,
        path='/',
        method=responses.GET,
        status_code=200):
    """Register a mocked WaterButler metadata response with the responses
    library.

    :param Node node: Target node.
    :param str provider: Addon provider
    :param list files: Optional list of files.  Partial dicts are fine;
        missing keys fall back to defaults.
    :param folder: True to mock a folder listing, False for a single file.
    :param path: WaterButler path, forwarded to waterbutler_api_url_for.
    :param str method: HTTP method.
    :param int status_code: HTTP status.
    """
    wb_url = waterbutler_api_url_for(
        node._id, provider=provider, _internal=True, path=path, meta=True, view_only=None)
    default_file = {
        u'contentType': None,
        u'extra': {u'downloads': 0, u'version': 1},
        u'kind': u'file',
        u'modified': None,
        u'name': u'NewFile',
        u'path': u'/NewFile',
        u'provider': provider,
        u'size': None,
        u'materialized': '/',
    }
    # Merge each caller-supplied partial dict over the defaults.
    entries = [dict(default_file, **each) for each in (files or [])] or [default_file]
    payload = [{'attributes': entry} for entry in entries]
    if not folder:
        # A file response is a single object, not a list.
        payload = payload[0]
    responses.add(
        responses.Response(
            method,
            wb_url,
            json={u'data': payload},
            status=status_code,
            content_type='application/json'
        )
    )
def upload_attachment(user, node, attachment):
    """PUT an attachment's bytes into the node's OSFStorage via WaterButler,
    authenticated with a cookie minted for *user*."""
    attachment.seek(0)
    file_name = attachment.filename or settings.MISSING_FILE_NAME
    upload_url = waterbutler_api_url_for(
        node._id,
        'osfstorage',
        name=file_name,
        base_url=node.osfstorage_region.waterbutler_url,
        cookie=user.get_or_create_cookie(),
        _internal=True,
    )
    requests.put(upload_url, data=attachment.read())
def test_waterbutler_api_url_for_internal(self):
    """With _internal=True the URL is built on WATERBUTLER_INTERNAL_URL
    rather than the public WATERBUTLER_URL."""
    # Bug fix: the original overwrote the module-level setting and never
    # restored it, leaking state into every later test in the run.
    original_internal_url = settings.WATERBUTLER_INTERNAL_URL
    settings.WATERBUTLER_INTERNAL_URL = 'http://1.2.3.4:7777'
    try:
        with self.app.test_request_context():
            url = waterbutler_api_url_for(
                'fakeid', 'provider', '/path',
                _internal=True, base_url=settings.WATERBUTLER_INTERNAL_URL)
        assert_not_in(settings.WATERBUTLER_URL, url)
        assert_in(settings.WATERBUTLER_INTERNAL_URL, url)
        assert_in('/fakeid/', url)
        assert_in('/path', url)
        assert_in('/providers/provider', url)
    finally:
        settings.WATERBUTLER_INTERNAL_URL = original_internal_url
def test_files_relationship_upload(self, app, user_one):
    """The user's quickfiles relationship exposes an upload link matching
    the WaterButler URL for the user's QuickFiles node."""
    url = '/{}users/{}/'.format(API_BASE, user_one._id)
    # Bug fix: authenticate with the user's auth tuple (user_one.auth), not
    # the user object itself — consistent with the other API tests.
    res = app.get(url, auth=user_one.auth)
    quickfiles = QuickFilesNode.objects.get(creator=user_one)
    user_json = res.json['data']
    upload_url = user_json['relationships']['quickfiles']['links']['upload']['href']
    waterbutler_upload = waterbutler_api_url_for(quickfiles._id, 'osfstorage')
    assert upload_url == waterbutler_upload
def generate_waterbutler_url(self, **kwargs):
    """Build the WaterButler URL for this file's target, honouring the
    target's storage region when it has one."""
    if hasattr(self.target, 'osfstorage_region'):
        region_url = self.target.osfstorage_region.waterbutler_url
    else:
        region_url = None
    return waterbutler_api_url_for(
        self.target._id,
        self.provider,
        self.path,
        base_url=region_url,
        **kwargs
    )
def get_file_object(target, path, provider, request):
    """Resolve *path* on *target* for *provider* into a file object.

    osfstorage paths are resolved locally against the database; every other
    provider is resolved by querying WaterButler.

    :raises NotFound: unknown path or unconfigured provider
    :raises PermissionDenied: WaterButler returned 401
    :raises ServiceUnavailableError: WaterButler errored or returned bad data
    """
    # Don't bother going to waterbutler for osfstorage
    if provider == 'osfstorage':
        # Kinda like /me for a user: '/' means the storage root, not a path.
        if path == '/':
            if isinstance(target, AbstractNode):
                obj = target.get_addon('osfstorage').get_root()
            elif isinstance(target, Preprint):
                obj = target.root_folder
            else:
                obj = target
        else:
            model = OsfStorageFolder if path.endswith('/') else OsfStorageFile
            content_type = ContentType.objects.get_for_model(target)
            obj = get_object_or_error(
                model,
                Q(target_object_id=target.pk, target_content_type=content_type, _id=path.strip('/')),
                request,
            )
        return obj

    # Bug fix: the original condition
    #     isinstance(...) and not target.get_addon(p) or not target.get_addon(p).configured
    # parsed as `(A and not B) or not C`, so non-node targets still
    # dereferenced get_addon() (AttributeError when the addon is missing)
    # and nodes fetched the addon twice.  Only AbstractNodes carry addons.
    if isinstance(target, AbstractNode):
        addon = target.get_addon(provider)
        if not addon or not addon.configured:
            raise NotFound('The {} provider is not configured for this project.'.format(provider))

    view_only = request.query_params.get('view_only', default=None)
    base_url = None
    if hasattr(target, 'osfstorage_region'):
        base_url = target.osfstorage_region.waterbutler_url
    url = waterbutler_api_url_for(
        target._id, provider, path,
        _internal=True,
        base_url=base_url,
        meta=True,
        view_only=view_only,
    )
    waterbutler_request = requests.get(
        url,
        cookies=request.COOKIES,
        headers={'Authorization': request.META.get('HTTP_AUTHORIZATION')},
    )
    if waterbutler_request.status_code == 401:
        raise PermissionDenied
    if waterbutler_request.status_code == 404:
        raise NotFound
    if is_server_error(waterbutler_request.status_code):
        raise ServiceUnavailableError(detail='Could not retrieve files information at this time.')
    try:
        return waterbutler_request.json()['data']
    except KeyError:
        raise ServiceUnavailableError(detail='Could not retrieve files information at this time.')
def to_representation(self, value):
    """Extend the base relationship links with WaterButler upload/download
    URLs for the user's QuickFiles node."""
    links = super(QuickFilesRelationshipField, self).to_representation(value)
    quickfiles_guid = (
        value.nodes_created
        .filter(type=QuickFilesNode._typedmodels_type)
        .values_list('guids___id', flat=True)
        .get()
    )
    upload_url = waterbutler_api_url_for(quickfiles_guid, 'osfstorage')
    links['links']['upload'] = {'href': upload_url, 'meta': {}}
    # Downloading the storage root as a zip uses the same URL with ?zip=
    links['links']['download'] = {'href': '{}?zip='.format(upload_url), 'meta': {}}
    return links
def test_waterbutler_invalid_data_returns_503(self):
    """A WaterButler 400 with an empty JSON body surfaces to API clients
    as a 503."""
    wb_url = waterbutler_api_url_for(
        self.project._id, _internal=True, provider='github', path='/', meta=True)
    self.add_github()
    responses.add(
        responses.Response(responses.GET, wb_url, body=json.dumps({}), status=400)
    )
    api_url = '/{}nodes/{}/files/github/'.format(API_BASE, self.project._id)
    res = self.app.get(api_url, auth=self.user.auth, expect_errors=True)
    assert_equal(res.status_code, 503)
def test_handles_bad_waterbutler_request(self):
    """An unexpected WaterButler response (418 with junk JSON) maps to a 503
    carrying a 'detail' key in the error payload."""
    wb_url = waterbutler_api_url_for(
        self.project._id, _internal=True, provider='github', path='/', meta=True)
    responses.add(
        responses.Response(responses.GET, wb_url, json={'bad': 'json'}, status=418)
    )
    self.add_github()
    api_url = '/{}nodes/{}/files/github/'.format(API_BASE, self.project._id)
    res = self.app.get(api_url, auth=self.user.auth, expect_errors=True)
    assert_equal(res.status_code, 503)
    assert_in('detail', res.json['errors'][0])
def get_file_object(target, path, provider, request):
    """Resolve *path* on *target* for *provider* into a file object.

    osfstorage paths are resolved directly against the database; all other
    providers are resolved by querying WaterButler.

    :raises NotFound: unknown path or unconfigured provider
    :raises PermissionDenied: WaterButler returned 401
    :raises ServiceUnavailableError: WaterButler errored or returned bad data
    """
    # Don't bother going to waterbutler for osfstorage
    if provider == 'osfstorage':
        # Kinda like /me for a user: '/' means the storage root, not a path.
        if path == '/':
            if isinstance(target, AbstractNode):
                obj = target.get_addon('osfstorage').get_root()
            else:
                obj = target
        else:
            model = OsfStorageFolder if path.endswith('/') else OsfStorageFile
            content_type = ContentType.objects.get_for_model(target)
            obj = get_object_or_error(
                model,
                Q(target_object_id=target.pk, target_content_type=content_type, _id=path.strip('/')),
                request,
            )
        return obj

    # Bug fix: `A and not B or not C` parsed as `(A and not B) or not C`, so
    # non-node targets still dereferenced get_addon(provider) (AttributeError
    # when the addon is missing) and nodes fetched the addon twice.
    if isinstance(target, AbstractNode):
        addon = target.get_addon(provider)
        if not addon or not addon.configured:
            raise NotFound('The {} provider is not configured for this project.'.format(provider))

    view_only = request.query_params.get('view_only', default=None)
    url = waterbutler_api_url_for(target._id, provider, path, _internal=True, meta=True, view_only=view_only)
    waterbutler_request = requests.get(
        url,
        cookies=request.COOKIES,
        headers={'Authorization': request.META.get('HTTP_AUTHORIZATION')},
    )
    if waterbutler_request.status_code == 401:
        raise PermissionDenied
    if waterbutler_request.status_code == 404:
        raise NotFound
    if is_server_error(waterbutler_request.status_code):
        raise ServiceUnavailableError(detail='Could not retrieve files information at this time.')
    try:
        return waterbutler_request.json()['data']
    except KeyError:
        raise ServiceUnavailableError(detail='Could not retrieve files information at this time.')
def test_waterbutler_invalid_data_returns_503(self):
    """A WaterButler 400 with an empty body on a draft node's files
    endpoint surfaces as a 503."""
    wb_url = waterbutler_api_url_for(
        self.draft_node._id,
        _internal=True,
        provider='github',
        path='/',
        meta=True,
        base_url=self.draft_node.osfstorage_region.waterbutler_url,
    )
    self.add_github()
    responses.add(responses.Response(responses.GET, wb_url, body=json.dumps({}), status=400))
    api_url = '/{}draft_nodes/{}/files/github/'.format(API_BASE, self.draft_node._id)
    res = self.app.get(api_url, auth=self.user.auth, expect_errors=True)
    assert_equal(res.status_code, 503)
def test_handles_bad_waterbutler_request(self):
    """An unexpected WaterButler response (418 with junk JSON) maps to a
    503 whose error payload contains a 'detail' key."""
    wb_url = waterbutler_api_url_for(
        self.project._id,
        _internal=True,
        provider='github',
        path='/',
        meta=True,
        base_url=self.project.osfstorage_region.waterbutler_url,
    )
    responses.add(responses.Response(responses.GET, wb_url, json={'bad': 'json'}, status=418))
    self.add_github()
    api_url = '/{}nodes/{}/files/github/'.format(API_BASE, self.project._id)
    res = self.app.get(api_url, auth=self.user.auth, expect_errors=True)
    assert_equal(res.status_code, 503)
    assert_in('detail', res.json['errors'][0])
def main():
    """Generate the prereg CSV, gzip it in memory, and (when the data store
    is configured) upload the archive to OSFStorage via WaterButler."""
    prereg_csv = generate_prereg_csv()
    filename = 'prereg_{}.csv.gz'.format(timezone.now().isoformat())
    output = io.BytesIO()
    with gzip.GzipFile(filename=filename, mode='wb', fileobj=output) as gzip_obj:
        gzip_obj.write(prereg_csv.getvalue())
    if settings.PREREG_DATA_STORE_GUID and settings.PREREG_DATA_STORE_TOKEN:
        # Bug fix: isoformat() timestamps contain ':' (and '+' for tz-aware
        # datetimes); interpolating the raw filename produced a malformed
        # query string.  URL-encode the parameters instead.
        try:
            from urllib.parse import urlencode  # Python 3
        except ImportError:
            from urllib import urlencode  # Python 2
        upload_url = '{}?{}'.format(
            waterbutler_api_url_for(settings.PREREG_DATA_STORE_GUID, 'osfstorage', _internal=True),
            urlencode({'name': filename, 'kind': 'file'}),
        )
        resp = requests.put(
            upload_url,
            data=output.getvalue(),
            headers={'Authorization': 'Bearer {}'.format(settings.PREREG_DATA_STORE_TOKEN)},
        )
        if resp.status_code == 201:
            logger.info('Prereg data uploaded properly')
        else:
            logger.info('Prereg data failed with code {}'.format(resp.status_code))
def _save_result(query_result, file_format, request_info, osf_cookie):
    """Upload a SPARQL query result to OSFStorage and record the outcome in
    the node's recent-activity log.  Returns the WaterButler response."""
    # Prevent errors when query_result has japanese characters.
    # (Python 2 only: site.py removes setdefaultencoding, so reload sys to
    # get it back.)
    reload(sys)
    sys.setdefaultencoding('utf-8')

    now = datetime.datetime.now()
    file_name = 'sparql_%s.%s' % (
        now.strftime('%Y-%m-%d_%H-%M-%S'), utils.FORMAT_EXTENSION[file_format])

    # Context needed to log into the project's recent activity.
    node = AbstractNode.load(request_info['node_id'])
    user = OSFUser.load(request_info['uid'])
    auth = Auth(user=user)
    log_params = {
        'node': request_info['node_id'],
        'project': request_info['pid'],
    }

    try:
        response = requests.put(
            waterbutler_api_url_for(
                request_info['pid'], 'osfstorage',
                name=file_name, kind='file', _internal=True),
            data=query_result,
            cookies={'osf': osf_cookie})
    except requests.ConnectionError:
        # Log the failure, then let the caller see the original error.
        node.add_log(action='sparql_upload_fail', params=log_params, auth=auth)
        raise

    if response.status_code == requests.codes.created:
        action = 'sparql_upload_success'
    else:
        action = 'sparql_upload_fail'
    node.add_log(action=action, params=log_params, auth=auth)
    return response
def get_file_object(node, path, provider, request):
    """Resolve *path* on *node* for *provider*.

    osfstorage lookups hit the database directly; other providers are
    resolved through WaterButler.
    """
    # Don't bother going to waterbutler for osfstorage
    if provider == 'osfstorage':
        # '/' means the storage root rather than a literal path.
        if path == '/':
            return node.get_addon('osfstorage').get_root()
        model = OsfStorageFolder if path.endswith('/') else OsfStorageFile
        return get_object_or_error(model, Q(node=node.pk, _id=path.strip('/')), request)

    if not node.get_addon(provider) or not node.get_addon(provider).configured:
        raise NotFound('The {} provider is not configured for this project.'.format(provider))

    view_only = request.query_params.get('view_only', default=None)
    url = waterbutler_api_url_for(node._id, provider, path, _internal=True, meta=True, view_only=view_only)
    wb_response = requests.get(
        url,
        cookies=request.COOKIES,
        headers={'Authorization': request.META.get('HTTP_AUTHORIZATION')},
    )
    if wb_response.status_code == 401:
        raise PermissionDenied
    if wb_response.status_code == 404:
        raise NotFound
    if is_server_error(wb_response.status_code):
        raise ServiceUnavailableError(detail='Could not retrieve files information at this time.')
    try:
        return wb_response.json()['data']
    except KeyError:
        raise ServiceUnavailableError(detail='Could not retrieve files information at this time.')
def archive_addon(addon_short_name, job_pk):
    """Archive an addon's contents by issuing a copy request to the
    WaterButler API, targeting the source node's storage region.

    :param addon_short_name: AddonConfig.short_name of the addon to archive
    :param job_pk: primary key of the ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Archiving addon: {0} on node: {1}'.format(addon_short_name, src._id))

    params = {'cookie': user.get_or_create_cookie()}
    rename_suffix = ''
    # Dataverse will not distinguish published from draft files unless asked
    # explicitly, so each goes into its own folder in the archive.  Archiving
    # without the distinction also caused a race that failed jobs
    # non-deterministically.
    if 'dataverse' in addon_short_name:
        wants_draft = addon_short_name.split('-')[-1] == 'draft'
        params['revision'] = 'latest' if wants_draft else 'latest-published'
        rename_suffix = ' (draft)' if wants_draft else ' (published)'
        addon_short_name = 'dataverse'

    src_provider = src.get_addon(addon_short_name)
    rename = '{}{}'.format(src_provider.archive_folder_name, rename_suffix)
    url = waterbutler_api_url_for(
        src._id,
        addon_short_name,
        _internal=True,
        base_url=src.osfstorage_region.waterbutler_url,
        **params)
    data = make_waterbutler_payload(dst._id, rename)
    make_copy_request.delay(job_pk=job_pk, url=url, data=data)
def test_get_files_has_links(self, app, user, url, quickfiles):
    """Quickfiles file links all resolve to the expected WaterButler URL."""
    res = app.get(url, auth=user.auth)
    file_json = res.json['data'][0]
    links = file_json['links']
    expected_url = utils.waterbutler_api_url_for(
        quickfiles._id, 'osfstorage', file_json['attributes']['path'])
    # delete/download/move/upload all point at the WaterButler file URL.
    for name in ('delete', 'download', 'move', 'upload'):
        assert name in links
        assert links[name] == expected_url
    assert 'info' in links
    assert 'self' in links
def export_files(node, user, current_dir):
    """Create a "files" directory under *current_dir* and download the
    node's OSFStorage contents into it as osfstorage-archive.zip, using
    WaterButler's zip download (one request for the whole tree)."""
    files_dir = os.path.join(current_dir, 'files')
    os.mkdir(files_dir)
    zip_url = waterbutler_api_url_for(
        node_id=node._id,
        _internal=True,
        provider='osfstorage',
        zip='',
        cookie=user.get_or_create_cookie(),
    )
    response = requests.get(url=zip_url)
    if response.status_code != 200:
        ERRORS.append(
            'Error exporting files for node {}. Waterbutler responded with a {} status code. Response: {}'
            .format(node._id, response.status_code, response.json()))
        return
    with open(os.path.join(files_dir, 'osfstorage-archive.zip'), 'wb') as f:
        f.write(response.content)
def _get_fileobj_child_metadata(self, filenode, user, cookie=None, version=None):
    """Return WaterButler metadata attributes for the children of *filenode*.

    Auth uses *cookie* when provided, else a cookie minted for *user*.
    Raises HTTPError on any non-200 WaterButler response.
    """
    from api.base.utils import waterbutler_api_url_for

    query = {}
    if version:
        query['version'] = version
    if cookie:
        query['cookie'] = cookie
    elif user:
        query['cookie'] = user.get_or_create_cookie()

    metadata_url = waterbutler_api_url_for(
        self.owner._id,
        self.config.short_name,
        path=filenode.get('path', '/'),
        user=user,
        view_only=True,
        _internal=True,
        base_url=self.owner.osfstorage_region.waterbutler_url,
        **query)

    res = requests.get(metadata_url)
    if res.status_code != 200:
        raise HTTPError(res.status_code, data={'error': res.json()})
    # TODO: better throttling?  For now cap at ~5 requests/second.
    time.sleep(1.0 / 5.0)

    children = res.json().get('data', None)
    if not children:
        return []
    return [child['attributes'] for child in children]
def perform_wb_copy(reg, node_settings):
    """Copy a registration source's files into the archive target via a
    WaterButler copy request.

    Honours the module-level DELETE_COLLISIONS / SKIP_COLLISIONS flags when
    an archive folder of the same name already exists on the destination.

    :raises HTTPError: when WaterButler rejects the copy request
    """
    src, dst, user = reg.archive_job.info()
    # Slashes are not legal in folder names; hoisted because it was computed
    # three times below.
    folder_name = node_settings.archive_folder_name.replace('/', '-')
    if dst.files.filter(name=folder_name).exists():
        if not DELETE_COLLISIONS and not SKIP_COLLISIONS:
            # Bug fix: the '{}' placeholder was never filled in — the literal
            # braces reached the operator.  Include the registration id.
            raise Exception('Archive folder for {} already exists. Investigate manually and rerun with either --delete-collisions or --skip-collisions'.format(reg._id))
        if DELETE_COLLISIONS:
            archive_folder = dst.files.exclude(type='osf.trashedfolder').get(name=folder_name)
            logger.info('Removing {}'.format(archive_folder))
            archive_folder.delete()
        if SKIP_COLLISIONS:
            complete_archive_target(reg, node_settings.short_name)
            return
    params = {'cookie': user.get_or_create_cookie()}
    data = {
        'action': 'copy',
        'path': '/',
        'rename': folder_name,
        'resource': dst._id,
        'provider': ARCHIVE_PROVIDER,
    }
    url = waterbutler_api_url_for(src._id, node_settings.short_name, _internal=True, **params)
    res = requests.post(url, data=json.dumps(data))
    if res.status_code not in (http.OK, http.CREATED, http.ACCEPTED):
        raise HTTPError(res.status_code)
def get_full_list(uid, pid, node):
    '''
    Get a full list of timestamps from all files uploaded to a storage.
    '''
    # Authenticate as the requesting user via an OSF cookie.
    user_info = OSFUser.objects.get(id=uid)
    cookie = user_info.get_or_create_cookie()
    api_url = util.api_v2_url('nodes/{}/files'.format(pid))
    headers = {'content-type': 'application/json'}
    cookies = {settings.COOKIE_NAME: cookie}

    # List the storage providers attached to the node via the v2 API.
    file_res = requests.get(api_url, headers=headers, cookies=cookies)
    provider_json_res = file_res.json()
    file_res.close()
    provider_list = []
    for provider_data in provider_json_res['data']:
        # `meta` is set to the current epoch second — presumably a
        # cache-buster for the metadata request; TODO confirm against
        # waterbutler_api_url_for's handling of the `meta` kwarg.
        waterbutler_meta_url = waterbutler_api_url_for(
            pid,
            provider_data['attributes']['provider'],
            '/',
            meta=int(time.mktime(datetime.datetime.now().timetuple())))
        waterbutler_json_res = None
        waterbutler_res = requests.get(waterbutler_meta_url, headers=headers, cookies=cookies)
        waterbutler_json_res = waterbutler_res.json()
        waterbutler_res.close()
        file_list = []
        child_file_list = []
        for file_data in waterbutler_json_res['data']:
            if file_data['attributes']['kind'] == 'folder':
                # Recurse into folders for the files they contain.
                child_file_list.extend(
                    waterbutler_folder_file_info(
                        pid,
                        provider_data['attributes']['provider'],
                        file_data['attributes']['path'],
                        node, cookies, headers))
            else:
                file_info = None
                # Ensure a BaseFileNode record exists for this file before
                # reporting it.
                basefile_node = BaseFileNode.resolve_class(
                    provider_data['attributes']['provider'],
                    BaseFileNode.FILE).get_or_create(
                        node,
                        file_data['attributes']['path'])
                basefile_node.save()
                file_info = {
                    'file_id': basefile_node._id,
                    'file_name': file_data['attributes'].get('name'),
                    'file_path': file_data['attributes'].get('materialized'),
                    'size': file_data['attributes'].get('size'),
                    'created': file_data['attributes'].get('created_utc'),
                    'modified': file_data['attributes'].get('modified_utc'),
                    'file_version': ''
                }
                # Only osfstorage entries carry a version number here.
                if provider_data['attributes']['provider'] == 'osfstorage':
                    file_info['file_version'] = file_data['attributes'][
                        'extra'].get('version')
                if file_info:
                    file_list.append(file_info)
        file_list.extend(child_file_list)
        # Skip providers that reported no files at all.
        if file_list:
            provider_files = {
                'provider': provider_data['attributes']['provider'],
                'provider_file_list': file_list
            }
            provider_list.append(provider_files)
    return provider_list
def prepare_mock_wb_response(node=None,
                             provider='github',
                             files=None,
                             folder=True,
                             path='/',
                             method=responses.GET,
                             status_code=200):
    """Register a mocked WaterButler metadata response with the responses
    library, using the node's storage region as the WB base URL.

    :param Node node: Target node.
    :param str provider: Addon provider
    :param list files: Optional list of files.  Partial dicts are fine;
        missing keys fall back to defaults.
    :param folder: True to mock a folder listing, False for a single file.
    :param path: WaterButler path, forwarded to waterbutler_api_url_for.
    :param str method: HTTP method.
    :param int status_code: HTTP status.
    """
    wb_url = waterbutler_api_url_for(
        node._id,
        provider=provider,
        _internal=True,
        path=path,
        meta=True,
        view_only=None,
        base_url=node.osfstorage_region.waterbutler_url)
    default_file = {
        u'contentType': None,
        u'extra': {u'downloads': 0, u'version': 1},
        u'kind': u'file',
        u'modified': None,
        u'name': u'NewFile',
        u'path': u'/NewFile',
        u'provider': provider,
        u'size': None,
        u'materialized': '/',
    }
    # Merge each caller-supplied partial dict over the defaults.
    entries = [dict(default_file, **each) for each in (files or [])] or [default_file]
    payload = [{'attributes': entry} for entry in entries]
    if not folder:
        # A file response is a single object, not a list.
        payload = payload[0]
    responses.add(
        responses.Response(
            method,
            wb_url,
            json={u'data': payload},
            status=status_code,
            content_type='application/json'))
def generate_waterbutler_url(self, **kwargs):
    """Return the WaterButler API URL for this object's node, provider and
    path, passing any extra kwargs through as query parameters."""
    return waterbutler_api_url_for(
        self.node._id,
        self.provider,
        self.path,
        **kwargs
    )