def runSCI(id):
    """Repeat a sciunit MODFLOW run for a HydroShare resource and publish it.

    On POST: authenticates with the submitted form credentials, downloads the
    sciunit package resource and the input resource ``id``, runs
    ``sciunit repeat e2 <id>``, uploads the produced files as a new
    MODFLOW model-instance resource and creates a collection resource that
    references both the input and the output resource.

    On any other method the login template is rendered.

    args:
    -- id: HydroShare resource id of the model input (taken from the caller,
           so it is treated as untrusted text)

    returns:
    -- raw stdout of ``sciunit repeat`` on POST, the rendered login page
       otherwise
    """
    if request.method != 'POST':
        return render_template('login.html', id=str(id))

    auth = HydroShareAuthBasic(username=request.form['username'],
                               password=request.form['password'])
    hs = HydroShare(auth=auth)

    # Fetch and open the sciunit package that wraps the MODFLOW executable.
    sciunit_res = 'e987ddcf73a94376a4a70e5db0fb7646'
    hs.getResource(sciunit_res, destination='/home/ubuntu/hydroshareLink/',
                   unzip=True)
    sciunit_zip = ('/home/ubuntu/hydroshareLink/' + sciunit_res + '/'
                   + sciunit_res + '/data/contents/modflow.zip')
    # Argument list instead of a shell string, and wait for completion so
    # that "sciunit repeat" below cannot start before the package is open.
    subprocess.Popen(['sciunit', 'open', sciunit_zip],
                     stdout=subprocess.PIPE).communicate()

    # NOTE(review): os.chdir changes the working directory of the whole
    # process; kept because other code may rely on it -- confirm.
    os.chdir("/home/ubuntu/test/")
    hs.getResource(id, destination='/home/ubuntu/test/', unzip=True)

    # No shell: ``id`` comes from the request and must not be interpreted
    # by /bin/sh. communicate() drains stdout and reaps the child.
    proc = subprocess.Popen(['sciunit', 'repeat', 'e2', str(id)],
                            stdout=subprocess.PIPE)
    output = proc.communicate()[0]

    abstract = ''

    # Publish everything the run produced except the model binary itself.
    output_id = hs.createResource('MODFLOWModelInstanceResource',
                                  'MODFLOW_NWT_SCIUNIT_OUTPUT',
                                  abstract=abstract,
                                  keywords=('my keyword 1', 'my keyword 2'))
    modflow_dir = "/home/ubuntu/test/MODFLOW/"
    for entry in os.listdir(modflow_dir):
        if entry != "mfnwt":
            hs.addResourceFile(output_id, modflow_dir + entry)

    # Tie input and output together in a collection resource.
    resource_id = hs.createResource(
        'CollectionResource', 'ModflowNwtCollection', abstract=abstract,
        keywords=('MODFLOW-NWT Input data', 'MODFLOW-NWT Output data',
                  'MODFLOW-NWT'))
    metaData = {
        'relations': [
            {'type': 'hasPart',
             'value': 'http://www.hydroshare.org/resource/' + str(part) + '/'}
            for part in (id, output_id)
        ]
    }
    hs.updateScienceMetadata(resource_id, metadata=metaData)
    return output
def runScript(id):
    """Download a resource, run MODFLOW-NWT on it and upload the listing file.

    Copies the resource contents into the application's ``Data`` directory,
    calls ``process()``, runs ``./mfnwt`` on the ``.nam`` file found in the
    ``MODFLOW`` directory, replaces the ``<name>.list`` file on the resource
    and finally removes the working files.

    args:
    -- id: HydroShare resource id

    returns:
    -- JSON string of the resource's science metadata

    raises:
    -- RuntimeError: no ``*.nam`` file exists in the MODFLOW directory
    """
    auth = HydroShareAuthBasic(username='', password='')
    hs = HydroShare(auth=auth)
    hs.getResource(id, destination='/home/ubuntu/hydroshare_app/', unzip=True)

    # Expand the glob in Python and pass argument lists so that ``id`` is
    # never interpreted by a shell.
    res_dir = str(id)
    contents = glob.glob(os.path.join(res_dir, res_dir, 'data', 'contents', '*'))
    subprocess.call(['sudo', 'cp'] + contents
                    + ['/home/ubuntu/hydroshare_app/Data'])
    subprocess.call(['sudo', 'rm', '-r', res_dir])
    process()

    # The paths above are relative to the starting directory, so restore it
    # afterwards; otherwise a second call would run from inside MODFLOW.
    start_dir = os.getcwd()
    os.chdir('MODFLOW')
    try:
        # Locate the file with the .nam extension (last match wins, as before)
        nam_files = glob.glob("*.nam")
        if not nam_files:
            raise RuntimeError('No .nam file found in the MODFLOW directory '
                               'for resource %s' % id)
        filename = nam_files[-1]

        # Run the model
        subprocess.call(['sudo', './mfnwt', filename])

        list_name = filename.split(".")[0] + '.list'
        try:
            hs.deleteResourceFile(id, list_name)
        except Exception:
            # Best effort: the listing file may not exist on the resource yet.
            pass
        # Upload to hydroshare
        hs.addResourceFile(id, list_name)
    finally:
        os.chdir(start_dir)

    # Constant commands; the shell is only needed for wildcard expansion.
    subprocess.call("sudo rm /home/ubuntu/hydroshare_app/MODFLOW/*.*", shell=True)
    subprocess.call("sudo rm -r /home/ubuntu/hydroshare_app/Scratch", shell=True)
    subprocess.call("sudo rm -r /home/ubuntu/hydroshare_app/Framework", shell=True)
    subprocess.call("sudo rm /home/ubuntu/hydroshare_app/Data/*", shell=True)
    return json.dumps(hs.getScienceMetadata(id))
def test_create_get_delete_resource_file(self):
    """Add a file to a resource, download it back, then delete it."""
    hs = HydroShare(prompt_auth=False)

    # Add
    res_id = '511debf8858a4ea081f78d66870da76c'
    fpath = 'mocks/data/another_resource_file.txt'
    fname = os.path.basename(fpath)
    resp = hs.addResourceFile(res_id, fpath)
    self.assertEqual(resp, res_id)

    # Get
    tmpdir = tempfile.mkdtemp()
    try:
        res_file = hs.getResourceFile(res_id, fname, destination=tmpdir)
        self.assertTrue(filecmp.cmp(res_file, fpath, shallow=False))
    finally:
        # Remove the scratch directory even when the download or the
        # comparison fails.
        shutil.rmtree(tmpdir)

    # Delete
    delres = hs.deleteResourceFile(res_id, fname)
    self.assertEqual(delres, res_id)
def test_create_get_delete_resource_file(self):
    """Add a file to a resource, download it back, then delete it."""
    hs = HydroShare()

    # Add
    res_id = '511debf8858a4ea081f78d66870da76c'
    fpath = 'mocks/data/another_resource_file.txt'
    fname = os.path.basename(fpath)
    resp = hs.addResourceFile(res_id, fpath)
    self.assertEqual(resp, res_id)

    # Get
    tmpdir = tempfile.mkdtemp()
    try:
        res_file = hs.getResourceFile(res_id, fname, destination=tmpdir)
        self.assertTrue(filecmp.cmp(res_file, fpath, shallow=False))
    finally:
        # Remove the scratch directory even when the download or the
        # comparison fails.
        shutil.rmtree(tmpdir)

    # Delete
    delres = hs.deleteResourceFile(res_id, fname)
    self.assertEqual(delres, res_id)
def upload_file(request):
    """Create a demo HydroShare resource from the workspace output file.

    Creates a new ``GenericResource`` with the output file attached, then
    adds the same file to a fixed, pre-existing resource. ``request`` is not
    used and nothing is returned.

    NOTE(review): ``auth`` is not defined in this function; it is presumably
    a module-level object -- confirm, otherwise this raises NameError.
    """
    output_path = 'tethysapp/geocode/workspaces/app_workspace/output.txt'
    client = HydroShare(auth=auth)

    # Metadata payloads are passed to the client as JSON text.
    science_metadata = '[{"coverage":{"type":"period", "value":{"start":"01/01/2000", "end":"12/12/2010"}}}, {"creator":{"name":"John Smith"}}, {"creator":{"name":"Lisa Miller"}}]'
    key_value_metadata = '{"key-1": "value-1", "key-2": "value-2"}'

    resource_id = client.createResource(
        'GenericResource',
        'My resource',
        resource_file=output_path,
        keywords=('my keyword 1', 'my keyword 2'),
        abstract='My abstract',
        metadata=science_metadata,
        extra_metadata=key_value_metadata,
    )
    # Also attach the file to the fixed target resource.
    resource_id = client.addResourceFile('99319811b9c44f03aa14f47a32aa4111',
                                         output_path)
class hydroshare():
    """Convenience wrapper around the HydroShare REST client for notebooks."""

    def __init__(self, username=None, password=None, cache=False):
        """Connects to HydroShare.

        Uses the OAuth2 token stored in ``~/.hs_auth`` when that file exists,
        otherwise falls back to basic authentication.

        args:
        -- username: HydroShare user name (str, default=>HS_USR_NAME env var)
        -- password: plain-text password; when None the user is prompted
        -- cache: load ``.env`` from NOTEBOOK_HOME before connecting (bool)
        """
        self.hs = None
        self.content = {}
        # Defined up front so that the failure handler below also works on
        # the OAuth2 path, which never sets them.
        self.cache = cache
        self.auth_path = None

        # connect to hydroshare using OAUTH2
        authfile = os.path.expanduser("~/.hs_auth")
        if os.path.exists(authfile):
            # NOTE: pickle executes arbitrary code on load; this file must
            # only ever be written by a trusted process.
            with open(authfile, 'rb') as f:
                token, cid = pickle.load(f)
            auth = HydroShareAuthOAuth2(cid, '', token=token)
        else:
            # connect to hydroshare using Basic Authentication
            notebook_home = os.environ.get('NOTEBOOK_HOME', '.')
            if cache:
                utilities.load_environment(
                    os.path.join(notebook_home, '.env'))
            self.auth_path = os.path.join(notebook_home, '.auth')

            uname = username
            if uname is None:
                uname = os.environ.get('HS_USR_NAME', None)

            if password is None:
                # get a secure connection to hydroshare
                auth = self.getSecureConnection(uname)
            else:
                print('WARNING: THIS IS NOT A SECURE METHOD OF CONNECTING TO '
                      'HYDROSHARE...AVOID TYPING CREDENTIALS AS PLAIN TEXT')
                auth = HydroShareAuthBasic(username=uname, password=password)

        try:
            self.hs = HydroShare(auth=auth)
            self.hs.getUserInfo()
            print('Successfully established a connection with HydroShare')
        except HydroShareHTTPException as e:
            print('Failed to establish a connection with HydroShare.\n '
                  'Please check that you provided the correct credentials.\n'
                  '%s' % e)
            # remove the cached authentication
            if self.auth_path and os.path.exists(self.auth_path):
                os.remove(self.auth_path)
            return None

        # set the HS resource download directory
        download_dir = os.environ.get('JUPYTER_DOWNLOADS', 'hs_downloads')
        if not os.path.isdir(download_dir):
            print('Creating a directory to store your HS downloads')
            os.makedirs(download_dir)
        self.download_dir = download_dir

    def _addContentToExistingResource(self, resid, content_files):
        """Uploads each path in content_files to resource resid, in order."""
        for f in content_files:
            self.hs.addResourceFile(resid, f)

    def getSecureConnection(self, username=None):
        """Builds basic-auth credentials, prompting or reading the cache.

        args:
        -- username: hydroshare user name; prompted for when None

        returns:
        -- hydroshare basic authentication object
        """
        import getpass

        if not os.path.exists(self.auth_path):
            print('\nThe hs_utils library requires a secure connection to '
                  'your HydroShare account.')
            if username is None:
                username = input(
                    'Please enter your HydroShare username: ').strip()
            # Read the password without echoing it to the terminal.
            p = getpass.getpass('Enter the HydroShare password for user '
                                '\'%s\': ' % username)
            auth = HydroShareAuthBasic(username=username, password=p)

            save = input('Do you want to save this password for future use?\n'
                         'Note: the password will be stored in plain text '
                         '(not recommended) [y/N]?')
            self.cache = save.lower() == 'y'

            if self.cache:
                with open(self.auth_path, 'wb') as f:
                    pickle.dump(auth, f, protocol=2)
        else:
            # NOTE: pickle executes arbitrary code on load; the cache file
            # must not be writable by untrusted users.
            with open(self.auth_path, 'rb') as f:
                auth = pickle.load(f)
        return auth

    def getResourceMetadata(self, resid):
        """Gets metadata for a specified resource.

        args:
        -- resid: hydroshare resource id

        returns:
        -- resource metadata object
        """
        science_meta = self.hs.getScienceMetadata(resid)
        system_meta = self.hs.getSystemMetadata(resid)
        return resource.ResourceMetadata(system_meta, science_meta)

    def createHydroShareResource(self, abstract, title, derivedFromId=None,
                                 keywords=None,
                                 resource_type='GenericResource',
                                 content_files=None, public=False):
        """Creates a hydroshare resource.

        args:
        -- abstract: abstract for resource (str, required)
        -- title: title of resource (str, required)
        -- derivedFromId: id of parent hydroshare resource (str, default=>None)
        -- keywords: list of subject keywords (list, default=>None, i.e. [])
        -- resource_type: type of resource to create, matched
                          case-insensitively (str, default=>'GenericResource')
        -- content_files: data to save as resource content
                          (list, default=>None, i.e. [])
        -- public: resource sharing status (bool, default=>False);
                   currently not used

        returns:
        -- None
        """
        keywords = [] if keywords is None else keywords
        content_files = [] if content_files is None else content_files

        # query the hydroshare resource types and make sure that
        # resource_type is valid. The table is keyed by lowercase name, so
        # the lookup must be lowercased too.
        restypes = {r.lower(): r for r in self.hs.getResourceTypes()}
        try:
            res_type = restypes[resource_type.lower()]
        except KeyError:
            display(HTML('<b style="color:red;">[%s] is not a valid '
                         'HydroShare resource type.</b>' % resource_type))
            return None

        # get the 'derived resource' metadata
        if derivedFromId is not None:
            try:
                # update the abstract and keyword metadata
                meta = self.getResourceMetadata(derivedFromId)
                abstract = meta.abstract \
                    + '\n\n[Modified in JupyterHub on %s]\n%s' \
                    % (dt.now(), abstract)
                keywords = set(keywords) | set(meta.keywords)
            except Exception:
                display(HTML('<b style="color:red;">Encountered an error '
                             ' while setting the derivedFrom relationship '
                             ' using id=%s. Make sure this resource is '
                             ' is accessible to your account. </b>'
                             % derivedFromId))
                display(HTML('<a href=%s target="_blank">%s</a>' %
                             ('https://www.hydroshare.org/resource/%s'
                              % derivedFromId,
                              'View the "DerivedFrom" '
                              'Resource')))
                return None

        f = content_files[0] if content_files else None

        # create the hs resource (1 content file allowed)
        resid = threads.runThreadedFunction('Creating HydroShare Resource',
                                            'Resource Created Successfully',
                                            self.hs.createResource,
                                            resource_type=res_type,
                                            title=title,
                                            abstract=abstract,
                                            resource_file=f,
                                            keywords=keywords)

        display(HTML('Resource id: %s' % resid))
        display(HTML('<a href=%s target="_blank">%s</a>' %
                     ('https://www.hydroshare.org/resource/%s' % resid,
                      'Open Resource in HydroShare')))

        # add the remaining content files to the hs resource
        try:
            if len(content_files) > 1:
                self.addContentToExistingResource(resid, content_files[1:])
        except Exception as e:
            print(e)

    def getResourceFromHydroShare(self, resourceid, destination='.'):
        """Downloads content of a hydroshare resource.

        args:
        -- resourceid: id of the hydroshare resource (str)
        -- destination: path to save resource, relative to the download
                        directory chosen in the constructor (str)

        returns:
        -- None
        """
        default_dl_path = self.download_dir
        dst = os.path.abspath(os.path.join(default_dl_path, destination))
        download = True

        # check if the data should be overwritten
        dst_res_folder = os.path.join(dst, resourceid)
        if os.path.exists(dst_res_folder):
            print('This resource already exists in your userspace.')
            utilities.tree(dst_res_folder)
            res = input('\nDo you want to overwrite these data [Y/n]? ')
            # Accept "N" and surrounding whitespace as a refusal too.
            if res.strip().lower() != 'n':
                shutil.rmtree(dst_res_folder)
            else:
                download = False

        # re-download the content if desired
        if download:
            try:
                # download the resource (threaded)
                threads.runThreadedFunction('Downloading Resource',
                                            'Download Finished',
                                            self.hs.getResource,
                                            resourceid,
                                            destination=dst,
                                            unzip=True)
                print('Successfully downloaded resource %s' % resourceid)
            except Exception as e:
                display(HTML('<b style="color:red">Failed to retrieve '
                             'resource content from HydroShare: %s</b>' % e))
                return None

        # load the resource content
        outdir = os.path.join(dst, '%s/%s' % (resourceid, resourceid))
        content_files = glob.glob(os.path.join(outdir, 'data/contents/*'))

        # trim the base name relative to the data directory
        dest_folder_name = os.path.dirname(destination).split('/')[-1]
        content = {}
        for f in content_files:
            fname = os.path.basename(f)
            content[fname] = os.path.join(
                dest_folder_name, os.path.relpath(f, dest_folder_name))

        # show the resource content files
        utilities.display_resource_content_files(content)

        # update the content dictionary
        self.content.update(content)

    def addContentToExistingResource(self, resid, content):
        """Adds content files to an existing hydroshare resource.

        args:
        -- resid: id of an existing hydroshare resource (str)
        -- content: files paths to be added to resource (list)

        returns:
        -- None
        """
        threads.runThreadedFunction('Adding Content to Resource',
                                    'Successfully Added Content Files',
                                    self._addContentToExistingResource,
                                    resid, content)

    def loadResource(self, resourceid):
        """Loads the contents of a previously downloaded resource.

        The {content file name: path} mapping is stored in ``self.content``.

        args:
        -- resourceid: the id of the resource that has been downloaded (str)

        returns:
        -- None
        """
        resdir = utilities.find_resource_directory(resourceid)
        if resdir is None:
            display(HTML('<b style="color:red">Could not find any resource '
                         'matching the id [%s].</b> <br> It is likely that '
                         'this resource has not yet been downloaded from '
                         'HydroShare.org, or it was removed from the '
                         'JupyterHub server. Please use the following '
                         'command to aquire the resource content: '
                         '<br><br><code>hs.getResourceFromHydroShare(%s)'
                         '</code>.' % (resourceid, resourceid)))
            return

        # create search paths. Need to check 2 paths due to hs_restclient bug #63.
        search_paths = [
            os.path.join(resdir, '%s/data/contents/*' % resourceid),
            os.path.join(resdir, 'data/contents/*'),
        ]

        content = {}
        for p in search_paths:
            content_files = glob.glob(p)
            if len(content_files) > 0:
                display(HTML('<p>Downloaded content is located at: %s</p>'
                             % resdir))
                display(HTML('<p>Found %d content file(s). \n</p>'
                             % len(content_files)))
            for f in content_files:
                fname = os.path.basename(f)
                content[fname] = f

        if len(content) == 0:
            display(HTML(
                '<p>Did not find any content files for resource id: %s</p>'
                % resourceid))

        utilities.display_resource_content_files(content)
        self.content = content

    def getContentFiles(self, resourceid):
        """Gets the content files for a resource that exists on the Jupyter Server

        args:
        -- resourceid: the id of the hydroshare resource

        returns:
        -- {content file name: path}
        """
        content = utilities.get_hs_content(resourceid)
        return content

    def getContentPath(self, resourceid):
        """Gets the server path of a resources content files.

        args:
        -- resourceid: the id of the hydroshare resource

        returns:
        -- server path to the resource content files, or None when the
           resource directory cannot be found
        """
        path = utilities.find_resource_directory(resourceid)
        if path is not None:
            return os.path.join(path, resourceid, 'data/contents')
class hydroshare():
    """Convenience wrapper around the HydroShare REST client for notebooks."""

    def __init__(self, username=None, password=None, cache=True):
        """Connects to HydroShare.

        Uses the OAuth2 token stored in ``~/.hs_auth`` when that file exists,
        otherwise falls back to basic authentication.

        args:
        -- username: HydroShare user name (str, default=>HS_USR_NAME env var)
        -- password: plain-text password; when None the user is prompted
        -- cache: load ``.env`` and cache the credentials on disk (bool)
        """
        self.hs = None
        self.content = {}
        # Defined up front so that the failure handler below also works on
        # the OAuth2 path, which never sets them.
        self.cache = cache
        self.auth_path = None

        # connect to hydroshare using OAUTH2
        authfile = os.path.expanduser("~/.hs_auth")
        if os.path.exists(authfile):
            # NOTE: pickle executes arbitrary code on load; this file must
            # only ever be written by a trusted process.
            with open(authfile, 'rb') as f:
                token, cid = pickle.load(f)
            auth = HydroShareAuthOAuth2(cid, '', token=token)
        else:
            # connect to hydroshare using Basic Authentication
            # '/home/jovyan' keeps the previous fallback cache location
            # ('/home/jovyan/.auth') when NOTEBOOK_HOME is not set.
            notebook_home = os.environ.get('NOTEBOOK_HOME', '/home/jovyan')
            if cache:
                utilities.load_environment(
                    os.path.join(notebook_home, '.env'))
            # The cache is a file inside NOTEBOOK_HOME, not the directory
            # itself.
            self.auth_path = os.path.join(notebook_home, '.auth')

            uname = username
            if uname is None:
                uname = os.environ.get('HS_USR_NAME')

            if password is None:
                # get a secure connection to hydroshare
                auth = self.getSecureConnection(uname)
            else:
                print('WARNING: THIS IS NOT A SECURE METHOD OF CONNECTING TO '
                      'HYDROSHARE...AVOID TYPING CREDENTIALS AS PLAIN TEXT')
                auth = HydroShareAuthBasic(username=uname, password=password)

        try:
            self.hs = HydroShare(auth=auth)
            self.hs.getUserInfo()
            print('Successfully established a connection with HydroShare')
        except HydroShareHTTPException as e:
            print('Failed to establish a connection with HydroShare.\n '
                  'Please check that you provided the correct credentials.\n'
                  '%s' % e)
            # remove the cached authentication
            if self.auth_path and os.path.isfile(self.auth_path):
                os.remove(self.auth_path)
            return None

        # set the HS resource download directory
        download_dir = os.environ.get('JUPYTER_DOWNLOADS', 'Downloads')
        if not os.path.isdir(download_dir):
            os.makedirs(download_dir)
        self.download_dir = download_dir

    def _addContentToExistingResource(self, resid, content_files):
        """Uploads each path in content_files to resource resid, in order."""
        for f in content_files:
            self.hs.addResourceFile(resid, f)

    def getSecureConnection(self, username=None):
        """Builds basic-auth credentials, prompting or reading the cache.

        args:
        -- username: hydroshare user name; prompted for when None

        returns:
        -- hydroshare basic authentication object
        """
        import getpass

        if not os.path.exists(self.auth_path):
            print('\nThe hs_utils library requires a secure connection to '
                  'your HydroShare account.')
            if username is None:
                username = input(
                    'Please enter your HydroShare username: ').strip()
            # Read the password without echoing it to the terminal.
            p = getpass.getpass('Enter the HydroShare password for user '
                                '\'%s\': ' % username)
            auth = HydroShareAuthBasic(username=username, password=p)

            if self.cache:
                with open(self.auth_path, 'wb') as f:
                    pickle.dump(auth, f, protocol=2)
        else:
            # NOTE: pickle executes arbitrary code on load; the cache file
            # must not be writable by untrusted users.
            with open(self.auth_path, 'rb') as f:
                auth = pickle.load(f)
        return auth

    def getResourceMetadata(self, resid):
        """Gets metadata for a specified resource.

        args:
        -- resid: hydroshare resource id

        returns:
        -- resource metadata object
        """
        science_meta = self.hs.getScienceMetadata(resid)
        system_meta = self.hs.getSystemMetadata(resid)
        return resource.ResourceMetadata(system_meta, science_meta)

    def createHydroShareResource(self, abstract, title, derivedFromId=None,
                                 keywords=None,
                                 resource_type='GenericResource',
                                 content_files=None, public=False):
        """Creates a hydroshare resource.

        args:
        -- abstract: abstract for resource (str, required)
        -- title: title of resource (str, required)
        -- derivedFromId: id of parent hydroshare resource (str, default=>None)
        -- keywords: list of subject keywords (list, default=>None, i.e. [])
        -- resource_type: type of resource to create, matched
                          case-insensitively (str, default=>'GenericResource')
        -- content_files: data to save as resource content
                          (list, default=>None, i.e. [])
        -- public: resource sharing status (bool, default=>False);
                   currently not used

        returns:
        -- None
        """
        keywords = [] if keywords is None else keywords
        content_files = [] if content_files is None else content_files

        # query the hydroshare resource types and make sure that
        # resource_type is valid. The table is keyed by lowercase name, so
        # the lookup must be lowercased too.
        restypes = {r.lower(): r for r in self.hs.getResourceTypes()}
        try:
            res_type = restypes[resource_type.lower()]
        except KeyError:
            display(HTML('<b style="color:red;">[%s] is not a valid '
                         'HydroShare resource type.</b>' % resource_type))
            return None

        # get the 'derived resource' metadata
        if derivedFromId is not None:
            try:
                # update the abstract and keyword metadata
                meta = self.getResourceMetadata(derivedFromId)
                abstract = meta.abstract \
                    + '\n\n[Modified in JupyterHub on %s]\n%s' \
                    % (dt.now(), abstract)
                keywords = set(keywords) | set(meta.keywords)
            except Exception:
                display(HTML('<b style="color:red;">Encountered an error '
                             ' while setting the derivedFrom relationship '
                             ' using id=%s. Make sure this resource is '
                             ' is accessible to your account. </b>'
                             % derivedFromId))
                display(HTML('<a href=%s target="_blank">%s</a>' %
                             ('https://www.hydroshare.org/resource/%s'
                              % derivedFromId,
                              'View the "DerivedFrom" '
                              'Resource')))
                return None

        f = content_files[0] if content_files else None

        # create the hs resource (1 content file allowed)
        resid = threads.runThreadedFunction('Creating HydroShare Resource',
                                            'Resource Created Successfully',
                                            self.hs.createResource,
                                            resource_type=res_type,
                                            title=title,
                                            abstract=abstract,
                                            resource_file=f,
                                            keywords=keywords)

        # add the remaining content files to the hs resource
        try:
            if len(content_files) > 1:
                self.addContentToExistingResource(resid, content_files[1:])
        except Exception as e:
            print(e)

        display(HTML('Resource id: %s' % resid))
        display(HTML('<a href=%s target="_blank">%s</a>' %
                     ('https://www.hydroshare.org/resource/%s' % resid,
                      'Open Resource in HydroShare')))

    def getResourceFromHydroShare(self, resourceid, destination='.'):
        """Downloads content of a hydroshare resource.

        args:
        -- resourceid: id of the hydroshare resource (str)
        -- destination: path to save resource, relative to the download
                        directory chosen in the constructor (str)

        returns:
        -- None
        """
        default_dl_path = self.download_dir
        dst = os.path.abspath(os.path.join(default_dl_path, destination))
        download = True

        # check if the data should be overwritten
        dst_res_folder = os.path.join(dst, resourceid)
        if os.path.exists(dst_res_folder):
            print('This resource already exists in your userspace.')
            # Every other helper in this class comes from ``utilities``.
            # NOTE(review): the original called ``utils.tree`` -- confirm
            # that ``utilities.tree`` is the intended function.
            utilities.tree(dst_res_folder)
            res = input('\nDo you want to overwrite these data [Y/n]? ')
            # Accept "N" and surrounding whitespace as a refusal too.
            if res.strip().lower() != 'n':
                shutil.rmtree(dst_res_folder)
            else:
                download = False

        # re-download the content if desired
        if download:
            try:
                # download the resource (threaded)
                threads.runThreadedFunction('Downloading Resource',
                                            'Download Finished',
                                            self.hs.getResource,
                                            resourceid,
                                            destination=dst,
                                            unzip=True)
                print('Successfully downloaded resource %s' % resourceid)
            except Exception as e:
                display(HTML('<b style="color:red">Failed to retrieve '
                             'resource content from HydroShare: %s</b>' % e))
                return None

        # load the resource content
        outdir = os.path.join(dst, '%s/%s' % (resourceid, resourceid))
        content_files = glob.glob(os.path.join(outdir, 'data/contents/*'))

        # trim the base name relative to the data directory
        dest_folder_name = os.path.dirname(destination).split('/')[-1]
        content = {}
        for f in content_files:
            fname = os.path.basename(f)
            content[fname] = os.path.join(
                dest_folder_name, os.path.relpath(f, dest_folder_name))

        # show the resource content files
        utilities.display_resource_content_files(content)

        # update the content dictionary
        self.content.update(content)

    def addContentToExistingResource(self, resid, content):
        """Adds content files to an existing hydroshare resource.

        args:
        -- resid: id of an existing hydroshare resource (str)
        -- content: files paths to be added to resource (list)

        returns:
        -- None
        """
        threads.runThreadedFunction('Adding Content to Resource',
                                    'Successfully Added Content Files',
                                    self._addContentToExistingResource,
                                    resid, content)

    def loadResource(self, resourceid):
        """Loads the contents of a previously downloaded resource.

        The {content file name: path} mapping is stored in ``self.content``.

        args:
        -- resourceid: the id of the resource that has been downloaded (str)

        returns:
        -- None
        """
        resdir = utilities.find_resource_directory(resourceid)
        if resdir is None:
            display(HTML('<b style="color:red">Could not find any resource '
                         'matching the id [%s].</b> <br> It is likely that '
                         'this resource has not yet been downloaded from '
                         'HydroShare.org, or it was removed from the '
                         'JupyterHub server. Please use the following '
                         'command to aquire the resource content: '
                         '<br><br><code>hs.getResourceFromHydroShare(%s)'
                         '</code>.' % (resourceid, resourceid)))
            return

        # create search paths. Need to check 2 paths due to hs_restclient bug #63.
        search_paths = [
            os.path.join(resdir, '%s/data/contents/*' % resourceid),
            os.path.join(resdir, 'data/contents/*'),
        ]

        content = {}
        for p in search_paths:
            content_files = glob.glob(p)
            if len(content_files) > 0:
                display(HTML('<p>Downloaded content is located at: %s</p>'
                             % resdir))
                display(HTML('<p>Found %d content file(s). \n</p>'
                             % len(content_files)))
            for f in content_files:
                fname = os.path.basename(f)
                content[fname] = f

        if len(content) == 0:
            display(HTML('<p>Did not find any content files for resource id: %s</p>' % resourceid))

        utilities.display_resource_content_files(content)
        self.content = content

    def getContentFiles(self, resourceid):
        """Gets the content files for a resource that exists on the Jupyter Server

        args:
        -- resourceid: the id of the hydroshare resource

        returns:
        -- {content file name: path}
        """
        content = utilities.get_hs_content(resourceid)
        return content

    def getContentPath(self, resourceid):
        """Gets the server path of a resources content files.

        args:
        -- resourceid: the id of the hydroshare resource

        returns:
        -- server path to the resource content files, or None when the
           resource directory cannot be found
        """
        path = utilities.find_resource_directory(resourceid)
        if path is not None:
            return os.path.join(path, resourceid, 'data/contents')
class hydroshare():
    """Convenience wrapper around the HydroShare REST client for notebooks."""

    def __init__(self, username=None):
        """Connects to HydroShare using (possibly cached) basic credentials.

        args:
        -- username: HydroShare user name (str, default=>HS_USR_NAME env var)
        """
        self.hs = None
        self.content = {}

        # load the HS environment variables
        # self.load_environment()

        uname = username
        if uname is None:
            # .get() so that a missing variable leads to a prompt in
            # getSecureConnection rather than a KeyError here.
            uname = os.environ.get('HS_USR_NAME')

        # get a secure connection to hydroshare
        auth = self.getSecureConnection(uname)

        try:
            self.hs = HydroShare(auth=auth)
            self.hs.getUserInfo()
            display(HTML('<b style="color:green;">Successfully established a connection with HydroShare</b>'))
        except HydroShareHTTPException as e:
            display(HTML(
                '<p style="color:red;"><b>Failed to establish a connection with HydroShare. Please check that you provided the correct credentials</b><br>%s </p>' % e))
            # remove the cached authentication
            auth_path = self._auth_path()
            if os.path.exists(auth_path):
                os.remove(auth_path)
            return None

    def _auth_path(self):
        """Returns the location of the cached credentials file."""
        return os.path.join(os.path.dirname(__file__), '../../../.auth')

    def _getResourceFromHydroShare(self, resourceid, destination='.',
                                   unzip=True):
        """Thread target: downloads a resource and queues the result."""
        # download the resource
        pid = self.hs.getResource(resourceid, destination=destination,
                                  unzip=unzip)
        threadResults.put(pid)

    def _createHydroShareResource(self, res_type, title, abstract,
                                  content_file, keywords=None):
        """Thread target: creates a resource and queues its id."""
        keywords = [] if keywords is None else keywords
        resid = self.hs.createResource(res_type, title,
                                       resource_file=content_file,
                                       keywords=keywords, abstract=abstract)
        threadResults.put(resid)

    def _addContentToExistingResource(self, resid, content_files):
        """Uploads each path in content_files to resource resid, in order."""
        for f in content_files:
            self.hs.addResourceFile(resid, f)

    def load_environment(self):
        """Exports the KEY=VALUE pairs of the ``env`` file next to this module."""
        env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                'env')
        with open(env_path, 'r') as f:
            lines = f.readlines()
        print('Adding the following system variables:')
        for line in lines:
            line = line.strip()
            # Ignore blank lines and lines without an assignment.
            if '=' not in line:
                continue
            # Split on the first '=' only so values may contain '='.
            k, v = line.split('=', 1)
            os.environ[k] = v
            print(' %s = %s' % (k, v))
        print('\nThese can be accessed using the following command: ')
        print(' os.environ[key]')
        print('\n (e.g.)\n os.environ["HS_USR_NAME"] => %s'
              % os.environ.get('HS_USR_NAME'))

    def getSecureConnection(self, username):
        """
        Builds basic-auth credentials, prompting or reading the cache.

        Args:
            username: HydroShare user name; prompted for when None
        Returns:
            HydroShare basic authentication object
        """
        import getpass

        auth_path = self._auth_path()
        if not os.path.exists(auth_path):
            print('\nThe hs_utils library requires a secure connection to your HydroShare account.')
            if username is None:
                username = input(
                    'Please enter your HydroShare username: ').strip()
            # Prompt for the password instead of embedding one in source.
            p = getpass.getpass('Enter the HydroShare password for user \'%s\': ' % username)
            auth = HydroShareAuthBasic(username=username, password=p)

            with open(auth_path, 'wb') as f:
                pickle.dump(auth, f, protocol=2)
        else:
            # NOTE: pickle executes arbitrary code on load; the cache file
            # must not be writable by untrusted users.
            with open(auth_path, 'rb') as f:
                auth = pickle.load(f)
        return auth

    def getResourceMetadata(self, resid):
        """Returns a ResourceMetadata built from system and science metadata."""
        science_meta = self.hs.getScienceMetadata(resid)
        system_meta = self.hs.getSystemMetadata(resid)
        return ResourceMetadata(system_meta, science_meta)

    def createHydroShareResource(self, abstract, title, derivedFromId,
                                 keywords=None,
                                 resource_type='GenericResource',
                                 content_files=None, public=False):
        """
        Creates a HydroShare resource, optionally derived from another one.

        Args:
            abstract: abstract for the resource
            title: title of the resource
            derivedFromId: id of the parent resource, or None (the user is
                then asked to confirm)
            keywords: list of subject keywords (default None, i.e. [])
            resource_type: resource type name, matched case-insensitively
            content_files: paths to upload (default None, i.e. [])
            public: currently not used
        Returns:
            None
        """
        keywords = [] if keywords is None else keywords
        content_files = [] if content_files is None else content_files

        # query the hydroshare resource types and make sure that resource_type
        # is valid. The table is keyed by lowercase name, so the lookup must
        # be lowercased too.
        restypes = {r.lower(): r for r in self.hs.getResourceTypes()}
        try:
            res_type = restypes[resource_type.lower()]
        except KeyError:
            display(HTML('<b style="color:red;">[%s] is not a valid HydroShare resource type.</b>' % resource_type))
            return None

        # get the 'derived resource' metadata
        if derivedFromId is not None:
            try:
                # update the abstract and keyword metadata
                meta = self.getResourceMetadata(derivedFromId)
                abstract = meta.abstract + '\n\n[Modified in JupyterHub on %s]\n%s' % (dt.now(), abstract)
                keywords = set(keywords) | set(meta.keywords)
            except Exception:
                display(HTML(
                    '<b style="color:red;">[%s] is not a valid HydroShare resource id for setting the "derivedFrom" attribute.</b>' % derivedFromId))
                return None
        else:
            response = input(
                'You have indicated that this resource is NOT derived from any existing HydroShare resource. Are you sure that this is what you intended? [Y/n]')
            if response.strip().lower() == 'n':
                display(HTML('<b style="color:red;">Resource creation aborted.</b>'))
                return

        f = content_files[0] if content_files else None

        # create the hs resource (1 content file allowed)
        t = threading.Thread(target=self._createHydroShareResource,
                             args=(res_type, title, abstract, f),
                             kwargs={'keywords': keywords})
        resid = runThreadedFunction(t, msg='Creating HydroShare Resource',
                                    success='Resource Creation Successful')

        # add the remaining content files to the hs resource
        remaining = content_files[1:]
        if remaining:
            self.addContentToExistingResource(resid, remaining)

        display(HTML('Resource id: %s' % resid))
        display(HTML('<a href=%s target="_blank">%s</a>' % (
            'https://www.hydroshare.org/resource/%s' % resid,
            'Open Resource in HydroShare')))

    def getResourceFromHydroShare(self, resourceid, destination='.'):
        """
        Downloads the content of HydroShare resource to the local data folder

        Args:
            resourceid: id of the HydroShare resource to query
            destination: path relative to the data root (the DATA
                environment variable when set, else the built-in default)
        """
        # The DATA environment variable overrides the machine-specific
        # default, which is kept for backward compatibility.
        default_dl_path = os.environ.get(
            'DATA', 'C:\\Users\\12672\\Box\\data\\NEON\\lczodata\\')
        dst = os.path.abspath(os.path.join(default_dl_path, destination))
        download = True

        # check if the data should be overwritten
        dst_res_folder = os.path.join(dst, resourceid)
        if os.path.exists(dst_res_folder):
            res = input(
                'This resource already exists in your userspace.\nWould you like to overwrite this data [Y/n]? ')
            # Accept "N" and surrounding whitespace as a refusal too.
            if res.strip().lower() != 'n':
                shutil.rmtree(dst_res_folder)
            else:
                download = False

        # re-download the content if desired
        if download:
            try:
                # get some metadata about the resource that will be downloaded
                res_meta = self.hs.getSystemMetadata(resourceid)
                # NOTE(review): the response is not used; presumably this is
                # a reachability check of the bag URL -- confirm.
                requests.head(res_meta['bag_url'])

                # download the resource (threaded)
                t = threading.Thread(target=self._getResourceFromHydroShare,
                                     args=(resourceid,),
                                     kwargs={'destination': dst,
                                             'unzip': True})
                runThreadedFunction(t, msg='Downloading',
                                    success='Download Completed Successfully')
            except Exception as e:
                display(HTML('<b style="color:red">Failed to retrieve resource content from HydroShare: %s</b>' % e))
                return None

        # load the resource content
        outdir = os.path.join(dst, '%s/%s' % (resourceid, resourceid))
        content_files = glob.glob(os.path.join(outdir, 'data/contents/*'))

        content = {os.path.basename(f): f for f in content_files}

        display_resource_content_files(content)

        # update the content dictionary
        self.content.update(content)

    def addContentToExistingResource(self, resid, content):
        """Uploads the given file paths to resource resid in a worker thread."""
        t = threading.Thread(target=self._addContentToExistingResource,
                             args=(resid, content))
        runThreadedFunction(t, msg='Adding Content to Resource',
                            success='Successfully Added Content Files')

    def loadResource(self, resourceid):
        """Stores {file name: path} of a downloaded resource in self.content."""
        resdir = find_resource_directory(resourceid)
        if resdir is None:
            display(HTML(
                '<b style="color:red">Could not find any resource matching the id [%s].</b> <br> It is likely that this resource has not yet been downloaded from HydroShare.org, or it was removed from the JupyterHub server. Please use the following command to aquire the resource content: <br><br> <code> hs.getResourceFromHydroShare(%s)</code>.' % (
                    resourceid, resourceid)))
            return

        content_files = glob.glob(
            os.path.join(resdir, '%s/data/contents/*' % resourceid))
        display(HTML('<p>Downloaded content is located at: %s</p>' % resdir))
        display(HTML('<p>Found %d content file(s). \n</p>' % len(content_files)))

        content = {os.path.basename(f): f for f in content_files}

        display_resource_content_files(content)
        self.content = content
from hs_restclient import HydroShare, HydroShareAuthBasic

# Authenticate against HydroShare with basic credentials.
hs = HydroShare(
    auth=(auth := None) or HydroShareAuthBasic(username="******", password="******")
) if False else None
auth = HydroShareAuthBasic(username="******", password="******")
hs = HydroShare(auth=auth)

# Adding a local file to a HS resource
result = hs.addResourceFile(
    '9cd1b4c170634534b86537321c37dd57',
    "/home/christian/src/test_file.txt",
)
def upload_to_hs(uploadtype, modelname, resource_name, resource_abstract,
                 resource_key):
    """Upload the stored files of a model to HydroShare.

    args:
    -- uploadtype: 'new' creates a resource and uploads into it,
                   'overwrite' replaces same-named files on the model's
                   resource, 'add' uploads time-stamped copies; any other
                   value uploads under the original names
    -- modelname: display name of the model to look up
    -- resource_name: title of the new resource ('new' only)
    -- resource_abstract: abstract of the new resource ('new' only)
    -- resource_key: comma separated keywords ('new' only)

    returns:
    -- JsonResponse ``{'success': True}``
    """
    # Maps a file extension to the table holding that package's data.
    # NOTE(review): 'str' presumably refers to a model class that shadows
    # the builtin in this module -- confirm; the builtin is deliberately
    # not called anywhere in this function.
    dbs = {
        'zone': zone, 'mult': mult, 'pval': pval, 'bas6': bas6, 'dis': dis,
        'disu': disu, 'bcf6': bcf6, 'lpf': lpf, 'hfb6': hfb6, 'chd': chd,
        'fhb': fhb, 'wel': wel, 'mnw1': mnw1, 'mnw2': mnw2, 'mnwi': mnwi,
        'drn': drn, 'rch': rch, 'evt': evt, 'ghb': ghb, 'gmg': gmg,
        'lmt6': lmt6, 'lmt7': lmt7, 'riv': riv, 'str': str, 'swi2': swi2,
        'pcg': pcg, 'pcgn': pcgn, 'nwt': nwt, 'pks': pks, 'sms': sms,
        'sfr': sfr, 'lak': lak, 'gage': gage, 'sip': sip, 'sor': sor,
        'de4': de4, 'oc': oc, 'uzf': uzf, 'upw': upw, 'sub': sub,
        'swt': swt, 'hyd': hyd, 'hob': hob, 'vdf': vdf, 'vsc': vsc,
        'drt': drt, 'pvl': pvl, 'ets': ets, 'bas': bas, 'nam': nam,
    }

    hs = HydroShare()

    Session = app.get_persistent_store_database('primary_db',
                                                as_sessionmaker=True)
    session = Session()
    try:
        fileliststr = session.query(Model).filter(
            Model.displayname == modelname).first()
        filelist = fileliststr.modelfiles.strip('{}').split(',')
        mainid = fileliststr.id
        resourceid = fileliststr.resourceid

        if uploadtype == 'new':
            # A real list (not a one-shot generator), without blanks.
            keywords = [k.strip() for k in resource_key.split(',')
                        if k.strip()]
            new_resource_id = hs.createResource('ModelInstanceResource',
                                                resource_name,
                                                keywords=keywords,
                                                abstract=resource_abstract)

        for fi in filelist:
            # Split on the last dot so names such as "a.v2.dis" still
            # resolve to the "dis" table.
            parts = fi.rsplit(".", 1)
            table = dbs[parts[1]]
            ext_data = session.query(table).filter(
                table.id == mainid).first().data

            if uploadtype == 'add':
                date = dt.now().strftime("%m-%d-%Y-%X")
                filename = "{}_{}.{}".format(parts[0], date, parts[1])
            else:
                filename = fi

            if uploadtype == 'new':
                target = new_resource_id
            else:
                target = resourceid
                if uploadtype == 'overwrite':
                    hs.deleteResourceFile(resourceid, filename)
            hs.addResourceFile(target, ext_data, resource_filename=filename)
    finally:
        # Release the DB session even when a query or an upload fails.
        session.close()

    return_obj = {'success': True}
    return JsonResponse(return_obj)
def upload_to_hs(id, file):
    """Add a file to an existing HydroShare resource.

    :param id: identifier of the target resource, passed as the first
        argument of ``HydroShare.addResourceFile``.
    :param file: the file to upload, passed as the second argument of
        ``HydroShare.addResourceFile``.
    :return: whatever ``HydroShare.addResourceFile`` returns.
    """
    auth = HydroShareAuthBasic(username='******', password='******')
    hs = HydroShare(auth=auth)
    # Use the caller's resource id; the literal string 'id' was sent before.
    return hs.addResourceFile(id, file)
def post_hs(ctx, username, password, modelrun_dir, include_shear_nc,
            resource_title, keyword):
    """Post the model run data to HydroShare

    Builds ``<modelrun_dir>/export`` from scratch, zips the vegetation maps,
    the ``inputs`` directory and the shear maps into it, creates a new
    ``GenericResource`` and uploads the three archives.

    :param ctx: not used by this function.
    :param username: HydroShare user name for basic authentication.
    :param password: HydroShare password for basic authentication.
    :param modelrun_dir: directory containing ``ripcas-*``, ``dflow-*`` and
        ``inputs``.
    :param include_shear_nc: not used by this function.
    :param resource_title: title of the resource to create.
    :param keyword: passed as ``keywords`` to ``HydroShare.createResource``.
    """
    # iterate over files and folders of interest, adding files to resource
    # RipCAS files are the vegetation .asc; TODO XXX include base RipCAS XXX TODO
    export_dir = os.path.join(modelrun_dir, 'export')
    veg_export_dir = os.path.join(export_dir, 'vegetation')

    # Start from an empty export directory.
    if os.path.isdir(export_dir):
        shutil.rmtree(export_dir)

    os.mkdir(export_dir)
    os.mkdir(veg_export_dir)

    # NOTE(review): glob order is not guaranteed, so the tstep index may not
    # follow the ripcas-* numbering -- confirm whether that matters.
    veg_pattern = os.path.join(modelrun_dir, 'ripcas-*', 'vegetation.asc')
    for tstep, veg_map in enumerate(glob.glob(veg_pattern)):
        veg_map_path = os.path.join(
            export_dir, 'vegetation', 'vegetation-%s.asc' % tstep
        )
        shutil.copy(veg_map, veg_map_path)

    # Writes <export_dir>/vegetation.zip
    shutil.make_archive(veg_export_dir, 'zip', veg_export_dir)

    # connect
    hs = HydroShare(
        auth=HydroShareAuthBasic(username=username, password=password)
    )

    # create new resource (no abstract is passed)
    rtype = 'GenericResource'
    r_id = hs.createResource(
        rtype, resource_title, keywords=keyword
    )

    # Each message reports the archive that is actually uploaded. The loop
    # variable veg_map_path was printed before, which is the wrong file and
    # is undefined when no vegetation map matched.
    veg_archive = os.path.join(export_dir, 'vegetation.zip')
    print("adding vegmap archive file %s to resource %s" % (veg_archive, r_id))
    hs.addResourceFile(r_id, veg_archive)

    inputs_dir = os.path.join(modelrun_dir, 'inputs')
    inputs_export_basename = os.path.join(export_dir, 'inputs')
    shutil.make_archive(inputs_export_basename, 'zip', inputs_dir)

    inputs_archive = os.path.join(export_dir, 'inputs.zip')
    print("adding inputs archive file %s to resource %s" % (inputs_archive, r_id))
    hs.addResourceFile(r_id, inputs_archive)

    shear_export_dir = os.path.join(export_dir, 'shear')
    os.mkdir(shear_export_dir)

    shear_pattern = os.path.join(modelrun_dir, 'dflow-*', 'shear_out.asc')
    for tstep, shear_map in enumerate(glob.glob(shear_pattern)):
        shear_map_path = os.path.join(
            export_dir, 'shear', 'shear-%s.asc' % tstep
        )
        shutil.copy(shear_map, shear_map_path)

    shutil.make_archive(shear_export_dir, 'zip', shear_export_dir)

    shear_archive = os.path.join(export_dir, 'shear.zip')
    print("adding shear archive file %s to resource %s" % (shear_archive, r_id))
    hs.addResourceFile(r_id, shear_archive)
class HydroShareUtility:
    """Convenience wrapper around a ``HydroShare`` REST client.

    Holds the client, the auth object and the user info returned at login,
    and offers helpers to list, upload, delete and describe resources.
    """

    def __init__(self):
        self.client = None  # type: HydroShare
        self.auth = None
        self.user_info = None
        # Parses a coverage value such as "start=...; end=...; scheme=..."
        self.re_period = re.compile(r'(?P<tag_start>^start=)(?P<start>[0-9-]{10}T[0-9:]{8}).{2}(?P<tag_end>end=)'
                                    r'(?P<end>[0-9-]{10}T[0-9:]{8}).{2}(?P<tag_scheme>scheme=)(?P<scheme>.+$)', re.I)
        # Namespace prefixes used when searching the science metadata XML.
        self.xml_ns = {
            'dc': "http://purl.org/dc/elements/1.1/",
            'dcterms': "http://purl.org/dc/terms/",
            'hsterms': "http://hydroshare.org/terms/",
            'rdf': "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            'rdfs1': "http://www.w3.org/2001/01/rdf-schema#"}
        self.time_format = '%Y-%m-%dT%H:%M:%S'
        self.xml_coverage = 'start={start}; end={end}; scheme=W3C-DTF'

    def authenticate(self, username, password, client_id=None, client_secret=None):
        """
        Authenticates access to allow read/write access to privileged resource_cache

        OAuth2 is used when both client_id and client_secret are given,
        basic authentication otherwise.

        :param username: username for HydroShare.org
        :param password: password associated with username
        :param client_id: Client ID obtained from HydroShare
        :param client_secret: Client Secret provided by HydroShare
        :return: Returns true if authentication was successful, false otherwise
        """
        if not all([username, password]):
            self.auth = None
            return False

        if client_id is not None and client_secret is not None:
            self.auth = HydroShareAuthOAuth2(client_id, client_secret, username=username, password=password)
        else:
            self.auth = HydroShareAuthBasic(username, password)

        try:
            self.client = HydroShare(auth=self.auth)  # , verify=False)
            # Fetching the user info is what actually validates the login.
            self.user_info = self.client.getUserInfo()
            return True
        except HydroShareException as e:  # for incorrect username/password combinations
            print('Authentication failed: {}'.format(e))
        except InvalidGrantError as e:  # for failures when attempting to use OAuth2
            print('Credentials could not be validated: {}'.format(e))
        except InvalidClientError as e:
            print('Invalid client ID and/or client secret: {}'.format(e))
        except Exception as e:
            print(e)

        self.auth = None
        return False

    def purgeDuplicateGamutFiles(self, resource_id, regex, confirm_delete=False):
        """
        Removes all files that have a duplicate-style naming pattern (e.g. ' (1).csv', '_ASDFGJK9.csv'

        A file counts as a duplicate when ``regex`` matches its URL (from the
        start, ignoring case) and the match has a non-empty ``groupdict()``,
        i.e. the pattern must contain named groups.

        :param resource_id: Resource to inspect for duplicates
        :type resource_id: Resource object received from the HydroShare API client
        :param regex: pattern applied to every file URL of the resource
        :param confirm_delete: If true, requires input that confirm file should be deleted
        :type confirm_delete: bool
        """
        re_breakdown = re.compile(regex, re.I)
        resource_files = self.getResourceFileList(resource_id)  # type: [dict]

        duplicates_list = []
        for remote_file in resource_files:
            url = remote_file.get('url', '')
            results = re_breakdown.match(url)  # Check the file URL for expected patterns
            if not results:
                continue
            groups = results.groupdict()
            if groups:
                # Save the file name so we can remove it later. It is taken
                # from the URL basename, as the other delete helpers do;
                # without it every delete targeted an empty file name.
                duplicates_list.append({'name': os.path.basename(url), 'duplicated': groups})

        for file_detail in duplicates_list:
            if not confirm_delete:
                delete_me = True
            else:
                user_answer = raw_input("Delete file {} [Y/n]: ".format(file_detail['name']))
                # Anything other than an explicit "n"/"N" means yes.
                delete_me = user_answer != 'N' and user_answer != 'n'

            if delete_me:
                print('Deleting file {}...'.format(file_detail['name']))
                self.client.deleteResourceFile(resource_id, file_detail['name'])
            else:
                print('Skipping duplicate file {}...'.format(file_detail['name']))

    def getResourceFileList(self, resource_id):
        """
        Fetch the file entries of a resource; any error is printed and an
        empty list is returned instead.

        :param resource_id: ID of resource for which to retrieve a file list
        :type resource_id: str
        :return: List of file entries in resource (callers read their 'url' key)
        :rtype: list
        """
        try:
            return list(self.client.getResourceFileList(resource_id))
        except Exception as e:
            print('Error while fetching resource files {}'.format(e))
            return []

    def getAllResources(self):
        """
        Return the resources owned by the authenticated user.

        :return: dict mapping resource id to HydroShareResource
        :raises HydroShareUtilityException: if not authenticated
        """
        # Check authentication first: user_info is None until login succeeds.
        if self.auth is None:
            raise HydroShareUtilityException("Cannot query resources without authentication")

        owner = self.user_info['username']
        filtered_resources = {}
        for resource in self.client.resources(owner=owner):
            resource_object = HydroShareResource(resource)
            filtered_resources[resource_object.id] = resource_object
        return filtered_resources

    def getMetadataForResource(self, resource):
        """
        Copy title, subjects, abstract and the first funding agency from the
        resource's science metadata onto ``resource``.

        :type resource: HydroShareResource
        """
        metadata = self.client.getScienceMetadata(resource.id)
        resource.title = metadata.get('title', '')
        resource.subjects = [item['value'] for item in metadata.get('subjects', [])]
        resource.abstract = metadata.get('description', '')

        if resource.abstract is None:
            resource.abstract = ''

        if 'funding_agencies' in metadata and len(metadata['funding_agencies']) > 0:
            # Only the first funding agency is reflected on the resource.
            funding_agency = metadata['funding_agencies'][0]
            resource.agency_url = funding_agency.get('agency_url', '')
            resource.funding_agency = funding_agency.get('agency_name', '')
            resource.award_number = funding_agency.get('award_number', '')
            resource.award_title = funding_agency.get('award_title', '')

    def updateResourceMetadata(self, resource):
        """
        Send ``resource.get_metadata()`` as the resource's science metadata.

        :type resource: HydroShareResource
        :return: result of the client's updateScienceMetadata call
        """
        return self.client.updateScienceMetadata(resource.id, resource.get_metadata())

    def _request(self, method, url, params=None, data=None, files=None, headers=None, stream=False):
        """Issue a raw HTTP request through the client's session and return the response."""
        request = self.client.session.request(method, url, params=params, data=data, files=files, headers=headers,
                                              stream=stream)
        return request

    def requestAccessRules(self, resource):
        """
        Get access rule for a resource.

        Sets ``resource.public`` and ``resource.shareable`` from the system
        metadata (both default to False when absent).

        :raises Exception: if the request does not return status 200
        """
        url = "{url_base}/resource/{pid}/sysmeta/".format(url_base=self.client.url_base, pid=resource.id)

        r = self._request('GET', url)
        if r.status_code != 200:
            raise Exception("Failed to get system metadata for resource: {}".format(resource.id))

        data = r.json()
        resource.public = data.get('public', False)
        resource.shareable = data.get('shareable', False)

    def makePublic(self, resource, public=True):
        """
        Makes a resource public or private

        ``resource.public`` is only updated when the request answers 200 or 202.

        :param resource: The resource you want to modify
        :param public: boolean value, True makes the resource public, False makes it private
        :return: None
        """
        hs = HydroShare(auth=self.auth)
        res = hs.resource(resource.id).public(public)
        if res.status_code == 200 or res.status_code == 202:
            resource.public = public

    def updateKeywords(self, resource, keywords=None):
        """
        Replace the subjects (keywords) of a resource.

        :param resource: resource whose keywords are updated
        :param keywords: iterable of strings; defaults to ``resource.keywords``
        :return: decoded JSON body of the response
        :raises HydroShareException: if the response status is not 202
        """
        if keywords is None:
            keywords = resource.keywords

        # remove leading/trailing whitespaces from keywords
        # (a real list, because it is iterated below and reported on failure)
        keywords = [keyword.strip() for keyword in keywords]

        url = "{url_base}/resource/{id}/scimeta/elements/".format(url_base=self.client.url_base, id=resource.id)

        subjects = [{"value": keyword} for keyword in keywords]

        r = self.client.session.request('PUT', url, json={"subjects": subjects})

        if r.status_code != 202:
            raise HydroShareException((url, 'PUT', r.status_code, keywords))
        return r.json()

    def getFileListForResource(self, resource):
        """Store the file names (URL basenames) of ``resource`` on ``resource.files`` and return them."""
        resource.files = [os.path.basename(f['url']) for f in self.getResourceFileList(resource.id)]
        return resource.files

    def getFilesByResourceId(self, resource_id):
        """Return the file names (URL basenames) of the resource with the given id."""
        return [os.path.basename(f['url']) for f in self.getResourceFileList(resource_id)]

    def filterResourcesByRegex(self, regex_string=None, owner=None, regex_flags=re.IGNORECASE):
        """
        Apply a regex filter to all available resource_cache. Useful for finding GAMUT resource_cache

        :param owner: username of the owner of the resource; defaults to the authenticated user
        :type owner: string
        :param regex_string: String to be used as the regex filter; None keeps every resource
        :param regex_flags: Flags to be passed to the regex search
        :return: A list of resource_cache that matched the filter
        :raises HydroShareUtilityException: if not authenticated
        """
        # Check authentication first: user_info is None until login succeeds.
        if self.auth is None:
            raise HydroShareUtilityException("Cannot query resources without authentication")
        if owner is None:
            owner = self.user_info['username']

        # Compile only when a pattern was given; re.compile(None) raises.
        regex_filter = None if regex_string is None else re.compile(regex_string, regex_flags)

        filtered_resources = []
        for resource in self.client.resources(owner=owner):
            if regex_filter is not None and regex_filter.search(resource['resource_title']) is None:
                continue
            resource_object = HydroShareResource(resource)
            resource_object.files = [os.path.basename(f['url']) for f in self.getResourceFileList(resource_object.id)]
            filtered_resources.append(resource_object)
        return filtered_resources

    def UploadFiles(self, files, resource):
        # type: ([str], HydroShareResource) -> bool
        """
        Upload local files to a resource, replacing remote files of the same name.

        :param files: paths of the files to upload
        :param resource: target resource
        :return: True when every file was uploaded, False on a HydroShareException or KeyError
        :raises HydroShareUtilityException: if not authenticated
        """
        if self.auth is None:
            raise HydroShareUtilityException("Cannot modify resources without authentication")

        try:
            for csv_file in files:
                try:
                    # Remove a previous copy first; a missing remote file is fine.
                    self.client.deleteResourceFile(resource.id, os.path.basename(csv_file))
                except HydroShareNotFound:
                    pass

                # str() leaves an exact str untouched and converts anything else.
                csv_file = str(csv_file)
                self.client.addResourceFile(resource.id, csv_file)

                msg = "File {} uploaded to remote {}".format(os.path.basename(csv_file), repr(resource))
                print(msg)
                pub.sendMessage('logger', message=msg)
        except HydroShareException as e:
            print("Upload failed - could not complete upload to HydroShare due to exception: {}".format(e))
            return False
        except KeyError as e:
            print('Incorrectly formatted arguments given. Expected key not found: {}'.format(e))
            return False
        return True

    def setResourcesAsPublic(self, resource_ids):
        """
        Set every listed resource to public; failures are printed and the loop continues.

        :raises HydroShareUtilityException: if not authenticated
        """
        if self.auth is None:
            raise HydroShareUtilityException("Cannot modify resources without authentication")
        for resource_id in resource_ids:
            try:
                print('Setting resource {} as public'.format(resource_id))
                self.client.setAccessRules(resource_id, public=True)
            except HydroShareException as e:
                print("Access rule edit failed - could not set to public due to exception: {}".format(e))
            except KeyError as e:
                print('Incorrectly formatted arguments given. Expected key not found: {}'.format(e))

    def deleteFilesInResource(self, resource):
        """
        Delete every file of a resource; the first error is printed and stops the loop.

        :raises HydroShareUtilityException: if not authenticated
        """
        if self.auth is None:
            raise HydroShareUtilityException("Cannot modify resources without authentication")

        try:
            file_list = self.getResourceFileList(resource.id)
            for file_info in file_list:
                file_name = os.path.basename(file_info['url'])
                msg = 'Deleting resource file: {}'.format(file_name)
                print(msg)
                pub.sendMessage('logger', message=msg)
                self.client.deleteResourceFile(resource.id, file_name)
        except Exception as e:
            print('Could not delete files in resource {}\n{}'.format(resource.id, e))

    def getResourceCoveragePeriod(self, resource_id):
        """
        Read the coverage period from the science metadata XML of a resource.

        :return: (period_start, period_end) as parsed by dateutil; both are
                 None when the period is missing or cannot be parsed
        """
        metadata = self.client.getScienceMetadata(resource_id)
        period_start = None
        period_end = None
        try:
            xml_tree = ElementTree.fromstring(metadata)
            # Walk rdf:Description -> dc:coverage -> dcterms:period -> rdf:value.
            # A missing node yields None and the next .find raises, which
            # lands in the except below.
            description_node = xml_tree.find('rdf:Description', namespaces=self.xml_ns)
            coverage_node = description_node.find('dc:coverage', namespaces=self.xml_ns)
            period_node = coverage_node.find('dcterms:period', namespaces=self.xml_ns)
            value_node = period_node.find('rdf:value', namespaces=self.xml_ns)
            match = self.re_period.match(value_node.text)
            if match is not None:
                period_start = dateutil.parser.parse(match.group('start'))
                period_end = dateutil.parser.parse(match.group('end'))
        except Exception as e:
            print("Unable to find coverage data - encountered exception {}".format(e))
        return period_start, period_end

    def deleteResource(self, resource_id, confirm=True):
        """
        Delete a resource, asking on the console first when ``confirm`` is true.

        Errors (including those raised while prompting) are printed, not raised.

        :raises HydroShareUtilityException: if not authenticated
        """
        if self.auth is None:
            raise HydroShareUtilityException("Cannot modify resources without authentication")
        try:
            if confirm:
                user_input = raw_input('Are you sure you want to delete the resource {}? (y/N): '.format(resource_id))
                # Only an explicit "y"/"Y" proceeds.
                if user_input.lower() != 'y':
                    return
            print('Deleting resource {}'.format(resource_id))
            self.client.deleteResource(resource_id)
        except Exception as e:
            print('Exception encountered while deleting resource {}: {}'.format(resource_id, e))

    def createNewResource(self, resource):
        # type: (ResourceTemplate) -> HydroShareResource
        """
        Create a ``CompositeResource`` from a template and return it with its
        metadata loaded.

        :param resource: template providing title, abstract, keywords and the
                         funding attributes (funding_agency, award_title,
                         award_number, agency_url)
        :type resource: ResourceTemplate
        :return: the created resource, or None when ``resource`` is None
        :rtype: HydroShareResource
        :raises HydroShareUtilityException: if not authenticated
        """
        if self.auth is None:
            raise HydroShareUtilityException("Cannot create resource without authentication")

        # http://hs-restclient.readthedocs.io/en/latest/
        if resource is None:
            return None

        # (template attribute name, funding agency metadata key)
        fundingagency_attr_map = (
            ('funding_agency', 'agency_name'),
            ('award_title', 'award_title'),
            ('award_number', 'award_number'),
            ('agency_url', 'agency_url')
        )

        fundingagency = {}
        for reskey, fakey in fundingagency_attr_map:
            # A missing or None attribute is treated like an empty string.
            value = (getattr(resource, reskey, None) or '').strip()
            if value:
                fundingagency[fakey] = value

        metadata = []
        if fundingagency:
            metadata.append({'fundingagency': fundingagency})

        # NOTE(review): the `encoding` keyword of json.dumps exists on
        # Python 2 only -- confirm the interpreter this file targets.
        resource_id = self.client.createResource(resource_type='CompositeResource',
                                                 title=resource.title,
                                                 abstract=resource.abstract,
                                                 keywords=resource.keywords,
                                                 metadata=json.dumps(metadata, encoding='ascii'))

        hs_resource = HydroShareResource({'resource_id': resource_id})
        self.getMetadataForResource(hs_resource)
        return hs_resource