def create(self, title): ''' Create a draft in Dataverse with some minimal metadata. ''' if self.draftUrl != '': print "Draft already created: " + self.draftUrl return [ 'Dataverse PUBLISH INFO: Draft already exists: ' + self.draftUrl ] connection = Connection(self.apiUrl, self.apiToken, use_https=False) dataverse = connection.get_dataverse(self.alias) #create draft #put some required default metadata creator = 'ibridges' description = 'Description' metadata = {'subject': 'Other'} try: self.__dataset = dataverse.create_dataset(title=title, creator=creator, description=description, **metadata) self.__md = self.__dataset.get_metadata() self.draftUrl = 'http://'+self.__dataset.connection.host+'/dataset.xhtml?persistentId='+\ self.__dataset.doi return except: return ["Draft not created."]
def create(self, title):
    ''' Create a draft in Dataverse with some minimal metadata.

    Raises RuntimeError when a draft already exists.  On success sets
    self.__dataset, self.__md and self.draftUrl.
    '''
    if self.draftUrl != '':
        raise RuntimeError("Dataverse PUBLISH:" + "Draft already created: " + self.draftUrl)
    self.logger.debug('connect to %s', self.apiUrl)
    connection = Connection(self.apiUrl, self.apiToken, use_https=False)
    dataverse = connection.get_dataverse(self.alias)
    # create draft
    # put some required default metadata
    creator = 'ibridges'
    description = 'Description'
    metadata = {'subject': 'Other'}
    # FIX: pass lazy %-style args instead of pre-formatting with `%` --
    # formatting is deferred until the record is actually emitted.
    self.logger.info('create draft title=%s, creator=%s, description=%s',
                     title, creator, description)
    self.__dataset = dataverse.create_dataset(title=title,
                                              creator=creator,
                                              description=description,
                                              **metadata)
    self.__md = self.__dataset.get_metadata()
    self.draftUrl = 'http://' + self.__dataset.connection.host + \
                    '/dataset.xhtml?persistentId=' + \
                    self.__dataset.doi
    self.logger.info('draft created: %s', self.draftUrl)
def create_dataset(): print 'make connection...' connection = Connection(DV_HOST, TOKEN, use_https=False) print 'connection', connection dataverse = connection.get_dataverse('root') print 'base_url', connection.base_url title, description, creator = get_geotweet_params() kwargs = dict(notes="notes go here") dataverse.create_dataset(title, description, creator, **kwargs) print 'dataset created'
def clone(self):
    """Schedule a download of every released file in the configured study.

    Connects with SSL validation disabled, resolves the dataverse and the
    study by DOI (raising DataverseArchiverError on any lookup failure),
    and returns a celery chord of per-file download tasks followed by
    clone_done.
    """
    # Disables ssl validation
    conn = Connection(self.user, self.password, self.host, disable_ssl=True)
    if not conn.connected:
        raise DataverseArchiverError('Invalid creditials or host')
    dataverse = conn.get_dataverse(self.dataverse_name)
    if not dataverse:
        raise DataverseArchiverError('Invalid dataverse alias')
    study = dataverse.get_study_by_doi(self.study_doi)
    if not study:
        raise DataverseArchiverError('Invalid study doi')
    downloads = []
    for released in study.get_released_files():
        downloads.append(self.download_file.si(self, released.download_url))
    return chord(downloads, self.clone_done.s(self))
def fetch_dz(token):
    """Download dZ_grid.nc from the Jakobshavn dataset on Harvard Dataverse.

    Looks up the 'jakobshavn-inversions' dataverse, finds the dataset by its
    full title, resolves the download URL of 'dZ_grid.nc' (latest version)
    and fetches it with wget into data/dZ_grid.nc (--no-clobber keeps any
    existing local copy).

    Raises DataverseError when the dataverse or the dataset cannot be found.
    """
    from dataverse import Connection
    host = "dataverse.harvard.edu"
    connection = Connection(host, token)
    dataverse = connection.get_dataverse("jakobshavn-inversions", refresh=True)
    if not dataverse:
        raise DataverseError("No dataverse found!")
    title = ("Ice surface elevation and change at Jakobshavn Isbrae, "
             "West Greenland, 2008-2014")
    dataset = dataverse.get_dataset_by_title(title, refresh=True)
    if not dataset:
        raise DataverseError("No data set found!")
    # FIX: renamed from `file`, which shadowed the builtin.
    dz_file = dataset.get_file("dZ_grid.nc", "latest")
    url = dz_file.download_url
    subprocess.call(["wget", "--no-clobber", url, "-O", "data/dZ_grid.nc"])
alias = 'myDataverseAlias' # Create Dataverse dataverse = connection.create_dataverse(alias, alias, '*****@*****.**', alias) # Http responses on server malformatted: https:// ... 8080:8080 or 8181:8181 --> server side fault, github issue created #if connection.host.endswith(':8080') and connection.host != 'localhost': # href = dataverse.collection.get('href') # href = href.replace('https', 'http') # href = href.replace('8080:8080', '8080') # dataverse.collection.set('href', href) # and publish dataverse.publish() # Get dataverse and data dataverse = connection.get_dataverse(alias) datasets = dataverse.get_datasets( ) # throws error Failed to parse: ip.add.ress:8080:8080 dataset = dataverse.get_dataset_by_doi(dataverse.get_datasets()[0].doi) #if self.connection.host.endswith(':8080') and self.connection.host != 'localhost': # self.edit_uri = self.edit_uri.replace('https', 'http') # self.edit_uri = self.edit_uri.replace('8080:8080', '8080') # self.edit_media_uri = self.edit_media_uri.replace('https', 'http') # self.edit_media_uri = self.edit_media_uri.replace('8080:8080', '8080') #files = dataset.get_files() print files[0].download_url # Delete Dataverse alias = "another-alias" dataverse = connection.get_dataverse(alias) connection.delete_dataverse(dataverse)
def loadjson(apiurl): jsondataurl = apiurl req = urllib2.Request(jsondataurl) opener = urllib2.build_opener() f = opener.open(req) dataframe = simplejson.load(f) return dataframe config = configuration() connection = Connection(host, token) dataverse = connection.get_dataverse('clioinfra') for item in dataverse.get_contents(): # u'protocol': u'hdl', u'authority': u'10622' u'identifier': u'R8EJJF' try: handle = str(item['protocol']) + ':' + str( item['authority']) + "/" + str(item['identifier']) datasetid = item['id'] url = "https://" + str(host) + "/api/datasets/" + str( datasetid) + "/versions/1.0?&key=" + str(token) print item dataframe = loadjson(url) for fileitem in dataframe['data']['files']: runimport = os.path.abspath(os.path.join( os.path.dirname(__file__))) runimport = str(runimport) + "/import.py -d 'https://" + str(
# if resp.status_code == 404: # raise exceptions.DataverseNotFoundError( # 'Dataverse {0} was not found.'.format(parent) # ) # elif resp.status_code != 201: # raise exceptions.OperationFailedError( # '{0} Dataverse could not be created.'.format(name) # ) # # dataset.get_service_document(refresh=True) # return dataset.get_dataverse(alias) # creation of a dataverse automatically is not currently working # I have created one manually with id: testing_dataverse_123 # fetch dataverse by id dataverse = connection.get_dataverse('testing_dataverse_123') # I have created the dataset in dataverse manually: "doi:10.70122/FK2/0HH8BM" dataset = dataverse.get_dataset_by_doi('doi:10.70122/FK2/O13BQC') # upload a string under a filename #dataset.upload_file("test_file.txt", "string of what's inside the file", False) # # upload a file as is from path # dataset.upload_filepath("test_file.txt") # # upload and encrypt file from path with a random key # out = enc_file("test_file.txt") # if(out is None): # print("error encrypting file") # else:
# -*- coding: utf-8 -*- """ Created on Wed Jun 27 21:21:24 2018 @author: moniy """ from dataverse import Connection host = 'dataverse.harvard.edu' # All clients >4.0 are supported token = 'ed0b265c-8c6b-417a-883c-cd077d7ae354' # Generated at /account/apitoken connection = Connection(host, token) dataverse = connection.get_dataverse() ### tHIS IS THE PROBLEM dataset = dataverse.get_dataset_by_doi('DOI:10.7910/DVN/LAYMOS') files = dataset.get_files('latest') ### DOESNT WORK. THIS IS WHY I CALLED YOU. iT WORKS ON r ##inVALID CREDENTIALS ERROR ##sAME CREDS WORK ON r tHE GUY WHO IS RPRASAD ON SLACK GROUP HE WROTE IT
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../modules'))) from config import configuration def loadjson(apiurl): jsondataurl = apiurl req = urllib2.Request(jsondataurl) opener = urllib2.build_opener() f = opener.open(req) dataframe = simplejson.load(f) return dataframe config = configuration() connection = Connection(host, token) dataverse = connection.get_dataverse('clioinfra') for item in dataverse.get_contents(): # u'protocol': u'hdl', u'authority': u'10622' u'identifier': u'R8EJJF' try: handle = str(item['protocol']) + ':' + str(item['authority']) + "/" + str(item['identifier']) datasetid = item['id'] url = "https://" + str(host) + "/api/datasets/" + str(datasetid) + "/versions/1.0?&key=" + str(token) print item dataframe = loadjson(url) for fileitem in dataframe['data']['files']: runimport = os.path.abspath(os.path.join(os.path.dirname(__file__))) runimport = str(runimport) + "/import.py -d 'https://" + str(host) + "' -H '" + str(handle) + ":" + str(datasetid) + ":" + str(fileitem['datafile']['id']) + "' -k " + str(token) #print fileitem['datafile']['id'] p = Popen(runimport, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) response = p.stdout.read()
def datasetspace(settings=''):
    """Flask view: search Dataverse datasets and render 'search.html'.

    Query-string parameters read from the request:
      dv          -- dataverse alias to search in (default 'global')
      q           -- free-text search query
      permissions -- 'closed' selects restricted datasets (default 'yes' = public)
      where       -- 'collab' / 'iish' / 'harvard'; marks that source checked
                     and, for 'harvard', switches to the Harvard Dataverse host

    NOTE(review): the `settings` parameter is immediately overwritten by
    Configuration() below, so any passed value is ignored.
    """
    (where, query, datasets, metadata, s, permissions) = ({}, '', [], [], {}, 'yes')
    where = {'collab': '', 'iish': '', 'harvard': ''}
    pagetitle = "Public datasets"
    config = configuration()
    if config['error']:
        return config['error']
    root = config['apiroot']
    dataversename = 'global'
    if request.args.get('dv'):
        dataversename = request.args.get('dv')
    if request.args.get('q'):
        query = request.args.get('q')
    if request.args.get('permissions'):
        permissions = request.args.get('permissions')
    if request.args.get('where'):
        where[request.args.get('where')] = 'checked="checked"'

    settings = Configuration()
    sconnection = ExtrasearchAPI(settings.config['dataverseroot'], dataversename)
    if where['harvard']:
        # Extract host for Dataverse connection
        findhost = re.search('(http\:\/\/|https\:\/\/)(.+)', settings.config['harvarddataverse'])
        if findhost:
            settings.config['dataversehostname'] = findhost.group(2)
        connection = Connection(settings.config['dataversehostname'], settings.config['harvardkey'])
    else:
        try:
            connection = Connection(config['hostname'], settings.config['key'])
        except:
            return 'Error: no connection to Dataverse. Please try later...'

    handlestr = ''
    if query:
        s['q'] = query
        metadata = search_by_keyword(connection, s)
    else:
        # No query given: fall back to listing every dataset of the dataverse
        # and searching for all of their identifiers at once.
        try:
            dataverse = connection.get_dataverse(dataversename)
            item = dataverse.get_contents()
            active = 'yes'
        except:
            active = None
        if active:
            try:
                for item in dataverse.get_contents():
                    handlestr += item['identifier'] + ' '
                active = 'yes'
            except:
                active = None
        if not active:
            # Dataverse listing failed; use the extra-search API instead.
            handlestr = sconnection.read_all_datasets()
        if handlestr:
            s['q'] = handlestr
            s['per_page'] = 100
            metadata = search_by_keyword(connection, s)
    #return str(metadata['items'])
    for dataset in metadata['items']:
        active = ''
        # Private datasets
        if permissions == 'closed':
            pagetitle = "Restricted datasets"
            try:
                if (sconnection.has_restricted_data(dataset['global_id'])):
                    active = 'yes'
            except:
                active = ''
        # Public data
        else:
            try:
                if not (sconnection.has_restricted_data(dataset['global_id'])):
                    active = 'yes'
            except:
                active = ''
        if active:
            # Build the author display string.
            # NOTE(review): each loop iteration overwrites dataset['author'],
            # so only the last author survives -- confirm whether the intent
            # was to accumulate a comma-separated list.
            try:
                for author in dataset['authors']:
                    dataset['author'] = str(author) + ', '
                dataset['author'] = dataset['author'][:-2]
            except:
                dataset['author'] = str(dataset['description'])
            datasets.append(dataset)
            # NOTE(review): Harvard results are appended a second time here --
            # confirm the duplicate entry is intended.
            if where['harvard']:
                datasets.append(dataset)

    (username, projectname) = ('', '')
    fields = {}
    resp = make_response(render_template('search.html',
                                         projectname=projectname,
                                         username=username,
                                         datasets=datasets,
                                         searchq=query,
                                         pagetitle=pagetitle,
                                         where=where,
                                         fields=fields))
    return resp