Example #1
    def create(self, title):
        '''
        Create a draft in Dataverse with some minimal metadata.
        '''
        if self.draftUrl != '':
            print "Draft already created: " + self.draftUrl
            return [
                'Dataverse PUBLISH INFO: Draft already exists: ' +
                self.draftUrl
            ]

        connection = Connection(self.apiUrl, self.apiToken, use_https=False)
        dataverse = connection.get_dataverse(self.alias)

        # create draft
        # put some required default metadata
        creator = 'ibridges'
        description = 'Description'
        metadata = {'subject': 'Other'}
        try:
            self.__dataset = dataverse.create_dataset(title=title,
                                                      creator=creator,
                                                      description=description,
                                                      **metadata)
            self.__md = self.__dataset.get_metadata()
            self.draftUrl = 'http://' + self.__dataset.connection.host + \
                '/dataset.xhtml?persistentId=' + self.__dataset.doi
            return
        except Exception:
            return ["Draft not created."]
Example #2
    def create(self, title):
        '''
        Create a draft in Dataverse with some minimal metadata.
        '''
        if self.draftUrl != '':
            raise RuntimeError("Dataverse PUBLISH: " +
                               "Draft already created: " + self.draftUrl)

        self.logger.debug('connect to %s', self.apiUrl)
        connection = Connection(self.apiUrl, self.apiToken, use_https=False)
        dataverse = connection.get_dataverse(self.alias)

        # create draft
        # put some required default metadata
        creator = 'ibridges'
        description = 'Description'
        metadata = {'subject': 'Other'}
        self.logger.info('create draft title=%s, creator=%s, description=%s',
                         title, creator, description)
        self.__dataset = dataverse.create_dataset(title=title,
                                                  creator=creator,
                                                  description=description,
                                                  **metadata)
        self.__md = self.__dataset.get_metadata()
        self.draftUrl = 'http://' + self.__dataset.connection.host + \
                        '/dataset.xhtml?persistentId=' + \
                        self.__dataset.doi
        self.logger.info('draft created: %s', self.draftUrl)
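The two variants differ only in how they report an already-existing draft: Example #1 returns a list of messages, Example #2 raises. A minimal caller-side sketch, assuming a hypothetical `publisher` instance of the class above:

# Hypothetical caller of variant #1: errors come back as a list of strings.
errors = publisher.create('My dataset title')
if errors:
    for message in errors:
        print(message)

# Hypothetical caller of variant #2: an existing draft raises RuntimeError.
try:
    publisher.create('My dataset title')
except RuntimeError as exc:
    print(exc)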
Example #3

def create_dataset():

    print('make connection...')
    connection = Connection(DV_HOST, TOKEN, use_https=False)

    print('connection', connection)
    dataverse = connection.get_dataverse('root')
    print('base_url', connection.base_url)

    title, description, creator = get_geotweet_params()

    kwargs = dict(notes="notes go here")
    dataverse.create_dataset(title, description, creator, **kwargs)
    print('dataset created')
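As Examples #1 and #2 show, `create_dataset` forwards extra keyword arguments as metadata fields; a sketch combining the patterns used above (the values are placeholders):

# Extra metadata travels as keyword arguments alongside the required fields.
dataverse.create_dataset(title='My dataset',
                         description='Description',
                         creator='ibridges',
                         subject='Other',        # default used in Examples #1/#2
                         notes='notes go here')  # as in this example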
Example #4

    def clone(self):
        # Disable SSL validation
        c = Connection(self.user, self.password, self.host, disable_ssl=True)
        if not c.connected:
            raise DataverseArchiverError('Invalid credentials or host')

        dv = c.get_dataverse(self.dataverse_name)
        if not dv:
            raise DataverseArchiverError('Invalid dataverse alias')

        study = dv.get_study_by_doi(self.study_doi)
        if not study:
            raise DataverseArchiverError('Invalid study doi')

        header = [
            self.download_file.si(self, f.download_url)
            for f in
            study.get_released_files()
        ]

        return chord(header, self.clone_done.s(self))
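`clone` returns an unexecuted Celery chord: the `download_file` tasks in the header run in parallel, and `clone_done` fires once they all finish. A sketch of dispatching it, assuming a hypothetical `archiver` instance and running Celery workers:

# Build the chord and send it to the workers.
workflow = archiver.clone()
async_result = workflow.apply_async()  # AsyncResult of the clone_done callback
result = async_result.get()            # block until the whole clone completes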
Example #5

def fetch_dz(token):
    import subprocess

    from dataverse import Connection
    host = "dataverse.harvard.edu"
    connection = Connection(host, token)
    dataverse = connection.get_dataverse("jakobshavn-inversions",
                                         refresh = True)

    if not dataverse:
        raise DataverseError("No dataverse found!")

    title = ("Ice surface elevation and change at Jakobshavn Isbrae, "
             "West Greenland, 2008-2014")
    dataset = dataverse.get_dataset_by_title(title, refresh=True)

    if not dataset:
        raise DataverseError("No data set found!")

    file = dataset.get_file("dZ_grid.nc", "latest")
    url  = file.download_url

    subprocess.call(["wget", "--no-clobber", url, "-O", "data/dZ_grid.nc"])
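Shelling out to wget assumes it is installed; a stdlib-only sketch of the same download, skipping existing files to mirror `--no-clobber`:

import os
import urllib.request

# Only download if the target does not exist yet (wget --no-clobber).
target = "data/dZ_grid.nc"
if not os.path.exists(target):
    urllib.request.urlretrieve(url, target)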
Example #6
alias = 'myDataverseAlias'

# Create Dataverse
dataverse = connection.create_dataverse(alias, alias, '*****@*****.**', alias)
# HTTP responses from the server are malformed (https:// ... 8080:8080 or 8181:8181);
# this is a server-side fault, a GitHub issue has been created
#if connection.host.endswith(':8080') and connection.host != 'localhost':
#    href = dataverse.collection.get('href')
#    href = href.replace('https', 'http')
#    href = href.replace('8080:8080', '8080')
#    dataverse.collection.set('href', href)
# and publish
dataverse.publish()

# Get dataverse and data
dataverse = connection.get_dataverse(alias)
datasets = dataverse.get_datasets()  # throws error: Failed to parse: ip.add.ress:8080:8080
dataset = dataverse.get_dataset_by_doi(datasets[0].doi)
#if self.connection.host.endswith(':8080') and self.connection.host != 'localhost':
#    self.edit_uri = self.edit_uri.replace('https', 'http')
#    self.edit_uri = self.edit_uri.replace('8080:8080', '8080')
#    self.edit_media_uri = self.edit_media_uri.replace('https', 'http')
#    self.edit_media_uri = self.edit_media_uri.replace('8080:8080', '8080')
files = dataset.get_files()
print(files[0].download_url)

# Delete Dataverse
alias = "another-alias"
dataverse = connection.get_dataverse(alias)
connection.delete_dataverse(dataverse)
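The commented-out workaround above patches the malformed host the server returns; pulled into a helper it might look like this (a sketch based only on the replacements shown in the comments, function name hypothetical):

def fix_collection_href(href):
    # Hypothetical helper: downgrade to http and strip the duplicated
    # port the buggy server emits (e.g. 'host:8080:8080' -> 'host:8080').
    href = href.replace('https', 'http')
    href = href.replace('8080:8080', '8080')
    href = href.replace('8181:8181', '8181')
    return href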
Example #7

import os
import urllib.request

import simplejson
from config import configuration


def loadjson(apiurl):
    # fetch the JSON document behind the given API URL and parse it
    jsondataurl = apiurl

    req = urllib.request.Request(jsondataurl)
    opener = urllib.request.build_opener()
    f = opener.open(req)
    dataframe = simplejson.load(f)
    return dataframe


config = configuration()

connection = Connection(host, token)
dataverse = connection.get_dataverse('clioinfra')

for item in dataverse.get_contents():
    # u'protocol': u'hdl', u'authority': u'10622' u'identifier': u'R8EJJF'
    try:
        handle = str(item['protocol']) + ':' + str(
            item['authority']) + "/" + str(item['identifier'])
        datasetid = item['id']
        url = "https://" + str(host) + "/api/datasets/" + str(
            datasetid) + "/versions/1.0?&key=" + str(token)
        print(item)
        dataframe = loadjson(url)
        for fileitem in dataframe['data']['files']:
            runimport = os.path.abspath(os.path.join(
                os.path.dirname(__file__)))
            runimport = (str(runimport) + "/import.py -d 'https://" +
                         str(host) + "' -H '" + str(handle) + ":" +
                         str(datasetid) + ":" +
                         str(fileitem['datafile']['id']) +
                         "' -k " + str(token))
    except Exception:
        # skip datasets that cannot be resolved
        continue
Example #8

#    if resp.status_code == 404:
#        raise exceptions.DataverseNotFoundError(
#            'Dataverse {0} was not found.'.format(parent)
#        )
#    elif resp.status_code != 201:
#        raise exceptions.OperationFailedError(
#            '{0} Dataverse could not be created.'.format(name)
#        )
#
#    dataset.get_service_document(refresh=True)
# return dataset.get_dataverse(alias)

# creating a dataverse automatically is not currently working;
# I have created one manually with id: testing_dataverse_123
# fetch the dataverse by id
dataverse = connection.get_dataverse('testing_dataverse_123')

# I have created the dataset in dataverse manually: "doi:10.70122/FK2/0HH8BM"
dataset = dataverse.get_dataset_by_doi('doi:10.70122/FK2/O13BQC')

# upload a string under a filename
#dataset.upload_file("test_file.txt", "string of what's inside the file", False)

# # upload a file as is from path
# dataset.upload_filepath("test_file.txt")

# # upload and encrypt file from path with a random key
# out = enc_file("test_file.txt")
# if(out is None):
#     print("error encrypting file")
# else:
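Uncommented, the plain (non-encrypted) upload path sketched in the comments above would read:

# Upload an in-memory string under a filename (third argument: zip flag).
dataset.upload_file("test_file.txt", "string of what's inside the file", False)

# Or upload an existing file from disk as-is.
dataset.upload_filepath("test_file.txt")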
Example #9
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 27 21:21:24 2018

@author: moniy
"""
from dataverse import Connection

host = 'dataverse.harvard.edu'                  # All clients >4.0 are supported
token = 'ed0b265c-8c6b-417a-883c-cd077d7ae354'  # Generated at /account/apitoken
connection = Connection(host, token)
dataverse = connection.get_dataverse()  # <-- this is the problem
dataset = dataverse.get_dataset_by_doi('DOI:10.7910/DVN/LAYMOS')
files = dataset.get_files('latest')


# Doesn't work; this is why I called you. It works in R.
# The call fails with an invalid-credentials error,
# yet the same credentials work in R.
# (rprasad from the Slack group wrote the R version.)
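Judging by every other example on this page, `get_dataverse` takes a dataverse alias; the invalid-credentials error most likely stems from the missing argument. A sketch of the corrected call (the alias is a placeholder):

# get_dataverse() needs the alias of the target dataverse,
# as in the other examples here ('root', 'clioinfra', ...).
dataverse = connection.get_dataverse('my_dataverse_alias')
dataset = dataverse.get_dataset_by_doi('DOI:10.7910/DVN/LAYMOS')
files = dataset.get_files('latest')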
Example #10

import os
import sys
import urllib.request
from subprocess import PIPE, Popen, STDOUT

import simplejson

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../modules')))
from config import configuration

def loadjson(apiurl):
    # fetch the JSON document behind the given API URL and parse it
    jsondataurl = apiurl

    req = urllib.request.Request(jsondataurl)
    opener = urllib.request.build_opener()
    f = opener.open(req)
    dataframe = simplejson.load(f)
    return dataframe

config = configuration()

connection = Connection(host, token)
dataverse = connection.get_dataverse('clioinfra')

for item in dataverse.get_contents():
    # u'protocol': u'hdl', u'authority': u'10622' u'identifier': u'R8EJJF'
    try:
        handle = str(item['protocol']) + ':' + str(item['authority']) + "/" + str(item['identifier'])
        datasetid = item['id']
        url = "https://" + str(host) + "/api/datasets/" + str(datasetid) + "/versions/1.0?&key=" + str(token)
        print(item)
        dataframe = loadjson(url)
        for fileitem in dataframe['data']['files']:
            runimport = os.path.abspath(os.path.join(os.path.dirname(__file__)))
            runimport = str(runimport) + "/import.py -d 'https://" + str(host) + "' -H '" + str(handle) + ":" + str(datasetid) + ":" + str(fileitem['datafile']['id']) + "' -k " + str(token)
            # print(fileitem['datafile']['id'])
            p = Popen(runimport, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
            response = p.stdout.read()
    except Exception:
        # skip datasets that cannot be resolved
        continue
Example #11
def datasetspace(settings=''):
    (where, query, datasets, metadata, s, permissions) = ({}, '', [], [], {}, 'yes')
    where = {'collab': '', 'iish': '', 'harvard': ''}
    pagetitle = "Public datasets"
    config = configuration()
    if config['error']:
        return config['error']

    root = config['apiroot']
    dataversename = 'global'
    if request.args.get('dv'):
        dataversename = request.args.get('dv')
    if request.args.get('q'):
        query = request.args.get('q')
    if request.args.get('permissions'):
        permissions = request.args.get('permissions')
    if request.args.get('where'):
        where[request.args.get('where')] = 'checked="checked"'

    settings = Configuration()
    sconnection = ExtrasearchAPI(settings.config['dataverseroot'], dataversename)
    if where['harvard']:
        # Extract host for Dataverse connection
        findhost = re.search(r'(http://|https://)(.+)', settings.config['harvarddataverse'])
        if findhost:
            settings.config['dataversehostname'] = findhost.group(2)
        connection = Connection(settings.config['dataversehostname'], settings.config['harvardkey'])
    else:
        try:
            connection = Connection(config['hostname'], settings.config['key'])
        except Exception:
            return 'Error: no connection to Dataverse. Please try later...'

    handlestr = ''
    if query:
        s['q'] = query
        metadata = search_by_keyword(connection, s)
    else:
        try:
            dataverse = connection.get_dataverse(dataversename)
            item = dataverse.get_contents()
            active = 'yes'
        except Exception:
            active = None
        if active:
            try:
                for item in dataverse.get_contents():
                    handlestr += item['identifier'] + ' '
                    active = 'yes'
            except Exception:
                active = None

        if not active:
            handlestr = sconnection.read_all_datasets()

        if handlestr:
            s['q'] = handlestr
            s['per_page'] = 100
            metadata = search_by_keyword(connection, s)

    #return str(metadata['items'])
    for dataset in metadata['items']:
        active = ''
        # Private datasets
        if permissions == 'closed':
            pagetitle = "Restricted datasets"
            try:
                if sconnection.has_restricted_data(dataset['global_id']):
                    active = 'yes'
            except Exception:
                active = ''
        # Public data
        else:
            try:
                if not sconnection.has_restricted_data(dataset['global_id']):
                    active = 'yes'
            except Exception:
                active = ''

        if active:
            try:
                # join all authors into one comma-separated string
                dataset['author'] = ', '.join(str(author) for author in dataset['authors'])
            except Exception:
                dataset['author'] = str(dataset['description'])

            datasets.append(dataset)
        if where['harvard']:
            datasets.append(dataset)

    (username, projectname) = ('', '')
    fields = {}
    resp = make_response(render_template('search.html', projectname=projectname,
                                         username=username, datasets=datasets,
                                         searchq=query, pagetitle=pagetitle,
                                         where=where, fields=fields))
    return resp