# requires: pickle, copy (stdlib); mainDOIsoupFirst and dict2element are project helpers
def localDOI(DOI, myXSDtree, code_srcDir):
    with open(code_srcDir + '/doi.pkl', 'rb') as f:
        alldoiDict = pickle.load(f)
    rollback = copy.deepcopy(alldoiDict)
    if DOI not in alldoiDict:
        # assign the DOI the next paperID ('nextPID'), bump the counter, and
        # write doi.pkl immediately: fetching the metadata is slow, so the
        # paperID must land in doi.pkl first to avoid collisions
        PID = 'L' + str(alldoiDict['nextPID'])
        alldoiDict['nextPID'] += 1
        alldoiDict[DOI] = {'paperID': PID}
        with open(code_srcDir + '/doi.pkl', 'wb') as f:
            pickle.dump(alldoiDict, f)
        # special case: special-issue made-up DOI, nothing to crawl
        if 'ma-SI' in DOI:
            return alldoiDict[DOI]
        # now fetch the metadata using the doi-crawler and save it to alldoiDict / doi.pkl
        crawlerDict = mainDOIsoupFirst(DOI)
        # if the DOI is not valid, mainDOIsoupFirst() returns {}; roll back doi.pkl
        if len(crawlerDict) == 0:
            with open(code_srcDir + '/doi.pkl', 'wb') as f:
                pickle.dump(rollback, f)
            return None
        # convert crawlerDict to an xml element string
        xmlstring = dict2element(crawlerDict, myXSDtree)
        alldoiDict[DOI]['metadata'] = xmlstring
        # update doi.pkl with the metadata field
        with open(code_srcDir + '/doi.pkl', 'wb') as f:
            pickle.dump(alldoiDict, f)
    return alldoiDict[DOI]
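# A minimal sketch of the doi.pkl layout that localDOI expects: a plain dict with a
# 'nextPID' integer counter plus one {'paperID': ..., 'metadata': ...} entry per DOI.
# The path and starting counter value below are assumptions, not from the source.
def _example_seed_doi_cache(code_srcDir='.'):
    import pickle
    # seed an empty cache; localDOI requires at least the 'nextPID' counter
    with open(code_srcDir + '/doi.pkl', 'wb') as f:
        pickle.dump({'nextPID': 1}, f)
    # after a successful localDOI('10.1000/example', myXSDtree, code_srcDir),
    # the cache would hold:
    # {'nextPID': 2, '10.1000/example': {'paperID': 'L1', 'metadata': '<...>'}}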
# requires: urllib.request, json, logging, traceback (stdlib); nm_rest,
# mainDOIsoupFirst, specialIssueRest, and mapToRest are project helpers
def restDOI(DOI, code_srcDir, restbase, sheet_sample, user, runCtx):
    message = ''
    new_dataset = False
    response = None
    # check existence
    try:
        # dsurl = restbase + '/nmr/dataset?doi=' + DOI
        dsurl = restbase + '/nmr/dataset?id=' + runCtx['datasetId']
        rq = urllib.request.Request(dsurl)
        nmCurate = nm_rest(logging, runCtx['sysToken'], runCtx['curateApiToken'],
                           runCtx['curateRefreshToken'], rq)
        rv = nmCurate.urlopen(None)  # no data for a GET request
        j = json.loads(rv.read().decode('utf8'))
        if len(j["data"]) > 0:
            # a dataset still carrying the initial placeholder DOI has not had
            # its metadata filled in yet
            if j["data"][0]['doi'] == runCtx['nm_dataset_initial_doi']:
                new_dataset = True
            response = j["data"][0]
        else:
            message += ('Expected to find datasetId: ' + runCtx['datasetId'] +
                        ', but no data found.')
            logging.error(message)
    except Exception:
        message += ('exception occurred during dataset GET by datasetId: ' +
                    runCtx['datasetId'] + '\n')
        message += 'exception: ' + str(traceback.format_exc()) + '\n'
        logging.error('exception: ' + str(traceback.format_exc()))
    if message != '':
        return (None, message)
    # NOTE: datasets should not be created here anymore; the datasetId in the
    # job_parameters should already exist. Initially, however, the dataset is
    # mostly empty, so its data must still be set up.
    dsInfo = response
    if new_dataset:
        # special case: special-issue made-up DOI from the spreadsheet
        if 'ma-SI' in DOI:
            logging.info('DOI from spreadsheet: ' + DOI + ' dsInfo: ' + json.dumps(dsInfo))
            DOI = 'unpublished-' + DOI
            dsInfo['doi'] = DOI
            # generate ds_data for the special issue by reading the Excel sheet
            dsInfo = specialIssueRest(sheet_sample, dsInfo)
        else:
            # standard DOI from the spreadsheet: fetch the metadata using the doi-crawler
            crawlerDict = mainDOIsoupFirst(DOI, code_srcDir)
            # mainDOIsoupFirst() returns {} for an invalid DOI
            if len(crawlerDict) == 0:
                message += '[DOI Error] Please check the reported DOI, it seems that DOI does not exist.\n'
                return (None, message)
            dsInfo = mapToRest(dsInfo, crawlerDict)['dsUpdate']
    # return dsInfo at the end; if dsInfo is not None, message will be ''
    return (dsInfo, message)
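# The six-argument restDOI above reads only a handful of runCtx keys. A hypothetical
# caller sketch showing the expected shape; every value is a placeholder, and the
# tokens and sentinel DOI come from the deployment, not from this source.
def _example_call_restDOI(DOI, code_srcDir, restbase, sheet_sample, user):
    runCtx = {
        'datasetId': '42',                        # dataset must already exist server-side
        'nm_dataset_initial_doi': 'DOI-NOT-SET',  # sentinel value is an assumption
        'sysToken': '...',
        'curateApiToken': '...',
        'curateRefreshToken': '...',
    }
    dsInfo, message = restDOI(DOI, code_srcDir, restbase, sheet_sample, user, runCtx)
    if dsInfo is None:
        logging.error(message)  # message carries the failure details
    return dsInfo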
def restDOI(DOI, code_srcDir, restbase, sheet_sample):
    message = ''
    exist = False
    response = None
    # check existence
    try:
        dsurl = restbase + '/nmr/dataset?doi=' + DOI
        rq = urllib2.Request(dsurl)
        j = json.loads(urllib2.urlopen(rq).read())
        if len(j["data"]) > 0:
            exist = True
            response = j["data"][0]
    except:
        message += 'exception occurred during dataset GET by doi\n'
        message += 'exception: ' + str(sys.exc_info()[0]) + '\n'
    if message != '':
        return (None, message)
    # if doi doesn't exist, ds-create
    if not exist:
        # special case, special issue madeup DOI
        if 'ma-SI' in DOI:
            # generate ds_data for special issue by reading the Excel
            ds_data = specialIssueRest(sheet_sample, DOI)
        else:
            # now fetch the metadata using doi-crawler
            crawlerDict = mainDOIsoupFirst(DOI, code_srcDir)
            # check if doi valid
            if len(crawlerDict) == 0:
                message += '[DOI Error] Please check the reported DOI, it seems that DOI does not exist.\n'
                return (None, message)
            ds_data = mapToRest(crawlerDict)
        response = None  # initialize response of the request
        # POST ds-create
        try:
            ds_create_url = restbase + '/nmr/dataset/create'
            rq = urllib2.Request(ds_create_url)
            # logging.info('request created using ds_create_url')
            rq.add_header('Content-Type', 'application/json')
            r = urllib2.urlopen(rq, json.dumps(ds_data))
            # logging.info('dataset create request posted: ' + str(r.getcode()))
            response = json.loads(r.read())['data']
        except:
            message += 'exception occurred during dataset-create\n'
            message += 'exception: ' + str(sys.exc_info()[0]) + '\n'
        # assemble the PID
        if response is None:
            message += 'exception occurred during getting the response of dataset-create\n'
        if message != '':
            return (None, message)
    # return response at the end; if response is not None, message will be ''
    return (response, message)
# requires: urllib.request, ssl, json, traceback (stdlib); specialIssueRest,
# mainDOIsoupFirst, and mapToRest are project helpers
def restDOI(DOI, code_srcDir, restbase, sheet_sample, user):
    message = ''
    exist = False
    response = None
    # check existence
    try:
        dsurl = restbase + '/nmr/dataset?doi=' + DOI
        rq = urllib.request.Request(dsurl)
        j = json.loads(
            urllib.request.urlopen(
                rq, context=ssl._create_unverified_context()).read().decode('utf-8'))
        if len(j["data"]) > 0:
            exist = True
            response = j["data"][0]
    except Exception:
        message += 'exception occurred during dataset GET by doi\n'
        message += 'exception: ' + str(traceback.format_exc()) + '\n'
    if message != '':
        return (None, message)
    # if the doi doesn't exist yet, create the dataset (ds-create)
    if not exist:
        # special case: special-issue made-up DOI
        if 'ma-SI' in DOI:
            # generate ds_data for the special issue by reading the Excel sheet
            ds_data = specialIssueRest(sheet_sample, DOI)
        else:
            # fetch the metadata using the doi-crawler
            crawlerDict = mainDOIsoupFirst(DOI, code_srcDir)
            # mainDOIsoupFirst() returns {} for an invalid DOI
            if len(crawlerDict) == 0:
                message += '[DOI Error] Please check the reported DOI, it seems that DOI does not exist.\n'
                return (None, message)
            ds_data = mapToRest(crawlerDict)
        response = None  # initialize the response of the request
        # POST ds-create
        try:
            ds_create_url = restbase + '/nmr/dataset/create'
            rq = urllib.request.Request(ds_create_url)
            rq.add_header('Content-Type', 'application/json')
            # NOTE: this may not be the best place for it, but 'create' defaults
            # the visibility flags to false and the userid needs to be set
            dsInfo = ds_data['dsInfo']
            dsInfo['isPublic'] = 'false'
            dsInfo['ispublished'] = 'false'  # no camel case on this one
            dsInfo['userid'] = user
            r = urllib.request.urlopen(
                rq, json.dumps(ds_data).encode('utf-8'),
                context=ssl._create_unverified_context())
            response = json.loads(r.read().decode('utf-8'))['data']
        except Exception:
            message += 'exception occurred during dataset-create\n'
            message += 'exception: ' + str(traceback.format_exc()) + '\n'
        # assemble the PID
        if response is None:
            message += 'exception occurred during getting the response of dataset-create\n'
        if message != '':
            return (None, message)
    # return response at the end; if response is not None, message will be ''
    return (response, message)
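# All three restDOI variants share the same return convention: a (result, message)
# pair where exactly one side is meaningful -- result is None on failure, message is
# '' on success. A minimal caller sketch for the five-argument variant above; the
# arguments are placeholders, and 'doi' is the response field the existence check reads.
def _example_handle_restDOI_result(DOI, code_srcDir, restbase, sheet_sample, user):
    response, message = restDOI(DOI, code_srcDir, restbase, sheet_sample, user)
    if response is None:
        raise RuntimeError('restDOI failed:\n' + message)
    return response['doi']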