Example #1
import copy
import pickle

# mainDOIsoupFirst() and dict2element() are project-local helpers: the first
# crawls metadata for a DOI, the second converts that dict to an XML string.
def localDOI(DOI, myXSDtree, code_srcDir):
    with open(code_srcDir + '/doi.pkl','rb') as f:
        alldoiDict = pickle.load(f)
        rollback = copy.deepcopy(alldoiDict)
    if DOI not in alldoiDict:
        # Assign 'nextPID' as this DOI's paperID, bump 'nextPID', record the
        # entry in alldoiDict, and write doi.pkl immediately. Fetching the
        # metadata is slow, so the paperID must be persisted first to avoid
        # collisions with concurrent calls.
        PID = 'L' + str(alldoiDict['nextPID'])
        alldoiDict['nextPID'] += 1
        alldoiDict[DOI] = {'paperID': PID}
        with open(code_srcDir + '/doi.pkl', 'wb') as f:
            pickle.dump(alldoiDict, f)
        # special case: made-up special-issue DOI, nothing to crawl
        if 'ma-SI' in DOI:
            return alldoiDict[DOI]
        # now fetch the metadata using doi-crawler and save to alldoiDict, doi.pkl
        crawlerDict = mainDOIsoupFirst(DOI)
        # mainDOIsoupFirst() returns {} for an invalid DOI; roll back doi.pkl
        if len(crawlerDict) == 0:
            with open(code_srcDir + '/doi.pkl', 'wb') as f:
                pickle.dump(rollback, f)
            return None
        # convert crawlerDict to an XML element string
        xmlstring = dict2element(crawlerDict, myXSDtree)
        alldoiDict[DOI]['metadata'] = xmlstring
        # update the doi.pkl for the metadata field
        with open(code_srcDir + '/doi.pkl', 'wb') as f:
            pickle.dump(alldoiDict, f)
        return alldoiDict[DOI]
    else:
        return alldoiDict[DOI]
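For orientation, a minimal, hypothetical call; the doi.pkl layout ({'nextPID': <int>, '<DOI>': {'paperID': ..., 'metadata': ...}}) is inferred from the function above, and every path and value below is a placeholder, not the project's real configuration.

import pickle

code_srcDir = '/tmp/code_src'   # placeholder: directory holding doi.pkl
myXSDtree = None                # stand-in: the project passes its parsed XSD here

# bootstrap doi.pkl once with the assumed initial layout
with open(code_srcDir + '/doi.pkl', 'wb') as f:
    pickle.dump({'nextPID': 1}, f)

entry = localDOI('10.1000/xyz123', myXSDtree, code_srcDir)
if entry is None:
    print('crawler reports the DOI as invalid')
else:
    print(entry['paperID'])     # e.g. 'L1'; entry may also carry 'metadata'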
Example #2
import json
import logging
import traceback
import urllib.request

# nm_rest, mainDOIsoupFirst, specialIssueRest and mapToRest are
# project-local helpers.
def restDOI(DOI, code_srcDir, restbase, sheet_sample, user, runCtx):
    message = ''
    new_dataset = False
    response = None

    # check existence
    try:
        # dsurl = restbase + '/nmr/dataset?doi='+DOI
        dsurl = restbase + '/nmr/dataset?id=' + runCtx['datasetId']
        rq = urllib.request.Request(dsurl)
        # j = json.loads(urllib.request.urlopen(rq, context=ssl._create_unverified_context()).read().decode('utf-8'))
        nmCurate = nm_rest(logging, runCtx['sysToken'],
                           runCtx['curateApiToken'],
                           runCtx['curateRefreshToken'], rq)
        #j = json.loads(nmCurate.urlopen(None).read().encode("utf8")) # no data for GET request
        rv = nmCurate.urlopen(None)
        j = json.loads(rv.read().decode('utf8'))  # no data for GET request
        if len(j["data"]) > 0:
            if j["data"][0]['doi'] == runCtx['nm_dataset_initial_doi']:
                new_dataset = True
            response = j["data"][0]
        else:
            message += 'Expected to find datasetId: ' + runCtx[
                'datasetId'] + ', but no data found.'
            logging.error(message)
    except Exception:
        message += 'exception occurred during dataset GET by datasetId: ' + runCtx[
            'datasetId'] + '\n'
        # message += 'exception occurred during dataset GET by doi\n'
        message += 'exception: ' + str(traceback.format_exc()) + '\n'
        logging.error('exception: ' + str(traceback.format_exc()))

    if message != '':
        return (None, message)

    ## NOTE: datasets should no longer be created here; the datasetId in
    ## job_parameters is expected to exist already. Initially, though, the
    ## dataset is mostly empty, so its metadata must still be filled in.
    dsInfo = response
    if new_dataset:
        # special case: made-up special-issue DOI
        if 'ma-SI' in DOI:  # DOI from the spreadsheet is a special-issue placeholder
            logging.info('DOI from spreadsheet: ' + DOI + ' dsInfo: ' +
                         json.dumps(dsInfo))
            DOI = 'unpublished-' + DOI
            dsInfo['doi'] = DOI
            # generate ds_data for special issue by reading the Excel
            dsInfo = specialIssueRest(sheet_sample, dsInfo)
        else:  # DOI from spreadsheet is standard DOI
            # now fetch the metadata using doi-crawler
            crawlerDict = mainDOIsoupFirst(DOI, code_srcDir)
            # check if doi valid
            if len(crawlerDict) == 0:
                message += '[DOI Error] Please check the reported DOI, it seems that DOI does not exist.\n'
                return (None, message)
            dsInfo = mapToRest(dsInfo, crawlerDict)['dsUpdate']
    # if dsInfo is not None, message is ''; the two are mutually exclusive
    return (dsInfo, message)
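A sketch of how this version might be invoked; the runCtx keys are the ones the function reads above, but every value is a placeholder, and sheet_sample stands for whatever Excel sheet object the pipeline passes in.

import logging

runCtx = {
    'datasetId': '42',                  # placeholder: pre-created dataset id from job_parameters
    'nm_dataset_initial_doi': 'tbd',    # placeholder: sentinel DOI meaning "not yet filled in"
    'sysToken': 'SYS_TOKEN',            # placeholder credentials consumed by nm_rest
    'curateApiToken': 'API_TOKEN',
    'curateRefreshToken': 'REFRESH_TOKEN',
}
sheet_sample = None                     # stand-in for the sample sheet

dsInfo, message = restDOI('10.1000/xyz123', '/tmp/code_src',
                          'https://localhost', sheet_sample, 'someuser', runCtx)
if dsInfo is None:
    logging.error(message)              # GET failure or missing datasetId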
Example #3
import json
import sys
import urllib2  # Python 2 variant of this function

def restDOI(DOI, code_srcDir, restbase, sheet_sample):
    message = ''
    exist = False
    response = None
    # check existence
    try:
        dsurl = restbase + '/nmr/dataset?doi=' + DOI
        rq = urllib2.Request(dsurl)
        j = json.loads(urllib2.urlopen(rq).read())
        if len(j["data"]) > 0:
            exist = True
            response = j["data"][0]
    except Exception:
        message += 'exception occurred during dataset GET by doi\n'
        message += 'exception: ' + str(sys.exc_info()[0]) + '\n'
    if message != '':
        return (None, message)
    # if doi doesn't exist, ds-create
    if not exist:
        # special case: made-up special-issue DOI
        if 'ma-SI' in DOI:
            # generate ds_data for special issue by reading the Excel
            ds_data = specialIssueRest(sheet_sample, DOI)
        else:
            # now fetch the metadata using doi-crawler
            crawlerDict = mainDOIsoupFirst(DOI, code_srcDir)
            # check if doi valid
            if len(crawlerDict) == 0:
                message += '[DOI Error] Please check the reported DOI, it seems that DOI does not exist.\n'
                return (None, message)
            ds_data = mapToRest(crawlerDict)
        response = None  # initialize response of the request
        # POST ds-create
        try:
            ds_create_url = restbase + '/nmr/dataset/create'
            rq = urllib2.Request(ds_create_url)
            # logging.info('request created using ds_create_url')
            rq.add_header('Content-Type', 'application/json')
            r = urllib2.urlopen(rq, json.dumps(ds_data))
            # logging.info('dataset create request posted: ' + str(r.getcode()))
            response = json.loads(r.read())['data']
        except Exception:
            message += 'exception occurred during dataset-create\n'
            message += 'exception: ' + str(sys.exc_info()[0]) + '\n'
        # verify dataset-create returned a response
        if response is None:
            message += 'exception occurred during getting the response of dataset-create\n'
        if message != '':
            return (None, message)
    # if response is not None, message is ''; the two are mutually exclusive
    return (response, message)
Example #4
import json
import ssl
import traceback
import urllib.request

# Python 3 port of the function above; also takes the owning user so the
# create call can set ownership fields.
def restDOI(DOI, code_srcDir, restbase, sheet_sample, user):
    message = ''
    exist = False
    response = None
    # check existence
    try:
        dsurl = restbase + '/nmr/dataset?doi=' + DOI
        rq = urllib.request.Request(dsurl)
        j = json.loads(
            urllib.request.urlopen(
                rq, context=ssl._create_unverified_context()).read().decode(
                    'utf-8'))
        if len(j["data"]) > 0:
            exist = True
            response = j["data"][0]
    except Exception:
        message += 'exception occurred during dataset GET by doi\n'
        message += 'exception: ' + str(traceback.format_exc()) + '\n'

    if message != '':
        return (None, message)
    # if doi doesn't exist, ds-create
    if not exist:
        # special case: made-up special-issue DOI
        if 'ma-SI' in DOI:
            # generate ds_data for special issue by reading the Excel
            ds_data = specialIssueRest(sheet_sample, DOI)
        else:
            # now fetch the metadata using doi-crawler
            crawlerDict = mainDOIsoupFirst(DOI, code_srcDir)
            # check if doi valid
            if len(crawlerDict) == 0:
                message += '[DOI Error] Please check the reported DOI, it seems that DOI does not exist.\n'
                return (None, message)
            ds_data = mapToRest(crawlerDict)
        response = None  # initialize response of the request
        # POST ds-create
        try:
            ds_create_url = restbase + '/nmr/dataset/create'
            rq = urllib.request.Request(ds_create_url)
            # logging.info('request created using ds_create_url')
            rq.add_header('Content-Type', 'application/json')

            # NOTE/TODO: possibly not the best place for this, but the create
            # call needs these flags defaulted to 'false' and the userid set
            # explicitly.
            dsInfo = ds_data['dsInfo']
            dsInfo['isPublic'] = 'false'
            dsInfo['ispublished'] = 'false'  # note: this key has no camel case
            dsInfo['userid'] = user

            r = urllib.request.urlopen(
                rq,
                json.dumps(ds_data).encode("utf-8"),
                context=ssl._create_unverified_context())
            # logging.info('dataset create request posted: ' + str(r.getcode()))
            response = json.loads(r.read().decode("utf-8"))['data']
        except Exception:
            message += 'exception occurred during dataset-create\n'
            message += 'exception: ' + str(traceback.format_exc()) + '\n'
        # verify dataset-create returned a response
        if response is None:
            message += 'exception occurred during getting the response of dataset-create\n'
        if message != '':
            return (None, message)
    # if response is not None, message is ''; the two are mutually exclusive
    return (response, message)
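Finally, a hypothetical call for this Python 3 variant; the endpoint, DOI, and user are placeholders. On success, response is the dataset record either found by the DOI query or returned by dataset-create.

restbase = 'https://localhost'     # placeholder REST endpoint
sheet_sample = None                # only consulted for 'ma-SI' DOIs

response, message = restDOI('10.1000/xyz123', '/tmp/code_src',
                            restbase, sheet_sample, 'someuser')
if response is None:
    print(message)                 # details of the GET or dataset-create failure
else:
    print(response.get('doi'))     # dataset record from the service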