Esempio n. 1
0
 def testElapsedTime(self):
     """Check the value returned by dsFunc.elapsedTime after a fixed sleep.

     Sleeps for a known number of seconds, then asserts that the whole
     seconds reported by the returned object fall between the sleep
     length and the sleep length plus one.
     """
     pauseSeconds = 5
     began = time.time()
     time.sleep(pauseSeconds)
     elapsed = dsFunc.elapsedTime(began, 'Elapsed run time')
     # One extra second of slack is tolerated on top of the sleep itself.
     measuredSeconds = int(elapsed.seconds)
     withinWindow = pauseSeconds <= measuredSeconds <= pauseSeconds + 1
     self.assertTrue(withinWindow)
Esempio n. 2
0
                        # Walk the record's metadata looking for its
                        # dc.identifier.uri entry; the pattern checks below run
                        # once for each such entry.
                        for metadata_element in metadata:
                            if metadata_element['key'] == 'dc.identifier.uri':
                                # NOTE(review): uri is assigned but not read
                                # anywhere in this excerpt.
                                uri = metadata_element['value']
                                # Test indvdl_nm against three patterns defined
                                # outside this excerpt; judging by the variable
                                # names they detect initials, a middle initial
                                # and parentheses - TODO confirm.
                                contains_initials = re.search(
                                    regexCI, indvdl_nm)
                                contains_middleinitial = re.search(
                                    regexMI, indvdl_nm)
                                contains_parentheses = re.search(
                                    regexPR, indvdl_nm)
                                # Count only when regexCI matches and neither
                                # regexMI nor regexPR does; every other case
                                # moves on to the next metadata element.
                                if contains_middleinitial:
                                    continue
                                elif contains_parentheses:
                                    continue
                                elif contains_initials:
                                    initialCount += 1
                                else:
                                    continue
        # Step the paging offset forward by 200 and echo it as progress.
        offset = offset + 200
        print(offset)
    # Emit a row only when something was counted: collection name,
    # baseURL + '/' + collection handle, and the count zero-padded to six
    # digits.
    if initialCount > 0:
        f.writerow([collectionName] + [baseURL + '/' + collectionHandle] +
                   [str(initialCount).zfill(6)])

# POST to the REST logout endpoint, reusing the headers, cookies and
# certificate-verification setting used elsewhere in the script.
logout = requests.post(
    baseURL + '/rest/logout', headers=header, cookies=cookies, verify=verify)

# Hand the script's start time to dsFunc.elapsedTime under the
# 'Script run time' label.
dsFunc.elapsedTime(startTime, 'Script run time')
                                     verify=verify)
                # Re-issue the collection-items request every 5 seconds until
                # it answers with HTTP 200.
                # NOTE(review): there is no retry limit, so a persistent
                # non-200 response keeps this loop running forever.
                while items.status_code != 200:
                    time.sleep(5)
                    items = requests.get(
                        baseURL + '/rest/collections/' + str(collectionID) +
                        '/items?limit=200&offset=' + str(offset),
                        headers=header,
                        cookies=cookies,
                        verify=verify)
                # Replace the response object with its decoded JSON body.
                items = items.json()
                # Record the uuid of every returned item in itemList.
                for k in range(0, len(items)):
                    itemID = items[k]['uuid']
                    itemList.append(itemID)
                # Advance by 200, matching the limit=200 used in the request.
                offset = offset + 200

# Hand the script's start time to dsFunc.elapsedTime under the
# 'Item list creation time' label.
dsFunc.elapsedTime(startTime, 'Item list creation time')

# Create the timestamped CSV log and write its header row.
# The file is opened with newline='' as the csv module requires; without it
# the writer's own '\r\n' line endings are translated again on platforms
# such as Windows, producing a blank line after every row.
# NOTE(review): the underlying file object is never closed explicitly; `f`
# is kept as a module-level writer because later code writes rows to it.
f = csv.writer(
    open(
        filePath + 'removeUnnecessarySpaces' +
        datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv',
        'w',
        newline=''))
f.writerow(['itemID', 'replacedKey', 'replacedValue', 'delete', 'post'])
# Process every collected item: reset the per-item accumulator and fetch the
# item's metadata list from the REST API as decoded JSON.
for itemID in itemList:
    itemMetadataProcessed = []
    metadata = requests.get(baseURL + '/rest/items/' + str(itemID) +
                            '/metadata',
                            headers=header,
                            cookies=cookies,
                            verify=verify).json()
    # Visit each metadata entry by index.
    # NOTE(review): the body of this inner loop is not part of this excerpt.
    for i in range(0, len(metadata)):
                    # Write [uri, value] to the per-key file
                    # '<key>ValuesComplete.csv': create it with a
                    # handle/value header row when it does not exist yet,
                    # otherwise append to it.
                    # NOTE(review): every pass opens a new file object that
                    # is never closed explicitly - consider a `with` block.
                    if os.path.isfile(filePathComplete + key +
                                      'ValuesComplete.csv') is False:
                        f = csv.writer(
                            open(filePathComplete + key + 'ValuesComplete.csv',
                                 'w'))
                        f.writerow(['handle'] + ['value'])
                        f.writerow([uri] + [value])
                    else:
                        f = csv.writer(
                            open(filePathComplete + key + 'ValuesComplete.csv',
                                 'a'))
                        f.writerow([uri] + [value])
        # Step the paging offset forward by 20 and echo it as progress.
        offset = offset + 20
        print(offset)

        # Timing call measured from setTime (set outside this excerpt).
        dsFunc.elapsedTime(setTime, 'Set run time')

    # Timing call measured from startTime, labelled per collection.
    dsFunc.elapsedTime(startTime, 'Collection run time')

# Timing call for the phase that built the "Complete" value files.
dsFunc.elapsedTime(startTime, 'Complete value list creation time')
# For every file in filePathComplete, tally how often each entry of its
# 'value' column occurs and start a matching file in filePathUnique.
for fileName in os.listdir(filePathComplete):
    # NOTE(review): the file object given to DictReader is never closed.
    reader = csv.DictReader(open(filePathComplete + fileName))
    # The output name is the input name with 'Complete' replaced by 'Unique'.
    fileName = fileName.replace('Complete', 'Unique')
    valueList = []
    for row in reader:
        valueList.append(row['value'])
    # Counter maps each distinct value to its number of occurrences.
    valueListCount = Counter(valueList)
    f = csv.writer(open(filePathUnique + fileName, 'w'))
    f.writerow(['value'] + ['count'])
    # NOTE(review): the body of this loop is not part of this excerpt;
    # presumably it writes one value/count row per entry - confirm.
    for key, value in valueListCount.items():
# Fetch the bitstreams of the item identified by itemID and collect their
# uuids in bitstreamList.
url = baseURL + '/rest/items/' + str(itemID) + '/bitstreams?expand=bitstreams'
bitstreams = requests.get(url, headers=header, cookies=cookies, verify=verify)
# Re-issue the request every 5 seconds until it answers with HTTP 200.
# NOTE(review): there is no retry limit, so a persistent non-200 response
# keeps this loop running forever.
while bitstreams.status_code != 200:
    time.sleep(5)
    bitstreams = requests.get(url,
                              headers=header,
                              cookies=cookies,
                              verify=verify)
# Replace the response object with its decoded JSON body.
bitstreams = bitstreams.json()
print('found %d bitstreams' % len(bitstreams))
# Iterate the entries directly instead of indexing through range(len()).
for bitstream in bitstreams:
    bitstreamList.append(bitstream['uuid'])

# Timing call for the list-building phase, then dump the collected ids.
dsFunc.elapsedTime(startTime, 'Bitstream list creation time')
print(bitstreamList)

# Create the timestamped CSV log of deleted bitstreams and write its header.
# The file is opened with newline='' as the csv module requires; without it
# the writer's own '\r\n' line endings are translated again on platforms
# such as Windows, producing a blank line after every row.
# NOTE(review): the underlying file object is never closed explicitly; `f`
# is kept as a module-level writer so later code can keep writing to it.
f = csv.writer(
    open(
        filePath + 'deletedBitstreams' +
        datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv',
        'w',
        newline=''))
f.writerow(['bitstreamID', 'delete'])
# Issue a DELETE request for every collected bitstream id, printing a
# countdown of how many are left (the current one included) before each call.
# NOTE(review): the response stored in `delete` is not inspected within this
# excerpt; the loop body may continue beyond what is visible here.
for number, bitstreamID in enumerate(bitstreamList):
    bitstreamsRemaining = len(bitstreamList) - number
    print('Bitstreams remaining: ', bitstreamsRemaining, 'bitstreamID: ',
          bitstreamID)
    delete = requests.delete(baseURL + '/rest/bitstreams/' + str(bitstreamID),
                             headers=header,
                             cookies=cookies,
                             verify=verify)
                                 verify=verify)
            # Re-issue the collection-items request every 5 seconds until it
            # answers with HTTP 200.
            # NOTE(review): there is no retry limit, so a persistent non-200
            # response keeps this loop running forever.
            while items.status_code != 200:
                time.sleep(5)
                items = requests.get(baseURL + '/rest/collections/' +
                                     str(collectionID) +
                                     '/items?limit=100&offset=' + str(offset),
                                     headers=header,
                                     cookies=cookies,
                                     verify=verify)
            # Replace the response object with its decoded JSON body.
            items = items.json()
            # Record the uuid of every returned item in itemList.
            for k in range(0, len(items)):
                itemID = items[k]['uuid']
                itemList.append(itemID)
            # Advance by 100, matching the limit=100 used in the request.
            offset = offset + 100

# Hand the script's start time to dsFunc.elapsedTime under the
# 'Item list creation time' label.
dsFunc.elapsedTime(startTime, 'Item list creation time')

# Create the two output directories, "complete" first and then "unique";
# os.mkdir raises if the directory already exists.
for outputDir in (filePathComplete, filePathUnique):
    os.mkdir(outputDir)
for number, itemID in enumerate(itemList):
    itemsRemaining = len(itemList) - number
    print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
    metadata = requests.get(baseURL + '/rest/items/' + str(itemID) +
                            '/metadata',
                            headers=header,
                            cookies=cookies,
                            verify=verify).json()
    for l in range(0, len(metadata)):
        if metadata[l]['key'] != 'dc.description.provenance':
            key = metadata[l]['key']
            try: