def testElapsedTime(self):
    """Check that dsFunc.elapsedTime reports roughly the slept duration.

    Sleeps for a known number of seconds, then asserts the timedelta
    returned by elapsedTime falls within one second of that value.
    """
    begin = time.time()
    pause = 5
    time.sleep(pause)
    # elapsedTime prints a labelled message and returns the timedelta.
    delta = dsFunc.elapsedTime(begin, 'Elapsed run time')
    # One second of slack allows for scheduler/timer jitter.
    self.assertTrue(pause <= int(delta.seconds) <= pause + 1)
# NOTE(review): whitespace-mangled fragment — the statements below were collapsed
# onto one physical line during extraction, and the enclosing loop head(s) are
# outside this view (the bare `continue`s plus the externally defined names
# `metadata`, `indvdl_nm`, `initialCount`, `offset`, `f`, `startTime` imply
# surrounding loops and setup not shown here). Code is kept byte-identical;
# re-indenting would require guessing the lost nesting.
# Visible logic: capture the item's dc.identifier.uri value; match a personal
# name against three precompiled regexes (initials / middle initial /
# parentheses — presumably regexCI/regexMI/regexPR, defined elsewhere), counting
# only plain-initials matches; advance a 200-wide paging offset; write one CSV
# summary row per collection when any initials were counted; then POST
# /rest/logout and report total script run time via dsFunc.elapsedTime.
for metadata_element in metadata: if metadata_element['key'] == 'dc.identifier.uri': uri = metadata_element['value'] contains_initials = re.search( regexCI, indvdl_nm) contains_middleinitial = re.search( regexMI, indvdl_nm) contains_parentheses = re.search( regexPR, indvdl_nm) if contains_middleinitial: continue elif contains_parentheses: continue elif contains_initials: initialCount += 1 else: continue offset = offset + 200 print(offset) if initialCount > 0: f.writerow([collectionName] + [baseURL + '/' + collectionHandle] + [str(initialCount).zfill(6)]) logout = requests.post(baseURL + '/rest/logout', headers=header, cookies=cookies, verify=verify) # print script run time dsFunc.elapsedTime(startTime, 'Script run time')
# NOTE(review): whitespace-mangled fragment cut at both ends — it opens with
# `verify=verify)`, the tail of a requests.get call whose head is not visible,
# and ends mid-loop at `for i in range(0, len(metadata)):` whose body is in a
# later, unseen chunk. Code kept byte-identical; nesting cannot be recovered.
# Visible logic: re-fetch a collection's items page (limit=200, paged by
# `offset`) every 5 s until HTTP 200; append each item's 'uuid' to itemList;
# bump offset by 200 (presumably inside an outer pagination loop — confirm);
# report elapsed time; open a timestamped 'removeUnnecessarySpaces...csv' and
# write its header row; then iterate itemList fetching each item's metadata
# from /rest/items/<id>/metadata for per-field processing.
verify=verify) while items.status_code != 200: time.sleep(5) items = requests.get( baseURL + '/rest/collections/' + str(collectionID) + '/items?limit=200&offset=' + str(offset), headers=header, cookies=cookies, verify=verify) items = items.json() for k in range(0, len(items)): itemID = items[k]['uuid'] itemList.append(itemID) offset = offset + 200 dsFunc.elapsedTime(startTime, 'Item list creation time') f = csv.writer( open( filePath + 'removeUnnecessarySpaces' + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete'] + ['post']) for itemID in itemList: itemMetadataProcessed = [] metadata = requests.get(baseURL + '/rest/items/' + str(itemID) + '/metadata', headers=header, cookies=cookies, verify=verify).json() for i in range(0, len(metadata)):
# NOTE(review): whitespace-mangled fragment cut at both ends — `key`, `uri`,
# `value`, `offset`, `setTime`, `startTime`, `fileName` come from enclosing
# scopes not shown, and the chunk ends mid-loop at
# `for key, value in valueListCount.items():`. Code kept byte-identical.
# Visible logic: append (handle, value) rows to a per-key
# '<key>ValuesComplete.csv', writing the header only when the file does not
# yet exist; advance a 20-wide paging offset; emit three elapsed-time reports.
# CAUTION: the text after the lone '#' ("for fileName in os.listdir(...)")
# reads like a loop header that was commented out in the original source — in
# this collapsed form everything after it is dead comment text, so whether the
# Counter-based dedupe below it (complete CSV -> '<...>Unique' CSV with value
# counts) was live code cannot be determined from this view; verify against
# the original file.
if os.path.isfile(filePathComplete + key + 'ValuesComplete.csv') is False: f = csv.writer( open(filePathComplete + key + 'ValuesComplete.csv', 'w')) f.writerow(['handle'] + ['value']) f.writerow([uri] + [value]) else: f = csv.writer( open(filePathComplete + key + 'ValuesComplete.csv', 'a')) f.writerow([uri] + [value]) offset = offset + 20 print(offset) dsFunc.elapsedTime(setTime, 'Set run time') dsFunc.elapsedTime(startTime, 'Collection run time') dsFunc.elapsedTime(startTime, 'Complete value list creation time') # for fileName in os.listdir(filePathComplete): reader = csv.DictReader(open(filePathComplete + fileName)) fileName = fileName.replace('Complete', 'Unique') valueList = [] for row in reader: valueList.append(row['value']) valueListCount = Counter(valueList) f = csv.writer(open(filePathUnique + fileName, 'w')) f.writerow(['value'] + ['count']) for key, value in valueListCount.items():
# NOTE(review): whitespace-mangled fragment — `itemID`, `bitstreamList`,
# `baseURL`, `header`, `cookies`, `verify`, `filePath`, `startTime` are all
# defined outside this view (the first part likely sits inside an outer loop
# over items), and the chunk appears to end mid-delete-loop: the CSV header
# promises a 'delete' column whose row write is not visible here. Code kept
# byte-identical; nesting cannot be recovered.
# Visible logic: GET an item's bitstreams (retrying every 5 s until HTTP 200),
# collect each bitstream 'uuid' into bitstreamList; report elapsed time; open
# a timestamped 'deletedBitstreams...csv' with header ['bitstreamID','delete'];
# then DELETE each bitstream via /rest/bitstreams/<id>, printing a countdown
# of bitstreams remaining.
bitstreams = '' url = baseURL + '/rest/items/' + str(itemID) + '/bitstreams?expand=bitstreams' bitstreams = requests.get(url, headers=header, cookies=cookies, verify=verify) while bitstreams.status_code != 200: time.sleep(5) bitstreams = requests.get(url, headers=header, cookies=cookies, verify=verify) bitstreams = bitstreams.json() print('found %d bitstreams' % len(bitstreams)) for k in range(0, len(bitstreams)): bitstreamID = bitstreams[k]['uuid'] bitstreamList.append(bitstreamID) dsFunc.elapsedTime(startTime, 'Bitstream list creation time') print(bitstreamList) f = csv.writer( open( filePath + 'deletedBitstreams' + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) f.writerow(['bitstreamID'] + ['delete']) for number, bitstreamID in enumerate(bitstreamList): bitstreamsRemaining = len(bitstreamList) - number print('Bitstreams remaining: ', bitstreamsRemaining, 'bitstreamID: ', bitstreamID) delete = requests.delete(baseURL + '/rest/bitstreams/' + str(bitstreamID), headers=header, cookies=cookies, verify=verify)
# NOTE(review): whitespace-mangled fragment cut at both ends — it opens with
# `verify=verify)`, the tail of a requests.get whose head is not visible, and
# ends at a bare `try:` whose body/handler are in a later, unseen chunk. Code
# kept byte-identical; nesting cannot be recovered.
# Visible logic: re-fetch a collection's items page (limit=100, paged by
# `offset`) every 5 s until HTTP 200; append each item's 'uuid' to itemList;
# bump offset by 100 (presumably inside an outer pagination loop — confirm);
# report elapsed time; create the 'complete' and 'unique' output directories
# (os.mkdir will raise if they already exist — TODO confirm that is intended);
# then iterate itemList with a remaining-items countdown, fetching each item's
# metadata and skipping 'dc.description.provenance' fields.
verify=verify) while items.status_code != 200: time.sleep(5) items = requests.get(baseURL + '/rest/collections/' + str(collectionID) + '/items?limit=100&offset=' + str(offset), headers=header, cookies=cookies, verify=verify) items = items.json() for k in range(0, len(items)): itemID = items[k]['uuid'] itemList.append(itemID) offset = offset + 100 dsFunc.elapsedTime(startTime, 'Item list creation time') os.mkdir(filePathComplete) os.mkdir(filePathUnique) for number, itemID in enumerate(itemList): itemsRemaining = len(itemList) - number print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) metadata = requests.get(baseURL + '/rest/items/' + str(itemID) + '/metadata', headers=header, cookies=cookies, verify=verify).json() for l in range(0, len(metadata)): if metadata[l]['key'] != 'dc.description.provenance': key = metadata[l]['key'] try: