def replace(self, *strings):
    """
    Iterates over a set of strings. Any keys in self found within the
    string will be replaced with their respective values.

    Returns an array of strings, regardless of the number of parameters
    passed in. For example:
        >>> rd = ReplacementDict({"$foo": "bar"})
        >>> rd.replace('The value of the foo variable is: $foo')
        ['The value of the foo variable is: bar']

    IMPORTANT NOTE: Any unicode strings present as dictionary values will
    be converted into bytestrings. All returned strings will also be
    bytestrings, regardless of the type of the original strings.
    Returned strings may or may not be valid Unicode, depending on the
    contents of data fetched from the database. (%originalLocation%,
    for instance, may contain arbitrary non-Unicode characters of
    nonspecific encoding.)

    Note that, within Archivematica, the only value that typically
    contains Unicode characters is "%originalLocation%", and
    Archivematica does not use this variable in any place where precise
    fidelity of the original string is required.
    """
    ret = []
    for orig in strings:
        # None entries pass through untouched so the output list always has
        # one element per input argument.
        if orig is not None:
            # Coerce to a bytestring before substitution (see docstring).
            orig = unicodeToStr(orig)
            for key, value in self.iteritems():
                orig = orig.replace(key, unicodeToStr(value))
        ret.append(orig)
    return ret
def start(self):
    """Poll self.directory and fire callbacks for added/removed entries.

    Based on polling example:
    http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html
    """
    self.run = True
    LOGGER.info('Watching directory %s (Files: %s)', self.directory, self.alertOnFiles)

    def snapshot():
        # Map of entry name -> None; only key membership is used below.
        return dict.fromkeys(os.listdir(self.directory))

    previous = snapshot()
    while self.run:
        time.sleep(self.interval)
        current = snapshot()
        new_entries = [name for name in current if name not in previous]
        gone_entries = [name for name in previous if name not in current]
        if new_entries:
            LOGGER.debug('Added %s', new_entries)
            for name in new_entries:
                name = unicodeToStr(name)
                watched = unicodeToStr(self.directory)
                self.event(os.path.join(watched, name), self.variablesAdded, self.callBackFunctionAdded)
        if gone_entries:
            LOGGER.debug('Removed %s', gone_entries)
            for name in gone_entries:
                name = unicodeToStr(name)
                watched = unicodeToStr(self.directory)
                self.event(os.path.join(watched, name), self.variablesRemoved, self.callBackFunctionRemoved)
        previous = current
def updateFileLocation(src, dst, eventType, eventDateTime, eventDetail, eventIdentifierUUID=None, fileUUID="None", sipUUID=None, transferUUID=None, eventOutcomeDetailNote=""):
    """Update a file's currentLocation in the database and record an event.

    If the file uuid is not provided, will use the sip uuid (or transfer
    uuid) and the old path to find the file uuid.

    BUG FIX: eventIdentifierUUID previously defaulted to
    uuid.uuid4().__str__(), which Python evaluates once at function
    definition time, so every call that omitted the argument reused the
    same event UUID. It now defaults to None and a fresh UUID is generated
    per call; passing an explicit value behaves exactly as before.
    """
    if eventIdentifierUUID is None:
        eventIdentifierUUID = uuid.uuid4().__str__()
    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        # Placeholder: deliberately invalid SQL so the query fails loudly if
        # neither sipUUID nor transferUUID was supplied.
        sql = "Need to define transferUUID or sipUUID"
        # BUG FIX: sipUUID/transferUUID are now escaped the same way src is,
        # instead of being interpolated raw into the SQL string.
        if sipUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.sipUUID = '" + MySQLdb.escape_string(sipUUID) + "';"
        elif transferUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.transferUUID = '" + MySQLdb.escape_string(transferUUID) + "';"
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            fileUUID = unicodeToStr(row[0])
            row = c.fetchone()
        sqlLock.release()
    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = "Original name=\"%s\"; cleaned up name=\"%s\"" % (src, dst)
    #CREATE THE EVENT
    if not fileUUID:
        print >>sys.stderr, "Unable to find file uuid for: ", src, " -> ", dst
        exit(6)
    insertIntoEvents(fileUUID=fileUUID, eventIdentifierUUID=eventIdentifierUUID, eventType=eventType, eventDateTime=eventDateTime, eventDetail=eventDetail, eventOutcome="", eventOutcomeDetailNote=eventOutcomeDetailNote)
    #UPDATE THE CURRENT FILE PATH
    sql = """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dst), fileUUID)
    databaseInterface.runSQL(sql)
def start(self):
    """Based on polling example: http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html"""
    # Polls self.directory every self.interval seconds and fires the
    # configured callbacks for entries that appear or disappear.
    self.run = True
    if DEBUG:
        print "watching directory: ", self.directory
    # Dicts are used as sets here: only key membership matters.
    before = dict([(f, None) for f in os.listdir(self.directory)])
    while self.run:
        time.sleep(self.interval)
        after = dict([(f, None) for f in os.listdir(self.directory)])
        added = [f for f in after if not f in before]
        removed = [f for f in before if not f in after]
        if added:
            if DEBUG:
                print "Added: ", ", ".join(added)
            for i in added:
                # Normalize both parts to bytestrings before joining.
                i = unicodeToStr(i)
                directory = unicodeToStr(self.directory)
                self.event(os.path.join(directory, i), self.variablesAdded, self.callBackFunctionAdded)
        if removed:
            if DEBUG:
                print "Removed: ", ", ".join(removed)
            for i in removed:
                i = unicodeToStr(i)
                directory = unicodeToStr(self.directory)
                self.event(os.path.join(directory, i), self.variablesRemoved, self.callBackFunctionRemoved)
        # The current snapshot becomes the baseline for the next poll.
        before = after
def start(self):
    """Based on polling example: http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html"""
    # Simple polling watcher: compare successive directory listings and
    # invoke the add/remove callbacks for the difference.
    self.run = True
    if DEBUG:
        print "watching directory: ", self.directory
    before = dict([(f, None) for f in os.listdir(self.directory)])
    while self.run:
        time.sleep(self.interval)
        after = dict([(f, None) for f in os.listdir(self.directory)])
        # Entries present now but not before / before but not now.
        added = [f for f in after if not f in before]
        removed = [f for f in before if not f in after]
        if added:
            if DEBUG:
                print "Added: ", ", ".join(added)
            for i in added:
                # Paths are coerced to bytestrings before os.path.join.
                i = unicodeToStr(i)
                directory = unicodeToStr(self.directory)
                self.event(os.path.join(directory, i), self.variablesAdded, self.callBackFunctionAdded)
        if removed:
            if DEBUG:
                print "Removed: ", ", ".join(removed)
            for i in removed:
                i = unicodeToStr(i)
                directory = unicodeToStr(self.directory)
                self.event(os.path.join(directory, i), self.variablesRemoved, self.callBackFunctionRemoved)
        before = after
def directory_children(request, basePath=False):
    """Return a JSON listing of a directory's non-hidden children.

    The directory is basePath (if given) + GET 'base_path' + GET 'path'.
    'entries' holds every visible entry; 'directories' the subset that is
    a readable directory.
    """
    pieces = []
    if basePath:
        pieces.append(basePath)
    pieces.append(request.GET.get('base_path', ''))
    pieces.append(request.GET.get('path', ''))
    path = ''.join(pieces)

    entries = []
    directories = []
    for entry in sorted_directory_list(path):
        entry = archivematicaFunctions.strToUnicode(entry)
        if unicode(entry)[0] == '.':
            continue  # skip hidden entries
        entries.append(entry)
        entry_path = os.path.join(path, entry)
        # Filesystem calls get the bytestring form of the path.
        raw_path = archivematicaFunctions.unicodeToStr(entry_path)
        if os.path.isdir(raw_path) and os.access(raw_path, os.R_OK):
            directories.append(entry)

    payload = {'entries': entries, 'directories': directories}
    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(payload),
        mimetype='application/json')
def start_transfer_logged_in(request):
    """
    Endpoint for starting a transfer if logged in and calling from the
    dashboard.
    """
    if request.method not in ('POST',):
        return django.http.HttpResponseNotAllowed(['POST'])
    posted = request.POST
    transfer_name = archivematicaFunctions.unicodeToStr(posted.get('name', ''))
    transfer_type = archivematicaFunctions.unicodeToStr(posted.get('type', ''))
    accession = archivematicaFunctions.unicodeToStr(posted.get('accession', ''))
    # The path may contain arbitrary, non-unicode characters, and hence is
    # POSTed to the server base64-encoded.
    paths = [base64.b64decode(encoded) for encoded in posted.getlist('paths[]', [])]
    row_ids = posted.getlist('row_ids[]', [])
    try:
        response = start_transfer(transfer_name, transfer_type, accession, paths, row_ids)
    except ValueError as e:
        # Bad client input (e.g. missing name/paths).
        return helpers.json_response({'error': True, 'message': str(e)}, status_code=400)
    except storage_service.StorageServiceError as e:
        # Upstream storage-service failure.
        return helpers.json_response({'error': True, 'message': str(e)}, status_code=500)
    return helpers.json_response(response)
def copy_transfer_component(request):
    """Copy a transfer component (POST 'path') into POST 'destination'.

    A path containing '.zip' is treated as a zipped bag and copied as-is;
    otherwise each entry inside the path is copied into a directory named
    after the transfer. Returns a JSON response with 'message' and, on
    failure, 'error': True.
    """
    transfer_name = archivematicaFunctions.unicodeToStr(request.POST.get('name', ''))
    path = archivematicaFunctions.unicodeToStr(request.POST.get('path', ''))
    destination = archivematicaFunctions.unicodeToStr(request.POST.get('destination', ''))

    error = None
    if transfer_name == '':
        error = 'No transfer name provided.'
    elif path == '':
        error = 'No path provided.'
    elif '.zip' in path.lower():
        # Transfer component path leads to a ZIP file: treat as zipped bag.
        # BUG FIX: the original used path.lower().index('.zip') inside a bare
        # try/except as a substring test; the bare except also swallowed
        # shutil.copy() failures and misrouted them into the directory branch.
        try:
            shutil.copy(path, destination)
            paths_copied = 1
        except (IOError, OSError):
            error = 'Error copying from ' + path + ' to ' + destination + '. (' + str(sys.exc_info()[0]) + ')'
    else:
        transfer_dir = os.path.join(destination, transfer_name)
        # Create directory before it is used, otherwise shutil.copy()
        # would use that location to store a file.
        if not os.path.isdir(transfer_dir):
            os.mkdir(transfer_dir)
        paths_copied = 0
        # Cycle through each path copying files/dirs inside it to transfer dir.
        for entry in sorted_directory_list(path):
            entry_path = os.path.join(path, entry)
            if os.path.isdir(entry_path):
                destination_dir = os.path.join(transfer_dir, entry)
                try:
                    shutil.copytree(entry_path, destination_dir)
                except Exception:
                    error = 'Error copying from ' + entry_path + ' to ' + destination_dir + '. (' + str(sys.exc_info()[0]) + ')'
            else:
                shutil.copy(entry_path, transfer_dir)
            paths_copied = paths_copied + 1

    response = {}
    if error is not None:
        response['message'] = error
        response['error'] = True
    else:
        response['message'] = 'Copied ' + str(paths_copied) + ' entries.'
    return HttpResponse(
        simplejson.JSONEncoder().encode(response),
        mimetype='application/json'
    )
def updateFileLocation(
        src,
        dst,
        eventType="",
        eventDateTime="",
        eventDetail="",
        eventIdentifierUUID=None,
        fileUUID="None",
        sipUUID=None,
        transferUUID=None,
        eventOutcomeDetailNote="",
        createEvent=True,
):
    """
    Updates file location in the database, and optionally writes an event
    for the sanitization to the database. Note that this does not actually
    move a file on disk.

    If the file uuid is not provided, will use the SIP uuid and the old
    path to find the file uuid.

    To suppress creation of an event, pass the createEvent keyword argument
    (for example, if the file moved due to the renaming of a parent
    directory and not the file itself).

    eventIdentifierUUID is accepted for backward compatibility but is not
    used: insertIntoEvents is called without it. BUG FIX: the previous
    default, uuid.uuid4().__str__(), was evaluated once at import time, so
    every call shared the same def-time UUID; the default is now None.

    :raises ValueError: if fileUUID, sipUUID and transferUUID are all unset.
    :raises File.DoesNotExist: if no matching file row is found.
    """
    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        kwargs = {"removedtime__isnull": True, "currentlocation": src}
        if sipUUID:
            kwargs["sip_id"] = sipUUID
        elif transferUUID:
            kwargs["transfer_id"] = transferUUID
        else:
            raise ValueError(
                "One of fileUUID, sipUUID, or transferUUID must be provided")
        f = File.objects.get(**kwargs)
    else:
        f = File.objects.get(uuid=fileUUID)
    # UPDATE THE CURRENT FILE PATH
    f.currentlocation = dst
    f.save()
    if not createEvent:
        return
    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = 'Original name="%s"; cleaned up name="%s"' % (
            src, dst)
    # CREATE THE EVENT
    insertIntoEvents(
        fileUUID=f.uuid,
        eventType=eventType,
        eventDateTime=eventDateTime,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )
def createUnitAndJobChain(path, config, terminate=False):
    """Create the unit matching a watched-directory config and run its chain.

    config[3] is the unit type ("SIP", "DIP" or "Transfer"); config[1] is
    the job chain to start. Directories map to SIP/DIP/Transfer units;
    plain files are only handled as Transfers.
    """
    path = unicodeToStr(path)
    # Directories are normalized to end with a trailing slash.
    if os.path.isdir(path):
        path = path + "/"
    print "createUnitAndJobChain", path, config
    unit = None
    if os.path.isdir(path):
        if config[3] == "SIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitSIP(path, UUID)
        elif config[3] == "DIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitDIP(path, UUID)
        elif config[3] == "Transfer":
            #UUID = findOrCreateSipInDB(path)
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if config[3] == "Transfer":
            unit = unitTransfer(path)
        else:
            return
            # NOTE(review): the two lines below are unreachable (they follow
            # an unconditional return); kept byte-for-byte from the original.
            UUID = uuid.uuid4()
            unit = unitFile(path, UUID)
    else:
        # Path vanished between the watch event and now.
        return
    jobChain(unit, config[1])
    if terminate:
        exit(0)
def directory_children(request, basePath=False):
    """Return a JSON listing of a directory's non-hidden children.

    The directory is basePath (if given) + GET 'base_path' + GET 'path'.
    'entries' is every visible entry; 'directories' is the subset that is
    a readable directory.
    """
    path = ''
    if (basePath):
        path = path + basePath
    path = path + request.GET.get('base_path', '')
    path = path + request.GET.get('path', '')
    response = {}
    entries = []
    directories = []
    for entry in sorted_directory_list(path):
        entry = archivematicaFunctions.strToUnicode(entry)
        # Skip hidden entries (leading dot).
        if unicode(entry)[0] != '.':
            entries.append(entry)
            entry_path = os.path.join(path, entry)
            # Filesystem checks use the bytestring form of the path.
            if os.path.isdir(archivematicaFunctions.unicodeToStr(entry_path)) and os.access(archivematicaFunctions.unicodeToStr(entry_path), os.R_OK):
                directories.append(entry)
    response = {
        'entries': entries,
        'directories': directories
    }
    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json'
    )
def approve_transfer(request):
    """Approve a waiting transfer named by POST 'directory'.

    Example: curl --data \
        "username=mike&api_key=<API key>&directory=MyTransfer" \
        http://127.0.0.1/api/transfer/approve
    """
    if request.method != 'POST':
        return django.http.HttpResponseNotAllowed(permitted_methods=['POST'])

    auth_error = authenticate_request(request)
    if auth_error is not None:
        return helpers.json_response(
            {'message': auth_error, 'error': True}, status_code=403)

    directory = archivematicaFunctions.unicodeToStr(
        request.POST.get('directory', ''))
    transfer_type = request.POST.get('type', 'standard')
    error, unit_uuid = approve_transfer_via_mcp(
        directory, transfer_type, request.user.id)
    if error is not None:
        return helpers.json_response(
            {'message': error, 'error': True}, status_code=500)
    return helpers.json_response(
        {'message': 'Approval successful.', 'uuid': unit_uuid})
def reloadFileList(self):
    """Match files to their UUID's via their location and the File table's currentLocation"""
    self.fileList = {}
    # currentPath must be a string to return all filenames as bytestrings,
    # and to safely concatenate with other bytestrings
    currentPath = os.path.join(self.currentPath.replace("%sharedPath%", django_settings.SHARED_DIRECTORY, 1), "").encode('utf-8')
    try:
        # Walk the on-disk tree, recording each file keyed by its
        # %sharedPath%-style path (self.pathString).
        for directory, subDirectories, files in os.walk(currentPath):
            directory = directory.replace(currentPath, self.pathString, 1)
            for file_ in files:
                if self.pathString != directory:
                    filePath = os.path.join(directory, file_)
                else:
                    # Top level: directory already ends with a separator.
                    filePath = directory + file_
                self.fileList[filePath] = unitFile(filePath, owningUnit=self)
        # Fetch the database rows for this unit and attach UUID/group use
        # to the matching on-disk entries.
        if self.unitType == "Transfer":
            files = File.objects.filter(transfer_id=self.UUID)
        else:
            files = File.objects.filter(sip_id=self.UUID)
        for f in files:
            currentlocation = archivematicaFunctions.unicodeToStr(f.currentlocation)
            if currentlocation in self.fileList:
                self.fileList[currentlocation].UUID = f.uuid
                self.fileList[currentlocation].fileGrpUse = f.filegrpuse
            else:
                LOGGER.warning('%s %s has file (%s) %s in the database, but file does not exist in the file system', self.unitType, self.UUID, f.uuid, f.currentlocation)
    except Exception:
        LOGGER.exception('Error reloading file list for %s', currentPath)
        exit(1)
def createUnitAndJobChain(path, config, terminate=False):
    """Build the unit for a watched-directory event and launch its chain.

    config[3] names the unit type ("SIP", "DIP" or "Transfer"); config[1]
    is the chain to run. Plain files are only handled as Transfers.
    """
    path = unicodeToStr(path)
    if os.path.isdir(path):
        path = path + "/"
    logger.debug('Creating unit and job chain for %s with %s', path, config)
    unit = None
    unit_kind = config[3]
    if os.path.isdir(path):
        if unit_kind == "SIP":
            unit = unitSIP(path, findOrCreateSipInDB(path))
        elif unit_kind == "DIP":
            unit = unitDIP(path, findOrCreateSipInDB(path, unit_type='DIP'))
        elif unit_kind == "Transfer":
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if unit_kind != "Transfer":
            # Only transfers are handled for plain files.
            return
        unit = unitTransfer(path)
    else:
        # Path no longer exists.
        return
    jobChain(unit, config[1])
    if terminate:
        exit(0)
def start_transfer(transfer_name, transfer_type, accession, access_id, paths, row_ids):
    """
    Start a new transfer.

    :param str transfer_name: Name of new transfer.
    :param str transfer_type: Type of new transfer. From TRANSFER_TYPE_DIRECTORIES.
    :param str accession: Accession number of new transfer.
    :param str access_id: Access system identifier for the new transfer.
    :param list paths: List of <location_uuid>:<relative_path> to be copied into the new transfer. Location UUIDs should be associated with this pipeline, and relative path should be relative to the location.
    :param list row_ids: ID of the associated TransferMetadataSet for disk image ingest.
    :returns: Dict with {'message': <message>, ['error': True, 'path': <path>]}.  Error is a boolean, present and True if there is an error.  Message describes the success or failure. Path is populated if there is no error.
    """
    if not transfer_name:
        raise ValueError('No transfer name provided.')
    if not paths:
        raise ValueError('No path provided.')

    # Create temp directory that everything will be copied into
    temp_base_dir = os.path.join(SHARED_DIRECTORY_ROOT, 'tmp')
    temp_dir = tempfile.mkdtemp(dir=temp_base_dir)
    os.chmod(temp_dir, 0o770)  # Needs to be writeable by the SS

    for i, path in enumerate(paths):
        index = i + 1  # so transfers start from 1, not 0
        # Don't suffix the first transfer component, only subsequent ones
        if index > 1:
            target = transfer_name + '_' + str(index)
        else:
            target = transfer_name
        # NOTE(review): assumes row_ids has one entry per path — an IndexError
        # would be raised otherwise; confirm against the caller.
        row_id = row_ids[i]

        if helpers.file_is_an_archive(path):
            # Archives are dropped straight into the temp dir.
            transfer_dir = temp_dir
            p = path.split(':', 1)[1]  # strip the <location_uuid>: prefix
            logger.debug('found a zip file, splitting path ' + p)
            filepath = os.path.join(temp_dir, os.path.basename(p))
        else:
            path = os.path.join(path, '.')  # Copy contents of dir but not dir
            transfer_dir = os.path.join(temp_dir, target)
            filepath = os.path.join(temp_dir, target)

        transfer_relative = transfer_dir.replace(SHARED_DIRECTORY_ROOT, '', 1)
        copy_from_transfer_sources([path], transfer_relative)
        filepath = archivematicaFunctions.unicodeToStr(filepath)
        try:
            destination = copy_to_start_transfer(
                filepath=filepath,
                type=transfer_type,
                accession=accession,
                access_id=access_id,
                transfer_metadata_set_row_uuid=row_id)
        except Exception as e:
            logger.exception('Error starting transfer {}: {}'.format(
                filepath, e))
            raise Exception('Error starting transfer {}: {}'.format(
                filepath, e))

    shutil.rmtree(temp_dir)
    # 'destination' is the path of the last component copied.
    return {'message': _('Copy successful.'), 'path': destination}
def checkForPreconfiguredXML(self):
    """Look up a preconfigured chain choice for this job chain link.

    Reads the unit's processing XML file (if present) and, when a
    <preconfiguredChoice> applies to this link, returns the pk of the
    chain to follow; returns None when there is no applicable choice or
    when a delay timer was started instead.
    """
    ret = None
    xmlFilePath = os.path.join(
        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1),
        archivematicaMCP.config.get('MCPServer', "processingXMLFile")
    )
    xmlFilePath = unicodeToStr(xmlFilePath)
    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            # NOTE(review): the chmod command is built by string
            # concatenation from xmlFilePath — a path containing a double
            # quote would break/alter the shell command. Worth hardening.
            command = "sudo chmod 774 \"" + xmlFilePath + "\""
            if isinstance(command, unicode):
                command = command.encode("utf-8")
            exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.find("preconfiguredChoices"):
                #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk:
                if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.description:
                    desiredChoice = preconfiguredChoice.find("goToChain").text
                    sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = %s;""" % (desiredChoice, self.jobChainLink.pk.__str__())
                    c, sqlLock = databaseInterface.querySQL(sql)
                    row = c.fetchone()
                    while row != None:
                        ret = row[0]
                        row = c.fetchone()
                    sqlLock.release()
                    try:
                        #<delay unitAtime="yes">30</delay>
                        delayXML = preconfiguredChoice.find("delay")
                        unitAtimeXML = delayXML.get("unitCtime")
                        if unitAtimeXML != None and unitAtimeXML.lower() != "no":
                            # Wait until delaySeconds after the unit's mtime
                            # before proceeding with the chosen chain.
                            delaySeconds = int(delayXML.text)
                            unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            print "time to go:", timeToGo
                            #print "that will be: ", (nowTime + timeToGo)
                            self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime())
                            t = threading.Timer(timeToGo, self.proceedWithChoice, args=[ret], kwargs={"delayTimerStart": True})
                            t.daemon = True
                            self.delayTimer = t
                            t.start()
                            # A timer will fire the choice later; no
                            # immediate chain to return.
                            return None
                    except Exception as inst:
                        # Missing/malformed <delay> element: fall through and
                        # return the choice immediately.
                        print >>sys.stderr, "Error parsing xml:"
                        print >>sys.stderr, type(inst)
                        print >>sys.stderr, inst.args
        except Exception as inst:
            print >>sys.stderr, "Error parsing xml:"
            print >>sys.stderr, type(inst)
            print >>sys.stderr, inst.args
    return ret
def copy_to_start_transfer(request):
    """Copy POST 'filepath' into the watched directory for POST 'type'.

    Unknown types fall back to the standard transfer directory. Paths
    containing '.zip' are copied as single files (zipped bags); everything
    else is copied as a directory tree. Returns a JSON response with
    'message' and, on failure, 'error': True.
    """
    filepath = archivematicaFunctions.unicodeToStr(request.POST.get('filepath', ''))
    # Renamed from 'type' to avoid shadowing the builtin.
    transfer_type = request.POST.get('type', '')

    error = check_filepath_exists('/' + filepath)
    if error is None:
        # confine destination to subdir of originals
        filepath = os.path.join('/', filepath)
        basename = os.path.basename(filepath)
        # default to standard transfer
        type_paths = {
            'standard': 'standardTransfer',
            'unzipped bag': 'baggitDirectory',
            'zipped bag': 'baggitZippedDirectory',
            'dspace': 'Dspace',
            'maildir': 'maildir'
        }
        try:
            type_subdir = type_paths[transfer_type]
            destination = os.path.join(ACTIVE_TRANSFER_DIR, type_subdir)
        except KeyError:
            destination = os.path.join(STANDARD_TRANSFER_DIR)

        if '.zip' in filepath.lower():
            # Transfer component path leads to a ZIP file: treat as zipped bag.
            # BUG FIX: previously filepath.lower().index('.zip') inside a bare
            # try/except acted as a substring test; the bare except also
            # swallowed shutil.copy() failures and misrouted them into the
            # directory branch.
            try:
                shutil.copy(filepath, destination)
            except (IOError, OSError):
                error = 'Error copying from ' + filepath + ' to ' + destination + '. (' + str(sys.exc_info()[0]) + ')'
        else:
            destination = os.path.join(destination, basename)
            destination = pad_destination_filepath_if_it_already_exists(destination)
            try:
                shutil.copytree(filepath, destination)
            except Exception:
                error = 'Error copying from ' + filepath + ' to ' + destination + '. (' + str(sys.exc_info()[0]) + ')'

    response = {}
    if error is not None:
        response['message'] = error
        response['error'] = True
    else:
        response['message'] = 'Copy successful.'
    return HttpResponse(
        simplejson.JSONEncoder().encode(response),
        mimetype='application/json'
    )
def _determine_transfer_paths(name, path, tmpdir):
    """Work out the three paths a new transfer needs.

    Returns (transfer dir relative to the shared directory, filepath as a
    bytestring, source path to copy from).
    """
    if _file_is_an_archive(path):
        # Archives land directly in the temp dir, keeping their basename.
        transfer_dir = tmpdir
        filepath = os.path.join(tmpdir, os.path.basename(Path(path).path))
    else:
        # Copy contents of dir but not dir
        path = os.path.join(path, '.')
        transfer_dir = filepath = os.path.join(tmpdir, name)
    transfer_relative = transfer_dir.replace(django_settings.SHARED_DIRECTORY, '', 1)
    return transfer_relative, unicodeToStr(filepath), path
def sanitizeRecursively(job, path):
    """Sanitize path and, if it is a directory, all of its descendants.

    Returns a dict mapping each original (NFC-normalized) path to its
    sanitized replacement.
    """
    path = os.path.abspath(path)
    renamed = {}
    cleaned = sanitizePath(job, path)
    if cleaned != path:
        # Key on the NFC-normalized original so later lookups match.
        key = unicodeToStr(unicodedata.normalize('NFC', path.decode('utf8')))
        renamed[key] = cleaned
    if os.path.isdir(cleaned):
        for child in os.listdir(cleaned):
            renamed.update(sanitizeRecursively(job, os.path.join(cleaned, child)))
    return renamed
def approve_transfer(request):
    """Approve a transfer.

    The user may find the Package API a better option when the ID of the
    unit is known in advance. The errors returned use the 500 status code
    for backward-compatibility reasons.

    Example::

        $ curl --data "directory=MyTransfer" \
            --header "Authorization: ApiKey: user:token" \
            http://127.0.0.1/api/transfer/approve
    """
    directory = request.POST.get("directory")
    if not directory:
        return _error_response(
            "Please specify a transfer directory.", status_code=500)
    directory = archivematicaFunctions.unicodeToStr(directory)

    transfer_type = request.POST.get("type", "standard")
    if not transfer_type:
        return _error_response(
            "Please specify a transfer type.", status_code=500)

    modified_transfer_path = get_modified_standard_transfer_path(transfer_type)
    if modified_transfer_path is None:
        return _error_response("Invalid transfer type.", status_code=500)

    watched_path = os.path.join(modified_transfer_path, directory)
    transfer_file = watched_path.replace(
        SHARED_PATH_TEMPLATE_VAL, SHARED_DIRECTORY_ROOT)
    # Zipped bags and DSpace exports are watched as single files; anything
    # else is watched as a directory (hence the appended trailing slash).
    is_single_file = (
        transfer_type in ("zipped bag", "dspace")
        and os.path.isfile(transfer_file))
    db_transfer_path = (
        watched_path if is_single_file else os.path.join(watched_path, ""))
    try:
        client = MCPClient(request.user)
        unit_uuid = client.approve_transfer_by_path(
            db_transfer_path, transfer_type)
    except Exception as err:
        msg = "Unable to start the transfer."
        LOGGER.error("%s %s (db_transfer_path=%s)", msg, err, db_transfer_path)
        return _error_response(msg, status_code=500)
    return _ok_response("Approval successful.", uuid=unit_uuid)
def approve_transfer(request):
    """Approve the waiting transfer named by POST 'directory'.

    Example: curl --data \
        "username=mike&api_key=<API key>&directory=MyTransfer" \
        http://127.0.0.1/api/transfer/approve
    """
    directory = archivematicaFunctions.unicodeToStr(
        request.POST.get('directory', ''))
    transfer_type = request.POST.get('type', 'standard')
    error, unit_uuid = approve_transfer_via_mcp(
        directory, transfer_type, request.user.id)
    if error is not None:
        return helpers.json_response(
            {'message': error, 'error': True}, status_code=500)
    return helpers.json_response(
        {'message': 'Approval successful.', 'uuid': unit_uuid})
def createUnitAndJobChain(path, watched_dir, workflow):
    """Create the unit for a watched-directory event and start its chain.

    watched_dir supplies the unit type and the chain to run; workflow is
    passed through to jobChain.
    """
    path = unicodeToStr(path)
    if os.path.isdir(path):
        path = path + "/"
    logger.debug('Starting chain for %s', path)
    # The path may have disappeared between the watch event and now.
    if not os.path.exists(path):
        return
    unit = None
    # NOTE(review): unit_type is read with item access but the chain below
    # uses attribute access (watched_dir.chain) — presumably watched_dir is a
    # dict-like object supporting both; confirm against its definition.
    unit_type = watched_dir["unit_type"]
    if os.path.isdir(path):
        if unit_type == "SIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitSIP(path, UUID)
        elif unit_type == "DIP":
            UUID = findOrCreateSipInDB(path, unit_type='DIP')
            unit = unitDIP(path, UUID)
        elif unit_type == "Transfer":
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if unit_type == "Transfer":
            unit = unitTransfer(path)
        else:
            # Plain files are only handled as Transfers.
            return
    jobChain(unit, watched_dir.chain, workflow)
def sorted_directory_list(path):
    """Return path's entries as bytestrings, sorted by locale collation."""
    raw_path = archivematicaFunctions.unicodeToStr(path)
    normalized = [
        archivematicaFunctions.unicodeToStr(name)
        for name in os.listdir(raw_path)
    ]
    return sorted(normalized, cmp=locale.strcoll)
def handle_job(job):
    """Extract email attachments from a transfer's Maildir into objects/attachments.

    job.args: [1] transfer directory, [2] transfer UUID, [3] date. Writes an
    XML record of the extraction to logs/attachmentExtraction.xml and sets
    the job status to the number of per-message errors encountered (246 for
    an unreadable maildir).
    """
    # http://www.doughellmann.com/PyMOTW/mailbox/
    sharedVariablesAcrossModules.errorCounter = 0
    transferDir = job.args[1]
    transferUUID = job.args[2]
    date = job.args[3]
    maildir = os.path.join(transferDir, "objects", "Maildir")
    outXML = os.path.join(transferDir, "logs", "attachmentExtraction.xml")
    mirrorDir = os.path.join(transferDir, "objects", "attachments")
    try:
        os.makedirs(mirrorDir)
    except os.error:
        # Already exists.
        pass
    # print "Extracting attachments from: " + maildir
    root = etree.Element("ArchivematicaMaildirAttachmentExtractionRecord")
    root.set("directory", maildir)
    # Each subdirectory of the Maildir is an individual mailbox.
    for maildirsub in (d for d in os.listdir(maildir) if os.path.isdir(os.path.join(maildir, d))):
        maildirsub_full_path = os.path.join(maildir, maildirsub)
        job.pyprint("Extracting attachments from: " + maildirsub_full_path)
        md = mailbox.Maildir(maildirsub_full_path, None)
        directory = etree.SubElement(root, "subDir")
        directory.set("dir", maildirsub)
        try:
            for item in md.keys():
                try:
                    subDir = md.get_message(item).get_subdir()
                    sourceFilePath2 = os.path.join(maildir, maildirsub, subDir, item)
                    # Paths stored in the DB use the %transferDirectory% token.
                    sourceFilePath = sourceFilePath2.replace(transferDir, "%transferDirectory%", 1)
                    sourceFileUUID = getFileUUIDofSourceFile(transferUUID, sourceFilePath)
                    sharedVariablesAcrossModules.sourceFileUUID = sourceFileUUID
                    sharedVariablesAcrossModules.sourceFilePath = sourceFilePath
                    fil = md.get_file(item)
                    out = parse(fil)
                    job.pyprint('Email Subject:', out.get('subject'))
                    if out['attachments']:
                        msg = etree.SubElement(directory, "msg")
                        # Strip the surrounding angle brackets.
                        etree.SubElement(msg, "Message-ID").text = out['msgobj']['Message-ID'][1:-1]
                        etree.SubElement(msg, "Extracted-from").text = item
                        if isinstance(out["subject"], six.binary_type):
                            etree.SubElement(msg, "Subject").text = out["subject"].decode('utf-8')
                        else:
                            etree.SubElement(msg, "Subject").text = out["subject"]
                        etree.SubElement(msg, "Date").text = out['msgobj']['date']
                        etree.SubElement(msg, "To").text = out["to"]
                        etree.SubElement(msg, "From").text = out["from"]
                        for attachment in out['attachments']:
                            job.pyprint('\tAttachment name:', attachment.name)
                            try:
                                if attachment.name is None:
                                    continue
                                # these are versions of the body of the email - I think
                                if attachment.name == 'rtf-body.rtf':
                                    continue
                                attachedFileUUID = uuid.uuid4().__str__()
                                # attachment = StringIO(file_data) TODO LOG TO FILE
                                attch = etree.SubElement(msg, "attachment")
                                etree.SubElement(attch, "name").text = attachment.name
                                etree.SubElement(attch, "content_type").text = attachment.content_type
                                etree.SubElement(attch, "size").text = str(attachment.size)
                                # print attachment.create_date
                                # FIXME Dates don't appear to be working. Disabling for the moment
                                # etree.SubElement(attch, "create_date").text = attachment.create_date
                                # etree.SubElement(attch, "mod_date").text = attachment.mod_date
                                # etree.SubElement(attch, "read_date").text = attachment.read_date
                                # UUID prefix keeps same-named attachments distinct.
                                filePath = os.path.join(transferDir, "objects", "attachments", maildirsub, subDir, "%s_%s" % (attachedFileUUID, attachment.name))
                                job.pyprint('\tAttachment path:', filePath)
                                filePath = unicodeToStr(filePath)
                                writeFile(filePath, attachment)
                                eventDetail = "Unpacked from: {%s}%s" % (sourceFileUUID, sourceFilePath)
                                addFile(filePath, transferDir, transferUUID, date, eventDetail=eventDetail, fileUUID=attachedFileUUID)
                            except Exception as inst:
                                # Per-attachment failure: log and keep going.
                                job.pyprint(sourceFilePath, file=sys.stderr)
                                job.print_error(traceback.format_exc())
                                job.pyprint(type(inst), file=sys.stderr)  # the exception instance
                                job.pyprint(inst.args, file=sys.stderr)
                                job.pyprint(etree.tostring(msg), file=sys.stderr)
                                job.pyprint(file=sys.stderr)
                                sharedVariablesAcrossModules.errorCounter += 1
                except Exception as inst:
                    # Per-message failure: log and keep going.
                    job.pyprint(sourceFilePath, file=sys.stderr)
                    job.print_error(traceback.format_exc())
                    job.pyprint(type(inst), file=sys.stderr)  # the exception instance
                    job.pyprint(inst.args, file=sys.stderr)
                    job.pyprint(file=sys.stderr)
                    sharedVariablesAcrossModules.errorCounter += 1
        except Exception as inst:
            job.pyprint("INVALID MAILDIR FORMAT", file=sys.stderr)
            job.pyprint(type(inst), file=sys.stderr)
            job.pyprint(inst.args, file=sys.stderr)
            job.set_status(246)  # Was -10, but exit codes are unsigned
            return
        mirrorDir = os.path.join(transferDir, "objects/attachments", maildirsub)
        try:
            os.makedirs(mirrorDir)
        except:
            pass
        eventDetail = "added for normalization purposes"
        fileUUID = uuid.uuid4().__str__()
        addKeyFileToNormalizeMaildirOffOf(os.path.join(maildir, maildirsub).replace(transferDir, "%transferDirectory%", 1), mirrorDir, transferDir, transferUUID, date, eventDetail=eventDetail, fileUUID=fileUUID)
    tree = etree.ElementTree(root)
    tree.write(outXML, pretty_print=True, xml_declaration=True)
    job.set_status(sharedVariablesAcrossModules.errorCounter)
def escapeForDB(str):
    """Return the value escaped for safe literal use in a MySQL statement.

    NOTE: the parameter name shadows the builtin `str`; it is kept
    unchanged for interface compatibility with existing callers.
    """
    # Coerce unicode to a bytestring first; escape_string expects bytes.
    return MySQLdb.escape_string(unicodeToStr(str))
def create_package(name, type_, accession, access_system_id, path,
                   metadata_set_id, auto_approve=True,
                   wait_until_complete=False, processing_config=None):
    """Launch transfer and return its object immediately.

    ``auto_approve`` changes significantly the way that the transfer is
    initiated. See ``_start_package_transfer_with_auto_approval`` and
    ``_start_package_transfer`` for more details.

    :param name: human-readable transfer name (required; coerced to a
        bytestring via ``unicodeToStr``)
    :param type_: package type; must be a key of
        ``PACKAGE_TYPE_STARTING_POINTS`` after normalization ("disk image"
        and ``None`` are treated as "standard")
    :param accession: optional accession id stored on the Transfer row
    :param access_system_id: optional external access-system id
    :param path: source path of the package (required)
    :param metadata_set_id: optional ``TransferMetadataSet`` pk; silently
        ignored when no such row exists
    :param auto_approve: must be a bool; selects the transfer-start strategy
    :param wait_until_complete: when True the start routine runs
        synchronously (``Executor.apply``), otherwise asynchronously
        (``Executor.apply_async``)
    :param processing_config: forwarded to the transfer-start helpers
    :returns: the newly created ``Transfer`` model instance
    :raises ValueError: on missing name/path, unknown type, or non-bool
        ``auto_approve``
    """
    if not name:
        raise ValueError('No transfer name provided.')
    name = unicodeToStr(name)
    # "disk image" packages are processed through the standard workflow.
    if type_ is None or type_ == 'disk image':
        type_ = 'standard'
    if type_ not in PACKAGE_TYPE_STARTING_POINTS:
        raise ValueError(
            "Unexpected type of package provided '{}'".format(type_))
    if not path:
        raise ValueError('No path provided.')
    if isinstance(auto_approve, bool) is False:
        raise ValueError('Unexpected value in auto_approve parameter')
    # Create Transfer object.
    kwargs = {'uuid': str(uuid4())}
    if accession is not None:
        kwargs['accessionid'] = unicodeToStr(accession)
    if access_system_id is not None:
        kwargs['access_system_id'] = unicodeToStr(access_system_id)
    if metadata_set_id is not None:
        # A missing metadata set is tolerated: the transfer is simply
        # created without one.
        try:
            kwargs['transfermetadatasetrow'] = \
                TransferMetadataSet.objects.get(id=metadata_set_id)
        except TransferMetadataSet.DoesNotExist:
            pass
    transfer = Transfer.objects.create(**kwargs)
    logger.debug('Transfer object created: %s', transfer.pk)

    @auto_close_db
    def _start(transfer, name, type_, path):
        # Worker that stages the package into a fresh tmp directory and
        # kicks off the transfer; closes its DB connection when done
        # (via ``auto_close_db``).
        # TODO: use tempfile.TemporaryDirectory as a context manager in Py3.
        tmpdir = mkdtemp(
            dir=os.path.join(django_settings.SHARED_DIRECTORY, 'tmp'))
        starting_point = PACKAGE_TYPE_STARTING_POINTS.get(type_)
        logger.debug('Package %s: starting transfer (%s)', transfer.pk,
                     (name, type_, path, tmpdir))
        try:
            params = (transfer, name, path, tmpdir, starting_point,
                      processing_config)
            if auto_approve:
                _start_package_transfer_with_auto_approval(*params)
            else:
                _start_package_transfer(*params)
        finally:
            os.chmod(tmpdir, 0o770)  # Needs to be writeable by the SS.

    # Run synchronously or asynchronously depending on the caller's choice;
    # the Transfer object is returned immediately either way.
    getattr(
        Executor,
        'apply' if wait_until_complete else 'apply_async',
    )(_start, (transfer, name, type_, path))
    return transfer
relativeReplacement, 1) + "/" newfile = newfile.replace(objectsDirectory, relativeReplacement, 1) + "/" directoryContents = [] sql = "SELECT fileUUID, currentLocation FROM Files WHERE Files.removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string( oldfile.replace("\\", "\\\\")).replace( "%", "\%") + "%' AND " + groupSQL + " = '" + groupID + "';" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: fileUUID = row[0] oldPath = row[1] newPath = unicodeToStr(oldPath).replace( oldfile, newfile, 1) directoryContents.append((fileUUID, oldPath, newPath)) row = c.fetchone() sqlLock.release() print oldfile, " -> ", newfile for fileUUID, oldPath, newPath in directoryContents: updateFileLocation(oldPath, newPath, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=fileUUID)
def sorted_directory_list(path):
    """Return the entries of directory ``path`` in natural-sort order.

    Both the path given to ``os.listdir`` and every returned entry are
    normalized to bytestrings via ``archivematicaFunctions.unicodeToStr``.
    Sorting uses ``helpers.keynat`` (natural ordering, so e.g. "file2"
    sorts before "file10" — presumed from the helper's name; confirm).

    :param path: directory to list
    :returns: sorted list of bytestring entry names
    """
    entries = os.listdir(archivematicaFunctions.unicodeToStr(path))
    # Comprehension instead of an append loop — same result, idiomatic.
    cleaned = [archivematicaFunctions.unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=helpers.keynat)
def sorted_directory_list(path):
    """Return the entries of directory ``path`` in natural-sort order.

    Both the path given to ``os.listdir`` and every returned entry are
    normalized to bytestrings via ``unicodeToStr``. Sorting uses
    ``helpers.keynat`` (natural ordering — presumed from the helper's
    name; confirm).

    :param path: directory to list
    :returns: sorted list of bytestring entry names
    """
    # The previous ``cleaned = []`` initialization was dead code: it was
    # immediately overwritten by the comprehension below, so it is removed.
    entries = os.listdir(unicodeToStr(path))
    cleaned = [unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=helpers.keynat)
def checkForPreconfiguredXML(self):
    """Look up a pre-configured choice for this chain link in the unit's
    processing XML file.

    Reads the processing XML (``processingXMLFile``) from the unit's
    current path, matches ``preconfiguredChoices`` entries against this
    link's description, and resolves the chosen chain's pk via SQL.

    :returns: the pk of the pre-configured ``MicroServiceChains`` row,
        ``None`` when no file/choice exists, or ``None`` after scheduling
        a delayed ``proceedWithChoice`` when the choice carries a delay.
    """
    ret = None
    xmlFilePath = os.path.join( \
        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
    )
    xmlFilePath = unicodeToStr(xmlFilePath)
    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            # Make the processing file group-writeable before parsing.
            command = "sudo chmod 774 \"" + xmlFilePath + "\""
            if isinstance(command, unicode):
                command = command.encode("utf-8")
            exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.find("preconfiguredChoices"):
                #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk:
                if preconfiguredChoice.find(
                        "appliesTo").text == self.jobChainLink.description:
                    desiredChoice = preconfiguredChoice.find(
                        "goToChain").text
                    # Resolve the chain description from the XML into the
                    # MicroServiceChains pk for this choice point.
                    sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = '%s';""" % (
                        desiredChoice, self.jobChainLink.pk.__str__())
                    c, sqlLock = databaseInterface.querySQL(sql)
                    row = c.fetchone()
                    while row != None:
                        ret = row[0]  # last matching row wins
                        row = c.fetchone()
                    sqlLock.release()
                    try:
                        #<delay unitAtime="yes">30</delay>
                        delayXML = preconfiguredChoice.find("delay")
                        # NOTE(review): attribute read is "unitCtime"
                        # although the example above says "unitAtime" —
                        # looks intentional but confirm.
                        unitAtimeXML = delayXML.get("unitCtime")
                        if unitAtimeXML != None and unitAtimeXML.lower(
                        ) != "no":
                            delaySeconds = int(delayXML.text)
                            # Delay is measured from the unit's mtime, so
                            # time already elapsed counts toward it.
                            unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            print "time to go:", timeToGo
                            #print "that will be: ", (nowTime + timeToGo)
                            self.jobChainLink.setExitMessage(
                                "Waiting till: " +
                                datetime.datetime.fromtimestamp(
                                    (nowTime + timeToGo)).ctime())
                            # Daemon timer fires proceedWithChoice later;
                            # returning None defers the decision.
                            t = threading.Timer(
                                timeToGo,
                                self.proceedWithChoice,
                                args=[ret, None],
                                kwargs={"delayTimerStart": True})
                            t.daemon = True
                            self.delayTimer = t
                            t.start()
                            return None
                    except Exception as inst:
                        # Delay parsing is best-effort; fall through and
                        # return the resolved choice immediately.
                        print >> sys.stderr, "Error parsing xml:"
                        print >> sys.stderr, type(inst)
                        print >> sys.stderr, inst.args
        except Exception as inst:
            print >> sys.stderr, "Error parsing xml:"
            print >> sys.stderr, type(inst)
            print >> sys.stderr, inst.args
    return ret
def sorted_directory_list(path):
    """Return the entries of directory ``path`` in natural-sort order.

    Both the path given to ``os.listdir`` and every returned entry are
    normalized to bytestrings via ``archivematicaFunctions.unicodeToStr``.
    Sorting uses ``helpers.keynat`` (natural ordering — presumed from the
    helper's name; confirm).

    :param path: directory to list
    :returns: sorted list of bytestring entry names
    """
    # The previous ``cleaned = []`` initialization was dead code: it was
    # immediately overwritten by the comprehension below, so it is removed.
    entries = os.listdir(archivematicaFunctions.unicodeToStr(path))
    cleaned = [archivematicaFunctions.unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=helpers.keynat)
def generate_project_client_package(job, output_dir, package_type, structmap, dmdsecs, dipuuid):
    """
    Generates a simple.txt or compound.txt from the METS of a DIP

    :param job: client job object used for printing/status output
    :param output_dir: Path to directory for simple/compound.txt
    :param package_type: package type string; "compound" selects
        compound.txt, "dirs" switches to directory-style rows
    :param structmap: structMap element from the METS (Preparse somehow?)
    :param dmdsecs: Dict of {<DMDID>: OrderedDict{column name: value} or <dmdSec element>? }
    :param dipuuid: UUID of the DIP
    :returns: 0 on success, 1 when rows have inconsistent headers
    """
    job.pyprint('DIP UUID:', dipuuid)
    if 'compound' in package_type:
        csv_path = os.path.join(output_dir, 'compound.txt')
    else:
        csv_path = os.path.join(output_dir, 'simple.txt')
    job.pyprint('Package type:', package_type)
    job.pyprint('Path to the output tabfile', csv_path)
    divs_with_dmdsecs = structmap.findall('.//mets:div[@DMDID]', namespaces=ns.NSMAP)
    # Tab-delimited output; opened binary ("wb") for the Py2 csv module.
    with open(csv_path, "wb") as csv_file:
        writer = csv.writer(csv_file, delimiter='\t')
        # Iterate through every div and create a row for each
        csv_header_ref = None
        for div in divs_with_dmdsecs:
            # Find associated dmdSecs
            dmdids = div.get('DMDID').split()
            # Take nonDC dmdSec, fallback to DC dmdSec
            dmdsecpair = splitDmdSecs(job, [dmdsecs[dmdid] for dmdid in dmdids])
            dmdsecpair['dc'] = addAipUuidToDcMetadata(dipuuid, dmdsecpair['dc'])
            metadata = dmdsecpair['nonDc'] or dmdsecpair['dc']
            # Create csv_header and csv_values from the dmdSec metadata
            csv_header = []
            csv_values = []
            for header, value in metadata.items():
                csv_header.append(header)
                # Flatten multi-valued fields and strip embedded newlines
                # so each record stays on one tabfile row.
                value = '; '.join(value).replace('\r', '').replace('\n', '')
                csv_values.append(archivematicaFunctions.unicodeToStr(value))
            # Add AIP UUID
            csv_header.append('AIP UUID')
            csv_values.append(dipuuid)
            # Add file UUID
            csv_header.append('file UUID')
            if 'dirs' in package_type:
                # Directories have no file UUID
                csv_values.append('')
            else:
                file_uuid = ''
                fptr = div.find('mets:fptr', namespaces=ns.NSMAP)
                # Only files have fptrs as direct children
                if fptr is not None:
                    # File UUID is last 36 characters of FILEID
                    file_uuid = fptr.get('FILEID')[-36:]
                csv_values.append(file_uuid)
            # Add file or directory name
            name = div.attrib['LABEL']  # Fallback if LABEL doesn't exist?
            if 'dirs' in package_type:
                csv_header.insert(0, 'Directory name')
                csv_values.insert(0, name)
            else:
                csv_header.append('Filename')
                csv_values.append(name)
            # Compare csv_header, if diff ERROR (first time set, write to file)
            if csv_header_ref and csv_header_ref != csv_header:
                job.pyprint('ERROR headers differ,', csv_path, 'almost certainly invalid', file=sys.stderr)
                job.pyprint('Reference header:', csv_header_ref, file=sys.stderr)
                job.pyprint('Differing header:', csv_header, file=sys.stderr)
                return 1
            # If first time through, write out header
            if not csv_header_ref:
                csv_header_ref = csv_header
                writer.writerow(csv_header_ref)
                job.pyprint('Tabfile header:', csv_header)
            # Write csv_row
            writer.writerow(csv_values)
            job.pyprint('Values:', csv_values)
    return 0
def checkForPreconfiguredXML(self):
    """Look up a pre-configured choice for this chain link in the unit's
    processing XML file.

    Scans every ``preconfiguredChoice`` element for one whose ``appliesTo``
    matches this link's pk and returns its ``goToChain`` value.

    :returns: the pre-configured choice (``goToChain`` text), ``None`` when
        no file/choice exists, or ``None`` after scheduling a delayed
        ``proceedWithChoice`` when the choice carries a delay element.
    """
    desiredChoice = None
    xmlFilePath = os.path.join( \
        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
    )
    xmlFilePath = unicodeToStr(xmlFilePath)
    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            # Make the processing file group-writeable before parsing.
            command = "sudo chmod 774 \"" + xmlFilePath + "\""
            if isinstance(command, unicode):
                command = command.encode("utf-8")
            exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.findall(
                    ".//preconfiguredChoice"):
                if preconfiguredChoice.find(
                        "appliesTo").text == self.jobChainLink.pk:
                    desiredChoice = preconfiguredChoice.find(
                        "goToChain").text
                    try:
                        #<delay unitAtime="yes">30</delay>
                        delayXML = preconfiguredChoice.find("delay")
                        # NOTE(review): attribute read is "unitCtime"
                        # although the example above says "unitAtime" —
                        # looks intentional but confirm.
                        if delayXML is not None:
                            unitAtimeXML = delayXML.get("unitCtime")
                        else:
                            unitAtimeXML = None
                        if unitAtimeXML is not None and unitAtimeXML.lower(
                        ) != "no":
                            delaySeconds = int(delayXML.text)
                            # Delay is measured from the unit's mtime, so
                            # time already elapsed counts toward it.
                            unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            LOGGER.info('Time to go: %s', timeToGo)
                            self.jobChainLink.setExitMessage(
                                "Waiting till: " +
                                datetime.datetime.fromtimestamp(
                                    (nowTime + timeToGo)).ctime())
                            # Daemon timer fires proceedWithChoice later;
                            # returning None defers the decision.
                            t = threading.Timer(
                                timeToGo,
                                self.proceedWithChoice,
                                args=[desiredChoice, None],
                                kwargs={"delayTimerStart": True})
                            t.daemon = True
                            self.delayTimer = t
                            t.start()
                            return None
                    except Exception:
                        # Delay parsing is best-effort; fall through and
                        # use the choice immediately.
                        LOGGER.info('Error parsing XML', exc_info=True)
        except Exception:
            LOGGER.warning(
                'Error parsing xml at %s for pre-configured choice',
                xmlFilePath,
                exc_info=True)
    LOGGER.info('Using preconfigured choice %s for %s', desiredChoice,
                self.jobChainLink.pk)
    return desiredChoice
def create_package(
    package_queue,
    executor,
    name,
    type_,
    accession,
    access_system_id,
    path,
    metadata_set_id,
    user_id,
    workflow,
    auto_approve=True,
    processing_config=None,
):
    """Launch transfer and return its object immediately.

    ``auto_approve`` changes significantly the way that the transfer is
    initiated. See ``_start_package_transfer_with_auto_approval`` and
    ``_start_package_transfer`` for more details.

    :param package_queue: forwarded to the auto-approval start helper
    :param executor: futures-style executor used to run the start helper
        asynchronously (``submit`` / ``add_done_callback``)
    :param name: human-readable transfer name (required)
    :param type_: package type; must be a key of
        ``PACKAGE_TYPE_STARTING_POINTS`` after normalization ("disk image"
        and ``None`` become "standard")
    :param accession: optional accession id stored on the Transfer row
    :param access_system_id: optional external access-system id
    :param metadata_set_id: optional ``TransferMetadataSet`` pk; silently
        ignored when no such row exists
    :param user_id: must be int-convertible; recorded as the active agent
    :param workflow: forwarded to the auto-approval start helper
    :param auto_approve: must be a bool; selects the transfer-start strategy
    :param processing_config: processing configuration name; falls back to
        "default" when the named configuration file does not exist
    :returns: the newly created ``Transfer`` model instance
    :raises ValueError: on missing name/path, unknown type, non-bool
        ``auto_approve``, or non-numeric ``user_id``
    """
    if not name:
        raise ValueError("No transfer name provided.")
    # "disk image" packages are processed through the standard workflow.
    if type_ is None or type_ == "disk image":
        type_ = "standard"
    if type_ not in PACKAGE_TYPE_STARTING_POINTS:
        raise ValueError("Unexpected type of package provided '{}'".format(type_))
    if not path:
        raise ValueError("No path provided.")
    if isinstance(auto_approve, bool) is False:
        raise ValueError("Unexpected value in auto_approve parameter")
    try:
        int(user_id)
    except (TypeError, ValueError):
        raise ValueError("Unexpected value in user_id parameter")
    # Create Transfer object.
    kwargs = {"uuid": str(uuid4())}
    if accession is not None:
        kwargs["accessionid"] = unicodeToStr(accession)
    if access_system_id is not None:
        kwargs["access_system_id"] = unicodeToStr(access_system_id)
    if metadata_set_id is not None:
        # A missing metadata set is tolerated: the transfer is simply
        # created without one.
        try:
            kwargs["transfermetadatasetrow"] = models.TransferMetadataSet.objects.get(
                id=metadata_set_id
            )
        except models.TransferMetadataSet.DoesNotExist:
            pass
    transfer = models.Transfer.objects.create(**kwargs)
    if not processing_configuration_file_exists(processing_config):
        processing_config = "default"
    transfer.set_processing_configuration(processing_config)
    transfer.update_active_agent(user_id)
    logger.debug("Transfer object created: %s", transfer.pk)

    # TODO: use tempfile.TemporaryDirectory as a context manager in Py3.
    tmpdir = mkdtemp(dir=os.path.join(_get_setting("SHARED_DIRECTORY"), "tmp"))
    starting_point = PACKAGE_TYPE_STARTING_POINTS.get(type_)
    logger.debug(
        "Package %s: starting transfer (%s)", transfer.pk, (name, type_, path, tmpdir)
    )
    params = (transfer, name, path, tmpdir, starting_point)
    if auto_approve:
        params = params + (workflow, package_queue)
        result = executor.submit(_start_package_transfer_with_auto_approval, *params)
    else:
        result = executor.submit(_start_package_transfer, *params)

    # Once the start helper finishes (success or failure), make the tmp
    # directory writeable by the Storage Service.
    result.add_done_callback(lambda f: os.chmod(tmpdir, 0o770))

    return transfer
def sanitize_object_names(job, objectsDirectory, sipUUID, date, groupType, groupSQL, sipPath):
    """Sanitize object names in a Transfer/SIP.

    Renames objects on disk via ``sanitize_names.sanitizeRecursively`` and
    then updates the recorded ``currentlocation`` of every ``File`` (and,
    for transfers, ``Directory``) model to match, creating "name cleanup"
    events for files.

    :param job: client job object used for printing/status output
    :param objectsDirectory: absolute path to the unit's objects/ directory
    :param sipUUID: UUID of the SIP or Transfer being processed
    :param date: event datetime recorded on the name-cleanup events
    :param groupType: location prefix placeholder, "%SIPDirectory%" or
        "%transferDirectory%"
    :param groupSQL: model filter key, e.g. "sip_id" or "transfer_id"
    :param sipPath: absolute path the placeholder prefix expands to
    :returns: 0 on success, 3 on an unrecognized ``groupType``
    """
    relativeReplacement = objectsDirectory.replace(sipPath, groupType, 1)  # "%SIPDirectory%objects/"
    # Get any ``Directory`` instances created for this transfer (if such exist)
    directory_mdls = []
    if groupSQL == 'transfer_id':
        transfer_mdl = Transfer.objects.get(uuid=sipUUID)
        if transfer_mdl.diruuids:
            directory_mdls = Directory.objects.filter(
                transfer=transfer_mdl).all()
    # Sanitize objects on disk
    sanitizations = sanitize_names.sanitizeRecursively(job, objectsDirectory)
    for oldfile, newfile in sanitizations.items():
        logger.info('sanitizations: %s -> %s', oldfile, newfile)
    eventDetail = 'program="sanitize_names"; version="' + sanitize_names.VERSION + '"'
    # Update files in DB
    kwargs = {
        groupSQL: sipUUID,
        "removedtime__isnull": True,
    }
    file_mdls = File.objects.filter(**kwargs)
    # Iterate over ``File`` and ``Directory``
    for model in chain(file_mdls, directory_mdls):
        # Check all files to see if any parent directory had a sanitization event
        current_location = unicodeToStr(
            unicodedata.normalize('NFC', model.currentlocation)).replace(
                groupType, sipPath)
        sanitized_location = unicodeToStr(current_location)
        logger.info('Checking %s', current_location)
        # Check parent directories
        # Since directory keys are a mix of sanitized and unsanitized, this is
        # a little complicated
        # Directories keys are in the form sanitized/sanitized/unsanitized
        # When a match is found (eg 'unsanitized' -> 'sanitized') reset the
        # search.
        # This will find 'sanitized/unsanitized2' -> 'sanitized/sanitized2' on
        # the next pass
        # TODO This should be checked for a more efficient solution
        dirpath = sanitized_location
        while objectsDirectory in dirpath:  # Stay within unit
            if dirpath in sanitizations:
                # Make replacement
                sanitized_location = sanitized_location.replace(
                    dirpath, sanitizations[dirpath])
                dirpath = sanitized_location  # Reset search
            else:
                # Check next level up
                dirpath = os.path.dirname(dirpath)
        if current_location != sanitized_location:
            # Convert both absolute paths back to placeholder-relative
            # form before recording them.
            old_location = current_location.replace(
                objectsDirectory, relativeReplacement, 1)
            new_location = sanitized_location.replace(
                objectsDirectory, relativeReplacement, 1)
            kwargs = {
                'src': old_location,
                'dst': new_location,
                'eventType': 'name cleanup',
                'eventDateTime': date,
                'eventDetail': "prohibited characters removed:" + eventDetail,
                'fileUUID': None,
            }
            if groupType == "%SIPDirectory%":
                kwargs['sipUUID'] = sipUUID
            elif groupType == "%transferDirectory%":
                kwargs['transferUUID'] = sipUUID
            else:
                job.pyprint("bad group type", groupType, file=sys.stderr)
                return 3
            logger.info('Sanitized name: %s -> %s', old_location, new_location)
            job.pyprint('Sanitized name:', old_location, " -> ", new_location)
            # Files get a full location update (with event); directories
            # only have their recorded location rewritten.
            if isinstance(model, File):
                updateFileLocation(**kwargs)
            else:
                model.currentlocation = new_location
                model.save()
        else:
            logger.info('No sanitization for %s', current_location)
            job.pyprint('No sanitization found for', current_location)
    return 0
def copy_to_start_transfer(request):
    """Move a filesystem entry into the watched transfer directory so MCP
    picks it up as a new transfer.

    POST parameters:
        filepath:  path of the source, relative to /
        type:      transfer type; one of the keys of ``type_paths`` below
                   (unknown types fall back to a standard transfer)
        accession: optional accession number to associate with the transfer

    :returns: JSON ``HttpResponse`` with a ``message`` key and, on failure,
        ``error: True``.
    """
    filepath = archivematicaFunctions.unicodeToStr(request.POST.get('filepath', ''))
    type = request.POST.get('type', '')
    accession = request.POST.get('accession', '')

    error = check_filepath_exists('/' + filepath)

    if error is None:  # was ``error == None``; identity check is correct here
        # confine destination to subdir of originals
        filepath = os.path.join('/', filepath)
        basename = os.path.basename(filepath)

        # Map the requested transfer type to its watched subdirectory;
        # default to standard transfer for unknown types.
        type_paths = {
            'standard': 'standardTransfer',
            'unzipped bag': 'baggitDirectory',
            'zipped bag': 'baggitZippedDirectory',
            'dspace': 'Dspace',
            'maildir': 'maildir',
            'TRIM': 'TRIM'
        }
        try:
            type_subdir = type_paths[type]
            destination = os.path.join(ACTIVE_TRANSFER_DIR, type_subdir)
        except KeyError:
            destination = os.path.join(STANDARD_TRANSFER_DIR)

        # If the transfer component path leads to a ZIP file, treat as a
        # zipped bag and move it straight into the destination directory;
        # otherwise give the component its own subdirectory.
        # (Replaces the old ``filepath.lower().index('.zip')`` inside a
        # bare try/except — same behavior, no exception-driven control flow.)
        if '.zip' not in filepath.lower():
            destination = os.path.join(destination, basename)
        destination = pad_destination_filepath_if_it_already_exists(destination)

        # relay accession via DB row that MCPClient scripts will use to get
        # supplementary info from
        if accession != '':
            temp_uuid = uuid.uuid4().__str__()
            mcp_destination = destination.replace(SHARED_DIRECTORY_ROOT + '/', '%sharedPath%') + '/'
            # ``create`` already persists the row; the former extra
            # ``transfer.save()`` was redundant and has been removed.
            models.Transfer.objects.create(
                uuid=temp_uuid,
                accessionid=accession,
                currentlocation=mcp_destination
            )

        try:
            shutil.move(filepath, destination)
        except Exception:
            # Best-effort: report the failure to the client instead of
            # raising; narrowed from a bare except so KeyboardInterrupt/
            # SystemExit still propagate.
            error = 'Error copying from ' + filepath + ' to ' + destination + '. (' + str(sys.exc_info()[0]) + ')'

    response = {}

    if error is not None:
        response['message'] = error
        response['error'] = True
    else:
        response['message'] = 'Copy successful.'

    return HttpResponse(
        simplejson.JSONEncoder().encode(response),
        mimetype='application/json'
    )
etree.SubElement( attch, "content_type" ).text = attachment.content_type etree.SubElement(attch, "size").text = str( attachment.size) #print attachment.create_date # Dates don't appear to be working. Disabling for the moment - Todo #etree.SubElement(attch, "create_date").text = attachment.create_date #etree.SubElement(attch, "mod_date").text = attachment.mod_date #etree.SubElement(attch, "read_date").text = attachment.read_date filePath = os.path.join( transferDir, "objects/attachments", maildirsub2, subDir, "%s_%s" % (attachedFileUUID, attachment.name)) filePath = unicodeToStr(filePath) writeFile(filePath, attachment) eventDetail = "Unpacked from: {%s}%s" % ( sourceFileUUID, sourceFilePath) addFile(filePath, transferDir, transferUUID, date, eventDetail=eventDetail, fileUUID=attachedFileUUID) except Exception as inst: print >> sys.stderr, sourceFilePath traceback.print_exc(file=sys.stderr) print >> sys.stderr, type( inst) # the exception instance print >> sys.stderr, inst.args
if groupType == "%SIPDirectory%": updateFileLocation(oldfile, newfile, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=None, sipUUID=sipUUID) elif groupType == "%transferDirectory%": updateFileLocation(oldfile, newfile, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=None, transferUUID=sipUUID) else: print >>sys.stderr, "bad group type", groupType exit(3) elif os.path.isdir(newfile): oldfile = oldfile.replace(objectsDirectory, relativeReplacement, 1) + "/" newfile = newfile.replace(objectsDirectory, relativeReplacement, 1) + "/" directoryContents = [] sql = "SELECT fileUUID, currentLocation FROM Files WHERE Files.removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(oldfile.replace("\\", "\\\\")).replace("%","\%") + "%' AND " + groupSQL + " = '" + groupID + "';" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: fileUUID = row[0] oldPath = row[1] newPath = unicodeToStr(oldPath).replace(oldfile, newfile, 1) directoryContents.append((fileUUID, oldPath, newPath)) row = c.fetchone() sqlLock.release() print oldfile, " -> ", newfile for fileUUID, oldPath, newPath in directoryContents: updateFileLocation(oldPath, newPath, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=fileUUID)
def checkForPreconfiguredXML(self):
    """Look up a pre-configured choice for this chain link in the unit's
    processing XML file.

    Scans every ``preconfiguredChoice`` element for one whose ``appliesTo``
    matches this link's pk and returns its ``goToChain`` value.

    :returns: the pre-configured choice (``goToChain`` text), ``None`` when
        no file/choice exists, or ``None`` after scheduling a delayed
        ``proceedWithChoice`` when the choice carries a delay element.
    """
    desiredChoice = None
    xmlFilePath = os.path.join(
        self.unit.currentPath.replace("%sharedPath%",
                                      django_settings.SHARED_DIRECTORY, 1),
        django_settings.PROCESSING_XML_FILE,
    )
    xmlFilePath = unicodeToStr(xmlFilePath)
    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.findall(
                    ".//preconfiguredChoice"):
                if (preconfiguredChoice.find("appliesTo").text ==
                        self.jobChainLink.pk):
                    desiredChoice = preconfiguredChoice.find(
                        "goToChain").text
                    try:
                        # <delay unitAtime="yes">30</delay>
                        delayXML = preconfiguredChoice.find("delay")
                        # NOTE(review): attribute read is "unitCtime"
                        # although the example above says "unitAtime" —
                        # looks intentional but confirm.
                        if delayXML is not None:
                            unitAtimeXML = delayXML.get("unitCtime")
                        else:
                            unitAtimeXML = None
                        if (unitAtimeXML is not None
                                and unitAtimeXML.lower() != "no"):
                            delaySeconds = int(delayXML.text)
                            # Delay is measured from the unit's mtime, so
                            # time already elapsed counts toward it.
                            unitTime = os.path.getmtime(
                                self.unit.currentPath.replace(
                                    "%sharedPath%",
                                    django_settings.SHARED_DIRECTORY,
                                    1,
                                ))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            LOGGER.info("Time to go: %s", timeToGo)
                            self.jobChainLink.setExitMessage(
                                "Waiting till: " +
                                datetime.datetime.fromtimestamp(
                                    (nowTime + timeToGo)).ctime())
                            # Daemon timer fires proceedWithChoice later;
                            # returning None defers the decision.
                            t = threading.Timer(
                                timeToGo,
                                self.proceedWithChoice,
                                args=[desiredChoice, None],
                                kwargs={"delayTimerStart": True},
                            )
                            t.daemon = True
                            self.delayTimer = t
                            t.start()
                            return None
                    except Exception:
                        # Delay parsing is best-effort; fall through and
                        # use the choice immediately.
                        LOGGER.info("Error parsing XML", exc_info=True)
        except Exception:
            LOGGER.warning(
                "Error parsing xml at %s for pre-configured choice",
                xmlFilePath,
                exc_info=True,
            )
    LOGGER.info("Using preconfigured choice %s for %s", desiredChoice,
                self.jobChainLink.pk)
    return desiredChoice
elif os.path.isdir(newfile): oldfile = oldfile.replace(objectsDirectory, relativeReplacement, 1) + "/" newfile = newfile.replace(objectsDirectory, relativeReplacement, 1) + "/" directoryContents = [] kwargs = { "removedtime__isnull": True, "currentlocation__startswith": oldfile, groupSQL: groupID } files = File.objects.filter(**kwargs) print oldfile, " -> ", newfile for f in files: new_path = unicodeToStr(f.currentlocation).replace( oldfile, newfile, 1) updateFileLocation( f.currentlocation, new_path, fileUUID=f.uuid, # Don't create sanitization events for each # file, since it's only a parent directory # somewhere up that changed. # Otherwise, extra amdSecs will be generated # from the resulting METS. createEvent=False)
continue attachedFileUUID = uuid.uuid4().__str__() #attachment = StringIO(file_data) TODO LOG TO FILE attch = etree.SubElement(msg, "attachment") #attachment.name = attachment.name[1:-1] etree.SubElement(attch, "name").text = attachment.name etree.SubElement(attch, "content_type").text = attachment.content_type etree.SubElement(attch, "size").text = str(attachment.size) #print attachment.create_date # Dates don't appear to be working. Disabling for the moment - Todo #etree.SubElement(attch, "create_date").text = attachment.create_date #etree.SubElement(attch, "mod_date").text = attachment.mod_date #etree.SubElement(attch, "read_date").text = attachment.read_date filePath = os.path.join(transferDir, "objects/attachments", maildirsub2, subDir, "%s_%s" % (attachedFileUUID, attachment.name)) filePath = unicodeToStr(filePath) writeFile(filePath, attachment) eventDetail="Unpacked from: {%s}%s" % (sourceFileUUID, sourceFilePath) addFile(filePath, transferDir, transferUUID, date, eventDetail=eventDetail, fileUUID=attachedFileUUID) except Exception as inst: print >>sys.stderr, sourceFilePath traceback.print_exc(file=sys.stderr) print >>sys.stderr, type(inst) # the exception instance print >>sys.stderr, inst.args print >>sys.stderr, etree.tostring(msg) print >>sys.stderr sharedVariablesAcrossModules.errorCounter += 1 except Exception as inst: print >>sys.stderr, sourceFilePath traceback.print_exc(file=sys.stderr) print >>sys.stderr, type(inst) # the exception instance