def _bundle_metadata(self, metadata):
    """
    Bundles the metadata into a tarfile formatted bundle

    @param metadata: The metadata string to bundle
    """
    metadata_file = None
    try:
        metadata_file = tempfile.TemporaryFile()
    except IOError:
        task_error("Can't create metadata file in working directory")
    metadata_file.write(metadata)
    metadata_file.seek(0)

    # Append to an existing archive unless this is the first entry;
    # mode 'w' would truncate previously bundled files.
    if self.empty_tar:
        tarball = tarfile.TarFile(name=self.bundle_path, mode='w')
        self.empty_tar = False
    else:
        tarball = tarfile.TarFile(name=self.bundle_path, mode='a')

    try:
        tar_info = tarfile.TarInfo("metadata.txt")
        tar_info.size = len(metadata)
        tar_info.mtime = time.time()
        tarball.addfile(tar_info, metadata_file)
    finally:
        # Close both handles even if addfile fails; the original leaked
        # them on any exception.
        metadata_file.close()
        tarball.close()
def _bundle_file(self, file_paths):
    """
    Abstract hook for file bundling; concrete child classes must override.

    :Parameters:
        file_paths
            The (file_name, file_arcname) entries that a subclass would
            add to the bundle
    """
    task_error("Can't bundle a file with the base class")
def __init__(self, bundle_path):
    """
    Initializes a Tar_Bundler

    :Parameters:
        bundle_path
            The path to the target bundle file
    """
    # Reject a missing target path up front.
    if bundle_path is None or bundle_path == '':
        task_error('no bundle path')

    # Initialize the Base Bundler Class, then create the (empty) archive
    # on disk so later appends have a file to open.
    FileBundler.__init__(self, bundle_path)
    empty_archive = tarfile.TarFile(name=self.bundle_path, mode='w')
    empty_archive.close()
    self.empty_tar = True
    sys.stderr.write("Successfully created tarfile bundle %s\n" % self.bundle_path)
def bundle_metadata(self, metadata):
    """
    Bundles the metadata into a tarfile formatted bundle

    @param metadata: The metadata string (or bytes) to bundle
    """
    # Encode text once so the binary-mode temp file accepts it on both
    # Python 2 and 3, and so the TarInfo size is the true byte count
    # (len() of a unicode string undercounts non-ASCII metadata).
    data = metadata if isinstance(metadata, bytes) else metadata.encode('utf-8')

    metadata_file = None
    try:
        metadata_file = tempfile.NamedTemporaryFile(delete=False)
    except IOError:
        task_error('Cannot create metadata file in working directory')
    metadata_file.write(data)
    fname = metadata_file.name
    metadata_file.close()
    # Reopen read-only so tarfile.addfile can stream the content back in.
    metadata_file = open(fname, mode='rb')

    # First entry creates the archive; later entries append to it.
    if self.empty_tar:
        tarball = tarfile.TarFile(name=self.bundle_path, mode='w')
        self.empty_tar = False
    else:
        tarball = tarfile.TarFile(name=self.bundle_path, mode='a')
    try:
        tar_info = tarfile.TarInfo('metadata.txt')
        tar_info.size = len(data)
        tar_info.mtime = time.time()
        tarball.addfile(tar_info, metadata_file)
    except Exception as ex:
        print(ex)
        traceback.print_exc(file=sys.stdout)
        raise  # bare raise keeps the original traceback (raise ex lost it)
    finally:
        # Always release handles and delete the temp file; the original
        # leaked the file on any failure because cleanup only ran on the
        # success path.
        metadata_file.close()
        tarball.close()
        os.remove(fname)
def bundle_file(self, file_paths, bundle_size=0):
    """
    Bundles files into a tarfile formatted bundle

    :Parameters:
        file_paths
            List of (file_path, file_arcname) pairs to bundle
        bundle_size
            Total size in bytes of the files, used for progress reporting
    """
    # First entry creates the archive; later entries append to it.
    if self.empty_tar:
        tarball = tarfile.TarFile(name=self.bundle_path, mode='w')
        self.empty_tar = False
    else:
        tarball = tarfile.TarFile(name=self.bundle_path, mode='a')

    # Progress bookkeeping consumed by hash_file()/report_percent_complete().
    self.running_size = 0
    self.last_percent = 0
    self.percent_complete = 0
    self.bundle_size = bundle_size
    try:
        self.report_percent_complete()
        for (file_path, file_arcname) in file_paths:
            # Hash the file and store it in the bundler's records.
            # Percent complete is reported only as the file is read and
            # hashed — that is the slowest part and all we have access
            # to for completion statistics.
            self.hash_file(file_path, file_arcname)

            # For version 1.2, push files to a data/ directory to avoid
            # collisions with metadata.txt in the root.
            modified_arc_name = os.path.join('data', file_arcname)
            tarball.add(file_path, arcname=modified_arc_name, recursive=False)
    except BundlerError as err:
        task_error("Failed to bundle file: %s" % (err.msg))
    finally:
        # The original never closed the archive, leaking the handle and
        # risking an incomplete tarball on disk.
        tarball.close()
def __init__(self, bundle_path, proposal_ID='', instrument_name='',
             instrument_ID='', groups=None):
    """
    Initializes a Tar_Bundler

    :Parameters:
        bundle_path
            The path to the target bundle file
        proposal_ID
            An optional string describing the proposal associated with
            this bundle
        instrument_name
            The name of the instrument that produced the data packaged
            in the bundle
        instrument_ID
            Identifier of the instrument (passed through to FileBundler)
        groups
            Optional hash of type/name groups to attach to the bundle
    """
    if bundle_path is None or bundle_path == '':
        bundle_path = 'bundle.tar'

    # Initialize the Base Bundler Class
    FileBundler.__init__(self, bundle_path, proposal_id=proposal_ID,
                         instrument_name=instrument_name,
                         instrument_id=instrument_ID, groups=groups)
    try:
        # Create/truncate the target archive so later appends start clean.
        tarball = tarfile.TarFile(name=self.bundle_path, mode='w')
        tarball.close()
    except Exception:
        # Narrowed from a bare `except:` which would also swallow
        # KeyboardInterrupt/SystemExit.
        task_error("Couldn't create bundle tarball: %s" % self.bundle_path)
    self.empty_tar = True
    sys.stderr.write("Successfully created tarfile bundle %s\n" % self.bundle_path)
def progress(_download_t, _download_d, upload_t, upload_d):
    """
    Gets the progress of the current pycurl upload.

    Download arguments are ignored; only upload totals are used.
    Progress is pushed at most once per 5 percentage points of advance.
    """
    if upload_t > 0:
        try:
            percent = 100.0 * float(upload_d) / float(upload_t)
            # Throttle: only report when we've advanced by more than 5%.
            if percent - TrackPercent.percent > 5:
                meta_dict = {'Status': "upload percent complete: " + str(int(percent))}
                task_state("PROGRESS", meta_dict)
                TrackPercent.percent = percent
        except Exception as e:
            # str(e) replaces e.message, which is deprecated in py2.6+
            # and removed in py3.
            # NOTE(review): task_error presumably raises on its own; the
            # extra `raise` of its return value looks suspect — confirm.
            raise task_error("Error during callback: " + str(e))
def bundle(bundle_name='', file_list=None, bundle_size=0, meta_list=None):
    """
    Bundles a list of files into a single aggregated bundle file

    :Parameters:
        bundle_name
            The target bundle file in which to aggregate the file list
        file_list
            The list of files to bundle
        bundle_size
            total size of the files to be bundled
        meta_list
            list of metadata items.  File metadata will be added to this list
    """
    # validate parameters
    if bundle_name is None or bundle_name == '':
        task_error("Missing bundle name")
    if file_list is None or len(file_list) == 0:
        task_error("Missing file list")

    # Set up the bundle file
    bundle_path = os.path.abspath(bundle_name)

    # Bundle the files, then append the collected metadata as
    # metadata.txt in the archive root.  (Removed a dead `bundler = None`
    # assignment that was immediately overwritten.)
    bundler = TarBundler(bundle_path)
    bundler.bundle_file(file_list, bundle_size, meta_list)
    meta_str = json.dumps(meta_list)
    bundler.bundle_metadata(meta_str)
    TaskComm.set_state('PROGRESS', 'Bundling complete')
def upload_files(ingest_server='', bundle_name='', file_list=None,
                 bundle_size=0, meta_list=None, auth=None, verify=True,
                 tartar=False):
    """
    Task created on a separate Celery process to bundle and upload in the
    background.  Status and errors are pushed by celery to the main server
    through RabbitMQ.

    :Parameters:
        ingest_server
            server that receives the uploaded bundle
        bundle_name
            path of the bundle file to create and upload
        file_list
            list of files to bundle
        bundle_size
            total size of the files to be bundled
        meta_list
            list of metadata items to include in the bundle
        auth
            authentication info handed to the Uploader (defaults to {})
        verify
            passed through to the Uploader (presumably TLS verification)
        tartar
            if True, wrap the finished bundle in a second tar layer
    """
    # Replace the shared mutable default argument `auth={}`.
    if auth is None:
        auth = {}
    # Pre-bind so the outer exception handler can reference `result`
    # even when the failure happens before the upload produced one.
    result = ''
    # one big-ass exception handler for upload.
    try:
        target_dir = os.path.dirname(bundle_name)
        if not os.path.isdir(target_dir):
            task_error('Bundle directory does not exist')
            return

        TaskComm.set_state("PROGRESS", "Cleaning previous uploads")

        # initial state pushed through celery
        TaskComm.set_state("PROGRESS", "Starting Bundle/Upload Process")

        bundle(bundle_name=bundle_name, file_list=file_list,
               meta_list=meta_list, bundle_size=bundle_size)
        TaskComm.set_state("PROGRESS", "Completed Bundling")

        if tartar:
            # create the file tuple list of 1 file
            fname = os.path.basename(bundle_name)
            file_tuples = [(bundle_name, fname)]
            bundle_size = os.path.getsize(bundle_name)
            # dual extension indicates tartar
            bundle_name += '.tar'
            bundle(bundle_name=bundle_name, file_list=file_tuples,
                   meta_list=meta_list, bundle_size=bundle_size)

        TaskComm.set_state(
            "PROGRESS", "Starting Uploady: " + str(bundle_name) + ": " +
            ingest_server + ": " + str(auth))
        uploader = Uploader(bundle_name, ingest_server, auth, verify)
        TaskComm.set_state("PROGRESS", "Uploader Initialized")
        result = uploader.upload_bundle()
        TaskComm.set_state("PROGRESS", "Finished Upload")

        try:
            status = json.loads(result)
        except Exception:
            print('Upload Error')
            print(result)
            raise Exception('Upload error: ' + result)
        print('End Upload Error')

        # check for a valid job id. Ingest error should return -99
        job_id = status['job_id']
        if job_id < 0:
            # Bug fix: the original called task_error(err) with an
            # undefined name `err`, raising NameError instead of
            # reporting the actual ingest failure.
            task_error('Upload error: ingest returned job_id ' + str(job_id))
            raise Exception('Upload error: ' + bundle_name)

        TaskComm.set_state("PROGRESS", "Rename Tar File")
        # (A duplicated copy of this try/except appeared in the original
        # source — apparently a paste/extraction artifact — and was removed.)
        try:
            rename_tar_file(target_dir, bundle_name, job_id)
        except Exception as ex:
            raise Exception(str(ex) + ': ' + bundle_name + ': ' + str(job_id))

        print(status)
        if TaskComm.USE_CELERY:
            # set job ID here
            print('exit with deliberate error')
            print(result)
            raise StandardError(result)
        else:
            TaskComm.set_state("DONE", result)
    except StandardError as error:
        # Deliberate Celery failure path above: re-raise untouched.
        raise error
    except Exception as ex:
        sys.stderr.write("Exception in upload_files:\n")
        sys.stderr.write('-' * 60 + '\n')
        traceback.print_exc(file=sys.stderr)
        sys.stderr.write('-' * 60 + '\n')
        # str(ex) replaces ex.message (deprecated py2.6+, removed py3).
        err = 'Task exception: upload_files :' + str(ex) + ': ' + \
            result + ': ' + traceback.format_exc()
        task_error(err)
        print(err)
        raise Exception(err)
def hash_file(self, file_path, file_arcname):
    """
    Hash a file for bundling and collect its metadata.

    :Parameters:
        file_path
            The path to the file to bundle
        file_arcname
            An alternative name to use for the file inside of the bundle

    :Returns:
        dict of file metadata (size, mimetype, name, mtime, ctime,
        destinationTable, subdir, hashsum, hashtype), or None when a file
        with the same arcname is already recorded in self.file_meta.
    """
    file_path = os.path.abspath(file_path)

    # If the file_arcname argument is None use the base file name as the
    # arc name
    if file_arcname is None:
        file_arcname = os.path.basename(file_path)

    if not os.path.exists(file_path):
        task_error("%s doesn't exist" % file_path)
    if not os.access(file_path, os.R_OK):
        task_error("Can't read from %s" % file_path)

    file_mode = os.stat(file_path)[stat.ST_MODE]
    if not stat.S_ISDIR(file_mode) and not stat.S_ISREG(file_mode):
        task_error("Unknown file type for %s" % file_path)

    file_in = None
    try:
        # open to read binary. This is important.
        file_in = open(file_path, 'rb')
    except IOError:
        task_error("Couldn't read from file: %s" % file_path)

    # hash file 1Mb at a time
    hashval = hashlib.sha1()
    try:
        while True:
            data = file_in.read(1024 * 1024)
            if not data:
                break
            hashval.update(data)

            # update file bundle status
            self.running_size += len(data)
            # Guard against ZeroDivisionError: bundle_file() defaults
            # bundle_size to 0.
            if self.bundle_size:
                self.percent_complete = \
                    100.0 * self.running_size / self.bundle_size

            # only update significant progress
            if self.percent_complete - self.last_percent > 1:
                self.report_percent_complete()
                self.last_percent = self.percent_complete
    finally:
        # Close even if hashing raised (the original leaked the handle).
        file_in.close()
    file_hash = hashval.hexdigest()

    modified_name = os.path.join('data', file_arcname)
    (file_dir, file_name) = os.path.split(modified_name)
    # linuxfy the directory
    file_dir = file_dir.replace('\\', '/')

    info = {}
    info['size'] = os.path.getsize(file_path)
    mime_type = mimetypes.guess_type(file_path, strict=True)[0]
    info['mimetype'] = (mime_type if mime_type is not None
                        else 'application/octet-stream')
    info['name'] = file_name
    info['mtime'] = DT.datetime.utcfromtimestamp(
        int(os.path.getmtime(file_path))).isoformat()
    info['ctime'] = DT.datetime.utcfromtimestamp(
        int(os.path.getctime(file_path))).isoformat()
    info['destinationTable'] = 'Files'
    info['subdir'] = file_dir
    info['hashsum'] = file_hash
    info['hashtype'] = 'sha1'

    # todo make sure errors bubble up without crashing
    if file_arcname in self.file_meta:
        print(file_arcname)
        task_error(
            "Different file with the same arcname is already in the bundle"
        )
        return
    return info
def bundle(bundle_name='', instrument_name='', proposal='', file_list=None,
           groups=None, bundle_size=0):
    """
    Bundles a list of files into a single aggregated bundle file

    :Parameters:
        bundle_name
            The target bundle file in which to aggregate the file list
        instrument_name
            The name of the instrument that produced the data files that
            will be bundled
        proposal
            An optional proposal ID to attach to the bundle
        file_list
            The list of files to bundle
        groups
            A hash of type/name groups to attach to the bundle
        bundle_size
            total size of the files to be bundled
    """
    # validate parameters (use identity comparison with None, not ==)
    if bundle_name is None or bundle_name == '':
        task_error("Missing bundle name")
    if instrument_name is None or instrument_name == '':
        task_error("Missing instrument name")
    if proposal is None or proposal == '':
        task_error("Missing proposal")
    if file_list is None or len(file_list) == 0:
        task_error("Missing file list")
    if groups is None or groups == '':
        task_error("Missing groups")

    # Set up the bundle file
    bundle_path = os.path.abspath(bundle_name)

    # dfh note we are setting the instrument name and ID to the same thing,
    # which is being sent in as the instrument name but is actually the
    # instrument ID. Fix this.
    # (Removed a dead `bundler = None` assignment that was immediately
    # overwritten.)
    bundler = TarBundler(bundle_path, proposal_ID=proposal,
                         instrument_name=instrument_name,
                         instrument_ID=instrument_name, groups=groups)
    bundler.bundle_file(file_list, bundle_size)
    bundler.bundle_metadata()
    task_state('PROGRESS', "Bundling complete")
def _bundle_metadata(self, metadata):
    """
    Abstract hook for metadata bundling; concrete child classes must
    override this.

    @param metadata: The metadata string to bundle
    """
    task_error("Can't bundle metadata with the base class")
def hash_file(self, file_path, file_arcname):
    """
    Hash a file for bundling, recording the digest in self.hash_dict.

    :Parameters:
        file_path
            The path to the file to bundle
        file_arcname
            An alternative name to use for the file inside of the bundle
    """
    file_path = os.path.abspath(file_path)

    # If the file_arcname argument is None use the base file name as the
    # arc name
    if file_arcname is None:
        file_arcname = os.path.basename(file_path)

    if not os.path.exists(file_path):
        task_error("%s doesn't exist" % file_path)
    if not os.access(file_path, os.R_OK):
        task_error("Can't read from %s" % file_path)

    file_mode = os.stat(file_path)[stat.ST_MODE]
    if not stat.S_ISDIR(file_mode) and not stat.S_ISREG(file_mode):
        task_error("Unknown file type for %s" % file_path)

    file_in = None
    try:
        # open to read binary. This is important.
        file_in = open(file_path, 'rb')
    except IOError:
        task_error("Couldn't read from file: %s" % file_path)

    # hash file 1Mb at a time
    hashval = hashlib.sha1()
    try:
        while True:
            data = file_in.read(1024 * 1024)
            if not data:
                break
            hashval.update(data)

            # update file bundle status
            self.running_size += len(data)
            # Guard against ZeroDivisionError when no total size was set.
            if self.bundle_size:
                self.percent_complete = \
                    100.0 * self.running_size / self.bundle_size

            # only update significant progress
            if self.percent_complete - self.last_percent > 1:
                self.report_percent_complete()
                self.last_percent = self.percent_complete
    finally:
        # Close even if hashing raised (the original leaked the handle).
        file_in.close()
    file_hash = hashval.hexdigest()

    # todo make sure errors bubble up without crashing
    if file_arcname in self.hash_dict:
        # Bug fix: the original wrote `if hash != self.hash_dict[...]`,
        # comparing the *builtin* hash function object to the stored
        # digest — always unequal, so every duplicate arcname (even the
        # identical file) was rejected.  Compare the computed digest.
        if file_hash != self.hash_dict[file_arcname]:
            print(file_arcname)
            task_error(
                "Different file with the same arcname is already in the bundle")
            return
    self.hash_dict[file_arcname] = file_hash