def __call__(self, t_path, name_info, i_str):
    '''
    Export t_path as a tarball and rename it into its final location
    under config["output_path"], named by the config["output_name"]
    template.

    :param t_path: path of the temporary chunk file to export
    :param name_info: dict of naming fields, updated in place
    :param i_str: the input string, used to derive naming fields
    :returns: None (also returns None early for an empty chunk)
    :raises OSError: if the final rename fails for any reason other
        than crossing a filesystem boundary
    '''
    import errno
    name_info.update(get_name_info(t_path, i_str=i_str))
    if name_info["num"] == 0:
        ## empty chunk: nothing to export
        return None
    o_fname = self.config["output_name"] % name_info
    o_dir = self.config["output_path"]
    o_path = os.path.join(o_dir, o_fname + ".tar.gz")
    ## if dir is missing make it
    dirname = os.path.dirname(o_path)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    t_path2 = tarball_export(t_path, name_info)
    ## do an atomic renaming
    try:
        logger.debug("attemping os.rename(%r, %r)" % (t_path2, o_path))
        os.rename(t_path2, o_path)
    except OSError as exc:
        ## EXDEV (18): rename cannot cross filesystems, so fall back
        ## to a copy-then-remove move
        if exc.errno == errno.EXDEV:
            patient_move(t_path2, o_path)
        else:
            ## NOTE: format_exc takes no exception argument; the old
            ## code passed exc as the `limit` parameter by mistake
            msg = "failed shutil.copy2(%r, %r) and/or os.remove(t_path)\n%s" % (
                t_path2,
                o_path,
                traceback.format_exc(),
            )
            logger.critical(msg)
            raise
def __call__(self, t_path, name_info, i_str):
    '''
    Load chunk from t_path and put it into the right place in s3
    using the output_name template from the config.

    :param t_path: path of the temporary chunk file to upload
    :param name_info: dict of naming fields, updated in place
        (gains an 'md5' key for the uploaded payload)
    :param i_str: the input string, used to derive naming fields
    :returns: the final s3 output path, or None for an empty chunk
    '''
    name_info.update(get_name_info(t_path, i_str=i_str))
    if name_info['num'] == 0:
        ## empty chunk: nothing to upload
        return None
    o_fname = self.config['output_name'] % name_info
    o_path = os.path.join(self.config['s3_path_prefix'], o_fname + '.tar.gz')
    logger.info('to_s3_tarballs: \n\t%r\n\tfrom: %r\n\tby way of %r ' % (o_path, i_str, t_path))
    ## forcibly collect dereferenced objects
    #gc.collect()
    t_path2 = tarball_export(t_path, name_info)
    ## read in binary mode and close the handle promptly; md5 must be
    ## computed over the raw bytes of the tarball
    with open(t_path2, 'rb') as fh:
        data = fh.read()
    name_info['md5'] = hashlib.md5(data).hexdigest() # pylint: disable=E1101
    self.upload(o_path, data, name_info)
    self.cleanup(t_path)
    self.cleanup(t_path2)
    logger.info('to_s3_tarballs finished:\n\t input: %s\n\toutput: %s' % (i_str, o_path))
    ## return the final output path
    return o_path
def __call__(self, t_path, name_info, i_str):
    '''
    Load chunk from t_path and put it into the right place in s3
    using the output_name template from the config.  The chunk is
    compressed and GPG-encrypted before upload; if
    config["verify_via_http"] is set, the put is retried until an
    HTTP verification of the md5 succeeds.

    :param t_path: path of the temporary chunk file to upload
    :param name_info: dict of naming fields, updated in place
        (gains 's3_output_path' and 'md5' keys)
    :param i_str: the input string, used to derive naming fields
    :returns: None for an empty chunk
    '''
    name_info.update(get_name_info(t_path, i_str=i_str))
    if name_info['num'] == 0:
        ## empty chunk: nothing to upload
        return None
    o_fname = self.config['output_name'] % name_info
    o_path = os.path.join(self.config['s3_path_prefix'], o_fname + '.sc.xz.gpg')
    name_info['s3_output_path'] = o_path
    logger.info('to_s3_chunks: \n\t%r\n\tfrom: %r\n\tby way of %r ' % (o_path, i_str, t_path))
    ## forcibly collect dereferenced objects
    #gc.collect()
    ## compress and encrypt
    logger.critical('key path: %r' % self.config['gpg_encryption_key_path'])
    _errors, t_path2 = compress_and_encrypt_path(
        t_path,
        self.config['gpg_encryption_key_path'],
        gpg_recipient=self.config['gpg_recipient'],
        tmp_dir=self.config['tmp_dir_path'],
    )
    logger.info('\n'.join(_errors))
    ## read in binary mode and close the handle promptly; md5 must be
    ## computed over the raw encrypted bytes
    with open(t_path2, 'rb') as fh:
        data = fh.read()
    logger.debug('compressed size: %d' % len(data))
    ## compute the md5 that verify() checks below -- the previous code
    ## read name_info['md5'] without ever setting it, which raised
    ## KeyError whenever verify_via_http was enabled; this matches the
    ## sibling to_s3_tarballs writer
    name_info['md5'] = hashlib.md5(data).hexdigest() # pylint: disable=E1101
    while 1:
        start_time = time.time()
        self.put(o_path, data)
        elapsed = time.time() - start_time
        if elapsed > 0:
            logger.debug('put %.1f bytes/second' % (len(data) / elapsed))
        if self.config['verify_via_http']:
            try:
                start_time = time.time()
                self.verify(o_path, name_info['md5'])
                elapsed = time.time() - start_time
                if elapsed > 0:
                    logger.debug('verify %.1f bytes/second' % (len(data) / elapsed))
                break
            except Exception as exc:
                logger.critical('verify_via_http failed so retrying: %r' % exc)
                ## keep looping if verify raises anything
                continue
        else:
            ## not verifying, so don't attempt multiple puts
            break
def __call__(self, t_path, name_info, i_str):
    '''
    Compress/encrypt the chunk at t_path and move it to an output
    location selected by config["output_type"]:

      samedir  -- next to the input file i_str
      inplace  -- replacing the input file i_str
      otherdir -- under config["output_path"]

    :param t_path: path of the temporary chunk file
    :param name_info: dict of naming fields, updated in place
    :param i_str: the input string; for samedir/inplace this must be
        a local path
    :returns: the final output path, or None for an empty chunk
    :raises ValueError: for an unrecognized output_type
    :raises OSError: if the final rename fails for any reason other
        than crossing a filesystem boundary
    '''
    import errno
    o_type = self.config["output_type"]
    name_info.update(get_name_info(t_path, i_str=i_str))
    if name_info["num"] == 0:
        ## empty chunk: nothing to write
        return None
    if "input" in self.config["output_name"]:
        ## derive a bare input file name by stripping the storage
        ## suffixes in the order they are layered on: .gpg, .xz, .sc
        i_fname = i_str.split("/")[-1]
        if i_fname.endswith(".gpg"):
            i_fname = i_fname[:-4]
        if i_fname.endswith(".xz"):
            i_fname = i_fname[:-3]
        if i_fname.endswith(".sc"):
            i_fname = i_fname[:-3]
        name_info["input_fname"] = i_fname
    ## prepare to compress the output
    compress = self.config.get("compress", None)
    assert compress in [None, "xz"], compress
    if o_type == "samedir":
        ## assume that i_str was a local path
        assert i_str[-3:] == ".sc", repr(i_str[-3:])
        o_path = i_str[:-3] + "-%s.sc" % self.config["output_name"]
        if compress:
            o_path += ".xz"
        # print 'creating %s' % o_path
    elif o_type == "inplace":
        ## replace the input chunks with the newly created
        o_path = i_str
        if o_path.endswith(".xz"):
            compress = True
    elif o_type == "otherdir":
        ## put the output under output_path, resolving a relative
        ## path against the current working directory
        if not self.config["output_path"].startswith("/"):
            o_dir = os.path.join(os.getcwd(), self.config["output_path"])
        else:
            o_dir = self.config["output_path"]
        if not os.path.exists(o_dir):
            os.makedirs(o_dir)
        o_fname = self.config["output_name"] % name_info
        o_path = os.path.join(o_dir, o_fname + ".sc")
        if compress:
            o_path += ".xz"
    else:
        ## previously an unknown output_type fell through to a
        ## confusing NameError on o_path; fail with a clear message
        raise ValueError("unknown output_type: %r" % o_type)
    ## if dir is missing make it
    dirname = os.path.dirname(o_path)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    if compress:
        assert o_path.endswith(".xz"), o_path
    logger.info("compress_and_encrypt_path(%r, tmp_dir=%r)", t_path, self.config["tmp_dir_path"])
    ## forcibly collect dereferenced objects
    # gc.collect()
    errors, t_path2 = streamcorpus.compress_and_encrypt_path(t_path, tmp_dir=self.config["tmp_dir_path"])
    assert not errors, errors
    if self.config.get("cleanup_tmp_files", True):
        # default action, move tmp file to output position
        try:
            logger.debug("attempting renamed(%r, %r)", t_path2, o_path)
            os.rename(t_path2, o_path)
            logger.debug("renamed(%r, %r)", t_path2, o_path)
        except OSError as exc:
            ## EXDEV (18): rename cannot cross filesystems, so fall
            ## back to a copy-then-remove move
            if exc.errno == errno.EXDEV:
                logger.debug("resorting to patient_move(%r, %r)", t_path2, o_path, exc_info=True)
                patient_move(t_path2, o_path)
                logger.debug("patient_move succeeded")
            else:
                logger.critical("rename failed (%r -> %r)", t_path2, o_path, exc_info=True)
                raise
        return o_path
    else:
        # for debugging, leave temp file, copy to output
        shutil.copy(t_path2, o_path)
        logger.info("copied %r -> %r", t_path2, o_path)
        return o_path