def pickle_load(self, path):
    """Load a pickle from a local path or an 's3://bucket/key' URL."""
    if path.startswith('s3://'):
        sys.stderr.flush()
        path = path[5:].split('/')
        bucket = path[0]
        key = '/'.join(path[1:])
        return self.pickle_from_s3(bucket, key)
    else:
        try:
            # Pickles are binary; open the file in binary mode.
            with open(path, 'rb') as F:
                return pickle.load(F)
        except Exception as e:
            print(execute.traceback_exception(e))
            return None

def numpy_from_s3(self, bucket, key):
    """Download a .npy file from S3 and return the loaded array, or None."""
    # Build the temp path outside the try so the cleanup code below can
    # always reference it.
    path = self.tmppath('npy')
    try:
        if self.__from_s3(bucket, key, path):
            data = numpy.load(path)
            os.remove(path)
            return data
        else:
            return None
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return None

def csv_load(self, path):
    """Load a CSV from a local path or an 's3://bucket/key' URL as rows."""
    if path.startswith('s3://'):
        sys.stderr.flush()
        path = path[5:].split('/')
        bucket = path[0]
        key = '/'.join(path[1:])
        return self.csv_from_s3(bucket, key)
    else:
        try:
            with open(path, 'r') as F:
                # Strip the trailing newline from each line, then split on commas.
                return [line.rstrip('\n').split(',') for line in F]
        except Exception as e:
            print(execute.traceback_exception(e))
            return None

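# A hedged usage sketch for the loaders above: both dispatch on an 's3://'
# prefix, so one call site handles local and remote paths alike, and both
# return None on any failure. 'store', the bucket, and the keys below are
# hypothetical names, not part of this module.
#
#   model = store.pickle_load('s3://my-bucket/models/model.pkl')
#   rows = store.csv_load('/data/rows.csv')   # list of lists of strings
#   if rows is None:
#       pass  # load failed; the traceback was already printed
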
def archive_s3(self, bucket, key, prefix):
    """Build a tarball with archive() and upload it to S3; True on success."""
    # Build the temp path outside the try so the cleanup code below can
    # always reference it.
    path = self.tmppath('tar.gz')
    try:
        if archive(path, prefix, self.config['tmpdir']) and self.__to_s3(bucket, key, path):
            os.remove(path)
            return True
        else:
            if os.path.exists(path):
                os.remove(path)
            return False
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return False

def pickle_from_s3(self, bucket, key):
    """Download a pickle from S3 and return the unpickled object, or None."""
    path = self.tmppath('pkl')
    try:
        if self.__from_s3(bucket, key, path):
            # Pickles are binary; open the file in binary mode.
            with open(path, 'rb') as F:
                data = pickle.load(F)
            os.remove(path)
            return data
        else:
            return None
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return None

def csv_from_s3(self, bucket, key):
    """Download a CSV from S3 and return it as a list of rows, or None."""
    path = self.tmppath('csv')
    try:
        if self.__from_s3(bucket, key, path):
            with open(path, 'r') as F:
                data = [line.rstrip('\n').split(',') for line in F]
            os.remove(path)
            return data
        else:
            return None
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return None

def numpy_to_s3(self, bucket, key, data):
    """Save an array to a temporary .npy file and upload it; True on success."""
    path = self.tmppath('npy')
    try:
        numpy.save(path, data)
        if self.__to_s3(bucket, key, path):
            os.remove(path)
            return True
        else:
            return False
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return False

def unarchive(self, path, target_dir=None):
    """Extract a tarball from a local path or an 's3://bucket/key' URL."""
    if path.startswith('s3://'):
        path = path[5:].split('/')
        bucket = path[0]
        key = '/'.join(path[1:])
        return self.unarchive_s3(bucket, key, target_dir)
    else:
        try:
            if target_dir is None:
                target_dir = '{}/{}'.format(self.config['tmpdir'], uuid.uuid4())
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            # Calls the module-level unarchive() helper, not this method.
            return unarchive(path, target_dir, self.config['tmpdir'])
        except Exception as e:
            print(execute.traceback_exception(e))
            return None

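# A hedged round-trip sketch for the archive helpers: archive_s3 appears to
# build a tarball via the module-level archive() helper and upload it, while
# unarchive accepts either a local tarball path or an 's3://' URL. Bucket,
# key, and prefix names below are hypothetical.
#
#   if store.archive_s3('my-bucket', 'snapshots/run1.tar.gz', 'run1'):
#       extracted = store.unarchive('s3://my-bucket/snapshots/run1.tar.gz')
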
def pickle_to_s3(self, bucket, key, data):
    """Pickle an object to a temporary file and upload it; True on success."""
    path = self.tmppath('pkl')
    try:
        # Pickles are binary; open the file in binary mode.
        with open(path, 'wb') as F:
            pickle.dump(data, F)
        if self.__to_s3(bucket, key, path):
            os.remove(path)
            return True
        else:
            return False
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return False

def csv_to_s3(self, bucket, key, data):
    """Write rows as CSV to a temporary file and upload it; True on success."""
    path = self.tmppath('csv')
    try:
        with open(path, 'w') as F:
            for row in data:
                F.write('{}\n'.format(','.join(map(str, row))))
        if self.__to_s3(bucket, key, path):
            os.remove(path)
            return True
        else:
            return False
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return False

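# A hedged round-trip sketch for the writers above; each returns True on
# success and False on failure. 'store' and the bucket/key names are
# hypothetical.
#
#   arr = numpy.arange(10)
#   if store.numpy_to_s3('my-bucket', 'arrays/arr.npy', arr):
#       copy = store.numpy_from_s3('my-bucket', 'arrays/arr.npy')
#   store.csv_to_s3('my-bucket', 'exports/rows.csv', [[1, 'a'], [2, 'b']])
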
def __to_s3(self, bucket, key, path):
    """Upload a local file to S3, retrying while credentials are unavailable."""
    res = None
    i = 0
    while res is None and i < self.retry:
        try:
            self.get_bucket(bucket).upload_file(path, key)
        except botocore.exceptions.NoCredentialsError:
            # Credentials may not be available yet; wait and retry.
            i += 1
            time.sleep(1)
        except Exception as e:
            print(execute.traceback_exception(e))
            res = False
        else:
            res = True
    if res is None:
        res = False
    return res

def unarchive_s3(self, bucket, key, target_dir=None):
    """Download a tarball from S3 and extract it into target_dir."""
    if target_dir is None:
        target_dir = '{}/{}'.format(self.config['tmpdir'], uuid.uuid4())
    # Build the temp path before the try so the cleanup code below can
    # always reference it.
    path = '{}.tar.gz'.format(target_dir)
    try:
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        if self.__from_s3(bucket, key, path):
            res = unarchive(path, target_dir, self.config['tmpdir'])
            os.remove(path)
            return res
        else:
            return None
    except Exception as e:
        print(execute.traceback_exception(e))
        if os.path.exists(path):
            os.remove(path)
        return None

def __from_s3(self, bucket, key, path):
    """Download an S3 object to a local file, retrying while credentials are unavailable."""
    res = None
    i = 0
    while res is None and i < self.retry:
        try:
            self.get_bucket(bucket).download_file(key, path)
        except botocore.exceptions.NoCredentialsError:
            # Credentials may not be available yet; wait and retry.
            i += 1
            time.sleep(1)
        except Exception as e:
            print(execute.traceback_exception(e))
            res = False
        else:
            # Treat an empty download as a failure and clean up the file.
            if os.path.getsize(path) > 0:
                res = True
            else:
                res = False
                os.remove(path)
    if res is None:
        res = False
    return res

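# Note on the retry loops in __to_s3/__from_s3 (a reading of intent, not
# documented behaviour): botocore raises NoCredentialsError when it cannot
# locate credentials yet, e.g. an instance profile still being provisioned,
# so those two helpers sleep one second and retry up to self.retry times,
# while any other exception fails the transfer immediately.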