def new_data_to_publish(config, section, blob):
    # Get the metadata for our old chunk

    # If necessary, fetch the existing data from S3, otherwise open a local file
    if ((config.has_option('main', 's3_upload')
         and config.getboolean('main', 's3_upload'))
            or (config.has_option(section, 's3_upload')
                and config.getboolean(section, 's3_upload'))):
        conn = boto.s3.connection.S3Connection()
        bucket = conn.get_bucket(config.get('main', 's3_bucket'))
        s3key = config.get(section, 's3_key') or config.get(section, 'output')
        key = bucket.get_key(s3key)
        if key is None:
            # most likely a new list
            print "{0} looks like it hasn't been uploaded to s3://{1}/{2}".format(
                section, bucket.name, s3key)
            key = boto.s3.key.Key(bucket)
            key.key = s3key
            key.set_contents_from_string("a:1:32:32\n" + 32 * '1')
        current = tempfile.TemporaryFile()
        key.get_contents_to_file(current)
        current.seek(0)
    else:
        current = open(config.get(section, 'output'), 'rb')

    old = chunk_metadata(current)
    current.close()

    new = chunk_metadata(blob)

    if old['checksum'] != new['checksum']:
        return True
    return False

def new_data_to_publish(config, section, blob):
    # Get the metadata for our old chunk

    # If necessary fetch the existing data from S3, otherwise open a local file
    if ((config.has_option('main', 's3_upload')
         and config.getboolean('main', 's3_upload'))
            or (config.has_option(section, 's3_upload')
                and config.getboolean(section, 's3_upload'))):
        conn = boto.s3.connection.S3Connection()
        bucket = conn.get_bucket(config.get('main', 's3_bucket'))
        s3key = config.get(section, 's3_key') or config.get(section, 'output')
        key = bucket.get_key(s3key)
        if key is None:
            # most likely a new list
            print("{0} looks like it hasn't been uploaded to "
                  "s3://{1}/{2}".format(section, bucket.name, s3key))
            key = boto.s3.key.Key(bucket)
            key.key = s3key
            key.set_contents_from_string("a:1:32:32\n" + 32 * '1')
        current = tempfile.TemporaryFile()
        key.get_contents_to_file(current)
        current.seek(0)
    else:
        current = open(config.get(section, 'output'), 'rb')

    old = chunk_metadata(current)
    current.close()

    new = chunk_metadata(blob)

    if old['checksum'] != new['checksum']:
        return True
    return False

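# A minimal sketch (not from the original project) of the configuration that
# new_data_to_publish() reads; the section, bucket, and file names below are
# placeholders invented for illustration.
from ConfigParser import ConfigParser  # Python 2, matching the boto-era code above

config = ConfigParser()
config.add_section('main')
config.set('main', 's3_upload', 'true')            # enables the S3 comparison path
config.set('main', 's3_bucket', 'example-bucket')  # placeholder bucket name

config.add_section('example-list')
config.set('example-list', 'output', 'example-list.out')  # local file / fallback key name
config.set('example-list', 's3_key', 'example-list')      # preferred S3 key, if set

# new_data_to_publish(config, 'example-list', blob) would then compare the
# checksum of blob against the object at s3://example-bucket/example-list.
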
def sendBlob(self, oid, serial):
    try:
        key = self._s3key
    except AttributeError:
        # Lazily create and cache the S3 connection, bucket and key on first use
        # (reusing the bucket just fetched instead of opening a second connection)
        conn = boto.s3.connection.S3Connection()
        bucket = self._s3bucket = conn.get_bucket(bucket_name)
        key = self._s3key = boto.s3.key.Key(bucket)
    key.key = "%s/%s/%s" % (folder, oid.encode('hex'), serial.encode('hex'))
    f = tempfile.TemporaryFile()
    key.get_contents_to_file(f)
    f.seek(0)

    def store():
        # Stream the downloaded blob back to the client in 59000-byte chunks
        yield ('receiveBlobStart', (oid, serial))
        while 1:
            chunk = f.read(59000)
            if not chunk:
                break
            yield ('receiveBlobChunk', (oid, serial, chunk, ))
        f.close()
        yield ('receiveBlobStop', (oid, serial))

    self.client.rpc.callAsyncIterator(store())

def fetch(s3bucket, s3key, aws_access_key, aws_secret_key, output_file,
          headers=None):
    if isinstance(output_file, basestring):
        # open in binary mode so S3 payloads aren't altered by newline translation
        output_file = open(output_file, 'wb')
        _close_when_done_ = True
    else:
        _close_when_done_ = False

    connection = S3Connection(aws_access_key, aws_secret_key)
    bucket = connection.lookup(s3bucket)
    if bucket is None:
        sys.stderr.write(
            'bucket does not exist, may be caused by incorrect credentials')
        return 1

    key = boto.s3.key.Key(bucket, s3key)
    if not key.exists():
        sys.stderr.write('key does not exist within given bucket')
        return 1

    key.get_contents_to_file(output_file, headers=headers)

    if _close_when_done_:
        output_file.close()
    return 0

def fetch(s3bucket, s3key, aws_access_key, aws_secret_key, output_file,
          headers=None):
    if isinstance(output_file, basestring):
        # open in binary mode so S3 payloads aren't altered by newline translation
        output_file = open(output_file, 'wb')
        _close_when_done_ = True
    else:
        _close_when_done_ = False

    connection = S3Connection(aws_access_key, aws_secret_key)
    bucket = connection.lookup(s3bucket)
    if bucket is None:
        sys.stderr.write('bucket does not exist, may be caused by incorrect credentials')
        return 1

    key = boto.s3.key.Key(bucket, s3key)
    if not key.exists():
        sys.stderr.write('key does not exist within given bucket')
        return 1

    key.get_contents_to_file(output_file, headers=headers)

    if _close_when_done_:
        output_file.close()
    return 0

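# A hedged usage sketch for fetch(); the bucket, key, credential, and file
# names below are placeholders, not values from the original project.
status = fetch(
    s3bucket='example-bucket',
    s3key='logs/app.log',
    aws_access_key='AKIA...',   # placeholder credentials
    aws_secret_key='...',
    output_file='app.log',      # a path; an already-open file object also works
)
if status != 0:
    sys.stderr.write('download failed\n')   # non-zero means missing bucket or key
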
def sync(log_paths, es, location=None, db=None, force=False):
    """ Insert log records into Elastic Search. """
    create_index_templates(es)
    conn = None
    keys = log_paths
    if location:
        log_url = urllib3.util.parse_url(location)
        if log_url.scheme == 's3':
            s3_bucket = log_url.host
            s3_prefix = log_url.path
            if s3_prefix.startswith('/'):
                s3_prefix = s3_prefix[1:]
            if conn is None:
                conn = boto.connect_s3()
            bucket = conn.get_bucket(s3_bucket)
            if not log_paths:
                keys = bucket.list(prefix=s3_prefix)
            else:
                keys = [bucket.get_key(path) for path in log_paths]
    for key in keys:
        if isinstance(key, boto.s3.key.Key):
            name = key.key
        else:
            name = key
        if force:
            finished = False
        else:
            db.execute('select finished from UPLOAD where key=?', (name,))
            row = db.fetchone()
            finished = (row and row[0])
        if not finished:
            sys.stderr.write('uploading %s...\n' % name)
            if isinstance(key, boto.s3.key.Key):
                with tempfile.TemporaryFile() as temp_file:
                    key.get_contents_to_file(temp_file)
                    temp_file.seek(0)
                    (successes, errors) = sync_fileobj(temp_file, es, name)
            else:
                with open(name, 'rb') as fileobj:
                    (successes, errors) = sync_fileobj(fileobj, es, name)
            sys.stdout.write('%s: %d successes, %d errors\n'
                             % (name, successes, len(errors)))
            if errors:
                sys.stdout.write('error: %s\n' % str(errors))
            else:
                row_data = (datetime.now().isoformat(), name, True)
                db.execute(
                    'INSERT OR REPLACE into UPLOAD (dt,key,finished) VALUES (?,?,?)',
                    row_data)
                sys.stdout.write('%s: cache results\n' % name)
        else:
            sys.stdout.write('%s: cached (skipping)\n' % name)

def sync(log_paths, es, location=None, db=None, force=False):
    """ Insert log records into Elastic Search. """
    create_index_templates(es)
    conn = None
    keys = log_paths
    if location:
        log_url = urllib3.util.parse_url(location)
        if log_url.scheme == 's3':
            s3_bucket = log_url.host
            s3_prefix = log_url.path
            if s3_prefix.startswith('/'):
                s3_prefix = s3_prefix[1:]
            if conn is None:
                conn = boto.connect_s3()
            bucket = conn.get_bucket(s3_bucket)
            if not log_paths:
                keys = bucket.list(prefix=s3_prefix)
            else:
                keys = [bucket.get_key(path) for path in log_paths]
    for key in keys:
        if isinstance(key, boto.s3.key.Key):
            name = key.key
        else:
            name = key
        if force:
            finished = False
        else:
            db.execute('select finished from UPLOAD where key=?', (name,))
            row = db.fetchone()
            finished = (row and row[0])
        if not finished:
            sys.stdout.write('uploading %s...\n' % name)
            if isinstance(key, boto.s3.key.Key):
                with tempfile.TemporaryFile() as temp_file:
                    key.get_contents_to_file(temp_file)
                    temp_file.seek(0)
                    (successes, errors) = sync_fileobj(temp_file, es, name)
            else:
                with open(name, 'rb') as fileobj:
                    (successes, errors) = sync_fileobj(fileobj, es, name)
            sys.stdout.write('successes: %s\n' % str(successes))
            sys.stdout.write('errors: %s\n' % str(errors))
            if not errors:
                row_data = (datetime.now().isoformat(), name, True)
                db.execute(
                    'INSERT OR REPLACE into UPLOAD (dt,key,finished) VALUES (?,?,?)',
                    row_data)
                sys.stdout.write('done %s\n' % name)
        else:
            sys.stdout.write('skipping %s\n' % name)

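# sync() reads and writes an UPLOAD table through the cursor it is given. The
# schema below is an assumption inferred from the queries above (key, dt,
# finished), not taken from the original project.
import sqlite3

conn = sqlite3.connect('upload_cache.db')
conn.execute(
    'CREATE TABLE IF NOT EXISTS UPLOAD ('
    ' key TEXT PRIMARY KEY,'   # PRIMARY KEY makes INSERT OR REPLACE act as an upsert
    ' dt TEXT,'                # ISO timestamp from datetime.now().isoformat()
    ' finished INTEGER'        # truthy once a log file has been indexed
    ')')
db = conn.cursor()             # the cursor passed to sync(..., db=db)
# sync() itself does not commit, so commit after it returns:
# conn.commit()
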
def new_data_to_publish_to_s3(config, section, new):
    # Get the metadata for our old chunk

    # If necessary fetch the existing data from S3, otherwise open a local file
    if ((config.has_option('main', 's3_upload')
         and config.getboolean('main', 's3_upload'))
            or (config.has_option(section, 's3_upload')
                and config.getboolean(section, 's3_upload'))):
        conn = boto.s3.connection.S3Connection()
        bucket = conn.get_bucket(config.get('main', 's3_bucket'))
        s3key = config.get(section, 's3_key') or config.get(section, 'output')
        key = bucket.get_key(s3key)
        if key is None:
            # most likely a new list
            print('{0} looks like it hasn\'t been uploaded to '
                  's3://{1}/{2}'.format(section, bucket.name, s3key))
            key = boto.s3.key.Key(bucket)
            key.key = s3key
            key.set_contents_from_string('a:1:32:32\n' + 32 * '1')
        current = tempfile.TemporaryFile()
        key.get_contents_to_file(current)
        key.set_acl('bucket-owner-full-control')
        if CLOUDFRONT_USER_ID is not None:
            key.add_user_grant('READ', CLOUDFRONT_USER_ID)
        current.seek(0)
    else:
        current = open(config.get(section, 'output'), 'rb')

    old = chunk_metadata(current)
    current.close()

    s3_upload_needed = False
    if old['checksum'] != new['checksum']:
        s3_upload_needed = True

    return s3_upload_needed

def _fetch_file(data, source, file):
    if source.startswith('s3://'):
        key = _get_key(data, source)
        key.get_contents_to_file(file)
    else:
        raise NotImplementedError
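
# _fetch_file() depends on a _get_key() helper that is not shown. The sketch
# below is one plausible implementation, assuming 'source' is an
# s3://bucket/path URL and that credentials come from the environment rather
# than from the 'data' argument.
import boto

def _get_key(data, source):
    # Hypothetical helper: split s3://bucket/path/to/object into bucket and key name.
    # 'data' is unused in this sketch; the real helper may carry credentials or a
    # cached connection in it.
    bucket_name, _, key_name = source[len('s3://'):].partition('/')
    conn = boto.connect_s3()          # credentials from environment/boto config
    bucket = conn.get_bucket(bucket_name)
    key = bucket.get_key(key_name)
    if key is None:
        raise KeyError('no such key: %s' % source)
    return key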