def get_access_raw(marker=None, end=None, limit=10000, chunk_output=False):
    """
    Generator over every log entry listed in the log container.

    The container listing is requested one page at a time (at most
    ``limit`` entries per request, since most swift servers cap a single
    listing at 10,000 entries).  After each non-empty page the next request
    resumes from the name of the last entry received; an empty page ends
    the iteration.

    :param marker: value passed as ``marker`` on the first listing request
    :param end: value passed as ``end_marker`` on every listing request
    :param limit: value passed as ``limit`` on every listing request
    :param chunk_output: when true, yield each page as a whole; otherwise
        yield the entries of each page one at a time
    """
    client = StandardClient(**settings.swiftly_config)
    while True:
        # The listing itself is the last element of the response tuple.
        page = client.get_container(
            settings.LOG_CONTAINER,
            marker=marker,
            end_marker=end,
            limit=limit,
        )[-1]
        if not page:
            break

        # Remember where this page stopped so the next request continues
        # from there.
        marker = page[-1]['name']

        if chunk_output:
            yield page
            continue
        for entry in page:
            yield entry
def get_log_data(name):
    """
    Yield the valid lines of one gzip-compressed log object.

    The object is fetched in full (``stream=False``) from
    ``settings.LOG_CONTAINER``, decompressed in memory and read as text;
    only lines accepted by ``line_is_valid`` are yielded.

    :param name: (string) Name of the log object in the log container
    """
    client = StandardClient(**settings.swiftly_config)
    response = client.get_object(settings.LOG_CONTAINER, name, stream=False)

    # The object body is the last element of the response tuple.
    compressed = io.BytesIO(response[-1])
    decompressed = gzip.GzipFile(mode='rb', fileobj=compressed)
    reader = io.TextIOWrapper(io.BufferedReader(decompressed))

    with reader as lines:
        for line in lines:
            if not line_is_valid(line):
                continue
            yield line
def _get_url(shortcode):
    """Retrieve the long URL for a shortcode from the cache or from swift.

    A truthy cache hit is returned directly.  Otherwise the object named
    ``shortcode`` is HEAD-requested in the configured container; any status
    other than 200 yields ``None``.  On success the value of the
    ``x-object-meta-longurl`` header (``None`` if absent) is stored in the
    cache and returned.
    """
    cached = cache.get(shortcode)
    if cached:
        return cached

    config = app.config
    cf = StandardClient(
        auth_url=config['CF_AUTH_URL'],
        auth_user=config['CF_USERNAME'],
        auth_key=config['CF_API_KEY'],
        snet=config['USE_SNET'],
        auth_cache_path=config['SWIFTLY_AUTH_CACHE_PATH'],
        eventlet=config['USE_EVENTLET'],
        region=config['CF_REGION'],
        verbose=_swiftlyv
    )

    response = cf.head_object(config['CF_CONTAINER'], shortcode)
    status, headers = response[0], response[2]
    if status != 200:
        return None

    longurl = headers.get('x-object-meta-longurl')
    cache.set(shortcode, longurl)
    return longurl
from StringIO import StringIO from swiftly.client import StandardClient import os import random import gzip import datetime import cPickle as pickle import sys # from custom_bloom import filter_logs, get_logs, CustomBloomFilter from custom_bloom_filter import filter_logs, get_logs, CustomBloomFilter, SIZE_OF_BLOOMFILTER, NO_OF_HASH_FUNCTION client = StandardClient( auth_url='https://swauth.ord1.swift.racklabs.com/auth/v1.0', auth_user='******', auth_key='VHZmEKSJm6nNs', insecure=True) def get_objects_by_date_range(start="2017/04/16/10", end="2017/04/16/11"): container = client.get_container("access_raw", marker=start, end_marker=end, decode_json=True) c = container[2] object_list = container[3] print "- " * 5 print "-- Container Stats --" print "Total size of access_raw: {}".format(
@app.route('/')
def index():
    """Render the landing page template."""
    return render_template('index.html')


@app.errorhandler(404)
def page_not_found(e):
    """Render the landing page with ``error=404`` and a 404 status code."""
    page = render_template('index.html', error=404)
    return page, 404


if __name__ == '__main__':
    try:
        config = app.config
        cf = StandardClient(
            auth_url=config['CF_AUTH_URL'],
            auth_user=config['CF_USERNAME'],
            auth_key=config['CF_API_KEY'],
            snet=config['USE_SNET'],
            auth_cache_path=config['SWIFTLY_AUTH_CACHE_PATH'],
            eventlet=config['USE_EVENTLET'],
            region=config['CF_REGION'],
            verbose=_swiftlyv
        )

        # Create the container if it doesn't exist
        container_name = config['CF_CONTAINER']
        head_status = cf.head_container(container_name)[0]
        if head_status != 204:
            cf.put_container(container_name)
    except Exception as err:
        # Any failure while preparing the container aborts startup.
        print("Got -> %s" % err)
        sys.exit(1)

    app.run(host='0.0.0.0', debug=app.config['DEBUG'])