def getResults(response, table):
    # Prefer the table-named key, then a generic 'results' key, otherwise return the raw response.
    if Common.exists(response, table):
        return response[table]
    elif Common.exists(response, 'results'):
        return response['results']
    else:
        return response
def rip_multi_page(schema, table_key):
    page = 1
    # Fetch the first page once to learn the total count, then yield one URL per page.
    params = schema['static_params'] + [('page', page)]
    url = Common.buildUrl(schema['endpoints'][table_key], params)
    response = json.loads(urllib.request.urlopen(url).read())
    num_pages = math.ceil(int(response['count']) / int(response['perPage']))
    # Pages are 1-indexed, so include the final page.
    for page in range(1, num_pages + 1):
        params = schema['static_params'] + [('page', page)]
        yield Common.buildUrl(schema['endpoints'][table_key], params)
def escape(string):
    """SQL-escape a value: non-strings pass through unquoted, strings are wrapped in
    single quotes with embedded single quotes doubled (e.g. O'Brien -> 'O''Brien')."""
    # escape(x) if isNumber(x) else "'" + escape(x) + "'"
    if Common.isString(string):
        if string == 'desc':
            return '"desc"'
        else:
            result = str(string).split("'")
            return "'" + "''".join(result) + "'"
    else:
        return str(string)
def rip_aggregates(schema, table_key):
    from datetime import date
    date_format = '%Y-%m-%d'
    date_from = datetime.datetime.strptime('2000-01-01', date_format)
    date_to = datetime.datetime.strptime(date.today().strftime(date_format), date_format)
    num_days = (date_to - date_from).days + 1
    tickers = getAllTickers(schema['db_connection_str'])
    for ticker in tickers:
        # Walk one-day windows from 2000-01-01 up to today for each ticker.
        current_date = date_from
        while current_date < date_to:
            _date_to = current_date + datetime.timedelta(days=1)
            params = schema['static_params'] + [
                ('asset', ticker),
                ('date-from', current_date.strftime(date_format)),
                ('date-to', _date_to.strftime(date_format))
            ]
            yield Common.buildUrl(schema['endpoints'][table_key], params)
            current_date += datetime.timedelta(days=1)
def rip_ticker_detail(schema, table_key):
    tickers = getAllTickers(schema['db_connection_str'])
    for ticker in tickers:
        params = schema['static_params'] + [('asset', ticker)]
        yield Common.buildUrl(schema['endpoints'][table_key], params)
def rip_single_page(schema, table_key):
    yield Common.buildUrl(schema['endpoints'][table_key], schema['static_params'])
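# Illustrative sketch only: the ripper functions above and the driver below assume a
# 'schema' mapping shaped roughly like this. Every concrete value here (endpoint URLs,
# table keys, connection string) is a made-up placeholder, not the project's real
# configuration, which is presumably loaded from the schema file elsewhere.
_example_schema = {
    'db_connection_str': 'sqlite:///example.db',   # passed to getAllTickers()
    'static_params': [('format', 'json')],         # appended to every request URL
    'endpoints': {
        'tickers': 'https://api.example.com/v1/tickers',
        'aggregates': 'https://api.example.com/v1/aggregates',
    },
    'endpoint_rippers': {
        'default': rip_single_page,   # used when no table-specific ripper is registered
        'tickers': rip_multi_page,
        'aggregates': rip_aggregates,
    },
}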
# Pick the table-specific ripper if one is registered, otherwise fall back to the default.
ripper = schema['endpoint_rippers']['default']
if table_key in schema['endpoint_rippers']:
    ripper = schema['endpoint_rippers'][table_key]

# Collect the URLs to fetch, honoring the optional task limit.
urls = []
for url in ripper(schema, table_key):
    if task_limit >= 0 and len(urls) >= task_limit:
        break
    urls.append(url)

with alive_bar(len(urls)) as bar:
    for url in urls:
        if multithreaded:
            tm.do_task(Common.get_response, [schema_file, table_key, url])
        else:
            r = Common.get_response([schema_file, table_key, url])
            if r is not None:
                db_helper.execute(r)
            bar()
    # In multithreaded mode, advance the bar as worker tasks finish.
    tasks_complete = 0
    while not tm.all_tasks_complete():
        time.sleep(0.1)
        for i in range(0, tm.num_completed_tasks() - tasks_complete):
            bar()
        tasks_complete = tm.num_completed_tasks()

tm.reset()
exit()