def does_period_have_data(inst, db, ssl, table, start_dt, stop_dt):
    """Probe ``table`` through impala-shell and report whether any row exists.

    A ``SELECT 'found-data' ... LIMIT 1`` query is assembled, collapsed onto a
    single line, embedded in an ``impala-shell`` pipeline and executed with
    ``envoy.run``.

    Args:
        inst: Value handed to ``impala-shell -i``.
        db: Value handed to ``impala-shell -d``.
        ssl: Passed to ``tools.format_ssl`` to produce the ssl option text.
        table: Table name interpolated into the FROM clause (aliased ``c``).
        start_dt: ``datetime.datetime`` or None.
        stop_dt: ``datetime.datetime`` or None.  If either bound is None the
            query carries no date filter.

    Returns:
        True if the marker literal shows up in the command's stdout, False
        otherwise.  On a non-zero exit status the command, stderr and stdout
        are printed and ``tools.abort`` is called.

    Raises:
        AssertionError: if a bound is neither None nor a datetime.
    """
    # Same checks, same order: start bound first, then stop bound.
    for bound in (start_dt, stop_dt):
        assert bound is None or isinstance(bound, datetime.datetime)

    bounded = start_dt is not None and stop_dt is not None
    ymd_filter = (
        tools.get_ymd_filter(tools.dt_to_iso8601(start_dt),
                             tools.dt_to_iso8601(stop_dt))
        if bounded
        else ''
    )
    ssl_opt = tools.format_ssl(ssl)

    query = """ SELECT 'found-data'
                FROM {tab} c
                WHERE 1 = 1
                {filter}
                LIMIT 1
            """.format(tab=table, filter=ymd_filter)
    # Squash all runs of whitespace so the query fits inside the quoted -q arg.
    sql = ' '.join(query.split())

    cmd = """ impala-shell -i {inst} -d {db} --quiet -B {ssl} -q "{sql}" | columns | cut -f 1 """.format(
        inst=inst, db=db, ssl=ssl_opt, sql=sql)

    outcome = envoy.run(cmd)
    if outcome.status_code == 0:
        return 'found-data' in outcome.std_out.strip()

    # Failure path: dump everything useful for diagnosis, then abort.
    print(cmd)
    print(outcome.std_err)
    print(outcome.std_out)
    tools.abort("Error: does_period_have_data() failed!")
def get_cmd(inst, db, child_table, child_col, parent_table, parent_col,
            start_ts, stop_ts, ssl):
    """Build the impala-shell command that counts orphaned child rows.

    The generated SQL left-outer-joins ``child_table`` to ``parent_table`` on
    ``child_col = parent_col`` and counts the rows where the parent side is
    NULL.  Nothing is executed here; only the command text is produced.

    Args:
        inst: Value handed to ``impala-shell -i``.
        db: Value handed to ``impala-shell -d``.
        child_table: Table on the left side of the join (aliased ``c``).
        child_col: Join column on the child table.
        parent_table: Table on the right side of the join (aliased ``p``).
        parent_col: Join column on the parent table.
        start_ts: Start bound passed to ``tools.get_ymd_filter``, or None.
        stop_ts: Stop bound passed to ``tools.get_ymd_filter``, or None.
            If either bound is None no date filter is appended.
        ssl: Passed to ``tools.format_ssl`` to produce the ssl option text.

    Returns:
        A ``(cmd, mode)`` tuple where ``mode`` is ``'incremental'`` when a
        non-empty date filter was applied and ``'full'`` otherwise.
    """
    have_both_bounds = start_ts is not None and stop_ts is not None
    ymd_filter = tools.get_ymd_filter(start_ts, stop_ts) if have_both_bounds else ''

    query = """ WITH t1 AS (                       \
                    SELECT c.{c_col} AS child_col, \
                           p.{p_col} AS par_col    \
                    FROM {c_tab} c                 \
                        LEFT OUTER JOIN {p_tab} p  \
                        ON c.{c_col} = p.{p_col}   \
                    WHERE p.{p_col} IS NULL        \
                    {filter}                       \
                )                                  \
                SELECT COALESCE(COUNT(*), 0)       \
                FROM t1                            \
            """.format(c_col=child_col, p_col=parent_col,
                       c_tab=child_table, p_tab=parent_table,
                       filter=ymd_filter)
    # Collapse whitespace so the statement sits on one line inside -q "...".
    sql = ' '.join(query.split())

    ssl_opt = tools.format_ssl(ssl)
    cmd = """ impala-shell -i {inst} -d {db} --quiet -B {ssl} -q "{sql}" """.format(
        inst=inst, db=db, ssl=ssl_opt, sql=sql)

    # Mode follows the filter text itself, not merely the presence of bounds.
    if ymd_filter:
        mode = 'incremental'
    else:
        mode = 'full'
    return cmd, mode
def test_bad_dates(self):
    """A start value that is not a timestamp makes get_ymd_filter raise ValueError."""
    bad_start = 'foo'
    valid_stop = '20160102T235959'
    with pytest.raises(ValueError):
        mod.get_ymd_filter(bad_start, valid_stop)
def test_none_input(self):
    """A None start bound produces an empty filter string."""
    ymd_filter = mod.get_ymd_filter(None, '20160102T235959')
    assert ymd_filter == ''
def test_iso8601_ext(self):
    """A start bound written with '-' and ':' separators yields a filter naming the year."""
    start_ts = '2016-01-02T03:04:05'
    stop_ts = '20160102T235959'
    assert '2016' in mod.get_ymd_filter(start_ts, stop_ts)
def test_iso8601_basic(self):
    """Separator-free start and stop bounds yield a filter naming the year."""
    start_ts = '20160102T000000'
    stop_ts = '20160102T235959'
    assert '2016' in mod.get_ymd_filter(start_ts, stop_ts)