def get_dynamic_k_for_experiment(name): conn = db_connection.get_conn() df = pd.read_sql_query( "select k_value from data_science.experiments_dynamic_ratios where name = '{name}'" .format(name=name), conn) db_connection.put_conn(conn) return df.k_value[0]
def get_experiment_agg_data(experiment_id, start_date = '2020-01-11', end_date = None, filters = {}, ordering = None): ''' Filters the input table for a specific experiment_id ''' conn = db_connection.get_conn() print('get_experiment_agg_data experiment_id: ' + str(experiment_id) + ' start_date: ' + str(start_date) + ' end_date: ' + str(end_date)) start_date = clean_date(start_date) end_date = clean_date(end_date) filters_clause = "" filters_array = [] input_table = 'data_science.experiments_aggregate_base' if 'city' in filters: filters_array.append("city = '{selected_city}'".format(selected_city=filters['city'])) # filters_clause = " and city = '{selected_city}'".format(selected_city=filters['city']) input_table = 'data_science.experiments_aggregate_base_per_city' if 'distinct_dates_7' in filters: filters_array.append("distinct_dates_7 >= 7") if 'distinct_dates_14' in filters: filters_array.append("distinct_dates_7 >= 14") if 'alternative_ratio' in filters: filters_array.append("alternative_ratio > 0") if filters_array != []: filters_clause = ' AND ' + ' and '.join(filters_array) order_clause = '' if not ordering is None: order_clause = 'order by {ordering}'.format(ordering=ordering) query = """ select * from {input_table} where fee_experiment_id = {selected_experiment} and date >= '{start_date}' {end_date_clause} {filters_clause} {order_clause} """.format(selected_experiment=experiment_id,input_table=input_table,start_date=start_date,end_date_clause=_build_end_date_clause(end_date),filters_clause=filters_clause,order_clause=order_clause) # print(query) df_agg = pd.read_sql_query(query, conn) df_agg = adjust_zero_ratios(adjust_dataframe_fields(df_agg)) db_connection.put_conn(conn) return df_agg
def get_alternatives_for_experiment(experiment_id, start_date = '2020-01-11', end_date = None): conn = db_connection.get_conn() start_date = clean_date(start_date) end_date = clean_date(end_date) df = pd.read_sql_query(""" select max(alternative_ratio) as alternative_ratio, alternative, alternative_id from data_science.experiments_aggregate_base_per_city where fee_experiment_id = {selected_experiment} and date >= '{start_date}' {end_date_clause} group by alternative, alternative_id """.format(selected_experiment=experiment_id,start_date=start_date,end_date_clause=_build_end_date_clause(end_date)), conn) db_connection.put_conn(conn) return df
def get_alternative_agg_data(alternative_id, start_date, end_date, filters = {}): conn = db_connection.get_conn() baseline_alternative = sample_baseline_for_alternative(alternative_id, conn) df_dates = sample_valid_dates(alternative_id, baseline_alternative, conn, start_date, end_date) dates_str = ','.join(map(lambda x: "'{x}'".format(x=x.strftime('%Y-%m-%d')), df_dates['date'].values[:-1])) alternative_ids = [str(alternative_id), str(baseline_alternative)] location_filter = "" if 'city' in filters: location_filter = " AND city = '{city}'".format(city=filters['city']) base_selection = get_consolidated_orders_data_sql() + """ and created_day in ({dates}) and alternative_id in ({alternatives}) {location_filter} """.format(alternatives=','.join(alternative_ids),dates=dates_str,location_filter=location_filter) df_cum_results = pd.read_sql_query(get_cumulative_agg_results_sql(base_selection, postgres=True, initial_date=df_dates['date'].values[0], final_date=df_dates['date'].values[-2]), conn) df_cum_results = adjust_dataframe_fields(df_cum_results) db_connection.put_conn(conn) return df_cum_results
def get_experiment_agg_data_per_city(experiment_id, start_date, end_date=None): conn = db_connection.get_conn() start_date = clean_date(start_date) end_date = clean_date(end_date) aux = pd.read_sql_query(""" select sum(orders)::integer as order_id, city, city_id from data_science.experiments_aggregate_base_per_city where fee_experiment_id = {selected_experiment} and date >= '{start_date}' {end_date_clause} group by city, city_id order by 1 desc """.format(selected_experiment=experiment_id,start_date=start_date,end_date_clause=_build_end_date_clause(end_date)), conn) db_connection.put_conn(conn) return aux