def BoostingStump(x, y, estimate_intercept=True, no_of_variables=None, *args, **kwargs):
    '''
    Boosting stump with Pandas variables

    Input
    --------
    x                   Pandas DataFrame or Series of predictive variables
    y                   Pandas DataFrame or Series of target variables
    estimate_intercept  Boolean
    no_of_variables     integer, the number of variables to use
    *args, **kwargs     accepted but not used by this function

    Output
    --------
    ans     Pandas DataFrame with predictive variable name, target variable name,
            stump decision point of each x variable and weight corresponding to
            each x variable. An empty DataFrame is returned when no target
            column produced a result.

    Notes
    --------
    The target is to maximize the success rate of predicting the sign of y
    in-sample, weighted by the magnitude of y. Use caution to normalize y
    before estimation.
    x variables are used in the order that they are given.
    If there are multiple columns in y, each column will be predicted
    independently. A column whose estimation raises is logged as a warning
    and left out of the result.
    '''
    myx, myy = give_me_pandas_variables(x, y)

    # Keep only the leading no_of_variables predictors when more were supplied.
    if no_of_variables is not None:
        if np.size(myx, 1) > no_of_variables:
            myx = myx.iloc[:, :no_of_variables]

    ans = []
    # range (not the Python-2-only xrange) so the loop runs on Python 2 and 3.
    for i in range(np.size(myy, 1)):
        try:
            ans.append(estimate_boosting_stump(myx, myy.iloc[:, i], estimate_intercept))
        except Exception as e:
            # Best effort: one failing target column must not abort the others.
            logger.warning('Boosting stump failed at variable %s: %s' % (str(myy.columns[i]), str(e)))

    # pd.concat raises ValueError on an empty list; hand back an empty frame instead.
    if not ans:
        return pd.DataFrame()
    return pd.concat(ans, axis=0)
def get_series_all_release(series_name, start_date=None, end_date=None):
    '''
    Read the stored releases of one series from the release table

    Input
    --------
    series_name     series to read, passed as the single entry of column_list
    start_date      first element of the index_range tuple given to the read script
    end_date        second element of the index_range tuple given to the read script

    Output
    --------
    Pandas DataFrame with columns time_index, realtime_start, series_name and
    value, or None when the query returns no rows or fails (a failure is
    logged as a warning)
    '''
    query = get_release_read_script(
        RELEASE_TABLE_NAME,
        index_range=(start_date, end_date),
        column_list=[series_name])
    ok, rows = du.execute_sql_output_script(DATABASE_NAME, query)

    if not ok:
        # On failure the second element carries the error information.
        logger.warning('Failed to read data: ' + str(rows))
        return None
    if len(rows) == 0:
        return None

    frame_columns = ['time_index', 'realtime_start', 'series_name', 'value']
    return pd.DataFrame(np.array(rows), columns=frame_columns)
def get_table_column_values(database_name, table_name, column_name=TIMESERIES_COLUMN_NAME):
    '''
    Read the values of one column of a table

    Input
    --------
    database_name   database to run the query against
    table_name      table to read from
    column_name     column to read, TIMESERIES_COLUMN_NAME by default

    Output
    --------
    list holding the first field of every returned row, or None when the
    query returns no rows or fails (a failure is logged as a warning)
    '''
    query = get_table_column_value_scripts(table_name, column_name)
    ok, rows = execute_sql_output_script(database_name, query)

    if not ok:
        # On failure the second element carries the error information.
        logger.warning('Failed to read column names: ' + str(rows))
        return None
    if len(rows) == 0:
        return None

    return [row[0] for row in rows]
def get_database_connection(database_name='mysql'):
    '''
    Open a connection to the given database

    Input
    --------
    database_name   database to connect to, 'mysql' by default

    Output
    --------
    the object returned by mdb.connect (called with HOST, USER and PASSWORD),
    or None when connecting raises (the error is logged as a warning)
    '''
    try:
        connection = mdb.connect(host=HOST, user=USER, passwd=PASSWORD, db=database_name)
    except Exception as error:
        logger.warning('Failed to establish databse connection: ' + str(error))
        connection = None
    return connection
def release_bulk_insert(data, table_name, series_name):
    '''
    Replace release data in a table: run the bulk delete script, then the
    bulk insert script

    Input
    --------
    data            data handed to both script builders
    table_name      target table
    series_name     series the data belongs to

    Output
    --------
    None. A non-None result from either script is logged as a warning; the
    insert is not attempted when the delete reports an error.
    '''
    clear_sql = get_release_bulk_delete_script(data, table_name, series_name)
    error = du.execute_sql_input_script(DATABASE_NAME, clear_sql)
    if error is not None:
        logger.warning('Failed to clear data from table: ' + str(error))
        return

    write_sql = get_release_bulk_insert_script(data, table_name, series_name)
    error = du.execute_sql_input_script(DATABASE_NAME, write_sql)
    if error is not None:
        logger.warning('Failed to insert data: ' + str(error))
def pandas_delete(database_name, table_name, column_name, index_name, value_name,
                  index_range=None, column_list=None, data_name=None):
    '''
    Build a delete script with get_pandas_delete_script and run it

    Input
    --------
    database_name   database to run the script against
    table_name, column_name, index_name, value_name,
    index_range, column_list, data_name
                    forwarded unchanged to get_pandas_delete_script

    Output
    --------
    None. A non-None result from executing the script is logged as a warning.
    '''
    sql = get_pandas_delete_script(
        table_name, column_name, index_name, value_name,
        index_range, column_list, data_name)
    error = execute_sql_input_script(database_name, sql)
    if error is None:
        return
    logger.warning('Failed to delete data from table: ' + str(error))
def pandas_read(database_name, table_name, column_name, index_name, value_name,
                index_range=None, column_list=None, data_name=None):
    '''
    Build a read script with get_pandas_read_script, run it and convert the
    rows with get_pandas_output

    Input
    --------
    database_name   database to run the script against
    table_name, column_name, index_name, value_name,
    index_range, column_list, data_name
                    forwarded unchanged to get_pandas_read_script

    Output
    --------
    result of get_pandas_output(rows, column_name, index_name, value_name),
    or None when the query returns no rows or fails (a failure is logged as
    a warning)
    '''
    query = get_pandas_read_script(
        table_name, column_name, index_name, value_name,
        index_range, column_list, data_name)
    ok, rows = execute_sql_output_script(database_name, query)

    if not ok:
        # On failure the second element carries the error information.
        logger.warning('Failed to read data: ' + str(rows))
        return None
    if len(rows) == 0:
        return None

    return get_pandas_output(rows, column_name, index_name, value_name)
def pandas_bulk_insert(data, database_name, table_name, column_name, index_name, value_name,
                       data_name=None, data_column_name=None):
    '''
    Replace data in a table: run the bulk delete script, then the bulk
    insert script

    Input
    --------
    data            data handed to both script builders
    database_name   database to run the scripts against
    table_name, column_name, index_name, data_name, data_column_name
                    forwarded to both script builders
    value_name      forwarded to the insert script builder only

    Output
    --------
    None. A non-None result from either script is logged as a warning; the
    insert is not attempted when the delete reports an error.
    '''
    clear_sql = get_pandas_bulk_delete_script(
        data, table_name, column_name, index_name, data_name, data_column_name)
    error = execute_sql_input_script(database_name, clear_sql)
    if error is not None:
        logger.warning('Failed to clear data from table: ' + str(error))
        return

    write_sql = get_pandas_bulk_insert_script(
        data, table_name, column_name, index_name, value_name,
        data_name, data_column_name)
    error = execute_sql_input_script(database_name, write_sql)
    if error is not None:
        logger.warning('Failed to insert data: ' + str(error))
def create_table(database_name, table_name, table_format):
    '''
    Run the CREATE_TABLE_IF_NOT_EXISTS template for one table

    Input
    --------
    database_name   database to run the script against
    table_name      first value substituted into the template
    table_format    second value substituted into the template

    Output
    --------
    None. A non-None result from executing the script is logged as a warning.
    '''
    template_values = (table_name, table_format)
    sql = CREATE_TABLE_IF_NOT_EXISTS % template_values
    error = execute_sql_input_script(database_name, sql)
    if error is None:
        return
    logger.warning('Failed to create table: ' + str(error))