def write():
    """Render one table per race leg with the row(s) holding the lowest time.

    Missing leg times are filled with zero, the data is passed through
    ``funcs.removeNotFinished`` and ``funcs.convertTimes``, and for each of
    the five numeric leg columns the rows equal to that column's minimum
    are shown with ``st.table``.
    """
    data = funcs.get_data()

    # Assign the filled column back rather than calling
    # ``data[col].fillna(..., inplace=True)``: the in-place call acts on an
    # intermediate Series and does not update ``data`` when pandas
    # Copy-on-Write is active.
    fill_values = {'Swim': 0.0, 'Bike': 0, 'Run': 0, 'T1': 0, 'T2': 0}
    for column, value in fill_values.items():
        data[column] = data[column].fillna(value)

    data = funcs.removeNotFinished(data)
    data = funcs.convertTimes(data)
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)

    # (section title, numeric column produced by funcs.convertTimes)
    sections = (
        ('Melhor natação', 'SwimN'),
        ('Melhor T1', 'T1N'),
        ('Melhor bike', 'BikeN'),
        ('Melhor T2', 'T2N'),
        ('Melhor corrida', 'RunN'),
    )
    for title, column in sections:
        udisp.title_awesome(title)
        best = data[data[column] == data[column].min()]
        st.table(removeAddicionalColumns(best))
def write():
    """Show stacked charts of the first 10 male and female rows of a category.

    The category is chosen in the sidebar; the male and female divisions are
    the rows whose ``Division`` equals ``M<category>`` and ``F<category>``.
    """
    data = funcs.get_data()
    data['CountryName'] = data['Country'].apply(funcs.getCountryName)
    categorias = funcs.getCategories(data)
    option = st.sidebar.selectbox("Selecione a categoria", sorted(categorias))

    unused_columns = ['Division', 'Division Rank', 'Gender', 'Gender Rank']
    groups = (
        ('M', "Top 10 registros masculinos"),
        ('F', "Top 10 registros femininos"),
    )
    for prefix, title in groups:
        # Build a new frame instead of dropping in place on a filtered slice
        # (SettingWithCopy), and keep the result of reset_index, which the
        # previous code computed and then discarded.
        atletas = (
            data.loc[data['Division'] == f'{prefix}{option}']
            .drop(columns=unused_columns)
            .reset_index(drop=True)
        )
        udisp.title_awesome(title)
        st.altair_chart(
            funcs.createStackPlot(funcs.getDataAndConvert(atletas.head(10))))
def handle(self, *args, **options):
    """Read the signals from ``options['file_name']`` and store them.

    ``get_data`` must return exactly seven sequences (time plus six signal
    channels); they are saved as one ``Signal`` record named after the file.
    """
    file_name = options['file_name']
    (sig_time, sig_1_1, sig_1_3, sig_2_1,
     sig_2_2, sig_3_1, sig_3_3) = get_data(file_name)

    payload = dict(
        time=sig_time,
        sig_1_1=sig_1_1,
        sig_1_3=sig_1_3,
        sig_2_1=sig_2_1,
        sig_2_2=sig_2_2,
        sig_3_1=sig_3_1,
        sig_3_3=sig_3_3,
    )
    Signal.objects.create(name=options['file_name'], data=payload)
def write():
    """Show one athlete's result row and, unless DNF, a time-share bar chart.

    The athlete is picked by name in the sidebar. The chart plots each leg's
    share of the summed numeric leg times.
    """
    data = funcs.get_data()
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)

    names = sorted(data['Name'].unique())
    option = st.sidebar.selectbox('Buscar atleta pelo nome:', names)

    atleta = data.loc[data['Name'] == option].drop(['Country'], axis=1)
    # The blank 'hack' index hides the row labels in the rendered table.
    st.table(atleta.assign(hack='').set_index('hack'))

    atleta = funcs.convertTimes(atleta)
    legs = ['Swim', 'T1', 'Bike', 'T2', 'Run']
    source = pd.DataFrame({
        'Atividade': legs,
        'TimeN': [getValueUniq(atleta, f'{leg}N') for leg in legs],
        'Tempo': [getValueUniq(atleta, leg) for leg in legs],
        'Ordem': list(range(1, len(legs) + 1)),
    })

    if getValueUniq(atleta, 'Overall Rank') == 'DNF':
        return

    x_encoding = alt.X('PercentOfTotal:Q',
                       axis=alt.Axis(format='.0%'),
                       title='Porcentagem do total')
    y_encoding = alt.Y('Atividade:N',
                       sort=alt.EncodingSortField(field="Ordem",
                                                  order='ascending'))
    chart = alt.Chart(source).transform_joinaggregate(TotalTime='sum(TimeN)')
    chart = chart.transform_calculate(
        PercentOfTotal="datum.TimeN / datum.TotalTime")
    chart = chart.mark_bar().encode(x=x_encoding, y=y_encoding,
                                    tooltip=['Tempo'])
    st.altair_chart(chart.properties(height=450, width=700))
def write():
    """Draw one plot per race leg via ``createPlot``, each in its own colour."""
    data = funcs.removeNotFinished(funcs.convertTimes(funcs.get_data()))
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)

    leg_colours = (
        ('Swim', 'green'),
        ('T1', 'red'),
        ('Bike', 'yellow'),
        ('T2', 'black'),
        ('Run', 'pink'),
    )
    for leg, colour in leg_colours:
        createPlot(data, leg, colour)
def write():
    """Compare two athletes leg by leg: mirrored bar chart plus two tables.

    Both athletes are picked by name in the sidebar. The chart places the
    first athlete's bars on the left (x axis reversed), the leg labels in the
    middle and the second athlete's bars on the right.
    """
    data = funcs.convertTimes(funcs.get_data())
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)
    data = funcs.removeNotFinished(data)

    # Chart
    athletes = data['Name'].unique()
    option1 = st.sidebar.selectbox('Buscar atleta pelo nome:', sorted(athletes))
    atleta1 = data.loc[data['Name'] == option1].drop(['Country'], axis=1)
    option2 = st.sidebar.selectbox('Segundo atleta:', sorted(athletes))
    atleta2 = data.loc[data['Name'] == option2].drop(['Country'], axis=1)

    legs = ('Swim', 'T1', 'Bike', 'T2', 'Run')
    sides = (('1', atleta1), ('2', atleta2))
    rows = []
    # One row per (leg, athlete); 'val' tags which side of the chart it feeds.
    for position, leg in enumerate(legs, start=1):
        for side, atleta in sides:
            rows.append({
                'val': side,
                'Order': position,
                'name': funcs.getValueUniq(atleta, 'Name'),
                'Tempo': funcs.getValueUniq(atleta, leg),
                'TempoN': funcs.getValueUniq(atleta, f'{leg}N'),
                'Atividade': leg,
            })
    df = pd.DataFrame(rows)

    def activity_axis():
        # Hidden y axis shared by all three panels, ordered by race sequence.
        return alt.Y('Atividade:N', axis=None,
                     sort=alt.EncodingSortField(field="Order",
                                                order='ascending'))

    base = alt.Chart(df).properties(width=400)
    color_scale = alt.Scale(domain=[option1, option2],
                            range=['#1f77b4', '#1f77b4'])

    left = base.transform_filter(alt.datum.val == '1')
    left = left.encode(
        y=activity_axis(),
        x=alt.X('sum(TempoN):Q', title='Tempo',
                sort=alt.SortOrder('descending')),
        color=alt.Color('name:N', scale=color_scale, legend=None),
        tooltip=['Tempo:N'],
        order=alt.Order('Order', sort='ascending'),
    )
    left = left.mark_bar().properties(title=option1)

    middle = base.encode(
        y=activity_axis(),
        text=alt.Text('Atividade:N'),
        order=alt.Order('Order', sort='ascending'),
    )
    middle = middle.mark_text().properties(width=40)

    right = base.transform_filter(alt.datum.val == '2')
    right = right.encode(
        y=activity_axis(),
        x=alt.X('sum(TempoN):Q', title='Tempo'),
        color=alt.Color('name:N', scale=color_scale, legend=None),
        tooltip=['Tempo:N'],
    )
    right = right.mark_bar().properties(title=option2)

    st.altair_chart(alt.concat(left, middle, right, spacing=5))

    # Table
    table_rows = (
        ('Swim', 'Swim'),
        ('T1', 'T1'),
        ('Bike', 'Bike'),
        ('T2', 'T2'),
        ('Run', 'Run'),
        ('Total', 'Overall'),
    )
    df1 = pd.DataFrame([
        {'Atividade': label, 'Tempo': funcs.getValueUniq(atleta1, column)}
        for label, column in table_rows
    ])
    df2 = pd.DataFrame([
        {'Atividade': label, 'Tempo': funcs.getValueUniq(atleta2, column)}
        for label, column in table_rows
    ])

    with Grid("1 1", color="#000000", background_color="#FFFFFF") as grid:
        grid.cell("a", 1, 2, 1, 2).dataframe(
            df1.set_index('Atividade', inplace=False))
        grid.cell("b", 2, 3, 1, 2).dataframe(
            df2.set_index('Atividade', inplace=False))
def write():
    """Render a one-row DNS/DNF summary table, then one block per status.

    The table reports, per leg, how many rows have a non-null time and what
    percentage of all rows have a null time together with a DNS/DNF overall
    rank (for the run: DNF only). ``prepareBlock`` is then called for the
    'DQ', 'DNS' and 'DNF' statuses.
    """
    data = funcs.get_data()
    data = funcs.convertTimes(data)

    # Table
    total = data.shape[0]
    rank = data['Overall Rank']
    is_dnf = rank.eq('DNF')
    is_dns_or_dnf = rank.eq('DNS') | is_dnf

    def finished(column):
        # Rows with a recorded (non-null) time for this leg.
        return total - int(data[column].isnull().sum())

    def percent(mask):
        # Every percentage goes through funcs.showPercent; the function
        # previously mixed it with a bare ``showPorcent`` for later columns.
        return funcs.showPercent(100 * mask.mean())

    df = pd.DataFrame({
        'Athletes': [total],
        'Swim Finish': [finished('Swim')],
        'Swim DNS/DNF': [percent(data['Swim'].isnull() & is_dns_or_dnf)],
        'Bike Finish': [finished('Bike')],
        'Bike DNF': [percent(data['Bike'].isnull() & is_dns_or_dnf)],
        'Run Finish': [finished('Run')],
        'Run DNF': [percent(data['Run'].isnull() & is_dnf)],
        'Overall DNS/DNF': [percent(is_dns_or_dnf)],
    })
    # The blank 'hack' index hides the row label in the rendered table.
    df = df.assign(hack='').set_index('hack')
    st.table(df)

    # Charts
    dropped = [
        'Overall', 'Run', 'Bike', 'Swim', 'T1', 'T2',
        'Division Rank', 'Gender Rank',
    ]
    data = data.drop(dropped, axis=1).sort_values(['Division'], ascending=True)
    data['Atletas'] = 1
    for status in ('DQ', 'DNS', 'DNF'):
        prepareBlock(data, status)
def main():
    """Train or evaluate a LightGBM regressor, driven by the global ``args``.

    With ``args.use_original`` the CSV is split 60/20/20 into train/dev/test
    frames (shuffled first when ``args.iid``); otherwise data loaders are
    built and a saved autoencoder's encoder is loaded. With ``args.test`` the
    pickled model is scored on the test split and compared with a
    mean-of-train baseline; otherwise a model is trained and pickled.
    """
    if args.use_original:
        df = pd.read_csv(args.csv_path)
        frame = df.sample(frac=1) if args.iid else df
        cut_points = [int(.6*len(df)), int(.8*len(df))]
        trn, dev, tst = np.split(frame, cut_points)
        model_path = f'./ae_regressor/best_model/{args.label_type}/norm_{args.norm_type}/{args.data_type}/img_flatten/original'
    else:
        trn_loader, dev_loader, tst_loader = get_dataloader(
            csv_path=args.csv_path,
            batch_size=args.batch_size,
            label_type=args.label_type,
            iid=args.iid,
            transform=args.norm_type,
            img_size=args.img_size,
        )
        # Create model
        model_path = f'./ae_regressor/best_model/{args.label_type}/norm_{args.norm_type}/{args.data_type}/CAE'
        autoencoder = F.create_model(args)
        weights_file = os.path.join(model_path, 'autoencoder.pkl')
        checkpoint = torch.load(weights_file)
        autoencoder.module.load_state_dict(checkpoint['model'])
        encoder = autoencoder.module.encoder

    os.makedirs(model_path, exist_ok=True)
    print(f"** Training progress with {args.data_type} condition **")

    if args.test:
        load_path = os.path.join(model_path, 'lightgbm.pkl')
        with open(load_path, 'rb') as f:
            best_model = pickle.load(f)

        if args.use_original:
            x_tst, y_tst = F.get_original_data(args, tst, 'tst')
            _, y_trn = F.get_original_data(args, trn, 'trn')
        else:
            x_tst, y_tst = F.get_data(args, tst_loader, encoder, 'tst')
            _, y_trn = F.get_data(args, trn_loader, encoder, 'trn')

        y_pred = best_model.predict(x_tst)
        mae = mean_absolute_error(y_tst, y_pred)
        mape = mean_absolute_percentage_error(y_true=y_tst, y_pred=y_pred)
        print(f"[Test] MAE:{mae}, MAPE:{mape}")

        # Baseline: predict the training-label mean for every test sample.
        mean_value = np.full_like(y_tst, np.mean(y_trn))
        mae = mean_absolute_error(y_tst, mean_value)
        mape = mean_absolute_percentage_error(y_true=y_tst, y_pred=mean_value)
        print(f"[Mean Value] MAE:{mae}, MAPE:{mape}")
        return

    # Extract the training data
    if args.use_original:
        x_train, y_train = F.get_original_data(args, trn, 'trn')
        x_dev, y_dev = F.get_original_data(args, dev, 'dev')
    else:
        x_train, y_train = F.get_data(args, trn_loader, encoder, 'trn')
        x_dev, y_dev = F.get_data(args, dev_loader, encoder, 'dev')
    print(f'Data volumn for grid search: {len(y_train)}')

    d_train = lgb.Dataset(data=x_train, label=y_train)
    d_dev = lgb.Dataset(data=x_dev, label=y_dev)

    params = {
        'learning_rate': 0.1,
        'boosting_type': 'gbdt',
        'objective': 'regression_l1',
        'metric': 'mae',
        'num_leaves': 32,  # default: 31
        'min_data': 20,  # number of data in a leaf: overfitting, default: 20
        'device': 'cpu',
        'bagging_fraction': 0.3,
        'bagging_freq': 10,
        'lambda_l1': 0.7,
    }

    model = lgb.train(
        params=params,
        train_set=d_train,
        num_boost_round=2000,
        valid_sets=d_dev,
        verbose_eval=100,
        early_stopping_rounds=100,
    )

    predict_dev = model.predict(x_dev)
    mae = mean_absolute_error(y_dev, predict_dev)
    mape = mean_absolute_percentage_error(y_dev, predict_dev)
    print(f"MAE: {mae}, MAPE: {mape}")

    save_path = os.path.join(model_path, 'lightgbm.pkl')
    with open(save_path, 'wb') as f:
        pickle.dump(model, f)
import pandas as pd
import numpy as np
import altair as alt
import streamlit as st
import utils.display as udisp
import utils.functions as funcs

# Module-level dataset, loaded at import time and read by
# calculeMedianFromCat below.
data = funcs.get_data()
data = funcs.convertTimes(data)
data = funcs.removeNotFinished(data)
data['Country Name'] = data['Country'].apply(funcs.getCountryName)
# NOTE(review): astype returns a new DataFrame and the result is not
# assigned, so this statement leaves `data` unchanged.
data.astype({'BikeN': int})


def calculeMedianFromCat(cat, n):
    """Collect median leg times for the first ``n`` rows of division ``cat``.

    Builds a dict with the division as 'Name', the integer-truncated medians
    of the numeric leg columns, and formatted times from
    ``funcs.secondsToTime``.

    NOTE(review): the visible body stops after setting aux['T2']; there is
    no 'Run' entry and no return statement, so as written the function
    returns None. Confirm whether the tail of the function is missing.
    """
    df = data[data['Division'] == cat].head(n)
    aux = {
        'Name': cat,
        'SwimN': int(df['SwimN'].median()),
        'T1N': int(df['T1N'].median()),
        'BikeN': int(df['BikeN'].median()),
        'T2N': int(df['T2N'].median()),
        'RunN': int(df['RunN'].median()),
    }
    # Formatted counterparts of the numeric medians above.
    aux['Swim'] = funcs.secondsToTime(aux['SwimN'])
    aux['T1'] = funcs.secondsToTime(aux['T1N'])
    aux['Bike'] = funcs.secondsToTime(aux['BikeN'])
    aux['T2'] = funcs.secondsToTime(aux['T2N'])
def write():
    """Render the country statistics page.

    Sections, in display order:
      1. bar chart of athletes per country (labelled with the country code);
      2. table of the 10 countries with most athletes and their share;
      3. table of the 10 countries with most division wins;
      4. per-gender chart of countries among the first ``q`` rows of the
         selected category;
      5. stacked chart of countries with athletes ranked within the top ``q``
         of any division.
    The category and ``q`` are chosen in the sidebar.
    """
    data = funcs.get_data()

    # --- Number of athletes per country ---
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)
    data['Atletas'] = 1  # counter column, summed by the group-bys below
    countryes_sum = data.groupby(['Country', 'Country Name']).agg(
        {"Atletas": "sum"})
    df = countryes_sum.reset_index()

    udisp.title_awesome("Quantidade de Atletas por país")
    bars = alt.Chart(df).mark_bar().encode(
        alt.Y('Atletas', type='quantitative', title='Quantidade de Atletas'),
        alt.X('Country Name:N', title='País'),
        tooltip=['Atletas'])
    text = bars.mark_text(
        align='left',
        baseline='middle',
        dx=2  # nudge the label so it does not sit on top of the bar
    ).encode(text='Country')
    # Render the layered chart. The previous code built it with a misspelt
    # ``widht`` keyword, discarded the result and displayed only ``bars``.
    # Size is set on the layer so both sub-charts share it.
    st.altair_chart((bars + text).properties(height=450, width=600))

    # --- Countries with most athletes ---
    udisp.title_awesome('Top 10 países com mais atletas')
    total = data.shape[0]
    df['Porcentagem'] = df['Atletas'].map(
        lambda count: funcs.showPercent(funcs.calcPercent(total, count)))
    # The blank 'hack' index hides the row labels in the rendered table.
    df = (df.sort_values('Atletas', ascending=False)
            .head(10)
            .assign(hack='')
            .set_index('hack'))
    st.table(df)

    # --- Countries with most division winners ---
    udisp.title_awesome('Top países com mais vitórias')
    categorias = funcs.getCategories(data)
    totalCat = len(categorias)
    winners = data[data['Division Rank'].eq('1')]
    df = (winners.groupby(['Country', 'Country Name'])
                 .agg({"Atletas": "sum"})
                 .rename(columns={'Atletas': 'Vitórias'})
                 .sort_values('Vitórias', ascending=False)
                 .reset_index()
                 .assign(hack='')
                 .set_index('hack')
                 .head(10))
    # Series.map also works when there are no winners (empty frame), where a
    # row-wise DataFrame.apply would not yield a column.
    df['Porcentagem'] = df['Vitórias'].map(
        lambda wins: funcs.showPercent(funcs.calcPercent(totalCat, wins)))
    st.table(df)

    # --- Top q athletes per country within the selected category ---
    option = st.sidebar.selectbox("Selecione a categoria", sorted(categorias))
    # assign() returns a new frame, avoiding column assignment on a slice.
    atletas_m = data.loc[data['Division'] == f'M{option}'].assign(
        Gender='Masculino')
    atletas_f = data.loc[data['Division'] == f'F{option}'].assign(
        Gender='Feminino')

    q = st.sidebar.selectbox("Quantidade de Atletas", [5, 10, 15, 20])
    udisp.title_awesome(f'Top {q} atletas por país na categoria')

    gender_keys = ['Country', 'Country Name', 'Gender']
    division_sum_m = atletas_m.head(int(q)).groupby(gender_keys).agg(
        {"Atletas": "sum"})
    division_sum_f = atletas_f.head(int(q)).groupby(gender_keys).agg(
        {"Atletas": "sum"})
    topAtletas = pd.concat([division_sum_f, division_sum_m])
    source = topAtletas.reset_index().rename(columns={
        'Country': 'abrev',
        'Country Name': 'name',
        'Gender': 'Sexo',
    })

    c = alt.Chart(source).mark_bar().encode(
        x=alt.X('Sexo:N', axis=alt.Axis(title=None)),
        y=alt.Y('Atletas:Q', axis=alt.Axis(offset=1)),
        color='Sexo:N',
        column=alt.Column('name:N',
                          title='País',
                          header=alt.Header(labelAngle=270,
                                            labelAlign='right')))
    st.altair_chart(c)

    # --- Countries with athletes in the top q of every category ---
    udisp.title_awesome(f'Top {q} atletas por país em todas categorias')
    data = funcs.removeNotFinished(data)
    data = data.astype({"Division Rank": int})
    data = data[data['Division Rank'] <= q]
    countryes_sum = data.groupby(
        ['Country', 'Country Name', 'Division']).agg({"Atletas": "sum"})
    source = countryes_sum.reset_index().rename(columns={
        'Country': 'abrev',
        'Country Name': 'name',
        'Division': 'Categoria',
    })

    # One domain entry per category, in first-appearance order, instead of
    # one entry per grouped row (which repeated categories).
    color_scale = alt.Scale(
        domain=source['Categoria'].unique().tolist(),
        range=["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"])
    y_axis = alt.Axis(
        title=None,
        offset=1,
        ticks=False,
        minExtent=60,
        domain=False)

    c = alt.Chart(source).mark_bar().encode(
        x='Atletas:Q',
        y=alt.Y('name:N', axis=y_axis),
        tooltip=['Atletas', 'Categoria'],
        color=alt.Color(
            'Categoria:N',
            legend=alt.Legend(title='Categoria'),
            scale=color_scale,
        ))
    st.altair_chart(c.properties(width=900))