Exemple #1
0
    def test_load_treated(self):
        """Check that the treated TCU file loads with the expected size.

        Loads ``TREATED_TCU_FILE`` through
        ``tcu_io.load_treated_csv_to_numpy`` and asserts that the result, and
        every named column looked up in it, holds the expected number of
        entries.
        """
        expected_len = 29202

        data = tcu_io.load_treated_csv_to_numpy(TREATED_TCU_FILE)
        # assertEqual instead of the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(expected_len, len(data))

        names = ('Cluster', 'ChavePregao', 'UASG', 'PregoeiroOficial',
                 'AceitoPara_CNPJ', 'PeloMenorLance', 'ValorReferencia',
                 'GanhoPregao', 'SuperFaturamento')

        for column_name in names:
            self.assertEqual(len(data[column_name]), expected_len)
def _contingency_table(data, row, col):
    """Count the co-occurrences of every distinct (row, col) value pair.

    Args:
        data: Mapping-like object indexed by column name; ``data[name]`` must
            support element-wise ``==`` yielding a boolean array
            (presumably the NumPy record array returned by
            ``tcu_io.load_treated_csv_to_numpy`` -- confirm against caller).
        row: Name of the column whose distinct values index the table rows.
        col: Name of the column whose distinct values index the table columns.

    Returns:
        A 2-D integer array where entry ``[i, j]`` is the number of records
        holding the i-th distinct ``row`` value together with the j-th
        distinct ``col`` value.
    """
    row_values = data[row]
    col_values = data[col]

    row_levels = list(set(row_values))
    col_levels = list(set(col_values))

    table = np.zeros((len(row_levels), len(col_levels)), dtype='i')
    for i, row_level in enumerate(row_levels):
        # The row mask does not depend on the column level: compute it once.
        row_mask = row_values == row_level
        for j, col_level in enumerate(col_levels):
            table[i, j] = (row_mask & (col_values == col_level)).sum()

    return table


def main(tcu_fpath):
    """Print correlation statistics for the treated TCU data set.

    Loads the file at ``tcu_fpath`` with ``tcu_io.load_treated_csv_to_numpy``,
    prints the result of ``stats.pearsonr`` for three pairs of value columns,
    and then, for each categorical pair, builds a contingency table and prints
    the chi-squared statistic, p-value and degrees of freedom returned by
    ``contingency.chi2_contingency``.

    Args:
        tcu_fpath: Path of the treated TCU CSV file to analyse.
    """
    data = tcu_io.load_treated_csv_to_numpy(tcu_fpath)
    print(stats.pearsonr(data['ValorReferencia'], data['PeloMenorLance']))
    print(stats.pearsonr(data['ValorReferencia'], data['GanhoPregao']))
    print(stats.pearsonr(data['GanhoPregao'], data['PeloMenorLance']))

    to_corr_cat = [('SuperFaturamento', 'PregoeiroOficial'),
                   ('SuperFaturamento', 'AceitoPara_CNPJ'),
                   ('SuperFaturamento', 'Cluster')]

    for pair in to_corr_cat:
        row, col = pair
        contingency_table = _contingency_table(data, row, col)

        # pair is itself a tuple, so wrap it to format it as a single value.
        print('Correlating %s' % (pair,))
        chi2, p, dof, _expected = \
            contingency.chi2_contingency(contingency_table)

        # Degrees of freedom is an integer count, hence %d.
        print('Correlation %s : chi2 = %f; p = %f; df = %d;'
              % (pair, chi2, p, dof))