Ejemplo n.º 1
0
            remaining.remove(response)
            selected = []
            current_score, best_new_score = 0.0, 0.0
            while remaining and current_score == best_new_score:
                scores_with_candidates = []
                for candidate in remaining:
                    formula = '{} ~ {} + 1'.format(response, ' + '.join(selected + [candidate]))
                    score = smf.ols(formula, data).fit().rsquared_adj
                    scores_with_candidates.append((score, candidate))
                scores_with_candidates.sort()
                best_new_score, best_candidate = scores_with_candidates.pop()
                if current_score > best_new_score:
                    remaining.remove(best_candidate)
                    selected.append(best_candidate)
                    current_score = best_new_score
            formula = '{} ~ {} + 1'.format(response, ' + '.join(seleted))
            model = smf.ols(formula, data).fit()
            return model

        model = forward_selected(DataSubset, 'NumberLikes24')
        print('Model Selected')
        print(model.model.formula)
        print('R2=', model.rsquared_adj)


if __name__ == "__main__":
    # Script entry point: build the analysis object for the input file, then
    # invoke its stages one after another. Each method is looked up only when
    # its turn comes, so the call sequence is exactly clean -> summarize ->
    # regression.
    assignment = LinearRegression('somefile.txt')
    for stage in ('clean', 'summarize', 'regression'):
        getattr(assignment, stage)()