def rank(training_set, paradigm_lengths, category_description):
    """Print every feature headline together with its RFE rank.

    The training data is obtained from ``DataTransformer`` (with
    ``normalize=True``), then recursive feature elimination is run with a
    linear-kernel SVM, removing one feature per step until a single
    feature remains.  One ``<headline> <rank>`` line is written to stdout
    per headline.  In scikit-learn's RFE, rank 1 marks the selected
    feature; larger ranks were eliminated earlier.

    Args:
        training_set: Forwarded unchanged to ``DataTransformer``.
        paradigm_lengths: Forwarded unchanged to ``DataTransformer``.
        category_description: Forwarded unchanged to ``DataTransformer``.

    Returns:
        None.  The ranking is only printed.
    """
    transformer = DataTransformer(training_set, paradigm_lengths,
                                  category_description)
    headlines, matrix, targets = transformer.get_training_data_matrix(
        normalize=True)
    # Presumably a SciPy sparse matrix; densify it before fitting.
    dense_matrix = matrix.toarray()

    estimator = svm.SVC(C=1, kernel='linear')
    # Pass n_features_to_select by keyword: newer scikit-learn releases
    # reject it as a positional argument, older ones accept the keyword too.
    selector = RFE(estimator, n_features_to_select=1, step=1)
    selector = selector.fit(dense_matrix, targets)

    # headlines are indexed in parallel with the per-feature ranking_ array.
    for position, headline in enumerate(headlines):
        # A single pre-formatted string prints the same line under both the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (headline, selector.ranking_[position]))
def get_feature_percentage(training_set, paradigm_lengths, category_description):
    """Return a mapping from feature headline to extra-trees importance.

    The training data is obtained from ``DataTransformer`` (with
    ``normalize=True``) and an ``ExtraTreesClassifier`` with 10 trees is
    fitted on it.  Each headline is then mapped to the forest's
    ``feature_importances_`` value for the corresponding column.

    Args:
        training_set: Forwarded unchanged to ``DataTransformer``.
        paradigm_lengths: Forwarded unchanged to ``DataTransformer``.
        category_description: Forwarded unchanged to ``DataTransformer``.

    Returns:
        dict: headline -> importance score.  Entries are inserted from the
        most to the least important feature; if two columns share a
        headline, the less important one wins, as it is inserted last.

    Note:
        No ``random_state`` is passed to the forest, so the scores may
        differ between runs.
    """
    transformer = DataTransformer(training_set, paradigm_lengths,
                                  category_description)
    headlines, matrix, targets = transformer.get_training_data_matrix(
        normalize=True)
    # Presumably a SciPy sparse matrix; densify it before fitting.
    dense_matrix = matrix.toarray()

    forest = ExtraTreesClassifier(n_estimators=10)
    forest.fit(dense_matrix, targets)
    importances = forest.feature_importances_

    # Column indices ordered by descending importance.
    descending = np.argsort(importances)[::-1]

    return {headlines[column]: importances[column] for column in descending}