コード例 #1
0
def learn_lr_classifier(training_corpus):
        """Fit a weight vector by repeated shuffled passes over the corpus.

        Every example in ``training_corpus`` is indexable: element 0 is the
        label and the remaining elements are passed on as features.  The
        target is 1.0 when ``float(example[0])`` equals the first entry
        returned by ``get_labels()`` and 0.0 otherwise.

        Passes continue while ``get_norm(new_weights, previous_weights)``
        exceeds ``convergence_threshold``, and stop after at most
        ``max_iters`` passes.

        Returns the weight list, which has ``get_vocabulary_size() + 1``
        entries (presumably index 0 is the bias term -- confirm against
        ``get_prediction`` / ``update_weights``).
        """
        vocabulary_size = get_vocabulary_size()
        class_labels = get_labels()
        weights = [0] * (vocabulary_size + 1)

        # Start with a change of 1.0 so the first convergence check is made
        # against a fixed value rather than a measured one.
        weight_change = 1.0
        epoch = 0
        while True:
                if not (weight_change > convergence_threshold):
                        break
                epoch += 1
                if epoch > max_iters:
                        break

                # Snapshot the weights so the change over this pass can be measured.
                previous_weights = weights[:]

                # Visit the examples in a fresh random order on every pass.
                epoch_order = list(training_corpus)
                shuffle(epoch_order)
                for example in epoch_order:
                        if float(example[0]) == class_labels[0]:
                                target = 1.0
                        else:
                                target = 0.0
                        error = target - get_prediction(example[1:], weights)
                        # update_weights is expected to modify ``weights`` in place.
                        update_weights(example[1:], weights, error)

                weight_change = get_norm(weights, previous_weights)
        return weights
コード例 #2
0
def test_logistic_regression(w):
        """Predict a label for every row of the testing data and write them out.

        ``w`` is the learned weight sequence; ``w[0]`` is used as the starting
        score for every row and ``w[feature_id]`` is added for each feature
        listed on the row.

        The function returns ``None`` immediately when ``generate_labels()``
        is falsy.  Otherwise it parses ``testing_data_filepath`` (split on a
        single space) through ``FileProcessor``, scores each row, and hands
        the list of predicted labels to ``fp.generate_output`` together with
        ``output_filepath``.  A confirmation line is printed when that call
        returns a truthy value.
        """
        if not generate_labels():
                return
        fp = FileProcessor(testing_data_filepath, ' ')
        rows = fp.parse_input_file()
        labels = get_labels()

        output = []
        for row in rows:
                # The first column is the row's own label (presumably the gold
                # label); it is not needed to make a prediction, so skip it.
                score = w[0]
                for feature in row[1:]:
                        # Features look like "<id>:<value>"; only the id is used, so
                        # each listed feature contributes its weight exactly once.
                        # NOTE(review): an id outside the range of ``w`` raises
                        # IndexError -- confirm test ids never exceed the vocabulary.
                        feature_id = int(feature.split(':')[0])
                        score += w[feature_id]

                # Probability >= 0.5 maps to the first label, otherwise the second.
                output.append(labels[0] if sigmoid(score) >= 0.5 else labels[1])

        if fp.generate_output(output_filepath, output):
                # Parenthesised single-argument form prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print('Successfully generated predictions.lr')