Esempio n. 1
0
def generate_labels():
    """Write the first column of every testing-data row to the labels file.

    The rows come from parsing ``testing_data_filepath``; the collected
    first-column values are handed to ``generate_output`` together with
    ``labels_output_filepath``.

    Returns:
        True when ``generate_output`` reports a truthy result, False otherwise.
    """
    # NOTE(review): the ' ' argument is presumably the column delimiter --
    # confirm against FileProcessor.
    fp = FileProcessor(testing_data_filepath, ' ')
    expected = [row[0] for row in fp.parse_input_file()]

    # Always hand back a bool rather than True-or-implicit-None, so both
    # outcomes are spelled out for callers that test the result.
    return bool(fp.generate_output(labels_output_filepath, expected))
Esempio n. 2
0
def test_logistic_regression(w):
    """Predict a label for every testing row using weights ``w`` and save them.

    ``generate_labels()`` is run first; if it reports failure the function
    returns ``None`` without producing any predictions.

    For each parsed testing row the first column is skipped and every
    remaining column is read as a ``"<feature_id>:<rest>"`` token.  The score
    starts at ``w[0]`` and ``w[feature_id]`` is added once per token.  When
    ``sigmoid(score) >= 0.5`` the first value from ``get_labels()`` is
    predicted, otherwise the second.  The predictions are then passed to
    ``generate_output`` with ``output_filepath``.

    Args:
        w: indexable weight container; index 0 is used as the initial score
            and the remaining indices are looked up by integer feature id.
    """
    if not generate_labels():
        return

    fp = FileProcessor(testing_data_filepath, ' ')
    rows = fp.parse_input_file()
    labels = get_labels()

    output = []
    for row in rows:
        score = w[0]
        for feature in row[1:]:
            # Only the id before ':' is used; the text after the colon does
            # not influence the score.
            # NOTE(review): that is only equivalent to a dot product when
            # every listed feature has value 1 -- confirm the data format.
            feature_id = int(feature.split(':')[0])
            score += w[feature_id]

        output.append(labels[0] if sigmoid(score) >= 0.5 else labels[1])

    if fp.generate_output(output_filepath, output):
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Successfully generated predictions.lr')
def get_vocabulary_size():
    """Return how many entries parsing the vocabulary file yields."""
    vocabulary_reader = FileProcessor(vocabulary_filepath, ' ')
    return len(vocabulary_reader.parse_input_file())
def get_labels():
    """Return the second column of the first two label-file rows as floats.

    The result is a two-element list: the value from row 0 followed by the
    value from row 1.
    """
    parsed = FileProcessor(labels_filepath, ' ').parse_input_file()
    # Index explicitly (rather than slicing) so a file with fewer than two
    # rows still raises instead of yielding a shorter list.
    return [float(parsed[row_index][1]) for row_index in (0, 1)]
Esempio n. 5
0
def learn_logistic_regression():
    """Parse the training data file and return ``learn_lr_classifier``'s result for it."""
    reader = FileProcessor(training_data_filepath, ' ')
    return learn_lr_classifier(reader.parse_input_file())