Ejemplo n.º 1
0
 def processing_dataset_train(self):
     """Load labels and features through ``process_data``.

     The data file is looked up in the ``data`` directory three levels
     above the directory containing this module. The resolved directory,
     the resolved file path and the number of labels are printed.

     Returns:
         The ``(train_labels, train_features)`` pair, in the order
         ``process_data`` yields them.
     """
     # NOTE(review): despite the "train" in the name, the file read here is
     # ``self.test_file`` -- looks like a copy/paste slip; confirm whether a
     # dedicated training-file attribute should be used instead.
     module_dir = os.path.dirname(__file__)
     data_dir = os.path.join(module_dir, '../../../', 'data')
     print("data directory :", data_dir)

     dataset_path = os.path.join(data_dir, self.test_file)
     print("Filepath :", dataset_path)

     train_labels, train_features = process_data(dataset_path)
     label_count = len(train_labels)
     print("len of labels {}".format(label_count))
     return train_labels, train_features
Ejemplo n.º 2
0
 def processing_dataset_test(self):
     """Load the test labels and features through ``process_data``.

     The file named by ``self.test_file`` is looked up in the ``data``
     directory three levels above the directory containing this module.
     The resolved directory and file path are printed, followed by a
     completion message once ``process_data`` has returned.

     Returns:
         The ``(test_labels, test_features)`` pair, in the order
         ``process_data`` yields them.
     """
     module_dir = os.path.dirname(__file__)
     data_dir = os.path.join(module_dir, '../../../', 'data')
     print("data directory :", data_dir)

     # A single path is used for both the log line and the load, so the
     # printed path is always exactly the file that gets processed.
     test_path = os.path.join(data_dir, self.test_file)
     print("Filepath :", test_path)

     test_labels, test_features = process_data(test_path)
     print("Dataset Processing Done")
     return test_labels, test_features
Ejemplo n.º 3
0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        if a > 0.5:
            return 1
        else:
            return 0


# Script entry point: loads "rcv1_train.binary" from the sibling ``data``
# directory and feeds it to LogisticRegression.train_with_sketch one example
# at a time, printing the value returned for every example.
if __name__ == '__main__':
    # NOTE(review): presumably a heap-profiler handle (guppy's hpy) -- it is
    # not read anywhere in the visible part of this block; confirm it is used.
    h = hpy()
    # The data directory is resolved relative to this file: <this dir>/../data
    current_directory = (os.path.dirname(__file__))
    data_directory_path = os.path.join(current_directory, '..', 'data')
    fileName = "rcv1_train.binary"
    filePath = os.path.join(data_directory_path, fileName)
    labels, features = process_data(filePath)
    # Feature count handed to the model; presumably the dimensionality of the
    # RCV1 data set -- TODO confirm.
    D = 47236
    lgr = LogisticRegression(num_features=D)
    print("len of labels {}".format(len(labels)))
    # Timestamp taken just before training starts; not read in the visible
    # lines of this block.
    start_time = time.time()
    # range(0, 1) yields a single epoch, i.e. one pass over the examples.
    for epoch in range(0, 1):
        print("epoch {}".format(epoch))
        for i in range(len(labels)):
            print("i {}".format(i))
            label = labels[i]
            # Rescales the label x to (1 + x) / 2; presumably this turns
            # -1/+1 labels into 0/1 -- confirm against process_data's output.
            label = (1 + label) / 2
            example_features = features[i]
            # Each item of an example is indexed as a pair: element 0 is used
            # as the feature position and element 1 as the feature value.
            feature_pos = [item[0] for item in example_features]
            feature_vals = [item[1] for item in example_features]
            # One training call per example; its return value is reported as
            # the loss.
            loss = lgr.train_with_sketch(feature_pos, feature_vals, label)
            print("loss {}".format(loss))