예제 #1
0
 def getValidationDataset(self):
     """Load the validation pairs with their variable-type columns attached.

     Reads the validation pairs and the validation info through ``data_io``
     and copies the ``"A type"`` and ``"B type"`` entries of the info object
     onto the pairs object.

     Returns:
         The object returned by ``data_io.read_valid_pairs()``, updated in
         place so that it also carries ``"A type"`` and ``"B type"``.
         (Presumably a pandas DataFrame -- confirm against ``data_io``.)
     """
     # The parenthesised single-argument form prints the same text on
     # Python 2 and is valid syntax on Python 3, unlike the print statement.
     print("Reading the valid pairs")
     valid = data_io.read_valid_pairs()
     valid_info = data_io.read_valid_info()
     for column in ("A type", "B type"):
         valid[column] = valid_info[column]
     return valid
 def getDataset(self):
     """Load either the training or the validation pairs, with type columns.

     When ``self.getTrain`` is truthy the training pairs/info are read,
     otherwise the validation pairs/info. The ``"A type"`` and ``"B type"``
     entries of the info object are then copied onto the pairs object.

     NOTE(review): ``self.getTrain`` is tested for truthiness without being
     called; if it is a method rather than a flag the training branch is
     always taken -- confirm against the class definition.

     Returns:
         The pairs object, updated in place with ``"A type"`` and
         ``"B type"``.
     """
     if self.getTrain:
         pairs, info = data_io.read_train_pairs(), data_io.read_train_info()
     else:
         pairs, info = data_io.read_valid_pairs(), data_io.read_valid_info()
     for column in ("A type", "B type"):
         pairs[column] = info[column]
     return pairs
 def getDataset(self):
     """Return the selected pairs dataset with its type columns attached.

     The validation pairs/info are read when ``self.getTrain`` is falsy,
     the training pairs/info otherwise. ``"A type"`` and ``"B type"`` are
     then copied from the info object onto the pairs object, which is
     returned.

     NOTE(review): ``self.getTrain`` is only tested for truthiness, never
     called -- verify that it is a flag and not a method.
     """
     if not self.getTrain:
         dataset = data_io.read_valid_pairs()
         type_info = data_io.read_valid_info()
     else:
         dataset = data_io.read_train_pairs()
         type_info = data_io.read_train_info()

     # Attach the per-variable type entries to the pairs.
     dataset["A type"] = type_info["A type"]
     dataset["B type"] = type_info["B type"]
     return dataset
예제 #4
0
def main():
    """Predict on the validation pairs and write a submission file.

    The validation pairs and validation info are concatenated column-wise,
    passed through ``train.get_types``, and fed to the model returned by
    ``data_io.load_model()``. The flattened predictions are written with
    ``data_io.write_submission``.

    NOTE(review): ``fn`` is not defined in this function; presumably a
    module-level output file name -- confirm.
    """
    print("Reading the valid pairs")
    pairs = data_io.read_valid_pairs()
    info = data_io.read_valid_info()
    # Side-by-side join of pairs and info before the type handling step.
    frame = train.get_types(pd.concat([pairs, info], axis=1))

    print("Loading the classifier")
    model = data_io.load_model()

    print("Making predictions")
    flat_predictions = model.predict(frame).flatten()

    print("Writing predictions to file")
    data_io.write_submission(flat_predictions, fn)
예제 #5
0
def main():
    """Extract validation features, save them, and write a submission.

    Steps, in order:
      1. Read the validation pairs and transform them with
         ``fe.feature_extractor()``.
      2. Pass the result through ``np.nan_to_num`` and save it with
         ``data_io.save_valid_features``.
      3. Load the classifier and score every row as
         ``predict_proba[:, 2] - predict_proba[:, 0]``.
      4. Write the flattened scores with ``data_io.write_submission``.

    The classifier must therefore expose at least three probability
    columns. (Presumably columns 0 and 2 correspond to the two opposite
    causal directions -- confirm against the training labels.)
    """
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()
    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    # Make sure no NaN/inf values reach the classifier or the saved file.
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    # NOTE: a disabled per-variable-type dispatch (one classifier per
    # Numerical/non-Numerical combination) used to live here inside a string
    # literal, together with placeholder locals and a validation-info read
    # that only that dead code consumed. A single classifier scores all rows.
    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
예제 #6
0
def main():
    """Build validation features, persist them, and emit predictions.

    The validation pairs are transformed by ``fe.feature_extractor()``,
    cleaned with ``np.nan_to_num`` and saved through
    ``data_io.save_valid_features``. The model from ``data_io.load_model()``
    then produces class probabilities, and the per-row score written to the
    submission is ``proba[:, 2] - proba[:, 0]``.

    Requires ``predict_proba`` to return at least three columns.
    (Assumes columns 0 and 2 are the two opposite direction classes --
    TODO confirm against how the model was trained.)
    """
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()
    print("Transforming features")
    # Sanitise NaN/inf before the features are saved or scored.
    trans_valid = np.nan_to_num(features.fit_transform(valid))

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    # Dead code dropped from this spot: unused placeholder locals, an unused
    # validation-info read, and a string literal wrapping a disabled loop
    # that chose among four per-type classifiers. Only the single-classifier
    # path below was ever executed.
    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = (orig_predictions[:, 2] - orig_predictions[:, 0]).flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
예제 #7
0
파일: fe.py 프로젝트: diogo149/causality
def extract_valid_features():
    """Extract features for the validation pairs and save them.

    Builds a ``feature_extractor()``, runs ``fit_transform`` on the
    validation pairs (with ``y=None`` and the validation info as
    ``type_map``), prints the elapsed time in minutes, and saves the result
    as a DataFrame that shares the pairs' index and whose columns are named
    after the first element of each entry in ``features.features``.
    """
    start = time.time()
    features = feature_extractor()
    # Column labels: the first element of every feature entry.
    header = [entry[0] for entry in features.features]

    print("Reading the valid pairs")
    pairs = data_io.read_valid_pairs()

    print("Extracting features")
    # There are no targets for the validation data, hence y=None.
    extracted = features.fit_transform(
        pairs, y=None, type_map=data_io.read_valid_info())

    minutes = float(time.time() - start) / 60.0
    print("Features extracted in " + str(minutes) + " Minutes")

    print("Saving features")
    frame = pd.DataFrame(extracted, index=pairs.index)
    frame.columns = header
    data_io.save_valid_features(frame)
예제 #8
0
def extract_valid_features():
    """Compute and persist the feature table for the validation pairs.

    The validation pairs are run through ``feature_extractor().fit_transform``
    with ``y=None`` and ``type_map`` set to the validation info. The output
    is wrapped in a DataFrame indexed like the pairs, labelled with the
    first element of each ``features.features`` entry, and handed to
    ``data_io.save_valid_features``. The elapsed time is printed in minutes.
    """
    started_at = time.time()
    features = feature_extractor()
    column_names = [feature[0] for feature in features.features]

    print("Reading the valid pairs")
    valid_pairs = data_io.read_valid_pairs()

    print("Extracting features")
    # Validation data carries no labels, so nothing is passed as y.
    extracted = features.fit_transform(
        valid_pairs,
        y=None,
        type_map=data_io.read_valid_info(),
    )

    elapsed_seconds = float(time.time() - started_at)
    print("Features extracted in " + str(elapsed_seconds / 60.0) + " Minutes")

    print("Saving features")
    table = pd.DataFrame(extracted, index=valid_pairs.index)
    table.columns = column_names
    data_io.save_valid_features(table)
예제 #9
0
    probb_nz = probb[np.nonzero(probb)]

    jointp = np.outer(proba_nz, probb_nz)
    hpos = np.sum(np.log(jointp) * jointp)
    return -hpos


if __name__ == '__main__':
    # Script entry point: load the dataset selected by the module-level
    # DATA setting ('train' or 'valid') and export its variable-type info
    # as an integer-coded matrix in a MATLAB .mat file.

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print('Reading in {} data...'.format(DATA))

    if DATA == 'train':
        info = data_io.read_train_info()
        train = data_io.read_train_pairs()
    elif DATA == 'valid':
        info = data_io.read_valid_info()
        train = data_io.read_valid_pairs()
    else:
        # Same exception type as before, now naming the offending value.
        raise ValueError(
            "DATA must be 'train' or 'valid', got {!r}".format(DATA))

    print('Saving coded info matrix...')
    codes = np.zeros(info.values.shape)
    # Integer code written to the .mat file for each variable type; a type
    # string outside this table raises KeyError.
    lookup = {'Numerical': 1, 'Categorical': 2, 'Binary': 3}
    for i, t in enumerate(info.values):
        # Each info row is unpacked as exactly two type strings (A, B).
        a, b = t
        codes[i, :] = [lookup[a], lookup[b]]

    savemat('matlab/{}info.mat'.format(DATA), {'codes': codes},
            oned_as='column')

    print('Saving value matrices...')
예제 #10
0
    probb_nz = probb[np.nonzero(probb)]

    jointp = np.outer(proba_nz, probb_nz)
    hpos = np.sum(np.log(jointp) * jointp)
    return -hpos


if __name__ == "__main__":

    print "Reading in {} data...".format(DATA)

    if DATA == "train":
        info = data_io.read_train_info()
        train = data_io.read_train_pairs()
    elif DATA == "valid":
        info = data_io.read_valid_info()
        train = data_io.read_valid_pairs()
    else:
        raise ValueError

    print "Saving coded info matrix..."
    codes = np.zeros(info.values.shape)
    lookup = {"Numerical": 1, "Categorical": 2, "Binary": 3}
    for i, t in enumerate(info.values):
        a, b = t
        codes[i, :] = [lookup[a], lookup[b]]

    savemat("matlab/{}info.mat".format(DATA), {"codes": codes}, oned_as="column")

    print "Saving value matrices..."
    for i, t in enumerate(train.values):