def getValidationDataset(self):
    """Load the validation pairs with their variable-type columns attached.

    Reads the validation pairs and the validation info through ``data_io``
    and copies the ``"A type"`` and ``"B type"`` columns from the info
    object onto the pairs object.

    Returns:
        The object returned by ``data_io.read_valid_pairs()``, modified in
        place to carry the two extra type columns.
    """
    # Call form of print: same output on Python 2 for a single string
    # argument, and valid syntax on Python 3 (the statement form is not).
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    valid_info = data_io.read_valid_info()

    # Column assignment matches on the index when both objects are pandas
    # structures -- presumably pairs and info share the same index; confirm.
    valid["A type"] = valid_info["A type"]
    valid["B type"] = valid_info["B type"]
    return valid
def getDataset(self):
    """Return the train or validation pairs with type columns attached.

    The training files are read when ``self.getTrain`` is truthy, the
    validation files otherwise. The ``"A type"`` and ``"B type"`` columns
    of the info object are then copied onto the pairs object, which is
    returned.
    """
    # NOTE(review): ``self.getTrain`` is tested for truthiness, not called.
    # If it is a method rather than a boolean flag this branch is always
    # taken -- confirm against the class definition.
    if not self.getTrain:
        pairs = data_io.read_valid_pairs()
        type_info = data_io.read_valid_info()
    else:
        pairs = data_io.read_train_pairs()
        type_info = data_io.read_train_info()

    for column in ("A type", "B type"):
        pairs[column] = type_info[column]
    return pairs
def main():
    """Predict on the validation pairs and write the submission file.

    Reads the validation pairs and info, joins them column-wise, passes the
    result through ``train.get_types``, runs the stored classifier's
    ``predict`` on it and hands the flattened predictions to
    ``data_io.write_submission``.
    """
    print("Reading the valid pairs")
    pairs = data_io.read_valid_pairs()
    info = data_io.read_valid_info()
    # axis=1 places the info columns next to the pair columns.
    valid = train.get_types(pd.concat([pairs, info], axis=1))

    print("Loading the classifier")
    model = data_io.load_model()

    print("Making predictions")
    predictions = model.predict(valid).flatten()

    print("Writing predictions to file")
    # NOTE(review): ``fn`` is not defined in this function; presumably a
    # module-level output filename -- confirm.
    data_io.write_submission(predictions, fn)
def main():
    """Extract validation features, score them and write the submission.

    Steps:
      1. Read the validation pairs and run them through
         ``fe.feature_extractor().fit_transform``.
      2. Pass the result through ``np.nan_to_num`` and save it with
         ``data_io.save_valid_features``.
      3. Load the stored classifier and call ``predict_proba`` on the
         features.
      4. Write ``proba[:, 2] - proba[:, 0]`` per row as the submission.
    """
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()

    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    # nan_to_num maps NaN to 0 and +/-inf to large finite values, so the
    # saved features and the classifier input contain only finite numbers.
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    orig_predictions = classifier.predict_proba(trans_valid)
    # Score = probability in column 2 minus probability in column 0.
    # NOTE(review): presumably the classifier has three classes ordered so
    # that column 0 and column 2 are the two opposite directions -- confirm
    # against the training code.
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def extract_valid_features():
    """Extract features for the validation pairs and save them.

    Builds a ``feature_extractor``, runs ``fit_transform`` on the validation
    pairs (with ``y=None`` and the validation info as ``type_map``), prints
    the elapsed time in minutes, and saves the result as a DataFrame that
    keeps the pairs' index and uses the extractor's feature names as
    column labels.
    """
    started_at = time.time()
    extractor = feature_extractor()
    # The first element of each entry in ``extractor.features`` is used as
    # the column label of the saved frame.
    column_names = [entry[0] for entry in extractor.features]

    print("Reading the valid pairs")
    pairs = data_io.read_valid_pairs()

    print("Extracting features")
    # No targets exist for the validation set, so y is None.
    type_map = data_io.read_valid_info()
    extracted = extractor.fit_transform(pairs, y=None, type_map=type_map)

    elapsed_minutes = (time.time() - started_at) / 60.0
    print("Features extracted in " + str(elapsed_minutes) + " Minutes")

    print("Saving features")
    frame = pd.DataFrame(extracted, index=pairs.index)
    frame.columns = column_names
    data_io.save_valid_features(frame)
def extract_valid_features():
    """Run the feature extractor over the validation pairs and persist it.

    The extractor's ``fit_transform`` is called with the validation pairs,
    ``y=None`` and the validation info as ``type_map``. The elapsed time is
    printed in minutes. The output is wrapped in a DataFrame indexed like
    the input pairs, labelled with the extractor's feature names, and
    passed to ``data_io.save_valid_features``.
    """
    t0 = time.time()
    fx = feature_extractor()
    # Column labels come from the first element of each feature entry.
    labels = [feature[0] for feature in fx.features]

    print("Reading the valid pairs")
    valid_pairs = data_io.read_valid_pairs()

    print("Extracting features")
    # The validation set has no targets to fit against, hence y=None.
    valid_info = data_io.read_valid_info()
    values = fx.fit_transform(valid_pairs, y=None, type_map=valid_info)

    minutes = (time.time() - t0) / 60.0
    print("Features extracted in " + str(minutes) + " Minutes")

    print("Saving features")
    result = pd.DataFrame(values, index=valid_pairs.index)
    result.columns = labels
    data_io.save_valid_features(result)
probb_nz = probb[np.nonzero(probb)] jointp = np.outer(proba_nz, probb_nz) hpos = np.sum(np.log(jointp) * jointp) return -hpos if __name__ == '__main__': print 'Reading in {} data...'.format(DATA) if DATA == 'train': info = data_io.read_train_info() train = data_io.read_train_pairs() elif DATA == 'valid': info = data_io.read_valid_info() train = data_io.read_valid_pairs() else: raise ValueError print 'Saving coded info matrix...' codes = np.zeros(info.values.shape) lookup = {'Numerical': 1, 'Categorical': 2, 'Binary': 3} for i, t in enumerate(info.values): a, b = t codes[i, :] = [lookup[a], lookup[b]] savemat('matlab/{}info.mat'.format(DATA), {'codes': codes}, oned_as='column') print 'Saving value matrices...'
probb_nz = probb[np.nonzero(probb)] jointp = np.outer(proba_nz, probb_nz) hpos = np.sum(np.log(jointp) * jointp) return -hpos if __name__ == "__main__": print "Reading in {} data...".format(DATA) if DATA == "train": info = data_io.read_train_info() train = data_io.read_train_pairs() elif DATA == "valid": info = data_io.read_valid_info() train = data_io.read_valid_pairs() else: raise ValueError print "Saving coded info matrix..." codes = np.zeros(info.values.shape) lookup = {"Numerical": 1, "Categorical": 2, "Binary": 3} for i, t in enumerate(info.values): a, b = t codes[i, :] = [lookup[a], lookup[b]] savemat("matlab/{}info.mat".format(DATA), {"codes": codes}, oned_as="column") print "Saving value matrices..." for i, t in enumerate(train.values):