Ejemplo n.º 1
0
def preLoadData():
    """Read the train/dev data sets, letting command-line arguments override paths.

    All six file names default to the ``FILE_*`` module constants.  The
    value of ``len(sys.argv)`` selects which defaults are replaced:

    * 5 -- ``argv[1]``..``argv[4]`` replace the X-train, X-dev, y-train and
      y-dev paths, in that order;
    * 3 -- ``argv[1]`` and ``argv[2]`` replace the X-train and X-dev paths;
    * 2 -- ``argv[1]`` replaces only the X-dev path;
    * any other length -- every default is kept.

    The two binary-label paths are never taken from the command line.

    Returns:
        The tuple ``(X_train, y_train, X_dev, y_dev, y_bin_train, y_bin_dev)``
        unpacked from ``read_train_dev_files_with_binary``.
    """
    feature_paths = [FILE_X_TRAIN, FILE_X_DEV]
    label_paths = [FILE_Y_TRAIN, FILE_Y_DEV]
    binary_label_paths = [FILE_Y_TRAIN_BIN, FILE_Y_DEV_BIN]

    argc = len(sys.argv)
    if argc == 5:
        feature_paths = sys.argv[1:3]
        label_paths = sys.argv[3:5]
    elif argc == 3:
        feature_paths = sys.argv[1:3]
    elif argc == 2:
        # A single argument overrides the dev features only.
        feature_paths[1] = sys.argv[1]

    loaded = read_train_dev_files_with_binary(
        feature_paths[0], feature_paths[1],
        label_paths[0], label_paths[1],
        binary_label_paths[0], binary_label_paths[1])
    # Unpack explicitly so exactly six values are required, then hand them
    # back as a tuple.
    X_train, y_train, X_dev, y_dev, y_bin_train, y_bin_dev = loaded
    return X_train, y_train, X_dev, y_dev, y_bin_train, y_bin_dev
# Default locations of the dev labels and of the binary train/dev labels.
# NOTE(review): x_train_file_name, x_dev_file_name and y_train_file_name are
# read below but not assigned in this section -- presumably they receive
# their defaults earlier in the file; confirm.
y_dev_file_name = "data/splitted/y/devY.txt"
y_bin_train_file_name = "data/splitted/bin_Y/trainY.txt"
y_bin_dev_file_name = "data/splitted/bin_Y/devY.txt"

# Command-line overrides: with two or four arguments the first two replace
# the feature files; with four, the last two also replace the label files.
# The binary label paths are never overridden.
if len(sys.argv) in (3, 5):
    x_train_file_name, x_dev_file_name = sys.argv[1:3]
    if len(sys.argv) == 5:
        y_train_file_name, y_dev_file_name = sys.argv[3:5]

(X_train, y_train, X_dev, y_dev,
 y_bin_train, y_bin_dev) = read_train_dev_files_with_binary(
    x_train_file_name,
    x_dev_file_name,
    y_train_file_name,
    y_dev_file_name,
    y_bin_train_file_name,
    y_bin_dev_file_name,
)

# Length of the first training row, used below as the feature count.
n_feats = len(X_train[0])

# Feature selection is performed first; this helper gives how many features
# to keep.
def numFeatsFn(n):
    """Return ``ceil(sqrt(n_feats))`` as an int.

    NOTE(review): the argument ``n`` is accepted but not used -- the result
    is always computed from the module-level ``n_feats``.  Confirm whether
    ``n`` was meant to be used instead.
    """
    root = sqrt(n_feats)
    return int(ceil(root))


def reliefPostProc(X, y):
    """Return the indexes of the highest-scoring entries from ``reliefF``.

    ``reliefF.reliefF(X, y)`` is called to obtain a sequence of scores
    (presumably one per feature -- confirm against the library).  Each score
    is paired with its position, the pairs are sorted in descending order,
    and the positions of the first ``numFeatsFn(n_feats)`` pairs are
    returned.  Because whole ``(score, index)`` pairs are compared, equal
    scores are ordered by descending index.

    Returns:
        A NumPy array of the selected positions, best score first.
    """
    relief_scores = reliefF.reliefF(X, y)
    ranked = sorted(
        ((score, position) for position, score in enumerate(relief_scores)),
        reverse=True)
    keep = numFeatsFn(n_feats)
    top_positions = [position for _, position in ranked[:keep]]
    return np.array(top_positions)