def glm_gamma_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the GLM-gamma prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    predict_write = join(predict_dir, save_folder_name)

    # Fixed family/link power parameters for this perf-test scenario.
    config = dict(dfam='1', link='1', vpow='2', lpow='0', fmt=DATA_FORMAT,
                  X=join(datagen_dir, 'X_test.data'),
                  B=join(train_dir, 'B.data'),
                  Y=join(datagen_dir, 'Y_test.data'),
                  M=join(predict_write, 'M.data'),
                  O=join(predict_write, 'O.data'))
    config_writer(save_path + '.json', config)
    return save_path
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the stats1 data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['stats1', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)

    # NC must satisfy num0 < NC < C (where num0 = NC/2); half the column
    # count keeps it in range for any C.
    half_cols = int(int(col) / 2)

    config = dict(R=row, C=col, NC=half_cols, MAXDOMAIN='1100',
                  DATA=join(datagen_write, 'X.data'),
                  TYPES=join(datagen_write, 'types'),
                  SETSIZE='20', LABELSETSIZE='10',
                  TYPES1=join(datagen_write, 'set1.types'),
                  TYPES2=join(datagen_write, 'set2.types'),
                  INDEX1=join(datagen_write, 'set1.indices'),
                  INDEX2=join(datagen_write, 'set2.indices'),
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept MultiLogReg training config JSONs for binomial data.

    One config per intercept setting (0, 1, 2); returns the list of config
    paths (without the '.json' extension).
    """
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)

    data_folders = []
    for i in [0, 1, 2]:
        icpt = str(i)
        reg = '0.01'
        tol = '0.0001'
        moi = '100'
        mii = '5'
        X = join(datagen_dir, 'X.data')
        Y = join(datagen_dir, 'Y.data')
        B = join(train_write + '.' + str(i), 'B.data')
        # CONSISTENCY FIX: added fmt=DATA_FORMAT, which every sibling trainer
        # (multinomial_multilogreg_train, *_linearreg*_train) already passes;
        # its omission here left the output format unspecified.
        config = dict(X=X, Y=Y, icpt=icpt, reg=reg, tol=tol, moi=moi,
                      mii=mii, B=B, fmt=DATA_FORMAT)
        config_writer(save_path + '.' + str(i) + '.json', config)
        data_folders.append(save_path + '.' + str(i))

    return data_folders
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the regression1 data-generation config JSON and return its path (sans extension).

    The generator script takes positional arguments, so the config is an
    ordered list rather than a dict.
    """
    path_name = '.'.join(['regression1', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    num_samples, num_features = split_rowcol(matrix_dim)

    config = [
        num_samples,
        num_features,
        '5',                               # max feature value
        '5',                               # max weight
        join(datagen_write, 'weight.data'),
        join(datagen_write, 'X.data'),
        join(datagen_write, 'Y.data'),
        '1',                               # noise
        '0',                               # intercept
        MATRIX_TYPE_DICT[matrix_type],     # sparsity
        DATA_FORMAT,
        '1',                               # transform labels
    ]
    config_writer(save_path + '.json', config)
    return save_path
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the regression1 data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['regression1', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)

    weights_path = join(datagen_write, 'weight.data')
    data_path = join(datagen_write, 'X.data')
    labels_path = join(datagen_write, 'Y.data')
    sparsity = MATRIX_TYPE_DICT[matrix_type]

    # Positional argument list: samples, features, max feature value,
    # max weight, weight/data/label paths, noise, intercept, sparsity,
    # format, transform-labels flag.
    config = [row, col, '5', '5', weights_path, data_path, labels_path,
              '1', '0', sparsity, DATA_FORMAT, '1']
    config_writer(save_path + '.json', config)
    return save_path
def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the clustering data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['clustering', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)

    config = dict(nr=row, nf=col,
                  nc='50', dc='10.0', dr='1.0', fbf='100.0', cbf='100.0',
                  X=join(datagen_write, 'X.data'),
                  C=join(datagen_write, 'C.data'),
                  Y=join(datagen_write, 'Y.data'),
                  YbyC=join(datagen_write, 'YbyC.data'),
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept l2-svm training config JSONs; return the config paths."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)

    data_folders = []
    for icpt_value in [0, 1]:
        suffix = '.' + str(icpt_value)
        config = dict(X=join(datagen_dir, 'X.data'),
                      Y=join(datagen_dir, 'Y.data'),
                      icpt=str(icpt_value),
                      reg='0.01', tol='0.0001', maxiter='100',
                      model=join(train_write + suffix, 'model.data'),
                      Log=join(train_write + suffix, 'Log.data'),
                      fmt=DATA_FORMAT)
        config_writer(save_path + suffix + '.json', config)
        data_folders.append(save_path + suffix)

    return data_folders
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the stats1 data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['stats1', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)

    # Category count NC must lie strictly between NC/2 and C; using C/2
    # satisfies that for any column count.
    nc = int(int(col) / 2)

    config = dict(R=row,
                  C=col,
                  NC=nc,
                  MAXDOMAIN='1100',
                  DATA=join(datagen_write, 'X.data'),
                  TYPES=join(datagen_write, 'types'),
                  SETSIZE='20',
                  LABELSETSIZE='10',
                  TYPES1=join(datagen_write, 'set1.types'),
                  TYPES2=join(datagen_write, 'set2.types'),
                  INDEX1=join(datagen_write, 'set1.indices'),
                  INDEX2=join(datagen_write, 'set2.indices'),
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def regression1_linearregcg_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept LinearRegCG training config JSONs; return the config paths."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)

    data_folders = []
    for i in [0, 1, 2]:
        tagged = save_path + '.' + str(i)
        config = dict(X=join(datagen_dir, 'X.data'),
                      Y=join(datagen_dir, 'Y.data'),
                      B=join(train_write + '.' + str(i), 'B.data'),
                      icpt=str(i),
                      fmt=DATA_FORMAT,
                      maxi='20', tol='0.0001', reg='0.01')
        config_writer(tagged + '.json', config)
        data_folders.append(tagged)

    return data_folders
def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept GLM-Poisson training config JSONs; return the config paths.

    One config per intercept setting (0, 1, 2); returns the list of config
    paths (without the '.json' extension).
    """
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data_folders = []
    for i in [0, 1, 2]:
        X = join(datagen_dir, 'X.data')
        Y = join(datagen_dir, 'Y.data')
        B = join(train_write + '.' + str(i), 'B.data')
        icpt = str(i)
        fmt = DATA_FORMAT
        moi = '200'
        mii = '5'
        dfam = '1'
        # BUG FIX: the GLM variance-power argument is spelled 'vpow'; the
        # former 'vpov' key was never recognized, so the value was silently
        # dropped and GLM fell back to its default.
        vpow = '1'
        link = '1'
        lpow = '0'
        tol = '0.0001'
        reg = '0.01'
        config = dict(X=X, Y=Y, B=B, icpt=icpt, fmt=fmt, moi=moi, mii=mii,
                      dfam=dfam, vpow=vpow, link=link, lpow=lpow, tol=tol,
                      reg=reg)
        config_writer(save_path + '.' + str(i) + '.json', config)
        data_folders.append(save_path + '.' + str(i))
    return data_folders
def l2_svm_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the l2-svm prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    # The intercept setting is encoded as the trailing dot-separated token
    # of the folder name (e.g. '<algo>.<...>.<icpt>').
    icpt = save_folder_name.split('.')[-1]
    config = dict(X=join(datagen_dir, 'X_test.data'),
                  Y=join(datagen_dir, 'Y_test.data'),
                  icpt=icpt,
                  model=join(train_dir, 'model.data'),
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def l2_svm_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the l2-svm prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    test_features = join(datagen_dir, 'X_test.data')
    test_labels = join(datagen_dir, 'Y_test.data')
    # Intercept setting recovered from the final dot-separated token.
    intercept = save_folder_name.split('.')[-1]
    trained_model = join(train_dir, 'model.data')
    config = dict(X=test_features, Y=test_labels, icpt=intercept,
                  model=trained_model, fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def stats1_univar_stats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the Univar-Stats config JSON; return the config path in a one-element list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    config = dict(X=join(datagen_dir, 'X.data'),
                  TYPES=join(datagen_dir, 'types'),
                  STATS=join(train_write, 'STATS.data'))
    config_writer(save_path + '.json', config)
    return [save_path]
def stats2_stratstats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the stratified-statistics config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    config = dict(X=join(datagen_dir, 'X.data'),
                  Xcid=join(datagen_dir, 'Xcid.data'),
                  Ycid=join(datagen_dir, 'Ycid.data'),
                  O=join(train_write, 'O.data'),
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return [save_path]
def stats1_univar_stats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the Univar-Stats config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data_matrix = join(datagen_dir, 'X.data')
    type_vector = join(datagen_dir, 'types')
    stats_out = join(train_write, 'STATS.data')
    config = dict(X=data_matrix, TYPES=type_vector, STATS=stats_out)
    config_writer(save_path + '.json', config)
    return [save_path]
def naive_bayes_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the naive-bayes prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    # Model artifacts (prior/conditionals/probabilities) come from the
    # training directory; test data from the datagen directory.
    config = dict(X=join(datagen_dir, 'X_test.data'),
                  Y=join(datagen_dir, 'Y_test.data'),
                  prior=join(train_dir, 'prior'),
                  conditionals=join(train_dir, 'conditionals'),
                  fmt=DATA_FORMAT,
                  probabilities=join(train_dir, 'probabilities'))
    config_writer(save_path + '.json', config)
    return save_path
def stats1_bivar_stats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the bivariate-statistics config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    config = dict(X=join(datagen_dir, 'X.data'),
                  index1=join(datagen_dir, 'set1.indices'),
                  index2=join(datagen_dir, 'set2.indices'),
                  types1=join(datagen_dir, 'set1.types'),
                  types2=join(datagen_dir, 'set2.types'),
                  OUTDIR=train_write)
    config_writer(save_path + '.json', config)
    return [save_path]
def naive_bayes_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the naive-bayes prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    test_features = join(datagen_dir, 'X_test.data')
    test_labels = join(datagen_dir, 'Y_test.data')
    prior = join(train_dir, 'prior')
    conditionals = join(train_dir, 'conditionals')
    probabilities = join(train_dir, 'probabilities')
    config = dict(X=test_features, Y=test_labels, prior=prior,
                  conditionals=conditionals, fmt=DATA_FORMAT,
                  probabilities=probabilities)
    config_writer(save_path + '.json', config)
    return save_path
def dimreduction_pca_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the PCA config JSON; return the config path in a one-element list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    config = dict(INPUT=join(datagen_dir, 'X.data'),
                  SCALE='1',
                  PROJDATA='1',
                  OUTPUT=join(train_write, 'Output.data'),
                  OFMT=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return [save_path]
def stats2_stratstats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the stratified-statistics config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data = join(datagen_dir, 'X.data')
    x_col_ids = join(datagen_dir, 'Xcid.data')
    y_col_ids = join(datagen_dir, 'Ycid.data')
    output = join(train_write, 'O.data')
    config = dict(X=data, Xcid=x_col_ids, Ycid=y_col_ids, O=output,
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return [save_path]
def clustering_kmeans_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the Kmeans training config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    config = dict(X=join(datagen_dir, 'X.data'),
                  k='50', maxi='50', tol='0.0001',
                  C=join(train_write, 'C.data'))
    config_writer(save_path + '.json', config)
    return [save_path]
def kmeans_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the Kmeans prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    predict_write = join(predict_dir, save_folder_name)
    # NOTE(review): centroids C are read from the datagen dir, not the
    # train dir — preserved as-is; confirm against the datagen step.
    config = dict(X=join(datagen_dir, 'X_test.data'),
                  C=join(datagen_dir, 'C.data'),
                  prY=join(predict_write, 'prY.data'))
    config_writer(save_path + '.json', config)
    return save_path
def kmeans_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the Kmeans prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    predict_write = join(predict_dir, save_folder_name)
    test_features = join(datagen_dir, 'X_test.data')
    centroids = join(datagen_dir, 'C.data')
    predicted_labels = join(predict_write, 'prY.data')
    config = dict(X=test_features, C=centroids, prY=predicted_labels)
    config_writer(save_path + '.json', config)
    return save_path
def clustering_kmeans_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the Kmeans training config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    features = join(datagen_dir, 'X.data')
    centroids_out = join(train_write, 'C.data')
    num_clusters = '50'
    max_iterations = '50'
    tolerance = '0.0001'
    config = dict(X=features, k=num_clusters, maxi=max_iterations,
                  tol=tolerance, C=centroids_out)
    config_writer(save_path + '.json', config)
    return [save_path]
def dimreduction_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the dimreduction data-generation config JSON; return its path (sans extension)."""
    path_name = '.'.join(['dimreduction', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    rows, cols = split_rowcol(matrix_dim)
    config = dict(R=rows, C=cols,
                  OUT=join(datagen_write, 'X.data'),
                  FMT=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def multilogreg_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the MultiLogReg prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    # Fixed family/link parameters for this prediction scenario.
    config = dict(dfam='3', vpow='-1', link='2', fmt=DATA_FORMAT,
                  X=join(datagen_dir, 'X_test.data'),
                  B=join(train_dir, 'B.data'),
                  Y=join(datagen_dir, 'Y_test.data'),
                  M=join(train_dir, 'M.data'))
    config_writer(save_path + '.json', config)
    return save_path
def multinomial_naive_bayes_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the naive-bayes training config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    config = dict(X=join(datagen_dir, 'X.data'),
                  Y=join(datagen_dir, 'Y.data'),
                  classes='150',
                  prior=join(train_write, 'prior'),
                  conditionals=join(train_write, 'conditionals'),
                  accuracy=join(train_write, 'accuracy'),
                  fmt=DATA_FORMAT,
                  probabilities=join(train_write, 'probabilities'))
    config_writer(save_path + '.json', config)
    return [save_path]
def regression1_linearregds_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept LinearRegDS training config JSONs; return the config paths."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data_folders = []
    for i in [0, 1, 2]:
        tagged = save_path + '.' + str(i)
        config = dict(X=join(datagen_dir, 'X.data'),
                      Y=join(datagen_dir, 'Y.data'),
                      B=join(train_write + '.' + str(i), 'B.data'),
                      icpt=str(i),
                      fmt=DATA_FORMAT,
                      reg='0.01')
        config_writer(tagged + '.json', config)
        data_folders.append(tagged)
    return data_folders
def multilogreg_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the MultiLogReg prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    test_features = join(datagen_dir, 'X_test.data')
    test_labels = join(datagen_dir, 'Y_test.data')
    betas = join(train_dir, 'B.data')
    means = join(train_dir, 'M.data')
    config = dict(dfam='3', vpow='-1', link='2', fmt=DATA_FORMAT,
                  X=test_features, B=betas, Y=test_labels, M=means)
    config_writer(save_path + '.json', config)
    return save_path
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the stats2 data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['stats2', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)
    config = dict(nr=row, nf=col,
                  D=join(datagen_write, 'X.data'),
                  Xcid=join(datagen_write, 'Xcid.data'),
                  Ycid=join(datagen_write, 'Ycid.data'),
                  A=join(datagen_write, 'A.data'),
                  fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the stats2 data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['stats2', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)
    data_path = join(datagen_write, 'X.data')
    x_col_ids = join(datagen_write, 'Xcid.data')
    y_col_ids = join(datagen_write, 'Ycid.data')
    aux_path = join(datagen_write, 'A.data')
    config = dict(nr=row, nf=col, D=data_path, Xcid=x_col_ids,
                  Ycid=y_col_ids, A=aux_path, fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def glm_binomial_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the GLM-binomial prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    predict_write = join(predict_dir, save_folder_name)
    config = dict(dfam='2', link='3', fmt=DATA_FORMAT,
                  X=join(datagen_dir, 'X_test.data'),
                  B=join(train_dir, 'B.data'),
                  Y=join(datagen_dir, 'Y_test.data'),
                  M=join(predict_write, 'M.data'),
                  O=join(predict_write, 'O.data'))
    config_writer(save_path + '.json', config)
    return save_path
def linearregds_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """Write the linear-regression prediction config JSON and return its path (sans extension)."""
    save_path = join(config_dir, save_folder_name)
    predict_write = join(predict_dir, save_folder_name)
    # Power family/link parameters fixed for this scenario.
    config = dict(dfam='1', link='1', vpow='0.0', lpow='1.0', fmt=DATA_FORMAT,
                  X=join(datagen_dir, 'X_test.data'),
                  B=join(train_dir, 'B.data'),
                  Y=join(datagen_dir, 'Y_test.data'),
                  M=join(predict_write, 'M.data'),
                  O=join(predict_write, 'O.data'))
    config_writer(save_path + '.json', config)
    return save_path
def dimreduction_pca_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the PCA config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    input_matrix = join(datagen_dir, 'X.data')
    output_path = join(train_write, 'Output.data')
    scale_flag = '1'
    project_flag = '1'
    config = dict(INPUT=input_matrix, SCALE=scale_flag,
                  PROJDATA=project_flag, OUTPUT=output_path,
                  OFMT=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return [save_path]
def multinomial_multilogreg_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept MultiLogReg training config JSONs for multinomial data."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data_folders = []
    for i in [0, 1, 2]:
        tagged = save_path + '.' + str(i)
        config = dict(X=join(datagen_dir, 'X.data'),
                      Y=join(datagen_dir, 'Y.data'),
                      B=join(train_write + '.' + str(i), 'B.data'),
                      icpt=str(i),
                      reg='0.01', tol='0.0001', moi='100', mii='0',
                      fmt=DATA_FORMAT)
        config_writer(tagged + '.json', config)
        data_folders.append(tagged)
    return data_folders
def stats1_bivar_stats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the bivariate-statistics config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data_matrix = join(datagen_dir, 'X.data')
    set1_indices = join(datagen_dir, 'set1.indices')
    set2_indices = join(datagen_dir, 'set2.indices')
    set1_types = join(datagen_dir, 'set1.types')
    set2_types = join(datagen_dir, 'set2.types')
    config = dict(X=data_matrix, index1=set1_indices, index2=set2_indices,
                  types1=set1_types, types2=set2_types, OUTDIR=train_write)
    config_writer(save_path + '.json', config)
    return [save_path]
def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept l2-svm training config JSONs; return the config paths."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')
    data_folders = []
    for i in [0, 1]:
        model_dir = train_write + '.' + str(i)
        config = dict(X=features, Y=labels, icpt=str(i),
                      reg='0.01', tol='0.0001', maxiter='100',
                      model=join(model_dir, 'model.data'),
                      Log=join(model_dir, 'Log.data'),
                      fmt=DATA_FORMAT)
        config_writer(save_path + '.' + str(i) + '.json', config)
        data_folders.append(save_path + '.' + str(i))
    return data_folders
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the multinomial data-generation config JSON and return its path (sans extension).

    The generator script takes positional arguments, so the config is an
    ordered list rather than a dict.
    """
    path_name = '.'.join(['multinomial', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    num_samples, num_features = split_rowcol(matrix_dim)
    config = [num_samples,
              num_features,
              MATRIX_TYPE_DICT[matrix_type],  # sparsity
              '150',                          # number of categories
              '0',                            # intercept
              join(datagen_write, 'X.data'),
              join(datagen_write, 'Y.data'),
              DATA_FORMAT,
              '1']
    config_writer(save_path + '.json', config)
    return save_path
def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the clustering data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['clustering', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)
    features = join(datagen_write, 'X.data')
    labels = join(datagen_write, 'Y.data')
    labels_by_centroid = join(datagen_write, 'YbyC.data')
    centroids = join(datagen_write, 'C.data')
    config = dict(nr=row, nf=col, nc='50', dc='10.0', dr='1.0',
                  fbf='100.0', cbf='100.0',
                  X=features, C=centroids, Y=labels,
                  YbyC=labels_by_centroid, fmt=DATA_FORMAT)
    config_writer(save_path + '.json', config)
    return save_path
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Write the multinomial data-generation config JSON and return its path (sans extension)."""
    path_name = '.'.join(['multinomial', matrix_type, str(matrix_dim)])
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    row, col = split_rowcol(matrix_dim)
    sparsity = MATRIX_TYPE_DICT[matrix_type]
    features_path = join(datagen_write, 'X.data')
    labels_path = join(datagen_write, 'Y.data')
    # Positional list: rows, cols, sparsity, #categories, intercept,
    # X path, Y path, format, trailing flag.
    config = [row, col, sparsity, '150', '0', features_path, labels_path,
              DATA_FORMAT, '1']
    config_writer(save_path + '.json', config)
    return save_path
def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write per-intercept GLM-Poisson training config JSONs; return the config paths."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    data_folders = []
    for i in [0, 1, 2]:
        tagged = save_path + '.' + str(i)
        # BUG FIX: renamed the misspelled 'vpov' key to 'vpow' so GLM actually
        # receives the variance-power argument instead of silently using its
        # default.
        config = dict(X=join(datagen_dir, 'X.data'),
                      Y=join(datagen_dir, 'Y.data'),
                      B=join(train_write + '.' + str(i), 'B.data'),
                      icpt=str(i),
                      fmt=DATA_FORMAT,
                      moi='200', mii='5',
                      dfam='1', vpow='1', link='1', lpow='0',
                      tol='0.0001', reg='0.01')
        config_writer(tagged + '.json', config)
        data_folders.append(tagged)
    return data_folders
def multinomial_naive_bayes_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """Write the naive-bayes training config JSON; return the config path in a list."""
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')
    prior = join(train_write, 'prior')
    conditionals = join(train_write, 'conditionals')
    accuracy = join(train_write, 'accuracy')
    probabilities = join(train_write, 'probabilities')
    config = dict(X=features, Y=labels, classes='150', prior=prior,
                  conditionals=conditionals, accuracy=accuracy,
                  fmt=DATA_FORMAT, probabilities=probabilities)
    config_writer(save_path + '.json', config)
    return [save_path]