def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the clustering data-generation config and pass it to config_writer.

    The run is identified by ``clustering.<matrix_type>.<matrix_dim>``; that
    name is used both as the data sub-directory under ``datagen_dir`` and as
    the config file stem under ``config_dir``.

    Returns the config path *without* the ``.json`` suffix.
    """
    run_name = '.'.join(('clustering', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # Key order is kept stable so the emitted config does not change.
    config = {
        'nr': n_rows,
        'nf': n_cols,
        'nc': '50',
        'dc': '10.0',
        'dr': '1.0',
        'fbf': '100.0',
        'cbf': '100.0',
        'X': join(data_dir, 'X.data'),
        'C': join(data_dir, 'C.data'),
        'Y': join(data_dir, 'Y.data'),
        'YbyC': join(data_dir, 'YbyC.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the regression1 data-generation config and pass it to config_writer.

    The config is a positional list (not a mapping), so the order of the
    entries below is significant and must not be changed.

    Returns the config path *without* the ``.json`` suffix.

    NOTE(review): a function with this same name and an identical body appears
    again later in this file; confirm which definition is intended to be used.
    """
    run_name = '.'.join(('regression1', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_samples, n_features = split_rowcol(matrix_dim)

    config = [
        n_samples,
        n_features,
        '5',                              # maxFeatureValue
        '5',                              # maxWeight
        join(data_dir, 'weight.data'),    # weights location
        join(data_dir, 'X.data'),         # data location
        join(data_dir, 'Y.data'),         # labels location
        '1',                              # noise
        '0',                              # intercept
        MATRIX_TYPE_DICT[matrix_type],    # sparsity
        DATA_FORMAT,                      # output format
        '1',                              # transform labels
    ]

    config_writer(save_path + '.json', config)
    return save_path
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the stats1 data-generation config and pass it to config_writer.

    The run is identified by ``stats1.<matrix_type>.<matrix_dim>``; that name
    is used both as the data sub-directory under ``datagen_dir`` and as the
    config file stem under ``config_dir``.

    Returns the config path *without* the ``.json`` suffix.
    """
    run_name = '.'.join(('stats1', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # NC should be less than C and more than num0, where num0 = NC/2,
    # i.e. num0 < NC < C. The old value was a fixed NC = 10; it is now C/2.
    half_cols = int(int(n_cols) / 2)

    # Key order is kept stable so the emitted config does not change.
    config = {
        'R': n_rows,
        'C': n_cols,
        'NC': half_cols,
        'MAXDOMAIN': '1100',
        'DATA': join(data_dir, 'X.data'),
        'TYPES': join(data_dir, 'types'),
        'SETSIZE': '20',
        'LABELSETSIZE': '10',
        'TYPES1': join(data_dir, 'set1.types'),
        'TYPES2': join(data_dir, 'set2.types'),
        'INDEX1': join(data_dir, 'set1.indices'),
        'INDEX2': join(data_dir, 'set2.indices'),
        'fmt': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the regression1 data-generation config and pass it to config_writer.

    The config is a positional list, so the element order is significant.

    Returns the config path *without* the ``.json`` suffix.

    NOTE(review): this repeats an earlier definition of the same name with an
    identical body. Confirm whether this was meant to be a separate generator
    (different name / settings) or whether one copy should be removed.
    """
    run_name = '.'.join(('regression1', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_samples, n_features = split_rowcol(matrix_dim)

    max_feature_value = max_weight = '5'
    weights_path = join(data_dir, 'weight.data')
    data_path = join(data_dir, 'X.data')
    labels_path = join(data_dir, 'Y.data')
    noise, intercept, transform_labels = '1', '0', '1'
    sparsity = MATRIX_TYPE_DICT[matrix_type]

    config = [
        n_samples, n_features,
        max_feature_value, max_weight,
        weights_path, data_path, labels_path,
        noise, intercept, sparsity,
        DATA_FORMAT, transform_labels,
    ]

    config_writer(save_path + '.json', config)
    return save_path
def dimreduction_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the dimreduction data-generation config and pass it to config_writer.

    The run is identified by ``dimreduction.<matrix_type>.<matrix_dim>``; that
    name is used both as the data sub-directory under ``datagen_dir`` and as
    the config file stem under ``config_dir``.

    Returns the config path *without* the ``.json`` suffix.
    """
    run_name = '.'.join(('dimreduction', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # Note: the format key is upper-case 'FMT' for this generator.
    config = {
        'R': n_rows,
        'C': n_cols,
        'OUT': join(data_dir, 'X.data'),
        'FMT': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the stats2 data-generation config and pass it to config_writer.

    The run is identified by ``stats2.<matrix_type>.<matrix_dim>``; that name
    is used both as the data sub-directory under ``datagen_dir`` and as the
    config file stem under ``config_dir``.

    Returns the config path *without* the ``.json`` suffix.
    """
    run_name = '.'.join(('stats2', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # Key order is kept stable so the emitted config does not change.
    config = {
        'nr': n_rows,
        'nf': n_cols,
        'D': join(data_dir, 'X.data'),
        'Xcid': join(data_dir, 'Xcid.data'),
        'Ycid': join(data_dir, 'Ycid.data'),
        'A': join(data_dir, 'A.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the multinomial data-generation config and pass it to config_writer.

    The config is a positional list (not a mapping), so the order of the
    entries below is significant and must not be changed.

    Returns the config path *without* the ``.json`` suffix.
    """
    run_name = '.'.join(('multinomial', matrix_type, str(matrix_dim)))
    data_dir = join(datagen_dir, run_name)
    save_path = join(config_dir, run_name)
    n_samples, n_features = split_rowcol(matrix_dim)

    config = [
        n_samples,
        n_features,
        MATRIX_TYPE_DICT[matrix_type],    # sparsity
        '150',                            # number of categories
        '0',                              # intercept
        join(data_dir, 'X.data'),
        join(data_dir, 'Y.data'),
        DATA_FORMAT,
        '1',                              # trailing positional value, unnamed in the original
    ]

    config_writer(save_path + '.json', config)
    return save_path