def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a synthetic CSV of weighted 0/1 columns plus a modulo response.

    Every row holds ``colCount`` values obtained from
    ``h2o_util.choice_with_probability`` followed by one response column equal
    to ``sum(row values) % BASE``, rendered as a long exponent-notation real.

    Args:
        csvPathname: Path of the file to create (opened with mode "w+", so an
            existing file is truncated).
        rowCount: Number of rows to write.
        colCount: Number of generated columns per row (the response column is
            written in addition to these).
        SEED: Kept for caller compatibility. It is not consulted here; the
            earlier code only used it to build a ``random.Random`` instance
            that was never read.

    Raises:
        Exception: If the module-level ``BASE`` is not 2. The check runs per
            generated value, so it only fires when at least one value is
            produced.
    """
    # The with-block closes the handle even when the BASE check below raises.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            rowData = []
            rowSum = 0
            for _col in range(colCount):
                if BASE != 2:
                    # str() is required: concatenating a non-string BASE onto
                    # the message would raise TypeError and mask this error.
                    raise Exception("Unsupported BASE: " + str(BASE))
                # 98/2 split between 0 and 1. The second weight is .02 (not
                # .2) so the weights sum to 1 and match the intended ratio.
                # A 50/50 alternative would be [(0, .5), (1, .5)].
                r = h2o_util.choice_with_probability([(0, .98), (1, .02)])
                rowSum += r
                rowData.append(r)

            responseVar = rowSum % BASE
            # Emit the response as a zero-padded, 32-fraction-digit real in
            # exponent notation; the original author's note says the long
            # form is there for its effect on gzip compression.
            rowData.append('%#034.32e' % responseVar)
            dsf.write(",".join(map(str, rowData)) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Generate a CSV of weighted binary columns with a sum-modulo response.

    For each of ``rowCount`` rows, ``colCount`` values are requested from
    ``h2o_util.choice_with_probability``; their sum modulo the module-level
    ``BASE`` is appended as the last column, formatted as a many-digit real.

    Args:
        csvPathname: Destination path; opened with mode "w+", which truncates
            any existing file.
        rowCount: How many rows to emit.
        colCount: How many generated values precede the response in each row.
        SEED: Accepted so existing callers keep working. This function does
            not use it (the former ``random.Random(SEED)`` object was created
            and then never referenced).

    Raises:
        Exception: When ``BASE`` is anything other than 2. The check happens
            while generating values, so an empty dataset never triggers it.
    """
    # Using a context manager so the file is closed on the error path too.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            rowData = []
            rowSum = 0
            for _col in range(colCount):
                if BASE != 2:
                    # Convert explicitly; "..." + BASE fails with TypeError
                    # whenever BASE is not already a string.
                    raise Exception("Unsupported BASE: " + str(BASE))
                # Intended ratio is 98/2. The weight for 1 is 0.02 rather
                # than 0.2 so that the two weights add up to exactly 1.
                # (A 50/50 variant would use [(0, 0.5), (1, 0.5)].)
                r = h2o_util.choice_with_probability([(0, 0.98), (1, 0.02)])
                rowSum += r
                rowData.append(r)

            responseVar = rowSum % BASE
            # Width 34, 32 fractional digits, exponent notation: a deliberately
            # long rendering which, per the original note, is meant to
            # influence how well gzip compresses the file.
            rowData.append("%#034.32e" % responseVar)
            dsf.write(",".join(map(str, rowData)) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a CSV whose cells are 1.1 or 0.1, chosen with weights .05 / .95.

    Each cell comes from one call to ``h2o_util.choice_with_probability`` with
    the pairs ``(1.1, .05)`` and ``(0.1, .95)``; presumably these are
    (value, probability) pairs -- confirm against the helper.

    Args:
        csvPathname: Path of the file to create (mode "w+" truncates an
            existing file).
        rowCount: Number of rows to write.
        colCount: Number of comma-separated values per row.
        SEED: Kept for caller compatibility. It is not consulted here; the
            earlier code built a ``random.Random(SEED)`` that was never used.
    """
    # The with-block guarantees the handle is closed even if the helper raises
    # part-way through generation.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            rowData = [
                h2o_util.choice_with_probability([(1.1, .05), (0.1, .95)])
                for _col in range(colCount)
            ]
            dsf.write(",".join(map(str, rowData)) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Emit ``rowCount`` CSV rows of ``colCount`` values, each 1.1 or 0.1.

    Values are obtained one at a time from
    ``h2o_util.choice_with_probability([(1.1, .05), (0.1, .95)])`` and written
    with ``str()`` formatting, comma-separated, one row per line.

    Args:
        csvPathname: Output file path; opened with "w+", so prior contents are
            discarded.
        rowCount: Number of lines to produce.
        colCount: Number of values on each line.
        SEED: Unused by this function and retained only so callers need not
            change. The ``random.Random(SEED)`` object formerly created here
            was never referenced afterwards.
    """
    # Context manager instead of a trailing close(): an exception raised while
    # generating a row no longer leaves the file handle open.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            cells = []
            for _col in range(colCount):
                value = h2o_util.choice_with_probability([(1.1, .05), (0.1, .95)])
                cells.append(str(value))
            dsf.write(",".join(cells) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a CSV of 1/0 cells weighted by the module-level ``ONE_RATE``.

    Each cell is requested from ``h2o_util.choice_with_probability`` with the
    pairs ``(1, ONE_RATE)`` and ``(0, 1 - ONE_RATE)`` and written with one
    fractional digit (``'%.1f'``).

    Args:
        csvPathname: Path of the file to create (mode "w+" truncates an
            existing file).
        rowCount: Number of rows to write.
        colCount: Number of comma-separated values per row.
        SEED: Kept for caller compatibility. It is not consulted here; the
            earlier code built a ``random.Random(SEED)`` that was never used.
    """
    # The with-block closes the file even if value generation raises.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            # Alternatives left by the original author: fixed weights
            # [(1.1, .02), (0.1, .98)], and the long '%#034.32e' rendering
            # in place of '%.1f'.
            rowData = [
                '%.1f' % h2o_util.choice_with_probability(
                    [(1, ONE_RATE), (0, 1 - ONE_RATE)])
                for _col in range(colCount)
            ]
            # Cells are already strings, so no further str() conversion is
            # needed before joining.
            dsf.write(",".join(rowData) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a CSV of 50/50 binary columns plus a sum-modulo-BASE response.

    Every row holds ``colCount`` values obtained from
    ``h2o_util.choice_with_probability([(0, .5), (1, .5)])`` followed by one
    response column equal to the row sum modulo the module-level ``BASE``.

    Args:
        csvPathname: Path of the file to create (mode "w+" truncates an
            existing file).
        rowCount: Number of rows to write.
        colCount: Number of generated columns per row (the response column is
            written in addition to these).
        SEED: Kept for caller compatibility. It is not consulted here; the
            earlier code built a ``random.Random(SEED)`` that was never used.

    Raises:
        Exception: If ``BASE`` is not 2. The check runs per generated value,
            so it only fires when at least one value is produced.
    """
    # The with-block closes the handle even when the BASE check below raises.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            rowData = []
            rowSum = 0
            for _col in range(colCount):
                if BASE != 2:
                    # str() is required: concatenating a non-string BASE onto
                    # the message would raise TypeError and mask this error.
                    raise Exception("Unsupported BASE: " + str(BASE))
                # 50/50 only for now; the original author notes that this
                # differs from what a print elsewhere in the file reports.
                r = h2o_util.choice_with_probability([(0, .5), (1, .5)])
                rowSum += r
                rowData.append(r)

            responseVar = rowSum % BASE
            rowData.append(responseVar)
            dsf.write(",".join(map(str, rowData)) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Generate a CSV of evenly weighted 0/1 values with a modulo response.

    For each of ``rowCount`` rows, ``colCount`` values are requested from
    ``h2o_util.choice_with_probability([(0, .5), (1, .5)])``. The final column
    of the row is the sum of those values modulo the module-level ``BASE``.

    Args:
        csvPathname: Destination path; opened with mode "w+", which truncates
            any existing file.
        rowCount: How many rows to emit.
        colCount: How many generated values precede the response in each row.
        SEED: Accepted so existing callers keep working. This function does
            not use it (the former ``random.Random(SEED)`` object was created
            and then never referenced).

    Raises:
        Exception: When ``BASE`` is anything other than 2. The check happens
            while generating values, so an empty dataset never triggers it.
    """
    # Using a context manager so the file is closed on the error path too.
    with open(csvPathname, "w+") as dsf:
        for _row in range(rowCount):
            values = []
            for _col in range(colCount):
                if BASE != 2:
                    # Convert explicitly; "..." + BASE fails with TypeError
                    # whenever BASE is not already a string.
                    raise Exception("Unsupported BASE: " + str(BASE))
                # Only the 50/50 case is implemented for now; per the original
                # note, a print elsewhere in the file says something different.
                values.append(
                    h2o_util.choice_with_probability([(0, .5), (1, .5)]))

            # Response column: row sum reduced modulo BASE.
            responseVar = sum(values) % BASE
            values.append(responseVar)
            dsf.write(",".join(map(str, values)) + "\n")
def good_choices(n):
    """Select one of ``ch1``..``ch5`` and return the result of calling it with ``n``.

    The selection is delegated to ``h2o_util.choice_with_probability``, which
    receives ``ch1`` through ``ch4`` paired with 0.1 each and ``ch5`` paired
    with 0.6 (presumably selection probabilities -- confirm against the
    helper).
    """
    pick = h2o_util.choice_with_probability
    weighted_candidates = [
        (ch1, 0.1),
        (ch2, 0.1),
        (ch3, 0.1),
        (ch4, 0.1),
        (ch5, 0.6),
    ]
    selected = pick(weighted_candidates)
    return selected(n)