def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a synthetic CSV of skewed 0/1 predictors plus a modulo response.

    Every row holds colCount values drawn with
    h2o_util.choice_with_probability, followed by one extra response column
    equal to (sum of the row's values) % BASE, printed as a long
    exponent-notation real.

    Args:
        csvPathname: path of the file to create (truncated if it exists).
        rowCount: number of rows to write.
        colCount: number of predictor columns per row; the response column
            is written in addition to these.
        SEED: kept so existing callers keep working; not used here. The
            previous code built a private random.Random(SEED) that was never
            consulted.

    Raises:
        Exception: when a value has to be drawn and the module-level BASE is
            not 2.
    """
    # The context manager closes the file even if the BASE check raises
    # part-way through the write.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            rowData = []
            rowSum = 0
            for _ in range(colCount):
                if BASE != 2:
                    # str() so a numeric BASE produces this message rather
                    # than a TypeError from str + int concatenation.
                    raise Exception("Unsupported BASE: " + str(BASE))

                # 98/2 split between 0 and 1. The weights used to be
                # .98 and .2 (sum 1.18), contradicting the stated split.
                r = h2o_util.choice_with_probability([(0, .98), (1, .02)])
                rowSum += r
                rowData.append(r)

            responseVar = rowSum % BASE
            # Emit the response as a many-digit real (the original note says
            # this is meant to influence how well gzip compresses the file).
            rowData.append('%#034.32e' % responseVar)
            dsf.write(",".join(map(str, rowData)) + "\n")
Esempio n. 2
0
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Generate a CSV of mostly-zero binary columns with a modulo response.

    For each of the rowCount rows, colCount values are drawn through
    h2o_util.choice_with_probability and written out, followed by a final
    column holding (row sum) % BASE formatted as a 32-decimal
    exponent-notation number.

    Args:
        csvPathname: destination file; it is created or truncated.
        rowCount: how many rows to emit.
        colCount: how many predictor values each row gets before the
            response column.
        SEED: unused; retained for call compatibility. It previously seeded
            a local random.Random instance that nothing read from.

    Raises:
        Exception: if a value must be generated while the module-level BASE
            is something other than 2.
    """
    # `with` guarantees the handle is released if generation fails mid-file.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            rowData = []
            rowSum = 0
            for _ in range(colCount):
                if BASE != 2:
                    # Convert explicitly: concatenating an int BASE to the
                    # message would raise TypeError instead of this error.
                    raise Exception("Unsupported BASE: " + str(BASE))

                # 98% zeros, 2% ones. The second weight was 0.2, which made
                # the weights sum to 1.18 and disagreed with the 98/2 intent.
                r = h2o_util.choice_with_probability([(0, 0.98), (1, 0.02)])
                rowSum += r
                rowData.append(r)

            responseVar = rowSum % BASE
            # Long exponent form on purpose; per the original note it is
            # there to affect gzip compression of the output.
            rowData.append("%#034.32e" % responseVar)
            dsf.write(",".join(str(value) for value in rowData) + "\n")
Esempio n. 3
0
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a rowCount x colCount CSV of values drawn from {1.1, 0.1}.

    Each cell comes from h2o_util.choice_with_probability with weight .05
    on 1.1 and .95 on 0.1. Cells are converted with str() and joined with
    commas, one row per line.

    Args:
        csvPathname: path of the file to create (truncated if it exists).
        rowCount: number of rows to write.
        colCount: number of values per row.
        SEED: not used; kept so the signature stays unchanged. The earlier
            code created a random.Random(SEED) that was never read.
    """
    # Use a context manager so the file is closed even if a draw raises.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            rowData = [
                h2o_util.choice_with_probability([(1.1, .05), (0.1, .95)])
                for _ in range(colCount)
            ]
            dsf.write(",".join(map(str, rowData)) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Produce a CSV whose cells are 1.1 (weight .05) or 0.1 (weight .95).

    The file gets rowCount lines, each with colCount comma-separated values
    obtained from h2o_util.choice_with_probability.

    Args:
        csvPathname: output file path; created or truncated.
        rowCount: number of lines to write.
        colCount: number of cells on each line.
        SEED: unused and kept only for signature compatibility. It used to
            seed a local random.Random object that was never consulted.
    """
    # The handle is managed by `with`, so it cannot leak on an exception.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            cells = []
            for _ in range(colCount):
                drawn = h2o_util.choice_with_probability([(1.1, .05), (0.1, .95)])
                cells.append(str(drawn))
            dsf.write(",".join(cells) + "\n")
Esempio n. 5
0
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a CSV of 0/1 cells where 1 is drawn with weight ONE_RATE.

    Each cell is obtained from h2o_util.choice_with_probability using the
    module-level ONE_RATE for 1 and (1 - ONE_RATE) for 0, then formatted
    with one decimal place ('%.1f').

    Args:
        csvPathname: path of the file to create (truncated if it exists).
        rowCount: number of rows to write.
        colCount: number of cells per row.
        SEED: not used; retained for interface compatibility. The previous
            body built a random.Random(SEED) that nothing read from.
    """
    # Context manager: the file is closed even if a draw or format fails.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            rowData = []
            for _ in range(colCount):
                r = h2o_util.choice_with_probability([(1, ONE_RATE), (0, 1 - ONE_RATE)])
                rowData.append('%.1f' % r)

            # Cells are already strings, so no str() conversion is needed.
            dsf.write(",".join(rowData) + "\n")
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Generate a CSV of one-decimal 0/1 values weighted by ONE_RATE.

    For every cell, h2o_util.choice_with_probability picks 1 with weight
    ONE_RATE (a module-level setting) or 0 with weight 1 - ONE_RATE; the
    result is written as '%.1f'. Rows are comma-separated, one per line.

    Args:
        csvPathname: output file path; created or truncated.
        rowCount: number of lines to write.
        colCount: number of cells on each line.
        SEED: unused; kept so callers need not change. It previously seeded
            a local random.Random instance that was never used.
    """
    # `with` releases the handle on every exit path, including exceptions.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            formatted = [
                '%.1f' % h2o_util.choice_with_probability([(1, ONE_RATE), (0, 1 - ONE_RATE)])
                for _ in range(colCount)
            ]
            # Every entry is already a string; join them directly.
            dsf.write(",".join(formatted) + "\n")
Esempio n. 7
0
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Write a CSV of evenly weighted 0/1 predictors plus a modulo response.

    Each row has colCount values drawn from
    h2o_util.choice_with_probability with weight .5 on 0 and .5 on 1, and a
    final column equal to (sum of the row's values) % BASE.

    Args:
        csvPathname: path of the file to create (truncated if it exists).
        rowCount: number of rows to write.
        colCount: number of predictor columns; the response is an extra
            trailing column.
        SEED: not used; kept for interface compatibility. The earlier code
            created a random.Random(SEED) that was never consulted.

    Raises:
        Exception: when a value has to be drawn and the module-level BASE is
            not 2.
    """
    # The context manager closes the file even if the BASE check raises.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            rowData = []
            rowSum = 0
            for _ in range(colCount):
                if BASE != 2:
                    # str() avoids a TypeError when BASE is numeric, so the
                    # caller sees the intended message.
                    raise Exception("Unsupported BASE: " + str(BASE))

                # Even 50/50 split between 0 and 1.
                r = h2o_util.choice_with_probability([(0, .5), (1, .5)])
                rowSum += r
                rowData.append(r)

            rowData.append(rowSum % BASE)
            dsf.write(",".join(map(str, rowData)) + "\n")
Esempio n. 8
0
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    """Generate a CSV of 50/50 binary columns with a trailing modulo column.

    Every row gets colCount values from h2o_util.choice_with_probability
    (0 and 1, each with weight .5), then one more column holding the row
    sum modulo the module-level BASE.

    Args:
        csvPathname: output file path; created or truncated.
        rowCount: number of lines to write.
        colCount: number of predictor values per line, not counting the
            response column.
        SEED: unused; kept so the signature does not change. It used to
            seed a local random.Random object that nothing read from.

    Raises:
        Exception: if a value must be generated while BASE is not 2.
    """
    # `with` makes sure the handle is closed on the error path as well.
    with open(csvPathname, "w+") as dsf:
        for _ in range(rowCount):
            predictors = []
            for _ in range(colCount):
                if BASE != 2:
                    # Explicit str(): "..." + int would raise TypeError and
                    # hide the real problem.
                    raise Exception("Unsupported BASE: " + str(BASE))
                # 0 and 1 are equally weighted.
                predictors.append(
                    h2o_util.choice_with_probability([(0, .5), (1, .5)])
                )

            responseVar = sum(predictors) % BASE
            fields = [str(value) for value in predictors]
            fields.append(str(responseVar))
            dsf.write(",".join(fields) + "\n")
Esempio n. 9
0
def good_choices(n):
    """Select one of ch1..ch5 by weight and return its result for n.

    ch1 through ch4 each carry weight 0.1 and ch5 carries 0.6. The object
    handed back by h2o_util.choice_with_probability is called with n.
    """
    weighted_choosers = [
        (ch1, 0.1),
        (ch2, 0.1),
        (ch3, 0.1),
        (ch4, 0.1),
        (ch5, 0.6),
    ]
    selected = h2o_util.choice_with_probability(weighted_choosers)
    return selected(n)