Example #1
0
def plot(x,
         y,
         methods,
         xlabel,
         ylabel,
         filename,
         intXAxis=False,
         intYAxis=False,
         xAxis=None,
         ylim=None):
    """
  plot data.

  :param x: x axis
  :param y: y(method, x_elem) is a vector that contains raw data
  :param methods: methods to plot, each has a legend
  :param xlabel: name of xlabel
  :param ylabel: name of ylabel
  :param filename: output to filename.pdf
  :return:
  """
    if xAxis is None: xAxis = x

    yMean = lambda method: [mean(y(method, xElem)) for xElem in x]
    yCI = lambda method: [standardErr(y(method, xElem)) for xElem in x]

    fig = pylab.figure()

    ax = pylab.gca()
    print xlabel, ylabel
    for method in methods:
        print method, yMean(method), yCI(method)
        ax.errorbar(xAxis,
                    yMean(method),
                    yCI(method),
                    fmt=markers[method],
                    mfc='none',
                    label=names[method],
                    markersize=15,
                    capsize=10,
                    linewidth=2)

    pylab.xlabel(xlabel)
    pylab.ylabel(ylabel)

    if intXAxis:
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    if intYAxis:
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    if ylim is not None:
        pylab.ylim(ylim)

    pylab.gcf().subplots_adjust(bottom=0.15, left=0.15)
    fig.savefig(filename + ".pdf", dpi=300, format="pdf")

    plotLegend()

    pylab.close()
Example #2
0
def plotNumVsProportion(pfRange, pfStep):
  """
  Plot the the number of queried features vs the proportion of free features
  """
  # fixed carpet num for this exp
  carpets = 10

  for method in methods:
    for pf in pfRange:
      lensOfQ[method, pf] = []
      times[method, pf] = []

  validInstances = []

  for rnd in range(rndSeeds):
    # set to true if this instance is valid (no safe init policy)
    rndProcessed = False

    for pf in pfRange:
      try:
        pfUb = pf + pfStep

        filename = str(width) + '_' + str(height) + '_' + str(carpets) + '_' + str(pf) + '_' + str(pfUb) + '_' + str(rnd) + '.pkl'
        data = pickle.load(open(filename, 'rb'))
      except IOError:
        print filename, 'not exist'
        continue

      # number of features queried
      for method in methods:
        lensOfQ[method, pf].append(len(data['q'][method]))
        times[method, pf].append(data['t'][method])
      
      if not rndProcessed:
        rndProcessed = True

        validInstances.append(rnd)
    
  print 'valid instances', len(validInstances)
  assert len(validInstances) > 0

  # show cases where method1 and method2 are different with proportion. for further debugging methods
  diffInstances = lambda pf, method1, method2:\
                  (pf, method1, method2,\
                  filter(lambda _: _[1] != _[2], zip(validInstances, lensOfQ[method1, pf], lensOfQ[method2, pf])))

  """
  for pf in pfRange: 
    print diffInstances(pf, 'iisAndRelpi', 'iisOnly')
  """

  # plot figure
  x = pfRange
  y = lambda method: [mean(vectorDiff(lensOfQ[method, pf], lensOfQ[methods[0], pf])) for pf in pfRange]
  yci = lambda method: [standardErr(vectorDiff(lensOfQ[method, pf], lensOfQ[methods[0], pf])) for pf in pfRange]

  plot(x, y, yci, methods, '$p_f$', '# of Queried Features (' + names[methods[0]] + ' as baseline)', 'lensOfQPf' + str(int(pfStep * 10)))
Example #3
0
def plotMeanOfRatioWrtBaseline(x,
                               y,
                               methods,
                               xlabel,
                               ylabel,
                               filename,
                               integerAxis=False,
                               xAxis=None):
    """
    Plot data with a specified baseline.

    For each method the plotted quantity is
    mean(value of this method / value of the baseline), with standard-error
    bars; output goes to <filename>.pdf.
    """
    # can set displayed x to be different
    if xAxis is None:
        xAxis = x

    def ratioStats(method):
        # per-point element-wise ratios against the module-level baseline
        ratios = [vectorDivide(y(method, xElem), y(baseline, xElem))
                  for xElem in x]
        return [mean(r) for r in ratios], [standardErr(r) for r in ratios]

    fig = pylab.figure()
    ax = pylab.gca()

    for method in methods:
        means, errs = ratioStats(method)
        ax.errorbar(xAxis,
                    means,
                    errs,
                    fmt=markers[method],
                    mfc='none',
                    label=names[method],
                    markersize=10,
                    capsize=5)

    pylab.xlabel(xlabel)
    pylab.ylabel(ylabel)

    if integerAxis:
        # x-axis should be integers
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    fig.savefig(filename + ".pdf", dpi=300, format="pdf")

    plotLegend()  # make sure legend is plotted somewhere

    pylab.close()
Example #4
0
def plotRatioOfMeanDiffWrtBaseline(x,
                                   y,
                                   methods,
                                   xlabel,
                                   ylabel,
                                   filename,
                                   integerAxis=False):
    """
    Plot data with a specified baseline.

    For each method the plotted quantity is
    (mean values of this method) / (mean values of the baseline), with the
    standard error of the per-point differences scaled by the baseline mean.
    Output goes to <filename>.pdf.
    """
    def meansOf(method):
        return [mean(y(method, xElem)) for xElem in x]

    def diffErrsOf(method):
        return [standardErr(vectorDiff(y(method, xElem), y(baseline, xElem)))
                for xElem in x]

    fig = pylab.figure()
    ax = pylab.gca()

    baselineMeans = meansOf(baseline)
    for method in methods:
        ax.errorbar(x,
                    vectorDivide(meansOf(method), baselineMeans),
                    vectorDivide(diffErrsOf(method), baselineMeans),
                    fmt=markers[method],
                    mfc='none',
                    label=names[method],
                    markersize=15,
                    capsize=5)

    pylab.xlabel(xlabel)
    pylab.ylabel(ylabel)

    if integerAxis:
        # x-axis should be integers
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    fig.savefig(filename + ".pdf", dpi=300, format="pdf")

    pylab.close()
Example #5
0
def plotNumVsProportion(pfRange, pfStep):
    """
  Plot the the number of queried features vs the proportion of free features
  """
    # fixed carpet num for this exp
    carpets = 10

    for method in methods:
        for pf in pfRange:
            lensOfQ[method, pf] = []
            times[method, pf] = []

    validInstances = []

    for rnd in range(rndSeeds):
        # set to true if this instance is valid (no safe init policy)
        rndProcessed = False

        for pf in pfRange:
            try:
                pfUb = pf + pfStep

                filename = str(width) + '_' + str(height) + '_' + str(
                    carpets) + '_' + str(pf) + '_' + str(pfUb) + '_' + str(
                        rnd) + '.pkl'
                data = pickle.load(open(filename, 'rb'))
            except IOError:
                print filename, 'not exist'
                continue

            # number of features queried
            for method in methods:
                lensOfQ[method, pf].append(len(data['q'][method]))
                times[method, pf].append(data['t'][method])

            if not rndProcessed:
                rndProcessed = True

                validInstances.append(rnd)

    print 'valid instances', len(validInstances)
    assert len(validInstances) > 0

    # show cases where method1 and method2 are different with proportion. for further debugging methods
    diffInstances = lambda pf, method1, method2:\
                    (pf, method1, method2,\
                    filter(lambda _: _[1] != _[2], zip(validInstances, lensOfQ[method1, pf], lensOfQ[method2, pf])))
    """
  for pf in pfRange: 
    print diffInstances(pf, 'iisAndRelpi', 'iisOnly')
  """

    # plot figure
    x = pfRange
    y = lambda method: [
        mean(vectorDiff(lensOfQ[method, pf], lensOfQ[methods[0], pf]))
        for pf in pfRange
    ]
    yci = lambda method: [
        standardErr(vectorDiff(lensOfQ[method, pf], lensOfQ[methods[0], pf]))
        for pf in pfRange
    ]

    plot(x, y, yci, methods, '$p_f$',
         '# of Queried Features (' + names[methods[0]] + ' as baseline)',
         'lensOfQPf' + str(int(pfStep * 10)))
Example #6
0
def plotNumVsCarpets():
    """
  plot the num of queried features / computation time vs. num of carpets
  """
    for method in methods:
        for carpetNum in carpetNums:
            lensOfQ[method, carpetNum] = []
            times[method, carpetNum] = []

    iisSizes = {}
    iisSizesVec = {}

    domPiSizes = {}
    domPiSizesVec = {}

    solveableIns = {}
    validInstances = {}
    for carpetNum in carpetNums:
        iisSizes[carpetNum] = util.Counter()
        iisSizesVec[carpetNum] = []

        domPiSizes[carpetNum] = util.Counter()
        domPiSizesVec[carpetNum] = []

        solveableIns[carpetNum] = util.Counter()

        validInstances[carpetNum] = []

    for rnd in range(rndSeeds):
        for carpetNum in carpetNums:
            try:
                filename = str(width) + '_' + str(height) + '_' + str(
                    carpetNum) + '_0_1_' + str(rnd) + '.pkl'
                data = pickle.load(open(filename, 'rb'))
            except IOError:
                print filename, 'not exist'
                continue

            # number of features queried
            for method in methods:
                lensOfQ[method, carpetNum].append(len(data['q'][method]))
                times[method, carpetNum].append(data['t'][method])

            validInstances[carpetNum].append(rnd)

            # print the case where ouralg is suboptimal
            if 'opt' in methods and len(data['q']['opt']) < len(
                    data['q']['iisAndRelpi']):
                print 'rnd', rnd, 'carpetNum', carpetNum, 'opt', data['q'][
                    'opt'], 'iisAndRelpi', data['q']['iisAndRelpi']

            addFreq(len(data['iiss']), iisSizes[carpetNum])
            iisSizesVec[carpetNum].append(len(data['iiss']))

            addFreq(len(data['relFeats']), domPiSizes[carpetNum])
            domPiSizesVec[carpetNum].append(len(data['relFeats']))

            addFreq(data['solvable'], solveableIns[carpetNum])

    print 'iiss', [
        round(mean(iisSizesVec[carpetNum]), 2) for carpetNum in carpetNums
    ]
    print 'relFeats', [
        round(mean(domPiSizesVec[carpetNum]), 2) for carpetNum in carpetNums
    ]

    print 'validins', [
        len(validInstances[carpetNum]) for carpetNum in carpetNums
    ]
    print 'solvable', [
        round(
            1.0 * solveableIns[carpetNum][True] /
            len(validInstances[carpetNum]), 2) for carpetNum in carpetNums
    ]

    print '# of queries'
    x = carpetNums
    # use the first method as baseline, a bit hacky here.
    y = lambda method: [
        mean(
            vectorDiff(lensOfQ[method, carpetNum], lensOfQ[methods[
                0], carpetNum])) for carpetNum in carpetNums
    ]
    yci = lambda method: [
        standardErr(
            vectorDiff(lensOfQ[method, carpetNum], lensOfQ[methods[
                0], carpetNum])) for carpetNum in carpetNums
    ]
    plot(x, y, yci, methods, '# of Carpets',
         '# of Queried Features (' + names[methods[0]] + ' as baseline)',
         'lensOfQCarpets')

    print 'compute time'
    x = carpetNums
    y = lambda method: [
        mean(times[method, carpetNum]) for carpetNum in carpetNums
    ]
    yci = lambda method: [
        standardErr(times[method, carpetNum]) for carpetNum in carpetNums
    ]
    plot(x, y, yci, methods, '# of Carpets', 'Computation Time (sec.)',
         'timesCarpets')
Example #7
0
    'random': 'c.-'
}
# human-readable legend labels keyed by method id (used for plot legends)
names = {'opt': 'Optimal', 'iisAndRelpi': 'SetCover', 'iisOnly': 'SetCover (IIS)', 'relpiOnly': 'SetCover (rel. feat.)', 'maxProb': 'Greed. Prob.',\
         'piHeu': 'Most-Likely', 'random': 'Descending'}


def addFreq(elem, counter):
    """Record one occurrence of elem in counter (a dict-like tally)."""
    counter[elem] = counter[elem] + 1


# output the difference of two vectors (element-wise v1 - v2)
# NOTE(review): Python 2 map pads the shorter input with None — this
# assumes v1 and v2 have equal length; confirm at call sites
vectorDiff = lambda v1, v2: map(lambda e1, e2: e1 - e2, v1, v2)

# for output as latex table
outputFormat = lambda d: '$' + str(round(mean(d), 4)) + ' \pm ' + str(
    round(standardErr(d), 4)) + '$'


def plot(x, y, yci, methods, xlabel, ylabel, filename):
    """
  general script for plotting using pylab
  """
    fig = pylab.figure()

    ax = pylab.gca()
    for method in methods:
        print method, y(method), yci(method)
        lines = ax.errorbar(x,
                            y(method),
                            yci(method),
                            fmt=markers[method],
Example #8
0
def plotNumVsCarpets():
  """
  plot the num of queried features / computation time vs. num of carpets
  """
  for method in methods:
    for carpetNum in carpetNums:
      lensOfQ[method, carpetNum] = []
      times[method, carpetNum] = []

  iisSizes = {}
  iisSizesVec = {}

  domPiSizes = {}
  domPiSizesVec = {}

  solveableIns = {}
  validInstances = {}
  for carpetNum in carpetNums:
    iisSizes[carpetNum] = util.Counter()
    iisSizesVec[carpetNum] = []

    domPiSizes[carpetNum] = util.Counter()
    domPiSizesVec[carpetNum] = []

    solveableIns[carpetNum] = util.Counter()

    validInstances[carpetNum] = []

  for rnd in range(rndSeeds):
    for carpetNum in carpetNums:
      try:
        filename = str(width) + '_' + str(height) + '_' + str(carpetNum) + '_0_1_' +  str(rnd) + '.pkl'
        data = pickle.load(open(filename, 'rb'))
      except IOError:
        print filename, 'not exist'
        continue

      # number of features queried
      for method in methods:
        lensOfQ[method, carpetNum].append(len(data['q'][method]))
        times[method, carpetNum].append(data['t'][method])

      validInstances[carpetNum].append(rnd)

      # print the case where ouralg is suboptimal
      if 'opt' in methods and len(data['q']['opt']) < len(data['q']['iisAndRelpi']):
        print 'rnd', rnd, 'carpetNum', carpetNum, 'opt', data['q']['opt'], 'iisAndRelpi', data['q']['iisAndRelpi']

      addFreq(len(data['iiss']), iisSizes[carpetNum])
      iisSizesVec[carpetNum].append(len(data['iiss']))

      addFreq(len(data['relFeats']), domPiSizes[carpetNum])
      domPiSizesVec[carpetNum].append(len(data['relFeats']))

      addFreq(data['solvable'], solveableIns[carpetNum])


  print 'iiss', [round(mean(iisSizesVec[carpetNum]), 2) for carpetNum in carpetNums]
  print 'relFeats', [round(mean(domPiSizesVec[carpetNum]), 2) for carpetNum in carpetNums]

  print 'validins', [len(validInstances[carpetNum]) for carpetNum in carpetNums]
  print 'solvable', [round(1.0 * solveableIns[carpetNum][True] / len(validInstances[carpetNum]), 2) for carpetNum in carpetNums]

  print '# of queries'
  x = carpetNums
  # use the first method as baseline, a bit hacky here.
  y = lambda method: [mean(vectorDiff(lensOfQ[method, carpetNum], lensOfQ[methods[0], carpetNum])) for carpetNum in carpetNums]
  yci = lambda method: [standardErr(vectorDiff(lensOfQ[method, carpetNum], lensOfQ[methods[0], carpetNum])) for carpetNum in carpetNums]
  plot(x, y, yci, methods, '# of Carpets', '# of Queried Features (' + names[methods[0]] + ' as baseline)', 'lensOfQCarpets')

  print 'compute time'
  x = carpetNums
  y = lambda method: [mean(times[method, carpetNum]) for carpetNum in carpetNums]
  yci = lambda method: [standardErr(times[method, carpetNum]) for carpetNum in carpetNums]
  plot(x, y, yci, methods, '# of Carpets', 'Computation Time (sec.)', 'timesCarpets')
Example #9
0
# methods to evaluate; optionally include the optimal and random baselines
# (includeOpt / includeRandom are module-level flags)
methods = (['opt'] if includeOpt else []) \
          + ['iisAndRelpi', 'iisOnly', 'relpiOnly', 'maxProb', 'piHeu'] \
          + (['random'] if includeRandom else [])

# matplotlib format strings per method (color, marker, line style)
markers = {'opt': 'r*-', 'iisAndRelpi': 'bo-', 'iisOnly': 'bs--', 'relpiOnly': 'bd-.', 'maxProb': 'g^-', 'piHeu': 'm+-', 'random': 'c.-'}
# human-readable legend labels per method
names = {'opt': 'Optimal', 'iisAndRelpi': 'SetCover', 'iisOnly': 'SetCover (IIS)', 'relpiOnly': 'SetCover (rel. feat.)', 'maxProb': 'Greed. Prob.',\
         'piHeu': 'Most-Likely', 'random': 'Descending'}

# record one occurrence of elem in counter (a dict-like tally)
def addFreq(elem, counter): counter[elem] += 1

# output the difference of two vectors (element-wise v1 - v2;
# assumes equal lengths — Python 2 map pads the shorter with None)
vectorDiff = lambda v1, v2: map(lambda e1, e2: e1 - e2, v1, v2)

# for output as latex table: '$mean \pm stderr$'
outputFormat = lambda d: '$' + str(round(mean(d), 4)) + ' \pm ' + str(round(standardErr(d), 4)) + '$'

def plot(x, y, yci, methods, xlabel, ylabel, filename):
  """
  general script for plotting using pylab
  """
  fig = pylab.figure()

  ax = pylab.gca()
  for method in methods:
    print method, y(method), yci(method)
    lines = ax.errorbar(x, y(method), yci(method), fmt=markers[method], mfc='none', label=names[method], markersize=10, capsize=5)

  pylab.xlabel(xlabel)
  pylab.ylabel(ylabel)
  pylab.legend()
Example #10
0
def maximumRegretK():
    mr = {}
    nmr = {}  # normalized mr
    dmr = {}  # delta mr
    time = {}
    q = {}
    regret = {}

    validTrials = trials - len(excluded)

    relPhiNum = {}
    for n in nRange:
        for r in range(trials):
            if r in excluded: continue
            domainFileName = dataDir + 'domain_' + str(n) + '_' + str(
                r) + '.pkl'
            (relFeats, domPis,
             domPiTime) = pickle.load(open(domainFileName, 'rb'))
            relPhiNum[n, r] = len(relFeats)

    # plot distribution over # of relevant features
    print relPhiNum
    hist(relPhiNum.values(), 'brute', '', '$|\Phi_{rel}|$', 'Frequency',
         'numRelPhi')

    for n in nRange:
        print n
        for mr_type in ['mrk']:
            #mr_label = '$MR$' if mr_type == 'mr' else '$MR_k$'
            #FIXME call it MR no matter if it's MR or MR_k
            mr_label = '$MR$'
            title = "$|\Phi_?| = " + str(n) + "$"
            for method in methods:
                for k in kRange(method):
                    mr[method, k, n, mr_type] = []
                    time[method, k, n, mr_type] = []
                    q[method, k, n, mr_type] = []
                    regret[method, k, n, mr_type] = []
                    for r in range(trials):
                        if r not in excluded:
                            try:
                                ret = pickle.load(
                                    open(
                                        dataDir + method + '_' + mr_type +
                                        '_' + str(k) + '_' + str(n) + '_' +
                                        str(r) + '.pkl', 'rb'))
                                mr[method, k, n, mr_type].append(ret[mr_type])
                                time[method, k, n, mr_type].append(ret['time'])
                                q[method, k, n, mr_type].append(ret['q'])
                                #regret[method, k, n, mr_type].append(ret['regret'])
                            except IOError:
                                print 'not reading', method, k, n, r

            for method in methods:
                for k in kRange(method):
                    nmr[method, k, n, mr_type] = []
                    dmr[method, k, n, mr_type] = []
                    for r in range(validTrials):
                        normalizedmr = normalize(mr[method, k, n, mr_type][r],
                                                 mr['alg1', k, n, mr_type][r],
                                                 mr['nq', k, n, mr_type][r])
                        if normalizedmr != None:
                            nmr[method, k, n, mr_type].append(normalizedmr)

                        dmr[method, k, n,
                            mr_type].append(mr[method, k, n, mr_type][r] -
                                            mr['alg1', k, n, mr_type][r])

                    hist(dmr[method, k, n, mr_type], method,
                         legends[method] + ", k = " + str(k),
                         "$MR(\Phi_q) - MR(\Phi_q^{MMR})$", "Frequency",
                         "mrkFreq_" + method + "_" + str(n) + "_" + str(k))
            """
      print 'measured by mr/mrk'
      plot(kRange, lambda method: [mean(mr[method, _, n, mr_type]) for _ in kRange], lambda method: [standardErr(mr[method, _, n, mr_type]) for _ in kRange],
           methods, title, "k", mr_label, "mr_" + str(n) + "_" + mr_type)

      """
            print 'measured by normalized mr/mrk'
            plot(
                kRange, lambda method:
                [mean(nmr[method, _, n, mr_type])
                 for _ in kRange(method)], lambda method: [
                     standardErr(nmr[method, _, n, mr_type])
                     for _ in kRange(method)
                 ], methods, title, "k", "Normalized " + mr_label,
                "nmr_" + str(n) + "_" + mr_type)
            """

      # COMPARING WITH ALG1 for now
      print 'ratio of finding mmr-q'
      plot(kRange, lambda method: [100.0 * sum(mr[method, k, n, mr_type][_] == mr['alg1', k, n, mr_type][_] for _ in range(validTrials)) / validTrials for k in kRange], lambda _: [0.0] * len(kRange),
           methods, title, "k", "% of Finding a MMR Query", "ratiok_" + str(n) + "_" + mr_type)

      print 'measured by expected regret'
      plot(kRange, lambda method: [mean(regret[method, _, n, mr_type]) for _ in kRange], lambda method: [standardErr(regret[method, _, n, mr_type]) for _ in kRange],
           methods, title, "k", "Expected Regret", "regret_" + str(n) + "_" + mr_type)
      """

            # FIXME may require plotting brute force as well
            print 'time'
            plot(
                kRange, lambda method:
                [mean(time[method, _, n, mr_type])
                 for _ in kRange(method)], lambda method: [
                     standardErr(time[method, _, n, mr_type])
                     for _ in kRange(method)
                 ], methods, title, "k", "Computation Time (sec.)",
                "t_" + str(n) + "_" + mr_type)

    assert all(_ >= 0 for _ in regret.values())
    """
Example #11
0
def maximumRegretCVSRelPhi():
    mr = {}
    nmr = {}  # normalized mr
    time = {}
    kRange = range(1, 11)  # same for all figures

    validTrials = trials - len(excluded)

    relPhiNum = {}
    for n in nRange:
        for r in range(trials):
            if r in excluded: continue
            domainFileName = dataDir + 'domain_' + str(n) + '_' + str(
                r) + '.pkl'
            (relFeats, domPis,
             domPiTime) = pickle.load(open(domainFileName, 'rb'))
            relPhiNum[n, r] = len(relFeats)

    #print relPhiNum

    # granularity of x axis
    gran = 1
    bins = range(max(nRange) / gran + 1)

    for k in kRange:
        print k

        for mr_type in ['mrk']:
            #mr_label = '$MR$' if mr_type == 'mr' else '$MR_k$'
            #FIXME call it MR no matter if it's MR or MR_k
            mr_label = '$MR$'
            title = "$k = " + str(k) + "$"
            print mr_type

            xScatter = {}
            yScatter = {}
            for method in methods:
                xScatter[method] = []
                yScatter[method] = []
                for bin in bins:
                    mr[method, k, bin] = []
                    time[method, k, bin] = []

                for n in nRange:
                    for r in range(trials):
                        if r in excluded: continue
                        #FIXME weird to read the data file multiple times, but we are representing in a different way. should be fine.
                        ret = pickle.load(
                            open(
                                dataDir + method + '_' + mr_type + '_' +
                                str(k) + '_' + str(n) + '_' + str(r) + '.pkl',
                                'rb'))
                        mr[method, k,
                           relPhiNum[n, r] / gran].append(ret[mr_type])
                        time[method, k,
                             relPhiNum[n, r] / gran].append(ret['time'])

                        xScatter[method].append(relPhiNum[n, r])
                        yScatter[method].append(ret[mr_type])

            for method in methods:
                for bin in bins:
                    nmr[method, k, bin] = []
                    for r in range(len(mr['alg1', k, bin])):
                        normalizedmr = normalize(mr[method, k, bin][r],
                                                 mr['alg1', k, bin][r],
                                                 mr['nq', k, bin][r])
                        if normalizedmr != None:
                            nmr[method, k, bin].append(normalizedmr)
            """
      print 'measured by mr/mrk'
      plot([_ * gran for _ in bins], lambda method: [mean(mr[method, k, _]) for _ in bins], lambda method: [standardErr(mr[method, k, _]) for _ in bins],
           methods, title, "|$\Phi_{rel}$|", mr_label, "mrc_" + str(k) + "_" + mr_type)
      """

            print 'measured by normalized mr/mrk'
            plot([_ * gran for _ in bins],
                 lambda method: [mean(nmr[method, k, _]) for _ in bins],
                 lambda method: [standardErr(nmr[method, k, _]) for _ in bins],
                 methods, title, "|$\Phi_{rel}$|", "Normalized " + mr_label,
                 "nmrc_" + str(k) + "_" + mr_type)

            #scatter(xScatter, yScatter, methods, title, "|$\Phi_{rel}$|", "Maximum Regret (" + mr_label + ")", "mrc_" + str(k) + "_" + mr_type)
            """
Example #12
0
def maximumRegretK():
  mr = {}
  nmr = {} # normalized mr
  dmr = {} # delta mr
  time = {}
  q = {}
  regret = {}
  
  validTrials = trials - len(excluded)

  relPhiNum = {}
  for n in nRange:
    for r in range(trials):
      if r in excluded: continue
      domainFileName = dataDir + 'domain_' + str(n) + '_' + str(r) + '.pkl'
      (relFeats, domPis, domPiTime) = pickle.load(open(domainFileName, 'rb'))
      relPhiNum[n, r] = len(relFeats)
  
  # plot distribution over # of relevant features
  print relPhiNum
  hist(relPhiNum.values(), 'brute', '', '$|\Phi_{rel}|$', 'Frequency', 'numRelPhi')
 
  for n in nRange:
    print n
    for mr_type in ['mrk']:
      #mr_label = '$MR$' if mr_type == 'mr' else '$MR_k$'
      #FIXME call it MR no matter if it's MR or MR_k
      mr_label = '$MR$'
      title = "$|\Phi_?| = " + str(n) + "$"
      for method in methods:
        for k in kRange(method):
          mr[method, k, n, mr_type] = []
          time[method, k, n, mr_type] = []
          q[method, k, n, mr_type] = []
          regret[method, k, n, mr_type] = []
          for r in range(trials):
            if r not in excluded:
              try:
                ret = pickle.load(open(dataDir + method + '_' + mr_type + '_' + str(k) + '_' + str(n) + '_' + str(r) + '.pkl', 'rb'))
                mr[method, k, n, mr_type].append(ret[mr_type])
                time[method, k, n, mr_type].append(ret['time'])
                q[method, k, n, mr_type].append(ret['q'])
                #regret[method, k, n, mr_type].append(ret['regret'])
              except IOError:
                print 'not reading', method, k, n, r

      for method in methods:
        for k in kRange(method):
          nmr[method, k, n, mr_type] = []
          dmr[method, k, n, mr_type] = []
          for r in range(validTrials):
            normalizedmr = normalize(mr[method, k, n, mr_type][r], mr['alg1', k, n, mr_type][r], mr['nq', k, n, mr_type][r])
            if normalizedmr != None:
              nmr[method, k, n, mr_type].append(normalizedmr)
            
            dmr[method, k, n, mr_type].append(mr[method, k, n, mr_type][r] - mr['alg1', k, n, mr_type][r])
            
          hist(dmr[method, k, n, mr_type], method, legends[method] + ", k = " + str(k), "$MR(\Phi_q) - MR(\Phi_q^{MMR})$", "Frequency",
                       "mrkFreq_" + method + "_" + str(n) + "_" + str(k))
        
      """
      print 'measured by mr/mrk'
      plot(kRange, lambda method: [mean(mr[method, _, n, mr_type]) for _ in kRange], lambda method: [standardErr(mr[method, _, n, mr_type]) for _ in kRange],
           methods, title, "k", mr_label, "mr_" + str(n) + "_" + mr_type)

      """
      print 'measured by normalized mr/mrk'
      plot(kRange, lambda method: [mean(nmr[method, _, n, mr_type]) for _ in kRange(method)], lambda method: [standardErr(nmr[method, _, n, mr_type]) for _ in kRange(method)],
           methods, title, "k", "Normalized " + mr_label, "nmr_" + str(n) + "_" + mr_type)
      """

      # COMPARING WITH ALG1 for now
      print 'ratio of finding mmr-q'
      plot(kRange, lambda method: [100.0 * sum(mr[method, k, n, mr_type][_] == mr['alg1', k, n, mr_type][_] for _ in range(validTrials)) / validTrials for k in kRange], lambda _: [0.0] * len(kRange),
           methods, title, "k", "% of Finding a MMR Query", "ratiok_" + str(n) + "_" + mr_type)

      print 'measured by expected regret'
      plot(kRange, lambda method: [mean(regret[method, _, n, mr_type]) for _ in kRange], lambda method: [standardErr(regret[method, _, n, mr_type]) for _ in kRange],
           methods, title, "k", "Expected Regret", "regret_" + str(n) + "_" + mr_type)
      """

      # FIXME may require plotting brute force as well
      print 'time'
      plot(kRange, lambda method: [mean(time[method, _, n, mr_type]) for _ in kRange(method)], lambda method: [standardErr(time[method, _, n, mr_type]) for _ in kRange(method)],
           methods, title, "k", "Computation Time (sec.)", "t_" + str(n) + "_" + mr_type)
  
  assert all(_ >= 0 for _ in regret.values())
  """
Example #13
0
def maximumRegretCVSRelPhi():
  mr = {}
  nmr = {} # normalized mr
  time = {}
  kRange = range(1,11) # same for all figures

  validTrials = trials - len(excluded)

  relPhiNum = {}
  for n in nRange:
    for r in range(trials):
      if r in excluded: continue
      domainFileName = dataDir + 'domain_' + str(n) + '_' + str(r) + '.pkl'
      (relFeats, domPis, domPiTime) = pickle.load(open(domainFileName, 'rb'))
      relPhiNum[n, r] = len(relFeats)
  
  #print relPhiNum

  # granularity of x axis
  gran = 1
  bins = range(max(nRange) / gran + 1)

  for k in kRange:
    print k

    for mr_type in ['mrk']:
      #mr_label = '$MR$' if mr_type == 'mr' else '$MR_k$'
      #FIXME call it MR no matter if it's MR or MR_k
      mr_label = '$MR$'
      title = "$k = " + str(k) + "$"
      print mr_type

      xScatter = {}
      yScatter = {}
      for method in methods:
        xScatter[method] = []
        yScatter[method] = []
        for bin in bins: 
          mr[method, k, bin] = []
          time[method, k, bin] = []

        for n in nRange:
          for r in range(trials):
            if r in excluded: continue
            #FIXME weird to read the data file multiple times, but we are representing in a different way. should be fine.
            ret = pickle.load(open(dataDir + method + '_' + mr_type + '_' + str(k) + '_' + str(n) + '_' + str(r) + '.pkl', 'rb'))
            mr[method, k, relPhiNum[n, r] / gran].append(ret[mr_type])
            time[method, k, relPhiNum[n, r] / gran].append(ret['time'])
            
            xScatter[method].append(relPhiNum[n, r])
            yScatter[method].append(ret[mr_type])
            
      for method in methods:
        for bin in bins:
          nmr[method, k, bin] = []
          for r in range(len(mr['alg1', k, bin])):
            normalizedmr = normalize(mr[method, k, bin][r], mr['alg1', k, bin][r], mr['nq', k, bin][r])
            if normalizedmr != None:
              nmr[method, k, bin].append(normalizedmr)

      """
      print 'measured by mr/mrk'
      plot([_ * gran for _ in bins], lambda method: [mean(mr[method, k, _]) for _ in bins], lambda method: [standardErr(mr[method, k, _]) for _ in bins],
           methods, title, "|$\Phi_{rel}$|", mr_label, "mrc_" + str(k) + "_" + mr_type)
      """

      print 'measured by normalized mr/mrk'
      plot([_ * gran for _ in bins], lambda method: [mean(nmr[method, k, _]) for _ in bins], lambda method: [standardErr(nmr[method, k, _]) for _ in bins],
           methods, title, "|$\Phi_{rel}$|", "Normalized " + mr_label, "nmrc_" + str(k) + "_" + mr_type)

      #scatter(xScatter, yScatter, methods, title, "|$\Phi_{rel}$|", "Maximum Regret (" + mr_label + ")", "mrc_" + str(k) + "_" + mr_type)

      """