def _density_dependent_centers(wavelengths, cumulative, alphas):
  """Pick one wavelength per target level of the cumulative intensity.

  Walks `alphas` and `cumulative` in lockstep: for each target level the
  walk advances along `cumulative` until a value >= that level is found and
  the wavelength at that position becomes a center. A target that resolves
  to the same wavelength as the previously accepted center is skipped, so
  fewer than len(alphas) centers may be returned. Targets that are never
  reached are dropped.

  Assumes `alphas` is ascending and `cumulative` is non-decreasing -- the
  single forward pass only finds the first crossing under that condition.

  Args:
    wavelengths: sequence of wavelength values, positionally aligned with
      `cumulative`.
    cumulative: positionally indexable sequence of cumulative intensities.
    alphas: sequence of target cumulative levels.

  Returns:
    (centers, levels): the selected wavelengths and, for each, the target
    level that selected it.
  """
  centers = []
  levels = []
  alpha_index = 0
  w_index = 0
  while alpha_index < len(alphas) and w_index < len(wavelengths):
    if cumulative[w_index] >= alphas[alpha_index]:
      # Only compare with the most recent center: consecutive targets that
      # land on the same wavelength must not produce duplicate centers.
      if not centers or wavelengths[w_index] != centers[-1]:
        centers.append(wavelengths[w_index])
        levels.append(alphas[alpha_index])
      alpha_index += 1
    else:
      w_index += 1
  return centers, levels


def main():
  """Reduce the spectra to centers placed by cumulative intensity.

  Reads ../data/CCS/subsetShots_5pct.csv, normalizes it with
  normalizeRows(df, useWidths=False), and sums every column from position 3
  onward over all rows (the first three columns are presumably metadata --
  confirm against the data file). Target levels nrow * (2j - 1) / (2k),
  j = 1..k, are matched against the running total of those column sums to
  choose up to k wavelength centers. A diagnostic plot is written to
  wavelengths_density_dependent.pdf, and the data reduced with
  nearestColumnMapping / reduceDimRow is written to
  ../data/CCS/subsetShots_5pct_reduced_by_density.csv.

  NOTE(review): a second `main` is defined later in this source; if both
  live in the same module the later definition shadows this one.
  """
  # print(...) with a single argument behaves identically on Python 2 and 3.
  print("reading data")
  df = pd.read_csv("../data/CCS/subsetShots_5pct.csv")
  print("preprocessing data")
  df = normalizeRows(df, useWidths=False)
  nrow = df.shape[0]
  spectral_columns = df.columns[3:]
  column_totals = df.iloc[:, 3:].sum(axis=0)
  # Use the plain array: the Series is labelled by column name, and integer
  # keys on a non-integer index rely on a deprecated positional fallback.
  cumulative = column_totals.cumsum().values

  # A real list (not a lazy map object) so len() and indexing work on
  # Python 3 as well.
  wavelengths = [extractWavelength(name) for name in spectral_columns]

  k = 50  # desired number of features
  alphas = [nrow * (2*j - 1) / (2.0 * k) for j in range(1, k+1)]

  print("computing density-dependent centers")
  new_xs, new_alphas = _density_dependent_centers(wavelengths, cumulative, alphas)

  print(new_xs)

  print("creating plot")
  fig = plt.figure()
  ax = plt.gca()
  for a in new_alphas:
    ax.axhline(y=a / float(nrow), linestyle='-', color='#cccccc')
  for x in new_xs:
    ax.axvline(x=x, linestyle='-', color='#cccccc')
  #ax.twinx().plot(wavelengths, column_totals / float(nrow), color=(0.5,0.7,0.5,0.3))
  ax.plot(wavelengths, cumulative / float(nrow))
  ax.set_xlabel("wavelength")
  ax.set_ylabel("cumulative spectral intensity $\\hat F_D$")
  plt.savefig("wavelengths_density_dependent.pdf")
  plt.close(fig)

  print("reducing data")
  mapping = nearestColumnMapping(new_xs, spectral_columns)
  newData = df.apply(reduceDimRow, axis = 1, centers = mapping)
  print("saving reduced data")
  newData.to_csv("../data/CCS/subsetShots_5pct_reduced_by_density.csv", index=None)
def main():
  """Reduce the spectra to k uniformly spaced wavelength centers.

  Reads ../data/CCS/subsetShots_5pct.csv, normalizes it with
  normalizeRows(df, useWidths=False), and extracts a wavelength from every
  column name from position 3 onward (the first three columns are
  presumably metadata -- confirm against the data file). k = 50 centers are
  spaced evenly between the smallest and largest wavelength, and the data
  reduced with nearestColumnMapping / reduceDimRow is written to
  ../data/CCS/subsetShots_5pct_reduced_uniform.csv.
  """
  # print(...) with a single argument behaves identically on Python 2 and 3.
  print("reading data")
  df = pd.read_csv("../data/CCS/subsetShots_5pct.csv")
  print("preprocessing data")
  df = normalizeRows(df, useWidths=False)

  k = 50  # desired number of features
  spectral_columns = df.columns[3:]
  # Materialize as a list: on Python 3 a map object would be exhausted by
  # min() and leave nothing for max().
  wavelengths = [extractWavelength(name) for name in spectral_columns]
  new_xs = np.linspace(min(wavelengths), max(wavelengths), k)

  print(new_xs)

  print("reducing data")
  mapping = nearestColumnMapping(new_xs, spectral_columns)
  newData = df.apply(reduceDimRow, axis = 1, centers = mapping)
  print("saving reduced data")
  newData.to_csv("../data/CCS/subsetShots_5pct_reduced_uniform.csv", index=None)