def pdgRowAndColumnConstraints(probabilities_channel,parameter_to_index,num_pgd_iterations,alpha,beta,eta,lower_bound,armijo_beta,armijo_sigma,num_plain_letters,num_cipher_letters,expected_counts):
  p = zeros(shape=(num_plain_letters,num_plain_letters))
  emMethods.dictionaryToArray(p,probabilities_channel,parameter_to_index)
  '''
  #expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
  #current_fractional_counts = fractional_counts_channel
  #first populating the expected counts and probabilities matrix
  #for i,plain_letter in enumerate(probabilities_channel.keys()) :
  emMethods.dictionaryToArray(expected_counts,current_fractional_counts,parameter_to_index)
  for i,k in enumerate(range(65, 91)):
    plain_letter = chr(k)
    expected_counts_sum = 0.
    #print plain_letter
    #print 'number of cipher letters is ',len(probabilities_channel[plain_letter].keys())
    for cipher_letter in probabilities_channel[plain_letter].keys() :
      parameter_number = parameter_to_index[plain_letter][cipher_letter]
      #print 'parameter number is ',parameter_number
      #print 'i is ',i
      p[i][parameter_number] = probabilities_channel[plain_letter][cipher_letter]  
      expected_counts[i][parameter_number] = current_fractional_counts[plain_letter][cipher_letter]
      expected_counts_sum +=  current_fractional_counts[plain_letter][cipher_letter]
    #print 'expected counts sum was ',expected_counts_sum
    #print parameter_to_index
  '''
  #current_eta = eta_0 #/sqrt(num_iterations+1)
  print 'Doing projected gradient descent'
  new_probabilities = projectedGradientDescentWithArmijoRuleMatrix(p = p,expected_counts = expected_counts,num_pgd_iterations = num_pgd_iterations,eta = eta,lower_bound = lower_bound,armijo_beta = armijo_beta,armijo_sigma = armijo_sigma,alpha = alpha,beta = beta,num_cipher_letters = num_cipher_letters)
  print 'finished projected gradient descent'
  #new_probabilities = newtonProjectedGradientDescentWithArmijoRule(x,num_pgd_iterations,eta,lower_bound,armijo_beta,armijo_sigma)
  #print new_probabilities
  #raw_input()
  print 'we are replacing channel probs'
  assignProbsMatrix(new_probabilities,probabilities_channel,parameter_to_index)
  '''
def nceUpdate(learning_rate,gradient_num,probabilities_channel,parameter_to_index,num_plain_letters):
  p = zeros(shape=(num_plain_letters,num_plain_letters))
  emMethods.dictionaryToArray(p,probabilities_channel,parameter_to_index)
  #print 'gradient num is ',gradient_num
  #print 'p is ',p
  gradient = gradient_num/p
  new_point = numpy.array(p)
  new_point += learning_rate*gradient
  new_feasible_point,blah = dykstra(new_point,10E-6)
  new_feasible_point = array(new_feasible_point)
  #print 'new feasible point is ',new_feasible_point 
  #print 'new feasible point shape is ',new_feasible_point.shape
  #add epsilon to new feasible point and then renormalize
  #new_feasible_point += 10E-7
  return new_feasible_point,p,gradient
def main() :
  # setting up options
  parser = OptionParser()
  parser.add_option("--num_iter", action="store", type="int", dest="num_iterations",default=100,help="number of iterations you would like to run em+smoothed l0. Default is 50")
  parser.add_option("--initial_alpha", action="store", type="float", dest="initial_alpha",default = 0.0,help="initial_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--final_alpha", action="store", type="float", dest="final_alpha",default = 0.0,help="final_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--beta", action="store", type="float", dest="beta",default = 0.5,help="beta, the smoothness of the l0 prior, smaller the sigma, closer the approximation to true L0. Default is 0.5")
  parser.add_option("--slack", action="store_true", dest="slack_option",default = False,help="if you want to project on the simplex with slack.")
  parser.add_option("--noslack", action="store_false", dest="slack_option",default = False, help="if you want to project on the simplex with no slack. This is the regular projection approach")
  parser.add_option("--num_pgd_iterations", action="store", type="int", dest="num_pgd_iterations",default = 10,help="Number of Projected Gradient Descent to run. Default is 100")
  parser.add_option("--eta", action="store", type="float", dest="eta",default = 0.1,help="Eta, the constant step size for PGD using armijo line search. Default is 0.1")
  parser.add_option("--armijo_beta", action="store", type="float", dest="armijo_beta",default = 0.5,help="Set value for Armijo beta, the beta used in in armijo line search. Default value is 0.2")
  parser.add_option("--armijo_sigma", action="store", type="float", dest="armijo_sigma",default = 0.5,help="Set value for Armijo sigma, the sigma used in in armijo line search. Lower bound is 0.0001")
  parser.add_option("--lower_bound", action="store", type="float", dest="lower_bound",default = 0.000001,help="Set value for the lower bound on the probability.Default is 10E-6")
  parser.add_option("--cipher_data_file", action="store", type="string", dest="cipher_data_file",default ='cipher.data',help="Cipher data file for training")
  parser.add_option("--cipher_noq_file", action="store", type="string", dest="cipher_noq_file",default ='cipher.noq',help="Cipher data without quotes")
  parser.add_option("--cipher_decode_file", action="store", type="string", dest="cipher_decode_file",default ='cipher.decode',help="Cipher file for decoding")
  parser.add_option("--cipher_gold_file", action="store", type="string", dest="cipher_gold_file",default ='cipher.gold',help="The correct decipherment")
  parser.add_option("--lm", action="store", type="string", dest="lm",default ='lm.carmel',help="The lm file")
  parser.add_option("--noe_lm", action="store", type="string", dest="noe_lm",default ='lm.carmel',help="The noe lm file")
  parser.add_option("--gaussian_params_file", action="store", type="string", dest="gaussian_params_file",default =None,help="The means and variances for each gaussian letter")

  parser.add_option("--unigram_probs_file", action="store", type="string", dest="unigram_probs_file",default =None,help="The means and variances for each gaussian letter")
  parser.add_option("--std_mult", action="store", type="float", dest="std_mult",default =1.,help="The multiplier for the std ")
  parser.add_option("--u", action="store_true", dest="uniform_init",default=False)
  parser.add_option("--g", action="store_true", dest="gaussian_init",default=False)
  parser.add_option("--i", action="store_true", dest="identity_init",default=False)
  parser.add_option("--hpc", action="store_true", dest="hpc",default=False)
  parser.add_option("--full_fst", action="store_true", dest="full_fst",default=False)
  parser.add_option("--posterior_decoding", action="store_true", dest="posterior_decoding",default=False)
  
  (options, args) = parser.parse_args()

  print options
  print args
  
  #getting the values from optparse
  num_iterations = options.num_iterations
  initial_alpha = options.initial_alpha
  final_alpha = options.final_alpha
  beta = options.beta
  slack_option = options.slack_option
  num_pgd_iterations = options.num_pgd_iterations
  eta = options.eta
  armijo_beta = options.armijo_beta
  armijo_sigma = options.armijo_sigma
  lower_bound = options.lower_bound
  cipher_data_file = options.cipher_data_file
  cipher_decode_file = options.cipher_decode_file
  cipher_noq_file = options.cipher_noq_file
  cipher_gold_file = options.cipher_gold_file
  lm = options.lm
  noe_lm = options.noe_lm
  gaussian_params_file = options.gaussian_params_file
  unigram_probs_file = options.unigram_probs_file
  uniform_init = options.uniform_init
  gaussian_init = options.gaussian_init
  identity_init = options.identity_init
  hpc = options.hpc
  std_mult = options.std_mult
  full_fst = options.full_fst
  posterior_decoding_flag = options.posterior_decoding

  #setting up paramters
  fractional_counts_language = {}
  fractional_counts_channel = {}
  probabilities_channel = {}
  probabilities_language = {}
  current_probabilities = {}
  current_fractional_counts = {}
  #constraint_parameters = []
  initial_parameter_args = {}
  current_initial_parameter_args = {}
  parameter_to_index = {}
  parameter_counter = 0
  num_constraints = 1
  constraint_tags_dict = {} #this will hold the tag for the constraint. the key is the constraint id and the value is the tag corresponding to this constraint
  #beta = float(0)
  #alpha = float(0)
  global_num_parameters = 0
  init_option = ''
  current_optimization_tag = '' #this will hold the of the constraint for which we are doing the optimization

  #adding parser options
  '''
  print 'beta is ',beta
  print 'alpha is ',alpha
  print 'eta is ',eta
  raw_input()
  '''
  gold_cipher = emMethods.readCipherFile(cipher_gold_file)
  ciphertext = emMethods.readCipherFile(cipher_noq_file)
  ciphertext_with_spaces  = open(cipher_noq_file).readline().strip().split()
  print gold_cipher
  #dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
  #word_lines= emMethods.readWordLines('test.words.new-formatted')
  cipher_letter_dict = emMethods.getUniqCipherLetters(cipher_noq_file)
  cipher_probs = defaultdict(float)
  emMethods.getCipherLetterProbs(cipher_probs,cipher_noq_file)
  print 'cipher probs are '
  #gaussians = defaultdict(list)
  #emMethods.readGaussians(gaussians,gaussian_params_file)
  plain_unigram_probs = dict((line.strip().split()[0],float(line.strip().split()[1])) for line in open(unigram_probs_file))

  #get cipher letter probs
  del cipher_letter_dict['_']
  #word_list_five = emMethods.readWordList('TEXT.3.linear')
  #plaintext = map(chr, range(97, 123))
  plaintext_letters = []
  ciphertext_letters = []
  for k in range(65, 91):
    plaintext_letters.append(chr(k))
    ciphertext_letters.append(chr(k).lower())
  print plaintext_letters
  print 'the number of unique cipher letter is %d'%len(cipher_letter_dict.keys())
  print cipher_letter_dict
  num_cipher_letters = len(cipher_letter_dict.keys()) 
  num_plain_letters = 26
  #gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')
     
  free_parameters_channel = defaultdict(lambda:defaultdict(float))
  free_parameters_language = defaultdict(lambda:defaultdict(float))
  print 'starting to create parameters'
  total_language_parameters = 0
  total_channel_parameters = 0
  #for line in cipher_lines :
    #print 'created parameters for a line'
    #(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
  if full_fst == True:
    emMethods.getFreeCipherParametersChannel(plaintext_letters,free_parameters_channel)
    num_cipher_letters = 26
  else :
    emMethods.getFreeCipherParametersChannel(plaintext_letters,free_parameters_channel,cipher_letter_dict = cipher_letter_dict)
  temp = {'_':0.0}
  free_parameters_channel['_'] = temp
  #now, we will build all the lattices, and create a special start node and end node for every sentence
  start_node_end_node_list = []

  fractional_counts_channel = copy.deepcopy(free_parameters_channel)
  probabilities_channel = copy.deepcopy(free_parameters_channel)
  #print 'gaussians'
  #print gaussians
  #createRandomPoint(probabilities_channel)
  if (uniform_init == True) :
    print 'uniform initialization'
    emMethods.initUniformProbs(probabilities_channel)
#  emMethods.initUniformProbs(probabilities_language,probabilities_channel)
  if (gaussian_init == True) :
    print 'gaussian initialization'
    #emMethods.initFromGaussians(probabilities_channel,gaussians,cipher_probs,std_mult)

    emMethods.initFromGaussiansSingleStd(probabilities_channel,plain_unigram_probs,cipher_probs,std_mult)
  if (identity_init == True) :
    emMethods.initIdentity(probabilities_channel)
  #print 'channel probabilities after weighting are '
  #print probabilities_channel
  #raw_input()

  emMethods.writeFst('cipher.fst',probabilities_channel)
  #sys.exit()
  final_probabilities_channel = copy.deepcopy(free_parameters_channel)
  run_training = ''
  if hpc == True:
    run_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_posterior_decoding = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s posterior_decode.fst"%(cipher_data_file,lm)
  else :
    run_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_posterior_decoding = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s posterior_decode.fst"%(cipher_data_file,lm)


  #running the EM iterations
  #we are creating the indexes for algencan . Notice that here, the probabilities language is already uniform and therefore none of them will be zero
  index_to_parameter = {}
  createParametersForScaling(probabilities_channel,parameter_to_index,index_to_parameter)
  start_time = time.clock()
  print 'start time was ',start_time
  #fractional_counts_dump_file = open('fractional.params','w')
  #probabilities_dump_file = open('probs.params','w')
  #optimized_probabilities_dump_file = open('probs.optimized.params','w')
  alpha_delta = (final_alpha-initial_alpha)/(num_iterations-1)
  current_alpha = initial_alpha
  for i in range (0,num_iterations) :
    print 'the iteration number was ',i
    #this will create the parameters
    total_corpus_probability = 0.0
    (status,output) = commands.getstatusoutput(run_training)  
    print 'we just ran the training'
    print output
    print status
    prob_match = probability_re.search(output)
    print 'current alpha is ',current_alpha
    if prob_match == None :
      print'we should have found a probability'
    else :
      print 'the probability is %s'%prob_match.group(1)
    temp_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
    total_corpus_probability = 0.693147181 * temp_corpus_probability
    print 'reading language fractional counts'
  
    emMethods.readCarmelFractionalCounts('cipher.fst.trained',fractional_counts_channel,'channel')
    print 'read the fst'
      
    print' the probability of the corpus was %f' %total_corpus_probability
    print 'we are now checking the accuracies'
    noe_command = 'cat tagging.fsa | sed \'s/*e*//g\' > tagging.fsa.noe'
    (status,output) = commands.getstatusoutput(noe_command)
    print 'we wrote the noe fsa'
    if hpc == True:
      viterbi_command = "cat %s | /home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)
    else :
      viterbi_command = "cat %s | /Users/avaswani/graehl/carmel/bin/macosx/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)

    (status,output) = commands.getstatusoutput(viterbi_command)
    print 'status',status
    print 'output',output
    #tagged_sequence = emMethods.readTaggingOutput('tagging_output')  
    deciphered_sequence = emMethods.readCipherFile('decipherment_output')
    print 'length of deciphered sequence was ',len(deciphered_sequence)
    accuracy = emMethods.calcAccuracy(gold_cipher,deciphered_sequence)

    print 'The accuracy was %s and the objective function value was %s'%(str(accuracy),str(evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,current_alpha,beta)))
      
    expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
    #current_fractional_counts = fractional_counts_channel
    #first populating the expected counts and probabilities matrix
    #for i,plain_letter in enumerate(probabilities_channel.keys()) :
    emMethods.dictionaryToArray(expected_counts,fractional_counts_channel,parameter_to_index)
    '''
    print 'Doing posterior decoding'
    # decode
    m = Munkres()
    #J is not a matrix. its pairs of coordinates
    J = m.compute(-expected_counts)
    key = dict((index_to_parameter[plaintext_letters[n]][J[n][1]],plaintext_letters[J[n][0]]) for n in range(len(ciphertext_letters)))
    print 'the key was ',key
    posterior_decoded_string = []
    for letter in ciphertext:
      if letter == '_':
        posterior_decoded_string.append('_')
      else :
        posterior_decoded_string.append(key[letter])
    posterior_decoded_accuracy = emMethods.calcAccuracy(gold_cipher,posterior_decoded_string)
    print 'The posterior decoded accuracy was',posterior_decoded_accuracy
    '''
    pdgRowAndColumnConstraints(probabilities_channel,parameter_to_index,num_pgd_iterations,current_alpha,beta,eta,lower_bound,armijo_beta,armijo_sigma,num_plain_letters,num_cipher_letters,expected_counts=expected_counts)

    #now writing the fsa back again
    emMethods.writeFst('cipher.fst',probabilities_channel)
  
    #if the user has asked for posterior decoding, then we must do it
    if posterior_decoding_flag == True:
      print 'running command',run_posterior_decoding
      emMethods.writePosteriorDecodingFST(ciphertext_with_spaces,probabilities_channel,'posterior_decode.fst')
      (status,output) = commands.getstatusoutput(run_posterior_decoding)  
      print 'we just ran posterior decoding'
      print output
      print status
      prob_match = probability_re.search(output)
      if prob_match == None :
        print'we should have found a probability in posterior decoding'
      else :
        print 'the posterior decoding probability is %s'%prob_match.group(1)
      temp_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
      total_corpus_probability = 0.693147181 * temp_corpus_probability
      print 'the posterior decoding probability in base e is',total_corpus_probability
      print 'Getting the posterior decode'
      posterior_decode = emMethods.getPosteriorDecode('posterior_decode.fst.trained')
      print 'the posterior decode was ',posterior_decode
      posterior_decode_accuracy = emMethods.calcAccuracy(gold_cipher,posterior_decode)
      print 'The posterior decoded accuracy was ',posterior_decode_accuracy


    #print 'checking the initial zeros in channel model'
    #checkZeros(probabilities_channel)
  
    fractional_counts_channel = copy.deepcopy(free_parameters_channel)
    final_probabilities_channel = copy.deepcopy(probabilities_channel)
    print 'at the end of the iteration'
    current_alpha += alpha_delta

  elapsed_time = time.clock() - start_time
  print 'the elapsed time was ',elapsed_time
def main() :
  # setting up options
  parser = OptionParser()
  parser.add_option("--num_iter", action="store", type="int", dest="num_iterations",default=100,help="number of iterations you would like to run em+smoothed l0. Default is 50")
  parser.add_option("--num_noise_samples", action="store", type="int", dest="num_noise_samples",default=10,help="number of noise samples. Default is 10")
  parser.add_option("--initial_alpha", action="store", type="float", dest="initial_alpha",default = 0.0,help="initial_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--final_alpha", action="store", type="float", dest="final_alpha",default = 0.0,help="final_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--beta", action="store", type="float", dest="beta",default = 0.5,help="beta, the smoothness of the l0 prior, smaller the sigma, closer the approximation to true L0. Default is 0.5")
  parser.add_option("--slack", action="store_true", dest="slack_option",default = False,help="if you want to project on the simplex with slack.")
  parser.add_option("--noslack", action="store_false", dest="slack_option",default = False, help="if you want to project on the simplex with no slack. This is the regular projection approach")
  parser.add_option("--num_pgd_iterations", action="store", type="int", dest="num_pgd_iterations",default = 10,help="Number of Projected Gradient Descent to run. Default is 100")
  parser.add_option("--eta", action="store", type="float", dest="eta",default = 0.1,help="Eta, the constant step size for PGD using armijo line search. Default is 0.1")
  parser.add_option("--armijo_beta", action="store", type="float", dest="armijo_beta",default = 0.5,help="Set value for Armijo beta, the beta used in in armijo line search. Default value is 0.2")
  parser.add_option("--armijo_sigma", action="store", type="float", dest="armijo_sigma",default = 0.5,help="Set value for Armijo sigma, the sigma used in in armijo line search. Lower bound is 0.0001")
  parser.add_option("--lower_bound", action="store", type="float", dest="lower_bound",default = 0.000001,help="Set value for the lower bound on the probability.Default is 10E-6")
  parser.add_option("--cipher_data_file", action="store", type="string", dest="cipher_data_file",default ='cipher.data',help="Cipher data file for training")
  parser.add_option("--cipher_noq_file", action="store", type="string", dest="cipher_noq_file",default ='cipher.noq',help="Cipher data without quotes")
  parser.add_option("--cipher_decode_file", action="store", type="string", dest="cipher_decode_file",default ='cipher.decode',help="Cipher file for decoding")
  parser.add_option("--cipher_gold_file", action="store", type="string", dest="cipher_gold_file",default ='cipher.gold',help="The correct decipherment")
  parser.add_option("--lm", action="store", type="string", dest="lm",default ='lm.carmel',help="The lm file")
  parser.add_option("--noe_lm", action="store", type="string", dest="noe_lm",default ='lm.carmel',help="The noe lm file")
  parser.add_option("--gaussian_params_file", action="store", type="string", dest="gaussian_params_file",default =None,help="The means and variances for each gaussian letter")

  parser.add_option("--unigram_probs_file", action="store", type="string", dest="unigram_probs_file",default =None,help="The means and variances for each gaussian letter")
  parser.add_option("--std_mult", action="store", type="float", dest="std_mult",default =1.,help="The multiplier for the std ")
  parser.add_option("--u", action="store_true", dest="uniform_init",default=False)
  parser.add_option("--g", action="store_true", dest="gaussian_init",default=False)
  parser.add_option("--i", action="store_true", dest="identity_init",default=False)
  parser.add_option("--hpc", action="store_true", dest="hpc",default=False)
  parser.add_option("--full_fst", action="store_true", dest="full_fst",default=False)
  parser.add_option("--fst_init", action="store_true", dest="fst_init",default=False)

  parser.add_option("--noise_lm", action="store", type="string", dest="noise_lm",default ='noise.lm.carmel',help="The noise lm file")
  parser.add_option("--noise_channel_fst", action="store", type="string", dest="noise_channel_fst",default ='noise.fst.carmel',help="The channel fst")
  parser.add_option("--noise_probs_file", action="store", type="string", dest="noise_probs_file",default ='noise.probs',help="The noise probs file")
  parser.add_option("--noise_samples_file", action="store", type="string", dest="noise_samples_file",default ='noise.samples',help="The noise samples file")
  
  (options, args) = parser.parse_args()

  print options
  print args
  
  #getting the values from optparse
  num_iterations = options.num_iterations
  initial_alpha = options.initial_alpha
  final_alpha = options.final_alpha
  beta = options.beta
  slack_option = options.slack_option
  num_pgd_iterations = options.num_pgd_iterations
  eta = options.eta
  armijo_beta = options.armijo_beta
  armijo_sigma = options.armijo_sigma
  lower_bound = options.lower_bound
  cipher_data_file = options.cipher_data_file
  cipher_decode_file = options.cipher_decode_file
  cipher_noq_file = options.cipher_noq_file
  cipher_gold_file = options.cipher_gold_file
  lm = options.lm
  noe_lm = options.noe_lm
  gaussian_params_file = options.gaussian_params_file
  unigram_probs_file = options.unigram_probs_file
  uniform_init = options.uniform_init
  gaussian_init = options.gaussian_init
  identity_init = options.identity_init
  hpc = options.hpc
  std_mult = options.std_mult
  full_fst = options.full_fst
  num_noise_samples = options.num_noise_samples
  noise_lm = options.noise_lm
  noise_channel_fst = options.noise_channel_fst
  fst_init = options.fst_init
  noise_probs_file = options.noise_probs_file
  noise_samples_file = options.noise_samples_file
  noise_probs_file = options.noise_probs_file

  #setting up paramters
  fractional_counts_language = {}
  fractional_counts_channel = {}
  probabilities_channel = {}
  probabilities_language = {}
  current_probabilities = {}
  current_fractional_counts = {}
  #constraint_parameters = []
  initial_parameter_args = {}
  current_initial_parameter_args = {}
  parameter_to_index = {}
  parameter_counter = 0
  num_constraints = 1
  constraint_tags_dict = {} #this will hold the tag for the constraint. the key is the constraint id and the value is the tag corresponding to this constraint
  #beta = float(0)
  #alpha = float(0)
  global_num_parameters = 0
  init_option = ''
  current_optimization_tag = '' #this will hold the of the constraint for which we are doing the optimization

  #adding parser options
  '''
  print 'beta is ',beta
  print 'alpha is ',alpha
  print 'eta is ',eta
  raw_input()
  '''
  gold_cipher = emMethods.readCipherFile(cipher_gold_file)
  print gold_cipher
  #dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
  #word_lines= emMethods.readWordLines('test.words.new-formatted')
  cipher_letter_dict = emMethods.getUniqCipherLetters(cipher_noq_file)
  cipher_letters = open(cipher_noq_file).readline().split()


  #sys.exit()

  cipher_probs = defaultdict(float)
  emMethods.getCipherLetterProbs(cipher_probs,cipher_noq_file)
  print 'cipher probs are '
  #gaussians = defaultdict(list)
  #emMethods.readGaussians(gaussians,gaussian_params_file)
  plain_unigram_probs = dict((line.strip().split()[0],float(line.strip().split()[1])) for line in open(unigram_probs_file))

  #get cipher letter probs
  del cipher_letter_dict['_']
  #word_list_five = emMethods.readWordList('TEXT.3.linear')
  #plaintext = map(chr, range(97, 123))
  plaintext = []
  for k in range(65, 91):
    plaintext.append(chr(k))
  print plaintext
  print 'the number of unique cipher letter is %d'%len(cipher_letter_dict.keys())
  print cipher_letter_dict
  num_cipher_letters = len(cipher_letter_dict.keys()) 
  num_plain_letters = 26
  #gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')
     
  free_parameters_channel = defaultdict(lambda:defaultdict(float))
  free_parameters_language = defaultdict(lambda:defaultdict(float))
  print 'starting to create parameters'
  total_language_parameters = 0
  total_channel_parameters = 0
  #for line in cipher_lines :
    #print 'created parameters for a line'
    #(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
  if full_fst == True:
    emMethods.getFreeCipherParametersChannel(plaintext,free_parameters_channel)
    num_cipher_letters = 26
  else :
    emMethods.getFreeCipherParametersChannel(plaintext,free_parameters_channel,cipher_letter_dict = cipher_letter_dict)
  temp = {'_':0.0}
  free_parameters_channel['_'] = temp
  #now, we will build all the lattices, and create a special start node and end node for every sentence
  start_node_end_node_list = []

  fractional_counts_channel = copy.deepcopy(free_parameters_channel)
  probabilities_channel = copy.deepcopy(free_parameters_channel)
  #print 'gaussians'
  #print gaussians
  #createRandomPoint(probabilities_channel)
  if (uniform_init == True) :
    print 'uniform initialization'
    emMethods.initUniformProbs(probabilities_channel)
#  emMethods.initUniformProbs(probabilities_language,probabilities_channel)
  if (gaussian_init == True) :
    print 'gaussian initialization'
    #emMethods.initFromGaussians(probabilities_channel,gaussians,cipher_probs,std_mult)

    emMethods.initFromGaussiansSingleStd(probabilities_channel,plain_unigram_probs,cipher_probs,std_mult)
  if (identity_init == True) :
    emMethods.initIdentity(probabilities_channel)

  if fst_init == True:
    emMethods.initFromWfst('init.fst',probabilities_channel,'channel') 
  #print 'channel probabilities after weighting are '
  #print probabilities_channel
  #raw_input()

  emMethods.writeFst('cipher.fst',probabilities_channel)
  #sys.exit()
  final_probabilities_channel = copy.deepcopy(free_parameters_channel)


  #running the EM iterations
  #we are creating the indexes for algencan . Notice that here, the probabilities language is already uniform and therefore none of them will be zero
  createParametersForScaling(probabilities_channel,parameter_to_index)
  start_time = time.clock()
  print 'start time was ',start_time
  #fractional_counts_dump_file = open('fractional.params','w')
  #probabilities_dump_file = open('probs.params','w')
  #optimized_probabilities_dump_file = open('probs.optimized.params','w')
  alpha_delta = (final_alpha-initial_alpha)/(num_iterations-1)
  current_alpha = initial_alpha
   
  ###########GENERATING THE NOISE FILE LIST##########################################
  noise_file_list = []
  for k in range(num_noise_samples):
    noise_file_list.append("%s.noise%d"%(cipher_noq_file,k))
  
  #reading the noise probsa
  noise_probs = []
  for line in open(noise_probs_file):
    if line.strip() == '':
      continue
    else:
      noise_probs.append(float(line.strip()))
  print 'number of noise probs is ',len(noise_probs)
  #this will be needed for computations
  log_num_noise_samples = math.log(num_noise_samples)

  #reading noise samples
  total_noise_samples = 0
  noise_samples = []
  for line in open(noise_samples_file):
    if line.strip() == '':
      continue
    else:
      noise_samples.append(line.strip())
    total_noise_samples += 1
  print 'total number of noise samples is',total_noise_samples
  ###############GENERATE CARMEL TRAINING AND DECODING COMMANDS##############################
  print 'Generating the training and decoding commands...'
  run_true_training = ''
  run_noise_training = ''
  if hpc == True:
    run_true_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_noise_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(cipher_data_file,noise_lm,noise_channel_fst)
  else :
    run_true_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_noise_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(cipher_data_file,noise_lm,noise_channel_fst)
  
  noise_sample_training_commands_for_model = []
  #noise_sample_training_commands_for_noise = []
  for k in range(num_noise_samples):
    run_noise_sample_true_training = ''
    #run_noise_sample_noise_training = ''
    if hpc == True:
      run_noise_sample_true_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(noise_file_list[k],lm)

      #run_noise_sample_noise_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(noise_file_list[k],noise_lm,noise_channel_fst)
    else :
      run_noise_sample_true_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(noise_file_list[k],lm)

      #run_noise_sample_noise_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(noise_file_list[k],noise_lm,noise_channel_fst)
    noise_sample_training_commands_for_model.append(run_noise_sample_true_training)
    #noise_sample_training_commands_for_noise.append(run_noise_sample_noise_training)

  viterbi_command = '' 
  if hpc == True:
    viterbi_command = "cat %s | /home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)
  else :
    viterbi_command = "cat %s | /Users/avaswani/graehl/carmel/bin/macosx/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)
  
  print 'Running Training ...'
  print 'command for running noise training is ',run_noise_training
  log_prob_under_noise = runCarmel(run_noise_training)
  '''
  log_noise_sample_probs_under_noise = []
  for k in range(num_noise_samples):
    #log_noise_sample_prob_under_noise = runCarmel(noise_sample_training_commands_for_noise[k])
    log_noise_sample_probs_under_noise.append(log_noise_sample_prob_under_noise)
  print 'the log noise sample probs under noise are ',log_noise_sample_probs_under_noise 
  '''
  ##########################RUN NCE TRAINING###############################
  for i in range (0,num_iterations) :

    ###############GENERATING K NOISE SAMPLES#########################
    print 'Generating noise samples...'
    noise_ciphertext = list(cipher_letters)
    for k in range(num_noise_samples):
      noise_file = open(noise_file_list[k],'w')
      #emMethods.generateNoiseCipher(noise_ciphertext)
      noise_ciphertext = noise_samples[i*num_noise_samples+k]
      #print 'noise ciphertext is ',' '.join(noise_ciphertext)
      print 'noise ciphertext is ',noise_ciphertext
      #noise_file.write("\n%s\n"%' '.join(["\"%s\""%item for item in noise_ciphertext]))
      noise_file.write("\n%s"%noise_ciphertext)
      noise_file.close()

    print 'Getting Accuracy'
    ################RUNNING VITERBI AND GETTING ACCURCY###################
    runViterbiAndGetAccuracy(gold_cipher,viterbi_command)


    #dampening the learning rate
    eta = eta/(1+i)
    current_function_value = 0.
    print 'the iteration number was ',i
    print 'current alpha is ',current_alpha
  
    print 'Computing expected counts...'
    ###############COMPUTING EXPECTED COUNTS ###############################
    log_prob_under_model = runCarmel(run_true_training)
    print 'reading language fractional counts from cipher.fst.trained'
    emMethods.readCarmelFractionalCounts('cipher.fst.trained',fractional_counts_channel,'channel')

    print 'Running the noise model'
    log_prob_under_noise = runCarmel(run_noise_training)


    positive_weight = computePositiveWeight(log_prob_under_model,log_prob_under_noise,num_noise_samples)

    #compute the 
    p_d_equals_one = log_prob_under_model - (logaddexp(log_num_noise_samples+log_prob_under_noise,log_prob_under_model))
    current_function_value += p_d_equals_one
    print 'p of d equals 1 at the current point is ', p_d_equals_one
    print 'The positive weight was',positive_weight
    if positive_weight <= 10E-10:
      positive_weight = 10E-10
    expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
    emMethods.dictionaryToArray(expected_counts,fractional_counts_channel,parameter_to_index)
    expected_counts *= positive_weight


    total_noise_expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))

    log_noise_sample_probs_under_noise = []
    #log_noise_sample_probs_under_noise = []
    for k in range(num_noise_samples):
      temp_noise_fractional_counts = copy.deepcopy(free_parameters_channel)
      log_noise_sample_prob_under_model = runCarmel(noise_sample_training_commands_for_model[k])
      print 'reading noise language fractional counts from cipher.fst.trained'
      emMethods.readCarmelFractionalCounts('cipher.fst.trained',temp_noise_fractional_counts,'channel')
      #log_noise_sample_prob_under_noise = runCarmel(noise_sample_training_commands_for_noise[k])
      log_noise_sample_prob_under_noise = noise_probs[i*num_noise_samples+k]
      log_noise_sample_probs_under_noise.append(log_noise_sample_prob_under_noise)
      negative_weight = computeNegativeWeight(log_noise_sample_prob_under_model,log_noise_sample_probs_under_noise[k],num_noise_samples)
      if negative_weight <= 10E-10 :
        negative_weight = 10E-10
      print 'The negative weight was',negative_weight
      p_d_equals_zero = log_noise_sample_probs_under_noise[k]+ log_num_noise_samples -\
          (logaddexp(log_num_noise_samples+log_noise_sample_probs_under_noise[k],log_noise_sample_prob_under_model))
      current_function_value += p_d_equals_zero
      print 'p of d equals 0 at the current point',k,' is ', p_d_equals_zero

      temp_noise_expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
      emMethods.dictionaryToArray(temp_noise_expected_counts,temp_noise_fractional_counts,parameter_to_index)
      #print 'temp noise expected counts are ',temp_noise_expected_counts
      temp_noise_expected_counts *= negative_weight 
      #print 'temp noise exptected counts are ',temp_noise_expected_counts
      total_noise_expected_counts += temp_noise_expected_counts

    print 'the log noise sample probs under noise are ',log_noise_sample_probs_under_noise 
    #print 'total noise expected counts are ',total_noise_expected_counts
    #getting the final exptected counts
    expected_counts -= total_noise_expected_counts
  
    print 'Current function value is ',current_function_value
    ###############PROJECTING THE POINT ONTO THE DOUBLY STOCHASTIC MATRIX###########################
    new_feasible_point,current_point,grad = nceUpdate(eta,expected_counts,probabilities_channel,parameter_to_index,num_plain_letters)
 
    print 'Running line search....'
    #############LINE SEARCH#########################
    armijo_bound = 0.0
    #print 'new feasible point is ',new_feasible_point
    #raw_input()
    #print 'gradient is ',grad
    #raw_input()
    armijo_bound = -armijo_sigma * armijo_beta * (grad*(new_feasible_point-current_point)).sum()
    print 'Armijo bound is ',armijo_bound
    terminate_line_srch = False
    num_steps = 1
    #current_beta = 1.0 #armijo_beta
    current_beta = armijo_beta
    final_beta = 0
    current_armijo_bound = armijo_bound
    no_update = True 
    best_func_value = current_function_value

    while(terminate_line_srch != True) :
      #print 'num steps is ',num_steps
      temp_point = zeros(shape=current_point.shape)
      temp_point = current_point * (1.0 - current_beta) + current_beta * new_feasible_point
      #print 'the temp point is '
      #print temp_point
      #evaluating the function at the current point
      #first writing the fst
      #assigning the probabilities and writing the fsa
      temp_probabilities_channel = copy.deepcopy(free_parameters_channel)
      assignProbsMatrix(temp_point,temp_probabilities_channel,parameter_to_index)
      temp_probabilities_channel['_']['_'] = 1.0
      emMethods.writeFst('cipher.fst',temp_probabilities_channel)
      #print 'temp point is ',temp_point
      #sys.exit()
      func_value_at_temp_point = 0.
      temp_log_prob_under_model = runCarmel(run_true_training)
      temp_p_d_equals_one = temp_log_prob_under_model - (logaddexp(log_num_noise_samples+log_prob_under_noise,temp_log_prob_under_model))
      print 'p of d equals 1 at the temp point is ', temp_p_d_equals_one
      func_value_at_temp_point += temp_p_d_equals_one

      #then go over the noise samples 
      for k in range(num_noise_samples):
        temp_log_noise_sample_prob_under_model = runCarmel(noise_sample_training_commands_for_model[k])
        temp_p_d_equals_zero = log_noise_sample_probs_under_noise[k] + log_num_noise_samples -\
            (logaddexp(log_num_noise_samples+log_noise_sample_probs_under_noise[k],temp_log_noise_sample_prob_under_model))

        func_value_at_temp_point += temp_p_d_equals_zero
        print 'p of d equals 0 at the temp point',k,' is ', temp_p_d_equals_zero
      
      print 'the function value at the temp point  is %.16f'%func_value_at_temp_point
      #raw_input()
      if func_value_at_temp_point > best_func_value :
        best_func_value = func_value_at_temp_point
        final_beta = current_beta
        no_update = False
        #print 'we arrived at a better function value'
        #raw_input()
      #  print 'we just updated thef final beta to ',final_beta
      #if (func_value_at_temp_point - current_function_value >= current_armijo_bound) :
      #  terminate_line_srch = True
      #elif current_function_value - func_value_at_temp_point < 0:
      #  terminate_line_srch = True
      if num_steps >= 5 :
        terminate_line_srch = True
      
      current_beta = armijo_beta * current_beta
      current_armijo_bound =  armijo_bound * current_beta
      num_steps += 1
 
    if no_update == False :
      current_point = (1.0-final_beta)*current_point + final_beta*new_feasible_point
    if no_update == True :#x.all() == current_point.all() :
      print 'not update was true'
      break;
    #print 'the current point is ',current_point
    #raw_input()
    #assigning the probabilities and writing the fsa
    assignProbsMatrix(current_point,probabilities_channel,parameter_to_index)
    emMethods.writeFst('cipher.fst',probabilities_channel)


    #print 'checking the initial zeros in channel model'
    #checkZeros(probabilities_channel)
  
    fractional_counts_channel = copy.deepcopy(free_parameters_channel)
    final_probabilities_channel = copy.deepcopy(probabilities_channel)
    print 'at the end of the iteration'
    current_alpha += alpha_delta

  elapsed_time = time.clock() - start_time
  print 'the elapsed time was ',elapsed_time