def runCarmelAndGetFracCounts(run_true_training,run_noise_training,fractional_counts_channel,trained_cipher_file):
  total_true_corpus_probability = runCarmel(run_true_training)
  total_noise_corpus_probability = runCarmel(run_noise_training)

  print 'reading language fractional counts from',trained_cipher_file
  emMethods.readCarmelFractionalCounts(trained_cipher_file,fractional_counts_channel,'channel')
  print 'read the fst'

  return(total_true_corpus_probability,total_noise_corpus_probability)
def main() :
  """EM + smoothed-L0 training driver for letter-substitution decipherment.

  Parses command-line options, builds the channel parameter tables,
  initializes channel probabilities (uniform / gaussian / identity),
  writes cipher.fst, then for num_iterations rounds alternates:
    1. an EM step via an external ``carmel --train-cascade`` run,
    2. reading the fractional counts back from cipher.fst.trained,
    3. a Viterbi decode via carmel to measure decipherment accuracy,
    4. a projected-gradient-descent M-step with a smoothed-L0 penalty
       whose weight is annealed linearly from initial_alpha to final_alpha.
  Optionally performs posterior decoding each iteration (--posterior_decoding).

  NOTE(review): this file defines main() more than once; the last
  definition in the file shadows this one at import time.
  """
  # setting up options
  parser = OptionParser()
  parser.add_option("--num_iter", action="store", type="int", dest="num_iterations",default=100,help="number of iterations you would like to run em+smoothed l0. Default is 50")
  parser.add_option("--initial_alpha", action="store", type="float", dest="initial_alpha",default = 0.0,help="initial_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--final_alpha", action="store", type="float", dest="final_alpha",default = 0.0,help="final_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--beta", action="store", type="float", dest="beta",default = 0.5,help="beta, the smoothness of the l0 prior, smaller the sigma, closer the approximation to true L0. Default is 0.5")
  parser.add_option("--slack", action="store_true", dest="slack_option",default = False,help="if you want to project on the simplex with slack.")
  parser.add_option("--noslack", action="store_false", dest="slack_option",default = False, help="if you want to project on the simplex with no slack. This is the regular projection approach")
  parser.add_option("--num_pgd_iterations", action="store", type="int", dest="num_pgd_iterations",default = 10,help="Number of Projected Gradient Descent to run. Default is 100")
  parser.add_option("--eta", action="store", type="float", dest="eta",default = 0.1,help="Eta, the constant step size for PGD using armijo line search. Default is 0.1")
  parser.add_option("--armijo_beta", action="store", type="float", dest="armijo_beta",default = 0.5,help="Set value for Armijo beta, the beta used in in armijo line search. Default value is 0.2")
  parser.add_option("--armijo_sigma", action="store", type="float", dest="armijo_sigma",default = 0.5,help="Set value for Armijo sigma, the sigma used in in armijo line search. Lower bound is 0.0001")
  parser.add_option("--lower_bound", action="store", type="float", dest="lower_bound",default = 0.000001,help="Set value for the lower bound on the probability.Default is 10E-6")
  parser.add_option("--cipher_data_file", action="store", type="string", dest="cipher_data_file",default ='cipher.data',help="Cipher data file for training")
  parser.add_option("--cipher_noq_file", action="store", type="string", dest="cipher_noq_file",default ='cipher.noq',help="Cipher data without quotes")
  parser.add_option("--cipher_decode_file", action="store", type="string", dest="cipher_decode_file",default ='cipher.decode',help="Cipher file for decoding")
  parser.add_option("--cipher_gold_file", action="store", type="string", dest="cipher_gold_file",default ='cipher.gold',help="The correct decipherment")
  parser.add_option("--lm", action="store", type="string", dest="lm",default ='lm.carmel',help="The lm file")
  parser.add_option("--noe_lm", action="store", type="string", dest="noe_lm",default ='lm.carmel',help="The noe lm file")
  parser.add_option("--gaussian_params_file", action="store", type="string", dest="gaussian_params_file",default =None,help="The means and variances for each gaussian letter")

  parser.add_option("--unigram_probs_file", action="store", type="string", dest="unigram_probs_file",default =None,help="The means and variances for each gaussian letter")
  parser.add_option("--std_mult", action="store", type="float", dest="std_mult",default =1.,help="The multiplier for the std ")
  parser.add_option("--u", action="store_true", dest="uniform_init",default=False)
  parser.add_option("--g", action="store_true", dest="gaussian_init",default=False)
  parser.add_option("--i", action="store_true", dest="identity_init",default=False)
  parser.add_option("--hpc", action="store_true", dest="hpc",default=False)
  parser.add_option("--full_fst", action="store_true", dest="full_fst",default=False)
  parser.add_option("--posterior_decoding", action="store_true", dest="posterior_decoding",default=False)
  
  (options, args) = parser.parse_args()

  print options
  print args
  
  #getting the values from optparse
  num_iterations = options.num_iterations
  initial_alpha = options.initial_alpha
  final_alpha = options.final_alpha
  beta = options.beta
  slack_option = options.slack_option
  num_pgd_iterations = options.num_pgd_iterations
  eta = options.eta
  armijo_beta = options.armijo_beta
  armijo_sigma = options.armijo_sigma
  lower_bound = options.lower_bound
  cipher_data_file = options.cipher_data_file
  cipher_decode_file = options.cipher_decode_file
  cipher_noq_file = options.cipher_noq_file
  cipher_gold_file = options.cipher_gold_file
  lm = options.lm
  noe_lm = options.noe_lm
  gaussian_params_file = options.gaussian_params_file
  unigram_probs_file = options.unigram_probs_file
  uniform_init = options.uniform_init
  gaussian_init = options.gaussian_init
  identity_init = options.identity_init
  hpc = options.hpc
  std_mult = options.std_mult
  full_fst = options.full_fst
  posterior_decoding_flag = options.posterior_decoding

  #setting up paramters
  fractional_counts_language = {}
  fractional_counts_channel = {}
  probabilities_channel = {}
  probabilities_language = {}
  current_probabilities = {}
  current_fractional_counts = {}
  #constraint_parameters = []
  initial_parameter_args = {}
  current_initial_parameter_args = {}
  parameter_to_index = {}
  parameter_counter = 0
  num_constraints = 1
  constraint_tags_dict = {} #this will hold the tag for the constraint. the key is the constraint id and the value is the tag corresponding to this constraint
  #beta = float(0)
  #alpha = float(0)
  global_num_parameters = 0
  init_option = ''
  current_optimization_tag = '' #this will hold the of the constraint for which we are doing the optimization

  #adding parser options
  '''
  print 'beta is ',beta
  print 'alpha is ',alpha
  print 'eta is ',eta
  raw_input()
  '''
  # read the gold decipherment and the no-quotes ciphertext
  gold_cipher = emMethods.readCipherFile(cipher_gold_file)
  ciphertext = emMethods.readCipherFile(cipher_noq_file)
  ciphertext_with_spaces  = open(cipher_noq_file).readline().strip().split()
  print gold_cipher
  #dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
  #word_lines= emMethods.readWordLines('test.words.new-formatted')
  cipher_letter_dict = emMethods.getUniqCipherLetters(cipher_noq_file)
  cipher_probs = defaultdict(float)
  emMethods.getCipherLetterProbs(cipher_probs,cipher_noq_file)
  print 'cipher probs are '
  #gaussians = defaultdict(list)
  #emMethods.readGaussians(gaussians,gaussian_params_file)
  # one "letter probability" pair per line of the unigram probs file
  plain_unigram_probs = dict((line.strip().split()[0],float(line.strip().split()[1])) for line in open(unigram_probs_file))

  #get cipher letter probs
  # '_' is presumably the word-separator symbol, not a real cipher letter
  # (it is mapped only to itself further below) -- TODO confirm
  del cipher_letter_dict['_']
  #word_list_five = emMethods.readWordList('TEXT.3.linear')
  #plaintext = map(chr, range(97, 123))
  # plaintext letters are 'A'..'Z'; cipher letters are their lowercase forms
  plaintext_letters = []
  ciphertext_letters = []
  for k in range(65, 91):
    plaintext_letters.append(chr(k))
    ciphertext_letters.append(chr(k).lower())
  print plaintext_letters
  print 'the number of unique cipher letter is %d'%len(cipher_letter_dict.keys())
  print cipher_letter_dict
  num_cipher_letters = len(cipher_letter_dict.keys()) 
  num_plain_letters = 26
  #gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')
     
  free_parameters_channel = defaultdict(lambda:defaultdict(float))
  free_parameters_language = defaultdict(lambda:defaultdict(float))
  print 'starting to create parameters'
  total_language_parameters = 0
  total_channel_parameters = 0
  #for line in cipher_lines :
    #print 'created parameters for a line'
    #(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
  # with --full_fst the channel covers all 26 cipher letters; otherwise it is
  # restricted to the letters actually observed in the ciphertext
  if full_fst == True:
    emMethods.getFreeCipherParametersChannel(plaintext_letters,free_parameters_channel)
    num_cipher_letters = 26
  else :
    emMethods.getFreeCipherParametersChannel(plaintext_letters,free_parameters_channel,cipher_letter_dict = cipher_letter_dict)
  # the separator '_' deterministically maps to itself
  temp = {'_':0.0}
  free_parameters_channel['_'] = temp
  #now, we will build all the lattices, and create a special start node and end node for every sentence
  start_node_end_node_list = []

  fractional_counts_channel = copy.deepcopy(free_parameters_channel)
  probabilities_channel = copy.deepcopy(free_parameters_channel)
  #print 'gaussians'
  #print gaussians
  #createRandomPoint(probabilities_channel)
  # choose the channel initialization: uniform, gaussian (from plaintext
  # unigram + cipher letter probabilities), and/or identity
  if (uniform_init == True) :
    print 'uniform initialization'
    emMethods.initUniformProbs(probabilities_channel)
#  emMethods.initUniformProbs(probabilities_language,probabilities_channel)
  if (gaussian_init == True) :
    print 'gaussian initialization'
    #emMethods.initFromGaussians(probabilities_channel,gaussians,cipher_probs,std_mult)

    emMethods.initFromGaussiansSingleStd(probabilities_channel,plain_unigram_probs,cipher_probs,std_mult)
  if (identity_init == True) :
    emMethods.initIdentity(probabilities_channel)
  #print 'channel probabilities after weighting are '
  #print probabilities_channel
  #raw_input()

  emMethods.writeFst('cipher.fst',probabilities_channel)
  #sys.exit()
  final_probabilities_channel = copy.deepcopy(free_parameters_channel)
  # carmel command lines differ only in the binary path (cluster vs. laptop)
  run_training = ''
  if hpc == True:
    run_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_posterior_decoding = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s posterior_decode.fst"%(cipher_data_file,lm)
  else :
    run_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_posterior_decoding = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s posterior_decode.fst"%(cipher_data_file,lm)


  #running the EM iterations
  #we are creating the indexes for algencan . Notice that here, the probabilities language is already uniform and therefore none of them will be zero
  index_to_parameter = {}
  createParametersForScaling(probabilities_channel,parameter_to_index,index_to_parameter)
  start_time = time.clock()
  print 'start time was ',start_time
  #fractional_counts_dump_file = open('fractional.params','w')
  #probabilities_dump_file = open('probs.params','w')
  #optimized_probabilities_dump_file = open('probs.optimized.params','w')
  # linear annealing schedule for the smoothed-L0 weight
  # NOTE(review): raises ZeroDivisionError when num_iterations == 1
  alpha_delta = (final_alpha-initial_alpha)/(num_iterations-1)
  current_alpha = initial_alpha
  for i in range (0,num_iterations) :
    print 'the iteration number was ',i
    #this will create the parameters
    total_corpus_probability = 0.0
    (status,output) = commands.getstatusoutput(run_training)  
    print 'we just ran the training'
    print output
    print status
    prob_match = probability_re.search(output)
    print 'current alpha is ',current_alpha
    if prob_match == None :
      print'we should have found a probability'
    else :
      print 'the probability is %s'%prob_match.group(1)
    # NOTE(review): if prob_match is None we only printed a warning above;
    # the next line then raises AttributeError.  The [2:] slice presumably
    # strips a leading marker from carmel's probability string -- TODO confirm.
    temp_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
    # 0.693147181 ~= ln(2): converts carmel's base-2 log prob to base e
    # (see the "base e" print in the posterior decoding branch below)
    total_corpus_probability = 0.693147181 * temp_corpus_probability
    print 'reading language fractional counts'
  
    emMethods.readCarmelFractionalCounts('cipher.fst.trained',fractional_counts_channel,'channel')
    print 'read the fst'
      
    print' the probability of the corpus was %f' %total_corpus_probability
    print 'we are now checking the accuracies'
    noe_command = 'cat tagging.fsa | sed \'s/*e*//g\' > tagging.fsa.noe'
    (status,output) = commands.getstatusoutput(noe_command)
    print 'we wrote the noe fsa'
    # Viterbi decode the held-out cipher file with the current channel fst
    if hpc == True:
      viterbi_command = "cat %s | /home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)
    else :
      viterbi_command = "cat %s | /Users/avaswani/graehl/carmel/bin/macosx/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)

    (status,output) = commands.getstatusoutput(viterbi_command)
    print 'status',status
    print 'output',output
    #tagged_sequence = emMethods.readTaggingOutput('tagging_output')  
    deciphered_sequence = emMethods.readCipherFile('decipherment_output')
    print 'length of deciphered sequence was ',len(deciphered_sequence)
    accuracy = emMethods.calcAccuracy(gold_cipher,deciphered_sequence)

    print 'The accuracy was %s and the objective function value was %s'%(str(accuracy),str(evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,current_alpha,beta)))
      
    # M-step: copy the fractional counts into a dense matrix and run PGD
    # with row/column simplex constraints
    expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
    #current_fractional_counts = fractional_counts_channel
    #first populating the expected counts and probabilities matrix
    #for i,plain_letter in enumerate(probabilities_channel.keys()) :
    emMethods.dictionaryToArray(expected_counts,fractional_counts_channel,parameter_to_index)
    '''
    print 'Doing posterior decoding'
    # decode
    m = Munkres()
    #J is not a matrix. its pairs of coordinates
    J = m.compute(-expected_counts)
    key = dict((index_to_parameter[plaintext_letters[n]][J[n][1]],plaintext_letters[J[n][0]]) for n in range(len(ciphertext_letters)))
    print 'the key was ',key
    posterior_decoded_string = []
    for letter in ciphertext:
      if letter == '_':
        posterior_decoded_string.append('_')
      else :
        posterior_decoded_string.append(key[letter])
    posterior_decoded_accuracy = emMethods.calcAccuracy(gold_cipher,posterior_decoded_string)
    print 'The posterior decoded accuracy was',posterior_decoded_accuracy
    '''
    pdgRowAndColumnConstraints(probabilities_channel,parameter_to_index,num_pgd_iterations,current_alpha,beta,eta,lower_bound,armijo_beta,armijo_sigma,num_plain_letters,num_cipher_letters,expected_counts=expected_counts)

    #now writing the fsa back again
    emMethods.writeFst('cipher.fst',probabilities_channel)
  
    #if the user has asked for posterior decoding, then we must do it
    if posterior_decoding_flag == True:
      print 'running command',run_posterior_decoding
      emMethods.writePosteriorDecodingFST(ciphertext_with_spaces,probabilities_channel,'posterior_decode.fst')
      (status,output) = commands.getstatusoutput(run_posterior_decoding)  
      print 'we just ran posterior decoding'
      print output
      print status
      prob_match = probability_re.search(output)
      if prob_match == None :
        print'we should have found a probability in posterior decoding'
      else :
        print 'the posterior decoding probability is %s'%prob_match.group(1)
      # same base-2 -> base-e conversion as above; prob_match may be None
      # here as well (see NOTE above)
      temp_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
      total_corpus_probability = 0.693147181 * temp_corpus_probability
      print 'the posterior decoding probability in base e is',total_corpus_probability
      print 'Getting the posterior decode'
      posterior_decode = emMethods.getPosteriorDecode('posterior_decode.fst.trained')
      print 'the posterior decode was ',posterior_decode
      posterior_decode_accuracy = emMethods.calcAccuracy(gold_cipher,posterior_decode)
      print 'The posterior decoded accuracy was ',posterior_decode_accuracy


    #print 'checking the initial zeros in channel model'
    #checkZeros(probabilities_channel)
  
    # reset the counts for the next iteration and keep a copy of the
    # latest probabilities
    fractional_counts_channel = copy.deepcopy(free_parameters_channel)
    final_probabilities_channel = copy.deepcopy(probabilities_channel)
    print 'at the end of the iteration'
    current_alpha += alpha_delta

  elapsed_time = time.clock() - start_time
  print 'the elapsed time was ',elapsed_time
def main() :
	"""EM training driver for a bigram tagging model (language + channel).

	Reads the dictionary and word lines, builds the free parameters,
	prints the per-tag constraint sizes, and then exits unconditionally
	(see the sys.exit() below) -- the EM/algencan loop that follows is
	currently dead code.

	NOTE(review): this file defines main() more than once; the last
	definition in the file shadows this one (and this one shadows the
	first).
	"""
	global fractional_counts_language ,fractional_counts_channel,probabilities_channel,probabilities_language,sigma,alpha,current_fractional_counts,current_optimization_params,init_option,initial_parameter_args,constraint_tags_dict,parameter_counter,current_optimization_tag

	num_iterations = int(sys.argv[1])
	alpha = float(sys.argv[2])
	sigma = float(sys.argv[3])

	dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
	word_lines= emMethods.readWordLines('test.words.new-formatted')

	#word_list_five = emMethods.readWordList('TEXT.3.linear')
	gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')

	free_parameters_channel = {}
	free_parameters_language = {}
	print 'starting to create parameters'
	total_language_parameters = 0
	total_channel_parameters = 0
	for line in word_lines :
		#print 'created parameters for a line'
		(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
		#print language_parameters
		#print channel_parameters

		total_language_parameters += language_parameters
		total_channel_parameters += channel_parameters
	print 'total language parameters is %d' %(total_language_parameters)
	print 'total channel parameters is %d' %(total_channel_parameters)
	#now, we will build all the lattices, and create a special start node and end node for every sentence
	start_node_end_node_list = []
	print 'constraint_lengths are being printed'
	for tag in free_parameters_language.keys() :
		print len(free_parameters_language[tag].keys())
	#raw_input()
	# NOTE(review): unconditional exit -- everything below this line in
	# this function is unreachable.
	sys.exit()
#	print len(word_list)
#	num_taggings = emMethods.getNumTaggings(word_list,dictionary)
#	print 'num_taggings '
#	print type(num_taggings)
	#print num_taggings
	fractional_counts_language = copy.deepcopy(free_parameters_language)
	fractional_counts_channel = copy.deepcopy(free_parameters_channel)
	probabilities_channel = copy.deepcopy(free_parameters_channel)
	probabilities_language = copy.deepcopy(free_parameters_language)
	emMethods.initUniformProbs(probabilities_channel,probabilities_language)
#	emMethods.initUniformProbs(probabilities_language,probabilities_channel)
	emMethods.writeFsa('tagging.fsa',probabilities_language)
	emMethods.writeFst('tagging.fst',probabilities_channel)


	run_training = r'./carmel.static --train-cascade -M 0 -m -HJ test.words.new-formatted.training tagging.fsa tagging.fst'
#		skel_size += len(col)
	#running the EM iterations
	#we are creating the indexes for algencan . Notice that here, the probabilities language is already uniform and therefore none of them will be zero
	createParametersForScaling(probabilities_language)
	for i in range (0,num_iterations) :
		'''
		print 'checking the initial zeros inlanguage'
		checkZeros(probabilities_language)
	
		print 'checking the initial zeros in channel'
		checkZeros(probabilities_channel)
		'''
		#best_tag_sequence = emMethods.viterbiSearch(start,end,probabilities_channel,probabilities_language,lattice_skeleton)
		#emMethods.calcAccuracy(gold_tag_sequence,best_tag_sequence)
		#raw_input()
		#this will create the parameters
		total_corpus_probability = 0.0
		(status,output) = commands.getstatusoutput(run_training)	
		print 'we just ran the training'
		prob_match = probability_re.search(output)
		if prob_match == None :
			print'we should have found a probability'
		else :
			print 'the probability is %s'%prob_match.group(1)
		# NOTE(review): if prob_match is None the next line raises
		# AttributeError; only a warning was printed above.
		total_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
		print 'reading language fractional counts'
		emMethods.readCarmelFractionalCounts('tagging.fsa.trained',fractional_counts_language,'bigram')
		print 'read the fsa'
		print 'reading channel fractional counts'
		emMethods.readCarmelFractionalCounts('tagging.fst.trained',fractional_counts_channel,'channel')
		print 'read the fst'
			
		print' the probability of the corpus was %f' %total_corpus_probability
		print 'we are now checking the accuracies'
		noe_command = 'cat tagging.fsa | sed \'s/*e*//g\' > tagging.fsa.noe'
		(status,output) = commands.getstatusoutput(noe_command)
		print 'we wrote the noe fsa'
		viterbi_command = r'cat test.words.new-formatted.quotes | ./carmel.static -srbk -QEWI 1 tagging.fsa.noe tagging.fst > tagging_output'
		(status,output) = commands.getstatusoutput(viterbi_command)
		tagged_sequence = emMethods.readTaggingOutput('tagging_output')	
		accuracy = emMethods.calcAccuracy(gold_tag_sequence,tagged_sequence)
		print 'The accuracy was %s and the objective function value was %s'%(str(accuracy),str(evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)))

		#emMethods.reEstimateProbabilities(probabilities_channel,probabilities_language,fractional_counts_channel,fractional_counts_language)
		#fractional_counts_language = copy.deepcopy(free_parameters_language)
		#fractional_counts_channel = copy.deepcopy(free_parameters_channel)

		#first optimizing the tag bigrams and doing it per parameter
		# per-tag optimization: run algencan twice (initialized from the
		# current probabilities and from zeros) and keep whichever gives
		# the better total objective value
		for tag in initial_parameter_args.keys() :
			if len(initial_parameter_args[tag].keys()) == 1 :
				continue
			#current_initial_parameter_args = initial_parameter_args[tag]
			current_optimization_tag = tag
			parameter_counter = len(initial_parameter_args[tag].keys())			
			current_fractional_counts = fractional_counts_language
			constraint_tags_dict[1] = tag
			temp_language_probs = dict(probabilities_language)
			#optimizing per constraint
			init_option = 'current_prob'
			current_optimization_params = 'tag_bigrams'
			algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
		
			
			
			#I should check if the objective function is increasing 
			language_probs_after_init_current_prob = copy.deepcopy(probabilities_language)
			#obj_val1 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)
			total_obj_val1 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)
	
			obj_val1 = evaluateOptimizationFunction(initial_parameter_args,probabilities_language,fractional_counts_language,alpha,sigma) 
			print 'the function value was obj 1 %f'%obj_val1
   			#emMethods.clearAlphaBeta(lattice_skeleton)
	
	
			init_option = 'zeros'
			current_optimization_params = 'tag_bigrams'
			algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
		
			
	
			language_probs_after_init_zeros = copy.deepcopy(probabilities_language)
			#obj_val2 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)a
			
			total_obj_val2 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)
			obj_val2 = evaluateOptimizationFunction(initial_parameter_args,probabilities_language,fractional_counts_language,alpha,sigma) 
			print 'the function value was obj 2 %f'%obj_val2
   			#emMethods.clearAlphaBeta(lattice_skeleton)
	
			if (total_obj_val1 >= total_obj_val2) :
				#init_option = 'current_prob'
				#current_optimization_params = 'tag_bigrams'
				if (obj_val1 < obj_val2) :
					print 'the final objective function value was opposite'
	
				probabilities_language = copy.deepcopy(language_probs_after_init_current_prob)
				#algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
		
				print 'the final objective function value was obj 1 %f'%total_obj_val1
			
			else :
				#init_option = 'zeros'
				#current_optimization_params = 'tag_bigrams'
				probabilities_language = copy.deepcopy(language_probs_after_init_zeros)
				#algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
		
				if (obj_val2 < obj_val1) :
					print 'the final objective function value was opposite'
	
				print 'the final objective function value was obj 2 %f'%total_obj_val2
				#raw_input()

#		emMethods.reEstimateProbabilities(probabilities_channel,probabilities_language,fractional_counts_channel,fractional_counts_language)
		emMethods.reEstimateProbabilities(probabilities_channel,probabilities_language,fractional_counts_channel,fractional_counts_language)
		print 'writing the fsa'
		#now writing the fsa back again
		emMethods.writeFsa('tagging.fsa',probabilities_language)
		emMethods.writeFst('tagging.fst',probabilities_channel)
		'''
		noe_command = 'cat tagging.fsa | sed \'s/*e*//g\' > tagging.fsa.noe'
		(status,output) = commands.getstatusoutput(noe_command)
		print 'we wrote the noe fsa'
		viterbi_command = r'cat test.words.new-formatted.quotes | ./carmel.static -srbk -QEWI 1 tagging.fsa.noe tagging.fst > tagging_output'
		(status,output) = commands.getstatusoutput(viterbi_command)
		tagged_sequence = emMethods.readTaggingOutput('tagging_output')	
		emMethods.calcAccuracy(gold_tag_sequence,tagged_sequence)
		'''
		print 'checking the zeros in tag bigram model'
		checkZeros(probabilities_language)
	
		print 'checking the initial zeros in channel model'
		checkZeros(probabilities_channel)

		#fractional_counts_language.clear()
		#fractional_counts_channel.clear()
		# reset counts (and channel probabilities) for the next iteration
		fractional_counts_language = copy.deepcopy(free_parameters_language)
		fractional_counts_channel = copy.deepcopy(free_parameters_channel)
		#probabilities_language = copy.deepcopy(free_parameters_language)
		probabilities_channel = copy.deepcopy(free_parameters_channel)
def main() :
	"""EM + algencan training driver for the cipher channel model only.

	Reads the gold cipher and cipher data, builds the channel parameters,
	initializes uniform probabilities, writes cipher.fst, then for
	num_iterations alternates a carmel EM step with two algencan
	optimization runs (one initialized from the current probabilities,
	one from zeros), keeping whichever run gives the better objective
	value.

	NOTE(review): this file defines main() more than once; the last
	definition in the file shadows this one (and this one shadows the
	earlier ones).
	"""
	global fractional_counts_language ,fractional_counts_channel,probabilities_channel,sigma,alpha,current_fractional_counts,current_optimization_params,init_option

	num_iterations = int(sys.argv[1])
	alpha = float(sys.argv[2])
	sigma = float(sys.argv[3])

#	dictionary = emMethods.createDictionary('DICT2')#.small')
#	word_list = emMethods.readWordList('TEXT.linear')
	#word_list_five = emMethods.readWordList('TEXT.5.linear')
#	gold_tag_sequence = emMethods.readWordList('GOLD.linear')
	gold_cipher = emMethods.readCipherFile('cipher.gold.noq')
	print gold_cipher
	#dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
	#word_lines= emMethods.readWordLines('test.words.new-formatted')
	cipher_letter_dict = emMethods.getUniqCipherLetters('cipher.data.noq')
	#word_list_five = emMethods.readWordList('TEXT.3.linear')
	#plaintext = map(chr, range(97, 123))
	# plaintext letters are 'A'..'Z'
	plaintext = []
	for k in range(65, 91):
		plaintext.append(chr(k))
	print plaintext
	print 'the number of unique cipher letter is %d'%len(cipher_letter_dict.keys())
	print cipher_letter_dict
	#gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')
 		
	free_parameters_channel = {}
	free_parameters_language = {}
	print 'starting to create parameters'
	total_language_parameters = 0
	total_channel_parameters = 0
	#for line in cipher_lines :
		#print 'created parameters for a line'
		#(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
	emMethods.getFreeCipherParametersChannel(cipher_letter_dict,plaintext,free_parameters_channel)
	# the separator '_' deterministically maps to itself
	temp = {'_':0.0}
	free_parameters_channel['_'] = temp
	#print free_parameters_channel
	#sys.exit()
		#print language_parameters
		#print channel_parameters

		#total_language_parameters += language_parameters
		#total_channel_parameters += channel_parameters
	#print 'total language parameters is %d' %(total_language_parameters)
	#print 'total channel parameters is %d' %(total_channel_parameters)
	#now, we will build all the lattices, and create a special start node and end node for every sentence
	start_node_end_node_list = []

#	print len(word_list)
#	num_taggings = emMethods.getNumTaggings(word_list,dictionary)
#	print 'num_taggings '
#	print type(num_taggings)
	#print num_taggings
	fractional_counts_channel = copy.deepcopy(free_parameters_channel)
	probabilities_channel = copy.deepcopy(free_parameters_channel)
	emMethods.initUniformProbs(probabilities_channel)
#	emMethods.initUniformProbs(probabilities_language,probabilities_channel)
	emMethods.writeFst('cipher.fst',probabilities_channel)


	run_training = r'./carmel.static --train-cascade -M 0 -m -HJ cipher.data cipher.wfsa cipher.fst'
#		skel_size += len(col)
	#running the EM iterations
	for i in range (0,num_iterations) :
		'''
		print 'checking the initial zeros inlanguage'
		checkZeros(probabilities_language)
	
		print 'checking the initial zeros in channel'
		checkZeros(probabilities_channel)
		'''
		#best_tag_sequence = emMethods.viterbiSearch(start,end,probabilities_channel,probabilities_language,lattice_skeleton)
		#emMethods.calcAccuracy(gold_tag_sequence,best_tag_sequence)
		#raw_input()
		#this will create the parameters
		total_corpus_probability = 0.0
		(status,output) = commands.getstatusoutput(run_training)	
		print 'we just ran the training'
		prob_match = probability_re.search(output)
		if prob_match == None :
			print'we should have found a probability'
		else :
			print 'the probability is %s'%prob_match.group(1)
		# NOTE(review): if prob_match is None the next line raises
		# AttributeError; only a warning was printed above.
		total_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
		print 'reading channel fractional counts'
		emMethods.readCarmelFractionalCounts('cipher.fst.trained',fractional_counts_channel,'channel')
		print 'read the fst'
			
		print' the probability of the corpus was %f' %total_corpus_probability
		print 'we are now checking the accuracies'
		noe_command = 'cat tagging.fsa | sed \'s/*e*//g\' > tagging.fsa.noe'
		(status,output) = commands.getstatusoutput(noe_command)
		print 'we wrote the noe fsa'
		viterbi_command = r'cat cipher.data | ./carmel.static -srbk -QEWI 1 cipher.wfsa.noe cipher.fst > decipherment_output'
		(status,output) = commands.getstatusoutput(viterbi_command)
		#tagged_sequence = emMethods.readTaggingOutput('tagging_output')	
		deciphered_sequence = emMethods.readCipherFile('decipherment_output')
		accuracy = emMethods.calcAccuracy(gold_cipher,deciphered_sequence)

		print 'The accuracy was %s and the objective function value was %s'%(str(accuracy),str(evaluateObjectiveFuncValue(total_corpus_probability,probabilities_channel,alpha,sigma)))


		#first optimizing the channel
		# run algencan twice with different initializations and keep the
		# result with the better total objective value
		current_fractional_counts = fractional_counts_channel
		createParameters(probabilities_channel,current_fractional_counts,free_parameters_channel,alpha,sigma)
		temp_channel_probs = dict(probabilities_channel)	
		init_option = 'current_prob'
		current_optimization_params = 'channel'
		algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
	
		
		
		#I should check if the objective function is increasing 
		channel_probs_after_init_current_prob = copy.deepcopy(probabilities_channel)
		#obj_val1 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)
		total_obj_val1 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_channel,alpha,sigma)

		obj_val1 = evaluateOptimizationFunction(initial_parameter_args,probabilities_channel,fractional_counts_channel,alpha,sigma) 
		print 'the function value was obj 1 %f'%obj_val1
   		#emMethods.clearAlphaBeta(lattice_skeleton)


		init_option = 'zeros'
		current_optimization_params = 'channel'
		algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
	
		channel_probs_after_init_zeros = copy.deepcopy(probabilities_channel)
		#obj_val2 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_language,probabilities_channel,alpha,sigma)a
		
		total_obj_val2 = evaluateObjectiveFuncValue(total_corpus_probability,probabilities_channel,alpha,sigma)
		obj_val2 = evaluateOptimizationFunction(initial_parameter_args,probabilities_channel,fractional_counts_channel,alpha,sigma) 
		print 'the function value was obj 2 %f'%obj_val2
   		#emMethods.clearAlphaBeta(lattice_skeleton)

		if (total_obj_val1 >= total_obj_val2) :
			#init_option = 'current_prob'
			#current_optimization_params = 'tag_bigrams'
			if (obj_val1 < obj_val2) :
				print 'the final objective function value was opposite'

			probabilities_channel = copy.deepcopy(channel_probs_after_init_current_prob)
			#algencan.solvers(evalf,evalg,evalh,evalc,evaljac,evalhc,evalfc,evalgjac,evalhl,evalhlp,inip,endp)
	

			print 'the final objective function value was obj 1 %f'%total_obj_val1
		
		else :
			#init_option = 'zeros'
			#current_optimization_params = 'tag_bigrams'
			probabilities_channel = copy.deepcopy(channel_probs_after_init_zeros)
	
	
			if (obj_val2 < obj_val1) :
				print 'the final objective function value was opposite'

			print 'the final objective function value was obj 2 %f'%total_obj_val2
			#raw_input()

#		emMethods.reEstimateProbabilities(probabilities_channel,probabilities_language,fractional_counts_channel,fractional_counts_language)
		#emMethods.reEstimateProbabilities(probabilities_channel,probabilities_language,fractional_counts_channel,fractional_counts_language)
		#print 'writing the fsa'
		#now writing the fsa back again
		#emMethods.writeFsa('tagging.fsa',probabilities_language)
		emMethods.writeFst('cipher.fst',probabilities_channel)
		#print 'checking the zeros in tag bigram model'
		#checkZeros(probabilities_language)
	
		print 'checking the zeros in channel model'
		checkZeros(probabilities_channel)

		#fractional_counts_language.clear()
		#fractional_counts_channel.clear()
		#fractional_counts_language = copy.deepcopy(free_parameters_language)
		# reset the channel counts for the next iteration
		fractional_counts_channel = copy.deepcopy(free_parameters_channel)
def main() :
	"""Run the smoothed-L0 decipherment experiment (EM + projected gradient descent).

	Each iteration alternates between:
	  (a) an E-step: running the external Carmel toolkit over the cascade
	      (cipher.data o cipher.wfsa o cipher.fst) to obtain the corpus
	      log-probability and fractional (expected) counts, and
	  (b) an M-step: re-optimizing each tag's outgoing channel distribution
	      with projected gradient descent under an Armijo line search and a
	      smoothed-L0 penalty (weight ``alpha``, smoothness ``beta``).

	Decoding accuracy against the gold decipherment is printed every
	iteration. State is shared with the out-of-view optimizer callbacks
	through module-level globals (declared below) rather than arguments.
	"""
	global fractional_counts_language ,fractional_counts_channel,probabilities_channel,probabilities_language,beta,alpha,current_fractional_counts,current_optimization_params,init_option,initial_parameter_args,constraint_tags_dict,parameter_counter,current_optimization_tag,slack_option
	
	#adding parser options


	parser = OptionParser()
	parser.add_option("--num_iter", action="store", type="int", dest="num_iterations",default=100,help="number of iterations you would like to run em+smoothed l0. Default is 50")
	parser.add_option("--alpha", action="store", type="float", dest="alpha",default = 0.0,help="alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
	parser.add_option("--beta", action="store", type="float", dest="beta",default = 0.5,help="beta, the smoothness of the l0 prior, smaller the sigma, closer the approximation to true L0. Default is 0.5")
	parser.add_option("--slack", action="store_true", dest="slack_option",default = False,help="if you want to project on the simplex with slack.")
	parser.add_option("--noslack", action="store_false", dest="slack_option",default = False, help="if you want to project on the simplex with no slack. This is the regular projection approach")
	parser.add_option("--num_pgd_iterations", action="store", type="int", dest="num_pgd_iterations",default = 50,help="Number of Projected Gradient Descent to run. Default is 100")
	parser.add_option("--eta", action="store", type="float", dest="eta",default = 0.5,help="Eta, the constant step size for PGD using armijo line search. Default is 0.1")
	parser.add_option("--armijo_beta", action="store", type="float", dest="armijo_beta",default = 0.5,help="Set value for Armijo beta, the beta used in in armijo line search. Default value is 0.2")
	parser.add_option("--armijo_sigma", action="store", type="float", dest="armijo_sigma",default = 0.5,help="Set value for Armijo sigma, the sigma used in in armijo line search. Lower bound is 0.0001")
	parser.add_option("--lower_bound", action="store", type="float", dest="lower_bound",default = 0.000001,help="Set value for the lower bound on the probability.Default is 10E-6")
	(options, args) = parser.parse_args()

	#print options
	#print args
	
	#getting the values from optparse
	num_iterations = options.num_iterations
	alpha = options.alpha
	beta = options.beta
	slack_option = options.slack_option
	num_pgd_iterations = options.num_pgd_iterations
	eta = options.eta
	armijo_beta = options.armijo_beta
	armijo_sigma = options.armijo_sigma
	lower_bound = options.lower_bound

	# gold decipherment: used only to score accuracy each iteration
	gold_cipher = emMethods.readCipherFile('cipher.gold.noq')
	print gold_cipher
	#dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
	#word_lines= emMethods.readWordLines('test.words.new-formatted')
	cipher_letter_dict = emMethods.getUniqCipherLetters('cipher.data.noq')
	#word_list_five = emMethods.readWordList('TEXT.3.linear')
	#plaintext = map(chr, range(97, 123))
	# plaintext alphabet: the 26 uppercase ASCII letters 'A'..'Z'
	plaintext = []
	for k in range(65, 91):
		plaintext.append(chr(k))
	print plaintext
	print 'the number of unique cipher letter is %d'%len(cipher_letter_dict.keys())
	print cipher_letter_dict
	#gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')
 		
	free_parameters_channel = {}
	free_parameters_language = {}
	print 'starting to create parameters'
	total_language_parameters = 0
	total_channel_parameters = 0
	#for line in cipher_lines :
		#print 'created parameters for a line'
		#(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
	emMethods.getFreeCipherParametersChannel(cipher_letter_dict,plaintext,free_parameters_channel)
	# '_' (the separator symbol) gets its own single-entry distribution
	temp = {'_':0.0}
	free_parameters_channel['_'] = temp
	#print free_parameters_channel
	#sys.exit()
		#print language_parameters
		#print channel_parameters

		#total_language_parameters += language_parameters
		#total_channel_parameters += channel_parameters
	#print 'total language parameters is %d' %(total_language_parameters)
	#print 'total channel parameters is %d' %(total_channel_parameters)
	#now, we will build all the lattices, and create a special start node and end node for every sentence
	start_node_end_node_list = []

#	print len(word_list)
#	num_taggings = emMethods.getNumTaggings(word_list,dictionary)
#	print 'num_taggings '
#	print type(num_taggings)
	#print num_taggings
	fractional_counts_channel = copy.deepcopy(free_parameters_channel)
	probabilities_channel = copy.deepcopy(free_parameters_channel)

	# random restart: initialise the channel probabilities at a random feasible point
	createRandomPoint(probabilities_channel)
	#emMethods.initUniformProbs(probabilities_channel)
#	emMethods.initUniformProbs(probabilities_language,probabilities_channel)
	emMethods.writeFst('cipher.fst',probabilities_channel)

	final_probabilities_channel = copy.deepcopy(free_parameters_channel)

	# Carmel EM training over the cascade: cipher.data o cipher.wfsa o cipher.fst
	# NOTE(review): assumes ./carmel, cipher.data and cipher.wfsa exist in the cwd
	run_training = r'./carmel --train-cascade -M 0 -m -HJ cipher.data cipher.wfsa cipher.fst'

	#running the EM iterations
	#we are creating the indexes for algencan . Notice that here, the probabilities language is already uniform and therefore none of them will be zero
	createParametersForScaling(probabilities_channel)
	start_time = time.clock()
	print 'start time was ',start_time
	#fractional_counts_dump_file = open('fractional.params','w')
	#probabilities_dump_file = open('probs.params','w')
	#optimized_probabilities_dump_file = open('probs.optimized.params','w')
	for i in range (0,num_iterations) :
		print 'the iteration number was ',i
		'''
		print 'checking the initial zeros inlanguage'
		checkZeros(probabilities_language)
	
		print 'checking the initial zeros in channel'
		checkZeros(probabilities_channel)
		'''
		#best_tag_sequence = emMethods.viterbiSearch(start,end,probabilities_channel,probabilities_language,lattice_skeleton)
		#emMethods.calcAccuracy(gold_tag_sequence,best_tag_sequence)
		#raw_input()
		#this will create the parameters
		total_corpus_probability = 0.0
		# E-step: one Carmel run yields the corpus probability (on stdout)
		# and writes the trained fst (fractional counts) to cipher.fst.trained
		(status,output) = commands.getstatusoutput(run_training)	
		print 'we just ran the training'
		print output
		prob_match = probability_re.search(output)
		# NOTE(review): when prob_match is None this only prints a warning;
		# the .group(1) call below will still raise AttributeError in that case
		if prob_match == None :
			print'we should have found a probability'
		else :
			print 'the probability is %s'%prob_match.group(1)
		# strip Carmel's leading "2^" from the matched probability and convert
		# the base-2 log to a natural log by multiplying by ln(2) ~= 0.693147181
		temp_corpus_probability = float(prob_match.group(1)[2:len(prob_match.group(1))])
		total_corpus_probability = 0.693147181 * temp_corpus_probability
		print 'reading language fractional counts'
	
		emMethods.readCarmelFractionalCounts('cipher.fst.trained',fractional_counts_channel,'channel')
		print 'read the fst'
			
		print' the probability of the corpus was %f' %total_corpus_probability
		print 'we are now checking the accuracies'
		# strip epsilon arcs (*e*) so the automaton can be used for Viterbi decoding
		# NOTE(review): this writes tagging.fsa.noe but the Viterbi command below
		# reads cipher.wfsa.noe -- confirm the filename mismatch is intentional
		noe_command = 'cat tagging.fsa | sed \'s/*e*//g\' > tagging.fsa.noe'
		(status,output) = commands.getstatusoutput(noe_command)
		print 'we wrote the noe fsa'
		viterbi_command = r'cat cipher.data.quotes | ./carmel -srbk 1 -QEWI cipher.wfsa.noe cipher.fst > decipherment_output'
		(status,output) = commands.getstatusoutput(viterbi_command)
		#tagged_sequence = emMethods.readTaggingOutput('tagging_output')	
		deciphered_sequence = emMethods.readCipherFile('decipherment_output')
		accuracy = emMethods.calcAccuracy(gold_cipher,deciphered_sequence)

		print 'The accuracy was %s and the objective function value was %s'%(str(accuracy),str(evaluateObjectiveFuncValue(total_corpus_probability,probabilities_channel,probabilities_language,alpha,beta)))
			

		#emMethods.reEstimateProbabilities(probabilities_channel,probabilities_language,fractional_counts_channel,fractional_counts_language)
		#fractional_counts_language = copy.deepcopy(free_parameters_language)
		#fractional_counts_channel = copy.deepcopy(free_parameters_channel)

		#first optimizing the tag bigrams and doing it per parameter
		# M-step: optimize each tag's outgoing distribution independently with
		# projected gradient descent; the PGD routine reads its problem setup
		# from the module-level globals assigned inside this loop
		for tag in initial_parameter_args.keys() :
			print 'we are starting a new tag'

			# a distribution with a single outcome is fixed; nothing to optimize
			if len(initial_parameter_args[tag].keys()) == 1 :
				continue
			print 'we are currently optimizing for tag', tag
			#current_initial_parameter_args = initial_parameter_args[tag]
			current_optimization_tag = tag


			parameter_counter = len(initial_parameter_args[tag].keys())			
			current_fractional_counts = fractional_counts_channel
			constraint_tags_dict[1] = tag
			# NOTE(review): temp_language_probs appears to be unused after this line
			temp_language_probs = dict(probabilities_channel)
			#optimizing per constraint
			init_option = 'current_prob'
			current_optimization_params = 'channel'
			#creating the initial vector to pass to the projected gradient descent algorithm

			x=zeros(parameter_counter)
			expected_counts=zeros(parameter_counter)
			expected_counts_sum = 0.
			for next_tag in initial_parameter_args[current_optimization_tag].keys() :
				# 'ONE' marks parameters that are clamped and excluded from optimization
				if initial_parameter_args[current_optimization_tag][next_tag] ==  'ONE' :
					continue
				else :
					parameter_number = initial_parameter_args[current_optimization_tag][next_tag]
					x[parameter_number] = probabilities_channel[current_optimization_tag][next_tag]	
					expected_counts[parameter_number] = current_fractional_counts[current_optimization_tag][next_tag]
					expected_counts_sum +=  current_fractional_counts[current_optimization_tag][next_tag]
					
			print 'expected counts sum was ',expected_counts_sum
			# skip tags with negligible expected mass -- optimizing them is pointless
			if expected_counts_sum <= 0.5 :
				#set all the probabilities to 0
				#for next_tag in initial_parameter_args[current_optimization_tag] :
				#	new_probabilities = zeros(len(initial_parameter_args[current_optimization_tag].keys()))
				#	assignProbs(new_probabilities,probabilities_channel)
				continue
			#raw_input()			
			#the optimization steps have to come here
			#print 'we are optimizing the tag bigrams now ' 

			#current_eta = eta_0 #/sqrt(num_iterations+1)
			print 'Doing projected gradient descent'
			new_probabilities = projectedGradientDescentWithArmijoRule(x = x,expected_counts = expected_counts,num_pgd_iterations = num_pgd_iterations,eta = eta,lower_bound = lower_bound,armijo_beta = armijo_beta,armijo_sigma = armijo_sigma)
			print 'finished projected gradient descent'
			#new_probabilities = newtonProjectedGradientDescentWithArmijoRule(x,num_pgd_iterations,eta,lower_bound,armijo_beta,armijo_sigma)
			#print new_probabilities
			#raw_input()
			# NOTE(review): current_optimization_params is set to 'channel' above,
			# so the 'tag_bigrams' branch is dead code in this script
			if current_optimization_params == 'tag_bigrams' :
				#print 'we are replacing bigram probs'
				assignProbs(new_probabilities,probabilities_language)
		#	checkZeros(probabilities_language)
			else :
				print 'we are replacing channel probs'
				assignProbs(new_probabilities,probabilities_channel)
	
			
		#now writing the fsa back again
#		emMethods.writeFsa('tagging.fsa',probabilities_language)
#		emMethods.writeFst('tagging.fst',probabilities_channel)

		# persist the re-optimized channel model so the next E-step uses it
		emMethods.writeFst('cipher.fst',probabilities_channel)
		#emMethods.writeFstLnFormat('tagging.fst',probabilities_channel)


		print 'checking the initial zeros in channel model'
		checkZeros(probabilities_channel)
	
		#fractional_counts_language.clear()
		#fractional_counts_channel.clear()

		# reset fractional counts to the zeroed free-parameter skeleton for the next E-step
		fractional_counts_channel = copy.deepcopy(free_parameters_channel)
		final_probabilities_channel = copy.deepcopy(probabilities_channel)
		print 'at the end of the iteration'
	#raw_input()	

	elapsed_time = time.clock() - start_time
	print 'the elapsed time was ',elapsed_time
def main() :
  # setting up options
  parser = OptionParser()
  parser.add_option("--num_iter", action="store", type="int", dest="num_iterations",default=100,help="number of iterations you would like to run em+smoothed l0. Default is 50")
  parser.add_option("--num_noise_samples", action="store", type="int", dest="num_noise_samples",default=10,help="number of noise samples. Default is 10")
  parser.add_option("--initial_alpha", action="store", type="float", dest="initial_alpha",default = 0.0,help="initial_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--final_alpha", action="store", type="float", dest="final_alpha",default = 0.0,help="final_alpha,the weight of the smoothed l0 penalty. Defaut is 0 ")
  parser.add_option("--beta", action="store", type="float", dest="beta",default = 0.5,help="beta, the smoothness of the l0 prior, smaller the sigma, closer the approximation to true L0. Default is 0.5")
  parser.add_option("--slack", action="store_true", dest="slack_option",default = False,help="if you want to project on the simplex with slack.")
  parser.add_option("--noslack", action="store_false", dest="slack_option",default = False, help="if you want to project on the simplex with no slack. This is the regular projection approach")
  parser.add_option("--num_pgd_iterations", action="store", type="int", dest="num_pgd_iterations",default = 10,help="Number of Projected Gradient Descent to run. Default is 100")
  parser.add_option("--eta", action="store", type="float", dest="eta",default = 0.1,help="Eta, the constant step size for PGD using armijo line search. Default is 0.1")
  parser.add_option("--armijo_beta", action="store", type="float", dest="armijo_beta",default = 0.5,help="Set value for Armijo beta, the beta used in in armijo line search. Default value is 0.2")
  parser.add_option("--armijo_sigma", action="store", type="float", dest="armijo_sigma",default = 0.5,help="Set value for Armijo sigma, the sigma used in in armijo line search. Lower bound is 0.0001")
  parser.add_option("--lower_bound", action="store", type="float", dest="lower_bound",default = 0.000001,help="Set value for the lower bound on the probability.Default is 10E-6")
  parser.add_option("--cipher_data_file", action="store", type="string", dest="cipher_data_file",default ='cipher.data',help="Cipher data file for training")
  parser.add_option("--cipher_noq_file", action="store", type="string", dest="cipher_noq_file",default ='cipher.noq',help="Cipher data without quotes")
  parser.add_option("--cipher_decode_file", action="store", type="string", dest="cipher_decode_file",default ='cipher.decode',help="Cipher file for decoding")
  parser.add_option("--cipher_gold_file", action="store", type="string", dest="cipher_gold_file",default ='cipher.gold',help="The correct decipherment")
  parser.add_option("--lm", action="store", type="string", dest="lm",default ='lm.carmel',help="The lm file")
  parser.add_option("--noe_lm", action="store", type="string", dest="noe_lm",default ='lm.carmel',help="The noe lm file")
  parser.add_option("--gaussian_params_file", action="store", type="string", dest="gaussian_params_file",default =None,help="The means and variances for each gaussian letter")

  parser.add_option("--unigram_probs_file", action="store", type="string", dest="unigram_probs_file",default =None,help="The means and variances for each gaussian letter")
  parser.add_option("--std_mult", action="store", type="float", dest="std_mult",default =1.,help="The multiplier for the std ")
  parser.add_option("--u", action="store_true", dest="uniform_init",default=False)
  parser.add_option("--g", action="store_true", dest="gaussian_init",default=False)
  parser.add_option("--i", action="store_true", dest="identity_init",default=False)
  parser.add_option("--hpc", action="store_true", dest="hpc",default=False)
  parser.add_option("--full_fst", action="store_true", dest="full_fst",default=False)
  parser.add_option("--fst_init", action="store_true", dest="fst_init",default=False)

  parser.add_option("--noise_lm", action="store", type="string", dest="noise_lm",default ='noise.lm.carmel',help="The noise lm file")
  parser.add_option("--noise_channel_fst", action="store", type="string", dest="noise_channel_fst",default ='noise.fst.carmel',help="The channel fst")
  parser.add_option("--noise_probs_file", action="store", type="string", dest="noise_probs_file",default ='noise.probs',help="The noise probs file")
  parser.add_option("--noise_samples_file", action="store", type="string", dest="noise_samples_file",default ='noise.samples',help="The noise samples file")
  
  (options, args) = parser.parse_args()

  print options
  print args
  
  #getting the values from optparse
  num_iterations = options.num_iterations
  initial_alpha = options.initial_alpha
  final_alpha = options.final_alpha
  beta = options.beta
  slack_option = options.slack_option
  num_pgd_iterations = options.num_pgd_iterations
  eta = options.eta
  armijo_beta = options.armijo_beta
  armijo_sigma = options.armijo_sigma
  lower_bound = options.lower_bound
  cipher_data_file = options.cipher_data_file
  cipher_decode_file = options.cipher_decode_file
  cipher_noq_file = options.cipher_noq_file
  cipher_gold_file = options.cipher_gold_file
  lm = options.lm
  noe_lm = options.noe_lm
  gaussian_params_file = options.gaussian_params_file
  unigram_probs_file = options.unigram_probs_file
  uniform_init = options.uniform_init
  gaussian_init = options.gaussian_init
  identity_init = options.identity_init
  hpc = options.hpc
  std_mult = options.std_mult
  full_fst = options.full_fst
  num_noise_samples = options.num_noise_samples
  noise_lm = options.noise_lm
  noise_channel_fst = options.noise_channel_fst
  fst_init = options.fst_init
  noise_probs_file = options.noise_probs_file
  noise_samples_file = options.noise_samples_file
  noise_probs_file = options.noise_probs_file

  #setting up paramters
  fractional_counts_language = {}
  fractional_counts_channel = {}
  probabilities_channel = {}
  probabilities_language = {}
  current_probabilities = {}
  current_fractional_counts = {}
  #constraint_parameters = []
  initial_parameter_args = {}
  current_initial_parameter_args = {}
  parameter_to_index = {}
  parameter_counter = 0
  num_constraints = 1
  constraint_tags_dict = {} #this will hold the tag for the constraint. the key is the constraint id and the value is the tag corresponding to this constraint
  #beta = float(0)
  #alpha = float(0)
  global_num_parameters = 0
  init_option = ''
  current_optimization_tag = '' #this will hold the of the constraint for which we are doing the optimization

  #adding parser options
  '''
  print 'beta is ',beta
  print 'alpha is ',alpha
  print 'eta is ',eta
  raw_input()
  '''
  gold_cipher = emMethods.readCipherFile(cipher_gold_file)
  print gold_cipher
  #dictionary = emMethods.createDictionary('complete.dict.new-formatted')#.small')
  #word_lines= emMethods.readWordLines('test.words.new-formatted')
  cipher_letter_dict = emMethods.getUniqCipherLetters(cipher_noq_file)
  cipher_letters = open(cipher_noq_file).readline().split()


  #sys.exit()

  cipher_probs = defaultdict(float)
  emMethods.getCipherLetterProbs(cipher_probs,cipher_noq_file)
  print 'cipher probs are '
  #gaussians = defaultdict(list)
  #emMethods.readGaussians(gaussians,gaussian_params_file)
  plain_unigram_probs = dict((line.strip().split()[0],float(line.strip().split()[1])) for line in open(unigram_probs_file))

  #get cipher letter probs
  del cipher_letter_dict['_']
  #word_list_five = emMethods.readWordList('TEXT.3.linear')
  #plaintext = map(chr, range(97, 123))
  plaintext = []
  for k in range(65, 91):
    plaintext.append(chr(k))
  print plaintext
  print 'the number of unique cipher letter is %d'%len(cipher_letter_dict.keys())
  print cipher_letter_dict
  num_cipher_letters = len(cipher_letter_dict.keys()) 
  num_plain_letters = 26
  #gold_tag_sequence = emMethods.readWordList('test.tags.new-formatted.linear')
     
  free_parameters_channel = defaultdict(lambda:defaultdict(float))
  free_parameters_language = defaultdict(lambda:defaultdict(float))
  print 'starting to create parameters'
  total_language_parameters = 0
  total_channel_parameters = 0
  #for line in cipher_lines :
    #print 'created parameters for a line'
    #(language_parameters,channel_parameters) = emMethods.getFreeParametersBigram(line,dictionary,free_parameters_language,free_parameters_channel)
  if full_fst == True:
    emMethods.getFreeCipherParametersChannel(plaintext,free_parameters_channel)
    num_cipher_letters = 26
  else :
    emMethods.getFreeCipherParametersChannel(plaintext,free_parameters_channel,cipher_letter_dict = cipher_letter_dict)
  temp = {'_':0.0}
  free_parameters_channel['_'] = temp
  #now, we will build all the lattices, and create a special start node and end node for every sentence
  start_node_end_node_list = []

  fractional_counts_channel = copy.deepcopy(free_parameters_channel)
  probabilities_channel = copy.deepcopy(free_parameters_channel)
  #print 'gaussians'
  #print gaussians
  #createRandomPoint(probabilities_channel)
  if (uniform_init == True) :
    print 'uniform initialization'
    emMethods.initUniformProbs(probabilities_channel)
#  emMethods.initUniformProbs(probabilities_language,probabilities_channel)
  if (gaussian_init == True) :
    print 'gaussian initialization'
    #emMethods.initFromGaussians(probabilities_channel,gaussians,cipher_probs,std_mult)

    emMethods.initFromGaussiansSingleStd(probabilities_channel,plain_unigram_probs,cipher_probs,std_mult)
  if (identity_init == True) :
    emMethods.initIdentity(probabilities_channel)

  if fst_init == True:
    emMethods.initFromWfst('init.fst',probabilities_channel,'channel') 
  #print 'channel probabilities after weighting are '
  #print probabilities_channel
  #raw_input()

  emMethods.writeFst('cipher.fst',probabilities_channel)
  #sys.exit()
  final_probabilities_channel = copy.deepcopy(free_parameters_channel)


  #running the EM iterations
  #we are creating the indexes for algencan . Notice that here, the probabilities language is already uniform and therefore none of them will be zero
  createParametersForScaling(probabilities_channel,parameter_to_index)
  start_time = time.clock()
  print 'start time was ',start_time
  #fractional_counts_dump_file = open('fractional.params','w')
  #probabilities_dump_file = open('probs.params','w')
  #optimized_probabilities_dump_file = open('probs.optimized.params','w')
  alpha_delta = (final_alpha-initial_alpha)/(num_iterations-1)
  current_alpha = initial_alpha
   
  ###########GENERATING THE NOISE FILE LIST##########################################
  noise_file_list = []
  for k in range(num_noise_samples):
    noise_file_list.append("%s.noise%d"%(cipher_noq_file,k))
  
  #reading the noise probsa
  noise_probs = []
  for line in open(noise_probs_file):
    if line.strip() == '':
      continue
    else:
      noise_probs.append(float(line.strip()))
  print 'number of noise probs is ',len(noise_probs)
  #this will be needed for computations
  log_num_noise_samples = math.log(num_noise_samples)

  #reading noise samples
  total_noise_samples = 0
  noise_samples = []
  for line in open(noise_samples_file):
    if line.strip() == '':
      continue
    else:
      noise_samples.append(line.strip())
    total_noise_samples += 1
  print 'total number of noise samples is',total_noise_samples
  ###############GENERATE CARMEL TRAINING AND DECODING COMMANDS##############################
  print 'Generating the training and decoding commands...'
  run_true_training = ''
  run_noise_training = ''
  if hpc == True:
    run_true_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_noise_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(cipher_data_file,noise_lm,noise_channel_fst)
  else :
    run_true_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(cipher_data_file,lm)
    run_noise_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(cipher_data_file,noise_lm,noise_channel_fst)
  
  noise_sample_training_commands_for_model = []
  #noise_sample_training_commands_for_noise = []
  for k in range(num_noise_samples):
    run_noise_sample_true_training = ''
    #run_noise_sample_noise_training = ''
    if hpc == True:
      run_noise_sample_true_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(noise_file_list[k],lm)

      #run_noise_sample_noise_training = "/home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(noise_file_list[k],noise_lm,noise_channel_fst)
    else :
      run_noise_sample_true_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s cipher.fst"%(noise_file_list[k],lm)

      #run_noise_sample_noise_training = "/Users/avaswani/graehl/carmel/bin/macosx/carmel --train-cascade -u -M 0 -m -HJ %s %s %s"%(noise_file_list[k],noise_lm,noise_channel_fst)
    noise_sample_training_commands_for_model.append(run_noise_sample_true_training)
    #noise_sample_training_commands_for_noise.append(run_noise_sample_noise_training)

  viterbi_command = '' 
  if hpc == True:
    viterbi_command = "cat %s | /home/nlg-05/vaswani/graehl/carmel/bin/linux64/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)
  else :
    viterbi_command = "cat %s | /Users/avaswani/graehl/carmel/bin/macosx/carmel  -u  -srbk 1 -QEWI %s  cipher.fst > decipherment_output"%(cipher_decode_file,noe_lm)
  
  print 'Running Training ...'
  print 'command for running noise training is ',run_noise_training
  log_prob_under_noise = runCarmel(run_noise_training)
  '''
  log_noise_sample_probs_under_noise = []
  for k in range(num_noise_samples):
    #log_noise_sample_prob_under_noise = runCarmel(noise_sample_training_commands_for_noise[k])
    log_noise_sample_probs_under_noise.append(log_noise_sample_prob_under_noise)
  print 'the log noise sample probs under noise are ',log_noise_sample_probs_under_noise 
  '''
  ##########################RUN NCE TRAINING###############################
  for i in range (0,num_iterations) :

    ###############GENERATING K NOISE SAMPLES#########################
    print 'Generating noise samples...'
    noise_ciphertext = list(cipher_letters)
    for k in range(num_noise_samples):
      noise_file = open(noise_file_list[k],'w')
      #emMethods.generateNoiseCipher(noise_ciphertext)
      noise_ciphertext = noise_samples[i*num_noise_samples+k]
      #print 'noise ciphertext is ',' '.join(noise_ciphertext)
      print 'noise ciphertext is ',noise_ciphertext
      #noise_file.write("\n%s\n"%' '.join(["\"%s\""%item for item in noise_ciphertext]))
      noise_file.write("\n%s"%noise_ciphertext)
      noise_file.close()

    print 'Getting Accuracy'
    ################RUNNING VITERBI AND GETTING ACCURCY###################
    runViterbiAndGetAccuracy(gold_cipher,viterbi_command)


    #dampening the learning rate
    eta = eta/(1+i)
    current_function_value = 0.
    print 'the iteration number was ',i
    print 'current alpha is ',current_alpha
  
    print 'Computing expected counts...'
    ###############COMPUTING EXPECTED COUNTS ###############################
    log_prob_under_model = runCarmel(run_true_training)
    print 'reading language fractional counts from cipher.fst.trained'
    emMethods.readCarmelFractionalCounts('cipher.fst.trained',fractional_counts_channel,'channel')

    print 'Running the noise model'
    log_prob_under_noise = runCarmel(run_noise_training)


    positive_weight = computePositiveWeight(log_prob_under_model,log_prob_under_noise,num_noise_samples)

    #compute the 
    p_d_equals_one = log_prob_under_model - (logaddexp(log_num_noise_samples+log_prob_under_noise,log_prob_under_model))
    current_function_value += p_d_equals_one
    print 'p of d equals 1 at the current point is ', p_d_equals_one
    print 'The positive weight was',positive_weight
    if positive_weight <= 10E-10:
      positive_weight = 10E-10
    expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
    emMethods.dictionaryToArray(expected_counts,fractional_counts_channel,parameter_to_index)
    expected_counts *= positive_weight


    total_noise_expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))

    log_noise_sample_probs_under_noise = []
    #log_noise_sample_probs_under_noise = []
    for k in range(num_noise_samples):
      temp_noise_fractional_counts = copy.deepcopy(free_parameters_channel)
      log_noise_sample_prob_under_model = runCarmel(noise_sample_training_commands_for_model[k])
      print 'reading noise language fractional counts from cipher.fst.trained'
      emMethods.readCarmelFractionalCounts('cipher.fst.trained',temp_noise_fractional_counts,'channel')
      #log_noise_sample_prob_under_noise = runCarmel(noise_sample_training_commands_for_noise[k])
      log_noise_sample_prob_under_noise = noise_probs[i*num_noise_samples+k]
      log_noise_sample_probs_under_noise.append(log_noise_sample_prob_under_noise)
      negative_weight = computeNegativeWeight(log_noise_sample_prob_under_model,log_noise_sample_probs_under_noise[k],num_noise_samples)
      if negative_weight <= 10E-10 :
        negative_weight = 10E-10
      print 'The negative weight was',negative_weight
      p_d_equals_zero = log_noise_sample_probs_under_noise[k]+ log_num_noise_samples -\
          (logaddexp(log_num_noise_samples+log_noise_sample_probs_under_noise[k],log_noise_sample_prob_under_model))
      current_function_value += p_d_equals_zero
      print 'p of d equals 0 at the current point',k,' is ', p_d_equals_zero

      temp_noise_expected_counts = zeros(shape=(num_plain_letters,num_plain_letters))
      emMethods.dictionaryToArray(temp_noise_expected_counts,temp_noise_fractional_counts,parameter_to_index)
      #print 'temp noise expected counts are ',temp_noise_expected_counts
      temp_noise_expected_counts *= negative_weight 
      #print 'temp noise exptected counts are ',temp_noise_expected_counts
      total_noise_expected_counts += temp_noise_expected_counts

    print 'the log noise sample probs under noise are ',log_noise_sample_probs_under_noise 
    #print 'total noise expected counts are ',total_noise_expected_counts
    #getting the final exptected counts
    expected_counts -= total_noise_expected_counts
  
    print 'Current function value is ',current_function_value
    ###############PROJECTING THE POINT ONTO THE DOUBLY STOCHASTIC MATRIX###########################
    # --- One NCE (noise-contrastive estimation) gradient update, followed by a
    # backtracking line search along the segment from the current parameter point
    # to the projected-gradient point. ---
    # nceUpdate presumably returns (projected feasible point after one step of
    # size eta, current parameter vector, gradient) -- TODO confirm against its
    # definition (not visible in this chunk).
    new_feasible_point,current_point,grad = nceUpdate(eta,expected_counts,probabilities_channel,parameter_to_index,num_plain_letters)
 
    print 'Running line search....'
    #############LINE SEARCH#########################
    armijo_bound = 0.0
    #print 'new feasible point is ',new_feasible_point
    #raw_input()
    #print 'gradient is ',grad
    #raw_input()
    # Armijo sufficient-increase threshold: sigma * beta * <grad, d> with
    # d = new_feasible_point - current_point, negated because the objective is
    # being maximized.  (Unused below: the acceptance test is commented out.)
    armijo_bound = -armijo_sigma * armijo_beta * (grad*(new_feasible_point-current_point)).sum()
    print 'Armijo bound is ',armijo_bound
    terminate_line_srch = False
    num_steps = 1
    #current_beta = 1.0 #armijo_beta
    current_beta = armijo_beta  # step fraction tried this round; shrinks geometrically
    final_beta = 0              # best step fraction found so far (0 = no improvement yet)
    current_armijo_bound = armijo_bound
    no_update = True 
    best_func_value = current_function_value

    # Try step fractions beta, beta^2, beta^3, ... evaluating the NCE objective at
    # each interpolated point and remembering the best one.
    # NOTE(review): the Armijo acceptance tests are commented out below, so this
    # loop always performs exactly 5 trials (terminated by the num_steps cap).
    while(terminate_line_srch != True) :
      #print 'num steps is ',num_steps
      temp_point = zeros(shape=current_point.shape)  # NOTE(review): overwritten immediately on the next line
      # Convex combination between the current point and the projected point.
      temp_point = current_point * (1.0 - current_beta) + current_beta * new_feasible_point
      #print 'the temp point is '
      #print temp_point
      #evaluating the function at the current point
      #first writing the fst
      #assigning the probabilities and writing the fsa
      # Materialize the candidate parameters as a channel model and write it out
      # so Carmel can score the corpora with it.
      temp_probabilities_channel = copy.deepcopy(free_parameters_channel)
      assignProbsMatrix(temp_point,temp_probabilities_channel,parameter_to_index)
      # '_' -> '_' pinned to probability 1.0 (presumably the space/identity
      # symbol is deterministic) -- confirm against the channel model setup.
      temp_probabilities_channel['_']['_'] = 1.0
      emMethods.writeFst('cipher.fst',temp_probabilities_channel)
      #print 'temp point is ',temp_point
      #sys.exit()
      # NCE objective at temp_point (all quantities in log space):
      #   log p(D=1 | true data)  = log p_model - log(k*p_noise + p_model)
      func_value_at_temp_point = 0.
      temp_log_prob_under_model = runCarmel(run_true_training)
      temp_p_d_equals_one = temp_log_prob_under_model - (logaddexp(log_num_noise_samples+log_prob_under_noise,temp_log_prob_under_model))
      print 'p of d equals 1 at the temp point is ', temp_p_d_equals_one
      func_value_at_temp_point += temp_p_d_equals_one

      #then go over the noise samples 
      # ... plus, for each of the k noise samples:
      #   log p(D=0 | noise) = log(k*p_noise) - log(k*p_noise + p_model)
      for k in range(num_noise_samples):
        temp_log_noise_sample_prob_under_model = runCarmel(noise_sample_training_commands_for_model[k])
        temp_p_d_equals_zero = log_noise_sample_probs_under_noise[k] + log_num_noise_samples -\
            (logaddexp(log_num_noise_samples+log_noise_sample_probs_under_noise[k],temp_log_noise_sample_prob_under_model))

        func_value_at_temp_point += temp_p_d_equals_zero
        print 'p of d equals 0 at the temp point',k,' is ', temp_p_d_equals_zero
      
      print 'the function value at the temp point  is %.16f'%func_value_at_temp_point
      #raw_input()
      # Keep the best step fraction seen so far (strict improvement over the
      # function value at the start of this iteration).
      if func_value_at_temp_point > best_func_value :
        best_func_value = func_value_at_temp_point
        final_beta = current_beta
        no_update = False
        #print 'we arrived at a better function value'
        #raw_input()
      #  print 'we just updated thef final beta to ',final_beta
      #if (func_value_at_temp_point - current_function_value >= current_armijo_bound) :
      #  terminate_line_srch = True
      #elif current_function_value - func_value_at_temp_point < 0:
      #  terminate_line_srch = True
      # Hard cap: at most 5 trial points per line search.
      if num_steps >= 5 :
        terminate_line_srch = True
      
      # Shrink the step fraction geometrically for the next trial.
      current_beta = armijo_beta * current_beta
      # NOTE(review): computed from the ALREADY-shrunk current_beta (order looks
      # inverted), but harmless since the acceptance test above is commented out
      # and this value is never read.
      current_armijo_bound =  armijo_bound * current_beta
      num_steps += 1
 
    # Commit the best interpolated point if any trial improved the objective;
    # otherwise no ascent direction helped, so leave the enclosing optimization
    # loop entirely.
    if no_update == False :
      current_point = (1.0-final_beta)*current_point + final_beta*new_feasible_point
    if no_update == True :#x.all() == current_point.all() :
      print 'not update was true'
      # NOTE(review): stray semicolon (harmless in Python); message has a typo
      # ("not" for "no") -- left untouched since it is a runtime string.
      break;
    #print 'the current point is ',current_point
    #raw_input()
    #assigning the probabilities and writing the fsa
    # Persist the accepted parameters as the channel FST for the next iteration.
    assignProbsMatrix(current_point,probabilities_channel,parameter_to_index)
    emMethods.writeFst('cipher.fst',probabilities_channel)


    #print 'checking the initial zeros in channel model'
    #checkZeros(probabilities_channel)
  
    # Fresh fractional-count structure (same shape as the free parameters) and a
    # snapshot of the accepted probabilities; then anneal the L0-penalty weight.
    fractional_counts_channel = copy.deepcopy(free_parameters_channel)
    final_probabilities_channel = copy.deepcopy(probabilities_channel)
    print 'at the end of the iteration'
    current_alpha += alpha_delta

  # NOTE(review): time.clock() is CPU time on Unix / wall time on Windows, and
  # was removed in Python 3.8 (use time.perf_counter() if ever ported).
  elapsed_time = time.clock() - start_time
  print 'the elapsed time was ',elapsed_time