def _quantize_prediction(raw_value, max_label):
    """Map a raw network output to an integer label in ``[0, max_label]``.

    Label ``k`` covers the half-open interval ``[k - 0.5, k + 0.5)``; anything
    below 0.5 becomes 0 and anything at or above ``max_label - 0.5`` becomes
    ``max_label``.
    """
    for label in range(max_label):
        if raw_value < label + 0.5:
            return label
    return max_label


def _holdout_error_rate(node_count, node_name, data_s, data_l, percentage, max_label, num_folds=1):
    """Train a network with ``node_count`` hidden nodes and return its error rate.

    For each fold the data is split with
    ``helper_cross_validation.spilteDataAndLabel``, a network is built with
    ``creatingNeuralNetwork``, the held-out sentences are vectorised with the
    returned vectorizer, and the quantised predictions are compared with the
    held-out labels.

    Args:
        node_count: number of hidden nodes passed to ``creatingNeuralNetwork``.
        node_name: label used in the progress message ('nodeA' / 'nodeB').
        data_s: the samples (iterated as sentences after splitting).
        data_l: the labels matching ``data_s``.
        percentage: forwarded unchanged to ``spilteDataAndLabel``.
        max_label: largest integer label a prediction may be quantised to.
        num_folds: number of split/train/test rounds (the fold index passed
            to the splitter starts at 1).

    Returns:
        Misclassified predictions divided by total predictions, as a float.

    Raises:
        ZeroDivisionError: if no prediction was produced at all.
    """
    total_num = 0
    total_error = 0
    for fold in range(1, num_folds + 1):
        print('start to create NN')
        print('Hidden %s are %d' % (node_name, node_count))
        [train_data, test_data, train_label, test_label] = helper_cross_validation.spilteDataAndLabel(
            data_s, data_l, fold, percentage, step=len(data_s))
        [net, number_of_input_features, tfidf_vectorizer] = creatingNeuralNetwork(
            node_count, train_data, train_label)

        # One row of features per held-out sentence.
        indata = np.zeros((len(test_data), number_of_input_features))
        for j, sentence in enumerate(test_data):
            indata[j, :] = tfidf_vectorizer.transform([sentence]).toarray()[0]

        predictions = [_quantize_prediction(r, max_label) for r in net(indata)]
        total_num += len(predictions)
        for index, predicted in enumerate(predictions):
            if predicted != test_label[index]:
                total_error += 1
    return float(total_error) / float(total_num)


def annealingSimulation(data_s, data_l, max_nodes, min_nodes, temperature=10000.0, cool=0.15, step=10):
    """Search for a hidden-node count by simulated annealing.

    Starting from a random node count in ``[min_nodes, max_nodes]``, each
    iteration proposes a neighbour at most ``step`` away (clamped to the
    range), measures the error rate of both the current and the proposed
    count, and moves to the proposal when it is better or, with the
    Metropolis probability ``exp(-(error_B - error_A) / temperature)``, when
    it is worse. The temperature is multiplied by ``cool`` after every
    iteration and the search stops once it is no longer above 0.1.

    Args:
        data_s: the samples used for training and testing.
        data_l: the labels matching ``data_s``.
        max_nodes: largest node count to consider (inclusive).
        min_nodes: smallest node count to consider (inclusive); negative
            values are treated as 0.
        temperature: starting temperature.
        cool: multiplicative cooling factor applied per iteration; it must be
            below 1 for the loop to terminate.
        step: maximum distance of a proposed move from the current count.

    Returns:
        The node count held when the temperature drops to 0.1 or below.
    """
    if min_nodes < 0:
        min_nodes = 0
    # Random starting point inside the allowed range.
    nodeA = random.randint(min_nodes, max_nodes)

    while temperature > 0.1:
        # Propose a neighbour and keep it inside [min_nodes, max_nodes].
        nodeB = nodeA + random.randint(-step, step)
        if nodeB > max_nodes:
            nodeB = max_nodes
        elif nodeB < min_nodes:
            nodeB = min_nodes

        # NOTE(review): the current and the proposed node count are scored
        # with different hold-out fractions (0.05 vs 0.02) and different label
        # ranges (0..2 vs 0..5). Both settings are kept exactly as they were;
        # this looks like copy-paste drift -- confirm the intended values and
        # unify them so the comparison below is like-for-like.
        error_rateA = _holdout_error_rate(nodeA, 'nodeA', data_s, data_l, percentage=0.05, max_label=2)
        error_rateB = _holdout_error_rate(nodeB, 'nodeB', data_s, data_l, percentage=0.02, max_label=5)

        # Metropolis acceptance: depends on how much WORSE the proposal is.
        # (The previous expression used -(error_B + error_A), which is not a
        # cost difference.)
        p = math.exp(-(error_rateB - error_rateA) / temperature)
        if error_rateB < error_rateA or random.random() < p:
            nodeA = nodeB
            print("Node number: %d, accurancy: %f" % (nodeB, 1 - error_rateB))
        else:
            print("Node number: %d, accurancy: %f" % (nodeA, 1 - error_rateA))

        # Cool down a little.
        temperature = temperature * cool
    return nodeA
Ejemplo n.º 2
0
    # Body fragment of a cross-validation routine; the enclosing `def` line is
    # not part of this excerpt. It relies on `training_set`, `label_set`,
    # `step`, `percentage`, `tfidf` and `classifier` from the enclosing scope.
    # Running tallies across all folds.
    total_testing_times = 0
    error_testing_times = 0
    size_training_set = len(training_set)
    # Number of folds: one per `step` samples (any remainder is dropped by int()).
    total_num_loop = int(size_training_set / step)
    x = 0
    # print ("There are %d loops" % (total_num_loop))
    while x < total_num_loop:
        # Fold index is incremented first, so the splitter receives 1..total_num_loop.
        x = x + 1
        # print ("This is %d loop" % (x))
        # Slice the data for this fold, then train the classifier on the training slice.
        [
            sliced_training_data,
            sliced_testing_data,
            sliced_training_label,
            sliced_testing_label,
        ] = helper_cross_validation.spilteDataAndLabel(training_set, label_set, x, percentage, step=step)
        # The vectorizer is re-fitted on every fold's training slice.
        training_feature = tfidf.fit_transform(sliced_training_data)
        classifier.fit(training_feature, sliced_training_label)
        # Evaluate on the held-out slice using the vocabulary fitted above.
        indexOfTest = 0
        testing_feature = tfidf.transform(sliced_testing_data)
        test_result = classifier.predict(testing_feature)

        # Count every test sample and every prediction that differs from its label.
        while indexOfTest < len(sliced_testing_data):
            total_testing_times = total_testing_times + 1
            if test_result[indexOfTest] != sliced_testing_label[indexOfTest]:
                error_testing_times = error_testing_times + 1
            indexOfTest = indexOfTest + 1
    # NOTE: raises ZeroDivisionError when no test sample was tallied
    # (e.g. total_num_loop is 0).
    successRatio = 1 - float(error_testing_times) / float(total_testing_times)
    print ("Success Ratio is %f" % (successRatio))
Ejemplo n.º 3
0
# Sweep the upper n-gram bound from n+1 up to max_ngram. For every bound a
# fresh MultinomialNB / TfidfVectorizer pair is cross-validated and the
# overall success ratio is printed.
# Relies on n, max_ngram, pattern, training_set, label_set, step and
# percentage being defined earlier in the script.
while n < max_ngram:
    n += 1
    print("Max ngram is %d" % n)

    classifier = MultinomialNB()
    tfidf = TfidfVectorizer(
        sublinear_tf=True,
        max_df=0.9,
        stop_words=None,
        token_pattern=pattern,
        ngram_range=(1, n),
    )

    # Tallies accumulated over all folds for this n-gram bound.
    total_testing_times = 0
    error_testing_times = 0
    size_training_set = len(training_set)
    total_num_loop = int(size_training_set / step)

    # Fold indices run 1..total_num_loop; x stays 0 when there are no folds.
    x = 0
    for x in range(1, total_num_loop + 1):
        # Split off this fold, then fit the vectorizer and classifier on the rest.
        (
            sliced_training_data,
            sliced_testing_data,
            sliced_training_label,
            sliced_testing_label,
        ) = helper_cross_validation.spilteDataAndLabel(
            training_set, label_set, x, percentage, step=step
        )
        training_feature = tfidf.fit_transform(sliced_training_data)
        classifier.fit(training_feature, sliced_training_label)

        # Predict the held-out slice with the vocabulary fitted above.
        testing_feature = tfidf.transform(sliced_testing_data)
        test_result = classifier.predict(testing_feature)

        # Tally tested samples and mismatching predictions.
        indexOfTest = 0
        while indexOfTest < len(sliced_testing_data):
            total_testing_times += 1
            if test_result[indexOfTest] != sliced_testing_label[indexOfTest]:
                error_testing_times += 1
            indexOfTest += 1

    successRatio = 1 - float(error_testing_times) / float(total_testing_times)
    print("Success Ratio is %f" % successRatio)