Example #1
0
    def train(self, history_tuples, reg_lambda=0.01):
        """Fit the max-ent model on *history_tuples* via L-BFGS-B.

        Args:
            history_tuples: training histories; consumed by create_dataset().
            reg_lambda: L2 regularization strength (default 0.01).

        Returns:
            The final self.cost_value (set as a side effect of self.cost).
        """
        self.iteration = 0
        self.h_tuples = history_tuples
        self.reg = reg_lambda
        self.dataset = None  # populated by create_dataset()
        # NOTE(review): assumes self.func exposes supported_tags (the full
        # tag set) -- confirm against the feature-function object.
        self.tag_set = self.func.supported_tags
        self.create_dataset()
        self.dim = self.dataset.shape[1]
        self.num_examples = self.dataset.shape[0]
        # 'is None' instead of '== None': '==' on a numpy array compares
        # elementwise and raises ValueError when the result is truth-tested.
        if (self.model is None) or (self.model.shape[0] != self.dim):
            self.model = numpy.zeros(self.dim)  # start from the all-zero model

        dt1 = datetime.datetime.now()
        try:
            from scipy.optimize import minimize as mymin
            result = mymin(self.cost, self.model, method='L-BFGS-B',
                           callback=self.cb, options={'maxiter': 25})
            self.model = result.x
        except ImportError:
            # Older SciPy without the 'minimize' front-end: fall back to
            # fmin_l_bfgs_b, which returns a (x, f, info) tuple rather than
            # an OptimizeResult (the original crashed on 'params.x' here).
            from scipy.optimize import fmin_l_bfgs_b as mymin
            result = mymin(self.cost, self.model, fprime=self.gradient)
            self.model = result[0]
        dt2 = datetime.datetime.now()  # kept for (commented) timing output

        if self.pic_file is not None:
            # 'with' guarantees the pickle file is closed even on error
            # (the original leaked the handle).
            with open(self.pic_file, "wb") as fh:
                pickle.dump({'model': self.model, 'tag_set': self.tag_set}, fh)
        return self.cost_value
Example #2
0
 def train(self, history_tuples, reg_lambda=0.01, max_iter=5):
     """Train on *history_tuples* with L-BFGS-B, capped at *max_iter* iterations.

     Args:
         history_tuples: training histories; consumed by create_dataset().
         reg_lambda: L2 regularization strength (default 0.01).
         max_iter: L-BFGS-B iteration cap (default 5).

     Returns:
         The final self.cost_value (set as a side effect of self.cost).
     """
     self.iteration = 0
     self.h_tuples = history_tuples
     self.reg = reg_lambda
     self.dataset = None  # populated by create_dataset()
     self.tag_set = self.func.get_supported_labels()  # full set of output tags
     self.create_dataset()
     self.dim = self.dataset.shape[1]
     self.num_examples = self.dataset.shape[0]
     # 'is None' instead of '== None': '==' on a numpy array compares
     # elementwise and raises ValueError when the result is truth-tested.
     if (self.model is None) or (self.model.shape[0] != self.dim):
         self.model = numpy.zeros(self.dim)  # all-zero starting model
     dt1 = datetime.datetime.now()
     try:
         params = mymin(self.cost, self.model, method='L-BFGS-B',
                        callback=self.cb, options={'maxiter': max_iter})
     except Exception:
         # Fallback when the 'minimize' front-end fails/is unavailable;
         # fmin_l_bfgs_b returns a (x, f, info) tuple.
         from scipy.optimize import fmin_l_bfgs_b as mymin
         params = mymin(self.cost, self.model, fprime=self.gradient)
     # NOTE(review): the optimized parameters are deliberately NOT copied
     # back into self.model (matches the original, which commented out
     # 'self.model = params.x') -- confirm whether self.cb records them.
     dt2 = datetime.datetime.now()  # kept for (commented) timing output
     return self.cost_value
Example #3
0
File: MyMaxEnt.py  Project: ppallal/NLP
    def train(self, history_tuples, reg_lambda=0.01):
        """Fit the max-ent model on *history_tuples* via L-BFGS-B.

        Args:
            history_tuples: training histories; consumed by create_dataset().
            reg_lambda: L2 regularization strength (default 0.01).

        Returns:
            The final self.cost_value (set as a side effect of self.cost).
        """
        self.iteration = 0
        self.h_tuples = history_tuples
        self.reg = reg_lambda
        self.dataset = None  # populated by create_dataset()
        self.tag_set = self.func.supported_tags  # full set of output tags
        self.create_dataset()
        self.dim = self.dataset.shape[1]
        self.num_examples = self.dataset.shape[0]
        # 'is None' instead of '== None': '==' on a numpy array compares
        # elementwise and raises ValueError when the result is truth-tested.
        if (self.model is None) or (self.model.shape[0] != self.dim):
            self.model = numpy.zeros(self.dim)  # start from the all-zero model

        dt1 = datetime.datetime.now()
        print('before training: %s' % dt1)
        try:
            from scipy.optimize import minimize as mymin
            result = mymin(self.cost, self.model, method='L-BFGS-B',
                           callback=self.cb, options={'maxiter': 25})
            self.model = result.x
        except ImportError:
            print('Importing alternate minimizer fmin_l_bfgs_b')
            # fmin_l_bfgs_b returns a (x, f, info) tuple, not an
            # OptimizeResult (the original crashed on 'params.x' here).
            from scipy.optimize import fmin_l_bfgs_b as mymin
            result = mymin(self.cost, self.model, fprime=self.gradient)
            self.model = result[0]
        dt2 = datetime.datetime.now()
        print('after training: %s   total time = %s'
              % (dt2, (dt2 - dt1).total_seconds()))

        if self.pic_file is not None:
            # 'with' guarantees the pickle file is closed even on error
            # (the original leaked the handle).
            with open(self.pic_file, "wb") as fh:
                pickle.dump({'model': self.model, 'tag_set': self.tag_set}, fh)
        return self.cost_value
Example #4
0
    def train(self,
              training_list_input,
              training_list_output,
              reg_lambda=0.01):
        """Fit the model on a feature matrix / label list pair via L-BFGS-B.

        Args:
            training_list_input: 2-D list of per-example feature vectors.
            training_list_output: per-example labels (stored, not used here).
            reg_lambda: L2 regularization strength (default 0.01).
        """
        self.iteration = 0
        self.training_list_input = training_list_input
        self.training_list_output = training_list_output
        self.reg = reg_lambda
        self.dataset = numpy.array(training_list_input)
        self.number_features = self.dataset.shape[1]
        self.number_training_examples = self.dataset.shape[0]
        # 'is None' instead of '== None': '==' on a numpy array compares
        # elementwise and raises ValueError when the result is truth-tested.
        if (self.model is None) or (self.model.shape[0] != self.number_features):
            self.model = numpy.zeros(self.number_features)  # all-zero start

        dt1 = datetime.datetime.now()
        print('before training: %s' % dt1)

        from scipy.optimize import minimize as mymin
        params = mymin(self.cost, self.model, method='L-BFGS-B')
        self.model = params.x
        dt2 = datetime.datetime.now()
        print('after training: %s   total time = %s'
              % (dt2, (dt2 - dt1).total_seconds()))

        if self.pic_file is not None:
            # 'with' guarantees the file is closed (the original leaked
            # the handle opened inline in pickle.dump).
            with open(self.pic_file, "wb") as fh:
                pickle.dump({'model': self.model,
                             'number_features': self.number_features}, fh)
        return
Example #5
0
    def train(self):
        """
			Train the classifier
		"""
        self.count = 0
        # params = mymin(self.cost, self.model, method = 'L-BFGS-B', jac = self.gradient, options = {'disp' : True})
        params = mymin(self.cost, self.model, method="L-BFGS-B", options={"disp": True})
        self.model = params.x
        # help(params)
        print "Done Training", self.model, self.count, params.success
Example #6
0
	def train(self):
		'''
			Train the classifier
		'''
		self.count = 0
		# params = mymin(self.cost, self.model, method = 'L-BFGS-B', jac = self.gradient, options = {'disp' : True})
		params = mymin(self.cost, self.model, method = 'L-BFGS-B', options = {'disp' : True})
		self.model = params.x
		# help(params)
		print "Done Training", self.model, self.count, params.success
Example #7
0
 def train(self):
     """Minimize self.cost (max 100 L-BFGS-B iterations) and pickle the model."""
     dt1 = datetime.datetime.now()
     print('before training: %s' % dt1)
     params = mymin(self.cost, self.model, method='L-BFGS-B',
                    options={'maxiter': 100})
     self.model = params.x
     dt2 = datetime.datetime.now()
     print('after training: %s   total time = %s'
           % (dt2, (dt2 - dt1).total_seconds()))

     # 'is not None' is the idiomatic null test; 'with' guarantees the
     # pickle file is closed (the original leaked the handle).
     if self.pic_file is not None:
         with open(self.pic_file, "wb") as fh:
             pickle.dump(self.model, fh)
     return
Example #8
0
 def train(self):
     """Minimize self.cost with L-BFGS-B and optionally pickle the model."""
     dt1 = datetime.datetime.now()  # kept for (commented) timing output
     params = mymin(self.cost, self.model, method='L-BFGS-B')
     self.model = params.x
     dt2 = datetime.datetime.now()

     # 'is not None' is the idiomatic null test; 'with' guarantees the
     # pickle file is closed (the original leaked the handle).
     if self.pic_file is not None:
         with open(self.pic_file, "wb") as fh:
             pickle.dump(self.model, fh)
     return
Example #9
0
	def train(self):
		dt1 = time()
		#print 'before training: ', dt1
		# this is the optimization function. spark already has this one. we'll use that.
		# it takes cost, parameter vector and modifies the parameter vector. the process continues
		# untill training is complete
		self.preprocess()
		params = mymin(self.cost, self.param, method = 'L-BFGS-B',jac = self.gradient, options = {'maxiter':1}) #, jac = self.gradient) # , options = {'maxiter':100}
		# self.gradient([0,0,0])
		#self.param = params.x
		# self.gradient(self.param)
		print params
		dt2 = time()
Example #10
0
	def train(self):
		dt1 = datetime.datetime.now()
		print 'before training: ', dt1
		# this is the optimization function. spark already has this one. we'll use that.
		# it takes cost, parameter vector and modifies the parameter vector. the process continues
		# untill training is complete
		self.preprocess()
		params = mymin(self.cost, self.param, method = 'L-BFGS-B',jac = self.gradient, options = {'maxiter':1}) #, jac = self.gradient) # , options = {'maxiter':100}
		self.param = params.x
		print self.param
		# self.gradient(self.param)
		dt2 = datetime.datetime.now()
		print 'after training: ', dt2, '  total time = ', (dt2 - dt1).total_seconds()
Example #11
0
	def train(self, tr=False):
		''' Train the model using scipy's optimization of minimizing cost function'''
		if(tr):
			self.set_feature_vector()
			#print self.F
			#print '*'*200
			params = mymin(self.cost, self.model, method = 'L-BFGS-B',options={"disp":True})
			#print params
			if(params.success):
				self.model = params.x
			else:
				print "Training Failed"
			pickle.dump(self.model,open("model.pkl","wb"))
			return self.model
		else:
			self.model = pickle.load(open("model.pkl","rb"))
			return self.model
Example #12
0
	def train(self):
		dt1 = datetime.datetime.now()
		print 'before training: ', dt1
		# self.preprocess()
		# self.gradient2(self.param)
		params = mymin(self.cost, self.param, method = 'L-BFGS-B', jac = self.gradient1, options = {'maxiter':110}) #, jac = self.gradient) # , options = {'maxiter':100}
		self.param = params.x
		print self.param
		# import random
		# param = [random.random() for i in range(self.dim)]
		# print self.gradient1(param) == self.gradient2(param)
		# dt2 = datetime.datetime.now()
		# print 'after training: ', dt2, '  total time = ', (dt2 - dt1).total_seconds()
		# dt1 = datetime.datetime.now()
		# self.gradient2(self.param)
		dt2 = datetime.datetime.now()
		print 'after training: ', dt2, '  total time = ', (dt2 - dt1).total_seconds()
		print cost_no_of_calls, grad_no_of_calls
Example #13
0
	def train(self):
		"""Train the classifier: minimize self.cost using the module-level gradient."""
		result = mymin(self.cost, self.model, method='L-BFGS-B', jac=gradient)
		self.model = result.x
Example #14
0
if __name__ == '__main__':
	# Feature functions f1..f14 are defined elsewhere in this module.
	features = [f1,f2,f3,f4,f5,f6,f7,f8,f9,f10, f11, f12, f13,f14]
	# features = [f1,f2,f3]

	# Build the training set; tags are the per-example output labels.
	input_data,tags = create_input_dataset()
	# NOTE(review): sqlContext and sc appear to be Spark handles created
	# elsewhere at import time -- confirm.
	distributed_input_data = sqlContext.jsonFile('data.json')
	gradient_preprocess1(input_data,list(set(tags)))
	# print distributed_input_data.show()
	
	# Broadcast read-only training metadata to the Spark workers.
	all_tags = sc.broadcast(list(set(tags)))
	no_of_features = sc.broadcast(len(features))
	size = sc.broadcast(len(input_data))
	# All-zero initial parameter vector, one weight per feature function.
	param = [0 for i in range(len(features))]
	# gradient1_new(param)
	# param = [1 for i in range(len(features))]
	# gradient1_new(param)
	dt1 = datetime.datetime.now()
	print 'before training: ', dt1
	# L-BFGS-B over cost1 with the distributed gradient, capped at 100 iterations.
	params = mymin(cost1, param, method = 'L-BFGS-B', jac = gradient1_new, options = {'maxiter':100}) #, jac = self.gradient) # , options = {'maxiter':100}
	print params.x
	print params

	dt2 = datetime.datetime.now()
	print 'after training: ', dt2, '  total time = ', (dt2 - dt1).total_seconds()

	# Timing counters are maintained globally by cost1 / gradient1_new.
	print 'For',num_calls_cost,'calls to cost total time taken is',tot_time_cost
	print 'Per call avg time taken is',tot_time_cost/num_calls_cost
	print 'For',num_calls_gradient,'calls to gradient total time taken is',tot_time_gradient
	print 'Per call avg time taken is',tot_time_gradient/num_calls_gradient

	def train(self):
		"""Fit the model by minimizing self.cost (analytic jac=gradient)."""
		fit = mymin(self.cost, self.model, method='L-BFGS-B', jac=gradient)
		self.model = fit.x