def train(self, data_matrix, label_matrix):
     """Fit one linear unit per label column by per-sample gradient steps.

     Stores the training data on the instance, re-initialises the bias
     list ``self.b`` to zeros and the weight matrix ``self.M`` to
     ``0.1 * normalvariate(0, 1)`` entries, then makes 20 passes over the
     data. The step size starts at 0.05 and is multiplied by 0.85 after
     every pass.

     Args:
         data_matrix: Feature rows, indexed as ``data_matrix[row][feature]``.
         label_matrix: Target rows, indexed as ``label_matrix[row][output]``.
     """
     self.D = data_matrix
     self.Y = label_matrix

     num_feature_vectors = num_rows(self.D)
     num_features = num_columns(self.D)
     num_outputs = num_columns(self.Y)

     # Start from fresh parameters on every call. Appending to whatever a
     # previous call left behind would keep stale rows in b and M and would
     # require the attributes to exist before the first call.
     self.b = [0.0 for _ in range(num_outputs)]
     self.M = [
         [0.1 * normalvariate(0, 1) for _ in range(num_features)]
         for _ in range(num_outputs)
     ]

     # Visit order of the feature vectors; reshuffled before each pass.
     indices = list(range(num_feature_vectors))

     learning_rate = 0.05
     for _ in range(20):
         shuffle(indices)
         for idx in indices:
             sample = self.D[idx]
             for j in range(num_outputs):
                 # Error of output j on this sample with the current weights.
                 e = self.Y[idx][j] - (dot_prod(sample, self.M[j]) + self.b[j])
                 step = learning_rate * e
                 self.b[j] += step
                 weights = self.M[j]
                 for k in range(num_features):
                     weights[k] += step * sample[k]
         # Shrink the step size after each full pass.
         learning_rate *= .85
    def train(self, indices):
        """Fit the linear model on the rows of ``self.D`` selected by ``indices``.

        Every row of ``self.D`` carries its feature values followed by the
        target vector in the last position. Feature columns found in
        ``self.nom_cols`` are multiplied with their weight directly; columns
        found in ``self.cat_cols`` are combined with their weight through
        ``dot_prod`` (presumably vector-valued encodings -- confirm against
        the code that builds ``self.D``).

        ``self.b`` and ``self.M`` are rebuilt from scratch on every call.
        Training makes 10 passes; the step size starts at 0.05 and is
        multiplied by 0.85 after each pass.

        Args:
            indices: Row indices into ``self.D``; iterated once per pass in
                the order given.
        """
        num_outputs = len(self.D[0][-1])
        # The last column holds the targets, so it is not a feature.
        num_features = num_columns(self.D) - 1

        # Initialise b.
        self.b = [0.0] * num_outputs

        # Initialise M: one weight entry per feature column and output.
        self.M = []
        for _ in range(num_outputs):
            row = []
            for col in range(num_features):
                if col in self.nom_cols:
                    row.append(0.1 * normalvariate(0, 1))
                if col in self.cat_cols:
                    categories = self.cat_cols[col]
                    # choice() needs an indexable sequence; a Python 3 dict
                    # view is not one, so materialise the keys first.
                    # NOTE(review): the stored value is appended as-is (not
                    # copied); this is only safe if vector_addition returns
                    # a new object -- confirm.
                    row.append(categories[choice(list(categories))])
            self.M.append(row)

        learning_rate = 0.05
        for _ in range(10):
            for i in indices:
                sample = self.D[i]
                for j in range(num_outputs):
                    weights = self.M[j]

                    # Prediction for output j using the current weights.
                    predicted = 0
                    for col in range(num_features):
                        if col in self.nom_cols:
                            predicted += sample[col] * weights[col]
                        if col in self.cat_cols:
                            predicted += dot_prod(sample[col], weights[col])
                    predicted += self.b[j]

                    e = sample[-1][j] - predicted
                    step = learning_rate * e
                    self.b[j] += step

                    # Move every weight along its input, scaled by the error.
                    for col in range(num_features):
                        if col in self.nom_cols:
                            weights[col] += step * sample[col]
                        if col in self.cat_cols:
                            weights[col] = vector_addition(
                                weights[col], scalar_prod(step, sample[col])
                            )
            # Shrink the step size after each full pass.
            learning_rate *= .85