def solveProximal(self, data, rho, master_Z):
    """Solve one proximal regression subproblem per output column.

    For each column of ``outputDF`` this pulls the corresponding target
    coefficient row from the consolidated ``master_Z`` table and calls
    ``self.solveSingle`` on the shared design matrix.

    Parameters
    ----------
    data : tuple
        ``(inputDF, outputDF)`` pair of DataFrames sharing the same rows;
        ``inputDF`` columns are features, ``outputDF`` columns are outputs.
    rho : float
        Proximal penalty weight forwarded to ``solveSingle``.
    master_Z : dict-like
        Unravelled coefficient table; ``reconstructDataFrame`` rebuilds it
        into a DataFrame indexed by outputs with feature columns, filling
        zeros for any missing rows/columns.

    Returns
    -------
    tuple
        ``(unravelled betas, stats)`` where ``stats`` holds the per-output
        averages ``'Grad'``, ``'It'`` and ``'Conv'``.
    """
    inputDF, outputDF = data
    n, d = inputDF.shape
    n, m = outputDF.shape
    features = inputDF.columns
    # Consolidate with the master Z: the reconstructed table is the union of
    # local and master features/outputs, zero-filled where entries are missing.
    target_Z = reconstructDataFrame(master_Z)
    betas = SparseDataFrame(target_Z.copy())
    grad_norm_sum = 0.0
    converged_sum = 0.0
    it_sum = 0.0
    count = 0.0
    # The design matrix is identical for every output; build it once instead
    # of re-materializing it on each loop iteration.
    X = np.array(inputDF)
    for out in outputDF.columns:
        beta_target = np.array(target_Z.loc[out, features])
        y = np.array(outputDF.loc[:, out])
        beta, grad_norm, converged, it = self.solveSingle(X, y, rho, beta_target)
        count += 1.0
        grad_norm_sum += grad_norm
        converged_sum += converged
        it_sum += it
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logging.info('%s- Converged: %s Gradient norm: %s',
                     datetime.datetime.now(), converged, grad_norm)
        logging.info('%s- Beta learned: %s', datetime.datetime.now(), beta)
        betas.loc[out, features] = beta
    # Average the solver diagnostics over all outputs.
    stats = {}
    stats['Grad'] = grad_norm_sum / (1. * count)
    stats['It'] = it_sum / (1. * count)
    stats['Conv'] = converged_sum / (1. * count)
    return unravelDataFrame(betas), stats
def solveProximal(self, data, rho, master_Z):
    """Fit a proximal regression for every output column of the batch.

    Consolidates ``master_Z`` with the local feature/output sets, solves one
    single-output problem per column via ``self.solveSingle``, and returns
    the unravelled coefficient table plus averaged solver diagnostics
    (``'Grad'``, ``'It'``, ``'Conv'``).
    """
    inputDF, outputDF = data
    n, d = inputDF.shape
    n, m = outputDF.shape
    features = inputDF.columns
    outputs = outputDF.columns
    # Rebuild the master coefficient table; rows/columns absent from either
    # side come back zero-filled.
    target_Z = reconstructDataFrame(master_Z)
    betas = SparseDataFrame(target_Z.copy())
    grad_total = 0.0
    conv_total = 0.0
    it_total = 0.0
    solved = 0.0
    for out in outputDF.columns:
        beta_target = np.array(target_Z.loc[out, features])
        y = np.array(outputDF.loc[:, out])
        X = np.array(inputDF)
        beta, grad_norm, converged, it = self.solveSingle(X, y, rho, beta_target)
        solved = solved + 1.0
        grad_total = grad_total + grad_norm
        conv_total = conv_total + converged
        it_total = it_total + it
        logging.info(str(datetime.datetime.now()) + '- Converged: '
                     + str(converged) + ' Gradient norm: ' + str(grad_norm))
        logging.info(str(datetime.datetime.now()) + '- Beta learned: '
                     + str(beta))
        betas.loc[out, features] = beta
    # Per-output averages of the diagnostics collected above.
    stats = {
        'Grad': grad_total / (1. * solved),
        'It': it_total / (1. * solved),
        'Conv': conv_total / (1. * solved),
    }
    return unravelDataFrame(betas), stats
#d = 4
#m = 3
rho = 0.1
LR = LogisticRegressionSolver()
# Read the point batch from stdin: ((inputDF, outputDF), keys, stats).
(inputDF, outputDF), keys, stats = LR.readPointBatch(sys.stdin)
# Warm-start the proximal solve from an all-zero coefficient table over keys.
zeros = reconstructDataFrame(dict(zip(keys, [0.0] * len(keys))))
logging.info(str(zeros))
# BUG FIX: solveProximal returns a (betas_dict, stats) tuple; the original
# passed the whole tuple into reconstructDataFrame. Unpack it first.
beta_dict, solve_stats = LR.solveProximal((inputDF, outputDF), rho,
                                          unravelDataFrame(zeros))
betas = reconstructDataFrame(beta_dict)
# Row-normalize the learned coefficients before reporting.
betas = pandas.DataFrame(normalize_row(betas), index=betas.index,
                         columns=betas.columns)
logging.info(str(datetime.datetime.now()) + ' Estimated Betas \n' + str(betas))
logging.basicConfig(level=logging.DEBUG)
#n = 10000
#d = 4
#m = 3
rho = 0.1
LR = LogisticRegressionSolver()
# Read the point batch from stdin: ((inputDF, outputDF), keys, stats).
(inputDF, outputDF), keys, stats = LR.readPointBatch(sys.stdin)
# Warm-start the proximal solve from an all-zero coefficient table over keys.
zeros = reconstructDataFrame(dict(zip(keys, [0.0] * len(keys))))
logging.info(str(zeros))
# BUG FIX: solveProximal returns a (betas_dict, stats) tuple; the original
# passed the whole tuple into reconstructDataFrame. Unpack it first.
beta_dict, solve_stats = LR.solveProximal((inputDF, outputDF), rho,
                                          unravelDataFrame(zeros))
betas = reconstructDataFrame(beta_dict)
# Row-normalize the learned coefficients before reporting.
betas = pandas.DataFrame(normalize_row(betas), index=betas.index,
                         columns=betas.columns)
logging.info(str(datetime.datetime.now()) + ' Estimated Betas \n' + str(betas))