def getAllFeatures(data):
    """ Get all the features present in dataset data. """
    features = SparseVector({})
    for (x, y) in data:
        features = features + x
    return features.keys()

def estimateGrad(fun, x, delta):
    """ Given a real-valued function fun, estimate its gradient numerically. """
    grad = SparseVector({})
    for key in x:
        e = SparseVector({})
        e[key] = 1.0
        grad[key] = (fun(x + delta * e) - fun(x)) / delta
    return grad

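# A minimal gradient-check sketch (an assumption, not part of the original code):
# estimateGrad can be used to verify an analytic gradient such as gradTotalLoss against
# a finite-difference estimate. The helper name checkGradient is hypothetical, and it
# assumes SparseVector supports +, scalar *, and dot as used elsewhere in this file.
def checkGradient(data, beta, lam=0.0, delta=1e-5):
    """ Return the l2 distance between the analytic and the numerical gradient (sketch). """
    import math
    analytic = gradTotalLoss(data, beta, lam)
    numerical = estimateGrad(lambda b: totalLoss(data, b, lam), beta, delta)
    diff = analytic + (-1.0) * numerical
    return math.sqrt(diff.dot(diff))  # close to 0 when gradTotalLoss is correct
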
def getAllFeatures(data):
    """ Get all the features present in dataset data.

        The input is:
            - data: a python list containing pairs of the form (x,y), where x is a sparse vector and y is a binary value

        The output is:
            - a list containing all features present in all x in data.
    """
    features = SparseVector({})
    for (x, y) in data:
        features = features + x
    return features.keys()

def evaluate(self, degree=1, debug=False):
    """Evaluate the Lagrangian function.
           degree = 0  computes the Lagrangian
           degree = +1 computes the Lagrangian plus its gradient
    """
    obj_lagrangian = 0.0
    grad_lagrangian = {}

    #w.r.t. constraints
    constraint_func, constraint_grads, constraint_Hessian = self.Pr.evalFullConstraintsGrad(degree)
    #w.r.t. objective
    obj_func, obj_grads, obj_Hessian = self.Pr.evalGradandUtilities(degree)

    for obj in obj_func:
        #Objective
        obj_lagrangian += obj_func[obj]
        if degree < 1:
            continue
        #Grad
        for index in obj_grads[obj]:
            if index in grad_lagrangian:
                grad_lagrangian[index] += obj_grads[obj][index]
            else:
                grad_lagrangian[index] = obj_grads[obj][index]

    utility = obj_lagrangian

    for constraint in constraint_func:
        #Objective
        obj_lagrangian += -1.0 * self.LAMBDAS[constraint] * constraint_func[constraint]
        if degree < 1:
            continue
        #Grad
        for index in constraint_grads[constraint]:
            grad_index = -1.0 * self.LAMBDAS[constraint] * constraint_grads[constraint][index]
            if index in grad_lagrangian:
                grad_lagrangian[index] += grad_index
            else:
                grad_lagrangian[index] = grad_index

    dual_grad = SparseVector(constraint_func) * -1.0
    feasibility = 1.0 * sum([dual_grad[key] <= 0.0 for key in dual_grad]) / len(dual_grad.keys())
    return obj_lagrangian, SparseVector(grad_lagrangian), dual_grad, utility, feasibility

def readBeta(input):
    """ Read a vector β from file input.
        Each line contains pairs of the form:
                 (feature,value)
    """
    beta = SparseVector({})
    with open(input, 'r') as fh:
        for line in fh:
            (feat, val) = eval(line.strip())
            beta[feat] = val
    return beta

def getAllFeaturesRDD(dataRDD):
    """ Get all the features present in dataset dataRDD.

        The input is:
            - dataRDD: an RDD containing pairs of the form (SparseVector(x),y).

        The return value is a list containing the keys of the union of all unique features present in sparse vectors inside dataRDD.
    """
    sparseDict = dataRDD.map(lambda (x, y): x).reduce(add)  # requires: from operator import add
    return sparseDict.keys()

def __init__(self, RlaxedPr, logger):
    self.Pr = RlaxedPr

    #Create dual variables LAMBDAS
    constraint_func, constraint_grads, constraint_HESSIAN = self.Pr.evalFullConstraintsGrad(0)
    self.LAMBDAS = SparseVector(dict([(key, 0.0) for key in constraint_func]))

    #step-size parameters
    self.beta = 2.0
    self.gamma = 1.0
    self.logger = logger

def readBeta(input):
    """ Read a vector β from file input.
        Each line of input contains pairs of the form:
                 (feature,value)

        The return value is β represented as a sparse vector.
    """
    beta = SparseVector({})
    with open(input, 'r') as fh:
        for line in fh:
            (feat, val) = line.strip('() \n').split(',')
            beta[feat] = eval(val)
    return beta

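# writeBeta is used by the training script later in this file but is not shown here.
# A minimal sketch consistent with the plain (feature,value) line format parsed by the
# readBeta variant above (an assumption, not the original implementation):
def writeBeta(output, beta):
    """ Write a sparse vector β to file output, one (feature,value) pair per line (sketch). """
    with open(output, 'w') as fh:
        for key in beta:
            fh.write('(%s,%s)\n' % (key, beta[key]))
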
def gradTotalLossRDD(dataRDD, beta, lam=0.0):
    """ Given a sparse vector beta and a dataset compute the gradient of regularized total logistic loss:
              ∇L(β) = Σ_{(x,y) in data} ∇l(β;x,y) + 2λ β

        Inputs are:
            - dataRDD: an RDD containing pairs of the form (x,y), where x is a sparse vector and y is a binary value
            - beta: a sparse vector β
            - lam: the regularization parameter λ
    """
    grad_total_loss = dataRDD.map(lambda elem: gradLogisticLoss(beta, elem[0], elem[1]))\
                             .fold(SparseVector({}), add)  # requires: from operator import add
    return grad_total_loss + 2. * lam * beta

def gradTotalLoss(data, beta, lam=0.0):
    """ Given a sparse vector beta and a dataset compute the gradient of regularized total logistic loss:
              ∇L(β) = Σ_{(x,y) in data} ∇l(β;x,y) + 2λ β

        Inputs are:
            - data: a python list containing pairs of the form (x,y), where x is a sparse vector and y is a binary value
            - beta: a sparse vector β
            - lam: the regularization parameter λ
    """
    grad_total_loss = SparseVector({})
    for (x, y) in data:
        grad_total_loss += gradLogisticLoss(beta, x, y)
    return grad_total_loss + 2. * lam * beta

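# gradLogisticLoss is called by the functions above but is not included in this
# collection. For the standard logistic loss l(β;x,y) = log(1 + exp(-y β·x)) with
# y in {-1,+1}, the gradient is ∇l(β;x,y) = -y x / (1 + exp(y β·x)); a minimal sketch
# (an assumption about the missing helper, not the original code) is:
def gradLogisticLoss(beta, x, y):
    """ Gradient of l(β;x,y) = log(1+exp(-y β·x)) with respect to β (sketch). """
    import math
    return x * (-1.0 * y / (1.0 + math.exp(y * beta.dot(x))))
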
def totalLossRDD(groupedDataRDD, featuresToPartitionsRDD, betaRDD, N, lam=0.0):
    """ Given a β represented by RDD betaRDD and a grouped dataset data represented by groupedDataRDD compute the regularized total logistic loss:
              L(β) = Σ_{(x,y) in data} l(β;x,y) + λ ||β||_2^2

        Inputs are:
            - groupedDataRDD: a groupedRDD containing pairs of the form (partitionID,dataList), where partitionID is an integer and dataList is a list of (SparseVector(x),y) values
            - featuresToPartitionsRDD: RDD mapping features to partitions, generated by mapFeaturesToPartitionsRDD
            - betaRDD: a vector β represented as an RDD of (feature,value) pairs
            - N: Number of partitions of the RDDs
            - lam (optional): the regularization parameter λ (default: 0.0)

        The return value is the scalar L(β).
    """
    small_beta = sendToPartitions(betaRDD, featuresToPartitionsRDD, N)
    total_loss = groupedDataRDD.join(small_beta) \
                               .map(lambda pair: totalLoss(pair[1][0], pair[1][1], lam=0.0)) \
                               .reduce(lambda x, y: x + y)
    mybeta = SparseVector(dict(betaRDD.collect()))
    return total_loss + lam * mybeta.dot(mybeta)

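# totalLoss (the serial loss over a python list of (x,y) pairs) is used above but not
# shown here. Under l(β;x,y) = log(1 + exp(-y β·x)) and the L(β) formula in the
# docstring, a minimal sketch (an assumption about the missing helper) is:
def totalLoss(data, beta, lam=0.0):
    """ Regularized total logistic loss over a python list of (x,y) pairs (sketch). """
    import math
    loss = 0.0
    for (x, y) in data:
        loss += math.log(1.0 + math.exp(-1.0 * y * beta.dot(x)))
    return loss + lam * beta.dot(beta)
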
def sendToPartitions(betaRDD, featuresToPartitionsRDD, N):
    """ Given a betaRDD and a featuresToPartitionsRDD, create an RDD that contains pairs of the form
                 (partitionID, small_beta)
        where small_beta is a SparseVector containing only the features present in the partition partitionID.

        The inputs are:
            - betaRDD: RDD storing β
            - featuresToPartitionsRDD: RDD mapping features to partitions, generated by mapFeaturesToPartitionsRDD
            - N: Number of partitions of the returned RDD

        The returned RDD is partitioned with the identityHash function and cached.
    """
    return betaRDD.join(featuresToPartitionsRDD)\
                  .map(lambda pair: (pair[1][1], SparseVector({pair[0]: pair[1][0]})))\
                  .reduceByKey(lambda x, y: x + y, numPartitions=N, partitionFunc=identityHash)\
                  .cache()

def readDataRDD(input_file, spark_context):
    """ Read data from an input file. Each line of the file contains tuples of the form
                    (x,y)

        x is a dictionary of the form:
           { "feature1": value, "feature2": value, ...}

        and y is a binary value +1 or -1.

        The result is stored in an RDD containing tuples of the form
                 (SparseVector(x),y)
    """
    return spark_context.textFile(input_file)\
                        .map(eval)\
                        .map(lambda datapoint: (SparseVector(datapoint[0]), datapoint[1]))

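# A hypothetical usage sketch (the SparkContext variable, app name, and file path are
# assumptions for illustration only):
#
#   from pyspark import SparkContext
#   sc = SparkContext(appName='LogisticRegression')
#   dataRDD = readDataRDD('data/train.txt', sc)
#   print dataRDD.count(), 'data points with', len(getAllFeaturesRDD(dataRDD)), 'features'
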
def gradTotalLoss(data, beta, lam=0.0):
    """ Given a sparse vector beta and a dataset compute the gradient of regularized total logistic loss:
              ∇L(β) = Σ_{(x,y) in data} ∇l(β;x,y) + 2λ β

        Inputs are:
            - data: a python list containing pairs of the form (x,y), where x is a sparse vector and y is a binary value
            - beta: a sparse vector β
            - lam: the regularization parameter λ

        Output is:
            - The gradient ∇L(β)
    """
    grad = SparseVector({})
    for x, y in data:
        if y == 0:
            y = -1
        grad = grad + gradLogisticLoss(beta, x, y)
    return grad + 2 * lam * beta

def readData(input_file):
    """ Read data from an input file. Each line of the file contains tuples of the form
                    (x,y)

        x is a dictionary of the form:
           { "feature1": value, "feature2": value, ...}

        and y is a binary value +1 or -1.

        The return value is a list containing tuples of the form
                 (SparseVector(x),y)
    """
    listSoFar = []
    with open(input_file, 'r') as fh:
        for line in fh:
            (x, y) = eval(line)
            x = SparseVector(x)
            listSoFar.append((x, y))
    return listSoFar

parser = argparse.ArgumentParser(description='Logistic Regression.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('traindata', default=None, help='Input file containing (x,y) pairs, used to train a logistic model')
parser.add_argument('--testdata', default=None, help='Input file containing (x,y) pairs, used to test a logistic model')
parser.add_argument('--beta', default='beta', help='File where beta is stored (when training) and read from (when testing)')
parser.add_argument('--lam', type=float, default=0.0, help='Regularization parameter λ')
parser.add_argument('--max_iter', type=int, default=100, help='Maximum number of iterations')
parser.add_argument('--eps', type=float, default=0.1, help='ε-tolerance. If the l2 norm of the gradient is smaller than ε, gradient descent terminates.')

args = parser.parse_args()

print 'Reading training data from', args.traindata
traindata = readData(args.traindata)
print 'Read', len(traindata), 'data points with', len(getAllFeatures(traindata)), 'features in total'

if args.testdata is not None:
    print 'Reading test data from', args.testdata
    testdata = readData(args.testdata)
    print 'Read', len(testdata), 'data points with', len(getAllFeatures(testdata)), 'features'
else:
    testdata = None

beta0 = SparseVector({})
print 'Training on data from', args.traindata, 'with λ =', args.lam, ', ε =', args.eps, ', max iter =', args.max_iter
beta, gradNorm, k = train(traindata, beta_0=beta0, lam=args.lam, max_iter=args.max_iter, eps=args.eps, test_data=testdata)
print 'Algorithm ran for', k, 'iterations. Converged:', gradNorm < args.eps
print 'Saving trained β in', args.beta
writeBeta(args.beta, beta)

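# A hypothetical invocation of the script above (the script and data file names are
# assumptions for illustration only):
#
#   python LogisticRegression.py data/train.txt --testdata data/test.txt --lam 1.0 --max_iter 50 --eps 0.01
#
# This trains on data/train.txt with λ = 1.0, stops after 50 iterations or once the
# l2 norm of the gradient drops below 0.01, and saves the learned β to the file given
# by --beta (default: 'beta').
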
def evaluate(self, Pr, LAMBDAS, SHIFTS, degree=2, debug=False):
    """Evaluate the objective function.
           degree = -1 only computes LAMBDA_BAR
           degree = 0  computes LAMBDA_BAR plus the objective value
           degree = +1 computes LAMBDA_BAR, the objective, and the objective's gradient
           degree = +2 computes LAMBDA_BAR, the objective, the objective's gradient, and the objective's Hessian
    """
    obj_barrier = 0.0
    grad_barrier = {}
    Hessian_barrier = {}
    LAMBDA_BAR = {}

    #w.r.t. constraints
    constraint_func, constraint_grads, constraint_Hessian = Pr.evalFullConstraintsGrad(degree)
    #w.r.t. objective
    obj_func, obj_grads, obj_Hessian = Pr.evalGradandUtilities(degree)

    for obj in obj_func:
        if degree < 0:
            continue
        #Objective
        obj_barrier += obj_func[obj]
        if degree < 1:
            continue
        #Grad
        for index in obj_grads[obj]:
            if index in grad_barrier:
                grad_barrier[index] += obj_grads[obj][index]
            else:
                grad_barrier[index] = obj_grads[obj][index]
        if degree < 2:
            continue
        #Hessian
        for index_pair in obj_Hessian[obj]:
            if index_pair in Hessian_barrier:
                Hessian_barrier[index_pair] += obj_Hessian[obj][index_pair]
            else:
                Hessian_barrier[index_pair] = obj_Hessian[obj][index_pair]

    for constraint in constraint_func:
        LAMBDA_BAR[constraint] = LAMBDAS[constraint] * SHIFTS[constraint] / (constraint_func[constraint] + SHIFTS[constraint])
        if degree < 0:
            continue
        #Objective
        try:
            obj_barrier += -1.0 * LAMBDAS[constraint] * SHIFTS[constraint] * math.log(constraint_func[constraint] + SHIFTS[constraint])
        except ValueError:
            obj_barrier = float("inf")
        if degree < 1:
            continue
        #Grad
        for index in constraint_grads[constraint]:
            grad_index = -1.0 * LAMBDA_BAR[constraint] * constraint_grads[constraint][index]
            if index in grad_barrier:
                grad_barrier[index] += grad_index
            else:
                grad_barrier[index] = grad_index
        if degree < 2:
            continue
        #Hessian
        for index_pair in constraint_Hessian[constraint]:
            if index_pair in Hessian_barrier:
                Hessian_barrier[index_pair] += constraint_Hessian[constraint][index_pair]
            else:
                Hessian_barrier[index_pair] = constraint_Hessian[constraint][index_pair]

    return LAMBDA_BAR, obj_barrier, SparseVector(grad_barrier), SparseVector(Hessian_barrier)

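# Note (added explanation, based only on the code above): for each constraint c with
# dual variable λ_c and shift s_c, the barrier term is -λ_c * s_c * log(g_c(x) + s_c),
# whose derivative w.r.t. x is -λ_c * s_c / (g_c(x) + s_c) * ∇g_c(x) = -λ̄_c * ∇g_c(x).
# This is why LAMBDA_BAR[c] = λ_c * s_c / (g_c(x) + s_c) is computed first, and
# grad_barrier then accumulates -LAMBDA_BAR[c] * constraint_grads[c].
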
def _findCauchyPoint(self, grad, Hessian, Vars, Box, TrustRegionThreshold, scaling=False, debug=False):
    "Return the direction s_k as in Step 1 of the algorithm. Note that grad and Hessian are SparseVectors."
    if not scaling:
        scalingD = dict([(key, 1.0) for key in Vars])
    else:
        scalingD = {}
        for key in Vars:
            try:
                if grad[key] >= 0:
                    scalingD[key] = Vars[key]
                else:
                    scalingD[key] = (Box[key] - Vars[key])
            except ZeroDivisionError:
                scalingD[key] = 10.0

    #Compute hitting times
    hitting_times = {'dummy': 0.0}
    for key in Vars:
        if grad[key] == 0.0:
            hitting_times[key] = sys.maxsize
        elif grad[key] > 0:
            hitting_times[key] = Vars[key] / (scalingD[key] * grad[key])
        else:
            hitting_times[key] = (Box[key] - Vars[key]) / abs(scalingD[key] * grad[key])
    sorted_hitting_times_items = sorted(hitting_times.items(), key=lambda x: x[1])

    #Decompose S_k = S_independant_k + S_dependant_k * t
    S_independant_k = SparseVector({})
    S_dependant_k = SparseVector(dict([(key, -1.0 * scalingD[key] * grad[key]) for key in grad]))
    vars_indepnedant_of_t = []
    end_deriavative_sgn = 0
    t_threshold = sys.maxsize

    for i in range(len(sorted_hitting_times_items)):
        key, t_key = sorted_hitting_times_items[i]
        if i < len(sorted_hitting_times_items) - 1:
            next_key, next_t_key = sorted_hitting_times_items[i + 1]
        else:
            next_t_key = -1  #dummy value
        if key != 'dummy':
            vars_indepnedant_of_t.append(key)
            del S_dependant_k[key]
            if grad[key] > 0.0:
                S_independant_k[key] = -1.0 * Vars[key]
            elif grad[key] < 0.0:
                S_independant_k[key] = Box[key] - Vars[key]
            else:
                S_independant_k[key] = 0.0
        if next_t_key == t_key:
            continue
        if debug:
            print "Search interval is:", t_key, next_t_key
        #for key in vars_indepnedant_of_t:
        #    del S_dependant_k[key]
        a, b = self._getQudraticQuoeff(S_independant_k, S_dependant_k, grad, Hessian)

        #Check if the current interval is inside the trust region
        if squaredNorm(S_independant_k + S_dependant_k * next_t_key) >= TrustRegionThreshold:
            A = S_dependant_k.dot(S_dependant_k)
            B = 2.0 * S_dependant_k.dot(S_independant_k)
            C = S_independant_k.dot(S_independant_k) - TrustRegionThreshold**2
            D = B**2 - 4.0 * A * C
            try:
                root_1_tc = (-1.0 * B - math.sqrt(D)) / (2 * A)
                root_2_tc = (-1.0 * B + math.sqrt(D)) / (2 * A)
            except ZeroDivisionError:
                try:
                    root_1_tc = -1.0 * C / B
                    root_2_tc = root_1_tc
                except ZeroDivisionError:
                    root_1_tc = sys.maxsize
                    root_2_tc = sys.maxsize
            if root_1_tc > t_key and root_1_tc <= next_t_key:
                t_threshold = root_1_tc
            else:
                t_threshold = root_2_tc

        #Find the first local minimum of the piece-wise quadratic function a * t**2 + b * t
        #this happens in two cases:
        # (a) if the quadratic function is convex and its peak is in the interval [t_key, next_t_key]
        # (b) if the quadratic function was decreasing in the last interval and it is increasing now.

        #check (a)
        if a > 0.0 and -1.0 * b / (2 * a) > t_key and -1.0 * b / (2 * a) < next_t_key:
            t_C_k = -1.0 * b / (2 * a)
            if t_C_k > t_threshold:
                if debug:
                    print "Iter", i, ": convexity, reached the threshold, t_threshold is", t_threshold
                return S_independant_k + S_dependant_k * t_threshold
            else:
                if debug:
                    print "Convexity"
                return S_independant_k + S_dependant_k * t_C_k

        #check (b)
        beg_deriavative_sgn = np.sign(2 * a * t_key + b)
        if beg_deriavative_sgn == 0:
            #Check if the quadratic function's peak coincides with the hitting times
            if a > 0.0:
                beg_deriavative_sgn = 1
            elif a == 0.0:
                beg_deriavative_sgn = 0
            else:
                beg_deriavative_sgn = -1
        if end_deriavative_sgn < 0 and beg_deriavative_sgn >= 0:
            t_C_k = t_key
            if debug:
                print "Changing sign"
            return S_independant_k + S_dependant_k * t_C_k
        end_deriavative_sgn = np.sign(2 * a * next_t_key + b)
        if end_deriavative_sgn == 0:
            #Check if the quadratic function's peak coincides with the hitting times
            if a > 0.0:
                end_deriavative_sgn = -1
            else:
                end_deriavative_sgn = 1
        if t_threshold < sys.maxsize:
            #If the quadratic function is decreasing in an interval before t_threshold
            if 2 * a * t_threshold + b <= 0.0:
                if debug:
                    print "Iter", i, ": reached threshold, t_threshold is", t_threshold, TrustRegionThreshold
                return S_independant_k + S_dependant_k * t_threshold
            #else:
            #    return S_independant_k
    return SparseVector({})

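# Note (added explanation, based only on the code above): along the projected
# steepest-descent path the step is decomposed as S_k(t) = S_independant_k + t * S_dependant_k,
# with coordinates that have hit their bounds moved into the t-independent part.
# _getQudraticQuoeff presumably returns the coefficients (a, b) of the model
# m(t) = a * t**2 + b * t restricted to this path; the loop then returns at the first
# local minimizer of m(t), or at the point where the path leaves the trust region.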