def getAllFeatures(data):
    """ Get all the features present in dataset data.
    """
    features = SparseVector({})
    for (x,y) in data:
        features = features + x
    return features.keys() 
Example #2
def estimateGrad(fun, x, delta):
    """ Given a real-valued function fun, estimate its gradient numerically.
     """
    grad = SparseVector({})
    for key in x:
        e = SparseVector({})
        e[key] = 1.0
        grad[key] = (fun(x + delta * e) - fun(x)) / delta
    return grad
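
For reference, the same forward-difference idea can be written without the project's SparseVector class; the sketch below uses plain dicts and a made-up test function, purely to illustrate how such an estimate can be checked against a known gradient.

# Minimal sketch of the forward-difference estimate using plain dicts
# (illustration only; not the SparseVector-based version above).
def estimate_grad_dict(fun, x, delta):
    grad = {}
    for key in x:
        x_shift = dict(x)
        x_shift[key] += delta                      # perturb one coordinate
        grad[key] = (fun(x_shift) - fun(x)) / delta
    return grad

# f(x) = x_a^2 + 3*x_b has gradient (2*x_a, 3).
f = lambda v: v['a'] ** 2 + 3.0 * v['b']
print(estimate_grad_dict(f, {'a': 2.0, 'b': 1.0}, 1e-6))   # ~{'a': 4.0, 'b': 3.0}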
Example #3
def getAllFeatures(data):
    """ Get all the features present in dataset data.
	The input is:
            - data: a python list containing pairs of the form (x,y), where x is a sparse vector and y is a binary value

	The output is:
	    - a list containing all features present in all x in data.

    """
    features = SparseVector({})
    for (x,y) in data:
        features = features + x
    return features.keys()
Example #4
    def evaluate(self, degree=1, debug=False):
        """Evaluate the Lagrangian function.
           degree = 0 computes the Lagrangian
           degree = +1 computes the Lagrangian plus its gradient
        """
        obj_lagrangian = 0.0
        grad_lagrangian = {}
        #w.r.t. constraints
        constraint_func, constraint_grads, constraint_Hessian = self.Pr.evalFullConstraintsGrad(
            degree)
        #w.r.t. objective
        obj_func, obj_grads, obj_Hessian = self.Pr.evalGradandUtilities(degree)

        for obj in obj_func:
            #Objective
            obj_lagrangian += obj_func[obj]

            if degree < 1:
                continue
            #Grad
            for index in obj_grads[obj]:
                if index in grad_lagrangian:
                    grad_lagrangian[index] += obj_grads[obj][index]
                else:
                    grad_lagrangian[index] = obj_grads[obj][index]

        utility = obj_lagrangian

        for constraint in constraint_func:
            #Objective
            obj_lagrangian += -1.0 * self.LAMBDAS[
                constraint] * constraint_func[constraint]
            if degree < 1:
                continue
            #Grad
            for index in constraint_grads[constraint]:
                grad_index = -1.0 * self.LAMBDAS[
                    constraint] * constraint_grads[constraint][index]
                if index in grad_lagrangian:
                    grad_lagrangian[index] += grad_index
                else:
                    grad_lagrangian[index] = grad_index

        dual_grad = SparseVector(constraint_func) * -1.0
        feasibility = 1.0 * sum([dual_grad[key] <= 0.0
                                 for key in dual_grad]) / len(dual_grad.keys())
        return obj_lagrangian, SparseVector(
            grad_lagrangian), dual_grad, utility, feasibility
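
For a single scalar variable and a single constraint, the bookkeeping in evaluate() above reduces to the following toy sketch (an illustration only, not part of the class): the Lagrangian is the objective minus λ times the constraint value, and its gradient combines the two gradients the same way.

# Scalar sketch of what evaluate() computes for degree >= 1.
f = lambda x: (x - 3.0) ** 2           # toy objective
df = lambda x: 2.0 * (x - 3.0)
g = lambda x: x - 1.0                  # toy constraint value g(x)
dg = lambda x: 1.0
x, lam = 2.0, 0.5
lagrangian = f(x) - lam * g(x)         # objective minus λ times constraint
grad_lagrangian = df(x) - lam * dg(x)  # combined gradient, as in the loops above
print(lagrangian, grad_lagrangian)     # 0.5 -2.5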
Example #5
def readBeta(input):
    """ Read a vector β from file input. Each line contains pairs of the form:
                (feature,value)
    """
    beta = SparseVector({})
    with open(input, 'r') as fh:
        for line in fh:
            (feat, val) = eval(line.strip())
            beta[feat] = val
    return beta
Example #6
def getAllFeaturesRDD(dataRDD):
    """ Get all the features present in grouped dataset dataRDD.

        The input is:
            - dataRDD: containing pairs of the form (SparseVector(x),y).

        The return value is a list containing the keys of the union of all unique features present in sparse vectors inside dataRDD.
    """
    # 'add' is operator.add; summing SparseVectors takes the union of their keys
    sparseDict = dataRDD.map(lambda pair: pair[0]).reduce(add)
    return sparseDict.keys()
Example #7
    def __init__(self, RlaxedPr, logger):
        self.Pr = RlaxedPr
        #Create dual variables LAMBDAS
        constraint_func, constraint_grads, constraint_HESSIAN = self.Pr.evalFullConstraintsGrad(
            0)
        self.LAMBDAS = SparseVector(
            dict([(key, 0.0) for key in constraint_func]))
        #step-size parameters
        self.beta = 2.0
        self.gamma = 1.0
        self.logger = logger
Example #8
def readBeta(input):
    """ Read a vector β from file input. Each line of input contains pairs of the form:
                (feature,value)
        The return value is β represented as a sparse vector.
    """
    beta = SparseVector({})
    with open(input, 'r') as fh:
        for line in fh:
            (feat, val) = line.strip('() \n').split(',')
            beta[feat] = eval(val)
    return beta
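
A quick, self-contained illustration of the (feature,value) line format this reader expects; it uses a plain dict in place of SparseVector and made-up feature names.

# Each line looks like "(feature,value)"; parse it the same way the reader does.
lines = ["(f1,0.25)", "(f2,-1.5)"]
beta = {}
for line in lines:
    feat, val = line.strip('() \n').split(',')
    beta[feat] = float(val)
print(beta)   # {'f1': 0.25, 'f2': -1.5}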
Example #9
def gradTotalLossRDD(dataRDD, beta, lam=0.0):
    """  Given a sparse vector beta and a dataset compute the gradient of regularized total logistic loss :

              ∇L(β) = Σ_{(x,y) in data}  ∇l(β;x,y)  + 2λ β

         Inputs are:
            - data: a rdd containing pairs of the form (x,y), where x is a sparse vector and y is a binary value
            - beta: a sparse vector β
            - lam: the regularization parameter λ
    """
    grad_total_loss = dataRDD.map(lambda elem: gradLogisticLoss(beta, elem[0], elem[1])).\
        fold(SparseVector({}), add)
    return grad_total_loss + 2 * lam * beta
Example #10
def gradTotalLoss(data, beta, lam=0.0):
    """  Given a sparse vector beta and a dataset, compute the gradient of the regularized total logistic loss:

              ∇L(β) = Σ_{(x,y) in data}  ∇l(β;x,y)  + 2λ β

         Inputs are:
            - data: a python list containing pairs of the form (x,y), where x is a sparse vector and y is a binary value
            - beta: a sparse vector β
            - lam: the regularization parameter λ
    """
    total_loss = SparseVector({})
    for (x, y) in data:
        total_loss += gradLogisticLoss(beta, x, y)
    return total_loss + 2 * lam * beta
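
The functions above delegate the per-point gradient to gradLogisticLoss, which is not shown here. The sketch below is an assumption about its usual form: for the logistic loss l(β;x,y) = log(1 + exp(-y·βᵀx)) with labels y ∈ {-1,+1}, the gradient with respect to β is -y·σ(-y·βᵀx)·x, where σ is the sigmoid. It uses plain dicts and is only an illustration, not the project's implementation.

import math

# Sketch (assumed loss form): ∇l(β;x,y) = -y · σ(-y·βᵀx) · x, σ(t) = 1/(1+exp(-t)).
def grad_logistic_loss_dict(beta, x, y):
    dot = sum(beta.get(k, 0.0) * v for k, v in x.items())
    coeff = -y / (1.0 + math.exp(y * dot))      # equals -y * σ(-y·βᵀx)
    return {k: coeff * v for k, v in x.items()}

print(grad_logistic_loss_dict({'a': 1.0}, {'a': 2.0, 'b': 1.0}, 1))
# ≈ {'a': -0.238, 'b': -0.119}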
Example #11
def totalLossRDD(groupedDataRDD, featuresToPartitionsRDD, betaRDD, N, lam=0.0):
    """  Given a β represented by RDD betaRDD and a grouped dataset data represented by groupedDataRDD  compute 
         the regularized total logistic loss:

            L(β) = Σ_{(x,y) in data}  l(β;x,y)  + λ ||β ||_2^2             
         
         Inputs are:
            - groupedDataRDD: a groupedRDD containing pairs of the form (partitionID,dataList), where 
              partitionID is an integer and dataList is a list of (SparseVector(x),y) values
            - featuresToPartitionsRDD: RDD mapping features to partitions, generated by mapFeaturesToPartitionsRDD
            - betaRDD: a vector β represented as an RDD of (feature,value) pairs
            - N: Number of partitions of RDDs
            - lam (optional): the regularization parameter λ (default: 0.0)

         The return value is the scalar L(β).
    """
    small_beta = sendToPartitions(betaRDD, featuresToPartitionsRDD, N)
    total_loss = groupedDataRDD.join(small_beta) \
                            .map(lambda pair: totalLoss(pair[1][0], pair[1][1], lam=0)) \
                            .reduce(lambda x, y: x + y)

    mybeta = SparseVector(betaRDD.collect())

    return total_loss + lam * mybeta.dot(mybeta)
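
A toy, dict-based sketch of the scalar L(β) defined in the docstring above, again assuming the standard logistic loss l(β;x,y) = log(1 + exp(-y·βᵀx)); it illustrates the formula, not the RDD pipeline.

import math

def total_loss_dict(data, beta, lam=0.0):
    loss = 0.0
    for x, y in data:
        dot = sum(beta.get(k, 0.0) * v for k, v in x.items())
        loss += math.log(1.0 + math.exp(-y * dot))
    return loss + lam * sum(v * v for v in beta.values())

data = [({'a': 1.0}, 1), ({'a': -2.0}, -1)]
print(total_loss_dict(data, {'a': 0.5}, lam=0.1))   # ≈ 0.474 + 0.313 + 0.025 ≈ 0.812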
Example #12
def sendToPartitions(betaRDD, featuresToPartitionsRDD, N):
    """ Given a betaRDD and a featuresToPartitionsRDD, create an RDD that contains pairs of the form 
                   (partitionID, small_beta)
        
        where small_beta is a SparseVector containing only the features present in the partition partitionID. 
        
        The inputs are:
            - betaRDD: RDD storing β
            - featuresToPartitionsRDD:  RDD mapping features to partitions, generated by mapFeaturesToPartitionsRDD
            - N: Number of partitions of the returned RDD

        The returned RDD is  partitioned with the identityHash function and cached.
    """
    return betaRDD.join(featuresToPartitionsRDD)\
           .map(lambda pair: (pair[1][1], SparseVector({pair[0]:pair[1][0]})))\
           .reduceByKey(lambda x,y: x+y,numPartitions=N, partitionFunc=identityHash).cache()
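
Without Spark, the same bookkeeping amounts to joining β with the feature-to-partition map and merging the per-feature pieces by partition ID; the following plain-Python sketch (made-up feature names) shows the idea.

beta = {'f1': 0.5, 'f2': -1.0, 'f3': 2.0}
feature_to_partition = {'f1': 0, 'f2': 1, 'f3': 0}
small_betas = {}
for feat, val in beta.items():
    pid = feature_to_partition[feat]
    small_betas.setdefault(pid, {})[feat] = val      # merge pieces per partition
print(small_betas)   # {0: {'f1': 0.5, 'f3': 2.0}, 1: {'f2': -1.0}}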
Example #13
def readDataRDD(input_file, spark_context):
    """  Read data from an input file. Each line of the file contains tuples of the form

                    (x,y)  

         x is a dictionary of the form:                 

           { "feature1": value, "feature2":value, ...}

         and y is a binary value +1 or -1.

         The result is stored in an RDD containing tuples of the form
                 (SparseVector(x),y)             
    """
    return spark_context.textFile(input_file)\
                        .map(eval)\
                        .map(lambda datapoint:(SparseVector(datapoint[0]),datapoint[1]))
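
A hypothetical usage sketch (the file path and app name are placeholders; it assumes pyspark and the SparseVector class are importable):

from pyspark import SparkContext

sc = SparkContext(appName='LogisticRegressionExample')
dataRDD = readDataRDD('data/train.txt', sc)        # placeholder path
print(dataRDD.take(1))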
Example #14
def gradTotalLoss(data, beta, lam=0.0):
    """  Given a sparse vector beta and a dataset perform compute the gradient of regularized total logistic loss :
            
              ∇L(β) = Σ_{(x,y) in data}  ∇l(β;x,y)  + 2λ β   
        
         Inputs are:
            - data: a python list containing pairs of the form (x,y), where x is a sparse vector and y is a binary value
            - beta: a sparse vector β
            - lam: the regularization parameter λ

         Output is:
            - The gradient ∇L(β) 
    """
    grad = SparseVector({})
    for x, y in data:
        if y == 0:          # accept {0,1} labels by mapping 0 to -1
            y = -1
        grad = grad + gradLogisticLoss(beta, x, y)

    return grad + 2 * lam * beta
Example #15
def readData(input_file):
    """  Read data from an input file. Each line of the file contains tuples of the form

                    (x,y)

         x is a dictionary of the form:

           { "feature1": value, "feature2":value, ...}

         and y is a binary value +1 or -1.

         The return value is a list containing tuples of the form
                 (SparseVector(x),y)

    """
    listSoFar = []
    with open(input_file, 'r') as fh:
        for line in fh:
            (x, y) = eval(line)
            x = SparseVector(x)
            listSoFar.append((x, y))

    return listSoFar
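
The line format readData() expects, illustrated with a throwaway temporary file and plain dicts (feature names are made up); readData itself additionally wraps each x in SparseVector.

import tempfile

lines = '({"height": 1.8, "weight": 72.0}, 1)\n({"height": 1.6, "weight": 80.0}, -1)\n'
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write(lines)
    path = f.name

with open(path) as fh:
    parsed = [eval(line) for line in fh]     # each line evaluates to (dict, label)
print(parsed)
# [({'height': 1.8, 'weight': 72.0}, 1), ({'height': 1.6, 'weight': 80.0}, -1)]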
Example #16
    parser = argparse.ArgumentParser(description = 'Logistic Regression.',formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('traindata',default=None, help='Input file containing (x,y) pairs, used to train a logistic model')
    parser.add_argument('--testdata',default=None, help='Input file containing (x,y) pairs, used to test a logistic model')
    parser.add_argument('--beta', default='beta', help='File where beta is stored (when training) and read from (when testing)')
    parser.add_argument('--lam', type=float,default=0.0, help='Regularization parameter λ')
    parser.add_argument('--max_iter', type=int,default=100, help='Maximum number of iterations')
    parser.add_argument('--eps', type=float, default=0.1, help='ε-tolerance. If the ℓ2 norm of the gradient is smaller than ε, gradient descent terminates.')


    args = parser.parse_args()


    print('Reading training data from', args.traindata)
    traindata = readData(args.traindata)
    print('Read', len(traindata), 'data points with', len(getAllFeatures(traindata)), 'features in total')

    if args.testdata is not None:
        print('Reading test data from', args.testdata)
        testdata = readData(args.testdata)
        print('Read', len(testdata), 'data points with', len(getAllFeatures(testdata)), 'features')
    else:
        testdata = None

    beta0 = SparseVector({})

    print('Training on data from', args.traindata, 'with λ =', args.lam, ', ε =', args.eps, ', max iter =', args.max_iter)
    beta, gradNorm, k = train(traindata, beta_0=beta0, lam=args.lam, max_iter=args.max_iter, eps=args.eps, test_data=testdata)
    print('Algorithm ran for', k, 'iterations. Converged:', gradNorm < args.eps)
    print('Saving trained β in', args.beta)
    writeBeta(args.beta, beta)
Example #17
    def evaluate(self, Pr, LAMBDAS, SHIFTS, degree=2, debug=False):
        """Evaluate the objective function.
           degree = -1 only computes LAMBDA BAR
           degree = 0 computes LAMBDA BAR plus the objective value
           degree = +1 computes LAMBDA BAR, the objective, and the objective's gradient
           degree = +2 computes LAMBDA BAR, the objective, the objective's gradient, and the objective's Hessian
        """
        obj_barrier = 0.0
        grad_barrier = {}
        Hessian_barrier = {}
        LAMBDA_BAR = {}
        #w.r.t. constraints
        constraint_func, constraint_grads, constraint_Hessian = Pr.evalFullConstraintsGrad(
            degree)
        #w.r.t. objective
        obj_func, obj_grads, obj_Hessian = Pr.evalGradandUtilities(degree)

        for obj in obj_func:
            if degree < 0:
                continue
            #Objective
            obj_barrier += obj_func[obj]

            if degree < 1:
                continue
            #Grad
            for index in obj_grads[obj]:
                if index in grad_barrier:
                    grad_barrier[index] += obj_grads[obj][index]
                else:
                    grad_barrier[index] = obj_grads[obj][index]
            if degree < 2:
                continue
            #Hessian
            for index_pair in obj_Hessian[obj]:
                if index_pair in Hessian_barrier:
                    Hessian_barrier[index_pair] += obj_Hessian[obj][index_pair]
                else:
                    Hessian_barrier[index_pair] = obj_Hessian[obj][index_pair]

        for constraint in constraint_func:
            LAMBDA_BAR[
                constraint] = LAMBDAS[constraint] * SHIFTS[constraint] / (
                    constraint_func[constraint] + SHIFTS[constraint])
            if degree < 0:
                continue
            #Objective
            try:
                obj_barrier += -1.0 * LAMBDAS[constraint] * SHIFTS[
                    constraint] * math.log(constraint_func[constraint] +
                                           SHIFTS[constraint])
            except ValueError:
                obj_barrier = float("inf")
            if degree < 1:
                continue
            #Grad
            for index in constraint_grads[constraint]:
                grad_index = -1.0 * LAMBDA_BAR[constraint] * constraint_grads[
                    constraint][index]
                if index in grad_barrier:
                    grad_barrier[index] += grad_index
                else:
                    grad_barrier[index] = grad_index

            if degree < 2:
                continue
            #Hessian
            for index_pair in constraint_Hessian[constraint]:
                if index_pair in Hessian_barrier:
                    Hessian_barrier[index_pair] += constraint_Hessian[
                        constraint][index_pair]
                else:
                    Hessian_barrier[index_pair] = constraint_Hessian[
                        constraint][index_pair]

        return LAMBDA_BAR, obj_barrier, SparseVector(
            grad_barrier), SparseVector(Hessian_barrier)
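
For a single constraint with scalar value c = g(x), dual variable λ and shift s, the barrier bookkeeping above reduces to the two formulas in this small sketch (illustration only): the modified log-barrier term -λ·s·log(c + s) added to the objective, and LAMBDA_BAR = λ·s / (c + s).

import math

lam, shift, c = 2.0, 1.0, 0.5
lambda_bar = lam * shift / (c + shift)             # 2/1.5 ≈ 1.333
barrier_term = -lam * shift * math.log(c + shift)  # -2·log(1.5) ≈ -0.811
print(lambda_bar, barrier_term)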
Example #18
    def _findCauchyPoint(self,
                         grad,
                         Hessian,
                         Vars,
                         Box,
                         TrustRegionThreshold,
                         scaling=False,
                         debug=False):
        "Return the direction s_k as in Step 1 of the algorithm. Note that grad and Hessian are SparseVectors."

        if not scaling:
            scalingD = dict([(key, 1.0) for key in Vars])
        else:
            scalingD = {}
            for key in Vars:
                try:
                    if grad[key] >= 0:
                        scalingD[key] = Vars[key]
                    else:
                        scalingD[key] = (Box[key] - Vars[key])
                except ZeroDivisionError:
                    scalingD[key] = 10.0

        #Compute hitting times
        hitting_times = {'dummy': 0.0}
        for key in Vars:
            if grad[key] == 0.0:
                hitting_times[key] = sys.maxsize
            elif grad[key] > 0:
                hitting_times[key] = Vars[key] / (scalingD[key] * grad[key])
            else:
                hitting_times[key] = (Box[key] - Vars[key]) / abs(
                    scalingD[key] * grad[key])

        sorted_hitting_times_items = sorted(hitting_times.items(),
                                            key=lambda x: x[1])

        #Decompose S_k = S_independant_k + S_dependant_k * t
        S_independant_k = SparseVector({})
        S_dependant_k = SparseVector(
            dict([(key, -1.0 * scalingD[key] * grad[key]) for key in grad]))
        vars_indepnedant_of_t = []
        end_deriavative_sgn = 0
        t_threshold = sys.maxsize
        for i in range(len(sorted_hitting_times_items)):
            key, t_key = sorted_hitting_times_items[i]

            if i < len(sorted_hitting_times_items) - 1:
                next_key, next_t_key = sorted_hitting_times_items[i + 1]
            else:
                next_t_key = -1  #dummy value
            if key != 'dummy':
                vars_indepnedant_of_t.append(key)
                del S_dependant_k[key]
                if grad[key] > 0.0:
                    S_independant_k[key] = -1.0 * Vars[key]
                elif grad[key] < 0.0:
                    S_independant_k[key] = Box[key] - Vars[key]
                else:
                    S_independant_k[key] = 0.0
            if next_t_key == t_key:
                continue

            if debug:
                print "Search ointerval is :", t_key, next_t_key
            #for key in vars_indepnedant_of_t:
        #     del S_dependant_k[key]
            a, b = self._getQudraticQuoeff(S_independant_k, S_dependant_k,
                                           grad, Hessian)

            #Check if the current interval is inside the trust region

            if squaredNorm(S_independant_k +
                           S_dependant_k * next_t_key) >= TrustRegionThreshold:
                A = S_dependant_k.dot(S_dependant_k)
                B = 2.0 * S_dependant_k.dot(S_independant_k)
                C = S_independant_k.dot(
                    S_independant_k) - TrustRegionThreshold**2
                D = B**2 - 4.0 * A * C

                try:
                    root_1_tc = (-1.0 * B - math.sqrt(D)) / (2 * A)
                    root_2_tc = (-1.0 * B + math.sqrt(D)) / (2 * A)
                except ZeroDivisionError:
                    try:
                        root_1_tc = -1.0 * C / B
                        root_2_tc = root_1_tc
                    except ZeroDivisionError:
                        root_1_tc = sys.maxsize
                        root_2_tc = sys.maxsize
                if root_1_tc > t_key and root_1_tc <= next_t_key:
                    t_threshold = root_1_tc
                else:
                    t_threshold = root_2_tc

            #Find the first local minimum of the piece-wise quadratic function a * t**2 + b * t
            # this happens in two cases
            # (a) if the quadratic function is convex and its vertex (a minimum) lies in the interval [t_key, next_t_key]
            # (b) if the quadratic function was decreasing in the last interval and it is increasing now.
            # check (a)
            if a > 0.0 and -1.0 * b / (2 * a) > t_key and -1.0 * b / (
                    2 * a) < next_t_key:
                t_C_k = -1.0 * b / (2 * a)

                if t_C_k > t_threshold:
                    if debug:
                        print "Iter ", i, " Convexity Rechaed the threhsold, T_thre is ", t_threshold
                    return S_independant_k + S_dependant_k * t_threshold

                else:
                    if debug:
                        print "Convexity"
                    return S_independant_k + S_dependant_k * t_C_k

            # check (b)
            beg_deriavative_sgn = np.sign(2 * a * t_key + b)
            if beg_deriavative_sgn == 0:
                #Check whether the quadratic function's vertex coincides with one of the hitting_times
                if a > 0.0:
                    beg_deriavative_sgn = 1
                elif a == 0.0:
                    beg_deriavative_sgn = 0
                else:
                    beg_deriavative_sgn = -1

            if end_deriavative_sgn < 0 and beg_deriavative_sgn >= 0:
                t_C_k = t_key
                if debug:
                    print "Changin sign"
                return S_independant_k + S_dependant_k * t_C_k
            end_deriavative_sgn = np.sign(2 * a * next_t_key + b)
            if end_deriavative_sgn == 0:
                #Check whether the quadratic function's vertex coincides with one of the hitting_times
                if a > 0.0:
                    end_deriavative_sgn = -1
                else:
                    end_deriavative_sgn = 1
            if t_threshold < sys.maxsize:
                #If the quadratic function is decreasing in an interval before t_threshold
                if 2 * a * t_threshold + b <= 0.0:
                    if debug:
                        print "Iter ", i, " rechaed threshold, T_thre is ", t_threshold, TrustRegionThreshold
                    return S_independant_k + S_dependant_k * t_threshold
                #else:
                #    return S_independant_k
        return SparseVector({})
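
At its core, each iteration of the search above minimizes a one-dimensional quadratic a·t² + b·t over an interval of hitting times. The standalone sketch below (not the class method) shows that per-interval subproblem: take the vertex -b/(2a) when the quadratic is convex and the vertex lies inside the interval, otherwise the better endpoint.

def minimize_quadratic_on_interval(a, b, t_lo, t_hi):
    candidates = [t_lo, t_hi]
    if a > 0.0:
        vertex = -b / (2.0 * a)            # unconstrained minimizer of a·t² + b·t
        if t_lo < vertex < t_hi:
            candidates.append(vertex)
    return min(candidates, key=lambda t: a * t * t + b * t)

print(minimize_quadratic_on_interval(1.0, -4.0, 0.0, 3.0))   # 2.0 (interior vertex)
print(minimize_quadratic_on_interval(-1.0, 0.0, 0.0, 3.0))   # 3.0 (concave: endpoint)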