Example #1
0
def getProbaForLastMeasures(numberOfLastMeasures):
    """
        Compute a probability for each of the selected measures.

        For every criterion that has values, the probability of the category
        observed in a measure is estimated by counting over all stored
        measures: conditionally on the categories of the criterion's parents
        when it has parents, as a plain frequency otherwise. The probabilities
        of all criteria are multiplied together, measure by measure.

        numberOfLastMeasures specifies how many measures should be considered
        in the calculation; 0 means all available measures.

        Returns a numpy array with one probability per considered measure, or
        None when no measure is available.
    """

    rawMeasures = getAllMeasures()

    # The empty case must be handled before the reshape below: with no row
    # the rebuilt array is 1-D and reading shape[2] raises IndexError.
    if len(rawMeasures) == 0:
        logging.warning("No values are available for learning")
        return None

    # remove timestamp from measures (first column)
    measures = rawMeasures[:, 1:]
    # each remaining cell holds the sequence of values of one measure;
    # unpack it so that the array becomes (measure, criterion value)
    measures = np.array([list(m) for m in measures], dtype=object)
    measures = np.reshape(measures, (measures.shape[0], measures.shape[2]))

    # set numberOfLastMeasures to max if param equals 0
    if numberOfLastMeasures == 0:
        numberOfLastMeasures = measures.shape[0]

    logging.info("Calculate proba of the last %s measures... ", numberOfLastMeasures)

    # NOTE(review): this keeps the FIRST rows returned by getAllMeasures();
    # they are the "last" measures only if that helper returns the newest
    # rows first - TODO confirm
    measuresToEvaluate = measures[:numberOfLastMeasures]

    # variable where proba result will be stored
    result = np.ones(len(measuresToEvaluate))

    for c in criteria:
        if len(c.values) == 0:
            continue

        # the value of a criterion is stored in column (id - 1)
        childCol = c.id - 1

        if len(c.parents) > 0:
            # One single, id-sorted column list is used for both the evaluated
            # rows and the reference rows. Building one side from c.parents in
            # iteration order and the other from the sorted parents would
            # compare unrelated columns whenever c.parents is not sorted.
            parentCols = [p.id - 1 for p in sorted(c.parents, key=lambda p: p.id)]
            jointCols = [childCol] + parentCols

            allParents = measures[:, parentCols]
            allJoint = measures[:, jointCols]

            # denominator: number of stored measures sharing the parents' categories
            den = np.array([np.count_nonzero([(t == m).all() for m in allParents])
                            for t in measuresToEvaluate[:, parentCols]])
            # numerator: number of stored measures sharing parents' AND criterion's categories
            num = np.array([np.count_nonzero([(t == m).all() for m in allJoint])
                            for t in measuresToEvaluate[:, jointCols]])

            # set num = 0 and den = 1 for avoiding division by zero and getting a result
            unseen = den == 0
            num[unseen] = 0
            den[unseen] = 1

        else:
            # den is total number of values for this criterion
            den = len(measures)
            column = measures[:, childCol]
            # numerator: number of stored measures having the same category
            num = np.array([np.count_nonzero([m == mte for mte in column])
                            for m in measuresToEvaluate[:, childCol]])

        # result is calculated by multiplying previous value with current proba
        result = np.multiply(result, np.divide(num, den, dtype=float))

    logging.info("Final proba for last measures :\n%s", result)

    return result
Example #2
0
def getProbaForLastMeasures(numberOfLastMeasures):
    """
        Compute a probability for each of the selected measures.

        For every criterion that has values, the probability of the category
        observed in a measure is estimated by counting over all stored
        measures: conditionally on the categories of the criterion's parents
        when it has parents, as a plain frequency otherwise. The probabilities
        of all criteria are multiplied together, measure by measure.

        numberOfLastMeasures specifies how many measures should be considered
        in the calculation; 0 means all available measures.

        Returns a numpy array with one probability per considered measure, or
        None when no measure is available.
    """

    rawMeasures = getAllMeasures()

    # The empty case must be handled before the reshape below: with no row
    # the rebuilt array is 1-D and reading shape[2] raises IndexError.
    if len(rawMeasures) == 0:
        logging.warning("No values are available for learning")
        return None

    # remove timestamp from measures (first column)
    measures = rawMeasures[:, 1:]
    # each remaining cell holds the sequence of values of one measure;
    # unpack it so that the array becomes (measure, criterion value)
    measures = np.array([list(m) for m in measures], dtype=object)
    measures = np.reshape(measures, (measures.shape[0], measures.shape[2]))

    # set numberOfLastMeasures to max if param equals 0
    if numberOfLastMeasures == 0:
        numberOfLastMeasures = measures.shape[0]

    logging.info("Calculate proba of the last %s measures... ", numberOfLastMeasures)

    # NOTE(review): this keeps the FIRST rows returned by getAllMeasures();
    # they are the "last" measures only if that helper returns the newest
    # rows first - TODO confirm
    measuresToEvaluate = measures[:numberOfLastMeasures]

    # variable where proba result will be stored
    result = np.ones(len(measuresToEvaluate))

    for c in criteria:
        if len(c.values) == 0:
            continue

        # the value of a criterion is stored in column (id - 1)
        childCol = c.id - 1

        if len(c.parents) > 0:
            # One single, id-sorted column list is used for both the evaluated
            # rows and the reference rows. Building one side from c.parents in
            # iteration order and the other from the sorted parents would
            # compare unrelated columns whenever c.parents is not sorted.
            parentCols = [p.id - 1 for p in sorted(c.parents, key=lambda p: p.id)]
            jointCols = [childCol] + parentCols

            allParents = measures[:, parentCols]
            allJoint = measures[:, jointCols]

            # denominator: number of stored measures sharing the parents' categories
            den = np.array([np.count_nonzero([(t == m).all() for m in allParents])
                            for t in measuresToEvaluate[:, parentCols]])
            # numerator: number of stored measures sharing parents' AND criterion's categories
            num = np.array([np.count_nonzero([(t == m).all() for m in allJoint])
                            for t in measuresToEvaluate[:, jointCols]])

            # set num = 0 and den = 1 for avoiding division by zero and getting a result
            unseen = den == 0
            num[unseen] = 0
            den[unseen] = 1

        else:
            # den is total number of values for this criterion
            den = len(measures)
            column = measures[:, childCol]
            # numerator: number of stored measures having the same category
            num = np.array([np.count_nonzero([m == mte for mte in column])
                            for m in measuresToEvaluate[:, childCol]])

        # result is calculated by multiplying previous value with current proba
        result = np.multiply(result, np.divide(num, den, dtype=float))

    logging.info("Final proba for last measures :\n%s", result)

    return result
Example #3
0
def getProbaForAllMeasures():
    """
        Algorithm for calculating conditional probabilities for all categories in all measures.

        For every criterion that has values and at least one parent, each
        combination (criterion category, parents' categories) is counted in the
        stored measures; the ratio "joint count / parents count" is stored as a
        Stats row. Combinations whose parents' categories never occur are
        skipped. Previously stored Stats rows are deleted first.

        Nothing is returned; results are written through the module-level session.
    """

    logging.info("Calculate all conditionnal probabilities")
    istats = 0

    rawMeasures = getAllMeasures()

    # The empty case must be handled before the reshape below: with no row
    # the rebuilt array is 1-D and reading shape[2] raises IndexError.
    if len(rawMeasures) == 0:
        logging.info("No values are available for learning")
        return

    # drop the first column, then unpack the per-measure sequence of values
    # so that the array becomes (measure, criterion value)
    measures = rawMeasures[:, 1:]
    measures = np.array([list(m) for m in measures], dtype=object)
    measures = np.reshape(measures, (measures.shape[0], measures.shape[2]))

    logging.info("Delete all previous calculated stats")
    # first, remove all
    session.query(Stats).delete()
    session.commit()

    # loop through criteria
    for c in criteria:
        logging.info("\nCriterion id : %s", c.id)

        if len(c.values) == 0:
            continue

        # if there is no parent for this criterion, nothing is stored
        if len(c.parents) == 0:
            logging.warning('No relationship find for criterion id : %s', c.id)
            continue

        sortParents = sorted(c.parents, key=lambda p: p.id)
        ids = [str(c.id), ] + [str(sp.id) for sp in sortParents]
        catChild = list(set([v.category for v in c.values]))
        catParent = [flatten(set([str(upc.category) for upc in p.values])) for p in sortParents]

        # every combination (criterion category, parent 1 category, ...)
        productNumerator = list(itertools.product(*([catChild, ] + catParent)))
        catNumerator = np.array(productNumerator)

        if len(catNumerator) == 0:
            logging.warning('No measure available for this criterion and/or parents')
            continue

        catDenominator = catNumerator[:, 1:]

        # Keep only the criterion column followed by its parents' columns, in
        # the same id-sorted order as the category combinations above.
        # Taking the parents in c.parents iteration order would compare
        # unrelated columns whenever c.parents is not sorted by id.
        columns = [c.id - 1, ] + [sp.id - 1 for sp in sortParents]
        truncation = measures[:, columns]

        # for each combination of category, calculation is done for denominator
        den = [np.count_nonzero([(cd == t).all() for t in truncation[:, 1:]]) for cd in catDenominator]
        # for each combination of category, calculation is done for numerator
        num = [np.count_nonzero([(cn == t).all() for t in truncation]) for cn in catNumerator]

        # for avoiding to divide by 0, keep only combinations whose parents' categories occur
        observed = np.nonzero(den)[0]
        productNumerator = [productNumerator[i] for i in observed]
        results = np.divide(np.take(num, observed), np.take(den, observed), dtype=float)

        # persist stats to db
        stats = []
        for combination, proba in zip(productNumerator, results):
            istats += 1
            listProduct = list(combination)
            # NOTE(review): the first item is the criterion's own category and the
            # remaining ones are its parents' categories, so the "parent" and
            # "children" keywords look swapped - kept as is, confirm against
            # the Stats consumers before changing
            stats.append(Stats(id=istats,
                               association=';'.join(ids),
                               parent=';'.join(listProduct[:1]),
                               children=';'.join(listProduct[1:]),
                               value=';'.join(listProduct),
                               proba=proba))

        session.add_all(stats)
        session.commit()

        logging.info("Criteria : %s", ids)
        logging.info("Categories : %s", productNumerator)
        logging.info("Proba : %s", results)

    if istats > 0:
        logging.info('%s stats have been calculated and inserted in database', istats)
Example #4
0
def getProbaForAllMeasures():
    """
        Algorithm for calculating conditional probabilities for all categories in all measures.

        For every criterion that has values and at least one parent, each
        combination (criterion category, parents' categories) is counted in the
        stored measures; the ratio "joint count / parents count" is stored as a
        Stats row. Combinations whose parents' categories never occur are
        skipped. Previously stored Stats rows are deleted first.

        Nothing is returned; results are written through the module-level
        session and a summary line is printed when at least one row was stored.
    """

    logging.info("Calculate all conditionnal probabilities")
    istats = 0

    rawMeasures = getAllMeasures()

    # The empty case must be handled before the reshape below: with no row
    # the rebuilt array is 1-D and reading shape[2] raises IndexError.
    if len(rawMeasures) == 0:
        logging.info("No values are available for learning")
        return

    # drop the first column, then unpack the per-measure sequence of values
    # so that the array becomes (measure, criterion value)
    measures = rawMeasures[:, 1:]
    measures = np.array([list(m) for m in measures], dtype=object)
    measures = np.reshape(measures, (measures.shape[0], measures.shape[2]))

    logging.info("Delete all previous calculated stats")
    # first, remove all
    session.query(Stats).delete()
    session.commit()

    # loop through criteria
    for c in criteria:
        logging.info("\nCriterion id : %s", c.id)

        if len(c.values) == 0:
            continue

        # if there is no parent for this criterion, nothing is stored
        if len(c.parents) == 0:
            logging.warning('No relationship find for criterion id : %s', c.id)
            continue

        sortParents = sorted(c.parents, key=lambda p: p.id)
        ids = [str(c.id), ] + [str(sp.id) for sp in sortParents]
        catChild = list(set([v.category for v in c.values]))
        catParent = [flatten(set([str(upc.category) for upc in p.values])) for p in sortParents]

        # every combination (criterion category, parent 1 category, ...)
        productNumerator = list(itertools.product(*([catChild, ] + catParent)))
        catNumerator = np.array(productNumerator)

        if len(catNumerator) == 0:
            logging.warning('No measure available for this criterion and/or parents')
            continue

        catDenominator = catNumerator[:, 1:]

        # Keep only the criterion column followed by its parents' columns, in
        # the same id-sorted order as the category combinations above.
        # Taking the parents in c.parents iteration order would compare
        # unrelated columns whenever c.parents is not sorted by id.
        columns = [c.id - 1, ] + [sp.id - 1 for sp in sortParents]
        truncation = measures[:, columns]

        # for each combination of category, calculation is done for denominator
        den = [np.count_nonzero([(cd == t).all() for t in truncation[:, 1:]]) for cd in catDenominator]
        # for each combination of category, calculation is done for numerator
        num = [np.count_nonzero([(cn == t).all() for t in truncation]) for cn in catNumerator]

        # for avoiding to divide by 0, keep only combinations whose parents' categories occur
        observed = np.nonzero(den)[0]
        productNumerator = [productNumerator[i] for i in observed]
        results = np.divide(np.take(num, observed), np.take(den, observed), dtype=float)

        # persist stats to db
        stats = []
        for combination, proba in zip(productNumerator, results):
            istats += 1
            listProduct = list(combination)
            # NOTE(review): the first item is the criterion's own category and the
            # remaining ones are its parents' categories, so the "parent" and
            # "children" keywords look swapped - kept as is, confirm against
            # the Stats consumers before changing
            stats.append(Stats(id=istats,
                               association=';'.join(ids),
                               parent=';'.join(listProduct[:1]),
                               children=';'.join(listProduct[1:]),
                               value=';'.join(listProduct),
                               proba=proba))

        session.add_all(stats)
        session.commit()

        logging.info("Criteria : %s", ids)
        logging.info("Categories : %s", productNumerator)
        logging.info("Proba : %s", results)

    if istats > 0:
        # single-argument call form: prints the same text under Python 2
        # (statement with a parenthesised expression) and Python 3 (function)
        print('SUCCESS: {} stats have been calculated and inserted in database'.format(istats))