Example #1
0
class PosAlgorithm():
    def __init__(self,
                 testDf,
                 mainDf,
                 segmentsStepsDf,
                 correlator,
                 spread = 15,
                 timeStep = 1):
        """
        Read the input tables and reset the prediction state.
        :param testDf: CSV source handed to read_csv; its 'index' column becomes the index of self.testDf
        :param mainDf: CSV source handed to read_csv; its 'index' column becomes the index of self.SmoothedDf
        :param segmentsStepsDf: CSV source handed to read_csv and stored as self.segmentsStepsDf
        :param correlator: object stored as self.powerCorrelator
        :param spread: range between the first and the last grabbed rows (the time of user's waiting)
        :param timeStep: constant step between rows at the database
        """
        load_csv = pd.io.parsers.read_csv

        # original database
        self.SmoothedDf = load_csv(mainDf, index_col = 'index')
        # source for section imitation
        self.testDf = load_csv(testDf, index_col = 'index')
        self.segmentsStepsDf = load_csv(segmentsStepsDf)

        self.powerCorrelator = correlator
        # width of the grabbed section and step between database rows
        self.spread = spread
        self.timeStep = timeStep

        # output database contained predicted points
        # NOTE(review): the other methods of this class write self.predictedDf;
        # this attribute keeps its original spelling.
        self.predicted_df = None
        # by default the number of unpredicted segments is 0
        self.unpredicted = 0
        # the dictionary of coefficients of correlation
        self.corrCoeffs = {}
        # A "numLC" setting (minimum number of laccids grabbed by user) is
        # mentioned for the "byLacCidMod" algorithm but is not stored here.
        self.filters = Filters()

    def initGrabbedSet(self):
        # initialize self.segments variable
        self.generateRandomSegment()
        self.grabbedDf = self.getTestSection()

        #self.truthPoint = self.randomSampling(self.grabbedDf,numsamples = 1)
        self.truthPoint = self.grabbedDf.tail(1)
        self.trueSegment = self.truthPoint['segment'].unique().all()
    def generateRandomSegment(self):
        """
        Generate segment where user is located.
        :return:
        """

        # additional criterias
        """
        uniqueLc = self.testDf.groupby(['segment'])['laccid'].unique()
        # get rows with number of laccids more than ...
        byLc = uniqueLc[uniqueLc.apply(len)>=self.numLC]
        segments = list(byLc.keys())
        # get rows with number of races more than ...
        byRaces = self.testDf.groupby('segment')['race_id'].unique().apply(len)
        segments2 = list(byRaces[byRaces>self.numRaces].keys())
        # find the intersection of founded sets
        Segments = set(segments).intersection(segments2)
        """
        # generate test segment
        # simple random
        segLens = self.testDf.groupby(['segment']).apply(len)
        self.randSeg = segLens[segLens>self.spread].sample(1).keys()
        #print self.randSeg
    def getTestSection(self):
        """
        Get the dataframe grabbed by user.
        Takes the rows of self.testDf that belong to self.randSeg, passes them
        through the median filter, picks a random start TimeStamp and flags as
        grabbed every row whose TimeStamp lies in [start, start + self.spread].
        The filtered frame with the 'grabbed' column (1 or 0) stays in self.analyzedDf.
        :return: the grabbed rows sorted by TimeStamp and laccid
        """
        in_segment = self.testDf['segment'].isin(self.randSeg)
        segment_rows = self.testDf[in_segment]

        self.analyzedDf = segment_rows.copy()
        # filtrate
        self.analyzedDf = self.filters.medianFilter(self.analyzedDf)

        # the start is drawn among the stamps strictly below (max - spread)
        stamps = self.analyzedDf.TimeStamp
        latest_start = max(stamps) - self.spread
        firstStamp = random.sample(stamps[stamps < latest_start],1)[0]
        print(" : " + str(firstStamp))

        self.analyzedDf.loc[:,'grabbed'] = np.nan
        # Note! change 'ratio' to 'TimeStamp' for real situation or remove this sorting!
        self.analyzedDf = self.analyzedDf.sort('ratio')

        # flag the window by index labels
        stamps = self.analyzedDf.TimeStamp
        in_window = (stamps>=firstStamp)&(stamps<=firstStamp+self.spread)
        window_labels = self.analyzedDf[in_window].index
        self.analyzedDf.loc[window_labels,'grabbed'] = 1
        self.analyzedDf['grabbed'] = self.analyzedDf['grabbed'].fillna(0)

        grabbed_df = self.analyzedDf[self.analyzedDf['grabbed'] == 1]
        return grabbed_df.sort(['TimeStamp','laccid'])
    def predict(self,alg,useSmoothed):
        """
        initialize the algorithm of postiioning prediction.
        :param alg: keyword for algoruthm
        :return:
        """
        self.corrCoeffs = {}
        if alg == "r":
            self.randomSampling(self.SmoothedDf)
        if alg == "lc":
            self.byLacCid()
        if alg == "lcM":
            self.byLacCidMod()
            #self.()
        if alg == "pc":
            self.byPowerCorr(useSmoothed = useSmoothed)


    def randomSampling(self,df,numsamples = 50):
        """
        Generate subset from input dataframe.
        :param df: dataframe to analyse
        :param numsamples: the number of samples
        :return:
        """
        rows = random.sample(df.index,numsamples)
        self.predictedDf = df.ix[rows]
        self.predicted_segments = self.predictedDf['segment'].unique()
        return self.predictedDf
    def byLacCid(self):
        """
        Use Lac and Cid identifiers of Base station only.
        :return:
        """
        self.grabbed_lc =  self.grabbedDf['laccid'].unique()
        self.predictedDf = self.SmoothedDf[self.SmoothedDf['laccid'].isin(self.grabbed_lc)]
        self.predicted_segments = self.predictedDf['segment'].unique()
        if self.predictedDf[self.predictedDf['segment'].isin(self.truthPoint['segment'].unique())].empty == True:
            self.unpredicted = 1
            print self.truthPoint
    def byLacCidMod(self):
        predictedInfo = pd.DataFrame()
        check = True
        laccids = self.grabbedDf.laccid.unique()
        if laccids.__len__()>1:
            actives = self.grabbedDf.Active.unique()
            uniqueLevels = {'before':self.spread,'after':self.spread}
            changedLcs = self.extractChanges()
            if changedLcs:
                predictedInfo = self.findChanges(changedLcs,uniqueLevels)
            else:
                if actives.__len__()>1:
                    predictedInfo = self.findActives(uniqueLevels)
            if not predictedInfo.empty:
                predictedDf = self.reduceByChanges(predictedInfo)
                self.predictedDf = predictedDf.sort(columns = ['segment','ratio','laccid'])
            if predictedInfo.empty:
                self.unpredicted = 1
        else:
            check = False
        return check
    def reduceByChanges(self,predictedInfo):
        predictedDf = pd.DataFrame()
        grouped = self.predictedDf.groupby('segment')
        for seg,gr in grouped:
            segInfo = predictedInfo[predictedInfo.segment == seg]
            for ix,row in segInfo.iterrows():
                #it might be more than one if segment contains several "change points"
                _gr = gr[(gr.ratio>=row['left'])&(gr.ratio<=row['right'])]
                predictedDf = pd.concat([predictedDf,_gr])
        predictedDf = predictedDf.drop_duplicates()
        return predictedDf
    def extractChanges(self):
        grouped = self.grabbedDf.groupby(['TimeStamp'])
        LcsPrev = np.array([])
        changed = []
        for ts,gr in grouped:
            uniqueLcs = gr.laccid.unique()
            if len(LcsPrev)>0:
                uniqueLcsNext = uniqueLcs
                if sorted(list(LcsPrev))!=sorted(list(uniqueLcsNext)):
                    changed.append({'prev':list(LcsPrev),'next':list(uniqueLcsNext)})
                LcsPrev = uniqueLcs
            if not len(LcsPrev)>0:
                LcsPrev = uniqueLcs
        return changed
    def findChanges(self,changedLcs,uniqueLevels):

        grouped = self.predictedDf.groupby(['segment','ratio'])
        predictedInfo = pd.DataFrame()
        LcsPrev = np.array([])
        ix = 0
        #LcsNext = None
        for pare in changedLcs:
            for (seg,rat),gr in grouped:
                if len(LcsPrev)>0:
                    uniqueLcsNext = gr.laccid.unique()
                    if (pare['next'] in uniqueLcsNext)&(pare['next'] not in LcsPrev):
                        leftDelta,rightDelta = self.findDiff(seg,uniqueLevels)
                        row = pd.DataFrame({'segment':seg,'left':prevPoint-leftDelta,'right':rat+rightDelta},index = [ix])
                        predictedInfo = pd.concat([predictedInfo,row])
                        ix+=1
                    LcsPrev = np.array([])
                if not (LcsPrev)>0:
                    uniqueLcsPrev = gr.laccid.unique()
                    if pare['prev'] in uniqueLcsPrev:
                        LcsPrev = uniqueLcsPrev
                        prevPoint = rat
                    else:
                        LcsPrev = np.array([])
                        prevPoint = None
        return predictedInfo


    def findActives(self,uniqueLevels):
        """
        Find the points of self.predictedDf whose laccids equal the grabbed ones.
        Only TimeStamps of self.grabbedDf and (segment, ratio) points of
        self.predictedDf that have more than one row are taken into account.
        :param uniqueLevels: forwarded to extractBounds
        :return: the frame of bounds built by extractBounds from the matching points
        """
        # distinct laccids per TimeStamp, for the TimeStamps having more than one row
        lcGrouped = self.grabbedDf.groupby('TimeStamp').\
            filter(lambda x : len(x)>1).groupby('TimeStamp').\
            apply(lambda x: np.unique(x['laccid']))
        # NOTE(review): np.unique receives dict.values(); what it returns depends
        # on the Python/numpy version and on whether the per-TimeStamp arrays
        # have equal lengths - confirm.
        laccidsAll = np.unique(lcGrouped.to_dict().values())

        # points of the predicted frame having more than one row
        filtered = self.predictedDf.groupby(['segment','ratio']).filter(lambda x : len(x)>1)

        activeGroup = filtered.groupby(['segment','ratio'])
        activePoints = activeGroup['laccid'].apply(np.unique)
        # True for the points whose sorted laccids equal the sorted grabbed laccids
        d = activePoints.apply(lambda x: sorted(list(x)) == sorted(laccidsAll)).to_dict()
        # the keys of d are (segment, ratio) pairs
        predictedFrame = pd.DataFrame([key for key in d.keys() if d[key] == True],columns = ['segment','ratio']).sort(['segment','ratio'])
        # clip keeps its default (True), so extractBounds does not call findDiff
        # with uniqueLevels here
        predictedInfo = self.extractBounds(predictedFrame,uniqueLevels)

        return predictedInfo
    def extractBounds(self,frame,uniqueLevels = 'default',clip = True):
        """
        Extract minimum and maximum ratios from the frame by each segment and clip predicted earlier frame by them.
        :param frame: frame contains "active points" with 2 fields : segment and ratio {pd.DataFrame}
        :param uniqueLevels: length of boundaries by which is need to clip (seconds) {int}
        :param clip: if need to clip or not {boolean}
        :return: clipped dataFrame {pd.DataFrame}
        """
        if uniqueLevels == 'default':
            uniqueLevels = {'after':0,'before':0}
        leftDelta,rightDelta = 0,0
        grouped = frame.groupby('segment')
        Predicted = pd.DataFrame()
        for seg,gr in grouped:
            _gr = pd.DataFrame({'segment':[seg]})
            if not clip:
                leftDelta,rightDelta = self.findDiff(seg,uniqueLevels)
            _gr.loc[:,'left'],_gr.loc[:,'right'] = min(gr['ratio'])-leftDelta,max(gr['ratio'])+rightDelta
            Predicted = pd.concat([Predicted,_gr])
        return Predicted
    def findDiff(self,seg,spread):
        #frame = frame.sort(['segment','ratio','laccid'])
        #diffs = np.diff(frame['ratio'],1)

        interpStep = self.segmentsStepsDf[self.segmentsStepsDf.segment == seg].interpStep.values[0]
            #diffs[diffs!=0][0]
        left,right = interpStep*spread['before'],interpStep*spread['after']
        return left,right

    def byLacCidMod2(self):
        """
        Use the information from neighbours laccids.
        :return:
        """
        #Note! Attach probability according  length of founded laccids for each step.
        # For example,probability for sublist with length 4 more than siblist with length 2,
        # because this means that in the first case 4 cell's stations were founded correctly, when
        # in the second case only 2. But it might be lack of the data in origin database.
        predicted_segments =[]
        # get predicted frame and segments according base laccid algorithm
        #self.byLacCid()
        self.unpredicted = 0
        # iterate by laccids at grabbed list of laccids.
        for step in range(len(self.grabbed_lc),1,-1):
            # check all combinations
            for sublist in itertools.combinations(self.grabbed_lc,step):
                predicted_subDf = self.predictedDf[self.predictedDf['laccid'].isin(sublist)]
                segments = predicted_subDf['segment'].unique()
                # find the right segments for this combination
                for seg in segments:
                    seg_subDf = predicted_subDf[predicted_subDf['segment'] == seg]
                    lc_subList = seg_subDf['laccid'].unique()
                    if (set(sublist).issubset(set(lc_subList))) == True:
                        predicted_segments.append(seg)
            if predicted_segments!=[]:
                break
        # if something founded - reduce the selection of predicted segments.
        if predicted_segments!=[]:
            self.predictedDf = self.predictedDf[self.predictedDf['segment'].isin(predicted_segments)]
        # if no segments - use the segments from base algorithm.
        else:
            self.unpredicted = 1

    def byPowerCorr(self,
                    useSmoothed = False):
        """
        Reduce self.predictedDf by comparing the grabbed powers with the powers of the database.
        It is possible to identify the truth position only if the grabbed
        section contains varying of signal (suppose that user's telephone
        grabbed not only the base station but neighbours too).
        On success self.predictedDf and self.fullPredicted are replaced by the
        frames collected from the correlator; otherwise self.unpredicted is set to 1.
        :param useSmoothed: if True, the grabbed 'Power' column is used as is;
                            otherwise it is interpolated by interpolateByTimeStep
        :return:
        """
        self.unpredicted = 0
        self.resultsDf = pd.DataFrame()
        # the way of reducing for each method of comparison
        reducers = {'byAbs':'maxLimit','byCorr':'localMaxima'}

        # 1. Prepare the powers of the phone data.
        # the explicit comparison with True is kept on purpose
        if useSmoothed == True:
            self.interpPowers = list(self.grabbedDf['Power'])
        else:
            self.interpolateByTimeStep()

        # 2. Compare powers of grabbed log with powers from database.
        # With a single grabbed laccid the correlator chooses the method;
        # otherwise the coefficients of correlation ('byCorr') are used.
        powers_by_lc = self.grabbedDf.groupby(['laccid'])['Power'].apply(list).to_dict()
        absMeans = self.powerCorrelator.analyzeLC(powers_by_lc)

        reduced_parts = pd.DataFrame()
        all_parts = pd.DataFrame()
        slice_start,slice_end = 0,0
        for seg,seg_rows in self.predictedDf.groupby(['segment']):
            section = self.interpPowers
            if len(self.grabbed_lc) == 1:
                method = self.powerCorrelator.checkPredictionMethod(self.grabbed_lc[0], absMeans)
            else:
                method = 'byCorr'
            part,everything,slice_end = self.powerCorrelator.loopThroughLaccid(
                seg_rows,method,section,reducers[method],return_all=True,last = slice_end)

            # number the rows of the part continuously over all segments
            part['sliceNumber'] = range(slice_start,slice_end)
            slice_start = slice_end
            reduced_parts = pd.concat([reduced_parts,part])
            all_parts = pd.concat([all_parts,everything])

        if reduced_parts.empty:
            self.unpredicted = 1
            return

        if 'controls' not in reduced_parts.columns.values:
            print("")
        self.predictedDf = reduced_parts
        self.fullPredicted = all_parts

    def interpolateByTimeStep(self):
        """
        Linear interpolation of grabbed log by the constant.
        :return: the dictionary were key is the LAC-CID
        and value is the array of interpolated powers
        """
        self.interpPowers = {}
        old = self.grabbedDf.groupby(['laccid'])['TimeStamp']\
            .apply(lambda x: list((x -min(x))/1000))

        new = self.grabbedDf.groupby(['laccid'])['TimeStamp']\
            .apply(lambda x: range(0,max(x -min(x))/1000+1,self.timeStep))
        for lc in old.keys():
            self.interpPowers[lc] = np.interp(new[lc],
                                         old[lc],
                                         self.grabbedDf.loc[self.grabbedDf['laccid'] == lc, 'Power'])