예제 #1
0
 def extract_gauge_info(self, currmess):
     """Read the gauge value from a message and credit its sender.

     The body of ``currmess`` is passed (as a string) to
     ``tools.find_double``.  When a value comes back it is stored on
     ``currmess.gageheight`` and the sender's entry in ``self.totals``
     (keyed by ``currmess.fromUUID``) is updated.  Entries are 3-tuples:
     field 0 is ``currmess.date`` of the sender's first counted message,
     field 1 is incremented on every successful extraction, and field 2
     starts at 0 and is carried over unchanged here (presumably a
     bad-message count maintained elsewhere -- confirm).

     When ``tools.find_double`` raises ``tools.NoNumError`` the message is
     handed to ``tools.log_bad_contribution`` and its return value
     replaces ``self.totals``.

     :param currmess: message object providing ``body``, ``fromUUID`` and
         ``date``; ``gageheight`` is set on it when a value is found.
     """
     try:
         v = tools.find_double(str(currmess.body))
     except tools.NoNumError:
         # No number in the body: record the message as a bad contribution.
         self.totals = tools.log_bad_contribution(currmess, self)
         return
     if v is None:  # identity test rather than ``!= None``
         return

     currmess.gageheight = v
     userid = currmess.fromUUID
     if userid in self.totals:
         entry = self.totals[userid]
         # Tuples are immutable, so rebuild the entry with field 1 advanced.
         self.totals[userid] = (entry[0], entry[1] + 1, entry[2])
     else:
         self.totals[userid] = (currmess.date, 1, 0)
예제 #2
0
 def extract_gauge_info(self, currmess):
     """Read the gauge value from a message and credit its sender.

     The body of ``currmess`` is passed (as a string) to
     ``tools.find_double``.  When a value comes back it is stored on
     ``currmess.gageheight`` and the sender's entry in ``self.totals``
     (keyed by ``currmess.fromUUID``) is updated.  Entries are 3-tuples:
     field 0 is ``currmess.date`` of the sender's first counted message,
     field 1 is incremented on every successful extraction, and field 2
     starts at 0 and is carried over unchanged here (presumably a
     bad-message count maintained elsewhere -- confirm).

     When ``tools.find_double`` raises ``tools.NoNumError`` the message is
     handed to ``tools.log_bad_contribution`` and its return value
     replaces ``self.totals``.

     :param currmess: message object providing ``body``, ``fromUUID`` and
         ``date``; ``gageheight`` is set on it when a value is found.
     """
     try:
         v = tools.find_double(str(currmess.body))
     except tools.NoNumError:
         # No number in the body: record the message as a bad contribution.
         self.totals = tools.log_bad_contribution(currmess, self)
         return
     if v is None:  # identity test rather than ``!= None``
         return

     currmess.gageheight = v
     userid = currmess.fromUUID
     if userid in self.totals:
         entry = self.totals[userid]
         # Tuples are immutable, so rebuild the entry with field 1 advanced.
         self.totals[userid] = (entry[0], entry[1] + 1, entry[2])
     else:
         self.totals[userid] = (currmess.date, 1, 0)
예제 #3
0
    def extract_gauge_info(self, currmess):
        """Read the gauge value from a message and credit sender and station.

        The body of ``currmess`` is passed (as a string) to
        ``tools.find_double``.  When a value comes back it is stored on
        ``currmess.gageheight`` and the sender's entry in ``self.totals``
        (keyed by ``currmess.fromUUID``) is updated.  Entries are 5-tuples:

        * 0 -- ``currmess.date`` of the sender's first counted message
        * 1 -- incremented on every successful extraction
        * 2 -- starts at 0 and is carried over unchanged here
        * 3 -- dict mapping station -> number of readings from this sender
        * 4 -- dict mapping station -> list of ``currmess.datestamp`` values

        When ``tools.find_double`` raises ``tools.NoNumError`` the message
        is handed to ``tools.log_bad_contribution`` and its return value
        replaces ``self.totals``.

        :param currmess: message object providing ``body``, ``fromUUID``,
            ``date``, ``datestamp`` and ``closest_station_match``;
            ``gageheight`` is set on it when a value is found.
        """
        try:
            v = tools.find_double(str(currmess.body))
        except tools.NoNumError:
            # No number in the body: record the message as a bad contribution.
            self.totals = tools.log_bad_contribution(currmess, self)
            return
        if v is None:  # identity test rather than ``!= None``
            return

        currmess.gageheight = v
        userid = currmess.fromUUID
        # NOTE(review): closest_station_match is used as a list index without
        # a range check; if it can hold an out-of-range placeholder for
        # "no station matched" this raises IndexError -- confirm with caller.
        station = self.stations[currmess.closest_station_match]

        if userid not in self.totals:
            # First counted message from this sender.
            self.totals[userid] = (currmess.date, 1, 0,
                                   {station: 1},
                                   {station: [currmess.datestamp]})
            return

        entry = self.totals[userid]

        # The two dicts are updated in place and then re-attached to the
        # rebuilt tuple below.
        contribution_list = entry[3]
        contribution_list[station] = contribution_list.get(station, 0) + 1

        contribution_date_list = entry[4]
        contribution_date_list.setdefault(station, []).append(
            currmess.datestamp)

        self.totals[userid] = (entry[0],
                               entry[1] + 1,
                               entry[2],
                               contribution_list,
                               contribution_date_list)
예제 #4
0
    def parsemsgs(self, site_params):
        """Parse every message in ``self.messages`` and match it to a station.

        For each message this method:

        1. parses ``rawdate`` (without its last five characters) using
           ``self.dfmt`` and sets ``date``, ``dateout`` and ``datestamp``;
        2. lower-cases the body, cuts everything from the last
           'sent using sms-to-email' footer onward, and replaces CR, LF and
           '--' with spaces;
        3. sets ``is_gage_msg`` when any entry of ``site_params.msg_ids``
           occurs in that text;
        4. hands messages with a truthy ``robot_status`` to
           ``process_a_robot_message`` and does nothing else with them;
        5. for gage messages, looks for a station id from ``self.stations``
           as an exact substring and otherwise falls back to the best
           ``fuzz.ratio`` score, stores the outcome in ``max_prox_ratio`` and
           ``closest_station_match`` and calls ``extract_gauge_info``;
        6. passes every remaining message to ``tools.log_bad_contribution``.

        :param site_params: object providing ``msg_ids`` (strings whose
            presence marks a gage message) and ``msg_rms`` (terms removed
            from the text before fuzzy matching; they are interpolated into
            a regular expression as-is).
        """
        footer = 'sent using sms-to-email'
        # Matches floating point values; regex from
        # http://stackoverflow.com/questions/385558/
        # python-and-regex-question-extract-float-double-value
        float_re = re.compile(
            r"[+-]? *(?:\d+(?:\.\d*)|\.\d+)(?:[eE][+-]?\d+)?")

        # parse through all the messages
        for currmess in self.messages:
            # first the dates
            tmpdate = currmess.rawdate[:-5]
            currmess.date = datetime.strptime(tmpdate, self.dfmt)
            currmess.date = tz_adjust_STD_DST(currmess.date, self.tzdata)
            currmess.dateout = datetime.strftime(currmess.date, self.outfmt)
            currmess.datestamp = time.mktime(
                datetime.timetuple(currmess.date))

            # now the message bodies
            cm = currmess.body
            # the body is a list instead of a string when the message was
            # forwarded; in that case use the payload of the first part
            if not isinstance(cm, str):
                cm = cm[0].get_payload()
            maxratio = 0
            # placeholder index that is stored when no station matches
            maxrat_count = -99999

            line = cm.lower()
            # Cut the footer off by position.  str.rstrip() treats its
            # argument as a *set of characters*, so the previous rstrip-based
            # code could eat message text, and when the footer was absent
            # (rfind == -1) it stripped every trailing repeat of the last
            # character, e.g. turning a trailing "1000" into "1".
            footer_at = line.rfind(footer)
            if footer_at != -1:
                line = line[:footer_at]
            line = line.replace('\r', ' ').replace('\n', ' ')
            line = line.replace('--', ' ')

            if any(citem.lower() in line for citem in site_params.msg_ids):
                currmess.is_gage_msg = True

            if currmess.robot_status:
                # robot messages are processed separately; we don't want
                # these contributions being logged in the same way
                self.process_a_robot_message(currmess)
                continue

            if not currmess.is_gage_msg:
                # this message has no readable gauge, so log it as bad
                # NOTE(review): the return value is discarded here, whereas
                # extract_gauge_info assigns it to self.totals -- confirm
                # which of the two is intended.
                tools.log_bad_contribution(currmess, self)
                continue

            matched = False  # becomes True once an exact match is found
            # first check for the obvious: the exact station id in the line
            for j, cs in enumerate(self.stations):
                station_id = cs.lower()
                if station_id in line:  # line is already lower-case
                    maxratio = 100
                    maxrat_count = j
                    matched = True
                    # strip out the station id, including a possible '.'
                    # on the end; plain replace so that characters in the
                    # id are never interpreted as regex syntax
                    line = line.replace(station_id + '.', '')
                    line = line.replace(station_id, '')
                    currmess.station_line = line
                    break

            # if no exact match found, get fuzzy!
            if not matched:
                # remove the configured terms via regex substitution
                for cremitem in site_params.msg_rms:
                    line = re.sub('(' + cremitem.lower() + ')', '', line)
                currmess.station_line = line
                # drop the floating point values that should be the stage
                line = float_re.sub('', line)
                remaining_ints = [int(cval)
                                  for cval in re.findall(r"\d+", line)]

                if not remaining_ints:
                    # no integer left that could be a station number
                    maxratio = 0
                elif (max(remaining_ints) < self.minstatnum or
                      min(remaining_ints) > self.maxstatnum):
                    # every integer lies outside the station number range
                    maxratio = 0
                else:
                    for j, cs in enumerate(self.stations):
                        # keep the station with the highest similarity ratio
                        crat = fuzz.ratio(line, cs)
                        if crat > maxratio:
                            maxratio = crat
                            maxrat_count = j

            currmess.max_prox_ratio = maxratio
            currmess.closest_station_match = maxrat_count

            self.extract_gauge_info(currmess)
예제 #5
0
    def parsemsgs(self, site_params):
        """Parse every message in ``self.messages`` and match it to a station.

        For each message this method:

        1. parses ``rawdate`` (without its last five characters) using
           ``self.dfmt`` and sets ``date``, ``dateout`` and ``datestamp``;
        2. lower-cases the body, cuts everything from the last
           'sent using sms-to-email' footer onward, and replaces CR, LF and
           '--' with spaces;
        3. sets ``is_gage_msg`` when any entry of ``site_params.msg_ids``
           occurs in that text;
        4. hands messages with a truthy ``robot_status`` to
           ``process_a_robot_message`` and does nothing else with them;
        5. for gage messages, looks for a station id from ``self.stations``
           as an exact substring and otherwise falls back to the best
           ``fuzz.ratio`` score, stores the outcome in ``max_prox_ratio`` and
           ``closest_station_match`` and calls ``extract_gauge_info``;
        6. passes every remaining message to ``tools.log_bad_contribution``.

        :param site_params: object providing ``msg_ids`` (strings whose
            presence marks a gage message) and ``msg_rms`` (terms removed
            from the text before fuzzy matching; they are interpolated into
            a regular expression as-is).
        """
        footer = 'sent using sms-to-email'
        # Matches floating point values; regex from
        # http://stackoverflow.com/questions/385558/
        # python-and-regex-question-extract-float-double-value
        float_re = re.compile(
            r"[+-]? *(?:\d+(?:\.\d*)|\.\d+)(?:[eE][+-]?\d+)?")

        # parse through all the messages
        for currmess in self.messages:
            # first the dates
            tmpdate = currmess.rawdate[:-5]
            currmess.date = datetime.strptime(tmpdate, self.dfmt)
            currmess.date = tz_adjust_STD_DST(currmess.date, self.tzdata)
            currmess.dateout = datetime.strftime(currmess.date, self.outfmt)
            currmess.datestamp = time.mktime(
                datetime.timetuple(currmess.date))

            # now the message bodies
            cm = currmess.body
            # the body is a list instead of a string when the message was
            # forwarded; in that case use the payload of the first part
            if not isinstance(cm, str):
                cm = cm[0].get_payload()
            maxratio = 0
            # placeholder index that is stored when no station matches
            maxrat_count = -99999

            line = cm.lower()
            # Cut the footer off by position.  str.rstrip() treats its
            # argument as a *set of characters*, so the previous rstrip-based
            # code could eat message text, and when the footer was absent
            # (rfind == -1) it stripped every trailing repeat of the last
            # character, e.g. turning a trailing "1000" into "1".
            footer_at = line.rfind(footer)
            if footer_at != -1:
                line = line[:footer_at]
            line = line.replace('\r', ' ').replace('\n', ' ')
            line = line.replace('--', ' ')

            if any(citem.lower() in line for citem in site_params.msg_ids):
                currmess.is_gage_msg = True

            if currmess.robot_status:
                # robot messages are processed separately; we don't want
                # these contributions being logged in the same way
                self.process_a_robot_message(currmess)
                continue

            if not currmess.is_gage_msg:
                # this message has no readable gauge, so log it as bad
                # NOTE(review): the return value is discarded here, whereas
                # extract_gauge_info assigns it to self.totals -- confirm
                # which of the two is intended.
                tools.log_bad_contribution(currmess, self)
                continue

            matched = False  # becomes True once an exact match is found
            # first check for the obvious: the exact station id in the line
            for j, cs in enumerate(self.stations):
                station_id = cs.lower()
                if station_id in line:  # line is already lower-case
                    maxratio = 100
                    maxrat_count = j
                    matched = True
                    # strip out the station id, including a possible '.'
                    # on the end; plain replace so that characters in the
                    # id are never interpreted as regex syntax
                    line = line.replace(station_id + '.', '')
                    line = line.replace(station_id, '')
                    currmess.station_line = line
                    break

            # if no exact match found, get fuzzy!
            if not matched:
                # remove the configured terms via regex substitution
                for cremitem in site_params.msg_rms:
                    line = re.sub('(' + cremitem.lower() + ')', '', line)
                currmess.station_line = line
                # drop the floating point values that should be the stage
                line = float_re.sub('', line)
                remaining_ints = [int(cval)
                                  for cval in re.findall(r"\d+", line)]

                if not remaining_ints:
                    # no integer left that could be a station number
                    maxratio = 0
                elif (max(remaining_ints) < self.minstatnum or
                      min(remaining_ints) > self.maxstatnum):
                    # every integer lies outside the station number range
                    maxratio = 0
                else:
                    for j, cs in enumerate(self.stations):
                        # keep the station with the highest similarity ratio
                        crat = fuzz.ratio(line, cs)
                        if crat > maxratio:
                            maxratio = crat
                            maxrat_count = j

            currmess.max_prox_ratio = maxratio
            currmess.closest_station_match = maxrat_count

            self.extract_gauge_info(currmess)