def extract_gauge_info(self, currmess):
    """Extract the gauge reading from a message and credit its sender.

    The message body is passed to ``tools.find_double``.  When a number is
    found it is stored on ``currmess.gageheight`` and the sender's entry in
    ``self.totals`` gets its second field incremented.  When
    ``tools.NoNumError`` is raised, the message is handed to
    ``tools.log_bad_contribution`` instead and nothing else is recorded.

    ``self.totals`` maps ``currmess.fromUUID`` to a 3-tuple.  This method
    creates it as ``(currmess.date, 1, 0)`` and afterwards only bumps the
    second field; the first and third fields are carried over unchanged.
    NOTE(review): the third field is presumably the bad-contribution count
    maintained by ``tools.log_bad_contribution`` -- confirm.

    :param currmess: message object providing ``body``, ``fromUUID`` and
        ``date``; ``gageheight`` is set on it when a value is found.
    """
    v = None
    # rip the float out of the line
    try:
        v = tools.find_double(str(currmess.body))
    except tools.NoNumError:
        # No number in the body: log it as a bad contribution.
        # NOTE(review): self.totals is rebound to the helper's return value,
        # so the helper is assumed to return the updated totals mapping --
        # confirm against tools.log_bad_contribution.
        self.totals = tools.log_bad_contribution(currmess, self)

    if v is None:
        return

    currmess.gageheight = v
    userid = currmess.fromUUID
    if userid in self.totals:
        previous = self.totals[userid]
        self.totals[userid] = (previous[0], previous[1] + 1, previous[2])
    else:
        self.totals[userid] = (currmess.date, 1, 0)
def extract_gauge_info(self, currmess):
    """Extract the gauge reading from a message and credit its sender.

    The message body is passed to ``tools.find_double``.  When a number is
    found it is stored on ``currmess.gageheight`` and the sender's entry in
    ``self.totals`` gets its second field incremented.  When
    ``tools.NoNumError`` is raised, the message is handed to
    ``tools.log_bad_contribution`` instead and nothing else is recorded.

    ``self.totals`` maps ``currmess.fromUUID`` to a 3-tuple.  This method
    creates it as ``(currmess.date, 1, 0)`` and afterwards only bumps the
    second field; the first and third fields are carried over unchanged.
    NOTE(review): the third field is presumably the bad-contribution count
    maintained by ``tools.log_bad_contribution`` -- confirm.

    :param currmess: message object providing ``body``, ``fromUUID`` and
        ``date``; ``gageheight`` is set on it when a value is found.
    """
    v = None
    # rip the float out of the line
    try:
        v = tools.find_double(str(currmess.body))
    except tools.NoNumError:
        # No number in the body: log it as a bad contribution.
        # NOTE(review): self.totals is rebound to the helper's return value,
        # so the helper is assumed to return the updated totals mapping --
        # confirm against tools.log_bad_contribution.
        self.totals = tools.log_bad_contribution(currmess, self)

    if v is None:
        return

    currmess.gageheight = v
    userid = currmess.fromUUID
    if userid in self.totals:
        previous = self.totals[userid]
        self.totals[userid] = (previous[0], previous[1] + 1, previous[2])
    else:
        self.totals[userid] = (currmess.date, 1, 0)
def extract_gauge_info(self, currmess):
    """Extract the gauge reading from a message and credit sender and station.

    The message body is passed to ``tools.find_double``.  When a number is
    found it is stored on ``currmess.gageheight`` and the sender's entry in
    ``self.totals`` is updated.  When ``tools.NoNumError`` is raised, the
    message is handed to ``tools.log_bad_contribution`` instead and nothing
    else is recorded.

    ``self.totals`` maps ``currmess.fromUUID`` to a 5-tuple::

        (first date, reading count, third field (untouched here),
         {station: reading count}, {station: [datestamp, ...]})

    NOTE(review): the third field is presumably the bad-contribution count
    maintained by ``tools.log_bad_contribution`` -- confirm.  An existing
    entry is assumed to already have all five fields.

    The station is ``self.stations[currmess.closest_station_match]``.  When
    that index is outside the station list (``parsemsgs`` leaves it at
    ``-99999`` if no station matched), the reading is still counted for the
    sender but no per-station bookkeeping is done, instead of raising
    ``IndexError``.

    :param currmess: message object providing ``body``, ``fromUUID``,
        ``date``, ``datestamp`` and ``closest_station_match``;
        ``gageheight`` is set on it when a value is found.
    """
    v = None
    # rip the float out of the line
    try:
        v = tools.find_double(str(currmess.body))
    except tools.NoNumError:
        # No number in the body: log it as a bad contribution.
        # NOTE(review): self.totals is rebound to the helper's return value,
        # so the helper is assumed to return the updated totals mapping --
        # confirm against tools.log_bad_contribution.
        self.totals = tools.log_bad_contribution(currmess, self)

    if v is None:
        return

    currmess.gageheight = v
    userid = currmess.fromUUID

    if userid in self.totals:
        previous = self.totals[userid]
        station_counts = previous[3]
        station_stamps = previous[4]
    else:
        # Start from a zero count so the shared "+ 1" below yields 1.
        previous = (currmess.date, 0, 0)
        station_counts = {}
        station_stamps = {}

    # Guard against the "no station matched" sentinel before indexing.
    station_index = currmess.closest_station_match
    if 0 <= station_index < len(self.stations):
        station = self.stations[station_index]
        station_counts[station] = station_counts.get(station, 0) + 1
        station_stamps.setdefault(station, []).append(currmess.datestamp)

    self.totals[userid] = (previous[0], previous[1] + 1, previous[2],
                           station_counts, station_stamps)
def parsemsgs(self, site_params):
    """Parse every message in ``self.messages`` and match it to a station.

    For each message this method:

    1. Parses ``rawdate`` (minus its last five characters) with
       ``self.dfmt``, adjusts it with ``tz_adjust_STD_DST`` and stores
       ``date``, ``dateout`` (formatted with ``self.outfmt``) and
       ``datestamp`` on the message.
    2. Normalises the body: lower-cases it, cuts off the
       'sent using sms-to-email' footer and replaces CR, LF and '--' with
       spaces.
    3. Sets ``is_gage_msg`` when any of ``site_params.msg_ids`` occurs in
       the normalised text.
    4. Hands robot messages (``robot_status`` truthy) to
       ``self.process_a_robot_message`` and skips the remaining steps.
    5. For gage messages, looks for an exact station ID (ratio 100) and
       otherwise falls back to fuzzy matching with ``fuzz.ratio``; stores
       ``station_line``, ``max_prox_ratio`` and ``closest_station_match``
       and then calls ``self.extract_gauge_info``.  If nothing matches,
       ``closest_station_match`` stays at ``-99999``.
    6. Logs every other message through ``tools.log_bad_contribution``.

    :param site_params: object providing ``msg_ids`` (substrings that mark
        a gage message) and ``msg_rms`` (regex fragments removed from the
        text before fuzzy matching).
    """
    sms_footer = 'sent using sms-to-email'
    # Decimal numbers only (a '.' is required): these are taken to be the
    # stage value, so plain integers survive as station-number candidates.
    # regex from: http://stackoverflow.com/questions/385558/
    float_pattern = r"[+-]? *(?:\d+(?:\.\d*)|\.\d+)(?:[eE][+-]?\d+)?"

    # parse through all the messages
    for currmess in self.messages:
        # first the dates
        # NOTE(review): the dropped five characters are presumably a
        # trailing UTC offset such as '+0000' -- confirm.
        tmpdate = currmess.rawdate[:-5]
        currmess.date = datetime.strptime(tmpdate, self.dfmt)
        currmess.date = tz_adjust_STD_DST(currmess.date, self.tzdata)
        currmess.dateout = datetime.strftime(currmess.date, self.outfmt)
        currmess.datestamp = time.mktime(datetime.timetuple(currmess.date))

        # now the message bodies
        cm = currmess.body
        # the body is a list rather than a string for forwarded messages;
        # use the payload of the first part in that case
        if not isinstance(cm, str):
            cm = cm[0].get_payload()

        maxratio = 0
        maxrat_count = -99999  # sentinel: no station matched

        line = cm.lower()
        # Cut the footer off by position.  rstrip() takes a *set of
        # characters*, so using it here also ate trailing characters of the
        # real message (and, with no footer present, every trailing repeat
        # of the last character, e.g. 'ny1000' -> 'ny1').
        footer_start = line.rfind(sms_footer)
        if footer_start != -1:
            line = line[:footer_start]
        line = re.sub('(\r)', ' ', line)
        line = re.sub('(\n)', ' ', line)
        line = re.sub('(--)', ' ', line)

        for citem in site_params.msg_ids:
            if citem.lower() in line:
                currmess.is_gage_msg = True

        # robot messages are processed separately and skip the rest, so
        # they are not logged like ordinary contributions
        if currmess.robot_status:
            self.process_a_robot_message(currmess)
            continue

        if currmess.is_gage_msg:
            matched = False
            # the obvious case first: the exact station ID is in the line
            for j, cs in enumerate(self.stations):
                station_id = cs.lower()
                if station_id in line:
                    maxratio = 100
                    maxrat_count = j
                    matched = True
                    # strip out the station ID, including a possible '.'
                    # on the end; escape it so it is matched literally
                    station_pattern = re.escape(station_id)
                    line = re.sub(station_pattern + r'\.', '', line)
                    line = re.sub(station_pattern, '', line)
                    currmess.station_line = line
                    break

            # if no exact match found, get fuzzy!
            if not matched:
                # msg_rms entries are used as regex fragments on purpose
                for cremitem in site_params.msg_rms:
                    line = re.sub('(' + cremitem.lower() + ')', '', line)
                currmess.station_line = line
                # drop the floating point values that should be the stage
                line = re.sub(float_pattern, '', line)
                remaining_ints = [int(cval)
                                  for cval in re.findall(r"\d+", line)]
                if not remaining_ints:
                    maxratio = 0
                elif (max(remaining_ints) < self.minstatnum or
                      min(remaining_ints) > self.maxstatnum):
                    # no integer can be a station number
                    maxratio = 0
                else:
                    for j, cs in enumerate(self.stations):
                        # keep the station with the best similarity ratio
                        crat = fuzz.ratio(line, cs)
                        if crat > maxratio:
                            maxratio = crat
                            maxrat_count = j

            currmess.max_prox_ratio = maxratio
            currmess.closest_station_match = maxrat_count
            self.extract_gauge_info(currmess)
        else:
            # no readable gauge in this message: log it as a bad message
            tools.log_bad_contribution(currmess, self)
def parsemsgs(self, site_params):
    """Parse every message in ``self.messages`` and match it to a station.

    For each message this method:

    1. Parses ``rawdate`` (minus its last five characters) with
       ``self.dfmt``, adjusts it with ``tz_adjust_STD_DST`` and stores
       ``date``, ``dateout`` (formatted with ``self.outfmt``) and
       ``datestamp`` on the message.
    2. Normalises the body: lower-cases it, cuts off the
       'sent using sms-to-email' footer and replaces CR, LF and '--' with
       spaces.
    3. Sets ``is_gage_msg`` when any of ``site_params.msg_ids`` occurs in
       the normalised text.
    4. Hands robot messages (``robot_status`` truthy) to
       ``self.process_a_robot_message`` and skips the remaining steps.
    5. For gage messages, looks for an exact station ID (ratio 100) and
       otherwise falls back to fuzzy matching with ``fuzz.ratio``; stores
       ``station_line``, ``max_prox_ratio`` and ``closest_station_match``
       and then calls ``self.extract_gauge_info``.  If nothing matches,
       ``closest_station_match`` stays at ``-99999``.
    6. Logs every other message through ``tools.log_bad_contribution``.

    :param site_params: object providing ``msg_ids`` (substrings that mark
        a gage message) and ``msg_rms`` (regex fragments removed from the
        text before fuzzy matching).
    """
    sms_footer = 'sent using sms-to-email'
    # Decimal numbers only (a '.' is required): these are taken to be the
    # stage value, so plain integers survive as station-number candidates.
    # regex from: http://stackoverflow.com/questions/385558/
    float_pattern = r"[+-]? *(?:\d+(?:\.\d*)|\.\d+)(?:[eE][+-]?\d+)?"

    # parse through all the messages
    for currmess in self.messages:
        # first the dates
        # NOTE(review): the dropped five characters are presumably a
        # trailing UTC offset such as '+0000' -- confirm.
        tmpdate = currmess.rawdate[:-5]
        currmess.date = datetime.strptime(tmpdate, self.dfmt)
        currmess.date = tz_adjust_STD_DST(currmess.date, self.tzdata)
        currmess.dateout = datetime.strftime(currmess.date, self.outfmt)
        currmess.datestamp = time.mktime(datetime.timetuple(currmess.date))

        # now the message bodies
        cm = currmess.body
        # the body is a list rather than a string for forwarded messages;
        # use the payload of the first part in that case
        if not isinstance(cm, str):
            cm = cm[0].get_payload()

        maxratio = 0
        maxrat_count = -99999  # sentinel: no station matched

        line = cm.lower()
        # Cut the footer off by position.  rstrip() takes a *set of
        # characters*, so using it here also ate trailing characters of the
        # real message (and, with no footer present, every trailing repeat
        # of the last character, e.g. 'ny1000' -> 'ny1').
        footer_start = line.rfind(sms_footer)
        if footer_start != -1:
            line = line[:footer_start]
        line = re.sub('(\r)', ' ', line)
        line = re.sub('(\n)', ' ', line)
        line = re.sub('(--)', ' ', line)

        for citem in site_params.msg_ids:
            if citem.lower() in line:
                currmess.is_gage_msg = True

        # robot messages are processed separately and skip the rest, so
        # they are not logged like ordinary contributions
        if currmess.robot_status:
            self.process_a_robot_message(currmess)
            continue

        if currmess.is_gage_msg:
            matched = False
            # the obvious case first: the exact station ID is in the line
            for j, cs in enumerate(self.stations):
                station_id = cs.lower()
                if station_id in line:
                    maxratio = 100
                    maxrat_count = j
                    matched = True
                    # strip out the station ID, including a possible '.'
                    # on the end; escape it so it is matched literally
                    station_pattern = re.escape(station_id)
                    line = re.sub(station_pattern + r'\.', '', line)
                    line = re.sub(station_pattern, '', line)
                    currmess.station_line = line
                    break

            # if no exact match found, get fuzzy!
            if not matched:
                # msg_rms entries are used as regex fragments on purpose
                for cremitem in site_params.msg_rms:
                    line = re.sub('(' + cremitem.lower() + ')', '', line)
                currmess.station_line = line
                # drop the floating point values that should be the stage
                line = re.sub(float_pattern, '', line)
                remaining_ints = [int(cval)
                                  for cval in re.findall(r"\d+", line)]
                if not remaining_ints:
                    maxratio = 0
                elif (max(remaining_ints) < self.minstatnum or
                      min(remaining_ints) > self.maxstatnum):
                    # no integer can be a station number
                    maxratio = 0
                else:
                    for j, cs in enumerate(self.stations):
                        # keep the station with the best similarity ratio
                        crat = fuzz.ratio(line, cs)
                        if crat > maxratio:
                            maxratio = crat
                            maxrat_count = j

            currmess.max_prox_ratio = maxratio
            currmess.closest_station_match = maxrat_count
            self.extract_gauge_info(currmess)
        else:
            # no readable gauge in this message: log it as a bad message
            tools.log_bad_contribution(currmess, self)