def append_thread_data(self, thread_data, thread_id, position_id): '''Appends new dictionary or json to existing json in ongoing thread in kema_thread table''' current_date = datetime.now() if type(thread_data) == dict: thread_data = json.dumps(thread_data) sql = ''' UPDATE kema_thread SET position_id = %s, thread_data = thread_data::jsonb || %s::jsonb, update_datetime = %s WHERE trigger_message_sid = %s AND user_phone = %s AND thread_id = %s; ''' params = ( position_id, thread_data, current_date, self.trigger_message_sid, self.user_phone, thread_id, ) update_table(sql, params)
def set_table_values(self, table_name, param_dict, trigger_message_sid=None, user_phone=None): '''param_dict: dictionary of col:value pairs to be updated''' current_date = datetime.now() # Construct SQL query updt_clause = '''UPDATE {} '''.format(table_name) set_clause = ' SET ' + ', '.join( ['{} = %s'.format(col) for col, val in param_dict.items()]) + ', update_datetime = %s ' where_clause = ''' WHERE trigger_message_sid = %s AND user_phone = %s;''' sql = updt_clause + set_clause + where_clause # Construct tuple of parameters params = tuple(param_dict.values()) + ( current_date, self.trigger_message_sid, self.user_phone, ) print(sql) print(params, type(params)) update_table(sql, params)
def parse_stop_route(self): stop_id = self.id type = self.get_node_text(self.root, "//div[@class='transportType']/h3").lower() # get route info in this stop route_nodes = self.root.xpath("//div[@class='lineInformationInner']/div/ul/li") for node in route_nodes: route_name = self.get_node_text(node, 'a') route_id = self.get_node(node, 'a').get('href').split('/')[-1] db.update_table('route', 'id', route_id, name=route_name, type=type) title=link=line_id=None for ttn in node.xpath('ul'): title = self.get_node_text(ttn, 'li') link = self.get_node(ttn, 'li/ul/li/a').get('href') groups = self.timetable_url_pattern.findall(link) if (len(groups) > 0): line_id = groups[0][0] tt_id = utils.get_timetable_id(route_id, stop_id, line_id) print " ", "parsed timetable '%s' %s" % (title, tt_id) db.update_table('timetable_index', 'id', tt_id, route_id=route_id, stop_id=stop_id, line_id=line_id, title=title, link=link)
def parse_map(self): map_node = self.get_node(self.root, "//div[@class='mapInner']/a") if map_node is None: return None # http://ptv.vic.gov.au/ map_link = map_node.get('href') db.update_table('map', 'link', map_link) map_id = db.query("SELECT id FROM map WHERE link=?", (map_link,)) return map_id
def lambda_handler_update_db(event, context): ''' Updates field in database ''' # Set logging logger = logging.getLogger() logger.setLevel(logging.DEBUG) logger.info(event) # Format data stream to dictionary data = urllib.parse.unquote(event['body']) data = json.loads(data) try: current_date = datetime.now() origin_trigger_message_sid = data['origin_trigger_message_sid'] new_barrier = data['barrier'] # Select past barrier sql = """ SELECT DISTINCT trigger_message_sid, barrier FROM kema_schedule WHERE trigger_message_id = %s; """ prev_barriers = select_from_table(sql, (origin_trigger_message_sid)) for (trigger_message_sid, prev_barrier) in list_barriers[0]: updt_barriers = ','.join([prev_barrier, new_barrier]) sql = ''' UPDATE kema_schedule SET barrier = %s, update_time = %s WHERE trigger_message_sid = %s; ''' params = (updt_barriers, current_date, origin_trigger_message_sid) update_table(sql, params) except: logger.error( "ERROR: Unexpected error: Could not update data from RDS instance." ) # logger.debug(data) sys.exit() return { 'statusCode': 200, 'body': json.dumps('Hello from Lambda!'), 'data': event }
def parse_timetable(self): tt_node = self.get_node(self.root, "//div[@class='timetablesInner']/ul") li_nodes = tt_node.xpath(".//li") for node in li_nodes: title = self.get_node_text(node, "./a") link = self.get_node(node, "./a").get('href') if link=="": continue line_id = self.timetable_lineid_pattern.findall(link)[0] tt_id = utils.get_timetable_id(route_id=self.id, line_id=line_id) print " ", "parsed timetable '%s' %s" % (title, tt_id) db.update_table('timetable_index', 'id', tt_id, route_id=self.id, line_id=line_id, link=link, title=title)
def lambda_handler_end_reminder(event, context): ''' Ends scheduled reminder ''' # Set logging logger = logging.getLogger() logger.setLevel(logging.DEBUG) logger.info(event) # Format data stream to dictionary data = urllib.parse.unquote(event['body']) data = json.loads(data) logger.info(data) # Connect to RDS instance try: conn = connect_to_rds() except: logger.error( "ERROR: Unexpected error: Could not connect to psql RDS instance.") sys.exit() logger.info("SUCCESS: Connection to psql RDS instance succeeded") try: current_date = datetime.now() trigger_message_sid = data['trigger_message_sid'] sql = ''' UPDATE kema_schedule SET schedule_end = %s, update_time = %s WHERE trigger_message_sid = %s ''' params = (current_date, current_date, trigger_message_sid) update_table(sql, params) except: logger.error( "ERROR: Unexpected error: Could not update data from RDS instance." ) # logger.debug(data) sys.exit() return { 'statusCode': 200, 'body': json.dumps('Hello from Lambda!'), 'data': event }
def parse_map(self): map_node = self.get_node(self.root, "//div[@class='routeMapInner']/a") if map_node is None: return None url = map_node.get('href') map_html = utils.get_html(utils.ptv_url + url) map_tree = etree.HTML(map_html) map_node = self.get_node(map_tree, "//div[@class='routeMapInner']/img") if map_node is None: return None # http://ptv.vic.gov.au/ map_link = map_node.get('src') db.update_table('map', 'link', map_link) map_id = db.query("SELECT id FROM map WHERE link=?", (map_link,)) return map_id
def saveUser(username, nickname): user_id = get_user_id_by_name(username) update = {'name': username, 'nick_name': nickname} if user_id is None: return db.insert_table('user', update) else: return db.update_table('user', update, {'name': username})
def update_schedule(self, trigger_message_sid=None, schedule_start=None, schedule_end=None): """Updates the schedule start or end date of a task Parameters: trigger_message_sid (str): Trigger message SID to search kema_schedule table for schedule_start (date): Start date to update the schedule to schedule_end (date, bool): End date to update the schedule to. If True, cancels the schedule """ current_date = datetime.now() if not trigger_message_sid: trigger_message_sid = self.trigger_message_sid params = tuple() sql = ''' UPDATE kema_schedule SET ''' if schedule_start: sql += '''schedule_start = %s, ''' params += (schedule_start, ) if schedule_end is True: # Cancel schedule sql += '''schedule_end = schedule_start - INTERVAL '1 DAY', ''' elif schedule_end: sql += '''schedule_end = %s, ''' params += (schedule_start, ) sql += '''update_datetime = %s WHERE trigger_message_sid = %s; ''' params += ( current_date, reminder.trigger_message_sid, ) update_table(sql, params)
def saveQuestion(qid, question, description): question_id = get_question_by_id(qid) args = { 'id': qid, 'title': question, 'description': description, 'fetch_time': int(time.time()), 'fetch': 0 } if question_id is None: return db.insert_table('question', args) else: return db.update_table('question', args, {'id': qid})
from stop_parser import StopParser import db import utils MAX_STOP_COUNT = 23611 ########################################################################### if __name__ == "__main__": db.init() progress = int(db.get_param('stop_progress', 0)) tobedone = xrange(progress+1, MAX_STOP_COUNT+1, 1) sp = StopParser() for id in tobedone: print "\n\nprocess stop id", id url = utils.get_stop_url(id) html = utils.get_html(url, data=id) try: sp.parse(id, html) db.cache_del(url) except Exception as e: print " ", id, "failed, error:", e db.update_table('stop', 'id', id, parsed='F') db.set_param('stop_progress', id) db.close()
def set_question_fetch(qid, fetch): sets = {'fetch': fetch} where = {'id': qid} return db.update_table('question', sets, where)
def parse_stop_info(self): result = {} table_node = self.get_node(self.root, "//table[@id='stopInfo']") td_nodes = table_node.xpath("//td") #get title title = self.get_node_text(self.root, "//h1[@class='fn org']") if title is None: raise Exception('not a valid stop, title is empty') try: result['name'] = title[0:title.rindex('-')].strip() except: result['name'] = title result['address_street'] = self.get_node_text(td_nodes[0], "//span[@class='street-address']") result['address_locality'] = self.get_node_text(td_nodes[0], "//span[@class='locality']") result['address_postalcode'] = self.get_node_text(td_nodes[0], "//span[@class='postal-code']") location_name = self.get_node_text(td_nodes[1], "a") location_id = self.get_node(td_nodes[1], "a").get('href').split('/')[-1] if (location_id != '' and location_name != ''): db.update_table('location', 'id', location_id, name=location_name) result['location_id'] = location_id else: raise Exception('not a valid stop, location is empty,') result['tickets'] = ','.join([n.text for n in td_nodes[3].xpath("ul/li")]) result['waiting_indoor'] = self.get_node_text(td_nodes[20].xpath("dl/dd")[0]) result['waiting_sheltered'] = self.get_node_text(td_nodes[20].xpath("dl/dd")[1]) result['bicycle_racks'] = self.get_node_text(td_nodes[22].xpath("dl/dd")[0]) result['bicycle_lockers'] = self.get_node_text(td_nodes[22].xpath("dl/dd")[1]) result['bicycle_cage'] = self.get_node_text(td_nodes[22].xpath("dl/dd")[2]) result['geo_latitude'] = self.get_node_text(self.root, "//span[@class='latitude']") result['geo_longitude'] = self.get_node_text(self.root, "//span[@class='longitude']") fields_index = { 'phone_lostproperty':2, 'phone_feedback':4, 'staff_available':5, 'phone_station':6, 'accessible':7, 'metcard_ticket_machine':8, 'myki_machine':9, 'myki_checks':10, 'vline_booking':11, 'seating':12, 'lighting':13, 'stairs':14, 'escalator':15, 'lifts':16, 'lockers':17, 'public_phone':18, 'public_toilet':19, 'car_parking':21, 'taxi_rank':23, 'tactile_paths':24, 'hearing_loop':25, } for k, v in fields_index.items(): try: result[k] = self.get_node_text(td_nodes[v]) except: print "Parse stop info error on %s, order %d" % (k,v) raise result['parsed'] = 'T' # print "parsed stop info:\n\t", '\n\t'.join(["%s = %s" % (k,v) for k,v in result.items()]) db.update_table_with_dict('stop', 'id', self.id, result) return result
import utils ########################################################################### if __name__ == "__main__": db.init() progress = int(db.get_param('location_update_progress', 0)) db.cur.execute("SELECT id FROM location WHERE id > ? ORDER BY id", (progress,)) rows = db.cur.fetchall() tobedone = map(lambda x: x[0], rows) lp = LocationParser() for id in tobedone: print "\n\nupdate location id", id url = utils.get_location_url(id) html = utils.get_html(url, data=id) try: lp.parse(id, html) db.cache_del(url) except Exception as e: print " ", id, "failed, error:", e db.update_table('location', 'id', id, parsed='F') raise db.set_param('location_update_progress', id) db.close()
import utils ########################################################################### if __name__ == "__main__": db.init() progress = int(db.get_param('route_update_progress', 0)) db.cur.execute("SELECT id FROM route WHERE id > ? ORDER BY id", (progress,)) rows = db.cur.fetchall() tobedone = map(lambda x: x[0], rows) rp = RouteParser() for id in tobedone: print "\n\nupdate route id", id url = utils.get_route_url(id) html = utils.get_html(url, data=id) try: rp.parse(id, html) db.cache_del(url) except Exception as e: print " ", id, "failed, error:", e db.update_table('route', 'id', id, parsed='F') raise db.set_param('route_update_progress', id) db.close()
def update_user_by_name(username, args): return db.update_table('user', args, {'name': username})