def update_all_db_users_data():
    """Refresh the stored data of every user referenced by a group.

    Users stored in the DB but not referenced by any group are deleted.
    Every referenced user is re-scraped into a fresh GroupUser; users whose
    data could not be fetched are deleted, users whose fresh data differs
    from the stored record are updated.
    """
    # Load list of all groups & users from DB
    groups = get_groups_with_users()
    stored_users = MySqlConnector.get_all_users()

    # Collect the names of all users referenced by any group
    names_in_groups = set()
    for group_data in groups.values():
        names_in_groups.update(group_data.group_users)
    users_by_name = MySqlConnector.get_users_by_names(names_in_groups)

    # Stored users that no group references any more
    users_to_delete = []
    for stored_user in stored_users:
        if stored_user.user_name not in names_in_groups:
            users_to_delete.append(stored_user)

    # Go over the referenced users, check if any changed
    users_to_update = []
    for stored_user in users_by_name.values():
        fresh_user = GroupUser(stored_user.user_name)
        fetched = SteamGiftsScrapingUtils.update_user_additional_data(
            fresh_user)
        if fresh_user.steam_id:
            # '&=' (not 'and') so the Steam scrape always runs when a
            # steam id is available, even if the first fetch failed.
            fetched &= SteamScrapingUtils.update_user_additional_data(
                fresh_user)
        if not fetched:
            users_to_delete.append(stored_user)
            continue
        if not stored_user.equals(fresh_user):
            users_to_update.append(fresh_user)

    # Save changed users to the DB
    if users_to_update:
        MySqlConnector.update_existing_users(users_to_update)

    # Delete from DB users no longer on SteamGifts
    if users_to_delete:
        MySqlConnector.delete_users(users_to_delete)
def get_group_users(group_webpage):
    """Collect the users listed on a group's paginated users pages.

    Pages are requested one after another starting at page 1. Iteration
    stops when a page cannot be fetched, when the page reports a selected
    page number different from the one requested, or after a page that
    reports no selected page number at all.

    Returns a dict mapping each scraped user name to a new GroupUser.
    """
    LogUtils.log_info('Processing users for group ' + group_webpage)
    users_by_name = dict()
    page_index = 1
    while True:
        page_label = str(page_index)
        LogUtils.log_info('Processing users page #' + page_label)

        base_url = SteamGiftsConsts.get_steamgifts_users_page(group_webpage)
        users_page_url = (base_url
                          + SteamGiftsConsts.STEAMGIFTS_SEARCH_PAGE
                          + page_label)
        html_content = WebUtils.get_html_page(users_page_url)
        if html_content is None:
            LogUtils.log_error('Cannot process users page: ' + users_page_url)
            break

        selected_page = WebUtils.get_item_by_xpath(
            html_content, u'.//a[@class="is-selected"]/span/text()')
        # A selected page other than the requested one ends the scan
        # before this page's rows are read.
        if selected_page and selected_page != page_label:
            break

        rows = WebUtils.get_items_by_xpath(
            html_content, u'.//div[@class="table__row-outer-wrap"]')
        for row in rows:
            name = WebUtils.get_item_by_xpath(
                row, u'.//a[@class="table__column__heading"]/text()')
            users_by_name[name] = GroupUser(name)

        # No selected page marker: this page is processed, then we stop.
        if not selected_page:
            break
        page_index += 1

    LogUtils.log_info('Finished processing users for group ' + group_webpage)
    return users_by_name
def get_users_by_names(user_names):
    """Load the stored records of the given users from the Users table.

    Args:
        user_names: iterable of user names to look up. May be empty, in
            which case no query is issued.

    Returns:
        dict mapping user name to a GroupUser built from the row's
        steam id, steam user name and creation time columns. Names with
        no matching row are absent from the result.
    """
    start_time = time.time()
    users_data = dict()

    # Materialise once: callers pass sets, and we need both a length (for
    # the placeholders) and a sequence (for the driver's argument list).
    names = list(user_names)

    # An empty IN () list is a SQL syntax error, so skip the round trip.
    if names:
        connection = pymysql.connect(host=host, port=port, user=user,
                                     passwd=password, db=db_schema,
                                     charset='utf8')
        try:
            cursor = connection.cursor()
            try:
                # Bind names as parameters so the driver escapes them;
                # user names originate from scraped web pages.
                placeholders = ', '.join(['%s'] * len(names))
                cursor.execute(
                    'SELECT * FROM Users WHERE UserName IN ('
                    + placeholders + ')', names)
                for row in cursor.fetchall():
                    # (group_user.user_name, group_user.steam_id,
                    #  group_user.steam_user_name, group_user.creation_time)
                    user_name = row[0]
                    users_data[user_name] = GroupUser(
                        user_name, steam_id=row[1], steam_user_name=row[2],
                        creation_time=row[3])
            finally:
                cursor.close()
        finally:
            # Always release the connection, even when the query fails.
            connection.close()

    LogUtils.log_info('Get users by name took '
                      + str(time.time() - start_time) + ' seconds')
    return users_data
def get_user_data(user_name):
    """Load a single user's stored record from the Users table.

    Args:
        user_name: the user name to look up.

    Returns:
        A GroupUser carrying the stored steam id and steam user name, or
        None when no row matches.
    """
    group_user = None
    connection = pymysql.connect(host=host, port=port, user=user,
                                 passwd=password, db=db_schema,
                                 charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            # The lookup must filter on the requested name; bind it as a
            # parameter so the driver handles quoting and escaping.
            cursor.execute('SELECT * FROM Users WHERE UserName = %s',
                           (user_name,))
            data = cursor.fetchone()
            if data:
                # (group_user.user_name, group_user.steam_id,
                #  group_user.steam_user_name)
                group_user = GroupUser(user_name, steam_id=data[1],
                                       steam_user_name=data[2])
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        connection.close()
    return group_user
def get_all_users():
    """Load every row of the Users table.

    Returns:
        list of GroupUser objects, one per row, each carrying the stored
        steam id and steam user name.
    """
    start_time = time.time()
    all_users = []
    connection = pymysql.connect(host=host, port=port, user=user,
                                 passwd=password, db=db_schema,
                                 charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.execute('SELECT * FROM Users')
            for row in cursor.fetchall():
                # (group_user.user_name, group_user.steam_id,
                #  group_user.steam_user_name)
                all_users.append(
                    GroupUser(row[0], steam_id=row[1],
                              steam_user_name=row[2]))
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        connection.close()

    LogUtils.log_info('Get all users took '
                      + str(time.time() - start_time) + ' seconds')
    return all_users
def load_user(group_user, user_name):
    """Return group_user, creating and populating it when it is missing.

    When group_user is falsy, a new GroupUser is created for user_name and
    filled in by the SteamGifts scraper followed by the Steam scraper.
    """
    # NOTE(review): the source formatting was lost; this assumes the two
    # scrape calls run only for a newly created user - confirm.
    if group_user:
        return group_user

    new_user = GroupUser(user_name)
    SteamGiftsScrapingUtils.update_user_additional_data(new_user)
    SteamScrapingUtils.update_user_additional_data(new_user)
    return new_user
def _giveaway_outside_time_window(start_time_epoch, end_time_epoch,
                                  starts_after_str, ends_before_str,
                                  ends_after_str):
    """Tell whether a giveaway violates any of the given date bounds.

    Each bound is a '%Y-%m-%d' string or a falsy value meaning "no bound".
    Bounds are checked in order (starts-after, ends-before, ends-after) and
    later ones are not evaluated once one is violated.
    """
    date_format = '%Y-%m-%d'
    if starts_after_str and (
            datetime.datetime.utcfromtimestamp(start_time_epoch)
            < datetime.datetime.strptime(starts_after_str, date_format)):
        return True
    if ends_before_str and (
            datetime.datetime.utcfromtimestamp(end_time_epoch)
            > datetime.datetime.strptime(ends_before_str, date_format)):
        return True
    if ends_after_str and (
            datetime.datetime.utcfromtimestamp(end_time_epoch)
            < datetime.datetime.strptime(ends_after_str, date_format)):
        return True
    return False


def load_group(group_website, load_users_data=True, load_giveaway_data=True,
               fetch_not_started_giveaways=False, limit_by_time=False,
               starts_after_str=None, ends_before_str=None,
               ends_after_str=None):
    """Load a stored group with its users and giveaways from the DB.

    Args:
        group_website: group page address; its hashed id is the Groups key.
        load_users_data: when true, load the group's users and their rows
            from the Users table.
        load_giveaway_data: when true, load the group's giveaways and their
            rows from the Giveaways table.
        fetch_not_started_giveaways: when false, giveaways whose stored end
            time is falsy are skipped.
        limit_by_time: when true, apply the three date bounds below.
        starts_after_str: '%Y-%m-%d'; skip giveaways starting before it.
        ends_before_str: '%Y-%m-%d'; skip giveaways ending after it.
        ends_after_str: '%Y-%m-%d'; skip giveaways ending before it.

    Returns:
        Group built from a dict of user name -> GroupUser, a dict of
        giveaway link -> GroupGiveaway, and the stored cookies.

    Raises:
        TypeError: when no Groups row exists for group_website (the row
            is None and cannot be indexed).
    """
    start_time = time.time()
    connection = pymysql.connect(host=host, port=port, user=user,
                                 passwd=password, db=db_schema,
                                 charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            # Load Group
            group_id = StringUtils.get_hashed_id(group_website)
            cursor.execute(
                'SELECT Users,Giveaways,Cookies FROM Groups '
                'WHERE GroupID=%s', (group_id,))
            data = cursor.fetchone()
            group_users_data = json.loads(data[0])
            group_giveaways_data = json.loads(data[1])
            cookies = data[2]

            # Load Users Data
            group_users = dict()
            if load_users_data and group_users_data:
                for row in group_users_data:
                    # (group_user.user_name)
                    if isinstance(row, basestring):
                        user_name = row
                    else:
                        # Handling old data
                        user_name = row[0]
                    group_users[user_name] = GroupUser(user_name)

                user_names = list(group_users)
                placeholders = ', '.join(['%s'] * len(user_names))
                cursor.execute(
                    'SELECT * FROM Users WHERE UserName in ('
                    + placeholders + ')', user_names)
                for row in cursor.fetchall():
                    # (group_user.user_name, group_user.steam_id,
                    #  group_user.steam_user_name, group_user.creation_time)
                    user_name = row[0]
                    group_users[user_name] = GroupUser(
                        user_name, steam_id=row[1], steam_user_name=row[2],
                        creation_time=row[3])

            # Load Giveaways Data
            group_giveaways = dict()
            giveaways_by_id = dict()
            if load_giveaway_data and group_giveaways_data:
                for row in group_giveaways_data:
                    # (giveaway_id,
                    #  calendar.timegm(group_giveaway.start_time),
                    #  calendar.timegm(group_giveaway.end_time))
                    start_time_epoch = row[1]
                    end_time_epoch = row[2]
                    if not fetch_not_started_giveaways and not end_time_epoch:
                        continue
                    if limit_by_time and _giveaway_outside_time_window(
                            start_time_epoch, end_time_epoch,
                            starts_after_str, ends_before_str,
                            ends_after_str):
                        continue
                    giveaway_id = row[0]
                    giveaways_by_id[giveaway_id] = GroupGiveaway(
                        None, start_time=from_epoch(start_time_epoch),
                        end_time=from_epoch(end_time_epoch))

                # Every giveaway may have been filtered out above; an empty
                # IN () list is a SQL syntax error, so only query when there
                # is something to look up.
                if giveaways_by_id:
                    giveaway_ids = list(giveaways_by_id)
                    placeholders = ', '.join(['%s'] * len(giveaway_ids))
                    cursor.execute(
                        'SELECT * FROM Giveaways WHERE GiveawayID in ('
                        + placeholders + ')', giveaway_ids)
                    for row in cursor.fetchall():
                        # (giveaway_id, group_giveaway.link,
                        #  group_giveaway.creator, group_giveaway.game_name,
                        #  json.dumps(entries_data),
                        #  json.dumps(group_giveaway.groups))
                        giveaway_link = str(row[1])
                        giveaway_id = StringUtils.get_hashed_id(giveaway_link)
                        giveaway = giveaways_by_id[giveaway_id]
                        group_giveaways[giveaway_link] = giveaway
                        giveaway.link = giveaway_link
                        giveaway.creator = str(row[2])
                        giveaway.game_name = row[3].encode('utf-8')
                        giveaway.entries = dict()
                        for ent_row in json.loads(row[4]):
                            # (entry.user_name, entry.entry_time,
                            #  entry.winner)
                            user_name = str(ent_row[0])
                            giveaway.entries[user_name] = GiveawayEntry(
                                user_name,
                                entry_time=from_epoch(ent_row[1]),
                                winner=ent_row[2])
                        giveaway.groups = json.loads(str(row[5]))
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when loading fails part-way.
        connection.close()

    LogUtils.log_info('Load Group ' + group_website + ' took '
                      + str(time.time() - start_time) + ' seconds')
    return Group(group_users, group_giveaways, cookies=cookies)