# get_user_contribution_data(user_name): fetches the SteamGifts page for
# `user_name`, walks the "featured__table__row" rows and reads the
# 'Gifts Won', 'Gifts Sent' and 'Contributor Level' values. Returns the tuple
# (global_won, global_sent, level) when at least one of them is truthy,
# otherwise None.
# NOTE(review): this line contains two '******' masking artifacts. The text
# that originally sat between the error-message literals and the following
# xpath literals is missing, so the line is not valid Python as it stands.
# The hidden spans presumably held the `+ user_name` log arguments, early
# returns, the `steam_user` / `all_rows` lookups and the initial values of
# global_won / global_sent / level - TODO restore from version control.
# NOTE(review): the 'Gifts Sent' xpath matches a class attribute with a
# leading space (" featured__table__row__right"), unlike the other rows -
# confirm against the live page markup before "fixing" it.
def get_user_contribution_data(user_name): html_content = WebUtils.get_html_page(SteamGiftsConsts.get_user_link(user_name)) if html_content is None: LogUtils.log_error('Cannot update additional data for user: '******'.//div[@class="sidebar__shortcut-inner-wrap"]/a/@href') if not steam_user: LogUtils.log_error('Cannot update non-existent user: '******'.//div[@class="featured__table__row"]') for row_content in all_rows: row_title = WebUtils.get_item_by_xpath(row_content, u'.//div[@class="featured__table__row__left"]/text()') if row_title == u'Gifts Won': global_won = StringUtils.normalize_int(WebUtils.get_item_by_xpath(row_content, u'.//div[@class="featured__table__row__right"]/span/span/a/text()')) elif row_title == u'Gifts Sent': global_sent = StringUtils.normalize_int(WebUtils.get_item_by_xpath(row_content, u'.//div[@class=" featured__table__row__right"]/span/span/a/text()')) elif row_title == u'Contributor Level': user_level_item = WebUtils.get_item_by_xpath(row_content, u'.//div[@class="featured__table__row__right"]/span/@data-ui-tooltip') level = StringUtils.normalize_float(user_level_item.split('name" : "')[2].split('", "color')[0]) if global_won or global_sent or level: return global_won, global_sent, level return None
def get_users_by_names(user_names):
    """Load the Users rows whose UserName is in `user_names`.

    Returns a dict mapping user name -> GroupUser built from the first four
    columns of each row (user name, steam id, steam user name, creation
    time). An empty `user_names` yields an empty dict without touching the
    database.
    """
    start_time = time.time()
    users_data = dict()
    if not user_names:
        # No names means no rows can match; this also avoids sending an
        # empty IN list to the server.
        return users_data
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            # NOTE(review): the IN list is interpolated into the SQL text, so
            # injection safety depends entirely on parse_list escaping its
            # items - confirm.
            cursor.execute('SELECT * FROM Users WHERE UserName IN (' + parse_list(user_names) + ')')
            for row in cursor.fetchall():
                # (group_user.user_name, group_user.steam_id, group_user.steam_user_name, group_user.creation_time)
                user_name = row[0]
                users_data[user_name] = GroupUser(user_name, steam_id=row[1], steam_user_name=row[2], creation_time=row[3])
        finally:
            # Release the cursor even when the query or row mapping fails.
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Get users by name took ' + str(time.time() - start_time) + ' seconds')
    return users_data
def get_existing_games_data(games_list):
    """Return the games from `games_list` that already exist in the Games table.

    The result maps game name -> GameData built from the first five columns
    of each matching row. The summary log line is always written, even when
    `games_list` is empty (in which case the database is not queried).
    """
    existing_games_data = dict()
    if games_list:
        connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
        try:
            cursor = connection.cursor()
            try:
                # NOTE(review): the IN list is interpolated into the SQL
                # text; safety depends on parse_list escaping - confirm.
                cursor.execute("SELECT * FROM Games WHERE Name IN (" + parse_list(games_list) + ")")
                for row in cursor.fetchall():
                    # (game.game_name, game.game_link, game.value, game.steam_score, game.num_of_reviews)
                    game_name = row[0]
                    existing_games_data[game_name] = GameData(game_name, row[1], row[2], steam_score=row[3], num_of_reviews=row[4])
            finally:
                # Release the cursor even when the query or mapping fails.
                cursor.close()
        finally:
            connection.close()
    LogUtils.log_info('Out of total ' + str(len(games_list)) + ' games in group, already exist in DB: ' + str(len(existing_games_data)))
    return existing_games_data
def update_existing_users(users):
    """Write SteamId, SteamUserName and CreationTime for each user in `users`.

    Rows are matched by UserName and updated in a single executemany call,
    followed by a commit. The connection and cursor are closed even if the
    update fails.
    """
    start_time = time.time()
    # Parameter order must follow the placeholders in the UPDATE statement.
    users_data = [
        (user_data.steam_id, user_data.steam_user_name, to_mysql_date(user_data.creation_time), user_data.user_name)
        for user_data in users
    ]
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.executemany(
                "UPDATE Users SET SteamId=%s,SteamUserName=%s,CreationTime=%s WHERE UserName=%s",
                users_data)
            # Changes are only persisted once commit() is called.
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Update existing users for ' + str(len(users)) + ' users took ' + str(time.time() - start_time) + ' seconds')
def update_existing_games(games):
    """Write LinkURL, Value, Score and NumOfReviews for each game in `games`.

    Rows are matched by Name and updated in a single executemany call,
    followed by a commit. The connection and cursor are closed even if the
    update fails.
    """
    start_time = time.time()
    # Parameter order must follow the placeholders in the UPDATE statement.
    games_data = [
        (game.game_link, game.value, game.steam_score, game.num_of_reviews, game.game_name)
        for game in games
    ]
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.executemany(
                "UPDATE Games SET LinkURL=%s,Value=%s,Score=%s,NumOfReviews=%s WHERE Name=%s",
                games_data)
            # Changes are only persisted once commit() is called.
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Update existing games for ' + str(len(games)) + ' games took ' + str(time.time() - start_time) + ' seconds')
def get_all_games():
    """Return a list with one GameData per row of the Games table.

    The connection and cursor are closed even if the query or the row
    mapping fails.
    """
    start_time = time.time()
    all_games = []
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.execute('SELECT * FROM Games')
            for row in cursor.fetchall():
                # (game.game_name, game.game_link, game.value, game.steam_score, game.num_of_reviews)
                all_games.append(GameData(row[0], row[1], row[2], steam_score=row[3], num_of_reviews=row[4]))
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Get list of all games took ' + str(time.time() - start_time) + ' seconds')
    return all_games
def check_game_data(game_data, game_name):
    """Log an error when `game_data` is missing or only partially loaded.

    A falsy `game_data` is reported as not loaded at all; a value,
    num_of_reviews or steam_score equal to -1 is reported as incomplete.
    Nothing is logged otherwise. Always returns None.
    """
    if not game_data:
        LogUtils.log_error(u'Could not load game data: ' + game_name.decode('utf-8'))
        return

    # -1 is the marker this check treats as "field could not be loaded".
    is_partial = (
        game_data.value == -1
        or game_data.num_of_reviews == -1
        or game_data.steam_score == -1
    )
    if is_partial:
        LogUtils.log_error(u'Could not load full game data: ' + game_name.decode('utf-8'))
def get_all_groups_with_users():
    """Load every group whose Users column is not the empty JSON list.

    Returns a dict mapping group name -> Group(group_name, group_webpage,
    group_users), where group_users is a set of user names decoded from the
    JSON stored in the Users column. The connection and cursor are closed
    even if the query or the JSON decoding fails.
    """
    start_time = time.time()
    groups = dict()
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT Name,Webpage,Users FROM Groups WHERE Users<>'[]'")
            for row in cursor.fetchall():
                users = set()
                for user_data in json.loads(row[2]):
                    # (group_user.user_name)
                    if isinstance(user_data, basestring):
                        users.add(user_data)
                    else:
                        # Handling old data: entries were sequences whose
                        # first item is the user name.
                        users.add(user_data[0])
                groups[row[0]] = Group(group_name=row[0], group_webpage=row[1], group_users=users)
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Get all groups took ' + str(time.time() - start_time) + ' seconds')
    return groups
def save_games(games):
    """Insert `games` into the Games table, skipping rows that already exist.

    Uses a single INSERT IGNORE executemany call followed by a commit. The
    connection and cursor are closed even if the insert fails.
    """
    start_time = time.time()
    # Parameter order must follow the column list in the INSERT statement.
    games_data = [
        (game.game_name, game.game_link, game.value, game.steam_score, game.num_of_reviews)
        for game in games
    ]
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.executemany(
                "INSERT IGNORE INTO Games (Name,LinkURL,Value,Score,NumOfReviews) VALUES (%s, %s, %s, %s, %s)",
                games_data)
            # Changes are only persisted once commit() is called.
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Save games for ' + str(len(games)) + ' games took ' + str(time.time() - start_time) + ' seconds')
def is_giveaway_deleted(giveaway_link, cookies):
    """Return True when the giveaway page reports the giveaway as deleted.

    The page is fetched with the given cookies; the giveaway counts as
    deleted when the page yields at least four "table__column--width-fill"
    text items and the first one starts with 'Deleted'. A page that could
    not be fetched counts as not deleted.
    """
    LogUtils.log_info('Checking if giveaway was deleted: ' + giveaway_link)
    page = WebUtils.get_html_page(giveaway_link, cookies=cookies, delay=delay_time)
    if page is None:
        return False

    messages = WebUtils.get_items_by_xpath(page, u'.//div[@class="table__column--width-fill"]/text()')
    if not messages or len(messages) < 4:
        return False
    return bool(messages[0].startswith('Deleted'))
def get_games_from_package(package_name, package_link):
    """Return the hrefs of all "tab_item_overlay" links on a package page.

    `package_name` is used for logging only.
    """
    LogUtils.log_info('Processing package ' + package_name)
    # Presumably these cookies get past the store's age / mature-content
    # gate - verify against WebUtils.get_html_page.
    page_cookies = "birthtime=-7199; lastagecheckage=1-January-1970; mature_content=1;"
    package_page = WebUtils.get_html_page(package_link, page_cookies)
    return WebUtils.get_items_by_xpath(package_page, u'.//a[@class="tab_item_overlay"]/@href')
def get_free_games_list():
    """Return the set of titles from the free-giveaways JSON feed.

    Only entries whose 'platform_id' equals 1 are included.
    """
    LogUtils.log_info('Getting list of games previously given away for free')
    raw_json = WebUtils.get_https_page_content(
        BarterVGConsts.FREE_GIVEAWAYS_JSON_LINK, unverified=True)
    entries = json.loads(raw_json).values()
    return set(entry['title'] for entry in entries if entry['platform_id'] == 1)
def remove_games(games):
    """Delete the Games rows whose Name matches one of `games`.

    `games` is a collection of objects exposing `game_name`. An empty
    collection is a no-op. The connection and cursor are closed even if the
    delete fails.
    """
    start_time = time.time()
    if not games:
        # Nothing to delete; also avoids sending a DELETE with an empty
        # IN list to the server.
        return
    game_names = [game.game_name for game in games]
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            # NOTE(review): the IN list is interpolated into the SQL text;
            # safety depends on parse_list escaping its items - confirm.
            cursor.execute('DELETE FROM Games WHERE Name IN (' + parse_list(game_names) + ')')
            # Changes are only persisted once commit() is called.
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Delete of ' + str(len(games)) + ' invalid games took ' + str(time.time() - start_time) + ' seconds')
def remove_deleted_giveaways(cookies, group, updated_group_giveaways, ignored_group_giveaways):
    """Drop giveaways from `group` that were deleted on the site.

    Only stored giveaways ending no earlier than the earliest end time in
    `updated_group_giveaways` are examined. A stored giveaway is removed
    from `group.group_giveaways` when it is absent from both the updated
    and the ignored giveaways, has no winners, and the site confirms it was
    deleted. Returns None.
    """
    # If any existing GA is missing from newly parsed data - remove it from group giveaways.
    fresh_with_end = [ga for ga in updated_group_giveaways.values() if ga.end_time]
    if not fresh_with_end:
        return
    cutoff = min(ga.end_time for ga in fresh_with_end)

    stored_with_end = [ga for ga in group.group_giveaways.values() if ga.end_time]
    # Newest first, so the scan can stop at the first giveaway older than the cutoff.
    stored_with_end.sort(key=lambda ga: ga.end_time, reverse=True)
    for giveaway in stored_with_end:
        if giveaway.end_time < cutoff:
            break
        if giveaway.link in updated_group_giveaways or giveaway.link in ignored_group_giveaways:
            continue
        if giveaway.has_winners():
            continue
        # The remote check comes last so it only runs for real candidates.
        if not SteamGiftsScrapingUtils.is_giveaway_deleted(giveaway.link, cookies):
            continue
        LogUtils.log_info('Removing deleted giveaway: ' + giveaway.link)
        group.group_giveaways.pop(giveaway.link, None)
def update_all_users():
    """Refresh the data of all users stored in the DB and report success.

    Returns a (JSON body, 200, headers) tuple; the body carries a success
    flag and the completion timestamp.
    """
    started_at = time.time()
    SGMTBusinessLogic.update_all_db_users_data()
    elapsed = time.time() - started_at
    LogUtils.log_info('UpdateAllUsers took ' + str(elapsed) + ' seconds')

    payload = {
        'success': True,
        'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    headers = {'ContentType': 'application/json'}
    return json.dumps(payload), 200, headers
def get_game_additional_data(game_name, game_link):
    """Scrape the review score and review count from a game's store page.

    When the page links to a base game (the "glance_details" anchor, i.e.
    this is DLC) the base game's page is used instead. Returns
    (steam_score, num_of_reviews); both stay 0 when the page exposes no
    review tooltip or the tooltip says there are not enough reviews.
    """
    LogUtils.log_info('Processing game ' + game_name)
    steam_score = 0
    num_of_reviews = 0
    page_cookies = "birthtime=-7199; lastagecheckage=1-January-1970; mature_content=1;"
    html_content = WebUtils.get_html_page(game_link, page_cookies)
    base_game_link = WebUtils.get_item_by_xpath(html_content, u'.//div[@class="glance_details"]/a/@href')
    if base_game_link is not None:
        # If this is DLC - get additional data according to base game
        html_content = WebUtils.get_html_page(base_game_link, page_cookies)
    tooltips = WebUtils.get_items_by_xpath(html_content, u'.//div[@class="user_reviews_summary_row"]/@data-tooltip-html')
    if not tooltips:
        # No review summary on the page: keep the defaults instead of
        # failing with IndexError on tooltips[-1].
        return steam_score, num_of_reviews
    steam_game_tooltip = tooltips[-1]
    if steam_game_tooltip not in ('Need more user reviews to generate a score', 'No user reviews'):
        # The score is the text before '%'; the count sits between
        # 'of the' and 'user reviews'.
        steam_score = StringUtils.normalize_int(steam_game_tooltip.split('%')[0])
        num_of_reviews = StringUtils.normalize_int(steam_game_tooltip.split('of the')[1].split('user reviews')[0])
    return steam_score, num_of_reviews
def get_group_users(group_webpage):
    """Collect the members of a SteamGifts group by walking its users pages.

    Returns a dict mapping user name -> GroupUser. Paging stops when a page
    cannot be fetched, when the site reports a selected page number other
    than the requested one, or when the page has no selected-page marker.
    """
    LogUtils.log_info('Processing users for group ' + group_webpage)
    members = dict()
    page_no = 1
    while True:
        page_label = str(page_no)
        LogUtils.log_info('Processing users page #' + page_label)
        page_url = (SteamGiftsConsts.get_steamgifts_users_page(group_webpage)
                    + SteamGiftsConsts.STEAMGIFTS_SEARCH_PAGE
                    + page_label)
        page = WebUtils.get_html_page(page_url)
        if page is None:
            LogUtils.log_error('Cannot process users page: ' + page_url)
            break

        selected_page = WebUtils.get_item_by_xpath(page, u'.//a[@class="is-selected"]/span/text()')
        # A different selected page means the requested page does not exist.
        if selected_page and selected_page != page_label:
            break

        for row in WebUtils.get_items_by_xpath(page, u'.//div[@class="table__row-outer-wrap"]'):
            name = WebUtils.get_item_by_xpath(row, u'.//a[@class="table__column__heading"]/text()')
            members[name] = GroupUser(name)

        # No pager marker at all: this was the only page.
        if not selected_page:
            break
        page_no += 1
    LogUtils.log_info('Finished processing users for group ' + group_webpage)
    return members
def check_existing_users(users_list):
    """Return the user names from `users_list` that already exist in Users.

    The summary log line is always written, even when `users_list` is empty
    (in which case the database is not queried). The connection and cursor
    are closed even if the query fails.
    """
    existing_users = []
    if users_list:
        connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
        try:
            cursor = connection.cursor()
            try:
                # NOTE(review): the IN list is interpolated into the SQL
                # text; safety depends on parse_list escaping - confirm.
                cursor.execute('SELECT UserName FROM Users WHERE UserName IN (' + parse_list(users_list) + ')')
                existing_users = [row[0] for row in cursor.fetchall()]
            finally:
                cursor.close()
        finally:
            connection.close()
    LogUtils.log_info('Out of total ' + str(len(users_list)) + ' users in group, already exist in DB: ' + str(len(existing_users)))
    return existing_users
def add_new_group():
    """Add a new group from the request's query parameters.

    Reads `group_webpage`, `cookies` and `start_date` from `request.args`
    and hands them to the business logic. Returns a (JSON body, 200,
    headers) tuple; the body carries a success flag and the completion
    timestamp.
    """
    started_at = time.time()
    params = request.args
    group_webpage = params.get('group_webpage')
    cookies = params.get('cookies')
    start_date = params.get('start_date')

    SGMTBusinessLogic.add_new_group(group_webpage, cookies, start_date)
    elapsed = time.time() - started_at
    LogUtils.log_info('AddNewGroup ' + group_webpage + ' took ' + str(elapsed) + ' seconds')

    payload = {
        'success': True,
        'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    headers = {'ContentType': 'application/json'}
    return json.dumps(payload), 200, headers
def get_all_empty_groups():
    """Return the groups that have neither users nor giveaways stored.

    The result maps group name -> group webpage for rows whose Users and
    Giveaways columns both hold the empty JSON list. The connection and
    cursor are closed even if the query fails.
    """
    start_time = time.time()
    groups = dict()
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT Name,Webpage FROM Groups WHERE Users='[]' AND Giveaways='[]'")
            for row in cursor.fetchall():
                groups[row[0]] = row[1]
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Get all empty groups took ' + str(time.time() - start_time) + ' seconds')
    return groups
def get_all_users():
    """Return a list with one GroupUser per row of the Users table.

    Only the first three columns (user name, steam id, steam user name) are
    mapped. The connection and cursor are closed even if the query fails.
    """
    start_time = time.time()
    all_users = []
    connection = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db_schema, charset='utf8')
    try:
        cursor = connection.cursor()
        try:
            cursor.execute('SELECT * FROM Users')
            for row in cursor.fetchall():
                # (group_user.user_name, group_user.steam_id, group_user.steam_user_name)
                all_users.append(GroupUser(row[0], steam_id=row[1], steam_user_name=row[2]))
        finally:
            cursor.close()
    finally:
        connection.close()
    LogUtils.log_info('Get all users took ' + str(time.time() - start_time) + ' seconds')
    return all_users
def get_game_additional_data(game_name, game_link):
    """Derive a game's review score and review count from its SteamDB page.

    The app id is taken from `game_link` (the path segment following
    SteamConsts.STEAM_GAME_LINK). Returns (steam_score, num_of_reviews),
    where num_of_reviews is positive + negative and steam_score is the
    truncated percentage of positive reviews. Both stay 0 when either
    counter is missing; the score stays 0 when there are no reviews.
    """
    LogUtils.log_info('Processing game in SteamDB: ' + game_name)
    steam_score = 0
    num_of_reviews = 0
    steam_app_id = game_link.split(SteamConsts.STEAM_GAME_LINK)[1].split('/')[0]
    html_content = WebUtils.get_html_page(SteamDBConsts.STEAM_DB_APP_LINK + steam_app_id, https=True)
    steam_score_positive = WebUtils.get_item_by_xpath(html_content, u'.//span[@class="header-thing-good"]/text()')
    steam_score_negative = WebUtils.get_item_by_xpath(html_content, u'.//span[@class="header-thing-poor"]/text()')
    if steam_score_positive and steam_score_negative:
        positive_score = int(steam_score_positive)
        negative_score = int(steam_score_negative)
        num_of_reviews = positive_score + negative_score
        # The strings "0"/"0" are truthy, so the total can still be zero
        # here; guard the division instead of raising ZeroDivisionError.
        if num_of_reviews > 0:
            steam_score = int(float(positive_score) / num_of_reviews * 100)
    return steam_score, num_of_reviews
def update_all_db_groups():
    """Refresh every group known to the DB.

    Groups that already hold data are updated (including games) starting
    from the first day of the previous month; groups listed as empty are
    updated with no start date. A failure on one group is logged and does
    not stop the remaining groups.
    """
    # Load list of all groups from DB
    groups, empty_groups = get_groups()

    # Existing groups: update from the first day of the previous month.
    first_of_previous_month = (datetime.datetime.now() - relativedelta(months=1)).replace(day=1)
    start_date = first_of_previous_month.strftime('%Y-%m-%d')
    for group_name, group_url in groups.items():
        if group_name in empty_groups:
            continue
        try:
            update_existing_group(group_url, start_date=start_date, update_games=True)
        except Exception as e:
            LogUtils.log_error('Cannot update data for group: ' + group_url + ' ERROR: ' + str(e))
            traceback.print_exc()

    # New (empty) groups: update from all time.
    for group_url in empty_groups.values():
        try:
            update_existing_group(group_url)
        except Exception as e:
            LogUtils.log_error('Cannot update data for group: ' + group_url + ' ERROR: ' + str(e))
            traceback.print_exc()
def get_html_page(page_url, cookies=None, retries=3, https=False):
    """Download `page_url` and parse it into an HTML tree, retrying on errors.

    `retries` is the total number of attempts. When `https` is true the page
    is fetched with get_https_page_content (cookies are not passed on that
    path), otherwise with get_page_content and `cookies`. Returns the parsed
    tree, or None when every attempt failed or `retries` is not positive.
    """
    while retries > 0:
        try:
            if https:
                return html.fromstring(get_https_page_content(page_url))
            return html.fromstring(get_page_content(page_url, cookies))
        except Exception as e:
            # Count the failed attempt first so the log shows what is
            # actually left and the final-failure branch is reachable.
            retries -= 1
            if retries > 0:
                LogUtils.log_error('Error downloading page ' + page_url + '. ' + str(retries) + ' retries left. retyring...')
                # Short pause before the next attempt.
                time.sleep(0.1)
            else:
                LogUtils.log_error(
                    'Error downloading page ' + page_url + '. now more retries left. stopping... Reason: ' + str(e))
                traceback.print_exc()
    return None
def update_group_data(group_webpage, cookies, group, force_full_run=False, start_date=None, end_date=None):
    """Re-scrape a group's users and giveaways and save the result.

    Users not yet in the DB get their SteamGifts and Steam data loaded; a
    failure for one user is logged and skipped. Giveaways are re-parsed and,
    unless the scraper reports that it reached its threshold, stored
    giveaways that were deleted on the site are removed from `group`.
    Returns the games reported by the giveaway scraper, or an empty dict
    when the group has no users.
    """
    group_users = SteamGiftsScrapingUtils.get_group_users(group_webpage)
    if not group_users:
        LogUtils.log_error("group_users is empty")
        return dict()
    existing_users = MySqlConnector.check_existing_users(group_users.keys())
    # Set copy for O(1) membership tests; the original object is still the
    # one handed to save_group below.
    known_user_names = set(existing_users)
    for group_user in group_users.values():
        if group_user.user_name not in known_user_names:
            try:
                SteamGiftsScrapingUtils.update_user_additional_data(group_user)
                SteamScrapingUtils.update_user_additional_data(group_user)
            except Exception as e:
                # NOTE(review): the middle of this expression was masked
                # ('******') in the source; the user name operand is the
                # presumed original - confirm against version control.
                LogUtils.log_error('Cannot add additional data for user: ' + group_user.user_name + ' ERROR: ' + str(e))
                traceback.print_exc()
    group_giveaways, ignored_giveaways, games, reached_threshold = SteamGiftsScrapingUtils.get_group_giveaways(
        group_webpage, cookies, group.group_giveaways,
        force_full_run=force_full_run, start_date=start_date, end_date=end_date)
    if not reached_threshold:
        remove_deleted_giveaways(cookies, group, group_giveaways, ignored_giveaways)
    MySqlConnector.save_group(
        group_webpage,
        Group(group_users, group_giveaways, group_webpage=group_webpage, cookies=cookies, group_name=group.group_name),
        existing_users, group)
    return games
def update_group_data():
    """Update an existing group using the request's query parameters.

    Reads `group_webpage`, `start_date`, `end_date` and `force_full_run`
    from `request.args` and runs the group update with games included.
    Returns a (JSON body, 200, headers) tuple; the body carries a success
    flag and the completion timestamp.
    """
    started_at = time.time()
    params = request.args
    group_webpage = params.get('group_webpage')
    start_date = params.get('start_date')
    end_date = params.get('end_date')
    force_full_run = params.get('force_full_run')

    SGMTBusinessLogic.update_existing_group(
        group_webpage, start_date, end_date, force_full_run, update_games=True)
    elapsed = time.time() - started_at
    LogUtils.log_info('UpdateGroupData ' + group_webpage + ' took ' + str(elapsed) + ' seconds')

    payload = {
        'success': True,
        'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    headers = {'ContentType': 'application/json'}
    return json.dumps(payload), 200, headers
def get_game_giveaways(group_webpage, game_name, start_time):
    """Find a group's giveaways whose game name matches `game_name`.

    A giveaway matches when either name contains the other, ignoring case.
    Returns a dict mapping each matching giveaway -> number of its entries
    made by current group users, or None when the group cannot be loaded.
    Giveaways with an empty game name are logged and skipped.
    """
    group = MySqlConnector.load_group(group_webpage, limit_by_time=start_time, starts_after_str=start_time)
    if not group:
        return None
    # Set copy so each per-entry membership test is O(1) instead of a scan
    # over the key list.
    group_users = set(group.group_users.keys())
    all_game_giveaways = dict()
    for group_giveaway in group.group_giveaways.values():
        group_game_name = group_giveaway.game_name.decode('utf-8')
        if not group_game_name:
            LogUtils.log_info("Invalid game name: " + group_giveaway.link)
            continue
        wanted = game_name.lower()
        candidate = group_game_name.lower()
        if candidate in wanted or wanted in candidate:
            all_game_giveaways[group_giveaway] = sum(
                1 for entry in group_giveaway.entries.values()
                if entry.user_name in group_users)
    return all_game_giveaways
# update_user_additional_data(user): fetches the SteamGifts page of
# `user.user_name` (passing delay=delay_time to the fetch), then scans the
# "featured__table__row" rows for the one titled 'Registered' and stores its
# data-timestamp attribute, converted with datetime.fromtimestamp, in
# user.creation_time. The visible tail of the function returns True.
# NOTE(review): this line contains two '******' masking artifacts. The text
# between each error-message literal and the following xpath literal is
# missing, so the line is not valid Python as it stands. The hidden spans
# presumably held the `+ user.user_name` log arguments, `return False`
# statements and the `steam_user` / `user_menu_rows` lookups; whether
# user.steam_id was also assigned there cannot be told from here - TODO
# restore from version control.
def update_user_additional_data(user): LogUtils.log_info('Processing SteamGifts user ' + user.user_name) html_content = WebUtils.get_html_page(SteamGiftsConsts.get_user_link( user.user_name), delay=delay_time) if html_content is None: LogUtils.log_error('Cannot update additional data for user: '******'.//div[@class="sidebar__shortcut-inner-wrap"]/a/@href') if not steam_user: LogUtils.log_error('Cannot update non-existent user: '******'.//div[@class="featured__table__row"]') for row_content in user_menu_rows: user_menu_type = WebUtils.get_item_by_xpath( row_content, u'.//div[@class="featured__table__row__left"]/text()') if user_menu_type == 'Registered': data_timestamp = float( WebUtils.get_item_by_xpath( row_content, u'.//div[@class="featured__table__row__right"]/span/@data-timestamp' )) user.creation_time = datetime.fromtimestamp(data_timestamp) break return True
def group_users_check_rules():
    """Check every user of a group against the rules selected in the request.

    All options are read from `request.args`. Without `group_webpage` an
    HTML usage page is returned. Otherwise the per-user HTML fragments
    produced by the response generator are logged and concatenated into
    the response.
    """
    params = request.args
    group_webpage = params.get('group_webpage')
    check_nonactivated = params.get('check_nonactivated')
    check_multiple_wins = params.get('check_multiple_wins')
    check_real_cv_value = params.get('check_real_cv_value')
    check_steamgifts_ratio = params.get('check_steamgifts_ratio')
    check_level = params.get('check_level')
    level = get_optional_int_param('level')
    check_steamrep = params.get('check_steamrep')

    if not group_webpage:
        usage = (
            'GroupUsersCheckRules - Check if a user complies to group rules.<BR><BR>'
            '<B>Params:</B><BR> '
            'group_webpage - SteamGifts group webpage<BR>'
            '<B>Optional Params:</B> <BR>'
            'check_nonactivated=True/False - Check user doesn\'t have non activated games<BR>'
            'check_multiple_wins=True/False - Check user doesn\'t have multiple wins<BR>'
            'check_real_cv_value=True/False - Check user has positive real CV ratio<BR>'
            'check_steamgifts_ratio=True/False - Check user has positive SteamGifts global ratio<BR>'
            'check_steamrep=True/Faalse - Check user has no SteamRep bans and his profile is public<BR>'
            'check_level=True/False - Check user is above certain level<BR>'
            'level=# - Check user is above certain level<BR>'
            '<BR>'
            '<A HREF="/SGMT/GroupUsersCheckRules?group_webpage=https://www.steamgifts.com/group/6HSPr/qgg-group&check_nonactivated=True&check_multiple_wins=True&check_real_cv_value=True&check_steamgifts_ratio=True&check_steamrep=True&check_level=True&level=1">Request Example</A>'
        )
        return usage

    group_users_rules = SGMTBusinessLogic.group_users_check_rules(
        group_webpage, check_nonactivated, check_multiple_wins,
        check_real_cv_value, check_steamgifts_ratio, check_level, level,
        check_steamrep)

    fragments = []
    for user, rules in group_users_rules.items():
        # The first six rule results are passed positionally to the generator.
        fragment = HtmlResponseGenerationService.generate_user_check_rules_response(
            user, rules[0], rules[1], rules[2], rules[3], rules[4], rules[5])
        LogUtils.log_info(fragment)
        fragments.append(fragment)
    return u''.join(fragments)
def update_user_additional_data(user):
    """Fill in `user.steam_id` from the user's SteamGifts profile page.

    The Steam profile link is read from the sidebar shortcut anchor and the
    part after SteamConsts.STEAM_PROFILE_LINK is stored as the steam id.
    Returns True on success, False when the page cannot be fetched or holds
    no Steam profile link.
    """
    LogUtils.log_info('Processing SteamGifts user ' + user.user_name)
    html_content = WebUtils.get_html_page(SteamGiftsConsts.get_user_link(user.user_name))
    if html_content is None:
        # NOTE(review): from here to the xpath literal below the source was
        # masked ('******'). The log argument, the early return and the
        # lookup call were restored by mirroring the intact check further
        # down - confirm against version control.
        LogUtils.log_error('Cannot update additional data for user: ' + user.user_name)
        return False
    steam_user = WebUtils.get_item_by_xpath(html_content, u'.//div[@class="sidebar__shortcut-inner-wrap"]/a/@href')
    if not steam_user:
        LogUtils.log_error('Cannot update non-existent user: ' + user.user_name)
        return False
    user.steam_id = steam_user.split(SteamConsts.STEAM_PROFILE_LINK)[1]
    return True