def findMissingData(): GB_ID = 5 games = db_utils.customQuery( "SELECT game.title,game.id,game.alt_title,(SELECT JSON_ARRAYAGG(platform.short) FROM platform LEFT JOIN gameplatform on platform.id=gameplatform.platformID WHERE gameID=game.id) as 'platforms',gameplatform.* FROM `gameplatform` LEFT join game on gameID=game.id WHERE cover_platform_link is null and platformID=5" ) listpro = scrape_utils.findSuitableProxy() while (len(listpro) == 0): print("trying to extract proxylist") time.sleep(5) listpro = scrape_utils.findSuitableProxy() counter = 10 for count in range(0, len(games)): counter = counter - 1 if (counter == 0): listpro = scrape_utils.findSuitableProxy() while (len(listpro) == 0): print("trying to extract proxylist") time.sleep(5) listpro = scrape_utils.findSuitableProxy() counter = 10 game = games[count] title = re.sub('\(.*\)', '', game['title']) print('doing ' + str(game['id']) + ":" + title) data = scrape_utils.getGamefaqsDescAndImage(title, 'GAMEBOY', listpro) if (data != -1 and data is not None): db_utils.saveCoverPlatformLink(game['id'], GB_ID, data['img']) db_utils.saveDescription(game['id'], data['desc']) db_utils.saveWikiCoverLink(game['id'], data['img']) elif (game['alt_title'] is not None): data = scrape_utils.getGamefaqsDescAndImage( game['alt_title'], 'GAMEBOY', listpro) if (data != -1 and data is not None): db_utils.saveCoverPlatformLink(game['id'], GB_ID, data['img']) db_utils.saveDescription(game['id'], data['desc']) db_utils.saveWikiCoverLink(game['id'], data['img'])
def findMissingData(): games = db_utils.customQuery( "SELECT * FROM `game` WHERE description is null and game.id>4750;") listpro = scrape_utils.findSuitableProxy() while (len(listpro) == 0): print("trying to extract proxylist") listpro = scrape_utils.findSuitableProxy() for count in range(0, len(games)): game = games[count] if (r' (SMS)' in game['title']): cleanTitle = re.sub(' \(SMS\)', '', game['title']) data = scrape_utils.getGamefaqsDescAndImage( cleanTitle, 'SMS', listpro) elif (r'Starring' in game['title']): cleanTitleParts = re.split(' Starring', game['title']) cleanTitle = cleanTitleParts[1] + ": " + cleanTitleParts[0] data = scrape_utils.getGamefaqsDescAndImage( cleanTitle, 'SMS', listpro) elif ('Sonic' in game['title']): cleanTitle = re.sub('Sonic', 'Sonic The Hedgehog', game['title']) data = scrape_utils.getGamefaqsDescAndImage( cleanTitle, 'SMS', listpro) elif ('Solomon' in game['title']): cleanTitle = 'Solomon\'s Key' data = scrape_utils.getGamefaqsDescAndImage( cleanTitle, 'SMS', listpro) else: data = scrape_utils.getGamefaqsDescAndImage( game['title'], 'SMS', listpro) if (data != -1 and data is not None): db_utils.saveDescription(game['id'], data['desc']) db_utils.saveWikiCoverLink(game['id'], data['img']) db_utils.saveCoverPlatformLink(game['id'], 3, data['img'])
def gamefaqsScraping(cleanTitle, gameID, extractImage, newGame):
    """Scrape GameFaqs (GAMEBOY section) for one title and persist the result.

    For a new game the description and wiki cover link are saved; the
    per-platform cover link is saved whenever extractImage is set.
    A scrape result of -1 or None is treated as "not found" and ignored.

    NOTE(review): relies on module-level globals `listpro` (proxy list) and
    `GB_ID` (Game Boy platform id) being defined before this is called —
    confirm against the rest of the file.
    """
    result = scrape_utils.getGamefaqsDescAndImage(cleanTitle, 'GAMEBOY', listpro)
    if result == -1 or result is None:
        return
    if newGame:
        db_utils.saveDescription(gameID, result['desc'])
    if extractImage:
        if newGame:
            db_utils.saveWikiCoverLink(gameID, result['img'])
        db_utils.saveCoverPlatformLink(gameID, GB_ID, result['img'])
def findMissingData():
    """Backfill missing NES descriptions and wiki cover links from GameFaqs.

    NOTE(review): in the SQL below, AND binds tighter than OR, so the filter
    is `cover_wikipedia_link IS NULL OR (description IS NULL AND id>4033)` —
    confirm that is the intended grouping; it is left untouched here.

    NOTE: this file defines findMissingData more than once; later
    definitions shadow this one at import time.
    """
    games = db_utils.customQuery(
        "SELECT * FROM `game` WHERE cover_wikipedia_link is null or description is null and game.id>4033;"
    )
    # fix: the original proceeded even when no proxy was found; wait and
    # retry every 5s like the sibling scrapers in this file do
    listpro = scrape_utils.findSuitableProxy()
    while not listpro:
        print("trying to extract proxylist")
        time.sleep(5)
        listpro = scrape_utils.findSuitableProxy()
    for game in games:
        data = scrape_utils.getGamefaqsDescAndImage(game['title'], 'NES', listpro)
        if data != -1 and data is not None:
            db_utils.saveDescription(game['id'], data['desc'])
            db_utils.saveWikiCoverLink(game['id'], data['img'])
def gamefaqsScraping(cleanTitle, gameID, extractImage):
    """Scrape GameFaqs (NES section) for one title and persist the result.

    Saves the description always; the wiki cover link only when
    extractImage is set. A result of -1 or None means "not found".

    NOTE(review): unlike every other call site in this file, no proxy list
    is passed to getGamefaqsDescAndImage here — confirm the helper has a
    default for its third argument.
    """
    result = scrape_utils.getGamefaqsDescAndImage(cleanTitle, 'NES')
    if result == -1 or result is None:
        return
    db_utils.saveDescription(gameID, result['desc'])
    if extractImage:
        db_utils.saveWikiCoverLink(gameID, result['img'])