def clrmamepro_to_romdata(clrmamepro_doc):
    """Convert a clrmamepro document into a RomData document.

    The document is parsed with ``clrmamepro.parse``; every entry found
    under its "game" key is turned into a ROM description with
    ``clrmamepro.game_to_rom`` and registered in a new ``RomData``.

    Returns the ``RomData`` instance, which is left empty when the parsed
    document has no "game" entries.
    """
    romdata_doc = RomData()
    entries = clrmamepro.parse(clrmamepro_doc)

    # Nothing to convert when the document declares no game at all.
    if "game" not in entries:
        return romdata_doc

    for game_entry in entries["game"]:
        rom = clrmamepro.game_to_rom(game_entry, entries)
        # The ROM is indexed by an ID derived from its title.
        romdata_doc.add_rom(
            RomData.name_to_id(rom["title"]),
            rom["crc"],
            rom["md5"],
            rom["sha1"],
            rom["size"],
        )

    return romdata_doc


def _download_guardiana_page(url, attempts=5):
    """Download *url* and return its raw content, retrying on URLError.

    At most *attempts* requests are made. The response is closed as soon as
    its body has been read.

    Raises URLError (chained to the last failure) when every attempt failed.
    """
    last_error = None
    for _ in range(attempts):
        try:
            response = urllib.request.urlopen(url)
        except URLError as err:
            last_error = err
            continue
        with response:
            return response.read()

    raise URLError(
        "could not download {} after {} attempts".format(url, attempts)
    ) from last_error


def add_game_data_from_url(game_data, url):
    """Fill *game_data* with the game described by a Guardiana page.

    The page at *url* is downloaded and parsed; the common game data
    (title, developers, genres, players, tags, title screenshot) and the
    data of each version not already present in *game_data* (title, covers,
    publisher, release date) are added to it.

    Returns True when the game was added, False when no common title could
    be found on the page (nothing is added in that case).

    Raises URLError when the page cannot be downloaded after several
    attempts.
    """
    # Matches a "databaseInfosDesc" div followed by its "databaseInfosContent"
    # div, capturing the label and the value.
    info_pattern = re.compile(
        r"<\s*div.*?databaseInfosDesc.*?>(.+?)<\s*/\s*div\s*>\s*<\s*div.*?databaseInfosContent.*?>(.+?)<\s*/\s*div\s*>"
    )

    #
    # Download and read the page from Guardiana
    #

    doc = _download_guardiana_page(url)
    soup = BeautifulSoup(doc)

    #
    # Get the common game data
    #

    general_info_table = soup.find("table", {"class": "MDGD_GamesInfos"})

    common = {
        "title": None,
        "developers": [],
        "genres": [],
        "players": [0, 0],
        "tags": [],
    }

    for info in info_pattern.findall(str(general_info_table)):
        key = info[0].lower()
        value = html_get_text(info[1]).strip()

        if key == "common title":
            common["title"] = value
        elif key == "theme":
            # Themes are comma separated and stored lower-cased as tags.
            for item in re.split(r"\s*,\s*", value.lower()):
                if item not in common["tags"]:
                    common["tags"].append(item)
        elif key == "developer":
            for item in re.split(r"\s*/\s*", value):
                if not is_string_null(item) and item not in common["developers"]:
                    common["developers"].append(item)
        elif key == "genre":
            for item in re.split(r"\s*/\s*", value):
                if item not in common["genres"]:
                    common["genres"].append(item)

    # Set the game's players number. The table or the span may be missing
    # from the page; the players then stay at their [0, 0] default.
    players_span = None
    if general_info_table is not None:
        players_span = general_info_table.find("span", {"class": "GamePlayers1"})
    game_players = players_span.get_text() if players_span is not None else ""

    if game_players:
        # Either a single number or a "min-max" range.
        bounds = game_players.split("-", 1)
        common["players"][0] = int(bounds[0])
        if len(bounds) > 1:
            common["players"][1] = int(bounds[1])

    # The script can't do anything without the game's title
    if not common["title"]:
        return False

    # Get the game's ID from its title
    game_id = RomData.name_to_id(common["title"])

    #
    # Set the common game data
    #

    game_data.set_title(game_id, common["title"])

    for developer in common["developers"]:
        game_data.add_developer(game_id, developer)

    for genre in common["genres"]:
        game_data.add_genre(game_id, genre)

    game_data.set_players(game_id, common["players"][0], common["players"][1])

    for tag in common["tags"]:
        game_data.add_tag(game_id, tag)

    game_data.add_screenshot(game_id, get_title_screenshot(soup))

    #
    # Get the versions' data
    #

    version_list = soup.find_all("div", {"class": "versionFiche"})

    for v in version_list:
        # Country: taken from the flag image number, falling back to the
        # first entry of `flags` when absent or out of range.
        v_country = flags[0]
        result = re.search(r"/img/flags/(\d+).gif", str(v))
        if result:
            flag_nbr = int(result.group(1))
            if flag_nbr < len(flags):
                v_country = flags[flag_nbr]

        # Versions already known for this country are left untouched.
        if game_data.contains_version(game_id, v_country):
            continue

        #
        # Set the version's title
        #

        # Get the version's title, dropping a trailing "(...)" suffix
        v_title = html_get_text(
            v.find("span", {"class": "MDGDVersionTitle"}).get_text()
        )
        result = re.match(r"(.*?)\s*\((.*?)\)", v_title)
        if result:
            v_title = result.group(1)

        # Get the local title, which takes precedence when present
        v_local_title = html_get_text(
            v.find("td", {"class": "TextCenter", "colspan": "2"}).get_text()
        )
        if not is_string_null(v_local_title):
            v_title = v_local_title
            game_data.set_version_title(game_id, v_country, v_local_title)

        # If the version's title is the same as the common title, it should be null
        if common["title"] == v_title:
            v_title = None

        game_data.set_version_title(game_id, v_country, v_title)

        #
        # Set the version's cover
        #

        covers_soup = v.find("div", {"class": "alternatecoverbox"})

        v_cover = {"front": None, "back": None, "side": None}

        side = covers_soup.find("img", {"alt": "Side / SpinCard"})
        if side and "src" in side.attrs:
            v_cover["side"] = side["src"]

        front = covers_soup.find("img", {"alt": "Front"})
        if front and "src" in front.attrs:
            v_cover["front"] = front["src"]

        back = covers_soup.find("img", {"alt": "Back"})
        if back and "src" in back.attrs:
            v_cover["back"] = back["src"]

        game_data.set_version_cover(
            game_id, v_country, v_cover["front"], v_cover["back"], v_cover["side"]
        )

        # Serial number, barcode, publisher, release date...
        for info in info_pattern.findall(str(v)):
            key = info[0].lower()

            if key == "publisher":
                if not is_string_null(info[1]):
                    game_data.set_version_publisher(
                        game_id, v_country, html_get_text(info[1])
                    )
            # Release date
            elif key == "release date":
                if not is_string_null(info[1]):
                    v_date = split_date(info[1])
                    game_data.set_version_release_date(
                        game_id, v_country, v_date[0], v_date[1], v_date[2]
                    )

    return True