def read_games(handle, handle_json=None):
    """Yield the headers of each game, one mapping per game.

    Based on chess.pgn.scan_headers() from Niklas Fiekas python-chess.

    Two modes:

    * ``handle_json`` is given: every line of it is decoded with
      ``json.loads`` and the decoded object is yielded. ``handle`` is not
      touched. Lines that fail to decode go through a few best-effort
      repairs (see the inline comments); lines that still cannot be decoded
      are skipped silently.
    * otherwise: ``handle`` is read line by line as PGN text. For every game
      a ``collections.defaultdict(str)`` with the tag pairs is yielded, plus
      an ``"offset"`` key computed from the summed length of the lines read
      before the game's first tag.

    :param handle: object with a ``readline()`` method returning PGN lines.
    :param handle_json: optional iterable of JSON lines, one game per line.
    """
    if handle_json is not None:
        for line in handle_json:
            # Decode first, yield afterwards: a ``yield`` inside a ``try``
            # with a catch-all handler would swallow GeneratorExit when the
            # consumer closes the generator early.
            try:
                record = json.loads(line)
            except ValueError as err:
                # ``str(err)`` instead of ``err.message``: the attribute only
                # exists on Python 2 exceptions.
                message = str(err)
                try:
                    if "\\" in line:
                        # Drop backslashes (e.g. invalid escapes) and retry.
                        record = json.loads(line.replace("\\", ""))
                    elif message.startswith("Expecting ',' delimiter"):
                        # Unescaped double quotes inside the Event or Site
                        # values: rebuild the line with those quotes removed.
                        if not ('"Date":' in line and '"Site":' in line
                                and '"Event":' in line):
                            continue
                        left, date = line.split(', "Date":')
                        left, site = left.split(', "Site":')
                        left, event = left.split('"Event":')
                        event = event.replace('"', '')
                        site = site.replace('"', '')
                        line = '{"Event":"%s", "Site":"%s", "Date":%s' % (
                            event, site, date)
                        record = json.loads(line)
                    elif message.startswith("Invalid control character"):
                        # Remove every character below U+0020 and retry.
                        line = "".join(
                            char for char in line if ord(char) >= 32)
                        record = json.loads(line)
                    elif message.startswith((
                            "No JSON object could be decoded",  # Python 2
                            "Expecting value")):  # Python 3 wording
                        # Line starts with a mangled opening '[Date'.
                        if not line.startswith("[Date"):
                            continue
                        record = json.loads(line.replace("[Date", '{"Date'))
                    else:
                        continue
                except Exception:
                    # Deliberate best effort: the repair did not help (or
                    # the line did not have the expected shape), skip it.
                    continue
            except Exception:
                # Non-ValueError failure (e.g. unexpected line type): skip.
                continue
            yield record
        return

    in_comment = False
    game_headers = None
    game_pos = None
    last_pos = 0
    line = handle.readline()

    # scoutfish creates game offsets at previous game end, so the reported
    # offset is moved back by the length of the line ending, which is
    # detected once from the first line.
    line_end_fix = 2 if line.endswith("\r\n") else 1

    while line:
        # Skip single line comments.
        if line.startswith("%"):
            last_pos += len(line)
            line = handle.readline()
            continue

        # Reading a header tag. Parse it and add it to the current headers.
        if not in_comment and line.startswith("["):
            tag_match = TAG_REGEX.match(line)
            if tag_match:
                if game_pos is None:
                    # First tag of a new game: remember where it started.
                    game_headers = collections.defaultdict(str)
                    game_pos = last_pos

                game_headers[tag_match.group(1)] = tag_match.group(2)

                last_pos += len(line)
                line = handle.readline()
                continue

        # Reading movetext. Update parser state in_comment in order to skip
        # comments that look like header tags.
        if (not in_comment and "{" in line) or (in_comment and "}" in line):
            in_comment = line.rfind("{") > line.rfind("}")

        # Reading movetext. If there were headers, previously, those are now
        # complete and can be yielded.
        if game_pos is not None:
            game_headers["offset"] = max(0, game_pos - line_end_fix)
            yield game_headers
            game_pos = None

        last_pos += len(line)
        line = handle.readline()

    # Yield the headers of the last game.
    if game_pos is not None:
        game_headers["offset"] = max(0, game_pos - line_end_fix)
        yield game_headers
def read_games(handle, handle_json=None):
    """Yield the headers of each game, one mapping per game.

    Based on chess.pgn.scan_headers() from Niklas Fiekas python-chess.

    Two modes:

    * ``handle_json`` is given: every line of it is decoded with
      ``json.loads`` and the decoded object is yielded. ``handle`` is not
      touched. Lines that fail to decode go through a few best-effort
      repairs (see the inline comments); lines that still cannot be decoded
      are skipped silently.
    * otherwise: ``handle`` is read line by line as PGN text. For every game
      a ``collections.defaultdict(str)`` with the tag pairs is yielded, plus
      an ``"offset"`` key computed from the summed length of the lines read
      before the game's first tag.

    :param handle: object with a ``readline()`` method returning PGN lines.
    :param handle_json: optional iterable of JSON lines, one game per line.
    """
    if handle_json is not None:
        for line in handle_json:
            # Decode first, yield afterwards: a ``yield`` inside a ``try``
            # with a catch-all handler would swallow GeneratorExit when the
            # consumer closes the generator early.
            try:
                record = json.loads(line)
            except ValueError as err:
                # ``str(err)`` instead of ``err.message``: the attribute only
                # exists on Python 2 exceptions.
                message = str(err)
                try:
                    if "\\" in line:
                        # Drop backslashes (e.g. invalid escapes) and retry.
                        record = json.loads(line.replace("\\", ""))
                    elif message.startswith("Expecting ',' delimiter"):
                        # Unescaped double quotes inside the Event or Site
                        # values: rebuild the line with those quotes removed.
                        if not ('"Date":' in line and '"Site":' in line
                                and '"Event":' in line):
                            continue
                        left, date = line.split(', "Date":')
                        left, site = left.split(', "Site":')
                        left, event = left.split('"Event":')
                        event = event.replace('"', '')
                        site = site.replace('"', '')
                        line = '{"Event":"%s", "Site":"%s", "Date":%s' % (
                            event, site, date)
                        record = json.loads(line)
                    elif message.startswith("Invalid control character"):
                        # Remove every character below U+0020 and retry.
                        line = "".join(
                            char for char in line if ord(char) >= 32)
                        record = json.loads(line)
                    elif message.startswith((
                            "No JSON object could be decoded",  # Python 2
                            "Expecting value")):  # Python 3 wording
                        # Line starts with a mangled opening '[Date'.
                        if not line.startswith("[Date"):
                            continue
                        record = json.loads(line.replace("[Date", '{"Date'))
                    else:
                        continue
                except Exception:
                    # Deliberate best effort: the repair did not help (or
                    # the line did not have the expected shape), skip it.
                    continue
            except Exception:
                # Non-ValueError failure (e.g. unexpected line type): skip.
                continue
            yield record
        return

    in_comment = False
    game_headers = None
    game_pos = None
    last_pos = 0
    line = handle.readline()

    # scoutfish creates game offsets at previous game end, so the reported
    # offset is moved back by the length of the line ending, which is
    # detected once from the first line.
    line_end_fix = 2 if line.endswith("\r\n") else 1

    while line:
        # Skip single line comments.
        if line.startswith("%"):
            last_pos += len(line)
            line = handle.readline()
            continue

        # Reading a header tag. Parse it and add it to the current headers.
        if not in_comment and line.startswith("["):
            tag_match = TAG_REGEX.match(line)
            if tag_match:
                if game_pos is None:
                    # First tag of a new game: remember where it started.
                    game_headers = collections.defaultdict(str)
                    game_pos = last_pos

                game_headers[tag_match.group(1)] = tag_match.group(2)

                last_pos += len(line)
                line = handle.readline()
                continue

        # Reading movetext. Update parser state in_comment in order to skip
        # comments that look like header tags.
        if (not in_comment and "{" in line) or (in_comment and "}" in line):
            in_comment = line.rfind("{") > line.rfind("}")

        # Reading movetext. If there were headers, previously, those are now
        # complete and can be yielded.
        if game_pos is not None:
            game_headers["offset"] = max(0, game_pos - line_end_fix)
            yield game_headers
            game_pos = None

        last_pos += len(line)
        line = handle.readline()

    # Yield the headers of the last game.
    if game_pos is not None:
        game_headers["offset"] = max(0, game_pos - line_end_fix)
        yield game_headers