Example #1
0
def _repair_json_line(line, message):
    """Return a patched copy of a malformed JSON *line*, or None to skip it.

    *message* is the text of the ValueError that json.loads(line) raised; it
    selects which repair is attempted. The returned string is not guaranteed
    to parse. May raise ValueError when the Event/Site/Date separators do not
    split the line into exactly two parts each; the caller treats that as
    unrepairable.
    """
    # Backslashes take priority over every message-based repair.
    if "\\" in line:
        return line.replace("\\", "")

    if message.startswith("Expecting ',' delimiter"):
        if not ('"Date":' in line and '"Site":' in line and '"Event":' in line):
            return None
        # Strip every double quote from the Event and Site values and quote
        # them again; everything from the Date value onwards is kept verbatim.
        left, date = line.split(', "Date":')
        left, site = left.split(', "Site":')
        _, event = left.split('"Event":')
        event = event.replace('"', '')
        site = site.replace('"', '')
        return '{"Event":"%s", "Site":"%s", "Date":%s' % (event, site, date)

    if message.startswith("Invalid control character"):
        # Drop every character below U+0020 (this includes the newline).
        return "".join(char for char in line if ord(char) >= 32)

    # Python 2 reports "No JSON object could be decoded" where Python 3
    # reports "Expecting value".
    if message.startswith(("No JSON object could be decoded",
                           "Expecting value")):
        if line.startswith("[Date"):
            return line.replace("[Date", '{"Date')
        return None

    return None


def read_games(handle, handle_json=None):
    """Based on chess.pgn.scan_headers() from Niklas Fiekas python-chess

    Generate one header mapping per game.

    If *handle_json* is given, *handle* is ignored: every line of
    *handle_json* is decoded as a JSON document and the result is yielded.
    Lines that fail to decode get one repair attempt (see
    _repair_json_line); lines that still fail are skipped silently.

    Otherwise *handle* is read line by line as PGN text. For every game a
    collections.defaultdict(str) of its header tags is yielded, with an extra
    "offset" key holding the position of the first header line minus the
    length of one line ending, clamped at 0. Positions are accumulated with
    len(line), so they are byte offsets only when one element of a line is
    one byte of the file.
    """
    if handle_json is not None:
        for line in handle_json:
            # Decode inside the try, yield outside it: a GeneratorExit thrown
            # at the yield by close() must not be caught by these handlers.
            try:
                game = json.loads(line)
            except ValueError as e:
                try:
                    # str(e) instead of e.message, which Python 3 removed.
                    repaired = _repair_json_line(line, str(e))
                    if repaired is None:
                        continue
                    game = json.loads(repaired)
                except Exception:
                    # Best effort: a line that cannot be repaired is dropped.
                    continue
            except Exception:
                continue
            yield game
        return

    in_comment = False

    game_headers = None
    game_pos = None

    last_pos = 0
    line = handle.readline()

    # scoutfish creates game offsets at previous game end
    line_end_fix = 2 if line.endswith("\r\n") else 1

    while line:
        # Skip single line comments.
        if line.startswith("%"):
            last_pos += len(line)
            line = handle.readline()
            continue

        # Reading a header tag. Parse it and add it to the current headers.
        if not in_comment and line.startswith("["):
            tag_match = TAG_REGEX.match(line)
            if tag_match:
                if game_pos is None:
                    # First tag of a new game: remember where it starts.
                    game_headers = collections.defaultdict(str)
                    game_pos = last_pos

                game_headers[tag_match.group(1)] = tag_match.group(2)

                last_pos += len(line)
                line = handle.readline()
                continue

        # Reading movetext. Update parser state in_comment in order to skip
        # comments that look like header tags.
        if (not in_comment and "{" in line) or (in_comment and "}" in line):
            in_comment = line.rfind("{") > line.rfind("}")

        # Reading movetext. If there were headers, previously, those are now
        # complete and can be yielded.
        if game_pos is not None:
            game_headers["offset"] = max(0, game_pos - line_end_fix)
            yield game_headers
            game_pos = None

        last_pos += len(line)
        line = handle.readline()

    # Yield the headers of the last game.
    if game_pos is not None:
        game_headers["offset"] = max(0, game_pos - line_end_fix)
        yield game_headers
Example #2
0
def _repair_json_line(line, message):
    """Return a patched copy of a malformed JSON *line*, or None to skip it.

    *message* is the text of the ValueError that json.loads(line) raised; it
    selects which repair is attempted. The returned string is not guaranteed
    to parse. May raise ValueError when the Event/Site/Date separators do not
    split the line into exactly two parts each; the caller treats that as
    unrepairable.
    """
    # Backslashes take priority over every message-based repair.
    if "\\" in line:
        return line.replace("\\", "")

    if message.startswith("Expecting ',' delimiter"):
        if not ('"Date":' in line and '"Site":' in line and '"Event":' in line):
            return None
        # Strip every double quote from the Event and Site values and quote
        # them again; everything from the Date value onwards is kept verbatim.
        left, date = line.split(', "Date":')
        left, site = left.split(', "Site":')
        _, event = left.split('"Event":')
        event = event.replace('"', '')
        site = site.replace('"', '')
        return '{"Event":"%s", "Site":"%s", "Date":%s' % (event, site, date)

    if message.startswith("Invalid control character"):
        # Drop every character below U+0020 (this includes the newline).
        return "".join(char for char in line if ord(char) >= 32)

    # Python 2 reports "No JSON object could be decoded" where Python 3
    # reports "Expecting value".
    if message.startswith(("No JSON object could be decoded",
                           "Expecting value")):
        if line.startswith("[Date"):
            return line.replace("[Date", '{"Date')
        return None

    return None


def read_games(handle, handle_json=None):
    """Based on chess.pgn.scan_headers() from Niklas Fiekas python-chess

    Generate one header mapping per game.

    If *handle_json* is given, *handle* is ignored: every line of
    *handle_json* is decoded as a JSON document and the result is yielded.
    Lines that fail to decode get one repair attempt (see
    _repair_json_line); lines that still fail are skipped silently.

    Otherwise *handle* is read line by line as PGN text. For every game a
    collections.defaultdict(str) of its header tags is yielded, with an extra
    "offset" key holding the position of the first header line minus the
    length of one line ending, clamped at 0. Positions are accumulated with
    len(line), so they are byte offsets only when one element of a line is
    one byte of the file.
    """
    if handle_json is not None:
        for line in handle_json:
            # Decode inside the try, yield outside it: a GeneratorExit thrown
            # at the yield by close() must not be caught by these handlers.
            try:
                game = json.loads(line)
            except ValueError as e:
                try:
                    # str(e) instead of e.message, which Python 3 removed.
                    repaired = _repair_json_line(line, str(e))
                    if repaired is None:
                        continue
                    game = json.loads(repaired)
                except Exception:
                    # Best effort: a line that cannot be repaired is dropped.
                    continue
            except Exception:
                continue
            yield game
        return

    in_comment = False

    game_headers = None
    game_pos = None

    last_pos = 0
    line = handle.readline()

    # scoutfish creates game offsets at previous game end
    line_end_fix = 2 if line.endswith("\r\n") else 1

    while line:
        # Skip single line comments.
        if line.startswith("%"):
            last_pos += len(line)
            line = handle.readline()
            continue

        # Reading a header tag. Parse it and add it to the current headers.
        if not in_comment and line.startswith("["):
            tag_match = TAG_REGEX.match(line)
            if tag_match:
                if game_pos is None:
                    # First tag of a new game: remember where it starts.
                    game_headers = collections.defaultdict(str)
                    game_pos = last_pos

                game_headers[tag_match.group(1)] = tag_match.group(2)

                last_pos += len(line)
                line = handle.readline()
                continue

        # Reading movetext. Update parser state in_comment in order to skip
        # comments that look like header tags.
        if (not in_comment and "{" in line) or (in_comment and "}" in line):
            in_comment = line.rfind("{") > line.rfind("}")

        # Reading movetext. If there were headers, previously, those are now
        # complete and can be yielded.
        if game_pos is not None:
            game_headers["offset"] = max(0, game_pos - line_end_fix)
            yield game_headers
            game_pos = None

        last_pos += len(line)
        line = handle.readline()

    # Yield the headers of the last game.
    if game_pos is not None:
        game_headers["offset"] = max(0, game_pos - line_end_fix)
        yield game_headers