Python parse_turn_idの例、educe.stac.annotation.parse_turn_id Pythonの例

コード例 #1

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: eipiplusun/irit-stac

def infer_portioning(disc_dir):
    """Collect the sorted turn ids found in the glozz files of `disc_dir`.

    Parameters
    ----------
    disc_dir : string
        Path to a discourse folder holding the glozz .ac files of a game,
        presumably one file per portion (the `unannotated` stage seems
        the most convenient choice).

    Returns
    -------
    first_idx : list of turn ids, as returned by `parse_turn_id`
        One id per line read from the .ac files, sorted. Each id is
        parsed from the text that precedes the first ':' on the line.

    Raises
    ------
    ValueError
        If no file in `disc_dir` matches `*.ac`.
    """
    glozz_paths = glob(os.path.join(disc_dir, '*.ac'))
    if not glozz_paths:
        raise ValueError(
            'Unable to locate any glozz .ac file in {}'.format(disc_dir))

    turn_ids = []
    for glozz_path in glozz_paths:
        with open(glozz_path, 'rb') as glozz_stream:
            # the turn id is whatever precedes the first ':' of a line
            turn_ids.extend(
                parse_turn_id(raw_line.split(':', 1)[0].strip())
                for raw_line in glozz_stream)
    turn_ids.sort()
    return turn_ids

コード例 #2

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: eipiplusun/irit-stac

def backport_portioning(seg_file, first_idx):
    """Encode game portioning in `seg_file`, according to `first_idx`.

    The file is rewritten through a temporary `seg_file + '.fut'` file
    which then replaces `seg_file`. An empty row is inserted before every
    row whose turn id starts a portion other than the first one; rows
    that are already empty are kept as they are.

    Parameters
    ----------
    seg_file : string
        Path to the segmented version of the CSV file for the game
        (tab-separated, with a header row).
    first_idx : list of turn ids
        Identifier of the first turn of each portion; values must be
        comparable with what `parse_turn_id` returns.
    """
    seg_file_res = seg_file + '.fut'
    # the first portion needs no separator in front of it; slice once
    # here rather than once per row
    later_starts = first_idx[1:]
    with open(seg_file, 'rb') as seg_f:
        with open(seg_file_res, 'w') as seg_f_res:
            reader = csv.reader(seg_f, delimiter='\t')
            writer = csv.writer(seg_f_res, delimiter='\t',
                                lineterminator='\n')
            # copy the header line unchanged; the builtin next() works
            # on both python 2 and 3 iterators
            writer.writerow(next(reader))
            # regular lines
            for line in reader:
                # keep existing empty lines
                if not line or not ''.join(line).strip():
                    writer.writerow(line)
                    continue
                # insert an empty line just before a starting turn
                # (except for the turn starting the first portion)
                if parse_turn_id(line[0]) in later_starts:
                    writer.writerow([])
                # write the normal line
                writer.writerow(line)
    # replace the original segmented file
    os.rename(seg_file_res, seg_file)

コード例 #3

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: jrmyp/irit-stac

def backport_portioning(seg_file, first_idx):
    """Encode game portioning in `seg_file`, according to `first_idx`.

    Rows are copied to `seg_file + '.fut'`, with an empty row added in
    front of each row whose turn id opens a portion (the first portion
    excepted); the temporary file is then renamed over `seg_file`.

    Parameters
    ----------
    seg_file : string
        Path to the segmented version of the CSV file for the game
        (tab-separated, with a header row).
    first_idx : list of turn ids
        Identifier of the first turn of each portion; values must be
        comparable with what `parse_turn_id` returns.
    """
    seg_file_res = seg_file + '.fut'
    # starts of all portions but the first; computed once, outside the
    # row loop
    split_before = first_idx[1:]
    with open(seg_file, 'rb') as seg_f:
        with open(seg_file_res, 'w') as seg_f_res:
            reader = csv.reader(seg_f, delimiter='\t')
            writer = csv.writer(seg_f_res, delimiter='\t', lineterminator='\n')
            # leave header line (builtin next() instead of the python 2
            # only .next() method)
            writer.writerow(next(reader))
            # regular lines
            for line in reader:
                is_blank = not line or not ''.join(line).strip()
                # insert an empty line just before a starting turn;
                # existing empty lines are never parsed, only copied
                if not is_blank and parse_turn_id(line[0]) in split_before:
                    writer.writerow([])
                writer.writerow(line)
    # replace the original segmented file
    os.rename(seg_file_res, seg_file)

コード例 #4

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: eipiplusun/irit-stac

def read_portioning(seg_file):
    """Read portioning in the segmented csv file.

    A portion starts at the first non-empty row after the header and at
    the first non-empty row following any run of empty rows.

    Parameters
    ----------
    seg_file : string
        Path to the segmented CSV file (tab-separated, with a header
        row) in which empty rows separate portions.

    Returns
    -------
    first_idx : list of turn ids, as returned by `parse_turn_id`
        Identifier of the first turn of each portion, in file order.
    """
    first_idx = []
    with open(seg_file, 'rb') as seg_f:
        reader = csv.reader(seg_f, delimiter='\t')
        # skip the header line; builtin next() works on python 2 and 3
        next(reader)
        # the first data row always opens a portion
        grab_next = True
        for line in reader:
            # if empty line, be ready to start a new portion
            if not line or not ''.join(line).strip():
                grab_next = True
                continue
            # new portion
            if grab_next:
                first_idx.append(parse_turn_id(line[0]))
                grab_next = False
    return first_idx

コード例 #5

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: jrmyp/irit-stac

def infer_portioning(disc_dir):
    """Infer the portioning of a game from the glozz files in `disc_dir`.

    Parameters
    ----------
    disc_dir : string
        Path to a discourse folder with the glozz .ac files of the game,
        presumably one per portion (`unannotated` seems the most
        convenient choice).

    Returns
    -------
    first_idx : list of turn ids, as returned by `parse_turn_id`
        Sorted ids, one for each line of each .ac file, parsed from the
        part of the line located before its first ':'.

    Raises
    ------
    ValueError
        If `disc_dir` holds no `*.ac` file.
    """
    pattern = os.path.join(disc_dir, '*.ac')
    ac_paths = glob(pattern)
    if not ac_paths:
        message = 'Unable to locate any glozz .ac file in {}'
        raise ValueError(message.format(disc_dir))

    collected = []
    for ac_path in ac_paths:
        with open(ac_path, 'rb') as handle:
            for text_line in handle:
                # keep only the prefix that carries the turn id
                prefix = text_line.split(':', 1)[0]
                collected.append(parse_turn_id(prefix.strip()))
    return sorted(collected)

コード例 #6

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: jrmyp/irit-stac

def read_portioning(seg_file):
    """Read portioning in the segmented csv file.

    Empty rows act as portion separators: the first non-empty row after
    the header, and the first non-empty row after each run of empty
    rows, open a portion.

    Parameters
    ----------
    seg_file : string
        Path to the segmented CSV file (tab-separated, with a header
        row).

    Returns
    -------
    first_idx : list of turn ids, as returned by `parse_turn_id`
        Identifier of the first turn of each portion, in file order.
    """
    first_idx = []
    with open(seg_file, 'rb') as seg_f:
        reader = csv.reader(seg_f, delimiter='\t')
        # leave header line (builtin next() rather than the python 2
        # only .next() method; the header itself is not needed)
        next(reader)
        at_portion_start = True
        for line in reader:
            if not line or not ''.join(line).strip():
                # separator: the next real row opens a new portion
                at_portion_start = True
            elif at_portion_start:
                first_idx.append(parse_turn_id(line[0]))
                at_portion_start = False
    return first_idx

コード例 #7

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: eipiplusun/irit-stac

def _transfer_turns(f_orig, f_dest, f_res, verbose=0):
    """Transfer turns from `f_orig` to `f_dest` to produce `f_res`.

    Parameters
    ----------
    f_orig : File
        Version of the file with higher priority (typically, the
        currently annotated version).
    f_dest : File
        Version of the file with lower priority (typically, the
        unannotated but finer-grained version).
    f_res : File
        `f_dest` with turns transfered from `f_orig`.
    verbose : int
        Verbosity level.
    """
    reader_orig = csv.reader(f_orig, delimiter='\t')
    reader_dest = csv.reader(f_dest, delimiter='\t')
    writer_res = csv.writer(f_res, delimiter='\t',
                            lineterminator='\n')

    # read and write header
    header_orig = reader_orig.next()
    header_dest = reader_dest.next()
    writer_res.writerow(header_dest)

    # read and write content
    for line_dest in reader_dest:
        # _orig exhausted means new turns in _dest
        try:
            line_orig = reader_orig.next()
        except StopIteration:
            writer_res.writerow(line_dest)
            continue

        # easy case: keep lines that are identical on both sides
        if line_orig == line_dest:
            writer_res.writerow(line_dest)
            continue

        # otherwise:
        # * empty lines mark subdoc split:
        #   we need to be careful to avoid splitting inside chunks of
        #   game messages
        buff_orig = []
        while (not line_orig
               or not ''.join(line_orig).strip()):
            # TODO? get rid of spurious empty lines
            # look ahead for the next turn in _orig
            buff_orig.append(line_orig)
            line_orig = reader_orig.next()
        # adjust subdoc split: append extra turns from _dest until
        # either we reach a safe split point
        # or all extra turns have been consumed
        if buff_orig:
            try:  # why try/catch: cf. DEBUG below
                turn_id_orig = parse_turn_id(line_orig[0])
            except ValueError:
                print([i for i, c in enumerate(line_orig[0])
                       if c == '\t'])
                print('\n'.join(line_orig))
                print(line_orig[0].split('\t'))
                raise
            # new turns in _dest should be appended to the current subdoc,
            # until we reach "It's X's turn to roll the dice." (or none
            # remains)
            try:
                turn_id_dest = parse_turn_id(line_dest[0])
            except ValueError:
                print([i for i, c in enumerate(line_dest[0])
                       if c == '\t'])
                print('\n'.join(line_dest))
                print(line_dest[0].split('\t'))
                raise
            while (turn_id_dest < turn_id_orig
                   and not (
                       line_dest[2] == 'Server'
                       and line_dest[5].endswith('turn to roll the dice.'))):
                writer_res.writerow(line_dest)
                # read next turn from _dest
                line_dest = reader_dest.next()
                try:
                    turn_id_dest = parse_turn_id(line_dest[0])
                except ValueError:
                    print([i for i, c in enumerate(line_dest[0])
                           if c == '\t'])
                    print('\n'.join(line_dest))
                    print(line_dest[0].split('\t'))
                    raise
            # finally, write the (buffered) subdoc split
            for buff_line in buff_orig:
                writer_res.writerow(buff_line)

        # write extra turns from _dest
        # DEBUG
        # FIXME csv reader (doublequote=True) fails to split fields on
        # if one field contains a doubled double quote (supposedly read
        # as one double-quote)
        try:
            parse_turn_id(line_orig[0])
        except ValueError:
            print('line_orig[0]', line_orig[0])
            print('Positions of \\t in line_orig[0]',
                  [i for i, c in enumerate(line_orig[0])
                   if c == '\t'])
            print('\n'.join(line_orig))
            print(line_orig[0].split('\t'))
            raise
        # end DEBUG
        if parse_turn_id(line_orig[0]) < parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # new turns in _dest: write as they are
        while parse_turn_id(line_orig[0]) > parse_turn_id(line_dest[0]):
            writer_res.writerow(line_dest)
            line_dest = reader_dest.next()

        if parse_turn_id(line_orig[0]) != parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # matching turns: transfer line from _orig, with manually segmented
        # text
        # optional warnings for differing texts
        if verbose:
            txt_orig = line_orig[5]
            txt_dest = line_dest[5]
            raw_text_orig = ''.join(txt_orig.split('&'))
            raw_text_dest = ''.join(txt_dest.split('&'))
            if raw_text_orig != raw_text_dest:
                err_msg = [
                    "W: texts differ at turn {}".format(line_orig[0]),
                    "< " + txt_orig,
                    "> " + txt_dest,
                ]
                print('\n'.join(err_msg), file=sys.stderr)
        # finally, write the line
        writer_res.writerow(line_orig)

コード例 #8

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: eipiplusun/irit-stac

def _backport_turn_text(f_orig, f_dest, f_res, verbose=0):
    """Backport turn text from `f_orig` to `f_dest` to produce `f_res`.

    Segment delimiters are stripped from the turn text in `f_orig`.

    Parameters
    ----------
    f_orig : File
        Version of the file with higher priority.
    f_dest : File
        Version of the file with lower priority.
    f_res : File
        `f_dest` with turns transfered from `f_orig`.
    verbose : int
        Verbosity level.
    """
    reader_orig = csv.reader(f_orig, delimiter='\t')
    reader_dest = csv.reader(f_dest, delimiter='\t')
    writer_res = csv.writer(f_res, delimiter='\t',
                            lineterminator='\n')

    # read and write header
    header_orig = reader_orig.next()
    header_dest = reader_dest.next()
    writer_res.writerow(header_dest)

    # read and write content
    for line_dest in reader_dest:
        # TODO? handle exhaustion of f_orig (try...except?)
        line_orig = reader_orig.next()

        # easy case: keep lines that are identical on both sides
        if line_orig == line_dest:
            writer_res.writerow(line_dest)
            continue

        # skip additional empty lines from f_orig
        while (not line_orig or
               not ''.join(line_orig).strip()):
            line_orig = reader_orig.next()

        if parse_turn_id(line_orig[0]) != parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # matching turns: transfer line from _orig, without '&'
        txt_orig = line_orig[5]
        # optional warnings for differing texts
        if verbose:
            txt_dest = line_dest[5]
            raw_text_orig = ''.join(txt_orig.split('&'))
            raw_text_dest = ''.join(txt_dest.split('&'))
            if raw_text_orig != raw_text_dest:
                err_msg = [
                    "W: texts differ at turn {}".format(line_orig[0]),
                    "< " + txt_orig,
                    "> " + txt_dest,
                ]
                print('\n'.join(err_msg), file=sys.stderr)
        # finally, write the line without '&'
        new_line_dest = line_orig
        new_line_dest[5] = txt_orig.replace('&', '')
        writer_res.writerow(new_line_dest)

コード例 #9

0

ファイルを表示

ファイル: inject_spectator.py プロジェクト: fbuijs/irit-stac

def _transfer_turns(f_orig, f_dest, f_res, verbose=0):
    """Transfer turns from `f_orig` to `f_dest` to produce `f_res`.

    Parameters
    ----------
    f_orig : File
        Version of the file with higher priority.
    f_dest : File
        Version of the file with lower priority.
    f_res : File
        `f_dest` with turns transfered from `f_orig`.
    verbose : int
        Verbosity level.
    """
    reader_orig = csv.reader(f_orig, delimiter='\t')
    reader_dest = csv.reader(f_dest, delimiter='\t')
    writer_res = csv.writer(f_res, delimiter='\t')

    # read and write header
    header_orig = reader_orig.next()
    header_dest = reader_dest.next()
    writer_res.writerow(header_dest)

    # read and write content
    for line_dest in reader_dest:
        # _orig exhausted means new turns in _dest
        try:
            line_orig = reader_orig.next()
        except StopIteration:
            writer_res.writerow(line_dest)
            continue

        # easy case: keep lines that are identical on both sides
        if line_orig == line_dest:
            writer_res.writerow(line_dest)
            continue

        # transfer empty lines, they mark subdoc split
        # TODO? get rid of spurious empty lines
        while (not line_orig or
               not ''.join(line_orig).strip()):
            writer_res.writerow(line_orig)  # transfer split
            line_orig = reader_orig.next()

        # DEBUG
        # FIXME csv reader (doublequote=True) fails to split fields on
        # if one field contains a doubled double quote (supposedly read
        # as one double-quote)
        try:
            parse_turn_id(line_orig[0])
        except ValueError:
            print([i for i, c in enumerate(line_orig[0])
                   if c == '\t'])
            print('\n'.join(line_orig))
            print(line_orig[0].split('\t'))
            raise
        # end DEBUG
        if parse_turn_id(line_orig[0]) < parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # new turns in _dest: write as they are
        while parse_turn_id(line_orig[0]) > parse_turn_id(line_dest[0]):
            writer_res.writerow(line_dest)
            line_dest = reader_dest.next()

        if parse_turn_id(line_orig[0]) != parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # matching turns: transfer line from _orig, with manually segmented
        # text
        # optional warnings for differing texts
        if verbose:
            txt_orig = line_orig[5]
            txt_dest = line_dest[5]
            raw_text_orig = ''.join(txt_orig.split('&'))
            raw_text_dest = ''.join(txt_dest.split('&'))
            if raw_text_orig != raw_text_dest:
                err_msg = [
                    "W: texts differ at turn {}".format(line_orig[0]),
                    "< " + txt_orig,
                    "> " + txt_dest,
                ]
                print('\n'.join(err_msg), file=sys.stderr)
        # finally, write the line
        writer_res.writerow(line_orig)

コード例 #10

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: jrmyp/irit-stac

def _transfer_turns(f_orig, f_dest, f_res, verbose=0):
    """Transfer turns from `f_orig` to `f_dest` to produce `f_res`.

    Parameters
    ----------
    f_orig : File
        Version of the file with higher priority (typically, the
        currently annotated version).
    f_dest : File
        Version of the file with lower priority (typically, the
        unannotated but finer-grained version).
    f_res : File
        `f_dest` with turns transfered from `f_orig`.
    verbose : int
        Verbosity level.
    """
    reader_orig = csv.reader(f_orig, delimiter='\t')
    reader_dest = csv.reader(f_dest, delimiter='\t')
    writer_res = csv.writer(f_res, delimiter='\t', lineterminator='\n')

    # read and write header
    header_orig = reader_orig.next()
    header_dest = reader_dest.next()
    writer_res.writerow(header_dest)

    # read and write content
    for line_dest in reader_dest:
        # _orig exhausted means new turns in _dest
        try:
            line_orig = reader_orig.next()
        except StopIteration:
            writer_res.writerow(line_dest)
            continue

        # easy case: keep lines that are identical on both sides
        if line_orig == line_dest:
            writer_res.writerow(line_dest)
            continue

        # otherwise:
        # * empty lines mark subdoc split:
        #   we need to be careful to avoid splitting inside chunks of
        #   game messages
        buff_orig = []
        while (not line_orig or not ''.join(line_orig).strip()):
            # TODO? get rid of spurious empty lines
            # look ahead for the next turn in _orig
            buff_orig.append(line_orig)
            line_orig = reader_orig.next()
        # adjust subdoc split: append extra turns from _dest until
        # either we reach a safe split point
        # or all extra turns have been consumed
        if buff_orig:
            try:  # why try/catch: cf. DEBUG below
                turn_id_orig = parse_turn_id(line_orig[0])
            except ValueError:
                print([i for i, c in enumerate(line_orig[0]) if c == '\t'])
                print('\n'.join(line_orig))
                print(line_orig[0].split('\t'))
                raise
            # new turns in _dest should be appended to the current subdoc,
            # until we reach "It's X's turn to roll the dice." (or none
            # remains)
            try:
                turn_id_dest = parse_turn_id(line_dest[0])
            except ValueError:
                print([i for i, c in enumerate(line_dest[0]) if c == '\t'])
                print('\n'.join(line_dest))
                print(line_dest[0].split('\t'))
                raise
            while (turn_id_dest < turn_id_orig
                   and not (line_dest[2] == 'Server' and
                            line_dest[5].endswith('turn to roll the dice.'))):
                writer_res.writerow(line_dest)
                # read next turn from _dest
                line_dest = reader_dest.next()
                try:
                    turn_id_dest = parse_turn_id(line_dest[0])
                except ValueError:
                    print([i for i, c in enumerate(line_dest[0]) if c == '\t'])
                    print('\n'.join(line_dest))
                    print(line_dest[0].split('\t'))
                    raise
            # finally, write the (buffered) subdoc split
            for buff_line in buff_orig:
                writer_res.writerow(buff_line)

        # write extra turns from _dest
        # DEBUG
        # FIXME csv reader (doublequote=True) fails to split fields on
        # if one field contains a doubled double quote (supposedly read
        # as one double-quote)
        try:
            parse_turn_id(line_orig[0])
        except ValueError:
            print('line_orig[0]', line_orig[0])
            print('Positions of \\t in line_orig[0]',
                  [i for i, c in enumerate(line_orig[0]) if c == '\t'])
            print('\n'.join(line_orig))
            print(line_orig[0].split('\t'))
            raise
        # end DEBUG
        if parse_turn_id(line_orig[0]) < parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # new turns in _dest: write as they are
        while parse_turn_id(line_orig[0]) > parse_turn_id(line_dest[0]):
            writer_res.writerow(line_dest)
            line_dest = reader_dest.next()

        if parse_turn_id(line_orig[0]) != parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # matching turns: transfer line from _orig, with manually segmented
        # text
        # optional warnings for differing texts
        if verbose:
            txt_orig = line_orig[5]
            txt_dest = line_dest[5]
            raw_text_orig = ''.join(txt_orig.split('&'))
            raw_text_dest = ''.join(txt_dest.split('&'))
            if raw_text_orig != raw_text_dest:
                err_msg = [
                    "W: texts differ at turn {}".format(line_orig[0]),
                    "< " + txt_orig,
                    "> " + txt_dest,
                ]
                print('\n'.join(err_msg), file=sys.stderr)
        # finally, write the line
        writer_res.writerow(line_orig)

コード例 #11

0

ファイルを表示

ファイル: reacquire_game.py プロジェクト: jrmyp/irit-stac

def _backport_turn_text(f_orig, f_dest, f_res, verbose=0):
    """Backport turn text from `f_orig` to `f_dest` to produce `f_res`.

    Segment delimiters are stripped from the turn text in `f_orig`.

    Parameters
    ----------
    f_orig : File
        Version of the file with higher priority.
    f_dest : File
        Version of the file with lower priority.
    f_res : File
        `f_dest` with turns transfered from `f_orig`.
    verbose : int
        Verbosity level.
    """
    reader_orig = csv.reader(f_orig, delimiter='\t')
    reader_dest = csv.reader(f_dest, delimiter='\t')
    writer_res = csv.writer(f_res, delimiter='\t', lineterminator='\n')

    # read and write header
    header_orig = reader_orig.next()
    header_dest = reader_dest.next()
    writer_res.writerow(header_dest)

    # read and write content
    for line_dest in reader_dest:
        # TODO? handle exhaustion of f_orig (try...except?)
        line_orig = reader_orig.next()

        # easy case: keep lines that are identical on both sides
        if line_orig == line_dest:
            writer_res.writerow(line_dest)
            continue

        # skip additional empty lines from f_orig
        while (not line_orig or not ''.join(line_orig).strip()):
            line_orig = reader_orig.next()

        if parse_turn_id(line_orig[0]) != parse_turn_id(line_dest[0]):
            err_msg = 'Weird state that should never be reached: {}\t{}'
            raise ValueError(err_msg.format(line_orig, line_dest))

        # matching turns: transfer line from _orig, without '&'
        txt_orig = line_orig[5]
        # optional warnings for differing texts
        if verbose:
            txt_dest = line_dest[5]
            raw_text_orig = ''.join(txt_orig.split('&'))
            raw_text_dest = ''.join(txt_dest.split('&'))
            if raw_text_orig != raw_text_dest:
                err_msg = [
                    "W: texts differ at turn {}".format(line_orig[0]),
                    "< " + txt_orig,
                    "> " + txt_dest,
                ]
                print('\n'.join(err_msg), file=sys.stderr)
        # finally, write the line without '&'
        new_line_dest = line_orig
        new_line_dest[5] = txt_orig.replace('&', '')
        writer_res.writerow(new_line_dest)