Esempio n. 1
0
def get_training_instances(listener=False):
    """Build a shuffled list of training instances from the Munroe corpus.

    `munroecorpus.get_training_handles()` returns three objects that are all
    indexed by name.  For every name, the three corresponding handles are
    passed to `load_colors`, and one `Instance` is created per loaded color.

    With `listener=True` the name is the input and the color the output;
    otherwise the color is the input and the name the output.  The list is
    shuffled in place with the module-level `rng` before being returned.
    """
    h_handles, s_handles, v_handles = munroecorpus.get_training_handles()
    instances = []
    for name in h_handles:
        colors = load_colors(h_handles[name], s_handles[name], v_handles[name])
        for color in colors:
            if listener:
                instances.append(Instance(input=name, output=color))
            else:
                instances.append(Instance(input=color, output=name))
    rng.shuffle(instances)
    return instances
Esempio n. 2
0
def get_eval_instances(handles, listener=False):
    """Build a shuffled list of evaluation instances from data file handles.

    `handles` is a mapping from name to a handle accepted by
    `munroecorpus.open_datafile`.  Every color read from a handle is
    converted to a tuple and paired with the handle's name.

    With `listener=True` the name is the input and the color tuple the
    output; otherwise the color tuple is the input and the name the output.
    The list is shuffled in place with the module-level `rng` before being
    returned.
    """
    insts = []
    # `.items()` is available on both Python 2 and Python 3 mappings, whereas
    # `.iteritems()` exists only on Python 2 dicts.
    for name, handle in handles.items():
        for color in munroecorpus.open_datafile(handle):
            color = tuple(color)
            if listener:
                insts.append(Instance(input=name, output=color))
            else:
                insts.append(Instance(input=color, output=name))
    rng.shuffle(insts)
    return insts
Esempio n. 3
0
def triples_to_insts(data, listener=False):
    """Convert `(name, color, context)` triples into a list of `Instance`s.

    With `listener=True` each instance maps the name to the color and
    carries the context as `alt_outputs`; otherwise it maps the color to the
    name and carries the context as `alt_inputs`.
    """
    def _listener_inst(name, color, context):
        return Instance(input=name, output=color, alt_outputs=context)

    def _speaker_inst(name, color, context):
        return Instance(input=color, alt_inputs=context, output=name)

    build = _listener_inst if listener else _speaker_inst
    converted = []
    for name, color, context in data:
        converted.append(build(name, color, context))
    return converted
Esempio n. 4
0
def pairs_to_insts(data, listener=False):
    """Convert `(name, color)` pairs into a list of `Instance`s.

    With `listener=True` the name is the input and the color the output;
    otherwise the color is the input and the name the output.
    """
    converted = []
    for name, color in data:
        if listener:
            inst = Instance(input=name, output=color)
        else:
            inst = Instance(input=color, output=name)
        converted.append(inst)
    return converted
Esempio n. 5
0
def trials_to_insts(trials, listener=False):
    """Convert trial objects into a list of `Instance`s.

    For each trial the following tuples are derived:

    * the `string_description` of every description,
    * for every description, the sorted, de-duplicated string forms of its
      `attribute_set`,
    * the indices of the entities for which `is_target()` is true,
    * for every entity, the string forms of its `attributes`.

    With `listener=True` the descriptions are the input (annotated with the
    attribute tuples) and the target indices the output, with the entity
    attributes as `alt_outputs`.  Otherwise the roles are mirrored: target
    indices are the input, entity attributes are `alt_inputs`, and the
    descriptions (plus annotation) are the output.  `trial.filenames` is
    stored as the instance source in both cases.
    """
    converted = []
    for trial in trials:
        strings = tuple(d.string_description for d in trial.descriptions)
        attr_sets = tuple(
            tuple(sorted({str(attr) for attr in d.attribute_set}))
            for d in trial.descriptions)
        target_indices = tuple(
            idx for idx, entity in enumerate(trial.entities)
            if entity.is_target())
        referents = tuple(
            tuple(str(attr) for attr in entity.attributes)
            for entity in trial.entities)

        if listener:
            fields = dict(input=strings,
                          annotated_input=attr_sets,
                          output=target_indices,
                          alt_outputs=referents)
        else:
            fields = dict(input=target_indices,
                          alt_inputs=referents,
                          output=strings,
                          annotated_output=attr_sets)
        converted.append(Instance(source=trial.filenames, **fields))
    return converted
Esempio n. 6
0
def hawkins_target(listener=False):
    """Return `hawkins_context` instances with the target index resolved.

    In listener mode the integer output of each context instance is replaced
    by the entry of `alt_outputs` it points at; in speaker mode the integer
    input is replaced by the entry of `alt_inputs` it points at.  The
    attribute dictionary of the originating context instance is stored as
    the new instance's `source`.
    """
    resolved = []
    for inst in hawkins_context(listener=listener):
        if listener:
            target = inst.alt_outputs[inst.output]
            resolved.append(Instance(output=target,
                                     input=inst.input,
                                     source=inst.__dict__))
        else:
            target = inst.alt_inputs[inst.input]
            resolved.append(Instance(input=target,
                                     output=inst.output,
                                     source=inst.__dict__))
    return resolved
Esempio n. 7
0
def filtered(listener=False):
    """Load (once) and return the filtered corpus as a list of `Instance`s.

    Only rows whose `role` is `'speaker'` are used.  Rows sharing the same
    `(gameid, roundNum)` key are merged into a single instance whose input
    is the messages joined with `' ~ '`.  Listener instances map the message
    to the target index, with the context colors as `alt_outputs`; speaker
    instances are produced by calling `inverted()` on each listener
    instance.

    As a side effect the module-level `FILTERED_SPLIT` is filled with the
    dataset index at which each game id listed in `FILTERED_SPLIT_IDS` is
    first matched (ids are matched in order).

    Results are cached in module-level globals.  They are built in local
    variables and published only after the whole file has been processed,
    so a failed load (missing file, malformed row) does not leave a
    half-initialised cache behind for later calls.

    Returns the listener dataset if `listener` is true, else the speaker
    dataset.
    """
    global FILTERED_DATASET_LISTENER, FILTERED_DATASET_SPEAKER, FILTERED_SPLIT
    if FILTERED_DATASET_LISTENER is not None:
        if listener:
            return FILTERED_DATASET_LISTENER
        else:
            return FILTERED_DATASET_SPEAKER

    split = []
    listener_insts = []
    # Maps a (gameid, roundNum) key to the position of its instance in
    # `listener_insts`, so follow-up messages can be merged into it.
    index_by_key = {}

    with open('behavioralAnalysis/humanOutput/filteredCorpus.csv',
              'r') as infile:
        for row in csv.DictReader(infile):
            if row['role'] != 'speaker':
                continue

            key = (row['gameid'], row['roundNum'])

            # Record where the next expected split game id starts.
            if len(split) < len(FILTERED_SPLIT_IDS) and \
                    key[0] == FILTERED_SPLIT_IDS[len(split)]:
                split.append(len(listener_insts))

            message = row['contents']

            if key in index_by_key:
                # Additional message in an already-seen round: rebuild the
                # instance with the new message appended to its input.
                idx = index_by_key[key]
                merged = dict(listener_insts[idx].__dict__)
                merged['input'] = ' ~ '.join((merged['input'], message))
                listener_insts[idx] = Instance(**merged)
                continue

            index_by_key[key] = len(listener_insts)

            target_idx, alt_colors = context_from_row(row)

            listener_insts.append(
                Instance(input=message,
                         output=target_idx,
                         alt_outputs=alt_colors,
                         source=key + (row['condition'], )))

    speaker_insts = [inst.inverted() for inst in listener_insts]

    # Publish the finished results to the module-level cache.
    FILTERED_SPLIT = split
    FILTERED_DATASET_LISTENER = listener_insts
    FILTERED_DATASET_SPEAKER = speaker_insts

    if listener:
        return FILTERED_DATASET_LISTENER
    else:
        return FILTERED_DATASET_SPEAKER
Esempio n. 8
0
def reference_game(insts, gen_func, listener=False):
    """Turn color instances into reference-game instances, in place.

    For each instance the target color (the output in listener mode, the
    input otherwise) is combined with `options.num_distractors` colors
    produced by `gen_func(color)`.  The target is inserted into the
    distractor list at the position drawn from
    `rng.randint(0, len(distractors) + 1)`, and that position replaces the
    color in the new instance; the full list is stored as `alt_outputs`
    (listener) or `alt_inputs` (speaker).

    Every element of `insts` is overwritten with its replacement, and the
    same list object is returned.
    """
    # NOTE(review): the position is only a valid index into the context if
    # `rng.randint` excludes its upper bound (NumPy-style) -- confirm.
    options = config.options()
    for idx, inst in enumerate(insts):
        target = inst.output if listener else inst.input
        others = [gen_func(target) for _ in range(options.num_distractors)]
        slot = rng.randint(0, len(others) + 1)
        lineup = others[:slot] + [target] + others[slot:]
        if listener:
            insts[idx] = Instance(inst.input, slot, alt_outputs=lineup)
        else:
            insts[idx] = Instance(slot, inst.output, alt_inputs=lineup)
    return insts
Esempio n. 9
0
def reference_game(insts, gen_func, listener=False):
    """Return reference-game versions of the given color instances.

    For each instance the target color (the output in listener mode, the
    input otherwise) is combined with `options.num_distractors` colors
    produced by `gen_func(color)`.  The target is inserted into the
    distractor list at the position drawn from
    `rng.randint(0, len(distractors) + 1)`, and that position replaces the
    color in the new instance; the full list is stored as `alt_outputs`
    (listener) or `alt_inputs` (speaker).

    A new list is returned; `insts` itself is not modified.
    """
    # NOTE(review): the position is only a valid index into the context if
    # `rng.randint` excludes its upper bound (NumPy-style) -- confirm.
    options = config.options()

    def _as_reference(inst):
        target = inst.output if listener else inst.input
        others = [gen_func(target) for _ in range(options.num_distractors)]
        slot = rng.randint(0, len(others) + 1)
        lineup = others[:slot] + [target] + others[slot:]
        if listener:
            return Instance(inst.input, slot, alt_outputs=lineup)
        return Instance(slot, inst.output, alt_inputs=lineup)

    return [_as_reference(inst) for inst in insts]
Esempio n. 10
0
def hawkins_context(listener=False, speakerID='speaker', suffix=''):
    """Load reference-game instances from the Hawkins message and click CSVs.

    Messages whose `sender` equals `speakerID` are grouped by
    `(gameid, roundNum)`.  For the first click row of each such key, the
    three objects (`click`, `alt1`, `alt2`) are converted with `hsl_to_hsv`,
    sorted by their `LocS` column, and the position of the single object
    whose status is `'target'` is recorded.  Later click rows with an
    already-seen key are skipped.

    `suffix` is inserted into both file names.  For the
    `'Chinese_filtered'` suffix, rounds whose joined message is empty after
    removing `'~'` and whitespace are dropped.

    With `listener=True` each instance maps the `' ~ '`-joined message to
    the target index with the colors as `alt_outputs`; otherwise it maps the
    target index (colors as `alt_inputs`) to the message.  The source is the
    key extended with the row's `condition`.
    """
    messages = defaultdict(list)
    with open('hawkins_data/colorReferenceMessage%s.csv' % suffix,
              'r') as infile:
        for row in csv.DictReader(infile):
            if row['sender'] == speakerID:
                message = row['contents']  # TODO: clean, tokenize?
                # Python 2's csv module yields byte strings, which must be
                # decoded; Python 3's yields text already, and `str` has no
                # `.decode` method.
                if isinstance(message, bytes):
                    message = message.decode('utf-8')
                messages[(row['gameid'], row['roundNum'])].append(message)

    result = []
    with open('hawkins_data/colorReferenceClicks%s.csv' % suffix,
              'r') as infile:
        reader = csv.DictReader(infile)
        seen = set()
        for row in reader:
            key = (row['gameid'], row['roundNum'])
            if key in seen:
                # Only the first click row per round is used.
                continue
            seen.add(key)
            # One (hsv_color, LocS, status) triple per displayed object.
            context = [(hsl_to_hsv(
                (row['%sColH' % i], row['%sColS' % i], row['%sColL' % i])),
                        row['%sLocS' % i], row['%sStatus' % i])
                       for i in ('click', 'alt1', 'alt2')]
            context.sort(key=lambda c: c[1])
            target_idx = [
                i for i, (_, _, status) in enumerate(context)
                if status == 'target'
            ]
            assert len(target_idx) == 1, context
            target_idx = target_idx[0]
            alt_colors = [c for (c, _, _) in context]
            message = ' ~ '.join(messages[key])
            if suffix == 'Chinese_filtered' and not message.replace(
                    '~', '').strip():
                continue

            if listener:
                inst = Instance(input=message,
                                output=target_idx,
                                alt_outputs=alt_colors,
                                source=key + (row['condition'], ))
            else:
                inst = Instance(input=target_idx,
                                alt_inputs=alt_colors,
                                output=message,
                                source=key + (row['condition'], ))
            result.append(inst)
    return result
Esempio n. 11
0
def bilingual_tag_instance(inst, lang, listener=False, unicodify=False):
    """Return a copy of `inst` whose linguistic side is tagged with `lang`.

    The tagged side is the input in listener mode and the output otherwise.
    A string value becomes `'<lang>:<value>'`; any other value becomes the
    tuple `(lang, value)`.  The other side, `alt_inputs`, `alt_outputs` and
    `source` are carried over unchanged.

    If `unicodify` is true, the tagged side must be a string (checked with
    an assertion) and is converted to the text type before tagging.
    """
    # `basestring` and `unicode` exist only on Python 2; fall back to `str`
    # on Python 3 so the function works on both.
    try:
        string_types, to_text = basestring, unicode
    except NameError:
        string_types, to_text = str, str

    def _tag(value):
        if unicodify:
            assert isinstance(value, string_types), repr(value)
            value = to_text(value)
        if isinstance(value, string_types):
            return ':'.join((lang, value))
        return (lang, value)

    if listener:
        new_inp, new_out = _tag(inst.input), inst.output
    else:
        new_inp, new_out = inst.input, _tag(inst.output)

    return Instance(input=new_inp,
                    output=new_out,
                    alt_inputs=inst.alt_inputs,
                    alt_outputs=inst.alt_outputs,
                    source=inst.source)
Esempio n. 12
0
def dataset(filename):
    """Lazily yield one `Instance` per JSON line of `filename`.

    Each non-blank line is parsed with `json.loads` and the resulting
    mapping is passed to `Instance` as keyword arguments.  Files whose name
    ends in `.gz` are opened with `gzip.open`.  Blank lines (such as a
    trailing empty line) are skipped rather than handed to the JSON parser.

    Nothing is yielded when `filename` is empty or `None`.
    """
    if not filename:
        return
    openfunc = gzip.open if filename.endswith('.gz') else open
    with openfunc(filename, 'r') as infile:
        for line in infile:
            line = line.strip()
            if not line:
                continue
            yield Instance(**json.loads(line))
Esempio n. 13
0
def hawkins_context(listener=False):
    """Load reference-game instances from the Hawkins message and click CSVs.

    Messages whose `sender` is `'speaker'` are grouped by
    `(gameid, roundNum)`.  For every click row, the three objects (`click`,
    `alt1`, `alt2`) are converted with `hsl_to_hsv`, sorted by their `LocS`
    column, and the position of the single object whose status is
    `'target'` is recorded.

    With `listener=True` each instance maps the `' ~ '`-joined message to
    the target index with the colors as `alt_outputs`; otherwise it maps the
    target index (colors as `alt_inputs`) to the message.  The source is the
    `(gameid, roundNum)` key.
    """
    utterances = defaultdict(list)
    with open('hawkins_data/colorReferenceMessage.csv', 'r') as msg_file:
        for entry in csv.DictReader(msg_file):
            if row_is_speaker(entry) if False else entry['sender'] != 'speaker':
                continue
            # TODO: clean, tokenize?
            round_key = (entry['gameid'], entry['roundNum'])
            utterances[round_key].append(entry['contents'])

    instances = []
    with open('hawkins_data/colorReferenceClicks.csv', 'r') as click_file:
        for entry in csv.DictReader(click_file):
            # One (hsv_color, LocS, status) triple per displayed object.
            objects = []
            for prefix in ('click', 'alt1', 'alt2'):
                hsl = (entry['%sColH' % prefix],
                       entry['%sColS' % prefix],
                       entry['%sColL' % prefix])
                objects.append((hsl_to_hsv(hsl),
                                entry['%sLocS' % prefix],
                                entry['%sStatus' % prefix]))
            objects.sort(key=lambda obj: obj[1])

            target_positions = [
                pos for pos, obj in enumerate(objects) if obj[2] == 'target'
            ]
            assert len(target_positions) == 1, objects
            target_idx = target_positions[0]
            colors = [obj[0] for obj in objects]

            round_key = (entry['gameid'], entry['roundNum'])
            utterance = ' ~ '.join(utterances[round_key])

            if listener:
                instances.append(Instance(input=utterance,
                                          output=target_idx,
                                          alt_outputs=colors,
                                          source=round_key))
            else:
                instances.append(Instance(input=target_idx,
                                          alt_inputs=colors,
                                          output=utterance,
                                          source=round_key))
    return instances
Esempio n. 14
0
def cycle_shuffled(insts):
    '''
    Endlessly generate the elements of insts, reshuffling the list (with the
    module-level rng) after every complete pass.

    The first pass yields the list's own elements in their current order.
    Every later pass yields fresh copies whose source is marked as a repeat:
    a tuple source gets 'repeat' appended, a dict source is copied and gets
    a true 'repeat' entry, and a None source becomes {'repeat': True}.

    Note: the order of the list is modified in place (the instances in it
    are not). An empty list yields nothing.
    '''
    def _repeat_copy(original):
        clone = Instance(**original.__dict__)
        source = clone.source
        if isinstance(source, tuple):
            clone.source = source + ('repeat', )
        elif isinstance(source, dict):
            marked = dict(source)
            marked['repeat'] = True
            clone.source = marked
        elif source is None:
            clone.source = {'repeat': True}
        return clone

    if not insts:
        return

    # First pass: hand out the original objects untouched.
    for inst in insts:
        yield inst
    rng.shuffle(insts)

    # Later passes: hand out marked copies, reshuffling after each pass.
    while insts:
        for inst in insts:
            yield _repeat_copy(inst)
        rng.shuffle(insts)
def cycle_shuffled(insts):
    '''
    Generator that loops over insts forever, shuffling the list with the
    module-level rng between passes.

    On the first pass the elements are yielded as they are. On every
    subsequent pass each element is re-created from its attribute dict and
    its source is flagged: 'repeat' is appended to a tuple source, a dict
    source is copied and given 'repeat': True, and a None source is replaced
    by {'repeat': True}.

    Note: reorders the given list destructively, but leaves the instances it
    contains untouched. Stops immediately if the list is empty.
    '''
    def _flagged(original):
        duplicate = Instance(**original.__dict__)
        if isinstance(duplicate.source, tuple):
            duplicate.source = duplicate.source + ('repeat',)
        elif isinstance(duplicate.source, dict):
            flagged_source = dict(duplicate.source)
            flagged_source['repeat'] = True
            duplicate.source = flagged_source
        elif duplicate.source is None:
            duplicate.source = {'repeat': True}
        return duplicate

    first_pass = True
    while insts:
        for original in insts:
            if first_pass:
                yield original
            else:
                yield _flagged(original)

        rng.shuffle(insts)
        first_pass = False
Esempio n. 16
0
def next_action():
    """Load (once) and return the next-action dataset from the filtered corpus.

    The CSV is read row by row.  For every row after the first, one
    `Instance` is appended whose

    * input is the messages accumulated so far for the previous row's
      `(gameid, roundNum)` key, joined with `' ~ '`,
    * output is the action label derived from the current row:
      `ACTION_CHOOSE` if the key changed, `ACTION_SPEAK` if the current
      row's role is `'listener'`, otherwise `ACTION_NONE`,
    * `alt_outputs` are the context colors of the previous row,
    * source is the previous key extended with the current row's
      `condition` and the number of accumulated messages.

    Listener messages are prefixed with `'| '` when accumulated.  No
    instance is emitted after the final row.

    As a side effect the module-level `NEXT_ACTION_SPLIT` is filled with the
    dataset index at which each game id listed in `FILTERED_SPLIT_IDS` is
    first matched (ids are matched in order).

    Results are cached in module-level globals.  They are built in local
    variables and published only after the whole file has been processed,
    so a failed load does not leave an empty dataset cached for later calls.
    """
    global NEXT_ACTION_DATASET, NEXT_ACTION_SPLIT
    if NEXT_ACTION_DATASET is not None:
        return NEXT_ACTION_DATASET

    split = []
    examples = []

    previous = []        # messages seen so far in the current round
    prev_key = None      # key of the previously processed row
    prev_context = None  # context_from_row() result of the previous row
    with open('behavioralAnalysis/humanOutput/filteredCorpus.csv',
              'r') as infile:
        for row in csv.DictReader(infile):
            key = (row['gameid'], row['roundNum'])

            # Record where the next expected split game id starts.
            if len(split) < len(FILTERED_SPLIT_IDS) and \
                    key[0] == FILTERED_SPLIT_IDS[len(split)]:
                split.append(len(examples))

            if key != prev_key:
                action = ACTION_CHOOSE
            elif row['role'] == 'listener':
                action = ACTION_SPEAK
            else:
                action = ACTION_NONE

            new_message = ('| ' if row['role'] == 'listener' else
                           '') + row['contents']
            prev_message = ' ~ '.join(previous)
            context = context_from_row(row)

            if prev_key is not None:
                # Only the colors of the previous context are needed here.
                _, alt_colors = prev_context

                examples.append(
                    Instance(input=prev_message,
                             output=action,
                             alt_outputs=alt_colors,
                             source=prev_key +
                             (row['condition'], len(previous))))

            # Start a fresh message history whenever a new round begins.
            if key != prev_key:
                previous = []
            previous.append(new_message)
            prev_key = key
            prev_context = context

    # Publish the finished results to the module-level cache.
    NEXT_ACTION_SPLIT = split
    NEXT_ACTION_DATASET = examples
    return NEXT_ACTION_DATASET
Esempio n. 17
0
def sample(model_pkl_file, device, insts_file):
    """Print one JSON-encoded model sample per instance in `insts_file`.

    `insts_file` is read as JSON lines (blank lines are skipped), each
    parsed into an `Instance`.  The pickled model in `model_pkl_file` is
    loaded and asked for random predictions inside
    `thutils.device_context(device)`; each prediction is then printed with
    `json.dumps`.
    """
    dev_insts = []
    with open(insts_file, 'r') as infile:
        for line in infile:
            line = line.strip()
            if line:
                dev_insts.append(Instance(**json.loads(line)))

    with thutils.device_context(device):
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        with open(model_pkl_file, 'rb') as infile:
            model = pickle.load(infile)

        # The leftover `pdb.set_trace()` breakpoint was removed here: it
        # halted every run and blocks non-interactive use.
        samples = model.predict(dev_insts, random=True, verbosity=0)

    # zip keeps the output limited to one line per loaded instance.
    for _, prediction in zip(dev_insts, samples):
        print(json.dumps(prediction))
Esempio n. 18
0
def sample_unicode(model_pkl_file, device):
    """Print model samples for the first 256 instances of the unicode dev set.

    'data/unicode_dev.json' is read as JSON lines (blank lines skipped) and
    every line is parsed into an `Instance`; only the first 256 are kept.
    The pickled model is loaded and asked for random predictions inside
    `thutils.device_context(device)`.  Each instance's input is interpreted
    as a hexadecimal code point, and one line is printed with the
    character, the code point and the sample.
    """
    with open('data/unicode_dev.json', 'r') as infile:
        stripped = (raw.strip() for raw in infile)
        parsed = [Instance(**json.loads(text)) for text in stripped if text]
    dev_insts = parsed[:256]

    with thutils.device_context(device):
        with open(model_pkl_file, 'rb') as model_file:
            model = pickle.load(model_file)

        samples = model.predict(dev_insts, random=True)

    for inst, predicted in zip(dev_insts, samples):
        char = chr(int(inst.input, 16))
        print(f'{char} U+{inst.input} {predicted}')
Esempio n. 19
0
 def get_input_instance(self, game, dialogue, invert=False):
     """Build the model input `Instance` for a game and its dialogue so far.

     The input text starts with the three entries of `game[0]` interleaved
     with three reward values, followed by every dialogue entry, separated
     by `' <eos>'` markers (the marker after the last entry is removed).
     The instance's output is the empty string.

     With `invert=True` the rewards come from `self.infer_their_rewards`
     instead of `game[1]`, and the `YOU:` / `THEM:` labels in each dialogue
     entry are swapped.  The instance's attributes are printed when
     `self.options.verbosity` is at least 6.
     """
     # NOTE(review): the inverted branch passes `self.dialogue`, not the
     # `dialogue` argument -- confirm this is intended.
     rewards = (self.infer_their_rewards(game, self.dialogue)
                if invert else game[1])
     pieces = [f'{game[0][0]} {rewards[0]} {game[0][1]} {rewards[1]} {game[0][2]} {rewards[2]}']
     for entry in dialogue:
         if invert:
             # Swap the two speaker labels via a temporary placeholder.
             entry = (entry.replace('YOU:', 'XYOU:')
                           .replace('THEM:', 'YOU:')
                           .replace('XYOU:', 'THEM:'))
         pieces.append(f'{entry} <eos>')
     text = ' '.join(pieces)
     if dialogue:
         # Drop the end-of-sentence marker that follows the final entry.
         text = text[:-len(' <eos>')]
     result = Instance(text, '')
     if self.options.verbosity >= 6:
         print(result.__dict__)
     return result
Esempio n. 20
0
    def score_all(self, colors, descriptions, format='rgb'):
        '''
        Score each description against the color at the same position.

        `colors` are given in the colorspace named by `format` ('rgb',
        'hsv' or 'hsl') and are converted to HSV before scoring; any other
        `format` raises `KeyError`. `colors` and `descriptions` must have
        the same length. The result is the model's list of log
        probabilities (base e), one per color/description pair.

        >>> cd = ColorDescriber()
        >>> cd.score_all([(255, 0, 0), (0, 0, 255)], ['red', 'blue'])  # doctest: +ELLIPSIS
        [-0.23..., -0.26...]
        '''
        converters = {
            'rgb': rgb_to_hsv,
            'hsl': hsl_to_hsv,
            'hsv': (lambda c: c),
        }
        to_hsv = converters[format]
        insts = []
        for color, description in zip(colors, descriptions):
            insts.append(Instance(to_hsv(color), description))
        return self.model.score(insts)
Esempio n. 21
0
    def describe_all(self, colors, format='rgb', sample=False):
        '''
        Produce one description for every color in `colors`.

        `colors` are given in the colorspace named by `format` ('rgb',
        'hsv' or 'hsl') and are converted to HSV before prediction; any
        other `format` raises `KeyError`. When `sample` is `True` the
        descriptions are sampled from the model's probability
        distribution; otherwise the most likely, common descriptions are
        returned.

        >>> cd = ColorDescriber()
        >>> cd.describe_all([(255, 0, 0), (0, 0, 255)])
        ['red', 'blue']
        '''
        converters = {
            'rgb': rgb_to_hsv,
            'hsl': hsl_to_hsv,
            'hsv': (lambda c: c),
        }
        to_hsv = converters[format]
        insts = []
        for color in colors:
            insts.append(Instance(to_hsv(color)))
        return self.model.predict(insts, random=sample)
Esempio n. 22
0
def hawkins_context(listener=False):
    """Load speaker-side reference-game instances from the Hawkins click CSV.

    Only speaker mode is supported; passing a true `listener` trips an
    assertion.  For every click row, the three objects (`click`, `alt1`,
    `alt2`) are converted with `hsl_to_hsv`, sorted by their `LocS` column,
    and the position of the single object whose status is `'target'`
    becomes the instance input.  The sorted colors are stored as
    `alt_inputs` and the output is the empty string.
    """
    assert not listener

    def _parse_row(row):
        # One (hsv_color, LocS, status) triple per displayed object.
        objects = []
        for prefix in ('click', 'alt1', 'alt2'):
            hsl = (row['%sColH' % prefix],
                   row['%sColS' % prefix],
                   row['%sColL' % prefix])
            objects.append((hsl_to_hsv(hsl),
                            row['%sLocS' % prefix],
                            row['%sStatus' % prefix]))
        objects.sort(key=lambda obj: obj[1])
        positions = [pos for pos, obj in enumerate(objects)
                     if obj[2] == 'target']
        assert len(positions) == 1, objects
        return positions[0], [obj[0] for obj in objects]

    instances = []
    with open('hawkins_data/colorReferenceClicks.csv', 'r') as infile:
        for row in csv.DictReader(infile):
            target_idx, colors = _parse_row(row)
            instances.append(
                Instance(input=target_idx, alt_inputs=colors, output=''))
    return instances
Esempio n. 23
0
    def score_all(self, colors, descriptions, format='rgb'):
        '''
        Return a list of log probabilities (base e), one for each
        description in `descriptions`, conditioned on the color at the same
        position in `colors`.

        `colors` are given in the colorspace named by `format` ('rgb',
        'hsv' or 'hsl') and are converted to HSV before scoring; any other
        `format` raises `KeyError`. `descriptions` and `colors` must have
        the same length.

        >>> cd = ColorDescriber()
        >>> cd.score_all([(255, 0, 0), (0, 0, 255)], ['red', 'blue'])  # doctest: +ELLIPSIS
        [-0.23..., -0.26...]
        '''
        converters = {
            'rgb': rgb_to_hsv,
            'hsl': hsl_to_hsv,
            'hsv': (lambda c: c),
        }
        to_hsv = converters[format]
        insts = []
        for color, description in zip(colors, descriptions):
            insts.append(Instance(to_hsv(color), description))
        return self.model.score(insts)
Esempio n. 24
0
def foobar_train():
    """Return 1000 separately constructed instances mapping 'foo' to 'bar'."""
    insts = []
    for _ in range(1000):
        insts.append(Instance(input='foo', output='bar'))
    return insts
Esempio n. 25
0
 def process_all(self, inputs):
     """Wrap each input in an `Instance` and return the model's predictions.

     The model is called with `verbosity=0`.
     """
     wrapped = []
     for item in inputs:
         wrapped.append(Instance(item))
     return self.model.predict(wrapped, verbosity=0)
Esempio n. 26
0
def _responses(insts):
    return [
        Instance(inst.input, inst.output[0], source=inst.source)
        for inst in insts
    ]
Esempio n. 27
0
def _selections(insts):
    return [
        Instance(inst.input, inst.output[1], source=inst.source)
        for inst in insts
    ]
Esempio n. 28
0
 def fill_score_instance(self, inst, rewards, counts):
     """Return a copy of `inst` with its output replaced by counts and rewards.

     The new output is a space-separated string that interleaves the first
     three entries of `counts` with the first three entries of `rewards`.
     All other attributes of `inst` are carried over; `inst` itself is not
     modified.
     """
     fields = dict(inst.__dict__)
     fields['output'] = f'{counts[0]} {rewards[0]} {counts[1]} {rewards[1]} {counts[2]} {rewards[2]}'
     return Instance(**fields)
Esempio n. 29
0
def json_file_test(listener='ignored'):
    """Load the test set from the JSON-lines file named in the options.

    The file path is `config.options().test_data_file`.  Every line is
    parsed with `json.loads` and the resulting mapping is passed to
    `Instance` as keyword arguments.  The `listener` argument is not used.
    """
    options = config.options()
    records = []
    with open(options.test_data_file, 'r') as infile:
        for line in infile:
            records.append(json.loads(line.strip()))

    instances = []
    for record in records:
        instances.append(Instance(**record))
    return instances
Esempio n. 30
0
# Python 2 ships the C pickle implementation as `cPickle`; that module does
# not exist on Python 3, where plain `pickle` is used instead.
try:
    import cPickle as pickle
except ImportError:
    import pickle

from stanza.research.instance import Instance

if __name__ == '__main__':
    # NOTE: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source.
    with open('runs/speaker_fourier_3d0L/quickpickle.p', 'rb') as infile:
        model = pickle.load(infile)
    # Print the model's score for describing the color (120, 100, 100) as
    # 'green'.
    print(model.score([Instance((120., 100., 100.), 'green')]))