Ejemplo n.º 1
0
    def dump_chats(cls, cursor, scenario_db, json_path, uids=None):
        """Dump chat transcripts to a JSON file.

        Chats for which ``get_chat_example`` returns None, and chats in which
        only one of the two agents (0 / 1) produced events, are left out.

        Args:
            cursor: database cursor accepting ``?`` parameter placeholders.
            scenario_db (ScenarioDB): retrieve Scenario by logged uuid.
            json_path (str): output path.
            uids (list): if provided, only log chats from these users.

        """
        if uids is None:
            cursor.execute('SELECT DISTINCT chat_id FROM event')
            chat_ids = [row[0] for row in cursor.fetchall()]
        else:
            # The same chat_id can come back for more than one uid (or more
            # than once for a single uid). De-duplicate, keeping first-seen
            # order, so this branch matches the DISTINCT query above and no
            # chat is written to the dump twice.
            chat_ids = []
            seen = set()
            for uid in uids:
                cursor.execute('SELECT chat_id FROM mturk_task WHERE name=?',
                               (uid, ))
                for row in cursor.fetchall():
                    if row[0] not in seen:
                        seen.add(row[0])
                        chat_ids.append(row[0])

        def is_single_agent(chat):
            # Count events per agent; a chat is "single agent" when either
            # side never produced an event.
            agent_event = {0: 0, 1: 0}
            for event in chat.events:
                agent_event[event.agent] += 1
            return agent_event[0] == 0 or agent_event[1] == 0

        examples = []
        for chat_id in chat_ids:
            ex = cls.get_chat_example(cursor, chat_id, scenario_db)
            if ex is None or is_single_agent(ex):
                continue
            examples.append(ex)

        write_json([ex.to_dict() for ex in examples], json_path)
Ejemplo n.º 2
0
    def dump_surveys(cls, cursor, json_path):
        """Dump the logged survey answers, grouped by chat, to a JSON file.

        The file holds a two-element list ``[agent_types, survey_data]``:
        ``agent_types`` maps chat_id to the decoded ``agent_types`` column of
        the ``chat`` table, and ``survey_data`` maps chat_id to
        ``{0: ..., 1: ...}``. A user's answers are stored under index 0 when
        that user is the chat's agent ``'1'``, and under index 1 otherwise.

        Args:
            cursor: database cursor accepting ``?`` parameter placeholders.
            json_path (str): output path.
        """
        cursor.execute('''SELECT * FROM survey''')
        survey_rows = cursor.fetchall()
        agent_types = {}
        survey_data = {}

        for row in survey_rows:
            # Each survey row must have exactly six columns; the third one is
            # not used. TODO: support a variable number of questions per task.
            userid, cid, _, q1, q2, comments = row
            answers = {
                'humanlike': q1,
                'cooperative': q2,
                'comments': comments,
            }

            cursor.execute(
                '''SELECT agent_types, agent_ids FROM chat WHERE chat_id=?''',
                (cid, ))
            chat_row = cursor.fetchone()
            decoded_types = json.loads(chat_row[0])
            decoded_ids = json.loads(chat_row[1])
            agent_types[cid] = decoded_types

            if decoded_ids['1'] == userid:
                slot = 0
            else:
                slot = 1
            # First survey seen for a chat creates both (empty) slots.
            survey_data.setdefault(cid, {0: {}, 1: {}})[slot] = answers

        write_json([agent_types, survey_data], json_path)
Ejemplo n.º 3
0
 def write_metadata(cls, transcripts, outdir, responses=None):
     """Write one summary row per non-empty chat to ``metadata.json``.

     The file is created inside ``outdir`` and has the form
     ``{'data': [row, ...]}``. Chats without events are skipped.

     Args:
         transcripts: iterable of chat dicts.
         outdir (str): directory receiving ``metadata.json``.
         responses: optional mapping from chat uuid to per-agent survey
             scores. When truthy, each question in ``cls.questions`` is
             added to the row as the mean of the scores pooled over all
             entries for that chat.
     """
     rows = []
     for chat in transcripts:
         if len(chat['events']) == 0:
             continue

         dialogue_id = chat['uuid']
         scenario_id = chat['scenario_uuid']
         scenario = cls.get_scenario(chat)
         if chat['outcome']['reward'] == 0:
             outcome = 'fail'
         else:
             outcome = 'success'

         row = {
             'dialogue_id': dialogue_id,
             'scenario_id': scenario_id,
             'num_items': len(scenario.kbs[0].items),
             'num_attrs': len(scenario.attributes),
             'outcome': outcome,
             'agent0': cls.agent_labels[chat['agents']['0']],
             'agent1': cls.agent_labels[chat['agents']['1']],
         }

         if responses:
             # Pool the scores of every respondent before averaging.
             pooled = defaultdict(list)
             for agent_scores in responses[dialogue_id].itervalues():
                 for question in cls.questions:
                     pooled[question].extend(agent_scores[question])
             for question, values in pooled.iteritems():
                 row[question] = np.mean(values)

         rows.append(row)

     write_json({'data': rows}, os.path.join(outdir, 'metadata.json'))
Ejemplo n.º 4
0
 def log_examples_with_templates(self, examples, log):
     """Swap template ids on events for the templates, then dump all examples.

     For every example that ``Preprocessor.skip_example`` does not reject,
     each event whose ``template`` is not None has it replaced in place by
     ``self.templates[template]``. All examples, including rejected ones,
     are then written to ``log`` as JSON.
     """
     eligible = (ex for ex in examples if not Preprocessor.skip_example(ex))
     for example in eligible:
         for event in example.events:
             template_key = event.template
             if template_key is None:
                 continue
             event.template = self.templates[template_key]
     serialized = [ex.to_dict() for ex in examples]
     write_json(serialized, log)
Ejemplo n.º 5
0
def log_worker_id_to_json(db_path, batch_results):
    '''Write the chat-to-worker-id mapping next to the batch results file.

    The mapping is written to ``worker_ids.json`` in the directory that
    contains ``batch_results`` and has the form
    {chat_id: {'0': worker_id; '1': worker_id}}

    Args:
        db_path (str): path to the SQLite database.
        batch_results (str): path to the batch results CSV file.
    '''
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        code_to_wid = read_results_csv(batch_results)
        worker_ids = chat_to_worker_id(cursor, code_to_wid)
        # os.path.join keeps the output relative when batch_results has no
        # directory part; plain concatenation would yield '/worker_ids.json'.
        output_dir = os.path.dirname(batch_results)
        write_json(worker_ids, os.path.join(output_dir, 'worker_ids.json'))
    finally:
        # Always release the database handle, even if a step above fails.
        conn.close()
Ejemplo n.º 6
0
 def train(self, examples, preprocessor, output_path, stem=False):
     """Compute scores for every role/category pair, save and return them.

     Collocation data is gathered once via ``get_collocate_data``; then
     ``compute_scores`` is called for each (role, category) combination.
     The result is written to ``output_path`` as JSON.

     Returns:
         defaultdict: ``scores[role][category]`` for each combination.
     """
     roles = ('seller', 'buyer')
     categories = ('car', 'housing', 'phone', 'electronics', 'furniture',
                   'bike')

     collocate_data = self.get_collocate_data(examples,
                                              preprocessor,
                                              stem=stem)
     scores = defaultdict(dict)
     for role in roles:
         scores[role] = {
             category: self.compute_scores(collocate_data,
                                           role=role,
                                           category=category)
             for category in categories
         }
     # Serialize as a plain dict, but hand the defaultdict back to callers.
     write_json(dict(scores), output_path)
     return scores
Ejemplo n.º 7
0
 def dump_reviewed_chat(cls, cursor, json_path):
     """Dump the review table, with each chat's reward, to a JSON file.

     The output maps chat_id to ``{'accept', 'success', 'message'}``, where
     ``success`` is the ``reward`` entry of the chat outcome returned by
     ``DatabaseReader.get_chat_outcome``.
     """
     cursor.execute('SELECT chat_id, accept, message FROM review')
     reviewed = cursor.fetchall()

     review_info = {}
     for chat_id, accept, message in reviewed:
         reward = DatabaseReader.get_chat_outcome(cursor, chat_id)['reward']
         review_info[chat_id] = {
             'accept': accept,
             'success': reward,
             'message': message,
         }
     write_json(review_info, json_path)
Ejemplo n.º 8
0
 def dump(self, output):
     """Announce the destination, then write ``self.data`` to it as JSON."""
     notice = 'Dumping data to {}'.format(output)
     print(notice)
     write_json(self.data, output)
Ejemplo n.º 9
0
            print("Loading model from checkpoint ...")
            encoder = torch.load(args.init_from + args.checkpoint +
                                 'encoder.pt')
            decoder = torch.load(args.init_from + args.checkpoint +
                                 'decoder.pt')
        else:
            print("Creating new model...")
            encoder = GRU_Encoder(args.word_embed_size, args.num_layers)
            decoder = Attn_Decoder(args.word_embed_size, args.attn_method,
                                   args.dropout)
    else:
        # Save config
        if not os.path.isdir(args.checkpoint):
            os.makedirs(args.checkpoint)
        config_path = os.path.join(args.checkpoint, 'config.json')
        write_json(vars(args), config_path)
        model_args = args
        ckpt = None

    # Load vocab
    vocab_path = 'data/persona_vocab.pkl'
    if not os.path.exists(vocab_path):
        print 'Vocab not found at', vocab_path
        vocab = None
        args.ignore_cache = True
    else:
        print 'Loading vocab from', vocab_path
        vocab = read_pickle(vocab_path)

    # schema = Schema(model_args.schema_path, None)
    # train_batches = DialogueBatcher(vocab, "train")
Ejemplo n.º 10
0
# Command-line options: schema file, input file of integer lines, output JSON.
parser.add_argument('--schema-path')
parser.add_argument(
    '--scenario-ints-file',
    help=
    'Path to the file containing 6 integers per line that describes the scenario'
)
parser.add_argument('--output', help='Path to the output JSON scenario file')
args = parser.parse_args()

schema = Schema(args.schema_path)

# Each input line becomes one KB; consecutive KBs are paired into scenarios.
scenarios = []
with open(args.scenario_ints_file) as fin:
    # KBs waiting to be paired (holds at most two entries).
    kbs = []
    names = ['book', 'hat', 'ball']
    for line in fin:
        ints = [int(x) for x in line.strip().split()]
        kb = KB.from_ints(schema.attributes, names, ints)
        kbs.append(kb)
        if len(kbs) == 2:
            if kbs[0].item_counts != kbs[1].item_counts:
                # The two KBs disagree on item counts: drop the older one and
                # try to pair the newer one with the next line instead.
                del kbs[0]
                continue
            # Sanity guard; the mismatch case was already handled just above.
            assert kbs[0].item_counts == kbs[1].item_counts
            scenario = Scenario(generate_uuid("FB"), schema.attributes, kbs)
            scenarios.append(scenario)
            # Start collecting a fresh pair (a trailing unpaired KB is dropped).
            kbs = []

# Serialize every paired scenario to the requested output file.
scenario_db = ScenarioDB(scenarios)
write_json(scenario_db.to_dict(), args.output)
Ejemplo n.º 11
0
        print 'KB CONTEXT:', result.get('kb_context', None)
        print 'CANDIDATES:'
        for c in result['candidates']:
            if 'response' in c:
                print '----------'
                print c['hits']
                print to_str(c['response'])
                #print c['context']
                #print c['pos']

    # Write a json file of all the candidates
    if args.test_examples_paths and args.retriever_output:
        dialogues = preprocessor.preprocess(dataset.test_examples)
        print 'Retrieving candidates for %s' % ','.join(
            args.test_examples_paths)
        start_time = time.time()
        results = []
        for dialogue in dialogues:
            result = retriever.retrieve_candidates(dialogue, json_dict=True)
            results.extend(result)
            if args.verbose:
                for r in result:
                    dump_result(r)
        print '[%d s]' % (time.time() - start_time)
        write_json(results, args.retriever_output)

    #prev_turns = ["I 'm a poor student . can you go lower ?".split()]
    #results = retriever.search('seller', 'furniture', '', prev_turns)
    #for r in results:
    #    print r
Ejemplo n.º 12
0
                    help='Transciprts paths',
                    nargs='*',
                    default=[])
# Split proportions. NOTE(review): np.random.choice expects its `p` argument
# to sum to 1, so the three fractions should add up to 1 -- confirm callers
# always pass a consistent set.
parser.add_argument('--train-frac',
                    help='Fraction of training examples',
                    type=float,
                    default=0.6)
parser.add_argument('--test-frac',
                    help='Fraction of test examples',
                    type=float,
                    default=0.2)
parser.add_argument('--dev-frac',
                    help='Fraction of dev examples',
                    type=float,
                    default=0.2)
parser.add_argument('--output-path', help='Output path for splits')
args = parser.parse_args()

# Fixed seed so the random fold assignment is the same on every run.
np.random.seed(0)
# json_data[0], [1] and [2] collect the train, dev and test examples, matching
# the order of the probabilities below and of the fold names in the last loop.
json_data = ([], [], [])
for path in args.example_paths:
    examples = read_json(path)
    # Draw one fold index (0, 1 or 2) per example.
    folds = np.random.choice(
        3, len(examples), p=[args.train_frac, args.dev_frac, args.test_frac])
    for ex, fold in izip(examples, folds):
        json_data[fold].append(ex)

# Write each non-empty fold; the fold name is appended directly to
# --output-path (no separator is inserted), followed by '.json'.
for fold, dataset in izip(('train', 'dev', 'test'), json_data):
    if len(dataset) > 0:
        write_json(dataset, '%s%s.json' % (args.output_path, fold))
Ejemplo n.º 13
0
import argparse
import os
from cocoa.core.util import read_json, write_json

# Merge the transcripts and surveys of several transcript directories into a
# single <output>/transcripts directory.
parser = argparse.ArgumentParser()
parser.add_argument('--paths', nargs='+', help='Paths to transcripts directories')
parser.add_argument('--output', help='Output directory')
args = parser.parse_args()

# Chats from every input directory, concatenated in argument order.
all_chats = []
# survey data structure: [{}, {}]
all_surveys = [{}, {}]

for d in args.paths:
    transcript_file = os.path.join(d, 'transcripts/transcripts.json')
    survey_file = os.path.join(d, 'transcripts/surveys.json')

    chats = read_json(transcript_file)
    all_chats.extend(chats)

    # Each survey file is a list of dicts; merge them position by position.
    # On key collisions the later directory wins.
    surveys = read_json(survey_file)
    for i, s in enumerate(surveys):
        all_surveys[i].update(s)
    print("Combined data from {}".format(d))

# Build the path with os.path.join, like the other paths in this script, so a
# trailing separator on --output does not double up and an empty --output
# stays relative instead of pointing at '/transcripts'.
output_dir = os.path.join(args.output, 'transcripts')
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
write_json(all_chats, os.path.join(output_dir, 'transcripts.json'))
write_json(all_surveys, os.path.join(output_dir, 'surveys.json'))
Ejemplo n.º 14
0
    parser.add_argument(
        '--worker-ids',
        nargs='+',
        help='Path to json file containing chat_id to worker_id mappings')
    parser.add_argument('--hist',
                        default=False,
                        action='store_true',
                        help='Plot histgram of ratings')

    HTMLVisualizer.add_html_visualizer_arguments(parser)
    args = parser.parse_args()

    visualizer = Visualizer(args.dialogue_transcripts, args.survey_transcripts)
    visualizer.compute_effectiveness()

    if args.hist:
        visualizer.hist(question_scores, args.outdir, partner=args.partner)
    if args.worker_ids:
        visualizer.worker_stats()

    # TODO: move summary and hist to analyzer
    if args.summary:
        summary = visualizer.summarize()
        write_json(summary, args.stats)
    if args.html_output:
        visualizer.html_visualize(args.viewer_mode,
                                  args.html_output,
                                  css_file=args.css_file,
                                  img_path=args.img_path,
                                  worker_ids=visualizer.worker_ids)
Ejemplo n.º 15
0
 def dump_results(self, output):
     """Report how many evaluation results there are, then save them as JSON.

     ``self.eval_results`` is written to ``output``.
     """
     count = len(self.eval_results)
     print('Writing {} evaluation results to {}'.format(count, output))
     write_json(self.eval_results, output)
Ejemplo n.º 16
0
import argparse
from cocoa.core.util import read_json, write_json
from cocoa.core.scenario_db import ScenarioDB
from cocoa.core.schema import Schema
from core.scenario import Scenario

# Build a scenario database from the scenarios embedded in a chat file.
parser = argparse.ArgumentParser()
parser.add_argument('--chats')
parser.add_argument('--scenarios')
parser.add_argument('--max', type=int)
args = parser.parse_args()

chats = read_json(args.chats)
# A missing (or zero) --max means "use every chat".
n = args.max if args.max else len(chats)
scenarios = [
    Scenario.from_dict(None, chat['scenario']) for chat in chats[:n]
]
write_json(ScenarioDB(scenarios).to_dict(), args.scenarios)
Ejemplo n.º 17
0
 def dump_db(self):
     """Save ``self.db`` as JSON at ``self.db_path`` and report where it went."""
     destination = self.db_path
     write_json(self.db, destination)
     print('HIT results dumped to {}'.format(destination))