Ejemplo n.º 1
0
    def __init__(self, opt):
        """Load the example data and restore or create the index stack.

        Args:
            opt: option mapping. Reads ``'second_response'`` (optional),
                ``'is_sandbox'`` and ``'datapath'``; it is also passed to
                ``build_pc`` and ``build_ic`` and stored on ``self.opt``.
        """
        self.second_resp = opt.get('second_response')

        # The cache file lives in the current working directory; its name
        # encodes the response mode and whether this is a sandbox run.
        working_dir = os.getcwd()
        if self.second_resp:
            response_kind = 'second_response'
        else:
            response_kind = 'first_response'
        sandbox_suffix = '_sandbox' if opt['is_sandbox'] else ''
        self.examples_idx_stack_path = os.path.join(
            working_dir,
            './{}_examples_stack{}.pkl'.format(response_kind, sandbox_suffix))

        self.OLD = OffensiveLanguageDetector()
        self.opt = opt
        build_pc(opt)
        build_ic(opt)

        # The second-response dataset names its validation split 'valid'
        # instead of 'val'.
        if self.second_resp:
            dataset_dir = 'image_chat'
            split_names = ['train', 'valid', 'test']
        else:
            dataset_dir = 'personality_captions'
            split_names = ['train', 'val', 'test']
        path_template = os.path.join(self.opt['datapath'], '{}/{}.json')

        self.data = []
        for split in split_names:
            with open(path_template.format(dataset_dir, split)) as split_file:
                self.data += json.load(split_file)

        if self.second_resp:
            # Keep only entries whose 'dialog' holds more than one element.
            self.data = [
                entry for entry in self.data if len(entry['dialog']) > 1
            ]

        if not os.path.exists(self.examples_idx_stack_path):
            # No cached stack yet: build one and persist it.
            self.idx_stack = []
            self.add_idx_stack()
            self.save_idx_stack()
        else:
            # NOTE(review): pickle.load trusts the file found in the working
            # directory; only run where that file cannot be tampered with.
            with open(self.examples_idx_stack_path, 'rb') as stack_file:
                self.idx_stack = pickle.load(stack_file)
Ejemplo n.º 2
0
    def __init__(self, opt, agents=None, shared=None):
        """Set up the world state, the image list and the embeddable links.

        Args:
            opt: option mapping; reads ``'is_sandbox'`` and
                ``'participants'``.
            agents: agents taking part. ``len(agents)`` is evaluated, so a
                sized collection is required despite the ``None`` default.
            shared: accepted but not used in the visible body.
        """
        # Add passed in agents directly.
        if opt['is_sandbox']:
            self.task_type = 'sandbox'
        else:
            self.task_type = 'live'
        self.agents = agents
        # One (initially empty) act slot per agent.
        self.acts = [None] * len(agents)
        self.episodeDone = False
        self.opt = opt
        self.data = []
        self.offensive_lang_detector = OffensiveLanguageDetector()
        # Random index in [0, participants - 1]; randint includes both ends.
        last_participant = self.opt["participants"] - 1
        self.rand_index = random.randint(0, last_participant)

        # read list of local images or links to S3 locations
        self.imgs = [
            "/projects2/ParlAI/data/yfcc_images/1e22a9cf867d718551386b427c3b6d18.jpg",
            "/projects2/ParlAI/data/yfcc_images/96472caea58db27769f1c282e2ac0.jpg",
            "/projects2/ParlAI/data/yfcc_images/f09d8fb76822158de129acb0fef463.jpg",
            "/projects2/ParlAI/data/yfcc_images/6e4ccc739ff44ed11da20ad9892317.jpg",
            "/projects2/ParlAI/data/yfcc_images/e7e1844aa9e67cddc6ffe8804d76e45b.jpg",
            "/projects2/ParlAI/data/yfcc_images/5547b3852afec328a491a696ace99a.jpg",
            "/projects2/ParlAI/data/yfcc_images/b326345ae2b2bd14ebf74aaa31e571a.jpg",
            "/projects2/ParlAI/data/yfcc_images/75a13ebe4be7ab5b3f68f692d7db081.jpg",
            "/projects2/ParlAI/data/yfcc_images/246eea26a3fc2d886be795790a7495.jpg",
            "/projects2/ParlAI/data/yfcc_images/010722aa6d2327deddb4ead5e089ea.jpg"
        ]

        # read list of links from a local file
        watch_urls = (
            "https://www.youtube.com/watch?v=7gUv0xcFqMk",
            "https://www.youtube.com/watch?v=6vYJyOGKCHE",
            "https://www.youtube.com/watch?v=3SJ0Rd7XU4Y",
        )
        # Rewrite each watch URL into its embed form.
        self.links = [
            url.replace("watch?v=", "embed/") for url in watch_urls
        ]
Ejemplo n.º 3
0
def detect(opt, printargs=None, print_parser=None):
    """Run a task for one epoch and count exchanges with offensive language.

    After every ``world.parley()`` the ``text`` of each act and every entry
    of its ``labels`` (falling back to ``eval_labels``) is passed to an
    ``OffensiveLanguageDetector``. An exchange is counted once if any of
    those checks is truthy.

    Args:
        opt: options used to create the agent and the task; also read for
            ``log_every_n_secs`` and ``display_examples``.
        printargs: not used by this function.
        print_parser: ``True`` selects ``opt`` as the parser when ``opt`` is
            a ``ParlaiParser``; ``False`` or ``None`` disables argument
            printing; any other truthy value is used as the parser itself.

    Returns:
        The result of ``world.report()`` once the epoch is done.
    """
    # Resolve the boolean shorthand for print_parser. When it is None both
    # tests below are false, so nothing changes.
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None
    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)
    detector = OffensiveLanguageDetector()

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()

    # A non-positive interval turns periodic logging off.
    log_interval = opt.get('log_every_n_secs', -1)
    if log_interval <= 0:
        log_interval = float('inf')
    log_timer = Timer()
    elapsed_total = 0

    # Show some example dialogs:
    flagged_exchanges = 0
    while not world.epoch_done():
        world.parley()

        # Check every text and label; the detector is called on all of them,
        # in order, before the results are combined.
        verdicts = []
        for act in world.acts:
            verdicts.append(
                detector.contains_offensive_language(act.get('text', '')))
            for label in act.get('labels', act.get('eval_labels', '')):
                verdicts.append(detector.contains_offensive_language(label))

        if any(verdicts):
            if opt['display_examples']:
                print(world.display() + "\n~~")
            flagged_exchanges += 1

        if log_timer.time() > log_interval:
            elapsed_total += log_timer.time()
            report = world.report()
            fraction_done = report['total'] / world.num_examples()
            # Keys are inserted in the order total, done, eta, offenses.
            log = {'total': report['total'], 'done': fraction_done}
            if fraction_done > 0:
                log['eta'] = int(elapsed_total / fraction_done - elapsed_total)
            percent_text = '%.2f' % (100 * fraction_done)
            log['done'] = str(percent_text) + '%'
            log['offenses'] = flagged_exchanges
            elapsed_text = str(int(elapsed_total))
            print(elapsed_text + "s elapsed: " + str(log))
            log_timer.reset()

    if world.epoch_done():
        print("EPOCH DONE")
    found_text = str(flagged_exchanges)
    total_text = str(world.num_examples())
    print(
        found_text + " offensive messages found out of " +
        total_text + " messages.")
    return world.report()
Ejemplo n.º 4
0
def detect(opt, printargs=None, print_parser=None):
    """Run a task for one epoch and tally detected offensive language.

    After every ``world.parley()`` the ``text`` of each act and every entry
    of its ``labels`` (falling back to ``eval_labels``) is passed to an
    ``OffensiveLanguageDetector``. Each truthy detector result is kept and
    adds one to the running count.

    Args:
        opt: options used to create the agent and the task; also read for
            ``log_every_n_secs`` and ``display_examples``.
        printargs: not used by this function.
        print_parser: ``True`` selects ``opt`` as the parser when ``opt`` is
            a ``ParlaiParser``; ``False`` or ``None`` disables argument
            printing; any other truthy value is used as the parser itself.

    Returns:
        The result of ``world.report()`` once the epoch is done.
    """
    # Resolve the boolean shorthand for print_parser. When it is None both
    # tests below are false, so nothing changes.
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None
    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)
    detector = OffensiveLanguageDetector()

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()

    # A non-positive interval turns periodic logging off.
    log_interval = opt.get('log_every_n_secs', -1)
    if log_interval <= 0:
        log_interval = float('inf')
    time_logger = TimeLogger()

    # Show some example dialogs:
    total_hits = 0
    while not world.epoch_done():
        world.parley()

        # Run the detector over every text and label in order, then keep
        # only the truthy results.
        results = []
        for act in world.acts:
            results.append(
                detector.contains_offensive_language(act.get('text', '')))
            results.extend(
                detector.contains_offensive_language(label)
                for label in act.get('labels', act.get('eval_labels', '')))
        words = [result for result in results if result]

        if words and opt['display_examples']:
            print(world.display())
            print("[Offensive words detected:]", ', '.join(words))
            print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
        total_hits += len(words)

        if time_logger.time() > log_interval:
            report = world.report()
            stats = {'offenses': total_hits}
            message, stats = time_logger.log(
                report['exs'], world.num_examples(), stats)
            print(message)

    if world.epoch_done():
        print("EPOCH DONE")
    found_text = str(total_hits)
    total_text = str(world.num_examples())
    print(
        found_text + " offensive messages found out of " +
        total_text + " messages.")
    return world.report()
Ejemplo n.º 5
0
    def __init__(
        self,
        opt,
        agents=None,
        shared=None,
        world_tag='NONE',
        ir_agent=None,
        task='',
        wiki_title_to_passage=None,
    ):
        """Initialise the chat state, order the agents and pick a persona.

        Args:
            opt: option mapping; reads ``'min_turns'``, ``'max_turns'``,
                ``'is_sandbox'``, ``'max_resp_time'`` and
                ``'num_passages_retrieved'``. Also passed to the parent
                initialiser and to ``setup_tokenizer``.
            agents: agents in the chat; they are sorted by ``id`` and the
                first one must expose ``persona_generator``.
            shared: forwarded to the parent initialiser.
            world_tag: stored on ``self.world_tag``.
            ir_agent: stored on ``self.ir_agent``.
            task: not used in the visible body.
            wiki_title_to_passage: stored on ``self.wiki_title_to_passage``.
        """
        self.turn_idx = 0
        self.min_turns = opt['min_turns']
        self.max_turns = opt['max_turns']
        # numpy's randint excludes its upper bound, so after adding one the
        # value lies in [min_turns + 1, max_turns].
        drawn_turns = np.random.randint(self.min_turns, self.max_turns)
        self.num_turns = drawn_turns + 1
        self.dialog = []
        self.wizard_eval = 0
        if opt['is_sandbox']:
            self.task_type = 'sandbox'
        else:
            self.task_type = 'live'
        self.chat_done = False
        self.world_tag = world_tag
        self.max_resp_time = opt['max_resp_time']  # in secs
        self.num_passages_to_retrieve = opt['num_passages_retrieved']
        super().__init__(opt, agents, shared)

        # Sort by id, ascending or descending depending on a coin flip.
        descending = random.random() <= 0.5
        self.agents = sorted(
            agents, key=lambda agent: agent.id, reverse=descending)

        #  Personas and retriever
        self.persona_generator = self.agents[0].persona_generator
        self.relevant_topics = []
        # Keep popping personas until one yields at least one topic.
        while not self.relevant_topics:
            self.persona_to_topics = {}
            popped = self.persona_generator.pop_persona()
            self.persona_idx, persona_data = popped
            for persona in persona_data:
                # Drop a single leading space, if any.
                if persona[0] == ' ':
                    persona = persona[1:]
                if persona in self.persona_to_topics:
                    # Persona already processed.
                    continue
                self.persona_to_topics[persona] = []
                unique_topics = set(self.persona_generator.get_topics(persona))
                for topic in unique_topics:
                    self.relevant_topics.append(
                        topic + ' ({})'.format(persona))
                    self.persona_to_topics[persona].append(topic)

        self.ir_agent = ir_agent
        self.setup_tokenizer(opt)
        self.chosen_topic = ''
        self.chosen_topic_passage = {}
        self.OLD = OffensiveLanguageDetector()
        # Load the title to passage dictionary
        self.wiki_title_to_passage = wiki_title_to_passage
Ejemplo n.º 6
0
 def __init__(self, opt, agents=None, shared=None, world_tag='NONE'):
     """Initialise the world state and the task-type specific settings.

     Args:
         opt: option mapping; reads ``'is_sandbox'``, ``'max_resp_time'``,
             ``'num_images'``, ``'task_type'`` and, optionally,
             ``'multiple_personality'``.
         agents: agents in the world; ``agents[0]`` is indexed, so the
             ``None`` default cannot be used as-is.
         shared: forwarded to the parent initialiser.
         world_tag: stored on ``self.world_tag``.
     """
     self.turn_idx = 0
     if opt['is_sandbox']:
         self.task_type = 'sandbox'
     else:
         self.task_type = 'live'
     self.chat_done = False
     self.world_tag = world_tag
     self.max_resp_time = opt['max_resp_time']  # in secs
     super().__init__(opt, agents, shared)
     self.agents = agents
     self.offensive_lang_detector = OffensiveLanguageDetector()
     self.agent = agents[0]
     self.data = []
     self.exact_match = False
     self.num_images = opt['num_images']
     self.multiple_personality = opt.get('multiple_personality', False)
     self.eval = 0
     # The configured task type selects both the title and the config.
     selected_type = opt['task_type']
     self.data_type = selected_type
     self.task_type_title = TASK_TYPE_TO_TITLE[selected_type]
     self.config = TASK_TYPE_TO_CONFIG[selected_type]
Ejemplo n.º 7
0
 def __init__(self, opt, agents=None, shared=None, world_tag='NONE'):
     """Initialise the world state, the response config and the image path.

     Args:
         opt: option mapping; reads ``'is_sandbox'``, ``'max_resp_time'``,
             ``'num_images'`` and, optionally, ``'second_response'`` and
             ``'yfcc_path'``. ``'datapath'`` is read only when
             ``'yfcc_path'`` is missing or falsy.
         agents: agents in the world; ``agents[0]`` is indexed, so the
             ``None`` default cannot be used as-is.
         shared: forwarded to the parent initialiser.
         world_tag: stored on ``self.world_tag``.
     """
     self.turn_idx = 0
     if opt['is_sandbox']:
         self.task_type = 'sandbox'
     else:
         self.task_type = 'live'
     self.chat_done = False
     self.world_tag = world_tag
     self.max_resp_time = opt['max_resp_time']  # in secs
     super().__init__(opt, agents, shared)
     self.agents = agents
     self.agent = agents[0]
     self.offensive_lang_detector = OffensiveLanguageDetector()
     self.data = []
     self.exact_match = False
     self.num_images = opt['num_images']
     self.second_resp = opt.get('second_response', False)
     if self.second_resp:
         self.config = config_second
     else:
         self.config = config_first
     # An explicit, non-empty 'yfcc_path' wins; otherwise fall back to the
     # 'yfcc_images' folder under the data path.
     self.image_path = opt.get('yfcc_path') or os.path.join(
         opt['datapath'], 'yfcc_images')