Exemple #1
0
# Run the predictor and the selector over every unlabeled batch and
# accumulate their predictions in preds_p / preds_s respectively.
for i, batch in enumerate(iterator_unlabeled):
    # batch.token unpacks into two values; the first one is also the
    # tensor compared against the padding id below.
    words, length = batch.token
    inputs = {
        'words': words,
        'length': length,
        'pos': batch.pos,
        'ner': batch.ner,
        'subj_pst': batch.subj_pst,
        'obj_pst': batch.obj_pst,
        # True wherever the token id equals the vocabulary padding id.
        'masks': torch.eq(batch.token[0], opt_p['vocab_pad_id']),
    }

    target = batch.relation

    if args.cuda:
        # Move every input tensor to the GPU.
        inputs = {k: v.cuda() for k, v in inputs.items()}

    # Predictor pass.
    predictor.eval()
    pred = predictor.predict(inputs).data.cpu().numpy().tolist()
    preds_p += pred

    # Selector pass on the very same inputs.
    selector.eval()
    pred = selector.predict(inputs).data.cpu().numpy().tolist()
    preds_s += pred


def arg_max(l):
    """Scan ``l`` for its largest element, tracking the value and its index.

    NOTE(review): the visible body ends after the loop without a ``return``,
    so as written the function returns ``None``.  The snippet looks
    truncated -- presumably ``bid`` (the index) was meant to be returned;
    confirm against the original file.
    """
    # Best value and best index both start at -1, so elements that are
    # <= -1 are never selected.
    bvl, bid = -1, -1
    for k in range(len(l)):
        # Strict comparison keeps the first occurrence among equal maxima.
        if l[k] > bvl:
            bvl = l[k]
            bid = k
Exemple #2
0
                    #print("pred_missing_tagspace", curr_pred_missing_tagspace)
                    # With args.post set, the predicted missing tag space is
                    # discarded and replaced by an empty set.
                    if args.post:
                        curr_pred_missing_tagspace = set()
                    # Map the tag names to their ids (sorted), then repeat the
                    # SAME list object once per gold-label sequence.
                    pred_missing_tagspace = sorted([tag2idx[untag] for untag in curr_pred_missing_tagspace]) 
                    pred_missing_tagspace = [pred_missing_tagspace] * len(filtered_gold_labels)

                    # NOTE(review): the positional 50 and 0 are presumably the
                    # batch size and a corpus/CRF index -- confirm against
                    # build_dataloader's signature.
                    dataloader = build_dataloader(pred_tokens, filtered_gold_labels, 50, pred_missing_tagspace, 0, tag2idx, chr2idx, token2idx, train_args['caseless'], shuffle=False, drop_last=False)

                    # choose to eval the local prediction on the training branch
                    
                    # unzip batch and group by sents
                    # NOTE(review): merge_batch is always True here and the
                    # predict() call below hard-codes merge_batch=True, so the
                    # else-branch of the check further down is unreachable.
                    merge_batch = True

                    #pred_tags = predictor.predict(ner_model, dataloader, crf_idx, merge_batch=merge_batch, totag=False)                
                    pred_tags = predictor.predict(ner_model, dataloader, crf_idx, args.pred_method, merge_batch=True, totag=True)
                    # Each returned item is sorted by its first element and then
                    # split into (index, tags); the indices must be exactly
                    # 0..n-1 once sorted.
                    pred_tags = sorted(pred_tags, key=lambda item:item[0])
                    reorder_idx, pred_tags = zip(*pred_tags)
                    for i, re_idx in enumerate(sorted(reorder_idx)):
                        assert i == re_idx

                    if merge_batch:
                        # One prediction per input token sequence.
                        assert len(pred_tokens) == len(pred_tags), "{} {}".format(len(pred_tokens), len(pred_tags))
                    else:
                        num_sample = sum(map(lambda t: len(t), dataloader)) 
                        # NOTE(review): the failure message reports
                        # len(pred_tokens) although the comparison uses
                        # num_sample.
                        assert num_sample == len(pred_tags), "{} {}".format(len(pred_tokens), len(pred_tags))
                    # Corpus name = second-to-last path component of every
                    # training file that crf2corpus assigns to this CRF.
                    train_corpus = [train_args["train_file"][i].split("/")[-2] for i in crf2corpus[crf_idx]]
                    crf_training_corpus.append(train_corpus)

                    # store the prediction of the current branch
                    local_pred_tags.append(pred_tags)
def planner(trajectory):
    """Grow a random route graph through the maze, plot it, derive a policy.

    The graph starts at a fixed origin and is extended one ``agent_step`` at
    a time towards random samples until a node lands inside the target area.
    A breadth-first search then yields the path with the fewest edges from
    the origin to that node.  The path, the walls, the obstacle band and the
    target are drawn with matplotlib (``plt.show()`` is called before
    returning) and the path is converted into per-edge unit directions.

    :param trajectory: forwarded unchanged to ``Predictor.predict``; the
        second value it returns is indexed as ``[0, step, 0]`` to obtain the
        predicted x position of the moving obstacle at a given step
    :return: list of ``[dx, dy]`` pairs, one per path edge, with every
        component in ``{-1, 0, 1}``; ``None`` (without showing the plot) when
        a path milestone inside the obstacle band interferes with the
        obstacle
    """
    map_size = 12
    step_resolution = 0.25
    step_size = 10
    obstacle_width = 10
    obstacle_thickness = 5
    # Length of one graph edge: step_size sub-steps of step_resolution each.
    agent_step = step_resolution * step_size
    plt.figure()
    currentAxis = plt.gca()
    plt.axis([-map_size, map_size, -map_size, map_size])
    plt.title("RRG Route Map for the Maze Problem")
    plt.xlabel('X')
    plt.ylabel('Y')
    Node = namedtuple('Node', ['x', 'y'])
    task_test = tasks[105]
    predictor = Predictor(task=task_test,
                          checkpoint_path='../data/checkpoints/checkpoint_' +
                          task_test.task_name + '.pt')
    _, obstacle_pos_prediction = predictor.predict(trajectory)

    def is_valid_move(start_node, dir_x, dir_y):
        """Return True if one edge from start_node along (dir_x, dir_y) is free.

        The edge is sampled at ``step_size`` points, starting at the node
        itself; each sample must stay inside the map, outside every wall
        and, inside the obstacle band, outside the obstacle as predicted for
        the time step at which the agent would be there.
        """
        # Locals only: the previous ``global obstacle`` made the wall loop
        # and the predicted obstacle overwrite a module-level name instead
        # of staying inside this helper.
        end_x = start_node.x
        end_y = start_node.y
        start_step = calculate_step(start_node)

        for s in range(step_size):
            if (end_x < -map_size) or (end_x > map_size) or (
                    end_y < -map_size) or (end_y > map_size):
                return False

            for wall in walls:
                if wall.interfere_node(end_x, end_y):
                    return False

            if obstacle_range.interfere_node(end_x, end_y):
                cur_step = start_step + s

                # presumably 100 is the length of the predicted obstacle
                # trajectory -- TODO confirm
                if cur_step >= 100:
                    return False

                moving_obstacle = Area(
                    obstacle_pos_prediction[0, cur_step, 0].item(), 0,
                    obstacle_width, obstacle_thickness)

                if moving_obstacle.interfere_node(end_x, end_y):
                    return False

            end_x += dir_x * step_resolution
            end_y += dir_y * step_resolution

        return True

    def bfs(end_node, plot=False):
        """Breadth-first search from origin; return the node path to end_node.

        With ``plot=True`` every visited node and every edge towards a node
        that has not been dequeued yet is drawn in gray.  Raises ``KeyError``
        if ``end_node`` is not reachable from the origin.
        """
        parents = {}
        finished = set()
        queue = deque()
        queue.append(origin)

        while queue:
            parent = queue.popleft()
            finished.add(parent)

            if plot:
                plt.plot([parent.x], [parent.y],
                         'o',
                         color='gray',
                         markersize=4)

            for child in route_dict[parent]:
                if child not in finished:
                    if plot:
                        plt.plot([parent.x, child.x], [parent.y, child.y],
                                 color='gray')

                    # Only the first discovery fixes the parent, which is
                    # what makes the recovered path a fewest-edges path.
                    if child not in queue:
                        queue.append(child)
                        parents[child] = parent

        # Walk the parent links back from the goal to the origin.
        cur_node = end_node
        path = [end_node]

        while cur_node != origin:
            cur_node = parents[cur_node]
            path.append(cur_node)

        path.reverse()

        return path

    def calculate_step(end_node):
        """Return the number of sub-steps needed to reach end_node."""
        cur_path = bfs(end_node=end_node, plot=False)

        return (len(cur_path) - 1) * step_size

    def sign(delta_from, delta_to):
        """Return -1, 0 or 1 depending on how delta_to compares to delta_from."""
        return (delta_to > delta_from) - (delta_to < delta_from)

    obstacle = Area(0, 0, obstacle_width, obstacle_thickness)
    wall0 = Area(-8, 7.5, 12, 5)
    wall1 = Area(8, -7.5, 12, 5)
    walls = [wall0, wall1]
    target = Area(-8, 11, 8, 2)
    # Horizontal band in which the moving obstacle can be encountered.
    obstacle_range = Area(0, 0, map_size * 2 + 4, 5)
    origin = Node(2, -11)
    # Adjacency sets of the route graph, keyed by node.
    route_dict = {origin: set()}

    done = False
    last_node = None

    while not done:
        pos_rand_x = random.uniform(-map_size, map_size)
        pos_rand_y = random.uniform(-map_size, map_size)

        min_distance = float('inf')
        n_nearest = None

        for node in route_dict:
            # NOTE(review): this is the square root of the Manhattan
            # distance, not the Euclidean distance; kept as is because the
            # acceptance threshold below depends on it -- confirm intent.
            distance = sqrt(
                abs(node.x - pos_rand_x) + abs(node.y - pos_rand_y))

            if min_distance > distance:
                min_distance = distance
                n_nearest = node

        # Samples too far away from the graph are discarded.
        if min_distance > agent_step:
            continue

        direction_x, direction_y = choose_direction(n_nearest.x, n_nearest.y,
                                                    pos_rand_x, pos_rand_y)

        if not is_valid_move(n_nearest, direction_x, direction_y):
            continue

        new_node_x = n_nearest.x + direction_x * agent_step
        new_node_y = n_nearest.y + direction_y * agent_step

        new_node = Node(new_node_x, new_node_y)

        if new_node in route_dict:
            continue

        route_dict[new_node] = set()
        route_dict[n_nearest].add(new_node)
        route_dict[new_node].add(n_nearest)

        # Additionally link existing nodes to the new node (one direction
        # only).
        for p in route_dict:
            direction_x, direction_y = choose_direction(
                p.x, p.y, new_node.x, new_node.y)

            # NOTE(review): exact float equality on both axes, so only
            # diagonal neighbours at exactly agent_step qualify -- confirm.
            if abs(p.x - new_node.x) == agent_step and abs(p.y - new_node.y) == agent_step \
                    and is_valid_move(p, direction_x, direction_y):
                route_dict[p].add(new_node)

        if target.interfere_node(new_node_x, new_node_y):
            last_node = new_node
            done = True

    path = bfs(end_node=last_node, plot=True)

    for milestone in path:
        if obstacle_range.interfere_node(milestone.x, milestone.y):
            danger_step = calculate_step(milestone)

            # NOTE(review): the check uses the obstacle from the previous
            # iteration (initially centred at x=0); the obstacle for
            # danger_step is only computed afterwards -- confirm the order.
            if obstacle.interfere_node(milestone.x, milestone.y):
                return None

            obstacle = Area(obstacle_pos_prediction[0, danger_step, 0].item(),
                            0, obstacle_width, obstacle_thickness)
            currentAxis.add_patch(
                Rectangle((obstacle.min_x, obstacle.min_y),
                          obstacle.x_width,
                          obstacle.y_width,
                          fill=True,
                          facecolor="blue",
                          alpha=1))

    # One [dx, dy] unit direction per path edge.
    policy = []

    for i in range(len(path) - 1):
        policy_unit_x = sign(path[i].x, path[i + 1].x)
        policy_unit_y = sign(path[i].y, path[i + 1].y)

        policy.append([policy_unit_x, policy_unit_y])

    # Static scenery: walls, obstacle band and target.
    scenery = (
        (wall0, "green", 1),
        (wall1, "green", 1),
        (obstacle_range, "blue", 0.2),
        (target, "purple", 0.5),
    )

    for area, face_color, opacity in scenery:
        currentAxis.add_patch(
            Rectangle((area.min_x, area.min_y),
                      area.x_width,
                      area.y_width,
                      fill=True,
                      facecolor=face_color,
                      alpha=opacity))

    # Highlight the chosen path on top of the gray search graph.
    for i in range(1, len(path)):
        plt.plot([path[i].x], [path[i].y], ' o', color='orange')
        plt.plot([path[i - 1].x, path[i].x], [path[i - 1].y, path[i].y],
                 color='orange',
                 lw=3)

    plt.plot([origin.x], [origin.y], 'rs', markersize=10)

    plt.show()

    return policy
Exemple #4
0
class Bot:
    """VK community chat bot that recommends groups matching a user's interests.

    The bot listens for new messages through the bot long poll, classifies
    the wall texts of the groups a user is subscribed to with ``Predictor``
    and answers with paginated lists of groups stored in the database.  It
    also offers a password-protected admin mode for labelling groups by hand.
    """

    # Number of recommended groups listed in one message.
    PAGE_SIZE = 10

    def __init__(self, db, model_name):
        """
        reads the credentials from the environment and opens the VK and
        database sessions

        :param db: object providing ``create_session`` and the ``Groups``,
            ``GroupsIds`` and ``UserStatuses`` models
        :param model_name: passed to ``Predictor``
        :raises KeyError: if a required environment variable is missing
        :raises ValueError: if ``GROUP_ID`` or ``APP_ID`` is not an integer
        """
        group_token = os.environ['GROUP_TOKEN']
        group_id = int(os.environ['GROUP_ID'])
        service_token = os.environ['SERVICE_TOKEN']
        app_id = int(os.environ['APP_ID'])
        client_secret = os.environ['CLIENT_SECRET']

        # Users already greeted since this process started (in memory only).
        self.visited = set()

        self.admin_pwd = os.environ['ADMIN_PWD']
        # Categories offered in the admin labelling keyboard; a button's
        # payload refers to a category by its index in this sorted list.
        self.new_cats = sorted([
            'физика', 'математика', 'лингвистика', 'информатика', 'литература',
            'химия', 'география', "психология", "обществознание", "история",
            "музыка", "астрономия", "маркетинг", "биология", "спорт",
            "искусство", "бизнес"
        ])

        self.predictor = Predictor(model_name)
        self.db = db
        self.db_session = db.create_session()
        self.group_session = vk_api.VkApi(token=group_token,
                                          api_version='5.126')
        self.service_session = vk_api.VkApi(app_id=app_id,
                                            token=service_token,
                                            client_secret=client_secret)
        self.long_poll = VkBotLongPoll(self.group_session, group_id)
        self.group_api = self.group_session.get_api()
        self.service_api = self.service_session.get_api()

        # For dataset filtering: highest group id already stored in Groups.
        latest = self.db_session.query(db.Groups.group_id).order_by(
            db.Groups.group_id.desc()).first()
        self.latest_id = 0 if latest is None else latest[0]

    @classmethod
    def _page_count(cls, total: int) -> int:
        """
        computes how many pages are needed to show ``total`` groups

        :param total: number of groups
        :return: number of pages, at least 1 (so an empty result still has
        a first page)
        """
        # Ceiling division: 10 groups are one page, 11 groups are two.
        return max(1, -(-total // cls.PAGE_SIZE))

    def _get_user_status(self, user_id: int):
        """
        loads the stored status row of a user

        :param user_id: user ID
        :return: ``UserStatuses`` row or None if the user is unknown
        """
        return self.db_session.query(self.db.UserStatuses).filter(
            self.db.UserStatuses.user_id == user_id).first()

    def _find_groups(self, subjects):
        """
        loads the groups whose subject is one of the given subjects

        :param subjects: subject names; only the first three are used
        :return: list of ``GroupsIds`` rows, empty if no subject is given
        """
        subjects = list(subjects)[:3]
        if not subjects:
            return []
        conditions = [
            self.db.GroupsIds.subject == subject for subject in subjects
        ]
        return self.db_session.query(self.db.GroupsIds).filter(
            or_(*conditions)).all()

    def send_message(self,
                     user_id: int,
                     message: str,
                     keyboard: str = None) -> None:
        """
        sends a message to user using method messages.send
        (https://vk.com/dev/messages.send)

        :param user_id: recipient user ID
        :param message: message text
        :param keyboard: json describing keyboard attached with message
        :return: None
        """
        self.group_api.messages.send(user_id=user_id,
                                     random_id=get_random_id(),
                                     message=message,
                                     keyboard=keyboard)
        print(f'<-- message {message[:30]}{"..." if len(message) > 30 else ""}'
              f' to {user_id} has been sent')

    def get_posts(self,
                  owner_id: int,
                  count: int = 1) -> Union[List[dict], dict, None]:
        """
        gets posts from user's or group's wall using method wall.get
        (https://vk.com/dev/wall.get)

        :param owner_id: wall's owner ID
        :param count: count of posts
        :return: list of dictionaries when more than one post was received,
        a single dictionary when exactly one post was received, None when
        the wall is empty
        """
        posts = self.service_api.wall.get(owner_id=owner_id, count=count)
        print(f'group {owner_id} posts received')
        items = posts['items']
        if not items:
            print(f'error: {owner_id} {posts}')
            return None
        return items if len(items) > 1 else items[0]

    def get_subscriptions(self, user_id: int, count=100) -> List[int]:
        """
        gets user's subscriptions using method users.getSubscriptions
        (https://vk.com/dev/users.getSubscriptions)

        :param user_id: user ID
        :param count: maximum number of IDs returned; when more are
        available a random sample of {count} IDs is taken
        :return: list of IDs of open, not deactivated subscriptions
        """
        subscriptions = self.service_api.users.getSubscriptions(
            user_id=user_id, extended=1)
        print(f'received subscriptions from '
              f'{"user" if user_id > 0 else "group"} {abs(user_id)}')
        ids = [
            i['id'] for i in subscriptions['items']
            if not i['is_closed'] and 'type' in i and 'deactivated' not in i
        ]
        return ids if len(ids) <= count else sample(ids, count)

    def get_group_info(
        self, group_id: int
    ) -> Union[Dict[str, Union[str, int]], List[Dict[str, Union[str, int]]]]:
        """
        gets information about one or more groups using method groups.getById
        (https://vk.com/dev/groups.getById)

        :param group_id: group ID
        :return: a single dictionary when exactly one group was returned,
        otherwise the list of dictionaries describing the groups
        """
        info = self.service_api.groups.getById(group_id=group_id)
        print(f'received info from {group_id}')
        if len(info) == 1:
            return info[0]
        return info

    def listen(self) -> None:
        """
        gets updates from server and handles new messages
        :return: None
        """
        for event in self.long_poll.listen():
            if event.type == VkBotEventType.MESSAGE_NEW:
                self.process_new_message(event)

    def process_new_message(self, event):
        """
        dispatches an incoming message to the matching command handler

        :param event: long poll event with ``object['message']`` holding
        ``from_id``, ``text`` and optionally a JSON ``payload``
        :return: None
        """
        from_id = event.object['message']['from_id']
        cmd = event.object['message']['text']
        print(f'--> {from_id} sent "{cmd}"')

        # The payload comes from the client: tolerate broken JSON and JSON
        # that is not an object instead of crashing the listen loop.
        try:
            payload = json.loads(event.object['message'].get('payload', '{}'))
        except ValueError:
            payload = {}
        if not isinstance(payload, dict):
            payload = {}
        button = payload.get('button')
        if not isinstance(button, str):
            button = ''

        if button == 'start_analysis':
            self.command_start_analysis(from_id)
        elif 'show_recommendation' in button:
            self.command_show_recommendation(from_id, payload)
        elif ''.join(filter(str.isalpha, cmd.lower())) == self.admin_pwd:
            # The text is lower-cased and stripped of non-letters before it
            # is compared with the admin password.
            self.command_admin(from_id)
        elif 'dataset_filter' in button:
            self.command_dataset_filter(from_id, payload)
        else:
            self.command_start(from_id)

    def command_start(self, from_id):
        """
        greets the user, offers to start the analysis and stores the status
        'started' for the user

        :param from_id: user ID
        :return: None
        """
        keyboard = VkKeyboard(one_time=True)
        keyboard.add_button('Начать анализ',
                            color=VkKeyboardColor.POSITIVE,
                            payload=json.dumps({'button': 'start_analysis'}))
        msg = ('Здравствуйте, я - Виталя, бот-рекомендатор. Я помогу вам '
               'определить ваши интересы и подскажу, где найти ещё больше '
               'полезных групп ВКонтакте. Начнём анализ?')
        user_status = self._get_user_status(from_id)
        if user_status and user_status.subjects:
            keyboard.add_button('Перейти к рекомендациям',
                                color=VkKeyboardColor.SECONDARY,
                                payload=json.dumps(
                                    {'button': 'show_recommendation_1'}))
            # NOTE(review): the "welcome back" text is sent only when the
            # user is ALREADY in self.visited and the "press the button"
            # text on the first contact; this looks inverted -- confirm.
            msg = ('С возвращением! Желаете провести анализ снова или '
                   'посмотреть, что я рекомендовал вам в прошлый раз?'
                   if from_id in self.visited else 'Нужно нажать на кнопку')
            self.visited.add(from_id)
        self.send_message(from_id, msg, keyboard.get_keyboard())
        if user_status:
            user_status.status = 'started'
        else:
            self.db_session.add(
                self.db.UserStatuses(user_id=from_id, status='started'))
            print(f'=== user {from_id} added')
        self.db_session.commit()

    def command_start_analysis(self, from_id):
        """
        analyses the user's subscriptions, stores up to three predicted
        categories and sends the first page of recommended groups

        :param from_id: user ID
        :return: None
        """
        try:
            group_ids = self.get_subscriptions(from_id)
        except vk_api.exceptions.ApiError:
            message = 'Ваш профиль закрыт, я не могу увидеть подписки'
            keyboard = VkKeyboard(one_time=True)
            keyboard.add_button('Теперь профиль открыт, начать анализ',
                                color=VkKeyboardColor.POSITIVE,
                                payload=json.dumps(
                                    {'button': 'start_analysis'}))
            self.send_message(from_id, message, keyboard.get_keyboard())
            return

        message = ('Анализ может занять несколько минут. Пожалуйста, '
                   'подождите.')
        self.send_message(from_id, message)

        texts = []
        for _id in group_ids:
            posts = self.get_posts(-_id, 10)
            if posts is None:
                # Empty wall: nothing to analyse for this group.
                continue
            if isinstance(posts, dict):
                # get_posts returns a bare dict for a single post; without
                # this such groups were silently dropped from the analysis.
                posts = [posts]
            texts.append('\n'.join(
                post.get('text', '') for post in posts
                if not post.get('marked_as_ads')))

        prediction = list(map(itemgetter(0),
                              self.predictor.predict(texts)[:3]))

        user_status = self._get_user_status(from_id)
        if user_status is None:
            # The analysis button can arrive before the user was registered
            # by command_start.
            user_status = self.db.UserStatuses(user_id=from_id,
                                               status='show_page')
            self.db_session.add(user_status)
        user_status.subjects = '&'.join(prediction)
        user_status.status = 'show_page'
        user_status.page = 1
        self.db_session.commit()

        message = 'В ходе анализа было выявлено, что вас ' \
                  'интересуют следующие категории групп:\n'
        message += '\n'.join([
            f'{i}. {category.capitalize()}'
            for i, category in enumerate(prediction, 1)
        ])

        self.send_message(from_id, message)

        keyboard = VkKeyboard(one_time=True)
        keyboard.add_button('Начать анализ повторно',
                            color=VkKeyboardColor.SECONDARY,
                            payload=json.dumps({'button': 'start_analysis'}))

        # Works for fewer than three predicted categories as well.
        group_ids = self._find_groups(prediction)

        if len(group_ids) > 0:
            show_groups = group_ids[:self.PAGE_SIZE]
            message = 'Страница 1:\n'
            message += '\n'.join([
                f'{i}. {group.name} -- '
                f'https://vk.com/club{group.group_id} '
                for i, group in enumerate(show_groups, 1)
            ])
            # Page 1 is shown: "previous" wraps around to the last page and
            # "next" is page 2 if it exists, otherwise page 1 again.
            last_page = self._page_count(len(group_ids))
            next_page = 2 if last_page >= 2 else 1

            keyboard.add_line()
            keyboard.add_button(f'Страница {last_page}',
                                color=VkKeyboardColor.PRIMARY,
                                payload=json.dumps({
                                    'button':
                                    f'show_recommendation_{last_page}'
                                }))
            keyboard.add_button(f'Страница {next_page}',
                                color=VkKeyboardColor.PRIMARY,
                                payload=json.dumps({
                                    'button':
                                    f'show_recommendation_{next_page}'
                                }))
        else:
            message = "Проанализировать ещё раз?"
        self.send_message(from_id, message, keyboard.get_keyboard())

    def command_show_recommendation(self, from_id, payload):
        """
        sends the requested page of groups recommended to the user

        :param from_id: user ID
        :param payload: button payload whose 'button' value ends with the
        page number, e.g. 'show_recommendation_2'
        :return: None
        """
        user_status = self._get_user_status(from_id)
        if user_status is None or not user_status.subjects:
            # Nothing was recommended to this user yet.
            self.command_start(from_id)
            return

        try:
            page = int(payload['button'].split('_')[2])
        except (IndexError, ValueError):
            page = 1

        group_ids = self._find_groups(user_status.subjects.split('&'))
        page_count = self._page_count(len(group_ids))
        # Keep the page inside the existing range.
        page = min(max(page, 1), page_count)

        first = (page - 1) * self.PAGE_SIZE
        show_groups = group_ids[first:first + self.PAGE_SIZE]
        message = f'Страница {page}:\n'
        message += '\n'.join([
            f'{i}. {group.name} -- '
            f'https://vk.com/club{group.group_id}'
            for i, group in enumerate(show_groups, 1)
        ])
        keyboard = VkKeyboard(one_time=True)
        keyboard.add_button('Начать анализ повторно',
                            color=VkKeyboardColor.SECONDARY,
                            payload=json.dumps({'button': 'start_analysis'}))
        keyboard.add_line()
        # Both neighbours wrap around at the ends of the page range.
        previous_page = page - 1 if page > 1 else page_count
        next_page = (page + 1) % page_count or page_count
        for page_number in (previous_page, next_page):
            keyboard.add_button(f'Страница {page_number}',
                                color=VkKeyboardColor.PRIMARY,
                                payload=json.dumps({
                                    'button':
                                    f'show_recommendation_{page_number}'
                                }))
        self.send_message(from_id, message, keyboard.get_keyboard())
        user_status.status = 'show_page'
        user_status.page = page
        self.db_session.commit()

    def command_admin(self, from_id):
        """
        shows the admin panel and stores the status 'admin' for the user

        :param from_id: user ID
        :return: None
        """
        print(f'*** {from_id} entered admin panel')

        keyboard = VkKeyboard(one_time=True)
        keyboard.add_button('Фильтровать датасет',
                            color=VkKeyboardColor.PRIMARY,
                            payload=json.dumps({'button': 'dataset_filter'}))
        keyboard.add_button('Выйти',
                            color=VkKeyboardColor.NEGATIVE,
                            payload=json.dumps({'command': 'start'}))
        msg = 'Вы вошли в панель администратора'
        self.send_message(from_id, msg, keyboard.get_keyboard())

        user_status = self._get_user_status(from_id)
        if user_status:
            user_status.status = 'admin'
        else:
            self.db_session.add(
                self.db.UserStatuses(user_id=from_id, status='admin'))
        self.db_session.commit()

    def command_dataset_filter(self, from_id, payload):
        """
        stores the category chosen by an admin for a group (if the payload
        carries one) and asks for the category of the next unlabelled group

        :param from_id: user ID
        :param payload: button payload; 'button' is either 'dataset_filter'
        or 'dataset_filter#<group id>#<category index or -1>'
        :return: None
        """
        user_status = self._get_user_status(from_id)
        if user_status is None or user_status.status != 'admin':
            # Only users in admin mode may label the dataset.
            keyboard = VkKeyboard(one_time=True)
            keyboard.add_button('Начать анализ',
                                color=VkKeyboardColor.POSITIVE,
                                payload=json.dumps(
                                    {'button': 'start_analysis'}))
            msg = 'Начнём анализ?'
            self.send_message(from_id, msg, keyboard.get_keyboard())
            return

        if '#' in payload['button']:
            _, gr_id, cat = payload['button'].split('#')
            gr_id = int(gr_id)
            msg = None
            if gr_id > self.latest_id:
                old_group = self.db_session.query(self.db.GroupsIds).get(
                    gr_id)
                if old_group is not None:
                    cat = self.new_cats[int(cat)] if cat != '-1' else 'other'
                    self.db_session.add(
                        self.db.Groups(group_id=gr_id,
                                       name=old_group.name,
                                       subject=cat,
                                       link=old_group.link))
                    # Persist the label right away; advance latest_id only
                    # after the row has really been stored.
                    self.db_session.commit()
                    self.latest_id = gr_id
                    msg = (f"{old_group.name} теперь относится к группе "
                           f"{cat.capitalize()}")
            else:
                msg = f'Группа {gr_id} уже была добавлена'
            if msg is not None:
                self.send_message(from_id, msg)

        # Next group that has not been labelled yet.
        group = self.db_session.query(self.db.GroupsIds).order_by(
            self.db.GroupsIds.group_id.asc()).filter(
                self.db.GroupsIds.group_id > self.latest_id).first()
        if group is None:
            # Every group is labelled: nothing left to ask about.
            self.send_message(from_id, 'Все группы уже размечены')
            return

        keyboard = VkKeyboard(one_time=True)
        msg = ('К какой категории относится эта группа?\n'
               f'https://vk.com/club{group.group_id}\n\n')
        for i, cat in enumerate(self.new_cats):
            keyboard.add_button(
                cat.capitalize(),
                color=VkKeyboardColor.SECONDARY,
                payload=json.dumps({
                    'button':
                    f'dataset_filter#{group.group_id}#{i}'
                }))
            # Three category buttons per keyboard row.
            if (i + 1) % 3 == 0:
                keyboard.add_line()
        # Close an incomplete last row before the control buttons.
        if len(self.new_cats) % 3 != 0:
            keyboard.add_line()
        keyboard.add_button('Ни к одной',
                            color=VkKeyboardColor.NEGATIVE,
                            payload=json.dumps({
                                'button':
                                f'dataset_filter#{group.group_id}#-1'
                            }))
        keyboard.add_button('Завершить',
                            color=VkKeyboardColor.NEGATIVE,
                            payload=json.dumps({'command': 'start'}))
        self.send_message(from_id, msg, keyboard.get_keyboard())
Exemple #5
0
# Cut the test set down to the first n_test rows.
# presumably n_test is a multiple of TEST_BATCH_SIZE -- TODO confirm against
# get_test_sample_size
n_test = get_test_sample_size(X_test.shape[0], k=TEST_BATCH_SIZE)
X_test = X_test[:n_test, :]
y_test = y_test[:n_test, :]

# Column 0 of X_test holds the users, column 1 the items; everything is
# reshaped into column vectors.
users_test = X_test[:, 0].reshape(-1,1)
items_test = X_test[:, 1].reshape(-1,1)
y_test = y_test.reshape(-1,1)


predictor = Predictor(model=model, batch_size=TEST_BATCH_SIZE, users=users_test, items=items_test, y=y_test,
                      use_cuda=args.cuda, n_items=stats["n_items"])



# Predictions as a column vector so they can be concatenated below.
preds = predictor.predict().reshape(-1,1)


# One row per test sample: user id, prediction and ground truth.
output = pd.DataFrame(np.concatenate((users_test, preds, y_test), axis=1),
                      columns = ['user_id', 'pred', 'y_true'])


if args.task == "choice":

    # Ranking metrics for the choice task, evaluated at EVAL_K.
    output, hit_ratio, ndcg = get_choice_eval_metrics(output, at_k=EVAL_K)

    print("hit ratio: {:.4f}".format(hit_ratio))
    print("ndcg: {:.4f}".format(ndcg))

else: