def _scrape(group_repository: GroupRepository, vk_api: vk.API, urls: List):
    """Fetch and store the user-count attribute for every group URL.

    Processing is best-effort: a failure on one URL is reported on stdout
    and the loop continues with the next URL.

    Args:
        group_repository: Repository whose ``save(group_data, attribute_data)``
            is called once per successfully processed URL.
        vk_api: API handle forwarded to ``_get_group_info``.
        urls: Iterable of group URLs to process.
    """
    for group_url in urls:
        try:
            group_data, attribute_data = _get_group_info(
                vk_api, group_url, _get_user_count_attribute)
            group_repository.save(group_data, attribute_data)
        except Exception as error:
            # Catch Exception rather than everything: KeyboardInterrupt and
            # SystemExit must still be able to stop the run.
            print('Error occurred while processing for group url {0}: {1!r}'
                  .format(group_url, error))
Exemple #2
0
    def test__should_save_group_data_when_not_exists(self):
        """Save a group with one attribute, then validate the row read back by vk_id."""
        expected_group = GroupRepository.GroupData(123, 'url', 'blabla')
        expected_attribute = GroupRepository.AttributeData('user_count', 199)

        GroupRepository(self.session).save(expected_group, expected_attribute)

        # Read the record back through the same session the repository used.
        stored_group = (
            self.session.query(Group)
            .filter(Group.vk_id == expected_group.group_id)
            .first()
        )
        self._checkGroupValidity(expected_group, expected_attribute, stored_group)
def scrape(service_token, file_name='input.csv', db_name='vk_analytics.sqlite'):
    """Scrape attributes for the groups listed in ``file_name`` and store them.

    Args:
        service_token: Token passed to ``_vk_auth`` to obtain the API handle.
        file_name: Input file passed to ``_parse_urls`` to obtain the group URLs.
        db_name: Database name passed to ``schema.init_db_session``.
    """
    print('Scraping vk groups attributes on {0}'.format(datetime.datetime.now()))
    urls = _parse_urls(file_name)
    vk_api = _vk_auth(service_token)
    db_session = schema.init_db_session(db_name)
    try:
        group_repository = GroupRepository(db_session)
        _scrape(group_repository, vk_api, urls)
    finally:
        # Always release the session, even if scraping is interrupted or fails.
        db_session.close()
Exemple #4
0
 def __init__(self):
     """Store the objects returned by each repository's ``get_instance()``."""
     self.user_repository = UserRepository.get_instance()
     self.group_repository = GroupRepository.get_instance()
def _get_group_info(_vk_api: vk.API, group_url, attribute_function) -> \
        Tuple[GroupRepository.GroupData, GroupRepository.AttributeData]:
    """Resolve a group URL to its group data plus one computed attribute.

    Args:
        _vk_api: API handle used for the ``groups.getById`` lookup and
            forwarded to ``attribute_function``.
        group_url: Group URL in ``host/name`` form, optionally prefixed with
            a scheme such as ``https://``.
        attribute_function: Callable invoked as
            ``attribute_function(_vk_api, group_id)``; its result is returned
            as the second tuple element.

    Returns:
        ``(GroupData(group_id, group_url, group_name), attribute)``.

    Raises:
        IndexError: If ``group_url`` has no ``/`` after the host part.
    """
    # Strip a leading scheme so that the segment after the host is always at
    # index 1; without this "https://vk.com/name" would yield an empty name.
    # A "://" that appears after the first "/" is part of the path and is
    # left alone.
    scheme, separator, remainder = group_url.partition('://')
    location = remainder if separator and '/' not in scheme else group_url
    group_name = location.split('/')[1]
    group_id = _vk_api.groups.getById(group_id=group_name, v=5.122)[0]['id']
    return (GroupRepository.GroupData(group_id, group_url, group_name),
            attribute_function(_vk_api, group_id))
def _get_user_count_attribute(_vk_api: vk.API, group_id) -> GroupRepository.AttributeData:
    """Build the 'user_count' attribute from the ``count`` field of ``groups.getMembers``."""
    members_response = _vk_api.groups.getMembers(group_id=group_id, v=5.122)
    return GroupRepository.AttributeData(
        'user_count',
        members_response["count"],
    )