def make_new_members_request(self):
    '''
    Request the next batch of "recently joined" members.

    Sends a GET to ``self.members_url`` with the cursor captured in
    ``self.cursor`` (a regex match object; group 1 is the cursor value),
    stores the response in ``self.current_result``, increments
    ``self.requests`` and logs the final URL, status code and timing.
    Sleeps for two seconds before returning.

    :return: None
    '''
    cursor_value = self.cursor.group(1)
    log('Doing the request with cursor {}'.format(cursor_value))

    # Query string for the members endpoint. Key order is kept as-is
    # because it determines the order of parameters in the final URL.
    query = {
        'gid': self.group_id,
        'order': 'date',
        'view': 'list',
        'limit': self.basket_size,
        'sectiontype': 'recently_joined',
        'memberstabexp': 1,
        'cursor': cursor_value,
        'start': 15,
        'dpr': 1,
        '__user': self.user_id,
        '__a': 1,
        '__dyn': self.dyn_rand(),
        '__req': 1,
        '__be': 1,
        '__pc': 'PHASED%3ADEFAULT',
        '__rev': self.next_req_params['__rev'],
        '__spin_r': self.next_req_params['__spin_r'],
        '__spin_b': 'trunk',
        '__spin_t': self.next_req_params['__spin_t']
    }

    started = time.time()
    response = self.sess.get(self.members_url, params=query)
    self.current_result = response
    log('Doing new request:')
    elapsed = round(time.time() - started, 2)
    self.requests = self.requests + 1

    log('~~~~URL~~~~')
    log(response.url)
    log_html(response, 'members_last_req')
    summary = 'Request number {} finished with status code {} in {} seconds'
    log(summary.format(self.requests, response.status_code, elapsed))

    # Fixed pause after every request (presumably to throttle the
    # scraping rate -- confirm with the caller's expectations).
    time.sleep(2)
def get_current_cursor(self):
    '''
    Extract the pagination cursor from ``self.current_result.text``.

    The regex match object is stored in ``self.cursor``; the cursor value
    itself is available as ``self.cursor.group(1)``. When nothing matches,
    ``self.cursor`` is left as None, the response is dumped via
    ``log_html`` and ``NoCursor`` is raised.

    :return: None
    :raises NoCursor: if the response contains no cursor
    '''
    log('Retrieving cursor from response ..')
    cursor_pattern = (
        r'sectiontype=recently_joined&memberstabexp=1&cursor=(.*?)&')
    self.cursor = re.search(cursor_pattern, self.current_result.text)

    if self.cursor is not None:
        log('Current cursor is now {}'.format(self.cursor.group(1)))
        return

    # No match: keep a copy of the offending response, then fail loudly.
    log('No cursor found in current response!')
    log_html(self.current_result, 'cursor_error')
    raise NoCursor('No cursor found in current response!')
def __init__(self, group_id, user_id, basket_size, sess):
    '''
    Set up the instance and load the first members page of the group.

    Besides storing the arguments, this fetches the group's members page
    through ``sess``, calls ``gather_members()`` and
    ``get_current_cursor()`` on that response and stores the result of
    ``get_next_req_params()`` in ``self.next_req_params``.

    :param group_id: id of the group, inserted into the members URL and
        used as the cache file name
    :param user_id: stored as ``self.user_id``
    :param basket_size: stored as ``self.basket_size``
    :param sess: session object exposing ``get`` (presumably a
        ``requests.Session`` -- confirm against the caller)
    '''
    self.group_id = group_id
    self.user_id = user_id
    self.basket_size = basket_size
    self.sess = sess
    self.cursor = ''
    self.members = set()

    # One cache file per group: <cache_path>/<group_id>.pkl
    cache_file_name = '{}.pkl'.format(self.group_id)
    self.cache_file_path = os.path.join(self.cache_path, cache_file_name)

    # set current result to members page
    members_page_url = 'https://www.facebook.com/groups/{}/members/'.format(
        group_id)
    self.current_result = self.sess.get(members_page_url)
    log_html(self.current_result, 'group_members')

    log('Retrieving members from initial group page..')
    self.gather_members()
    self.get_current_cursor()
    self.next_req_params = self.get_next_req_params()
def login(self):
    '''
    Submit the login form found on the Facebook start page.

    Fetches the start page, copies every named ``<input>`` of the form
    with id ``login_form`` into the POST data (an empty string is used
    when an input has no ``value``), fills in the credentials from
    ``self.user`` and posts the data to the form's ``action`` URL. Both
    responses are stored in ``self.current_result`` in turn.

    :return: None
    '''
    log('Getting to login page...')
    login_url = "https://www.facebook.com/"
    self.current_result = self.sess.get(login_url)

    # we need to parse login form for constructing the post data
    soup = BeautifulSoup(self.current_result.text, 'html.parser')
    form = soup.find_all('form', id='login_form')[0]
    target_url = form["action"]

    form_data = {
        field["name"]: field.get("value", '')
        for field in form.find_all('input')
        if field.has_attr("name")
    }
    # Credentials go in last so they override any same-named inputs.
    form_data['email'] = self.user['username']
    form_data['pass'] = self.user['password']

    log('Sending login request to {}'.format(target_url))
    self.current_result = self.sess.post(target_url, form_data)
    log_html(self.current_result, 'login')
def get_about_section(self, section):
    '''
    Return the response for a profile section, fetching it at most once.

    Responses are kept in ``self.profile_sections`` keyed by section
    name. On a miss the section is requested from ``self.secured_url``
    with ``section`` as a query parameter, stored, and dumped via
    ``log_html``.

    :param section: section name, used both as query parameter and as
        cache key
    :return: the stored response for ``section``
    '''
    if section in self.profile_sections:
        return self.profile_sections[section]

    response = self.sess.get(self.secured_url, params={'section': section})
    self.profile_sections[section] = response
    log_html(response, section)
    return response