def getprofile(self, params):
    """Fetch one user's encoded profile and POST it to the server.

    ``params`` is the user name; it is the only parameter this command
    receives.  When the profile lookup raises ``PageNotFound`` the miss is
    reported through ``self.report_not_found_user`` instead, and that
    call's return value is used as the server response.

    Whatever the server answers is read and handed to
    ``self._handleCommand`` with ``do_sleep=True``.
    """
    logger = logging.getLogger("GETPROFILE")
    username = params

    logger.info("BEGIN %s", params)
    try:
        # Everything up to and including the upload stays inside the try so
        # that PageNotFound is handled no matter which step raises it.
        profile, friends = get_user_encoded_profile(username)
        logger.info("GOT PROFILE FOR USER %s", username)

        # The "upload" helper is reused here, but this is a plain POST.
        post_headers = dict(self.headers)
        joined_friends = FINDUSERS_SEPARATOR.join(friends)
        friends_total = str(len(friends))
        post_fields = {
            'username': username,
            'profile': profile,
            'friends-list': joined_friends,
            'friends-list-count': friends_total,
            'client-id': self.id,
        }

        logger.info("UPLOADING TO SERVER %s", params)
        target_url = self.base_url + '/getprofile/' + username
        response = upload_aux.upload_form(target_url, post_fields,
                                          post_headers)
    except PageNotFound:
        response = self.report_not_found_user(username, logger)
    logger.info("END %s", params)

    # The command has been handled; now do what the server told us to.
    # Per the original note, the reply MUST be SLEEP and we sleep for at
    # least self.MIN_SLEEP.
    next_command = response.read()
    self._handleCommand(next_command, do_sleep=True)
def findusers(self, params):
    """Fetch user-search result pages and POST the found users to the server.

    ``params`` encodes the search as one gender character followed by the
    page number (everything after the first character is parsed with
    ``int``).  The gender must be a member of ``FINDUSERS_VALID_GENDERS``.

    Whatever the server answers is read and handed to
    ``self._handleCommand`` with ``do_sleep=True``.
    """
    # Split the encoded parameter into search gender and search page.
    gender = params[0]
    page_num = int(params[1:])
    # NOTE: this check is an assert, so it is skipped when Python runs
    # with -O.
    assert gender in FINDUSERS_VALID_GENDERS

    # Download the search result pages.
    logging.info("FINDUSERS %s BEGIN", params)
    users = FindUsersRetriver().get_users_from_pages(gender, page_num)
    logging.info("FINDUSERS %s GOT FINDUSERS DATA", params)

    # The "upload" helper is reused here, but this is a plain POST.
    post_headers = dict(self.headers)
    joined_users = FINDUSERS_SEPARATOR.join(users)
    users_total = str(len(users))
    post_fields = {
        'page-id': params,
        'page-users': joined_users,
        'page-users-count': users_total,
        'client-id': self.id,
    }

    target_url = self.base_url + '/findusers/' + params
    response = upload_aux.upload_form(target_url, post_fields, post_headers)
    logging.info("FINDUSERS %s END", params)

    # The command has been handled; now do what the server told us to.
    # Per the original note, the reply MUST be SLEEP and we sleep for at
    # least self.MIN_SLEEP.
    next_command = response.read()
    self._handleCommand(next_command, do_sleep=True)
def run(self):
    """Thread execution code: submit the stored backtrace to the server.

    Builds a form from ``self.backtrace`` plus every entry of
    ``self.extra_params`` and POSTs it to ``self.submission_url`` using a
    copy of ``self.headers``.

    This is strictly best-effort.  Any ``Exception`` raised while building
    or sending the form is logged through ``self.LOG.exception`` and then
    swallowed, because a failure of the backtrace reporter cannot itself
    be reported anywhere.  Exceptions that do not derive from ``Exception``
    (``SystemExit``, ``KeyboardInterrupt``, ...) are deliberately left to
    propagate so that interpreter shutdown is not masked.
    """
    try:
        # The "upload" helper is reused here, but this is a plain POST.
        upload_headers = dict(self.headers)
        form_data = {'backtrace': self.backtrace}
        form_data.update(self.extra_params)

        # The server's reply is not needed, so it is not kept.
        upload_aux.upload_form(self.submission_url, form_data,
                               upload_headers)
    except Exception:
        # Really, we cannot do anything here. We cannot report backtraces
        # on a backtrace reporter.
        self.LOG.exception("error while submitting backtrace.")
def getlibrary(self, params):
    """Retrieve a user's music library and POST it to the server.

    ``params`` is the user name, as a string.

    TODO: the command/job should have been encoded as
    "username#last_crawled_ts", but this is left as pending work for the
    next crawl.

    Whatever the server answers is read and handed to
    ``self._handleCommand`` with ``do_sleep=True``.
    """
    logger = logging.getLogger("GETLIBRARY")
    username = params

    # XXX The listened-time threshold was supposed to come encoded with the
    # user name, but that required too many server-side changes (see the
    # TODO above), so a fixed value is used.
    threshold = LibrarySnapshotsRetriever.DAY_ONE

    logger.info("BEGIN %s", params)

    # XXX Errors are not handled here: anything raised while fetching or
    # uploading propagates to the caller.
    snapshot = retrieve_encoded_user_library_snapshot(username, threshold)
    library, last_crawled_ts = snapshot
    logger.info("GOT LIBRARY FOR USER %s", username)

    # The "upload" helper is reused here, but this is a plain POST.
    post_headers = dict(self.headers)
    crawled_ts_text = str(int(last_crawled_ts))
    post_fields = {
        'username': username,
        'library': library,
        'last-crawled-ts': crawled_ts_text,
        'client-id': self.id,
    }

    logger.info("UPLOADING TO SERVER %s", params)
    target_url = self.base_url + '/getlibrary/' + username
    response = upload_aux.upload_form(target_url, post_fields, post_headers)
    logger.info("END %s", params)

    # The command has been handled; now do what the server told us to.
    # Per the original note, the reply MUST be SLEEP and we sleep for at
    # least self.MIN_SLEEP.
    next_command = response.read()
    self._handleCommand(next_command, do_sleep=True)