Ejemplo n.º 1
0
 def getprofile(self, params):
     """Fetch a user's encoded profile and POST it to the server.

     ``params`` is used verbatim as the user name.  The profile and friends
     list returned by ``get_user_encoded_profile`` are posted as a form to
     ``<base_url>/getprofile/<username>``.  If ``PageNotFound`` is raised
     anywhere in that sequence, ``self.report_not_found_user`` supplies the
     response instead.  The body of whichever response was obtained is then
     passed to ``self._handleCommand`` with ``do_sleep=True``.
     """
     # The job carries a single parameter: the user name.
     user = params
     logger = logging.getLogger("GETPROFILE")
     logger.info("BEGIN %s", params)
     try:
         encoded_profile, friend_entries = get_user_encoded_profile(user)
         logger.info("GOT PROFILE FOR USER %s", user)
         # Work on a copy of the instance headers.  The request goes
         # through the "upload" helper but is really a plain form POST.
         request_headers = dict(self.headers)
         payload = {
             'username': user,
             'profile': encoded_profile,
             'friends-list': FINDUSERS_SEPARATOR.join(friend_entries),
             'friends-list-count': str(len(friend_entries)),
             'client-id': self.id,
         }
         # Send the profile form to the server.
         logger.info("UPLOADING TO SERVER %s", params)
         target_url = self.base_url + '/getprofile/' + user
         reply = upload_aux.upload_form(
             target_url, payload, request_headers)
     except PageNotFound:
         reply = self.report_not_found_user(user, logger)
     logger.info("END %s", params)
     # The job is done; the server's reply carries the next command.
     # That command MUST be SLEEP, and we will sleep for at least
     # self.MIN_SLEEP.
     self._handleCommand(reply.read(), do_sleep=True)
Ejemplo n.º 2
0
 def findusers(self, params):
     """Retrieve users from the users search and send them to the server.

     Encoded params:
         a string whose first character is the search gender code (must be
         in FINDUSERS_VALID_GENDERS) and whose remainder is the decimal
         search page number, e.g. ``<gender><page>``.

     Raises:
         AssertionError: if the gender code is not a valid one.
         ValueError: if the page number part cannot be parsed by ``int``.
         IndexError: if ``params`` is empty.
     """
     # Split the job into the search gender and the search page number.
     gender, page_num = params[0], int(params[1:])
     # Validate explicitly instead of with `assert`, which is stripped when
     # Python runs with -O.  AssertionError is kept so that callers relying
     # on the previous exception type keep working.
     if gender not in FINDUSERS_VALID_GENDERS:
         raise AssertionError(
             "invalid findusers gender %r in params %r" % (gender, params))
     # Download search result pages
     logging.info("FINDUSERS %s BEGIN", params)
     retriever = FindUsersRetriver()
     found_users = retriever.get_users_from_pages(gender, page_num)
     logging.info("FINDUSERS %s GOT FINDUSERS DATA", params)
     # Setup form and headers (copy the headers so the instance's own
     # mapping is not handed to the upload helper).
     #    Although we use the "upload" code, this is a plain POST
     upload_headers = dict(self.headers)
     form_data = {'page-id': params,
                  'page-users': FINDUSERS_SEPARATOR.join(found_users),
                  'page-users-count': str(len(found_users)),
                  'client-id': self.id}
     # Send the list of found users to the server
     upload_url = self.base_url + '/findusers/' + params
     response = upload_aux.upload_form(upload_url, form_data, upload_headers)
     logging.info("FINDUSERS %s END", params)
     # Ok. Command, handled. Now what?
     # Do what the server told us to.
     # Command MUST be SLEEP. We will sleep for at least self.MIN_SLEEP
     command = response.read()
     self._handleCommand(command, do_sleep=True)
Ejemplo n.º 3
0
 def run(self):
     """Thread execution code: submit the stored backtrace to the server.

     Posts ``self.backtrace`` merged with ``self.extra_params`` as a form to
     ``self.submission_url``, using a copy of ``self.headers``.

     This is a best-effort operation: any ordinary error (``Exception``) is
     logged through ``self.LOG`` and swallowed, since a failure of the
     backtrace reporter cannot itself be reported.  ``KeyboardInterrupt``
     and ``SystemExit`` are not intercepted and propagate normally.
     """
     try:
         # Setup form and headers
         #    Although we use the "upload" code, this is a plain POST
         upload_headers = dict(self.headers)
         form_data = {'backtrace': self.backtrace}
         form_data.update(self.extra_params)
         # Submit the backtrace; the server's response is not needed.
         upload_aux.upload_form(self.submission_url, form_data,
                                upload_headers)
     except Exception:
         # Really, we cannot do anything here.  We cannot report backtraces
         # on a backtrace reporter, so just record the failure locally.
         self.LOG.exception("error while submitting backtrace.")
Ejemplo n.º 4
0
    def getlibrary(self, params):
        """Retrieve a user's music library and send it to the server.

        TODO: we should've encoded the command/job as "username#last_crawled_ts"
              but this will be left as pending work for the next crawling...

        Encoded params:
            username, as string

        Errors raised while retrieving or uploading the library are NOT
        handled here; they propagate to the caller.  On success the server's
        response body is passed to ``self._handleCommand`` with
        ``do_sleep=True``.
        """
        log = logging.getLogger("GETLIBRARY")
        # The only parameter we get is the user name
        username = params
        # XXX The listened_time_threshold was supposed to come encoded with
        # the username but we had to change too much stuff in the server-side
        # to make this happen. See the TODO in the docstring.
        listened_time_threshold = LibrarySnapshotsRetriever.DAY_ONE
        # Retrieve the encoded library snapshot.
        # XXX We are not handling errors -- any exception raised from here
        # up to the upload simply propagates.
        log.info("BEGIN %s", params)
        result = retrieve_encoded_user_library_snapshot(
            username, listened_time_threshold)
        library, last_crawled_ts = result
        log.info("GOT LIBRARY FOR USER %s", username)
        # Setup form and headers (copy the headers so the instance's own
        # mapping is not handed to the upload helper).
        #    Although we use the "upload" code, this is a plain POST
        upload_headers = dict(self.headers)
        form_data = {'username': username,
                     'library': library,
                     'last-crawled-ts': str(int(last_crawled_ts)),
                     'client-id': self.id}
        # Send the library snapshot to the server
        log.info("UPLOADING TO SERVER %s", params)
        upload_url = self.base_url + '/getlibrary/' + username
        response = upload_aux.upload_form(upload_url, form_data,
                                          upload_headers)
        log.info("END %s", params)
        # Ok. Command, handled. Now what?
        # Do what the server told us to.
        # Command MUST be SLEEP. We will sleep for at least self.MIN_SLEEP
        command = response.read()
        self._handleCommand(command, do_sleep=True)