コード例 #1
0
ファイル: discourse.py プロジェクト: albertinisg/Sibyl
    def process_users(self, users_ids):
        """Fetch users that are not stored yet and add them as People rows.

        For every id in ``users_ids`` that is neither in
        ``self.users_blacklist`` nor already present in the People table
        (matched on ``People.username``), download
        ``<self.url>/users/<id>.json``, build a People row from the ``user``
        object of the response and add it to the session. All new rows are
        committed together once the loop is done.

        An id whose response cannot be parsed, or that carries no ``user``
        object, is logged, appended to ``self.users_blacklist`` and skipped.

        Does nothing when ``users_ids`` is None.
        """
        if users_ids is None:
            return

        for user_id in users_ids:
            if user_id in self.users_blacklist:
                continue
            known = self.session.query(People).filter(
                People.username == user_id).first()
            if known is not None:
                continue

            url = self.url + "/users/" + user_id + ".json"
            logging.info("Getting user " + user_id)
            logging.info(url)
            # NOTE(review): verify=False disables TLS certificate checks;
            # confirm this is intentional for the target server.
            stream = requests.get(url, verify=False)
            try:
                parser = JSONParser(unicode(stream.text))
                parser.parse()
                # A response may parse fine and still lack the 'user' key;
                # handle that here so one bad id cannot abort the whole
                # batch before the commit below.
                user = parser.data['user']
            except Exception:
                # Deliberately not a bare except: KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                logging.error("Can't get " + user_id + " data")
                self.users_blacklist.append(user_id)
                continue

            dbuser = People()
            dbuser.username = user['username']
            dbuser.reputation = user['trust_level']
            dbuser.avatar = user['uploaded_avatar_id']
            dbuser.last_seen_at = user['last_posted_at']
            dbuser.joined_at = user['created_at']
            dbuser.identifier = user['id']
            self.session.add(dbuser)
            self.total_users += 1
        self.session.commit()
コード例 #2
0
    def process_users(self, users_ids):
        """Fetch Stack Overflow users by id and store them as People rows.

        ``users_ids`` is a string of user ids separated by ";", which is
        placed verbatim in the ``/2.2/users/<ids>`` request path. Each page
        of the response is parsed, and the users it lists are added to
        ``self.session`` and committed page by page.

        When ``self.debug`` is set, ``StackSampleData.users`` is used in
        place of an API call and exactly one pass is made.

        Does nothing when ``users_ids`` is None.

        Raises:
            Exception: if ``users_ids`` holds more than ``self.pagesize``
                ids, or if a response has no ``has_more`` field.
        """
        if users_ids is None:
            return
        if len(users_ids.split(";")) > self.pagesize:
            logging.error("Max ids overcome in process_users " + users_ids)
            raise Exception("Max ids overcome in process_users")

        base_url = self.url + '/2.2/users/' + str(users_ids) + '?'
        base_url += 'order=desc&sort=reputation&site=stackoverflow&key=' + self.api_key
        base_url += '&' + 'pagesize=' + str(self.pagesize)
        date_format = '%Y-%m-%d %H:%M:%S'
        has_more = True
        page = 1

        while has_more:
            url = base_url + "&page=" + str(page)
            if not self.debug:
                data = self._get_api_data(url)
            else:
                data = StackSampleData.users
            parser = JSONParser(unicode(data))
            parser.parse()
            # Expected top-level keys:
            # [u'has_more', u'items', u'quota_max', u'quota_remaining']
            if 'has_more' not in parser.data:
                logging.error("No has_more in JSON response")
                logging.error("%s", parser.data)
                # A bare `raise` has no active exception to re-raise here,
                # so raise an explicit one.
                raise Exception("No has_more in JSON response")
            # The debug sample is always the same payload, so never loop on
            # it even if it claims there are more pages.
            has_more = parser.data['has_more'] and not self.debug
            # Advance to the next page; without this the same page would be
            # requested forever while has_more stays true.
            page += 1

            for user in parser.data['items']:
                dbuser = People()
                dbuser.username = user['display_name']
                dbuser.reputation = user['reputation']
                if 'profile_image' in user:
                    dbuser.avatar = user['profile_image']
                # NOTE(review): fromtimestamp() converts to local time;
                # confirm that is the zone the database expects.
                dbuser.last_seen_at = datetime.datetime.fromtimestamp(
                    int(user['last_access_date'])).strftime(date_format)
                dbuser.joined_at = datetime.datetime.fromtimestamp(
                    int(user['creation_date'])).strftime(date_format)
                dbuser.identifier = user['user_id']
                self.session.add(dbuser)
            self.session.commit()
コード例 #3
0
ファイル: discourse.py プロジェクト: albertinisg/Sibyl
    def process_users(self, users_ids):
        """Store a People row for each listed user that is not known yet.

        Ids found in ``self.users_blacklist`` and ids that already match a
        ``People.username`` in the session are skipped. For the rest,
        ``<self.url>/users/<id>.json`` is downloaded and the ``user`` object
        of the response is copied into a new People row, which is added to
        the session. A single commit is issued after all ids were handled.

        When a response cannot be parsed or has no ``user`` object, the id
        is logged, appended to ``self.users_blacklist`` and skipped.

        Does nothing when ``users_ids`` is None.
        """
        if users_ids is None:
            return

        for user_id in users_ids:
            if user_id in self.users_blacklist:
                continue
            stored = self.session.query(People).filter(
                People.username == user_id).first()
            if stored is not None:
                continue

            url = self.url + "/users/" + user_id + ".json"
            logging.info("Getting user " + user_id)
            logging.info(url)
            # NOTE(review): verify=False disables TLS certificate checks;
            # confirm this is intentional for the target server.
            stream = requests.get(url, verify=False)
            try:
                parser = JSONParser(unicode(stream.text))
                parser.parse()
                # Looked up inside the try so that a parseable response
                # without a 'user' key is skipped instead of raising
                # KeyError and losing the rows added so far.
                profile = parser.data['user']
            except Exception:
                # Narrower than a bare except so KeyboardInterrupt and
                # SystemExit are not swallowed.
                logging.error("Can't get " + user_id + " data")
                self.users_blacklist.append(user_id)
                continue

            dbuser = People()
            dbuser.username = profile['username']
            dbuser.reputation = profile['trust_level']
            dbuser.avatar = profile['uploaded_avatar_id']
            dbuser.last_seen_at = profile['last_posted_at']
            dbuser.joined_at = profile['created_at']
            dbuser.identifier = profile['id']
            self.session.add(dbuser)
            self.total_users += 1
        self.session.commit()
コード例 #4
0
    def process_users(self, users_ids):
        """Download the given Stack Overflow users and save them as People.

        ``users_ids`` is a ";"-separated string of user ids that is inserted
        as-is into the ``/2.2/users/<ids>`` request path. The response is
        read page by page; each page's users are added to ``self.session``
        and committed before the next page is requested.

        With ``self.debug`` set, ``StackSampleData.users`` replaces the API
        call and only one pass is made.

        Does nothing when ``users_ids`` is None.

        Raises:
            Exception: if ``users_ids`` holds more than ``self.pagesize``
                ids, or if a response has no ``has_more`` field.
        """
        if users_ids is None:
            return
        if len(users_ids.split(";")) > self.pagesize:
            logging.error("Max ids overcome in process_users " + users_ids)
            raise Exception("Max ids overcome in process_users")

        base_url = self.url + '/2.2/users/' + str(users_ids) + '?'
        base_url += 'order=desc&sort=reputation&site=stackoverflow&key=' + self.api_key
        base_url += '&' + 'pagesize=' + str(self.pagesize)
        date_format = '%Y-%m-%d %H:%M:%S'
        has_more = True
        page = 1

        while has_more:
            url = base_url + "&page=" + str(page)
            if self.debug:
                data = StackSampleData.users
            else:
                data = self._get_api_data(url)
            parser = JSONParser(unicode(data))
            parser.parse()
            # Expected top-level keys:
            # [u'has_more', u'items', u'quota_max', u'quota_remaining']
            if 'has_more' not in parser.data:
                logging.error("No has_more in JSON response")
                logging.error("%s", parser.data)
                # There is no active exception for a bare `raise` to
                # re-raise at this point, so raise an explicit one.
                raise Exception("No has_more in JSON response")
            # In debug mode the same sample would be parsed again and again,
            # so stop after one pass whatever the sample says.
            has_more = parser.data['has_more'] and not self.debug
            # Move on to the next page; otherwise page 1 is fetched forever
            # while has_more stays true.
            page += 1

            for user in parser.data['items']:
                dbuser = People()
                dbuser.username = user['display_name']
                dbuser.reputation = user['reputation']
                if 'profile_image' in user:
                    dbuser.avatar = user['profile_image']
                # NOTE(review): fromtimestamp() converts to local time;
                # confirm that is the zone the database expects.
                last_access = datetime.datetime.fromtimestamp(
                    int(user['last_access_date']))
                created = datetime.datetime.fromtimestamp(
                    int(user['creation_date']))
                dbuser.last_seen_at = last_access.strftime(date_format)
                dbuser.joined_at = created.strftime(date_format)
                dbuser.identifier = user['user_id']
                self.session.add(dbuser)
            self.session.commit()