Example #1
def store(database, docs):
    """
    Store documents in a given database; store the documents in chunks to increase performance.
    :param database: the database to store in
    :param docs: the documents to store
    """

    def chunker(document_list, chunk_size):
        """
        Split a given document list in chunks of a given size.
        :param document_list: the list of documents
        :param chunk_size: the chunk size
        :return: a generator yielding chunks of the document list, each with at most chunk_size elements
        """
        return (document_list[pos:pos + chunk_size] for pos in xrange(0, len(document_list), chunk_size))

    chunksize = 1000
    done = 0

    # instead of storing each document individually, store whole chunks to limit the number of HTTP calls to CouchDB
    for chunk in chunker(docs, chunksize):
        # store the chunk
        database.update(chunk)

        # print statistics
        done += len(chunk)
        s = "added %d of %d entries to database '%s'." % (len(docs) if len(docs) < done else done, len(docs), database.name)
        logger.debug(s)
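
A quick sketch of what the chunker generator above yields - a minimal, self-contained example assuming plain dicts stand in for CouchDB documents:

# hypothetical stand-in documents, just to illustrate the chunk sizes
docs = [{'_id': str(i)} for i in range(2500)]
chunks = [docs[pos:pos + 1000] for pos in range(0, len(docs), 1000)]
print([len(chunk) for chunk in chunks])  # [1000, 1000, 500] - the last chunk holds the remainder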
Example #2
    def get_achievements(self):
        games_360 = self.api_call(
            "/v2/{xuid}/xbox360games".format(xuid=self.user_id))
        games_one = self.api_call(
            "/v2/{xuid}/xboxonegames".format(xuid=self.user_id))
        games = games_360['titles'] + games_one['titles']

        logger.debug("found %d games" % len(games))

        for game in games:
            logger.debug("processing achievements of %s" % game['name'])
            title_id = game['titleId']
            achievements = self.api_call(
                '/v2/{xuid}/achievements/{titleId}'.format(xuid=self.user_id,
                                                           titleId=title_id))

            for achievement in achievements:
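                # achievements come in two schemas: one with 'unlockedOnline' and a plain
                # timestamp, the other with 'progressState' and an ISO 8601 unlock time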
                if 'unlockedOnline' in achievement:
                    achieved = achievement['unlockedOnline']
                    date = datetime.strptime(achievement['timeUnlocked'],
                                             "%Y-%m-%d %H:%M:%S")
                else:
                    achieved = (achievement['progressState'] == 'Achieved')
                    date = iso8601.parse_date(
                        achievement['progression']['timeUnlocked'])

                if achieved:
                    achievement['parsed_date'] = date.isoformat()
                    achievement['game'] = game
                    achievement['_id'] = "%s-%d" % (title_id,
                                                    achievement['id'])

                    yield achievement
Example #3
    def get_last_id(self):
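        # the view yields at most one row - return its value, or fall through to a full import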
        for x in self.database.view('_design/diarybot/_view/lastid'):
            logger.debug("most recent data found is from %s - incremental import" % datetime.fromtimestamp(x['value']).isoformat())
            return x['value']

        logger.debug("no previous data found - full import")
        return -1
Example #4
def create_database(module_name, design_document):
    """
    Create a database for a module and store a design document.
    :param module_name: the name of the module for which a database shall be created
    :param design_document: a design document passed from the creating module
    """

    # get the full database name from the module name and the database prefix
    full_database_name = config.get('couchdb', 'database-prefix') + module_name

    # configure the call to CouchDB
    couch = couchdb.Server(config.get('couchdb', 'database-url'))
    couch.resource.credentials = (config.get('couchdb', 'database-user'), config.get('couchdb', 'database-password'))

    # create or select the database
    if full_database_name in couch:
        db = couch[full_database_name]
        logger.debug('found database %s' % full_database_name)
    else:
        db = couch.create(full_database_name)
        logger.debug('created database %s' % full_database_name)

    # replace the passed design document
    if '_design/diarybot' in db:
        del db['_design/diarybot']
    db['_design/diarybot'] = json.loads(design_document)
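
The design document arrives as a JSON string, which is why create_database calls json.loads before storing it. A hedged usage sketch - the module name and the view body below are assumptions for illustration, not the project's real view:

import json

design_document = json.dumps({
    'views': {
        'lastid': {
            # hypothetical map function - the real 'lastid' view body is module-specific
            'map': "function(doc) { emit(doc._id, doc.timestamp); }"
        }
    }
})
create_database('example', design_document)  # 'example' is a hypothetical module name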
Example #5
    def get_docs(last_id):
        logger.debug("reading new entries from Skype's local database")

        db_filename = (expanduser("~") + '/Library/Application Support/Skype/' +
                       config.get('skype', 'skype_username') + '/main.db')
        conn = sqlite3.connect(db_filename)
        conn.row_factory = sqlite3.Row
        c = conn.cursor()

        sql_statement = '''
SELECT   author, from_dispname, timestamp, body_xml, chatname
FROM     Messages
WHERE    timestamp > ?
ORDER BY id
'''

        # bind last_id as a query parameter instead of formatting it into the SQL string
        c.execute(sql_statement, (last_id,))

        for entry in c.fetchall():
            yield {
                'date': datetime.fromtimestamp(entry['timestamp']).isoformat(),
                'text': entry['body_xml'],
                'author': entry['author'],
                'display_name': entry['from_dispname'],
                'chatname': entry['chatname']
            }
Example #6
    def regular_import(self):
        """
        Import new images and movies.
        First check whether all old media files were imported; an earlier import may have been interrupted.
        Then check whether newer media files exist and import them.
        :return:
        """

        latest_local_image = self._get_latest_media()
        first_local_image = self._get_first_media()

        # check whether there are images older than the oldest one we have stored
        url = self.base_url + "/users/self/media/recent/?access_token=%s&max_id=%s" % (
            config.get('instagram', 'access_token'), first_local_image['id']
        )
        res = self._api_call(url)

        if len(res['data']) > 0:
            self._run_fetch({'min_id': first_local_image['id']})

        # now import all images newer than the newest one we have
        logger.debug("latest local stored image is from %s" % datetime.datetime.fromtimestamp(
            int(latest_local_image['created_time'])).isoformat())
        self._run_fetch({'min_id': latest_local_image['id'], 'max_timestamp': latest_local_image['created_time']})

        return True
Example #7
    def __init__(self):
        try:
            Module.__init__(self, "moves")
        except NotImplementedError:
            # the module is not enabled - skip it
            return

        self.docs = [x for x in self.get_data()]
        logger.debug("read %d new entries from the Moves API" % len(self.docs))
        store(self.database, self.docs)
Example #8
    def get_last_id(self):
        for x in self.database.view('_design/diarybot/_view/lastid'):
            logger.debug("most recent data found is from %s - incremental import" % datetime.datetime.fromtimestamp(
                x['value']).isoformat())
            # add a day, because we can only get the data from up to yesterday
            return datetime.date.fromtimestamp(x['value']) + datetime.timedelta(days=1)

        logger.debug("no previous data found - full import")
        return datetime.datetime.strptime(self.get_first_id(), '%Y%m%d').date()
Example #9
    def __init__(self):
        try:
            Module.__init__(self, "imessage")
        except NotImplementedError:
            # the module is not enabled - skip it
            return

        last_id = self.get_last_id()
        self.docs = [x for x in self.get_docs(last_id)]
        logger.debug("read %d new entries from iMessage's local database" % len(self.docs))
        store(self.database, self.docs)
Example #10
    def __init__(self):
        try:
            Module.__init__(self, "xbox")
        except NotImplementedError:
            # the module is not enabled - skip it
            return

        self.user_id = self.get_user_id()
        self.docs = [x for x in self.get_achievements()]
        logger.debug("read %d achievements from Xbox live" % len(self.docs))
        store(self.database, self.docs)
Example #11
    def get_last_stored_tweet(self):
        """
        :return: id of last stored tweet or None if no tweet was stored so far
        """
        for x in self.database.view('_design/diarybot/_view/lastid'):
            logger.debug("most recent tweet id is %d" % x['value'])
            return x['value']

        logger.debug("no previous stored tweets found - full import")
        return None
Example #12
    def __init__(self, module_name):
        logger.debug("opening module %s" % module_name)

        self.name = module_name

        # skip this module unless it is enabled in the "diarybot.cfg"
        if not config.getboolean(module_name, 'enable'):
            logger.debug("module %s not enabled" % module_name)
            raise NotImplementedError

        self.database = get_database(module_name)
Example #13
    def get_first_id(self):
        """
        Query the Moves API for the first date for which data is stored for the user
        :return: a date as string formatted "%Y%m%d"
        """
        logger.debug("getting user's first date")

        endpoint = '/user/profile'
        data = {'access_token': config.get('moves', 'access_token')}

        res = self.api_call(endpoint, data)
        return res['profile']['firstDate']
Example #14
    def __init__(self):
        try:
            Module.__init__(self, "skype")
        except NotImplementedError:
            # the module is not enabled - skip it
            return

        last_id = self.get_last_id()

        self.docs = [x for x in self.get_docs(last_id)]
        logger.debug("read %d new entries from Skype's local database" %
                     len(self.docs))
        store(self.database, self.docs)
Example #15
    def __init__(self):
        try:
            Module.__init__(self, "twitter")
        except NotImplementedError:
            # the module is not enabled - skip it
            return

        self.screen_name = None
        self.api = self.get_api()

        self.docs = self.get_tweets()
        logger.debug("read the most recent %d tweets" % len(self.docs))
        store(self.database, self.docs)
Example #16
    def initial_import(self):
        """
        Import all images and movies at once if no import was executed before
        :return: Boolean
        """
        if not self._get_latest_media():
            logger.debug("no local stored data found - inital import needed")
            logger.debug("%s tracks have to be imported" % self.meta_local_image_count)

            self._run_fetch()
            return True

        return False
Example #17
    def _api_call(self, url):
        """
        handles the API calls and errors
        :param url: full url to API endpoint
        :return: json-data
        """

        r = requests.get(url)
        data = r.json()

        # TODO: do this in a better way (checking the rate limit etc.)
        if r.status_code == 429:
            logger.debug("hour rate limit reached - aborting, try again in 60 minutes")
            exit(1)

        return data
Example #18
    def get_storyline(self, date):
        logger.debug("querying story line for %s..." % date)

        endpoint = '/user/storyline/daily/%s' % date
        data = {
            'access_token': config.get('moves', 'access_token'),
            'trackPoints': 'true'
        }

        res = self.api_call(endpoint, data)

        # the result is a list - get the first (and only) element and add the date as id
        doc = res[0]
        doc['_id'] = date.isoformat()

        return doc
Example #19
    def api_call(endpoint, data):
        base_url = 'https://api.moves-app.com/api/1.1'

        r = requests.get(base_url + endpoint, params=data)

        if r.status_code == 429:
            logger.debug("rate limit exceeded, please try again in %s minutes",
                         r.headers['x-ratelimit-minuteremaining'])

        # check rate limit
        ratelimit = int(r.headers['x-ratelimit-minuteremaining'])
        if ratelimit <= 2:
            logger.debug("minute rate limit almost reached - wait for a minute")
            time.sleep(60)

        return r.json()
Example #20
    def initial_import(self):
        """
        Import all checkins at once if no import was executed before
        :return: Boolean
        """

        if not self._get_latest_checkin():
            logger.debug("no local stored data found - inital import needed")

            # import all checkins because we imported nothing before
            checkins = list(self.client.users.all_checkins())

            store(self.database, checkins)
            return True

        return False
Example #21
def get_database(module_name):
    """
    Get a module's database from CouchDB.
    :param module_name: the name of the module for which the database shall be returned
    :return: the CouchDB database for the given module
    """
    # get the full database name from the module name and the database prefix
    full_database_name = config.get("couchdb", "database-prefix") + module_name

    # configure the call to CouchDB
    couch = couchdb.Server(config.get("couchdb", "database-url"))
    couch.resource.credentials = (config.get("couchdb", "database-user"), config.get("couchdb", "database-password"))

    # return the database
    db = couch[full_database_name]
    logger.debug("opening database %s" % full_database_name)
    return db
Example #22
    def api_call(self, min_params, opt_params=None):
        """
        handles the API calls and errors
        :param min_params: required query parameters
        :param opt_params: optional query parameters
        :return: json-data
        """
        # avoid a mutable default argument
        if opt_params is None:
            opt_params = {}

        r = requests.get(self.base_url, params=dict(min_params.items() + opt_params.items()))
        data = r.json()

        # TODO: do this in a better way (checking the rate limit etc.)
        if 'error' in data:
            if data['error'] == 29:
                logger.debug("minute rate limit almost reached - wait for a minute")
                time.sleep(60)

        return data
Example #23
    def initial_import(self):
        """
        Import all tracks at once if no import was executed before
        :return: Boolean
        """
        if not self._get_latest_track():
            logger.debug("no local stored data found - inital import needed")

            params = {'method': 'user.getrecenttracks',
                      'user': config.get('lastfm', 'username'),
                      'api_key': config.get('lastfm', 'api_key'),
                      'format': 'json'}

            # check how many entries are available in total
            data = self.api_call(params, {'limit': 1})
            logger.debug("%s tracks have to be imported" % (data['recenttracks']['@attr']['total']))

            self._run_fetch_store(params)
            return True

        return False
Example #24
    def regular_import(self):
        """
        Import new checkins, beginning from the last checkin found in the local database
        :return:
        """
        # TODO: check the request limit and handle its exhaustion
        while True:

            # +1 to get only new checkins, not the latest one we already have
            latest_local_checkin = self._get_latest_checkin()
            latest_local_checkin_time = latest_local_checkin['createdAt'] + 1

            logger.debug("latest local stored checkin entry is from %s" % datetime.datetime.fromtimestamp(
                latest_local_checkin['createdAt']).isoformat())

            res = self.client.users.checkins(
                params={'limit': 250, 'sort': 'oldestfirst', 'afterTimestamp': latest_local_checkin_time})

            if len(res['checkins']['items']) == 0:
                logger.debug("no further checkins to import - finishing")
                break

            start_time = datetime.datetime.fromtimestamp(latest_local_checkin['createdAt']).isoformat()
            end_time = datetime.datetime.fromtimestamp(res['checkins']['items'][-1]['createdAt']).isoformat()

            logger.debug("store checkins from %s to %s" % (start_time, end_time))
            store(self.database, res['checkins']['items'])

        return True
Example #25
    def get_tweets(self):
        # script from https://gist.github.com/yanofsky/5436496

        all_tweets = list()

        last_stored_id = self.get_last_stored_tweet()
        oldest_tweet_id = None

        logger.debug("loading tweets...")

        while True:
            try:
                # make initial request for most recent tweets (200 is the maximum allowed count)
                new_tweets = self.api.user_timeline(screen_name=self.screen_name, count=200, max_id=oldest_tweet_id, since_id=last_stored_id)
            except tweepy.TweepError as e:
                logger.debug("got no more tweets - %s" % e.reason)
                break

            # save most recent tweets
            all_tweets.extend(json.loads(new_tweets))

            logger.debug("got %d tweets so far" % len(all_tweets))

            if not all_tweets or oldest_tweet_id == all_tweets[-1]['id'] - 1:
                break
            else:
                oldest_tweet_id = all_tweets[-1]['id'] - 1

        # add ids for CouchDB
        for tweet in all_tweets:
            tweet['_id'] = tweet['id_str']

        return all_tweets
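
The paging in get_tweets relies on tweet ids decreasing with age: after each batch, max_id is moved just below the oldest id seen, so the next request returns strictly older tweets. A minimal sketch with made-up ids:

# made-up tweet ids, newest first, as user_timeline returns them
batch = [{'id': 130}, {'id': 120}, {'id': 110}]
oldest_tweet_id = batch[-1]['id'] - 1
print(oldest_tweet_id)  # 109 - the next request only returns tweets with id <= 109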
Example #26
    def _store_wattatchment(self, db, docs):
        """
        stores the API result and, for each record, fetches the media file and attaches it to the stored record.
        :param db: database handle
        :param docs: API result
        :return:
        """

        if len(docs) < 1:
            logger.debug("nothing to import.")
            return False

        # store json with main data
        store_results = db.update(docs)

        # attach the binary image to its database record
        for db_res in store_results:
            # fetch the stored document to get the image URL
            doc = self.database.get(db_res[1])
            image_file = requests.get(doc['images']['standard_resolution']['url'])
            key = {'_id': db_res[1], '_rev': db_res[2]}
            db.put_attachment(key, image_file.content, filename="standard_resolution.jpg",
                              content_type=image_file.headers['content-type'])

            if doc['caption']:
                text = doc['caption']['text']
            else:
                text = "untitled"

            logger.debug('Attached "%s" image' % text)

        logger.debug("import photo is done.")
        return True
Example #27
    def get_api(self):
        """
        :return: an initialized Twitter API object
        """
        logger.debug("logging in at Twitter")

        # add a parser to access the raw JSON from the tweets
        # from http://www.hongliangjie.com/2012/04/04/hack-tweepy-to-get-raw-json/
        class RawJsonParser(tweepy.parsers.Parser):
            def parse(self, method, payload):
                return payload

        # authorize Twitter, initialize tweepy
        auth = tweepy.OAuthHandler(config.get('twitter', 'consumer_key'), config.get('twitter', 'consumer_secret'))
        auth.set_access_token(config.get('twitter', 'access_token'), config.get('twitter', 'access_token_secret'))
        api = tweepy.API(auth, parser=RawJsonParser())

        # get screen name for later access to user timeline
        self.screen_name = json.loads(api.me())['screen_name']
        logger.debug("screen name: %s" % self.screen_name)

        return api
Example #28
    def _run_fetch_store(self, param, opt_params):
        """
        fetches available tracks from Last.fm page by page and stores them in the database
        :param param: parameters to narrow the API result
        :param opt_params: optional parameters, updated on each page
        :return: True
        """

        next_page = 1

        while True:

            # build parameter set to get step by step all data
            opt_params['extended'] = 1
            opt_params['page'] = next_page
            opt_params['limit'] = 200

            data = self.api_call(param, opt_params)

            if 'recenttracks' in data:
                if '@attr' in data['recenttracks']:
                    attr = data['recenttracks']['@attr']
                else:
                    attr = data['recenttracks']
            else:
                logger.debug("Finished import to early?")
                break

            # stop the import if there is nothing to import
            if int(attr['total']) == 0:
                break

            # a single track comes back as a dict, not a list, so wrap it manually
            tracks = data['recenttracks']['track']
            if not isinstance(tracks, list):
                tracks = [tracks]

            store(self.database, tracks)
            logger.debug("Stored page %s with %s tracks" %
                         (attr['page'], len(data['recenttracks']['track'])))

            # calculate next iteration
            cur_page = int(attr['page'])
            if cur_page < int(attr['totalPages']):
                next_page = cur_page + 1
            else:
                logger.debug("All tracks fetched.")
                break

        return True
Example #29
    def _run_fetch(self, params=None):
        """
        fetches available media from Instagram page by page
        :param params: parameters to narrow the API result
        :return: True
        """
        # avoid a mutable default argument
        if params is None:
            params = {}

        record_count = 0

        # for the first iteration we have to build the url ourselves,
        # appending any additional parameters
        url = self.base_url + "/users/self/media/recent/?access_token=%s" % config.get('instagram', 'access_token') + '&' + '&'.join(
            ["%s=%s" % (k, v) for k, v in params.iteritems()])

        while True:

            # build parameter set to get step by step all data
            res = self._api_call(url)

            # break here if there is nothing to import
            if len(res['data']) < 1:
                logger.debug("nothing to import.")
                break

            self._store_wattatchment(self.database, res['data'])
            record_count += len(res['data'])
            logger.debug("Stored %s of in total %s images in database" % (
                record_count, (self.meta_online_image_count - self.meta_local_image_count)))

            # get next url from api - if we are not at the end
            if 'next_url' in res['pagination']:
                url = res['pagination']['next_url']
            else:
                logger.debug("All images fetched.")
                break

        return True
Example #30
    def regular_import(self):
        """
        Imports new tracks, beginning from the last tracks found in the local database
        :return:
        """

        first_local_track = self._get_first_track()
        latest_local_track = self._get_latest_track()

        # check whether all old tracks were imported -
        # the initial import may have been interrupted
        logger.debug("first local stored track is from %s" % datetime.datetime.fromtimestamp(
            int(first_local_track['date']['uts'])).isoformat()
        )

        params = {'method': 'user.getrecenttracks',
                  'user': config.get('lastfm', 'username'),
                  'api_key': config.get('lastfm', 'api_key'),
                  'format': 'json'}

        opt_params = {'to': int(first_local_track['date']['uts']),
                      'limit': 1}
        data = self.api_call(params, opt_params)
        if '@attr' in data['recenttracks']:
            track_count = int(data['recenttracks']['@attr']['total'])
        else:
            track_count = int(data['recenttracks']['total'])

        if track_count > 0:
            logger.debug("%s tracks are older as the oldest local track. they have to be imported" % track_count)
            self._run_fetch_store(params, opt_params)
        else:
            logger.debug("all older tracks are imported")

        logger.debug("latest local stored track is from %s" % datetime.datetime.fromtimestamp(
            int(latest_local_track['date']['uts'])).isoformat())

        # check if newer tracks have to be imported.
        opt_params = {'from': int(latest_local_track['date']['uts']),
                      'limit': 1}
        data = self.api_call(params, opt_params)
        if '@attr' in data['recenttracks']:
            track_count = int(data['recenttracks']['@attr']['total'])
        else:
            track_count = int(data['recenttracks']['total'])

        if track_count > 0:
            logger.debug("%s tracks are newer as the latest local track. they have to be imported." % track_count)
            self._run_fetch_store(params, opt_params)
        else:
            logger.debug("all newer tracks are imported")

        return True
Example #31
    def get_user_id(self):
        data = self.api_call('/v2/accountXuid')
        logger.debug("collecting data for gamer tag %s" % data['gamerTag'])
        return data['xuid']
Example #32
import couchdb

from diarybot.utils.logger import logger
from diarybot.config import config

# connect to the CouchDB server
couch = couchdb.Server(config.get('couchdb', 'database-url'))
couch.resource.credentials = (config.get('couchdb', 'database-user'),
                              config.get('couchdb', 'database-password'))

# select the databases of Diary Bot
diarybot_databases = [
    couch[db_name] for db_name in couch
    if db_name.startswith(config.get('couchdb', 'database-prefix'))
]
logger.debug("performing maintenance for %d Diary Bot databases" %
             len(diarybot_databases))

# Clean up and compact each database and its design documents
for db in diarybot_databases:
    logger.debug("cleaning up and compacting database %s" % db.name)
    db.cleanup()
    db.compact()

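    # CouchDB document ids sort lexicographically, so the key-range slice
    # '_design':'_design0' selects exactly the design documents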
    for design_document in db.view('_all_docs')['_design':'_design0']:
        # get the basename of the design document
        design_document_name = design_document.id.split('/')[-1]
        db.compact(design_document_name)

logger.debug("done")