def main_generator(start_timestamp):
    #chirpradio.connect("10.0.1.98:8000")
    chirpradio.connect()

    sql_db = database.Database(conf.LIBRARY_DB)
    pending_albums = []
    this_album = []
    # TODO(trow): Select the albums to import in a saner way.
    for vol, import_timestamp in sql_db.get_all_imports():
        if start_timestamp is not None and import_timestamp < start_timestamp:
            continue
        cprint("***")
        cprint("*** import_timestamp = %s" %
               timestamp.get_human_readable(import_timestamp))
        cprint("***")
        for au_file in sql_db.get_by_import(vol, import_timestamp):
            if this_album and this_album[0].album_id != au_file.album_id:
                alb = album.Album(this_album)
                pending_albums.append(alb)
                cprint('Adding "%s"' % alb.title())
                pending_albums = maybe_flush(pending_albums)
                this_album = []
            this_album.append(au_file)
            yield

    # Add the last album to the list of pending albums, then do the
    # final flush.
    if this_album:
        alb = album.Album(this_album)
        cprint('Adding "%s"' % alb.title())
        pending_albums.append(alb)
        this_album = []
    flush(pending_albums)

def main():
    #chirpradio.connect("10.0.1.98:8000")
    chirpradio.connect()

    sql_db = database.Database(conf.LIBRARY_DB)
    pending_albums = []
    this_album = []
    # TODO(trow): Select the albums to import in a saner way.
    for vol, import_timestamp in sql_db.get_all_imports():
        if START_TIMESTAMP is not None and import_timestamp < START_TIMESTAMP:
            continue
        print "***"
        print "*** import_timestamp = %s" % timestamp.get_human_readable(
            import_timestamp)
        print "***"
        for au_file in sql_db.get_by_import(vol, import_timestamp):
            if this_album and this_album[0].album_id != au_file.album_id:
                alb = album.Album(this_album)
                pending_albums.append(alb)
                print 'Adding "%s"' % alb.title()
                pending_albums = maybe_flush(pending_albums)
                this_album = []
            this_album.append(au_file)
        
    # Add the last album to the list of pending albums, then do the
    # final flush.
    if this_album:
        alb = album.Album(this_album)
        print 'Adding "%s"' % alb.title()
        pending_albums.append(alb)
        this_album = []
    flush(pending_albums)
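
main_generator yields once per processed audio file, so a caller can interleave its own work (progress reporting, cooperative scheduling) between steps, while main() does the same push in one blocking pass. A minimal sketch of driving the generator; the None start timestamp (meaning "import everything") is illustrative:

def drive_push(start_timestamp=None):
    # Drain the generator; each yield marks one processed audio file.
    steps = 0
    for _ in main_generator(start_timestamp):
        steps += 1
        if steps % 100 == 0:
            print "processed %d files" % steps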
Example 3
    def do_push_artists(self):
        # patch credentials
        if not request.headers.get('Authorization'):
            abort(401)
        else:
            # Note: lstrip('Basic ') strips *characters*, not a prefix, and can
            # eat leading base64 bytes; split off the scheme token instead.
            auth = request.headers['Authorization'].split(' ', 1)[-1]
            username, password = base64.b64decode(auth).split(':', 1)
            if username and password:
                conf.CHIRPRADIO_AUTH = '%s %s' % (username, password)
                chirpradio.connect()
            else:
                abort(401)

        dry_run = False

        # reload artists from file
        artists._init()

        # Find all of the library artists
        all_library_artists = set(artists.all())

        # Find all of the artists in the cloud.
        all_chirpradio_artists = set()
        mapped = 0
        t1 = time.time()
        for art in models.Artist.fetch_all():
            if art.revoked:
                continue
            std_name = artists.standardize(art.name)
            if std_name != art.name:
                #print "Mapping %d: %s => %s" % (mapped, art.name, std_name)
                mapped += 1
                art.name = std_name
                idx = search.Indexer()
                idx._transaction = art.parent_key()
                idx.add_artist(art)
                if not dry_run:
                    idx.save()
            all_chirpradio_artists.add(art.name)

        to_push = list(all_library_artists.difference(all_chirpradio_artists))

        Messages.add_message("Pushing %d artists" % len(to_push), 'warning')
        while to_push:
            # Push the artists in batches of 50
            this_push = to_push[:50]
            to_push = to_push[50:]
            idx = search.Indexer()
            for name in this_push:
                #print name
                art = models.Artist.create(parent=idx.transaction, name=name)
                idx.add_artist(art)
            if not dry_run:
                idx.save()
            #print "+++++ Indexer saved"

        Messages.add_message("Artist push complete. OK!", 'success')
Example 4
def main_generator():
    chirpradio.connect()

    dry_run = False

    # Find all of the library artists
    all_library_artists = set(artists.all())

    # Find all of the artists in the cloud.
    all_chirpradio_artists = set()
    mapped = 0
    t1 = time.time()
    for art in models.Artist.fetch_all():
        if art.revoked:
            continue
        std_name = artists.standardize(art.name)
        if std_name != art.name:
            cprint(u"Mapping {}: {} => {}".format(mapped, art.name, std_name))
            mapped += 1
            art.name = std_name
            idx = search.Indexer()
            idx._transaction = art.parent_key()
            idx.add_artist(art)
            if not dry_run:
                idx.save()
        all_chirpradio_artists.add(art.name)
        yield

    to_push = list(all_library_artists.difference(all_chirpradio_artists))

    cprint("Pushing %d artists" % len(to_push))
    while to_push:
        # Push the artists in batches of 50
        this_push = to_push[:50]
        to_push = to_push[50:]
        idx = search.Indexer()
        for name in this_push:
            cprint(name)
            art = models.Artist.create(parent=idx.transaction, name=name)
            idx.add_artist(art)
        if not dry_run:
            idx.save()
        cprint("+++++ Indexer saved")
        yield
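
The set difference all_library_artists.difference(all_chirpradio_artists) only works if both sides use the same canonical spelling, which is why each cloud name is passed through artists.standardize() first. A toy illustration with made-up names and a stand-in standardizer:

library = set([u"Beck", u"Broadcast", u"Bjork"])
cloud_raw = set([u"beck", u"BJORK"])
standardize = lambda name: name.title()  # stand-in for artists.standardize
cloud = set(standardize(name) for name in cloud_raw)
print sorted(library - cloud)  # [u'Broadcast'] -- only the truly missing artist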
Example 6
    def do_push(self):

        # IMPORT_TIME_STAMP from import step
        START_TIMESTAMP = ImportTimeStamp.import_time_stamp
        # TODO(trow): Is this optimal?
        _NUM_ALBUMS_PER_FLUSH = 3

        _DISC_NUM_RE = re.compile(r"disc\s+(\d+)", re.IGNORECASE)

        class UnknownArtistError(Exception):
            pass

        def get_artist_by_name(name):
            # _artist_cache is assumed to be initialized at module level
            # (e.g. _artist_cache = {}); it memoizes Artist fetches by name.
            global _artist_cache
            if name in _artist_cache:
                return _artist_cache[name]
            while True:
                try:
                    art = models.Artist.fetch_by_name(name)
                    if art is None:
                        raise UnknownArtistError("Unknown artist: %s" % name)
                    _artist_cache[name] = art
                    return art
                except urllib2.URLError:
                    #print "Retrying fetch_by_name for '%s'" % name
                    pass

        def seen_album(album_id):
            while True:
                try:
                    for alb in models.Album.all().filter(
                            "album_id =", album_id):
                        if not alb.revoked:
                            return True
                    return False
                except urllib2.URLError:
                    #print "Retrying fetch of album_id=%s" % album_id
                    pass

        def process_one_album(idx, alb):
            # Build up an Album entity.
            kwargs = {}
            kwargs["parent"] = idx.transaction
            kwargs["title"] = alb.title()
            kwargs["album_id"] = alb.album_id
            kwargs["import_timestamp"] = datetime.datetime.utcfromtimestamp(
                alb.import_timestamp())
            kwargs["num_tracks"] = len(alb.all_au_files)
            kwargs["import_tags"] = alb.tags()

            if alb.is_compilation():
                kwargs["is_compilation"] = True
            else:
                kwargs["is_compilation"] = False
                kwargs["album_artist"] = get_artist_by_name(alb.artist_name())

            #for key, val in sorted(kwargs.iteritems()):
            #    print "%s: %s" % (key, val)
            if seen_album(alb.album_id):
                #print "   Skipping"
                return

            album = models.Album(**kwargs)

            # Look for a disc number in the tags.
            for tag in kwargs["import_tags"]:
                m = _DISC_NUM_RE.search(tag)
                if m:
                    album.disc_number = int(m.group(1))
                    break

            idx.add_album(album)

            for au_file in alb.all_au_files:
                track_title, import_tags = titles.split_tags(au_file.tit2())
                track_num, _ = order.decode(
                    unicode(au_file.mutagen_id3["TRCK"]))
                kwargs = {}
                if alb.is_compilation():
                    kwargs["track_artist"] = get_artist_by_name(au_file.tpe1())
                track = models.Track(
                    parent=idx.transaction,
                    ufid=au_file.ufid(),
                    album=album,
                    title=track_title,
                    import_tags=import_tags,
                    track_num=track_num,
                    sampling_rate_hz=au_file.mp3_header.sampling_rate_hz,
                    bit_rate_kbps=int(au_file.mp3_header.bit_rate_kbps),
                    channels=au_file.mp3_header.channels_str,
                    duration_ms=au_file.duration_ms,
                    **kwargs)
                idx.add_track(track)

        def flush(list_of_pending_albums):
            if not list_of_pending_albums:
                return
            idx = search.Indexer()
            for alb in list_of_pending_albums:
                process_one_album(idx, alb)
            # This runs as a batch job, so set a very long deadline.
            while True:
                try:
                    rpc = db.create_rpc(deadline=120)
                    idx.save(rpc=rpc)
                    return
                except urllib2.URLError:
                    #print "Retrying indexer flush"
                    pass

        def maybe_flush(list_of_pending_albums):
            if len(list_of_pending_albums) < _NUM_ALBUMS_PER_FLUSH:
                return list_of_pending_albums
            flush(list_of_pending_albums)
            return []

        # main

        #chirpradio.connect("10.0.1.98:8000")
        chirpradio.connect()

        Messages.add_message('Beginning to push albums.', 'warning')

        sql_db = database.Database(conf.LIBRARY_DB)
        pending_albums = []
        this_album = []
        # TODO(trow): Select the albums to import in a saner way.
        for vol, import_timestamp in sql_db.get_all_imports():
            if START_TIMESTAMP is not None and import_timestamp < START_TIMESTAMP:
                continue
            #print "***"
            #print "*** import_timestamp = %s" % timestamp.get_human_readable(
            #import_timestamp)
            #print "***"
            for au_file in sql_db.get_by_import(vol, import_timestamp):
                if this_album and this_album[0].album_id != au_file.album_id:
                    alb = album.Album(this_album)
                    pending_albums.append(alb)
                    Messages.add_message('Adding "%s"' % alb.title(),
                                         'success')
                    pending_albums = maybe_flush(pending_albums)
                    this_album = []
                this_album.append(au_file)

        # Add the last album to the list of pending albums, then do the
        # final flush.
        if this_album:
            alb = album.Album(this_album)
            Messages.add_message('Adding "%s"' % alb.title(), 'success')
            pending_albums.append(alb)
            this_album = []
        flush(pending_albums)

        Messages.add_message('Album push complete. OK!', 'success')
        Messages.add_message('Import process complete. OK!', 'success')

        current_route.CURRENT_ROUTE = 'import'
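
get_artist_by_name, seen_album, and flush all share the same idiom: loop forever, retrying on urllib2.URLError. A generic sketch of that idiom; the optional attempt cap is an addition, not something the original code does:

import urllib2

def retry_on_urlerror(fn, max_attempts=None):
    # Call fn() until it succeeds, swallowing transient URLErrors.
    # max_attempts=None reproduces the unbounded retries above; pass a
    # number to fail fast instead.
    attempts = 0
    while True:
        try:
            return fn()
        except urllib2.URLError:
            attempts += 1
            if max_attempts is not None and attempts >= max_attempts:
                raise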
Example 8
def all_matches():
    # Iterate over every SearchMatches entity in batches of 100, using
    # keyset pagination on __key__ so the full set is never loaded at once.
    last_key = None
    while True:
        q = models.SearchMatches.all().order("__key__")
        if last_key:
            q.filter("__key__ >", last_key)
        batch = list(q.fetch(100))
        if not batch:
            break
        last_key = batch[-1].key()
        for sm in batch:
            yield sm


chirpradio.connect()

counts = {}  # maps (term, field) -> list of match-list lengths

total_obj = 0
total_matches = 0
for sm in all_matches():
    key = (sm.term, sm.field)
    L = counts.setdefault(key, [])
    L.append(len(sm.matches))
    total_obj += 1
    total_matches += len(sm.matches)
    if total_obj % 500 == 0:
        print total_obj, total_matches
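
At this point counts maps each (term, field) pair to the lengths of its match lists. A possible closing summary, not part of the original script:

# Sketch: report the ten (term, field) keys with the most total matches.
totals = sorted(((sum(sizes), key) for key, sizes in counts.iteritems()),
                reverse=True)
for total, (term, field) in totals[:10]:
    print "%6d matches  term=%r field=%r" % (total, term, field)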