Beispiel #1
0
def volume_modified(request, pid):
    """Last modification time for a single volume.

    Looks up the most recent Solr index timestamp for the volume and, for
    authenticated users, the newest creation time of annotations visible
    to that user; both values are handed to ``solrtimestamp_or_datetime``
    to produce the result.
    """
    # The Solr *indexing* timestamp is used instead of the object's own
    # last-modified date: a changed index suggests a possible modification,
    # and the volume's index timestamp is refreshed when pages are added.
    solr = solr_interface()
    timestamp_query = (
        solr.query(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid)
        .sort_by('-timestamp')
        .field_limit('timestamp')
    )

    # For logged-in users the page must also count as modified whenever
    # the annotation count changes.
    latest_note = None
    if request.user.is_authenticated():
        # Initializing the volume here should be cheap: no api calls are
        # made, it is only used for its uri and associated annotations.
        volume = Repository().get_object(pid, type=Volume)
        visible_notes = volume.annotations().visible_to(request.user)
        # newest annotation creation for pages in this volume
        latest_note = visible_notes.last_created_time()

    if timestamp_query.count():
        solrtime = timestamp_query[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #2
0
    def get_context_data(self, **kwargs):
        """Build the template context for the collection list.

        Returns a dict with the collections (paired with their volume
        counts), the current display mode, and up to four randomly chosen
        collection covers.
        """
        facet_query = (
            solr_interface()
            .query(content_model=Volume.VOLUME_CMODEL_PATTERN)
            .facet_by('collection_id', sort='count', mincount=1)
            .paginate(rows=0)
        )
        facet_fields = facet_query.execute().facet_counts.facet_fields
        # lookup table: collection pid -> volume total
        volume_totals = {
            pid: total for pid, total in facet_fields['collection_id']
        }

        # pair each solr result with its volume count, leaving out any
        # collection that has no items
        collections = []
        for result in self.object_list:
            if result['pid'] in volume_totals:
                collections.append(
                    (result, volume_totals.get(result['pid'])))

        # random selection of covers for use in the twitter gallery card;
        # only collections that actually have a cover image qualify
        covers = []
        for coll, _count in collections:
            if coll.cover:
                covers.append(coll.cover)
        # shuffled in place so the first N can simply be sliced off
        shuffle(covers)

        context = {}
        context['collections'] = collections
        context['mode'] = self.display_mode
        context['meta_covers'] = covers[:4]
        return context
Beispiel #3
0
    def render(self, **kwargs):
        """Render the collection list template for a limited set of collections.

        Collections are paired with their volume counts, ordered by the
        first value of their ``identifier`` field, truncated to
        ``self.num_collections`` entries and passed to
        ``pages/collection_list.html``.
        """
        # FIXME: lots of redundancy with collection list view
        solr = solr_interface()

        collection_query = solr.query(
            content_model=Collection.COLLECTION_CONTENT_MODEL
        ).results_as(SolrCollection)
        # restrict to the configured owner, when one is set
        owner = getattr(settings, 'COLLECTIONS_OWNER', None)
        if owner:
            collection_query = collection_query.filter(owner=owner)

        volume_query = (
            solr.query(content_model=Volume.VOLUME_CMODEL_PATTERN)
            .facet_by('collection_id', sort='count', mincount=1)
            .paginate(rows=0)
        )
        facet_fields = volume_query.execute().facet_counts.facet_fields
        # lookup table: collection pid -> volume total
        volume_totals = {
            pid: total for pid, total in facet_fields['collection_id']
        }

        # (solr result, volume count) tuples; collections without any
        # items are dropped
        collections = []
        for result in collection_query:
            if result['pid'] in volume_totals:
                collections.append(
                    (result, volume_totals.get(result['pid'])))

        # Previously the order was randomized here (shuffle) before
        # selecting the requested number for display.
        #
        # Now ordered alphabetically. Note that when fewer collections are
        # displayed than exist, those towards the end of the alphabet will
        # never show up, unlike with the randomized version.
        def first_identifier(entry):
            return entry[0]["identifier"][0]

        collections.sort(key=first_identifier)
        displayed = collections[:self.num_collections]

        template = get_template('pages/collection_list.html')
        return template.render({'collections': displayed})
Beispiel #4
0
def volume_modified(request, pid):
    """Last modification time for a single volume.

    The newest Solr index timestamp for the volume and (for authenticated
    users only) the latest creation time of annotations visible to the
    user are passed to ``solrtimestamp_or_datetime``, whose result is
    returned.
    """
    solr = solr_interface()
    # Solr's indexing timestamp stands in for the object's last-modified
    # date: an index change may mean the object was modified, and a
    # volume's index timestamp is updated when pages are added.
    by_newest = solr.query(
        content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid
    ).sort_by('-timestamp').field_limit('timestamp')

    latest_note = None
    # a logged-in user should see the page as modified when the
    # annotation count changes
    if request.user.is_authenticated():
        # Cheap to init the volume: no api calls are made, the object is
        # only needed for its uri and associated annotations.
        repo = Repository()
        vol = repo.get_object(pid, type=Volume)
        # newest annotation creation for pages in this volume
        user_notes = vol.annotations().visible_to(request.user)
        latest_note = user_notes.last_created_time()

    solrtime = None
    if by_newest.count():
        solrtime = by_newest[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #5
0
 def volumes_with_pages():
     """Return the pids of all Volumes that have pages loaded."""
     # A volume that only has a cover reports a page count of 1, so
     # "pages loaded" means a page count greater than 1.
     pid_query = solr_interface().query(
         content_model=Volume.VOLUME_CMODEL_PATTERN,
         page_count__gt=1,
     )
     pid_query = pid_query.field_limit('pid')
     return [match['pid'] for match in pid_query]
Beispiel #6
0
 def collection_label(self):
     """Collection label, looked up in Solr by collection pid.

     Falls back to the collection pid itself when the lookup yields
     no results.
     """
     title_query = solr_interface().query(pid=self.collection)
     title_query = title_query.field_limit('title')
     if not title_query:
         # title lookup failed; use the collection pid instead
         return self.collection
     return title_query[0]['title']
Beispiel #7
0
def collection_choices():
    """Return (pid, title) choices for editing
    :attr:`CollectionImage.collection`.

    Choices come from Solr: collections owned by ``LSDI-project``,
    sorted by exact title.
    """
    collection_query = solr_interface().query(
        content_model=Collection.COLLECTION_CONTENT_MODEL)
    collection_query = collection_query.filter(owner='LSDI-project')
    collection_query = collection_query.sort_by('title_exact')
    collection_query = collection_query.field_limit(['pid', 'title'])

    return [(coll['pid'], coll['title']) for coll in collection_query]
Beispiel #8
0
 def find_solr_pages(self):
     """Build the Solr query for this volume's pages, in page order.

     The query itself is returned (with results mapped to
     :class:`SolrPage`) so callers can filter or paginate further.
     """
     solr = solr_interface()
     # All pages that are constituents of this volume. The filters are
     # applied as separate steps, which should let solr cache the
     # filtered result sets more efficiently.
     pages = solr.query(isConstituentOf=self.uri)
     pages = pages.filter(content_model=Page.PAGE_CMODEL_PATTERN)
     pages = pages.filter(state='A')
     pages = pages.sort_by('page_order')
     pages = pages.field_limit(['pid', 'page_order'])
     return pages.results_as(SolrPage)
Beispiel #9
0
    def get_queryset(self):
        """Return the Solr query for all collections, sorted by exact title.

        If ``settings.COLLECTIONS_OWNER`` is set to a truthy value, the
        query is additionally filtered by that owner.
        """
        collections = (
            solr_interface()
            .query(content_model=Collection.COLLECTION_CONTENT_MODEL)
            .sort_by('title_exact')
            .results_as(SolrCollection)
        )

        # the owner setting is optional; without it, no owner filter applies
        owner = getattr(settings, 'COLLECTIONS_OWNER', None)
        if not owner:
            return collections
        return collections.filter(owner=owner)
Beispiel #10
0
    def get_queryset(self):
        """Solr query for collections, ordered by exact title.

        An optional ``COLLECTIONS_OWNER`` setting, when present and
        truthy, limits the collections to the specified owner.
        """
        solr = solr_interface()
        base_query = solr.query(
            content_model=Collection.COLLECTION_CONTENT_MODEL)
        collection_query = base_query.sort_by('title_exact') \
                                     .results_as(SolrCollection)

        configured_owner = getattr(settings, 'COLLECTIONS_OWNER', None)
        if configured_owner:
            # restrict to collections belonging to the configured owner
            collection_query = collection_query.filter(
                owner=configured_owner)

        return collection_query
Beispiel #11
0
def collections_modified(request, *args, **kwargs):
    """Last modification time for the list of all collections.

    Returns the newest Solr index timestamp among ``LSDI-project``
    collections and volumes, or ``None`` when nothing matches.
    """
    # The collection browse shows collection information and volume
    # counts, so a change to any of those objects counts as a modification.
    #
    # NOTE: changes to collection images are not taken into account; there
    # is currently no good way to determine the last-modification date
    # for a collection image.
    solr = solr_interface()
    owned_collections = solr.Q(
        solr.Q(content_model=Collection.COLLECTION_CONTENT_MODEL) &
        solr.Q(owner='LSDI-project'))
    any_volume = solr.Q(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL)

    # NOTE: the solr indexing timestamp is used rather than the object's
    # last modified date, since an index change may mean a modification
    newest_first = (
        solr.query(owned_collections | any_volume)
        .sort_by('-timestamp')
        .field_limit('timestamp')
    )
    if not newest_first.count():
        return None
    return newest_first[0]['timestamp']
Beispiel #12
0
def volumes_modified(request, *args, **kwargs):
    """Last modification time for all volumes.

    Passes the newest Solr index timestamp over all volumes and, for
    authenticated users, the latest creation time of annotations visible
    to them, to ``solrtimestamp_or_datetime``.
    """
    solr = solr_interface()
    # NOTE: the solr indexing timestamp is used rather than the object's
    # last modified date, since an index change may mean a modification
    newest_first = (
        solr.query(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL)
        .sort_by('-timestamp')
        .field_limit('timestamp')
    )

    # for logged-in users, changes in annotation totals alter what the
    # volume pages display
    latest_note = None
    if request.user.is_authenticated():
        visible_notes = Annotation.objects.visible_to(request.user)
        latest_note = visible_notes.last_created_time()

    solrtime = None
    if newest_first.count():
        solrtime = newest_first[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #13
0
def collections_modified(request, *args, **kwargs):
    """Last modification time for the list of all collections.

    The result is the most recent Solr index timestamp found among
    collections owned by ``LSDI-project`` and all volumes; ``None`` is
    returned if the query matches nothing.
    """
    # Collection browse includes collection information and volume counts,
    # so it should be considered modified if any of those objects change.
    #
    # NOTE: collection image changes are not covered, as there is currently
    # no good way to determine when a collection image was last modified.
    solr = solr_interface()
    collection_clause = solr.Q(
        solr.Q(content_model=Collection.COLLECTION_CONTENT_MODEL) &
        solr.Q(owner='LSDI-project'))
    volume_clause = solr.Q(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL)
    combined = collection_clause | volume_clause

    # NOTE: relies on the solr indexing timestamp instead of the object's
    # last modified date; a changed index may indicate a modification
    matches = solr.query(combined).sort_by('-timestamp') \
                  .field_limit('timestamp')
    if matches.count():
        newest = matches[0]
        return newest['timestamp']
    return None
Beispiel #14
0
def volumes_modified(request, *args, **kwargs):
    """Last modification time for all volumes.

    The newest Solr index timestamp across volumes is combined, via
    ``solrtimestamp_or_datetime``, with the latest annotation creation
    time visible to the user (authenticated users only).
    """
    # an authenticated user sees annotation totals, so new annotations
    # change how the volume pages display
    latest_note = None
    solr = solr_interface()
    # NOTE: solr indexing timestamp is used instead of object last
    # modified, since a changed index may indicate a modified object
    volume_query = solr.query(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL)
    volume_query = volume_query.sort_by('-timestamp')
    volume_query = volume_query.field_limit('timestamp')

    if request.user.is_authenticated():
        latest_note = (
            Annotation.objects.visible_to(request.user).last_created_time()
        )

    if volume_query.count():
        solrtime = volume_query[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #15
0
def page_modified(request, vol_pid, pid):
    """Last modification time for a single page.

    Hands the newest Solr index timestamp for the page and, for
    authenticated users, the last update time of the page's annotations
    visible to them, to ``solrtimestamp_or_datetime``. ``vol_pid`` is
    accepted but not currently used.
    """
    solr = solr_interface()
    # TODO: use volume pid in query
    page_query = (
        solr.query(content_model=PageV1_0.PAGE_CONTENT_MODEL, pid=pid)
        .sort_by('-timestamp')
        .field_limit('timestamp')
    )

    # a logged-in user should see the page as modified whenever
    # annotations have changed
    latest_note = None
    if request.user.is_authenticated():
        # last update among annotations on this page, if any
        page = Repository().get_object(pid, type=Page)
        visible_notes = page.annotations().visible_to(request.user)
        latest_note = visible_notes.last_updated_time()

    solrtime = None
    if page_query.count():
        solrtime = page_query[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #16
0
def page_modified(request, vol_pid, pid):
    """Last modification time for a single page.

    Uses the page's newest Solr index timestamp together with (when the
    user is authenticated) the last update time of visible annotations
    on the page; ``solrtimestamp_or_datetime`` produces the final value.
    The ``vol_pid`` argument is not used yet.
    """
    # when logged in, annotation changes should mark the page as modified
    latest_note = None
    solr = solr_interface()
    # TODO: use volume pid in query
    timestamp_query = solr.query(
        content_model=PageV1_0.PAGE_CONTENT_MODEL, pid=pid)
    timestamp_query = timestamp_query.sort_by('-timestamp')
    timestamp_query = timestamp_query.field_limit('timestamp')

    if request.user.is_authenticated():
        repo = Repository()
        page_obj = repo.get_object(pid, type=Page)
        # last update for annotations on this page, if any
        latest_note = (
            page_obj.annotations()
            .visible_to(request.user)
            .last_updated_time()
        )

    if timestamp_query.count():
        solrtime = timestamp_query[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #17
0
    def get_context_data(self, **kwargs):
        """Assemble context for the collection list display.

        The context holds ``collections`` (solr result / volume count
        tuples), the view's display ``mode`` and ``meta_covers``, at most
        four cover images picked at random.
        """
        solr = solr_interface()
        volume_facets = solr.query(content_model=Volume.VOLUME_CMODEL_PATTERN)
        volume_facets = volume_facets.facet_by(
            'collection_id', sort='count', mincount=1)
        volume_facets = volume_facets.paginate(rows=0)
        facet_fields = volume_facets.execute().facet_counts.facet_fields

        # volume totals keyed by collection pid
        counts_by_pid = {}
        for pid, total in facet_fields['collection_id']:
            counts_by_pid[pid] = total

        # tuples of (solr result, volume count), skipping collections
        # that contain no items
        collections = [
            (item, counts_by_pid.get(item['pid']))
            for item in self.object_list
            if item['pid'] in counts_by_pid
        ]

        # covers for the twitter gallery card: only collections that have
        # a cover image, shuffled in place so the first N can be taken
        covers = [entry.cover for entry, _total in collections
                  if entry.cover]
        shuffle(covers)

        return dict(
            collections=collections,
            mode=self.display_mode,
            meta_covers=covers[:4],
        )
Beispiel #18
0
def volume_pages_modified(request, pid):
    """Last modification time for a single volume, its pages, or any
    annotations of those pages.

    The newest Solr index timestamp of the volume or its constituent
    pages and, for authenticated users, the latest creation time of
    visible annotations are passed to ``solrtimestamp_or_datetime``.
    """
    solr = solr_interface()
    vol = Repository().get_object(pid, type=Volume)

    # NOTE: some overlap with Volume find_solr_pages method...
    is_this_volume = (solr.Q(content_model=Volume.VOLUME_CMODEL_PATTERN) &
                      solr.Q(pid=pid))
    is_volume_page = (solr.Q(content_model=Page.PAGE_CMODEL_PATTERN) &
                      solr.Q(isConstituentOf=vol.uri))
    # NOTE: the solr indexing timestamp is used rather than the object's
    # last modified date: an index change may mean a modification, and a
    # volume's index timestamp is updated when pages are added
    timestamp_query = (
        solr.query(is_this_volume | is_volume_page)
        .sort_by('-timestamp')
        .field_limit('timestamp')
    )

    # Annotations on the pages can also change what is displayed. Only
    # total counts per page are shown, so the creation time should be
    # sufficient; it is simplest to take the latest one in case of
    # changes. Annotation deletions are NOT accounted for.
    latest_note = None
    if request.user.is_authenticated():
        # annotations for pages in this volume, as visible to the user
        try:
            latest_note = vol.annotations().visible_to(request.user) \
                             .last_created_time()
        except Annotation.DoesNotExist:
            # no notes for this volume; leave latest_note unset
            pass

    solrtime = None
    if timestamp_query.count():
        solrtime = timestamp_query[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #19
0
def volume_pages_modified(request, pid):
    """Last modification time for a volume, its pages, or annotations
    on those pages.

    Queries Solr for the newest index timestamp among the volume and the
    pages that are constituents of it; for authenticated users the latest
    creation time of annotations visible to them is looked up as well.
    Both are given to ``solrtimestamp_or_datetime``.
    """
    solr = solr_interface()
    repo = Repository()
    volume = repo.get_object(pid, type=Volume)

    # NOTE: some overlap with Volume find_solr_pages method...
    volume_clause = solr.Q(content_model=Volume.VOLUME_CMODEL_PATTERN) \
        & solr.Q(pid=pid)
    pages_clause = solr.Q(content_model=Page.PAGE_CMODEL_PATTERN) \
        & solr.Q(isConstituentOf=volume.uri)
    matches = solr.query(volume_clause | pages_clause)
    # NOTE: solr indexing timestamp instead of object last modified; a
    # changed index may indicate a modification, and the index timestamp
    # for a volume is updated when pages are added
    matches = matches.sort_by('-timestamp').field_limit('timestamp')

    # Page display can also change because of annotations. Since only the
    # annotation *count* is displayed, creation time should be enough
    # (this does not take deletions into account).
    latest_note = None
    if request.user.is_authenticated():
        try:
            # newest annotation on pages in this volume
            latest_note = (
                volume.annotations()
                .visible_to(request.user)
                .last_created_time()
            )
        except Annotation.DoesNotExist:
            # no notes for this volume
            pass

    if matches.count():
        solrtime = matches[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #20
0
def collection_modified(request, pid, **kwargs):
    """Last modification time for the single collection view.

    The view includes collection information and the volumes in the
    collection, so a change to any of those objects counts as a
    modification. Changes to the collection image made via Django admin
    are *not* taken into account.
    """
    solr = solr_interface()
    the_collection = solr.Q(pid=pid)
    its_volumes = solr.Q(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL,
                         collection_id=pid)
    # NOTE: the solr indexing timestamp is used rather than the object's
    # last modified date, since an index change may mean a modification
    timestamp_query = (
        solr.query(the_collection | its_volumes)
        .sort_by('-timestamp')
        .field_limit('timestamp')
    )

    # for logged-in users, annotation changes can alter the collection
    # page display (annotation count)
    latest_note = None
    if request.user.is_authenticated():
        visible_notes = Annotation.objects.visible_to(request.user)
        latest_note = visible_notes.last_created_time()

    solrtime = None
    if timestamp_query.count():
        solrtime = timestamp_query[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #21
0
def collection_modified(request, pid, **kwargs):
    """Last modification time for a single collection view.

    Covers the collection record itself and the volumes belonging to it,
    so a change in any of those objects is treated as a modification.
    Does *not* reflect collection image changes made through Django admin.
    """
    # annotation changes can affect the collection page display
    # (annotation count), but only for logged-in users
    latest_note = None
    solr = solr_interface()
    match_clause = solr.Q(pid=pid) | solr.Q(
        content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, collection_id=pid)
    # NOTE: solr indexing timestamp instead of object last modified; a
    # changed index may indicate that the object has been modified
    matches = solr.query(match_clause)
    matches = matches.sort_by('-timestamp').field_limit('timestamp')

    if request.user.is_authenticated():
        latest_note = (
            Annotation.objects.visible_to(request.user).last_created_time()
        )

    if matches.count():
        solrtime = matches[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)
Beispiel #22
0
    def get_queryset(self):
        """Build the Solr volume search query from the request parameters.

        Sets ``self.form`` and ``self.sort`` and, when the form is valid,
        ``self.display_filters``. Returns the Solr query for a valid
        form, or an empty list otherwise.
        """
        request_params = self.request.GET
        self.form = BookSearch(request_params)

        # sort: currently supports relevance, title, or date added
        self.sort = request_params.get('sort', None)

        if not self.form.is_valid():
            # empty 'queryset' result required by view methods
            return []

        # keywords and phrases entered by the user
        terms = self.form.search_terms()
        solr = solr_interface()
        # general text query plus the field queries that get boosted
        text_query = solr.Q()
        author_query = solr.Q()
        title_query = solr.Q()
        for term in terms:
            text_query |= solr.Q(term)
            author_query |= solr.Q(creator=term)
            title_query |= solr.Q(title=term)
        boosted_query = text_query | author_query**3 | title_query**3

        q = solr.query().filter(content_model=Volume.VOLUME_CMODEL_PATTERN)
        q = q.query(boosted_query)
        q = q.field_limit(SolrVolume.necessary_fields, score=True)
        q = q.results_as(SolrVolume)

        if self.sort not in self.sort_options:
            # by default, sort by relevance score
            self.sort = 'relevance'
        sort_fields = {
            'relevance': ('-score',),
            # title first and then label, so multi-volume works should
            # group together in the correct order
            'title': ('title_exact', 'label'),
            'date added': ('-created',),
        }
        for field in sort_fields.get(self.sort, ()):
            q = q.sort_by(field)

        url_params = request_params.copy()

        self.display_filters = []
        if 'collection' in request_params:
            filter_val = request_params['collection']
            # restrict the solr query to the requested collection
            q = q.query(collection_label='"%s"' % filter_val)
            # link parameters for removing the facet again
            unfacet_urlopts = url_params.copy()
            del unfacet_urlopts['collection']
            self.display_filters.append(
                ('collection', filter_val, unfacet_urlopts.urlencode()))
        else:
            # faceting on collection is only needed when not already
            # filtered on collection
            q = q.facet_by('collection_label_facet',
                           sort='index',
                           mincount=1)

        read_online = ('read_online' in request_params
                       and request_params['read_online'])
        if read_online:
            # active filter - only show volumes with pages loaded
            q = q.query(page_count__gte=2)
            unfacet_urlopts = url_params.copy()
            del unfacet_urlopts['read_online']
            self.display_filters.append(
                ('Read online', '', unfacet_urlopts.urlencode()))
        else:
            # generate a facet count for books with pages loaded
            q = q.facet_query(page_count__gte=2)

        return q
Beispiel #23
0
 def items(self):
     """Solr query for ``LSDI-project`` collections, sorted by exact
     title and limited to the ``pid`` and ``last_modified`` fields."""
     collections = solr_interface().query(
         content_model=Collection.COLLECTION_CONTENT_MODEL)
     owned = collections.filter(owner='LSDI-project')
     ordered = owned.sort_by('title_exact')
     return ordered.field_limit(['pid', 'last_modified'])
Beispiel #24
0
    def get_context_data(self, **kwargs):
        """Extend the volume detail context with search results or
        annotation information.

        With a valid search form, the pages of this volume are searched
        and a paginated result set is added to the context. Otherwise,
        for volumes with pages, the user's annotation count is added.
        """
        context_data = super(VolumeDetail, self).get_context_data()

        self.form = BookSearch(self.request.GET)
        context_data['form'] = self.form

        if not self.form.is_valid():
            # Not searching within the volume: provide the annotation
            # count for display. Annotation is only possible on books
            # with pages loaded.
            if self.object.has_pages:
                # same dictionary lookup form as browse/search volume
                annotation_count = self.object.annotation_count(
                    self.request.user)
                if annotation_count != 0:
                    volume_url = self.object.get_absolute_url()
                    context_data['annotated_volumes'] = {
                        volume_url: annotation_count
                    }
                # annotation search is enabled if any annotations exist
                context_data['annotation_search_enabled'] = bool(
                    annotation_count)
            return context_data

        # Valid form: search within the book and show matching pages
        # instead of volume info.
        terms = self.form.search_terms()
        solr = solr_interface()
        page_query = solr.Q()
        for term in terms:
            # NOTE: should this be OR or AND?
            page_query |= solr.Q(page_text=term)
            if term.isnumeric():
                page_query |= solr.Q(page_order=term)**2
            page_query |= solr.Q(identifier=term)**3

        # pages that belong to this book, with highlighted snippets from
        # the page text; sorted by relevance and then by page order
        q = solr.query().filter(content_model=Page.PAGE_CMODEL_PATTERN,
                                isConstituentOf=self.object.uri)
        q = q.query(page_query)
        q = q.field_limit(['page_order', 'pid', 'identifier'], score=True)
        q = q.highlight('page_text', snippets=3)
        q = q.sort_by('-score').sort_by('page_order')
        q = q.results_as(SolrPage)

        # paginate the solr result set
        paginator = Paginator(q, 30)
        try:
            page_number = int(self.request.GET.get('page', '1'))
        except ValueError:
            # non-numeric page parameter: start at the first page
            page_number = 1
        try:
            results = paginator.page(page_number)
        except (EmptyPage, InvalidPage):
            # out-of-range request: show the last page instead
            results = paginator.page(paginator.num_pages)

        # NOTE: highlight snippets are available at
        # results.object_list.highlighting but are *NOT* currently
        # getting propagated to solrpage objects

        # url parameters for pagination links (without the page itself)
        url_params = self.request.GET.copy()
        if 'page' in url_params:
            del url_params['page']

        context_data.update({
            'pages': results,
            'url_params': urlencode(url_params),
            # provided for consistency with class-based view pagination
            'paginator': paginator,
            'page_obj': results
        })
        return context_data
Beispiel #25
0
 def items(self):
     """Return the Solr query for collections owned by ``LSDI-project``.

     Results are sorted by exact title and restricted to the ``pid``
     and ``last_modified`` fields.
     """
     solr = solr_interface()
     query = solr.query(content_model=Collection.COLLECTION_CONTENT_MODEL)
     query = query.filter(owner='LSDI-project')
     query = query.sort_by('title_exact')
     query = query.field_limit(['pid', 'last_modified'])
     return query
Beispiel #26
0
    def handle(self, **options):
        """Remove yearbook volumes newer than a threshold year from the
        Solr index.

        Every Solr record whose ``collection_id`` matches the yearbooks
        collection is listed. A record whose ``label`` contains ``_yyyy``
        with a year greater than the threshold is deleted from the index,
        or only reported when the ``dry_run`` option is set.

        :param options: command options; ``dry_run`` (default ``False``)
            lists what would be removed without deleting anything.
        """
        dry_run = options.get('dry_run', False)
        collection_id = "emory-control:LSDI-EmoryYearbooks"
        year_threadhold = 1922
        page_interval = 10
        solr = solr_interface()
        paginator = Paginator(solr.query(collection_id=collection_id), page_interval)

        # NOTE: single-argument print(...) gives identical output under
        # the Python 2 print statement and the Python 3 print function.

        # Announcements
        print("\n")
        print("###################### Important ######################")
        if dry_run:
            print("*********************** DRY RUN ***********************")
        print("environmental varaibles configured as follows")
        print("collection_id to match: {}".format(collection_id))
        print("year threshold: {} (not including {})".format(year_threadhold, year_threadhold))
        print("solr env: {}".format(settings.SOLR_SERVER_URL))
        print("#######################################################")
        print("\n")

        total = paginator.count
        # When there is no matching result
        if not total > 0:
            print("No matching condition found. Aborted.")
            return

        # Print summary on top
        print("Records with collection_id {} found: {}, listing: ".format(collection_id, total))

        # Fetch every page of results BEFORE deleting anything. Deleting
        # and committing while still paginating the same query shifts the
        # remaining results, so later pages would skip records. Each page
        # is also fetched only once instead of once per record access.
        results = []
        for page_number in range(1, paginator.num_pages + 1):
            results.extend(paginator.page(page_number))

        # Regex to match "_yyyy"
        year_pattern = re.compile(r"(\_\d{4})")

        summary = []  # records purged (or to be purged, on a dry run)
        current = 1   # counter of the currently processed pid

        for result in results:
            if not result:
                continue
            output = "{}/{}: {}, title: {}, label: {}".format(
                current, total, result["pid"], result["title"], result["label"])

            match = year_pattern.search(result["label"])
            if match:
                # drop the leading underscore to get the year
                year = int(match.group(0)[1:])
                if year > year_threadhold:
                    if dry_run:
                        # dry run - do not remove the item
                        output += " - matched with year {} and can be removed from solr index - dry run!".format(year)
                    else:
                        # actually remove the record
                        solr.delete(queries=solr.Q(pid=result["pid"]))
                        solr.commit()
                        output += " - matched with year {} and is removed from solr index".format(year)
                    summary.append({
                        "pid": result["pid"],
                        "title": result["title"],
                        "label": result["label"],
                        "year": year,
                    })
            print(output)
            current += 1  # increment for the next item

        # Print summary when there is one
        if summary:
            if dry_run:
                print("Dry run summary (these will be removed):")
            else:
                print("Index deletion summary:")

            for record in summary:
                print(record)
0
 def items(self):
     """Solr query for all pages, limited to the ``pid``,
     ``last_modified`` and ``isConstituentOf`` fields."""
     wanted_fields = ['pid', 'last_modified', 'isConstituentOf']
     page_query = solr_interface().query(
         content_model=Page.PAGE_CMODEL_PATTERN)
     return page_query.field_limit(wanted_fields)
Beispiel #28
0
    def get_context_data(self, **kwargs):
        '''Build the template context for a single collection: a sorted,
        paginated Solr listing of the volumes in the collection, along
        with facet, filter, and sort information for display.

        GET parameters used:

        - ``sort``: ``title`` or ``date added``; any other value falls
          back to ``title``
        - ``read_online``: when non-empty, restricts results to volumes
          with a ``page_count`` of at least 2
        - ``page``: page number (default 1); a non-integer value is
          treated as 1, and an empty or invalid page shows the last page

        :param kwargs: extra context, passed through to the parent class
        :returns: context dictionary
        '''
        # pass keyword arguments through to the parent implementation so
        # context supplied by the caller is not silently dropped
        context_data = super(CollectionDetail, self).get_context_data(
            **kwargs)
        request_params = self.request.GET
        # sort: currently supports title or date added
        sort = request_params.get('sort', None)

        if self.request.user.is_authenticated():
            notes = Volume.volume_annotation_count(self.request.user)
            domain = get_current_site(self.request).domain.rstrip('/')
            if not domain.startswith('http'):
                domain = 'http://' + domain
            # remove the site domain from the annotation count keys
            # (presumably absolute volume urls - TODO confirm)
            annotated_volumes = {k.replace(domain, ''): v
                                 for k, v in notes.iteritems()}
        else:
            annotated_volumes = {}

        # search for all books that are in this collection
        solr = solr_interface()
        q = solr.query(content_model=Volume.VOLUME_CMODEL_PATTERN,
                       collection_id=self.object.pid) \
                .results_as(SolrVolume)

        # generate list for display and removal of active filters;
        # each entry is (label, value, url params without that filter)
        # NOTE: borrowed from books.view.search
        display_filters = []
        if request_params.get('read_online'):
            # active filter - only show volumes with pages loaded
            q = q.query(page_count__gte=2)

            # url parameters for the link that removes this filter
            unfacet_urlopts = request_params.copy()
            del unfacet_urlopts['read_online']
            display_filters.append(('Read online', '',
                                    unfacet_urlopts.urlencode()))
        else:
            # generate a facet count for books with pages loaded
            q = q.facet_query(page_count__gte=2)

        sort_options = ['title', 'date added']
        if sort not in sort_options:
            # by default, sort by title
            sort = 'title'

        if sort == 'title':
            # sort by title and then by label so multi-volume works should
            # group together in the correct order
            q = q.sort_by('title_exact').sort_by('label')
        elif sort == 'date added':
            # sort by most recent creation date (newest additions first)
            q = q.sort_by('-created')

        # paginate the solr result set
        paginator = Paginator(q, 30)
        try:
            page = int(request_params.get('page', '1'))
        except ValueError:
            page = 1
        try:
            results = paginator.page(page)
        except (EmptyPage, InvalidPage):
            results = paginator.page(paginator.num_pages)

        # facets for display
        facet_counts = results.object_list.facet_counts
        facets = {}
        if facet_counts.facet_queries:
            # number of volumes with pages loaded;
            # facet query is a list of tuple; second value is the count
            pages_loaded = facet_counts.facet_queries[0][1]
            # only offer the facet when it would narrow the result set
            if pages_loaded < q.count():
                facets['pages_loaded'] = pages_loaded

        # url parameters for pagination & sort links
        url_params = request_params.copy()
        if 'page' in url_params:
            del url_params['page']
        sort_url_params = request_params.copy()
        if 'sort' in sort_url_params:
            del sort_url_params['sort']

        context_data.update({
            'items': results.object_list,
            'mode': self.display_mode,
            'url_params': urlencode(url_params),
            'sort_url_params': urlencode(sort_url_params),
            'current_url_params': urlencode(request_params.copy()),
            'sort': sort,
            'sort_options': sort_options,
            'annotated_volumes': annotated_volumes,
            'facets': facets,  # available facets
            'filters': display_filters,  # active filters
            # for compatibility with class-based view pagination
            'paginator': paginator,
            'page_obj': results,
        })
        return context_data
Beispiel #29
0
 def pids_by_label(label):
     '''Query Solr for Books with the given label.

     :param label: label value to match
     :returns: list with the pid of each matching Solr result
     '''
     matches = solr_interface().query(
         content_model=Book.BOOK_CONTENT_MODEL, label=label)
     pids = []
     # only the pid field is requested from Solr
     for hit in matches.field_limit('pid'):
         pids.append(hit['pid'])
     return pids
Beispiel #30
0
    def get_context_data(self, **kwargs):
        '''Assemble the template context for one collection: the volumes
        in the collection (queried from Solr, sorted and paginated) and
        the facet, filter, and sort details the template needs.

        GET parameters used:

        - ``sort``: ``title`` or ``date added``; any other value falls
          back to ``title``
        - ``read_online``: when non-empty, restricts results to volumes
          with a ``page_count`` of at least 2
        - ``page``: page number (default 1); a non-integer value is
          treated as 1, and an empty or invalid page shows the last page

        :param kwargs: extra context, passed through to the parent class
        :returns: context dictionary
        '''
        # forward keyword arguments to the parent implementation; calling
        # it with no arguments would discard any context the caller passed
        context_data = super(CollectionDetail, self).get_context_data(
            **kwargs)
        get_params = self.request.GET
        # sort: currently supports title or date added
        sort = get_params.get('sort', None)

        annotated_volumes = {}
        if self.request.user.is_authenticated():
            notes = Volume.volume_annotation_count(self.request.user)
            domain = get_current_site(self.request).domain.rstrip('/')
            if not domain.startswith('http'):
                domain = 'http://' + domain
            # drop the site domain from the annotation count keys
            # (presumably absolute volume urls - TODO confirm)
            annotated_volumes = {k.replace(domain, ''): v
                                 for k, v in notes.iteritems()}

        # search for all books that are in this collection
        solr = solr_interface()
        q = solr.query(content_model=Volume.VOLUME_CMODEL_PATTERN,
                       collection_id=self.object.pid) \
                .results_as(SolrVolume)

        # generate list for display and removal of active filters;
        # each entry is (label, value, url params without that filter)
        # NOTE: borrowed from books.view.search
        display_filters = []
        if get_params.get('read_online'):
            # active filter - only show volumes with pages loaded
            q = q.query(page_count__gte=2)

            # url parameters for the link that removes this filter
            unfacet_urlopts = get_params.copy()
            del unfacet_urlopts['read_online']
            display_filters.append(
                ('Read online', '', unfacet_urlopts.urlencode()))
        else:
            # generate a facet count for books with pages loaded
            q = q.facet_query(page_count__gte=2)

        sort_options = ['title', 'date added']
        if sort not in sort_options:
            # by default, sort by title
            sort = 'title'

        if sort == 'title':
            # sort by title and then by label so multi-volume works should
            # group together in the correct order
            q = q.sort_by('title_exact').sort_by('label')
        elif sort == 'date added':
            # sort by most recent creation date (newest additions first)
            q = q.sort_by('-created')

        # paginate the solr result set
        paginator = Paginator(q, 30)
        try:
            page = int(get_params.get('page', '1'))
        except ValueError:
            page = 1
        try:
            results = paginator.page(page)
        except (EmptyPage, InvalidPage):
            results = paginator.page(paginator.num_pages)

        # facets for display
        facets = {}
        facet_counts = results.object_list.facet_counts
        if facet_counts.facet_queries:
            # number of volumes with pages loaded;
            # facet query is a list of tuple; second value is the count
            pages_loaded = facet_counts.facet_queries[0][1]
            # only offer the facet when it would narrow the result set
            if pages_loaded < q.count():
                facets['pages_loaded'] = pages_loaded

        # url parameters for pagination & sort links
        url_params = get_params.copy()
        if 'page' in url_params:
            del url_params['page']
        sort_url_params = get_params.copy()
        if 'sort' in sort_url_params:
            del sort_url_params['sort']

        context_data.update({
            'items': results.object_list,
            'mode': self.display_mode,
            'url_params': urlencode(url_params),
            'sort_url_params': urlencode(sort_url_params),
            'current_url_params': urlencode(get_params.copy()),
            'sort': sort,
            'sort_options': sort_options,
            'annotated_volumes': annotated_volumes,
            'facets': facets,  # available facets
            'filters': display_filters,  # active filters
            # for compatibility with class-based view pagination
            'paginator': paginator,
            'page_obj': results,
        })
        return context_data
Beispiel #31
0
    def get_queryset(self):
        '''Build the Solr query for a volume keyword search.

        Binds a BookSearch form to the GET parameters (stored on
        ``self.form``) and records the requested sort on ``self.sort``.
        When the form is valid, also sets ``self.display_filters`` and
        returns a Solr query with sorting, filters, and facets applied;
        otherwise returns an empty list.
        '''
        params = self.request.GET
        self.form = BookSearch(params)

        # sort: currently supports relevance, title, or date added
        self.sort = params.get('sort', None)

        if not self.form.is_valid():
            # empty 'queryset' result required by view methods
            return []

        # get list of keywords and phrases
        keywords = self.form.search_terms()
        solr = solr_interface()
        # OR every term together, once as a general text query and once
        # each against creator and title (the latter two are boosted below)
        text_query = solr.Q()
        author_query = solr.Q()
        title_query = solr.Q()
        for keyword in keywords:
            text_query |= solr.Q(keyword)
            author_query |= solr.Q(creator=keyword)
            title_query |= solr.Q(title=keyword)

        q = solr.query().filter(content_model=Volume.VOLUME_CMODEL_PATTERN)
        boosted_query = text_query | author_query**3 | title_query**3
        q = q.query(boosted_query)
        q = q.field_limit(SolrVolume.necessary_fields, score=True)
        q = q.results_as(SolrVolume)

        if self.sort not in self.sort_options:
            # by default, sort by relevance score
            self.sort = 'relevance'
        # solr sort fields for each supported sort option; title sorts by
        # title and then by label so multi-volume works should group
        # together in the correct order
        sort_fields = {
            'relevance': ('-score',),
            'title': ('title_exact', 'label'),
            'date added': ('-created',),
        }
        for field in sort_fields.get(self.sort, ()):
            q = q.sort_by(field)

        self.display_filters = []
        if 'collection' in params:
            collection = params['collection']
            # filter the solr query based on the requested collection
            q = q.query(collection_label='"%s"' % collection)
            # generate link to remove the facet
            without_collection = params.copy()
            del without_collection['collection']
            self.display_filters.append(
                ('collection', collection, without_collection.urlencode()))
        else:
            # only facet on collection when not already filtered on it
            q = q.facet_by('collection_label_facet', sort='index',
                           mincount=1)

        if params.get('read_online'):
            # active filter - only show volumes with pages loaded
            q = q.query(page_count__gte=2)
            without_read_online = params.copy()
            del without_read_online['read_online']
            self.display_filters.append(
                ('Read online', '', without_read_online.urlencode()))
        else:
            # generate a facet count for books with pages loaded
            q = q.facet_query(page_count__gte=2)

        return q
Beispiel #32
0
    def get_context_data(self, **kwargs):
        '''Build the template context for a single volume.

        A BookSearch form is bound to the GET parameters and added to the
        context as ``form``.  When the form is valid, the pages of this
        volume are searched in Solr and a paginated result is added as
        ``pages`` / ``page_obj``, with ``paginator`` and ``url_params``.
        Otherwise, if the volume has pages, the annotation count for the
        current user is looked up; a non-zero count is added as
        ``annotated_volumes`` and ``annotation_search_enabled`` is set.

        :param kwargs: extra context, passed through to the parent class
        :returns: context dictionary
        '''
        # pass keyword arguments through to the parent implementation so
        # context supplied by the caller is not silently dropped
        context_data = super(VolumeDetail, self).get_context_data(**kwargs)

        self.form = BookSearch(self.request.GET)
        context_data['form'] = self.form
        # if form is valid, then search within the book and display
        # matching pages instead of volume info
        if self.form.is_valid():
            terms = self.form.search_terms()
            solr = solr_interface()
            query = solr.Q()
            for t in terms:
                # NOTE: should this be OR or AND?
                query |= solr.Q(page_text=t)
                # numeric terms are also matched against page order
                if t.isnumeric():
                    query |= solr.Q(page_order=t)**2
                query |= solr.Q(identifier=t)**3
            # search for pages that belong to this book;
            # return highlighted snippets from page text,
            # sort by relevance and then by page order
            q = solr.query().filter(content_model=Page.PAGE_CMODEL_PATTERN,
                                    isConstituentOf=self.object.uri) \
                    .query(query) \
                    .field_limit(['page_order', 'pid', 'identifier'],
                                 score=True) \
                    .highlight('page_text', snippets=3) \
                    .sort_by('-score').sort_by('page_order') \
                    .results_as(SolrPage)

            # paginate the solr result set
            paginator = Paginator(q, 30)
            try:
                page = int(self.request.GET.get('page', '1'))
            except ValueError:
                page = 1
            try:
                results = paginator.page(page)
            except (EmptyPage, InvalidPage):
                results = paginator.page(paginator.num_pages)

            # NOTE: highlight snippets are available at
            # results.object_list.highlighting but are *NOT* currently
            # getting propagated to solrpage objects

            # url parameters for pagination
            url_params = self.request.GET.copy()
            if 'page' in url_params:
                del url_params['page']

            context_data.update({
                'pages': results,
                'url_params': urlencode(url_params),
                # provided for consistency with class-based view pagination
                'paginator': paginator,
                'page_obj': results,
            })

        # if not searching the volume, get annotation count for display
        # - annotation is only possible on books with pages loaded
        elif self.object.has_pages:
            # uses same dictionary lookup form as for browse/search volume
            annotation_count = self.object.annotation_count(
                self.request.user)
            if annotation_count != 0:
                context_data['annotated_volumes'] = {
                    self.object.get_absolute_url(): annotation_count
                }
            # enable annotation search if any annotations are present
            context_data['annotation_search_enabled'] = bool(
                annotation_count)

        return context_data
Beispiel #33
0
 def items(self):
     '''Return a Solr query for objects matching the volume content model
     pattern, limited to the ``pid`` and ``last_modified`` fields.'''
     volume_query = solr_interface().query(
         content_model=Volume.VOLUME_CMODEL_PATTERN)
     return volume_query.field_limit(['pid', 'last_modified'])