Example #1
    def get_coauthors(self):
        """Return list of coauthors, their scopus-id and research areas."""
        url = self.xml.find('coredata/link[@rel="coauthor-search"]').get(
            'href')
        xml = download(url=url).text.encode('utf-8')
        xml = ET.fromstring(xml)
        coauthors = []
        N = int(get_encoded_text(xml, 'opensearch:totalResults') or 0)

        AUTHOR = namedtuple('Author',
                            ['name', 'scopus_id', 'affiliation', 'categories'])

        count = 0
        while count < N:
            params = {'start': count, 'count': 25}
            xml = download(url=url, params=params).text.encode('utf-8')
            xml = ET.fromstring(xml)

            for entry in xml.findall('atom:entry', ns):

                given_name = get_encoded_text(
                    entry, 'atom:preferred-name/atom:given-name')
                surname = get_encoded_text(entry,
                                           'atom:preferred-name/atom:surname')

                coauthor_name = '{0} {1}'.format(given_name, surname)

                scopus_id = get_encoded_text(entry, 'dc:identifier').replace(
                    'AUTHOR_ID:', '')

                affiliation = get_encoded_text(
                    entry, 'atom:affiliation-current/atom:affiliation-name')

                # get categories for this author
                s = ', '.join([
                    '{0} ({1})'.format(subject.text,
                                       subject.attrib['frequency'])
                    for subject in entry.findall('atom:subject-area', ns)
                ])

                coauthors += [AUTHOR(coauthor_name, scopus_id, affiliation, s)]
            count += 25

        return coauthors
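A minimal usage sketch for this variant, assuming the method belongs to an author class such as scopus.ScopusAuthor; the import path and author ID below are illustrative assumptions, not part of the example.

from scopus import ScopusAuthor   # hypothetical import path for the owning class

au = ScopusAuthor(7004212771)     # illustrative Scopus author ID
for co in au.get_coauthors():
    # Each item is the Author namedtuple built above:
    # (name, scopus_id, affiliation, categories)
    print(co.name, co.scopus_id, co.affiliation, co.categories)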
Example #2
 def get_coauthors(self):
     """Retrieves basic information about co-authors as a list of
     namedtuples in the form
     (surname, given_name, id, areas, affiliation_id, name, city, country),
     where areas is a list of subject area codes joined by "; ".
     Note: This information is not cached and retrieval is slow for large
     coauthor groups.
     """
     # Get number of authors to search for
     res = download(url=self.coauthor_link, accept='json')
     data = loads(res.text)['search-results']
     N = int(data.get('opensearch:totalResults', 0))
     # Store information in namedtuples
     fields = 'surname given_name id areas affiliation_id name city country'
     coauth = namedtuple('Coauthor', fields)
     coauthors = []
     # Iterate over search results in chunks of 25 results
     count = 0
     while count < N:
         params = {'start': count, 'count': 25}
         res = download(url=self.coauthor_link,
                        params=params,
                        accept='json')
         data = loads(res.text)['search-results'].get('entry', [])
         # Extract information for each coauthor
         for entry in data:
             aff = entry.get('affiliation-current', {})
             try:
                 areas = [a['$'] for a in entry.get('subject-area', [])]
             except TypeError:  # Only one subject area given
                 areas = [entry['subject-area']['$']]
             new = coauth(
                 surname=entry['preferred-name']['surname'],
                 given_name=entry['preferred-name'].get('given-name'),
                 id=entry['dc:identifier'].split(':')[-1],
                 areas='; '.join(areas),
                 affiliation_id=aff.get('affiliation-id'),
                 name=aff.get('affiliation-name'),
                 city=aff.get('affiliation-city'),
                 country=aff.get('affiliation-country'))
             coauthors.append(new)
         count += 25
     return coauthors
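A hedged usage sketch for this JSON-based variant; the class name and author ID are illustrative assumptions, while the field names are the ones defined in the namedtuple above.

au = AuthorRetrieval('7004212771')   # assumed owning class; illustrative author ID
for co in au.get_coauthors():
    # Coauthor fields: surname, given_name, id, areas,
    # affiliation_id, name, city, country
    print('{}, {} ({}): {}'.format(co.surname, co.given_name, co.id, co.name))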
Example #3
    def get_coauthors(self):
        """Return list of coauthors, their scopus-id and research areas."""
        url = self.xml.find('coredata/link[@rel="coauthor-search"]').get('href')
        xml = download(url=url).text.encode('utf-8')
        xml = ET.fromstring(xml)
        coauthors = []
        N = int(get_encoded_text(xml, 'opensearch:totalResults') or 0)

        AUTHOR = namedtuple('Author',
                            ['name', 'scopus_id', 'affiliation', 'categories'])

        count = 0
        while count < N:
            params = {'start': count, 'count': 25}
            xml = download(url=url, params=params).text.encode('utf-8')
            xml = ET.fromstring(xml)

            for entry in xml.findall('atom:entry', ns):

                given_name = get_encoded_text(entry,
                    'atom:preferred-name/atom:given-name')
                surname = get_encoded_text(entry,
                    'atom:preferred-name/atom:surname')
                coauthor_name = u'{0} {1}'.format(given_name, surname)

                scopus_id = get_encoded_text(entry,
                    'dc:identifier').replace('AUTHOR_ID:', '')

                affiliation = get_encoded_text(entry,
                    'atom:affiliation-current/atom:affiliation-name')

                # get categories for this author
                s = u', '.join(['{0} ({1})'.format(subject.text,
                                                   subject.attrib['frequency'])
                               for subject in
                               entry.findall('atom:subject-area', ns)])

                coauthors += [AUTHOR(coauthor_name, scopus_id, affiliation, s)]
            count += 25

        return coauthors
Example #4
 def get_coauthors(self):
     """Retrieves basic information about co-authors as a list of
     namedtuples in the form
     (surname, given_name, id, areas, affiliation_id, name, city, country),
     where areas is a list of subject area codes joined by "; ".
     Note: This information is not cached and retrieval is slow for large
     coauthor groups.
     """
     # Get number of authors to search for
     res = download(url=self.coauthor_link, accept='json')
     data = loads(res.text)['search-results']
     N = int(data.get('opensearch:totalResults', 0))
     # Store information in namedtuples
     fields = 'surname given_name id areas affiliation_id name city country'
     coauth = namedtuple('Coauthor', fields)
     coauthors = []
     # Iterate over search results in chunks of 25 results
     count = 0
     while count < N:
         params = {'start': count, 'count': 25}
         res = download(url=self.coauthor_link, params=params, accept='json')
         data = loads(res.text)['search-results'].get('entry', [])
         # Extract information for each coauthor
         for entry in data:
             aff = entry.get('affiliation-current', {})
             try:
                 areas = [a['$'] for a in entry.get('subject-area', [])]
             except TypeError:  # Only one subject area given
                 areas = [entry['subject-area']['$']]
              new = coauth(
                  surname=entry['preferred-name']['surname'],
                  given_name=entry['preferred-name'].get('given-name'),
                  id=entry['dc:identifier'].split(':')[-1],
                  areas='; '.join(areas),
                  affiliation_id=aff.get('affiliation-id'),
                  name=aff.get('affiliation-name'),
                  city=aff.get('affiliation-city'),
                  country=aff.get('affiliation-country'))
             coauthors.append(new)
         count += 25
     return coauthors
Example #5
def _parse(res, params, n, api, **kwds):
    """Auxiliary function to download results and parse json."""
    cursor = "cursor" in params
    if not cursor:
        start = params["start"]
    if n == 0:
        return ""
    _json = res.get('search-results', {}).get('entry', [])
    # Download the remaining information in chunks
    while n > 0:
        n -= params["count"]
        if cursor:
            pointer = res['search-results']['cursor'].get('@next')
            params.update({'cursor': pointer})
        else:
            start += params["count"]
            params.update({'start': start})
        res = download(url=URL[api], params=params, accept="json", **kwds).json()
        _json.extend(res.get('search-results', {}).get('entry', []))
    return _json
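For context, a sketch of how this helper is meant to be invoked once the first chunk has been downloaded; it mirrors the calling code shown in Example #11 below, where URL, api, params and kwds are defined.

res = download(url=URL[api], params=params, accept="json", **kwds).json()
n = int(res['search-results'].get('opensearch:totalResults', 0))
_json = _parse(res, params, n, api, **kwds)   # all entries, or "" if n == 0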
Example #6
    def __init__(self,
                 query,
                 api,
                 refresh,
                 count=200,
                 start=0,
                 max_entries=5000,
                 view='STANDARD',
                 cursor=False,
                 **kwds):
        """Class intended as superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AffiliationSearch, AuthorSearch, ScopusSearch.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each returning fewer results.

        start : int (optional, default=0)
            DEPRECATED! The entry number of the first search item
            to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            To skip this check, set `max_entries` to `None`.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.

        cursor : bool (optional, default=False)
            Whether to use a cursor in order to iterate over all search
            results without a limit on the number of results.  In contrast
            to the `start` parameter, the `cursor` parameter does not allow
            users to obtain partial results.

        kwds : key-value pairings, optional
            Keywords passed on to requests header.  Must contain fields
            and values specified in the respective API specification.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds max_entries.

        ValueError
            If the api parameter is an invalid entry.
        """
        # Checks
        if api not in URL:
            raise ValueError('api parameter must be one of ' +
                             ', '.join(URL.keys()))
        if not config.has_section('Directories'):
            create_config()
        if start != 0:
            text = "Parameter start is deprecated and will be removed "\
                   "in scopus 1.6."
            warn(text, UserWarning)

        # Read the file contents if file exists and we are not refreshing,
        # otherwise download query anew and cache file
        qfile = join(config.get('Directories', api),
                     md5(query.encode('utf8')).hexdigest())
        if not refresh and exists(qfile):
            with open(qfile, "rb") as f:
                self._json = [loads(line) for line in f.readlines()]
        else:
            # Get a count of how many things to retrieve from first chunk
            params = {'query': query, 'count': count, 'view': view}
            if cursor:
                params.update({'cursor': '*'})
            else:
                params.update({'start': 0})
            res = download(url=URL[api], params=params, accept="json",
                           **kwds).json()
            n = int(res['search-results'].get('opensearch:totalResults', 0))
            if not cursor and n > max_entries:  # Stop if there are too many results
                text = ('Found {} matches. Set max_entries to a higher '
                        'number, change your query ({}) or set '
                        'subscription=True'.format(n, query))
                raise ScopusQueryError(text)
            self._json = res.get('search-results', {}).get('entry', [])
            if n == 0:
                self._json = ""
            # Download the remaining information in chunks
            while n > 0:
                n -= count
                params.update({'count': count})
                if cursor:
                    pointer = res['search-results']['cursor'].get('@next')
                    params.update({'cursor': pointer})
                else:
                    start += count
                    params.update({'start': start})
                res = download(url=URL[api],
                               params=params,
                               accept="json",
                               **kwds).json()
                self._json.extend(
                    res.get('search-results', {}).get('entry', []))
            # Finally write out the file
            with open(qfile, 'wb') as f:
                for item in self._json:
                    f.write('{}\n'.format(dumps(item)).encode('utf-8'))
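A minimal sketch of how a subclass would use this superclass; the subclass body is illustrative, but 'AuthorSearch' is one of the api values the docstring allows.

class AuthorSearch(Search):
    def __init__(self, query, refresh=False, count=200, **kwds):
        # Delegate downloading, chunking and caching to the superclass
        Search.__init__(self, query=query, api='AuthorSearch',
                        refresh=refresh, count=count, **kwds)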
Example #7
    def __init__(self, query, api, refresh, count=200, start=0,
                 max_entries=5000, view='STANDARD'):
        """Class intended as superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AffiliationSearch, AuthorSearch, ScopusSearch.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each returning fewer results.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            The Scopus Search Engine does not allow more than 5000 entries.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.  Allowed values: STANDARD,
            COMPLETE.
            Note: Only the ScopusSearch API additionally uses view COMPLETE.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds max_entries.

        ValueError
            If the api parameter or view parameter is an invalid entry.
        """
        # Checks
        if api not in URL:
            raise ValueError('api parameter must be one of ' +
                             ', '.join(URL.keys()))
        allowed_views = ('STANDARD', 'COMPLETE')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))
        if not config.has_section('Directories'):
            create_config()

        # Read the file contents if file exists and we are not refreshing,
        # otherwise download query anew and cache file
        qfile = join(config.get('Directories', api),
                     md5(query.encode('utf8')).hexdigest())
        if not refresh and exists(qfile):
            with open(qfile, "rb") as f:
                self._json = [loads(line) for line in f.readlines()]
        else:
            # Get a count of how many things to retrieve from first chunk
            params = {'query': query, 'count': count, 'start': 0, 'view': view}
            res = download(url=URL[api], params=params, accept="json").json()
            n = int(res['search-results'].get('opensearch:totalResults', 0))
            if n > max_entries:  # Stop if there are too many results
                text = ('Found {} matches. Set max_entries to a higher '
                        'number or change your query ({})'.format(n, query))
                raise ScopusQueryError(text)
            self._json = res.get('search-results', {}).get('entry', [])
            if n == 0:
                self._json = ""
            # Download the remaining information in chunks
            while n > 0:
                n -= count
                start += count
                params.update({'count': count, 'start': start})
                res = download(url=URL[api], params=params, accept="json").json()
                self._json.extend(res.get('search-results', {}).get('entry', []))
            # Finally write out the file
            with open(qfile, 'wb') as f:
                for item in self._json:
                    f.write('{}\n'.format(dumps(item)).encode('utf-8'))
Example #8
    def __init__(self,
                 query,
                 fields='eid',
                 count=200,
                 start=0,
                 max_entries=5000,
                 refresh=False):
        """Class to search a query, and retrieve a list of EIDs as results.

        Parameters
        ----------
        query : str
            A string of the query.

        fields : str (optional, default='eid')
            The fields you want returned.  Allowed fields are specified in
            https://dev.elsevier.com/guides/ScopusSearchViews.htm.  Since
            currently only EIDs are stored, this parameter is being kept
            for later use only.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each returning fewer results.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            The Scopus Search Engine does not allow more than 5000 entries.

        Raises
        ------
        Exception
            If the number of search results exceeds max_entries.

        Notes
        -----
        XML results are cached in ~/.scopus/search/{query}.

        The EIDs are stored as a property named EIDS.
        """

        qfile = os.path.join(
            SCOPUS_SEARCH_DIR,
            # We need to remove / in a DOI here so we can save
            # it as a file.
            query.replace('/', '_slash_'))

        if os.path.exists(qfile) and not refresh:
            with open(qfile) as f:
                self._EIDS = [
                    eid for eid in f.read().strip().split('\n') if eid
                ]
        else:
            # No cached file exists, or we are refreshing.
            # First, we get a count of how many things to retrieve
            url = 'https://api.elsevier.com/content/search/scopus'
            params = {
                'query': query,
                'field': fields,
                'count': 0,
                'start': 0,
                'date': '2017-2018'
            }
            xml = download(url=url, params=params).text.encode('utf-8')
            results = ET.fromstring(xml)

            N = results.find('opensearch:totalResults', ns)
            try:
                N = int(N.text)
            except (AttributeError, ValueError):
                # find() returned no element or the text was not an integer
                N = 0

            if N > max_entries:
                raise Exception(('N = {}. '
                                 'Set max_entries to a higher number or '
                                 'change your query ({})').format(N, query))

            self._EIDS = []
            while N > 0:
                params = {
                    'query': query,
                    'field': fields,   # the API expects 'field', as in the first request
                    'count': count,
                    'start': start,
                    'date': '2017-2018'
                }
                resp = download(url=url, params=params, accept="json")
                results = resp.json()

                if 'entry' in results.get('search-results', []):
                    self._EIDS += [
                        str(r['eid'])
                        for r in results['search-results']['entry']
                    ]
                start += count
                N -= count

            with open(qfile, 'wb') as f:
                for eid in self.EIDS:
                    f.write('{}\n'.format(eid).encode('utf-8'))
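An illustrative usage sketch, assuming this __init__ belongs to a ScopusSearch-style class whose EIDS property returns self._EIDS (see the Notes in the docstring); the query string is an arbitrary example.

s = ScopusSearch('TITLE-ABS-KEY(machine learning)', refresh=True)
print(len(s.EIDS))   # number of retrieved EIDs
print(s.EIDS[:3])    # first few EIDs as strings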
Example #9
    def __init__(self,
                 query,
                 fields='eid',
                 count=200,
                 start=0,
                 refresh=False,
                 max_entries=1000):
        """Class to search a query, and retrieve a list of EIDs as results.

        Parameters
        ----------
        query : str
            A string of the query.

        fields : str (optional, default='eid')
            The list of fields you want returned.

        count : int (optional, default=200)
            The number of entries to be displayed.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        max_entries : int (optional, default=1000)
            Raise error when the number of results is beyond this number.

        Notes
        -----
        XML results are cached in ~/.scopus/search/{query}.

        The EIDs are stored as a property.
        """

        self.query = query

        qfile = os.path.join(
            SCOPUS_SEARCH_DIR,
            # We need to remove / in a DOI here so we can save
            # it as a file.
            query.replace('/', '_slash_'))

        if os.path.exists(qfile) and not refresh:
            with open(qfile) as f:
                self._EIDS = [
                    eid for eid in f.read().strip().split('\n') if eid
                ]
        else:
            # No cached file exists, or we are refreshing.
            # First, we get a count of how many things to retrieve
            url = 'http://api.elsevier.com/content/search/scopus'
            params = {'query': query, 'field': fields, 'count': 0, 'start': 0}
            xml = download(url=url, params=params).text.encode('utf-8')
            results = ET.fromstring(xml)

            N = results.find('opensearch:totalResults', ns)
            try:
                N = int(N.text)
            except (AttributeError, ValueError):
                # find() returned no element or the text was not an integer
                N = 0

            if N > max_entries:
                raise Exception(('N = {}. '
                                 'Set max_entries to a higher number or '
                                 'change your query ({})').format(N, query))

            self._EIDS = []
            while N > 0:
                params = {
                    'query': query,
                    'field': fields,   # the API expects 'field', as in the first request
                    'count': count,
                    'start': start
                }
                resp = download(url=url, params=params, accept="json")
                results = resp.json()

                if 'entry' in results.get('search-results', []):
                    self._EIDS += [
                        str(r['eid'])
                        for r in results['search-results']['entry']
                    ]
                start += count
                N -= count

            with open(qfile, 'wb') as f:
                for eid in self.EIDS:
                    f.write('{}\n'.format(eid).encode('utf-8'))
Example #10
    def __init__(self,
                 query,
                 filepath,
                 url,
                 refresh,
                 count=200,
                 start=0,
                 max_entries=5000,
                 view='STANDARD'):
        """Class intended as superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        filepath : str
            The complete filepath and -name of the cached file.

        url : str
            The API access point.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each returning fewer results.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            The Scopus Search Engine does not allow more than 5000 entries.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.  Allowed values: STANDARD,
            COMPLETE.
            Note: Only the Scopus search API additionally uses view COMPLETE.

        Raises
        ------
        Exception
            If the number of search results exceeds max_entries.

        ValueError
            If the view parameter is an invalid entry.
        """
        allowed_views = ('STANDARD', 'COMPLETE')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))
        # Read the file contents if it exists and we are not refreshing
        if not refresh and exists(filepath):
            self._json = []
            with open(filepath) as f:
                for r in f.readlines():
                    self._json.append(loads(r))
        # Download file if the cached file doesn't exist or we are refreshing
        else:
            # First, get a count of how many things to retrieve
            params = {'query': query, 'count': 0, 'start': 0, 'view': view}
            res = get_content(filepath,
                              url=url,
                              refresh=refresh,
                              params=params,
                              accept='json')
            data = loads(res.decode('utf-8'))['search-results']
            N = int(data.get('opensearch:totalResults', 0))
            if N > max_entries:
                raise Exception(('Found {} matches. '
                                 'Set max_entries to a higher number or '
                                 'change your query ({})').format(N, query))

            # Then download the information in chunks
            self._json = []
            while N > 0:
                params.update({'count': count, 'start': start})
                res = download(url=url, params=params, accept="json")
                results = res.json()

                if 'entry' in results.get('search-results', []):
                    for r in results['search-results']['entry']:
                        self._json.append({f: r[f] for f in r.keys()})
                start += count
                N -= count

            # Finally write out the file
            with open(filepath, 'wb') as f:
                for item in self._json:
                    f.write('{}\n'.format(dumps(item)).encode('utf-8'))
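A hedged sketch of a subclass built on this filepath/url variant; the cache directory and API endpoint are assumptions for illustration only.

from os.path import expanduser, join

class AuthorSearch(Search):
    def __init__(self, query, refresh=False):
        fname = join(expanduser('~/.scopus/author_search'),
                     query.replace('/', '_slash_'))          # assumed cache location
        api_url = 'https://api.elsevier.com/content/search/author'  # assumed endpoint
        Search.__init__(self, query, fname, api_url, refresh)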
Example #11
    def __init__(self, query, api, refresh, count=200, start=0,
                 max_entries=5000, view='STANDARD', cursor=False,
                 download_results=True, **kwds):
        """Class intended as superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AffiliationSearch, AuthorSearch, ScopusSearch.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each returning fewer results.

        start : int (optional, default=0)
            DEPRECATED! The entry number of the first search item
            to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            To skip this check, set `max_entries` to `None`.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.

        cursor : bool (optional, default=False)
            Whether to use a cursor in order to iterate over all search
            results without a limit on the number of results.  In contrast
            to the `start` parameter, the `cursor` parameter does not allow
            users to obtain partial results.

        download_results : bool (optional, default=True)
            Whether to download results (if they have not been cached) or not.

        kwds : key-value pairings, optional
            Keywords passed on to requests header.  Must contain fields
            and values specified in the respective API specification.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds max_entries.

        ValueError
            If the api parameter is an invalid entry.
        """
        # Checks
        if api not in URL:
            raise ValueError('api parameter must be one of ' +
                             ', '.join(URL.keys()))
        if not config.has_section('Directories'):
            create_config()
        if start != 0:
            text = "Parameter start is deprecated and will be removed "\
                   "in scopus 1.6."
            warn(text, UserWarning)

        # Read the file contents if file exists and we are not refreshing,
        # otherwise download query anew and cache file
        qfile = join(config.get('Directories', api),
                     md5(query.encode('utf8')).hexdigest())
        if not refresh and exists(qfile):
            with open(qfile, "rb") as f:
                self._json = [loads(line) for line in f.readlines()]
            self._n = n = len(self._json)
        else:
            # Set query parameters
            params = {'query': query, 'count': count, 'view': view}
            if cursor:
                params.update({'cursor': '*'})
            else:
                params.update({'start': 0})
            # Download results
            res = download(url=URL[api], params=params, accept="json", **kwds).json()
            n = int(res['search-results'].get('opensearch:totalResults', 0))
            self._n = n
            if not cursor and n > max_entries:  # Stop if there are too many results
                text = ('Found {} matches. Set max_entries to a higher '
                        'number, change your query ({}) or set '
                        'subscription=True'.format(n, query))
                raise ScopusQueryError(text)
            if download_results:
                self._json = _parse(res, params, n, api, **kwds)
                # Finally write out the file
                with open(qfile, 'wb') as f:
                    for item in self._json:
                        f.write('{}\n'.format(dumps(item)).encode('utf-8'))
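A small sketch of the download_results switch this version adds: with download_results=False only the first chunk is requested, so the size of the result set is known without downloading every entry (the query and api value are illustrative).

s = Search('AF-ID(60010348)', 'AffiliationSearch', refresh=True,
           download_results=False)
print(s._n)   # total number of matches reported by the API; entries not downloaded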
Example #12
    def __init__(self,
                 query,
                 filepath,
                 url,
                 refresh,
                 count=200,
                 start=0,
                 max_entries=5000):
        """Class intended for use a superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        filepath : str
            The complete filepath and -name of the cached file.

        url : str
            The API access point.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each returning fewer results.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            The Scopus Search Engine does not allow more than 5000 entries.

        Raises
        ------
        Exception
            If the number of search results exceeds max_entries.
        """
        # Read the file contents if it exists and we are not refreshing.
        if not refresh and exists(filepath):
            self._json = []
            with open(filepath) as f:
                for r in f.readlines():
                    self._json.append(loads(r))
        # If the cached file doesn't exist, or we are refreshing, download the file.
        else:
            # First, we get a count of how many things to retrieve.
            params = {'query': query, 'count': 0, 'start': 0}
            res = get_content(filepath,
                              url=url,
                              refresh=refresh,
                              params=params,
                              accept='json')
            data = loads(res.decode('utf-8'))['search-results']
            N = int(data.get('opensearch:totalResults', 0))
            if N > max_entries:
                raise Exception(('Found {} matches. '
                                 'Set max_entries to a higher number or '
                                 'change your query ({})').format(N, query))

            # Then we download the information in chunks.
            self._json = []
            while N > 0:
                params = {'query': query, 'count': count, 'start': start}
                resp = download(url=url, params=params, accept="json")
                results = resp.json()

                if 'entry' in results.get('search-results', []):
                    for r in results['search-results']['entry']:
                        self._json.append({f: r[f] for f in r.keys()})
                start += count
                N -= count

            # Finally write out the file.
            with open(filepath, 'wb') as f:
                for author in self._json:
                    f.write('{}\n'.format(dumps(author)).encode('utf-8'))