Beispiel #1
0
    def search_items(self, query,
                     fields=None,
                     sorts=None,
                     params=None,
                     request_kwargs=None):
        """Build a :class:`Search` over Archive.org items for ``query``.

        :type query: str
        :param query: The Archive.org search query to yield results for. See
                      https://archive.org/advancedsearch.php#raw for help with the
                      query syntax.

        :type fields: iterable of str
        :param fields: (optional) The metadata fields to return in the search results.

        :param sorts: (optional) Sort options, passed through to :class:`Search`
                      unchanged.

        :type params: dict
        :param params: (optional) The URL parameters to send with each request sent to
                       the Archive.org Advancedsearch Api.

        :type request_kwargs: dict
        :param request_kwargs: (optional) Extra keyword arguments handed to
                               :class:`Search`. Any falsy value (``None``, an empty
                               mapping) is replaced by a new empty dict.

        :returns: A :class:`Search` object, yielding search results.
        """
        return Search(
            self,
            query,
            fields=fields,
            sorts=sorts,
            params=params,
            # Search always receives a dict here, never None.
            request_kwargs=request_kwargs or {},
        )
Beispiel #2
0
    def search_items(self,
                     query,
                     fields=None,
                     sorts=None,
                     params=None,
                     full_text_search=None,
                     dsl_fts=None,
                     request_kwargs=None,
                     max_retries=None):
        """Build a :class:`Search` over Archive.org items for ``query``.

        :type query: str
        :param query: The Archive.org search query to yield results for. See
                      https://archive.org/advancedsearch.php#raw for help with the
                      query syntax.

        :type fields: iterable of str
        :param fields: (optional) The metadata fields to return in the search results.

        :param sorts: (optional) Sort options, passed through to :class:`Search`
                      unchanged.

        :type params: dict
        :param params: (optional) The URL parameters to send with each request sent to
                       the Archive.org Advancedsearch Api.

        :type full_text_search: bool
        :param full_text_search: (optional) Beta support for querying the archive.org
                                 Full Text Search API [default: False].

        :type dsl_fts: bool
        :param dsl_fts: (optional) Beta support for querying the archive.org Full Text
                        Search API in dsl (i.e. do not prepend ``!L `` to the
                        ``full_text_search`` query) [default: False].

        :type request_kwargs: dict
        :param request_kwargs: (optional) Extra keyword arguments handed to
                               :class:`Search`. Any falsy value (``None``, an empty
                               mapping) is replaced by a new empty dict.

        :param max_retries: (optional) Retry setting, passed through to
                            :class:`Search` unchanged.

        :returns: A :class:`Search` object, yielding search results.
        """
        # Normalise a missing/empty request_kwargs so Search always gets a dict.
        if not request_kwargs:
            request_kwargs = {}

        return Search(
            self,
            query,
            fields=fields,
            sorts=sorts,
            params=params,
            full_text_search=full_text_search,
            dsl_fts=dsl_fts,
            request_kwargs=request_kwargs,
            max_retries=max_retries,
        )
Beispiel #3
0
    def search_items(self,
                     query: str,
                     fields: Iterable[str] | None = None,
                     sorts: Iterable[str] | None = None,
                     params: Mapping | None = None,
                     full_text_search: bool = False,
                     dsl_fts: bool = False,
                     request_kwargs: Mapping | None = None,
                     max_retries: int | Retry | None = None) -> Search:
        """Build a :class:`Search` over Archive.org items for ``query``.

        :param query: The Archive.org search query to yield results for. See
                      https://archive.org/advancedsearch.php#raw for help with the
                      query syntax.

        :param fields: The metadata fields to return in the search results.

        :param sorts: Sort options, passed through to :class:`Search` unchanged.

        :param params: The URL parameters to send with each request sent to the
                       Archive.org Advancedsearch Api.

        :param full_text_search: Beta support for querying the archive.org
                                 Full Text Search API [default: False].

        :param dsl_fts: Beta support for querying the archive.org Full Text
                        Search API in dsl (i.e. do not prepend ``!L `` to the
                        ``full_text_search`` query) [default: False].

        :param request_kwargs: Extra keyword arguments handed to :class:`Search`.
                               Any falsy value (``None``, an empty mapping) is
                               replaced by a new empty dict.

        :param max_retries: Retry setting, passed through to :class:`Search`
                            unchanged.

        :returns: A :class:`Search` object, yielding search results.
        """
        return Search(
            self,
            query,
            fields=fields,
            sorts=sorts,
            params=params,
            full_text_search=full_text_search,
            dsl_fts=dsl_fts,
            # Search always receives a mapping here, never None.
            request_kwargs=request_kwargs if request_kwargs else {},
            max_retries=max_retries,
        )
def grab_key_values(key):
    """Tally and print how often each value of ``key`` occurs in a fixed search.

    Runs an Archive.org search for audio or movie items that match
    ``closed_captioning:yes``, ``format:SubRip`` or
    ``format:"Web Video Text Tracks"``, requesting only the ``key`` field.
    Each result contributes to the tally as follows:

    * a string value counts once;
    * a list value counts once per element;
    * a result without ``key`` is reported on stdout and skipped.

    When the search is exhausted, the ``(count, value)`` pairs are printed
    in ascending order.

    :param key: Name of the metadata field to tally.
    :raises ValueError: If a result's value for ``key`` is neither a ``str``
                        nor a ``list``.
    """
    s = ArchiveSession()
    search = Search(
        s,
        '(mediatype:audio OR mediatype:movies) AND (closed_captioning:yes OR format:SubRip OR format:"Web Video Text Tracks")',
        fields=[key])
    counts = defaultdict(int)
    for result in tqdm(search):
        if key not in result:
            print(f"No {key} result:", result)
            continue
        value = result[key]
        if isinstance(value, str):
            counts[value] += 1
        elif isinstance(value, list):
            for entry in value:
                counts[entry] += 1
        else:
            # This branch belongs to the if/elif chain. It used to be attached
            # to the inner ``for`` loop (a for-else), so it fired after every
            # list value and never for genuinely unexpected types.
            raise ValueError(f"Unexpected type for {key}: ",
                             type(value), result)

    print(f"Counts for key={key}")
    print("\n".join(
        str(x) for x in sorted((v, k) for k, v in counts.items())))
Beispiel #5
0
    except OSError:
        output('could not create kasette')

    return True

# Emit a status message through the output() helper.
output('Tuning …')

# NOTE(review): presumably empties or recreates FILE_DIR before the run —
# confirm against reset_folder's definition.
reset_folder(FILE_DIR)

# Build nine query strings, each SEARCH_QUERY followed by a space and one
# randomly chosen ASCII letter.
# NOTE(review): in the visible part of the loop that follows, the Search is
# built from SEARCH_QUERY alone and `query` is never read — verify whether
# these randomized strings were meant to be used there.
queries = []

for x in range(1,10):
    queries.append('%s %s' % (SEARCH_QUERY, random.choice(string.ascii_letters)))

for query in queries:
    search = Search(s, '(subject:%s OR title:%s AND mediatype:(audio) AND item_size:[0 TO "%s"])' % (SEARCH_QUERY, SEARCH_QUERY, MAX_MB_FILE))

    for result in search:
        item = get_item(result['identifier'])

        output('Found %s' % result['identifier'])

        files = item.files
        metadata = item.metadata

        for file in files:
            name = file['name'].lower()

            output('listening to %s' % name)

            if 'MP3' in file['format']: