def check_for_new_items(username, password, collection, collections_db):
    """Return identifiers of items in a collection that are not yet recorded.

    username->(String) IA username
    password->(String) IA password
    collection->(String) IA identifier for the collection to watch
    collections_db->(String) file path of plain text database of
        collections to ignore, one identifier per line:
            ident_1
            ident_2
            ident_3
    returns->(list) list of identifiers of new items in collection

    Checks if there is a new group of scans in the collection by comparing
    its contents against the identifiers listed in the text file.
    """
    configure(username, password)  # Configure log in information for IA

    # Load the known identifiers into a set so that every membership test
    # below is a hash lookup instead of a scan over the whole list.
    with open(collections_db) as f:
        downloaded_collections = {line.rstrip("\n") for line in f}

    # Keep the order in which the collection yields its items.
    return [
        book.identifier
        for book in get_item(collection).contents()
        if book.identifier not in downloaded_collections
    ]
def main(argv, session):
    """Print the stored login cookies or save new Archive.org credentials.

    ``argv`` is parsed with docopt against the module docstring.
    ``session`` must provide ``config`` (a mapping), ``config_file`` and
    ``host``.

    With ``--print-cookies`` the "logged-in-user" and "logged-in-sig" values
    are read from ``session.config['cookies']`` and printed to stdout; the
    process exits with status 1 (message on stderr) if either is missing.

    Otherwise credentials come from ``--username``/``--password``, from the
    ``archive.org`` entry of the user's .netrc file (``--netrc``), or are
    left for ``configure()`` to obtain. Every failure path exits with
    status 1.
    """
    args = docopt(__doc__, argv=argv)

    if args['--print-cookies']:
        cookies = session.config.get('cookies', {})
        user = cookies.get('logged-in-user')
        sig = cookies.get('logged-in-sig')
        if not user or not sig:
            # Name exactly which cookie(s) are absent.
            if not user and not sig:
                missing = '"logged-in-user" and "logged-in-sig" cookies'
            elif not user:
                missing = '"logged-in-user" cookie'
            else:
                missing = '"logged-in-sig" cookie'
            print('error: {} not found in config file, '
                  'try reconfiguring.'.format(missing), file=sys.stderr)
            sys.exit(1)
        print('logged-in-user={}; logged-in-sig={}'.format(user, sig))
        sys.exit()

    try:
        # CLI params.
        if args['--username'] and args['--password']:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         config_file=session.config_file,
                                         host=session.host)
            print('Config saved to: {0}'.format(config_file_path))

        # Netrc
        elif args['--netrc']:
            print("Configuring 'ia' with netrc file...")
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print('error: netrc.netrc() cannot parse your .netrc file.')
                sys.exit(1)
            except (IOError, OSError) as exc:
                # netrc.netrc() raises when the file is missing or unreadable;
                # report it instead of dumping a traceback.
                print('error: cannot read your .netrc file: {0}'.format(exc))
                sys.exit(1)
            credentials = n.hosts.get('archive.org')
            if credentials is None:
                # Without this check a .netrc lacking the host entry would
                # surface as a bare KeyError.
                print('error: no "archive.org" entry found in your '
                      '.netrc file.')
                sys.exit(1)
            username, _, password = credentials
            config_file_path = configure(username,
                                         password,
                                         config_file=session.config_file,
                                         host=session.host)
            print('Config saved to: {0}'.format(config_file_path))

        # Interactive input.
        else:
            print(
                "Enter your Archive.org credentials below to configure 'ia'.\n"
            )
            config_file_path = configure(config_file=session.config_file,
                                         host=session.host)
            print('\nConfig saved to: {0}'.format(config_file_path))

    except AuthenticationError as exc:
        print('\nerror: {0}'.format(str(exc)))
        sys.exit(1)
def check_for_new_items(username, password, collection, collections_db):
    """Return identifiers of items in a collection that are not yet recorded.

    username->(String) IA username
    password->(String) IA password
    collection->(String) IA identifier for the collection to watch
    collections_db->(String) file path of plain text database of
        collections to ignore, one identifier per line:
            ident_1
            ident_2
            ident_3
    returns->(list) list of identifiers of new items in collection

    Checks if there is a new group of scans in the collection by comparing
    its contents against the identifiers listed in the text file.
    """
    configure(username, password)  # Configure log in information for IA

    # A set makes each "already downloaded?" check a hash lookup rather than
    # a linear scan of every line read from the file.
    with open(collections_db) as f:
        downloaded_collections = {line.rstrip("\n") for line in f}

    # Result order follows the order in which the collection yields items.
    return [
        book.identifier
        for book in get_item(collection).contents()
        if book.identifier not in downloaded_collections
    ]
def main(argv, session):
    """Save Archive.org credentials to the session's config file.

    ``argv`` is parsed with docopt against the module docstring.
    ``session`` must provide ``config_file``.

    Credentials come from ``--username``/``--password``, from the
    ``archive.org`` entry of the user's .netrc file (``--netrc``), or are
    left for ``configure()`` to obtain. The path returned by ``configure()``
    is printed on success. An unusable .netrc file or an
    ``AuthenticationError`` prints an error and exits with status 1.
    """
    args = docopt(__doc__, argv=argv)
    try:
        # CLI params.
        if args['--username'] and args['--password']:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Netrc
        elif args['--netrc']:
            print("Configuring 'ia' with netrc file...")
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print('error: netrc.netrc() cannot parse your .netrc file.')
                sys.exit(1)
            except (IOError, OSError) as exc:
                # Raised when the .netrc file is missing or unreadable;
                # report it instead of dumping a traceback.
                print('error: cannot read your .netrc file: {0}'.format(exc))
                sys.exit(1)
            credentials = n.hosts.get('archive.org')
            if credentials is None:
                # Avoid a bare KeyError when the host has no entry.
                print('error: no "archive.org" entry found in your '
                      '.netrc file.')
                sys.exit(1)
            username, _, password = credentials
            config_file_path = configure(username,
                                         password,
                                         config_file=session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Interactive input.
        else:
            print(
                "Enter your Archive.org credentials below to configure 'ia'.\n"
            )
            config_file_path = configure(config_file=session.config_file)
            print('\nConfig saved to: {0}'.format(config_file_path))

    except AuthenticationError as exc:
        print('\nerror: {0}'.format(str(exc)))
        sys.exit(1)
def main(argv, session):
    """Save Archive.org credentials to the session's config file.

    ``argv`` is parsed with docopt against the module docstring.
    ``session`` must provide ``config_file``.

    The credential source is chosen in this order: ``--username`` together
    with ``--password``; the ``archive.org`` entry of the user's .netrc file
    when ``--netrc`` is given; otherwise ``configure()`` is called without
    credentials. The path returned by ``configure()`` is printed on success.
    An unusable .netrc file or an ``AuthenticationError`` prints an error
    and exits with status 1.
    """
    args = docopt(__doc__, argv=argv)
    try:
        # CLI params.
        if args['--username'] and args['--password']:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Netrc
        elif args['--netrc']:
            print("Configuring 'ia' with netrc file...")
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print('error: netrc.netrc() cannot parse your .netrc file.')
                sys.exit(1)
            except (IOError, OSError) as exc:
                # A missing or unreadable .netrc file would otherwise escape
                # as an uncaught exception.
                print('error: cannot read your .netrc file: {0}'.format(exc))
                sys.exit(1)
            credentials = n.hosts.get('archive.org')
            if credentials is None:
                # No entry for the host: fail with a message, not a KeyError.
                print('error: no "archive.org" entry found in your '
                      '.netrc file.')
                sys.exit(1)
            username, _, password = credentials
            config_file_path = configure(username,
                                         password,
                                         config_file=session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Interactive input.
        else:
            print("Enter your Archive.org credentials below to configure 'ia'.\n")
            config_file_path = configure(config_file=session.config_file)
            print('\nConfig saved to: {0}'.format(config_file_path))

    except AuthenticationError as exc:
        print('\nerror: {0}'.format(str(exc)))
        sys.exit(1)
def main(argv, session):
    """Prompt for Archive.org credentials and hand off to ``configure()``.

    ``argv`` is parsed with docopt against the module docstring; the parsed
    result is discarded. ``session`` is accepted but not used here. If
    ``configure()`` raises ``AuthenticationError`` the error is printed and
    the process exits with status 1.
    """
    # Parsing is done only for its side effects (usage/help handling).
    docopt(__doc__, argv=argv)

    banner = "Enter your Archive.org credentials below to configure 'ia'.\n"
    print(banner)

    try:
        configure()
    except AuthenticationError as auth_error:
        failure = '\nerror: {0}'.format(str(auth_error))
        print(failure)
        sys.exit(1)
def main(argv, session):
    """Prompt for Archive.org credentials and store them via ``configure()``.

    ``argv`` is parsed with docopt against the module docstring; the parsed
    result is discarded. ``session.config_file`` is forwarded to
    ``configure()`` as ``config_file``. If ``configure()`` raises
    ``AuthenticationError`` the error is printed and the process exits with
    status 1.
    """
    # Parsing is done only for its side effects (usage/help handling).
    docopt(__doc__, argv=argv)

    banner = "Enter your Archive.org credentials below to configure 'ia'.\n"
    print(banner)

    try:
        configure(config_file=session.config_file)
    except AuthenticationError as auth_error:
        failure = '\nerror: {0}'.format(str(auth_error))
        print(failure)
        sys.exit(1)
def download_collection(username,
                        password,
                        collection,
                        destination,
                        glob="*",
                        dry_run=False):
    """Configure IA credentials, then download ``collection``.

    ``username`` and ``password`` are passed to ``configure()``.
    ``destination``, ``glob`` and ``dry_run`` are forwarded to ``download()``
    as ``destdir``, ``glob_pattern`` and ``dry_run`` respectively.
    Returns None.
    """
    configure(username, password)

    # Collect the forwarded options first so the mapping from this
    # function's parameters to download()'s keywords is visible in one place.
    download_options = {
        "destdir": destination,
        "glob_pattern": glob,
        "dry_run": dry_run,
    }
    download(collection, **download_options)
def main():
    """Command-line entry point: configure credentials or run one lookup.

    Arguments are read from ``argparser().parse_args()``. Exactly one branch
    runs, chosen in this order: ``configure``, ``get_olid``, ``get_book``,
    ``get_work``, ``create``; with none of them set the parser's help is
    printed.

    Returns the branch's result (a status string, an OLID, or a
    jsonpickle-encoded object). Returns None implicitly when ``get_book`` or
    ``get_work`` is set without any of the lookup arguments that branch
    checks.

    Raises ValueError if ``--configure`` is used and no email is supplied.
    """
    parser = argparser()
    args = parser.parse_args()
    if args.configure:
        # raw_input is the Python 2 builtin; an empty answer is rejected below.
        email = args.email or raw_input("Archive.org Email: ")
        if not email:
            raise ValueError("--email required for configuration")
        # NOTE(review): the statement below is garbled in this copy of the
        # file (text between the password prompt and the "Incorrect
        # credentials" message appears to have been redacted). It is kept
        # verbatim; restore the original code before relying on this branch.
        password = getpass.getpass("Password: "******"Incorrect credentials, not updating config."
        config_tool.update(config)
        return "Successfully configured "
    ol = OpenLibrary()
    if args.get_olid:
        return ol.Edition.get_olid_by_isbn(args.isbn)
    elif args.get_book:
        # An OLID takes precedence over an ISBN when both are given.
        if args.olid:
            return jsonpickle.encode(ol.Edition.get(olid=args.olid))
        elif args.isbn:
            return jsonpickle.encode(ol.Edition.get(isbn=args.isbn))
    elif args.get_work:
        # An OLID takes precedence over a title search when both are given.
        if args.olid:
            return jsonpickle.encode(ol.Work.get(args.olid))
        elif args.title:
            return jsonpickle.encode(ol.Work.search(args.title))
    elif args.create:
        # args.create is a JSON object; 'title' and 'author' are required
        # keys (pop raises KeyError otherwise) and every remaining key is
        # passed to common.Book as a keyword argument.
        data = json.loads(args.create)
        title = data.pop('title')
        author = common.Author(data.pop('author'))
        book = common.Book(title, authors=[author], **data)
        edition = ol.Work.create(book)
        return edition.olid
    else:
        return parser.print_help()
def main(argv, session):
    """Save Archive.org credentials to the session's config file.

    ``argv`` is parsed with docopt against the module docstring. When both
    ``--username`` and ``--password`` are present they are passed to
    ``configure()``; otherwise a prompt banner is printed and ``configure()``
    is called without credentials. The path returned by ``configure()`` is
    printed on success. ``AuthenticationError`` prints an error and exits
    with status 1.
    """
    options = docopt(__doc__, argv=argv)
    username = options["--username"]
    password = options["--password"]

    try:
        if not (username and password):
            print("Enter your Archive.org credentials below to configure 'ia'.\n")
            saved_to = configure(config_file=session.config_file)
            print("\nConfig saved to: {0}".format(saved_to))
        else:
            saved_to = configure(username, password, session.config_file)
            print("Config saved to: {0}".format(saved_to))
    except AuthenticationError as auth_error:
        # TODO: refactor output so we don't have to have special cases
        # for adding newlines!
        # The leading newline is dropped whenever --username was supplied,
        # regardless of whether --password was.
        template = "error: {0}" if username else "\nerror: {0}"
        print(template.format(str(auth_error)))
        sys.exit(1)
def main(argv, session):
    """Save Archive.org credentials to the session's config file.

    ``argv`` is parsed with docopt against the module docstring. With both
    ``--username`` and ``--password`` given, they are handed to
    ``configure()`` together with ``session.config_file``. Otherwise a
    prompt banner is printed and ``configure()`` receives only the config
    file. On success the path returned by ``configure()`` is printed;
    ``AuthenticationError`` prints an error and exits with status 1.
    """
    parsed = docopt(__doc__, argv=argv)
    cli_user = parsed['--username']
    cli_password = parsed['--password']

    try:
        if not (cli_user and cli_password):
            print(
                "Enter your Archive.org credentials below to configure 'ia'.\n"
            )
            written_path = configure(config_file=session.config_file)
            print('\nConfig saved to: {0}'.format(written_path))
        else:
            written_path = configure(cli_user, cli_password,
                                     session.config_file)
            print('Config saved to: {0}'.format(written_path))
    except AuthenticationError as auth_error:
        # TODO: refactor output so we don't have to have special cases
        # for adding newlines!
        # Only --username decides whether the leading newline is emitted.
        if cli_user:
            error_template = 'error: {0}'
        else:
            error_template = '\nerror: {0}'
        print(error_template.format(str(auth_error)))
        sys.exit(1)
def main():
    """Command-line entry point: configure credentials or run one lookup.

    Arguments are read from ``argparser().parse_args()``. The ``configure``
    branch is handled first; otherwise an ``OpenLibrary`` client is created
    and one branch runs, chosen in this order: ``get_olid``, ``get_book``,
    ``get_work``, ``get_author_works``, ``create``; with none of them set
    the parser's help is printed.

    Returns the branch's result (a status string, an OLID, or a
    jsonpickle-encoded object). Returns None implicitly when ``get_book``,
    ``get_work`` or ``get_author_works`` is set without any of the lookup
    arguments that branch checks.

    Raises ValueError if ``--configure`` is used and no email is supplied,
    and re-raises any ValueError from ``OpenLibrary()`` whose message is not
    'No cookie set'.
    """
    parser = argparser()
    args = parser.parse_args()
    if args.configure:
        # raw_input is the Python 2 builtin; an empty answer is rejected below.
        email = args.email or raw_input("Archive.org Email: ")
        if not email:
            raise ValueError("--email required for configuration")
        # NOTE(review): the statement below is garbled in this copy of the
        # file (text between the password prompt and the "Incorrect
        # credentials" message appears to have been redacted). It is kept
        # verbatim; restore the original code before relying on this branch.
        password = getpass.getpass("Password: "******"Incorrect credentials, not updating config."
        config_tool.update(config)
        return "Successfully configured "
    # prompt first time users to configure their OpenLibrary credentials
    try:
        ol = OpenLibrary()
    except ValueError as e:
        # Only the specific 'No cookie set' failure is treated as "not yet
        # configured"; the match is on the exact exception message.
        if str(e) == 'No cookie set':
            print("Seems like you haven't configured your olclient with credentials.\n"
                  "You can configure olclient using the following command:\n"
                  "$ol --configure --email <EMAIL>\n")
            return parser.print_help()
        else:
            raise
    if args.get_olid:
        return ol.Edition.get_olid_by_isbn(args.isbn)
    elif args.get_book:
        # An OLID takes precedence over an ISBN when both are given.
        if args.olid:
            return jsonpickle.encode(ol.Edition.get(olid=args.olid))
        elif args.isbn:
            return jsonpickle.encode(ol.Edition.get(isbn=args.isbn))
    elif args.get_work:
        # An OLID takes precedence over a title search when both are given.
        if args.olid:
            return jsonpickle.encode(ol.Work.get(args.olid))
        elif args.title:
            return jsonpickle.encode(ol.Work.search(args.title))
    elif args.get_author_works:
        # With an author name, the name is first resolved to an OLID.
        if args.olid:
            return jsonpickle.encode(ol.Author.get(args.olid).works())
        elif args.author_name:
            return jsonpickle.encode(ol.Author.get(ol.Author.get_olid_by_name(args.author_name)).works())
    elif args.create:
        # args.create is a JSON object; 'title' and 'author' are required
        # keys (pop raises KeyError otherwise) and every remaining key is
        # passed to common.Book as a keyword argument.
        data = json.loads(args.create)
        title = data.pop('title')
        author = common.Author(data.pop('author'))
        book = common.Book(title, authors=[author], **data)
        edition = ol.Work.create(book)
        return edition.olid
    else:
        return parser.print_help()
def download_collection(username, password, collection, destination,
                        glob="*", dry_run=False):
    """Log in to IA with the given credentials and download a collection.

    ``configure()`` is called with ``username`` and ``password`` first.
    ``download()`` then receives ``collection`` plus ``destination`` (as
    ``destdir``), ``glob`` (as ``glob_pattern``) and ``dry_run``.
    Returns None.
    """
    configure(username, password)

    # Build the keyword arguments separately to keep the call itself short.
    forwarded = dict(destdir=destination, glob_pattern=glob, dry_run=dry_run)
    download(collection, **forwarded)
import os
# The settings module must be named before django.setup() runs, and
# django.setup() must run before the model import below; do not reorder.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'the_big_splice.settings')
import django
django.setup()
import asyncio
from internetarchive import search_items, get_item, configure
import json
import subprocess
import ffmpy
from api.models import Film

# Configure Internet Archive access at import time using the IA_USER and
# IA_PASSWORD environment variables (a KeyError is raised if either is unset).
configure(os.environ['IA_USER'], os.environ['IA_PASSWORD'])

# Metadata fields requested from search_items() for each result.
list_of_meta_keys = [
    'identifier', 'title', 'collection', 'description', 'subject'
]
# NOTE(review): not referenced in the visible part of this file; the names
# suggest video-probe fields, confirm against the code that uses them.
list_of_probe_keys = ['duration', 'width', 'height', 'avg_frame_rate']


def populate():
    # Finds all titles from IA API under the Film Noir collection, filtering out known unwanted files
    # NOTE(review): this definition appears truncated here; the values
    # extracted below are not used in the visible portion.
    for i in search_items('collection:Film_Noir', list_of_meta_keys):
        # Skip the two known unwanted entries (matched on title).
        if "Weirdness Bad Movie" in i['title'] or i['title'] == 'Sobaka':
            continue
        identifier = i['identifier']
        title = i['title']
        # Optional fields fall back to defaults when absent from the result.
        collection = i.get('collection', ['Film_Noir'])
        description = i.get('description', 'no description')
        tags = i.get('subject', [])
# Original code from : # Robin Camille Davis # March 24, 2014 # downloads all items in a given Internet Archive collection # !! will probably crash after 10 or so items !! feel free to edit the script to make it better for bigger collections # See # http://programminghistorian.org/lessons/data-mining-the-internet-archive # for more detailed info import os import time import internetarchive as ia from internetarchive.session import ArchiveSession from internetarchive.search import Search from internetarchive import download, configure configure( ) # interactive login, for automateed scripting use configure('*****@*****.**', 'password') s = ArchiveSession() pattern = None #change this to download only selected filetypes, e.g.: pattern='*mobi' will download only Kindle formatted e-books # fill this in -- searches for the ID of a collection in IA coll = ia.Search(s, 'collection:xxxxxxxx') # example of collection page: https://archive.org/details/johnjaycollegeofcriminaljustice # the collection ID for that page is johnjaycollegeofcriminaljustice # you can tell a page is a collection if it has a 'Spotlight Item' on the left num = 0 for result in coll: # for all items in a collection num = num + 1 # item count