def check_for_new_items(username,password,collection,collections_db):
    """Return the identifiers in an IA collection that are not yet recorded.

    Args:
        username (str): IA username.
        password (str): IA password.
        collection (str): IA identifier for the collection to watch.
        collections_db (str): File path of a plain-text database of
            identifiers to ignore, one identifier per line, e.g.::

                ident_1
                ident_2
                ident_3

    Returns:
        list: Identifiers of the items yielded by
        ``get_item(collection).contents()`` that are not listed in
        ``collections_db``, in the order they were yielded.
    """
    configure(username, password)  # Configure log in information for IA

    # A set makes each membership test O(1); the original list made the
    # overall check O(items * known identifiers).
    with open(collections_db) as f:
        known_identifiers = {line.rstrip("\n") for line in f}

    return [
        book.identifier
        for book in get_item(collection).contents()
        if book.identifier not in known_identifiers
    ]
def main(argv, session):
    """Run the configure command.

    With ``--print-cookies`` the stored ``logged-in-user`` and
    ``logged-in-sig`` cookies are printed and the process exits. Otherwise
    credentials are passed to ``configure`` from one of three sources:
    ``--username``/``--password``, the user's netrc file (``--netrc``), or
    interactive input handled by ``configure`` itself.

    Args:
        argv: Argument list passed to ``docopt`` along with the module
            docstring.
        session: Object exposing ``config`` (a mapping), ``config_file`` and
            ``host``.

    Exits with status 1 when a cookie is missing, the netrc file cannot be
    read or parsed, the netrc file has no ``archive.org`` entry, or
    ``configure`` raises ``AuthenticationError``. All error messages are
    written to stderr.
    """
    args = docopt(__doc__, argv=argv)
    if args['--print-cookies']:
        cookies = session.config.get('cookies', {})
        user = cookies.get('logged-in-user')
        sig = cookies.get('logged-in-sig')
        if not user or not sig:
            if not user and not sig:
                print(
                    'error: "logged-in-user" and "logged-in-sig" cookies '
                    'not found in config file, try reconfiguring.',
                    file=sys.stderr)
            elif not user:
                print(
                    'error: "logged-in-user" cookie not found in config file, '
                    'try reconfiguring.',
                    file=sys.stderr)
            else:
                print(
                    'error: "logged-in-sig" cookie not found in config file, '
                    'try reconfiguring.',
                    file=sys.stderr)
            sys.exit(1)
        print('logged-in-user={}; logged-in-sig={}'.format(user, sig))
        sys.exit()
    try:
        # CLI params.
        if args['--username'] and args['--password']:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         config_file=session.config_file,
                                         host=session.host)
            print('Config saved to: {0}'.format(config_file_path))

        # Netrc
        elif args['--netrc']:
            print("Configuring 'ia' with netrc file...")
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print('error: netrc.netrc() cannot parse your .netrc file.',
                      file=sys.stderr)
                sys.exit(1)
            except (IOError, OSError):
                # netrc.netrc() opens the file itself; a missing or
                # unreadable file would otherwise surface as a traceback.
                print('error: cannot read your .netrc file.',
                      file=sys.stderr)
                sys.exit(1)
            entry = n.hosts.get('archive.org')
            if entry is None:
                # Indexing n.hosts directly raised a bare KeyError here.
                print('error: no "archive.org" entry found in your .netrc '
                      'file.',
                      file=sys.stderr)
                sys.exit(1)
            username, _, password = entry
            config_file_path = configure(username,
                                         password,
                                         config_file=session.config_file,
                                         host=session.host)
            print('Config saved to: {0}'.format(config_file_path))

        # Interactive input.
        else:
            print(
                "Enter your Archive.org credentials below to configure 'ia'.\n"
            )
            config_file_path = configure(config_file=session.config_file,
                                         host=session.host)
            print('\nConfig saved to: {0}'.format(config_file_path))

    except AuthenticationError as exc:
        print('\nerror: {0}'.format(str(exc)), file=sys.stderr)
        sys.exit(1)
def check_for_new_items(username, password, collection, collections_db):
    """Return the identifiers in an IA collection that are not yet recorded.

    Args:
        username (str): IA username.
        password (str): IA password.
        collection (str): IA identifier for the collection to watch.
        collections_db (str): File path of a plain-text database of
            identifiers to ignore, one identifier per line, e.g.::

                ident_1
                ident_2
                ident_3

    Returns:
        list: Identifiers of the items yielded by
        ``get_item(collection).contents()`` that are not listed in
        ``collections_db``, in the order they were yielded.
    """
    configure(username, password)  # Configure log in information for IA

    # Build the lookup once as a set so each membership test is O(1)
    # instead of a linear scan of a list.
    with open(collections_db) as f:
        already_downloaded = {line.rstrip("\n") for line in f}

    return [
        book.identifier
        for book in get_item(collection).contents()
        if book.identifier not in already_downloaded
    ]
def main(argv, session):
    """Run the configure command.

    Credentials are passed to ``configure`` from one of three sources:
    ``--username``/``--password``, the user's netrc file (``--netrc``), or
    interactive input handled by ``configure`` itself. On success the path
    returned by ``configure`` is printed.

    Args:
        argv: Argument list passed to ``docopt`` along with the module
            docstring.
        session: Object exposing ``config_file``.

    Exits with status 1 when the netrc file cannot be read or parsed, when
    it has no ``archive.org`` entry, or when ``configure`` raises
    ``AuthenticationError``.
    """
    args = docopt(__doc__, argv=argv)
    try:
        # CLI params.
        if args['--username'] and args['--password']:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Netrc
        elif args['--netrc']:
            print("Configuring 'ia' with netrc file...")
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print('error: netrc.netrc() cannot parse your .netrc file.')
                sys.exit(1)
            except (IOError, OSError):
                # netrc.netrc() opens the file itself; a missing or
                # unreadable file would otherwise surface as a traceback.
                print('error: cannot read your .netrc file.')
                sys.exit(1)
            entry = n.hosts.get('archive.org')
            if entry is None:
                # Indexing n.hosts directly raised a bare KeyError here.
                print('error: no "archive.org" entry found in your .netrc '
                      'file.')
                sys.exit(1)
            username, _, password = entry
            config_file_path = configure(username,
                                         password,
                                         config_file=session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Interactive input.
        else:
            print(
                "Enter your Archive.org credentials below to configure 'ia'.\n"
            )
            config_file_path = configure(config_file=session.config_file)
            print('\nConfig saved to: {0}'.format(config_file_path))

    except AuthenticationError as exc:
        print('\nerror: {0}'.format(str(exc)))
        sys.exit(1)
Example #5
0
def main(argv, session):
    """Run the configure command.

    Credentials are passed to ``configure`` from one of three sources:
    ``--username``/``--password``, the user's netrc file (``--netrc``), or
    interactive input handled by ``configure`` itself. On success the path
    returned by ``configure`` is printed.

    Args:
        argv: Argument list passed to ``docopt`` along with the module
            docstring.
        session: Object exposing ``config_file``.

    Exits with status 1 when the netrc file cannot be read or parsed, when
    it has no ``archive.org`` entry, or when ``configure`` raises
    ``AuthenticationError``.
    """
    args = docopt(__doc__, argv=argv)
    try:
        # CLI params.
        if args['--username'] and args['--password']:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Netrc
        elif args['--netrc']:
            print("Configuring 'ia' with netrc file...")
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print('error: netrc.netrc() cannot parse your .netrc file.')
                sys.exit(1)
            except (IOError, OSError):
                # A missing or unreadable netrc file is reported cleanly
                # instead of escaping as a traceback.
                print('error: cannot read your .netrc file.')
                sys.exit(1)
            entry = n.hosts.get('archive.org')
            if entry is None:
                # The original n.hosts['archive.org'] raised KeyError when
                # the file had no entry for this host.
                print('error: no "archive.org" entry found in your .netrc '
                      'file.')
                sys.exit(1)
            username, _, password = entry
            config_file_path = configure(username, password,
                                         config_file=session.config_file)
            print('Config saved to: {0}'.format(config_file_path))

        # Interactive input.
        else:
            print("Enter your Archive.org credentials below to configure 'ia'.\n")
            config_file_path = configure(config_file=session.config_file)
            print('\nConfig saved to: {0}'.format(config_file_path))

    except AuthenticationError as exc:
        print('\nerror: {0}'.format(str(exc)))
        sys.exit(1)
def main(argv, session):
    """Prompt for Archive.org credentials and hand them to ``configure``.

    ``argv`` is run through ``docopt`` with the module docstring; the parsed
    result is discarded. ``session`` is accepted but not used here.
    Exits with status 1 if ``configure`` raises ``AuthenticationError``.
    """
    docopt(__doc__, argv=argv)

    banner = "Enter your Archive.org credentials below to configure 'ia'.\n"
    print(banner)

    try:
        configure()
    except AuthenticationError as err:
        failure = '\nerror: {0}'.format(str(err))
        print(failure)
        sys.exit(1)
def main(argv, session):
    """Prompt for Archive.org credentials and save them via ``configure``.

    ``argv`` is run through ``docopt`` with the module docstring; the parsed
    result is discarded. ``configure`` is called with the session's
    ``config_file``. Exits with status 1 if ``configure`` raises
    ``AuthenticationError``.
    """
    docopt(__doc__, argv=argv)

    banner = "Enter your Archive.org credentials below to configure 'ia'.\n"
    print(banner)

    target_file = session.config_file
    try:
        configure(config_file=target_file)
    except AuthenticationError as err:
        failure = '\nerror: {0}'.format(str(err))
        print(failure)
        sys.exit(1)
def download_collection(username,
                        password,
                        collection,
                        destination,
                        glob="*",
                        dry_run=False):
    """Configure IA credentials, then call ``download`` for ``collection``.

    Args:
        username: Passed to ``configure`` as the first argument.
        password: Passed to ``configure`` as the second argument.
        collection: Identifier passed to ``download``.
        destination: Forwarded to ``download`` as ``destdir``.
        glob: Forwarded to ``download`` as ``glob_pattern`` (default ``"*"``).
        dry_run: Forwarded to ``download`` as ``dry_run``.

    Returns:
        None. The return value of ``download`` is discarded.
    """
    configure(username, password)

    download_options = {
        "destdir": destination,
        "glob_pattern": glob,
        "dry_run": dry_run,
    }
    download(collection, **download_options)
Example #9
0
def main():
    # CLI entry point: parse the command-line arguments, optionally run the
    # credential configuration flow, otherwise dispatch on the selected flag
    # to an OpenLibrary lookup or creation call and return its result.
    # Results are returned, not printed.
    parser = argparser()
    args = parser.parse_args()

    if args.configure:
        # NOTE(review): raw_input exists only on Python 2.
        email = args.email or raw_input("Archive.org Email: ")
        if not email:
            raise ValueError("--email required for configuration")
        # NOTE(review): the next line is garbled -- part of the source appears
        # to have been replaced by a credential-scrubbing mask ("******"). As
        # written it is not valid Python, and `config_tool` / `config` used
        # below are never defined in the visible code. Restore the missing
        # statements from the original project before using this block.
        password = getpass.getpass("Password: "******"Incorrect credentials, not updating config."

        config_tool.update(config)
        return "Successfully configured "

    ol = OpenLibrary()
    if args.get_olid:
        return ol.Edition.get_olid_by_isbn(args.isbn)
    elif args.get_book:
        # If neither --olid nor --isbn is set, no inner branch matches and
        # the function falls off the end, returning None.
        if args.olid:
            return jsonpickle.encode(ol.Edition.get(olid=args.olid))
        elif args.isbn:
            return jsonpickle.encode(ol.Edition.get(isbn=args.isbn))
    elif args.get_work:
        # Same implicit None when neither --olid nor --title is set.
        if args.olid:
            return jsonpickle.encode(ol.Work.get(args.olid))
        elif args.title:
            return jsonpickle.encode(ol.Work.search(args.title))
    elif args.create:
        # args.create is a JSON object; 'title' and 'author' are required
        # keys (pop raises KeyError otherwise) and the remaining keys are
        # forwarded to common.Book as keyword arguments.
        data = json.loads(args.create)
        title = data.pop('title')
        author = common.Author(data.pop('author'))
        book = common.Book(title, authors=[author], **data)
        edition = ol.Work.create(book)
        return edition.olid
    else:
        # No action flag given: show usage.
        return parser.print_help()
def main(argv, session):
    """Run the configure command.

    When both ``--username`` and ``--password`` are given they are passed
    straight to ``configure``; otherwise ``configure`` is called without
    credentials so that it collects them interactively. On success the path
    returned by ``configure`` is printed.

    Args:
        argv: Argument list passed to ``docopt`` along with the module
            docstring.
        session: Object exposing ``config_file``.

    Exits with status 1 if ``configure`` raises ``AuthenticationError``.
    """
    args = docopt(__doc__, argv=argv)
    # The interactive path is taken unless BOTH flags were supplied. The
    # error handler must use the same condition: the original tested only
    # --username there, so "--username without --password" went through the
    # interactive path but printed the error without its leading newline.
    interactive = not (args["--username"] and args["--password"])
    try:
        if interactive:
            print("Enter your Archive.org credentials below to configure 'ia'.\n")
            config_file_path = configure(config_file=session.config_file)
            print("\nConfig saved to: {0}".format(config_file_path))
        else:
            config_file_path = configure(args["--username"], args["--password"], session.config_file)
            print("Config saved to: {0}".format(config_file_path))
    except AuthenticationError as exc:
        # TODO: refactor output so we don't have to have special cases
        # for adding newlines!
        if interactive:
            print("\nerror: {0}".format(str(exc)))
        else:
            print("error: {0}".format(str(exc)))
        sys.exit(1)
Example #11
0
def main(argv, session):
    """Run the configure command.

    When both ``--username`` and ``--password`` are given they are passed
    straight to ``configure``; otherwise ``configure`` is called without
    credentials so that it collects them interactively. On success the path
    returned by ``configure`` is printed.

    Args:
        argv: Argument list passed to ``docopt`` along with the module
            docstring.
        session: Object exposing ``config_file``.

    Exits with status 1 if ``configure`` raises ``AuthenticationError``.
    """
    args = docopt(__doc__, argv=argv)
    # Decide once which path is taken so the error handler below formats its
    # message for the path that actually ran. The original handler looked at
    # --username alone, which disagrees with the branch condition when
    # --username is given without --password.
    interactive = not (args['--username'] and args['--password'])
    try:
        if interactive:
            print(
                "Enter your Archive.org credentials below to configure 'ia'.\n"
            )
            config_file_path = configure(config_file=session.config_file)
            print('\nConfig saved to: {0}'.format(config_file_path))
        else:
            config_file_path = configure(args['--username'],
                                         args['--password'],
                                         session.config_file)
            print('Config saved to: {0}'.format(config_file_path))
    except AuthenticationError as exc:
        # TODO: refactor output so we don't have to have special cases
        # for adding newlines!
        if interactive:
            print('\nerror: {0}'.format(str(exc)))
        else:
            print('error: {0}'.format(str(exc)))
        sys.exit(1)
Example #12
0
def main():
    # CLI entry point: parse the command-line arguments, optionally run the
    # credential configuration flow, otherwise dispatch on the selected flag
    # to an OpenLibrary lookup or creation call and return its result.
    # Results are returned, not printed.
    parser = argparser()
    args = parser.parse_args()

    if args.configure:
        # NOTE(review): raw_input exists only on Python 2.
        email = args.email or raw_input("Archive.org Email: ")
        if not email:
            raise ValueError("--email required for configuration")
        # NOTE(review): the next line is garbled -- part of the source appears
        # to have been replaced by a credential-scrubbing mask ("******"). As
        # written it is not valid Python, and `config_tool` / `config` used
        # below are never defined in the visible code. Restore the missing
        # statements from the original project before using this block.
        password = getpass.getpass("Password: "******"Incorrect credentials, not updating config."

        config_tool.update(config)
        return "Successfully configured "

    # prompt first time users to configure their OpenLibrary credentials
    try:
        ol = OpenLibrary()
    except ValueError as e:
        # Only the specific 'No cookie set' error is turned into a hint;
        # any other ValueError is re-raised unchanged.
        if str(e) == 'No cookie set':
            print("Seems like you haven't configured your olclient with credentials.\n"
              "You can configure olclient using the following command:\n"
              "$ol --configure --email <EMAIL>\n")
            return parser.print_help()
        else:
            raise

    if args.get_olid:
        return ol.Edition.get_olid_by_isbn(args.isbn)
    elif args.get_book:
        # If neither --olid nor --isbn is set, no inner branch matches and
        # the function falls off the end, returning None.
        if args.olid:
            return jsonpickle.encode(ol.Edition.get(olid=args.olid))
        elif args.isbn:
            return jsonpickle.encode(ol.Edition.get(isbn=args.isbn))
    elif args.get_work:
        # Same implicit None when neither --olid nor --title is set.
        if args.olid:
            return jsonpickle.encode(ol.Work.get(args.olid))
        elif args.title:
            return jsonpickle.encode(ol.Work.search(args.title))
    elif args.get_author_works:
        # Same implicit None when neither --olid nor an author name is set.
        if args.olid:
            return jsonpickle.encode(ol.Author.get(args.olid).works())
        elif args.author_name:
            # Resolve the author name to an OLID first, then fetch the works.
            return jsonpickle.encode(ol.Author.get(ol.Author.get_olid_by_name(args.author_name)).works())
    elif args.create:
        # args.create is a JSON object; 'title' and 'author' are required
        # keys (pop raises KeyError otherwise) and the remaining keys are
        # forwarded to common.Book as keyword arguments.
        data = json.loads(args.create)
        title = data.pop('title')
        author = common.Author(data.pop('author'))
        book = common.Book(title, authors=[author], **data)
        edition = ol.Work.create(book)
        return edition.olid
    else:
        # No action flag given: show usage.
        return parser.print_help()
def download_collection(username,password,collection,destination,glob="*",dry_run=False):
    """Configure IA credentials, then call ``download`` for ``collection``.

    ``destination``, ``glob`` and ``dry_run`` are forwarded to ``download``
    as ``destdir``, ``glob_pattern`` and ``dry_run`` respectively. The
    return value of ``download`` is discarded; this function returns None.
    """
    configure(username, password)

    download_options = {
        "destdir": destination,
        "glob_pattern": glob,
        "dry_run": dry_run,
    }
    download(collection, **download_options)
Example #14
0
# Django bootstrap: the settings module is set (unless already present in
# the environment) and django.setup() is called before the project model
# import further down.
# NOTE(review): presumably `api.models` cannot be imported until
# django.setup() has run, which would explain the import order -- confirm.
import os
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'the_big_splice.settings')
import django
django.setup()

import asyncio
from internetarchive import search_items, get_item, configure
import json
import subprocess
import ffmpy
from api.models import Film

# Configure Internet Archive credentials from the IA_USER and IA_PASSWORD
# environment variables. This runs at import time and raises KeyError if
# either variable is unset.
configure(os.environ['IA_USER'], os.environ['IA_PASSWORD'])

# Keys passed to search_items() in populate() and then read from each result.
list_of_meta_keys = [
    'identifier', 'title', 'collection', 'description', 'subject'
]
# NOTE(review): not used in the visible code; the names suggest fields read
# from a media probe (ffprobe-style) result -- confirm against the rest of
# the file.
list_of_probe_keys = ['duration', 'width', 'height', 'avg_frame_rate']


def populate():
    """Walk Internet Archive search results for the Film_Noir collection.

    Iterates ``search_items('collection:Film_Noir', list_of_meta_keys)``,
    skips results whose title contains "Weirdness Bad Movie" or equals
    "Sobaka", and pulls the metadata fields of each remaining result into
    locals, substituting defaults for missing collection, description and
    subject values.

    NOTE(review): the visible part of this function only assigns these
    locals and never uses them; the rest of the loop body appears to be cut
    off in this excerpt.
    """
    # Finds all titles from IA API under the Film Noir collection, filtering out known unwanted files
    for i in search_items('collection:Film_Noir', list_of_meta_keys):
        # 'title' and 'identifier' are indexed directly, unlike the optional
        # fields below which fall back to defaults via .get().
        if "Weirdness Bad Movie" in i['title'] or i['title'] == 'Sobaka':
            continue
        identifier = i['identifier']
        title = i['title']
        collection = i.get('collection', ['Film_Noir'])
        description = i.get('description', 'no description')
        tags = i.get('subject', [])
Example #15
0
# Original code from :
# Robin Camille Davis
# March 24, 2014
# downloads all items in a given Internet Archive collection
# !! will probably crash after 10 or so items !! feel free to edit the script to make it better for bigger collections
# See
# http://programminghistorian.org/lessons/data-mining-the-internet-archive
# for more detailed info
import os
import time
import internetarchive as ia
from internetarchive.session import ArchiveSession
from internetarchive.search import Search
from internetarchive import download, configure

configure(
)  # interactive login; for automated scripting use configure('*****@*****.**', 'password')

# Session object handed to the search below.
s = ArchiveSession()

pattern = None  # change this to download only selected filetypes, e.g.: pattern='*mobi' will download only Kindle formatted e-books

# fill this in -- searches for the ID of a collection in IA
# ('xxxxxxxx' is a placeholder for the collection identifier)
coll = ia.Search(s, 'collection:xxxxxxxx')
# example of collection page: https://archive.org/details/johnjaycollegeofcriminaljustice
# the collection ID for that page is johnjaycollegeofcriminaljustice
# you can tell a page is a collection if it has a 'Spotlight Item' on the left

# Running count of items seen in the loop below.
num = 0

# NOTE(review): the loop body visible here only increments the counter; the
# download step that would use `pattern` appears to be cut off in this excerpt.
for result in coll:  # for all items in a collection
    num = num + 1  # item count
Example #16
0
def main():
    # CLI entry point: parse the command-line arguments, optionally run the
    # credential configuration flow, otherwise dispatch on the selected flag
    # to an OpenLibrary lookup or creation call and return its result.
    # Results are returned, not printed.
    parser = argparser()
    args = parser.parse_args()

    if args.configure:
        # NOTE(review): raw_input exists only on Python 2.
        email = args.email or raw_input("Archive.org Email: ")
        if not email:
            raise ValueError("--email required for configuration")
        # NOTE(review): the next line is garbled -- part of the source appears
        # to have been replaced by a credential-scrubbing mask ("******"). As
        # written it is not valid Python, and `config_tool` / `config` used
        # below are never defined in the visible code. Restore the missing
        # statements from the original project before using this block.
        password = getpass.getpass("Password: "******"Incorrect credentials, not updating config."

        config_tool.update(config)
        return "Successfully configured "

    # prompt first time users to configure their OpenLibrary credentials
    try:
        ol = OpenLibrary()
    except ValueError as e:
        # Only the specific 'No cookie set' error is turned into a hint;
        # any other ValueError is re-raised unchanged.
        if str(e) == 'No cookie set':
            print("Seems like you haven't configured your olclient with credentials.\n"
              "You can configure olclient using the following command:\n"
              "$ol --configure --email <EMAIL>\n")
            return parser.print_help()
        else:
            raise

    if args.get_olid:
        return ol.Edition.get_olid_by_isbn(args.isbn)
    elif args.get_book:
        # If neither --olid nor --isbn is set, no inner branch matches and
        # the function falls off the end, returning None.
        if args.olid:
            return jsonpickle.encode(ol.Edition.get(olid=args.olid))
        elif args.isbn:
            return jsonpickle.encode(ol.Edition.get(isbn=args.isbn))
    elif args.get_work:
        # Same implicit None when neither --olid nor --title is set.
        if args.olid:
            return jsonpickle.encode(ol.Work.get(args.olid))
        elif args.title:
            return jsonpickle.encode(ol.Work.search(args.title))
    elif args.get_author_works:
        # Same implicit None when neither --olid nor an author name is set.
        if args.olid:
            return jsonpickle.encode(ol.Author.get(args.olid).works())
        elif args.author_name:
            # Resolve the author name to an OLID first, then fetch the works.
            return jsonpickle.encode(ol.Author.get(ol.Author.get_olid_by_name(args.author_name)).works())
    elif args.create:
        # args.create is a JSON object; 'title' and 'author' are required
        # keys (pop raises KeyError otherwise) and the remaining keys are
        # forwarded to common.Book as keyword arguments.
        data = json.loads(args.create)
        title = data.pop('title')
        author = common.Author(data.pop('author'))
        book = common.Book(title, authors=[author], **data)
        edition = ol.Work.create(book)
        return edition.olid
    else:
        # No action flag given: show usage.
        return parser.print_help()