Example #1
0
 def ia_modify_metadata(self, identifier, metadata):
     """Apply ``metadata`` to the item ``identifier`` via ``modify_metadata``.

     The call is authenticated with ``self.access_key`` and
     ``self.secret_key``.  Any exception raised by the call is logged as a
     warning on ``self.logger`` and reported through the return value
     instead of propagating.

     :param identifier: identifier of the item to modify.
     :param metadata: metadata passed through to ``modify_metadata``.
     :return: ``False`` if the call raised an exception, ``True`` otherwise.
     """
     try:
         modify_metadata(identifier, metadata=metadata,
                         access_key=self.access_key,
                         secret_key=self.secret_key)
     except Exception as e:
         # ``warn`` is a deprecated alias of ``warning`` on standard
         # ``logging`` loggers (assumes self.logger is one -- TODO confirm).
         self.logger.warning('Could not  modify metadata %s. Error %s', identifier, e)
         return False
     return True
Example #2
0
def update_metadata(_id, meta, for_real=False):
    """Set the title of each target of item ``_id``, or just preview it.

    ``meta`` maps arbitrary keys to dicts holding a ``"title"`` and a
    ``"target"``; entries are processed in sorted key order.  For every
    entry a ``{'title': ...}`` metadata dict is built for that target.

    :param _id: item identifier passed to ``modify_metadata``.
    :param meta: mapping of key -> ``{"title": ..., "target": ...}``.
    :param for_real: when true, call ``modify_metadata`` for each entry;
        otherwise only print what would be sent (dry run).
    """
    # Every print uses the single-argument call form so the function runs
    # unchanged on Python 2 and Python 3 (the original mixed in Python 2-only
    # print statements).
    print("modify_metadata(%s)" % _id)
    for item in sorted(meta):
        entry = meta[item]
        _md = {'title': entry["title"]}
        tgt = entry["target"]
        if for_real:
            print("modify_metadata(%s, metadata=%s, target='%s')"
                  % (_id, _md, tgt))
            modify_metadata(_id, metadata=_md, target=tgt)
        else:
            print("  target=%s metadata=%s" % (tgt, _md))
Example #3
0
def update_metadata(_id, meta, for_real=False):
    """Write (or preview) per-target titles for the item ``_id``.

    Entries of ``meta`` are visited in sorted key order.  Each entry must
    provide a ``"title"`` and a ``"target"``; the title is wrapped in a
    ``{'title': ...}`` dict and sent for that target.

    :param _id: item identifier passed to ``modify_metadata``.
    :param meta: mapping of key -> ``{"title": ..., "target": ...}``.
    :param for_real: if true, ``modify_metadata`` is called for each entry;
        if false, the function only prints what it would send.
    """
    # Single-argument, parenthesised prints behave the same on Python 2 and
    # Python 3; the bare print statements used before were Python 2 only.
    print("modify_metadata(%s)" % _id)
    for item in sorted(meta):
        _md = {'title': meta[item]["title"]}
        tgt = meta[item]["target"]
        if not for_real:
            print("  target=%s metadata=%s" % (tgt, _md))
            continue
        print("modify_metadata(%s, metadata=%s, target='%s')" %
              (_id, _md, tgt))
        modify_metadata(_id, metadata=_md, target=tgt)
Example #4
0
def upload_to_internet_archive(self, link_guid):
    """Upload a link's WARC file to the Internet Archive and set its metadata.

    Looks up the ``Link`` with the given guid, uploads its WARC file under
    ``settings.INTERNET_ARCHIVE_IDENTIFIER_PREFIX + link_guid`` and, when the
    upload reports success, writes the descriptive metadata and flags the
    link as uploaded.  When the upload reports failure, ``self.retry`` is
    invoked.

    :param link_guid: guid of the ``Link`` to upload.
    :return: ``None`` when uploading is disabled in settings or the link is
        not eligible; otherwise the value returned by
        ``internetarchive.upload``.
    """
    link = Link.objects.get(guid=link_guid)

    if not settings.UPLOAD_TO_INTERNET_ARCHIVE:
        return

    if not link.can_upload_to_internet_archive():
        print("Not eligible for upload.")
        return

    metadata = {
        "collection": settings.INTERNET_ARCHIVE_COLLECTION,
        "title": '%s: %s' % (link_guid, truncatechars(link.submitted_title, 50)),
        "mediatype": 'web',
        "description": 'Perma.cc archive of %s created on %s.' % (link.submitted_url, link.creation_timestamp,),
        "contributor": 'Perma.cc',
        "submitted_url": link.submitted_url,
        "perma_url": "http://%s/%s" % (settings.HOST, link_guid),
        "external-identifier": 'urn:X-perma:%s' % link_guid,
    }

    # set sponsor if organization exists
    if link.organization:
        metadata["sponsor"] = "%s - %s" % (link.organization, link.organization.registrar)

    identifier = settings.INTERNET_ARCHIVE_IDENTIFIER_PREFIX + link_guid
    with default_storage.open(link.warc_storage_file(), 'rb') as warc_file:
        success = internetarchive.upload(
            identifier,
            warc_file,
            access_key=settings.INTERNET_ARCHIVE_ACCESS_KEY,
            secret_key=settings.INTERNET_ARCHIVE_SECRET_KEY,
            retries=10,
            retries_sleep=60,
            verbose=True,
        )

        if success:
            # Authenticate the metadata write with the same keys as the
            # upload instead of relying on whatever ambient configuration
            # the library might find.
            internetarchive.modify_metadata(
                identifier,
                metadata=metadata,
                access_key=settings.INTERNET_ARCHIVE_ACCESS_KEY,
                secret_key=settings.INTERNET_ARCHIVE_SECRET_KEY,
            )

            link.uploaded_to_internet_archive = True
            link.save()

        else:
            # Report the failure before retrying.
            # NOTE(review): assumes ``self`` is a Celery task, whose retry()
            # raises by default -- which made a print placed after it dead code.
            print("Failed.")
            self.retry(exc=Exception("Internet Archive reported upload failure."))

        return success
Example #5
0
def main(argv):
    """Command-line entry point: check, modify or print an item's metadata.

    Arguments are parsed by ``docopt`` from the module docstring.  Depending
    on the options the function reports whether the item exists, modifies or
    appends metadata, lists the distinct file formats, or dumps the item's
    metadata.  The process always terminates through ``sys.exit``.

    :param argv: argument list handed to ``docopt``.
    """
    args = docopt(__doc__, argv=argv)
    item = get_item(args['<identifier>'])

    # Check existence of item.
    if args['--exists']:
        if item.exists:
            stream, template, status = sys.stdout, '{0} exists\n', 0
        else:
            stream, template, status = sys.stderr, '{0} does not exist\n', 1
        stream.write(template.format(item.identifier))
        sys.exit(status)

    # Modify metadata.
    elif args['--modify'] or args['--append']:
        append = bool(args['--append'])
        # --modify wins when both options carry a value.
        metadata = get_args_dict(args['--modify'] or args['--append'])
        response = modify_metadata(args['<identifier>'], metadata, append=append)
        result = response.json()
        if not result['success']:
            sys.stderr.write(
                'error: {0} ({1})\n'.format(result['error'], response.status_code))
            sys.exit(1)
        sys.stdout.write('success: {0}\n'.format(result['log']))

    # Get metadata.
    elif args['--formats']:
        formats = set(f.format for f in item.iter_files())
        sys.stdout.write('\n'.join(formats) + '\n')
    else:
        sys.stdout.write(dumps(item.metadata) + '\n')
    sys.exit(0)
 def update_mp3_metadata(self, mp3_file):
     """
     Update the archive item's per-file metadata for a given mp3 file.

     The remote entry for the file is looked up in ``self.item_files_dict``.
     If it is missing, a warning is logged and nothing is sent.  Otherwise
     the remote ``artist``, ``creator``, ``title``, ``album`` and
     ``album_artist`` values are compared with the local tags and, if any
     differ, all five are written with ``internetarchive.modify_metadata``.

     :param mp3_file: string or :py:class:mp3_utility.Mp3File
     """
     if isinstance(mp3_file, str):
         mp3_file = mp3_utility.Mp3File(file_path=mp3_file,
                                        load_tags_from_file=True)
     remote_name = self.get_remote_name(mp3_file.file_path)
     archive_item_file_details = self.item_files_dict.get(remote_name, None)
     mp3_metadata = mp3_file.metadata
     if archive_item_file_details is None:
         logging.warning("The file does not exist! Skipping.")
         return

     # Remote field name -> value taken from the local tags.  ``creator``
     # mirrors ``artist``.
     expected = {
         "artist": mp3_metadata.artist,
         "creator": mp3_metadata.artist,
         "title": mp3_metadata.title,
         "album": mp3_metadata.album,
         "album_artist": mp3_metadata.album_artist,
     }
     remote_tag_update_needed = any(
         archive_item_file_details.get(field, "") != value
         for field, value in expected.items())
     if remote_tag_update_needed:
         logging.info("***Updating %s in archive item.", remote_name)
         # The target is a remote path, not a local one: build it with a
         # literal "/" so it does not become "files\\name" on Windows as it
         # would with os.path.join.
         logging.info(
             internetarchive.modify_metadata(
                 self.archive_id,
                 metadata=expected,
                 target="files/" + remote_name))
Example #7
0
def main(argv):
    """Command-line entry point: check, modify or print an item's metadata.

    Arguments are parsed by ``docopt`` from the module docstring.  The
    options select one action: existence check, metadata modification,
    per-file listing, distinct file formats, selected metadata keys
    (``--target``, with ``/`` separating nested keys) or a full metadata
    dump.  The function always finishes by calling ``exit``.

    :param argv: argument list handed to ``docopt``.
    """
    args = docopt(__doc__, argv=argv)
    item = get_item(args['<identifier>'])

    # Check existence of item.
    if args['--exists']:
        if item.exists:
            stream, template, status = stdout, '{0} exists\n', 0
        else:
            stream, template, status = stderr, '{0} does not exist\n', 1
        stream.write(template.format(item.identifier))
        exit(status)

    # Modify metadata.
    elif args['--modify']:
        metadata = get_args_dict(args['--modify'])
        response = modify_metadata(args['<identifier>'], metadata)
        status_code = response['status_code']
        content = response['content']
        if not content['success']:
            stderr.write('error: {0} ({1})\n'.format(content['error'], status_code))
            exit(1)
        stdout.write('success: {0}\n'.format(content['log']))

    # Get metadata.
    elif args['--files']:
        wanted = args['--target']
        for f in item.files():
            if wanted:
                # Only the requested attributes; missing ones become None.
                files_md = [f.__dict__.get(k) for k in wanted]
            else:
                files_md = [f.identifier, f.name, f.source, f.format, f.size, f.md5]
            stdout.write('\t'.join(str(x) for x in files_md) + '\n')
    elif args['--formats']:
        formats = set(f.format for f in item.files())
        stdout.write('\n'.join(formats) + '\n')
    elif args['--target']:
        metadata = []
        for key in args['--target']:
            # "a/b/c" walks nested metadata; a key without "/" is the
            # one-element case of the same walk.
            parts = key.split('/')
            md = item.metadata.get(parts[0])
            for k in parts[1:]:
                if md:
                    md = md.get(k)
            if md:
                metadata.append(md)
        stdout.write('\t'.join(str(x) for x in metadata) + '\n')
    else:
        stdout.write(dumps(item.metadata) + '\n')
    exit(0)
Example #8
0
def archive_update_metadata(archive_id, metadata, session):
    """Send ``metadata`` for ``archive_id`` and log the outcome on ``session``.

    :param archive_id: identifier passed to ``modify_metadata``.
    :param metadata: metadata passed to ``modify_metadata``.
    :param session: session object handed to ``log``.
    :return: ``True`` if the response status code is 200, ``False`` otherwise.
    """
    response = modify_metadata(archive_id, metadata)
    if response.status_code == 200:
        log(session, "Session metadata updated on archive.org",
            c.SESSION_SYNCED)
        return True

    log(session,
        "Failed to update metadata on archive.org: " + response.reason,
        c.SESSION_FAILED, c.LOG_ERROR)
    return False
Example #9
0
def main(argv):
    """Command-line entry point for inspecting or modifying item metadata.

    ``docopt`` parses ``argv`` against the module docstring.  Exactly one
    action runs: ``--exists``, ``--modify``, ``--files``, ``--formats``,
    ``--target`` (keys may use ``/`` to reach nested values) or, by default,
    a dump of the whole metadata.  ``exit`` is always called at the end.

    :param argv: argument list handed to ``docopt``.
    """
    args = docopt(__doc__, argv=argv)
    item = internetarchive.Item(args['<identifier>'])

    # Check existence of item.
    if args['--exists']:
        if item.exists:
            out, message, code = stdout, '{0} exists\n', 0
        else:
            out, message, code = stderr, '{0} does not exist\n', 1
        out.write(message.format(item.identifier))
        exit(code)

    # Modify metadata.
    elif args['--modify']:
        metadata = get_args_dict(args['--modify'])
        response = modify_metadata(args['<identifier>'], metadata)
        status_code = response['status_code']
        content = response['content']
        if not content['success']:
            stderr.write('error: {0} ({1})\n'.format(content['error'], status_code))
            exit(1)
        stdout.write('success: {0}\n'.format(content['log']))

    # Get metadata.
    elif args['--files']:
        for f in item.files():
            columns = (f.item.identifier, f.name, f.source, f.format, f.size, f.md5)
            stdout.write('\t'.join(str(x) for x in columns) + '\n')
    elif args['--formats']:
        formats = set(f.format for f in item.files())
        stdout.write('\n'.join(formats) + '\n')
    elif args['--target']:
        metadata = []
        for key in args['--target']:
            # Split on "/" and descend one level per component; a plain key
            # is simply a path with a single component.
            first, rest = key.split('/')[0], key.split('/')[1:]
            md = item.metadata.get(first)
            for k in rest:
                if md:
                    md = md.get(k)
            if md:
                metadata.append(md)
        stdout.write('\t'.join(str(x) for x in metadata) + '\n')
    else:
        stdout.write(dumps(item.metadata) + '\n')
    exit(0)
Example #10
0
def test_modify_metadata():
    """``modify_metadata`` returns the mocked POST response for item ``test``.

    Both the metadata GET and the metadata POST for the item are mocked;
    the test then checks the status code and the decoded JSON body of the
    response returned by ``modify_metadata``.
    """
    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
        # The mocked body must be text (here an empty JSON object), not a
        # Python dict.
        rsps.add(responses.GET, "{0}//archive.org/metadata/test".format(protocol), body="{}", status=200)
        rsps.add(
            responses.POST,
            "{0}//archive.org/metadata/test".format(protocol),
            body=('{"success":true,"task_id":423444944,' '"log":"https://catalogd.archive.org/log/423444944"}'),
            status=200,
        )
        r = modify_metadata("test", dict(foo=1))
        assert r.status_code == 200
        assert r.json() == {"task_id": 423444944, "success": True, "log": "https://catalogd.archive.org/log/423444944"}
Example #11
0
def test_modify_metadata():
    """``modify_metadata`` returns the mocked POST response for item ``test``."""
    post_body = '{"success":true,"task_id":423444944,"log":"https://catalogd.archive.org/log/423444944"}'
    expected = {
        'task_id': 423444944,
        'success': True,
        'log': 'https://catalogd.archive.org/log/423444944'
    }
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        url = '{0}//archive.org/metadata/test'.format(PROTOCOL)
        # GET serves the (empty) current metadata, POST the write result.
        rsps.add(responses.GET, url, body='{}')
        rsps.add(responses.POST, url, body=post_body)
        r = modify_metadata('test', {'foo': 1})
        assert r.status_code == 200
        assert r.json() == expected
Example #12
0
def test_modify_metadata():
    """``modify_metadata`` returns the mocked POST response for item ``nasa``."""
    post_body = '{"success":true,"task_id":423444944,"log":"https://catalogd.archive.org/log/423444944"}'
    expected = {
        'task_id': 423444944,
        'success': True,
        'log': 'https://catalogd.archive.org/log/423444944'
    }
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        url = '{0}//archive.org/metadata/nasa'.format(PROTOCOL)
        # GET serves the item's current metadata, POST the write result.
        rsps.add(responses.GET, url, body='{"metadata":{"title":"foo"}}')
        rsps.add(responses.POST, url, body=post_body)
        r = modify_metadata('nasa', {'foo': 1})
        assert r.status_code == 200
        assert r.json() == expected
def main():
    """Add a default description to Kiwix ZIM items that have none.

    Iterates over the items matching ``subject:"kiwix" AND subject:"zim"``.
    Items whose metadata cannot be read are reported and skipped; items that
    already carry a description are left untouched; all others get ``desc``
    written through ``internetarchive.modify_metadata``.  Progress and
    errors are printed.
    """
    desc = 'You can open ZIM files with <a href="https://www.kiwix.org/">Kiwix</a> software.'
    results = internetarchive.search_items('subject:"kiwix" AND subject:"zim"')
    for i in results.iter_as_items():
        try:
            item_md = i.item_metadata['metadata']
            itemid = item_md['identifier']
        except Exception:
            # Best effort: skip items with unusable metadata, but (unlike a
            # bare ``except:``) let KeyboardInterrupt/SystemExit through.
            print('Error in', i)
            continue
        print(itemid)

        if 'description' in item_md:
            print('Already has description: %s' % (item_md['description']))
            continue

        r = internetarchive.modify_metadata(
            itemid, metadata=dict(description=desc))
        if r.status_code == 200:
            print('Description added: %s' % (desc))
        else:
            print('Error (%s) adding description: %s' %
                  (r.status_code, desc))
Example #14
0
def main():
    """Add a ``genre`` to Kiwix ZIM items, derived from their project.

    For every project in ``genres`` the items matching ``subject:"kiwix"``,
    ``subject:"zim"`` and the lower-cased project name are visited.  An item
    without a genre gets the project's genre, but only if the project name
    also occurs in the item identifier (case-insensitively); otherwise
    ``Unknown project`` is printed.  Items whose metadata cannot be read are
    reported and skipped.
    """
    genres = {
        'Gutenberg': 'Literature',
        'Khan-academy-videos': 'Course',
        'Wikibooks': 'Course',
        'Wikinews': 'News',
        'Wikipedia': 'Encyclopedia',
        'Wikiquote': 'Quotes',
        'Wikisource': 'Literature',
        'Wikispecies': 'Encyclopedia',
        'Wikiversity': 'Course',
        'Wikivoyage': 'Travel',
        'Wiktionary': 'Dictionary',
    }
    for project, genre in genres.items():
        project_key = project.lower()
        # https://archive.org/services/docs/api/internetarchive/quickstart.html#searching
        results = internetarchive.search_items(
            'subject:"kiwix" AND subject:"zim" AND subject:"%s"' %
            (project_key))
        for i in results.iter_as_items():
            try:
                item_md = i.item_metadata['metadata']
                itemid = item_md['identifier']
            except Exception:
                # Best effort: skip items with unusable metadata, but (unlike
                # a bare ``except:``) let KeyboardInterrupt/SystemExit through.
                print('Error in', i)
                continue
            print(itemid)

            if 'genre' in item_md:
                print('Already has genre: %s' % (item_md['genre']))
                continue
            if project_key not in itemid.lower():
                print('Unknown project')
                continue

            r = internetarchive.modify_metadata(
                itemid, metadata=dict(genre=genre))
            if r.status_code == 200:
                print('Genre added: %s' % (genre))
            else:
                print('Error (%s) adding genre: %s' %
                      (r.status_code, genre))
Example #15
0
# Script: for every JSON line of the input file, write the Open Library
# edition id (and the work id, when it can be resolved) into the metadata of
# the archive item named by the line's 'identifier'.
fname = sys.argv[1]

ol = OpenLibrary()

n = 0
with open(fname, 'r') as f:
    # Stream the file instead of loading every line up front with readlines().
    for line in f:
        if not line.strip():
            # A blank line is not a JSON document; skip it rather than crash.
            continue
        data = json.loads(line)
        olid = data['openlibrary']
        ocaid = data['identifier']
        try:
            edition = ol.get(olid)
            wolid = edition.work.olid
            # Sanity check: an empty work id aborts the run (AssertionError is
            # not handled below).
            assert wolid
        except requests.exceptions.HTTPError:
            print('404', olid, ocaid)
            wolid = None
        to_write = {
            'openlibrary_edition': olid
        }
        if wolid:
            to_write['openlibrary_work'] = wolid
        r = modify_metadata(ocaid, metadata=to_write)
        print('%s: %s' % (ocaid, r.status_code))
        n += 1
        # After every 301 writes, pause for 15 minutes before continuing.
        if n > 300:
            print('PAUSE')
            time.sleep(900)
            n = 0
Example #16
0
def upload_to_internet_archive(self, link_guid):
    """Upload a link's WARC file to the Internet Archive and set its metadata.

    The ``Link`` with the given guid is fetched and its WARC file uploaded
    under ``settings.INTERNET_ARCHIVE_IDENTIFIER_PREFIX + link_guid``.  If
    the upload reports success, the descriptive metadata is written and the
    link is flagged as uploaded; if it reports failure, ``self.retry`` is
    invoked.

    :param link_guid: guid of the ``Link`` to upload.
    :return: ``None`` when uploading is disabled in settings or the link is
        not eligible; otherwise the value returned by
        ``internetarchive.upload``.
    """
    link = Link.objects.get(guid=link_guid)

    if not settings.UPLOAD_TO_INTERNET_ARCHIVE:
        return

    if not link.can_upload_to_internet_archive():
        print("Not eligible for upload.")
        return

    access_key = settings.INTERNET_ARCHIVE_ACCESS_KEY
    secret_key = settings.INTERNET_ARCHIVE_SECRET_KEY

    metadata = {
        "collection": settings.INTERNET_ARCHIVE_COLLECTION,
        "title": '%s: %s' % (link_guid,
                             truncatechars(link.submitted_title, 50)),
        "mediatype": 'web',
        "description": 'Perma.cc archive of %s created on %s.' % (
            link.submitted_url,
            link.creation_timestamp,
        ),
        "contributor": 'Perma.cc',
        "submitted_url": link.submitted_url,
        "perma_url": "http://%s/%s" % (settings.HOST, link_guid),
        "external-identifier": 'urn:X-perma:%s' % link_guid,
    }

    # set sponsor if organization exists
    if link.organization:
        metadata["sponsor"] = "%s - %s" % (link.organization,
                                           link.organization.registrar)

    identifier = settings.INTERNET_ARCHIVE_IDENTIFIER_PREFIX + link_guid
    with default_storage.open(link.warc_storage_file(), 'rb') as warc_file:
        success = internetarchive.upload(
            identifier,
            warc_file,
            access_key=access_key,
            secret_key=secret_key,
            retries=10,
            retries_sleep=60,
            verbose=True,
        )

        if success:
            # Use the same credentials for the metadata write as for the
            # upload rather than depending on ambient library configuration.
            internetarchive.modify_metadata(
                identifier,
                metadata=metadata,
                access_key=access_key,
                secret_key=secret_key,
            )

            link.uploaded_to_internet_archive = True
            link.save()

        else:
            # Print first: a statement placed after retry() never runs when
            # retry() raises.
            # NOTE(review): assumes ``self`` is a Celery task -- confirm.
            print("Failed.")
            self.retry(
                exc=Exception("Internet Archive reported upload failure."))

        return success
Example #17
0
def main():
    """Add a ``language`` to Kiwix ZIM items, derived from their language code.

    For every language code in ``langs`` the items matching
    ``subject:"kiwix"``, ``subject:"zim"`` and the code are visited.  An item
    without a language gets the corresponding language name, but only if
    ``_<code>_`` occurs in the item identifier; otherwise its metadata and
    ``Unknown language`` are printed.  Items whose metadata cannot be read
    are reported and skipped.
    """
    # https://meta.wikimedia.org/wiki/List_of_Wikipedias
    langs = {
        "ab": "Abkhazian",
        "ace": "Acehnese",
        "ady": "Adyghe",
        "af": "Afrikaans",
        "ak": "Akan",
        "am": "Amharic",
        "an": "Aragonese",
        "ang": "Anglo-Saxon",
        "ar": "Arabic",
        "arc": "Aramaic",
        "arz": "Egyptian Arabic",
        "as": "Assamese",
        "ast": "Asturian",
        "atj": "Atikamekw",
        "av": "Avar",
        "ay": "Aymara",
        "az": "Azerbaijani",
        "azb": "South Azerbaijani",
        "ba": "Bashkir",
        "bar": "Bavarian",
        "bcl": "Central Bicolano",
        "be": "Belarusian",
        "bg": "Bulgarian",
        "bi": "Bislama",
        "bjn": "Banjar",
        "bm": "Bambara",
        "bn": "Bengali",
        "bo": "Tibetan",
        "bpy": "Bishnupriya Manipuri",
        "br": "Breton",
        "bs": "Bosnian",
        "bug": "Buginese",
        "bxr": "Buryat",
        "ca": "Catalan",
        "cdo": "Min Dong",
        "ce": "Chechen",
        "ceb": "Cebuano",
        "ch": "Chamorro",
        "cho": "Choctaw",
        "chr": "Cherokee",
        "chy": "Cheyenne",
        "ckb": "Sorani",
        "co": "Corsican",
        "cr": "Cree",
        "crh": "Crimean Tatar",
        "cs": "Czech",
        "csb": "Kashubian",
        "cu": "Old Church Slavonic",
        "cv": "Chuvash",
        "cy": "Welsh",
        "da": "Danish",
        "de": "German",
        "din": "Dinka",
        "diq": "Zazaki",
        "dsb": "Lower Sorbian",
        "dty": "Doteli",
        "dv": "Divehi",
        "dz": "Dzongkha",
        "ee": "Ewe",
        "el": "Greek",
        "eml": "Emilian-Romagnol",
        "en": "English",
        "eo": "Esperanto",
        "es": "Spanish",
        "et": "Estonian",
        "eu": "Basque",
        "ext": "Extremaduran",
        "fa": "Persian",
        "ff": "Fula",
        "fi": "Finnish",
        "fj": "Fijian",
        "fo": "Faroese",
        "fr": "French",
        "frp": "Franco-Provençal",
        "frr": "North Frisian",
        "fur": "Friulian",
        "fy": "West Frisian",
        "ga": "Irish",
        "gag": "Gagauz",
        "gan": "Gan",
        "gd": "Scottish Gaelic",
        "gl": "Galician",
        "glk": "Gilaki",
        "gn": "Guarani",
        "gom": "Goan Konkani",
        "gor": "Gorontalo",
        "got": "Gothic",
        "gu": "Gujarati",
        "gv": "Manx",
        "ha": "Hausa",
        "hak": "Hakka",
        "haw": "Hawaiian",
        "he": "Hebrew",
        "hi": "Hindi",
        "hif": "Fiji Hindi",
        "ho": "Hiri Motu",
        "hr": "Croatian",
        "hsb": "Upper Sorbian",
        "ht": "Haitian",
        "hu": "Hungarian",
        "hy": "Armenian",
        "ia": "Interlingua",
        "id": "Indonesian",
        "ie": "Interlingue",
        "ig": "Igbo",
        "ik": "Inupiak",
        "ilo": "Ilokano",
        "inh": "Ingush",
        "io": "Ido",
        "is": "Icelandic",
        "it": "Italian",
        "iu": "Inuktitut",
        "ja": "Japanese",
        "jam": "Jamaican Patois",
        "jbo": "Lojban",
        "jv": "Javanese",
        "ka": "Georgian",
        "kaa": "Karakalpak",
        "kab": "Kabyle",
        "kbd": "Kabardian Circassian",
        "kbp": "Kabiye",
        "kg": "Kongo",
        "ki": "Kikuyu",
        "kj": "Kuanyama",
        "kk": "Kazakh",
        "kl": "Greenlandic",
        "km": "Khmer",
        "kn": "Kannada",
        "ko": "Korean",
        "koi": "Komi-Permyak",
        "kr": "Kanuri",
        "krc": "Karachay-Balkar",
        "ks": "Kashmiri",
        "ksh": "Ripuarian",
        "ku": "Kurdish",
        "kv": "Komi",
        "kw": "Cornish",
        "ky": "Kirghiz",
        "la": "Latin",
        "lad": "Ladino",
        "lb": "Luxembourgish",
        "lbe": "Lak",
        "lez": "Lezgian",
        "lfn": "Lingua Franca Nova",
        "lg": "Luganda",
        "li": "Limburgish",
        "lij": "Ligurian",
        "lmo": "Lombard",
        "ln": "Lingala",
        "lo": "Lao",
        "lrc": "Northern Luri",
        "lt": "Lithuanian",
        "ltg": "Latgalian",
        "lv": "Latvian",
        "mai": "Maithili",
        "mdf": "Moksha",
        "mg": "Malagasy",
        "mh": "Marshallese",
        "mhr": "Meadow Mari",
        "mi": "Maori",
        "min": "Minangkabau",
        "mk": "Macedonian",
        "ml": "Malayalam",
        "mn": "Mongolian",
        "mr": "Marathi",
        "mrj": "Hill Mari",
        "ms": "Malay",
        "mt": "Maltese",
        "mus": "Muscogee",
        "mwl": "Mirandese",
        "my": "Burmese",
        "myv": "Erzya",
        "mzn": "Mazandarani",
        "na": "Nauruan",
        "nah": "Nahuatl",
        "nap": "Neapolitan",
        "nds": "Low Saxon",
        "ne": "Nepali",
        "new": "Newar",
        "ng": "Ndonga",
        "nl": "Dutch",
        "nn": "Norwegian (Nynorsk)",
        "no": "Norwegian (Bokmål)",
        "nov": "Novial",
        "nrm": "Norman",
        "nso": "Northern Sotho",
        "nv": "Navajo",
        "ny": "Chichewa",
        "oc": "Occitan",
        "olo": "Livvi-Karelian",
        "om": "Oromo",
        "or": "Oriya",
        "os": "Ossetian",
        "pa": "Punjabi",
        "pag": "Pangasinan",
        "pam": "Kapampangan",
        "pap": "Papiamentu",
        "pcd": "Picard",
        "pdc": "Pennsylvania German",
        "pfl": "Palatinate German",
        "pi": "Pali",
        "pih": "Norfolk",
        "pl": "Polish",
        "pms": "Piedmontese",
        "pnb": "Western Punjabi",
        "pnt": "Pontic",
        "ps": "Pashto",
        "pt": "Portuguese",
        "qu": "Quechua",
        "rm": "Romansh",
        "rmy": "Romani",
        "rn": "Kirundi",
        "ro": "Romanian",
        "ru": "Russian",
        "rue": "Rusyn",
        "rw": "Kinyarwanda",
        "sa": "Sanskrit",
        "sah": "Sakha",
        "sat": "Santali",
        "sc": "Sardinian",
        "scn": "Sicilian",
        "sco": "Scots",
        "sd": "Sindhi",
        "se": "Northern Sami",
        "sg": "Sango",
        "sh": "Serbo-Croatian",
        "shn": "Shan",
        "si": "Sinhalese",
        "sk": "Slovak",
        "sl": "Slovenian",
        "sm": "Samoan",
        "sn": "Shona",
        "so": "Somali",
        "sq": "Albanian",
        "sr": "Serbian",
        "srn": "Sranan",
        "ss": "Swati",
        "st": "Sesotho",
        "stq": "Saterland Frisian",
        "su": "Sundanese",
        "sv": "Swedish",
        "sw": "Swahili",
        "szl": "Silesian",
        "ta": "Tamil",
        "tcy": "Tulu",
        "te": "Telugu",
        "tet": "Tetum",
        "tg": "Tajik",
        "th": "Thai",
        "ti": "Tigrinya",
        "tk": "Turkmen",
        "tl": "Tagalog",
        "tn": "Tswana",
        "to": "Tongan",
        "tpi": "Tok Pisin",
        "tr": "Turkish",
        "ts": "Tsonga",
        "tt": "Tatar",
        "tum": "Tumbuka",
        "tw": "Twi",
        "ty": "Tahitian",
        "tyv": "Tuvan",
        "udm": "Udmurt",
        "ug": "Uyghur",
        "uk": "Ukrainian",
        "ur": "Urdu",
        "uz": "Uzbek",
        "ve": "Venda",
        "vec": "Venetian",
        "vep": "Vepsian",
        "vi": "Vietnamese",
        "vls": "West Flemish",
        "vo": "Volapük",
        "wa": "Walloon",
        "war": "Waray-Waray",
        "wo": "Wolof",
        "wuu": "Wu",
        "xal": "Kalmyk",
        "xh": "Xhosa",
        "xmf": "Mingrelian",
        "yi": "Yiddish",
        "yo": "Yoruba",
        "za": "Zhuang",
        "zea": "Zeelandic",
        "zh": "Chinese",
        "zu": "Zulu",
    }
    for langid, langword in langs.items():
        # https://archive.org/services/docs/api/internetarchive/quickstart.html#searching
        results = internetarchive.search_items(
            'subject:"kiwix" AND subject:"zim" AND subject:"%s"' %
            (langid))
        for i in results.iter_as_items():
            try:
                item_md = i.item_metadata['metadata']
                itemid = item_md['identifier']
            except Exception:
                # Best effort: skip items with unusable metadata, but (unlike
                # a bare ``except:``) let KeyboardInterrupt/SystemExit through.
                print('Error in', i)
                continue
            print(itemid)

            if 'language' in item_md:
                print('Already has language: %s' % (item_md['language']))
                continue
            # Only trust the search hit when the code also appears as a
            # "_<code>_" component of the identifier.
            if '_%s_' % (langid) not in itemid:
                print(item_md)
                print('Unknown language')
                continue

            r = internetarchive.modify_metadata(
                itemid, metadata=dict(language=langword))
            if r.status_code == 200:
                print('Language added: %s' % (langword))
            else:
                print('Error (%s) adding language: %s' %
                      (r.status_code, langword))