def test_to_from_json(self, m):
    """Round-trip a client through its JSON form and compare the results.

    Three checks are made: the JSON emitted by ``to_json`` for an OAuth
    client matches the expected document, a client rebuilt from that JSON
    equals the original, and a client rebuilt from a hand-written JSON
    document equals one constructed with the same API-key settings.
    """
    base_url = 'https://example.org'
    key = 'thisisrandomtext'
    verify = False
    cache_location = 'tmp'
    oauth_settings = {
        'client_id': 'morerandomtext',
        'client_secret': 'secretrandomtext',
        'redirect_url': 'https://anotherfake.com',
    }
    expected_json = (
        '{"use_cache": false, "api_url": "https://example.org", '
        '"cache": {"max_age": 0, "cache_dir": "tmp"}, "cache_dir": "tmp", '
        '"update_cache": true, '
        '"oauth": {"client_id": "morerandomtext", '
        '"state": "cgLXfsICCMsuTeY6HWkzsqMPyxTA8K", "token": null, '
        '"auth_url": "https://example.org/oauth/authorize?response_type=code'
        '&client_id=morerandomtext&redirect_uri=https%3A%2F%2Fanotherfake.com'
        '&state=cgLXfsICCMsuTeY6HWkzsqMPyxTA8K", '
        '"redirect_url": "https://anotherfake.com", '
        '"client_secret": "secretrandomtext", "api_key": "secretkey", '
        '"verifySSL": false, "api_url": "https://example.org"}, '
        '"api_key": "secretkey"}'
    )

    client = pyalveo.Client(
        api_url=base_url,
        oauth=oauth_settings,
        verifySSL=verify,
        use_cache=False,
        cache_dir=cache_location,
        configfile="tests/alveo.config",
    )
    serialised = client.to_json()

    actual = json.loads(serialised)
    expected = json.loads(expected_json)
    # Some values cannot be predicted, so they are dropped from both sides
    # before comparing: the OAuth state (generated afresh each time), the
    # auth URL (it embeds the state) and both cache directory entries.
    for payload in (actual, expected):
        for unpredictable in ('state', 'auth_url'):
            payload['oauth'].pop(unpredictable, None)
        payload['cache'].pop('cache_dir', None)
        payload.pop('cache_dir', None)
    self.assertEqual(actual, expected)

    # A client rebuilt from the generated JSON carries the same state, so
    # it must compare equal to the client that produced it.
    restored = pyalveo.Client.from_json(serialised)
    self.assertEqual(client, restored)

    starting_json = (
        '{"use_cache": true, "api_url": "https://example.org", '
        '"cache": {"max_age": 0, "cache_dir": "tmp"}, "cache_dir": "tmp", '
        '"update_cache": true, '
        '"oauth": {"client_id": null, "state": null, "token": null, '
        '"auth_url": null, "redirect_url": null, "client_secret": null, '
        '"api_key": "thisisrandomtext", "verifySSL": false, '
        '"api_url": "https://example.org"}, '
        '"api_key": "thisisrandomtext"}'
    )
    configured = pyalveo.Client(
        api_url=base_url,
        api_key=key,
        verifySSL=verify,
        use_cache=True,
        cache_dir=cache_location,
    )
    loaded = pyalveo.Client.from_json(starting_json)
    # Hand-written JSON must produce a client equal to one set up normally.
    self.assertEqual(configured, loaded)
def test_add_annotations(self, m):
    """Create an item, then post two text annotations against it."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    # Create the item that the annotations will be attached to.
    collection_uri = API_URL + "/catalog/collection1"
    item_name = "testitem1"
    m.post(collection_uri, json={"success": [item_name]})
    item_meta = {
        'dcterms:title': 'Test Item',
        'dcterms:creator': 'A. Programmer',
    }
    item_uri = client.add_item(collection_uri, item_name, item_meta)

    # (label, start, end) for each page-number annotation.
    spans = [("hello", 421, 425), ("world", 2524, 2529)]
    annotations = [
        {
            "@type": "dada:TextAnnotation",
            "type": "pageno",
            "label": label,
            "start": start,
            "end": end,
        }
        for label, start, end in spans
    ]

    # Register the annotation endpoint, then add the annotations.
    m.post(item_uri + "/annotations", json={'success': 'yes'})
    client.add_annotations(item_uri, annotations)
def oauth_login(request, redirect_url='/'):
    """Start an OAuth login and redirect the browser accordingly.

    :param request: the incoming request; its ``session``, ``GET`` and
        ``get_host()`` are used.
    :param redirect_url: where to send the user when a valid client is
        already present, and the fallback for the ``next`` session value.
    :return: an ``HttpResponseRedirect``, either to ``redirect_url`` or to
        the authorisation URL obtained from the new client.
    """
    request.session.flush()

    # NOTE(review): the session is flushed just above, so this lookup
    # presumably always yields None and the early return never fires.
    # Also, the value stored under 'client' further down is the JSON
    # string from to_json(), not a Client object. Confirm the intent.
    client = request.session.get('client', None)

    # Identity check: `!= None` would go through the object's own
    # equality methods rather than testing for absence.
    if client is not None and client.oauth.validate():
        # A valid client already exists; nothing to do but go home.
        return HttpResponseRedirect(redirect_url)

    oauth_redirect_url = 'https://%s/oauth/callback' % request.get_host()
    OAUTH = {
        'client_id': settings.OAUTH_CLIENT_ID,
        'client_secret': settings.OAUTH_CLIENT_SECRET,
        'redirect_url': oauth_redirect_url,
    }

    client = pyalveo.Client(api_url=settings.API_URL, oauth=OAUTH, verifySSL=False)
    url = client.oauth.get_authorisation_url()

    # Keep the serialised client and the post-login target in the session.
    request.session['client'] = client.to_json()
    request.session['next'] = request.GET.get('next', redirect_url)

    return HttpResponseRedirect(url)
def test_add_text_item(self, m):
    """Add an item that carries a single text document and inspect the request."""
    doctext = "This is the text of my test document.\nTwo lines.\n"

    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    item_name = "item1"
    collection_uri = API_URL + "/catalog/" + "testcollection1"
    m.post(collection_uri, json={"success": [item_name]})

    item_meta = {
        'dcterms:title': 'Test Item',
        'dcterms:creator': 'A. Programmer',
    }
    item_uri = client.add_text_item(
        collection_uri, item_name, item_meta,
        text=doctext, title='my test document')
    self.assertIn(item_name, item_uri)

    # The last request should be the JSON POST that created the item.
    sent = m.last_request
    self.assertEqual(sent.method, 'POST')
    self.assertEqual(sent.headers['Content-Type'], 'application/json')
    self.assertEqual(sent.headers['X-API-KEY'], API_KEY)

    self.assertIn('items', sent.json())
    posted_items = sent.json()['items']
    self.assertEqual(1, len(posted_items))

    # The single item must carry exactly one document holding our text.
    posted_item = posted_items[0]
    self.assertIn('documents', posted_item)
    posted_docs = posted_item['documents']
    self.assertEqual(1, len(posted_docs))
    self.assertEqual(doctext, posted_docs[0]['content'])
    self.assertEqual(item_name + '.txt', posted_docs[0]['identifier'])
def test_get_annotations(self, m):
    """Fetch an item's annotations of one type and check their structure."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

    item_url = client.oauth.api_url + "/catalog/ace/A01b"
    with open('tests/responses/A01b.json', 'rb') as fd:
        item_body = fd.read()
    m.get(item_url, content=item_body)
    item = client.get_item(item_url)

    # Serve the canned annotation response, then request 'speaker' annotations.
    with open('tests/responses/A01b-annotations.json', 'rb') as fd:
        annotation_body = fd.read()
    m.get(item_url + '/annotations.json', content=annotation_body)
    anns = item.get_annotations(
        atype=u'http://ns.ausnc.org.au/schemas/annotation/ice/speaker')

    expected_top_level = [u'@context', u'alveo:annotations', u'commonProperties']
    self.assertListEqual(sorted(anns.keys()), expected_top_level)

    first = anns['alveo:annotations'][0]
    expected_fields = [u'@id', u'@type', u'end', u'start', u'type']
    self.assertEqual(sorted(first.keys()), expected_fields)
def test_add_document(self, m):
    """Add a document to an item and verify the payload that was sent."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    collection_name = "testcollection1"
    itemname = "item1"
    docname = "doc1.txt"
    content = "Hello World!\n"
    item_uri = API_URL + "/catalog/%s/%s" % (collection_name, itemname)

    success_message = "Added the document %s to item %s in collection %s" % (
        docname, itemname, collection_name)
    m.post(item_uri, json={"success": success_message})

    docmeta = {
        "dcterms:title": "Sample Document",
        "dcterms:type": "Text",
    }
    document_uri = client.add_document(item_uri, docname, docmeta, content=content)

    # Inspect what was posted for the document.
    payload = m.last_request.json()
    self.assertEqual(payload['document_content'], content)
    self.assertIn('metadata', payload)

    sent_meta = payload['metadata']
    self.assertIn('dcterms:title', sent_meta)
    self.assertEqual(sent_meta['dcterms:title'], docmeta['dcterms:title'])
    self.assertEqual(sent_meta['@type'], "foaf:Document")
    self.assertEqual(sent_meta['dcterms:identifier'], docname)
def test_item_download(self, m):
    """Fetch one item twice with caching enabled and compare both results."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=True)

    item_url = client.oauth.api_url + '/catalog/cooee/1-190'
    # The mock streams the body from the open file, so the request has to
    # happen while the file is still open.
    with open('tests/responses/1-190.json', 'rb') as response_file:
        m.get(item_url, body=response_file)
        fetched = client.get_item(item_url)

    self.assertEqual(item_url, fetched.url())
    expected_text_url = (
        client.oauth.api_url + u'/catalog/cooee/1-190/primary_text.json')
    self.assertEqual(
        fetched.metadata()['alveo:primary_text_url'], expected_text_url)

    # Second fetch: expected to be served from the cache.
    fetched_again = client.get_item(item_url)
    self.assertEqual(item_url, fetched_again.url())
    self.assertEqual(fetched.metadata(), fetched_again.metadata())
def test_client_no_cache(self, m):
    """Create a client with caching disabled and fetch an item and a document."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

    item_url = client.oauth.api_url + "/catalog/cooee/1-190"

    # The mock streams each body from the open file, so every request is
    # made while the corresponding file is still open.
    with open('tests/responses/1-190.json', 'rb') as rh:
        m.get(item_url, body=rh)
        item = client.get_item(item_url)

    self.assertIsInstance(item, pyalveo.Item)

    # Get the item's first document and read its content.
    with open('tests/responses/1-190-plain.txt', 'rb') as rh:
        m.get(item_url + "/document/1-190-plain.txt", body=rh)
        doc = item.get_document(0)
        self.assertIsInstance(doc, pyalveo.Document)
        doc_content = doc.get_content()

    self.assertEqual(doc_content[:20].decode(), "Sydney, New South Wa")
def test_create_collection(self, m):
    """Create a collection and validate both the result and the request."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    cname = 'testcollection1'
    catalog_url = client.oauth.api_url + "/catalog"
    curl = catalog_url + "/" + cname
    created_message = "New collection \'%s\' (%s) created" % (cname, curl)
    m.post(catalog_url, json={"success": created_message})

    meta = {
        "@context": CONTEXT,
        "@type": "dcmitype:Collection",
        "dcterms:creator": "Data Owner",
        "dcterms:rights": "All rights reserved to Data Owner",
        "dcterms:subject": "English Language",
        "dcterms:title": "Test Collection",
    }
    result = client.create_collection('testcollection1', meta)

    for fragment in ("testcollection1", "created"):
        self.assertIn(fragment, result)

    # Validate the request that was made.
    sent = m.last_request
    self.assertEqual(sent.method, 'POST')
    for field in ('name', 'collection_metadata'):
        self.assertIn(field, sent.json())
    self.assertDictEqual(meta, sent.json()['collection_metadata'])
def test_identical_clients(self, m):
    """Check equality between clients built with the same or different settings.

    Comparisons use assertEqual/assertNotEqual rather than calling
    ``__eq__``/``__ne__`` directly: a direct call that returned
    ``NotImplemented`` would still satisfy ``assertTrue``.
    """
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})

    # Two uncached clients with identical arguments are equal, both ways.
    first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)
    second_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)
    self.assertEqual(first_client, second_client)
    self.assertEqual(second_client, first_client)

    # Two clients created with the same api key and same cache arguments
    # must be the same.
    first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db",
                                  use_cache=True, update_cache=True)
    second_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db",
                                   use_cache=True, update_cache=True)
    self.assertEqual(first_client, second_client)
    self.assertEqual(second_client, first_client)

    # Two clients with the same api key but different cache configuration
    # must be different.
    third_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db",
                                  use_cache=False, update_cache=False)
    self.assertNotEqual(first_client, third_client)
    self.assertNotEqual(second_client, third_client)

    # Previously a chained assignment bound both names to one object, so
    # the final check compared a client with itself. Build two separate
    # instances so the comparison is meaningful.
    # NOTE(review): the original comment said this should compare a client
    # built without optional arguments against one with explicit defaults;
    # the defaults are not visible here, so that variant is left to confirm.
    first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db",
                                  use_cache=True, update_cache=True)
    second_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db",
                                   use_cache=True, update_cache=True)
    self.assertIsNot(first_client, second_client)
    self.assertEqual(first_client, second_client)
def test_client_context(self, m):
    """A prefix registered with add_context shows up in the client's context."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    prefix, namespace = 'test', 'http://test.org/'
    client.add_context(prefix, namespace)

    self.assertIn(prefix, client.context)
    self.assertEqual(namespace, client.context[prefix])
def test_client_cache(self, m):
    """Create a client with a cache enabled and check that it caches things."""
    cache_dir = "tmp"
    # Register the cleanup first so the directory is removed even if a
    # later step fails (ignore_errors=True covers the not-yet-created case).
    self.addCleanup(shutil.rmtree, cache_dir, True)

    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY,
                            use_cache=True, cache_dir=cache_dir)
    item_url = client.oauth.api_url + "/catalog/cooee/1-190"

    self.assertIsInstance(client.cache, pyalveo.Cache)

    # The mock streams each body from the open file, so every request is
    # made while the corresponding file is still open.
    with open('tests/responses/1-190.json', 'rb') as rh:
        m.get(item_url, body=rh)
        item = client.get_item(item_url)

    self.assertIsInstance(item, pyalveo.Item)

    # Look in the cache for this item's metadata.
    self.assertTrue(client.cache.has_item(item_url))
    meta = client.cache.get_item(item_url)

    # Check a few things about the cached metadata JSON.
    meta_text = meta.decode('utf-8')
    self.assertIn("@context", meta_text)
    self.assertIn(item_url, meta_text)

    # Get a document and read its content.
    with open('tests/responses/1-190-plain.txt', 'rb') as rh:
        m.get(item_url + "/document/1-190-plain.txt", body=rh)
        doc = item.get_document(0)
        self.assertIsInstance(doc, pyalveo.Document)
        doc_content = doc.get_content()

    self.assertEqual(doc_content[:20].decode(), "Sydney, New South Wa")

    # There should be exactly one cached file under cache_dir/files.
    files_dir = os.path.join(cache_dir, "files")
    ldir = os.listdir(files_dir)
    self.assertEqual(1, len(ldir))

    # The content of that file should match what we downloaded.
    with open(os.path.join(files_dir, ldir[0]), 'rb') as h:
        self.assertEqual(h.read(), doc_content)

    # Now trigger a cache hit.
    doc_content_cache = doc.get_content()
    self.assertEqual(doc_content, doc_content_cache)
def download_item_list(item_list_url, channel='all', outputdir='data/austalk'):
    '''Download audio for the first five items of an Alveo item list.

    For each matching document the audio is saved as ``data.wav`` in
    ``outputdir``, resampled to 8 kHz, and written through ``write_file``
    to ``data/data.tsv`` as a row made of the integer word label followed
    by the samples.

    :param item_list_url: URL of the item list to read.
    :param channel: substring a document filename must contain to be
        downloaded. With the default ``'all'`` the function prints a
        message and exits at the first candidate file.
    :param outputdir: directory that receives the downloaded file; created
        if it does not exist.
    :return: None
    '''
    import sys

    client = pyalveo.Client(configfile="alveo.config", use_cache=False)
    item_lists = client.get_item_list(item_list_url)

    # Maps each distinct word to a small integer label, in order of
    # first appearance.
    label_dict = {}

    # Only the first five items are processed.
    for item_url in item_lists[0:5]:
        item = client.get_item(item_url)
        meta = read_meta(client, item.metadata())
        word = meta['word'].split('_')[1]

        # Encode labels.
        if word not in label_dict:
            print(word)
            label_dict[word] = len(label_dict)

        if not os.path.exists(outputdir):
            os.makedirs(outputdir)

        for doc in item.get_documents():
            filename = doc.get_filename()
            # NOTE(review): '.TextGrid' files are accepted here but are then
            # saved as data.wav and loaded as audio. Confirm this is intended.
            if not filename.endswith(('.wav', '.TextGrid')):
                continue

            if channel == 'all':
                print('please specify channel to download')
                sys.exit()

            if channel in filename:
                print(filename)
                doc.download_content(dir_path=outputdir, filename=('data.wav'))
                y, sr = librosa.load(os.path.join(outputdir, 'data.wav'))
                # Resample from the rate librosa actually loaded at. The
                # previous hard-coded 16000 did not match it: load()
                # resamples to its own default rate unless told otherwise.
                y = librosa.resample(y, orig_sr=sr, target_sr=8000)
                observation = [label_dict[word]] + y.tolist()
                write_file(observation, 'data/data.tsv')

    print('download complete')
def test_create_client(self, m):
    """Test client creation against a rejecting and an accepting API.

    The active part checks that an APIError carrying the server's failure
    message is raised when the item-list endpoint answers 401. The checks
    involving ~/alveo.config sit under ``if False:`` and never run.
    """
    # Make the item-list endpoint reject the request with a 401.
    m.get(API_URL + "/item_lists.json", json={'failure': 'Client could not be created. Check your api key'}, status_code=401)

    # Test with wrong api key
    with self.assertRaises(pyalveo.APIError) as cm:
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)
        client.get_item_lists()

    # The server's failure message must be part of the raised error.
    self.assertTrue(
        "Client could not be created. Check your api key" in str(cm.exception)
    )

    # From here on the endpoint answers successfully.
    m.get(API_URL + "/item_lists.json",json={'success': 'yes'})
    alveo_config_path = os.path.expanduser('~/alveo.config')
    cache_db_path = 'tmp'

    # NOTE(review): disabled block. The source layout was flattened, so the
    # exact extent of this `if False:` body is an assumption; everything
    # that calls pyalveo.Client() without arguments is kept inside it.
    if False:
        # how to mock these?
        # Test when alveo.config is present
        if os.path.exists(alveo_config_path):
            client = pyalveo.Client()
            self.assertEqual(type(client), pyalveo.Client)
        else:
            # Test when alveo.config is absent
            with self.assertRaises(IOError) as cm:
                client = pyalveo.Client()

            self.assertEqual(
                "Could not find file ~/alveo.config. Please download your configuration file from http://pyalveo.org.au/ OR try to create a client by specifying your api key",
                str(cm.exception)
            )

        # Test with correct api key
        client = pyalveo.Client()
        self.assertEqual(type(client), pyalveo.Client)
def test_create_contribution(self, m):
    """Create a contribution and check the request body that was posted."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)
    cname = 'testcontrib'

    # Canned server reply for the contribution endpoint.
    server_reply = {
        'description': 'This is contribution description',
        'documents': [{
            'name': 'testfile.txt',
            'url': 'https://staging.alveo.edu.au/catalog/demotext/2006-05-28-19/document/testfile.txt'
        }],
        'id': '29',
        'metadata': {
            'abstract': '"This is contribution abstract"',
            'collection': 'https://staging.alveo.edu.au/catalog/demotext',
            'created': '2018-12-06T05:46:11Z',
            'creator': 'Data Owner',
            'title': 'HelloWorld'
        },
        'name': 'HelloWorld',
        'url': 'https://staging.alveo.edu.au/contrib/29'
    }
    m.post(client.oauth.api_url + "/contrib/", json=server_reply)

    meta = {
        "contribution_name": "HelloWorld",
        "contribution_collection": "demotext",
        "contribution_text": "This is contribution description",
        "contribution_abstract": "This is contribution abstract"
    }
    result = client.create_contribution(meta)

    # Validate the request that was made.
    sent = m.last_request
    self.assertEqual(sent.method, 'POST')

    # The request body must be exactly the metadata we supplied.
    for required in ('contribution_collection', 'contribution_name'):
        self.assertIn(required, sent.json())
    self.assertDictEqual(meta, sent.json())
def test_sparql_query(self, m):
    """Run a simple SPARQL query and check the shape of the result."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

    collection = 'mitcheldelbridge'
    query = """select * where { ?a ?b ?c } LIMIT 10"""

    # Ten placeholder bindings: 1..9 followed by 0.
    fake_bindings = list(range(1, 10)) + [0]
    m.get(API_URL + "/sparql/" + collection,
          json={'results': {'bindings': fake_bindings}})

    result = client.sparql_query(collection, query)

    self.assertIn('results', result)
    self.assertIn('bindings', result['results'])
    self.assertEqual(len(result['results']['bindings']), 10)
def main():
    """Download documents matching the given patterns from an item list.

    Reads the API key from the file named by ``args.api_key``, loads the
    item list, and passes it with the comma-separated patterns to
    ``download_documents``. A ``pyalveo.APIError`` is reported on stderr
    and the process exits with status 1.
    """
    args = parser()

    try:
        # Context manager so the key file is closed promptly.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()

        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
        item_list = read_item_list(args.item_list, client)
        patterns = args.patterns.split(',')
        downloaded = download_documents(item_list, patterns, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
def test_item_lists(self, m):
    """Exercise item list creation, lookup, extension, renaming and deletion."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)
    base_url = client.oauth.api_url
    item_list_name = 'pyalveo_test_item_list'

    # Create a new item list by adding an item under a new name.
    msg = '1 items added to new item list ' + item_list_name
    m.post(API_URL + '/item_lists', json={'success': msg})
    new_item_url_1 = [base_url + '/catalog/ace/A01a']
    self.assertEqual(
        client.add_to_item_list_by_name(new_item_url_1, item_list_name), msg)

    # Serve the canned list of item lists and the detail of list 831.
    with open('tests/responses/item-lists.json', 'rb') as fd:
        m.get(API_URL + '/item_lists', content=fd.read())
    with open('tests/responses/item-list-831.json') as fd:
        ilist_831 = json.load(fd)
    m.get(API_URL + '/item_lists/831', json=ilist_831)

    my_list = client.get_item_list_by_name(item_list_name)
    self.assertEqual(my_list.name(), item_list_name)

    # Add a second item to the now-existing list. The URL previously lacked
    # the '/' between the base URL and 'catalog', unlike the first item.
    msg = '1 items added to existing item list ' + item_list_name
    m.post(API_URL + '/item_lists', json={'success': msg})
    new_item_url_2 = [base_url + '/catalog/ace/A01b']
    self.assertEqual(
        client.add_to_item_list(new_item_url_2, my_list.url()),
        '1 items added to existing item list ' + my_list.name())

    # Test Rename List
    ilist_831['name'] = 'brand new list'
    m.put(API_URL + '/item_lists/831', json=ilist_831)
    client.rename_item_list(my_list, 'brand new list')

    # Deleting an Item List
    m.delete(API_URL + '/item_lists/831', json={'success': 'item list deleted'})
    self.assertEqual(client.delete_item_list(my_list), True)

    # Deleting an Item List that isn't there raises an exception.
    m.delete(API_URL + '/item_lists/831', status_code=404)
    self.assertRaises(pyalveo.APIError, client.delete_item_list, my_list)
def test_modify_item(self, m):
    """Update an item's metadata and check the metadata sent in the request."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    item_uri = API_URL + "/catalog/%s/%s" % ("testcollection1", "item1")
    new_meta = {
        "http://ns.ausnc.org.au/schemas/ausnc_md_model/mode": "An updated test mode",
    }

    m.put(item_uri, json={'success': "item metadata updated"})
    client.modify_item(item_uri, new_meta)

    sent_body = m.last_request.json()
    self.assertIn('metadata', sent_body)
    self.assertEqual(new_meta, sent_body['metadata'])
def test_delete_item(self, m):
    """Delete an item and confirm that a DELETE request was issued."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    collection, item_name, docname = "testcollection1", "item1", "doc1.txt"
    item_uri = API_URL + "/catalog/%s/%s" % (collection, item_name)

    # Register the delete endpoint, then delete the item.
    m.delete(item_uri, json={"success": item_name})
    client.delete_item(item_uri)

    self.assertEqual(m.last_request.method, 'DELETE')
def test_delete_document(self, m):
    """Delete a document and confirm that a DELETE request was issued."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    names = ("testcollection1", "item1", "doc1.txt")
    collection, item_name, doc_name = names
    document_uri = API_URL + "/catalog/%s/%s/documents/%s" % names

    # Register the delete endpoint, then delete the document.
    reply = "Deleted the document %s from item %s in collection %s" % (
        doc_name, item_name, collection)
    m.delete(document_uri, json={"success": reply})
    client.delete_document(document_uri)

    self.assertEqual(m.last_request.method, 'DELETE')
def find_hVd_words(api_key, speakerid, output, words='all'):
    """Find hVd words in the Austalk corpus for one speaker.

    Builds a SPARQL query restricted to the speaker and to prompts that
    match the selected word list (case-insensitively), runs it against the
    'austalk' collection, and writes the matches to a tab-separated file.

    :param api_key: API key passed to the client.
    :param speakerid: speaker identifier; interpolated into the query and
        written in the first column of every output row.
    :param output: path of the TSV file to write.
    :param words: 'all' (default), 'monopthongs' or 'dipthongs'.
    :raises KeyError: if ``words`` is not one of the values above.
    """
    # Keyword arguments, as at the other Client call sites, so the values
    # cannot end up in the wrong positional slots.
    client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)

    # NOTE(review): speakerid is interpolated into the query unescaped;
    # confirm that it only ever comes from trusted input.
    select_clause = (
        ' SELECT distinct ?item ?prompt ?compname WHERE {'
        ' ?item a ausnc:AusNCObject .'
        ' ?item olac:speaker ?speaker .'
        ' ?speaker austalk:id "%s" .'
        ' ?item austalk:prototype ?prot .'
        ' ?prot austalk:prompt ?prompt .'
        ' ?item austalk:componentName ?compname . '
    )
    query = PREFIXES + select_clause % speakerid

    hVdWords = dict(
        monopthongs=[
            'head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', 'whod',
            'herd', 'haired', 'hard', 'horde'
        ],
        dipthongs=['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'])

    if words == 'all':
        words = hVdWords['monopthongs'] + hVdWords['dipthongs']
    else:
        words = hVdWords[words]

    # Anchor every word so that only whole-prompt matches are accepted.
    filterclause = 'FILTER regex(?prompt, "^' + '$|^'.join(words) + '$", "i")\n'
    query += filterclause + "}"

    print(query)
    result = client.sparql_query('austalk', query)

    # (prompt, item URL) pairs in the order the server returned them.
    items = [(b['prompt']['value'], b['item']['value'])
             for b in result['results']['bindings']]

    with open(output, 'w') as out:
        out.write("Speaker\tPrompt\tItemURL\n")
        for prompt, item_url in items:
            out.write(speakerid + "\t" + prompt + "\t" + item_url + "\n")
def main():
    """Write the item URLs of an item list to a file, one per line.

    Reads the API key from the file named by ``args.api_key``, fetches the
    list at ``args.item_list_url`` and writes an ``ItemURL`` header plus
    each item to ``args.output``. A ``pyalveo.APIError`` is reported on
    stderr and the process exits with status 1.
    """
    args = parser()

    try:
        # Context manager so the key file is closed promptly.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()

        client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)
        item_list = client.get_item_list(args.item_list_url)

        with open(args.output, 'w') as out:
            out.write("ItemURL\n")
            for item in item_list:
                out.write(item + "\n")
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
def retrieve_doc_as_user(document_id, api_key):
    """Fetch a document through the Alveo API using the caller's API key.

    :param document_id: identifier passed to ``client.get_document``.
    :param api_key: API key used to build the client.
    :return: whatever ``client.get_document`` returns, or None when the
        retrieval fails.
    """
    alveo_metadata = get_module_metadata("alveo")

    if alveo_metadata is None:
        abort(404, "Could not segment document. 'alveo' module not loaded")

    api_url = alveo_metadata['api_url']

    client = pyalveo.Client(
        api_url=api_url,
        api_key=api_key,
        use_cache=False,
        update_cache=False,
        cache_dir=None)

    audio_data = None
    try:
        audio_data = client.get_document(document_id)
    except Exception:
        # Best effort: any retrieval failure yields None. Exception rather
        # than BaseException, so KeyboardInterrupt and SystemExit still
        # propagate.
        pass

    return audio_data
def test_download_document(self, m):
    """Download a document into a temporary directory and check the file exists."""
    import shutil

    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

    # Temp directory for output; removed again once the test finishes so
    # repeated runs do not leave directories behind.
    output_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, output_dir, True)
    outname = "downloaded_sample.wav"

    document_url = client.oauth.api_url + '/catalog/cooee/1-190/document/sample.wav'
    meta = {'alveo:url': document_url}
    document = pyalveo.Document(meta, client)

    # The mock streams the body from the open file, so the download has
    # to happen while the file is still open.
    with open('tests/responses/sample.wav', 'rb') as rh:
        m.get(document_url, body=rh)
        document.download_content(output_dir, outname, force_download=True)

    self.assertTrue(os.path.exists(os.path.join(output_dir, outname)))
def test_get_contribution(self, m):
    """Fetch a contribution and check the request method and returned fields."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    cname = '29'
    contrib_url = client.oauth.api_url + "/contrib/" + cname

    # Canned server reply for the contribution URL.
    server_reply = {
        'description': 'This is contribution description',
        'documents': [{
            'name': 'testfile.txt',
            'url': 'https://staging.alveo.edu.au/catalog/demotext/2006-05-28-19/document/testfile.txt'
        }],
        'metadata': {
            'abstract': '"This is contribution abstract"',
            'collection': 'https://staging.alveo.edu.au/catalog/demotext',
            'created': '2018-12-06T05:46:11Z',
            'creator': 'Data Owner',
            'title': 'HelloWorld'
        },
        'name': 'HelloWorld',
        'url': contrib_url
    }
    m.get(contrib_url, json=server_reply)

    result = client.get_contribution(contrib_url)

    self.assertEqual(m.last_request.method, "GET")
    self.assertEqual(result['id'], cname)
    self.assertEqual(result['description'], 'This is contribution description')
def get_alveo_data():
    """Using the Alveo API, get the audio data for the configured item list.

    Every document whose filename ends in "wav" is downloaded into the
    directory named by the DATA_DIR setting, which is created if missing.

    Config keys read: ITEM_LIST_URL, DATA_DIR, ALVEO_API_URL, ALVEO_API_KEY

    :return: ``(speakers, basenames)``: the 'olac:speaker' value of each
        item, and the extension-less basename of each downloaded file.
    """
    item_list_url = config("ITEM_LIST_URL")

    client = pyalveo.Client(api_url=config("ALVEO_API_URL"),
                            api_key=config("ALVEO_API_KEY"))
    item_list = client.get_item_list(item_list_url)

    # For each item we need the speaker identifier and the target audio file.
    item_meta = item_list.get_all()
    # NOTE(review): speakers has one entry per item while basenames has one
    # per downloaded wav document; confirm callers expect that.
    speakers = [
        i.metadata()['alveo:metadata']['olac:speaker'] for i in item_meta
    ]

    data_dir = config("DATA_DIR")
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    filepaths = []
    basenames = []
    for item in item_meta:
        for doc in item.get_documents():
            filename = doc.get_filename()
            if filename.endswith("wav"):
                # Download into the configured directory; the previous
                # hard-coded "data" ignored DATA_DIR.
                path = doc.download_content(dir_path=data_dir)
                filepaths.append(path)
                basenames.append(
                    os.path.splitext(os.path.basename(filename))[0])

    print("Downloaded", len(filepaths), "files")
    return speakers, basenames
def test_create_client_oauth(self, m):
    """Build a client from OAuth credentials and check the resulting settings."""
    redirect_url = API_URL + '/oauth_redirect/'
    m.get(redirect_url, json={})

    credentials = dict(
        client_id='foobar',
        client_secret='secret client',
        redirect_url=redirect_url,
    )
    client = pyalveo.Client(
        api_url=API_URL,
        oauth=credentials,
        configfile="missing.config",
        verifySSL=False,
    )

    # The request OAuth makes can't be captured here, so check the
    # settings that result from it instead.
    expected_prefix = API_URL + '/oauth/authorize'
    self.assertTrue(client.oauth.auth_url.startswith(expected_prefix))
    self.assertEqual(client.oauth.redirect_url, redirect_url)
def test_add_item(self, m):
    """Add an item to a collection and check the request that was sent."""
    m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
    client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

    item_name = "item1"
    collection_uri = API_URL + "/catalog/" + "testcollection1"
    m.post(collection_uri, json={"success": [item_name]})

    item_meta = {
        'dcterms:title': 'Test Item',
        'dcterms:creator': 'A. Programmer',
    }
    item_uri = client.add_item(collection_uri, item_name, item_meta)
    self.assertIn(item_name, item_uri)

    # The last request should be an authenticated JSON POST with an
    # 'items' entry in its body.
    sent = m.last_request
    self.assertEqual(sent.method, 'POST')
    expected_headers = (
        ('Content-Type', 'application/json'),
        ('X-API-KEY', API_KEY),
    )
    for header, value in expected_headers:
        self.assertEqual(sent.headers[header], value)
    self.assertIn('items', sent.json())
def get_item_list(api_key, item_list_url):
    """Return the item list at ``item_list_url``, fetched with an uncached client.

    :param api_key: API key used to build the client.
    :param item_list_url: URL of the item list to fetch.
    """
    uncached_client = pyalveo.Client(
        api_key=api_key,
        api_url=API_URL,
        use_cache=False,
    )
    item_list = uncached_client.get_item_list(item_list_url)
    return item_list