Example #1
0
    def test_to_from_json(self, m):
        """Round-trip a client through its JSON form and compare the results.

        Two scenarios are exercised: a client configured with OAuth settings
        is serialised and reloaded, and a client configured with an API key is
        compared against one loaded from hand-written JSON.
        """

        api_url = 'https://example.org'
        api_key = 'thisisrandomtext'
        verify_ssl = False
        cache_dir = 'tmp'
        oauth_settings = dict(
            client_id='morerandomtext',
            client_secret='secretrandomtext',
            redirect_url='https://anotherfake.com',
        )
        expected_json = '{"use_cache": false, "api_url": "https://example.org", "cache": {"max_age": 0, "cache_dir": "tmp"}, "cache_dir": "tmp", "update_cache": true, "oauth": {"client_id": "morerandomtext", "state": "cgLXfsICCMsuTeY6HWkzsqMPyxTA8K", "token": null, "auth_url": "https://example.org/oauth/authorize?response_type=code&client_id=morerandomtext&redirect_uri=https%3A%2F%2Fanotherfake.com&state=cgLXfsICCMsuTeY6HWkzsqMPyxTA8K", "redirect_url": "https://anotherfake.com", "client_secret": "secretrandomtext", "api_key": "secretkey", "verifySSL": false, "api_url": "https://example.org"}, "api_key": "secretkey"}'

        oauth_client = pyalveo.Client(api_url=api_url,
                                      oauth=oauth_settings,
                                      verifySSL=verify_ssl,
                                      use_cache=False,
                                      cache_dir=cache_dir,
                                      configfile="tests/alveo.config")
        serialised = oauth_client.to_json()

        # Compare as dictionaries, after dropping the values that cannot be
        # predicted: the generated state, the auth URL that embeds that state,
        # and the cache directory entries.
        actual = json.loads(serialised)
        expected = json.loads(expected_json)
        for oauth_key in ('state', 'auth_url'):
            actual['oauth'].pop(oauth_key, None)
            expected['oauth'].pop(oauth_key, None)
        actual['cache'].pop('cache_dir', None)
        expected['cache'].pop('cache_dir', None)
        actual.pop('cache_dir', None)
        expected.pop('cache_dir', None)

        self.assertEqual(actual, expected)

        # Reloading the generated JSON keeps the state, so the two clients
        # are expected to compare equal.
        reloaded = pyalveo.Client.from_json(serialised)
        self.assertEqual(oauth_client, reloaded)

        starting_json = '{"use_cache": true, "api_url": "https://example.org", "cache": {"max_age": 0, "cache_dir": "tmp"}, "cache_dir": "tmp", "update_cache": true, "oauth": {"client_id": null, "state": null, "token": null, "auth_url": null, "redirect_url": null, "client_secret": null, "api_key": "thisisrandomtext", "verifySSL": false, "api_url": "https://example.org"}, "api_key": "thisisrandomtext"}'

        key_client = pyalveo.Client(api_url=api_url,
                                    api_key=api_key,
                                    verifySSL=verify_ssl,
                                    use_cache=True,
                                    cache_dir=cache_dir)
        from_manual_json = pyalveo.Client.from_json(starting_json)

        # A client loaded from hand-written JSON should match one built
        # through the constructor with the same settings.
        self.assertEqual(key_client, from_manual_json)
Example #2
0
    def test_add_annotations(self, m):
        """Create an item, then post two text annotations against it."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        # Register the collection endpoint and create the item to annotate.
        collection_uri = API_URL + "/catalog/collection1"
        item_name = "testitem1"
        m.post(collection_uri, json={"success": [item_name]})
        item_meta = {
            'dcterms:title': 'Test Item',
            'dcterms:creator': 'A. Programmer',
        }
        item_uri = client.add_item(collection_uri, item_name, item_meta)

        def page_annotation(label, start, end):
            # Both annotations share the same types and differ only in
            # label and character offsets.
            return {
                "@type": "dada:TextAnnotation",
                "type": "pageno",
                "label": label,
                "start": start,
                "end": end,
            }

        annotations = [
            page_annotation("hello", 421, 425),
            page_annotation("world", 2524, 2529),
        ]

        # Register the annotations endpoint, then submit the annotations.
        m.post(item_uri + "/annotations", json={'success': 'yes'})
        client.add_annotations(item_uri, annotations)
Example #3
0
def oauth_login(request, redirect_url='/'):
    """Start the OAuth login flow, or skip it when the session already holds
    a client that validates.

    :param request: the incoming request; its ``session``, ``GET`` and
        ``get_host()`` are used.
    :param redirect_url: where to send the user when no login is needed, and
        the fallback value stored under ``session['next']``.
    :return: an ``HttpResponseRedirect`` to ``redirect_url`` when the stored
        client validates, otherwise to the OAuth authorisation URL.
    """

    # Look the client up BEFORE flushing: flushing first emptied the session,
    # so the lookup could never find anything and the shortcut below was
    # unreachable.
    stored = request.session.get('client', None)

    client = None
    if stored is not None:
        if hasattr(stored, 'oauth'):
            client = stored
        else:
            # This function stores the client as ``to_json()`` text (see the
            # end of the function), so it has to be rebuilt before use.
            try:
                client = pyalveo.Client.from_json(stored)
            except (KeyError, TypeError, ValueError):
                # Unusable session data: fall through to a fresh login.
                client = None

    # If a client exists and is valid there is nothing to do: redirect home.
    if client is not None and client.oauth.validate():
        return HttpResponseRedirect(redirect_url)

    # Starting a new login: discard whatever the old session contained.
    request.session.flush()

    oauth_redirect_url = 'https://%s/oauth/callback' % request.get_host()

    OAUTH = {
        'client_id': settings.OAUTH_CLIENT_ID,
        'client_secret': settings.OAUTH_CLIENT_SECRET,
        'redirect_url': oauth_redirect_url,
    }

    # NOTE(review): verifySSL=False is kept from the original; confirm that
    # disabling certificate verification is intended outside development.
    client = pyalveo.Client(api_url=settings.API_URL,
                            oauth=OAUTH,
                            verifySSL=False)
    authorisation_url = client.oauth.get_authorisation_url()

    # Persist the client (as JSON) and the post-login destination.
    request.session['client'] = client.to_json()
    request.session['next'] = request.GET.get('next', redirect_url)

    return HttpResponseRedirect(authorisation_url)
Example #4
0
    def test_add_text_item(self, m):
        """Adding a text-only item posts one item carrying one text document."""

        text = "This is the text of my test document.\nTwo lines.\n"

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        item_name = "item1"
        collection_uri = API_URL + "/catalog/" + "testcollection1"
        m.post(collection_uri, json={"success": [item_name]})

        metadata = {
            'dcterms:title': 'Test Item',
            'dcterms:creator': 'A. Programmer',
        }
        item_uri = client.add_text_item(collection_uri, item_name, metadata,
                                        text=text, title='my test document')

        self.assertIn(item_name, item_uri)

        # Inspect the request that was sent to the collection endpoint.
        sent = m.last_request
        self.assertEqual(sent.method, 'POST')
        self.assertEqual(sent.headers['Content-Type'], 'application/json')
        self.assertEqual(sent.headers['X-API-KEY'], API_KEY)

        body = sent.json()
        self.assertIn('items', body)
        self.assertEqual(1, len(body['items']))

        posted_item = body['items'][0]
        self.assertIn('documents', posted_item)
        documents = posted_item['documents']
        self.assertEqual(1, len(documents))
        self.assertEqual(text, documents[0]['content'])
        self.assertEqual(item_name + '.txt', documents[0]['identifier'])
Example #5
0
    def test_get_annotations(self, m):
        """Fetch an item's annotations of one type and check their keys."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL,
                                api_key=API_KEY,
                                use_cache=False)

        # Serve the canned item response and load the item.
        item_url = client.oauth.api_url + "/catalog/ace/A01b"
        with open('tests/responses/A01b.json', 'rb') as item_file:
            item_body = item_file.read()
        m.get(item_url, content=item_body)
        item = client.get_item(item_url)

        # Serve the canned annotations and request those of type 'speaker'.
        with open('tests/responses/A01b-annotations.json', 'rb') as ann_file:
            ann_body = ann_file.read()
        m.get(item_url + '/annotations.json', content=ann_body)

        speaker_type = u'http://ns.ausnc.org.au/schemas/annotation/ice/speaker'
        annotations = item.get_annotations(atype=speaker_type)

        expected_top_level = [u'@context', u'alveo:annotations', u'commonProperties']
        self.assertListEqual(sorted(annotations.keys()), expected_top_level)

        first = annotations['alveo:annotations'][0]
        expected_fields = [u'@id', u'@type', u'end', u'start', u'type']
        self.assertEqual(sorted(first.keys()), expected_fields)
Example #6
0
    def test_add_document(self, m):
        """Adding a document posts its content together with its metadata."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        collection, item_name, doc_name = "testcollection1", "item1", "doc1.txt"
        text = "Hello World!\n"
        item_uri = API_URL + "/catalog/%s/%s" % (collection, item_name)

        success_message = "Added the document %s to item %s in collection %s" % (
            doc_name, item_name, collection)
        m.post(item_uri, json={"success": success_message})

        doc_meta = {
            "dcterms:title": "Sample Document",
            "dcterms:type": "Text",
        }
        client.add_document(item_uri, doc_name, doc_meta, content=text)

        # Check what was actually sent.
        payload = m.last_request.json()
        self.assertEqual(payload['document_content'], text)
        self.assertIn('metadata', payload)

        sent_meta = payload['metadata']
        self.assertIn('dcterms:title', sent_meta)
        self.assertEqual(sent_meta['dcterms:title'], doc_meta['dcterms:title'])
        self.assertEqual(sent_meta['@type'], "foaf:Document")
        self.assertEqual(sent_meta['dcterms:identifier'], doc_name)
Example #7
0
    def test_item_download(self, m):
        """Fetch an item, check its URL and metadata, then fetch it again."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL,
                                api_key=API_KEY,
                                use_cache=True)
        item_url = client.oauth.api_url + '/catalog/cooee/1-190'

        # The mock streams the response from the open file, so the request
        # has to be made while the file is still open.
        with open('tests/responses/1-190.json', 'rb') as response_file:
            m.get(item_url, body=response_file)
            first_fetch = client.get_item(item_url)

        self.assertEqual(item_url, first_fetch.url())

        metadata = first_fetch.metadata()
        expected_text_url = (client.oauth.api_url
                             + u'/catalog/cooee/1-190/primary_text.json')
        self.assertEqual(metadata['alveo:primary_text_url'], expected_text_url)

        # Second fetch of the same URL; with the cache enabled this is meant
        # to be answered without a new request.
        second_fetch = client.get_item(item_url)
        self.assertEqual(item_url, second_fetch.url())
        self.assertEqual(first_fetch.metadata(), second_fetch.metadata())
Example #8
0
    def test_client_no_cache(self, m):
        """A client built with use_cache=False can fetch an item and the
        content of its first document."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL,
                                api_key=API_KEY,
                                use_cache=False)

        item_url = client.oauth.api_url + "/catalog/cooee/1-190"

        with open('tests/responses/1-190.json', 'rb') as item_response:
            m.get(item_url, body=item_response)
            item = client.get_item(item_url)

        self.assertEqual(type(item), pyalveo.Item)

        # Fetch the first document while its canned response file is open.
        document_url = item_url + "/document/1-190-plain.txt"
        with open('tests/responses/1-190-plain.txt', 'rb') as doc_response:
            m.get(document_url, body=doc_response)
            document = item.get_document(0)

            self.assertEqual(type(document), pyalveo.Document)

            content = document.get_content()
            opening = content[:20].decode()
            self.assertEqual(opening, "Sydney, New South Wa")
Example #9
0
    def test_create_collection(self, m):
        """Creating a collection posts its name and metadata."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        name = 'testcollection1'
        catalog_url = client.oauth.api_url + "/catalog"
        collection_url = client.oauth.api_url + "/catalog/" + name

        created_message = "New collection '%s' (%s) created" % (name, collection_url)
        m.post(catalog_url, json={"success": created_message})

        metadata = {
            "@context": CONTEXT,
            "@type": "dcmitype:Collection",
            "dcterms:creator": "Data Owner",
            "dcterms:rights": "All rights reserved to Data Owner",
            "dcterms:subject": "English Language",
            "dcterms:title": "Test Collection",
        }

        outcome = client.create_collection('testcollection1', metadata)

        self.assertIn("testcollection1", outcome)
        self.assertIn("created", outcome)

        # Validate the request that was sent.
        sent = m.last_request
        self.assertEqual(sent.method, 'POST')
        body = sent.json()
        self.assertIn('name', body)
        self.assertIn('collection_metadata', body)
        self.assertDictEqual(metadata, body['collection_metadata'])
Example #10
0
    def test_identical_clients(self, m):
        """Clients created with the same settings compare equal; clients whose
        cache settings differ do not.

        Comparisons use assertEqual/assertNotEqual, which go through the
        ``==`` / ``!=`` operators. Calling ``__eq__`` / ``__ne__`` directly and
        wrapping the result in assertTrue would also pass on a truthy
        ``NotImplemented`` return value.
        """

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)
        second_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

        # Equality is checked in both directions.
        self.assertEqual(first_client, second_client)
        self.assertEqual(second_client, first_client)

        first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db", use_cache=True, update_cache=True)
        second_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db", use_cache=True, update_cache=True)

        # Two clients created with the same api key and the same arguments
        # must be the same.
        self.assertEqual(first_client, second_client)
        self.assertEqual(second_client, first_client)

        # Two clients with the same api key but a different cache
        # configuration must be different.
        third_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db", use_cache=False, update_cache=False)
        self.assertNotEqual(first_client, third_client)
        self.assertNotEqual(second_client, third_client)

        # A client without cache arguments should equal one with all the
        # default arguments.
        # NOTE(review): the chained assignment below binds BOTH names to the
        # second client, so this final check compares an object with itself
        # and the client built on the previous line is discarded. It is left
        # as found because the intended comparison depends on pyalveo's
        # defaults, which are not visible here -- confirm and separate the
        # two clients.
        first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)
        second_client = first_client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, cache="cache.db", use_cache=True, update_cache=True)
        self.assertEqual(first_client, second_client)
Example #11
0
    def test_client_context(self, m):
        """add_context registers a new prefix in the client's context."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        prefix, namespace = 'test', 'http://test.org/'
        client.add_context(prefix, namespace)

        # The prefix must be present and map to the namespace just added.
        self.assertIn('test', client.context)
        self.assertEqual('http://test.org/', client.context[prefix])
Example #12
0
    def test_client_cache(self, m):
        """A client with the cache enabled stores item metadata and document
        content under its cache directory."""

        cache_dir = "tmp"

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL,
                                api_key=API_KEY,
                                use_cache=True,
                                cache_dir=cache_dir)

        item_url = client.oauth.api_url + "/catalog/cooee/1-190"

        # Remove the cache directory afterwards, ignoring removal errors.
        self.addCleanup(shutil.rmtree, cache_dir, True)

        self.assertEqual(type(client.cache), pyalveo.Cache)

        with open('tests/responses/1-190.json', 'rb') as item_response:
            m.get(item_url, body=item_response)
            item = client.get_item(item_url)

        self.assertEqual(type(item), pyalveo.Item)

        # The item metadata should now be in the cache.
        self.assertTrue(client.cache.has_item(item_url))
        cached_meta = client.cache.get_item(item_url)

        # Spot-check the cached metadata JSON.
        self.assertIn("@context", cached_meta.decode('utf-8'))
        self.assertIn(item_url, cached_meta.decode('utf-8'))

        # Fetch a document and its content.
        document_url = item_url + "/document/1-190-plain.txt"
        with open('tests/responses/1-190-plain.txt', 'rb') as doc_response:
            m.get(document_url, body=doc_response)
            document = item.get_document(0)

            self.assertEqual(type(document), pyalveo.Document)

            content = document.get_content()
            self.assertEqual(content[:20].decode(), "Sydney, New South Wa")

        # Exactly one file should have been cached under <cache_dir>/files ...
        files_dir = os.path.join(cache_dir, "files")
        cached_files = os.listdir(files_dir)
        self.assertEqual(1, len(cached_files))
        # ... and its bytes should match the content we were given.
        with open(os.path.join(cache_dir, "files", cached_files[0]), 'rb') as cached:
            self.assertEqual(cached.read(), content)

        # Ask for the content again to trigger a cache hit.
        content_again = document.get_content()
        self.assertEqual(content, content_again)
Example #13
0
def download_item_list(item_list_url, channel='all', outputdir='data/austalk'):
    '''
    Download audio for the first five items of an Alveo item list and append
    each matching recording, labelled and resampled to 8 kHz, to
    ``data/data.tsv``.

    :param item_list_url: URL of the item list to read.
    :param channel: substring a document filename must contain to be
        processed. The default ``'all'`` is not supported: the function prints
        a message and exits as soon as it meets a ``.wav``/``.TextGrid``
        document.
    :param outputdir: directory the temporary ``data.wav`` is downloaded into;
        created if missing.
    :return: None
    :raises SystemExit: when ``channel`` is ``'all'`` and a candidate document
        is found.
    '''

    client = pyalveo.Client(configfile="alveo.config", use_cache=False)
    item_lists = client.get_item_list(item_list_url)
    label_dict = {}

    # The directory does not depend on the item, so create it once up front.
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)
    wav_path = os.path.join(outputdir, 'data.wav')

    # Focus on the first five items only.
    for item_url in item_lists[0:5]:
        item = client.get_item(item_url)
        meta = read_meta(client, item.metadata())
        word = meta['word'].split('_')[1]

        # Encode labels: each new word gets the next integer, starting at 0.
        if word not in label_dict:
            print(word)
            label_dict[word] = len(label_dict)

        for doc in item.get_documents():
            filename = doc.get_filename()

            # NOTE(review): '.TextGrid' documents pass this filter and are
            # then saved as data.wav and loaded as audio -- confirm whether
            # only '.wav' should be processed.
            if not filename.endswith(('.wav', '.TextGrid')):
                continue

            if channel == 'all':
                print('please specify channel to download')
                # Same effect as the previous exit() call, without relying on
                # the site-provided builtin.
                raise SystemExit

            if channel not in filename:
                continue

            print(filename)
            doc.download_content(dir_path=outputdir, filename='data.wav')

            # librosa.load returns the rate it actually produced; resample
            # from that rate rather than assuming 16 kHz. Keyword arguments
            # are used because newer librosa releases reject positional rates.
            y, sr = librosa.load(wav_path)
            y = librosa.resample(y, orig_sr=sr, target_sr=8000)

            observation = [label_dict[word]] + y.tolist()
            write_file(observation, 'data/data.tsv')

    print('download complete')
Example #14
0
    def test_create_client(self, m):
        """A rejected API key surfaces as an APIError carrying the server's
        message. The config-file scenarios below are currently disabled."""

        failure_message = 'Client could not be created. Check your api key'

        # Make the item-lists endpoint reject the request.
        m.get(API_URL + "/item_lists.json",
              json={'failure': failure_message},
              status_code=401)

        with self.assertRaises(pyalveo.APIError) as caught:
            client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)
            client.get_item_lists()

        self.assertIn("Client could not be created. Check your api key",
                      str(caught.exception))

        # Switch the endpoint back to a successful response.
        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        config_path = os.path.expanduser('~/alveo.config')

        if False:
            # Disabled: it is not yet clear how to mock the config file.
            if os.path.exists(config_path):
                # alveo.config is present
                client = pyalveo.Client()
                self.assertEqual(type(client), pyalveo.Client)
            else:
                # alveo.config is absent
                with self.assertRaises(IOError) as caught:
                    client = pyalveo.Client()

                self.assertEqual(
                    "Could not find file ~/alveo.config. Please download your configuration file from http://pyalveo.org.au/ OR try to create a client by specifying your api key",
                    str(caught.exception)
                )

            # With a correct api key
            client = pyalveo.Client()
            self.assertEqual(type(client), pyalveo.Client)
Example #15
0
    def test_create_contribution(self, m):
        """Creating a contribution posts the supplied metadata unchanged."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        # Canned server response for the contribution endpoint.
        server_response = {
            'description': 'This is contribution description',
            'documents': [{
                'name': 'testfile.txt',
                'url': 'https://staging.alveo.edu.au/catalog/demotext/2006-05-28-19/document/testfile.txt',
            }],
            'id': '29',
            'metadata': {
                'abstract': '"This is contribution abstract"',
                'collection': 'https://staging.alveo.edu.au/catalog/demotext',
                'created': '2018-12-06T05:46:11Z',
                'creator': 'Data Owner',
                'title': 'HelloWorld',
            },
            'name': 'HelloWorld',
            'url': 'https://staging.alveo.edu.au/contrib/29',
        }
        m.post(client.oauth.api_url + "/contrib/", json=server_response)

        contribution = {
            "contribution_name": "HelloWorld",
            "contribution_collection": "demotext",
            "contribution_text": "This is contribution description",
            "contribution_abstract": "This is contribution abstract",
        }

        client.create_contribution(contribution)

        # Validate the request: a POST whose body is exactly the metadata.
        sent = m.last_request
        self.assertEqual(sent.method, 'POST')
        body = sent.json()
        self.assertIn('contribution_collection', body)
        self.assertIn('contribution_name', body)
        self.assertDictEqual(contribution, body)
Example #16
0
    def test_sparql_query(self, m):
        """Run a simple SPARQL query and check the shape of the result."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

        sparql = """select * where { ?a ?b ?c } LIMIT 10"""

        # The mocked endpoint answers with ten bindings.
        canned = {'results': {'bindings': [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]}}
        m.get(API_URL + "/sparql/mitcheldelbridge", json=canned)

        answer = client.sparql_query('mitcheldelbridge', sparql)

        self.assertIn('results', answer)
        self.assertIn('bindings', answer['results'])
        self.assertEqual(len(answer['results']['bindings']), 10)
def main():
    """Command-line entry point: download documents for an item list.

    Reads the API key from the file named by ``args.api_key``, loads the item
    list, and downloads the documents matching the comma-separated
    ``args.patterns`` into ``args.output_path``. An ``APIError`` is reported
    on stderr and ends the process with exit status 1.
    """
    args = parser()
    try:
        # Use a context manager so the key file is closed straight away.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()

        client = pyalveo.Client(api_url=API_URL,
                                api_key=api_key,
                                use_cache=False)

        item_list = read_item_list(args.item_list, client)
        patterns = args.patterns.split(',')
        download_documents(item_list, patterns, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
Example #18
0
    def test_item_lists(self, m):
        """An item list can be created, extended with another item, renamed
        and deleted; deleting it a second time raises an APIError."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL,
                                api_key=API_KEY,
                                use_cache=False)
        base_url = client.oauth.api_url
        item_list_name = 'pyalveo_test_item_list'

        # Create the list by adding one item under a new name.
        msg = '1 items added to new item list ' + item_list_name
        m.post(API_URL + '/item_lists', json={'success': msg})
        new_item_url_1 = [base_url + '/catalog/ace/A01a']
        self.assertEqual(
            client.add_to_item_list_by_name(new_item_url_1, item_list_name),
            msg)

        # Serve the canned list-of-lists and the canned list 831.
        with open('tests/responses/item-lists.json', 'rb') as fd:
            m.get(API_URL + '/item_lists', content=fd.read())

        with open('tests/responses/item-list-831.json') as fd:
            ilist_831 = json.loads(fd.read())

        m.get(API_URL + '/item_lists/831', json=ilist_831)
        my_list = client.get_item_list_by_name(item_list_name)
        self.assertEqual(my_list.name(), item_list_name)

        # Add a second item to the existing list.
        msg = '1 items added to existing item list ' + item_list_name
        m.post(API_URL + '/item_lists', json={'success': msg})
        # The '/' separator was missing here, which glued 'catalog' directly
        # onto the host part of the URL; build it like the first item URL.
        new_item_url_2 = [base_url + '/catalog/ace/A01b']
        self.assertEqual(
            client.add_to_item_list(new_item_url_2, my_list.url()),
            '1 items added to existing item list ' + my_list.name())

        # Rename the list.
        ilist_831['name'] = 'brand new list'
        m.put(API_URL + '/item_lists/831', json=ilist_831)
        client.rename_item_list(my_list, 'brand new list')

        # Delete the list.
        m.delete(API_URL + '/item_lists/831',
                 json={'success': 'item list deleted'})
        self.assertEqual(client.delete_item_list(my_list), True)

        # Deleting a list that is no longer there raises an exception.
        m.delete(API_URL + '/item_lists/831', status_code=404)
        self.assertRaises(pyalveo.APIError, client.delete_item_list, my_list)
Example #19
0
    def test_modify_item(self, m):
        """Modifying an item sends the new metadata under a 'metadata' key."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        item_uri = API_URL + "/catalog/%s/%s" % ("testcollection1", "item1")
        changes = {
            "http://ns.ausnc.org.au/schemas/ausnc_md_model/mode": "An updated test mode",
        }

        m.put(item_uri, json={'success': "item metadata updated"})
        client.modify_item(item_uri, changes)

        # The request body must wrap the metadata we passed in.
        sent_body = m.last_request.json()
        self.assertIn('metadata', sent_body)
        self.assertEqual(changes, sent_body['metadata'])
Example #20
0
    def test_delete_item(self, m):
        """Deleting an item issues a DELETE request."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        item_name = "item1"
        item_uri = API_URL + "/catalog/%s/%s" % ("testcollection1", item_name)

        # Register the endpoint, then delete the item.
        m.delete(item_uri, json={"success": item_name})
        client.delete_item(item_uri)

        last = m.last_request
        self.assertEqual(last.method, 'DELETE')
Example #21
0
    def test_delete_document(self, m):
        """Deleting a document issues a DELETE request."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        collection, item_name, doc_name = "testcollection1", "item1", "doc1.txt"
        document_uri = API_URL + "/catalog/%s/%s/documents/%s" % (
            collection, item_name, doc_name)

        # Register the endpoint, then delete the document.
        deleted_message = "Deleted the document %s from item %s in collection %s" % (
            doc_name, item_name, collection)
        m.delete(document_uri, json={"success": deleted_message})
        client.delete_document(document_uri)

        last = m.last_request
        self.assertEqual(last.method, 'DELETE')
Example #22
0
def find_hVd_words(api_key, speakerid, output, words='all'):
    """Find hVd word items for one speaker in the Austalk corpus and write
    them to a tab-separated file.

    :param api_key: API key used to create the client.
    :param speakerid: speaker identifier; substituted into the SPARQL query
        and written in the first output column.
    :param output: path of the file to write (overwritten).
    :param words: ``'all'`` for every word, or one of the group names
        ``'monopthongs'`` / ``'dipthongs'`` (spelled as in the table below).
    :raises KeyError: if ``words`` is neither ``'all'`` nor a group name.
    """

    # Pass the settings by keyword, like every other Client construction, so
    # they cannot be bound to the wrong parameter by position.
    client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)

    # NOTE(review): speakerid is interpolated into the query unescaped --
    # confirm it never comes from untrusted input.
    query = PREFIXES + """
SELECT distinct ?item ?prompt ?compname
WHERE {
  ?item a ausnc:AusNCObject .
  ?item olac:speaker ?speaker .
  ?speaker austalk:id "%s" .
  ?item austalk:prototype ?prot .
  ?prot austalk:prompt ?prompt .
  ?item austalk:componentName ?compname .
 """ % speakerid

    hVdWords = dict(
        monopthongs=[
            'head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', 'whod', 'herd',
            'haired', 'hard', 'horde'
        ],
        dipthongs=['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'])

    if words == 'all':
        words = hVdWords['monopthongs'] + hVdWords['dipthongs']
    else:
        words = hVdWords[words]

    # Case-insensitive whole-prompt match against any of the chosen words:
    # ^word1$|^word2$|...
    filterclause = 'FILTER regex(?prompt, "^' + '$|^'.join(words) + '$", "i")\n'

    query += filterclause + "}"

    print(query)
    result = client.sparql_query('austalk', query)

    items = [(b['prompt']['value'], b['item']['value'])
             for b in result['results']['bindings']]

    with open(output, 'w') as out:
        out.write("Speaker\tPrompt\tItemURL\n")
        for prompt, item_url in items:
            out.write(speakerid + "\t" + prompt + "\t" + item_url + "\n")
def main():
    """Command-line entry point: write the item URLs of an item list to a
    file.

    Reads the API key from the file named by ``args.api_key``, fetches the
    item list at ``args.item_list_url`` and writes one item per line to
    ``args.output`` under an ``ItemURL`` header. An ``APIError`` is reported
    on stderr and ends the process with exit status 1.
    """
    args = parser()
    try:
        # Use a context manager so the key file is closed straight away.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()

        client = pyalveo.Client(api_key=api_key,
                                api_url=API_URL,
                                use_cache=False)
        item_list = client.get_item_list(args.item_list_url)

        with open(args.output, 'w') as out:
            out.write("ItemURL\n")
            for item in item_list:
                out.write(item + "\n")

    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
Example #24
0
def retrieve_doc_as_user(document_id, api_key):
    """Fetch a document from Alveo using the given user's API key.

    :param document_id: identifier passed to ``client.get_document``.
    :param api_key: the user's API key.
    :return: whatever ``client.get_document`` returns, or ``None`` when the
        retrieval fails.

    Calls ``abort(404, ...)`` when the 'alveo' module metadata is unavailable.
    """
    import logging

    alveo_metadata = get_module_metadata("alveo")
    if alveo_metadata is None:
        abort(404, "Could not segment document. 'alveo' module not loaded")

    api_url = alveo_metadata['api_url']
    client = pyalveo.Client(api_url=api_url,
                            api_key=api_key,
                            use_cache=False,
                            update_cache=False,
                            cache_dir=None)

    try:
        return client.get_document(document_id)
    except Exception:
        # Best effort: the caller receives None on failure. Catch Exception
        # rather than BaseException so KeyboardInterrupt and SystemExit still
        # propagate, and record the failure instead of hiding it. The API key
        # is deliberately not logged.
        logging.getLogger(__name__).exception(
            "Could not retrieve document %s", document_id)
        return None
Example #25
0
    def test_download_document(self, m):
        """Download a document's content into a temporary directory and check
        that the output file exists."""
        import shutil

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY, use_cache=False)

        # Temporary directory for the output; remove it when the test ends,
        # pass or fail, so repeated runs do not leave directories behind.
        output_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, output_dir, True)
        outname = "downloaded_sample.wav"

        document_url = client.oauth.api_url + '/catalog/cooee/1-190/document/sample.wav'

        meta = {'alveo:url': document_url}
        document = pyalveo.Document(meta, client)

        # The mock streams the body from the open file, so download while
        # the file is still open.
        with open('tests/responses/sample.wav', 'rb') as rh:
            m.get(document_url, body=rh)
            document.download_content(output_dir, outname, force_download=True)

        self.assertTrue(os.path.exists(os.path.join(output_dir, outname)))
Example #26
0
    def test_get_contribution(self, m):
        """Fetch a contribution by URL and check the returned details."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        contrib_id = '29'
        contrib_url = client.oauth.api_url + "/contrib/" + contrib_id

        # Canned server response; note that it carries no 'id' entry itself.
        server_response = {
            'description': 'This is contribution description',
            'documents': [{
                'name': 'testfile.txt',
                'url': 'https://staging.alveo.edu.au/catalog/demotext/2006-05-28-19/document/testfile.txt',
            }],
            'metadata': {
                'abstract': '"This is contribution abstract"',
                'collection': 'https://staging.alveo.edu.au/catalog/demotext',
                'created': '2018-12-06T05:46:11Z',
                'creator': 'Data Owner',
                'title': 'HelloWorld',
            },
            'name': 'HelloWorld',
            'url': contrib_url,
        }
        m.get(contrib_url, json=server_response)

        details = client.get_contribution(contrib_url)

        sent = m.last_request
        self.assertEqual(sent.method, "GET")
        self.assertEqual(details['id'], contrib_id)
        self.assertEqual(details['description'],
                         'This is contribution description')
0
def get_alveo_data():
    """Using the Alveo API get the audio data for the configured
    item list.

    Every document whose filename ends in "wav" is downloaded into DATA_DIR,
    which is created if it does not exist.

    Return a list of speaker identifiers (one per item) and a list of file
    basenames, without extension, that have been stored in DATA_DIR.

    config: ITEM_LIST_URL, DATA_DIR, ALVEO_API_URL, ALVEO_API_KEY
    """

    item_list_url = config("ITEM_LIST_URL")

    client = pyalveo.Client(api_url=config("ALVEO_API_URL"),
                            api_key=config("ALVEO_API_KEY"))
    item_list = client.get_item_list(item_list_url)

    # For each item we need the speaker identifier and the target audio file.
    item_meta = item_list.get_all()

    speakers = [
        i.metadata()['alveo:metadata']['olac:speaker'] for i in item_meta
    ]

    data_dir = config("DATA_DIR")

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    basenames = []
    for item in item_meta:
        for doc in item.get_documents():
            filename = doc.get_filename()
            if not filename.endswith("wav"):
                continue
            # Download into the configured directory; the previous hard-coded
            # "data" ignored DATA_DIR even though that directory was created
            # just above.
            doc.download_content(dir_path=data_dir)
            basenames.append(os.path.splitext(os.path.basename(filename))[0])
    print("Downloaded", len(basenames), "files")

    return speakers, basenames
Example #28
0
    def test_create_client_oauth(self, m):
        """A client built from OAuth credentials exposes the expected
        authorisation and redirect URLs."""

        oauth_url = API_URL + '/oauth/authorize'
        redirect_url = API_URL + '/oauth_redirect/'

        m.get(redirect_url, json={})

        credentials = dict(
            client_id='foobar',
            client_secret='secret client',
            redirect_url=redirect_url,
        )
        client = pyalveo.Client(api_url=API_URL,
                                oauth=credentials,
                                configfile="missing.config",
                                verifySSL=False)

        # The request OAuth makes cannot be captured here, so check the
        # settings that result from it instead.
        oauth = client.oauth
        self.assertTrue(oauth.auth_url.startswith(oauth_url))
        self.assertEqual(oauth.redirect_url, redirect_url)
Example #29
0
    def test_add_item(self, m):
        """Adding an item posts JSON, with the API key, to the collection."""

        m.get(API_URL + "/item_lists.json", json={'success': 'yes'})
        client = pyalveo.Client(api_url=API_URL, api_key=API_KEY)

        item_name = "item1"
        collection_uri = API_URL + "/catalog/" + "testcollection1"
        m.post(collection_uri, json={"success": [item_name]})

        metadata = {
            'dcterms:title': 'Test Item',
            'dcterms:creator': 'A. Programmer',
        }
        item_uri = client.add_item(collection_uri, item_name, metadata)

        self.assertIn(item_name, item_uri)

        # Inspect the request that was sent.
        sent = m.last_request
        self.assertEqual(sent.method, 'POST')
        headers = sent.headers
        self.assertEqual(headers['Content-Type'], 'application/json')
        self.assertEqual(headers['X-API-KEY'], API_KEY)
        self.assertIn('items', sent.json())
def get_item_list(api_key, item_list_url):
    """Return the item list at ``item_list_url``, fetched with a client
    created from ``api_key`` that has caching disabled."""
    uncached_client = pyalveo.Client(
        api_key=api_key,
        api_url=API_URL,
        use_cache=False,
    )
    item_list = uncached_client.get_item_list(item_list_url)
    return item_list