def upload_dir(self, dirname):
    """Upload every RDF file found under dirname, recursively.

    A file is selected when its name ends with RDF_GRAPH_FORMAT.

    When the BATCH_UPLOAD_RDF config setting is 'yes', files are sent in
    groups of at most BATCH_UPLOAD_SIZE (default 100) through
    self.upload_many(); otherwise each file is sent on its own through
    self.upload().

    Every file whose upload raises (or every file of a batch that raises)
    is retried once, individually, after the directory walk has finished.
    Failures are reported on stdout; a failure during the retry is reported
    and then dropped, so upload errors are not propagated to the caller.

    dirname -- root directory to search
    """

    batch_mode = configmanager.get_config('BATCH_UPLOAD_RDF', 'no') == 'yes'
    batch_size = int(configmanager.get_config('BATCH_UPLOAD_SIZE', 100))

    # full paths of files that failed on the first attempt
    retry = []

    def upload_batch(batch):
        """Send one batch; on failure queue all of its files for retry."""
        try:
            self.upload_many(batch)
        except Exception as e:
            print("problem uploading %d files from  %s" % (len(batch), dirname))
            print(e)
            retry.extend(batch)

    pending = []
    for dirpath, _, filenames in os.walk(dirname):
        for fn in filenames:
            if not fn.endswith(RDF_GRAPH_FORMAT):
                continue
            path = os.path.join(dirpath, fn)

            if batch_mode:
                pending.append(path)
                # '>=' rather than '>' so that a batch never holds more
                # than the configured number of files
                if len(pending) >= batch_size:
                    upload_batch(pending)
                    pending = []
            else:
                try:
                    self.upload(path)
                except Exception as e:
                    print("problem uploading %s" % (fn,))
                    print(e)
                    retry.append(path)

    # upload any remaining, partially filled batch
    if pending:
        upload_batch(pending)

    if retry:
        print("Retrying  %d uploads" % len(retry))
        while retry:
            path = retry.pop()
            try:
                self.upload(path)
            except Exception as e:
                print("problem with retry of  %s" % (path,))
                print(e)
# Example #2
# 0
def upload_collectionrecords(server):
    
    import glob 
    
    basedir = configmanager.get_config("CORPUS_BASEDIR")
    for f in glob.glob(os.path.join(basedir, '*.n3')):
        print "Uploading", f
        server.upload(f)
    def report(self, files, datasize, linecount):
        """Add to the running triple count and append one CSV line to the log.

        The line written to self.logfile holds, in order: the current time,
        files, datasize, the time elapsed since self.starttime, the size
        added by this call and the updated self.triplecount.

        files     -- written to the log unchanged
        datasize  -- written to the log unchanged
        linecount -- number of lines uploaded; used to estimate the number
                     of triples unless the QUERY_SIZE setting is 'yes'
        """

        ask_store = configmanager.get_config('QUERY_SIZE', 'no') == 'yes'

        # Without QUERY_SIZE the size is estimated from the number of
        # lines; 1.172 is a factor measured on the Austalk data to account
        # for duplicate triples.
        # NOTE(review): with QUERY_SIZE the value of self.size() is *added*
        # to triplecount -- confirm it is an increment, not a running total.
        size = self.size() if ask_store else linecount/1.172

        self.triplecount += size

        with open(self.logfile, "a") as log:
            stamp = time.time()
            elapsed = time.time()-self.starttime
            record = (stamp, files, datasize, elapsed, size, self.triplecount)
            log.write("%f,%d,%d,%f,%d,%d\n" % record)
"""Support for uploading RDF data directly to a Sesame instance"""

import urllib, urllib2
import os
import json
import time
import configmanager
configmanager.configinit()

RDF_GRAPH_FORMAT = configmanager.get_config("RDF_GRAPH_FORMAT", "nt")

import tempfile

class RequestWithMethod(urllib2.Request):
    """A urllib2.Request whose HTTP method can be chosen by the caller.

    Takes the same arguments as urllib2.Request plus an optional ``method``
    keyword (for example 'PUT' or 'DELETE'). When no method is given, or
    the given one is empty/None, the method chosen by urllib2.Request
    itself is used.
    """

    def __init__(self, *args, **kwargs):
        # Always remove 'method' before delegating, even when its value is
        # falsy, so the keyword is never forwarded to the base constructor.
        self._method = kwargs.pop('method', None)
        urllib2.Request.__init__(self, *args, **kwargs)

    def get_method(self):
        """Return the explicit HTTP method if one was set, else the default."""
        if self._method:
            return self._method
        # urllib2.Request is an old-style class in Python 2, so super()
        # raises TypeError on it; call the base implementation directly,
        # the same way __init__ does.
        return urllib2.Request.get_method(self)


class SesameServer():
    """A utility class to support HTTP interaction with a sesame triple store"""

    def __init__(self, url, logfile='benchmark.csv'):

        self.url = url
        self.logfile = logfile
# Example #5
# 0
def maus(wavfile,
         text,
         language='aus',
         canonly=False,
         minpauselen=5,
         startword=0,
         endword=999999,
         mausshift=10,
         insprob=0.0,
         inskantextgrid=True,
         insorttextgrid=True,
         usetrn=False,
         outformat='TextGrid',
         lexicon=None):
    """Send the given wavfile to MAUS for forced alignment
    text is the orthographic transcription

    wavfile is the path of the audio file. lexicon, when given, is passed
    to load_lexicon(); otherwise the default lexicon is loaded. All other
    keyword arguments are handed to MAUS as the option with the
    corresponding upper-case name (minpauselen is sent as MINPAUSLEN);
    booleans are converted with maus_boolean().

    If the MAUS_LOCAL config setting is 'yes' the program named by the
    MAUS_PROGRAM setting is run locally, otherwise the request is posted
    to the web service at MAUS_URL.

    returns the text of the textgrid returned by MAUS
    raises MausException if there was an error: when no phonetic
    transcription can be generated for text, when the web service
    answers with an HTTP error (the exception carries the HTTP status
    message), or when the result is not a TextGrid (the exception
    carries whatever MAUS returned)

>>> txt = maus("test/bassinette-sample-16.wav", "bassinette")
>>> txt.find('xmax')
62
>>> txt.find('b{s@net')
896
>>> txt = maus("test/bassinette-sample-16.wav", "not in the lexicon")
Traceback (most recent call last):
MausException: Can't generate phonetic transcription for text 'not in the lexicon'

# a bad request, send a text file
>>> maus("annotate/maus.py", "bassinette")
Traceback (most recent call last):
MausException: Internal Server Error

# another bad request, an unknown language
>>> maus("test/bassinette-sample-16.wav", "bassinette", language='unknown')
Traceback (most recent call last):
MausException: Internal Server Error

#maus("test/bassinette-sample-16.wav", "bassinette", outformat="EMU")
#something
    """

    lex = load_lexicon() if lexicon is None else load_lexicon(lexicon)
    phb = text_phb(text, lex)

    if phb is None:
        truncated_text = (text[:100] + '...') if len(text) > 100 else text
        raise MausException(
            "Can't generate phonetic transcription for text '%s'" %
            truncated_text)

    # Options shared by both ways of calling MAUS. SIGNAL and BPF (and OUT
    # for a local run) differ between the two and are added by the helpers.
    params = {
        'LANGUAGE': language,
        'CANONLY': maus_boolean(canonly),
        'MINPAUSLEN': str(minpauselen),
        'STARTWORD': str(startword),
        'ENDWORD': str(endword),
        'MAUSSHIFT': str(mausshift),
        'INSPROB': str(insprob),
        'OUTFORMAT': str(outformat),
        'USETRN': maus_boolean(usetrn),
        'INSKANTEXTGRID': maus_boolean(inskantextgrid),
        'INSORTTEXTGRID': maus_boolean(insorttextgrid),
    }

    if configmanager.get_config("MAUS_LOCAL", "no") == "yes":
        result = _maus_local(wavfile, phb, params)
    else:
        result = _maus_web(wavfile, phb, params)

    if result.startswith('File type = "ooTextFile"'):
        # everything was ok
        return result

    # some kind of error
    raise MausException(result)


def _maus_local(wavfile, phb, params):
    """Run the local MAUS program; return its output text or an error string."""

    params = dict(params, SIGNAL=wavfile)
    error = "Error Calling MAUS"

    bpf = NamedTemporaryFile(prefix='bpf', delete=False)
    outname = None
    try:
        try:
            bpf.write(phb)
        finally:
            bpf.close()
        params['BPF'] = bpf.name

        # only the name is needed: MAUS writes its result to this path
        outfile = NamedTemporaryFile(prefix='textgrid', delete=False)
        outfile.close()
        outname = outfile.name
        params['OUT'] = outname

        maus_cmd = [configmanager.get_config("MAUS_PROGRAM")]
        maus_cmd.extend("%s=%s" % (key, value)
                        for key, value in params.items())

        try:
            # send all program output to nowhere and block until it exits
            with open(os.devnull, "w") as devnull:
                subprocess.Popen(maus_cmd,
                                 stdout=devnull,
                                 stderr=devnull).wait()
        except Exception as e:
            # best effort: a failure to run MAUS is reported through the
            # returned error text rather than raised from here
            error = "Error Calling MAUS: %s" % (e,)

        result = ''
        if os.path.exists(outname):
            with open(outname) as fh:
                result = fh.read()

        # The output file is created empty above, so an empty result means
        # MAUS wrote nothing; report that the same way as a missing file.
        return result or error
    finally:
        for name in (bpf.name, outname):
            if name is not None and os.path.exists(name):
                os.unlink(name)


def _maus_web(wavfile, phb, params):
    """Post the request to the MAUS web service; return the response body."""

    handler = MultipartPostHandler.MultipartPostHandler(debuglevel=0)
    opener = urllib2.build_opener(handler)
    maus_url = configmanager.get_config("MAUS_URL")

    # the audio is binary data, and the handle is closed once the
    # request has been answered
    with open(wavfile, 'rb') as signal:
        params = dict(params, SIGNAL=signal, BPF=StringIO(phb))
        try:
            response = opener.open(maus_url, params)
        except urllib2.HTTPError as e:
            raise MausException(e.msg)

        try:
            return response.read()
        finally:
            response.close()
# Example #6
# 0
'''

import urllib, urllib2
import MultipartPostHandler
import os, sys
import subprocess
from StringIO import StringIO
import configmanager
configmanager.configinit()
from rdflib import Graph, Literal, URIRef
import convert
from convert.namespaces import *
from data import COMPONENT_MAP
from tempfile import NamedTemporaryFile

OUTPUT_DIR = configmanager.get_config('OUTPUT_DIR')

LEXICON = os.path.join(os.path.dirname(__file__), "AUSTALK.lex")


class MausException(Exception):
    """Error raised by maus() when a forced-alignment request fails."""


def maus_boolean(value):
    """Turn a Python boolean value into a
    'true' or 'false for MAUS"""

    if value:
        return 'true'
    else:
def maus(wavfile, text, language='aus', canonly=False, minpauselen=5,
         startword=0, endword=999999, mausshift=10, insprob=0.0,
         inskantextgrid=True, insorttextgrid=True, usetrn=False, outformat='TextGrid',
         lexicon=None):
    """Send the given wavfile to MAUS for forced alignment
    text is the orthographic transcription

    wavfile is the path of the audio file. lexicon, when given, is passed
    to load_lexicon(); otherwise the default lexicon is loaded. All other
    keyword arguments are handed to MAUS as the option with the
    corresponding upper-case name (minpauselen is sent as MINPAUSLEN);
    booleans are converted with maus_boolean().

    If the MAUS_LOCAL config setting is 'yes' the program named by the
    MAUS_PROGRAM setting is run locally, otherwise the request is posted
    to the web service at MAUS_URL.

    returns the text of the textgrid returned by MAUS
    raises MausException if there was an error: when no phonetic
    transcription can be generated for text, when the web service
    answers with an HTTP error (the exception carries the HTTP status
    message), or when the result is not a TextGrid (the exception
    carries whatever MAUS returned)

>>> txt = maus("test/bassinette-sample-16.wav", "bassinette")
>>> txt.find('xmax')
62
>>> txt.find('b{s@net')
896
>>> txt = maus("test/bassinette-sample-16.wav", "not in the lexicon")
Traceback (most recent call last):
MausException: Can't generate phonetic transcription for text 'not in the lexicon'

# a bad request, send a text file
>>> maus("annotate/maus.py", "bassinette")
Traceback (most recent call last):
MausException: Internal Server Error

# another bad request, an unknown language
>>> maus("test/bassinette-sample-16.wav", "bassinette", language='unknown')
Traceback (most recent call last):
MausException: Internal Server Error

#maus("test/bassinette-sample-16.wav", "bassinette", outformat="EMU")
#something
    """

    lex = load_lexicon() if lexicon is None else load_lexicon(lexicon)
    phb = text_phb(text, lex)

    if phb is None:
        truncated_text = (text[:100] + '...') if len(text) > 100 else text
        raise MausException("Can't generate phonetic transcription for text '%s'" % truncated_text)

    # Options shared by both ways of calling MAUS. SIGNAL and BPF (and OUT
    # for a local run) differ between the two and are added by the helpers.
    params = {
        'LANGUAGE': language,
        'CANONLY': maus_boolean(canonly),
        'MINPAUSLEN': str(minpauselen),
        'STARTWORD': str(startword),
        'ENDWORD': str(endword),
        'MAUSSHIFT': str(mausshift),
        'INSPROB': str(insprob),
        'OUTFORMAT': str(outformat),
        'USETRN': maus_boolean(usetrn),
        'INSKANTEXTGRID': maus_boolean(inskantextgrid),
        'INSORTTEXTGRID': maus_boolean(insorttextgrid),
    }

    if configmanager.get_config("MAUS_LOCAL", "no") == "yes":
        result = _maus_local(wavfile, phb, params)
    else:
        result = _maus_web(wavfile, phb, params)

    if result.startswith('File type = "ooTextFile"'):
        # everything was ok
        return result

    # some kind of error
    raise MausException(result)


def _maus_local(wavfile, phb, params):
    """Run the local MAUS program; return its output text or an error string."""

    params = dict(params, SIGNAL=wavfile)
    error = "Error Calling MAUS"

    bpf = NamedTemporaryFile(prefix='bpf', delete=False)
    outname = None
    try:
        try:
            bpf.write(phb)
        finally:
            bpf.close()
        params['BPF'] = bpf.name

        # only the name is needed: MAUS writes its result to this path
        outfile = NamedTemporaryFile(prefix='textgrid', delete=False)
        outfile.close()
        outname = outfile.name
        params['OUT'] = outname

        maus_cmd = [configmanager.get_config("MAUS_PROGRAM")]
        maus_cmd.extend("%s=%s" % (key, value)
                        for key, value in params.items())

        try:
            # send all program output to nowhere and block until it exits
            with open(os.devnull, "w") as devnull:
                subprocess.Popen(maus_cmd, stdout=devnull, stderr=devnull).wait()
        except Exception as e:
            # best effort: a failure to run MAUS is reported through the
            # returned error text rather than raised from here
            error = "Error Calling MAUS: %s" % (e,)

        result = ''
        if os.path.exists(outname):
            with open(outname) as fh:
                result = fh.read()

        # The output file is created empty above, so an empty result means
        # MAUS wrote nothing; report that the same way as a missing file.
        return result or error
    finally:
        for name in (bpf.name, outname):
            if name is not None and os.path.exists(name):
                os.unlink(name)


def _maus_web(wavfile, phb, params):
    """Post the request to the MAUS web service; return the response body."""

    handler = MultipartPostHandler.MultipartPostHandler(debuglevel=0)
    opener = urllib2.build_opener(handler)
    maus_url = configmanager.get_config("MAUS_URL")

    # the audio is binary data, and the handle is closed once the
    # request has been answered
    with open(wavfile, 'rb') as signal:
        params = dict(params, SIGNAL=signal, BPF=StringIO(phb))
        try:
            response = opener.open(maus_url, params)
        except urllib2.HTTPError as e:
            raise MausException(e.msg)

        try:
            return response.read()
        finally:
            response.close()
'''

import urllib, urllib2
import MultipartPostHandler
import os, sys
import subprocess
from StringIO import StringIO
import configmanager
configmanager.configinit()
from rdflib import Graph, Literal, URIRef
import convert
from convert.namespaces import *
from data import COMPONENT_MAP
from tempfile import NamedTemporaryFile

OUTPUT_DIR = configmanager.get_config('OUTPUT_DIR')

LEXICON = os.path.join(os.path.dirname(__file__), "AUSTALK.lex")

class MausException(Exception):
    """Error raised by maus() when a forced-alignment request fails."""

def maus_boolean(value):
    """Map a Python truth value onto the literal MAUS expects.

    Returns the string 'true' when value is truthy and the string
    'false' otherwise.
    """

    return 'true' if value else 'false'