예제 #1
0
class TranscribeRunner(RunnerBase):
    """Runner that transcribes the next recording waiting in the database.

    Each ``call`` fetches one record, downloads its audio blob to a local
    wav file, hands that file to the Google transcriber and writes the
    transcript and its status back to the database.
    """

    def __init__(self):
        self.blob_manager = BlobManager()
        self.googleTranscriber = GoogleTranscriber()
        self.azure_table = Database()

    def __str__(self):
        return "TranscribeRunner"

    def call(self):
        """Download, transcribe and store the result for one record."""
        next_record = self.azure_table.retrieve_next_record_for_transcribing()
        azure_blob, partition_key = next_record

        with TmpFileCleanup() as tmp_file_store:
            wav_name = "{0}.{1}".format(uuid.uuid4(), "wav")
            wav_path = local_tmp_dir + "/" + wav_name
            # Registered before the download starts; presumably the cleanup
            # context removes every path in tmp_files on exit (not visible
            # here, confirm against TmpFileCleanup).
            tmp_file_store.tmp_files.append(wav_path)
            self.blob_manager.download_wav_from_blob_and_save_to_local_file(
                azure_blob, wav_path)

            outcome = self.googleTranscriber.transcribe_audio_file_path(
                wav_path)
            transcript, transcription_status = outcome

            # Only echo the transcript when there is something to show.
            if transcript:
                message = "Transcript for {partition_key}: {transcript}"
                print(message.format(partition_key=partition_key,
                                     transcript=transcript))

            # The record is updated whether or not a transcript was produced.
            self.azure_table.update_transcript(
                partition_key, transcript, transcription_status)
예제 #2
0
class EntityRunner(RunnerBase):
    """Runner that extracts a location and a date/time from a transcript.

    Each ``call`` processes one record: it runs the location and date
    extractors over the transcript, stores whatever was found, and keeps a
    count of consecutive failures so the runner can stop after too many.
    """

    def __init__(self):
        self.azure_table = Database()
        # Consecutive failed extractions; reset to 0 on the next success.
        self.consec_error_count = 0

    def __str__(self):
        return "EntityRunner"

    def call(self):
        """Extract and persist location/date for the next record.

        Raises:
            TooManyErrorsException: when ``consec_error_count`` already
                exceeds ``self.MAX_ALLOWABLE_ERRORS`` (not defined in this
                class).
            EntityExtractionError: when the location or the date extractor
                returned ``None``. Whatever was extracted has already been
                written to the database by then.
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException

        transcript, partition_key = \
            self.azure_table.retrieve_next_record_for_extraction()
        location_dict = extract_location(transcript)
        print("Location for {0}: {1}".format(partition_key, str(location_dict)))
        date_dict = extract_date_time(transcript)
        print("Date, time for {0}: {1}".format(partition_key, str(date_dict)))

        # Persist before the failure check so a partial result is kept.
        self._process_location_date(partition_key, location_dict, date_dict)

        # Identity comparison is the correct test for None.
        if location_dict is None or date_dict is None:
            self.consec_error_count += 1
            raise EntityExtractionError(
                "failed to extract location or date for {0}"
                .format(partition_key))
        self.consec_error_count = 0

    def _process_location_date(self, case_number, location_dict, date_dict):
        """Flatten the extractor results and write them to the database.

        Args:
            case_number: key of the record to update.
            location_dict: mapping with "City", "Confidence_location",
                "State" and "Zipcode" keys, or ``None`` when no location
                was extracted.
            date_dict: mapping with "year", "month", "day", "hour" and
                "minute" keys, or ``None`` when no date was extracted.
        """
        # location
        city = None
        location_confidence = None
        state = None
        zipcode = None
        if location_dict is not None:
            city = location_dict["City"]
            location_confidence = location_dict["Confidence_location"]
            state = location_dict["State"]
            zipcode = location_dict["Zipcode"]

        # date
        date = None
        if date_dict is not None:
            year = date_dict["year"]
            month = date_dict["month"]
            day = date_dict["day"]
            # A missing (falsy) hour or minute defaults to 0.
            hour = date_dict["hour"] or 0
            minute = date_dict["minute"] or 0
            # only report a date if we found year, month and day
            if year is not None and month is not None and day is not None:
                date = datetime(year=year, month=month, day=day,
                                hour=hour, minute=minute)

        self.azure_table.update_location_date(
            case_number, city, location_confidence, state, zipcode, date)
예제 #3
0
class ExtractRunner(object):
    """Runner that extracts a location and a date from the next transcript."""

    def __init__(self):
        self.azureTable = Database()

    def call(self):
        """Extract location and date for one record and store them."""
        transcript, partition_key = \
            self.azureTable.retrieve_next_record_for_extraction()
        extractor = Extractor(transcript)
        location = extractor.get_location()
        date = extractor.get_date()
        # str() keeps the log lines from raising TypeError when the
        # extractor returns something that is not a string (e.g. None).
        print("Location: " + str(location))
        print("Date: " + str(date))
        self.azureTable.update_location_date(partition_key, location, date)
예제 #4
0
class EntityRunner(RunnerBase):
    """Runner that extracts location and date/time data from a transcript."""

    def __init__(self):
        self.azure_table = Database()

    def __str__(self):
        return "EntityRunner"

    def call(self):
        """Run both extractors on the next record and store their output."""
        next_record = self.azure_table.retrieve_next_record_for_extraction()
        transcript, partition_key = next_record

        found_location = extract_location(transcript)
        print("Location: " + str(found_location))

        found_date = extract_date_time(transcript)
        print("Date, time: " + str(found_date))

        # Both results are passed through exactly as the extractors
        # returned them.
        self.azure_table.update_location_date(
            partition_key, found_location, found_date)
예제 #5
0
class TranscribeRunner(RunnerBase):
    """Runner that transcribes the next recording waiting in the database.

    Each ``call`` downloads one recording, trims it with ``sox``, sends the
    trimmed file to the Google transcriber and stores the result. It also
    counts consecutive failures so the runner can stop after too many.
    """

    def __init__(self):
        self.blob_manager = BlobManager()
        self.googleTranscriber = GoogleTranscriber()
        self.azure_table = Database()
        # Consecutive failed transcriptions; reset to 0 on success.
        self.consec_error_count = 0

    def __str__(self):
        return "TranscribeRunner"

    def call(self):
        """Transcribe one record and persist the transcript and status.

        Raises:
            TooManyErrorsException: when ``consec_error_count`` already
                exceeds ``self.MAX_ALLOWABLE_ERRORS`` (not defined in this
                class).
            TranscriptionError: when the transcriber reports any status
                other than ``TranscriptionStatus.success``. The status has
                already been written to the database by then.
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException

        azure_blob, partition_key = \
            self.azure_table.retrieve_next_record_for_transcribing()

        with TmpFileCleanup() as tmp_file_store:
            filename = "{0}.{1}".format(uuid.uuid4(), "wav")
            local_filename = local_tmp_dir + "/" + filename
            local_trim_filename = local_tmp_dir + "/trim_" + filename
            # Register BOTH files before they are created. Previously only
            # the download was tracked, so the trimmed copy was never handed
            # to the cleanup context (presumably it removes every path in
            # tmp_files on exit; confirm against TmpFileCleanup).
            tmp_file_store.tmp_files.append(local_filename)
            tmp_file_store.tmp_files.append(local_trim_filename)
            self.blob_manager.download_wav_from_blob_and_save_to_local_file(
                azure_blob,
                local_filename,
            )
            # Write a copy trimmed to "0"-"59" for the transcriber. The sox
            # exit status is not checked here.
            subprocess.call(
                ["sox", local_filename, local_trim_filename, "trim", "0", "59"]
            )
            transcript, transcription_status = \
                self.googleTranscriber.transcribe_audio_file_path(
                    local_trim_filename,
                )

        # Persist the outcome first so a failure status is recorded as well.
        self.azure_table.update_transcript(
            partition_key,
            transcript,
            transcription_status,
        )
        if transcription_status != TranscriptionStatus.success:
            self.consec_error_count += 1
            # format() instead of "+" so a non-string status cannot turn the
            # intended TranscriptionError into a TypeError.
            raise TranscriptionError(
                "Transcription failed, status: {0}".format(
                    transcription_status))

        self.consec_error_count = 0
        print("Transcript for {partition_key}: {transcript}".format(
            partition_key=partition_key, transcript=transcript))
예제 #6
0
class CourtCallRunner(RunnerBase):
    """Runner that places a call for the next record waiting for one.

    The call wrapper reports progress through two callbacks defined on this
    class: one when the call has been placed and one when it has finished.
    """

    def __init__(self):
        self._database = Database()
        self._caller = TwilioCallWrapper(self._call_placed_callback,
            self._call_done_callback)
        self._caller.try_server()
        # Consecutive failed call attempts; reset to 0 on success.
        self.consec_error_count = 0

    def __str__(self):
        return "CourtCallRunner"

    def call(self):
        """
        Place a call for the next record that needs one.

        On any failure the record is put back into ``Statuses.new`` via
        ``set_error``, the consecutive error counter is incremented and the
        original exception is re-raised.

        Raises:
            TooManyErrorsException: when ``consec_error_count`` already
                exceeds ``self.MAX_ALLOWABLE_ERRORS`` (not defined in this
                class).
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException
        next_ain = self._database.retrieve_next_record_for_call()
        print("Processing {0}".format(next_ain))
        try:
            self._caller.place_call(next_ain)
            self.consec_error_count = 0
        except BaseException:
            # Deliberately catches everything (same reach as a bare except):
            # the record must be rolled back on any interruption, and the
            # exception is always re-raised.
            print("Rolling back {0}".format(next_ain))
            self._database.set_error(next_ain, Statuses.new)
            self.consec_error_count += 1
            raise

    def _call_placed_callback(self, ain, call_id):
        """ Update database to say that a call was started and set call id """
        print("Call placed: {0} {1}".format(ain, call_id))
        self._database.update_call_id(ain, call_id)

    def _call_done_callback(self, ain, call_duration, recording_uri):
        """
        Download the recording and re-upload it to Azure.

        The resulting Azure path is then saved on the record. Exceptions
        raised by the download/upload or the database update propagate to
        the caller unchanged.
        """
        print("Call duration was: {0}".format(call_duration))
        # The former try/except only re-raised, so it has been removed.
        azure_path = BlobManager().download_and_reupload(recording_uri)
        print("Azure path: ", azure_path)
        self._database.update_azure_path(ain, azure_path)
예제 #7
0
class ErrorRecovery(RunnerBase):
    """
    Recovery runner.

    Described by its author as handling two kinds of errors:
        - Stale progressions.
        - Error states.

    The ``call`` method shown here only resets stale calls.
    """

    default_sleep_time = 10 * 60  # ten minutes

    def __init__(self):
        self.azure_table = Database()

    def __str__(self):
        return "ErrorRecovery"

    def call(self):
        """Reset calls older than 14 days and print how many were reset."""
        stale_before = datetime.now() - timedelta(days=14)
        try:
            reset_count = self.azure_table.reset_stale_calls(stale_before)
        except NoRecordsToProcessError:
            # Nothing to reset is reported as a count of zero.
            reset_count = 0
        summary = "Reset {0} records".format(reset_count)
        print(summary)
예제 #8
0
import sys
from storage.models import Database

# Command-line entry point: exactly one argument, the input file path.
if len(sys.argv) != 2:
    print(
        "Usage: python insert.py <file>  # file should contain one A Number per line."
    )
    sys.exit(1)

# Read inside a context manager so the file handle is closed promptly.
# Dashes are removed from each number, and lines that are empty after
# cleaning are skipped so a stray blank line is not uploaded as a request.
with open(sys.argv[1]) as numbers_file:
    cleaned_lines = (line.strip().replace('-', '') for line in numbers_file)
    alien_numbers = [number for number in cleaned_lines if number]

db = Database()
db.create_table()  # checks if already exists
db.upload_new_requests(alien_numbers)
예제 #9
0
 def __init__(self):
     """Create the runner with its own ``Database`` instance."""
     table = Database()
     self.azureTable = table
예제 #10
0
import sys
from storage.models import Database

# Command-line entry point: exactly one argument, the input file path.
if len(sys.argv) != 2:
    print(
        "Usage: python insert.py <file>  # file should contain one A Number per line."
    )
    sys.exit(1)

# Read inside a context manager so the file handle is closed promptly.
# Dashes are removed from each number, and lines that are empty after
# cleaning are skipped so a stray blank line is not uploaded as a request.
with open(sys.argv[1]) as numbers_file:
    cleaned_lines = (line.strip().replace('-', '') for line in numbers_file)
    alien_numbers = [number for number in cleaned_lines if number]

db = Database()
db.upload_new_requests(alien_numbers)
예제 #11
0
 def __init__(self):
     """Create the three collaborators this runner uses.

     They are constructed in this order: blob manager, Google transcriber,
     database.
     """
     blobs = BlobManager()
     transcriber = GoogleTranscriber()
     table = Database()
     self.blob_manager = blobs
     self.googleTranscriber = transcriber
     self.azure_table = table
예제 #12
0
 def __init__(self):
     """Create the database and the call wrapper, then try the server.

     The call wrapper is given this object's "call placed" and "call done"
     callbacks.
     """
     self._database = Database()
     on_placed = self._call_placed_callback
     on_done = self._call_done_callback
     caller = TwilioCallWrapper(on_placed, on_done)
     self._caller = caller
     caller.try_server()
예제 #13
0
                        action='store_true')
    parser.add_argument('--re_transcribe',
                        help='Include previously transcribed records in entity transcription, used for testing',
                        action='store_true')
    parser.add_argument('--tryAgain',
                        help='Try calling again numbers for which we failed to get location date info',
                        action='store_true')
    parser.add_argument('--setCallingToNew',
                        help='Resets statuses stuck on calling to new',
                        action='store_true')


    args = vars(parser.parse_args())

    if args.pop('tryAgain'):
        db = Database()
        db.change_status(Statuses.failed_to_return_info, Statuses.new)

    if args.pop('setCallingToNew'):
        db = Database()
        db.change_status(Statuses.calling, Statuses.new)


    if args.pop('re_extract'):
        db = Database()
        db.change_status(Statuses.extracting_done, Statuses.transcribing_done)
        db.change_status(Statuses.extracting, Statuses.transcribing_done)

    if args.pop('re_transcribe'):
        db = Database()
        db.change_status(Statuses.transcribing_done, Statuses.recording_ready)
예제 #14
0
from flask_admin.model import BaseModelView
from flask_admin.model.template import EndpointLinkRowAction, LinkRowAction
from flask_admin.form import BaseForm
from flask_admin.form.rules import *
from flask_admin import Admin, BaseView, expose
from flask_admin.actions import action

from wtforms import SelectField
from wtforms.validators import DataRequired
from wtforms import StringField

from server.filters import EqualFilter
from storage.models import Statuses, Database

# Module-level Database instance, constructed once when this module is imported.
db = Database()


class AinView(BaseModelView):
    """Flask-Admin model view configured through the class attributes below.

    NOTE(review): the attribute comments follow the Flask-Admin
    ``BaseModelView`` documentation; confirm against the installed version.
    """

    # Plain-dict copy of the ``Statuses`` namespace (``Statuses.__dict__``).
    statuses = dict(Statuses.__dict__)

    # Columns offered as filters, and use readable filter names in URLs.
    column_filters = ('Status', 'AlienID', 'Zipcode', 'Last Step Error')
    named_filter_urls = True

    # Paging: 20 rows by default, and the user may change the page size.
    can_set_page_size = True
    page_size = 20

    # Enable the read-only details view.
    can_view_details = True

    # Show the create, details and edit views in modal windows.
    create_modal = True
    details_modal = True
    edit_modal = True
예제 #15
0
 def __init__(self):
     """Create the database instance and start with no recorded errors."""
     table = Database()
     self.azure_table = table
     # Consecutive error counter, starting at zero.
     self.consec_error_count = 0