class TranscribeRunner(RunnerBase):
    """Runner that transcribes the next recorded call and stores the result.

    Each ``call()`` fetches one record from the database, downloads its wav
    blob to a temporary local file, runs it through the Google transcriber
    and writes the transcript and status back to the database.
    """

    def __init__(self):
        self.blob_manager = BlobManager()
        self.googleTranscriber = GoogleTranscriber()
        self.azure_table = Database()

    def __str__(self):
        return "TranscribeRunner"

    def call(self):
        """Transcribe one pending recording and persist the outcome."""
        record = self.azure_table.retrieve_next_record_for_transcribing()
        azure_blob, partition_key = record

        with TmpFileCleanup() as cleanup:
            # A random name keeps downloads from colliding with each other.
            wav_name = "{0}.wav".format(uuid.uuid4())
            wav_path = local_tmp_dir + "/" + wav_name
            # Register the path up front so it is cleaned up even when the
            # download or the transcription fails.
            cleanup.tmp_files.append(wav_path)

            self.blob_manager.download_wav_from_blob_and_save_to_local_file(
                azure_blob, wav_path)

            result = self.googleTranscriber.transcribe_audio_file_path(
                wav_path)
            transcript, transcription_status = result

            if transcript:
                message = "Transcript for {partition_key}: {transcript}"
                print(message.format(partition_key=partition_key,
                                     transcript=transcript))

            # The status is stored whether or not any text came back.
            self.azure_table.update_transcript(
                partition_key, transcript, transcription_status)
class EntityRunner(RunnerBase):
    """Runner that extracts a location and a hearing date from a transcript.

    Each ``call()`` processes one record: it runs the location and date
    extractors over the transcript, stores whatever was found, and raises
    ``EntityExtractionError`` when either extractor returned ``None``.
    Consecutive failures are counted; once the count exceeds
    ``MAX_ALLOWABLE_ERRORS`` the runner refuses to continue.
    """

    def __init__(self):
        self.azure_table = Database()
        # Number of ``call()`` invocations in a row that failed extraction.
        self.consec_error_count = 0

    def __str__(self):
        return "EntityRunner"

    def call(self):
        """Extract and store entities for the next record.

        Raises:
            TooManyErrorsException: too many consecutive extraction failures.
            EntityExtractionError: location or date could not be extracted
                for this record (partial results are still stored first).
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException

        transcript, partition_key = \
            self.azure_table.retrieve_next_record_for_extraction()

        location_dict = extract_location(transcript)
        print("Location for {0}: {1}".format(partition_key,
                                             str(location_dict)))
        date_dict = extract_date_time(transcript)
        print("Date, time for {0}: {1}".format(partition_key, str(date_dict)))

        # Persist first so a partial result (only location or only date) is
        # not lost when the failure is reported below.
        self._process_location_date(partition_key, location_dict, date_dict)

        if location_dict is None or date_dict is None:
            self.consec_error_count += 1
            raise EntityExtractionError(
                "failed to extract location or date for {0}"
                .format(partition_key))
        else:
            self.consec_error_count = 0

    def _process_location_date(self, case_number, location_dict, date_dict):
        """Flatten the extractor results and write them to the database.

        Args:
            case_number: key of the record being updated.
            location_dict: mapping with "City", "Confidence_location",
                "State" and "Zipcode" keys, or ``None`` when nothing was
                extracted.
            date_dict: mapping with "year", "month", "day", "hour" and
                "minute" keys, or ``None`` when nothing was extracted.
        """
        # location
        city = None
        location_confidence = None
        state = None
        zipcode = None
        if location_dict is not None:
            city = location_dict["City"]
            location_confidence = location_dict["Confidence_location"]
            state = location_dict["State"]
            zipcode = location_dict["Zipcode"]

        # date
        date = None
        if date_dict is not None:
            year = date_dict["year"]
            month = date_dict["month"]
            day = date_dict["day"]
            # A missing time of day falls back to midnight.
            hour = date_dict["hour"] or 0
            minute = date_dict["minute"] or 0
            # only report a date if we found year, month and day
            if year is not None and month is not None and day is not None:
                date = datetime(year=year, month=month, day=day,
                                hour=hour, minute=minute)

        self.azure_table.update_location_date(
            case_number, city, location_confidence, state, zipcode, date)
class ExtractRunner(object):
    """Runner that extracts location and date from the next transcript."""

    def __init__(self):
        self.azureTable = Database()

    def call(self):
        """Extract location and date for one record and store them."""
        transcript, partition_key = \
            self.azureTable.retrieve_next_record_for_extraction()
        extractor = Extractor(transcript)
        location = extractor.get_location()
        date = extractor.get_date()
        # Use format() rather than "+" so that a None or non-string result
        # from the extractor is logged instead of raising TypeError before
        # the database update is reached.
        print("Location: {0}".format(location))
        print("Date: {0}".format(date))
        self.azureTable.update_location_date(partition_key, location, date)
class EntityRunner(RunnerBase):
    """Runner that pulls location and date/time entities out of a transcript.

    One ``call()`` handles one record: both extractors are run over the
    transcript and their raw results are written back to the database.
    """

    def __init__(self):
        self.azure_table = Database()

    def __str__(self):
        return "EntityRunner"

    def call(self):
        """Run both extractors on the next record and store their output."""
        record = self.azure_table.retrieve_next_record_for_extraction()
        transcript, partition_key = record

        where = extract_location(transcript)
        print("Location: {0!s}".format(where))

        when = extract_date_time(transcript)
        print("Date, time: {0!s}".format(when))

        self.azure_table.update_location_date(partition_key, where, when)
class TranscribeRunner(RunnerBase):
    """Runner that transcribes the next recorded call and stores the result.

    Each ``call()`` downloads one wav blob, trims it with ``sox``, sends the
    trimmed audio to the Google transcriber and writes the transcript and
    status back to the database. Consecutive failures are counted; once the
    count exceeds ``MAX_ALLOWABLE_ERRORS`` the runner refuses to continue.
    """

    def __init__(self):
        self.blob_manager = BlobManager()
        self.googleTranscriber = GoogleTranscriber()
        self.azure_table = Database()
        # Number of ``call()`` invocations in a row that failed to transcribe.
        self.consec_error_count = 0

    def __str__(self):
        return "TranscribeRunner"

    def call(self):
        """Transcribe one pending recording.

        Raises:
            TooManyErrorsException: too many consecutive failures.
            TranscriptionError: the transcriber reported a non-success
                status (the status is still written to the database first).
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException

        azure_blob, partition_key = \
            self.azure_table.retrieve_next_record_for_transcribing()

        with TmpFileCleanup() as tmp_file_store:
            filename = "{0}.{1}".format(uuid.uuid4(), "wav")
            local_filename = local_tmp_dir + "/" + filename
            local_trim_filename = local_tmp_dir + "/trim_" + filename
            # Register BOTH temporary files before they are created so they
            # are removed even if a later step fails. Previously only the
            # downloaded file was registered and the trimmed copy leaked.
            tmp_file_store.tmp_files.append(local_filename)
            tmp_file_store.tmp_files.append(local_trim_filename)

            self.blob_manager.download_wav_from_blob_and_save_to_local_file(
                azure_blob, local_filename,
            )

            # Keep only the first 59 seconds of audio.
            # NOTE(review): presumably to stay under a transcriber length
            # limit - confirm. The sox exit status is not checked.
            subprocess.call(
                ["sox", local_filename, local_trim_filename,
                 "trim", "0", "59"]
            )

            transcript, transcription_status = \
                self.googleTranscriber.transcribe_audio_file_path(
                    local_trim_filename,
                )

            # Record the outcome before reporting a failure so the status in
            # the database always reflects the latest attempt.
            self.azure_table.update_transcript(
                partition_key, transcript, transcription_status,
            )

            if transcription_status != TranscriptionStatus.success:
                self.consec_error_count += 1
                # format() avoids a TypeError masking the real error when
                # the status is not a plain string.
                raise TranscriptionError(
                    "Transcription failed, status: {0}"
                    .format(transcription_status))
            else:
                self.consec_error_count = 0
                print("Transcript for {partition_key}: {transcript}"
                      .format(partition_key=partition_key,
                              transcript=transcript))
class CourtCallRunner(RunnerBase):
    """Runner that places a phone call for the next record in the database.

    The Twilio wrapper reports progress through two callbacks registered in
    ``__init__``: one when the call has been placed and one when it is done.
    Consecutive failures are counted; once the count exceeds
    ``MAX_ALLOWABLE_ERRORS`` the runner refuses to continue.
    """

    def __init__(self):
        self._database = Database()
        self._caller = TwilioCallWrapper(self._call_placed_callback,
                                         self._call_done_callback)
        self._caller.try_server()
        # Number of ``call()`` invocations in a row that failed.
        self.consec_error_count = 0

    def __str__(self):
        return "CourtCallRunner"

    def call(self):
        """Place a call for the next record.

        On any failure the record is rolled back to ``Statuses.new`` and the
        original exception is re-raised. Per the original author's note, the
        following pipeline stages are speech-to-text and then semantic
        extraction.

        Raises:
            TooManyErrorsException: too many consecutive failures.
        """
        if self.consec_error_count > self.MAX_ALLOWABLE_ERRORS:
            raise TooManyErrorsException

        next_ain = self._database.retrieve_next_record_for_call()
        print("Processing {0}".format(next_ain))
        try:
            self._caller.place_call(next_ain)
        except BaseException:
            # Deliberately as broad as the former bare ``except:``: the
            # record must be rolled back whatever interrupted the call, and
            # the exception is always re-raised.
            print("Rolling back {0}".format(next_ain))
            self._database.set_error(next_ain, Statuses.new)
            self.consec_error_count += 1
            raise
        else:
            self.consec_error_count = 0

    def _call_placed_callback(self, ain, call_id):
        """Record in the database that a call was started and store its id."""
        print("Call placed: {0} {1}".format(ain, call_id))
        self._database.update_call_id(ain, call_id)

    def _call_done_callback(self, ain, call_duration, recording_uri):
        """Copy the finished recording to Azure and store its location.

        Downloads the recording from ``recording_uri``, re-uploads it through
        ``BlobManager`` and saves the resulting Azure path on the record.
        Any exception raised while doing so propagates to the caller.
        """
        print("Call duration was: {0}".format(call_duration))
        azure_path = BlobManager().download_and_reupload(recording_uri)
        print("Azure path: ", azure_path)
        self._database.update_azure_path(ain, azure_path)
class ErrorRecovery(RunnerBase):
    """Runner that puts stuck records back into circulation.

    Intended to handle two kinds of errors:
    - Stale progressions.
    - Error states.
    """

    default_sleep_time = 10 * 60  # ten minutes

    def __init__(self):
        self.azure_table = Database()

    def __str__(self):
        return "ErrorRecovery"

    def call(self):
        """Reset calls older than two weeks and report how many were reset."""
        stale_before = datetime.now() - timedelta(weeks=2)
        reset_count = 0
        try:
            reset_count = self.azure_table.reset_stale_calls(stale_before)
        except NoRecordsToProcessError:
            # Nothing was stale; the count stays at zero.
            pass
        print("Reset {0} records".format(reset_count))
import sys

from storage.models import Database

# Command-line script: read A Numbers from a file (one per line) and upload
# them to the database as new requests.
if len(sys.argv) != 2:
    print(
        "Usage: python insert.py <file> # file should contain one A Number per line."
    )
    sys.exit(1)

# Read inside a ``with`` block so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(sys.argv[1]) as number_file:
    # Dashes are removed so "123-456-789" and "123456789" are stored alike.
    alien_numbers = [line.strip().replace('-', '') for line in number_file]

db = Database()
db.create_table()  # checks if already exists
db.upload_new_requests(alien_numbers)
def __init__(self):
    """Create a ``Database`` instance and keep it on ``self.azureTable``."""
    self.azureTable = Database()
import sys

from storage.models import Database

# Command-line script: read A Numbers from a file (one per line) and upload
# them to the database as new requests.
if len(sys.argv) != 2:
    print(
        "Usage: python insert.py <file> # file should contain one A Number per line."
    )
    sys.exit(1)

# Read inside a ``with`` block so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(sys.argv[1]) as number_file:
    # Dashes are removed so "123-456-789" and "123456789" are stored alike.
    alien_numbers = [line.strip().replace('-', '') for line in number_file]

db = Database()
db.upload_new_requests(alien_numbers)
def __init__(self):
    """Create the collaborators this object uses and store them on ``self``.

    Instantiates a ``BlobManager``, a ``GoogleTranscriber`` and a
    ``Database``, in that order.
    """
    self.blob_manager = BlobManager()
    self.googleTranscriber = GoogleTranscriber()
    self.azure_table = Database()
def __init__(self):
    """Create the database handle and the Twilio call wrapper.

    The wrapper is given this object's ``_call_placed_callback`` and
    ``_call_done_callback`` methods, then ``try_server()`` is invoked on it.
    """
    self._database = Database()
    self._caller = TwilioCallWrapper(self._call_placed_callback, self._call_done_callback)
    # NOTE(review): presumably verifies the callback server is reachable
    # before any call is placed - confirm against TwilioCallWrapper.
    self._caller.try_server()
action='store_true')
# Maintenance flags: each one, when given, rewrites record statuses in the
# database before anything else runs.
parser.add_argument('--re_transcribe',
                    help='Include previously transcribed records in entity transcription, used for testing',
                    action='store_true')
parser.add_argument('--tryAgain',
                    help='Try calling again numbers for which we failed to get location date info',
                    action='store_true')
parser.add_argument('--setCallingToNew',
                    help='Resets statuses stuck on calling to new',
                    action='store_true')
args = vars(parser.parse_args())

# Each flag is popped, so it is removed from ``args`` once handled.

# Records that returned no info go back to "new".
if args.pop('tryAgain'):
    db = Database()
    db.change_status(Statuses.failed_to_return_info, Statuses.new)

# Records left in "calling" go back to "new".
if args.pop('setCallingToNew'):
    db = Database()
    db.change_status(Statuses.calling, Statuses.new)

# Records that finished or were in the middle of extraction go back to
# "transcribing_done".
if args.pop('re_extract'):
    db = Database()
    db.change_status(Statuses.extracting_done, Statuses.transcribing_done)
    db.change_status(Statuses.extracting, Statuses.transcribing_done)

# Records that finished transcription go back to "recording_ready".
if args.pop('re_transcribe'):
    db = Database()
    db.change_status(Statuses.transcribing_done, Statuses.recording_ready)
from flask_admin.model import BaseModelView
from flask_admin.model.template import EndpointLinkRowAction, LinkRowAction
from flask_admin.form import BaseForm
from flask_admin.form.rules import *
from flask_admin import Admin, BaseView, expose
from flask_admin.actions import action
from wtforms import SelectField
from wtforms.validators import DataRequired
from wtforms import StringField
from server.filters import EqualFilter
from storage.models import Statuses, Database

# Module-level database handle, created at import time.
db = Database()


class AinView(BaseModelView):
    """Flask-Admin model view configuration for the stored records."""

    # Copy of the ``Statuses`` class namespace. Note that a class ``__dict__``
    # also contains dunder entries such as ``__module__`` and ``__doc__``.
    statuses = dict(Statuses.__dict__)
    # Columns offered as filters in the list view.
    column_filters = ('Status', 'AlienID', 'Zipcode', 'Last Step Error')
    named_filter_urls = True
    # Paging: 20 rows by default, page size selectable.
    can_set_page_size = True
    page_size = 20
    can_view_details = True
    # Create, details and edit forms are enabled as modals.
    create_modal = True
    details_modal = True
    edit_modal = True
def __init__(self):
    """Create the ``Database`` handle and start the error counter at zero."""
    self.azure_table = Database()
    # NOTE(review): the name suggests a count of consecutive errors - confirm
    # against the code that increments it.
    self.consec_error_count = 0