def speech_recognition(job, url):
    """Start an Amazon Transcribe job for a media URL and wait for it to end.

    Args:
        job: Identifier for the job; its ``str()`` form is used as the
            transcription job name.
        url: Location of the media file; its ``str()`` form is sent as the
            media URI and the extension of its path selects the media format.

    Returns:
        The last ``get_transcription_job`` response, i.e. the one whose
        ``TranscriptionJobStatus`` is ``COMPLETED`` or ``FAILED``.
    """
    ACCESS_KEY_ID = access_key_id()
    SECRET_ACCESS_KEY = secret_access_key()
    bucket_name = s3_bucket_name()
    region = s3_region()
    transcribe = boto3.client(
        'transcribe',
        region_name=region,
        aws_access_key_id=ACCESS_KEY_ID,
        aws_secret_access_key=SECRET_ACCESS_KEY,
    )

    job_name = str(job)
    job_uri = str(url)

    # Derive the format from the URL *path* so query strings are ignored.
    # Lower-cased because the service's format names are lower-case
    # ('mp3', 'wav', ...), while file extensions in the wild often are not.
    path = urllib.parse.urlparse(job_uri).path
    media_format = os.path.splitext(path)[1].lstrip('.').lower()

    request = {
        'OutputBucketName': bucket_name,
        'TranscriptionJobName': job_name,
        'Media': {'MediaFileUri': job_uri},
        'LanguageCode': 'en-US',
    }
    if media_format:
        # NOTE(review): MediaFormat is documented as optional; when the URL
        # has no extension it is left out instead of sending an empty string.
        request['MediaFormat'] = media_format
    transcribe.start_transcription_job(**request)

    # Poll every 5 seconds until the job reaches a terminal state.
    while True:
        status = transcribe.get_transcription_job(
            TranscriptionJobName=job_name)
        state = status['TranscriptionJob']['TranscriptionJobStatus']
        if state in ('COMPLETED', 'FAILED'):
            break
        print("Not ready yet...")
        time.sleep(5)
    return status
def extract_image(x):
    """Detect text in an image stored in the configured S3 bucket.

    NOTE(review): a later function in this file is also named
    ``extract_image``; being defined afterwards, it is the one the
    module-level name ends up bound to.

    Args:
        x: Object key (name) of the image inside the bucket.

    Returns:
        ``{'Text': <str>}`` where the string is every detection that carries
        a ``ParentId``, each followed by a single space (so a non-empty
        result ends with a trailing space).
    """
    access_key = access_key_id()
    secret_key = secret_access_key()
    bucket = s3_bucket_name()
    region = s3_region()

    client = boto3.client(
        'rekognition',
        region_name=region,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    image_ref = {'S3Object': {'Bucket': bucket, 'Name': x}}
    detections = client.detect_text(Image=image_ref)['TextDetections']

    # Only detections that have a parent are kept (presumably the word-level
    # entries of the Rekognition response - confirm against the API docs).
    pieces = [
        item['DetectedText'] + ' '
        for item in detections
        if 'ParentId' in item
    ]
    return {'Text': ''.join(pieces)}
def upload_image(x):
    """Upload a local file to the configured S3 bucket.

    Args:
        x: Path of the local file; the same string is used as the object key.

    Returns:
        ``{'Message': '<x> uploaded'}`` once the upload call has returned.
    """
    access_key = access_key_id()
    secret_key = secret_access_key()
    bucket_name = s3_bucket_name()
    region = s3_region()

    client = boto3.client(
        's3',
        region_name=region,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    # Source path and destination key are intentionally the same value.
    client.upload_file(x, bucket_name, x)
    return {'Message': x + ' uploaded'}
def upload_audio(URL):
    """Download the file at ``URL`` and store it in the configured S3 bucket.

    The object key is built by passing the URL's last path segment to
    ``str.format`` on the template returned by ``s3_key()``.

    Args:
        URL: Address of the file to fetch.

    Returns:
        ``{'Message': '<file name> uploaded'}`` once the upload call has
        returned.
    """
    # Local import keeps this block self-contained.
    import io

    ACCESS_KEY_ID = access_key_id()
    SECRET_ACCESS_KEY = secret_access_key()
    bucket_name = s3_bucket_name()
    region = s3_region()
    keyname = s3_key()

    file_name = URL.split('/')[-1]

    # Keep the downloaded bytes. Previously they were read and discarded, and
    # the upload then looked for a local file that was never written.
    with urllib.request.urlopen(URL) as response:
        payload = response.read()

    s3 = boto3.client(
        's3',
        region_name=region,
        aws_access_key_id=ACCESS_KEY_ID,
        aws_secret_access_key=SECRET_ACCESS_KEY,
    )
    s3.upload_fileobj(io.BytesIO(payload), bucket_name,
                      keyname.format(file_name))
    return {'Message': file_name + ' uploaded'}
# Patterns used by extract_image() to label detected lines of text.
_EMAIL_PATTERN = r'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)'

# A line is treated as a phone number when any of these matches; they are
# tried in this order.
_PHONE_PATTERNS = (
    r'^\+?\d[\d -]{8,12}\d',
    r'^(\d{3}[-\.\s]??\d{3}[-\.\s]??\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}|\d{3}[-\.\s]??\d{4})',
    r'^[1-9]\d{2}-\d{3}-\d{4}',
    r'^(\+\d{1,2}\s)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}',
    # This literal was split across two physical lines inside the character
    # class before the last digit group; it is rejoined here as ``[-. ]``.
    r'^\s*(?:\+?(\d{1,3}))?[-. (]*(\d{3})[-. )]*(\d{3})[-. ]*(\d{4})(?: *x(\d+))?\s*',
    r'^(?:(?:\+|0{0,2})91(\s*[\-]\s*)?|[0]?)?[789]\d{9}',
)

_URL_PATTERN = (
    r'^(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}'
    r'|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}'
    r'|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}'
    r'|www\.[a-zA-Z0-9]+\.[^\s]{2,})'
)


def _relabel_first_match(entries, label, matches):
    """Re-key the first entry whose text satisfies ``matches`` as ``label``."""
    missing = object()
    # Find the key first and mutate afterwards, so the dict is never modified
    # while it is being iterated.
    hit = next(
        (key for key, text in entries.items() if matches(text)), missing)
    if hit is not missing:
        entries[label] = entries.pop(hit)


def extract_image(URL):
    """Detect text in the image at ``URL`` and label the recognised lines.

    The image is fetched over HTTP and its bytes are sent to Rekognition
    ``detect_text``. Detections that have an ``Id`` but no ``ParentId`` are
    kept, then four passes run in order: email, phone, name, url. Each pass
    moves the first matching entry under that label. The name pass labels the
    first entry for which ``nlp(text)`` is truthy (``nlp`` is defined outside
    this block).

    Every pass scans all current entries, including ones labelled by an
    earlier pass, so a later label can take over an earlier one (for example,
    a line first labelled ``'name'`` may end up as ``'url'``).

    NOTE(review): an earlier function in this file is also named
    ``extract_image``; this later definition is the one the module-level name
    ends up bound to.

    Args:
        URL: Address of the image to analyse.

    Returns:
        A dict holding whichever of ``'email'``, ``'phone'``, ``'name'`` and
        ``'url'`` were found (in that order), followed by ``'results'``: a
        list of the remaining unlabelled texts.
    """
    ACCESS_KEY_ID = access_key_id()
    SECRET_ACCESS_KEY = secret_access_key()
    region = s3_region()

    payload = requests.get(URL).content
    # The opened image itself is not used; the call is kept because it raises
    # when the bytes are not a recognisable image (presumably the intent).
    Image.open(BytesIO(payload))

    rekognition = boto3.client(
        'rekognition',
        region_name=region,
        aws_access_key_id=ACCESS_KEY_ID,
        aws_secret_access_key=SECRET_ACCESS_KEY,
    )
    response = rekognition.detect_text(Image={'Bytes': payload})

    texts = [
        item['DetectedText']
        for item in response['TextDetections']
        if 'Id' in item and 'ParentId' not in item
    ]
    # Keyed by position so that unlabelled entries are told apart from
    # labelled ones by having a non-label key.
    entries = dict(enumerate(texts))

    _relabel_first_match(
        entries, 'email', lambda text: re.search(_EMAIL_PATTERN, text))
    _relabel_first_match(
        entries, 'phone',
        lambda text: any(re.search(p, text) for p in _PHONE_PATTERNS))
    _relabel_first_match(entries, 'name', nlp)
    _relabel_first_match(
        entries, 'url', lambda text: re.search(_URL_PATTERN, text))

    labels = ['email', 'phone', 'name', 'url']
    result = {label: entries[label] for label in labels if label in entries}
    result['results'] = [
        text for key, text in entries.items() if key not in labels
    ]
    return result