def speech_recognition(job, url):
    """Start an Amazon Transcribe job for the media at ``url`` and wait for it.

    The job is named ``str(job)``, is submitted with language ``en-US`` and
    writes its output to the bucket returned by ``s3_bucket_name()``.  The
    media format is derived from the file extension found in the URL path.

    Args:
        job: Value used (via ``str``) as the transcription job name.
        url: Location of the media file; converted with ``str`` before use.

    Returns:
        The last ``get_transcription_job`` response, whose
        ``TranscriptionJobStatus`` is ``'COMPLETED'`` or ``'FAILED'``.
    """
    ACCESS_KEY_ID = access_key_id()
    SECRET_ACCESS_KEY = secret_access_key()
    bucket_name = s3_bucket_name()
    region = s3_region()
    transcribe = boto3.client('transcribe',
                              region_name=region,
                              aws_access_key_id=ACCESS_KEY_ID,
                              aws_secret_access_key=SECRET_ACCESS_KEY)
    job_name = str(job)
    job_uri = str(url)
    # Parse the same stringified URL that is sent as MediaFileUri so that a
    # non-str ``url`` is handled consistently.  Only the path component is
    # inspected, which keeps any query string out of the extension.
    path = urllib.parse.urlparse(job_uri).path
    # Transcribe's MediaFormat values are lowercase ('mp3', 'wav', ...), so a
    # file named 'TALK.MP3' has to be normalised before it is submitted.
    ext = os.path.splitext(path)[1].lstrip('.').lower()
    transcribe.start_transcription_job(OutputBucketName=bucket_name,
                                       TranscriptionJobName=job_name,
                                       Media={'MediaFileUri': job_uri},
                                       MediaFormat=ext,
                                       LanguageCode='en-US')
    # Poll every five seconds until the job reaches a terminal state.
    while True:
        status = transcribe.get_transcription_job(
            TranscriptionJobName=job_name)
        state = status['TranscriptionJob']['TranscriptionJobStatus']
        if state in ('COMPLETED', 'FAILED'):
            break
        print("Not ready yet...")
        time.sleep(5)
    return status
Beispiel #2
0
 def extract_image(x):
     """Run Rekognition text detection on an object stored in the S3 bucket.

     Args:
         x: Key (name) of the image object inside the bucket returned by
             ``s3_bucket_name()``.

     Returns:
         dict: ``{'Text': ...}`` where the value is every detection that
         carries a ``'ParentId'``, each followed by a single space (so a
         non-empty result ends with a trailing space).
     """
     key_id = access_key_id()
     secret = secret_access_key()
     source_bucket = s3_bucket_name()
     aws_region = s3_region()

     client = boto3.client(
         'rekognition',
         region_name=aws_region,
         aws_access_key_id=key_id,
         aws_secret_access_key=secret,
     )
     s3_image = {'S3Object': {'Bucket': source_bucket, 'Name': x}}
     detections = client.detect_text(Image=s3_image)['TextDetections']

     # Keep only detections that reference a parent, space-terminating each.
     text = ''.join(
         item['DetectedText'] + ' '
         for item in detections
         if 'ParentId' in item
     )
     return {'Text': text}
Beispiel #3
0
 def upload_image(x):
     """Upload the local file ``x`` to the configured S3 bucket.

     Args:
         x: Path of the local file; the same string is used as the S3 key.

     Returns:
         dict: ``{'Message': '<x> uploaded'}``.
     """
     key_id = access_key_id()
     secret = secret_access_key()
     target_bucket = s3_bucket_name()
     aws_region = s3_region()

     client = boto3.client(
         's3',
         region_name=aws_region,
         aws_access_key_id=key_id,
         aws_secret_access_key=secret,
     )
     # The local path doubles as the object key in the bucket.
     client.upload_file(x, target_bucket, x)
     return {'Message': x + ' uploaded'}
 def upload_audio(URL):
     """Copy the file served at ``URL`` into the configured S3 bucket.

     The object key is built by formatting the template returned by
     ``s3_key()`` with the last ``/``-separated segment of ``URL``.

     Args:
         URL: Address of the audio file to download.

     Returns:
         dict: ``{'Message': '<file name> uploaded'}``.
     """
     ACCESS_KEY_ID = access_key_id()
     SECRET_ACCESS_KEY = secret_access_key()
     bucket_name = s3_bucket_name()
     region = s3_region()
     keyname = s3_key()
     file_name = URL.split('/')[-1]
     s3 = boto3.client('s3',
                       region_name=region,
                       aws_access_key_id=ACCESS_KEY_ID,
                       aws_secret_access_key=SECRET_ACCESS_KEY)
     # Stream the HTTP response body straight into S3.  Previously the
     # download was read and thrown away, and the upload then looked for a
     # local file called ``file_name`` that this function never created.
     # The ``with`` block also closes the connection when the upload ends.
     with urllib.request.urlopen(URL) as response:
         s3.upload_fileobj(response, bucket_name, keyname.format(file_name))
     return {'Message': file_name + ' uploaded'}
Beispiel #5
0
    def extract_image(URL):
        """Fetch an image from ``URL`` and classify the text Rekognition finds.

        The image bytes are sent to Rekognition ``detect_text``.  Detections
        that have an ``'Id'`` but no ``'ParentId'`` are then labelled: the
        first one matching each category becomes ``'email'``, ``'phone'``,
        ``'url'`` or ``'name'``.  Everything left over is reported under
        ``'results'``.

        Each labelling pass only looks at detections that have not been
        labelled yet, so a later pass can never steal an entry from an
        earlier one (previously a lone email address could end up reported
        as ``'name'``).  The name check accepts any text for which
        ``nlp(text)`` is truthy, so it runs last, after the URL check.

        Args:
            URL: Address of the image to download.

        Returns:
            dict: Any of ``'email'``, ``'phone'``, ``'name'``, ``'url'`` (in
            that order, each present only when a detection matched),
            followed by ``'results'``, the list of unmatched detections.
        """
        email_pattern = r'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)'
        # Several phone layouts are tried; any one match is enough.
        phone_patterns = (
            r'^\+?\d[\d -]{8,12}\d',
            r'^(\d{3}[-\.\s]??\d{3}[-\.\s]??\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}|\d{3}[-\.\s]??\d{4})',
            r'^[1-9]\d{2}-\d{3}-\d{4}',
            r'^(\+\d{1,2}\s)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}',
            r'^\s*(?:\+?(\d{1,3}))?[-. (]*(\d{3})[-. )]*(\d{3})[-. ]*(\d{4})(?: *x(\d+))?\s*',
            r'^(?:(?:\+|0{0,2})91(\s*[\-]\s*)?|[0]?)?[789]\d{9}',
        )
        url_pattern = r'^(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})'

        ACCESS_KEY_ID = access_key_id()
        SECRET_ACCESS_KEY = secret_access_key()
        region = s3_region()

        res = requests.get(URL)
        byteObj = res.content
        # Kept from the original so that any error raised while opening the
        # payload as an image still surfaces before Rekognition is called.
        # NOTE(review): ``Image`` is presumably PIL's Image module - confirm.
        Image.open(BytesIO(byteObj))
        rekognition = boto3.client('rekognition',
                                   region_name=region,
                                   aws_access_key_id=ACCESS_KEY_ID,
                                   aws_secret_access_key=SECRET_ACCESS_KEY)
        response = rekognition.detect_text(Image={'Bytes': byteObj})

        # Detections without a 'ParentId', keyed by their position.  Integer
        # keys mark entries that have not been labelled yet.
        my_dict = dict(enumerate(
            item['DetectedText']
            for item in response['TextDetections']
            if 'Id' in item and 'ParentId' not in item
        ))

        def label_first(label, matches):
            # Move the first still-unlabelled entry accepted by ``matches``
            # under ``label``; leave the dict untouched when nothing matches.
            found = next(
                (key for key, val in my_dict.items()
                 if isinstance(key, int) and matches(val)),
                None,
            )
            if found is not None:
                my_dict[label] = my_dict.pop(found)

        label_first('email', lambda text: re.search(email_pattern, text))
        label_first(
            'phone',
            lambda text: any(re.search(p, text) for p in phone_patterns),
        )
        label_first('url', lambda text: re.search(url_pattern, text))
        # NOTE(review): ``nlp`` is defined outside this block; the check only
        # relies on the truthiness of its result.
        label_first('name', lambda text: bool(nlp(text)))

        lst = ['email', 'phone', 'name', 'url']
        newdict = {k: my_dict[k] for k in lst if k in my_dict}
        unknown = [val for key, val in my_dict.items() if key not in lst]
        return {**newdict, 'results': unknown}