def main():
    """Poll submitted speech jobs and print a plain-text progress report.

    Selects up to two rows from ``speechJobs`` with ``beenProcessed = 1``,
    ``respExported = 0`` and a NULL ``jobStatus``, oldest ``queueTimestamp``
    first. For each row with a truthy job id it prints the value returned by
    ``runCycle`` and the number of jobs for the same global id that are still
    un-exported with a NULL status; when that number is zero it also prints
    the value returned by ``nextAction``.
    """
    # CGI-style header line; print appends one more newline after it.
    print("Content-type: text/plain; charset=UTF-8\n\n")

    pendingSql = (
        "select jobId, apiName, gcsLoc, globalId, batchId from speechJobs "
        "where beenProcessed = %s and respExported = %s and jobStatus is %s "
        "order by queueTimestamp asc limit 2"
    )
    remainingSql = (
        "select count(*) from speechJobs where respExported = %s "
        "and jobStatus is %s and globalId = %s"
    )

    # The row list is read from index 2 of the dbExecution result.
    pendingRows = utilities.dbExecution(pendingSql, [1, 0, None])[2]

    # Each row carries exactly the five selected columns, in select order.
    for jobId, apiName, gcsLoc, globalId, batchId in pendingRows:
        if not jobId:
            continue

        print("global Id: " + str(globalId))
        print("... job " + str(jobId))
        print(runCycle(jobId, apiName, gcsLoc, globalId, batchId))

        countResp = utilities.dbExecution(remainingSql, [0, None, globalId])
        remainingJobs = countResp[2][0][0]
        print("... " + str(remainingJobs) + " jobs still in the queue")
        if remainingJobs == 0:
            # Nothing left for this global id: trigger the follow-up step.
            print(nextAction(globalId))
        print("")
def runCycle(gcsLoc, jobId, globalId):
    """Submit one audio file for long-running recognition and record the result.

    Builds a ``longrunningrecognize`` request for the audio at ``gcsLoc`` and
    sends it through the ``speech`` ``v1p1beta1`` service. On success the
    operation name from the response is stored in ``speechJobs.apiName``; on
    any exception ``jobStatus`` is set to "longrunning api call failed". In
    both cases ``beenProcessed`` is set to 1 for ``jobId``.

    Args:
        gcsLoc: value sent as the request's audio ``uri``.
        jobId: ``speechJobs.jobId`` of the row to update.
        globalId: only used in the printed progress line.

    Returns:
        None. Progress and failure messages are printed.
    """
    keyFile = "__Credential_JSON_File_Name__"
    authScopes = ["https://www.googleapis.com/auth/cloud-platform"]
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        keyFile, scopes=authScopes)

    recordingMetadata = {
        "interaction_type": "DISCUSSION",
        "recording_device_type": "OTHER_INDOOR_DEVICE",
        "originalMediaType": "VIDEO",
    }
    phraseHints = {
        "phrases": ["Louisville", "Weldona", "signage", "PROSTAC"],
    }
    recognitionConfig = {
        "languageCode": "en-US",
        "encoding": "FLAC",
        "sampleRateHertz": 16000,
        "enableWordTimeOffsets": True,
        "enableAutomaticPunctuation": True,
        "useEnhanced": True,
        "model": "video",
        "metadata": recordingMetadata,
        "speechContexts": phraseHints,
    }
    payloadObj = {
        "audio": {"uri": gcsLoc},
        "config": recognitionConfig,
    }

    try:
        httpObj = credentialsObj.authorize(httplib2.Http())
        serviceObj = build(
            serviceName="speech",
            version="v1p1beta1",
            http=httpObj,
            developerKey="__Google_Speech_API_Key__",
        )
        operationObj = serviceObj.speech().longrunningrecognize(
            body=payloadObj).execute()

        print("job " + str(jobId) + " for global id " + str(globalId))
        apiName = operationObj["name"]
        print("request name " + str(apiName))

        utilities.dbExecution(
            "update speechJobs set apiName = %s, beenProcessed = %s "
            "where jobId = %s",
            [apiName, 1, jobId],
        )
    except Exception:
        # Any failure above (auth, API call, or the success update itself)
        # marks the job as failed so it is not picked up again as unprocessed.
        utilities.dbExecution(
            "update speechJobs set jobStatus = %s, beenProcessed = %s "
            "where jobId = %s",
            ["longrunning api call failed", 1, jobId],
        )
        print("longrunning api call failed")
def markTranscript(globalId, prodTranscript, batchId):
    """Mark a meeting as transcribed and store its batch-prefixed transcript name.

    The stored ``prodTranscript`` value is ``"<batchId>-<prodTranscript>"``.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    batchPrefix = str(batchId) + "-"
    taggedTranscript = batchPrefix + prodTranscript
    updateSql = (
        "update meetingRegistry set beenTranscribed = %s, "
        "prodTranscript = %s where globalId = %s"
    )
    return utilities.dbExecution(updateSql, [1, taggedTranscript, globalId])
def runCycle(globalId, orgIdentifier, prodTranscode, batchId):
    """Queue one ``speechJobs`` row per FLAC blob of a meeting's transcode.

    Lists blobs in the bucket named by the module-level ``bucketName`` under
    ``accounts/<orgIdentifier>/enrichments/<globalId>/transcodes/<prodTranscode>``
    and inserts an unprocessed job (``beenProcessed = 0``) for every blob
    whose name contains ".flac".

    Returns:
        The number of jobs inserted.
    """
    bucketObj = storage.Client().get_bucket(bucketName)

    enrichmentDir = "accounts/" + orgIdentifier + "/enrichments/" + str(globalId)
    blobPrefix = enrichmentDir + "/transcodes/" + prodTranscode

    insertSql = (
        "insert into speechJobs (globalId, orgIdentifier, gcsLoc, "
        "beenProcessed, batchId) values (%s, %s, %s, %s, %s)"
    )

    queuedCnt = 0
    for blobObj in bucketObj.list_blobs(prefix=blobPrefix):
        if ".flac" not in blobObj.name:
            continue
        gcsLoc = "gs://" + blobObj.bucket.name + "/" + blobObj.name
        utilities.dbExecution(
            insertSql, [globalId, orgIdentifier, gcsLoc, 0, batchId])
        queuedCnt += 1
    return queuedCnt
def assignUrl(globalId, wcUrl):
    """Store ``wcUrl`` in the ``wordCloud`` column of a meeting.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    updateSql = "update meetingRegistry set wordCloud = %s where globalId = %s"
    return utilities.dbExecution(updateSql, (wcUrl, globalId))
def assignUrl(globalId, transcriptUrl):
    """Store ``transcriptUrl`` in the ``publishedTranscript`` column of a meeting.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    updateSql = (
        "update meetingRegistry set publishedTranscript = %s "
        "where globalId = %s"
    )
    return utilities.dbExecution(updateSql, (transcriptUrl, globalId))
def get(self):
    """Return one ``meetingRegistry`` row as a JSON object.

    Reads the ``gId`` request parameter, looks the meeting up by
    ``globalId`` and writes a JSON object with one key per selected column.
    If the lookup fails for any reason (database error, no matching row,
    short row) every key is present with a null value.
    """
    self.response.headers["Content-Type"] = "application/json"
    self.response.headers.add_header(
        "Cache-Control", "no-cache, no-store, must-revalidate, max-age=0")
    self.response.headers.add_header("Expires", "0")

    # Single source of truth: these names are both the selected columns
    # (in this order) and the keys of the JSON response.
    fieldNames = (
        "videoName", "beenTranscribed", "beenTranscoded", "videoDownloaded",
        "videoLink", "orgIdentifier", "prodTranscript", "meetingDate",
        "meetingDesc", "beenIndexed", "youtubeId", "meetingId",
        "prodTranscode", "urlIdentifier",
    )
    sqlCmd = ("select " + ", ".join(fieldNames) +
              " from meetingRegistry where globalId = %s")

    try:
        globalId = self.request.get("gId")
        # A real 1-tuple: "(globalId)" is just the bare string, which is
        # inconsistent with the sequences passed to dbExecution elsewhere.
        resultList = utilities.dbExecution(sqlCmd, (globalId,))
        firstRow = resultList[2][0]
        # Index explicitly so a short row fails as a whole instead of
        # yielding a partially filled response.
        resultObj = dict(
            (fieldName, firstRow[colIdx])
            for colIdx, fieldName in enumerate(fieldNames))
    except Exception:
        # Deliberate best effort: any failure yields the all-null payload.
        # Unlike a bare "except:", this lets SystemExit/KeyboardInterrupt
        # propagate.
        resultObj = dict.fromkeys(fieldNames)

    self.response.out.write(ujson.dumps(resultObj))
def meetingCount(orgIdentifier):
    """Count an organisation's meetings that have a non-NULL ``youtubeId``.

    Returns:
        The first column of the first result row (the ``count(*)`` value).
    """
    sqlCmd = """select count(*) from meetingRegistry where orgIdentifier = %s and youtubeId is not NULL"""
    # Trailing comma makes this a 1-tuple; "(orgIdentifier)" is only the
    # bare value, not a parameter sequence.
    sqlData = (orgIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)
    return resultList[2][0][0]
def lookupFiles(urlIdentifier):
    """Fetch the ``segmentJson`` value stored for a URL identifier.

    Returns:
        The ``segmentJson`` column of the first matching ``videoSegments`` row.

    Raises:
        IndexError: if the query returns no rows.
    """
    sqlCmd = """select segmentJson from videoSegments where urlIdentifier = %s"""
    # Trailing comma makes this a 1-tuple; "(urlIdentifier)" is only the
    # bare value, not a parameter sequence.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)
    return resultList[2][0][0]
def lookupFiles(urlIdentifier):
    """List the related files of the meeting behind a URL identifier.

    Resolves ``urlIdentifier`` to a ``globalId`` via ``meetingRegistry`` and
    then selects that meeting's rows from ``relatedFiles``, ordered by file
    name ascending.

    Returns:
        The result rows; each row holds fileId, fileName, mimeType,
        webViewLink, thumbnailLink and pageIndex, in that order.

    Raises:
        IndexError: if no meeting matches ``urlIdentifier``.
    """
    sqlCmd = """select globalId from meetingRegistry where urlIdentifier = %s"""
    # Trailing commas below make real 1-tuples; a parenthesised scalar is
    # only the bare value, not a parameter sequence.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    # The id is passed on as a string, as the original code did.
    globalId = str(resultList[2][0][0])

    sqlCmd = """select fileId, fileName, mimeType, webViewLink, thumbnailLink, pageIndex from relatedFiles where globalId = %s order by fileName ASC"""
    sqlData = (globalId,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)
    return resultList[2]
def main():
    """Submit the next unprocessed speech job, if there is one.

    Picks the ``speechJobs`` row with the lowest ``jobId`` among those with
    ``beenProcessed = 0``, prints its global id, storage location and job id,
    and hands it to ``runCycle``. Prints "No jobs to run." when the query
    returns no rows.
    """
    nextJobSql = (
        "select globalId, jobId, gcsLoc from speechJobs "
        "where beenProcessed = %s order by jobId limit 1"
    )
    queryResp = utilities.dbExecution(nextJobSql, [0])

    # CGI-style header line; print appends one more newline after it.
    print("Content-type: text/plain; charset=UTF-8\n\n")

    # The row list is read from index 2 of the dbExecution result.
    jobRows = queryResp[2]
    if not jobRows:
        print("No jobs to run.")
        return

    nextJob = jobRows[0]
    # Single quotes are stripped from the stored location before use.
    gcsLoc = nextJob[2].replace("'", "")
    jobId = nextJob[1]
    globalId = nextJob[0]

    print(globalId)
    print(gcsLoc)
    print(jobId)
    runCycle(gcsLoc, jobId, globalId)
def runCycle(jobId, apiName, gcsLoc, globalId, batchId):
    """Check a long-running speech operation and export its result when done.

    Fetches the operation named ``apiName`` from the ``speech`` ``v1p1beta1``
    service. When its metadata reports ``progressPercent == 100`` the whole
    operation response is uploaded as JSON to the bucket named by the
    module-level ``bucketName`` and the job is flagged ``respExported = 1``.

    The upload path is derived from ``gcsLoc``: single quotes are removed,
    ".flac" becomes ".json", the "gs://<bucketName>/" prefix and
    "transcodes/" are dropped, and "/<globalId>/" becomes
    "/<globalId>/transcripts/<batchId>-".

    Returns:
        A status line: "... job <jobId> finished", "... job <N>% complete",
        "... job queued" when the operation has no progress metadata yet, or
        "... job <jobId> export failed (<ExceptionType>)" when the upload or
        the database update raised.
    """
    credentialsJson = "__Credential_JSON_File_Name__"
    scopesList = ["https://www.googleapis.com/auth/cloud-platform"]
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        credentialsJson, scopes=scopesList)
    httpObj = credentialsObj.authorize(httplib2.Http())
    serviceObj = build(serviceName="speech",
                       version="v1p1beta1",
                       http=httpObj,
                       developerKey="__Google_Speech_API_Key__")
    reqObj = serviceObj.operations().get(name=apiName).execute()

    # Only a missing progress value means "still queued"; previously every
    # exception, including upload and database failures, was reported so.
    try:
        progressPercent = reqObj["metadata"]["progressPercent"]
    except (KeyError, TypeError):
        return "... job queued"

    if progressPercent != 100:
        return "... job " + str(progressPercent) + "% complete"

    try:
        clientObj = storage.Client()
        bucketObj = clientObj.get_bucket(bucketName)

        cloudPath = gcsLoc.replace("'", "")
        cloudPath = cloudPath.replace(".flac", ".json")
        bucketPrefix = "gs://" + bucketName + "/"
        cloudPath = cloudPath.replace(bucketPrefix, "")
        cloudPath = cloudPath.replace("transcodes/", "")

        globalDir = "/" + str(globalId) + "/"
        transDir = globalDir + "transcripts/" + str(batchId) + "-"
        newPath = cloudPath.replace(globalDir, transDir)

        blobObj = bucketObj.blob(newPath)
        blobObj.upload_from_string(ujson.dumps(reqObj))

        sqlCmd = """update speechJobs set respExported = %s where jobId = %s"""
        utilities.dbExecution(sqlCmd, [1, jobId])
    except Exception as exportErr:
        # Still best effort (no crash, respExported stays 0 so the job is
        # retried on a later run), but reported as a failure.
        return ("... job " + str(jobId) + " export failed (" +
                type(exportErr).__name__ + ")")

    return "... job " + str(jobId) + " finished"
def get(self):
    """Flag a meeting's transcript as errored and report the outcome as JSON.

    Reads the ``gId`` request parameter, sets ``transcriptErr = 1`` on the
    matching ``meetingRegistry`` row and writes ``{"response": <text>}``,
    where the text is ``str()`` of the ``utilities.dbExecution`` result, or
    null if the update raised.
    """
    self.response.headers["Content-Type"] = "application/json"
    self.response.headers.add_header(
        "Cache-Control", "no-cache, no-store, must-revalidate, max-age=0")
    self.response.headers.add_header("Expires", "0")

    try:
        globalId = self.request.get("gId")
        sqlCmd = "update meetingRegistry set transcriptErr = %s where globalId = %s"
        sqlData = (1, globalId)
        resultList = utilities.dbExecution(sqlCmd, sqlData)
        outputStr = str(resultList)
    except Exception:
        # Deliberate best effort: failures are reported as a null response.
        # Unlike a bare "except:", this lets SystemExit/KeyboardInterrupt
        # propagate.
        outputStr = None

    resultObj = {"response": outputStr}
    self.response.out.write(ujson.dumps(resultObj))
def lookupMeeting(urlIdentifier):
    """Fetch the published details of one meeting by URL identifier.

    Only rows with a non-NULL ``youtubeId`` are considered.

    Returns:
        The first matching row: meetingDesc, meetingDate, youtubeId,
        wordCloud, publishedVideo, publishedTranscript, orgIdentifier,
        publishedAgenda, urlIdentifier and hasSegments, in that order.

    Raises:
        IndexError: if no row matches.
    """
    # The former "globalId = str(urlIdentifier)" was never used, and str()
    # can raise UnicodeEncodeError for non-ASCII unicode input on Python 2,
    # so it has been removed.
    sqlCmd = """select meetingDesc, meetingDate, youtubeId, wordCloud, publishedVideo, publishedTranscript, orgIdentifier, publishedAgenda, urlIdentifier, hasSegments from meetingRegistry where urlIdentifier = %s and youtubeId is not NULL limit 1"""
    # Trailing comma makes this a 1-tuple; "(urlIdentifier)" is only the
    # bare value, not a parameter sequence.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)
    return resultList[2][0]
def getMeetings(qryLimit, qryOffset, orgIdentifier):
    """Return one page of an organisation's meetings, keyed by date and counter.

    Selects meetings with a non-NULL ``youtubeId`` for ``orgIdentifier``,
    newest ``meetingDate`` first, using ``qryLimit`` / ``qryOffset`` (both
    coerced with ``int``) for paging.

    Returns:
        A dict whose keys are ``str(meetingDate)`` followed by a three-digit,
        zero-padded counter that increases for consecutive rows sharing the
        same date. Each value is a dict with "desc", "date" and "dow" (both
        from ``formatDate``), "meetingId" (the row's ``globalId``) and
        "urlIdentifier".
    """
    pageSql = (
        "select meetingDesc, meetingDate, urlIdentifier, globalId "
        "from meetingRegistry where orgIdentifier = %s "
        "and youtubeId is not NULL order by meetingDate DESC "
        "limit %s offset %s"
    )
    pageParams = (orgIdentifier, int(qryLimit), int(qryOffset))
    meetingRows = utilities.dbExecution(pageSql, pageParams)[2]

    meetingDict = {}
    previousDate = None
    sameDateCnt = 0
    for meetingRow in meetingRows:
        meetingDate = meetingRow[1]

        # Consecutive rows with the same date get increasing suffixes so
        # their keys do not collide.
        if previousDate == meetingDate:
            sameDateCnt += 1
        else:
            sameDateCnt = 0
        dateKey = str(meetingDate) + str(sameDateCnt).zfill(3)

        formattedDate, weekDay = formatDate(meetingDate)
        meetingDict[dateKey] = {
            "desc": meetingRow[0],
            "date": formattedDate,
            "dow": weekDay,
            "meetingId": meetingRow[3],
            "urlIdentifier": meetingRow[2],
        }
        previousDate = meetingDate

    return meetingDict
def lookupMeeting(globalId):
    """Look up a meeting's transcode name and organisation by global id.

    Returns:
        A ``(prodTranscode, orgIdentifier)`` tuple taken from the first
        matching ``meetingRegistry`` row.

    Raises:
        IndexError: if no row matches.
    """
    lookupSql = (
        "select prodTranscode, orgIdentifier from meetingRegistry "
        "where globalId = %s"
    )
    firstRow = utilities.dbExecution(lookupSql, [globalId])[2][0]
    return firstRow[0], firstRow[1]