def google_stt(self):
    # Instantiates a client
    client = speech.SpeechClient()
    sound = AudioSegment.from_file(self.inputFilePath, format="webm")
    # if sound.channels != 1:  # If it's not mono
    sound = sound.set_channels(1)  # Change it to mono
    sound.export(self.inputFilePath, format="wav")  # Export it as a wav file
    print('Conversion complete')
    # Instantiates a client and uploads the file
    storage_client = storage.Client()
    # Parameter is the name of the Google Cloud bucket
    bucket = storage_client.lookup_bucket('celerfama2')
    folder = bucket.list_blobs()
    with open(self.inputFilePath, 'rb') as file:
        blob = Blob(os.path.basename(file.name), bucket)
        print("Google: Uploading: " + os.path.basename(file.name))
        blob.upload_from_filename(self.inputFilePath)
    # Transcribes each file in the cloud bucket
    for element in folder:
        print("Google: Transcribing " + element.name)
        audio = types.RecognitionAudio(uri="gs://celerfama2/" + element.name)
        config = types.RecognitionConfig(
            # Option to get word-specific info like time stamps
            enable_word_time_offsets=True,
            # Language of the audio
            language_code='en-US')
        # Detects speech in the audio file
        operation = client.long_running_recognize(config, audio)
        print('Google: Waiting for operation to complete...')
        response = operation.result()
        file_name = element.name[:-4]
        output_file = open(file_name + "Google" + ".txt", "w", encoding="utf-8")
        for result in response.results:
            for alternative in result.alternatives:
                output_file.write('Transcript: {}\n'.format(alternative.transcript))
                output_file.write("Confidence: " + str(alternative.confidence) + '\n')
                # Below can be uncommented to get the detailed info for each word.
                # for word_info in alternative.words:
                #     word = word_info.word
                #     start_time = word_info.start_time
                #     end_time = word_info.end_time
                #     output_file.write('Word: {}, start_time: {}, end_time: {}'.format(
                #         word,
                #         start_time.seconds + start_time.nanos * 1e-9,
                #         end_time.seconds + end_time.nanos * 1e-9))
                #     output_file.write("\n")
        output_file.close()
        print("Google: Operation Complete")
        element.delete()
    return

def _upload_to_blobstore(self, blob_to_upload_path, blob_target_name, chunk_size=None):
    """Upload a file to the blobstore.

    :type chunk_size: int
    :param chunk_size: If the file size is greater than 5 MB, resumable
        uploads are recommended. To use a resumable upload, pass the
        chunk_size param to this function. It must be a multiple of
        256 KB per the API specification.
    """
    log_prefix = '[Google Cloud Storage] [UPLOAD]'
    if self.container:
        self.logger.info(
            '{} Started to upload the tarball to the object storage.'.format(log_prefix))
        try:
            blob = Blob(blob_target_name, self.container, chunk_size=chunk_size)
            blob.upload_from_filename(blob_to_upload_path)
            self.logger.info('{} SUCCESS: blob_to_upload={}, blob_target_name={}, container={}'
                             .format(log_prefix, blob_to_upload_path, blob_target_name,
                                     self.CONTAINER))
            return True
        except Exception as error:
            message = '{} ERROR: blob_to_upload={}, blob_target_name={}, container={}\n{}'.format(
                log_prefix, blob_to_upload_path, blob_target_name, self.CONTAINER, error)
            self.logger.error(message)
            raise Exception(message)
    else:
        message = '{} ERROR: blob_to_upload={}, blob_target_name={}, container={}\n{}'.format(
            log_prefix, blob_to_upload_path, blob_target_name, self.CONTAINER,
            "Container not found or not accessible")
        self.logger.error(message)
        raise Exception(message)

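# Hedged usage sketch for the helper above: `backup_client` is an assumed
# instance of the surrounding class with `container` already bound to a GCS
# bucket, and the tarball path is illustrative. A chunk_size that is a
# multiple of 256 KB switches the client library to a resumable upload,
# which the docstring recommends for files over 5 MB.
CHUNK_SIZE = 4 * 256 * 1024  # 1 MiB, a valid multiple of 256 KB

backup_client._upload_to_blobstore(
    blob_to_upload_path='/tmp/backup.tar.gz',
    blob_target_name='backups/backup.tar.gz',
    chunk_size=CHUNK_SIZE,
)
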
class Uploder():
    def __init__(self):
        self.IMAGE_DIR = "/home/dumingzhex/Projects/WintersWrath/webspider/Image/"
        self.storage_client = storage.Client()
        try:
            self.bucket = self.storage_client.get_bucket('argus_space')
            print("bucket")
        except Exception as e:
            print(e)
            print('Sorry, that bucket does not exist!')

    def generator(self, file_name):
        # encryption_key = 'c7f32af42e45e85b9848a6a14dd2a8f6'
        self.blob = Blob(file_name, self.bucket, encryption_key=None)
        self.blob.upload_from_filename(self.IMAGE_DIR + file_name)
        self.blob.make_public()

    def get_media_link(self):
        return self.blob.media_link

    def get_public_link(self):
        return self.blob.public_url

    def get_dir(self, dir_name):
        return os.listdir(dir_name)

def txfr_blob(filename: str,
              bq: BigQueryer = PostBigQueryer(),
              cs: CloudStorager = CloudStorager()):
    """
    Uploads the blob to BigQuery. This would probably be better as a shell script.

    :param filename: path of the local file to upload
    :param bq: BigQuery client wrapper
    :param cs: Cloud Storage client wrapper
    :return:
    """
    tm = current_time_ms()  # pain in the ass to get nanotime in python apparently
    objname = 'api-update-blob-{}'.format(tm)
    blob = Blob(objname, cs.get_cloud_storage_bucket())
    logger.info("Uploading file (this will take a long time)... ")
    blob.upload_from_filename(filename)
    # change this to change table
    table = bq.get_bigquery_table()
    uri = 'gs://' + cs.bucket + "/" + objname
    logger.info("Loading file to BQ...")
    # insert into tmp table
    # tmptable = bq.client.dataset('forums').table(objname)
    job = LoadTableFromStorageJob('api-job-{}'.format(tm), table, [uri],
                                  client=bq.client)
    job.write_disposition = 'WRITE_APPEND'
    job.source_format = 'AVRO'
    job.begin()
    wait_for_job(job)
    logger.info("Cleaning up...")
    blob.delete(cs.client)

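# LoadTableFromStorageJob comes from an old google-cloud-bigquery release.
# Hedged sketch of the same load on current versions of the library, using
# load_table_from_uri; 'forums.posts' and the URI are assumed names, not
# taken from the original code.
from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.AVRO,
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
)
uri = 'gs://my-bucket/api-update-blob-123'  # illustrative object path
load_job = client.load_table_from_uri(uri, 'forums.posts', job_config=job_config)
load_job.result()  # blocks until the load job finishes
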
def upload(csvfile, bucketname, blobname):
    client = storage.Client()
    bucket = client.get_bucket(bucketname)
    blob = Blob(blobname, bucket)
    blob.upload_from_filename(csvfile)
    gcslocation = 'gs://{}/{}'.format(bucketname, blobname)
    logging.info('Uploaded {} ...'.format(gcslocation))
    return gcslocation

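# Hedged usage sketch for the upload helper above; the bucket name and
# paths are illustrative.
gcs_path = upload('data/daily.csv', 'my-bucket', 'imports/daily.csv')
print(gcs_path)  # gs://my-bucket/imports/daily.csv
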
def upload(csvfile, bucketname, blobname):
    client = storage.Client()
    bucket = client.get_bucket(bucketname)
    blob = Blob(blobname, bucket)
    blob.upload_from_filename(csvfile)
    gcslocation = 'gs://{}/{}'.format(bucketname, blobname)
    print("Uploaded {}.......".format(gcslocation))
    return gcslocation

def google_stt(self):
    # Instantiates a client
    client = speech.SpeechClient()
    # Instantiates a client and uploads the file
    storage_client = storage.Client()
    # Parameter is the name of the Google Cloud bucket
    bucket = storage_client.lookup_bucket('celerfama2')
    folder = bucket.list_blobs()
    with open(self.inputFilePath, 'rb') as file:
        blob = Blob(self.inputFilePath[7:], bucket)
        print("Google: Uploading: " + self.inputFilePath[7:])
        blob.upload_from_filename(self.inputFilePath)
    # Transcribes each file in the cloud bucket
    for element in folder:
        print("Google: Transcribing " + element.name)
        audio = types.RecognitionAudio(uri="gs://celerfama2/" + element.name)
        config = types.RecognitionConfig(enable_word_time_offsets=True,
                                         language_code='en-US')
        # Detects speech in the audio file
        operation = client.long_running_recognize(config, audio)
        print('Google: Waiting for operation to complete...')
        response = operation.result()
        output_file = open(self.inputFilePath[:-4] + "Google" + ".txt", "w",
                           encoding="utf-8")
        index = self.inputFilePath.rfind("-")
        output_text_file = open(self.inputFilePath[:index] + ".txt", 'a+',
                                encoding="utf-8")
        for result in response.results:
            for alternative in result.alternatives:
                output_file.write(alternative.transcript + '\n')
                output_text_file.write(alternative.transcript + '\n')
                # output_file.write("Confidence: " + str(alternative.confidence) + '\n')
                # Below can be uncommented to get the detailed info for each word.
                # for word_info in alternative.words:
                #     word = word_info.word
                #     start_time = word_info.start_time
                #     end_time = word_info.end_time
                #     output_file.write('Word: {}, start_time: {}, end_time: {}'.format(
                #         word,
                #         start_time.seconds + start_time.nanos * 1e-9,
                #         end_time.seconds + end_time.nanos * 1e-9))
                #     output_file.write("\n")
        output_file.close()
        output_text_file.close()
        print("Google: Operation Complete")
        element.delete()
    return

def upload_to_gcp(src_path, gcp_path):
    print('###### start upload from %s to %s' % (src_path, gcp_path))
    client = storage.Client.from_service_account_json(settings.GCP_KEY_PATH)
    bucket = client.get_bucket(settings.BUCKET_NAME)
    blob = Blob(gcp_path, bucket)
    blob.upload_from_filename(src_path)
    blob.make_public()
    print('##### upload success: %s' % blob.public_url)
    return blob.public_url

def upload_to_storage(path, filename, bucket, bucket_folder, bucket_name):
    blob = Blob(bucket_folder + filename, bucket)
    try:
        blob.upload_from_filename(path + filename)
        print(filename + ' is uploaded to "{}:{}"'.format(bucket_name, bucket_folder))
        retry = False
    except Exception:
        print(filename + ' needs to be re-uploaded.')
        retry = True
    return {'filename': filename, 'retry': retry}

def upload_file(self, source, dest, contentType=None):
    if self.use_gcs:
        args = {}
        if contentType is not None:
            args = {'content_type': contentType}
        blob = Blob(dest, self.gcs_bucket)
        blob.upload_from_filename(source, **args)
    else:
        args = {}
        if contentType is not None:
            args = {'ContentType': contentType}
        self.s3.upload_file(source, self.bucket, dest, ExtraArgs=args)

def archive_file(self, file_path, content_hash=None):
    """Store the file located at the given path in the storage bucket,
    based on a path made up from its SHA1 content hash."""
    if content_hash is None:
        content_hash = checksum(file_path)
    blob = self._locate_blob(content_hash)
    if blob is None:
        path = os.path.join(self._get_prefix(content_hash), 'data')
        blob = Blob(path, self.bucket)
        blob.upload_from_filename(file_path)
    return content_hash

def upload(csvfile, bucketname, blobname):
    """ Uploads the CSV file into the bucket with the given blobname """
    client = storage.Client()
    bucket = client.get_bucket(bucketname)
    logging.info(bucket)
    blob = Blob(blobname, bucket)
    logging.debug('Uploading {} ...'.format(csvfile))
    blob.upload_from_filename(csvfile)
    gcslocation = 'gs://{}/{}'.format(bucketname, blobname)
    logging.info('Uploaded {} ...'.format(gcslocation))
    return gcslocation

def test_end_to_end(input_bucket, output_bucket):
    # Upload image to the input bucket
    blob = Blob("zombie.jpg", input_bucket)
    blob.upload_from_filename("test-images/zombie.jpg", content_type="image/jpeg")

    # Wait for image processing to complete
    time.sleep(30)
    for x in range(10):
        # Check for blurred image in output bucket
        output_blobs = list(output_bucket.list_blobs())
        if len(output_blobs) > 0:
            break
        time.sleep(5)

    assert len(output_blobs) > 0

def upload(csv_file, bucket_name, blob_name):
    """Uploads csv_file to bucket_name.

    Args:
        csv_file: CSV file to upload.
        bucket_name: Bucket to upload the files to.
        blob_name: Path of the file in the bucket.

    Returns:
        The path of the uploaded file in GCS.
    """
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    blob = Blob(blob_name, bucket)
    blob.upload_from_filename(csv_file)
    gcs_location = 'gs://{}/{}'.format(bucket_name, blob_name)
    logging.info('Uploaded "{}" ...'.format(gcs_location))
    return gcs_location

def write_file(
    self,
    upload_file_name: str,
    bucket_name: str,
    bucket_file_name: str,
    content_type: str = None,
    skip_bucket_check=False,
):
    log.info(f"Writing file to bucket {bucket_name} as {bucket_file_name}")
    # If we call client.get_bucket, then we need more than the objectCreate
    # role (we need read as well).
    if skip_bucket_check:
        blob = Blob(bucket_file_name, self.client.bucket(bucket_name.lower()))
    else:
        blob = Blob(bucket_file_name, self.get_or_create_bucket(bucket_name))
    blob.upload_from_filename(upload_file_name, content_type=content_type)

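# Hedged usage sketch for write_file: `gcs` is an assumed instance of the
# surrounding wrapper class and the names are illustrative. With
# skip_bucket_check=True the caller only needs object-create permission,
# because client.bucket() builds a handle without issuing a GET request.
gcs.write_file(
    upload_file_name="report.csv",
    bucket_name="My-Analytics",  # lowercased to "my-analytics" for the handle
    bucket_file_name="reports/2021/report.csv",
    content_type="text/csv",
    skip_bucket_check=True,
)
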
def upload_encrypted_blob(bucket_name, source_file_name,
                          destination_blob_name, base64_encryption_key):
    """Uploads a file to a Google Cloud Storage bucket using a custom
    encryption key.

    The file will be encrypted by Google Cloud Storage and only
    retrievable using the provided encryption key.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # Encryption key must be an AES256 key represented as a bytestring with
    # 32 bytes. Since it's passed in as a base64 encoded string, it needs
    # to be decoded.
    encryption_key = base64.b64decode(base64_encryption_key)
    blob = Blob(destination_blob_name, bucket, encryption_key=encryption_key)

    blob.upload_from_filename(source_file_name)

    print('File {} uploaded to {}.'.format(source_file_name,
                                           destination_blob_name))

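# Objects written with a customer-supplied key can only be read back with
# the same key. Hedged sketch of the matching download, following the same
# Blob(..., encryption_key=...) pattern; the function name and local path
# are illustrative.
def download_encrypted_blob(bucket_name, source_blob_name,
                            destination_file_name, base64_encryption_key):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    encryption_key = base64.b64decode(base64_encryption_key)  # same 32-byte key
    blob = Blob(source_blob_name, bucket, encryption_key=encryption_key)
    blob.download_to_filename(destination_file_name)
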
class Uploder():
    def __init__(self):
        self.storage_client = storage.Client()
        try:
            self.bucket = self.storage_client.get_bucket('argus_space')
            logging.debug("Successfully acquired the GCP storage bucket.")
        except Exception as e:
            logging.error('The specified bucket does not exist; please check GCP.')

    def generator(self, file_name):
        # encryption_key = 'c7f32af42e45e85b9848a6a14dd2a8f6'
        self.blob = Blob(file_name, self.bucket, encryption_key=None)
        self.blob.upload_from_filename(file_name)
        self.blob.make_public()

    def get_media_link(self):
        return self.blob.media_link

    def get_public_link(self):
        return self.blob.public_url

    def get_dir(self, dir_name):
        return os.listdir(dir_name)

def archive_file(self, file_path, content_hash=None, mime_type=None):
    """Store the file located at the given path in Google Cloud Storage,
    based on a path made up from its SHA1 content hash."""
    file_path = ensure_path(file_path)
    if content_hash is None:
        content_hash = checksum(file_path)
    if content_hash is None:
        return
    file_path = ensure_posix_path(file_path)
    for attempt in service_retries():
        try:
            # blob = self._locate_contenthash(content_hash)
            # if blob is not None:
            #     return content_hash
            path = os.path.join(path_prefix(content_hash), "data")
            blob = Blob(path, self.bucket)
            blob.upload_from_filename(file_path, content_type=mime_type)
            return content_hash
        except FAILURES:
            log.exception("Store error in GS")
            backoff(failures=attempt)

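# The commented-out dedup check above calls a _locate_contenthash helper
# that is not shown. A minimal sketch of what it could look like, assuming
# the same path_prefix layout; Bucket.get_blob returns None when no object
# exists at that path, so callers can fall through to a fresh upload.
def _locate_contenthash(self, content_hash):
    path = os.path.join(path_prefix(content_hash), "data")
    return self.bucket.get_blob(path)
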
def on_post(self, req, resp, security_context):
    """ Handles POST requests - uploads a file """
    self.logger.debug('on_post running')
    try:
        # Gather inputs
        user_token = req.context['user']['user']
        # file_upload_req = self.web_util.parse_json_body(req.stream.read())
        # self.logger.debug('Posted Data: ' + dumps(file_upload_req))

        # Guard against CSRF
        # TODO Fix CSRF
        # self.web_util.check_csrf(user_token['ses'], req.headers)

        # TODO Add this security check back in
        # self.web_util.check_grant(security_context, user_token, 'FS_ADD')

        file_candidate = req.get_param('uploadCandidate')
        # TODO whitelist extensions, probably via config.

        # Read image as binary
        raw_file_from_web = file_candidate.file.read()
        file_uuid4 = str(uuid.uuid4())
        extension = self.get_extension_from_filename(file_candidate.filename)
        system_file_name = self.cfg.FILE_STORE_PATH + file_uuid4
        system_file = open(system_file_name, "wb")
        system_file.write(raw_file_from_web)
        system_file.close()

        destination_blob_name = file_uuid4 + '/' + file_candidate.filename
        storage_client = storage.Client(project=self.cfg.GCP_PROJECT_NAME)
        bucket = storage_client.get_bucket(self.cfg.GCP_CS_BUCKET_NAME)
        blob = Blob(destination_blob_name, bucket,
                    encryption_key=self.cfg.GCP_CS_ENCRTYPION_KEY)
        blob.upload_from_filename(system_file_name, predefined_acl='private')

        file_store_helper = FileStoreDBHelper(self.pers)
        file_store_req = {
            '_id': 0,
            'fileName': file_candidate.filename,
            'fileSize': len(raw_file_from_web),
            'logicalPath': "/",
            'contentType': file_candidate.type,
            'cloudId': destination_blob_name,
            'securityContext': security_context,
            'createUser': user_token['username'],
            'updateUser': user_token['username'],
        }
        file_store_check = file_store_helper.is_valid_file_store(
            file_store_req, goal='INS')
        if file_store_check == "OK":
            self.logger.debug('Object passed validation for insert')
            file_store = file_store_helper.ins(security_context, file_store_req)
        else:
            self.logger.debug('file_store check failed: %s', file_store_check)
            raise falcon.HTTPError(falcon.HTTP_400,
                                   'FileStore Integrity Check Failed',
                                   file_store_check)

        if os.path.exists(system_file_name):
            os.remove(system_file_name)
        del raw_file_from_web
        del system_file
        del file_candidate

        self.logger.debug('on_post done')
        resp.status = falcon.HTTP_201
        resp.body = dumps(file_store)
    except falcon.HTTPError:
        raise
    except Exception:
        self.logger.error("on_post failed: %s", traceback.format_exc())
        raise falcon.HTTPError(
            falcon.HTTP_400,  # Bad Request
            'Something went wrong at the server.',
            'Someone is already queued to research the issue.')

def publish_file(self, file_path, publish_path, mime_type=None):
    bucket = self.client.bucket(self._publication_bucket)
    blob = Blob(publish_path, bucket)
    blob.upload_from_filename(file_path, content_type=mime_type)
    blob.make_public(client=self.client)
    return blob.public_url