import os
import sys

import boto3


def upload():
    file_folder_path = sys.argv[1]
    latest_file_path = find_latest_file(file_folder_path)
    file_name = os.path.basename(latest_file_path)

    # Credentials were hard-coded here; read them from the environment
    # instead so secrets never live in source control.
    session = boto3.Session(
        aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
        aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'])
    bucket_name = 'bbg-jp-apks'
    bucket_location = session.client('s3').get_bucket_location(Bucket=bucket_name)

    # Upload the newest file and make it publicly readable.
    s3 = session.resource('s3')
    with open(latest_file_path, 'rb') as f:
        s3.Object(bucket_name, file_name).upload_fileobj(
            f, ExtraArgs={'ACL': 'public-read'})

    upload_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
        bucket_location['LocationConstraint'], bucket_name, file_name)
    print(upload_url)
    return upload_url
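# upload() calls find_latest_file(), which is defined elsewhere in the repo.
# A minimal sketch of what it plausibly does -- return the most recently
# modified file in a folder -- assuming stdlib-only semantics; the real
# helper may filter by extension or recurse.
import glob
import os


def find_latest_file(folder_path):
    """Return the path of the most recently modified file in folder_path."""
    entries = glob.glob(os.path.join(folder_path, '*'))
    files = [p for p in entries if os.path.isfile(p)]
    return max(files, key=os.path.getmtime)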
def read_links_from_file(self, date: datetime) -> List[NaverNewsLinkModel]:
    links = []
    file_name = get_link_file_path(date, from_s3=self.from_s3)
    if self.from_s3:
        try:
            obj = s3.Object(bucket_name, file_name)
            data = json.loads(obj.get()["Body"].read())
        except ClientError as e:
            print("Client Error", e, file_name)
            return []
    else:
        with open(file_name, encoding="utf-8") as f:
            data = json.load(f)

    # Each JSON record becomes a typed link model.
    for record in data:
        link_data = NaverNewsLinkModel(
            publish_time=record["time"],
            title=record["title"],
            provider=record["provider"],
            article_url=record["link"])
        links.append(link_data)
    return links
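# The reader/writer methods in this section reference module-level names that
# are defined elsewhere in the repo: `s3`, `bucket_name`, `ClientError`, and
# the get_*_file_path() helpers. (NaverNewsLinkModel and NaverNewsContentModel
# are the repo's own model classes and are not sketched here.) A minimal
# sketch of that shared setup; the bucket name and per-day path layout are
# placeholder assumptions.
import json
from datetime import datetime
from typing import List

import boto3
import botocore
from botocore.exceptions import ClientError

bucket_name = 'example-news-bucket'  # placeholder, not the real bucket
s3 = boto3.resource('s3')


def get_link_file_path(date: datetime, from_s3=False) -> str:
    # Hypothetical layout: one JSON file of article links per day.
    return 'links/{:%Y%m%d}.json'.format(date)


def get_content_file_path(date: datetime, from_s3=False) -> str:
    # Hypothetical layout: one JSON file of article contents per day.
    return 'contents/{:%Y%m%d}.json'.format(date)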
def write_contents_to_file(self, date: datetime, contents: List[NaverNewsContentModel]):
    file_name = get_content_file_path(date, from_s3=self.from_s3)
    content_dictionary = [x.serialize() for x in contents]
    if self.from_s3:
        obj = s3.Object(bucket_name, file_name)
        data = json.dumps(content_dictionary, ensure_ascii=False)
        obj.put(Body=data)
    else:
        with open(file_name, 'w', encoding="utf-8") as outfile:
            json.dump(content_dictionary, outfile, ensure_ascii=False)
def write_links_to_file(self, date: datetime, links: List[NaverNewsLinkModel]):
    file_name = get_link_file_path(date, from_s3=self.from_s3)
    dict_list = [x.to_dictionary() for x in links]
    if self.from_s3:
        obj = s3.Object(bucket_name, file_name)
        data = json.dumps(dict_list, ensure_ascii=False)
        obj.put(Body=data)
    else:
        with open(file_name, 'w', encoding='utf-8') as outfile:
            json.dump(dict_list, outfile, ensure_ascii=False)
def read_raw_contents_from_file(self, date: datetime):
    file_name = get_content_file_path(date, from_s3=self.from_s3)
    if self.from_s3:
        try:
            obj = s3.Object(bucket_name, file_name)
            data = json.loads(obj.get()["Body"].read())
        except ClientError as e:
            print("Client Error", e, file_name)
            return None
    else:
        with open(file_name, encoding="utf-8") as f:
            data = json.load(f)
    return data
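# Round-trip sketch for the writer/reader pair above, assuming a hypothetical
# `store` instance (from_s3=False) exposing these methods:
#
#     store.write_contents_to_file(datetime(2021, 3, 1), contents)
#     raw = store.read_raw_contents_from_file(datetime(2021, 3, 1))
#     # `raw` is the list of dicts produced by x.serialize() above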
def check_if_file_is_exists(file: str, from_s3=False):
    if from_s3:
        try:
            # load() issues a HEAD request; a 404 means the key is absent.
            s3.Object(bucket_name, file).load()
            return True
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] != "404":
                print(e.response)
            return False
    return os.path.isfile(file)
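# Example guard built on check_if_file_is_exists(), assuming the shared setup
# sketched earlier; `reader` is a hypothetical instance of the class the
# read/write methods above belong to.
def read_links_if_present(reader, date: datetime):
    """Return the day's links, or [] if the link file was never written."""
    path = get_link_file_path(date, from_s3=reader.from_s3)
    if not check_if_file_is_exists(path, from_s3=reader.from_s3):
        return []
    return reader.read_links_from_file(date)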
def delete_file(fileName):  # noqa: E501
    """Delete an object by file name

    Removes the named object from the configured bucket  # noqa: E501

    :param fileName: file name
    :type fileName: str

    :rtype: None
    """
    if validate_system():
        try:
            session = boto3.Session(
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key)
            s3 = session.resource('s3')
            s3.Object(bucketName, fileName).delete()
            return 'File deleted'
        except ClientError:
            return 'File could not be deleted'
    return 'Amazon S3 is not configured correctly in the /etc/config.yaml file'
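# delete_file() assumes module-level aws_access_key_id, aws_secret_access_key,
# and bucketName loaded from /etc/config.yaml, plus a validate_system() check.
# A minimal sketch of that configuration step, assuming a PyYAML dependency
# and flat key names; the real config schema may differ.
import yaml

aws_access_key_id = None
aws_secret_access_key = None
bucketName = None


def validate_system():
    """Load S3 settings from /etc/config.yaml; return True when all are set."""
    global aws_access_key_id, aws_secret_access_key, bucketName
    try:
        with open('/etc/config.yaml') as cfg:
            conf = yaml.safe_load(cfg)
        aws_access_key_id = conf['aws_access_key_id']
        aws_secret_access_key = conf['aws_secret_access_key']
        bucketName = conf['bucketName']
        return True
    except (OSError, KeyError, yaml.YAMLError):
        return False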
def write_to_s3_bucket(data_tweets):
    """
    Write a batch of tweets to an S3 bucket as a CSV file.

    :param data_tweets: list of tweet tuples
    :return: None
    """
    s3 = boto3.resource('s3',
                        aws_access_key_id=AWS_Access_Key_ID,
                        aws_secret_access_key=AWS_Secret_Access_Key)
    all_tweets = []
    global index
    index += 1
    fn = "examples/file_" + str(index) + ".csv"
    print("writing %d tweets" % len(data_tweets))
    for twt in data_tweets:
        # Join the tuple's fields, substituting '' for any None value.
        str_tweets = ",".join([x or '' for x in twt])
        all_tweets.append(str_tweets)
    all_twt = "\n".join(all_tweets)

    bucket = 'tweetBucket'  # note: AWS requires lowercase bucket names
    t_bucket = s3.Bucket(bucket)
    exists = True
    try:
        s3.meta.client.head_bucket(Bucket=bucket)
    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, check whether it was a 404 error.
        # A 404 means the bucket does not exist.
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            exists = False
    if not exists:
        print("creating bucket")
        t_bucket = s3.create_bucket(Bucket=bucket)
    key = s3.Object(bucket, fn)
    key.put(Body=all_twt)
    t_bucket.Acl().put(ACL='public-read')
    print("done writing to bucket files %s" % fn)
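# Hypothetical driver for write_to_s3_bucket(), assuming the global `index`
# counter and the credential constants are defined at module level:
#
#     index = 0
#     AWS_Access_Key_ID = os.environ["AWS_ACCESS_KEY_ID"]
#     AWS_Secret_Access_Key = os.environ["AWS_SECRET_ACCESS_KEY"]
#     tweets = [("2021-03-01", "someuser", "hello world")] * 5
#     write_to_s3_bucket(tweets)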