Beispiel #1
0
def upload():
    """Upload the most recent file from the folder given as ``sys.argv[1]``
    to the ``bbg-jp-apks`` S3 bucket with a public-read ACL.

    :returns: the public https URL of the uploaded object.
    """
    import os  # local import so the snippet stays self-contained

    file_folder_path = sys.argv[1]
    latest_file_path = find_latest_file(file_folder_path)
    # basename is portable and clearer than splitting on '/'
    file_name = os.path.basename(latest_file_path)

    # SECURITY: credentials must never be hard-coded in source (the
    # originals were committed and must be rotated).  Read them from the
    # environment instead; boto3 also falls back to its default
    # credential chain when these are None.
    session = boto3.Session(
        aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
    )
    bucket_name = 'bbg-jp-apks'
    bucket_location = session.client('s3').get_bucket_location(Bucket=bucket_name)

    s3 = session.resource('s3')
    with open(latest_file_path, 'rb') as f:
        s3.Object(bucket_name, file_name).upload_fileobj(
            f, ExtraArgs={'ACL': 'public-read'})

    upload_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
        bucket_location['LocationConstraint'], bucket_name, file_name)

    print(upload_url)

    return upload_url
Beispiel #2
0
 def read_links_from_file(self, date: datetime) -> List[NaverNewsLinkModel]:
     """Load the saved link records for *date* and build link models.

     Reads from S3 when ``self.from_s3`` is set, otherwise from the local
     file system.  Returns an empty list when the S3 fetch fails.
     """
     file_name = get_link_file_path(date, from_s3=self.from_s3)
     if self.from_s3:
         try:
             raw = s3.Object(bucket_name, file_name).get()["Body"].read()
         except ClientError as e:
             print("Client Error", e, file_name)
             return []
         # json.loads cannot raise ClientError, so parse outside the try
         data = json.loads(raw)
     else:
         with open(file_name) as fp:
             data = json.load(fp)
     return [
         NaverNewsLinkModel(publish_time=record["time"],
                            title=record["title"],
                            provider=record["provider"],
                            article_url=record["link"])
         for record in data
     ]
Beispiel #3
0
    def write_contents_to_file(self, date: datetime,
                               contents: List[NaverNewsContentModel]):
        """Serialize *contents* and persist them for *date*.

        Writes to S3 when ``self.from_s3`` is set, otherwise to a local
        UTF-8 JSON file.
        """
        file_name = get_content_file_path(date, from_s3=self.from_s3)
        content_dictionary = [x.serialize() for x in contents]

        if self.from_s3:
            # renamed from ``object`` so the builtin is not shadowed
            s3_object = s3.Object(bucket_name, file_name)
            data = json.dumps(content_dictionary, ensure_ascii=False)
            s3_object.put(Body=data)
        else:
            with open(file_name, 'w', encoding="utf-8") as outfile:
                json.dump(content_dictionary, outfile, ensure_ascii=False)
Beispiel #4
0
    def write_links_to_file(self, date: datetime,
                            links: List[NaverNewsLinkModel]):
        """Serialize *links* and persist them for *date*.

        Writes to S3 when ``self.from_s3`` is set, otherwise to a local
        UTF-8 JSON file.
        """
        file_name = get_link_file_path(date, from_s3=self.from_s3)

        dict_list = [x.to_dictionary() for x in links]

        # the original nested a second identical ``if self.from_s3:`` here;
        # one check is enough
        if self.from_s3:
            # renamed from ``object`` so the builtin is not shadowed
            s3_object = s3.Object(bucket_name, file_name)
            data = json.dumps(dict_list, ensure_ascii=False)
            s3_object.put(Body=data)
        else:
            with open(file_name, 'w', encoding='utf-8') as outfile:
                json.dump(dict_list, outfile, ensure_ascii=False)
Beispiel #5
0
 def read_raw_contents_from_file(self, date: datetime):
     """Return the parsed JSON contents saved for *date*.

     Reads from S3 when ``self.from_s3`` is set, otherwise from the local
     file system.  Returns ``None`` when the S3 fetch fails.
     """
     file_name = get_content_file_path(date, from_s3=self.from_s3)
     if not self.from_s3:
         with open(file_name) as fp:
             return json.load(fp)
     try:
         body = s3.Object(bucket_name, file_name).get()["Body"].read()
     except ClientError as e:
         print("Client Error", e, file_name)
         return None
     # json.loads cannot raise ClientError, so parse outside the try
     return json.loads(body)
Beispiel #6
0
def check_if_file_is_exists(file: str, from_s3=False) -> bool:
    """Return ``True`` when *file* exists, in S3 or on the local disk.

    :param file: object key (when ``from_s3``) or local file path.
    :param from_s3: check the configured S3 bucket instead of local disk.
    """
    if from_s3:
        try:
            s3.Object(bucket_name, file).load()
            return True
        except botocore.exceptions.ClientError as e:
            # 404 simply means "not there"; any other error is logged but
            # still reported as absent (best-effort check, as before)
            if e.response['Error']['Code'] != "404":
                print(e.response)
            return False
    # local path: isfile already returns the bool we need
    return os.path.isfile(file)
def delete_file(fileName):  # noqa: E501
    """Delete an object from the configured S3 bucket.

    :param fileName: file name
    :type fileName: str

    :rtype: str -- human-readable status message
    """
    # guard clause: bail out early when S3 is not configured
    if not validate_system():
        return 'Amazon S3 is not configured correctly in /etc/config.yaml file'
    try:
        session = boto3.Session(aws_access_key_id, aws_secret_access_key)
        s3 = session.resource('s3')
        s3.Object(bucketName, fileName).delete()
        return 'File deleted'
    except Exception:
        # narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; still best-effort on any AWS failure
        return 'File can not be deleted'
Beispiel #8
0
    def write_to_s3_bucket(data_tweets):
        """
        Write a batch of tweets to the 'tweetBucket' S3 bucket as a CSV file.

        Creates the bucket when it does not exist yet, then makes it
        public-read.  The file name is derived from the module-level
        ``index`` counter, which is incremented on every call.

        :param data_tweets: list of tweet tuples
        :return: None
        """
        s3 = boto3.resource('s3', aws_access_key_id=AWS_Access_Key_ID,
                            aws_secret_access_key=AWS_Secret_Access_Key)
        global index
        index += 1
        fn = "examples/file_" + str(index) + ".csv"
        # print() for Python 3 compatibility (the rest of the file uses it)
        print("writing 5 tweets")

        # one CSV line per tweet; None fields become empty strings
        all_twt = "\n".join(
            ",".join(x or '' for x in twt) for twt in data_tweets)

        t_bucket = s3.Bucket('tweetBucket')
        exists = True
        try:
            s3.meta.client.head_bucket(Bucket='tweetBucket')
        except botocore.exceptions.ClientError as e:
            # If a client error is thrown, then check that it was a 404 error.
            # If it was a 404 error, then the bucket does not exist.
            error_code = int(e.response['Error']['Code'])
            if error_code == 404:
                exists = False
        if exists is False:
            print("creating bucket")
            t_bucket = s3.create_bucket(Bucket='tweetBucket')

        key = s3.Object("tweetBucket", fn)
        key.put(Body=all_twt)
        t_bucket.Acl().put(ACL='public-read')
        print("done writing to bucket files %s" % fn)
        return