Exemple #1
0
def download_file_s3(keyname,
                     aws_key,
                     aws_secret,
                     s3_bucket,
                     s3_folder=None,
                     local_folder=None):
    """Download a file from an S3 bucket and return its local path.

    The file is saved as ``local_folder/keyname`` (the folder is created via
    ``ensure_directory``) or, when ``local_folder`` is None, as ``keyname``
    relative to the current working directory. If a file already exists at
    that path, nothing is downloaded and the existing path is returned.

    Args:
        keyname: name of the object inside ``s3_folder`` (or the bucket root).
        aws_key: AWS access key passed to ``S3Connection``.
        aws_secret: AWS secret key passed to ``S3Connection``.
        s3_bucket: name of the bucket to read from.
        s3_folder: optional key prefix ("folder") inside the bucket.
        local_folder: optional local directory to save the file in.

    Returns:
        The local path of the (possibly pre-existing) file.
    """
    # S3 keys always use '/', so build them with posixpath rather than the
    # platform-dependent os.path (which would emit '\\' on Windows).
    import posixpath

    if local_folder is not None:
        ensure_directory(local_folder)
        path = os.path.join(local_folder, keyname)
    else:
        path = keyname

    if os.path.isfile(path):
        # Parenthesized single-argument print behaves the same on Python 2 and 3.
        print('file %s already exists!' % path)
        return path

    conn = S3Connection(aws_key, aws_secret)
    bucket = conn.get_bucket(s3_bucket)

    if s3_folder:
        aws_keyname = posixpath.join(s3_folder, keyname)
    else:
        aws_keyname = keyname

    print('downloading data from S3...')
    s3key = Key(bucket)
    s3key.key = aws_keyname
    s3key.get_contents_to_filename(path)

    return path
Exemple #2
0
def download_file_s3(aws_path, aws_config, local_folder=DATA_PATH):
    """Download a file from an S3 bucket and save it in the local folder.

    ``aws_path`` must start with something matching ``S3_PREFIX``; what
    follows is read as ``<bucket>/<optional folders>/<file name>``. The file
    is saved as ``local_folder/<file name>`` (or just ``<file name>`` when
    ``local_folder`` is None). If that file already exists, nothing is
    downloaded.

    Args:
        aws_path: full S3 path of the object to fetch.
        aws_config: object exposing ``access_key`` and ``secret_key``.
        local_folder: directory to save into; created via ``ensure_directory``.

    Returns:
        The local path of the (possibly pre-existing) file.

    Raises:
        ValueError: if ``aws_path`` does not match ``S3_PREFIX`` or names a
            bucket without any key.
    """
    # S3 keys always use '/', so build them with posixpath rather than the
    # platform-dependent os.path.
    import posixpath

    # remove the prefix and extract the S3 bucket, folder, and file name
    m = re.match(S3_PREFIX, aws_path)
    if m is None:
        raise ValueError('not a valid S3 path: %r' % (aws_path,))
    split = aws_path[m.end():].split('/')
    s3_bucket = split.pop(0)
    if not split:
        raise ValueError('S3 path has no key after the bucket: %r' % (aws_path,))
    s3_folder = '/'.join(split[:-1])
    keyname = split[-1]

    # create the local folder if necessary
    if local_folder is not None:
        ensure_directory(local_folder)
        path = os.path.join(local_folder, keyname)
    else:
        path = keyname

    if os.path.isfile(path):
        # Parenthesized single-argument print behaves the same on Python 2 and 3.
        print('file %s already exists!' % path)
        return path

    conn = S3Connection(aws_config.access_key, aws_config.secret_key)
    bucket = conn.get_bucket(s3_bucket)

    if s3_folder:
        aws_keyname = posixpath.join(s3_folder, keyname)
    else:
        aws_keyname = keyname

    print('downloading data from S3...')
    s3key = Key(bucket)
    s3key.key = aws_keyname
    s3key.get_contents_to_filename(path)

    return path
Exemple #3
0
def download_file_s3(aws_path, aws_config, local_folder=DATA_DL_PATH):
    """Download a file from an S3 bucket and save it in the local folder.

    ``aws_path`` must start with something matching ``S3_PREFIX``; what
    follows is read as ``<bucket>/<optional folders>/<file name>``. The file
    is saved as ``local_folder/<file name>`` (or just ``<file name>`` when
    ``local_folder`` is None). If that file already exists, a warning is
    logged and nothing is downloaded.

    Args:
        aws_path: full S3 path of the object to fetch.
        aws_config: object exposing ``access_key`` and ``secret_key``.
        local_folder: directory to save into; created via ``ensure_directory``.

    Returns:
        The local path of the (possibly pre-existing) file.

    Raises:
        ValueError: if ``aws_path`` does not match ``S3_PREFIX`` or names a
            bucket without any key.
    """
    # S3 keys always use '/', so build them with posixpath rather than the
    # platform-dependent os.path.
    import posixpath

    # remove the prefix and extract the S3 bucket, folder, and file name
    m = re.match(S3_PREFIX, aws_path)
    if m is None:
        raise ValueError('not a valid S3 path: %r' % (aws_path,))
    split = aws_path[m.end():].split('/')
    s3_bucket = split.pop(0)
    if not split:
        raise ValueError('S3 path has no key after the bucket: %r' % (aws_path,))
    s3_folder = '/'.join(split[:-1])
    keyname = split[-1]

    # create the local folder if necessary
    if local_folder is not None:
        ensure_directory(local_folder)
        path = os.path.join(local_folder, keyname)
    else:
        path = keyname

    if os.path.isfile(path):
        logger.warning('file %s already exists!', path)
        return path

    conn = S3Connection(aws_config.access_key, aws_config.secret_key)
    bucket = conn.get_bucket(s3_bucket)

    if s3_folder:
        aws_keyname = posixpath.join(s3_folder, keyname)
    else:
        aws_keyname = keyname

    logger.debug('downloading data from S3...')
    s3key = Key(bucket)
    s3key.key = aws_keyname
    s3key.get_contents_to_filename(path)
    logger.info('file saved at %s', path)

    return path
	def pull_s3_file(self, bucket, key, dst):
		"""
		Download one object from an S3 bucket to a local file.

		bucket -- name of the S3 bucket to read from
		key    -- key (name) of the object inside the bucket
		dst    -- local filename the object's contents are written to

		Connects with self.aws_id / self.aws_key. Errors from boto are
		not caught here and propagate to the caller.
		"""
		conn = boto.connect_s3(self.aws_id, self.aws_key)
		# Reading must not call create_bucket(): that issues a bucket-creation
		# request, which can create a stray bucket or fail on an existing one.
		# validate=False builds the handle without an extra round trip; a
		# missing bucket still surfaces as an error from the download below.
		b = conn.get_bucket(bucket, validate=False)
		k = Key(b)
		k.key = key
		k.get_contents_to_filename(dst)
Exemple #5
0
    def _get_bill_amount(self):
        """Fetch this month's AWS billing CSV from S3 and return the invoice total.

        The object key follows the name Amazon gives the report,
        ``<account id>-aws-billing-csv-<yyyy>-<mm>.csv``, built from
        ``self.aws_account_id`` and the current local year and month. The
        object is downloaded from ``self.s3_bucket_name`` to
        ``self.billing_file`` and scanned row by row.

        Returns:
            The last field of the first row whose concatenated fields contain
            ``InvoiceTotal``; ``False`` when no row matches; or one of the
            strings ``"conn_error"``, ``"bucket_error"``, ``"key_error"``,
            ``"csv_error"`` naming the step that failed.
        """
        import sys

        # Billing file name, generated by Amazon itself
        # Format : 123456789012-aws-billing-csv-yyyy-mm.csv
        # A single timestamp keeps year and month consistent across a rollover.
        now = datetime.datetime.now()
        s3_file_key = "{}-aws-billing-csv-{}.csv".format(
            self.aws_account_id,
            now.strftime("%Y-%m"),
        )

        # Connection to s3 service
        try:
            conn = boto.connect_s3(self.aws_access_key_id,
                                   self.aws_secret_access_key)
        except Exception:
            return "conn_error"

        # Connection to the bucket
        try:
            bucket = conn.get_bucket(self.s3_bucket_name)
        except Exception:
            return "bucket_error"

        # Fetch the objects keys and get the billing file
        try:
            k = Key(bucket)
            k.key = s3_file_key
            k.get_contents_to_filename(self.billing_file)
            k.close()
        except Exception:
            return "key_error"

        # Parse the file and get the InvoiceTotal amount
        try:
            # csv needs a binary file on Python 2 but a text file opened with
            # newline="" on Python 3; "rb" on Python 3 makes every read fail.
            if sys.version_info[0] < 3:
                billing = open(self.billing_file, "rb")
            else:
                billing = open(self.billing_file, "r", newline="")
            with billing as f:
                for row in csv.reader(f):
                    if "InvoiceTotal" in "".join(row):
                        return row[-1]
        except Exception:
            return "csv_error"

        return False
Exemple #6
0
    def _get_bill_amount(self):
        """Download the current month's AWS billing CSV and return its invoice total.

        The S3 key is ``<account id>-aws-billing-csv-<yyyy>-<mm>.csv`` (the
        name Amazon generates), using ``self.aws_account_id`` and the current
        local date. The object is fetched from ``self.s3_bucket_name`` into
        ``self.billing_file`` and then parsed as CSV.

        Returns:
            The last field of the first row whose concatenated fields contain
            ``InvoiceTotal``; ``False`` when no row matches; or one of the
            strings ``"conn_error"``, ``"bucket_error"``, ``"key_error"``,
            ``"csv_error"`` naming the step that failed.
        """
        import sys

        # Billing file name, generated by Amazon itself
        # Format : 123456789012-aws-billing-csv-yyyy-mm.csv
        # Sample the clock once so year and month cannot disagree at a rollover.
        now = datetime.datetime.now()
        s3_file_key = "{}-aws-billing-csv-{}.csv".format(
            self.aws_account_id,
            now.strftime("%Y-%m"),
        )

        # Connection to s3 service
        try:
            conn = boto.connect_s3(self.aws_access_key_id, self.aws_secret_access_key)
        except Exception:
            return "conn_error"

        # Connection to the bucket
        try:
            bucket = conn.get_bucket(self.s3_bucket_name)
        except Exception:
            return "bucket_error"

        # Fetch the objects keys and get the billing file
        try:
            k = Key(bucket)
            k.key = s3_file_key
            k.get_contents_to_filename(self.billing_file)
            k.close()
        except Exception:
            return "key_error"

        # Parse the file and get the InvoiceTotal amount
        try:
            # csv needs a binary file on Python 2 but a text file opened with
            # newline="" on Python 3; "rb" on Python 3 makes every read fail.
            if sys.version_info[0] < 3:
                billing = open(self.billing_file, "rb")
            else:
                billing = open(self.billing_file, "r", newline="")
            with billing as f:
                for row in csv.reader(f):
                    if "InvoiceTotal" in "".join(row):
                        return row[-1]
        except Exception:
            return "csv_error"

        return False
Exemple #7
0
    def _get_bill_amount(self):
        """Retrieve the current month's AWS billing CSV and return the invoice total.

        The S3 key is ``<account id>-aws-billing-csv-<yyyy>-<mm>.csv`` (the
        name Amazon generates), using ``self.aws_account_id`` and the current
        local date. The object is fetched from ``self.s3_bucket_name`` into
        ``self.billing_file`` and then parsed as CSV.

        Returns:
            The last field of the first row whose concatenated fields contain
            ``InvoiceTotal``; ``False`` when no row matches; or one of the
            strings ``'conn_error'``, ``'bucket_error'``, ``'key_error'``,
            ``'csv_error'`` naming the step that failed.
        """
        import sys

        # Billing file name, generated by Amazon itself
        # Format : 123456789012-aws-billing-csv-yyyy-mm.csv
        # Sample the clock once so year and month cannot disagree at a rollover.
        now = datetime.datetime.now()
        s3_file_key = (self.aws_account_id + '-aws-billing-csv-' +
                       now.strftime('%Y-%m') + '.csv')

        # Connection to s3 service
        try:
            conn = boto.connect_s3(self.aws_access_key_id,
                                   self.aws_secret_access_key)
        except Exception:
            return 'conn_error'

        # Connection to the bucket
        try:
            bucket = conn.get_bucket(self.s3_bucket_name)
        except Exception:
            return 'bucket_error'

        # Fetch the objects keys and get the billing file
        try:
            k = Key(bucket)
            k.key = s3_file_key
            k.get_contents_to_filename(self.billing_file)
            k.close()
        except Exception:
            return 'key_error'

        # Parse the file and get the InvoiceTotal amount
        try:
            # csv needs a binary file on Python 2 but a text file opened with
            # newline='' on Python 3; 'rb' on Python 3 makes every read fail.
            if sys.version_info[0] < 3:
                billing = open(self.billing_file, 'rb')
            else:
                billing = open(self.billing_file, 'r', newline='')
            with billing as f:
                for row in csv.reader(f):
                    if 'InvoiceTotal' in ''.join(row):
                        return row[-1]
        except Exception:
            return 'csv_error'

        return False
def download(poll_interval=1.0):
    """Wait for 'list_links_a.txt' in S3, save it locally, then delete it remotely.

    Repeatedly tries to fetch the key 'list_links_a.txt' from the
    'distributed-web-crawler' bucket into the local file 'input_links_a.txt'.
    Every ``S3ResponseError`` (from the download or the delete) triggers a
    retry, so this call blocks until both steps succeed.

    Args:
        poll_interval: seconds to sleep between attempts, so the loop does
            not flood S3 with back-to-back requests while the key is absent.
    """
    import time

    s3_conn = s3()
    try:
        # The Bucket handle is constructed directly instead of via
        # s3_conn.create_bucket(...), as the original author chose.
        bucket = Bucket(s3_conn, 'distributed-web-crawler')

        while True:
            try:
                k = Key(bucket)

                k.key = 'list_links_a.txt'
                k.get_contents_to_filename('input_links_a.txt')
                bucket.delete_key(k)

                break

            except S3ResponseError:
                time.sleep(poll_interval)
    finally:
        # Release the connection even if an unexpected exception escapes.
        s3_conn.close()
def download(poll_interval=1.0):
    """Wait for 'list_links_b.txt' in S3, save it locally, then delete it remotely.

    Repeatedly tries to fetch the key 'list_links_b.txt' from the
    'distributed-web-crawler' bucket into the local file 'input_links_b.txt'.
    Every ``S3ResponseError`` (from the download or the delete) triggers a
    retry, so this call blocks until both steps succeed.

    Args:
        poll_interval: seconds to sleep between attempts, so the loop does
            not flood S3 with back-to-back requests while the key is absent.
    """
    import time

    s3_conn = s3()
    try:
        # The Bucket handle is constructed directly instead of via
        # s3_conn.create_bucket(...), as the original author chose.
        bucket = Bucket(s3_conn, 'distributed-web-crawler')

        while True:
            try:
                k = Key(bucket)

                k.key = 'list_links_b.txt'
                k.get_contents_to_filename('input_links_b.txt')
                bucket.delete_key(k)

                break

            except S3ResponseError:
                time.sleep(poll_interval)
    finally:
        # Release the connection even if an unexpected exception escapes.
        s3_conn.close()