Example #1
    def fetch_s3_object(self, bucket, key):
        if not self.no_cache:
            m = hashlib.md5()
            m.update(bucket+key)
            cache_key = m.hexdigest()

            if cache_key in self.cache:
                self.logger.debug('cache hit for %s' % cache_key)
                obj = self.cache[cache_key]
                print obj
                return obj
            else:
                self.logger.debug('cache miss for %s' % cache_key)

            conn = boto.connect_s3()
            b = conn.get_bucket(bucket)
            k = b.get_key(key)
            if k:
                obj = (k.get_contents_as_string(), self.build_s3_meta(k))
                self.cache[cache_key] = obj
                return obj
            else:
                return None, None
        else:
            conn = boto.connect_s3()
            k = conn.get_bucket(bucket).get_key(key)
            if k:
                meta = self.build_s3_meta(k)
                return k.get_contents_as_string(), meta
            else:
                return None, None
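A minimal standalone sketch of the same caching idea, assuming a plain dict as the cache; the bucket and key names are placeholders. The MD5 of bucket+key serves as the cache key, and a miss falls through to a normal boto fetch.

import hashlib

import boto

_cache = {}

def cached_fetch(bucket_name, key_name):
    # md5 of bucket+key as the cache key (encoded for Python 3 compatibility)
    cache_key = hashlib.md5((bucket_name + key_name).encode('utf-8')).hexdigest()
    if cache_key in _cache:
        return _cache[cache_key]
    # cache miss: fetch from S3 and remember the result
    conn = boto.connect_s3()
    key = conn.get_bucket(bucket_name).get_key(key_name)
    obj = key.get_contents_as_string() if key else None
    _cache[cache_key] = obj
    return obj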
Example #2
def uploadThumbs(src, bucket_name, aws_access_id, aws_access_secret):
  updated_keys = 0

  # connect to the bucket
  conn = boto.connect_s3(aws_access_id, aws_access_secret)

  bucket = conn.get_bucket(bucket_name)

  for dirname, dirnames, filenames in os.walk(src):
    for filename in filenames:
      name, ext = filename.split('.')
      if ext == 'jpg':

        if updated_keys >= 100:
          # Close and reopen the connection (and refresh the bucket handle)
          # every 100 uploads, then reset the counter
          conn.close()
          conn = boto.connect_s3(aws_access_id, aws_access_secret)
          bucket = conn.get_bucket(bucket_name)
          updated_keys = 0

        key_dir = dirname.replace(os.path.abspath(src), '')
        sys.stdout.write("saving: " + key_dir + "/" + filename)
        k = Key(bucket)
        k.key = key_dir + "/" + filename
        k.set_contents_from_filename(os.path.join(dirname, filename),cb=done_cb)
        k.set_acl('public-read')
        sys.stdout.write("\n")
        sys.stdout.flush()
        updated_keys = updated_keys + 1
Example #3
def main():
    parser = argparse.ArgumentParser('Check the multipart upload status')
    parser.add_argument('-c', '--cancel', action="store_true", help='cancel outstanding multipart uploads older than 24 hours')
    parser.add_argument('-f', '--force', action="store_true", help='force cancellation of all outstanding multipart uploads')

    args = vars(parser.parse_args())

    if exists(join(expanduser('~'), '.aws/credentials')):
        # This relies on a ~/.aws/credentials file holding the '<aws access key>', '<aws secret key>'
        LOG.info("Using ~/.aws/credentials")
        s3_connection = boto.connect_s3(profile_name='chiles')
    else:
        # This relies on a ~/.boto or /etc/boto.cfg file holding the '<aws access key>', '<aws secret key>'
        LOG.info("Using ~/.boto or /etc/boto.cfg")
        s3_connection = boto.connect_s3()

    bucket = s3_connection.get_bucket(CHILES_BUCKET_NAME)

    one_day_ago = datetime.datetime.now() - datetime.timedelta(hours=24)
    for item in bucket.list_multipart_uploads():
        LOG.info('key_name: {0}, initiated: {1}'.format(item.key_name, item.initiated))
        date_initiated = datetime.datetime.strptime(item.initiated, '%Y-%m-%dT%H:%M:%S.%fZ')
        if (date_initiated < one_day_ago and args['cancel']) or args['force']:
            LOG.info('Cancelling {0}'.format(item.key_name))
            bucket.cancel_multipart_upload(item.key_name, item.id)
Example #4
def main():
    logging.basicConfig(level=logging.INFO)
    args = parser.parse_args()
    log.debug("Got args: %s" % args)


    # Check that src is a valid S3 url
    split_rs = urlparse.urlsplit(args.src)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % args.src)

    # Check that dest does not exist
    if os.path.exists(args.dest):
        if args.force:
            os.remove(args.dest)
        else:
            raise ValueError("Destination file '%s' exists, specify -f to"
                             " overwrite" % args.dest)

    # Split out the bucket and the key
    s3 = boto.connect_s3()
    bucket = s3.lookup(split_rs.netloc)
    key = bucket.get_key(split_rs.path)

    # Determine the total size and calculate byte ranges
    conn = boto.connect_s3()
    resp = conn.make_request("HEAD", bucket=bucket, key=key)
    size = int(resp.getheader("content-length"))
    logging.info("Got headers: %s" % resp.getheaders())

    # Skipping multipart if file is less than 1mb
    if size < 1024 * 1024:
        t1 = time.time()
        key.get_contents_to_filename(args.dest)
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        log.info("Finished single-part download of %0.2fM in %0.2fs (%0.2fMbps)" %
                (s, t2, s/t2))
    else:
        # Touch the file
        fd = os.open(args.dest, os.O_CREAT)
        os.close(fd)
    
        num_parts = args.num_processes

        def arg_iterator(num_parts):
            for min_byte, max_byte in gen_byte_ranges(size, num_parts):
                yield (bucket.name, key.name, args.dest, min_byte, max_byte)

        s = size / 1024 / 1024.
        try:
            t1 = time.time()
            pool = Pool(processes=args.num_processes)
            pool.map_async(do_part_download, arg_iterator(num_parts)).get(9999999)
            t2 = time.time() - t1
            log.info("Finished downloading %0.2fM in %0.2fs (%0.2fMbps)" %
                    (s, t2, s/t2))
        except KeyboardInterrupt:
            log.info("User terminated")
        except Exception, err:
            log.error(err)
Example #5
def create_connection(settings):
  """ Creates an S3 connection using credentials is skipauth is false """
  if settings.get('skipauth'):
    conn = boto.connect_s3()
  else:
    conn = boto.connect_s3(settings['key'], settings['secret'])
  return conn
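A hedged usage sketch for create_connection(); the settings keys mirror the ones read above, and the credential values are placeholders.

# explicit credentials (placeholder values)
conn = create_connection({'skipauth': False, 'key': 'AKIA...', 'secret': '...'})

# or rely on boto's own config / IAM role
conn = create_connection({'skipauth': True})
print(conn.get_all_buckets())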
Example #6
def download_s3_bucket(bucket_name, target, credentials=None):
    if credentials:
        conn = boto.connect_s3(*credentials)
    else:
        conn = boto.connect_s3()
    b = conn.get_bucket(bucket_name)
    L = list(b.list())
    L.sort(key=lambda x: x.name)
    L = L[::-1]
    f_suffix = '_$folder$'
    for l in L:
        n = l.name
        if n.endswith(f_suffix):
            dirname = n[:-len(f_suffix)]
            pathname = os.path.join(target, dirname)
            if not os.path.exists(pathname):
                print(n)
                os.mkdir(pathname)
        else:
            pathname = os.path.join(target, n)
            dirn = os.path.split(pathname)[0]
            if dirn and not os.path.isdir(dirn):
                print('making directory %s' % dirn)
                os.makedirs(dirn)
            if not os.path.exists(pathname):
                print(n)
                l.get_contents_to_filename(pathname)
Example #7
def get_s3_connection(aws_connect_kwargs, location, rgw, s3_url):
    if s3_url and rgw:
        rgw = urlparse(s3_url)
        s3 = boto.connect_s3(
            is_secure=rgw.scheme == 'https',
            host=rgw.hostname,
            port=rgw.port,
            calling_format=OrdinaryCallingFormat(),
            **aws_connect_kwargs
        )
    elif is_fakes3(s3_url):
        fakes3 = urlparse(s3_url)
        s3 = S3Connection(
            is_secure=fakes3.scheme == 'fakes3s',
            host=fakes3.hostname,
            port=fakes3.port,
            calling_format=OrdinaryCallingFormat(),
            **aws_connect_kwargs
        )
    elif is_walrus(s3_url):
        walrus = urlparse(s3_url).hostname
        s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
    else:
        aws_connect_kwargs['is_secure'] = True
        try:
            s3 = connect_to_aws(boto.s3, location, **aws_connect_kwargs)
        except AnsibleAWSError:
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            s3 = boto.connect_s3(**aws_connect_kwargs)
    return s3
Example #8
    def download_s3(self,fromPath,toPath):
        """download from S3 to local folder

        Args:
            fromPath (str): S3 URL
            toPath (str): local folder
        """
        if fromPath.startswith("s3n://"):
            noSchemePath = fromPath[6:]
        elif fromPath.startswith("s3://"):
            noSchemePath = fromPath[5:]
        parts = noSchemePath.split('/')
        bucket = parts[0]
        s3path = noSchemePath[len(bucket)+1:]
        if self.key:
            self.conn = boto.connect_s3(self.key,self.secret)
        else:
            self.conn = boto.connect_s3()
        print bucket, s3path, toPath
        b = self.conn.get_bucket(bucket)
        for k in b.list(prefix=s3path):
            basename = os.path.basename(k.name)
            fnew = toPath+"/"+basename
            print "copying ",k.name,"to",fnew
            k.get_contents_to_filename(fnew)
Example #9
    def _getDataFiles(self,file_master=0):
        """
        Retrieves metadata and parsed dataframe files
            (generated by utilities/hddata_process.py) from S3
        """
        comm = self._comm
        working_dir = self._working_dir
        data_source_bucket = self._datasource_bucket
       
        if comm.rank == file_master:
            if not op.exists(op.join( working_dir,'metadata.txt')):
                conn = boto.connect_s3()
                b = conn.get_bucket(data_source_bucket)
                k = Key(b)
                k.key = 'metadata.txt'
                k.get_contents_to_filename(op.join( working_dir,'metadata.txt'))

        if comm.rank == file_master:
            if not op.exists(op.join( working_dir, 'trimmed_dataframe.pandas')):
                conn = boto.connect_s3()
                b = conn.get_bucket(self._working_bucket)
                k = Key(b)
                k.key ='trimmed_dataframe.pandas'
                k.get_contents_to_filename(op.join( working_dir,'trimmed_dataframe.pandas'))
        comm.barrier()
Example #10
    def fetch(self, files, force=False, check=False, verbose=1):
        assert (self.profile_name or
                (self.access_key and self.secret_access_key))

        files = Fetcher.reformat_files(files)  # allows flexibility
        import boto
        if self.profile_name is not None:
            s3 = boto.connect_s3(profile_name=self.profile_name)
        elif (self.access_key is not None and
              self.secret_access_key is not None):
            s3 = boto.connect_s3(self.access_key, self.secret_access_key)

        bucket_names = np.unique([opts.get('bucket') for f, rk, opts in files])
        files_ = []
        for bucket_name in bucket_names:  # loop over bucket names: efficient
            if bucket_name:  # bucket requested
                buck = s3.get_bucket(bucket_name)
            else:  # default to first bucket
                buck = s3.get_all_buckets()[0]

            for file_, remote_key, opts in files:
                if opts.get('bucket') != bucket_name:
                    continue  # get all files from the current bucket only.
                target_file = op.join(self.data_dir, file_)
                key = buck.get_key(remote_key)
                if not key:
                    warnings.warn('Failed to find key: %s' % remote_key)
                    files_.append(None)
                else:
                    do_download = force or not op.exists(target_file)
                    try:
                        do_download = (do_download or
                                       (check and nib.load(
                                        target_file).get_data() is None))
                    except IOError as ioe:
                        if verbose > 0:
                            print("Warning: %s corrupted, re-downloading "
                                  "(Error=%s)" % (target_file, ioe))
                        do_download = True

                    if do_download:
                        # Ensure destination directory exists
                        destination_dir = op.dirname(target_file)
                        if not op.isdir(destination_dir):
                            if verbose > 0:
                                print("Creating base directory %s" % (
                                    destination_dir))
                            os.makedirs(destination_dir)

                        if verbose > 0:
                            print("Downloading [%s]/%s to %s." % (
                                bucket_name or 'default bucket',
                                remote_key,
                                target_file))
                        with open(target_file, 'wb') as fp:
                            cb = partial(test_cb, t0=time.time())
                            key.get_contents_to_file(fp, cb=cb, num_cb=None)

                    files_.append(target_file)
        return files_
Example #11
    def do_connect(self):
        if self.conf.path_style_request:
            calling_format=boto.s3.connection.OrdinaryCallingFormat()
        else:
            calling_format=boto.s3.connection.SubdomainCallingFormat()

        if self.conf.host is None:
            # If the version 4 signature is used, boto requires the 'host' parameter.
            # There is also a bug with AWS Frankfurt that keeps boto from working;
            # the current workaround is to give the region-specific service address,
            # e.g. s3.eu-central-1.amazonaws.com instead of s3.amazonaws.com.
            if self.conf.use_v4_sig:
                self.conn = boto.connect_s3(self.conf.key_id, self.conf.key,
                                            host='s3.%s.amazonaws.com' % self.conf.aws_region,
                                            is_secure=self.conf.use_https,
                                            calling_format=calling_format)
            else:
                self.conn = boto.connect_s3(self.conf.key_id, self.conf.key, is_secure=self.conf.use_https,
                                            calling_format=calling_format)
        else:
            self.conn = boto.connect_s3(self.conf.key_id, self.conf.key,
                                        host='%s' % self.conf.host,
                                        port=self.conf.port,
                                        is_secure=self.conf.use_https,
                                        calling_format=calling_format)

        self.bucket = self.conn.get_bucket(self.conf.bucket_name)
Example #12
def _get_ref_from_galaxy_loc(name, genome_build, loc_file, galaxy_dt, need_remap,
                             galaxy_config, data):
    """Retrieve reference genome file from Galaxy *.loc file.

    Reads from tool_data_table_conf.xml information for the index if it
    exists, otherwise uses heuristics to find line based on most common setups.
    """
    refs = [ref for dbkey, ref in _galaxy_loc_iter(loc_file, galaxy_dt, need_remap)
            if dbkey == genome_build]
    remap_fn = alignment.TOOLS[name].remap_index_fn
    need_remap = remap_fn is not None
    if len(refs) == 0:
        # if we have an S3 connection, try to download
        try:
            import boto
            boto.connect_s3()
        except:
            raise ValueError("Could not find reference genome file %s %s" % (genome_build, name))
        logger.info("Downloading %s %s from AWS" % (genome_build, name))
        cur_ref = _download_prepped_genome(genome_build, data, name, need_remap)
    # allow multiple references in a file and use the most recently added
    else:
        cur_ref = refs[-1]
    if need_remap:
        assert remap_fn is not None, "%s requires remapping function from base location file" % name
        cur_ref = os.path.normpath(utils.add_full_path(cur_ref, galaxy_config["tool_data_path"]))
        cur_ref = remap_fn(os.path.abspath(cur_ref))
    return cur_ref
Example #13
    def copy_s3_file(self,fromPath,bucket,path):
        """copy from local file to S3 

        Args:
            fromPath (str): local file
            bucket (str): S3 bucket
            path (str): S3 prefix to add to files
        """
        if self.aws_key:
            self.conn = boto.connect_s3(self.aws_key,self.aws_secret)
        else:
            self.conn = boto.connect_s3()
        b = self.conn.get_bucket(bucket)
        source_size = os.stat(fromPath).st_size
        # Create a multipart upload request
        uploadPath = path
        logger.info("uploading to bucket %s path %s",bucket,uploadPath)
        mp = b.initiate_multipart_upload(uploadPath)
        chunk_size = 10485760
        chunk_count = int(math.ceil(source_size / float(chunk_size)))
        for i in range(chunk_count):
            offset = chunk_size * i
            bytes = min(chunk_size, source_size - offset)
            with FileChunkIO(fromPath, 'r', offset=offset,bytes=bytes) as fp:
                logger.info("uploading to s3 chunk %d/%d",(i+1),chunk_count)
                mp.upload_part_from_file(fp, part_num=i + 1)
        # Finish the upload
        logger.info("completing transfer to s3")
        mp.complete_upload()
Example #14
    def download_s3(self,fromPath,toPath):
        """download from S3 to local folder

        Args:
            fromPath (str): S3 URL
            toPath (str): local folder
        """
        if fromPath.startswith("s3n://"):
            noSchemePath = fromPath[6:]
        elif fromPath.startswith("s3://"):
            noSchemePath = fromPath[5:]
        parts = noSchemePath.split('/')
        bucket = parts[0]
        s3path = noSchemePath[len(bucket)+1:]
        if self.aws_key:
            self.conn = boto.connect_s3(self.aws_key,self.aws_secret)
        else:
            self.conn = boto.connect_s3()
        b = self.conn.get_bucket(bucket)
        for k in b.list(prefix=s3path):
            if not k.name.endswith("/"):
                basename = os.path.basename(k.name)
                fnew = toPath+"/"+basename
                logger.info("copying %s to %s",k.name,fnew)
                k.get_contents_to_filename(fnew)
Example #15
def rubberjack(ctx, application, organisation, region, sigv4_host, bucket):
    """
    Main entry point into the rubberjack CLI.
    """

    ctx.obj = {}

    ctx.obj['application'] = application
    ctx.obj['organisation'] = organisation
    ctx.obj['region'] = region_from_name(region)
    ctx.obj['application_name'] = application_name = "{organisation}-{application}".format(organisation=organisation, application=application)

    ctx.obj['dev_environment_name'] = "{application_name}-dev".format(application_name=application_name)
    ctx.obj['live_environment_name'] = "{application_name}-live".format(application_name=application_name)

    if bucket is None:
        bucket = "{organisation}-rubberjack-ebdeploy".format(organisation=organisation)

    ctx.obj['bucket'] = bucket

    # boto doesn't use a default of None, it uses NoHostProvided, and I struggled to pass that myself
    if sigv4_host:
        s3 = boto.connect_s3(host=sigv4_host)
    else:
        s3 = boto.connect_s3()
    ctx.obj['s3'] = s3
    ctx.obj['beanstalk'] = boto.beanstalk.layer1.Layer1(region=ctx.obj['region'])
Example #16
    def record_stack(self, stack, destination, credentials):
        """
        S3 implementation.  Uploads stack definition to configured S3 bucket.

        :param stack: stack definition
        :type stack: str.
        :param destination: destination to copy stack to
        :type destination: str.
        :param credentials: credentials for copy command
        :type credentials: dict.
        :returns:  boolean
        :raises: :class:`pmcf.exceptions.AuditException`
        """

        LOG.info('recording stack definition to s3://%s/%s',
                 credentials['audit_output'], destination)
        try:
            s3_conn = None
            if credentials.get('use_iam_profile'):
                s3_conn = boto.connect_s3()
            else:
                s3_conn = boto.connect_s3(
                    aws_access_key_id=credentials['access'],
                    aws_secret_access_key=credentials['secret']
                )
            bucket = s3_conn.get_bucket(credentials['audit_output'])
            k = boto.s3.key.Key(bucket)
            k.key = destination
            k.set_contents_from_string(stack)
        except (boto.exception.S3ResponseError,
                boto.exception.BotoServerError), exc:
            raise AuditException(exc)
Example #17
def upload_s3():
    """
    Upload jar file to s3
    """
    source_path = JARFILE
    source_size = os.stat(source_path).st_size

    # create bucket
    import boto
    conn = boto.connect_s3()
    bucket = conn.create_bucket(S3_BUCKET)

    # upload
    c = boto.connect_s3()
    b = c.get_bucket(S3_BUCKET)
    # Create a multipart upload request
    mp = b.initiate_multipart_upload(os.path.basename(source_path))

    # Use a chunk size of 5 MiB
    chunk_size = 5242880
    chunk_count = int(math.ceil(source_size / float(chunk_size)))

    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_path, 'r', offset=offset,
                         bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)

    # Finish the upload
    mp.complete_upload()
    print("Jar uploaded to S3 bucket " + S3_BUCKET)
Example #18
    def __init__(self, username, access_key, secret_key,
                       rate_limit=None, host=None):

        self.username = username
        self.access_key = access_key
        self.secret_key = secret_key
        self.rate_limit = rate_limit
        self.rate_limiter = RateLimiter(self.rate_limit)
        self.callbacks = CallbackAggregator()
        self.multipart_status_callbacks = CallbackAggregator()
        self.host = host
        self.logger = logging.getLogger(__name__)

        if self.rate_limit:
            self.callbacks.add_callback(self.rate_limiter)

        if self.host =='s3.amazonaws.com':
            self.connection = boto.connect_s3(self.access_key, self.secret_key)
        else:
            self.connection = boto.connect_s3(aws_access_key_id=self.access_key,
                              aws_secret_access_key=self.secret_key,
                              is_secure=False,
                              host=self.host,
                              port=8773,
                              calling_format=boto_s3_connection_class.OrdinaryCallingFormat(),
                              path="/services/Walrus")

        if not self.connection:
            raise S3AuthError("check access_key and secret_key")

        self.bucket = self.connection.lookup(username)
        if not self.bucket:
            raise S3AuthError("check access_key and secret_key")
Example #19
def Storage(accessKey,storageKey):
    # Storage buckets
    storageConn = get_storage_connection(accessKey,storageKey)

    storageTable = PrettyTable (["Storage Buckets", "Total Size"])
    storageTable.align["Storage Buckets"] = "l"
    storageTable.padding_width = 1
    

    boto.connect_s3(calling_format=OrdinaryCallingFormat())
    reservationStorage = storageConn.get_all_buckets()
    totalSize = 0

    for bucket in reservationStorage:
        s3Bucket = bucket.name
        if (bucket.name != 'esbundles-c14334.Ubuntu-1204-PE-300-agent'):           
            for key in bucket.list():
                size = key.size
                totalSize = totalSize + size
        storageTable.add_row([s3Bucket,""])        
    #print 'TotalSize: ' + str(totalSize/1073741824) + ' GB'
    totalSize = totalSize/1073741824
    storageTable.add_row(["---------------------------------------------------", "------------"])
    storageTable.add_row(["Total Size in GB", totalSize])
    print storageTable
    file2write.writelines('\n')
    file2write.writelines(str(storageTable))
Example #20
    def _get_data_files(self):
        """
        Retrieves metadata and parsed dataframe files
            (generated by utilities/hddata_process.py) from S3
        """
        s2f = self._s3_to_fname
        while not op.exists(op.join( self.working_dir, s2f(self.meta_file))):
            try:
                conn = boto.connect_s3()
                b = conn.get_bucket(self.ds_bucket)
                k = Key(b)
                k.key = self.meta_file
                k.get_contents_to_filename(
                        op.join( self.working_dir, s2f(self.meta_file)))
            except:
                time.sleep(random.random())

        while not op.exists(op.join( self.working_dir, s2f(self.data_file))):
            conn = boto.connect_s3()
            try:
                b = conn.get_bucket(self.ds_bucket)
                k = Key(b)
                k.key = self.data_file
                k.get_contents_to_filename( op.join( self.working_dir,
                                                            s2f(self.data_file)) )
            except S3ResponseError:
                self.logger.exception( 'Master has not generated files' )
                raise
            except OSError:
                time.sleep(random.random())
Example #21
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):

    # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
    # are environment variables
    parsed_url = parse_url(filepath_or_buffer)
    s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')

    try:
        conn = boto.connect_s3(host=s3_host)
    except boto.exception.NoAuthHandlerFound:
        conn = boto.connect_s3(host=s3_host, anon=True)

    b = conn.get_bucket(parsed_url.netloc, validate=False)
    if compat.PY2 and (compression == 'gzip' or
                       (compression == 'infer' and
                        filepath_or_buffer.endswith(".gz"))):
        k = boto.s3.key.Key(b, parsed_url.path)
        filepath_or_buffer = BytesIO(k.get_contents_as_string(
            encoding=encoding))
    else:
        k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
        k.open('r')  # Expose read errors immediately
        filepath_or_buffer = k
    return filepath_or_buffer, None, compression
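A hedged usage sketch for the helper above; the s3:// URL is a placeholder and, as the comment notes, credentials are taken from the environment.

filepath_or_buffer, _, compression = get_filepath_or_buffer('s3://my-bucket/data/example.csv')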
Example #22
def get_s3_connection(aws_access_key_id=None, aws_secret_access_key=None,
                      anon=False, profile_name=None, **kwargs):
    import boto

    if profile_name:
        return boto.connect_s3(profile_name=profile_name)

    cfg = boto.Config()

    if aws_access_key_id is None:
        aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')

    if aws_access_key_id is None:
        aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')

    if aws_secret_access_key is None:
        aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    if aws_secret_access_key is None:
        aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

    # anon is False but we didn't provide any credentials so try anonymously
    anon = (not anon and
            aws_access_key_id is None and
            aws_secret_access_key is None)
    return boto.connect_s3(aws_access_key_id, aws_secret_access_key,
                           anon=anon)
Example #23
    def load(self, url, offset, length):
        if not s3_avail:  # pragma: no cover
            raise IOError('To load from s3 paths, ' +
                          'you must install boto: pip install boto')

        aws_access_key_id = self.aws_access_key_id
        aws_secret_access_key = self.aws_secret_access_key

        parts = urlsplit(url)

        if parts.username and parts.password:
            aws_access_key_id = unquote_plus(parts.username)
            aws_secret_access_key = unquote_plus(parts.password)
            bucket_name = parts.netloc.split('@', 1)[-1]
        else:
            bucket_name = parts.netloc

        if not self.s3conn:
            try:
                self.s3conn = connect_s3(aws_access_key_id, aws_secret_access_key)
            except Exception:  #pragma: no cover
                self.s3conn = connect_s3(anon=True)

        bucket = self.s3conn.get_bucket(bucket_name)

        key = bucket.get_key(parts.path)

        if offset == 0 and length == -1:
            headers = {}
        else:
            headers = {'Range': BlockLoader._make_range_header(offset, length)}

        # Read range
        key.open_read(headers=headers)
        return key
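The same Range-header trick works with plain boto calls; a minimal sketch, assuming a hypothetical bucket and key, that reads only the first 100 bytes of an object.

import boto

conn = boto.connect_s3()
key = conn.get_bucket('my-bucket').get_key('archive/big-file.warc.gz')
# pass an HTTP Range header so only bytes 0-99 are fetched
first_chunk = key.get_contents_as_string(headers={'Range': 'bytes=0-99'})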
Example #24
    def configure(self):
        if settings.AWS_ENABLED:
            if settings.AWS_S3_FAKE_S3 is None:
                # The host must be specified manually on Python 2.7.9+ because of
                # https://github.com/boto/boto/issues/2836, a boto bug triggered by
                # dots in bucket names.
                host = settings.AWS_S3_HOST if settings.AWS_S3_HOST else NoHostProvided

                self.s3 = boto.connect_s3(
                    settings.AWS_ACCESS_KEY_ID,
                    settings.AWS_SECRET_ACCESS_KEY,
                    host=host,
                    calling_format=OrdinaryCallingFormat()
                )
            else:
                host, port = (settings.AWS_S3_FAKE_S3.split(':', 2) + [80])[:2]
                port = int(port)
                self.s3 = boto.connect_s3("key_id", "secret_key", is_secure=False, port=port,
                                          host=host, calling_format=OrdinaryCallingFormat())
                _ensure_bucket_exists(self.s3, settings.AWS_S3_SOURCE_BUCKET)
                _ensure_bucket_exists(self.s3, settings.AWS_S3_EXPORT_BUCKET)
                _ensure_bucket_exists(self.s3, settings.AWS_S3_BUILDS_BUCKET)

            self.buckets = {
                'source': self.s3.get_bucket(settings.AWS_S3_SOURCE_BUCKET),
                'export': self.s3.get_bucket(settings.AWS_S3_EXPORT_BUCKET),
                'builds': self.s3.get_bucket(settings.AWS_S3_BUILDS_BUCKET),
            }
            self.configured = True
        else:
            self.s3 = None
            self.buckets = None
Example #25
    def deploy(self):
        """
        Deploy this quiz JSON to S3.
        """
        if not self.slug:
            return

        data = json.dumps(self.flatten())

        s3 = boto.connect_s3()

        gzip_buffer = StringIO()

        with gzip.GzipFile(fileobj=gzip_buffer, mode='w') as f:
            f.write(data)

        data = gzip_buffer.getvalue()

        for bucket_name in app_config.S3_BUCKETS:
            bucket = s3.get_bucket(bucket_name)

            k = Key(bucket, '%s/live-data/games/%s.json' % (app_config.PROJECT_SLUG, self.slug))
            k.set_contents_from_string(data, headers={
                'Content-Type': 'application/json',
                'Content-Encoding': 'gzip',
                'Cache-Control': 'max-age=5'
            })
            k.set_acl('public-read')
Example #26
def connect(aws_access_key_id=None, aws_secret_access_key=None):
    """ uses api_key and secret_key if available or
    falls back on ENV variables AWS_ACCESS_KEY_ID && AWS_SECRET_ACCESS_KEY """
    if aws_access_key_id is None or aws_secret_access_key is None:
        log.debug('Falling back to ENV variables AWS_ACCESS_KEY_ID && AWS_SECRET_ACCESS_KEY')
        return boto.connect_s3()
    else:
        return boto.connect_s3(aws_access_key_id, aws_secret_access_key)
Example #27
def get_s3_bucket(bucket_name, config):
    if bucket_name not in S3_BUCKETS:
        if config.aws_access_key and config.aws_secret_key:
            s3conn = boto.connect_s3(config.aws_access_key, config.aws_secret_key)
        else:
            s3conn = boto.connect_s3() # use local boto config or IAM profile
        S3_BUCKETS[bucket_name] = s3conn.get_bucket(bucket_name)
    return S3_BUCKETS[bucket_name]
Example #28
    def s3(self):
        if not self.__s3_conn:
            if self.config.AWS:
                self.__s3_conn = boto.connect_s3(self.config.AWS["key"], self.config.AWS["secret_key"])
            else:
                self.__s3_conn = boto.connect_s3()

        return self.__s3_conn
Example #29
def home(request):
	global location
	userInput = UserInputForm

	if 'import' in request.POST:
		newdoc = Document(docfile = request.FILES['upload'])
		newdoc.save()
		location = newdoc.path()

		# creating S3 bucket connection
		conn = boto.connect_s3('AKIAIJZ56E33VC2GBG3Q', 'xfSWxuK9uGAsRwtwdJgIPBhiye0Z3ka5oRqRa8FD')
		bucket = conn.create_bucket('client1.bucket')
		k = Key(bucket)

		filename = str(request.FILES['upload'])
		filenameKey = re.sub('\.txt$', '', filename)

		print filenameKey
		
		k.key = filenameKey 
		k.set_contents_from_filename(location)
		return HttpResponseRedirect(reverse('upload.views.home'))
	else:
		form = DocumentForm() # An empty, unbound form

	if 'user_input' in request.POST:
		form = UserInputForm(request.POST)
		if form.is_valid():
			form.save()
			myfile = open(os.path.dirname(os.path.abspath(upload.__file__))+ "/media/Watchlists/userinput.txt", 'a')
			myfile.write(request.POST['keyword'] + "\n")
			myfile.close()

			location = os.path.dirname(os.path.abspath(upload.__file__))+ "/media/Watchlists/userinput.txt"

			conn = boto.connect_s3('AKIAIJZ56E33VC2GBG3Q', 'xfSWxuK9uGAsRwtwdJgIPBhiye0Z3ka5oRqRa8FD')
			bucket = conn.create_bucket('client1.bucket')
			k = Key(bucket)

			filenameKey = "userinput"

			print filenameKey
		
			k.key = filenameKey 
			k.set_contents_from_filename(location)
			return HttpResponseRedirect(reverse('upload.views.home'))
		else:
			form = UserInputForm()

	# Load documents for the list page
	documents = Document.objects.all()

	# Render list page with the documents and the form
	return render_to_response(
		'upload/parallax.html',
		{'documents': documents, 'form' : form, 'userInput' : userInput},
		context_instance = RequestContext(request)
	)
Example #30
    def __init__(self, mega_stack_name, name, params, template_name, region,
                 sns_topic_arn, tags=None, depends_on=None):
        self.logger = logging.getLogger(__name__)
        if mega_stack_name == name:
            self.cf_stack_name = name
        else:
            self.cf_stack_name = "%s-%s" % (mega_stack_name, name)
        self.mega_stack_name = mega_stack_name
        self.name = name
        self.yaml_params = params
        self.params = {}
        self.template_name = template_name
        self.template_body = ''
        self.template_url = False
        if depends_on is None:
            self.depends_on = None
        else:
            self.depends_on = []
            for dep in depends_on:
                if dep == mega_stack_name:
                    self.depends_on.append(dep)
                else:
                    self.depends_on.append("%s-%s" % (mega_stack_name, dep))
        self.region = region
        self.sns_topic_arn = sns_topic_arn

        # Safer than setting default value for tags = {}
        if tags is None:
            self.tags = {}
        else:
            self.tags = tags

        try:
            # catch S3 url template names
            m = re.match(r'(https?|s3)://([^/]+)/(.+$)', self.template_name)
            if m:
                protocol, bucket, key = m.groups()
                if protocol == 's3':
                    connect_s3().get_bucket(bucket).get_key(key).read()
                else:
                    if not requests.get(self.template_name).ok:
                        raise Exception
            else:
                open(self.template_name, 'r')
        except:
            self.logger.critical("Failed to open template file %s for stack %s"
                                 % (self.template_name, self.name))
            exit(1)

        # check params is a dict if set
        if self.yaml_params and type(self.yaml_params) is not dict:
            self.logger.critical(
                "Parameters for stack %s must be of type dict not %s",
                self.name, type(self.yaml_params))
            exit(1)

        self.cf_stacks = {}
        self.cf_stacks_resources = {}
Example #31
def upload_to_s3(bucketname, tiles):  # pragma: no cover
    tiles = os.path.abspath(tiles)

    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucketname, validate=False)
    result = {
        'tile_changed': 0,
        'tile_deleted': 0,
        'tile_unchanged': 0,
        'tile_new': 0,
        's3_put': 0,
        's3_list': 0,
    }

    def _key(name):
        try:
            return int(name)
        except Exception:
            return -1

    for name in sorted(os.listdir(tiles), key=_key):
        folder = os.path.join(tiles, name)
        if not os.path.isdir(folder):
            continue

        for root, dirs, files in os.walk(folder):
            # strip the local tiles directory prefix (lstrip would strip a character
            # set, not the prefix) to build the key prefix relative to the bucket
            rel_root = 'tiles' + root[len(tiles):] + '/'
            rel_root_len = len(rel_root)
            filtered_files = [f for f in files if f.endswith('.png')]
            if not filtered_files:
                continue
            # get all the keys
            keys = {}
            result['s3_list'] += 1
            for key in bucket.list(prefix=rel_root):
                rel_name = key.name[rel_root_len:]
                keys[rel_name] = key
            for f in filtered_files:
                filename = root + os.sep + f
                keyname = rel_root + f
                key = keys.pop(f, None)
                changed = True
                if key is not None:
                    if os.path.getsize(filename) != key.size:
                        # do the file sizes match?
                        changed = True
                    else:
                        remote_md5 = key.etag.strip('"')
                        with open(filename, 'rb') as fd:
                            local_md5 = hashlib.md5(fd.read()).hexdigest()
                        if local_md5 == remote_md5:
                            # do the md5/etags match?
                            changed = False
                if changed:
                    if key is None:
                        result['tile_new'] += 1
                        key = boto.s3.key.Key(bucket)
                        key.key = keyname
                    else:
                        result['tile_changed'] += 1
                    # upload or update the key
                    result['s3_put'] += 1
                    key.set_contents_from_filename(filename,
                                                   headers=IMAGE_HEADERS,
                                                   reduced_redundancy=True)
                else:
                    result['tile_unchanged'] += 1
            # delete orphaned files
            for rel_name, key in keys.items():
                result['tile_deleted'] += 1
                key.delete()

    # Update status file
    data = {'updated': util.utcnow().isoformat()}
    k = boto.s3.key.Key(bucket)
    k.key = 'tiles/data.json'
    k.set_contents_from_string(dumps(data),
                               headers=JSON_HEADERS,
                               reduced_redundancy=True)

    return result
Example #32
    def create_bucket(self):
        self.conn = boto.connect_s3()
        self.conn.create_bucket(BUCKET)
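A hedged counterpart for test teardown, assuming the same BUCKET constant; an S3 bucket has to be emptied before it can be deleted.

    def delete_bucket(self):
        bucket = self.conn.get_bucket(BUCKET)
        # a bucket must be empty before delete_bucket() succeeds
        for key in bucket.list():
            key.delete()
        self.conn.delete_bucket(BUCKET)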
Example #33
def convertvid(ytid, options, convertedname, taskid, fetchandconvert, duration,
               id3):
    tmpfile = "/mnt/%s" % (taskid)
    tmpconverted = "/mnt/%s" % (convertedname)
    start = getseconds("00:" + options["youtube_start"])
    end = getseconds("00:" + options["youtube_end"])
    duration = end - start
    conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID,
                           settings.AWS_SECRET_ACCESS_KEY)
    #1) check if file exists on local disk
    if os.path.isfile(tmpfile) == False:
        # 1.1) get file from S3
        bucket = conn.get_bucket(settings.S3_YT_RAW_BUCKET)
        k = Key(bucket)
        k.key = ytid
        k.get_contents_to_filename(tmpfile)
    # 2) Run ffmpeg - ffmpeg -i input.flv -ab 128k output.mp3
    #todo: use actual options for transcoding
    quality = options['transcoder_quality'] + 'k'
    extraargs = []
    format = tmpconverted[-3:]
    if format == "m4r":
        # m4a is same as m4r
        tmpconverted = tmpconverted[:-3] + "m4a"
        extraargs = ["-vn"]
    if format == "mp4":
        # use MP4Box to do the job, since if the original video has a not 1:1 PAR, and
        # has a small width than 360, the script will fail, so we use MP4Box instead
        import shutil
        shutil.move(tmpfile, tmpfile + ".mp4")
        tmpfile = tmpfile + ".mp4"
        ffmpegcmd = "MP4Box -add %s %s" % (tmpfile, tmpconverted)
    elif format == "flv" or format == "wmv":
        # These are video formats
        ffmpegcmd = " ".join([
            "ffmpeg", "-ss",
            str(start), "-i", tmpfile, "-y", "-t",
            str(duration), "-vf", '"scale=-1:360"'
        ] + extraargs + [tmpconverted])
    elif format == "avi":
        ffmpegcmd = " ".join([
            "ffmpeg", "-ss",
            str(start), "-i", tmpfile, "-y", "-t",
            str(duration), "-ab", quality, "-vf", '"scale=-1:360"'
        ] + extraargs + [tmpconverted])
    elif format == "wav":
        ffmpegcmd = " ".join([
            "ffmpeg", "-ss",
            str(start), "-i", tmpfile, "-y", "-t",
            str(duration)
        ] + extraargs + [tmpconverted])
    else:
        #audio only
        ffmpegcmd = " ".join([
            "ffmpeg", "-ss",
            str(start), "-i", tmpfile, "-y", "-t",
            str(duration), "-ab", quality
        ] + extraargs + [tmpconverted])
    print ffmpegcmd
    thread = pexpect.spawn(ffmpegcmd)
    cpl = thread.compile_pattern_list([pexpect.EOF, ".*time=([^\s]*)", '(.+)'])
    while True:
        i = thread.expect_list(cpl, timeout=None)
        if i == 0:  # EOF
            print "the sub process exited"
            break
        elif i == 1:
            timestamp = thread.match.group(1)
            print timestamp
            t = getseconds(timestamp)
            print t, duration
            pct = 30 + ((t / duration) * 60)
            fetchandconvert.update_state(state="CONVERTING",
                                         meta={"progress": int(pct)})
            thread.close

    #p = subprocess.Popen(["ffmpeg", "-i", tmpfile, "-y", "-ab", quality] + extraargs + [tmpconverted], stdout=subprocess.PIPE)
    #com = p.communicate()
    #print com
    # 3) Store output into S3
    # === POSTPROCESSING ===
    # ID3 tags
    if id3 is not None:
        audio = EasyID3(tmpconverted)
        for key in id3:
            audio[key] = id3[key]
        audio.save()

    bucket = conn.get_bucket(settings.S3_YT_PRO_BUCKET)
    k = Key(bucket)
    k.key = convertedname
    k.set_contents_from_filename(tmpconverted)
    newfile = bucket.get_key(k.key)
    newfile.change_storage_class('REDUCED_REDUNDANCY')
    # 4) Return
    pass
Example #34
def main(argv):

    parser = OptionParser(usage="usage: %prog [options]", version="%prog 1.0")
    parser.add_option('-b', '--bucket', dest='logBucket', type='string', \
            help='Specify the S3 bucket containing AWS logs')
    parser.add_option('-d', '--debug', action='store_true', dest='debug', \
            help='Increase verbosity')
    parser.add_option('-l', '--log', dest='logFile', type='string', \
            help='Local log file')
    parser.add_option('-j', '--json', action='store_true', dest='dumpJson', \
            help='Reformat JSON message (default: raw)')
    #Beware, once you delete history it's gone.
    parser.add_option('-D', '--delete', action='store_true', dest='deleteFile', \
            help='Delete processed files from the AWS S3 bucket')
    parser.add_option('-s', '--state', dest='state', type='string', \
            help="State file for keeping track of what logs you already processed.")
    (options, args) = parser.parse_args()
    state_tracker = None

    if options.debug:
        print '+++ Debug mode on'

    if options.logBucket == None:
        print 'ERROR: Missing an AWS S3 bucket! (-b flag)'
        sys.exit(1)
    if options.logFile == None:
        print 'ERROR: Missing a local log file! (-l flag)'
        sys.exit(1)
    if options.state:
        import sqlite3
        try:
            state_tracker = sqlite3.connect(options.state)
            state_tracker.execute("select count(*) from log_progress")
        except sqlite3.OperationalError:
            state_tracker.execute(
                "create table log_progress  (log_name 'text' primary key, processed_date 'TEXT')"
            )

    if options.debug: print '+++ Connecting to Amazon S3'
    s3 = boto.connect_s3()
    try:
        c = s3.get_bucket(options.logBucket)
    except boto.exception.S3ResponseError as e:
        print "Bucket %s access error: %s" % (options.logBucket, e)
        sys.exit(3)
    for f in c.list():
        newFile = os.path.basename(str(f.key))
        if re.match('.+_CloudTrail-Digest_.+', newFile):
            if options.debug: print "Skipping digest file: %s" % newFile
            continue
        if newFile != "":
            if already_processed(newFile, state_tracker):
                if options.debug:
                    print "Skipping previously seen file {file}".format(
                        file=newFile)
                continue
            if options.debug:
                print "+++ Found new log: ", newFile
            f.get_contents_to_filename(newFile)
            data = gzip.open(newFile, 'rb')
            try:
                log = open(options.logFile, 'ab')
            except IOError as e:
                print "ERROR: Cannot open %s (%s)" % (options.logFile,
                                                      e.strerror)
                sys.exit(1)

            if options.dumpJson == None:
                log.write(data.read())
                log.write("\n")
            else:
                j = json.load(data)
                if "Records" not in j:
                    continue
                for json_event in j["Records"]:
                    new_dict = {}
                    for key in json_event:
                        if json_event[key]:
                            new_dict[key] = json_event[key]
                    new_dict['log_file'] = newFile
                    aws_log = {'aws': new_dict}
                    # Copy 'aws.sourceIPAddress' and 'aws.userIdentity.userName' to standard fields 'srcip' and 'user' so 'srcip' can be used in Wazuh GeoIP lookups and <same_user /> and <same_source_ip /> can be used in composite rules.
                    if 'sourceIPAddress' in aws_log["aws"]:
                        aws_log["srcip"] = aws_log["aws"]["sourceIPAddress"]
                    if 'userIdentity' in aws_log[
                            "aws"] and 'userName' in aws_log["aws"][
                                "userIdentity"]:
                        aws_log["user"] = aws_log["aws"]["userIdentity"][
                            "userName"]
                    log.write("{0}\n".format(json.dumps(aws_log)))
            log.close()

            try:
                os.remove(newFile)
            except IOError as e:
                print "ERROR: Cannot delete %s (%s)" % (newFile, e.strerror)
            if options.deleteFile:
                c.delete_key(f.key)
            mark_complete(newFile, state_tracker)
Example #35
def test_delete_missing_key():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket('foobar')

    deleted_key = bucket.delete_key("foobar")
    deleted_key.key.should.equal("foobar")
Example #36
def becomeJester():
    dirname = os.getcwd()

    f = open(f"{dirname}/linodeCreds.txt")
    access_key = f.readline()
    secret_key = f.readline()
    f.close()

    access_key = access_key.strip()
    secret_key = secret_key.strip()

    conn = boto.connect_s3(
            aws_access_key_id = access_key,
            aws_secret_access_key = secret_key,
            host = 'us-east-1.linodeobjects.com',
            #is_secure=False,               # uncomment if you are not using ssl
            calling_format = boto.s3.connection.OrdinaryCallingFormat(),
            )

    for bucket in conn.get_all_buckets():

        name = bucket.name,
        created = bucket.creation_date,
        print (f"{name}\t{created}")

    for key in bucket.list():
        #print(key.get_acl())
        name = key.name,
        size = key.size,
        modified = key.last_modified,
        print (f"{name}\t{size}\t{modified}")
        try:
            if not os.path.isfile(f'{dirname}/scavenger-bucket/{name[0]}'):
                try:
                    #create folder
                    if "/" in name[0]:
                        folderPath = name[0].split('/')
                        print("folderPath: ")
                        print(folderPath[0])
                        os.makedirs(f'{dirname}/scavenger-bucket/{folderPath[0]}')
                        #then add file
                        key.get_contents_to_filename(f'{dirname}/scavenger-bucket/{folderPath[0]}/{folderPath[1]}')
                    else:
                        key.get_contents_to_filename(f'{dirname}/scavenger-bucket/{name[0]}')
                except:
                    pass
        except:
            pass


    #get most recent file
    list_of_files = glob.glob(f'{dirname}/scavenger-bucket/*') # * means all if need specific format then *.csv
    latest_file = max(list_of_files, key=os.path.getctime)
    #print (latest_file)

    j = open(latest_file)
    jesterCommand = j.readline()
    j.close()

    print(latest_file)
    print(jesterCommand)
    return(jesterCommand)
Example #37
            p = Popen(dumpCommand, shell=True, stdout=PIPE)
            with gzip.open(mysqlfile, "wb") as f:
                f.writelines(p.stdout)

            exitcode = p.wait()
            print("exitcode = " + str(exitcode))

            currentTime = datetime.now()

            if 0 == exitcode:
                print(" backupcompleted at : " + str(currentTime) +
                      ' starting upload')

                #we have exception if upload fails
                try:
                    s3Conn = boto.connect_s3(os.environ['S3_ACCESSID'],
                                             os.environ['S3_ACCESSKEY'])
                    bucket = s3Conn.get_bucket(os.environ['S3_BUCKET'])
                    s3Key = Key(bucket)
                    s3Key.key = 'dbbackup/' + os.environ[
                        'BACKUP_NAME'] + '/' + fileNameOnly

                    print("uploading to :" + os.environ['S3_BUCKET'] + " : " +
                          s3Key.name)
                    s3Key.set_contents_from_filename(mysqlfile)

                    os.remove(mysqlfile)

                    print("back up sucess")
                    break

                except Exception, e:
Example #38
#!/usr/bin/env python
import boto
from boto.s3.key import Key

OrdinaryCallingFormat = boto.config.get(
    's3', 'calling_format', 'boto.s3.connection.OrdinaryCallingFormat')

s3 = boto.connect_s3(host='localhost',
                     port=10001,
                     calling_format=OrdinaryCallingFormat,
                     is_secure=False)
b = s3.create_bucket('mocking')

keys = b.get_all_keys(prefix='level')
print 'TEST 1'
for key in keys:
    print repr(key)

keys = b.get_all_keys(max_keys=2)
print 'TEST 2'
for key in keys:
    print repr(key)
Example #39
import boto
keyId = "your_aws_access_key_id"
sKeyId = "your_aws_secret_key_id"
conn = boto.connect_s3(keyId, sKeyId)
srcBucket = conn.get_bucket('mybucket001')  #Source Bucket Object
dstBucket = conn.get_bucket('mybucket002')  #Destination Bucket Object
fileName = "abc.txt"
#Call the copy_key() from destination bucket
dstBucket.copy_key(fileName, srcBucket.name, fileName)
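copy_key() can also rename the object on the way; a short hedged follow-up using the same buckets, with the new key name as a placeholder.

# Copy abc.txt into the destination bucket under a new key name
dstBucket.copy_key("backups/" + fileName, srcBucket.name, fileName)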
Example #40
    def _get_bucket(self, bucket_name, validate=False):
     
        s3 = boto.connect_s3(g.S3KEY_ID or None, g.S3SECRET_KEY or None)
        bucket = s3.get_bucket(bucket_name, validate=validate)

        return bucket
Example #41
def saveRecentResults(electionID, idList, timestamp):

    # check if file exists already

    if os.path.exists('recentResults.json'):

        print "Results file exists, updating"

        with open('recentResults.json', 'r') as recentResultsFile:

            # Convert the results to a list of datetime objects

            tempList = []
            recentResults = json.load(recentResultsFile)

            print "oldresults", recentResults

            for result in recentResults[electionID]:
                tempList.append(datetime.strptime(result, "%Y%m%d%H%M%S"))

            # Sort it

            tempList.sort(reverse=True)

            # Check if it's less than 20 and append the new timestamp

            if len(tempList) < 20:

                print "Less than twenty results, appending latest now"

                tempList.append(datetime.strptime(timestamp, "%Y%m%d%H%M%S"))
                tempList.sort(reverse=True)

                for i in xrange(0, len(tempList)):
                    tempList[i] = datetime.strftime(tempList[i],
                                                    '%Y%m%d%H%M%S')

                recentResults[electionID] = tempList

            # If it's 20, remove the oldest timestamp, then append the new one

            elif len(tempList) == 20:

                print "Twenty results, removing oldest and appending newest"

                del tempList[-1]

                tempList.append(datetime.strptime(timestamp, "%Y%m%d%H%M%S"))
                tempList.sort(reverse=True)

                for i in xrange(0, len(tempList)):
                    tempList[i] = datetime.strftime(tempList[i],
                                                    '%Y%m%d%H%M%S')

                recentResults[electionID] = tempList

        # Write the new version

        print "newresults", recentResults

        newJson = json.dumps(recentResults, indent=4)

        with open('recentResults.json', 'w') as fileOut:
            fileOut.write(newJson)

        print "Finished saving results log locally"

        print "Connecting to S3"
        conn = boto.connect_s3(AWS_KEY, AWS_SECRET)
        bucket = conn.get_bucket('gdn-cdn')

        from boto.s3.key import Key

        k = Key(bucket)
        k.key = "2018/07/aus-byelections/recentResults.json".format(
            timestamp=timestamp)
        k.set_metadata("Cache-Control", "max-age=180")
        k.set_metadata("Content-Type", "application/json")
        k.set_contents_from_string(newJson)
        k.set_acl("public-read")
        print "Done, JSON is updated"

    # Otherwise start a new file

    else:
        print "No results file, making one now"

        # electionIDs = ['22692','22693','22694','22695','22696']
        # testIDs = ['21364','21379']
        jsonObj = {}

        for id in idList:
            jsonObj[id] = []

        jsonObj[electionID].append(timestamp)

        newJson = json.dumps(jsonObj, indent=4)

        with open('recentResults.json', 'w') as fileOut:
            fileOut.write(newJson)

        print "Finished creating results log"

        print "Connecting to S3"
        conn = boto.connect_s3(AWS_KEY, AWS_SECRET)
        bucket = conn.get_bucket('gdn-cdn')

        from boto.s3.key import Key

        k = Key(bucket)
        k.key = "2018/07/aus-byelections/recentResults.json".format(
            timestamp=timestamp)
        k.set_metadata("Cache-Control", "max-age=90")
        k.set_metadata("Content-Type", "application/json")
        k.set_contents_from_string(newJson)
        k.set_acl("public-read")
        print "Done, JSON is updated"
Example #42
def test_missing_key_urllib2():
    conn = boto.connect_s3('the_key', 'the_secret')
    conn.create_bucket("foobar")

    urlopen.when.called_with(
        "http://foobar.s3.amazonaws.com/the-key").should.throw(HTTPError)
Example #43
    def get_bucket_size(self):
        try:
            s3conn = boto.connect_s3()
        except boto.exception.BotoServerError, e:
            print e
            sys.exit(1)
Example #44
#!/usr/bin/env python3
import os
import boto
from flask import Flask
from config import ecs_test_drive

app = Flask(__name__)

#### Get ECS credentials from external config file
ecs_access_key_id = ecs_test_drive['ecs_access_key_id']
ecs_secret_key = ecs_test_drive['ecs_secret_key']
bucket_name = ecs_test_drive['bucket_name']

## Open a session with your ECS
session = boto.connect_s3(ecs_access_key_id,
                          ecs_secret_key,
                          host='object.ecstestdrive.com')
## Get hold of your bucket
b = session.get_bucket(bucket_name)
print("ECS connection is: " + str(session))
print("Bucket is: " + str(b))

print("Uploading photos ...")
## Create a list of filenames in "photos" to upload to ECS
for each_photo in os.listdir("photos"):
    print("Uploading " + str(each_photo))
    k = b.new_key(each_photo)
    src = os.path.join("photos", each_photo)
    k.set_contents_from_filename(src)
    k.set_acl('public-read')
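Since every photo above is made public-read, it can be handy to print a shareable URL per object; boto keys can generate one without query authentication. A small follow-up sketch using the same session and bucket:

# Print an unauthenticated URL for each uploaded object
# (valid only because the ACL above is public-read).
for key in b.list():
    print(key.name + " -> " + key.generate_url(expires_in=0, query_auth=False))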
Beispiel #45
0
# /bin/python

import boto
import boto.s3.connection
import glob
import ntpath
import socket
#import key
from key import *
# Connect and create the target bucket once, up front, rather than
# re-connecting for every log file.
conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    port=port_number,
    host=hostname,
    is_secure=False,
    debug=2,
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
bucket = conn.create_bucket('log_bucket')

listing = glob.glob('/var/log/ceph/*.log')

for files in listing:
    if "radosgw" in files:
        filename = ntpath.basename(files) + "-" + socket.gethostname()
    else:
        filename = ntpath.basename(files)
    print filename
    print files
    key = bucket.new_key(filename)
    key.set_contents_from_filename(files)
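To confirm the log upload worked, one quick check is to list what the bucket now contains; a short sketch against the same bucket:

# Quick verification: print every object and its size after the upload loop.
for key in bucket.list():
    print key.name, key.size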


Beispiel #46
0
def run(args):

    utils.pessimistic_connection_handling()

    # Setting up logging
    log = os.path.expanduser(configuration.get('core', 'BASE_LOG_FOLDER'))
    directory = log + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    # store old log (to help with S3 appends)
    if os.path.exists(filename):
        with open(filename, 'r') as logfile:
            old_log = logfile.read()
    else:
        old_log = None

    subdir = process_subdir(args.subdir)
    logging.root.handlers = []
    logging.basicConfig(filename=filename,
                        level=settings.LOGGING_LEVEL,
                        format=settings.LOG_FORMAT)

    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found in {1}'.format(
                args.dag_id, subdir)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(DagPickle).filter(
            DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print(('Pickled dag {dag} '
                       'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force)
        executor.heartbeat()
        executor.end()

    if configuration.get('core', 'S3_LOG_FOLDER').startswith('s3:'):
        import boto
        s3_log = filename.replace(log,
                                  configuration.get('core', 'S3_LOG_FOLDER'))
        bucket, key = s3_log.lstrip('s3:/').split('/', 1)
        if os.path.exists(filename):

            # get logs
            with open(filename, 'r') as logfile:
                new_log = logfile.read()

            # remove old logs (since they are already in S3)
            if old_log:
                new_log = new_log.replace(old_log, '')

            try:
                s3 = boto.connect_s3()
                s3_key = boto.s3.key.Key(s3.get_bucket(bucket), key)

                # append new logs to old S3 logs, if available
                if s3_key.exists():
                    old_s3_log = s3_key.get_contents_as_string().decode()
                    new_log = old_s3_log + '\n' + new_log

                # send log to S3
                encrypt = configuration.get('core', 'ENCRYPT_S3_LOGS')
                s3_key.set_contents_from_string(new_log, encrypt_key=encrypt)
            except:
                print('Could not send logs to S3.')
def get_conn(accesskey, secretkey):
    if accesskey and secretkey:
        return boto.connect_s3(accesskey, secretkey)
    else:
        return boto.connect_s3()
Beispiel #48
0
#!/usr/bin/python
#
# TimeLapse Downloader
#

import time
import boto
import subprocess
from boto.s3.key import Key

#get access to S3
conn = boto.connect_s3('access_key', 'secret_key')
#our bucket name
bucketname = 'plant-photos'
#get access to bucket
bucket = conn.get_bucket(bucketname)
# avoid shadowing the built-in "list"; the listed keys can be downloaded directly
keys = bucket.list()
i = 0
for key in keys:
    fname = 'images/' + str(i).zfill(6) + '.jpg'
    print fname, key.name
    key.get_contents_to_filename(fname)
    i = i + 1
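If the frames should follow capture order rather than key-name order, one option (a sketch, assuming each key's last_modified timestamp reflects capture time) is:

# Variant: number the frames by upload time instead of key name.
# key.last_modified is an ISO-8601 string, so a plain string sort is chronological.
for i, key in enumerate(sorted(bucket.list(), key=lambda k: k.last_modified)):
    fname = 'images/' + str(i).zfill(6) + '.jpg'
    key.get_contents_to_filename(fname)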
Beispiel #49
0
def main():
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
              -i 100 150 -t y -v 1
        -------------------------------------------------------------------------------------------------------------
        ''')
    )
    parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+', help="the model type")
    parser.add_argument("-c", "--convolution", default="n", help="convolution, yes or no")
    parser.add_argument("-f", "--filename", default="patches.mat", help="the data filename")
    parser.add_argument("-d", "--dimensions", type=int, nargs='+', default=([100, 256]),
                        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p", "--pool", type=int, nargs='+', default=None, help="pooling dimensions")
    parser.add_argument("-g", "--group", type=int, default=None, help="group size")
    parser.add_argument("-s", "--step", type=int, default=None, help="step size")
    parser.add_argument("-l", "--learn_rate", type=float, default=.001, help="learning rate")
    parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100], help="number of iterations")
    parser.add_argument("-v", "--verbosity", type=int, default=0, help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o", "--opt", default="GD", help="optimization method: GD or L-BFGS")
    parser.add_argument("-w", "--whitening", default='n', help="whitening: 'y' or 'n'")
    parser.add_argument("-t", "--test", default='n', help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a", "--channels", type=int, default=1, help="number of channels in data")
    parser.add_argument("-e", "--examples", type=int, default=None, help="number of training examples")
    parser.add_argument("-b", "--batch_size", type=int, default=1000, help="number of examples in [mini]batch")
    parser.add_argument("-z", "--aws", default='n', help="run on aws: 'y' or 'n'")
    parser.add_argument("-r", "--random", default='n', help="type of batches: random = 'y'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)

    ''' =================================== Load in the data =================================== '''

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print "pre-processing data ..."
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data.T).T
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        # elif args.whitening == 'n' and args.channels == 3:
        # data = np.float32(data)
        data = np.float32(data.T)

    elif args.convolution == 'y':

        if args.filename == 'kyotoData.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            for channel in range(args.channels):
                data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :].
                                                                     reshape((data.shape[0], 1,
                                                                              data.shape[2],
                                                                              data.shape[3])),
                                                                     kernel_shape=9), (
                                                    data.shape[0],
                                                    data.shape[2],
                                                    data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0

    # other assertions
    assert len(args.model) == len(args.iterations)
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None

    # assert that the number of neurons in each layer is a perfect square
    for layer in xrange(len(args.dimensions)):
        assert np.sqrt(args.dimensions[layer][0]) % np.floor(np.sqrt(args.dimensions[layer][0])) == 0

    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=args.model, weight_dims=args.dimensions, p=args.pool, group_size=args.group,
        step=args.step, lr=args.learn_rate, opt=args.opt, c=args.convolution, test=args.test,
        batch_size=args.batch_size, random=args.random, weights=None
    )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go though [mini]batches
                for batch_index in xrange(n_batches):

                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l], model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B', jac=True,
                         options={'maxiter': args.iterations[l], 'disp': True})

            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time        
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    if args.aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'wb'), pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if args.aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # save the cost functions
    savemat(directory_name + '/cost.mat', cost)
    if args.aws == 'y':
        k.key = directory_name + '/cost.mat'
        k.set_contents_from_filename(directory_name + '/cost.mat')
        os.remove(directory_name + '/cost.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")  # todo: create log file by looping through args
    # for arg in args:
    #     log_file.write(
    #         args.
    #     )
    for m in range(len(args.model)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m,
                                                                                    args.model[m],
                                                                                    args.dimensions[m],
                                                                                    args.iterations[m])
        )
        if args.model[m] == 'GroupSF' or args.model[m] == 'GroupConvolutionalSF':
            log_file.write(
                " Groups: %d \n Step: %d" % (args.group, args.step)
            )
        ex = data.shape[0]
        if args.examples is not None:
            ex = args.examples

    log_file.write(
        " Data-set: %s \n Examples: %6d \n Whitened: %s" % (args.filename, ex, args.whitening)
    )
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if args.aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")

    ''' =============================== Verbosity Options ===================================== '''

    # get variables and saves
    if args.verbosity >= 1:

        # # get variables of interest
        # activations_norm = {}
        # activations_raw = {}
        # activations_shuffled = {}
        # reconstruction = {}
        # error_recon = {}
        # pooled = {}

        # for l in xrange(len(args.dimensions)):

            # activations_norm['layer' + str(l)] = {}
            # activations_raw['layer' + str(l)] = {}
            # activations_shuffled['layer' + str(l)] = {}
            # reconstruction['layer' + str(l)] = {}
            # error_recon['layer' + str(l)] = {}
            # pooled['layer' + str(l)] = {}

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

            # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat
            # activations_raw['layer' + str(l)]['batch' + str(batch)] = f
            # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled
            # reconstruction['layer' + str(l)]['batch' + str(batch)] = err
            # error_recon['layer' + str(l)]['batch' + str(batch)] = rec
            # pooled['layer' + str(l)]['batch' + str(batch)] = p

            # define [mini]batch title
            batch_title = 'layer' + str(l) + '_batch' + '%03d' % batch

            # define norm and raw file names
            norm_file_name = directory_name + '/activations_norm_' + batch_title + '.mat'
            raw_file_name = directory_name + '/activation_raw_' + batch_title + '.mat'

            activations_norm[batch_title] = f_hat
            activations_raw[batch_title] = f
            activations_shuffled[batch_title] = f_hat_shuffled
            reconstruction[batch_title] = err
            error_recon[batch_title] = rec
            pooled[batch_title] = p

            # save model as well as weights and activations separately
            savemat(norm_file_name, activations_norm)
            # savemat(raw_file_name, activations_raw)

            if args.aws == 'y':

                k.key = norm_file_name
                k.set_contents_from_filename(norm_file_name)
                os.remove(norm_file_name)

                # k.key = raw_file_name
                # k.set_contents_from_filename(raw_file_name)
                # os.remove(raw_file_name)

        # savemat(directory_name + '/weights.mat', weights)
        # if args.aws == 'y':
        #     k.key = directory_name + '/weights.mat'
        #     k.set_contents_from_filename(directory_name + '/weights.mat')
        #     os.remove(directory_name + '/weights.mat')

        #     # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        #     f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size])
        #
        #     activations_norm['layer' + str(l)] = f_hat
        #     activations_raw['layer' + str(l)] = f
        #     activations_shuffled['layer' + str(l)] = f_hat_shuffled
        #     reconstruction['layer' + str(l)] = err
        #     error_recon['layer' + str(l)] = rec
        #     pooled['layer' + str(l)] = p
        #
        # # save model as well as weights and activations separately
        # savemat(directory_name + '/weights.mat', weights)
        # savemat(directory_name + '/activations_norm.mat', activations_norm)
        # savemat(directory_name + '/activation_raw.mat', activations_raw)

    # output helper file for concatenating activations
    helper = {'batches': n_batches, 'output_size': f_hat.shape}
    helper_file_name = directory_name + '/helper.mat'
    savemat(helper_file_name, helper)
    if args.aws == 'y':
        k.key = helper_file_name
        k.set_contents_from_filename(helper_file_name)
        os.remove(helper_file_name)

    # get data if not on AWS
    if args.aws == 'n':
        f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data)
        activations_norm = {"layer0": f_hat}

    # display figures
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T, color='gray', convolution=args.convolution,
                            pad=0, examples=None, channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in xrange(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                visualize.dispSparseHist(activations_shuffled[layer].reshape(args.dimensions[l][0],
                                                                             data.shape[0] *
                                                                             activations_shuffled[layer].shape[2] *
                                                                             activations_shuffled[layer].shape[3]),
                                         layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'], color='gray', convolution=args.convolution,
                                pad=1, examples=100)

        # # visualize reconstruction capabilities
        # if args.convolution == 'n':
        #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1)
        # elif args.convolution == 'y':
        #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
        #                                            color='gray', convolution=args.convolution)
        # print('Reconstructed error: %e' % reconstruction['layer0'])

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim)
            # print activations_raw['layer0']

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(pooled['layer0'][0, :, :, :].reshape(1,
                                                                                     pooled['layer0'].shape[1],
                                                                                     pooled['layer0'].shape[2],
                                                                                     pooled['layer0'].shape[3]),
                                                dim=dim / 2)

            # visualize an example of a LCNed convolved image after max pooling
            # temp = activations_raw['layer0']    #[0, :, :, :]
            temp = pooled['layer0']    #[0, :, :, :]
            # print temp.shape
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape((1, 1, dim / 2, dim / 2)), kernel_shape=5)
            # temp = scaling.LCNinput(temp, kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=dim / 2)
            # print temp

    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_data = loadmat(file_path)['X']
        test_labels = loadmat(file_path)['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            test_data = np.float32(test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])),
                                                     int(np.sqrt(test_data.shape[1]))))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:

                test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] *
                                               test_data.shape[2] * test_data.shape[3])

                train_input = data.reshape(data.shape[0], data.shape[1] *
                                           data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(test_input[0].shape[0], test_input[0].shape[1] *
                                                   test_input[0].shape[2] * test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] *
                                                  train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            log_file = open(directory_name + "/log.txt", "a")
            log_file.write(
                "\nAccuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy)
            )
            log_file.close()

    # visualize the confusion matrix
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
Beispiel #50
0
    def test_s3_treestore(self):
        # Create an s3 backed treestore
        # Requires these environment variables set
        #
        #   AWS_ACCESS_KEY_ID
        #   AWS_SECRET_ACCESS_KEY
        #   S3TS_BUCKET
        #
        # NB: this will only work if the bucket is empty

        s3c = boto.connect_s3()
        bucket = s3c.get_bucket(os.environ['S3TS_BUCKET'])

        with EmptyS3Bucket(bucket):
            fileStore = S3FileStore(bucket)
            localCache = LocalFileStore(
                makeEmptyDir(os.path.join(self.workdir, 'cache')))
            treestore = TreeStore.create(fileStore, localCache,
                                         TreeStoreConfig(100, True))

            # Upload it as a tree
            creationTime = datetimeFromIso('2015-01-01T00:00:00.0')
            treestore.upload('v1.0', '', creationTime, self.srcTree,
                             CaptureUploadProgress())
            pkg = treestore.findPackage('v1.0')

            # Confirm it's in the index
            self.assertEquals(treestore.listPackages(), ['v1.0'])

            # Verify it
            treestore.verify(pkg)

            # Download it, checking we get expected progress callbacks
            cb = CaptureDownloadProgress()
            treestore.download(pkg, cb)
            self.assertEquals(sorted(cb.recorded), [30, 45, 47, 100, 100])

            # Verify it locally
            treestore.verifyLocal(pkg)

            # Install it
            destTree = os.path.join(self.workdir, 'dest-1')
            treestore.install(pkg, destTree, CaptureInstallProgress())

            # Check that the installed tree is the same as the source tree
            self.assertEquals(
                subprocess.call('diff -r -x {0} {1} {2}'.format(
                    S3TS_PROPERTIES, self.srcTree, destTree),
                                shell=True), 0)
            self.assertEquals(readInstallProperties(destTree).treeName, 'v1.0')

            # Use the compareInstall function to confirm the installed package is ok, and
            # then check that modifying the files show up in the comparison
            result = treestore.compareInstall(pkg, destTree)
            self.assertEquals(len(result.missing), 0)
            self.assertEquals(len(result.extra), 0)
            self.assertEquals(len(result.diffs), 0)

            with open(os.path.join(destTree, "code/file1.py"), "w") as f:
                f.write("x")
            with open(os.path.join(destTree, "code/file3.py"), "w") as f:
                f.write("y")
            os.unlink(os.path.join(destTree, 'assets/car-01.db'))

            result = treestore.compareInstall(pkg, destTree)
            self.assertEquals(result.missing, set(['assets/car-01.db']))
            self.assertEquals(result.extra, set(['code/file3.py']))
            self.assertEquals(result.diffs, set(['code/file1.py']))

            # Reinstall to fix directory content
            shutil.rmtree(destTree)
            treestore.install(pkg, destTree, CaptureInstallProgress())
            result = treestore.compareInstall(pkg, destTree)
            self.assertEquals(len(result.missing), 0)
            self.assertEquals(len(result.extra), 0)
            self.assertEquals(len(result.diffs), 0)

            # Now create a pre-signed version of the package
            pkg = treestore.findPackage('v1.0')
            treestore.addUrls(pkg, 3600)
            self.assertEquals(len(result.missing), 0)
            self.assertEquals(len(result.extra), 0)
            self.assertEquals(len(result.diffs), 0)

            # And download it directly via http. Create a new local cache
            # to ensure that we actually redownload each chunk
            localCache = LocalFileStore(
                makeEmptyDir(os.path.join(self.workdir, 'cache')))
            treestore2 = TreeStore.forHttpOnly(localCache)
            cb = CaptureDownloadProgress()
            treestore2.downloadHttp(pkg, cb)
            self.assertEquals(sorted(cb.recorded), [30, 45, 47, 100, 100])

            # Install it
            destTree2 = os.path.join(self.workdir, 'dest-2')
            treestore2.install(pkg, destTree2, CaptureInstallProgress())

            # Check that the new installed tree is the same as the source tree
            self.assertEquals(
                subprocess.call('diff -r -x {0} {1} {2}'.format(
                    S3TS_PROPERTIES, self.srcTree, destTree2),
                                shell=True), 0)

            # Rename the tree, and check that installing that is the same
            treestore.rename('v1.0', 'v1.0x')
            pkg = treestore.findPackage('v1.0x')
            treestore.download(pkg, CaptureDownloadProgress())
            destTree = os.path.join(self.workdir, 'dest-3')
            treestore.install(pkg, destTree, CaptureInstallProgress())
            self.assertEquals(
                subprocess.call('diff -r -x {0} {1} {2}'.format(
                    S3TS_PROPERTIES, self.srcTree, destTree),
                                shell=True), 0)

            # Remove the tree
            treestore.remove('v1.0x')
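The test above leans on an EmptyS3Bucket helper that is not shown in this listing. Purely as an assumption about its behaviour, a context manager along these lines would fit how it is used:

# Hypothetical sketch of the EmptyS3Bucket context manager used above:
# it asserts the bucket starts empty and cleans it up afterwards.
class EmptyS3Bucket(object):
    def __init__(self, bucket):
        self.bucket = bucket

    def __enter__(self):
        assert not list(self.bucket.list()), 'bucket must be empty'
        return self.bucket

    def __exit__(self, exc_type, exc_value, traceback):
        for key in self.bucket.list():
            key.delete()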
Beispiel #51
0
def test_bucket_name_with_dot():
    conn = boto.connect_s3()
    bucket = conn.create_bucket('firstname.lastname')

    k = Key(bucket, 'somekey')
    k.set_contents_from_string('somedata')
Beispiel #52
0
# how to capture variables passed from the command line in Python?

import boto
import boto.s3.connection

access_key = 'your_access_key'
secret_key = 'your_secret_key'

conn = boto.connect_s3(
        aws_access_key_id = access_key,
        aws_secret_access_key = secret_key,
        host = 'objects.dreamhost.com',
        calling_format = boto.s3.connection.OrdinaryCallingFormat(),
        )

newbucket = conn.create_bucket('the-ipanemas')

for bucket in conn.get_all_buckets():
  print "{name}\t{created}".format(
          name = bucket.name,
          created = bucket.creation_date,
          )
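The comment at the top of the previous example asks how to take these values from the command line instead of hard-coding them; a minimal argparse sketch (the option names are illustrative):

# Hypothetical command-line front end for the connection above.
import argparse
import boto
import boto.s3.connection

parser = argparse.ArgumentParser()
parser.add_argument('--access-key', required=True)
parser.add_argument('--secret-key', required=True)
parser.add_argument('--host', default='objects.dreamhost.com')
args = parser.parse_args()

conn = boto.connect_s3(
        aws_access_key_id=args.access_key,
        aws_secret_access_key=args.secret_key,
        host=args.host,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
        )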
Beispiel #53
0
#! /usr/bin/env python

import os, sys, boto
from boto.s3.key import Key

if len(sys.argv) != 3:
    print "Usage : python uploader.py <bucket name> <file absolute path>"
    print "Usage : Enter 2 arguments in command line!!"
    exit()

# kwangje.park
AWS_ACCESS_KEY = "key here"
AWS_SECRET = "secret here"

s3Connection = boto.connect_s3(AWS_ACCESS_KEY, AWS_SECRET)

try:
    bucket = s3Connection.get_bucket(sys.argv[1])
    uploader = Key(bucket)
except:
    print "ERROR >> Connecting to bucket was failed !!!"
    exit()

uploadFile = sys.argv[2]

try:
    os.stat(uploadFile)
except:
    print "ERROR >> File is not existed in " + uploadFile
    exit()
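The uploader script above stops after checking that the file exists; the upload itself never happens. A minimal sketch of the missing step, reusing the uploader Key already created (naming the object after its basename is an assumption):

# Hypothetical final step: push the file under its basename and report it.
uploader.key = os.path.basename(uploadFile)
uploader.set_contents_from_filename(uploadFile)
print "Uploaded " + uploadFile + " to bucket " + sys.argv[1]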
Beispiel #54
0
def deploy_static(app_name, env_name, domain, force):
    app = App(env_name, app_name)
    bucket_name = domain or '{}-{}'.format(
        config.get('system_name',
                   uuid.uuid1().hex), app.repo.name)

    app.repo.fetch()

    version = app.repo.head_commit_id()

    s3 = boto.connect_s3()
    b = s3.lookup(bucket_name)

    if b is not None:
        version_key = b.get_key('__VERSION__')
        if version_key is not None:
            current_version = version_key.get_metadata('git-version')
            if version == current_version:
                if force:
                    print '-----> Version {} already deployed, but re-deploying anyway'.format(
                        version)
                else:
                    print '-----> Version {} already deployed!'.format(version)
                    return

    with lcd(app.repo.path):
        build_cmd = app.config.get('build_script')
        if build_cmd:
            print '-----> Building'
            local(build_cmd)

    if b is None:
        print '-----> Creating bucket {}'.format(bucket_name)
        b = s3.create_bucket(bucket_name)

    # TODO: this policy allows all users read access to all objects.
    # Need to find a way to limit access to __VERSION__ to only authenticated
    # users.
    public_access_policy = json.dumps({
        "Version":
        "2012-10-17",
        "Statement": [{
            "Sid": "PublicReadForGetBucketObjects",
            "Effect": "Allow",
            "Principal": "*",
            "Action": ["s3:GetObject"],
            "Resource": ["arn:aws:s3:::{}/*".format(bucket_name)]
        }]
    })
    b.set_policy(public_access_policy)
    #b.configure_versioning(versioning=False)
    b.configure_website(suffix="index.html", error_key="error.html")

    def map_key_to_obj(m, obj):
        if obj.key != '__VERSION__':
            m[obj.key] = obj
        return m

    existing_keys = reduce(map_key_to_obj, b.get_all_keys(), {})

    root = normpath(join(app.repo.path, app.config.get('root_dir', '')))

    app_redirects = app.config.get('redirects', {})
    for key_name in app_redirects.keys():
        existing_keys.pop(key_name, None)

    print '-----> Uploading {} to {} bucket'.format(root, bucket_name)
    new_keys = []
    updated_keys = []
    for dirname, dirnames, filenames in walk(root):
        reldirname = relpath(dirname, root)
        reldirname = '' if reldirname == '.' else reldirname
        if os.path.commonprefix(['.git', reldirname]) == '.git':
            continue
        for filename in filenames:
            full_filename = join(reldirname, filename)
            if full_filename == '.s3':
                continue
            new_or_update = '        '
            if existing_keys.has_key(full_filename):
                new_or_update = '[UPDATE]'
                updated_keys.append(full_filename)
                key = existing_keys.pop(full_filename)
            else:
                new_or_update = '[NEW]   '
                new_keys.append(full_filename)
                key = b.new_key(full_filename)
            print '       {} Uploading {}'.format(new_or_update, full_filename)
            key.set_contents_from_filename(join(dirname, filename))
    if len(existing_keys) > 0:
        print '-----> WARNING: the following files are still present but no'
        print '       longer part of the website:'
        for k, v in existing_keys.iteritems():
            print '       {}'.format(k)

    print '-----> Tagging bucket with git version {}'.format(version)
    version_key = b.get_key('__VERSION__')
    if version_key:
        version_key.delete()
    version_key = b.new_key('__VERSION__')
    version_key.set_metadata('git-version', version)
    version_key.set_contents_from_string('')

    print '-----> Setting up redirects'
    app_redirects = app.config.get('redirects', {})
    if len(app_redirects) == 0:
        print '       No redirects.'
    else:

        def get_or_new_key(bucket, name):
            key = bucket.get_key(name)
            if key is not None:
                key.delete()
            return bucket.new_key(name)

        elb = boto.connect_elb()
        pybars_compiler = pybars.Compiler()
        for key_name, redirect_source in app_redirects.iteritems():
            redirect_template = pybars_compiler.compile(redirect_source)
            app_redirects[key_name] = redirect_template
        data = {
            'webui_dns':
            elb.get_all_load_balancers(
                load_balancer_names=['{}-web-ui'.format(env_name)])[0].dns_name
        }
        for key_name, redirect_template in app_redirects.iteritems():
            k = get_or_new_key(b, key_name)
            redirect = unicode(redirect_template(data))
            print '       Redirect {} to {}'.format(key_name, redirect)
            k.set_redirect(redirect)

    print '=====> Deployed to {}!'.format(b.get_website_endpoint())

    if domain is not None:

        # TODO: support redirection from www.<domain>
        # b_www = 'www.{}'.format(bucket_name)

        ec2 = boto.connect_ec2()
        region_name = first([
            z.region.name for z in ec2.get_all_zones()
            if z.name == config['availability_zone']
        ])
        s3_website_region = s3_website_regions[region_name]

        route53 = boto.connect_route53()
        zone_name = "{}.".format(get_tld("http://{}".format(domain)))
        zone = route53.get_zone(zone_name)
        if zone is None:
            raise Exception("Cannot find zone {}".format(zone_name))
        full_domain = "{}.".format(domain)
        a_record = zone.get_a(full_domain)
        if not a_record:
            print '-----> Creating ALIAS for {} to S3'.format(full_domain)
            changes = ResourceRecordSets(route53, zone.id)
            change_a = changes.add_change('CREATE', full_domain, 'A')
            change_a.set_alias(alias_hosted_zone_id=s3_website_region[1],
                               alias_dns_name=s3_website_region[0])
            #change_cname = records.add_change('CREATE', 'www.' + full_domain, 'CNAME')
            #change_cname.add_value(b_www.get_website_endpoint())
            changes.commit()
        else:
            print '-----> ALIAS for {} to S3 already exists'.format(
                full_domain)
            print '       {}'.format(a_record)
            if a_record.alias_dns_name != s3_website_region[0]:
                print '       WARNING: Alias DNS name is {}, but should be {}'.format(
                    a_record.alias_dns_name, s3_website_region[0])
            if a_record.alias_hosted_zone_id != s3_website_region[1]:
                print '       WARNING: Alias hosted zone ID is {}, but should be {}'.format(
                    a_record.alias_hosted_zone_id, s3_website_region[1])
            if a_record.name != full_domain:
                print '       WARNING: Domain is {}, but should be {}'.format(
                    a_record.name, full_domain)
            if a_record.type != 'A':
                print '       WARNING: Record type is {}, but should be {}'.format(
                    a_record.type, 'A')

    print '=====> DONE!'
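One gap worth flagging in deploy_static above: files are uploaded without a Content-Type, so S3 serves its default. A small sketch of how the upload call could guess the type per file (a suggestion, not part of the original code):

import mimetypes

# Guess a Content-Type from the file name and pass it with the upload.
content_type, _ = mimetypes.guess_type(full_filename)
upload_headers = {'Content-Type': content_type} if content_type else {}
key.set_contents_from_filename(join(dirname, filename), headers=upload_headers)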
Beispiel #55
0
def test_bucket_with_dash():
    conn = boto.connect_s3('the_key', 'the_secret')
    conn.get_bucket.when.called_with(
        'mybucket-test').should.throw(S3ResponseError)
Beispiel #56
0
 def connect_method(self, *args, **kwargs):
     return boto.connect_s3(*args, **kwargs)
    start_time = time.time()
    assert opt.mysql_query_file, 'Input query file (-q,--mysql_query_file) is not set.'
    assert os.path.isfile(
        opt.mysql_query_file
    ), 'Query file "%s"\ndoes not exists.' % opt.mysql_query_file
    q_file = os.path.splitext(os.path.basename(opt.mysql_query_file))

    assert opt.red_to_table, 'Target Redshift table is not set.'
    assert opt.s3_bucket_name, 'Target S3 bucket name (-b,--s3_bucket_name) is not set.'
    assert RepresentsInt(opt.mysql_lame_duck
                         ), '[-l] --mysql_lame_duck is not of type "integer".'
    if not opt.s3_key_name:
        opt.s3_key_name = q_file[0]

    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

    try:
        bucket = conn.get_bucket(opt.s3_bucket_name)
        print('Uploading results of "%s" to existing bucket "%s"' %
              (''.join(q_file), opt.s3_bucket_name))
    except S3ResponseError as err:

        if str(err).strip().endswith('404 Not Found'):
            print('Creating new bucket "%s" in location "%s"' %
                  (opt.s3_bucket_name, opt.s3_location))
            try:
                conn.create_bucket(opt.s3_bucket_name,
                                   location=opt.s3_location)
                print(
                    'Uploading results of "%s" to new bucket "%s" in region "%s"'
Beispiel #58
0
 def get_s3_connection(self):
     if not self.s3:
         self.s3 = boto.connect_s3(self.aws_access_key_id,
                                   self.aws_secret_access_key)
     return self.s3
Beispiel #59
0
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import boto
import sys
import threading
import multiprocessing
import Queue

c = boto.connect_s3()


def uploadThread(mpUpload, fileQueue):
    while True:
        try:
            ix, fileName = fileQueue.get(False)
            with open(fileName, 'rb') as fp:
                mpUpload.upload_part_from_file(fp, ix)
                print 'finished upload of %s' % fileName
        except Queue.Empty:
            return


if __name__ == "__main__":
    if len(sys.argv) < 4:
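        # --- Hypothetical continuation: the original snippet is truncated here. ---
        print 'usage: %s <bucket> <key-name> <part-file> [<part-file> ...]' % sys.argv[0]
        sys.exit(1)

    # Everything below reuses only what the snippet already defines (c, uploadThread,
    # threading, multiprocessing, Queue); the wiring itself is an assumption.
    bucket = c.get_bucket(sys.argv[1])
    mpUpload = bucket.initiate_multipart_upload(sys.argv[2])

    # Part numbers for multipart uploads start at 1.
    fileQueue = Queue.Queue()
    for ix, fileName in enumerate(sys.argv[3:], start=1):
        fileQueue.put((ix, fileName))

    workers = [threading.Thread(target=uploadThread, args=(mpUpload, fileQueue))
               for _ in range(multiprocessing.cpu_count())]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    mpUpload.complete_upload()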
Beispiel #60
0
def test_missing_key():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket("foobar")
    bucket.get_key("the-key").should.equal(None)