def new_data_to_publish(config, section, blob):
    # Get the metadata for our old chunk

    # If necessary, fetch the existing data from S3, otherwise open a local file
    if ((config.has_option('main', 's3_upload')
         and config.getboolean('main', 's3_upload'))
            or (config.has_option(section, 's3_upload')
                and config.getboolean(section, 's3_upload'))):
        conn = boto.s3.connection.S3Connection()
        bucket = conn.get_bucket(config.get('main', 's3_bucket'))
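        # prefer an explicit per-section s3_key, falling back to the output file name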
        s3key = config.get(section, 's3_key') or config.get(section, 'output')
        key = bucket.get_key(s3key)
        if key is None:
            # most likely a new list
            print "{0} looks like it hasn't been uploaded to s3://{1}/{2}".format(
                section, bucket.name, s3key)
            key = boto.s3.key.Key(bucket)
            key.key = s3key
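            # seed the new key with a placeholder chunk (an "a:1:32:32" header
            # plus 32 bytes of data) so the checksum comparison below reports
            # new data to publish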
            key.set_contents_from_string("a:1:32:32\n" + 32 * '1')
        current = tempfile.TemporaryFile()
        key.get_contents_to_file(current)
        current.seek(0)
    else:
        current = open(config.get(section, 'output'), 'rb')

    old = chunk_metadata(current)
    current.close()

    new = chunk_metadata(blob)

    if old['checksum'] != new['checksum']:
        return True
    return False
Example 2
def new_data_to_publish(config, section, blob):
    # Get the metadata for our old chunk

    # If necessary fetch the existing data from S3, otherwise open a local file
    if ((config.has_option('main', 's3_upload')
         and config.getboolean('main', 's3_upload'))
        or (config.has_option(section, 's3_upload')
            and config.getboolean(section, 's3_upload'))):
        conn = boto.s3.connection.S3Connection()
        bucket = conn.get_bucket(config.get('main', 's3_bucket'))
        s3key = config.get(section, 's3_key') or config.get(section, 'output')
        key = bucket.get_key(s3key)
        if key is None:
            # most likely a new list
            print("{0} looks like it hasn't been uploaded to "
                  "s3://{1}/{2}".format(section, bucket.name, s3key))
            key = boto.s3.key.Key(bucket)
            key.key = s3key
            key.set_contents_from_string("a:1:32:32\n" + 32 * '1')
        current = tempfile.TemporaryFile()
        key.get_contents_to_file(current)
        current.seek(0)
    else:
        current = open(config.get(section, 'output'), 'rb')

    old = chunk_metadata(current)
    current.close()

    new = chunk_metadata(blob)

    if old['checksum'] != new['checksum']:
        return True
    return False
Example 3
    def sendBlob(self, oid, serial):
        try:
            key = self._s3key
        except AttributeError:
            conn = boto.s3.connection.S3Connection()
            bucket = self._s3bucket = conn.get_bucket(bucket_name)
            key = self._s3key = boto.s3.key.Key(bucket)
        key.key = "%s/%s/%s" % (folder, oid.encode('hex'), serial.encode('hex'))
        f = tempfile.TemporaryFile()
        key.get_contents_to_file(f)
        f.seek(0)

        def store():
            yield ('receiveBlobStart', (oid, serial))
            while 1:
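                # read the blob in ~59 KB chunks, presumably to keep each RPC
                # message under the transport's size limit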
                chunk = f.read(59000)
                if not chunk:
                    break
                yield ('receiveBlobChunk', (oid, serial, chunk, ))
            f.close()
            yield ('receiveBlobStop', (oid, serial))

        self.client.rpc.callAsyncIterator(store())
Example 4
def fetch(s3bucket,
          s3key,
          aws_access_key,
          aws_secret_key,
          output_file,
          headers=None):

    if isinstance(output_file, basestring):
        output_file = open(output_file, 'wb')
        _close_when_done_ = True
    else:
        _close_when_done_ = False

    connection = S3Connection(aws_access_key, aws_secret_key)
    bucket = connection.lookup(s3bucket)
    if bucket is None:
        sys.stderr.write(
            'bucket does not exist, may be caused by incorrect credentials')
        return 1

    key = boto.s3.key.Key(bucket, s3key)
    if not key.exists():
        sys.stderr.write('key does not exist within given bucket')
        return 1

    key.get_contents_to_file(output_file, headers=headers)

    if _close_when_done_:
        output_file.close()

    return 0
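
A minimal usage sketch for the function above; the bucket name, key, and output
path are placeholders, and the credentials are assumed to come from environment
variables:

import os
import sys

exit_code = fetch('example-bucket', 'logs/2015-01-01.log',
                  os.environ['AWS_ACCESS_KEY_ID'],
                  os.environ['AWS_SECRET_ACCESS_KEY'],
                  '/tmp/2015-01-01.log')
sys.exit(exit_code)
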
Example 5
def fetch(s3bucket, s3key, aws_access_key, aws_secret_key, output_file, headers=None):

    if isinstance(output_file, basestring):
        output_file = open(output_file, 'wb')
        _close_when_done_ = True
    else:
        _close_when_done_ = False

    connection = S3Connection(aws_access_key, aws_secret_key)
    bucket = connection.lookup(s3bucket)
    if bucket is None:
        sys.stderr.write(
            'bucket does not exist, may be caused by incorrect credentials')
        return 1

    key = boto.s3.key.Key(bucket, s3key)
    if not key.exists():
        sys.stderr.write('key does not exist within given bucket')
        return 1

    key.get_contents_to_file(output_file, headers=headers)

    if _close_when_done_:
        output_file.close()

    return 0
Example 6
def sync(log_paths, es, location=None, db=None, force=False):
    """
    Insert log records into Elasticsearch.
    """
    create_index_templates(es)

    conn = None
    keys = log_paths
    if location:
        log_url = urllib3.util.parse_url(location)
        if log_url.scheme == 's3':
            s3_bucket = log_url.host
            s3_prefix = log_url.path
            if s3_prefix.startswith('/'):
                s3_prefix = s3_prefix[1:]
            if conn is None:
                conn = boto.connect_s3()
            bucket = conn.get_bucket(s3_bucket)
            if not log_paths:
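                # no explicit log paths were given: sync every key under the prefix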
                keys = bucket.list(prefix=s3_prefix)
            else:
                keys = [bucket.get_key(path) for path in log_paths]

    for key in keys:
        if isinstance(key, boto.s3.key.Key):
            name = key.key
        else:
            name = key

        if force:
            finished = False
        else:
            db.execute('select finished from UPLOAD where key=?', (name,))
            row = db.fetchone()
            finished = (row and row[0])

        if not finished:
            sys.stderr.write('uploading %s...\n' % name)

            if isinstance(key, boto.s3.key.Key):
                with tempfile.TemporaryFile() as temp_file:
                    key.get_contents_to_file(temp_file)
                    temp_file.seek(0)
                    (successes, errors) = sync_fileobj(temp_file, es, name)
            else:
                with open(name, 'rb') as fileobj:
                    (successes, errors) = sync_fileobj(fileobj, es, name)

            sys.stdout.write('%s: %d successes, %d errors\n'
                % (name, successes, len(errors)))

            if errors:
                sys.stdout.write('error: %s\n' % str(errors))
            else:
                row_data = (datetime.now().isoformat(), name, True)
                db.execute(
                    'INSERT OR REPLACE into UPLOAD (dt,key,finished) '
                    'VALUES (?,?,?)', row_data)
                sys.stdout.write('%s: cache results\n' % name)
        else:
            sys.stdout.write('%s: cached (skipping)\n' % name)
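
The db argument is used as a DB-API cursor over an UPLOAD table whose schema is
not shown here; a sketch consistent with the queries above (column types are
assumptions) would be:

import sqlite3

conn = sqlite3.connect('sync_cache.db')  # hypothetical cache database file
db = conn.cursor()
# `key` must be unique for INSERT OR REPLACE to behave as a per-key cache
db.execute('CREATE TABLE IF NOT EXISTS UPLOAD '
           '(dt TEXT, key TEXT PRIMARY KEY, finished INTEGER)')
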
Example 7
def sync(log_paths, es, location=None, db=None, force=False):
    """
    Insert log records into Elasticsearch.
    """
    create_index_templates(es)

    conn = None
    keys = log_paths
    if location:
        log_url = urllib3.util.parse_url(location)
        if log_url.scheme == 's3':
            s3_bucket = log_url.host
            s3_prefix = log_url.path
            if s3_prefix.startswith('/'):
                s3_prefix = s3_prefix[1:]
            if conn is None:
                conn = boto.connect_s3()
            bucket = conn.get_bucket(s3_bucket)
            if not log_paths:
                keys = bucket.list(prefix=s3_prefix)
            else:
                keys = [bucket.get_key(path) for path in log_paths]

    for key in keys:
        if isinstance(key, boto.s3.key.Key):
            name = key.key
        else:
            name = key

        if force:
            finished = False
        else:
            db.execute('select finished from UPLOAD where key=?', (name,))
            row = db.fetchone()
            finished = (row and row[0])

        if not finished:
            sys.stdout.write('uploading %s...\n' % name)

            if isinstance(key, boto.s3.key.Key):
                with tempfile.TemporaryFile() as temp_file:
                    key.get_contents_to_file(temp_file)
                    temp_file.seek(0)
                    (successes, errors) = sync_fileobj(temp_file, es, name)
            else:
                with open(name, 'rb') as fileobj:
                    (successes, errors) = sync_fileobj(fileobj, es, name)

            sys.stdout.write('successes: %s\n' % str(successes))
            sys.stdout.write('errors: %s\n' % str(errors))

            if not errors:
                row_data = (datetime.now().isoformat(), name, True)
                db.execute(
                    'INSERT OR REPLACE into UPLOAD (dt,key,finished) '
                    'VALUES (?,?,?)', row_data)
                sys.stdout.write('done %s\n' % name)
        else:
            sys.stdout.write('skipping %s\n' % name)
Example 8
def new_data_to_publish_to_s3(config, section, new):
    # Get the metadata for our old chunk

    # If necessary fetch the existing data from S3, otherwise open a local file
    if ((config.has_option('main', 's3_upload')
         and config.getboolean('main', 's3_upload'))
            or (config.has_option(section, 's3_upload')
                and config.getboolean(section, 's3_upload'))):
        conn = boto.s3.connection.S3Connection()
        bucket = conn.get_bucket(config.get('main', 's3_bucket'))
        s3key = config.get(section, 's3_key') or config.get(section, 'output')
        key = bucket.get_key(s3key)
        if key is None:
            # most likely a new list
            print('{0} looks like it hasn\'t been uploaded to '
                  's3://{1}/{2}'.format(section, bucket.name, s3key))
            key = boto.s3.key.Key(bucket)
            key.key = s3key
            key.set_contents_from_string('a:1:32:32\n' + 32 * '1')
        current = tempfile.TemporaryFile()
        key.get_contents_to_file(current)
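        # re-apply permissions so the bucket owner (and, presumably, a CloudFront
        # origin access identity) can read the object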
        key.set_acl('bucket-owner-full-control')
        if CLOUDFRONT_USER_ID is not None:
            key.add_user_grant('READ', CLOUDFRONT_USER_ID)
        current.seek(0)
    else:
        current = open(config.get(section, 'output'), 'rb')

    old = chunk_metadata(current)
    current.close()

    s3_upload_needed = False
    if old['checksum'] != new['checksum']:
        s3_upload_needed = True

    return s3_upload_needed
Example 9
def _fetch_file(data, source, file):
    if source.startswith('s3://'):
        key = _get_key(data, source)
        key.get_contents_to_file(file)
    else:
        raise NotImplementedError
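
All of the examples above share the same download pattern: obtain a boto Key and
stream its contents into a local file object with get_contents_to_file(). A
minimal self-contained sketch, with placeholder bucket and key names and
credentials taken from the usual boto configuration:

import tempfile

import boto.s3.connection

conn = boto.s3.connection.S3Connection()    # credentials from env vars or boto config
bucket = conn.get_bucket('example-bucket')  # placeholder bucket name
key = bucket.get_key('path/to/object')      # returns None if the key does not exist
if key is not None:
    with tempfile.TemporaryFile() as fileobj:
        key.get_contents_to_file(fileobj)   # download the object into the file
        fileobj.seek(0)                     # rewind before reading it back
        data = fileobj.read()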