コード例 #1
0
ファイル: bquantile_test.py プロジェクト: nathants/bsv
def sketch_data(datas, alpha, bins, minval, quantiles):
    """Build a bquantile sketch per dataset, merge the sketches, and return
    the merged quantile values as a list of floats.

    NOTE(review): minval is accepted but unused in this body — confirm it is
    kept only for caller compatibility.
    """
    quantile_arg = ','.join(map(str, quantiles))
    with shell.tempdir():
        for data in datas:
            payload = '\n'.join(map(str, data)) + '\n'
            shell.run(f'bsv | bschema a:f64 | bquantile-sketch f64 -b {bins} -a {alpha} >> sketches', stdin=payload)
        merged = shell.run(f'cat sketches | bquantile-merge {quantile_arg} | bschema f64:a,f64:a | csv')
        values = []
        for line in merged.splitlines():
            # each line is "quantile,value"; only the value is returned
            _quantile, value = line.split(',')
            values.append(float(value))
        return values
コード例 #2
0
def test_appends():
    """Running bpartition twice with the same prefix should append to the
    existing partition files, doubling every partition's contents."""
    with shell.tempdir():
        stdin = """
        0,b,c,d
        1,e,f,g
        2,h,i,j
        """
        stdout = """
        prefix00
        prefix01
        prefix02
        """
        # partition the same input twice; each run reports the same files
        # (f-string prefixes removed: the strings have no placeholders)
        assert rm_whitespace(unindent(stdout)) == shell.run('bsv | bpartition 10 prefix', stdin=unindent(stdin))
        assert rm_whitespace(unindent(stdout)) == shell.run('bsv | bpartition 10 prefix', stdin=unindent(stdin))
        stdout = """
        prefix00:b,c,d
        prefix00:b,c,d
        prefix01:e,f,g
        prefix01:e,f,g
        prefix02:h,i,j
        prefix02:h,i,j
        """
        # every row appears twice because the second run appended
        assert unindent(stdout).strip() == shell.run('bcat --prefix prefix*')
        stdout = """
        prefix00
        prefix01
        prefix02
        """
        # still only three partition files on disk
        assert unindent(stdout).strip() == shell.run('ls prefix*')
コード例 #3
0
def test_basic():
    """bcat concatenates bsv files in argument order; --head limits rows
    taken per file and --prefix prepends 'filename:' to each row."""
    with shell.tempdir():
        # files a, b, c each get two rows containing their own name
        shell.run('for char in a a b b c c; do echo $char | bsv >> $char; done')
        stdout = """
        a:a
        b:b
        c:c
        """
        assert rm_whitespace(unindent(stdout)) == shell.run('bcat --prefix --head 1 a b c')
        stdout = """
        a:a
        a:a
        b:b
        b:b
        c:c
        c:c
        """
        # --head 2 takes both rows; flag order must not matter, and omitting
        # --head entirely gives the same two-row-per-file output
        assert rm_whitespace(unindent(stdout)) == shell.run('bcat --prefix --head 2 a b c')
        assert rm_whitespace(unindent(stdout)) == shell.run('bcat --head 2 --prefix a b c')
        assert rm_whitespace(unindent(stdout)) == shell.run('bcat --prefix a b c')
        stdout = """
        a
        b
        c
        """
        assert rm_whitespace(unindent(stdout)) == shell.run('bcat --head 1 a b c')
        stdout = """
        a
        a
        b
        b
        c
        c
        """
        assert rm_whitespace(unindent(stdout)) == shell.run('bcat a b c')
コード例 #4
0
ファイル: test_server.py プロジェクト: nathants/s4
def test_cp_dot_to_dot():
    """Recursive `s4 cp` of '.' to a bucket and back must round-trip the
    directory tree; copying a single key prefix works with and without a
    trailing slash."""
    with servers():
        with shell.tempdir():
            run('mkdir dir1 dir2')
            run('touch dir1/file1.txt dir2/file2.txt dir2/file3.txt')
            run('s4 cp -r . s4://bucket')
        # awk '{print $NF}' keeps only the key column of the listing
        assert run(
            "s4 ls -r s4://bucket | awk '{print $NF}'").splitlines() == [
                'dir1/file1.txt',
                'dir2/file2.txt',
                'dir2/file3.txt',
            ]
        # full round-trip: bucket -> cwd restores both dirs
        run('s4 cp -r s4://bucket .')
        assert sorted(run('find dir* -type f').splitlines()) == [
            'dir1/file1.txt',
            'dir2/file2.txt',
            'dir2/file3.txt',
        ]
        run('rm -rf dir*')
        # copying only the dir2 prefix restores just dir2
        run('s4 cp -r s4://bucket/dir2 .')
        assert sorted(run('find dir* -type f').splitlines()) == [
            'dir2/file2.txt',
            'dir2/file3.txt',
        ]
        run('rm -rf dir*')
        # trailing slash on the prefix behaves the same here
        run('s4 cp -r s4://bucket/dir2/ .')
        assert sorted(run('find dir* -type f').splitlines()) == [
            'dir2/file2.txt',
            'dir2/file3.txt',
        ]
コード例 #5
0
ファイル: test_util.py プロジェクト: slham/bsv
def clone_source():
    """Copy the current git repo into a fresh temp dir (not cleaned up on
    exit) and return that dir's absolute path."""
    with shell.climb_git_root():
        orig = os.getcwd()
        with shell.tempdir(cleanup=False):
            # -c compares by checksum; VCS/tooling dirs are excluded
            shell.run(f"rsync -avhc {orig}/ . --exclude '.git' --exclude '.tox' --exclude '.backups' --exclude '__pycache__'")
            # empty .git placeholder — presumably so tools treat the clone
            # as a repo root; confirm against callers
            shell.run('mkdir .git')
            return os.getcwd()
コード例 #6
0
ファイル: test_s3_stubbed.py プロジェクト: shareablee/py-aws
def test_binary():
    """Binary (lz4-compressed) data must round-trip through cp unchanged."""
    with shell.tempdir():
        with open('1.txt', 'w') as f:
            f.write('123')
        # upload compressed bytes via stdin, then download and decompress
        run('cat 1.txt | lz4 -1 |', preamble, 'cp - s3://bucket/binary/1.txt')
        assert '123' == run(preamble,
                            'cp s3://bucket/binary/1.txt - | lz4 -d -c')
コード例 #7
0
def test_props(args):
    """Property test: bpartition must create exactly the expected prefix
    files and preserve all expected rows across the partitions.

    args: (num_buckets, csv) pair supplied by the property-test harness.
    """
    num_buckets, csv = args
    result = expected(num_buckets, csv)
    with shell.tempdir():
        # the sorted set of partition filenames expected to be created
        stdout = '\n'.join(sorted({l.split(':')[0] for l in result.splitlines()}))
        assert stdout == shell.run(f'bsv | bpartition {num_buckets} prefix', stdin=csv, echo=True)
        # f-string prefix removed: the command has no placeholders
        assert result == shell.run('bcat --prefix prefix*')
コード例 #8
0
ファイル: bpartition_lz4_test.py プロジェクト: nathants/bsv
def test_basic():
    """bpartition -l (lz4 mode) writes rows into hashed partition files and
    bcat -l -p reads them back with a 'filename:' prefix."""
    with shell.tempdir():
        stdin = """
        b,c,d
        e,f,g
        h,i,j
        """
        stdout = """
        prefix_02
        prefix_04
        prefix_05
        """
        # partition numbers come from hashing the rows into 10 buckets
        assert rm_whitespace(unindent(stdout)) == shell.run(
            'bsv | bpartition -l 10 prefix', stdin=unindent(stdin))
        stdout = """
        prefix_02:h,i,j
        prefix_04:e,f,g
        prefix_05:b,c,d
        """
        assert unindent(stdout).strip() == shell.run('bcat -l -p prefix*')
        stdout = """
        prefix_02
        prefix_04
        prefix_05
        """
        # exactly those three files exist on disk
        assert unindent(stdout).strip() == shell.run('ls prefix*')
コード例 #9
0
ファイル: bsplit_test.py プロジェクト: nathants/bsv
def test_props(args):
    """Property test: splitting a bsv file with bsplit and re-concatenating
    the resulting chunks must reproduce the original data (compared via
    csv | xxh3 checksums)."""
    buffer, lines, chunks_per_file = args
    # an unset chunk count becomes an empty cli argument
    chunks_per_file = chunks_per_file or ''
    with shell.tempdir():
        shell.run(f'_gen_csv 2 {lines} | bsv.{buffer} > data.bsv', echo=True)
        shell.run(f'cat data.bsv | bsplit.{buffer} prefix {chunks_per_file} > filenames')
        original_hash = shell.run(f'cat data.bsv | csv.{buffer} | xxh3')
        rejoined_hash = shell.run(f'cat filenames | while read path; do cat $path; done | csv.{buffer} | xxh3')
        assert original_hash == rejoined_hash
コード例 #10
0
ファイル: test_cluster.py プロジェクト: nathants/s4
 def push():
     """Push the s4 config and source tree to every cluster node and run the
     install script; records the node ids in shared state.

     NOTE(review): conf, ids, cluster_name and state come from the enclosing
     scope (not visible in this chunk) — confirm at the definition site.
     """
     with shell.tempdir():
         with open('s4.conf', 'w') as f:
             f.write(conf)
         # copy the config to each instance as ~/.s4.conf
         run('aws-ec2-scp -y s4.conf :.s4.conf', *ids)
     with shell.climb_git_root():
         run('aws-ec2-rsync -y . :/mnt/s4', cluster_name)
         run('aws-ec2-ssh -yc scripts/install_archlinux.sh', *ids)
     state['ids'] = ids
コード例 #11
0
ファイル: test_s3_stubbed.py プロジェクト: shareablee/py-aws
def test_cp():
    """Recursive cp semantics: uploading 'foo/' (trailing slash) copies the
    directory's contents, 'foo' (no slash) behaves the same here; downloads
    recreate the tree whether the destination is named or '.'."""
    with shell.tempdir():
        run('mkdir -p foo/3')
        with open('foo/1.txt', 'w') as f:
            f.write('123')
        with open('foo/2.txt', 'w') as f:
            f.write('234')
        with open('foo/3/4.txt', 'w') as f:
            f.write('456')
        # upload directory contents (trailing slash on source)
        run(preamble, 'cp foo/ s3://bucket/cp/dst/ --recursive')
        assert rm_whitespace(run(preamble,
                                 'ls bucket/cp/dst/')) == rm_whitespace("""
              PRE 3/
            _ _ _ 1.txt
            _ _ _ 2.txt
        """)
        assert rm_whitespace(run(
            preamble, 'ls bucket/cp/dst/ --recursive')) == rm_whitespace("""
            _ _ _ cp/dst/1.txt
            _ _ _ cp/dst/2.txt
            _ _ _ cp/dst/3/4.txt
        """)
        # download into a named directory
        run(preamble, 'cp s3://bucket/cp/dst/ dst1/ --recursive')
        assert run('grep ".*" $(find dst1/ -type f|LC_ALL=C sort)'
                   ) == rm_whitespace("""
            dst1/1.txt:123
            dst1/2.txt:234
            dst1/3/4.txt:456
        """)
        # download into the current directory; last key component names it
        run(preamble, 'cp s3://bucket/cp/dst/ . --recursive')
        assert run('grep ".*" $(find dst/ -type f|LC_ALL=C sort)'
                   ) == rm_whitespace("""
            dst/1.txt:123
            dst/2.txt:234
            dst/3/4.txt:456
        """)
        run('rm -rf dst')
        # same upload without the trailing slash on the source dir
        run(preamble, 'cp foo s3://bucket/cp/dst2 --recursive')
        assert rm_whitespace(run(preamble,
                                 'ls bucket/cp/dst2/')) == rm_whitespace("""
              PRE 3/
            _ _ _ 1.txt
            _ _ _ 2.txt
        """)
        assert rm_whitespace(run(
            preamble, 'ls bucket/cp/dst2/ --recursive')) == rm_whitespace("""
            _ _ _ cp/dst2/1.txt
            _ _ _ cp/dst2/2.txt
            _ _ _ cp/dst2/3/4.txt
        """)
        run(preamble, 'cp s3://bucket/cp/dst . --recursive')
        assert run('grep ".*" $(find dst/ -type f|LC_ALL=C sort)'
                   ) == rm_whitespace("""
            dst/1.txt:123
            dst/2.txt:234
            dst/3/4.txt:456
        """)
コード例 #12
0
def test_props(csvs):
    """Property test: brmerge over sorted bsv files must equal both the
    expected output and `cat ... | brsort` over the same files.

    csvs: iterable of csv strings supplied by the property-test harness.
    """
    result = expected(csvs)
    if result.strip():
        with shell.tempdir():
            paths = []
            for i, csv in enumerate(csvs):
                path = f'file{i}.bsv'
                shell.run(f'bsv > {path}', stdin=csv)
                paths.append(path)
            # f-string prefixes removed from 'brmerge': no placeholders
            assert result.strip() == shell.run('brmerge', *paths, ' | bcut 1 | csv', echo=True)
            assert shell.run('cat', *paths, '| brsort | bcut 1 | csv') == shell.run('brmerge', *paths, ' | bcut 1 | csv')
コード例 #13
0
def _tar_script(src, name, echo_only=False):
    """Write a shell script that tars the contents of src (following
    symlinks) and return the script's absolute path.

    src: directory to archive.
    name: optional find(1) -name filter; falsy means no filter.
    echo_only: when True the script only lists the matched files (on
        stderr) instead of emitting a tar stream on stdout.
    """
    name = ('-name %s' % name) if name else ''
    script = (
        'cd %(src)s\n'
        'src=$(pwd)\n'
        'cd $(dirname $src)\n'
        "FILES=$(find -L $(basename $src) -type f %(name)s -o -type l %(name)s)\n"
        # '\\.git' produces the same runtime bytes as the previous '\.git'
        # while avoiding Python's invalid-escape-sequence warning
        'echo $FILES|tr " " "\\n"|grep -v \\.git 1>&2\n' +
        ('' if echo_only else 'tar cfh - $FILES')) % locals()
    # cleanup=False: the caller needs the script to outlive this call
    with shell.tempdir(cleanup=False):
        with open('script.sh', 'w') as f:
            f.write(script)
        return os.path.abspath('script.sh')
コード例 #14
0
ファイル: _csv_test.py プロジェクト: slham/bsv
def test_fails_when_too_many_columns():
    """_csv must exit 1 with a fatal error when a line exceeds 65535 columns."""
    with shell.climb_git_root():
        # 65535 commas => 65536 fields, one more than the allowed maximum
        stdin = 'a,' * (2**16 - 1)
        with shell.tempdir(cleanup=False):
            with open('input', 'w') as f:
                f.write(stdin)
            path = os.path.abspath('input')
        try:
            # warn=True returns exitcode/stderr instead of raising
            res = shell.run('cat', path, '| bin/_csv >/dev/null', warn=True)
        finally:
            shell.run('rm', path)
        assert res['exitcode'] == 1
        assert 'fatal: line with more than 65535 columns' == res['stderr']
コード例 #15
0
ファイル: bmerge_test.py プロジェクト: slham/bsv
def test_basic():
    """bmerge must interleave two sorted bsv files into one sorted stream."""
    with shell.tempdir():
        shell.run('echo -e "a,a\nc,c\ne,e\n" | bsv > a.bsv')
        shell.run('echo -e "b,b\nd,d\nf,f\n" | bsv > b.bsv')
        stdout = """
        a,a
        b,b
        c,c
        d,d
        e,e
        f,f
        """
        # f-string prefix removed: the command has no placeholders
        assert rm_whitespace(unindent(stdout)) == shell.run(
            'bmerge a.bsv b.bsv | csv', stream=True)
コード例 #16
0
ファイル: bpartition_lz4_test.py プロジェクト: nathants/bsv
def test_without_prefix():
    """bpartition -l with no prefix argument names the partition files by
    bucket number alone."""
    with shell.tempdir():
        stdin = """
        b,c,d
        e,f,g
        h,i,j
        """
        stdout = """
        02
        04
        05
        """
        # same bucket numbers as the prefixed variant, no 'prefix_' prepended
        assert rm_whitespace(unindent(stdout)) == shell.run(
            'bsv | bpartition -l 10', stdin=unindent(stdin))
コード例 #17
0
ファイル: test_server.py プロジェクト: nathants/s4
def servers(timeout=30, extra_conf='', num_servers=3):
    """Test fixture: start num_servers s4 servers in a tempdir, yield while
    a watcher thread monitors them, then stop everything.

    timeout: hard wall-clock limit for the whole fixture.
    extra_conf: extra configuration passed through to start_all.

    NOTE(review): generator with a single yield — presumably wrapped with
    @contextmanager at the (not visible) decoration site; confirm.
    """
    util.log.setup(format='%(message)s')
    shell.set['stream'] = True
    with util.time.timeout(timeout):
        with shell.stream():
            with shell.tempdir():
                procs = start_all(extra_conf, num_servers)
                # watch[0] is a shared flag the watcher thread polls
                watch = [True]
                pool.thread.new(watcher, watch, procs)
                try:
                    yield
                finally:
                    # stop the watcher first, then kill the server procs
                    watch[0] = False
                    for proc in procs:
                        proc.terminate()
コード例 #18
0
ファイル: bmerge_test.py プロジェクト: slham/bsv
def test_props_compatability(csvs):
    """Property test: bmerge must agree with coreutils `sort -m` run over
    the same data in plain csv form.

    csvs: iterable of csv strings supplied by the property-test harness.
    """
    result = expected(csvs)
    if result.strip():
        with shell.tempdir():
            # write each dataset twice: once as bsv, once as plain csv
            bsv_paths = []
            for i, csv in enumerate(csvs):
                path = f'file{i}.bsv'
                shell.run(f'bsv > {path}', stdin=csv)
                bsv_paths.append(path)
            csv_paths = []
            for i, csv in enumerate(csvs):
                path = f'file{i}.csv'
                shell.run(f'cat - > {path}', stdin=csv)
                csv_paths.append(path)
            # f-string prefixes removed: these commands have no placeholders
            assert shell.run('LC_ALL=C sort -m -k1,1', *csv_paths,
                             ' | cut -d, -f1') == shell.run('bmerge',
                                                            *bsv_paths,
                                                            ' | bcut 1 | csv',
                                                            echo=True)
コード例 #19
0
ファイル: test_s3_stubbed.py プロジェクト: shareablee/py-aws
def test_basic():
    """cp round-trips: file->key, stdin->key, key->stdout, key->file, and
    key->directory (trailing slash) all preserve content."""
    with shell.tempdir():
        with open('input.txt', 'w') as f:
            f.write('123')
        run(preamble, 'cp input.txt s3://bucket/basic/dir/file.txt')
        # '-' as source reads the object body from stdin
        run('echo asdf |', preamble, 'cp - s3://bucket/basic/dir/stdin.txt')
        assert run(preamble, 'ls s3://bucket/ --recursive').splitlines() == [
            '_ _ _ basic/dir/file.txt', '_ _ _ basic/dir/stdin.txt'
        ]
        # '-' as destination writes the object body to stdout
        assert run(preamble, 'cp s3://bucket/basic/dir/file.txt -') == "123"
        assert run(preamble, 'cp s3://bucket/basic/dir/stdin.txt -') == "asdf"
        run(preamble, 'cp s3://bucket/basic/dir/file.txt file.downloaded')
        with open('file.downloaded') as f:
            assert f.read() == "123"
        # the stdin upload kept echo's trailing newline
        run(preamble, 'cp s3://bucket/basic/dir/stdin.txt stdin.downloaded')
        with open('stdin.downloaded') as f:
            assert f.read() == "asdf\n"
        # destination ending in '/' keeps the source's basename
        run("mkdir foo")
        run(preamble, 'cp s3://bucket/basic/dir/stdin.txt foo/', stream=True)
        with open('foo/stdin.txt') as f:
            assert f.read() == "asdf\n"
コード例 #20
0
def test_dupes():
    """bmerge keeps duplicate rows, and accepts its input file list on
    stdin whether space-separated, newline-separated, or containing
    blank lines."""
    with shell.tempdir():
        shell.run('echo -e "a,a\na,a\nc,c\nc,c\ne,e\ne,e\n" | bsv > a.bsv')
        shell.run('echo -e "b,b\nd,d\nf,f\n" | bsv > b.bsv')
        stdout = """
        a,a
        a,a
        b,b
        c,c
        c,c
        d,d
        e,e
        e,e
        f,f
        """
        # filenames on one line
        assert rm_whitespace(unindent(stdout)) == shell.run(
            'echo a.bsv b.bsv | bmerge | csv', stream=True)
        # filenames on separate lines
        assert rm_whitespace(unindent(stdout)) == shell.run(
            '(echo a.bsv; echo b.bsv) | bmerge | csv', stream=True)
        # blank lines in the list are ignored
        assert rm_whitespace(unindent(stdout)) == shell.run(
            '(echo a.bsv; echo; echo b.bsv) | bmerge | csv', stream=True)
コード例 #21
0
def new(name: 'name of the instance',
        gigs: 'size in gigs of data disk' = 128,
        size: 'instance size' = shell.conf.get_or_prompt_pref(
            'size', __file__, message='instance size'),
        location=shell.conf.get_or_prompt_pref('location',
                                               __file__,
                                               message='azure location'),
        no_wait: 'do not wait for ssh' = False,
        num: 'number of instances' = 1,
        init=_data_disk_init,
        group=None):
    """Create num Azure VMs named after *name*, bootstrapping each with the
    *init* bash snippet via cloud-init, then wait for ssh.

    Creates the resource group, vnet, nsg and an ssh-allow rule first if the
    group does not already exist. With num > 1 instances are named
    '<name>-<i>' and created with --no-wait.
    """
    assert not init.startswith(
        '#!'
    ), 'init commands are bash snippets, and should not include a hashbang'
    # wrap the snippet: base64-decode it to a temp file and run it as the
    # ubuntu user, logging to /var/log/cloud_init_script.log
    init = '#!/bin/bash\npath=/tmp/$(uuidgen); echo %s | base64 -d > $path; sudo -u ubuntu bash -e $path /var/log/cloud_init_script.log 2>&1' % util.strings.b64_encode(
        init)
    if not group:
        group_name = name
    else:
        group_name = group
    # NOTE(review): id() here is presumably a module-level instance lister,
    # not the builtin (which would make this assert always fail) — confirm
    assert not list(id(name)), 'name must be globally unique'
    if not _group_exists(group_name):
        run('az group create --name',
            group_name,
            '--location',
            location,
            echo=True)
        run('az network vnet create --resource-group',
            group_name,
            '--name',
            group_name,
            '--location',
            location,
            '--subnet-name',
            group_name,
            echo=True)
        run('az network nsg create --resource-group',
            group_name,
            '--name',
            group_name,
            '--location',
            location,
            echo=True)
        # allow inbound ssh from anywhere
        run('az network nsg rule create --resource-group',
            group_name,
            '--nsg-name',
            group_name,
            '-n ssh',
            '--priority 100',
            '--source-address-prefix "*"',
            '--destination-address-prefix "*"',
            '--destination-port-range 22',
            '--access Allow',
            '--protocol Tcp',
            echo=True)
    with shell.tempdir():
        with open('cloud-init.txt', 'w') as f:
            f.write(init)
        for i in range(num):
            run('az vm create',
                '--resource-group',
                group_name,
                '--vnet-name',
                group_name,
                '--subnet',
                group_name,
                '--nsg',
                group_name,
                '--name', (name if num == 1 else '%s-%s' % (name, i + 1)),
                '--image',
                'Canonical:UbuntuServer:14.04.4-LTS:latest',
                '--ssh-key-value',
                '~/.ssh/id_rsa.pub',
                '--admin-username',
                'ubuntu',
                '--data-disk-sizes-gb',
                gigs,
                '--custom-data',
                'cloud-init.txt',
                '--size',
                size, ('--no-wait' if num > 1 else ''),
                stream=True)
    wait_for_ssh(group=group, num=num)
コード例 #22
0
def new(
        name: 'name of all instances',
        arg:
    'one instance per arg, and that arg is str formatted into cmd, pre_cmd, and tags as {arg}' = None,
        label:
    'one label per arg, to use as ec2 tag since arg is often inapproriate, defaults to arg if not provided' = None,
        pre_cmd:
    'optional cmd which runs before cmd is backgrounded. will be retried on failure. format with {arg}.' = None,
        cmd: 'cmd which is run in the background. format with {arg}.' = None,
        tag: 'tag to set as "<key>=<value>' = None,
        no_rm: 'stop instance instead of terminating when done' = False,
        chunk_size: 'how many args to launch at once' = 50,
        bucket: 's3 bucket to upload logs to' = shell.conf.get_or_prompt_pref(
            'launch_logs_bucket', __file__, message='bucket for launch_logs'),
        spot: 'spot price to bid' = None,
        key: 'key pair name' = shell.conf.get_or_prompt_pref(
            'key', aws.ec2.__file__, message='key pair name'),
        ami: 'ami id' = shell.conf.get_or_prompt_pref('ami',
                                                      aws.ec2.__file__,
                                                      message='ami id'),
        sg: 'security group name' = shell.conf.get_or_prompt_pref(
            'sg', aws.ec2.__file__, message='security group name'),
        type: 'instance type' = shell.conf.get_or_prompt_pref(
            'type', aws.ec2.__file__, message='instance type'),
        vpc: 'vpc name' = shell.conf.get_or_prompt_pref('vpc',
                                                        aws.ec2.__file__,
                                                        message='vpc name'),
        zone: 'ec2 availability zone' = None,
        role: 'ec2 iam role' = None,
        gigs: 'gb capacity of primary disk' = 8):
    """Launch one ec2 instance per arg in chunks, run cmd on each, and
    record launch params under a unique launch id in s3.

    Returns 'launch=<uuid>'. When the LAUNCH_LOCAL env var is set, instead
    runs each arg's commands locally in a tempdir and returns None.

    Fixes: the launch-id probe used a bare `except:` (which also swallowed
    KeyboardInterrupt); the LAUNCH_LOCAL path crashed when the optional
    pre_cmd was None (the remote path already guarded it).
    """
    optional = ['no_rm', 'zone', 'spot', 'tag', 'pre_cmd', 'label']
    for k, v in locals().items():
        assert v is not None or k in optional, 'required flag missing: --' + k.replace(
            '_', '-')
    tags, args, labels = tuple(tag or ()), tuple(arg or ()), tuple(label or ())
    args = [str(a) for a in args]
    if labels:
        assert len(args) == len(
            labels
        ), 'there must be an equal number of args and labels, %s != %s' % (
            len(args), len(labels))
    else:
        # labels default to the args themselves
        labels = args
    labels = [_tagify(x) for x in labels]
    for tag in tags:
        assert '=' in tag, 'tags should be "<key>=<value>", not: %s' % tag
    for label, arg in zip(labels, args):
        if label == arg:
            logging.info('going to launch arg: %s', arg)
        else:
            logging.info('going to launch label: %s, arg: %s', label, arg)
    # pre_cmd/cmd may name files; if so, use the file contents
    if pre_cmd and os.path.exists(pre_cmd):
        logging.info('reading pre_cmd from file: %s', os.path.abspath(pre_cmd))
        with open(pre_cmd) as f:
            pre_cmd = f.read()
    if os.path.exists(cmd):
        logging.info('reading cmd from file: %s', os.path.abspath(cmd))
        with open(cmd) as f:
            cmd = f.read()
    # probe for an unused launch id: `aws s3 ls` failing means the prefix
    # is free, which is the success case here
    for _ in range(10):
        launch = str(uuid.uuid4())
        path = 's3://%(bucket)s/launch_logs/launch=%(launch)s' % locals()
        try:
            shell.run('aws s3 ls', path)
        except Exception:
            break
    else:
        assert False, 'failed to generate a unique launch id. clean up: s3://%(bucket)s/launch_logs/' % locals(
        )
    logging.info('launch=%s', launch)
    data = json.dumps({
        'name': name,
        'args': args,
        'labels': labels,
        'pre_cmd': pre_cmd,
        'cmd': cmd,
        'tags': tags,
        'no_rm': no_rm,
        'bucket': bucket,
        'spot': spot,
        'key': key,
        'ami': ami,
        'sg': sg,
        'type': type,
        'vpc': vpc,
        'gigs': gigs
    })
    if 'LAUNCH_LOCAL' in os.environ:
        # debug mode: run every arg's commands locally instead of on ec2
        for arg in args:
            with shell.tempdir(), shell.set_stream():
                if pre_cmd:
                    shell.run(pre_cmd.format(arg=arg))
                shell.run(cmd.format(arg=arg))
    else:
        shell.run(
            'aws s3 cp - s3://%(bucket)s/launch_logs/launch=%(launch)s/params.json'
            % locals(),
            stdin=data)
        tags += ('launch=%s' % launch, )
        for i, (args_chunk, labels_chunk) in enumerate(
                zip(chunk(args, chunk_size), chunk(labels, chunk_size))):
            logging.info('launching chunk %s of %s, chunk size: %s', i + 1,
                         len(args) // chunk_size + 1, chunk_size)
            instance_ids = aws.ec2.new(name,
                                       role=role,
                                       spot=spot,
                                       key=key,
                                       ami=ami,
                                       sg=sg,
                                       type=type,
                                       vpc=vpc,
                                       zone=zone,
                                       gigs=gigs,
                                       num=len(args_chunk))
            errors = []

            def run_cmd(instance_id, arg, label):
                # returns a thunk that sshes cmd onto one instance and tags
                # it; failures are collected into `errors`
                def fn():
                    try:
                        if pre_cmd:
                            aws.ec2._retry(aws.ec2.ssh)(
                                instance_id,
                                yes=True,
                                cmd=pre_cmd.format(arg=arg),
                                prefixed=True)
                        aws.ec2.ssh(instance_id,
                                    no_tty=True,
                                    yes=True,
                                    cmd=_cmd(arg, cmd, no_rm, bucket),
                                    prefixed=True)
                        instance = aws.ec2._ls([instance_id])[0]
                        aws.ec2._retry(instance.create_tags)(
                            Tags=[{
                                'Key': k,
                                'Value': v
                            } for tag in tags +
                                  ('label=%s' % label, 'chunk=%s' % i)
                                  for [k, v] in [tag.split('=', 1)]])
                        logging.info('tagged: %s', aws.ec2._pretty(instance))
                        logging.info('ran cmd against %s for label %s',
                                     instance_id, label)
                    except:
                        errors.append(traceback.format_exc())

                return fn

            pool.thread.wait(*map(run_cmd, instance_ids, args_chunk,
                                  labels_chunk),
                             max_threads=10)
            if errors:
                logging.info(util.colors.red('errors:'))
                for e in errors:
                    logging.info(e)
                sys.exit(1)
        return 'launch=%s' % launch
コード例 #23
0
ファイル: test_backup.py プロジェクト: nathants/backup
def test_basic():
    """End-to-end backup flow: add/commit three files across three commits,
    checking diff/additions/log/index after each, then verify find() and
    restore() against every historical commit from fresh clones."""
    with sh.tempdir():
        uid = str(uuid.uuid4())
        # point the backup tooling at an isolated test destination
        os.environ['BACKUP_RCLONE_REMOTE'] = os.environ['BACKUP_TEST_RCLONE_REMOTE']
        os.environ['BACKUP_DESTINATION'] = os.environ['BACKUP_TEST_DESTINATION'] + '/' + uid
        os.environ['BACKUP_STORAGE_CLASS'] = 'STANDARD_IA'
        os.environ['BACKUP_CHUNK_MEGABYTES'] = '100'
        os.environ['BACKUP_ROOT'] = os.getcwd()
        for k, v in os.environ.items():
            if k.startswith('BACKUP_'):
                print(k, '=>', v)
        ## first commit: one file, one new blob
        sh.run('echo foo > bar.txt')
        sh.run('backup-add')
        assert diff() == [
            ('addition:', './bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        assert additions() == [
            ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        sh.run('backup-commit')
        assert log() == [
            '0000000000.DATE.tar.lz4.gpg.00000 HASH 1510',
            'init',
        ]
        assert index() == [
            ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        ## second commit: identical content dedupes to the existing blob,
        ## so the commit is index-only
        sh.run('echo foo > bar2.txt')
        sh.run('backup-add')
        assert diff() == [
            ('addition:', './bar2.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        assert additions() == [
            ('./bar2.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        sh.run('backup-commit')
        assert log() == [
            'index-only-update',
            '0000000000.DATE.tar.lz4.gpg.00000 HASH 1510',
            'init',
        ]
        assert index() == [
            ('./bar.txt',  '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
            ('./bar2.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        ## third commit: new content creates a second blob
        sh.run('echo asdf > asdf.txt')
        sh.run('backup-add')
        assert diff() == [
            ('addition:', './asdf.txt', '0000000001.DATE.tar.lz4.gpg.00000', '36b807d5', '5')
        ]
        assert additions() == [
            ('./asdf.txt', '0000000001.DATE.tar.lz4.gpg.00000', '36b807d5', '5')
        ]
        sh.run('backup-commit')
        assert log() == [
            '0000000001.DATE.tar.lz4.gpg.00000 HASH 1513',
            'index-only-update',
            '0000000000.DATE.tar.lz4.gpg.00000 HASH 1510',
            'init',
        ]
        assert index() == [
            ('./asdf.txt', '0000000001.DATE.tar.lz4.gpg.00000', '36b807d5', '5'),
            ('./bar.txt',  '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
            ('./bar2.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
        ]
        ## from a fresh clone, find() must see each commit's historical view
        with sh.tempdir():
            os.environ['BACKUP_ROOT'] = os.getcwd()
            _ = find('.') # clone the repo with the first call to find()
            assert [find('.', commit) for commit in commits()] == [find(r'\.txt$', commit) for commit in commits()] == [
                [
                    ('./asdf.txt', '0000000001.DATE.tar.lz4.gpg.00000', '36b807d5', '5'),
                    ('./bar.txt',  '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                    ('./bar2.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                ],
                [
                    ('./bar.txt',  '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                    ('./bar2.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                ],
                [
                    ('./bar.txt',  '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                ],
                [],
            ]
            assert [find('.*asdf.*', commit) for commit in commits()] == [
                [
                    ('./asdf.txt', '0000000001.DATE.tar.lz4.gpg.00000', '36b807d5', '5'),
                ],
                [],
                [],
                [],
            ]
        ## from another fresh clone, restore() must recover each commit's
        ## files, and the restored contents must match
        with sh.tempdir():
            os.environ['BACKUP_ROOT'] = os.getcwd()
            _ = find('.')
            assert [restore('.', commit) for commit in commits()] == [
                [
                    ('./bar.txt',  'd202d795'),
                    ('./bar2.txt', 'd202d795'),
                    ('./asdf.txt', '36b807d5'),
                ],
                [
                    ('./bar.txt',  'd202d795'),
                    ('./bar2.txt', 'd202d795'),
                ],
                [
                    ('./bar.txt',  'd202d795'),
                ],
                [],
            ]
            assert [restore(r'\./bar2\.txt$', commit) for commit in commits()] == [
                [
                    ('./bar2.txt', 'd202d795'),
                ],
                [
                    ('./bar2.txt', 'd202d795'),
                ],
                [],
                [],
            ]
            assert sh.run('cat bar.txt') == 'foo'
            assert sh.run('cat bar2.txt') == 'foo'
            assert sh.run('cat asdf.txt') == 'asdf'
コード例 #24
0
ファイル: test_backup.py プロジェクト: nathants/backup
def test_symlink():
    """Symlinks are stored as index entries (not blob contents) and are
    restored as working links with their original relative targets."""
    with sh.tempdir():
        uid = str(uuid.uuid4())
        # point the backup tooling at an isolated test destination
        os.environ['BACKUP_RCLONE_REMOTE'] = os.environ['BACKUP_TEST_RCLONE_REMOTE']
        os.environ['BACKUP_DESTINATION'] = os.environ['BACKUP_TEST_DESTINATION'] + '/' + uid
        os.environ['BACKUP_STORAGE_CLASS'] = 'STANDARD_IA'
        os.environ['BACKUP_CHUNK_MEGABYTES'] = '100'
        os.environ['BACKUP_ROOT'] = os.getcwd()
        for k, v in os.environ.items():
            if k.startswith('BACKUP_'):
                print(k, '=>', v)
        ## first commit: a file plus a symlink to it; only the file creates
        ## a blob, the link is recorded as ('symlink', target, '0')
        sh.run('echo foo > bar.txt')
        sh.run('ln -s bar.txt link.txt')
        sh.run('backup-add')
        assert diff() == [
            ('addition:', './bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
            ('addition:', './link.txt', 'symlink', './bar.txt', '0'),
        ]
        assert additions() == [
            ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
            ('./link.txt', 'symlink', './bar.txt', '0'),
        ]
        sh.run('backup-commit')
        assert log() == [
            '0000000000.DATE.tar.lz4.gpg.00000 HASH 1510',
            'init',
        ]
        assert index() == [
            ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
            ('./link.txt', 'symlink', './bar.txt', '0'),
        ]
        ## second commit: a link in a subdirectory; index resolves the
        ## target relative to the backup root, so the commit is index-only
        sh.run('mkdir dir')
        sh.run('cd dir && ln -s ../bar.txt link.txt')
        sh.run('backup-add')
        assert diff() == [
            ('addition:', './dir/link.txt', 'symlink', './bar.txt', '0'),
        ]
        assert additions() == [
            ('./dir/link.txt', 'symlink', './bar.txt', '0'),
        ]
        sh.run('backup-commit')
        assert log() == [
            'index-only-update',
            '0000000000.DATE.tar.lz4.gpg.00000 HASH 1510',
            'init',
        ]
        assert index() == [
            ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
            ('./dir/link.txt', 'symlink', './bar.txt', '0'),
            ('./link.txt', 'symlink', './bar.txt', '0'),
        ]
        ## fresh clone: find() shows each commit's historical view
        with sh.tempdir():
            os.environ['BACKUP_ROOT'] = os.getcwd()
            _ = find('.') # clone the repo with the first call to find()
            assert [find('.', commit) for commit in commits()] == [
                [
                    ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                    ('./dir/link.txt', 'symlink', './bar.txt', '0'),
                    ('./link.txt', 'symlink', './bar.txt', '0'),
                ],
                [
                    ('./bar.txt', '0000000000.DATE.tar.lz4.gpg.00000', 'd202d795', '4'),
                    ('./link.txt', 'symlink', './bar.txt', '0'),
                ],
                [],
            ]
        ## fresh clone: restore() recreates the links with their original
        ## relative targets, and reading through them works
        with sh.tempdir():
            os.environ['BACKUP_ROOT'] = os.getcwd()
            restore('.')
            # NOTE(review): '\.' in the non-raw string below is an invalid
            # escape sequence (Python keeps it as backslash-dot, which is
            # what grep needs); consider a raw string in a code change
            assert sh.run('find -printf "%y %p %l\n" | grep -v "\./\.backup" | grep -P "^(l|f)"').splitlines() == [
                'l ./link.txt bar.txt',
                'l ./dir/link.txt ../bar.txt',
                'f ./bar.txt',
            ]
            assert sh.run('cat link.txt') == 'foo'
            assert sh.run('cat dir/link.txt') == 'foo'
            assert sh.run('cat bar.txt') == 'foo'
コード例 #25
0
ファイル: launch.py プロジェクト: nathants/py-aws
def new(name:    'name of all instances',
        arg:     'one instance per arg, and that arg is str formatted into cmd, pre_cmd, and tags as "arg"' = None,
        label:   'one label per arg, to use as ec2 tag since arg is often inapproriate, defaults to arg if not provided' = None,
        pre_cmd: 'optional cmd which runs before cmd is backgrounded. will be retried on failure. format with %(arg)s.' = None,
        cmd:     'cmd which is run in the background. format with %(arg)s.' = None,
        tag:     'tag to set as "<key>=<value>' = None,
        no_rm:   'stop instance instead of terminating when done' = False,
        chunk_size: 'how many args to launch at once' = 50,
        bucket:  's3 bucket to upload logs to' = shell.conf.get_or_prompt_pref('launch_logs_bucket',  __file__, message='bucket for launch_logs'),
        spot:    'spot price to bid'           = None,
        key:     'key pair name'               = shell.conf.get_or_prompt_pref('key',  aws.ec2.__file__, message='key pair name'),
        ami:     'ami id'                      = shell.conf.get_or_prompt_pref('ami',  aws.ec2.__file__, message='ami id'),
        sg:      'security group name'         = shell.conf.get_or_prompt_pref('sg',   aws.ec2.__file__, message='security group name'),
        type:    'instance type'               = shell.conf.get_or_prompt_pref('type', aws.ec2.__file__, message='instance type'),
        vpc:     'vpc name'                    = shell.conf.get_or_prompt_pref('vpc',  aws.ec2.__file__, message='vpc name'),
        zone:    'ec2 availability zone'       = None,
        gigs:    'gb capacity of primary disk' = 8):
    """
    Launch one ec2 instance per arg and run cmd on each in the background.

    Workflow: validate flags, resolve labels, optionally read pre_cmd/cmd from
    files, allocate a unique launch id under s3://<bucket>/launch_logs/, then
    launch instances in chunks of chunk_size, ssh'ing pre_cmd (retried) and cmd
    (backgrounded) into each and tagging it with label/chunk/launch tags.

    Returns 'launch=<uuid>' on success, or exits with status 1 if any instance
    in a chunk failed. If AWS_LAUNCH_RUN_LOCAL is set, instead runs each arg's
    pre_cmd and cmd locally in a tempdir and returns None.
    """
    # these flags may legitimately be None; every other flag is required.
    optional = ['no_rm', 'zone', 'spot', 'tag', 'pre_cmd', 'label']
    for k, v in locals().items():
        assert v is not None or k in optional, 'required flag missing: --' + k.replace('_', '-')
    tags, args, labels = tuple(tag or ()), tuple(arg or ()), tuple(label or ())
    args = [str(a) for a in args]
    if labels:
        assert len(args) == len(labels), 'there must be an equal number of args and labels, %s != %s' % (len(args), len(labels))
    else:
        # labels default to the args themselves when not provided.
        labels = args
    labels = [_tagify(x) for x in labels]
    for tag in tags:
        assert '=' in tag, 'tags should be "<key>=<value>", not: %s' % tag
    for label, arg in zip(labels, args):
        if label == arg:
            logging.info('going to launch arg: %s', arg)
        else:
            logging.info('going to launch label: %s, arg: %s', label, arg)
    # pre_cmd and cmd may each be either a literal command string or a path to
    # a file whose contents are the command.
    if pre_cmd and os.path.exists(pre_cmd):
        logging.info('reading pre_cmd from file: %s', os.path.abspath(pre_cmd))
        with open(pre_cmd) as f:
            pre_cmd = f.read()
    if os.path.exists(cmd):
        logging.info('reading cmd from file: %s', os.path.abspath(cmd))
        with open(cmd) as f:
            cmd = f.read()
    # probe s3 for an unused launch id: 'aws s3 ls' raising (nonzero exit)
    # means the path is free, so take it; succeeding means it already exists.
    for _ in range(10):
        launch = str(uuid.uuid4())
        path = 's3://%(bucket)s/launch_logs/launch=%(launch)s' % locals()
        try:
            shell.run('aws s3 ls', path)
        except Exception:  # narrowed from bare except so ctrl-c still aborts
            break
    else:
        assert False, 'failed to generate a unique launch id. clean up: s3://%(bucket)s/launch_logs/' % locals()
    logging.info('launch=%s', launch)
    # persist the full launch parameters alongside the logs for reproducibility.
    data = json.dumps({'name': name,
                       'args': args,
                       'labels': labels,
                       'pre_cmd': pre_cmd,
                       'cmd': cmd,
                       'tags': tags,
                       'no_rm': no_rm,
                       'bucket': bucket,
                       'spot': spot,
                       'key': key,
                       'ami': ami,
                       'sg': sg,
                       'type': type,
                       'vpc': vpc,
                       'gigs': gigs})
    if 'AWS_LAUNCH_RUN_LOCAL' in os.environ:
        # debug mode: run every arg's commands locally instead of on ec2.
        for arg in args:
            with shell.tempdir(), shell.set_stream():
                shell.run(pre_cmd % {'arg': arg})
                shell.run(cmd % {'arg': arg})
    else:
        shell.run('aws s3 cp - s3://%(bucket)s/launch_logs/launch=%(launch)s/params.json' % locals(), stdin=data)
        tags += ('launch=%s' % launch,)
        for i, (args_chunk, labels_chunk) in enumerate(zip(chunk(args, chunk_size), chunk(labels, chunk_size))):
            logging.info('launching chunk %s of %s, chunk size: %s', i + 1, len(args) // chunk_size + 1, chunk_size)
            instance_ids = aws.ec2.new(name,
                                       spot=spot,
                                       key=key,
                                       ami=ami,
                                       sg=sg,
                                       type=type,
                                       vpc=vpc,
                                       zone=zone,
                                       gigs=gigs,
                                       num=len(args_chunk))
            errors = []
            # build a thunk per (instance, arg, label); failures are collected
            # into errors rather than raised so the whole chunk is attempted.
            def run_cmd(instance_id, arg, label):
                def fn():
                    try:
                        if pre_cmd:
                            aws.ec2._retry(aws.ec2.ssh)(instance_id, yes=True, cmd=pre_cmd % {'arg': arg}, prefixed=True)
                        aws.ec2.ssh(instance_id, no_tty=True, yes=True, cmd=_cmd(arg, cmd, no_rm, bucket), prefixed=True)
                        instance = aws.ec2._ls([instance_id])[0]
                        aws.ec2._retry(instance.create_tags)(Tags=[{'Key': k, 'Value': v}
                                                                   for tag in tags + ('label=%s' % label, 'chunk=%s' % i)
                                                                   for [k, v] in [tag.split('=', 1)]])
                        logging.info('tagged: %s', aws.ec2._pretty(instance))
                        logging.info('ran cmd against %s for label %s', instance_id, label)
                    except Exception:  # narrowed from bare except so ctrl-c isn't recorded as a task error
                        errors.append(traceback.format_exc())
                return fn
            pool.thread.wait(*map(run_cmd, instance_ids, args_chunk, labels_chunk), max_threads=10)
            if errors:
                logging.info(util.colors.red('errors:'))
                for e in errors:
                    logging.info(e)
                sys.exit(1)
        return 'launch=%s' % launch
コード例 #26
0
def test_numeric():
    # round-trip a mixed string/int/float row through bsv and back via bcat:
    # f64 values are rendered with six decimal places.
    with shell.tempdir():
        shell.run('echo a,1,2.0 | bsv > data.bsv')
        decoded = shell.run('bcat data.bsv')
        assert decoded == 'a,1,2.000000'