Code example #1
    def find_s3_region(self, bucket: str) -> str:
        """
        Returns the URL for an S3 bucket (extracted from AWS CLI).

        :return: the URL string.
        """
        from sh import aws
        cmd = aws('s3api', 'get-bucket-location', '--bucket', str(bucket), '--output', 'json')
        regs: Dict[str, str] = json.loads(str(cmd))
        return str(bucket) + '.s3.' + str(regs['LocationConstraint']) + '.amazonaws.com'
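A caveat worth noting: for buckets in us-east-1, get-bucket-location reports "LocationConstraint": null, which the concatenation above renders as the literal string 'None'. A minimal hedged variant of the last two lines:

        # us-east-1 buckets report a null LocationConstraint; fall back explicitly.
        region = regs['LocationConstraint'] or 'us-east-1'
        return str(bucket) + '.s3.' + region + '.amazonaws.com'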
Code example #2
def object_exists_in_s3(key):
    """Check if an object exists in S3 and is not empty."""
    try:
        ret = sh.aws("s3api", "head-object", "--bucket",
                     os.getenv("BUCKET_NAME"), "--key", key)
        obj = json.loads(str(ret))
        return obj["ContentLength"] > 0
    except sh.ErrorReturnCode_255:
        # AWS CLI v1 exits with 255 when head-object fails (e.g. missing key).
        return False
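The handler above matches AWS CLI v1, which exits with status 255 on errors; AWS CLI v2 reports service errors with exit code 254 instead. A version-tolerant sketch (the _v2 name is hypothetical) catches sh's base error class:

def object_exists_in_s3_v2(key):
    """Variant tolerating both AWS CLI v1 (exit 255) and v2 (exit 254)."""
    try:
        ret = sh.aws("s3api", "head-object", "--bucket",
                     os.getenv("BUCKET_NAME"), "--key", key)
        return json.loads(str(ret))["ContentLength"] > 0
    except sh.ErrorReturnCode:  # base class of all ErrorReturnCode_N classes
        return False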
Code example #3
File: s3utils.py Project: alanyee/S3Scanner
def dumpBucket(bucketName):

    # Dump the bucket into bucket folder
    bucketDir = './buckets/' + bucketName

    dumped = None

    try:
        if not AWS_CREDS_CONFIGURED:
            sh.aws('s3',
                   'sync',
                   's3://' + bucketName,
                   bucketDir,
                   '--no-sign-request',
                   _fg=False)
            dumped = True
        else:
            sh.aws('s3', 'sync', 's3://' + bucketName, bucketDir, _fg=False)
            dumped = True
    except sh.ErrorReturnCode_1 as e:
        # Loop through our list of known errors. If found, dumping failed.
        foundErr = False
        err_message = e.stderr.decode('utf-8')
        for err in ERROR_CODES:
            if err in err_message:
                foundErr = True
                break
        if foundErr:  # We caught a known error while dumping
            if not os.listdir(bucketDir):
                # The bucket directory is empty: the dump didn't work.
                dumped = False
            else:
                # The directory is not empty: at least one file was downloaded.
                dumped = True
        else:
            raise

    # Check if folder is empty. If it is, delete it
    if not os.listdir(bucketDir):
        os.rmdir(bucketDir)

    return dumped
Code example #4
File: awsiot.py Project: jackylee1/awstools
def create_policy_from_string(name, doc):
    """
    Create a policy

    :param name: policy name
    :param doc: json document string describing the policy
    :return: dictionary with the policy properties
    """
    response = sh.aws("iot", "create-policy", "--policy-name", name,
                      "--policy-document", doc)
    return json.loads(str(response))
Code example #5
    def find_aws_acct(self) -> str:
        """
        Returns the logged in users ID.
        This will only be useful once we can tag sources and not just assets. For now this is unused.

        :return: the account string
        """
        from sh import aws
        cmd = aws('sts', 'get-caller-identity', '--output', 'json')
        acc: Dict[str, str] = json.loads(str(cmd))
        return 'AWS_ACCT_ID:' + str(acc['Account'])
Code example #6
def upload(input, output, clean_train_files, dryrun):
    """Upload all model checkpoints to s3 for archival. You may want to remove large files like optimizer.pt before that."""

    if clean_train_files:
        to_remove = ["optimizer.pt"]
        for root, subFolders, files in os.walk(input):
            for file in files:
                if file in to_remove:
                    path = Path(root) / file
                    print(f"Removing train file {path}")
                    if not dryrun:
                        path.unlink()

    if dryrun:
        dry_run_command = ["--dryrun"]
    else:
        dry_run_command = []

    command = ["s3", "sync"] + dry_run_command + ["--follow-symlinks", input, output]
    sh.aws(*command, _out=sys.stdout)
Code example #7
    def find_aws_dynamic_ips(self, region: str) -> Dict[str, int]:
        from sh import aws
        ips: Dict[str, int] = {}
        cmd = aws('ec2', 'describe-instances', '--region', region, '--query',
                  'Reservations[*].Instances[*].[PublicIpAddress]', '--output', 'json')
        iplist: List[List[List[str]]] = json.loads(str(cmd))
        # Unravel the list within list within list that AWS responds with.
        for innerlist in iplist:
            for theips in innerlist:
                if theips[0]:  # instances without a public IP yield null
                    ips[theips[0]] = 1
        return ips
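The triple-nested unraveling can be avoided by letting JMESPath flatten the projection. A hedged variant of the same query; the flattened expression and the None guard are additions, not part of the original:

        # Flattened projection: [] collapses the nesting, so the CLI returns a
        # flat JSON list of addresses (projections already drop null results;
        # the guard is belt and braces).
        cmd = aws('ec2', 'describe-instances', '--region', region, '--query',
                  'Reservations[].Instances[].PublicIpAddress', '--output', 'json')
        ips = {ip: 1 for ip in json.loads(str(cmd)) if ip is not None}
        return ips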
Code example #8
def getBucketSize(bucketName):
    """
    Use awscli to 'ls' the bucket which will give us the total size of the bucket.
    NOTE:
        Function assumes the bucket exists and doesn't catch errors if it doesn't.
    """

    a = sh.aws('s3', 'ls', '--summarize', '--human-readable', '--recursive',
               '--no-sign-request', 's3://' + bucketName)

    # Get the last line of the output, take everything right of the colon, and strip whitespace.
    return a.splitlines()[-1].split(":")[1].strip()
Code example #9
def delete_stack(stack_name):
    """
	Deletes the given cloudFormation stack
	"""
    print("Deleting stack {stack_name}".format(stack_name=stack_name))
    try:
        sh.aws(
            "cloudformation",
            "delete-stack",
            "--stack-name",
            stack_name,
        )

        # Wait for stack to be deleted
        print("Waiting for stack {stack_name} to be deleted".format(
            stack_name=stack_name))
        sh.aws("cloudformation", "wait", "stack-delete-complete",
               "--stack-name", stack_name)
        print("stack {stack_name} deleted".format(stack_name=stack_name))
    except Exception as e:
        print(traceback.format_exc())
Code example #10
def create_put_activate_pipeline(template_file_path):
    """
       :param template_file_path: string.
    """
    pipelineFilePath = "file://" + template_file_path
    uniqueId = "TeraSort" + str(uuid.uuid4().fields[-1])[:5]
    print("New pipeline from pipeline template: " + pipelineFilePath)

    print("Create Pipeline")
    cr = sh.aws("datapipeline", "create-pipeline", "--name", "TeraSort-10GB", "--unique-id", uniqueId, "--tags", "key=DPLTemplate,value=TeraSort-10GB-Template-v7")
    print(cr)
    pipelineId = json.loads(str(cr))['pipelineId']

    print("Put pipeline definition")
    pr = sh.aws("datapipeline", "put-pipeline-definition", "--pipeline-id", pipelineId, "--pipeline-definition", pipelineFilePath)
    print(pr)

    print("Activate pipeline")
    ar = sh.aws("datapipeline", "activate-pipeline", "--pipeline-id", pipelineId)
    print(ar)
    print("Activated pipeline")
Code example #11
File: test_awsiot.py Project: jackylee1/awstools
    def test_attach_and_delete(self):
        """Test if:
          * policies can be attached to certificates
          * certificates can be attached to things
          * we can detach and delete certificates
          """
        thing_name = "Thing-" + random_string()
        policy_name = "Policy-" + random_string()

        thing = iot.create_thing(thing_name)
        certs = iot.create_keys_and_certificate()
        policy = iot.create_policy(policy_name, "Allow", "iot:Publish",
                                   "topic-" + random_string())
        arn = certs["certificateArn"]

        # attach policy and test if it is there
        iot.attach_policy(certs, policy)
        policies_data = json.loads(
            str(sh.aws("iot", "list-principal-policies", "--principal", arn)))
        attached_policy_name = policies_data["policies"][0]["policyName"]
        self.assertEqual(policy_name, attached_policy_name)

        # attach thing and test if it is there
        iot.attach_to_thing(thing, certs)
        things_data = json.loads(
            str(sh.aws("iot", "list-principal-things", "--principal", arn)))
        attached_thing_name = things_data["things"][0]
        self.assertEqual(thing_name, attached_thing_name)

        # delete certificate and make sure it is gone
        iot.delete_certificate(certs)
        try:
            iot.describe_certificate(certs)
        except Exception as e:
            self.assertTrue("%s does not exist" %
                            certs['certificateId'] in str(e))
        else:
            self.fail("describe_certificate should have raised for a deleted certificate")

        iot.delete_policy(policy)
        iot.delete_thing(thing)
Code example #12
    def find_s3_buckets(self) -> Dict[str, int]:
        """
        Retrieve all running S3 buckets that the logged-in user owns (extracted from AWS CLI).

        :return: a dictionary with the buckets as keys.
        """
        from sh import aws
        buckets = {}
        cmd = aws('s3api', 'list-buckets', '--query', "Buckets[].Name", '--output', 'json')
        bucketjson: List[str] = json.loads(str(cmd))
        for i in bucketjson:
            buckets[i] = 1
        return buckets
Code example #13
File: s3utils.py Project: unl1k3ly/S3Scanner
def dumpBucket(bucketName, region):

    # Check to make sure the bucket is open
    b = checkBucket(bucketName, region)
    if b[0] != 200:
        raise ValueError("The specified bucket is not open.")

    # Dump the bucket into bucket folder
    bucketDir = './buckets/' + bucketName
    if not os.path.exists(bucketDir):
        os.makedirs(bucketDir)

    sh.aws('s3',
           'sync',
           's3://' + bucketName,
           bucketDir,
           '--no-sign-request',
           _fg=True)

    # Check if folder is empty. If it is, delete it
    if not os.listdir(bucketDir):
        # Delete empty folder
        os.rmdir(bucketDir)
Code example #14
File: s3utils.py Project: hajowieland/S3Scanner
def getBucketSize(bucketName):
    """
    Use awscli to 'ls' the bucket which will give us the total size of the bucket.
    NOTE:
        Function assumes the bucket exists and doesn't catch errors if it doesn't.
    """
    try:
        if awsCredsConfigured:
            a = sh.aws('s3',
                       'ls',
                       '--summarize',
                       '--human-readable',
                       '--recursive',
                       's3://' + bucketName,
                       _timeout=sizeCheckTimeout)
        else:
            a = sh.aws('s3',
                       'ls',
                       '--summarize',
                       '--human-readable',
                       '--recursive',
                       '--no-sign-request',
                       's3://' + bucketName,
                       _timeout=sizeCheckTimeout)
        # Get the last line of the output, take everything right of the colon, and strip whitespace.
        return a.splitlines()[-1].split(":")[1].strip()
    except sh.TimeoutException:
        return "Unknown Size - timeout"
    except sh.ErrorReturnCode_255 as e:
        stderr = e.stderr.decode("UTF-8")
        if "AccessDenied" in stderr:
            return "AccessDenied"
        elif "AllAccessDisabled" in stderr:
            return "AllAccessDisabled"
        elif "NoSuchBucket" in stderr:
            return "NoSuchBucket"
        else:
            raise
Code example #15
def delete_images(ecrName):
    """
	Deletes the images in a given ecr repository
	"""
    print("Deleting images from {ecrName}".format(ecrName=ecrName))
    try:
        #Get the image list from repository
        imageList = sh.aws("ecr", "list-images", "--repository-name", ecrName)
        #Remove all images
        imageList = json.loads(str(imageList))
        for image in imageList['imageIds']:
            print("Removing image {imageDigest}".format(
                imageDigest=image['imageDigest']))
            sh.aws(
                "ecr",
                "batch-delete-image",
                "--repository-name",
                ecrName,
                "--image-ids",
                "imageDigest={imageDigest}".format(
                    imageDigest=image['imageDigest']),
            )
    except Exception as e:
        print(traceback.format_exc())
Code example #16
File: run.py Project: vpeddu/sra-pipeline
def get_fastq_files_from_s3(sra_accession):
    """
    If fastq files are present in S3, download them and return True.
    Otherwise return False.
    """
    bucket = os.getenv("BUCKET_NAME")
    dirs = ["pipeline-fastq", "pipeline-fastq-salivary"]
    found_one = False
    found_two = False
    for dir_ in dirs:
        for num in ["1", "2"]:
            key = "{}/{}/{}_{}.fastq.gz".format(dir_, sra_accession, sra_accession, num)
            if object_exists_in_s3(key):
                fprint("Downloading {}_{}.fastq.gz....".format(sra_accession, num))
                sh.aws("s3", "cp", "s3://{}/{}".format(bucket, key), ".")
                # false positive below:
                # https://github.com/PyCQA/pylint/issues/837#issuecomment-255109936
                if num == "1":  # pylint: disable=simplifiable-if-statement
                    found_one = True
                else:
                    found_two = True
        if found_one and found_two:
            return True
    return False
Code example #17
def transfer(df):
    """
    Function that makes the transfer to the worker

    df is passed in the meassure wrapper
    """

    bucket_id = "jfhuete-pycones2021"
    temp_path = "/tmp"
    file_path = f"{temp_path}/sample.hdf5"
    temp_files_prefix = str(random.getrandbits(32))

    # Export to hdf5
    df.export_hdf5(file_path)

    # Split hdf5 file in smaller files
    sh.split(
        f"-b{CHUNK_SIZE_MB}M",
        file_path,
        f"{temp_path}/{temp_files_prefix}"
    )

    # Upload files to S3
    temp_files = [
        f for f in os.listdir(temp_path) if f.startswith(temp_files_prefix)
    ]
    processes = []
    for file in temp_files:
        processes.append(
            sh.aws(
                "s3api",
                "put-object",
                "--bucket",
                bucket_id,
                "--key",
                file,
                "--body",
                f"{temp_path}/{file}",
                _bg=True
            )
        )

    for process in processes:
        process.wait()

    task = read.delay(temp_files)
    task.wait()
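The Celery read task that consumes temp_files is not shown. A hedged sketch of what the worker side might do to reassemble the chunks; the function and its defaults are hypothetical, with the bucket and paths copied from the snippet above:

import sh

def reassemble(temp_files, bucket_id="jfhuete-pycones2021",
               out_path="/tmp/sample.hdf5"):
    # split suffixes (aa, ab, ...) sort lexicographically, so sorting the
    # keys restores the original chunk order.
    with open(out_path, "wb") as out:
        for key in sorted(temp_files):
            part = "/tmp/" + key
            sh.aws("s3api", "get-object", "--bucket", bucket_id,
                   "--key", key, part)
            with open(part, "rb") as chunk:
                out.write(chunk.read())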
Code example #18
def main():

    diff_only = False
    if len(sys.argv) > 1 and sys.argv[1] in ("-d", "--diff"):
        diff_only = True

    src = json.load(sys.stdin)  # the `encoding` kwarg was removed in Python 3.9
    # get current ipset
    newl = _get_new_ipset(src)
    curr = _get_curr_ipset(src['IPSet']['IPSetId'])
    (del_list, add_list) = _get_diff(curr, newl)

    if diff_only:
        print("Append:")
        pprint.pprint(add_list)
        print("Revoke:")
        pprint.pprint(del_list)
        exit(0)

    token = aws("waf", "get-change-token", "--output", "text").stdout.decode('utf-8')

    ipset_list = {
        "IPSetId": src['IPSet']['IPSetId'],
        "ChangeToken": token.rstrip(),
        'Updates': []
    }

    for l in del_list:
        ipset_list['Updates'].append({
            "Action": "DELETE",
            "IPSetDescriptor": {
                "Type": "IPV4",
                "Value": "%s" % l
            }
        })

    for l in add_list:
        ipset_list['Updates'].append({
            "Action": "INSERT",
            "IPSetDescriptor": {
                "Type": "IPV4",
                "Value": "%s" % l
            }
        })

    print(json.dumps(ipset_list, indent=4))
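_get_new_ipset and _get_diff are not shown in this snippet. A plausible minimal _get_diff, assuming it simply takes set differences between the current and the new address lists:

def _get_diff(curr, newl):
    curr_set, new_set = set(curr), set(newl)
    del_list = sorted(curr_set - new_set)  # in WAF now, absent from the new list
    add_list = sorted(new_set - curr_set)  # in the new list, not yet in WAF
    return (del_list, add_list)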
Code example #19
def main():
    """
	main function
	"""
    #Delete the pipeline stack
    delete_stack(pipeline_stack_name)
    #Delete the ecs stack
    delete_stack(ecs_stack_name)
    #Get the ECR name
    try:
        ecrName = sh.aws(
            "cloudformation", "describe-stacks", "--stack-name",
            vpc_stack_name, "--query",
            "Stacks[0].Outputs[?OutputKey=='ECRRepositoryName'].OutputValue",
            "--output", "text")
        ecrName = str(ecrName).replace('\n', '')
        delete_images(ecrName)
    except Exception as e:
        print(traceback.format_exc())
    # Delete the VPC stack
    delete_stack(vpc_stack_name)
Code example #20
def get_stat(start, end, profile, inst_name):
    raw = sh.aws(
        '--profile',
        profile,
        'lightsail',
        'get-instance-metric-data',
        '--instance-name',
        inst_name,
        '--metric-name',
        'NetworkOut',
        '--period',
        '2700000',
        '--start-time',
        str(start),
        '--unit',
        'Bytes',
        '--statistics',
        'Sum',
        '--end-time',
        str(end),
    )
    js = json.loads(str(raw))
    return js['metricData'][0]['sum'] / 2**20  # bytes -> MiB
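A hedged usage example; the profile and instance name are placeholders, and the division by 2**20 converts the summed bytes to MiB:

from datetime import datetime, timedelta

# Placeholders: substitute your own CLI profile and Lightsail instance name.
end = datetime.utcnow()
start = end - timedelta(days=30)
mib = get_stat(start.isoformat(), end.isoformat(), "default", "my-instance")
print("NetworkOut over the period: %.1f MiB" % mib)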
Code example #21
def _get_curr_ipset(ipset_id):
    j = json.loads(
        aws("waf", "get-ip-set", "--ip-set-id", ipset_id, "--output",
            "json").stdout)
    return [c['Value'] for c in j['IPSet']['IPSetDescriptors']]
Code example #22
    def find_aws_regions(self) -> List[str]:
        from sh import aws
        cmd = aws('ec2', 'describe-regions', '--output', 'json')
        regions: Dict[str, List[Dict[str, str]]] = json.loads(str(cmd))
        return [r['RegionName'] for r in regions['Regions']]
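A hedged sketch combining this helper with find_aws_dynamic_ips from code example #7 to inventory public IPs across every region; the scanner instance (and its class, which the snippets omit) is assumed:

# Hypothetical driver code; `scanner` is an instance of the class (not shown)
# that defines both helper methods.
all_ips: Dict[str, int] = {}
for region in scanner.find_aws_regions():
    all_ips.update(scanner.find_aws_dynamic_ips(region))
print("found %d public IPs across all regions" % len(all_ips))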
Code example #23
import json
import random as rd
import time

import sh

ip = ''


def print_ret(s, j):
    r = str(j)
    js = json.loads(r)
    print(s)
    print(js)
    return r


while not (ip.startswith('54.238') or ip.startswith('54.95')):

    raw = sh.aws('lightsail', 'allocate-static-ip', '--profile', 'tokyo2',
                 '--static-ip-name', 'try')
    print_ret('Allocation msg:', raw)
    time.sleep(2)

    raw = sh.aws('lightsail', 'attach-static-ip', '--static-ip-name', 'try',
                 '--profile', 'tokyo2', '--instance-name', 'AWS-Tokyo-2')
    print_ret('Attach msg:', raw)
    time.sleep(2)

    raw = sh.aws('lightsail', 'get-instance', '--profile', 'tokyo2',
                 '--instance-name', 'AWS-Tokyo-2')
    r = print_ret('Instance message after allocation:', raw)
    ip = json.loads(r)['instance']['publicIpAddress']
    print(ip)
Code example #24
File: aws_download.py Project: solfung/nn_pruning
    def load_single(self, xp_name):
        key = xp_name + "/output/model.tar.gz"
        dest_file_name = self.tmp_dir / xp_name / (xp_name + "_output.tgz")
        dest_dir = dest_file_name.parent
        final_dest_file = self.dest / ("aws_" + dest_dir.name)
        if final_dest_file.exists():
            print("ALREADY PROCESSED", final_dest_file)
            # Nothing to do
            return
        print("PROCESSING", key)
        dest_dir.mkdir(parents=True, exist_ok=True)

        if not dest_file_name.exists():
            try:
                print(dest_file_name)
                s3_download(self.s3client, self.sagemaker_bucket, key,
                            dest_file_name)
            except self.s3client.exceptions.ClientError as e:
                return None
        else:
            print("File was already downloaded to %s" % dest_file_name)

        print("Unpacking")
        with working_directory(dest_dir):
            sh.tar("-zxvf", dest_file_name.name)

            to_remove = []
            to_remove_local = []

            for root, dirs, files in os.walk(".", topdown=False):
                for name in files:
                    # Mark optimizer files for deletion
                    if name == "optimizer.pt":
                        to_remove += [Path(root) / name]

        print("Cleaning up")
        # Remove the unwanted files
        for f in to_remove:
            print("remove", f)
            (dest_dir / f).unlink()

        # Remove the tar.gz
        dest_file_name.unlink()

        sh.aws(
            "s3",
            "sync",
            str(dest_dir),
            "s3://lagunas-sparsity-experiments/backup/nn_pruning/output/squad_test_aws/"
            + xp_name,
            _out=sys.stdout,
            _err=sys.stderr)

        for f in to_remove_local:
            print("remove local", f)
            (dest_dir / f).unlink()

        print("Copying to final destination")
        shutil.copytree(dest_dir, final_dest_file, dirs_exist_ok=True)

        print("Removing temporary dir")
        shutil.rmtree(self.tmp_dir)

        # Special case: add links to compensate for a bug
        for link_name in [
                "pytorch_model.bin", "training_args.bin", "vocab.txt",
                "tokenizer_config.json", "special_tokens_map.json"
        ]:
            link = final_dest_file / "checkpoint-110660" / link_name
            if not link.exists():
                link.symlink_to(final_dest_file / link_name)
Code example #25
def main():
    """
	main function
	"""
    #Create the vpc stack
    print("Creating stack {vpc_stack_name}".format(
        vpc_stack_name=vpc_stack_name))
    try:
        sh.aws(
            "cloudformation", "create-stack", "--stack-name", vpc_stack_name,
            "--template-body", "file://{vpc_file_location}".format(
                vpc_file_location=vpc_file_location), "--capabilities",
            "CAPABILITY_IAM")

        # Wait for stack to be created
        wait_for_creation(vpc_stack_name)
    except Exception as e:
        print(traceback.format_exc())

    # Get the ECR URL
    try:
        ecrUrl = sh.aws(
            "cloudformation", "describe-stacks", "--stack-name",
            vpc_stack_name, "--query",
            "Stacks[0].Outputs[?OutputKey=='ECRRepositoryUrl'].OutputValue",
            "--output", "text")
    except Exception as e:
        print(traceback.format_exc())

    # Build the image and push it to ECR
    print('Building Docker image and pushing it to {ecrUrl}'.format(
        ecrUrl=ecrUrl))
    print(sh.bash("docker_image.sh", ecrUrl))

    # Create the ECS stack
    print("Creating stack {ecs_stack_name}".format(
        ecs_stack_name=ecs_stack_name))
    try:
        sh.aws(
            "cloudformation", "create-stack", "--stack-name", ecs_stack_name,
            "--template-body", "file://{ecs_file_location}".format(
                ecs_file_location=ecs_file_location))

        # Wait for stack to be created
        wait_for_creation(ecs_stack_name)
    except Exception as e:
        print(traceback.format_exc())

    # Create the pipeline stack
    print("Creating stack {pipeline_stack_name}".format(
        pipeline_stack_name=pipeline_stack_name))
    try:
        sh.aws(
            "cloudformation", "create-stack", "--stack-name",
            pipeline_stack_name, "--template-body",
            "file://{pipeline_file_location}".format(
                pipeline_file_location=pipeline_file_location), "--parameters",
            "ParameterKey=GitHubToken,ParameterValue={github_key}".format(
                github_key=github_key), "--capabilities", "CAPABILITY_IAM")

        # Wait for stack to be created
        wait_for_creation(pipeline_stack_name)
    except Exception as e:
        print(traceback.format_exc())
Code example #26
#!/usr/bin/env python3
from pathlib import Path

import sh

sh.cd(Path(__file__).parent.absolute())
sh.mkdir('-p', 'datasets/modcloth/raw', 'datasets/electronics/raw')
print(
    sh.aws('s3', 'cp', 's3://seshlabucsc/df_modcloth.csv',
           './datasets/modcloth/raw'))
print(
    sh.aws('s3', 'cp', 's3://seshlabucsc/df_electronics.csv',
           './datasets/electronics/raw'))
Code example #27
def delete_images_with_digest(repo, image_digests):
    if image_digests:
        sh.aws("ecr", "batch-delete-image", "--repository-name", repo,
               "--image-ids", image_digests)
Code example #28
File: run.py Project: vpeddu/sra-pipeline
def run_bowtie(sra_accession, read_handling="equal"):
    """
    run bowtie2
    sra_accession - sra accession
    read_handling - if both fastq files are of equal length
                    (indicated by the value "equal", the default),
                    then both fastq files are used. If the value is
                    1 or 2, then only that single fastq file is used.
    """
    viruses = os.getenv("REFERENCES").split(",")
    viruses = [x.strip() for x in viruses]
    # cmd = sh.Command("/bowtie2-2.3.4.1-linux-x86_64//bowtie2")
    bowtie2 = partial(sh.bowtie2, _piped=True, _bg_exc=False)

    for virus in viruses:
        bowtie_args = [
            "--local",
            "-p",
            os.getenv("NUM_CORES"),
            "--no-unal",
            "-x",
            "/bt2/{}".format(virus),
        ]
        if read_handling == "equal":
            bowtie_args.extend(
                [
                    "-1",
                    "{}_1.fastq.gz".format(sra_accession),
                    "-2",
                    "{}_2.fastq.gz".format(sra_accession),
                ]
            )
        elif read_handling == 1:
            bowtie_args.extend(["-U", "{}_1.fastq.gz".format(sra_accession)])
        elif read_handling == 2:
            bowtie_args.extend(["-U", "{}_2.fastq.gz".format(sra_accession)])

        fprint("processing virus {} ...".format(virus))
        if object_exists_in_s3(
            "{}/{}/{}/{}.sam".format(
                os.getenv("PREFIX"), sra_accession, virus, sra_accession
            )
        ):
            fprint(
                "output sam file already exists in s3 for virus {}, skipping...".format(
                    virus
                )
            )
        else:
            with Timer() as timer:
                for line in sh.aws(
                    bowtie2(*bowtie_args),
                    "s3",
                    "cp",
                    "-",
                    "s3://{}/{}/{}/{}/{}.sam".format(
                        os.getenv("BUCKET_NAME"),
                        os.getenv("PREFIX"),
                        sra_accession,
                        virus,
                        sra_accession,
                    ),
                    _iter=True,
                ):
                    fprint(line)
            fprint("bowtie2 duration for {}: {}".format(virus, timer.interval))