Esempio n. 1
0
def upload_ids(ids_file, redirect_from, s3_bucket, aws_key, aws_secret):
    """
    Upload `ids_file` into `s3_bucket`, make it public, then repoint the
    existing `redirect_from` key's redirect metadata at it, so the stable
    `redirect_from` URL always serves the newly uploaded file.
    The `redirect_from` key must already exist in the bucket.
    """
    logging.info("Uploading _ids file %s to s3 (bucket: %s)" %
                 (repr(ids_file), s3_bucket))
    # the object key is simply the file's basename
    target_key = os.path.basename(ids_file)
    send_s3_file(ids_file,
                 target_key,
                 overwrite=True,
                 s3_bucket=s3_bucket,
                 aws_key=aws_key,
                 aws_secret=aws_secret)
    # expose the freshly uploaded object publicly
    conn = connect_s3(aws_key, aws_secret)
    bucket = conn.get_bucket(s3_bucket)
    uploaded = bucket.get_key(target_key)
    uploaded.set_acl("public-read")
    # repoint the stable redirect key at the new object, keep it public
    redirect_key = bucket.get_key(redirect_from)
    assert redirect_key, "Can't find s3 key '%s' to set redirection" % redirect_from
    redirect_key.set_redirect("/%s" % target_key)
    redirect_key.set_acl("public-read")
    logging.info("IDs file '%s' uploaded to s3, redirection set from '%s'" %
                 (ids_file, redirect_from),
                 extra={"notify": True})
Esempio n. 2
0
def upload_ids(ids_file, redirect_from, s3_bucket, aws_key, aws_secret):
    """
    Upload `ids_file` into `s3_bucket`, make it public, then update the
    existing `redirect_from` object's website-redirect metadata so its URL
    points to the uploaded file.
    The `redirect_from` key must already exist in the bucket.
    """
    logging.info("Uploading _ids file %s to s3 (bucket: %s)" %
                 (repr(ids_file), s3_bucket))
    # the object key is simply the file's basename
    target_key = os.path.basename(ids_file)
    send_s3_file(ids_file,
                 target_key,
                 overwrite=True,
                 s3_bucket=s3_bucket,
                 aws_key=aws_key,
                 aws_secret=aws_secret)
    # make the uploaded object publicly readable
    s3 = boto3.resource('s3',
                        aws_access_key_id=aws_key,
                        aws_secret_access_key=aws_secret)
    uploaded = s3.Object(bucket_name=s3_bucket, key=target_key)
    uploaded.Acl().put(ACL='public-read')
    # point the redirect key at the new object and make it public as well
    redirected = s3.Object(bucket_name=s3_bucket, key=redirect_from)
    redirected.load()  # raises if the redirect key doesn't exist
    redirected.put(WebsiteRedirectLocation='/%s' % target_key)
    redirected.Acl().put(ACL='public-read')
    logging.info("IDs file '%s' uploaded to s3, redirection set from '%s'" %
                 (ids_file, redirect_from),
                 extra={"notify": True})
Esempio n. 3
0
 def post_index(self, target_name, index_name, job_manager, steps=("index", "post"), batch_size=10000, ids=None, mode=None):
     """
     Post-indexing hook: upload the _id cache file (named after
     `target_name`, under config.CACHE_FOLDER) to S3, make it public, and
     repoint the stable per-assembly redirect key
     ("myvariant_hg19_ids.xz" / "myvariant_hg38_ids.xz") at it.

     Fix: `steps` default was a mutable list (shared across calls); an
     immutable tuple is backward-compatible since it is never mutated here.

     Raises FileNotFoundError if the cache file is missing; any upload
     error is logged (with notification) then re-raised.
     """
     # cache file should be named the same as target_name
     asyncio.set_event_loop(job_manager.loop)
     cache_file = os.path.join(config.CACHE_FOLDER, target_name)
     if getattr(config, "CACHE_FORMAT", None):
         # compressed/serialized caches carry an extra format suffix
         cache_file += "." + config.CACHE_FORMAT
     if not os.path.exists(cache_file):
         raise FileNotFoundError("Can't find cache file '%s'" % cache_file)
     self.logger.info("Upload _id cache file '%s' to s3" % cache_file)
     try:
         s3path = os.path.basename(cache_file)
         send_s3_file(cache_file, s3path, overwrite=True)
         # make the file public
         s3 = connect_s3(config.AWS_KEY, config.AWS_SECRET)
         bucket = s3.get_bucket(config.S3_BUCKET)
         s3key = bucket.get_key(s3path)
         s3key.set_acl("public-read")
         # update permissions and redirect metadata on the stable key
         # matching the genome assembly of the uploaded file
         if "hg19" in s3path:
             k = bucket.get_key("myvariant_hg19_ids.xz")
         else:
             k = bucket.get_key("myvariant_hg38_ids.xz")
         k.set_redirect("/%s" % s3path)
         k.set_acl("public-read")
         self.logger.info("Cache file '%s' uploaded to s3" % cache_file, extra={"notify": True})
     except Exception as e:
         self.logger.error("Failed to upload cache file '%s' to s3: %s" % (cache_file, e), extra={"notify": True})
         raise
Esempio n. 4
0
 def gen_meta():
     """
     Build and publish the JSON metadata describing this incremental
     release: dump it locally under btconfig.DIFF_PATH, push it to the S3
     diff bucket with a "lastmodified" timestamp taken from the new
     collection's build time, then register the new version.

     Fix: both the metadata dump and the metadata load opened files via
     bare `open()` with no close; `with` blocks now ensure the handles
     are released.
     """
     pinfo["step"] = "generate meta"
     self.logger.info(
         "Generating JSON metadata for incremental release '%s'"
         % diff_version)
     # generate json metadata about this diff release; "url" points to the
     # detailed per-release metadata already uploaded under s3basedir
     diff_meta = {
         "type": "incremental",
         "build_version": diff_version,
         "require_version": meta["old"]["version"],
         "app_version": None,
         "metadata": {
             "url":
             aws.get_s3_url(os.path.join(
                 s3basedir, "metadata.json"),
                            aws_key=btconfig.AWS_KEY,
                            aws_secret=btconfig.AWS_SECRET,
                            s3_bucket=btconfig.S3_DIFF_BUCKET)
         }
     }
     diff_file = "%s.json" % diff_version
     diff_meta_path = os.path.join(btconfig.DIFF_PATH,
                                   diff_file)
     with open(diff_meta_path, "w") as fh:
         json.dump(diff_meta, fh, indent=True)
     # get a timestamp from metadata to force lastmodified header
     # timestamp is when the new collection was built (not when the diff
     # was generated, as diff can be generated way after). New collection's
     # timestamp remains a good choice as data (diff) relates to that date anyway
     with open(os.path.join(diff_folder, "metadata.json")) as fh:
         metadata = json.load(fh)
     local_ts = dtparse(metadata["_meta"]["timestamp"])
     utc_epoch = str(int(time.mktime(local_ts.timetuple())))
     s3key = os.path.join(btconfig.S3_DIFF_FOLDER, diff_file)
     aws.send_s3_file(diff_meta_path,
                      s3key,
                      aws_key=btconfig.AWS_KEY,
                      aws_secret=btconfig.AWS_SECRET,
                      s3_bucket=btconfig.S3_DIFF_BUCKET,
                      metadata={"lastmodified": utc_epoch},
                      overwrite=True,
                      permissions="public-read")
     url = aws.get_s3_url(s3key,
                          aws_key=btconfig.AWS_KEY,
                          aws_secret=btconfig.AWS_SECRET,
                          s3_bucket=btconfig.S3_DIFF_BUCKET)
     self.logger.info(
         "Incremental release metadata published for version: '%s'"
         % url)
     publish_data_version(diff_version)
     self.logger.info("Registered version '%s'" %
                      (diff_version))
Esempio n. 5
0
        def do(index):
            """
            Generator-based coroutine driving the release of `index`:
            optionally creates an Elasticsearch snapshot (step "snapshot",
            polled until completion) and optionally publishes full-release
            JSON metadata to S3 (step "meta"). The outcome is reported
            through the closed-over future `fut` (result or exception).
            """
            def snapshot_launched(f):
                # Done-callback on the snapshot job: log the outcome; if the
                # launch itself failed, propagate the error through `fut`.
                try:
                    self.logger.info("Snapshot launched: %s" % f.result())
                except Exception as e:
                    self.logger.error("Error while lauching snapshot: %s" % e)
                    fut.set_exception(e)

            if "snapshot" in steps:
                pinfo = {
                    "category": "index",
                    "source": index,
                    "step": "snapshot",
                    "description": es_snapshot_host
                }
                self.logger.info(
                    "Creating snapshot for index '%s' on host '%s', repository '%s'"
                    % (index, es_snapshot_host, btconfig.SNAPSHOT_REPOSITORY))
                # launch the snapshot in a worker thread managed by job_manager
                job = yield from self.job_manager.defer_to_thread(
                    pinfo,
                    partial(idxr.snapshot,
                            btconfig.SNAPSHOT_REPOSITORY,
                            snapshot,
                            mode=mode))
                job.add_done_callback(snapshot_launched)
                yield from job
                # poll the snapshot state until it leaves the in-progress set
                while True:
                    state = get_status()
                    if state in ["INIT", "IN_PROGRESS", "STARTED"]:
                        yield from asyncio.sleep(
                            getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
                    else:
                        if state == "SUCCESS":
                            # if "meta" is required, it will set the result later
                            if not "meta" in steps:
                                fut.set_result(state)
                            self.logger.info("Snapshot '%s' successfully created (host: '%s', repository: '%s')" % \
                                    (snapshot,es_snapshot_host,btconfig.SNAPSHOT_REPOSITORY),extra={"notify":True})
                        else:
                            # any terminal state other than SUCCESS is a failure
                            e = IndexerException("Snapshot '%s' failed: %s" %
                                                 (snapshot, state))
                            fut.set_exception(e)
                            self.logger.error("Failed creating snapshot '%s' (host: %s, repository: %s), state: %s" % \
                                    (snapshot,es_snapshot_host,btconfig.SNAPSHOT_REPOSITORY,state),extra={"notify":True})
                            raise e
                        break

            if "meta" in steps:
                try:
                    esb = DocESBackend(idxr)
                    self.logger.info(
                        "Generating JSON metadata for full release '%s'" %
                        esb.version)
                    repo = idxr._es.snapshot.get_repository(
                        btconfig.URL_SNAPSHOT_REPOSITORY)
                    # generate json metadata about this diff release
                    full_meta = {
                        "type": "full",
                        "build_version": esb.version,
                        "app_version": None,
                        "metadata": {
                            "repository": repo,
                            "snapshot_name": snapshot
                        }
                    }
                    assert esb.version, "Can't retrieve a version from index '%s'" % index
                    build_info = "%s.json" % esb.version
                    build_info_path = os.path.join(btconfig.DIFF_PATH,
                                                   build_info)
                    json.dump(full_meta, open(build_info_path, "w"))
                    # override lastmodified header with our own timestamp
                    local_ts = dtparse(
                        idxr.get_mapping_meta()["_meta"]["timestamp"])
                    utc_epoch = str(int(time.mktime(local_ts.timetuple())))
                    # it's a full release, but all build info metadata (full, incremental) all go
                    # to the diff bucket (this is the main entry)
                    s3key = os.path.join(btconfig.S3_DIFF_FOLDER, build_info)
                    aws.send_s3_file(build_info_path,
                                     s3key,
                                     aws_key=btconfig.AWS_KEY,
                                     aws_secret=btconfig.AWS_SECRET,
                                     s3_bucket=btconfig.S3_DIFF_BUCKET,
                                     metadata={"lastmodified": utc_epoch},
                                     overwrite=True)
                    url = aws.get_s3_url(s3key,
                                         aws_key=btconfig.AWS_KEY,
                                         aws_secret=btconfig.AWS_SECRET,
                                         s3_bucket=btconfig.S3_DIFF_BUCKET)
                    self.logger.info(
                        "Full release metadata published for version: '%s'" %
                        url)
                    publish_data_version(esb.version)
                    self.logger.info("Registered version '%s'" % (esb.version))
                    fut.set_result("SUCCESS")
                except Exception as e:
                    self.logger.error(
                        "Error while publishing metadata for snapshot '%s': %s"
                        % (snapshot, e))
                    fut.set_exception(e)
Esempio n. 6
0
def publish_data_version(s3_bucket,
                         s3_folder,
                         version_info,
                         update_latest=True,
                         aws_key=None,
                         aws_secret=None):
    """
    Update remote files:
        - versions.json: add version_info to the JSON list
                        or replace if arg version_info is a list
        - latest.json: update redirect so it points to latest version url

    "versions" is dict such as::

        {"build_version":"...",         # version name for this release/build
         "require_version":"...",       # version required for incremental update
         "target_version": "...",       # version reached once update is applied
         "type" : "incremental|full"    # release type
         "release_date" : "...",        # ISO 8601 timestamp, release date/time
         "url": "http...."}             # url pointing to release metadata
    """
    # register version: fetch the current list, falling back to an empty
    # structure when the file is missing or unparsable
    versionskey = os.path.join(s3_folder, "%s.json" % VERSIONS)
    try:
        versions = json.loads(
            aws.get_s3_file_contents(versionskey,
                                     aws_key=aws_key,
                                     aws_secret=aws_secret,
                                     s3_bucket=s3_bucket).decode())
    except (FileNotFoundError, json.JSONDecodeError):
        versions = {"format": "1.0", "versions": []}
    if isinstance(version_info, list):
        # full replacement of the version list
        versions["versions"] = version_info
    else:
        # merge: dedupe on build_version, new entry wins over existing
        tmp = {}
        for e in versions["versions"]:
            tmp.setdefault(e["build_version"], e)
        tmp[version_info["build_version"]] = version_info
        # order by build_version
        versions["versions"] = sorted(tmp.values(),
                                      key=lambda e: e["build_version"])

    aws.send_s3_file(None,
                     versionskey,
                     content=json.dumps(versions, indent=True),
                     aws_key=aws_key,
                     aws_secret=aws_secret,
                     s3_bucket=s3_bucket,
                     content_type="application/json",
                     overwrite=True)

    # update latest: only meaningful when registering a single version
    if not isinstance(version_info, list) and update_latest:
        latestkey = os.path.join(s3_folder, "%s.json" % LATEST)
        newredir = os.path.join(
            "/", s3_folder, "{}.json".format(version_info["build_version"]))
        # the consensus is that we will upload the data and have the
        # redirection, for record-keep purpose
        aws.send_s3_file(None,
                         latestkey,
                         content=json.dumps(version_info["build_version"],
                                            indent=True),
                         content_type="application/json",
                         aws_key=aws_key,
                         aws_secret=aws_secret,
                         s3_bucket=s3_bucket,
                         overwrite=True,
                         redirect=newredir)
Esempio n. 7
0
def publish_data_version(s3_folder,
                         version_info,
                         env=None,
                         update_latest=True):
    """
    Update remote files:
    - versions.json: add version_info to the JSON list
                     or replace if arg version_info is a list
    - latest.json: update redirect so it points to latest version url
    "versions" is dict such as:
        {"build_version":"...",         # version name for this release/build
         "require_version":"...",       # version required for incremental update
         "target_version": "...",       # version reached once update is applied
         "type" : "incremental|full"    # release type
         "release_date" : "...",        # ISO 8601 timestamp, release date/time
         "url": "http...."}             # url pointing to release metadata
    """
    # register version: fetch the current list, falling back to an empty
    # structure when the file is missing or unparsable
    versionskey = os.path.join(s3_folder, "%s.json" % VERSIONS)
    try:
        versions = aws.get_s3_file(versionskey,
                                   return_what="content",
                                   aws_key=config.AWS_KEY,
                                   aws_secret=config.AWS_SECRET,
                                   s3_bucket=config.S3_RELEASE_BUCKET)
        versions = json.loads(versions.decode())  # S3 returns bytes
    except (FileNotFoundError, json.JSONDecodeError):
        versions = {"format": "1.0", "versions": []}
    if isinstance(version_info, list):
        # full replacement of the version list
        versions["versions"] = version_info
    else:
        # merge: dedupe on build_version, new entry wins over existing
        tmp = {}
        for e in versions["versions"]:
            tmp.setdefault(e["build_version"], e)
        tmp[version_info["build_version"]] = version_info
        # order by build_version
        versions["versions"] = sorted(tmp.values(),
                                      key=lambda e: e["build_version"])

    aws.send_s3_file(None,
                     versionskey,
                     content=json.dumps(versions, indent=True),
                     aws_key=config.AWS_KEY,
                     aws_secret=config.AWS_SECRET,
                     s3_bucket=config.S3_RELEASE_BUCKET,
                     content_type="application/json",
                     overwrite=True)

    # update latest: only meaningful when registering a single version
    if not isinstance(version_info, list) and update_latest:
        latestkey = os.path.join(s3_folder, "%s.json" % LATEST)
        key = None
        try:
            key = aws.get_s3_file(latestkey,
                                  return_what="key",
                                  aws_key=config.AWS_KEY,
                                  aws_secret=config.AWS_SECRET,
                                  s3_bucket=config.S3_RELEASE_BUCKET)
        except FileNotFoundError:
            # latest.json doesn't exist yet; it will be created below
            pass
        aws.send_s3_file(None,
                         latestkey,
                         content=json.dumps(version_info["build_version"],
                                            indent=True),
                         content_type="application/json",
                         aws_key=config.AWS_KEY,
                         aws_secret=config.AWS_SECRET,
                         s3_bucket=config.S3_RELEASE_BUCKET,
                         overwrite=True)
        if not key:
            # re-fetch the key now that the file was just created
            key = aws.get_s3_file(latestkey,
                                  return_what="key",
                                  aws_key=config.AWS_KEY,
                                  aws_secret=config.AWS_SECRET,
                                  s3_bucket=config.S3_RELEASE_BUCKET)
        newredir = os.path.join("/", s3_folder,
                                "%s.json" % version_info["build_version"])
        key.set_redirect(newredir)