Example #1
    def cleanup(
        self,
        env=None,  # a snapshot environment describing a repository
        keep=3,  # the number of most recent snapshots to keep in one group
        group_by="build_config",  # the attr of which its values form groups
        dryrun=True,  # display the snapshots to be deleted without deleting them
        **filters  # a set of criteria restricting which snapshots are cleaned
    ):
        """ Delete past snapshots and keep only the most recent ones.

        Examples:
            >>> snapshot_cleanup()
            >>> snapshot_cleanup("s3_outbreak")
            >>> snapshot_cleanup("s3_outbreak", keep=0)
        """

        snapshots = cleaner.find(  # filters support dotfield.
            get_src_build(), env, keep, group_by, **filters)

        if dryrun:
            return '\n'.join(
                ("-" * 75, cleaner.plain_text(snapshots), "-" * 75,
                 "DRYRUN ONLY - APPLY THE ACTIONS WITH:",
                 "   > snapshot_cleanup(..., dryrun=False)"))

        # return the number of snapshots successfully deleted
        return cleaner.delete(get_src_build(), snapshots, self)
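The inline comment above notes that the extra filters support dot-field notation. A minimal sketch of such a call, where the environment name and the dotted filter key/value are purely illustrative:

# Hypothetical dry-run call: keep the 2 most recent snapshots per group and
# restrict the candidates with a dotted-field filter (names are illustrative).
print(snapshot_cleanup(
    "s3_outbreak",
    keep=2,
    **{"build_config.name": "demo"}  # forwarded as-is to cleaner.find()
))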
Example #2
        def _snapshot(snapshot):
            x = CumulativeResult()
            build_doc = self._doc(index)
            cfg = self.repcfg.format(build_doc)
            for step in ("pre", "snapshot", "post"):
                state = registrar.dispatch(step)  # _TaskState Class
                state = state(get_src_build(), build_doc.get("_id"))
                logging.info(state)
                state.started()

                job = yield from self.job_manager.defer_to_thread(
                    self.pinfo.get_pinfo(step, snapshot),
                    partial(getattr(self, state.func), cfg, index, snapshot))
                try:
                    dx = yield from job
                    dx = StepResult(dx)

                except Exception as exc:
                    logging.exception(exc)
                    state.failed({}, exc)
                    raise exc
                else:
                    merge(x.data, dx.data)
                    logging.info(dx)
                    logging.info(x)
                    state.succeed({snapshot: x.data}, res=dx.data)
            return x
Example #3
 def update_metadata(self,
                     indexer_env,
                     index_name,
                     build_name=None,
                     _meta=None):
     """
     Update _meta for index_name, based on build_name (_meta directly
     taken from the src_build document) or _meta
     """
     idxkwargs = self[indexer_env]
     # 1st pass we get the doc_type (don't want to ask that on the signature...)
     indexer = create_backend(
         (idxkwargs["es_host"], index_name, None)).target_esidxer
     m = indexer._es.indices.get_mapping(index_name)
     assert len(m[index_name]["mappings"]) == 1, "Found more than one doc_type: " + \
         "%s" % m[index_name]["mappings"].keys()
     doc_type = list(m[index_name]["mappings"].keys())[0]
     # 2nd pass to re-create correct indexer
     indexer = create_backend(
         (idxkwargs["es_host"], index_name, doc_type)).target_esidxer
     if build_name:
         build = get_src_build().find_one({"_id": build_name})
         assert build, "No such build named '%s'" % build_name
         _meta = build.get("_meta")
     assert _meta is not None, "No _meta found"
     return indexer.update_mapping_meta({"_meta": _meta})
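A brief usage sketch of update_metadata, assuming an IndexManager-like instance named manager; the environment, index and build names below are hypothetical:

# _meta pulled from the src_build document of the named build (names are illustrative)
manager.update_metadata("prod", "mygene_current", build_name="mygene_20210601_abc123")

# or pass _meta explicitly, bypassing the src_build lookup
manager.update_metadata("prod", "mygene_current",
                        _meta={"build_version": "20210601", "biothing_type": "gene"})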
Example #4
 def __init__(self, *args, **kwargs):
     super(IndexManager, self).__init__(*args, **kwargs)
     self.src_build = get_src_build()
     self.indexers = {}
     self.es_config = {}
     self.t0 = time.time()
     self.prepared = False
     self.log_folder = LOG_FOLDER
     self.timestamp = datetime.now()
     self.setup()
Example #5
 def clean_stale_status(self):
     src_build = get_src_build()
     for build in src_build.find():
         for job in build.get("jobs", []):
             if job.get("status", "").endswith("snapshotting"):
                 logging.warning(
                     "Found stale build '%s', marking snapshot status as 'canceled'"
                     % build["_id"])
                 job["status"] = "canceled"
         src_build.replace_one({"_id": build["_id"]}, build)
Example #6
 def clean_stale_status(self):
     src_build = get_src_build()
     for build in src_build.find():
         dirty = False
         for job in build.get("jobs", []):
             if job.get("status") == "syncing":
                 logging.warning(
                     "Found stale build '%s', marking sync status as 'canceled'"
                     % build["_id"])
                 job["status"] = "canceled"
                 dirty = True
         if dirty:
             src_build.replace_one({"_id": build["_id"]}, build)
Example #7
 def post_publish(self, s3_folder, old_db_col_names, new_db_col_names,
                  diff_folder, release_folder, steps, s3_bucket, *args,
                  **kwargs):
     bdoc = get_src_build().find_one({"_id": new_db_col_names})
     assert bdoc, "Can't find build doc associated with index '%s' (should be named the same)" % new_db_col_names
     ids_file = export_ids(new_db_col_names)
     redir = "%s_ids.xz" % bdoc["build_config"]["assembly"]
     if "demo" in new_db_col_names:
         redir = "demo_%s" % redir
     upload_ids(ids_file,
                redir,
                s3_bucket=config.IDS_S3_BUCKET,
                aws_key=config.AWS_KEY,
                aws_secret=config.AWS_SECRET)
Example #8
 def post_publish(self, snapshot, index, *args, **kwargs):
     # assuming build name == index name, and assuming demo index has
     # "demo" in its name...
     # assuming full index, not demo, guess name now
     bdoc = get_src_build().find_one({"_id" : index})
     assert bdoc, "Can't find build doc associated with index '%s' (should be named the same)" % index
     ids_file = export_ids(index)
     if "hg19" in index or "hg19" in snapshot:
         redir = "hg19_ids.xz"
     else:
         redir = "hg38_ids.xz"
     if "demo" in index or "demo" in snapshot:
         redir = "demo_%s" % redir
     upload_ids(ids_file, redir,
                s3_bucket=config.IDS_S3_BUCKET,
                aws_key=config.AWS_KEY,
                aws_secret=config.AWS_SECRET)
Example #9
        async def _update_meta(_meta):
            env = self.register[indexer_env]
            async with AsyncElasticsearch(**env["args"]) as client:

                doc_type = None
                es_version = (await client.info())["version"]["number"]
                if int(es_version.split(".")[0]) < 7:
                    mappings = await client.indices.get_mapping(index_name)
                    mappings = mappings[index_name]["mappings"]
                    doc_type = next(iter(mappings.keys()))

                if _meta is None:
                    _id = build_name or index_name  # best guess
                    build = get_src_build().find_one({"_id": _id})
                    _meta = (build or {}).get("_meta")

                return await client.indices.put_mapping(body=dict(_meta=_meta),
                                                        index=index_name,
                                                        doc_type=doc_type)
Example #10
 def load_build(self):
     """
     Load cold and hot build documents.
     Index settings are the ones declared in the hot build doc.
     """
     src_build = get_src_build()
     # we don't want to reload build docs if they are already loaded
     # so we can temporarily override values when dealing with cold/hot collection
     # (kind of a hack, not really clean, but...)
     if self.hot_build_doc and self.cold_build_doc and self.build_doc:
         self.logger.debug("Build documents already loaded")
         return
     self.hot_build_doc = src_build.find_one({'_id': self.hot_target_name})
     # search the cold collection definition
     assert "build_config" in self.hot_build_doc and "cold_collection" in self.hot_build_doc["build_config"], \
         "Can't find cold_collection field in build_config"
     self.cold_target_name = self.hot_build_doc["build_config"][
         "cold_collection"]
     self.cold_build_doc = src_build.find_one(
         {'_id': self.cold_target_name})
     # we'll register everything (status) on the hot one
     self.build_doc = self.hot_build_doc
     assert self.cold_build_doc, "Can't find build document associated to '%s'" % self.cold_target_name
     assert self.hot_build_doc, "Can't find build document associated to '%s'" % self.hot_target_name
     self.cold_cfg = self.cold_build_doc.get("build_config")
     self.hot_cfg = self.hot_build_doc.get("build_config")
     # both the hot and cold build configs must be present
     if self.hot_cfg and self.cold_cfg:
         self.build_config = self.hot_cfg
         if "doc_type" not in self.hot_cfg:
             raise ValueError("Missing 'doc_type' in build config")
         self.doc_type = self.hot_cfg["doc_type"]
         self.num_shards = self.hot_cfg.get("num_shards", 10)  # optional
         self.num_shards = self.num_shards and int(
             self.num_shards) or self.num_shards
         self.num_replicas = self.hot_cfg.get("num_replicas", 0)  # optional
         self.num_replicas = self.num_replicas and int(
             self.num_replicas) or self.num_replicas
         self.conf_name = self.hot_cfg["name"]
     else:
         raise ValueError(
             "Cannot find build config associated to '%s' or '%s'" %
             (self.hot_target_name, self.cold_target_name))
     return (self.cold_cfg, self.hot_cfg)
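load_build expects the hot build document to reference its cold counterpart through build_config.cold_collection. A minimal sketch of the two documents it looks up, with illustrative field values:

# Hot build doc (illustrative): carries the index settings and the pointer to
# the cold collection; all status updates are registered on this document.
hot_build_doc = {
    "_id": "mygene_hot_20210601",
    "build_config": {
        "name": "mygene_hot",
        "doc_type": "gene",
        "cold_collection": "mygene_cold_20210601",  # becomes self.cold_target_name
        "num_shards": 10,    # optional
        "num_replicas": 0,   # optional
    },
}

# Cold build doc (illustrative): only needs to exist and carry its own build_config.
cold_build_doc = {
    "_id": "mygene_cold_20210601",
    "build_config": {"name": "mygene_cold", "doc_type": "gene"},
}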
Example #11
    def extract_coldbuild(self):
        cold_target = self.build_config["cold_collection"]
        cold_build_doc = get_src_build().find_one({'_id': cold_target})
        cold_build_doc = _BuildDoc(cold_build_doc)

        cold_build_doc["_id"] = self.build_name  # *
        cold_build_doc["mapping"].update(self["mapping"])  # combine mapping
        merge_src_build_metadata([cold_build_doc, self])  # combine _meta

        # * About State Updates
        # All updates are diverted to the hot collection.
        # Indices & snapshots are only registered there.

        if self.build_config.get("num_shards"):
            cold_build_doc.build_config["num_shards"] = \
                self.build_config["num_shards"]
        if self.build_config.get("num_replicas"):
            cold_build_doc.build_config["num_replicas"] =  \
                self.build_config["num_replicas"]
        return cold_build_doc
Example #12
    def __init__(self, *args, **kwargs):
        """
        An example of config dict for this module.
        {
            "indexer_select": {
                None: "hub.dataindex.indexer.DrugIndexer", # default
                "build_config.cold_collection" : "mv.ColdHotVariantIndexer",
            },
            "env": {
                "prod": {
                    "host": "localhost:9200",
                    "indexer": {
                        "args": {
                            "timeout": 300,
                            "retry_on_timeout": True,
                            "max_retries": 10,
                        },
                        "bulk": {
                            "chunk_size": 50
                            "raise_on_exception": False
                        },
                        "concurrency": 3
                    },
                    "index": [
                        # for information only, only used in index_info
                        {"index": "mydrugs_current", "doc_type": "drug"},
                        {"index": "mygene_current", "doc_type": "gene"}
                    ],
                },
                "dev": { ... }
            }
        }
        """
        super().__init__(*args, **kwargs)
        self._srcbuild = get_src_build()
        self._config = {}

        self.logger, self.logfile = get_logger('indexmanager')
Example #13
 def load_build(self, target_name=None):
     '''Load build info from src_build collection.'''
     target_name = target_name or self.target_name
     src_build = get_src_build()
     self.build_doc = src_build.find_one({'_id': target_name})
     assert self.build_doc, "Can't find build document associated to '%s'" % target_name
     _cfg = self.build_doc.get("build_config")
     if _cfg:
         self.build_config = _cfg
         #if not "doc_type" in _cfg:
         #    raise ValueError("Missing 'doc_type' in build config")
         self.doc_type = _cfg.get("doc_type")
         self.num_shards = _cfg.get("num_shards", 10)  # optional
         self.num_shards = self.num_shards and int(
             self.num_shards) or self.num_shards
         self.num_replicas = _cfg.get("num_replicas", 0)  # optional
         self.num_replicas = self.num_replicas and int(
             self.num_replicas) or self.num_replicas
         self.conf_name = _cfg["name"]
     else:
         raise ValueError("Cannot find build config associated to '%s'" %
                          target_name)
     return _cfg
Example #14
    def cleanup(self, env=None, keep=3, dryrun=True, **filters):
        """ Delete old indices except for the most recent ones.

        Examples:
            >>> index_cleanup()
            >>> index_cleanup("production")
            >>> index_cleanup("local", build_config="demo")
            >>> index_cleanup("local", keep=0)
            >>> index_cleanup(_id="<elasticsearch_index>")
        """
        if not env and not dryrun:  # low specificity, unsafe.
            raise ValueError('Missing argument "env".')

        cleaner = Cleaner(get_src_build(), self, self.logger)
        cleanups = cleaner.find(env, keep, **filters)

        if dryrun:
            return '\n'.join(("-" * 75, cleaner.plain_text(cleanups), "-" * 75,
                              "DRYRUN ONLY - APPLY THE ACTIONS WITH:",
                              "   > index_cleanup(..., dryrun=False)"))

        job = asyncio.ensure_future(cleaner.clean(cleanups))
        job.add_done_callback(self.logger.info)
        return job
Example #15
 def clean_stale_status(self):
     registrar.audit(get_src_build(), logging)
Example #16
def create_backend(db_col_names, name_only=False, follow_ref=False, **kwargs):
    """
    Guess what's inside 'db_col_names' and return the corresponding backend.
    - It could be a string (will first check for an src_build doc to check
      a backend_url field, if nothing there, will lookup a mongo collection
      in target database)
    - or a tuple("target|src","col_name")
    - or a ("mongodb://*****:*****@host","db","col_name") URI.
    - or a ("es_host:port","index_name","doc_type")
    If name_only is True, just return a name uniquely identifying the
    collection or index on that connection.
    """
    col = None
    db = None
    is_mongo = True
    if type(db_col_names) == str:
        # first check the build doc: if there's a backend_url key, we'll use it instead of
        # directly using db_col_names as the target collection (see LinkDataBuilder)
        bdoc = get_src_build().find_one({"_id": db_col_names})
        if follow_ref and bdoc and bdoc.get(
                "backend_url") and bdoc["backend_url"] != db_col_names:
            return create_backend(bdoc["backend_url"],
                                  name_only=name_only,
                                  follow_ref=follow_ref,
                                  **kwargs)
        else:
            db = mongo.get_target_db()
            col = db[db_col_names]
            # normalize params
            db_col_names = [
                "%s:%s" % (db.client.HOST, db.client.PORT), db.name, col.name
            ]
    elif db_col_names[0].startswith("mongodb://"):
        assert len(
            db_col_names
        ) == 3, "Missing connection information for %s" % repr(db_col_names)
        conn = mongo.MongoClient(db_col_names[0])
        db = conn[db_col_names[1]]
        col = db[db_col_names[2]]
        # normalize params
        db_col_names = [
            "%s:%s" % (db.client.HOST, db.client.PORT), db.name, col.name
        ]
    elif len(db_col_names) == 3 and ":" in db_col_names[0]:
        is_mongo = False
        idxr = ESIndexer(index=db_col_names[1],
                         doc_type=db_col_names[2],
                         es_host=db_col_names[0],
                         **kwargs)
        db = idxr
        col = db_col_names[1]
    else:
        assert len(
            db_col_names
        ) == 2, "Missing connection information for %s" % repr(db_col_names)
        db = db_col_names[0] == "target" and mongo.get_target_db(
        ) or mongo.get_src_db()
        col = db[db_col_names[1]]
        # normalize params (0:host, 1:port)
        db_col_names = [
            "%s:%s" % (db.client.address[0], db.client.address[1]), db.name,
            col.name
        ]
    assert col is not None, "Could not create collection object from %s" % repr(
        db_col_names)
    if name_only:
        if is_mongo:
            return "mongo_%s_%s_%s" % (db_col_names[0].replace(
                ":", "_"), db_col_names[1], db_col_names[2])
        else:
            return "es_%s_%s_%s" % (db_col_names[0].replace(
                ":", "_"), db_col_names[1], db_col_names[2])
    else:
        if is_mongo:
            return DocMongoBackend(db, col)
        else:
            return DocESBackend(db)
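A few usage sketches covering the input forms the docstring lists; hosts, databases and collection names below are placeholders:

# 1. Plain string: resolved through src_build (backend_url, if any) or the target database.
backend = create_backend("mygene_20210601_abc123")

# 2. ("target"|"src", collection_name) tuple.
backend = create_backend(("target", "mygene_20210601_abc123"))

# 3. (MongoDB URI, database, collection) tuple.
backend = create_backend(("mongodb://user:secret@localhost:27017", "biothings", "mygene_20210601_abc123"))

# 4. (Elasticsearch host, index, doc_type) tuple.
backend = create_backend(("localhost:9200", "mygene_current", "gene"))

# name_only=True returns a string identifier (e.g. "mongo_..." or "es_...") instead of a backend object.
name = create_backend("mygene_20210601_abc123", name_only=True)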
Example #17
def export_ids(col_name):
    """
    Export all _ids from collection named col_name.
    If col_name refers to a build where a cold_collection is defined,
    will also extract _ids and sort/uniq them to have the full list of _ids
    of the actual merged (cold+hot) collection
    Output file is stored in DATA_EXPORT_FOLDER/ids,
    defaulting to <DATA_ARCHIVE_ROOT>/export/ids. Output filename is
    returned as the end, if successful.
    """
    # prepare output directory
    DATA_EXPORT_FOLDER = getattr(btconfig,"DATA_EXPORT_FOLDER",None)
    if not DATA_EXPORT_FOLDER:
        DATA_EXPORT_FOLDER = os.path.join(btconfig.DATA_ARCHIVE_ROOT,"export")
    ids_export_folder = os.path.join(DATA_EXPORT_FOLDER,"ids")
    if not os.path.exists(ids_export_folder):
        logging.debug("Creating export/ids folder: %s" % ids_export_folder)
        os.makedirs(ids_export_folder)
    build = get_src_build().find_one({"_id":col_name})
    cold = None
    if build:
        col = get_target_db()[col_name]
        if build.get("build_config",{}).get("cold_collection"):
            cold_name = build["build_config"]["cold_collection"]
            cold = get_target_db()[cold_name]
            logging.info("Found a cold collection '%s' associated to '%s'" % (cold_name,col_name))
    else:
        # it's a src
        col = get_src_db()[col_name]
    
    # first iterate over all _ids. This will update the underlying _id cache if it's not valid anymore,
    # so we're sure to work with the latest data. If the cache is valid, this will be pretty fast.
    logging.info("Screening _ids in collection '%s'" % col.name)
    for _id in id_feeder(col,validate_only=True):
        pass
    # now accessing cache
    col_ids_cache = get_cache_filename(col.name)
    assert os.path.exists(col_ids_cache)
    logging.info("Now using cache file %s" % col_ids_cache)
    if cold:
        logging.info("Screening _ids in cold collection '%s'" % cold.name)
        for _id in id_feeder(cold,validate_only=True):
            pass
        # now accessing cache
        cold_ids_cache = get_cache_filename(cold.name)
        assert os.path.exists(cold_ids_cache)
        logging.info("Now using cache file %s" % cold_ids_cache)
    outfn = os.path.join(ids_export_folder,"%s_ids.xz" % col_name)
    # NOTE: can't use anyfile to open cache files and send _id through pipes
    # because it would load _id in memory (unless using hacks) so use cat (and
    # existing uncompressing ones, like gzcat/xzcat/...) to fully run the pipe
    # on the shell
    if cold:
        fout = anyfile(outfn,"wb")
        colext = os.path.splitext(col_ids_cache)[1]
        coldext = os.path.splitext(cold_ids_cache)[1]
        assert colext == coldext, "Hot and cold _id caches are compressed differently (%s and %s), they should be the same" % (colext, coldext)
        comp = colext.replace(".","")
        supportedcomps = ["xz","gz",""] # no compression allowed as well
        assert comp in supportedcomps, "Compression '%s' isn't supported (%s)" % (comp,supportedcomps)
        # IDs sent to pipe's input (sort) then compress it (xz)
        pcat = subprocess.Popen(["%scat" % comp, col_ids_cache, cold_ids_cache],stdout=subprocess.PIPE)
        psort = subprocess.Popen(["sort","-u"],stdin=pcat.stdout,stdout=subprocess.PIPE,universal_newlines=True)
        pcat.stdout.close() # will raise end of pipe error when finished
        if comp:
            pcomp = subprocess.Popen(["xz","-c"],stdin=psort.stdout,stdout=fout)
        else:
            # just print stdin to stdout
            pcomp = subprocess.Popen(["tee"],stdin=psort.stdout,stdout=fout)
        psort.stdout.close()
        try:
            logging.info("Running pipe to compute list of unique _ids")
            (out,err) = pcomp.communicate() # run the pipe! (blocking)
            if err:
                raise Exception(err)
        except Exception as e:
            logging.error("Error while running pipe to export _ids: %s" % e)
            # make sure to clean empty or half processed files
            try:
                os.unlink(outfn)
            except OSError:
                pass
            raise
    else:
        logging.info("Copying cache _id file")
        try:
            shutil.copyfile(col_ids_cache,outfn)
        except Exception as e:
            logging.error("Error while exporting _ids: %s" % e)
            # make sure to clean empty or half processed files
            try:
                os.unlink(outfn)
            except OSError:
                pass
            raise

    logging.info("Done exporting _ids to '%s'" % outfn)
    return outfn
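A usage sketch mirroring the post_publish hooks earlier in this listing; the collection name is hypothetical:

# Hypothetical merged-collection name; export_ids() returns the path of the written file.
ids_file = export_ids("mygene_20210601_abc123")
upload_ids(ids_file, "hg38_ids.xz",
           s3_bucket=config.IDS_S3_BUCKET,
           aws_key=config.AWS_KEY,
           aws_secret=config.AWS_SECRET)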
Example #18
 def pending_snapshot(build_name):
     src_build = get_src_build()
     src_build.update({"_id": build_name},
                      {"$addToSet": {
                          "pending": "snapshot"
                      }})
Example #19
    def register_status(self, status, transient=False, init=False, **extra):
        src_build = get_src_build()
        job_info = {
            'status': status,
            'step_started_at': datetime.now().astimezone(),
            'logfile': self.logfile,
        }
        # to select the correct diff sub-record (one collection can be diffed with multiple others)
        diff_key = "%s" % self.old.target_name
        # within that diff, select the correct sync sub-record (one diff can be applied to different backends).
        # Replace dots, since hostnames can contain dots which mongo would interpret as dotted fields.
        # Also drop the doc_type (last element of self.target_backend, sometimes None when the hub deals
        # with multiple APIs): it isn't useful to distinguish where the diff was applied, since only one
        # doc type is allowed anyway since ES6.
        sync_key = "-".join(self.target_backend[:-1]).replace(".", "-")
        sync_info = {sync_key: {}}
        if transient:
            # record some "in-progress" information
            job_info['pid'] = os.getpid()
        else:
            # only register time when it's a final state
            job_info["time"] = timesofar(self.ti)
            t1 = round(time.time() - self.ti, 0)
            job_info["time_in_s"] = t1
            sync_info[sync_key]["created_at"] = datetime.now().astimezone()
        if "sync" in extra:
            sync_info[sync_key].update(extra["sync"])
        if "job" in extra:
            job_info.update(extra["job"])
        # since the base is the merged collection, we register info there,
        # i.e. on the new collection (diff results are associated to the most recent collection)
        build = src_build.find_one({'_id': self.new.target_name})
        if not build:
            self.logger.info(
                "Can't find build document '%s', no status to register" %
                self.new.target_name)
            return
        assert "diff" in build and diff_key in build[
            "diff"], "Missing previous diff information in build document"
        if init:
            # init timer for this step
            self.ti = time.time()
            src_build.update({'_id': self.new.target_name},
                             {"$push": {
                                 'jobs': job_info
                             }})
            # now refresh/sync
            build = src_build.find_one({'_id': self.new.target_name})
        else:
            # merge extra at root level
            # (to keep building data...) and update the last one
            # (it's been properly created before when init=True)
            build["jobs"] and build["jobs"][-1].update(job_info)

            def merge_info(target, d):
                if "__REPLACE__" in d.keys():
                    d.pop("__REPLACE__")
                    target = d
                else:
                    for k, v in d.items():
                        if isinstance(v, dict):
                            if k in target:
                                target[k] = merge_info(target[k], v)
                            else:
                                v.pop("__REPLACE__", None)
                                # merge v with "nothing" just to make sure to remove any "__REPLACE__"
                                v = merge_info({}, v)
                                target[k] = v
                        else:
                            target[k] = v
                return target

            sync_info = {
                "sync":
                merge_info(build["diff"][diff_key].get("sync", {}), sync_info)
            }
            build["diff"][diff_key].update(sync_info)
            #src_build.update({'_id': build["_id"]}, {"$set": index_info})
            src_build.replace_one({"_id": build["_id"]}, build)
Example #20
 def get_build_doc(index_name):
     src_build = get_src_build()
     doc = src_build.find_one({"index." + index_name: {"$exists": True}})
     if not doc:
         logging.error("No build associated with index %s.", index_name)
     return doc
Example #21
def set_pending_to_publish(col_name):
    src_build = get_src_build()
    src_build.update({"_id": col_name}, {"$addToSet": {"pending": "publish"}})
Example #22
 def clean_stale_status(self):
     IndexJobStateRegistrar.prune(get_src_build())
Example #23
 def __init__(self, indexer):
     self.indexer = indexer
     self.state = self.state(get_src_build(),
                             indexer.build_name,
                             indexer.es_index_name,
                             logfile=indexer.logfile)
Example #24
 def poll(self, state, func):
     super().poll(state, func, col=get_src_build())
Example #25
 def collection(self):
     return get_src_build()
Example #26
    def register_status(self, status, transient=False, init=False, **extra):
        assert self.build_doc
        src_build = get_src_build()
        job_info = {
            'status': status,
            'step_started_at': datetime.now(),
            'logfile': self.logfile,
        }
        index_info = {
            "index": {
                self.index_name: {
                    'host': self.host,
                    'environment': self.env,
                    'conf_name': self.conf_name,
                    'target_name': self.target_name,
                    'index_name': self.index_name,
                    'doc_type': self.doc_type,
                    'num_shards': self.num_shards,
                    'num_replicas': self.num_replicas
                }
            }
        }
        if transient:
            # record some "in-progress" information
            job_info['pid'] = os.getpid()
        else:
            # only register time when it's a final state
            job_info["time"] = timesofar(self.ti)
            t1 = round(time.time() - self.ti, 0)
            job_info["time_in_s"] = t1
            index_info["index"][self.index_name]["created_at"] = datetime.now()
        if "index" in extra:
            index_info["index"][self.index_name].update(extra["index"])
        if "job" in extra:
            job_info.update(extra["job"])
        # since the base is the merged collection, we register info there
        build = src_build.find_one({'_id': self.target_name})
        assert build, "Can't find build document '%s'" % self.target_name
        if init:
            # init timer for this step
            self.ti = time.time()
            src_build.update({'_id': self.target_name},
                             {"$push": {
                                 'jobs': job_info
                             }})
            # now refresh/sync
            build = src_build.find_one({'_id': self.target_name})
        else:
            # merge extra at root level
            # (to keep building data...) and update the last one
            # (it's been properly created before when init=True)
            build["jobs"] and build["jobs"][-1].update(job_info)

            def merge_index_info(target, d):
                if "__REPLACE__" in d.keys():
                    d.pop("__REPLACE__")
                    target = d
                else:
                    for k, v in d.items():
                        if type(v) == dict:
                            if k in target:
                                target[k] = merge_index_info(target[k], v)
                            else:
                                v.pop("__REPLACE__", None)
                                # merge v with "nothing" just to make sure to remove any "__REPLACE__"
                                v = merge_index_info({}, v)
                                target[k] = v
                        else:
                            target[k] = v
                return target

            build = merge_index_info(build, index_info)
            src_build.replace_one({"_id": build["_id"]}, build)
Example #27
 def _doc(self, index):
     doc = get_src_build().find_one(
         {f"index.{index}.environment": self.idxenv})
     if not doc:  # not asso. with a build
         raise ValueError("Not a hub-managed index.")
     return doc  # TODO UNIQUENESS
Example #28
def set_pending_to_release_note(col_name):
    src_build = get_src_build()
    src_build.update({"_id": col_name}, {"$addToSet": {"pending": "release_note"}})