def _execute_custom(node: Node, output: Repository) -> ProvenanceLine:
    assert output.head is not None
    command, args = parse_custom_command(node)

    # Locate the command in the config file and instantiate it.
    cmd_fq_class: str = cast(str, get_all_in_section(CONFIG, "commands").get(command))
    if not cmd_fq_class:
        raise SplitfileError(
            "Custom command {0} not found in the config! Make sure you add an entry to your"
            " config like so:\n [commands] \n{0}=path.to.command.Class".format(command)
        )

    assert isinstance(cmd_fq_class, str)
    index = cmd_fq_class.rindex(".")
    try:
        cmd_class = getattr(import_module(cmd_fq_class[:index]), cmd_fq_class[index + 1:])
    except AttributeError as e:
        raise SplitfileError("Error loading custom command {0}".format(command)) from e
    except ImportError as e:
        raise SplitfileError("Error loading custom command {0}".format(command)) from e

    get_engine().run_sql("SET search_path TO %s", (output.to_schema(),))
    command = cmd_class()

    # Pre-flight check: get the new command hash and see if we can short-circuit
    # and just check the image out.
    command_hash = command.calc_hash(repository=output, args=args)
    output_head = output.head.image_hash

    if command_hash is not None:
        image_hash = _combine_hashes([output_head, command_hash])
        try:
            output.images.by_hash(image_hash).checkout()
            logging.info(" ---> Using cache")
            return {"type": "CUSTOM"}
        except ImageNotFoundError:
            pass

    logging.info(" Executing custom command...")
    exec_hash = command.execute(repository=output, args=args)
    command_hash = command_hash or exec_hash or "{:064x}".format(getrandbits(256))

    image_hash = _combine_hashes([output_head, command_hash])
    logging.info(" ---> %s" % image_hash[:12])

    # Check just in case if the new hash produced by the command already exists.
    try:
        output.images.by_hash(image_hash).checkout()
    except ImageNotFoundError:
        # Full command as a commit comment
        output.commit(image_hash, comment=node.text)
    return {"type": "CUSTOM"}
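# A minimal sketch of a custom command that _execute_custom above could load.
# Everything here is illustrative: the class name, module path, and hashing
# scheme are assumptions, not part of the Splitgraph API. The only contract the
# executor relies on is a no-argument constructor plus calc_hash() and execute()
# taking repository= and args= keyword arguments; calc_hash() may return None to
# force execution, and execute() may return a content hash of its own.
#
# The matching config entry would look like:
#   [commands]
#   MY_COMMAND=mypackage.commands.MyCommand
from hashlib import sha256
from typing import List, Optional

from splitgraph.core.repository import Repository  # import path assumed


class MyCommand:
    def calc_hash(self, repository: Repository, args: List[str]) -> Optional[str]:
        # Deterministic hash of the arguments: identical invocations against the
        # same parent image can then be served from the image cache.
        return sha256("\n".join(args).encode()).hexdigest()

    def execute(self, repository: Repository, args: List[str]) -> Optional[str]:
        # Mutate the checked-out schema; the executor commits the result afterwards.
        repository.run_sql("CREATE TABLE IF NOT EXISTS my_table (key INTEGER)")
        return None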
def build_repo():
    repo = Repository(namespace="abc", repository="1234")
    repo.delete()
    repo.init()
    df_to_table(fake_data(8), repository=repo, table="unit_test", if_exists='replace')
    new_img = repo.commit()
    new_img.checkout()
    return repo
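# fake_data() is referenced above but not defined in this snippet. A plausible
# stand-in (hypothetical, for illustration only) that returns a small pandas
# DataFrame of the requested length:
import pandas as pd


def fake_data(n: int) -> pd.DataFrame:
    # n rows of predictable test data for the "unit_test" table.
    return pd.DataFrame({"key": range(n), "value": ["val_{}".format(i) for i in range(n)]})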
def write(self, value_: Any, **kwargs: Any) -> Result:
    """
    Writes the result to a repository on Splitgraph.

    Args:
        - value_ (Any): the value to write; will then be stored as the `value` attribute
          of the returned `Result` instance
        - **kwargs (optional): if provided, will be used to format the `table`, `comment`,
          and `tag`

    Returns:
        - Result: returns a new `Result` with `value`, `comment`, `table`, and `tag` attributes
    """
    if self.schema is not None:
        errors = self.schema.validate(value_)
        if errors:
            raise SchemaValidationError(errors)

    new = self.format(**kwargs)
    new.value = value_

    repo_info = parse_repo(new.location)
    repo = Repository(namespace=repo_info.namespace, repository=repo_info.repository)
    remote = Repository.from_template(
        repo, engine=get_engine(repo_info.remote_name, autocommit=True))

    assert isinstance(value_, pd.DataFrame)

    if not repository_exists(repo) and self.auto_init_repo:
        self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
        repo.init()

    # TODO: Retrieve the repo from bedrock first

    self.logger.info("Starting to upload result to {}...".format(new.location))

    with self.atomic(repo.engine):
        self.logger.info("checkout")
        img = repo.head
        img.checkout(force=True)

        self.logger.info("df to table")
        df_to_table(new.value, repository=repo, table=repo_info.table, if_exists='replace')

        self.logger.info("commit")
        new_img = repo.commit(comment=new.comment, chunk_size=10000)
        new_img.tag(repo_info.tag)

    # if (repo.diff(new.table, img, new_img)):
    if self.auto_push:
        self.logger.info("push")
        repo.push(
            remote,
            handler="S3",
            overwrite_objects=True,
            overwrite_tags=True,
            reupload_objects=True,
        )

    self.logger.info("Finished uploading result to {}...".format(new.location))

    return new
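# parse_repo() and the location string it consumes are not shown in this snippet.
# Based on the attributes accessed above (remote_name, namespace, repository,
# table, tag), a minimal stand-in could look like the sketch below; the field
# order and the location format are assumptions, not the actual implementation.
from typing import NamedTuple


class RepoInfo(NamedTuple):
    remote_name: str
    namespace: str
    repository: str
    table: str
    tag: str


def parse_repo(location: str) -> RepoInfo:
    # Assumed layout: "<remote>/<namespace>/<repository>/<table>:<tag>"
    remote_name, namespace, repository, rest = location.split("/", 3)
    table, _, tag = rest.partition(":")
    return RepoInfo(remote_name, namespace, repository, table, tag or "latest")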
def write(self, value_: Any, **kwargs: Any) -> Result:
    """
    Writes the result to a repository on Splitgraph.

    Args:
        - value_ (Any): the value to write; will then be stored as the `value` attribute
          of the returned `Result` instance
        - **kwargs (optional): if provided, will be used to format the `table`, `comment`,
          and `tag`

    Returns:
        - Result: returns a new `Result` with `value`, `comment`, `table`, and `tag` attributes
    """
    cfg = patch_config(create_config_dict(), self.env or dict())
    engine = PostgresEngine(name='SplitgraphResult', conn_params=cfg)
    engine.initialize()

    repo = Repository(namespace=self.namespace, repository=self.repo_name, engine=engine)

    assert isinstance(value_, pd.DataFrame)
    assert engine.connected

    if not repository_exists(repo) and self.auto_init_repo:
        self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
        repo.init()

    # TODO: Retrieve the repo from bedrock first

    new = self.format(**kwargs)
    new.value = value_

    self.logger.info("Starting to upload result to {}...".format(new.table))

    with self.atomic(engine):
        self.logger.info("checkout")
        img = repo.head
        img.checkout(force=True)

        self.logger.info("df to table")
        df_to_table(new.value, repository=repo, table=new.table, if_exists='replace')

        self.logger.info("commit")
        new_img = repo.commit(comment=new.comment, chunk_size=10000)
        new_img.tag(new.tag)

    # if (repo.diff(new.table, img, new_img)):
    if self.auto_push:
        self.logger.info("push")
        repo.push(
            self.get_upstream(repo),
            handler="S3",
            overwrite_objects=True,
            overwrite_tags=True,
            reupload_objects=True,
        )

    engine.close()

    self.logger.info("Finished uploading result to {}...".format(new.table))

    return new
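# self.atomic(engine) is used in both write() variants above but not defined in
# this snippet. A plausible implementation (an assumption, not the actual code)
# is a small context manager that commits the engine's transaction on success
# and rolls it back on failure, so the checkout/commit/tag steps apply atomically:
from contextlib import contextmanager

from splitgraph.engine.postgres.engine import PostgresEngine  # import path assumed


@contextmanager
def atomic(engine: PostgresEngine):
    try:
        yield
        engine.commit()
    except Exception:
        engine.rollback()
        raise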