def serialize_value(value: Any):
    """Write an ``Edge`` result to disk and return the serialized manifest path.

    Values whose type is not exactly ``Edge`` are passed straight to
    ``BaseXCom.serialize_value``. For an ``Edge``, the five items of
    ``value.values()`` are unpacked as location, function name, source,
    params and result. The artefacts are written to::

        <location>/<funcname><md5(src)>/<md5(str(params))>/
            params.txt    -- ``str(params)``
            values.pkl    -- pickled result
            manifest.txt  -- JSON mapping of the three file paths

    :param value: the value pushed to XCom.
    :return: whatever ``BaseXCom.serialize_value`` returns for either the
        original value or the manifest path.
    """
    if type(value) is not Edge:
        return BaseXCom.serialize_value(value)

    location, funcname, src, params, result = value.values()

    # UTF-8 yields the same bytes (and digest) as ASCII for ASCII-only text,
    # but does not raise on non-ASCII source code or parameters.
    src_digest = hashlib.md5(src.encode("utf-8")).hexdigest()
    params_digest = hashlib.md5(str(params).encode("utf-8")).hexdigest()

    # makedirs(exist_ok=True) avoids the check-then-create race of
    # ``os.path.exists`` followed by ``os.mkdir``.
    path = os.path.join(location, f'{funcname}{src_digest}', params_digest)
    os.makedirs(path, exist_ok=True)

    params_path = os.path.join(path, 'params.txt')
    values_path = os.path.join(path, 'values.pkl')
    manifest_path = os.path.join(path, 'manifest.txt')

    with open(params_path, 'w') as f:
        f.write(str(params))

    # Every handle is closed explicitly so the data is flushed to disk before
    # the manifest path is handed to a consumer.
    with open(values_path, 'wb') as f:
        pickle.dump(result, f)

    manifest = {
        'params': params_path,
        'values': values_path,
        'manifest': manifest_path,
    }
    with open(manifest_path, 'w') as f:
        json.dump(manifest, f)

    return BaseXCom.serialize_value(manifest_path)
def test_xcom_init_on_load_uses_orm_deserialize_value(
        self, mock_orm_deserialize):
    """``init_on_load`` calls the mocked ORM deserializer once, with no args."""
    xcom_kwargs = {
        "key": "key",
        "value": "value",
        "timestamp": timezone.utcnow(),
        "execution_date": timezone.utcnow(),
        "task_id": "task_id",
        "dag_id": "dag_id",
    }
    # pylint: disable=unexpected-keyword-arg
    xcom = BaseXCom(**xcom_kwargs)
    # pylint: enable=unexpected-keyword-arg

    xcom.init_on_load()

    mock_orm_deserialize.assert_called_once_with()
def serialize_value(value: Any):
    """Put ``value`` into vineyard and return its serialized object id.

    The object is persisted when the ``'persist'`` option is truthy. What is
    handed to ``BaseXCom.serialize_value`` is ``repr()`` of the id returned by
    ``client.put``, not the value itself.

    :param value: the value pushed to XCom.
    :return: the result of ``BaseXCom.serialize_value`` for the id's repr.
    """
    # NOTE(review): ``options`` is called here, whereas ``set``, ``delete``
    # and ``clear`` subscript ``cls.options`` directly -- confirm which form
    # matches how ``options`` is defined.
    ipc_socket = VineyardXCom.options()['ipc_socket']
    vineyard_client = vineyard.connect(ipc_socket)

    object_id = vineyard_client.put(value)

    should_persist = VineyardXCom.options()['persist']
    if should_persist:
        vineyard_client.persist(object_id)

    logger.debug("serialize_value: %s -> %r", value, object_id)
    return BaseXCom.serialize_value(repr(object_id))
def set(cls, key, value, execution_date, task_id, dag_id, session=None):
    """
    Store an XCom value, replacing any existing row with the same key,
    execution date, task id and dag id.

    The vineyard objects referenced by the rows being replaced are deleted on
    a best-effort basis: a failure is logged and does not stop the database
    update.

    :return: None
    """
    session.expunge_all()

    serialized = VineyardXCom.serialize_value(value)

    # Rows that the new XCom is about to replace.
    duplicates = session.query(cls).filter(
        cls.key == key,
        cls.execution_date == execution_date,
        cls.task_id == task_id,
        cls.dag_id == dag_id,
    )

    # Step 1: drop the vineyard objects those rows point at.
    stale_ids = [
        vineyard.ObjectID(BaseXCom.deserialize_value(row))
        for row in duplicates.with_entities(VineyardXCom.value)
    ]
    if stale_ids:
        logger.info("Drop duplicates from vineyard: %s", stale_ids)
        try:
            client = vineyard.connect(cls.options['ipc_socket'])
            client.delete(stale_ids)
        except Exception as e:
            logger.error('Failed to drop duplicates from vineyard: %s', e)

    # Step 2: remove the rows themselves from the underlying XCom table.
    duplicates.delete()
    session.commit()

    # Finally insert the replacement row.
    replacement = VineyardXCom(
        key=key,
        value=serialized,
        execution_date=execution_date,
        task_id=task_id,
        dag_id=dag_id,
    )
    session.add(replacement)
    session.commit()
def serialize_value(value: Any):
    """Pickle ``value`` into Redis and return a serialized reference to it.

    The payload is stored under a fresh UUID4 key. The string handed to
    ``BaseXCom.serialize_value`` is that key prefixed with
    ``XComRedisBackend.PREFIX``.

    :param value: the value pushed to XCom; it is always pickled.
    :return: the result of ``BaseXCom.serialize_value`` for the reference.
    """
    hook = RedisHook(redis_conn_id=XComRedisBackend.CONN_ID)
    redis_key = str(uuid4())

    connection = hook.get_conn()
    payload = pickle.dumps(value)
    connection.set(redis_key, payload)

    # The prefix marks the XCom as a pointer into Redis, not a literal value.
    reference = XComRedisBackend.PREFIX + redis_key
    return BaseXCom.serialize_value(reference)
def deserialize_value(result) -> Any:
    """Resolve an XCom row, fetching the payload from Redis when referenced.

    The row is first decoded with ``BaseXCom.deserialize_value``. If the
    decoded value is a string starting with ``XComRedisBackend.PREFIX``, the
    rest of the string is used as a Redis key and the stored bytes are
    unpickled. Any other value is returned as decoded.

    :param result: the XCom row passed to ``BaseXCom.deserialize_value``.
    :return: the decoded value, or the unpickled Redis payload.
    """
    result = BaseXCom.deserialize_value(result)
    prefix = XComRedisBackend.PREFIX
    if not (isinstance(result, str) and result.startswith(prefix)):
        return result

    # Strip only the leading prefix. ``str.replace`` would also remove any
    # later occurrence of the prefix inside the key and corrupt it.
    key = result[len(prefix):]

    hook = RedisHook(redis_conn_id=XComRedisBackend.CONN_ID)
    payload = hook.get_conn().get(key)

    # SECURITY: ``pickle.loads`` can execute arbitrary code. This is only safe
    # while every writer to this Redis instance is trusted.
    return pickle.loads(payload)
def serialize_value(value: Any):
    """
    JSON-encode ``value`` into Redis and return the serialized key.

    The payload is written with ``mset`` under a key of the form
    ``data_<uuid4>``. Only that key is handed to
    ``BaseXCom.serialize_value``.
    """
    hook = RedisHook()
    # Return value discarded; presumably this call is what populates
    # ``hook.redis`` -- confirm against the hook implementation.
    hook.get_conn()
    redis_client = hook.redis

    redis_key = f"data_{uuid.uuid4()}"
    payload = json.dumps(value)
    redis_client.mset({redis_key: payload})

    return BaseXCom.serialize_value(redis_key)
def deserialize_value(result) -> Any:
    """Resolve an XCom row, downloading the JSON payload from S3 if needed.

    The row is first decoded with ``BaseXCom.deserialize_value``. If the
    decoded value is a string starting with ``S3XComBackend.PREFIX``, the rest
    of the string is used as the S3 key. The object is downloaded under
    ``/tmp`` and parsed as JSON. Any other value is returned as decoded.

    :param result: the XCom row passed to ``BaseXCom.deserialize_value``.
    :return: the decoded value, or the parsed JSON document from S3.
    """
    result = BaseXCom.deserialize_value(result)
    prefix = S3XComBackend.PREFIX
    if isinstance(result, str) and result.startswith(prefix):
        hook = S3Hook()
        # Strip only the leading prefix; ``str.replace`` would also remove
        # any later occurrence inside the key.
        key = result[len(prefix):]
        filename = hook.download_file(
            key=key,
            bucket_name=S3XComBackend.BUCKET_NAME,
            local_path="/tmp",
        )
        # ``download_file`` returns a path, while ``json.load`` needs an open
        # file object, so the file is opened (and closed) here.
        with open(filename) as f:
            result = json.load(f)
    return result
def deserialize_value(result) -> Any:
    """Resolve an XCom row, loading the pickled result a manifest points at.

    The row is first decoded with ``BaseXCom.deserialize_value``. If the
    decoded value is a string naming an existing filesystem path, that path is
    read as a JSON manifest and the file under its ``'values'`` entry is
    unpickled. Any other value is returned as decoded.

    NOTE(review): any string XCom that happens to be an existing path is
    treated as a manifest; confirm that ordinary path-valued XComs cannot
    reach this backend.

    :param result: the XCom row passed to ``BaseXCom.deserialize_value``.
    :return: the decoded value, or the unpickled object from the manifest.
    """
    result = BaseXCom.deserialize_value(result)
    if not isinstance(result, str) or not os.path.exists(result):
        return result

    # Context managers close both handles instead of leaving that to the
    # garbage collector.
    with open(result, 'r') as manifest_file:
        manifest = json.load(manifest_file)

    # SECURITY: ``pickle.load`` can execute arbitrary code. This is only safe
    # while the manifest and values file come from a trusted writer.
    with open(manifest['values'], 'rb') as values_file:
        return pickle.load(values_file)
def deserialize_value(result) -> Any:
    """
    Resolve an XCom row, fetching the JSON payload from Redis when the
    decoded value is a string.

    The row is first decoded with ``BaseXCom.deserialize_value``. A string
    result is used as a Redis key: the value is read with ``mget`` and parsed
    as JSON. Non-string results are returned as decoded.
    """
    # Imported locally so this method does not depend on a module-level
    # ``json`` import.
    import json

    result = BaseXCom.deserialize_value(result)
    if isinstance(result, str):
        hook = RedisHook()
        # Return value discarded; presumably this call is what populates
        # ``hook.redis`` -- confirm against the hook implementation.
        hook.get_conn()
        redis = hook.redis
        xcom = redis.mget(result)
        # The payload is written with ``json.dumps``, so ``json.loads`` is the
        # matching decoder. ``eval`` would execute whatever is stored in Redis
        # and fails on the JSON literals ``true``, ``false`` and ``null``.
        result = json.loads(xcom[0])
    return result
def deserialize_value(result) -> Any:
    """Resolve an XCom row, unpickling the payload from GCS when referenced.

    The row is first decoded with ``BaseXCom.deserialize_value``. If the
    decoded value is a string starting with ``GCSXComBackend.PREFIX``, the
    rest of the string is used as the object name in
    ``GCSXComBackend.BUCKET_NAME`` and the object is unpickled. Any other
    value is returned as decoded.

    :param result: the XCom row passed to ``BaseXCom.deserialize_value``.
    :return: the decoded value, or the unpickled object from GCS.
    """
    result = BaseXCom.deserialize_value(result)
    prefix = GCSXComBackend.PREFIX
    if isinstance(result, str) and result.startswith(prefix):
        # Strip only the leading prefix; ``str.replace`` would also remove
        # any later occurrence inside the object name.
        object_name = result[len(prefix):]
        hook = GCSHook()
        with hook.provide_file(bucket_name=GCSXComBackend.BUCKET_NAME,
                               object_name=object_name) as f:
            # Flush kept from the original; the handle is only read here.
            f.flush()
            # SECURITY: ``pickle.load`` can execute arbitrary code. This is
            # only safe while every writer to the bucket is trusted.
            result = pickle.load(f)
    return result
def serialize_value(value: Any):
    """Upload non-str/dict/list values to S3 as JSON and serialize a reference.

    ``str``, ``dict`` and ``list`` values are passed straight to
    ``BaseXCom.serialize_value``. Any other value is converted with
    ``json.loads(str(value))``, written to a JSON file, and uploaded to
    ``S3XComBackend.BUCKET_NAME`` under a ``data_<uuid4>`` key. The XCom then
    holds ``S3XComBackend.PREFIX`` followed by that key.

    :param value: the value pushed to XCom.
    :return: the result of ``BaseXCom.serialize_value`` for the value or the
        S3 reference.
    :raises json.JSONDecodeError: if ``str(value)`` is not valid JSON.
    """
    # Imported locally so this method does not depend on module-level imports
    # of these names.
    import os
    import tempfile

    if not isinstance(value, (str, dict, list)):
        hook = S3Hook()
        key = "data_" + str(uuid.uuid4())

        # Parse before touching disk so an invalid value leaves nothing behind.
        payload = json.loads(str(value))

        # Stage the upload in a temporary directory that is removed
        # afterwards, rather than leaving ``<key>.json`` in the current
        # working directory.
        with tempfile.TemporaryDirectory() as staging_dir:
            filename = os.path.join(staging_dir, f"{key}.json")
            with open(filename, 'w') as f:
                json.dump(payload, f)
            hook.load_file(filename=filename,
                           key=key,
                           bucket_name=S3XComBackend.BUCKET_NAME,
                           replace=True)

        value = S3XComBackend.PREFIX + key
    return BaseXCom.serialize_value(value)
def delete(cls, xcoms, session=None):
    """Delete Xcom rows and the vineyard objects they reference.

    Vineyard deletion is best-effort: a failure is logged and the database
    deletion is still committed.

    :param xcoms: a single ``VineyardXCom`` or an iterable of them.
    :param session: database session used to delete the rows and commit.
    :raises TypeError: if any element is not a ``VineyardXCom``.
    """
    if isinstance(xcoms, VineyardXCom):
        xcoms = [xcoms]

    targets = []
    for xcom in xcoms:
        if not isinstance(xcom, VineyardXCom):
            raise TypeError(f'Expected XCom; received {xcom.__class__.__name__}')
        # Rows with an empty value have no vineyard object to drop.
        if xcom.value:
            targets.append(vineyard.ObjectID(BaseXCom.deserialize_value(xcom)))
        session.delete(xcom)

    # Only contact vineyard when there is something to drop, as ``set`` and
    # ``clear`` do. This avoids a needless connection and a misleading error
    # log when vineyard is unreachable.
    if targets:
        logger.info("Drop from vineyard: %s", targets)
        try:
            client = vineyard.connect(cls.options['ipc_socket'])
            client.delete(targets)
        except Exception as e:
            logger.error('Failed to drop from vineyard: %s', e)

    session.commit()
def serialize_value(value: Any):
    """Pickle dict values to GCS and serialize a reference to the object.

    Non-dict values are passed straight to ``BaseXCom.serialize_value``. For a
    dict, the object name is chosen as follows:

    * ``model/<execution_date>/data.pickle`` when ``"execution_date"`` is
      present and ``"final"`` is truthy;
    * ``model/<execution_date>/temp/data_<model_name>_<timestamp>.pickle``
      when ``"execution_date"`` is present otherwise;
    * ``data/data_<uuid4>.pickle`` when ``"execution_date"`` is absent.

    The chosen name is stored on the caller's dict under ``"location"``
    (in-place mutation) before pickling. The XCom then holds
    ``GCSXComBackend.PREFIX`` followed by the object name.

    :param value: the value pushed to XCom.
    :return: the result of ``BaseXCom.serialize_value`` for the value or the
        GCS reference.
    """
    if not isinstance(value, Dict):
        return BaseXCom.serialize_value(value)

    hook = GCSHook()

    if "execution_date" not in value:
        object_name = f"data/data_{uuid4()}.pickle"
    elif value.get("final", False):
        object_name = f"model/{value['execution_date']}/data.pickle"
    else:
        run_date = value['execution_date']
        model_name = value['model_name']
        stamp = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
        object_name = f"model/{run_date}/temp/data_{model_name}_{stamp}.pickle"

    value["location"] = object_name

    with hook.provide_file_and_upload(
            bucket_name=GCSXComBackend.BUCKET_NAME,
            object_name=object_name) as upload_file:
        pickle.dump(value, upload_file)

    return BaseXCom.serialize_value(f"{GCSXComBackend.PREFIX}{object_name}")
def clear(
    cls,
    execution_date: pendulum.DateTime,
    dag_id: str,
    task_id: str,
    session: Session = None,
) -> None:
    """Remove the XCom rows of one task run and their vineyard objects.

    Vineyard deletion is best-effort: a failure is logged and the rows are
    still deleted. This method does not commit the session itself.

    :param execution_date: execution date of the rows to remove.
    :param dag_id: dag id of the rows to remove.
    :param task_id: task id of the rows to remove.
    :param session: database session used for the query and the delete.
    """
    matching = session.query(cls).filter(
        cls.dag_id == dag_id,
        cls.task_id == task_id,
        cls.execution_date == execution_date,
    )

    # Collect the vineyard object ids the matching rows point at.
    object_ids = [
        vineyard.ObjectID(BaseXCom.deserialize_value(row))
        for row in matching.with_entities(VineyardXCom.value)
    ]
    if object_ids:
        logger.info("Drop from vineyard: %s", object_ids)
        try:
            client = vineyard.connect(cls.options['ipc_socket'])
            client.delete(object_ids)
        except Exception as e:
            logger.error('Failed to drop from vineyard: %s', e)

    matching.delete()
def deserialize_value(result: "VineyardXCom") -> Any:
    """Decode an XCom row and resolve it through ``post_resolve_value``.

    The row is decoded with ``BaseXCom.deserialize_value``. The row and the
    decoded value are then passed to ``VineyardXCom.post_resolve_value``,
    whose result is returned.

    :param result: the XCom row to decode.
    :return: the value produced by ``VineyardXCom.post_resolve_value``.
    """
    decoded = BaseXCom.deserialize_value(result)
    resolved = VineyardXCom.post_resolve_value(result, decoded)
    logger.debug("deserialize_value: %s -> %s -> %s", result, decoded, resolved)
    return resolved