def check_hash(self, raw, phash: bool, bias=0.0, threshold=13, bounces=None):
    raw_str = str(raw)
    minimum = 1000
    if phash:  # which hash? True -> phash, False -> whash
        redisdb = self.phashdb
        redisdb_clean = self.phashdb_clean
        # locks for Redis concurrency
        lock = Redlock(key='phashdb', masters={self.redis})
        lock_clean = Redlock(key='phashdb_clean', masters={self.redis})
    else:
        redisdb = self.whashdb
        redisdb_clean = self.whashdb_clean
        # locks for Redis concurrency
        lock = Redlock(key='whashdb', masters={self.redis})
        lock_clean = Redlock(key='whashdb_clean', masters={self.redis})
    if self.is_in_db(redisdb_clean, raw_str, lock=lock_clean):
        # already processed and marked clean, so it's not banned
        return False, -2
    elif self.is_in_db(redisdb, raw_str, lock=lock):
        # exact match in the banned db
        return True, -2
    else:
        # check for similar hashes
        result, value = self.calc_similar_hash(redisdb, raw, raw_str, bias,
                                               threshold, bounces, lock=lock)
        minimum = min(minimum, value)
        if result:
            return result, value  # a similar hash is in the db
        else:
            self.set_in_db(redisdb_clean, raw_str, True, lock=lock_clean)  # mark as safe
            return False, minimum  # it's not
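A minimal sketch of the similarity test that check_hash delegates to calc_similar_hash (calc_similar_hash itself is not shown in this section, so this is an illustrative stand-in). It assumes the hashes come from the imagehash library (imported as `im` in these snippets), whose ImageHash objects implement subtraction as Hamming distance; the threshold of 13 mirrors the default above.

import imagehash as im
from PIL import Image

def hashes_are_similar(hash_a: im.ImageHash, hash_b: im.ImageHash,
                       threshold: int = 13) -> bool:
    # Hamming distance between two perceptual hashes; smaller means more
    # visually similar. 13 matches the default threshold used by check_hash.
    return hash_a - hash_b <= threshold

# Illustrative usage (file names are hypothetical):
# hashes_are_similar(im.phash(Image.open("a.png")), im.phash(Image.open("b.png")))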
def setUp(self):
    super().setUp()
    self.redlock = Redlock(
        masters={self.redis},
        key='printer',
        auto_release_time=100,
    )
def unban_image(self, path):
    img = Image.open(path)
    lock_phash = Redlock(key='phashdb', masters={self.redis})
    lock_whash = Redlock(key='whashdb', masters={self.redis})
    raw_phash = im.phash(img)
    raw_whash = im.whash(img)
    self.exec_similar_hash(self.phashdb, raw_phash, 0, 13, self.del_from_db, lock=lock_phash)
    self.exec_similar_hash(self.whashdb, raw_whash, 0, 13, self.del_from_db, lock=lock_whash)
def materialized_views_execute_sensor(context: SensorExecutionContext):
    """Sensor for executing materialized views based on cron expressions."""
    # Set up Redis and Redlock
    r = Redis(constants.REDIS_HOST.value)
    lock = Redlock(
        key=constants.REDIS_KEY_MAT_VIEWS_MATERIALIZE_SENSOR_LOCK.value,
        auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        masters=[r],
    )
    if lock.acquire(timeout=2):
        lock.release()
    else:
        yield SkipReason("Another run is already in progress!")
        return
    rp = RedisPal(constants.REDIS_HOST.value)
    # Get managed materialized views
    managed_materialized_views: dict = rp.get("managed_materialized_views")
    if managed_materialized_views is None:
        managed_materialized_views = {}
        managed_materialized_views["views"] = {}
    # Get current timestamp
    now = datetime.datetime.now(pytz.timezone("America/Sao_Paulo"))
    # Iterate over all managed materialized views, storing a list
    # of all queries to be executed
    queries_to_execute: list = []
    for blob_name, view_config in managed_materialized_views["views"].items():
        if (view_config["last_run"] is None
                or determine_whether_to_execute_or_not(
                    view_config["cron_expression"], now,
                    view_config["last_run"])) and view_config["materialized"]:
            # Add to the list of queries to execute
            queries_to_execute.append(blob_name)
    # Launch a run if we have any queries to execute
    if queries_to_execute:
        # Get run configuration
        config: dict = read_config(
            Path(__file__).parent / "materialized_views_execute.yaml")
        # Get run key
        run_key = build_run_key("materialized_views_execute", now)
        # Set inputs
        config["solids"]["resolve_dependencies_and_execute"]["inputs"][
            "queries_names"]["value"] = queries_to_execute
        yield RunRequest(run_key=run_key, run_config=config)
    else:
        # Tell Dagit why we skipped
        yield SkipReason("No materialization requested for now")
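The guard at the top of that sensor, isolated as a sketch: try the lock with a short timeout purely to detect a concurrent run, and release it immediately on success. This assumes a local Redis on the default port; the key name and release time are illustrative.

from redis import Redis
from pottery import Redlock

r = Redis()
probe = Redlock(key="sensor_probe", masters={r}, auto_release_time=10 * 1000)
if probe.acquire(timeout=2):  # wait at most 2 seconds for the lock
    probe.release()           # release right away; we only wanted to know it was free
else:
    print("Another run is already in progress!")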
def debug(self, path):
    img = Image.open(path)
    lock_phash = Redlock(key='phashdb', masters={self.redis})
    lock_whash = Redlock(key='whashdb', masters={self.redis})
    raw_phash = im.phash(img)
    raw_whash = im.whash(img)
    toreturn = [self.calculate_hashes(path)]
    toreturn.append("phashdb")
    print("phashdb")
    toreturn.append(self.debug_similar_hash(self.phashdb, raw_phash, 13, lock=lock_phash))
    print("whashdb")
    toreturn.append("whashdb")
    toreturn.append(self.debug_similar_hash(self.whashdb, raw_whash, 13, lock=lock_whash))
    return "\n".join(toreturn)
def ban_image(self, path):
    img = Image.open(path)
    lock_phash_clean = Redlock(key='phashdb_clean', masters={self.redis})
    lock_whash_clean = Redlock(key='whashdb_clean', masters={self.redis})
    lock_phash = Redlock(key='phashdb', masters={self.redis})
    lock_whash = Redlock(key='whashdb', masters={self.redis})
    raw_phash = im.phash(img)
    raw_whash = im.whash(img)
    phash = str(raw_phash)
    whash = str(raw_whash)
    self.set_in_db(self.phashdb, phash, 0, lock=lock_phash)
    self.set_in_db(self.whashdb, whash, 0, lock=lock_whash)
    # Remove duplicates from the clean phash db, if any
    self.exec_similar_hash(self.phashdb_clean, raw_phash, 0, 13, self.del_from_db,
                           lock=lock_phash_clean)
    # Remove duplicates from the clean whash db, if any
    self.exec_similar_hash(self.whashdb_clean, raw_whash, 0, 13, self.del_from_db,
                           lock=lock_whash_clean)
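An illustrative end-to-end flow tying ban_image and check_hash together. Here img_filter stands for a hypothetical instance of the class these methods belong to (the class definition is not part of this section), and the file names are placeholders.

from PIL import Image
import imagehash as im

img_filter.ban_image("banned.png")
raw = im.phash(Image.open("near_duplicate.png"))
banned, value = img_filter.check_hash(raw, phash=True)
assert banned  # a hash within the similarity threshold of a banned one is flagged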
def jtag_x3xx(jtag_args, redis_server):
    remote_working_dir = "pipeline_fpga"
    vivado_program_jtag = ("/opt/Xilinx/Vivado_Lab/2020.1/bin/vivado_lab "
                           "-mode batch -source {}/viv_hardware_utils.tcl "
                           "-nolog -nojournal -tclargs program").format(remote_working_dir)
    jtag_server, jtag_serial, fpga_path = jtag_args.split(",")
    print("Waiting on jtag mutex for {}".format(jtag_server), flush=True)
    with Redlock(key="hw_jtag_{}".format(jtag_server),
                 masters=redis_server,
                 auto_release_time=1000 * 60 * 5):
        print("Got jtag mutex for {}".format(jtag_server), flush=True)
        with Connection(host=jtag_server) as jtag_host:
            jtag_host.run("mkdir -p " + remote_working_dir)
            jtag_host.run("rm -rf {}/*".format(remote_working_dir))
            jtag_host.put(os.path.join(pathlib.Path(__file__).parent.absolute(),
                                       "jtag/viv_hardware_utils.tcl"),
                          remote=remote_working_dir)
            jtag_host.put(fpga_path, remote=remote_working_dir)
            jtag_host.run(vivado_program_jtag + " " +
                          os.path.join(remote_working_dir, os.path.basename(fpga_path)) +
                          " " + jtag_serial)
        print("Waiting 15 seconds for device to come back up and for Vivado to close",
              flush=True)
        time.sleep(15)
def setUp(self):
    super().setUp()
    self.redis = _default_redis
    self.redlock = Redlock(
        masters={self.redis},
        key='printer',
        auto_release_time=100,
    )
def get_materialize_sensor_lock(context):
    """Get a lock for the materialization sensor."""
    r = Redis(constants.REDIS_HOST.value)
    lock = Redlock(
        key=constants.REDIS_KEY_MAT_VIEWS_MATERIALIZE_SENSOR_LOCK.value,
        masters=[r],
        auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
    )
    return lock
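A usage sketch for the helper above: take the sensor lock non-blockingly and release it in a finally block. do_materialization is a hypothetical placeholder for work done under the lock; acquire(blocking=False) returning a bool is the pottery behavior exercised by the tests later in this section.

lock = get_materialize_sensor_lock(context)
if lock.acquire(blocking=False):
    try:
        do_materialization()  # hypothetical work guarded by the lock
    finally:
        lock.release()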
def delete_managed_views(
    context,
    blob_names,
    materialization_locked: bool,
    materialization_lock: Redlock,
):
    try:
        r = Redis(constants.REDIS_HOST.value)
        rp = RedisPal(constants.REDIS_HOST.value)
        lock = Redlock(
            key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
            masters=[r],
            auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        )
        with lock:
            materialized_views: dict = rp.get(
                constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
            if materialized_views is None:
                materialized_views = {}
                materialized_views["views"] = {}
            for blob_name in blob_names:
                context.log.info(f"Deleting managed view {blob_name}")
                if blob_name in materialized_views["views"]:
                    del materialized_views["views"][blob_name]
                    prefix: str = os.getenv("BQ_PROJECT_NAME", "rj-smtr-dev")
                    table_name: str = f"{prefix}.{blob_name}"
                    update_view(table_name, {}, "", "", "", delete=True)
                    context.log.info("Success!")
                else:
                    context.log.info("View not found, skipping...")
            rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                   materialized_views)
    except Exception as e:
        try:
            materialization_lock.release()
        except:
            pass
        raise e
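The cleanup-on-failure pattern repeated across these solids, sketched with contextlib.suppress instead of a bare except. ReleaseUnlockedLock is the pottery exception raised when releasing a lock that is not held (see the tearDown and tests later in this section); the solid body is elided.

import contextlib
from pottery import ReleaseUnlockedLock

try:
    ...  # solid body elided
except Exception:
    # Suppress only the "lock not held / already released" case,
    # then re-raise the original error.
    with contextlib.suppress(ReleaseUnlockedLock):
        materialization_lock.release()
    raise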
def main(args):
    redis_server = {
        Redis.from_url("redis://{}:6379/0".format(args.redis_server))
    }
    print("Waiting to acquire mutex for {}".format(args.dut_name), flush=True)
    with Redlock(key=args.dut_name, masters=redis_server,
                 auto_release_time=1000 * 60 * args.dut_timeout):
        print("Got mutex for {}".format(args.dut_name), flush=True)
        if args.jtag_x3xx is not None:
            jtag_x3xx(args.jtag_x3xx, redis_server)
        for command in args.test_commands:
            result = subprocess.run(shlex.split(command))
            if result.returncode != 0:
                sys.exit(result.returncode)
    sys.exit(0)
def home_view(request):
    template = 'polls/home.html'
    if request.user.is_authenticated:
        polluser = PollUser.objects.get(user=request.user)
        if polluser.national_id == "0":  # user has no national_id yet
            return HttpResponseRedirect(reverse('polls:get_national_id'))
        if not polluser.payment_id_valid():
            return HttpResponseRedirect(reverse('polls:get_payment_id'))
        error_message = ""
        if request.method == 'POST':
            section = Section.objects.get(pk=request.POST['section_pk'])
            reserve_lock = Redlock(key=f"res_{section.pk}", masters={redis},
                                   auto_release_time=60 * 1000)
            reserve_lock.acquire()
            usrs = section.usr_set.all()
            if len(usrs) >= 3:
                # "This shift is already full for this date and station"
                error_message = "این شیفت در این تاریخ و ایستگاه پر شده است"
            elif len(usrs) == 2 and usrs[0].polluser.sex == usrs[1].polluser.sex == polluser.sex == 0:
                # "Due to the matching policy, three boys cannot staff one station!!!"
                error_message = "ا توجه به طرح تطبیق امکان حضور سه پسر در یک ایستگاه وجود ندارد!!!"
            else:
                # overlap filter not supported for current jdatetime:
                # USR.objects.filter(section__end__gt=section.start,
                #                    section__start__lt=section.end,
                #                    polluser=polluser).exists()
                if [usr for usr in USR.objects.filter(polluser=polluser)
                        if usr.section.start < section.end and usr.section.end > section.start]:
                    # "You have already reserved a shift on this day and date"
                    error_message = "شما قبلا در این روز و تاریخ شیفتی رزرو کرده اید"
                else:
                    # TODO
                    if section.available_from <= jdatetime.datetime.now():
                        USR.objects.create(polluser=polluser, section=section)
                        # "Reserved"
                        error_message = "رزرو شد"
                    else:
                        # "Shift reservation is not possible right now"
                        error_message = "هم اکنون امکان رزرو شیفت وجود ندارد"
            reserve_lock.release()
        # day.section_set.order_by('index', 'station')
        sections = [section for section in
                    Section.objects.filter(available_from__lt=jdatetime.datetime.now())
                    if section.usr_set.count() < 3]
        user_usr = [usr for usr in
                    USR.objects.filter(polluser__user=request.user).order_by('section__start')]
        return render(request, template, {'sections': sections,
                                          'error_message': error_message,
                                          'user_usr': user_usr,
                                          'polluser': polluser})
    else:
        return HttpResponseRedirect(reverse('polls:register'))
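A sketch of the same reservation critical section with the Redlock used as a context manager, so the lock is released even if the body raises (the view above calls acquire() and release() manually and would hold the lock until timeout on an unhandled exception). try_reserve is a hypothetical helper wrapping the capacity checks and USR.objects.create above.

with Redlock(key=f"res_{section.pk}", masters={redis}, auto_release_time=60 * 1000):
    error_message = try_reserve(section, polluser)  # hypothetical helper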
class RedlockTests(TestCase):
    'Distributed Redis-powered lock tests.'

    def setUp(self):
        super().setUp()
        self.redlock = Redlock(
            masters={self.redis},
            key='printer',
            auto_release_time=100,
        )

    def test_acquire_and_time_out(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        with ContextTimer() as timer:
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert timer.elapsed() >= self.redlock.auto_release_time

    def test_acquire_same_lock_twice_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(blocking=False)
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        with self.assertRaises(ValueError):
            self.redlock.acquire(blocking=False, timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert self.redlock.locked()
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_extend(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ExtendUnlockedLock):
            self.redlock.extend()
        assert self.redlock.acquire()
        for extension_num in range(3):
            with self.subTest(extension_num=extension_num):
                self.redlock.extend()
        with self.assertRaises(TooManyExtensions):
            self.redlock.extend()

    def test_acquire_then_release(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        self.redlock.release()
        assert not self.redis.exists(self.redlock.key)

    def test_release_unlocked_lock(self):
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_releaseunlockedlock_repr(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert repr(wtf) == (
                f"ReleaseUnlockedLock(masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db={self.redis_db}>>>], "
                "key='redlock:printer')")

    def test_releaseunlockedlock_str(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert str(wtf) == (
                f"masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db={self.redis_db}>>>], "
                "key='redlock:printer'")

    def test_release_same_lock_twice(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        self.redlock.release()
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_context_manager(self):
        assert not self.redis.exists(self.redlock.key)
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_acquired_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
            assert not self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_release_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            self.redlock.release()
            assert not self.redis.exists(self.redlock.key)

    def test_repr(self):
        assert repr(self.redlock) == \
            "<Redlock key=redlock:printer value=b'' timeout=0>"
def get_configs_for_materialized_view(context, query_names: list,
                                      materialization_locked: bool,
                                      materialization_lock) -> dict:
    """Retrieves configs for materialized views."""
    try:
        for query_name in query_names:
            # Split query name into dataset_name and view_name
            dataset_name, view_name = query_name.split(".")

            # Load configs from GCS
            view_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, dataset_name, view_name)}.yaml'
            defaults_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, dataset_name)}/defaults.yaml'
            context.log.info(f"Defaults blob: {defaults_yaml}")
            context.log.info(f"View blob: {view_yaml}")
            defaults_blob = get_blob(defaults_yaml, SENSOR_BUCKET, mode="staging")
            view_blob = get_blob(view_yaml, SENSOR_BUCKET, mode="staging")
            if defaults_blob is None:
                raise Exception(f"Blob {defaults_yaml} not found!")
            defaults_dict = yaml.safe_load(defaults_blob.download_as_string())
            if view_blob:
                view_dict = yaml.safe_load(view_blob.download_as_string())
            else:
                context.log.warning(f"Blob {view_yaml} not found. This is not an error.")
                view_dict = {}

            # Merge configs
            query_params = {**defaults_dict, **view_dict}

            # Build base configs
            now = datetime.datetime.now(pytz.timezone("America/Sao_Paulo"))
            run_key = build_run_key(query_name, now)
            with open(str(Path(__file__).parent / "materialized_views_base_config.yaml"),
                      "r") as f:
                base_params: dict = yaml.safe_load(f)
            base_params["run_timestamp"] = "'{}'".format(
                convert_datetime_to_datetime_string(now))
            base_params["maestro_sha"] = "'{}'".format(
                fetch_branch_sha(constants.MAESTRO_REPOSITORY.value,
                                 constants.MAESTRO_DEFAULT_BRANCH.value))
            base_params["maestro_bq_sha"] = "'{}'".format(
                fetch_branch_sha(constants.MAESTRO_BQ_REPOSITORY.value,
                                 constants.MAESTRO_BQ_DEFAULT_BRANCH.value))
            base_params["run_key"] = "'{}'".format(run_key)

            # A few more params
            r = Redis(constants.REDIS_HOST.value)
            rp = RedisPal(constants.REDIS_HOST.value)
            lock = Redlock(
                key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
                masters=[r],
                auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
            )
            table_name = parse_filepath_to_tablename(view_yaml)
            with lock:
                managed = rp.get(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
                if managed is None:
                    managed = {}
                    managed["views"] = {}
                if query_name not in managed["views"]:
                    raise Exception(
                        f"Query {query_name} not found in managed views: {managed}")
                d = managed["views"][query_name]
                changed = d["query_modified"]
                context.log.info(f"{query_name} changed: {changed}")
                d["query_modified"] = False
                last_run = d["last_run"]
                d["last_run"] = now
                rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value, managed)

            # Get query on GCS
            query_file = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, dataset_name, view_name)}.sql'
            query_blob = get_blob(query_file, SENSOR_BUCKET, mode="staging")
            if query_blob is None:
                raise Exception(f"Blob {query_file} not found!")
            base_query = query_blob.download_as_string().decode("utf-8")

            # Get parent queries on GCS
            parent_queries = {}
            for parent_query_name in d["depends_on"]:
                if (parent_query_name in managed["views"]
                        and managed["views"][parent_query_name]["materialized"]):
                    continue
                query_file = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, "/".join(parent_query_name.split(".")[:2]))}.sql'
                query_blob = get_blob(query_file, SENSOR_BUCKET, mode="staging")
                if query_blob is None:
                    context.log.warning(
                        f"Blob for parent query \"{query_file}\" not found, skipping...")
                    continue
                parent_view_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, "/".join(parent_query_name.split(".")[:2]))}.yaml'
                parent_view_blob = get_blob(parent_view_yaml, SENSOR_BUCKET, mode="staging")
                if parent_view_blob is not None:
                    parent_view_dict = yaml.safe_load(parent_view_blob.download_as_string())
                else:
                    parent_view_dict = {}
                parent_defaults_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, "/".join(parent_query_name.split(".")[:1]))}/defaults.yaml'
                parent_defaults_blob = get_blob(parent_defaults_yaml, SENSOR_BUCKET,
                                                mode="staging")
                if parent_defaults_blob is not None:
                    parent_defaults_dict = yaml.safe_load(
                        parent_defaults_blob.download_as_string())
                else:
                    context.log.warning(
                        f"Blob for parent query \"{parent_defaults_yaml}\" not found, skipping...")
                    continue
                parent_queries[parent_query_name] = {}
                parent_queries[parent_query_name]["base_query"] = \
                    query_blob.download_as_string().decode("utf-8")
                parent_queries[parent_query_name]["query_params"] = {
                    **parent_defaults_dict,
                    **parent_view_dict,
                }
            context.log.info(f"Parent queries: {parent_queries}")

            # Build configs:
            # - table_name: str
            # - changed: bool
            # - base_query: str
            # - base_params: dict
            # - query_params: dict
            # - now: str
            # - last_run: str
            date_ranges = get_date_ranges(
                last_run if last_run else query_params["backfill"]["start_timestamp"],
                query_params["backfill"]["interval"],
                now,
            )
            context.log.info(f"{date_ranges}")
            for i, _ in enumerate(date_ranges[:-1]):
                configs = {
                    "table_name": table_name,
                    "changed": changed if i == 0 else False,
                    "base_query": base_query,
                    "base_params": base_params,
                    "query_params": query_params,
                    "now": date_ranges[i + 1],
                    "last_run": date_ranges[i],
                    "parent_queries": parent_queries,
                }
                yield DynamicOutput(
                    {
                        "config_dict": configs,
                        "materialization_lock": materialization_lock,
                    },
                    mapping_key=f'{configs["table_name"]}_{configs["last_run"]}_{configs["now"]}'
                    .replace(".", "_").replace("-", "_").replace(" ", "_").replace(":", "_"))
    except Exception as e:
        try:
            locks.release(materialization_lock)
        except:
            pass
        raise e
def manage_view(context, input_dict):
    view_name = input_dict["view_name"]
    materialization_lock = input_dict["materialization_lock"]
    try:
        # Set up Redis and Redlock
        r = Redis(constants.REDIS_HOST.value)
        rp = RedisPal(constants.REDIS_HOST.value)
        lock = Redlock(
            key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
            masters=[r],
            auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        )
        # Get materialization information from Redis
        materialized_views: dict = rp.get(
            constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
        if materialized_views is None:
            materialized_views = {}
            materialized_views["views"] = {}
        materialized = materialized_views["views"][view_name]["materialized"]
        # If this view is materialized, generate a temporary plain view
        if materialized:
            with lock:
                materialized_views["views"][view_name]["query_modified"] = True
                materialized_views["views"][view_name]["last_run"] = None
                rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                       materialized_views)
            context.log.info(
                f"Generate {view_name} as a view for now; materialization comes later")
        # We need to build the query using the latest
        # parameters and create a view with it.
        # Get defaults for view_name
        blob_path = os.path.join(*([MATERIALIZED_VIEWS_PREFIX] +
                                   [n for n in view_name.split(".")][:-1]))
        defaults_path = blob_path + "/defaults.yaml"
        context.log.info(f"Defaults path -> {defaults_path}")
        defaults_blob = get_blob(defaults_path, SENSOR_BUCKET, mode="staging")
        if defaults_blob is None:
            raise Exception(f"Blob {defaults_path} not found")
        defaults_dict: dict = yaml.safe_load(defaults_blob.download_as_string())
        # Parse dataset_name
        dataset_name = view_name.split(".")[0]
        # Parse the view YAML path
        view_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, view_name)}.yaml'
        # Parse table_name
        prefix: str = os.getenv("BQ_PROJECT_NAME", "rj-smtr-dev")
        table_name: str = f"{prefix}.{view_name}"
        context.log.info(f"Table name is {table_name}")
        # Update the view
        update_view(table_name,
                    defaults_dict,
                    dataset_name,
                    view_name.split(".")[-1],
                    view_yaml,
                    delete=False,
                    context=context)
    except Exception as e:
        try:
            materialization_lock.release()
        except:
            pass
        raise e
def update_managed_views(
    context,
    blob_names,
    materialization_locked: bool,
    materialization_lock: Redlock,
):
    try:
        # Set up Redis and Redlock
        r = Redis(constants.REDIS_HOST.value)
        rp = RedisPal(constants.REDIS_HOST.value)
        views_lock = Redlock(
            key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
            masters=[r],
            auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        )
        # Initialize the dependency graph
        graph = nx.DiGraph()
        # If blob_name ends with "defaults.yaml", we need to either add it to
        # Redis or update its values, and add runs for every child it has and
        # their dependencies.
        for blob_name in [b for b in blob_names if b.endswith("defaults.yaml")]:
            # Get dataset name
            blob_path = "/".join([n for n in blob_name.split("/") if n != ""][:-1])
            dataset_name: str = blob_path.split("/")[-1]
            context.log.info("#" * 80)
            context.log.info(f"Updating {dataset_name} defaults")
            # Read the blob
            blob = get_blob(blob_name, SENSOR_BUCKET, mode="staging")
            if blob is None:
                raise Exception(f"Blob {blob_name} not found")
            blob_dict: dict = yaml.safe_load(blob.download_as_string())
            # Add it to Redis
            with views_lock:
                materialized_views: dict = rp.get(
                    constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
                if materialized_views is None:
                    materialized_views = {}
                    materialized_views["views"] = {}
                # Add every child to Redis
                if "views" not in blob_dict:
                    raise Exception(f"Malformed blob (missing views key): {blob_name}")
                for key in blob_dict["views"].keys():
                    # Build key with dataset_name
                    m_key = f"{dataset_name}.{key}"
                    # This child also needs a run
                    context.log.info(f"Adding {m_key} to runs")
                    if m_key not in graph.nodes:
                        graph.add_node(m_key)
                    # Avoid a KeyError
                    if "views" not in materialized_views:
                        materialized_views["views"] = {}
                    # Add to Redis
                    if m_key not in materialized_views["views"]:
                        materialized_views["views"][m_key] = {}
                    update_dict_with_dict(
                        materialized_views["views"][m_key], {
                            "cron_expression": blob_dict["scheduling"]["cron"],
                            "last_run": None,
                            "materialized": blob_dict["views"][key]["materialized"],
                            "query_modified": True,
                            "depends_on": blob_dict["views"][key]["depends_on"],
                        })
                    # Add dependencies to runs
                    for dep in blob_dict["views"][key]["depends_on"]:
                        context.log.info(
                            f"Adding {dep} to runs as dependency of {m_key}")
                        if dep not in graph.nodes:
                            graph.add_node(dep)
                        graph.add_edge(dep, m_key)
                    # Try to find specific values for this view
                    blob = get_blob(blob_path + "/" + key + ".yaml",
                                    SENSOR_BUCKET, mode="staging")
                    if blob:
                        # Replace values in Redis
                        specific = yaml.safe_load(
                            blob.download_as_string().decode("utf-8"))
                        materialized_views["views"][m_key]["cron_expression"] = \
                            specific["scheduling"]["cron"]
                    else:
                        context.log.warning(
                            f"No specific values for {m_key} found. This is not an error.")
                # Effectively update Redis
                rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                       materialized_views)
        # Otherwise, we need to add the blob_name and its
        # dependencies to the graph.
        for blob_name in [b for b in blob_names if not b.endswith("defaults.yaml")]:
            # Get table name
            file_name = ".".join(blob_name.split("/")[-2:])
            table_name = ".".join(file_name.split(".")[:-1])
            context.log.info("#" * 80)
            context.log.info(f"Updating {table_name} specific values...")
            # If it's a YAML file, update values on Redis
            if blob_name.endswith(".yaml"):
                # Read the blob
                blob = get_blob(blob_name, SENSOR_BUCKET, mode="staging")
                if blob is None:
                    raise Exception(f"Blob {blob_name} not found")
                blob_dict: dict = yaml.safe_load(blob.download_as_string())
                # Update Redis
                with views_lock:
                    materialized_views: dict = rp.get(
                        constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
                    if materialized_views is None:
                        materialized_views = {}
                        materialized_views["views"] = {}
                    if table_name not in materialized_views["views"]:
                        materialized_views["views"][table_name] = {}
                    update_dict_with_dict(
                        materialized_views["views"][table_name], {
                            "cron_expression": blob_dict["scheduling"]["cron"],
                            "last_run": None,
                            "query_modified": True,
                        })
                    rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                           materialized_views)
            # Add table_name and its dependencies to runs
            context.log.info(f"Adding {table_name} to runs")
            if table_name not in graph.nodes:
                graph.add_node(table_name)
            materialized_views: dict = rp.get(
                constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
            if materialized_views is None:
                materialized_views = {}
                materialized_views["views"] = {}
            if table_name in materialized_views["views"]:
                for dep in materialized_views["views"][table_name]["depends_on"]:
                    context.log.info(
                        f"Adding {dep} to runs as dependency of {table_name}")
                    if dep not in graph.nodes:
                        graph.add_node(dep)
                    graph.add_edge(dep, table_name)
        context.log.info(f"Graph edges: {graph.edges()}")
        # Get topological order
        order = list(nx.topological_sort(graph))
        # Filter out views that are not in materialized_views["views"]
        order = [o for o in order if o in materialized_views["views"]]
        # Log the topological order
        context.log.info(f"Order: {order}")
        # Emit views in topological order
        for q in order:
            yield DynamicOutput(
                {
                    "view_name": q,
                    "materialization_lock": materialization_lock,
                },
                mapping_key=q.replace(".", "_"))
    except Exception as e:
        try:
            materialization_lock.release()
        except:
            pass
        raise e
class RedlockTests(TestCase):
    'Distributed Redis-powered lock tests.'

    def setUp(self):
        super().setUp()
        self.redis = _default_redis
        self.redlock = Redlock(
            masters={self.redis},
            key='printer',
            auto_release_time=100,
        )

    def tearDown(self):
        with contextlib.suppress(ReleaseUnlockedLock):
            self.redlock.release()
        super().tearDown()

    def test_acquire_and_time_out(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        with ContextTimer() as timer:
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert timer.elapsed() >= self.redlock.auto_release_time

    def test_acquire_same_lock_twice_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(blocking=False)
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        with self.assertRaises(ValueError):
            self.redlock.acquire(blocking=False, timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert self.redlock.locked()
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_extend(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.extend()
        assert self.redlock.acquire()
        for extension_num in range(3):
            with self.subTest(extension_num=extension_num):
                assert self.redlock.extend()
        with self.assertRaises(TooManyExtensions):
            self.redlock.extend()

    def test_acquire_then_release(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        self.redlock.release()
        assert not self.redis.exists(self.redlock.key)

    def test_release_unlocked_lock(self):
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_releaseunlockedlock_repr(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert repr(wtf) == (
                "ReleaseUnlockedLock(masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>], "
                "key='redlock:printer')"
            )

    def test_releaseunlockedlock_str(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert str(wtf) == (
                "masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>], "
                "key='redlock:printer'"
            )

    def test_release_same_lock_twice(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        self.redlock.release()
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_context_manager(self):
        assert not self.redis.exists(self.redlock.key)
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_acquired_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
            assert not self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_release_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            self.redlock.release()
            assert not self.redis.exists(self.redlock.key)

    def test_repr(self):
        assert repr(self.redlock) == \
            "<Redlock key=redlock:printer value=b'' timeout=0>"
def whitelist_image(self, path):
    img = Image.open(path)
    lock_phash_clean = Redlock(key='phashdb_clean', masters={self.redis})
    lock_whash_clean = Redlock(key='whashdb_clean', masters={self.redis})
    self.set_in_db(self.phashdb_clean, str(im.phash(img)), True, lock=lock_phash_clean)
    self.set_in_db(self.whashdb_clean, str(im.whash(img)), True, lock=lock_whash_clean)
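The set_in_db / is_in_db helpers used throughout these methods are not shown in this section. A minimal sketch of what they might look like, assuming each *db attribute names a Redis hash and using only standard redis-py hash commands; the real implementations may differ.

def set_in_db(self, db, key, value, lock):
    # Store the value under the hash named by `db`, guarded by the Redlock
    # (pottery's Redlock works as a context manager, per the tests above).
    with lock:
        self.redis.hset(db, key, str(value))

def is_in_db(self, db, key, lock):
    # Membership test against the same hash, under the same lock.
    with lock:
        return bool(self.redis.hexists(db, key))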