Example #1
 def check_hash(self, raw, phash: bool, bias=0.0, threshold=13, bounces=None):
     raw_str = str(raw)
     minimum = 1000
     if phash:  # which hash? True = phash, False = whash
         redisdb = self.phashdb
         redisdb_clean = self.phashdb_clean
         # locks for redis concurrency
         lock = Redlock(key='phashdb', masters={self.redis})
         lock_clean = Redlock(key='phashdb_clean', masters={self.redis})
         # end of locks
     else:
         redisdb = self.whashdb
         redisdb_clean = self.whashdb_clean
         # locks for redis concurrency
         lock = Redlock(key='whashdb', masters={self.redis})
         lock_clean = Redlock(key='whashdb_clean', masters={self.redis})
         # end of locks
     if self.is_in_db(redisdb_clean, raw_str, lock=lock_clean):  # if it has already been processed, it is not banned
         return False, -2
     elif self.is_in_db(redisdb, raw_str, lock=lock):  # if found
         return True, -2
     else:  # checks for similar hashes
         result, value = self.calc_similar_hash(redisdb, raw, raw_str, bias, threshold, bounces, lock=lock)
         minimum = min(minimum, value)
         if result:
             return result, value  # if it's in the db
         else:
             self.set_in_db(redisdb_clean, raw_str, True, lock=lock_clean)  # set as safe
             return False, minimum  # if it's not
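
Example #1 builds Redlock instances and passes them explicitly to helper calls. When the critical section is a single block, the same lock can instead be used as a context manager, as Examples #7, #11, and #13 do. A minimal sketch of that form, assuming a local redis-py client and pottery's Redlock:

from redis import Redis
from pottery import Redlock

redis_client = Redis()  # assumes a local Redis on the default port
lock = Redlock(key='phashdb', masters={redis_client})
with lock:  # acquired on entry, released on exit, even on exceptions
    pass    # read or update the phash database here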
Example #2
 def setUp(self):
     super().setUp()
     self.redlock = Redlock(
         masters={self.redis},
         key='printer',
         auto_release_time=100,
     )
Example #3
 def unban_image(self, path):
     img = Image.open(path)
     lock_phash = Redlock(key='phashdb', masters={self.redis})
     lock_whash = Redlock(key='whashdb', masters={self.redis})
     raw_phash = im.phash(img)
     raw_whash = im.whash(img)
     self.exec_similar_hash(self.phashdb, raw_phash, 0, 13, self.del_from_db, lock=lock_phash)
     self.exec_similar_hash(self.whashdb, raw_whash, 0, 13, self.del_from_db, lock=lock_whash)
Example #4
File: sensors.py Project: RJ-SMTR/maestro
def materialized_views_execute_sensor(context: SensorExecutionContext):
    """Sensor for executing materialized views based on cron expressions."""
    # Setup Redis and Redlock
    r = Redis(constants.REDIS_HOST.value)
    lock = Redlock(
        key=constants.REDIS_KEY_MAT_VIEWS_MATERIALIZE_SENSOR_LOCK.value,
        auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        masters=[r])

    if lock.acquire(timeout=2):
        lock.release()
    else:
        yield SkipReason("Another run is already in progress!")
        return

    rp = RedisPal(constants.REDIS_HOST.value)

    # Get managed materialized views
    managed_materialized_views: dict = rp.get("managed_materialized_views")
    if managed_materialized_views is None:
        managed_materialized_views = {}
        managed_materialized_views["views"] = {}

    # Get current timestamp
    now = datetime.datetime.now(pytz.timezone("America/Sao_Paulo"))

    # Iterate over all managed materialized views, storing a list
    # of all queries to be executed
    queries_to_execute: list = []
    for blob_name, view_config in managed_materialized_views["views"].items():
        if (view_config["last_run"] is None
                or determine_whether_to_execute_or_not(
                    view_config["cron_expression"], now,
                    view_config["last_run"])) and (
                        view_config["materialized"]):
            # Add to list of queries to execute
            queries_to_execute.append(blob_name)

    # Launch run if we have any queries to execute
    if queries_to_execute:
        # Get run configuration
        config: dict = read_config(
            Path(__file__).parent / "materialized_views_execute.yaml")

        # Get run key
        run_key = build_run_key("materialized_views_execute", now)

        # Set inputs
        config["solids"]["resolve_dependencies_and_execute"]["inputs"][
            "queries_names"]["value"] = queries_to_execute

        yield RunRequest(run_key=run_key, run_config=config)

    # Tell Dagit a reason we skipped it
    else:
        yield SkipReason("No materialization requested for now")
Example #5
 def debug(self, path):
     img = Image.open(path)
     lock_phash = Redlock(key='phashdb', masters={self.redis})
     lock_whash = Redlock(key='whashdb', masters={self.redis})
     raw_phash = im.phash(img)
     raw_whash = im.whash(img)
     toreturn = [self.calculate_hashes(path)]
     toreturn.append("phashdb")
     print("phashdb")
     toreturn.append(self.debug_similar_hash(self.phashdb, raw_phash, 13, lock=lock_phash))
     print("whashdb")
     toreturn.append("whashdb")
     toreturn.append(self.debug_similar_hash(self.whashdb, raw_whash, 13, lock=lock_whash))
     return "\n".join(toreturn)
Example #6
 def ban_image(self, path):
     img = Image.open(path)
     lock_phash_clean = Redlock(key='phashdb_clean', masters={self.redis})
     lock_whash_clean = Redlock(key='whashdb_clean', masters={self.redis})
     lock_phash = Redlock(key='phashdb', masters={self.redis})
     lock_whash = Redlock(key='whashdb', masters={self.redis})
     raw_phash = im.phash(img)
     raw_whash = im.whash(img)
     phash = str(raw_phash)
     whash = str(raw_whash)
     self.set_in_db(self.phashdb, phash, 0, lock=lock_phash)
     self.set_in_db(self.whashdb, whash, 0, lock=lock_whash)
     self.exec_similar_hash(self.phashdb_clean, raw_phash, 0, 13, self.del_from_db,
                            lock=lock_phash_clean)  # removing duplicates from the clean phash, if any
     self.exec_similar_hash(self.whashdb_clean, raw_whash, 0, 13, self.del_from_db,
                            lock=lock_whash_clean)  # removing duplicates from the clean whash, if any
Example #7
def jtag_x3xx(jtag_args, redis_server):
    remote_working_dir = "pipeline_fpga"
    vivado_program_jtag = "/opt/Xilinx/Vivado_Lab/2020.1/bin/vivado_lab -mode batch -source {}/viv_hardware_utils.tcl -nolog -nojournal -tclargs program".format(
        remote_working_dir)
    jtag_server, jtag_serial, fpga_path = jtag_args.split(",")
    print("Waiting on jtag mutex for {}".format(jtag_server), flush=True)
    with Redlock(key="hw_jtag_{}".format(jtag_server),
                 masters=redis_server,
                 auto_release_time=1000 * 60 * 5):
        print("Got jtag mutex for {}".format(jtag_server), flush=True)
        with Connection(host=jtag_server) as jtag_host:
            jtag_host.run("mkdir -p " + remote_working_dir)
            jtag_host.run("rm -rf {}/*".format(remote_working_dir))
            jtag_host.put(os.path.join(
                pathlib.Path(__file__).parent.absolute(),
                "jtag/viv_hardware_utils.tcl"),
                          remote=remote_working_dir)
            jtag_host.put(fpga_path, remote=remote_working_dir)
            jtag_host.run(
                vivado_program_jtag + " " +
                os.path.join(remote_working_dir, os.path.basename(fpga_path)) +
                " " + jtag_serial)
        print(
            "Waiting 15 seconds for device to come back up and for Vivado to close",
            flush=True)
        time.sleep(15)
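
jtag_x3xx() expects redis_server to be a set of Redis clients, matching Redlock's masters parameter; Example #11 shows how main() builds it. A hypothetical standalone invocation, with placeholder host and file names:

from redis import Redis

redis_server = {Redis.from_url("redis://redis.example.local:6379/0")}
jtag_x3xx("jtag-host.example.local,SERIAL123,/path/to/image.bit", redis_server)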
Example #8
 def setUp(self):
     super().setUp()
     self.redis = _default_redis
     self.redlock = Redlock(
         masters={self.redis},
         key='printer',
         auto_release_time=100,
     )
Example #9
def get_materialize_sensor_lock(context):
    """
    Get a lock for the materialization sensor.
    """
    r = Redis(constants.REDIS_HOST.value)
    lock = Redlock(
        key=constants.REDIS_KEY_MAT_VIEWS_MATERIALIZE_SENSOR_LOCK.value,
        masters=[r],
        auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
    )
    return lock
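
The returned Redlock can then be held for the duration of a materialization, either via acquire()/release() or as a context manager, as the other ops in this project do. A minimal usage sketch (the guarded body is hypothetical):

lock = get_materialize_sensor_lock(context)
with lock:
    pass  # materialization steps guarded against concurrent sensor runs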
Example #10
def delete_managed_views(
    context,
    blob_names,
    materialization_locked: bool,
    materialization_lock: Redlock,
):
    try:
        r = Redis(constants.REDIS_HOST.value)
        rp = RedisPal(constants.REDIS_HOST.value)
        lock = Redlock(
            key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
            masters=[r],
            auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        )
        with lock:
            materialized_views: dict = rp.get(
                constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
            if materialized_views is None:
                materialized_views = {}
                materialized_views["views"] = {}
            for blob_name in blob_names:
                context.log.info(f"Deleting managed view {blob_name}")
                if blob_name in materialized_views["views"]:
                    del materialized_views["views"][blob_name]
                    prefix: str = os.getenv("BQ_PROJECT_NAME", "rj-smtr-dev")
                    table_name: str = f"{prefix}.{blob_name}"
                    update_view(table_name, {}, "", "", "", delete=True)
                    context.log.info("Success!")
                else:
                    context.log.info("View not found, skipping...")
            rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                   materialized_views)
    except Exception as e:
        try:
            materialization_lock.release()
        except Exception:
            pass
        raise e
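
The inner try/except around materialization_lock.release() guards against releasing a lock that is no longer held, in which case pottery raises ReleaseUnlockedLock (Example #17's tearDown suppresses the same error). A hedged sketch of that cleanup as a reusable helper (the helper name is hypothetical):

import contextlib

from pottery import Redlock, ReleaseUnlockedLock  # import path may vary by pottery version

def safe_release(lock: Redlock) -> None:
    # Release the lock if we still hold it; ignore the error pottery
    # raises when the lock already expired or was never acquired.
    with contextlib.suppress(ReleaseUnlockedLock):
        lock.release()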
Example #11
def main(args):
    redis_server = {
        Redis.from_url("redis://{}:6379/0".format(args.redis_server))
    }
    print("Waiting to acquire mutex for {}".format(args.dut_name), flush=True)
    with Redlock(key=args.dut_name,
                 masters=redis_server,
                 auto_release_time=1000 * 60 * args.dut_timeout):
        print("Got mutex for {}".format(args.dut_name), flush=True)
        if args.jtag_x3xx is not None:
            jtag_x3xx(args.jtag_x3xx, redis_server)
        for command in args.test_commands:
            result = subprocess.run(shlex.split(command))
            if result.returncode != 0:
                sys.exit(result.returncode)
        sys.exit(0)
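
main() reads five attributes from args: redis_server, dut_name, dut_timeout (in minutes), jtag_x3xx, and test_commands. A hypothetical argparse setup matching those fields; the flag names and defaults are assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--redis-server", dest="redis_server", required=True)
parser.add_argument("--dut-name", dest="dut_name", required=True)
parser.add_argument("--dut-timeout", dest="dut_timeout", type=int, default=30)  # minutes
parser.add_argument("--jtag-x3xx", dest="jtag_x3xx", default=None)
parser.add_argument("--test-commands", dest="test_commands", nargs="*", default=[])
main(parser.parse_args())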
Example #12
File: views.py Project: Admia1/feshar
def home_view(request):
    template = 'polls/home.html'
    if request.user.is_authenticated:
        polluser = PollUser.objects.get(user=request.user)
        if polluser.national_id == "0":  # the user has not provided a national_id yet
            return HttpResponseRedirect(reverse('polls:get_national_id'))
        if not polluser.payment_id_valid():
            return HttpResponseRedirect(reverse('polls:get_payment_id'))
        error_message = ""

        if request.method == 'POST':
            section = Section.objects.get(pk=request.POST['section_pk'])

            reserve_lock = Redlock(key=f"res_{section.pk}", masters={redis}, auto_release_time=60*1000)
            reserve_lock.acquire()

            usrs = section.usr_set.all()
            if len(usrs) >= 3:
                error_message = "این شیفت در این تاریخ و ایستگاه پر شده است"
            elif len(usrs)==2 and usrs[0].polluser.sex==usrs[1].polluser.sex==polluser.sex==0:
                error_message = "ا توجه به طرح تطبیق امکان حضور سه پسر در یک ایستگاه وجود ندارد!!!"
            else:
                # not supported for current jdatetime
                #if USR.objects.filter(section__end__mt=section.start, section__start__lt=section.end, polluser=polluser).exists():
                if [usr for usr in USR.objects.filter(polluser=polluser) if (usr.section.start<section.end and usr.section.end>section.start)]:
                    error_message = "شما قبلا در این روز و تاریخ شیفتی رزرو کرده اید"
                else:
                    # TODO
                    if section.available_from <= jdatetime.datetime.now():
                        USR.objects.create(polluser=polluser, section=section)
                        error_message = "رزرو شد"  # "Reserved"
                    else:
                        error_message = "هم اکنون امکان رزرو شیفت وجود ندارد"  # "Shift reservation is not possible right now"

            reserve_lock.release()
        #  day.section_set.order_by('index', 'station')
        sections = [section for section in Section.objects.filter(available_from__lt=jdatetime.datetime.now()) if section.usr_set.count()<3]

        user_usr = [usr for usr in USR.objects.filter(polluser__user=request.user).order_by('section__start')]
        return render(request, template, {'sections': sections, 'error_message': error_message, 'user_usr': user_usr, 'polluser':polluser})
    else:
        return HttpResponseRedirect(reverse('polls:register'))
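
One design note on Example #12: reserve_lock.release() is only reached if nothing raises between acquire() and release(), so a failed request would keep the section locked until the 60-second auto_release_time expires. A hedged sketch of the safer context-manager form for the same reservation block:

reserve_lock = Redlock(key=f"res_{section.pk}", masters={redis}, auto_release_time=60 * 1000)
with reserve_lock:  # released even if the reservation logic raises
    pass  # capacity and overlap checks, then USR.objects.create(...)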
Example #13
class RedlockTests(TestCase):
    'Distributed Redis-powered lock tests.'

    def setUp(self):
        super().setUp()
        self.redlock = Redlock(
            masters={self.redis},
            key='printer',
            auto_release_time=100,
        )

    def test_acquire_and_time_out(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        with ContextTimer() as timer:
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert timer.elapsed() >= self.redlock.auto_release_time

    def test_acquire_same_lock_twice_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(blocking=False)
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        with self.assertRaises(ValueError):
            self.redlock.acquire(blocking=False, timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert self.redlock.locked()
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_extend(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ExtendUnlockedLock):
            self.redlock.extend()
        assert self.redlock.acquire()
        for extension_num in range(3):
            with self.subTest(extension_num=extension_num):
                self.redlock.extend()
        with self.assertRaises(TooManyExtensions):
            self.redlock.extend()

    def test_acquire_then_release(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        self.redlock.release()
        assert not self.redis.exists(self.redlock.key)

    def test_release_unlocked_lock(self):
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_releaseunlockedlock_repr(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert repr(wtf) == (
                f"ReleaseUnlockedLock(masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db={self.redis_db}>>>], "
                "key='redlock:printer')")

    def test_releaseunlockedlock_str(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert str(wtf) == (
                f"masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db={self.redis_db}>>>], "
                "key='redlock:printer'")

    def test_release_same_lock_twice(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        self.redlock.release()
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_context_manager(self):
        assert not self.redis.exists(self.redlock.key)
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_acquired_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
            assert not self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_release_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            self.redlock.release()
            assert not self.redis.exists(self.redlock.key)

    def test_repr(self):
        assert repr(self.redlock) == \
            "<Redlock key=redlock:printer value=b'' timeout=0>"
Example #14
def get_configs_for_materialized_view(context, query_names: list,
                                      materialization_locked: bool,
                                      materialization_lock) -> dict:
    """Retrieves configs for materialized views"""
    try:
        for query_name in query_names:

            # Split query name into dataset_name and view_name
            dataset_name, view_name = query_name.split(".")

            # Load configs from GCS
            view_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, dataset_name, view_name)}.yaml'
            defaults_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, dataset_name)}/defaults.yaml'
            context.log.info(f"Defaults blob: {defaults_yaml}")
            context.log.info(f"View blob: {view_yaml}")
            defaults_blob = get_blob(defaults_yaml,
                                     SENSOR_BUCKET,
                                     mode="staging")
            view_blob = get_blob(view_yaml, SENSOR_BUCKET, mode="staging")
            if defaults_blob is None:
                raise Exception(f"Blob {defaults_yaml} not found!")
            defaults_dict = yaml.safe_load(defaults_blob.download_as_string())
            if view_blob:
                view_dict = yaml.safe_load(view_blob.download_as_string())
            else:
                context.log.warning(
                    f"Blob {view_yaml} not found. This is not an error.")
                view_dict = {}

            # Merge configs
            query_params = {**defaults_dict, **view_dict}

            # Build base configs
            now = datetime.datetime.now(pytz.timezone("America/Sao_Paulo"))
            run_key = build_run_key(query_name, now)
            with open(
                    str(
                        Path(__file__).parent /
                        "materialized_views_base_config.yaml"), "r") as f:
                base_params: dict = yaml.safe_load(f)
            base_params["run_timestamp"] = "'{}'".format(
                convert_datetime_to_datetime_string(now))
            base_params["maestro_sha"] = "'{}'".format(
                fetch_branch_sha(constants.MAESTRO_REPOSITORY.value,
                                 constants.MAESTRO_DEFAULT_BRANCH.value))
            base_params["maestro_bq_sha"] = "'{}'".format(
                fetch_branch_sha(constants.MAESTRO_BQ_REPOSITORY.value,
                                 constants.MAESTRO_BQ_DEFAULT_BRANCH.value))
            base_params["run_key"] = "'{}'".format(run_key)

            # Few more params
            r = Redis(constants.REDIS_HOST.value)
            rp = RedisPal(constants.REDIS_HOST.value)
            lock = Redlock(
                key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
                masters=[r],
                auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
            )
            table_name = parse_filepath_to_tablename(view_yaml)
            with lock:
                managed = rp.get(
                    constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
                if managed is None:
                    managed = {}
                    managed["views"] = {}
                if query_name not in managed["views"]:
                    raise Exception(
                        f"Query {query_name} not found in managed views: {managed}"
                    )
                d = managed["views"][query_name]
                changed = d["query_modified"]
                context.log.info(f"{query_name} changed: {changed}")
                d["query_modified"] = False
                last_run = d["last_run"]
                d["last_run"] = now
                rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                       managed)

            # Get query on GCS
            query_file = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, dataset_name, view_name)}.sql'
            query_blob = get_blob(query_file, SENSOR_BUCKET, mode="staging")
            if query_blob is None:
                raise Exception(f"Blob {query_file} not found!")
            base_query = query_blob.download_as_string().decode("utf-8")

            # Get parent queries on GCS
            parent_queries = {}
            for parent_query_name in d["depends_on"]:
                if parent_query_name in managed["views"] and managed["views"][
                        parent_query_name]["materialized"]:
                    continue
                parent_query_file = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, "/".join(parent_query_name.split(".")[:2]))}.sql'
                parent_query_blob = get_blob(parent_query_file,
                                             SENSOR_BUCKET,
                                             mode="staging")
                if parent_query_blob is None:
                    context.log.warning(
                        f"Blob for parent query \"{parent_query_file}\" not found, skipping..."
                    )
                    continue
                parent_view_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, "/".join(parent_query_name.split(".")[:2]))}.yaml'
                parent_view_blob = get_blob(parent_view_yaml,
                                            SENSOR_BUCKET,
                                            mode="staging")
                if parent_view_blob is not None:
                    parent_view_dict = yaml.safe_load(
                        parent_view_blob.download_as_string())
                else:
                    parent_view_dict = {}
                parent_defaults_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, "/".join(parent_query_name.split(".")[:1]))}/defaults.yaml'
                parent_defaults_blob = get_blob(parent_defaults_yaml,
                                                SENSOR_BUCKET,
                                                mode="staging")
                if parent_defaults_blob is not None:
                    parent_defaults_dict = yaml.safe_load(
                        parent_defaults_blob.download_as_string())
                else:
                    context.log.warning(
                        f"Blob for parent defaults \"{parent_defaults_yaml}\" not found, skipping..."
                    )
                    continue
                parent_queries[parent_query_name] = {
                    "base_query":
                    parent_query_blob.download_as_string().decode("utf-8"),
                    "query_params": {
                        **parent_defaults_dict,
                        **parent_view_dict
                    },
                }
            context.log.info(f"Parent queries: {parent_queries}")

            # Build configs
            # - table_name: str
            # - changed: bool
            # - base_query: str
            # - base_params: dict
            # - query_params: dict
            # - now: str
            # - last_run: str
            date_ranges = get_date_ranges(
                last_run
                if last_run else query_params["backfill"]["start_timestamp"],
                query_params["backfill"]["interval"], now)
            context.log.info(f"{date_ranges}")
            for i, _ in enumerate(date_ranges[:-1]):
                configs = {
                    "table_name": table_name,
                    "changed": changed if i == 0 else False,
                    "base_query": base_query,
                    "base_params": base_params,
                    "query_params": query_params,
                    "now": date_ranges[i + 1],
                    "last_run": date_ranges[i],
                    "parent_queries": parent_queries,
                }
                mapping_key = (
                    f'{configs["table_name"]}_{configs["last_run"]}_{configs["now"]}'
                    .replace(".", "_").replace("-", "_")
                    .replace(" ", "_").replace(":", "_"))
                yield DynamicOutput(
                    {
                        "config_dict": configs,
                        "materialization_lock": materialization_lock
                    },
                    mapping_key=mapping_key)
    except Exception as e:
        try:
            materialization_lock.release()
        except Exception:
            pass
        raise e
Example #15
def manage_view(context, input_dict):

    view_name = input_dict["view_name"]
    materialization_lock = input_dict["materialization_lock"]

    try:
        # Setup Redis and Redlock
        r = Redis(constants.REDIS_HOST.value)
        rp = RedisPal(constants.REDIS_HOST.value)
        lock = Redlock(
            key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
            masters=[r],
            auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        )

        # Get materialization information from Redis
        materialized_views: dict = rp.get(
            constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
        if materialized_views is None:
            materialized_views = {}
            materialized_views["views"] = {}
        materialized = materialized_views["views"][view_name]["materialized"]

        # If this is materialized, generate temp view
        if materialized:
            with lock:
                materialized_views["views"][view_name]["query_modified"] = True
                materialized_views["views"][view_name]["last_run"] = None
                rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                       materialized_views)
            context.log.info(
                f"Generate {view_name} as a view for now, materialization comes later"
            )

        # We need to build the query using
        # latest parameters and build a view with it.

        # Get defaults for view_name
        blob_path = os.path.join(
            *([MATERIALIZED_VIEWS_PREFIX] + view_name.split(".")[:-1]))
        defaults_path = blob_path + "/defaults.yaml"
        context.log.info(f"Defaults path -> {defaults_path}")
        defaults_blob = get_blob(defaults_path, SENSOR_BUCKET, mode="staging")
        if defaults_blob is None:
            raise Exception(f"Blob {defaults_path} not found")
        defaults_dict: dict = yaml.safe_load(
            defaults_blob.download_as_string())

        # Parse dataset_name
        dataset_name = view_name.split(".")[0]

        # Parse view yaml path
        view_yaml = f'{os.path.join(MATERIALIZED_VIEWS_PREFIX, view_name)}.yaml'

        # Parse table_name
        prefix: str = os.getenv("BQ_PROJECT_NAME", "rj-smtr-dev")
        table_name: str = f"{prefix}.{view_name}"
        context.log.info(f"Table name is {table_name}")

        # Update view
        update_view(table_name,
                    defaults_dict,
                    dataset_name,
                    view_name.split(".")[-1],
                    view_yaml,
                    delete=False,
                    context=context)

    except Exception as e:
        try:
            materialization_lock.release()
        except Exception:
            pass
        raise e
Example #16
def update_managed_views(
    context,
    blob_names,
    materialization_locked: bool,
    materialization_lock: Redlock,
):
    try:
        # Setup Redis and Redlock
        r = Redis(constants.REDIS_HOST.value)
        rp = RedisPal(constants.REDIS_HOST.value)
        views_lock = Redlock(
            key=constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS_LOCK.value,
            masters=[r],
            auto_release_time=constants.REDIS_LOCK_AUTO_RELEASE_TIME.value,
        )

        # Initialize graph
        graph = nx.DiGraph()

        # If blob_name ends with "defaults.yaml", we need to
        # either add it to Redis or update its values and add
        # runs for every child it has and its dependencies.
        for blob_name in [
                b for b in blob_names if b.endswith("defaults.yaml")
        ]:

            # Get dataset name
            blob_path = "/".join([n for n in blob_name.split("/")
                                  if n != ""][:-1])
            dataset_name: str = blob_path.split("/")[-1]

            context.log.info("#" * 80)
            context.log.info(f"Updating {dataset_name} defaults")

            # Read the blob
            blob = get_blob(blob_name, SENSOR_BUCKET, mode="staging")
            if blob is None:
                raise Exception(f"Blob {blob_name} not found")
            blob_dict: dict = yaml.safe_load(blob.download_as_string())

            # Add it to Redis
            with views_lock:
                materialized_views: dict = rp.get(
                    constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
                if materialized_views is None:
                    materialized_views = {}
                    materialized_views["views"] = {}
                # Add every child to Redis
                if "views" not in blob_dict:
                    raise Exception(
                        f"Malformed blob (missing views key): {blob_name}")
                for key in blob_dict["views"].keys():

                    # Build key with dataset_name
                    m_key = f"{dataset_name}.{key}"

                    # This child also needs a run
                    context.log.info(f"Adding {m_key} to runs")
                    if m_key not in graph.nodes:
                        graph.add_node(m_key)

                    # Avoid KeyError
                    if "views" not in materialized_views:
                        materialized_views["views"] = {}

                    # Add to Redis
                    if m_key not in materialized_views["views"]:
                        materialized_views["views"][m_key] = {}
                    update_dict_with_dict(
                        materialized_views["views"][m_key], {
                            "cron_expression": blob_dict["scheduling"]["cron"],
                            "last_run": None,
                            "materialized":
                            blob_dict["views"][key]["materialized"],
                            "query_modified": True,
                            "depends_on":
                            blob_dict["views"][key]["depends_on"],
                        })

                    # Adds dependencies to runs
                    for dep in blob_dict["views"][key]["depends_on"]:
                        context.log.info(
                            f"Adding {dep} to runs as dependency of {m_key}")
                        if dep not in graph.nodes:
                            graph.add_node(dep)
                        graph.add_edge(dep, m_key)

                    # Try to find specific values for this view
                    blob = get_blob(f"{blob_path}/{key}.yaml",
                                    SENSOR_BUCKET,
                                    mode="staging")
                    if blob:
                        # Replace values in Redis
                        specific = yaml.safe_load(
                            blob.download_as_string().decode("utf-8"))
                        materialized_views["views"][m_key][
                            "cron_expression"] = specific["scheduling"]["cron"]
                    else:
                        context.log.warning(
                            f"No specific values for {m_key} found. This is not an error."
                        )

                # Update Redis effectively
                rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                       materialized_views)

        # Otherwise, we need to add the blob_name and its
        # dependencies to the graph.
        for blob_name in [
                b for b in blob_names if not b.endswith("defaults.yaml")
        ]:

            # Get table name
            file_name = ".".join(blob_name.split("/")[-2:])
            table_name = ".".join(file_name.split(".")[:-1])

            context.log.info("#" * 80)
            context.log.info(f"Updating {table_name} specific values...")

            # If it's YAML file, update values on Redis
            if blob_name.endswith(".yaml"):

                # Read the blob
                blob = get_blob(blob_name, SENSOR_BUCKET, mode="staging")
                if blob is None:
                    raise Exception(f"Blob {blob_name} not found")
                blob_dict: dict = yaml.safe_load(blob.download_as_string())

                # Update Redis
                with views_lock:
                    materialized_views: dict = rp.get(
                        constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
                    if materialized_views is None:
                        materialized_views = {}
                        materialized_views["views"] = {}

                    if table_name not in materialized_views["views"]:
                        materialized_views["views"][table_name] = {}
                    update_dict_with_dict(
                        materialized_views["views"][table_name], {
                            "cron_expression": blob_dict["scheduling"]["cron"],
                            "last_run": None,
                            "query_modified": True,
                        })
                    rp.set(constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value,
                           materialized_views)

            # Add table_name and its dependencies to runs
            context.log.info(f"Adding {table_name} to runs")
            if table_name not in graph.nodes:
                graph.add_node(table_name)

            materialized_views: dict = rp.get(
                constants.REDIS_KEY_MAT_VIEWS_MANAGED_VIEWS.value)
            if materialized_views is None:
                materialized_views = {}
                materialized_views["views"] = {}
            if table_name in materialized_views["views"]:
                for dep in materialized_views["views"][table_name][
                        "depends_on"]:
                    context.log.info(
                        f"Adding {dep} to runs as dependency of {table_name}")
                    if dep not in graph.nodes:
                        graph.add_node(dep)
                    graph.add_edge(dep, table_name)

        context.log.info(f"Graph edges: {graph.edges()}")

        # Get topological order
        order = list(nx.topological_sort(graph))

        # Filter out views that are not on materialized_views["views"]
        order = [o for o in order if o in materialized_views["views"]]

        # Log topological order
        context.log.info(f"Order: {order}")

        # Execute queries in topological order
        for q in order:
            yield DynamicOutput(
                {
                    "view_name": q,
                    "materialization_lock": materialization_lock
                },
                mapping_key=q.replace(".", "_"))
    except Exception as e:
        try:
            materialization_lock.release()
        except Exception:
            pass
        raise e
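
Example #16 orders the runs with networkx: every edge points from a dependency to its dependent, so nx.topological_sort() yields parents before children. A tiny illustration with hypothetical view names:

import networkx as nx

g = nx.DiGraph()
g.add_edge("dataset.parent_view", "dataset.child_view")  # dependency -> dependent
print(list(nx.topological_sort(g)))
# ['dataset.parent_view', 'dataset.child_view']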
Example #17
class RedlockTests(TestCase):
    'Distributed Redis-powered lock tests.'

    def setUp(self):
        super().setUp()
        self.redis = _default_redis
        self.redlock = Redlock(
            masters={self.redis},
            key='printer',
            auto_release_time=100,
        )

    def tearDown(self):
        with contextlib.suppress(ReleaseUnlockedLock):
            self.redlock.release()
        super().tearDown()

    def test_acquire_and_time_out(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        with ContextTimer() as timer:
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.acquire()
            assert self.redis.exists(self.redlock.key)
            assert timer.elapsed() >= self.redlock.auto_release_time

    def test_acquire_same_lock_twice_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_without_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert not self.redlock.acquire(blocking=False)
        assert self.redis.exists(self.redlock.key)
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)

    def test_acquire_same_lock_twice_non_blocking_with_timeout(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        with self.assertRaises(ValueError):
            self.redlock.acquire(blocking=False, timeout=0)
        assert self.redis.exists(self.redlock.key)

    def test_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        assert self.redlock.locked()
        time.sleep(self.redlock.auto_release_time / 1000 + 1)
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_extend(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.extend()
        assert self.redlock.acquire()
        for extension_num in range(3):
            with self.subTest(extension_num=extension_num):
                assert self.redlock.extend()
        with self.assertRaises(TooManyExtensions):
            self.redlock.extend()

    def test_acquire_then_release(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        assert self.redis.exists(self.redlock.key)
        self.redlock.release()
        assert not self.redis.exists(self.redlock.key)

    def test_release_unlocked_lock(self):
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_releaseunlockedlock_repr(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert repr(wtf) == (
                "ReleaseUnlockedLock(masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>], "
                "key='redlock:printer')"
            )

    def test_releaseunlockedlock_str(self):
        try:
            self.redlock.release()
        except ReleaseUnlockedLock as wtf:
            assert str(wtf) == (
                "masters=[Redis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>], "
                "key='redlock:printer'"
            )

    def test_release_same_lock_twice(self):
        assert not self.redis.exists(self.redlock.key)
        assert self.redlock.acquire()
        self.redlock.release()
        with self.assertRaises(ReleaseUnlockedLock):
            self.redlock.release()

    def test_context_manager(self):
        assert not self.redis.exists(self.redlock.key)
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
        assert not self.redis.exists(self.redlock.key)

    def test_context_manager_acquired(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_acquired_time_out_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            assert self.redlock.locked()
            time.sleep(self.redlock.auto_release_time / 1000 + 1)
            assert not self.redis.exists(self.redlock.key)
            assert not self.redlock.locked()
        assert not self.redis.exists(self.redlock.key)
        assert not self.redlock.locked()

    def test_context_manager_release_before_exit(self):
        assert not self.redis.exists(self.redlock.key)
        with self.assertRaises(ReleaseUnlockedLock), self.redlock:
            assert self.redis.exists(self.redlock.key)
            self.redlock.release()
            assert not self.redis.exists(self.redlock.key)

    def test_repr(self):
        assert repr(self.redlock) == \
            "<Redlock key=redlock:printer value=b'' timeout=0>"
Example #18
 def whitelist_image(self, path):
     img = Image.open(path)
     lock_phash_clean = Redlock(key='phashdb_clean', masters={self.redis})
     lock_whash_clean = Redlock(key='whashdb_clean', masters={self.redis})
     self.set_in_db(self.phashdb_clean, str(im.phash(img)), True, lock=lock_phash_clean)
     self.set_in_db(self.whashdb_clean, str(im.whash(img)), True, lock=lock_whash_clean)