Example 1
    def search_directory_experimental(
            self,
            request_input: SearchDirectoryInput) -> SearchDirectoryOutput:
        """
        This new query function improves performance significantly, but is still
        being tested for accuracy and edge cases.

        This executes only one query to PWS per requested population. The
        query wraps each whitespace-delimited token of the user's input in
        wildcards.

        For example: "buffy anne summers" would become a query for
        display names matching:
            "*buffy* *summers*"

        In this example, PWS would return any of the following results:
            - buffy anne summers
            - buffy "the vampire slayer" summers
            - ubuffya alsummersia
            - buffy-anne summers
            - buffy anne summers-finn

        After the results have been filtered, they are sent to the
        NameSearchResultReducer, which is responsible for sorting
        these names into appropriate buckets by relevance.
        """
        timer_context = {
            "query":
            request_input.dict(
                exclude_none=True,
                by_alias=True,
                exclude_properties=True,
                exclude_unset=True,
            ),
            "statistics": {},
        }
        timer = Timer("search_directory", context=timer_context).start()
        statistics = ListPersonsRequestStatistics(
            num_queries_generated=1,
            num_user_search_tokens=len(request_input.name.split()),
        )
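        # Wrap every whitespace-delimited token of the input name in wildcards,
        # e.g. "foo bar" becomes "*foo* *bar*".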
        query = " ".join(f"*{token}*" for token in request_input.name.split())
        results = {}

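        # Issue a single wildcard query per requested population, constraining
        # the affiliation state to the population being searched.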
        for population in request_input.requested_populations:
            pws_output: ListPersonsOutput = self._pws.list_persons(
                ListPersonsInput(
                    display_name=query,
                    employee_affiliation_state=(AffiliationState.current
                                                if population == "employees"
                                                else None),
                    student_affiliation_state=(AffiliationState.current
                                               if population == "students" else
                                               None),
                ),
                populations=request_input.requested_populations,
            )

            statistics.aggregate(pws_output.request_statistics)
            results = self.reducer.reduce_output(pws_output,
                                                 request_input.name, results)

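            # Follow PWS pagination links until every page has been reduced
            # into the result buckets.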
            while pws_output.next:
                pws_output = self._pws.get_explicit_href(
                    pws_output.next.href, output_type=ListPersonsOutput)
                results = self.reducer.reduce_output(pws_output,
                                                     request_input.name,
                                                     results)
                statistics.aggregate(pws_output.request_statistics)

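        # Record reducer-detected duplicates and emit the timing log with the
        # final statistics attached.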
        statistics.num_duplicates_found = self.reducer.duplicate_hit_count
        timer.context["statistics"] = statistics.dict(by_alias=True)
        timer.stop(emit_log=True)

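        # Translate each reduced bucket into a scenario for the API response.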
        return SearchDirectoryOutput(scenarios=[
            DirectoryQueryScenarioOutput(
                description=b.description,
                populations=self.pws_translator.translate_bucket(b),
            ) for b in results.values()
        ])
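A minimal usage sketch for the experimental search above, assuming that
SearchDirectoryInput accepts the name and requested_populations fields read in
the method, and that the population values are "employees" and "students" as
compared against in the loop; "client" is a hypothetical instance of the
enclosing service class, named here only for illustration.

    # Hypothetical caller; "client" stands in for an instance of the class
    # that owns search_directory_experimental (an assumption for this sketch).
    request = SearchDirectoryInput(
        name="buffy summers",
        requested_populations=["employees", "students"],
    )
    output = client.search_directory_experimental(request)
    for scenario in output.scenarios:
        print(scenario.description, scenario.populations)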
Example 2
    def search_directory_classic(
            self,
            request_input: SearchDirectoryInput) -> SearchDirectoryOutput:
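        """
        Issues one PWS query per scenario produced by the query generator,
        follows pagination for each, and merges results for scenarios that
        share a description into a single output scenario.
        """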
        timer_context = {
            "query":
            request_input.dict(
                exclude_none=True,
                by_alias=True,
                exclude_properties=True,
                exclude_unset=True,
            ),
            "statistics": {},
        }
        duplicate_netids = set()
        timer = Timer("search_directory", context=timer_context).start()

        statistics = ListPersonsRequestStatistics()
        scenarios: List[DirectoryQueryScenarioOutput] = []
        scenario_description_indexes: Dict[str, int] = {}

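        # Each generated scenario carries its own pre-built PWS request and a
        # human-readable description that is used to group results below.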
        for generated in self.query_generator.generate(request_input):
            self.logger.debug(
                f"Querying: {generated.description} with "
                f"{generated.request_input.dict(exclude_unset=True, exclude_defaults=True)}"
            )
            statistics.num_queries_generated += 1
            pws_output: ListPersonsOutput = self._pws.list_persons(
                generated.request_input,
                populations=request_input.requested_populations)
            aggregate_output = pws_output
            statistics.aggregate(pws_output.request_statistics)

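            # Follow pagination links, accumulating every page onto the first
            # page's person list.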
            while pws_output.next and pws_output.next.href:
                pws_output = self._pws.get_explicit_href(pws_output.next.href)
                statistics.aggregate(pws_output.request_statistics)
                aggregate_output.persons.extend(pws_output.persons)

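            # Translate the aggregated output into per-population results; the
            # shared duplicate_netids set lets the translator count duplicates
            # across scenarios, which are folded into the statistics.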
            populations = self.pws_translator.translate_scenario(
                aggregate_output, duplicate_netids)
            statistics.num_duplicates_found += populations.pop(
                "__META__", {}).get("duplicates", 0)

            scenario_output = DirectoryQueryScenarioOutput(
                description=generated.description,
                populations=populations,
            )

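            # Merge results into an existing scenario that shares the same
            # description, or append this scenario as a new entry.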
            if generated.description in scenario_description_indexes:
                index = scenario_description_indexes[generated.description]
                existing_scenario = scenarios[index]
                for population, results in scenario_output.populations.items():
                    if population not in existing_scenario.populations:
                        # First time this population appears for the scenario:
                        # take its results wholesale.
                        existing_scenario.populations[population] = results
                    else:
                        existing_scenario.populations[population].people.extend(
                            results.people)
            else:
                scenarios.append(scenario_output)
                scenario_description_indexes[
                    generated.description] = len(scenarios) - 1

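        # Attach the final statistics to the timer context and emit the timing log.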
        timer.context["statistics"] = statistics.dict(by_alias=True)
        timer.stop(emit_log=True)
        return SearchDirectoryOutput(scenarios=scenarios)