def get_query_suggestions(self, query, page_size):
    """Return typeahead suggestions for *query*.

    Runs a Solr search against video name/tags/description, then extracts
    every term in the returned rows that starts with *query*, de-duplicated
    and sorted, with stop words removed.

    :param query: prefix string typed by the user (must be non-empty)
    :param page_size: fetch size for the underlying search query (> 0)
    :raises ValueError: on an empty query or a non-positive page size
    """
    if not query:
        raise ValueError('No query string provided')
    if page_size <= 0:
        raise ValueError(
            'Page size should be strictly positive for search suggestions')

    solr_query = ('{"q":"name:(' + query + '*) OR tags:(' + query +
                  '*) OR description:(' + query + '*)", "paging":"driver"}')
    statement = self.get_query_suggestions_prepared.bind([solr_query])
    # TODO: not sure we're interpreting page size correctly here.
    # Should it be a limit on our database query, or a limit on the number of terms returned?
    statement.fetch_size = page_size
    statement.consistency_level = ConsistencyLevel.LOCAL_ONE  # required for search queries

    # deliberately avoiding paging in background: only the first page of
    # rows is examined, never the full result set
    rows = self.session.execute(statement).current_rows

    # matches whole words that begin with the query prefix
    term_pattern = re.compile(r'\b' + re.escape(query) + r'[a-z]*\b')
    suggestions = SortedSet()
    for row in rows:
        logging.debug('next video used for suggestions is: ' + row['name'])
        for term in term_pattern.findall(row['name']):
            logging.debug('Name term: ' + term)
            suggestions.add(term)
        for tag in row['tags']:
            for term in term_pattern.findall(tag):
                logging.debug('Tag term: ' + term)
                suggestions.add(term)
        for term in term_pattern.findall(row['description']):
            logging.debug('Description term: ' + term)
            suggestions.add(term)

    # remove stop words
    suggestions.difference_update(self.stop_words)
    return list(suggestions)
class SortedSetKey:
    """A sorted set whose ordering comes from externally supplied ranks.

    Values are kept in ``self.sorted_set`` ordered by the rank recorded for
    them in ``self.dict``; ``add``/``update`` must therefore refresh the rank
    *before* (re-)inserting a value.
    """

    def __init__(self):
        self.dict = dict()
        self.sorted_set = SortedSet(key=self.get_key)

    def __getitem__(self, item):
        return self.sorted_set[item]

    def __len__(self):
        return len(self.sorted_set)

    def __str__(self):
        return str(self.sorted_set)

    def get_key(self, value):
        # Rank lookup used by the underlying SortedSet to order values.
        return self.dict[value]

    def get_reversed_list(self, index, count):
        # Slice *count* items walking backwards from the *index*-th largest.
        start = -1 - index
        return self[start:start - count:-1]

    def values(self):
        # Lazy iteration over values in rank order.
        return iter(self.sorted_set)

    def clear(self):
        self.sorted_set.clear()
        self.dict.clear()

    def destroy(self):
        self.sorted_set = None

    def index(self, value):
        return self.sorted_set.index(value)

    def pop(self, index=-1):
        return self.sorted_set.pop(index)

    def add(self, value, rank):
        """Insert *value* with *rank*, replacing any previous rank."""
        # Remove first: the set must not hold the value while its rank
        # (and therefore its sort position) changes underneath it.
        if value in self.sorted_set:
            self.sorted_set.remove(value)
        self.dict[value] = rank
        self.sorted_set.add(value)

    def remove(self, value):
        self.sorted_set.remove(value)
        del self.dict[value]

    def update(self, value_list, rank_list):
        """Bulk insert/refresh of parallel value and rank lists."""
        # Drop any existing entries, record the new ranks, then re-insert
        # everything so positions reflect the new ranks.
        self.sorted_set.difference_update(value_list)
        for i, value in enumerate(value_list):
            self.dict[value] = rank_list[i]
        self.sorted_set.update(value_list)
def test_difference_update():
    """difference_update removes the elements of several iterables at once."""
    temp = SortedSet(range(100))
    # The ``load=7`` constructor kwarg was removed in sortedcontainers 2.0;
    # _reset(7) forces the same small load factor (matching the sibling test
    # in this file) so that internal node splitting is exercised.
    temp._reset(7)
    temp.difference_update(range(0, 10), range(10, 20))
    # 0..19 were removed, so index val now holds val + 20.
    assert all((val + 20) == temp[val] for val in range(80))
class Chunk(object):
    """
    Represents a chunk of code providing some useful functionality in the system.
    """

    def __init__(self, logical_name, feature, local_content=None):
        self.logical_name = logical_name
        self.feature = feature
        self.local_content = local_content
        # Sorted containers give deterministic iteration order across runs.
        self.dependencies = SortedSet(key=lambda d: d.fully_qualified_name)
        self.bugs = SortedSet(key=lambda b: b.logical_name)
        self.bug_count = 0

    def __eq__(self, other):
        # Equality is by content, bug names and dependency names; the chunk's
        # own logical name is deliberately excluded.
        if self.local_content != other.local_content:
            return False
        elif self.bugs_logical_names != other.bugs_logical_names:
            return False
        elif self.dependency_logical_names != other.dependency_logical_names:
            return False
        else:
            return True

    def __ne__(self, other):
        return not (self.__eq__(other))

    @property
    def probability_gain_feature_dependency(self):
        return self.feature.software_system.probability_gain_feature_dependency

    @property
    def probability_lose_feature_dependency(self):
        return self.feature.software_system.probability_lose_feature_dependency

    @property
    def probability_gain_system_dependency(self):
        return self.feature.software_system.probability_gain_system_dependency

    @property
    def probability_lose_system_dependency(self):
        return self.feature.software_system.probability_lose_system_dependency

    @property
    def probability_new_bug(self):
        return self.feature.software_system.probability_new_bug

    @property
    def probability_debug_known(self):
        return self.feature.software_system.probability_debug_known

    @property
    def probability_debug_unknown(self):
        return self.feature.software_system.probability_debug_unknown

    @property
    def dependency_logical_names(self):
        # Materialized as a list: under Python 3 a lazy map() object would
        # make the != comparisons in __eq__ compare identity, never values.
        return [d.logical_name for d in self.dependencies]

    @property
    def bugs_logical_names(self):
        # List for the same Python 3 value-comparison reason as above.
        return [b.logical_name for b in self.bugs]

    @property
    def bugs_in_dependencies(self):
        """Union of all bugs present in this chunk's dependencies."""
        # set().union(*...) replaces the original bare reduce(), which is a
        # NameError under Python 3 without a functools import.
        return set().union(*(set(chunk.bugs) for chunk in self.dependencies))

    @property
    def tests(self):
        # List (not a lazy filter object) so callers can take len() or
        # iterate repeatedly, matching the Python 2 filter() semantics.
        return [t for t in self.feature.tests if self in t.chunks]

    def modify(self, random):
        """Randomly mutate this chunk: new dependencies, content and bugs."""
        feature_chunks = self.feature.chunks - {self}
        system_chunks = set(self.feature.software_system.chunks.difference(self.feature.chunks))

        self._add_dependencies(random, system_chunks, self.probability_gain_system_dependency)
        self._add_dependencies(random, feature_chunks, self.probability_gain_feature_dependency)

        self.local_content = random.create_local_content()

        self._insert_bugs(random)

    def merge(self, source_chunk, random):
        """Pull in source_chunk's dependencies (resolved against our system), then mutate."""
        for dependency in source_chunk.dependencies:
            working_copy_dependency = self.feature.software_system.get_chunk(dependency.fully_qualified_name)
            self.dependencies.add(working_copy_dependency)
        self.modify(random)

    def overwrite_with(self, source_chunk):
        """Replace content, bugs and dependencies with those of source_chunk."""
        self.local_content = source_chunk.local_content

        self.bugs.clear()
        for old_bug in source_chunk.bugs:
            new_bug = self.get_bug(old_bug.logical_name)
            if new_bug is None:
                self.add_bug(old_bug.logical_name)

        self.dependencies.clear()
        for dependency in source_chunk.dependencies:
            # Re-resolve against our own software system's working copies.
            new_dependency = self.feature.software_system.get_chunk(dependency.fully_qualified_name)
            self.dependencies.add(new_dependency)

    def _add_dependencies(self, random, candidate_chunks, threshold):
        # Iterate in sorted order so random draws are reproducible.
        for candidate in SortedSet(candidate_chunks, key=lambda c: c.logical_name):
            if random.dependency_should_be_added(threshold):
                self.add_dependency(candidate)

    def add_dependency(self, candidate):
        self.dependencies.add(candidate)

    def _insert_bugs(self, random):
        while random.a_bug_should_be_inserted(self):
            self.add_bug(self.bug_count)
            self.bug_count += 1

    def add_bug(self, logical_name):
        self.bugs.add(Bug(logical_name, self))

    def get_bug(self, logical_name):
        """Return the bug with *logical_name*, or None when absent."""
        # The original used filter() + len(result) is 0, which raises
        # TypeError under Python 3 (filter is lazy) and misused identity
        # comparison for an integer.
        return next(
            (bug for bug in self.bugs if bug.logical_name == logical_name),
            None)

    def refactor(self, random):
        """Randomly drop some existing dependencies."""
        # Collect first, then remove: never mutate the set being iterated.
        to_remove = set()
        for dependency in self.dependencies:
            if random.dependency_should_be_removed(self, dependency):
                to_remove.add(dependency)
        self.dependencies.difference_update(to_remove)

    def debug(self, random, bug=None):
        """Attempt to remove a bug (a random one when *bug* is unknown)."""
        if len(self.bugs) == 0:
            return False

        if bug is None or bug not in self.bugs:
            # Unknown bug: maybe discover and remove a random one.
            if random.unknown_bug_should_be_removed(self):
                bug = random.choose_bug(self)
                self.bugs.remove(bug)
        elif random.known_bug_should_be_removed(self):
            self.bugs.remove(bug)

    def operate(self, random):
        # Bugs in dependencies can manifest through this chunk as well.
        for bug in self.bugs_in_dependencies.union(self.bugs):
            bug.manifest(random)

    def __str__(self):
        def string_repr_set(iterable):
            return ",".join(map(lambda e: repr(e), iterable))

        feature_dependencies = string_repr_set(
            filter(lambda c: c.feature == self.feature, self.dependencies))
        system_dependencies = string_repr_set(
            filter(lambda c: c.feature != self.feature, self.dependencies))

        bugs = ", ".join(map(lambda bug: str(bug), self.bugs))

        return "c_%s:[%s]:[%s]->(in[%s],ex[%s])" % \
               (str(self.logical_name), self.local_content, bugs,
                feature_dependencies, system_dependencies)

    @property
    def fully_qualified_name(self):
        return "%s.%s" % (str(self.feature.logical_name), str(self.logical_name))

    def __repr__(self):
        return "c%s" % str(self.fully_qualified_name)
def test_difference_update():
    """difference_update accepts and removes multiple iterables at once."""
    sorted_set = SortedSet(range(100))
    sorted_set._reset(7)  # small load factor to exercise internal node splits
    sorted_set.difference_update(range(0, 10), range(10, 20))
    # 0..19 are gone, so position i now holds i + 20.
    for position in range(80):
        assert sorted_set[position] == position + 20