def resume(self, run, input):
    """
    Resumes an existing run with new input
    :param run: the previous run state
    :param input: the new input
    :return: the updated run state
    """
    if run.state == RunState.State.COMPLETED:
        raise FlowRunException("Cannot resume a completed run state")

    last_step = run.steps[-1] if len(run.steps) > 0 else None

    # reset steps list so that it doesn't grow forever in a never-ending flow
    run.steps = []

    if last_step:
        current_node = last_step.node  # we're resuming an existing run
    else:
        current_node = run.flow.entry  # we're starting a new run

    if not current_node:
        raise FlowRunException("Flow has no entry point")

    # tracks nodes visited so we can detect loops
    nodes_visited = OrderedSet()

    while current_node:
        # if we're resuming a previously paused step, then use its arrived on value
        if last_step and len(nodes_visited) == 0:
            arrived_on = last_step.arrived_on
        else:
            arrived_on = datetime.datetime.now(tz=pytz.UTC)

        # create new step for this node
        step = Step(current_node, arrived_on)
        run.steps.append(step)

        # should we pause at this node?
        if isinstance(current_node, RuleSet):
            if current_node.is_pause() and (not input or input.consumed):
                run.state = RunState.State.WAIT_MESSAGE
                return run

        # check for a non-pausing loop
        if current_node in nodes_visited:
            raise FlowLoopException(nodes_visited)
        else:
            nodes_visited.add(current_node)

        next_node = current_node.visit(self, run, step, input)

        if next_node:
            # if we have a next node, then record leaving this one
            step.left_on = datetime.datetime.now(tz=pytz.UTC)
        else:
            # if not then we've completed this flow
            run.state = RunState.State.COMPLETED

        current_node = next_node

    return run
def get_header(self, data, renderer_context):
    """Return the list of header fields, determined by class settings and context."""
    # Start with the previously-set list of header fields
    header = renderer_context.get('header', self.header)

    # If no previous set, then determine the candidates from the data
    if header is None:
        header = set()
        data = self.flatten_data(data)
        for item in data:
            header.update(list(item.keys()))
        # Alphabetize header fields by default, since
        # flatten_data() makes field order indeterminate.
        header = sorted(header)

    # If configured to, examine the query parameters for the requested header fields
    request = renderer_context.get('request')
    if request is not None and self.fields_param is not None:
        request_fields = request.query_params.get(self.fields_param)
        if request_fields is not None:
            requested = OrderedSet()
            for request_field in request_fields.split(self.fields_sep):
                # Only fields in the original candidate header set are valid
                if request_field in header:
                    requested.update((request_field,))
            header = requested  # pylint: disable=redefined-variable-type

    return header
def __init__(self):
    self._points = OrderedSet()
    self._outline = OrderedSet()
    self._border = OrderedSet()
    self.dirty = True
    self.midpoint = (0, 0)
class Circle(Shape):
    radius = 0

    def __init__(self, midpoint, radius):
        super().__init__()
        self.midpoint = midpoint
        self.radius = int(radius)

    @classmethod
    def from_rect(cls, rect):
        diameter = min(rect.width, rect.height)
        radius = int(diameter/2)
        midpoint = rect.midpoint
        return Circle(midpoint, radius)

    def find_points(self):
        midx, midy = self.midpoint
        self._points = OrderedSet()
        for x in range(-1*self.radius, self.radius+1):
            for y in range(-1*self.radius, self.radius+1):
                if self.contains_point((int(x), int(y))):
                    self._points.add((int(x+midx), int(y+midy)))

    def contains_point(self, p):
        x, y = p
        return (x+0.5)**2 + (y+0.5)**2 <= self.radius**2
def test_update():
    set1 = OrderedSet('abcd')
    set1.update('efgh')

    assert len(set1) == 8
    assert set1[0] == 'a'
    assert set1[7] == 'h'
class IncludeRequest(Request):
    """ Adds the ability to include webassets bundles on the request.

    If the bundle does not exist, a KeyError will be raised during the
    rendering of the response, after the view has returned.

    Including a bundle multiple times will have the same result as
    including it once.

    The bundles are rendered in the order in which they were included.
    Bundles that are included first, are also rendered first.

    For example:

        @App.html(model=Model)
        def view(self, request):
            request.include('jquery')  # includes the jquery bundle

    """

    def __init__(self, *args, **kwargs):
        super(IncludeRequest, self).__init__(*args, **kwargs)
        self.included_assets = OrderedSet()

    def include(self, resource):
        self.included_assets.add(resource)
def _parse(self, page: BeautifulSoup, url):
    seasons = OrderedDict()
    eqg = OrderedSet()
    child = page.select_one("#WikiaArticle h2")
    season = child.text
    while child.next_sibling:
        child = child.next_sibling

        if child.name == "table":
            for a in child.find_all("a", string="Transcript"):
                if not a.has_attr("class") or "new" not in a["class"]:
                    episode_url, fragment = urldefrag(a["href"])
                    episode_url = urljoin(url, episode_url)
                    if "Equestria Girls" not in season:
                        if season not in seasons:
                            seasons[season] = OrderedSet()
                        seasons[season].append(episode_url)
                    else:
                        eqg.append(episode_url)
            continue

        if child.name == "h2":
            season = child.text
            continue

    seasons["Equestria Girls"] = eqg
    return seasons
def upcoming_flow_questions(self):
    questions = OrderedSet()
    started = False
    for q in self.qset.flow_questions:
        if q.pk == self.pk:
            started = True
        if started:
            questions.append(q)
    return questions
class Shape(object):
    def __init__(self):
        self._points = OrderedSet()
        self._outline = OrderedSet()
        self._border = OrderedSet()
        self.dirty = True
        self.midpoint = (0, 0)

    def refresh(self):
        self.find_points()
        self.find_outline()
        self.find_border()
        self.dirty = False

    @property
    def outline(self):
        """The points outside the shape that are adjacent to it"""
        if self.dirty:
            self.refresh()
        return self._outline

    @property
    def border(self):
        """the points inside the shape along the border"""
        if self.dirty:
            self.refresh()
        return self._border

    @property
    def points(self):
        if self.dirty:
            self.refresh()
            self.dirty = False
        return self._points

    def find_points(self):
        raise NotImplementedError()

    def find_outline(self):
        self._outline = OrderedSet()
        for point in self._points:
            for neighbor in neighbors(point):
                if neighbor not in self._points:
                    self._outline.add(neighbor)

    def find_border(self):
        self._border = OrderedSet()
        for point in self._points:
            for neighbor in neighbors(point):
                if neighbor not in self._points:
                    self._border.add(point)
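# Shape.find_outline() and find_border() rely on a module-level neighbors()
# helper that is not shown in this snippet. A minimal sketch, assuming
# 4-connectivity on an integer grid (hypothetical; the original project may
# well use 8-connectivity instead):
def neighbors(point):
    x, y = point
    return [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)]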
class Index:
    def __init__(self, simple_url=constants.PYPI_SIMPLE_URL,
                 package_url=constants.PYPI_PACKAGE_URL):
        self.package_url = package_url
        self.simple_url = simple_url
        self._package_names = None

    @property
    def package_names(self):
        if self._package_names is None:
            self._package_names = OrderedSet()
            self.reload()
        return self._package_names

    def _get_html_data(self):
        if self.simple_url.startswith('/'):
            with open(self.simple_url) as fp:
                data = fp.read()
        else:
            response = requests.get(self.simple_url)
            data = response.content
        return data

    def _get_names(self):
        data = self._get_html_data()
        soup = BeautifulSoup(data, 'html.parser')
        links = soup.find_all('a')
        names = (link.string for link in links)
        return names

    def _add_package_names(self, names):
        if self._package_names is None:
            self._package_names = OrderedSet()
        for name in names:
            self._package_names.add(name)

    def reload(self):
        """ Reload package names from index. """
        names = self._get_names()
        self._add_package_names(names)

    def __len__(self):
        if self._package_names is None:
            return 0
        return len(self.package_names)

    def __iter__(self):
        return (Package(name, self) for name in self.package_names)

    def __repr__(self):
        return "<Index '{}'>".format(self.simple_url)
def load_ordered_set(filename):
    """
    Load a set of words from a text file, and represent them in an
    OrderedSet object.
    """
    oset = OrderedSet()
    for line in open(filename, encoding='utf-8'):
        oset.append(line.rstrip('\n'))
    return oset
def test_indexing():
    set1 = OrderedSet('abracadabra')
    eq_(set1[:], set1)
    eq_(set1.copy(), set1)
    assert set1[:] is set1
    assert set1.copy() is not set1
    eq_(set1[[1, 2]], OrderedSet(['b', 'r']))
    eq_(set1[1:3], OrderedSet(['b', 'r']))
def all_questions(self):
    """This might be different from the flow questions because it might
    have group parameter questions if present
    :return:
    """
    if self.parameter_list:
        questions = OrderedSet(self.parameter_list.parameters)
    else:
        questions = OrderedSet()
    # use an explicit loop; map() for side effects is lazy in Python 3
    for q in self.flow_questions:
        questions.add(q)
    return questions
def test_pop():
    set1 = OrderedSet('ab')
    elem = set1.pop()
    assert elem == 'b'
    elem = set1.pop()
    assert elem == 'a'
    pytest.raises(KeyError, set1.pop)
def test_bitwise_and_consistency():
    # Specific case that was failing without explicit __and__ definition
    data1 = OrderedSet([12, 13, 1, 8, 16, 15, 9, 11, 18, 6, 4, 3, 19, 17])
    data2 = OrderedSet([19, 4, 9, 3, 2, 10, 15, 17, 11, 13, 20, 6, 14, 16, 8])
    result1 = data1.copy()
    result1.intersection_update(data2)
    # This requires a custom & operation apparently
    result2 = data1 & data2
    result3 = data1.intersection(data2)
    check_results_([result1, result2, result3], datas=(data1, data2),
                   name='isect')
def select_averaged_rows(self, row_dict):
    """
    Given a mapping from labels to row-indices, returns a space in which
    the row with a given label is the average of those row-indices.
    """
    labels = OrderedSet()
    new_u = np.zeros((len(row_dict), self.k))
    for label, indices in row_dict.items():
        rownum = labels.add(label)
        old_rows = self.u[indices, :]
        new_u[rownum] = sum(old_rows) / len(old_rows)
    return self.__class__(new_u, self.sigma, labels)
def survey_questions(self):
    inline_ques = self.questions_inline()
    questions = OrderedSet(inline_ques)
    survey_questions = OrderedSet()
    other_flows = QuestionFlow.objects.exclude(
        validation_test__isnull=True,
        question__pk__in=[q.pk for q in inline_ques]
    ).exclude(
        next_question__pk__in=[q.pk for q in inline_ques]  # skip questions
    )
    for ques in inline_ques:
        survey_questions.append(ques)
        # use an explicit loop; map() for side effects is lazy in Python 3
        for q in sub_questions(ques, other_flows):
            survey_questions.add(q)
    return survey_questions
def test_update():
    set1 = OrderedSet('abcd')
    result = set1.update('efgh')

    assert result == 7
    assert len(set1) == 8
    assert ''.join(set1) == 'abcdefgh'

    set2 = OrderedSet('abcd')
    result = set2.update('cdef')

    assert result == 5
    assert len(set2) == 6
    assert ''.join(set2) == 'abcdef'
def sub_questions(question, flows):
    questions = OrderedSet()
    try:
        qflows = flows.filter(question=question).exclude(next_question=question)
        if qflows:
            for flow in qflows:
                if flow.next_question:
                    questions.add(flow.next_question)
                    subsequent = sub_questions(flow.next_question, flows)
                    # use an explicit loop; map() for side effects is lazy in Python 3
                    for q in subsequent:
                        questions.add(q)
    except QuestionFlow.DoesNotExist:
        return OrderedSet()
    return questions
def solve(ring, values):
    for value in values:
        soln = OrderedSet([value])
        target = value + ring[0] + ring[1]
        for ridx in range(1, len(ring)):
            diff = target - (ring[ridx] + ring[ridx + 1])
            # TODO: We could short-circuit here if diff is not in
            # values, but I like this flow better
            if diff in values:
                soln.add(diff)
        if len(soln) == len(values):
            return ring, soln
    return None
def _flow_questions():
    # next line is to normalize to question set. Otherwise it seems to be
    # causing some issues with flows, since the flow is more native to Qset.
    # Additional attributes in subclasses are just extras
    qset = QuestionSet.get(id=self.id)
    inline_ques = qset.questions_inline()
    flow_questions = OrderedSet()
    for ques in inline_ques:
        flow_questions.append(ques)
        # boldly assuming subquestions don't go more than one
        # subquestion deep for the present implementation
        for q in ques.direct_sub_questions():
            flow_questions.add(q)
    return flow_questions
def build_from_conceptnet_table(filename, orig_index=(), self_loops=True):
    """
    Read a file of tab-separated association data from ConceptNet, such as
    `data/assoc/reduced.csv`. Return a SciPy sparse matrix of the associations,
    and a pandas Index of labels.

    If you specify `orig_index`, then the index of labels will be pre-populated
    with existing labels, and any new labels will get index numbers that are
    higher than the index numbers the existing labels use. This is important
    for producing a sparse matrix that can be used for retrofitting onto an
    existing dense labeled matrix (see retrofit.py).
    """
    mat = SparseMatrixBuilder()

    labels = OrderedSet(orig_index)

    totals = defaultdict(float)
    with open(str(filename), encoding='utf-8') as infile:
        for line in infile:
            concept1, concept2, value_str, dataset, relation = line.strip().split('\t')

            index1 = labels.add(replace_numbers(concept1))
            index2 = labels.add(replace_numbers(concept2))
            value = float(value_str)
            mat[index1, index2] = value
            mat[index2, index1] = value
            totals[index1] += value
            totals[index2] += value

    # Link nodes to their more general versions
    for label in labels:
        prefixes = list(uri_prefixes(label, 3))
        if len(prefixes) >= 2:
            parent_uri = prefixes[-2]
            if parent_uri in labels:
                index1 = labels.index(label)
                index2 = labels.index(parent_uri)
                mat[index1, index2] = 1
                mat[index2, index1] = 1
                totals[index1] += 1
                totals[index2] += 1

    # add self-loops on the diagonal with equal weight to the rest of the row
    if self_loops:
        for key, value in totals.items():
            mat[key, key] = value

    shape = (len(labels), len(labels))
    index = pd.Index(labels)
    return mat.tocsr(shape), index
def _do_set_n_point_crossover(representation, mom, dad, points, random, max_size):
    chunks = []
    i = 0
    for point in points:
        chunks.append(representation[i:point])
        i = point
    chunks.append(representation[i:])

    bro = OrderedSet()
    sis = OrderedSet()
    cross = True
    for variables in chunks:
        for v in variables:
            if v in mom:
                bro.append(v) if cross else sis.append(v)
            if v in dad:
                sis.append(v) if cross else bro.append(v)
        cross = not cross

    if len(bro) > max_size:
        bro = random.sample(bro, max_size)
    if len(sis) > max_size:
        sis = random.sample(sis, max_size)
    return bro, sis
def load_dir(cls, dirname):
    """
    Load an AssocSpace from a directory on disk. The returned object
    will be an instance of the class that you called .load_dir on.
    """
    dirname = dirname.rstrip('/')
    u = np.load(dirname + '/u.npy', mmap_mode='r')
    sigma = np.load(dirname + '/sigma.npy')
    label_file = codecs.open(dirname + '/labels.txt', 'r', encoding='utf-8')

    labels = OrderedSet()
    for label in label_file:
        labels.append(label.rstrip('\n'))

    return cls(u, sigma, labels)
def configure(self, args, file=sys.stdout):
    """
    Configures this `MasterTransducer` using the arguments parsed by an
    :py:mod:`ArgumentParser`.

    :param args: command line arguments parsed by an :py:mod:`ArgumentParser`
    :param file: the file to print help string to
    """
    self.selected = OrderedSet()

    if args.group:
        for group_name in chain.from_iterable(args.group):
            group = self.groups[group_name]
            if args.help:
                self.parser.print_help(file)
                file.write('\n')
                group.print_help(file)
                self.parser.exit()
            for transducer in group.transducers:
                self.selected.add(transducer)

    if args.transducer:
        for transducer_name in chain.from_iterable(args.transducer):
            transducer = self.transducers[transducer_name]
            if args.help:
                self.parser.print_help(file)
                file.write('\n')
                transducer.print_help(file)
                self.parser.exit()
            self.selected.add(transducer)

    if len(self.selected) == 0:
        # If no transducer is selected explicitly, all transducers are used.
        self.selected = self.transducers.values()
def find_points(self):
    startx, starty = self.ul
    self._points = OrderedSet()
    for x in range(int(startx), int(startx)+self.width):
        for y in range(int(starty), int(starty)+self.height):
            self._points.add((int(x), int(y)))
def load_word_file(word_file):
    fd = os.path.expanduser(word_file)
    try:
        w = open(fd)
    except FileNotFoundError:
        print("%s does not exist. You need a wordlist." % (word_file))
        sys.exit(127)

    # OrderedSet only accepts unique keys, ensures no duplicate words
    wrds = OrderedSet()
    for l in w:
        wrds.append(cleanup(l))

    wrds_len = len(wrds)
    if wrds_len < MIN_WORDS:
        print("Word list too small, need at least %u, got %u." %
              (MIN_WORDS, wrds_len))
        sys.exit(127)

    return wrds
def find_points(self):
    midx, midy = self.midpoint
    self._points = OrderedSet()
    for x in range(-1*self.rx, self.rx+1):
        for y in range(-1*self.ry, self.ry+1):
            if self.contains_point((int(x+midx), int(y+midy))):
                self._points.add((int(x+midx), int(y+midy)))
def __init__(self, labels, vectors, replacements=None, standardizer=standardize):
    assert(len(labels) == len(vectors))
    self.labels = OrderedSet(labels)
    if not isinstance(vectors, np.memmap):
        normalize(vectors, copy=False)
    self.vectors = vectors
    self.replacements = replacements
    self._standardizer = standardizer
    self._mean_vec = np.mean(self.vectors, axis=0)
def write_wide_format_otu_table(**kwargs):
    output_table_io = kwargs.pop('output_table_io')
    table_collection = kwargs.pop('table_collection')
    if len(kwargs) > 0:
        raise Exception("Unexpected arguments detected: %s" % kwargs)

    if hasattr(output_table_io, 'name'):
        logging.info("Writing %s" % output_table_io.name)
    else:
        logging.info("Writing an OTU table")

    # Collect a hash of sequence to sample to num_seqs
    gene_to_seq_to_sample_to_count = OrderedDict()
    sequence_to_taxonomy = {}
    samples = OrderedSet()
    for otu in table_collection:
        if otu.marker not in gene_to_seq_to_sample_to_count:
            gene_to_seq_to_sample_to_count[otu.marker] = {}
        if otu.sequence not in gene_to_seq_to_sample_to_count[otu.marker]:
            gene_to_seq_to_sample_to_count[otu.marker][otu.sequence] = {}
        if otu.sample_name in gene_to_seq_to_sample_to_count[otu.marker][otu.sequence]:
            raise Exception(
                "Unexpectedly found 2 of the same sequences for the same sample and marker")
        gene_to_seq_to_sample_to_count[otu.marker][otu.sequence][otu.sample_name] = otu.count
        samples.add(otu.sample_name)
        # This isn't perfect, because the same sequence might have
        # different taxonomies in different samples. But taxonomy might
        # be of regular form, or as a diamond example etc, so eh.
        sequence_to_taxonomy[otu.sequence] = otu.taxonomy

    output_table_io.write("\t".join(itertools.chain(  # header
        ['marker', 'sequence'],
        samples,
        ['taxonomy\n'])))
    for gene, seq_to_sample_to_count in gene_to_seq_to_sample_to_count.items():
        for seq, sample_to_count in seq_to_sample_to_count.items():
            row = [gene, seq]
            for sample in samples:
                try:
                    row.append(str(sample_to_count[sample]))
                except KeyError:
                    row.append('0')
            row.append(sequence_to_taxonomy[seq])
            output_table_io.write("\t".join(row)+"\n")
def extract_filter_params(
        params: Iterable[ZfitParametrized],
        floating: Optional[bool] = True,
        extract_independent: Optional[bool] = True) -> Set[ZfitParameter]:
    params = convert_to_container(params, container=OrderedSet)
    if extract_independent:
        params = OrderedSet(
            itertools.chain.from_iterable(
                param.get_params(floating=floating,
                                 extract_independent=True,
                                 is_yield=None)
                for param in params))
    if floating is not None:
        if not extract_independent and not all(param.independent for param in params):
            raise ValueError(
                "Since `extract_dependent` is not set to True, there are maybe dependent parameters for "
                "which `floating` is an ill-defined attribute.")
        params = OrderedSet(p for p in params if p.floating == floating)
    return params
def _parse(self, language: Language, text: str, merge: bool) -> Union[spacy.tokens.Doc, spacy.tokens.Token]:
    nlp = None
    languages = OrderedSet([language]) | self.languages
    for language in languages:
        with suppress(KeyError):
            nlp = self.nlps[language]
            break
    result = nlp(text)
    if merge:
        result = result[:].merge()
    return result
def collect_all_phrases(path):
    chars_no_data = OrderedSet()
    char_phrases = []
    f = open(path)
    lines = f.readlines()
    f.close()
    for line in lines:
        char = line.strip()
        # no data file
        if not os.path.exists('../output/char_data/'+char+'.html'):
            chars_no_data.add(char)
        else:
            df = open('../output/char_data/'+char+'.html')
            content = df.read()
            df.close()
            if 'html' not in content:
                chars_no_data.add(char)
            else:
                phrases = collect_phrases(content)
                char_phrases.append(json.dumps({'char': char, 'phrases': phrases},
                                               ensure_ascii=False))

    # write chars with phrases
    print('total chars with phrases: {}'.format(len(char_phrases)))
    fo = open('../output/ONLINE_CHAR_PHRASES.TXT', 'a')
    for cp in char_phrases:
        fo.write("%s\n" % cp)
    fo.close()

    # save remaining
    print('characters without data: {}'.format(len(chars_no_data)))
    for char in chars_no_data:
        print(char)
def _sync(self):
    with self.thread_lock:
        self.last_sync_time = datetime.now()
        self.is_syncing = True

    new_id = self.max_id() + 1
    new_ids_keys_map = {}
    new_keys_ids_map = {}

    full = OrderedSet(self.iter_full_keys())
    intersect = full & OrderedSet(self._keys_ids_map)
    exclusion = full - intersect

    def _get_new_id(key):
        # `new_id` lives in the enclosing _sync() scope, so it must be
        # declared nonlocal rather than global
        nonlocal new_id
        id = self._extract_task_id(key)
        if id is not None:
            return id
        id = new_id
        new_id += 1
        return id

    # new tasks
    for key in exclusion:
        id = _get_new_id(key)
        new_ids_keys_map[id] = {'key': key, 'exists': True}
        new_keys_ids_map[key] = id

    # old existed tasks
    for key in intersect:
        id = self._keys_ids_map[key]
        new_ids_keys_map[id] = {'key': key, 'exists': True}
        new_keys_ids_map[key] = id

    with self.thread_lock:
        self._selected_ids = list(new_ids_keys_map.keys())
        self._ids_keys_map.update(new_ids_keys_map)
        self._keys_ids_map.update(new_keys_ids_map)
        self._save_ids()
        self.is_syncing = False
class Node:
    def __init__(self, x: int, y: int, width: int):
        self.x = x
        self.y = y
        self.width = width

        self.__neighbors = OrderedSet()
        self.__conn_ins = []
        self.__edge_cost = {}

    def add_edge(self, node: "Node", delay: int = 0,
                 force_connect: bool = False):
        if not force_connect:
            assert self.width == node.width
        if node not in self.__neighbors:
            self.__neighbors.add(node)
            node.__conn_ins.append(self)
            self.__edge_cost[node] = delay

    def remove_edge(self, node: "Node"):
        if node in self.__neighbors:
            self.__edge_cost.pop(node)
            self.__neighbors.remove(node)
            # remove the incoming connections as well
            node.__conn_ins.remove(self)

    def get_edge_cost(self, node: "Node") -> int:
        if node not in self.__edge_cost:
            return MAX_DEFAULT_DELAY
        else:
            return self.__edge_cost[node]

    def get_conn_in(self) -> List["Node"]:
        return self.__conn_ins

    def __iter__(self) -> Iterator["Node"]:
        return iter(self.__neighbors)

    def __len__(self):
        return len(self.__neighbors)

    @abstractmethod
    def __repr__(self):
        pass

    @abstractmethod
    def node_str(self):
        pass

    def clear(self):
        self.__neighbors.clear()
        self.__edge_cost.clear()
        self.__conn_ins.clear()

    def __contains__(self, item):
        return item in self.__neighbors

    def __hash__(self):
        return hash(self.width) ^ hash(self.x) ^ hash(self.y)
def run(workspace: Path, privilege_config: PrivilegeConfig, *_):
    bench_configs: Tuple[BenchConfig, ...] = tuple(BenchParser(workspace).parse())
    results: List[WorkloadResult] = read_result(bench_configs)

    output_path = workspace / 'generated'
    privilege_cfg = privilege_config.result
    with drop_privilege(privilege_cfg.user, privilege_cfg.group):
        output_path.mkdir(parents=True, exist_ok=True)

        fields = tuple(map(lambda x: x.name, results))

        with (output_path / 'avg.csv').open('w') as fp:
            csv_writer = csv.DictWriter(fp, ('category', *fields))
            csv_writer.writeheader()

            runtime_dict = OrderedDict({'category': 'runtime'})
            for workload in results:
                runtime_dict[workload.name] = workload.runtime
            csv_writer.writerow(runtime_dict)

            perf_events: OrderedSet[str] = OrderedSet(results[0].perf.keys())
            for category in perf_events:
                row_dict = OrderedDict({'category': category})
                for workload in results:
                    row_dict[workload.name] = mean(workload.perf[category])
                csv_writer.writerow(row_dict)

            resctrl_events: OrderedSet[str] = OrderedSet(
                results[0].resctrl[0].keys())
            for category in resctrl_events:
                row_dict = OrderedDict({'category': category})
                for workload in results:
                    row_dict[workload.name] = sum(
                        mean(resctrl[category]) for resctrl in workload.resctrl)
                csv_writer.writerow(row_dict)
def __init__(self, searchTerm, language):
    VocabularyBase.__init__(self, searchTerm, language)
    self.relatedSet = OrderedSet()
    self.broaderSet = OrderedSet()
    self.narrowerSet = OrderedSet()
    self.supportedLang.append('en')

    if language in self.supportedLang:
        for word in self.searchTerms:
            relatedWords = None
            try:
                relatedWords = self.apiCall(word, language)
            except:
                relatedWords = None
            if relatedWords is not None:
                for related in relatedWords:
                    relationship = related.relationshipType
                    if ('equivalent' in relationship
                            or 'synonym' in relationship
                            or 'verb-form' in relationship
                            or 'form' in relationship):
                        for word in related.words:
                            self.relatedSet.append(utils.eszettToSS(word))
                    if 'hypernym' in relationship:
                        for word in related.words:
                            self.broaderSet.append(utils.eszettToSS(word))
                    if 'hyponym' in relationship:
                        for word in related.words:
                            self.narrowerSet.append(utils.eszettToSS(word))
def test_ordered_inequality():
    # Ordered set checks order against sequences.
    assert OrderedSet([1, 2]) != OrderedSet([2, 1])

    assert OrderedSet([1, 2]) != [2, 1]
    assert OrderedSet([1, 2]) != [2, 1, 1]

    assert OrderedSet([1, 2]) != (2, 1)
    assert OrderedSet([1, 2]) != (2, 1, 1)

    # Note: in Python 2.7 deque does not inherit from Sequence, but __eq__
    # contains an explicit check for this case for python 2/3 compatibility.
    assert OrderedSet([1, 2]) != collections.deque([2, 1])
    assert OrderedSet([1, 2]) != collections.deque([2, 2, 1])
def send():
    try:
        scope = ['https://www.googleapis.com/auth/youtube']
        fromInet = YoutubeClient(scope).fetch_links_to_all_videos()
        fromFile = load_data_from_file('urls.json')
        queue = OrderedSet(fromInet) - OrderedSet(fromFile)
        ready = OrderedSet()

        env = config.load('config.prod.yml')
        bot = Bot(env['TM_TOKEN'])
        channel = env['CHANNEL']

        for msg in queue:
            with suppress(TelegramError):
                bot.sendMessage(channel, msg, disable_notification=True)
                ready.append(msg)
            sleep(randint(2, 5))

        lost = len(queue) - len(ready)
        save_as_json_to_file(fromFile + list(ready), 'urls.json')
        return f'Can\'t send {lost} of {len(queue)} videos' if lost else 'ok'
    except HttpError as e:
        print('An HTTP error %d occurred:\n%s' % (e.resp.status, e.content))
def _compute_all_needed_parametrization_ids(self, fixtureobj):
    stack = [(fixtureobj.info.id, [fixtureobj.info.id], set([fixtureobj.info.id]))]
    returned = OrderedSet()
    while stack:
        fixture_id, path, visited = stack.pop()
        if fixture_id in self._all_needed_parametrization_ids_by_fixture_id:
            returned.update(
                self._all_needed_parametrization_ids_by_fixture_id[fixture_id])
            continue
        fixture = self._fixtures_by_id[fixture_id]
        if fixture.parametrization_ids:
            assert isinstance(fixture.parametrization_ids, OrderedSet)
            returned.update(fixture.parametrization_ids)
        if fixture.keyword_arguments:
            for needed in fixture.keyword_arguments.values():
                if needed.is_parameter():
                    continue
                needed_id = needed.info.id
                if needed_id in visited:
                    self._raise_cyclic_dependency_error(
                        fixtureobj, path, needed_id)
                stack.append((needed_id, path + [needed_id],
                              visited | set([needed_id])))
    return returned
def __init__(self, meta_train=False, meta_val=False, meta_test=False,
             meta_split=None, class_augmentations=None):
    if meta_train + meta_val + meta_test == 0:
        if meta_split is None:
            raise ValueError('The meta-split is undefined. Use either the '
                'argument `meta_train=True` (or `meta_val`/`meta_test`), or '
                'the argument `meta_split="train"` (or "val"/"test").')
        elif meta_split not in ['train', 'val', 'test']:
            raise ValueError('Unknown meta-split name `{0}`. The meta-split '
                'must be in [`train`, `val`, `test`].'.format(meta_split))
        meta_train = (meta_split == 'train')
        meta_val = (meta_split == 'val')
        meta_test = (meta_split == 'test')
    elif meta_train + meta_val + meta_test > 1:
        raise ValueError('Multiple arguments among `meta_train`, `meta_val` '
            'and `meta_test` are set to `True`. Exactly one must be set to '
            '`True`.')
    self.meta_train = meta_train
    self.meta_val = meta_val
    self.meta_test = meta_test
    self._meta_split = meta_split

    if class_augmentations is not None:
        if not isinstance(class_augmentations, list):
            raise TypeError('Unknown type for `class_augmentations`. '
                'Expected `list`, got `{0}`.'.format(type(class_augmentations)))
        unique_augmentations = OrderedSet()
        for augmentations in class_augmentations:
            for transform in augmentations:
                if transform in unique_augmentations:
                    warnings.warn('The class augmentation `{0}` already '
                        'exists in the list of class augmentations (`{1}`). '
                        'To avoid any duplicate, this transformation is '
                        'ignored.'.format(transform, repr(transform)),
                        UserWarning, stacklevel=2)
                unique_augmentations.add(transform)
        class_augmentations = list(unique_augmentations)
    else:
        class_augmentations = []
    self.class_augmentations = class_augmentations
def get_ignore_types_in_groups(self, ignore_type_in_groups,
                               ignore_string_type_changes,
                               ignore_numeric_type_changes,
                               ignore_type_subclasses):
    if ignore_type_in_groups:
        if isinstance(ignore_type_in_groups[0], type):
            ignore_type_in_groups = [ignore_type_in_groups]
    else:
        ignore_type_in_groups = []

    result = []
    for item_group in ignore_type_in_groups:
        new_item_group = OrderedSet()
        for item in item_group:
            item = type(item) if item is None or not isinstance(item, type) else item
            new_item_group.add(item)
        result.append(new_item_group)
    ignore_type_in_groups = result

    if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
        ignore_type_in_groups.append(OrderedSet(self.strings))

    if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
        ignore_type_in_groups.append(OrderedSet(self.numbers))

    if ignore_type_subclasses:
        ignore_type_in_groups = list(map(tuple, ignore_type_in_groups))

    return ignore_type_in_groups
def test_unordered_equality():
    # Unordered set checks order against non-sequences.
    assert OrderedSet([1, 2]) == {1, 2}
    assert OrderedSet([1, 2]) == frozenset([2, 1])

    assert OrderedSet([1, 2]) == {1: 'a', 2: 'b'}
    assert OrderedSet([1, 2]) == {1: 1, 2: 2}.keys()
    assert OrderedSet([1, 2]) == {1: 1, 2: 2}.values()

    # Corner case: OrderedDict is not a Sequence, so we don't check for order,
    # even though it does have the concept of order.
    assert OrderedSet([1, 2]) == collections.OrderedDict([(2, 2), (1, 1)])

    # Corner case: We have to treat iterators as unordered because there
    # is nothing to distinguish an ordered and unordered iterator
    assert OrderedSet([1, 2]) == iter([1, 2])
    assert OrderedSet([1, 2]) == iter([2, 1])
    assert OrderedSet([1, 2]) == iter([2, 1, 1])
def _visit_relatives(artist_id):
    visited_artist_ids = OrderedSet([artist_id])
    artist_ids = OrderedSet([artist_id])
    depth = 0
    while not halt_condition(visited_artist_ids, depth):
        self.logger.debug("%d artists on level %d for whom to gather relatives.",
                          len(artist_ids), depth)
        relative_ids = OrderedSet()
        for artist_id in artist_ids:
            relative_ids.update(self.spotify_client.related_artist_ids(artist_id))
        relative_ids -= visited_artist_ids
        relative_ids -= excluded_artist_ids
        self.logger.debug("After removing relatives either excluded or already visited, "
                          "%d new relatives found on level %d.",
                          len(relative_ids), depth)
        visited_artist_ids.update(relative_ids)
        artist_ids = relative_ids
        depth += 1
    return visited_artist_ids
def fix():
    conn = pms.connect(host='localhost', user='******', passwd='', db='empath',
                       charset='utf8', init_command='SET NAMES UTF8')
    cur = conn.cursor()

    # get chunked comment ids
    query = "SELECT c.id,c.body"
    for gen in _generics:
        query += (",m.%s" % gen.lower())
    query += " FROM Comments c "
    query += "JOIN Mentions m on c.id=m.id WHERE c.chunked=True"
    cur.execute(query)

    data = {}
    for row in cur:
        drugs = np.array([uniconvert(d) for d in row[2:]])
        dmap = np.where(drugs == 1)
        drugs = [d.lower() for d in list(np.array(_generics)[dmap])]
        data[row[0]] = (row[1], drugs)

    for post_id in data.keys():
        body, drugs = data[post_id]
        body = body.lower()
        for drug in drugs:
            for remap in _gen_dict.get(drug.upper(), [drug.upper()]):
                body = body.replace(remap.lower(), drug.lower())

        # set preamble order to correct precedence
        query = ("UPDATE Chunks SET precedence=0 WHERE (id='%s' "
                 "AND drug='preamble')" % post_id)
        cur.execute(query)

        # get order of drug mentions
        tokens = tokenize(body, drug=None, pos_filter=False, lemma=False)
        ordered_drugs = []
        for word in tokens:
            if word in drugs:
                ordered_drugs.append(word)
        ordered_drugs = OrderedSet(ordered_drugs)

        for i, drug in enumerate(ordered_drugs):
            query = ("UPDATE Chunks SET precedence=%i WHERE (id='%s' "
                     "AND drug='%s')" % (i + 1, post_id, drug))
            cur.execute(query)

    conn.commit()
    conn.close()
def write_csv_file(json_array_to_convert, csv_file_path: str, key_whitelist: list):
    list_processed_data = []
    header = OrderedSet()

    for item in json_array_to_convert:
        map_column_flatitem = {}
        prefix = ""
        flatten_item(map_column_flatitem, prefix, item, key_whitelist)
        list_processed_data.append(map_column_flatitem)
        header.update(map_column_flatitem.keys())

    csv.register_dialect("my_dialect", my_dialect)

    with open(csv_file_path, 'w+') as f:  # https://stackoverflow.com/a/1170297
        # with open(csv_file_path, 'w+', newline='') as f:  # prevents Python from replacing \n with \r\n on Windows
        writer = csv.DictWriter(f, header, dialect="my_dialect")
        writer.writeheader()
        for map_row in list_processed_data:
            writer.writerow(map_row)
            # print(map_row)

    print("[+] Completed writing CSV file with %d columns, %d lines" %
          (len(header), len(list_processed_data)))
def __init__(cls, name, bases, d):  # noqa
    packages = OrderedSet()

    for b in bases:
        if hasattr(b, 'packages'):
            packages |= b.packages

    if 'packages' in d:
        packages |= d['packages']

    cls.packages = packages

    super().__init__(name, bases, d)
def _extract_dependencies(zfit_objects: Iterable[ZfitObject]) -> ztyping.DependentsType:
    """Calls the :py:meth:`~BaseDependentsMixin.get_dependents` method on every object and returns a combined set.

    Args:
        zfit_objects ():

    Returns:
        set(zfit.Parameter): A set of independent Parameters
    """
    zfit_objects = convert_to_container(zfit_objects)
    dependents = (obj.get_cache_deps(only_floating=False) for obj in zfit_objects)
    dependents_set = OrderedSet(itertools.chain.from_iterable(dependents))  # flatten
    return dependents_set
def __init__(self, x: int, y: int, track_width: int, switchbox: SwitchBox,
             height: int = 1):
    self.x = x
    self.y = y
    self.track_width = track_width
    self.height = height

    # create a copy of switch box because the switchbox nodes have to be
    # created
    self.switchbox: SwitchBox = SwitchBox(x, y, switchbox.num_track,
                                          switchbox.width,
                                          switchbox.internal_wires)

    self.ports: Dict[str, PortNode] = {}
    self.inputs = OrderedSet()
    self.outputs = OrderedSet()

    # hold for the core
    self.core: InterconnectCore = None
def get_image_color_set(image: imageType,
                        system: colorSystemLit,
                        number: int = 0
                        ) -> Union[OrderedSet[colorRGB], OrderedSet[colorHSV]]:
    """Takes RGB image, gets set of all colors in an image, returns as set of RGB or HSV tuples"""
    color_set: OrderedSet[colorType]
    color_list = get_image_color_list(image=image, system=system)  # don't pass number
    color_set = OrderedSet(color_list)  # list(dict.fromkeys(color_list_all)) to preserve order
    if number:  # is not 0
        color_set = color_set[:number]
    return color_set
def get_selected_items(project, selected, filters, ordering):
    """ Get selected items

    :param project: LS project
    :param selected: dict {'all': true|false, 'included|excluded': [...task_ids...]}
    :param filters: filters as on tab
    :param ordering: ordering as on tab
    """
    # all_tasks - excluded
    ids = eval_task_ids(project, filters=filters, ordering=ordering)  # get tasks from tab filters
    if selected.get('all', False):
        for item in selected.get('excluded', []):
            try:
                # ids could include not all selected items, when filter is
                # more narrow than selected items
                ids.remove(item)
            except ValueError:
                pass

    # included only
    else:
        items = selected.get('included', [])
        ids = OrderedSet(items) & OrderedSet(ids)

    return ids
def __init__(self, content: t.Iterable[T]):
    self._raw_content = OrderedSet(content)
    self._content = OrderedDict(
        {content: _RingLink(content) for content in self._raw_content})

    _content = tuple(self._content.values())
    for i in range(len(_content)):
        _content[i].next = _content[(i + 1) % len(_content)]
        _content[i].previous = _content[i - 1]

    try:
        self._current = _content[-1]
    except IndexError:
        raise ValueError('Ring must contain at least one object')
def __init__(self, name, config, proj_root, files_root, build_root=None):
    config['local'] = True  # LocalFiles must be local
    super(LocalFiles, self).__init__(name, config, proj_root, files_root)
    if 'fileset' not in config:
        raise RuntimeError('LocalFiles must be initialized with a fileset')
    files = config['fileset'].get('files', [])
    files_map = map(lambda p: os.path.normpath(p), files)
    self.fileset = Fileset()
    self.fileset.files = OrderedSet(files_map)
    self.build_root = build_root
    self.resolver = IncDirsResolver(files_root, [])
    self.cache_files = Fileset()
def sparse_from_parallel_text(input_path, languages):
    vocabs = {language: get_vocab(language) for language in languages}
    labels = OrderedSet()
    pairs = OrderedSet()
    rows = []
    cols = []
    values = []

    for lang1 in languages:
        for lang2 in languages:
            if lang1 < lang2:
                print(lang1, lang2)
                filename = input_path / "{}-{}.txt".format(lang1, lang2)
                with open(str(filename), encoding='utf-8') as infile:
                    lines = list(infile.readlines())
                    random.shuffle(lines)

                    for i, line in enumerate(lines):
                        text1, text2 = line.rstrip('\n').split('\t')
                        terms1 = [
                            replace_numbers(standardized_concept_uri(lang1, word))
                            for word in text1.split(' ')
                            if word in vocabs[lang1]
                        ]
                        terms2 = [
                            replace_numbers(standardized_concept_uri(lang2, word))
                            for word in text2.split(' ')
                            if word in vocabs[lang2]
                        ]
                        terms = terms1 + terms2
                        if i > 0 and i % 100000 == 0:
                            print('\t', i, '\t', len(values), terms)
                        if i == 1000000:
                            break

                        for t1 in terms:
                            index1 = labels.add(t1)
                            for t2 in terms:
                                index2 = labels.add(t2)
                                pair_index = pairs.add((index1, index2))
                                assert pair_index <= len(values)
                                if pair_index == len(values):
                                    rows.append(index1)
                                    cols.append(index2)
                                    values.append(1 / len(terms))
                                else:
                                    values[pair_index] += 1 / len(terms)

    shape = (len(labels), len(labels))
    index = pd.Index(labels)
    mat = sparse.coo_matrix((values, (rows, cols)), shape=shape, dtype='f').tocsr()
    return mat, index
def test_custom_profane_word_dictionaries(profanity_filter):
    assert EMPTY_PROFANE_WORD_DICTIONARY == profanity_filter.custom_profane_word_dictionaries

    # Testing pluralization here as well
    profanity_filter.custom_profane_word_dictionaries = {'en': ['unicorn', 'chocolate']}
    assert (create_profane_word_dictionaries(en=OrderedSet(['unicorn', 'chocolate'])) ==
            profanity_filter.custom_profane_word_dictionaries)

    censored = profanity_filter.censor(TEST_STATEMENT)
    assert 'unicorns' not in censored
    assert 'chocolate' not in censored
    assert 'Turd' in censored
def recall(array1, array2):
    r"""
    Recall : |a1 \cap a2| / |a2|

    Parameters
    ----------
    array1: np.ndarray
        first array
    array2: np.ndarray
        second array

    Returns
    ----------
    float
        recall
    """
    set_a1 = OrderedSet(array1)
    set_a2 = OrderedSet(array2)
    inters = set_a1.intersection(set_a2)
    print(len(set_a1), " ", len(inters), " ", len(set_a2))
    return len(inters) * 1.0 / len(set_a2)
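# A minimal usage sketch (not from the original source) of the recall()
# function above: the two arrays share two of array2's three elements,
# so the result is 2/3 after the set sizes are printed.
import numpy as np

score = recall(np.array([1, 2, 3, 4]), np.array([3, 4, 5]))
print(score)  # 0.666...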
def test_class_splitter_for_fold_overlaps():
    class DemoTask(Task):
        def __init__(self):
            super(DemoTask, self).__init__(index=0, num_classes=None)
            self._inputs = np.arange(10)

        def __len__(self):
            return len(self._inputs)

        def __getitem__(self, index):
            return self._inputs[index]

    splitter = ClassSplitter(shuffle=True,
                             num_train_per_class=5,
                             num_test_per_class=5)
    task = DemoTask()

    all_train_samples = list()
    all_test_samples = list()

    # split task ten times into train and test
    for i in range(10):
        tasks_split = splitter(task)
        train_task = tasks_split["train"]
        test_task = tasks_split["test"]

        train_samples = set([train_task[i] for i in range(len(train_task))])
        test_samples = set([test_task[i] for i in range(len(test_task))])

        # no overlap between train and test splits at single split
        assert len(train_samples.intersection(test_samples)) == 0

        all_train_samples.append(train_samples)
        all_test_samples.append(test_samples)

    # gather unique samples from multiple splits
    samples_in_all_train_splits = OrderedSet().union(*all_train_samples)
    samples_in_all_test_splits = OrderedSet().union(*all_test_samples)

    # no overlap between train and test splits at multiple splits
    assert len(samples_in_all_test_splits.intersection(samples_in_all_train_splits)) == 0
def sort_greedy(data: OrderedDictType[_T1, Set[_T2]]) -> OrderedSet[_T1]:
    assert isinstance(data, OrderedDict)
    result: OrderedSet[_T1] = OrderedSet()
    available_entries = data.copy()
    progress_bar = tqdm(total=len(data), initial=0)
    while len(available_entries) > 0:
        selection = get_greedy(available_entries)
        result.update(selection)
        for k in selection:
            available_entries.pop(k)
        progress_bar.update(round(len(result) - progress_bar.n, 0))
    progress_bar.close()
    return result
def __prepare_beat_matcher(self, track, asset, filename):
    darea = self.__current_builder.get_object('waveform_area')
    self.__audio_previewer = AudioPreviewer(track, darea, filename)
    darea.get_style_context().add_class("AudioUriSource")
    for id_ in ('range-combo', 'select-type-combo', 'distribution-combo', 'step-spinner'):
        self.__current_builder.get_object(id_).set_sensitive(True)

    self.__clap_mixer.set_asset(asset)
    self.__clap_mixer_handlers.append(
        self.__clap_mixer.pipeline.connect("state-change",
                                           self.__mixer_state_changed_cb))
    self.__clap_mixer.pipeline.activatePositionListener(50)
    self.__clap_mixer_handlers.append(
        self.__clap_mixer.pipeline.connect("position",
                                           self.__mixer_position_cb, track))

    step = int(self.__current_builder.get_object('step-spinner').get_value())
    self.__selected_beats = OrderedSet([b['start'] for b in track.beats[0::step]])
    self.__compute_markers()
def get_state_group_info(program):
    """Returns a dictionary from state group indices to set of state variables
    indices. For state_group_0_state_1, the dict will have an entry {0: set(1)}"""
    state_group_info = OrderedDict()
    for i, j in findall(r'state_and_packet.state_group_(\d+)_state_(\d+)', program):
        indices = state_group_info.get(i, OrderedSet())
        indices.add(j)
        state_group_info[i] = indices
    return state_group_info
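# A minimal usage sketch (not from the original source): feeding a small
# program string through get_state_group_info() groups state indices by
# state group. Note that keys and values are the matched digit strings.
sample_program = (
    "state_and_packet.state_group_0_state_1 = 5\n"
    "state_and_packet.state_group_0_state_2 = 7\n"
    "state_and_packet.state_group_3_state_0 += 1\n"
)
print(get_state_group_info(sample_program))
# OrderedDict([('0', OrderedSet(['1', '2'])), ('3', OrderedSet(['0']))])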