def signal_crosses(short_moving_averages, long_moving_averages):
    short_moving_averages = SortedDict(short_moving_averages)
    long_moving_averages = SortedDict(long_moving_averages)

    if len(short_moving_averages) != len(long_moving_averages):
        print("[Error] signal_crosses: inputs must be same size")
        return {}

    signal_crosses = {}
    last_diff_dir = 0
    for date, short_average in short_moving_averages.items():  # iteritems() is Python 2 only
        long_average = long_moving_averages[date]
        diff = short_average - long_average
        if last_diff_dir == 0:
            signal_crosses[date] = HOLD
            if diff != 0:
                last_diff_dir = sign(diff)
            continue
        if sign(diff) != last_diff_dir:
            signal_crosses[date] = BUY if last_diff_dir < 0 else SELL
            last_diff_dir = -last_diff_dir
        else:
            signal_crosses[date] = HOLD

    return SortedDict(signal_crosses)
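# A minimal smoke test for signal_crosses() above. BUY/SELL/HOLD and sign() are
# referenced by the function but not shown here, so they are stubbed for
# illustration; the real module presumably defines them alongside the function.
BUY, SELL, HOLD = 1, -1, 0
sign = lambda x: (x > 0) - (x < 0)

short_ma = {'2020-01-01': 10.0, '2020-01-02': 11.0, '2020-01-03': 9.0}
long_ma = {'2020-01-01': 10.5, '2020-01-02': 10.5, '2020-01-03': 10.5}
print(signal_crosses(short_ma, long_ma))
# expected with these stubs: HOLD, BUY, SELL for the three dates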
def get_sort_vector( self, col, order, key_func = None, filter_func = None ) :
    if filter_func : # is not None,
        # create a sort vector from scratch, filtered
        getter_func = self._make_key_getter( col )
        sorted_dict = SortedDict( key_func )
        for j in range( len( self.vocab ) ) :
            if filter_func( self.vocab_kview[j], self.vocab_vview[j][1] ) :
                k = getter_func( j )
                sorted_dict[ k ] = j
        vector = sorted_dict.values()
        if order != Qt.AscendingOrder :
            vector = [j for j in reversed( vector ) ]
    else : # no filter_func, try to reuse a cached vector
        vector = self.sort_up_vectors[ col ]
        if not vector or key_func is not self.sort_key_funcs[ col ] :
            # there is no ascending vector for this column, or there
            # is one but it was made with a different key_func.
            getter_func = self._make_key_getter( col )
            sorted_dict = SortedDict( key_func )
            for j in range( len( self.vocab ) ) :
                k = getter_func( j )
                sorted_dict[ k ] = j
            vector = self.sort_up_vectors[ col ] = sorted_dict.values()
            self.sort_key_funcs[ col ] = key_func
        if order != Qt.AscendingOrder :
            # what is wanted is a descending order vector, do we have one?
            if self.sort_down_vectors[ col ] is None :
                # no, so create one from the asc. vector we now have
                self.sort_down_vectors[ col ] = [ j for j in reversed( vector ) ]
            # yes we do (now)
            vector = self.sort_down_vectors[ col ]
    # one way or another, vector is a sort vector
    # note the actual word count available through that vector
    self.active_word_count = len(vector)
    return vector
def sort( self, col, order ) :
    self.active_sort_vector = []
    if 0 == len(self.message_tuples) : # nothing to display
        return
    self.layoutAboutToBeChanged.emit([],QAbstractItemModel.VerticalSortHint)
    # treat columns 0 and 1 the same
    if col : # is 1 or 2
        col -= 1 # make it 0 or 1
    # we need an ascending vector in all cases.
    vector = self.sort_vectors_ascending[ col ]
    if vector is None : # we need to create the ascending vector
        sorted_dict = SortedDict()
        for j in range( len( self.message_tuples ) ) :
            line_col_msg_tuple = self.message_tuples[ j ]
            if col : # is 1, meaning sort on messages
                key = line_col_msg_tuple[2]+line_col_msg_tuple[0]
            else : # col is 0, sort on line#+col#
                key = line_col_msg_tuple[0]+line_col_msg_tuple[1]
            key += str(j) # ensure uniqueness
            sorted_dict[key] = j
        vector = self.sort_vectors_ascending[ col ] = sorted_dict.values()
    # vector now has an ascending sort vector which is cached..
    if order == Qt.DescendingOrder : # ..but we need the descending one
        if self.sort_vectors_descending[ col ] is None : # we need to make it
            self.sort_vectors_descending[ col ] = [ j for j in reversed( vector ) ]
        vector = self.sort_vectors_descending[ col ]
    self.active_sort_vector = vector
    self.layoutChanged.emit([],QAbstractItemModel.VerticalSortHint)
class ProductReport(object):
    """Read overview page of one job group and generate a report for the product."""

    def __init__(self, browser, job_group_url, root_url, args):
        """Construct a product report object with options."""
        self.args = args
        self.job_group_url = job_group_url
        self.group = job_group_url.split('/')[-1]
        current_url, previous_url = get_build_urls_to_compare(browser, job_group_url, args.builds,
                                                              args.against_reviewed, args.running_threshold)
        # read last finished
        current_details = browser.get_soup(current_url)
        previous_details = browser.get_soup(previous_url)
        for details in current_details, previous_details:
            assert sum(int(badge.text) for badge in details.find_all(class_='badge')) > 0, \
                "invalid page with no test results found, make sure you specified valid builds (leading zero missing?)"
        current_summary = parse_summary(current_details)
        previous_summary = parse_summary(previous_details)

        changes = {k: v - previous_summary.get(k, 0) for k, v in iteritems(current_summary)
                   if k != 'none' and k != 'incomplete'}
        log.info("Changes since last build:\n\t%s" % '\n\t'.join("%s: %s" % (k, v) for k, v in iteritems(changes)))

        self.build = get_build_nr(current_url)
        self.ref_build = get_build_nr(previous_url)

        # for each architecture iterate over all
        cur_archs, prev_archs = (set(arch.text for arch in details.find_all('th', id=re.compile('flavor_')))
                                 for details in [current_details, previous_details])
        archs = cur_archs
        if args.arch:
            assert args.arch in cur_archs, "Selected arch {} was not found in test results {}".format(args.arch, cur_archs)
            archs = [args.arch]
        self.missing_archs = sorted(prev_archs - cur_archs)
        if self.missing_archs:
            log.info("%s missing completely from current run: %s" %
                     (pluralize(len(self.missing_archs), "architecture is", "architectures are"),
                      ', '.join(self.missing_archs)))

        # create arch reports
        self.reports = SortedDict()
        progress_browser = progress_browser_factory(args) if args.query_issue_status else None
        bugzilla_browser = bugzilla_browser_factory(args) if args.query_issue_status else None
        for arch in sorted(archs):
            results = get_arch_state_results(arch, current_details, previous_details, args.output_state_results)
            self.reports[arch] = ArchReport(arch, results, args, root_url, progress_browser, bugzilla_browser, browser)

    def __str__(self):
        """Return report for product."""
        now_str = datetime.datetime.now().strftime('%Y-%m-%d - %H:%M')
        missing_archs_str = '\n * **Missing architectures**: %s' % ', '.join(self.missing_archs) if self.missing_archs else ''
        build_str = self.build
        if self.args.verbose_test and self.args.verbose_test > 1:
            build_str += ' (reference %s)' % self.ref_build
        openqa_review_report_product = openqa_review_report_product_template.substitute({
            'now': now_str,
            'build': build_str,
            'common_issues': common_issues(missing_archs_str, self.args.show_empty),
            'arch_report': '<hr>'.join(map(str, self.reports.values()))
        })
        return openqa_review_report_product
def simulation(prices, signal_crosses, budget):
    simulation = {}
    prices = SortedDict(prices)
    cash_on_hand = budget
    shares = 0

    for date, price in prices.items():  # iteritems() is Python 2 only
        signal = signal_crosses[date]
        if signal == SELL:
            shares += cash_on_hand / price
            cash_on_hand = 0
        elif signal == BUY:
            cash_on_hand += shares * price
            shares = 0
        simulation[date] = {'shares': shares, 'cash_on_hand': cash_on_hand}

    final_value = max(cash_on_hand, shares * prices.values()[-1])
    earnings = final_value - budget
    return simulation, earnings
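# A hedged driver for simulation(); the prices and signals are toy data and the
# BUY/SELL/HOLD constants are assumed to match those used by signal_crosses().
BUY, SELL, HOLD = 1, -1, 0
prices = {'2020-01-01': 100.0, '2020-01-02': 110.0, '2020-01-03': 90.0}
signals = {'2020-01-01': HOLD, '2020-01-02': SELL, '2020-01-03': BUY}
history, earnings = simulation(prices, signals, budget=1000.0)
print(earnings)  # profit/loss relative to the starting budget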
class ImageFlow(QtCore.QObject):
    # _dataPosChanged = QtCore.pyqtSignal(int)

    def __init__(self):
        super().__init__()  # the QObject base class must be initialised
        self.processors = SortedDict()

    def add_processor(self, processor):
        self.processors[len(self.processors)] = processor

    def apply(self, data):
        if len(self.processors) == 0:
            return data
        data = data.copy()
        for p in self.processors.values():
            data = p.apply(data)
        return data
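# A usage sketch for ImageFlow, assuming PyQt (QtCore) is available. Doubler is
# a made-up stand-in for whatever processor objects the real pipeline registers;
# the only contract it must honour is the apply() method used above.
class Doubler:
    def apply(self, data):
        return [2 * x for x in data]

flow = ImageFlow()
flow.add_processor(Doubler())
flow.add_processor(Doubler())
print(flow.apply([1, 2, 3]))  # [4, 8, 12] -- processors run in insertion order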
def test_valuesview():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    values = SortedDict(mapping[:2]).values()
    assert repr(values) == "SortedValuesView(SortedDict({'a': 0, 'b': 1}))"
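# Supplementary sketch: the same positional access shown for the values view
# also works on the keys and items views of a SortedDict.
from sortedcontainers import SortedDict

sd = SortedDict({'a': 0, 'b': 1, 'c': 2})
assert sd.keys()[0] == 'a'                 # SortedKeysView supports indexing...
assert sd.items()[-1] == ('c', 2)          # ...and so does SortedItemsView
assert sd.keys()[1:] == ['b', 'c']         # slicing a view returns a plain list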
def plotWidth(dwdictX, fname, nameX, mX, cuts):
    sorted_dwdictX = SortedDict(dwdictX)
    n = len(sorted_dwdictX) - 1
    x = array('d', sorted_dwdictX.keys())
    y = array('d', sorted_dwdictX.values())
    gwX = TGraph(n, x, y)
    gwX.SetName("gwX")
    gwX.SetTitle("")
    gwX.GetXaxis().SetTitle("tan#beta")
    gwX.GetYaxis().SetTitle("#Gamma_{#it{" + nameX + "}}/#it{m}_{#it{" + nameX + "}} [%]")
    gwX.SetLineColor(ROOT.kBlack)
    gwX.SetMarkerColor(ROOT.kBlack)
    gwX.SetMarkerStyle(20)
    gwX.SetMarkerSize(0.5)

    ptxt = TPaveText(0.62, 0.70, 0.87, 0.87, "NDC")
    ptxt.SetFillStyle(4000)  # will be transparent
    ptxt.SetFillColor(0)
    ptxt.SetTextFont(42)
    ptxt.SetBorderSize(0)
    ptxt.AddText("sin(#beta-#alpha)=1")
    ptxt.AddText("#it{m}_{#it{" + nameX + "}}=" + str(mX) + " GeV")

    c = TCanvas("c", "c", 600, 600)
    c.cd()
    c.SetLogx()
    c.SetLogy()
    c.SetGridx()
    c.SetGridy()
    c.SetTicks(1, 1)
    c.Draw()
    # gwX.Draw("p")
    gwX.Draw()
    ptxt.Draw("same")
    c.Modified()
    c.Update()
    c.SaveAs(fname)
class FederationRemoteSendQueue(object): """A drop in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.presence_map = {} # Pending presence map user_id -> UserPresenceState self.presence_changed = SortedDict() # Stream position -> list[user_id] # Stores the destinations we need to explicitly send presence to about a # given user. # Stream position -> (user_id, destinations) self.presence_destinations = SortedDict() self.keyed_edu = {} # (destination, key) -> EDU self.keyed_edu_changed = SortedDict() # stream position -> (destination, key) self.edus = SortedDict() # stream position -> Edu self.device_messages = SortedDict() # stream position -> destination self.pos = 1 self.pos_time = SortedDict() # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,), "", [], lambda: len(queue)) for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", "edus", "device_messages", "pos_time", "presence_destinations", ]: register(queue_name, getattr(self, queue_name)) self.clock.looping_call(self._clear_queue, 30 * 1000) def _next_pos(self): pos = self.pos self.pos += 1 self.pos_time[self.clock.time_msec()] = pos return pos def _clear_queue(self): """Clear the queues for anything older than N minutes""" FIVE_MINUTES_AGO = 5 * 60 * 1000 now = self.clock.time_msec() keys = self.pos_time.keys() time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO) if not keys[:time]: return position_to_delete = max(keys[:time]) for key in keys[:time]: del self.pos_time[key] self._clear_queue_before_pos(position_to_delete) def _clear_queue_before_pos(self, position_to_delete): """Clear all the queues from before a given position""" with Measure(self.clock, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_changed.keys() i = self.presence_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_changed[key] user_ids = set( user_id for uids in self.presence_changed.values() for user_id in uids ) keys = self.presence_destinations.keys() i = self.presence_destinations.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_destinations[key] user_ids.update( user_id for user_id, _ in self.presence_destinations.values() ) to_del = [ user_id for user_id in self.presence_map if user_id not in user_ids ] for user_id in to_del: del self.presence_map[user_id] # Delete things out of keyed edus keys = self.keyed_edu_changed.keys() i = self.keyed_edu_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.keyed_edu_changed[key] live_keys = set() for edu_key in self.keyed_edu_changed.values(): live_keys.add(edu_key) to_del = [edu_key for edu_key in self.keyed_edu if edu_key not in live_keys] for edu_key in to_del: del self.keyed_edu[edu_key] # Delete things out of edu map keys = self.edus.keys() i = self.edus.bisect_left(position_to_delete) for key in keys[:i]: del self.edus[key] # Delete things out of device map keys = self.device_messages.keys() i = self.device_messages.bisect_left(position_to_delete) for key in keys[:i]: del self.device_messages[key] def notify_new_events(self, current_id): """As per FederationSender""" # We 
don't need to replicate this as it gets sent down a different # stream. pass def build_and_send_edu(self, destination, edu_type, content, key=None): """As per FederationSender""" if destination == self.server_name: logger.info("Not sending EDU to ourselves") return pos = self._next_pos() edu = Edu( origin=self.server_name, destination=destination, edu_type=edu_type, content=content, ) if key: assert isinstance(key, tuple) self.keyed_edu[(destination, key)] = edu self.keyed_edu_changed[pos] = (destination, key) else: self.edus[pos] = edu self.notifier.on_new_replication_data() def send_read_receipt(self, receipt): """As per FederationSender Args: receipt (synapse.types.ReadReceipt): """ # nothing to do here: the replication listener will handle it. pass def send_presence(self, states): """As per FederationSender Args: states (list(UserPresenceState)) """ pos = self._next_pos() # We only want to send presence for our own users, so lets always just # filter here just in case. local_states = list(filter(lambda s: self.is_mine_id(s.user_id), states)) self.presence_map.update({state.user_id: state for state in local_states}) self.presence_changed[pos] = [state.user_id for state in local_states] self.notifier.on_new_replication_data() def send_presence_to_destinations(self, states, destinations): """As per FederationSender Args: states (list[UserPresenceState]) destinations (list[str]) """ for state in states: pos = self._next_pos() self.presence_map.update({state.user_id: state for state in states}) self.presence_destinations[pos] = (state.user_id, destinations) self.notifier.on_new_replication_data() def send_device_messages(self, destination): """As per FederationSender""" pos = self._next_pos() self.device_messages[pos] = destination self.notifier.on_new_replication_data() def get_current_token(self): return self.pos - 1 def federation_ack(self, token): self._clear_queue_before_pos(token) def get_replication_rows(self, from_token, to_token, limit, federation_ack=None): """Get rows to be sent over federation between the two tokens Args: from_token (int) to_token(int) limit (int) federation_ack (int): Optional. The position where the worker is explicitly acknowledged it has handled. Allows us to drop data from before that point """ # TODO: Handle limit. # To handle restarts where we wrap around if from_token > self.pos: from_token = -1 # list of tuple(int, BaseFederationRow), where the first is the position # of the federation stream. rows = [] # There should be only one reader, so lets delete everything its # acknowledged its seen. 
if federation_ack: self._clear_queue_before_pos(federation_ack) # Fetch changed presence i = self.presence_changed.bisect_right(from_token) j = self.presence_changed.bisect_right(to_token) + 1 dest_user_ids = [ (pos, user_id) for pos, user_id_list in self.presence_changed.items()[i:j] for user_id in user_id_list ] for (key, user_id) in dest_user_ids: rows.append((key, PresenceRow( state=self.presence_map[user_id], ))) # Fetch presence to send to destinations i = self.presence_destinations.bisect_right(from_token) j = self.presence_destinations.bisect_right(to_token) + 1 for pos, (user_id, dests) in self.presence_destinations.items()[i:j]: rows.append((pos, PresenceDestinationsRow( state=self.presence_map[user_id], destinations=list(dests), ))) # Fetch changes keyed edus i = self.keyed_edu_changed.bisect_right(from_token) j = self.keyed_edu_changed.bisect_right(to_token) + 1 # We purposefully clobber based on the key here, python dict comprehensions # always use the last value, so this will correctly point to the last # stream position. keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} for ((destination, edu_key), pos) in iteritems(keyed_edus): rows.append((pos, KeyedEduRow( key=edu_key, edu=self.keyed_edu[(destination, edu_key)], ))) # Fetch changed edus i = self.edus.bisect_right(from_token) j = self.edus.bisect_right(to_token) + 1 edus = self.edus.items()[i:j] for (pos, edu) in edus: rows.append((pos, EduRow(edu))) # Fetch changed device messages i = self.device_messages.bisect_right(from_token) j = self.device_messages.bisect_right(to_token) + 1 device_messages = {v: k for k, v in self.device_messages.items()[i:j]} for (destination, pos) in iteritems(device_messages): rows.append((pos, DeviceRow( destination=destination, ))) # Sort rows based on pos rows.sort() return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
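# The pruning pattern used by _clear_queue_before_pos() above -- bisect on the
# sorted stream positions, then delete every key below the cut-off -- shown in
# isolation with a plain SortedDict and toy data.
from sortedcontainers import SortedDict

queue = SortedDict({1: 'a', 3: 'b', 7: 'c', 9: 'd'})  # stream position -> payload

position_to_delete = 7
keys = queue.keys()
i = queue.bisect_left(position_to_delete)  # index of the first key >= cut-off
for key in keys[:i]:                       # slicing the view copies to a list, so deleting is safe
    del queue[key]

assert list(queue) == [7, 9]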
def save_polygon(polygon, all_metadata):
    d = SortedDict([(m, '') for m in all_metadata])
    d.update(polygon['properties'])
    return d.values()
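# Usage sketch with made-up metadata fields: the SortedDict keeps the output
# columns in a stable, key-sorted order regardless of the input ordering.
all_metadata = ['name', 'area', 'country']
polygon = {'properties': {'name': 'Lake', 'country': 'NZ'}}
print(list(save_polygon(polygon, all_metadata)))
# ['', 'NZ', 'Lake']  (values ordered by sorted key: area, country, name)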
def validate(self, protocol: str, subset: str = 'development', every: int = 1, start: Union[int, Literal['last']] = 1, end: Union[int, Literal['last']] = 100, chronological: bool = False, device: Optional[torch.device] = None, batch_size: int = 32, n_jobs: int = 1, **kwargs): # use last available epoch as starting point if start == 'last': start = self.get_number_of_epochs() - 1 # use last available epoch as end point if end == 'last': end = self.get_number_of_epochs() - 1 criterion = self.validation_criterion(protocol, **kwargs) validate_dir = Path( self.VALIDATE_DIR.format( train_dir=self.train_dir_, _criterion=f'_{criterion}' if criterion is not None else '', protocol=protocol, subset=subset)) params_yml = validate_dir / 'params.yml' validate_dir.mkdir(parents=True, exist_ok=True) writer = SummaryWriter(log_dir=str(validate_dir), purge_step=start) self.validate_dir_ = validate_dir validation_data = self.validate_init(protocol, subset=subset) if n_jobs > 1: self.pool_ = multiprocessing.Pool(n_jobs) progress_bar = tqdm(unit='iteration') for i, epoch in enumerate( self.validate_iter(start=start, end=end, step=every, chronological=chronological)): # {'metric': 'detection_error_rate', # 'minimize': True, # 'value': 0.9, # 'pipeline': ...} details = self.validate_epoch(epoch, validation_data, protocol=protocol, subset=subset, device=device, batch_size=batch_size, n_jobs=n_jobs, **kwargs) # initialize if i == 0: # what is the name of the metric? metric = details['metric'] # should the metric be minimized? minimize = details['minimize'] # epoch -> value dictionary values = SortedDict() # load best epoch and value from past executions if params_yml.exists(): with open(params_yml, 'r') as fp: params = yaml.load(fp, Loader=yaml.SafeLoader) best_epoch = params['epoch'] best_value = params[metric] values[best_epoch] = best_value # metric value for current epoch values[epoch] = details['value'] # send value to tensorboard writer.add_scalar(f'validate/{protocol}.{subset}/{metric}', values[epoch], global_step=epoch) # keep track of best value so far if minimize: best_epoch = values.iloc[np.argmin(values.values())] best_value = values[best_epoch] else: best_epoch = values.iloc[np.argmax(values.values())] best_value = values[best_epoch] # if current epoch leads to the best metric so far # store both epoch number and best pipeline parameter to disk if best_epoch == epoch: best = { metric: best_value, 'epoch': epoch, } if 'pipeline' in details: pipeline = details['pipeline'] best['params'] = pipeline.parameters(instantiated=True) with open(params_yml, mode='w') as fp: fp.write(yaml.dump(best, default_flow_style=False)) # create/update zip file for later upload to torch.hub hub_zip = create_zip(validate_dir) # progress bar desc = (f'{metric} | ' f'Epoch #{best_epoch} = {100 * best_value:g}% (best) | ' f'Epoch #{epoch} = {100 * details["value"]:g}%') progress_bar.set_description(desc=desc) progress_bar.update(1)
class ReadCounter(object): def __init__(self): from sortedcontainers import SortedDict self.reads = SortedDict() self.reads[b''] = [0, 0] self.read_counts = {} self.hit_count=0 def process(self, transaction_info): for get in transaction_info.gets: self._insert_read(get.key, None) for get_range in transaction_info.get_ranges: self._insert_read(get_range.key_range.start_key, get_range.key_range.end_key) def _insert_read(self, start_key, end_key): self.read_counts.setdefault((start_key, end_key), 0) self.read_counts[(start_key, end_key)] += 1 self.reads.setdefault(start_key, [0, 0])[0] += 1 if end_key is not None: self.reads.setdefault(end_key, [0, 0])[1] += 1 else: self.reads.setdefault(start_key+b'\x00', [0, 0])[1] += 1 def get_total_reads(self): return sum([v for v in self.read_counts.values()]) def matches_filter(addresses, required_addresses): for addr in required_addresses: if addr not in addresses: return False return True def get_top_k_reads(self, num, filter_addresses, shard_finder=None): count_pairs = sorted([(v, k) for (k, v) in self.read_counts.items()], reverse=True, key=lambda item: item[0]) if not filter_addresses: count_pairs = count_pairs[0:num] if shard_finder: results = [] for (count, (start, end)) in count_pairs: results.append((start, end, count, shard_finder.get_addresses_for_key(start))) shard_finder.wait_for_shard_addresses(results, 0, 3) if filter_addresses: filter_addresses = set(filter_addresses) results = [r for r in results if filter_addresses.issubset(set(r[3]))][0:num] else: results = [(start, end, count) for (count, (start, end)) in count_pairs[0:num]] return results def get_range_boundaries(self, num_buckets, shard_finder=None): total = sum([start_count for (start_count, end_count) in self.reads.values()]) range_size = total // num_buckets output_range_counts = [] if total == 0: return output_range_counts def add_boundary(start, end, started_count, total_count): if shard_finder: shard_count = shard_finder.get_shard_count(start, end) if shard_count == 1: addresses = shard_finder.get_addresses_for_key(start) else: addresses = None output_range_counts.append((start, end, started_count, total_count, shard_count, addresses)) else: output_range_counts.append((start, end, started_count, total_count, None, None)) this_range_start_key = None last_end = None open_count = 0 opened_this_range = 0 count_this_range = 0 for (start_key, (start_count, end_count)) in self.reads.items(): open_count -= end_count if opened_this_range >= range_size: add_boundary(this_range_start_key, start_key, opened_this_range, count_this_range) count_this_range = open_count opened_this_range = 0 this_range_start_key = None count_this_range += start_count opened_this_range += start_count open_count += start_count if count_this_range > 0 and this_range_start_key is None: this_range_start_key = start_key if end_count > 0: last_end = start_key if last_end is None: last_end = b'\xff' if count_this_range > 0: add_boundary(this_range_start_key, last_end, opened_this_range, count_this_range) shard_finder.wait_for_shard_addresses(output_range_counts, 0, 5) return output_range_counts
class Node(BaseNode):
    def __init__(self, *args, **kwargs):
        self.rest = None
        self.offset = None
        super().__init__(*args, **kwargs)

    def _select(self, key):
        """
        Selects the bucket the key should belong to.
        """
        # If the key is smaller than the min or larger than the max, immediately return.
        if key < min(self.bucket):
            return self.rest
        elif key >= max(self.bucket):
            return self.bucket.values()[-1]

        # Else find the correct node
        for k, v in reversed(list(self.bucket.items())):
            if k <= key:
                return v

        return self.rest

    def _insert(self, key, value):
        """
        Recursively inserts the key and value by selecting the bucket the key
        should belong to, and inserting the key and value into that bucket. If
        the node has been split, it inserts the key of the newly created node
        into the bucket of this node.
        """
        result = self._select(key)._insert(key, value)
        self.changed = True

        if result is None:
            return

        key, other = result
        return super()._insert(key, other)

    def _split(self):
        other = LazyNode(node=Node(tree=self.tree, changed=True), tree=self.tree)
        # other = Node(self.tree)

        values = self.bucket.items()
        self.bucket = SortedDict(values[:len(values) // 2])
        other.bucket = SortedDict(values[len(values) // 2:])

        # pop the smallest item; equivalent to popitem(last=False) in older sortedcontainers
        key, value = other.bucket.popitem(0)
        other.rest = value

        return (key, other)

    def _commit(self):
        self.rest._commit()

        for child in self.bucket.values():
            child._commit()

        data = packb({
            'rest': self.rest.offset,
            'values': {k: v.offset for k, v in self.bucket.items()}
        })

        return self.tree.store.write(data)

    def __getitem__(self, key):
        return self._select(key)[key]

    def __len__(self):
        print(len(self.rest))
        print(self.bucket.values())

        return sum([len(child) for child in self.bucket.values()]) + len(self.rest)

    def __iter__(self):
        for key in self.rest:
            yield key

        for child in self.bucket.values():
            for key in child:
                yield key
class Topics:
    """ A class that manages a collection of `Topic`s. """

    def __init__(self):
        self.logger = getLogger('topics')
        self.logger.info('started session')
        self.clear()

    def clear(self):
        self.logger.info('Cleared all topics and received data')
        self.topic_list = SortedDict()
        self.transfers = dict()

    def create(self, topic, source='remote'):
        # Create the topic if it doesn't exist already
        if topic not in self.topic_list:
            self.topic_list[topic] = Topic(topic, source=source)
            self.logger.info('new:topic ' + topic)

    def process(self, topic, payload, options=None):
        # Create the topic if it doesn't exist already
        self.create(topic)

        # Add the new sample
        self.topic_list[topic].new_sample(payload, options)

        # logging
        if options:
            self.logger.debug('new sample | {0} [{1}] {2}'.format(topic, options['index'], payload))
        else:
            self.logger.debug('new sample | {0} {1}'.format(topic, payload))

        # If there is an active transfer, transfer received data to the queue
        if topic in self.transfers:
            # If transfer requires indexed data, check there is an index
            if self.transfers[topic]['type'] == 'indexed' and options is not None:
                x = options['index']
                self.transfers[topic]['queue'].put([x, payload])
            # For linear data, provide sample id for x and payload for y
            elif self.transfers[topic]['type'] == 'linear':
                x = self.transfers[topic]['lastindex']
                self.transfers[topic]['queue'].put([x, payload])
                self.transfers[topic]['lastindex'] += 1

    def ls(self, source='remote'):
        if source is None:
            return sorted(self.topic_list.keys())  # keys are the topic names themselves
        else:
            return sorted([t.name for t in self.topic_list.values() if t.source == source])

    def samples(self, topic, amount=1):
        if topic not in self.topic_list:
            return None
        if amount == 0 or amount is None:
            return self.topic_list[topic].raw
        return self.topic_list[topic].raw[-amount:]

    def count(self, topic):
        if topic not in self.topic_list:
            return 0
        return len(self.topic_list[topic].raw)

    def exists(self, topic):
        return topic in self.topic_list

    def transfer(self, topic, queue, transfer_type="linear"):
        # If the topic data is not already transfered to some queue
        if topic not in self.transfers:
            self.transfers[topic] = dict()
            self.transfers[topic]['queue'] = queue
            self.transfers[topic]['lastindex'] = 0
            self.transfers[topic]['type'] = transfer_type
            self.logger.info('start transfer | {0}'.format(topic))

            # If there is already existing data under the topic
            if topic in self.topic_list:
                if transfer_type == 'indexed':
                    for key, value in self.topic_list[topic].indexes.items():  # iteritems() is Python 2 only
                        queue.put([key, value])
                elif transfer_type == 'linear':
                    for item in self.topic_list[topic].raw:
                        queue.put([self.transfers[topic]['lastindex'], item])
                        self.transfers[topic]['lastindex'] += 1

    def untransfer(self, topic):
        # If the topic data is already transfered to some queue
        if topic in self.transfers:
            # Remove it from the transfer list
            del self.transfers[topic]
            self.logger.info('stop transfer | {0}'.format(topic))

    def intransfer(self, topic):
        return topic in self.transfers

    def has_indexed_data(self, topic):
        return self.topic_list[topic].has_indexed_data()
def test6():
    """
    Ordered map: SortedDict
    Docs: http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html
    """
    from sortedcontainers import SortedDict

    sd = SortedDict()
    # insert / delete elements
    sd["wxx"] = 21
    sd["hh"] = 18
    sd["other"] = 20
    print(sd)  # SortedDict({'hh': 18, 'other': 20, 'wxx': 21})
    print(sd["wxx"])  # accessing a missing key raises KeyError
    print(sd.get("c"))  # get() on a missing key returns None
    # convert SortedDict to dict
    print(dict(sd))  # {'hh': 18, 'other': 20, 'wxx': 21}
    # peek the first and last elements
    print(sd.peekitem(0))  # tuple, first element: ('hh', 18)
    print(sd.peekitem())  # tuple, last element: ('wxx', 21)
    # iteration
    for k, v in sd.items():
        print(k, ':', v, sep="", end=", ")  # sep removes the spaces between fields
    print()
    for k in sd:  # iterate over keys, same as `for k in sd.keys()`
        print(str(k) + ":" + str(sd[k]), end=", ")
    print()
    for v in sd.values():  # iterate over values
        print(v, end=", ")
    print()
    # get a key from the map
    print(sd.peekitem()[0])
    # get a value from the map
    print(sd.peekitem()[1])
    # membership test
    print("wxx" in sd)  # True
    # bisect_left() / bisect_right()
    sd["a"] = 1
    sd["c1"] = 2
    sd["c2"] = 4
    print(sd)  # SortedDict({'a': 1, 'c1': 2, 'c2': 4, 'hh': 18, 'other': 20, 'wxx': 21})
    print(sd.bisect_left("c1"))  # index of the first key >= "c1": 1
    print(sd.bisect_right("c1"))  # index of the first key > "c1": 2
    # clear
    sd.clear()
    print(len(sd))  # 0
    print(len(sd) == 0)  # True

    """
    Unordered map: dict
    """
    print("---------------------------------------")
    d = {"c1": 2, "c2": 4, "hh": 18, "wxx": 21, 13: 14, 1: 0}
    print(d["wxx"])  # 21
    print(d[13])  # 14
    d[13] += 1
    print(d[13])  # 15
    d["future"] = "wonderful"  # add a key/value pair
    del d[1]  # remove the entry for key 1 from d
    print("wxx" in d)  # True if the key "wxx" is in d, otherwise False
    print(d.keys())  # all keys: dict_keys(['c1', 'c2', 'hh', 'wxx', 13, 'future'])
    print(d.values())  # all values: dict_values([2, 4, 18, 21, 15, 'wonderful'])
    print(d.items())  # dict_items([('c1', 2), ('c2', 4), ('hh', 18), ('wxx', 21), (13, 15), ('future', 'wonderful')])
    for k, v in d.items():  # iterate over k, v
        print(k, ':', v)
    for k in d:  # iterate over keys, same as `for k in d.keys()`
        print(str(k) + ":" + str(d[k]), end=", ")
    print()
    for v in d.values():  # iterate over values
        print(v, end=", ")
    print()

    # dict functions and methods
    print("---------------------------------------")
    d = {"中国": "北京", "美国": "华盛顿", "法国": "巴黎"}
    print(len(d))  # number of entries in d: 3
    print(d.get("中国", "不存在"))  # value if the key exists, otherwise the <default> value: 北京
    print(d.get("中", "不存在"))  # 不存在
    print(d.get("中"))  # None
    d["美国"] = "Washington"  # change the value stored for a key
    print(d.pop("美国"))  # return the value for the key and remove it from the dict
    print(d.popitem())  # remove and return an arbitrary key/value pair as a tuple
    d.clear()  # remove all key/value pairs
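# Building on bisect_left()/bisect_right() above, a short sketch of a key-range
# query; irange() is the SortedDict method that does the same job lazily.
from sortedcontainers import SortedDict

sd = SortedDict({"a": 1, "c1": 2, "c2": 4, "hh": 18, "wxx": 21})
lo, hi = sd.bisect_left("c1"), sd.bisect_right("c2")
print(sd.keys()[lo:hi])                 # ['c1', 'c2'] via index slicing
print(list(sd.irange("c1", "c2")))      # ['c1', 'c2'] via irange (inclusive bounds)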
class PageWidget(QWidget): move_drop_event = pyqtSignal(object, int, int) copy_drop_event = pyqtSignal(object, int, int) DRAG_MAGIC = 'LiSP_Drag&Drop' def __init__(self, rows, columns, *args): super().__init__(*args) self.setAcceptDrops(True) self.__rows = rows self.__columns = columns self.__widgets = SortedDict() self.setLayout(QGridLayout()) self.layout().setContentsMargins(4, 4, 4, 4) self.init_layout() def init_layout(self): for row in range(0, self.__rows): self.layout().setRowStretch(row, 1) # item = QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding) # self.layout().addItem(item, row, 0) for column in range(0, self.__columns): self.layout().setColumnStretch(column, 1) # item = QSpacerItem(0, 0, QSizePolicy.Expanding, QSizePolicy.Minimum) # self.layout().addItem(item, 0, column) def add_widget(self, widget, row, column): self._check_index(row, column) if (row, column) not in self.__widgets: widget.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Ignored) self.__widgets[(row, column)] = widget self.layout().addWidget(widget, row, column) widget.show() else: raise IndexError('cell {} already used'.format((row, column))) def take_widget(self, row, column): self._check_index(row, column) if (row, column) in self.__widgets: widget = self.__widgets.pop((row, column)) widget.hide() self.layout().removeWidget(widget) return widget else: raise IndexError('cell {} is empty'.format((row, column))) def move_widget(self, o_row, o_column, n_row, n_column): widget = self.take_widget(o_row, o_column) self.add_widget(widget, n_row, n_column) def widget(self, row, column): self._check_index(row, column) return self.__widgets.get((row, column)) def index(self, widget): for index, f_widget in self.__widgets.items(): if widget is f_widget: return index return -1, -1 def widgets(self): return iter(self.__widgets.values()) def reset(self): self.__widgets.clear() def dragEnterEvent(self, event): if event.mimeData().hasText(): if event.mimeData().text() == PageWidget.DRAG_MAGIC: event.accept() else: event.ignore() else: event.ignore() def dragLeaveEvent(self, event): event.ignore() def dropEvent(self, event): row, column = self._event_index(event) if self.layout().itemAtPosition(row, column) is None: if qApp.keyboardModifiers() == Qt.ControlModifier: event.setDropAction(Qt.MoveAction) event.accept() self.move_drop_event.emit(event.source(), row, column) elif qApp.keyboardModifiers() == Qt.ShiftModifier: event.setDropAction(Qt.CopyAction) self.copy_drop_event.emit(event.source(), row, column) event.accept() event.ignore() def dragMoveEvent(self, event): row, column = self._event_index(event) if self.layout().itemAtPosition(row, column) is None: event.accept() else: event.ignore() def _check_index(self, row, column): if not isinstance(row, int): raise TypeError('rows index must be integers, not {}'.format( row.__class__.__name__)) if not isinstance(column, int): raise TypeError('columns index must be integers, not {}'.format( column.__class__.__name__)) if not 0 <= row < self.__rows or not 0 <= column < self.__columns: raise IndexError('index out of bound {}'.format((row, column))) def _event_index(self, event): # Margins and spacings are equals space = self.layout().horizontalSpacing() margin = self.layout().contentsMargins().right() r_size = (self.height() + margin * 2) // self.__rows + space c_size = (self.width() + margin * 2) // self.__columns + space row = math.ceil(event.pos().y() / r_size) - 1 column = math.ceil(event.pos().x() / c_size) - 1 return row, column
class CacheStore(object): class CacheItem(object): def __init__(self): self.valid = Event() self.data = None def __init__(self, key=None): self.lock = RLock() self.store = SortedDict(key) def __getitem__(self, item): return self.get(item) def put(self, key, data): with self.lock: item = self.store[key] if key in self.store else self.CacheItem() item.data = data item.valid.set() if key not in self.store: self.store[key] = item return True return False def get(self, key, default=None, timeout=None): item = self.store.get(key) if item: item.valid.wait(timeout) return item.data return default def remove(self, key): with self.lock: if key in self.store: del self.store[key] return True return False def exists(self, key): return key in self.store def rename(self, oldkey, newkey): with self.lock: obj = self.get(oldkey) obj['id'] = newkey self.put(newkey, obj) self.remove(oldkey) def is_valid(self, key): item = self.store.get(key) if item: return item.valid.is_set() return False def invalidate(self, key): with self.lock: item = self.store.get(key) if item: item.valid.clear() def itervalid(self): for key, value in list(self.store.items()): if value.valid.is_set(): yield (key, value.data) def validvalues(self): for value in list(self.store.values()): if value.valid.is_set(): yield value.data def remove_predicate(self, predicate): result = [] for k, v in self.itervalid(): if predicate(v): self.remove(k) result.append(k) return result def query(self, *filter, **params): return wrap(list(self.validvalues())).query(*filter, **params)
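# A usage sketch for CacheStore above; keys and payloads are arbitrary, and the
# threading Event/RLock imports the class relies on are assumed to be in place.
cache = CacheStore()
cache.put('vm1', {'id': 'vm1', 'state': 'running'})
print(cache.get('vm1'))                           # {'id': 'vm1', 'state': 'running'}
cache.rename('vm1', 'vm2')
print(cache.exists('vm1'), cache.exists('vm2'))   # False True
print(list(cache.validvalues()))                  # all entries whose data is valid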
if isPhoto(file):
    try:
        exif = getExif(os.path.join(subdir, file))
        if not cameraIsValid(exif):
            continue

        # get focal length and convert from rational data type to float
        focalLength = exif[FOCALLENGTH_TAG][0] / exif[FOCALLENGTH_TAG][1]

        # count every focal length occurence in dictionary
        if focalLength in occurences:
            occurences[focalLength] = occurences[focalLength] + 1
        else:
            # find nearest
            index = occurences.bisect(focalLength)
            greater = occurences.iloc[index]
            smaller = occurences.iloc[index - 1]
            nearestFL = greater if (greater - focalLength < focalLength - smaller) else smaller
            occurences[nearestFL] = occurences[nearestFL] + 1
    except (KeyError, TypeError, IndexError):
        # there is no focal length info in image exif data (Key/Type/IndexError)
        pass

# plot the graph
position = arange(len(focalLengths)) + .5
barh(position, occurences.values(), align='center', color='#FF0000')
yticks(position, occurences.keys())
xlabel('Occurrences')
ylabel('Focal length')
title('Focal length usage analysis')
grid(True)
show()
class DotMap(MutableMapping): def __init__(self, *args, **kwargs): self._map = SortedDict() if args: d = args[0] if type(d) is dict: for k, v in self.__call_items(d): if type(v) is dict: v = DotMap(v) self._map[k] = v if kwargs: for k, v in self.__call_items(kwargs): self._map[k] = v @staticmethod def __call_items(obj): if hasattr(obj, 'iteritems') and ismethod(getattr(obj, 'iteritems')): return obj.iteritems() else: return obj.items() def items(self): return self.iteritems() def iteritems(self): return self.__call_items(self._map) def __iter__(self): return self._map.__iter__() def __setitem__(self, k, v): self._map[k] = v def __getitem__(self, k): if k not in self._map: # automatically extend to new DotMap self[k] = DotMap() return self._map[k] def __setattr__(self, k, v): if k == '_map': super(DotMap, self).__setattr__(k, v) else: self[k] = v def __getattr__(self, k): if k == '_map': return self._map else: return self[k] def __delattr__(self, key): return self._map.__delitem__(key) def __contains__(self, k): return self._map.__contains__(k) def __str__(self): items = [] for k, v in self.__call_items(self._map): items.append('{0}={1}'.format(k, repr(v))) out = 'DotMap({0})'.format(', '.join(items)) return out def __repr__(self): return str(self) def to_dict(self): d = {} for k, v in self.items(): if type(v) is DotMap: v = v.to_dict() d[k] = v return d def pprint(self): pprint(self.to_dict()) # proper dict subclassing def values(self): return self._map.values() @staticmethod def parse_other(other): if type(other) is DotMap: return other._map else: return other def __cmp__(self, other): other = DotMap.parse_other(other) return self._map.__cmp__(other) def __eq__(self, other): other = DotMap.parse_other(other) if not isinstance(other, dict): return False return self._map.__eq__(other) def __ge__(self, other): other = DotMap.parse_other(other) return self._map.__ge__(other) def __gt__(self, other): other = DotMap.parse_other(other) return self._map.__gt__(other) def __le__(self, other): other = DotMap.parseOther(other) return self._map.__le__(other) def __lt__(self, other): other = DotMap.parse_other(other) return self._map.__lt__(other) def __ne__(self, other): other = DotMap.parse_other(other) return self._map.__ne__(other) def __delitem__(self, key): return self._map.__delitem__(key) def __len__(self): return self._map.__len__() def copy(self): return self def get(self, key, default=None): return self._map.get(key, default) def has_key(self, key): return key in self._map def iterkeys(self): return self._map.iterkeys() def itervalues(self): return self._map.itervalues() def keys(self): return self._map.keys() def pop(self, key, default=None): return self._map.pop(key, default) def setdefault(self, key, default=None): return self._map.setdefault(key, default) def viewitems(self): if version_info.major == 2 and version_info.minor >= 7: return self._map.viewitems() else: return self._map.items() def viewkeys(self): if version_info.major == 2 and version_info.minor >= 7: return self._map.viewkeys() else: return self._map.keys() def viewvalues(self): if version_info.major == 2 and version_info.minor >= 7: return self._map.viewvalues() else: return self._map.values() @classmethod def fromkeys(cls, seq, value=None): d = DotMap() d._map = SortedDict.fromkeys(seq, value) return d
class Model(object): ''' The model of a Stranbeest. The Model consists of a set of nodes, edges and boundary conditions. Each node has a unique name and a x and y position which may change whenever the simuation is incremented. Each node introduces two degrees of freedom. The edges are specified by the nodes they are connecting. The edges are the push/pull rods which connect the edges whith one another. An edges keeps the distances between two nodes constant and therefore constrains exactly one degree of freedom in the system. ''' def __init__(self): ''' Constructor ''' self._nodes = SortedDict() self._edges = defaultdict(set) def addNode(self,name,x,y): if not isinstance(name,str ): raise Exception("The 1st argument must be the node's name as str.") if not isinstance(x ,float): raise Exception("The 2nd argument must be the node's x position as float.") if not isinstance(y ,float): raise Exception("The 2nd argument must be the node's y position as float.") if name in self._nodes: raise Exception( 'There already exists a node by the name of "%(name)s"' % locals() ) self._nodes[name] = x,y self.__t = 0.0 for listener in self.onNodeAddListeners: listener(name,x,y) def addEdge(self,node1,node2): if node1 == node2: raise Exception('"node1" cannot be equal to "node2".') self._edges[node1].add(node2) self._edges[node2].add(node1) for listener in self.onEdgeAddListeners: listener( min(node1,node2), max(node1,node2) ) def pos(self,name): return self._nodes[name] def move(self,name,x,y): self._nodes[name] = x,y for listener in self.onNodeMoveListeners: listener(name,x,y) def state(self): return fromiter( chain.from_iterable( self._nodes.values() ), float ) def setState(self,state): for i,(x,y) in enumerate( zip(state[::2],state[1::2]) ): self.move(self._nodes.keys()[i],x,y) @property def t(self): return self.__t def increment(self,dt): v = self.v t0 = self.__t x0 = self.state() # https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta_methods#The_Runge.E2.80.93Kutta_method k0 = v(x0, t0) k1 = v(x0+k0*(dt/2), t0+dt/2) k2 = v(x0+k1*(dt/2), t0+dt/2) k3 = v(x0+k2*(dt), t0+dt) self.setState( x0 + dt/6 * (k0+k1+k2+k3) ) self.__t += dt def v(self,x,t): lhs = zeros( 2*[len(x)] ) rhs = zeros( len(x) ) iRows = iter( range( len(x) ) ) for start,end in self.edges(): iStart = 2*self._nodes.index(start) iEnd = 2*self._nodes.index(end) iRow = next(iRows) dx = x[iEnd+0] - x[iStart+0] dy = x[iEnd+1] - x[iStart+1] lhs[iRow,iStart+0] = dx; lhs[iRow,iEnd+0] = -dx lhs[iRow,iStart+1] = dy; lhs[iRow,iEnd+1] = -dy rhs[iRow] = 0 for bc in self.bcs: bc.addEquations(x,t,iRows,lhs,rhs) return linalg.solve(lhs,rhs) def nodes(self): return self._nodes.iteritems() def edges(self): for node1,neighbors in self._edges.items(): for node2 in neighbors: if node1 < node2: yield node1,node2 bcs = [] onEdgeAddListeners = set() # <- FIXME should be a multiset onNodeAddListeners = set() # <- FIXME should be a multiset onNodeMoveListeners = set() # <- FIXME should be a multiset
class FileTable(object): """docstring for FileTable""" def __init__(self, myip, server): super(FileTable, self).__init__() self.ring = SortedDict() self.hasher = hashlib.sha224 self.myhash = self.hash(myip) self.add_node(myip) self.server = server def hash(self, key): return self.hasher(key).hexdigest()[:-10] def hash_at(self, idx): idx %= len(self.ring) hash = self.ring.iloc[idx] return hash def add_node(self, ip): hash = self.hash(ip) self.ring[hash] = {'ip': ip, 'files': []} SDFS_LOGGER.info('After adding %s - %s' % (ip, repr(self.ring))) def remove_node(self, failed_list): start_time = time.time() # this is for debug flag = False # deep copy failed list because it will be reset soon ip_list = list(failed_list) # change the order of failed node # make sure the smaller id node be handled first if len(ip_list) == 2: if self.hash(ip_list[0]) == 0 and self.hash(ip_list[1]) == len(self.ring) - 1: ip_list[0], ip_list[1] = ip_list[1], ip_list[0] elif self.ring.index(self.hash(ip_list[0])) == self.ring.index(self.hash(ip_list[1])) + 1: ip_list[0], ip_list[1] = ip_list[1], ip_list[0] for ip in ip_list: hash = self.hash(ip) idx = self.ring.index(hash) # if the node is not the direct successor of the failed node, do nothing if len(ip_list) == 2 and ip == ip_list[1] and self.hash_at((idx + 2) % len(self.ring)) == self.myhash: continue if self.hash_at((idx + 1) % len(self.ring)) == self.myhash or (self.hash_at((idx + 2) % len(self.ring)) == self.myhash and len(ip_list) == 2): # this is for debug flag = True heritage = set(self.ring[hash]['files']) my_files = set(self.ring[self.myhash]['files']) next_files = set(self.ring[self.hash_at(idx + 2)]['files']) # determine the to_me = heritage - my_files to_next = (heritage & my_files) - next_files to_next_next = heritage & my_files & next_files replica_list = [list(to_me), list(to_next), list(to_next_next)] self.ring[self.myhash]['files'].extend(to_me) # handle replica dest_ip_to_me = self.ring[self.hash_at(self.ring.index(hash) - 1)]['ip'] dest_ip_to_next = self.ring[self.hash_at(self.ring.index(self.myhash) + 1)]['ip'] dest_ip_to_next_next = self.ring[self.hash_at(self.ring.index(self.myhash) + 2)]['ip'] dest_ip_list = [dest_ip_to_me, dest_ip_to_next, dest_ip_to_next_next] del self.ring[hash] self.server.handle_replica(replica_list, dest_ip_list, ip_list) else: del self.ring[hash] elapsed_time = time.time() - start_time if flag: print "It takes", elapsed_time, "to handle replica" def lookup(self, sdfs_filename): hash = self.hash(sdfs_filename) idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0 ip_list = [self.ring[self.hash_at(idx + i)]['ip'] for i in xrange(3)] return ip_list def insert(self, sdfs_filename): hash = self.hash(sdfs_filename) idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0 for i in xrange(3): node_hash = self.hash_at(idx + i) self.ring[node_hash]['files'].append(sdfs_filename) SDFS_LOGGER.info('Inserted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip'])) def delete(self, sdfs_filename): hash = self.hash(sdfs_filename) idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0 for i in xrange(3): node_hash = self.hash_at(idx + i) self.ring[node_hash]['files'].remove(sdfs_filename) SDFS_LOGGER.info('Deleted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip'])) def update_replica(self, replica_list, dest_ip_list): for i in xrange(3): self.ring[self.hash(dest_ip_list[i])]['files'] = 
list(set(self.ring[self.hash(dest_ip_list[i])]['files'] + replica_list[i])) def list_my_store(self): print '-' * 5 + 'my files are:' for f in self.ring[self.myhash]['files']: print f, print print '-' * 5 + 'that is all' def list_file_location(self): all_files = set() for value in self.ring.values(): all_files.update(set(value['files'])) for f in all_files: print f + ' is stored at ', for value in self.ring.values(): if f in value['files']: print value['ip'], print
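# The ring lookup in FileTable rests on bisect_left() over hashed keys. The
# consistent-hashing idea can be shown with a minimal, self-contained sketch;
# the node addresses and the replica count of 3 are illustrative only.
import hashlib
from sortedcontainers import SortedDict

ring = SortedDict()  # hash -> node ip, as in FileTable.ring

def key_hash(key):
    return hashlib.sha224(key.encode()).hexdigest()[:-10]

for ip in ('10.0.0.1', '10.0.0.2', '10.0.0.3', '10.0.0.4'):
    ring[key_hash(ip)] = ip

def lookup(filename, replicas=3):
    """Return the nodes responsible for filename, walking clockwise from its hash."""
    idx = ring.bisect_left(key_hash(filename)) % len(ring)   # wrap around the ring
    return [ring.peekitem((idx + i) % len(ring))[1] for i in range(replicas)]

print(lookup('report.csv'))  # three successive nodes on the ring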
class TreePage(BasePage): """ Page object, implemented with a sorted dict. Who knows what's underneath! """ def __init__(self, *args, **kwargs): storage = kwargs.pop("storage", None) super(TreePage, self).__init__(*args, **kwargs) self._storage = SortedDict() if storage is None else storage def keys(self): if len(self._storage) == 0: return set() else: return set.union(*(set(range(*self._resolve_range(mo))) for mo in self._storage.values())) def replace_mo(self, state, old_mo, new_mo): start, end = self._resolve_range(old_mo) for key in self._storage.irange(start, end-1): val = self._storage[key] if val is old_mo: #assert new_mo.includes(a) self._storage[key] = new_mo def store_overwrite(self, state, new_mo, start, end): # iterate over each item we might overwrite # track our mutations separately since we're in the process of iterating deletes = [] updates = { start: new_mo } for key in self._storage.irange(maximum=end-1, reverse=True): old_mo = self._storage[key] # make sure we aren't overwriting all of an item that overlaps the end boundary if end < self._page_addr + self._page_size and end not in updates and old_mo.includes(end): updates[end] = old_mo # we can't set a minimum on the range because we need to do the above for # the first object before start too if key < start: break # delete any key that falls within the range deletes.append(key) #assert all(m.includes(i) for i,m in updates.items()) # perform mutations for key in deletes: del self._storage[key] self._storage.update(updates) def store_underwrite(self, state, new_mo, start, end): # track the point that we need to write up to last_missing = end - 1 # track also updates since we can't update while iterating updates = {} for key in self._storage.irange(maximum=end-1, reverse=True): mo = self._storage[key] # if the mo stops if mo.base <= last_missing and not mo.includes(last_missing): updates[max(mo.last_addr+1, start)] = new_mo last_missing = mo.base - 1 # we can't set a minimum on the range because we need to do the above for # the first object before start too if last_missing < start: break # if there are no memory objects <= start, we won't have filled start yet if last_missing >= start: updates[start] = new_mo #assert all(m.includes(i) for i,m in updates.items()) self._storage.update(updates) def load_mo(self, state, page_idx): """ Loads a memory object from memory. :param page_idx: the index into the page :returns: a tuple of the object """ try: key = next(self._storage.irange(maximum=page_idx, reverse=True)) except StopIteration: return None else: return self._storage[key] def load_slice(self, state, start, end): """ Return the memory objects overlapping with the provided slice. :param start: the start address :param end: the end address (non-inclusive) :returns: tuples of (starting_addr, memory_object) """ keys = list(self._storage.irange(start, end-1)) if not keys or keys[0] != start: try: key = next(self._storage.irange(maximum=start, reverse=True)) except StopIteration: pass else: if self._storage[key].includes(start): keys.insert(0, key) return [(max(start, key), self._storage[key]) for key in keys] def _copy_args(self): return { 'storage': self._storage.copy() }
class TradesView(object):

    def __init__(self):
        self.pending_offer_by_id = {}
        self.trade_by_id = {}
        self._trades = SortedDict()

    def add_pending(self, offer):
        self.pending_offer_by_id[offer.offer_id] = offer

    def report_completed(self, offer_id, completed_timestamp):
        offer = self.pending_offer_by_id.get(offer_id)
        if offer is None:
            return False
        del self.pending_offer_by_id[offer_id]
        assert isinstance(offer, BasicOffer)
        trade = Trade(offer, completed_timestamp)

        self._trades[(trade.timestamp, offer.offer_id)] = trade
        # inserts in the dict for retrieval by offer_id
        self.trade_by_id[offer.offer_id] = trade
        return offer.offer_id

    def get_trade_by_id(self, offer_id):
        return self.trade_by_id.get(offer_id)

    def get_pending_by_id(self, offer_id):
        return self.pending_offer_by_id.get(offer_id)

    def __len__(self):
        return len(self._trades)

    def __iter__(self):
        return iter(self._trades)

    def trades(self, from_timestamp=None, to_timestamp=None):
        """
        :param from_timestamp: first timestamp to include in result
        :param to_timestamp: first timestamp to exclude from result
        :return: list
        """
        if (from_timestamp, to_timestamp) == (None, None):  # `is` would compare identity, not equality
            return self._trades.values()

        min_key, max_key = None, None
        if from_timestamp is not None:
            min_key = (from_timestamp, 0)
        if to_timestamp is not None:
            max_key = (to_timestamp, 0)

        # FIXME prevent modifying (from report_completed()) while iterating
        trades = [
            self._trades[key] for key in self._trades.irange(
                minimum=min_key, maximum=max_key, inclusive=(True, False))
        ]
        return list(trades)

    def values(self):
        # returns sorted list of all values
        return self._trades.values()
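# The windowed query in trades() -- tuple keys of (timestamp, offer_id) plus a
# half-open interval via irange() -- exercised without the surrounding offer
# machinery, using integer ids and string payloads as stand-ins.
from sortedcontainers import SortedDict

trades = SortedDict()  # (timestamp, offer_id) -> trade
for ts, oid in [(10, 1), (15, 2), (20, 3)]:
    trades[(ts, oid)] = 'trade-%d' % oid

# window [10, 20): include the lower bound, exclude the upper one
window = [trades[k] for k in trades.irange((10, 0), (20, 0), inclusive=(True, False))]
print(window)  # ['trade-1', 'trade-2']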
def parallel_for(f, l, *, threads=int(cpu_count()/2), return_=False, return_ordered=True): """Applies f to each element of l, in parallel over the specified number of threads :param f: The function to apply :param l: The iterable to process :param threads: The number of threads :param return_: True whether this is a 'map'-like operation that returns results :param return_ordered: True whether the order of the results should match the order of the iterable :return: Optionally returns the f(l) result, if return_=True """ if threads > 1: iteratorlock = threading.Lock() exceptions = [] if return_: if return_ordered: d = SortedDict() i = zip(count(), l.__iter__()) else: d = list() i = l.__iter__() else: i = l.__iter__() def runall(): while True: iteratorlock.acquire() try: try: if exceptions: return v = next(i) finally: iteratorlock.release() except StopIteration: return try: if return_: if return_ordered: n, x = v d[n] = f(x) else: d.append(f(v)) else: f(v) except: e = sys.exc_info() iteratorlock.acquire() try: exceptions.append(e) finally: iteratorlock.release() threadlist = [threading.Thread(target=runall) for j in range(threads)] for t in threadlist: t.start() for t in threadlist: t.join() if exceptions: a, b, c = exceptions[0] raise a(b).with_traceback(c) if return_: if return_ordered: return d.values() else: return d else: if return_: return [f(v) for v in l] else: for v in l: f(v) return
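# A quick usage sketch for parallel_for(); square() is a trivial stand-in for a
# real worker function, and the module-level imports the definition above needs
# (threading, count, cpu_count, SortedDict) are assumed to be in place.
def square(x):
    return x * x

# map-like use: with return_ordered=True the results come back in input order
results = parallel_for(square, range(8), threads=4, return_=True, return_ordered=True)
print(list(results))  # [0, 1, 4, 9, 16, 25, 36, 49]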
class NetworkEventDataset: """Collection of 3-channel ZNE streams with traces aligned to a fixed time window about seismic P-wave arrival events, *for a given network*. Two indexes are provided. One indexes hierarchically by station code and event ID, yielding a 3-channel ZNE stream per event, so that you can easily gather all traces for a given station by iterating over events. The other index indexes hierarchically by event ID and station code, yielding a 3-channel ZNE stream per station. Using this index you can easily gather all traces for a given event across multiple stations. Preferably each input trace will already have an 'event_id' attribute in its stats. If not, an event ID will be invented based on station identifiers and time window. """ def __init__(self, stream_src, network=None, station=None, location='', ordering='ZNE'): """ Initialize from data source (file or obspy.Stream). Traces are COPIED into the dataset in order to leave input object intact, since many obspy functions mutate traces in-place. All streams in the input data source stream_src are expected to belong to the same network. This is checked as the data is ingested. A discrepant network code is an error condition. :param stream_src: Source of input streams. May be a file name or an Obspy Stream :type stream_src: str, pathlib.Path or obspy.Stream :param network: Network code of streams to load. If stream_src is an Obspy Stream, the \ streams will be filtered to match this network code. :type network: str :param station: Station code of streams to load. If stream_src is an Obspy Stream, the \ streams will be filtered to match this station code. :type station: str :param location: [OPTIONAL] Location code of streams to load. Leave as default (empty string) \ if location code is empty in the data source. :type location: str :param ordering: Channel ordering to be applied to the data after loading. The channel labelling \ must be consistent with the requested ordering - rotation to the coordinate system implied \ by the ordering is *NOT* applied. :type ordering: str :raises AssertionError: If discrepant network code is found in input data """ if isinstance(stream_src, obspy.Stream): net = network sta = station loc = location or None if net or sta or loc: data_src = stream_src.select(net, sta, loc) else: data_src = stream_src # end if elif os.path.isfile(stream_src): data_src = read_h5_stream(stream_src, network, station, location) else: assert False, "Unknown data source {}".format(type(stream_src)) # end if self.network = network # Data in data_src collects all traces together under a single Stream object. # In order to get control over data slicing and traceability in processing, we # break it down into one Stream per ZNE channel triplet of a given event. self.db_sta = SortedDict() for tr in data_src: net, sta, loc, _ = tr.id.split('.') if self.network: assert net == self.network else: self.network = net # end if # Create single copy of the trace to be shared by both dicts. dupe_trace = tr.copy() try: event_id = tr.stats.event_id except AttributeError: event_id = '.'.join([ net, sta, loc, '_'.join([str(tr.stats.starttime), str(tr.stats.endtime)]) ]) # end try self.db_sta.setdefault(sta, SortedDict()).setdefault( event_id, obspy.Stream()).append(dupe_trace) # end for # Index same obspy.Stream instances in event dict. This way, any changes # to a given event stream will be seen by both indexes. 
self.db_evid = SortedDict() for sta, ev_db in self.db_sta.items(): for evid, stream in ev_db.items(): self.db_evid.setdefault(evid, SortedDict())[sta] = stream # end for # end for # Sort each stream into specific order. if ordering.upper() == 'ZNE': ordinal = zne_order elif ordering.upper() == 'ZRT': ordinal = zrt_order else: ordinal = None # end if if ordinal is not None: self.apply(lambda x: x.traces.sort(key=ordinal)) # end if # end func def __iter__(self): """ Flat iterator. Loops over self.db_sta depth first and returns tuple of keys and matching stream. Equivalent to:: ```Python for sta, ev_db in self.db_sta.items(): for evid, stream in ev_db.items(): yield (sta, evid, stream) ``` """ return ((sta, evid, stream) for sta, ev_db in self.db_sta.items() for evid, stream in ev_db.items()) # end if def __len__(self): """Returns number of streams""" return sum((len(x) for x in self.db_sta.values())) # end func def __repr__(self): """Displays summary string for all streams""" return '\n'.join( (evid + ', ' + str(stream) for _, evid, stream in iter(self))) # end func def num_stations(self): """ Get number of stations in the dataset. :return: Number of stations :rtype: int """ return len(self.db_sta) # end func def station(self, station_code): """ Accessor for events for a given station. :param station_code: Station to get :type station_code: str :return: Event index for station, if station is found :rtype: SortedDict """ return self.db_sta.get(station_code) # end func def num_events(self): """ Get number of events in the dataset. :return: Number of events :rtype: int """ return len(self.db_evid) # end func def event(self, event_id): """ Accessor for stations for a given event. :param event_id: ID of event to look up :type event_id: str :return: Station index for given event, if event ID is found, otherwise None :rtype: SortedDict or NoneType """ return self.db_evid.get(event_id) # end func def curate(self, curator): """ Curate the dataset according to a callable curator. Modifies collection in-place to remove streams that do not satisfy the curation criteria of the callable. Curator call signature must be consitent with:: callable(station_code, event_id, stream) -> bool The callable returns a boolean indicating whether to keep the Stream or not. :param curator: Function or callable delegate to adjudicate whether to keep each given stream. :type curator: Callable :return: None """ # Only need to loop over one db, since they both reference the same underlying Stream instances. PY2 = (sys.version_info[0] == 2) if PY2: from itertools import ifilterfalse as filterfalse # pylint: disable=no-name-in-module, import-outside-toplevel else: from itertools import filterfalse # pylint: disable=import-outside-toplevel # end if discard_items = [ (x[0], x[1]) for x in filterfalse(lambda rec: curator(*rec), iter(self)) ] self.prune(discard_items) # end func def apply(self, _callable): """Apply a callable across all streams. Use to apply uniform processing steps to the whole dataset. :param _callable: Callable object that takes an obspy Stream as input and applies itself to that Stream. \ Expect that stream may be mutated in-place by the callable. :type _callable: Any Callable compatible with the call signature. :return: None """ for _1, _2, stream in iter(self): _callable(stream) # end func def by_station(self): """ Iterate over station sub-dictionaries. :return: Iterable over the stations, each element consisting of pair containing \ (station code, event dict). 
:rtype: Iterable(tuple) """ return iter(self.db_sta.items()) # end func def by_event(self): """ Iterate over event sub-dictionaries. :return: Iterable over the discrete events, each element consisting of pair containing \ (event id, station dict). :rtype: Iterable(tuple) """ return iter(self.db_evid.items()) # end func def prune(self, items, cull=True): """ Remove a given sequence of (station, event) pairs from the dataset. :param items: Iterable of (station, event) pairs :type items: Iterable(tuple) :param cull: If True, then empty entries in the top level index will be removed. :type cull: boolean :return: None """ for station, event_id in items: self.db_sta[station].pop(event_id) self.db_evid[event_id].pop(station) if cull: if not self.db_sta[station]: self.db_sta.pop(station) # end if if not self.db_evid[event_id]: self.db_evid.pop(event_id) # end if # end if # end for # end func def write(self, output_h5_filename, index_format='event'): """ Write event dataset back out to HDF5 file. :param output_h5_filename: Output file name :type output_h5_filename: str or path :param index_format: Format to use for index. Must be 'event' (default) or 'standard' (obspy default) :type index_format: str :return: True if file was written :rtype: boolean """ assert not os.path.exists( output_h5_filename), 'Output file already exists' if index_format not in ['event', 'standard']: raise ValueError('Index format %s not supported' % index_format) # end if all_stream = obspy.Stream() for sta, evid, stream in iter(self): all_stream += stream # end for if index_format == 'event': write_h5_event_stream(output_h5_filename, all_stream, mode='w') elif index_format == 'standard': all_stream.write(output_h5_filename, format='H5', mode='w') # end if return os.path.isfile(output_h5_filename)
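# Minimal sketch of the dual-index pattern used by NetworkEventDataset (not
# from the original source, and independent of obspy). Two SortedDicts of
# SortedDicts share the same value objects, so a record can be reached either
# as db_sta[station][event_id] or as db_evid[event_id][station], and a
# mutation made through one index is visible through the other.
from sortedcontainers import SortedDict

db_sta, db_evid = SortedDict(), SortedDict()
for sta, evid, traces in [('AA01', 'ev2', ['Z']), ('AA01', 'ev1', ['Z']),
                          ('BB02', 'ev1', ['Z'])]:
    db_sta.setdefault(sta, SortedDict())[evid] = traces
    db_evid.setdefault(evid, SortedDict())[sta] = traces

db_sta['AA01']['ev1'].append('N')            # mutate via the station index
assert db_evid['ev1']['AA01'] == ['Z', 'N']  # visible via the event index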
def test_values(): mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)] temp = SortedDict(mapping) assert list(temp.values()) == [pos for key, pos in mapping]
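# Hypothetical companion check (not from the original test module): with the
# same mapping, keys() and items() also iterate in sorted key order.
def test_keys_and_items():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert list(temp.keys()) == sorted(string.ascii_lowercase)
    assert list(temp.items()) == sorted(mapping)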
class AttributeSet: """The AttributeSet class that represents an attribute set.""" def __init__(self, attributes: Optional[Iterable[Attribute]] = None): """Initialize the AttributeSet object with the attributes. Args: attributes: The attributes that compose the attribute set if set. Raises: DuplicateAttributeId: Two attributes share the same id. """ # Maintain a sorted dictionary linking the attributes id to the # attribute objects self._id_to_attr = SortedDict() if attributes: for attribute in attributes: self.add(attribute) def __iter__(self) -> Iterator: """Give the iterator for the AttributeSet to get the attributes. Returns: An iterator that iterates over the Attribute objects that compose the attribute set. """ return iter(self._id_to_attr.values()) def __repr__(self) -> str: """Provide a string representation of the attribute set. Returns: A string representation of the attribute set. """ attribute_list = ', '.join( str(attr) for attr in self._id_to_attr.values()) return f'{self.__class__.__name__}([{attribute_list}])' @property def attribute_names(self) -> List[str]: """Give the names of the attributes of this attribute set (read only). The attribute names are sorted in function of the attribute ids. Returns: The name of the attributes of this attribute set as a list of str. """ return list(attribute.name for attribute in self._id_to_attr.values()) @property def attribute_ids(self) -> List[int]: """Give the ids of the attributes of this attribute set (read only). Returns: The ids of the attributes of this set as a sorted list of integers. """ return list(self._id_to_attr.keys()) def add(self, attribute: Attribute): """Add an attribute to this attribute set if it is not already present. Args: attribute: The attribute to add. Raises: DuplicateAttributeId: An attribute with the same id as the attribute that is added already exists. """ if attribute.attribute_id in self._id_to_attr: raise DuplicateAttributeId('An attribute with the same id as ' f'{attribute} already exists.') self._id_to_attr[attribute.attribute_id] = attribute def remove(self, attribute: Attribute): """Remove an attribute from this attribute set. Args: attribute: The attribute to remove. Raises: KeyError: The attribute is not present in this attribute set. """ if attribute.attribute_id not in self._id_to_attr: raise KeyError(f'{attribute} is not among the attributes.') del self._id_to_attr[attribute.attribute_id] def __hash__(self) -> int: """Give the hash of an attribute set: the hash of its attributes. Returns: The hash of an attribute set as the hash of its frozen attributes. """ return hash(frozenset(self.attribute_ids)) def __eq__(self, other_attr_set: 'AttributeSet') -> bool: """Compare two attribute sets, equal if the attributes correspond. Args: other_attr_set: The other attribute set to which the attribute set is compared with. Returns: The two attribute sets are equal: they share the same attributes. """ return (isinstance(other_attr_set, self.__class__) and hash(self) == hash(other_attr_set)) def __contains__(self, attribute: Attribute) -> bool: """Check if the attribute is in the attribute set. Args: attribute: The attribute that is checked whether it is in this set. Returns: The attribute is in the attribute set. """ return attribute.attribute_id in self._id_to_attr def __len__(self) -> int: """Give the size of this attribute set as the number of attributes. Returns: The number of attributes in this attribute set. 
""" return len(self._id_to_attr) def issuperset(self, other_attribute_set: 'AttributeSet') -> bool: """Check if the attribute set is a superset of the one in parameters. Args: other_attribute_set: The attribute set for which we check whether the attribute set is a superset of. Returns: The attribute set is a superset of the other attribute set. """ self_attribute_ids_set = frozenset(self.attribute_ids) other_attribute_ids_set = frozenset(other_attribute_set.attribute_ids) return self_attribute_ids_set.issuperset(other_attribute_ids_set) def issubset(self, other_attribute_set: 'AttributeSet') -> bool: """Check if the attribute set is a subset of the one in parameters. Args: other_attribute_set: The attribute set for which we check whether the attribute set is a subset of. Returns: The attribute set is a subset of the other attribute set. """ self_attribute_ids_set = frozenset(self.attribute_ids) other_attribute_ids_set = frozenset(other_attribute_set.attribute_ids) return self_attribute_ids_set.issubset(other_attribute_ids_set) def get_attribute_by_id(self, attribute_id: int) -> Attribute: """Give an attribute by its id. Args: attribute_id: The id of the attribute to retrieve. Raises: KeyError: The attribute is not present in this attribute set. """ if attribute_id not in self._id_to_attr: raise KeyError(f'No attribute with the id {attribute_id}.') return self._id_to_attr[attribute_id] def get_attribute_by_name(self, name: str) -> Attribute: """Give an attribute by its name. Args: name: The name of the attribute to retrieve. Raises: KeyError: The attribute is not present in this attribute set. """ for attribute in self._id_to_attr.values(): if attribute.name == name: return attribute raise KeyError(f'No attribute is named {name}.')
class Replica(HasActionQueue, MessageProcessor): def __init__(self, node: 'plenum.server.node.Node', instId: int, isMaster: bool = False): """ Create a new replica. :param node: Node on which this replica is located :param instId: the id of the protocol instance the replica belongs to :param isMaster: is this a replica of the master protocol instance """ super().__init__() self.stats = Stats(TPCStat) self.config = getConfig() routerArgs = [(ReqDigest, self._preProcessReqDigest)] for r in [PrePrepare, Prepare, Commit]: routerArgs.append((r, self.processThreePhaseMsg)) routerArgs.append((Checkpoint, self.processCheckpoint)) routerArgs.append((ThreePCState, self.process3PhaseState)) self.inBoxRouter = Router(*routerArgs) self.threePhaseRouter = Router( (PrePrepare, self.processPrePrepare), (Prepare, self.processPrepare), (Commit, self.processCommit) ) self.node = node self.instId = instId self.name = self.generateName(node.name, self.instId) self.outBox = deque() """ This queue is used by the replica to send messages to its node. Replica puts messages that are consumed by its node """ self.inBox = deque() """ This queue is used by the replica to receive messages from its node. Node puts messages that are consumed by the replica """ self.inBoxStash = deque() """ If messages need to go back on the queue, they go here temporarily and are put back on the queue on a state change """ self.isMaster = isMaster # Indicates name of the primary replica of this protocol instance. # None in case the replica does not know who the primary of the # instance is self._primaryName = None # type: Optional[str] # Requests waiting to be processed once the replica is able to decide # whether it is primary or not self.postElectionMsgs = deque() # PRE-PREPAREs that are waiting to be processed but do not have the # corresponding request digest. Happens when replica has not been # forwarded the request by the node but is getting 3 phase messages. # The value is a list since a malicious entry might send PRE-PREPARE # with a different digest and since we dont have the request finalised, # we store all PRE-PPREPARES self.prePreparesPendingReqDigest = {} # type: Dict[Tuple[str, int], List] # PREPAREs that are stored by non primary replica for which it has not # got any PRE-PREPARE. Dictionary that stores a tuple of view no and # prepare sequence number as key and a deque of PREPAREs as value. # This deque is attempted to be flushed on receiving every # PRE-PREPARE request. self.preparesWaitingForPrePrepare = {} # type: Dict[Tuple[int, int], deque] # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE # received self.commitsWaitingForPrepare = {} # type: Dict[Tuple[int, int], deque] # Dictionary of sent PRE-PREPARE that are stored by primary replica # which it has broadcasted to all other non primary replicas # Key of dictionary is a 2 element tuple with elements viewNo, # pre-prepare seqNo and value is a tuple of Request Digest and time self.sentPrePrepares = {} # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2 # element tuple with elements viewNo, pre-prepare seqNo and value is # a tuple of Request Digest and time self.prePrepares = {} # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] # Dictionary of received Prepare requests. 
Key of dictionary is a 2 # element tuple with elements viewNo, seqNo and value is a 2 element # tuple containing request digest and set of sender node names(sender # replica names in case of multiple protocol instances) # (viewNo, seqNo) -> ((identifier, reqId), {senders}) self.prepares = Prepares() # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]] self.commits = Commits() # type: Dict[Tuple[int, int], # Tuple[Tuple[str, int], Set[str]]] # Set of tuples to keep track of ordered requests. Each tuple is # (viewNo, ppSeqNo) self.ordered = OrderedSet() # type: OrderedSet[Tuple[int, int]] # Dictionary to keep track of the which replica was primary during each # view. Key is the view no and value is the name of the primary # replica during that view self.primaryNames = {} # type: Dict[int, str] # Holds msgs that are for later views self.threePhaseMsgsForLaterView = deque() # type: deque[(ThreePhaseMsg, str)] # Holds tuple of view no and prepare seq no of 3-phase messages it # received while it was not participating self.stashingWhileCatchingUp = set() # type: Set[Tuple] # Commits which are not being ordered since commits with lower view # numbers and sequence numbers have not been ordered yet. Key is the # viewNo and value a map of pre-prepare sequence number to commit self.stashedCommitsForOrdering = {} # type: Dict[int, # Dict[int, Commit]] self.checkpoints = SortedDict(lambda k: k[0]) self.stashingWhileOutsideWaterMarks = deque() # Low water mark self._h = 0 # type: int # High water mark self.H = self._h + self.config.LOG_SIZE # type: int self.lastPrePrepareSeqNo = self.h # type: int @property def h(self) -> int: return self._h @h.setter def h(self, n): self._h = n self.H = self._h + self.config.LOG_SIZE @property def requests(self): return self.node.requests def shouldParticipate(self, viewNo: int, ppSeqNo: int): # Replica should only participating in the consensus process and the # replica did not stash any of this request's 3-phase request return self.node.isParticipating and (viewNo, ppSeqNo) \ not in self.stashingWhileCatchingUp @staticmethod def generateName(nodeName: str, instId: int): """ Create and return the name for a replica using its nodeName and instanceId. Ex: Alpha:1 """ return "{}:{}".format(nodeName, instId) @staticmethod def getNodeName(replicaName: str): return replicaName.split(":")[0] @property def isPrimary(self): """ Is this node primary? :return: True if this node is primary, False otherwise """ return self._primaryName == self.name if self._primaryName is not None \ else None @property def primaryName(self): """ Name of the primary replica of this replica's instance :return: Returns name if primary is known, None otherwise """ return self._primaryName @primaryName.setter def primaryName(self, value: Optional[str]) -> None: """ Set the value of isPrimary. :param value: the value to set isPrimary to """ if not value == self._primaryName: self._primaryName = value self.primaryNames[self.viewNo] = value logger.debug("{} setting primaryName for view no {} to: {}". format(self, self.viewNo, value)) logger.debug("{}'s primaryNames for views are: {}". format(self, self.primaryNames)) self._stateChanged() def _stateChanged(self): """ A series of actions to be performed when the state of this replica changes. 
- UnstashInBox (see _unstashInBox) """ self._unstashInBox() if self.isPrimary is not None: # TODO handle suspicion exceptions here self.process3PhaseReqsQueue() # TODO handle suspicion exceptions here try: self.processPostElectionMsgs() except SuspiciousNode as ex: self.outBox.append(ex) self.discard(ex.msg, ex.reason, logger.warning) def _stashInBox(self, msg): """ Stash the specified message into the inBoxStash of this replica. :param msg: the message to stash """ self.inBoxStash.append(msg) def _unstashInBox(self): """ Append the inBoxStash to the right of the inBox. """ self.inBox.extend(self.inBoxStash) self.inBoxStash.clear() def __repr__(self): return self.name @property def f(self) -> int: """ Return the number of Byzantine Failures that can be tolerated by this system. Equal to (N - 1)/3, where N is the number of nodes in the system. """ return self.node.f @property def viewNo(self): """ Return the current view number of this replica. """ return self.node.viewNo def isPrimaryInView(self, viewNo: int) -> Optional[bool]: """ Return whether a primary has been selected for this view number. """ return self.primaryNames[viewNo] == self.name def isMsgForLaterView(self, msg): """ Return whether this request's view number is greater than the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo > self.viewNo def isMsgForCurrentView(self, msg): """ Return whether this request's view number is equal to the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo == self.viewNo def isMsgForPrevView(self, msg): """ Return whether this request's view number is less than the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo < self.viewNo def isPrimaryForMsg(self, msg) -> Optional[bool]: """ Return whether this replica is primary if the request's view number is equal this replica's view number and primary has been selected for the current view. Return None otherwise. :param msg: message """ if self.isMsgForLaterView(msg): self.discard(msg, "Cannot get primary status for a request for a later " "view {}. Request is {}".format(self.viewNo, msg), logger.error) else: return self.isPrimary if self.isMsgForCurrentView(msg) \ else self.isPrimaryInView(msg.viewNo) def isMsgFromPrimary(self, msg, sender: str) -> bool: """ Return whether this message was from primary replica :param msg: :param sender: :return: """ if self.isMsgForLaterView(msg): logger.error("{} cannot get primary for a request for a later " "view. Request is {}".format(self, msg)) else: return self.primaryName == sender if self.isMsgForCurrentView( msg) else self.primaryNames[msg.viewNo] == sender def _preProcessReqDigest(self, rd: ReqDigest) -> None: """ Process request digest if this replica is not a primary, otherwise stash the message into the inBox. :param rd: the client Request Digest """ if self.isPrimary is not None: self.processReqDigest(rd) else: logger.debug("{} stashing request digest {} since it does not know " "its primary status". format(self, (rd.identifier, rd.reqId))) self._stashInBox(rd) def serviceQueues(self, limit=None): """ Process `limit` number of messages in the inBox. :param limit: the maximum number of messages to process :return: the number of messages successfully processed """ # TODO should handle SuspiciousNode here r = self.inBoxRouter.handleAllSync(self.inBox, limit) r += self._serviceActions() return r # Messages that can be processed right now needs to be added back to the # queue. 
They might be able to be processed later def processPostElectionMsgs(self): """ Process messages waiting for the election of a primary replica to complete. """ while self.postElectionMsgs: msg = self.postElectionMsgs.popleft() logger.debug("{} processing pended msg {}".format(self, msg)) self.dispatchThreePhaseMsg(*msg) def process3PhaseReqsQueue(self): """ Process the 3 phase requests from the queue whose view number is equal to the current view number of this replica. """ unprocessed = deque() while self.threePhaseMsgsForLaterView: request, sender = self.threePhaseMsgsForLaterView.popleft() logger.debug("{} processing pended 3 phase request: {}" .format(self, request)) # If the request is for a later view dont try to process it but add # it back to the queue. if self.isMsgForLaterView(request): unprocessed.append((request, sender)) else: self.processThreePhaseMsg(request, sender) self.threePhaseMsgsForLaterView = unprocessed @property def quorum(self) -> int: r""" Return the quorum of this RBFT system. Equal to :math:`2f + 1`. Return None if `f` is not yet determined. """ return self.node.quorum def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: """ Create a three phase request to be handled by the threePhaseRouter. :param msg: the ThreePhaseMsg to dispatch :param sender: the name of the node that sent this request """ senderRep = self.generateName(sender, self.instId) if self.isPpSeqNoAcceptable(msg.ppSeqNo): try: self.threePhaseRouter.handleSync((msg, senderRep)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) else: logger.debug("{} stashing 3 phase message {} since ppSeqNo {} is " "not between {} and {}". format(self, msg, msg.ppSeqNo, self.h, self.H)) self.stashingWhileOutsideWaterMarks.append((msg, sender)) def processReqDigest(self, rd: ReqDigest): """ Process a request digest. Works only if this replica has decided its primary status. :param rd: the client request digest to process """ self.stats.inc(TPCStat.ReqDigestRcvd) if self.isPrimary is False: self.dequeuePrePrepare(rd.identifier, rd.reqId) else: self.doPrePrepare(rd) def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): """ Process a 3-phase (pre-prepare, prepare and commit) request. Dispatch the request only if primary has already been decided, otherwise stash it. :param msg: the Three Phase message, one of PRE-PREPARE, PREPARE, COMMIT :param sender: name of the node that sent this message """ # Can only proceed further if it knows whether its primary or not if self.isMsgForLaterView(msg): self.threePhaseMsgsForLaterView.append((msg, sender)) logger.debug("{} pended received 3 phase request for a later view: " "{}".format(self, msg)) else: if self.isPrimary is None: self.postElectionMsgs.append((msg, sender)) logger.debug("Replica {} pended request {} from {}". format(self, msg, sender)) else: self.dispatchThreePhaseMsg(msg, sender) def processPrePrepare(self, pp: PrePrepare, sender: str): """ Validate and process the PRE-PREPARE specified. If validation is successful, create a PREPARE and broadcast it. :param pp: a prePrepareRequest :param sender: name of the node that sent this message """ key = (pp.viewNo, pp.ppSeqNo) logger.debug("{} Receiving PRE-PREPARE{} at {} from {}". format(self, key, time.perf_counter(), sender)) if self.canProcessPrePrepare(pp, sender): if not self.node.isParticipating: self.stashingWhileCatchingUp.add(key) self.addToPrePrepares(pp) logger.info("{} processed incoming PRE-PREPARE{}". 
format(self, key)) def tryPrepare(self, pp: PrePrepare): """ Try to send the Prepare message if the PrePrepare message is ready to be passed into the Prepare phase. """ if self.canSendPrepare(pp): self.doPrepare(pp) else: logger.debug("{} cannot send PREPARE".format(self)) def processPrepare(self, prepare: Prepare, sender: str) -> None: """ Validate and process the PREPARE specified. If validation is successful, create a COMMIT and broadcast it. :param prepare: a PREPARE msg :param sender: name of the node that sent the PREPARE """ # TODO move this try/except up higher logger.debug("{} received PREPARE{} from {}". format(self, (prepare.viewNo, prepare.ppSeqNo), sender)) try: if self.isValidPrepare(prepare, sender): self.addToPrepares(prepare, sender) self.stats.inc(TPCStat.PrepareRcvd) logger.debug("{} processed incoming PREPARE {}". format(self, (prepare.viewNo, prepare.ppSeqNo))) else: # TODO let's have isValidPrepare throw an exception that gets # handled and possibly logged higher logger.warning("{} cannot process incoming PREPARE". format(self)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) def processCommit(self, commit: Commit, sender: str) -> None: """ Validate and process the COMMIT specified. If validation is successful, return the message to the node. :param commit: an incoming COMMIT message :param sender: name of the node that sent the COMMIT """ logger.debug("{} received COMMIT {} from {}". format(self, commit, sender)) if self.isValidCommit(commit, sender): self.stats.inc(TPCStat.CommitRcvd) self.addToCommits(commit, sender) logger.debug("{} processed incoming COMMIT{}". format(self, (commit.viewNo, commit.ppSeqNo))) def tryCommit(self, prepare: Prepare): """ Try to commit if the Prepare message is ready to be passed into the commit phase. """ if self.canCommit(prepare): self.doCommit(prepare) else: logger.debug("{} not yet able to send COMMIT".format(self)) def tryOrder(self, commit: Commit): """ Try to order if the Commit message is ready to be ordered. """ canOrder, reason = self.canOrder(commit) if canOrder: logger.debug("{} returning request to node".format(self)) self.tryOrdering(commit) else: logger.trace("{} cannot return request to node: {}". format(self, reason)) def doPrePrepare(self, reqDigest: ReqDigest) -> None: """ Broadcast a PRE-PREPARE to all the replicas. :param reqDigest: a tuple with elements identifier, reqId, and digest """ if not self.node.isParticipating: logger.error("Non participating node is attempting PRE-PREPARE. " "This should not happen.") return if self.lastPrePrepareSeqNo == self.H: logger.debug("{} stashing PRE-PREPARE {} since outside greater " "than high water mark {}". format(self, (self.viewNo, self.lastPrePrepareSeqNo+1), self.H)) self.stashingWhileOutsideWaterMarks.append(reqDigest) return self.lastPrePrepareSeqNo += 1 tm = time.time()*1000 logger.debug("{} Sending PRE-PREPARE {} at {}". format(self, (self.viewNo, self.lastPrePrepareSeqNo), time.perf_counter())) prePrepareReq = PrePrepare(self.instId, self.viewNo, self.lastPrePrepareSeqNo, *reqDigest, tm) self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest.key, tm) self.send(prePrepareReq, TPCStat.PrePrepareSent) def doPrepare(self, pp: PrePrepare): logger.debug("{} Sending PREPARE {} at {}". 
format(self, (pp.viewNo, pp.ppSeqNo), time.perf_counter())) prepare = Prepare(self.instId, pp.viewNo, pp.ppSeqNo, pp.digest, pp.ppTime) self.send(prepare, TPCStat.PrepareSent) self.addToPrepares(prepare, self.name) def doCommit(self, p: Prepare): """ Create a commit message from the given Prepare message and trigger the commit phase :param p: the prepare message """ logger.debug("{} Sending COMMIT{} at {}". format(self, (p.viewNo, p.ppSeqNo), time.perf_counter())) commit = Commit(self.instId, p.viewNo, p.ppSeqNo, p.digest, p.ppTime) self.send(commit, TPCStat.CommitSent) self.addToCommits(commit, self.name) def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: """ Decide whether this replica is eligible to process a PRE-PREPARE, based on the following criteria: - this replica is non-primary replica - the request isn't in its list of received PRE-PREPAREs - the request is waiting to for PRE-PREPARE and the digest value matches :param pp: a PRE-PREPARE msg to process :param sender: the name of the node that sent the PRE-PREPARE msg :return: True if processing is allowed, False otherwise """ # TODO: Check whether it is rejecting PRE-PREPARE from previous view # PRE-PREPARE should not be sent from non primary if not self.isMsgFromPrimary(pp, sender): raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) # A PRE-PREPARE is being sent to primary if self.isPrimaryForMsg(pp) is True: raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp) # A PRE-PREPARE is sent that has already been received if (pp.viewNo, pp.ppSeqNo) in self.prePrepares: raise SuspiciousNode(sender, Suspicions.DUPLICATE_PPR_SENT, pp) key = (pp.identifier, pp.reqId) if not self.requests.isFinalised(key): self.enqueuePrePrepare(pp, sender) return False # A PRE-PREPARE is sent that does not match request digest if self.requests.digest(key) != pp.digest: raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp) return True def addToPrePrepares(self, pp: PrePrepare) -> None: """ Add the specified PRE-PREPARE to this replica's list of received PRE-PREPAREs. :param pp: the PRE-PREPARE to add to the list """ key = (pp.viewNo, pp.ppSeqNo) self.prePrepares[key] = \ ((pp.identifier, pp.reqId), pp.ppTime) self.dequeuePrepares(*key) self.dequeueCommits(*key) self.stats.inc(TPCStat.PrePrepareRcvd) self.tryPrepare(pp) def hasPrepared(self, request) -> bool: return self.prepares.hasPrepareFrom(request, self.name) def canSendPrepare(self, request) -> bool: """ Return whether the request identified by (identifier, requestId) can proceed to the Prepare step. :param request: any object with identifier and requestId attributes """ return self.shouldParticipate(request.viewNo, request.ppSeqNo) \ and not self.hasPrepared(request) \ and self.requests.isFinalised((request.identifier, request.reqId)) def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: """ Return whether the PREPARE specified is valid. 
:param prepare: the PREPARE to validate :param sender: the name of the node that sent the PREPARE :return: True if PREPARE is valid, False otherwise """ key = (prepare.viewNo, prepare.ppSeqNo) primaryStatus = self.isPrimaryForMsg(prepare) ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares # If a non primary replica and receiving a PREPARE request before a # PRE-PREPARE request, then proceed # PREPARE should not be sent from primary if self.isMsgFromPrimary(prepare, sender): raise SuspiciousNode(sender, Suspicions.PR_FRM_PRIMARY, prepare) # If non primary replica if primaryStatus is False: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE not received for the PREPARE, might be slow network if key not in ppReqs: self.enqueuePrepare(prepare, sender) return False elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) elif prepare.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare) else: return True # If primary replica else: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE was not sent for this PREPARE, certainly # malicious behavior elif key not in ppReqs: raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare) elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) elif prepare.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare) else: return True def addToPrepares(self, prepare: Prepare, sender: str): self.prepares.addVote(prepare, sender) self.tryCommit(prepare) def hasCommitted(self, request) -> bool: return self.commits.hasCommitFrom(ThreePhaseKey( request.viewNo, request.ppSeqNo), self.name) def canCommit(self, prepare: Prepare) -> bool: """ Return whether the specified PREPARE can proceed to the Commit step. Decision criteria: - If this replica has got just 2f PREPARE requests then commit request. - If less than 2f PREPARE requests then probably there's no consensus on the request; don't commit - If more than 2f then already sent COMMIT; don't commit :param prepare: the PREPARE """ return self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo) and \ self.prepares.hasQuorum(prepare, self.f) and \ not self.hasCommitted(prepare) def isValidCommit(self, commit: Commit, sender: str) -> bool: """ Return whether the COMMIT specified is valid. :param commit: the COMMIT to validate :return: True if `request` is valid, False otherwise """ primaryStatus = self.isPrimaryForMsg(commit) ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares key = (commit.viewNo, commit.ppSeqNo) if key not in ppReqs: self.enqueueCommit(commit, sender) return False if (key not in self.prepares and key not in self.preparesWaitingForPrePrepare): logger.debug("{} rejecting COMMIT{} due to lack of prepares". 
format(self, key)) # raise SuspiciousNode(sender, Suspicions.UNKNOWN_CM_SENT, commit) return False elif self.commits.hasCommitFrom(commit, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit) elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)): raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit) elif key in ppReqs and commit.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG, commit) else: return True def addToCommits(self, commit: Commit, sender: str): """ Add the specified COMMIT to this replica's list of received commit requests. :param commit: the COMMIT to add to the list :param sender: the name of the node that sent the COMMIT """ self.commits.addVote(commit, sender) self.tryOrder(commit) def hasOrdered(self, viewNo, ppSeqNo) -> bool: return (viewNo, ppSeqNo) in self.ordered def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]: """ Return whether the specified commitRequest can be returned to the node. Decision criteria: - If have got just 2f+1 Commit requests then return request to node - If less than 2f+1 of commit requests then probably don't have consensus on the request; don't return request to node - If more than 2f+1 then already returned to node; don't return request to node :param commit: the COMMIT """ if not self.commits.hasQuorum(commit, self.f): return False, "no quorum: {} commits where f is {}".\ format(commit, self.f) if self.hasOrdered(commit.viewNo, commit.ppSeqNo): return False, "already ordered" if not self.isNextInOrdering(commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if viewNo not in self.stashedCommitsForOrdering: self.stashedCommitsForOrdering[viewNo] = {} self.stashedCommitsForOrdering[viewNo][ppSeqNo] = commit # self._schedule(self.orderStashedCommits, 2) self.startRepeating(self.orderStashedCommits, 2) return False, "stashing {} since out of order".\ format(commit) return True, None def isNextInOrdering(self, commit: Commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo-1): return True for (v, p) in self.commits: if v < viewNo: # Have commits from previous view that are unordered. # TODO: Question: would commits be always ordered, what if # some are never ordered and its fine, go to PBFT. return False if v == viewNo and p < ppSeqNo and (v, p) not in self.ordered: # If unordered commits are found with lower ppSeqNo then this # cannot be ordered. return False # TODO: Revisit PBFT paper, how to make sure that last request of the # last view has been ordered? Need change in `VIEW CHANGE` mechanism. # Somehow view change needs to communicate what the last request was. # Also what if some COMMITs were completely missed in the same view return True def orderStashedCommits(self): # TODO: What if the first few commits were out of order and stashed? # `self.ordered` would be empty if self.ordered: lastOrdered = self.ordered[-1] vToRemove = set() for v in self.stashedCommitsForOrdering: if v < lastOrdered[0] and self.stashedCommitsForOrdering[v]: raise RuntimeError("{} found commits from previous view {}" " that were not ordered but last ordered" " is {}".format(self, v, lastOrdered)) pToRemove = set() for p, commit in self.stashedCommitsForOrdering[v].items(): if (v == lastOrdered[0] and lastOrdered == (v, p - 1)) or \ (v > lastOrdered[0] and self.isLowestCommitInView(commit)): logger.debug("{} ordering stashed commit {}". 
format(self, commit)) if self.tryOrdering(commit): lastOrdered = (v, p) pToRemove.add(p) for p in pToRemove: del self.stashedCommitsForOrdering[v][p] if not self.stashedCommitsForOrdering[v]: vToRemove.add(v) for v in vToRemove: del self.stashedCommitsForOrdering[v] # if self.stashedCommitsForOrdering: # self._schedule(self.orderStashedCommits, 2) if not self.stashedCommitsForOrdering: self.stopRepeating(self.orderStashedCommits) def isLowestCommitInView(self, commit): # TODO: Assumption: This assumes that at least one commit that was sent # for any request by any node has been received in the view of this # commit ppSeqNos = [] for v, p in self.commits: if v == commit.viewNo: ppSeqNos.append(p) return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True def tryOrdering(self, commit: Commit) -> None: """ Attempt to send an ORDERED request for the specified COMMIT to the node. :param commit: the COMMIT message """ key = (commit.viewNo, commit.ppSeqNo) logger.debug("{} trying to order COMMIT{}".format(self, key)) reqKey = self.getReqKeyFrom3PhaseKey(key) # type: Tuple digest = self.getDigestFor3PhaseKey(key) if not digest: logger.error("{} did not find digest for {}, request key {}". format(self, key, reqKey)) return self.doOrder(*key, *reqKey, digest, commit.ppTime) return True def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime): key = (viewNo, ppSeqNo) self.addToOrdered(*key) ordered = Ordered(self.instId, viewNo, identifier, reqId, ppTime) # TODO: Should not order or add to checkpoint while syncing # 3 phase state. self.send(ordered, TPCStat.OrderSent) if key in self.stashingWhileCatchingUp: self.stashingWhileCatchingUp.remove(key) logger.debug("{} ordered request {}".format(self, (viewNo, ppSeqNo))) self.addToCheckpoint(ppSeqNo, digest) def processCheckpoint(self, msg: Checkpoint, sender: str): if self.checkpoints: seqNo = msg.seqNo _, firstChk = self.firstCheckPoint if firstChk.isStable: if firstChk.seqNo == seqNo: self.discard(msg, reason="Checkpoint already stable", logMethod=logger.debug) return if firstChk.seqNo > seqNo: self.discard(msg, reason="Higher stable checkpoint present", logMethod=logger.debug) return for state in self.checkpoints.values(): if state.seqNo == seqNo: if state.digest == msg.digest: state.receivedDigests[sender] = msg.digest break else: logger.error("{} received an incorrect digest {} for " "checkpoint {} from {}".format(self, msg.digest, seqNo, sender)) return if len(state.receivedDigests) == 2*self.f: self.markCheckPointStable(msg.seqNo) else: self.discard(msg, reason="No checkpoints present to tally", logMethod=logger.warn) def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState: s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 logger.debug("{} adding new checkpoint state for {}". 
format(self, (s, e))) state = CheckpointState(ppSeqNo, [digest, ], None, {}, False) self.checkpoints[s, e] = state return state def addToCheckpoint(self, ppSeqNo, digest): for (s, e) in self.checkpoints.keys(): if s <= ppSeqNo <= e: state = self.checkpoints[s, e] # type: CheckpointState state.digests.append(digest) state = updateNamedTuple(state, seqNo=ppSeqNo) self.checkpoints[s, e] = state break else: state = self._newCheckpointState(ppSeqNo, digest) s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ if len(state.digests) == self.config.CHK_FREQ: state = updateNamedTuple(state, digest=serialize(state.digests), digests=[]) self.checkpoints[s, e] = state self.send(Checkpoint(self.instId, self.viewNo, ppSeqNo, state.digest)) def markCheckPointStable(self, seqNo): previousCheckpoints = [] for (s, e), state in self.checkpoints.items(): if e == seqNo: state = updateNamedTuple(state, isStable=True) self.checkpoints[s, e] = state break else: previousCheckpoints.append((s, e)) else: logger.error("{} could not find {} in checkpoints". format(self, seqNo)) return self.h = seqNo for k in previousCheckpoints: logger.debug("{} removing previous checkpoint {}".format(self, k)) self.checkpoints.pop(k) self.gc(seqNo) logger.debug("{} marked stable checkpoint {}".format(self, (s, e))) self.processStashedMsgsForNewWaterMarks() def gc(self, tillSeqNo): logger.debug("{} cleaning up till {}".format(self, tillSeqNo)) tpcKeys = set() reqKeys = set() for (v, p), (reqKey, _) in self.sentPrePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) reqKeys.add(reqKey) for (v, p), (reqKey, _) in self.prePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) reqKeys.add(reqKey) logger.debug("{} found {} 3 phase keys to clean". format(self, len(tpcKeys))) logger.debug("{} found {} request keys to clean". format(self, len(reqKeys))) for k in tpcKeys: self.sentPrePrepares.pop(k, None) self.prePrepares.pop(k, None) self.prepares.pop(k, None) self.commits.pop(k, None) if k in self.ordered: self.ordered.remove(k) for k in reqKeys: self.requests.pop(k, None) def processStashedMsgsForNewWaterMarks(self): while self.stashingWhileOutsideWaterMarks: item = self.stashingWhileOutsideWaterMarks.pop() logger.debug("{} processing stashed item {} after new stable " "checkpoint".format(self, item)) if isinstance(item, ReqDigest): self.doPrePrepare(item) elif isinstance(item, tuple) and len(tuple) == 2: self.dispatchThreePhaseMsg(*item) else: logger.error("{} cannot process {} " "from stashingWhileOutsideWaterMarks". format(self, item)) @property def firstCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: return self.checkpoints.peekitem(0) @property def lastCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: return self.checkpoints.peekitem(-1) def isPpSeqNoAcceptable(self, ppSeqNo: int): return self.h < ppSeqNo <= self.H def addToOrdered(self, viewNo: int, ppSeqNo: int): self.ordered.add((viewNo, ppSeqNo)) def enqueuePrePrepare(self, request: PrePrepare, sender: str): logger.debug("Queueing pre-prepares due to unavailability of finalised " "Request. 
Request {} from {}".format(request, sender)) key = (request.identifier, request.reqId) if key not in self.prePreparesPendingReqDigest: self.prePreparesPendingReqDigest[key] = [] self.prePreparesPendingReqDigest[key].append((request, sender)) def dequeuePrePrepare(self, identifier: int, reqId: int): key = (identifier, reqId) if key in self.prePreparesPendingReqDigest: pps = self.prePreparesPendingReqDigest[key] for (pp, sender) in pps: logger.debug("{} popping stashed PRE-PREPARE{}". format(self, key)) if pp.digest == self.requests.digest(key): self.prePreparesPendingReqDigest.pop(key) self.processPrePrepare(pp, sender) logger.debug( "{} processed {} PRE-PREPAREs waiting for finalised " "request for identifier {} and reqId {}". format(self, pp, identifier, reqId)) break def enqueuePrepare(self, request: Prepare, sender: str): logger.debug("Queueing prepares due to unavailability of PRE-PREPARE. " "Request {} from {}".format(request, sender)) key = (request.viewNo, request.ppSeqNo) if key not in self.preparesWaitingForPrePrepare: self.preparesWaitingForPrePrepare[key] = deque() self.preparesWaitingForPrePrepare[key].append((request, sender)) def dequeuePrepares(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.preparesWaitingForPrePrepare: i = 0 # Keys of pending prepares that will be processed below while self.preparesWaitingForPrePrepare[key]: prepare, sender = self.preparesWaitingForPrePrepare[ key].popleft() logger.debug("{} popping stashed PREPARE{}".format(self, key)) self.processPrepare(prepare, sender) i += 1 self.preparesWaitingForPrePrepare.pop(key) logger.debug("{} processed {} PREPAREs waiting for PRE-PREPARE for" " view no {} and seq no {}". format(self, i, viewNo, ppSeqNo)) def enqueueCommit(self, request: Commit, sender: str): logger.debug("Queueing commit due to unavailability of PREPARE. " "Request {} from {}".format(request, sender)) key = (request.viewNo, request.ppSeqNo) if key not in self.commitsWaitingForPrepare: self.commitsWaitingForPrepare[key] = deque() self.commitsWaitingForPrepare[key].append((request, sender)) def dequeueCommits(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.commitsWaitingForPrepare: i = 0 # Keys of pending prepares that will be processed below while self.commitsWaitingForPrepare[key]: commit, sender = self.commitsWaitingForPrepare[ key].popleft() logger.debug("{} popping stashed COMMIT{}".format(self, key)) self.processCommit(commit, sender) i += 1 self.commitsWaitingForPrepare.pop(key) logger.debug("{} processed {} COMMITs waiting for PREPARE for" " view no {} and seq no {}". format(self, i, viewNo, ppSeqNo)) def getDigestFor3PhaseKey(self, key: ThreePhaseKey) -> Optional[str]: reqKey = self.getReqKeyFrom3PhaseKey(key) digest = self.requests.digest(reqKey) if not digest: logger.debug("{} could not find digest in sent or received " "PRE-PREPAREs or PREPAREs for 3 phase key {} and req " "key {}".format(self, key, reqKey)) return None else: return digest def getReqKeyFrom3PhaseKey(self, key: ThreePhaseKey): reqKey = None if key in self.sentPrePrepares: reqKey = self.sentPrePrepares[key][0] elif key in self.prePrepares: reqKey = self.prePrepares[key][0] elif key in self.prepares: reqKey = self.prepares[key][0] else: logger.debug("Could not find request key for 3 phase key {}". 
                         format(key))
        return reqKey

    @property
    def threePhaseState(self):
        # TODO: This method is incomplete
        # Gets the current stable and unstable checkpoints and creates digest
        # of unstable checkpoints
        state = []
        if self.checkpoints:
            # TODO: serialise the unstable checkpoints into `state`
            pass
        return ThreePCState(self.instId, state)

    def process3PhaseState(self, msg: ThreePCState, sender: str):
        # TODO: This is not complete
        pass

    def send(self, msg, stat=None) -> None:
        """
        Send a message to the node on which this replica resides.

        :param msg: the message to send
        """
        logger.display("{} sending {}".format(self, msg.__class__.__name__),
                       extra={"cli": True})
        logger.trace("{} sending {}".format(self, msg))
        if stat:
            self.stats.inc(stat)
        self.outBox.append(msg)
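# Sketch of the checkpoint bookkeeping used by Replica above (not from the
# original source). `checkpoints` is a SortedDict over (start, end) windows
# with key=lambda k: k[0], so windows stay ordered by their starting sequence
# number and peekitem(0)/peekitem(-1) return the oldest/newest window.
from sortedcontainers import SortedDict

CHK_FREQ = 100  # stand-in for config.CHK_FREQ
checkpoints = SortedDict(lambda k: k[0])
for start in (201, 1, 101):
    checkpoints[(start, start + CHK_FREQ - 1)] = {'digests': [], 'stable': False}

assert checkpoints.peekitem(0)[0] == (1, 100)      # oldest window
assert checkpoints.peekitem(-1)[0] == (201, 300)   # newest window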
class FederationRemoteSendQueue(object): """A drop in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id # We may have multiple federation sender instances, so we need to track # their positions separately. self._sender_instances = hs.config.worker.federation_shard_config.instances self._sender_positions = {} # Pending presence map user_id -> UserPresenceState self.presence_map = {} # type: Dict[str, UserPresenceState] # Stream position -> list[user_id] self.presence_changed = SortedDict( ) # type: SortedDict[int, List[str]] # Stores the destinations we need to explicitly send presence to about a # given user. # Stream position -> (user_id, destinations) self.presence_destinations = ( SortedDict()) # type: SortedDict[int, Tuple[str, List[str]]] # (destination, key) -> EDU self.keyed_edu = {} # type: Dict[Tuple[str, tuple], Edu] # stream position -> (destination, key) self.keyed_edu_changed = (SortedDict() ) # type: SortedDict[int, Tuple[str, tuple]] self.edus = SortedDict() # type: SortedDict[int, Edu] # stream ID for the next entry into presence_changed/keyed_edu_changed/edus. self.pos = 1 # map from stream ID to the time that stream entry was generated, so that we # can clear out entries after a while self.pos_time = SortedDict() # type: SortedDict[int, int] # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): LaterGauge( "synapse_federation_send_queue_%s_size" % (queue_name, ), "", [], lambda: len(queue), ) for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", "edus", "pos_time", "presence_destinations", ]: register(queue_name, getattr(self, queue_name)) self.clock.looping_call(self._clear_queue, 30 * 1000) def _next_pos(self): pos = self.pos self.pos += 1 self.pos_time[self.clock.time_msec()] = pos return pos def _clear_queue(self): """Clear the queues for anything older than N minutes""" FIVE_MINUTES_AGO = 5 * 60 * 1000 now = self.clock.time_msec() keys = self.pos_time.keys() time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO) if not keys[:time]: return position_to_delete = max(keys[:time]) for key in keys[:time]: del self.pos_time[key] self._clear_queue_before_pos(position_to_delete) def _clear_queue_before_pos(self, position_to_delete): """Clear all the queues from before a given position""" with Measure(self.clock, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_changed.keys() i = self.presence_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_changed[key] user_ids = { user_id for uids in self.presence_changed.values() for user_id in uids } keys = self.presence_destinations.keys() i = self.presence_destinations.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_destinations[key] user_ids.update( user_id for user_id, _ in self.presence_destinations.values()) to_del = [ user_id for user_id in self.presence_map if user_id not in user_ids ] for user_id in to_del: del self.presence_map[user_id] # Delete things out of keyed edus keys = self.keyed_edu_changed.keys() i = self.keyed_edu_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.keyed_edu_changed[key] live_keys = set() for edu_key in 
self.keyed_edu_changed.values(): live_keys.add(edu_key) keys_to_del = [ edu_key for edu_key in self.keyed_edu if edu_key not in live_keys ] for edu_key in keys_to_del: del self.keyed_edu[edu_key] # Delete things out of edu map keys = self.edus.keys() i = self.edus.bisect_left(position_to_delete) for key in keys[:i]: del self.edus[key] def notify_new_events(self, current_id): """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. pass def build_and_send_edu(self, destination, edu_type, content, key=None): """As per FederationSender""" if destination == self.server_name: logger.info("Not sending EDU to ourselves") return pos = self._next_pos() edu = Edu( origin=self.server_name, destination=destination, edu_type=edu_type, content=content, ) if key: assert isinstance(key, tuple) self.keyed_edu[(destination, key)] = edu self.keyed_edu_changed[pos] = (destination, key) else: self.edus[pos] = edu self.notifier.on_new_replication_data() def send_read_receipt(self, receipt): """As per FederationSender Args: receipt (synapse.types.ReadReceipt): """ # nothing to do here: the replication listener will handle it. return defer.succeed(None) def send_presence(self, states): """As per FederationSender Args: states (list(UserPresenceState)) """ pos = self._next_pos() # We only want to send presence for our own users, so lets always just # filter here just in case. local_states = list( filter(lambda s: self.is_mine_id(s.user_id), states)) self.presence_map.update( {state.user_id: state for state in local_states}) self.presence_changed[pos] = [state.user_id for state in local_states] self.notifier.on_new_replication_data() def send_presence_to_destinations(self, states, destinations): """As per FederationSender Args: states (list[UserPresenceState]) destinations (list[str]) """ for state in states: pos = self._next_pos() self.presence_map.update( {state.user_id: state for state in states}) self.presence_destinations[pos] = (state.user_id, destinations) self.notifier.on_new_replication_data() def send_device_messages(self, destination): """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. def get_current_token(self): return self.pos - 1 def federation_ack(self, instance_name, token): if self._sender_instances: # If we have configured multiple federation sender instances we need # to track their positions separately, and only clear the queue up # to the token all instances have acked. self._sender_positions[instance_name] = token token = min(self._sender_positions.values()) self._clear_queue_before_pos(token) async def get_replication_rows( self, instance_name: str, from_token: int, to_token: int, target_row_count: int ) -> Tuple[List[Tuple[int, Tuple]], int, bool]: """Get rows to be sent over federation between the two tokens Args: instance_name: the name of the current process from_token: the previous stream token: the starting point for fetching the updates to_token: the new stream token: the point to get updates up to target_row_count: a target for the number of rows to be returned. Returns: a triplet `(updates, new_last_token, limited)`, where: * `updates` is a list of `(token, row)` entries. * `new_last_token` is the new position in stream. * `limited` is whether there are more updates to fetch. """ # TODO: Handle target_row_count. 
# To handle restarts where we wrap around if from_token > self.pos: from_token = -1 # list of tuple(int, BaseFederationRow), where the first is the position # of the federation stream. rows = [] # type: List[Tuple[int, BaseFederationRow]] # Fetch changed presence i = self.presence_changed.bisect_right(from_token) j = self.presence_changed.bisect_right(to_token) + 1 dest_user_ids = [ (pos, user_id) for pos, user_id_list in self.presence_changed.items()[i:j] for user_id in user_id_list ] for (key, user_id) in dest_user_ids: rows.append((key, PresenceRow(state=self.presence_map[user_id]))) # Fetch presence to send to destinations i = self.presence_destinations.bisect_right(from_token) j = self.presence_destinations.bisect_right(to_token) + 1 for pos, (user_id, dests) in self.presence_destinations.items()[i:j]: rows.append(( pos, PresenceDestinationsRow(state=self.presence_map[user_id], destinations=list(dests)), )) # Fetch changes keyed edus i = self.keyed_edu_changed.bisect_right(from_token) j = self.keyed_edu_changed.bisect_right(to_token) + 1 # We purposefully clobber based on the key here, python dict comprehensions # always use the last value, so this will correctly point to the last # stream position. keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} for ((destination, edu_key), pos) in keyed_edus.items(): rows.append(( pos, KeyedEduRow(key=edu_key, edu=self.keyed_edu[(destination, edu_key)]), )) # Fetch changed edus i = self.edus.bisect_right(from_token) j = self.edus.bisect_right(to_token) + 1 edus = self.edus.items()[i:j] for (pos, edu) in edus: rows.append((pos, EduRow(edu))) # Sort rows based on pos rows.sort() return ( [(pos, (row.TypeId, row.to_data())) for pos, row in rows], to_token, False, )
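# Sketch of the stream-position bookkeeping used above (not from the original
# source). Queued items are keyed by a monotonically increasing stream
# position in a SortedDict, so bisect_right() turns "everything in
# (from_token, to_token]" into a cheap slice, and stale positions can be
# dropped the same way.
from sortedcontainers import SortedDict

edus = SortedDict({1: 'edu-a', 2: 'edu-b', 5: 'edu-c', 9: 'edu-d'})

from_token, to_token = 2, 9
i = edus.bisect_right(from_token)
j = edus.bisect_right(to_token)
assert edus.items()[i:j] == [(5, 'edu-c'), (9, 'edu-d')]

# Drop every entry at or before position 5.
cutoff = edus.bisect_right(5)
for key in list(edus.keys()[:cutoff]):
    del edus[key]
assert list(edus) == [9]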
class Topics:
    """ A class that manages a collection of `Topic`s. """

    def __init__(self):
        self.logger = getLogger('topics')
        self.logger.info('started session')
        self.clear()

    def clear(self):
        self.logger.info('Cleared all topics and received data')
        self.topic_list = SortedDict()
        self.transfers = dict()

    def create(self, topic, source='remote'):
        # Create the topic if it doesn't exist already
        if topic not in self.topic_list:
            self.topic_list[topic] = Topic(topic, source=source)
            self.logger.info('new:topic ' + topic)

    def process(self, topic, payload, options=None):
        # Create the topic if it doesn't exist already
        self.create(topic)

        # Add the new sample
        self.topic_list[topic].new_sample(payload, options)

        # logging
        if options:
            self.logger.debug('new sample | {0} [{1}] {2}'.format(
                topic, options['index'], payload))
        else:
            self.logger.debug('new sample | {0} {1}'.format(topic, payload))

        # If there is an active transfer, transfer received data to the queue
        if topic in self.transfers:
            # If transfer requires indexed data, check there is an index
            if self.transfers[topic]['type'] == 'indexed' and options is not None:
                x = options['index']
                self.transfers[topic]['queue'].put([x, payload])
            # For linear data, provide sample id for x and payload for y
            elif self.transfers[topic]['type'] == 'linear':
                x = self.transfers[topic]['lastindex']
                self.transfers[topic]['queue'].put([x, payload])
                self.transfers[topic]['lastindex'] += 1

    def ls(self, source='remote'):
        if source is None:
            # Topic names are the keys and are already kept in sorted order
            return list(self.topic_list.keys())
        else:
            return sorted([
                t.name for t in self.topic_list.values() if t.source == source
            ])

    def samples(self, topic, amount=1):
        if topic not in self.topic_list:
            return None
        if amount == 0 or amount is None:
            return self.topic_list[topic].raw
        return self.topic_list[topic].raw[-amount:]

    def count(self, topic):
        if topic not in self.topic_list:
            return 0
        return len(self.topic_list[topic].raw)

    def exists(self, topic):
        return topic in self.topic_list

    def transfer(self, topic, queue, transfer_type="linear"):
        # If the topic data is not already transferred to some queue
        if topic not in self.transfers:
            self.transfers[topic] = dict()
            self.transfers[topic]['queue'] = queue
            self.transfers[topic]['lastindex'] = 0
            self.transfers[topic]['type'] = transfer_type
            self.logger.info('start transfer | {0}'.format(topic))

            # If there is already existing data under the topic
            if topic in self.topic_list:
                if transfer_type == 'indexed':
                    for key, value in self.topic_list[topic].indexes.items():
                        queue.put([key, value])
                elif transfer_type == 'linear':
                    for item in self.topic_list[topic].raw:
                        queue.put([self.transfers[topic]['lastindex'], item])
                        self.transfers[topic]['lastindex'] += 1

    def untransfer(self, topic):
        # If the topic data is already transferred to some queue
        if topic in self.transfers:
            # Remove it from the transfer list
            del self.transfers[topic]
            self.logger.info('stop transfer | {0}'.format(topic))

    def intransfer(self, topic):
        return topic in self.transfers

    def has_indexed_data(self, topic):
        return self.topic_list[topic].has_indexed_data()
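# Small sketch (not from the original source, independent of the Topic class):
# because topic_list is a SortedDict keyed by topic name, iterating its keys
# already yields the names in alphabetical order, so listing topics needs no
# extra sort.
from sortedcontainers import SortedDict

topic_list = SortedDict()
for name in ('sensors/imu', 'battery', 'sensors/gps'):
    topic_list.setdefault(name, object())

assert list(topic_list.keys()) == ['battery', 'sensors/gps', 'sensors/imu']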
class OrderedSet(abc.MutableSet, abc.Sequence): """Like OrderedDict, OrderedSet maintains the insertion order of elements. For example:: >>> ordered_set = OrderedSet('abcde') >>> list(ordered_set) == list('abcde') True >>> ordered_set = OrderedSet('edcba') >>> list(ordered_set) == list('edcba') True OrderedSet also implements the collections.Sequence interface. """ # pylint: disable=too-many-ancestors def __init__(self, iterable=()): # pylint: disable=super-init-not-called self._keys = {} self._nums = SortedDict() self._keys_view = self._nums.keys() self._count = count() self |= iterable def __contains__(self, key): "``key in ordered_set``" return key in self._keys count = __contains__ def __iter__(self): "``iter(ordered_set)``" return iter(self._nums.values()) def __reversed__(self): "``reversed(ordered_set)``" _nums = self._nums for key in reversed(_nums): yield _nums[key] def __getitem__(self, index): "``ordered_set[index]`` -> element; lookup element at index." num = self._keys_view[index] return self._nums[num] def __len__(self): "``len(ordered_set)``" return len(self._keys) def index(self, value): "Return index of value." # pylint: disable=arguments-differ try: return self._keys[value] except KeyError: raise ValueError('%r is not in %s' % (value, type(self).__name__)) def add(self, value): "Add element, value, to set." if value not in self._keys: num = next(self._count) self._keys[value] = num self._nums[num] = value def discard(self, value): "Remove element, value, from set if it is a member." num = self._keys.pop(value, None) if num is not None: del self._nums[num] def __repr__(self): "Text representation of set." return '%s(%r)' % (type(self).__name__, list(self)) __str__ = __repr__
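# Hedged usage sketch (assumes OrderedSet above is importable together with the
# module-level names it relies on: SortedDict, itertools.count, collections.abc):
# elements keep insertion order, support positional access, and can be discarded.
ordered_set = OrderedSet("bdca")
print(list(ordered_set))    # ['b', 'd', 'c', 'a'] -- insertion order preserved
print(ordered_set[2])       # 'c' -- Sequence-style positional access
ordered_set.discard("d")
print(list(ordered_set))    # ['b', 'c', 'a']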
class CacheStore(object):
    class CacheItem(object):
        __slots__ = ('valid', 'data')

        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        with self.lock:
            try:
                item = self.store[key]
                item.data = data
                item.valid.set()
                return False
            except KeyError:
                item = self.CacheItem()
                item.data = data
                item.valid.set()
                self.store[key] = item
                return True

    def update(self, **kwargs):
        with self.lock:
            items = {}
            created = []
            updated = []
            for k, v in kwargs.items():
                items[k] = self.CacheItem()
                items[k].data = v
                items[k].valid.set()
                if k in self.store:
                    updated.append(k)
                else:
                    created.append(k)
            self.store.update(**items)
            return created, updated

    def update_one(self, key, **kwargs):
        with self.lock:
            item = self.get(key)
            if not item:
                return False
            for k, v in kwargs.items():
                # cached entries are dict-like (see rename), so update keys in place
                item[k] = v
            self.put(key, item)
            return True

    def update_many(self, key, predicate, **kwargs):
        with self.lock:
            updated = []
            for k, v in self.itervalid():
                if predicate(v):
                    if self.update_one(k, **kwargs):
                        updated.append(k)
            return updated

    def get(self, key, default=None, timeout=None):
        item = self.store.get(key)
        if item:
            item.valid.wait(timeout)
            return item.data
        return default

    def remove(self, key):
        with self.lock:
            try:
                del self.store[key]
                return True
            except KeyError:
                return False

    def remove_many(self, keys):
        with self.lock:
            removed = []
            for key in keys:
                try:
                    del self.store[key]
                    removed.append(key)
                except KeyError:
                    pass
            return removed

    def clear(self):
        with self.lock:
            items = list(self.store.keys())
            self.store.clear()
            return items

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        item = self.store.get(key)
        if item:
            return item.valid.is_set()
        return False

    def invalidate(self, key):
        with self.lock:
            item = self.store.get(key)
            if item:
                item.valid.clear()

    def itervalid(self):
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)
        return result

    def query(self, *filter, **params):
        return query(list(self.validvalues()), *filter, **params)
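# Hedged usage sketch (assumes CacheStore is importable and that Event/RLock come
# from threading, as the class appears to expect): put() marks an entry valid, get()
# waits on that validity Event, and the backing SortedDict keeps keys ordered.
cache = CacheStore()
cache.put("b", {"id": "b", "value": 2})
cache.put("a", {"id": "a", "value": 1})
print(list(cache.store.keys()))                           # ['a', 'b'] -- kept sorted
print(cache.get("a"))                                     # {'id': 'a', 'value': 1}
print(cache.get("missing", default=None, timeout=0.01))   # None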
class SequenceLearner(BaseLearner): r"""A learner that will learn a sequence. It simply returns the points in the provided sequence when asked. This is useful when your problem cannot be formulated in terms of another adaptive learner, but you still want to use Adaptive's routines to run, save, and plot. Parameters ---------- function : callable The function to learn. Must take a single element `sequence`. sequence : sequence The sequence to learn. Attributes ---------- data : dict The data as a mapping from "index of element in sequence" => value. Notes ----- From primitive tests, the `~adaptive.SequenceLearner` appears to have a similar performance to `ipyparallel`\s ``load_balanced_view().map``. With the added benefit of having results in the local kernel already. """ def __init__(self, function, sequence): self._original_function = function self.function = _IgnoreFirstArgument(function) self._to_do_indices = SortedSet({i for i, _ in enumerate(sequence)}) self._ntotal = len(sequence) self.sequence = copy(sequence) self.data = SortedDict() self.pending_points = set() def ask(self, n, tell_pending=True): indices = [] points = [] loss_improvements = [] for index in self._to_do_indices: if len(points) >= n: break point = self.sequence[index] indices.append(index) points.append((index, point)) loss_improvements.append(1 / self._ntotal) if tell_pending: for i, p in zip(indices, points): self.tell_pending((i, p)) return points, loss_improvements def _get_data(self): return self.data def _set_data(self, data): if data: indices, values = zip(*data.items()) # the points aren't used by tell, so we can safely pass None points = [(i, None) for i in indices] self.tell_many(points, values) def loss(self, real=True): if not (self._to_do_indices or self.pending_points): return 0 else: npoints = self.npoints + (0 if real else len(self.pending_points)) return (self._ntotal - npoints) / self._ntotal def remove_unfinished(self): for i in self.pending_points: self._to_do_indices.add(i) self.pending_points = set() def tell(self, point, value): index, point = point self.data[index] = value self.pending_points.discard(index) self._to_do_indices.discard(index) def tell_pending(self, point): index, point = point self.pending_points.add(index) self._to_do_indices.discard(index) def done(self): return not self._to_do_indices and not self.pending_points def result(self): """Get the function values in the same order as ``sequence``.""" if not self.done(): raise Exception("Learner is not yet complete.") return list(self.data.values()) @property def npoints(self): return len(self.data)
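# Hedged usage sketch (assumes SequenceLearner and its helpers, e.g. BaseLearner and
# _IgnoreFirstArgument, are importable as in the adaptive package this appears to
# come from): ask() hands out pending indices, tell() stores results in the
# SortedDict, and result() returns the values in the original sequence order.
learner = SequenceLearner(lambda x: x ** 2, sequence=[3, 1, 2])
points, _ = learner.ask(3)
for index, x in points:
    learner.tell((index, x), x ** 2)
print(learner.done())    # True
print(learner.result())  # [9, 1, 4] -- same order as the input sequence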
class Node(BaseNode):
    def __init__(self, *args, **kwargs):
        self.rest = None
        super(Node, self).__init__(*args, **kwargs)

    def _select(self, key):
        """
        Selects the bucket the key should belong to.
        """
        if key < min(self.bucket):
            return self.rest
        elif key >= max(self.bucket):
            return self.bucket.values()[-1]
        for i in range(0, len(self.bucket.keys()) - 1):
            if key >= self.bucket.keys()[i] and key < self.bucket.keys()[i + 1]:
                return self.bucket.values()[i]

    def _insert(self, key, value):
        """
        Recursively inserts the key and value by selecting the bucket the key
        should belong to, and inserting the key and value into that bucket. If
        the node has been split, it inserts the key of the newly created node
        into the bucket of this node.
        """
        result = self._select(key)._insert(key, value)
        self.changed = True
        if result is None:
            return
        key, other = result
        return super()._insert(key, other)

    def _split(self):
        """
        Creates a new node of the same type and splits the contents of the
        bucket into two parts of an equal size. The lower keys are being stored
        in the bucket of the current node. The higher keys are being stored in
        the bucket of the new node. Afterwards, the new node is being returned.
        """
        other = self.__class__(tree=self.tree)
        items = list(self.bucket.items())
        self.bucket = SortedDict(items[:len(items) // 2])
        other.bucket = SortedDict(items[len(items) // 2:])
        # The smallest key of the upper half moves up to the parent; its child
        # becomes the new node's leftmost ("rest") child.
        key, value = other.bucket.popitem(0)
        other.rest = value
        return (key, other)

    def __getitem__(self, key):
        selected_node = self._select(key)
        return selected_node.__getitem__(key)

    def __iter__(self):
        if self.rest is not None:
            for key in self.rest:
                yield key
        for child in self.bucket.values():
            for key in child:
                yield key

    def __len__(self):
        rest_len = len(self.rest) if self.rest is not None else 0
        return sum(len(child) for child in self.bucket.values()) + rest_len
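# Hedged sketch (standalone, without the BaseNode machinery): the linear scan in
# _select can equivalently be expressed with SortedDict.bisect_right, which locates
# the child whose key range contains the lookup key.
from sortedcontainers import SortedDict

bucket = SortedDict({10: "child-10", 20: "child-20", 30: "child-30"})
rest = "child-rest"   # child for keys smaller than every separator key

def select(key):
    if key < bucket.keys()[0]:
        return rest
    # right-most separator key that is <= key
    index = bucket.bisect_right(key) - 1
    return bucket.values()[index]

print(select(5))    # child-rest
print(select(25))   # child-20
print(select(99))   # child-30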
def test_values_view_index(): mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)] temp = SortedDict(mapping[:13]) values = temp.values() with pytest.raises(ValueError): values.index(100)
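# Hedged companion sketch: for a present value, the SortedDict values view resolves
# index() by position in key-sorted order (values 0..12 for keys 'a'..'m'),
# complementing the ValueError case exercised above.
def test_values_view_index_hit():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()
    assert values.index(5) == 5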
class Replica(HasActionQueue, MessageProcessor): def __init__(self, node: 'plenum.server.node.Node', instId: int, isMaster: bool = False): """ Create a new replica. :param node: Node on which this replica is located :param instId: the id of the protocol instance the replica belongs to :param isMaster: is this a replica of the master protocol instance """ HasActionQueue.__init__(self) self.stats = Stats(TPCStat) self.config = getConfig() routerArgs = [(ReqDigest, self._preProcessReqDigest)] for r in [PrePrepare, Prepare, Commit]: routerArgs.append((r, self.processThreePhaseMsg)) routerArgs.append((Checkpoint, self.processCheckpoint)) routerArgs.append((ThreePCState, self.process3PhaseState)) self.inBoxRouter = Router(*routerArgs) self.threePhaseRouter = Router((PrePrepare, self.processPrePrepare), (Prepare, self.processPrepare), (Commit, self.processCommit)) self.node = node self.instId = instId self.name = self.generateName(node.name, self.instId) self.outBox = deque() """ This queue is used by the replica to send messages to its node. Replica puts messages that are consumed by its node """ self.inBox = deque() """ This queue is used by the replica to receive messages from its node. Node puts messages that are consumed by the replica """ self.inBoxStash = deque() """ If messages need to go back on the queue, they go here temporarily and are put back on the queue on a state change """ self.isMaster = isMaster # Indicates name of the primary replica of this protocol instance. # None in case the replica does not know who the primary of the # instance is self._primaryName = None # type: Optional[str] # Requests waiting to be processed once the replica is able to decide # whether it is primary or not self.postElectionMsgs = deque() # PRE-PREPAREs that are waiting to be processed but do not have the # corresponding request digest. Happens when replica has not been # forwarded the request by the node but is getting 3 phase messages. # The value is a list since a malicious entry might send PRE-PREPARE # with a different digest and since we dont have the request finalised, # we store all PRE-PPREPARES self.prePreparesPendingReqDigest = { } # type: Dict[Tuple[str, int], List] # PREPAREs that are stored by non primary replica for which it has not # got any PRE-PREPARE. Dictionary that stores a tuple of view no and # prepare sequence number as key and a deque of PREPAREs as value. # This deque is attempted to be flushed on receiving every # PRE-PREPARE request. self.preparesWaitingForPrePrepare = {} # type: Dict[Tuple[int, int], deque] # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE # received self.commitsWaitingForPrepare = {} # type: Dict[Tuple[int, int], deque] # Dictionary of sent PRE-PREPARE that are stored by primary replica # which it has broadcasted to all other non primary replicas # Key of dictionary is a 2 element tuple with elements viewNo, # pre-prepare seqNo and value is a tuple of Request Digest and time self.sentPrePrepares = {} # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2 # element tuple with elements viewNo, pre-prepare seqNo and value is # a tuple of Request Digest and time self.prePrepares = {} # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] # Dictionary of received Prepare requests. 
Key of dictionary is a 2 # element tuple with elements viewNo, seqNo and value is a 2 element # tuple containing request digest and set of sender node names(sender # replica names in case of multiple protocol instances) # (viewNo, seqNo) -> ((identifier, reqId), {senders}) self.prepares = Prepares() # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]] self.commits = Commits() # type: Dict[Tuple[int, int], # Tuple[Tuple[str, int], Set[str]]] # Set of tuples to keep track of ordered requests. Each tuple is # (viewNo, ppSeqNo) self.ordered = OrderedSet() # type: OrderedSet[Tuple[int, int]] # Dictionary to keep track of the which replica was primary during each # view. Key is the view no and value is the name of the primary # replica during that view self.primaryNames = {} # type: Dict[int, str] # Holds msgs that are for later views self.threePhaseMsgsForLaterView = deque() # type: deque[(ThreePhaseMsg, str)] # Holds tuple of view no and prepare seq no of 3-phase messages it # received while it was not participating self.stashingWhileCatchingUp = set() # type: Set[Tuple] # Commits which are not being ordered since commits with lower view # numbers and sequence numbers have not been ordered yet. Key is the # viewNo and value a map of pre-prepare sequence number to commit self.stashedCommitsForOrdering = {} # type: Dict[int, # Dict[int, Commit]] self.checkpoints = SortedDict(lambda k: k[0]) self.stashingWhileOutsideWaterMarks = deque() # Low water mark self._h = 0 # type: int # High water mark self.H = self._h + self.config.LOG_SIZE # type: int self.lastPrePrepareSeqNo = self.h # type: int @property def h(self) -> int: return self._h @h.setter def h(self, n): self._h = n self.H = self._h + self.config.LOG_SIZE @property def requests(self): return self.node.requests def shouldParticipate(self, viewNo: int, ppSeqNo: int): # Replica should only participating in the consensus process and the # replica did not stash any of this request's 3-phase request return self.node.isParticipating and (viewNo, ppSeqNo) \ not in self.stashingWhileCatchingUp @staticmethod def generateName(nodeName: str, instId: int): """ Create and return the name for a replica using its nodeName and instanceId. Ex: Alpha:1 """ return "{}:{}".format(nodeName, instId) @staticmethod def getNodeName(replicaName: str): return replicaName.split(":")[0] @property def isPrimary(self): """ Is this node primary? :return: True if this node is primary, False otherwise """ return self._primaryName == self.name if self._primaryName is not None \ else None @property def primaryName(self): """ Name of the primary replica of this replica's instance :return: Returns name if primary is known, None otherwise """ return self._primaryName @primaryName.setter def primaryName(self, value: Optional[str]) -> None: """ Set the value of isPrimary. :param value: the value to set isPrimary to """ if not value == self._primaryName: self._primaryName = value self.primaryNames[self.viewNo] = value logger.debug("{} setting primaryName for view no {} to: {}".format( self, self.viewNo, value)) logger.debug("{}'s primaryNames for views are: {}".format( self, self.primaryNames)) self._stateChanged() def _stateChanged(self): """ A series of actions to be performed when the state of this replica changes. 
- UnstashInBox (see _unstashInBox) """ self._unstashInBox() if self.isPrimary is not None: # TODO handle suspicion exceptions here self.process3PhaseReqsQueue() # TODO handle suspicion exceptions here try: self.processPostElectionMsgs() except SuspiciousNode as ex: self.outBox.append(ex) self.discard(ex.msg, ex.reason, logger.warning) def _stashInBox(self, msg): """ Stash the specified message into the inBoxStash of this replica. :param msg: the message to stash """ self.inBoxStash.append(msg) def _unstashInBox(self): """ Append the inBoxStash to the right of the inBox. """ self.inBox.extend(self.inBoxStash) self.inBoxStash.clear() def __repr__(self): return self.name @property def f(self) -> int: """ Return the number of Byzantine Failures that can be tolerated by this system. Equal to (N - 1)/3, where N is the number of nodes in the system. """ return self.node.f @property def viewNo(self): """ Return the current view number of this replica. """ return self.node.viewNo def isPrimaryInView(self, viewNo: int) -> Optional[bool]: """ Return whether a primary has been selected for this view number. """ return self.primaryNames[viewNo] == self.name def isMsgForLaterView(self, msg): """ Return whether this request's view number is greater than the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo > self.viewNo def isMsgForCurrentView(self, msg): """ Return whether this request's view number is equal to the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo == self.viewNo def isMsgForPrevView(self, msg): """ Return whether this request's view number is less than the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo < self.viewNo def isPrimaryForMsg(self, msg) -> Optional[bool]: """ Return whether this replica is primary if the request's view number is equal this replica's view number and primary has been selected for the current view. Return None otherwise. :param msg: message """ if self.isMsgForLaterView(msg): self.discard( msg, "Cannot get primary status for a request for a later " "view {}. Request is {}".format(self.viewNo, msg), logger.error) else: return self.isPrimary if self.isMsgForCurrentView(msg) \ else self.isPrimaryInView(msg.viewNo) def isMsgFromPrimary(self, msg, sender: str) -> bool: """ Return whether this message was from primary replica :param msg: :param sender: :return: """ if self.isMsgForLaterView(msg): logger.error("{} cannot get primary for a request for a later " "view. Request is {}".format(self, msg)) else: return self.primaryName == sender if self.isMsgForCurrentView( msg) else self.primaryNames[msg.viewNo] == sender def _preProcessReqDigest(self, rd: ReqDigest) -> None: """ Process request digest if this replica is not a primary, otherwise stash the message into the inBox. :param rd: the client Request Digest """ if self.isPrimary is not None: self.processReqDigest(rd) else: logger.debug( "{} stashing request digest {} since it does not know " "its primary status".format(self, (rd.identifier, rd.reqId))) self._stashInBox(rd) def serviceQueues(self, limit=None): """ Process `limit` number of messages in the inBox. :param limit: the maximum number of messages to process :return: the number of messages successfully processed """ # TODO should handle SuspiciousNode here r = self.inBoxRouter.handleAllSync(self.inBox, limit) r += self._serviceActions() return r # Messages that can be processed right now needs to be added back to the # queue. 
They might be able to be processed later def processPostElectionMsgs(self): """ Process messages waiting for the election of a primary replica to complete. """ while self.postElectionMsgs: msg = self.postElectionMsgs.popleft() logger.debug("{} processing pended msg {}".format(self, msg)) self.dispatchThreePhaseMsg(*msg) def process3PhaseReqsQueue(self): """ Process the 3 phase requests from the queue whose view number is equal to the current view number of this replica. """ unprocessed = deque() while self.threePhaseMsgsForLaterView: request, sender = self.threePhaseMsgsForLaterView.popleft() logger.debug("{} processing pended 3 phase request: {}".format( self, request)) # If the request is for a later view dont try to process it but add # it back to the queue. if self.isMsgForLaterView(request): unprocessed.append((request, sender)) else: self.processThreePhaseMsg(request, sender) self.threePhaseMsgsForLaterView = unprocessed @property def quorum(self) -> int: r""" Return the quorum of this RBFT system. Equal to :math:`2f + 1`. Return None if `f` is not yet determined. """ return self.node.quorum def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: """ Create a three phase request to be handled by the threePhaseRouter. :param msg: the ThreePhaseMsg to dispatch :param sender: the name of the node that sent this request """ senderRep = self.generateName(sender, self.instId) if self.isPpSeqNoAcceptable(msg.ppSeqNo): try: self.threePhaseRouter.handleSync((msg, senderRep)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) else: logger.debug("{} stashing 3 phase message {} since ppSeqNo {} is " "not between {} and {}".format( self, msg, msg.ppSeqNo, self.h, self.H)) self.stashingWhileOutsideWaterMarks.append((msg, sender)) def processReqDigest(self, rd: ReqDigest): """ Process a request digest. Works only if this replica has decided its primary status. :param rd: the client request digest to process """ self.stats.inc(TPCStat.ReqDigestRcvd) if self.isPrimary is False: self.dequeuePrePrepare(rd.identifier, rd.reqId) else: self.doPrePrepare(rd) def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): """ Process a 3-phase (pre-prepare, prepare and commit) request. Dispatch the request only if primary has already been decided, otherwise stash it. :param msg: the Three Phase message, one of PRE-PREPARE, PREPARE, COMMIT :param sender: name of the node that sent this message """ # Can only proceed further if it knows whether its primary or not if self.isMsgForLaterView(msg): self.threePhaseMsgsForLaterView.append((msg, sender)) logger.debug( "{} pended received 3 phase request for a later view: " "{}".format(self, msg)) else: if self.isPrimary is None: self.postElectionMsgs.append((msg, sender)) logger.debug("Replica {} pended request {} from {}".format( self, msg, sender)) else: self.dispatchThreePhaseMsg(msg, sender) def processPrePrepare(self, pp: PrePrepare, sender: str): """ Validate and process the PRE-PREPARE specified. If validation is successful, create a PREPARE and broadcast it. 
:param pp: a prePrepareRequest :param sender: name of the node that sent this message """ key = (pp.viewNo, pp.ppSeqNo) logger.debug("{} Receiving PRE-PREPARE{} at {} from {}".format( self, key, time.perf_counter(), sender)) if self.canProcessPrePrepare(pp, sender): if not self.node.isParticipating: self.stashingWhileCatchingUp.add(key) self.addToPrePrepares(pp) logger.info("{} processed incoming PRE-PREPARE{}".format( self, key)) def tryPrepare(self, pp: PrePrepare): """ Try to send the Prepare message if the PrePrepare message is ready to be passed into the Prepare phase. """ if self.canSendPrepare(pp): self.doPrepare(pp) else: logger.debug("{} cannot send PREPARE".format(self)) def processPrepare(self, prepare: Prepare, sender: str) -> None: """ Validate and process the PREPARE specified. If validation is successful, create a COMMIT and broadcast it. :param prepare: a PREPARE msg :param sender: name of the node that sent the PREPARE """ # TODO move this try/except up higher logger.debug("{} received PREPARE{} from {}".format( self, (prepare.viewNo, prepare.ppSeqNo), sender)) try: if self.isValidPrepare(prepare, sender): self.addToPrepares(prepare, sender) self.stats.inc(TPCStat.PrepareRcvd) logger.debug("{} processed incoming PREPARE {}".format( self, (prepare.viewNo, prepare.ppSeqNo))) else: # TODO let's have isValidPrepare throw an exception that gets # handled and possibly logged higher logger.warning( "{} cannot process incoming PREPARE".format(self)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) def processCommit(self, commit: Commit, sender: str) -> None: """ Validate and process the COMMIT specified. If validation is successful, return the message to the node. :param commit: an incoming COMMIT message :param sender: name of the node that sent the COMMIT """ logger.debug("{} received COMMIT {} from {}".format( self, commit, sender)) if self.isValidCommit(commit, sender): self.stats.inc(TPCStat.CommitRcvd) self.addToCommits(commit, sender) logger.debug("{} processed incoming COMMIT{}".format( self, (commit.viewNo, commit.ppSeqNo))) def tryCommit(self, prepare: Prepare): """ Try to commit if the Prepare message is ready to be passed into the commit phase. """ if self.canCommit(prepare): self.doCommit(prepare) else: logger.debug("{} not yet able to send COMMIT".format(self)) def tryOrder(self, commit: Commit): """ Try to order if the Commit message is ready to be ordered. """ canOrder, reason = self.canOrder(commit) if canOrder: logger.debug("{} returning request to node".format(self)) self.tryOrdering(commit) else: logger.trace("{} cannot return request to node: {}".format( self, reason)) def doPrePrepare(self, reqDigest: ReqDigest) -> None: """ Broadcast a PRE-PREPARE to all the replicas. :param reqDigest: a tuple with elements identifier, reqId, and digest """ if not self.node.isParticipating: logger.error("Non participating node is attempting PRE-PREPARE. 
" "This should not happen.") return if self.lastPrePrepareSeqNo == self.H: logger.debug("{} stashing PRE-PREPARE {} since outside greater " "than high water mark {}".format( self, (self.viewNo, self.lastPrePrepareSeqNo + 1), self.H)) self.stashingWhileOutsideWaterMarks.append(reqDigest) return self.lastPrePrepareSeqNo += 1 tm = time.time() * 1000 logger.debug("{} Sending PRE-PREPARE {} at {}".format( self, (self.viewNo, self.lastPrePrepareSeqNo), time.perf_counter())) prePrepareReq = PrePrepare(self.instId, self.viewNo, self.lastPrePrepareSeqNo, *reqDigest, tm) self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest.key, tm) self.send(prePrepareReq, TPCStat.PrePrepareSent) def doPrepare(self, pp: PrePrepare): logger.debug("{} Sending PREPARE {} at {}".format( self, (pp.viewNo, pp.ppSeqNo), time.perf_counter())) prepare = Prepare(self.instId, pp.viewNo, pp.ppSeqNo, pp.digest, pp.ppTime) self.send(prepare, TPCStat.PrepareSent) self.addToPrepares(prepare, self.name) def doCommit(self, p: Prepare): """ Create a commit message from the given Prepare message and trigger the commit phase :param p: the prepare message """ logger.debug("{} Sending COMMIT{} at {}".format( self, (p.viewNo, p.ppSeqNo), time.perf_counter())) commit = Commit(self.instId, p.viewNo, p.ppSeqNo, p.digest, p.ppTime) self.send(commit, TPCStat.CommitSent) self.addToCommits(commit, self.name) def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: """ Decide whether this replica is eligible to process a PRE-PREPARE, based on the following criteria: - this replica is non-primary replica - the request isn't in its list of received PRE-PREPAREs - the request is waiting to for PRE-PREPARE and the digest value matches :param pp: a PRE-PREPARE msg to process :param sender: the name of the node that sent the PRE-PREPARE msg :return: True if processing is allowed, False otherwise """ # TODO: Check whether it is rejecting PRE-PREPARE from previous view # PRE-PREPARE should not be sent from non primary if not self.isMsgFromPrimary(pp, sender): raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) # A PRE-PREPARE is being sent to primary if self.isPrimaryForMsg(pp) is True: raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp) # A PRE-PREPARE is sent that has already been received if (pp.viewNo, pp.ppSeqNo) in self.prePrepares: raise SuspiciousNode(sender, Suspicions.DUPLICATE_PPR_SENT, pp) key = (pp.identifier, pp.reqId) if not self.requests.isFinalised(key): self.enqueuePrePrepare(pp, sender) return False # A PRE-PREPARE is sent that does not match request digest if self.requests.digest(key) != pp.digest: raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp) return True def addToPrePrepares(self, pp: PrePrepare) -> None: """ Add the specified PRE-PREPARE to this replica's list of received PRE-PREPAREs. :param pp: the PRE-PREPARE to add to the list """ key = (pp.viewNo, pp.ppSeqNo) self.prePrepares[key] = \ ((pp.identifier, pp.reqId), pp.ppTime) self.dequeuePrepares(*key) self.dequeueCommits(*key) self.stats.inc(TPCStat.PrePrepareRcvd) self.tryPrepare(pp) def hasPrepared(self, request) -> bool: return self.prepares.hasPrepareFrom(request, self.name) def canSendPrepare(self, request) -> bool: """ Return whether the request identified by (identifier, requestId) can proceed to the Prepare step. 
:param request: any object with identifier and requestId attributes """ return self.shouldParticipate(request.viewNo, request.ppSeqNo) \ and not self.hasPrepared(request) \ and self.requests.isFinalised((request.identifier, request.reqId)) def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: """ Return whether the PREPARE specified is valid. :param prepare: the PREPARE to validate :param sender: the name of the node that sent the PREPARE :return: True if PREPARE is valid, False otherwise """ key = (prepare.viewNo, prepare.ppSeqNo) primaryStatus = self.isPrimaryForMsg(prepare) ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares # If a non primary replica and receiving a PREPARE request before a # PRE-PREPARE request, then proceed # PREPARE should not be sent from primary if self.isMsgFromPrimary(prepare, sender): raise SuspiciousNode(sender, Suspicions.PR_FRM_PRIMARY, prepare) # If non primary replica if primaryStatus is False: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE not received for the PREPARE, might be slow network if key not in ppReqs: self.enqueuePrepare(prepare, sender) return False elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) elif prepare.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare) else: return True # If primary replica else: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE was not sent for this PREPARE, certainly # malicious behavior elif key not in ppReqs: raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare) elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) elif prepare.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare) else: return True def addToPrepares(self, prepare: Prepare, sender: str): self.prepares.addVote(prepare, sender) self.tryCommit(prepare) def hasCommitted(self, request) -> bool: return self.commits.hasCommitFrom( ThreePhaseKey(request.viewNo, request.ppSeqNo), self.name) def canCommit(self, prepare: Prepare) -> bool: """ Return whether the specified PREPARE can proceed to the Commit step. Decision criteria: - If this replica has got just 2f PREPARE requests then commit request. - If less than 2f PREPARE requests then probably there's no consensus on the request; don't commit - If more than 2f then already sent COMMIT; don't commit :param prepare: the PREPARE """ return self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo) and \ self.prepares.hasQuorum(prepare, self.f) and \ not self.hasCommitted(prepare) def isValidCommit(self, commit: Commit, sender: str) -> bool: """ Return whether the COMMIT specified is valid. 
:param commit: the COMMIT to validate :return: True if `request` is valid, False otherwise """ primaryStatus = self.isPrimaryForMsg(commit) ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares key = (commit.viewNo, commit.ppSeqNo) if key not in ppReqs: self.enqueueCommit(commit, sender) return False if (key not in self.prepares and key not in self.preparesWaitingForPrePrepare): logger.debug( "{} rejecting COMMIT{} due to lack of prepares".format( self, key)) # raise SuspiciousNode(sender, Suspicions.UNKNOWN_CM_SENT, commit) return False elif self.commits.hasCommitFrom(commit, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit) elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)): raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit) elif key in ppReqs and commit.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG, commit) else: return True def addToCommits(self, commit: Commit, sender: str): """ Add the specified COMMIT to this replica's list of received commit requests. :param commit: the COMMIT to add to the list :param sender: the name of the node that sent the COMMIT """ self.commits.addVote(commit, sender) self.tryOrder(commit) def hasOrdered(self, viewNo, ppSeqNo) -> bool: return (viewNo, ppSeqNo) in self.ordered def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]: """ Return whether the specified commitRequest can be returned to the node. Decision criteria: - If have got just 2f+1 Commit requests then return request to node - If less than 2f+1 of commit requests then probably don't have consensus on the request; don't return request to node - If more than 2f+1 then already returned to node; don't return request to node :param commit: the COMMIT """ if not self.commits.hasQuorum(commit, self.f): return False, "no quorum: {} commits where f is {}".\ format(commit, self.f) if self.hasOrdered(commit.viewNo, commit.ppSeqNo): return False, "already ordered" if not self.isNextInOrdering(commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if viewNo not in self.stashedCommitsForOrdering: self.stashedCommitsForOrdering[viewNo] = {} self.stashedCommitsForOrdering[viewNo][ppSeqNo] = commit self.startRepeating(self.orderStashedCommits, 2) return False, "stashing {} since out of order".\ format(commit) return True, None def isNextInOrdering(self, commit: Commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo - 1): return True for (v, p) in self.commits: if v < viewNo: # Have commits from previous view that are unordered. # TODO: Question: would commits be always ordered, what if # some are never ordered and its fine, go to PBFT. return False if v == viewNo and p < ppSeqNo and (v, p) not in self.ordered: # If unordered commits are found with lower ppSeqNo then this # cannot be ordered. return False # TODO: Revisit PBFT paper, how to make sure that last request of the # last view has been ordered? Need change in `VIEW CHANGE` mechanism. # Somehow view change needs to communicate what the last request was. # Also what if some COMMITs were completely missed in the same view return True def orderStashedCommits(self): # TODO: What if the first few commits were out of order and stashed? 
# `self.ordered` would be empty if self.ordered: lastOrdered = self.ordered[-1] vToRemove = set() for v in self.stashedCommitsForOrdering: if v < lastOrdered[0] and self.stashedCommitsForOrdering[v]: raise RuntimeError( "{} found commits from previous view {}" " that were not ordered but last ordered" " is {}".format(self, v, lastOrdered)) pToRemove = set() for p, commit in self.stashedCommitsForOrdering[v].items(): if (v == lastOrdered[0] and lastOrdered == (v, p - 1)) or \ (v > lastOrdered[0] and self.isLowestCommitInView(commit)): logger.debug("{} ordering stashed commit {}".format( self, commit)) if self.tryOrdering(commit): lastOrdered = (v, p) pToRemove.add(p) for p in pToRemove: del self.stashedCommitsForOrdering[v][p] if not self.stashedCommitsForOrdering[v]: vToRemove.add(v) for v in vToRemove: del self.stashedCommitsForOrdering[v] # if self.stashedCommitsForOrdering: # self._schedule(self.orderStashedCommits, 2) if not self.stashedCommitsForOrdering: self.stopRepeating(self.orderStashedCommits) def isLowestCommitInView(self, commit): # TODO: Assumption: This assumes that at least one commit that was sent # for any request by any node has been received in the view of this # commit ppSeqNos = [] for v, p in self.commits: if v == commit.viewNo: ppSeqNos.append(p) return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True def tryOrdering(self, commit: Commit) -> None: """ Attempt to send an ORDERED request for the specified COMMIT to the node. :param commit: the COMMIT message """ key = (commit.viewNo, commit.ppSeqNo) logger.debug("{} trying to order COMMIT{}".format(self, key)) reqKey = self.getReqKeyFrom3PhaseKey(key) # type: Tuple digest = self.getDigestFor3PhaseKey(key) if not digest: logger.error( "{} did not find digest for {}, request key {}".format( self, key, reqKey)) return self.doOrder(*key, *reqKey, digest, commit.ppTime) return True def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime): key = (viewNo, ppSeqNo) self.addToOrdered(*key) ordered = Ordered(self.instId, viewNo, identifier, reqId, ppTime) # TODO: Should not order or add to checkpoint while syncing # 3 phase state. 
self.send(ordered, TPCStat.OrderSent) if key in self.stashingWhileCatchingUp: self.stashingWhileCatchingUp.remove(key) logger.debug("{} ordered request {}".format(self, (viewNo, ppSeqNo))) self.addToCheckpoint(ppSeqNo, digest) def processCheckpoint(self, msg: Checkpoint, sender: str): if self.checkpoints: seqNo = msg.seqNo _, firstChk = self.firstCheckPoint if firstChk.isStable: if firstChk.seqNo == seqNo: self.discard(msg, reason="Checkpoint already stable", logMethod=logger.debug) return if firstChk.seqNo > seqNo: self.discard(msg, reason="Higher stable checkpoint present", logMethod=logger.debug) return for state in self.checkpoints.values(): if state.seqNo == seqNo: if state.digest == msg.digest: state.receivedDigests[sender] = msg.digest break else: logger.error("{} received an incorrect digest {} for " "checkpoint {} from {}".format( self, msg.digest, seqNo, sender)) return if len(state.receivedDigests) == 2 * self.f: self.markCheckPointStable(msg.seqNo) else: self.discard(msg, reason="No checkpoints present to tally", logMethod=logger.warn) def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState: s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 logger.debug("{} adding new checkpoint state for {}".format( self, (s, e))) state = CheckpointState(ppSeqNo, [ digest, ], None, {}, False) self.checkpoints[s, e] = state return state def addToCheckpoint(self, ppSeqNo, digest): for (s, e) in self.checkpoints.keys(): if s <= ppSeqNo <= e: state = self.checkpoints[s, e] # type: CheckpointState state.digests.append(digest) state = updateNamedTuple(state, seqNo=ppSeqNo) self.checkpoints[s, e] = state break else: state = self._newCheckpointState(ppSeqNo, digest) s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ if len(state.digests) == self.config.CHK_FREQ: state = updateNamedTuple(state, digest=serialize(state.digests), digests=[]) self.checkpoints[s, e] = state self.send( Checkpoint(self.instId, self.viewNo, ppSeqNo, state.digest)) def markCheckPointStable(self, seqNo): previousCheckpoints = [] for (s, e), state in self.checkpoints.items(): if e == seqNo: state = updateNamedTuple(state, isStable=True) self.checkpoints[s, e] = state break else: previousCheckpoints.append((s, e)) else: logger.error("{} could not find {} in checkpoints".format( self, seqNo)) return self.h = seqNo for k in previousCheckpoints: logger.debug("{} removing previous checkpoint {}".format(self, k)) self.checkpoints.pop(k) self.gc(seqNo) logger.debug("{} marked stable checkpoint {}".format(self, (s, e))) self.processStashedMsgsForNewWaterMarks() def gc(self, tillSeqNo): logger.debug("{} cleaning up till {}".format(self, tillSeqNo)) tpcKeys = set() reqKeys = set() for (v, p), (reqKey, _) in self.sentPrePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) reqKeys.add(reqKey) for (v, p), (reqKey, _) in self.prePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) reqKeys.add(reqKey) logger.debug("{} found {} 3 phase keys to clean".format( self, len(tpcKeys))) logger.debug("{} found {} request keys to clean".format( self, len(reqKeys))) for k in tpcKeys: self.sentPrePrepares.pop(k, None) self.prePrepares.pop(k, None) self.prepares.pop(k, None) self.commits.pop(k, None) if k in self.ordered: self.ordered.remove(k) for k in reqKeys: self.requests.pop(k, None) def processStashedMsgsForNewWaterMarks(self): while self.stashingWhileOutsideWaterMarks: item = self.stashingWhileOutsideWaterMarks.pop() logger.debug("{} processing stashed item {} after new stable " "checkpoint".format(self, item)) if isinstance(item, 
ReqDigest): self.doPrePrepare(item) elif isinstance(item, tuple) and len(tuple) == 2: self.dispatchThreePhaseMsg(*item) else: logger.error("{} cannot process {} " "from stashingWhileOutsideWaterMarks".format( self, item)) @property def firstCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: return self.checkpoints.peekitem(0) @property def lastCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: return self.checkpoints.peekitem(-1) def isPpSeqNoAcceptable(self, ppSeqNo: int): return self.h < ppSeqNo <= self.H def addToOrdered(self, viewNo: int, ppSeqNo: int): self.ordered.add((viewNo, ppSeqNo)) def enqueuePrePrepare(self, request: PrePrepare, sender: str): logger.debug( "Queueing pre-prepares due to unavailability of finalised " "Request. Request {} from {}".format(request, sender)) key = (request.identifier, request.reqId) if key not in self.prePreparesPendingReqDigest: self.prePreparesPendingReqDigest[key] = [] self.prePreparesPendingReqDigest[key].append((request, sender)) def dequeuePrePrepare(self, identifier: int, reqId: int): key = (identifier, reqId) if key in self.prePreparesPendingReqDigest: pps = self.prePreparesPendingReqDigest[key] for (pp, sender) in pps: logger.debug("{} popping stashed PRE-PREPARE{}".format( self, key)) if pp.digest == self.requests.digest(key): self.prePreparesPendingReqDigest.pop(key) self.processPrePrepare(pp, sender) logger.debug( "{} processed {} PRE-PREPAREs waiting for finalised " "request for identifier {} and reqId {}".format( self, pp, identifier, reqId)) break def enqueuePrepare(self, request: Prepare, sender: str): logger.debug("Queueing prepares due to unavailability of PRE-PREPARE. " "Request {} from {}".format(request, sender)) key = (request.viewNo, request.ppSeqNo) if key not in self.preparesWaitingForPrePrepare: self.preparesWaitingForPrePrepare[key] = deque() self.preparesWaitingForPrePrepare[key].append((request, sender)) def dequeuePrepares(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.preparesWaitingForPrePrepare: i = 0 # Keys of pending prepares that will be processed below while self.preparesWaitingForPrePrepare[key]: prepare, sender = self.preparesWaitingForPrePrepare[ key].popleft() logger.debug("{} popping stashed PREPARE{}".format(self, key)) self.processPrepare(prepare, sender) i += 1 self.preparesWaitingForPrePrepare.pop(key) logger.debug("{} processed {} PREPAREs waiting for PRE-PREPARE for" " view no {} and seq no {}".format( self, i, viewNo, ppSeqNo)) def enqueueCommit(self, request: Commit, sender: str): logger.debug("Queueing commit due to unavailability of PREPARE. 
" "Request {} from {}".format(request, sender)) key = (request.viewNo, request.ppSeqNo) if key not in self.commitsWaitingForPrepare: self.commitsWaitingForPrepare[key] = deque() self.commitsWaitingForPrepare[key].append((request, sender)) def dequeueCommits(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.commitsWaitingForPrepare: i = 0 # Keys of pending prepares that will be processed below while self.commitsWaitingForPrepare[key]: commit, sender = self.commitsWaitingForPrepare[key].popleft() logger.debug("{} popping stashed COMMIT{}".format(self, key)) self.processCommit(commit, sender) i += 1 self.commitsWaitingForPrepare.pop(key) logger.debug("{} processed {} COMMITs waiting for PREPARE for" " view no {} and seq no {}".format( self, i, viewNo, ppSeqNo)) def getDigestFor3PhaseKey(self, key: ThreePhaseKey) -> Optional[str]: reqKey = self.getReqKeyFrom3PhaseKey(key) digest = self.requests.digest(reqKey) if not digest: logger.debug("{} could not find digest in sent or received " "PRE-PREPAREs or PREPAREs for 3 phase key {} and req " "key {}".format(self, key, reqKey)) return None else: return digest def getReqKeyFrom3PhaseKey(self, key: ThreePhaseKey): reqKey = None if key in self.sentPrePrepares: reqKey = self.sentPrePrepares[key][0] elif key in self.prePrepares: reqKey = self.prePrepares[key][0] elif key in self.prepares: reqKey = self.prepares[key][0] else: logger.debug( "Could not find request key for 3 phase key {}".format(key)) return reqKey @property def threePhaseState(self): # TODO: This method is incomplete # Gets the current stable and unstable checkpoints and creates digest # of unstable checkpoints if self.checkpoints: pass else: state = [] return ThreePCState(self.instId, state) def process3PhaseState(self, msg: ThreePCState, sender: str): # TODO: This is not complete pass def send(self, msg, stat=None) -> None: """ Send a message to the node on which this replica resides. :param msg: the message to send """ logger.display("{} sending {}".format(self, msg.__class__.__name__), extra={"cli": True}) logger.trace("{} sending {}".format(self, msg)) if stat: self.stats.inc(stat) self.outBox.append(msg)
class Superplot(): """ Self-contained plotting class that runs in its own process. Plotting functionality (reset the graph, .. ?) can be controlled by issuing message-based commands using a multiprocessing Pipe """ def __init__(self,name,plottype=PlotType.indexed): self.name = name self.plottype = plottype self._clear() def _clear(self): # Process-local buffers used to host the displayed data if self.plottype == PlotType.linear: self.set = True self.x = [] self.y = [] else: self.xy = SortedDict() # TODO : use this optimization, but for now raises issue # Can't pickle dict_key views ?? #self.x = self.xy.keys() #self.y = self.xy.values() self.set = False def start(self): # The queue that will be used to transfer data from the main process # to the plot self.q = Queue() main_pipe, self.in_process_pipe = Pipe() self.p = Process(target=self.run) self.p.start() # Return a handle to the data queue and the control pipe return self.q, main_pipe def join(self): self.p.join() def _update(self): # Empty data queue and process received data while not self.q.empty(): item = self.q.get() if self.plottype == PlotType.linear: self.x.append(item[0]) self.y.append(item[1]) else: # Seems pretty slow, # TODO : Profile # TODO : Eventually, need to find high performance alternative. Maybe numpy based self.xy[item[0]] = item[1] # Initialize view on data dictionnary only once for increased performance if not self.set: self.set = True self.x = self.xy.keys() self.y = self.xy.values() # Refresh plot data self.curve.setData(self.x,self.y) try: if self.in_process_pipe.poll(): msg = self.in_process_pipe.recv() self._process_msg(msg) except: # If the polling failed, then the application most likely shut down # So close the window and terminate as well self.app.quit() def _process_msg(self, msg): if msg == "exit": # TODO : Remove this line ? Redundant with send after app.exec_() ? self.in_process_pipe.send("closing") self.app.quit() elif msg == "clear": self._clear() def run(self): self.app = QtGui.QApplication([]) win = pg.GraphicsWindow(title="Basic plotting examples") win.resize(1000,600) win.setWindowTitle('pyqtgraph example: Plotting') plot = win.addPlot(title=self.name) self.curve = plot.plot(pen='y') timer = QtCore.QTimer() timer.timeout.connect(self._update) timer.start(50) self.app.exec_() try: self.in_process_pipe.send("closing") except: pass
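# Hedged sketch (standalone): the one-time assignment of self.x/self.y in _update
# works because SortedDict key and value views are dynamic -- they always reflect
# the current, key-sorted contents without being rebuilt after each inserted sample.
from sortedcontainers import SortedDict

xy = SortedDict()
x, y = xy.keys(), xy.values()
xy[3.0] = 9.0
xy[1.0] = 1.0
print(list(x), list(y))   # [1.0, 3.0] [1.0, 9.0]
xy[2.0] = 4.0
print(list(x), list(y))   # [1.0, 2.0, 3.0] [1.0, 4.0, 9.0]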
def read_swans( fileglob, ndays=None, int_freq=True, int_dir=False, dirorder=True, ntimes=None ): """Read multiple SWAN ASCII files into single Dataset. Args: - fileglob (str, list): glob pattern specifying files to read. - ndays (float): number of days to keep from each file, choose None to keep entire period. - int_freq (ndarray, bool): frequency array for interpolating onto: - ndarray: 1d array specifying frequencies to interpolate onto. - True: logarithm array is constructed such that fmin=0.0418 Hz, fmax=0.71856 Hz, df=0.1f. - False: No interpolation performed in frequency space. - int_dir (ndarray, bool): direction array for interpolating onto: - ndarray: 1d array specifying directions to interpolate onto. - True: circular array is constructed such that dd=10 degrees. - False: No interpolation performed in direction space. - dirorder (bool): if True ensures directions are sorted. - ntimes (int): use it to read only specific number of times, useful for checking headers only. Returns: - dset (SpecDataset): spectra dataset object read from file with different sites and cycles concatenated along the 'site' and 'time' dimensions. Note: - If multiple cycles are provided, 'time' coordinate is replaced by 'cycletime' multi-index coordinate. - If more than one cycle is prescribed from fileglob, each cycle must have same number of sites. - Either all or none of the spectra in fileglob must have tabfile associated to provide wind/depth data. - Concatenation is done with numpy arrays for efficiency. """ swans = ( sorted(fileglob) if isinstance(fileglob, list) else sorted(glob.glob(fileglob)) ) assert swans, "No SWAN file identified with fileglob %s" % (fileglob) # Default spectral basis for interpolating if int_freq is True: int_freq = [0.04118 * 1.1 ** n for n in range(31)] elif int_freq is False: int_freq = None if int_dir is True: int_dir = np.arange(0, 360, 10) elif int_dir is False: int_dir = None cycles = list() dsets = SortedDict() tabs = SortedDict() all_times = list() all_sites = SortedDict() all_lons = SortedDict() all_lats = SortedDict() deps = SortedDict() wspds = SortedDict() wdirs = SortedDict() for filename in swans: swanfile = SwanSpecFile(filename, dirorder=dirorder) times = swanfile.times lons = list(swanfile.x) lats = list(swanfile.y) sites = ( [os.path.splitext(os.path.basename(filename))[0]] if len(lons) == 1 else np.arange(len(lons)) + 1 ) freqs = swanfile.freqs dirs = swanfile.dirs if ntimes is None: spec_list = [s for s in swanfile.readall()] else: spec_list = [swanfile.read() for itime in range(ntimes)] # Read tab files for winds / depth if swanfile.is_tab: try: tab = read_tab(swanfile.tabfile).rename(columns={"dep": attrs.DEPNAME}) if len(swanfile.times) == tab.index.size: if "X-wsp" in tab and "Y-wsp" in tab: tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir( tab["X-wsp"], tab["Y-wsp"], coming_from=True ) else: warnings.warn( "Times in {} and {} not consistent, not appending " "winds and depth".format(swanfile.filename, swanfile.tabfile) ) tab = pd.DataFrame() tab = tab[ list( set(tab.columns).intersection( (attrs.DEPNAME, attrs.WSPDNAME, attrs.WDIRNAME) ) ) ] except Exception as exc: warnings.warn( "Cannot parse depth and winds from {}:\n{}".format( swanfile.tabfile, exc ) ) else: tab = pd.DataFrame() # Shrinking times if ndays is not None: tend = times[0] + datetime.timedelta(days=ndays) if tend > times[-1]: raise IOError( "Times in %s does not extend for %0.2f days" % (filename, ndays) ) iend = times.index(min(times, key=lambda d: abs(d - tend))) times = 
times[0 : iend + 1] spec_list = spec_list[0 : iend + 1] tab = tab.loc[times[0] : tend] if tab is not None else tab spec_list = flatten_list(spec_list, []) # Interpolate spectra if int_freq is not None or int_dir is not None: spec_list = [ interp_spec(spec, freqs, dirs, int_freq, int_dir) for spec in spec_list ] freqs = int_freq if int_freq is not None else freqs dirs = int_dir if int_dir is not None else dirs # Appending try: arr = np.array(spec_list).reshape( len(times), len(sites), len(freqs), len(dirs) ) cycle = times[0] if cycle not in dsets: dsets[cycle] = [arr] tabs[cycle] = [tab] all_sites[cycle] = sites all_lons[cycle] = lons all_lats[cycle] = lats all_times.append(times) nsites = 1 else: dsets[cycle].append(arr) tabs[cycle].append(tab) all_sites[cycle].extend(sites) all_lons[cycle].extend(lons) all_lats[cycle].extend(lats) nsites += 1 except Exception: if len(spec_list) != arr.shape[0]: raise IOError( "Time length in %s (%i) does not match previous files (%i), " "cannot concatenate", (filename, len(spec_list), arr.shape[0]), ) else: raise swanfile.close() cycles = dsets.keys() # Ensuring sites are consistent across cycles sites = all_sites[cycle] lons = all_lons[cycle] lats = all_lats[cycle] for site, lon, lat in zip(all_sites.values(), all_lons.values(), all_lats.values()): if ( (list(site) != list(sites)) or (list(lon) != list(lons)) or (list(lat) != list(lats)) ): raise IOError("Inconsistent sites across cycles in glob pattern provided") # Ensuring consistent tabs cols = set( [ frozenset(tabs[cycle][n].columns) for cycle in cycles for n in range(len(tabs[cycle])) ] ) if len(cols) > 1: raise IOError( "Inconsistent tab files, ensure either all or none of the spectra have " "associated tabfiles and columns are consistent" ) # Concat sites for cycle in cycles: dsets[cycle] = np.concatenate(dsets[cycle], axis=1) deps[cycle] = ( np.vstack([tab[attrs.DEPNAME].values for tab in tabs[cycle]]).T if attrs.DEPNAME in tabs[cycle][0] else None ) wspds[cycle] = ( np.vstack([tab[attrs.WSPDNAME].values for tab in tabs[cycle]]).T if attrs.WSPDNAME in tabs[cycle][0] else None ) wdirs[cycle] = ( np.vstack([tab[attrs.WDIRNAME].values for tab in tabs[cycle]]).T if attrs.WDIRNAME in tabs[cycle][0] else None ) time_sizes = [dsets[cycle].shape[0] for cycle in cycles] # Concat cycles if len(dsets) > 1: dsets = np.concatenate(dsets.values(), axis=0) deps = ( np.concatenate(deps.values(), axis=0) if attrs.DEPNAME in tabs[cycle][0] else None ) wspds = ( np.concatenate(wspds.values(), axis=0) if attrs.WSPDNAME in tabs[cycle][0] else None ) wdirs = ( np.concatenate(wdirs.values(), axis=0) if attrs.WDIRNAME in tabs[cycle][0] else None ) else: dsets = dsets[cycle] deps = deps[cycle] if attrs.DEPNAME in tabs[cycle][0] else None wspds = wspds[cycle] if attrs.WSPDNAME in tabs[cycle][0] else None wdirs = wdirs[cycle] if attrs.WDIRNAME in tabs[cycle][0] else None # Creating dataset times = flatten_list(all_times, []) dsets = xr.DataArray( data=dsets, coords=OrderedDict( ( (attrs.TIMENAME, times), (attrs.SITENAME, sites), (attrs.FREQNAME, freqs), (attrs.DIRNAME, dirs), ) ), dims=(attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME), name=attrs.SPECNAME, ).to_dataset() dsets[attrs.LATNAME] = xr.DataArray( data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME] ) dsets[attrs.LONNAME] = xr.DataArray( data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME] ) if wspds is not None: dsets[attrs.WSPDNAME] = xr.DataArray( data=wspds, dims=[attrs.TIMENAME, attrs.SITENAME], 
coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))), ) dsets[attrs.WDIRNAME] = xr.DataArray( data=wdirs, dims=[attrs.TIMENAME, attrs.SITENAME], coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))), ) if deps is not None: dsets[attrs.DEPNAME] = xr.DataArray( data=deps, dims=[attrs.TIMENAME, attrs.SITENAME], coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))), ) # Setting multi-index if len(cycles) > 1: dsets = dsets.rename({attrs.TIMENAME: "cycletime"}) cycletime = zip( [ item for sublist in [[c] * t for c, t in zip(cycles, time_sizes)] for item in sublist ], dsets.cycletime.values, ) dsets["cycletime"] = pd.MultiIndex.from_tuples( cycletime, names=[attrs.CYCLENAME, attrs.TIMENAME] ) dsets["cycletime"].attrs = attrs.ATTRS[attrs.TIMENAME] set_spec_attributes(dsets) if "dir" in dsets and len(dsets.dir) > 1: dsets[attrs.SPECNAME].attrs.update( {"_units": "m^{2}.s.degree^{-1}", "_variable_name": "VaDens"} ) else: dsets[attrs.SPECNAME].attrs.update( {"units": "m^{2}.s", "_units": "m^{2}.s", "_variable_name": "VaDens"} ) return dsets
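# Hedged sketch (standalone): read_swans keys its per-cycle containers (dsets, tabs,
# deps, ...) by each cycle's first timestamp, so iterating them later walks the
# cycles in chronological order even if the input files were not listed that way.
import datetime
from sortedcontainers import SortedDict

dsets = SortedDict()
dsets[datetime.datetime(2020, 1, 2)] = "cycle-2"
dsets[datetime.datetime(2020, 1, 1)] = "cycle-1"
print(list(dsets.keys()))   # chronological: 2020-01-01 first, then 2020-01-02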
class DownloadTask(QObject): download_ready = Signal(QObject) download_not_ready = Signal(QObject) download_complete = Signal(QObject) download_failed = Signal(QObject) download_error = Signal(str) download_ok = Signal() download_finishing = Signal() copy_added = Signal(str) chunk_downloaded = Signal( str, # obj_id str, # str(offset) to fix offset >= 2**31 int) # length chunk_aborted = Signal() request_data = Signal( str, # node_id str, # obj_id str, # str(offset) to fix offset >= 2**31 int) # length abort_data = Signal( str, # node_id str, # obj_id str) # str(offset) to fix offset >= 2**31 possibly_sync_folder_is_removed = Signal() no_disk_space = Signal( QObject, # task str, # display_name bool) # is error wrong_hash = Signal(QObject) # task) signal_info_rx = Signal(tuple) default_part_size = DOWNLOAD_PART_SIZE receive_timeout = 20 # seconds retry_limit = 2 timeouts_limit = 2 max_node_chunk_requests = 128 end_race_timeout = 5. # seconds def __init__(self, tracker, connectivity_service, priority, obj_id, obj_size, file_path, display_name, file_hash=None, parent=None, files_info=None): QObject.__init__(self, parent=parent) self._tracker = tracker self._connectivity_service = connectivity_service self.priority = priority self.size = obj_size self.id = obj_id self.file_path = file_path self.file_hash = file_hash self.download_path = file_path + '.download' self._info_path = file_path + '.info' self.display_name = display_name self.received = 0 self.files_info = files_info self.hash_is_wrong = False self._ready = False self._started = False self._paused = False self._finished = False self._no_disk_space_error = False self._wanted_chunks = SortedDict() self._downloaded_chunks = SortedDict() self._nodes_available_chunks = dict() self._nodes_requested_chunks = dict() self._nodes_last_receive_time = dict() self._nodes_downloaded_chunks_count = dict() self._nodes_timeouts_count = dict() self._total_chunks_count = 0 self._file = None self._info_file = None self._started_time = time() self._took_from_turn = 0 self._received_via_turn = 0 self._received_via_p2p = 0 self._retry = 0 self._limiter = None self._init_wanted_chunks() self._on_downloaded_cb = None self._on_failed_cb = None self.download_complete.connect(self._on_downloaded) self.download_failed.connect(self._on_failed) self._timeout_timer = QTimer(self) self._timeout_timer.setInterval(15 * 1000) self._timeout_timer.setSingleShot(False) self._timeout_timer.timeout.connect(self._on_check_timeouts) self._leaky_timer = QTimer(self) self._leaky_timer.setInterval(1000) self._leaky_timer.setSingleShot(True) self._leaky_timer.timeout.connect(self._download_chunks) self._network_limited_error_set = False def __lt__(self, other): if not isinstance(other, DownloadTask): return object.__lt__(self, other) if self == other: return False if self.priority == other.priority: if self.size - self.received == other.size - other.received: return self.id < other.id return self.size - self.received < other.size - other.received return self.priority > other.priority def __le__(self, other): if not isinstance(other, DownloadTask): return object.__le__(self, other) if self == other: return True if self.priority == other.priority: if self.size - self.received == other.size - other.received: return self.id < other.id return self.size - self.received < other.size - other.received return self.priority >= other.priority def __gt__(self, other): if not isinstance(other, DownloadTask): return object.__gt__(self, other) if self == other: return False if self.priority == 
other.priority: if self.size - self.received == other.size - other.received: return self.id > other.id return self.size - self.received > other.size - other.received return self.priority <= other.priority def __ge__(self, other): if not isinstance(other, DownloadTask): return object.__ge__(self, other) if self == other: return True if self.priority == other.priority: if self.size - self.received == other.size - other.received: return self.id > other.id return self.size - self.received > other.size - other.received return self.priority <= other.priority def __eq__(self, other): if not isinstance(other, DownloadTask): return object.__eq__(self, other) return self.id == other.id def on_availability_info_received(self, node_id, obj_id, info): if obj_id != self.id or self._finished or not info: return logger.info( "availability info received, " "node_id: %s, obj_id: %s, info: %s", node_id, obj_id, info) new_chunks_stored = self._store_availability_info(node_id, info) if not self._ready and new_chunks_stored: if self._check_can_receive(node_id): self._ready = True self.download_ready.emit(self) else: self.download_error.emit('Turn limit reached') if self._started and not self._paused \ and not self._nodes_requested_chunks.get(node_id, None): logger.debug("Downloading next chunk") self._download_next_chunks(node_id) self._clean_nodes_last_receive_time() self._check_download_not_ready(self._nodes_requested_chunks) def on_availability_info_failure(self, node_id, obj_id, error): if obj_id != self.id or self._finished: return logger.info( "availability info failure, " "node_id: %s, obj_id: %s, error: %s", node_id, obj_id, error) try: if error["err_code"] == "FILE_CHANGED": self.download_failed.emit(self) except Exception as e: logger.warning("Can't parse error message. 
Reson: %s", e) def start(self, limiter): if exists(self.file_path): logger.info("download task file already downloaded %s", self.file_path) self.received = self.size self.download_finishing.emit() self.download_complete.emit(self) return self._limiter = limiter if self._started: # if we swapped task earlier self.resume() return self._no_disk_space_error = False if not self.check_disk_space(): return logger.info("starting download task, obj_id: %s", self.id) self._started = True self._paused = False self.hash_is_wrong = False self._started_time = time() self._send_start_statistic() if not self._open_file(): return self._read_info_file() for downloaded_chunk in self._downloaded_chunks.items(): self._remove_from_chunks(downloaded_chunk[0], downloaded_chunk[1], self._wanted_chunks) self.received = sum(self._downloaded_chunks.values()) if self._complete_download(): return self._download_chunks() if not self._timeout_timer.isActive(): self._timeout_timer.start() def check_disk_space(self): if self.size * 2 + get_signature_file_size(self.size) > \ get_free_space_by_filepath(self.file_path): self._emit_no_disk_space() return False return True def pause(self, disconnect_cb=True): self._paused = True if disconnect_cb: self.disconnect_callbacks() self.stop_download_chunks() def resume(self, start_download=True): self._started_time = time() self._paused = False self.hash_is_wrong = False if start_download: self._started = True self._download_chunks() if not self._timeout_timer.isActive(): self._timeout_timer.start() def cancel(self): self._close_file() self._close_info_file() self.stop_download_chunks() self._finished = True def clean(self): logger.debug("Cleaning download files %s", self.download_path) try: remove_file(self.download_path) except: pass try: remove_file(self._info_path) except: pass def connect_callbacks(self, on_downloaded, on_failed): self._on_downloaded_cb = on_downloaded self._on_failed_cb = on_failed def disconnect_callbacks(self): self._on_downloaded_cb = None self._on_failed_cb = None @property def ready(self): return self._ready @property def paused(self): return self._paused @property def no_disk_space_error(self): return self._no_disk_space_error def _init_wanted_chunks(self): self._total_chunks_count = math.ceil( float(self.size) / float(DOWNLOAD_CHUNK_SIZE)) self._wanted_chunks[0] = self.size def _on_downloaded(self, task): if callable(self._on_downloaded_cb): self._on_downloaded_cb(task) self._on_downloaded_cb = None def _on_failed(self, task): if callable(self._on_failed_cb): self._on_failed_cb(task) self._on_failed_cb = None def on_data_received(self, node_id, obj_id, offset, length, data): if obj_id != self.id or self._finished: return logger.debug( "on_data_received for objId: %s, offset: %s, from node_id: %s", self.id, offset, node_id) now = time() last_received_time = self._nodes_last_receive_time.get(node_id, 0.) 
if node_id in self._nodes_last_receive_time: self._nodes_last_receive_time[node_id] = now self._nodes_timeouts_count.pop(node_id, 0) downloaded_count = \ self._nodes_downloaded_chunks_count.get(node_id, 0) + 1 self._nodes_downloaded_chunks_count[node_id] = downloaded_count # to collect traffic info node_type = self._connectivity_service.get_self_node_type() is_share = node_type == "webshare" # tuple -> (obj_id, rx_wd, rx_wr, is_share) if self._connectivity_service.is_relayed(node_id): # relayed traffic info_rx = (obj_id, 0, length, is_share) else: # p2p traffic info_rx = (obj_id, length, 0, is_share) self.signal_info_rx.emit(info_rx) if not self._is_chunk_already_downloaded(offset): if not self._on_new_chunk_downloaded(node_id, offset, length, data): return else: logger.debug("chunk %s already downloaded", offset) requested_chunks = self._nodes_requested_chunks.get( node_id, SortedDict()) if not requested_chunks: return self._remove_from_chunks(offset, length, requested_chunks) if not requested_chunks: self._nodes_requested_chunks.pop(node_id, None) requested_count = sum(requested_chunks.values()) // DOWNLOAD_CHUNK_SIZE if downloaded_count * 4 >= requested_count \ and requested_count < self.max_node_chunk_requests: self._download_next_chunks(node_id, now - last_received_time) self._clean_nodes_last_receive_time() self._check_download_not_ready(self._nodes_requested_chunks) def _is_chunk_already_downloaded(self, offset): if self._downloaded_chunks: chunk_index = self._downloaded_chunks.bisect_right(offset) if chunk_index > 0: chunk_index -= 1 chunk = self._downloaded_chunks.peekitem(chunk_index) if offset < chunk[0] + chunk[1]: return True return False def _on_new_chunk_downloaded(self, node_id, offset, length, data): if not self._write_to_file(offset, data): return False self.received += length if self._connectivity_service.is_relayed(node_id): self._received_via_turn += length else: self._received_via_p2p += length new_offset = offset new_length = length left_index = self._downloaded_chunks.bisect_right(new_offset) if left_index > 0: left_chunk = self._downloaded_chunks.peekitem(left_index - 1) if left_chunk[0] + left_chunk[1] == new_offset: new_offset = left_chunk[0] new_length += left_chunk[1] self._downloaded_chunks.popitem(left_index - 1) right_index = self._downloaded_chunks.bisect_right(new_offset + new_length) if right_index > 0: right_chunk = self._downloaded_chunks.peekitem(right_index - 1) if right_chunk[0] == new_offset + new_length: new_length += right_chunk[1] self._downloaded_chunks.popitem(right_index - 1) self._downloaded_chunks[new_offset] = new_length assert self._remove_from_chunks(offset, length, self._wanted_chunks) logger.debug("new chunk downloaded from node: %s, wanted size: %s", node_id, sum(self._wanted_chunks.values())) part_offset = (offset / DOWNLOAD_PART_SIZE) * DOWNLOAD_PART_SIZE part_size = min([DOWNLOAD_PART_SIZE, self.size - part_offset]) if new_offset <= part_offset \ and new_offset + new_length >= part_offset + part_size: if self._file: self._file.flush() self._write_info_file() self.chunk_downloaded.emit(self.id, str(part_offset), part_size) if self._complete_download(): return False return True def _remove_from_chunks(self, offset, length, chunks): if not chunks: return False chunk_left_index = chunks.bisect_right(offset) if chunk_left_index > 0: left_chunk = chunks.peekitem(chunk_left_index - 1) if offset >= left_chunk[0] + left_chunk[1] \ and len(chunks) > chunk_left_index: left_chunk = chunks.peekitem(chunk_left_index) else: chunk_left_index -= 1 
else: left_chunk = chunks.peekitem(chunk_left_index) if offset >= left_chunk[0] + left_chunk[1] or \ offset + length <= left_chunk[0]: return False chunk_right_index = chunks.bisect_right(offset + length) right_chunk = chunks.peekitem(chunk_right_index - 1) if chunk_right_index == chunk_left_index: to_del = [right_chunk[0]] else: to_del = list(chunks.islice(chunk_left_index, chunk_right_index)) for chunk in to_del: chunks.pop(chunk) if left_chunk[0] < offset: if left_chunk[0] + left_chunk[1] >= offset: chunks[left_chunk[0]] = offset - left_chunk[0] if right_chunk[0] + right_chunk[1] > offset + length: chunks[offset + length] = \ right_chunk[0] + right_chunk[1] - offset - length return True def on_data_failed(self, node_id, obj_id, offset, error): if obj_id != self.id or self._finished: return logger.info( "data request failure, " "node_id: %s, obj_id: %s, offset: %s, error: %s", node_id, obj_id, offset, error) self.on_node_disconnected(node_id) def get_downloaded_chunks(self): if not self._downloaded_chunks: return None return self._downloaded_chunks def on_node_disconnected(self, node_id, connection_alive=False, timeout_limit_exceed=True): requested_chunks = self._nodes_requested_chunks.pop(node_id, None) logger.info("node disconnected %s, chunks removed from requested: %s", node_id, requested_chunks) if timeout_limit_exceed: self._nodes_available_chunks.pop(node_id, None) self._nodes_timeouts_count.pop(node_id, None) if connection_alive: self._connectivity_service.reconnect(node_id) self._nodes_last_receive_time.pop(node_id, None) self._nodes_downloaded_chunks_count.pop(node_id, None) if connection_alive: self.abort_data.emit(node_id, self.id, None) if self._nodes_available_chunks: self._download_chunks(check_node_busy=True) else: chunks_to_test = self._nodes_requested_chunks \ if self._started and not self._paused \ else self._nodes_available_chunks self._check_download_not_ready(chunks_to_test) def complete(self): if self._started and not self._finished: self._complete_download(force_complete=True) elif not self._finished: self._finished = True self.clean() self.download_complete.emit(self) def _download_chunks(self, check_node_busy=False): if not self._started or self._paused or self._finished: return logger.debug("download_chunks for %s", self.id) node_ids = list(self._nodes_available_chunks.keys()) random.shuffle(node_ids) for node_id in node_ids: node_free = not check_node_busy or \ not self._nodes_requested_chunks.get(node_id, None) if node_free: self._download_next_chunks(node_id) self._clean_nodes_last_receive_time() self._check_download_not_ready(self._nodes_requested_chunks) def _check_can_receive(self, node_id): return True def _write_to_file(self, offset, data): self._file.seek(offset) try: self._file.write(data) except EnvironmentError as e: logger.error("Download task %s can't write to file. Reason: %s", self.id, e) self._send_error_statistic() if e.errno == errno.ENOSPC: self._emit_no_disk_space(error=True) else: self.download_failed.emit(self) self.possibly_sync_folder_is_removed.emit() return False return True def _open_file(self, clean=False): if not self._file or self._file.closed: try: if clean: self._file = open(self.download_path, 'wb') else: self._file = open(self.download_path, 'r+b') except IOError: try: self._file = open(self.download_path, 'wb') except IOError as e: logger.error( "Can't open file for download for task %s. 
" "Reason: %s", self.id, e) self.download_failed.emit(self) return False return True def _close_file(self): if not self._file: return True try: self._file.close() except EnvironmentError as e: logger.error("Download task %s can't close file. Reason: %s", self.id, e) self._send_error_statistic() if e.errno == errno.ENOSPC: self._emit_no_disk_space(error=True) else: self.download_failed.emit(self) self.possibly_sync_folder_is_removed.emit() self._file = None return False self._file = None return True def _write_info_file(self): try: self._info_file.seek(0) self._info_file.truncate() pickle.dump(self._downloaded_chunks, self._info_file, pickle.HIGHEST_PROTOCOL) self._info_file.flush() except EnvironmentError as e: logger.debug("Can't write to info file for task id %s. Reason: %s", self.id, e) def _read_info_file(self): try: if not self._info_file or self._info_file.closed: self._info_file = open(self._info_path, 'a+b') self._info_file.seek(0) try: self._downloaded_chunks = pickle.load(self._info_file) except: pass except EnvironmentError as e: logger.debug("Can't open info file for task id %s. Reason: %s", self.id, e) def _close_info_file(self, to_remove=False): if not self._info_file: return try: self._info_file.close() if to_remove: remove_file(self._info_path) except Exception as e: logger.debug( "Can't close or remove info file " "for task id %s. Reason: %s", self.id, e) self._info_file = None def _complete_download(self, force_complete=False): if (not self._wanted_chunks or force_complete) and \ not self._finished: logger.debug("download %s completed", self.id) self._nodes_requested_chunks.clear() for node_id in self._nodes_last_receive_time.keys(): self.abort_data.emit(node_id, self.id, None) if not force_complete: self.download_finishing.emit() if not force_complete and self.file_hash: hash_check_result = self._check_file_hash() if hash_check_result is not None: return hash_check_result self._started = False self._finished = True self.stop_download_chunks() self._close_info_file(to_remove=True) if not self._close_file(): return False try: if force_complete: remove_file(self.download_path) self.download_complete.emit(self) else: shutil.move(self.download_path, self.file_path) self._send_end_statistic() self.download_complete.emit(self) if self.file_hash: self.copy_added.emit(self.file_hash) except EnvironmentError as e: logger.error( "Download task %s can't (re)move file. 
" "Reason: %s", self.id, e) self._send_error_statistic() self.download_failed.emit(self) self.possibly_sync_folder_is_removed.emit() return False result = True else: result = not self._wanted_chunks return result def _check_file_hash(self): self._file.flush() try: hash = Rsync.hash_from_block_checksum( Rsync.block_checksum(self.download_path)) except IOError as e: logger.error("download %s error: %s", self.id, e) hash = None if hash != self.file_hash: logger.error( "download hash check failed objId: %s, " "expected hash: %s, actual hash: %s", self.id, self.file_hash, hash) if not self._close_file() or not self._open_file(clean=True): return False self._downloaded_chunks.clear() self._nodes_downloaded_chunks_count.clear() self._nodes_last_receive_time.clear() self._nodes_timeouts_count.clear() self._write_info_file() self._init_wanted_chunks() self.received = 0 if self._retry < self.retry_limit: self._retry += 1 self.resume() else: self._retry = 0 self._nodes_available_chunks.clear() self.hash_is_wrong = True self.wrong_hash.emit(self) return True return None def _download_next_chunks(self, node_id, time_from_last_received_chunk=0.): if (self._paused or not self._started or not self._ready or self._finished or not self._wanted_chunks or self._leaky_timer.isActive()): return total_requested = sum( map(lambda x: sum(x.values()), self._nodes_requested_chunks.values())) if total_requested + self.received >= self.size: if self._nodes_requested_chunks.get(node_id, None) and \ time_from_last_received_chunk <= self.end_race_timeout: return available_chunks = \ self._get_end_race_chunks_to_download_from_node(node_id) else: available_chunks = \ self._get_available_chunks_to_download_from_node(node_id) if not available_chunks: logger.debug("no chunks available for download %s", self.id) logger.debug("downloading from: %s nodes, length: %s, wanted: %s", len(self._nodes_requested_chunks), total_requested, self.size - self.received) return available_offset = random.sample(available_chunks.keys(), 1)[0] available_length = available_chunks[available_offset] logger.debug("selected random offset: %s", available_offset) parts_count = math.ceil( float(available_length) / float(DOWNLOAD_PART_SIZE)) - 1 logger.debug("parts count: %s", parts_count) part_to_download_number = random.randint(0, parts_count) offset = available_offset + \ part_to_download_number * DOWNLOAD_PART_SIZE length = min(DOWNLOAD_PART_SIZE, available_offset + available_length - offset) logger.debug("selected random part: %s, offset: %s, length: %s", part_to_download_number, offset, length) self._request_data(node_id, offset, length) def _get_end_race_chunks_to_download_from_node(self, node_id): available_chunks = self._nodes_available_chunks.get(node_id, None) if not available_chunks: return [] available_chunks = available_chunks.copy() logger.debug("end race downloaded_chunks: %s", self._downloaded_chunks) logger.debug("end race requested_chunks: %s", self._nodes_requested_chunks) logger.debug("end race available_chunks before excludes: %s", available_chunks) if self._downloaded_chunks: for downloaded_chunk in self._downloaded_chunks.items(): self._remove_from_chunks(downloaded_chunk[0], downloaded_chunk[1], available_chunks) if not available_chunks: return [] available_from_other_nodes = available_chunks.copy() for requested_offset, requested_length in \ self._nodes_requested_chunks.get(node_id, dict()).items(): self._remove_from_chunks(requested_offset, requested_length, available_from_other_nodes) result = available_from_other_nodes if 
available_from_other_nodes \ else available_chunks if result: logger.debug("end race available_chunks after excludes: %s", available_chunks) return result def _get_available_chunks_to_download_from_node(self, node_id): available_chunks = self._nodes_available_chunks.get(node_id, None) if not available_chunks: return [] available_chunks = available_chunks.copy() logger.debug("downloaded_chunks: %s", self._downloaded_chunks) logger.debug("requested_chunks: %s", self._nodes_requested_chunks) logger.debug("available_chunks before excludes: %s", available_chunks) for _, requested_chunks in self._nodes_requested_chunks.items(): for requested_offset, requested_length in requested_chunks.items(): self._remove_from_chunks(requested_offset, requested_length, available_chunks) if not available_chunks: return [] for downloaded_chunk in self._downloaded_chunks.items(): self._remove_from_chunks(downloaded_chunk[0], downloaded_chunk[1], available_chunks) logger.debug("available_chunks after excludes: %s", available_chunks) return available_chunks def _request_data(self, node_id, offset, length): logger.debug("Requesting date from node %s, request_chunk (%s, %s)", node_id, offset, length) if self._limiter: try: self._limiter.leak(length) except LeakyBucketException: if node_id not in self._nodes_requested_chunks: self._nodes_last_receive_time.pop(node_id, None) if not self._network_limited_error_set: self.download_error.emit('Network limited.') self._network_limited_error_set = True if not self._leaky_timer.isActive(): self._leaky_timer.start() return if self._network_limited_error_set: self._network_limited_error_set = False self.download_ok.emit() requested_chunks = self._nodes_requested_chunks.get(node_id, None) if not requested_chunks: requested_chunks = SortedDict() self._nodes_requested_chunks[node_id] = requested_chunks requested_chunks[offset] = length logger.debug("Requested chunks %s", requested_chunks) self._nodes_last_receive_time[node_id] = time() self.request_data.emit(node_id, self.id, str(offset), length) def _clean_nodes_last_receive_time(self): for node_id in list(self._nodes_last_receive_time.keys()): if node_id not in self._nodes_requested_chunks: self._nodes_last_receive_time.pop(node_id, None) def _on_check_timeouts(self): if self._paused or not self._started \ or self._finished or self._leaky_timer.isActive(): return timed_out_nodes = set() cur_time = time() logger.debug("Chunk requests check %s", len(self._nodes_requested_chunks)) if self._check_download_not_ready(self._nodes_requested_chunks): return for node_id in self._nodes_last_receive_time: last_receive_time = self._nodes_last_receive_time.get(node_id) if cur_time - last_receive_time > self.receive_timeout: timed_out_nodes.add(node_id) logger.debug("Timed out nodes %s, nodes last receive time %s", timed_out_nodes, self._nodes_last_receive_time) for node_id in timed_out_nodes: timeout_count = self._nodes_timeouts_count.pop(node_id, 0) timeout_count += 1 if timeout_count >= self.timeouts_limit: retry = False else: retry = True self._nodes_timeouts_count[node_id] = timeout_count logger.debug("Node if %s, timeout_count %s, retry %s", node_id, timeout_count, retry) self.on_node_disconnected(node_id, connection_alive=True, timeout_limit_exceed=not retry) def _get_chunks_from_info(self, chunks, info): new_added = False for part_info in info: logger.debug("get_chunks_from_info part_info %s", part_info) if part_info.length == 0: continue if not chunks: chunks[part_info.offset] = part_info.length new_added = True continue result_offset 
= part_info.offset result_length = part_info.length left_index = chunks.bisect_right(part_info.offset) if left_index > 0: left_chunk = chunks.peekitem(left_index - 1) if (left_chunk[0] <= part_info.offset and left_chunk[0] + left_chunk[1] >= part_info.offset + part_info.length): continue if part_info.offset <= left_chunk[0] + left_chunk[1]: result_offset = left_chunk[0] result_length = part_info.offset + \ part_info.length - result_offset left_index -= 1 right_index = chunks.bisect_right(part_info.offset + part_info.length) if right_index > 0: right_chunk = chunks.peekitem(right_index - 1) if part_info.offset + part_info.length <= \ right_chunk[0] + right_chunk[1]: result_length = right_chunk[0] + \ right_chunk[1] - result_offset to_delete = list(chunks.islice(left_index, right_index)) for to_del in to_delete: chunks.pop(to_del) new_added = True chunks[result_offset] = result_length return new_added def _store_availability_info(self, node_id, info): known_chunks = self._nodes_available_chunks.get(node_id, None) if not known_chunks: known_chunks = SortedDict() self._nodes_available_chunks[node_id] = known_chunks return self._get_chunks_from_info(known_chunks, info) def _check_download_not_ready(self, checkable): if not self._wanted_chunks and self._started: self._complete_download(force_complete=False) return False if self._leaky_timer.isActive(): if not self._nodes_available_chunks: self._make_not_ready() return True elif not checkable: self._make_not_ready() return True return False def _make_not_ready(self): if not self._ready: return logger.info("download %s not ready now", self.id) self._ready = False self._started = False if self._timeout_timer.isActive(): self._timeout_timer.stop() if self._leaky_timer.isActive(): self._leaky_timer.stop() self.download_not_ready.emit(self) def _clear_globals(self): self._wanted_chunks.clear() self._downloaded_chunks.clear() self._nodes_available_chunks.clear() self._nodes_requested_chunks.clear() self._nodes_last_receive_time.clear() self._nodes_downloaded_chunks_count.clear() self._nodes_timeouts_count.clear() self._total_chunks_count = 0 def stop_download_chunks(self): if self._leaky_timer.isActive(): self._leaky_timer.stop() if self._timeout_timer.isActive(): self._timeout_timer.stop() for node_id in self._nodes_requested_chunks: self.abort_data.emit(node_id, self.id, None) self._nodes_requested_chunks.clear() self._nodes_last_receive_time.clear() def _emit_no_disk_space(self, error=False): self._no_disk_space_error = True self._nodes_available_chunks.clear() self._clear_globals() self._make_not_ready() file_name = self.display_name.split()[-1] \ if self.display_name else "" self.no_disk_space.emit(self, file_name, error) def _send_start_statistic(self): if self._tracker: self._tracker.download_start(self.id, self.size) def _send_end_statistic(self): if self._tracker: time_diff = time() - self._started_time if time_diff < 1e-3: time_diff = 1e-3 self._tracker.download_end( self.id, time_diff, websockets_bytes=0, webrtc_direct_bytes=self._received_via_p2p, webrtc_relay_bytes=self._received_via_turn, chunks=len(self._downloaded_chunks), chunks_reloaded=0, nodes=len(self._nodes_available_chunks)) def _send_error_statistic(self): if self._tracker: time_diff = time() - self._started_time if time_diff < 1e-3: time_diff = 1e-3 self._tracker.download_error( self.id, time_diff, websockets_bytes=0, webrtc_direct_bytes=self._received_via_p2p, webrtc_relay_bytes=self._received_via_turn, chunks=len(self._downloaded_chunks), chunks_reloaded=0, 
nodes=len(self._nodes_available_chunks))
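# A minimal sketch (assumptions noted inline) of the interval bookkeeping DownloadTask
# relies on: chunks live in a SortedDict mapping offset -> length, and a newly received
# chunk is coalesced with an adjacent left/right neighbour, the same idea as
# _on_new_chunk_downloaded above (simplified: exact adjacency only, no overlap handling).
from sortedcontainers import SortedDict

def add_chunk(chunks, offset, length):
    new_offset, new_length = offset, length
    left = chunks.bisect_right(new_offset)
    if left > 0:
        l_off, l_len = chunks.peekitem(left - 1)
        if l_off + l_len == new_offset:        # touches the chunk on the left
            new_offset, new_length = l_off, l_len + new_length
            chunks.popitem(left - 1)
    right = chunks.bisect_right(new_offset + new_length)
    if right > 0:
        r_off, r_len = chunks.peekitem(right - 1)
        if r_off == new_offset + new_length:   # touches the chunk on the right
            new_length += r_len
            chunks.popitem(right - 1)
    chunks[new_offset] = new_length

downloaded = SortedDict()
for off in (0, 2048, 1024):                    # out-of-order arrivals, 1 KiB chunks
    add_chunk(downloaded, off, 1024)
print(downloaded)                              # SortedDict({0: 3072})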
def main(): # test_file = "/run/user/1000/gvfs/smb-share:server=cossartlab.local,share=picardoteam/Behavior Camera/p5_20_02_17/cam 1" # print(f"is dir {os.path.isdir(test_file)}") # return open_avi_for_test = False if open_avi_for_test: test_avi() return subject_id = "p8_20_02_27" # P12_20_01_20 p8_20_01_16 cam_folder_id_1 = "cam2" # "cam2" cam_folder_id_2 = "a001" # a000 a001 if cam_folder_id_2 is None: cam_folder_id = "20190430_a002" # ex cam1_a002, movie1, etc... else: cam_folder_id = f"{cam_folder_id_1}_{cam_folder_id_2}" tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/' tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/to_convert/' # tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/basler_recordings/' # tiffs_path_dir = '/media/julien/dream team/camera/' tiffs_path_dir = '/media/julien/Not_today/hne_not_today/data/behavior_movies/to_convert/' # On NAS # tiffs_path_dir = '/run/user/1000/gvfs/smb-share:server=cossartlab.local,share=picardoteam/Behavior Camera/' if cam_folder_id_2 is not None: tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id_1, cam_folder_id_2) # tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id_2, cam_folder_id_1) else: tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id) # print(f"is dir {os.path.isdir(tiffs_path_dir)}") if cam_folder_id_1 is None: cam_id = "22983298" elif cam_folder_id_1 == "cam1": cam_id = "22983298" else: cam_id = "23109588" # cam1: 22983298 cam2: 23109588 # results_path = '/media/julien/My Book/robin_tmp/cameras/' # results_path = os.path.join(results_path, subject_id) results_path = "/media/julien/Not_today/hne_not_today/data/behavior_movies/converted_so_far/" files_in_dir = [ item for item in os.listdir(tiffs_path_dir) if os.path.isfile(os.path.join(tiffs_path_dir, item)) and ( item.endswith("tiff") or item.endswith("tif")) and ( not item.startswith(".")) ] # files_in_dir = sorted_tiff_ls(tiffs_path_dir) # print(f"len(files_in_dir) {len(files_in_dir)}") # for file_name in files_in_dir[-1000:]: # print(f"{file_name}") files_in_dir_dict = SortedDict() for file_name in files_in_dir: index_ = file_name[::-1].find("_") frame_number = int(file_name[-index_:-5]) files_in_dir_dict[frame_number] = file_name # print(f"{file_name[-index_:-5]}") # break # looking for a gap between frames last_tiff_frame = 0 error_detected = False for tiff_frame, tiff_file in files_in_dir_dict.items(): if tiff_frame - 1 != last_tiff_frame: print( f"Gap between frame n° {last_tiff_frame} and {tiff_frame}. 
File {tiff_file}" ) error_detected = True last_tiff_frame = tiff_frame if error_detected: raise Exception("ERROR: gap between 2 frames") # keep the name of the tiffs files yaml_file_name = os.path.join( results_path, f"behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}.yaml") with open(yaml_file_name, 'w') as outfile: yaml.dump(list(files_in_dir_dict.values()), outfile, default_flow_style=False) # raise Exception("TEST YAML") # # leave only regular files, insert creation date # entries = ((stat[ST_CTIME], path) # for stat, path in entries if S_ISREG(stat[ST_MODE])) # # NOTE: on Windows `ST_CTIME` is a creation date # # but on Unix it could be something else # # NOTE: use `ST_MTIME` to sort by a modification date # # for cdate, path in sorted(entries): # print(time.ctime(cdate), os.path.basename(path)) # sort by alaphabatical order size_avi = None vid_avi = None fps_avi = 20 avi_file_name = os.path.join( results_path, f"behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}_fps_{fps_avi}.avi" ) print( f"creating behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}_fps_{fps_avi}.avi from {len(files_in_dir_dict)} tiff files" ) is_color = True # put fourcc to 0 for no compression # fourcc = 0 fourcc = VideoWriter_fourcc(*"XVID") # fourcc = VideoWriter_fourcc(*"MPEG") # https://stackoverflow.com/questions/44947505/how-to-make-a-movie-out-of-images-in-python start_time = time() for tiff_frame, tiff_file in files_in_dir_dict.items(): if (tiff_frame > 0) and (tiff_frame % 5000 == 0): print(f"{tiff_frame} frames done") # img = PIL.Image.open(os.path.join(tiffs_path_dir, tiff_file)) # img = np.array(img) if vid_avi is None: if size_avi is None: img = PIL.Image.open(os.path.join(tiffs_path_dir, tiff_file)) img = np.array(img) print(f"img.shape {img.shape}") size_avi = img.shape[1], img.shape[0] # vid_avi = VideoWriter(avi_file_name, fourcc, float(fps_avi), size_avi, is_color) vid_avi = VideoWriter(avi_file_name, fourcc, fps_avi, size_avi, is_color) # vid_avi.write(img) vid_avi.write(imread(os.path.join(tiffs_path_dir, tiff_file))) cv2.destroyAllWindows() vid_avi.release() time_to_convert = time() - start_time print(f"time_to_convert: {time_to_convert} sec")
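# A small sketch of the frame-gap check above, with hypothetical file names: keying the
# tiff names by frame number in a SortedDict means iteration is already in frame order,
# so a gap is simply two consecutive keys that do not differ by exactly 1.
from sortedcontainers import SortedDict

frames = SortedDict({1: "img_1.tiff", 2: "img_2.tiff", 4: "img_4.tiff"})
previous = 0
for frame, name in frames.items():
    if frame != previous + 1:
        print(f"Gap between frame {previous} and {frame} (file {name})")
    previous = frame
# -> Gap between frame 2 and 4 (file img_4.tiff)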
class OrderedDict(dict): """Dictionary that remembers insertion order and is numerically indexable. Keys are numerically indexable using dict views. For example:: >>> ordered_dict = OrderedDict.fromkeys('abcde') >>> keys = ordered_dict.keys() >>> keys[0] 'a' >>> keys[-2:] ['d', 'e'] The dict views support the sequence abstract base class. """ # pylint: disable=super-init-not-called def __init__(self, *args, **kwargs): self._keys = {} self._nums = SortedDict() self._keys_view = self._nums.keys() self._count = count() self.update(*args, **kwargs) def __setitem__(self, key, value, dict_setitem=dict.__setitem__): "``ordered_dict[key] = value``" if key not in self: num = next(self._count) self._keys[key] = num self._nums[num] = key dict_setitem(self, key, value) def __delitem__(self, key, dict_delitem=dict.__delitem__): "``del ordered_dict[key]``" dict_delitem(self, key) num = self._keys.pop(key) del self._nums[num] def __iter__(self): "``iter(ordered_dict)``" return iter(self._nums.values()) def __reversed__(self): "``reversed(ordered_dict)``" nums = self._nums for key in reversed(nums): yield nums[key] def clear(self, dict_clear=dict.clear): "Remove all items from mapping." dict_clear(self) self._keys.clear() self._nums.clear() def popitem(self, last=True): """Remove and return (key, value) item pair. Pairs are returned in LIFO order if last is True or FIFO order if False. """ index = -1 if last else 0 num = self._keys_view[index] key = self._nums[num] value = self.pop(key) return key, value update = __update = co.MutableMapping.update def keys(self): "Return set-like and sequence-like view of mapping keys." return KeysView(self) def items(self): "Return set-like and sequence-like view of mapping items." return ItemsView(self) def values(self): "Return set-like and sequence-like view of mapping values." return ValuesView(self) def pop(self, key, default=NONE): """Remove given key and return corresponding value. If key is not found, default is returned if given, otherwise raise KeyError. """ if key in self: value = self[key] del self[key] return value elif default is NONE: raise KeyError(key) else: return default def setdefault(self, key, default=None): """Return ``mapping.get(key, default)``, also set ``mapping[key] = default`` if key not in mapping. """ if key in self: return self[key] self[key] = default return default @recursive_repr() def __repr__(self): "Text representation of mapping." return '%s(%r)' % (self.__class__.__name__, list(self.items())) __str__ = __repr__ def __reduce__(self): "Support for pickling serialization." return (self.__class__, (list(self.items()), )) def copy(self): "Return shallow copy of mapping." return self.__class__(self) @classmethod def fromkeys(cls, iterable, value=None): """Return new mapping with keys from iterable. If not specified, value defaults to None. """ return cls((key, value) for key in iterable) def __eq__(self, other): "Test self and other mapping for equality." if isinstance(other, OrderedDict): return dict.__eq__(self, other) and all(map(eq, self, other)) return dict.__eq__(self, other) __ne__ = co.MutableMapping.__ne__ def _check(self): "Check consistency of internal member variables." # pylint: disable=protected-access keys = self._keys nums = self._nums for key, value in keys.items(): assert nums[value] == key nums._check()
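# The core trick of the OrderedDict above, in isolation: a monotonically increasing
# counter is the key of a SortedDict whose values are the mapping's keys, so the n-th
# inserted key can be recovered positionally through the SortedDict's indexable keys
# view. A minimal sketch, not the full class:
from itertools import count
from sortedcontainers import SortedDict

counter = count()
nums = SortedDict()                        # insertion number -> key
for key in "abcde":
    nums[next(counter)] = key

keys_view = nums.keys()                    # indexable view of insertion numbers
print(nums[keys_view[0]])                  # 'a'  (first inserted key)
print([nums[n] for n in keys_view[-2:]])   # ['d', 'e'] (last two inserted keys)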
class KeyedRegion: """ KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in this region overlap with another variable in this region. Registers and function frames can all be viewed as a keyed region. """ __slots__ = ('_storage', '_object_mapping', '_phi_node_contains') def __init__(self, tree=None, phi_node_contains=None): self._storage = SortedDict() if tree is None else tree self._object_mapping = weakref.WeakValueDictionary() self._phi_node_contains = phi_node_contains def _get_container(self, offset): try: base_offset = next( self._storage.irange(maximum=offset, reverse=True)) except StopIteration: return offset, None else: container = self._storage[base_offset] if container.includes(offset): return base_offset, container return offset, None def __contains__(self, offset): """ Test if there is at least one varaible covering the given offset. :param offset: :return: """ return self._get_container(offset)[1] is not None def __len__(self): return len(self._storage) def __iter__(self): return iter(self._storage.values()) def __eq__(self, other): if set(self._storage.keys()) != set(other._storage.keys()): return False for k, v in self._storage.items(): if v != other._storage[k]: return False return True def copy(self): if not self._storage: return KeyedRegion(phi_node_contains=self._phi_node_contains) kr = KeyedRegion(phi_node_contains=self._phi_node_contains) for key, ro in self._storage.items(): kr._storage[key] = ro.copy() kr._object_mapping = self._object_mapping.copy() return kr def merge(self, other, replacements=None): """ Merge another KeyedRegion into this KeyedRegion. :param KeyedRegion other: The other instance to merge with. :return: None """ # TODO: is the current solution not optimal enough? for _, item in other._storage.items(): # type: RegionObject for so in item.stored_objects: # type: StoredObject if replacements and so.obj in replacements: so = StoredObject(so.start, replacements[so.obj], so.size) self._object_mapping[so.obj_id] = so self.__store(so, overwrite=False) return self def replace(self, replacements): """ Replace variables with other variables. :param dict replacements: A dict of variable replacements. :return: self """ for old_var, new_var in replacements.items(): old_var_id = id(old_var) if old_var_id in self._object_mapping: # FIXME: we need to check if old_var still exists in the storage old_so = self._object_mapping[old_var_id] # type: StoredObject self._store(old_so.start, new_var, old_so.size, overwrite=True) return self def dbg_repr(self): """ Get a debugging representation of this keyed region. :return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = {} for key in sorted(keys): ro = self._storage[key] variables = [obj.obj for obj in ro.stored_objects] offset_to_vars[key] = variables s = [] for offset, variables in offset_to_vars.items(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.add_object(start, variable, size) def add_object(self, start, obj, object_size): """ Add/Store an object to this region at the given offset. 
:param start: :param obj: :param int object_size: Size of the object :return: """ self._store(start, obj, object_size, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.set_object(start, variable, size) def set_object(self, start, obj, object_size): """ Add an object to this region at the given offset, and remove all other objects that are fully covered by this object. :param start: :param obj: :param object_size: :return: """ self._store(start, obj, object_size, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index objects covering the given offset) of a specific offset. :param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A list of stack variables. :rtype: set """ _, container = self._get_container(start) if container is None: return [] else: return container.internal_objects def get_objects_by_offset(self, start): """ Find objects covering the given region offset. :param start: :return: """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects # # Private methods # def _store(self, start, obj, size, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param obj: The object to store. :param int size: Size of the object to store. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ stored_object = StoredObject(start, obj, size) self._object_mapping[stored_object.obj_id] = stored_object self.__store(stored_object, overwrite=overwrite) def __store(self, stored_object, overwrite=False): """ Store a variable into the storage. :param StoredObject stored_object: The descriptor describing start address and the variable. :param bool overwrite: Whether existing objects should be overwritten or not. True to make a strong update, False to make a weak update. :return: None """ start = stored_object.start object_size = stored_object.size end = start + object_size # region items in the middle overlapping_items = list(self._storage.irange(start, end - 1)) # is there a region item that begins before the start and overlaps with this variable? 
floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginning overlapping_items.insert(0, floor_key) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, object_size, {stored_object})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(stored_object) else: self._add_object_with_check(b, stored_object) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {stored_object}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(stored_object) else: self._add_object_with_check(a, stored_object) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(stored_object) else: self._add_object_with_check(item, stored_object) to_update[item.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next( self._storage.irange(maximum=end - 1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next( self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_with_check(self, item, stored_object): if len({stored_object.obj} | item.internal_objects) > 1: if self._phi_node_contains is not None: # check if `item` is a phi node that contains stored_object.obj for so in item.internal_objects: if self._phi_node_contains(so, stored_object.obj): # yes! so we want to skip this object return # check if `stored_object.obj` is a phi node that contains item.internal_objects if all( self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects): # yes! item.set_object(stored_object) return l.warning("Overlapping objects %s.", str({stored_object.obj} | item.internal_objects)) # import ipdb; ipdb.set_trace() item.add_object(stored_object)
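# The lookup KeyedRegion builds on, sketched standalone: irange(maximum=offset,
# reverse=True) yields keys <= offset from largest to smallest, so next(...) is the
# floor key, i.e. the start of the only region that could cover the offset. Region
# starts and sizes below are made up for illustration.
from sortedcontainers import SortedDict

regions = SortedDict({0: 8, 16: 4, 32: 8})      # region start -> region size

def container_of(offset):
    try:
        base = next(regions.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None                             # nothing starts at or before offset
    return base if offset < base + regions[base] else None

print(container_of(18))   # 16   (inside [16, 20))
print(container_of(25))   # None (falls in the gap between regions)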
class FreshPondSim: def __init__(self, distance, start_time, end_time, entrances, entrance_weights, rand_velocities_and_distances_func, entrance_rate, entrance_rate_integral=None, entrance_rate_integral_inverse=None, interpolate_rate=True, interpolate_rate_integral=True, interpolate_res=None, snap_exit=True): assert_positive_real(distance, 'distance') assert_real(start_time, 'start_time') assert_real(end_time, 'end_time') if not (start_time < end_time): raise ValueError(f"start_time should be less than end_time") assert len(entrances) == len(entrance_weights) self.start_time = start_time self.end_time = end_time self.dist_around = distance self.entrances = entrances self.entrance_weights = entrance_weights self.rand_velocities_and_distances = rand_velocities_and_distances_func self._snap_exit = snap_exit if interpolate_rate or interpolate_rate_integral: if interpolate_res is None: raise ValueError("Specify interpolate_res for interpolation") if interpolate_rate: self.entrance_rate = DynamicBoundedInterpolator( entrance_rate, start_time, end_time, interpolate_res) else: self.entrance_rate = entrance_rate if interpolate_rate_integral: # Want to interplate the integral function if entrance_rate_integral is None: # No integral function given # Do numerical integration and interpolate to speed it up def integral_func(t): y, abserr = integrate.quad(entrance_rate, start_time, t) return y self.entrance_rate_integral = DynamicBoundedInterpolator( integral_func, start_time, end_time, interpolate_res) else: # Integral function was provided # Use the provided rate integral function but interpolate it self.entrance_rate_integral = DynamicBoundedInterpolator( entrance_rate_integral, start_time, end_time, interpolate_res) else: # Don't want to interpolate the integral function # If entrance_rate_integral is not None (i.e. is provided) then # that function will be used as the rate integral. # If entrance_rate_integral is None, numerical integration will # be used. 
self.entrance_rate_integral = entrance_rate_integral self.entrance_rate_integral_inverse = entrance_rate_integral_inverse self.pedestrians = SortedKeyList(key=attrgetter('start_time')) self._counts = SortedDict() self._counts[self.start_time] = 0 self._counts_are_correct = True self.refresh_pedestrians() def _distance(self, a, b): """signed distance of a relative to b""" return circular_diff(a % self.dist_around, b % self.dist_around, self.dist_around) def _distance_from(self, b): """returns a function that returns the signed sitance from b""" return lambda a: self._distance(a, b) def _abs_distance_from(self, b): """returns a function that returns the distance from b""" return lambda a: abs(self._distance(a, b)) def _closest_exit(self, dist): """Returns the closest number to dist that is equivalent mod dist_around to an element of entrances""" closest_exit = min(self.entrances, key=self._abs_distance_from(dist)) diff = self._distance(closest_exit, dist) corrected_dist = dist + diff return corrected_dist def refresh_pedestrians(self): """Refreshes the pedestrians in the simulation to random ones""" self.clear_pedestrians() start_times = list( random_times(self.start_time, self.end_time, self.entrance_rate, self.entrance_rate_integral, self.entrance_rate_integral_inverse)) n_pedestrians = len(start_times) entrances = random.choices(population=self.entrances, weights=self.entrance_weights, k=n_pedestrians) velocities, distances = self.rand_velocities_and_distances( n_pedestrians).T def pedestrians_generator(): for start_time, entrance, velocity, dist in zip( start_times, entrances, velocities, distances): assert dist > 0 if self._snap_exit: original_exit = entrance + dist * sign(velocity) corrected_exit = self._closest_exit(original_exit) corrected_dist = abs(corrected_exit - entrance) if math.isclose(corrected_dist, 0, abs_tol=1e-10): corrected_dist = self.dist_around else: corrected_dist = dist yield FreshPondPedestrian(self.dist_around, entrance, corrected_dist, start_time, velocity) self.add_pedestrians(pedestrians_generator()) def clear_pedestrians(self): """Removes all pedestrains in the simulation""" self.pedestrians.clear() self._reset_counts() self._counts_are_correct = True def add_pedestrians(self, pedestrians): """Adds all the given pedestrians to the simulation""" def checked_pedestrians(): for p in pedestrians: self._assert_pedestrian_in_range(p) yield p initial_num_pedestrians = self.num_pedestrians() self.pedestrians.update(checked_pedestrians()) final_num_pedestrians = self.num_pedestrians() if final_num_pedestrians > initial_num_pedestrians: self._counts_are_correct = False else: assert final_num_pedestrians == initial_num_pedestrians def _assert_pedestrian_in_range(self, p): """Makes sure the pedestrian's start time is in the simulation's time interval""" if not (self.start_time <= p.start_time < self.end_time): raise ValueError( "Pedestrian start time is not in range [start_time, end_time)") def add_pedestrian(self, p): """Adds a new pedestrian to the simulation""" self._assert_pedestrian_in_range(p) self.pedestrians.add(p) # Update counts only when counts are correct if self._counts_are_correct: # add a new breakpoint at the pedestrian's start time if it not there self._counts[p.start_time] = self.n_people(p.start_time) # add a new breakpoint at the pedestrian's end time if it not there self._counts[p.end_time] = self.n_people(p.end_time) # increment all the counts in the pedestrian's interval of time # inclusive on the left, exclusive on the right # If it were inclusive 
on the right, then the count would be one more # than it should be in the period after end_time and before the next # breakpoint after end_time for t in self._counts.irange(p.start_time, p.end_time, inclusive=(True, False)): self._counts[t] += 1 def _reset_counts(self): """Clears _counts and sets count at start_time to 0""" self._counts.clear() self._counts[self.start_time] = 0 def _recompute_counts(self): """Store how many people there are whenever someone enters or exits so the number of people at a given time can be found quickly later""" # print("Recomputing counts") self._reset_counts() if self.num_pedestrians() == 0: return # pedestrians are already sorted by start time start_times = [p.start_time for p in self.pedestrians] end_times = sorted([p.end_time for p in self.pedestrians]) n = len(start_times) curr_count = 0 # current number of people start_times_index = 0 end_times_index = 0 starts_done = False # whether all the start times have been added ends_done = False # whether all the end times have been added while not (starts_done and ends_done): # determine whether a start time or an end time should be added next # store this in the variable take_start which is true if a start # time should be added next if starts_done: # already added all the start times; add an end time take_start = False elif ends_done: # already added all the end times; add a start time take_start = True else: # didn't add all the end times nor all the start times # add the time that is earliest next_start_time = start_times[start_times_index] next_end_time = end_times[end_times_index] take_start = next_start_time < next_end_time if take_start: # add next start curr_count += 1 start_time = start_times[start_times_index] self._counts[start_time] = curr_count start_times_index += 1 if start_times_index == n: starts_done = True else: # add next end curr_count -= 1 end_time = end_times[end_times_index] self._counts[end_time] = curr_count end_times_index += 1 if end_times_index == n: ends_done = True def n_unique_people_saw(self, p): """Returns the number of unique people that a pedestrian sees""" n = 0 for q in self.pedestrians: if p.intersects(q): n += 1 return n def n_people_saw(self, p): """Returns the number of times a pedestrian sees someone""" n = 0 for q in self.pedestrians: if p.end_time > q.start_time and p.start_time < q.end_time: n += p.n_intersections(q) return n def intersection_directions(self, p): """Returns the number of people seen going in the same direction and the number of people seen going in the opposite direction by p as a tuple""" n_same, n_diff = 0, 0 for q in self.pedestrians: if p.end_time > q.start_time and p.start_time < q.end_time: d = q.intersection_direction(p) if d == 1: n_same += 1 elif d == -1: n_diff += 1 return n_same, n_diff def intersection_directions_total(self, p): n_same, n_diff = 0, 0 for q in self.pedestrians: if p.end_time > q.start_time and p.start_time < q.end_time: i = p.total_intersection_direction(q) if i < 0: n_diff += -i elif i > 0: n_same += i return n_same, n_diff def n_people(self, t): """Returns the number of people at a given time""" if not self._counts_are_correct: self._recompute_counts() self._counts_are_correct = True if t in self._counts: return self._counts[t] elif t < self.start_time: return 0 else: index = self._counts.bisect_left(t) return self._counts.values()[index - 1] def num_pedestrians(self): """Returns the total number of pedestrians in the simulation""" return len(self.pedestrians) def get_pedestrians_in_interval(self, start, stop): 
"""Returns a list of all the pedestrians who entered in the interval [start, stop]""" return list(self.pedestrians.irange_key(start, stop)) def num_entrances_in_interval(self, start, stop): """Returns the number of pedestrians who entered in the given interval of time [start, stop]""" return len(self.get_pedestrians_in_interval(start, stop)) def get_enter_and_exit_times_in_interval(self, start, stop): """Returns the entrance and exit times in a given time interval as a tuple of lists (entrance_times, exit_times).""" start_times = [] end_times = [] for p in self.pedestrians: if start <= p.start_time <= stop: start_times.append(p.start_time) if start <= p.end_time <= stop: end_times.append(p.end_time) return start_times, end_times def get_pedestrians_at_time(self, t): """Returns a list of all the pedestrians who were there at time t""" # get all pedestrians who entered at or before time t entered_before_t = self.pedestrians.irange_key( min_key=None, max_key=t, inclusive=(True, True)) # Of those, return return the ones who exited after time t return [p for p in entered_before_t if p.end_time > t]
class TreePage(BasePage): """ Page object, implemented with a sorted dict. Who knows what's underneath! """ def __init__(self, *args, **kwargs): storage = kwargs.pop("storage", None) super(TreePage, self).__init__(*args, **kwargs) self._storage = SortedDict() if storage is None else storage def keys(self): if len(self._storage) == 0: return set() else: return set.union(*(set(range(*self._resolve_range(mo))) for mo in self._storage.values())) def replace_mo(self, state, old_mo, new_mo): start, end = self._resolve_range(old_mo) for key in self._storage.irange(start, end - 1): val = self._storage[key] if val is old_mo: #assert new_mo.includes(a) self._storage[key] = new_mo def store_overwrite(self, state, new_mo, start, end): # iterate over each item we might overwrite # track our mutations separately since we're in the process of iterating deletes = [] updates = {start: new_mo} for key in self._storage.irange(maximum=end - 1, reverse=True): old_mo = self._storage[key] # make sure we aren't overwriting all of an item that overlaps the end boundary if end < self._page_addr + self._page_size and end not in updates and old_mo.includes( end): updates[end] = old_mo # we can't set a minimum on the range because we need to do the above for # the first object before start too if key < start: break # delete any key that falls within the range deletes.append(key) #assert all(m.includes(i) for i,m in updates.items()) # perform mutations for key in deletes: del self._storage[key] self._storage.update(updates) def store_underwrite(self, state, new_mo, start, end): # track the point that we need to write up to last_missing = end - 1 # track also updates since we can't update while iterating updates = {} for key in self._storage.irange(maximum=end - 1, reverse=True): mo = self._storage[key] # if the mo stops if mo.base <= last_missing and not mo.includes(last_missing): updates[max(mo.last_addr + 1, start)] = new_mo last_missing = mo.base - 1 # we can't set a minimum on the range because we need to do the above for # the first object before start too if last_missing < start: break # if there are no memory objects <= start, we won't have filled start yet if last_missing >= start: updates[start] = new_mo #assert all(m.includes(i) for i,m in updates.items()) self._storage.update(updates) def load_mo(self, state, page_idx): """ Loads a memory object from memory. :param page_idx: the index into the page :returns: a tuple of the object """ try: key = next(self._storage.irange(maximum=page_idx, reverse=True)) except StopIteration: return None else: return self._storage[key] def load_slice(self, state, start, end): """ Return the memory objects overlapping with the provided slice. :param start: the start address :param end: the end address (non-inclusive) :returns: tuples of (starting_addr, memory_object) """ keys = list(self._storage.irange(start, end - 1)) if not keys or keys[0] != start: try: key = next(self._storage.irange(maximum=start, reverse=True)) except StopIteration: pass else: if self._storage[key].includes(start): keys.insert(0, key) return [(max(start, key), self._storage[key]) for key in keys] def _copy_args(self): return {'storage': self._storage.copy()}
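# A hedged sketch of the slice query TreePage.load_slice performs: take every key in
# [start, end), then also pull in the one object that begins before `start` if it spans
# across it. "Objects" here are just sizes keyed by start address, not real memory objects.
from sortedcontainers import SortedDict

page = SortedDict({0: 4, 8: 8, 20: 4})      # object start -> object size

def load_slice(start, end):
    keys = list(page.irange(start, end - 1))
    if not keys or keys[0] != start:
        try:
            prev = next(page.irange(maximum=start, reverse=True))
        except StopIteration:
            prev = None
        if prev is not None and prev + page[prev] > start:
            keys.insert(0, prev)
    return [(max(start, k), page[k]) for k in keys]

print(load_slice(10, 22))   # [(10, 8), (20, 4)] -- the object starting at 8 spans addr 10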
class Ontology(): def __init__(self, domains, predicates): self.domains = {} self.predicates = SortedDict() self.herbrand_base_size = 0 self.predicate_range = {} self.finalized = False self.constraints = [] for d in domains: self.__add_domain(d) for p in predicates: self.__add_predicate(p) self.__create_indexing_scheme() def __check_multidomain(self): if len(self.domains) > 1: raise Exception("This operation does not allow multi domains") def __add_domain(self, d): if not isinstance(d, Iterable): D = [d] else: D = d for d in D: if d.name in self.domains: raise Exception("Domain %s already exists" % d.name) self.domains[d.name] = d def __add_predicate(self, p): if not isinstance(p, Iterable): P = [p] else: P = p for p in P: if p.name in self.predicates: raise Exception("Predicate %s already exists" % p.name) self.predicates[p.name] = p self.predicate_range[p.name] = (self.herbrand_base_size, self.herbrand_base_size + p.groundings_number) self.herbrand_base_size += p.groundings_number def __create_indexing_scheme(self): # Managing a linearized version of this logic self._up_to_idx = 0 # linear max indices self._dict_indices = { } # map potentials id to correspondent multidimensional indices tensor self.finalized = False self._linear = None self._linear_evidence = None self._linear_size = 0 for p in self.predicates.values(): shape = [d.num_constants for d in p.domains] length = np.prod(shape) fr = self._up_to_idx to = fr + length self._up_to_idx = to self._dict_indices[p.name] = np.reshape(np.arange(fr, to), shape) self._linear_size += length self.finalized = True def get_constraint(self, formula): return Formula(self, formula) def FOL2LinearState(self, file): self.__check_multidomain() #just converting APIs from old NMLN pp = SortedDict({p.name: p.arity for p in self.predicates.values()}) constants, predicates, evidences = utils.read_file_fixed_world( file, list(self.domains.values())[0].constants, pp) linear = [] for p, v in predicates.items(): linear.extend(np.reshape(v, [-1])) linear = np.reshape(linear, [1, -1]) return linear def linear2Dict(self, linear_state): d = SortedDict() for p in self.predicates.values(): d[p.name] = np.take(linear_state, self._dict_indices[p.name]) return d def prettyPrintFromLinear(self, linear_state): for p in self.predicates.values(): print(p) print(np.take(linear_state, self._dict_indices[p.name])) print() def linear_size(self): return self._linear_size def sample_fragments_idx(self, k, num=100, get_ids=False): self.__check_multidomain() ii = [] all_ids = [] for _ in range(num): i = [] num_constants = list(self.domains.values())[0].num_constants idx = np.random.choice(num_constants, size=k, replace=False) idx = np.random.permutation(idx) all_ids.append(idx) for p in self.predicates.values(): a = p.arity f_idx = self._dict_indices[p.name] for j in range(a): f_idx = np.take(f_idx, idx, axis=j) f_idx = np.reshape(f_idx, [-1]) i.extend(f_idx) ii.append(i) res = np.stack(ii, axis=0) if not get_ids: return res else: return res, np.stack(all_ids, axis=0) def all_fragments_idx(self, k, get_ids=False, get_atom_to_fragments_mask=False): self.__check_multidomain() ii = [] all_ids = [] num_constants = list(self.domains.values())[0].num_constants for idx in permutations(range(num_constants), k): all_ids.append(idx) i = [] for p in self.predicates.values(): a = p.arity f_idx = self._dict_indices[p.name] for j in range(a): f_idx = np.take(f_idx, idx, axis=j) f_idx = np.reshape(f_idx, [-1]) i.extend(f_idx) ii.append(i) res = np.stack(ii, axis=0) atom_to_fragments_mask = 
np.zeros([self.linear_size(), len(res)]) for i in range(len(res)): for j in range(len(res[0])): atom_id = res[i, j] atom_to_fragments_mask[atom_id, i] = 1 to_return = res if get_ids: to_return = [res, np.stack(all_ids, axis=0)] if get_atom_to_fragments_mask: to_return = to_return + [atom_to_fragments_mask] return to_return def size_of_fragment_state(self, k): self.__check_multidomain() size = 0 for p in self.predicates.values(): size += k**p.arity return size
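# Hypothetical toy version of the linear indexing scheme built by
# __create_indexing_scheme above: each predicate gets a contiguous block of atom
# ids, reshaped to its arity. The predicate names and domain size are made up.
import numpy as np

shapes = {'friend': (3, 3), 'smokes': (3,)}  # 3 constants; friend/2, smokes/1
up_to, dict_indices = 0, {}
for name, shape in shapes.items():
    length = int(np.prod(shape))
    dict_indices[name] = np.reshape(np.arange(up_to, up_to + length), shape)
    up_to += length

print(dict_indices['friend'][1, 2])  # 5  -> linear id of friend(c1, c2)
print(dict_indices['smokes'][0])     # 9  -> linear id of smokes(c0)
print(up_to)                         # 12 -> total linear size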
def calculate_scores(self): """ Function to calculate a score for each transcript, given the metrics derived with the calculate_metrics method and the scoring scheme provided in the JSON configuration. If any requirements have been specified, all transcripts which do not pass them will be assigned a score of 0 and subsequently ignored. Scores are rounded to the nearest integer. """ if self.scores_calculated is True: return self.get_metrics() if not hasattr(self, "logger"): self.logger = None self.logger.setLevel("DEBUG") self.logger.debug("Calculating scores for {0}".format(self.id)) self.scores = dict() for tid in self.transcripts: self.scores[tid] = dict() # Add the score for the transcript source self.scores[tid]["source_score"] = self.transcripts[tid].source_score if self.regressor is None: for param in self.json_conf["scoring"]: self._calculate_score(param) for tid in self.scores: self.transcripts[tid].scores = self.scores[tid].copy() for tid in self.transcripts: if tid in self.__orf_doubles: del self.scores[tid] continue self.transcripts[tid].score = sum(self.scores[tid].values()) self.scores[tid]["score"] = self.transcripts[tid].score else: valid_metrics = self.regressor.metrics metric_rows = SortedDict() for tid, transcript in sorted(self.transcripts.items(), key=operator.itemgetter(0)): for param in valid_metrics: self.scores[tid][param] = "NA" row = [] for attr in valid_metrics: val = getattr(transcript, attr) if isinstance(val, bool): if val: val = 1 else: val = 0 row.append(val) metric_rows[tid] = row # scores = SortedDict.fromkeys(metric_rows.keys()) for pos, score in enumerate(self.regressor.predict(list(metric_rows.values()))): tid = list(metric_rows.keys())[pos] if tid in self.__orf_doubles: del self.scores[tid] continue self.scores[tid]["score"] = score self.transcripts[tid].score = score self.metric_lines_store = [] for row in self.prepare_metrics(): if row["tid"] in self.__orf_doubles: continue else: self.metric_lines_store.append(row) for doubled in self.__orf_doubles: for partial in self.__orf_doubles[doubled]: if partial in self.transcripts: del self.transcripts[partial] self.scores_calculated = True
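# Sketch of why metric_rows above is a SortedDict: keys() and values() stay aligned
# in sorted key order, so a positional prediction can be mapped back to its tid.
# The ids and scores below are invented stand-ins for regressor.predict(...).
from sortedcontainers import SortedDict

metric_rows = SortedDict({'tid.2': [1.0, 0.0], 'tid.1': [0.5, 1.0]})
fake_predictions = [0.9, 0.4]  # one value per row, in the same sorted order
for pos, score in enumerate(fake_predictions):
    tid = list(metric_rows.keys())[pos]
    print(tid, score)  # tid.1 0.9, then tid.2 0.4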
class Geofence(App): """A Geofence defines the space a vehicle is allowed to operate within. A geofence is constructed by layering additive and subtractive geometry to construct a 3-dimensional space of operations that a drone is allowed to fly in. Within a layer, a point is determined to be inside as if all the volumes in that layer were taken as a union. """ # TODO Use a small memory database (like TinyDB) to handle layer mapping. # Added benefit of allowing both name and order mapping to layer at once. req_telem = { 'latitude': '/Airliner/CNTL/VehicleGlobalPosition/Lat', 'longitude': '/Airliner/CNTL/VehicleGlobalPosition/Lon', 'altitude': '/Airliner/CNTL/VehicleGlobalPosition/Alt' } def __init__(self): super(Geofence, self).__init__() self._check_thread = None self.enabled = False self.fence_violation = False self.layers = SortedDict() """:type: dict[Any, _Layer]""" def __contains__(self, other): """True if the given other is contained within the Geofence.""" contained = False for layer in self.layers.values(): if other in layer: contained = layer.kind is LayerKind.ADDITIVE return contained def __str__(self): return 'Geofence{\n' + '\n'.join( ' {}{}: {}'.format( '+' if layer.kind is LayerKind.ADDITIVE else '-', order, layer) for order, layer in self.layers.items()) + '\n}' def attach(self, vehicle): super(Geofence, self).attach(vehicle) self._check_thread = PeriodicExecutor( self._check_fence, every=FENCE_SLEEP, logger=self.vehicle.logger, name='FenceCheck', exception=lambda e: self.vehicle.exception('Geofence Exception')) self._check_thread.start() def detach(self): self._check_thread.stop() super(Geofence, self).detach() @classmethod def required_telemetry_paths(cls): return cls.req_telem.values() def add_layer(self, layer_position, layer_name, layer_kind): if layer_position in self.layers: raise KeyError('This layer already exists.') if not isinstance(layer_kind, LayerKind): raise TypeError('layer_kind must be of type LayerKind.') layer = Layer(name=layer_name, kind=layer_kind) self.layers[layer_position] = layer return layer def _check_fence(self): old = self.fence_violation self.fence_violation = self.fence_violation or \ (self.enabled and self.position not in self) if not old and self.fence_violation: self.vehicle.error('Encountered Fence Violation at %s', self.position) self.vehicle.broadcast(Intent(action=ACTION_RTL)) print('Encountered fence violation. Press Ctrl-C exit.') def layer_by_name(self, name): for layer in self.layers.values(): if layer.name == name: return layer @property def position(self): return Position( App._telem(self.req_telem['latitude'])(self), App._telem(self.req_telem['longitude'])(self), App._telem(self.req_telem['altitude'])(self)) def remove_layer(self, position): del self.layers[position]
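# Toy illustration (with a hypothetical (kind, test) tuple standing in for _Layer)
# of why Geofence.layers is a SortedDict: iterating values() follows the numeric
# layer positions, so higher-positioned layers override lower ones in __contains__.
from sortedcontainers import SortedDict

layers = SortedDict()
layers[10] = ('+', lambda p: True)            # additive base layer
layers[20] = ('-', lambda p: p == 'helipad')  # subtractive keep-out zone

def contains(point):
    inside = False
    for kind, test in layers.values():  # ascending by position: 10, then 20
        if test(point):
            inside = (kind == '+')
    return inside

print(contains('field'))    # True
print(contains('helipad'))  # False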
class FederationRemoteSendQueue(object): """A drop in replacement for TransactionQueue""" def __init__(self, hs): self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.presence_map = { } # Pending presence map user_id -> UserPresenceState self.presence_changed = SortedDict() # Stream position -> user_id self.keyed_edu = {} # (destination, key) -> EDU self.keyed_edu_changed = SortedDict( ) # stream position -> (destination, key) self.edus = SortedDict() # stream position -> Edu self.failures = SortedDict( ) # stream position -> (destination, Failure) self.device_messages = SortedDict() # stream position -> destination self.pos = 1 self.pos_time = SortedDict() # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): LaterGauge( "synapse_federation_send_queue_%s_size" % (queue_name, ), "", [], lambda: len(queue)) for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", "edus", "failures", "device_messages", "pos_time", ]: register(queue_name, getattr(self, queue_name)) self.clock.looping_call(self._clear_queue, 30 * 1000) def _next_pos(self): pos = self.pos self.pos += 1 self.pos_time[self.clock.time_msec()] = pos return pos def _clear_queue(self): """Clear the queues for anything older than N minutes""" FIVE_MINUTES_AGO = 5 * 60 * 1000 now = self.clock.time_msec() keys = self.pos_time.keys() time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO) if not keys[:time]: return position_to_delete = max(keys[:time]) for key in keys[:time]: del self.pos_time[key] self._clear_queue_before_pos(position_to_delete) def _clear_queue_before_pos(self, position_to_delete): """Clear all the queues from before a given position""" with Measure(self.clock, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_changed.keys() i = self.presence_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_changed[key] user_ids = set(user_id for uids in itervalues(self.presence_changed) for user_id in uids) to_del = [ user_id for user_id in self.presence_map if user_id not in user_ids ] for user_id in to_del: del self.presence_map[user_id] # Delete things out of keyed edus keys = self.keyed_edu_changed.keys() i = self.keyed_edu_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.keyed_edu_changed[key] live_keys = set() for edu_key in self.keyed_edu_changed.values(): live_keys.add(edu_key) to_del = [ edu_key for edu_key in self.keyed_edu if edu_key not in live_keys ] for edu_key in to_del: del self.keyed_edu[edu_key] # Delete things out of edu map keys = self.edus.keys() i = self.edus.bisect_left(position_to_delete) for key in keys[:i]: del self.edus[key] # Delete things out of failure map keys = self.failures.keys() i = self.failures.bisect_left(position_to_delete) for key in keys[:i]: del self.failures[key] # Delete things out of device map keys = self.device_messages.keys() i = self.device_messages.bisect_left(position_to_delete) for key in keys[:i]: del self.device_messages[key] def notify_new_events(self, current_id): """As per TransactionQueue""" # We don't need to replicate this as it gets sent down a different # stream. 
pass def send_edu(self, destination, edu_type, content, key=None): """As per TransactionQueue""" pos = self._next_pos() edu = Edu( origin=self.server_name, destination=destination, edu_type=edu_type, content=content, ) if key: assert isinstance(key, tuple) self.keyed_edu[(destination, key)] = edu self.keyed_edu_changed[pos] = (destination, key) else: self.edus[pos] = edu self.notifier.on_new_replication_data() def send_presence(self, states): """As per TransactionQueue Args: states (list(UserPresenceState)) """ pos = self._next_pos() # We only want to send presence for our own users, so lets always just # filter here just in case. local_states = list( filter(lambda s: self.is_mine_id(s.user_id), states)) self.presence_map.update( {state.user_id: state for state in local_states}) self.presence_changed[pos] = [state.user_id for state in local_states] self.notifier.on_new_replication_data() def send_failure(self, failure, destination): """As per TransactionQueue""" pos = self._next_pos() self.failures[pos] = (destination, str(failure)) self.notifier.on_new_replication_data() def send_device_messages(self, destination): """As per TransactionQueue""" pos = self._next_pos() self.device_messages[pos] = destination self.notifier.on_new_replication_data() def get_current_token(self): return self.pos - 1 def federation_ack(self, token): self._clear_queue_before_pos(token) def get_replication_rows(self, from_token, to_token, limit, federation_ack=None): """Get rows to be sent over federation between the two tokens Args: from_token (int) to_token(int) limit (int) federation_ack (int): Optional. The position where the worker is explicitly acknowledged it has handled. Allows us to drop data from before that point """ # TODO: Handle limit. # To handle restarts where we wrap around if from_token > self.pos: from_token = -1 # list of tuple(int, BaseFederationRow), where the first is the position # of the federation stream. rows = [] # There should be only one reader, so lets delete everything its # acknowledged its seen. if federation_ack: self._clear_queue_before_pos(federation_ack) # Fetch changed presence i = self.presence_changed.bisect_right(from_token) j = self.presence_changed.bisect_right(to_token) + 1 dest_user_ids = [ (pos, user_id) for pos, user_id_list in self.presence_changed.items()[i:j] for user_id in user_id_list ] for (key, user_id) in dest_user_ids: rows.append((key, PresenceRow(state=self.presence_map[user_id], ))) # Fetch changes keyed edus i = self.keyed_edu_changed.bisect_right(from_token) j = self.keyed_edu_changed.bisect_right(to_token) + 1 # We purposefully clobber based on the key here, python dict comprehensions # always use the last value, so this will correctly point to the last # stream position. 
keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} for ((destination, edu_key), pos) in iteritems(keyed_edus): rows.append((pos, KeyedEduRow( key=edu_key, edu=self.keyed_edu[(destination, edu_key)], ))) # Fetch changed edus i = self.edus.bisect_right(from_token) j = self.edus.bisect_right(to_token) + 1 edus = self.edus.items()[i:j] for (pos, edu) in edus: rows.append((pos, EduRow(edu))) # Fetch changed failures i = self.failures.bisect_right(from_token) j = self.failures.bisect_right(to_token) + 1 failures = self.failures.items()[i:j] for (pos, (destination, failure)) in failures: rows.append( (pos, FailureRow( destination=destination, failure=failure, ))) # Fetch changed device messages i = self.device_messages.bisect_right(from_token) j = self.device_messages.bisect_right(to_token) + 1 device_messages = {v: k for k, v in self.device_messages.items()[i:j]} for (destination, pos) in iteritems(device_messages): rows.append((pos, DeviceRow(destination=destination, ))) # Sort rows based on pos rows.sort() return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
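# Minimal sketch of the stream-position slicing used in get_replication_rows,
# assuming sortedcontainers: bisect_right converts a token into a list position and
# the items() view supports slicing, so a token window becomes one slice. (The
# extra +1 window arithmetic used above is left out of this sketch.)
from sortedcontainers import SortedDict

edus = SortedDict({1: 'a', 3: 'b', 7: 'c', 9: 'd'})  # stream position -> Edu
from_token, to_token = 1, 7
i = edus.bisect_right(from_token)
j = edus.bisect_right(to_token)
print(list(edus.items()[i:j]))  # [(3, 'b'), (7, 'c')]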
class KeyedRegion(object): """ KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in this region overlap with another variable in this region. Registers and function frames can all be viewed as a keyed region. """ def __init__(self, tree=None): self._storage = SortedDict() if tree is None else tree def _get_container(self, offset): try: base_offset = next(self._storage.irange(maximum=offset, reverse=True)) except StopIteration: return offset, None else: container = self._storage[base_offset] if container.includes(offset): return base_offset, container return offset, None def __contains__(self, offset): """ Test if there is at least one varaible covering the given offset. :param offset: :return: """ return self._get_container(offset)[1] is not None def __len__(self): return len(self._storage) def __iter__(self): return iter(self._storage.values()) def __eq__(self, other): if set(self._storage.keys()) != set(other._storage.keys()): return False for k, v in self._storage.items(): if v != other._storage[k]: return False return True def copy(self): if not self._storage: return KeyedRegion() kr = KeyedRegion() for key, ro in self._storage.items(): kr._storage[key] = ro.copy() return kr def merge(self, other, make_phi_func=None): """ Merge another KeyedRegion into this KeyedRegion. :param KeyedRegion other: The other instance to merge with. :return: None """ # TODO: is the current solution not optimal enough? for _, item in other._storage.items(): # type: RegionObject for loc_and_var in item.stored_objects: self.__store(loc_and_var, overwrite=False, make_phi_func=make_phi_func) return self def dbg_repr(self): """ Get a debugging representation of this keyed region. :return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = { } for key in sorted(keys): ro = self._storage[key] variables = [ obj.obj for obj in ro.stored_objects ] offset_to_vars[key] = variables s = [ ] for offset, variables in offset_to_vars.items(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.add_object(start, variable, size) def add_object(self, start, obj, object_size): """ Add/Store an object to this region at the given offset. :param start: :param obj: :param int object_size: Size of the object :return: """ self._store(start, obj, object_size, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.set_object(start, variable, size) def set_object(self, start, obj, object_size): """ Add an object to this region at the given offset, and remove all other objects that are fully covered by this object. :param start: :param obj: :param object_size: :return: """ self._store(start, obj, object_size, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index objects covering the given offset) of a specific offset. 
:param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A list of stack variables. :rtype: set """ _, container = self._get_container(start) if container is None: return [] else: return container.internal_objects def get_objects_by_offset(self, start): """ Find objects covering the given region offset. :param start: :return: """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects # # Private methods # def _store(self, start, obj, size, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param obj: The object to store. :param int size: Size of the object to store. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ stored_object = StoredObject(start, obj, size) self.__store(stored_object, overwrite=overwrite) def __store(self, stored_object, overwrite=False, make_phi_func=None): """ Store a variable into the storage. :param StoredObject stored_object: The descriptor describing start address and the variable. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ start = stored_object.start object_size = stored_object.size end = start + object_size # region items in the middle overlapping_items = list(self._storage.irange(start, end-1)) # is there a region item that begins before the start and overlaps with this variable? floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginning overlapping_items.insert(0, floor_key) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, object_size, {stored_object})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(stored_object) else: self._add_object_or_make_phi(b, stored_object, make_phi_func=make_phi_func) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {stored_object}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(stored_object) else: self._add_object_or_make_phi(a, stored_object, make_phi_func=make_phi_func) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(stored_object) else: self._add_object_or_make_phi(item, stored_object, make_phi_func=make_phi_func) to_update[item.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next(self._storage.irange(maximum=end-1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is 
not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next(self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_or_make_phi(self, item, stored_object, make_phi_func=None): #pylint:disable=no-self-use if not make_phi_func or len({stored_object.obj} | item.internal_objects) == 1: item.add_object(stored_object) else: # make a phi node item.set_object(StoredObject(stored_object.start, make_phi_func(stored_object.obj, *item.internal_objects), stored_object.size, ) )
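# Small sketch of the overlap query at the heart of __store above, assuming
# sortedcontainers: irange(start, end - 1) yields the existing base offsets that
# fall inside [start, end), in ascending order (both bounds inclusive by default).
from sortedcontainers import SortedDict

storage = SortedDict({0: 'r0', 8: 'r1', 16: 'r2', 32: 'r3'})
start, end = 8, 20
print(list(storage.irange(start, end - 1)))  # [8, 16]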
class PiecewiseConstantFunction(Generic[T]): def __init__(self, initial_value: float = 0) -> None: """ Initialize the constant function to a particular value :param initial_value: the starting value for the function """ self.breakpoints = SortedDict() self._initial_value: float = initial_value def add_breakpoint(self, xval: XValue[T], yval: float, squash: bool = True) -> None: """ Add a breakpoint to the function and update the value Let f(x) be the original function, and next_bp be the first breakpoint > xval; after calling this method, the function will be modified to f'(x) = yval for x \in [xval, next_bp) :param xval: the x-position of the breakpoint to add/modify :param yval: the value to set the function to at xval :param squash: if True and f(xval) = yval before calling this method, the function will remain unchanged """ if squash and self.call(xval) == yval: return self.breakpoints[xval] = yval def add_delta(self, xval: XValue[T], delta: float) -> None: """ Modify the function value for x >= xval Let f(x) be the original function; After calling this method, the function will be modified to f'(x) = f(x) + delta for all x >= xval :param xval: the x-position of the breakpoint to add/modify :param delta: the amount to shift the function value by at xval """ if delta == 0: return if xval not in self.breakpoints: self.breakpoints[xval] = self.call(xval) for x in self.breakpoints.irange(xval): self.breakpoints[x] += delta self.values.cache_clear() self.integrals.cache_clear() def call(self, xval: XValue[T]) -> float: """ Compute the output of the function at a point :param xval: the x-position to compute :returns: f(xval) """ if len(self.breakpoints) == 0 or xval < self.breakpoints.keys()[0]: return self._initial_value else: lower_index = self.breakpoints.bisect(xval) - 1 return self.breakpoints.values()[lower_index] def _breakpoint_info( self, index: Optional[int] ) -> Tuple[Optional[int], Optional[XValue[T]], float]: """ Helper function for computing breakpoint information :param index: index of the breakpoint to compute :returns: (index, breakpoint, value) * index is the breakpoint index (if it exists), or None if we're off the end * breakpoint is the x-value of the breakpoint, or None if we're off the end * value is f(breakpoint), or f(last_breakpoint) if we're off the end """ try: breakpoint, value = self.breakpoints.peekitem(index) except IndexError: index = None breakpoint, value = None, self.breakpoints.values()[-1] return (index, breakpoint, value) @lru_cache(maxsize=_LRU_CACHE_SIZE ) # cache results of calls to this function def values(self, start: XValue[T], stop: XValue[T], step: XValueDiff[T]) -> 'SortedDict[XValue[T], float]': """ Compute a sequence of values of the function This is more efficient than [self.call(xval) for xval in range(start, stop, step)] because each self.call(..) takes O(log n) time due to the binary tree structure of self._breakpoints. This method can compute the range of values in linear time in the range, which is significantly faster for large value ranges. 
:param start: lower bound of value sequence :param stop: upper bound of value sequence :param step: width between points in the sequence :returns: a SortedDict of the values of the function between start and stop, with the x-distance between each data-point equal to `step`; like normal "range" functions the right endpoint is not included """ step = step or (stop - start) if len(self.breakpoints) == 0: num_values = int(math.ceil((stop - start) / step)) return SortedDict([(start + step * i, self._initial_value) for i in range(num_values)]) curr_xval = start curr_value = self.call(start) next_index, next_breakpoint, next_value = self._breakpoint_info( self.breakpoints.bisect(start)) sequence = SortedDict() while curr_xval < stop: sequence[curr_xval] = curr_value next_xval = min(stop, curr_xval + step) while next_breakpoint and next_xval >= next_breakpoint: assert next_index is not None # if next_breakpoint is set, next_index should also be set curr_value = next_value next_index, next_breakpoint, next_value = self._breakpoint_info( next_index + 1) curr_xval = next_xval return sequence @lru_cache(maxsize=_LRU_CACHE_SIZE ) # cache results of calls to this function def integrals( self, start: XValue[T], stop: XValue[T], step: XValueDiff[T], transform: Callable[[XValueDiff[T]], float] = lambda x: cast(float, x), ) -> 'SortedDict[XValue[T], float]': """ Compute a sequence of integrals of the function :param start: lower bound of integral sequence :param stop: upper bound of integral sequence :param step: width of each "chunk" of the integral sequence :param transform: function to apply to x-widths before computing the integral :returns: a SortedDict of the numeric integral values of the function between start and stop; each integral has a range of size `step`, and the key-value is the left endpoint of the chunk """ step = step or (stop - start) if len(self.breakpoints) == 0: # If there are no breakpoints, just split up the range into even widths and compute # (width * self._initial_value) for each chunk. 
step_width = transform(step) range_width = transform(stop - start) num_full_chunks = int(range_width // step_width) sequence = SortedDict([(start + step * i, step_width * self._initial_value) for i in range(num_full_chunks)]) # If the width does not evenly divide the range, compute the last chunk separately if range_width % step_width != 0: sequence[ start + step * num_full_chunks] = range_width % step_width * self._initial_value return sequence # Set up starting loop parameters curr_xval = start curr_value = self.call(start) next_index, next_breakpoint, next_value = self._breakpoint_info( self.breakpoints.bisect(start)) # Loop through the entire range and compute the integral of each chunk sequence = SortedDict() while curr_xval < stop: orig_xval = curr_xval next_xval = min(stop, curr_xval + step) # For each breakpoint in [curr_xval, next_xval), compute the area of that sub-chunk next_integral: float = 0 while next_breakpoint and next_xval >= next_breakpoint: assert next_index is not None # if next_breakpoint is set, next_index should also be set next_integral += transform(next_breakpoint - curr_xval) * curr_value curr_xval = next_breakpoint curr_value = next_value next_index, next_breakpoint, next_value = self._breakpoint_info( next_index + 1) # Handle any remaining width between the last breakpoint and the end of the chunk next_integral += transform(next_xval - curr_xval) * curr_value sequence[orig_xval] = next_integral curr_xval = next_xval return sequence def integral( self, start: XValue[T], stop: XValue[T], transform: Callable[[XValueDiff[T]], float] = lambda x: cast(float, x), ) -> float: """ Helper function to compute the integral of the whole specified range :param start: lower bound of the integral :param stop: upper bound of the integral :returns: the integral of the function between start and stop """ return self.integrals(start, stop, (stop - start), transform).values()[0] def __str__(self) -> str: ret = f'{self._initial_value}, x < {self.breakpoints.keys()[0]}\n' for xval, yval in self.breakpoints.items(): ret += f'{yval}, x >= {xval}\n' return ret def __add__( self, other: 'PiecewiseConstantFunction[T]' ) -> 'PiecewiseConstantFunction[T]': new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction( self._initial_value + other._initial_value) for xval, y0, y1 in _merged_breakpoints(self, other): new_func.add_breakpoint(xval, y0 + y1) return new_func def __sub__( self, other: 'PiecewiseConstantFunction[T]' ) -> 'PiecewiseConstantFunction[T]': new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction( self._initial_value - other._initial_value) for xval, y0, y1 in _merged_breakpoints(self, other): new_func.add_breakpoint(xval, y0 - y1) return new_func def __mul__( self, other: 'PiecewiseConstantFunction[T]' ) -> 'PiecewiseConstantFunction[T]': new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction( self._initial_value * other._initial_value) for xval, y0, y1 in _merged_breakpoints(self, other): new_func.add_breakpoint(xval, y0 * y1) return new_func def __truediv__( self, other: 'PiecewiseConstantFunction[T]' ) -> 'PiecewiseConstantFunction[T]': try: new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction( self._initial_value / other._initial_value) except ZeroDivisionError: new_func = PiecewiseConstantFunction() for xval, y0, y1 in _merged_breakpoints(self, other): try: new_func.add_breakpoint(xval, y0 / y1) except ZeroDivisionError: new_func.add_breakpoint(xval, 0) return new_func
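# Toy walk-through of the breakpoint lookup in call(): bisect_right(xval) - 1 is the
# index of the last breakpoint <= xval (in the sortedcontainers versions assumed
# here, the .bisect() used above is an alias of bisect_right), and the values()
# view can be indexed at that position. A sketch only, not the class itself.
from sortedcontainers import SortedDict

breakpoints = SortedDict({0: 1.0, 10: 3.0, 20: 0.5})
initial_value = 0.0

def call(xval):
    if not breakpoints or xval < breakpoints.keys()[0]:
        return initial_value
    return breakpoints.values()[breakpoints.bisect_right(xval) - 1]

print(call(-5))  # 0.0
print(call(10))  # 3.0
print(call(15))  # 3.0
print(call(25))  # 0.5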
class Node(BaseNode, Mapping): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.rest = None def _select(self, key): for k, v in reversed(list(self.values.items())): if k <= key: return v return self.rest def _insert(self, key, value): result = self._select(key)._insert(key, value) self.changed = True if result is None: return key, other = result return super()._insert(key, other) def _split(self): other = LazyNode(node=Node(tree=self.tree, changed=True), tree=self.tree) values = self.values.items() self.values = SortedDict(values[:len(values) // 2]) other.values = SortedDict(values[len(values) // 2:]) key, value = other.values.popitem(last=False) other.rest = value return (key, other) def _commit(self): self.rest._commit() for child in self.values.values(): child._commit() data = packb({ 'rest': self.rest.offset, 'values': {k: v.offset for k, v in self.values.items()}, }) self.tree.chunk.write(ChunkId.Node, data) return self.tree.chunk.tell() def __getitem__(self, key): return self._select(key)[key] def __len__(self): return sum(len(child) for child in self.values.values()) + len(self.rest) def __iter__(self): for key in self.rest: yield key for child in self.values.values(): for key in child: yield key
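# The _select scan above walks reversed(items()) to find the child keyed by the
# greatest separator <= the lookup key; a sketch of the same floor lookup done with
# irange, assuming sortedcontainers (toy strings stand in for child nodes).
from sortedcontainers import SortedDict

values = SortedDict({10: 'child-a', 20: 'child-b', 30: 'child-c'})
rest = 'leftmost-child'

def select(key):
    for k in values.irange(maximum=key, reverse=True):
        return values[k]  # first hit is the greatest separator <= key
    return rest           # key is smaller than every separator key

print(select(25))  # child-b
print(select(5))   # leftmost-child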
def validate(self, protocol_name, subset='development', aggregate=False, every=1, start=0): # prepare paths validate_dir = self.VALIDATE_DIR.format(train_dir=self.train_dir_, protocol=protocol_name) validate_txt = self.VALIDATE_TXT.format( validate_dir=validate_dir, subset=subset, aggregate='aggregate.' if aggregate else '') validate_png = self.VALIDATE_PNG.format( validate_dir=validate_dir, subset=subset, aggregate='aggregate.' if aggregate else '') validate_eps = self.VALIDATE_EPS.format( validate_dir=validate_dir, subset=subset, aggregate='aggregate.' if aggregate else '') # create validation directory mkdir_p(validate_dir) # Build validation set if aggregate: X, n, y = self._validation_set_z(protocol_name, subset=subset) else: X, y = self._validation_set_y(protocol_name, subset=subset) # list of equal error rates, and epoch to process eers, epoch = SortedDict(), start desc_format = ('Best EER = {best_eer:.2f}% @ epoch #{best_epoch:d} ::' ' EER = {eer:.2f}% @ epoch #{epoch:d} :') progress_bar = tqdm(unit='epoch') with open(validate_txt, mode='w') as fp: # watch and evaluate forever while True: # last completed epochs completed_epochs = self.get_epochs(self.train_dir_) - 1 if completed_epochs < epoch: time.sleep(60) continue # if last completed epoch has already been processed # go back to first epoch that hasn't been processed yet process_epoch = epoch if completed_epochs in eers \ else completed_epochs # do not validate this epoch if it has been done before... if process_epoch == epoch and epoch in eers: epoch += every progress_bar.update(every) continue weights_h5 = LoggingCallback.WEIGHTS_H5.format( log_dir=self.train_dir_, epoch=process_epoch) # this is needed for corner case when training is started from # an epoch > 0 if not isfile(weights_h5): time.sleep(60) continue # sleep 5 seconds to let the checkpoint callback finish time.sleep(5) embedding = keras.models.load_model( weights_h5, custom_objects=CUSTOM_OBJECTS, compile=False) if aggregate: def embed(X): func = K.function([ embedding.get_layer(name='input').input, K.learning_phase() ], [embedding.get_layer(name='internal').output]) return func([X, 0])[0] else: embed = embedding.predict # embed all validation sequences fX = embed(X) if aggregate: indices = np.hstack([[0], np.cumsum(n)]) fX = np.stack([ np.sum(np.sum(fX[i:j], axis=0), axis=0) for i, j in pairwise(indices) ]) fX = l2_normalize(fX) # compute pairwise distances y_pred = pdist(fX, metric=self.approach_.metric) # compute pairwise groundtruth y_true = pdist(y, metric='chebyshev') < 1 # estimate equal error rate _, _, _, eer = det_curve(y_true, y_pred, distances=True) eers[process_epoch] = eer # save equal error rate to file fp.write( self.VALIDATE_TXT_TEMPLATE.format(epoch=process_epoch, eer=eer)) fp.flush() # keep track of best epoch so far best_epoch = eers.iloc[np.argmin(eers.values())] best_eer = eers[best_epoch] progress_bar.set_description( desc_format.format(epoch=process_epoch, eer=100 * eer, best_epoch=best_epoch, best_eer=100 * best_eer)) # plot fig = plt.figure() plt.plot(eers.keys(), eers.values(), 'b') plt.plot([best_epoch], [best_eer], 'bo') plt.plot([eers.iloc[0], eers.iloc[-1]], [best_eer, best_eer], 'k--') plt.grid(True) plt.xlabel('epoch') plt.ylabel('EER on {subset}'.format(subset=subset)) TITLE = '{best_eer:.5g} @ epoch #{best_epoch:d}' title = TITLE.format(best_eer=best_eer, best_epoch=best_epoch, subset=subset) plt.title(title) plt.tight_layout() plt.savefig(validate_png, dpi=75) plt.savefig(validate_eps) plt.close(fig) # go to next epoch if epoch 
== process_epoch: epoch += every progress_bar.update(every) else: progress_bar.update(0) progress_bar.close()
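# Sketch of the best-epoch bookkeeping above: eers maps epoch -> EER, and the epoch
# with the lowest EER is the key at the argmin position. The eers.iloc[...] form
# relies on older sortedcontainers releases; indexing keys() does the same job in
# newer ones.
import numpy as np
from sortedcontainers import SortedDict

eers = SortedDict({0: 0.21, 1: 0.17, 2: 0.19, 3: 0.23})
best_pos = int(np.argmin(list(eers.values())))
best_epoch = eers.keys()[best_pos]
print(best_epoch, eers[best_epoch])  # 1 0.17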
class WordData(QObject): # Define the signal we emit when we have loaded new data WordsUpdated = pyqtSignal() def __init__(self, my_book): super().__init__(None) # Save reference to the book self.my_book = my_book # Save reference to the metamanager self.metamgr = my_book.get_meta_manager() # Save reference to the edited document self.document = my_book.get_edit_model() # Save reference to a speller, which will be the default # at this point. self.speller = my_book.get_speller() # The vocabulary list as a sorted dict. self.vocab = SortedDict() # Key and Values views on the vocab list for indexing by table row. self.vocab_kview = self.vocab.keys() self.vocab_vview = self.vocab.values() # The count of available words based on the latest sort self.active_word_count = 0 # The good- and bad-words sets and the scannos set. self.good_words = set() self.bad_words = set() self.scannos = set() # A dict of words that use an alt-dict tag. The key is a word and the # value is the alt-dict tag string. self.alt_tags = SortedDict() # Cached sort vectors, see get_sort_vector() self.sort_up_vectors = [None, None, None] self.sort_down_vectors = [None, None, None] self.sort_key_funcs = [None, None, None] # Register metadata readers and writers. self.metamgr.register(C.MD_GW, self.good_read, self.good_save) self.metamgr.register(C.MD_BW, self.bad_read, self.bad_save) self.metamgr.register(C.MD_SC, self.scanno_read, self.scanno_save) self.metamgr.register(C.MD_VL, self.word_read, self.word_save) # End of __init__ # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # Methods used when saving metadata. The items in the good_words, # bad_words, and scanno sets are simply returned as a list of strings. # def good_save(self, section) : return [ token for token in self.good_words ] def bad_save(self, section) : return [ token for token in self.bad_words ] def scanno_save(self, section) : return [ token for token in self.scannos ] # # To save the vocabulary, write a list for each word: # [ "token", "tag", count, [prop-code...] ] # where "token" is the word as a string, "tag" is its alt-dict tag # or a null string, count is an integer and [prop-code...] is the # integer values from the word's property set as a list. Note that # alt_tag needs to be a string because json doesn't handle None. # def word_save(self, section) : vlist = [] for word in self.vocab: [count, prop_set] = self.vocab[word] #tag = "" if AD not in prop_set else self.alt_tags[word] tag = "" if AD in prop_set : if word in self.alt_tags : tag = self.alt_tags[word] else : # should never occur, could be assertion error worddata_logger.error( 'erroneous alt tag on ' + word ) plist = list(prop_set) vlist.append( [ word, count, tag, plist ] ) return vlist # # Methods used to load metadata. Called by the metadata manager with # a single Python object, presumably the object that was prepared by # the matching _save method above. Because the user might edit the metadata # file, do a little quality control. 
# def good_read(self, section, value, version): if isinstance(value, list) : for token in value : if isinstance(token, str) : if token in self.bad_words : worddata_logger.warn( '"{}" is in both good and bad words - use in good ignored'.format(token) ) else : self.good_words.add(token) if token in self.vocab : # vocab already loaded, it seems props = self.vocab[token][1] props.add(GW) props &= prop_nox else : worddata_logger.error( '{} in GOODWORDS list ignored'.format(token) ) if len(self.good_words) : # We loaded some, the display might need to change self.WordsUpdated.emit() else : worddata_logger.error( 'GOODWORDS metadata is not a list of strings, ignoring it' ) def bad_read(self, section, value, version): if isinstance(value, list) : for token in value : if isinstance(token, str) : if token in self.good_words : worddata_logger.warn( '"{}" is in both good and bad words - use in bad ignored'.format(token) ) else : self.bad_words.add(token) if token in self.vocab : # vocab already loaded, it seems props = self.vocab[token][1] props.add(BW) props.add(XX) else : worddata_logger.error( '{} in BADWORDS list ignored'.format(token) ) if len(self.bad_words) : # We loaded some, the display might need to change self.WordsUpdated.emit() else : worddata_logger.error( 'BADWORDS metadata is not a list of strings, ignoring it' ) def scanno_read(self, section, value, version): if isinstance(value, list) : for token in value : if isinstance(token, str) : self.scannos.add(token) else : worddata_logger.error( '{} in SCANNOLIST ignored'.format(token) ) else : worddata_logger.error( 'SCANNOLIST metadata is not a list of strings, ignoring it' ) # Load the vocabulary section of a metadata file, allowing for # user-edited malformed items. Be very generous about user errors in a # modified meta file. The expected value for each word is as written by # word_save() above, ["token", count, tag, [props]] but allow a single # item ["token"] or just "token" so the user can put in a single word # with no count or properties. Convert null-string alt-tag to None. # # Before adding a word make sure to unicode-flatten it. # def word_read(self, section, value, version) : global PROP_ALL, prop_nox # get a new speller in case the Book read a different dict already self.speller = self.my_book.get_speller() # if value isn't a list, bail out now if not isinstance(value,list): worddata_logger.error( 'WORDCENSUS metadata is not a list, ignoring it' ) return # inspect each item of the list. for wlist in value: try : if isinstance(wlist,str) : # expand "token" to ["token"] wlist = [wlist] if not isinstance(wlist, list) : raise ValueError if len(wlist) != 4 : if len(wlist) > 4 :raise ValueError if len(wlist) == 1 : wlist.append(0) # add default count of 0 if len(wlist) == 2 : wlist.append('') # add default alt-tag if len(wlist) == 3 : wlist.append([]) # add default props word = wlist[0] if not isinstance(word,str) : raise ValueError word = unicodedata.normalize('NFKC',word) count = int(wlist[1]) # exception if not numeric alt_tag = wlist[2] if not isinstance(alt_tag,str) : raise ValueError if alt_tag == '' : alt_tag = None prop_set = set(wlist[3]) # exception if not iterable if len( prop_set - PROP_ALL ) : raise ValueError #bogus props except : worddata_logger.error( 'WORDCENSUS item {} is invalid, ignoring it'.format(wlist) ) continue # checking done, store the word. 
if (0 == len(prop_set)) or (0 == count) : # word with no properties or count is a user addition, enter # it as if we found it in the file, including deducing the # properties, spell-check, hyphenation split. self._add_token(word, alt_tag) continue # that's that, on to next line # Assume we have a word saved by word_save(), but possibly the # good_words and bad_words have been edited and read-in first. # Note we are not checking for duplicates if word in self.bad_words : prop_set.add(BW) prop_set.add(XX) if word in self.good_words : prop_set.add(GW) prop_set &= prop_nox if alt_tag : prop_set.add(AD) self.alt_tags[word] = alt_tag self.vocab[word] = [count, prop_set] # end of "for wlist in value" # note the current word count self.active_word_count = len(self.vocab) # Tell wordview that the display might need to change self.WordsUpdated.emit() # end of word_read() # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # Methods used when opening a new file, one with no metadata. # # The Book will call these methods passing a text stream when it finds a # good-words file or bad-words file. Each of these is expected to have # one token per line. We don't presume to know in what order the files # are presented, but we DO assume that the vocabulary census has not yet # been taken. That requires the user clicking Refresh and that cannot # have happened while first opening the file. def good_file(self, stream) : while not stream.atEnd() : token = stream.readLine().strip() if token in self.bad_words : worddata_logger.warn( '"{}" is in both good and bad words - use in good ignored'.format(token) ) else : self.good_words.add(token) def bad_file(self, stream) : while not stream.atEnd() : token = stream.readLine().strip() if token in self.good_words : worddata_logger.warn( '"{}" is in both good and bad words - use in bad ignored'.format(token) ) else : self.bad_words.add(token) # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # # The user can choose a new scannos file any time while editing. So there # might be existing data, so we clear the set before reading. # def scanno_file(self, stream) : self.scannos = set() # clear any prior values while not stream.atEnd() : token = stream.readLine().strip() self.scannos.add(token) # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # The following is called by the Book when the user chooses a different # spelling dictionary. Store a new spellcheck object. Recheck the # spelling of all words except those with properties HY, GW, or BW. # # NOTE IF THIS IS A PERFORMANCE BURDEN, KILL IT AND REQUIRE REFRESH # def recheck_spelling(self, speller): global PROP_BGH, prop_nox self.speller = speller for i in range(len(self.vocab)) : (c, p) = self.vocab_vview[i] if not( PROP_BGH & p ) : # then p lacks BW, GW and HY p = p & prop_nox # and now it also lacks XX w = self.vocab_kview[i] t = self.alt_tags.get(w,None) if not self.speller.check(w,t): p.add(XX) self.vocab_vview[i][1] = p # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # Method to perform a census. This is called from wordview when the # user clicks the Refresh button asking for a new scan over all words in # the book. Formerly this took a progress bar, but the actual operation # is so fast no progress need be shown. # def refresh(self): global RE_LANG_ATTR, RE_TOKEN count = 0 end_count = self.document.blockCount() # get a reference to the dictionary to use self.speller = self.my_book.get_speller() # clear the alt-dict list. 
self.alt_tags = SortedDict() # clear the sort vectors self.sort_up_vectors = [None, None, None] self.sort_down_vectors = [None, None, None] self.sort_key_funcs = [None, None, None] # Zero out all counts and property sets that we have so far. We will # develop new properties when each word is first seen. Properties # such as HY will not have changed, but both AD and XX might have # changed while the word text remains the same. for j in range(len(self.vocab)) : self.vocab_vview[j][0] = 0 self.vocab_vview[j][1] = set() # iterate over all lines extracting tokens and processing them. alt_dict = None alt_tag = None for line in self.document.all_lines(): count += 1 j = 0 m = RE_TOKEN.search(line,0) while m : # while match is not None if m.group(6) : # start-tag; has it lang= ? d = RE_LANG_ATTR.search(m.group(8)) if d : alt_dict = d.group(1) alt_tag = m.group(7) elif m.group(9) : if m.group(10) == alt_tag : # end tag of a lang= start tag alt_dict = None alt_tag = None else : self._add_token(m.group(0),alt_dict) j = m.end() m = RE_TOKEN.search(line,j) # Look for zero counts and delete those items. It is forbidden to # alter the dict contents while iterating over values or keys views, # so make a list of the word tokens to be deleted, then use del. togo = [] for j in range(len(self.vocab)) : if self.vocab_vview[j][0] == 0 : togo.append(self.vocab_kview[j]) for key in togo: del self.vocab[key] # Update possibly modified word count self.active_word_count = len(self.vocab) # Internal method for adding a possibly-hyphenated token to the vocabulary, # incrementing its count. This is used during the census/refresh scan, and # can be called from word_read to process a user-added word. # Arguments: # tok_str: a normalized word-like token; may be hyphenated a/o apostrophized # dic_tag: an alternate dictionary tag or None # # If the token has no hyphens, this is just a cover on _count. When the # token is hyphenated, we enter each part of it alone, then add the # phrase with the union of the prop_sets of its parts, plus HY. Thus # "mother-in-law's" will be added as "mother", "in" and "law's", and as # itself with HY, LC, AP. "1989-1995" puts 1989 and 1995 in the list and # will have HY and ND. Yes, this means that a hyphenation could have all # of UC, MC and LC. # # If a part of a phrase fails spellcheck, it will have XX but we do not # propogate that to the phrase itself. # # If a part of the phrase has AD (because it was previously entered as # part of a lang= string) that also is not propogated to the phrase # itself. Since hyphenated phrases are never spell-checked, they should # never have AD. # # Note: en-dash \u2013 is not supported here, only the ascii hyphen. # Support for it could be added if required. # # Defensive programming: '-'.split('-') --> ['','']; '-9'.split('-') --> ['','9'] def _add_token(self, tok_str, dic_tag ) : global prop_nox # Count the entire token regardless of hyphens self._count(tok_str, dic_tag) # this definitely puts it in the dict [count, prop_set] = self.vocab[tok_str] if (count == 1) and (HY in prop_set) : # We just added a hyphenated token: add its parts also. parts = tok_str.split('-') prop_set = {HY} for member in parts : if len(member) : # if not null split from leading - self._count(member, dic_tag) [x, part_props] = self.vocab[member] prop_set |= part_props self.vocab[tok_str] = [count, prop_set - {XX, AD} ] # Internal method to count a token, adding it to the list if necessary. # An /alt-tag must already be removed. The word must be already # normalized. 
Because of the way we tokenize, we know the token contains # only letter forms, numeric forms, and possibly hyphens and/or # apostrophes. # # If it is in the list, increment its count. Otherwise, compute its # properties, including spellcheck for non-hyphenated tokens, and # add it to the vocabulary with a count of 1. Returns nothing. def _count(self, word, dic_tag ) : [count, prop_set] = self.vocab.get( word, [0,set()] ) if count : # it was in the list: a new word would have count=0 self.vocab[word][0] += 1 # increment its count return # and done. # Word was not in the list (but is now): count is 0, prop_set is empty. # The following is only done once per unique word. self.my_book.metadata_modified(True, C.MD_MOD_FLAG) work = word[:] # copy the word, we may modify it next. if work.startswith("Point"): pass # debug # If word has apostrophes, note that and delete for following tests. if -1 < work.find("'") : # look for ascii apostrophe prop_set.add(AP) work = work.replace("'","") if -1 < work.find('\u02bc') : # look for MODIFIER LETTER APOSTROPHE prop_set.add(AP) work = work.replace('\u02bc','') # If word has hyphens, note that and remove them. if -1 < work.find('-') : prop_set.add(HY) work = work.replace('-','') # With the hyphens and apostrophes out, check letter case if ANY_DIGIT.search( work ) : # word has at least one numeric prop_set.add(ND) if not work.isnumeric() : # word is not all-numeric, determine case of letters if work.lower() == work : prop_set.add(LC) # most common case elif work.upper() != work : prop_set.add(MC) # next most common case else : # work.upper() == work prop_set.add(UC) if HY not in prop_set : # word is not hyphenated, so check its spelling. if word not in self.good_words : if word not in self.bad_words : # Word in neither good- nor bad-words if dic_tag : # uses an alt dictionary self.alt_tags[word] = dic_tag prop_set.add(AD) if not self.speller.check(word, dic_tag) : prop_set.add(XX) else : # in bad-words prop_set.add(XX) # else in good-words # else hyphenated, spellcheck only its parts self.vocab[word] = [1, prop_set] # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # # The following methods are called from the Words panel. # # Get the count of words in the vocabulary, as selected by the # latest sort vector. # def word_count(self): return self.active_word_count # # Get the actual size of the vocabulary, for searching it all. def vocab_count(self): return len(self.vocab) # # Get the word at position n in the vocabulary, using the SortedDict # KeysView for O(1) lookup time. Guard against invalid indices. # def word_at(self, n): try: return self.vocab_kview[n] except Exception as whatever: worddata_logger.error('bad call to word_at({0})'.format(n)) return ('?') # # Get the count and/or property-set of the word at position n in the # vocabulary, using the SortedDict ValuesView for O(1) lookup time. # def word_info_at(self, n): try: return self.vocab_vview[n] except Exception as whatever: worddata_logger.error('bad call to word_count_at({0})'.format(n)) return [0, set()] def word_count_at(self, n): try: return self.vocab_vview[n][0] except Exception as whatever: worddata_logger.error('bad call to word_count_at({0})'.format(n)) return 0 def word_props_at(self, n): try: return self.vocab_vview[n][1] except Exception as whatever: worddata_logger.error('bad call to word_props_at({0})'.format(n)) return (set()) # # Return a sort vector to implement column-sorting and/or filtering. 
The # returned value is a list of index numbers to self.vocab_vview and # vocab_kview such that iterating over the list selects vocabulary items # in some order. The parameters are: # # col is the number of the table column, 0:word, 1:count, 2:properties. # The sort key is formed based on the column: # 0: key is the word-token # 1: key is nnnnnnword-token so that words with the same count are # in sequence. # 2: fffffffword-token so that words with the same props are in sequence. # # order is Qt.AscendingOrder or Qt.DescendingOrder # # key_func is a callable used to extract or condition the key value when # a new key is added to a SortedDict, usually created by natsort.keygen() # and used to implement locale-aware and case-independent sorting. # # filter_func is a callable that examines a vocab entry and returns # True or False, meaning include or omit this entry from the vector. # Used to implement property filters or harmonic-sets. # # To implement Descending order we return a reversed() version of the # matching Ascending order vector. # # Because vectors are expensive to make, we cache them, so that to # return to a previous sort order takes near zero time. However we can't # cache every variation of a filtered vector, so when a filter_func is # passed we make the vector every time. # def _make_key_getter(self, col) : if col == 0 : return lambda j : self.vocab_kview[j] elif col == 1 : return lambda j : '{:05}:{}'.format( self.vocab_vview[j][0], self.vocab_kview[j] ) else : # col == 2 return lambda j : prop_string(self.vocab_vview[j][1]) + self.vocab_kview[j] def get_sort_vector( self, col, order, key_func = None, filter_func = None ) : if filter_func : # is not None, # create a sort vector from scratch, filtered getter_func = self._make_key_getter( col ) sorted_dict = SortedDict( key_func ) for j in range( len( self.vocab ) ) : if filter_func( self.vocab_kview[j], self.vocab_vview[j][1] ) : k = getter_func( j ) sorted_dict[ k ] = j vector = sorted_dict.values() if order != Qt.AscendingOrder : vector = [j for j in reversed( vector ) ] else : # no filter_func, try to reuse a cached vector vector = self.sort_up_vectors[ col ] if not vector or key_func is not self.sort_key_funcs[ col ] : # there is no ascending vector for this column, or there # is one but it was made with a different key_func. getter_func = self._make_key_getter( col ) sorted_dict = SortedDict( key_func ) for j in range( len( self.vocab ) ) : k = getter_func( j ) sorted_dict[ k ] = j vector = self.sort_up_vectors[ col ] = sorted_dict.values() self.sort_key_funcs[ col ] = key_func if order != Qt.AscendingOrder : # what is wanted is a descending order vector, do we have one? if self.sort_down_vectors[ col ] is None : # no, so create one from the asc. vector we now have self.sort_down_vectors[ col ] = [ j for j in reversed( vector ) ] # yes we do (now) vector = self.sort_down_vectors[ col ] # one way or another, vector is a sort vector # note the actual word count available through that vector self.active_word_count = len(vector) return vector # Return a reference to the good-words set def get_good_set(self): return self.good_words # Note the addition of a word to the good-words set. The word probably # (but does not have to) exist in the database; add GW and remove XX from # its properties. 
def add_to_good_set(self, word): self.good_words.add(word) if word in self.vocab_kview : [count, pset] = self.vocab[word] pset.add(GW) pset -= set([XX]) # conditional .remove() self.vocab[word] = [count,pset] # Note the removal of a word from the good-words set. The word exists in # the good-words set, because the wordview panel good-words list only # calls this for words it is displaying. The word may or may not exist in # the database. If it does, remove GW and set XX based on a spellcheck # test. def del_from_good_set(self, word): self.good_words.remove(word) if word in self.vocab_kview : [count, pset] = self.vocab[word] pset -= set([GW,XX]) dic_tag = self.alt_tags.get(word) if not self.speller.check(word, dic_tag) : pset.add(XX) self.vocab[word] = [count, pset] # mostly used by unit test, get the index of a word by its key def word_index(self, w): try: return self.vocab_kview.index(w) except Exception as whatever: worddata_logger.error('bad call to word_index({0})'.format(w)) return -1 # The following methods are used by the edit syntax highlighter to set flags. # # 1. Check a token for spelling. We expect the vast majority of words # will be in the list. And for performance, we want to respond in as little # code as possible! So if we know the word, reply at once. # # 2. If the word in the document isn't in the vocab, perhaps it is not # a normalized string, so try again, normalized. # # 3 If the token is not in the list, add it to the vocabulary with null # properties (to speed up repeat calls) and return False, meaning it is # not misspelled. The opposite, returning True for misspelled, in a new # book before Refresh is done, would highlight everything. # def spelling_test(self, tok_str) : count, prop_set = self.vocab.get(tok_str,[0,set()]) if count : # it was in the list return XX in prop_set tok_nlz = unicodedata.normalize('NFKC',tok_str) [count, prop_set] = self.vocab.get(tok_nlz,[0,set()]) return XX in prop_set # # 2. Check a token for being in the scannos list. If no scannos # have been loaded, none will be hilited. # def scanno_test(self, tok_str) : return tok_str in self.scannos
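# Quick sketch of the vocab_kview / vocab_vview trick used throughout WordData,
# assuming sortedcontainers: the keys() and values() views support integer indexing
# for row lookups and stay live as the underlying SortedDict changes.
from sortedcontainers import SortedDict

vocab = SortedDict()
kview, vview = vocab.keys(), vocab.values()
vocab['beta'] = [2, set()]
vocab['alpha'] = [5, set()]
print(kview[0], vview[0])  # alpha [5, set()]
print(kview[1], vview[1])  # beta [2, set()]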
def apply(self, protocol_name, output_dir, step=None, internal=False): # load best performing model with open(self.validate_txt_, 'r') as fp: eers = SortedDict(np.loadtxt(fp)) best_epoch = int(eers.iloc[np.argmin(eers.values())]) embedding = SequenceEmbeddingAutograd.load(self.train_dir_, best_epoch) # guess sequence duration from path (.../3.2+0.8/...) directory = basename(dirname(self.experiment_dir)) duration, _, _, _ = self._directory_to_params(directory) if step is None: step = 0.5 * duration # initialize embedding extraction batch_size = self.approach_.batch_size extraction = Extraction(embedding, self.feature_extraction_, duration, step=step, batch_size=batch_size, internal=internal) sliding_window = extraction.sliding_window dimension = extraction.dimension # create metadata file at root that contains # sliding window and dimension information path = Precomputed.get_config_path(output_dir) mkdir_p(dirname(path)) f = h5py.File(path) f.attrs['start'] = sliding_window.start f.attrs['duration'] = sliding_window.duration f.attrs['step'] = sliding_window.step f.attrs['dimension'] = dimension f.close() # file generator protocol = get_protocol(protocol_name, progress=True, preprocessors=self.preprocessors_) for subset in ['development', 'test', 'train']: try: file_generator = getattr(protocol, subset)() first_item = next(file_generator) except NotImplementedError as e: continue file_generator = getattr(protocol, subset)() for current_file in file_generator: fX = extraction.apply(current_file) path = Precomputed.get_path(output_dir, current_file) mkdir_p(dirname(path)) f = h5py.File(path) f.attrs['start'] = sliding_window.start f.attrs['duration'] = sliding_window.duration f.attrs['step'] = sliding_window.step f.attrs['dimension'] = dimension f.create_dataset('features', data=fX.data) f.close()
class KeyedRegion:
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset.

    It assumes no variable in this region overlaps with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """

    __slots__ = ('_storage', '_object_mapping', '_phi_node_contains')

    def __init__(self, tree=None, phi_node_contains=None):
        self._storage = SortedDict() if tree is None else tree
        self._object_mapping = weakref.WeakValueDictionary()
        self._phi_node_contains = phi_node_contains

    def __getstate__(self):
        return self._storage, dict(self._object_mapping), self._phi_node_contains

    def __setstate__(self, s):
        self._storage, om, self._phi_node_contains = s
        self._object_mapping = weakref.WeakValueDictionary(om)

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        if type(offset) is not int:
            raise TypeError("KeyedRegion only accepts concrete offsets.")

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion(phi_node_contains=self._phi_node_contains)

        kr = KeyedRegion(phi_node_contains=self._phi_node_contains)
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        kr._object_mapping = self._object_mapping.copy()
        return kr

    def merge(self, other, replacements=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for so in item.stored_objects:  # type: StoredObject
                if replacements and so.obj in replacements:
                    so = StoredObject(so.start, replacements[so.obj], so.size)
                self._object_mapping[so.obj_id] = so
                self.__store(so, overwrite=False)

        return self

    def replace(self, replacements):
        """
        Replace variables with other variables.

        :param dict replacements: A dict of variable replacements.
        :return: self
        """

        for old_var, new_var in replacements.items():
            old_var_id = id(old_var)
            if old_var_id in self._object_mapping:
                # FIXME: we need to check if old_var still exists in the storage
                old_so = self._object_mapping[old_var_id]  # type: StoredObject
                self._store(old_so.start, new_var, old_so.size, overwrite=True)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.

        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = { }

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [ obj.obj for obj in ro.stored_objects ]
            offset_to_vars[key] = variables

        s = [ ]
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """

        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other
        variables that are fully covered by this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other
        objects that are fully covered by this object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """

        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the
        given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype: int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype: set
        """

        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """

        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        stored_object = StoredObject(start, obj, size)
        self._object_mapping[stored_object.obj_id] = stored_object
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite: Whether existing objects should be overwritten or not. True to make a
                               strong update, False to make a weak update.
        :return: None
        """

        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end - 1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions
        # and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_with_check(b, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end, {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_with_check(a, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_with_check(item, stored_object)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(self._storage.irange(maximum=end - 1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_with_check(self, item, stored_object):
        if len({stored_object.obj} | item.internal_objects) > 1:
            if self._phi_node_contains is not None:
                # check if `item` is a phi node that contains stored_object.obj
                for so in item.internal_objects:
                    if self._phi_node_contains(so, stored_object.obj):
                        # yes! so we want to skip this object
                        return
                # check if `stored_object.obj` is a phi node that contains item.internal_objects
                if all(self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects):
                    # yes!
                    item.set_object(stored_object)
                    return

            l.warning("Overlapping objects %s.", str({stored_object.obj} | item.internal_objects))
            # import ipdb; ipdb.set_trace()
        item.add_object(stored_object)
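# Sketch of the floor-key lookup that _get_container() and __store() build on:
# SortedDict.irange(maximum=offset, reverse=True) yields keys <= offset in
# descending order, so its first element is the greatest region start not past
# the queried offset. Offsets and labels below are made up for illustration.
from sortedcontainers import SortedDict

regions = SortedDict({0: 'r0', 8: 'r8', 16: 'r16'})

def floor_key(offset):
    try:
        return next(regions.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None   # nothing starts at or below this offset

assert floor_key(11) == 8      # the region starting at 8 may cover offset 11
assert floor_key(-1) is None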
def generate_graphs(self, show=False):
    filename = "task_arrival_{0}.png".format(self.workload_name)
    if os.path.isfile(os.path.join(self.folder, filename)):
        return filename

    fig = plt.figure(figsize=(9, 7))

    granularity_order = ["Second", "Minute", "Hour", "Day"]
    granularity_lambdas = {
        "Second": 1000,
        "Minute": 60 * 1000,
        "Hour": 60 * 60 * 1000,
        "Day": 60 * 60 * 24 * 1000,
    }

    plot_count = 0
    for granularity in granularity_order:
        task_arrivals = SortedDict()

        df = self.df.withColumn(
            'ts_submit', F.col('ts_submit') / granularity_lambdas[granularity])
        df = df.withColumn('ts_submit', F.col('ts_submit').cast(T.LongType()))
        submit_times = df.groupBy("ts_submit").count().toPandas()

        for task in submit_times.itertuples():
            submit_time = int(task.ts_submit)
            if submit_time not in task_arrivals:
                task_arrivals[submit_time] = 0
            task_arrivals[submit_time] += task.count

        ax = plt.subplot2grid(
            (2, 2), (int(math.floor(plot_count / 2)), (plot_count % 2)))
        if max(task_arrivals.keys()) >= 1:
            ax.plot(task_arrivals.keys(), task_arrivals.values(),
                    color="black", linewidth=1.0)
            ax.grid(True)
        else:
            ax.text(0.5, 0.5, 'Not available;\nTrace too small.',
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes, fontsize=16)
            ax.grid(False)

        # Rotates and right-aligns the x labels, and moves the bottom of the
        # axes up to make room for them
        # fig.autofmt_xdate()

        ax.set_xlim(0)
        ax.set_ylim(0)
        ax.locator_params(nbins=3, axis='y')
        ax.margins(0.05)
        ax.tick_params(axis='both', which='major', labelsize=16)
        ax.tick_params(axis='both', which='minor', labelsize=14)
        ax.get_xaxis().get_offset_text().set_visible(False)

        formatter = ScalarFormatter(useMathText=True)
        formatter.set_powerlimits((-4, 5))
        ax.get_xaxis().set_major_formatter(formatter)

        fig.tight_layout()  # Need to set this to be able to get the offset... for whatever reason
        offset_text = ax.get_xaxis().get_major_formatter().get_offset()

        ax.set_xlabel('Time{0} [{1}]'.format(
            f' {offset_text}' if len(offset_text) else "",
            granularity.lower()), fontsize=18)
        ax.set_ylabel('Number of Tasks', fontsize=18)

        plot_count += 1

    fig.tight_layout()
    fig.savefig(os.path.join(self.folder, filename), dpi=600, format='png')
    if show:
        fig.show()

    return filename
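# Sketch of why a SortedDict is used for the arrival buckets above: keys stay
# in ascending submit-time order, so keys()/values() can be handed to ax.plot()
# without an extra sort. The submit times here are made up for illustration.
from sortedcontainers import SortedDict

task_arrivals = SortedDict()
for t in [5, 1, 3, 1, 5, 5]:               # arbitrary submit-time buckets
    task_arrivals[t] = task_arrivals.get(t, 0) + 1

print(list(task_arrivals.keys()))          # [1, 3, 5] -- already sorted for the x-axis
print(list(task_arrivals.values()))        # [2, 1, 3]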