def test_difference_operator():
    minuend = IntervalTree.from_tuples(data.ivs1.data)
    assert isinstance(minuend, IntervalTree)
    subtrahend = minuend.copy()
    expected_difference = IntervalTree([subtrahend.pop()])
    expected_difference.add(subtrahend.pop())
    minuend.verify()
    subtrahend.verify()
    expected_difference.verify()

    assert len(expected_difference) == len(minuend) - len(subtrahend)
    for iv in expected_difference:
        assert iv not in subtrahend
        assert iv in minuend

    difference = minuend - subtrahend
    difference.verify()
    for iv in difference:
        assert iv not in subtrahend
        assert iv in minuend
        assert iv in expected_difference
    assert difference == expected_difference
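# A minimal, hedged sketch (not part of the test suite above) of what the
# `-` operator exercised by test_difference_operator computes: the set
# difference of two IntervalTrees. Only the public intervaltree API is assumed.
from intervaltree import Interval, IntervalTree

a = IntervalTree([Interval(0, 5, 'x'), Interval(10, 20, 'y')])
b = IntervalTree([Interval(10, 20, 'y')])
diff = a - b                                  # intervals in a but not in b
assert diff == IntervalTree([Interval(0, 5, 'x')])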
def test_insert():
    tree = IntervalTree()

    tree[0:1] = "data"
    assert len(tree) == 1
    assert tree.items() == set([Interval(0, 1, "data")])

    tree.add(Interval(10, 20))
    assert len(tree) == 2
    assert tree.items() == set([Interval(0, 1, "data"), Interval(10, 20)])

    tree.addi(19.9, 20)
    assert len(tree) == 3
    assert tree.items() == set([
        Interval(0, 1, "data"),
        Interval(19.9, 20),
        Interval(10, 20),
    ])

    tree.update([Interval(19.9, 20.1), Interval(20.1, 30)])
    assert len(tree) == 5
    assert tree.items() == set([
        Interval(0, 1, "data"),
        Interval(19.9, 20),
        Interval(10, 20),
        Interval(19.9, 20.1),
        Interval(20.1, 30),
    ])
def test_add_descending(self, ivs):
    if self.verbose:
        pbar = ProgressBar(len(ivs))
    t = IntervalTree()
    for iv in sorted(ivs, reverse=True):
        t.add(iv)
        if self.verbose:
            pbar()
    return t
def get_known_meanings(self, w=None, option=None):
    if w is None:
        new_tree = IntervalTree()
        data = None
        begin = 0
        for iv in sorted(self._content_coding):
            m1 = (iv.end - iv.begin) / 2.
            if self.get_known_words(m1, option='max') != data:
                if data:
                    new_tree.add(Interval(begin, iv.end))
                begin = iv.end
                data = self.get_known_words(m1, option='max')
        return new_tree
    elif w in list(self._content_decoding.keys()):
        return self._content_decoding[w]
    else:
        return []
def test_duplicate_insert():
    tree = IntervalTree()

    # string data
    tree[-10:20] = "arbitrary data"
    contents = frozenset([Interval(-10, 20, "arbitrary data")])

    assert len(tree) == 1
    assert tree.items() == contents

    tree.addi(-10, 20, "arbitrary data")
    assert len(tree) == 1
    assert tree.items() == contents

    tree.add(Interval(-10, 20, "arbitrary data"))
    assert len(tree) == 1
    assert tree.items() == contents

    tree.update([Interval(-10, 20, "arbitrary data")])
    assert len(tree) == 1
    assert tree.items() == contents

    # None data
    tree[-10:20] = None
    contents = frozenset([
        Interval(-10, 20),
        Interval(-10, 20, "arbitrary data"),
    ])

    assert len(tree) == 2
    assert tree.items() == contents

    tree.addi(-10, 20)
    assert len(tree) == 2
    assert tree.items() == contents

    tree.add(Interval(-10, 20))
    assert len(tree) == 2
    assert tree.items() == contents

    tree.update([Interval(-10, 20), Interval(-10, 20, "arbitrary data")])
    assert len(tree) == 2
    assert tree.items() == contents
def unconsumed_ranges(self):
    """Return an IntervalTree of unconsumed ranges, of the format
    [start, end) with the end value not being included
    """
    res = IntervalTree()

    prev = None
    # normal iteration is not in a predictable order
    ranges = sorted([x for x in self.range_set], key=lambda x: x.begin)

    for rng in ranges:
        if prev is None:
            prev = rng
            continue
        res.add(Interval(prev.end, rng.begin))
        prev = rng

    # means we've seeked past the end
    if len(self.range_set[self.tell()]) != 1:
        res.add(Interval(prev.end, self.tell()))

    return res
class Day(object):
    def __init__(self, start, end, dt):
        self.dt = dt
        self.free = IntervalTree([get_iv(start, end)])
        self.booked = IntervalTree([])

    def is_free(self, interval):
        return (self.free.overlaps(interval) and
                not self.booked.overlaps(interval))

    def schedule(self, interval):
        assert self.is_free(interval),\
            "Attempt to double-book: {} - {}".format(
                m2t(interval.begin), m2t(interval.end))
        self.free.chop(interval.begin, interval.end + self.dt)
        self.booked.add(interval)

    def dumps(self):
        dump = ''
        for iv in sorted(self.booked):
            dump += "\t{} - {}\t{}\n".format(
                m2t(iv.begin), m2t(iv.end), iv.data)
        return dump
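# Hedged illustration of the chop()-based bookkeeping Day.schedule relies on,
# using plain integer minutes (get_iv and m2t above are module-local helpers
# and are not assumed here).
from intervaltree import Interval, IntervalTree

free = IntervalTree([Interval(540, 1020)])    # 09:00-17:00 as minutes
free.chop(600, 660)                           # carve out a 10:00-11:00 booking
assert sorted(free) == [Interval(540, 600), Interval(660, 1020)]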
def test_all():
    from intervaltree import Interval, IntervalTree
    from pprint import pprint
    from operator import attrgetter

    def makeinterval(lst):
        return Interval(
            lst[0],
            lst[1],
            "{}-{}".format(*lst)
        )

    ivs = list(map(makeinterval, [
        [1, 2],
        [4, 7],
        [5, 9],
        [6, 10],
        [8, 10],
        [8, 15],
        [10, 12],
        [12, 14],
        [14, 15],
    ]))
    t = IntervalTree(ivs)
    t.verify()

    def data(s):
        return set(map(attrgetter('data'), s))

    # Query tests
    print('Query tests...')
    assert data(t[4]) == set(['4-7'])
    assert data(t[4:5]) == set(['4-7'])
    assert data(t[4:6]) == set(['4-7', '5-9'])
    assert data(t[9]) == set(['6-10', '8-10', '8-15'])
    assert data(t[15]) == set()
    assert data(t.search(5)) == set(['4-7', '5-9'])
    assert data(t.search(6, 11, strict=True)) == set(['6-10', '8-10'])
    print(' passed')

    # Membership tests
    print('Membership tests...')
    assert ivs[1] in t
    assert Interval(1, 3, '1-3') not in t
    assert t.overlaps(4)
    assert t.overlaps(9)
    assert not t.overlaps(15)
    assert t.overlaps(0, 4)
    assert t.overlaps(1, 2)
    assert t.overlaps(1, 3)
    assert t.overlaps(8, 15)
    assert not t.overlaps(15, 16)
    assert not t.overlaps(-1, 0)
    assert not t.overlaps(2, 4)
    print(' passed')

    # Insertion tests
    print('Insertion tests...')
    t.add(makeinterval([1, 2]))  # adding duplicate should do nothing
    assert data(t[1]) == set(['1-2'])
    t[1:2] = '1-2'               # adding duplicate should do nothing
    assert data(t[1]) == set(['1-2'])

    t.add(makeinterval([2, 4]))
    assert data(t[2]) == set(['2-4'])
    t.verify()

    t[13:15] = '13-15'
    assert data(t[14]) == set(['8-15', '13-15', '14-15'])
    t.verify()
    print(' passed')

    # Duplication tests
    print('Interval duplication tests...')
    t.add(Interval(14, 15, '14-15####'))
    assert data(t[14]) == set(['8-15', '13-15', '14-15', '14-15####'])
    t.verify()
    print(' passed')

    # Copying and casting
    print('Tree copying and casting...')
    tcopy = IntervalTree(t)
    tcopy.verify()
    assert t == tcopy

    tlist = list(t)
    for iv in tlist:
        assert iv in t
    for iv in t:
        assert iv in tlist

    tset = set(t)
    assert tset == t.items()
    print(' passed')

    # Deletion tests
    print('Deletion tests...')
    try:
        t.remove(Interval(1, 3, "Doesn't exist"))
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError")

    try:
        t.remove(Interval(500, 1000, "Doesn't exist"))
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError")

    orig = t.print_structure(True)
    t.discard(Interval(1, 3, "Doesn't exist"))
    t.discard(Interval(500, 1000, "Doesn't exist"))

    assert data(t[14]) == set(['8-15', '13-15', '14-15', '14-15####'])
    t.remove(Interval(14, 15, '14-15####'))
    assert data(t[14]) == set(['8-15', '13-15', '14-15'])
    t.verify()

    assert data(t[2]) == set(['2-4'])
    t.discard(makeinterval([2, 4]))
    assert data(t[2]) == set()
    t.verify()

    assert t[14]
    t.remove_overlap(14)
    t.verify()
    assert not t[14]

    # Emptying the tree
    #t.print_structure()
    for iv in sorted(iter(t)):
        #print('### Removing '+str(iv)+'... ###')
        t.remove(iv)
        #t.print_structure()
        t.verify()
        #print('')
    assert len(t) == 0
    assert t.is_empty()
    assert not t

    t = IntervalTree(ivs)
    #t.print_structure()
    t.remove_overlap(1)
    #t.print_structure()
    t.verify()

    t.remove_overlap(8)
    #t.print_structure()
    print(' passed')

    t = IntervalTree(ivs)
    pprint(t)
    t.split_overlaps()
    pprint(t)
    #import cPickle as pickle
    #p = pickle.dumps(t)
    #print(p)
class DynamiteConfig(object):
    # Instance Variables
    ServiceFiles = None
    FleetAPIEndpoint = None
    ETCD = None
    Service = None
    ScalingPolicy = None
    IntervalTree = None

    # Should not matter after initialization!
    dynamite_yaml_config = None

    # Arguments:
    #   arg_config_path:                Path to the Dynamite YAML config file
    #   arg_service_folder (Optional):  List of paths containing service-files.
    #                                   Can also/additionally be defined in the dynamite yaml configuration file
    def __init__(self, arg_config_path=None, arg_service_folder_list=None, etcd_endpoint=None, fleet_endpoint=None):
        if arg_config_path is not None:
            self.init_from_file(arg_config_path, arg_service_folder_list, fleet_endpoint)

        if etcd_endpoint is not None:
            self.init_from_etcd(etcd_endpoint)

    def init_from_file(self, arg_config_path=None, arg_service_folder_list=None, fleet_endpoint=None):
        # Test if Config-File exists. If not, terminate application
        if not os.path.exists(arg_config_path):
            raise FileNotFoundError("--config-file: " + arg_config_path + " --> File at given config-path does not exist")
        else:
            dynamite_yaml_config = self.load_config_file(arg_config_path)
            self.set_instance_variables(dynamite_yaml_config, arg_service_folder_list, fleet_endpoint)

    def init_from_etcd(self, etcd_endpoint):
        etcdctl = ETCDCTL.create_etcdctl(etcd_endpoint)
        if etcdctl is not None:
            res = etcdctl.read(ETCDCTL.etcd_key_init_application_configuration)
            dynamite_config_str = res.value
            if dynamite_config_str is not None and isinstance(dynamite_config_str, str):
                dynamite_yaml_config = json.loads(dynamite_config_str)
                self.set_instance_variables(dynamite_yaml_config)
        else:
            return None

    # Converts YAML Config to Python Dictionary
    def load_config_file(self, path_to_config_file):
        with open(path_to_config_file, "r") as config_yaml:
            dynamite_yaml_config = yaml.load(config_yaml)
        return dynamite_yaml_config

    def set_instance_variables(self, dynamite_yaml_config, arg_service_folder_list=None, fleet_endpoint=None):
        self.dynamite_yaml_config = dynamite_yaml_config

        if isinstance(arg_service_folder_list, str):
            tmp_str = arg_service_folder_list
            arg_service_folder_list = []
            arg_service_folder_list.append(tmp_str)

        PathList = self.dynamite_yaml_config['Dynamite']['ServiceFiles']['PathList']
        self.ServiceFiles = DynamiteConfig.ServiceFilesStruct(PathList)

        # Combine the 2 lists containing paths to the service files
        if arg_service_folder_list:
            if self.ServiceFiles.PathList != arg_service_folder_list:
                if self.ServiceFiles.PathList is not None:
                    path_set_a = set(self.ServiceFiles.PathList)
                    path_set_b = set(arg_service_folder_list)
                    self.ServiceFiles.PathList = list(path_set_a) + list(path_set_b - path_set_a)
                else:
                    self.ServiceFiles.PathList = arg_service_folder_list

        # check if Folders in ServiceFiles-->PathList exit
        if self.ServiceFiles.PathList is not None:
            for folder in self.ServiceFiles.PathList:
                if not os.path.isdir(folder):
                    raise NotADirectoryError("Error: " + folder + " is not a valid directory")

        if fleet_endpoint is None:
            ip = self.dynamite_yaml_config['Dynamite']['FleetAPIEndpoint']['ip']
            port = self.dynamite_yaml_config['Dynamite']['FleetAPIEndpoint']['port']
            self.FleetAPIEndpoint = DynamiteConfig.FleetAPIEndpointStruct(ip, port)
        else:
            endpoint = ServiceEndpoint.ServiceEndpoint.from_string(fleet_endpoint)
            self.FleetAPIEndpoint = DynamiteConfig.FleetAPIEndpointStruct(endpoint.host_ip, endpoint.port)

        etcd_application_base_path = self.dynamite_yaml_config['Dynamite']['ETCD']['application_base_path']
        etcd_metrics_base_path = self.dynamite_yaml_config['Dynamite']['ETCD']['metrics_base_path']
        self.ETCD = DynamiteConfig.ETCDStruct(etcd_application_base_path, etcd_metrics_base_path)

        ServicesDict = self.dynamite_yaml_config['Dynamite']['Service']
        self.Service = DynamiteConfig.ServiceStruct(ServicesDict)

        ScalingPolicyDict = self.dynamite_yaml_config['Dynamite']['ScalingPolicy']
        self.ScalingPolicy = DynamiteConfig.ScalingPolicyStruct(ScalingPolicyDict)

    def check_for_overlapping_port_ranges(self):
        self.IntervalTree = IntervalTree()

        for service_name, service_detail in self.Service.__dict__.items():
            if service_detail.base_instance_prefix_number is not None:
                interval_start = service_detail.base_instance_prefix_number
                interval_end = interval_start + service_detail.max_instance
                new_interval = Interval(interval_start, interval_end)

                # True if <new_interval> is already contained in interval
                if sorted(self.IntervalTree[new_interval]):
                    # str() conversions added: concatenating the Interval and the
                    # list directly would raise a TypeError instead of this error
                    raise OverlappingPortRangeError("Error: " + str(new_interval) +
                                                    " overlaps with already existing interval(s)" +
                                                    str(sorted(self.IntervalTree[new_interval])))
                else:
                    self.IntervalTree.add(new_interval)

    # check for existence of service_dependencies
    def check_for_service_dependencies(self):
        list_of_services = []

        for service_name in self.Service.__dict__.keys():
            list_of_services.append(service_name)

        for service_detail in self.Service.__dict__.values():
            if service_detail.service_dependency is not None:
                for service_dependency in service_detail.service_dependency:
                    if service_dependency not in list_of_services:
                        raise ServiceDependencyNotExistError("Error: Service <" + service_dependency +
                                                             "> defined as service dependency of service <" + service_detail.name +
                                                             "> was not found in list of defined services --> " + str(list_of_services))

    class ServiceFilesStruct(object):
        # Instance Variables
        PathList = None

        def init_pathlist(self, PathList):
            checked_list_of_abs_paths = []
            for service_file_folder in PathList:
                if not os.path.isdir(service_file_folder):
                    raise NotADirectoryError("Error reading Dynamite Configuration (ServiceFiles-->PathList-->" +
                                             service_file_folder + " --> Is not a directory")
                if os.path.isabs(service_file_folder):
                    checked_list_of_abs_paths.append(service_file_folder)
                else:
                    checked_list_of_abs_paths.append(os.path.abspath(service_file_folder))
            return checked_list_of_abs_paths

        def __init__(self, PathList):
            if PathList is not None:
                self.PathList = self.init_pathlist(PathList)
            else:
                return None

        def __str__(self):
            return "ServiceFiles Struct:\n" \
                   "\t<Instance Variables>\n" \
                   "\t\tPathList, type: List\n" \
                   "\t\t\tNumber of Entries: " + str(len(self.PathList))

    class FleetAPIEndpointStruct(object):
        # Instance Variables
        ip = None
        port = None

        def __init__(self, ip, port):
            self.ip = ip
            self.port = port

        def __str__(self):
            return "FleetAPIEndpoint Struct:\n" \
                   "\t<Instance Variables>\n" \
                   "\t\t<IP,\ttype: String>\n" \
                   "\t\t<Port,\ttype: Int>"

    class ETCDStruct(object):
        # Instance Variables
        # ip_api_endpoint = None
        # port_api_endpoint = None
        application_base_path = None
        metrics_base_path = None

        #def __init__(self, ip_api_endpoint, port_api_endpoint, application_base_path):
        def __init__(self, application_base_path, metrics_base_path):
            # self.ip_api_endpoint = ip_api_endpoint
            # self.port_api_endpoint = port_api_endpoint
            self.application_base_path = application_base_path
            self.metrics_base_path = metrics_base_path

        def __str__(self):
            return_string = "ETCD Struct:\n" \
                            "\t<Instance Variables>\n"
            for (instance_variable_name, value) in self.__dict__.items():
                return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
            return return_string

    class ServiceStruct(object):
        def __init__(self, ServicesDict):
            if type(ServicesDict) is type({}):
                for (service_name, service_detail_dict) in ServicesDict.items():
                    setattr(self, service_name,
                            DynamiteConfig.ServiceStruct.ServiceDetailStruct(service_name, service_detail_dict))

        def __str__(self):
            return_string = "Service Struct:\n" \
                            "\t<Instance Variables>\n"
            for (instance_variable_name, value) in self.__dict__.items():
                return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
            return return_string

        class ServiceDetailStruct(object):
            # Instance Variables
            name = None
            name_of_unit_file = None
            type = None
            min_instance = None
            max_instance = None
            base_instance_prefix_number = None
            ports_per_instance = None
            attached_services = None
            service_dependency = None
            scale_up_policy = None
            scale_down_policy = None

            def __init__(self, name, service_detail_dict):
                self.name = name
                self.name_of_unit_file = service_detail_dict['name_of_unit_file'] if 'name_of_unit_file' in service_detail_dict else None
                self.type = service_detail_dict['type'] if 'type' in service_detail_dict else None
                self.min_instance = service_detail_dict['min_instance'] if 'min_instance' in service_detail_dict else None
                self.max_instance = service_detail_dict['max_instance'] if 'max_instance' in service_detail_dict else None
                self.base_instance_prefix_number = service_detail_dict['base_instance_prefix_number'] if 'base_instance_prefix_number' in service_detail_dict else None
                self.ports_per_instance = service_detail_dict['ports_per_instance'] if 'ports_per_instance' in service_detail_dict else None
                self.attached_services = service_detail_dict['attached_service'] if 'attached_service' in service_detail_dict else None
                self.service_dependency = service_detail_dict['service_dependency'] if 'service_dependency' in service_detail_dict else None
                self.scale_up_policy = service_detail_dict['scale_up_policy'] if 'scale_up_policy' in service_detail_dict else None
                self.scale_down_policy = service_detail_dict['scale_down_policy'] if 'scale_down_policy' in service_detail_dict else None

            def to_dict(self):
                service_detail_json = {}
                for key, value in self.__dict__.items():
                    service_detail_json[key] = value
                return service_detail_json

            @staticmethod
            def dict_to_instance(service_detail_struct_dict):
                name = service_detail_struct_dict['name']
                del service_detail_struct_dict['name']
                service_detail_struct_instance = DynamiteConfig.ServiceStruct.ServiceDetailStruct(name, service_detail_struct_dict)
                return service_detail_struct_instance

            def __str__(self):
                return_string = "ServiceDetail Struct:\n" \
                                "\t<Instance Variables>\n"
                for (instance_variable_name, value) in self.__dict__.items():
                    return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
                return return_string

            def __repr__(self):
                return "ServiceDetailStruct(name={},name_of_unit_file={},type={},min_instance={},max_instance={}," \
                       "base_instance_prefix_number={},attached_services={},service_dependency={}," \
                       "scale_up_policy={},scale_down_policy={})".format(
                            self.name,
                            self.name_of_unit_file,
                            self.type,
                            repr(self.min_instance),
                            repr(self.max_instance),
                            repr(self.base_instance_prefix_number),
                            repr(self.attached_services),
                            repr(self.service_dependency),
                            repr(self.scale_up_policy),
                            repr(self.scale_down_policy)
                        )

    class ScalingPolicyStruct(object):
        def __init__(self, ScalingPolicyDict):
            if(type(ScalingPolicyDict) == type({})):
                for (service_name, service_detail_dict) in ScalingPolicyDict.items():
                    setattr(self, service_name,
                            DynamiteConfig.ScalingPolicyStruct.ScalingPolicyDetailStruct(service_name, service_detail_dict))

        def get_scaling_policies(self):
            for policy_name, policy in self.__dict__.items():
                if not policy_name.startswith("_"):
                    return policy

        def __str__(self):
            return_string = "ServicePolicy Struct:\n" \
                            "\t<Instance Variables>\n"
            for (instance_variable_name, value) in self.__dict__.items():
                return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
            return return_string

        # do not make a property out of this, this would break the logic creating variables for each policy
        def get_scaling_policies(self):
            scaling_policies = []
            for (instance_variable_name, value) in self.__dict__.items():
                scaling_policies.append(value)
            return scaling_policies

        class ScalingPolicyDetailStruct(object):
            # Instance Variables
            name = None
            service_type = None
            metric = None
            metric_aggregated = None
            comparative_operator = None
            threshold = None
            threshold_unit = None
            period = None
            period_unit = None
            cooldown_period = None
            cooldown_period_unit = None

            def __init__(self, name, scaling_policy_detail_dict):
                self.name = name
                self.service_type = scaling_policy_detail_dict['service_type'] if 'service_type' in scaling_policy_detail_dict else None
                self.metric = scaling_policy_detail_dict['metric'] if 'metric' in scaling_policy_detail_dict else None
                self.metric_aggregated = scaling_policy_detail_dict['metric_aggregated'] if 'metric_aggregated' in scaling_policy_detail_dict else None
                self.comparative_operator = scaling_policy_detail_dict['comparative_operator'] if 'comparative_operator' in scaling_policy_detail_dict else None
                self.threshold = scaling_policy_detail_dict['threshold'] if 'threshold' in scaling_policy_detail_dict else None
                self.threshold_unit = scaling_policy_detail_dict['threshold_unit'] if 'threshold_unit' in scaling_policy_detail_dict else None
                self.period = scaling_policy_detail_dict['period'] if 'period' in scaling_policy_detail_dict else None
                self.period_unit = scaling_policy_detail_dict['period_unit'] if 'period_unit' in scaling_policy_detail_dict else None
                self.cooldown_period = scaling_policy_detail_dict['cooldown_period'] if 'cooldown_period' in scaling_policy_detail_dict else None
                self.cooldown_period_unit = scaling_policy_detail_dict['cooldown_period_unit'] if 'cooldown_period_unit' in scaling_policy_detail_dict else None

            def __str__(self):
                return_string = "ScalingPolicyDetail Struct:\n" \
                                "\t<Instance Variables>\n"
                for (instance_variable_name, value) in self.__dict__.items():
                    return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
                return return_string
class StorageResource(Resource):
    def __init__(self, scheduler: Scheduler, name: str, id: int,
                 resources_list: Resources = None, capacity_bytes: int = 0):
        super().__init__(scheduler, name, id, resources_list,
                         resource_sharing=True)
        self.capacity = capacity_bytes
        # job_id -> Interval(start, end, num_bytes)
        self._job_allocations: Dict[JobId, Interval] = {}
        self._interval_tree = IntervalTree()

    def currently_allocated_space(self) -> int:
        intervals = self._interval_tree[self._scheduler.time]
        allocated_space = sum(interval.data for interval in intervals)
        assert allocated_space <= self.capacity
        return allocated_space

    def available_space(self, start: float, end: float) -> int:
        """
        Available space in the storage resource in a time range (start, end).
        """
        intervals = self._interval_tree[start:end]
        interval_starts = [(interval.begin, interval.data) for interval in intervals]
        interval_ends = [(interval.end, -interval.data) for interval in intervals]
        interval_points = sorted(interval_starts + interval_ends)  # (time, value)

        # Compute max of prefix sum
        max_allocated_space = 0
        curr_allocated_space = 0
        for _, value in interval_points:
            curr_allocated_space += value
            max_allocated_space = max(max_allocated_space, curr_allocated_space)
        assert max_allocated_space <= self.capacity
        return self.capacity - max_allocated_space

    def allocate(self, start: float, end: float, num_bytes: int, job: Job):
        assert self._scheduler.time <= start <= end
        assert 0 < num_bytes <= self.available_space(start, end)
        # There should be only one interval per job.
        assert job.id not in self._job_allocations
        interval = Interval(start, end, num_bytes)
        self._job_allocations[job.id] = interval
        self._interval_tree.add(interval)
        assert bool(not self._job_allocations) == bool(
            self._interval_tree.is_empty())
        assert len(self._job_allocations) == len(
            self._interval_tree.all_intervals)
        if __debug__:
            self._interval_tree.verify()

    def free(self, job: Job):
        interval = self._job_allocations[job.id]
        self._interval_tree.remove(interval)
        del self._job_allocations[job.id]
        assert bool(not self._job_allocations) == bool(
            self._interval_tree.is_empty())
        assert len(self._job_allocations) == len(
            self._interval_tree.all_intervals)
        if __debug__:
            self._interval_tree.verify()

    def find_first_time_to_fit_job(self, job, time=None,
                                   future_reservation=False):
        raise NotImplementedError

    def get_allocation_end_times(self):
        return set(interval.end for interval in self._job_allocations.values())
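# Hedged sketch of the sweep used by StorageResource.available_space above:
# each allocation contributes +bytes at its begin and -bytes at its end, and
# the running maximum of the prefix sum is the peak concurrent usage. The
# function name and values here are illustrative only.
from intervaltree import Interval, IntervalTree

def peak_usage(tree, start, end):
    points = []
    for iv in tree[start:end]:                # allocations overlapping [start, end)
        points.append((iv.begin, iv.data))    # allocation begins: +bytes
        points.append((iv.end, -iv.data))     # allocation ends:   -bytes
    peak = current = 0
    for _, delta in sorted(points):
        current += delta
        peak = max(peak, current)
    return peak

tree = IntervalTree([Interval(0, 10, 100), Interval(5, 15, 50)])
assert peak_usage(tree, 0, 20) == 150         # both allocations held on [5, 10)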
def get_outer_hits(outer_hits, scaffold, sample, hitdir):
    scf_dir_tree = IntervalTree()
    for outer_hit in outer_hits:
        if scaffold == outer_hit.data.tname and sample == outer_hit.data.sample and hitdir != outer_hit.data.dir:
            scf_dir_tree.add(outer_hit)
    return scf_dir_tree
if (len(sys.argv) == 3) and (sys.argv[2] == 'saveit'):
    saveit = True

if saveit:
    print("Reading pandalog")
    with PLogReader(plog) as plr:
        for i, m in enumerate(plr):
            if (i % 100000) == 0:
                print("Log entry %d" % i)
            if m.HasField('basic_block'):
                bb = m.basic_block
                if bb.pc > 0:
                    interv = Interval(bb.pc, bb.pc + bb.size)
                    ct.add(interv)
                    bbs[interv] = bb.code
            if m.HasField('tainted_mmio_label'):
                t = m.tainted_mmio_label
                label = t.label
                ioaddr[label] = t.addr
            tqs = None
            if m.HasField('tainted_branch'):
                tqs = m.tainted_branch.taint_query
            if m.HasField('tainted_instr'):
                tqs = m.tainted_instr.taint_query
def shift_left_to(self, new_base: int, exs: ExonCoords):
    rebased = IntervalTree()
    shift = exs.exons.begin() - new_base
    for item in exs.exons:
        rebased.add(Interval(item.begin - shift, item.end - shift))
    return rebased
class BitwrappedStream(object):
    """A stream that wraps other streams to provide bit-level access"""

    closed = True

    def __init__(self, stream):
        """Init the bit-wrapped stream

        :stream: The normal byte stream
        """
        self._stream = stream
        self._bits = collections.deque()
        self.closed = False

        # assume that bitfields end on an even boundary,
        # otherwise the entire stream will be treated as
        # a bit stream with no padding
        self.padded = True

        self.range_set = IntervalTree()

    def is_eof(self):
        """Return if the stream has reached EOF or not
        without discarding any unflushed bits

        :returns: True/False
        """
        pos = self._stream.tell()
        byte = self._stream.read(1)
        self._stream.seek(pos, 0)

        return utils.binary(byte) == utils.binary("")

    def close(self):
        """Close the stream
        """
        self.closed = True
        self._flush_bits_to_stream()
        self._stream.close()

    def flush(self):
        """Flush the stream
        """
        self._flush_bits_to_stream()
        self._stream.flush()

    def isatty(self):
        """Return if the stream is a tty
        """
        return self._stream.isatty()

    def read(self, num):
        """Read ``num`` number of bytes from the stream. Note that this will
        automatically resets/ends the current bit-reading if it does not
        end on an even byte AND ``self.padded`` is True. If ``self.padded`` is
        True, then the entire stream is treated as a bitstream.

        :num: number of bytes to read
        :returns: the read bytes, or empty string if EOF has been reached
        """
        start_pos = self.tell()

        if self.padded:
            # we toss out any uneven bytes
            self._bits.clear()
            res = utils.binary(self._stream.read(num))
        else:
            bits = self.read_bits(num * 8)
            res = bits_to_bytes(bits)
            res = utils.binary(res)

        end_pos = self.tell()
        self._update_consumed_ranges(start_pos, end_pos)

        return res

    def read_bits(self, num):
        """Read ``num`` number of bits from the stream

        :num: number of bits to read
        :returns: a list of ``num`` bits, or an empty list if EOF has been reached
        """
        if num > len(self._bits):
            needed = num - len(self._bits)
            num_bytes = int(math.ceil(needed / 8.0))
            read_bytes = self._stream.read(num_bytes)

            for bit in bytes_to_bits(read_bytes):
                self._bits.append(bit)

        res = []
        while len(res) < num and len(self._bits) > 0:
            res.append(self._bits.popleft())

        return res

    def write(self, data):
        """Write data to the stream

        :data: the data to write to the stream
        :returns: None
        """
        if self.padded:
            # flush out any remaining bits first
            if len(self._bits) > 0:
                self._flush_bits_to_stream()
            self._stream.write(data)
        else:
            # nothing to do here
            if len(data) == 0:
                return

            bits = bytes_to_bits(data)
            self.write_bits(bits)

    def write_bits(self, bits):
        """Write the bits to the stream.

        Add the bits to the existing unflushed bits and write
        complete bytes to the stream.
        """
        for bit in bits:
            self._bits.append(bit)

        while len(self._bits) >= 8:
            byte_bits = [self._bits.popleft() for x in six.moves.range(8)]
            byte = bits_to_bytes(byte_bits)
            self._stream.write(byte)

        # there may be unflushed bits leftover and THAT'S OKAY

    def tell(self):
        """Return the current position in the stream (ignoring bit
        position)

        :returns: int for the position in the stream
        """
        res = self._stream.tell()
        if len(self._bits) > 0:
            res -= 1
        return res

    def tell_bits(self):
        """Return the number of bits into the stream since the last whole byte.

        :returns: int
        """
        if len(self._bits) == 0:
            return 0
        return 8 - len(self._bits)

    def seek(self, pos, seek_type=0):
        """Seek to the specified position in the stream with seek_type.
        Unflushed bits will be discarded in the case of a seek.

        The stream will also keep track of which bytes have and have
        not been consumed so that the dom will capture all of the
        bytes in the stream.

        :pos: offset
        :seek_type: direction
        :returns: TODO
        """
        self._bits.clear()
        return self._stream.seek(pos, seek_type)

    def size(self):
        """Return the size of the stream, or -1 if it cannot
        be determined.
        """
        pos = self._stream.tell()
        # seek to the end of the stream
        self._stream.seek(0, 2)
        size = self._stream.tell()
        self._stream.seek(pos, 0)

        return size

    def unconsumed_ranges(self):
        """Return an IntervalTree of unconsumed ranges, of the format
        [start, end) with the end value not being included
        """
        res = IntervalTree()

        prev = None
        # normal iteration is not in a predictable order
        ranges = sorted([x for x in self.range_set], key=lambda x: x.begin)

        for rng in ranges:
            if prev is None:
                prev = rng
                continue
            res.add(Interval(prev.end, rng.begin))
            prev = rng

        # means we've seeked past the end
        if len(self.range_set[self.tell()]) != 1:
            res.add(Interval(prev.end, self.tell()))

        return res

    # -----------------------------
    # PRIVATE FUNCTIONS
    # -----------------------------

    def _update_consumed_ranges(self, start_pos, end_pos):
        """Update the ``self.consumed_ranges`` array with which
        byte ranges have been consecutively consumed.
        """
        self.range_set.add(Interval(start_pos, end_pos + 1))
        self.range_set.merge_overlaps()

    def _flush_bits_to_stream(self):
        """Flush the bits to the stream. This is used when
        a few bits have been read and ``self._bits`` contains unconsumed/
        flushed bits when data is to be written to the stream
        """
        if len(self._bits) == 0:
            return 0

        bits = list(self._bits)

        diff = 8 - (len(bits) % 8)
        padding = [0] * diff

        bits = bits + padding

        self._stream.write(bits_to_bytes(bits))

        self._bits.clear()
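# Hedged sketch (independent of the pfp stream class above, assuming only the
# intervaltree package) of the consumed-range bookkeeping: overlapping reads
# collapse via merge_overlaps(), and the gaps between the merged ranges are
# exactly the unconsumed ranges.
from intervaltree import Interval, IntervalTree

consumed = IntervalTree([Interval(0, 5), Interval(3, 9), Interval(20, 25)])
consumed.merge_overlaps()                     # -> [0, 9) and [20, 25)

gaps = IntervalTree()
ranges = sorted(consumed)
for prev, cur in zip(ranges, ranges[1:]):
    gaps.add(Interval(prev.end, cur.begin))
assert sorted(gaps) == [Interval(9, 20)]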
class DynamiteConfig(object):
    # Instance Variables
    ServiceFiles = None
    FleetAPIEndpoint = None
    ETCD = None
    Service = None
    ScalingPolicy = None
    IntervalTree = None

    # Should not matter after initialization!
    dynamite_yaml_config = None

    _logger = logging.getLogger("dynamite.DynamiteConfig")

    # Arguments:
    #   arg_config_path:                Path to the Dynamite YAML config file
    #   arg_service_folder (Optional):  List of paths containing service-files.
    #                                   Can also/additionally be defined in the dynamite yaml configuration file
    def __init__(self, arg_config_path=None, arg_service_folder_list=None, etcd_endpoint=None, fleet_endpoint=None):
        if arg_config_path is not None:
            self.init_from_file(arg_config_path, arg_service_folder_list, fleet_endpoint)

        if etcd_endpoint is not None:
            self.init_from_etcd(etcd_endpoint)

    def init_from_file(self, arg_config_path=None, arg_service_folder_list=None, fleet_endpoint=None):
        # Test if Config-File exists. If not, terminate application
        if not os.path.exists(arg_config_path):
            raise FileNotFoundError(
                "--config-file: " + arg_config_path +
                " --> File at given config-path does not exist")
        else:
            dynamite_yaml_config = self.load_config_file(arg_config_path)
            self.set_instance_variables(dynamite_yaml_config, arg_service_folder_list, fleet_endpoint)

    def init_from_etcd(self, etcd_endpoint):
        etcdctl = ETCDCTL.create_etcdctl(etcd_endpoint)
        if etcdctl is not None:
            res = etcdctl.read(ETCDCTL.etcd_key_init_application_configuration)
            dynamite_config_str = res.value
            if dynamite_config_str is not None and isinstance(dynamite_config_str, str):
                dynamite_yaml_config = json.loads(dynamite_config_str)
                self.set_instance_variables(dynamite_yaml_config)
        else:
            return None

    # Converts YAML Config to Python Dictionary
    def load_config_file(self, path_to_config_file):
        with open(path_to_config_file, "r") as config_yaml:
            dynamite_yaml_config = yaml.load(config_yaml)
        return dynamite_yaml_config

    def set_instance_variables(self, dynamite_yaml_config, arg_service_folder_list=None, fleet_endpoint=None):
        self.dynamite_yaml_config = dynamite_yaml_config

        if isinstance(arg_service_folder_list, str):
            tmp_str = arg_service_folder_list
            arg_service_folder_list = []
            arg_service_folder_list.append(tmp_str)

        PathList = self.dynamite_yaml_config['Dynamite']['ServiceFiles']['PathList']
        self.ServiceFiles = DynamiteConfig.ServiceFilesStruct(PathList)

        # Combine the 2 lists containing paths to the service files
        if arg_service_folder_list:
            if self.ServiceFiles.PathList != arg_service_folder_list:
                if self.ServiceFiles.PathList is not None:
                    path_set_a = set(self.ServiceFiles.PathList)
                    path_set_b = set(arg_service_folder_list)
                    self.ServiceFiles.PathList = list(path_set_a) + list(path_set_b - path_set_a)
                else:
                    self.ServiceFiles.PathList = arg_service_folder_list

        # check if Folders in ServiceFiles-->PathList exit
        if self.ServiceFiles.PathList is not None:
            for folder in self.ServiceFiles.PathList:
                if not os.path.isdir(folder):
                    raise NotADirectoryError("Error: " + folder + " is not a valid directory")

        if fleet_endpoint is None:
            ip = self.dynamite_yaml_config['Dynamite']['FleetAPIEndpoint']['ip']
            port = self.dynamite_yaml_config['Dynamite']['FleetAPIEndpoint']['port']
            self.FleetAPIEndpoint = DynamiteConfig.FleetAPIEndpointStruct(ip, port)
        else:
            endpoint = ServiceEndpoint.ServiceEndpoint.from_string(fleet_endpoint)
            self.FleetAPIEndpoint = DynamiteConfig.FleetAPIEndpointStruct(endpoint.host_ip, endpoint.port)

        etcd_application_base_path = self.dynamite_yaml_config['Dynamite']['ETCD']['application_base_path']
        etcd_metrics_base_path = self.dynamite_yaml_config['Dynamite']['ETCD']['metrics_base_path']
        self.ETCD = DynamiteConfig.ETCDStruct(etcd_application_base_path, etcd_metrics_base_path)

        ServicesDict = self.dynamite_yaml_config['Dynamite']['Service']
        self.Service = DynamiteConfig.ServiceStruct(ServicesDict)

        ScalingPolicyDict = self.dynamite_yaml_config['Dynamite']['ScalingPolicy']
        self.ScalingPolicy = DynamiteConfig.ScalingPolicyStruct(ScalingPolicyDict)

    def check_for_overlapping_port_ranges(self):
        self.IntervalTree = IntervalTree()

        for service_name, service_detail in self.Service.__dict__.items():
            if service_detail.base_instance_prefix_number is not None:
                interval_start = service_detail.base_instance_prefix_number
                interval_end = interval_start + service_detail.max_instance
                new_interval = Interval(interval_start, interval_end)

                # True if <new_interval> is already contained in interval
                if sorted(self.IntervalTree[new_interval]):
                    # str() conversions added: concatenating the Interval and the
                    # list directly would raise a TypeError instead of this error
                    raise OverlappingPortRangeError(
                        "Error: " + str(new_interval) +
                        " overlaps with already existing interval(s)" +
                        str(sorted(self.IntervalTree[new_interval])))
                else:
                    self.IntervalTree.add(new_interval)

    # check for existence of service_dependencies
    def check_for_service_dependencies(self):
        list_of_services = []

        for service_name in self.Service.__dict__.keys():
            list_of_services.append(service_name)

        for service_detail in self.Service.__dict__.values():
            if service_detail.service_dependency is not None:
                for service_dependency in service_detail.service_dependency:
                    if service_dependency not in list_of_services:
                        raise ServiceDependencyNotExistError(
                            "Error: Service <" + service_dependency +
                            "> defined as service dependency of service <" + service_detail.name +
                            "> was not found in list of defined services --> " + str(list_of_services))

    class ServiceFilesStruct(object):
        # Instance Variables
        PathList = None

        _logger = logging.getLogger("dynamite.DynamiteConfig")

        def init_pathlist(self, PathList):
            checked_list_of_abs_paths = []
            for service_file_folder in PathList:
                self._logger.info("dir: " + service_file_folder)
                if not os.path.isdir(service_file_folder):
                    raise NotADirectoryError(
                        "Error reading Dynamite Configuration (ServiceFiles-->PathList-->" +
                        service_file_folder + " --> Is not a directory")
                if os.path.isabs(service_file_folder):
                    checked_list_of_abs_paths.append(service_file_folder)
                else:
                    checked_list_of_abs_paths.append(os.path.abspath(service_file_folder))
            return checked_list_of_abs_paths

        def __init__(self, PathList):
            if PathList is not None:
                self.PathList = self.init_pathlist(PathList)
            else:
                return None

        def __str__(self):
            return "ServiceFiles Struct:\n" \
                   "\t<Instance Variables>\n" \
                   "\t\tPathList, type: List\n" \
                   "\t\t\tNumber of Entries: " + str(len(self.PathList))

    class FleetAPIEndpointStruct(object):
        # Instance Variables
        ip = None
        port = None

        def __init__(self, ip, port):
            self.ip = ip
            self.port = port

        def __str__(self):
            return "FleetAPIEndpoint Struct:\n" \
                   "\t<Instance Variables>\n" \
                   "\t\t<IP,\ttype: String>\n" \
                   "\t\t<Port,\ttype: Int>"

    class ETCDStruct(object):
        # Instance Variables
        # ip_api_endpoint = None
        # port_api_endpoint = None
        application_base_path = None
        metrics_base_path = None

        #def __init__(self, ip_api_endpoint, port_api_endpoint, application_base_path):
        def __init__(self, application_base_path, metrics_base_path):
            # self.ip_api_endpoint = ip_api_endpoint
            # self.port_api_endpoint = port_api_endpoint
            self.application_base_path = application_base_path
            self.metrics_base_path = metrics_base_path

        def __str__(self):
            return_string = "ETCD Struct:\n" \
                            "\t<Instance Variables>\n"
            for (instance_variable_name, value) in self.__dict__.items():
                return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
            return return_string

    class ServiceStruct(object):
        def __init__(self, ServicesDict):
            if type(ServicesDict) is type({}):
                for (service_name, service_detail_dict) in ServicesDict.items():
                    setattr(self, service_name,
                            DynamiteConfig.ServiceStruct.ServiceDetailStruct(
                                service_name, service_detail_dict))

        def __str__(self):
            return_string = "Service Struct:\n" \
                            "\t<Instance Variables>\n"
            for (instance_variable_name, value) in self.__dict__.items():
                return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
            return return_string

        class ServiceDetailStruct(object):
            # Instance Variables
            name = None
            name_of_unit_file = None
            type = None
            min_instance = None
            max_instance = None
            base_instance_prefix_number = None
            ports_per_instance = None
            attached_services = None
            service_dependency = None
            scale_up_policy = None
            scale_down_policy = None

            def __init__(self, name, service_detail_dict):
                self.name = name
                self.name_of_unit_file = service_detail_dict['name_of_unit_file'] if 'name_of_unit_file' in service_detail_dict else None
                self.type = service_detail_dict['type'] if 'type' in service_detail_dict else None
                self.min_instance = service_detail_dict['min_instance'] if 'min_instance' in service_detail_dict else None
                self.max_instance = service_detail_dict['max_instance'] if 'max_instance' in service_detail_dict else None
                self.base_instance_prefix_number = service_detail_dict['base_instance_prefix_number'] if 'base_instance_prefix_number' in service_detail_dict else None
                self.ports_per_instance = service_detail_dict['ports_per_instance'] if 'ports_per_instance' in service_detail_dict else None
                self.attached_services = service_detail_dict['attached_service'] if 'attached_service' in service_detail_dict else None
                self.service_dependency = service_detail_dict['service_dependency'] if 'service_dependency' in service_detail_dict else None
                self.scale_up_policy = service_detail_dict['scale_up_policy'] if 'scale_up_policy' in service_detail_dict else None
                self.scale_down_policy = service_detail_dict['scale_down_policy'] if 'scale_down_policy' in service_detail_dict else None

            def to_dict(self):
                service_detail_json = {}
                for key, value in self.__dict__.items():
                    service_detail_json[key] = value
                return service_detail_json

            @staticmethod
            def dict_to_instance(service_detail_struct_dict):
                name = service_detail_struct_dict['name']
                del service_detail_struct_dict['name']
                service_detail_struct_instance = DynamiteConfig.ServiceStruct.ServiceDetailStruct(
                    name, service_detail_struct_dict)
                return service_detail_struct_instance

            def __str__(self):
                return_string = "ServiceDetail Struct:\n" \
                                "\t<Instance Variables>\n"
                for (instance_variable_name, value) in self.__dict__.items():
                    return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
                return return_string

            def __repr__(self):
                return "ServiceDetailStruct(name={},name_of_unit_file={},type={},min_instance={},max_instance={}," \
                       "base_instance_prefix_number={},attached_services={},service_dependency={}," \
                       "scale_up_policy={},scale_down_policy={})".format(
                            self.name,
                            self.name_of_unit_file,
                            self.type,
                            repr(self.min_instance),
                            repr(self.max_instance),
                            repr(self.base_instance_prefix_number),
                            repr(self.attached_services),
                            repr(self.service_dependency),
                            repr(self.scale_up_policy),
                            repr(self.scale_down_policy)
                        )

    class ScalingPolicyStruct(object):
        def __init__(self, ScalingPolicyDict):
            if (type(ScalingPolicyDict) == type({})):
                for (service_name, service_detail_dict) in ScalingPolicyDict.items():
                    setattr(self, service_name,
                            DynamiteConfig.ScalingPolicyStruct.ScalingPolicyDetailStruct(
                                service_name, service_detail_dict))

        def get_scaling_policies(self):
            for policy_name, policy in self.__dict__.items():
                if not policy_name.startswith("_"):
                    return policy

        def __str__(self):
            return_string = "ServicePolicy Struct:\n" \
                            "\t<Instance Variables>\n"
            for (instance_variable_name, value) in self.__dict__.items():
                return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
            return return_string

        # do not make a property out of this, this would break the logic creating variables for each policy
        def get_scaling_policies(self):
            scaling_policies = []
            for (instance_variable_name, value) in self.__dict__.items():
                scaling_policies.append(value)
            return scaling_policies

        class ScalingPolicyDetailStruct(object):
            # Instance Variables
            name = None
            service_type = None
            metric = None
            metric_aggregated = None
            comparative_operator = None
            threshold = None
            threshold_unit = None
            period = None
            period_unit = None
            cooldown_period = None
            cooldown_period_unit = None

            def __init__(self, name, scaling_policy_detail_dict):
                self.name = name
                self.service_type = scaling_policy_detail_dict['service_type'] if 'service_type' in scaling_policy_detail_dict else None
                self.metric = scaling_policy_detail_dict['metric'] if 'metric' in scaling_policy_detail_dict else None
                self.metric_aggregated = scaling_policy_detail_dict['metric_aggregated'] if 'metric_aggregated' in scaling_policy_detail_dict else None
                self.comparative_operator = scaling_policy_detail_dict['comparative_operator'] if 'comparative_operator' in scaling_policy_detail_dict else None
                self.threshold = scaling_policy_detail_dict['threshold'] if 'threshold' in scaling_policy_detail_dict else None
                self.threshold_unit = scaling_policy_detail_dict['threshold_unit'] if 'threshold_unit' in scaling_policy_detail_dict else None
                self.period = scaling_policy_detail_dict['period'] if 'period' in scaling_policy_detail_dict else None
                self.period_unit = scaling_policy_detail_dict['period_unit'] if 'period_unit' in scaling_policy_detail_dict else None
                self.cooldown_period = scaling_policy_detail_dict['cooldown_period'] if 'cooldown_period' in scaling_policy_detail_dict else None
                self.cooldown_period_unit = scaling_policy_detail_dict['cooldown_period_unit'] if 'cooldown_period_unit' in scaling_policy_detail_dict else None

            def __str__(self):
                return_string = "ScalingPolicyDetail Struct:\n" \
                                "\t<Instance Variables>\n"
                for (instance_variable_name, value) in self.__dict__.items():
                    return_string += "\t\tName: " + instance_variable_name + ", Type: " + str(type(value)) + "\n"
                return return_string
class SegmentProducer(object):

    save_interval = SAVE_INTERVAL

    def __init__(self, download, n_procs):

        assert download.size is not None,\
            'Segment producer passed uninitizalied Download!'

        self.download = download
        self.n_procs = n_procs

        # Initialize producer
        self.load_state()
        self._setup_pbar()
        self._setup_queues()
        self._setup_work()
        self.schedule()

    def _setup_pbar(self):
        self.pbar = None
        self.pbar = get_pbar(self.download.ID, self.download.size)

    def _setup_work(self):
        if self.is_complete():
            log.info('File already complete.')
            return

        work_size = self.integrate(self.work_pool)
        self.block_size = work_size / self.n_procs

    def _setup_queues(self):
        if WINDOWS:
            self.q_work = Queue()
            self.q_complete = Queue()
        else:
            manager = Manager()
            self.q_work = manager.Queue()
            self.q_complete = manager.Queue()

    def integrate(self, itree):
        return sum([i.end - i.begin for i in itree.items()])

    def validate_segment_md5sums(self):
        if not self.download.check_segment_md5sums:
            return True
        corrupt_segments = 0
        intervals = sorted(self.completed.items())
        pbar = ProgressBar(widgets=[
            'Checksumming {}: '.format(self.download.ID), Percentage(), ' ',
            Bar(marker='#', left='[', right=']'), ' ', ETA()])
        with mmap_open(self.download.path) as data:
            for interval in pbar(intervals):
                log.debug('Checking segment md5: {}'.format(interval))
                if not interval.data or 'md5sum' not in interval.data:
                    log.error(STRIP(
                        """User opted to check segment md5sums on restart.
                        Previous download did not record segment
                        md5sums (--no-segment-md5sums)."""))
                    return
                chunk = data[interval.begin:interval.end]
                checksum = md5sum(chunk)
                if checksum != interval.data.get('md5sum'):
                    log.debug('Redownloading corrupt segment {}, {}.'.format(
                        interval, checksum))
                    corrupt_segments += 1
                    self.completed.remove(interval)
        if corrupt_segments:
            log.warn('Redownloading {} currupt segments.'.format(
                corrupt_segments))

    def load_state(self):
        # Establish default intervals
        self.work_pool = IntervalTree([Interval(0, self.download.size)])
        self.completed = IntervalTree()
        self.size_complete = 0
        if not os.path.isfile(self.download.state_path)\
                and os.path.isfile(self.download.path):
            log.warn(STRIP(
                """A file named '{} was found but no state file was found at at
                '{}'. Either this file was downloaded to a different
                location, the state file was moved, or the state file
                was deleted.  Parcel refuses to claim the file has
                been successfully downloaded and will restart the
                download.\n""").format(
                    self.download.path, self.download.state_path))
            return

        if not os.path.isfile(self.download.state_path):
            self.download.setup_file()
            return

        # If there is a file at load_path, attempt to remove
        # downloaded sections from work_pool
        log.info('Found state file {}, attempting to resume download'.format(
            self.download.state_path))

        if not os.path.isfile(self.download.path):
            log.warn(STRIP(
                """State file found at '{}' but no file for {}.
                Restarting entire download.""".format(
                    self.download.state_path, self.download.ID)))
            return
        try:
            with open(self.download.state_path, "rb") as f:
                self.completed = pickle.load(f)
            assert isinstance(self.completed, IntervalTree), \
                "Bad save state: {}".format(self.download.state_path)
        except Exception as e:
            self.completed = IntervalTree()
            log.error('Unable to resume file state: {}'.format(str(e)))
        else:
            self.validate_segment_md5sums()
            self.size_complete = self.integrate(self.completed)
            for interval in self.completed:
                self.work_pool.chop(interval.begin, interval.end)

    def save_state(self):
        try:
            # Grab a temp file in the same directory (hopefully avoud
            # cross device links) in order to atomically write our save file
            temp = tempfile.NamedTemporaryFile(
                prefix='.parcel_',
                dir=os.path.abspath(self.download.state_directory),
                delete=False)
            # Write completed state
            pickle.dump(self.completed, temp)
            # Make sure all data is written to disk
            temp.flush()
            os.fsync(temp.fileno())
            temp.close()

            # Rename temp file as our save file, this could fail if
            # the state file and the temp directory are on different devices
            if OS_WINDOWS and os.path.exists(self.download.state_path):
                # If we're on windows, there's not much we can do here
                # except stash the old state file, rename the new one,
                # and back up if there is a problem.
                old_path = os.path.join(tempfile.gettempdir(), ''.join(
                    random.choice(string.ascii_lowercase + string.digits)
                    for _ in range(10)))
                try:
                    # stash the old state file
                    os.rename(self.download.state_path, old_path)
                    # move the new state file into place
                    os.rename(temp.name, self.download.state_path)
                    # if no exception, then delete the old stash
                    os.remove(old_path)
                except Exception as msg:
                    log.error('Unable to write state file: {}'.format(msg))
                    try:
                        os.rename(old_path, self.download.state_path)
                    except:
                        pass
                    raise
            else:
                # If we're not on windows, then we'll just try to
                # atomically rename the file
                os.rename(temp.name, self.download.state_path)

        except KeyboardInterrupt:
            log.warn('Keyboard interrupt. removing temp save file'.format(
                temp.name))
            temp.close()
            os.remove(temp.name)
        except Exception as e:
            log.error('Unable to save state: {}'.format(str(e)))
            raise

    def schedule(self):
        while True:
            interval = self._get_next_interval()
            log.debug('Returning interval: {}'.format(interval))
            if not interval:
                return
            self.q_work.put(interval)

    def _get_next_interval(self):
        intervals = sorted(self.work_pool.items())
        if not intervals:
            return None
        interval = intervals[0]
        start = interval.begin
        end = min(interval.end, start + self.block_size)
        self.work_pool.chop(start, end)
        return Interval(start, end)

    def print_progress(self):
        if not self.pbar:
            return
        try:
            self.pbar.update(self.size_complete)
        except Exception as e:
            log.debug('Unable to update pbar: {}'.format(str(e)))

    def check_file_exists_and_size(self):
        if self.download.is_regular_file:
            return (os.path.isfile(self.download.path)
                    and os.path.getsize(
                        self.download.path) == self.download.size)
        else:
            log.debug('File is not a regular file, refusing to check size.')
            return (os.path.exists(self.download.path))

    def is_complete(self):
        return (self.integrate(self.completed) == self.download.size and
                self.check_file_exists_and_size())

    def finish_download(self):
        # Tell the children there is no more work, each child should
        # pull one NoneType from the queue and exit
        for i in range(self.n_procs):
            self.q_work.put(None)

        # Wait for all the children to exit by checking to make sure
        # that everyone has taken their NoneType from the queue.
        # Otherwise, the segment producer will exit before the
        # children return, causing them to read from a closed queue
        log.debug('Waiting for children to report')
        while not self.q_work.empty():
            time.sleep(0.1)

        # Finish the progressbar
        if self.pbar:
            self.pbar.finish()

    def wait_for_completion(self):
        try:
            since_save = 0
            while not self.is_complete():
                while since_save < self.save_interval:
                    interval = self.q_complete.get()
                    self.completed.add(interval)
                    if self.is_complete():
                        break
                    this_size = interval.end - interval.begin
                    self.size_complete += this_size
                    since_save += this_size
                    self.print_progress()
                since_save = 0
                self.save_state()
        finally:
            self.finish_download()
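# Hedged sketch (made-up byte offsets) of the resume logic in
# SegmentProducer.load_state above: completed segments are chopped out of the
# work pool, and integrate() measures what remains to be downloaded.
from intervaltree import Interval, IntervalTree

total_size = 1000
work_pool = IntervalTree([Interval(0, total_size)])
completed = IntervalTree([Interval(0, 250), Interval(400, 600)])

for interval in completed:
    work_pool.chop(interval.begin, interval.end)

remaining = sum(i.end - i.begin for i in work_pool)
assert remaining == total_size - 450
assert sorted(work_pool) == [Interval(250, 400), Interval(600, 1000)]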
class Scheduler:
    """Scheduler for event looking for the most suitable time."""

    def __init__(self, intervals: list[Interval]):
        self._tree = IntervalTree()
        for interval in intervals:
            self._tree.add(interval)

    def get_most_suitable_time_intervals(
            self,
            duration: timedelta,
            limit: Optional[int] = None) -> list[Interval]:
        """
        Get most suitable time intervals based
        on the number of active participants.

        Return not more than `limit` time intervals if specified.
        """
        # get interval boundaries
        boundaries = list(self._tree.boundary_table.keys())

        # check if tree is not empty
        if len(boundaries) < 2:
            return []

        result = []

        # find all intervals with length greater than duration
        # using two-pointers technique
        left, right = 0, 1
        while left < len(boundaries):
            # move right pointer until the interval has enough duration
            while (right < len(boundaries)
                   and boundaries[right] - boundaries[left] < duration):
                right += 1
            if (right == len(boundaries)
                    or boundaries[right] - boundaries[left] < duration):
                break

            # go through all intervals and intersect data (participants)
            participants = set.intersection(
                *({interval.data for interval in self._tree[start:end]}
                  for start, end in zip(boundaries[left:right],
                                        boundaries[left + 1:right + 1])))
            result.append(
                Interval(boundaries[left], boundaries[right],
                         sorted(participants)))
            left += 1

        # sort by number of active participants
        result.sort(key=lambda t: len(t.data), reverse=True)
        if limit:
            result = result[:limit]
        return result

    @classmethod
    async def from_event(cls, event: Event) -> Scheduler:
        """Create an instance of Scheduler from tortoise event model."""
        intervals = []
        await event.fetch_related('timetables__time_intervals')
        async for timetable in event.timetables:
            async for time_interval in timetable.time_intervals:
                intervals.append(
                    Interval(
                        time_interval.start,
                        time_interval.end,
                        timetable.participant_name,
                    ))
        return Scheduler(intervals)
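# Hedged illustration (plain integers instead of datetimes) of the
# boundary_table the two-pointer scan above walks over: it holds every
# begin/end point in sorted order, so consecutive keys delimit the atomic
# slices being queried for participants.
from intervaltree import Interval, IntervalTree

tree = IntervalTree([Interval(9, 12, 'alice'), Interval(10, 14, 'bob')])
assert list(tree.boundary_table.keys()) == [9, 10, 12, 14]

# participants available over the whole atomic slice [10, 12)
assert {iv.data for iv in tree[10:12]} == {'alice', 'bob'}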
class Allocator(Publisher):
    # Initialization ------------------------------------------------------ {{{
    __slots__ = ('_aa', '_am', '_arg', '_ts')

    def __init__(self, tslam=None, cliargs=[], **kwargs):
        super().__init__()

        self._ts = tslam

        self._aa = IntervalTree()
        self._aa.add(AddrIval(0, 2**64, AState.REVOKED))

        self._am = IntervalTree()
        self._am.add(AddrIval(0, 2**64, AState.UNMAPD))

        self._ls = {}

        # Argument parsing ---------------------------------------------------- {{{
        argp = argparse.ArgumentParser()
        argp.add_argument('--fix',
                          action='store_const', const=True, default=False,
                          help="Automatically insert fixups for reports")
        argp.add_argument('--skip-map',
                          action='store_const', const=True, default=False,
                          help="Ignore map/unmap constraints")
        argp.add_argument('--drop-safe',
                          action='store_const', const=True, default=False,
                          help="Suppress warnings for safely dropped events")
        self._arg = argp.parse_args(cliargs)
        # --------------------------------------------------------------------- }}}
    # --------------------------------------------------------------------- }}}

    # Allocation ---------------------------------------------------------- {{{
    def _allocd(self, begin, end):
        overlaps_a = self._aa[begin:end]
        overlaps_m = self._am[begin:end]

        if not self._arg.skip_map:
            overlaps_unmapped = [o for o in overlaps_m if o.state == AState.UNMAPD]
            if overlaps_unmapped:
                logging.warning("Allocation ts=%d b=%x e=%x overlaps unmap=%r",
                                self._ts(), begin, end, overlaps_unmapped)
            # XXX fix by mapping pages

        overlaps_allocated = [o for o in overlaps_a if o.state == AState.ALLOCD]
        if overlaps_allocated:
            logging.error("Allocation ts=%d b=%x e=%x overlaps alloc=%r",
                          self._ts(), begin, end, overlaps_allocated)
            if self._arg.fix:
                for oa in overlaps_allocated:
                    self._publish('free', '', oa.begin)

        self._aa.chop(begin, end)
        self._aa.add(AddrIval(begin, end, AState.ALLOCD))

    def allocd(self, stk, begin, end):
        self._allocd(begin, end)
        self._publish('allocd', stk, begin, end)
    # --------------------------------------------------------------------- }}}

    # Freeing ------------------------------------------------------------- {{{
    def _freed(self, addr):
        doalloc = False
        end = addr + 1  # Will be fixed up later
        overlaps_a = self._aa[addr:end]
        overlaps_m = self._am[addr:end]

        if not self._arg.skip_map:
            overlaps_unmapped = [o for o in overlaps_m if o.state == AState.UNMAPD]
            if overlaps_unmapped:
                logging.error("Free ts=%d a=%x overlaps unmap=%r",
                              self._ts(), addr, overlaps_unmapped)

        allocations = [o for o in overlaps_a if o.state == AState.ALLOCD]
        overlaps_free = [o for o in overlaps_a if o.state == AState.FREED]
        if overlaps_free != []:
            logging.warning("Free ts=%d a=%x overlaps free=%r",
                            self._ts(), addr, overlaps_free)
            if allocations == [] and len(overlaps_free) == 1 and self._arg.drop_safe:
                return False
            else:
                for of in overlaps_free:
                    if of.begin <= addr:
                        end = max(end, of.end)
                if self._arg.fix:
                    doalloc = True

        if len(allocations) > 1 or (allocations != [] and overlaps_free != []):
            logging.error("Free ts=%d a=%x multiply-attested alloc=%r free=%r",
                          self._ts(), addr, allocations, overlaps_free)
        elif allocations == [] and overlaps_free == []:
            logging.warning("Free ts=%d a=%x no corresponding alloc",
                            self._ts(), addr)
            if self._arg.fix and not self._arg.drop_safe:
                doalloc = True
            else:
                assert doalloc == False
                return False
        else:
            for a in allocations:
                if a.begin != addr:
                    # Likely to leave cruft behind, indicative of serious errors
                    logging.error("Free ts=%d a=%x within alloc=%r",
                                  self._ts(), addr, a)
                else:
                    end = max(end, a.end)

        self._aa.chop(addr, end)
        self._aa.add(AddrIval(addr, end, AState.FREED))

        if doalloc:
            self._publish('allocd', '', addr, end)

        return True

    def freed(self, stk, addr):
        if addr == 0:
            # Just throw out free(NULL)
            return

        if self._freed(addr):
            self._publish('freed', stk, addr)
    # --------------------------------------------------------------------- }}}

    # Reallocation -------------------------------------------------------- {{{
    def reallocd(self, stk, begin_old, begin_new, end_new):
        self._freed(begin_old)
        self._allocd(begin_new, end_new)
        self._publish('reallocd', stk, begin_old, begin_new, end_new)
    # --------------------------------------------------------------------- }}}

    # Mapping ------------------------------------------------------------- {{{
    def mapd(self, stk, begin, end, prot):
        # XXX
        self._publish('mapd', stk, begin, end, prot)
    # --------------------------------------------------------------------- }}}

    # Unmapping ----------------------------------------------------------- {{{
    def unmapd(self, stk, begin, end):
        # XXX
        self._publish('unmapd', stk, begin, end)
    # --------------------------------------------------------------------- }}}

    # Revoking ------------------------------------------------------------ {{{
    def revoked(self, stk, spans):
        for (begin, end) in spans:
            overlaps = self._aa[begin:end]
            overlaps_allocated = [o for o in overlaps if o.state == AState.ALLOCD]
            if overlaps_allocated:
                logging.warning("Revocation ts=%d b=%x e=%x overlaps alloc=%r",
                                self._ts(), begin, end, overlaps_allocated)
                if self._arg.fix:
                    for oa in overlaps_allocated:
                        self._publish('free', '', oa.begin)
                        # XXX fix by freeing

        self._publish('revoked', stk, spans)
    # --------------------------------------------------------------------- }}}

    # Size-measurement pass-thru ------------------------------------------ {{{
    def size_measured(self, sz):
        self._publish('size_measured', sz)

    def sweep_size_measured(self, sz):
        self._publish('sweep_size_measured', sz)
def add_slides_with_annotations(self): def data_reducer(a, b): return a + b layer = self._add_layer('Slides') doc = ET.parse(os.path.join(self.opts.basedir, 'shapes.svg')) for img in doc.iterfind('./{http://www.w3.org/2000/svg}image'): path = img.get('{http://www.w3.org/1999/xlink}href') img.set('{http://www.w3.org/1999/xlink}href', os.path.join(self.opts.basedir, path)) if path.endswith('/deskshare.png'): continue img_width = int(img.get('width')) img_height = int(img.get('height')) canvas = doc.find( './{{http://www.w3.org/2000/svg}}g[@class="canvas"][@image="{}"]' .format(img.get('id'))) img_start = round(float(img.get('in')) * Gst.SECOND) img_end = round(float(img.get('out')) * Gst.SECOND) t = IntervalTree() t.add(Interval(begin=img_start, end=img_end, data=[])) if canvas is None: svg = ET.XML( '<svg version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {} {}"></svg>' .format(img_width, img_height)) svg.append(img) pngpath = os.path.join(self.opts.basedir, '{}.png'.format(img.get('id'))) if not os.path.exists(pngpath): cairosvg.svg2png(bytestring=ET.tostring(svg).decode( 'utf-8').encode('utf-8'), write_to=pngpath, output_width=img_width, output_height=img_height) asset = self._get_asset(pngpath) width, height = self._constrain( self._get_dimensions(asset), (self.slides_width, self.opts.height)) self._add_clip(layer, asset, img_start, 0, img_end - img_start, 0, 0, width, height) else: shapes = {} for shape in canvas.iterfind( './{http://www.w3.org/2000/svg}g[@class="shape"]'): shape_style = shape.get('style') shape.set('style', shape_style.replace('visibility:hidden;', '')) for shape_img in shape.iterfind( './{http://www.w3.org/2000/svg}image'): print(ET.tostring(shape_img)) shape_img_path = shape_img.get( '{http://www.w3.org/1999/xlink}href') shape_img.set( '{http://www.w3.org/1999/xlink}href', os.path.join(self.opts.basedir, shape_img_path)) start = img_start timestamp = shape.get('timestamp') shape_start = round(float(timestamp) * Gst.SECOND) if shape_start > img_start: start = shape_start end = img_end undo = shape.get('undo') shape_end = round(float(undo) * Gst.SECOND) if undo != '-1' and shape_end != 0 and shape_end < end: end = shape_end if end < start: continue shape_id = shape.get('shape') if shape_id in shapes: shapes[shape_id].append({ 'start': start, 'end': end, 'shape': shape }) else: shapes[shape_id] = [{ 'start': start, 'end': end, 'shape': shape }] for shape_id, shapes_list in shapes.items(): sorted_shapes = sorted(shapes_list, key=lambda k: k['start']) index = 1 for s in sorted_shapes: if index < len(shapes_list): s['end'] = sorted_shapes[index]['start'] t.add( Interval(begin=s['start'], end=s['end'], data=[(shape_id, s['shape'])])) index += 1 t.split_overlaps() t.merge_overlaps(data_reducer=data_reducer) for index, interval in enumerate(sorted(t)): svg = ET.XML( '<svg version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {} {}"></svg>' .format(img_width, img_height)) svg.append(img) for shape_id, shape in sorted(interval.data, key=lambda k: k[0]): svg.append(shape) pngpath = os.path.join( self.opts.basedir, '{}-{}.png'.format(img.get('id'), index)) if not os.path.exists(pngpath): cairosvg.svg2png(bytestring=ET.tostring(svg).decode( 'utf-8').encode('utf-8'), write_to=pngpath, output_width=img_width, output_height=img_height) asset = self._get_asset(pngpath) width, height = self._constrain( self._get_dimensions(asset), (self.slides_width, self.opts.height)) self._add_clip(layer, asset, interval.begin, 0, interval.end - interval.begin, 0, 0, width, height)
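# A minimal sketch of the split/merge step above, with made-up shape names and plain
# integer timestamps; assumes intervaltree 3.x for merge_overlaps(data_reducer=...).
# split_overlaps() cuts every interval at every boundary, and the reducer then collapses
# each identical window into one interval whose data lists everything visible in it.
from intervaltree import Interval, IntervalTree

t = IntervalTree()
t.add(Interval(0, 100, ['slide']))     # the slide itself, visible the whole time
t.add(Interval(10, 60, ['shape-1']))   # annotation drawn at 10, undone at 60
t.add(Interval(40, 100, ['shape-2']))  # annotation drawn at 40, kept until the end

t.split_overlaps()
t.merge_overlaps(data_reducer=lambda a, b: a + b)

for iv in sorted(t):
    print(iv.begin, iv.end, iv.data)   # e.g. 10 40 ['slide', 'shape-1'] (data order may vary)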
class TipsIndex: """ Use an interval tree to quick get the tips at a given timestamp. The interval of a transaction is in the form [begin, end), where `begin` is the transaction's timestamp, and `end` is when it was first verified by another transaction. If a transaction is still a tip, `end` is equal to infinity. If a transaction has been verified many times, `end` is equal to `min(tx.timestamp)`. TODO Use an interval tree stored in disk, possibly using a B-tree. """ # An interval tree used to know the tips at any timestamp. # The intervals are in the form (begin, end), where begin is the timestamp # of the transaction, and end is the smallest timestamp of the tx's children. tree: IntervalTree # It is a way to access the interval by the hash of the transaction. # It is useful because the interval tree allows access only by the interval. tx_last_interval: Dict[bytes, Interval] def __init__(self) -> None: self.log = logger.new() self.tree = IntervalTree() self.tx_last_interval = {} # Dict[bytes(hash), Interval] def add_tx(self, tx: BaseTransaction) -> bool: """ Add a new transaction to the index :param tx: Transaction to be added """ assert tx.hash is not None assert tx.storage is not None if tx.hash in self.tx_last_interval: return False # Fix the end of the interval of its parents. for parent_hash in tx.parents: pi = self.tx_last_interval.get(parent_hash, None) if not pi: continue if tx.timestamp < pi.end: self.tree.remove(pi) new_interval = Interval(pi.begin, tx.timestamp, pi.data) self.tree.add(new_interval) self.tx_last_interval[parent_hash] = new_interval # Check whether any children has already been added. # It so, the end of the interval is equal to the smallest timestamp of the children. min_timestamp = inf meta = tx.get_metadata() for child_hash in meta.children: if child_hash in self.tx_last_interval: child = tx.storage.get_transaction(child_hash) min_timestamp = min(min_timestamp, child.timestamp) # Add the interval to the tree. interval = Interval(tx.timestamp, min_timestamp, tx.hash) self.tree.add(interval) self.tx_last_interval[tx.hash] = interval return True def del_tx(self, tx: BaseTransaction, *, relax_assert: bool = False) -> None: """ Remove a transaction from the index. """ assert tx.hash is not None assert tx.storage is not None interval = self.tx_last_interval.pop(tx.hash, None) if interval is None: return if not relax_assert: assert interval.end == inf self.tree.remove(interval) # Update its parents as tips if needed. # FIXME Although it works, it does not seem to be a good solution. for parent_hash in tx.parents: parent = tx.storage.get_transaction(parent_hash) if parent.is_block != tx.is_block: continue self.update_tx(parent, relax_assert=relax_assert) def update_tx(self, tx: BaseTransaction, *, relax_assert: bool = False) -> None: """ Update a tx according to its children. """ assert tx.storage is not None assert tx.hash is not None meta = tx.get_metadata() if meta.voided_by: if not relax_assert: assert tx.hash not in self.tx_last_interval return pi = self.tx_last_interval[tx.hash] min_timestamp = inf for child_hash in meta.children: if child_hash in self.tx_last_interval: child = tx.storage.get_transaction(child_hash) min_timestamp = min(min_timestamp, child.timestamp) if min_timestamp != pi.end: self.tree.remove(pi) new_interval = Interval(pi.begin, min_timestamp, pi.data) self.tree.add(new_interval) self.tx_last_interval[tx.hash] = new_interval def __getitem__(self, index: float) -> Set[Interval]: return self.tree[index]
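# A minimal sketch of the tip-tracking idea in TipsIndex, using only intervaltree
# (the hashes and timestamps below are made up). A transaction lives as
# [timestamp, inf) until its first child arrives; Interval is immutable, so
# narrowing the end means remove-and-re-add, exactly as add_tx() does above.
from math import inf
from intervaltree import Interval, IntervalTree

tree = IntervalTree()
last = {}  # tx hash -> its current Interval

def add_tx(h, timestamp, parents=()):
    for p in parents:  # the first child closes its parent's interval
        pi = last.get(p)
        if pi is not None and timestamp < pi.end:
            tree.remove(pi)
            last[p] = Interval(pi.begin, timestamp, pi.data)
            tree.add(last[p])
    iv = Interval(timestamp, inf, h)  # still a tip: open-ended
    tree.add(iv)
    last[h] = iv

add_tx('a', 1)
add_tx('b', 5, parents=['a'])
print({iv.data for iv in tree[3]})  # {'a'} -- live intervals at t=3
print({iv.data for iv in tree[7]})  # {'b'}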
class BaseModel(Model): ''' Phage-Bacteria with RM systems ''' def __init__(self, initial_phage=10, initial_fraction_p_m1=1, initial_fraction_p_g1=1, initial_bacteria=100, fraction_b_m1=0.5, phage_inactivation_time=1, phage_burst_size=3, phage_off_diagonal=0.01, phage_mutation_step=0, phage_mutation_freq=0, re_degrade_foreign_0=1e-3, re_degrade_foreign_1=1e-3, bacteria_per_step=10, encounter_width=0.01, verbose=False, latency=0.5, epi_inheritance=1, **kwargs): ''' Create a new Phage-Bacteria model with the given parameters. Args: initial_phage (int) number of phage to start with intial_fraction_p_rm1 (float) percent of phage with methylation 1 initial_fration_p_g1 (float) percent of phage with genotype 1 initial_bacteria (int) number of bacteria to start with fraction_b_m1 (float) the fraction of bacteria with R-M system 1 that are added each epoch phage_inactivation_time (int) number of epochs a phage can live outside a host phage_burst_size (int) number of phage produced when it lyses a cell phage_off_diagonal (float) starting affinity of (e.g.) phage genotype 1 for bacteria genotype 1 phage_mutation_step (float) how big mutations are when they happen phage_mutation_freq (float) probability of a mutation re_degrade_foreign (float) probability that a R-M system will not kill a phage with the opposite methylation pattern bacteria_per_step (int) number of bacteria added during each epoch encounter_width (int) the world is [0,1], encounter width is the amount of bacteria that can be encountered by the phage verbose (bool) print info about the simulation latency (float) epi_inheritance (float or string) odds that the progeny gets the parent's methylation state. Otherwise gets None. If "genetic" then the phage inherits its parent's methylation state ''' # set parameters self.initial_phage = initial_phage self.initial_bacteria = initial_bacteria self.initial_fraction_p_m1 = initial_fraction_p_m1 self.initial_fraction_p_g1 = initial_fraction_p_g1 self.fraction_b_m1 = fraction_b_m1 self.verbose = verbose self.bacteria_per_step = bacteria_per_step self.phage_off_diagonal = phage_off_diagonal self.re_degrade_foreign_0 = re_degrade_foreign_0 self.re_degrade_foreign_1 = re_degrade_foreign_1 self.phage_mutation_step = phage_mutation_step self.phage_mutation_freq = phage_mutation_freq self.phage_inactivation_time = phage_inactivation_time self.phage_burst_size = phage_burst_size self.encounter_width = encounter_width self.agent_width = 0.0001 self.latency = latency self.epi_inheritance = epi_inheritance if self.encounter_width > 1 or self.encounter_width < 0: raise ValueError("Encounter width must be between 0 and 1") self.schedule = RandomActivationByBreed(self) model_reporters = { "phage": lambda m: m.schedule.get_breed_count(Phage), "bacteria": lambda m: m.schedule.get_breed_count(Bacteria), "bacteria_meth_0": lambda m: get_breed_filtered_count(Bacteria, by_methylation(0))(m), "phage_meth_0": lambda m: get_breed_filtered_count(Phage, by_methylation(0))(m) } agent_reporters = { "breed": lambda a: a.breed, "methylation": lambda a: a.methylation, "genotype": lambda a: a.genotype, "inactivation": lambda a: a.inactivation } self.datacollector = DataCollector(model_reporters=model_reporters, agent_reporters=agent_reporters) self.current_ID = 0 # Create phage self.add_phage() #Create bacteria self.add_bacteria(self.initial_bacteria) self.running = True def get_next_ID(self): self.current_ID += 1 return self.current_ID def get_evolvable_vector(self, probs): return EvolvableVector(probs, 
self.phage_mutation_step, self.phage_mutation_freq) def add_phage(self): #Create phage #phage start with affinity according to a symmetric matrix affinity = np.array( [[1 - self.phage_off_diagonal, self.phage_off_diagonal], [self.phage_off_diagonal, 1 - self.phage_off_diagonal]]) for i in range(self.initial_phage): # sample methylation rm_probs = [ 1 - self.initial_fraction_p_m1, self.initial_fraction_p_m1 ] rm = np.random.choice([0, 1], p=rm_probs) # sample genotypes g_probs = [ 1 - self.initial_fraction_p_g1, self.initial_fraction_p_g1 ] g = np.random.choice([0, 1], p=g_probs) # assign affinity based on genotype p_affinity = self.get_evolvable_vector(affinity[g, :].copy()) phage = Phage(self, self.get_next_ID(), g, rm, self.phage_inactivation_time, p_affinity, 0) # parent is 0 for first generation self.schedule.add(phage) def add_bacteria(self, num): for i in range(num): g = np.random.choice( [0, 1], p=[1 - self.fraction_b_m1, self.fraction_b_m1]) if g == 0: bacteria = Bacteria(self, self.get_next_ID(), g, g, self.re_degrade_foreign_0) elif g == 1: bacteria = Bacteria(self, self.get_next_ID(), g, g, self.re_degrade_foreign_1) else: raise (ValueError("Unknown genotype")) self.schedule.add(bacteria) def step(self): self.datacollector.collect(self) # Shuffle the location of agents each time self.tree = IntervalTree() for unique_id, agent in self.schedule.agents_by_breed[Bacteria].items( ): pos = random.random() self.tree.add(Interval(pos, pos + self.agent_width, agent)) self.schedule.step() if self.verbose: print([ self.schedule.time, self.schedule.get_breed_count(Phage), self.schedule.get_breed_count(Bacteria) ]) self.add_bacteria(self.bacteria_per_step) def run_model(self, step_count=200): if self.verbose: print('Initial number phage: ', self.schedule.get_breed_count(Phage)) print('Initial number bacteria: ', self.schedule.get_breed_count(Bacteria)) for i in range(step_count): self.step() if self.verbose: print('') print('Final number phage: ', self.schedule.get_breed_count(Phage)) print('Final number bacteria: ', self.schedule.get_breed_count(Bacteria))
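# A minimal sketch (illustrative constants and ids, not taken from BaseModel) of the
# encounter lookup done in step(): each bacterium is dropped at a random point on
# [0, 1) as a tiny interval, and a phage "sees" every interval overlapping its window.
import random
from intervaltree import Interval, IntervalTree

AGENT_WIDTH = 0.0001
ENCOUNTER_WIDTH = 0.01

tree = IntervalTree()
for agent_id in range(100):
    pos = random.random()
    tree.add(Interval(pos, pos + AGENT_WIDTH, agent_id))

phage_pos = random.random()
hits = tree[phage_pos:phage_pos + ENCOUNTER_WIDTH]  # bacteria within the encounter window
print(len(hits), sorted(iv.data for iv in hits))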
class MemoryCache(object): """! @brief Memory cache. Maintains a cache of target memory. The constructor is passed a backing DebugContext object that will be used to fill the cache. The cache is invalidated whenever the target has run since the last cache operation (based on run tokens). If the target is currently running, all accesses cause the cache to be invalidated. The target's memory map is referenced. All memory accesses must be fully contained within a single memory region, or a TransferFaultError will be raised. However, if an access is outside of all regions, the access is passed to the underlying context unmodified. When an access is within a region, that region's cacheability flag is honoured. """ def __init__(self, context, core): self._context = context self._core = core self._run_token = -1 self._reset_cache() def _reset_cache(self): self._cache = IntervalTree() self._metrics = CacheMetrics() def _check_cache(self): """! @brief Invalidates the cache if appropriate.""" if self._core.is_running(): LOG.debug("core is running; invalidating cache") self._reset_cache() elif self._run_token != self._core.run_token: self._dump_metrics() LOG.debug("out of date run token; invalidating cache") self._reset_cache() self._run_token = self._core.run_token def _get_ranges(self, addr, count): """! @brief Splits a memory address range into cached and uncached subranges. @return Returns a 2-tuple with the first element being a set of Interval objects for each of the cached subranges. The second element is a set of Interval objects for each of the non-cached subranges. """ cached = self._cache.overlap(addr, addr + count) uncached = {Interval(addr, addr + count)} for cachedIv in cached: newUncachedSet = set() for uncachedIv in uncached: # No overlap. if cachedIv.end < uncachedIv.begin or cachedIv.begin > uncachedIv.end: newUncachedSet.add(uncachedIv) continue # Begin segment. if cachedIv.begin - uncachedIv.begin > 0: newUncachedSet.add( Interval(uncachedIv.begin, cachedIv.begin)) # End segment. if uncachedIv.end - cachedIv.end > 0: newUncachedSet.add(Interval(cachedIv.end, uncachedIv.end)) uncached = newUncachedSet return cached, uncached def _read_uncached(self, uncached): """! "@brief Reads uncached memory ranges and updates the cache. @return A list of Interval objects is returned. Each Interval has its @a data attribute set to a bytearray of the data read from target memory. """ uncachedData = [] for uncachedIv in uncached: data = self._context.read_memory_block8( uncachedIv.begin, uncachedIv.end - uncachedIv.begin) iv = Interval(uncachedIv.begin, uncachedIv.end, bytearray(data)) self._cache.add(iv) # TODO merge contiguous cached intervals uncachedData.append(iv) return uncachedData def _update_metrics(self, cached, uncached, addr, size): cachedSize = 0 for iv in cached: begin = iv.begin end = iv.end if iv.begin < addr: begin = addr if iv.end > addr + size: end = addr + size cachedSize += end - begin uncachedSize = sum((iv.end - iv.begin) for iv in uncached) self._metrics.reads += 1 self._metrics.hits += cachedSize self._metrics.misses += uncachedSize def _dump_metrics(self): if self._metrics.total > 0: LOG.debug( "%d reads, %d bytes [%d%% hits, %d bytes]; %d bytes written", self._metrics.reads, self._metrics.total, self._metrics.percent_hit, self._metrics.hits, self._metrics.writes) else: LOG.debug("no reads") def _read(self, addr, size): """! @brief Performs a cached read operation of an address range. @return A list of Interval objects sorted by address. 
""" # Get the cached and uncached subranges of the requested read. cached, uncached = self._get_ranges(addr, size) self._update_metrics(cached, uncached, addr, size) # Read any uncached ranges. uncachedData = self._read_uncached(uncached) # Merged cached with data we just read combined = list(cached) + uncachedData combined.sort(key=lambda x: x.begin) return combined def _merge_data(self, combined, addr, size): """! @brief Extracts data from the intersection of an address range across a list of interval objects. The range represented by @a addr and @a size are assumed to overlap the intervals. The first and last interval in the list may have ragged edges not fully contained in the address range, in which case the correct slice of those intervals is extracted. @param self @param combined List of Interval objects forming a contiguous range. The @a data attribute of each interval must be a bytearray. @param addr Start address. Must be within the range of the first interval. @param size Number of bytes. (@a addr + @a size) must be within the range of the last interval. @return A single bytearray object with all data from the intervals that intersects the address range. """ result = bytearray() resultAppend = bytearray() # Check for fully contained subrange. if len(combined) and combined[0].begin < addr and combined[ 0].end > addr + size: offset = addr - combined[0].begin endOffset = offset + size result = combined[0].data[offset:endOffset] return result # Take slice of leading ragged edge. if len(combined) and combined[0].begin < addr: offset = addr - combined[0].begin result += combined[0].data[offset:] combined = combined[1:] # Take slice of trailing ragged edge. if len(combined) and combined[-1].end > addr + size: offset = addr + size - combined[-1].begin resultAppend = combined[-1].data[:offset] combined = combined[:-1] # Merge. for iv in combined: result += iv.data result += resultAppend return result def _update_contiguous(self, cached, addr, value): size = len(value) end = addr + size leadBegin = addr leadData = bytearray() trailData = bytearray() trailEnd = end if cached[0].begin < addr and cached[0].end > addr: offset = addr - cached[0].begin leadData = cached[0].data[:offset] leadBegin = cached[0].begin if cached[-1].begin < end and cached[-1].end > end: offset = end - cached[-1].begin trailData = cached[-1].data[offset:] trailEnd = cached[-1].end self._cache.remove_overlap(addr, end) data = leadData + value + trailData self._cache.addi(leadBegin, trailEnd, data) def _check_regions(self, addr, count): """! @return A bool indicating whether the given address range is fully contained within one known memory region, and that region is cacheable. @exception TransferFaultError Raised if the access is not entirely contained within a single region. """ regions = self._core.memory_map.get_intersecting_regions(addr, length=count) # If no regions matched, then allow an uncached operation. if len(regions) == 0: return False # Raise if not fully contained within one region. if len(regions) > 1 or not regions[0].contains_range(addr, length=count): raise TransferFaultError( "individual memory accesses must not cross memory region boundaries" ) # Otherwise return whether the region is cacheable. 
return regions[0].is_cacheable def read_memory(self, addr, transfer_size=32, now=True): # TODO use more optimal underlying read_memory calls if transfer_size == 8: data = self.read_memory_block8(addr, 1)[0] else: data = conversion.byte_list_to_nbit_le_list( self.read_memory_block8(addr, transfer_size // 8), transfer_size)[0] if now: return data else: def read_cb(): return data return read_cb def read_memory_block8(self, addr, size): if size <= 0: return [] self._check_cache() # Validate memory regions. if not self._check_regions(addr, size): LOG.debug("range [%x:%x] is not cacheable", addr, addr + size) return self._context.read_memory_block8(addr, size) # Get the cached and uncached subranges of the requested read. combined = self._read(addr, size) # Extract data out of combined intervals. result = list(self._merge_data(combined, addr, size)) assert len( result) == size, "result size ({}) != requested size ({})".format( len(result), size) return result def read_memory_block32(self, addr, size): return conversion.byte_list_to_u32le_list( self.read_memory_block8(addr, size * 4)) def write_memory(self, addr, value, transfer_size=32): if transfer_size == 8: return self.write_memory_block8(addr, [value]) else: return self.write_memory_block8( addr, conversion.nbit_le_list_to_byte_list([value], transfer_size)) def write_memory_block8(self, addr, value): if len(value) <= 0: return self._check_cache() # Validate memory regions. cacheable = self._check_regions(addr, len(value)) # Write to the target first, so if it fails we don't update the cache. result = self._context.write_memory_block8(addr, value) if cacheable: size = len(value) end = addr + size cached = sorted(self._cache.overlap(addr, end), key=lambda x: x.begin) self._metrics.writes += size if len(cached): # Write data is entirely within a single cached interval. if addr >= cached[0].begin and end <= cached[0].end: beginOffset = addr - cached[0].begin endOffset = beginOffset + size cached[0].data[beginOffset:endOffset] = value else: self._update_contiguous(cached, addr, bytearray(value)) else: # No cached data in this range, so just add the entire interval. self._cache.addi(addr, end, bytearray(value)) return result def write_memory_block32(self, addr, data): return self.write_memory_block8( addr, conversion.u32le_list_to_byte_list(data)) def invalidate(self): self._reset_cache()
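# A minimal sketch of the cached/uncached split performed by _get_ranges(), on toy
# data and with illustrative names; assumes intervaltree 3.x for overlap(). The
# requested range is progressively carved down to the gaps not covered by the cache.
from intervaltree import Interval, IntervalTree

cache = IntervalTree()
cache.addi(0x1000, 0x1100, bytearray(0x100))  # one cached block

def split_ranges(tree, addr, count):
    cached = tree.overlap(addr, addr + count)
    uncached = {Interval(addr, addr + count)}
    for c in cached:
        remaining = set()
        for u in uncached:
            if c.end <= u.begin or c.begin >= u.end:  # no overlap, keep as-is
                remaining.add(u)
                continue
            if c.begin > u.begin:                     # leading gap
                remaining.add(Interval(u.begin, c.begin))
            if c.end < u.end:                         # trailing gap
                remaining.add(Interval(c.end, u.end))
        uncached = remaining
    return cached, uncached

cached, uncached = split_ranges(cache, 0x0F80, 0x200)
print(sorted(uncached))  # gaps [0xF80, 0x1000) and [0x1100, 0x1180) still need a target read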
def __compute_workload( log: EventLog, resource: Optional[str] = None, activity: Optional[str] = None, parameters: Optional[Dict[Union[str, Parameters], Any]] = None ) -> Dict[Tuple, int]: """ Computes the workload of resources/activities, corresponding to each event a number (number of concurring events) Parameters --------------- log event log resource (if provided) Resource on which we want to compute the workload activity (if provided) Activity on which we want to compute the workload Returns --------------- workload_dict Dictionary associating to each event the number of concurring events """ if parameters is None: parameters = {} timestamp_key = exec_utils.get_param_value( Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY) resource_key = exec_utils.get_param_value( Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY) activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY) start_timestamp_key = exec_utils.get_param_value( Parameters.START_TIMESTAMP_KEY, parameters, None) from pm4py.objects.log.util import sorting log = sorting.sort_timestamp(log, timestamp_key) from pm4py.objects.log.util import interval_lifecycle log = interval_lifecycle.to_interval(log, parameters=parameters) if start_timestamp_key is None: log = __insert_start_from_previous_event(log, parameters=parameters) start_timestamp_key = xes_constants.DEFAULT_START_TIMESTAMP_KEY events = converter.apply(log, variant=converter.Variants.TO_EVENT_STREAM, parameters={ "deepcopy": False, "include_case_attributes": False }) if resource is not None: events = [x for x in events if x[resource_key] == resource] if activity is not None: events = [x for x in events if x[activity_key] == activity] events = [(x[start_timestamp_key].timestamp(), x[timestamp_key].timestamp(), x[resource_key], x[activity_key]) for x in events] events = sorted(events) from intervaltree import IntervalTree, Interval tree = IntervalTree() ev_map = {} k = 0.000001 for ev in events: tree.add(Interval(ev[0], ev[1] + k)) for ev in events: ev_map[ev] = len(tree[ev[0]:ev[1] + k]) return ev_map
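# A minimal sketch of the concurrency count at the heart of __compute_workload,
# with made-up (start, end) timestamp pairs instead of log events: each event's
# workload is the number of (slightly padded) intervals it overlaps, itself included.
from intervaltree import Interval, IntervalTree

events = [(0.0, 5.0), (1.0, 3.0), (4.0, 9.0), (10.0, 12.0)]
EPS = 0.000001  # keep zero-length events as valid intervals

tree = IntervalTree(Interval(s, e + EPS) for s, e in events)
workload = {ev: len(tree[ev[0]:ev[1] + EPS]) for ev in events}
print(workload)  # {(0.0, 5.0): 3, (1.0, 3.0): 2, (4.0, 9.0): 2, (10.0, 12.0): 1}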
def fuse_tandem_genes(self): # dealing with tandem repeats: # first we have to get parts by strand # - strand means we want to have the # right part from the breakpoint for 5' # left part from the breakpoint for 3' # join them by starting with the 5' part, # add the 3' part to its left # + strand means we want to have the # left part from the breakpoint for 5' # right part from the breakpoint for 3' # join them by starting with the 5' part # add the 3' part to its right prime5part = None prime3part = None if (self.prime5.strand < 0): prime5part = ExonCoords(self.prime5.chromosome, self.prime5.strand, self.prime5.breakpoint, self.prime5.gene_name, self.get_right_part(self.prime5)) prime3part = ExonCoords(self.prime3.chromosome, self.prime3.strand, self.prime3.breakpoint, self.prime3.gene_name, self.get_left_part(self.prime3)) else: prime5part = ExonCoords(self.prime5.chromosome, self.prime5.strand, self.prime5.breakpoint, self.prime5.gene_name, self.get_left_part(self.prime5)) prime3part = ExonCoords(self.prime3.chromosome, self.prime3.strand, self.prime3.breakpoint, self.prime3.gene_name, self.get_right_part(self.prime3)) prime5part.print_as_bed() prime3part.print_as_bed() # now move the 3' part to the 5' part p5borders = (prime5part.exons.begin(), prime5part.exons.end()) p3borders = (prime3part.exons.begin(), prime3part.exons.end()) # |------5------| # |------3------| # |------5------| # |------3------| # # |------5------| # |------3------| # |------5------| # |------3------| # shift = (5'start - 3'start) # 3'start = 3'start + shift shift = 0 if prime5part.strand > 0: shift = prime5part.breakpoint - prime3part.exons.begin() + 1 else: shift = prime5part.exons.begin() - prime3part.exons.end() # we have to shift 3' only shifted3p = IntervalTree() for iv in prime3part.exons: shifted3p.add(Interval(iv.begin + shift, iv.end + shift)) shifted5p = prime5part.exons # and now shift down the stuff to 0 for SVG left_shift = (shifted5p | shifted3p).begin() # TODO: DRY it out based05p = IntervalTree() for iv in shifted5p: based05p.add(Interval(iv.begin - left_shift, iv.end - left_shift)) based03p = IntervalTree() for iv in shifted3p: based03p.add(Interval(iv.begin - left_shift, iv.end - left_shift)) return based05p, based03p
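# A minimal sketch (coordinates are illustrative) of the shift/rebase step used in
# fuse_tandem_genes: Interval objects are immutable, so moving a set of exons means
# rebuilding a new tree with the offset applied, then shifting everything down to 0.
from intervaltree import Interval, IntervalTree

def shifted(tree, offset):
    return IntervalTree(Interval(iv.begin + offset, iv.end + offset, iv.data)
                        for iv in tree)

prime3 = IntervalTree.from_tuples([(1000, 1100), (1500, 1650)])
moved = shifted(prime3, -900)             # glue the 3' exons next to the 5' part
rebased = shifted(moved, -moved.begin())  # rebase to 0 for drawing (SVG)
print(sorted(rebased))                    # [Interval(0, 100), Interval(500, 650)]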
class MemoryCache(object): def __init__(self, context): self._context = context self._run_token = -1 self._log = logging.getLogger('memcache') self._reset_cache() def _reset_cache(self): self._cache = IntervalTree() self._metrics = CacheMetrics() ## # @brief Invalidates the cache if appropriate. def _check_cache(self): if self._context.core.isRunning(): self._log.debug("core is running; invalidating cache") self._reset_cache() elif self._run_token != self._context.core.run_token: self._dump_metrics() self._log.debug("out of date run token; invalidating cache") self._reset_cache() self._run_token = self._context.core.run_token ## # @brief Splits a memory address range into cached and uncached subranges. # @return Returns a 2-tuple with the first element being a set of Interval objects for each # of the cached subranges. The second element is a set of Interval objects for each of the # non-cached subranges. def _get_ranges(self, addr, count): cached = self._cache.search(addr, addr + count) uncached = {Interval(addr, addr + count)} for cachedIv in cached: newUncachedSet = set() for uncachedIv in uncached: # No overlap. if cachedIv.end < uncachedIv.begin or cachedIv.begin > uncachedIv.end: newUncachedSet.add(uncachedIv) continue # Begin segment. if cachedIv.begin - uncachedIv.begin > 0: newUncachedSet.add( Interval(uncachedIv.begin, cachedIv.begin)) # End segment. if uncachedIv.end - cachedIv.end > 0: newUncachedSet.add(Interval(cachedIv.end, uncachedIv.end)) uncached = newUncachedSet return cached, uncached ## # @brief Reads uncached memory ranges and updates the cache. # @return A list of Interval objects is returned. Each Interval has its @a data attribute set # to a bytearray of the data read from target memory. def _read_uncached(self, uncached): uncachedData = [] for uncachedIv in uncached: data = self._context.readBlockMemoryUnaligned8( uncachedIv.begin, uncachedIv.end - uncachedIv.begin) iv = Interval(uncachedIv.begin, uncachedIv.end, bytearray(data)) self._cache.add(iv) # TODO merge contiguous cached intervals uncachedData.append(iv) return uncachedData def _update_metrics(self, cached, uncached, addr, size): cachedSize = 0 for iv in cached: begin = iv.begin end = iv.end if iv.begin < addr: begin = addr if iv.end > addr + size: end = addr + size cachedSize += end - begin uncachedSize = sum((iv.end - iv.begin) for iv in uncached) self._metrics.reads += 1 self._metrics.hits += cachedSize self._metrics.misses += uncachedSize def _dump_metrics(self): if self._metrics.total > 0: self._log.debug( "%d reads, %d bytes [%d%% hits, %d bytes]; %d bytes written", self._metrics.reads, self._metrics.total, self._metrics.percent_hit, self._metrics.hits, self._metrics.writes) else: self._log.debug("no reads") ## # @brief Performs a cached read operation of an address range. # @return A list of Interval objects sorted by address. def _read(self, addr, size): # Get the cached and uncached subranges of the requested read. cached, uncached = self._get_ranges(addr, size) self._update_metrics(cached, uncached, addr, size) # Read any uncached ranges. uncachedData = self._read_uncached(uncached) # Merged cached with data we just read combined = list(cached) + uncachedData combined.sort(key=lambda x: x.begin) return combined ## # @brief Extracts data from the intersection of an address range across a list of interval objects. # # The range represented by @a addr and @a size are assumed to overlap the intervals. 
The first # and last interval in the list may have ragged edges not fully contained in the address range, in # which case the correct slice of those intervals is extracted. # # @param self # @param combined List of Interval objects forming a contiguous range. The @a data attribute of # each interval must be a bytearray. # @param addr Start address. Must be within the range of the first interval. # @param size Number of bytes. (@a addr + @a size) must be within the range of the last interval. # @return A single bytearray object with all data from the intervals that intersects the address # range. def _merge_data(self, combined, addr, size): result = bytearray() resultAppend = bytearray() # Take slice of leading ragged edge. if len(combined) and combined[0].begin < addr: offset = addr - combined[0].begin result += combined[0].data[offset:] combined = combined[1:] # Take slice of trailing ragged edge. if len(combined) and combined[-1].end > addr + size: offset = addr + size - combined[-1].begin resultAppend = combined[-1].data[:offset] combined = combined[:-1] # Merge. for iv in combined: result += iv.data result += resultAppend return result ## # @brief def _update_contiguous(self, cached, addr, value): size = len(value) end = addr + size leadBegin = addr leadData = bytearray() trailData = bytearray() trailEnd = end if cached[0].begin < addr and cached[0].end > addr: offset = addr - cached[0].begin leadData = cached[0].data[:offset] leadBegin = cached[0].begin if cached[-1].begin < end and cached[-1].end > end: offset = end - cached[-1].begin trailData = cached[-1].data[offset:] trailEnd = cached[-1].end self._cache.remove_overlap(addr, end) data = leadData + value + trailData self._cache.addi(leadBegin, trailEnd, data) ## # @return A bool indicating whether the given address range is fully contained within # one known memory region, and that region is cacheable. # @exception MemoryAccessError Raised if the access is not entirely contained within a single region. def _check_regions(self, addr, count): regions = self._context.core.memory_map.getIntersectingRegions( addr, length=count) # If no regions matched, then allow an uncached operation. if len(regions) == 0: return False # Raise if not fully contained within one region. if len(regions) > 1 or not regions[0].containsRange(addr, length=count): raise MemoryAccessError( "individual memory accesses must not cross memory region boundaries" ) # Otherwise return whether the region is cacheable. return regions[0].isCacheable def readMemory(self, addr, transfer_size=32, now=True): # TODO use more optimal underlying readMemory call if transfer_size == 8: data = self.readBlockMemoryUnaligned8(addr, 1)[0] elif transfer_size == 16: data = conversion.byteListToU16leList( self.readBlockMemoryUnaligned8(addr, 2))[0] elif transfer_size == 32: data = conversion.byteListToU32leList( self.readBlockMemoryUnaligned8(addr, 4))[0] if now: return data else: def read_cb(): return data return read_cb def readBlockMemoryUnaligned8(self, addr, size): if size <= 0: return [] self._check_cache() # Validate memory regions. if not self._check_regions(addr, size): self._log.debug("range [%x:%x] is not cacheable", addr, addr + size) return self._context.readBlockMemoryUnaligned8(addr, size) # Get the cached and uncached subranges of the requested read. combined = self._read(addr, size) # Extract data out of combined intervals. 
        result = list(self._merge_data(combined, addr, size))
        return result

    def readBlockMemoryAligned32(self, addr, size):
        return conversion.byteListToU32leList(
            self.readBlockMemoryUnaligned8(addr, size * 4))

    def writeMemory(self, addr, value, transfer_size=32):
        if transfer_size == 8:
            return self.writeBlockMemoryUnaligned8(addr, [value])
        elif transfer_size == 16:
            return self.writeBlockMemoryUnaligned8(
                addr, conversion.u16leListToByteList([value]))
        elif transfer_size == 32:
            return self.writeBlockMemoryUnaligned8(
                addr, conversion.u32leListToByteList([value]))

    def writeBlockMemoryUnaligned8(self, addr, value):
        if len(value) <= 0:
            return

        self._check_cache()

        # Validate memory regions.
        cacheable = self._check_regions(addr, len(value))

        # Write to the target first, so if it fails we don't update the cache.
        result = self._context.writeBlockMemoryUnaligned8(addr, value)

        if cacheable:
            size = len(value)
            end = addr + size
            cached = sorted(self._cache.search(addr, end),
                            key=lambda x: x.begin)
            self._metrics.writes += size
            if len(cached):
                # Write data is entirely within cached data.
                if addr >= cached[0].begin and end <= cached[0].end:
                    beginOffset = addr - cached[0].begin
                    endOffset = beginOffset + size
                    cached[0].data[beginOffset:endOffset] = value
                else:
                    self._update_contiguous(cached, addr, bytearray(value))
            else:
                # No cached data in this range, so just add the entire interval.
                self._cache.addi(addr, end, bytearray(value))

        return result

    def writeBlockMemoryAligned32(self, addr, data):
        return self.writeBlockMemoryUnaligned8(
            addr, conversion.u32leListToByteList(data))

    def invalidate(self):
        self._reset_cache()
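# This older variant calls IntervalTree.search(), which newer intervaltree releases
# expose as overlap() instead (the newer MemoryCache above already uses overlap()).
# A small compatibility sketch, not from the original code, that works on either API:
from intervaltree import IntervalTree

def overlap_compat(tree, begin, end):
    # Prefer the 3.x name; fall back to the legacy search() if it is all we have.
    if hasattr(tree, 'overlap'):
        return tree.overlap(begin, end)
    return tree.search(begin, end)

t = IntervalTree()
t.addi(0x2000, 0x2100, bytearray(0x100))
print(sorted(overlap_compat(t, 0x20F0, 0x2200)))  # hits the cached block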
class IntervalGraph(object): """Base class for undirected interval graphs. The IntervalGraph class allows any hashable object as a node and can associate key/value attribute pairs with each undirected edge. Each edge must have two integers, begin and end for its interval. Self-loops are allowed but multiple edges (two or more edges with the same nodes, begin and end interval) are not. Two nodes can have more than one edge with different overlapping or non-overlapping intervals. Parameters ---------- attr : keyword arguments, optional (default= no attributes) Attributes to add to graph as key=value pairs. Examples -------- Create an empty graph structure (a "null interval graph") with no nodes and no edges. >>> G = dnx.IntervalGraph() G can be grown in several ways. **Nodes:** Add one node at a time: >>> G.add_node(1) Add the nodes from any container (a list, dict, set or even the lines from a file or the nodes from another graph). Add the nodes from any container (a list, dict, set) >>> G.add_nodes_from([2, 3]) >>> G.add_nodes_from(range(100, 110)) **Edges:** G can also be grown by adding edges. This can be considered the primary way to grow G, since nodes with no edge will not appear in G in most cases. See ``G.to_snapshot()``. Add one edge, which starts at 0 and ends at 10. Keep in mind that the interval is [0, 10). Thus, it does not include the end. >>> G.add_edge(1, 2, 0, 10) a list of edges, >>> G.add_edges_from([(1, 2, 0, 10), (1, 3, 3, 11)]) If some edges connect nodes not yet in the graph, the nodes are added automatically. There are no errors when adding nodes or edges that already exist. **Attributes:** Each interval graph, node, and edge can hold key/value attribute pairs in an associated attribute dictionary (the keys must be hashable). By default these are empty, but can be added or changed using add_edge, add_node. Keep in mind that the edge interval is not an attribute of the edge. >>> G = dnx.IntervalGraph(day="Friday") >>> G.graph {'day': 'Friday'} Add node attributes using add_node(), add_nodes_from() >>> G.add_node(1, time='5pm') >>> G.add_nodes_from([3], time='2pm') Add edge attributes using add_edge(), add_edges_from(). >>> G.add_edge(1, 2, 0, 10, weight=4.7 ) >>> G.add_edges_from([(3, 4, 3, 11), (4, 5, 0, 33)], color='red') **Shortcuts:** Here are a couple examples of available shortcuts: >>> 1 in G # check if node in interval graph during any interval True >>> len(G) # number of nodes in the entire interval graph 5 **Subclasses (Advanced):** Edges in interval graphs are represented by Interval Objects and are kept in an IntervalTree. Both are based on intervaltree available in pypi (https://pypi.org/project/intervaltree). IntervalTree allows for fast interval based search through edges, which makes interval graph analyes possible. The Graph class uses a dict-of-dict-of-dict data structure. The outer dict (node_dict) holds adjacency information keyed by node. The next dict (adjlist_dict) represents the adjacency information and holds edge data keyed by interval object. The inner dict (edge_attr_dict) represents the edge data and holds edge attribute values keyed by attribute names. """ def __init__(self, **attr): """Initialize an interval graph with edges, name, or graph attributes. Parameters ---------- attr : keyword arguments, optional (default= no attributes) Attributes to add to graph as key=value pairs. 
Examples -------- >>> G = dnx.IntervalGraph() >>> G = dnx.IntervalGraph(name='my graph') >>> G.graph {'name': 'my graph'} """ self.tree = IntervalTree() self.graph = {} # dictionary for graph attributes self._adj = {} self._node = {} self.graph.update(attr) @property def name(self): """String identifier of the interval graph. This interval graph attribute appears in the attribute dict IG.graph keyed by the string `"name"`. as well as an attribute (technically a property) `IG.name`. This is entirely user controlled. """ return self.graph.get('name', '') @name.setter def name(self, s): self.graph['name'] = s def __str__(self): """Return the interval graph name. Returns ------- name : string The name of the interval graph. Examples -------- >>> G = dnx.IntervalGraph(name='foo') >>> str(G) 'foo' """ return self.name def __len__(self): """Return the number of nodes. Use: 'len(G)'. Returns ------- nnodes : int The number of nodes in the graph. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_nodes_from([2, 4, 5]) >>> len(G) 3 """ return len(self._node) def __contains__(self, n): """Return True if n is a node, False otherwise. Use: 'n in G'. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_node(2) >>> 2 in G True """ try: return n in self._node except TypeError: return False def interval(self): """Return a 2-tuple as (begin, end) interval of the entire interval graph. Note that end is non-inclusive. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 0, 10), (3, 7, 9, 16)]) >>> G.interval() (0, 16) """ return self.tree.begin(), self.tree.end() def add_node(self, node_for_adding, **attr): """Add a single node `node_for_adding` and update node attributes. Parameters ---------- node_for_adding : node A node can be any hashable Python object except None. attr : keyword arguments, optional Set or change node attributes using key=value. See Also -------- add_nodes_from Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_node(1) >>> G.add_node('Hello') >>> G.number_of_nodes() 2 Use keywords set/change node attributes: >>> G.add_node(1, size=10) >>> G.add_node(3, weight=0.4, UTM=('13S', 382871, 3972649)) Notes ----- A hashable object is one that can be used as a key in a Python dictionary. This includes strings, numbers, tuples of strings and numbers, etc. On many platforms hashable items also include mutables such as NetworkX Graphs, though one should be careful that the hash doesn't change on mutables. """ if node_for_adding not in self._node: self._adj[node_for_adding] = {} self._node[node_for_adding] = attr else: # update attr even if node already exists self._node[node_for_adding].update(attr) def add_nodes_from(self, nodes_for_adding, **attr): """Add multiple nodes. Parameters ---------- nodes_for_adding : iterable container A container of nodes (list, dict, set, etc.). OR A container of (node, attribute dict) tuples. Node attributes are updated using the attribute dict. attr : keyword arguments, optional (default= no attributes) Update attributes for all nodes in nodes. Node attributes specified in nodes as a tuple take precedence over attributes specified via keyword arguments. See Also -------- add_node Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_nodes_from('Hello') >>> G.has_node('e') True Use keywords to update specific node attributes for every node. >>> G.add_nodes_from([1, 2], size=10) >>> G.add_nodes_from([3, 4], weight=0.4) Use (node, attrdict) tuples to update attributes for specific nodes. 
>>> G.add_nodes_from([(1, dict(size=11)), (2, {'color':'blue'})]) """ for n in nodes_for_adding: # keep all this inside try/except because # CPython throws TypeError on n not in self._node, # while pre-2.7.5 ironpython throws on self._adj[n] try: if n not in self._node: self._adj[n] = {} self._node[n] = attr.copy() else: self._node[n].update(attr) except TypeError: nn, ndict = n if nn not in self._node: self._adj[nn] = {} self._node[nn] = attr.copy() self._node[nn].update(ndict) else: self._node[nn].update(attr) self._node[nn].update(ndict) def number_of_nodes(self, begin=None, end=None): """Return the number of nodes in the interval graph between the given interval. Parameters ---------- begin: integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the node appearing in the interval graph. end: integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the node appearing in the interval graph. Must be bigger than begin. Note that the default value is shifted up by 1 to make it an inclusive end. Returns ------- nnodes : int The number of nodes in the interval graph. See Also -------- __len__ Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 0, 5), (3, 4, 8, 11)]) >>> len(G) 4 >>> G.number_of_nodes() 4 >>> G.number_of_nodes(begin=6) 2 >>> G.number_of_nodes(begin=5, end=8) # end in non-inclusive 2 >>> G.number_of_nodes(end=8) 4 """ if begin is None and end is None: return len(self._node) if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 iedges = self.tree[begin:end] inodes = set() for iv in iedges: inodes.add(iv.data[0]) inodes.add(iv.data[1]) return len(inodes) def has_node(self, n, begin=None, end=None): """Return True if the interval graph contains the node n, during the given interval. Identical to `n in G` when 'begin' and 'end' are not defined. Parameters ---------- n : node begin: integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the node appearing in the interval graph. end: integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the node appearing in the interval graph. Must be bigger than begin. Note that the default value is shifted up by 1 to make it an inclusive end. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_ndoe(1) >>> G.has_node(1) True It is more readable and simpler to use >>> 0 in G True With interval query: >>> G.add_edge(3, 4, 2, 5) >>> G.has_node(3) True >>> G.has_node(3, begin=2) True >>> G.has_node(3, end=2) # end is non-inclusive False """ try: exists_node = n in self._node except TypeError: exists_node = False if (begin is None and end is None) or not exists_node: return exists_node if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 iedges = self._adj[n].keys() for iv in iedges: if iv.overlaps(begin=begin, end=end): return True return False def nodes(self, begin=None, end=None, data=False, default=None): """A NodeDataView of the IntervalGraph nodes. A nodes is considered to be present during an interval, if it has an edge with overlapping interval. Parameters ---------- begin: integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the node appearing in the interval graph. end: integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the node appearing in the interval graph. Must be bigger than begin. 
Note that the default value is shifted up by 1 to make it an inclusive end. data : string or bool, optional (default=False) The node attribute returned in 2-tuple (n, dict[data]). If False, return just the nodes n. default : value, optional (default=None) Value used for nodes that don't have the requested attribute. Only relevant if data is not True or False. Returns ------- NodeDataView A NodeDataView iterates over `(n, data)` and has no set operations. When called, if data is False, an iterator over nodes. Otherwise an iterator of 2-tuples (node, attribute value) where data is True. Examples -------- There are two simple ways of getting a list of all nodes in the graph: >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)]) [1, 2, 4, 6] To get the node data along with the nodes: >>> G.add_nodes_from([(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {'day': 'Friday'})]) [(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {'day': 'Friday'})] >>> G.nodes(data="time") [(1, '1pm'), (2, '2pm'), (4, '4pm'), (6, None)] >>> G.nodes(data="time", default="5pm") [(1, '1pm'), (2, '2pm'), (4, '4pm'), (6, '5pm')] To get nodes which appear in a specific interval. nodes without an edge are not considered present. >>> G.nodes(begin=11, data=True) [(2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {'day': 'Friday'})] >>> G.nodes(begin=4, end=12) # non-inclusive end [1, 2, 4] """ if begin is None and end is None: return NodeDataView(self._node, data=data, default=default) if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 iedges = self.tree[begin:end] inodes = set() for iv in iedges: inodes.add(iv.data[0]) inodes.add(iv.data[1]) node_dict = {n: self._node[n] for n in inodes} return NodeDataView(node_dict, data=data, default=default) def remove_node(self, n, begin=None, end=None): """Remove the presence of a node n within the given interval. Removes the presence node n and all adjacent edges within the given interval. If interval is specified, all the edges of n will be removed within that interval. Quiet if n is not in the interval graph. Parameters ---------- n : node A node in the graph begin: integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the node appearing in the interval graph. end: integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the node appearing in the interval graph. Must be bigger than begin. Note that the default value is shifted up by 1 to make it an inclusive end. 
Examples -------- >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)]) >>> G.add_nodes_from([(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'})]) >>> G.nodes(begin=4, end=6) [1, 2, 4, 6] >>> G.remove_node(2, begin=4, end=6) >>> G.nodes(begin=4, end=6) [4, 6] >>> G.nodes(data=True) [(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {})] >>> G.remove_node(2) >>> G.nodes(data=True) [(1, {'time': '1pm'}), (4, {'time': '4pm'}), (6, {})] """ if n not in self._node: return if begin is None and end is None: for iedge in list(self._adj[n].keys()): self.__remove_iedge(iedge) else: if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 for iedge in self.tree[begin:end]: if iedge.data[0] == n or iedge.data[1] == n: self.__remove_iedge(iedge) # delete the node and its attributes if no edge left if len(self._adj[n]) == 0: self._adj.pop(n, None) self._node.pop(n, None) def add_edge(self, u, v, begin, end, **attr): """Add an edge between u and v, during interval [begin, end). The nodes u and v will be automatically added if they are not already in the interval graph. Edge attributes can be specified with keywords or by directly accessing the edge's attribute dictionary. See examples below. Parameters ---------- u, v : nodes Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects. begin: orderable type Inclusive beginning time of the edge appearing in the interval graph. end: orderable type Non-inclusive ending time of the edge appearing in the interval graph. Must be bigger than begin. attr : keyword arguments, optional Edge data (or labels or objects) can be assigned using keyword arguments. See Also -------- add_edges_from : add a collection of edges Notes ----- Adding an edge that already exists updates the edge data. Both begin and end must be the same type across all edges in the interval graph. Also, to create snapshots, both must be integers. Many NetworkX algorithms designed for weighted graphs use an edge attribute (by default `weight`) to hold a numerical value. Examples -------- The following all add the edge e=(1, 2, 3, 10) to graph G: >>> G = dnx.IntervalGraph() >>> e = (1, 2, 3, 10) >>> G.add_edge(1, 2, 3, 10) # explicit two-node form with interval >>> G.add_edge(*e) # single edge as tuple of two nodes and interval >>> G.add_edges_from([(1, 2, 3, 10)]) # add edges from iterable container Associate data to edges using keywords: >>> G.add_edge(1, 2, 3, 10 weight=3) >>> G.add_edge(1, 3, 4, 9, weight=7, capacity=15, length=342.7) """ iedge = self.__get_iedge_in_tree(begin, end, u, v) # if edge exists, just update attr if iedge is not None: # since both point to the same attr, updating one is enough self._adj[u][iedge].update(attr) return iedge = Interval(begin, end, (u, v)) # add nodes if u not in self._node: self._adj[u] = {} self._node[u] = {} if v not in self._node: self._adj[v] = {} self._node[v] = {} # add edge try: self.tree.add(iedge) except ValueError: raise NetworkXError( "IntervalGraph: edge duration must be strictly bigger than zero {0}." .format(iedge)) self._adj[u][iedge] = self._adj[v][iedge] = attr def add_edges_from(self, ebunch_to_add, **attr): """Add all the edges in ebunch_to_add. Parameters ---------- ebunch_to_add : container of edges Each edge given in the container will be added to the interval graph. The edges must be given as as 4-tuples (u, v, being, end). Both begin and end must be orderable and the same type across all edges. 
attr : keyword arguments, optional Edge data (or labels or objects) can be assigned using keyword arguments. See Also -------- add_edge : add a single edge Notes ----- Adding the same edge (with the same interval) twice has no effect but any edge data will be updated when each duplicate edge is added. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11)]) # using a list of edge tuples Associate data to edges >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11)], weight=3) >>> G.add_edges_from([(3, 4, 2, 19), (1, 4, 1, 3)], label='WN2898') """ for e in ebunch_to_add: if len(e) != 4: raise NetworkXError( "Edge tuple {0} must be a 4-tuple.".format(e)) self.add_edge(e[0], e[1], e[2], e[3], **attr) def has_edge(self, u, v, begin=None, end=None, overlapping=True): """Return True if there exists an edge between u and v in the interval graph, during the given interval. Parameters ---------- u, v : nodes Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects. begin : integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the node appearing in the interval graph. end : integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the node appearing in the interval graph. Must be bigger than begin. Note that the default value is shifted up by 1 to make it an inclusive end. overlapping : bool, optional (default= True) if True, it returns True if there exists an edge between u and v with overlapping interval with `begin` and `end`. if False, it returns true only if there exists an edge between u and v with the exact interval. Note: if False, both `begin` and `end` must be defined, otherwise an exception is raised. Raises ------ NetworkXError If `begin` and `end` are not defined and `overlapping= False` Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11)]) >>> G.has_edge(1, 2) True With specific overlapping interval: >>> G.has_edge(1, 2, begin=2) True >>> G.has_edge(2, 4, begin=12) False Exact interval match: >>> G.has_edge(2, 4, begin=1, end=11) True >>> G.has_edge(2, 4, begin=2, end=11) False """ if begin is None and end is None: for iv in self._adj[u].keys(): if iv.data[0] == v or iv.data[1] == v: return True return False if not overlapping: if begin is None or end is None: raise NetworkXError( "For exact interval match (overlapping=False), both begin and end must be defined." ) return self.__get_iedge_in_tree(u, v, begin, end) is not None if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 for iv in self._adj[u].keys(): if (iv.data[0] == v or iv.data[1] == v) and iv.overlaps( begin=begin, end=end): return True return False def edges(self, u=None, v=None, begin=None, end=None, data=False, default=None): """A list of Interval objects of the IntervalGraph edges. All edges which are present within the given interval. All parameters are optional. `u` and `v` can be thought of as constraints. If no node is defined, all edges within the interval are returned. If one node is defined, all edges which have that node as one end, will be returned, and finally if both nodes are defined then all edges between the two nodes are returned. Parameters ---------- u, v : nodes, optional (default=None) Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects. If the node does not exist in the graph, a key error is raised. 
begin: integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the edge appearing in the interval graph. end: integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the edge appearing in the interval graph. Must be bigger than begin. Note that the default value is shifted up by 1 to make it an inclusive end. data : string or bool, optional (default=False) If True, return 2-tuple (Interval object, dict of attributes). If False, return just the Interval objects. If string (name of the attribute), return 2-tuple (Interval object, attribute value). default : value, optional (default=None) Default Value to be used for edges that don't have the requested attribute. Only relevant if `data` is a string (name of an attribute). Returns ------- List of Interval objects An interval object has the following format: (begin, end, (u, v)) When called, if `data` is False, a list of interval objects. If `data` is True, a list of 2-tuples: (Interval, dict of attribute(s) with values), If `data` is a string, a list of 2-tuples (Interval, attribute value). Examples -------- To get a list of all edges: >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)]) >>> G.edges() [Interval(8, 15, (2, 4)), Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4)), Interval(12, 19, (6, 4))] To get edges which appear in a specific interval: >>> G.edges(begin=10) [Interval(12, 19, (6, 4)), Interval(1, 11, (2, 4)), Interval(8, 15, (2, 4))] >>> G.edges(end=5) [Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4))] >>> G.edges(begin=2, end=4) [Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4))] To get edges with either of the two nodes being defined: >>> G.edges(u=2) [Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4)), Interval(8, 15, (2, 4))] >>> G.edges(u=2, begin=11) [Interval(1, 11, (2, 4)), Interval(8, 15, (2, 4))] >>> G.edges(u=2, v=4, end=8) [Interval(1, 11, (2, 4))] >>> G.edges(u=1, v=6) [] To get a list of edges with data: >>> G = dnx.IntervalGraph() >>> G.add_edge(1, 3, 1, 4, weight=8, height=18) >>> G.add_edge(1, 2, 3, 10, weight=10) >>> G.add_edge(2, 6, 2, 10) >>> G.edges(data="weight") [(Interval(2, 8, (2, 3)), None), (Interval(3, 10, (1, 2)), 10), (Interval(1, 4, (1, 3)), 8)] >>> G.edges(data="weight", default=5) [(Interval(2, 8, (2, 3)), 5), (Interval(3, 10, (1, 2)), 10), (Interval(1, 4, (1, 3)), 8)] >>> G.edges(data=True) [(Interval(2, 8, (2, 3)), {}), (Interval(3, 10, (1, 2)), {'weight': 10}), (Interval(1, 4, (1, 3)), {'height': 18, 'weight': 8})] >>> G.edges(u=1, begin=5, end=9, data="weight") [(Interval(3, 10, (1, 2)), 10)] """ # If non of the nodes are defined the interval tree is queried for the list of edges, # otherwise the edges are returned based on the nodes in the self._adj.o if u is None and v is None: if begin is None and end is None: iedges = self.tree.all_intervals # interval filtering else: if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 iedges = self.tree[begin:end] else: # Node filtering if u is not None and v is not None: iedges = [ iv for iv in self._adj[u].keys() if iv.data[0] == v or iv.data[1] == v ] elif u is not None: iedges = self._adj[u].keys() else: iedges = self._adj[v].keys() # Interval filtering if begin is not None and end is not None: iedges = [ iv for iv in iedges if iv.end >= begin and iv.begin < end ] elif begin is not None: iedges = [iv for iv in iedges if iv.end >= begin] elif end is not None: iedges = [iv for iv in 
iedges if iv.begin < end] # Appending attribute data if needed if data is False: return iedges if isinstance(iedges, list) else list(iedges) if data is True: return [(iv, self._adj[iv.data[0]][iv]) for iv in iedges] return [(iv, self._adj[iv.data[0]][iv][data]) if data in self._adj[iv.data[0]][iv].keys() else (iv, default) for iv in iedges] def remove_edge(self, u, v, begin=None, end=None, overlapping=True): """Remove the edge between u and v in the interval graph, during the given interval. Quiet if the specified edge is not present. Parameters ---------- u, v : nodes Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects. begin : integer, optional (default= beginning of the entire interval graph) Inclusive beginning time of the edge appearing in the interval graph. end : integer, optional (default= end of the entire interval graph + 1) Non-inclusive ending time of the edge appearing in the interval graph. Must be bigger than begin. Note that the default value is shifted up by 1 to make it an inclusive end. overlapping : bool, optional (default= True) if True, remove the edge between u and v with overlapping interval with `begin` and `end`. if False, remove the edge between u and v with the exact interval. Note: if False, both `begin` and `end` must be defined, otherwise an exception is raised. Raises ------ NetworkXError If `begin` and `end` are not defined and `overlapping= False` Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 5, 9), (1, 2, 8, 15)]) >>> G.remove_edge(1, 2) >>> G.has_edge(1, 2) False With specific overlapping interval >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 5, 9), (1, 2, 8, 15)]) >>> G.remove_edge(1, 2, begin=2, end=4) >>> G.has_edge(1, 2, begin=2, end=4) False >>> G.has_edge(1, 2) True Exact interval match >>> G.remove_edge(2, 4, begin=1, end=11, overlapping=False) >>> G.has_edge(2, 4, begin=1, end=11) False """ # remove edge between u and v with the exact given interval if not overlapping: if begin is None or end is None: raise NetworkXError( "For exact interval match (overlapping=False), both begin and end must be defined." ) iedge = self.__get_iedge_in_tree(u, v, begin, end) if iedge is None: return self.__remove_iedge(iedge) return iedges_to_remove = [] # remove every edge between u and v if begin is None and end is None: for iv in self._adj[u].keys(): if iv.data[0] == v or iv.data[1] == v: iedges_to_remove.append(iv) # remove edge between u and v with overlapping interval with the given interval if begin is None: begin = self.tree.begin() if end is None: end = self.tree.end() + 1 for iv in self._adj[u].keys(): if (iv.data[0] == v or iv.data[1] == v) and iv.overlaps( begin=begin, end=end): iedges_to_remove.append(iv) # removing found iedges for iv in iedges_to_remove: self.__remove_iedge(iv) def __remove_iedge(self, iedge): """Remove the interval edge from the interval graph. Quiet if the specified edge is not present. Parameters ---------- iedge : Interval object Interval edge to be removed. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edge(1, 2, 3, 10) >>> iedge = Interval(3, 10, (1, 2)) # Interval(begin, end, (u, v)) >>> G.__remove_iedge(iedge) """ self.tree.discard(iedge) self._adj[iedge.data[0]].pop(iedge, None) self._adj[iedge.data[1]].pop(iedge, None) def __get_iedge_in_tree(self, u, v, begin, end): """Return interval edge if found in the interval graph with the exact interval, otherwise return None. 
Parameters ---------- u, v : nodes Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects. begin : integer Inclusive beginning time of the edge appearing in the interval graph. end : integer Non-inclusive ending time of the edge appearing in the interval graph. Must be bigger than begin. Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edge(1, 2, 3, 10) >>> G.__get_iedge_in_tree(2, 1, 3, 10) Interval(3, 10, (1, 2)) >>> G.__get_iedge_in_tree(2, 1, 4, 10) None """ temp_iedge = Interval(begin, end, (u, v)) if temp_iedge in self.tree: return temp_iedge temp_iedge = Interval(begin, end, (v, u)) if temp_iedge in self.tree: return temp_iedge return None def to_subgraph(self, begin, end, multigraph=False, edge_data=False, edge_interval_data=False, node_data=False): """Return a networkx Graph or MultiGraph which includes all the nodes and edges which have overlapping intervals with the given interval. Parameters ---------- begin: integer Inclusive beginning time of the edge appearing in the interval graph. Must be bigger than begin. end: integer Non-inclusive ending time of the edge appearing in the interval graph. multigraph: bool, optional (default= False) If True, a networkx MultiGraph will be returned. If False, networkx Graph. edge_data: bool, optional (default= False) If True, edges will keep their attributes. edge_interval_data: bool, optional (default= False) If True, each edge's attribute will also include its begin and end interval data. If `edge_data= True` and there already exist edge attributes with names begin and end, they will be overwritten. node_data : bool, optional (default= False) if True, each node's attributes will be included. See Also -------- to_snapshots : divide the interval graph to snapshots Notes ----- If multigraph= False, and edge_data=True or edge_interval_data=True, in case there are multiple edges, only one will show with one of the edge's attributes. Note: nodes with no edges will not appear in any subgraph. 
Examples -------- >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)]) >>> H = G.to_subgraph(4, 12) >>> type(H) <class 'networkx.classes.graph.Graph'> >>> list(H.edges(data=True)) [(1, 2, {}), (2, 4, {})] >>> H = G.to_subgraph(4, 12, edge_interval_data=True) >>> type(H) <class 'networkx.classes.graph.Graph'> >>> list(H.edges(data=True)) [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 15, 'begin': 8})] >>> M = G.to_subgraph(4, 12, multigraph=True, edge_interval_data=True) >>> type(M) <class 'networkx.classes.multigraph.MultiGraph'> >>> list(M.edges(data=True)) [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 11, 'begin': 1}), (2, 4, {'end': 15, 'begin': 8})] """ if end <= begin: raise NetworkXError( "IntervalGraph: subgraph duration must be strictly bigger than zero: " "begin: {}, end: {}.".format(begin, end)) iedges = self.tree[begin:end] if multigraph: G = MultiGraph() else: G = Graph() if edge_data and edge_interval_data: G.add_edges_from((iedge.data[0], iedge.data[1], dict(self._adj[iedge.data[0]][iedge], begin=iedge.begin, end=iedge.end)) for iedge in iedges) elif edge_data: G.add_edges_from((iedge.data[0], iedge.data[1], self._adj[iedge.data[0]][iedge].copy()) for iedge in iedges) elif edge_interval_data: G.add_edges_from((iedge.data[0], iedge.data[1], { 'begin': iedge.begin, 'end': iedge.end }) for iedge in iedges) else: G.add_edges_from( (iedge.data[0], iedge.data[1]) for iedge in iedges) # include node attributes if node_data: G.add_nodes_from((n, self._node[n].copy()) for n in G.nodes) return G def to_snapshots(self, number_of_snapshots, multigraph=False, edge_data=False, edge_interval_data=False, node_data=False, return_length=False): """Return a list of networkx Graph or MultiGraph objects as snapshots of the interval graph in consecutive order. Parameters ---------- number_of_snapshots : integer Number of snapshots to divide the interval graph into. Must be bigger than 1. multigraph : bool, optional (default= False) If True, a networkx MultiGraph will be returned. If False, networkx Graph. edge_data: bool, optional (default= False) If True, edges will keep their attributes. edge_interval_data : bool, optional (default= False) If True, each edge's attribute will also include its begin and end interval data. If `edge_data= True` and there already exist edge attributes with names begin and end, they will be overwritten. node_data : bool, optional (default= False) if True, each node's attributes will be included. return_length : bool, optional (default= False) If true, the length of snapshots will be returned as the second argument. See Also -------- to_subgraph : subgraph based on an interval Notes ----- In order to create snapshots, begin and end interval objects of the interval graph must be numbers. If multigraph= False, and edge_data=True or edge_interval_data=True, in case there are multiple edges, only one will show with one of the edge's attributes. Examples -------- Snapshots of NetworkX Graph >>> G = dnx.IntervalGraph() >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)]) >>> S, l = G.to_snapshots(2, edge_interval_data=True, return_length=True) >>> S [<networkx.classes.graph.Graph object at 0x100000>, <networkx.classes.graph.Graph object at 0x150d00>] >>> l 9.0 >>> for g in S: >>> ... 
g.edges(data=True)
        [(1, 2, {'begin': 3, 'end': 10}), (2, 4, {'begin': 8, 'end': 15})]
        [(2, 4, {'begin': 8, 'end': 15}), (4, 6, {'begin': 12, 'end': 19})]

        Snapshots of NetworkX MultiGraph

        >>> S, l = G.to_snapshots(3, multigraph=True, edge_interval_data=True, return_length=True)
        >>> S
        [<networkx.classes.multigraph.MultiGraph object at 0x1060d40b8>, <networkx.classes.multigraph.MultiGraph object at 0x151020c9e8>, <networkx.classes.multigraph.MultiGraph object at 0x151021d390>]
        >>> l
        6.0
        >>> for g in S:
        >>> ...     g.edges(data=True)
        [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 11, 'begin': 1})]
        [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 11, 'begin': 1}), (2, 4, {'end': 15, 'begin': 8}), (4, 6, {'end': 19, 'begin': 12})]
        [(2, 4, {'end': 15, 'begin': 8}), (4, 6, {'end': 19, 'begin': 12})]
        """
        if type(number_of_snapshots) is not int or number_of_snapshots < 2:
            raise NetworkXError(
                "IntervalGraph: number of snapshots must be an integer and 2 or bigger. "
                "{0} was passed.".format(number_of_snapshots))

        begin, end = self.interval()
        snapshot_len = (end - begin) / number_of_snapshots

        snapshots = []
        end_inclusive_addition = 0
        for i in range(number_of_snapshots):
            # since to_subgraph is end non-inclusive, shift the end up by 1 to include end in the last snapshot.
            if i == number_of_snapshots - 1:
                end_inclusive_addition = 1
            snapshots.append(
                self.to_subgraph(begin + snapshot_len * i,
                                 begin + snapshot_len * (i + 1) + end_inclusive_addition,
                                 multigraph=multigraph,
                                 edge_data=edge_data,
                                 edge_interval_data=edge_interval_data,
                                 node_data=node_data))

        if return_length:
            return snapshots, snapshot_len

        return snapshots

    @staticmethod
    def load_from_txt(path, delimiter=" ", nodetype=None, comments="#"):
        """Read interval graph in from path.

        Every line in the file must be an edge in the following format: "node node begin end".
        Both interval times must be integers. Nodes can be any hashable objects.

        Parameters
        ----------
        path : string or file
            Filename to read.
        nodetype : Python type, optional
            Convert nodes to this type.
        comments : string, optional
            Marker for comment lines
        delimiter : string, optional
            Separator for node labels. The default is whitespace.

        Returns
        -------
        G: IntervalGraph
            The graph corresponding to the lines in edge list.

        Examples
        --------
        >>> G=dnx.IntervalGraph.load_from_txt("my_dygraph.txt")

        The optional nodetype is a function to convert node strings to nodetype.

        For example

        >>> G=dnx.IntervalGraph.load_from_txt("my_dygraph.txt", nodetype=int)

        will attempt to convert all nodes to integer type.

        Since nodes must be hashable, the function nodetype must return hashable
        types (e.g. int, float, str, frozenset - or tuples of those, etc.)
        """
        ig = IntervalGraph()

        with open(path, 'r') as file:
            for line in file:
                p = line.find(comments)
                if p >= 0:
                    line = line[:p]
                if not len(line):
                    continue

                line = line.rstrip().split(delimiter)
                u, v, begin, end = line

                if nodetype is not None:
                    try:
                        u = nodetype(u)
                        v = nodetype(v)
                    except:
                        raise TypeError(
                            "Failed to convert node to type {0}".format(nodetype))

                try:
                    begin = int(begin)
                    end = int(end)
                except:
                    raise TypeError("Failed to convert time to type int")

                ig.add_edge(u, v, begin, end)

        return ig
max_match = nx.maximal_matching(fg) print "Match Count:",len(max_match) if len(max_match) == 0: fg.clear() g_person_node_count = 0 g_wine_node_count = 0 nodes_to_process = False for node in max_match: if node[0][0] == "w": wine = node[0] person = node[1] elif node[0][0] == "p": person = node[0] wine = node[1] person_id = long(person.replace("p","")) wine_id = long(wine.replace("w","")) wine_sold += 1 fg.node[person]["c"] += 1 if fg.node[person]["c"] == MAX_WINE: pt.add(Interval(person_id,person_id+1)) fg.remove_node(person) wt.add(Interval(wine_id,wine_id+1)) fg.remove_node(wine) print "{0}\t{1}\t{2}".format(person,wine,wine_sold) f.close() print args.min_buffer_size, args.max_buffer_size, wine_sold, round(time.time()-start, 3)
class BitwrappedStream(object): """A stream that wraps other streams to provide bit-level access""" closed = True def __init__(self, stream): """Init the bit-wrapped stream :stream: The normal byte stream """ self._stream = stream self._bits = collections.deque() self.closed = False # assume that bitfields end on an even boundary, # otherwise the entire stream will be treated as # a bit stream with no padding self.padded = True # packed left-to-right self.direction = BIT_DIR_LEFT_RIGHT self.range_set = IntervalTree() def is_eof(self): """Return if the stream has reached EOF or not without discarding any unflushed bits :returns: True/False """ pos = self._stream.tell() byte = self._stream.read(1) self._stream.seek(pos, 0) return utils.binary(byte) == utils.binary("") def close(self): """Close the stream """ self.closed = True self._flush_bits_to_stream() self._stream.close() def flush(self): """Flush the stream """ self._flush_bits_to_stream() self._stream.flush() def isatty(self): """Return if the stream is a tty """ return self._stream.isatty() def read(self, num): """Read ``num`` number of bytes from the stream. Note that this will automatically resets/ends the current bit-reading if it does not end on an even byte AND ``self.padded`` is True. If ``self.padded`` is True, then the entire stream is treated as a bitstream. :num: number of bytes to read :returns: the read bytes, or empty string if EOF has been reached """ start_pos = self.tell() if self.padded: # we toss out any uneven bytes self._bits.clear() res = utils.binary(self._stream.read(num)) else: bits = self.read_bits(num * 8) res = bits_to_bytes(bits) res = utils.binary(res) end_pos = self.tell() self._update_consumed_ranges(start_pos, end_pos) return res def read_bits(self, num): """Read ``num`` number of bits from the stream :num: number of bits to read :returns: a list of ``num`` bits, or an empty list if EOF has been reached """ if num > len(self._bits): needed = num - len(self._bits) num_bytes = (needed // 8) + 1 read_bytes = self._stream.read(num_bytes) for bit in bytes_to_bits(read_bytes): self._bits.append(bit) res = [] while len(res) < num and len(self._bits) > 0: res.append(self._bits.popleft()) return res def write(self, data): """Write data to the stream :data: the data to write to the stream :returns: None """ if self.padded: # flush out any remaining bits first if len(self._bits) > 0: self._flush_bits_to_stream() self._stream.write(data) else: # nothing to do here if len(data) == 0: return bits = bytes_to_bits(data) self.write_bits(bits) def write_bits(self, bits): """Write the bits to the stream. Add the bits to the existing unflushed bits and write complete bytes to the stream. """ for bit in bits: self._bits.append(bit) while len(self._bits) >= 8: byte_bits = [self._bits.popleft() for x in six.moves.range(8)] byte = bits_to_bytes(byte_bits) self._stream.write(byte) # there may be unflushed bits leftover and THAT'S OKAY def tell(self): """Return the current position in the stream (ignoring bit position) :returns: int for the position in the stream """ res = self._stream.tell() if len(self._bits) > 0: res -= 1 return res def seek(self, pos, seek_type=0): """Seek to the specified position in the stream with seek_type. Unflushed bits will be discarded in the case of a seek. The stream will also keep track of which bytes have and have not been consumed so that the dom will capture all of the bytes in the stream. 
:pos: offset :seek_type: direction :returns: TODO """ self._bits.clear() return self._stream.seek(pos, seek_type) def size(self): """Return the size of the stream, or -1 if it cannot be determined. """ pos = self._stream.tell() # seek to the end of the stream self._stream.seek(0,2) size = self._stream.tell() self._stream.seek(pos, 0) return size def unconsumed_ranges(self): """Return an IntervalTree of unconsumed ranges, of the format (start, end] with the end value not being included """ res = IntervalTree() prev = None # normal iteration is not in a predictable order ranges = sorted([x for x in self.range_set], key=lambda x: x.begin) for rng in ranges: if prev is None: prev = rng continue res.add(Interval(prev.end, rng.begin)) prev = rng # means we've seeked past the end if len(self.range_set[self.tell()]) != 1: res.add(Interval(prev.end, self.tell())) return res # ----------------------------- # PRIVATE FUNCTIONS # ----------------------------- def _update_consumed_ranges(self, start_pos, end_pos): """Update the ``self.consumed_ranges`` array with which byte ranges have been consecutively consumed. """ self.range_set.add(Interval(start_pos, end_pos+1)) self.range_set.merge_overlaps() def _flush_bits_to_stream(self): """Flush the bits to the stream. This is used when a few bits have been read and ``self._bits`` contains unconsumed/ flushed bits when data is to be written to the stream """ if len(self._bits) == 0: return 0 bits = list(self._bits) diff = 8 - (len(bits) % 8) padding = [0] * diff bits = bits + padding self._stream.write(bits_to_bytes(bits)) self._bits.clear()
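# The consumed-range bookkeeping above (_update_consumed_ranges plus
# unconsumed_ranges) boils down to: record every read as an interval, coalesce
# with merge_overlaps(), then walk the sorted intervals and report the gaps.
# A minimal standalone sketch of that pattern -- the names and byte offsets
# here are illustrative, not part of BitwrappedStream:
from intervaltree import Interval, IntervalTree

consumed = IntervalTree()

def record_read(start, end):
    # same idea as _update_consumed_ranges(): add the range, then coalesce
    consumed.add(Interval(start, end))
    consumed.merge_overlaps()

def gaps():
    res = IntervalTree()
    prev = None
    for rng in sorted(consumed, key=lambda x: x.begin):
        if prev is not None and prev.end < rng.begin:
            res.add(Interval(prev.end, rng.begin))
        prev = rng
    return res

record_read(0, 4)
record_read(3, 8)      # overlaps the first read, so the two are merged
record_read(12, 16)
print(sorted(gaps()))  # [Interval(8, 12)]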
class MemoryCache(object): """! @brief Memory cache. Maintains a cache of target memory. The constructor is passed a backing DebugContext object that will be used to fill the cache. The cache is invalidated whenever the target has run since the last cache operation (based on run tokens). If the target is currently running, all accesses cause the cache to be invalidated. The target's memory map is referenced. All memory accesses must be fully contained within a single memory region, or a MemoryAccessError will be raised. However, if an access is outside of all regions, the access is passed to the underlying context unmodified. When an access is within a region, that region's cacheability flag is honoured. """ def __init__(self, context, core): self._context = context self._core = core self._run_token = -1 self._log = LOG.getChild('memcache') self._reset_cache() def _reset_cache(self): self._cache = IntervalTree() self._metrics = CacheMetrics() def _check_cache(self): """! @brief Invalidates the cache if appropriate.""" if self._core.is_running(): self._log.debug("core is running; invalidating cache") self._reset_cache() elif self._run_token != self._core.run_token: self._dump_metrics() self._log.debug("out of date run token; invalidating cache") self._reset_cache() self._run_token = self._core.run_token def _get_ranges(self, addr, count): """! @brief Splits a memory address range into cached and uncached subranges. @return Returns a 2-tuple with the first element being a set of Interval objects for each of the cached subranges. The second element is a set of Interval objects for each of the non-cached subranges. """ cached = self._cache.overlap(addr, addr + count) uncached = {Interval(addr, addr + count)} for cachedIv in cached: newUncachedSet = set() for uncachedIv in uncached: # No overlap. if cachedIv.end < uncachedIv.begin or cachedIv.begin > uncachedIv.end: newUncachedSet.add(uncachedIv) continue # Begin segment. if cachedIv.begin - uncachedIv.begin > 0: newUncachedSet.add(Interval(uncachedIv.begin, cachedIv.begin)) # End segment. if uncachedIv.end - cachedIv.end > 0: newUncachedSet.add(Interval(cachedIv.end, uncachedIv.end)) uncached = newUncachedSet return cached, uncached def _read_uncached(self, uncached): """! "@brief Reads uncached memory ranges and updates the cache. @return A list of Interval objects is returned. Each Interval has its @a data attribute set to a bytearray of the data read from target memory. """ uncachedData = [] for uncachedIv in uncached: data = self._context.read_memory_block8(uncachedIv.begin, uncachedIv.end - uncachedIv.begin) iv = Interval(uncachedIv.begin, uncachedIv.end, bytearray(data)) self._cache.add(iv) # TODO merge contiguous cached intervals uncachedData.append(iv) return uncachedData def _update_metrics(self, cached, uncached, addr, size): cachedSize = 0 for iv in cached: begin = iv.begin end = iv.end if iv.begin < addr: begin = addr if iv.end > addr + size: end = addr + size cachedSize += end - begin uncachedSize = sum((iv.end - iv.begin) for iv in uncached) self._metrics.reads += 1 self._metrics.hits += cachedSize self._metrics.misses += uncachedSize def _dump_metrics(self): if self._metrics.total > 0: self._log.debug("%d reads, %d bytes [%d%% hits, %d bytes]; %d bytes written", self._metrics.reads, self._metrics.total, self._metrics.percent_hit, self._metrics.hits, self._metrics.writes) else: self._log.debug("no reads") def _read(self, addr, size): """! @brief Performs a cached read operation of an address range. 
@return A list of Interval objects sorted by address. """ # Get the cached and uncached subranges of the requested read. cached, uncached = self._get_ranges(addr, size) self._update_metrics(cached, uncached, addr, size) # Read any uncached ranges. uncachedData = self._read_uncached(uncached) # Merged cached with data we just read combined = list(cached) + uncachedData combined.sort(key=lambda x: x.begin) return combined def _merge_data(self, combined, addr, size): """! @brief Extracts data from the intersection of an address range across a list of interval objects. The range represented by @a addr and @a size are assumed to overlap the intervals. The first and last interval in the list may have ragged edges not fully contained in the address range, in which case the correct slice of those intervals is extracted. @param self @param combined List of Interval objects forming a contiguous range. The @a data attribute of each interval must be a bytearray. @param addr Start address. Must be within the range of the first interval. @param size Number of bytes. (@a addr + @a size) must be within the range of the last interval. @return A single bytearray object with all data from the intervals that intersects the address range. """ result = bytearray() resultAppend = bytearray() # Check for fully contained subrange. if len(combined) and combined[0].begin < addr and combined[0].end > addr + size: offset = addr - combined[0].begin endOffset = offset + size result = combined[0].data[offset:endOffset] return result # Take slice of leading ragged edge. if len(combined) and combined[0].begin < addr: offset = addr - combined[0].begin result += combined[0].data[offset:] combined = combined[1:] # Take slice of trailing ragged edge. if len(combined) and combined[-1].end > addr + size: offset = addr + size - combined[-1].begin resultAppend = combined[-1].data[:offset] combined = combined[:-1] # Merge. for iv in combined: result += iv.data result += resultAppend return result def _update_contiguous(self, cached, addr, value): size = len(value) end = addr + size leadBegin = addr leadData = bytearray() trailData = bytearray() trailEnd = end if cached[0].begin < addr and cached[0].end > addr: offset = addr - cached[0].begin leadData = cached[0].data[:offset] leadBegin = cached[0].begin if cached[-1].begin < end and cached[-1].end > end: offset = end - cached[-1].begin trailData = cached[-1].data[offset:] trailEnd = cached[-1].end self._cache.remove_overlap(addr, end) data = leadData + value + trailData self._cache.addi(leadBegin, trailEnd, data) def _check_regions(self, addr, count): """! @return A bool indicating whether the given address range is fully contained within one known memory region, and that region is cacheable. @exception MemoryAccessError Raised if the access is not entirely contained within a single region. """ regions = self._core.memory_map.get_intersecting_regions(addr, length=count) # If no regions matched, then allow an uncached operation. if len(regions) == 0: return False # Raise if not fully contained within one region. if len(regions) > 1 or not regions[0].contains_range(addr, length=count): raise MemoryAccessError("individual memory accesses must not cross memory region boundaries") # Otherwise return whether the region is cacheable. 
return regions[0].is_cacheable def read_memory(self, addr, transfer_size=32, now=True): # TODO use more optimal underlying read_memory call if transfer_size == 8: data = self.read_memory_block8(addr, 1)[0] elif transfer_size == 16: data = conversion.byte_list_to_u16le_list(self.read_memory_block8(addr, 2))[0] elif transfer_size == 32: data = conversion.byte_list_to_u32le_list(self.read_memory_block8(addr, 4))[0] if now: return data else: def read_cb(): return data return read_cb def read_memory_block8(self, addr, size): if size <= 0: return [] self._check_cache() # Validate memory regions. if not self._check_regions(addr, size): self._log.debug("range [%x:%x] is not cacheable", addr, addr+size) return self._context.read_memory_block8(addr, size) # Get the cached and uncached subranges of the requested read. combined = self._read(addr, size) # Extract data out of combined intervals. result = list(self._merge_data(combined, addr, size)) assert len(result) == size, "result size ({}) != requested size ({})".format(len(result), size) return result def read_memory_block32(self, addr, size): return conversion.byte_list_to_u32le_list(self.read_memory_block8(addr, size*4)) def write_memory(self, addr, value, transfer_size=32): if transfer_size == 8: return self.write_memory_block8(addr, [value]) elif transfer_size == 16: return self.write_memory_block8(addr, conversion.u16le_list_to_byte_list([value])) elif transfer_size == 32: return self.write_memory_block8(addr, conversion.u32le_list_to_byte_list([value])) def write_memory_block8(self, addr, value): if len(value) <= 0: return self._check_cache() # Validate memory regions. cacheable = self._check_regions(addr, len(value)) # Write to the target first, so if it fails we don't update the cache. result = self._context.write_memory_block8(addr, value) if cacheable: size = len(value) end = addr + size cached = sorted(self._cache.overlap(addr, end), key=lambda x:x.begin) self._metrics.writes += size if len(cached): # Write data is entirely within a single cached interval. if addr >= cached[0].begin and end <= cached[0].end: beginOffset = addr - cached[0].begin endOffset = beginOffset + size cached[0].data[beginOffset:endOffset] = value else: self._update_contiguous(cached, addr, bytearray(value)) else: # No cached data in this range, so just add the entire interval. self._cache.addi(addr, end, bytearray(value)) return result def write_memory_block32(self, addr, data): return self.write_memory_block8(addr, conversion.u32le_list_to_byte_list(data)) def invalidate(self): self._reset_cache()
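# A standalone sketch of the cached/uncached split performed by _get_ranges()
# above, assuming intervaltree 3.x's overlap(). The cache contents and
# addresses are made up for illustration; this is not the class's own code.
from intervaltree import Interval, IntervalTree

def split_cached_uncached(cache, addr, count):
    """Split [addr, addr+count) into the subranges already held in ``cache``
    and the subranges that still need to be read from the target."""
    cached = cache.overlap(addr, addr + count)
    uncached = {Interval(addr, addr + count)}
    for civ in cached:
        next_uncached = set()
        for uiv in uncached:
            if civ.end <= uiv.begin or civ.begin >= uiv.end:
                next_uncached.add(uiv)  # no overlap, keep the piece untouched
                continue
            if civ.begin > uiv.begin:
                next_uncached.add(Interval(uiv.begin, civ.begin))  # leading gap
            if civ.end < uiv.end:
                next_uncached.add(Interval(civ.end, uiv.end))      # trailing gap
        uncached = next_uncached
    return cached, uncached

cache = IntervalTree([Interval(0x1000, 0x1010, bytearray(16))])
hit, miss = split_cached_uncached(cache, 0x0FF8, 0x20)
print(sorted(miss))  # [Interval(4088, 4096), Interval(4112, 4120)]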
class HistorySet(object):
    __slots__ = ('current', 'history')

    def __init__(self, values=(), *, time=None):
        time = time if time is not None else now()
        self.current = {v: time for v in values}
        self.history = IntervalTree()

    @staticmethod
    def from_intervals(intervals):
        result = HistorySet()
        for iv in intervals:
            result.add_interval(iv)
        return result

    def add_interval(self, iv):
        if iv.end is GreatestValue:
            self.current[iv.data] = iv.begin
        else:
            if iv.data in self.current and self.current[iv.data] <= iv.end:
                del self.current[iv.data]
            self.history.add(iv)

    def refine_history(self):
        """
        Scrub the internal IntervalTree history so that there are a minimum number of intervals.
        Any multiplicity of intervals with the same data value that covers a single contiguous
        range will be replaced with a single interval over that range.

        This is an expensive operation, both in time and memory, that should only be performed
        when the history is being modified carelessly, such as naively merging with the history
        from another HistorySet or adding and removing elements out of chronological order.
        Behavior for the HistorySet should be identical before and after calling refine_history(),
        but may be slightly faster and consume less memory afterwards. The only change will be
        that it should no longer return incorrect values for the effective added date of
        currently contained items after merging with history intervals.
        """
        self.history = IntervalTree(merge_interval_overlaps(self.history, self.current))

    def __getitem__(self, index):
        if type(index) is slice:
            if index.step is not None:
                raise ValueError("Slice indexing is used for intervals, which do not have a step.")
            iv = Interval(index.start, index.stop)
            # overlap query over the slice range
            result = {x.data for x in self.history[index]}
            # currently present items extend to GreatestValue
            result.update(x[0] for x in self.current.items()
                          if iv.overlaps(Interval(begin=x[1], end=GreatestValue)))
        else:
            result = {x.data for x in self.history[index]}
            result.update(item_ for item_, time_ in self.current.items() if time_ <= index)
        return result

    def time_slice(self, begin, end):
        """
        Return an iterable over all the intervals intersecting the given half-open interval
        from begin to end, chopped to fit within it
        """
        if begin is None or end is None:
            raise ValueError("Both the beginning and end of the interval must be included")
        if end <= begin:
            raise ValueError("begin must be < end")
        for iv in self.history[begin:end]:
            yield Interval(begin=max(iv.begin, begin), end=min(iv.end, end), data=iv.data)
        for value, added in self.current.items():
            if added < end:
                yield Interval(begin=added, end=end, data=value)

    def intervals(self):
        """
        Return an iterator over all the intervals in this set.
        Currently contained values have intervals ending with a GreatestValue object.
""" yield from self.history end = GreatestValue for value, begin in self.current.items(): yield Interval(begin=begin, end=end, data=value) def all_values(self): result = self.copy() for old in self.history: result.add(old.data) return result def item_added_time(self, value): return self.current[value] def ordered_by_addition(self, *, time=None): if time is None: result = list(self.current.items()) else: result = [(x.begin, x.data) for x in self.history[time]] result.extend((added, item) for item, added in self.current.items() if added <= time) result.sort(key=itemgetter(0)) return [x[1] for x in result] def add(self, value, *, time=None): time = time if time is not None else now() if value not in self.current or self.current[value] > time: self.current[value] = time def remove(self, value, *, time=None): self.history.addi(self.current.pop(value), time if time is not None else now(), value) def discard(self, value, *, time=None): if value in self.current: self.remove(value, time=time) def copy(self, *, time=None): if time is None: return set(self.current) else: return self[time] def members_in_interval(self, begin, end): return self[begin:end] def clear(self, *, time=None): time = time if time is not None else now() for item in self.current.items(): self.history.addi(item[1], time, item[0]) self.current.clear() def union(self, *others): result = self.copy() result.update(*others) return result def difference(self, *others): result = self.copy() result.difference_update(*others) return result def symmetric_difference(self, other): result = self.copy() result.symmetric_difference_update(other) return result def intersection(self, *others): result = self.copy() result.intersection_update(*others) return result def update(self, *others, time=None): time = time if time is not None else now() for other in others: for value in other: self.add(value, time=time) def difference_update(self, *others, time=None): time = time if time is not None else now() for other in others: for value in other: self.discard(value, time=time) def symmetric_difference_update(self, other, *, time=None): time = time if time is not None else now() for value in other: if value in self.current: self.remove(value, time=time) else: self.add(value, time=time) def intersection_update(self, *others, time=None): time = time if time is not None else now() toss = self.difference(*others) for value in toss: self.discard(value, time=time) def pop(self, *, time=None): time = time if time is not None else now() item = self.current.popitem() self.history.addi(item[1], time, item[0]) return item[0] def isdisjoint(self, other): # noinspection PyUnresolvedReferences return self.current.keys().isdisjoint(other) def issubset(self, other): return other > self.current def issuperset(self, other): return other < self.current def __iter__(self): return iter(self.current) def __len__(self): return len(self.current) def __eq__(self, other): if isinstance(other, (set, frozenset)): return self.current.keys() == other elif isinstance(other, HistorySet): return self.current.keys() == other.current.keys() return False def __lt__(self, other): return self < other or self == other def __gt__(self, other): return self > other or self == other def __contains__(self, item): return item in self.current __le__ = issubset __ge__ = issuperset __or__ = union __and__ = intersection __sub__ = difference __xor__ = symmetric_difference __ior__ = update __iand__ = intersection_update __isub__ = difference_update __ixor__ = symmetric_difference_update
                              g_wine_node_count, g_person_node_count)
    fg.node[person_node_with_fewest_edges]["c"] += 1
    wine_sold += 1
    if fg.node[person_node_with_fewest_edges]["c"] == MAX_WINE:
        fg.remove_node(person_node_with_fewest_edges)
        g_person_node_count -= 1
        person_id = long(person_node_with_fewest_edges.replace("p",""))
        has_higher = list(pt[person_id+1])
        has_lower = list(pt[person_id-1])
        if has_higher and has_lower:
            # this person is being inserted right next to its siblings, merge them into one
            begin = has_lower[0].begin
            end = has_higher[0].end
            pt.remove(has_lower[0])
            pt.remove(has_higher[0])
            pt.add(Interval(begin,end))
        elif has_higher:
            begin = person_id
            end = has_higher[0].end
            pt.remove(has_higher[0])
            pt.add(Interval(begin,end))
        elif has_lower:
            begin = has_lower[0].begin
            end = person_id + 1  # extend the lower run to cover this newly finished id
            pt.remove(has_lower[0])
            pt.add(Interval(begin,end))
        else:
            pt.add(Interval(person_id,person_id+1))
    fg.remove_node(wine_node_with_fewest_edges)
    g_wine_node_count -= 1
    wine_id = long(wine_node_with_fewest_edges.replace("w",""))
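# The neighbour-merge above (coalescing the unit interval for a finished id
# with the runs on either side of it) generalizes to a small helper. A
# standalone sketch -- the function name and ids are illustrative only:
from intervaltree import Interval, IntervalTree

def mark_id(tree, ident):
    lower = list(tree[ident - 1])   # run ending just below this id, if any
    higher = list(tree[ident + 1])  # run starting just above this id, if any
    begin, end = ident, ident + 1
    if lower:
        begin = lower[0].begin
        tree.remove(lower[0])
    if higher:
        end = higher[0].end
        tree.remove(higher[0])
    tree.add(Interval(begin, end))

ids = IntervalTree()
for i in (3, 5, 4, 10):
    mark_id(ids, i)
print(sorted(ids))  # [Interval(3, 6), Interval(10, 11)]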
def subsample_region_uniformly(region, args):
    logger = logging.getLogger(region.ref_name)
    logger.info("Building interval tree.")
    tree = IntervalTree()
    with pysam.AlignmentFile(args.bam) as bam:
        ref_lengths = dict(zip(bam.references, bam.lengths))
        for r in bam.fetch(region.ref_name, region.start, region.end):
            if filter_read(r, bam, args, logger):
                continue
            # trim reads to region
            tree.add(Interval(
                max(r.reference_start, region.start),
                min(r.reference_end, region.end),
                r.query_name))

    logger.info('Starting pileup.')
    coverage = np.zeros(region.end - region.start, dtype=np.uint16)
    reads = set()
    n_reads = 0
    iteration = 0
    it_no_change = 0
    last_depth = 0
    targets = iter(sorted(args.depth))
    target = next(targets)
    found_enough_depth = True
    while True:
        cursor = 0
        while cursor < ref_lengths[region.ref_name]:
            read = _nearest_overlapping_point(tree, cursor)
            if read is None:
                cursor += args.stride
            else:
                reads.add(read.data)
                cursor = read.end
                coverage[read.begin - region.start:read.end - region.start] += 1
                tree.remove(read)
        iteration += 1
        median_depth = np.median(coverage)
        stdv_depth = np.std(coverage)
        logger.debug(
            u'Iteration {}. reads: {}, depth: {:.0f}X (\u00B1{:.1f}).'.format(
                iteration, len(reads), median_depth, stdv_depth))
        # output when we hit a target
        if median_depth >= target:
            logger.info("Hit target depth {}.".format(target))
            prefix = '{}_{}X'.format(args.output_prefix, target)
            _write_bam(args.bam, prefix, region, reads)
            _write_coverage(prefix, region, coverage, args.profile)
            try:
                target = next(targets)
            except StopIteration:
                break
        # exit if nothing happened this iteration
        if n_reads == len(reads):
            logger.warning("No reads added, finishing pileup.")
            found_enough_depth = False
            break
        n_reads = len(reads)
        # or if no change in depth
        if median_depth == last_depth:
            it_no_change += 1
            if it_no_change == args.patience:
                logger.warning(
                    "Coverage not increased for {} iterations, finishing pileup."
                    .format(args.patience))
                found_enough_depth = False
                break
        else:
            it_no_change = 0  # depth changed, so reset the patience counter
        last_depth = median_depth
    return found_enough_depth
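# _nearest_overlapping_point() is not defined in this snippet. Judging only by
# how it is used above (None when no read overlaps the cursor, otherwise an
# Interval whose .begin/.end/.data are consumed), a plausible sketch could look
# like the following; the tie-breaking rule is an assumption, not the original
# implementation.
from intervaltree import IntervalTree

def _nearest_overlapping_point(tree, point):
    """Hypothetical sketch: return the interval covering ``point`` whose start
    is closest to it, or None if nothing overlaps the point."""
    hits = tree.at(point)  # intervals containing the point
    if not hits:
        return None
    return min(hits, key=lambda iv: abs(iv.begin - point))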
def combine_ranges(ranges: tuple): ran = IntervalTree() for r in ranges: for start, end in r: ran.add(Interval(int(start), int(end) + 1)) return ran
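# Short usage sketch for combine_ranges(): each element of ``ranges`` is an
# iterable of inclusive (start, end) pairs, and the +1 converts them into
# intervaltree's half-open form. The input data below is made up.
ranges = (
    [(1, 5), (10, 12)],
    [(4, 8)],
)
tree = combine_ranges(ranges)
print(sorted(tree))    # [Interval(1, 6), Interval(4, 9), Interval(10, 13)]
tree.merge_overlaps()  # optionally coalesce overlapping ranges afterwards
print(sorted(tree))    # [Interval(1, 9), Interval(10, 13)]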
print "Match Count:", len(max_match) if len(max_match) == 0: fg.clear() g_person_node_count = 0 g_wine_node_count = 0 nodes_to_process = False for node in max_match: if node[0][0] == "w": wine = node[0] person = node[1] elif node[0][0] == "p": person = node[0] wine = node[1] person_id = long(person.replace("p", "")) wine_id = long(wine.replace("w", "")) wine_sold += 1 fg.node[person]["c"] += 1 if fg.node[person]["c"] == MAX_WINE: pt.add(Interval(person_id, person_id + 1)) fg.remove_node(person) wt.add(Interval(wine_id, wine_id + 1)) fg.remove_node(wine) print "{0}\t{1}\t{2}".format(person, wine, wine_sold) f.close() print args.min_buffer_size, args.max_buffer_size, wine_sold, round( time.time() - start, 3)
def _generate_slides(self, maxsize):
    """Yield (png_path, start_time, duration) for each version of each slide,
    in order. Both start and duration are in seconds. This honors
    cut(start, end) and only returns slides and timings that fit into the
    configured timeframe."""
    start_ts, end_ts = self._cut
    doc = ET.parse(self._asset_path("shapes.svg"))
    for img in doc.iterfind("./{http://www.w3.org/2000/svg}image"):
        logging.debug("Found slide: %s", img.get("id"))
        path = img.get("{http://www.w3.org/1999/xlink}href")
        img_start = to_ns(img.get("in"))
        img_end = to_ns(img.get("out"))
        img_width = int(img.get("width"))
        img_height = int(img.get("height"))
        size = self.fit((img_width, img_height),
                        (0, 0, maxsize[0], maxsize[1]))[2:]

        if path.endswith("/deskshare.png"):
            logging.info("Skipping: Slides invisible during deskshare")
            continue
        if img_start >= end_ts or img_end <= start_ts:
            logging.info("Skipping: Slide not in presentation time frame")
            continue

        # Cut slide duration to presentation time frame
        img_start = max(img_start, start_ts)
        img_end = min(img_end, end_ts)

        # Fix background image path
        img.set("{http://www.w3.org/1999/xlink}href", self._asset_path(path))

        # Find an SVG group with shapes belonging to this slide.
        canvas = doc.find(
            './{{http://www.w3.org/2000/svg}}g[@class="canvas"][@image="{}"]'
            .format(img.get("id")))
        if canvas is None:
            # No annotations, just a slide.
            png = self._render_slide([img], size, f'{img.get("id")}-0.png')
            yield png, img_start, img_end - img_start
            continue

        # Collect shapes. Each shape can have multiple draw-steps with the same
        # `shape` id and only the most recent version is visible.
        shapes = {}  # id -> [(start, undo, shape), ...]
        for shape in canvas.iterfind(
                './{http://www.w3.org/2000/svg}g[@class="shape"]'):
            shape_id = shape.get("shape")
            shape_style = shape.get("style")
            shape.set("style", shape_style.replace("visibility:hidden;", ""))

            # Poll results are embedded as images. Make the href absolute.
            for shape_img in shape.iterfind(
                    "./{http://www.w3.org/2000/svg}image"):
                href = shape_img.get("{http://www.w3.org/1999/xlink}href")
                href = self._asset_path(href)
                shape_img.set("{http://www.w3.org/1999/xlink}href", href)

            start = to_ns(shape.get("timestamp"))
            undo = to_ns(shape.get("undo"))
            shapes.setdefault(shape_id, []).append((start, undo, shape))

        # Build timeline of shapes and draw-steps during this slide
        timeline = IntervalTree()
        timeline.add(Interval(begin=img_start, end=img_end, data=[]))

        # For each shape-id, order draw-steps by start-time and calculate end-time.
        for shape_id, shapes in shapes.items():
            shapes = sorted(shapes)  # sort by start time
            zindex = shapes[0][0]  # Use start time for z-layer ordering (new on top)
            for i, (start, undo, shape) in enumerate(shapes):
                # When switching back to an old slide, shape start-time is way too small
                start = max(img_start, start)
                end = img_end
                if i + 1 < len(shapes):
                    # Hide non-final draw-steps when replaced by the next draw-step.
                    end = shapes[i + 1][0]
                elif undo > 0:
                    # Shape was erased, so hide it earlier
                    end = undo
                if end <= start:
                    continue  # May happen if self._cut removed parts of a slide's lifetime
                if start >= img_end:
                    logging.warning(
                        "Shape timing is off: start=%d end=%s",
                        start / Gst.SECOND,
                        end / Gst.SECOND,
                    )
                    continue  # Should not happen, but who knows
                timeline.add(
                    Interval(begin=start, end=end, data=[(zindex, shape)]))

        # In multiuser-canvas mode, shape drawing may overlap in time. This
        # split+merge step ensures that we have non-overlapping time slices,
        # each containing all shapes that are visible in that slice.
timeline.split_overlaps() timeline.merge_overlaps(data_reducer=lambda a, b: a + b) # Render one PNG per time slice for i, interval in enumerate(sorted(timeline)): shapes = [shape for zindex, shape in sorted(interval.data)] png = self._render_slide([img] + shapes, size, f'{img.get("id")}-{i}.png') yield png, interval.begin, interval.end - interval.begin
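# The split_overlaps()/merge_overlaps() pairing used above is a general recipe
# for turning overlapping annotations into non-overlapping slices that each
# carry everything visible during that slice. A minimal standalone
# illustration with made-up shape names:
from intervaltree import Interval, IntervalTree

timeline = IntervalTree()
timeline.add(Interval(0, 10, ["slide"]))
timeline.add(Interval(2, 6, ["arrow"]))
timeline.add(Interval(4, 8, ["circle"]))

timeline.split_overlaps()                                 # cut at every boundary
timeline.merge_overlaps(data_reducer=lambda a, b: a + b)  # one interval per slice

for iv in sorted(timeline):
    print(iv.begin, iv.end, sorted(iv.data))
# 0 2 ['slide']
# 2 4 ['arrow', 'slide']
# 4 6 ['arrow', 'circle', 'slide']
# 6 8 ['circle', 'slide']
# 8 10 ['slide']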
#------------------------------------------------------------------------------ # main() #------------------------------------------------------------------------------ if __name__ == '__main__': bv = BinaryViewType.get_view_of_file(sys.argv[1]) bv.update_analysis_and_wait() assert bv.start == bv.segments[0].start assert bv.end == bv.segments[-1].end assert len(bv) == bv.end - bv.start # note! this includes gaps between segments! # collect segments it_segments = IntervalTree() for segment in bv.segments: it_segments.add(Interval(segment.start, segment.end, segment)) # collect sections it_sections = IntervalTree() for section in bv.sections.values(): it_sections.add(Interval(section.start, section.end, section)) # decorate each function with an IntervalTree of its spanned bytes for function in bv.functions: itree = IntervalTree() for bblock in function: itree.add(Interval(bblock.start, bblock.end)) function.itree = itree # check that sections are in one segment for section in bv.sections.values(): intervals = it_segments.overlap(section.start, section.end) assert len(intervals) == 1
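# Toy illustration of the containment check above: a section lies in a single
# segment iff overlap() over its byte range returns exactly one interval.
# Addresses are made up. Note that overlap() alone does not prove containment
# (a range can extend past a segment and still touch only one interval); the
# assertion above relies on section bounds coming from the same binary.
from intervaltree import Interval, IntervalTree

segments = IntervalTree([Interval(0x1000, 0x2000, "seg0"),
                         Interval(0x3000, 0x4000, "seg1")])

print(len(segments.overlap(0x1100, 0x1200)))  # 1 -> fully inside seg0
print(len(segments.overlap(0x1F00, 0x3100)))  # 2 -> straddles both segments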
def _calculate_work_and_wait_time_by_status(issue, lead_time_statuses, work_statuses): work_intervals = IntervalTree() wait_intervals = IntervalTree() work_time_by_status = Counter() wait_time_by_status = Counter() work_time_by_status_with_block_time = Counter() wait_time_by_status_with_block_time = Counter() last_status_change_date = issue[CREATED_DATE] for transition in issue[STATUS_TRANSITIONS]: if transition['from'] in lead_time_statuses: if transition['from'] in work_statuses: work_intervals.add( Interval(last_status_change_date, transition['date'], transition['from'])) else: wait_intervals.add( Interval(last_status_change_date, transition['date'], transition['from'])) last_status_change_date = transition['date'] wait_intervals_with_block_time = copy.deepcopy(wait_intervals) work_intervals_with_block_time = copy.deepcopy(work_intervals) for i in range(len(issue[FLAG_TRANSITIONS])): transition_block_start = issue[FLAG_TRANSITIONS][i] if transition_block_start['from'] is None: if i + 1 < len(issue[FLAG_TRANSITIONS]): transition_block_end = issue[FLAG_TRANSITIONS][i + 1] wait_intervals.chop(transition_block_start['date'], transition_block_end['date']) work_intervals.chop(transition_block_start['date'], transition_block_end['date']) for interval in work_intervals: work_time_by_status[interval.data] += ( interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY for interval in wait_intervals: wait_time_by_status[interval.data] += ( interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY for interval in work_intervals_with_block_time: work_time_by_status_with_block_time[interval.data] += ( interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY for interval in wait_intervals_with_block_time: wait_time_by_status_with_block_time[interval.data] += ( interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY return { WORK_TIME_BY_STATUS: { '{}_work_time'.format(x): work_time_by_status[x] for x in lead_time_statuses }, WAIT_TIME_BY_STATUS: { '{}_wait_time'.format(x): wait_time_by_status[x] for x in lead_time_statuses }, WORK_TIME_BY_STATUS_WITH_BLOCK_TIME: { '{}_work_time_with_block_time'.format(x): work_time_by_status_with_block_time[x] for x in lead_time_statuses }, WAIT_TIME_BY_STATUS_WITH_BLOCK_TIME: { '{}_wait_time_with_block_time'.format(x): wait_time_by_status_with_block_time[x] for x in lead_time_statuses }, }
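# The flag handling above relies on IntervalTree.chop(), which removes a time
# window from every interval it touches, splitting intervals as needed while
# preserving their data. A minimal illustration with made-up timestamps:
from intervaltree import Interval, IntervalTree

work = IntervalTree([Interval(0, 10, "In Progress")])

# Chop out a "blocked" window from 3 to 6: the interval is split around it and
# the status carried in .data survives on both remaining pieces.
work.chop(3, 6)
print(sorted(work))  # [Interval(0, 3, 'In Progress'), Interval(6, 10, 'In Progress')]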