def MedCalculator(fileNum, text):
    """
    Worker function: compute the running median of per-line word counts.

    For every line in ``text`` the line is translated with the module-level
    ``table``, tokenised with the module-level ``tokenPattern``, and the number
    of tokens is inserted into a sorted container.  After each insertion the
    median of all counts seen so far is appended to the result.

    :param fileNum: index of the input file being processed (not read by the
                    computation below)
    :param text: iterable of input lines
    :return: list of floats, one running median per input line
    """
    running_medians = []
    # Kept sorted on insertion so the middle element(s) can be read by index
    # without re-sorting after every new line.
    ordered_counts = SortedList()

    for seen, raw_line in enumerate(text, start=1):
        # Per the original notes, `table` lower-cases the text and strips `'-_
        tokens = re.findall(tokenPattern, raw_line.translate(table))
        ordered_counts.add(len(tokens))

        # With `seen` values stored: an odd count has a single middle element,
        # an even count averages the two middle elements.
        middle, is_odd = divmod(seen, 2)
        if is_odd:
            median = float(ordered_counts[middle])
        else:
            median = float((ordered_counts[middle - 1] + ordered_counts[middle]) / 2)
        running_medians.append(median)

    return running_medians
def sort_file_lists(input_file_list, file_name_generator):
    """
    Read commands from every input file and dump them in sorted chunks.

    Commands are accumulated in a ``SortedList``.  As soon as the buffer holds
    more than ``number_of_allowed_command`` entries it is handed to
    ``dump_commands_to_file`` and a fresh buffer is started.  Whatever is left
    in the buffer after the last input file is dumped as a final chunk;
    previously that remainder was dropped, because the buffer is local and is
    never returned.

    :param input_file_list: iterable of inputs accepted by ``_get_reader``
    :param file_name_generator: passed through to ``dump_commands_to_file``
    """
    command_list = SortedList()
    for input_file in input_file_list:
        for command in _get_reader(input_file):
            command_list.add(command)
            if len(command_list) > number_of_allowed_command:
                dump_commands_to_file(command_list, file_name_generator)
                command_list = SortedList()

    # Flush the trailing partial chunk so no command is silently lost.
    if command_list:
        dump_commands_to_file(command_list, file_name_generator)
def _get_top_n_samples(model_predictions: List[ModelPrediction], n: int, best: bool):
    """
    Select up to ``n`` predictions from ``model_predictions``.

    Only predictions whose ``is_correct()`` equals ``best`` are considered.
    When ``best`` is true the kept samples are ordered by descending
    ``true_label_probability``; otherwise the predictions' own ordering is
    used.  Once ``n`` samples are held, a new candidate replaces the current
    last element when ``best != (candidate < last)``.

    :param model_predictions: predictions to select from
    :param n: maximum number of samples to return; ``n <= 0`` yields ``[]``
              (previously this raised IndexError on the first matching
              prediction because the empty container was indexed with -1)
    :param best: select among correct (True) or incorrect (False) predictions
    :return: a plain ``list`` of the selected predictions, in container order
    """
    if n <= 0:
        return []

    if best:
        top_n_samples = SortedList(key=lambda sample: -sample.true_label_probability)
    else:
        top_n_samples = SortedList()

    for model_prediction in model_predictions:
        if best != model_prediction.is_correct():
            continue
        if len(top_n_samples) < n:
            top_n_samples.add(model_prediction)
        elif best != (model_prediction < top_n_samples[-1]):
            # Evict the current last element in favour of the new candidate.
            top_n_samples.pop()
            top_n_samples.add(model_prediction)

    # Return a normal list instead of a SortedList.
    return list(top_n_samples)
def get_split_point_suggestions(self):
    """
    Suggest candidate split values spread evenly over the observed range.

    The overall minimum and maximum are taken across the per-class observed
    minima/maxima for every key of ``self._att_val_dist_per_class``.  The range
    is divided into ``self.num_bin_options + 1`` equal bins and each interior
    boundary strictly between the minimum and the maximum is suggested.

    :return: ``SortedList`` of suggested split values (empty when no class
             has been observed)
    """
    lowest = np.inf
    highest = -np.inf
    for class_key in self._att_val_dist_per_class:
        class_min = self._min_value_observed_per_class[class_key]
        class_max = self._max_value_observed_per_class[class_key]
        if class_min < lowest:
            lowest = class_min
        if class_max > highest:
            highest = class_max

    candidates = SortedList()
    if not lowest < np.inf:
        # Nothing observed yet: no range to split.
        return candidates

    step = (highest - lowest) / (float(self.num_bin_options) + 1.0)
    for bin_no in range(1, self.num_bin_options + 1):
        candidate = lowest + (step * bin_no)
        if lowest < candidate < highest:
            candidates.add(candidate)
    return candidates
class TxosLinker(object):
    """
    Computes the entropy of Bitcoin transactions and the linkability
    of inputs/outputs of a transaction.

    Attributes set while processing:
        inputs      list of input txos expressed as tuples (id, amount)
        outputs     list of output txos expressed as tuples (id, amount)
        _fees       fees associated to the transaction
        _packs      packs of linked txos created by _pack_linked_txos
    """

    # ------------------------------------------------------------------
    # CONSTANTS
    # ------------------------------------------------------------------
    # Default maximum duration in seconds
    MAX_DURATION = 180

    # Processing options
    LINKABILITY = 'LINKABILITY'
    PRECHECK = 'PRECHECK'
    MERGE_FEES = 'MERGE_FEES'

    # Markers
    FEES = 'FEES'
    PACK = 'PACK'

    # Max number of inputs (or outputs) which can be processed by this algorithm
    MAX_NB_TXOS = 12

    # ------------------------------------------------------------------
    # INITIALIZATION
    # ------------------------------------------------------------------
    def __init__(self, inputs=None, outputs=None, fees=0, max_duration=MAX_DURATION, max_txos=MAX_NB_TXOS):
        """
        Constructor

        Parameters:
            inputs       = list of inputs txos [(v1_id, v1_amount), ...] (default: empty list)
            outputs      = list of outputs txos [(v1_id, v1_amount), ...] (default: empty list)
            fees         = amount of fees associated to the transaction
            max_duration = max duration allocated to processing of a single tx (in seconds)
            max_txos     = max number of txos. Txs with more than max_txos inputs or outputs
                           are not processed.
        """
        # None sentinels avoid sharing one default list between instances.
        self._orig_ins = [] if inputs is None else inputs
        self._orig_outs = [] if outputs is None else outputs
        self._orig_fees = fees
        self._max_duration = max_duration
        self.max_txos = max_txos
        self._packs = []

    # ------------------------------------------------------------------
    # PUBLIC METHODS
    # ------------------------------------------------------------------
    def process(self, linked_txos=None, options=None, intrafees=(0, 0)):
        """
        Computes the linkability between a set of input txos and a set of output txos

        Returns:
            linkability matrix
            number of possible combinations for the transaction
            list of inputs (sorted by decreasing value)
            list of outputs (sorted by decreasing value)

        Parameters:
            linked_txos = list of sets storing linked input txos. Each txo is identified by its id
                          (default: no linked txos)
            options     = list of actions to be applied (default: [LINKABILITY, PRECHECK])
                          LINKABILITY : computes the linkability matrix
                          PRECHECK    : prechecks existence of deterministic links between
                                        inputs and outputs
                          MERGE_FEES  : consider that all fees have been paid by a unique sender
                                        and manage fees as an additional output
            intrafees   = tuple (fees_maker, fees_taker) of max "fees" paid among participants,
                          used for joinmarket transactions
                          fees_maker are potential max "fees" received by a participant from
                          another participant
                          fees_taker are potential max "fees" paid by a participant to all
                          other participants
        """
        if options is None:
            options = [self.LINKABILITY, self.PRECHECK]

        self._options = options
        self.inputs = self._orig_ins.copy()
        self.outputs = self._orig_outs.copy()
        self._fees_maker = intrafees[0]
        self._fees_taker = intrafees[1]
        self._has_intrafees = bool(self._fees_maker or self._fees_taker)

        # Packs txos known as being controlled by a same entity
        # It decreases the entropy and speeds-up computations
        if linked_txos:
            self._pack_linked_txos(linked_txos)

        # Manages fees
        if (self.MERGE_FEES in options) and (self._orig_fees > 0):
            # Manages fees as an additional output (case of sharedsend by blockchain.info).
            # Allows to reduce the volume of computations to be done.
            self._fees = 0
            txo_fees = (self.FEES, self._orig_fees)
            self.outputs.append(txo_fees)
        else:
            self._fees = self._orig_fees

        # Checks deterministic links
        nb_cmbn = 0
        if self.PRECHECK in options and self._check_limit_ok(self.PRECHECK) and (not self._has_intrafees):
            # Prepares the data
            self._prepare_data()
            self._match_agg_by_val()
            # Checks deterministic links
            dtrm_lnks, dtrm_lnks_id = self._check_dtrm_links()
            # If deterministic links have been found, fills the linkability matrix
            # (returned as result if linkability is not processed)
            if dtrm_lnks is not None:
                shape = (len(self.outputs), len(self.inputs))
                mat_lnk = np.zeros(shape, dtype=np.int64)
                for (r, c) in dtrm_lnks:
                    mat_lnk[r, c] = 1
        else:
            mat_lnk = None
            dtrm_lnks_id = None

        # Checks if all inputs and outputs have already been merged
        nb_ins = len(self.inputs)
        nb_outs = len(self.outputs)
        if (nb_ins == 0) or (nb_outs == 0):
            nb_cmbn = 1
            shape = (nb_outs, nb_ins)
            mat_lnk = np.ones(shape, dtype=np.int64)
        elif self.LINKABILITY in options and self._check_limit_ok(self.LINKABILITY):
            # Packs deterministic links if needed
            if dtrm_lnks_id is not None:
                dtrm_lnks_id = [set(lnk) for lnk in dtrm_lnks_id]
                self._pack_linked_txos(dtrm_lnks_id)
            # Prepares data
            self._prepare_data()
            self._match_agg_by_val()
            # Computes a matrix storing a tree composed of valid pairs of input aggregates
            self._compute_in_agg_cmbn()
            # Builds the linkability matrix
            nb_cmbn, mat_lnk = self._compute_link_matrix()
            # Unpacks the matrix
            mat_lnk = self._unpack_link_matrix(mat_lnk, nb_cmbn)

        # Returns results
        return mat_lnk, nb_cmbn, self.inputs, self.outputs

    # ------------------------------------------------------------------
    # PREPARATION
    # ------------------------------------------------------------------
    def _prepare_data(self):
        """
        Computes several data structures which will be used later.

        Replaces self.inputs / self.outputs by their filtered, sorted versions and
        stores the aggregates and aggregate values computed by _prepare_txos.
        """
        # Prepares data related to the input txos
        self.inputs, self._all_in_agg, self._all_in_agg_val = self._prepare_txos(self.inputs)
        # Prepares data related to the output txos
        self.outputs, self._all_out_agg, self._all_out_agg_val = self._prepare_txos(self.outputs)

    def _prepare_txos(self, txos):
        """
        Computes several data structures related to a list of txos

        Returns:
            list of txos sorted by decreasing values
            array of aggregates (combinations of txos) in binary format
            array of values associated to the aggregates

        Parameters:
            txos = list of txos (list of tuples (id, value))
        """
        # Removes txos with null value and orders the rest by decreasing value
        txos = sorted((txo for txo in txos if txo[1] > 0), key=lambda tup: tup[1], reverse=True)

        # Creates a 1D array of values
        all_val = np.array([txo[1] for txo in txos], dtype='int64')

        # Computes all possible combinations of txos encoded in binary format:
        # column c of all_agg is the binary expansion of c (row j = bit j)
        expnt = len(txos)
        shape = (expnt, 2**expnt)
        all_agg = np.zeros(shape, dtype=bool)

        base = np.array([0, 1], dtype=bool)
        for j in range(0, expnt):
            two_exp_j = 2**j
            tmp = np.repeat(base, two_exp_j)
            # Integer division: np.tile requires an integer repetition count
            all_agg[j, :] = np.tile(tmp, 2**(expnt - 1) // two_exp_j)

        # Computes values of aggregates
        all_agg_val = np.dot(all_val, all_agg)

        # Returns computed data structures
        return txos, all_agg, all_agg_val

    # ------------------------------------------------------------------
    # PROCESSING OF AGGREGATES
    # ------------------------------------------------------------------
    def _match_agg_by_val(self):
        """
        Matches input/output aggregates by values.

        Fills:
            _all_match_in_agg      sorted list of matching input aggregates
            _match_in_agg_to_val   input aggregate => value
            _val_to_match_out_agg  value => set of matching output aggregates
        """
        self._all_match_in_agg = SortedList()
        self._match_in_agg_to_val = defaultdict(int)
        self._val_to_match_out_agg = defaultdict(set)

        # Gets unique values of input / output aggregates (sorted increasingly)
        all_unique_in_agg_val = np.unique(self._all_in_agg_val)
        all_unique_out_agg_val = np.unique(self._all_out_agg_val)

        # Computes total fees paid/received by taker/maker
        if self._has_intrafees:
            fees_taker = self._fees + self._fees_taker
            fees_maker = - self._fees_maker     # doesn't take into account tx fees paid by makers

        # Finds input and output aggregates with matching values
        for in_agg_val in np.nditer(all_unique_in_agg_val):
            val = int(in_agg_val)

            for out_agg_val in np.nditer(all_unique_out_agg_val):
                diff = in_agg_val - out_agg_val

                # Output values are increasing: without intrafees, nothing further can match
                if (not self._has_intrafees) and (diff < 0):
                    break

                # Computes conditions required for a matching
                cond_no_intrafees = (not self._has_intrafees) and diff <= self._fees
                cond_intrafees = self._has_intrafees and \
                    ((diff <= 0 and diff >= fees_maker) or (diff >= 0 and diff <= fees_taker))

                if cond_no_intrafees or cond_intrafees:
                    # Registers the matching input aggregate
                    match_in_agg = np.where(self._all_in_agg_val == in_agg_val)[0]
                    for in_idx in match_in_agg:
                        if in_idx not in self._all_match_in_agg:
                            self._all_match_in_agg.add(in_idx)
                            self._match_in_agg_to_val[in_idx] = val

                    # Registers the matching output aggregate
                    match_out_agg = np.where(self._all_out_agg_val == out_agg_val)[0]
                    self._val_to_match_out_agg[val].update(match_out_agg.tolist())

    def _compute_in_agg_cmbn(self):
        """
        Computes a matrix of valid combinations (pairs) of input aggregates
        Stores a dictionary (parent_agg => [(child_agg1, child_agg2), ...]) in _mat_in_agg_cmbn
        We have a valid combination (agg1, agg2) if:
            R1/ child_agg1 & child_agg2 = 0 (no bitwise overlap)
            R2/ child_agg1 > child_agg2 (matrix is symmetric)
        """
        aggs = self._all_match_in_agg[1:-1]
        tgt = self._all_match_in_agg[-1]
        mat = defaultdict(list)
        saggs = set(aggs)

        for i in range(0, tgt + 1):
            if i in saggs:
                j_max = min(i, tgt - i + 1)
                for j in range(0, j_max):
                    if (i & j == 0) and (j in saggs):
                        mat[i + j].append((i, j))

        self._mat_in_agg_cmbn = mat

    # ------------------------------------------------------------------
    # COMPUTATION OF LINKS BETWEEN TXOS
    # ------------------------------------------------------------------
    def _check_dtrm_links(self):
        """
        Checks the existence of deterministic links between inputs and outputs
        Returns a list of tuples (idx_output, idx_input) and a list of tuples (id_output, id_input)
        """
        nb_ins = len(self.inputs)
        nb_outs = len(self.outputs)

        shape = (nb_outs, nb_ins)
        mat_cmbn = np.zeros(shape, dtype=np.int64)

        shape = (1, nb_ins)
        in_cmbn = np.zeros(shape, dtype=np.int64)

        # Computes a matrix storing numbers of raw combinations matching input/output pairs
        # Also computes sum of combinations along inputs axis to get the number of combinations
        for (in_idx, val) in self._match_in_agg_to_val.items():
            for out_idx in self._val_to_match_out_agg[val]:
                mat_cmbn += self._get_link_cmbn(in_idx, out_idx)
                in_cmbn += self._all_in_agg[:, in_idx][np.newaxis, :]

        # Builds a list of sets storing inputs having a deterministic link with an output
        nb_cmbn = in_cmbn[0, 0]
        dtrm_rows, dtrm_cols = np.where(mat_cmbn == nb_cmbn)
        dtrm_coords = list(zip(dtrm_rows, dtrm_cols))
        dtrm_aggs = [(self.outputs[o][0], self.inputs[i][0]) for (o, i) in dtrm_coords]
        return dtrm_coords, dtrm_aggs

    def _compute_link_matrix(self):
        """
        Computes the linkability matrix
        Returns the number of possible combinations and the links matrix,
        or (0, None) when the allotted duration (_max_duration) is exceeded.
        Implements a depth-first traversal of the inputs combinations tree (right to left)
        For each input combination we compute the matching output combinations.
        This is a basic brute-force solution. Will have to find a better method later.
        """
        nb_tx_cmbn = 0
        itgt = 2 ** len(self.inputs) - 1
        otgt = 2 ** len(self.outputs) - 1
        d_links = defaultdict(int)

        # Initializes a stack of tasks & sets the initial task
        #  0: index used to resume the processing of the task (required for depth-first algorithm)
        #  1: il = left input aggregate
        #  2: ir = right input aggregate
        #  3: d_out = outputs combination matching with current input combination
        #     dictionary of dictionary :  { or => { ol => (nb_parents_cmbn, nb_children_cmbn) } }
        stack = deque()
        ini_d_out = defaultdict(dict)
        ini_d_out[otgt] = {0: (1, 0)}
        stack.append((0, 0, itgt, ini_d_out))

        # Sets start date/hour
        start_time = datetime.now()

        # Iterates over all valid inputs combinations (top->down)
        while len(stack) > 0:
            # Checks duration
            curr_time = datetime.now()
            delta_time = curr_time - start_time
            if delta_time.total_seconds() >= self._max_duration:
                return 0, None

            # Gets data from task
            t = stack[-1]
            idx_il = t[0]
            il = t[1]
            ir = t[2]
            d_out = t[3]
            n_idx_il = idx_il

            # Gets all valid decompositions of right input aggregate
            ircs = self._mat_in_agg_cmbn[ir]
            len_ircs = len(ircs)

            for i in range(idx_il, len_ircs):
                n_idx_il = i
                n_d_out = defaultdict(dict)

                # Gets left input sub-aggregate (column from ircs)
                n_il = ircs[i][1]

                # Checks if we must process this pair (columns from ircs are sorted in decreasing order)
                if n_il > il:
                    # Gets the right input sub-aggregate (row from ircs)
                    n_ir = ircs[i][0]

                    # Iterates over outputs combinations previously found
                    for o_r in d_out:
                        sol = otgt - o_r
                        # Computes the number of parent combinations
                        nb_prt = sum([s[0] for s in d_out[o_r].values()])

                        # Iterates over output sub-aggregates matching with left input sub-aggregate
                        val_il = self._match_in_agg_to_val[n_il]
                        for n_ol in self._val_to_match_out_agg[val_il]:

                            # Checks compatibility of output sub-aggregate with left part of output combination
                            if (sol & n_ol == 0):
                                # Computes:
                                #   the sum corresponding to the left part of the output combination
                                #   the complementary right output sub-aggregate
                                n_sol = sol + n_ol
                                n_or = otgt - n_sol
                                # Checks if the right output sub-aggregate is valid
                                val_ir = self._match_in_agg_to_val[n_ir]
                                match_out_agg = self._val_to_match_out_agg[val_ir]
                                # Adds this output combination into n_d_out if all conditions met
                                if (n_sol & n_or == 0) and (n_or in match_out_agg):
                                    n_d_out[n_or][n_ol] = (nb_prt, 0)

                    # Updates idx_il for the current task
                    stack[-1] = (i + 1, il, ir, d_out)
                    # Pushes a new task which will decompose the right input aggregate
                    stack.append((0, n_il, n_ir, n_d_out))
                    # Executes the new task (depth-first)
                    break

                else:
                    # No more results for il, triggers a break and a pop
                    n_idx_il = len_ircs
                    break

            # Checks if task has completed
            if n_idx_il > len_ircs - 1:
                # Pops the current task
                t = stack.pop()
                il = t[1]
                ir = t[2]
                d_out = t[3]

                # Checks if it's the root task
                if len(stack) == 0:
                    # Retrieves the number of combinations from root task
                    nb_tx_cmbn = d_out[otgt][0][1]

                else:
                    # Gets parent task
                    p_t = stack[-1]
                    p_d_out = p_t[3]

                    # Iterates over all entries from d_out
                    for (o_r, l_ol) in d_out.items():
                        r_key = (ir, o_r)
                        # Iterates over all left aggregates
                        for (ol, (nb_prnt, nb_chld)) in l_ol.items():
                            l_key = (il, ol)
                            # Updates the dictionary of links for the pair of aggregates
                            nb_occur = nb_chld + 1
                            d_links[r_key] += nb_prnt
                            d_links[l_key] += nb_prnt * nb_occur
                            # Updates parent d_out by back-propagating number of child combinations
                            p_or = ol + o_r
                            p_l_ol = p_d_out[p_or]
                            for (p_ol, (p_nb_prt, p_nb_chld)) in p_l_ol.items():
                                p_d_out[p_or][p_ol] = (p_nb_prt, p_nb_chld + nb_occur)

        # Fills the matrix
        links = self._get_link_cmbn(itgt, otgt)
        nb_tx_cmbn += 1
        for (lnk, mult) in d_links.items():
            links = links + self._get_link_cmbn(lnk[0], lnk[1]) * mult

        return nb_tx_cmbn, links

    def _get_link_cmbn(self, in_agg, out_agg):
        """
        Computes a linkability matrix encoding the matching of given input/output aggregates
        Returns a numpy array (rows = outputs, columns = inputs)

        Parameters:
            in_agg  = input aggregate
            out_agg = output aggregate
        """
        vouts = self._all_out_agg[:, out_agg][:, np.newaxis]
        vins = self._all_in_agg[:, in_agg][np.newaxis, :]
        return np.dot(vouts, vins)

    # ------------------------------------------------------------------
    # PACKING/UNPACKING OF LINKED TXOS
    # ------------------------------------------------------------------
    def _pack_linked_txos(self, linked_txos):
        """
        Packs input txos which are known as being controlled by a same entity

        Parameters:
            linked_txos = list of sets storing linked input txos.
                          Each txo is identified by its "id"
        """
        idx = len(self._packs)

        # Merges packs sharing common elements
        packs = merge_sets(linked_txos)

        for pack in packs:
            ins = []
            val_ins = 0

            for i in self.inputs:
                if i[0] in pack:
                    ins.append(i)
                    val_ins += i[1]

            idx += 1

            if len(ins) > 0:
                lbl = '%s_I%i' % (self.PACK, idx)
                inp = (lbl, val_ins)
                self.inputs.append(inp)
                in_pack = (lbl, val_ins, 'INPUTS', ins, [])
                self._packs.append(in_pack)
                # The packed inputs are now represented by the single pack entry
                for v in ins:
                    self.inputs.remove(v)

    def _unpack_link_matrix(self, mat_lnk, nb_cmbn):
        """
        Unpacks linked txos in the linkability matrix
        Returns the unpacked matrix (None if mat_lnk is None; the txo lists are
        still unpacked in that case)

        Parameters:
            mat_lnk = linkability matrix to be unpacked
            nb_cmbn = number of combinations associated to the linkability matrix
                      (kept for interface compatibility; not used by the unpacking)
        """
        mat_res = mat_lnk

        for (pack, val, lctn, ins, outs) in reversed(self._packs):

            if lctn == 'INPUTS':
                key = (pack, val)
                idx = self.inputs.index(key)
                if mat_lnk is not None:
                    nb_ins = len(ins)
                    nb_outs = len(self.outputs)
                    # Inserts columns into the matrix for packed inputs
                    shape = (nb_outs, nb_ins)
                    vals = np.zeros(shape, dtype=np.int64)
                    vals += mat_res[:, idx][:, np.newaxis]
                    mat_res = np.hstack((mat_res[:, 0:idx], vals, mat_res[:, idx + 1:]))
                # Inserts unpacked inputs into the list of inputs
                self.inputs[idx:idx + 1] = ins

            elif lctn == 'OUTPUTS':
                key = (pack, val)
                idx = self.outputs.index(key)
                if mat_lnk is not None:
                    nb_ins = len(self.inputs)
                    nb_outs = len(outs)
                    # Inserts rows into the matrix for packed outputs
                    shape = (nb_outs, nb_ins)
                    vals = np.zeros(shape, dtype=np.int64)
                    vals += mat_res[idx, :][np.newaxis, :]
                    mat_res = np.vstack((mat_res[0:idx, :], vals, mat_res[idx + 1:, :]))
                # Inserts unpacked outputs into the list of outputs
                self.outputs[idx:idx + 1] = outs

        return mat_res

    # ------------------------------------------------------------------
    # LIMITS
    # ------------------------------------------------------------------
    def _check_limit_ok(self, mode):
        """
        Returns True when neither the inputs nor the outputs exceed max_txos.

        Parameters:
            mode = processing option being checked (not used by the check itself)
        """
        max_card = max(len(self.inputs), len(self.outputs))
        return max_card <= self.max_txos
def scheduling_method(self, cur_time, es, es_dict):
    """
    Map the queued events to available nodes at the current time.

    The queued jobs are ranked with ``_job_priority_ewt``; jobs that cannot be
    fitted in the current availability, or that exceed ``cur_q_length``, get a
    no-solution dispatching tuple.  The remaining jobs are passed to
    ``cp_model``, which is called repeatedly with a doubling time limit until
    one of the stop conditions below is met.

    NOTE(review): the block nesting of this method was reconstructed from a
    whitespace-collapsed source -- confirm against the repository version.

    :param cur_time: current time
    :param es: events to be scheduled
    :param es_dict: dictionary with full data of the events
    :return: a 2-tuple ``(dispatching decisions, [])``
    """
    resource_types = self.resource_manager.resource_types
    avl_resources = self.resource_manager.current_availability

    # =======================================================================
    # Considered queued jobs: jobs that can be fitted in the current system state and are within q_length.
    # If a job_obj cannot be fitted or exceeds q_length it is directly loaded in the dispatching decision
    # using the no-solution dispatching tuple.
    # =======================================================================
    priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])
    current_qjobs = SortedList()

    # ===================================================================
    # If a previous non-dispatching state is set, the current system capacity is compared against it:
    # if more resources are available than before, the dispatcher is called.
    # Otherwise, a non-dispatching decision is returned.
    # ===================================================================
    # NOTE(review): `dispatch` is assigned here but not read again in this method.
    dispatch = True
    prev_qjobs = None

    # Dispatching skip
    if self.non_dispatched_state:
        dispatch = False
        (prev_qjobs, prev_total_resource_usage,) = self.non_dispatched_state
        new_jobs = False
        for e in es:
            if not(e.id in prev_qjobs):
                # A job that was not queued in the stuck state invalidates it
                new_jobs = True
                self.non_dispatched_state = None
                break
        if not new_jobs:
            cur_total_resource_usage = self.resource_manager._resources.usage('dict')
            zero_usage = []
            same_usage = []
            for res in resource_types:
                zero_usage.append(cur_total_resource_usage[res] == 0)
                same_usage.append(cur_total_resource_usage[res] >= prev_total_resource_usage[res])
            if all(zero_usage):
                # The system is empty
                self.non_dispatched_state = None
            elif all(same_usage):
                # The system has the same or less capacity wrt the stuck state
                return [self.dispatching_tuple(e.id) for e in es], []
            else:
                # The system is not empty but has more capacity wrt the stuck state
                self.non_dispatched_state = None

    # cons_qjobs: job_id => [can be fitted, total possibilities, {node: possibilities}, job object]
    cons_qjobs = {}
    # NOTE(review): get_ewt receives `job.queue` for queued jobs but `es_dict[job_id]` for
    # running ones -- confirm both argument kinds are intended.
    max_ewt = max([self.get_ewt(job.queue) for job in es] + [self.get_ewt(es_dict[job_id]) for job_id in self.resource_manager.current_allocations])
    for node in self.resource_manager.node_names:
        avl_res = avl_resources[node]
        for idx, job_obj in enumerate(es):
            job_id = job_obj.id
            if not(job_id in cons_qjobs):
                # First time this job is seen: register it and compute its priority once
                current_qjobs.add(job_id)
                cons_qjobs[job_id] = [False, 0, {}, None]
                priorized_jobs.add((job_id, self._job_priority_ewt(job_obj, cur_time, max_ewt)))
            possibilities = self._joint_nodes(job_obj, avl_res)
            if possibilities > 0:
                cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                cons_qjobs[job_id][1] += possibilities
                if cons_qjobs[job_id][1] >= job_obj.requested_nodes:
                    cons_qjobs[job_id][0] = True
                    if not cons_qjobs[job_id][3]:
                        cons_qjobs[job_id][3] = job_obj

    qjobs = 0
    wc_makespan = 0
    makespans = []
    remaining_priorized_jobs = []

    # Jobs of the dispatching decision
    decision_jobs = {}
    for job_id, _ in priorized_jobs:
        t = cons_qjobs[job_id]
        if not t[0] or qjobs > self.cur_q_length - 1:
            # Cannot be fitted, or the considered queue is already full
            decision_jobs[job_id] = self.dispatching_tuple(job_id)
            cons_qjobs.pop(job_id)
        else:
            exp_duration = max(1, t[-1].expected_duration)
            wc_makespan += exp_duration
            makespans.append(exp_duration)
            qjobs += 1
            remaining_priorized_jobs.append(job_id)

    # =======================================================================
    # There are no jobs to dispatch at the current system state.
    # Then a no-solution list is returned.
    # =======================================================================
    if not cons_qjobs:
        # Job Dispatching skip
        cur_total_resource_usage = self.resource_manager._resources.usage('dict')
        self.non_dispatched_state = (current_qjobs, cur_total_resource_usage,)
        # NOTE(review): returns a dict view here while the other exits return lists.
        return decision_jobs.values(), []

    # =======================================================================
    # Time limit handling: start from the maximum or from the initial time limit
    # =======================================================================
    if self.use_max_timelimit:
        timelimit = self.timelimit
    else:
        timelimit = self.initial_timelimit

    a_jobs_list = []
    best_z_list = []
    solved = False
    self.priorized_jobs = None
    prev_sched = []
    # NOTE(review): if initial_timelimit > timelimit the loop body never runs and
    # `schedalloc_plan` is unbound below -- confirm this cannot happen.
    while timelimit <= self.timelimit:
        schedalloc_plan = {}
        args = (schedalloc_plan, cur_time, cons_qjobs, remaining_priorized_jobs, es_dict, resource_types, avl_resources)
        kwargs = {'timelimit':timelimit, 'prev_sched':prev_sched}
        function = getattr(self, 'cp_model')
        function(*args, **kwargs)
        # cp_model reports its status through reserved keys of the plan dictionary
        solver_state = schedalloc_plan.pop('solver_state')
        best_z = schedalloc_plan.pop('best_z')
        best_z_list.append(best_z)
        # NOTE(review): this compares against the enum member while the checks below compare
        # against `.value`; also 'limit_reached' is not popped on this exit, so it may remain
        # in the returned plan -- confirm.
        if solver_state == self.SolverState.PROBLEM_INFEASIBLE:
            break
        limit_reached = schedalloc_plan.pop('limit_reached')
        disp_jobs = 0
        prev_sched = []
        for stime, job_id, _ in schedalloc_plan.values():
            if stime == cur_time:
                prev_sched.append(job_id)
                disp_jobs += 1
        if disp_jobs == len(cons_qjobs) and solver_state == self.SolverState.NO_MORE_SOLUTIONS.value and not limit_reached:
            solved = True
            break
        elif disp_jobs < len(cons_qjobs) and solver_state == self.SolverState.NO_MORE_SOLUTIONS.value and not limit_reached:
            solved = True
            break
        elif len(best_z_list) >= self.max_k and all([best_z_list[-1] == b for b in best_z_list[-self.max_k:]]):
            # The objective value did not change over the last max_k attempts
            solved = True
            break
        else:
            a_jobs_list.append(disp_jobs)
            timelimit *= 2

    self.priorized_jobs = None

    # This is useful for print and also to create the unsuccessful data
    dispatched_jobs = 0
    queued_job_ids = []
    # NOTE(review): iterating the dict yields its keys, yet `a[2]` / `a[1]` look like the
    # (time, job id, nodes) tuple layout of the values -- presumably `.values()` was
    # intended; verify.
    for a in schedalloc_plan:
        if a[2]:
            dispatched_jobs += 1
        if dispatched_jobs == 0:
            queued_job_ids.append(a[1])

    if self.reduce_job_length:
        # ===================================================================
        # The considered number of jobs in the next scheduling decision is halved
        # if the current problem instance was not solved.
        # After a solved instance this value is reset.
        # The minimum is 1, otherwise there will be nothing to dispatch.
        # ===================================================================
        if not solved:
            self.cur_q_length = max(1, self.cur_q_length // 2)
        else:
            self.cur_q_length = self.q_length

    if dispatched_jobs == 0:
        # Remember the stuck state so the next call can skip dispatching if nothing changed
        self.non_dispatched_state = (current_qjobs, self.resource_manager._resources.usage('dict'),)
    else:
        self.non_dispatched_state = None
    return list(schedalloc_plan.values()) + list(decision_jobs.values()), []
def scheduling_method(self, cur_time, es, es_dict):
    """
    Map the queued events to available nodes at the current time.

    The queued jobs are ranked with ``_job_priority_slowdown``.  Depending on
    ``_reduced_model`` either only the jobs that fit the current availability
    are considered, or all jobs plus up to ``_considered_cannot_start`` jobs
    that cannot start now.  The selected jobs are passed to ``cp_model``,
    either in a separate process (``_safe``) or in-process.

    NOTE(review): the block nesting of this method was reconstructed from a
    whitespace-collapsed source -- confirm against the repository version.

    :param cur_time: current time
    :param es: events to be scheduled
    :param es_dict: dictionary with full data of the events
    :return: a 2-tuple ``(dispatching decisions, [])``
    """
    dispatching_plan = []

    resource_types = self.resource_manager.resource_types
    avl_resources = self.resource_manager.current_availability
    # NOTE(review): system_capacity is fetched but not used below.
    system_capacity = self.resource_manager.system_capacity('nodes')

    # =======================================================================
    # Considered queued jobs: jobs that can be fitted in the current system state and are within q_length.
    # If a job_obj cannot be fitted or exceeds q_length it is directly loaded in the dispatching decision
    # using the no-solution dispatching tuple.
    # =======================================================================
    priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])

    current_qjobs = SortedList()

    # cons_qjobs: job_id => [can be fitted, total possibilities, {node: possibilities}, job object]
    cons_qjobs = {}
    for node in self.resource_manager.node_names:
        avl_res = avl_resources[node]
        for idx, job_obj in enumerate(es):
            job_id = job_obj.id

            if not (job_id in cons_qjobs):
                # First time this job is seen: register it and compute its priority once
                current_qjobs.add(job_id)
                cons_qjobs[job_id] = [False, 0, {}, None]
                priorized_jobs.add((job_id, self._job_priority_slowdown(job_obj, cur_time)))
            if self._reduced_model:
                possibilities = self._joint_nodes(job_obj, avl_res)
                if possibilities > 0:
                    cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                    cons_qjobs[job_id][1] += possibilities
                    if cons_qjobs[job_id][1] >= job_obj.requested_nodes:
                        cons_qjobs[job_id][0] = True
                        if not cons_qjobs[job_id][3]:
                            cons_qjobs[job_id][3] = job_obj
            else:
                # Full model: every job is marked as considered, without per-node data
                cons_qjobs[job_id][0] = True
                cons_qjobs[job_id][1] = None
                cons_qjobs[job_id][2] = None
                cons_qjobs[job_id][3] = job_obj

    qjobs = 0
    wc_makespan = 0
    makespans = []

    selected_priorized_jobs = []

    # Jobs of the dispatching decision
    decision_jobs = {}

    if self._reduced_model:
        for job_id, _ in priorized_jobs:
            t = cons_qjobs[job_id]
            if not t[0] or qjobs > self._cur_q_length - 1:
                # Cannot be fitted, or the considered queue is already full
                decision_jobs[job_id] = self.dispatching_tuple(job_id)
                cons_qjobs.pop(job_id)
            else:
                exp_duration = max(1, t[-1].expected_duration)
                wc_makespan += exp_duration
                makespans.append(exp_duration)
                qjobs += 1
                selected_priorized_jobs.append(job_id)
    else:
        cannot_start_selected = 0
        for job_id, _ in priorized_jobs:
            t = cons_qjobs[job_id]
            if (not t[0] and cannot_start_selected >= self._considered_cannot_start) or (
                    qjobs > self._cur_q_length - 1):
                decision_jobs[job_id] = self.dispatching_tuple(job_id)
                cons_qjobs.pop(job_id)
            else:
                if not t[0]:
                    # A job that cannot start now is still considered, up to the configured quota
                    cons_qjobs[job_id][3] = es_dict[job_id]
                    cannot_start_selected += 1
                exp_duration = max(1, t[-1].expected_duration)
                wc_makespan += exp_duration
                makespans.append(exp_duration)
                qjobs += 1
                selected_priorized_jobs.append(job_id)

    # =======================================================================
    # There are no jobs to dispatch at the current system state.
    # Then a no-solution list is returned.
    # =======================================================================
    if not cons_qjobs:
        # Job Dispatching skip
        # NOTE(review): returns a dict view here while the other exits return lists.
        return decision_jobs.values(), []

    solved = False
    self.priorized_jobs = None

    if self._safe:
        # Run the solver in a separate process, sharing the plan through a manager dict
        manager = mp_dill.Manager()
        schedule_plan = manager.dict()
        process_class = mp_dill.Process

        # NOTE(review): `timelimit` is not defined anywhere in this method, and the in-process
        # branch below passes `max_timelimit` instead -- this looks like a NameError unless
        # `timelimit` is a module-level name; verify.
        p = process_class(target=getattr(self, 'cp_model'),
                          args=(
                              schedule_plan, cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types,
                              avl_resources),
                          kwargs={'timelimit': timelimit}
                          )
        p.start()
        p.join()

        if p.exitcode != 0:
            # The solver process failed: return what was planned so far and mark the rest as not dispatched
            schedule_plan.pop('solver_state', None)
            schedule_plan.pop('limit_reached', None)
            return list(decision_jobs.values()) \
                   + [self.dispatching_tuple(job_id, start_time, nodes) for (start_time, job_id, nodes) in
                      schedule_plan.values()] \
                   + [self.dispatching_tuple(job_id, None, []) for job_id in cons_qjobs if
                      not (job_id in schedule_plan)], []
    else:
        schedule_plan = {}
        args = (
            schedule_plan, cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types, avl_resources)
        kwargs = {'max_timelimit': self._max_timelimit}
        function = getattr(self, 'cp_model')
        function(*args, **kwargs)

    # cp_model reports its statistics through reserved keys of the plan dictionary
    # NOTE(review): these pops are placed after the if/else (shared by both branches); the
    # collapsed source does not show whether they belong to the `else` branch only -- confirm.
    solved = schedule_plan.pop('solved')
    of_value = schedule_plan.pop('of_value')
    walltime = schedule_plan.pop('walltime')
    proc_time = schedule_plan.pop('proc_time')
    incurred_time = walltime + proc_time
    failures = schedule_plan.pop('failures')
    branches = schedule_plan.pop('branches')
    p = None

    self.priorized_jobs = None
    dispatching_plan = list(schedule_plan.values())
    self.__instance_data = (
        solved, of_value, walltime, incurred_time, failures, branches,
        dispatching_plan + list(decision_jobs.values()),)

    # This is useful for print and also to create the unsuccessful data
    dispatched_jobs = 0
    queued_job_ids = []
    for a in dispatching_plan:
        if a[2]:
            dispatched_jobs += 1
        if dispatched_jobs == 0:
            queued_job_ids.append(a[1])

    if self._reduce_job_length:
        # ===================================================================
        # The considered number of jobs in the next scheduling decision is reduced
        # if the current problem instance was not solved.
        # After a solved instance this value is reset.
        # The minimum is 1, otherwise there will be nothing to dispatch.
        # ===================================================================
        if not solved:
            self._cur_q_length = max(1, min(self._cur_q_length,
                                            len(schedule_plan)) // 2)
        else:
            self._cur_q_length = self._q_length

    print('{} - {}: Queued {}, Dispatched {}, Running {}. {}'.format(self._counter, cur_time,
                                                                     len(es) - dispatched_jobs,
                                                                     dispatched_jobs, len(
            self.resource_manager.current_allocations), self.resource_manager.current_usage))
    return dispatching_plan + list(decision_jobs.values()), []
# For every benchmark in AnmalZoo, greedily pack the connected components of its
# automaton into fixed-size blocks and report how many blocks are needed.
# The print calls use the single-argument parenthesised form so the script
# produces the same output on Python 2 and also parses on Python 3.
ds = [a for a in AnmalZoo]
for uat in ds:
    # r holds the residual (still free) capacity of every block opened so far,
    # kept sorted so that r[-1] is the block with the most free space.
    r = SortedList()
    big128, big256 = 0, 0

    automatas = atma.parse_anml_file(anml_path[uat])
    automatas.remove_ors()
    automatas = automatas.get_connected_components_as_automatas()

    for atm in automatas:
        nc = atm.nodes_count

        if nc >= 128:
            big128 += 1
        if nc >= 256:
            big256 += 1

        if nc > bigest_component_size:
            # A single component larger than a block can never be placed.
            print("this NFA can not be fit: %s" % (uat,))
            break

        if r and nc <= r[-1]:
            # can be packed into the block with the largest residual
            cand_residual = r.pop(-1)
            r.add(cand_residual - nc)
        else:
            # new fcb
            r.add(bigest_component_size - nc)

    # NOTE(review): sum(r[:-1]) leaves the largest residual out of the
    # "not being assigned" total -- presumably intentional; confirm.
    print("uat %s needs %d connected local switches each with (%d,%d) size. There are %d nodes not being assigned."
          " It has %d CCs bigger than 128 and %d CCs bigger than 256"
          % (uat, len(r), fcb_size, fcb_size, sum(r[:-1]), big128, big256))
def MedCombiner2(intermediates):
    """
    The Running Medians Reducer.

    Merges the intermediate key/value collections in ``intermediates`` into a
    single mapping (a later entry overwrites an earlier one with the same
    key), concatenates the values in ascending key order into one flat list of
    word counts, and computes the running median over that list.

    :param intermediates: iterable of objects convertible with ``dict()``,
                          each mapping a key to a list of word counts
    :return: list of floats, one running median per word count
    """
    # Merge every intermediate mapping into one.
    merged = {}
    for partial in intermediates:
        merged.update(dict(partial))

    # Flatten the per-key lists in ascending key order.
    flat_counts = []
    for key in sorted(merged):
        flat_counts.extend(merged[key])

    running_medians = []
    # Kept sorted on insertion so the middle element(s) can be read by index
    # without re-sorting after every new count.
    ordered_counts = SortedList()
    for seen, wordcount in enumerate(flat_counts, start=1):
        ordered_counts.add(wordcount)
        # Odd number of values: single middle element; even: mean of the two middle ones.
        middle, is_odd = divmod(seen, 2)
        if is_odd:
            median = float(ordered_counts[middle])
        else:
            median = float((ordered_counts[middle - 1] + ordered_counts[middle]) / 2)
        running_medians.append(median)

    return running_medians