Python SortedList.add Examples

Programming Language: Python

Namespace/Package Name: sortedcontainers.sortedlist

Class/Type: SortedList

Method/Function: add

Examples at hotexamples.com: 10

Python SortedList.add - 10 examples found. These are the top-rated real-world Python examples of sortedcontainers.sortedlist.SortedList.add extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SortedList(17)

add(9)

pop(3)

update(1)

Example #1

Show file

def MedCalculator(fileNum, text):
    """
    Thread worker that computes the running median of per-line word counts.

    Every line of ``text`` is passed through ``str.translate`` with the
    module-level ``table``, tokenised with the module-level ``tokenPattern``
    and the number of tokens is inserted into a sorted container.  After each
    insertion the median of all counts seen so far is recorded.

    :param fileNum: index of the input file being processed (not read by this body)
    :param text:    iterable of text lines
    :return:        list of floats holding one running median per input line
    """
    running_medians = []

    # The counts are kept sorted on insertion, so the median is a plain index
    # lookup and no re-sorting is needed after each new line.
    word_counts = SortedList()

    for line_index, raw_line in enumerate(text):
        # presumably `table` lower-cases and strips some punctuation -- it is
        # defined outside this function
        tokens = re.findall(tokenPattern, raw_line.translate(table))
        word_counts.add(len(tokens))

        # line_index is zero-based: an odd index means an even number of samples
        middle, odd_index = divmod(line_index, 2)
        if odd_index:
            # even sample count: average the two central values
            running_medians.append(
                float((word_counts[middle] + word_counts[middle + 1]) / 2))
        else:
            # odd sample count: the central value is the median
            running_medians.append(float(word_counts[middle]))

    return running_medians

Example #2

Show file

File: transaction_sorter.py Project: empire/schallenge-python-2014

def sort_file_lists(input_file_list, file_name_generator):
    """
    Read commands from every input file and spill them in sorted batches.

    Commands are accumulated in a ``SortedList``.  As soon as the buffer holds
    more than ``number_of_allowed_command`` entries it is handed to
    ``dump_commands_to_file`` together with ``file_name_generator`` and a
    fresh empty buffer is started.

    NOTE(review): in this visible excerpt, whatever is still buffered once the
    inputs are exhausted is not dumped -- confirm this is handled elsewhere.

    :param input_file_list:     iterable of inputs accepted by ``_get_reader``
    :param file_name_generator: passed through to ``dump_commands_to_file``
    """
    pending = SortedList()
    for source in input_file_list:
        for entry in _get_reader(source):
            pending.add(entry)
            if len(pending) <= number_of_allowed_command:
                continue
            # Buffer is over the limit: flush it and start a new one
            dump_commands_to_file(pending, file_name_generator)
            pending = SortedList()

Example #3

Show file

File: transaction_sorter.py Project: Sajjad-HTLO/schallenge-python-2014

def sort_file_lists(input_file_list, file_name_generator):
    """
    Stream commands out of the given input files into sorted, size-limited batches.

    Each command yielded by ``_get_reader`` is inserted into a ``SortedList``
    buffer.  When the buffer length exceeds ``number_of_allowed_command`` the
    buffer is passed to ``dump_commands_to_file`` (with
    ``file_name_generator``) and replaced by a new empty ``SortedList``.

    NOTE(review): commands left in the buffer after the last input file are
    not dumped within this visible excerpt -- verify against the full source.

    :param input_file_list:     iterable of inputs accepted by ``_get_reader``
    :param file_name_generator: forwarded unchanged to ``dump_commands_to_file``
    """
    batch = SortedList()
    for path in input_file_list:
        commands = _get_reader(path)
        for cmd in commands:
            batch.add(cmd)
            batch_is_full = len(batch) > number_of_allowed_command
            if batch_is_full:
                dump_commands_to_file(batch, file_name_generator)
                batch = SortedList()

Example #4

Show file

File: model_evaluations.py Project: dawidbrzozowski/sentiment_analysis

def _get_top_n_samples(model_predictions: List[ModelPrediction], n: int, best: bool):
    top_n_samples = SortedList(key=lambda sample: -sample.true_label_probability) if best else SortedList()
    for model_prediction in model_predictions:
        if best == model_prediction.is_correct():
            if len(top_n_samples) < n:
                top_n_samples.add(model_prediction)
            else:
                if best != (model_prediction < top_n_samples[-1]):
                    top_n_samples.pop()
                    top_n_samples.add(model_prediction)
    return [sample for sample in top_n_samples]  # so that it returns a normal list instead of SortedList

Example #5

Show file

File: gaussian_numeric_attribute_class_observer.py Project: shuxiangzhang/scikit-multiflow

 def get_split_point_suggestions(self):
     """
     Propose candidate split values spread evenly over the observed range.

     The overall minimum and maximum are taken across the per-class
     observed minima/maxima.  The range is divided into
     ``num_bin_options + 1`` equal steps and every interior step boundary
     that lies strictly between the minimum and the maximum is suggested.

     :return: a ``SortedList`` of suggested split values (empty when no
              minimum below ``np.inf`` has been observed)
     """
     lowest = np.inf
     highest = -np.inf
     for class_key, _ in self._att_val_dist_per_class.items():
         class_min = self._min_value_observed_per_class[class_key]
         class_max = self._max_value_observed_per_class[class_key]
         if class_min < lowest:
             lowest = class_min
         if class_max > highest:
             highest = class_max

     candidates = SortedList()
     if not lowest < np.inf:
         # Nothing observed: no range to split
         return candidates

     step = highest - lowest
     step /= (float(self.num_bin_options) + 1.0)
     for bin_no in range(1, self.num_bin_options + 1):
         point = lowest + (step * bin_no)
         # Keep only points strictly inside the observed range
         if lowest < point < highest:
             candidates.add(point)
     return candidates

Example #6

Show file

class TxosLinker(object):
    '''
    A class allowing to compute the entropy of Bitcoin transactions
    and the linkability of inputs/outputs of a transaction
    '''

    # ------------------------------------------------------------------
    # CONSTANTS
    # ------------------------------------------------------------------
    # Default maximum duration in seconds
    MAX_DURATION = 180

    # Processing options
    LINKABILITY = 'LINKABILITY'
    PRECHECK = 'PRECHECK'
    MERGE_FEES = 'MERGE_FEES'

    # Markers
    FEES = 'FEES'
    PACK = 'PACK'

    # Max number of inputs (or outputs) which can be processed by this algorithm
    MAX_NB_TXOS = 12

    # ------------------------------------------------------------------
    # ATTRIBUTES (instance level, set by __init__ / process)
    #
    #   inputs         List of input txos expressed as tuples (id, amount)
    #   outputs        List of output txos expressed as tuples (id, amount)
    #   _fees          Fees associated to the transaction
    #   _max_duration  Maximum duration of the script (in seconds)
    #   max_txos       Max number of inputs (or outputs) accepted for processing
    #   _packs         Packs of linked txos created by _pack_linked_txos
    #
    # The linkability matrix returned by process() is laid out as:
    #    Columns = input txos
    #    Rows = output txos
    #    Cells = number of combinations for which an input and an output are linked
    # ------------------------------------------------------------------

    # ------------------------------------------------------------------
    # INITIALIZATION
    # ------------------------------------------------------------------
    def __init__(self, inputs=None, outputs=None, fees=0, max_duration=MAX_DURATION, max_txos=MAX_NB_TXOS):
        '''
        Constructor
        Parameters:
            inputs       = list of inputs txos [(v1_id, v1_amount), ...] (defaults to an empty list)
            outputs      = list of outputs txos [(v1_id, v1_amount), ...] (defaults to an empty list)
            fees         = amount of fees associated to the transaction
            max_duration = max duration allocated to processing of a single tx (in seconds)
            max_txos     = max number of txos. Txs with more than max_txos inputs or outputs are not processed.
        '''
        # None sentinels avoid sharing one default list between all instances
        self._orig_ins = [] if inputs is None else inputs
        self._orig_outs = [] if outputs is None else outputs
        self._orig_fees = fees
        self._max_duration = max_duration
        self.max_txos = max_txos
        self._packs = []

    # ------------------------------------------------------------------
    # PUBLIC METHODS
    # ------------------------------------------------------------------
    def process(self, linked_txos=None, options=None, intrafees=(0,0)):
        '''
        Computes the linkability between a set of input txos and a set of output txos
        Returns:
            linkability matrix (None if it could not be computed)
            number of possible combinations for the transaction
            list of inputs (sorted by decreasing value)
            list of outputs (sorted by decreasing value)
        Parameters:
            linked_txos     = list of sets storing linked input txos. Each txo is identified by its id
                              (defaults to no linked txos)
            options         = list of actions to be applied (defaults to [LINKABILITY, PRECHECK])
                LINKABILITY : computes the linkability matrix
                PRECHECK    : prechecks existence of deterministic links between inputs and outputs
                MERGE_FEES  : consider that all fees have been paid by a unique sender and manage fees as an additionnal output
            intrafees       = tuple (fees_maker, fees_taker) of max "fees" paid among participants
                              used for joinmarket transactions
                              fees_maker are potential max "fees" received by a participant from another participant
                              fees_taker are potential max "fees" paid by a participant to all others participants
        '''
        if options is None:
            options = [self.LINKABILITY, self.PRECHECK]
        self._options = options
        # Work on copies so the txos given to the constructor are left untouched
        self.inputs = list(self._orig_ins)
        self.outputs = list(self._orig_outs)
        self._fees_maker = intrafees[0]
        self._fees_taker = intrafees[1]
        self._has_intrafees = bool(self._fees_maker or self._fees_taker)

        # Packs txos known as being controlled by a same entity
        # It decreases the entropy and speeds-up computations
        if linked_txos:
            self._pack_linked_txos(linked_txos)

        # Manages fees
        if (self.MERGE_FEES in options) and (self._orig_fees > 0):
            # Manages fees as an additional output (case of sharedsend by blockchain.info).
            # Allows to reduce the volume of computations to be done.
            self._fees = 0
            txo_fees = (self.FEES, self._orig_fees)
            self.outputs.append(txo_fees)
        else:
            self._fees = self._orig_fees

        # Checks deterministic links
        nb_cmbn = 0
        # Initialised up front so both names are always bound below
        mat_lnk = None
        dtrm_lnks_id = None
        if self.PRECHECK in options and self._check_limit_ok(self.PRECHECK) and (not self._has_intrafees):
            # Prepares the data
            self._prepare_data()
            self._match_agg_by_val()
            # Checks deterministic links
            dtrm_lnks, dtrm_lnks_id = self._check_dtrm_links()
            # If deterministic links have been found, fills the linkability matrix
            # (returned as result if linkability is not processed)
            if dtrm_lnks is not None:
                shape = ( len(self.outputs), len(self.inputs) )
                mat_lnk = np.zeros(shape, dtype=np.int64)
                for (r,c) in dtrm_lnks:
                    mat_lnk[r,c] = 1

        # Checks if all inputs and outputs have already been merged
        nb_ins = len(self.inputs)
        nb_outs = len(self.outputs)
        if (nb_ins == 0) or (nb_outs == 0):
            nb_cmbn = 1
            shape = (nb_outs, nb_ins)
            mat_lnk = np.ones(shape, dtype=np.int64)
        elif self.LINKABILITY in options and self._check_limit_ok(self.LINKABILITY):
            # Packs deterministic links if needed
            if dtrm_lnks_id is not None:
                dtrm_lnks_id = [set(lnk) for lnk in dtrm_lnks_id]
                self._pack_linked_txos(dtrm_lnks_id)
            # Prepares data
            self._prepare_data()
            self._match_agg_by_val()
            # Computes a matrix storing a tree composed of valid pairs of input aggregates
            self._compute_in_agg_cmbn()
            # Builds the linkability matrix
            nb_cmbn, mat_lnk = self._compute_link_matrix()

        # Unpacks the matrix
        mat_lnk = self._unpack_link_matrix(mat_lnk, nb_cmbn)

        # Returns results
        return mat_lnk, nb_cmbn, self.inputs, self.outputs

    # ------------------------------------------------------------------
    # PREPARATION
    # ------------------------------------------------------------------
    def _prepare_data(self):
        '''
        Computes several data structures which will be used later
        Reads and replaces self.inputs / self.outputs (sorted, null values removed)
        and stores the aggregates and their values for both sides.
        '''
        # Prepares data related to the input txos
        self.inputs,\
        self._all_in_agg,\
        self._all_in_agg_val = self._prepare_txos(self.inputs)

        # Prepares data related to the output txos
        self.outputs,\
        self._all_out_agg,\
        self._all_out_agg_val = self._prepare_txos(self.outputs)

    def _prepare_txos(self, txos):
        '''
        Computes several data structures related to a list of txos
        Returns:
            list of txos sorted by decreasing values
            array of aggregates (combinations of txos) in binary format
            array of values associated to the aggregates
        Parameters:
            txos = list of txos (list of tuples (id, value))
        '''
        # Removes txos with null value
        txos = [txo for txo in txos if txo[1] > 0]

        # Orders txos by value
        txos = sorted(txos, key=lambda tup: tup[1], reverse=True)

        # Creates a 1D array of values
        all_val = np.array([txo[1] for txo in txos], dtype='int64')

        # Computes all possible combinations of txos encoded in binary format
        # Column k of all_agg holds the bits of k: row j is bit j
        expnt = len(txos)
        shape = (expnt, 2**expnt)
        # Builtin bool: the np.bool alias no longer exists in recent NumPy releases
        all_agg = np.zeros(shape, dtype=bool)
        base = np.array([0,1], dtype=bool)

        for j in range(expnt):
            two_exp_j = 2**j
            tmp = np.repeat(base, two_exp_j)
            # Floor division keeps the repetition count an int (np.tile rejects floats)
            all_agg[j, :] = np.tile(tmp, 2**(expnt-1) // two_exp_j)
        #all_agg = np.arange(2**expnt) >> np.arange(expnt)[::, np.newaxis] & 1

        # Computes values of aggregates
        all_agg_val = np.dot(all_val, all_agg)

        # Returns computed data structures
        return txos, all_agg, all_agg_val

    # ------------------------------------------------------------------
    # PROCESSING OF AGGREGATES
    # ------------------------------------------------------------------
    def _match_agg_by_val(self):
        '''
        Matches input/output aggregates by values and stores the result in
            _all_match_in_agg     = sorted list of matching input aggregates
            _match_in_agg_to_val  = dictionary (input aggregate => value)
            _val_to_match_out_agg = dictionary (value => set of matching output aggregates)
        '''
        self._all_match_in_agg = SortedList()
        self._match_in_agg_to_val = defaultdict(int)
        self._val_to_match_out_agg = defaultdict(set)

        # Gets unique values of input / output aggregates (np.unique returns them sorted)
        all_unique_in_agg_val = np.unique(self._all_in_agg_val)
        all_unique_out_agg_val = np.unique(self._all_out_agg_val)

        # Computes total fees paid/receiver by taker/maker
        if self._has_intrafees:
            fees_taker = self._fees + self._fees_taker
            fees_maker = - self._fees_maker         # doesn't take into account tx fees paid by makers

        # Finds input and output aggregates with matching values
        for in_agg_val in np.nditer(all_unique_in_agg_val):
            val = int(in_agg_val)

            for out_agg_val in np.nditer(all_unique_out_agg_val):

                diff = in_agg_val - out_agg_val

                if (not self._has_intrafees) and (diff < 0):
                    # Output values are increasing: every following diff is negative too
                    break
                else:
                    # Computes conditions required for a matching
                    cond_no_intrafees = (not self._has_intrafees) and diff <= self._fees
                    cond_intrafees = self._has_intrafees and\
                                     ( (diff <= 0 and diff >= fees_maker) or (diff >= 0 and diff <= fees_taker) )

                    if cond_no_intrafees or cond_intrafees:
                        # Registers the matching input aggregate
                        match_in_agg = np.where(self._all_in_agg_val == in_agg_val)[0]

                        for in_idx in match_in_agg:
                            if in_idx not in self._all_match_in_agg:
                                self._all_match_in_agg.add(in_idx)
                                self._match_in_agg_to_val[in_idx] = val

                        # Registers the matching output aggregate
                        match_out_agg = np.where(self._all_out_agg_val == out_agg_val)[0]
                        self._val_to_match_out_agg[val].update(match_out_agg.tolist())

    def _compute_in_agg_cmbn(self):
        '''
        Computes a matrix of valid combinations (pairs) of input aggregates
        Stores a dictionary (parent_agg => [(child_agg1, child_agg2), ...]) in _mat_in_agg_cmbn
        We have a valid combination (agg1, agg2) if:
           R1/ child_agg1 & child_agg2 = 0 (no bitwise overlap)
           R2/ child_agg1 > child_agg2 (matrix is symmetric)
        '''
        aggs = self._all_match_in_agg[1:-1]
        tgt = self._all_match_in_agg[-1]
        mat = defaultdict(list)
        saggs = set(aggs)

        for i in range(0, tgt+1):
            if i in saggs:
                j_max = min(i, tgt - i + 1)
                for j in range(0, j_max):
                    if (i & j == 0) and (j in saggs):
                        mat[i+j].append( (i,j) )

        self._mat_in_agg_cmbn = mat

    # ------------------------------------------------------------------
    # COMPUTATION OF LINKS BETWEEN TXOS
    # ------------------------------------------------------------------
    def _check_dtrm_links(self):
        '''
        Checks the existence of deterministic links between inputs and outputs
        Returns a list of tuples (idx_output, idx_input) and a list of tuples (id_output, id_input)
        '''
        nb_ins = len(self.inputs)
        nb_outs = len(self.outputs)

        shape = (nb_outs, nb_ins)
        mat_cmbn = np.zeros(shape, dtype=np.int64)

        shape = (1, nb_ins)
        in_cmbn = np.zeros(shape, dtype=np.int64)

        # Computes a matrix storing numbers of raw combinations matching input/output pairs
        # Also computes sum of combinations along inputs axis to get the number of combinations
        for (in_idx, val) in self._match_in_agg_to_val.items():
            for out_idx in self._val_to_match_out_agg[val]:
                mat_cmbn += self._get_link_cmbn(in_idx, out_idx)
                in_cmbn += self._all_in_agg[:,in_idx][np.newaxis,:]

        # Builds a list of sets storing inputs having a deterministic link with an output
        # A link is deterministic when it appears in every counted combination
        nb_cmbn = in_cmbn[0,0]
        dtrm_rows, dtrm_cols = np.where(mat_cmbn == nb_cmbn)
        dtrm_coords = list(zip(dtrm_rows, dtrm_cols))
        dtrm_aggs = [(self.outputs[o][0], self.inputs[i][0]) for (o,i) in dtrm_coords]
        return dtrm_coords, dtrm_aggs

    def _compute_link_matrix(self):
        '''
        Computes the linkability matrix
        Returns the number of possible combinations and the links matrix
        Returns (0, None) if the processing exceeds _max_duration seconds
        Implements a depth-first traversal of the inputs combinations tree (right to left)
        For each input combination we compute the matching output combinations.
        This is a basic brute-force solution. Will have to find a better method later.
        '''
        nb_tx_cmbn = 0
        itgt = 2 ** len(self.inputs) - 1
        otgt = 2 ** len(self.outputs) - 1
        d_links = defaultdict(int)

        # Initializes a stack of tasks & sets the initial task
        #  0: index used to resume the processing of the task (required for depth-first algorithm)
        #  1: il = left input aggregate
        #  2: ir = right input aggregate
        #  3: d_out = outputs combination matching with current input combination
        #             dictionary of dictionary :  { or =>  { ol => (nb_parents_cmbn, nb_children_cmbn) } }
        stack = deque()
        ini_d_out = defaultdict(dict)
        ini_d_out[otgt] = { 0: (1, 0) }
        stack.append( (0, 0, itgt, ini_d_out) )

        # Sets start date/hour
        start_time = datetime.now()

        # Iterates over all valid inputs combinations (top->down)
        while len(stack) > 0:
            # Checks duration
            curr_time = datetime.now()
            delta_time = curr_time - start_time
            if delta_time.total_seconds() >= self._max_duration:
                return 0, None

            # Gets data from task
            t = stack[-1]
            idx_il = t[0]
            il = t[1]
            ir = t[2]
            d_out = t[3]
            n_idx_il = idx_il

            # Gets all valid decompositions of right input aggregate
            ircs = self._mat_in_agg_cmbn[ir]
            len_ircs = len(ircs)

            for i in range(idx_il, len_ircs):

                n_idx_il = i
                n_d_out = defaultdict(dict)

                # Gets left input sub-aggregate (column from ircs)
                n_il = ircs[i][1]

                # Checks if we must process this pair (columns from ircs are sorted in decreasing order)
                if n_il > il:
                    # Gets the right input sub-aggregate (row from ircs)
                    n_ir = ircs[i][0]

                    # Iterates over outputs combinations previously found
                    for o_r in d_out:
                        sol = otgt - o_r
                        # Computes the number of parent combinations
                        nb_prt = sum([s[0] for s in d_out[o_r].values()])

                        # Iterates over output sub-aggregates matching with left input sub-aggregate
                        val_il = self._match_in_agg_to_val[n_il]
                        for n_ol in self._val_to_match_out_agg[val_il]:

                            # Checks compatibility of output sub-aggregate with left part of output combination
                            if (sol & n_ol == 0):
                                # Computes:
                                #   the sum corresponding to the left part of the output combination
                                #   the complementary right output sub-aggregate
                                n_sol = sol + n_ol
                                n_or = otgt - n_sol
                                # Checks if the right output sub-aggregate is valid
                                val_ir = self._match_in_agg_to_val[n_ir]
                                match_out_agg = self._val_to_match_out_agg[val_ir]
                                # Adds this output combination into n_d_out if all conditions met
                                if (n_sol & n_or == 0) and (n_or in match_out_agg):
                                    n_d_out[n_or][n_ol] = (nb_prt, 0)

                    # Updates idx_il for the current task
                    stack[-1] = (i + 1, il, ir, d_out)
                    # Pushes a new task which will decompose the right input aggregate
                    stack.append( (0, n_il, n_ir, n_d_out) )
                    # Executes the new task (depth-first)
                    break

                else:
                    # No more results for il, triggers a break and a pop
                    n_idx_il = len_ircs
                    break

            # Checks if task has completed
            if n_idx_il > len_ircs - 1:
                # Pops the current task
                t = stack.pop()
                il = t[1]
                ir = t[2]
                d_out = t[3]

                # Checks if it's the root task
                if len(stack) == 0:
                    # Retrieves the number of combinations from root task
                    nb_tx_cmbn = d_out[otgt][0][1]

                else:
                    # Gets parent task
                    p_t = stack[-1]
                    p_d_out = p_t[3]

                    # Iterates over all entries from d_out
                    for (o_r, l_ol) in d_out.items():
                        r_key = (ir, o_r)
                        # Iterates over all left aggregates
                        for (ol, (nb_prnt, nb_chld)) in l_ol.items():
                            l_key = (il, ol)
                            # Updates the dictionary of links for the pair of aggregates
                            nb_occur = nb_chld + 1
                            d_links[r_key] += nb_prnt
                            d_links[l_key] += nb_prnt * nb_occur
                            # Updates parent d_out by back-propagating number of child combinations
                            p_or = ol + o_r
                            p_l_ol = p_d_out[p_or]
                            for (p_ol, (p_nb_prt, p_nb_chld)) in p_l_ol.items():
                                p_d_out[p_or][p_ol] = (p_nb_prt, p_nb_chld + nb_occur)

        # Fills the matrix
        links = self._get_link_cmbn(itgt, otgt)
        nb_tx_cmbn += 1
        for (lnk, mult) in d_links.items():
            links = links + self._get_link_cmbn(lnk[0], lnk[1]) * mult

        return nb_tx_cmbn, links

    def _get_link_cmbn(self, in_agg, out_agg):
        '''
        Computes a linkability matrix encoding the matching of given input/output aggregates
        Returns a numpy array (rows = outputs, columns = inputs)
        Parameters:
            in_agg     = input aggregate
            out_agg    = output aggregate
        '''
        # Outer product of the output column vector and the input row vector
        vouts = self._all_out_agg[:,out_agg][:,np.newaxis]
        vins = self._all_in_agg[:,in_agg][np.newaxis,:]
        return np.dot(vouts, vins)

    # ------------------------------------------------------------------
    # PACKING/UNPACKING OF LINKED TXOS
    # ------------------------------------------------------------------
    def _pack_linked_txos(self, linked_txos):
        '''
        Packs input txos which are known as being controlled by a same entity
        Each pack replaces its members in self.inputs and is recorded in self._packs
        Parameters:
            linked_txos = list of sets storing linked input txos. Each txo is identified by its "id"
        '''
        idx = len(self._packs)

        # Merges packs sharing common elements
        packs = merge_sets(linked_txos)

        for pack in packs:
            ins = []
            val_ins = 0

            for i in self.inputs:
                if i[0] in pack:
                    ins.append(i)
                    val_ins += i[1]

            idx += 1
            if len(ins) > 0:
                lbl = '%s_I%i' % (self.PACK, idx)
                inp = (lbl, val_ins)
                self.inputs.append(inp)
                in_pack = (lbl, val_ins, 'INPUTS', ins, [])
                self._packs.append(in_pack)
                # Removes the packed txos from the list of inputs
                for v in ins:
                    self.inputs.remove(v)

    def _unpack_link_matrix(self, mat_lnk, nb_cmbn):
        '''
        Unpacks linked txos in the linkability matrix
        Also restores the packed txos in self.inputs / self.outputs
        Returns the unpacked matrix (None if mat_lnk is None)
        Parameters:
            mat_lnk = linkability matrix to be unpacked
            nb_cmbn = number of combinations associated to the linkability matrix
                      (not used by the unpacking, kept for interface compatibility)
        '''
        mat_res = mat_lnk

        # Packs are unpacked in reverse order of their creation
        for (pack, val, lctn, ins, outs) in reversed(self._packs):

            if lctn == 'INPUTS':
                key = (pack, val)
                idx = self.inputs.index(key)
                if mat_lnk is not None:
                    nb_ins = len(ins)
                    nb_outs = len(self.outputs)
                    # Inserts columns into the matrix for packed inputs
                    shape = (nb_outs, nb_ins)
                    vals = np.zeros(shape , dtype=np.int64)
                    vals += mat_res[:,idx][:, np.newaxis]
                    mat_res = np.hstack( (mat_res[:,0:idx], vals, mat_res[:,idx+1:]) )
                # Inserts unpacked inputs into the list of inputs
                self.inputs[idx:idx+1] = ins

            elif lctn == 'OUTPUTS':
                key = (pack, val)
                idx = self.outputs.index(key)
                if mat_lnk is not None:
                    nb_ins = len(self.inputs)
                    nb_outs = len(outs)
                    # Inserts rows into the matrix for packed outputs
                    shape = (nb_outs, nb_ins)
                    vals = np.zeros(shape, dtype=np.int64)
                    vals += mat_res[idx,:][np.newaxis,:]
                    mat_res = np.vstack( (mat_res[0:idx,:], vals, mat_res[idx+1:,:]) )
                # Inserts unpacked outputs into the list of outputs
                self.outputs[idx:idx+1] = outs

        return mat_res

    # ------------------------------------------------------------------
    # LIMITS
    # ------------------------------------------------------------------
    def _check_limit_ok(self, mode):
        '''
        Checks that the transaction is small enough to be processed
        Returns True if neither the inputs nor the outputs exceed max_txos
        Parameters:
            mode = processing option being checked (currently not used by the check)
        '''
        max_card = max(len(self.inputs), len(self.outputs))
        return max_card <= self.max_txos

Example #7

Show file

    def scheduling_method(self, cur_time, es, es_dict):
        """
            This function must map the queued events to available nodes at the current time.

            :param cur_time: current time
            :param es_dict: dictionary with full data of the events
            :param es: events to be scheduled
            :param debug: Flag to debug

            :return a tuple of (time to schedule, event id, list of assigned nodes)
        """               
        resource_types = self.resource_manager.resource_types
        avl_resources = self.resource_manager.current_availability
                    
        #=======================================================================
        # Considered queued jobs: Jobs can be fitted in the current system state and less or equal than q_length
        # If a job_obj cannot be fitted or exceed the q_length is directly loaded in the dispatching decision using the no-solution dispatching tuple 
        #=======================================================================
        priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])
              
        current_qjobs = SortedList() 

        #===================================================================
        # Here, if there is a non dispatching previous state set, the current system capacity
        # is verified if it is different (more reasource available than before) the dispatcher is called.
        # Otherwise, a non dispatching decision is returned.
        #===================================================================
        
        # Dispatching Skip
        dispatch = True
        prev_qjobs = None
        
        # Dispatching skip
        if self.non_dispatched_state:

            dispatch = False
            (prev_qjobs, prev_total_resource_usage,) = self.non_dispatched_state
            
            new_jobs = False

            for e in es:                    
                if not(e.id in prev_qjobs):
                    new_jobs = True
                    self.non_dispatched_state = None
                    break        
            if not new_jobs:               
                cur_total_resource_usage = self.resource_manager._resources.usage('dict')
                
                zero_usage = []
                same_usage = []
                for res in resource_types:
                    zero_usage.append(cur_total_resource_usage[res] == 0)
                    same_usage.append(cur_total_resource_usage[res] >= prev_total_resource_usage[res])                    
                
                if all(zero_usage):
                    # The system is empty
                    self.non_dispatched_state = None
                elif all(same_usage):
                    # The system has the same or less capacity wrt the stuck state
                    return [self.dispatching_tuple(e.id) for e in es], []
                else:
                    # The system is not empty but has more capacity wrt the stuck state
                    self.non_dispatched_state = None
                            
        cons_qjobs = {}
        max_ewt = max([self.get_ewt(job.queue) for job in es] + [self.get_ewt(es_dict[job_id]) for job_id in self.resource_manager.current_allocations])
        for node in self.resource_manager.node_names:
            avl_res = avl_resources[node]
            for idx, job_obj in enumerate(es):
                job_id = job_obj.id
                
                if not(job_id in cons_qjobs):
                    current_qjobs.add(job_id)
                    cons_qjobs[job_id] = [False, 0, {}, None]
                    priorized_jobs.add((job_id, self._job_priority_ewt(job_obj, cur_time, max_ewt)))
                        
                possibilities = self._joint_nodes(job_obj, avl_res)
                if possibilities > 0:
                    cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                    cons_qjobs[job_id][1] += possibilities 
                    if cons_qjobs[job_id][1] >= job_obj.requested_nodes: 
                        cons_qjobs[job_id][0] = True
                        if not cons_qjobs[job_id][3]: 
                            cons_qjobs[job_id][3] = job_obj
                                                       
        qjobs = 0
        wc_makespan = 0
        makespans = []
        
        remaining_priorized_jobs = []
        
        # Job of the dispatching decision 
        decision_jobs = {}    
        
        for job_id, _ in priorized_jobs:
            t = cons_qjobs[job_id]
            if not t[0] or qjobs > self.cur_q_length - 1:
                decision_jobs[job_id] = self.dispatching_tuple(job_id)
                cons_qjobs.pop(job_id)
            else:
                exp_duration = max(1, t[-1].expected_duration)
                wc_makespan += exp_duration
                makespans.append(exp_duration)
                qjobs += 1
                remaining_priorized_jobs.append(job_id)
        #=======================================================================
        # There are no jobs to dispatch at the current system state. 
        # Then a no solution list is returned. 
        #=======================================================================
        if not cons_qjobs:
            
            # Job Dispatching skip
            cur_total_resource_usage = self.resource_manager._resources.usage('dict')
            self.non_dispatched_state = (current_qjobs, cur_total_resource_usage,)
            
            return decision_jobs.values(), []

        #=======================================================================
        # After an unsuccessful dispatching
        #=======================================================================
        if self.use_max_timelimit:
            timelimit = self.timelimit
        else: 
            timelimit = self.initial_timelimit
                
        a_jobs_list = []
        best_z_list = []
        solved = False
        
        self.priorized_jobs = None
        
        prev_sched = []
        while timelimit <= self.timelimit:
            schedalloc_plan = {}
            args = (schedalloc_plan, cur_time, cons_qjobs, remaining_priorized_jobs, es_dict, resource_types, avl_resources)
            kwargs = {'timelimit':timelimit, 'prev_sched':prev_sched}
            function = getattr(self, 'cp_model')
            function(*args, **kwargs)

            solver_state = schedalloc_plan.pop('solver_state')
            best_z = schedalloc_plan.pop('best_z')
            best_z_list.append(best_z)
            
            if solver_state == self.SolverState.PROBLEM_INFEASIBLE:
                break
            limit_reached = schedalloc_plan.pop('limit_reached')            
            
            disp_jobs = 0
            prev_sched = [] 
            for stime, job_id, _ in schedalloc_plan.values():
                if stime == cur_time:
                    prev_sched.append(job_id)
                    disp_jobs += 1
                
            if disp_jobs == len(cons_qjobs) and solver_state == self.SolverState.NO_MORE_SOLUTIONS.value and not limit_reached:
                solved = True
                break
            elif disp_jobs < len(cons_qjobs) and solver_state == self.SolverState.NO_MORE_SOLUTIONS.value and not limit_reached:
                solved = True
                break
            elif len(best_z_list) >= self.max_k and all([best_z_list[-1] == b for b in best_z_list[-self.max_k:]]):
                solved = True
                break
            else:
                a_jobs_list.append(disp_jobs)
                timelimit *= 2 
        
        self.priorized_jobs = None
        
        # This is useful for print and also to create the unsuccessful data
        dispatched_jobs = 0
        queued_job_ids = []
        for a in schedalloc_plan:
            if a[2]:
                dispatched_jobs += 1
            if dispatched_jobs == 0:
                queued_job_ids.append(a[1])

        if self.reduce_job_length:
            #===================================================================
            # The considered number of jobs in the next scheduling decision are reduced to the half
            # if the current problem instance was not solved, if the current usage is
            # leq of the previous time point. After a successful dispatching this value is reset. 
            # The minimum is 1, otherwise there will be nothing to dispatch
            #===================================================================
            if not solved:
                self.cur_q_length = max(1, self.cur_q_length // 2)
            else:
                self.cur_q_length = self.q_length
        if dispatched_jobs == 0:
            self.non_dispatched_state = (current_qjobs, self.resource_manager._resources.usage('dict'),)
        else:
            self.non_dispatched_state = None
                 
        return list(schedalloc_plan.values()) + list(decision_jobs.values()), []

Example #8

Show file

File: pcp21_dispatcher.py Project: cgalleguillosm/cp_dispatchers

    def scheduling_method(self, cur_time, es, es_dict):
        """
            Map the queued events to available nodes at the current time.

            Jobs are visited in ascending order of ``_job_priority_slowdown``. At most ``_cur_q_length`` of them
            are handed to ``cp_model``; every job left out receives the no-solution dispatching tuple.

            :param cur_time: current time
            :param es: events to be scheduled
            :param es_dict: dictionary with full data of the events

            :return: a tuple ``(dispatching plan, [])``; the plan is a list whose entries have the form
                (time to schedule, event id, list of assigned nodes)
        """
        resource_types = self.resource_manager.resource_types
        avl_resources = self.resource_manager.current_availability
        # Not read below; only the commented-out alternative in the loop refers to it.
        system_capacity = self.resource_manager.system_capacity('nodes')

        # =======================================================================
        # Considered queued jobs: Jobs can be fitted in the current system state and less or equal than q_length
        # If a job_obj cannot be fitted or exceed the q_length is directly loaded in the dispatching decision using the no-solution dispatching tuple
        # =======================================================================
        priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])

        # Ids of every queued job seen during this call.
        current_qjobs = SortedList()

        # job id -> [can start now, accumulated possibilities, {node: usable amount}, job object]
        cons_qjobs = {}
        for node in self.resource_manager.node_names:
            avl_res = avl_resources[node]
            # avl_res = system_capacity[node]
            for job_obj in es:
                job_id = job_obj.id

                if job_id not in cons_qjobs:
                    current_qjobs.add(job_id)
                    cons_qjobs[job_id] = [False, 0, {}, None]
                    priorized_jobs.add((job_id, self._job_priority_slowdown(job_obj, cur_time)))
                if self._reduced_model:
                    possibilities = self._joint_nodes(job_obj, avl_res)
                    if possibilities > 0:
                        cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                        cons_qjobs[job_id][1] += possibilities
                        if cons_qjobs[job_id][1] >= job_obj.requested_nodes:
                            cons_qjobs[job_id][0] = True
                            if not cons_qjobs[job_id][3]:
                                cons_qjobs[job_id][3] = job_obj
                else:
                    # Without the reduced model every job is treated as startable and
                    # no per-node information is collected.
                    cons_qjobs[job_id][0] = True
                    cons_qjobs[job_id][1] = None
                    cons_qjobs[job_id][2] = None
                    cons_qjobs[job_id][3] = job_obj

        # Number of jobs already selected for the model.
        qjobs = 0

        selected_priorized_jobs = []

        # Job of the dispatching decision
        decision_jobs = {}

        if self._reduced_model:
            for job_id, _ in priorized_jobs:
                t = cons_qjobs[job_id]
                if not t[0] or qjobs > self._cur_q_length - 1:
                    # Cannot start now, or the queue-length budget is exhausted.
                    decision_jobs[job_id] = self.dispatching_tuple(job_id)
                    cons_qjobs.pop(job_id)
                else:
                    qjobs += 1
                    selected_priorized_jobs.append(job_id)
        else:
            cannot_start_selected = 0
            for job_id, _ in priorized_jobs:
                t = cons_qjobs[job_id]
                if (not t[0] and cannot_start_selected >= self._considered_cannot_start) or (
                        qjobs > self._cur_q_length - 1):
                    decision_jobs[job_id] = self.dispatching_tuple(job_id)
                    cons_qjobs.pop(job_id)
                else:
                    if not t[0]:
                        # A limited number of jobs that cannot start is still passed to the model.
                        cons_qjobs[job_id][3] = es_dict[job_id]
                        cannot_start_selected += 1
                    qjobs += 1
                    selected_priorized_jobs.append(job_id)
        # =======================================================================
        # There are no jobs to dispatch at the current system state.
        # Then a no solution list is returned.
        # =======================================================================
        if not cons_qjobs:
            # Job Dispatching skip (a list, like every other return of this method)
            return list(decision_jobs.values()), []

        self.priorized_jobs = None

        # Both execution modes call cp_model with the same inputs and the same keyword arguments.
        model_inputs = (cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types, avl_resources)
        model_kwargs = {'max_timelimit': self._max_timelimit}

        if self._safe:
            # Run the model in a separate process; results come back through a managed dict.
            manager = mp_dill.Manager()
            schedule_plan = manager.dict()

            p = mp_dill.Process(target=self.cp_model,
                                args=(schedule_plan,) + model_inputs,
                                kwargs=model_kwargs)
            p.start()
            p.join()

            if p.exitcode != 0:
                # The model process failed: drop every bookkeeping entry it may have stored so that
                # only (start_time, job_id, nodes) entries remain, keep those, and mark the
                # remaining considered jobs as not dispatched.
                for key in ('solver_state', 'limit_reached', 'solved', 'of_value',
                            'walltime', 'proc_time', 'failures', 'branches'):
                    schedule_plan.pop(key, None)
                return list(decision_jobs.values()) \
                       + [self.dispatching_tuple(job_id, start_time, nodes) for (start_time, job_id, nodes) in
                          schedule_plan.values()] \
                       + [self.dispatching_tuple(job_id, None, []) for job_id in cons_qjobs if
                          not (job_id in schedule_plan)], []
        else:
            schedule_plan = {}
            self.cp_model(schedule_plan, *model_inputs, **model_kwargs)

        # Statistics stored by cp_model next to the plan entries; remove them so that
        # only plan entries remain in schedule_plan.
        solved = schedule_plan.pop('solved')
        of_value = schedule_plan.pop('of_value')
        walltime = schedule_plan.pop('walltime')
        proc_time = schedule_plan.pop('proc_time')
        incurred_time = walltime + proc_time
        failures = schedule_plan.pop('failures')
        branches = schedule_plan.pop('branches')

        self.priorized_jobs = None
        dispatching_plan = list(schedule_plan.values())
        self.__instance_data = (
            solved, of_value, walltime, incurred_time, failures, branches,
            dispatching_plan + list(decision_jobs.values()),)

        # An entry counts as dispatched when its third field is truthy.
        dispatched_jobs = sum(1 for a in dispatching_plan if a[2])

        if self._reduce_job_length:
            # ===================================================================
            # The considered number of jobs in the next scheduling decision are reduced to the half
            # if the current problem instance was not solved, if the current usage is
            # leq of the previous time point. After a successful dispatching this value is reset.
            # The minimum is 1, otherwise there will be nothing to dispatch
            # ===================================================================
            if not solved:
                self._cur_q_length = max(1, min(self._cur_q_length, len(schedule_plan)) // 2)
            else:
                self._cur_q_length = self._q_length

        print('{} - {}: Queued {}, Dispatched {}, Running {}. {}'.format(
            self._counter, cur_time,
            len(es) - dispatched_jobs, dispatched_jobs,
            len(self.resource_manager.current_allocations),
            self.resource_manager.current_usage))
        return dispatching_plan + list(decision_jobs.values()), []

Example #9

Show file

File: sensitivity_analysis.py Project: anonymousUser0/ASPLOS

# Packing summary script (Python 2 syntax): for every member of AnmalZoo, place the
# connected components of its automata into blocks of capacity bigest_component_size
# and print how many blocks were opened.

# Materialize the members of AnmalZoo so they can be iterated below.
ds = [a for a in AnmalZoo]

for uat in ds:
    # One residual (remaining capacity) per opened block; the SortedList keeps the
    # largest residual at index -1.
    r = SortedList()
    # Number of components with at least 128 / at least 256 nodes.
    big128, big256 = 0, 0

    # Parse the ANML file registered for this entry and split it into connected components.
    # NOTE(review): the effect of remove_ors() is not visible here -- confirm against atma.
    automatas = atma.parse_anml_file(anml_path[uat])
    automatas.remove_ors()
    automatas = automatas.get_connected_components_as_automatas()

    for atm in automatas:
        nc = atm.nodes_count
        if nc >= 128:
            big128 += 1
        if nc >= 256:
            big256 += 1

        # A component larger than a whole block cannot be placed: report it and stop
        # processing the remaining components of this entry.
        # NOTE(review): the summary line after the loop is still printed in that case,
        # using the partial counts gathered so far.
        if nc > bigest_component_size:
            print "this NFA can not be fit:", uat
            break

        # Place the component in the block with the largest residual when it fits there;
        # otherwise open a new block.
        if r and nc <= r[-1]:  # can be packed
            cand_residual = r.pop(-1)
            r.add(cand_residual - nc)
        else:  # new fcb
            r.add(bigest_component_size - nc)

    # Per-entry summary.
    # NOTE(review): sum(r[:-1]) leaves the largest residual out of the "not being assigned"
    # total, the counters use >= although the message says "bigger than", and the message
    # reports fcb_size while the packing above uses bigest_component_size -- confirm intent.
    print "uat %s needs %d connected local switches each with (%d,%d) size. There are %d nodes not being assigned." \
          " It has %d CCs bigger than 128 and %d CCs bigger than 256"\
          %(uat, len(r), fcb_size, fcb_size, sum(r[:-1]), big128, big256)

Example #10

Show file

def MedCombiner2(intermediates):
    """
    The Running Medians Reducer
    Merges the intermediate results into one flat list of word counts (ordered by key) and
    computes the running median over that list.

    :rtype : list           running medians as floats, one per merged word count
    :param intermediates:   iterable whose items are mappings (or anything dict() accepts);
                            each value is a list of line word counts
    :return:                the final results
    """
    # Merge every intermediate into a single dictionary. When the same key shows up in
    # more than one intermediate, the value seen last replaces the earlier one.
    resultDict = {}
    for d in intermediates:
        resultDict.update(dict(d))

    # Flatten the values into one master list, visiting the keys in ascending order.
    linesWordCount = []
    for k in sorted(resultDict):
        linesWordCount.extend(resultDict[k])

    medianNumbers = []

    # a sorted list to hold the word counts for input lines
    # the sorted list boosts performance substantially when computing the running median because this will not require
    # resorting the wordcount list every time we add an entry to it.
    sortedLinesWordCount = SortedList()

    # running median calculations
    for seen, wordcount in enumerate(linesWordCount, start=1):
        sortedLinesWordCount.add(wordcount)
        middle = seen // 2
        if seen % 2:
            # odd number of values: the median is the middle element
            medianNumbers.append(float(sortedLinesWordCount[middle]))
        else:
            # even number of values: average the two middle elements; dividing by 2.0
            # keeps the fractional part even where "/" is integer division (Python 2)
            medianNumbers.append(
                (sortedLinesWordCount[middle - 1] + sortedLinesWordCount[middle]) / 2.0)
    return medianNumbers