def test_popitem(self):
    pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
    shuffle(pairs)
    od = OrderedDict(pairs)
    while pairs:
        self.assertEqual(od.popitem(), pairs.pop())
    self.assertRaises(KeyError, od.popitem)
    self.assertEqual(len(od), 0)
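A minimal standalone sketch of the behavior this test relies on: popitem() removes and returns entries in LIFO order by default, which is why it matches pairs.pop(); popitem(last=False) removes them FIFO instead. The keys and values here are illustrative.

from collections import OrderedDict

od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od.popitem())            # ('c', 3) -- LIFO: the most recently inserted pair
print(od.popitem(last=False))  # ('a', 1) -- FIFO: the oldest remaining pair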
Example #3
class SymbolLRUCacheManager(RequiredConfig):
    """for cleaning up the symbols cache"""
    required_config = Namespace()
    required_config.add_option(
        'symbol_cache_path',
        doc="the cache directory to automatically remove files from",
        default=os.path.join(tempfile.gettempdir(), 'symbols'))
    required_config.add_option(
        'symbol_cache_size',
        doc="the maximum size of the symbols cache",
        default='1G',
        from_string_converter=from_string_to_parse_size)
    required_config.add_option(
        'verbosity',
        doc="how chatty should this be? 1 - writes to stdout,"
        " 2 - uses the logger",
        default=0,
        from_string_converter=int)

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        """constructor for a registration object that runs an LRU cache
       cleaner"""
        self.config = config

        self.directory = os.path.abspath(config.symbol_cache_path)
        self.max_size = config.symbol_cache_size
        self.verbosity = config.verbosity
        # Cache state
        self.total_size = 0
        self._lru = OrderedDict()
        # pyinotify bits
        self._wm = pyinotify.WatchManager()
        self._handler = EventHandler(self, verbosity=config.verbosity)
        self._notifier = pyinotify.ThreadedNotifier(self._wm, self._handler)
        mask = pyinotify.IN_DELETE | pyinotify.IN_CREATE \
            | pyinotify.IN_OPEN | pyinotify.IN_MOVED_FROM \
            | pyinotify.IN_MOVED_TO | pyinotify.IN_MODIFY
        self._wdd = self._wm.add_watch(self.directory,
                                       mask,
                                       rec=True,
                                       auto_add=True)
        # Load existing files into the cache.
        self._get_existing_files(self.directory)
        self._notifier.start()

    #--------------------------------------------------------------------------
    @property
    def num_files(self):
        return len(self._lru)

    #--------------------------------------------------------------------------
    def _rm_empty_dirs(self, path):
        '''
        Attempt to remove any empty directories that are parents of path
        and children of self.directory.
        '''
        path = os.path.dirname(path)
        while not os.path.samefile(path, self.directory):
            if not os.listdir(path):
                os.rmdir(path)
            path = os.path.dirname(path)

    #--------------------------------------------------------------------------
    def _update_cache(self, path, update_size=False):
        if path in self._lru:
            size = self._lru.pop(path)
            if update_size:
                self.total_size -= size
        else:
            update_size = True

        if update_size:
            try:
                size = os.stat(path).st_size
            except OSError:
                self.config.logger.warning(
                    'file was not found while cleaning cache: %s', path)
                return

            self.total_size += size
            # If we're out of space, remove items from the cache until
            # we fit again.
            while self.total_size > self.max_size and self._lru:
                rm_path, rm_size = self._lru.popitem(last=False)
                self.total_size -= rm_size
                os.unlink(rm_path)
                self._rm_empty_dirs(rm_path)
                if self.verbosity >= 2:
                    self.config.logger.debug('RM %s', rm_path)
        self._lru[path] = size

    #--------------------------------------------------------------------------
    def _remove_cached(self, path):
        # We might have already removed this file in _update_cache.
        if path in self._lru:
            size = self._lru.pop(path)
            self.total_size -= size

    #--------------------------------------------------------------------------
    def _get_existing_files(self, path):
        for base, dirs, files in os.walk(path):
            for f in files:
                f = os.path.join(base, f)
                self._update_cache(f)

    #--------------------------------------------------------------------------
    def close(self):
        self._notifier.stop()
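The class above combines the two OrderedDict idioms that make up an LRU cache: _update_cache pops and re-inserts a path so it lands at the most-recently-used end, and popitem(last=False) evicts from the least-recently-used end while the cache is over budget. Below is a minimal sketch of that core loop with the pyinotify and filesystem plumbing stripped out; TinyLRU and the byte sizes are illustrative, not part of the original class.

from collections import OrderedDict

class TinyLRU(object):
    """Size-budgeted LRU mapping path -> size, mirroring _update_cache above."""
    def __init__(self, max_size):
        self.max_size = max_size
        self.total_size = 0
        self._lru = OrderedDict()

    def touch(self, path, size):
        # Re-inserting below moves the path to the most-recently-used end.
        if path in self._lru:
            self.total_size -= self._lru.pop(path)
        self.total_size += size
        # Evict least-recently-used entries until the budget fits again.
        while self.total_size > self.max_size and self._lru:
            rm_path, rm_size = self._lru.popitem(last=False)
            self.total_size -= rm_size
        self._lru[path] = size

cache = TinyLRU(max_size=100)
cache.touch('a.sym', 60)
cache.touch('b.sym', 50)  # total would be 110, so 'a.sym' is evicted first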
Example #5
def fast_subset_degree_sampling(filename, rp, delimiter):
    start_time = time.time()
    fp = open(filename, 'r')

    network = []
    degree_nodes, adjacency_matrix = {}, {}

    #Reading Input File to Obtain Degree and Adjacency Matrix
    for line in fp:
        data = line.strip('\r\n').split(delimiter)
        if (len(data) < 3):
            weight = 1
        else:
            weight = float(data[2])
        node1 = int(float(data[0]))
        node2 = int(float(data[1]))
        #Degree Calculation
        if (node1 not in degree_nodes):
            degree_nodes[node1] = weight
        else:
            degree_nodes[node1] = degree_nodes[node1] + weight
        if (node2 not in degree_nodes):
            degree_nodes[node2] = weight
        else:
            degree_nodes[node2] = degree_nodes[node2] + weight

        #Adjacency Matrix Calculation
        if (node1 not in adjacency_matrix):
            adjacency_matrix[node1] = [weight, node2]
        else:
            adjacency_matrix[node1][0] = adjacency_matrix[node1][0] + weight
            adjacency_matrix[node1].append(node2)
        if (node2 not in adjacency_matrix):
            adjacency_matrix[node2] = [weight, node1]
        else:
            adjacency_matrix[node2][0] = adjacency_matrix[node2][0] + weight
            adjacency_matrix[node2].append(node1)

    median_degree = 1.0 * sorted(
        degree_nodes.values())[len(degree_nodes.values()) // 2]
    No_nodes = len(degree_nodes)
    #print No_nodes;
    represent_points = rp

    #Obtain Degree Matrix and Adjacency_Matrix with degree > median
    #Time required for this operation is O(n)
    mod_degree_nodes = dict(
        (k, v) for (k, v) in degree_nodes.iteritems() if v > median_degree)
    kout = open('DegreeDistribution.txt', 'w')
    for key, values in mod_degree_nodes.iteritems():
        kout.write(str(key) + "\t" + str(values) + "\n")
    kout.close()
    #sorted_degree_nodes = collections.OrderedDict(sorted(mod_degree_nodes.iteritems(),key=operator.itemgetter(1),reverse=True));
    sorted_degree_nodes = OrderedDict(
        sorted(mod_degree_nodes.iteritems(),
               key=operator.itemgetter(1),
               reverse=True))
    del degree_nodes
    #Time required for this operation is O(n)
    mod_adjacency_matrix = dict((k, v)
                                for (k, v) in adjacency_matrix.iteritems()
                                if v[0] > median_degree)
    del adjacency_matrix
    fp.close()

    #We have obtained the degree and adjacency_matrix dictionaries; now perform the algorithm
    temp_degree_nodes, temp_adjacency_matrix = {}, {}
    selected_points = []
    iteration, count = 0, 0
    #Time required for this operation is O(n)
    if (len(mod_degree_nodes) < represent_points):
        print "Tune the model by decreasing representative number of points"

    print("First Step of the Sampling Process Completed")
    #The main algorithm goes here
    while (iteration < represent_points):
        if not mod_degree_nodes:  #Time required for this operation is O(1)
            count = count + 1
            print "Degree Matrix Empty for the %d time" % (count)
            if not temp_degree_nodes:  #Time required for this operation is O(1)
                print "Tune the model by decreasing representative number of points"
            else:
                del mod_degree_nodes, mod_adjacency_matrix
                mod_degree_nodes, mod_adjacency_matrix = {}, {}
                #mod_degree_nodes = copy.deepcopy(temp_degree_nodes);
                mod_degree_nodes = degree_copy.degree_copy(temp_degree_nodes)
                del temp_degree_nodes
                #mod_adjacency_matrix = copy.deepcopy(temp_adjacency_matrix);
                mod_adjacency_matrix = adjacency_copy.adjacency_copy(
                    temp_adjacency_matrix)
                del temp_adjacency_matrix, sorted_degree_nodes
                #sorted_degree_nodes=collections.OrderedDict(sorted(mod_degree_nodes.iteritems(),key=operator.itemgetter(1),reverse=True));
                sorted_degree_nodes = OrderedDict(
                    sorted(mod_degree_nodes.iteritems(),
                           key=operator.itemgetter(1),
                           reverse=True))
                temp_degree_nodes, temp_adjacency_matrix = {}, {}
        else:
            degree_tuple = sorted_degree_nodes.popitem(False)
            #Time required for this operation is O(1)
            adjacency_value = mod_adjacency_matrix[degree_tuple[0]]
            #Time required for this operation is O(1)
            adjacency_list = adjacency_value[1:]
            #Time required for this operation is 0(1)
            selected_points.append(
                (degree_tuple[0], degree_tuple[1], len(adjacency_list)))
            #Time required for this operation is O(1)
            del mod_degree_nodes[degree_tuple[0]]
            del mod_adjacency_matrix[degree_tuple[0]]
            iteration = iteration + 1
            #print iteration;
            for node_index in adjacency_list:  #Look at the adjacency_list to start de-activating in O(k)
                if node_index in mod_degree_nodes:  #If node is active in the dictionary, in O(1)
                    degree_value = mod_degree_nodes[node_index]
                    adjacency_value = mod_adjacency_matrix[node_index]
                    temp_degree_nodes[node_index] = degree_value
                    #Put it in a temporary degree matrix in O(1);
                    temp_adjacency_matrix[node_index] = adjacency_value
                    #Put it in a temporary adjacency_matrix in O(1);
                    del mod_adjacency_matrix[node_index]
                    #Removal in O(1)
                    del mod_degree_nodes[node_index]
                    #Removal in O(1)
                    del sorted_degree_nodes[node_index]
    selected_points = sorted(selected_points,
                             key=operator.itemgetter(1),
                             reverse=True)
    mod_degree_nodes.clear()
    mod_adjacency_matrix.clear()
    del sorted_degree_nodes

    fout = open(filename + "_out.txt", 'w')
    for data in selected_points:
        outputstring = str(data[0]) + "\t" + str(data[1]) + "\t" + str(
            data[2]) + "\n"
        fout.write(outputstring)
    fout.close()
    print time.time() - start_time, "seconds"

    fout = open(filename + "_out.txt", 'r')
    S = nx.read_edgelist(fout, nodetype=int, data=(('weight', float), ))
    fout.close()
    print S.nodes()
    print "%d nodes" % S.number_of_nodes()
    print S.edges(data=False)
    print "%d edges" % S.number_of_edges()
    return S
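Since mod_degree_nodes is sorted by degree in descending order before it is loaded into the OrderedDict, sorted_degree_nodes.popitem(False) (the positional form of popitem(last=False)) always returns the remaining node with the highest degree. A small illustration of that idiom, in the same Python 2 style as the function above; the node names and degrees are made up.

from collections import OrderedDict
import operator

degrees = {'a': 3, 'b': 9, 'c': 5}
by_degree = OrderedDict(
    sorted(degrees.iteritems(), key=operator.itemgetter(1), reverse=True))
print by_degree.popitem(False)  # ('b', 9): the highest-degree node pops first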
Example #6
class Cache(object):
  '''
    Cache to hold cached contents
  '''
  index_filename = "index.pickle"
  # active requests are not counted.

  def __init__(self, path, entry_limit):
    self.path = path
    self.on_memory_entry_limit = entry_limit
    self.on_memory = OrderedDict() #FastAVLTree()
    self.index = {}
    self.load_index()

  def __contains__(self, key):
    return key in self.index 

  def __len__(self):
    return len(self.index)
  
  def __iter__(self):
    return self.index.itervalues()

  def _make_path(self, fname):
    return os.path.join(self.path, fname)

  def make_entry(self, key):
    '''
      reserve
    '''
    assert key not in self.index
    entry = CacheEntry(key, self.path, 0.0, self.on_notify)
    #FIXME because cache entry is write once. read many.
    self.index[key] = entry
    return entry

  def on_notify(self, entry):
    print 'cache entries: ', len(self.on_memory)
    if entry.key in self.on_memory:
      self.touch(entry)
    else:
      self.push_to_memory(entry)

  def push_to_memory(self, entry):
    if len(self.on_memory) >=  self.on_memory_entry_limit:
      key, purged = self.on_memory.popitem(False) #popping first item
      print 'purged cache life=%f s since %f for %s' % (time.time() - purged.last_touch, purged.last_touch, purged.key)
      purged.move_to_disk()
    entry.touch()
    print "putting entry %s" % (entry.key)
    assert entry.datafile
    assert entry.last_touch > 1.0
    self.on_memory[entry.key] = entry

  def touch(self, entry):
    #revoke
    x = self.on_memory.pop(entry.key)
    assert x == entry
    # activate it as a new entry
    entry.touch()
    self.on_memory[entry.key] = entry

  def get(self, key):
    e = self.index.get(key, None)
    return e

  def pop(self, key):
    return self.index.pop(key)
    
  def load_index(self):
    p = self._make_path(self.index_filename)
    
    no_index = False
    try:
      f = open(p)
    except IOError:
      f = None
    if f:
      try:
        self.index = pickle.load(f)
      except Exception:
        no_index = True
      finally:
        f.close()
    else:
      no_index = True

    if no_index: 
      self.index = {}
      self.save_index()

  def save_entries(self):
    print 'Cache.save_entries'
    for entry in self.index.itervalues():
      if entry.datafile:
        entry.move_to_disk()

  def save_index(self):
    print 'Cache.save_index'
    p = self._make_path(self.index_filename)
    for entry in self.index.itervalues():
      entry.abort()
    with open(p, 'w') as f:
      pickle.dump(self.index, f)

  def fix(self):
    to_delete = []
    for k, v in self.index.items():
      p = self._make_path(v)
      if not os.access(p, os.F_OK | os.R_OK | os.W_OK):
        to_delete.append(k)
    for k in to_delete:
      del self.index[k]
    self.save_index()

  def scan(self):
    ''' Wrong idea: can't generate a URL from a file name... '''
    #return os.path.join(self.path, fname)
    for fname in os.listdir(self.path):
      if fname == self.index_filename:
        continue

  def html_index(self):
    count = len(self.index)
    x = []
    for key, ce in self.on_memory.iteritems():
      x.append('<li>[%s ]: %s</li>\n'%(key, ce.status()))
    frag_mem = '<ol>%s</ol>'%(''.join(x))

    y = []
    for key, ce in self.index.iteritems():
      y.append('<li>[%s ]: %s</li>\n'%(key, ce.status()))
    frag_index = '<ol>%s</ol>'%(''.join(y))
    html = '''<html><body>
      <p>count:%s</p>
      %s
      <hr />
      %s
      </body></html>'''
    return html%(count, frag_mem, frag_index)
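Unlike SymbolLRUCacheManager, which budgets total bytes, push_to_memory above caps the number of in-memory entries: once on_memory_entry_limit is reached, popitem(False) yields the entry that has gone longest without a touch() so it can be moved to disk. A minimal sketch of that count-capped spill pattern; MemoryTier and spill_to_disk are hypothetical stand-ins for the class and move_to_disk above.

from collections import OrderedDict

class MemoryTier(object):
    """Count-capped hot tier; the coldest entry is spilled on overflow."""
    def __init__(self, limit, spill_to_disk):
        self.limit = limit
        self.spill_to_disk = spill_to_disk  # hypothetical callback
        self.on_memory = OrderedDict()

    def put(self, key, entry):
        if key in self.on_memory:
            # Re-insert so the key moves to the most-recent end.
            self.on_memory.pop(key)
        elif len(self.on_memory) >= self.limit:
            _, coldest = self.on_memory.popitem(last=False)
            self.spill_to_disk(coldest)
        self.on_memory[key] = entry

tier = MemoryTier(2, spill_to_disk=lambda e: None)
tier.put('x', 'entry-x')
tier.put('y', 'entry-y')
tier.put('x', 'entry-x')  # refresh: 'y' is now the coldest entry
tier.put('z', 'entry-z')  # spills 'y', keeps 'x' and 'z' in memory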