Example #1
class AnswerHttpPing_withCache:
    def __init__(self, path_db=None):
        self.httping = HttpInfo()
        table_name = "httping"
        if path_db:
            self.cache = CacheManager(name_table=table_name,
                                      path_database=path_db)
        else:
            self.cache = CacheManager(name_table=table_name,
                                      path_database=os.path.join(
                                          os.getcwd(), "db.sqlite"))
        self.cache.setup("CREATE TABLE %s ("
                         "_url TEXT PRIMARY KEY, "
                         "code TEXT, "
                         "size TEXT, "
                         "duration TEXT);" % table_name)

    def query(self, url):
        from_cache = self.cache.retrieve_from_cache("_url", url)
        if from_cache:
            Logger.debug("from cache for %s" % url)
            return from_cache
        else:
            from_query = self.httping.query(url)
            Logger.debug("from query for %s" % url)
            if from_query:
                terms = [
                    from_query['url'],
                    str(from_query['code']),
                    str(from_query['size']),
                    str(from_query['duration'])
                ]
                self.cache.insert_to_cache(terms)
                return terms
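For illustration, a brief usage sketch of the class above (the URL and database path are placeholders, and the behaviour described in the comments is inferred from the query() method shown):

pinger = AnswerHttpPing_withCache(path_db="/tmp/httping.sqlite")
first = pinger.query("http://example.com")   # cache miss: goes through HttpInfo and stores the terms
second = pinger.query("http://example.com")  # same URL: now served from the SQLite-backed cache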
Example #2
    def __init__(self, source=None, logger=None):
        
        # General Attributes
        self.source = source
        self.reader = None
        self.drp    = None
        self.count  = 0
        self.Mcount = 99999

        if source is not None:
            self.ingestDir = PXPaths.RXQ + self.source.name
            if self.source.type == 'filter' or self.source.type == 'filter-bulletin' :
               self.ingestDir = PXPaths.FXQ + self.source.name
            self.logger = source.logger
        elif logger is not None:
            self.logger = logger
        self.pxManager = PXManager()              # Create a manager
        self.pxManager.setLogger(self.logger)     # Give the logger to the manager
        self.pxManager.initNames()                # Set rx and tx names
        self.clientNames = self.pxManager.getTxNames()         # Obtains the list of clients' names (the ones to which we can link files)
        self.filterNames = self.pxManager.getFxNames()         # Obtains the list of filters' names (the ones to which we can link files)
        if source is not None:
           if self.source.name in self.filterNames : self.filterNames.remove(self.source.name)
        self.sourlientNames = self.pxManager.getTRxNames()     # Obtains the list of sourlients' names (the ones to which we can link files)
        self.allNames = self.clientNames + self.filterNames + self.sourlientNames # Clients + Sourlients names
        self.clients = {}   # All the Client/Filter/Sourlient objects
        self.fileCache = None                                                    # cache of already processed products
        self.dbDirsCache = CacheManager(maxEntries=200000, timeout=25*3600)      # Directories created in the DB
        self.clientDirsCache = CacheManager(maxEntries=25000, timeout=2*3600)    # File ingested in RXQ
        self.feedNames = []  # source to feed
        self.feeds = {}
        if source is not None:
           self.logger.info("Ingestor (source %s) can link files to clients: %s" % (source.name, self.allNames))
Example #3
    def __init__(self, client, logger):
        self.client = client  # Client object (gives access to all configuration options)
        self.timeout = client.timeout  # No timeout for now
        self.logger = logger  # Logger object
        self.connection = None  # The connection
        self.igniter = None
        self.ssl = False
        self.reader = DiskReader(PXPaths.TXQ + self.client.name,
                                 self.client.batch, self.client.validation,
                                 self.client.patternMatching,
                                 self.client.mtime, True, self.logger,
                                 eval(self.client.sorter), self.client)

        self.debugFile = False

        self.cacheManager = CacheManager(maxEntries=self.client.cache_size,
                                         timeout=8 * 3600)

        # AMQP: is there a maximum message size?
        # self.set_maxLength(self.client.maxLength)

        # statistics.
        self.totBytes = 0
        self.initialTime = time.time()
        self.finalTime = None

        self._connect()
Example #4
    def __init__(self, source=None, logger=None):
        
        # General Attributes
        self.source = source
        self.reader = None
        self.drp    = None
        self.count  = 0
        self.Mcount = 99999

        if source is not None:
            self.ingestDir = PXPaths.RXQ + self.source.name
            if self.source.type == 'filter' or self.source.type == 'filter-bulletin' :
               self.ingestDir = PXPaths.FXQ + self.source.name
            self.logger = source.logger
        elif logger is not None:
            self.logger = logger
        self.pxManager = PXManager()              # Create a manager
        self.pxManager.setLogger(self.logger)     # Give the logger to the manager
        self.pxManager.initNames()                # Set rx and tx names
        self.clientNames = self.pxManager.getTxNames()         # Obtains the list of clients' names (the ones to which we can link files)
        self.filterNames = self.pxManager.getFxNames()         # Obtains the list of filters' names (the ones to which we can link files)
        if source is not None:
           if self.source.name in self.filterNames : self.filterNames.remove(self.source.name)
        self.sourlientNames = self.pxManager.getTRxNames()     # Obtains the list of sourlients' names (the ones to which we can link files)
        self.allNames = self.clientNames + self.filterNames + self.sourlientNames # Clients + Sourlients names
        self.clients = {}   # All the Client/Filter/Sourlient objects
        self.fileCache = None                                                    # cache of already processed products
        self.dbDirsCache = CacheManager(maxEntries=200000, timeout=25*3600)      # Directories created in the DB
        self.clientDirsCache = CacheManager(maxEntries=25000, timeout=2*3600)    # File ingested in RXQ
        self.feedNames = []  # source to feed
        self.feeds = {}
        if source is not None:
           self.logger.info("Ingestor (source %s) can link files to clients: %s" % (source.name, self.allNames))
Example #5
class AnswerIPInfo_withCache:
    def __init__(self, path_db=None):
        self.ipinfo = IPInfo()
        if path_db:
            self.cache = CacheManager(name_table="ipinfo",
                                      path_database=path_db)
        else:
            self.cache = CacheManager(name_table="ipinfo",
                                      path_database=os.path.join(
                                          os.getcwd(), "db.sqlite"))
        self.cache.setup("CREATE TABLE %s ("
                         "_hostname TEXT PRIMARY KEY, "
                         "city TEXT, "
                         "country TEXT, "
                         "countryCode TEXT, "
                         "org TEXT, "
                         "query TEXT);" % "ipinfo")

    def query(self, hostname):
        from_cache = self.cache.retrieve_from_cache("_hostname", hostname)
        if from_cache:
            Logger.debug("from cache for %s" % hostname)
            return from_cache
        else:
            from_query = self.ipinfo.query(hostname)
            Logger.debug("from query for %s" % hostname)
            if from_query:
                terms = [
                    from_query['hostname'], from_query['city'],
                    from_query['country'], from_query['countryCode'],
                    from_query['org'], from_query['query']
                ]
                self.cache.insert_to_cache(terms)
                return terms
Example #6
def update_cache():
    """
        update CodeIgniter cache
    """
    try:
        c = CacheManager()
    except:
        logging.warning("Skipping CodeIgniter cache update")
        return

    try:
        c.update_pdb_cache()
    except:
        logging.warning(traceback.format_exc())
        logging.warning('Pdb cache update failed')

    try:
        c.update_nrlist_cache()
    except:
        logging.warning(traceback.format_exc())
        logging.warning('Nrlist cache update failed')

    try:
        c.update_motif_cache()
    except:
        logging.warning(traceback.format_exc())
        logging.warning('Motif cache update failed')

    try:
        c.update_loop_cache()
    except:
        logging.warning(traceback.format_exc())
        logging.warning('Loop cache update failed')
Example #7
async def main():

    loop = asyncio.get_event_loop()

    a = CacheManager(loop)

    a.tset("b", 5, 6)

    await asyncio.sleep(3)

    print(a.get("b"))
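The tset call above presumably stores key "b" with value 5 and a time-to-live of 6 (the exact signature is an assumption, not documented here); that would explain why the get still succeeds after sleeping only 3 seconds. A minimal, self-contained sketch of the same set-with-expiry idea, not the CacheManager used in this example:

import time

class TTLCacheSketch:
    # Illustrative only; the tset(key, value, ttl_seconds) semantics are assumed.
    def __init__(self):
        self._store = {}  # key -> (value, expiry timestamp)

    def tset(self, key, value, ttl_seconds):
        self._store[key] = (value, time.time() + ttl_seconds)

    def get(self, key):
        entry = self._store.get(key)
        if entry is None:
            return None
        value, expires_at = entry
        if time.time() > expires_at:  # expired entries behave as missing
            del self._store[key]
            return None
        return value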
Example #8
 def setUp(self):
     self.manager = CacheManager(maxEntries=3, timeout=5 * 3600)
     self.files = [
         'db/test-file-grib', 'db/test-file-burf', 'db/test-file-bulletin'
     ]
     #f = open(self.files[0])
     #self.data = f.read(1048576)
     #while len(self.data) :
     #print self.data
     #self.data = f.read(1048576)
     #f.close()
     self.data = 'ceci est un test'
Example #9
    def __init__(self, paths_manager, n_partial_ordering):
        """
        Constructor
        :param paths_manager: manager for paths
        :param n_partial_ordering: partial ordering between nodes involved in paths
        """

        self.paths_manager = paths_manager
        self.n_partial_ordering = n_partial_ordering

        self.predicates_to_most_specific_index = CacheManager()
        self.most_specific_indices = dict()
Example #10
 def get(url, cache=True, use_cookie=False):
     if cache:
         data = CacheManager.get(url)
         if not data:
             data = Server.download(url, use_cookie=use_cookie)
             if data:
                 CacheManager.set(url, data)
             return data, False
         else:
             return data, True
     else:
         data = Server.download(url, use_cookie=use_cookie)
         return data, False
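A hedged usage sketch of the wrapper above (the URL is a placeholder; CacheManager.get/set and Server.download are exactly the calls the snippet already assumes):

data, from_cache = get("http://example.com/resource", cache=True)
if from_cache:
    print("served from CacheManager")
else:
    print("freshly downloaded")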
Example #11
 def __init__(self, path_db=None):
     self.httping = HttpInfo()
     table_name = "httping"
     if path_db:
         self.cache = CacheManager(name_table=table_name,
                                   path_database=path_db)
     else:
         self.cache = CacheManager(name_table=table_name,
                                   path_database=os.path.join(
                                       os.getcwd(), "db.sqlite"))
     self.cache.setup("CREATE TABLE %s ("
                      "_url TEXT PRIMARY KEY, "
                      "code TEXT, "
                      "size TEXT, "
                      "duration TEXT);" % table_name)
Example #12
 def __init__(self, path_db=None):
     self.ipinfo = IPInfo()
     if path_db:
         self.cache = CacheManager(name_table="ipinfo",
                                   path_database=path_db)
     else:
         self.cache = CacheManager(name_table="ipinfo",
                                   path_database=os.path.join(
                                       os.getcwd(), "db.sqlite"))
     self.cache.setup("CREATE TABLE %s ("
                      "_hostname TEXT PRIMARY KEY, "
                      "city TEXT, "
                      "country TEXT, "
                      "countryCode TEXT, "
                      "org TEXT, "
                      "query TEXT);" % "ipinfo")
Example #13
    def __init__(self, client, logger):
        self.client = client  # Client object (gives access to all configuration options)
        self.remoteHost = client.host  # Remote host (name or ip)
        self.port = int(
            client.port)  # Port (int) to which the receiver is bound
        self.address = (self.remoteHost, self.port)  # Socket address
        self.timeout = client.timeout  # No timeout for now
        self.logger = logger  # Logger object
        self.socketAMIS = None  # The socket
        self.igniter = None
        self.reader = DiskReader(PXPaths.TXQ + self.client.name,
                                 self.client.batch, self.client.validation,
                                 self.client.patternMatching,
                                 self.client.mtime, True, self.logger,
                                 eval(self.client.sorter), self.client)

        self.preamble = chr(curses.ascii.SOH) + "\r\n"
        self.endOfLineSep = "\r\r\n"
        self.endOfMessage = self.endOfLineSep + chr(
            curses.ascii.ETX) + "\r\n\n" + chr(curses.ascii.EOT)
        self.debugFile = False

        self.cacheManager = CacheManager(maxEntries=self.client.cache_size,
                                         timeout=8 * 3600)

        # AMIS's maximum bulletin size is 14000

        self.set_maxLength(self.client.maxLength)

        # statistics.
        self.totBytes = 0
        self.initialTime = time.time()
        self.finalTime = None

        self._connect()
Example #14
    def __init__(self):
        QWidget.__init__(self)

        ssl._create_default_https_context = ssl._create_unverified_context

        self.threadaaa = MyThread(1, 1)
        self.threadaaa.start()
        self.threadaaa.singal.connect(self.setItemViews)
        self.downloadThreads = []

        self.setWindowTitle('视频下载')  # window title: "Video download"
        self.cacheManager = CacheManager()
        self.allDataArr = []
        # self.requet = Reques()

        # self.grid = QGridLayout()
        self.listView = QListWidget()
        self.container()
Example #15
def load_cache_managers(cache_managers_dir):
    """
    Load cache managers from the given directory
    :param cache_managers_dir: directory where cache managers files are stored
    :return: a dict with 2 keys representing the 2 cache managers (rdf_nodes and predicates)
    """

    if cache_managers_dir[-1] != "/":
        cache_managers_dir += "/"

    cache_managers = {
        "rdf_nodes": CacheManager(),
        "predicates": CacheManager()
    }
    cache_managers["predicates"].load_from_csv(cache_managers_dir + "predicates_cache_manager.csv")
    cache_managers["rdf_nodes"].load_from_csv(cache_managers_dir + "rdf_nodes_cache_manager.csv")

    return cache_managers
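A hedged usage sketch (the directory name is a placeholder; the two CSV filenames are the ones hard-coded in the function above):

# The directory is expected to contain predicates_cache_manager.csv and rdf_nodes_cache_manager.csv
managers = load_cache_managers("cache_managers")
predicates_cache = managers["predicates"]
rdf_nodes_cache = managers["rdf_nodes"]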
Example #16
    def get_payload(self, gene_symbol, request_type):
        # Attempt to pull payload from cache
        cs = CacheManager(self.settings['CACHE_FILE'])
        cache_key = f'{gene_symbol}_{request_type}'
        json_response = cs.fetch(cache_key)

        # Otherwise hit the API
        if not json_response:
            rs = ReqService(self.settings)
            json_response = rs.make_request(self.symbol_to_id(gene_symbol),
                                            request_type=request_type).json()

            # If request was successful, update cache
            if json_response:
                cs.update(cache_key, json_response)
            else:
                sys.exit()

        return json_response
Example #17
 def setUp(self):
   self.manager = CacheManager(maxEntries=3, timeout=5 * 3600)
   self.files = ['db/test-file-grib', 'db/test-file-burf', 'db/test-file-bulletin']
   #f = open(self.files[0])
   #self.data = f.read(1048576)
   #while len(self.data) :
     #print self.data
     #self.data = f.read(1048576)
   #f.close()    
   self.data = 'ceci est un test'
Example #18
 def setUp(self,logFile='log/SenderFTP.log'):
   self.logger = Logger(logFile, 'DEBUG', 'Sub')    
   self.logger = self.logger.getLogger()    
   self.client = Client('client-test',self.logger)  
   self.cacheManager = CacheManager(maxEntries=3, timeout=5 * 3600)
   # The sender must be able to connect to the FTP server for the test to continue.
   self.sender = SenderFTP(self.client, self.logger, self.cacheManager)
   file1 = open("txq/client-test/3/test","w")
   file1.close()
   self.files = ['txq/client-test/3/test']
Example #19
    def __init__(self,path,client,logger):
        gateway.gateway.__init__(self, path, client, logger)
        self.client = client
        self.establishConnection()

        self.reader = DiskReader(PXPaths.TXQ + self.client.name, 
                                 self.client.batch,            # Number of files we read each time
                                 self.client.validation,       # name validation
                                 self.client.patternMatching,  # pattern matching
                                 self.client.mtime,            # we don't check modification time
                                 True,                         # priority tree
                                 self.logger,
                                 eval(self.client.sorter),
                                 self.client)

        # Mechanism to eliminate multiple copies of a bulletin
        self.cacheManager = CacheManager(maxEntries=self.client.cache_size, timeout=8*3600)

        # WMO's maximum bulletin size is 500 000 bytes
        self.set_maxLength( self.client.maxLength )
Example #20
 def _startFactory(self):
     log.msg('Starting the main apt_p2p application')
     self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
     if not self.cache_dir.child(download_dir).exists():
         self.cache_dir.child(download_dir).makedirs()
     if not self.cache_dir.child(peer_dir).exists():
         self.cache_dir.child(peer_dir).makedirs()
     self.db = DB(self.cache_dir.child('apt-p2p.db'))
     self.dht = DHT(self.dhtClass, self.db)
     df = self.dht.start()
     df.addCallback(self._dhtStarted)
     self.stats = StatsLogger(self.db)
     self.http_server = TopLevel(self.cache_dir.child(download_dir),
                                 self.db, self)
     self.http_server.getHTTPFactory().startFactory()
     self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht,
                              self.stats)
     self.mirrors = MirrorManager(self.cache_dir)
     self.cache = CacheManager(self.cache_dir.child(download_dir), self.db,
                               self)
Example #21
def process_file(filename):
    parser = InputParser()
    parser.parse("in/" + filename + ".in")
    manager = CacheManager()
    manager.videos = parser.getVideos()
    manager.caches = parser.getCaches()
    manager.endpoints = parser.getEndPoints()
    manager.requests = parser.getRequests()
    manager.FillCaches()

    outputstring = manager.OutputString()
    outputfile = open("out/" + filename + ".out", 'w')
    outputfile.write(outputstring)
    outputfile.close()
Example #22
    def __init__(self, ingestor, bullManager, reader):

        # General Attributes

        self.bullManager = bullManager
        self.ingestor = ingestor
        self.logger = ingestor.logger
        self.reader = reader
        self.source = ingestor.source

        self.files = []
        self.data = []
        self.entry = None

        self.now = time.mktime(time.localtime())

        # a cacheManager to make sure we process a file only once

        self.cacheManager = CacheManager(maxEntries=self.source.cache_size,
                                         timeout=int(self.source.history) *
                                         3600)

        # reading the collection station config file...

        sp = StationParser.StationParser(PXPaths.STATION_TABLE)
        sp.parse()
        self.mapCollectionStation = sp.getStationsColl()

        # instantiate the collection state class

        self.collectionState = CollectionState.CollectionState(self)
        self.getState = self.collectionState.getState
        self.setState = self.collectionState.setState
        self.updateState = self.collectionState.updateCollectionState

        # instantiate the collection builder class

        self.collectionBuilder = CollectionBuilder.CollectionBuilder(self)
        self.collectionBuild = self.collectionBuilder.process
Example #23
 def _startFactory(self):
     log.msg('Starting the main apt_p2p application')
     self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
     if not self.cache_dir.child(download_dir).exists():
         self.cache_dir.child(download_dir).makedirs()
     if not self.cache_dir.child(peer_dir).exists():
         self.cache_dir.child(peer_dir).makedirs()
     self.db = DB(self.cache_dir.child('apt-p2p.db'))
     self.dht = DHT(self.dhtClass, self.db)
     df = self.dht.start()
     df.addCallback(self._dhtStarted)
     self.stats = StatsLogger(self.db)
     self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self)
     self.http_server.getHTTPFactory().startFactory()
     self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht, self.stats)
     self.mirrors = MirrorManager(self.cache_dir)
     self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, self)
Example #24
    def __init__(self,
                 path,
                 batch=20000,
                 validation=False,
                 patternMatching=False,
                 mtime=0,
                 prioTree=False,
                 logger=None,
                 sorterClass=None,
                 flow=None):
        """
        Set the root path and the sorter class used for sorting

        The regex will serve (if we use validation) to validate that the filename has the following form:
        SACN43_CWAO_012000_CYOJ_41613:ncp1:CWAO:SA:3.A.I.E::20050201200339"

        FIXME: The regex should be passed otherwise!  config file?

        """
        # self.regex = re.compile(r'^[^:]*?:[^:]*?:[^:]*?:[^:]*?:(\d)[^:]*?:[^:]*?:(\d{14})$')
        # This regex would be better than the one actually in use because it doesn't match the following entry:
        # "A:B:C:WXO_DAN:BAD:3:ALLO:20050714183623"
        # Surprisingly, this entry matches (priority in 6th position, should be in 5th) in spite of the *? operator.
        # Maybe we already accept some badly named files, so it's risky to change the regex now.

        self.regex = re.compile(r'^.*?:.*?:.*?:.*?:(\d).*?:.*?:(\d{14})$'
                                )  # Regex used to validate filenames
        self.path = path  # Path from where we ingest filenames
        self.flowName = os.path.basename(
            path)  # Last part of the path correspond to client/source name
        self.validation = validation  # Name Validation active (True or False)
        self.patternMatching = patternMatching  # Pattern matching active (True or False)
        self.logger = logger  # Use to log information
        self.batch = batch  # Maximum number of files that we are interested to sort
        self.mtime = mtime  # If we want to check modification time before taking a file
        self.sortedFiles = []  # Sorted filenames
        self.data = [
        ]  # Content of x filenames (x is set in getFilesContent())
        self.prioTree = prioTree  # Boolean that determine if the "priorities" structure is enforced
        self.sorterClass = sorterClass  # Sorting algorithm that will be used by sort()
        self.flow = flow  # Flow (Client, Source, Sourlient) object, only used when patternMatching is True
        maxEntries = 125000  # Default maximum number of cache entries (overridden by flow.cache_size below)
        if self.flow != None: maxEntries = self.flow.cache_size
        self.cacheManager = CacheManager(maxEntries, timeout=12 *
                                         3600)  # Used to cache read entries
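The sample filename quoted in the docstring can be checked directly against the validation regex used above; a small standalone sketch:

import re

# Same pattern as self.regex above
regex = re.compile(r'^.*?:.*?:.*?:.*?:(\d).*?:.*?:(\d{14})$')
name = "SACN43_CWAO_012000_CYOJ_41613:ncp1:CWAO:SA:3.A.I.E::20050201200339"
match = regex.match(name)
if match:
    priority, timestamp = match.groups()  # '3' and '20050201200339'
    print(priority, timestamp)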
Example #25
def test_load_mc(addr_list,
                 overwrite_memory=False,
                 ignore_lock=False,
                 force_set_valid=False):
    ''' Check (or redo) if all redis k-v are good. 
	- overwrite_memory: if you don't believe mm is loaded but flag shows valid, set this to true.
	- ignore_lock: still load if other server already started loading.
	- force_set_valid: overwrite valid_flags, regard as all loaded without checking. Do not use this option with other options. Run on 1 server.
	'''
    mm = CacheManager(overwrite_prefix=True,
                      overwrite_redis_servers=False,
                      rt_servers=False)
    Pretend_Valid = force_set_valid  # force change mm valid flag to 1. Careful! <set_to_be_valid>
    tasks = []
    for addr in addr_list:
        metas = [
            "osm/cache-%s-nodeid-to-lat-lng.txt" % addr,  #0
            "osm/cache-%s-nodeid-to-neighbor-nid.txt" % addr,  #1
            "osm/cache-%s-nids-to-speed.txt" % addr,  #2
            "osm/cache-%s-nid-to-elevation.txt" % addr,  #3
            "osm/cache-%s-nids-to-waytag.txt" % addr,  #4
        ]
        tasks.extend(metas)
    lock = AccessRestrictionContext(
        prefix="-test_load_mc~",
        persistent_restriction=True,
        persist_seconds=86400 * 3,
        no_restriction=not On_Cluster,
    )
    for task in tasks:
        if Pretend_Valid:
            mm.use_cache(meta_file_name=task,
                         overwrite_prefix=True,
                         set_to_be_valid=True,
                         loading_msg=task)
            continue
        with lock:
            lock.Access_Or_Skip(task)
            print(task, MyIp)
            mm.use_cache(meta_file_name=task,
                         overwrite_prefix=True,
                         overwrite_memory=overwrite_memory,
                         ignore_lock=ignore_lock,
                         loading_msg=task)

    print("Done", addr_list, MyIp)
Example #26
    def __init__(self, ingestor, bullManager, reader ):
        
        # General Attributes

        self.bullManager   = bullManager
        self.ingestor      = ingestor
        self.logger        = ingestor.logger
        self.reader        = reader
        self.source        = ingestor.source

        self.files         = []
        self.data          = []
        self.entry         = None

        self.now           = time.mktime(time.localtime())

        # a cacheManager to make sure we process a file only once

        self.cacheManager  = CacheManager(maxEntries=self.source.cache_size, timeout=int(self.source.history)*3600)

        # reading the collection station config file...

        sp = StationParser.StationParser(PXPaths.STATION_TABLE)
        sp.parse()
        self.mapCollectionStation = sp.getStationsColl()

        # instantiate the collection state class

        self.collectionState    = CollectionState.CollectionState( self )
        self.getState           = self.collectionState.getState
        self.setState           = self.collectionState.setState
        self.updateState        = self.collectionState.updateCollectionState

        # instantiate the collection builder class

        self.collectionBuilder = CollectionBuilder.CollectionBuilder( self )
        self.collectionBuild   = self.collectionBuilder.process
Example #27
   def __init__(self, client, logger):
      self.client     = client                        # Client object (gives access to all configuration options)
      self.timeout    = client.timeout                # No timeout for now
      self.logger     = logger                        # Logger object
      self.connection = None                          # The connection
      self.igniter    = None
      self.ssl        = False
      self.reader     = DiskReader(PXPaths.TXQ  + self.client.name, self.client.batch,
                               self.client.validation, self.client.patternMatching,
                               self.client.mtime, True, self.logger, eval(self.client.sorter), self.client)

      self.debugFile    = False

      self.cacheManager = CacheManager(maxEntries=self.client.cache_size, timeout=8*3600)

      # AMQP: is there a maximum message size?
      # self.set_maxLength(self.client.maxLength)

      # statistics.
      self.totBytes = 0
      self.initialTime = time.time()
      self.finalTime = None

      self._connect()
Example #28
class CollectionManager(object):
    """
    The collectionManager reads the RXQ dir and classifies the bulletins:
    if they have to be ingested, do it; if they have to be collected, then collect and ingest.
    Unrelated files are removed. Files that are not ready to be collected stay in the RXQ dir.
    """

    def __init__(self, ingestor, bullManager, reader ):
        
        # General Attributes

        self.bullManager   = bullManager
        self.ingestor      = ingestor
        self.logger        = ingestor.logger
        self.reader        = reader
        self.source        = ingestor.source

        self.files         = []
        self.data          = []
        self.entry         = None

        self.now           = time.mktime(time.localtime())

        # a cacheManager to make sure we process a file only once

        self.cacheManager  = CacheManager(maxEntries=self.source.cache_size, timeout=int(self.source.history)*3600)

        # reading the collection station config file...

        sp = StationParser.StationParser(PXPaths.STATION_TABLE)
        sp.parse()
        self.mapCollectionStation = sp.getStationsColl()

        # instantiate the collection state class

        self.collectionState    = CollectionState.CollectionState( self )
        self.getState           = self.collectionState.getState
        self.setState           = self.collectionState.setState
        self.updateState        = self.collectionState.updateCollectionState

        # instantiate the collection builder class

        self.collectionBuilder = CollectionBuilder.CollectionBuilder( self )
        self.collectionBuild   = self.collectionBuilder.process

    #-----------------------------------------------------------------------------------------
    # check if the bulletin is defined in the collection_station dictionary
    #-----------------------------------------------------------------------------------------

    def conformWithStationDictionary( self ):

        # working variables

        dict    = PXPaths.STATION_TABLE
        dictkey = self.entry.dictkey
        path    = self.entry.path
        station = self.entry.station

        # check if the header is found in the dictionary

        if not dictkey in self.mapCollectionStation :
           self.logger.warning("Reject %s : (%s) not in %s" % (path,dictkey,dict) )
           self.unlink(path)
           return False

        # check if the station was found in bulletin

        if station == None :
           self.logger.warning("Reject %s : station %s not found" % (path,station) )
           self.unlink(path)
           return False

        # check if the station is defined in the dictionary

        station_list = self.mapCollectionStation[dictkey]
        if not station in station_list :
           self.logger.warning("Reject %s : station %s not defined for %s in %s" % (path,station,dictkey,dict) )
           self.unlink(path)
           return False

        return True

    #-----------------------------------------------------------------------------------------
    # check if the bulletin is defined in the collecteur's configuration 
    #-----------------------------------------------------------------------------------------

    def conformWithSourceConfig( self ):

        # working variables

        bulltin = self.entry.bulletin
        data    = self.entry.data
        path    = self.entry.path
        type    = self.entry.type
        BBB     = self.entry.BBB

        name    = self.source.name

        # get bulletin type and check if configured collectable, if not ignore

        pos     = -1
        try     : pos = self.source.headers.index(type)
        except  :
                  # if bulletin type not configured collectable ignore it
                  self.logger.warning("Reject %s : (%s) not defined in %s.conf header" % (path,type,name) )
                  self.unlink(path)
                  return False

        self.entry.sourceidx = pos

        # get the bulletin emission and check if time ok
        # spec says : if time not collectable or minute != 00, ingest NOW

        hour   = bulltin.emission[ 8:10]
        minute = bulltin.emission[10:12]

        if self.source.issue_hours[pos][0] != 'all' and not hour in self.source.issue_hours[pos] :
           self.logger.info("Forced ingestion (time) %s" % path )
           self.ingest(data)
           self.unlink(path)
           return False

        if minute != '00' :
           self.logger.info("Forced ingestion (time) %s" % path )
           self.ingest(data)
           self.unlink(path)
           return False

        # check if the bulletin is too early

        history = 3600 * self.source.history
        future  = -60  * self.source.future

        if bulltin.delay < future :
           self.logger.warning("Reject %s : arrived earlier than permitted (%d)" % (path,bulltin.delay) )
           self.unlink(path)
           return False

        # check if the bulletin is too old

        if bulltin.delay > history :
           self.logger.info("Forced ingestion (too old) %s" % path )
           self.ingestX(self.entry,'Z')
           self.unlink(path)
           return False

        # compute primary and cycle in secs

        primary = 60 * int(self.source.issue_primary[pos])
        cycle   = 60 * int(self.source.issue_cycle[pos])

        # if the bulletin is in its primary period
        # do nothing if it is not time for collection

        if bulltin.delay  < primary :
           if bulltin.age < primary :
              self.logger.debug("File %s will be primary soon" % path )
              return False
           self.entry.period = 0
           return True

        # the bulletin is in one of its cycle period : compute the period and time of collection

        self.entry.period = int((bulltin.delay - primary ) / cycle + 1)
        timeOfCollection  = primary + self.entry.period  * cycle
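        # Worked example with illustrative numbers: if issue_primary is 20 min and issue_cycle is
        # 60 min, then primary = 1200 s and cycle = 3600 s. A bulletin with delay = 4000 s falls in
        # period int((4000 - 1200) / 3600 + 1) = 1, so timeOfCollection = 1200 + 1*3600 = 4800 s;
        # it will only be collected once its age reaches 4800 seconds.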

        # we can collect

        if bulltin.age >= timeOfCollection  : return True

        # it's not time to collect and the bulletin is regular or a repeat: do nothing

        if BBB == None or BBB[0] == 'R' :
           self.logger.debug("File %s will be cycle soon" % path )
           return False

        # still not time to collect BUT AT THIS POINT
        # we have an AMD or COR during one of its cycle... ingest !

        return True

    #-----------------------------------------------------------------------------------------
    # ingest one bulletin's data
    #-----------------------------------------------------------------------------------------

    def ingest( self, data ):

        self.bullManager.writeBulletinToDisk(data, True, True)

    #-----------------------------------------------------------------------------------------
    # ingestX : ingest one bulletin with a BBB having its last letter as X
    #-----------------------------------------------------------------------------------------

    def ingestX( self, entry, X ):

        # working variables

        bulltin = entry.bulletin
        BBB     = entry.BBB
        data    = entry.data
        header  = entry.header

        # create its new BBB

        cBBB = ''

        if   BBB    == None : cBBB = 'RR' + X
        elif BBB[0] == 'A'  : cBBB = 'AA' + X
        elif BBB[0] == 'C'  : cBBB = 'CC' + X
        else                : cBBB = 'RR' + X

        # rebuild data with new header

        data  = header[0] + ' ' + header[1] + ' ' + header[2] + ' ' + cBBB + '\n'
        data += string.join(bulltin.bulletin[1:],'\n')

        # ingest

        self.ingest(data)

    #-----------------------------------------------------------------------------------------
    # collection process
    #-----------------------------------------------------------------------------------------

    def process( self ):

        # setting current time
        # it is very important to set this only once for the duration of the process

        self.now = time.mktime(time.localtime())

        # updating the collection state map

        self.updateState(self.now)

        # read it all 
        # NOTE : it is important not to restrict the reading 
        #        so these lines don't use the batch option from config

        self.reader.read()

        self.logger.info("%d files in queue" % len(self.reader.sortedFiles))

        # no files : call the collectionBuilder for empty primary collection...

        if len(self.reader.sortedFiles) <= 0 : 
           self.collectionBuild(self.now)
           return

        # working variables

        self.data  = self.reader.getFilesContent()
        self.files = self.reader.sortedFiles

        # loop on all files

        for index in range(len(self.data)):

            # if the bulletin was already processed... skip it

            if self.cacheManager.has( self.data[index] ) :
               self.logger.info("File %s was cached earlier" % self.files[index] )
               self.unlink(self.files[index])
               continue

            # bulletinCollection is a class to hold bulletin if it has to be collected

            self.entry = CollectionEntry.CollectionEntry()

            # generate bulletin, set its arrival, its age ... save it into a list
            # TODO... if we have to set its arrival from the clock then the file should
            #         be renamed with a few ":" and the arrival string date at the end

            bulltin = bulletin.bulletin(self.data[index],self.logger)

            try     : bulltin.setArrivalStr(self.files[index].split(':')[6])
            except  : bulltin.setArrivalEp (self.now)

            bulltin.compute_Age(self.now)

            # check if bulletin and all its date are ok

            if bulltin.errorBulletin != None or \
               bulltin.arrival       == None or bulltin.emission      == None or \
               bulltin.delay         == None or bulltin.age           == None or \
               bulltin.ep_arrival    ==   -1 or bulltin.ep_emission   ==   -1  :
               self.logger.warning("File %s had a problem...(most probably date)" % self.files[index] )
               self.unlink(self.files[index])
               continue

            # put info in bulletinCollection entry

            self.entry.path      = self.files[index]
            self.entry.data      = self.data[index]

            self.entry.bulletin  = bulltin
            self.entry.header    = bulltin.getHeader().split()
            self.entry.type      = bulltin.getType()
            self.entry.BBB       = bulltin.getBBB()
            self.entry.station   = bulltin.getStation()

            self.entry.dictkey   = self.entry.header[0] + ' ' + self.entry.header[1]
            self.entry.statekey  = self.entry.header[0] + '_' + self.entry.header[1]  + '_' + self.entry.header[2]

            # info initialize and set later

            self.entry.sourceidx = -1
            self.entry.period    = -1

            # check if the bulletin is defined in the collection_station dictionary

            if not self.conformWithStationDictionary( ) : continue

            # check if the bulletin is defined in the collecteur's configuration 

            if not self.conformWithSourceConfig( ) : continue

            # check if the bulletin is not in conflict with its collection state

            self.addToCollectionState( index )

        # all files are classified... build collections if needed
        # saving the collection state map if needed

        self.collectionBuild(self.now)

    #-----------------------------------------------------------------------------------------
    # add the bulletin to the collection state map
    #-----------------------------------------------------------------------------------------

    def addToCollectionState( self, index ):

        # working variables

        path    = self.entry.path
        header  = self.entry.header
        BBB     = self.entry.BBB
        key     = self.entry.statekey

        # get MapCollectionState value

        ( period, amendement, correction, retard, Primary, Cycle ) = self.getState(key)

        # the bulletin is not a primary

        if self.entry.period >  0 :
           Cycle.append(self.entry)
           self.logger.debug("File %d %s : classified as Cycle" % (index,path) )
           self.setState( key, period, amendement, correction, retard, Primary, Cycle )
           return

        # the bulletin is primary
        # we have a problem if its primary collection was done...
        # some possibilities : we were in recovery mode and some files were moved after a collecteur iteration,
        #                      or we did not process enough files; check the source.batch value

        if period == 0 :
           self.logger.warning("Reject %d %s : primary already done" % (index,path) )
           self.unlink(path)
           return

        # primary bulletins are split...
        # normal and repeated bulletins are placed in Primary
        # amendments and corrections are placed in Cycle
        # the primary is collected and ingested first, then
        # the entries in Cycle are ingested in period order 0,1,...
        # this guarantees that AMD and COR (even primary) are sent after
        # the primary collection

        if    BBB    == None :
              Primary.append(self.entry)
              self.logger.debug("File %d %s : classified as Primary" % (index,path) )

        elif  BBB[0] == 'R'  :
              Primary.append(self.entry)
              self.logger.debug("File %d %s : classified as Primary" % (index,path) )

        else                 : 
              Cycle.append(self.entry)
              self.logger.debug("File %d %s : classified as Cycle  " % (index,path) )

        self.setState( key, period, amendement, correction, retard, Primary, Cycle )

    #-----------------------------------------------------------------------------------------
    # unlink a file
    #-----------------------------------------------------------------------------------------

    def unlink( self, path ):

        try:
               os.unlink(path)
               self.logger.debug("%s has been erased", os.path.basename(path))
        except OSError, e:
               (type, value, tb) = sys.exc_info()
               self.logger.error("Unable to unlink %s ! Type: %s, Value: %s" % (path, type, value))
Example #29
    def ingestSingleFile(self, igniter):
        from DiskReader import DiskReader
        from DirectRoutingParser import DirectRoutingParser
        from PullFTP import PullFTP

        if self.source.routemask:
            self.drp = DirectRoutingParser(self.source.routingTable,
                                           self.allNames, self.logger,
                                           self.source.routing_version)
            self.drp.parse()

        if self.source.nodups:
            self.fileCache = CacheManager(maxEntries=self.source.cache_size,
                                          timeout=8 * 3600)

        reader = DiskReader(self.ingestDir, self.source.batch,
                            self.source.validation,
                            self.source.patternMatching, self.source.mtime,
                            False, self.source.logger, self.source.sorter,
                            self.source)

        sleep_sec = 1
        if self.source.type == 'pull-file' or self.source.pull_script != None:
            sleep_sec = self.source.pull_sleep

        while True:
            if igniter.reloadMode == True:
                # We assign the defaults, reread configuration file for the source
                # and reread all configuration file for the clients (all this in __init__)
                if self.source.type == 'filter':
                    self.source.__init__(self.source.name, self.source.logger,
                                         True, True)
                else:
                    self.source.__init__(self.source.name, self.source.logger)

                if self.source.routemask:
                    self.drp = DirectRoutingParser(self.source.routingTable,
                                                   self.allNames, self.logger)
                    self.drp.parse()

                if self.source.nodups:
                    self.fileCache = CacheManager(
                        maxEntries=self.source.cache_size, timeout=8 * 3600)

                reader = DiskReader(self.ingestDir, self.source.batch,
                                    self.source.validation,
                                    self.source.patternMatching,
                                    self.source.mtime, False,
                                    self.source.logger, self.source.sorter,
                                    self.source)
                self.logger.info("Receiver has been reloaded")
                igniter.reloadMode = False

            # pull files in rxq directory if in pull mode
            if self.source.type == 'pull-file' or self.source.pull_script != None:
                files = []
                sleeping = os.path.isfile(PXPaths.RXQ + self.source.name +
                                          '/.sleep')

                if self.source.type == 'pull-file':
                    puller = PullFTP(self.source, self.logger, sleeping)
                    files = puller.get()
                    puller.close()
                elif self.source.pull_script != None:
                    files = self.source.pull_script(self.source, self.logger,
                                                    sleeping)

                if not sleeping:
                    self.logger.debug("Number of files pulled = %s" %
                                      len(files))
                else:
                    self.logger.info("This pull is sleeping")

            # normal diskreader call for files
            reader.read()
            if len(reader.sortedFiles) <= 0:
                time.sleep(sleep_sec)
                continue

            sortedFiles = reader.sortedFiles[:self.source.batch]

            # processing the list if necessary...

            if self.source.lx_execfile != None:
                sfiles = []
                sfiles.extend(sortedFiles)
                self.logger.info("%d files processed with lx_script" %
                                 len(sfiles))
                sortedFiles = self.source.run_lx_script(
                    sfiles, self.source.logger)

            self.logger.info("%d files will be ingested" % len(sortedFiles))

            for file in sortedFiles:
                self.ingestFile(file)
Example #30
 def activeCache(self, cachedir):
     """Set the cache storage location and enable the cache-saving function."""
     self.cm = CacheManager(cachedir)
Example #31
	def activeCache(self, cachedir):
		"""Set the cache storage location and enable the cache-saving function."""
		self.cm = CacheManager(cachedir)
Example #32
class AptP2P(protocol.Factory):
    """The main code object that does all of the work.
    
    Contains all of the sub-components that do all the low-level work, and
    coordinates communication between them.
    
    @type dhtClass: L{interfaces.IDHT}
    @ivar dhtClass: the DHT class to use
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type db: L{db.DB}
    @ivar db: the database to use for tracking files and hashes
    @type dht: L{DHTManager.DHT}
    @ivar dht: the manager for DHT requests
    @type stats: L{stats.StatsLogger}
    @ivar stats: the statistics logger to record sent data to
    @type http_server: L{HTTPServer.TopLevel}
    @ivar http_server: the web server that will handle all requests from apt
        and from other peers
    @type peers: L{PeerManager.PeerManager}
    @ivar peers: the manager of all downloads from mirrors and other peers
    @type mirrors: L{MirrorManager.MirrorManager}
    @ivar mirrors: the manager of downloaded information about mirrors which
        can be queried to get hashes from file names
    @type cache: L{CacheManager.CacheManager}
    @ivar cache: the manager of all downloaded files
    @type my_addr: C{string}, C{int}
    @ivar my_addr: the IP address and port of this peer
    """
    
    def __init__(self, dhtClass):
        """Initialize all the sub-components.
        
        @type dhtClass: L{interfaces.IDHT}
        @param dhtClass: the DHT class to use
        """
        log.msg('Initializing the main apt_p2p application')
        self.dhtClass = dhtClass
        self.my_addr = None

    #{ Factory interface
    def startFactory(self):
        reactor.callLater(0, self._startFactory)
        
    def _startFactory(self):
        log.msg('Starting the main apt_p2p application')
        self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
        if not self.cache_dir.child(download_dir).exists():
            self.cache_dir.child(download_dir).makedirs()
        if not self.cache_dir.child(peer_dir).exists():
            self.cache_dir.child(peer_dir).makedirs()
        self.db = DB(self.cache_dir.child('apt-p2p.db'))
        self.dht = DHT(self.dhtClass, self.db)
        df = self.dht.start()
        df.addCallback(self._dhtStarted)
        self.stats = StatsLogger(self.db)
        self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self)
        self.http_server.getHTTPFactory().startFactory()
        self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht, self.stats)
        self.mirrors = MirrorManager(self.cache_dir)
        self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, self)
    
    def _dhtStarted(self, result):
        """Save the returned address and start scanning the cache."""
        self.my_addr = result
        self.cache.scanDirectories()
        
    def stopFactory(self):
        log.msg('Stopping the main apt_p2p application')
        self.http_server.getHTTPFactory().stopFactory()
        self.mirrors.cleanup()
        self.stats.save()
        self.db.close()
    
    def buildProtocol(self, addr):
        return self.http_server.getHTTPFactory().buildProtocol(addr)

    #{ Other functions
    def getStats(self):
        """Retrieve and format the statistics for the program.
        
        @rtype: C{string}
        @return: the formatted HTML page containing the statistics
        """
        out = '<html><body>\n\n'
        out += self.stats.formatHTML(self.my_addr)
        out += '\n\n'
        out += self.dht.getStats()
        out += '\n</body></html>\n'
        return out

    #{ Main workflow
    def get_resp(self, req, url, orig_resp = None):
        """Lookup a hash for the file in the local mirror info.
        
        Starts the process of getting a response to an apt request.
        
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
            (optional, ignored if missing)
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will be called back with the response
        """
        d = defer.Deferred()
        
        log.msg('Trying to find hash for %s' % url)
        findDefer = self.mirrors.findHash(unquote(url))
        
        findDefer.addCallbacks(self.findHash_done, self.findHash_error, 
                               callbackArgs=(req, url, orig_resp, d),
                               errbackArgs=(req, url, orig_resp, d))
        return d
    
    def findHash_error(self, failure, req, url, orig_resp, d):
        """Process the error in hash lookup by returning an empty L{HashObject}."""
        log.msg('Hash lookup for %s resulted in an error: %s' %
                (url, failure.getErrorMessage()))
        self.findHash_done(HashObject(), req, url, orig_resp, d)
        
    def findHash_done(self, hash, req, url, orig_resp, d):
        """Use the returned hash to lookup the file in the cache.
        
        If the hash was not found, the workflow skips down to download from
        the mirror (L{startDownload}), or checks the freshness of an old
        response if there is one.
        
        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        """
        if hash.expected() is None:
            log.msg('Hash for %s was not found' % url)
            # Send the old response or get a new one
            if orig_resp:
                self.check_freshness(req, url, orig_resp, d)
            else:
                self.startDownload([], req, hash, url, d)
        else:
            log.msg('Found hash %s for %s' % (hash.hexexpected(), url))
            
            # Lookup hash in cache
            locations = self.db.lookupHash(hash.expected(), filesOnly = True)
            self.getCachedFile(hash, req, url, d, locations)

    def check_freshness(self, req, url, orig_resp, d):
        """Send a HEAD to the mirror to check if the response from the cache is still valid.
        
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        log.msg('Checking if %s is still fresh' % url)
        modtime = orig_resp.headers.getHeader('Last-Modified')
        headDefer = self.peers.get(HashObject(), url, method = "HEAD",
                                   modtime = modtime)
        headDefer.addCallbacks(self.check_freshness_done,
                               self.check_freshness_error,
                               callbackArgs = (req, url, orig_resp, d),
                               errbackArgs = (req, url, d))
    
    def check_freshness_done(self, resp, req, url, orig_resp, d):
        """Return the fresh response, if stale start to redownload.
        
        @type resp: L{twisted.web2.http.Response}
        @param resp: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        if resp.code == 304:
            log.msg('Still fresh, returning: %s' % url)
            d.callback(orig_resp)
        else:
            log.msg('Stale, need to redownload: %s' % url)
            self.startDownload([], req, HashObject(), url, d)
    
    def check_freshness_error(self, err, req, url, d):
        """Mirror request failed, continue with download.
        
        @param err: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        """
        log.err(err)
        self.startDownload([], req, HashObject(), url, d)
    
    def getCachedFile(self, hash, req, url, d, locations):
        """Try to return the file from the cache, otherwise move on to a DHT lookup.
        
        @type locations: C{list} of C{dictionary}
        @param locations: the files in the cache that match the hash,
            the dictionary contains a key 'path' whose value is a
            L{twisted.python.filepath.FilePath} object for the file.
        """
        if not locations:
            log.msg('Failed to return file from cache: %s' % url)
            self.lookupHash(req, hash, url, d)
            return
        
        # Get the first possible location from the list
        file = locations.pop(0)['path']
        log.msg('Returning cached file: %s' % file.path)
        
        # Get its response
        resp = static.File(file.path).renderHTTP(req)
        if isinstance(resp, defer.Deferred):
            resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
        else:
            self._getCachedFile(resp, hash, req, url, d, locations)
        
    def _getCachedFile(self, resp, hash, req, url, d, locations):
        """Check the returned response to be sure it is valid."""
        if isinstance(resp, failure.Failure):
            log.msg('Got error trying to get cached file')
            log.err(resp)
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)
            return
            
        log.msg('Cached response: %r' % resp)
        
        if resp.code >= 200 and resp.code < 400:
            d.callback(resp)
        else:
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)

    def lookupHash(self, req, hash, url, d):
        """Lookup the hash in the DHT."""
        log.msg('Looking up hash in DHT for file: %s' % url)
        key = hash.expected()
        lookupDefer = self.dht.get(key)
        lookupDefer.addBoth(self.startDownload, req, hash, url, d)

    def startDownload(self, values, req, hash, url, d):
        """Start the download of the file.
        
        The download will be from peers if the DHT lookup succeeded, or
        from the mirror otherwise.
        
        @type values: C{list} of C{dictionary}
        @param values: the returned values from the DHT containing peer
            download information
        """
        # Remove some headers Apt sets in the request
        req.headers.removeHeader('If-Modified-Since')
        req.headers.removeHeader('Range')
        req.headers.removeHeader('If-Range')
        
        if not isinstance(values, list) or not values:
            if not isinstance(values, list):
                log.msg('DHT lookup for %s failed with error %r' % (url, values))
            else:
                log.msg('Peers for %s were not found' % url)
            getDefer = self.peers.get(hash, url)
#            getDefer.addErrback(self.final_fallback, hash, url)
            getDefer.addCallback(self.cache.save_file, hash, url)
            getDefer.addErrback(self.cache.save_error, url)
            getDefer.addCallbacks(d.callback, d.errback)
        else:
            log.msg('Found peers for %s: %r' % (url, values))
            # Download from the found peers
            getDefer = self.peers.get(hash, url, values)
            getDefer.addCallback(self.check_response, hash, url)
            getDefer.addCallback(self.cache.save_file, hash, url)
            getDefer.addErrback(self.cache.save_error, url)
            getDefer.addCallbacks(d.callback, d.errback)
            
    def check_response(self, response, hash, url):
        """Check the response from peers, and download from the mirror if it is not."""
        if response.code < 200 or response.code >= 300:
            log.msg('Download from peers failed, going to direct download: %s' % url)
            getDefer = self.peers.get(hash, url)
#            getDefer.addErrback(self.final_fallback, hash, url)
            return getDefer
        return response
        
    def final_fallback(self, err, hash, url):
        """Final retry if the mirror still generated an error."""
        log.msg('Download from mirror failed, retrying once only: %s' % url)
        log.err(err)
        getDefer = self.peers.get(hash, url)
        return getDefer
        
    def new_cached_file(self, file_path, hash, new_hash, url = None, forceDHT = False):
        """Add a newly cached file to the mirror info and/or the DHT.
        
        If the file was downloaded, set url to the path it was downloaded for.
        Doesn't add a file to the DHT unless a hash was found for it
        (but does add it anyway if forceDHT is True).
        
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the file in the local cache
        @type hash: L{Hash.HashObject}
        @param hash: the original (expected) hash object containing also the
            hash of the downloaded file
        @type new_hash: C{boolean}
        @param new_hash: whether the hash was new to this peer, and so should
            be added to the DHT
        @type url: C{string}
        @param url: the URI of the location of the file in the mirror
            (optional, defaults to not adding the file to the mirror info)
        @type forceDHT: C{boolean}
        @param forceDHT: whether to force addition of the file to the DHT
            even if the hash was not found in a mirror
            (optional, defaults to False)
        """
        if url:
            self.mirrors.updatedFile(url, file_path)
        
        if self.my_addr and hash and new_hash and (hash.expected() is not None or forceDHT):
            return self.dht.store(hash)
        return None
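
# Hedged illustration (not part of apt-p2p): the freshness check above boils down
# to a conditional HEAD request, where a 304 answer means the cached copy can be
# reused.  A minimal standalone sketch using only the Python 2 standard library;
# the host, path and Last-Modified value below are made up for the example.
import httplib

def is_still_fresh(host, path, last_modified):
    """Return True if the mirror answers 304 Not Modified for this resource."""
    conn = httplib.HTTPConnection(host)
    conn.request("HEAD", path,
                 headers={'If-Modified-Since': last_modified})
    resp = conn.getresponse()
    conn.close()
    return resp.status == 304

# is_still_fresh("archive.example.org", "/debian/dists/stable/Release",
#                "Wed, 01 Jan 2020 00:00:00 GMT")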
Ejemplo n.º 33
0
    def ingestBulletinFile(self, igniter):
        from DiskReader import DiskReader
        import bulletinManager
        import bulletinManagerAm
        from PullFTP import PullFTP

        sleep_sec = 1
        if self.source.type == 'pull-bulletin' or self.source.pull_script != None:
            sleep_sec = self.source.pull_sleep

        bullManager = bulletinManager.bulletinManager(
            self.ingestDir, self.logger, self.ingestDir, 99999, '\n',
            self.source.extension, self.source.routingTable,
            self.source.mapEnteteDelai, self.source,
            self.source.addStationInFilename)

        if self.source.bulletin_type == 'am':
            bullManager = bulletinManagerAm.bulletinManagerAm(
                self.ingestDir, self.logger, self.ingestDir, 99999, '\n',
                self.source.extension, self.source.routingTable,
                self.source.addSMHeader, PXPaths.STATION_TABLE,
                self.source.mapEnteteDelai, self.source,
                self.source.addStationInFilename)

        if self.source.nodups:
            self.fileCache = CacheManager(maxEntries=self.source.cache_size,
                                          timeout=8 * 3600)

        reader = DiskReader(bullManager.pathSource, self.source.batch,
                            self.source.validation,
                            self.source.patternMatching, self.source.mtime,
                            False, self.source.logger, self.source.sorter,
                            self.source)
        while True:
            # If a SIGHUP signal is received ...
            if igniter.reloadMode == True:
                # We assign the defaults, reread the configuration file for the source
                # and reread all configuration files for the clients (all this in __init__)
                if self.source.type == 'filter-bulletin':
                    self.source.__init__(self.source.name, self.source.logger,
                                         True, True)
                else:
                    self.source.__init__(self.source.name, self.source.logger)

                bullManager = bulletinManager.bulletinManager(
                    self.ingestDir, self.logger, self.ingestDir, 99999, '\n',
                    self.source.extension, self.source.routingTable,
                    self.source.mapEnteteDelai, self.source,
                    self.source.addStationInFilename)

                if self.source.bulletin_type == 'am':
                    bullManager = bulletinManagerAm.bulletinManagerAm(
                        self.ingestDir, self.logger, self.ingestDir, 99999,
                        '\n', self.source.extension, self.source.routingTable,
                        self.source.addSMHeader, PXPaths.STATION_TABLE,
                        self.source.mapEnteteDelai, self.source,
                        self.source.addStationInFilename)

                if self.source.nodups:
                    self.fileCache = CacheManager(
                        maxEntries=self.source.cache_size, timeout=8 * 3600)

                reader = DiskReader(bullManager.pathSource, self.source.batch,
                                    self.source.validation,
                                    self.source.patternMatching,
                                    self.source.mtime, False,
                                    self.source.logger, self.source.sorter,
                                    self.source)

                self.logger.info("Receiver has been reloaded")
                igniter.reloadMode = False

            # pull files in rxq directory if in pull mode
            if self.source.type == 'pull-bulletin' or self.source.pull_script != None:
                files = []
                sleeping = os.path.isfile(PXPaths.RXQ + self.source.name +
                                          '/.sleep')

                if self.source.type == 'pull-bulletin':
                    puller = PullFTP(self.source, self.logger, sleeping)
                    files = puller.get()
                    puller.close()
                elif self.source.pull_script != None:
                    files = self.source.pull_script(self.source, self.logger,
                                                    sleeping)

                if not sleeping:
                    self.logger.debug("Number of files pulled = %s" %
                                      len(files))
                else:
                    self.logger.info("This pull is sleeping")

            # normal diskreader call for files
            reader.read()

            # processing the list if necessary...

            if self.source.lx_execfile != None and len(reader.sortedFiles) > 0:
                sfiles = []
                sfiles.extend(reader.sortedFiles)
                self.logger.info("%d files process with lx_script" %
                                 len(sfiles))
                sortedFiles = self.source.run_lx_script(
                    sfiles, self.source.logger)
                reader.sortedFiles = sortedFiles

            # continue normally
            data = reader.getFilesContent(reader.batch)

            if len(data) == 0:
                time.sleep(sleep_sec)
                continue
            else:
                self.logger.info("%d bulletins will be ingested", len(data))

            # Write (and name correctly) the bulletins to disk, erase them after
            for index in range(len(data)):

                # ignore duplicates if required
                duplicate = self.source.nodups and self.fileCache.find(
                    data[index], 'md5') is not None

                #nb_bytes = len(data[index])
                #self.logger.info("Lecture de %s: %d bytes" % (reader.sortedFiles[index], nb_bytes))
                if not duplicate:

                    # converting the file if necessary
                    if self.source.fx_execfile != None:

                        file = reader.sortedFiles[index]
                        fxfile = self.source.run_fx_script(
                            file, self.source.logger)

                        # conversion did not work
                        if fxfile == None:
                            self.logger.warning(
                                "FX script ignored the file : %s" %
                                os.path.basename(file))
                            os.unlink(file)
                            continue

                        # file already in proper format
                        elif fxfile == file:
                            self.logger.warning(
                                "FX script kept the file as is : %s" %
                                os.path.basename(file))

                        # file converted...
                        else:
                            self.logger.info("FX script modified %s to %s " %
                                             (os.path.basename(file),
                                              os.path.basename(fxfile)))
                            os.unlink(file)
                            fp = open(fxfile, 'r')
                            dx = fp.read()
                            fp.close()
                            reader.sortedFiles[index] = fxfile
                            data[index] = dx

                    # writing/ingesting the bulletin
                    if isinstance(bullManager,
                                  bulletinManagerAm.bulletinManagerAm):
                        bullManager.writeBulletinToDisk(data[index], True)
                    else:
                        bullManager.writeBulletinToDisk(
                            data[index], True, True)

                try:
                    file = reader.sortedFiles[index]
                    os.unlink(file)
                    if duplicate:
                        self.logger.info("suppressed duplicate file %s",
                                         os.path.basename(file))
                    self.logger.debug("%s has been erased",
                                      os.path.basename(file))
                except OSError, e:
                    (type, value, tb) = sys.exc_info()
                    self.logger.error(
                        "Unable to unlink %s ! Type: %s, Value: %s" %
                        (reader.sortedFiles[index], type, value))
Ejemplo n.º 34
0
class unittest_CacheManager(unittest.TestCase):
    def setUp(self):
        self.manager = CacheManager(maxEntries=3, timeout=5 * 3600)
        self.files = [
            'db/test-file-grib', 'db/test-file-burf', 'db/test-file-bulletin'
        ]
        #f = open(self.files[0])
        #self.data = f.read(1048576)
        #while len(self.data) :
        #print self.data
        #self.data = f.read(1048576)
        #f.close()
        self.data = 'ceci est un test'

    def test_CacheManager(self):
        self.assertEqual(self.manager.get_md5_from_file(self.files[0]),
                         '57285445a1c80023b3f2e96546754d5b')
        self.manager.find(self.data, 'md5')
        self.manager.find(self.files[1])
        #md5 of self.data = 11b35a0201513381dcdd130831f702d0
        self.assertEqual(self.manager.has('11b35a0201513381dcdd130831f702d0'),
                         True)
        self.assertEqual(self.manager.has(self.files[2]), False)
        self.manager.find(self.data, 'md5')
        self.manager.find(self.files[1])
        self.manager.find(self.files[2])
        self.assertEqual(self.manager.getStats(), ({1: 1, 2: 2}, 2.0, 5.0))
        self.manager.find(self.files[0])
        time.sleep(3)
        self.manager.timeoutClear(3)
        self.assertEqual(self.manager.cache, {})
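
# Hedged sketch (not part of the unit test above): the md5-based duplicate
# detection that CacheManager.find(..., 'md5') exercises can be approximated
# with hashlib alone.  Only this sketch and its names (TinyMd5Cache) are
# illustrative; the real CacheManager API is the project's own.
import hashlib
import time


class TinyMd5Cache(object):
    """Remember md5 digests of recently seen payloads to drop duplicates."""

    def __init__(self, timeout=8 * 3600):
        self.timeout = timeout
        self.seen = {}  # digest -> time the payload was first seen

    def is_duplicate(self, data):
        digest = hashlib.md5(data).hexdigest()
        now = time.time()
        # forget entries older than the timeout
        expired = [k for k, t in self.seen.items() if now - t > self.timeout]
        for k in expired:
            del self.seen[k]
        if digest in self.seen:
            return True
        self.seen[digest] = now
        return False

# cache = TinyMd5Cache()
# cache.is_duplicate(b'ceci est un test')   # False on first sight
# cache.is_duplicate(b'ceci est un test')   # True afterwards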
Ejemplo n.º 35
0
    def ingestSingleFile(self, igniter):
        from DiskReader import DiskReader
        from DirectRoutingParser import DirectRoutingParser
        from PullFTP import PullFTP

        if self.source.routemask :
           self.drp = DirectRoutingParser(self.source.routingTable, self.allNames, self.logger, self.source.routing_version)
           self.drp.parse()

        if self.source.nodups :
           self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

        reader = DiskReader(self.ingestDir, self.source.batch, self.source.validation, self.source.patternMatching,
                            self.source.mtime, False, self.source.logger, self.source.sorter, self.source)

        sleep_sec = 1
        if self.source.type == 'pull-file' or self.source.pull_script != None  : sleep_sec = self.source.pull_sleep

        while True:
            if igniter.reloadMode == True:
                # We assign the defaults, reread the configuration file for the source
                # and reread all configuration files for the clients (all this in __init__)
                if self.source.type == 'filter' : 
                       self.source.__init__(self.source.name, self.source.logger, True, True)
                else :
                       self.source.__init__(self.source.name, self.source.logger)

                if self.source.routemask :
                   self.drp = DirectRoutingParser(self.source.routingTable, self.allNames, self.logger)
                   self.drp.parse()

                if self.source.nodups :
                   self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

                reader = DiskReader(self.ingestDir, self.source.batch, self.source.validation, self.source.patternMatching,
                                    self.source.mtime, False, self.source.logger, self.source.sorter, self.source)
                self.logger.info("Receiver has been reloaded")
                igniter.reloadMode = False

            # pull files in rxq directory if in pull mode
            if self.source.type == 'pull-file' or self.source.pull_script != None :
               files    = []
               sleeping = os.path.isfile(PXPaths.RXQ + self.source.name + '/.sleep')

               if self.source.type == 'pull-file' :
                  puller = PullFTP(self.source,self.logger,sleeping)
                  files  = puller.get()
                  puller.close()
               elif self.source.pull_script != None :
                  files  = self.source.pull_script(self.source,self.logger,sleeping)

               if not sleeping :
                  self.logger.debug("Number of files pulled = %s" % len(files) )
               else :
                  self.logger.info("This pull is sleeping")

            # normal diskreader call for files
            reader.read()
            if len(reader.sortedFiles) <= 0:
               time.sleep(sleep_sec)
               continue

            sortedFiles = reader.sortedFiles[:self.source.batch]

            # processing the list if necessary... 

            if self.source.lx_execfile != None :
               sfiles = []
               sfiles.extend(sortedFiles)
               self.logger.info("%d files process with lx_script" % len(sfiles))
               sortedFiles = self.source.run_lx_script(sfiles,self.source.logger)

            self.logger.info("%d files will be ingested" % len(sortedFiles))

            for file in sortedFiles:
                self.ingestFile(file)
Ejemplo n.º 36
0
addpath = mypydir + "../zyrcode/"  # https://github.com/zyrgit/GreenRouteCode
if addpath not in sys.path: sys.path.append(addpath)
from mygmaps import GoogleMaps
from myosrm import gen_map_html_from_path_list
from common.osrm import get_fuel_given_latlng_list

addpath = mypydir + "../zyrcode/code/"
if addpath not in sys.path: sys.path.append(addpath)
from constants import *  # addr2ip
''' Mapping green routes and Google's routes onto the SUMO net.
https://sumo.dlr.de/wiki/Tools/Routes#tracemapper.py
'''

gmaps = GoogleMaps()  # need your API key!
_Cache_Dir = "cache/"
mm_nid2latlng = CacheManager(overwrite_prefix=True)  # need redis


def gen_edges_tracemapper(traceFile, netFile, routeFile):
    cmd = SUMO_Tools_dir + "/route/tracemapper.py -v --geo --fill-gaps --delta 10 -n %s -t %s -o %s" % (
        netFile, traceFile, routeFile)
    print("\n" + cmd)
    subprocess.call(cmd, shell=True)


def fix_edges_routecheck(netFile, routeFile):
    cmd = SUMO_Tools_dir + "/route/routecheck.py -v --fix --inplace --net %s %s" % (
        netFile, routeFile)
    print("\n" + cmd)
    subprocess.call(cmd, shell=True)
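

# Hedged usage sketch: how the two wrappers above are typically chained.  The
# file names are placeholders, and SUMO_Tools_dir is assumed to be set by the
# imported constants module.
if __name__ == "__main__":
    net_file = "map.net.xml"          # hypothetical SUMO network
    trace_file = "trips.trace.csv"    # hypothetical GPS trace file
    route_file = "trips.rou.xml"      # route file written by tracemapper
    gen_edges_tracemapper(trace_file, net_file, route_file)  # map traces onto net edges
    fix_edges_routecheck(net_file, route_file)               # repair the routes in place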
Ejemplo n.º 37
0
class Ingestor(object):
    """
    Normally, an Ingestor lives inside a Source. It can also be used on its own, for the sole reason that
    this object has access to all the configuration options of the clients. In that particular case, source=None.
    """

    def __init__(self, source=None, logger=None):
        
        # General Attributes
        self.source = source
        self.reader = None
        self.drp    = None
        self.count  = 0
        self.Mcount = 99999

        if source is not None:
            self.ingestDir = PXPaths.RXQ + self.source.name
            if self.source.type == 'filter' or self.source.type == 'filter-bulletin' :
               self.ingestDir = PXPaths.FXQ + self.source.name
            self.logger = source.logger
        elif logger is not None:
            self.logger = logger
        self.pxManager = PXManager()              # Create a manager
        self.pxManager.setLogger(self.logger)     # Give the logger to the manager
        self.pxManager.initNames()                # Set rx and tx names
        self.clientNames = self.pxManager.getTxNames()         # Obtains the list of client's names (the ones to which we can link files)
        self.filterNames = self.pxManager.getFxNames()         # Obtains the list of filter's names (the ones to which we can link files)
        if source is not None:
           if self.source.name in self.filterNames : self.filterNames.remove(self.source.name)
        self.sourlientNames = self.pxManager.getTRxNames()     # Obtains the list of sourlient's names (the ones to which we can link files)
        self.allNames = self.clientNames + self.filterNames + self.sourlientNames # Clients + Sourlients names
        self.clients = {}   # All the Client/Filter/Sourlient objects
        self.fileCache = None                                                    # product processed.
        self.dbDirsCache = CacheManager(maxEntries=200000, timeout=25*3600)      # Directories created in the DB
        self.clientDirsCache = CacheManager(maxEntries=25000, timeout=2*3600)    # File ingested in RXQ
        self.feedNames = []  # source to feed
        self.feeds = {}
        if source is not None:
           self.logger.info("Ingestor (source %s) can link files to clients: %s" % (source.name, self.allNames))

    def setFeeds(self, feedNames ):
        from Source import Source
        sources = self.pxManager.getRxNames()
        for name in feedNames :
            if not name in sources : continue
            instant = Source(name, self.logger, False)
            if instant.type == 'am' or instant.type == 'amqp' or instant.type == 'wmo' :
               self.logger.warning("Feed (source %s) will be ignored  (type %s)" % (name, instant.type) )
               continue
            self.feedNames.append(name)
            self.feeds[name] = instant
        self.logger.info("Ingestor (source %s) can link files to receiver: %s" % (self.source.name, self.feedNames))

    def createDir(self, dir, cacheManager):
        if cacheManager.find(dir) == None:
            try:
                os.makedirs(dir, 01775)
            except OSError:
                (type, value, tb) = sys.exc_info()
                self.logger.debug("Problem when creating dir (%s) => Type: %s, Value: %s" % (dir, type, value)) 

    def setClients(self):
        """"
        Set a dictionnary of Clients. Main usage will be to access value of 
        configuration options (mainly masks) of the Client objects.
        """
        from Source    import Source
        from Sourlient import Sourlient
        for name in self.clientNames:
            self.clients[name] = Client(name, self.logger)
        for name in self.filterNames :
            self.clients[name] = Source(name, self.logger, False, True)
        for name in self.sourlientNames:
            self.clients[name] = Sourlient(name, self.logger, False)
            #self.clients[name].readConfig()
            #print self.clients[name].masks

    def getIngestName(self, receptionName):
        """
        Map reception name to ingest name, based on the source configuration.

        This just inserts missing fields, like whattopds. DUMB!
        FIXME: Have a library of functions, configurable per source, to
        perform the mapping, perhaps using rmasks ? & other args.
        """
        receptionNameParts = receptionName.split(':')
        extensionParts = self.source.extension.split(':')

        if len(receptionNameParts) > 6 :
           receptionNameParts = receptionNameParts[:6]
           self.logger.warning("File %s truncated to %s" % (receptionName,':'.join(receptionNameParts) ) )

        for i in range(1,6):
            if len(receptionNameParts) == i :
                 receptionNameParts = receptionNameParts + [extensionParts[i]]
            elif receptionNameParts[i] == '':
                 receptionNameParts[i] = extensionParts[i]
        receptionNameParts = receptionNameParts + [time.strftime("%Y%m%d%H%M%S", time.gmtime())]
        return string.join(receptionNameParts,':')

    def getClientQueueName(self, clientName, ingestName, priority=None):
        """
        Return the directory into which a file of a given priority should be placed.
        Layout used is: /apps/px/txq/<client>/<priority>/YYYYmmddhh
        """
        parts = ingestName.split(':')
        if not priority:
            priority = parts[4].split('.')[0]

        clientpathName = PXPaths.TXQ + clientName + '/' + str(priority) + '/' + time.strftime("%Y%m%d%H", time.gmtime()) + '/' + ingestName

        if clientName in self.filterNames :
           clientpathName = PXPaths.FXQ + clientName + '/' + ingestName

        return clientpathName

    def getDBName(self, ingestName):
        """
        Given an ingest name, return a relative database name

        Given a file name of the form:
            what : ori_system : ori_site : data_type : format :
            link it to:
                db/<today>/data_type/ori_system/ori_site/ingestName
            (same pattern as PDS)

        NB: see notes/tests.txt for why the date/time is recalculated every time.
        """
        if ingestName.count(':') >= 4:
            today = time.strftime("%Y%m%d", time.gmtime())
            dirs = ingestName.split(':')
            return PXPaths.DB + today + '/' + dirs[3] + '/' + dirs[1] + '/' + dirs[2] + '/' + ingestName
        else:
            return ''
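
    # Hedged illustration with made-up values: an ingest name such as
    #   "bulletin:cmc:cwao:sa:ascii::20240101120000"
    # contains at least 4 ':' separators, so getDBName() returns
    #   PXPaths.DB + <today> + "/sa/cmc/cwao/bulletin:cmc:cwao:sa:ascii::20240101120000"
    # i.e. db/<today>/<data_type>/<ori_system>/<ori_site>/<ingestName>.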

    def getRouteKey(self, filename):
        """
        Given an ingest name, return a route key based on the imask given
        """
        # check against the masks
        for mask in self.source.masks:
            # no match
            if not mask[3].match(filename) : continue

            # reject
            if not mask[4] : return None

            # accept... so key generation
            parts = re.findall( mask[0], filename )
            if len(parts) == 2 and parts[1] == '' : parts.pop(1)
            if len(parts) != 1 : continue
            key = parts[0]
            if isinstance(parts[0],tuple) : key = '_'.join(parts[0])
            self.logger.debug("RouteKey Key = %s  Mask = %s  Filename = %s" % (key,mask[0],filename) )
            return key

        # fallback behavior return filename
        return filename
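
    # Hedged illustration with a hypothetical accept mask: if mask[0] is the
    # pattern r'^\w+:(\w+):(\w+):.*', then for the name
    #   "bulletin:cmc:cwao:sa:ascii"
    # re.findall() yields [('cmc', 'cwao')], a single tuple, so the route key
    # returned is "cmc_cwao".  If no accept mask matches, the filename itself
    # is returned.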

    def isMatching(self, client, ingestName):
        """
        Verify whether ingestName matches one of a client's masks
        """
        from Source import Source

        if len(client.masks_deprecated) > 0 :
           for mask in client.masks_deprecated:
               if fnmatch.fnmatch(ingestName, mask[0]):
                   try:
                       if mask[2]:
                           return True
                   except:
                       return False

        for mask in client.masks:
            if mask[3].match(ingestName ) : return mask[4]

        if isinstance(client,Source) : return True

        return False

    def getMatchingClientNamesFromMasks(self, ingestName, potentialClientNames):
        matchingClientNames = []

        for name in potentialClientNames:
            try:
                if self.isMatching(self.clients[name], ingestName):
                    matchingClientNames.append(name)
            except KeyError:
                pass

        return matchingClientNames

    def getMatchingFeedNamesFromMasks(self, ingestName, potentialFeedNames):
        matchingFeedNames = []
        for name in potentialFeedNames:
            try:
                if self.feeds[name].fileMatchMask(ingestName):
                   matchingFeedNames.append(name)
            except KeyError:
                pass
        return matchingFeedNames

    def ingest(self, receptionName, ingestName, clientNames, priority=None ):
        self.logger.debug("Reception Name: %s" % receptionName)
        dbName = self.getDBName(ingestName)

        if dbName == '':
            self.logger.warning('Bad ingest name (%s) => No dbName' % ingestName)
            return 0
        
        self.createDir(os.path.dirname(dbName), self.dbDirsCache)
        #try:
        #    os.link(receptionName, dbName)
        #except OSError:
        #    (type, value, tb) = sys.exc_info()
        #    self.logger.error("Unable to link %s %s, Type: %s, Value: %s" % (receptionName, dbName, type, value))
        os.link(receptionName, dbName)

        nbBytes = os.stat(receptionName)[stat.ST_SIZE]

        if self.source.debug:
            self.logger.info("DBDirsCache: %s" % self.dbDirsCache.cache)
            stats, cached, total = self.dbDirsCache.getStats()
            if total:
                percentage = "%2.2f %% of the last %i requests were cached" % (cached/total * 100,  total)
            else:
                percentage = "No entries in the cache"
            self.logger.info("DB Caching stats: %s => %s" % (str(stats), percentage))


            self.logger.debug("ClientDirsCache: %s" % self.clientDirsCache.cache)
            stats, cached, total = self.clientDirsCache.getStats()
            if total:
                percentage = "%2.2f %% of the last %i requests were cached" % (cached/total * 100,  total)
            else:
                percentage = "No entries in the cache"
            self.logger.debug("Client Caching stats: %s => %s" % (str(stats), percentage))

            self.logger.info("Ingestion Name: %s" % ingestName)
            
        self.logger.info("(%i Bytes) Ingested in DB as %s" % (nbBytes, dbName))

        # Problem bulletins are databased, but not sent to clients
        if ingestName.find("PROBLEM_BULLETIN") != -1:
            return 1

        for name in clientNames:
            clientQueueName = self.getClientQueueName(name, ingestName, priority)
            self.createDir(os.path.dirname(clientQueueName), self.clientDirsCache)
            #try:
            #    os.link(dbName, clientQueueName)
            #except OSError:
            #    (type, value, tb) = sys.exc_info()
            #    self.logger.error("Unable to link %s %s, Type: %s, Value: %s" % (dbName, clientQueueName, type, value))
            os.link(dbName, clientQueueName)

        feedNames = []
        if len(self.feedNames) > 0 :
           feedNames = self.getMatchingFeedNamesFromMasks(ingestName, self.feedNames )
           self.logger.debug("Matching (from patterns) feed names: %s" % feedNames )

        for name in feedNames:
            if name in clientNames : continue
            sourceQueueName = PXPaths.RXQ + name + '/' + ingestName
            self.createDir(os.path.dirname(sourceQueueName), self.clientDirsCache)
            os.link(dbName, sourceQueueName)

        self.logger.info("Queued for: %s" % string.join(clientNames) + ' ' + string.join(feedNames) )
        return 1

    def run(self):
        if self.source.type == 'single-file' or self.source.type == 'pull-file':
            self.ingestSingleFile()
        elif self.source.type == 'bulletin-file' or self.source.type == 'pull-bulletin':
            self.ingestBulletinFile()
        elif self.source.type == 'collector':
            self.ingestCollection()


    def ingestSingleFile(self, igniter):
        from DiskReader import DiskReader
        from DirectRoutingParser import DirectRoutingParser
        from PullFTP import PullFTP

        if self.source.routemask :
           self.drp = DirectRoutingParser(self.source.routingTable, self.allNames, self.logger, self.source.routing_version)
           self.drp.parse()

        if self.source.nodups :
           self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

        reader = DiskReader(self.ingestDir, self.source.batch, self.source.validation, self.source.patternMatching,
                            self.source.mtime, False, self.source.logger, self.source.sorter, self.source)

        sleep_sec = 1
        if self.source.type == 'pull-file' or self.source.pull_script != None  : sleep_sec = self.source.pull_sleep

        while True:
            if igniter.reloadMode == True:
                # We assign the defaults, reread the configuration file for the source
                # and reread all configuration files for the clients (all this in __init__)
                if self.source.type == 'filter' : 
                       self.source.__init__(self.source.name, self.source.logger, True, True)
                else :
                       self.source.__init__(self.source.name, self.source.logger)

                if self.source.routemask :
                   self.drp = DirectRoutingParser(self.source.routingTable, self.allNames, self.logger)
                   self.drp.parse()

                if self.source.nodups :
                   self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

                reader = DiskReader(self.ingestDir, self.source.batch, self.source.validation, self.source.patternMatching,
                                    self.source.mtime, False, self.source.logger, self.source.sorter, self.source)
                self.logger.info("Receiver has been reloaded")
                igniter.reloadMode = False

            # pull files in rxq directory if in pull mode
            if self.source.type == 'pull-file' or self.source.pull_script != None :
               files    = []
               sleeping = os.path.isfile(PXPaths.RXQ + self.source.name + '/.sleep')

               if self.source.type == 'pull-file' :
                  puller = PullFTP(self.source,self.logger,sleeping)
                  files  = puller.get()
                  puller.close()
               elif self.source.pull_script != None :
                  files  = self.source.pull_script(self.source,self.logger,sleeping)

               if not sleeping :
                  self.logger.debug("Number of files pulled = %s" % len(files) )
               else :
                  self.logger.info("This pull is sleeping")

            # normal diskreader call for files
            reader.read()
            if len(reader.sortedFiles) <= 0:
               time.sleep(sleep_sec)
               continue

            sortedFiles = reader.sortedFiles[:self.source.batch]

            # processing the list if necessary... 

            if self.source.lx_execfile != None :
               sfiles = []
               sfiles.extend(sortedFiles)
               self.logger.info("%d files process with lx_script" % len(sfiles))
               sortedFiles = self.source.run_lx_script(sfiles,self.source.logger)

            self.logger.info("%d files will be ingested" % len(sortedFiles))

            for file in sortedFiles:
                self.ingestFile(file)
    
    def ingestFile(self, file ):

        # check for duplicates if the user requires it
        if self.source.nodups : 

           # get md5 key from file...
           md5_key = self.fileCache.get_md5_from_file(file)

           # If data is already in cache, we don't send it
           if self.fileCache.find(md5_key, 'standard') is not None:
              os.unlink(file)
              self.logger.info("suppressed duplicate file %s", os.path.basename(file))
              return

        # converting the file if necessary
        if self.source.fx_execfile != None :

           fxfile = self.source.run_fx_script(file,self.source.logger)

           # conversion did not work
           if fxfile == None :
                  self.logger.warning("FX script ignored the file : %s"    % os.path.basename(file) )
                  os.unlink(file)
                  return

           # file already in proper format
           elif fxfile == file :
                  self.logger.warning("FX script kept the file as is : %s" % os.path.basename(file) )

           # file converted...
           else :
                  self.logger.info("FX script modified %s to %s " % (os.path.basename(file),os.path.basename(fxfile)) )
                  os.unlink(file)
                  file = fxfile

        # filename to ingest

        ingestName = self.getIngestName(os.path.basename(file))

        # make sure we do not have a dbName already in there
        # if it happens add a second to the postfix datetime stamp
        # until the dbName is not found

        dbName = self.getDBName(ingestName)
        if dbName != '' and os.path.exists(dbName) :
           ingestOrig = ingestName
           self.logger.warning('dbName %s found in db, attempt to modify suffix ' % ingestName)
           sec = int(ingestName[-2:]) + 1
           while( sec <= 59 ) :
                ingestName = ingestName[:-2] + "%.2d" % sec
                dbName = self.getDBName(ingestName)
                if not os.path.exists(dbName) : break
                sec = sec + 1

           # our trick did not work... the process will bump into the existing dbName
           if sec == 60 : ingestName = ingestOrig

        # usual clients

        potentials = self.clientNames + self.filterNames

        # routing clients and priority (accept mask + routing info)

        priority = None
        if self.source.routemask :

           # ingestBase is the ingestName without the postfix reception date
           lst = ingestName.split(':')
           pos = -1
           if lst[-2] == '' : pos = -2
           ingestBase = ':'.join(lst[:pos])

           # build the key 
           key = self.getRouteKey(ingestBase)

           # get the clients for that key (if possible)
           lst = None
           potentials = []
           if key != None :
              lst = self.drp.getClients(key)
              if lst != None :
                 potentials = lst
                 priority = self.drp.getHeaderPriority(key)
              else :
                 self.logger.warning("Key not in routing table (%s from %s)" % (key,ingestName) )
           else :
              self.logger.warning("Key not generated (no accept match) with %s" % ingestName )

        # ingesting the file
        matchingClients  = self.getMatchingClientNamesFromMasks(ingestName, potentials )
        self.logger.debug("Matching (from patterns) client names: %s" % matchingClients)
        self.ingest(file, ingestName, matchingClients, priority )
        os.unlink(file)

    def ingestBulletinFile(self, igniter):
        from DiskReader import DiskReader
        import bulletinManager
        import bulletinManagerAm
        from PullFTP import PullFTP

        sleep_sec = 1
        if self.source.type == 'pull-bulletin' or self.source.pull_script != None : sleep_sec = self.source.pull_sleep

        bullManager = bulletinManager.bulletinManager(
                    self.ingestDir,
                    self.logger,
                    self.ingestDir,
                    99999,
                    '\n',
                    self.source.extension,
                    self.source.routingTable,
                    self.source.mapEnteteDelai,
                    self.source,
                    self.source.addStationInFilename)

        if self.source.bulletin_type == 'am' :
           bullManager = bulletinManagerAm.bulletinManagerAm(
                    self.ingestDir,
                    self.logger,
                    self.ingestDir,
                    99999,
                    '\n',
                    self.source.extension,
                    self.source.routingTable,
                    self.source.addSMHeader,
                    PXPaths.STATION_TABLE,
                    self.source.mapEnteteDelai,
                    self.source,
                    self.source.addStationInFilename)

        if self.source.nodups :
           self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

        reader = DiskReader(bullManager.pathSource, self.source.batch, self.source.validation, self.source.patternMatching,
                            self.source.mtime, False, self.source.logger, self.source.sorter, self.source)
        while True:
            # If a SIGHUP signal is received ...
            if igniter.reloadMode == True:
                # We assign the defaults, reread the configuration file for the source
                # and reread all configuration files for the clients (all this in __init__)
                if self.source.type == 'filter-bulletin' : 
                       self.source.__init__(self.source.name, self.source.logger, True, True)
                else :
                       self.source.__init__(self.source.name, self.source.logger)

                bullManager = bulletinManager.bulletinManager(
                               self.ingestDir,
                               self.logger,
                               self.ingestDir,
                               99999,
                               '\n',
                               self.source.extension,
                               self.source.routingTable,
                               self.source.mapEnteteDelai,
                               self.source,
                               self.source.addStationInFilename)

                if self.source.bulletin_type == 'am' :
                   bullManager = bulletinManagerAm.bulletinManagerAm(
                               self.ingestDir,
                               self.logger,
                               self.ingestDir,
                               99999,
                               '\n',
                               self.source.extension,
                               self.source.routingTable,
                               self.source.addSMHeader,
                               PXPaths.STATION_TABLE,
                               self.source.mapEnteteDelai,
                               self.source,
                               self.source.addStationInFilename)

                if self.source.nodups :
                   self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

                reader = DiskReader(bullManager.pathSource, self.source.batch, self.source.validation, self.source.patternMatching,
                                    self.source.mtime, False, self.source.logger, self.source.sorter,self.source)

                self.logger.info("Receiver has been reloaded")
                igniter.reloadMode = False


            # pull files in rxq directory if in pull mode
            if self.source.type == 'pull-bulletin' or self.source.pull_script != None :
               files    = []
               sleeping = os.path.isfile(PXPaths.RXQ + self.source.name + '/.sleep')

               if self.source.type == 'pull-bulletin' :
                  puller = PullFTP(self.source,self.logger,sleeping)
                  files  = puller.get()
                  puller.close()
               elif self.source.pull_script != None :
                  files  = self.source.pull_script(self.source,self.logger,sleeping)

               if not sleeping :
                  self.logger.debug("Number of files pulled = %s" % len(files) )
               else :
                  self.logger.info("This pull is sleeping")


            # normal diskreader call for files
            reader.read()

            # processing the list if necessary... 

            if self.source.lx_execfile != None and len(reader.sortedFiles) > 0:
               sfiles = []
               sfiles.extend(reader.sortedFiles)
               self.logger.info("%d files process with lx_script" % len(sfiles))
               sortedFiles = self.source.run_lx_script(sfiles,self.source.logger)
               reader.sortedFiles = sortedFiles

            # continue normally
            data = reader.getFilesContent(reader.batch)

            if len(data) == 0:
                time.sleep(sleep_sec)
                continue
            else:
                self.logger.info("%d bulletins will be ingested", len(data))

            # Write (and name correctly) the bulletins to disk, erase them after
            for index in range(len(data)):

                # ignore duplicates if required
                duplicate = self.source.nodups and self.fileCache.find(data[index], 'md5') is not None

                #nb_bytes = len(data[index])
                #self.logger.info("Lecture de %s: %d bytes" % (reader.sortedFiles[index], nb_bytes))
                if not duplicate : 

                   # converting the file if necessary
                   if self.source.fx_execfile != None :

                      file   = reader.sortedFiles[index]
                      fxfile = self.source.run_fx_script(file,self.source.logger)

                      # conversion did not work
                      if fxfile == None :
                             self.logger.warning("FX script ignored the file : %s"    % os.path.basename(file) )
                             os.unlink(file)
                             continue

                      # file already in proper format
                      elif fxfile == file :
                             self.logger.warning("FX script kept the file as is : %s" % os.path.basename(file) )

                      # file converted...
                      else :
                             self.logger.info("FX script modified %s to %s " % (os.path.basename(file),os.path.basename(fxfile)) )
                             os.unlink(file)
                             fp = open(fxfile,'r')
                             dx = fp.read()
                             fp.close()
                             reader.sortedFiles[index] = fxfile
                             data[index] = dx

                   # writing/ingesting the bulletin
                   if isinstance(bullManager,bulletinManagerAm.bulletinManagerAm):
                      bullManager.writeBulletinToDisk(data[index], True)
                   else :
                      bullManager.writeBulletinToDisk(data[index], True, True)

                try:
                    file = reader.sortedFiles[index]
                    os.unlink(file)
                    if duplicate : self.logger.info("suppressed duplicate file %s", os.path.basename(file))
                    self.logger.debug("%s has been erased", os.path.basename(file))
                except OSError, e:
                    (type, value, tb) = sys.exc_info()
                    self.logger.error("Unable to unlink %s ! Type: %s, Value: %s" % (reader.sortedFiles[index], type, value))
Ejemplo n.º 38
0
class WebSearcher:
    def __init__(self):
        self.cm = False
        self.sleeptime = 2  # number of seconds to sleep after writing a cache entry
        self.swWrited = False  # whether to force-overwrite the cached search results (effective when swWrited is True)
        self.__params = {}

    def activeCache(self, cachedir):
        """キャッシュ保存先の設定とキャッシュ保存関数の有効化"""
        self.cm = CacheManager(cachedir)

    def getHitCount(self):
        """get the number of retrieved pages
		@return int the number of retrieved pages
		"""
        hits = False
        if self.__engine == "tsubaki":
            url = "http://tsubaki.ixnlp.nii.ac.jp/api.cgi?query=%s&only_hitcount=1" % self.__query
            force_dpnd = self.getParameter("force_dpnd")
            if force_dpnd:
                url += "&force_dpnd=" + str(force_dpnd)
            print url
            if self.is_available_caching():
                res = self.runCacheFunc(url)
            else:
                res = urllib.urlopen(url).read()
            if not re.match("^[0-9]+$", res):
                return "error"
            hits = res.rstrip()
        elif self.__engine == "yahoo" or self.__engine == "yahoo2":
            if self.is_available_caching():
                xmlstr = self.runCacheFunc(self.getResultURI(1, 1))
                print xmlstr
                doc = ElementTree(fromstring(xmlstr))
            else:
                url = self.getResultURI(1, 1)
                fd = file(url, "rb")
                doc = ElementTree(file=fd)
            e = doc.getroot()
            hits = e.attrib["totalResultsAvailable"]
        return hits

    def getResultURI(self, start, num):
        engine = self.getEngine()
        if engine == "tsubaki":
            url = "http://tsubaki.ixnlp.nii.ac.jp/api.cgi?query=" + self.getQuery(
            ) + "&start=" + str(start) + "&results=" + str(num) + "&Snippet=1"
        elif engine == "yahoo":
            appid = self.getParameter("appid")
            if not appid:
                sys.exit("You must assign appid!!")
            type = self.getParameter("type")
            if not type:
                type = "all"
            url = "http://search.yahooapis.jp/WebSearchService/V1/webSearch?query=%s&appid=%s&type=%s&start=%s&results=%s&format=any" % (
                self.__query, appid, type, start, num)
            print url
        elif engine == "yahoo2":
            appid = self.getParameter("appid")
            if not appid:
                sys.exit("You must assign appid!!")
            type = self.getParameter("type")
            if not type:
                type = "all"
            url = "http://search.yahooapis.jp/WebSearchService/V2/webSearch?query=%s&appid=%s&type=%s&start=%s&results=%s&format=html" % (
                self.__query, appid, type, start, num)
        else:
            url = False
        return url

    #"?query=" + urlencode(fnkf("-w", $this->query)) \

    def is_available(self, engine):
        """check which defined search engine is available"""
        available_engines = ["okwave", "tsubaki", "yahoo", "yahooQA"]
        return engine in available_engines

    def is_available_caching(self):
        return self.cm != False

    def runCacheFunc(self, url):
        return self.cm.rwCache(url, self.sleeptime, self.swWrited)

    def search(self, start, max):
        """run searching Web"""
        engine = self.getEngine()

        result = {}
        rank = 1
        if engine == "tsubaki":
            url = self.getResultURI(start, max)
            if self.is_available_caching():
                xmlstr = self.runCacheFunc(url)
                doc = ElementTree(fromstring(xmlstr))
            else:
                fd = file(url, "rb")
                doc = ElementTree(file=fd)
            for e in doc.findall("//Result"):
                tmp = {
                    "title":
                    e.find("Title").text,
                    "url":
                    e.find("Url").text,
                    "snippet":
                    e.find("Snippet").text,
                    "cache":
                    "http://tsubaki.ixnlp.nii.ac.jp/api.cgi?id=" +
                    e.attrib["Id"] + "&format=html"
                }
                result[rank] = tmp
                rank += 1

        return result

    def getEngine(self):
        return self.__engine

    def setEngine(self, engine):
        self.__engine = engine

    def getParameter(self, name):
        if self.__params.has_key(name):
            return self.__params[name]
        else:
            return False

    def setParameter(self, name, value):
        self.__params[name] = value

    def getQuery(self):
        return self.__query

    def setQuery(self, query):
        self.__query = query

    ## Properties ##
    engine = property(getEngine, setEngine)
    query = property(getQuery, setQuery)
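
# Hedged usage sketch: a typical call sequence for the class above.  The cache
# directory, query and appid values are placeholders, and the TSUBAKI/Yahoo
# endpoints may no longer exist, so this only shows the intended wiring.
#
#   ws = WebSearcher()
#   ws.activeCache("cache/")        # optional: cache raw API responses on disk
#   ws.engine = "tsubaki"           # via the engine property
#   ws.query = "example query"
#   hits = ws.getHitCount()         # total hit count reported by the engine
#   results = ws.search(1, 10)      # dict: rank -> {title, url, snippet, cache}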
Ejemplo n.º 39
0
class AptP2P(protocol.Factory):
    """The main code object that does all of the work.
    
    Contains all of the sub-components that do all the low-level work, and
    coordinates communication between them.
    
    @type dhtClass: L{interfaces.IDHT}
    @ivar dhtClass: the DHT class to use
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type db: L{db.DB}
    @ivar db: the database to use for tracking files and hashes
    @type dht: L{DHTManager.DHT}
    @ivar dht: the manager for DHT requests
    @type stats: L{stats.StatsLogger}
    @ivar stats: the statistics logger to record sent data to
    @type http_server: L{HTTPServer.TopLevel}
    @ivar http_server: the web server that will handle all requests from apt
        and from other peers
    @type peers: L{PeerManager.PeerManager}
    @ivar peers: the manager of all downloads from mirrors and other peers
    @type mirrors: L{MirrorManager.MirrorManager}
    @ivar mirrors: the manager of downloaded information about mirrors which
        can be queried to get hashes from file names
    @type cache: L{CacheManager.CacheManager}
    @ivar cache: the manager of all downloaded files
    @type my_addr: C{string}, C{int}
    @ivar my_addr: the IP address and port of this peer
    """
    def __init__(self, dhtClass):
        """Initialize all the sub-components.
        
        @type dhtClass: L{interfaces.IDHT}
        @param dhtClass: the DHT class to use
        """
        log.msg('Initializing the main apt_p2p application')
        self.dhtClass = dhtClass
        self.my_addr = None

    #{ Factory interface
    def startFactory(self):
        reactor.callLater(0, self._startFactory)

    def _startFactory(self):
        log.msg('Starting the main apt_p2p application')
        self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
        if not self.cache_dir.child(download_dir).exists():
            self.cache_dir.child(download_dir).makedirs()
        if not self.cache_dir.child(peer_dir).exists():
            self.cache_dir.child(peer_dir).makedirs()
        self.db = DB(self.cache_dir.child('apt-p2p.db'))
        self.dht = DHT(self.dhtClass, self.db)
        df = self.dht.start()
        df.addCallback(self._dhtStarted)
        self.stats = StatsLogger(self.db)
        self.http_server = TopLevel(self.cache_dir.child(download_dir),
                                    self.db, self)
        self.http_server.getHTTPFactory().startFactory()
        self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht,
                                 self.stats)
        self.mirrors = MirrorManager(self.cache_dir)
        self.cache = CacheManager(self.cache_dir.child(download_dir), self.db,
                                  self)

    def _dhtStarted(self, result):
        """Save the returned address and start scanning the cache."""
        self.my_addr = result
        self.cache.scanDirectories()

    def stopFactory(self):
        log.msg('Stopping the main apt_p2p application')
        self.http_server.getHTTPFactory().stopFactory()
        self.mirrors.cleanup()
        self.stats.save()
        self.db.close()

    def buildProtocol(self, addr):
        return self.http_server.getHTTPFactory().buildProtocol(addr)

    #{ Other functions
    def getStats(self):
        """Retrieve and format the statistics for the program.
        
        @rtype: C{string}
        @return: the formatted HTML page containing the statistics
        """
        out = '<html><body>\n\n'
        out += self.stats.formatHTML(self.my_addr)
        out += '\n\n'
        out += self.dht.getStats()
        out += '\n</body></html>\n'
        return out

    #{ Main workflow
    def get_resp(self, req, url, orig_resp=None):
        """Lookup a hash for the file in the local mirror info.
        
        Starts the process of getting a response to an apt request.
        
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
            (optional, ignored if missing)
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will be called back with the response
        """
        d = defer.Deferred()

        log.msg('Trying to find hash for %s' % url)
        findDefer = self.mirrors.findHash(unquote(url))

        findDefer.addCallbacks(self.findHash_done,
                               self.findHash_error,
                               callbackArgs=(req, url, orig_resp, d),
                               errbackArgs=(req, url, orig_resp, d))
        return d

    def findHash_error(self, failure, req, url, orig_resp, d):
        """Process the error in hash lookup by returning an empty L{HashObject}."""
        log.msg('Hash lookup for %s resulted in an error: %s' %
                (url, failure.getErrorMessage()))
        self.findHash_done(HashObject(), req, url, orig_resp, d)

    def findHash_done(self, hash, req, url, orig_resp, d):
        """Use the returned hash to lookup the file in the cache.
        
        If the hash was not found, the workflow skips down to download from
        the mirror (L{startDownload}), or checks the freshness of an old
        response if there is one.
        
        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        """
        if hash.expected() is None:
            log.msg('Hash for %s was not found' % url)
            # Send the old response or get a new one
            if orig_resp:
                self.check_freshness(req, url, orig_resp, d)
            else:
                self.startDownload([], req, hash, url, d)
        else:
            log.msg('Found hash %s for %s' % (hash.hexexpected(), url))

            # Lookup hash in cache
            locations = self.db.lookupHash(hash.expected(), filesOnly=True)
            self.getCachedFile(hash, req, url, d, locations)

    def check_freshness(self, req, url, orig_resp, d):
        """Send a HEAD to the mirror to check if the response from the cache is still valid.
        
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        log.msg('Checking if %s is still fresh' % url)
        modtime = orig_resp.headers.getHeader('Last-Modified')
        headDefer = self.peers.get(HashObject(),
                                   url,
                                   method="HEAD",
                                   modtime=modtime)
        headDefer.addCallbacks(self.check_freshness_done,
                               self.check_freshness_error,
                               callbackArgs=(req, url, orig_resp, d),
                               errbackArgs=(req, url, d))

    def check_freshness_done(self, resp, req, url, orig_resp, d):
        """Return the fresh response, if stale start to redownload.
        
        @type resp: L{twisted.web2.http.Response}
        @param resp: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        if resp.code == 304:
            log.msg('Still fresh, returning: %s' % url)
            d.callback(orig_resp)
        else:
            log.msg('Stale, need to redownload: %s' % url)
            self.startDownload([], req, HashObject(), url, d)

    def check_freshness_error(self, err, req, url, d):
        """Mirror request failed, continue with download.
        
        @param err: the failure returned by the HEAD request to the mirror
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        """
        log.err(err)
        self.startDownload([], req, HashObject(), url, d)

    def getCachedFile(self, hash, req, url, d, locations):
        """Try to return the file from the cache, otherwise move on to a DHT lookup.
        
        @type locations: C{list} of C{dictionary}
        @param locations: the files in the cache that match the hash,
            the dictionary contains a key 'path' whose value is a
            L{twisted.python.filepath.FilePath} object for the file.
        """
        if not locations:
            log.msg('Failed to return file from cache: %s' % url)
            self.lookupHash(req, hash, url, d)
            return

        # Get the first possible location from the list
        file = locations.pop(0)['path']
        log.msg('Returning cached file: %s' % file.path)

        # Get its response
        resp = static.File(file.path).renderHTTP(req)
        if isinstance(resp, defer.Deferred):
            resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
        else:
            self._getCachedFile(resp, hash, req, url, d, locations)

    def _getCachedFile(self, resp, hash, req, url, d, locations):
        """Check the returned response to be sure it is valid."""
        if isinstance(resp, failure.Failure):
            log.msg('Got error trying to get cached file')
            log.err(resp)
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)
            return

        log.msg('Cached response: %r' % resp)

        if resp.code >= 200 and resp.code < 400:
            d.callback(resp)
        else:
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)

    def lookupHash(self, req, hash, url, d):
        """Lookup the hash in the DHT."""
        log.msg('Looking up hash in DHT for file: %s' % url)
        key = hash.expected()
        lookupDefer = self.dht.get(key)
        lookupDefer.addBoth(self.startDownload, req, hash, url, d)

    def startDownload(self, values, req, hash, url, d):
        """Start the download of the file.
        
        The download will be from peers if the DHT lookup succeeded, or
        from the mirror otherwise.
        
        @type values: C{list} of C{dictionary}
        @param values: the returned values from the DHT containing peer
            download information
        """
        # Remove some headers Apt sets in the request
        req.headers.removeHeader('If-Modified-Since')
        req.headers.removeHeader('Range')
        req.headers.removeHeader('If-Range')

        if not isinstance(values, list) or not values:
            if not isinstance(values, list):
                log.msg('DHT lookup for %s failed with error %r' %
                        (url, values))
            else:
                log.msg('Peers for %s were not found' % url)
            getDefer = self.peers.get(hash, url)
            #            getDefer.addErrback(self.final_fallback, hash, url)
            getDefer.addCallback(self.cache.save_file, hash, url)
            getDefer.addErrback(self.cache.save_error, url)
            getDefer.addCallbacks(d.callback, d.errback)
        else:
            log.msg('Found peers for %s: %r' % (url, values))
            # Download from the found peers
            getDefer = self.peers.get(hash, url, values)
            getDefer.addCallback(self.check_response, hash, url)
            getDefer.addCallback(self.cache.save_file, hash, url)
            getDefer.addErrback(self.cache.save_error, url)
            getDefer.addCallbacks(d.callback, d.errback)

    def check_response(self, response, hash, url):
        """Check the response from peers, and download from the mirror if it is not."""
        if response.code < 200 or response.code >= 300:
            log.msg(
                'Download from peers failed, going to direct download: %s' %
                url)
            getDefer = self.peers.get(hash, url)
            #            getDefer.addErrback(self.final_fallback, hash, url)
            return getDefer
        return response

    def final_fallback(self, err, hash, url):
        """Final retry if the mirror still generated an error."""
        log.msg('Download from mirror failed, retrying once only: %s' % url)
        log.err(err)
        getDefer = self.peers.get(hash, url)
        return getDefer

    def new_cached_file(self,
                        file_path,
                        hash,
                        new_hash,
                        url=None,
                        forceDHT=False):
        """Add a newly cached file to the mirror info and/or the DHT.
        
        If the file was downloaded, set url to the path it was downloaded for.
        Doesn't add a file to the DHT unless a hash was found for it
        (but does add it anyway if forceDHT is True).
        
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the file in the local cache
        @type hash: L{Hash.HashObject}
        @param hash: the original (expected) hash object containing also the
            hash of the downloaded file
        @type new_hash: C{boolean}
        @param new_hash: whether the hash was new to this peer, and so should
            be added to the DHT
        @type url: C{string}
        @param url: the URI of the location of the file in the mirror
            (optional, defaults to not adding the file to the mirror info)
        @type forceDHT: C{boolean}
        @param forceDHT: whether to force addition of the file to the DHT
            even if the hash was not found in a mirror
            (optional, defaults to False)
        """
        if url:
            self.mirrors.updatedFile(url, file_path)

        if self.my_addr and hash and new_hash and (hash.expected() is not None
                                                   or forceDHT):
            return self.dht.store(hash)
        return None
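The workflow above threads the original request, the URL, and a caller-facing Deferred through each callback using callbackArgs/errbackArgs, so the final response can be delivered on a separate Deferred regardless of which path (cache, peers, or mirror) produced it. Below is a minimal, self-contained sketch of that pattern; get_resp_sketch and the stand-in lookup result are illustrative only and are not part of apt_p2p.

from twisted.internet import defer

def _lookup_done(result, url, outer_d):
    # Deliver the final answer on the caller-facing Deferred.
    outer_d.callback('response for %s (hash %r)' % (url, result))

def _lookup_failed(failure, url, outer_d):
    # Fall back to an "empty hash" result, as findHash_error does above.
    outer_d.callback('response for %s (no hash found)' % url)

def get_resp_sketch(url):
    outer_d = defer.Deferred()
    find_d = defer.succeed('abc123')        # stand-in for self.mirrors.findHash(url)
    find_d.addCallbacks(_lookup_done, _lookup_failed,
                        callbackArgs=(url, outer_d),
                        errbackArgs=(url, outer_d))
    return outer_d

results = []
get_resp_sketch('/debian/dists/stable/Release').addCallback(results.append)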
Ejemplo n.º 40
0
class senderAMQP: 
   
   def __init__(self, client, logger):
      self.client     = client                        # Client object (give access to all configuration options)
      self.timeout    = client.timeout                # No timeout for now
      self.logger     = logger                        # Logger object
      self.connection = None                          # The connection
      self.igniter    = None
      self.ssl        = False
      self.reader     = DiskReader(PXPaths.TXQ  + self.client.name, self.client.batch,
                               self.client.validation, self.client.patternMatching,
                               self.client.mtime, True, self.logger, eval(self.client.sorter), self.client)

      self.debugFile    = False

      self.cacheManager = CacheManager(maxEntries=self.client.cache_size, timeout=8*3600)

      # AMQP: is there a maximum message size?
      # self.set_maxLength(self.client.maxLength)

      # statistics.
      self.totBytes = 0
      self.initialTime = time.time()
      self.finalTime = None

      self._connect()

   def printSpeed(self):
      elapsedTime = time.time() - self.initialTime
      speed = self.totBytes/elapsedTime
      self.totBytes = 0
      self.initialTime = time.time()
      return "Speed = %i" % int(speed)

   def setIgniter(self, igniter):
      self.igniter = igniter 

   def resetReader(self):
      self.reader = DiskReader(PXPaths.TXQ  + self.client.name, self.client.batch,
                               self.client.validation, self.client.patternMatching,
                               self.client.mtime, True, self.logger, eval(self.client.sorter), self.client)

   def _connect(self):

      self.connection = None
      self.channel    = None

      while True:
         try:
              host = self.client.host
              if self.client.port != None : host = host + ':' + self.client.port
              # connect
              self.connection = amqp.Connection(host, userid=self.client.user, password=self.client.passwd, ssl=self.ssl)
              self.channel    = self.connection.channel()

              # what kind of exchange
              self.channel.access_request(self.client.exchange_realm, active=True, write=True)
              self.channel.exchange_declare(self.client.exchange_name, self.client.exchange_type, auto_delete=False)

              self.logger.info("AMQP Sender is now connected to: %s" % str(self.client.host))
              break
         except:
            (type, value, tb) = sys.exc_info()
            self.logger.error("AMQP Sender cannot connect to: %s" % str(self.client.host))
            self.logger.error("Type: %s, Value: %s, Sleeping 5 seconds ..." % (type, value))
         time.sleep(5)

   def shutdown(self):
      pass

   def read(self):
      if self.igniter.reloadMode == True:
         # We assign the defaults and reread the configuration file (in __init__)
         if self.channel    != None : self.channel.close()
         if self.connection != None : self.connection.close()

         self.client.__init__(self.client.name, self.client.logger)

         self.resetReader()
         self.cacheManager.clear()
         self.logger.info("Cache has been cleared")
         self.logger.info("Sender AMQP has been reloaded")
         self.igniter.reloadMode = False

      self.reader.read()
      return self.reader.getFilesContent(self.client.batch)

   def write(self, data):
      if len(data) >= 1:
         self.logger.info("%d new messages will be sent", len(data) )

         for index in range(len(data)):

             self.logger.start_timer()

             # data info

             msg_body = data[index]
             nbBytesSent = len(msg_body)

             # if in cache then it was already sent... nothing to do
             # priority 0 is retransmission and is never suppressed

             path = self.reader.sortedFiles[index]
             priority = path.split('/')[-3]

             if self.client.nodups and priority != '0' and self.in_cache( data[index], True, path ) :
                #PS... same bug as in Senders AM, AMIS & WMO.
                #self.unlink_file( self.reader.sortedFiles[index] )
                continue

             # get/check destination Name
             basename = os.path.basename(path)
             destName, destDir = self.client.getDestInfos(basename)
             if not destName :
                os.unlink(path)
                self.logger.info('No destination name: %s has been erased' % path)
                continue

             # build message
             parts = basename.split(':')
             if parts[-1][0:2] == '20' : parts = parts[:-1]
             hdr = {'filename': ':'.join(parts) }
             msg = amqp.Message(msg_body, content_type= self.client.exchange_content,application_headers=hdr)

             # exchange_key pattern 
             exchange_key = self.client.exchange_key
             if '$' in self.client.exchange_key :
                exchange_key = self.keyPattern(basename,self.client.exchange_key)
             self.logger.debug("exchange key = %s" % exchange_key)

             # publish message
             self.channel.basic_publish(msg, self.client.exchange_name, exchange_key )

             self.logger.delivered("(%i Bytes) Message %s  delivered" % (nbBytesSent, basename),path,nbBytesSent)
             self.unlink_file( path )

             self.totBytes += nbBytesSent

      else:
         time.sleep(1)

   def run(self):
      while True:
         data = self.read()
         try:
            self.write(data)
         except:
            (type, value, tb) = sys.exc_info()
            self.logger.error("Sender error! Type: %s, Value: %s" % (type, value))
            
            # We close the connection
            try:
                self.channel.close()
                self.connection.close()
            except:
                (type, value, tb) = sys.exc_info()
                self.logger.error("Problem in closing socket! Type: %s, Value: %s" % (type, value))

            # We try to reconnect. 
            self._connect()

         #time.sleep(0.2)

   # check if data in cache... if not it is added automatically
   def in_cache(self,data,unlink_it,path):
       already_in = False

       # If data is already in cache, we don't send it
       if self.cacheManager.find(data, 'md5') is not None:
           already_in = True
           if unlink_it :
              try:
                   os.unlink(path)
                   self.logger.info("suppressed duplicate send %s", os.path.basename(path))
              except OSError, e:
                   (type, value, tb) = sys.exc_info()
                   self.logger.info("in_cache unable to unlink %s ! Type: %s, Value: %s"
                                   % (path, type, value))

       return already_in
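The duplicate suppression above relies on CacheManager.find(data, 'md5'), which records the payload's md5 and reports whether it was already seen within the cache timeout. The stand-alone sketch below illustrates that idea only; DuplicateCache and its methods are illustrative names, not the metpx CacheManager API.

import hashlib, time

class DuplicateCache(object):
    """Tiny illustration of md5-based duplicate suppression with a timeout."""
    def __init__(self, timeout=8 * 3600):
        self.timeout = timeout
        self.entries = {}            # md5 digest -> time first seen

    def already_seen(self, data):
        now = time.time()
        # Drop entries older than the timeout before checking.
        for key in [k for k, t in self.entries.items() if now - t > self.timeout]:
            del self.entries[key]
        digest = hashlib.md5(data).hexdigest()
        if digest in self.entries:
            return True
        self.entries[digest] = now
        return False

cache = DuplicateCache()
assert not cache.already_seen(b"bulletin payload")   # first time: not a duplicate
assert cache.already_seen(b"bulletin payload")       # second time: suppressed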
Ejemplo n.º 41
0
DirOSM = get_conf(configfile, "DirOSM")  #~/greendrive/osmdata
DirData = replace_user_home(DirData)
DirOSM = replace_user_home(DirOSM)
gpsfolder = "gps"
obdfolder = "obd"
combinefolder = "combine"
MyIp = get_my_ip()
My_Platform = get_platform()  # "centos" means cluster
On_Cluster = False
if My_Platform == 'centos': On_Cluster = True

iprint = 2

err = ErrorLogger("allerror.txt", py_fname(__file__, False))
lg = SimpleAppendLogger("./logs/" + py_fname(__file__, False))
mm = CacheManager()  # using redis on cluster

EXT = get_conf(configfile, "EXT")  # .gz
CUT = get_conf(configfile, "CUT")  # ~|
EQU = get_conf(configfile, "EQU", delimiter=":")
KeyUserEmail = get_conf(configfile, "KeyUserEmail")
KeyUserName = get_conf(configfile, "KeyUserName")
UnknownUserEmail = get_conf(configfile, "UnknownUserEmail")  # Anonymous
KeySysMs = get_conf(configfile, "KeySysMs")
KeyGPSTime = get_conf(configfile, "KeyGPSTime")
KeyGPSLat = get_conf(configfile, "KeyGPSLat")
KeyGPSLng = get_conf(configfile, "KeyGPSLng")
KeyGPSAccuracy = get_conf(configfile, "KeyGPSAccuracy")
KeyGPSSpeed = get_conf(configfile, "KeyGPSSpeed")
KeyGPSBearing = get_conf(configfile, "KeyGPSBearing")
KeyGPSAltitude = get_conf(configfile, "KeyGPSAltitude")
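get_conf and replace_user_home are project helpers that are not shown in this snippet. Judging from the calls above, get_conf looks up a key in a flat KEY/VALUE configuration file with an overridable delimiter, and replace_user_home expands a leading "~". The sketch below is only a plausible reading of that behaviour, not the project's actual implementation.

import os

def get_conf_sketch(path, key, delimiter="="):
    # Return the value for `key` from a flat KEY<delimiter>VALUE file; first match wins.
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or delimiter not in line:
                continue
            k, v = line.split(delimiter, 1)
            if k.strip() == key:
                return v.strip()
    return None

def replace_user_home_sketch(path):
    # Expand "~" (or "~user") to the user's home directory.
    return os.path.expanduser(path)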
Ejemplo n.º 42
0
class WebSearcher:
	def __init__(self):
		self.cm = False
		self.sleeptime = 2  # number of seconds to sleep after a cache entry is written
		self.swWrited = False # whether to forcibly overwrite the cached search results (effective when swWrited is True)
		self.__params = {}

	def activeCache(self, cachedir):
		"""キャッシュ保存先の設定とキャッシュ保存関数の有効化"""
		self.cm = CacheManager(cachedir)
	def getHitCount(self):
		"""get the number of retrieved pages
		@return int the number of retrieved pages
		"""
		hits = False
		if self.__engine == "tsubaki":
			url = "http://tsubaki.ixnlp.nii.ac.jp/api.cgi?query=%s&only_hitcount=1" % self.__query
			force_dpnd = self.getParameter("force_dpnd")
			if force_dpnd:
				url += "&force_dpnd=" + str(force_dpnd)
			print url
			if self.is_available_caching():
				res = self.runCacheFunc(url)
			else:
				res = urllib.urlopen(url).read()
			if not re.match("^[0-9]+$", res):
				return "error"
			hits = res.rstrip()
		elif self.__engine == "yahoo" or self.__engine == "yahoo2":
			if self.is_available_caching():
				xmlstr = self.runCacheFunc(self.getResultURI(1, 1))
				print xmlstr
				doc = ElementTree(fromstring(xmlstr))
			else:
				url = self.getResultURI(1, 1)
				fd = file(url, "rb")
				doc = ElementTree(file=fd)
			e = doc.getroot()
			hits = e.attrib["totalResultsAvailable"]
		return hits
	def getResultURI(self, start, num):
		engine = self.getEngine()
		if engine == "tsubaki":
			url = "http://tsubaki.ixnlp.nii.ac.jp/api.cgi?query=" + self.getQuery() + "&start=" + str(start) + "&results=" + str(num) + "&Snippet=1"
		elif engine == "yahoo":
			appid = self.getParameter("appid")
			if not appid:
				sys.exit("You must assign appid!!")
			type = self.getParameter("type")
			if not type:
				type = "all"
			url = "http://search.yahooapis.jp/WebSearchService/V1/webSearch?query=%s&appid=%s&type=%s&start=%s&results=%s&format=any" % (self.__query, appid, type, start, num)
			print url
		elif engine == "yahoo2":
			appid = self.getParameter("appid")
			if not appid:
				sys.exit("You must assign appid!!")
			type = self.getParameter("type")
			if not type:
				type = "all"
			url = "http://search.yahooapis.jp/WebSearchService/V2/webSearch?query=%s&appid=%s&type=%s&start=%s&results=%s&format=html" % (self.__query, appid, type, start, num)
		else:
			url = False
		return url

	#"?query=" + urlencode(fnkf("-w", $this->query)) \
	
	def is_available(self, engine):
		"""check which defined search engine is available"""
		available_engines = ["okwave", "tsubaki", "yahoo", "yahooQA"]
		return engine in available_engines
	
	def is_available_caching(self):
		return self.cm != False
	
	def runCacheFunc(self, url):
		return self.cm.rwCache(url, self.sleeptime, self.swWrited)
	def search(self, start, max):
		"""run searching Web"""
		engine = self.getEngine()
		
		result = {}
		rank = 1
		if engine == "tsubaki":
			url = self.getResultURI(start, max)
			if self.is_available_caching():
				xmlstr = self.runCacheFunc(url)
				doc = ElementTree(fromstring(xmlstr))
			else:
				fd = file(url, "rb")
				doc = ElementTree(file=fd)
			for e in doc.findall("//Result"):
				tmp = {"title":e.find("Title").text, "url":e.find("Url").text, "snippet":e.find("Snippet").text, "cache":"http://tsubaki.ixnlp.nii.ac.jp/api.cgi?id=" + e.attrib["Id"] + "&format=html"}
				result[rank] = tmp
				rank += 1

		return result
		
		
	def getEngine(self):
		return self.__engine
	def setEngine(self, engine):
		self.__engine = engine
	def getParameter(self, name):
		if self.__params.has_key(name):
			return self.__params[name]
		else:
			return False
	def setParameter(self, name, value):
		self.__params[name] = value
	def getQuery(self):
		return self.__query
	def setQuery(self, query):
		self.__query = query
		
	## properties ##
	engine = property(getEngine, setEngine)
	query = property(getQuery, setQuery)
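A short usage sketch for the WebSearcher above; the query, cache directory, and engine choice are placeholder values, and the setter methods are used directly rather than the class-level properties.

ws = WebSearcher()
ws.setEngine("tsubaki")
ws.setQuery("information retrieval")
ws.activeCache("./cache")      # results are now fetched through CacheManager.rwCache
print ws.getHitCount()
for rank, page in ws.search(1, 10).items():
	print rank, page["url"]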
Ejemplo n.º 43
0
def fix_no_speed(addr, mm_loaded=False):
    print("fix_no_speed():", addr, 'mm_loaded', mm_loaded)
    bugNoSpeedFn = mypydir + "/cache/nospeed-%s.txt" % addr
    if On_Cluster:
        mm_tmp_nid2spd = Mem({
            "num": 30,
            "prefix": "~fx_n2spd~",
            "expire": 86400 * 30
        })
    else:
        mm_tmp_nid2spd = Mem({
            "use_ips": ['localhost'],
            "prefix": "~fx_n2spd~",
            "expire": 86400 * 30
        })
    already_mm_loaded = mm_loaded
    correctSpdFn = DirOSM + os.sep + addr + "/%s-nids-to-speed.txt" % addr
    if not already_mm_loaded:
        cnt = 0
        pthresh = 1
        print('Loading ' + correctSpdFn)
        with open(correctSpdFn, "r") as f:
            for l in f:
                st = l.split(",")
                if len(st) < 3: continue
                mm_tmp_nid2spd.set((int(st[0]), int(st[1])), float(st[2]))
                cnt += 1
                if cnt >= pthresh:
                    print('fix_no_speed load cnt', cnt)
                    pthresh *= 2
        print("correct spd tup len=", cnt)
    mm_nid2latlng = CacheManager(
        overwrite_prefix=True)  # will use existing config
    mm_nid2neighbor = CacheManager(overwrite_prefix=True)
    mm_nid2latlng.use_cache(
        meta_file_name="osm/cache-%s-nodeid-to-lat-lng.txt" % addr)
    mm_nid2neighbor.use_cache(
        meta_file_name="osm/cache-%s-nodeid-to-neighbor-nid.txt" % addr)
    n1n2s = {}
    print('Reading ' + bugNoSpeedFn)
    with open(bugNoSpeedFn, "r") as f:
        for l in f:
            st = l.split(",")
            if len(st) < 2: continue
            st = [int(x) for x in st]
            tup = (st[0], st[1])
            if tup not in n1n2s:
                n1n2s[tup] = -1
    print("no_speed nid tup len=", len(n1n2s))
    lastBugCnt = None
    while True:
        bugcnt = 0
        for tup in n1n2s.keys():
            if n1n2s[tup] > 0:
                continue
            n1 = tup[0]
            n2 = tup[1]
            hd1 = get_bear_given_nid12(n1, n2, mm_nid2latlng)
            if hd1 is None: continue
            nblist = mm_nid2neighbor.get(n1)  # ?->n1->n2
            mindiff = 1e10
            fixed = 0
            if nblist:
                for nbn in nblist:
                    hdn = get_bear_given_nid12(nbn, n1, mm_nid2latlng)
                    if hdn is None: continue
                    angle = min_angle_diff(hd1, hdn)
                    if (nbn, n1) in n1n2s and n1n2s[(nbn, n1)] > 0:
                        spdn = n1n2s[(nbn, n1)]
                    else:
                        spdn = mm_tmp_nid2spd.get((nbn, n1))
                    if angle < mindiff and spdn is not None:
                        mindiff = angle
                        n1n2s[tup] = spdn
                        fixed = 1
            if fixed: continue
            nblist = mm_nid2neighbor.get(n2)  # n1->n2->?
            mindiff = 1e10
            if nblist:
                for nbn in nblist:
                    hdn = get_bear_given_nid12(n2, nbn, mm_nid2latlng)
                    if hdn is None: continue
                    angle = min_angle_diff(hd1, hdn)
                    if (n2, nbn) in n1n2s and n1n2s[(n2, nbn)] > 0:
                        spdn = n1n2s[(n2, nbn)]
                    else:
                        spdn = mm_tmp_nid2spd.get((n2, nbn))
                    if angle < mindiff and spdn is not None:
                        mindiff = angle
                        n1n2s[tup] = spdn
                        fixed = 1
            if fixed == 0: bugcnt += 1
        if bugcnt == 0:
            break
        print("bugcnt", bugcnt)
        if lastBugCnt is not None:
            if lastBugCnt == bugcnt:
                print("Give up #", bugcnt)
                break
        lastBugCnt = bugcnt

    with open(correctSpdFn, "a") as f:
        for tup in n1n2s.keys():
            if n1n2s[tup] < 0: continue
            print("%d,%d,%.2f" % (tup[0], tup[1], n1n2s[tup]))
            f.write("%d,%d,%.2f\n" % (tup[0], tup[1], n1n2s[tup]))
    print("Give up #", bugcnt)
Ejemplo n.º 44
0
class senderWmo(gateway.gateway):

    def __init__(self,path,client,logger):
        gateway.gateway.__init__(self, path, client, logger)
        self.client = client
        self.establishConnection()

        self.reader = DiskReader(PXPaths.TXQ + self.client.name, 
                                 self.client.batch,            # Number of files we read each time
                                 self.client.validation,       # name validation
                                 self.client.patternMatching,  # pattern matching
                                 self.client.mtime,            # we don't check modification time
                                 True,                         # priority tree
                                 self.logger,
                                 eval(self.client.sorter),
                                 self.client)

        # Mechanism to eliminate multiple copies of a bulletin
        self.cacheManager = CacheManager(maxEntries=self.client.cache_size, timeout=8*3600)

        # WMO's maximum bulletin size is 500 000 bytes
        self.set_maxLength( self.client.maxLength )

    def set_maxLength(self,value):
        if value <= 0  : value = 500000
        self.maxLength = value

    def shutdown(self):
        gateway.gateway.shutdown(self)

        resteDuBuffer, nbBullEnv = self.unSocketManagerWmo.closeProperly()

        self.write(resteDuBuffer)

        self.logger.info("Le senderWmo est mort.  Traitement en cours reussi.")

    def establishConnection(self):
        # Instantiate the socketManagerWmo
        self.logger.debug("Instantiating the socketManagerWmo")

        self.unSocketManagerWmo = \
                 socketManagerWmo.socketManagerWmo(
                         self.logger,type='master', \
                         port=self.client.port,\
                         remoteHost=self.client.host,
                         timeout=self.client.timeout,
                         flow=self.client)

    def read(self):
        if self.igniter.reloadMode == True:
            # We assign the defaults and reread the configuration file (in __init__)
            self.client.__init__(self.client.name, self.client.logger)
            self.set_maxLength( self.client.maxLength )
            self.resetReader()
            self.cacheManager.clear()
            self.logger.info("Cache has been cleared")
            self.logger.info("Sender WMO has been reloaded") 
            self.igniter.reloadMode = False
        self.reader.read()
        return self.reader.getFilesContent(self.client.batch)

    def write(self,data):
        #self.logger.info("%d nouveaux bulletins sont envoyes",len(data))
        self.logger.info("%d new bulletins will be sent", len(data))

        for index in range(len(data)):

            self.logger.start_timer()
            path = self.reader.sortedFiles[index]
            basename = os.path.basename( path )

            try:
                tosplit = self.need_split( data[index] )

                # need to be segmented...
                if tosplit :
                   succes, nbBytesSent = self.write_segmented_data( data[index], path )
                   # all parts were cached... nothing to do
                   if succes and nbBytesSent == 0 :
                      self.logger.delivered("(%i Bytes) Bulletin %s  delivered" % (len(data[index]), basename),path)
                      self.unlink_file( path )
                      continue

                # send the entire bulletin
                else :

                   # if in cache then it was already sent... nothing to do
                   # priority 0 is retransmission, no duplicate check is made
                   priority = path.split('/')[-3]

                   if self.client.nodups and priority != '0' and self.in_cache( data[index], True, path ) :
                      #PS... same extra unlink as in the AM sender; the in_cache call above passes True, should it be False?
                      #self.unlink_file( self.reader.sortedFiles[index] )
                      continue
                   succes, nbBytesSent = self.write_data( data[index] )

                #If the bulletin was sent successfully, erase the file.
                if succes:
                   self.logger.delivered("(%i Bytes) Bulletin %s  delivered" % (nbBytesSent, basename),path,nbBytesSent)
                   self.unlink_file( path )
                else:
                   self.logger.info("%s: Sending problem" % path )

            except Exception, e:
            # e==104 or e==110 or e==32 or e==107 => connection broken
                (type, value, tb) = sys.exc_info()
                self.logger.error("Type: %s, Value: %s" % (type, value))

        # Log infos about tx speed 
        if (self.totBytes > 1000000):
            self.logger.info(self.printSpeed() + " Bytes/sec")
            # Log infos about caching 
            (stats, cached, total) = self.cacheManager.getStats()
            if total:
                percentage = "%2.2f %% of the last %i requests were cached (implied %i files were deleted)" % (cached/total * 100,  total, cached)
            else:
                percentage = "No entries in the cache"
            self.logger.info("Caching stats: %s => %s" % (str(stats), percentage))
Ejemplo n.º 45
0
    def ingestBulletinFile(self, igniter):
        from DiskReader import DiskReader
        import bulletinManager
        import bulletinManagerAm
        from PullFTP import PullFTP

        sleep_sec = 1
        if self.source.type == 'pull-bulletin' or self.source.pull_script != None : sleep_sec = self.source.pull_sleep

        bullManager = bulletinManager.bulletinManager(
                    self.ingestDir,
                    self.logger,
                    self.ingestDir,
                    99999,
                    '\n',
                    self.source.extension,
                    self.source.routingTable,
                    self.source.mapEnteteDelai,
                    self.source,
                    self.source.addStationInFilename)

        if self.source.bulletin_type == 'am' :
           bullManager = bulletinManagerAm.bulletinManagerAm(
                    self.ingestDir,
                    self.logger,
                    self.ingestDir,
                    99999,
                    '\n',
                    self.source.extension,
                    self.source.routingTable,
                    self.source.addSMHeader,
                    PXPaths.STATION_TABLE,
                    self.source.mapEnteteDelai,
                    self.source,
                    self.source.addStationInFilename)

        if self.source.nodups :
           self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

        reader = DiskReader(bullManager.pathSource, self.source.batch, self.source.validation, self.source.patternMatching,
                            self.source.mtime, False, self.source.logger, self.source.sorter, self.source)
        while True:
            # If a SIGHUP signal is received ...
            if igniter.reloadMode == True:
                # We assign the defaults, reread configuration file for the source
                # and reread all configuration file for the clients (all this in __init__)
                if self.source.type == 'filter-bulletin' : 
                       self.source.__init__(self.source.name, self.source.logger, True, True)
                else :
                       self.source.__init__(self.source.name, self.source.logger)

                bullManager = bulletinManager.bulletinManager(
                               self.ingestDir,
                               self.logger,
                               self.ingestDir,
                               99999,
                               '\n',
                               self.source.extension,
                               self.source.routingTable,
                               self.source.mapEnteteDelai,
                               self.source,
                               self.source.addStationInFilename)

                if self.source.bulletin_type == 'am' :
                   bullManager = bulletinManagerAm.bulletinManagerAm(
                               self.ingestDir,
                               self.logger,
                               self.ingestDir,
                               99999,
                               '\n',
                               self.source.extension,
                               self.source.routingTable,
                               self.source.addSMHeader,
                               PXPaths.STATION_TABLE,
                               self.source.mapEnteteDelai,
                               self.source,
                               self.source.addStationInFilename)

                if self.source.nodups :
                   self.fileCache = CacheManager(maxEntries=self.source.cache_size, timeout=8*3600)

                reader = DiskReader(bullManager.pathSource, self.source.batch, self.source.validation, self.source.patternMatching,
                                    self.source.mtime, False, self.source.logger, self.source.sorter,self.source)

                self.logger.info("Receiver has been reloaded")
                igniter.reloadMode = False


            # pull files in rxq directory if in pull mode
            if self.source.type == 'pull-bulletin' or self.source.pull_script != None :
               files    = []
               sleeping = os.path.isfile(PXPaths.RXQ + self.source.name + '/.sleep')

               if self.source.type == 'pull-bulletin' :
                  puller = PullFTP(self.source,self.logger,sleeping)
                  files  = puller.get()
                  puller.close()
               elif self.source.pull_script != None :
                  files  = self.source.pull_script(self.source,self.logger,sleeping)

               if not sleeping :
                  self.logger.debug("Number of files pulled = %s" % len(files) )
               else :
                  self.logger.info("This pull is sleeping")


            # normal diskreader call for files
            reader.read()

            # processing the list if necessary... 

            if self.source.lx_execfile != None and len(reader.sortedFiles) > 0:
               sfiles = []
               sfiles.extend(reader.sortedFiles)
               self.logger.info("%d files process with lx_script" % len(sfiles))
               sortedFiles = self.source.run_lx_script(sfiles,self.source.logger)
               reader.sortedFiles = sortedFiles

            # continue normally
            data = reader.getFilesContent(reader.batch)

            if len(data) == 0:
                time.sleep(sleep_sec)
                continue
            else:
                self.logger.info("%d bulletins will be ingested", len(data))

            # Write (and name correctly) the bulletins to disk, erase them after
            for index in range(len(data)):

                # ignore duplicates if required
                duplicate = self.source.nodups and self.fileCache.find(data[index], 'md5') is not None

                #nb_bytes = len(data[index])
                #self.logger.info("Lecture de %s: %d bytes" % (reader.sortedFiles[index], nb_bytes))
                if not duplicate : 

                   # converting the file if necessary
                   if self.source.fx_execfile != None :

                      file   = reader.sortedFiles[index]
                      fxfile = self.source.run_fx_script(file,self.source.logger)

                      # conversion did not work
                      if fxfile == None :
                             self.logger.warning("FX script ignored the file : %s"    % os.path.basename(file) )
                             os.unlink(file)
                             continue

                      # file already in proper format
                      elif fxfile == file :
                             self.logger.warning("FX script kept the file as is : %s" % os.path.basename(file) )

                      # file converted...
                      else :
                             self.logger.info("FX script modified %s to %s " % (os.path.basename(file),os.path.basename(fxfile)) )
                             os.unlink(file)
                             fp = open(fxfile,'r')
                             dx = fp.read()
                             fp.close()
                             reader.sortedFiles[index] = fxfile
                             data[index] = dx

                   # writing/ingesting the bulletin
                   if isinstance(bullManager,bulletinManagerAm.bulletinManagerAm):
                      bullManager.writeBulletinToDisk(data[index], True)
                   else :
                      bullManager.writeBulletinToDisk(data[index], True, True)

                try:
                    file = reader.sortedFiles[index]
                    os.unlink(file)
                    if duplicate : self.logger.info("suppressed duplicate file %s", os.path.basename(file))
                    self.logger.debug("%s has been erased", os.path.basename(file))
                except OSError, e:
                    (type, value, tb) = sys.exc_info()
                    self.logger.error("Unable to unlink %s ! Type: %s, Value: %s" % (reader.sortedFiles[index], type, value))
Ejemplo n.º 46
0
class unittest_CacheManager(unittest.TestCase):
   
  def setUp(self):
    self.manager = CacheManager(maxEntries=3, timeout=5 * 3600)
    self.files = ['db/test-file-grib', 'db/test-file-burf', 'db/test-file-bulletin']
    #f = open(self.files[0])
    #self.data = f.read(1048576)
    #while len(self.data) :
      #print self.data
      #self.data = f.read(1048576)
    #f.close()    
    self.data = 'ceci est un test'

  def test_CacheManager(self):              
    self.assertEqual(self.manager.get_md5_from_file(self.files[0]),'57285445a1c80023b3f2e96546754d5b')    
    self.manager.find(self.data,'md5')
    self.manager.find(self.files[1])  
    #md5 of self.data = 11b35a0201513381dcdd130831f702d0
    self.assertEqual(self.manager.has('11b35a0201513381dcdd130831f702d0'),True)    
    self.assertEqual(self.manager.has(self.files[2]),False)    
    self.manager.find(self.data,'md5')
    self.manager.find(self.files[1])    
    self.manager.find(self.files[2])
    self.assertEqual(self.manager.getStats(),({1: 1, 2: 2}, 2.0, 5.0))
    self.manager.find(self.files[0])
    time.sleep(3)
    self.manager.timeoutClear(3)
    self.assertEqual(self.manager.cache,{})
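This test case assumes the db/test-file-* fixture files exist with contents matching the hard-coded md5 digests, and that unittest is imported at the top of the original module (it must be, since the class subclasses unittest.TestCase). A standard runner stanza, not part of the original snippet, would be:

if __name__ == '__main__':
    unittest.main()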
Ejemplo n.º 47
0
        'AdmBldg22AP1': 19, 'AcadBldg30AP3': 1, 'ResBldg82AP3': 3, 'AcadBldg8AP4': 14, 
        'LibBldg2AP19': 8, 'AcadBldg30AP1': 10, 'LibBldg2AP17': 13, 'LibBldg1AP17': 2, 
        'LibBldg2AP13': 2, 'LibBldg2AP10': 3, 'LibBldg2AP11': 3} 
    }

    userMovement = {
        24: {'SocBldg2AP1': 10, 'SocBldg3AP2': 8, 'AcadBldg22AP2': 0, 'LibBldg4AP3': 4}
    }

    tp = TopologyGenerator('/home/ubuntu/Downloads/APlocations_clean.csv')
    networkManager = NetworkManager()
    apsByBuildings, buildingNames = tp.getSample()
    linkage = {} #tp.computeLinkage(printDendogram = False)
    clusters = {} #tp.computeClusters()

    net, tree = networkManager.networkFromCLusters('test', clusters, linkage, len(buildingNames), apsByBuildings, buildingNames)
    
    print '*** Getting requests data'
    #movementParser = MovementDataParser('/home/ubuntu/Downloads/movement/2001-2003/', '/data/movement.csv')
    #movementParser.getMovementInfo()

    cacheManager = CacheManager(tree)



    print cacheManager.computeKMedianCaches(k=2, userId=24, userMovement=userMovement)
    #networkManager.simulation(net)
    #CLI( net )
    #net.stop()