Beispiel #1
0
    def first_run_history2(self):
        """Store the resource URLs of this fetcher's share of the datasets.

        Walks ``self.dataid`` starting at ``self.fetcher_id`` with a stride of
        ``config.fetcher_num``. For every visited dataset the metadata is
        queried, the dataset is flagged via
        ``DBUtil.UpdateDataSetToProcessed`` and each returned resource is
        written with ``DBUtil.InsertResourceURL``.

        Side effect: ``self.timeStr`` is replaced by the result of
        ``self.buildQueryTimeStr()``.
        """
        logger.debug(str(self.fetcher_id) + " _ "+str(len(self.dataid)))

        self.timeStr = self.buildQueryTimeStr()
        data_count = 0
        for index in range(self.fetcher_id, len(self.dataid), config.fetcher_num):
            meta = Metadata.getMetaData(self.dataid[index], self.timeStr)

            # One connection per dataset; it is closed in the ``finally``
            # below so that no connection is left open, neither between
            # iterations nor when a database call raises.
            conn = DBUtil.createConnection()
            try:
                DBUtil.UpdateDataSetToProcessed(conn, self.dataid[index])

                data_count = data_count + 1
                logger.debug("Fetcher [{}] query [{}] dataset [{}] @ dataid[{}] + has [{}] resource"
                             .format(str(self.fetcher_id), str(data_count), self.dataid[index], index, str(len(meta))))
                for m in meta:
                    DBUtil.InsertResourceURL(conn, m.getDataSetID(), m.getFileID(), m.getDownloadURL(), m.getFormat())
                    logger.debug("Fetcher {" + str(self.fetcher_id) + "} " +
                                 m.getDownloadURL() + " " +
                                 m.getFormat() + " " +
                                 m.getDataSetID() + " " +
                                 m.getFileID() + " " +
                                 m.getResourceID())
            finally:
                DBUtil.closeConnection(conn)
Beispiel #2
0
 def fetchNewMetadata(self):
     """
     Queue the metadata entries of every dataset id reported by
     ``self.findUpdateDataID()``.

     For each id the metadata is fetched with ``Metadata.getMetaData`` and
     every returned entry is put on ``self.queue``. The complete id list is
     printed to stdout once per processed id.
     """
     updated_ids = self.findUpdateDataID()
     for dataset_id in updated_ids:
         entries = Metadata.getMetaData(dataset_id)
         print(updated_ids)
         for entry in entries:
             message = entry.getResourceID() + " put to queue"
             logger.debug(message)
             self.queue.put(entry)
Beispiel #3
0
 def process_history(self):
     """
     Process history data since the last fetch.

     Reads the last update epoch through ``DBUtil.getLastUpdateEpoch``,
     asks ``self.findUpdateDataID`` for the dataset ids updated since then
     and puts every metadata entry of those datasets on ``self.queue``.

     Note: ``self.timeStr`` is read here but not assigned in this method,
     so it has to be set before this method is called.
     """
     logger.info(str(threading.get_ident()) + " process hisotry")
     conn = DBUtil.createConnection()
     try:
         latestTime = DBUtil.getLastUpdateEpoch(conn)
     finally:
         # Release the connection even when the lookup raises.
         DBUtil.closeConnection(conn)
     dataid = self.findUpdateDataID(latestTime)
     logger.debug(len(dataid))
     meta_count = 0
     for dataid_count, s in enumerate(dataid, start=1):
         logger.info("data count = " + str(dataid_count))
         meta = Metadata.getMetaData(s, self.timeStr)
         for md in meta:
             meta_count = meta_count + 1
             logger.info("meta_count = "+ str(meta_count))
             logger.debug(md.getResourceID() + " put to queue")
             self.queue.put(md)
Beispiel #4
0
    def first_run_history2(self):
        """Register and enqueue the resources of this fetcher's datasets.

        Walks ``self.dataid`` starting at ``self.fetcher_id`` with a stride of
        ``config.fetcher_num``. For every visited dataset the metadata is
        queried; each resource that ``DBUtil.isResourceURLExist`` reports as
        missing is inserted with ``DBUtil.InsertResourceURL``, and a
        ``downloadData`` row is put on ``self.queue`` for every resource.

        Side effect: ``self.timeStr`` is replaced by the result of
        ``self.buildQueryTimeStr()``.
        """
        logger.debug(str(self.fetcher_id) + " _ "+str(len(self.dataid)))

        self.timeStr = self.buildQueryTimeStr()
        data_count = 0
        for index in range(self.fetcher_id, len(self.dataid), config.fetcher_num):
            meta = Metadata.getMetaData(self.dataid[index], self.timeStr)

            conn = DBUtil.createConnection()
            try:
                # The dataset is deliberately not marked as processed here:
                # to solve the restart problem, the processed flag is set
                # after the download instead.

                data_count = data_count + 1
                logger.debug("Fetcher [{}] query [{}] dataset [{}] @ dataid[{}] + has [{}] resource"
                             .format(str(self.fetcher_id), str(data_count), self.dataid[index], index, str(len(meta))))
                for m in meta:
                    # Insert only resources that are not stored yet.
                    if DBUtil.isResourceURLExist(conn, m.getDataSetID(), m.getResourceID(), m.getDownloadURL(), m.getFormat()) is False:
                        DBUtil.InsertResourceURL(conn, m.getDataSetID(), m.getResourceID(), m.getDownloadURL(), m.getFormat())

                    # Build a downloadData row and hand it over via the queue.
                    row = downloadData(m.getDownloadURL(), m.getFormat(), m.getDataSetID(), m.getResourceID())
                    self.queue.put(row)

                    logger.debug("Fetcher {" + str(self.fetcher_id) + "} " +
                                 m.getDownloadURL() + " " +
                                 m.getFormat() + " " +
                                 m.getDataSetID() + " " +
                                 m.getResourceID())
            finally:
                # Close the per-dataset connection even if a call above raises.
                DBUtil.closeConnection(conn)