def first_run_history2(self):
    """Store the resource URLs of every dataset assigned to this fetcher.

    The fetcher walks ``self.dataid`` starting at index ``self.fetcher_id``
    and stepping by ``config.fetcher_num``.  For each dataset it:

    1. queries the metadata with ``Metadata.getMetaData`` using the time
       string built by ``self.buildQueryTimeStr()`` (stored on
       ``self.timeStr``);
    2. opens a database connection and marks the dataset as processed;
    3. inserts one resource-URL row per metadata entry.

    The connection opened for a dataset is always closed before moving on
    to the next one, even when a database or logging call raises.
    """
    logger.debug(str(self.fetcher_id) + " _ " + str(len(self.dataid)))
    self.timeStr = self.buildQueryTimeStr()
    data_count = 0

    for index in range(self.fetcher_id, len(self.dataid), config.fetcher_num):
        dataset_id = self.dataid[index]
        # Metadata is fetched before the connection is opened, so a failed
        # query never leaves a connection behind.
        meta = Metadata.getMetaData(dataset_id, self.timeStr)

        conn = DBUtil.createConnection()
        try:
            DBUtil.UpdateDataSetToProcessed(conn, dataset_id)
            data_count += 1
            logger.debug("Fetcher [{}] query [{}] dataset [{}] @ dataid[{}] + has [{}] resource"
                         .format(str(self.fetcher_id), str(data_count),
                                 dataset_id, index, str(len(meta))))

            for m in meta:
                DBUtil.InsertResourceURL(conn, m.getDataSetID(), m.getFileID(),
                                         m.getDownloadURL(), m.getFormat())
                logger.debug(
                    "Fetcher {" + str(self.fetcher_id) + "} " +
                    m.getDownloadURL() + " " + m.getFormat() + " " +
                    m.getDataSetID() + " " + m.getFileID() + " " +
                    m.getResourceID()
                )
        finally:
            # One connection is created per dataset, so it must be released
            # per dataset; otherwise every iteration (or any exception)
            # would leak a connection.
            DBUtil.closeConnection(conn)
def first_run_history2(self):
    """Store and enqueue the resources of every dataset assigned to this fetcher.

    The fetcher walks ``self.dataid`` starting at index ``self.fetcher_id``
    and stepping by ``config.fetcher_num``.  For each dataset it queries
    the metadata with ``Metadata.getMetaData`` (using the time string from
    ``self.buildQueryTimeStr()``, stored on ``self.timeStr``), then for
    each metadata entry:

    * inserts a resource-URL row unless ``DBUtil.isResourceURLExist``
      reports that it is already stored;
    * puts a ``downloadData`` row on ``self.queue``.

    The dataset is deliberately NOT marked as processed here: to solve the
    restart problem, the processed flag is set after the download instead.

    The connection opened for a dataset is always closed before moving on
    to the next one, even when a database, queue or logging call raises.
    """
    logger.debug(str(self.fetcher_id) + " _ " + str(len(self.dataid)))
    self.timeStr = self.buildQueryTimeStr()
    data_count = 0

    for index in range(self.fetcher_id, len(self.dataid), config.fetcher_num):
        dataset_id = self.dataid[index]
        # Metadata is fetched before the connection is opened, so a failed
        # query never leaves a connection behind.
        meta = Metadata.getMetaData(dataset_id, self.timeStr)

        conn = DBUtil.createConnection()
        try:
            data_count += 1
            logger.debug("Fetcher [{}] query [{}] dataset [{}] @ dataid[{}] + has [{}] resource"
                         .format(str(self.fetcher_id), str(data_count),
                                 dataset_id, index, str(len(meta))))

            for m in meta:
                res_dataset_id = m.getDataSetID()
                resource_id = m.getResourceID()
                url = m.getDownloadURL()
                fmt = m.getFormat()

                # `is False` is kept on purpose: the helper's return contract
                # is not visible here, so only an explicit False triggers
                # the insert.
                if DBUtil.isResourceURLExist(conn, res_dataset_id, resource_id,
                                             url, fmt) is False:
                    DBUtil.InsertResourceURL(conn, res_dataset_id, resource_id,
                                             url, fmt)

                # Build a downloadData row and hand it over through the queue.
                # NOTE(review): every resource is queued, not only newly
                # inserted ones -- confirm this matches the intended
                # behaviour after a restart.
                row = downloadData(url, fmt, res_dataset_id, resource_id)
                self.queue.put(row)
                logger.debug(
                    "Fetcher {" + str(self.fetcher_id) + "} " +
                    url + " " + fmt + " " +
                    res_dataset_id + " " + resource_id
                )
        finally:
            # One connection is created per dataset, so it must be released
            # per dataset; otherwise every iteration (or any exception)
            # would leak a connection.
            DBUtil.closeConnection(conn)