def update_db(self):
    """
    Fetch latest popularity data not in database

    Reads the most recent 'date' stored in the 'dataset_popularity'
    collection, then queues every date yielded by daterange() from the day
    after it up to the current UTC day for the worker threads running
    insert_popularity_data. When no latest date can be read from the query
    result, a warning is logged and initiate_db() is called instead.
    """
    # get dates
    coll = 'dataset_popularity'
    # newest document first, keep only that one, and only its 'date' field
    pipeline = [
        {'$sort': {'date': -1}},
        {'$limit': 1},
        {'$project': {'date': 1, '_id': 0}},
    ]
    data = self.storage.get_data(coll=coll, pipeline=pipeline)
    try:
        start_date = data[0]['date']
    except (IndexError, KeyError, TypeError):
        # Only a missing/empty/malformed result means "nothing stored yet";
        # anything else (e.g. KeyboardInterrupt) must propagate instead of
        # silently triggering a full re-initialisation.
        self.logger.warning('Popularity needs to be initiated')
        self.initiate_db()
        return
    q = Queue.Queue()
    for i in range(self.MAX_THREADS):
        worker = threading.Thread(target=self.insert_popularity_data, args=(i, q))
        # daemon threads do not keep the interpreter alive at exit
        worker.daemon = True
        worker.start()
    # the latest stored day is already in the database, start one day later
    start_date = start_date + datetime.timedelta(days=1)
    end_date = datetime_day(datetime.datetime.utcnow())
    # fetch popularity data
    t1 = datetime.datetime.utcnow()
    for date in daterange(start_date, end_date):
        q.put(date)
    # wait on the queue before measuring the elapsed time
    q.join()
    t2 = datetime.datetime.utcnow()
    td = t2 - t1
    self.logger.info('Updating Pop DB data took %s', str(td))
def test_daterange(self):
    """
    Test daterange function

    Expects daterange(27 Oct 1987, 30 Oct 1987) to produce exactly the
    27th, 28th and 29th, in that order.
    """
    start_date = datetime.datetime(1987, 10, 27)
    end_date = datetime.datetime(1987, 10, 30)
    expected = [
        datetime.datetime(1987, 10, 27),
        datetime.datetime(1987, 10, 28),
        datetime.datetime(1987, 10, 29),
    ]
    # Compare the whole sequence at once: an element-by-element loop would
    # pass silently if daterange produced too few (or no) dates.
    results = list(daterange(start_date, end_date))
    self.assertEqual(results, expected)
def test_daterange(self):
    """
    Test daterange function

    Expects daterange(27 Oct 1987, 30 Oct 1987) to produce exactly the
    27th, 28th and 29th, in that order.
    """
    start_date = datetime.datetime(1987, 10, 27)
    end_date = datetime.datetime(1987, 10, 30)
    expected = [
        datetime.datetime(1987, 10, 27),
        datetime.datetime(1987, 10, 28),
        datetime.datetime(1987, 10, 29),
    ]
    # Compare the whole sequence at once: an element-by-element loop would
    # pass silently if daterange produced too few (or no) dates.
    results = list(daterange(start_date, end_date))
    self.assertEqual(results, expected)
def initiate_db(self):
    """
    Collect popularity data

    Starts MAX_THREADS daemon threads running insert_popularity_data, each
    handed its index and a shared queue, then queues every date yielded by
    daterange() between the day 90 days before the current UTC time and the
    current UTC day. Waits on the queue and logs how long that took.
    """
    date_queue = Queue.Queue()
    for worker_id in range(self.MAX_THREADS):
        thread = threading.Thread(
            target=self.insert_popularity_data,
            args=(worker_id, date_queue),
        )
        # daemon threads do not keep the interpreter alive at exit
        thread.daemon = True
        thread.start()
    # window: from 90 days back up to today, both passed through datetime_day
    ninety_days_back = datetime.datetime.utcnow() - datetime.timedelta(days=90)
    first_day = datetime_day(ninety_days_back)
    last_day = datetime_day(datetime.datetime.utcnow())
    # fetch popularity data
    started = datetime.datetime.utcnow()
    for day in daterange(first_day, last_day):
        date_queue.put(day)
    # wait on the queue before measuring the elapsed time
    date_queue.join()
    elapsed = datetime.datetime.utcnow() - started
    self.logger.info('Inserting Pop DB data took %s', str(elapsed))