def backgroundAutoArima(task_id, dataset):
    """Train an AutoARIMA model and record progress on the given task.

    AutoArima takes too long to train, so this is run on a separate
    thread; the task document tracks status ('running' -> 'finished')
    and, on completion, the id of the saved model.
    """
    # Flag the task as started before the long-running training begins.
    db_helper.update_task_by_id(task_id, status='running', started_at=utcnow())

    arima = autoarima.AutoArimaModel(dataset=dataset)
    arima.train_model(arima.training_data, arima.testing_data)

    # Persist the fitted model and link it back to the task record.
    saved_model_id = arima.save_model(dataset)
    db_helper.update_task_by_id(task_id,
                                status='finished',
                                model_id=saved_model_id,
                                completed_at=utcnow())
def save_model(self, dataset):
    """Persist the fitted AutoARIMA model to MongoDB and pickle it.

    Returns the inserted Mongo document id, or None when no fitted
    model exists yet.
    """
    # Nothing to persist until train_model has produced a fit.
    if not self.model_fit:
        return None

    # Hyperparameters used for the auto_arima search, kept with the model.
    hyperparams = {
        'start_p': 1,
        'start_q': 1,
        'max_p': 5,
        'max_q': 5,
        'm': 12,
        'start_P': 0,
        'seasonal': True,
        'd': 1,
        'D': 1,
        'trace': True,
        'error_action': 'ignore',
        'suppress_warnings': True,
        'stepwise': True,
        'dataset': dataset
    }
    record = {
        'algorithm': 'AutoARIMA',
        # Trained from a batch CSV (as opposed to an Influx feed).
        'input_source': 'csv',
        # Time span covered by the cleaned training series.
        'input_start': self.data_clean.index.min(),
        'input_end': self.data_clean.index.max(),
        'acquisition_time': utils.utcnow(),
        'metadata': hyperparams
    }
    model_id = mongo.db.models.insert_one(record).inserted_id
    self.pklize(model_id)
    return model_id
def invalidate_predictions(pred_id):
    """Mark a stored Prediction as invalid and return it as a JSON doc."""
    updated = mongo.db.predictions.find_one_and_update(
        {'_id': ObjectId(pred_id)},
        {'$set': {
            "is_valid": False,
            "invalidated_at": utcnow()
        }})
    # Round-trip through MongoEncoder so ObjectId/datetime values become
    # JSON-serializable before loading back into a plain dict.
    return json.loads(MongoEncoder().encode(updated))
def get(self, id):
    """POST a mock prediction payload to the subscriber's registered URL.

    Builds randomized hourly values around the subscriber's stored
    predictions and thresholds, sends them to the subscriber's callback
    URL, records the notification timestamp, and returns a JSON summary
    of the attempt (or an error doc when the subscriber is unknown).
    """
    from bson.objectid import ObjectId  # local import; only needed for the filter fix below

    subscriber = db_helper.get_subscriber_by_id(id)
    if subscriber is None:
        return flask.jsonify(
            {'error': 'No subscriber with ID {}'.format(id)})

    predictions = subscriber['predictions']
    thresholds = subscriber['thresholds']
    notified_at = subscriber['notified_at'] or []
    url = subscriber['url']

    # Fabricate mock values: each prediction is perturbed by its threshold
    # (randomly applied or not) plus noise proportional to the prediction.
    mock = []
    ts = datetime.datetime.strptime(predictions['start_time'],
                                    '%Y-%m-%dT%H:%M:%SZ')
    for p, t in zip(predictions['values'], thresholds):
        mock.append({
            'timestamp': ts.strftime('%Y-%m-%dT%H:%M:%SZ'),
            'value': p + (t * random.choice([1, 0])) + (random.random() * p)
        })
        ts += datetime.timedelta(hours=1)

    response = {
        'id': 'newMockId',
        'values': mock,
        'start_time': predictions['start_time'],
        'end_time': predictions['end_time']
    }
    try:
        fail = False
        result = requests.post(url, json=response)
    except Exception as e:
        fail = True
        # FIX: str(e) instead of e.message -- e.message is absent on most
        # exception types and would itself raise AttributeError here.
        result = 'Error sending POST request to {}: {}'.format(url, str(e))

    if fail:
        mock_response = {'status': 'Fail', 'result': result}
    else:
        mock_response = {
            'url': url,
            'json': response,
            'message': 'Attempted',
            'result': result.text
        }

    # FIX: the filter previously used {'id': id}, but subscriber documents
    # are keyed by '_id' (see the ObjectId lookups used everywhere else in
    # this codebase), so the notification timestamp was never persisted.
    mongo.db.subscribers.update_one(
        {'_id': ObjectId(id)},
        {'$set': {
            'notified_at': notified_at + [utils.utcnow()]
        }})
    return flask.jsonify(mock_response)
def update_model_input(model_id, input_end):
    """Refresh a model document after new Influx data has been acquired.

    Parses *input_end* (an ISO-8601 'YYYY-MM-DDTHH:MM:SSZ' string) into a
    datetime, stamps the acquisition time, and flips the input source to
    'influx'.

    Returns True when a matching model document was found and updated,
    False otherwise.
    """
    # FIX: pd.datetime was a deprecated alias removed in pandas >= 2.0;
    # use the stdlib datetime module directly (imported locally so this
    # function stays self-contained).
    import datetime

    query_result = mongo.db.models.find_one_and_update(
        {'_id': ObjectId(model_id)}, {
            "$set": {
                "input_end":
                datetime.datetime.strptime(input_end, '%Y-%m-%dT%H:%M:%SZ'),
                "acquisition_time": utcnow(),
                "input_source": "influx"
            }
        })
    # find_one_and_update returns the matched (pre-update) doc, or None.
    return query_result is not None
def update_subscriber_predictions(sub_id, pred_id):
    """Attach a prediction set to a subscriber and log the notification time.

    Returns the subscriber document (as returned by find_one_and_update)
    encoded into a plain JSON-compatible dict.
    """
    subscriber = get_subscriber_by_id(sub_id)
    timestamps = subscriber['notified_at']
    timestamps.append(utcnow())

    doc = mongo.db.subscribers.find_one_and_update(
        {'_id': ObjectId(sub_id)}, {
            "$set": {
                "predictions": get_predictions_by_id(pred_id),
                "notified_at": timestamps
            }
        })
    # MongoEncoder handles ObjectId/datetime values json can't serialize.
    return json.loads(MongoEncoder().encode(doc))
def post(self):
    """Register a subscriber URL against an existing, valid prediction set.

    Expects 'predictions_id' and 'url' in the parsed request args.
    Returns a JSON doc with the new subscriber id on success, or an
    error payload with status '500' on any failure.
    """
    args = makeSubscribeParser().parse_args()
    try:
        # Find predictions stored in DB and make sure they're valid
        predictions = db_helper.get_predictions_by_id(args.predictions_id)
        if predictions is None:
            return flask.jsonify({
                'status':
                '500',
                'error':
                'No predictions with id {} '.format(args.predictions_id)
            })
        if predictions['is_valid'] is False:
            return flask.jsonify({
                'status':
                '500',
                'error':
                'Predictions with id {} were invalidated at {}'.format(
                    args.predictions_id, predictions['invalidated_at'])
            })

        # Save subscriber
        doc = {
            'url': args.url,
            'predictions': predictions,
            'registered_at': utils.utcnow(),
            'notified_at': []
        }
        inserted = mongo.db.subscribers.insert_one(doc)
        if inserted.inserted_id:
            return flask.jsonify({
                'registered': True,
                'id': str(inserted.inserted_id)
            })
        # FIX: previously fell through and returned None (an invalid Flask
        # response) when no inserted id came back.
        return flask.jsonify({
            'status': '500',
            'error': 'Failed to save subscriber'
        })
    except Exception as e:
        # FIX: str(e) instead of e.message -- e.message is absent on most
        # exception types and would mask the real error with an
        # AttributeError.
        return flask.jsonify({'status': '500', 'error': str(e)})
def save_model(self, dataset):
    """Write the fitted ARIMA model to MongoDB and pickle it to disk.

    Returns the inserted Mongo document id, or None when no fitted
    model is available yet.
    """
    # Nothing trained yet -- nothing to save.
    if not self.model_fit:
        return None

    record = {
        'algorithm': 'ARIMA',
        # Trained from a batch CSV (as opposed to an Influx feed).
        'input_source': 'csv',
        # Time span covered by the cleaned training series.
        'input_start': self.data_clean.index.min(),
        'input_end': self.data_clean.index.max(),
        'acquisition_time': utils.utcnow(),
        # Fixed (p, d, q) order used for this ARIMA fit.
        'metadata': {
            'p': 2,
            'd': 1,
            'q': 0,
            'dataset': dataset
        }
    }
    new_id = mongo.db.models.insert_one(record).inserted_id
    self.pklize(new_id)
    return new_id
def job(): subscribers = db_helper.get_all_subscribers() if len(subscribers) == 0: print >> sys.stderr, "No subscribers to notify" return for s in subscribers: print >> sys.stderr, "Processing Subcscriber", s['_id'] notify_url = s['url'] predictions = s['predictions'] start_time = predictions['start_time'] end_time = predictions['end_time'] old_predictions = predictions['values'] # Use active model to predict same range. active_model = db_helper.get_active_model() print >> sys.stderr, "Using Active Model: {}".format(active_model) if active_model: model = ArimaModel(load=True, model_id=active_model) model_attributes = db_helper.get_model_by_id(active_model) K = utils.get_time_difference(model_attributes['input_end'], end_time) + 1 S = utils.get_time_difference(model_attributes['input_end'], start_time) if S > K: print >> sys.stderr, "Bad Request, start_time should be before end_time" return start_time = datetime.datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%SZ') end_time = datetime.datetime.strptime(end_time, '%Y-%m-%dT%H:%M:%SZ') forecasted = model.forecast(K) # Discard first S predictions since they are not required in the response new_predictions = forecasted[S:] # Prepare JSON to be sent in case difference is significant new_json = [] new_values = [] t = start_time for p in new_predictions: new_values.append(p) new_json.append({'timestamp': t.strftime('%Y-%m-%dT%H:%M:%SZ'), 'requests': p}) t += datetime.timedelta(hours=1) distance = math.sqrt( sum( [(old - new)**2 for old, new in zip(old_predictions, new_predictions)] ) ) print >> sys.stderr, "Difference in predictions: ", distance # TODO: Change threshold value!! 
if distance >= 1: # Invalidate old predictions db_helper.invalidate_predictions(predictions['_id']) # Save new predictions: to_save = { 'start_time': start_time, 'end_time': end_time, 'values': new_values, 'granularity': 'hour', 'is_valid': True, 'generated_at': utils.utcnow(), 'model': active_model } new_predictions_id = db_helper.save_predictions(to_save) response = { 'id': new_predictions_id, 'values': new_json, 'start_time': start_time.strftime('%Y-%m-%dT%H:%M:%SZ'), 'end_time': end_time.strftime('%Y-%m-%dT%H:%M:%SZ') } # Notify subscriber try: fail = False result = requests.post(notify_url, json=response) except Exception as e: fail = True result = 'Error sending POST request to {}: {}'.format(notify_url, e.message) print >> sys.stderr, "Notified {}".format(notify_url) print >> sys.stderr, "Result {}".format(result) if not fail and result.status_code == 200: # Update subscriber only if request was successfully received by subscriber updated = db_helper.update_subscriber_predictions(s['_id'], new_predictions_id) print >> sys.stderr, updated else: print >> sys.stderr, """