Ejemplo n.º 1
0
def backgroundAutoArima(task_id, dataset):
    """Train and save an AutoARIMA model for ``dataset``, tracking a task.

    The task ``task_id`` is marked ``running`` before training begins and
    ``finished`` (with the id returned by ``save_model``) afterwards.
    AutoARIMA takes too long to train inline, so this is meant to be run on
    a separate thread.
    """
    db_helper.update_task_by_id(task_id, status='running', started_at=utcnow())

    # Build, train and persist the model
    arima = autoarima.AutoArimaModel(dataset=dataset)
    arima.train_model(arima.training_data, arima.testing_data)
    saved_model_id = arima.save_model(dataset)

    # Record the outcome on the task
    completion = {
        'status': 'finished',
        'model_id': saved_model_id,
        'completed_at': utcnow(),
    }
    db_helper.update_task_by_id(task_id, **completion)
Ejemplo n.º 2
0
 def save_model(self, dataset):
     """Store a description of the fitted AutoARIMA model in MongoDB.

     Inserts a document into ``mongo.db.models``, passes the new id to
     ``self.pklize`` and returns that id. Returns ``None`` without touching
     the database when ``self.model_fit`` is falsy.
     """
     # Nothing to save unless a model has been fitted
     if not self.model_fit:
         return None

     # Settings recorded under 'metadata', together with the dataset
     recorded_settings = {
         'start_p': 1,
         'start_q': 1,
         'max_p': 5,
         'max_q': 5,
         'm': 12,
         'start_P': 0,
         'seasonal': True,
         'd': 1,
         'D': 1,
         'trace': True,
         'error_action': 'ignore',
         'suppress_warnings': True,
         'stepwise': True,
         'dataset': dataset
     }

     # First and last timestamps of the training data
     first_ts = self.data_clean.index.min()
     last_ts = self.data_clean.index.max()

     model_record = {
         'algorithm': 'AutoARIMA',
         # Whether this model is trained from a batch CSV or Influx
         'input_source': 'csv',
         'input_start': first_ts,
         'input_end': last_ts,
         'acquisition_time': utils.utcnow(),
         'metadata': recorded_settings
     }

     # Write model to MongoDB, then hand the new id to pklize
     model_id = mongo.db.models.insert_one(model_record).inserted_id
     self.pklize(model_id)
     return model_id
Ejemplo n.º 3
0
def invalidate_predictions(pred_id):
    """Flag the predictions document ``pred_id`` as no longer valid.

    Sets ``is_valid`` to ``False`` and ``invalidated_at`` to the current
    time, then returns the document handed back by MongoDB after a round
    trip through ``MongoEncoder`` and ``json.loads``.

    NOTE(review): ``find_one_and_update`` is called without
    ``return_document``; with PyMongo's default this yields the document as
    it was *before* the update -- confirm that is what callers expect.
    """
    selector = {'_id': ObjectId(pred_id)}
    changes = {'$set': {"is_valid": False, "invalidated_at": utcnow()}}
    stored_doc = mongo.db.predictions.find_one_and_update(selector, changes)

    # Encode with the Mongo-aware encoder, then parse back into plain types
    return json.loads(MongoEncoder().encode(stored_doc))
Ejemplo n.º 4
0
    def get(self, id):
        """POST a mock prediction update to a subscriber and report the outcome.

        Looks up the subscriber ``id``, builds one mock value per
        (prediction, threshold) pair at hourly timestamps starting from the
        predictions' ``start_time``, and POSTs the result to the subscriber's
        URL.

        Returns a JSON response:
          * ``{'error': ...}`` when no subscriber with that id exists;
          * ``{'status': 'Fail', 'result': ...}`` when the POST raised;
          * otherwise the URL, the payload sent and the response text, after
            appending the current time to the subscriber's ``notified_at``.
        """
        subscriber = db_helper.get_subscriber_by_id(id)
        if subscriber is None:
            return flask.jsonify(
                {'error': 'No subscriber with ID {}'.format(id)})

        predictions = subscriber['predictions']
        thresholds = subscriber['thresholds']
        notified_at = subscriber['notified_at'] or []
        url = subscriber['url']

        timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
        ts = datetime.datetime.strptime(predictions['start_time'],
                                        timestamp_format)
        mock = []
        for p, t in zip(predictions['values'], thresholds):
            mock.append({
                'timestamp': ts.strftime(timestamp_format),
                # Prediction, plus the threshold half of the time, plus a
                # random fraction of the prediction itself
                'value':
                p + (t * random.choice([1, 0])) + (random.random() * p)
            })
            ts += datetime.timedelta(hours=1)

        response = {
            'id': 'newMockId',
            'values': mock,
            'start_time': predictions['start_time'],
            'end_time': predictions['end_time']
        }

        try:
            result = requests.post(url, json=response)
        except Exception as e:
            # Best effort: report any delivery failure instead of raising.
            # The exception is formatted directly because ``e.message`` does
            # not exist on Python 3 exceptions.
            failure = 'Error sending POST request to {}: {}'.format(url, e)
            mock_response = {'status': 'Fail', 'result': failure}
        else:
            mock_response = {
                'url': url,
                'json': response,
                'message': 'Attempted',
                'result': result.text
            }
            # NOTE(review): this filters on 'id', not '_id' -- confirm that
            # subscriber documents actually carry an 'id' field, otherwise
            # this update matches nothing.
            mongo.db.subscribers.update_one(
                {'id': id},
                {'$set': {
                    'notified_at': notified_at + [utils.utcnow()]
                }})
        return flask.jsonify(mock_response)
Ejemplo n.º 5
0
def update_model_input(model_id, input_end):
    """Record a new end-of-input timestamp for the model ``model_id``.

    Sets ``input_end`` (parsed from ``input_end``, a string in
    ``%Y-%m-%dT%H:%M:%SZ`` format), refreshes ``acquisition_time`` and marks
    ``input_source`` as ``"influx"``.

    Returns ``True`` when ``find_one_and_update`` handed back a document,
    ``False`` otherwise. Raises ``ValueError`` if ``input_end`` does not
    match the expected format.
    """
    # Imported locally so the block does not depend on the ``pandas.datetime``
    # alias, which was deprecated in pandas 1.0 and removed in pandas 2.0.
    from datetime import datetime

    parsed_end = datetime.strptime(input_end, '%Y-%m-%dT%H:%M:%SZ')
    query_result = mongo.db.models.find_one_and_update(
        {'_id': ObjectId(model_id)}, {
            "$set": {
                "input_end": parsed_end,
                "acquisition_time": utcnow(),
                "input_source": "influx"
            }
        })
    return bool(query_result)
Ejemplo n.º 6
0
def update_subscriber_predictions(sub_id, pred_id):
    """Attach the predictions ``pred_id`` to subscriber ``sub_id``.

    Replaces the subscriber's ``predictions`` with the result of
    ``get_predictions_by_id(pred_id)`` and appends the current time to its
    ``notified_at`` list. Returns the document handed back by
    ``find_one_and_update`` after a round trip through ``MongoEncoder`` and
    ``json.loads``.

    NOTE(review): ``notified_at`` is read, extended and written back in
    separate steps; concurrent notifications could overwrite each other --
    consider ``$push`` if that matters.
    """
    subscriber_data = get_subscriber_by_id(sub_id)
    # A missing notification history (None) is treated as empty instead of
    # crashing on ``.append``.
    notified_at = (subscriber_data['notified_at'] or []) + [utcnow()]

    query_result = mongo.db.subscribers.find_one_and_update(
        {'_id': ObjectId(sub_id)}, {
            "$set": {
                "predictions": get_predictions_by_id(pred_id),
                "notified_at": notified_at
            }
        })
    return json.loads(MongoEncoder().encode(query_result))
Ejemplo n.º 7
0
    def post(self):
        """Register a subscriber URL against a stored set of predictions.

        Reads ``predictions_id`` and ``url`` from the parsed request
        arguments. Returns a JSON response:
          * ``{'status': '500', 'error': ...}`` when the predictions do not
            exist, have been invalidated, or any exception is raised;
          * ``{'registered': True, 'id': ...}`` once the subscriber document
            has been inserted.
        """
        args = makeSubscribeParser().parse_args()
        try:
            # Find predictions stored in DB and make sure they're valid
            predictions = db_helper.get_predictions_by_id(args.predictions_id)
            if predictions is None:
                return flask.jsonify({
                    'status':
                    '500',
                    'error':
                    'No predictions with id {} '.format(args.predictions_id)
                })

            if predictions['is_valid'] is False:
                return flask.jsonify({
                    'status':
                    '500',
                    'error':
                    'Predictions with id {} were invalidated at {}'.format(
                        args.predictions_id, predictions['invalidated_at'])
                })

            # Save subscriber
            doc = {
                'url': args.url,
                'predictions': predictions,
                'registered_at': utils.utcnow(),
                'notified_at': []
            }

            inserted = mongo.db.subscribers.insert_one(doc)
            if inserted.inserted_id:
                return flask.jsonify({
                    'registered': True,
                    'id': str(inserted.inserted_id)
                })
            # Falls through (returning None) if no id was reported.

        except Exception as e:
            # ``str(e)`` rather than ``e.message``: the attribute does not
            # exist on Python 3 exceptions, and on Python 2 it may be empty
            # or an object that cannot be serialised to JSON.
            return flask.jsonify({'status': '500', 'error': str(e)})
Ejemplo n.º 8
0
 def save_model(self, dataset):
     """Store a description of the fitted ARIMA model in MongoDB.

     Inserts a document into ``mongo.db.models``, passes the new id to
     ``self.pklize`` and returns that id. Returns ``None`` without touching
     the database when ``self.model_fit`` is falsy.
     """
     # Nothing to save unless a model has been fitted
     if not self.model_fit:
         return None

     # Settings recorded under 'metadata', together with the dataset
     recorded_settings = {
         'p': 2,
         'd': 1,
         'q': 0,
         'dataset': dataset
     }

     # First and last timestamps of the training data
     first_ts = self.data_clean.index.min()
     last_ts = self.data_clean.index.max()

     model_record = {
         'algorithm': 'ARIMA',
         # Whether this model is trained from a batch CSV or Influx
         'input_source': 'csv',
         'input_start': first_ts,
         'input_end': last_ts,
         'acquisition_time': utils.utcnow(),
         'metadata': recorded_settings
     }

     # Write model to MongoDB, then hand the new id to pklize
     model_id = mongo.db.models.insert_one(model_record).inserted_id
     self.pklize(model_id)
     return model_id
Ejemplo n.º 9
0
def job():
    subscribers = db_helper.get_all_subscribers()
    if len(subscribers) == 0:
        print >> sys.stderr, "No subscribers to notify"
        return

    for s in subscribers:
        print >> sys.stderr, "Processing Subcscriber", s['_id']
        notify_url = s['url']
        predictions = s['predictions']

        start_time = predictions['start_time']
        end_time = predictions['end_time']
        old_predictions = predictions['values']

        # Use active model to predict same range.
        active_model = db_helper.get_active_model()
        print >> sys.stderr, "Using Active Model: {}".format(active_model)
        if active_model:
            model = ArimaModel(load=True, model_id=active_model)
            model_attributes = db_helper.get_model_by_id(active_model)
            K = utils.get_time_difference(model_attributes['input_end'], end_time) + 1
            S = utils.get_time_difference(model_attributes['input_end'], start_time)

            if S > K:
                print >> sys.stderr, "Bad Request, start_time should be before end_time"
                return

            start_time = datetime.datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%SZ')
            end_time = datetime.datetime.strptime(end_time, '%Y-%m-%dT%H:%M:%SZ')

            forecasted = model.forecast(K)
            # Discard first S predictions since they are not required in the response
            new_predictions = forecasted[S:]

            # Prepare JSON to be sent in case difference is significant
            new_json = []
            new_values = []
            t = start_time
            for p in new_predictions:
                new_values.append(p)
                new_json.append({'timestamp': t.strftime('%Y-%m-%dT%H:%M:%SZ'), 'requests': p})
                t += datetime.timedelta(hours=1)

            distance = math.sqrt(
                sum(
                    [(old - new)**2 for old, new in zip(old_predictions, new_predictions)]
                )
            )
            print >> sys.stderr, "Difference in predictions: ", distance
            # TODO: Change threshold value!!
            if distance >= 1:
                # Invalidate old predictions
                db_helper.invalidate_predictions(predictions['_id'])
                # Save new predictions:
                to_save = {
                    'start_time': start_time,
                    'end_time': end_time,
                    'values': new_values,
                    'granularity': 'hour',
                    'is_valid': True,
                    'generated_at': utils.utcnow(),
                    'model': active_model
                }

                new_predictions_id = db_helper.save_predictions(to_save)
                response = {
                    'id': new_predictions_id,
                    'values': new_json,
                    'start_time': start_time.strftime('%Y-%m-%dT%H:%M:%SZ'),
                    'end_time': end_time.strftime('%Y-%m-%dT%H:%M:%SZ')

                }
                # Notify subscriber
                try:
                    fail = False
                    result = requests.post(notify_url, json=response)
                except Exception as e:
                    fail = True
                    result = 'Error sending POST request to {}: {}'.format(notify_url, e.message)

                print >> sys.stderr, "Notified {}".format(notify_url)
                print >> sys.stderr, "Result {}".format(result)

                if not fail and result.status_code == 200:
                    # Update subscriber only if request was successfully received by subscriber
                    updated = db_helper.update_subscriber_predictions(s['_id'], new_predictions_id)
                    print >> sys.stderr, updated

        else:
            print >> sys.stderr, """