Exemple #1
0
def runMain():
    """Run the read -> process -> write pipeline, reporting failures on stdout.

    Each stage runs in its own ``try`` block.  When a stage raises, a
    stage-specific message ('ERROR 1', 'ERROR 2' or 'ERROR 3') is printed,
    followed by the exception itself, and the function returns without
    running the remaining stages.

    Relies on the module-level names INPUT_FILE_NAME, OUTPUT_FILE_NAME and
    HEADERS_LIST and on the helpers readData, processData, writeOutputToFile
    and writeData.
    """
    print('Running ...')

    # Stage 1: read the raw input.
    try:
        data = readData(INPUT_FILE_NAME)
    except Exception as e:
        print('ERROR 1: File I/O Error While Reading Input Data! App terminating ...')
        # Show the underlying cause, as the write stage does.
        print(e)
        return

    # Stage 2: turn the raw input into the rows to be written.
    try:
        dataOutputList = processData(HEADERS_LIST, data)
    except Exception as e:
        print('ERROR 2: Error Processing Data! App terminating ...')
        print(e)
        return

    # Stage 3: write the result.
    try:
        writeOutputToFile(OUTPUT_FILE_NAME, HEADERS_LIST, dataOutputList)
        # writeData is called with 1 for the headers and 0 for every data row;
        # presumably a "is header" flag -- confirm against writeData.
        writeData(HEADERS_LIST, 1)
        for element in dataOutputList:
            writeData(element, 0)
    except Exception as e:
        print('ERROR 3: File I/O Error While Writing Output Data! App terminating ...')
        print(e)
        return
Exemple #2
0
def getPreds(df, out=None):
    """Label every article in *df* as "Real" or "Fake" using the saved model.

    Args:
        df: table of articles.  It is passed to ``processData(df, ['body'])``
            and must expose ``to_numpy()``; the first column of each row is
            taken as the raw article text for the CSV output.
        out: when truthy, the predictions are also written to
            ``predictions.csv`` in the current working directory.  The value
            is only tested for truthiness; it is not used as a path.

    Returns:
        A two-element list ``[preds, predVals]``: the "Real"/"Fake" labels and
        the raw values returned by ``model.predict``.
    """
    # GPU won't work without the next three lines
    physical_devices = tf.config.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0],
                                                 enable=True)
    dp = DataProcessor()

    # First column of the processed table is the text fed to the tokenizer.
    test_articles = [row[0] for row in processData(df, ['body']).to_numpy()]
    # First column of the unprocessed table is echoed into the CSV output.
    test_articles_raw = [row[0] for row in df.to_numpy()]

    # NOTE(review): pickle.load can execute arbitrary code; this file must
    # only ever come from a trusted source.
    with open('./onion_tokenizer.pyc', 'rb') as pickleHand:
        tokenizer = pickle.load(pickleHand)
    assert isinstance(tokenizer, Tokenizer)

    max_len = dp.getMaxWords()
    seqs = tokenizer.texts_to_sequences(test_articles)
    seqs = pad_sequences(seqs, max_len)
    model = keras.models.load_model('static/onion_connoisseur.h5')
    assert isinstance(model, keras.models.Model)
    print(test_articles)
    predVals = model.predict(seqs)
    # Scores below 0.75 are labelled "Real", everything else "Fake".
    preds = ["Real" if score < 0.75 else "Fake" for score in predVals]
    print(preds)
    if out:
        # newline='' lets the csv module control line endings; without it
        # every row is followed by a blank line on Windows.
        with open('predictions.csv', 'w', encoding='utf-8',
                  newline='') as outHand:
            writer = csv.writer(outHand)
            for raw_text, label, score in zip(test_articles_raw, preds,
                                              predVals):
                writer.writerow([raw_text, label, score])

    return [preds, predVals]
def getStockData():
    """Fetch price history for the requested stock and run one regression on it.

    Query parameters (read from ``request.args``):
        stock:  ticker appended to ``'WIKI/'`` for the Quandl lookup
                (default ``"IBM"``).
        method: integer selecting the model (default 1).
                1 LinearRegression, 2 BayesianRidge, 3 RidgeRegression,
                4 SupportVectorMachine, 7 ARDRegression, 8 ElasticNet
                return ``(prediction, accuracy)``;
                5 ARIMARegression and 6 LSTMRegression return
                ``(prediction, rmse)``.
                A value that cannot be parsed as an integer falls back to 1.
                Any other number runs no model: ``prediction`` stays ``None``
                and the result string reads ``"RMSE: None"``.

    Returns:
        Whatever ``packageData(y_actual, prediction, result)`` returns.
    """
    stock = request.args.get('stock', default="IBM")
    # type=int makes a non-numeric value fall back to the default instead of
    # raising ValueError out of the view.
    method = request.args.get('method', default=1, type=int)
    print(stock)
    print(method)

    # NOTE(review): API key is hard-coded in source; it should be moved to
    # configuration and rotated.
    quandl.ApiConfig.api_key = "M46EXcBvFPiHWDrdAFnY"
    apiData = quandl.get('WIKI/' + stock)

    (X_model, y_model, X_predict, y_actual,
     model_ts_train, model_ts_test) = processData(apiData)

    # Models called with (X_model, y_model, X_predict) -> (prediction, accuracy).
    accuracy_models = {
        1: LinearRegression,
        2: BayesianRidge,
        3: RidgeRegression,
        4: SupportVectorMachine,
        7: ARDRegression,
        8: ElasticNet,
    }
    # Models called with (train series, test series) -> (prediction, rmse).
    rmse_models = {
        5: ARIMARegression,
        6: LSTMRegression,
    }

    prediction, accuracy, rmse = None, None, None
    if method in accuracy_models:
        prediction, accuracy = accuracy_models[method](X_model, y_model,
                                                       X_predict)
    elif method in rmse_models:
        prediction, rmse = rmse_models[method](model_ts_train, model_ts_test)

    # Only one of accuracy / rmse is set, depending on the model family.
    if accuracy is None:
        result = "RMSE: " + str(rmse)
    else:
        result = "Accuracy: " + str(accuracy)
    print(result)

    return packageData(y_actual, prediction, result)
def LSTMRegression(train_prep, test_prep):
    """Walk-forward forecast of *test_prep* with an LSTM fitted on *train_prep*.

    The series passed in by the caller are the ones that are modelled; no data
    is downloaded here.

    Args:
        train_prep: training series; only its ``.values`` is used.
        test_prep: test series; only its ``.values`` is used.

    Returns:
        ``(predictions, rmse)``.  ``predictions[i]`` is the forecast for
        ``test_prep.values[i + 1]`` (the first test value only seeds the
        differencing), and ``rmse`` is computed over exactly those pairs.
    """
    # transform data to be stationary
    train_raw, test_raw = train_prep.values, test_prep.values
    train_diff, test_diff = difference(train_raw, 1), difference(test_raw, 1)

    # transform data to be supervised learning
    train_supervised = timeseries_to_supervised(train_diff, 1)
    test_supervised = timeseries_to_supervised(test_diff, 1)
    train, test = train_supervised.values, test_supervised.values

    # transform the scale of the data
    scaler, train_scaled, test_scaled = scale(train, test)

    # fit the model; the arguments are presumably batch size 1, 1500 epochs
    # and 4 LSTM neurons -- confirm the order against fit_model
    lstm_model = fit_model(train_scaled, 1, 1500, 4)

    # seed the state by making a prediction on all samples in the training
    # dataset so that the internal state is ready to forecast the next step
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
    lstm_model.predict(train_reshaped, batch_size=1)

    # full raw history used to undo the differencing; it does not change
    # between steps, so build it once
    history = np.append(train_raw, test_raw)
    n_steps = len(test_scaled)

    # walk-forward validation on the test data
    predictions = []
    for i in range(n_steps):
        # every column but the last is the model input for this step
        X = test_scaled[i, 0:-1]

        # make one-step forecast
        yhat = forecast(lstm_model, 1, X)

        # invert scaling and differencing
        yhat = invert_scale(scaler, X, yhat)
        yhat = inverse_difference(history, yhat, n_steps + 1 - i)

        # store forecast
        predictions.append(yhat)

        # print result
        expected = test_raw[i + 1]
        print('Month=%d, Predicted=%f, Expected=%f' % (i + 1, yhat, expected))

    # report model performance against the values each forecast targets
    # (test_raw[i + 1] for step i), so both sequences have the same length
    actual = test_raw[1:n_steps + 1]
    rmse = sqrt(mean_squared_error(actual, predictions))
    return predictions, rmse
Exemple #5
0
def main():
    """Parse the file named by the first CLI argument, process it and plot it.

    The parsed data is run through ``processData`` and the result is handed to
    ``drawFig`` together with the string ``'time'``.
    """
    input_path = sys.argv[1]
    processed = processData(parseData(input_path))
    drawFig(processed, 'time')
Exemple #6
0
    def __init__(self, raw_data, config):
        """Build the TensorFlow graph for the sentence-similarity LSTM.

        Args:
            raw_data: mapping of the raw examples; its keys are the question
                ids.  It is passed to ``utils.rawDataToVocabulary`` and
                ``utils.processData``.
            config: mapping providing ``'batch_size'``, ``'embedding_dim'``
                and ``'delta'`` (the margin, converted with ``float``).

        The graph embeds three index sequences (``x``, ``z_con``, ``z_incon``),
        runs each through the same LSTM cell and defines the loss
        ``mean(max(0, delta - sim(x, z_con) + sim(x, z_incon)))`` plus the
        value of ``self.getRegularizationLoss()``.
        """
        self.conf = config
        self.raw_data = raw_data
        self.keys = self.raw_data.keys()  # these are the question ids
        self.vocab = utils.rawDataToVocabulary(raw_data)
        self.data = utils.processData(raw_data, self.vocab)
        self.vocab_size = len(self.vocab)
        self.batch_size = config['batch_size']
        self.embedding_dim = config['embedding_dim']
        self.delta = float(config['delta'])

        # tf.float32 spelled out explicitly, matching zero_batch below; the
        # bare name float32 is not defined in the code shown here.
        self.delta_vec = self.delta * tf.ones(self.batch_size, dtype=tf.float32)

        self.zero_batch = tf.zeros(self.batch_size, tf.float32)

        # the embedding part. We will learn it as part of the model
        self.w = tf.Variable(tf.random_uniform(
            [self.vocab_size, self.embedding_dim], -1.0, 1.0),
                             name='W')
        # second variable initialised from W's initial value
        self.w_init = tf.Variable(self.w.initialized_value(), name='W_init')

        # W with one extra all-zero row appended, so index vocab_size looks up
        # a zero vector (presumably the padding index -- confirm with the
        # batching code)
        self.extended_w = tf.concat((self.w, tf.zeros(
            (1, self.embedding_dim))),
                                    axis=0)

        # the main lstm cell. The second (True) argument enables peepholes,
        # like the model used by Wieting et al.
        self.lstm_cell = tf.contrib.rnn.LSTMCell(self.embedding_dim, True)

        # train input placeholders - each input is a list of embedding indices
        # representing a sentence
        self.x = tf.placeholder(tf.int32, [None, None], name='x')
        self.z_con = tf.placeholder(tf.int32, [None, None], name='z_con')
        self.z_incon = tf.placeholder(tf.int32, [None, None], name='z_incon')

        # turn the index inputs into embedding vectors we can feed the net with
        self.x_embeds = tf.nn.embedding_lookup(self.extended_w, self.x)
        z_con_embeds = tf.nn.embedding_lookup(self.extended_w, self.z_con)
        z_incon_embeds = tf.nn.embedding_lookup(self.extended_w, self.z_incon)

        # placeholders for the real sequence lengths - the data is padded with
        # a dummy value and we want the output taken at the real end of each
        # sequence, not at the padding
        self.x_seq_len = tf.placeholder(dtype=tf.int32,
                                        shape=(None),
                                        name='x_seq_len')
        self.z_con_seq_len = tf.placeholder(dtype=tf.int32, shape=(None))
        self.z_incon_seq_len = tf.placeholder(dtype=tf.int32, shape=(None))

        # now run the lstm to get the outputs. Note that output shape is
        # batch_size * sequence_len * embedding_dim, where sequence_len is the
        # max length of a sentence in the batch.
        # last_state is a tuple with the following values:
        # last_state.h - the last output according to the sequence length (h_t)
        # last_state.c - the last cell state according to the sequence length (c_t)

        _, self.x_last_state = tf.nn.dynamic_rnn(
            cell=self.lstm_cell,
            dtype=tf.float32,
            sequence_length=self.x_seq_len,
            inputs=self.x_embeds)

        _, self.z_con_last_state = tf.nn.dynamic_rnn(
            cell=self.lstm_cell,
            dtype=tf.float32,
            sequence_length=self.z_con_seq_len,
            inputs=z_con_embeds)

        _, self.z_incon_last_state = tf.nn.dynamic_rnn(
            cell=self.lstm_cell,
            dtype=tf.float32,
            sequence_length=self.z_incon_seq_len,
            inputs=z_incon_embeds)

        # compute the loss from the correct outputs of the batches
        self.x_z_con_sim = self.cosineSim(self.x_last_state.h,
                                          self.z_con_last_state.h)
        self.x_z_incon_sim = self.cosineSim(self.x_last_state.h,
                                            self.z_incon_last_state.h)

        # margin loss: zero once sim(x, z_con) exceeds sim(x, z_incon) by delta
        self.loss_vec = tf.maximum(
            self.zero_batch,
            self.delta - self.x_z_con_sim + self.x_z_incon_sim)
        self.loss1 = tf.reduce_mean(self.loss_vec)
        self.loss2 = self.getRegularizationLoss()
        self.loss = self.loss1 + self.loss2

        # evaluation hack to enable tensorflow's Java api support:
        # (h + h) - h yields h again under the node names 'y' and 'embd'
        self.y1 = tf.add(self.x_last_state.h, self.x_last_state.h, name='y')
        self.y2 = tf.subtract(self.y1, self.x_last_state.h, name='embd')

        # add a subgraph that just calculates the embedding of a single
        # sentence (no sequence_length is given to dynamic_rnn here)
        self.input = tf.placeholder(tf.int32, [None, None])
        embeds = tf.nn.embedding_lookup(self.extended_w, self.input)
        _, res = tf.nn.dynamic_rnn(cell=self.lstm_cell,
                                   dtype=tf.float32,
                                   inputs=embeds)
        self.eval = res.h

        self.saver = tf.train.Saver()
Exemple #7
0
from flask import send_file
import io
import ConfigParser

app = Flask(__name__)

# NOTE(review): the file name contains a double dot ("config..ini"); confirm
# this matches the file on disk.
CONFIG_FILE = "/var/www/FLASKAPPS/services/config..ini"

# Load the configuration file. It is only read, so open it read-only; 'r+'
# would fail when the service account has no write permission on the file.
with open(CONFIG_FILE, 'r') as f:
    sample_config = f.read()
config = ConfigParser.RawConfigParser(allow_no_value=True)
config.readfp(io.BytesIO(sample_config))

# External function from utils.py; its return value is decoded as JSON below.
data_invoice = utils.processData()
# Call form prints the same text on Python 2 and is also valid on Python 3.
print(data_invoice)
# Invoices served by the routes of this module.
invoices = []
decoded_data_invoice = json.loads(data_invoice)


# Return the invoice that belongs to a given user id.
@app.route('/invoice/user/<int:id_user>', methods=['GET'])
def get_user_invoice(id_user):
    """Return the invoice whose ``id_user`` matches *id_user* as JSON.

    Args:
        id_user: user id taken from the URL.

    Returns:
        A JSON response of the form ``{"invoice": ...}`` for the first
        matching invoice.  Aborts with HTTP 404 when no invoice matches.
    """
    # Register the decoded invoice only once so the module-level list does
    # not grow with every request.
    if decoded_data_invoice not in invoices:
        invoices.append(decoded_data_invoice)

    wanted = str(id_user)
    for entry in invoices:
        if str(entry['invoice']['id_user']) == wanted:
            return jsonify({"invoice": entry['invoice']})

    # Reached only after every invoice has been checked.
    abort(404)