def runMain():
    """Run the read -> process -> write pipeline, reporting failures on stdout.

    The input named by ``INPUT_FILE_NAME`` is loaded with ``readData``,
    transformed by ``processData`` and then written out through
    ``writeOutputToFile`` and ``writeData``. Each stage is guarded
    separately: when a stage raises, its numbered error message and the
    exception itself are printed and the function returns without running
    the later stages.

    Returns:
        None.
    """
    print('Running ...')

    # Stage 1: read the input.
    try:
        data = readData(INPUT_FILE_NAME)
    except Exception as e:
        print('ERROR 1: File I/O Error While Reading Input Data! App terminating ...')
        # Show the cause as well, matching what the write stage already does.
        print(e)
        return

    # Stage 2: transform the input.
    try:
        dataOutputList = processData(HEADERS_LIST, data)
    except Exception as e:
        print('ERROR 2: Error Processing Data! App terminating ...')
        print(e)
        return

    # Stage 3: emit the results. The file write and the per-row writeData
    # calls share one guard, so a failure in either is reported as ERROR 3.
    try:
        writeOutputToFile(OUTPUT_FILE_NAME, HEADERS_LIST, dataOutputList)
        # NOTE(review): the second argument looks like a "this is the header
        # row" flag (1 for headers, 0 for data rows) - confirm in writeData.
        writeData(HEADERS_LIST, 1)
        for element in dataOutputList:
            writeData(element, 0)
    except Exception as e:
        print('ERROR 3: File I/O Error While Writing Output Data! App terminating ...')
        print(e)
        return
def getPreds(df, out=None):
    """Label every article in *df* as "Real" or "Fake" using the saved model.

    The pickled tokenizer is read from ``./onion_tokenizer.pyc`` and the
    Keras model from ``static/onion_connoisseur.h5``. Texts are taken from
    the first column of ``processData(df, ['body'])``, tokenised, padded to
    ``DataProcessor().getMaxWords()`` and scored by the model. A score below
    0.75 is labelled "Real", anything else "Fake".

    Args:
        df: table of articles; must provide ``to_numpy()``. The first column
            of each row is what gets written to the CSV as the raw article.
        out: when truthy, the predictions are also written to
            ``predictions.csv`` in the current working directory. Only the
            truthiness is used; the value is not treated as a path.

    Returns:
        ``[preds, predVals]`` - the list of "Real"/"Fake" labels and the raw
        output of ``model.predict``.

    Raises:
        AssertionError: if the unpickled object is not a ``Tokenizer`` or the
            loaded model is not a ``keras.models.Model``.
    """
    # GPU won't work without enabling memory growth on the first device.
    physical_devices = tf.config.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

    dp = DataProcessor()

    # Keep only the first column of each row: the processed text is fed to the
    # model, the unprocessed text is what ends up in the CSV.
    test_articles = [row[0] for row in processData(df, ['body']).to_numpy()]
    test_articles_raw = [row[0] for row in df.to_numpy()]

    # NOTE(review): pickle.load can execute arbitrary code; this file must
    # only ever come from a trusted source.
    with open('./onion_tokenizer.pyc', 'rb') as pickleHand:
        tokenizer = pickle.load(pickleHand)
    assert isinstance(tokenizer, Tokenizer)

    max_len = dp.getMaxWords()
    seqs = tokenizer.texts_to_sequences(test_articles)
    seqs = pad_sequences(seqs, max_len)

    model = keras.models.load_model('static/onion_connoisseur.h5')
    assert isinstance(model, keras.models.Model)

    print(test_articles)
    predVals = model.predict(seqs)
    preds = ["Real" if score < 0.75 else "Fake" for score in predVals]
    print(preds)

    if out:
        # newline='' is required by the csv module; without it every row is
        # followed by a blank line on platforms that translate '\n'.
        with open('predictions.csv', 'w', encoding='utf-8', newline='') as outHand:
            writer = csv.writer(outHand)
            writer.writerows(zip(test_articles_raw, preds, predVals))

    return [preds, predVals]
def getStockData():
    """Fetch a ticker from Quandl, run the selected model and package the result.

    Query parameters (read from ``request.args``):
        stock: ticker appended to ``'WIKI/'``; defaults to "IBM".
        method: integer selecting the model; defaults to "1". Values 1-4, 7
            and 8 select models that report an accuracy, 5 and 6 select
            models that report an RMSE. Any other value runs no model.

    Returns:
        Whatever ``packageData(y_actual, prediction, result)`` returns, where
        ``result`` is an "Accuracy: ..." or "RMSE: ..." string.
    """
    ticker = request.args.get('stock', default="IBM")
    model_id = int(request.args.get('method', default="1"))
    print(ticker)
    print(model_id)

    # NOTE(review): API credential is hard-coded in source; consider moving it
    # to configuration.
    quandl.ApiConfig.api_key = "M46EXcBvFPiHWDrdAFnY"
    quotes = quandl.get('WIKI/' + ticker)
    (X_model, y_model, X_predict, y_actual,
     model_ts_train, model_ts_test) = processData(quotes)

    # Models called with the feature matrices; they return (prediction, accuracy).
    accuracy_models = {
        1: LinearRegression,
        2: BayesianRidge,
        3: RidgeRegression,
        4: SupportVectorMachine,
        7: ARDRegression,
        8: ElasticNet,
    }
    # Models called with the time-series split; they return (prediction, rmse).
    rmse_models = {
        5: ARIMARegression,
        6: LSTMRegression,
    }

    prediction = accuracy = rmse = None
    if model_id in accuracy_models:
        prediction, accuracy = accuracy_models[model_id](X_model, y_model, X_predict)
    elif model_id in rmse_models:
        prediction, rmse = rmse_models[model_id](model_ts_train, model_ts_test)

    # An unset accuracy means either an RMSE model ran or no model ran at all.
    if accuracy is None:
        result = "RMSE: " + str(rmse)
    else:
        result = "Accuracy: " + str(accuracy)
    print(result)

    return packageData(y_actual, prediction, result)
def LSTMRegression(train_prep, test_prep):
    """Fit an LSTM on *train_prep* and walk-forward forecast *test_prep*.

    Both series are differenced (lag 1), framed as a supervised problem and
    scaled before fitting. After fitting, the model is run once over the
    training inputs to build up its state and then asked for one-step
    forecasts over the test rows. Each forecast is un-scaled and
    un-differenced before being stored.

    Args:
        train_prep: training series; must expose ``.values``.
        test_prep: test series; must expose ``.values``.

    Returns:
        ``(predictions, rmse)`` - the list of forecasts and the root mean
        squared error between each forecast and the observation it targets.
    """
    # The caller's series are used as given. Previously they were discarded
    # and replaced by a freshly downloaded, hard-coded "AAPL" series, so every
    # request was answered with AAPL forecasts.

    # transform data to be stationary
    train_raw, test_raw = train_prep.values, test_prep.values
    train_diff, test_diff = difference(train_raw, 1), difference(test_raw, 1)

    # transform data to be supervised learning
    train_supervised = timeseries_to_supervised(train_diff, 1)
    test_supervised = timeseries_to_supervised(test_diff, 1)
    train, test = train_supervised.values, test_supervised.values

    # transform the scale of the data
    scaler, train_scaled, test_scaled = scale(train, test)

    # fit the model
    # NOTE(review): arguments are presumably (data, batch size 1, 1500 epochs,
    # 4 neurons) - confirm against fit_model's signature.
    lstm_model = fit_model(train_scaled, 1, 1500, 4)

    # Seed the model's internal state by predicting over every training
    # sample, so it is ready to forecast the next time step.
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
    lstm_model.predict(train_reshaped, batch_size=1)

    # walk-forward validation on the test data
    predictions = []
    expected_values = []
    for i in range(len(test_scaled)):
        # make one-step forecast from all columns but the last
        X = test_scaled[i, 0:-1]
        yhat = forecast(lstm_model, 1, X)
        # invert scaling and differencing
        yhat = invert_scale(scaler, X, yhat)
        yhat = inverse_difference(np.append(train_raw, test_raw), yhat,
                                  len(test_scaled) + 1 - i)
        predictions.append(yhat)
        # Forecast i targets the observation one step ahead.
        expected = test_raw[i + 1]
        expected_values.append(expected)
        print('Month=%d, Predicted=%f, Expected=%f' % (i + 1, yhat, expected))

    # Score each forecast against the observation it was compared with above.
    # Using all of test_raw cannot work: the loop indexes test_raw[i + 1], so
    # test_raw is always longer than the list of predictions.
    rmse = sqrt(mean_squared_error(expected_values, predictions))
    return predictions, rmse
def main():
    """Parse the file named by the first command-line argument and draw it.

    The path in ``sys.argv[1]`` goes through ``parseData`` and then
    ``processData``; the result is handed to ``drawFig`` together with the
    key ``'time'``.
    """
    source_path = sys.argv[1]
    frame = processData(parseData(source_path))
    drawFig(frame, 'time')
def __init__(self, raw_data, config):
    """Build the vocabulary, the processed data and the TensorFlow graph.

    The graph embeds three index sequences (``x``, ``z_con``, ``z_incon``)
    through a shared embedding matrix and a shared LSTM cell, then defines a
    margin loss on the cosine similarities of the final hidden states:
    ``max(0, delta - sim(x, z_con) + sim(x, z_incon))`` averaged over the
    batch, plus the value of ``self.getRegularizationLoss()``.

    Args:
        raw_data: mapping passed to ``utils.rawDataToVocabulary`` and
            ``utils.processData``; its keys are stored in ``self.keys``.
        config: mapping providing ``'batch_size'``, ``'embedding_dim'`` and
            ``'delta'`` (the latter is converted with ``float``).
    """
    self.conf = config
    self.raw_data = raw_data
    self.keys = self.raw_data.keys()  # these are the question ids
    self.vocab = utils.rawDataToVocabulary(raw_data)
    self.data = utils.processData(raw_data, self.vocab)
    self.vocab_size = len(self.vocab)
    self.batch_size = config['batch_size']
    self.embedding_dim = config['embedding_dim']
    self.delta = float(config['delta'])
    # dtype is spelled tf.float32 like every other dtype in this method; the
    # bare name `float32` is not a TensorFlow symbol.
    self.delta_vec = self.delta * tf.ones(self.batch_size, dtype=tf.float32)
    self.zero_batch = tf.zeros(self.batch_size, tf.float32)

    # The embedding part. We will learn it as part of the model.
    self.w = tf.Variable(tf.random_uniform(
        [self.vocab_size, self.embedding_dim], -1.0, 1.0), name='W')
    # Second variable initialised from W's initial value.
    self.w_init = tf.Variable(self.w.initialized_value(), name='W_init')
    # W with one extra all-zero row appended, so index `vocab_size` looks up a
    # zero vector.
    # NOTE(review): presumably that index is the padding value - confirm
    # against the batching code.
    self.extended_w = tf.concat((self.w, tf.zeros(
        (1, self.embedding_dim))), axis=0)

    # The main LSTM cell. The second (True) argument enables peepholes, like
    # the model used by Wieting et al.
    self.lstm_cell = tf.contrib.rnn.LSTMCell(self.embedding_dim, True)

    # Train input placeholders - each input is a list of embedding indices
    # representing a sentence.
    self.x = tf.placeholder(tf.int32, [None, None], name='x')
    self.z_con = tf.placeholder(tf.int32, [None, None], name='z_con')
    self.z_incon = tf.placeholder(tf.int32, [None, None], name='z_incon')

    # Turn the index inputs into embedding vectors we can feed the net with.
    self.x_embeds = tf.nn.embedding_lookup(self.extended_w, self.x)
    z_con_embeds = tf.nn.embedding_lookup(self.extended_w, self.z_con)
    z_incon_embeds = tf.nn.embedding_lookup(self.extended_w, self.z_incon)

    # Placeholders for the sequence sizes - the data is padded with a dummy
    # value and we want the output taken at the real end of each sequence,
    # not after the padding. Note `(None)` is plain None, not a tuple.
    self.x_seq_len = tf.placeholder(dtype=tf.int32, shape=(None), name='x_seq_len')
    self.z_con_seq_len = tf.placeholder(dtype=tf.int32, shape=(None))
    self.z_incon_seq_len = tf.placeholder(dtype=tf.int32, shape=(None))

    # Now run the LSTM to get the outputs. Note that the output shape is
    # batch_size * sequence_len * embedding_dim, where sequence_len is the max
    # length of a sentence in the batch.
    # last_state is a tuple with the following values:
    #   last_state.h - the last output according to the sequence length (h_t)
    #   last_state.c - the last cell state according to the sequence length (c_t)
    _, self.x_last_state = tf.nn.dynamic_rnn(
        cell=self.lstm_cell, dtype=tf.float32,
        sequence_length=self.x_seq_len, inputs=self.x_embeds)
    _, self.z_con_last_state = tf.nn.dynamic_rnn(
        cell=self.lstm_cell, dtype=tf.float32,
        sequence_length=self.z_con_seq_len, inputs=z_con_embeds)
    _, self.z_incon_last_state = tf.nn.dynamic_rnn(
        cell=self.lstm_cell, dtype=tf.float32,
        sequence_length=self.z_incon_seq_len, inputs=z_incon_embeds)

    # Compute the loss from the correct outputs of the batches.
    self.x_z_con_sim = self.cosineSim(self.x_last_state.h, self.z_con_last_state.h)
    self.x_z_incon_sim = self.cosineSim(self.x_last_state.h, self.z_incon_last_state.h)
    self.loss_vec = tf.maximum(
        self.zero_batch,
        self.delta - self.x_z_con_sim + self.x_z_incon_sim)
    self.loss1 = tf.reduce_mean(self.loss_vec)
    self.loss2 = self.getRegularizationLoss()
    self.loss = self.loss1 + self.loss2

    # Evaluation hack to enable TensorFlow's Java API support: h + h - h is
    # just h again, but it ends in an op with the fixed name 'embd'.
    self.y1 = tf.add(self.x_last_state.h, self.x_last_state.h, name='y')
    self.y2 = tf.subtract(self.y1, self.x_last_state.h, name='embd')

    # Add a subgraph that just calculates the embedding of a single sentence.
    # No sequence_length is passed here, so the whole input is consumed.
    self.input = tf.placeholder(tf.int32, [None, None])
    embeds = tf.nn.embedding_lookup(self.extended_w, self.input)
    _, res = tf.nn.dynamic_rnn(cell=self.lstm_cell, dtype=tf.float32, inputs=embeds)
    self.eval = res.h

    self.saver = tf.train.Saver()
from flask import send_file
import io
import ConfigParser

app = Flask(__name__)

# NOTE(review): the doubled dot in "config..ini" looks like a typo - confirm
# against the deployed file name before changing it.
CONFIG_FILE = "/var/www/FLASKAPPS/services/config..ini"

# Load the configuration file. It is only read, so open it read-only instead
# of 'r+', which needlessly required write permission on the file.
with open(CONFIG_FILE, 'r') as f:
    sample_config = f.read()
config = ConfigParser.RawConfigParser(allow_no_value=True)
config.readfp(io.BytesIO(sample_config))

# External functions from utils.py
data_invoice = utils.processData()
print(data_invoice)

# Invoices served by the routes below; filled in by get_user_invoice.
invoices = []
decoded_data_invoice = json.loads(data_invoice)


# This function gets the invoice for a user id
@app.route('/invoice/user/<int:id_user>', methods=['GET'])
def get_user_invoice(id_user):
    """Return the invoice whose ``id_user`` matches *id_user* as JSON.

    Args:
        id_user: integer user id taken from the URL.

    Returns:
        A JSON response of the form ``{"invoice": ...}`` for the first
        matching entry in ``invoices``. Aborts with HTTP 404 when no entry
        matches.
    """
    # Register the decoded invoice once. Appending it on every request made
    # the module-level list grow by one entry per call.
    if decoded_data_invoice not in invoices:
        invoices.append(decoded_data_invoice)

    wanted = str(id_user)
    for entry in invoices:
        if str(entry['invoice']['id_user']) == wanted:
            return jsonify({"invoice": entry['invoice']})

    # Only give up after every stored invoice has been checked.
    abort(404)