def recommend():
    try:
        data = request.json
        user_history = parse_data(data)
        app.logger.error(user_history)
        if not user_history:
            return jsonify(baseline)
        inp = preprocess(user_history)
        resp = requests.post(
            'http://localhost:8501/v1/models/model:predict', json=inp)
        outputs = resp.json()['outputs'][0]
        items_to_recommend = np.argsort(outputs)[::-1][:30]
        predictions = product_tokenizer.convert_ids_to_tokens(
            items_to_recommend)
        # Remove special tokens such as [SEP] and [CLS] from the predictions.
        predictions = [pred for pred in predictions if pred[0] != '[']
        app.logger.error(predictions)
        # Pad with baseline recommendations that were not already predicted.
        predictions = predictions + [
            base_task for base_task in baseline["recommended_products"]
            if base_task not in predictions
        ]
        return jsonify({"recommended_products": predictions[:30]})
    except Exception as e:
        app.logger.error(e)
        return jsonify(baseline)
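

# Illustration only: a standalone sketch of the TensorFlow Serving REST call
# that recommend() above depends on. The model name "model" and port 8501 are
# taken from the handler; the token-id payload below is made up, and the exact
# tensor shape the deployed model expects is an assumption.
import requests

example_inputs = {"inputs": [[101, 7592, 102]]}  # hypothetical token-id batch
resp = requests.post('http://localhost:8501/v1/models/model:predict',
                     json=example_inputs)
scores = resp.json()['outputs'][0]  # per-item scores, indexed as in recommend()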


def recommend():
    try:
        data = request.get_json(force=True)
        user_id, user_frame = parse_data(data)
        products = user_frame["product_id"].unique()
        if not user_id:
            return jsonify(baseline)
        return jsonify({
            'recommended_products': [
                pred[1] for pred in predict_user(
                    model, user_id, products, product_dict,
                    reverse_product_dict, matrix_shape)
            ]
        })
    except Exception:
        return jsonify(baseline)


def recommend():
    try:
        data = request.get_json(force=True)
        user_id, user_frame = parse_data(data)
        if not user_id:
            return jsonify(baseline)
        return jsonify({
            'recommended_products': list(
                model.recommend(users=[user_id],
                                exclude_known=False,
                                new_observation_data=user_frame,
                                k=30)["product_id"])
        })
    except Exception:
        # Fall back to the baseline instead of re-raising; a bare raise here
        # would make the return below unreachable and surface a 500 to the client.
        return jsonify(baseline)


def parse(self, response):
    soup = soupify(html=response.css('#bodyarea').extract_first())
    containers = soup.find(
        id=re.compile('quickModForm')).table.find_all(filter_message)

    rows = []
    for row in containers:
        data = parse_data(row)
        if data is not None:
            rows.append(data)

    self.secret = hash(self.secret + 100)
    out_path = 'out/bitcointalk.' + str(abs(self.secret)) + '.json'
    with open(out_path, 'w') as outfile:
        json.dump(rows, outfile)

    next_page = response.css(
        'table .prevnext:nth-child(n+2) a ::attr(href)').extract_first()
    if next_page:
        yield scrapy.Request(response.urljoin(next_page), callback=self.parse)


def debug_print_sensordata(sensor_data):
    for sens_data in sensor_data:
        if sens_data.get_name() == "radar":
            x, y, vx, vy = sens_data.get()
            print('x={}, y={}, vx={}, vy={}'.format(x, y, vx, vy))


if __name__ == '__main__':
    # Get the ground truths and the measurement data from the input file data-1.txt.
    clean_all_sensor_data, all_ground_truths = parse_data("data/data-1.txt")

    print('plain radar sensor data\n')
    # debug_print_sensordata(all_sensor_data)

    # Get the predictions from the EKF class.
    clean_all_state_estimations = get_state_estimations(
        EKF1, clean_all_sensor_data)

    # Calculate the RMSE between the estimations and the ground truths.
    px, py, vx, vy = get_RMSE(clean_all_state_estimations, all_ground_truths)

    # Print the RMSE.
    print('RMSE: {},{},{},{}'.format(px, py, vx, vy))
    print('\n')

    # Print the EKF data.
    # print_EKF_data(all_sensor_data, all_ground_truths, all_state_estimations,
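

# A minimal sketch, assuming get_RMSE() above computes the column-wise root
# mean squared error between estimated states and ground truths; the
# (px, py, vx, vy) ordering is taken from the print statement in __main__,
# and array-like inputs are assumed.
import numpy as np


def rmse_per_component(estimations, ground_truths):
    est = np.asarray(estimations, dtype=float)
    gt = np.asarray(ground_truths, dtype=float)
    return tuple(np.sqrt(np.mean((est - gt) ** 2, axis=0)))  # RMSE per state component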


def preprocess(train_file, test_file, limit=None, remove_low_variance=True, remove_outliers=True):
    train_df = pd.read_csv(train_file)
    test_df = pd.read_csv(test_file)

    if limit is None:
        limit = len(train_df)
    if 0 < limit < len(train_df):
        print('Limited Sample: ' + str(limit))
        train_df = train_df.sample(n=limit)

    train_df = helpers.parse_data(train_df)
    test_df = helpers.parse_data(test_df)

    keep_columns = ['QuoteConversion_Flag']
    train_df, keep_columns = helpers.categorical_to_many(
        train_df, ['Geographic_info5'], keep_columns)
    test_df, _ = helpers.categorical_to_many(
        test_df, ['Geographic_info5'], keep_columns)

    # Fill up the train and test frames so both have the same columns.
    for key in set(train_df.keys()) - set(test_df.keys()):
        test_df.loc[:, key] = pd.Series(
            np.zeros(len(test_df['Original_Quote_Date'])), index=test_df.index)
    for key in set(test_df.keys()) - set(train_df.keys()):
        train_df.loc[:, key] = pd.Series(
            np.zeros(len(train_df['Original_Quote_Date'])), index=train_df.index)

    # Feature pre-selection: drop Personal_info5, it has a lot of empty values.
    train_df.drop(columns=['Personal_info5'], inplace=True)
    test_df.drop(columns=['Personal_info5'], inplace=True)

    # Remove rows with empty values from the training set.
    train_df.dropna(inplace=True)
    # Fill empty values in the test set; both are Y/N columns, so forward-fill.
    test_df.fillna(method='ffill', inplace=True)

    if remove_low_variance:
        train_df, removed_columns = helpers.remove_low_variance(train_df, keep_columns)
        test_df.drop(columns=removed_columns, inplace=True)
        print('DataFrame shape after feature selection: ' + str(train_df.shape))

    # Detect and remove outliers.
    if remove_outliers:
        train_df = helpers.remove_outliers(train_df)
        print('DataFrame shape after outlier removal: ' + str(train_df.shape))

    # Extract the dependent variable from the dataset.
    train_dv = np.array(train_df['QuoteConversion_Flag'])
    train_data = np.array(train_df.drop(columns=['QuoteConversion_Flag', 'Quote_ID']))
    test_data = np.array(test_df.drop(columns=['QuoteConversion_Flag', 'Quote_ID']))

    # Scale the features.
    standard_scaler = StandardScaler()
    standard_scaler.fit(train_data)
    train_data = standard_scaler.transform(train_data)
    test_data = standard_scaler.transform(test_data)

    # Normalize to make the features more Gaussian-like.
    train_data = normalize(train_data, return_norm=False, axis=0)
    test_data = normalize(test_data, return_norm=False, axis=0)

    return train_dv, train_data, test_data, train_df, test_df
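

# Hypothetical usage of preprocess() above; the CSV paths and the sample limit
# are assumptions, not values from the original project.
train_dv, train_data, test_data, train_df, test_df = preprocess(
    'data/train.csv', 'data/test.csv', limit=10000)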


import argparse

from torch.autograd import Variable
import torch.optim as optim
from scipy import sparse
from collections import namedtuple

import helpers

# Create a parser for the input arguments.
PARSER = argparse.ArgumentParser(description='Gene tagging with RNN model')
PARSER.add_argument('training_set_path', help='path of training set')
PARSER.add_argument('test_set_path', help='path of test set')

# Parse the arguments; the dev set path is hard-coded.
ARGS = PARSER.parse_args()
ARGS.dev_set_path = 'data/dev.tag'

# Parse the data sets into word and tag sequences.
TRAIN_IDENTIFIERS, TRAIN_X_RAW, TRAIN_Y = helpers.parse_data(
    ARGS.training_set_path)
DEV_IDENTIFIERS, DEV_X_RAW, DEV_Y = helpers.parse_data(ARGS.dev_set_path)
TEST_IDENTIFIERS, TEST_X_RAW, TEST_Y = helpers.parse_data(ARGS.test_set_path)
# Use the test set as the dev set.
DEV_IDENTIFIERS, DEV_X_RAW, DEV_Y = TEST_IDENTIFIERS, TEST_X_RAW, TEST_Y

# Model hyperparameters.
N_WORDS = [0]
N_CHARS = [0]
N_TAGS = [1]
NGRAMS_MAX = [2]
HIDDEN_DIM = 100
BATCH_SIZE = 128
MAX_EPOCHS = 7
MAX_SEQ_LENGTH = 30
LEARNING_RATE = 1E-3
WEIGHT_DECAY = 1E-5


def print_variances(pxs, pys, vxs, vys, rhos, phis, drhos):
    print("x:", get_variance(pxs))
    print("y:", get_variance(pys))
    print("vx:", get_variance(vxs))
    print("vy:", get_variance(vys))
    print("rho:", get_variance(rhos))
    print("phi:", get_variance(phis))
    print("drho:", get_variance(drhos))
    print()


if __name__ == "__main__":
    all_sensor_data1, all_ground_truths1 = parse_data("data/data-1.txt")
    pxs1, pys1, vxs1, vys1, rhos1, phis1, drhos1 = get_all_differences(
        all_sensor_data1, all_ground_truths1)
    print("Variances from: data-1.txt")
    print_variances(pxs1, pys1, vxs1, vys1, rhos1, phis1, drhos1)

    all_sensor_data2, all_ground_truths2 = parse_data("data/data-2.txt")
    pxs2, pys2, vxs2, vys2, rhos2, phis2, drhos2 = get_all_differences(
        all_sensor_data2, all_ground_truths2)
    print("Variances from: data-2.txt")
    print_variances(pxs2, pys2, vxs2, vys2, rhos2, phis2, drhos2)

    print("Combined variances")
    print_variances(pxs1 + pxs2, pys1 + pys2, vxs1 + vxs2, vys1 + vys2,
                    rhos1 + rhos2, phis1 + phis2, drhos1 + drhos2)
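

# Hypothetical sketch of the get_variance() helper used above: the plain
# sample variance of a list of measurement-vs-ground-truth differences.
import numpy as np


def get_variance(differences):
    return float(np.var(differences))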