Example no. 1
def recommend():
    try:
        data = request.json
        user_history = parse_data(data)
        app.logger.error(user_history)
        
        if not user_history:
            return jsonify(baseline)
        else:
            inp = preprocess(user_history)
            resp = requests.post(
                'http://localhost:8501/v1/models/model:predict', json=inp)
            outputs = resp.json()['outputs'][0]
            items_to_recommend = np.argsort(outputs)[::-1][:30]
            predictions = product_tokenizer.convert_ids_to_tokens(
                items_to_recommend)
            predictions = [pred for pred in predictions if pred[0] != '[']  # remove [SEP], [CLS], etc from predictions
            app.logger.error(predictions)

            predictions = predictions + [
                base_task
                for base_task in baseline["recommended_products"]
                if base_task not in predictions
            ]
            return jsonify({"recommended_products": predictions[:30]})
    except Exception as e:
        app.logger.error(e)
        return jsonify(baseline)
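The preprocess helper called above is not shown; since this variant reads the response from the 'outputs' key, it presumably builds the columnar "inputs" payload of TensorFlow Serving's REST predict API. A minimal sketch, assuming the served model takes one fixed-length sequence of token ids and reusing the product_tokenizer from the snippet (max_len is a hypothetical parameter):

def preprocess(user_history, max_len=64):
    # Hypothetical sketch: map the purchase history to vocabulary ids
    ids = product_tokenizer.convert_tokens_to_ids(user_history)
    # Pad / truncate to a fixed length (assumed model input shape)
    ids = (ids + [0] * max_len)[:max_len]
    # Columnar "inputs" request format; TF Serving then answers under "outputs"
    return {"inputs": [ids]}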
def recommend():
    try:
        data = request.get_json(force=True)
        user_id, user_frame = parse_data(data)
        products = user_frame["product_id"].unique()
        if not user_id:
            return jsonify(baseline)
        return jsonify({
            'recommended_products': [
                pred[1] for pred in
                predict_user(model, user_id, products, product_dict,
                             reverse_product_dict, matrix_shape)
            ]
        })
    except Exception:
        return jsonify(baseline)
def recommend():
    try:
        data = request.get_json(force=True)
        user_id, user_frame = parse_data(data)
        if not user_id:
            return jsonify(baseline)
        return jsonify({
            'recommended_products':
            list(
                model.recommend(users=[user_id],
                                exclude_known=False,
                                new_observation_data=user_frame,
                                k=30)["product_id"])
        })
    except Exception:
        return jsonify(baseline)
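All three handlers fall back to a baseline response and rely on a parse_data helper that is not shown. A minimal sketch of what they might look like, assuming the request body carries a user id and a list of past purchases (the product ids below are placeholders):

import pandas as pd

# Hypothetical fallback used when the user is unknown or prediction fails,
# e.g. the overall top sellers.
baseline = {"recommended_products": ["P001", "P002", "P003"]}

def parse_data(data):
    # Hypothetical sketch matching the (user_id, user_frame) variants above:
    # one row per past purchase, with at least a 'product_id' column.
    user_id = data.get("user_id")
    user_frame = pd.DataFrame(data.get("purchases", []))
    return user_id, user_frame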
Example no. 4
    def parse(self, response):
        soup = soupify(html=response.css('#bodyarea').extract_first())
        containers = soup.find(
            id=re.compile('quickModForm')).table.find_all(filter_message)

        rows = []
        for row in containers:
            data = parse_data(row)

            if data is not None:
                rows.append(data)

        self.secret = hash(self.secret + 100)
        out_path = 'out/bitcointalk.' + str(abs(self.secret)) + '.json'
        with open(out_path, 'w') as outfile:
            json.dump(rows, outfile)

        next_page = response.css(
            'table .prevnext:nth-child(n+2) a ::attr(href)').extract_first()
        if next_page:
            yield scrapy.Request(response.urljoin(next_page),
                                 callback=self.parse)
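soupify, filter_message, and parse_data are project helpers not shown here. A rough sketch of the first two, assuming message rows are table rows that contain a post body element (the 'post' class is an assumption):

from bs4 import BeautifulSoup

def soupify(html):
    # Hypothetical: wrap the extracted HTML fragment in a BeautifulSoup tree
    return BeautifulSoup(html, 'html.parser')

def filter_message(tag):
    # Hypothetical predicate for find_all(): keep only table rows that
    # contain a post body
    return tag.name == 'tr' and tag.find(class_='post') is not None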
def debug_print_sensordata(sensor_data):
    for sens_data in sensor_data:
        if sens_data.get_name() == "radar":
            x, y, vx, vy = sens_data.get()
            print('x={}, y={}, vx={}, vy={}'.format(x, y, vx, vy))


if __name__ == '__main__':

    # Get the ground truths and the measurement data from the input file data/data-1.txt
    clean_all_sensor_data, all_ground_truths = parse_data("data/data-1.txt")

    print('plain radar sensor data\n')
    # debug_print_sensordata(clean_all_sensor_data)
    #get the predictions from the EKF class
    clean_all_state_estimations = get_state_estimations(
        EKF1, clean_all_sensor_data)

    #calculate the RMSE between the estimations and ground truths
    px, py, vx, vy = get_RMSE(clean_all_state_estimations, all_ground_truths)
    # Print RMSE
    print('RMSE: {},{},{},{}'.format(px, py, vx, vy))
    print('\n')
    #print the EKF data
    # print_EKF_data(all_sensor_data, all_ground_truths, all_state_estimations,
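get_RMSE is defined elsewhere in the project; RMSE itself is standard, so here is a minimal equivalent, under the assumption that each estimation and ground truth is a (px, py, vx, vy) tuple:

import numpy as np

def get_RMSE(estimations, ground_truths):
    # Per-component root-mean-square error between state estimates and
    # ground truths; assumes both are sequences of (px, py, vx, vy) tuples.
    est = np.asarray(estimations, dtype=float)
    gt = np.asarray(ground_truths, dtype=float)
    rmse = np.sqrt(np.mean((est - gt) ** 2, axis=0))
    return tuple(rmse)  # (px, py, vx, vy)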
def preprocess(train_file, test_file, limit=None, remove_low_variance=True, remove_outliers=True):
    train_df = pd.read_csv(train_file)
    test_df = pd.read_csv(test_file)

    if limit is None:
        limit = len(train_df)
    if 0 < limit < len(train_df):
        print('Limited Sample: ' + str(limit))
        train_df = train_df.sample(n=limit)

    train_df = helpers.parse_data(train_df)
    test_df = helpers.parse_data(test_df)

    keepColumns = ['QuoteConversion_Flag']

    train_df, keepColumns = helpers.categorical_to_many(train_df, ['Geographic_info5'], keepColumns)
    test_df, _ = helpers.categorical_to_many(test_df, ['Geographic_info5'], keepColumns)

    # Pad the train and test frames so they share the same set of columns
    for key in list(set(train_df.keys()) - set(test_df.keys())):
        test_df.loc[:, key] = pd.Series(np.zeros(len(test_df['Original_Quote_Date'])), index=test_df.index)
    for key in list(set(test_df.keys()) - set(train_df.keys())):
        train_df.loc[:, key] = pd.Series(np.zeros(len(train_df['Original_Quote_Date'])), index=train_df.index)

    # Feature Pre-Selection

    # Drop Personal_info5, it has a lot of empty values
    train_df.drop(columns=['Personal_info5'], inplace=True)
    test_df.drop(columns=['Personal_info5'], inplace=True)

    # Remove Rows with empty values
    train_df.dropna(inplace=True)
    # Fill empty values in the test dataset (both affected columns are Y/N values) by forward-filling the previous value
    test_df.fillna(method='ffill', inplace=True)

    if remove_low_variance:
        train_df, removed_columns = helpers.remove_low_variance(train_df, keepColumns)
        test_df.drop(columns=removed_columns, inplace=True)

    print('DataFrame shape after feature selection:' + str(train_df.shape))

    # Detect and Remove outliers
    if remove_outliers:
        train_df = helpers.remove_outliers(train_df)

    print('DataFrame shape after outlier removal:' + str(train_df.shape))

    # Extract dependent variable from dataset
    train_dv = np.array(train_df['QuoteConversion_Flag'])
    train_data = np.array(train_df.drop(columns=['QuoteConversion_Flag', 'Quote_ID']))
    test_data = np.array(test_df.drop(columns=['QuoteConversion_Flag','Quote_ID']))


    # Scale things
    standard_scaler = StandardScaler()
    standard_scaler.fit(train_data)
    train_data = standard_scaler.transform(train_data)
    test_data = standard_scaler.transform(test_data)

    # Scale each feature (column) to unit L2 norm
    train_data = normalize(train_data, return_norm=False, axis=0)
    test_data = normalize(test_data, return_norm=False, axis=0)

    return train_dv, train_data, test_data, train_df, test_df
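A typical call site for the function above, with hypothetical file paths and sample limit:

train_dv, train_data, test_data, train_df, test_df = preprocess(
    'data/train.csv', 'data/test.csv', limit=50000)
print(train_data.shape, test_data.shape)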
Example no. 7
import argparse

from torch.autograd import Variable
import torch.optim as optim
from scipy import sparse
from collections import namedtuple

import helpers

# Create parser for input arguments
PARSER = argparse.ArgumentParser(description='Gene tagging with RNN model')
PARSER.add_argument('training_set_path', help='path of training set')
PARSER.add_argument('test_set_path', help='path of test set')

# Parse the command-line arguments; the dev set path is hard-coded
ARGS = PARSER.parse_args()
ARGS.dev_set_path = 'data/dev.tag'

# Parse data sets into word and tag sequences
TRAIN_IDENTIFIERS, TRAIN_X_RAW, TRAIN_Y = helpers.parse_data(
    ARGS.training_set_path)
DEV_IDENTIFIERS, DEV_X_RAW, DEV_Y = helpers.parse_data(ARGS.dev_set_path)
TEST_IDENTIFIERS, TEST_X_RAW, TEST_Y = helpers.parse_data(ARGS.test_set_path)
# NOTE: the dev split is overwritten with the test split here
DEV_IDENTIFIERS, DEV_X_RAW, DEV_Y = TEST_IDENTIFIERS, TEST_X_RAW, TEST_Y

# Model hyperparameters
N_WORDS = [0]
N_CHARS = [0]
N_TAGS = [1]
NGRAMS_MAX = [2]
HIDDEN_DIM = 100
BATCH_SIZE = 128
MAX_EPOCHS = 7
MAX_SEQ_LENGTH = 30
LEARNING_RATE = 1E-3
WEIGHT_DECAY = 1E-5
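N_WORDS, N_CHARS, and N_TAGS above look like single-element placeholders that are filled after the data is parsed. A hypothetical sketch of that step, assuming TRAIN_X_RAW is a list of word sequences and TRAIN_Y a list of tag sequences, with index 0 reserved for padding/unknown:

# Hypothetical vocabulary construction
word_to_ix = {w: i + 1 for i, w in
              enumerate(sorted({w for seq in TRAIN_X_RAW for w in seq}))}
char_to_ix = {c: i + 1 for i, c in
              enumerate(sorted({c for seq in TRAIN_X_RAW for w in seq for c in w}))}
tag_to_ix = {t: i for i, t in
             enumerate(sorted({t for seq in TRAIN_Y for t in seq}))}

N_WORDS[0] = len(word_to_ix) + 1
N_CHARS[0] = len(char_to_ix) + 1
N_TAGS[0] = len(tag_to_ix)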

def print_variances(pxs, pys, vxs, vys, rhos, phis, drhos):
    print("x:", get_variance(pxs))
    print("y:", get_variance(pys))
    print("vx:", get_variance(vxs))
    print("vy:", get_variance(vys))
    print("rho:", get_variance(rhos))
    print("phi:", get_variance(phis))
    print("drho:", get_variance(drhos))
    print()


if __name__ == "__main__":

    all_sensor_data1, all_ground_truths1 = parse_data("data/data-1.txt")
    pxs1, pys1, vxs1, vys1, rhos1, phis1, drhos1 = get_all_differences(
        all_sensor_data1, all_ground_truths1)

    print("Variances from: data-1.txt")
    print_variances(pxs1, pys1, vxs1, vys1, rhos1, phis1, drhos1)

    all_sensor_data2, all_ground_truths2 = parse_data("data/data-2.txt")
    pxs2, pys2, vxs2, vys2, rhos2, phis2, drhos2 = get_all_differences(
        all_sensor_data2, all_ground_truths2)

    print("Variances from: data-2.txt")
    print_variances(pxs2, pys2, vxs2, vys2, rhos2, phis2, drhos2)

    print("Combined variances")
    print_variances(pxs1 + pxs2, pys1 + pys2, vxs1 + vxs2, vys1 + vys2,
                    rhos1 + rhos2, phis1 + phis2, drhos1 + drhos2)
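get_variance and get_all_differences are project helpers not shown here. A minimal equivalent of the former, assuming it computes the variance of a list of residuals (estimate minus ground truth):

import numpy as np

def get_variance(differences):
    # Hypothetical sketch: population variance of a list of residuals
    return float(np.var(np.asarray(differences, dtype=float)))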