def updatePredictions(download, preprocess, predictYN, leagues, firstSeason,
                      firstSeasonTest, lastSeason, train):

    if download:
        downloadFiles(firstSeason=firstSeason,
                      firstSeasonTest=firstSeasonTest,
                      lastSeason=lastSeason,
                      train=train,
                      leagues=leagues)
        print('Files downloaded!')

    if preprocess:
        preProcess(firstSeason=firstSeason,
                   firstSeasonTest=firstSeasonTest,
                   lastSeason=lastSeason,
                   train=train,
                   leagues=leagues)
        print('Preprocessing done!')

    if predictYN:
        predict(leagues=leagues)
        print('Predictions done!')

        uploadFileAzure(PREDICTED_FPATH, AZURE_CONNECTION_STRING,
                        AZURE_CONTAINER_NAME)
        print('Uploaded to Azure!')
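
A minimal invocation sketch for the pipeline above; every argument value here (league codes, season years) is an illustrative assumption, not taken from the original project.

# Hypothetical end-to-end run: download raw files, preprocess them, then predict
# and upload the results. League codes and season bounds are placeholder values.
updatePredictions(download=True,
                  preprocess=True,
                  predictYN=True,
                  leagues=['E0', 'SP1'],
                  firstSeason=2010,
                  firstSeasonTest=2018,
                  lastSeason=2022,
                  train=True)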
Example #2
def predict(self, dfTrn, dfTest):
    # Create feature vectors
    self.features_create(dfTrn, dfTest)
    # Make predictions
    mtxTrn, mtxTest, mtxTrnTarget, mtxTestTarget = train.combine_features(self, dfTrn, dfTest)
    train.predict(mtxTrn, mtxTrnTarget.ravel(), mtxTest, dfTest, self)
    # Store predictions in dataframe as class attribute
    self.dfPredictions = dfTest.loc[:, ['id', self.target]]
Example #3
def predict(self, dfTrn, dfTest):
    # Create feature vectors
    self.features_create(dfTrn, dfTest)
    # Make predictions
    mtxTrn, mtxTest, mtxTrnTarget, mtxTestTarget = train.combine_features(
        self, dfTrn, dfTest)
    train.predict(mtxTrn, mtxTrnTarget.ravel(), mtxTest, dfTest, self)
    # Store predictions in dataframe as class attribute
    self.dfPredictions = dfTest.loc[:, ['id', self.target]]
Example #4
def main(args):
    mode = args.mode
    overwrite_flag = args.overwrite

    model_name = 'trajgru'
    data_folder = 'data'
    hurricane_path = os.path.join(data_folder, 'ibtracs.NA.list.v04r00.csv')
    results_folder = 'results'

    config_obj = Config(model_name)
    data = DataCreator(hurricane_path, **config_obj.data_params)
    hurricane_list, weather_list = data.hurricane_list, data.weather_list

    if mode == 'train':
        print("Starting experiments")
        for exp_count, conf in enumerate(config_obj.conf_list):
            print('\nExperiment {}'.format(exp_count))
            print('-*-' * 10)

            batch_generator = BatchGenerator(hurricane_list=hurricane_list,
                                             weather_list=weather_list,
                                             batch_size=conf["batch_size"],
                                             window_len=conf["window_len"],
                                             phase_shift=conf["phase_shift"],
                                             return_mode=conf['return_mode'],
                                             cut_start=conf['cut_start'],
                                             vector_mode=conf['vector_mode'],
                                             vector_freq=conf['vector_freq'],
                                             **config_obj.experiment_params)

            train(model_name, batch_generator, exp_count, overwrite_flag,
                  **conf)

    elif mode == 'test':
        best_model, best_conf, trainer = select_best_model(results_folder)

        batch_generator = BatchGenerator(hurricane_list=hurricane_list,
                                         weather_list=weather_list,
                                         batch_size=best_conf["batch_size"],
                                         window_len=best_conf["window_len"],
                                         phase_shift=best_conf["phase_shift"],
                                         return_mode=best_conf['return_mode'],
                                         cut_start=best_conf['cut_start'],
                                         vector_mode=best_conf['vector_mode'],
                                         vector_freq=best_conf['vector_freq'],
                                         **config_obj.experiment_params)

        print("Testing with best model...")
        predict(best_model, batch_generator, trainer)

    else:
        raise ValueError('unknown mode: {}'.format(mode))
Example #5
def main(_):
    opts = args_parser.opts
    D = opts.flag_values_dict()
    tf.logging.info("FLAGS: ")
    for key in D:
        tf.logging.info('{} = {}'.format(key, D[key]))

    if opts.run_mode == 'train':
        train.train(opts, export=False)
    elif opts.run_mode == 'predict':
        train.predict(opts)
    elif opts.run_mode == 'all':
        train.train(opts, export=False)
        train.predict(opts)
    else:
        raise ValueError("Unsupported run mode.")
Example #6
def create_database_precision(kkk):
    # Load convolutional features for the test set and run the fully connected layers
    conv = cPickle.load(open("data/test_lenet", "rb"))
    num_images_show = kkk
    fully = predict(weights, conv)
    pre = fully[-1]
    hiden1 = fully[0]
    hiden2 = fully[1]
    acc_pre = np.max(pre, axis=1)
    label_pre = np.argmax(pre, axis=1)
    number_query = label_pre.shape[0]

    # Pre-computed database: column 0 holds the image index, the remaining
    # columns hold the hidden-layer feature vector
    data_pre = np.asarray(cPickle.load(open("database/" + "data_h2", "rb")))
    sum_all_query = 0
    for i in range(number_query):
        # L1 distance between the query features and every database entry
        data_softmax = np.linalg.norm(data_pre[:, 2:data_pre.shape[-1]] - hiden2[i], ord=1, axis=1)
        data_cosin = np.append(data_pre[:, :1], data_softmax.reshape(data_softmax.shape[0], 1), axis=1)
        # Keep the num_images_show nearest entries
        data_select = data_cosin[np.argsort(data_cosin[:, -1])][0:num_images_show]
        array_label_select = data_select[:, 0].astype(int)
        label_select = label_train_original[array_label_select]
        if i % 100 == 0:
            print(i * 100 / number_query)
        # Count retrieved images whose label matches the query label
        for j in range(label_select.shape[0]):
            if label_select[j] == label_test_original[i]:
                sum_all_query += 1

    sum_sum = sum_all_query / (number_query * label_select.shape[0])
    print(sum_sum)
Example #7
def processImage(image, debug=False):
    """
    :param image: (bgr image)
    :param debug: (bool)
    :return:(float, float)
    """
    x, y = predict(model, image)
    if debug:
        return x, y

    # Compute bezier path and target point
    control_points = computeControlPoints(x, y, add_current_pos=True)
    target = bezier(TARGET_POINT, control_points)

    # Linear Regression to fit a line
    # It estimates the line curve

    # Case x = cst, m = 0
    if len(np.unique(x)) == 1:  # pragma: no cover
        turn_percent = 0
    else:
        # Linear regression using least squares method
        # x = m*y + b -> y = 1/m * x - b/m if m != 0
        A = np.vstack([y, np.ones(len(y))]).T
        m, b = np.linalg.lstsq(A, x, rcond=-1)[0]

        # Compute the angle between the reference and the fitted line
        track_angle = np.arctan(1 / m)
        diff_angle = abs(REF_ANGLE) - abs(track_angle)
        # Estimation of the line curvature
        turn_percent = (diff_angle / MAX_ANGLE) * 100
    return turn_percent, target[0]
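
A hedged usage sketch for processImage; the image path is an assumption, and model, computeControlPoints, bezier and the angle constants are expected to come from the surrounding module.

import cv2

# Illustrative call on a single frame (path is a placeholder).
frame = cv2.imread('track_frame.jpg')   # BGR image, as expected by processImage
turn_percent, target_x = processImage(frame)
print(turn_percent, target_x)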
Example #8
 def predict(self, x, batch_sizes):
     # compute the siamese embeddings of the input data
     return train.predict(self.outputs['A'],
                          x_unlabeled=x,
                          inputs=self.orig_inputs,
                          y_true=self.y_true,
                          batch_sizes=batch_sizes)
Example #9
def request_move(model, board, chance=2):
    use_random = random.random() < chance
    if use_random:
        prediction = torch.rand(7)
        return prediction
    else:
        return predict(model, board)
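
A brief usage note: with the default chance=2, random.random() < chance is always true, so every move is random; passing a small chance mixes occasional random exploration with the model's predictions. The call below is illustrative; model and board are assumed to be defined elsewhere in the module.

import torch

# Explore randomly ~10% of the time, otherwise use the model's scores.
scores = request_move(model, board, chance=0.1)
column = int(torch.argmax(scores))   # pick the highest-scoring of the 7 columns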
Example #10
def algorithm(type, data, token):
    train.DFPreprocessing(data)
    result = train.predict(data)
    new_data = train.HeartRate(data)[1]
    for index, row in new_data.iterrows():
        saveData(token, "heartrate", row["heartRate"], row["timestamp"])
    saveData(token, "predict", result)
    return result
Example #11
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()
    if opt.predict:
        save_dir = cur_dir + '/../result/'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        save_path = save_dir + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M") + ".csv"
        test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='predict')
        test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)
        predict(opt.model_path, test_loader, save_path, device)
        return

    
    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='test')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)
    need_field = {"lr", "epoch", "batch_size"}
    log_name = "".join([k + "_" + str(v) for k, v in opt.__dict__.items() if k in need_field])
    log_dir = cur_dir + '/../log/' + str(opt.dataset) + '/' + log_name
    model_dir = cur_dir + '/../model/' + str(opt.dataset)
    model_path = cur_dir + '/../model/' + str(opt.dataset) + '/' + log_name + '.pth'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    logging.warning('model will be saved to {}'.format(model_path))
    writer = SummaryWriter(log_dir)
    
    node_d = {'diginetica': 43097,  'yoochoose1_64': 37483,  'yoochoose1_4': 37483, 'debias': 117538}
    n_node =  node_d.get(opt.dataset, 309)
    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)

    logging.warning(model)
    
    for epoch in tqdm(range(opt.epoch)):
        #scheduler.step()
        forward(model, train_loader, device, writer, epoch, scheduler, top_k=opt.top_k, optimizer=optimizer, train_flag=True)
        with torch.no_grad():
            forward(model, test_loader, device, writer, epoch, top_k=opt.top_k, train_flag=False)
    torch.save(model, model_path) 
Example #12
 def test_predict(self):
     model = SimpleClassifier()
     puzzgen = SimplePuzzleGenerator()
     mgr = PuzzleDataLoader(puzzgen, 2, 2, 2, 2)
     _, loader = mgr.get_loaders(epoch=0)
     for (data, response) in loader:
         predictions = predict(model, data) 
         expected = tensor([0, 0])  # predicts choice 0 for all questions
         assert compare_tensors(predictions, expected)
Example #13
def main(_):
    config = flags.FLAGS
    if config.length_num == 'auto':
        config.length_num = config.max_packet_length // config.length_block + 4
    else:
        config.length_num = int(config.length_num)
    if config.decay_step != 'auto':
        config.decay_step = int(config.decay_step)
    if config.mode == 'train':
        train.train(config)
    elif config.mode == 'prepro':
        preprocess.preprocess(config)
    elif config.mode == 'test':
        print(config.test_model_dir)
        train.predict(config)
    else:
        print('unknown mode: {}, only train, prepro and test are supported'.format(config.mode))
        raise Exception
Example #14
def P3():
    print("hello")
    while True:
        choice = input("Enter choice: \n\t1. Train\n\t2. Predict")
        if choice == '1':
            train()
        elif choice == '2':
            image = input("Enter Image name: ")
            predict(image)
        else:
            print("Enter a valid option!")

        yn = input("Do you want to continue?")
        if yn == 'y' or yn == 'Y':
            continue
        elif yn == 'n' or yn == 'N':
            break
        else:
            print("Enter a valid option!")
Example #15
 def update(self, targets, outputs):
     probs = torch.sigmoid(outputs)
     dice, dice_neg, dice_pos, _, _ = metric(probs, targets,
                                             self.base_threshold)
     self.base_dice_scores.append(dice)
     self.dice_pos_scores.append(dice_pos)
     self.dice_neg_scores.append(dice_neg)
     preds = predict(probs, self.base_threshold)
     iou = compute_iou_batch(preds, targets, classes=[1])
     self.iou_scores.append(iou)
Example #16
def recognize_number(digit):
    temp = digit.resize((28, 28))
    # print(np.array(temp))
    temp = Image.fromarray(255 - np.array(temp))
    # temp = preprocess(temp)
    temp.show()
    trans = transforms.Compose([transforms.ToTensor()])
    temp = trans(temp)
    temp = temp.view(1, 1, 28, 28)
    return predict(temp)
Example #17
def chat():
    msg_dict = request.get_json()
    speaker = msg_dict['speaker']
    msg = msg_dict['msg']

    output = predict(msg)

    if output is not None:
        return jsonify({'status': 'success', 'data': {'output': output}})
    else:
        return jsonify({'status': 'fail', 'data': {}})
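
A hedged client-side sketch for this endpoint; the URL and route path are assumptions (the route decorator is not shown), but the 'speaker' and 'msg' fields match what the handler reads from the JSON body.

import requests

resp = requests.post('http://localhost:5000/chat',
                     json={'speaker': 'user', 'msg': 'hello there'})
print(resp.json())   # {'status': 'success', 'data': {'output': ...}} on success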
Example #18
    def do_GET(self):

        if "getVAD" in self.path:
            from urllib.parse import urlparse, parse_qs
            query_components = parse_qs(urlparse(self.path).query)
            words = query_components.get('words', [''])[0]
            if words:
                v, a, d = train.predict(words)
                self.respond('{"v": ' + str(v) + ', "a": ' + str(a) + ', "d": ' + str(d) + '}')

        else:
            super(MyHandler, self).do_GET()  # serves the static src file by default
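
A hedged request sketch for the getVAD handler above; the host and port are assumptions, while the 'words' query parameter and the v/a/d response keys come from the handler itself.

import requests

resp = requests.get('http://localhost:8000/getVAD', params={'words': 'wonderful day'})
print(resp.json())   # e.g. {"v": ..., "a": ..., "d": ...}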
Example #19
 def predict(self, x):
     # test inputs do not require the 'Labeled' input
     inputs_test = {
         'Unlabeled': self.inputs['Unlabeled'],
         'Orthonorm': self.inputs['Orthonorm']
     }
     return train.predict(self.outputs['Unlabeled'],
                          x_unlabeled=x,
                          inputs=inputs_test,
                          y_true=self.y_true,
                          x_labeled=x[0:0],
                          y_labeled=self.y_train_labeled_onehot[0:0],
                          batch_sizes=self.batch_sizes)
Example #20
def evaluation(annotation_path, taxonomy_path, mel_dir, 
					models_dir, output_dir):
	
	os.makedirs(output_dir, exist_ok=True)

	with open(taxonomy_path, 'r') as f:
		taxonomy = yaml.load(f, Loader=yaml.Loader)


	file_list = [os.path.splitext(f)[0]+'.wav' for f in os.listdir(mel_dir) if 'npy' in f]
	file_list.sort()

	test_file_idxs = range(len(file_list))

	model_list = [f for f in os.listdir(models_dir) if 'pth' in f]


	val_loss = [float(f.split('_')[-1][:-4]) for f in model_list]
	model_filename = model_list[np.argmin(val_loss)]

	model = MyCNN()
	model.load_state_dict(torch.load(os.path.join(models_dir, model_filename)))

	if torch.cuda.is_available():
		model.cuda()
	model.eval()

	mel_list = load_mels(file_list, mel_dir)
	y_pred = predict(mel_list, test_file_idxs, model)

	aggregation_type = 'max'
	label_mode = 'coarse'
	generate_output_file(y_pred, test_file_idxs, output_dir, file_list,
								 aggregation_type, label_mode, taxonomy)
Example #21
def main():
    args = get_args()
    weight_path = args.weight_path
    if not os.path.exists(RESPATH):
        os.makedirs(RESPATH)
    (Xtr, Ytr), (Xte, Yte) = train.load_data(DATAPATH)
    net_out = Ytr.shape[1]
    Xtr, Ytr_norm, Xte, Yte_norm, Y_means, Y_stds = train.standardize_data(
        Xtr, Ytr, Xte, Yte)
    model = train.build_model(net_out)
    model.compile(loss='mse', optimizer='adam')
    model.load_weights(weight_path)
    Ytr_pred, Yte_pred = train.predict(model, Xtr, Xte, Y_means, Y_stds)
    train.savedata(Ytr, Ytr_pred, Yte, Yte_pred, respath=RESPATH)
Example #22
def confu_matrix():
    res = np.zeros((10, 10))
    conv = cPickle.load(open("data/datatrain_326464", "rb"))
    #num_images_show = 100
    fully = predict(weights, conv)
    pre = fully[-1]
    hiden1 = fully[0]
    acc_pre = np.max(pre, axis=1)
    label_pre = np.argmax(pre, axis=1)
    number_query = label_pre.shape[0]
    data_pre = np.asarray(cPickle.load(open("database/" + "data", "rb")))
    for i in range(label_train_original.shape[0]):
        res[label_train_original[i], label_pre[i]] += 1
    print(res.astype(int))
Example #23
def main():
    model = train.load_checkpoint(path)
    with open(cat_names, 'r') as json_file:
        cat_to_name = json.load(json_file)
    probabilities = train.predict(path_image, model, top_k, device)
    labels = [
        cat_to_name[str(index + 1)] for index in np.array(probabilities[1][0])
    ]
    probability = np.array(probabilities[0][0])
    i = 0
    while i < top_k:
        print("{} it`s probability {}".format(labels[i], probability[i]))
        i += 1
    print("predect is done!")
Example #24
def main():
    args = get_args()
    weight_path = args.weight_path
    if not os.path.exists(RESPATH):
        os.makedirs(RESPATH)
    viddata, auddata = train.load_data(DATAPATH)
    net_out = auddata.shape[1]
    viddata, auddata_norm, auddata_means, auddata_stds = standardize_data(
        viddata, auddata)
    model = train.build_model(net_out)
    model.compile(loss='mse', optimizer='adam')
    model.load_weights(weight_path)
    aud_pred = train.predict(model, viddata, auddata_means, auddata_stds)
    np.save(join(RESPATH, 'aud_pred.npy'), aud_pred)
Example #25
def validate_images():
    """
    Used to check that the images in the validate directory are detected 
    correctly. Results are written to a .txt file.
    """
    file1 = open("validate/letters.txt","a") 
    
    for i in range(0, 18):
        original_img = cv2.imread("validate/{}.jpg".format(i))
        resized_img = cv2.resize(original_img, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
        cv2.imwrite("validate/{}_resize.jpg".format(i), resized_img)
        file1.write(predict(resized_img, MODEL_VGG) + "\n")
    
    file1.close()       
Example #26
def main():
    w = np.load('w.npy')
    ordering = np.load('ordering.npy')

    classpaths = [sys.argv[1]]
    X, _, _ = create_dataset(classpaths, ordering)
    y = predict(X, w).item()

    sorted_y = sorted(softmax(X.dot(w)).flatten())
    confidence = 1
    if len(sorted_y) > 1:
        confidence = round(sorted_y[-1] - sorted_y[-2], 2)

    category = get_datasets()[y]
    print(json.dumps({"category": category, "confidence": confidence}))
Example #27
def create_database_F1():
	conv	= cPickle.load(open("data/datatrain_326464","rb"))
	fully 	= predict(weights, conv)
	pre 	= fully[-1]
	hiden1 	= fully[0]
	label_pre 	= np.argmax(pre, axis = 1)
	for i in range(conv.shape[0]):
		array_temp = np.append((i,label_pre[i]), hiden1[i])
		database[label_pre[i]].append(array_temp)
		if i%200 == 0:
			print(str(i*100/conv.shape[0]) + "%")
	if not os.path.exists(folder_database):
		os.makedirs(folder_database)
	for i in range(len(database)):
		cPickle.dump(database[i],open(folder_database+"/"+"data_F1_"+str(i),"wb"))
Example #28
def predict_by_modelMLP(model_path, test_images):
    '''
    NOTE, this only works for MultiLayerPerceptron models
    :param model_path: path to the previously trained model, i.e. model.ckpt
    :param test_images:
    :return: predicted y
    '''
    model = base.MultiLayerPerceptron()
    # model = base.MultiLayerPerceptron(input_dim=784, output_dim=10, \
    #                                   hidden_dims=[512], activations=['relu', None], \
    #                                   learning_rate=0.3, dropout = False, \
    #                                   costfunc = utils.cross_entropy, optimizer='GD')

    y = op.predict(model, model_path, test_images)
    return y
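
A hedged usage sketch for predict_by_modelMLP; the checkpoint path and the test array are assumptions, shaped to match the 784-input MLP mentioned in the commented-out constructor.

import numpy as np

# Placeholder test batch of flattened 28x28 images.
test_images = np.random.rand(16, 784).astype(np.float32)
y_pred = predict_by_modelMLP('checkpoints/model.ckpt', test_images)
print(y_pred)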
Example #29
def main():
    try:
        mileage = get_user_input()
    except:
        print("This is not a valid value.")
        return -1
    theta0, theta1 = 0, 0
    try:
        theta0, theta1 = recover()
        if type(theta0) is not float or type(theta1) is not float:
            print("Recovered datas are corrupted !")
            theta0, theta1 = 0, 0
    except:
        pass
    prediction = predict(theta0, theta1, mileage)
    print(f"The prediction value is: {prediction}")
Example #30
def get_accuracy(cnn, test_file, true_label):
    correct_count = 0
    total_count = 0
    accuracy = 0
    f = open(test_file, "r")
    max_kernel_size = max(cnn.args.kernel_sizes)
    for line in f:
        tokens_count = len([val for val in line.split(" ")])
        if tokens_count < max_kernel_size:
            line = line + " ".join(["<pad>"] *
                                   (max_kernel_size - tokens_count))
        output = train.predict(line, cnn, text_field, label_field, args.cuda)
        if output == true_label:
            correct_count += 1
        total_count += 1
    accuracy = correct_count / total_count
    return accuracy, correct_count, total_count
Example #31
def ImageResult():
	if request.method == 'POST':
		file = request.files['file']

		if file:
			filename = secure_filename(file.filename)
			file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

		upload = '../web app implemenation/static/images/' + filename
		img1 = image.load_img(upload)
		img1 = image.img_to_array(img1)

		model = predict()
		results = model.detect([img1])

		occupied, empty = draw_image_with_boxes(upload,results[0]['rois'],results[0]['class_ids'])

		return render_template("prediction.html", occupied=occupied, empty=empty)
Example #32
def main(model_file, test_file, test_pred, write_score):
    print("Loading model...")
    model = pickle.load(open(model_file, 'rb'))
    print("Predicting test...")
    predict(model, test_file, test_pred, write_score)
Example #33
dfTrn_ML = dfTrn_All_5_8
dfTest_ML = dfTest_All_1_5
mtxTrn, mtxTest = features.standardize(dfTrn_ML, dfTest_ML, quant_features)
#--Combine the standardized quant features and the vectorized categorical features--#
#mtxTrn = hstack([mtxTrn,vecTrn_BusOpen])  #vecTrn_BusOpen,vecTrn_Cats,vecTrn_Zip,
#mtxTest = hstack([mtxTest,vecTest_BusOpen]) #vecTest_Master_Cats,vecTest_Master_Zip,
#--Test without the vecZip and vecCats--#
#mtxTrn = hstack([mtxTrn,vecTrn_BusOpen])
#mtxTest = hstack([mtxTest,vecTest_Master_BusOpen])
#--select target--#
mtxTarget = dfTrn_ML.loc[:, ['rev_stars']].to_numpy()

#--Use classifier for cross validation--#
train.cross_validate(mtxTrn,mtxTarget,clf,folds=10,SEED=42,test_size=.2)  #may require mtxTrn.toarray()

#--Use classifier for predictions--#
dfTest_ML, clf = train.predict(mtxTrn,mtxTarget,mtxTest,dfTest_ML,clf,clf_name) #may require mtxTest.toarray()

#--Save predictions to file--#
train.save_predictions(dfTest_ML,clf_name,'_All_1_5_KitchenSink',submission_no)

#---------End Machine Learning Section-------------#

#------------------------------Optional Steps----------------------------------#
#--Memory cleanup prior to running the memory intensive classifiers--#
dfTrn,dfTest,dfAll = utils.data_garbage_collection(dfTrn,dfTest,dfAll)

#--use a benchmark instead of a classifier--#
benchmark_preds = train.cross_validate_using_benchmark('3.5', dfTrn, dfTrn[0].merge(dfTrn[1], how='inner', on='business_id').to_numpy(), dfTrn[0].loc[:, ['rev_stars']].to_numpy(), folds=3, SEED=42, test_size=.15)
benchmark_preds = train.cross_validate_using_benchmark('global_mean', dfTrn, dfTrn[0].merge(dfTrn[1], how='inner', on='business_id').to_numpy(), dfTrn[0].loc[:, ['rev_stars']].to_numpy(), folds=3, SEED=42, test_size=.15)
benchmark_preds = train.cross_validate_using_benchmark('business_mean', dfTrn, dfTrn[0].merge(dfTrn[1], how='inner', on='business_id').to_numpy(), dfTrn[0].loc[:, ['rev_stars']].to_numpy(), folds=3, SEED=42, test_size=.15)
benchmark_preds = train.cross_validate_using_benchmark('usr_mean', dfTrn, dfTrn[0].merge(dfTrn[2], how='inner', on='user_id').to_numpy(), dfTrn[0].merge(dfTrn[2], how='inner', on='user_id').loc[:, ['rev_stars']].to_numpy(), folds=3, SEED=22, test_size=.15)
Example #34
                                folds=10,SEED=42,test_size=.1,clf=clf,clf_name=clf_name,pred_fg=True)
train.cross_validate(mtxTrn,mtxTrnTarget.ravel(),folds=8,SEED=888,test_size=.1,clf=clf,clf_name=clf_name,pred_fg=False)
train.cross_validate_temporal(mtxTrn,mtxTest,mtxTrnTarget.ravel(),mtxTestTarget.ravel(),clf=clf,
                              clf_name=clf_name,pred_fg=False)
train.cross_validate_using_benchmark('global_mean',dfTrn, mtxTrn,mtxTrnTarget,folds=20)


################################################################################################
#---Calculate the degree of variance between ground truth and the mean of the CV predictions.----#
#---Returns a list of all training records with their average variance---#
train.calc_cv_preds_var(dfTrn,cv_preds)


################################################################################################
#--Use estimator for manual predictions--#
dfTest, clf = train.predict(mtxTrn,mtxTrnTarget.ravel(),mtxTest,dfTest,clf,clf_name) #may require mtxTest.toarray()
dfTest, clf = train.predict(mtxTrn.todense(),mtxTrnTarget.ravel(),mtxTest.todense(),dfTest,clf,clf_name) #may require mtxTest.toarray()

################################################################################################
#--Save feature matrices in svm format for external modeling--#
y_trn = np.asarray(dfTrn.num_votes)
y_test = np.ones(mtxTest.shape[0], dtype=int)
dump_svmlight_file(mtxTrn, y_trn, f='Data/Votes_trn.svm', zero_based=False)
dump_svmlight_file(mtxTest, y_test, f='Data/Votes_test.svm', zero_based=False)

################################################################################################
#--Save a model to joblib file--#
data_io.save_cached_object(clf,'rf_500_TextAll')

#--Load a model from joblib file--#
data_io.load_cached_object('Models/040513--rf_500_TextAll.joblib.pk1')
Example #35
def output():
    output = open('output.dta', 'w+')
    with open('qual.dta', 'r') as qual:
        for line in qual:
            user_id, movie_id, time = [int(v) for v in line.split()]
            output.write(str(predict(movie_id - 1, user_id - 1, uf, mf)) + "\n")