Example #1
0
def Logistic_Regression(kalender, predicted_length, weekday):
    """
    Method used to test the Logistic Regression prototype

    The calendar is split by a rule of thumb: the first two thirds of its
    time blocks are used for training and the rest for testing. Both sets
    are filtered down to the requested weekday, a logistic regression model
    is fitted on (start time, length) -> class, and a prediction is made for
    every start time yielded by ``p.decimal_range(8, 17, 0.25)``. The
    predictions are then scored against the free blocks of the test set, a
    summary is printed and the predicted values are plotted together with
    the threshold line.

    :param kalender: The calendar to use; data is read from
        ``scheman/<kalender>.csv``
    :param predicted_length: Duration of the predicted block
    :param weekday: The specific weekday to perform tests on
    :return: None; results are printed and shown in a matplotlib window
    """
    csv_path = "scheman/" + str(kalender) + ".csv"

    # Rule of thumb: first 2/3 of the blocks for training, last 1/3 for test.
    n_timeblocks = p.Get_n_blocks(csv_path)
    split_index = np.floor(n_timeblocks * (2 / 3))

    training_set = p.Fetch_Data(csv_path, 0, split_index)
    test_set = p.Fetch_Data(csv_path, split_index, n_timeblocks)
    # InsertFreeSpace is called for its effect on the sets (its return
    # value was never used); [1, 0] are the class labels passed to it.
    p.InsertFreeSpace(training_set, [1, 0])
    p.InsertFreeSpace(test_set, [1, 0])

    # Filter by a specific weekday
    filtered_training = p.GetBlocksByWeekday(training_set, weekday)
    filtered_test = p.GetBlocksByWeekday(test_set, weekday)

    # Training data: both free (class 1) and occupied (class 0) blocks
    training_start_times = p.GetStartTimes_decimal(filtered_training)
    training_lengths = p.GetLengths(filtered_training)
    training_classes = [
        day[3][j]
        for day in filtered_training
        for j in range(len(day[1]))
    ]

    # Test data, kept separate for free and occupied blocks
    test_free_points = p.Prepare_Plane(
        p.GetStartTimes_By_Classes(filtered_test, 1),
        p.GetLengths_By_Classes(filtered_test, 1))
    test_occupied_points = p.Prepare_Plane(
        p.GetStartTimes_By_Classes(filtered_test, 0),
        p.GetLengths_By_Classes(filtered_test, 0))

    # Threshold: share of the training time that is free, in percent.
    training_points = p.Prepare_Plane(training_start_times, training_lengths)
    total_training_hours = 0
    free_training_hours = 0
    for i, point in enumerate(training_points):
        total_training_hours += point[1]
        if training_classes[i] == 1:
            free_training_hours += point[1]
    if total_training_hours:
        threshold_value = (free_training_hours / total_training_hours) * 100
    else:
        # No training time for this weekday: avoid dividing by zero.
        threshold_value = 0.0

    # Create the log_reg object and predict for every candidate start time.
    # The threshold is only drawn in the plot; predictions are not filtered
    # by it.
    lr = sv.LogRegression([training_start_times, training_lengths],
                          training_classes)
    predicted_times = []
    for start in p.decimal_range(8, 17, 0.25):
        prediction = lr.Predict([start, predicted_length])
        predicted_times.append([start, prediction[0][1]])

    free_test_hours = sum(point[1] for point in test_free_points)
    occupied_test_hours = sum(point[1] for point in test_occupied_points)
    total_test_hours = free_test_hours + occupied_test_hours

    # Calculate predicted free-time: for every (free block, prediction)
    # pair, add the length of the overlap between the two intervals. Using
    # min/max also covers a prediction that fully contains a free block.
    # NOTE(review): pred_counter grows once per (free block, prediction)
    # pair, so it scales with the number of free blocks -- confirm that
    # this is the intended denominator.
    successful_predictions = 0
    pred_counter = 0
    for point in test_free_points:
        start_time = point[0]
        end_time = start_time + point[1]
        for entry in predicted_times:
            pred_start = entry[0]
            pred_end = pred_start + predicted_length
            overlap = min(pred_end, end_time) - max(pred_start, start_time)
            if overlap > 0:
                successful_predictions += overlap
            pred_counter += predicted_length

    print("--------------------------------------------")
    print("Kalender: " + str(kalender) + " , Veckodag: " + str(weekday))
    print("Threshold value: " + str(threshold_value))
    print("Totala timmar [h]: " + str(total_test_hours))
    print("Lediga timmar [h]: " + str(free_test_hours))
    print("Upptagna timmar [h]: " + str(occupied_test_hours) + '\n')
    print("Lyckad prediktion [h]: " + str(successful_predictions))
    print("Totala prediktioner [h]: " + str(pred_counter) + '\n')

    # Plot the decision line: all predictions in one scatter call so the
    # series gets a single legend entry.
    plt.scatter([entry[0] for entry in predicted_times],
                [entry[1] for entry in predicted_times],
                c='r',
                s=3,
                marker='o',
                label="Prediction")
    plt.axhline(y=threshold_value, color='g', linestyle='-', label="Threshold")
    plt.title("Kalender " + str(kalender) + ": Beslutslinje")
    plt.xlabel('Tid på dygnet [h]')
    plt.ylabel('Sannolikhet att tiden är ledig [%]')

    # Now add the legend with some customizations.
    legend = plt.legend(loc='upper center',
                        shadow=True,
                        bbox_to_anchor=(1.0, 1.2))

    # The frame is matplotlib.patches.Rectangle instance surrounding the legend.
    legend.get_frame().set_facecolor('0.90')

    # Set the fontsize
    for text in legend.get_texts():
        text.set_fontsize('large')
    for line in legend.get_lines():
        line.set_linewidth(1.5)  # the legend line width
    plt.show()
Example #2
0
def wKNN(kalender, predicted_length, weekday):
    """
    Method used to test the wKNN prototype

    The calendar is split by a rule of thumb: the first two thirds of its
    time blocks are used for training and the rest for testing. Both sets
    are filtered down to the requested weekday, a wKNN model is built from
    the training points, and the three candidates with the highest
    probability are kept as predictions. The predictions are then scored
    against the free blocks of the test set and a summary is printed.

    :param kalender: The calendar to use; data is read from
        ``scheman/<kalender>.csv``
    :param predicted_length: Duration of the predicted block
    :param weekday: The specific weekday to perform tests on
    :return: None; results are printed
    """
    csv_path = "scheman/" + str(kalender) + ".csv"

    # Rule of thumb: first 2/3 of the blocks for training, last 1/3 for test.
    n_timeblocks = p.Get_n_blocks(csv_path)
    split_index = np.floor(n_timeblocks * (2 / 3))

    training_set = p.Fetch_Data(csv_path, 0, split_index)
    test_set = p.Fetch_Data(csv_path, split_index, n_timeblocks)
    # InsertFreeSpace is called for its effect on the sets (its return
    # value was never used); [1, 0] are the class labels passed to it.
    p.InsertFreeSpace(training_set, [1, 0])
    p.InsertFreeSpace(test_set, [1, 0])

    # Filter by a specific weekday
    filtered_training = p.GetBlocksByWeekday(training_set, weekday)
    filtered_test = p.GetBlocksByWeekday(test_set, weekday)

    # Training data: both free (class 1) and occupied (class 0) blocks
    training_points = p.Prepare_Plane(
        p.GetStartTimes_decimal(filtered_training),
        p.GetLengths(filtered_training))
    training_classes = [
        day[3][j]
        for day in filtered_training
        for j in range(len(day[1]))
    ]

    # Test data, kept separate for free and occupied blocks
    test_free_points = p.Prepare_Plane(
        p.GetStartTimes_By_Classes(filtered_test, 1),
        p.GetLengths_By_Classes(filtered_test, 1))
    test_occupied_points = p.Prepare_Plane(
        p.GetStartTimes_By_Classes(filtered_test, 0),
        p.GetLengths_By_Classes(filtered_test, 0))

    # Create the wKNN object
    wknn = sv.wKNN(training_points, training_classes, predicted_length)
    probabilities = wknn.predict(predicted_length)

    # Top3 method: keep the three entries with the highest probability.
    # Each entry is indexed as [start time, probability]; sorting on the
    # probability alone avoids comparing a probability against a whole
    # entry and never leaves placeholder values among the predictions.
    predicted_times = sorted(probabilities,
                             key=lambda entry: entry[1],
                             reverse=True)[:3]

    free_test_hours = sum(point[1] for point in test_free_points)
    occupied_test_hours = sum(point[1] for point in test_occupied_points)
    total_test_hours = free_test_hours + occupied_test_hours

    # Calculate predicted free-time: for every (free block, prediction)
    # pair, add the length of the overlap between the two intervals. Using
    # min/max also covers a prediction that fully contains a free block.
    # NOTE(review): pred_counter grows once per (free block, prediction)
    # pair, so it scales with the number of free blocks -- confirm that
    # this is the intended denominator.
    successful_predictions = 0
    pred_counter = 0
    for point in test_free_points:
        start_time = point[0]
        end_time = start_time + point[1]
        for entry in predicted_times:
            pred_start = entry[0]
            pred_end = pred_start + predicted_length
            overlap = min(pred_end, end_time) - max(pred_start, start_time)
            if overlap > 0:
                successful_predictions += overlap
            pred_counter += predicted_length

    print("--------------------------------------------")
    print(str(predicted_times) + '\n')
    print("Kalender: " + str(kalender) + " , Veckodag: " + str(weekday))
    print("Totala timmar [h]: " + str(total_test_hours))
    print("Lediga timmar [h]: " + str(free_test_hours))
    print("Upptagna timmar [h]: " + str(occupied_test_hours) + '\n')
    print("Lyckad prediktion [h]: " + str(successful_predictions))
    print("Totala prediktioner [h]: " + str(pred_counter) + '\n')