Python load_dataset Exemples, dataprocessing.load_dataset Python Exemples

Exemple #1

0

Afficher le fichier

def convert_file(filename='../datasets/DATASET.short.txt'):
    """Build before/after board arrays for the plain phase-2 moves of a dataset.

    The states and moves are read with ``load_dataset``. A sample is kept only
    when its state has ``my_phase == 2`` and its move has a FROM part and no
    REMOVE part. Each kept board is encoded symbol by symbol ('O' -> 0,
    'M' -> 1, 'E' -> 2; any other symbol is skipped), and the resulting board
    is obtained by swapping the entries at the FROM and TO positions
    (the move stores them 1-based).

    Parameters
    ----------
    filename : string
        Path of the dataset file handed to ``load_dataset``.

    Returns
    -------
    tuple
        ``(states_ini_arr, states_res_arr)``: numpy arrays holding the encoded
        boards before and after the move, in dataset order.
    """
    states_ini, moves = load_dataset(filename)

    # Integer code of every recognised board symbol.
    symbol_codes = {'O': 0, 'M': 1, 'E': 2}

    boards_before = []
    boards_after = []

    for idx, move in enumerate(moves):
        to_pos, has_from, from_pos, has_remove, _ = move
        state = states_ini[idx]

        # Keep only phase-2 samples that slide a piece without removing one.
        if state.my_phase != 2 or not has_from or has_remove:
            continue

        board = [symbol_codes[symbol]
                 for symbol in state.positions
                 if symbol in symbol_codes]
        boards_before.append(board)

        # Apply the move on a copy: exchange the source and target cells.
        moved = np.copy(board)
        src, dst = from_pos - 1, to_pos - 1
        moved[dst], moved[src] = moved[src], moved[dst]
        boards_after.append(moved)

    return np.array(boards_before), np.array(boards_after)

Exemple #2

0

Afficher le fichier

Fichier : testing.py Projet : moscalej/Alpha-2Zero

def _percentage(count, total):
    """Return ``count`` as a percentage of ``total``, or -1 if ``total`` <= 0."""
    if total > 0:
        return count * 100.0 / total
    return -1


def test_networks(datasetname, statesonly, expanded, name, batchsize=20000):
    """
    Measure the accuracy and the legality of a triplet of networks.

    Loads three networks (``name`` + "_TO", "_FROM", "_REMOVE") and a dataset,
    then measures how accurate the networks' predictions are with respect to
    the dataset and whether their choices are legal. The accuracy section of
    the report is omitted when the dataset is made only of states (no moves).

    The report is printed and appended to the file ``name + "_testing.txt"``.

    Parameters
    ----------
    datasetname : string
        The name of the dataset file.
    statesonly : boolean
        True if the dataset does not contain information about the moves.
    expanded : boolean
        True if the dataset has already been expanded through symmetries.
    name : string
        The name of the network configuration to load.
    batchsize : int
        Number of samples pushed through the networks at a time.

    Returns
    -------
    None
    """

    print("Testing " + name)
    print("Loading networks...")
    print("\tloading " + name + "_TO")
    TOnet = load_net(name + "_TO")
    print("\tloading " + name + "_FROM")
    FROMnet = load_net(name + "_FROM")

    print("\tloading " + name + "_REMOVE")
    REMOVEnet = load_net(name + "_REMOVE")
    print("\tNetworks loaded!")

    data_format = TOnet[3]
    print(str(data_format))

    # Element 2 of each loaded network is used as its slot in the decision
    # order; a slot left at 'X' means two networks claimed the same position.
    orderc = ['X', 'X', 'X']
    orderc[TOnet[2]] = 'T'
    orderc[FROMnet[2]] = 'F'
    orderc[REMOVEnet[2]] = 'R'
    order = "" + orderc[0] + orderc[1] + orderc[2]
    print("\tOrder: " + order)

    print("Loading data...")

    if statesonly:
        if expanded:
            A, B = load_expanded_states_dataset(datasetname)
        else:
            A, B = load_states_dataset(datasetname)
    else:
        if expanded:
            A, B = load_expanded_dataset(datasetname)
        else:
            A, B = load_dataset(datasetname)

    print("\tData loaded! Loaded: " + str(len(B)) + " data")

    print("Processing data and getting choices")

    # Any permutation of T, F and R is a valid pipeline; anything else
    # falls back to TFR. Resolved once instead of once per batch.
    if sorted(order) == ['F', 'R', 'T']:
        pipeline = order
    else:
        print("Unknown configuration. Using TFR")
        pipeline = "TFR"
    nets = {'T': TOnet, 'F': FROMnet, 'R': REMOVEnet}
    last_step = len(pipeline) - 1

    X_TO = []
    X_FROM = []
    X_REMOVE = []

    TO_choice = []
    FROM_choice = []
    REMOVE_choice = []

    y_TO = []
    y_FROM = []
    y_REMOVE = []

    total = len(A)
    # Ceiling division: no trailing empty batch when total is a multiple of
    # batchsize, and no batch at all (hence no division by zero) when the
    # dataset is empty.
    numbatch = (total + batchsize - 1) // batchsize

    for i in range(numbatch):
        start = i * batchsize
        stop = min(start + batchsize, total)
        print("\t" + str(stop * 100.0 / total) + "%")

        Ai = A[start:stop]
        Bi = B[start:stop]

        batch_y_TO = process_move_onlyTO(Bi)
        batch_y_FROM = process_move_onlyFROM(Bi)
        batch_y_REMOVE = process_move_onlyREMOVE(Bi)

        # Each network sees the initial state extended with the choices of
        # the networks that precede it in the pipeline.
        batch_inputs = {}
        batch_choices = {}
        current = process_state_binary(Ai, data_format)
        for step, part in enumerate(pipeline):
            batch_inputs[part] = current
            batch_choices[part] = get_choices(nets[part], current)
            if step < last_step:
                current = add_CHOICE_binary_raw(current, batch_choices[part])

        for j in range(len(batch_inputs['T'])):
            X_TO.append(batch_inputs['T'][j])
            X_FROM.append(batch_inputs['F'][j])
            X_REMOVE.append(batch_inputs['R'][j])

            TO_choice.append(batch_choices['T'][j])
            FROM_choice.append(batch_choices['F'][j])
            REMOVE_choice.append(batch_choices['R'][j])

            y_TO.append(batch_y_TO[j])
            y_FROM.append(batch_y_FROM[j])
            y_REMOVE.append(batch_y_REMOVE[j])

    print("Testing legality")

    legalities = get_legalities(TO_choice, FROM_choice, REMOVE_choice,
                                X_REMOVE, data_format)

    print("\ttesting the legality of " + str(len(legalities[0])) + " data\n")

    # Labels for the first eight entries returned by get_legalities, in order.
    legality_labels = ["Only TO leg", "Only FROM leg", "Only REMOVE leg",
                       "Only FROM-TO leg", "Only REMOVE-FROM-TO leg",
                       "Whole FROM leg", "Whole REMOVE leg", "Whole MOVE leg"]

    leg = ("Legality response on " + datasetname + ":" +
           "".join("\n\t" + label + ":\t" + str(numpy.mean(values) * 100)
                   for label, values in zip(legality_labels, legalities)))

    report_path = name + "_testing.txt"

    print(leg)
    # Append and close immediately so the handle cannot leak if the
    # accuracy pass below raises.
    with open(report_path, 'a') as fileT:
        fileT.write(leg)

    print("\nTesting accuracy and special cases\n")

    # accuracy evaluation and legality evaluation for some particular cases:
    # the FROM move in phase 2 and the REMOVE when it is not 0
    correctTO = 0
    correctFROM = 0
    correctREMOVE = 0
    correctWHOLE = 0

    # legality for FROM in phase 2
    legalFROM2 = 0
    f2 = 0

    # legality for REMOVE != 0 for the network
    legalREMOVEeat = 0
    re = 0

    # accuracy when REMOVE != 0 in dataset
    correctREMOVEyes = 0
    ry = 0

    # accuracy when FROM != 0 in dataset
    correctFROMyes = 0
    fy = 0

    # whole-move accuracy for the different phases
    phase_total = {1: 0, 2: 0, 3: 0}
    phase_correct = {1: 0, 2: 0, 3: 0}

    size = len(TO_choice)

    print("\tTesting the accuracy of " + str(size) + " data")

    for i in range(size):
        state = X_REMOVE[i]
        to_ok = TO_choice[i] == y_TO[i]
        from_ok = FROM_choice[i] == y_FROM[i]
        remove_ok = REMOVE_choice[i] == y_REMOVE[i]

        in_phase_2 = is_phase_2(state, data_format)
        net_removes = REMOVE_choice[i] != 0

        # Both special cases need the legality of this single sample; the
        # call takes identical arguments either way, so evaluate it once.
        if in_phase_2 or net_removes:
            sample_leg = get_legalities(TO_choice[i:i + 1],
                                        FROM_choice[i:i + 1],
                                        REMOVE_choice[i:i + 1],
                                        X_REMOVE[i:i + 1],
                                        data_format)

        # TO decision
        if to_ok:
            correctTO += 1

        # legal FROM decision in phase 2
        if in_phase_2:
            f2 += 1
            if sample_leg[5] == 1:
                legalFROM2 += 1

        # FROM decision, overall and when the dataset move has a FROM
        if y_FROM[i] != 0:
            fy += 1
        if from_ok:
            correctFROM += 1
            if y_FROM[i] != 0:
                correctFROMyes += 1

        # REMOVE present in the dataset move
        if y_REMOVE[i] != 0:
            ry += 1

        # legality of a REMOVE chosen by the network
        if net_removes:
            re += 1
            if sample_leg[6] == 1:
                legalREMOVEeat += 1

        # REMOVE decision, overall and when the dataset move has a REMOVE
        if remove_ok:
            correctREMOVE += 1
            if y_REMOVE[i] != 0:
                correctREMOVEyes += 1

        # Anything that is neither phase 1 nor phase 3 counts as phase 2.
        if is_phase_1(state, data_format):
            phase = 1
        elif is_phase_3(state, data_format):
            phase = 3
        else:
            phase = 2
        phase_total[phase] += 1

        # WHOLE move (in the different phases of the game)
        if to_ok and from_ok and remove_ok:
            correctWHOLE += 1
            phase_correct[phase] += 1

    # Turn the counters into percentages; -1 marks "no sample of this kind".
    correctTO = _percentage(correctTO, size)
    correctFROM = _percentage(correctFROM, size)
    correctREMOVE = _percentage(correctREMOVE, size)
    correctWHOLE = _percentage(correctWHOLE, size)

    legalFROM2 = _percentage(legalFROM2, f2)
    legalREMOVEeat = _percentage(legalREMOVEeat, re)
    correctREMOVEyes = _percentage(correctREMOVEyes, ry)
    correctFROMyes = _percentage(correctFROMyes, fy)

    correctWHOLE1 = _percentage(phase_correct[1], phase_total[1])
    correctWHOLE2 = _percentage(phase_correct[2], phase_total[2])
    correctWHOLE3 = _percentage(phase_correct[3], phase_total[3])

    leg = ("\n\tFROM phase 2 leg:\t" + str(legalFROM2) +
           "\n\tREMOVE when chosen leg:\t" + str(legalREMOVEeat))

    if not statesonly:
        leg += ("\n--------------------\n\nAccuracy response on " +
                datasetname + ":" + "\n\tTO accuracy:\t" + str(correctTO) +
                "\n\tFROM accuracy:\t" + str(correctFROM) +
                "\n\tREMOVE accuracy:\t" + str(correctREMOVE) +
                "\n\tWhole MOVE accuracy:\t" + str(correctWHOLE) +
                "\n\n\tWhole MOVE accuracy in phase 1:\t" +
                str(correctWHOLE1) + "\n\tWhole MOVE accuracy in phase 2:\t" +
                str(correctWHOLE2) + "\n\tWhole MOVE accuracy in phase 3:\t" +
                str(correctWHOLE3) + "\n\n\tFROM not 0 accuracy:\t" +
                str(correctFROMyes) + "\n\tREMOVE not 0 accuracy:\t" +
                str(correctREMOVEyes) + "\n\n\n")

    print(leg)
    with open(report_path, 'a') as fileT:
        fileT.write(leg)

Exemple #3

0

Afficher le fichier

Fichier : training.py Projet : gluoNNet/NineQuantumsMorris

def train(name='prova',
          datasetname="15vsALL.resultall.map.txt", expanded=False,
          vset_size=0.05, tset_size=0,
          testsetname="",
          movepart="TO",
          order="TFR",
          batch_size=2000, num_epochs=100,
          patience=10,
          nettype=2,
          neurons=None,
          blocks=10,
          lr_alfa0=0.001, b1=0.99, b2=0.999,
          lr_annealing=True,
          lr_k=0.1,
          dropi=0, drop=0,
          regularization=True,
          reg_type=rgl.l1,
          reg_weight=0.001,
          normalization=False,
          load=False, initial_epoch=0,
          data_format="binary raw"):
    """
    Prepare the data for one move-part network and hand it to ``do_training``.

    The dataset is split into training, validation and test sets using a
    shuffled index list that is stored in ``name + "_indexes.txt"`` and reused
    when that file already exists. The network input is the processed state
    extended with the dataset choices of every move part that precedes
    ``movepart`` in ``order``; the target is the ``movepart`` choice.
    A header line is written to ``name + "_" + movepart + ".txt"``
    (appended when ``load`` is true, otherwise the file is overwritten).

    Parameters
    ----------
    name : string
        Prefix of the index file and of the results file; also forwarded
        to ``do_training``.
    datasetname : string
        The name of the dataset file.
    expanded : boolean
        True to read the dataset with ``load_expanded_dataset`` instead of
        ``load_dataset``.
    vset_size, tset_size : number
        Size of the validation / test set. A value below 1 is taken as a
        fraction of the dataset, otherwise as a number of samples. With
        ``vset_size`` equal to 0 the whole dataset is used both for training
        and for validation.
    testsetname : string
        When not empty (and ``vset_size`` > 0) the test set is read from this
        file instead of being carved out of the dataset.
    movepart : string
        The part to train: "TO", "FROM" or "REMOVE".
    order : string
        Sequence of 'T', 'F' and 'R' giving the order of the decisions;
        other characters are ignored.
    neurons : list or None
        Forwarded to ``do_training``; None stands for ``[200, 300]``.
    load : boolean
        Selects append mode for the results file; also forwarded to
        ``do_training``.
    data_format : string
        Forwarded to ``process_state_binary`` and ``do_training``.

    All the remaining parameters are forwarded unchanged to ``do_training``.

    Raises
    ------
    SystemExit
        If ``vset_size`` or ``tset_size`` is negative.
    ValueError
        If ``movepart`` cannot be reached by walking through ``order``.
    """
    if vset_size < 0 or tset_size < 0:
        # Exit with a message and a non-zero status rather than a silent 0.
        sys.exit("train: vset_size and tset_size must not be negative")

    # A fresh list per call, so the default can never be shared or mutated.
    if neurons is None:
        neurons = [200, 300]

    print("Loading data...")
    # Load the dataset (already expanded through symmetries or not)
    if expanded:
        A, B = load_expanded_dataset(datasetname)
    else:
        A, B = load_dataset(datasetname)

    print("Dataset loaded: " + str(len(B)) + " data")

    # if the sizes of the sets are expressed as decimals, they are fractions
    # of the total set
    if vset_size < 1:
        vset_size = vset_size * len(A)
    if tset_size < 1:
        tset_size = tset_size * len(A)

    vset_size = int(vset_size)
    tset_size = int(tset_size)

    indexes_path = name + "_indexes.txt"
    if os.path.isfile(indexes_path):
        indexes = load_indexes(indexes_path)
    else:
        # random.shuffle needs a mutable sequence; a bare range is not one
        # on Python 3.
        indexes = list(range(len(A)))
        random.shuffle(indexes)
        write_indexes(indexes_path, indexes)

    A_train = []
    A_val = []
    A_test = []
    B_train = []
    B_val = []
    B_test = []

    # Every index list starts empty so that the gathering loops below are
    # safe whichever branch is taken.
    train_ind = []
    val_ind = []
    test_ind = []

    if testsetname != "" and vset_size > 0:
        # load a different test set from a file
        A_test, B_test = load_dataset(testsetname)
        train_ind, val_ind = indexes[:-vset_size], indexes[-vset_size:]

    elif tset_size > 0 and vset_size > 0:  # distinct validation and test set
        train_ind = indexes[:-(vset_size + tset_size)]
        val_ind = indexes[-(vset_size + tset_size):-tset_size]
        test_ind = indexes[-tset_size:]

    elif vset_size > 0:  # no test set, only validation and train set
        train_ind = indexes[:-(vset_size + tset_size)]
        val_ind = indexes[-vset_size:]

    else:  # only one set, used both for training and validation
        A_train, A_val = A, A
        B_train, B_val = B, B

    for index in train_ind:
        A_train.append(A[index])
        B_train.append(B[index])

    for index in val_ind:
        A_val.append(A[index])
        B_val.append(B[index])

    for index in test_ind:
        A_test.append(A[index])
        B_test.append(B[index])

    print("Process data...")

    X_train = process_state_binary(A_train, data_format)
    X_val = process_state_binary(A_val, data_format)
    X_test = process_state_binary(A_test, data_format)

    # Order character -> (move part name, function extracting that part).
    move_parts = {
        'T': ("TO", process_move_onlyTO),
        'F': ("FROM", process_move_onlyFROM),
        'R': ("REMOVE", process_move_onlyREMOVE),
    }

    # Walk the decision order: parts decided before `movepart` are appended
    # to the input, `movepart` itself becomes the target.
    for char in order:
        if char not in move_parts:
            continue
        part_name, extract = move_parts[char]
        if movepart == part_name:
            y_train = extract(B_train)
            y_val = extract(B_val)
            y_test = extract(B_test)
            break
        X_train = add_CHOICE_binary_raw(X_train, extract(B_train))
        X_val = add_CHOICE_binary_raw(X_val, extract(B_val))
        X_test = add_CHOICE_binary_raw(X_test, extract(B_test))
    else:
        raise ValueError("movepart %r is not reachable with order %r"
                         % (movepart, order))

    print("Data processed!\n")

    print("Using " + str(len(X_train)) + " data for training, " +
          str(len(X_val)) + " for validation and " +
          str(len(X_test)) + " for testing \n")

    # Continue the existing results file when resuming, start a new one
    # otherwise.
    with open(name + "_" + movepart + ".txt", 'a' if load else 'w') as results_file:
        results_file.write("TRAINED ON: " + datasetname +
                           "\tVal: " + str(vset_size) +
                           "\tTest: " + str(tset_size) + "\n")

    time1 = time.time()
    do_training(X_train, X_val, X_test, y_train, y_val, y_test,
                name=name, movepart=movepart, order=order,
                batch_size=batch_size, num_epochs=num_epochs,
                patience=patience,
                nettype=nettype,
                neurons=neurons,
                blocks=blocks,
                lr_alfa0=lr_alfa0, b1=b1, b2=b2,
                lr_annealing=lr_annealing, lr_k=lr_k,
                dropi=dropi, drop=drop,
                regularization=regularization,
                reg_type=reg_type, reg_weight=reg_weight,
                normalization=normalization,
                load=load, initial_epoch=initial_epoch,
                data_format=data_format)
    time2 = time.time()
    totaltime = time2 - time1
    print("\tTIME OCCURRED: " + str(totaltime))