Ejemplo n.º 1
0
def test_f(args, y, output):
    """Compute per-occurrence accuracy over a batch of predicted sequences.

    For each sequence, tracks how many times each class label has been
    seen so far; ``total[k]`` / ``correct[k]`` accumulate, across the
    whole batch, the number of predictions (and correct predictions)
    made on the k-th occurrence of a class within its sequence.

    Args:
        args: namespace with ``seq_length`` (int) and ``label_type``
            ('one_hot' or 'five_hot').
        y: ground-truth labels; first axis is the batch dimension.
        output: model predictions, encoded the same way as ``y``.

    Returns:
        Tuple ``(accuracies, counts)``: ``accuracies[k-1]`` is the
        fraction correct on the k-th occurrence (k = 1..10, 0. when that
        occurrence never happened), ``counts`` is ``total[1:11]``.

    Raises:
        ValueError: if ``args.label_type`` is not recognized.
    """
    correct = [0] * args.seq_length
    total = [0] * args.seq_length
    if args.label_type == 'one_hot':
        y_decode = one_hot_decode(y)
        output_decode = one_hot_decode(output)
    elif args.label_type == 'five_hot':
        y_decode = five_hot_decode(y)
        output_decode = five_hot_decode(output)
    else:
        # Previously fell through to a NameError on y_decode; fail clearly.
        raise ValueError("unknown label_type: %r" % (args.label_type,))
    for i in range(np.shape(y)[0]):
        y_i = y_decode[i]
        output_i = output_decode[i]
        class_count = {}  # per-sequence: class label -> occurrences seen so far
        for j in range(args.seq_length):
            label = y_i[j]
            occurrence = class_count.get(label, 0) + 1
            class_count[label] = occurrence
            total[occurrence] += 1
            if label == output_i[j]:
                correct[occurrence] += 1
    return [
        float(correct[i]) / total[i] if total[i] > 0. else 0.
        for i in range(1, 11)
    ], total[1:11]
Ejemplo n.º 2
0
def test_f(args, y, output):
    """Compute per-occurrence accuracy over a batch of predicted sequences.

    For every sequence in the batch, counts how many times each class
    label has appeared so far; ``total[k]`` and ``correct[k]`` then
    accumulate how often the network made (and got right) a prediction
    on the k-th sighting of a class within its sequence.

    Args:
        args: namespace with ``seq_length`` (int) and ``label_type``
            ('one_hot' or 'five_hot').
        y: ground-truth labels; first axis is the batch dimension.
        output: model predictions, encoded the same way as ``y``.

    Returns:
        List of 10 floats: accuracy on the k-th occurrence of a class
        (k = 1..10), 0. when that occurrence count never happened.

    Raises:
        ValueError: if ``args.label_type`` is not recognized.
    """
    correct = [0] * args.seq_length  # correctly predicted, by occurrence index
    total = [0] * args.seq_length    # total predicted, by occurrence index
    if args.label_type == 'one_hot':
        y_decode = one_hot_decode(y)
        output_decode = one_hot_decode(output)
    elif args.label_type == 'five_hot':
        y_decode = five_hot_decode(y)
        output_decode = five_hot_decode(output)
    else:
        # Previously fell through to a NameError on y_decode; fail clearly.
        raise ValueError("unknown label_type: %r" % (args.label_type,))
    # Iterate over every example in the batch.
    for i in range(np.shape(y)[0]):
        y_i = y_decode[i]
        output_i = output_decode[i]
        class_count = {}  # per-sequence: class label -> occurrences seen so far
        for j in range(args.seq_length):
            label = y_i[j]
            occurrence = class_count.get(label, 0) + 1
            class_count[label] = occurrence
            total[occurrence] += 1
            if label == output_i[j]:
                correct[occurrence] += 1
    return [
        float(correct[i]) / total[i] if total[i] > 0. else 0.
        for i in range(1, 11)
    ]
Ejemplo n.º 3
0
def main(path_test, path_model, path_result):
    """End-to-end inference pipeline: load the test MRs and a trained
    attention model, generate delexicalized references, relexicalize
    them and write the results to ``path_result``.
    """
    # ---- Load data and models ----
    print("Loading data...", end=" ")
    data = pd.read_csv(path_test, names=["mr"], skiprows=1)

    len_seq = 25
    with open('models/word2idx_mr.pkl', 'rb') as handle:
        w2i_mr = pickle.load(handle)
    with open('models/idx2word_ref.pkl', 'rb') as handle:
        i2w_ref = pickle.load(handle)

    size_voc_mr = len(w2i_mr.values())
    size_voc_ref = len(i2w_ref.values())

    # Rebuild the network topology, then restore the trained weights.
    nhid = 128
    model = Sequential()
    model.add(LSTM(nhid,
                   return_sequences=True,
                   input_shape=(len_seq, size_voc_mr + 1)))
    model.add(AttentionDecoder(nhid, size_voc_ref + 1))
    model.load_weights(path_model)
    print("ok!")

    # ---- Preprocess ----
    print("Preprocessing MRs...", end=" ")
    # Pull out Name, Food and Near so they can be re-inserted later.
    for feature in ("name", "food", "near"):
        data["mr_" + feature] = data.mr.map(
            lambda mr, f=feature: extract_feature(mr, f))
    # Replace the extracted values by placeholder tags and tokenize.
    data["mr_delexicalized"] = data.mr.map(delexicalize_tokenize_mr)
    print("ok!")

    # ---- Create dataset ----
    print("Creating features...", end=" ")
    data["mr_encoded"] = data.mr_delexicalized.map(
        lambda mr: encode(mr, w2i_mr))
    data["mr_padded"] = list(pad_sequences(data.mr_encoded, maxlen=len_seq))
    X = np.array([
        one_hot_encode(data.mr_padded[i], size_voc_mr + 1)
        for i in range(len(data))
    ])
    print("ok!")

    # ---- Predict ----
    print("Predicting...", end=" ")
    # One forward pass per example; decode the argmax sequence to words.
    data["pred"] = [
        decode(one_hot_decode(model.predict(X[i:i + 1])[0]), i2w_ref)
        for i in range(len(X))
    ]
    print("ok!")

    # ---- Post-process ----
    print("Postprocessing and saving...", end=" ")
    # Each pass rewrites "pred" in place, so the tag substitutions chain.
    for col, tag in (("mr_name", "name_tag"),
                     ("mr_food", "food_tag"),
                     ("mr_near", "near_tag")):
        data["pred"] = data.apply(
            lambda row, c=col, t=tag: relexicalize_ref(row, c, t), axis=1)

    for marker in ("<begin>", "<end>"):
        data["pred"] = data.pred.map(
            lambda pred, m=marker: pred.replace(m, ""))

    np.savetxt(path_result, list(data.pred), fmt='%s')
    print("ok!")