예제 #1
0
    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids, starting from ``start_id``.

        At every step the most recently accepted id is reshaped to a
        ``(1, 1)`` array and passed to ``self.predict``; the flattened scores
        are turned into a probability vector with ``softmax`` and the next id
        is drawn from it with ``np.random.choice``.  A drawn id that appears
        in ``skip_ids`` is discarded and a new draw is made.

        Args:
            start_id: Id of the first word; it is always the first element
                of the returned list.
            skip_ids: Optional container of ids that must never be emitted.
                Any container supporting ``in`` works (list, tuple, set,
                ...).  ``None`` disables filtering.
            sample_size: The loop stops once the result holds this many ids
                (``start_id`` included).

        Returns:
            list: ``start_id`` followed by the sampled ids as plain ``int``.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            sampled = np.random.choice(len(p), size=1, p=p)
            # Pull the scalar out of the size-1 array once: a plain int can be
            # looked up in hash-based containers (a set would reject an
            # ndarray as unhashable) and avoids the deprecated implicit
            # array-to-scalar conversion of int(<1-d array>).
            sampled_id = int(sampled[0])
            if (skip_ids is None) or (sampled_id not in skip_ids):
                x = sampled
                word_ids.append(sampled_id)

        return word_ids
예제 #2
0
파일: gbm.py 프로젝트: zzhsaga/chefboost
def classifier(df, config, header, dataset_features):
    """Fit one decision-rule module per class per boosting round.

    For every round and every distinct value of ``df['Decision']`` a rule
    file ``outputs/rules/rules-for-<class>-round-<epoch>.py`` is created with
    ``functions.createFile``, filled by ``Training.buildDecisionTree`` and
    then imported.  In round 0 the tree target is the one-vs-rest 0/1
    indicator of the class; in later rounds it is the residual ``Y - P``
    computed at the end of the previous round, where ``P`` comes from
    ``functions.softmax`` applied to the per-class ``F_`` columns.

    Args:
        df: Training frame.  Must contain a ``'Decision'`` column; features
            are read positionally from the first ``df.shape[1] - 1`` columns
            (assumes 'Decision' is the last column -- TODO confirm with
            callers).
        config: Mapping; only ``config['epochs']`` is read here, the whole
            object is forwarded to ``Training.buildDecisionTree``.
        header: Text passed to ``functions.createFile`` for each rule file.
        dataset_features: Forwarded unchanged to
            ``Training.buildDecisionTree``.

    Returns:
        tuple: ``(models, classes)`` where ``models`` is the list of imported
        rule modules in training order (round-major, then class order) and
        ``classes`` is ``df['Decision'].unique()``.
    """
    models = []

    print("gradient boosting for classification")

    epochs = config['epochs']

    temp_df = df.copy()
    worksheet = df.copy()

    classes = df['Decision'].unique()
    num_of_rows = df.shape[0]
    num_of_classes = len(classes)
    num_of_features = df.shape[1] - 1  # every column except the last one

    boosted_predictions = np.zeros([num_of_rows, num_of_classes])

    pbar = tqdm(range(0, epochs), desc='Boosting')

    # One-hot encoding of the true labels, used to compute accuracy per round.
    actual_set = pd.DataFrame(np.zeros([num_of_rows, num_of_classes]),
                              columns=classes)
    for current_class in classes:
        actual_set[current_class] = np.where(df['Decision'] == current_class,
                                             1, 0)
    actual_set = actual_set.values  # transform it to numpy array

    for epoch in pbar:
        for i, current_class in enumerate(classes):
            suffix = str(i)

            if epoch == 0:
                temp_df['Decision'] = np.where(df['Decision'] == current_class,
                                               1, 0)
                worksheet['Y_' + suffix] = temp_df['Decision']
            else:
                temp_df['Decision'] = worksheet['Y-P_' + suffix]

            # The decision column is deliberately left uncast: from round 1
            # on it holds Y - P residuals, which an integer cast would
            # truncate.  (The previous astype('int64') call discarded its
            # result and therefore never had any effect.)

            # str() keeps non-string labels (ints, floats, ...) from raising
            # TypeError during concatenation; string labels are unaffected.
            module_name = ("outputs/rules/rules-for-" + str(current_class) +
                           "-round-" + str(epoch))
            rules_file = module_name + ".py"

            functions.createFile(rules_file, header)

            Training.buildDecisionTree(temp_df, 1, rules_file, config,
                                       dataset_features)
            # decision rules created
            # ----------------------------

            # Dynamic import of the rules that were just written.
            # NOTE(review): the ``imp`` module is deprecated and was removed
            # in Python 3.12; migrating to importlib needs a file-level change.
            fp, pathname, description = imp.find_module(module_name)
            try:
                myrules = imp.load_module(module_name, fp, pathname,
                                          description)
            finally:
                # find_module hands back an open file that the caller owns.
                if fp is not None:
                    fp.close()

            models.append(myrules)

            predictions = []
            for _, instance in df.iterrows():
                # .iloc makes the positional feature access explicit instead
                # of relying on the deprecated integer-key fallback.
                features = [instance.iloc[j] for j in range(num_of_features)]
                predictions.append(myrules.findDecision(features))

            # ----------------------------
            # Round 0 contributes a zero score; later rounds contribute the
            # predictions of the tree trained on the residuals.
            if epoch == 0:
                worksheet['F_' + suffix] = 0
            else:
                worksheet['F_' + suffix] = pd.Series(predictions).values

            boosted_predictions[:, i] = boosted_predictions[:, i] + worksheet[
                'F_' + suffix].values.astype(np.float32)

            worksheet['P_' + suffix] = 0

            # ----------------------------
            temp_df = df.copy()  # restoration

        # Turn the per-class F scores of each row into probabilities.
        for row, instance in worksheet.iterrows():
            f_scores = []
            for i in range(0, num_of_classes):
                f_scores.append(instance['F_' + str(i)])

            probabilities = functions.softmax(f_scores)

            for j in range(0, len(probabilities)):
                instance['P_' + str(j)] = probabilities[j]

            worksheet.loc[row] = instance

        # Residuals become the regression targets of the next round.
        for i in range(0, num_of_classes):
            worksheet['Y-P_' +
                      str(i)] = worksheet['Y_' + str(i)] - worksheet['P_' +
                                                                     str(i)]

        # ----------------------------
        # Accuracy for this round: highest accumulated score vs. true class.
        # Comparing the argmax indices directly is equivalent to building a
        # one-hot prediction matrix and taking its argmax row by row.
        predicted_indices = np.argmax(boosted_predictions, axis=1)
        actual_indices = np.argmax(actual_set, axis=1)
        classified = int(np.count_nonzero(predicted_indices == actual_indices))

        accuracy = str(100 * classified / num_of_rows) + "%"

        # ----------------------------

        pbar.set_description("Epoch %d. Accuracy: %s. Process: " %
                             (epoch + 1, accuracy))

    return models, classes
예제 #3
0
def classifier(df, config, header, dataset_features):
    """Run gradient-boosting rounds that fit one rule module per class.

    For each round and each distinct value of ``df['Decision']`` a decision
    tree is built into ``outputs/rules/rules-for-<class>.py`` and imported;
    the same file name is reused in every round.  Round 0 trains on the
    one-vs-rest 0/1 indicator of the class, later rounds on the ``Y - P``
    residual columns stored in the local ``worksheet`` frame.

    Args:
        df: Training frame with a ``'Decision'`` column; features are read
            from the first ``df.shape[1] - 1`` positions of every row.
        config: Mapping; ``config['debug']`` and ``config['epochs']`` are
            read here and the object is forwarded to
            ``Training.buildDecisionTree``.
        header: Text passed to ``functions.createFile`` for each rule file.
        dataset_features: Forwarded to ``Training.buildDecisionTree``.

    NOTE(review): no ``return`` statement is visible in this excerpt; the
    definition may continue past the last line shown -- confirm against the
    full file.
    """
    print("gradient boosting for classification")

    debug = config['debug']
    epochs = config['epochs']

    temp_df = df.copy()
    original_dataset = df.copy()
    worksheet = df.copy()

    classes = df['Decision'].unique()

    # Accumulated per-class scores, one row per sample and one column per class.
    boosted_predictions = np.zeros([df.shape[0], len(classes)])

    for epoch in range(0, epochs):
        for i in range(0, len(classes)):
            current_class = classes[i]

            if epoch == 0:
                # First round: the target is the 0/1 indicator of this class.
                temp_df['Decision'] = np.where(df['Decision'] == current_class,
                                               1, 0)
                worksheet['Y_' + str(i)] = temp_df['Decision']
            else:
                # Later rounds: the target is the residual from the last round.
                temp_df['Decision'] = worksheet['Y-P_' + str(i)]

            predictions = []

            # change data type for decision column
            # NOTE(review): the result of astype() is not assigned, so this
            # statement leaves temp_df unchanged.
            temp_df[['Decision']].astype('int64')

            root = 1
            file = "outputs/rules/rules-for-" + current_class + ".py"

            # The rule file is only (re)created when debug is off.
            if debug == False: functions.createFile(file, header)

            Training.buildDecisionTree(temp_df, root, file, config,
                                       dataset_features)
            # decision rules created
            # ----------------------------

            # dynamic import
            # NOTE(review): fp returned by find_module is not closed in the
            # visible code.
            moduleName = "outputs/rules/rules-for-" + current_class
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname,
                                      description)  #rules0

            num_of_columns = df.shape[1]

            # Evaluate the freshly imported rules on every training row.
            for row, instance in df.iterrows():
                features = []
                for j in range(0, num_of_columns - 1):  #iterate on features
                    features.append(instance[j])

                # `actual` is not used further in the visible code.
                actual = temp_df.loc[row]['Decision']
                prediction = myrules.findDecision(features)
                predictions.append(prediction)

            # ----------------------------
            # Round 0 stores a zero score; later rounds store the predictions
            # of the tree trained in this round.
            if epoch == 0:
                worksheet['F_' + str(i)] = 0
            else:
                worksheet['F_' + str(i)] = pd.Series(predictions).values

            boosted_predictions[:, i] = boosted_predictions[:, i] + worksheet[
                'F_' + str(i)].values.astype(np.float32)

            # Placeholder column; filled by the softmax pass below.
            worksheet['P_' + str(i)] = 0

            # ----------------------------
            temp_df = df.copy()  #restoration

        # Convert each row's F_ scores into P_ values via functions.softmax.
        for row, instance in worksheet.iterrows():
            f_scores = []
            for i in range(0, len(classes)):
                f_scores.append(instance['F_' + str(i)])

            probabilities = functions.softmax(f_scores)

            for j in range(0, len(probabilities)):
                instance['P_' + str(j)] = probabilities[j]

            worksheet.loc[row] = instance

        # Residuals (Y - P) are read as targets by the next round.
        for i in range(0, len(classes)):
            worksheet['Y-P_' +
                      str(i)] = worksheet['Y_' + str(i)] - worksheet['P_' +
                                                                     str(i)]

        print("round ", epoch + 1)