# assumes numpy is imported as np and a softmax helper is in scope at module level
def generate(self, start_id, skip_ids=None, sample_size=100):
    word_ids = [start_id]
    x = start_id
    while len(word_ids) < sample_size:
        x = np.array(x).reshape(1, 1)  # shape (batch, time) = (1, 1) for a single step
        score = self.predict(x)  # raw scores over the vocabulary
        p = softmax(score.flatten())  # normalize scores into a probability distribution
        sampled = np.random.choice(len(p), size=1, p=p)  # draw the next word id
        if (skip_ids is None) or (sampled not in skip_ids):  # optionally reject unwanted ids
            x = sampled
            word_ids.append(int(x))
    return word_ids
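# A minimal, self-contained sketch of driving generate() end to end. ToyLM,
# its vocab_size, and the random predict() below are illustrative stand-ins,
# not part of the original model; softmax is the usual stable formulation.
import numpy as np

def softmax(x):
    x = x - np.max(x)  # subtract the max for numerical stability
    return np.exp(x) / np.sum(np.exp(x))

class ToyLM:
    vocab_size = 10
    def predict(self, x):
        # stand-in "language model": random scores over the vocabulary
        return np.random.randn(1, self.vocab_size)

ToyLM.generate = generate  # attach the sampler above (assumes a module-level def)

lm = ToyLM()
print(lm.generate(start_id=0, skip_ids=[1, 2], sample_size=10))  # ids 1 and 2 never appear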
# gradient boosting for multiclass classification: one regression tree per class
# per round, trained on softmax residuals; assumes module-level imports of
# numpy as np, pandas as pd, imp, tqdm, and the package's functions / Training modules
def classifier(df, config, header, dataset_features):
    models = []
    print("gradient boosting for classification")
    epochs = config['epochs']
    temp_df = df.copy()
    original_dataset = df.copy()
    worksheet = df.copy()
    classes = df['Decision'].unique()
    boosted_predictions = np.zeros([df.shape[0], len(classes)])
    pbar = tqdm(range(0, epochs), desc='Boosting')

    #store the actual labels as a one-hot matrix; used later to compute accuracy
    actual_set = pd.DataFrame(np.zeros([df.shape[0], len(classes)]), columns=classes)
    for i in range(0, len(classes)):
        current_class = classes[i]
        actual_set[current_class] = np.where(df['Decision'] == current_class, 1, 0)
    actual_set = actual_set.values #transform it to a numpy array

    for epoch in pbar:
        for i in range(0, len(classes)):
            current_class = classes[i]

            if epoch == 0:
                #round 0: binary one-vs-rest target for this class
                temp_df['Decision'] = np.where(df['Decision'] == current_class, 1, 0)
                worksheet['Y_' + str(i)] = temp_df['Decision']
            else:
                #later rounds: regress on the residuals Y - P from the previous round
                temp_df['Decision'] = worksheet['Y-P_' + str(i)]

            predictions = []

            #note: astype returns a copy, so this statement has no effect as written
            temp_df[['Decision']].astype('int64')

            root = 1
            file = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch) + ".py"

            functions.createFile(file, header)
            Training.buildDecisionTree(temp_df, root, file, config, dataset_features)
            #decision rules created

            #----------------------------
            #dynamic import of the freshly generated rule set
            moduleName = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch)
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description)
            models.append(myrules)

            num_of_columns = df.shape[1]
            for row, instance in df.iterrows():
                features = []
                for j in range(0, num_of_columns - 1): #iterate on features
                    features.append(instance[j])
                actual = temp_df.loc[row]['Decision']
                prediction = myrules.findDecision(features)
                predictions.append(prediction)

            #----------------------------
            if epoch == 0:
                worksheet['F_' + str(i)] = 0 #scores start at zero; round 0 only sets the targets
            else:
                worksheet['F_' + str(i)] = pd.Series(predictions).values

            boosted_predictions[:, i] = boosted_predictions[:, i] + worksheet[
                'F_' + str(i)].values.astype(np.float32)

            worksheet['P_' + str(i)] = 0

        #----------------------------
        temp_df = df.copy() #restore the original targets for the next round

        #turn the accumulated scores F_i into class probabilities P_i
        for row, instance in worksheet.iterrows():
            f_scores = []
            for i in range(0, len(classes)):
                f_scores.append(instance['F_' + str(i)])
            probabilities = functions.softmax(f_scores)
            for j in range(0, len(probabilities)):
                instance['P_' + str(j)] = probabilities[j]
            worksheet.loc[row] = instance

        #residuals Y - P become the regression targets of the next round
        for i in range(0, len(classes)):
            worksheet['Y-P_' + str(i)] = worksheet['Y_' + str(i)] - worksheet['P_' + str(i)]

        #one-hot prediction: the class with the largest boosted score wins
        prediction_set = np.zeros([df.shape[0], len(classes)])
        for i in range(0, boosted_predictions.shape[0]):
            predicted_index = np.argmax(boosted_predictions[i])
            prediction_set[i][predicted_index] = 1

        #----------------------------
        #training accuracy for this epoch: prediction_set vs actual_set
        classified = 0
        for i in range(0, actual_set.shape[0]):
            actual = np.argmax(actual_set[i])
            prediction = np.argmax(prediction_set[i])
            if actual == prediction:
                classified = classified + 1

        accuracy = str(100 * classified / actual_set.shape[0]) + "%"

        #----------------------------
        pbar.set_description("Epoch %d. Accuracy: %s. Process: " % (epoch + 1, accuracy))

    return models, classes
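# A compact numpy sketch of the per-round bookkeeping classifier() performs,
# with the regression trees replaced by a plain gradient-style step. The toy
# labels, the 0.5 step size, and softmax_rows are illustrative assumptions;
# Y, F, P, and Y - P mirror the worksheet's Y_i, F_i, P_i and Y-P_i columns.
import numpy as np

def softmax_rows(F):
    e = np.exp(F - F.max(axis=1, keepdims=True))  # row-wise stable softmax
    return e / e.sum(axis=1, keepdims=True)

labels = np.array([0, 1, 2, 1, 0])  # toy class indices
Y = np.eye(3)[labels]               # one-hot targets (the Y_i columns)
F = np.zeros_like(Y)                # boosted raw scores (the F_i columns)

for epoch in range(3):
    P = softmax_rows(F)             # class probabilities (the P_i columns)
    residuals = Y - P               # the Y-P_i columns: next round's regression targets
    F += 0.5 * residuals            # the real code fits one tree per class to these residuals
    accuracy = (F.argmax(axis=1) == labels).mean()
    print("round", epoch + 1, "accuracy", accuracy)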
# earlier variant of the same boosting loop: no progress bar or accuracy
# tracking, a single rules file per class (overwritten each round, with
# config['debug'] suppressing file creation), and no return value
def classifier(df, config, header, dataset_features):
    print("gradient boosting for classification")
    debug = config['debug']
    epochs = config['epochs']
    temp_df = df.copy()
    original_dataset = df.copy()
    worksheet = df.copy()
    classes = df['Decision'].unique()
    boosted_predictions = np.zeros([df.shape[0], len(classes)])

    for epoch in range(0, epochs):
        for i in range(0, len(classes)):
            current_class = classes[i]

            if epoch == 0:
                #round 0: binary one-vs-rest target for this class
                temp_df['Decision'] = np.where(df['Decision'] == current_class, 1, 0)
                worksheet['Y_' + str(i)] = temp_df['Decision']
            else:
                #later rounds: regress on the residuals Y - P from the previous round
                temp_df['Decision'] = worksheet['Y-P_' + str(i)]

            predictions = []

            #note: astype returns a copy, so this statement has no effect as written
            temp_df[['Decision']].astype('int64')

            root = 1
            file = "outputs/rules/rules-for-" + current_class + ".py"

            if debug == False:
                functions.createFile(file, header)

            Training.buildDecisionTree(temp_df, root, file, config, dataset_features)
            #decision rules created

            #----------------------------
            #dynamic import of the generated rule set
            moduleName = "outputs/rules/rules-for-" + current_class
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description)

            num_of_columns = df.shape[1]
            for row, instance in df.iterrows():
                features = []
                for j in range(0, num_of_columns - 1): #iterate on features
                    features.append(instance[j])
                actual = temp_df.loc[row]['Decision']
                prediction = myrules.findDecision(features)
                predictions.append(prediction)

            #----------------------------
            if epoch == 0:
                worksheet['F_' + str(i)] = 0 #scores start at zero; round 0 only sets the targets
            else:
                worksheet['F_' + str(i)] = pd.Series(predictions).values

            boosted_predictions[:, i] = boosted_predictions[:, i] + worksheet[
                'F_' + str(i)].values.astype(np.float32)

            worksheet['P_' + str(i)] = 0

        #----------------------------
        temp_df = df.copy() #restore the original targets for the next round

        #turn the accumulated scores F_i into class probabilities P_i
        for row, instance in worksheet.iterrows():
            f_scores = []
            for i in range(0, len(classes)):
                f_scores.append(instance['F_' + str(i)])
            probabilities = functions.softmax(f_scores)
            for j in range(0, len(probabilities)):
                instance['P_' + str(j)] = probabilities[j]
            worksheet.loc[row] = instance

        #residuals Y - P become the regression targets of the next round
        for i in range(0, len(classes)):
            worksheet['Y-P_' + str(i)] = worksheet['Y_' + str(i)] - worksheet['P_' + str(i)]

        print("round ", epoch + 1)
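# Both variants above load the generated rule files with the imp module, which
# is deprecated since Python 3.4 and removed in 3.12. A sketch of the same
# dynamic import with importlib, assuming the outputs/rules/*.py layout; the
# load_rules name and the example file path are illustrative, not library API.
import importlib.util

def load_rules(module_name, path):
    # build a module object from an explicit file path and execute it
    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# e.g.: myrules = load_rules("rules_for_Yes", "outputs/rules/rules-for-Yes.py")
#       prediction = myrules.findDecision(features)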