def predict(self, test_x):
    features = get_feature_vectors(test_x, self.bin_feats)
    output = []
    for x in range(len(features)):
        featureNump = np.array(features[x])
        predict = self.bias + np.dot(self.w, featureNump)
        # threshold the sigmoid of the activation at 0.5
        if 1 / (1 + math.exp(-predict)) < 0.5:
            output.append(-1)
        else:
            output.append(1)
    return output
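# The sigmoid call above can overflow: math.exp raises OverflowError once its
# argument exceeds roughly 709, so a large negative activation crashes predict.
# Below is a minimal, hedged sketch of a numerically safe variant; the helper
# name stable_sigmoid and the toy activation value are illustrative assumptions,
# not part of the original code.
import math

def stable_sigmoid(z):
    # Branch on the sign of z so exp is only ever called on a non-positive value.
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    e = math.exp(z)
    return e / (1.0 + e)

activation = -750.0                                   # math.exp(750) would overflow
label = 1 if stable_sigmoid(activation) >= 0.5 else -1
print(label)                                          # prints -1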
def fit(self, train_data):
    data = self.randomize(train_data)
    self.features = get_feature_vectors(data[0], self.bin_feats)
    labels = data[1]
    for x in range(self.num_iter):
        steps = self.updateWeights(labels)
        self.bias = self.bias - (labels[x] * self.lr_bgd)
        # stop early once every step falls inside the convergence tolerance
        if not np.any((steps <= -.001) | (steps >= .001)):
            break
def predict(self, test_x):
    # Compute and return the output for the given test inputs
    inputs = np.array(get_feature_vectors(test_x))
    output = []
    for vec in range(len(inputs)):
        prediction = np.sign(np.dot(self.weights, inputs[vec]) + self.bias)
        if prediction <= 0:
            prediction = -1
        else:
            prediction = 1
        output.append(prediction)
    return output
def predict(self, test_x):
    # Compute and return the output for the given test inputs
    labels = []
    fv = np.array(utils.get_feature_vectors(test_x, False))
    for i in range(len(test_x)):
        predictedLabel = np.dot(fv[i], self.weights) + self.bias
        if predictedLabel > 0:
            labels.append(1)
        else:
            labels.append(-1)
    return labels
def predict(self, test_x):
    test_features = np.array(get_feature_vectors(test_x, self.binary_features))
    predict_labels = []
    for x in range(len(test_features)):
        sig = 1 / (1 + math.exp(-(np.dot(np.transpose(self.f_weights),
                                         test_features[x]) + self.f_bias)))
        if sig > 0.5:
            predict_labels.append(1)
        else:
            predict_labels.append(-1)
    return predict_labels
def fit(self, train_data):
    # Learn the parameters from the training data.
    # train_data[0] holds the reviews, train_data[1] the labels.
    words_bin_form = utils.get_feature_vectors(train_data[0])
    self.w = [0.0] * len(words_bin_form[0])
    time = 0
    while time < self.max_iteration:
        for i in range(len(words_bin_form)):
            x = words_bin_form[i]
            sumx = sum(x[j] for j in range(self.dim))
            y = train_data[1][i]
            # activation normalised by the total feature count: (w . x) / sum(x) + b
            wx_b = sum(self.w[j] * x[j] / sumx for j in range(len(self.w))) + self.b
            si = np.sign(wx_b)
            time += 1
            if y != si:
                # perceptron update on a misclassified example
                for j in range(len(self.w)):
                    self.w[j] += self.learning_step * y * x[j]
                self.b += self.learning_step * y
def predict(self, test_x):
    # Compute and return the output for the given test inputs
    # (single pass: one prediction per test example)
    feature_vectors = get_feature_vectors(test_x)
    testPredictions = []
    for row in range(len(test_x)):
        calculation = np.dot(np.transpose(self.weights), feature_vectors[row]) + self.bias
        prediction = self.findSign(calculation)
        testPredictions.append(prediction)
    return testPredictions
def fit(self, train_data):
    # Learn the parameters from the training data:
    # batch sub-gradient ascent on the hinge loss
    data = get_feature_vectors(train_data[0], True)
    lab = train_data[1]
    for i in range(self.iterate):
        self.gw = np.zeros(self.vector_size)
        self.gb = 0
        for j in range(len(data)):
            original_value = lab[j]
            if original_value * (np.dot(self.w, np.array(data[j])) + self.b) <= 1:
                self.gw += original_value * np.array(data[j])
                self.gb += original_value
        self.b += self.bgd_learning_rate * self.gb
        self.w += self.bgd_learning_rate * self.gw
def fit(self, train_data):
    # Learn the parameters from the training data
    data = get_feature_vectors(train_data[0], True)
    lab = train_data[1]
    for i in range(self.iterate):
        for j in range(len(data)):
            original_value = lab[j]
            sign_value = np.sign(np.dot(self.w, np.array(data[j])) + self.b)
            error = original_value - sign_value
            if error != 0:
                self.b += error
                shift = np.array([feat * error * self.learning_rate for feat in data[j]])
                self.w = np.array(self.w) + shift
def fit(self, train_data):
    # Learn the parameters from the training data
    tr_size = len(train_data[0])
    indices = list(range(tr_size))
    random.seed(5)  # this line is to ensure that you get the same shuffled order every time
    random.shuffle(indices)
    train_data = ([train_data[0][i] for i in indices],
                  [train_data[1][i] for i in indices])
    feature_vec = get_feature_vectors(train_data[0], bin_feats)
    survival = 1
    global Bias
    global W
    i = 0
    while i < num_iter:
        j = 0
        while j <= len(feature_vec) - 1:
            X = feature_vec[j]
            Y = train_data[1][j]
            if np.dot(X, W) + Bias <= 0:
                sign = -1
            else:
                sign = 1
            if Y != sign:
                # weight the update by how long the current weights "survived"
                k = 0
                while k < vocab_size:
                    W[k] = (W[k] + lr * Y * X[k] + survival * W[k]) / (survival + 1)
                    k = k + 1
                survival = 1
                Bias = Bias + lr * Y
            else:
                survival = survival + 1
            j = j + 1
        i = i + 1
def fit(self, train_data):
    # Learn the parameters from the training data
    shuffle = np.arange(len(train_data[0]))
    np.random.shuffle(shuffle)
    fv = np.array(utils.get_feature_vectors(train_data[0], False))
    for h in range(self.numIter):
        for i in shuffle:
            predictedLabel = np.dot(fv[i], self.weights) + self.bias
            if predictedLabel > 0:
                result = 1
            else:
                result = -1
            if result != train_data[1][i]:
                self.weights += self.learnRate * train_data[1][i] * fv[i]
                self.bias += self.learnRate * train_data[1][i]
def fit(self, train_data):
    # Learn the parameters from the training data:
    # batch gradient ascent on the logistic log-likelihood
    data = np.array(get_feature_vectors(train_data[0], True))
    lab = train_data[1]
    for i in range(self.iterate):
        self.gw = np.zeros(self.vector_size)
        self.gb = 0
        for j in range(data.shape[0]):
            pred = np.dot(self.w.T, data[j]) + self.b
            z = self.g(pred)
            # remap the {-1, +1} label to a {0, 1} target
            temp = lab[j]
            if lab[j] < 0:
                temp = 0
            self.gw += np.array(data[j]) * (temp - z)
            self.gb += (temp - z)
        self.w += self.bgd_learning_rate * self.gw
        self.b += self.bgd_learning_rate * self.gb
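# The batch update above follows the gradient of the logistic log-likelihood
# with the labels remapped to {0, 1}: per example, x * (t - sigmoid(w.x + b))
# for the weights and (t - sigmoid(w.x + b)) for the bias. Below is a minimal,
# self-contained sketch of one such step; the helper names and the toy data are
# illustrative assumptions, not taken from the original class.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def bgd_step(w, b, X, t, lr):
    # One batch gradient ascent step on the logistic log-likelihood.
    # X: (n, d) feature matrix, t: targets already in {0, 1}.
    z = sigmoid(X @ w + b)
    grad_w = X.T @ (t - z)     # sum over examples of x * (t - sigmoid)
    grad_b = np.sum(t - z)     # sum over examples of (t - sigmoid)
    return w + lr * grad_w, b + lr * grad_b

X = np.array([[1.0, 2.0], [2.0, 0.5], [0.1, 0.3]])
t = np.array([1.0, 1.0, 0.0])
w, b = np.zeros(2), 0.0
for _ in range(100):
    w, b = bgd_step(w, b, X, t, lr=0.1)
print(np.round(sigmoid(X @ w + b), 2))   # probabilities move toward the targets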
def fit(self, train_data):
    features = utils.get_feature_vectors(train_data[0], binary=False)
    features_df = pd.DataFrame(features, columns=vocab.keys())
    features_df['Label'] = train_data[1]
    # class priors P(c+) and P(c-)
    self.positive_prob = len(features_df.loc[features_df['Label'] == 1]) / float(len(features_df))
    self.negative_prob = len(features_df.loc[features_df['Label'] == -1]) / float(len(features_df))
    # per-class feature tables, with the label column removed
    self.positive_features = features_df.loc[features_df['Label'] == 1]
    self.negative_features = features_df.loc[features_df['Label'] == -1]
    self.positive_features = self.positive_features.drop(columns=['Label'])
    self.negative_features = self.negative_features.drop(columns=['Label'])
def predict(self, test_x):
    # Compute and return the output for the given test inputs
    text_l = [0] * len(test_x)
    feature_vec = get_feature_vectors(test_x, bin_feats)
    i = 0
    while i <= len(feature_vec) - 1:
        X = feature_vec[i]
        if np.dot(X, W) + Bias <= 0:
            sign = -1
        else:
            sign = 1
        text_l[i] = sign
        i = i + 1
    return text_l
def predict(self, test_x):
    # Compute and return the output for the given test inputs
    feature_vectors = get_feature_vectors(test_x)
    testPredictions = []
    for row in range(len(feature_vectors)):
        posLogSum = 0
        negLogSum = 0
        for i in range(len(feature_vectors[row])):
            if feature_vectors[row][i] > 0:
                posLogSum = posLogSum + math.log(self.positiveProbVector[i])
                negLogSum = negLogSum + math.log(self.negativeProbVector[i])
        if posLogSum > negLogSum:
            testPredictions.append(1)
        else:
            testPredictions.append(-1)
    return testPredictions
def fit(self, train_data):
    training_classifications = np.array(train_data[1])
    training_features = np.array(get_feature_vectors(train_data[0], self.binary_features))
    weights_vec = np.zeros(len(training_features[0]))
    bias = 0.0
    for x in range(self.epoch):
        gradient_weights_vec = np.zeros(len(training_features[0]))
        gradient_bias = 0.0
        for feature_vec, label in zip(training_features, training_classifications):
            # hinge-loss sub-gradient: only margin violations contribute
            if label * (np.dot(weights_vec, feature_vec) + bias) <= 1:
                gradient_weights_vec += label * feature_vec
                gradient_bias += label
        weights_vec += self.learning_rate * gradient_weights_vec
        bias = bias + self.learning_rate * gradient_bias
    self.f_weights = weights_vec
    self.f_bias = bias
def fit(self, train_data):
    # Learn the parameters from the training data
    for i in range(self.iterate):
        indices = list(range(len(train_data[0])))
        random.seed(5)
        random.shuffle(indices)
        train_data = ([train_data[0][idx] for idx in indices],
                      [train_data[1][idx] for idx in indices])
        data = get_feature_vectors(train_data[0], True)
        lab = train_data[1]
        for j in range(len(data)):
            self.gw = np.zeros(self.vector_size)
            self.gb = 0
            original_value = lab[j]
            if original_value * (np.dot(self.w, np.array(data[j])) + self.b) <= 1:
                self.gw += original_value * np.array(data[j])
                self.gb += original_value
            self.b += self.sgd_learning_rate * self.gb
            self.w += self.sgd_learning_rate * self.gw
def fit(self, train_data):
    # shuffle the training data
    indices = [x for x in range(len(train_data[0]))]
    random.shuffle(indices)
    train_data = ([train_data[0][i] for i in indices],
                  [train_data[1][i] for i in indices])
    training_features = np.array(get_feature_vectors(train_data[0], self.binary_features))
    training_classifications = np.array(train_data[1])
    for x in range(self.epoch):
        bias_gradient = 0
        gradient_vec = np.zeros(self.f_dim)
        for y in range(len(training_features)):
            features = training_features[y]
            # map the {-1, +1} label to a {0, 1} target for the logistic loss
            label = 0 if training_classifications[y] == -1 else 1
            sigmoid_func = 1 / (1 + np.exp(-(np.dot(self.f_weights, features) + self.f_bias)))
            gradient_vec += features * (sigmoid_func - label)
            bias_gradient -= (1 / len(training_features[0])) * (sigmoid_func - label)
        gradient_vec = gradient_vec * (-self.learning_rate / len(training_features[0]))
        gradient_vec += self.f_weights * self.lam * (-self.learning_rate)
        norm = LA.norm(gradient_vec)
        if norm == 0.0:
            break
        self.f_weights += gradient_vec
        self.f_bias = bias_gradient
def fit(self, train_data):
    # Learn the parameters from the training data
    tr_size = len(train_data[0])
    indices = list(range(tr_size))
    random.seed(5)
    random.shuffle(indices)
    train_data = ([train_data[0][i] for i in indices],
                  [train_data[1][i] for i in indices])
    review = train_data[0]
    classes = train_data[1]
    features = np.array(get_feature_vectors(review, self.bin_feats))
    for num in range(self.num_iter):
        gradient = np.zeros(self.vocab_size)
        bias_gradient = 0
        for i in range(len(features)):
            gradient += classes[i] * features[i]
            bias_gradient += classes[i]
        gradient -= self.la * self.weights  # L2 regularisation term
        self.weights += self.rate * gradient
        self.bias += self.rate * bias_gradient
def fit(self, train_data):
    # Learn the parameters from the training data (averaged perceptron)
    data = get_feature_vectors(train_data[0], True)
    lab = train_data[1]
    count = 1
    for i in range(self.iterate):
        for j in range(len(data)):
            original_value = lab[j]
            sign_value = np.sign(np.dot(self.w, np.array(data[j])) + self.b)
            error = original_value - sign_value
            if error != 0:
                self.b += error
                self.w = np.array(self.w) + np.array(
                    [feat * error * self.averaged_learning_rate for feat in data[j]])
                # count-weighted accumulators used for averaging at the end
                self.beta += error * count
                self.u = np.array(self.u) + np.array(
                    [feat * error * count * self.averaged_learning_rate for feat in data[j]])
            count += 1
    # subtract the averaged corrections once training is done
    self.b -= (self.beta / count)
    self.u = [feat / count for feat in self.u]
    self.w -= self.u
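# The snippet above uses the standard averaging trick for the averaged
# perceptron: alongside the working weights w it keeps u (a count-weighted sum
# of the updates) and beta (the same for the bias), and w - u/count at the end
# equals the average of every intermediate weight vector without storing them.
# Below is a minimal self-contained sketch of the same idea; the function name,
# hyper-parameters and toy data are illustrative assumptions, not the original
# class.
import numpy as np

def averaged_perceptron(X, y, epochs=5, lr=1.0):
    # X: (n_samples, n_features) array; y: labels in {-1, +1}
    w = np.zeros(X.shape[1])
    b = 0.0
    u = np.zeros(X.shape[1])     # count-weighted accumulator for w
    beta = 0.0                   # count-weighted accumulator for b
    count = 1
    for _ in range(epochs):
        for xi, yi in zip(X, y):
            if yi * (np.dot(w, xi) + b) <= 0:   # misclassified
                w += lr * yi * xi
                b += lr * yi
                u += lr * yi * count * xi
                beta += lr * yi * count
            count += 1
    return w - u / count, b - beta / count      # averaged parameters

X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.0, 0.0]])
y = np.array([1, -1, 1, -1])
w_avg, b_avg = averaged_perceptron(X, y)
print(np.sign(X @ w_avg + b_avg))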
def fit(self, train_data):
    # Learn the parameters from the training data:
    # stochastic gradient ascent on the logistic log-likelihood
    for i in range(self.iterate):
        indices = list(range(len(train_data[0])))
        random.seed(5)
        random.shuffle(indices)
        train_data = ([train_data[0][idx] for idx in indices],
                      [train_data[1][idx] for idx in indices])
        data = get_feature_vectors(train_data[0], True)
        lab = train_data[1]
        for j in range(len(data)):
            self.gw = np.zeros(self.vector_size)
            self.gb = 0
            pred = np.dot(self.w.T, data[j]) + self.b
            z = self.g(pred)
            # remap the {-1, +1} label to a {0, 1} target
            temp = lab[j]
            if lab[j] < 0:
                temp = 0
            self.gw += np.array(data[j]) * (temp - z)
            self.gb += (temp - z)
            self.w += self.sgd_learning_rate * self.gw
            self.b += self.sgd_learning_rate * self.gb
def fit(self, train_data):
    # Learn the parameters from the training data
    self.weights = [0] * self.args.f_dim
    self.bias = 0
    tr_size = len(train_data[0])
    indices = list(range(tr_size))
    random.seed(5)  # this line is to ensure that you get the same shuffled order every time
    random.shuffle(indices)
    train_data = ([train_data[0][i] for i in indices],
                  [train_data[1][i] for i in indices])
    predictionTestList = train_data[1]
    feature_vectors = get_feature_vectors(train_data[0])
    for epoch in range(self.args.num_iter):
        for row in range(len(train_data[0])):
            calculation = np.matmul(np.transpose(self.weights), feature_vectors[row]) + self.bias
            prediction = self.findSign(calculation)
            if prediction != predictionTestList[row]:
                self.weights = self.weights + (self.args.lr * predictionTestList[row]
                                               * np.array(feature_vectors[row]))
                self.bias = self.bias + (self.args.lr * predictionTestList[row])
def fit(self, train_data):
    indices = [x for x in range(len(train_data[0]))]
    random.shuffle(indices)
    train_data = ([train_data[0][i] for i in indices],
                  [train_data[1][i] for i in indices])
    training_classifications = np.array(train_data[1])
    training_features = np.array(get_feature_vectors(train_data[0], self.binary_features))
    weights_vec = np.zeros(len(training_features[0]))
    bias = 0.0
    for x in range(self.epoch):
        gradient_weights_vec = np.zeros(len(training_features[0]))
        gradient_bias = 0.0
        for feature_vec, label in zip(training_features, training_classifications):
            # hinge-loss sub-gradient: only margin violations contribute
            if label * (np.dot(weights_vec, feature_vec) + bias) <= 1:
                gradient_weights_vec += label * feature_vec
                gradient_bias += label
        # L2 regularisation on the weights
        gradient_weights_vec -= weights_vec * self.lam
        weights_vec += self.learning_rate * gradient_weights_vec
        bias = bias + self.learning_rate * gradient_bias
    self.f_weights = weights_vec
    self.f_bias = bias
def predict(self, test_x): #TO DO: Compute and return the output for the given test inputs # print("inside perceptron predict") #print("test_x") #print(len(test_x))#301 #print(len(test_x[0])) #4227 #print(len(test_x[1])) # 6092 #print(test_x) # test_features=utils.get_feature_vectors(test_x) #print("test_features") #print(len(test_features)) #301 #print(len(test_features[0])) #10000 #print(test_features) #pred_y=[0.]*len(test_features) # labels = [0.]*len(test_features) #301 #print("len(test_features[0])") #10000 top words #print(len(test_features[0])) # for i in range (len(test_features)): #loop through the 301 test_features #separate each test_features #for feature in test_x[0]: # x=list(test_features[i]) #print("x as list of features[%d]",i) #print(x) # x.append(1) #add 1 to the end of list #print("x as list of features[%d] after append",i) #print(x) # wx_b=sum([self.w[j]*x[j] for j in range(len(self.w))])+self.b # p=0 # if int(wx_b)>0: # p=1 # else: # p=-1 # print("printing p") # print(p) # labels.append(p) # return labels test_features = utils.get_feature_vectors(test_x) pred_y = [0.] * len(test_features) # print(len(test_features)) #i = 0 #for feature in test_features: for i in range(len(test_features)): #x = list(feature) x = test_features[i] sumx=sum([test_features[i][j] for j in range(self.dim)] ) #print("sumx") #print(sumx) # x.append(1) # wx_b = sum([self.weights[j] * x[j] + self.bias[j] for j in range(len(self.weights))]) # wx_b = wx + self.bias wx_b = sum([self.w[j] * x[j]/sumx for j in range(len(self.w))]) + self.b #print("in predict") #print(int(wx_b)) if int(wx_b) > 0: pred_y[i] = 1 else: pred_y[i] = -1 #i += 1 # print(pred_y) # print(pred_y.count(-1)) return pred_y
def fit(self, train_data):
    data = self.randomize(train_data)
    self.features = get_feature_vectors(data[0], self.bin_feats)
    labels = data[1]
    for x in range(self.num_iter):
        self.updateWeights(labels)
def predict(self, test_x):
    # Compute and return the output for the given test inputs
    data = get_feature_vectors(test_x, True)
    return [np.sign(np.dot(self.w, np.array(d)) + self.b) for d in data]
def fit(self, train_data):
    # Learn the parameters from the training data.
    #
    # Two classes: positive and negative. A data instance is a movie review d,
    # i.e. a sequence of words w1, w2, ..., wn. The idea is to estimate the
    # probability of each class for a given instance, P(c+|d) and P(c-|d), and
    # assign the class with the higher score.
    #
    # By Bayes' theorem, P(c+|d) = P(d|c+) P(c+) / P(d). The denominator can be
    # dropped because we only compare P(c+|d) with P(c-|d). P(c+) is learnt from
    # the training data as size(c+) / (size(c+) + size(c-)). To estimate
    # P(d|c+) = P(w1 w2 ... wn | c+), we learn P(wi|c+) and P(wi|c-) from the
    # per-class word frequencies accumulated below.
    words_freq_form = utils.get_feature_vectors(train_data[0])
    self.pos_words = [0.] * len(words_freq_form[0])
    self.neg_words = [0.] * len(words_freq_form[0])
    for i in range(len(words_freq_form[0])):
        # total frequency of word i in positive and in negative reviews
        pos_word = 0.0
        neg_word = 0.0
        for j in range(len(words_freq_form)):
            if train_data[1][j] == 1:
                pos_word = pos_word + words_freq_form[j][i]
            elif train_data[1][j] == -1:
                neg_word = neg_word + words_freq_form[j][i]
        self.pos_words[i] = pos_word
        self.neg_words[i] = neg_word
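# The fit above only accumulates raw per-class word counts; to obtain the
# P(wi|c+) and P(wi|c-) described in the comments they still have to be
# normalised, typically with Laplace (add-one) smoothing so that a word unseen
# in one class does not zero out the whole product. A hedged sketch of that
# step and of log-space scoring follows; the helper name, the smoothing
# constant and the toy counts are assumptions, not part of the original code.
import math

def smoothed_log_probs(counts, alpha=1.0):
    # Laplace-smoothed log P(word | class) from raw per-class word counts.
    total = sum(counts) + alpha * len(counts)
    return [math.log((c + alpha) / total) for c in counts]

# toy 3-word vocabulary with class-conditional counts
pos_counts = [8.0, 1.0, 0.0]
neg_counts = [1.0, 5.0, 3.0]
log_p_pos = smoothed_log_probs(pos_counts)
log_p_neg = smoothed_log_probs(neg_counts)

# score a document containing word 0 twice and word 2 once, assuming equal priors
doc = [2, 0, 1]
score_pos = sum(n * lp for n, lp in zip(doc, log_p_pos))
score_neg = sum(n * lp for n, lp in zip(doc, log_p_neg))
print(1 if score_pos > score_neg else -1)   # prints 1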
def predict(self, test_x):
    data = np.array(get_feature_vectors(test_x, self.binary_features))
    return [
        np.sign(np.dot(self.f_weights, np.array(d)) + self.f_bias)
        for d in data
    ]
def main(run):
    train_data = pickle.load(
        open(os.path.join(opt.dataset_folder, 'train.dat'), 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open(os.path.join(opt.dataset_folder, 'test.dat'), 'rb'))
    print(test_data[0][0], test_data[1][0])  # sanity check on the first test example

    cars = pickle.load(
        open(os.path.join(opt.dataset_folder, 'reg_no_item_id.dat'), 'rb'))
    item_features = pickle.load(
        open(os.path.join(opt.dataset_folder, 'itemid_features.dat'), 'rb'))

    train_data = Data(train_data, shuffle=True, features=item_features)
    test_data = Data(test_data, shuffle=False, features=item_features)

    n_node = len(cars) + 1  # number of unique cars + 1
    n_feature_columns = len(item_features[1])
    features_vector = get_feature_vectors(n_node, item_features)
    run.log("Unique No. of Cars", n_node)

    model = trans_to_cuda(
        SessionGraph(opt, n_node,
                     n_feature_columns=n_feature_columns,
                     features=features_vector))

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0

    # Evaluate once before training
    hit, mrr = predict_scores(model, test_data)
    run.log(f'Recall@{opt.top_k}', hit)
    run.log(f'MRR@{opt.top_k}', mrr)

    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr, mean_loss = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1

        # Metrics capture
        run.log(f'Recall@{opt.top_k}', hit)
        run.log(f'MRR@{opt.top_k}', mrr)
        run.log('Mean Loss', mean_loss)

        print('Current Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tMean Loss:\t%.4f,\tEpoch:\t%d'
              % (hit, mrr, mean_loss, epoch))
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d'
              % (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))

        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')

    end = time.time()
    print("Run time: %f s" % (end - start))
    run.log('Training Time (s)', (end - start))

    # Save the model and copy the mapping/feature files alongside it
    output_folder = opt.output_folder
    os.makedirs(output_folder, exist_ok=True)
    torch.save(model, f'{output_folder}/{opt.model_name}_full.pth')
    torch.save(model.state_dict(), f'{output_folder}/{opt.model_name}.pt')
    shutil.copy(
        os.path.join(opt.dataset_folder, 'itemid_to_vehicle_mapping.dat'),
        f'{output_folder}/{opt.model_name}_item_veh_mapping.dat')
    shutil.copy(os.path.join(opt.dataset_folder, 'reg_no_item_id.dat'),
                f'{output_folder}/{opt.model_name}_veh_item_mapping.dat')
    shutil.copy(os.path.join(opt.dataset_folder, 'itemid_features.dat'),
                f'{output_folder}/itemid_features.dat')
    run.log("Model Saved in Outputs", True)