def tf_idf_normalized(words, use_pos_tagging=True):
    """Build TF-IDF records for ``words`` and flag the ones to highlight.

    Every word is reduced to its normal form with ``morph``; the records and
    the TF-IDF values that take part in the threshold are then collected by
    ``create_tf_idf_info`` into two lists that are handed over empty.  Each
    record receives a boolean ``"highlight"`` key telling whether its
    ``"tf_idf"`` reaches the threshold reported by ``percentile``.

    Args:
        words: sequence of words to analyse.
        use_pos_tagging: when true, the records whose ``"normal_form"`` is
            not ``None`` are additionally passed to ``pos_tagging``.

    Returns:
        The list of word records (dicts) that was handed to
        ``create_tf_idf_info``.
    """
    lemmas = [morph.parse(token)[0].normal_form for token in words]

    records = []
    scored_values = []
    # Calculate tf-idf values (both lists are expected to be filled in place).
    create_tf_idf_info(records, words, lemmas, scored_values)

    # Highlight threshold; the original note says "get first 30% of list".
    # NOTE(review): the values are sorted in descending order before the
    # percentile call -- confirm that `percentile` expects that order.
    descending_values = sorted(scored_values, reverse=True)
    threshold = percentile(descending_values, percent=0.7)
    for record in records:
        record["highlight"] = bool(record["tf_idf"] >= threshold)

    if use_pos_tagging:
        taggable = [r for r in records if r.get("normal_form") is not None]
        pos_tagging(taggable)

    return records
def Prob(self, numList, num):
    """Return ``percentile.percentile(numList, num)`` divided by 100.0.

    Presumably this turns a 0-100 percentile figure into a 0-1 fraction;
    the exact meaning depends on the ``percentile`` module.
    """
    rank = percentile.percentile(numList, num)
    return rank / 100.0
birthWeightFirsties = [] birthWeightOthers = [] myBirthWeight = 8.5 firsties = [] others = [] for record in table.records: if record.outcome == 1: if record.birthord == 1: firsties.append(record) else: others.append(record) for record in firsties: if type(record.birthwgt_lb) and type(record.birthwgt_oz) == int: birthWeightFirsties.append(record.birthwgt_lb + record.birthwgt_oz/16.0) weightFirstiesPercentile = percentile.percentile(birthWeightFirsties, myBirthWeight) for record in others: if type(record.birthwgt_lb) and type(record.birthwgt_oz) == int: birthWeightOthers.append(record.birthwgt_lb + record.birthwgt_oz/16.0) weightOthersPercentile = percentile.percentile(birthWeightOthers, myBirthWeight) print "percentile birthweight among firsties:", weightFirstiesPercentile print "percentile birthweight among others:", weightOthersPercentile
def calculate(cycles, attacks, at, pow, defense, arm, dice_hit, dice_dmg, foc, boostflag):
    """Simulate ``cycles`` rounds of attacks and print damage statistics.

    Each cycle spends a budget of attack points on calls to
    ``rolls.roll_full`` and sums the returned damage; a return value of -1
    is counted as 0 damage (presumably a miss).  After all cycles the sorted
    per-cycle totals are summarised on stdout.

    Reads the module-level names ``max_result`` (size of the damage
    histogram) and ``full`` (enables the header and the per-damage table).

    Args:
        cycles: number of cycles to simulate.
        attacks: attack points available in every cycle.
        at, pow, defense, arm: passed unchanged to ``rolls.roll_full``.
        dice_hit, dice_dmg: passed to ``rolls.roll_full``; some modes add 1
            to one or both of them (see ``boostflag``).
        foc: extra points added to ``attacks`` when it is non-zero.
        boostflag: how the budget is spent when ``foc`` is non-zero:
            0 -- one point per plain roll;
            1 -- two points per roll made with ``dice_hit + 1`` while at
                 least two points remain, plain rolls otherwise;
            2 -- two points per roll made with ``dice_dmg + 1``, one point
                 handed back when the roll returns -1;
            3 -- three points per roll with both dice raised, falling back
                 to the mode-2 roll at exactly two points and a plain roll
                 at one; one point handed back when a boosted roll
                 returns -1.
            Any other value leaves every cycle at 0 damage.

    Returns:
        None; all results are printed.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm that the average/percentile prints are meant to run
    regardless of ``full``.
    NOTE(review): ``results[i]`` raises IndexError if a cycle's total damage
    reaches ``max_result``; ``cycles == 0`` divides by zero in the summary.
    """
    results = [0] * max_result  # Create an array to handle up to 'max_result' dmg
    ordered_results = []  # Create an array for results to land in before sorting
    input_foc = foc  # Grab the foc now before we manipulate it
    out = 0
    counter_cycle = 0
    while counter_cycle < cycles:
        result = 0  # damage accumulated in this cycle
        counter_cycle += 1
        # Disabled progress report:
        # if full == 1:
        #     process = counter_cycle / cycles * 10
        #     if process == int(process):
        #         print(int(process * 10), "% Complete",round(time.time() - start_time,2), "seconds")
        if input_foc == 0:  # If we are not in focus mode...
            cycle_attacks = attacks
            while cycle_attacks > 0:
                out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg)  # Do initial attacks
                if out == -1:
                    out = 0
                result = result + out
                cycle_attacks -= 1
        elif boostflag == 0:
            # Focus is spent on additional plain attacks only.
            cycle_attacks = attacks + foc
            while cycle_attacks > 0:
                out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg)  # Do initial attacks
                if out == -1:
                    out = 0
                result = result + out
                cycle_attacks -= 1
        elif boostflag == 1:
            # Raise dice_hit by one: such a roll costs two points, never refunded.
            cycle_attacks = attacks + foc
            while cycle_attacks > 0:
                if cycle_attacks >= 2:
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit + 1, dice_dmg)  # Do initial attacks
                    if out == -1:
                        out = 0
                    cycle_attacks -= 2
                else:
                    # Only one point left: plain roll.
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg)
                    if out == -1:
                        out = 0
                    cycle_attacks -= 1
                result = result + out
        elif boostflag == 2:
            # Raise dice_dmg by one: costs two points, one is handed back on -1.
            cycle_attacks = attacks + foc
            while cycle_attacks > 0:
                if cycle_attacks >= 2:
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg + 1)  # Do initial attacks
                    if out == -1:
                        out = 0
                        cycle_attacks += 1  # refund one point (net cost of this roll: 1)
                    cycle_attacks -= 2
                else:
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg)
                    if out == -1:
                        out = 0
                    cycle_attacks -= 1
                result = result + out
        elif boostflag == 3:
            # Raise both dice: costs three points, one is handed back on -1.
            cycle_attacks = attacks + foc
            while cycle_attacks > 0:
                if cycle_attacks >= 3:
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit + 1, dice_dmg + 1)  # Do initial attacks
                    if out == -1:
                        out = 0
                        cycle_attacks += 1  # refund one point (net cost of this roll: 2)
                    cycle_attacks -= 3
                elif cycle_attacks == 2:
                    # Not enough for both boosts: same roll as boostflag == 2.
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg + 1)  # Do initial attacks
                    if out == -1:
                        out = 0
                        cycle_attacks += 1  # refund one point (net cost of this roll: 1)
                    cycle_attacks -= 2
                else:
                    out = rolls.roll_full(at, pow, defense, arm, dice_hit, dice_dmg)
                    if out == -1:
                        out = 0
                    cycle_attacks -= 1
                result = result + out
        # Disabled leftover-focus attack:
        # if input_foc == 1:  # If one foc is left, buy an attack
        #     out = sim.simroll(1, at, pow, defense, arm, dice_hit, dice_dmg)
        #     if out == -1:
        #         out = 0
        #     result = result + out
        ordered_results.append(result)
    ordered_results.sort()  # Order the ordered_results for later use
    # Histogram: results[d] counts the cycles whose total damage was exactly d.
    for index, i in enumerate(ordered_results):
        results[i] += 1  # Populate the results array
    damage_total = sum(ordered_results)
    # print("Total Damage:", damage_total)
    if full == 1:
        print("*** Full results for", attacks, "attacks ***")
    print("Average Damage:", "\t", round(damage_total / cycles, 2))
    # percentile.percentile is handed the already-sorted per-cycle totals.
    print("25th Percentile:", "\t", percentile.percentile(ordered_results, 0.25))
    print("50th Percentile:", "\t", percentile.percentile(ordered_results, 0.50))
    print("75th Percentile:", "\t", percentile.percentile(ordered_results, 0.75))
    print("90th Percentile:", "\t", percentile.percentile(ordered_results, 0.90))
    if full == 1:
        print("DAMAGE\t", "CONFIDENCE\t", "SPECIFIC")
        result_counter = 0
        while result_counter < max_result:
            if results[result_counter] != 0:
                # The list is sorted ascending, so the first position of this
                # damage value equals the number of cycles that dealt less;
                # `confidence` is therefore the number of cycles that dealt
                # at least this much.
                ordered_index = ordered_results.index(result_counter)
                confidence = cycles - ordered_index
                print(result_counter, "\t", round(confidence / cycles * 100, 2), "%", "\t", round(results[result_counter] / cycles * 100, 2), "%")
            result_counter += 1
def predict_test(self, save_csv=False):
    """Predict deaths for the test rows from the MCMC traces.

    For every MCMC iteration and test row the prediction is
    ``exp(beta.X + log(envelope) + pi_s + pi_r + pi_c)``, where the three
    ``pi`` terms are the sampled super-region, region and country effects
    looked up at the row's age and year.  The predictions are summarised
    across iterations as a mean plus the values
    ``percentile.percentile`` reports for 2.5 and 97.5.

    Side effects:
        * ``self.test_s_index``, ``self.test_r_index`` and
          ``self.test_c_index`` receive the per-level ``np.where`` results;
        * the process working directory is changed before ``percentile`` is
          imported (presumably so that the import resolves);
        * ``self.predictions`` is replaced by a ``np.recarray`` holding the
          identifying columns, the summary columns and, unless
          ``self.training_type`` is ``'make predictions'``, the actual
          deaths (``cf * envelope``).

    Args:
        save_csv: when it compares equal to ``True``, ``self.predictions``
            is also written to a CSV file via ``pl.rec2csv``.
    """
    # setup constants
    num_test_rows = self.test_data.shape[0]
    num_iters = self.mod_mc.beta.trace().shape[0]

    # lookup tables: year / age value -> position in the model's lists
    t_index = {year: pos for pos, year in enumerate(self.year_list)}
    a_index = {age: pos for pos, age in enumerate(self.age_list)}

    # fixed effects: one covariate column 'x<k>' per entry of beta
    num_covariates = self.mod_mc.beta.value.shape[0]
    X = np.array([self.test_data['x%d'%k] for k in range(num_covariates)])
    BX = np.dot(self.mod_mc.beta.trace(), X)

    # exposure (a binomially sampled alternative used to be kept here,
    # disabled; only the deterministic envelope is used)
    E = np.ones((num_iters, num_test_rows))*self.test_data.envelope

    def level_effects(field, levels, trace_name):
        # Locate the test rows of every level and copy that level's sampled
        # effect at each row's (age, year) position into a
        # (num_iters, num_test_rows) array.
        where = [np.where(getattr(self.test_data, field)==level) for level in levels]
        years = [[t_index[self.test_data.year[j]] for j in hit[0]] for hit in where]
        ages = [[a_index[self.test_data.age[j]] for j in hit[0]] for hit in where]
        pi = np.zeros((num_iters, num_test_rows))
        for k, hit in enumerate(where):
            level_trace = getattr(self.mod_mc, trace_name).trace()[:,k]
            pi[:,hit[0]] = level_trace[:,ages[k],years[k]]
        return where, pi

    # pi_s
    s_index, pi_s = level_effects('super_region', self.super_region_list, 'pi_s_list')
    self.test_s_index = s_index

    # pi_r
    r_index, pi_r = level_effects('region', self.region_list, 'pi_r_list')
    self.test_r_index = r_index

    # pi_c
    c_index, pi_c = level_effects('country', self.country_list, 'pi_c_list')
    self.test_c_index = c_index

    # make predictions
    import os
    os.chdir('/home/j/Project/Causes of Death/CoDMod/codmod2/')
    import percentile
    predictions = np.exp(BX + np.log(E) + pi_s + pi_r + pi_c)
    summary_columns = [
        ('mean_deaths', predictions.mean(axis=0)),
        ('lower_deaths', percentile.percentile(predictions, 2.5, axis=0)),
        ('upper_deaths', percentile.percentile(predictions, 97.5, axis=0)),
    ]
    self.predictions = self.test_data[['country','region','super_region','year','age','pop']]
    for column_name, column_values in summary_columns:
        self.predictions = recfunctions.append_fields(self.predictions, column_name, column_values)
    if self.training_type != 'make predictions':
        actual = self.test_data.cf*self.test_data.envelope
        self.predictions = recfunctions.append_fields(self.predictions, 'actual_deaths', actual)
    self.predictions = self.predictions.view(np.recarray)

    # save the predictions
    if save_csv == True:
        csv_path = '/home/j/Project/Causes of Death/CoDMod/tmp/' + self.name + '_predictions_' + self.cause + '_' + self.sex + '.csv'
        pl.rec2csv(self.predictions, csv_path)
def predict_test(self, save_csv=False):
    """Predict deaths for the test rows from the draws in ``self.approxs``.

    For every draw and test row the prediction is
    ``exp(beta.X + log(envelope) + pi_s + pi_r + pi_c)``.  The super-region,
    region and country effects are only available at ``self.sample_points``;
    for each draw a bivariate spline is fitted through them and evaluated on
    the full ``age_list`` x ``year_list`` grid, from which each row's
    (age, year) value is taken.  Within every level the draws are then
    re-ordered by their mean effect over that level's rows.

    The predictions are summarised across draws as a mean plus the values
    ``percentile.percentile`` reports for 2.5 and 97.5.

    Side effects:
        * the process working directory is changed before ``percentile`` is
          imported (presumably so that the import resolves);
        * ``self.predictions`` is replaced by a ``np.recarray`` holding the
          identifying columns, the summary columns and, unless
          ``self.training_type`` is ``'make predictions'``, the actual
          deaths (``cf * envelope``).

    Args:
        save_csv: when it compares equal to ``True``, ``self.predictions``
            is also written to a CSV file via ``pl.rec2csv``.
    """
    # setup constants
    num_test_rows = self.test_data.shape[0]
    num_iters = self.approxs['beta'].shape[0]

    # lookup tables: year / age value -> position in the model's lists
    t_index = {t: i for i, t in enumerate(self.year_list)}
    a_index = {a: i for i, a in enumerate(self.age_list)}

    # fixed effects: one covariate column 'x<i>' per entry of beta
    X = np.array([self.test_data['x%d'%i] for i in range(self.mod_mc.beta.value.shape[0])])
    BX = np.dot(self.approxs['beta'], X)

    # exposure (a binomially sampled alternative used to be kept here,
    # disabled; only the deterministic envelope is used)
    E = np.ones((num_iters, num_test_rows))*self.test_data.envelope

    # interpolation parameters
    x_samples = self.sample_points[:,0]
    y_samples = self.sample_points[:,1]
    xb = self.age_list[0]
    xe = self.age_list[-1]
    yb = self.year_list[0]
    ye = self.year_list[-1]
    # spline degree: cubic when there are more than 3 samples along the
    # axis, otherwise one less than the number of samples
    kx = 3 if len(self.age_samples) > 3 else len(self.age_samples)-1
    ky = 3 if len(self.year_samples) > 3 else len(self.year_samples)-1

    def interpolated_effects(field, levels, key_prefix):
        # Build the (num_iters, num_test_rows) effect array of one hierarchy
        # level.  Every level writes only into its own array; the previous
        # copy-pasted country section stored its sorted draws into pi_s,
        # corrupting the super-region effects and leaving pi_c unsorted.
        index = [np.where(getattr(self.test_data, field)==level) for level in levels]
        t_by = [[t_index[self.test_data.year[j]] for j in hit[0]] for hit in index]
        a_by = [[a_index[self.test_data.age[j]] for j in hit[0]] for hit in index]
        pi = np.zeros((num_iters, num_test_rows))
        for k, hit in enumerate(index):
            rows = hit[0]
            for i in range(num_iters):
                interpolator = interpolate.bisplrep(x=x_samples, y=y_samples, z=self.approxs[key_prefix+str(k)][i], xb=xb, xe=xe, yb=yb, ye=ye, kx=kx, ky=ky)
                grid = interpolate.bisplev(x=self.age_list, y=self.year_list, tck=interpolator)
                pi[i,rows] = grid[a_by[k],t_by[k]]
            # order this level's draws by their mean effect over its rows
            draw_means = pi[:,rows].mean(axis=1)
            pi[:,rows] = pi[:,rows][np.argsort(draw_means)]
        return pi

    pi_s = interpolated_effects('super_region', self.super_region_list, 'pi_s_')
    pi_r = interpolated_effects('region', self.region_list, 'pi_r_')
    pi_c = interpolated_effects('country', self.country_list, 'pi_c_')

    # make predictions
    # NOTE(review): changing the working directory to import a module is a
    # process-wide side effect; kept because callers may rely on it.
    import os
    os.chdir('/home/j/Project/Causes of Death/CoDMod/codmod2/')
    import percentile
    predictions = np.exp(BX + np.log(E) + pi_s + pi_r + pi_c)
    mean = predictions.mean(axis=0)
    lower = percentile.percentile(predictions, 2.5, axis=0)
    upper = percentile.percentile(predictions, 97.5, axis=0)
    self.predictions = self.test_data[['country','region','super_region','year','age','pop']]
    self.predictions = recfunctions.append_fields(self.predictions, 'mean_deaths', mean)
    self.predictions = recfunctions.append_fields(self.predictions, 'lower_deaths', lower)
    self.predictions = recfunctions.append_fields(self.predictions, 'upper_deaths', upper)
    if self.training_type != 'make predictions':
        self.predictions = recfunctions.append_fields(self.predictions, 'actual_deaths', self.test_data.cf*self.test_data.envelope)
    self.predictions = self.predictions.view(np.recarray)

    # save the predictions
    if save_csv == True:
        pl.rec2csv(self.predictions, '/home/j/Project/Causes of Death/CoDMod/tmp/' + self.name + '_predictions_' + self.cause + '_' + self.sex + '.csv')