def information_gain(examples, attribute, threshold, class_labels):
    # variable name meaning
    # exc -> total number of examples
    # excc -> total number of columns in first example
    # ex_in_lt -> example indices less than threshold
    # ex_in_ge -> example indices greater than or equal to threshold
    # exc_lt -> total number of elements in ex_in_lt
    # exc_ge -> total number of elements in ex_in_ge
    # cl_freq_node -> node's class label frequency
    # cl_freq_l_node -> class label frequency of examples less than threshold
    # cl_freq_r_node -> class label frequency of examples greater than or equal
    # ----------------> to threshold
    exc = len(examples) * 1.0
    excc = len(examples[0])
    ex_in_lt = [index for index, row in enumerate(examples)
                if row[attribute] < threshold]
    ex_in_ge = [index for index, row in enumerate(examples)
                if row[attribute] >= threshold]
    exc_lt = len(ex_in_lt) * 1.0
    exc_ge = len(ex_in_ge) * 1.0
    gain = 0
    cl_freq_node = {}
    cl_freq_l_node = {}
    cl_freq_r_node = {}
    for label in class_labels:
        cl_freq_node[label] = 0
        cl_freq_l_node[label] = 0
        cl_freq_r_node[label] = 0
    for index, row in enumerate(examples):
        cl_freq_node[int(row[excc - 1])] += 1
        if index in ex_in_lt:
            cl_freq_l_node[int(row[excc - 1])] += 1
        if index in ex_in_ge:
            cl_freq_r_node[int(row[excc - 1])] += 1
    for key, val in cl_freq_node.iteritems():
        if val > 0:
            gain += -((val / exc) * logarithm((val / exc), 2))
    if exc_lt > 0:
        for key, val in cl_freq_l_node.iteritems():
            if val > 0:
                gain -= (exc_lt / exc) * -((val / exc_lt) * logarithm((val / exc_lt), 2))
    if exc_ge > 0:
        for key, val in cl_freq_r_node.iteritems():
            if val > 0:
                gain -= (exc_ge / exc) * -((val / exc_ge) * logarithm((val / exc_ge), 2))
    return gain
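# A hedged usage sketch for information_gain above (Python 2, to match the
# snippet's use of dict.iteritems). The toy rows and the alias of math.log as
# `logarithm` are assumptions for illustration; the class label is expected to
# be the last column of each row.
from math import log as logarithm

examples = [[2.0, 0], [3.0, 0], [7.0, 1], [8.0, 1]]  # (feature value, class)
print(information_gain(examples, 0, 5.0, [0, 1]))
# -> 1.0: the split at 5.0 separates the two classes perfectly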
def compute_my_variability(event_log: Log) -> float:
    """
    Computes the prefix entropy of the input Log

    Args:
        event_log (Log): the input log

    Returns:
        the prefix-block entropy
    """
    prefixes: List[List[Event]] = []
    bar: Bar = IncrementalBar("Prefix generation", max=len(event_log.trace_list))
    for trace in event_log.trace_list:
        trace_prefixes: List[List[Event]] = trace.get_all_prefixes()
        for prefix in trace_prefixes:
            if prefix not in prefixes:
                prefixes.append(prefix)
        bar.next()
    bar.finish()
    entropy: float = 0
    bar = ShadyBar("Prefix likelihood estimation", max=len(prefixes))
    for prefix in prefixes:
        p: float = _prefix_likelihood_estimator(event_log, prefix)
        entropy += p * logarithm(p, 10)
        bar.next()
    bar.finish()
    entropy *= -1
    return entropy
def add_result(self, cfg, model, result):
    result.model = model
    result.params = cfg.processor.models.get_num_params(model)
    K = float(result.params)
    n = float(len(self.column_set))
    lnL = float(result.lnl)

    # Here we put in a catch for small subsets, where n < K+2.
    # If this happens, the AICc actually starts rewarding very small
    # datasets, which is wrong. A simple but crude catch for this is just to
    # never allow n to go below K+2.
    result.aic = (-2.0 * lnL) + (2.0 * K)
    result.bic = (-2.0 * lnL) + (K * logarithm(n))
    if n < (K + 2):
        log.warning(self.SMALL_WARNING % (self, n, model, K, self.name))
        n = K + 2
    result.aicc = (-2.0 * lnL) + ((2.0 * K) * (n / (n - K - 1.0)))

    # This is the rate per site of the model - used in some clustering
    # analyses
    result.site_rate = float(result.tree_size)

    log.debug("Adding model to subset. Model: %s, params %d, site_rate %f" %
              (model, K, result.site_rate))

    if model in self.results:
        log.error("Can't add model result %s, it already exists in %s",
                  model, self)
    self.results[model] = result
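# A small, hedged numeric check of the AIC / BIC / AICc formulas used above.
# The lnL, K and n values are illustrative only, and `logarithm` is assumed to
# be the natural log (math.log), so BIC = -2*lnL + K*ln(n).
from math import log as logarithm

lnL, K, n = -500.0, 6.0, 100.0
aic = (-2.0 * lnL) + (2.0 * K)                             # 1012.0
bic = (-2.0 * lnL) + (K * logarithm(n))                    # ~1027.63
aicc = (-2.0 * lnL) + ((2.0 * K) * (n / (n - K - 1.0)))    # ~1012.90
print(aic, bic, aicc)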
def add_result(self, cfg, model, result):
    result.model = model
    result.params = cfg.processor.models.get_num_params(model)
    K = float(result.params)
    n = float(len(self.columnset))
    lnL = float(result.lnl)

    # here we put in a catch for small subsets, where n < K+2
    # if this happens, the AICc actually starts rewarding very small
    # datasets, which is wrong
    # a simple but crude catch for this is just to never allow n to go below k+2
    if n < (K + 2):
        log.warning("The subset containing the following data_blocks: %s, has a very small"
                    " number of sites (%d) compared to the number of parameters"
                    " in the model being estimated (the %s model which has %d parameters)."
                    " This may give misleading AICc results, so please check carefully"
                    " if you are using the AICc for your analyses."
                    " The model selection results for this subset are in the following file:"
                    " /analysis/subsets/%s.txt\n"
                    % (self, n, model, K, self.name))
        n = K + 2

    result.aic = (-2.0 * lnL) + (2.0 * K)
    result.bic = (-2.0 * lnL) + (K * logarithm(n))
    result.aicc = (-2.0 * lnL) + ((2.0 * K) * (n / (n - K - 1.0)))

    # this is the rate per site of the model - used in some clustering analyses
    result.site_rate = float(result.tree_size)

    log.debug("Adding model to subset. Model: %s, params %d, site_rate %f"
              % (model, K, result.site_rate))

    if model in self.results:
        log.error("Can't add model result %s, it already exists in %s",
                  model, self)
    self.results[model] = result
def get_CIs(cfg):
    ci_list = []
    fname = os.path.join(cfg.base_path, 'rates.txt')
    the_cis = open(fname)
    for ci in the_cis.readlines():
        ci_list.append([logarithm(float(ci))])
    return ci_list
def __init__(self, sch, nseq, branchlengths, model_selection):
    self.scheme_name = sch.name
    self.scheme = sch
    self.model_selection = model_selection

    # Calculate AIC, BIC, AICc for each scheme.
    # How you do this depends on whether brlens are linked or not.
    self.nsubs = len(sch.subsets)  # number of subsets
    # sum of number of parameters in the best model of each subset
    sum_subset_k = sum([s.best_params for s in sch])

    log.debug("Calculating number of parameters in scheme:")
    log.debug("Total parameters from subset models: %d" % (sum_subset_k))

    if branchlengths == 'linked':
        # linked brlens - only one extra parameter per subset
        self.sum_k = sum_subset_k + (self.nsubs - 1) + (
            (2 * nseq) - 3)  # number of parameters in a scheme
        log.debug("Total parameters from brlens: %d" % ((2 * nseq) - 3))
        log.debug(
            "Parameters from subset multipliers: %d" % (self.nsubs - 1))
    elif branchlengths == 'unlinked':
        # unlinked brlens - every subset has its own set of brlens
        self.sum_k = sum_subset_k + (self.nsubs * (
            (2 * nseq) - 3))  # number of parameters in a scheme
        log.debug("Total parameters from brlens: %d" %
                  (((2 * nseq) - 3) * self.nsubs))
    else:  # WTF?
        log.error("Unknown option for branchlengths: %s", branchlengths)
        raise AnalysisError

    log.debug("Grand total parameters: %d" % (self.sum_k))

    self.lnl = sum([s.best_lnl for s in sch])
    self.nsites = sum([len(s.columnset) for s in sch])

    K = float(self.sum_k)
    n = float(self.nsites)
    lnL = float(self.lnl)
    log.debug("n: %d\tK: %d" % (n, K))

    # here we put in a catch for small subsets, where n < K+2
    # if this happens, the AICc actually starts rewarding very small
    # datasets, which is wrong
    # a simple but crude catch for this is just to never allow n to go below k+2
    if n < (K + 2):
        log.warning("Scheme '%s' has a very small"
                    " number of sites (%d) compared to the number of parameters"
                    " in the models that make up the subsets."
                    " This may give misleading AICc results, so please check carefully"
                    " if you are using the AICc for your analyses."
                    " The results for this scheme are in the following file:"
                    " /analysis/schemes/%s.txt\n" % (sch.name, n, sch.name))
        n = K + 2

    self.aic = (-2.0 * lnL) + (2.0 * K)
    self.bic = (-2.0 * lnL) + (K * logarithm(n))
    self.aicc = (-2.0 * lnL) + ((2.0 * K) * (n / (n - K - 1.0)))
def __init__(self, sch, nseq, branchlengths, model_selection):
    self.scheme_name = sch.name
    self.scheme = sch
    self.model_selection = model_selection

    # Calculate AIC, BIC, AICc for each scheme.
    # How you do this depends on whether brlens are linked or not.
    self.nsubs = len(sch.subsets)  # number of subsets
    # sum of number of parameters in the best model of each subset
    sum_subset_k = sum([s.best_params for s in sch])

    log.debug("Calculating number of parameters in scheme:")
    log.debug("Total parameters from subset models: %d" % (sum_subset_k))

    if branchlengths == 'linked':
        # linked brlens - only one extra parameter per subset
        self.sum_k = sum_subset_k + (self.nsubs - 1) + (
            (2 * nseq) - 3)  # number of parameters in a scheme
        log.debug("Total parameters from brlens: %d" % ((2 * nseq) - 3))
        log.debug(
            "Parameters from subset multipliers: %d" % (self.nsubs - 1))
    elif branchlengths == 'unlinked':
        # unlinked brlens - every subset has its own set of brlens
        self.sum_k = sum_subset_k + (self.nsubs * (
            (2 * nseq) - 3))  # number of parameters in a scheme
        log.debug("Total parameters from brlens: %d" %
                  (((2 * nseq) - 3) * self.nsubs))
    else:  # WTF?
        log.error("Unknown option for branchlengths: %s", branchlengths)
        raise AnalysisError

    log.debug("Grand total parameters: %d" % (self.sum_k))

    self.lnl = sum([s.best_lnl for s in sch])
    self.nsites = sum([len(s.columnset) for s in sch])

    K = float(self.sum_k)
    n = float(self.nsites)
    lnL = float(self.lnl)
    log.debug("n: %d\tK: %d" % (n, K))

    # here we put in a catch for small subsets, where n < K+2
    # if this happens, the AICc actually starts rewarding very small
    # datasets, which is wrong
    # a simple but crude catch for this is just to never allow n to go below k+2
    self.aic = (-2.0 * lnL) + (2.0 * K)
    self.bic = (-2.0 * lnL) + (K * logarithm(n))
    if n < (K + 2):
        log.warning("Scheme '%s' has a very small"
                    " number of sites (%d) compared to the number of parameters"
                    " in the models that make up the subsets."
                    " This may give misleading AICc results, so please check carefully"
                    " if you are using the AICc for your analyses."
                    % (sch.name, n,))
        n = K + 2
    self.aicc = (-2.0 * lnL) + ((2.0 * K) * (n / (n - K - 1.0)))
def convert_size(size_bytes, table, base):
    if len(table) == 0 or base <= 0:
        raise ValueError("ERROR in convert_size")
    if size_bytes == 0:
        return "0" + table[0]
    i = min(int(logarithm(size_bytes, base)), len(table) - 1)
    p = base ** i
    s = round(size_bytes / p, 2)
    return "{:g}{:s}".format(s, table[i])
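# A minimal usage sketch for convert_size above. The unit table and base are
# illustrative choices, not part of the original code, and `logarithm` is
# assumed to be math.log so that logarithm(x, base) is the base-`base` log.
units = ["B", "KiB", "MiB", "GiB", "TiB"]
print(convert_size(3500000, units, 1024))  # -> "3.34MiB"
print(convert_size(0, units, 1024))        # -> "0B"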
def _mutual_info(joint):
    assert numpy.isclose(joint.sum(), 1.0)
    info = 0.0
    rownum, colnum = joint.shape
    colsum = joint.sum(axis=0)
    rowsum = joint.sum(axis=1)
    for row in range(rownum):
        for col in range(colnum):
            p_xy = joint[row, col]
            p_x = rowsum[row]
            p_y = colsum[col]
            if p_xy != 0:
                info += p_xy * logarithm(p_xy / (p_x * p_y), 2)
    return info
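# A quick check of the formula in _mutual_info above. The joint distribution
# here is made up, and `logarithm` is assumed to be math.log.
import numpy
from math import log as logarithm

joint = numpy.array([[0.5, 0.0],
                     [0.0, 0.5]])   # X and Y are perfectly correlated
print(_mutual_info(joint))          # -> 1.0 bit of mutual information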
async def generate_random_url_alias(_urandom=urandom,
                                    _encode=b32encode,
                                    _randint=randint,
                                    _factor=logarithm(256, 32)):
    # The length of URL alias is 8-12 characters
    count = _randint(8, 12)
    # (count + 1) / _factor gives us the number of bytes needed
    # to produce *at least* count encoded characters
    random_str = (_encode(_urandom(int(
        (count + 1) / _factor)))[:count].decode("ascii").lower())
    while await Url.query.where(Url.url_alias == random_str).gino.first():
        random_str = (_encode(_urandom(int(
            (count + 1) / _factor)))[:count].decode("ascii").lower())
    return random_str
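# A self-contained check of the byte-count arithmetic used above (this does
# not call the coroutine itself; it only verifies that int((count + 1) / _factor)
# bytes always base32-encode to at least `count` characters). `logarithm` is
# assumed to be math.log, so logarithm(256, 32) = 8/5 = 1.6.
from base64 import b32encode
from math import log as logarithm
from os import urandom

_factor = logarithm(256, 32)   # base32 characters produced per input byte
for count in (8, 9, 10, 11, 12):
    nbytes = int((count + 1) / _factor)
    encoded = b32encode(urandom(nbytes)).decode("ascii").lower()
    assert len(encoded.rstrip("=")) >= count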
def _entropy(self, X):
    """
    Calculates the Shannon entropy on the given data X

    Arguments:
        X: An iterable for feature values. Usually, this is now a 1D list
    """
    summed = 0
    counter = Counter(X)
    for value in counter:
        count = counter[value]
        px = count / float(len(X))
        summed += px * logarithm(1. / px, 2)
    return summed
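# A self-contained check of the entropy formula used in _entropy above.
# The sample data is made up, and `logarithm` is assumed to be math.log.
from collections import Counter
from math import log as logarithm

data = ['a', 'a', 'b', 'b']               # a 50/50 split of two values
counts = Counter(data)
h = sum((c / float(len(data))) * logarithm(float(len(data)) / c, 2)
        for c in counts.values())
print(h)  # -> 1.0 bit, which is what _entropy returns for this list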
def gpu():
    nvmlInit()
    num_devices = nvmlDeviceGetCount()
    if num_devices > 0:
        padding = int(logarithm(num_devices, 10)) + 1
        for i in range(num_devices):
            handle = nvmlDeviceGetHandleByIndex(i)
            name = nvmlDeviceGetName(handle).decode("ascii")
            total_memory = nvmlDeviceGetMemoryInfo(handle).total
            utilization = nvmlDeviceGetUtilizationRates(handle)
            gpu_percent, memory_percent = utilization.gpu, utilization.memory
            yield (f'gpu{i:0{padding}d}', gpu_percent, 0, f'{name} ({i})')
            yield (
                f'gpu{i:0{padding}d} memory',
                memory_percent,
                0,
                f'{convert_size_2(memory_percent * total_memory / 100)}B / {convert_size_2(total_memory)}B',
            )
    nvmlShutdown()
def add_result(self, cfg, model, result):
    result.model = model
    result.params = cfg.processor.models.get_num_params(model)
    K = float(result.params)
    n = float(len(self.columnset))
    lnL = float(result.lnl)

    # here we put in a catch for small subsets, where n < K+2
    # if this happens, the AICc actually starts rewarding very small
    # datasets, which is wrong
    # a simple but crude catch for this is just to never allow n to go below k+2
    result.aic = (-2.0 * lnL) + (2.0 * K)
    result.bic = (-2.0 * lnL) + (K * logarithm(n))
    if n < (K + 2):
        log.warning(
            "The subset containing the following data_blocks: %s, has a very small"
            " number of sites (%d) compared to the number of parameters"
            " in the model being estimated (the %s model which has %d parameters)."
            " This may give misleading AICc results, so please check carefully"
            " if you are using the AICc for your analyses."
            " The model selection results for this subset are in the following file:"
            " /analysis/subsets/%s.txt\n" % (self, n, model, K, self.name))
        n = K + 2
    result.aicc = (-2.0 * lnL) + ((2.0 * K) * (n / (n - K - 1.0)))

    # this is the rate per site of the model - used in some clustering analyses
    result.site_rate = float(result.tree_size)

    log.debug(
        "Adding model to subset. Model: %s, params %d, site_rate %f" %
        (model, K, result.site_rate))

    if model in self.results:
        log.error("Can't add model result %s, it already exists in %s",
                  model, self)
    self.results[model] = result
def cpu():
    cpu_times = psutil.cpu_times_percent(percpu=True)
    frequencies = psutil.cpu_freq(percpu=True)
    padding = int(logarithm(len(cpu_times), 10)) + 1
    cpu_infos = [
        (f'cpu{i:0{padding}d}', cpu_time.user, cpu_time.system)
        for i, cpu_time in enumerate(cpu_times)
    ]
    cpu_infos.append(
        (
            'cpu',
            sum(x[1] for x in cpu_infos) / len(cpu_infos),
            sum(x[2] for x in cpu_infos) / len(cpu_infos),
        )
    )
    if len(frequencies) == 1:
        cpu_infos[-1] += (str(frequencies[0].current) + 'MHz',)
    elif len(frequencies) == len(cpu_infos) - 1:
        for i in range(len(frequencies)):
            cpu_infos[i] += (str(frequencies[i].current) + 'MHz',)
    return cpu_infos
def price_request(self, requested_fib):
    return int(floor(logarithm(requested_fib, 10))) + 1
def price_request(self, requested_fib):
    return int(floor(logarithm(requested_fib, 10))) + 1
def update_modbus_registers(args):
    log.debug("Updated thread started.")
    update_interval_seconds = 5
    heartbeat_counter = 1
    heartbeat_counter_max_value = 10
    register_type = 4
    register_offset = 0
    while (CONTINUE_UPDATING_MODBUS_REGISTERS is True):
        log.debug("Updating the server registers")
        simulated_modbus_server_context = args[0]

        # Initialize the number of discovered bluetooth devices to 0
        number_of_nearby_bluetooth_devices = 0

        # Initialize the temperature to a simulated random value
        temperature = int((110 + 4 * logarithm(100 * random())) * 100)

        # Read the board temperature
        try:
            temperature = int((float(
                popen("vcgencmd measure_temp").readline().replace(
                    "temp=", "").replace("'C", "")) * 9 / 5 + 32) * 100)
        except Exception as ex:
            # Log any error, if it occurs
            log.debug("Error reading temperature: " + str(ex))
            log.debug(
                "Simulated temperature data will be generated instead of a real value"
            )

        # Scan for nearby devices
        if (BLUETOOTH_DEVICE_SCANNING_ENABLED):
            try:
                # Save the results to a file
                popen(
                    "sudo timeout -s SIGINT 1s hcitool -i hci0 lescan --passive > bluetoothScanResults.txt"
                )
                # Open the file and count the lines, and save the line count as
                # the number of devices (omit the header line)
                number_of_nearby_bluetooth_devices = len(
                    open("bluetoothScanResults.txt").readlines()) - 1
                if (number_of_nearby_bluetooth_devices == -1):
                    raise Exception(
                        "Possible popen error",
                        "len(open("
                        "bluetoothScanResults.txt"
                        ").readlines()) equals zero")
            except Exception as ex:
                # Log any error, if it occurs
                log.debug("Error scanning for bluetooth devices: " + str(ex))
                log.debug(
                    "Default value of 0 will be used instead of a real value")

        # Write the new values back to the Modbus register
        new_register_values = [
            temperature, number_of_nearby_bluetooth_devices, heartbeat_counter
        ]
        log.debug("New values: " + str(new_register_values))
        simulated_modbus_server_context.setValues(register_type,
                                                  register_offset,
                                                  new_register_values)

        # Increment the heartbeat counter by one
        heartbeat_counter = heartbeat_counter + 1

        # Reset the counter if necessary
        if (heartbeat_counter > heartbeat_counter_max_value):
            heartbeat_counter = 1

        # Wait until the next loop
        sleep(update_interval_seconds)

    # Once broken out of the loop, note that the thread is over
    log.debug("Updated thread ended.")
def get_bic(lnL, K, n):
    bic = (-2.0 * lnL) + (K * logarithm(n))
    return bic
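# Hedged usage of get_bic above; the numbers are illustrative only, and
# `logarithm` is assumed to be the natural log (math.log).
from math import log as logarithm

print(get_bic(-500.0, 6.0, 100.0))  # -> 1000 + 6 * ln(100) ≈ 1027.63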
def naivebayes(fileid, string_tags, dictionary, global_var):
    import sys
    import os
    import codecs
    import json
    import math

    pathname = "eattreat_nlp_taggenerator/"
    path = [
        'Bakery&Sweets/', 'Snacks/', 'Meats/', 'Organics/', 'Other/',
        'Drinks/', 'Restaurants/'
    ]
    path1 = [
        'Bakery&Sweets', 'Snacks', 'Meats', 'Organics', 'Other', 'Drinks',
        'Restaurants'
    ]
    dict_bakery = ['bakery', 'cake', 'chocolate', 'dessert', 'sweet']
    dict_snacks = ['cafe', 'street', 'chaat', 'food', 'snack', 'golgappe']
    dict_organics = ['healthy', 'detox', 'vegan', 'salad', 'diet', 'dietary']
    dict_restaurants = [
        'restaurant', 'bar', 'new', 'market', 'menu', 'eatery', 'kitchen',
        'hotel', 'cafe'
    ]
    dict_meats = [
        'chicken', 'biryani', 'seafood', 'prawn', 'fish', 'salmon', 'mutton',
        'meat'
    ]
    dict_drinks = [
        'rum', 'cocktail', 'mocktail', 'drink', 'beer', 'wine', 'drinking',
        'tea', 'coffee', 'whisky', 'whiskey'
    ]
    dict_others = ['festival', 'fest', 'travel']
    dict_top_keywords = {
        'Bakery&Sweets': dict_bakery,
        'Snacks': dict_snacks,
        'Meats': dict_meats,
        'Organics': dict_organics,
        'Other': dict_others,
        'Drinks': dict_drinks,
        'Restaurants': dict_restaurants
    }

    vocab = [{}, {}, {}, {}, {}, {}, {}]
    V = []
    alltags = set()
    classoccur = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

    for p in range(len(path)):
        for filename in os.listdir(pathname + path[p]):
            if not filename.startswith('.'):
                classoccur[p] += 1
                inputfile = codecs.open(pathname + path[p] + filename, 'r')
                for line in inputfile:
                    content = line.split("\t")
                    post_id = content[0]
                    post_title = content[1]
                    post_tags = content[2]
                    tags = post_tags.split(', ')
                    terms_freq = dictionary[post_id]
                    # print terms_freq
                    for t in tags:
                        if t != '':
                            t_freq = terms_freq[t]
                            # hyp_count = t.count('-')
                            tt = t.split('-')
                            t_score = 0
                            for ttt in tt:
                                if ttt not in alltags:
                                    alltags.add(ttt)
                                # if ttt in dict_top_keywords[path1[p]]:
                                #     t_score = t_score + (hyp_count+1)
                                if ttt not in vocab[p]:
                                    vocab[p].update({ttt: 1})
                                else:
                                    vocab[p][ttt] += 1
        V.append(sum(vocab[p].values()))

    naive = [{}, {}, {}, {}, {}, {}, {}]
    lenalltags = len(alltags)

    # ---------------------DICTIONARIES-----------------------------------------
    if global_var < 2:
        for alpha in range(len(vocab)):
            class_dict = open('dictionaries/' + path1[alpha] + '.txt', 'a')
            for key in vocab[alpha]:
                class_dict.write(
                    str(key) + '\t' + str(vocab[alpha][key]) + '\n')
    # ---------------------------------------------------------------------------

    test_tags = []
    total_tags = string_tags.split(", ")
    frequency = dictionary[fileid]
    sum1 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    for k in range(len(vocab)):
        s = 0
        for e in total_tags:
            fterm_freq = frequency[e]
            hyphen_count = e.count('-')
            ee = e.split('-')
            e_score = 1
            for eee in ee:
                if eee not in alltags:
                    alltags.add(eee)
                    lenalltags += 1
                if eee in dict_top_keywords[path1[k]]:
                    e_score = (hyphen_count + 1)
                if eee not in vocab[k]:
                    vocab[k].update({eee: 1})
                    V[k] += 1
                else:
                    vocab[k][eee] += 1
                    V[k] += 1
                naive[k].update({
                    eee: e_score * float(float(1 + vocab[k][eee]) /
                                         float(lenalltags + V[k]))
                })
                s = s + math.logarithm(naive[k][eee])
        beta = float(s + math.logarithm(classoccur[k] / sum(classoccur)))
        sum1[k] = beta
    inputfile.close()

    max_value = max(sum1)
    max_index = sum1.index(max_value)
    classoccur[max_index] += 1
    print path1[max_index]
    return path1[max_index]
    # naivebayes()
def log(num, base=10):
    if num > 0:
        return logarithm(num, base)
    return logarithm(sys.float_info.min * sys.float_info.epsilon, base)
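# A small sketch of why the fallback branch above exists (assumes `logarithm`
# is math.log): the log of zero or a negative number would raise ValueError,
# so the wrapper substitutes the smallest positive float instead.
import sys
from math import log as logarithm

print(log(1000))   # -> ~3.0
print(log(0))      # -> roughly -323.3 instead of raising ValueError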
def moredigits(a, b):
    loga, logb = logarithm(a, 10), logarithm(b, 10)
    loga, logb = floor(loga), floor(logb)
    return loga > logb
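# Hedged usage of moredigits above (assumes `logarithm` is math.log and
# `floor` comes from the math module); it compares counts of decimal digits.
from math import floor, log as logarithm

print(moredigits(1234, 99))   # -> True  (4 digits vs 2)
print(moredigits(50, 70))     # -> False (same number of digits)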
def likelihood_parser(phyml_lk_file):
    '''
    Takes a *_phyml_lk.txt file and returns a dictionary of sites and site
    likelihoods and a dictionary of sites and lists of likelihoods under
    different rate categories. If no rate categories are specified, it will
    return a dictionary with sites and likelihoods P(D|M) for each site.

    Here is an example of the first few lines of the file that it takes:

    Note : P(D|M) is the probability of site D given the model M (i.e., the site likelihood)
           P(D|M,rr[x]) is the probability of site D given the model M and the relative rate
           of evolution rr[x], where x is the class of rate to be considered.
           We have P(D|M) = \sum_x P(x) x P(D|M,rr[x]).

    Site  P(D|M)       P(D|M,rr[1]=2.6534)  P(D|M,rr[2]=0.2289)  P(D|M,rr[3]=0.4957)  P(D|M,rr[4]=1.0697)  Posterior mean
    1     2.07027e-12  1.3895e-19           6.2676e-12           1.2534e-12           1.21786e-15          0.273422
    2     1.8652e-07   2.05811e-19          6.73481e-07          4.14575e-09          7.97623e-14          0.23049
    3     4.48873e-15  1.37274e-19          7.11221e-15          9.11826e-15          9.21848e-17          0.382265
    4     3.38958e-10  1.31413e-19          1.18939e-09          4.20659e-11          5.86537e-15          0.237972
    5     8.29969e-17  1.11587e-19          3.1672e-17           2.52183e-16          1.9722e-17           0.502077
    6     9.24579e-09  1.59891e-19          3.31101e-08          4.79946e-10          2.59524e-14          0.232669
    7     3.43996e-10  2.1917e-19           1.19544e-09          5.43128e-11          1.22969e-14          0.240455
    8     4.43262e-13  1.1447e-19           1.32148e-12          2.8874e-13           3.7386e-16           0.27685
    9     3.42513e-11  1.70149e-19          1.14227e-10          1.02103e-11          4.05239e-15          0.250765
    10    1.15506e-11  1.28107e-19          3.86378e-11          3.32642e-12          1.46151e-15          0.250024
    '''
    try:
        with open(str(phyml_lk_file)) as phyml_lk_file:
            # The phyml_lk files differ based on whether different rate
            # categories are estimated or not, this figures out which
            # file we are dealing with
            phyml_lk_file.next()
            line2 = phyml_lk_file.next()
            # Check to see if the file contains rate categories
            if line2[0] != "P":
                phyml_lk_file.next()
            # If it contains rate categories, we need to skip a few more lines
            else:
                for _ in xrange(4):
                    phyml_lk_file.next()
            # Read in the contents of the file and get rid of whitespace
            list_of_dicts = list(csv.DictReader(phyml_lk_file,
                                                delimiter=" ",
                                                skipinitialspace=True))
    except IOError:
        raise IOError("Could not find the likelihood file!")
    phyml_lk_file.close()

    # Right now, when the alignment is over 1,000,000 sites, PhyML
    # merges the site number with the site likelihood, catch that and
    # throw an error
    if len(list_of_dicts) > 999999:
        raise IOError("PhyML file cannot process more than 1 M sites")

    # The header values change with each run so we need a list of them
    headers = []
    for k in list_of_dicts[0]:
        headers.append(k)

    # Sort the headers into alphabetical order
    headers.sort()

    # Check if the rate categories were estimated; if they weren't,
    # just return the likelihood scores for each site, otherwise return
    # site likelihoods and likelihoods under each rate category
    if len(headers) < 4:
        # Make a list of site log likelihoods
        likelihood_list = [[logarithm(float(site[headers[1]]))]
                           for site in list_of_dicts]
        return likelihood_list
    else:
        # Make a list of site log likelihoods
        if (list_of_dicts[0][headers[1]] == 'nan'
                or list_of_dicts[0][headers[1]] == 'inf'):
            likelihood_list = None
            print "Whoopsies!"
            rate_list = None
            lk_rate_list = None
            lk_site_rate_list = None
            return likelihood_list, lk_rate_list, rate_list, lk_site_rate_list
        else:
            likelihood_list = [[logarithm(float(site[headers[1]]))]
                               for site in list_of_dicts]

            # Make a rate list
            # print list_of_dicts[0][headers[len(headers) - 3]]
            # if list_of_dicts[0][headers[len(headers) - 3]] == 'nan' or list_of_dicts[0][headers[len(headers) - 3]] == 'inf':
            #     rate_list = None
            #     print "Whoopsies!"
            # else:
            rate_list = [[(logarithm(float(site[headers[len(headers) - 3]])))]
                         for site in list_of_dicts]

            # Now make a list of lists of site likelihoods under different
            # rate categories
            lk_rate_list = []
            for i in list_of_dicts:
                ind_lk_list = []
                # Pull the likelihood from each rate category by calling the
                # appropriate key from "headers"
                for num in range(2, len(headers) - 3):
                    ind_lk_list.append(logarithm(float(i[headers[num]])))
                # Now add the list of likelihoods for the site to a master list
                lk_rate_list.append(ind_lk_list)

            # Now pull likelihoods and rates for a two dimensional list
            lk_site_rate_list = []
            for i in list_of_dicts:
                ind_lk_r_list = []
                ind_lk_r_list.append(logarithm(float(i[headers[1]])))
                ind_lk_r_list.append(logarithm(float(i[headers[len(headers) - 3]])))
                lk_site_rate_list.append(ind_lk_r_list)

            # Return both the list of site likelihoods and the list of lists of
            # likelihoods under different rate categories
            return likelihood_list, lk_rate_list, rate_list, lk_site_rate_list