def add_diversity_data_to_serovar(serovar_data):
    """Annotate each serovar record with taxa and plasmid diversity indices.

    For every serovar, the (redacted) human taxon is excluded from the taxa
    tally; Shannon entropy, Shannon/Simpson indices, Simpson evenness and
    chao1 are then added whenever at least 10 observations are available.
    The input mapping is mutated in place and also returned.
    """
    for serovar, data in serovar_data.items():
        # Drop human-associated counts before measuring taxa diversity.
        non_human = {
            taxon: count
            for taxon, count in data['associated_taxa'].items()
            if taxon != 'h**o sapien'
        }
        taxa_counts = list(non_human.values())
        # Indices are only meaningful with a large enough sample (>= 10).
        if taxa_counts and sum(taxa_counts) >= 10:
            data['taxa_entropy'] = calc_shanon_entropy(taxa_counts)
            data['taxa_shannon_index'] = alpha.shannon(taxa_counts)
            data['taxa_simpson_index'] = alpha.simpson(taxa_counts)
            data['taxa_simpson_index_e'] = alpha.simpson_e(taxa_counts)
            data['taxa_chao1'] = alpha.chao1(taxa_counts)
        plasmid_counts = list(data['plasmids'].values())
        if plasmid_counts and sum(plasmid_counts) >= 10:
            data['plasmid_entropy'] = calc_shanon_entropy(plasmid_counts)
            data['plasmid_shannon_index'] = alpha.shannon(plasmid_counts)
            data['plasmid_simpson_index'] = alpha.simpson(plasmid_counts)
            data['plasmid_simpson_index_e'] = alpha.simpson_e(plasmid_counts)
            data['plasmid_chao1'] = alpha.chao1(plasmid_counts)
    return serovar_data
def bugs():
    """Build per-plot Shannon diversity of bug counts for each flight date.

    Reads ./dat/bugs.csv, keeps only rows that have both a total count and
    a sensor number, parses the 'Date' column, then for every flight date /
    plot combination computes the Shannon index (natural-log base) of the
    plot's counts. The result is written to ./dat/bugFrame.csv and returned.
    """
    bugs = pd.read_csv('./dat/bugs.csv')
    # Rows without counts or sensor numbers carry no usable signal.
    bugs = bugs[pd.notnull(bugs['Total Count'])]
    bugs = bugs[pd.notnull(bugs['Sensor Number'])]
    bugs['Date'] = [
        datetime.datetime.strptime(raw, '%m/%d/%Y') for raw in bugs['Date']
    ]
    flights = flight_dates()
    all_plots = plots()
    dates, plot_ids, shannons = [], [], []
    for flight_day in flights:
        on_day = bugs[bugs['Date'] == flight_day]
        for plot_id in all_plots:
            sensor = bugPlot(plot_id)
            # Skip plots whose sensor recorded nothing on this flight.
            if sensor not in on_day['Sensor Number'].values:
                continue
            at_sensor = on_day[on_day['Sensor Number'] == sensor]
            plot_ids.append(plot_id)
            dates.append(flight_day)
            shannons.append(
                shannon(at_sensor['Total Count'].values, base=math.exp(1)))
    bug_df = pd.DataFrame.from_dict(
        {'date': dates, 'plot': plot_ids, 'bug_shannon': shannons})
    bug_df.to_csv('./dat/bugFrame.csv', index=False)
    return bug_df
def update_terminal_metrics(self):
    """Record end-of-episode park metrics: vomits, ride averages, diversity."""
    rides = list(self.rct_env.park.rides_by_pos.values())
    self.net_vomits = self.rct_env.park.net_vomits
    # Mean guest-experience statistics across all placed rides.
    self.avg_ride_nausea = np.mean([r.nausea for r in rides])
    self.avg_ride_excitement = np.mean([r.excitement for r in rides])
    self.avg_ride_intensity = np.mean([r.intensity for r in rides])
    # Shannon index over the histogram of ride types measures variety.
    ride_type_counts = np.bincount([r.ride_i for r in rides])
    self.ride_diversity = shannon(ride_type_counts)
def test_heip_e(self):
    """Heip evenness: hand-computed case plus textbook reference values."""
    counts = np.array([1, 2, 3, 1])
    entropy = shannon(counts, base=np.e)
    # heip_e = (e^H - 1) / (S - 1) with S = 4 species here.
    self.assertEqual(heip_e(counts), (np.exp(entropy) - 1) / 3)
    # Statistical Ecology: A Primer in Methods and Computing, p. 94,
    # table 8.1.
    self.assertAlmostEqual(heip_e([500, 300, 200]), 0.90, places=2)
    self.assertAlmostEqual(heip_e([500, 299, 200, 1]), 0.61, places=2)
def test_heip_e(self):
    """Verify Heip evenness against a manual calculation and known values."""
    arr = np.array([1, 2, 3, 1])
    # Manual computation: (e^H - 1) / (S - 1), S - 1 = 3 for 4 species.
    expected = (np.exp(shannon(arr, base=np.e)) - 1) / 3
    self.assertEqual(heip_e(arr), expected)
    # Reference values from Statistical Ecology: A Primer in Methods and
    # Computing, page 94, table 8.1.
    self.assertAlmostEqual(heip_e([500, 300, 200]), 0.90, places=2)
    self.assertAlmostEqual(heip_e([500, 299, 200, 1]), 0.61, places=2)
def mercat_compute_alpha_beta_diversity(counts, bif):
    """Compute a panel of skbio alpha-diversity metrics and write them out.

    Each metric is written on its own line as "<name> = <value>" to
    <bif>_diversity_metrics.txt.
    """
    metrics = {
        'shannon': skbio_alpha.shannon(counts),
        'simpson': skbio_alpha.simpson(counts),
        'simpson_e': skbio_alpha.simpson_e(counts),
        'goods_coverage': skbio_alpha.goods_coverage(counts),
        'fisher_alpha': skbio_alpha.fisher_alpha(counts),
        'dominance': skbio_alpha.dominance(counts),
        'chao1': skbio_alpha.chao1(counts),
        'chao1_ci': skbio_alpha.chao1_ci(counts),
        'ace': skbio_alpha.ace(counts),
    }
    with open(bif + "_diversity_metrics.txt", 'w') as out:
        for name, value in metrics.items():
            out.write(name + " = " + str(value) + "\n")
def thematic_diversity(objs, labels, is_covid):
    """Shannon diversity of topic assignments for the selected objects.

    Restricts to objects created after INDICATORS["covid_dates"]["from_date"]
    and selected by ``is_covid``, sums the CorEx binary label matrix over
    those rows to get per-topic counts, and returns their Shannon index.

    Args:
        objs (DataFrame): Objects with a "created" timestamp column.
        labels: CorEx binary topic-label matrix (rows align with ``objs``).
        is_covid (Series): Boolean indexer selecting covid-related rows
            (pass its negation for the non-covid diversity).

    Returns:
        Shannon diversity of the per-topic label counts.
    """
    cutoff = pd.to_datetime(INDICATORS["covid_dates"]["from_date"])
    recent = objs["created"] > cutoff
    topic_counts = labels.loc[recent & is_covid].sum(axis=0)
    return shannon(topic_counts)
def test_pielou_e(self):
    """Pielou evenness: hand calculation, known values, and NaN cases."""
    counts = np.array([1, 2, 3, 1])
    entropy = shannon(counts, np.e)
    # J' = H / ln(S) with S = 4 observed species.
    self.assertAlmostEqual(pielou_e(counts), entropy / np.log(4))
    self.assertAlmostEqual(pielou_e(self.counts), 0.92485490560)
    # Perfectly even communities (zeros ignored) have evenness 1.
    for even in ([1, 1], [1, 1, 1, 1], [1, 1, 1, 1, 0, 0]):
        self.assertEqual(pielou_e(even), 1.0)
    # Examples from
    # http://ww2.mdsg.umd.edu/interactive_lessons/biofilm/diverse.htm#3
    self.assertAlmostEqual(pielou_e([1, 1, 196, 1, 1]), 0.078, 3)
    # Single-species and empty communities have undefined evenness.
    self.assertTrue(np.isnan(pielou_e([0, 0, 200, 0, 0])))
    self.assertTrue(np.isnan(pielou_e([0, 0, 0, 0, 0])))
def test_pielou_e(self):
    """Check Pielou's evenness against a manual result and edge cases."""
    arr = np.array([1, 2, 3, 1])
    species = 4
    # Manual computation: J' = H / ln(S).
    manual = shannon(arr, np.e) / np.log(species)
    self.assertAlmostEqual(pielou_e(arr), manual)
    self.assertAlmostEqual(pielou_e(self.counts), 0.92485490560)
    # Even distributions give maximal evenness; zeros do not count.
    self.assertEqual(pielou_e([1, 1]), 1.0)
    self.assertEqual(pielou_e([1, 1, 1, 1]), 1.0)
    self.assertEqual(pielou_e([1, 1, 1, 1, 0, 0]), 1.0)
    # Examples from
    # http://ww2.mdsg.umd.edu/interactive_lessons/biofilm/diverse.htm#3
    self.assertAlmostEqual(pielou_e([1, 1, 196, 1, 1]), 0.078, 3)
    # One species only, or no observations at all, yields NaN.
    self.assertTrue(np.isnan(pielou_e([0, 0, 200, 0, 0])))
    self.assertTrue(np.isnan(pielou_e([0, 0, 0, 0, 0])))
def test_shannon(self):
    """Shannon index (base 2): one species gives 0, even splits give log2(S)."""
    self.assertEqual(shannon(np.array([5])), 0)  # single species -> H = 0
    self.assertEqual(shannon(np.array([5, 5])), 1)  # two even species -> 1 bit
    # Zero counts are ignored: four even species -> 2 bits.
    self.assertEqual(shannon(np.array([1, 1, 1, 1, 0])), 2)
def main(menLen, i_power, sigmas, samples, simulations, condition):
    """Run signalling-game simulations and dump per-round statistics to CSV.

    Plays a Match for every (simulation, sample) combination, tallies how
    often each agent produced each signal in each round, and writes one CSV
    row per (simulation, sample, round) with population and subpopulation
    diversity measures (Shannon entropy, Brillouin, Margalef, Simpson,
    Simpson evenness, richness).

    Args:
        menLen: Memory length (rounds of history agents can recall).
        i_power: Institutional power parameter passed to Match.
        sigmas: Mapping of agent id -> value-system vector.
        samples: List of parameter dicts with keys 'cont', 'coord', 'mut'.
        simulations: Number of independent simulation runs.
        condition: Label written to the 'Condition' CSV column.

    Side effects:
        Writes PRep_heterogeneity_R_I005.csv in the working directory.
    """
    agents = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    signals = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10']
    network = group(agents)
    pairs = [list(elem) for elem in network]

    # statistics[sim][agent][sample][signal][round] = number of times that
    # agent produced that signal in that round.
    statistics = {
        sim: {
            agent: {
                sample: {signal: [0] * len(pairs) for signal in signals}
                for sample in range(len(samples))
            }
            for agent in agents
        }
        for sim in range(simulations)
    }

    for sim in range(simulations):
        for mu in range(len(samples)):
            game = Match(
                agents,
                pairs,
                signals,
                sigmas,
                samples[mu]["cont"],
                samples[mu]["coord"],
                samples[mu]["mut"],
                menLen,
                i_power,
            )
            game.play()
            for n, round_state in enumerate(game.memory):
                for agent, signal in round_state.items():
                    statistics[sim][agent][mu][signal][n] += 1

    with open('PRep_heterogeneity_R_I005.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(
            ['Simulation', 'Sample', 'Agent', 'Memory', 'Generation',
             'Condition', 'Inst_power', 'Content bias', 'Coordination bias',
             'Mutation rate']
            + signals + ['Population signals'] + ['Entropy_population']
            + ['Entropy_subpopulation_1'] + ['Entropy_subpopulation_2']
            + ['Subpopulation_1 signals'] + ['Subpopulation_2 signals']
            + ['Brillouin_population'] + ['Margalef_population']
            + ['Simpson_population'] + ['Simpson_e_population']
            + ['Richness'])
        for sim in range(simulations):
            for mu in range(len(samples)):
                for round in range(1, len(pairs) + 1):
                    # Per-agent signal counts for this round.
                    per_agent = {
                        a: [statistics[sim][a][mu][s][round - 1]
                            for s in signals]
                        for a in agents
                    }
                    # NOTE(review): `agent` below is the leftover loop
                    # variable from the game.memory loop above; a
                    # `for agent in agents:` wrapper is commented out in the
                    # original (the sibling main() still has it). Confirm
                    # which agent's row is actually intended here.
                    aux = [statistics[sim][agent][mu][s][round - 1]
                           for s in signals]
                    idx = range(len(signals))
                    # Totals per signal: whole population, then agents 1-5
                    # and 6-10 as the two subpopulations.
                    summation_pop = [
                        sum(per_agent[a][i] for a in agents) for i in idx]
                    summation_subpop_1 = [
                        sum(per_agent[a][i] for a in agents[:5]) for i in idx]
                    summation_subpop_2 = [
                        sum(per_agent[a][i] for a in agents[5:]) for i in idx]
                    writer.writerow(
                        [sim + 1, mu + 1, agent, menLen, round, condition,
                         i_power, samples[mu]['cont'], samples[mu]['coord'],
                         samples[mu]['mut']]
                        + aux + [summation_pop]
                        + [shannon(summation_pop)]
                        + [shannon(summation_subpop_1)]
                        + [shannon(summation_subpop_2)]
                        + [summation_subpop_1] + [summation_subpop_2]
                        + [brillouin_d(summation_pop)]
                        + [margalef(summation_pop)]
                        + [simpson(summation_pop)]
                        + [simpson_e(summation_pop)]
                        + [observed_otus(summation_pop) / 10])
else: data_key = mpatches.Patch(color=legend_entries[taxa],label="$\it{%s}$" %(taxa_text)) patch_list.append(data_key) stacked_fig.legend(handles=patch_list,loc=6 ,ncol=1,fontsize=16) CST_color_scheme = {'I-A':'#ff6868','I-B':'#ffd4da','II':'#b4ff68','III-A':'#ffbc6b','III-B':'#e4a67b','IV-A':'#c1adec','IV-B':'#91a8ed', 'IV-C0':'#989898','IV-C1':'#ffc0cb','IV-C2':'#a8e5e5','IV-C3':'#9acc9a','IV-C4':'#800080','V':'#ffff71'} CSTs = ['I-A','I-B','II','III-A','III-B','V','IV-A','IV-B','IV-C0','IV-C1','IV-C2','IV-C3','IV-C4'] #calculating shannon diversity data['shannon'] = data.apply(lambda y: shannon(list(y)[6:205]),axis=1) #building the plot #creating x axis location variables loc=12 for CST in CSTs: boxprops = dict(linewidth=1, color="k") medianprops = dict(linewidth=1,color="k") box = similarity_axs.boxplot(x=data[data['subCST'] == CST].shannon,positions=[loc],notch=True,widths=[0.5],patch_artist=True,boxprops=boxprops,medianprops=medianprops,vert=False) patch = box['boxes'] for patch in box['boxes']:
def main():
    """Run the co-evolution signalling game over a grid of bias settings.

    Builds a homogeneous/hegemonic (OTA) population of ten agents, sweeps
    content bias x conformity bias x confirmation bias, plays one Match per
    parameter combination, and writes per-round signal statistics plus
    population/subpopulation diversity measures to a CSV file.
    """
    # Agents and their variant (signal) inventory.
    agents = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    signals = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10']
    # Social network: the order in which agents pair over time.
    network = group(agents)
    pairs = [list(elem) for elem in network]
    # Memory length: amount of history (in rounds) agents can recall.
    menLen = 3
    # Labels describing the value-system structure used below.
    condition = "Homogeneity"
    scenario = "OTA"
    # Institutional power (value assigned in the "choose" method).
    i_power = 0
    # Homogeneous/hegemonic (OTA) initial value systems: every agent
    # assigns all value to S1.
    s1 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    s2 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    sigmas = {1: s1, 2: s1, 3: s1, 4: s1, 5: s1,
              6: s2, 7: s2, 8: s2, 9: s2, 10: s2}
    # Parameter grid: content bias ('cont', 0=none..1=full) crossed with
    # conformity ('conform') and confirmation ('confirm') levels; the
    # coordination bias ('coord') is neutral and the innovation rate
    # ('mut') fixed throughout.
    bias_levels = [0, 0.5, 1]
    content_levels = [0.0, 0.2, 0.4, 0.5, 0.6, 0.8, 1.0]
    samples = [
        {'cont': cont, 'coord': 0.5, 'conform': conform,
         'confirm': confirm, 'mut': 0.02}
        for conform in bias_levels
        for confirm in bias_levels
        for cont in content_levels
    ]
    # Number of replicas of each parameter combination.
    samples = [d for d in samples for _ in range(1)]
    # Number of simulations.
    simulations = 1
    # statistics[sim][agent][sample][signal][round] = production count.
    statistics = {
        sim: {
            agent: {
                sample: {signal: [0] * len(pairs) for signal in signals}
                for sample in range(len(samples))
            }
            for agent in agents
        }
        for sim in range(simulations)
    }
    # Play one Match per (simulation, sample) and tally signal production.
    for sim in range(simulations):
        # Fresh pairing order for every simulation.
        network = group(agents)
        pairs = [list(elem) for elem in network]
        for mu in range(len(samples)):
            game = Match(
                agents, pairs, signals, sigmas,
                samples[mu]["cont"], samples[mu]["coord"],
                samples[mu]["conform"], samples[mu]["confirm"],
                samples[mu]["mut"], menLen
            )
            game.play()
            for n, round_state in enumerate(game.memory):
                for agent, signal in round_state.items():
                    statistics[sim][agent][mu][signal][n] += 1
    # Write the CSV report.
    with open('Test_COEVO_Hom_OTA_R_I00_F.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(
            ['Simulation', 'Sample', 'Agent', 'Memory', 'Generation',
             'Condition', 'Scenario', 'Inst_power', 'Content bias',
             'Coordination bias', 'Conformity bias', 'Confirmation bias',
             'Mutation rate']
            + signals + ['Population signals'] + ['Entropy_population']
            + ['Entropy_subpopulation_1'] + ['Entropy_subpopulation_2']
            + ['Subpopulation_1 signals'] + ['Subpopulation_2 signals']
            + ['Brillouin_population'] + ['Margalef_population']
            + ['Simpson_population'] + ['Simpson_e_population']
            + ['Richness'])
        # One row per (agent, simulation, sample, round).
        for agent in agents:
            for sim in range(simulations):
                for mu in range(len(samples)):
                    for round in range(1, len(pairs) + 1):
                        # Per-agent signal counts for this round.
                        per_agent = {
                            a: [statistics[sim][a][mu][s][round - 1]
                                for s in signals]
                            for a in agents
                        }
                        aux = [statistics[sim][agent][mu][s][round - 1]
                               for s in signals]
                        idx = range(len(signals))
                        # Totals per signal: whole population, then agents
                        # 1-4 and 5-8 as the two subpopulations.
                        summation_pop = [
                            sum(per_agent[a][i] for a in agents)
                            for i in idx]
                        summation_subpop_1 = [
                            sum(per_agent[a][i] for a in agents[:4])
                            for i in idx]
                        summation_subpop_2 = [
                            sum(per_agent[a][i] for a in agents[4:8])
                            for i in idx]
                        writer.writerow(
                            [sim + 1, mu + 1, agent, menLen, round,
                             condition, scenario, i_power,
                             samples[mu]['cont'], samples[mu]['coord'],
                             samples[mu]['conform'], samples[mu]['confirm'],
                             samples[mu]['mut']]
                            + aux + [summation_pop]
                            + [shannon(summation_pop)]
                            + [shannon(summation_subpop_1)]
                            + [shannon(summation_subpop_2)]
                            + [summation_subpop_1] + [summation_subpop_2]
                            + [brillouin_d(summation_pop)]
                            + [margalef(summation_pop)]
                            + [simpson(summation_pop)]
                            + [simpson_e(summation_pop)]
                            + [observed_otus(summation_pop) / 8])
def alpha_diversity(args):
    """Print per-sample alpha-diversity metrics from a BIOM JSON file.

    The BIOM 'data' matrix is stored per OTU (one row per OTU), so it is
    transposed to per-sample rows before computing the metrics.

    Args:
        args: Namespace with ``in_file``, the path to the BIOM JSON file.

    Side effects:
        Prints a tab-separated table to stdout; exits the process if the
        input file cannot be opened.
    """
    # Bug fix: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt, and the handle was closed manually. Use a context
    # manager and catch only I/O errors; a malformed-JSON error still
    # propagates, as it did originally.
    try:
        with open(args.in_file, 'r') as json_data:
            data = json.load(json_data)
    except OSError:
        print("NO FILE FOUND ERROR")
        sys.exit()

    # Counts matrix: one row per OTU, one column per sample.
    A = np.zeros(shape=(len(data['rows']), len(data['columns'])))
    for i, counts in enumerate(data['data']):
        A[i] = counts
    X = A.astype(int)  # insure int
    Y = np.transpose(X)  # now one row per sample

    def _safe(metric, row, fallback='error'):
        # Evaluate one alpha metric; some fail on degenerate counts, so
        # report 'error' in the table instead of aborting the whole run.
        try:
            return metric(row)
        except Exception:
            return fallback

    txt = "Dataset\tobserved richness\tACE\tchao1\tShannon\tSimpson"
    print(txt)
    for i, row in enumerate(Y):
        ds = data['columns'][i]['id']
        row = row.tolist()
        ace = _safe(alpha.ace, row)
        chao1 = _safe(alpha.chao1, row)
        osd = _safe(alpha.osd, row, fallback=['error'])
        simpson = _safe(alpha.simpson, row)
        shannon = _safe(alpha.shannon, row)
        txt = ds + "\t" + str(osd[0]) + "\t" + str(ace) + "\t" + str(
            chao1) + "\t" + str(shannon) + "\t" + str(simpson)
        print(txt)
# Getting output length dis_len = len(dissolved) # Counting language dominance, menhinick diversity and simpson index print('[INFO] - Calculating variables..') for i, row in dissolved.iterrows(): print("[INFO] - Calculating grid cell {}/{}...".format(i, dis_len)) lang_counts = list(Counter( row[args['language']]).values()) # occurence counts lang_counts = np.asarray(lang_counts) # cast as numpy array for skbio dissolved.at[i, 'dominance'] = sk.dominance(lang_counts) dissolved.at[i, 'menhinick'] = sk.menhinick(lang_counts) dissolved.at[i, 'simpson'] = sk.simpson(lang_counts) dissolved.at[i, 'berger'] = sk.berger_parker_d(lang_counts) dissolved.at[i, 'singles'] = sk.singles(lang_counts) dissolved.at[i, 'shannon'] = np.exp(sk.shannon(lang_counts, base=np.e)) dissolved.at[i, 'unique'] = sk.observed_otus(lang_counts) # Select columns for output cols = [ 'geometry', 'dominance', 'menhinick', 'simpson', 'berger', 'singles', 'shannon', 'unique' ] output = dissolved[cols] # Save the output to pickle print('[INFO] - Saving to shapefile') output.to_file(args['output'], encoding='utf-8') # Print status print("[INFO] - ... Done.")
def _new_plasmid_summary():
    """Return a fresh per-plasmid summary record with sentinel defaults.

    Diversity indices start at -1 ('not computed'); counts start at 0.
    NOTE: 'poportion_human' is misspelled in the original output schema and is
    kept as-is so downstream consumers keep working.
    """
    return {
        'replicons': {},
        'relaxases': {},
        'overall_mobility': '',
        'mobility': {'conjugative': 0, 'mobilizable': 0, 'non-mobilizable': 0},
        'overall_serovar': '',
        'serovar': {},
        'continent': {},
        'country': {},
        'primary_sample_category': {},
        'secondary_sample_category': {},
        'associated_taxa': {},
        'earliest_year': 0,
        'year': {},
        'samples': [],
        'total_samples': 0,
        'num_resistant': 0,
        'proportion_resistant': 0,
        'resistance_genes': {},
        'serovar_entropy': -1,
        'serovar_shannon_index': -1,
        'serovar_simpson_index': -1,
        'serovar_simpson_index_e': -1,
        'serovar_chao1': 0,
        'num_serovars': 0,
        'poportion_human': 0,
        'taxa_entropy': -1,
        'taxa_shannon_index': -1,
        'taxa_simpson_index': -1,
        'taxa_simpson_index_e': -1,
        'taxa_chao1': -1,
    }


def mobtyper_plasmid_summarize(mobtyper):
    """Aggregate per-sample mob-typer plasmid records into per-plasmid summaries.

    Parameters
    ----------
    mobtyper : dict
        sample_id -> {plasmid_id -> record}. Each record must have
        'predicted_mobility', 'rep_type(s)', 'relaxase_type(s)' and
        'resistance_genes'; an optional 'metadata' dict carries fields such as
        'serovar', 'year', 'country', 'primary_sample_category' and
        'associated_taxa' (a list of taxon names).

    Returns
    -------
    dict
        plasmid_id -> summary dict of counts, proportions and — when a plasmid
        has at least 10 counted observations — alpha-diversity indices.
    """
    summary = {}
    # Pass 1: tally raw counts per plasmid across all samples.
    for sample_id in mobtyper:
        for plasmid_id, data in mobtyper[sample_id].items():
            if plasmid_id not in summary:
                summary[plasmid_id] = _new_plasmid_summary()
            entry = summary[plasmid_id]
            entry['total_samples'] += 1
            entry['samples'].append(sample_id)
            entry['mobility'][data['predicted_mobility']] += 1
            for r in data['rep_type(s)'].split(","):
                entry['replicons'][r] = entry['replicons'].get(r, 0) + 1
            for m in data['relaxase_type(s)'].split(","):
                entry['relaxases'][m] = entry['relaxases'].get(m, 0) + 1
            res_genes = data['resistance_genes']
            if len(res_genes) > 0:
                entry['num_resistant'] += 1
            for gene_id in res_genes:
                entry['resistance_genes'][gene_id] = (
                    entry['resistance_genes'].get(gene_id, 0) + res_genes[gene_id])
            if 'metadata' not in data:
                continue
            for field_id, value in data['metadata'].items():
                if value == 'nan' or value == '':
                    value = 'unknown'
                if field_id not in entry:
                    continue  # ignore metadata fields we do not track
                if field_id == 'associated_taxa':
                    # value is a list of taxon names; count each non-empty one.
                    for v in value:
                        if v == '' or v == 'nan':
                            continue
                        entry[field_id][v] = entry[field_id].get(v, 0) + 1
                    continue
                # BUGFIX: the original wrote `field_id in ('resistance_genes')`,
                # which is a *substring* test against a plain string (missing
                # trailing comma), so any field whose name is a substring of
                # 'resistance_genes' would have been skipped too.
                if field_id in ('resistance_genes',):
                    continue  # already tallied from the record itself
                entry[field_id][value] = entry[field_id].get(value, 0) + 1
    # Pass 2: derive proportions, modes and diversity indices per plasmid.
    for plasmid_id, entry in summary.items():
        serovar_counts = list(entry['serovar'].values())
        if len(entry['year']) > 0:
            entry['earliest_year'] = min(list(entry['year'].keys()))
        human = entry['primary_sample_category'].get('human', 0)
        entry['poportion_human'] = human / entry['total_samples']
        entry['num_serovars'] = len(entry['serovar'])
        entry['proportion_resistant'] = (
            entry['num_resistant'] / entry['total_samples'])
        # Mode of the mobility counts; ties resolve to first insertion order.
        entry['overall_mobility'] = max(entry['mobility'],
                                        key=entry['mobility'].get)
        if len(entry['serovar']) > 0:
            entry['overall_serovar'] = max(entry['serovar'],
                                           key=entry['serovar'].get)
        if len(serovar_counts) > 0 and sum(serovar_counts) >= 10:
            entry['serovar_entropy'] = calc_shanon_entropy(serovar_counts)
            entry['serovar_shannon_index'] = alpha.shannon(serovar_counts)
            entry['serovar_simpson_index'] = alpha.simpson(serovar_counts)
            entry['serovar_simpson_index_e'] = alpha.simpson_e(serovar_counts)
            entry['serovar_chao1'] = alpha.chao1(serovar_counts)
        else:
            # Too few observations for stable estimates: keep the sentinel
            # values and log the plasmid for inspection.
            print("{}\t{}".format(plasmid_id, sum(serovar_counts)))
            print(entry)
        # Exclude human counts so taxa diversity reflects non-human hosts only.
        taxa_counts = [count for taxon, count in entry['associated_taxa'].items()
                       if taxon != 'h**o sapiens']
        if len(taxa_counts) > 0 and sum(taxa_counts) >= 10:
            entry['taxa_entropy'] = calc_shanon_entropy(taxa_counts)
            entry['taxa_shannon_index'] = alpha.shannon(taxa_counts)
            entry['taxa_simpson_index'] = alpha.simpson(taxa_counts)
            entry['taxa_simpson_index_e'] = alpha.simpson_e(taxa_counts)
            entry['taxa_chao1'] = alpha.chao1(taxa_counts)
    return summary
def testShannon(self, otu):
    """Print skbio's per-column Shannon values next to our own implementation.

    `otu` is a table whose rows are OTUs and whose columns are samples; each
    sample column is scored with alpha.shannon, then the whole table is scored
    with self.shannon so the two can be compared by eye.
    """
    n_samples = len(otu[0])
    diversity = [alpha.shannon([row[j] for row in otu])
                 for j in range(n_samples)]
    print(diversity)
    print(self.shannon(otu))
def main(menLen, i_power, sigmas, samples, simulations, condition):
    """Run the signalling-game simulations and dump per-round statistics to CSV.

    Parameters (as used below):
        menLen      -- memory length forwarded to Match and written to the CSV.
        i_power     -- institutional power parameter forwarded to Match.
        sigmas      -- signal parameters forwarded to Match.
        samples     -- list of dicts with 'cont', 'coord' and 'mut' bias values.
        simulations -- number of independent simulation runs.
        condition   -- experiment label written verbatim into each CSV row.

    Side effect: writes 'homogeneity_C_0.csv' in the working directory.
    """
    agents = [1,2,3,4,5,6,7,8,9,10]
    signals = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10']
    # group() builds the interaction network; pairs is its edge list.
    network = group(agents)
    pairs = [list(elem) for elem in network]
    # statistics[sim][agent][sample][signal][round] = production count.
    statistics = {
        sim: {
            agent: {
                sample: {
                    signal: [0 for round in range(1, len(pairs) + 1)]
                    for signal in signals
                }
                for sample in range(len(samples))
            }
            for agent in agents
        }
        for sim in range(simulations)
    }
    for sim in range(simulations):
        #network = group(agents)
        #pairs = [list(elem) for elem in network]
        for mu in range(len(samples)):
            game = Match(
                agents, pairs, signals, sigmas, samples[mu]["cont"],
                samples[mu]["coord"], samples[mu]["mut"], menLen, i_power
            )
            game.play()
            # Tally which signal each agent produced in each round.
            for n, round in enumerate(game.memory):
                for agent, signal in round.items():
                    statistics[sim][agent][mu][signal][n] += 1
    with open('homogeneity_C_0.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Simulation', 'Sample', 'Agent', 'Memory', 'Generation',
                         'Condition', 'Inst_power','Content bias',
                         'Coordination bias','Mutation rate'] + signals +
                        ['Population signals'] + ['Entropy_population'] +
                        ['Entropy_subpopulation_1'] + [
            'Entropy_subpopulation_2'] + ['Subpopulation_1 signals'] +
                        ['Subpopulation_2 signals'] + ['Brillouin_population'] +
                        ['Margalef_population'] + ['Simpson_population'] + [
            'Simpson_e_population'] + ['Richness'])
        # Build lists with the production of each signal: for the whole
        # population (aux) and for each individual player (aux1..aux10).
        #for agent in agents:
        for sim in range(simulations):
            for mu in range(len(samples)):
                for round in range(1, len(pairs) + 1):
                    # NOTE(review): `agent` below is the stale leftover of the
                    # tally loop above (its last-seen value), not a controlled
                    # index; the commented-out `for agent in agents:` suggests a
                    # per-agent sweep was intended. Confirm before trusting the
                    # 'Agent' column and the per-signal `aux` values.
                    aux = [statistics[sim][agent][mu][signal][round - 1] for signal in signals]
                    aux1 = [statistics[sim][1][mu][signal][round - 1] for signal in signals]
                    aux2 = [statistics[sim][2][mu][signal][round - 1] for signal in signals]
                    aux3 = [statistics[sim][3][mu][signal][round - 1] for signal in signals]
                    aux4 = [statistics[sim][4][mu][signal][round - 1] for signal in signals]
                    aux5 = [statistics[sim][5][mu][signal][round - 1] for signal in signals]
                    aux6 = [statistics[sim][6][mu][signal][round - 1] for signal in signals]
                    aux7 = [statistics[sim][7][mu][signal][round - 1] for signal in signals]
                    aux8 = [statistics[sim][8][mu][signal][round - 1] for signal in signals]
                    aux9 = [statistics[sim][9][mu][signal][round - 1] for signal in signals]
                    aux10 = [statistics[sim][10][mu][signal][round - 1] for signal in signals]
                    # Per-signal totals over the whole population for this
                    # sample and round...
                    summation_pop = []
                    # ...and over the two 5-agent subpopulations.
                    summation_subpop_1 = []
                    summation_subpop_2 = []
                    # Sum the signals of each type at the population level.
                    for i in range(len(aux1)):
                        summation_pop.append(
                            aux1[i] + aux2[i] + aux3[i] + aux4[i] + aux5[i] +
                            aux6[i] + aux7[i] + aux8[i] + aux9[i] + aux10[i])
                    # Sum the signals of each type at the subpopulation level.
                    for i in range(len(aux1)):
                        summation_subpop_1.append(aux1[i] + aux2[i] + aux3[i] + aux4[i] + aux5[i])
                        summation_subpop_2.append(+ aux6[i] + aux7[i] + aux8[i] + aux9[i] + aux10[i])
                    #print(aux1)
                    #output.append(shannon(summation_pop))
                    #print(output)
                    writer.writerow([sim + 1, mu + 1, agent, menLen, round,
                                     condition, i_power, samples[mu]['cont'],
                                     samples[mu]['coord'], samples[mu]['mut']] +
                                    aux + [summation_pop] +
                                    [shannon(summation_pop)] + [
                        shannon(summation_subpop_1)] + [shannon(summation_subpop_2)] + [
                        summation_subpop_1] + [summation_subpop_2] +
                                    [brillouin_d(summation_pop)] +
                                    [margalef(summation_pop)] + [
                        simpson(summation_pop)] + [simpson_e(summation_pop)] + [
                        observed_otus(summation_pop) / 10])
def bugs():
    """Compute per-date, per-sensor Shannon indices of insect counts for a farm.

    Reads './in/<farm>_bugs.csv', drops rows without counts or sensor numbers,
    computes a natural-log Shannon index of 'Total Count' for every
    (date, sensor) pair, writes './out/<farm>_bugShannon.csv' and returns the
    resulting DataFrame.

    NOTE(review): relies on module-level globals `farm` (farm name), `sc`
    (positional index of the sensor-number column) and `my_path` (base
    directory) — presumably set during script start-up; verify at the call site.
    """
    print('SHANNON INDICES for ' + farm + '\n----------------------')
    bugs = pd.read_csv(
        os.path.join(my_path, './in/' + farm + '_bugs.csv')
    )  # read in the raw csv bug sheet for a farm
    bugs = bugs[pd.notnull(
        bugs['Total Count'])]  # Let's drop all the rows that don't have counts
    bugs = bugs[pd.notnull(
        bugs.iloc[:, [sc]].values
    )]  # Let's drop all the rows that don't have sensor numbers
    bugs['Date'] = [
        datetime.datetime.strptime(x, '%m/%d/%y') for x in bugs['Date']
    ]  # Replace each date with a datetime.datetime object
    dates = []  # initialize empty list to store distinct dates
    trueDates = [
    ]  # initialize empty list to store list of dates for each shannon measurement
    sensors = []  # initialize empty list of sensors to store distinct sensors
    shannons = [
    ]  # initialize empty list of shannon values to store shannon values to be computed
    bug_finds = [
    ]  # initialize empty list of sensors to store sensor for each shannon measurement
    # Get the distinct dates and sensors
    for index, row in bugs.iterrows():
        if not row['Date'] in dates:
            dates.append(row['Date'])
        if not row[[sc]].values[0] in sensors:
            sensors.append(row[[sc]].values[0])
    # Go through each date and sensor and compute shannon for each, appending
    # date and sensor to trueDates and big_finds for each shannon computation
    # so that all three lists will be the same length at the end
    for date in dates:
        print(date.strftime('%Y-%m-%d') + ':')
        interval = bugs[
            bugs['Date'] ==
            date]  # cut a dataframe that contains only the date we're looking for from the master bugs dataframe
        for sensor in sensors:
            # Membership test against the raw numpy values of the sensor column.
            if [sensor] in interval.iloc[:, [sc]].values:
                sub_interval = interval[interval.iloc[:, [sc]].values == [
                    sensor
                ]]  # cut a dataframe out of the date dataframe that contains only the sensor
                s = shannon(sub_interval['Total Count'].values,
                            base=math.exp(1))  # compute shannon index (natural log)
                shannons.append(s)  # append index value to shannons list
                print(' ' + str(sensor) + ': ' + str(s))
                bug_finds.append(sensor)  # append sensor to sensor list
                trueDates.append(date)  # append date to dates list
    # When we've done all the computations, we want to make a new dataframe
    # out of the three lists we've made
    bug_dict = {
        'date': [x.strftime('%Y-%m-%d') for x in trueDates],
        'sensor': [int(x) if type(x) == float else x for x in bug_finds],
        'bug_shannon': shannons
    }
    bug_df = pd.DataFrame.from_dict(bug_dict)
    # Create the output directory if it doesn't exist
    if not os.path.exists(os.path.join(my_path, './out')):
        os.makedirs(os.path.join(my_path, './out'))
    # Output csv to output directory
    bug_df.to_csv(os.path.join(my_path, './out/' + farm + '_bugShannon.csv'),
                  index=False)
    print('Results output to: ' + '/out/' + farm + '_bugShannon.csv')
    return bug_df
areas.at[i, colname4] = (int(lposts) / int(lpostsum)) * 100 # get dominant language from selected columns areas['propmax'] = areas[['fi_prop','en_prop','et_prop','ru_prop','sv_prop','es_prop','ja_prop','fr_prop','pt_prop','de_prop']].idxmax(axis=1) areas['mean_propmax'] = areas[['fi_mean_prop','en_mean_prop','et_mean_prop','ru_mean_prop','sv_mean_prop','es_mean_prop','ja_mean_prop','fr_mean_prop','pt_mean_prop','de_mean_prop']].idxmax(axis=1) areas['sum_propmax'] = areas[['fi_sum_prop','en_sum_prop','et_sum_prop','ru_sum_prop','sv_sum_prop','es_sum_prop','ja_sum_prop','fr_sum_prop','pt_sum_prop','de_sum_prop']].idxmax(axis=1) # get all language column names cols = list(areas[langlist].columns) # loop over areas print('[INFO] - Calculating diversity metrics per area..') for i, row in areas.iterrows(): # get counts of languages otus = list(row[cols]) # drop zeros otus = [i for i in otus if i != 0] # calculate diversity metrics areas.at[i, 'dominance'] = sk.dominance(otus) areas.at[i, 'berger'] = sk.berger_parker_d(otus) areas.at[i, 'menhinick'] = sk.menhinick(otus) areas.at[i, 'singletons'] = sk.singles(otus) areas.at[i, 'shannon'] = np.exp(sk.shannon(otus, base=np.e)) areas.at[i, 'unique'] = sk.observed_otus(otus) # save to file print('[INFO] - Saving output geopackage...') areas.to_file(args['output'], driver='GPKG') print('[INFO] - ... done!')
# Derive calendar fields from the tweet timestamps.
tweetdf['month'] = tweetdf['created_at'].dt.month
tweetdf['week'] = tweetdf['created_at'].dt.week
# drop week 53 which is one day and only present in 2018
tweetdf = tweetdf[tweetdf['week'] != 53]
# explode tweets: one row per language detected in each tweet
tweetdf = tweetdf.explode('langs')

# get diversity order: spatial units ('nimi') sorted by descending Shannon
# diversity (exp of natural-log entropy = effective number of languages)
print('[INFO] - Preparing data for plotting...')
divord = tweetdf.groupby('nimi')['langs'].apply(list).rename(
    'langs').reset_index()
divord['counts'] = divord['langs'].apply(lambda x: langcount(x)[1])
divord['shannon'] = divord['counts'].apply(
    lambda x: np.exp(sk.shannon(x, base=np.e)))
divord = divord.sort_values(by=['shannon'], ascending=False)
divord = divord['nimi'].tolist()

# get unique user counts per spatial unit (nimi -> distinct user count)
users = tweetdf.groupby('nimi')['user_id'].apply(list).rename(
    'users').reset_index()
users['count'] = users['users'].apply(lambda x: len(Counter(x)))
users = pd.Series(users['count'].values, index=users.nimi).to_dict()

# group areas by (spatial unit, week) and collect language lists
tweetareas = tweetdf.groupby(
    ['nimi', 'week'])['langs'].apply(list).rename('langs').reset_index()

# count langs
print('[INFO] - Calculating language counts and Shannon diversity...')
def test_shannon(self):
    """Base-2 Shannon entropy sanity checks on small count vectors."""
    cases = (
        (np.array([5]), 0),              # a single class carries no information
        (np.array([5, 5]), 1),           # two equally likely classes -> 1 bit
        (np.array([1, 1, 1, 1, 0]), 2),  # four equal classes; zero count ignored
    )
    for counts, expected in cases:
        self.assertEqual(shannon(counts), expected)
def alpha_diversity(args):
    """Print a table of alpha-diversity metrics per sample from a BIOM JSON file.

    The counts matrix in the biom file is per OTU (rows), NOT per sample
    (columns) as the estimators need, so it is transposed before computing
    the metrics.

    Parameters
    ----------
    args : namespace-like
        Must expose ``in_file``, the path of the BIOM JSON file.

    Side effects
    ------------
    Prints one tab-separated row per sample (observed richness, ACE, chao1,
    Shannon, Simpson); exits the process if the input file cannot be opened.
    """
    # Context manager guarantees the handle is closed even if json.load raises.
    # OSError (not a bare except) so Ctrl-C and real bugs are not swallowed.
    try:
        with open(args.in_file, 'r') as json_data:
            data = json.load(json_data)
    except OSError:
        print("NO FILE FOUND ERROR")
        sys.exit()
    # Build the OTU x sample matrix row by row, then flip to sample x OTU.
    A = np.zeros(shape=(len(data['rows']), len(data['columns'])))
    for i, counts in enumerate(data['data']):
        A[i] = counts
    Y = np.transpose(A.astype(int))  # ensure integer counts for the estimators
    print("Dataset\tobserved richness\tACE\tchao1\tShannon\tSimpson")
    for i, row in enumerate(Y):
        ds = data['columns'][i]['id']
        row = row.tolist()
        # Each metric is computed independently: a failure in one estimator
        # (e.g. chao1 on degenerate data) must not abort the whole table.
        try:
            ace = alpha.ace(row)
        except Exception:
            ace = 'error'
        try:
            chao1 = alpha.chao1(row)
        except Exception:
            chao1 = 'error'
        try:
            osd = alpha.osd(row)
        except Exception:
            osd = ['error']
        try:
            simpson = alpha.simpson(row)
        except Exception:
            simpson = 'error'
        try:
            shannon = alpha.shannon(row)
        except Exception:
            shannon = 'error'
        print("\t".join([ds, str(osd[0]), str(ace), str(chao1),
                         str(shannon), str(simpson)]))