Ejemplo n.º 1
0
    def test_enspie(self):
        """Check enspie on even communities and against 1 / dominance."""
        # Totally even community should have ENS_pie = number of OTUs.
        for even_counts, n_otus in (([1, 1, 1, 1, 1, 1], 6),
                                    ([13, 13, 13, 13], 4)):
            self.assertAlmostEqual(enspie(np.array(even_counts)), n_otus)

        # Hand calculated: reciprocal of the summed squared proportions.
        counts = np.array([1, 41, 0, 0, 12, 13])
        proportions = counts / counts.sum()
        expected = 1 / (proportions ** 2).sum()
        self.assertAlmostEqual(enspie(counts), expected)

        # Using dominance on the same counts.
        expected = 1 / dominance(counts)
        self.assertAlmostEqual(enspie(counts), expected)

        # Using dominance on a second set of counts.
        counts = np.array([1, 0, 2, 5, 2])
        expected = 1 / dominance(counts)
        self.assertAlmostEqual(enspie(counts), expected)
Ejemplo n.º 2
0
    def test_enspie(self):
        """Exercise enspie with even, hand-calculated and dominance cases."""
        # Totally even community should have ENS_pie = number of OTUs.
        six_even = np.array([1, 1, 1, 1, 1, 1])
        four_even = np.array([13, 13, 13, 13])
        self.assertAlmostEqual(enspie(six_even), 6)
        self.assertAlmostEqual(enspie(four_even), 4)

        # Hand calculated from the squared relative abundances.
        uneven = np.array([1, 41, 0, 0, 12, 13])
        rel_abund = uneven / uneven.sum()
        hand_value = 1 / (rel_abund**2).sum()
        self.assertAlmostEqual(enspie(uneven), hand_value)

        # Using dominance: enspie must match its reciprocal for both arrays.
        for sample in (uneven, np.array([1, 0, 2, 5, 2])):
            via_dominance = 1 / dominance(sample)
            self.assertAlmostEqual(enspie(sample), via_dominance)
Ejemplo n.º 3
0
def mercat_compute_alpha_beta_diversity(counts, bif):
    """Compute a set of skbio diversity metrics and write them to a text file.

    Args:
        counts: Passed unchanged to each ``skbio_alpha`` metric function.
        bif: Prefix of the output path; results are written to
            ``bif + "_diversity_metrics.txt"``.

    Returns:
        None. The file receives one ``name = value`` line per metric, in the
        order the metrics are listed below.
    """
    # Metric name -> value; every metric is computed before the file is opened.
    abm = {
        'shannon': skbio_alpha.shannon(counts),
        'simpson': skbio_alpha.simpson(counts),
        'simpson_e': skbio_alpha.simpson_e(counts),
        'goods_coverage': skbio_alpha.goods_coverage(counts),
        'fisher_alpha': skbio_alpha.fisher_alpha(counts),
        'dominance': skbio_alpha.dominance(counts),
        'chao1': skbio_alpha.chao1(counts),
        'chao1_ci': skbio_alpha.chao1_ci(counts),
        'ace': skbio_alpha.ace(counts),
    }

    report_path = bif + "_diversity_metrics.txt"
    with open(report_path, 'w') as dmptr:
        dmptr.writelines(
            name + " = " + str(value) + "\n" for name, value in abm.items()
        )
Ejemplo n.º 4
0
 def test_dominance(self):
     """Check dominance on a single-count array and on a mixed array."""
     # A single count is expected to give a dominance of exactly 1.
     single = np.array([5])
     self.assertEqual(dominance(single), 1)

     # Mixed counts (including a zero) are expected to give about 0.34.
     mixed = np.array([1, 0, 2, 5, 2])
     self.assertAlmostEqual(dominance(mixed), 0.34)
Ejemplo n.º 5
0
# Dissolve the joined rows by 'id'; every other column is aggregated into a
# list of its values
print('[INFO] - Dissolving results')
dissolved = joined.dissolve(by='id', aggfunc=lambda values: list(values))

# Number of dissolved rows, shown as the total in the progress messages
dis_len = len(dissolved)

# Per row: language dominance, Menhinick, Simpson, Berger-Parker, singles,
# exponentiated Shannon entropy and number of observed OTUs
print('[INFO] - Calculating variables..')
progress_msg = "[INFO] - Calculating grid cell {}/{}..."
for i, row in dissolved.iterrows():
    # i is the index value yielded by iterrows, not necessarily a 1..N counter
    print(progress_msg.format(i, dis_len))
    # occurrence count of each distinct value, cast to a numpy array for skbio
    lang_counts = np.asarray(
        list(Counter(row[args['language']]).values()))
    dissolved.at[i, 'dominance'] = sk.dominance(lang_counts)
    dissolved.at[i, 'menhinick'] = sk.menhinick(lang_counts)
    dissolved.at[i, 'simpson'] = sk.simpson(lang_counts)
    dissolved.at[i, 'berger'] = sk.berger_parker_d(lang_counts)
    dissolved.at[i, 'singles'] = sk.singles(lang_counts)
    # natural-log Shannon entropy, stored in exponentiated form
    shannon_entropy = sk.shannon(lang_counts, base=np.e)
    dissolved.at[i, 'shannon'] = np.exp(shannon_entropy)
    dissolved.at[i, 'unique'] = sk.observed_otus(lang_counts)

# Keep only the geometry and the computed metric columns for output
cols = [
    'geometry',
    'dominance',
    'menhinick',
    'simpson',
    'berger',
    'singles',
    'shannon',
    'unique',
]
output = dissolved[cols]

# Save the output to pickle
Ejemplo n.º 6
0
# Spatial inner join keeping grid cells that contain points
# NOTE(review): newer geopandas releases name this keyword `predicate`
# instead of `op` -- confirm against the installed version
joined = gpd.sjoin(grid, points, how='inner', op='contains')

# Dissolve the joined rows by 'id'; every other column is aggregated into a
# list of its values
print('[INFO] - Dissolving results')
dissolved = joined.dissolve(by='id', aggfunc=lambda values: list(values))

# Number of dissolved rows, shown as the total in the progress messages
dis_len = len(dissolved)

# Per row: topic dominance, Menhinick diversity and Simpson index
print('[INFO] - Calculating variables..')
progress_msg = "[INFO] Processing row {}/{}..."
for i, row in dissolved.iterrows():
    # i is the index value yielded by iterrows, not necessarily a 1..N counter
    print(progress_msg.format(i, dis_len))
    # occurrence count of each distinct value, cast to a numpy array for skbio
    topic_counts = np.asarray(
        list(Counter(row[args['topic']]).values()))
    dissolved.at[i, 'dominance'] = sk.dominance(topic_counts)
    dissolved.at[i, 'menhinick'] = sk.menhinick(topic_counts)
    dissolved.at[i, 'simpson'] = sk.simpson(topic_counts)

# Keep only the geometry and the computed metric columns for output
cols = [
    'geometry',
    'dominance',
    'menhinick',
    'simpson',
]
output = dissolved[cols]

# Write the output with to_file (the log message calls it a shapefile)
print('[INFO] - Saving to shapefile')
output.to_file(args['output'], encoding='utf-8')

# Print status
print("[INFO] ... Done.")
Ejemplo n.º 7
0
 def test_dominance(self):
     """Check dominance on a single-count array and on a mixed array."""
     # A single count is expected to give a dominance of exactly 1.
     single = np.array([5])
     self.assertEqual(dominance(single), 1)

     # Mixed counts (including a zero) are expected to give about 0.34.
     mixed = np.array([1, 0, 2, 5, 2])
     self.assertAlmostEqual(dominance(mixed), 0.34)
Ejemplo n.º 8
0
# rename home detection column
data = data.rename(columns={'home_unique_weeks': 'home_country'})

# keep only users whose home country string contains 'Finland'
in_finland = data['home_country'].str.contains('Finland')
data = data[in_finland]

# count language use: element [0] of langcount's result goes to 'ulangs',
# element [1] to 'counts' (per the original note, singletons are excluded --
# presumably by langcount; verify against its definition)
print('[INFO] - Calculating language diversities...')
data['ulangs'] = data['langs'].apply(lambda langs: langcount(langs)[0])
data['counts'] = data['langs'].apply(lambda langs: langcount(langs)[1])

# drop rows with empty 'counts' if any exist
has_counts = data['counts'].map(len) > 0
data = data[has_counts]

# calculate diversity metrics from each row's counts
data['dominance'] = data['counts'].apply(sk.dominance)
data['berger'] = data['counts'].apply(sk.berger_parker_d)
data['menhinick'] = data['counts'].apply(sk.menhinick)
data['simpson'] = data['counts'].apply(sk.simpson)
data['singles'] = data['counts'].apply(sk.singles)
# natural-log Shannon entropy, stored in exponentiated form
data['shannon'] = data['counts'].apply(
    lambda counts: np.exp(sk.shannon(counts, base=np.e)))
data['unique'] = data['counts'].apply(sk.observed_otus)

# language counts to dictionary: pair each row's 'ulangs' with its 'counts'
data['langdict'] = data.apply(
    lambda user: dict(zip(user['ulangs'], user['counts'])), axis=1)

# calculate ellis et al diversity metrics
data['divs'] = data['langdict'].apply(lang_entropy)
        areas.at[i, colname4] = (int(lposts) / int(lpostsum)) * 100

# get dominant language from selected columns: idxmax(axis=1) gives, per
# row, the name of the column holding the largest value
areas['propmax'] = areas[[
    'fi_prop', 'en_prop', 'et_prop', 'ru_prop', 'sv_prop',
    'es_prop', 'ja_prop', 'fr_prop', 'pt_prop', 'de_prop',
]].idxmax(axis=1)
areas['mean_propmax'] = areas[[
    'fi_mean_prop', 'en_mean_prop', 'et_mean_prop', 'ru_mean_prop',
    'sv_mean_prop', 'es_mean_prop', 'ja_mean_prop', 'fr_mean_prop',
    'pt_mean_prop', 'de_mean_prop',
]].idxmax(axis=1)
areas['sum_propmax'] = areas[[
    'fi_sum_prop', 'en_sum_prop', 'et_sum_prop', 'ru_sum_prop',
    'sv_sum_prop', 'es_sum_prop', 'ja_sum_prop', 'fr_sum_prop',
    'pt_sum_prop', 'de_sum_prop',
]].idxmax(axis=1)

# get all language column names
cols = list(areas[langlist].columns)

# loop over areas
print('[INFO] - Calculating diversity metrics per area..')
for i, row in areas.iterrows():
    # get counts of languages, dropping zeros. The comprehension variable
    # must not be named `i`: that would shadow the row index used by the
    # `areas.at[i, ...]` writes below (and overwrite it on Python 2, where
    # list-comprehension variables leak into the enclosing scope).
    otus = [lang_count for lang_count in row[cols] if lang_count != 0]
    # calculate diversity metrics
    areas.at[i, 'dominance'] = sk.dominance(otus)
    areas.at[i, 'berger'] = sk.berger_parker_d(otus)
    areas.at[i, 'menhinick'] = sk.menhinick(otus)
    areas.at[i, 'singletons'] = sk.singles(otus)
    # natural-log Shannon entropy, stored in exponentiated form
    areas.at[i, 'shannon'] = np.exp(sk.shannon(otus, base=np.e))
    areas.at[i, 'unique'] = sk.observed_otus(otus)

# save to file
print('[INFO] - Saving output geopackage...')
areas.to_file(args['output'], driver='GPKG')

print('[INFO] - ... done!')