def analyze_numlangedition_per_profession(self):
    """Plot per-profession language-edition statistics for women and men.

    Groups ``self.people`` by each distinct value of its ``"class"`` column,
    passes every group to ``self.analyze_num_lang_edition`` (labelled with
    the last ``/``-separated segment of the class value) and forwards the
    collected ``mean-*`` / ``sem-*`` columns to ``ut.plot_shaded_lines``.

    Each result of ``analyze_num_lang_edition`` is expected to be a mapping
    (or Series) keyed by the column names listed in ``columns`` below --
    NOTE(review): confirm against that helper.
    """
    columns = ["class", "mean-men", "sem-men", "mean-women", "sem-women"]

    classes = self.people["class"].unique()
    print(self.people["class"].nunique())

    # Collect one result row per profession and build the frame once;
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every iteration.
    rows = []
    for pclass in classes:
        people = self.people[self.people["class"] == pclass]
        print(pclass)
        classname = pclass.split("/")[-1]
        print(classname)
        rows.append(self.analyze_num_lang_edition(classname, people))

    # reindex guarantees the plotted columns exist even when there are no rows.
    res = pd.DataFrame(rows).reindex(columns=columns)

    # NOTE(review): the output file name says "deacdes" although this is the
    # per-profession plot; kept unchanged so existing consumers still find it.
    ut.plot_shaded_lines(
        res["class"].values,
        res["mean-women"].values,
        res["mean-men"].values,
        res["sem-women"].values,
        # The men's series gets its own standard error; the original passed
        # "sem-women" twice.
        res["sem-men"].values,
        "mean num editions",
        "professions",
        "img/numedition_gender_deacdes.png",
    )
def analyze_numlangedition_per_profession(self):
    """Plot per-profession language-edition statistics for women and men.

    For every distinct value in the ``"class"`` column of ``self.people``
    the matching rows are handed to ``self.analyze_num_lang_edition``; the
    label passed along is the last ``/``-separated segment of the class
    value. The resulting ``mean-*`` / ``sem-*`` columns are then drawn with
    ``ut.plot_shaded_lines``.

    Each result of ``analyze_num_lang_edition`` is expected to be a mapping
    (or Series) keyed by the column names listed in ``columns`` below --
    NOTE(review): confirm against that helper.
    """
    columns = ['class', 'mean-men', 'sem-men', 'mean-women', 'sem-women']

    classes = self.people["class"].unique()
    print(self.people["class"].nunique())

    # Gather rows in a list and construct the DataFrame a single time:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic.
    rows = []
    for pclass in classes:
        people = self.people[self.people["class"] == pclass]
        print(pclass)
        classname = pclass.split("/")[-1]
        print(classname)
        rows.append(self.analyze_num_lang_edition(classname, people))

    # reindex keeps the expected columns present even for an empty result.
    res = pd.DataFrame(rows).reindex(columns=columns)

    # NOTE(review): the file name mentions "deacdes" even though this plot is
    # per profession; left as-is to avoid changing the output location.
    ut.plot_shaded_lines(
        res["class"].values,
        res["mean-women"].values,
        res["mean-men"].values,
        res["sem-women"].values,
        # Fixed: error band for the men's line now uses "sem-men" instead of
        # repeating "sem-women".
        res["sem-men"].values,
        'mean num editions',
        'professions',
        'img/numedition_gender_deacdes.png',
    )
def analyze_numlangedition_per_decade(self):
    """Plot language-edition statistics by birth-year bucket for women and men.

    People with ``0 <= birth_year < 1000`` form one bucket labelled
    ``"0-1000"``; after that, 10-year buckets ``[decade, decade + 10)`` are
    produced while ``decade < 2015``. Each bucket is passed to
    ``self.analyze_num_lang_edition`` and the collected ``mean-*`` /
    ``sem-*`` columns are drawn with ``ut.plot_shaded_lines``. The output
    file name embeds ``self.pre`` and ``self.post``.

    Each result of ``analyze_num_lang_edition`` is expected to be a mapping
    (or Series) keyed by the column names listed in ``columns`` below --
    NOTE(review): confirm against that helper.
    """
    columns = ["class", "mean-men", "sem-men", "mean-women", "sem-women"]
    interval = 10
    decade = 1000

    print(self.people.shape)
    print(self.people.head(n=1))

    # Everything born before year 1000 is lumped into a single bucket.
    people = self.people[(self.people.birth_year < decade) & (self.people.birth_year >= 0)]
    print(people.shape)

    # Collect rows and build the frame once; DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call.
    rows = [self.analyze_num_lang_edition("0-1000", people)]

    while decade < 2015:
        end = decade + interval
        people = self.people[(self.people.birth_year >= decade) & (self.people.birth_year < end)]
        print(decade)
        print(end)
        print(people.shape)
        rows.append(self.analyze_num_lang_edition(str(decade) + "-" + str(end), people))
        decade = end

    res = pd.DataFrame(rows).reindex(columns=columns)

    ut.plot_shaded_lines(
        res["class"].values,
        res["mean-women"].values,
        res["mean-men"].values,
        res["sem-women"].values,
        # The men's series gets its own standard error; the original passed
        # "sem-women" twice.
        res["sem-men"].values,
        "Mean Num Editions",
        "Birth Year",
        "img/numedition_gender_deacdes" + self.pre + "-" + self.post + ".png",
    )
def analyze_numlangedition_per_decade(self):
    """Plot language-edition statistics by birth-year bucket for women and men.

    The first bucket covers ``0 <= birth_year < 1000`` and is labelled
    ``"0-1000"``. Subsequent buckets are 10 years wide,
    ``[decade, decade + 10)``, and are generated while ``decade < 2015``.
    Every bucket goes through ``self.analyze_num_lang_edition``; the
    gathered ``mean-*`` / ``sem-*`` columns are passed to
    ``ut.plot_shaded_lines``, writing to a file whose name includes
    ``self.pre`` and ``self.post``.

    Each result of ``analyze_num_lang_edition`` is expected to be a mapping
    (or Series) keyed by the column names listed in ``columns`` below --
    NOTE(review): confirm against that helper.
    """
    columns = ['class', 'mean-men', 'sem-men', 'mean-women', 'sem-women']
    interval = 10
    decade = 1000

    print(self.people.shape)
    print(self.people.head(n=1))

    # Births before year 1000 are aggregated into one coarse bucket.
    people = self.people[(self.people.birth_year < decade) & (self.people.birth_year >= 0)]
    print(people.shape)

    # Accumulate rows in a list and create the DataFrame a single time:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic.
    rows = [self.analyze_num_lang_edition("0-1000", people)]

    while decade < 2015:
        end = decade + interval
        people = self.people[(self.people.birth_year >= decade) & (self.people.birth_year < end)]
        print(decade)
        print(end)
        print(people.shape)
        rows.append(self.analyze_num_lang_edition(str(decade) + "-" + str(end), people))
        decade = end

    res = pd.DataFrame(rows).reindex(columns=columns)

    ut.plot_shaded_lines(
        res["class"].values,
        res["mean-women"].values,
        res["mean-men"].values,
        res["sem-women"].values,
        # Fixed: error band for the men's line now uses "sem-men" instead of
        # repeating "sem-women".
        res["sem-men"].values,
        'Mean Num Editions',
        'Birth Year',
        'img/numedition_gender_deacdes' + self.pre + "-" + self.post + '.png',
    )