outputs: @author: Misha """ import GSSUtility as GU # this also imports a whole bunch of other modules if __name__ == "__main__": pathToData = "../../Data/" dataCont = GU.dataContainer(pathToData) articlesToUse = GU.filterArticles( dataCont.articleClasses, GSSYearsUsed=True, GSSYearsPossible=False, centralIVs=True ) print "len of articleClasses:", len(articlesToUse) raw_input("...") # define the storage containers for outputs group1 = "onDataUsed" group2 = "onFutureYear" output = defaultdict(dict) groups = [group1, group2] outcomes = [ "propSig", "paramSizesNormed", "Rs", "adjRs", "pvalues",
# In[148]:

# Group the rows that have a publication year by that year and peek at 2004.
grouped = df[df.year_published.notnull()].groupby('year_published')
grouped.get_group(2004).head()


# # Number of variables over time

# In[52]:

# Build a table with one row per article holding how many DVs, IVs and
# controls the article lists, plus their sum in `total`.
pathToData = '../../Data/'
dataCont = GU.dataContainer(pathToData)
articlesClasses = GU.filterArticles(dataCont.articleClasses, GSSYearsUsed=True)

df = pd.DataFrame(columns=['aid', 'yearpublished', 'dvs', 'ivs', 'controls', 'total'])

# Iterate over the filtered list assigned just above.  The loop previously
# referenced `articleClasses` (no "s" after "article"), a name this cell never
# defines, so it either raised NameError or silently used a stale list from
# an earlier cell.
for a in articlesClasses:
    # dtype=object keeps the variable lists intact as single cells; the
    # trailing 0 is a placeholder for `total`, filled in below.
    df.loc[a.articleID, :] = np.array(
        [a.articleID, a.yearPublished, a.DVs, a.IVs, a.controls, 0],
        dtype=object)

# Drop articles without a publication year so the int casts below succeed.
df = df[df.yearpublished.notnull()]
df.yearpublished = df.yearpublished.astype(int)
df.aid = df.aid.astype(int)
df.index = df.aid

# Replace each list of variables with its length.  Iterating a Series yields
# its values, so the (key, value) pairs from iteritems() are not needed.
df.dvs = [len(v) for v in df.dvs]
df.ivs = [len(v) for v in df.ivs]
df.controls = [len(v) for v in df.controls]
df.total = df.dvs + df.ivs + df.controls
############################################################ if __name__ == "__main__": try: get_ipython().magic(u'rm ../GSSUtility.pyc # remove this file because otherwise it will be used instead of the updated .py file') reload(GU) except: pass pathToData = '../../Data/' dataCont = GU.dataContainer(pathToData) articlesToUse = GU.filterArticles(dataCont.articleClasses, GSSYearsUsed=True, GSSYearsPossible=False, centralIVs=False, nextYearBound=0, linearModels=False) print 'len of articleClasses:', len(articlesToUse) # raw_input('...') # define the storage containers for outputs group1 = 'on last GSS year' group2 = 'on first "future" GSS year' groups = [group1, group2] outcomes = ['propSig', 'paramSizesNormed', 'Rs', 'adjRs', 'pvalues', 'numTotal', 'propSig_CentralVars', 'paramSizesNormed_CentralVars', 'pvalues_CentralVars'] output = defaultdict(dict) output['metadata'] = {'article_id':[]} for group in groups: for outcome in outcomes: output[group][outcome] = []
#********************************************************* allPropsForYearsUsed = [] allPropsForYearsPossible =[] allParamSizesForYearsUsed = [] allParamSizesForYearsPossible = [] allRsForYearsUsed, allRsForYearsPossible = [], [] ############################################################ if __name__ == "__main__": pathToData = '../../Data/' dataCont = GU.dataContainer(pathToData) articlesToUse = GU.filterArticles(dataCont.articleClasses, GSSYearsUsed=True, GSSYearsPossible=True, centralIVs=False, nextYearBound=3, yearPublished=True) print 'len of articleClasses:', len(articlesToUse) raw_input('...') YEARS = range(1972, 2013) outcomes = ['propSig', 'paramSizesNormed', 'Rs', 'adjRs', 'pvalues', \ 'propSig_CentralVars', 'paramSizesNormed_CentralVars', 'pvalues_CentralVars'] output = pd.DataFrame(np.empty((len(YEARS), len(outcomes))), columns=outcomes, index=YEARS) output = output.astype(object) for row in output.iterrows(): for col in range(len(row[1])): row[1][col] = [] #for article in random.sample(articlesToUse, 150): for article in articlesToUse: #for article in [a for a in articlesToUse if a.articleID == 6755]: