print('Number of sentences: ' + str(len(crp.sents()))) print('Number of words: ' + str(len(crp.words()))) #corpus_Stats(data_m) #print('\n'+'First file: '+ data_fileids[0]) #print('Last file: '+ data_fileids[-1]) #%% num_para_py = defaultdict(int) num_word_py = defaultdict(int) for y in range(1983, 2013): files = data_m.fileids(str(y)) files_size = len(files) num_para_py[y] += sum([len(data_m.paras(f)) for f in files]) / files_size num_word_py[y] += sum([len(data_m.words(f)) for f in files]) / files_size para_words = pd.DataFrame( [num_para_py, num_word_py], index=['Average number of paragraphs', 'Average number of words']).T #word around groupbed bar charts trace0 = go.Bar(x=para_words.index, y=para_words['Average number of paragraphs'], name='Average number of paragraphs ') trace1 = go.Bar(x=para_words.index, y=[0], showlegend=False, hoverinfo='none') trace2 = go.Bar(x=para_words.index, y=[0], yaxis='y2', showlegend=False,
print('Number of sentences: '+str(len(crp.sents()))) print('Number of words: '+str(len(crp.words()))) #corpus_Stats(data_m) print('\n'+'First file: '+ data_fileids[0]) print('Last file: '+ data_fileids[-1]) #%% num_para_py = defaultdict(int) num_word_py = defaultdict(int) for y in range(1993,2019): files = data_m.fileids(str(y)) files_size = len(files) num_para_py[y] += sum([len(data_m.paras(f))for f in files])/files_size num_word_py[y] += sum([len(data_m.words(f))for f in files])/files_size para_words = pd.DataFrame([num_para_py,num_word_py], index = ['Average number of paragraphs','Average number of words']).T #word around groupbed bar charts trace0 = go.Bar(x = para_words.index, y=para_words['Average number of paragraphs'], name ='Average number of paragraphs ') trace1 = go.Bar(x = para_words.index, y=[0],showlegend=False,hoverinfo='none') trace2 = go.Bar(x = para_words.index, y=[0], yaxis='y2',showlegend=False,hoverinfo='none') trace3 = go.Bar(x = para_words.index, y=para_words['Average number of words'], yaxis='y2',name ='Average number of words' ) data = [trace0,trace1,trace2,trace3]#,trace2 layout = go.Layout(barmode='group',