print('Number of sentences: ' + str(len(crp.sents())))
    print('Number of words: ' + str(len(crp.words())))


#corpus_Stats(data_m)
#print('\n'+'First file: '+ data_fileids[0])
#print('Last file: '+ data_fileids[-1])

#%%
# Per-year averages (per file) of paragraph and word counts, keyed by the
# integer year.
num_para_py = defaultdict(int)
num_word_py = defaultdict(int)

for y in range(1983, 2013):  # years 1983 through 2012
    # File ids that `data_m` returns for this year's string label.
    files = data_m.fileids(str(y))
    files_size = len(files)
    # NOTE(review): an empty `files` list would divide by zero below --
    # confirm every year in the range has at least one file.
    num_para_py[y] += sum(len(data_m.paras(f)) for f in files) / files_size
    num_word_py[y] += sum(len(data_m.words(f)) for f in files) / files_size

# The two dicts are stacked as labelled rows and then transposed, so the
# resulting frame has one row per year and one column per metric.
para_words = pd.DataFrame(
    data=[num_para_py, num_word_py],
    index=['Average number of paragraphs', 'Average number of words'],
).transpose()

# workaround for grouped bar charts
# Bar series for the average paragraph count, one bar per row of `para_words`.
trace0 = go.Bar(
    x=para_words.index,
    y=para_words['Average number of paragraphs'],
    name='Average number of paragraphs ',
)
# Placeholder bar with a single zero value, hidden from the legend and from
# hover. NOTE(review): presumably part of the grouped-bar workaround that
# reserves a slot next to the real series -- confirm.
trace1 = go.Bar(
    x=para_words.index,
    y=[0],
    showlegend=False,
    hoverinfo='none',
)
trace2 = go.Bar(x=para_words.index,
                y=[0],
                yaxis='y2',
                showlegend=False,
    print('Number of sentences: '+str(len(crp.sents())))
    print('Number of words: '+str(len(crp.words())))

#corpus_Stats(data_m)

# Show the first and the last entry of `data_fileids`, preceded by a blank line.
print('\nFirst file: ' + data_fileids[0])
print('Last file: ' + data_fileids[-1])

#%%
# Per-year averages (per file) of paragraph and word counts, keyed by the
# integer year.
num_para_py = defaultdict(int)
num_word_py = defaultdict(int)

for y in range(1993, 2019):  # years 1993 through 2018
    # File ids that `data_m` returns for this year's string label.
    files = data_m.fileids(str(y))
    files_size = len(files)
    # NOTE(review): an empty `files` list would divide by zero below --
    # confirm every year in the range has at least one file.
    num_para_py[y] += sum(len(data_m.paras(f)) for f in files) / files_size
    num_word_py[y] += sum(len(data_m.words(f)) for f in files) / files_size

# The two dicts are stacked as labelled rows and then transposed, so the
# resulting frame has one row per year and one column per metric.
para_words = pd.DataFrame(
    data=[num_para_py, num_word_py],
    index=['Average number of paragraphs', 'Average number of words'],
).transpose()

# workaround for grouped bar charts
# Bar series for the average paragraph count (default y-axis).
trace0 = go.Bar(
    x=para_words.index,
    y=para_words['Average number of paragraphs'],
    name='Average number of paragraphs ',
)
# Two placeholder bars, each with a single zero value and hidden from the
# legend and from hover: one on the default axis, one on 'y2'.
# NOTE(review): presumably these reserve slots so that the bars on the two
# y-axes sit side by side instead of overlapping in group mode -- confirm.
trace1 = go.Bar(
    x=para_words.index,
    y=[0],
    showlegend=False,
    hoverinfo='none',
)
trace2 = go.Bar(
    x=para_words.index,
    y=[0],
    yaxis='y2',
    showlegend=False,
    hoverinfo='none',
)
# Bar series for the average word count, drawn against the 'y2' axis.
trace3 = go.Bar(
    x=para_words.index,
    y=para_words['Average number of words'],
    yaxis='y2',
    name='Average number of words',
)
# Trace order as passed on to the figure: real, placeholder, placeholder, real.
data = [trace0, trace1, trace2, trace3]

layout = go.Layout(barmode='group',