def read_articles():
    '''
    Read all articles from mongodb collection 'articles' into a dataframe.
        - INPUT: None
        - OUTPUT: df.   columns: url, body_text, title, uri, dt
            url       - key of the cleaned-articles dict
            body_text - second element of the cleaned value for that url
            title     - looked up by url, 'Unknown' when missing
            uri       - host part of the url, via parse_url(url).host
            dt        - looked up by url, '' when missing

    Prints the number of cleaned articles and the elapsed read/clean time.
    The mongo handle is closed even when reading or cleaning raises.
    '''
    my_mongo = MyMongo()
    try:
        t0 = time.time()
        cur_articles = my_mongo.get_article_body_text(testing=0)

        # clean_articles fills the dict passed in (url -> cleaned value);
        # NOTE(review): presumably it de-duplicates by url -- confirm.
        articles_cleaned = {}
        clean_articles(cur_articles, articles_cleaned)
        # Single-argument parenthesised print: same output under the
        # Python 2 print statement and the Python 3 print function.
        print('%d unique articles with body_text' % len(articles_cleaned))

        t1 = time.time()  # time it
        print("finished in  %4.4fmin for %s " % ((t1 - t0) / 60, 'read/clean articles'))

        records = [{'url': k, 'body_text': v[1]}
                   for k, v in articles_cleaned.items()]
        if records:
            df = pd.DataFrame(records)
        else:
            # An empty record list yields a frame without any columns, and
            # the df['url'] lookups below would raise KeyError.
            df = pd.DataFrame(columns=['body_text', 'url'])

        # Reuse the open handle rather than creating a second MyMongo()
        # that is never closed.
        article_dict, article_dt = my_mongo.get_article_attri()
        df['title'] = df['url'].map(lambda x: article_dict.get(x, 'Unknown'))
        df['uri'] = df['url'].map(lambda x: parse_url(x).host)
        df['dt'] = df['url'].map(lambda x: article_dt.get(x, ''))
    finally:
        my_mongo.close()

    return df