def testPageFetcher():
    """Manually exercise ``WebContentCrawler.getPagesInTime`` for google.com.

    Builds a crawler from ``top-1m.csv``, calls ``getPagesInTime`` with
    ``'google.com'``, the current ``[year, month, day, hour]`` and the list
    ``[3, 0, 0]``, then prints element 0 of every value in the returned
    mapping. Returns ``None``.

    NOTE(review): the meaning of the ``[3, 0, 0]`` argument is not visible
    here (presumably some kind of time window) -- confirm against
    ``getPagesInTime``.
    NOTE(review): a second ``testPageFetcher`` is defined further down in
    this file and rebinds this name at import time.
    """
    crawler = WebContentCrawler('top-1m.csv')
    # Called `now` rather than `time` so the stdlib module name is not shadowed.
    now = datetime.now()
    # Alternative previously tried for the last argument: [0, 0, 0].
    pages = crawler.getPagesInTime(
        'google.com', [now.year, now.month, now.day, now.hour], [3, 0, 0])
    for key in pages:
        # Parenthesised single-argument form prints the same text under
        # Python 2 and is also valid Python 3 syntax.
        print(pages[key][0])
def testPageFetcher():
    """Fetch google.com pages via ``getPagesInTime`` and print each result.

    Steps, in order:
      1. Create a ``WebContentCrawler`` from ``top-1m.csv``.
      2. Take the current local time and pass its year, month, day and hour
         as a list, together with ``[3, 0, 0]``, to ``getPagesInTime``.
      3. Print element 0 of every entry in the returned mapping.

    Returns ``None``.

    NOTE(review): what ``[3, 0, 0]`` encodes is not shown in this file
    section -- verify against ``getPagesInTime`` before changing it.
    NOTE(review): this definition shares its name with an earlier
    ``testPageFetcher`` in this file and therefore replaces it.
    """
    webCrawler = WebContentCrawler('top-1m.csv')
    # Avoid naming the local `time`; that would shadow the stdlib module.
    current = datetime.now()
    when = [current.year, current.month, current.day, current.hour]
    # Earlier variant of this call used [0, 0, 0] as the last argument.
    returnDict = webCrawler.getPagesInTime('google.com', when, [3, 0, 0])
    for key in returnDict:
        # print(...) with one argument is equivalent to the Python 2 print
        # statement and keeps the function importable under Python 3.
        print(returnDict[key][0])
def webCrawlerDriver():
    """Run two ``fetchAndStoreTopPages`` passes with one shared timestamp.

    A ``WebContentCrawler`` is built from ``top-1m.csv``; the current local
    time is then captured once and handed to both calls, first with
    ``(1, 5000)`` and then with ``(150000, 5000)``. Returns ``None``.

    NOTE(review): the first two arguments look like a starting position and
    a count -- confirm against ``fetchAndStoreTopPages``.
    """
    crawler = WebContentCrawler('top-1m.csv')
    stamp = datetime.now()
    for first_arg in (1, 150000):
        crawler.fetchAndStoreTopPages(first_arg, 5000, stamp)
    # Disabled experiment, kept for reference:
    #   timeString = '<year>_<month>_<day>_<hour>/' built from datetime.now()
    #   webCrawler.storeTree(pages, 'web/' + timeString)
    #   cmpTree = webCrawler.loadTreeFromMem('web/' + timeString + 'google.com.html')
    #   print html.tostring(cmpTree)
def webCrawlerDriver():
    """Drive the crawler: fetch and store two batches of top pages.

    Creates a ``WebContentCrawler`` from ``top-1m.csv``, records the current
    local time once, and calls ``fetchAndStoreTopPages`` twice with that same
    time value: ``(1, 5000, t)`` followed by ``(150000, 5000, t)``.
    Returns ``None``.

    NOTE(review): this definition shares its name with an earlier
    ``webCrawlerDriver`` in this file and therefore replaces it.
    """
    crawler = WebContentCrawler('top-1m.csv')
    snapshot_time = datetime.now()
    second_arg = 5000  # same literal is used by both calls
    crawler.fetchAndStoreTopPages(1, second_arg, snapshot_time)
    crawler.fetchAndStoreTopPages(150000, second_arg, snapshot_time)
    # Commented-out follow-up from the original author (store a tree under a
    # time-stamped 'web/' directory, reload it, and print it):
    #time=datetime.now()
    #timeString=str(time.year)+'_'+str(time.month)+'_'+str(time.day)+'_'+str(time.hour)+'/';
    #webCrawler.storeTree(pages, 'web/'+timeString);
    #cmpTree=webCrawler.loadTreeFromMem('web/'+timeString+'google.com.html');
    #print html.tostring(cmpTree);