Example #1
0
def query(q, minTime, maxTime, timeWindow):
    print "helloo"

 #    q=term1+"+"+term2 #query

    #time loop
    time=minTime
    print minTime
    print maxTime

    while (time<maxTime):
        timeL=time
        timeU=time+timeWindow
        time=time+timeWindow

        #finding the pagae number
        i=1
        link="http://topsy.com/s/"+q+"/tweet?maxtime="+str(timeU)+"&mintime="+str(timeL)+"&offset="+str((i-1)*10)+"&page="+str(i)
        search = urllib2.urlopen(link)
        html = search.read()
        soup = BeautifulSoup(html)
        try:
            pageNumText=soup.find('span',{"class":"page-number"}).text # the text is "...about page#" or "... of page#"
        except:
            pageNumText=" "

        if (pageNumText.find('about')!=-1):
            pageIndex=pageNumText.find('about')+6
        elif ((pageNumText.find('of')!=-1)):
            pageIndex=pageNumText.find('of')+3
        #finding the maximum page#
        try:
            page=int(pageNumText[pageIndex:])
        except:
            page=2


        #page loop
        for i in range (1,page):

            link="http://topsy.com/s/"+q+"/tweet?maxtime="+str(timeU)+"&mintime="+str(timeL)+"&offset="+str((i-1)*10)+"&page="+str(i)
            print link
            search = urllib2.urlopen(link)
            html = search.read()
            soup = BeautifulSoup(html)

            #print soup

            for body in soup.findAll('div',class_="twitter-post-big"):#The body loop


                for tweet in body.findAll('span',{"class":"twitter-post-text translatable language-en"}):#The tweets loop
                    a = tweet.text
                    myfile.write(a.encode("utf-8")+'\n')
                    print a,'\n'
                    ValidatedTweet=cl.cleanseTweet(a)
                    tweets.append(tweet.text)
Example #2
0
def query(term1,term2,page):
    ValidatedTweet = ''
    a =''
    for i in range (1,page):
        search = urllib2.urlopen("http://topsy.com/s/"+term1+"+"+term2+"/tweet?allow_lang=en&window=m&om=b&offset="+str(i*10)+"&page="+str(i))
        html = search.read()
        soup = BeautifulSoup(html)
        for body in soup.findAll('div',class_="twitter-post-big"):#The body loop
            for tweet in body.findAll('span',{"class":"twitter-post-text translatable language-en"}):#The tweets loop
                a = tweet.text
                myfile.write(a.encode("utf-8")+'\n')
                print a,'\n'
                ValidatedTweet=cl.cleanseTweet(a)
                tweets.append(tweet.text)
Example #3
0
def query(term1, term2, page):
    ValidatedTweet = ''
    a = ''
    for i in range(1, page):
        search = urllib2.urlopen("http://topsy.com/s/" + term1 + "+" + term2 +
                                 "/tweet?allow_lang=en&window=m&om=b&offset=" +
                                 str(i * 10) + "&page=" + str(i))
        html = search.read()
        soup = BeautifulSoup(html)
        for body in soup.findAll('div',
                                 class_="twitter-post-big"):  #The body loop
            for tweet in body.findAll(
                    'span',
                {"class": "twitter-post-text translatable language-en"
                 }):  #The tweets loop
                a = tweet.text.encode("utf-8")
                myfile.write(a + '\n')
                ValidatedTweet = cl.cleanseTweet(a)
                tweets.append(ValidatedTweet)
                print ValidatedTweet, '\n'