def __init__(self, corpusdirectory):
        """Load a Twitter corpus and compute the relations between its users.

        Every file reported by ``preprocess.find_corpus_files(corpusdirectory)``
        is read and processed line by line. A valid line consists of three
        tab-separated fields: user name, time and tweet message. Lines with
        fewer than three fields are skipped.

        A ``TwitterUser`` is registered for every user name seen, but only
        tweets containing an ``@`` are stored on the user.

        corpusdirectory -- passed unchanged to ``preprocess.find_corpus_files``
        """
        # Dictionary that stores all twitter users. The keys are the names,
        # the values are TwitterUser objects.
        self.users = {}

        # Load the twitter corpus
        for filepath in preprocess.find_corpus_files(corpusdirectory):
            text = preprocess.read_corpus_file(filepath)
            for line in text.split("\n"):
                # At most two splits give exactly three fields. Any further
                # tab belongs to the message itself and must not cause the
                # whole line to be rejected.
                fields = line.split("\t", 2)
                if len(fields) != 3:
                    continue  # invalid line in our data, ignore it
                user, time, tweetmessage = fields

                if user not in self.users:
                    # New user: make a TwitterUser instance and register it.
                    self.users[user] = TwitterUser(user)

                # An @ indicates there may be @recipient syntax in the
                # message; tweets without it are of no interest here.
                if "@" in tweetmessage:
                    self.users[user].append(Tweet(tweetmessage, time))

        # Compute relations between users. Iterating over self relies on the
        # enclosing class being iterable (presumably yielding the TwitterUser
        # objects -- confirm against the class's __iter__).
        for user in self:
            user.computerelations(self)
Beispiel #2
0
    def __init__(self, corpusdirectory):
        """Load a Twitter corpus and compute the relations between its users.

        Every file reported by ``preprocess.find_corpus_files(corpusdirectory)``
        is read and processed line by line. A valid line consists of three
        tab-separated fields: user name, time and tweet message. Lines with
        fewer than three fields are skipped.

        A ``TwitterUser`` is registered for every user name seen, but only
        tweets containing an ``@`` are stored on the user.

        corpusdirectory -- passed unchanged to ``preprocess.find_corpus_files``
        """
        # Dictionary that stores all twitter users. The keys are the names,
        # the values are TwitterUser objects.
        self.users = {}

        # Load the twitter corpus
        for filepath in preprocess.find_corpus_files(corpusdirectory):
            text = preprocess.read_corpus_file(filepath)
            for line in text.split("\n"):
                # At most two splits give exactly three fields. Any further
                # tab belongs to the message itself and must not cause the
                # whole line to be rejected.
                fields = line.split("\t", 2)
                if len(fields) != 3:
                    continue  # invalid line in our data, ignore it
                user, time, tweetmessage = fields

                if user not in self.users:
                    # New user: make a TwitterUser instance and register it.
                    self.users[user] = TwitterUser(user)

                # An @ indicates there may be @recipient syntax in the
                # message; tweets without it are of no interest here.
                if "@" in tweetmessage:
                    self.users[user].append(Tweet(tweetmessage, time))

        # Compute relations between users. Iterating over self relies on the
        # enclosing class being iterable (presumably yielding the TwitterUser
        # objects -- confirm against the class's __iter__).
        for user in self:
            user.computerelations(self)
Beispiel #3
0
def extract_features(filename):
    """Read the corpus file *filename* and return its tokenised contents."""
    contents = read_corpus_file(filename)
    return tokenise(contents)