コード例 #1
0
ファイル: language.py プロジェクト: aakinlalu/tweetdashboard
	def getData(self, params):
	    '''Return the most frequent tweet languages as a two-column DataFrame.

	    params: mapping whose 'top' entry (anything ``int()`` accepts) gives
	        the number of rows to return.

	    Returns a DataFrame with the columns 'Language' and 'Count', in the
	    order produced by ``value_counts()`` on the 'lang' column of the
	    frame returned by ``conn.createDataframe()``, cut to the first
	    ``top`` rows. Codes listed in the 'Abbreviation' column of
	    language.csv are replaced by the matching value from that file's
	    second column; any other code is kept unchanged.
	    '''
	    top = int(params['top'])
	    counts = conn.createDataframe()['lang'].value_counts()

	    language = pd.read_csv('language.csv', sep=',', encoding='utf-8')
	    # Lookup table built column-wise: first column -> second column.
	    # NOTE(review): assumes the first column is 'Abbreviation', as the
	    # membership test below implies -- confirm against language.csv.
	    languageAbbr = dict(zip(language.iloc[:, 0], language.iloc[:, 1]))
	    # Built once here instead of once per language code.
	    abbreviations = set(language['Abbreviation'])

	    lang = [languageAbbr[code] if code in abbreviations else code
	            for code in counts.keys()]
	    df = pd.DataFrame({'Language': lang, 'Count': list(counts)})
	    # Fix the column order explicitly before truncating.
	    return df[['Language', 'Count']][:top]
コード例 #2
0
ファイル: sentiment.py プロジェクト: aakinlalu/tweetdashboard
    Repeating letter e.g hungrryyy for hungry
    Punctuation
    '''
    stopWords = get_stop_words('en')
    stopWords.append('at_user')
    stopWords.append('url') 
    return stopWords

def featureVector(tweet):
    '''Return the lower-cased feature words extracted from a tweet.

    The tweet is split on whitespace. Each word is passed through
    ``replacefn`` (presumably collapses repeated letters -- confirm against
    its definition) and then stripped of leading/trailing quote and
    punctuation characters ('"?,.). A word is kept only when it is not in
    the list returned by ``stopWord()`` AND it consists of a letter followed
    solely by letters or digits.

    tweet: the tweet text, or None.

    Returns the kept words, lower-cased, in their original order. An empty
    list is returned when ``tweet`` is None.
    '''
    featureVectorList = []
    # Guard before touching the tweet; iterating None would raise.
    if tweet is None:
        return featureVectorList

    regex = r'^[a-zA-Z][a-zA-Z0-9]*$'
    # Fetch the stop words once instead of once per word.
    stopWords = set(stopWord())

    # Iterate over words, not over the individual characters of the tweet.
    for word in tweet.split():
        word = replacefn(word).strip('\'"?,.')
        # Skip stop words and anything that is not a plain alphanumeric
        # token starting with a letter. Both conditions must hold for a word
        # to be kept.
        if word in stopWords or re.search(regex, word) is None:
            continue
        featureVectorList.append(word.lower())

    return featureVectorList

# Script section: load the tweets, clean each tweet's text with
# ``processing`` and print the resulting feature vector, one per line.
df = conn.createDataframe()
k = df['text'].map(processing)
for x in k:
    # The parenthesised single-argument form prints the same thing under
    # Python 2 and is also valid Python 3.
    print(featureVector(x))

#print df['tweet'].map(lambda x: featureVector(x))