def normGoogleProfile(jresult):
    """Normalize a raw Google profile JSON dict into the common profile schema.

    Expected jresult fields (per the Google profile payload):
    placesLived(value, primary), name(givenName, familyName), displayName,
    circledByCount, occupation, aboutMe, organizations(name, title, endDate,
    startDate), gender.

    Returns a dict with keys: name, displayName, placesLived, circledByCount,
    tags, nameLang, displayNameLang.
    """
    # NOTE(review): this function is defined twice in this file with identical
    # bodies; the later definition is the one Python keeps. Remove one copy.
    profile = dict()
    # Tolerate a missing/partial "name" object instead of raising KeyError,
    # consistent with the .get() defaults used for every other field below.
    name = jresult.get("name", {})
    profile["name"] = (name.get("givenName", "") + " " + name.get("familyName", "")).lower().strip()
    profile["displayName"] = jresult.get("displayName", "").lower().strip()
    profile["placesLived"] = jresult.get("placesLived", list())
    profile["circledByCount"] = jresult.get("circledByCount", 0)
    profile["tags"] = getGoogleTag(jresult)
    profile["nameLang"] = ut.detectLang(profile["name"])
    profile["displayNameLang"] = ut.detectLang(profile["displayName"])
    return profile
def normTwitterProfile(jresult):
    """Normalize a raw Twitter user JSON dict into the common profile schema.

    Expected jresult fields: location, name, screen_name, lang,
    followers_count, description.

    Returns a dict with keys: name, displayName, placesLived, circledByCount,
    tags, nameLang, displayNameLang (same schema as normGoogleProfile).
    """
    profile = dict()
    # Use .get() defaults throughout so a sparse payload does not raise
    # KeyError (the original mixed direct indexing with .get() defaults).
    profile["name"] = jresult.get("name", "").lower().strip()
    profile["displayName"] = jresult.get("screen_name", "").lower().strip()
    profile["placesLived"] = [{"value": jresult.get("location", ""), "primary": True}]
    profile["circledByCount"] = jresult.get("followers_count", 0)
    profile["tags"] = getStringTag(jresult.get("description", ""))
    profile["nameLang"] = ut.detectLang(profile["name"])
    profile["displayNameLang"] = ut.detectLang(profile["displayName"])
    # print(profile)
    return profile
def normGoogleProfile(jresult):
    """Normalize a raw Google profile JSON dict into the common profile schema.

    Expected jresult fields (per the Google profile payload):
    placesLived(value, primary), name(givenName, familyName), displayName,
    circledByCount, occupation, aboutMe, organizations(name, title, endDate,
    startDate), gender.

    Returns a dict with keys: name, displayName, placesLived, circledByCount,
    tags, nameLang, displayNameLang.
    """
    # NOTE(review): this function is defined twice in this file with identical
    # bodies; the later definition is the one Python keeps. Remove one copy.
    profile = dict()
    # Tolerate a missing/partial "name" object instead of raising KeyError,
    # consistent with the .get() defaults used for every other field below.
    name = jresult.get("name", {})
    profile["name"] = (name.get("givenName", "") + " " + name.get("familyName", "")).lower().strip()
    profile["displayName"] = jresult.get("displayName", "").lower().strip()
    profile["placesLived"] = jresult.get("placesLived", list())
    profile["circledByCount"] = jresult.get("circledByCount", 0)
    profile["tags"] = getGoogleTag(jresult)
    profile["nameLang"] = ut.detectLang(profile["name"])
    profile["displayNameLang"] = ut.detectLang(profile["displayName"])
    return profile
def normGoogleWall(jresult):
    """Normalize paginated Google wall/activity JSON into a list of posts.

    jresult is expected to be a list of result pages, each a dict with an
    "items" list of activity posts; anything that is not a list yields [].
    Processing stops once page_count exceeds 10 (original comment: revise
    the size in the future).

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    page_count = 0
    # isinstance() instead of type() == list (handles subclasses, idiomatic).
    if isinstance(jresult, list):
        for page in jresult:
            if page_count > 10:  # revise the size in the future
                break
            for post in page["items"]:
                published_time = formatGoogleTime(post["published"])
                place = formatGooglePlace(post.get("location", ""), 2)
                info = post.get("object", "")
                # Keep the explicit != "" test: only the .get() default means
                # "absent"; an empty dict object would still be processed.
                if info != "":
                    text = info.get("content", "")
                    urls = getGoogleUrls(info.get("attachments", ""))
                    lang = ut.detectLang(text)
                    text_en = ut.translate(text, lang)
                    sentiment = ut.getSentiment(text_en)
                    topic_distri = ut.getTopic(text_en)
                    tf = ut.wordProcess(text, lang)
                    posts.append(getPost(text, text_en, published_time, place,
                                         urls, lang, sentiment, topic_distri, tf))
            # NOTE(review): counted once per page (matches the variable name
            # and the page-cap break above) — confirm against the original
            # intent, the source formatting was ambiguous here.
            page_count += 1
    return posts
def normGoogleWall(jresult):
    """Normalize paginated Google wall/activity JSON into a list of posts.

    jresult is expected to be a list of result pages, each a dict with an
    "items" list of activity posts; anything that is not a list yields [].
    Processing stops once page_count exceeds 10 (original comment: revise
    the size in the future).

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    page_count = 0
    # isinstance() instead of type() == list (handles subclasses, idiomatic).
    if isinstance(jresult, list):
        for page in jresult:
            if page_count > 10:  # revise the size in the future
                break
            for post in page["items"]:
                published_time = formatGoogleTime(post["published"])
                place = formatGooglePlace(post.get("location", ""), 2)
                info = post.get("object", "")
                # Keep the explicit != "" test: only the .get() default means
                # "absent"; an empty dict object would still be processed.
                if info != "":
                    text = info.get("content", "")
                    urls = getGoogleUrls(info.get("attachments", ""))
                    lang = ut.detectLang(text)
                    text_en = ut.translate(text, lang)
                    sentiment = ut.getSentiment(text_en)
                    topic_distri = ut.getTopic(text_en)
                    tf = ut.wordProcess(text, lang)
                    posts.append(getPost(text, text_en, published_time, place,
                                         urls, lang, sentiment, topic_distri, tf))
            # NOTE(review): counted once per page (matches the variable name
            # and the page-cap break above) — confirm against the original
            # intent, the source formatting was ambiguous here.
            page_count += 1
    return posts
def normTwitterWall(wall):
    """Normalize a list of raw tweet dicts into a list of post dicts.

    For each tweet: extracts text, creation time, place and urls; detects the
    language when the payload's "lang" field is empty; translates to English
    and derives sentiment, topic distribution and term frequencies.

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    for post in wall:
        text = post.get("text", "")
        # Renamed from `time` to avoid shadowing the stdlib time module
        # (referenced elsewhere in this file as time.time()).
        created_time = formatTwitterTime(post.get("created_at"))
        place = formatTwitterPlace(post["geo"], 2)
        urls = getTwitterUrls(post)
        lang = post.get("lang", "")
        if lang == "":
            lang = ut.detectLang(text)
        # translate text
        text_en = ut.translate(text, lang)
        sentiment = ut.getSentiment(text_en)
        topic_distri = ut.getTopic(text_en)
        tf = ut.wordProcess(text, lang)
        posts.append(getPost(text, text_en, created_time, place, urls, lang,
                             sentiment, topic_distri, tf))
    return posts
def normTwitterWall(wall):
    """Normalize a list of raw tweet dicts into a list of post dicts.

    For each tweet: extracts text, creation time, place and urls; detects the
    language when the payload's "lang" field is empty; translates to English
    and derives sentiment, topic distribution and term frequencies.

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    for post in wall:
        text = post.get("text", "")
        # Renamed from `time` to avoid shadowing the stdlib time module
        # (referenced elsewhere in this file as time.time()).
        created_time = formatTwitterTime(post.get("created_at"))
        place = formatTwitterPlace(post["geo"], 2)
        urls = getTwitterUrls(post)
        lang = post.get("lang", "")
        if lang == "":
            lang = ut.detectLang(text)
        # translate text
        text_en = ut.translate(text, lang)
        sentiment = ut.getSentiment(text_en)
        topic_distri = ut.getTopic(text_en)
        tf = ut.wordProcess(text, lang)
        posts.append(getPost(text, text_en, created_time, place, urls, lang,
                             sentiment, topic_distri, tf))
    return posts
def getStringTag(string):
    """Return the processed word tokens of *string* as a list of tags.

    Detects the string's language, runs the word processor on it, and
    returns the resulting term keys.
    """
    detected_lang = ut.detectLang(string)
    term_freq = ut.wordProcess(string, detected_lang)
    # Iterating a dict yields its keys, so this equals list(term_freq.keys()).
    return list(term_freq)